c: Finish the phogen module

This commit implements the `phogen_encode` function, which takes a bhash
as argument and generates a phonetic string.
This commit is contained in:
2021-11-14 09:12:20 +01:00
parent ad0627b9c1
commit 560db14a85
4 changed files with 78 additions and 4 deletions

View File

@ -1,10 +1,78 @@
#include <stdlib.h>
/*
* =========================================================================
* Phonetic generator -- used to convert binary data to semi-pronounceable
* strings
* =========================================================================
*/
#include <stdlib.h>
#include <stdio.h>
#include "phogen_map.h"
#include "phogen.h"
/* Number of letters in the English alphabet */
#define PHOGEN_ENGLISH_NUM ('z' - 'a' + 1)
/*
 * Number of distinct symbols that can occupy one position of an n-gram: the
 * letters 'a'..'z' plus the blank (' ') placeholder.
 */
#define PHOGEN_NGRAM_CHARS (PHOGEN_ENGLISH_NUM + 1)
/* Converts a two-character n-gram into the index used to select a phogen_map row */
static int phogen_ngram_to_index(const char *word);
/*
 * Create a phonetic string by consuming bits from the bhash `bh_in` and
 * write it to `out`.
 *
 * `out_sz` is the capacity of `out` in bytes, terminating NUL included, so
 * `out_sz - 1` letters are generated. Every letter is picked from the
 * phogen_map row selected by the two previously generated letters; blanks
 * stand in for the letters that would precede the start of the string.
 *
 * Returns true on success. When `out_sz` is 0 nothing is written and true is
 * returned. Returns false when bhash_mod32() reports BHASH_MOD32_ERR or when
 * the selected map row holds no letters; in both cases `out` contains the
 * NUL-terminated letters generated up to that point.
 */
bool phogen_encode(char *out, size_t out_sz, bhash_t *bh_in)
{
    size_t ii;
    size_t nmap;
    uint32_t nm;
    /*
     * Sliding window over the two previous letters. Both positions must start
     * as blanks: any other filler (including NUL) would make
     * phogen_ngram_to_index() compute an index outside the table.
     */
    char ngram[3] = { ' ', ' ', '\0' };

    if (out_sz == 0) return true;

    for (ii = 0; ii < (out_sz - 1); ii++)
    {
        int ni = phogen_ngram_to_index(ngram);

        /* Calculate the number of letters in the current map */
        for (nmap = 0; nmap < PHOGEN_ENGLISH_NUM; nmap++)
        {
            if (phogen_map[ni].map[nmap] == '\0') break;
        }

        /*
         * An empty row offers nothing to choose from; bail out instead of
         * asking the bhash for a value modulo zero.
         */
        if (nmap == 0)
        {
            fprintf(stderr, "phogen error, no phonetic char follows the current ngram.\n");
            out[ii] = '\0';
            return false;
        }

        /* Get the index of the next phonetic letter */
        nm = bhash_mod32(bh_in, nmap);
        if (nm == BHASH_MOD32_ERR)
        {
            fprintf(stderr, "bhash error, unable to compute next phonetic char.\n");
            /* Leave the caller with a valid C string even on failure */
            out[ii] = '\0';
            return false;
        }

        out[ii] = phogen_map[ni].map[nm];

        /* Calculate the next ngram */
        ngram[0] = ngram[1];
        ngram[1] = out[ii];
    }

    out[ii] = '\0';
    return true;
}
/*
 * The phogen_map table is fixed size and sorted, so the row belonging to a
 * two-character ngram can be computed directly instead of searched for.
 *
 * Each position is mapped to a digit: blank (' ') becomes 0, every other
 * character becomes its 1-based offset from 'a'. The first position is the
 * high digit and the second the low digit of a base-PHOGEN_NGRAM_CHARS
 * number. Only word[0] and word[1] are read, and no validation is performed
 * on them.
 */
int phogen_ngram_to_index(const char *word)
{
    int high = 0;
    int low = 0;

    if (word[0] != ' ')
    {
        high = word[0] - 'a' + 1;
    }

    if (word[1] != ' ')
    {
        low = word[1] - 'a' + 1;
    }

    return high * PHOGEN_NGRAM_CHARS + low;
}