c: Finish the phogen module

This commit implements the `phogen_encode` function, which takes a bhash as argument and generates a phonetic string.
2021-11-14 09:12:20 +01:00
parent ad0627b9c1
commit 560db14a85
4 changed files with 78 additions and 4 deletions
--- a/c/phogen/inc/phogen.h
+++ b/c/phogen/inc/phogen.h
@ -1,4 +1,10 @@
 #ifndef PHOGEN_H_INCLUDED
 #define PHOGEN_H_INCLUDED
 #include <stdbool.h>
 #include <stddef.h>
 #include "bhash.h"
 /*
  * Generate a phonetic string from the bhash `bh_in`.
  *
  * out     - destination buffer, at least `out_len` bytes large
  * out_len - size of `out` in bytes, terminating NUL included; when it is 0
  *           nothing is written and the call succeeds
  * bh_in   - bhash consumed to select each letter
  *
  * On success `out` holds `out_len - 1` letters followed by a NUL and true is
  * returned. false is returned when the next letter cannot be computed from
  * the bhash; the contents of `out` should not be relied upon in that case.
  */
 bool phogen_encode(char *out, size_t out_len, bhash_t *bh_in);
 #endif /* PHOGEN_H_INCLUDED */
--- a/c/phogen/meson.build
+++ b/c/phogen/meson.build
@ -13,6 +13,7 @@ phogen_inc = include_directories('inc')
 # Static library holding the phogen implementation, built from src/phogen.c
 # together with phogen_map_h.
 # NOTE(review): phogen_map_h is presumably a generated-header target defined
 # earlier in this build file -- confirm.
 phogen_lib = static_library(
        'phogen',
        [phogen_map_h, 'src/phogen.c'],
-        include_directories : phogen_inc)
+        include_directories : phogen_inc,
        dependencies: bhash_dep)
 # Dependency object for consumers: links against phogen_lib and exposes the
 # phogen include directory.
 phogen_dep = declare_dependency(link_with : phogen_lib, include_directories : phogen_inc)
--- a/c/phogen/phogen_map/phogen_map.c
+++ b/c/phogen/phogen_map/phogen_map.c
@ -452,7 +452,6 @@ void phogen_rstrip(char *str, char *what)
    }
 }
 /*
 * Take the buffer in `bigin` and treat it as a big-endian big number.
 * Perform a division using the 32-bit divisor in `base` and return the
--- a/c/phogen/src/phogen.c
+++ b/c/phogen/src/phogen.c
@ -1,10 +1,78 @@
 #include <stdlib.h>
 /*
 * =========================================================================
 * Phonetic generator -- used to convert binary data to semi-pronounceable
 * strings
 * =========================================================================
 */
 #include <stdlib.h>
 #include <stdio.h>
 #include "phogen_map.h"
 #include "phogen.h"
 /* Number of letters in the English alphabet ('a' through 'z') */
 #define PHOGEN_ENGLISH_NUM  ('z' - 'a' + 1)
 /*
  * Number of valid characters in one position of an n-gram: every English
  * letter plus the space character, which phogen_ngram_to_index() maps to 0.
  */
 #define PHOGEN_NGRAM_CHARS (PHOGEN_ENGLISH_NUM + 1)
 /* Convert a two-character n-gram into its index in the phogen_map table. */
 static int phogen_ngram_to_index(const char *word);
 /*
  * Create a phonetic string by consuming bits from the bhash `bh_in` and
  * write it to `out`.
  *
  * out    - destination buffer, at least `out_sz` bytes large
  * out_sz - size of `out` in bytes, terminating NUL included; when it is 0
  *          nothing is written and the call succeeds
  * bh_in  - bhash consumed to select each letter
  *
  * Every letter is picked from the phogen_map entry addressed by the two
  * previously emitted letters; spaces stand in for letters that have not been
  * emitted yet.
  *
  * Returns true once `out_sz - 1` letters and the terminating NUL have been
  * written. Returns false when the next letter cannot be selected (empty map
  * entry or bhash_mod32() error); `out` is then NUL-terminated after the
  * letters generated so far, so it is always a valid string.
  */
 bool phogen_encode(char *out, size_t out_sz, bhash_t *bh_in)
 {
    size_t ii;
    char ngram[3] = "  ";

    if (out_sz == 0) return true;

    for (ii = 0; ii < (out_sz - 1); ii++)
    {
        size_t nmap;
        uint32_t nm;
        int ni = phogen_ngram_to_index(ngram);

        /* Calculate the number of letters in the current map */
        for (nmap = 0; nmap < PHOGEN_ENGLISH_NUM; nmap++)
        {
            if (phogen_map[ni].map[nmap] == '\0') break;
        }

        /*
         * An empty map has no letter to offer. Reject it here instead of
         * handing a zero modulus to bhash_mod32(), whose handling of that
         * case is not visible from this file.
         */
        if (nmap == 0)
        {
            fprintf(stderr, "phogen error, no phonetic char available for the current ngram.\n");
            out[ii] = '\0';
            return false;
        }

        /* Get the index of the next phonetic letter */
        nm = bhash_mod32(bh_in, nmap);
        if (nm == BHASH_MOD32_ERR)
        {
            fprintf(stderr, "bhash error, unable to compute next phonetic char.\n");
            /* Leave the caller with a terminated (partial) string */
            out[ii] = '\0';
            return false;
        }

        out[ii] = phogen_map[ni].map[nm];

        /* Calculate the next ngram: slide the window by one letter */
        ngram[0] = ngram[1];
        ngram[1] = out[ii];
    }

    out[ii] = '\0';
    return true;
 }
 /*
  * The phogen_map table is fixed size and sorted, so the slot of an ngram can
  * be computed directly: each of the two characters becomes a digit (0 for a
  * space, 1..26 for 'a'..'z') and the pair is read as a two-digit number in
  * base PHOGEN_NGRAM_CHARS, first character most significant.
  */
 int phogen_ngram_to_index(const char *word)
 {
    int first = 0;
    int second = 0;

    if (word[0] != ' ') first = word[0] - 'a' + 1;
    if (word[1] != ' ') second = word[1] - 'a' + 1;

    return first * PHOGEN_NGRAM_CHARS + second;
 }