c: Finish the phogen module
This commit implements the `phogen_encode` function, which takes a bhash as argument and generates a phonetic string.
This commit is contained in:
@ -1,4 +1,10 @@
|
|||||||
#ifndef PHOGEN_H_INCLUDED
|
#ifndef PHOGEN_H_INCLUDED
|
||||||
#define PHOGEN_H_INCLUDED
|
#define PHOGEN_H_INCLUDED
|
||||||
|
|
||||||
|
#include <stdbool.h>
|
||||||
|
|
||||||
|
#include "bhash.h"
|
||||||
|
|
||||||
|
/*
 * Generate a phonetic string by consuming bits from the bhash `bh_in` and
 * store it, NUL-terminated, in `out`.
 *
 * `out_len` is the size of the `out` buffer including the terminator, so at
 * most `out_len - 1` letters are produced. When `out_len` is 0 nothing is
 * written and true is returned.
 *
 * Returns true on success and false if the bhash reports an error.
 */
bool phogen_encode(char *out, size_t out_len, bhash_t *bh_in);
|
||||||
|
|
||||||
#endif /* PHOGEN_H_INCLUDED */
|
#endif /* PHOGEN_H_INCLUDED */
|
||||||
|
|||||||
@ -13,6 +13,7 @@ phogen_inc = include_directories('inc')
|
|||||||
phogen_lib = static_library(
|
phogen_lib = static_library(
|
||||||
'phogen',
|
'phogen',
|
||||||
[phogen_map_h, 'src/phogen.c'],
|
[phogen_map_h, 'src/phogen.c'],
|
||||||
include_directories : phogen_inc)
|
include_directories : phogen_inc,
|
||||||
|
dependencies: bhash_dep)
|
||||||
|
|
||||||
phogen_dep = declare_dependency(link_with : phogen_lib, include_directories : phogen_inc)
|
phogen_dep = declare_dependency(link_with : phogen_lib, include_directories : phogen_inc)
|
||||||
|
|||||||
@ -452,7 +452,6 @@ void phogen_rstrip(char *str, char *what)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Take the buffer in `bigin` and treat it as a big-endian big number.
|
* Take the buffer in `bigin` and treat it as a big-endian big number.
|
||||||
* Perform a division using the 32-bit divisor in `base` and return the
|
* Perform a division using the 32-bit divisor in `base` and return the
|
||||||
|
|||||||
@ -1,10 +1,78 @@
|
|||||||
#include <stdlib.h>
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
* =========================================================================
|
||||||
* Phonetic generator -- used to convert binary data to semi-pronounceable
|
* Phonetic generator -- used to convert binary data to semi-pronounceable
|
||||||
* strings
|
* strings
|
||||||
|
* =========================================================================
|
||||||
*/
|
*/
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
#include "phogen_map.h"
|
#include "phogen_map.h"
|
||||||
#include "phogen.h"
|
#include "phogen.h"
|
||||||
|
|
||||||
|
/* Number of letters in the english alphabet */
|
||||||
|
#define PHOGEN_ENGLISH_NUM ('z' - 'a' + 1)
|
||||||
|
/* Number of valid characters in an n-gram (the letters plus the blank) */
|
||||||
|
#define PHOGEN_NGRAM_CHARS (PHOGEN_ENGLISH_NUM + 1)
|
||||||
|
|
||||||
|
static int phogen_ngram_to_index(const char *word);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Create a phonetic string of size `out_sz` by consuming bits from the bhash
|
||||||
|
* `bh_in` and write it to out.
|
||||||
|
*/
|
||||||
|
bool phogen_encode(char *out, size_t out_sz, bhash_t *bh_in)
|
||||||
|
{
|
||||||
|
size_t ii;
|
||||||
|
size_t nmap;
|
||||||
|
uint32_t nm;
|
||||||
|
|
||||||
|
char ngram[3] = " ";
|
||||||
|
|
||||||
|
if (out_sz == 0) return true;
|
||||||
|
|
||||||
|
for (ii = 0; ii < (out_sz - 1); ii++)
|
||||||
|
{
|
||||||
|
int ni = phogen_ngram_to_index(ngram);
|
||||||
|
|
||||||
|
/* Calculte the number of letters in the current map */
|
||||||
|
for (nmap = 0; nmap < PHOGEN_ENGLISH_NUM; nmap++)
|
||||||
|
{
|
||||||
|
if (phogen_map[ni].map[nmap] == '\0') break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Get the index of the next phonetic letter */
|
||||||
|
nm = bhash_mod32(bh_in, nmap);
|
||||||
|
if (nm == BHASH_MOD32_ERR)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "bhash error, unable to compute next phoentic char.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
out[ii] = phogen_map[ni].map[nm];
|
||||||
|
|
||||||
|
/* Calculate the next ngram */
|
||||||
|
ngram[0] = ngram[1];
|
||||||
|
ngram[1] = out[ii];
|
||||||
|
}
|
||||||
|
|
||||||
|
out[ii] = '\0';
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The phogen_map table is fixed size and sorted. We can calculate the offset
|
||||||
|
* of a ngram with the simple calculation below.
|
||||||
|
*/
|
||||||
|
int phogen_ngram_to_index(const char *word)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
i = (word[0] == ' ' ? 0 : word[0] - 'a' + 1);
|
||||||
|
i *= PHOGEN_NGRAM_CHARS;
|
||||||
|
i += (word[1] == ' ' ? 0 : word[1] - 'a' + 1);
|
||||||
|
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user