phogen: Add the C implementation of the phonetic generator

2021-09-14 23:52:59 +02:00
parent 93f9c75131
commit 50b0758ffc
1 changed files with 823 additions and 0 deletions
--- a/c/phogen/phogen_map/phogen_map.c
+++ b/c/phogen/phogen_map/phogen_map.c
@ -0,0 +1,823 @@
+/*
+ * ===========================================================================
+ *  Generate the phonetic mapping; this is used for mapping n-grams (two
+ *  letter combinations) to the letters that most likely follow them.
+ * ===========================================================================
+ */
+
+#include <arpa/inet.h> /* For ntohl() */
+
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <openssl/sha.h>
+
+/*
+ * List of vowels. Its length is also the number of letters kept per ngram
+ * and the vowels are used to pad shorter lists (see phogen_freq()).
+ */
+#define PHOGEN_VOWELS       "aeiou"
+
+/* Number of letters in the english alphabet */
+#define PHOGEN_ENGLISH_NUM  ('z' - 'a' + 1)
+
+/*
+ * Number of valid characters in a ngram. This is the number of letters in the
+ * english alphabet with the addition of space (' ')
+ */
+#define PHOGEN_NGRAM_CHARS (PHOGEN_ENGLISH_NUM + 1)
+
+/* Number of all possible ngrams (two letter combination, including space) */
+#define PHOGEN_NGRAM_NUM    (PHOGEN_NGRAM_CHARS * PHOGEN_NGRAM_CHARS)
+
+/* Forward declaration; the function is used by phogen_freq() before its definition */
+static void phogen_rstrip(char *str, char *what);
+
+/*
+ * Buffer size used for roman numbers. The largest roman number (in terms of
+ * string length) is 3888; sizeof() of the literal also counts the
+ * terminating NUL.
+ */
+#define ROMAN_MAX_LEN   sizeof("MMMDCCCLXXXVIII")
+
+/* A roman symbol (one or two letters) and the value it stands for */
+struct roman_symbol
+{
+    char    *rs_symbol;     /* Upper-case symbol text */
+    int      rs_value;      /* Integer value of the symbol */
+};
+
+/*
+ * All symbols used by roman_from_int() and roman_to_int(), including the
+ * subtractive pairs (CM, CD, XC, XL, IX, IV). The entries are ordered from
+ * the largest value to the smallest; both converters scan the table in this
+ * order.
+ */
+static struct roman_symbol roman_symbol_list[] =
+{
+    {   .rs_symbol = "M",   .rs_value = 1000    },
+    {   .rs_symbol = "CM",  .rs_value = 900     },
+    {   .rs_symbol = "D",   .rs_value = 500     },
+    {   .rs_symbol = "CD",  .rs_value = 400     },
+    {   .rs_symbol = "C",   .rs_value = 100     },
+    {   .rs_symbol = "XC",  .rs_value = 90      },
+    {   .rs_symbol = "L",   .rs_value = 50      },
+    {   .rs_symbol = "XL",  .rs_value = 40      },
+    {   .rs_symbol = "X",   .rs_value = 10      },
+    {   .rs_symbol = "IX",  .rs_value = 9       },
+    {   .rs_symbol = "V",   .rs_value = 5       },
+    {   .rs_symbol = "IV",  .rs_value = 4       },
+    {   .rs_symbol = "I",   .rs_value = 1       }
+};
+
+/*
+ * Long command line options for getopt_long(); each one maps to the short
+ * option character handled in main(). The all-zero entry terminates the list.
+ */
+static struct option phogen_map_long_options[] =
+{
+    {   "input",    required_argument,      NULL,   'i' },
+    {   "json",     required_argument,      NULL,   'j' },
+    {   "python",   required_argument,      NULL,   'p' },
+    {   "clang",    required_argument,      NULL,   'c' },
+    {   "test",     no_argument,            NULL,   't' },
+    {   "verbose",  no_argument,            NULL,   'v' },
+    {   NULL,       0,                      NULL,   0   }
+};
+
+/*
+ * Test vectors for phogen_test(): the SHA-256 digest of `input` is expanded
+ * into four 6-letter words which must be equal to `output`.
+ *
+ * NOTE(review): the expected outputs presumably only hold for the map built
+ * from one specific word list -- confirm which one.
+ */
+struct
+{
+    char *input;            /* String that gets hashed */
+    char *output;           /* Expected words, separated by single spaces */
+} phogen_test_table[] =
+{
+    { "passgeny",  "herang xiasem zitend qibele" },
+    { "phonetic",  "lineum foneum zybale mangur" },
+    { "generator", "latole elitab ackina exprou" },
+    { "password",  "nulize nomere fonici crednt" },
+    { "duck",      "catabb rompor cricin prunsi" },
+};
+
+/* Text written at the top of the generated Python file (see phogen_dump_python()) */
+const char phogen_python_header[] =
+"#\n"
+"# Phonetic Map -- autogenerated, do not edit.\n"
+"#\n";
+
+/*
+ * Text written at the top of the generated C file (see phogen_dump_clang()).
+ * It ends with the opening brace of the `phogen_map` initializer; the dump
+ * function appends the entries and closes the array.
+ */
+const char phogen_clang_header[] =
+"/*\n"
+" * Phonetic Map -- autogenerated, do not edit.\n"
+" *\n"
+" * The list contains exactly 27*27 entries (all combinations of 2 characters\n"
+" * from the array  [\" \", \"a\"...\"z\"]) and is sorted alphabetically. This makes\n"
+" * the ngram lookup time constant, but it also means that the map contains\n"
+" * holes. The holes have a value of `map` set to NULL.\n"
+" */\n"
+"\n"
+"struct phogen_entry\n"
+"{\n"
+"    char    *ngram;     /* Ngram */\n"
+"    char    *map;       /* Character map */\n"
+"};\n"
+"\n"
+"struct phogen_entry phogen_map[] =\n"
+"{\n";
+
+/* Path of the input word list; set from the -i/--input option in main() */
+static char *g_word_list = NULL;
+/* Verbosity level; incremented for each -v/--verbose option */
+static int g_verbose = 0;
+
+/* One candidate "next letter" of a ngram */
+struct phogen_freq_entry
+{
+    char    fe_letter;      /* The letter; '\0' marks an unused slot */
+    int     fe_freq;        /* How many times the letter followed the ngram */
+};
+
+/*
+ * Table containing ngrams and next letter frequency. The first index is the
+ * ngram index (see phogen_ngram_to_index()), the second one is a letter slot.
+ * Having static storage, the table starts out zeroed.
+ */
+static struct phogen_freq_entry phogen_freq_list[PHOGEN_NGRAM_NUM][PHOGEN_ENGLISH_NUM];
+
+/* qsort() comparator for the rows of phogen_freq_list, defined further below */
+static int phogen_entry_cmp(const void *_a, const void *_b);
+
+/*
+ * Render `i` as a roman number into the caller supplied buffer `roman`.
+ *
+ * Only values from 1 to 3999 are accepted; for anything else NULL is returned
+ * and the buffer is left untouched. On success the buffer holds a NUL
+ * terminated string and a pointer to its first character is returned.
+ */
+char *roman_from_int(char (*roman)[ROMAN_MAX_LEN], int i)
+{
+    const size_t nsymbols = sizeof(roman_symbol_list) / sizeof(roman_symbol_list[0]);
+    char *cursor = *roman;
+    size_t si;
+
+    if (i < 1 || i > 3999)
+    {
+        return NULL;
+    }
+
+    /* The table is ordered by descending value: emit each symbol greedily */
+    for (si = 0; si < nsymbols; si++)
+    {
+        const struct roman_symbol *sym = &roman_symbol_list[si];
+        int repeat;
+
+        for (repeat = i / sym->rs_value; repeat > 0; repeat--)
+        {
+            strcpy(cursor, sym->rs_symbol);
+            cursor += strlen(sym->rs_symbol);
+        }
+
+        /* Keep only the part that was not covered by this symbol */
+        i %= sym->rs_value;
+    }
+
+    return *roman;
+}
+
+/*
+ * Convert a roman number to its integer value.
+ *
+ * The comparison is case-insensitive. After parsing, the value is converted
+ * back with roman_from_int() and compared with the input, so only numbers in
+ * their canonical spelling are accepted (for example "IIII" is rejected).
+ *
+ * Returns the value (1..3999), or 0 if `roman` is not a valid roman number.
+ */
+int roman_to_int(const char *roman)
+{
+    const size_t nsymbols = sizeof(roman_symbol_list) / sizeof(roman_symbol_list[0]);
+    char canonical[ROMAN_MAX_LEN];
+    char uroman[ROMAN_MAX_LEN];
+    const char *proman;
+    size_t len;
+    size_t ii;
+    int retval = 0;
+
+    /*
+     * roman_from_int() never produces more than ROMAN_MAX_LEN - 1 characters,
+     * so a longer input can never pass the final comparison. Rejecting it
+     * here gives the same result and keeps the local copy bounded.
+     */
+    len = strlen(roman);
+    if (len >= sizeof(uroman))
+    {
+        return 0;
+    }
+
+    /*
+     * Convert all to upper (the loop copies the terminating NUL as well).
+     * The <ctype.h> functions require a value representable as unsigned
+     * char, hence the cast.
+     */
+    for (ii = 0; ii <= len; ii++)
+    {
+        uroman[ii] = (char)toupper((unsigned char)roman[ii]);
+    }
+
+    proman = uroman;
+    while (*proman != '\0')
+    {
+        /* Find the first (largest) symbol matching at the current position */
+        for (ii = 0; ii < nsymbols; ii++)
+        {
+            if (strncmp(proman,
+                        roman_symbol_list[ii].rs_symbol,
+                        strlen(roman_symbol_list[ii].rs_symbol)) == 0)
+            {
+                break;
+            }
+        }
+
+        if (ii >= nsymbols)
+        {
+            return 0;
+        }
+
+        retval += roman_symbol_list[ii].rs_value;
+        proman += strlen(roman_symbol_list[ii].rs_symbol);
+    }
+
+    /*
+     * Verify that the input string is a correct roman number by converting
+     * the value back to its roman representation
+     */
+    if (roman_from_int(&canonical, retval) == NULL || strcmp(canonical, uroman) != 0)
+    {
+        return 0;
+    }
+
+    return retval;
+}
+
+/*
+ * Return the next line of the word list file `g_word_list`.
+ *
+ * The file is opened on the first call. Every call returns a pointer to an
+ * internal static buffer holding the next line as read by fgets() (the line
+ * terminator is not removed); the buffer is overwritten by the next call.
+ *
+ * When there is nothing more to read, or a read error occurs, the file is
+ * closed and NULL is returned; the next call opens the file again.
+ *
+ * If the file cannot be opened, the program exits with status 2.
+ */
+char *phogen_word_list(void)
+{
+    static FILE *stream = NULL;
+    static char line[1024];
+
+    if (stream == NULL)
+    {
+        stream = fopen(g_word_list, "r");
+        if (stream == NULL)
+        {
+            fprintf(stderr, "Error opening file: %s\n", g_word_list);
+            exit(2);
+        }
+    }
+
+    if (fgets(line, sizeof(line), stream) != NULL)
+    {
+        return line;
+    }
+
+    /* fgets() failed: tell a read error apart from a plain end of file */
+    if (ferror(stream))
+    {
+        fprintf(stderr, "Error reading file : %s\n", g_word_list);
+    }
+
+    fclose(stream);
+    stream = NULL;
+    return NULL;
+}
+
+/*
+ * Calculate the ngram index from the first two characters of `word`.
+ *
+ * Each character is mapped to a number (' ' -> 0, 'a' -> 1, ... 'z' -> 26)
+ * and the two numbers are combined as digits of a base PHOGEN_NGRAM_CHARS
+ * number. The characters are not validated.
+ */
+int phogen_ngram_to_index(const char *word)
+{
+    int first = (word[0] == ' ') ? 0 : (word[0] - 'a' + 1);
+    int second = (word[1] == ' ') ? 0 : (word[1] - 'a' + 1);
+
+    return first * PHOGEN_NGRAM_CHARS + second;
+}
+
+/*
+ * Map an index to its ngram string; this is the inverse of
+ * phogen_ngram_to_index(). The result is stored in `ngram` as a NUL
+ * terminated two character string.
+ */
+void phogen_index_to_ngram(char (*ngram)[3], int index)
+{
+    const int digits[2] =
+    {
+        index / PHOGEN_NGRAM_CHARS,
+        index % PHOGEN_NGRAM_CHARS
+    };
+    int pos;
+
+    for (pos = 0; pos < 2; pos++)
+    {
+        /* Digit 0 is the space, 1..26 are the letters 'a'..'z' */
+        if (digits[pos] == 0)
+        {
+            (*ngram)[pos] = ' ';
+        }
+        else
+        {
+            (*ngram)[pos] = (char)('a' + digits[pos] - 1);
+        }
+    }
+
+    (*ngram)[2] = '\0';
+}
+
+/*
+ * Convert `word` to lower-case in place. Returns false as soon as a character
+ * outside of 'a'..'z' is found (the rest of the word is left untouched).
+ */
+static bool phogen_freq_normalize(char *word)
+{
+    char *pword;
+
+    for (pword = word; *pword != '\0'; pword++)
+    {
+        /* <ctype.h> functions need a value representable as unsigned char */
+        *pword = (char)tolower((unsigned char)*pword);
+        if (*pword < 'a' || *pword > 'z')
+        {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/*
+ * Split `word` (lower-case letters only) into ngrams and count, for each of
+ * them, the letter that follows. Every word starts with the ngram "  ".
+ */
+static void phogen_freq_count_word(const char *word)
+{
+    char ngram[3] = "  ";
+    const char *pword;
+
+    for (pword = word; *pword != '\0'; pword++)
+    {
+        int ni = phogen_ngram_to_index(ngram);
+        int ci = *pword - 'a';
+
+        /* Update the frequency */
+        phogen_freq_list[ni][ci].fe_letter = *pword;
+        phogen_freq_list[ni][ci].fe_freq++;
+
+        /* Calculate next ngram */
+        ngram[0] = ngram[1];
+        ngram[1] = *pword;
+    }
+}
+
+/*
+ * Inspect the leading letters of row `ni` (up to the number of vowels) and
+ * remove every letter whose resulting next-ngram has no letters of its own.
+ */
+static void phogen_freq_prune(int ni)
+{
+    const int nkeep = (int)(sizeof(PHOGEN_VOWELS) - 1);
+    struct phogen_freq_entry *row = phogen_freq_list[ni];
+    char ngram[3];
+    int ci = 0;
+
+    phogen_index_to_ngram(&ngram, ni);
+
+    while (ci < nkeep && row[ci].fe_letter != '\0')
+    {
+        char next_ngram[3];
+        int next_ni;
+
+        next_ngram[0] = ngram[1];
+        next_ngram[1] = row[ci].fe_letter;
+        next_ngram[2] = '\0';
+
+        next_ni = phogen_ngram_to_index(next_ngram);
+        if (phogen_freq_list[next_ni][0].fe_letter != '\0')
+        {
+            ci++;
+            continue;
+        }
+
+        if (g_verbose)
+        {
+            fprintf(stderr, "Ignoring valid ngram mapping %s + %c -> %s\n",
+                    ngram,
+                    row[ci].fe_letter,
+                    next_ngram);
+        }
+
+        /*
+         * Remove slot `ci` by shifting the tail of the row down by one. Only
+         * the slots behind `ci` are moved (copying one more would read past
+         * the end of the row); the slot that became free is cleared.
+         */
+        memmove(&row[ci], &row[ci + 1],
+                sizeof(row[0]) * (size_t)(PHOGEN_ENGLISH_NUM - ci - 1));
+        memset(&row[PHOGEN_ENGLISH_NUM - 1], 0, sizeof(row[0]));
+    }
+}
+
+/*
+ * Clip row `ni` to 5 letters (number of vowels). If the row is shorter than
+ * that, fill it with vowels that are not part of the row yet.
+ */
+static void phogen_freq_fill_vowels(int ni)
+{
+    const int nkeep = (int)(sizeof(PHOGEN_VOWELS) - 1);
+    struct phogen_freq_entry *row = phogen_freq_list[ni];
+    int ci;
+
+    for (ci = 0; ci < nkeep; ci++)
+    {
+        int vi;
+
+        if (row[ci].fe_letter != '\0')
+        {
+            continue;
+        }
+
+        /* Find a vowel that doesn't exist in the list yet */
+        for (vi = 0; vi < nkeep; vi++)
+        {
+            int cci;
+
+            for (cci = 0; cci < ci; cci++)
+            {
+                if (PHOGEN_VOWELS[vi] == row[cci].fe_letter)
+                {
+                    break;
+                }
+            }
+            /* Vowel not found, break out */
+            if (cci == ci)
+            {
+                break;
+            }
+        }
+        row[ci].fe_letter = PHOGEN_VOWELS[vi];
+    }
+
+    /* Terminate the list right after the kept letters */
+    row[nkeep].fe_letter = '\0';
+}
+
+/*
+ * Create a frequency map of `ngram -> next letter` mapping sorted by
+ * frequency. Take the 5 (number of vowels) most frequent letters. If the
+ * mapping contains less than 5 letters, fill it with vowels.
+ *
+ * The words are read with phogen_word_list(). Roman numbers and words with
+ * characters other than letters are skipped. The result is stored in
+ * `phogen_freq_list`; the row of the starting ngram "  " is only sorted, it
+ * keeps all detected letters.
+ */
+void phogen_freq(void)
+{
+    const int nrows = (int)(sizeof(phogen_freq_list) / sizeof(phogen_freq_list[0]));
+    char *word;
+    int ni;
+
+    /*
+     * Take the word list and split it into ngrams. Build a frequency list
+     * of ngram -> next letter.
+     */
+    while ((word = phogen_word_list()) != NULL)
+    {
+        phogen_rstrip(word, "\n\r");
+        if (roman_to_int(word) > 0)
+        {
+            if (g_verbose) fprintf(stderr, "Ignoring roman number: %s\n", word);
+            continue;
+        }
+
+        if (!phogen_freq_normalize(word))
+        {
+            if (g_verbose) fprintf(stderr, "Ignoring invalid word: %s\n", word);
+            continue;
+        }
+
+        phogen_freq_count_word(word);
+    }
+
+    /* Sort the letters of each ngram by frequency, most common first */
+    for (ni = 0; ni < nrows; ni++)
+    {
+        qsort(
+                phogen_freq_list[ni],
+                sizeof(phogen_freq_list[ni]) / sizeof(phogen_freq_list[ni][0]),
+                sizeof(struct phogen_freq_entry),
+                phogen_entry_cmp);
+    }
+
+    /*
+     * Filter out letters that do not produce a valid next-ngram mapping and
+     * pad/clip the lists. The rows are processed in index order; a row that
+     * was already processed is never empty, which is what the filter of the
+     * following rows looks at.
+     *
+     * Row 0 is skipped: "  " is the _starting_ ngram and should contain all
+     * detected combinations.
+     */
+    for (ni = 1; ni < nrows; ni++)
+    {
+        /*
+         * Ignore ngrams that do not have any next mappings (never seen in
+         * the word list)
+         */
+        if (phogen_freq_list[ni][0].fe_letter == '\0') continue;
+
+        phogen_freq_prune(ni);
+        phogen_freq_fill_vowels(ni);
+    }
+}
+
+/*
+ * Strip all characters in `what` from the end of string `str`
+ *
+ * Trailing characters are removed until a character that is not part of
+ * `what` is found; a string made only of such characters becomes empty and
+ * an empty string is left as it is.
+ *
+ * Note: This function modifies `str`
+ */
+void phogen_rstrip(char *str, char *what)
+{
+    size_t len = strlen(str);
+
+    /* str[len - 1] is never NUL here, so strchr() cannot match the terminator */
+    while (len > 0 && strchr(what, str[len - 1]) != NULL)
+    {
+        str[--len] = '\0';
+    }
+}
+
+
+/*
+ * Take the buffer in `bigint` and treat it as a big-endian big number of
+ * `bigintsz` bytes. Perform a division using the 32-bit divisor in `base`:
+ * the buffer is overwritten with the quotient (again big-endian) and the
+ * 32-bit modulo is returned.
+ *
+ * The division is done byte by byte, so the buffer needs no particular
+ * alignment and its size does not have to be a multiple of 4.
+ *
+ * If `base` is 0 nothing is divided: the buffer is left untouched and 0 is
+ * returned.
+ */
+uint32_t bigint_mod32(void *bigint, size_t bigintsz, uint32_t base)
+{
+    unsigned char *digits = bigint;
+    uint64_t rem = 0;
+    size_t di;
+
+    if (base == 0)
+    {
+        return 0;
+    }
+
+    /* Long division in base 256, starting with the most significant byte */
+    for (di = 0; di < bigintsz; di++)
+    {
+        /* rem < base <= 2^32 - 1, so the partial dividend fits into 40 bits */
+        uint64_t n = (rem << 8) | digits[di];
+
+        /* n < base * 256, so the quotient digit always fits into one byte */
+        digits[di] = (unsigned char)(n / base);
+        rem = n % base;
+    }
+
+    return (uint32_t)rem;
+}
+
+/*
+ * Take input buffer `in` and generate its phonetic representation
+ * and store it to out.
+ *
+ * At most `outsz - 1` letters are generated and `out` is always NUL
+ * terminated; nothing is written if `outsz` is 0. Each letter is selected
+ * from the letter list of the current ngram using bigint_mod32(), which
+ * divides the number stored in `in` in place -- the input buffer is consumed.
+ *
+ * The generation stops early if the current ngram has no letters at all.
+ */
+void phogen(char *out, size_t outsz, void *in, size_t insz)
+{
+    /* Starting ngram */
+    char ngram[3] = "  ";
+    size_t oi;
+
+    /* No room even for the terminator; `outsz - 1` below would wrap around */
+    if (outsz == 0)
+    {
+        return;
+    }
+
+    for (oi = 0; oi < (outsz - 1); oi++)
+    {
+        int nsize;
+        uint32_t nsel;
+
+        int ni = phogen_ngram_to_index(ngram);
+
+        /* Calculate the length of the letter pool */
+        for (nsize = 0; nsize < PHOGEN_ENGLISH_NUM; nsize++)
+        {
+            if (phogen_freq_list[ni][nsize].fe_letter == '\0') break;
+        }
+
+        /* An empty pool cannot be used as a divisor, there is nothing to pick */
+        if (nsize == 0)
+        {
+            break;
+        }
+
+        nsel = bigint_mod32(in, insz, (uint32_t)nsize);
+
+        out[oi] = phogen_freq_list[ni][nsel].fe_letter;
+
+        /* Generate next ngram */
+        ngram[0] = ngram[1];
+        ngram[1] = out[oi];
+    }
+
+    out[oi] = '\0';
+}
+
+/*
+ * Run basic tests
+ *
+ * For each entry of `phogen_test_table` the SHA-256 digest of the input is
+ * calculated and expanded with phogen() into four 6-letter words separated
+ * by spaces. The result is compared with the expected output; mismatches
+ * are reported on stderr.
+ *
+ * Returns true if all entries match.
+ */
+bool phogen_test(void)
+{
+    const size_t ntests = sizeof(phogen_test_table) / sizeof(phogen_test_table[0]);
+    bool retval = true;
+    size_t ti;
+
+    for (ti = 0; ti < ntests; ti++)
+    {
+        const char *input = phogen_test_table[ti].input;
+        unsigned char sha256[SHA256_DIGEST_LENGTH];
+        /* 4 words, each 6 characters long and 1 more character for spaces */
+        char buf[(6 + 1) * 4];
+        char *pbuf = buf;
+        int wi;
+
+        /*
+         * One-shot digest; the SHA256_Init/Update/Final functions are
+         * deprecated since OpenSSL 3.0.
+         */
+        SHA256((const unsigned char *)input, strlen(input), sha256);
+
+        /* Each phogen() call consumes the digest further, so the words differ */
+        for (wi = 0; wi < 4; wi++)
+        {
+            phogen(pbuf, 7, sha256, sizeof(sha256));
+            pbuf += strlen(pbuf);
+            *pbuf++ = ' ';
+        }
+        /* Replace the trailing space with the terminator */
+        *(--pbuf) = '\0';
+
+        if (strcmp(buf, phogen_test_table[ti].output) != 0)
+        {
+            fprintf(stderr, "Error, test failed: %s != %s\n", buf, phogen_test_table[ti].output);
+            retval = false;
+        }
+    }
+
+    return retval;
+}
+
+/*
+ * qsort() comparator for `struct phogen_freq_entry`.
+ *
+ * Entries are ordered by descending frequency; entries with the same
+ * frequency are ordered by descending letter.
+ */
+int phogen_entry_cmp(const void *_a, const void *_b)
+{
+    const struct phogen_freq_entry *lhs = _a;
+    const struct phogen_freq_entry *rhs = _b;
+    /* The right hand side comes first, this inverts the sorting order */
+    int freq_delta = rhs->fe_freq - lhs->fe_freq;
+
+    if (freq_delta != 0)
+    {
+        return freq_delta;
+    }
+
+    return rhs->fe_letter - lhs->fe_letter;
+}
+
+/*
+ * Self-check of the index mapping functions: every index must survive the
+ * conversion to a ngram with phogen_index_to_ngram() and back with
+ * phogen_ngram_to_index().
+ *
+ * Returns false at the first mismatch (a message is printed only in verbose
+ * mode), true otherwise.
+ */
+bool phogen_pre_test(void)
+{
+    int index;
+
+    for (index = 0; index < PHOGEN_NGRAM_NUM; index++)
+    {
+        char ngram[3];
+
+        phogen_index_to_ngram(&ngram, index);
+        if (phogen_ngram_to_index(ngram) == index)
+        {
+            continue;
+        }
+
+        if (g_verbose) fprintf(stderr, "Internal error, index mapping functions are broken.\n");
+        return false;
+    }
+
+    return true;
+}
+
+/*
+ * Dump the frequency table in JSON format
+ *
+ * The output is a single object that maps each ngram with at least one
+ * letter to the array of its letters. Ngrams without letters are left out;
+ * if there are none at all, an empty object is written.
+ */
+void phogen_dump_json(FILE *f)
+{
+    const int nrows = (int)(sizeof(phogen_freq_list) / sizeof(phogen_freq_list[0]));
+    /* True until the first ngram is written; drives the separators */
+    bool first = true;
+    int ni;
+    int ci;
+
+    fprintf(f, "{\n");
+    for (ni = 0; ni < nrows; ni++)
+    {
+        char ngram[3];
+
+        if (phogen_freq_list[ni][0].fe_letter == '\0') continue;
+
+        /* Close the array of the previously written ngram */
+        if (!first)
+        {
+            fprintf(f, "\n    ],\n");
+        }
+        first = false;
+
+        phogen_index_to_ngram(&ngram, ni);
+        fprintf(f, "    \"%s\": [\n", ngram);
+
+        for (ci = 0; ci < PHOGEN_ENGLISH_NUM; ci++)
+        {
+            if (phogen_freq_list[ni][ci].fe_letter == '\0') break;
+            if (ci != 0)
+            {
+                fprintf(f, ",\n");
+            }
+
+            fprintf(f, "        \"%c\"", phogen_freq_list[ni][ci].fe_letter);
+        }
+    }
+
+    /* There is an open array only if something was written */
+    if (!first)
+    {
+        fprintf(f, "\n    ]\n");
+    }
+    fprintf(f, "}\n");
+}
+
+/*
+ * Dump the frequency table in PYTHON format
+ *
+ * The output is `phogen_python_header` followed by the assignment of a
+ * dictionary to `g_phonetic_map`; the dictionary maps each ngram with at
+ * least one letter to the list of its letters. If there is no such ngram an
+ * empty dictionary is written.
+ */
+void phogen_dump_python(FILE *f)
+{
+    const int nrows = (int)(sizeof(phogen_freq_list) / sizeof(phogen_freq_list[0]));
+    /* True until the first ngram is written; drives the separators */
+    bool first = true;
+    int ni;
+    int ci;
+
+    fprintf(f, "%sg_phonetic_map = \\", phogen_python_header);
+    for (ni = 0; ni < nrows; ni++)
+    {
+        char ngram[3];
+
+        if (phogen_freq_list[ni][0].fe_letter == '\0') continue;
+
+        /* Open the dictionary, or close the list of the previous ngram */
+        phogen_index_to_ngram(&ngram, ni);
+        fprintf(f, "%s'%s': ", first ? "\n{" : "],\n ", ngram);
+        first = false;
+
+        for (ci = 0; ci < PHOGEN_ENGLISH_NUM; ci++)
+        {
+            if (phogen_freq_list[ni][ci].fe_letter == '\0') break;
+            fprintf(f, "%s'%c'", (ci == 0) ? "[" : ", ", phogen_freq_list[ni][ci].fe_letter);
+        }
+    }
+
+    /* Without any ngram neither the dictionary nor a list was opened */
+    fprintf(f, "%s", first ? "\n{}\n" : "]}\n");
+}
+
+/*
+ * Dump the frequency table in C format
+ *
+ * The output is `phogen_clang_header` followed by one initializer for each
+ * possible ngram, in index order. Ngrams without letters get a `.map` of
+ * NULL, the others get their letters as a string.
+ */
+void phogen_dump_clang(FILE *f)
+{
+    const int nrows = (int)(sizeof(phogen_freq_list) / sizeof(phogen_freq_list[0]));
+    int row;
+
+    fprintf(f, "%s", phogen_clang_header);
+    for (row = 0; row < nrows; row++)
+    {
+        const struct phogen_freq_entry *letters = phogen_freq_list[row];
+        char ngram[3];
+        int col;
+
+        phogen_index_to_ngram(&ngram, row);
+
+        /* Close the previous entry before starting a new one */
+        if (row > 0)
+        {
+            fprintf(f, "    },\n");
+        }
+
+        fprintf(f, "    {\n");
+        fprintf(f, "        .ngram = \"%s\",\n", ngram);
+
+        if (letters[0].fe_letter == '\0')
+        {
+            fprintf(f, "        .map = NULL\n");
+            continue;
+        }
+
+        fprintf(f, "        .map = \"");
+        for (col = 0; col < PHOGEN_ENGLISH_NUM && letters[col].fe_letter != '\0'; col++)
+        {
+            fprintf(f, "%c", letters[col].fe_letter);
+        }
+        fprintf(f, "\"\n");
+    }
+    fprintf(f, "    }\n};\n");
+}
+
+
+/*
+ * Print the usage text to the standard output.
+ */
+void help(void)
+{
+    static const char usage[] =
+            "usage: phogen_map.py -i INPUT [-j JSON] [-p PYTHON] [-c CLANG] [-t] [-v]\n"
+            "\n"
+            "Generate mappings for the phonetic generator. Without arguments, just run the built-in tests.\n"
+            "\n"
+            "optional arguments:\n"
+            "  -i INPUT, --input INPUT\n"
+            "                        Input file (word list)\n"
+            "  -j JSON, --json JSON  JSON output file\n"
+            "  -p PYTHON, --python PYTHON\n"
+            "                        Python output file\n"
+            "  -c CLANG, --clang CLANG\n"
+            "                        C output file\n"
+            "  -t, --test            Output random passwords\n"
+            "  -v, --verbose         Verbose\n";
+
+    /* The text has no conversion specifications, it can be written as it is */
+    fputs(usage, stdout);
+}
+
+/*
+ * Create the file `path` and let `dump` write the map to it. `format` is
+ * only used to name the output in the error messages.
+ *
+ * Returns false, after printing a message to stderr, if the file cannot be
+ * opened or if closing it (which flushes the buffered data) fails.
+ */
+static bool phogen_write_output(const char *path, const char *format, void (*dump)(FILE *))
+{
+    FILE *f = fopen(path, "w");
+
+    if (f == NULL)
+    {
+        fprintf(stderr, "Error opening %s output file %s: %s\n",
+                format,
+                path,
+                strerror(errno));
+        return false;
+    }
+
+    dump(f);
+
+    if (fclose(f) != 0)
+    {
+        fprintf(stderr, "Error writing %s output file %s: %s\n",
+                format,
+                path,
+                strerror(errno));
+        return false;
+    }
+
+    return true;
+}
+
+/*
+ * Parse the command line, build the frequency map from the word list and
+ * write it in every requested format.
+ *
+ * Returns 0 on success, 127 if the options are invalid or the input is
+ * missing and 1 if the index self-check fails or an output file cannot be
+ * written.
+ */
+int main(int argc, char *argv[])
+{
+    int opt;
+
+    char *json_output = NULL;
+    char *clang_output = NULL;
+    char *python_output = NULL;
+
+    while ((opt = getopt_long(argc, argv, "i:j:p:c:tv", phogen_map_long_options, NULL)) != -1)
+    {
+        switch (opt)
+        {
+            case 'i':
+                g_word_list = optarg;
+                break;
+
+            case 'j':
+                json_output = optarg;
+                break;
+
+            case 'p':
+                python_output = optarg;
+                break;
+
+            case 'c':
+                clang_output = optarg;
+                break;
+
+            case 't':
+                /* Accepted, but it has no effect */
+                break;
+
+            case 'v':
+                g_verbose++;
+                break;
+
+            default:
+                help();
+                return 127;
+        }
+    }
+
+    if (g_word_list == NULL)
+    {
+        fprintf(stderr, "An input parameter is required (--input or -i).\n");
+        return 127;
+    }
+
+    if (!phogen_pre_test())
+    {
+        printf("Basic tests failed.");
+        return 1;
+    }
+
+    /* Generate the frequency map */
+    phogen_freq();
+
+    /*
+     * Run basic tests. Failures are reported on stderr by phogen_test(); the
+     * result does not change the exit status.
+     */
+    (void)phogen_test();
+
+    /* JSON output */
+    if (json_output != NULL
+            && !phogen_write_output(json_output, "JSON", phogen_dump_json))
+    {
+        return 1;
+    }
+
+    /* PYTHON output */
+    if (python_output != NULL
+            && !phogen_write_output(python_output, "PYTHON", phogen_dump_python))
+    {
+        return 1;
+    }
+
+    /* C output */
+    if (clang_output != NULL
+            && !phogen_write_output(clang_output, "CLANG", phogen_dump_clang))
+    {
+        return 1;
+    }
+
+    return 0;
+}