/*
 * qstr.c -- counted byte strings ("qstr").
 *
 * A qstr is a (pointer, size) pair.  The top bit of the size word marks the
 * string as "memory managed" (heap-owned, must be released with
 * qstrdfree()); without it the qstr is a borrowed slice of someone else's
 * storage.  Functions with a `d` in their name (qstrdgrow, qstrdcpi, ...)
 * may allocate; the others work strictly within the storage they are given.
 *
 * Provenance: added as a new file by commit
 * 40f14ae579c60127df8a0966a9aeb260917a1542
 * (Mitja Horvat, Fri, 27 Sep 2024 13:46:00 +0200, "[PATCH] BORK").
 */

/* fmemopen() is POSIX.1-2008; ask for it so strict ISO C builds see the prototype */
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L
#endif

#include <errno.h>
#include <limits.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* This bit defines whether this string is memory managed -- or in other words, not a slice */
#define QSTR_FLAG_MM    ((size_t)1 << (sizeof(size_t) * CHAR_BIT - 1))
/* Largest representable length: every bit below the MM flag */
#define QSTR_MAX        ((size_t)(QSTR_FLAG_MM - 1))

/* Report the source location and terminate the process */
#define QBOOM() do { printf("BOOM: %s:%d!\n", __FILE__, __LINE__); exit(1); } while(0)

struct qstr
{
    uint8_t    *q_ptr;      /* First byte of the string (may be NULL when empty) */
    size_t      q_sz;       /* Length in bytes, optionally OR-ed with QSTR_FLAG_MM */
};

typedef struct qstr qstr;

/* Initialize a qstr where the string is variable */
#define Q(x)    (qstr){ .q_sz = sizeof(x) - 1, .q_ptr = (uint8_t[]){ x }}
/* Initialize a qstr where the string is constant (global) */
#define QC(x)   { .q_sz = sizeof(x) - 1, .q_ptr = (uint8_t *)(x) }

/* Wrap a whole array as a qstr (no terminator is subtracted from the size) */
#define QB(x)   (qstr){ .q_sz = sizeof(x), .q_ptr = (uint8_t *)(x) }
/* Same as QB(), usable as a static initializer */
#define QBC(x)  { .q_sz = sizeof(x), .q_ptr = (uint8_t *)(x) }

/* Length of the string with the MM flag masked off */
#define QSZ(x)          ((x).q_sz & ~QSTR_FLAG_MM)
/* Non-zero if the string owns heap storage */
#define QSTR_IS_MM(x)   ((x).q_sz & QSTR_FLAG_MM)

/*
 * Put element by index
 *
 * An out-of-range index is fatal (QBOOM).
 */
void qsetchar(const qstr str, size_t idx, char a)
{
    if (idx >= QSZ(str))
    {
        QBOOM();
        return;
    }

    str.q_ptr[idx] = (uint8_t)a;
}

/*
 * Get element by index
 *
 * An out-of-range index is fatal (QBOOM).
 */
char qgetchar(const qstr str, size_t idx)
{
    if (idx >= QSZ(str))
    {
        QBOOM();
        return -1;
    }

    return (char)str.q_ptr[idx];
}

/*
 * Return the length of the string in bytes (MM flag excluded)
 */
size_t qstrlen(const qstr str)
{
    return QSZ(str);
}

/*
 * Resize the current string to fit into `newsz`. The string may grow or shrink.
 *
 * If the string grows, the data in the newly allocated area is undefined.
 * If the string shrinks, the string is effectively truncated.
 *
 * Growing a slice (non-MM string) allocates fresh storage, copies the old
 * contents into it and turns `out` into a memory managed string.
 *
 * Shrinking never fails: if the allocator cannot give the tail back, the
 * existing (larger) buffer is simply kept.
 *
 * This function returns:
 *      0       - On success
 *      ENOMEM  - If unable to allocate memory
 *      EINVAL  - Invalid arguments
 *
 * Note: the grow paths call QBOOM() on allocation failure, and QBOOM()
 * terminates the process; the ENOMEM return after it is only reached if
 * QBOOM() is redefined not to exit.
 */
int qstrdgrow(qstr *out, size_t newsz)
{
    uint8_t *nptr;

    if (newsz > QSTR_MAX) return EINVAL;

    /* New string is not longer, change the size and preserve the MM flag */
    if (newsz <= QSZ(*out))
    {
        if (!QSTR_IS_MM(*out))
        {
            out->q_sz = newsz;
            return 0;
        }

        /*
         * realloc(p, 0) may free p and return NULL, so never ask for zero
         * bytes; and never overwrite q_ptr with a failed realloc() result.
         */
        if (newsz > 0)
        {
            nptr = realloc(out->q_ptr, newsz);
            if (nptr != NULL) out->q_ptr = nptr;
        }

        out->q_sz = newsz | QSTR_FLAG_MM;
        return 0;
    }

    if (QSTR_IS_MM(*out))
    {
        nptr = realloc(out->q_ptr, newsz);
        if (nptr == NULL)
        {
            QBOOM();
            return ENOMEM;
        }
    }
    else
    {
        /* Slices are borrowed storage: allocate and copy instead of realloc() */
        nptr = malloc(newsz);
        if (nptr == NULL)
        {
            QBOOM();
            return ENOMEM;
        }
        if (out->q_ptr != NULL) memcpy(nptr, out->q_ptr, QSZ(*out));
    }

    out->q_ptr = nptr;
    out->q_sz = newsz | QSTR_FLAG_MM;
    return 0;
}

/*
 * Copy string `in` to position `idx` of string `out`
 *
 * `out` is grown (see qstrdgrow()) when the copy would run past its end;
 * bytes of `out` outside [idx, idx + len(in)) are left untouched.
 *
 * `in` may overlap `out` as long as `out` does not need to grow; if it
 * grows, its storage may move and an `in` that points into the old storage
 * is no longer valid.
 *
 * This function returns:
 *  - 0 on success
 *  - EINVAL if arguments are invalid
 *  - E2BIG if the resulting string would be too long
 *  - ENOMEM in case it was unable to allocate memory
 */
int qstrdcpi(qstr *out, size_t idx, const qstr in)
{
    size_t insz = QSZ(in);
    size_t endpos;
    int rc;

    if (idx >= QSTR_MAX) return EINVAL;
    if (idx > QSZ(*out)) return EINVAL;
    if (insz > (QSTR_MAX - idx)) return E2BIG;

    endpos = idx + insz;
    if (endpos > QSZ(*out))
    {
        rc = qstrdgrow(out, endpos);
        if (rc != 0) return rc;
    }

    /* Nothing to copy: also avoids handing a NULL pointer to memmove() */
    if (insz > 0) memmove(out->q_ptr + idx, in.q_ptr, insz);
    return 0;
}

/*
 * Release the storage of a memory managed string and reset the descriptor
 * to an empty slice, so that a repeated call is harmless. Slices are left
 * untouched.
 */
void qstrdfree(qstr *str)
{
    if (!QSTR_IS_MM(*str)) return;

    free(str->q_ptr);
    str->q_ptr = NULL;
    str->q_sz = 0;
}

/*
 * Return the slice [idx, end) of `in`. `end` is clamped to the length of
 * `in`; an out of range `idx` or an empty range yields an empty slice that
 * points at the start of `in`. The result is never memory managed.
 */
qstr qstrslice(qstr in, size_t idx, size_t end)
{
    qstr out = { .q_ptr = in.q_ptr };

    /* Index is out of bounds, just return an empty slice */
    if (idx >= QSZ(in) || end <= idx) return out;
    if (end > QSZ(in)) end = QSZ(in);

    out.q_ptr = in.q_ptr + idx;
    out.q_sz = end - idx;
    return out;
}

/* Number of 32-bit words needed for a 256-entry bitmap */
#define QBS_WORDS   (256 >> 5)

/* Set one bit in `set` for every byte value present in bytes[0..n) */
static void qbs_fill(uint32_t set[QBS_WORDS], const uint8_t *bytes, size_t n)
{
    for (size_t ii = 0; ii < n; ii++)
    {
        /* Unsigned constant: shifting a signed 1 into bit 31 is undefined */
        set[bytes[ii] >> 5] |= UINT32_C(1) << (bytes[ii] & 31);
    }
}

/* Test whether byte value `b` is a member of `set` */
static bool qbs_has(const uint32_t set[QBS_WORDS], uint8_t b)
{
    return ((set[b >> 5] >> (b & 31)) & UINT32_C(1)) != 0;
}

/*
 * Length of the initial run of `mem` that consists only of bytes found
 * in `accept` (the counted-buffer analogue of strspn()).
 */
size_t memspn(uint8_t *mem, size_t memsz, uint8_t *accept, size_t acceptsz)
{
    uint32_t set[QBS_WORDS] = { 0 };
    size_t ii;

    qbs_fill(set, accept, acceptsz);

    for (ii = 0; ii < memsz; ii++)
    {
        if (!qbs_has(set, mem[ii])) break;
    }

    return ii;
}

/*
 * Length of the initial run of `mem` that contains none of the bytes
 * found in `accept` (the counted-buffer analogue of strcspn(); despite its
 * name, `accept` is the set of bytes that stop the scan).
 */
size_t memcspn(uint8_t *mem, size_t memsz, uint8_t *accept, size_t acceptsz)
{
    uint32_t set[QBS_WORDS] = { 0 };
    size_t ii;

    qbs_fill(set, accept, acceptsz);

    for (ii = 0; ii < memsz; ii++)
    {
        if (qbs_has(set, mem[ii])) break;
    }

    return ii;
}


/*
 * Search from the beginning of the string and return the index of the
 * first occurrence of `c`. If not found, return the length of the string
 * (the index of the last character in the string + 1)
 */
size_t qstrchr(qstr str, int c)
{
    size_t len = QSZ(str);
    const uint8_t *hit;

    /* An empty string may carry a NULL pointer, which memchr() must not see */
    if (len == 0) return 0;

    hit = memchr(str.q_ptr, c, len);
    if (hit == NULL) return len;

    return (size_t)(hit - str.q_ptr);
}

/*
 * Search from the end of the string and return the index of the
 * last occurrence of `c`. If not found, return the length of the string
 * (the index of the last character in the string + 1)
 */
size_t qstrrchr(qstr str, int c)
{
    /* Compare as a byte, the same way memchr() does in qstrchr() */
    const uint8_t want = (uint8_t)c;
    const size_t len = QSZ(str);

    for (size_t ii = len; ii > 0; ii--)
    {
        if (str.q_ptr[ii - 1] == want) return ii - 1;
    }

    return len;
}

/*
 * Starting at `start`, skip bytes that are in `accept` and return the index
 * of the first byte that is not (or the length of `str` if there is none).
 */
size_t qstrspni(qstr str, size_t start, qstr accept)
{
    if (start >= QSZ(str)) return QSZ(str);
    return start + memspn(str.q_ptr + start, QSZ(str) - start, accept.q_ptr, QSZ(accept));
}

/*
 * Starting at `start`, skip bytes that are NOT in `accept` and return the
 * index of the first byte that is (or the length of `str` if there is none).
 */
size_t qstrcspni(qstr str, size_t start, qstr accept)
{
    if (start >= QSZ(str)) return QSZ(str);
    return start + memcspn(str.q_ptr + start, QSZ(str) - start, accept.q_ptr, QSZ(accept));
}

/* qstrspni() from the start of the string */
size_t qstrspn(qstr str, qstr accept)
{
    return qstrspni(str, 0, accept);
}

/* qstrcspni() from the start of the string */
size_t qstrcspn(qstr str, qstr accept)
{
    return qstrcspni(str, 0, accept);
}

/*
 * Tokenizer: step `out` to the next token of `in`, where tokens are
 * separated by any single byte found in `delim`.
 *
 * `out` carries the iteration state. Start with a qstr that does not point
 * into `in` (e.g. `{ 0 }`); each call that returns true leaves `out` as a
 * slice of `in` covering the next token (possibly empty, when two
 * delimiters are adjacent or the string ends with one). Returns false once
 * the previous token reached the end of `in`.
 */
bool qstrsep(qstr *out, qstr in, qstr delim)
{
    uint8_t *in_end = in.q_ptr + QSZ(in);
    uint8_t *next;

    /* Out points somewhere out of the range of `in`, find the first delimiter */
    if (out->q_ptr < in.q_ptr || out->q_ptr > in_end)
    {
        next = in.q_ptr;
    }
    else
    {
        uint8_t *tok_end = out->q_ptr + QSZ(*out);

        /*
         * We reached end of the string, return. `>=` rather than `==` so a
         * token that overshoots `in` cannot produce a negative length below.
         */
        if (tok_end >= in_end) return false;

        /* Find the next delimiter: skip the one that ended the last token */
        next = tok_end + 1;
    }

    out->q_sz = memcspn(next, (size_t)(in_end - next), delim.q_ptr, QSZ(delim));
    out->q_ptr = next;

    return true;
}

/*
 * Wrap an arbitrary buffer as a (non memory managed) qstr
 */
qstr qstrbuf(void *buf, size_t buf_sz)
{
    return (qstr){.q_ptr = buf, .q_sz = buf_sz };
}

/*
 * Concatenate two strings
 *
 * Returns:
 *      0       - on success
 *      EINVAL  - if arguments are invalid
 *      E2BIG   - if the resulting string would be too long
 *      ENOMEM  - if unable to allocate memory
 */
int qstrdcat(qstr *out, const qstr in)
{
    return qstrdcpi(out, QSZ(*out), in);
}

/*
 * Copy strings
 *
 * `src` is written over the start of `dest`. `dest` grows if it is shorter
 * than `src`, but it is not truncated if it is longer: the bytes past the
 * end of `src` keep their previous value.
 *
 * Returns:
 *      0       - on success
 *      EINVAL  - if arguments are invalid
 *      E2BIG   - if the resulting string would be too long
 *      ENOMEM  - if unable to allocate memory
 */
int qstrdcpy(qstr *dest, qstr src)
{
    return qstrdcpi(dest, 0, src);
}

/*
 * Return the string as a C string.
 *
 * If the last byte is not already '\0', one is appended -- which makes the
 * qstr one byte longer and may turn it into a memory managed string. On
 * failure to append, the literal "/#error/" is returned instead.
 */
const char *qstrdc(qstr *str)
{
    if (QSZ(*str) == 0 || qgetchar(*str, QSZ(*str) - 1) != '\0')
    {
        if (qstrdcat(str, Q("\0")) != 0) return "/#error/";
    }

    return (const char *)str->q_ptr;
}

/*
 * Copy `in` to position `idx` of `out` without ever growing `out`; bytes
 * that do not fit are dropped.
 *
 * Returns the index just past where the copy would have ended had `out`
 * been large enough (idx + length of `in`, saturating at SIZE_MAX). A
 * return value greater than the length of `out` therefore means the copy
 * was truncated.
 */
size_t qstrcpi(qstr out, size_t idx, qstr in)
{
    const size_t insz = QSZ(in);
    const size_t outsz = QSZ(out);
    const size_t wanted_end = (insz > SIZE_MAX - idx) ? SIZE_MAX : idx + insz;

    if (idx < outsz)
    {
        size_t room = outsz - idx;
        size_t ncopy = (insz < room) ? insz : room;

        if (ncopy > 0) memcpy(out.q_ptr + idx, in.q_ptr, ncopy);
    }

    return wanted_end;
}

/*
 * Join the `len` strings of `in`, separated by `delim`, into the fixed
 * storage of `out`. Returns the length the joined string needs; if that is
 * larger than the length of `out`, the result was truncated.
 */
size_t qstrjoin(qstr out, qstr in[], size_t len, qstr delim)
{
    size_t idx = 0;

    for (size_t ii = 0; ii < len; ii++)
    {
        if (ii != 0) idx = qstrcpi(out, idx, delim);
        idx = qstrcpi(out, idx, in[ii]);
    }

    return idx;
}

/*
 * Join the `len` strings of `in`, separated by `delim`, into `out`, growing
 * `out` as needed and finally resizing it to exactly the joined length.
 *
 * Returns 0 on success or the error code of the failing qstrdcpi() /
 * qstrdgrow() call.
 */
int qstrdjoin(qstr *out, qstr in[], size_t len, qstr delim)
{
    size_t idx = 0;
    int retval;

    for (size_t ii = 0; ii < len; ii++)
    {
        if (ii != 0)
        {
            retval = qstrdcpi(out, idx, delim);
            if (retval != 0) return retval;
            idx += QSZ(delim);
        }

        retval = qstrdcpi(out, idx, in[ii]);
        if (retval != 0) return retval;
        idx += QSZ(in[ii]);
    }

    /* `out` may have been longer than the result: cut it to size */
    return qstrdgrow(out, idx);
}

/*
 * Three-way compare of the first `asz` bytes of `a` with the first `bsz`
 * bytes of `b`: bytes decide first, then the shorter string sorts first.
 */
static int qcmp_bytes(const uint8_t *a, size_t asz, const uint8_t *b, size_t bsz)
{
    size_t common = (asz < bsz) ? asz : bsz;
    int diff = (common > 0) ? memcmp(a, b, common) : 0;

    if (diff != 0) return diff;
    return (asz > bsz) - (asz < bsz);
}

/*
 * Compare two strings. Returns <0, 0 or >0 like memcmp(); when one string
 * is a prefix of the other, the shorter one compares as smaller.
 */
int qstrcmp(qstr a, qstr b)
{
    return qcmp_bytes(a.q_ptr, QSZ(a), b.q_ptr, QSZ(b));
}

/*
 * Like qstrcmp(), but looks at no more than the first `n` bytes of each
 * string.
 */
int qstrncmp(qstr a, qstr b, size_t n)
{
    size_t asz = (QSZ(a) < n) ? QSZ(a) : n;
    size_t bsz = (QSZ(b) < n) ? QSZ(b) : n;

    return qcmp_bytes(a.q_ptr, asz, b.q_ptr, bsz);
}

/*
 * Write the raw bytes of `str` to `f`. Returns the number of bytes fwrite()
 * reports as written.
 */
size_t qfputs(qstr str, FILE *f)
{
    /* QSZ(), not q_sz: the MM flag bit is not part of the length */
    size_t len = QSZ(str);

    if (len == 0) return 0;
    return fwrite(str.q_ptr, 1, len, f);
}


/*
 * Write `in` to `stream`, rendering every NUL byte as the two characters
 * `\0`. Returns the number of bytes written.
 */
size_t qfprint(FILE *stream, qstr in)
{
    size_t total = 0;

    if (in.q_ptr == NULL) return 0;

    while (qstrlen(in) > 0)
    {
        /* Emit the run up to the next NUL, then the escaped NUL itself */
        size_t nul = qstrcspn(in, Q("\0"));

        total += qfputs(qstrslice(in, 0, nul), stream);
        if (nul < QSZ(in)) total += fwrite("\\0", 1, strlen("\\0"), stream);
        in = qstrslice(in, nul + 1, QSTR_MAX);
    }

    return total;
}

/*
 * Copy `in` into the C buffer `buf` of `bufsz` bytes, truncating if needed
 * and always NUL-terminating when `bufsz` is not zero.
 *
 * Returns the buffer size needed to hold all of `in` (its length + 1).
 */
size_t qstrc(char *buf, size_t bufsz, qstr in)
{
    const size_t insz = QSZ(in);
    size_t ncopy = insz;

    /* No room even for the terminator: `bufsz - 1` below would wrap around */
    if (bufsz == 0) return insz + 1;

    if (ncopy >= bufsz) ncopy = bufsz - 1;
    if (ncopy > 0) memcpy(buf, in.q_ptr, ncopy);
    buf[ncopy] = '\0';
    return insz + 1;
}

/*
 * Output a single conversion specification `fmt` (from its '%' up to and
 * including the conversion character), taking its argument(s) from `va`.
 *
 * A specification ending in 'Q' prints a qstr argument via qfprint();
 * anything else is handed to vfprintf(). Returns the number of bytes
 * written; a vfprintf() error counts as 0.
 */
size_t qfprintf_fmt(FILE *stream, qstr fmt, va_list va)
{
    const size_t len = QSZ(fmt);
    int printed;

    if (len == 0) return 0;

    if (fmt.q_ptr[len - 1] == 'Q')
    {
        return qfprint(stream, va_arg(va, qstr));
    }

    /* vfprintf() needs a NUL-terminated format */
    char tbuf[len + 1];
    qstrc(tbuf, sizeof(tbuf), fmt);

    printed = vfprintf(stream, tbuf, va);
    return (printed < 0) ? 0 : (size_t)printed;
}

/*
 * printf-like output of `fmt` to `stream`. Literal text goes through
 * qfprint() (so NUL bytes show as `\0`); conversions go through
 * qfprintf_fmt(), which adds %Q for qstr arguments. A '%' that is not
 * followed by any conversion character is printed literally.
 *
 * Returns the number of bytes written.
 *
 * NOTE(review): this passes `va` by value to qfprintf_fmt() once per
 * conversion and relies on the argument position advancing between calls.
 * ISO C leaves a va_list indeterminate after another function has applied
 * va_arg()/vfprintf() to it, so this only works on ABIs where va_list is an
 * array type (e.g. x86-64 System V). A portable version has to decode each
 * conversion and fetch its argument here.
 */
size_t qvfprintf(FILE *stream, qstr fmt, va_list va)
{
    size_t total = 0;

    while (qstrlen(fmt) > 0)
    {
        const size_t len = qstrlen(fmt);
        const size_t pct = qstrchr(fmt, '%');

        /* Literal text in front of the next conversion */
        total += qfprint(stream, qstrslice(fmt, 0, pct));
        if (pct >= len) break;

        /* The conversion ends at the first conversion character after '%' */
        size_t conv = qstrcspni(fmt, pct + 1, Q("%diouxXeEfFgGaAcspCSPnmQ"));
        if (conv >= len)
        {
            /* Unterminated conversion: not a valid vfprintf() format */
            total += qfprint(stream, qstrslice(fmt, pct, len));
            break;
        }

        total += qfprintf_fmt(stream, qstrslice(fmt, pct, conv + 1), va);
        fmt = qstrslice(fmt, conv + 1, QSTR_MAX);
    }

    return total;
}

/*
 * Variadic front end of qvfprintf()
 */
size_t qfprintf(FILE *stream, qstr fmt, ...)
{
    va_list va;
    size_t retval;

    va_start(va, fmt);
    retval = qvfprintf(stream, fmt, va);
    va_end(va);
    return retval;
}

/*
 * Format into the storage of `out` through a memory stream and return the
 * byte count reported by qvfprintf(), or 0 if the stream cannot be opened
 * (which includes a zero-length `out` on some C libraries).
 *
 * Note (from the original author, not verified here): fmemopen() in glibc
 * always pads the end of the buffer with a '\0', so if `out` is too small
 * to hold the whole string, it will contain an ending '\0'.
 */
size_t qsnprintf(qstr out, qstr fmt, ...)
{
    va_list va;
    size_t retval;
    FILE *stream;

    stream = fmemopen(out.q_ptr, QSZ(out), "wb");
    if (stream == NULL) return 0;

    va_start(va, fmt);
    retval = qvfprintf(stream, fmt, va);
    va_end(va);

    /* Closing flushes the formatted bytes into `out` */
    fclose(stream);
    return retval;
}

static qstr global_test = QC("test");

/*
 * Demo / smoke test for the functions above
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    (void)global_test;

    uint8_t buf[1024];

    size_t qw = qsnprintf(QB(buf), Q("Hello %Q 1 2 3, argc=%d, float=%0.2f"), Q("b\0\0b"), argc, 1.0 / 3.0);
    /* qw is a size_t: %zu, not %zd */
    qfprintf(stdout, Q("BUF(%zu) = %Q\n"), qw, qstrslice(QB(buf), 0, qw));

    qstr test = Q("Hello new World");
    qsetchar(test, 6, 'N');
    if (qstrdcat(&test, Q("\0Order")) != 0)
    {
        printf("Error concatenating string\n");
    }

    printf("C: %s\n", qstrdc(&test));

    qfprintf(stdout, Q("Q: %Q\n"), test);

    qfprintf(stdout, Q("SLICE: %Q\n"), qstrslice(test, 6, 9));

    qstr tok = { 0 };
    int ii = 0;
    while (qstrsep(&tok, test, Q(" \0")))
    {
        qfprintf(stdout, Q("TOKEN[%d]: %Q\n"), ii, tok);
        ii++;
    }

    qstrdfree(&test);

    qstr q = QB(buf);

    qstr cc[] =
    {
        Q(""),
        Q("usr"),
        Q("local"),
        Q("bin"),
    };

    size_t idx = qstrjoin(q, cc, 4, Q("/"));
    if (idx > qstrlen(q))
    {
        printf("Bork\n");
    }

    /* The argument is a qstr, so it needs %Q; %s expects a char pointer */
    qfprintf(stdout, Q("%Q\n"), qstrslice(q, 0, idx));

    q = QB(buf);

    int err = qstrdjoin(&q, cc, 4, Q("/"));
    if (err != 0)
    {
        printf("Error allocatating: %s\n", strerror(err));
    }

    qfprint(stdout, q); qfprint(stdout, Q("\n"));
    qstrdfree(&q);

    qfprintf(stdout, Q("Hello\0world %0.5d end %0.5f -> %Q.\n"), 5, 1/3.0, Q("b\0\0b"));

    return 0;
}