/*
 * wc.h - wirecode library - Richard Lupton 2025.
 *
 * Wirecode is a tag-length-value format for serializing/
 * deserializing data which lends itself to table driven
 * encoding/decoding.
 *
 * This is a single header library, with the following
 * options that can be enabled with #define.
 *
 * - WC_OPT_USE_STDINT
 *     Use stdint.h to define the integer types used
 *     in the wirecode library. Without this, you will
 *     need to define wc_u8, wc_i32, wc_u32, wc_i64,
 *     wc_u64 and wc_iptr.
 *
 * - WC_IMPLEMENTATION
 *     Include implementations of the main interfaces.
 *     This doesn't include the core kernels that need
 *     specific compilers, or can be implemented in
 *     assembly. See WC_KERNEL_IMPLEMENTATION.
 *
 * - WC_KERNEL_IMPLEMENTATION
 *     Include the kernel implementations. This requires
 *     that you either use clang to compile the file, or
 *     alternatively provide a definition of wc_tail_call
 *     which *guarantees* a tail call is emitted by the
 *     compiler (clang, for example, has the musttail
 *     attribute).
 *
 * Versions:
 *   0.1 - (2025-08-17) initial version.
 *         Encoders/decoders for bytes, 32-bit ints, and
 *         submessages and arrays of submessages.
 */

/*
 * The declarations are guarded so the header can be included more than
 * once in a translation unit (for example once for the declarations and
 * once more with WC_IMPLEMENTATION defined).
 */
#ifndef WC_H_INCLUDED
#define WC_H_INCLUDED

#ifdef WC_OPT_USE_STDINT
#include <stdint.h>
typedef uint8_t wc_u8;
typedef int32_t wc_i32;
typedef uint32_t wc_u32;
typedef int64_t wc_i64;
typedef uint64_t wc_u64;
typedef intptr_t wc_iptr;
#endif /* WC_OPT_USE_STDINT */

/* Default assertion: trap when cond is false. `note` is documentation only. */
#ifndef wc_assert
#define wc_assert(cond, note) do { if (!(cond)) __builtin_trap(); } while (0)
#endif

#ifndef wc_memcpy
#define wc_memcpy __builtin_memcpy
#endif

#define WC_NULL ((void *)0)

/* Result codes returned by the decoders. */
enum {
    WC_DECODE_OK,
    WC_DECODE_ERR,
};

/*
 * Wire formats, stored in the low three bits of a field's leading byte
 * (see WC_LBYTE). As implemented by the kernels below, the payload is:
 *   BYTE       - one byte.
 *   4BYTE      - four bytes, little-endian.
 *   SUBMESSAGE - a four byte little-endian length followed by that many
 *                bytes. For arrays those bytes are a sequence of
 *                (four byte length, element) pairs.
 */
enum {
    WC_WIRE_FORMAT_BYTE,
    WC_WIRE_FORMAT_4BYTE,
    WC_WIRE_FORMAT_SUBMESSAGE,
};

struct wc_encoder;
struct wc_decoder;
struct wc_evm;
struct wc_dvm;

/* Bump allocator over the half-open byte range [cur, end). */
struct wc_arena {
    wc_u8 *cur;
    wc_u8 *end;
};

/* Tags are 5 bits wide (leading byte >> 3), so tables hold 32 entries. */
#define WC_MAX_FIELD 32

/*
 * Shared signatures for every encoder/decoder kernel. All kernels use the
 * same parameter list so that they can tail call one another.
 */
#define WC_ENCODE_ARGS \
    struct wc_evm *vm, int fdx, const void *in, wc_u64 data, \
    wc_u8 *pb, const wc_u8 *pe
#define WC_DECODE_ARGS \
    struct wc_dvm *vm, void *out, wc_u64 data, \
    const wc_u8 *pb, const wc_u8 *pe, wc_u64 dbits

/* One entry of an encoder table; entries are emitted in index order. */
struct wc_field_encoder {
    wc_u8 *(*encode)(WC_ENCODE_ARGS);
    wc_u64 data;                         /* packed offsets, see WC_DATA_PCKI32 */
    wc_u8 lbyte;                         /* leading byte, see WC_LBYTE */
    const struct wc_encoder *subencoder; /* used by the submessage encoders */
};

struct wc_encoder {
    struct wc_field_encoder field[WC_MAX_FIELD];
    wc_i32 size;  /* element stride used when encoding arrays of this message */
    wc_u8 count;  /* number of leading entries of `field` that are encoded */
};

/* One entry of a decoder table; the table is indexed by tag. */
struct wc_field_decoder {
    int (*decode)(WC_DECODE_ARGS);
    wc_u64 data;                         /* packed offsets, see WC_DATA_PCKI32 */
    const struct wc_decoder *subdecoder; /* used by the submessage decoders */
    wc_u8 lbyte;                         /* expected leading byte */
};

struct wc_decoder {
    struct wc_field_decoder field[WC_MAX_FIELD];
    wc_i32 size;     /* bytes allocated per decoded message */
    wc_i32 align;    /* alignment used when allocating a decoded message */
    wc_u32 required; /* bit (1 << tag) set for every tag that must be present */
};

/* Builds an arena covering `size` bytes starting at `mem`. */
struct wc_arena wc_arena_from_memory(wc_i64 size, void *mem);

/*
 * Encodes the structure at `in` using table `e` into out[0..length).
 * Returns the number of bytes written, or -1 on failure.
 */
int wc_encode(const struct wc_encoder *e, const void *in,
              wc_i32 length, wc_u8 *out);

/*
 * Decodes bytes[0..len) using table `d` into storage allocated from `a`
 * (which must be non-null). Returns the decoded structure, or WC_NULL on
 * failure, in which case `a` is left unchanged.
 */
void *wc_decode(const struct wc_decoder *d, wc_i32 len, const wc_u8 *bytes,
                struct wc_arena *a);

/*
 * Decodes bytes[0..len) using table `d` into caller provided storage `out`.
 * `a` may be WC_NULL, in which case any field that needs an allocation
 * fails to decode. Returns WC_DECODE_OK or WC_DECODE_ERR; `a` is only
 * updated on success.
 */
int wc_decode_in_place(const struct wc_decoder *d, wc_i32 len,
                       const wc_u8 *bytes, void *out, struct wc_arena *a);

/* Leading byte of a field: tag in the high 5 bits, wire format in the low 3. */
#define WC_LBYTE(tag, wf) ((tag) << 3 | (wf))

/* Pack/unpack two 32-bit values in the 64-bit `data` word of a field. */
#define WC_DATA_PCKI32(v, w) ((wc_u32)(v) | (wc_u64)(w) << 32)
#define WC_DATA_UPCKI32_0(data) ((wc_i32)((data) & 0xFFFFFFFF))
#define WC_DATA_UPCKI32_1(data) ((wc_i32)((data) >> 32))

/*
 * Field encoders. The low half of `data` is the byte offset of the field
 * inside the input structure. For wc__encode_submessage_array the low half
 * is the offset of the pointer to the elements and the high half is the
 * offset of the 32-bit element count.
 */
wc_u8 *wc__encode_byte(WC_ENCODE_ARGS);
wc_u8 *wc__encode_i32(WC_ENCODE_ARGS);
wc_u8 *wc__encode_submessage(WC_ENCODE_ARGS);
wc_u8 *wc__encode_submessage_array(WC_ENCODE_ARGS);

/* Field decoders; `data` is laid out as for the matching encoder. */
int wc__decode_byte(WC_DECODE_ARGS);
int wc__decode_i32(WC_DECODE_ARGS);
int wc__decode_submessage(WC_DECODE_ARGS);
int wc__decode_submessage_array(WC_DECODE_ARGS);

#endif /* WC_H_INCLUDED */

#ifdef WC_IMPLEMENTATION

#ifndef WC_MAX_DEPTH
#define WC_MAX_DEPTH 16
#endif

/* Entrypoints for encoding / decoding. */
wc_u8 *wc__encode(WC_ENCODE_ARGS);
int wc__decode(WC_DECODE_ARGS);

/* Saved encoder state, restored when a nested message is complete. */
struct wc__eframe {
    const struct wc_encoder *e;
    const void *in;
    int fdx;
    wc_u64 data;
    wc_u8 *(*cont)(WC_ENCODE_ARGS);
};

struct wc_evm {
    const struct wc_encoder *e; /* table currently being encoded */
    struct wc__eframe frame[WC_MAX_DEPTH];
    int sp;                     /* number of frames in use */
};

/* Saved decoder state, restored when a nested message is complete. */
struct wc__dframe {
    const struct wc_decoder *d;
    const wc_u8 *pe;
    void *out;
    wc_u64 dbits;
    int (*cont)(WC_DECODE_ARGS);
};

struct wc_dvm {
    const struct wc_decoder *d; /* table currently being decoded */
    struct wc__dframe frame[WC_MAX_DEPTH];
    struct wc_arena *arena;
    int sp;                     /* number of frames in use */
};

struct wc_arena
wc_arena_from_memory(wc_i64 size, void *mem)
{
    struct wc_arena a;
    a.cur = mem;
    a.end = (wc_u8 *)mem + size;
    return a;
}

/* Largest value representable in integer type t (signed or unsigned). */
#define wc_maxof(t) \
    ((t)-1 > (t)1 ? (t)-1 : (t)-1 ^ (t)(1ULL << (8 * sizeof(t) - 1)))

/*
 * Allocates `count` objects of `size` bytes from `a`, aligned to `align`
 * (the padding computation assumes `align` is a power of two).
 * Returns WC_NULL if count * size overflows or the arena is too small.
 * The returned memory is not cleared.
 */
static void *
wc__arena_alloc(struct wc_arena *a, wc_i32 align, wc_i32 count, wc_i32 size)
{
    wc_assert(a != WC_NULL, "arena is non-null");
    wc_assert(align > 0, "align is greater than zero");
    wc_assert(count > 0, "count is greater than zero");
    wc_assert(size > 0, "size is greater than zero");

    wc_iptr p = (wc_iptr)(void *)a->cur;
    wc_iptr pad = -p & (wc_iptr)(align - 1);

    if (wc_maxof(wc_iptr) / (wc_iptr)size < (wc_iptr)count)
        return WC_NULL;
    if (a->end - a->cur < pad)
        return WC_NULL;
    if (a->end - a->cur - pad < (wc_iptr)size * (wc_iptr)count)
        return WC_NULL;

    void *ret = (void *)(a->cur + pad);
    a->cur += pad + (wc_iptr)count * (wc_iptr)size;
    return ret;
}

int
wc_encode(const struct wc_encoder *e, const void *in, wc_i32 outlen, wc_u8 *out)
{
    /* A negative length would form a pointer before `out`. */
    if (outlen < 0)
        return -1;

    struct wc_evm vm = { .e = e };
    wc_u8 *end = wc__encode(&vm, 0, in, 0, out, out + outlen);
    if (end == WC_NULL)
        return -1;
    return (int)(end - out);
}

void *
wc_decode(const struct wc_decoder *d, wc_i32 length, const wc_u8 *bytes,
          struct wc_arena *a)
{
    wc_assert(a != WC_NULL, "wc_decode requires non-null arena parameter");

    /* Work on a copy so that a failed decode leaves the arena untouched. */
    struct wc_arena tmp = *a;
    void *base = wc__arena_alloc(&tmp, d->align, 1, d->size);
    if (base == WC_NULL)
        return WC_NULL;
    if (wc_decode_in_place(d, length, bytes, base, &tmp) != WC_DECODE_OK)
        return WC_NULL;
    *a = tmp;
    return base;
}

int
wc_decode_in_place(const struct wc_decoder *d, wc_i32 length,
                   const wc_u8 *bytes, void *out, struct wc_arena *a)
{
    /* With no arena supplied every allocation fails instead of crashing. */
    struct wc_arena empty = { .cur = WC_NULL, .end = WC_NULL };
    if (a == WC_NULL)
        a = &empty;

    /* A negative length would form a pointer before `bytes`. */
    if (length < 0)
        return WC_DECODE_ERR;

    /* Work on a copy so that a failed decode leaves the arena untouched. */
    struct wc_arena tmp = *a;
    struct wc_dvm vm = {
        .d = d,
        .arena = &tmp,
    };
    int rc = wc__decode(&vm, out, 0, bytes, bytes + length, 0);
    if (rc == WC_DECODE_OK)
        *a = tmp;
    return rc;
}

#endif /* WC_IMPLEMENTATION */

#ifdef WC_KERNEL_IMPLEMENTATION

/*
 * NOTE(review): the kernels use struct wc_evm / struct wc_dvm,
 * wc__arena_alloc and wc_maxof, which are only defined when
 * WC_IMPLEMENTATION is also defined in this translation unit.
 */

#ifndef wc_tail_call
#ifdef __clang__
#define wc_tail_call(call) __attribute__((musttail)) return (call)
#else
#error "wc_tail_call not defined"
#endif
#endif

/* Length written before a submessage is patched with its real length. */
#define WC__LENGTH_PLACEHOLDER 0xFFFFFFFFu

/* Internal encoder completions. */
static wc_u8 *wc__encode_complete(WC_ENCODE_ARGS);
static wc_u8 *wc__encode_submessage_complete(WC_ENCODE_ARGS);
static wc_u8 *wc__encode_submessage_array_complete_item(WC_ENCODE_ARGS);

/* Internal decoder completions. */
static int wc__decode_complete(WC_DECODE_ARGS);

/* Stores v at p as four little-endian bytes; returns p + 4. */
static wc_u8 *
wc__store_u32le(wc_u8 *p, wc_u32 v)
{
    p[0] = (wc_u8)(v & 0xFF);
    p[1] = (wc_u8)((v >> 8) & 0xFF);
    p[2] = (wc_u8)((v >> 16) & 0xFF);
    p[3] = (wc_u8)((v >> 24) & 0xFF);
    return p + 4;
}

/*
 * Reads the four byte little-endian length at pb. Returns -1 when fewer
 * than four bytes remain before pe or when the top bit is set (lengths
 * are held in a signed 32-bit integer).
 */
static wc_i32
wc__load_length(const wc_u8 *pb, const wc_u8 *pe)
{
    if (pe - pb < 4)
        return -1;
    if (pb[3] & 0x80)
        return -1;
    return pb[0] | pb[1] << 8 | pb[2] << 16 | (wc_i32)pb[3] << 24;
}

/*
 * Encodes field `fdx` of the current table: writes the leading byte and
 * dispatches to the field's encoder. Once every field has been written
 * the current message is completed.
 */
wc_u8 *
wc__encode(WC_ENCODE_ARGS)
{
    if (fdx == vm->e->count) {
        wc_tail_call(wc__encode_complete(vm, fdx, in, data, pb, pe));
    }

    const struct wc_field_encoder *f = &vm->e->field[fdx];
    wc_assert((void *)f->encode != WC_NULL, "encoder should be non-null");

    if (pb == pe)
        return WC_NULL;
    *pb++ = f->lbyte;
    wc_tail_call(f->encode(vm, fdx, in, f->data, pb, pe));
}

/*
 * Finishes the current message: returns the write position for the top
 * level message, otherwise pops a frame and resumes its continuation.
 */
static wc_u8 *
wc__encode_complete(WC_ENCODE_ARGS)
{
    wc_assert(vm->sp >= 0, "stack pointer should be non-negative");
    (void)fdx;
    (void)in;
    (void)data;

    if (vm->sp == 0)
        return pb;

    const struct wc__eframe *restore = &vm->frame[--vm->sp];
    vm->e = restore->e;
    wc_assert(vm->sp >= 0, "stack pointer should be non-negative");
    wc_tail_call(restore->cont(vm, restore->fdx, restore->in, restore->data,
                               pb, pe));
}

/* Writes the byte found at the field's offset in `in`. */
wc_u8 *
wc__encode_byte(WC_ENCODE_ARGS)
{
    wc_assert(WC_DATA_UPCKI32_1(data) == 0, "extension offset should be 0");
    if (pb == pe)
        return WC_NULL;
    *pb++ = *((const wc_u8 *)in + WC_DATA_UPCKI32_0(data));
    wc_tail_call(wc__encode(vm, fdx + 1, in, 0, pb, pe));
}

/* Writes the 32-bit value at the field's offset in `in`, little-endian. */
wc_u8 *
wc__encode_i32(WC_ENCODE_ARGS)
{
    wc_assert(WC_DATA_UPCKI32_1(data) == 0, "extension offset should be 0");
    if (pe - pb < 4)
        return WC_NULL;

    wc_u32 v32;
    wc_memcpy(&v32, (const wc_u8 *)in + WC_DATA_UPCKI32_0(data), sizeof(v32));
    pb = wc__store_u32le(pb, v32);
    wc_tail_call(wc__encode(vm, fdx + 1, in, 0, pb, pe));
}

/*
 * Encodes the structure embedded at the field's offset using the field's
 * subencoder. Four bytes are reserved for the length and patched by
 * wc__encode_submessage_complete once the submessage has been written.
 */
wc_u8 *
wc__encode_submessage(WC_ENCODE_ARGS)
{
    if (pe - pb < 4)
        return WC_NULL;
    if (vm->sp == WC_MAX_DEPTH)
        return WC_NULL;

    const struct wc_encoder *submessage = vm->e->field[fdx].subencoder;
    const void *base = (const wc_u8 *)in + WC_DATA_UPCKI32_0(data);
    /* The length slot is remembered as a distance back from pe. */
    wc_i32 patch_offset = (wc_i32)(pe - pb);

    struct wc__eframe *save = &vm->frame[vm->sp++];
    save->e = vm->e;
    save->in = in;
    save->fdx = fdx;
    save->data = WC_DATA_PCKI32(patch_offset, 0);
    save->cont = &wc__encode_submessage_complete;
    vm->e = submessage;

    /* Use the maximal length, so if this is not patched correctly
     * it's more likely to cause a decode failure in tests.
     */
    pb = wc__store_u32le(pb, WC__LENGTH_PLACEHOLDER);

    wc_tail_call(wc__encode(vm, 0, base, 0, pb, pe));
}

/*
 * Continuation run after a submessage (or a whole array) has been
 * written: patches the reserved length and moves on to the next field
 * of the enclosing message.
 */
static wc_u8 *
wc__encode_submessage_complete(WC_ENCODE_ARGS)
{
    wc_assert(WC_DATA_UPCKI32_0(data) > 0, "patch offset should be non-zero");

    wc_u8 *patch = (wc_u8 *)pe - WC_DATA_UPCKI32_0(data);
    wc_assert(pb - patch >= 4, "should have reserved 4 bytes for length");

    wc_i32 length = (wc_i32)(pb - patch) - 4;
    wc__store_u32le(patch, (wc_u32)length);

    wc_tail_call(wc__encode(vm, fdx + 1, in, 0, pb, pe));
}

/*
 * Encodes element `fdx` of an array whose base is `in` and whose element
 * count is in the high half of `data`. When every element has been
 * written the array is completed.
 */
static wc_u8 *
wc__encode_submessage_array_next(WC_ENCODE_ARGS)
{
    wc_i32 len = WC_DATA_UPCKI32_1(data);
    if (fdx == len) {
        wc_tail_call(wc__encode_complete(vm, fdx, in, 0, pb, pe));
    }
    wc_assert(fdx < len, "fdx should never exceed length");

    if (pe - pb < 4)
        return WC_NULL;
    if (vm->sp == WC_MAX_DEPTH)
        return WC_NULL;

    wc_i32 patch_offset = (wc_i32)(pe - pb);

    /* Use the maximal length, so if this is not patched correctly
     * it's more likely to cause a decode failure in tests.
     */
    pb = wc__store_u32le(pb, WC__LENGTH_PLACEHOLDER);

    /* Push a frame for the next element of the array */
    struct wc__eframe *save = &vm->frame[vm->sp++];
    save->e = vm->e;
    save->in = in;
    save->fdx = fdx;
    save->data = WC_DATA_PCKI32(patch_offset, len);
    save->cont = &wc__encode_submessage_array_complete_item;
    /* No need to update vm->e */

    wc_assert(wc_maxof(wc_iptr) / (wc_iptr)vm->e->size > (wc_iptr)fdx,
              "don't overflow wc_iptr");
    const wc_u8 *pelt = (const wc_u8 *)in
        + ((wc_iptr)fdx * (wc_iptr)vm->e->size);

    wc_tail_call(wc__encode(vm, 0, pelt, WC_DATA_PCKI32(patch_offset, len),
                            pb, pe));
}

/*
 * Continuation run after one array element has been written: patches the
 * element's length and moves on to the next element.
 */
static wc_u8 *
wc__encode_submessage_array_complete_item(WC_ENCODE_ARGS)
{
    wc_assert(WC_DATA_UPCKI32_0(data) > 0, "patch offset should be non-zero");
    wc_assert(WC_DATA_UPCKI32_1(data) > 0, "array length should be non-zero");

    wc_u8 *patch = (wc_u8 *)pe - WC_DATA_UPCKI32_0(data);
    wc_assert(pb - patch >= 4, "should have reserved 4 bytes for length");

    wc_i32 length = (wc_i32)(pb - patch) - 4;
    wc__store_u32le(patch, (wc_u32)length);

    wc_tail_call(wc__encode_submessage_array_next(
        vm, fdx + 1, in, WC_DATA_PCKI32(0, WC_DATA_UPCKI32_1(data)), pb, pe));
}

/*
 * Encodes an array of submessages. The low half of `data` is the offset
 * in `in` of the pointer to the first element, the high half the offset
 * of the 32-bit element count. The total length is reserved here and
 * patched by wc__encode_submessage_complete after the last element.
 */
wc_u8 *
wc__encode_submessage_array(WC_ENCODE_ARGS)
{
    if (pe - pb < 4)
        return WC_NULL;
    if (vm->sp == WC_MAX_DEPTH)
        return WC_NULL;

    const void *pbase = (const wc_u8 *)in + WC_DATA_UPCKI32_0(data);
    const void *plen = (const wc_u8 *)in + WC_DATA_UPCKI32_1(data);

    wc_u32 len;
    wc_memcpy(&len, plen, sizeof(len));

    /* Every submessage is going to use at least 2 bytes, and
     * the very top bit is a potential sign bit if length is signed
     * so reject anything with the high bit set.
     */
    if (len & (1ULL << 31))
        return WC_NULL;

    const void *base;
    wc_memcpy(&base, pbase, sizeof(base));

    wc_i32 patch_offset = (wc_i32)(pe - pb);

    /* Use the maximal length, so if this is not patched correctly
     * it's more likely to cause a decode failure in tests.
     */
    pb = wc__store_u32le(pb, WC__LENGTH_PLACEHOLDER);

    struct wc__eframe *save = &vm->frame[vm->sp++];
    save->e = vm->e;
    save->in = in;
    save->fdx = fdx;
    save->data = WC_DATA_PCKI32(patch_offset, 0);
    save->cont = &wc__encode_submessage_complete;

    vm->e = vm->e->field[fdx].subencoder;
    wc_assert(vm->e->size > 0, "submessage should have a size");

    wc_tail_call(wc__encode_submessage_array_next(
        vm, 0, base, WC_DATA_PCKI32(0, (wc_i32)len), pb, pe));
}

/*
 * Decodes the next field of the current message. `dbits` has bit
 * (1 << tag) set for every tag already seen, so repeated fields are
 * rejected. At the end of the input the message is completed.
 */
int
wc__decode(WC_DECODE_ARGS)
{
    if (pb == pe) {
        wc_tail_call(wc__decode_complete(vm, out, data, pb, pe, dbits));
    }

    wc_u8 tag = *pb >> 3;
    const struct wc_field_decoder *f = &vm->d->field[tag];

    /* A tag with no decoder is bad input, not a broken table, so it must
     * be rejected before the table consistency assertion below.
     */
    if ((void *)f->decode == WC_NULL)
        return WC_DECODE_ERR;
    wc_assert(f->lbyte >> 3 == tag,
              "field decoder tag should agree with table index");
    if (f->lbyte != *pb)
        return WC_DECODE_ERR;
    if (dbits & (1ULL << tag))
        return WC_DECODE_ERR;

    wc_tail_call(f->decode(vm, out, f->data, pb, pe, dbits | (1ULL << tag)));
}

/*
 * Finishes the current message: checks that every required tag was seen,
 * then either reports success for the top level message or pops a frame
 * and resumes its continuation.
 */
static int
wc__decode_complete(WC_DECODE_ARGS)
{
    wc_assert(pb == pe,
              "wc__decode_complete should be called at the end of a message");
    (void)out;
    (void)data;

    wc_u32 required = vm->d->required;
    if ((required & dbits) != required)
        return WC_DECODE_ERR;

    if (vm->sp == 0)
        return WC_DECODE_OK;

    const struct wc__dframe *restore = &vm->frame[--vm->sp];
    vm->d = restore->d;
    wc_tail_call(restore->cont(vm, restore->out, 0, pb, restore->pe,
                               restore->dbits));
}

/* Stores the payload byte at the field's offset in `out`. */
int
wc__decode_byte(WC_DECODE_ARGS)
{
    wc_assert((*pb & 0x7) == WC_WIRE_FORMAT_BYTE,
              "wire-type should be WC_WIRE_FORMAT_BYTE");
    pb++;

    wc_u8 *pout = (wc_u8 *)out + WC_DATA_UPCKI32_0(data);
    if (pb == pe)
        return WC_DECODE_ERR;
    *pout = *pb++;
    wc_tail_call(wc__decode(vm, out, 0, pb, pe, dbits));
}

/* Stores the little-endian 32-bit payload at the field's offset in `out`. */
int
wc__decode_i32(WC_DECODE_ARGS)
{
    wc_assert((*pb & 0x7) == WC_WIRE_FORMAT_4BYTE,
              "wire-type for decode byte should be WC_WIRE_FORMAT_I32");
    pb++;

    if (pe - pb < 4)
        return WC_DECODE_ERR;

    wc_u32 v32 = pb[0] | pb[1] << 8 | pb[2] << 16 | (wc_u32)pb[3] << 24;
    wc_memcpy((wc_u8 *)out + WC_DATA_UPCKI32_0(data), &v32, sizeof(v32));

    wc_tail_call(wc__decode(vm, out, 0, pb + 4, pe, dbits));
}

/*
 * Decodes a length prefixed submessage into the structure embedded at the
 * field's offset in `out`, using the field's subdecoder. Decoding of the
 * enclosing message resumes once the submessage is complete.
 */
int
wc__decode_submessage(WC_DECODE_ARGS)
{
    wc_assert((*pb & 0x7) == WC_WIRE_FORMAT_SUBMESSAGE,
              "wire-type for decode byte should be WC_WIRE_FORMAT_SUBMESSAGE");
    if (vm->sp == WC_MAX_DEPTH)
        return WC_DECODE_ERR;

    const struct wc_decoder *submessage = vm->d->field[*pb >> 3].subdecoder;
    pb++;

    wc_i32 len = wc__load_length(pb, pe);
    if (len < 0)
        return WC_DECODE_ERR;
    pb += 4;
    if (pe - pb < len)
        return WC_DECODE_ERR;

    struct wc__dframe *save = &vm->frame[vm->sp++];
    save->d = vm->d;
    save->pe = pe;
    save->out = out;
    save->dbits = dbits;
    save->cont = &wc__decode;
    vm->d = submessage;

    wc_tail_call(wc__decode(vm, (wc_u8 *)out + WC_DATA_UPCKI32_0(data), 0,
                            pb, pb + len, 0));
}

/*
 * Decodes the next length prefixed array element into `out`; [pb, pe) is
 * what remains of the array payload. vm->d is the element decoder.
 */
static int
wc__decode_array_next(WC_DECODE_ARGS)
{
    if (pb == pe) {
        /* End of the array. No element is being completed here, so the
         * required-field check in wc__decode_complete must not fire:
         * pass the full required mask as the seen bits.
         */
        wc_tail_call(wc__decode_complete(vm, out, data, pb, pe,
                                         vm->d->required));
    }

    if (vm->sp == WC_MAX_DEPTH)
        return WC_DECODE_ERR;

    wc_i32 len = wc__load_length(pb, pe);
    if (len < 0)
        return WC_DECODE_ERR;
    pb += 4;
    if (pe - pb < len)
        return WC_DECODE_ERR;

    /* Resume here for the following element once this one is decoded. */
    struct wc__dframe *save = &vm->frame[vm->sp++];
    save->d = vm->d;
    save->pe = pe;
    save->out = (wc_u8 *)out + vm->d->size;
    save->dbits = dbits;
    save->cont = &wc__decode_array_next;
    /* vm->d doesn't need updating */

    wc_tail_call(wc__decode(vm, out, 0, pb, pb + len, 0));
}

/*
 * Decodes an array of submessages. The payload is scanned once to count
 * the elements, storage for them is taken from the VM's arena, and the
 * element pointer and 32-bit count are stored at the offsets held in the
 * low and high halves of `data`. An empty array stores a null pointer
 * and a count of zero without allocating.
 */
int
wc__decode_submessage_array(WC_DECODE_ARGS)
{
    if (vm->sp == WC_MAX_DEPTH)
        return WC_DECODE_ERR;
    wc_assert((*pb & 0x7) == WC_WIRE_FORMAT_SUBMESSAGE,
              "wire-type for decode byte should be WC_WIRE_FORMAT_SUBMESSAGE");

    const struct wc_decoder *submessage = vm->d->field[*pb >> 3].subdecoder;
    pb++;

    wc_i32 len = wc__load_length(pb, pe);
    if (len < 0)
        return WC_DECODE_ERR;
    pb += 4;
    if (pe - pb < len)
        return WC_DECODE_ERR;

    /* Count the elements, validating every length prefix on the way. */
    wc_i32 count = 0;
    {
        const wc_u8 *scan = pb;
        const wc_u8 *scan_end = pb + len;
        while (scan != scan_end) {
            wc_i32 mlen = wc__load_length(scan, scan_end);
            if (mlen < 0)
                return WC_DECODE_ERR;
            scan += 4;
            if (scan_end - scan < mlen)
                return WC_DECODE_ERR;
            scan += mlen;
            count += 1;
        }
    }

    void *pelts = WC_NULL;
    if (count > 0) {
        pelts = wc__arena_alloc(vm->arena, submessage->align, count,
                                submessage->size);
        if (pelts == WC_NULL)
            return WC_DECODE_ERR;
    }

    wc_memcpy((wc_u8 *)out + WC_DATA_UPCKI32_0(data), &pelts, sizeof(pelts));
    void *pcount = (wc_u8 *)out + WC_DATA_UPCKI32_1(data);
    wc_memcpy(pcount, &count, sizeof(count));

    if (count == 0) {
        /* Nothing to decode (len is zero): carry on with the next field. */
        wc_tail_call(wc__decode(vm, out, 0, pb, pe, dbits));
    }

    struct wc__dframe *save = &vm->frame[vm->sp++];
    save->d = vm->d;
    save->pe = pe;
    save->out = out;
    save->dbits = dbits;
    save->cont = &wc__decode;
    vm->d = submessage;

    wc_tail_call(wc__decode_array_next(vm, pelts, 0, pb, pb + len, 0));
}

#endif /* WC_KERNEL_IMPLEMENTATION */