From ebbe7a1672c5a8750a57019c4df6d259dda12a28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C5=99emysl=20Eric=20Janouch?= Date: Thu, 29 Sep 2022 21:06:46 +0200 Subject: [PATCH] Import protocol code generator from xK, add tests Also add a VIM syntax highlighting file. This also fixes some previously untriggered bugs. --- CMakeLists.txt | 41 ++++ libertyxdr.vim | 21 ++ tests/lxdrgen.c | 123 ++++++++++ tests/lxdrgen.lxdr | 23 ++ tools/lxdrgen-c.awk | 324 +++++++++++++++++++++++++ tools/lxdrgen-go.awk | 541 ++++++++++++++++++++++++++++++++++++++++++ tools/lxdrgen-mjs.awk | 226 ++++++++++++++++++ tools/lxdrgen.awk | 309 ++++++++++++++++++++++++ 8 files changed, 1608 insertions(+) create mode 100644 libertyxdr.vim create mode 100644 tests/lxdrgen.c create mode 100644 tests/lxdrgen.lxdr create mode 100644 tools/lxdrgen-c.awk create mode 100644 tools/lxdrgen-go.awk create mode 100644 tools/lxdrgen-mjs.awk create mode 100644 tools/lxdrgen.awk diff --git a/CMakeLists.txt b/CMakeLists.txt index af9c910..eb1d2d9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -53,6 +53,47 @@ foreach (name ${tests}) add_test (NAME test-${name} COMMAND test-${name}) endforeach () +# Test CMake script parsing add_test (test-cmake-parser env LC_ALL=C awk -f ${PROJECT_SOURCE_DIR}/tools/cmake-parser.awk -f ${PROJECT_SOURCE_DIR}/tools/cmake-dump.awk ${CMAKE_CURRENT_LIST_FILE}) + +# Test protocol code generation +set (lxdrgen_outputs) +set (lxdrgen_base "${PROJECT_BINARY_DIR}/lxdrgen.lxdr") +foreach (backend c go mjs) + list (APPEND lxdrgen_outputs ${lxdrgen_base}.${backend}) + add_custom_command (OUTPUT ${lxdrgen_base}.${backend} + COMMAND env LC_ALL=C awk + -f ${PROJECT_SOURCE_DIR}/tools/lxdrgen.awk + -f ${PROJECT_SOURCE_DIR}/tools/lxdrgen-${backend}.awk + -v PrefixCamel=ProtoGen + ${PROJECT_SOURCE_DIR}/tests/lxdrgen.lxdr + > ${lxdrgen_base}.${backend} + DEPENDS + ${PROJECT_SOURCE_DIR}/tools/lxdrgen.awk + ${PROJECT_SOURCE_DIR}/tools/lxdrgen-${backend}.awk + ${PROJECT_SOURCE_DIR}/tests/lxdrgen.lxdr 
+		COMMENT "Generating test protocol code (${backend})" VERBATIM)
+endforeach ()
+add_custom_target (test-lxdrgen-outputs ALL DEPENDS ${lxdrgen_outputs})
+# tests/lxdrgen.c #includes the generated C, so never compile it by itself.
+set_source_files_properties (${lxdrgen_base}.c
+	PROPERTIES HEADER_FILE_ONLY TRUE)
+add_executable (test-lxdrgen tests/lxdrgen.c ${lxdrgen_base}.c)
+target_include_directories (test-lxdrgen PRIVATE ${PROJECT_BINARY_DIR})
+add_test (NAME test-lxdrgen-c COMMAND test-lxdrgen)
+
+find_program (GO_EXECUTABLE go)
+if (GO_EXECUTABLE)
+	add_test (NAME test-lxdrgen-go COMMAND ${GO_EXECUTABLE} vet ${lxdrgen_base}.go)
+else ()
+	message (WARNING "Cannot test generated protocol code for Go")
+endif ()
+
+find_program (NODE_EXECUTABLE node)
+if (NODE_EXECUTABLE)
+	add_test (NAME test-lxdrgen-mjs COMMAND ${NODE_EXECUTABLE} -c ${lxdrgen_base}.mjs)
+else ()
+	message (WARNING "Cannot test generated protocol code for Javascript")
+endif ()
diff --git a/libertyxdr.vim b/libertyxdr.vim
new file mode 100644
index 0000000..d980d77
--- /dev/null
+++ b/libertyxdr.vim
@@ -0,0 +1,21 @@
+" filetype.vim: au!
BufNewFile,BufRead *.lxdr setf libertyxdr +if exists("b:current_syntax") + finish +endif + +syn match libertyxdrError "[^[:space:]:;,(){}<>=]\+" +syn region libertyxdrBlockComment start=+/[*]+ end=+[*]/+ +syn match libertyxdrComment "//.*" +syn match libertyxdrIdentifier "\<[[:alpha:]][[:alnum:]_]*\>" +syn match libertyxdrNumber "\<0\>\|\(-\|\<\)[1-9][[:digit:]]*\>" +syn keyword libertyxdrKeyword const enum struct union switch case +syn keyword libertyxdrType bool u8 u16 u32 u64 i8 i16 i32 i64 string void + +let b:current_syntax = "libertyxdr" +hi def link libertyxdrError Error +hi def link libertyxdrBlockComment Comment +hi def link libertyxdrComment Comment +hi def link libertyxdrIdentifier Identifier +hi def link libertyxdrNumber Number +hi def link libertyxdrKeyword Statement +hi def link libertyxdrType Type diff --git a/tests/lxdrgen.c b/tests/lxdrgen.c new file mode 100644 index 0000000..14f40e2 --- /dev/null +++ b/tests/lxdrgen.c @@ -0,0 +1,123 @@ +/* + * tests/lxdrgen.c + * + * Copyright (c) 2022, Přemysl Eric Janouch + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ * + */ + +#define PROGRAM_NAME "test" +#define PROGRAM_VERSION "0" + +#include "../liberty.c" +#include "lxdrgen.lxdr.c" + +static void +test_ser_deser_free (void) +{ + hard_assert (PROTO_GEN_VERSION == 1); + + enum { CASES = 3 }; + + struct proto_gen_struct a = {}, b = {}; + a.u = xcalloc ((a.u_len = CASES + rand () % 100), sizeof *a.u); + for (size_t i = 0; i < a.u_len; i++) + { + union proto_gen_union *u = a.u + i; + switch (i % CASES) + { + case 0: + u->tag = PROTO_GEN_ENUM_NUMBERS; + u->numbers.a = rand () % UINT8_MAX; + u->numbers.b = rand () % UINT16_MAX; + u->numbers.c = rand () % UINT32_MAX; + u->numbers.d = rand () % UINT64_MAX; + u->numbers.e = rand () % UINT8_MAX; + u->numbers.f = rand () % UINT16_MAX; + u->numbers.g = rand () % UINT32_MAX; + u->numbers.h = rand () % UINT64_MAX; + break; + case 1: + u->tag = PROTO_GEN_ENUM_OTHERS; + u->others.foo = rand () % 2; + u->others.bar = str_make (); + for (int i = rand () % 0x30; i > 0; i--) + str_append_c (&u->others.bar, 0x30 + i); + break; + case 2: + u->tag = PROTO_GEN_ENUM_NOTHING; + break; + default: + hard_assert (!"unhandled case"); + } + } + + struct str buf = str_make (); + hard_assert (proto_gen_struct_serialize (&a, &buf)); + struct msg_unpacker r = msg_unpacker_make (buf.str, buf.len); + hard_assert (proto_gen_struct_deserialize (&b, &r)); + hard_assert (!msg_unpacker_get_available (&r)); + str_free (&buf); + + hard_assert (a.u_len == b.u_len); + for (size_t i = 0; i < a.u_len; i++) + { + union proto_gen_union *ua = a.u + i; + union proto_gen_union *ub = b.u + i; + hard_assert (ua->tag == ub->tag); + switch (ua->tag) + { + case PROTO_GEN_ENUM_NUMBERS: + hard_assert (ua->numbers.a == ub->numbers.a); + hard_assert (ua->numbers.b == ub->numbers.b); + hard_assert (ua->numbers.c == ub->numbers.c); + hard_assert (ua->numbers.d == ub->numbers.d); + hard_assert (ua->numbers.e == ub->numbers.e); + hard_assert (ua->numbers.f == ub->numbers.f); + hard_assert (ua->numbers.g == ub->numbers.g); + hard_assert 
(ua->numbers.h == ub->numbers.h); + break; + case PROTO_GEN_ENUM_OTHERS: + hard_assert (ua->others.foo == ub->others.foo); + hard_assert (ua->others.bar.len == ub->others.bar.len); + hard_assert (!memcmp (ua->others.bar.str, ub->others.bar.str, + ua->others.bar.len)); + break; + case PROTO_GEN_ENUM_NOTHING: + break; + default: + hard_assert (!"unexpected case"); + } + } + + // Emulate partially deserialized data to test disposal of that. + for (size_t i = b.u_len - CASES; i < b.u_len; i++) + { + proto_gen_union_free (&b.u[i]); + memset (&b.u[i], 0, sizeof b.u[i]); + } + + proto_gen_struct_free (&a); + proto_gen_struct_free (&b); +} + +int +main (int argc, char *argv[]) +{ + struct test test; + test_init (&test, argc, argv); + + test_add_simple (&test, "/ser-deser-free", NULL, test_ser_deser_free); + + return test_run (&test); +} diff --git a/tests/lxdrgen.lxdr b/tests/lxdrgen.lxdr new file mode 100644 index 0000000..6c53de5 --- /dev/null +++ b/tests/lxdrgen.lxdr @@ -0,0 +1,23 @@ +/* + * tests/lxdrgen.lxdr: a test protocol for the generator + */ +const VERSION = 1; +const NOISREV = -1; + +// TODO: Test failure paths, and in general go for full coverage. +struct Struct { + union Union switch (enum Enum { + NUMBERS = VERSION, + OTHERS = 2, + NOTHING, + } tag) { + case NUMBERS: + i8 a; i16 b; i32 c; i64 d; + u8 e; u16 f; u32 g; u64 h; + case OTHERS: + bool foo; + string bar; + case NOTHING: + void; + } u<>; +}; diff --git a/tools/lxdrgen-c.awk b/tools/lxdrgen-c.awk new file mode 100644 index 0000000..0d43785 --- /dev/null +++ b/tools/lxdrgen-c.awk @@ -0,0 +1,324 @@ +# lxdrgen-c.awk: C backend for lxdrgen.awk. +# +# Copyright (c) 2022, Přemysl Eric Janouch +# SPDX-License-Identifier: 0BSD +# +# Neither *_new() nor *_destroy() functions are provided, because they'd only +# be useful for top-levels, and are merely extra malloc()/free() calls. +# Users are expected to reuse buffers. +# +# Similarly, no constructors are produced--those are easy to write manually. 
+# +# All arrays are deserialized zero-terminated, so u8<> and i8<> can be directly +# used as C strings. +# +# All types must be able to dispose partially zero values going from the back, +# i.e., in the reverse order of deserialization. + +function define_internal(name, ctype) { + Types[name] = "internal" + CodegenCType[name] = ctype +} + +function define_int(shortname, ctype) { + define_internal(shortname, ctype) + CodegenSerialize[shortname] = \ + "\tstr_pack_" shortname "(w, %s);\n" + CodegenDeserialize[shortname] = \ + "\tif (!msg_unpacker_" shortname "(r, &%s))\n" \ + "\t\treturn false;\n" +} + +function define_sint(size) { define_int("i" size, "int" size "_t") } +function define_uint(size) { define_int("u" size, "uint" size "_t") } + +function codegen_begin() { + define_sint("8") + define_sint("16") + define_sint("32") + define_sint("64") + define_uint("8") + define_uint("16") + define_uint("32") + define_uint("64") + + define_internal("string", "struct str") + CodegenDispose["string"] = "\tstr_free(&%s);\n" + CodegenSerialize["string"] = \ + "\tif (!proto_string_serialize(&%s, w))\n" \ + "\t\treturn false;\n" + CodegenDeserialize["string"] = \ + "\tif (!proto_string_deserialize(&%s, r))\n" \ + "\t\treturn false;\n" + + define_internal("bool", "bool") + CodegenSerialize["bool"] = \ + "\tstr_pack_u8(w, !!%s);\n" + CodegenDeserialize["bool"] = \ + "\t{\n" \ + "\t\tuint8_t v = 0;\n" \ + "\t\tif (!msg_unpacker_u8(r, &v))\n" \ + "\t\t\treturn false;\n" \ + "\t\t%s = !!v;\n" \ + "\t}\n" + + print "// Code generated from " FILENAME ". DO NOT EDIT." + print "// This file directly depends on liberty.c, but doesn't include it." 
+ print "" + print "static bool" + print "proto_string_serialize(const struct str *s, struct str *w) {" + print "\tif (s->len > UINT32_MAX)" + print "\t\treturn false;" + print "\tstr_pack_u32(w, s->len);" + print "\tstr_append_str(w, s);" + print "\treturn true;" + print "}" + print "" + print "static bool" + print "proto_string_deserialize(struct str *s, struct msg_unpacker *r) {" + print "\tuint32_t len = 0;" + print "\tif (!msg_unpacker_u32(r, &len))" + print "\t\treturn false;" + print "\tif (msg_unpacker_get_available(r) < len)" + print "\t\treturn false;" + print "\t*s = str_make();" + print "\tstr_append_data(s, r->data + r->offset, len);" + print "\tr->offset += len;" + print "\tif (!utf8_validate (s->str, s->len))" + print "\t\treturn false;" + print "\treturn true;" + print "}" +} + +function codegen_constant(name, value) { + print "" + print "enum { " PrefixUpper name " = " value " };" +} + +function codegen_enum_value(name, subname, value, cg) { + append(cg, "fields", + "\t" PrefixUpper toupper(cameltosnake(name)) "_" subname \ + " = " value ",\n") +} + +function codegen_enum(name, cg, ctype) { + ctype = "enum " PrefixLower cameltosnake(name) + print "" + print ctype " {" + print cg["fields"] "};" + + # XXX: This should also check if it isn't out-of-range for any reason, + # but our usage of sprintf() stands in the way a bit. + CodegenSerialize[name] = "\tstr_pack_i8(w, %s);\n" + CodegenDeserialize[name] = \ + "\t{\n" \ + "\t\tint8_t v = 0;\n" \ + "\t\tif (!msg_unpacker_i8(r, &v) || !v)\n" \ + "\t\t\treturn false;\n" \ + "\t\t%s = v;\n" \ + "\t}\n" + + CodegenCType[name] = ctype + for (i in cg) + delete cg[i] +} + +function codegen_struct_tag(d, cg, f) { + f = "self->" d["name"] + append(cg, "fields", "\t" CodegenCType[d["type"]] " " d["name"] ";\n") + append(cg, "dispose", sprintf(CodegenDispose[d["type"]], f)) + append(cg, "serialize", sprintf(CodegenSerialize[d["type"]], f)) + # Do not deserialize here, that would be out of order. 
+}
+
+function codegen_struct_field(d, cg, f, dispose, serialize, deserialize) {
+	f = "self->" d["name"]
+	dispose = CodegenDispose[d["type"]]
+	serialize = CodegenSerialize[d["type"]]
+	deserialize = CodegenDeserialize[d["type"]]
+	if (!d["isarray"]) {
+		append(cg, "fields", "\t" CodegenCType[d["type"]] " " d["name"] ";\n")
+		append(cg, "dispose", sprintf(dispose, f))
+		append(cg, "serialize", sprintf(serialize, f))
+		append(cg, "deserialize", sprintf(deserialize, f))
+		return
+	}
+
+	append(cg, "fields", "\t" CodegenCType["u32"] " " d["name"] "_len;\n" \
+		"\t" CodegenCType[d["type"]] " *" d["name"] ";\n")
+
+	if (dispose)
+		append(cg, "dispose", "\tif (" f ")\n" \
+			"\t\tfor (size_t i = 0; i < " f "_len; i++)\n" \
+			indent(indent(sprintf(dispose, f "[i]"))))
+	append(cg, "dispose", "\tfree(" f ");\n")
+
+	append(cg, "serialize", sprintf(CodegenSerialize["u32"], f "_len"))
+	if (d["type"] == "u8" || d["type"] == "i8") {
+		append(cg, "serialize", "\tstr_append_data(w, " f ", " f "_len);\n")
+	} else if (serialize) {
+		append(cg, "serialize",
+			"\tfor (size_t i = 0; i < " f "_len; i++)\n" \
+			indent(sprintf(serialize, f "[i]")))
+	}
+
+	# Reject UINT32_MAX, where "_len + 1" can wrap to 0 and under-allocate.
+	append(cg, "deserialize", sprintf(CodegenDeserialize["u32"], f "_len") \
+		"\tif (" f "_len == UINT32_MAX\n" \
+		"\t || !(" f " = calloc(" f "_len + 1, sizeof *" f ")))\n" \
+		"\t\treturn false;\n")
+	if (d["type"] == "u8" || d["type"] == "i8") {
+		append(cg, "deserialize",
+			"\tif (msg_unpacker_get_available(r) < " f "_len)\n" \
+			"\t\treturn false;\n" \
+			"\tmemcpy(" f ", r->data + r->offset, " f "_len);\n" \
+			"\tr->offset += " f "_len;\n")
+	} else if (deserialize) {
+		append(cg, "deserialize",
+			"\tfor (size_t i = 0; i < " f "_len; i++)\n" \
+			indent(sprintf(deserialize, f "[i]")))
+	}
+}
+
+function codegen_struct(name, cg, ctype, funcname) {
+	ctype = "struct " PrefixLower cameltosnake(name)
+	print ""
+	print ctype " {"
+	print cg["fields"] "};"
+
+	if (cg["dispose"]) {
+		funcname = PrefixLower cameltosnake(name) "_free"
+		print ""
+		print "static void\n"
funcname "(" ctype " *self) {" + print cg["dispose"] "}" + + CodegenDispose[name] = "\t" funcname "(&%s);\n" + } + if (cg["serialize"]) { + funcname = PrefixLower cameltosnake(name) "_serialize" + print "" + print "static bool\n" \ + funcname "(\n\t\tconst " ctype " *self, struct str *w) {" + print cg["serialize"] "\treturn true;" + print "}" + + CodegenSerialize[name] = "\tif (!" funcname "(&%s, w))\n" \ + "\t\treturn false;\n" + } + if (cg["deserialize"]) { + funcname = PrefixLower cameltosnake(name) "_deserialize" + print "" + print "static bool\n" \ + funcname "(\n\t\t" ctype " *self, struct msg_unpacker *r) {" + print cg["deserialize"] "\treturn true;" + print "}" + + CodegenDeserialize[name] = "\tif (!" funcname "(&%s, r))\n" \ + "\t\treturn false;\n" + } + + CodegenCType[name] = ctype + for (i in cg) + delete cg[i] +} + +function codegen_union_tag(d, cg) { + cg["tagtype"] = d["type"] + cg["tagname"] = d["name"] + append(cg, "fields", "\t" CodegenCType[d["type"]] " " d["name"] ";\n") +} + +function codegen_union_struct( \ + name, casename, cg, scg, structname, fieldname, fullcasename) { + # Don't generate obviously useless structs. + fullcasename = toupper(cameltosnake(cg["tagtype"])) "_" casename + if (!scg["dispose"] && !scg["deserialize"]) { + append(cg, "structless", "\tcase " PrefixUpper fullcasename ":\n") + for (i in scg) + delete scg[i] + return + } + + # And thus not all generated structs are present in Types. + structname = name "_" casename + fieldname = tolower(casename) + codegen_struct(structname, scg) + + append(cg, "fields", "\t" CodegenCType[structname] " " fieldname ";\n") + if (CodegenDispose[structname]) + append(cg, "dispose", "\tcase " PrefixUpper fullcasename ":\n" \ + indent(sprintf(CodegenDispose[structname], "self->" fieldname)) \ + "\t\tbreak;\n") + + # With no de/serialization code, this will simply recognize the tag. 
+ append(cg, "serialize", "\tcase " PrefixUpper fullcasename ":\n" \ + indent(sprintf(CodegenSerialize[structname], "self->" fieldname)) \ + "\t\tbreak;\n") + append(cg, "deserialize", "\tcase " PrefixUpper fullcasename ":\n" \ + indent(sprintf(CodegenDeserialize[structname], "self->" fieldname)) \ + "\t\tbreak;\n") +} + +function codegen_union(name, cg, f, ctype, funcname) { + ctype = "union " PrefixLower cameltosnake(name) + print "" + print ctype " {" + print cg["fields"] "};" + + f = "self->" cg["tagname"] + if (cg["dispose"]) { + funcname = PrefixLower cameltosnake(name) "_free" + print "" + print "static void\n" funcname "(" ctype " *self) {" + print "\tswitch (" f ") {" + if (cg["structless"]) + print cg["structless"] \ + indent(sprintf(CodegenDispose[cg["tagtype"]], f)) "\t\tbreak;" + print cg["dispose"] "\tdefault:" + print "\t\tbreak;" + print "\t}" + print "}" + + CodegenDispose[name] = "\t" funcname "(&%s);\n" + } + if (cg["serialize"]) { + funcname = PrefixLower cameltosnake(name) "_serialize" + print "" + print "static bool\n" \ + funcname "(\n\t\tconst " ctype " *self, struct str *w) {" + print "\tswitch (" f ") {" + if (cg["structless"]) + print cg["structless"] \ + indent(sprintf(CodegenSerialize[cg["tagtype"]], f)) "\t\tbreak;" + print cg["serialize"] "\tdefault:" + print "\t\treturn false;" + print "\t}" + print "\treturn true;" + print "}" + + CodegenSerialize[name] = "\tif (!" funcname "(&%s, w))\n" \ + "\t\treturn false;\n" + } + if (cg["deserialize"]) { + funcname = PrefixLower cameltosnake(name) "_deserialize" + print "" + print "static bool\n" \ + funcname "(\n\t\t" ctype " *self, struct msg_unpacker *r) {" + print sprintf(CodegenDeserialize[cg["tagtype"]], f) + print "\tswitch (" f ") {" + if (cg["structless"]) + print cg["structless"] "\t\tbreak;" + print cg["deserialize"] "\tdefault:" + print "\t\treturn false;" + print "\t}" + print "\treturn true;" + print "}" + + CodegenDeserialize[name] = "\tif (!" 
funcname "(&%s, r))\n" \ + "\t\treturn false;\n" + } + + CodegenCType[name] = ctype + for (i in cg) + delete cg[i] +} diff --git a/tools/lxdrgen-go.awk b/tools/lxdrgen-go.awk new file mode 100644 index 0000000..6aa0d04 --- /dev/null +++ b/tools/lxdrgen-go.awk @@ -0,0 +1,541 @@ +# lxdrgen-go.awk: Go backend for lxdrgen.awk. +# +# Copyright (c) 2022, Přemysl Eric Janouch +# SPDX-License-Identifier: 0BSD +# +# This backend also enables proxying to other endpoints using JSON. + +function define_internal(name, gotype) { + Types[name] = "internal" + CodegenGoType[name] = gotype +} + +function define_sint(size, shortname, gotype) { + shortname = "i" size + gotype = "int" size + define_internal(shortname, gotype) + + CodegenAppendJSON[shortname] = \ + "\tb = strconv.AppendInt(b, int64(%s), 10)\n" + if (size == 8) { + CodegenSerialize[shortname] = "\tdata = append(data, uint8(%s))\n" + CodegenDeserialize[shortname] = \ + "\tif len(data) >= 1 {\n" \ + "\t\t%s, data = int8(data[0]), data[1:]\n" \ + "\t} else {\n" \ + "\t\treturn nil, false\n" \ + "\t}\n" + return + } + + CodegenSerialize[shortname] = \ + "\tdata = binary.BigEndian.AppendUint" size "(data, uint" size "(%s))\n" + CodegenDeserialize[shortname] = \ + "\tif len(data) >= " (size / 8) " {\n" \ + "\t\t%s = " gotype "(binary.BigEndian.Uint" size "(data))\n" \ + "\t\tdata = data[" (size / 8) ":]\n" \ + "\t} else {\n" \ + "\t\treturn nil, false\n" \ + "\t}\n" +} + +function define_uint(size, shortname, gotype) { + # Both []byte and []uint8 luckily marshal as base64-encoded JSON strings, + # so there's no need to rename the type as an exception. 
+	shortname = "u" size
+	gotype = "uint" size
+	define_internal(shortname, gotype)
+
+	CodegenAppendJSON[shortname] = \
+		"\tb = strconv.AppendUint(b, uint64(%s), 10)\n"
+	if (size == 8) {
+		CodegenSerialize[shortname] = "\tdata = append(data, %s)\n"
+		CodegenDeserialize[shortname] = \
+			"\tif len(data) >= 1 {\n" \
+			"\t\t%s, data = data[0], data[1:]\n" \
+			"\t} else {\n" \
+			"\t\treturn nil, false\n" \
+			"\t}\n"
+		return
+	}
+
+	CodegenSerialize[shortname] = \
+		"\tdata = binary.BigEndian.AppendUint" size "(data, %s)\n"
+	CodegenDeserialize[shortname] = \
+		"\tif len(data) >= " (size / 8) " {\n" \
+		"\t\t%s = binary.BigEndian.Uint" size "(data)\n" \
+		"\t\tdata = data[" (size / 8) ":]\n" \
+		"\t} else {\n" \
+		"\t\treturn nil, false\n" \
+		"\t}\n"
+}
+
+# Currently two outputs cannot coexist within the same package.
+function codegen_private(name) {
+	return "proto" name
+}
+
+function codegen_begin( funcname) {
+	define_sint("8")
+	define_sint("16")
+	define_sint("32")
+	define_sint("64")
+	define_uint("8")
+	define_uint("16")
+	define_uint("32")
+	define_uint("64")
+	define_internal("bool", "bool")
+	define_internal("string", "string")
+
+	# Cater to "go generate", which exports GOPACKAGE (AWK reads it via ENVIRON).
+	CodegenPackage = ENVIRON["GOPACKAGE"]
+	if (!CodegenPackage)
+		CodegenPackage = "main"
+
+	print "// Code generated from " FILENAME ". DO NOT EDIT."
+	print ""
+	print "package " CodegenPackage
+	print ""
+	print "import ("
+	print "\t`encoding/base64`"
+	print "\t`encoding/binary`"
+	print "\t`encoding/json`"
+	print "\t`errors`"
+	print "\t`math`"
+	print "\t`strconv`"
+	print "\t`unicode/utf8`"
+	print ")"
+	print ""
+	print "// This is a hack to always use the base64 import."
+	print "var _ = base64.StdEncoding"
+	print ""
+
+	CodegenAppendJSON["bool"] = "\tb = strconv.AppendBool(b, %s)\n"
+	CodegenSerialize["bool"] = \
+		"\tif %s {\n" \
+		"\t\tdata = append(data, 1)\n" \
+		"\t} else {\n" \
+		"\t\tdata = append(data, 0)\n" \
+		"\t}\n"
+
+	funcname = codegen_private("ConsumeBoolFrom")
+	print "// " funcname " tries to deserialize a boolean value"
+	print "// from the beginning of a byte stream. When successful,"
+	print "// it returns a subslice with any data that might follow."
+	print "func " funcname "(data []byte, b *bool) ([]byte, bool) {"
+	print "\tif len(data) < 1 {"
+	print "\t\treturn nil, false"
+	print "\t}"
+	print "\tif data[0] != 0 {"
+	print "\t\t*b = true"
+	print "\t} else {"
+	print "\t\t*b = false"
+	print "\t}"
+	print "\treturn data[1:], true"
+	print "}"
+	print ""
+
+	CodegenDeserialize["bool"] = \
+		"\tif data, ok = " funcname "(data, &%s); !ok {\n" \
+		"\t\treturn nil, ok\n" \
+		"\t}\n"
+
+	funcname = codegen_private("AppendStringTo")
+	print "// " funcname " tries to serialize a string value,"
+	print "// appending it to the end of a byte stream."
+	print "func " funcname "(data []byte, s string) ([]byte, bool) {"
+	# The uint64() keeps this compilable where int is 32-bit (constant overflow).
+	print "\tif uint64(len(s)) > math.MaxUint32 {"
+	print "\t\treturn nil, false"
+	print "\t}"
+	print "\tdata = binary.BigEndian.AppendUint32(data, uint32(len(s)))"
+	print "\treturn append(data, s...), true"
+	print "}"
+	print ""
+
+	CodegenSerialize["string"] = \
+		"\tif data, ok = " funcname "(data, %s); !ok {\n" \
+		"\t\treturn nil, ok\n" \
+		"\t}\n"
+
+	funcname = codegen_private("ConsumeStringFrom")
+	print "// " funcname " tries to deserialize a string value"
+	print "// from the beginning of a byte stream. When successful,"
+	print "// it returns a subslice with any data that might follow."
+ print "func " funcname "(data []byte, s *string) ([]byte, bool) {" + print "\tif len(data) < 4 {" + print "\t\treturn nil, false" + print "\t}" + print "\tlength := binary.BigEndian.Uint32(data)" + print "\tif data = data[4:]; uint64(len(data)) < uint64(length) {" + print "\t\treturn nil, false" + print "\t}" + print "\t*s = string(data[:length])" + print "\tif !utf8.ValidString(*s) {" + print "\t\treturn nil, false" + print "\t}" + print "\treturn data[length:], true" + print "}" + print "" + + CodegenDeserialize["string"] = \ + "\tif data, ok = " funcname "(data, &%s); !ok {\n" \ + "\t\treturn nil, ok\n" \ + "\t}\n" + + funcname = codegen_private("UnmarshalEnumJSON") + print "// " funcname " converts a JSON fragment to an integer," + print "// ensuring that it's within the expected range of enum values." + print "func " funcname "(data []byte) (int64, error) {" + print "\tvar n int64" + print "\tif err := json.Unmarshal(data, &n); err != nil {" + print "\t\treturn 0, err" + print "\t} else if n > math.MaxInt8 || n < math.MinInt8 {" + print "\t\treturn 0, errors.New(`integer out of range`)" + print "\t} else {" + print "\t\treturn n, nil" + print "\t}" + print "}" + print "" +} + +function codegen_constant(name, value) { + print "const " PrefixCamel snaketocamel(name) " = " value + print "" +} + +function codegen_enum_value(name, subname, value, cg, goname) { + goname = PrefixCamel name snaketocamel(subname) + append(cg, "fields", + "\t" goname " = " value "\n") + append(cg, "stringer", + "\tcase " goname ":\n" \ + "\t\treturn `" snaketocamel(subname) "`\n") + append(cg, "marshal", + goname ",\n") + append(cg, "unmarshal", + "\tcase `" snaketocamel(subname) "`:\n" \ + "\t\t*v = " goname "\n") +} + +function codegen_enum(name, cg, gotype, fields, funcname) { + gotype = PrefixCamel name + print "type " gotype " int8" + print "" + + print "const (" + print cg["fields"] ")" + print "" + + print "func (v " gotype ") String() string {" + print "\tswitch v {" + print 
cg["stringer"] "\tdefault:" + print "\t\treturn strconv.Itoa(int(v))" + print "\t}" + print "}" + print "" + + CodegenIsMarshaler[name] = 1 + fields = cg["marshal"] + sub(/,\n$/, ":", fields) + gsub(/\n/, "\n\t", fields) + print "func (v " gotype ") MarshalJSON() ([]byte, error) {" + print "\tswitch v {" + print indent("case " fields) + print "\t\treturn []byte(`\"` + v.String() + `\"`), nil" + print "\t}" + print "\treturn json.Marshal(int(v))" + print "}" + print "" + + funcname = codegen_private("UnmarshalEnumJSON") + print "func (v *" gotype ") UnmarshalJSON(data []byte) error {" + print "\tvar s string" + print "\tif json.Unmarshal(data, &s) == nil {" + print "\t\t// Handled below." + print "\t} else if n, err := " funcname "(data); err != nil {" + print "\t\treturn err" + print "\t} else {" + print "\t\t*v = " gotype "(n)" + print "\t\treturn nil" + print "\t}" + print "" + print "\tswitch s {" + print cg["unmarshal"] "\tdefault:" + print "\t\treturn errors.New(`unrecognized value: ` + s)" + print "\t}" + print "\treturn nil" + print "}" + print "" + + # XXX: This should also check if it isn't out-of-range for any reason, + # but our usage of sprintf() stands in the way a bit. + CodegenSerialize[name] = "\tdata = append(data, uint8(%s))\n" + CodegenDeserialize[name] = \ + "\tif len(data) >= 1 {\n" \ + "\t\t%s, data = " gotype "(data[0]), data[1:]\n" \ + "\t} else {\n" \ + "\t\treturn nil, false\n" \ + "\t}\n" + + CodegenGoType[name] = gotype + for (i in cg) + delete cg[i] +} + +function codegen_marshal(type, f, marshal) { + if (CodegenAppendJSON[type]) + return sprintf(CodegenAppendJSON[type], f) + + # Complex types are json.Marshalers, there's no need to json.Marshal(&f). 
+ if (CodegenIsMarshaler[type]) + marshal = f ".MarshalJSON()" + else + marshal = "json.Marshal(" f ")" + + return \ + "\tif j, err := " marshal "; err != nil {\n" \ + "\t\treturn nil, err\n" \ + "\t} else {\n" \ + "\t\tb = append(b, j...)\n" \ + "\t}\n" +} + +function codegen_struct_field_marshal(d, cg, camel, f, marshal) { + camel = snaketocamel(d["name"]) + f = "s." camel + if (!d["isarray"]) { + append(cg, "marshal", + "\tb = append(b, `,\"" decapitalize(camel) "\":`...)\n" \ + codegen_marshal(d["type"], f)) + return + } + + # Note that we do not produce `null` for nil slices, unlike encoding/json. + # And arrays never get deserialized as such. + if (d["type"] == "u8") { + append(cg, "marshal", + "\tb = append(b, `,\"" decapitalize(camel) "\":\"`...)\n" \ + "\tb = append(b, base64.StdEncoding.EncodeToString(" f ")...)\n" \ + "\tb = append(b, '\"')\n") + return + } + + append(cg, "marshal", + "\tb = append(b, `,\"" decapitalize(camel) "\":[`...)\n" \ + "\tfor i := 0; i < len(" f "); i++ {\n" \ + "\t\tif i > 0 {\n" \ + "\t\t\tb = append(b, ',')\n" \ + "\t\t}\n" \ + indent(codegen_marshal(d["type"], f "[i]")) \ + "\t}\n" \ + "\tb = append(b, ']')\n") +} + +function codegen_struct_field(d, cg, camel, f, serialize, deserialize) { + codegen_struct_field_marshal(d, cg) + + camel = snaketocamel(d["name"]) + f = "s." camel + serialize = CodegenSerialize[d["type"]] + deserialize = CodegenDeserialize[d["type"]] + if (!d["isarray"]) { + append(cg, "fields", "\t" camel " " CodegenGoType[d["type"]] \ + " `json:\"" decapitalize(camel) "\"`\n") + append(cg, "serialize", sprintf(serialize, f)) + append(cg, "deserialize", sprintf(deserialize, f)) + return + } + + append(cg, "fields", "\t" camel " []" CodegenGoType[d["type"]] \ + " `json:\"" decapitalize(camel) "\"`\n") + + # XXX: This should also check if it isn't out-of-range for any reason. 
+ append(cg, "serialize", + sprintf(CodegenSerialize["u32"], "uint32(len(" f "))")) + if (d["type"] == "u8") { + append(cg, "serialize", + "\tdata = append(data, " f "...)\n") + } else { + append(cg, "serialize", + "\tfor i := 0; i < len(" f "); i++ {\n" \ + indent(sprintf(serialize, f "[i]")) \ + "\t}\n") + } + + append(cg, "deserialize", + "\t{\n" \ + "\t\tvar length uint32\n" \ + indent(sprintf(CodegenDeserialize["u32"], "length"))) + if (d["type"] == "u8") { + append(cg, "deserialize", + "\t\tif uint64(len(data)) < uint64(length) {\n" \ + "\t\t\treturn nil, false\n" \ + "\t\t}\n" \ + "\t\t" f ", data = data[:length], data[length:]\n" \ + "\t}\n") + } else { + append(cg, "deserialize", + "\t\t" f " = make([]" CodegenGoType[d["type"]] ", length)\n" \ + "\t}\n" \ + "\tfor i := 0; i < len(" f "); i++ {\n" \ + indent(sprintf(deserialize, f "[i]")) \ + "\t}\n") + } +} + +function codegen_struct_tag(d, cg, camel, f) { + codegen_struct_field_marshal(d, cg) + + camel = snaketocamel(d["name"]) + f = "s." camel + append(cg, "fields", "\t" camel " " CodegenGoType[d["type"]] \ + " `json:\"" decapitalize(camel) "\"`\n") + append(cg, "serialize", sprintf(CodegenSerialize[d["type"]], f)) + # Do not deserialize here, that is already done by the containing union. 
+} + +function codegen_struct(name, cg, gotype) { + gotype = PrefixCamel name + print "type " gotype " struct {\n" cg["fields"] "}\n" + + if (cg["marshal"]) { + CodegenIsMarshaler[name] = 1 + print "func (s *" gotype ") MarshalJSON() ([]byte, error) {" + print "\tb := []byte{}" + print cg["marshal"] "\tb[0] = '{'" + print "\treturn append(b, '}'), nil" + print "}" + print "" + } + + if (cg["serialize"]) { + print "func (s *" gotype ") AppendTo(data []byte) ([]byte, bool) {" + print "\tok := true" + print cg["serialize"] "\treturn data, ok" + print "}" + print "" + + CodegenSerialize[name] = \ + "\tif data, ok = %s.AppendTo(data); !ok {\n" \ + "\t\treturn nil, ok\n" \ + "\t}\n" + } + if (cg["deserialize"]) { + print "func (s *" gotype ") ConsumeFrom(data []byte) ([]byte, bool) {" + print "\tok := true" + print cg["deserialize"] "\treturn data, ok" + print "}" + print "" + + CodegenDeserialize[name] = \ + "\tif data, ok = %s.ConsumeFrom(data); !ok {\n" \ + "\t\treturn nil, ok\n" \ + "\t}\n" + } + + CodegenGoType[name] = gotype + for (i in cg) + delete cg[i] +} + +function codegen_union_tag(d, cg) { + cg["tagtype"] = d["type"] + cg["tagname"] = d["name"] + # The tag is implied from the type of struct stored in the interface. +} + +function codegen_union_struct(name, casename, cg, scg, structname, init) { + # And thus not all generated structs are present in Types. 
+	structname = name snaketocamel(casename)
+	codegen_struct(structname, scg)
+
+	init = CodegenGoType[structname] "{" snaketocamel(cg["tagname"]) \
+		": " decapitalize(snaketocamel(cg["tagname"])) "}"
+	append(cg, "unmarshal",
+		"\tcase " CodegenGoType[cg["tagtype"]] snaketocamel(casename) ":\n" \
+		"\t\ts := " init "\n" \
+		"\t\terr = json.Unmarshal(data, &s)\n" \
+		"\t\tu.Interface = &s\n")
+	append(cg, "serialize",
+		"\tcase *" CodegenGoType[structname] ":\n" \
+		indent(sprintf(CodegenSerialize[structname], "union")))
+	append(cg, "deserialize",
+		"\tcase " CodegenGoType[cg["tagtype"]] snaketocamel(casename) ":\n" \
+		"\t\ts := " init "\n" \
+		indent(sprintf(CodegenDeserialize[structname], "s")) \
+		"\t\tu.Interface = &s\n")
+}
+
+function codegen_union(name, cg, gotype, tagfield, tagvar) {  # Emit the Go union wrapper type with its JSON and binary methods, then clear cg; gotype, tagfield, tagvar are locals.
+	gotype = PrefixCamel name
+	print "type " gotype " struct {"
+	print "\tInterface any"
+	print "}"
+	print ""
+
+	# This cannot be a pointer method, it wouldn't work recursively.
+	CodegenIsMarshaler[name] = 1
+	print "func (u " gotype ") MarshalJSON() ([]byte, error) {"
+	print "\treturn u.Interface.(json.Marshaler).MarshalJSON()"
+	print "}"
+	print ""
+
+	tagfield = snaketocamel(cg["tagname"])
+	tagvar = decapitalize(tagfield)
+	print "func (u *" gotype ") UnmarshalJSON(data []byte) (err error) {"
+	print "\tvar t struct {"
+	print "\t\t" tagfield " " CodegenGoType[cg["tagtype"]] \
+		" `json:\"" tagvar "\"`"
+	print "\t}"
+	print "\tif err := json.Unmarshal(data, &t); err != nil {"
+	print "\t\treturn err"
+	print "\t}"
+	print ""
+	print "\tswitch " tagvar " := t." tagfield "; " tagvar " {"
+	print cg["unmarshal"] "\tdefault:"
+	print "\t\terr = errors.New(`unsupported value: ` + " tagvar ".String())"
+	print "\t}"
+	print "\treturn err"
+	print "}"
+	print ""
+
+	# XXX: Consider changing the interface into an AppendTo/ConsumeFrom one,
+	# that would eliminate these type case switches entirely.
+	# On the other hand, it would make it possible to send unsuitable structs.
+	print "func (u *" gotype ") AppendTo(data []byte) ([]byte, bool) {"
+	print "\tok := true"
+	print "\tswitch union := u.Interface.(type) {"
+	print cg["serialize"] "\tdefault:"
+	print "\t\treturn nil, false"
+	print "\t}"
+	print "\treturn data, ok"
+	print "}"
+	print ""
+
+	CodegenSerialize[name] = \
+		"\tif data, ok = %s.AppendTo(data); !ok {\n" \
+		"\t\treturn nil, ok\n" \
+		"\t}\n"
+
+	print "func (u *" gotype ") ConsumeFrom(data []byte) ([]byte, bool) {"
+	print "\tok := true"
+	print "\tvar " tagvar " " CodegenGoType[cg["tagtype"]]
+	print sprintf(CodegenDeserialize[cg["tagtype"]], tagvar)
+	print "\tswitch " tagvar " {"
+	print cg["deserialize"] "\tdefault:"
+	print "\t\treturn nil, false"
+	print "\t}"
+	print "\treturn data, ok"
+	print "}"
+	print ""
+
+	CodegenDeserialize[name] = \
+		"\tif data, ok = %s.ConsumeFrom(data); !ok {\n" \
+		"\t\treturn nil, ok\n" \
+		"\t}\n"
+
+	CodegenGoType[name] = gotype
+	for (i in cg)
+		delete cg[i]
+}
diff --git a/tools/lxdrgen-mjs.awk b/tools/lxdrgen-mjs.awk
new file mode 100644
index 0000000..a9a81f7
--- /dev/null
+++ b/tools/lxdrgen-mjs.awk
@@ -0,0 +1,226 @@
+# lxdrgen-mjs.awk: Javascript backend for lxdrgen.awk.
+#
+# Copyright (c) 2022, Přemysl Eric Janouch
+# SPDX-License-Identifier: 0BSD
+#
+# This backend is currently for decoding the binary format only.
+# (JSON is way too expensive to process and transfer.)
+#
+# Import the resulting script as a Javascript module.
+# Identifiers intentionally aren't prefixed.
+
+function define_internal(name) {  # Register NAME in Types as an "internal" (built-in) type.
+	Types[name] = "internal"
+}
+
+function define_sint(size, shortname) {  # Emit the Reader method i<SIZE>() and register its deserializer; shortname is a local.
+	shortname = "i" size
+	define_internal(shortname)
+	CodegenDeserialize[shortname] = "\t%s = r." shortname "()\n"
+
+	print ""
+	print "\t" shortname "() {"
+	if (size == "64") {
+		# XXX: 2^53 - 1 must be enough for anyone. BigInts are a PITA.
+		print "\t\tconst " shortname \
+			" = Number(this.getBigInt" size "(this.offset))"
+	} else {
+		print "\t\tconst " shortname " = this.getInt" size "(this.offset)"
+	}
+	print "\t\tthis.offset += " (size / 8)
+	print "\t\treturn " shortname
+	print "\t}"
+}
+
+function define_uint(size, shortname) {  # Emit the Reader method u<SIZE>() and register its deserializer; shortname is a local.
+	shortname = "u" size
+	define_internal(shortname)
+	CodegenDeserialize[shortname] = "\t%s = r." shortname "()\n"
+
+	print ""
+	print "\t" shortname "() {"
+	if (size == "64") {
+		# XXX: 2^53 - 1 must be enough for anyone. BigInts are a PITA.
+		print "\t\tconst " shortname \
+			" = Number(this.getBigUint" size "(this.offset))"
+	} else {
+		print "\t\tconst " shortname " = this.getUint" size "(this.offset)"
+	}
+	print "\t\tthis.offset += " (size / 8)
+	print "\t\treturn " shortname
+	print "\t}"
+}
+
+function codegen_begin() {  # Emit the file header and the Reader class with methods for every native type.
+	print "// Code generated from " FILENAME ". DO NOT EDIT."
+	print ""
+	print "export class Reader extends DataView {"
+	print "\tconstructor() {"
+	print "\t\tsuper(...arguments)"
+	print "\t\tthis.offset = 0"
+	print "\t\tthis.decoder = new TextDecoder('utf-8', {fatal: true})"
+	print "\t}"
+	print ""
+	print "\tget empty() {"
+	print "\t\treturn this.byteLength <= this.offset"
+	print "\t}"
+	print ""
+	print "\trequire(len) {"
+	print "\t\tif (this.byteLength - this.offset < len)"
+	print "\t\t\tthrow `Premature end of data`"
+	print "\t\treturn this.byteOffset + this.offset"
+	print "\t}"
+
+	define_internal("string")
+	CodegenDeserialize["string"] = "\t%s = r.string()\n"
+
+	print ""
+	print "\tstring() {"
+	print "\t\tconst len = this.getUint32(this.offset)"
+	print "\t\tthis.offset += 4"
+	print "\t\tconst array = new Uint8Array("
+	print "\t\t\tthis.buffer, this.require(len), len)"
+	print "\t\tthis.offset += len"
+	print "\t\treturn this.decoder.decode(array)"
+	print "\t}"
+
+	define_internal("bool")
+	CodegenDeserialize["bool"] = "\t%s = r.bool()\n"
+
+	print ""
+	print "\tbool() {"
+	print "\t\tconst u8 = this.getUint8(this.offset)"
+	print "\t\tthis.offset += 1"
+	print "\t\treturn u8 != 0"
+	print "\t}"
+
+	define_sint("8")
+	define_sint("16")
+	define_sint("32")
+	define_sint("64")
+	define_uint("8")
+	define_uint("16")
+	define_uint("32")
+	define_uint("64")
+
+	print "}"
+}
+
+function codegen_constant(name, value) {  # Emit an exported JS constant for NAME with the given VALUE.
+	print ""
+	print "export const " decapitalize(snaketocamel(name)) " = " value
+}
+
+function codegen_enum_value(name, subname, value, cg) {  # Queue one "Name: value," member in cg["fields"].
+	append(cg, "fields", "\t" snaketocamel(subname) ": " value ",\n")
+}
+
+function codegen_enum(name, cg) {  # Emit the frozen enum object, register its i8 deserializer, then clear cg.
+	print ""
+	print "export const " name " = Object.freeze({"
+	print cg["fields"] "})"
+
+	CodegenDeserialize[name] = "\t%s = r.i8()\n"
+	for (i in cg)
+		delete cg[i]
+}
+
+function codegen_struct_field(d, cg, camel, f, deserialize) {  # Queue a class field and its deserialization code (scalar or array); camel, f, deserialize are locals.
+	camel = decapitalize(snaketocamel(d["name"]))
+	f = "s." camel
+	append(cg, "fields", "\t" camel "\n")
+
+	deserialize = CodegenDeserialize[d["type"]]
+	if (!d["isarray"]) {
+		append(cg, "deserialize", sprintf(deserialize, f))
+		return
+	}
+
+	append(cg, "deserialize",
+		"\t{\n" \
+		indent(sprintf(CodegenDeserialize["u32"], "const len")))
+	if (d["type"] == "u8") {
+		append(cg, "deserialize",
+			"\t\t" f " = new Uint8Array(\n" \
+			"\t\t\tr.buffer, r.require(len), len)\n" \
+			"\t\tr.offset += len\n" \
+			"\t}\n")
+		return
+	}
+	if (d["type"] == "i8") {
+		append(cg, "deserialize",
+			"\t\t" f " = new Int8Array(\n" \
+			"\t\t\tr.buffer, r.require(len), len)\n" \
+			"\t\tr.offset += len\n" \
+			"\t}\n")
+		return
+	}
+
+	append(cg, "deserialize",
+		"\t\t" f " = new Array(len)\n" \
+		"\t}\n" \
+		"\tfor (let i = 0; i < " f ".length; i++)\n" \
+		indent(sprintf(deserialize, f "[i]")))
+}
+
+function codegen_struct_tag(d, cg) {  # Queue the union tag as a class field of the case struct.
+	append(cg, "fields", "\t" decapitalize(snaketocamel(d["name"])) "\n")
+	# Do not deserialize here, that is already done by the containing union.
+}
+
+function codegen_struct(name, cg) {  # Emit the class with a static deserialize(r), register its deserializer, then clear cg.
+	print ""
+	print "export class " name " {"
+	print cg["fields"] cg["methods"]
+	print "\tstatic deserialize(r) {"
+	print "\t\tconst s = new " name "()"
+	print indent(cg["deserialize"]) "\t\treturn s"
+	print "\t}"
+	print "}"
+
+	CodegenDeserialize[name] = "\t%s = " name ".deserialize(r)\n"
+	for (i in cg)
+		delete cg[i]
+}
+
+function codegen_union_tag(d, cg) {  # Remember the union's tag type and tag name in cg.
+	cg["tagtype"] = d["type"]
+	cg["tagname"] = d["name"]
+}
+
+function codegen_union_struct(name, casename, cg, scg, structname) {  # Emit one case's class (its constructor presets the tag) and queue its switch case; structname is a local.
+	append(scg, "methods",
+		"\n" \
+		"\tconstructor() {\n" \
+		"\t\tthis." decapitalize(snaketocamel(cg["tagname"])) \
+			" = " cg["tagtype"] "." snaketocamel(casename) "\n" \
+		"\t}\n")
+
+	# And thus not all generated structs are present in Types.
+	structname = name snaketocamel(casename)
+	codegen_struct(structname, scg)
+
+	append(cg, "deserialize",
+		"\tcase " cg["tagtype"] "." snaketocamel(casename) ":\n" \
+		"\t{\n" \
+		indent(sprintf(CodegenDeserialize[structname], "const s")) \
+		"\t\treturn s\n" \
+		"\t}\n")
+}
+
+function codegen_union(name, cg, tagvar) {  # Emit deserialize<NAME>(r), which reads the tag and dispatches on it, then clear cg; tagvar is a local.
+	tagvar = decapitalize(snaketocamel(cg["tagname"]))
+
+	print ""
+	print "export function deserialize" name "(r) {"
+	print sprintf(CodegenDeserialize[cg["tagtype"]], "const " tagvar) \
+		"\tswitch (" tagvar ") {"
+	print cg["deserialize"] "\tdefault:"
+	print "\t\tthrow `Unknown " cg["tagtype"] " (${" tagvar "})`"  # Interpolate the generated variable, whose name is the value of tagvar, not the literal word "tagvar".
+	print "\t}"
+	print "}"
+
+	CodegenDeserialize[name] = "\t%s = deserialize" name "(r)\n"
+	for (i in cg)
+		delete cg[i]
+}
diff --git a/tools/lxdrgen.awk b/tools/lxdrgen.awk
new file mode 100644
index 0000000..2b4adb6
--- /dev/null
+++ b/tools/lxdrgen.awk
@@ -0,0 +1,309 @@
+# lxdrgen.awk: an XDR-derived code generator for network protocols.
+#
+# Copyright (c) 2022, Přemysl Eric Janouch
+# SPDX-License-Identifier: 0BSD
+#
+# You may read RFC 4506 for context, however it is only a source of inspiration.
+# Grammar is easy to deduce from the parser.
+#
+# Native types: bool, u{8,16,32,64}, i{8,16,32,64}, string
+#
+# Don't define any new types, unless you hate yourself, then it's okay to do so.
+# Backends tend to be a pain in the arse, for different reasons.
+#
+# All numbers are encoded in big-endian byte order.
+# Booleans are one byte each.
+# Strings must be valid UTF-8, use u8<> to lift that restriction.
+# String and array lengths are encoded as u32.
+# Enumeration values automatically start at 1, and are encoded as i8.
+# Any struct or union field may be a variable-length array.
+#
+# Message framing is done externally, but is advised to also prefix u32 lengths,
+# unless this role is already filled by, e.g., WebSocket.
+#
+# Usage: env LC_ALL=C awk -f lxdrgen.awk -f lxdrgen-{c,go,mjs}.awk \
+#   -v PrefixCamel=Foo foo.lxdr > foo.{c,go,mjs} | {clang-format,gofmt,...}
+
+# --- Utilities ----------------------------------------------------------------
+
+function cameltosnake(s) {  # Convert CamelCase to lower snake_case by inserting "_" at each lower-to-upper boundary.
+	while (match(s, /[[:lower:]][[:upper:]]/)) {
+		s = substr(s, 1, RSTART) "_" \
+			tolower(substr(s, RSTART + 1, RLENGTH - 1)) \
+			substr(s, RSTART + RLENGTH)
+	}
+	return tolower(s)
+}
+
+function snaketocamel(s) {  # Convert snake_case (in any letter case) to UpperCamelCase.
+	s = toupper(substr(s, 1, 1)) tolower(substr(s, 2))
+	while (match(s, /_[[:alnum:]]/)) {
+		s = substr(s, 1, RSTART - 1) \
+			toupper(substr(s, RSTART + 1, RLENGTH - 1)) \
+			substr(s, RSTART + RLENGTH)
+	}
+	return s
+}
+
+function decapitalize(s) {  # Lowercase the first character when S contains an uppercase letter followed by a lowercase one.
+	if (match(s, /[[:upper:]][[:lower:]]/)) {
+		return tolower(substr(s, 1, 1)) substr(s, 2)
+	}
+	return s
+}
+
+function indent(s) {  # Add one tab of indentation to each line of S; an empty S is returned unchanged.
+	if (!s)
+		return s
+
+	gsub(/\n/, "\n\t", s)
+	sub(/\t*$/, "", s)
+	return "\t" s
+}
+
+function append(a, key, value) {  # Concatenate VALUE onto A[KEY].
+	a[key] = a[key] value
+}
+
+# --- Parsing ------------------------------------------------------------------
+
+function fatal(message) {  # Report MESSAGE with file and line on stdout (as a // comment) and stderr, then exit 1.
+	print "// " FILENAME ":" FNR ": fatal error: " message
+	print FILENAME ":" FNR ": fatal error: " message > "/dev/stderr"
+	exit 1
+}
+
+function skipcomment() {  # Discard input up to and including the next "*/", reading further lines as needed.
+	do {
+		if (match($0, /[*]\//)) {
+			$0 = substr($0, RSTART + RLENGTH)
+			return
+		}
+	} while (getline > 0)
+	fatal("unterminated block comment")
+}
+
+function nexttoken() {  # Store the next token in Token and return it; "" signals the end of input.
+	do {
+		if (match($0, /^[[:space:]]+/)) {
+			$0 = substr($0, RLENGTH + 1)
+		} else if (match($0, /^\/\/.*/)) {
+			$0 = ""
+		} else if (match($0, /^\/[*]/)) {
+			$0 = substr($0, RLENGTH + 1)
+			skipcomment()
+		} else if (match($0, /^[[:alpha:]][[:alnum:]_]*/)) {
+			Token = substr($0, 1, RLENGTH)
+			$0 = substr($0, RLENGTH + 1)
+			return Token
+		# AWK implementations rarely support non-decimal notations
+		# in their implicit string-to-number conversions.
+		} else if (match($0, /^(0|-?[1-9][0-9]*)/)) {
+			Token = substr($0, 1, RLENGTH)
+			$0 = substr($0, RLENGTH + 1)
+			return Token
+		} else if ($0 != "") {  # Explicit emptiness test: a record such as "-0" is a numeric string that tests false.
+			Token = substr($0, 1, 1)
+			$0 = substr($0, 2)
+			return Token
+		}
+	} while ($0 != "" || getline > 0)  # Only fetch another line once the current one is really exhausted.
+	Token = ""
+	return Token
+}
+
+function expect(v) {  # Abort via fatal() unless V is truthy; otherwise pass V through.
+	if (!v)
+		fatal("broken expectations at `" Token "' before `" $0 "'")
+	return v
+}
+
+function accept(what) {  # If the current token equals WHAT, advance and return 1; otherwise return 0.
+	if (Token != what)
+		return 0
+	nexttoken()
+	return 1
+}
+
+function identifier( v) {  # Consume and return an identifier token, or return 0; v is a local.
+	if (Token !~ /^[[:alpha:]]/)
+		return 0
+	v = Token
+	nexttoken()
+	return v
+}
+
+function number( v) {  # Consume and return a decimal number token, or return 0; v is a local.
+	if (Token !~ /^(0|-?[1-9])/)
+		return 0
+	v = Token
+	nexttoken()
+	return v
+}
+
+function readnumber( ident) {  # Read a number literal, or the value of a previously defined constant; ident is a local.
+	ident = identifier()
+	if (!ident)
+		return expect(number())
+	if (!(ident in Consts))
+		fatal("unknown constant: " ident)
+	return Consts[ident]
+}
+
+function defconst( ident, num) {  # Parse "const NAME = VALUE" if present; returns 1 when a constant was defined, else 0.
+	if (!accept("const"))
+		return 0
+
+	ident = expect(identifier())
+	expect(accept("="))
+	num = readnumber()
+	if (ident in Consts)
+		fatal("constant redefined: " ident)
+
+	Consts[ident] = num
+	codegen_constant(ident, num)
+	return 1
+}
+
+function readtype( ident) {  # Parse an inline type definition or the name of a known type; returns the type name, or 0.
+	ident = deftype()
+	if (ident)
+		return ident
+
+	ident = identifier()
+	if (!ident)
+		return 0
+
+	if (!(ident in Types))
+		fatal("unknown type: " ident)
+	return ident
+}
+
+function defenum( name, ident, value, cg) {  # Parse an enum after its keyword; values auto-increment from 1 and must be non-zero within -128..127.
+	delete cg[0]
+
+	name = expect(identifier())
+	expect(accept("{"))
+	while (!accept("}")) {
+		ident = expect(identifier())
+		value = value + 1
+		if (accept("="))
+			value = readnumber() + 0
+		if (!value)
+			fatal("enumeration values cannot be zero")
+		if (value < -128 || value > 127)
+			fatal("enumeration value out of range")
+		expect(accept(","))
+		append(EnumValues, name, SUBSEP ident)
+		if (EnumValues[name, ident]++)
+			fatal("duplicate enum value: " ident)
+		codegen_enum_value(name, ident, value, cg)
+	}
+
+	Types[name] = "enum"
+	codegen_enum(name, cg)
+	return name
+}
+
+function readfield(out, nonvoid) {  # Parse "TYPE NAME;", "TYPE NAME<>;" or "void;" into OUT; returns 0 for void; nonvoid is a local.
+	nonvoid = !accept("void")
+	if (nonvoid) {
+		out["type"] = expect(readtype())
+		out["name"] = expect(identifier())
+		# TODO: Consider supporting XDR's VLA length limits here.
+		# TODO: Consider supporting XDR's fixed-length syntax for string limits.
+		out["isarray"] = accept("<") && expect(accept(">"))
+	}
+	expect(accept(";"))
+	return nonvoid
+}
+
+function defstruct( name, d, cg) {  # Parse a struct after its keyword, passing each field and the finished struct to the backend.
+	delete d[0]
+	delete cg[0]
+
+	name = expect(identifier())
+	expect(accept("{"))
+	while (!accept("}")) {
+		if (readfield(d))
+			codegen_struct_field(d, cg)
+	}
+
+	Types[name] = "struct"
+	codegen_struct(name, cg)
+	return name
+}
+
+function defunion( name, tag, tagtype, tagvalue, cg, scg, d, a, i, unseen) {  # Parse "NAME switch (ENUM TAG) { case X: fields... }" after the union keyword; all parameters are locals.
+	delete cg[0]
+	delete scg[0]
+	delete d[0]
+
+	name = expect(identifier())
+	expect(accept("switch"))
+	expect(accept("("))
+	tag["type"] = tagtype = expect(readtype())
+	tag["name"] = expect(identifier())
+	expect(accept(")"))
+
+	if (Types[tagtype] != "enum")
+		fatal("not an enum type: " tagtype)
+	codegen_union_tag(tag, cg)
+
+	split(EnumValues[tagtype], a, SUBSEP)
+	for (i in a)
+		unseen[a[i]]++
+
+	expect(accept("{"))
+	while (!accept("}")) {
+		if (accept("case")) {
+			if (tagvalue)
+				codegen_union_struct(name, tagvalue, cg, scg)
+
+			tagvalue = expect(identifier())
+			expect(accept(":"))
+			if (!unseen[tagvalue]--)
+				fatal("no such value or duplicate case: " tagtype "." tagvalue)
+			codegen_struct_tag(tag, scg)
+		} else if (tagvalue) {
+			if (readfield(d))
+				codegen_struct_field(d, scg)
+		} else {
+			fatal("union fields must fall under a case")
+		}
+	}
+	if (tagvalue)
+		codegen_union_struct(name, tagvalue, cg, scg)
+
+	# What remains non-zero in unseen[2..] is simply not recognized/allowed.
+	Types[name] = "union"
+	codegen_union(name, cg)
+	return name
+}
+
+function deftype() {  # Parse an enum, struct or union definition; returns its name, or 0 if none starts here.
+	if (accept("enum"))
+		return defenum()
+	if (accept("struct"))
+		return defstruct()
+	if (accept("union"))
+		return defunion()
+	return 0
+}
+
+{  # Main action: triggered by the first record, it tokenizes and parses the whole input.
+	if (PrefixCamel) {
+		PrefixLower = tolower(cameltosnake(PrefixCamel)) "_"
+		PrefixUpper = toupper(cameltosnake(PrefixCamel)) "_"
+	}
+
+	# This is not in a BEGIN clause (even though it consumes all input),
+	# so that the code generator can insert the first FILENAME.
+	codegen_begin()
+
+	nexttoken()
+	while (Token != "") {
+		expect(defconst() || deftype())
+		expect(accept(";"))
+	}
+}