Document the recently added scripts

Import protocol code generator from xK, add tests
Also add a VIM syntax highlighting file. This also fixes some previously untriggered bugs.
2022-09-30 03:09:04 +02:00 · 2022-09-30 03:06:36 +02:00
10 changed files with 1745 additions and 0 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -53,6 +53,58 @@ foreach (name ${tests})
 	add_test (NAME test-${name} COMMAND test-${name})
 endforeach ()

+# --- Tools --------------------------------------------------------------------
+
+# Test the AsciiDoc manual page generator for a successful parse
+set (ASCIIMAN ${PROJECT_SOURCE_DIR}/tools/asciiman.awk)
+add_custom_command (OUTPUT libertyxdr.7
+	COMMAND env LC_ALL=C awk -f ${ASCIIMAN}
+		"${PROJECT_SOURCE_DIR}/libertyxdr.adoc" > libertyxdr.7
+	DEPENDS libertyxdr.adoc ${ASCIIMAN}
+	COMMENT "Generating man page for libertyxdr" VERBATIM)
+add_custom_target (docs ALL DEPENDS libertyxdr.7)
+
+# Test CMake script parsing
 add_test (test-cmake-parser
 	env LC_ALL=C awk -f ${PROJECT_SOURCE_DIR}/tools/cmake-parser.awk
 	-f ${PROJECT_SOURCE_DIR}/tools/cmake-dump.awk ${CMAKE_CURRENT_LIST_FILE})
+
+# Test protocol code generation
+set (lxdrgen_outputs)
+set (lxdrgen_base "${PROJECT_BINARY_DIR}/lxdrgen.lxdr")
+foreach (backend c go mjs)
+	list (APPEND lxdrgen_outputs ${lxdrgen_base}.${backend})
+	add_custom_command (OUTPUT ${lxdrgen_base}.${backend}
+		COMMAND env LC_ALL=C awk
+			-f ${PROJECT_SOURCE_DIR}/tools/lxdrgen.awk
+			-f ${PROJECT_SOURCE_DIR}/tools/lxdrgen-${backend}.awk
+			-v PrefixCamel=ProtoGen
+			${PROJECT_SOURCE_DIR}/tests/lxdrgen.lxdr
+			> ${lxdrgen_base}.${backend}
+		DEPENDS
+			${PROJECT_SOURCE_DIR}/tools/lxdrgen.awk
+			${PROJECT_SOURCE_DIR}/tools/lxdrgen-${backend}.awk
+			${PROJECT_SOURCE_DIR}/tests/lxdrgen.lxdr
+		COMMENT "Generating test protocol code (${backend})" VERBATIM)
+endforeach ()
+add_custom_target (test-lxdrgen-outputs ALL DEPENDS ${lxdrgen_outputs})
+
+set_source_files_properties (${lxdrgen_base}.c
+	PROPERTIES HEADER_FILE_ONLY TRUE)
+add_executable (test-lxdrgen tests/lxdrgen.c ${lxdrgen_base}.c)
+target_include_directories (test-lxdrgen PRIVATE ${PROJECT_BINARY_DIR})
+add_test (NAME test-lxdrgen-c COMMAND test-lxdrgen)
+
+find_program (GO_EXECUTABLE go)
+if (GO_EXECUTABLE)
+	add_test (test-lxdrgen-go ${GO_EXECUTABLE} vet ${lxdrgen_base}.go)
+else ()
+	message (WARNING "Cannot test generated protocol code for Go")
+endif ()
+
+find_program (NODE_EXECUTABLE node)
+if (NODE_EXECUTABLE)
+	add_test (test-lxdrgen-mjs ${NODE_EXECUTABLE} -c ${lxdrgen_base}.mjs)
+else ()
+	message (WARNING "Cannot test generated protocol code for Javascript")
+endif ()
--- a/README.adoc
+++ b/README.adoc
@@ -17,6 +17,42 @@ All development is done on Linux, but other POSIX-compatible operating systems
 should be supported as well.  They have an extremely low priority, however, and
 I'm not testing them at all, with the exception of OpenBSD.

+Tools
+-----
+This project also hosts a number of supporting scripts written in portable AWK:
+
+asciiman.awk::
+	A fallback manual page generator for AsciiDoc documents,
+	motivated by the hugeness of AsciiDoc's and Asciidoctor's dependency trees.
+	It uses the _man_ macro package.
+
+cmake-parser.awk::
+	Parses the CMake language to the extent that is necessary to reliably
+	extract project versions.  Its greatest limitation is its inability
+	to expand variables, which would require a full interpreter.
+
+cmake-dump.awk::
+	This can be used in conjunction with the previous script to dump CMake
+	scripts in a normalized format for further processing.
+
+lxdrgen.awk::
+	Protocol code generator for a variant of XDR,
+	which is link:libertyxdr.adoc[documented separately].
+	Successfully employed in https://git.janouch.name/p/xK[xK].
+
+lxdrgen-c.awk::
+	LibertyXDR backend that builds on top of the C pseudolibrary.
+
+lxdrgen-go.awk::
+	LibertyXDR backend for Go, supporting _encoding/json_ interfaces.  It also
+	produces optimized JSON marshallers (however, note that the _json.Marshaler_
+	interface is bound to be underperforming, due to the amount of otherwise
+	avoidable memory allocations it necessitates).
+
+lxdrgen-mjs.awk::
+	LibertyXDR backend for JavaScript, currently for decoding only.
+	It cuts a corner by not using BigInts, on par with `JSON.parse()`.
+
 Contributing and Support
 ------------------------
 Use https://git.janouch.name/p/liberty to report any bugs, request features,
--- a/libertyxdr.adoc
+++ b/libertyxdr.adoc
@@ -0,0 +1,108 @@
+libertyxdr(7)
+=============
+:doctype: manpage
+
+Name
+----
+LibertyXDR - an XDR-derived IDL and data serialization format
+
+Description
+-----------
+*LibertyXDR* is an interface description language, as well as a data
+serialization format, that has been largely derived from XDR, though notably
+simplified.
+
+Conventions
+~~~~~~~~~~~
+User-defined types should be named in *CamelCase*, field names in *snake_case*,
+and constants in *SCREAMING_SNAKE_CASE*.  Code generators will convert these to
+whatever is appropriate in their target language.
+
+Primitive data types
+~~~~~~~~~~~~~~~~~~~~
+Like in XDR, all data is serialized in the network byte order, i.e., big-endian.
+
+ * *void*: 0 bytes
+
+This is a dummy type that cannot be assigned a field name.
+
+ * *bool*: 1 byte
+
+This is a boolean value: 0 means _false_, any other value means _true_.
+
+ * *u8*, *u16*, *u32*, *u64*: 1, 2, 4, and 8 bytes respectively
+
+These are unsigned integers.
+
+ * *i8*, *i16*, *i32*, *i64*: 1, 2, 4, and 8 bytes respectively
+
+These are signed integers in two's complement.
+
+ * *string*: implicitly prefixed by its length as a *u32*,
+   then immediately followed by its contents, with no trailing NUL byte
+
+This is a valid UTF-8 string without a byte order mark.  Note that strings are
+always unbounded, unlike in XDR.
+
+Constants
+~~~~~~~~~
+At the top level of a document, outside other definitions, you can define
+typeless integer constants:
+
+ const VERSION = 1;
+
+The value can be either a name of another previously defined constant,
+or an immediate decimal value, which may not contain leading zeros.
+
+Enumerations
+~~~~~~~~~~~~
+An *enum* is an *i8* with uniquely named values, in their own namespace.
+
+Values can be either specified explicitly, in the same way as with a constant,
+or they can be left implicit, in which case names assume a value that is one
+larger than their predecessor.  Zero is reserved for internal use, thus
+enumerations implicitly begin with a value of one.  For example, these form
+a sequence from one to three:
+
+ enum Vehicle { CAR, LORRY = 2, PLANE, };
+
+Structures
+~~~~~~~~~~
+A *struct* is a sequence of fields, specified by their type, and their chosen
+name.  You can add a *<>* suffix to change a field to an array, in which case
+it is implicitly preceded by a *u32* specifying its length in terms of its
+elements.
+
+Unlike in XDR, there is no padding between subsequent fields, and type
+definitions can be arbitrarily syntactically nested, as in C.
+
+ struct StockReport {
+   u8 version;       // Version of this report.
+   struct Item {
+     Vehicle kind;   // The vehicle in question.
+     i32 count;      // How many vehicles of that kind there are.
+   } items<>;        // Reported items.
+ };
+
+Unions
+~~~~~~
+A *union* is a kind of structure whose fields depend on the value of its first
+and always-present field, which must be a tag *enum*:
+
+ union VehicleDetails switch (Vehicle kind) {
+ case CAR:   void;
+ case LORRY: i8 axles;
+ case PLANE: i8 engines;
+ };
+
+All possible enumeration values must be named, and there is no *case*
+fall-through.
+
+Framing
+-------
+Unless this role is already filled by, e.g., WebSocket, _LibertyXDR_ structures
+should be prefixed by their byte length in the *u32* format, once serialized.
+
+See also
+--------
+_XDR: External Data Representation Standard_, RFC 4506
--- a/libertyxdr.vim
+++ b/libertyxdr.vim
@@ -0,0 +1,21 @@
+" filetype.vim: au! BufNewFile,BufRead *.lxdr setf libertyxdr
+if exists("b:current_syntax")
+	finish
+endif
+
+syn match libertyxdrError "[^[:space:]:;,(){}<>=]\+"
+syn region libertyxdrBlockComment start=+/[*]+ end=+[*]/+
+syn match libertyxdrComment "//.*"
+syn match libertyxdrIdentifier "\<[[:alpha:]][[:alnum:]_]*\>"
+syn match libertyxdrNumber "\<0\>\|\(-\|\<\)[1-9][[:digit:]]*\>"
+syn keyword libertyxdrKeyword const enum struct union switch case
+syn keyword libertyxdrType bool u8 u16 u32 u64 i8 i16 i32 i64 string void
+
+let b:current_syntax = "libertyxdr"
+hi def link libertyxdrError Error
+hi def link libertyxdrBlockComment Comment
+hi def link libertyxdrComment Comment
+hi def link libertyxdrIdentifier Identifier
+hi def link libertyxdrNumber Number
+hi def link libertyxdrKeyword Statement
+hi def link libertyxdrType Type
--- a/tests/lxdrgen.c
+++ b/tests/lxdrgen.c
@@ -0,0 +1,123 @@
+/*
+ * tests/lxdrgen.c
+ *
+ * Copyright (c) 2022, Přemysl Eric Janouch <p@janouch.name>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#define PROGRAM_NAME "test"
+#define PROGRAM_VERSION "0"
+
+#include "../liberty.c"
+#include "lxdrgen.lxdr.c"
+
+static void
+test_ser_deser_free (void)
+{
+	hard_assert (PROTO_GEN_VERSION == 1);
+
+	enum { CASES = 3 };
+
+	struct proto_gen_struct a = {}, b = {};
+	a.u = xcalloc ((a.u_len = CASES + rand () % 100), sizeof *a.u);
+	for (size_t i = 0; i < a.u_len; i++)
+	{
+		union proto_gen_union *u = a.u + i;
+		switch (i % CASES)
+		{
+		case 0:
+			u->tag = PROTO_GEN_ENUM_NUMBERS;
+			u->numbers.a = rand () % UINT8_MAX;
+			u->numbers.b = rand () % UINT16_MAX;
+			u->numbers.c = rand () % UINT32_MAX;
+			u->numbers.d = rand () % UINT64_MAX;
+			u->numbers.e = rand () % UINT8_MAX;
+			u->numbers.f = rand () % UINT16_MAX;
+			u->numbers.g = rand () % UINT32_MAX;
+			u->numbers.h = rand () % UINT64_MAX;
+			break;
+		case 1:
+			u->tag = PROTO_GEN_ENUM_OTHERS;
+			u->others.foo = rand () % 2;
+			u->others.bar = str_make ();
+			for (int i = rand () % 0x30; i > 0; i--)
+				str_append_c (&u->others.bar, 0x30 + i);
+			break;
+		case 2:
+			u->tag = PROTO_GEN_ENUM_NOTHING;
+			break;
+		default:
+			hard_assert (!"unhandled case");
+		}
+	}
+
+	struct str buf = str_make ();
+	hard_assert (proto_gen_struct_serialize (&a, &buf));
+	struct msg_unpacker r = msg_unpacker_make (buf.str, buf.len);
+	hard_assert (proto_gen_struct_deserialize (&b, &r));
+	hard_assert (!msg_unpacker_get_available (&r));
+	str_free (&buf);
+
+	hard_assert (a.u_len == b.u_len);
+	for (size_t i = 0; i < a.u_len; i++)
+	{
+		union proto_gen_union *ua = a.u + i;
+		union proto_gen_union *ub = b.u + i;
+		hard_assert (ua->tag == ub->tag);
+		switch (ua->tag)
+		{
+		case PROTO_GEN_ENUM_NUMBERS:
+			hard_assert (ua->numbers.a == ub->numbers.a);
+			hard_assert (ua->numbers.b == ub->numbers.b);
+			hard_assert (ua->numbers.c == ub->numbers.c);
+			hard_assert (ua->numbers.d == ub->numbers.d);
+			hard_assert (ua->numbers.e == ub->numbers.e);
+			hard_assert (ua->numbers.f == ub->numbers.f);
+			hard_assert (ua->numbers.g == ub->numbers.g);
+			hard_assert (ua->numbers.h == ub->numbers.h);
+			break;
+		case PROTO_GEN_ENUM_OTHERS:
+			hard_assert (ua->others.foo == ub->others.foo);
+			hard_assert (ua->others.bar.len == ub->others.bar.len);
+			hard_assert (!memcmp (ua->others.bar.str, ub->others.bar.str,
+				ua->others.bar.len));
+			break;
+		case PROTO_GEN_ENUM_NOTHING:
+			break;
+		default:
+			hard_assert (!"unexpected case");
+		}
+	}
+
+	// Emulate partially deserialized data to test disposal of that.
+	for (size_t i = b.u_len - CASES; i < b.u_len; i++)
+	{
+		proto_gen_union_free (&b.u[i]);
+		memset (&b.u[i], 0, sizeof b.u[i]);
+	}
+
+	proto_gen_struct_free (&a);
+	proto_gen_struct_free (&b);
+}
+
+int
+main (int argc, char *argv[])
+{
+	struct test test;
+	test_init (&test, argc, argv);
+
+	test_add_simple (&test, "/ser-deser-free", NULL, test_ser_deser_free);
+
+	return test_run (&test);
+}
--- a/tests/lxdrgen.lxdr
+++ b/tests/lxdrgen.lxdr
@@ -0,0 +1,23 @@
+/*
+ * tests/lxdrgen.lxdr: a test protocol for the generator
+ */
+const VERSION = 1;
+const NOISREV = -1;
+
+// TODO: Test failure paths, and in general go for full coverage.
+struct Struct {
+	union Union switch (enum Enum {
+		NUMBERS = VERSION,
+		OTHERS = 2,
+		NOTHING,
+	} tag) {
+	case NUMBERS:
+		i8 a; i16 b; i32 c; i64 d;
+		u8 e; u16 f; u32 g; u64 h;
+	case OTHERS:
+		bool foo;
+		string bar;
+	case NOTHING:
+		void;
+	} u<>;
+};
--- a/tools/lxdrgen-c.awk
+++ b/tools/lxdrgen-c.awk
@@ -0,0 +1,324 @@
+# lxdrgen-c.awk: C backend for lxdrgen.awk.
+#
+# Copyright (c) 2022, Přemysl Eric Janouch <p@janouch.name>
+# SPDX-License-Identifier: 0BSD
+#
+# Neither *_new() nor *_destroy() functions are provided, because they'd only
+# be useful for top-levels, and are merely extra malloc()/free() calls.
+# Users are expected to reuse buffers.
+#
+# Similarly, no constructors are produced--those are easy to write manually.
+#
+# All arrays are deserialized zero-terminated, so u8<> and i8<> can be directly
+# used as C strings.
+#
+# All types must be able to dispose partially zero values going from the back,
+# i.e., in the reverse order of deserialization.
+
+function define_internal(name, ctype) {
+	Types[name] = "internal"
+	CodegenCType[name] = ctype
+}
+
+function define_int(shortname, ctype) {
+	define_internal(shortname, ctype)
+	CodegenSerialize[shortname] = \
+		"\tstr_pack_" shortname "(w, %s);\n"
+	CodegenDeserialize[shortname] = \
+		"\tif (!msg_unpacker_" shortname "(r, &%s))\n" \
+		"\t\treturn false;\n"
+}
+
+function define_sint(size) { define_int("i" size, "int" size "_t") }
+function define_uint(size) { define_int("u" size, "uint" size "_t") }
+
+function codegen_begin() {
+	define_sint("8")
+	define_sint("16")
+	define_sint("32")
+	define_sint("64")
+	define_uint("8")
+	define_uint("16")
+	define_uint("32")
+	define_uint("64")
+
+	define_internal("string", "struct str")
+	CodegenDispose["string"] = "\tstr_free(&%s);\n"
+	CodegenSerialize["string"] = \
+		"\tif (!proto_string_serialize(&%s, w))\n" \
+		"\t\treturn false;\n"
+	CodegenDeserialize["string"] = \
+		"\tif (!proto_string_deserialize(&%s, r))\n" \
+		"\t\treturn false;\n"
+
+	define_internal("bool", "bool")
+	CodegenSerialize["bool"] = \
+		"\tstr_pack_u8(w, !!%s);\n"
+	CodegenDeserialize["bool"] = \
+		"\t{\n" \
+		"\t\tuint8_t v = 0;\n" \
+		"\t\tif (!msg_unpacker_u8(r, &v))\n" \
+		"\t\t\treturn false;\n" \
+		"\t\t%s = !!v;\n" \
+		"\t}\n"
+
+	print "// Code generated from " FILENAME ". DO NOT EDIT."
+	print "// This file directly depends on liberty.c, but doesn't include it."
+	print ""
+	print "static bool"
+	print "proto_string_serialize(const struct str *s, struct str *w) {"
+	print "\tif (s->len > UINT32_MAX)"
+	print "\t\treturn false;"
+	print "\tstr_pack_u32(w, s->len);"
+	print "\tstr_append_str(w, s);"
+	print "\treturn true;"
+	print "}"
+	print ""
+	print "static bool"
+	print "proto_string_deserialize(struct str *s, struct msg_unpacker *r) {"
+	print "\tuint32_t len = 0;"
+	print "\tif (!msg_unpacker_u32(r, &len))"
+	print "\t\treturn false;"
+	print "\tif (msg_unpacker_get_available(r) < len)"
+	print "\t\treturn false;"
+	print "\t*s = str_make();"
+	print "\tstr_append_data(s, r->data + r->offset, len);"
+	print "\tr->offset += len;"
+	print "\tif (!utf8_validate (s->str, s->len))"
+	print "\t\treturn false;"
+	print "\treturn true;"
+	print "}"
+}
+
+function codegen_constant(name, value) {
+	print ""
+	print "enum { " PrefixUpper name " = " value " };"
+}
+
+function codegen_enum_value(name, subname, value, cg) {
+	append(cg, "fields",
+		"\t" PrefixUpper toupper(cameltosnake(name)) "_" subname \
+		" = " value ",\n")
+}
+
+function codegen_enum(name, cg,    ctype) {
+	ctype = "enum " PrefixLower cameltosnake(name)
+	print ""
+	print ctype " {"
+	print cg["fields"] "};"
+
+	# XXX: This should also check if it isn't out-of-range for any reason,
+	# but our usage of sprintf() stands in the way a bit.
+	CodegenSerialize[name] = "\tstr_pack_i8(w, %s);\n"
+	CodegenDeserialize[name] = \
+		"\t{\n" \
+		"\t\tint8_t v = 0;\n" \
+		"\t\tif (!msg_unpacker_i8(r, &v) || !v)\n" \
+		"\t\t\treturn false;\n" \
+		"\t\t%s = v;\n" \
+		"\t}\n"
+
+	CodegenCType[name] = ctype
+	for (i in cg)
+		delete cg[i]
+}
+
+function codegen_struct_tag(d, cg,    f) {
+	f = "self->" d["name"]
+	append(cg, "fields", "\t" CodegenCType[d["type"]] " " d["name"] ";\n")
+	append(cg, "dispose", sprintf(CodegenDispose[d["type"]], f))
+	append(cg, "serialize", sprintf(CodegenSerialize[d["type"]], f))
+	# Do not deserialize here, that would be out of order.
+}
+
+function codegen_struct_field(d, cg,    f, dispose, serialize, deserialize) {
+	f = "self->" d["name"]
+	dispose = CodegenDispose[d["type"]]
+	serialize = CodegenSerialize[d["type"]]
+	deserialize = CodegenDeserialize[d["type"]]
+	if (!d["isarray"]) {
+		append(cg, "fields", "\t" CodegenCType[d["type"]] " " d["name"] ";\n")
+		append(cg, "dispose", sprintf(dispose, f))
+		append(cg, "serialize", sprintf(serialize, f))
+		append(cg, "deserialize", sprintf(deserialize, f))
+		return
+	}
+
+	append(cg, "fields",
+		"\t" CodegenCType["u32"] " " d["name"] "_len;\n" \
+		"\t" CodegenCType[d["type"]] " *" d["name"] ";\n")
+
+	if (dispose)
+		append(cg, "dispose", "\tif (" f ")\n" \
+			"\t\tfor (size_t i = 0; i < " f "_len; i++)\n" \
+			indent(indent(sprintf(dispose, f "[i]"))))
+	append(cg, "dispose", "\tfree(" f ");\n")
+
+	append(cg, "serialize", sprintf(CodegenSerialize["u32"], f "_len"))
+	if (d["type"] == "u8" || d["type"] == "i8") {
+		append(cg, "serialize",
+			"\tstr_append_data(w, " f ", " f "_len);\n")
+	} else if (serialize) {
+		append(cg, "serialize",
+			"\tfor (size_t i = 0; i < " f "_len; i++)\n" \
+			indent(sprintf(serialize, f "[i]")))
+	}
+
+	append(cg, "deserialize", sprintf(CodegenDeserialize["u32"], f "_len") \
+		"\tif (!(" f " = calloc((size_t) " f "_len + 1, sizeof *" f ")))\n" \
+		"\t\treturn false;\n")
+	if (d["type"] == "u8" || d["type"] == "i8") {
+		append(cg, "deserialize",
+			"\tif (msg_unpacker_get_available(r) < " f "_len)\n" \
+			"\t\treturn false;\n" \
+			"\tmemcpy(" f ", r->data + r->offset, " f "_len);\n" \
+			"\tr->offset += " f "_len;\n")
+	} else if (deserialize) {
+		append(cg, "deserialize",
+			"\tfor (size_t i = 0; i < " f "_len; i++)\n" \
+			indent(sprintf(deserialize, f "[i]")))
+	}
+}
+
+function codegen_struct(name, cg,    ctype, funcname) {
+	ctype = "struct " PrefixLower cameltosnake(name)
+	print ""
+	print ctype " {"
+	print cg["fields"] "};"
+
+	if (cg["dispose"]) {
+		funcname = PrefixLower cameltosnake(name) "_free"
+		print ""
+		print "static void\n" funcname "(" ctype " *self) {"
+		print cg["dispose"] "}"
+
+		CodegenDispose[name] = "\t" funcname "(&%s);\n"
+	}
+	if (cg["serialize"]) {
+		funcname = PrefixLower cameltosnake(name) "_serialize"
+		print ""
+		print "static bool\n" \
+			  funcname "(\n\t\tconst " ctype " *self, struct str *w) {"
+		print cg["serialize"] "\treturn true;"
+		print "}"
+
+		CodegenSerialize[name] = "\tif (!" funcname "(&%s, w))\n" \
+			"\t\treturn false;\n"
+	}
+	if (cg["deserialize"]) {
+		funcname = PrefixLower cameltosnake(name) "_deserialize"
+		print ""
+		print "static bool\n" \
+			  funcname "(\n\t\t" ctype " *self, struct msg_unpacker *r) {"
+		print cg["deserialize"] "\treturn true;"
+		print "}"
+
+		CodegenDeserialize[name] = "\tif (!" funcname "(&%s, r))\n" \
+			"\t\treturn false;\n"
+	}
+
+	CodegenCType[name] = ctype
+	for (i in cg)
+		delete cg[i]
+}
+
+function codegen_union_tag(d, cg) {
+	cg["tagtype"] = d["type"]
+	cg["tagname"] = d["name"]
+	append(cg, "fields", "\t" CodegenCType[d["type"]] " " d["name"] ";\n")
+}
+
+function codegen_union_struct( \
+		name, casename, cg, scg,     structname, fieldname, fullcasename) {
+	# Don't generate obviously useless structs.
+	fullcasename = toupper(cameltosnake(cg["tagtype"])) "_" casename
+	if (!scg["dispose"] && !scg["deserialize"]) {
+		append(cg, "structless", "\tcase " PrefixUpper fullcasename ":\n")
+		for (i in scg)
+			delete scg[i]
+		return
+	}
+
+	# And thus not all generated structs are present in Types.
+	structname = name "_" casename
+	fieldname = tolower(casename)
+	codegen_struct(structname, scg)
+
+	append(cg, "fields", "\t" CodegenCType[structname] " " fieldname ";\n")
+	if (CodegenDispose[structname])
+		append(cg, "dispose", "\tcase " PrefixUpper fullcasename ":\n" \
+			indent(sprintf(CodegenDispose[structname], "self->" fieldname)) \
+			"\t\tbreak;\n")
+
+	# With no de/serialization code, this will simply recognize the tag.
+	append(cg, "serialize", "\tcase " PrefixUpper fullcasename ":\n" \
+		indent(sprintf(CodegenSerialize[structname], "self->" fieldname)) \
+		"\t\tbreak;\n")
+	append(cg, "deserialize", "\tcase " PrefixUpper fullcasename ":\n" \
+		indent(sprintf(CodegenDeserialize[structname], "self->" fieldname)) \
+		"\t\tbreak;\n")
+}
+
+function codegen_union(name, cg,    f, ctype, funcname) {
+	ctype = "union " PrefixLower cameltosnake(name)
+	print ""
+	print ctype " {"
+	print cg["fields"] "};"
+
+	f = "self->" cg["tagname"]
+	if (cg["dispose"]) {
+		funcname = PrefixLower cameltosnake(name) "_free"
+		print ""
+		print "static void\n" funcname "(" ctype " *self) {"
+		print "\tswitch (" f ") {"
+		if (cg["structless"])
+			print cg["structless"] \
+				indent(sprintf(CodegenDispose[cg["tagtype"]], f)) "\t\tbreak;"
+		print cg["dispose"] "\tdefault:"
+		print "\t\tbreak;"
+		print "\t}"
+		print "}"
+
+		CodegenDispose[name] = "\t" funcname "(&%s);\n"
+	}
+	if (cg["serialize"]) {
+		funcname = PrefixLower cameltosnake(name) "_serialize"
+		print ""
+		print "static bool\n" \
+			  funcname "(\n\t\tconst " ctype " *self, struct str *w) {"
+		print "\tswitch (" f ") {"
+		if (cg["structless"])
+			print cg["structless"] \
+				indent(sprintf(CodegenSerialize[cg["tagtype"]], f)) "\t\tbreak;"
+		print cg["serialize"] "\tdefault:"
+		print "\t\treturn false;"
+		print "\t}"
+		print "\treturn true;"
+		print "}"
+
+		CodegenSerialize[name] = "\tif (!" funcname "(&%s, w))\n" \
+			"\t\treturn false;\n"
+	}
+	if (cg["deserialize"]) {
+		funcname = PrefixLower cameltosnake(name) "_deserialize"
+		print ""
+		print "static bool\n" \
+			  funcname "(\n\t\t" ctype " *self, struct msg_unpacker *r) {"
+		print sprintf(CodegenDeserialize[cg["tagtype"]], f)
+		print "\tswitch (" f ") {"
+		if (cg["structless"])
+			print cg["structless"] "\t\tbreak;"
+		print cg["deserialize"] "\tdefault:"
+		print "\t\treturn false;"
+		print "\t}"
+		print "\treturn true;"
+		print "}"
+
+		CodegenDeserialize[name] = "\tif (!" funcname "(&%s, r))\n" \
+			"\t\treturn false;\n"
+	}
+
+	CodegenCType[name] = ctype
+	for (i in cg)
+		delete cg[i]
+}
--- a/tools/lxdrgen-go.awk
+++ b/tools/lxdrgen-go.awk
@@ -0,0 +1,541 @@
+# lxdrgen-go.awk: Go backend for lxdrgen.awk.
+#
+# Copyright (c) 2022, Přemysl Eric Janouch <p@janouch.name>
+# SPDX-License-Identifier: 0BSD
+#
+# This backend also enables proxying to other endpoints using JSON.
+
+function define_internal(name, gotype) {
+	Types[name] = "internal"
+	CodegenGoType[name] = gotype
+}
+
+function define_sint(size,    shortname, gotype) {
+	shortname = "i" size
+	gotype = "int" size
+	define_internal(shortname, gotype)
+
+	CodegenAppendJSON[shortname] = \
+		"\tb = strconv.AppendInt(b, int64(%s), 10)\n"
+	if (size == 8) {
+		CodegenSerialize[shortname] = "\tdata = append(data, uint8(%s))\n"
+		CodegenDeserialize[shortname] = \
+			"\tif len(data) >= 1 {\n" \
+			"\t\t%s, data = int8(data[0]), data[1:]\n" \
+			"\t} else {\n" \
+			"\t\treturn nil, false\n" \
+			"\t}\n"
+		return
+	}
+
+	CodegenSerialize[shortname] = \
+		"\tdata = binary.BigEndian.AppendUint" size "(data, uint" size "(%s))\n"
+	CodegenDeserialize[shortname] = \
+		"\tif len(data) >= " (size / 8) " {\n" \
+		"\t\t%s = " gotype "(binary.BigEndian.Uint" size "(data))\n" \
+		"\t\tdata = data[" (size / 8) ":]\n" \
+		"\t} else {\n" \
+		"\t\treturn nil, false\n" \
+		"\t}\n"
+}
+
+function define_uint(size,    shortname, gotype) {
+	# Both []byte and []uint8 luckily marshal as base64-encoded JSON strings,
+	# so there's no need to rename the type as an exception.
+	shortname = "u" size
+	gotype = "uint" size
+	define_internal(shortname, gotype)
+
+	CodegenAppendJSON[shortname] = \
+		"\tb = strconv.AppendUint(b, uint64(%s), 10)\n"
+	if (size == 8) {
+		CodegenSerialize[shortname] = "\tdata = append(data, %s)\n"
+		CodegenDeserialize[shortname] = \
+			"\tif len(data) >= 1 {\n" \
+			"\t\t%s, data = data[0], data[1:]\n" \
+			"\t} else {\n" \
+			"\t\treturn nil, false\n" \
+			"\t}\n"
+		return
+	}
+
+	CodegenSerialize[shortname] = \
+		"\tdata = binary.BigEndian.AppendUint" size "(data, %s)\n"
+	CodegenDeserialize[shortname] = \
+		"\tif len(data) >= " (size / 8) " {\n" \
+		"\t\t%s = binary.BigEndian.Uint" size "(data)\n" \
+		"\t\tdata = data[" (size / 8) ":]\n" \
+		"\t} else {\n" \
+		"\t\treturn nil, false\n" \
+		"\t}\n"
+}
+
+# Currently two outputs cannot coexist within the same package.
+function codegen_private(name) {
+	return "proto" name
+}
+
+function codegen_begin(    funcname) {
+	define_sint("8")
+	define_sint("16")
+	define_sint("32")
+	define_sint("64")
+	define_uint("8")
+	define_uint("16")
+	define_uint("32")
+	define_uint("64")
+	define_internal("bool", "bool")
+	define_internal("string", "string")
+
+	# Cater to "go generate", for what it's worth.
+	CodegenPackage = ENVIRON["GOPACKAGE"]
+	if (!CodegenPackage)
+		CodegenPackage = "main"
+
+	print "// Code generated from " FILENAME ". DO NOT EDIT."
+	print ""
+	print "package " CodegenPackage
+	print ""
+	print "import ("
+	print "\t`encoding/base64`"
+	print "\t`encoding/binary`"
+	print "\t`encoding/json`"
+	print "\t`errors`"
+	print "\t`math`"
+	print "\t`strconv`"
+	print "\t`unicode/utf8`"
+	print ")"
+	print ""
+	print "// This is a hack to always use the base64 import."
+	print "var _ = base64.StdEncoding"
+	print ""
+
+	CodegenAppendJSON["bool"] = \
+		"\tb = strconv.AppendBool(b, %s)\n"
+	CodegenSerialize["bool"] = \
+		"\tif %s {\n" \
+		"\t\tdata = append(data, 1)\n" \
+		"\t} else {\n" \
+		"\t\tdata = append(data, 0)\n" \
+		"\t}\n"
+
+	funcname = codegen_private("ConsumeBoolFrom")
+	print "// " funcname " tries to deserialize a boolean value"
+	print "// from the beginning of a byte stream. When successful,"
+	print "// it returns a subslice with any data that might follow."
+	print "func " funcname "(data []byte, b *bool) ([]byte, bool) {"
+	print "\tif len(data) < 1 {"
+	print "\t\treturn nil, false"
+	print "\t}"
+	print "\tif data[0] != 0 {"
+	print "\t\t*b = true"
+	print "\t} else {"
+	print "\t\t*b = false"
+	print "\t}"
+	print "\treturn data[1:], true"
+	print "}"
+	print ""
+
+	CodegenDeserialize["bool"] = \
+		"\tif data, ok = " funcname "(data, &%s); !ok {\n" \
+		"\t\treturn nil, ok\n" \
+		"\t}\n"
+
+	funcname = codegen_private("AppendStringTo")
+	print "// " funcname " tries to serialize a string value,"
+	print "// appending it to the end of a byte stream."
+	print "func " funcname "(data []byte, s string) ([]byte, bool) {"
+	print "\tif uint64(len(s)) > math.MaxUint32 {"
+	print "\t\treturn nil, false"
+	print "\t}"
+	print "\tdata = binary.BigEndian.AppendUint32(data, uint32(len(s)))"
+	print "\treturn append(data, s...), true"
+	print "}"
+	print ""
+
+	CodegenSerialize["string"] = \
+		"\tif data, ok = " funcname "(data, %s); !ok {\n" \
+		"\t\treturn nil, ok\n" \
+		"\t}\n"
+
+	funcname = codegen_private("ConsumeStringFrom")
+	print "// " funcname " tries to deserialize a string value"
+	print "// from the beginning of a byte stream. When successful,"
+	print "// it returns a subslice with any data that might follow."
+	print "func " funcname "(data []byte, s *string) ([]byte, bool) {"
+	print "\tif len(data) < 4 {"
+	print "\t\treturn nil, false"
+	print "\t}"
+	print "\tlength := binary.BigEndian.Uint32(data)"
+	print "\tif data = data[4:]; uint64(len(data)) < uint64(length) {"
+	print "\t\treturn nil, false"
+	print "\t}"
+	print "\t*s = string(data[:length])"
+	print "\tif !utf8.ValidString(*s) {"
+	print "\t\treturn nil, false"
+	print "\t}"
+	print "\treturn data[length:], true"
+	print "}"
+	print ""
+
+	CodegenDeserialize["string"] = \
+		"\tif data, ok = " funcname "(data, &%s); !ok {\n" \
+		"\t\treturn nil, ok\n" \
+		"\t}\n"
+
+	funcname = codegen_private("UnmarshalEnumJSON")
+	print "// " funcname " converts a JSON fragment to an integer,"
+	print "// ensuring that it's within the expected range of enum values."
+	print "func " funcname "(data []byte) (int64, error) {"
+	print "\tvar n int64"
+	print "\tif err := json.Unmarshal(data, &n); err != nil {"
+	print "\t\treturn 0, err"
+	print "\t} else if n > math.MaxInt8 || n < math.MinInt8 {"
+	print "\t\treturn 0, errors.New(`integer out of range`)"
+	print "\t} else {"
+	print "\t\treturn n, nil"
+	print "\t}"
+	print "}"
+	print ""
+}
+
+function codegen_constant(name, value) {
+	print "const " PrefixCamel snaketocamel(name) " = " value
+	print ""
+}
+
+function codegen_enum_value(name, subname, value, cg,    goname) {
+	goname = PrefixCamel name snaketocamel(subname)
+	append(cg, "fields",
+		"\t" goname " = " value "\n")
+	append(cg, "stringer",
+		"\tcase " goname ":\n" \
+		"\t\treturn `" snaketocamel(subname) "`\n")
+	append(cg, "marshal",
+		goname ",\n")
+	append(cg, "unmarshal",
+		"\tcase `" snaketocamel(subname) "`:\n" \
+		"\t\t*v = " goname "\n")
+}
+
+function codegen_enum(name, cg,    gotype, fields, funcname) {
+	gotype = PrefixCamel name
+	print "type " gotype " int8"
+	print ""
+
+	print "const ("
+	print cg["fields"] ")"
+	print ""
+
+	print "func (v " gotype ") String() string {"
+	print "\tswitch v {"
+	print cg["stringer"] "\tdefault:"
+	print "\t\treturn strconv.Itoa(int(v))"
+	print "\t}"
+	print "}"
+	print ""
+
+	CodegenIsMarshaler[name] = 1
+	fields = cg["marshal"]
+	sub(/,\n$/, ":", fields)
+	gsub(/\n/, "\n\t", fields)
+	print "func (v " gotype ") MarshalJSON() ([]byte, error) {"
+	print "\tswitch v {"
+	print indent("case " fields)
+	print "\t\treturn []byte(`\"` + v.String() + `\"`), nil"
+	print "\t}"
+	print "\treturn json.Marshal(int(v))"
+	print "}"
+	print ""
+
+	funcname = codegen_private("UnmarshalEnumJSON")
+	print "func (v *" gotype ") UnmarshalJSON(data []byte) error {"
+	print "\tvar s string"
+	print "\tif json.Unmarshal(data, &s) == nil {"
+	print "\t\t// Handled below."
+	print "\t} else if n, err := " funcname "(data); err != nil {"
+	print "\t\treturn err"
+	print "\t} else {"
+	print "\t\t*v = " gotype "(n)"
+	print "\t\treturn nil"
+	print "\t}"
+	print ""
+	print "\tswitch s {"
+	print cg["unmarshal"] "\tdefault:"
+	print "\t\treturn errors.New(`unrecognized value: ` + s)"
+	print "\t}"
+	print "\treturn nil"
+	print "}"
+	print ""
+
+	# XXX: This should also check if it isn't out-of-range for any reason,
+	# but our usage of sprintf() stands in the way a bit.
+	CodegenSerialize[name] = "\tdata = append(data, uint8(%s))\n"
+	CodegenDeserialize[name] = \
+		"\tif len(data) >= 1 {\n" \
+		"\t\t%s, data = " gotype "(data[0]), data[1:]\n" \
+		"\t} else {\n" \
+		"\t\treturn nil, false\n" \
+		"\t}\n"
+
+	CodegenGoType[name] = gotype
+	for (i in cg)
+		delete cg[i]
+}
+
+# Return Go statements that append the JSON encoding of expression `f`,
+# whose protocol type is `type`, to the byte slice `b`.
+function codegen_marshal(type, f,    call) {
+	# A type-specific appending template takes precedence when present.
+	if (CodegenAppendJSON[type])
+		return sprintf(CodegenAppendJSON[type], f)
+
+	# Fall back to encoding/json, unless the type is known to implement
+	# json.Marshaler, in which case its method may be invoked directly.
+	call = "json.Marshal(" f ")"
+	if (CodegenIsMarshaler[type])
+		call = f ".MarshalJSON()"
+
+	return "\tif j, err := " call "; err != nil {\n" \
+		"\t\treturn nil, err\n" \
+		"\t} else {\n" \
+		"\t\tb = append(b, j...)\n" \
+		"\t}\n"
+}
+
+# Append to cg["marshal"] the Go code emitting the struct member described
+# by `d` as a `,"key":value` fragment of a JSON object.
+function codegen_struct_field_marshal(d, cg,    member, expr, key) {
+	member = snaketocamel(d["name"])
+	expr = "s." member
+	# Common beginning of all variants: the comma, the quoted key, the colon.
+	key = "\tb = append(b, `,\"" decapitalize(member) "\":"
+
+	if (!d["isarray"]) {
+		append(cg, "marshal",
+			key "`...)\n" \
+			codegen_marshal(d["type"], expr))
+	} else if (d["type"] == "u8") {
+		# Note that we do not produce `null` for nil slices,
+		# unlike encoding/json.  And arrays never get deserialized as such.
+		# Byte arrays are written as a base64 string.
+		append(cg, "marshal",
+			key "\"`...)\n" \
+			"\tb = append(b, base64.StdEncoding.EncodeToString(" expr ")...)\n" \
+			"\tb = append(b, '\"')\n")
+	} else {
+		# Any other array is written as a JSON array, element by element.
+		append(cg, "marshal",
+			key "[`...)\n" \
+			"\tfor i := 0; i < len(" expr "); i++ {\n" \
+			"\t\tif i > 0 {\n" \
+			"\t\t\tb = append(b, ',')\n" \
+			"\t\t}\n" \
+			indent(codegen_marshal(d["type"], expr "[i]")) \
+			"\t}\n" \
+			"\tb = append(b, ']')\n")
+	}
+}
+
+# Generate everything needed for one Go struct member described by `d`:
+# its JSON marshalling code, its field declaration, and its binary
+# serialization and deserialization statements, all appended to `cg`.
+function codegen_struct_field(d, cg,    camel, f, serialize, deserialize) {
+	codegen_struct_field_marshal(d, cg)
+
+	camel = snaketocamel(d["name"])
+	f = "s." camel
+	serialize = CodegenSerialize[d["type"]]
+	deserialize = CodegenDeserialize[d["type"]]
+	# Scalar members simply instantiate the per-type templates.
+	if (!d["isarray"]) {
+		append(cg, "fields", "\t" camel " " CodegenGoType[d["type"]] \
+			" `json:\"" decapitalize(camel) "\"`\n")
+		append(cg, "serialize", sprintf(serialize, f))
+		append(cg, "deserialize", sprintf(deserialize, f))
+		return
+	}
+
+	# Arrays are declared as slices.
+	append(cg, "fields", "\t" camel " []" CodegenGoType[d["type"]] \
+		" `json:\"" decapitalize(camel) "\"`\n")
+
+	# The element count is serialized first, using the u32 template.
+	# XXX: This should also check if it isn't out-of-range for any reason.
+	append(cg, "serialize",
+		sprintf(CodegenSerialize["u32"], "uint32(len(" f "))"))
+	if (d["type"] == "u8") {
+		# Byte slices can be appended in one go.
+		append(cg, "serialize",
+			"\tdata = append(data, " f "...)\n")
+	} else {
+		append(cg, "serialize",
+			"\tfor i := 0; i < len(" f "); i++ {\n" \
+			indent(sprintf(serialize, f "[i]")) \
+			"\t}\n")
+	}
+
+	# Deserialization reads the count into a block-scoped variable first.
+	append(cg, "deserialize",
+		"\t{\n" \
+		"\t\tvar length uint32\n" \
+		indent(sprintf(CodegenDeserialize["u32"], "length")))
+	if (d["type"] == "u8") {
+		# Byte slices are cut out of the input after a bounds check.
+		append(cg, "deserialize",
+			"\t\tif uint64(len(data)) < uint64(length) {\n" \
+			"\t\t\treturn nil, false\n" \
+			"\t\t}\n" \
+			"\t\t" f ", data = data[:length], data[length:]\n" \
+			"\t}\n")
+	} else {
+		# NOTE(review): the generated code allocates `length` elements
+		# before reading any of them -- confirm that inputs are bounded.
+		append(cg, "deserialize",
+			"\t\t" f " = make([]" CodegenGoType[d["type"]] ", length)\n" \
+			"\t}\n" \
+			"\tfor i := 0; i < len(" f "); i++ {\n" \
+			indent(sprintf(deserialize, f "[i]")) \
+			"\t}\n")
+	}
+}
+
+# Add the union tag described by `d` as a regular member of a case struct:
+# it gets marshalled, declared, and serialized like any scalar field.
+function codegen_struct_tag(d, cg,    member, declaration) {
+	codegen_struct_field_marshal(d, cg)
+
+	member = snaketocamel(d["name"])
+	declaration = "\t" member " " CodegenGoType[d["type"]] \
+		" `json:\"" decapitalize(member) "\"`\n"
+	append(cg, "fields", declaration)
+
+	# No deserialization code is added: the containing union consumes
+	# the tag itself.
+	append(cg, "serialize",
+		sprintf(CodegenSerialize[d["type"]], "s." member))
+}
+
+# Emit the Go struct type `name` from the fragments collected in `cg`,
+# plus its MarshalJSON(), AppendTo() and ConsumeFrom() methods whenever
+# the respective fragment is non-empty, and register the type's templates.
+# `cg` is emptied before returning.
+# `i` is declared local so that clearing `cg` does not clobber a global.
+function codegen_struct(name, cg,    gotype, i) {
+	gotype = PrefixCamel name
+	print "type " gotype " struct {\n" cg["fields"] "}\n"
+
+	if (cg["marshal"]) {
+		CodegenIsMarshaler[name] = 1
+		print "func (s *" gotype ") MarshalJSON() ([]byte, error) {"
+		print "\tb := []byte{}"
+		# Fragments produced by codegen_struct_field_marshal() each begin
+		# with a comma, hence the first byte is overwritten by the brace.
+		print cg["marshal"] "\tb[0] = '{'"
+		print "\treturn append(b, '}'), nil"
+		print "}"
+		print ""
+	}
+
+	if (cg["serialize"]) {
+		print "func (s *" gotype ") AppendTo(data []byte) ([]byte, bool) {"
+		print "\tok := true"
+		print cg["serialize"] "\treturn data, ok"
+		print "}"
+		print ""
+
+		CodegenSerialize[name] = \
+			"\tif data, ok = %s.AppendTo(data); !ok {\n" \
+			"\t\treturn nil, ok\n" \
+			"\t}\n"
+	}
+	if (cg["deserialize"]) {
+		print "func (s *" gotype ") ConsumeFrom(data []byte) ([]byte, bool) {"
+		print "\tok := true"
+		print cg["deserialize"] "\treturn data, ok"
+		print "}"
+		print ""
+
+		CodegenDeserialize[name] = \
+			"\tif data, ok = %s.ConsumeFrom(data); !ok {\n" \
+			"\t\treturn nil, ok\n" \
+			"\t}\n"
+	}
+
+	CodegenGoType[name] = gotype
+	for (i in cg)
+		delete cg[i]
+}
+
+# Remember the name and the type of the union's tag in `cg`.
+function codegen_union_tag(d, cg) {
+	# Nothing is emitted for the tag as such:
+	# it is implied from the type of struct stored in the interface.
+	cg["tagname"] = d["name"]
+	cg["tagtype"] = d["type"]
+}
+
+# Emit the struct for case `casename` of union `name` from `scg`, and add
+# the matching branches to the union's "unmarshal", "serialize" and
+# "deserialize" fragments in `cg`.
+function codegen_union_struct(name, casename, cg, scg,     structname, init, tagfield, label) {
+	# And thus not all generated structs are present in Types.
+	structname = name snaketocamel(casename)
+	codegen_struct(structname, scg)
+
+	# A composite literal presetting the tag member from the tag variable.
+	tagfield = snaketocamel(cg["tagname"])
+	init = CodegenGoType[structname] "{" tagfield \
+		": " decapitalize(tagfield) "}"
+	# Branches switching over tag values share their label.
+	label = "\tcase " CodegenGoType[cg["tagtype"]] snaketocamel(casename) ":\n"
+
+	append(cg, "unmarshal",
+		label \
+		"\t\ts := " init "\n" \
+		"\t\terr = json.Unmarshal(data, &s)\n" \
+		"\t\tu.Interface = &s\n")
+	append(cg, "serialize",
+		"\tcase *" CodegenGoType[structname] ":\n" \
+		indent(sprintf(CodegenSerialize[structname], "union")))
+	append(cg, "deserialize",
+		label \
+		"\t\ts := " init "\n" \
+		indent(sprintf(CodegenDeserialize[structname], "s")) \
+		"\t\tu.Interface = &s\n")
+}
+
+# Emit the Go wrapper type for union `name`: a struct holding an interface,
+# with MarshalJSON(), UnmarshalJSON(), AppendTo() and ConsumeFrom() methods
+# that dispatch over the branches collected in `cg`.  The type's templates
+# are registered, and `cg` is emptied before returning.
+# `i` is declared local so that clearing `cg` does not clobber a global.
+function codegen_union(name, cg,    gotype, tagfield, tagvar, i) {
+	gotype = PrefixCamel name
+	print "type " gotype " struct {"
+	print "\tInterface any"
+	print "}"
+	print ""
+
+	# This cannot be a pointer method, it wouldn't work recursively.
+	CodegenIsMarshaler[name] = 1
+	print "func (u " gotype ") MarshalJSON() ([]byte, error) {"
+	print "\treturn u.Interface.(json.Marshaler).MarshalJSON()"
+	print "}"
+	print ""
+
+	# The generated method decodes just the tag first,
+	# then decodes the whole object again into the struct it selects.
+	tagfield = snaketocamel(cg["tagname"])
+	tagvar = decapitalize(tagfield)
+	print "func (u *" gotype ") UnmarshalJSON(data []byte) (err error) {"
+	print "\tvar t struct {"
+	print "\t\t" tagfield " " CodegenGoType[cg["tagtype"]] \
+		" `json:\"" tagvar "\"`"
+	print "\t}"
+	print "\tif err := json.Unmarshal(data, &t); err != nil {"
+	print "\t\treturn err"
+	print "\t}"
+	print ""
+	print "\tswitch " tagvar " := t." tagfield "; " tagvar " {"
+	print cg["unmarshal"] "\tdefault:"
+	print "\t\terr = errors.New(`unsupported value: ` + " tagvar ".String())"
+	print "\t}"
+	print "\treturn err"
+	print "}"
+	print ""
+
+	# XXX: Consider changing the interface into an AppendTo/ConsumeFrom one,
+	# that would eliminate these type case switches entirely.
+	# On the other hand, it would make it possible to send unsuitable structs.
+	print "func (u *" gotype ") AppendTo(data []byte) ([]byte, bool) {"
+	print "\tok := true"
+	print "\tswitch union := u.Interface.(type) {"
+	print cg["serialize"] "\tdefault:"
+	print "\t\treturn nil, false"
+	print "\t}"
+	print "\treturn data, ok"
+	print "}"
+	print ""
+
+	CodegenSerialize[name] = \
+		"\tif data, ok = %s.AppendTo(data); !ok {\n" \
+		"\t\treturn nil, ok\n" \
+		"\t}\n"
+
+	# The tag is read using its own type's template, and then switched over.
+	print "func (u *" gotype ") ConsumeFrom(data []byte) ([]byte, bool) {"
+	print "\tok := true"
+	print "\tvar " tagvar " " CodegenGoType[cg["tagtype"]]
+	print sprintf(CodegenDeserialize[cg["tagtype"]], tagvar)
+	print "\tswitch " tagvar " {"
+	print cg["deserialize"] "\tdefault:"
+	print "\t\treturn nil, false"
+	print "\t}"
+	print "\treturn data, ok"
+	print "}"
+	print ""
+
+	CodegenDeserialize[name] = \
+		"\tif data, ok = %s.ConsumeFrom(data); !ok {\n" \
+		"\t\treturn nil, ok\n" \
+		"\t}\n"
+
+	CodegenGoType[name] = gotype
+	for (i in cg)
+		delete cg[i]
+}
--- a/tools/lxdrgen-mjs.awk
+++ b/tools/lxdrgen-mjs.awk
@@ -0,0 +1,226 @@
+# lxdrgen-mjs.awk: Javascript backend for lxdrgen.awk.
+#
+# Copyright (c) 2022, Přemysl Eric Janouch <p@janouch.name>
+# SPDX-License-Identifier: 0BSD
+#
+# This backend is currently for decoding the binary format only.
+# (JSON is way too expensive to process and transfer.)
+#
+# Import the resulting script as a Javascript module.
+# Identifiers intentionally aren't prefixed.
+
+# Register `name` in the Types table with the kind "internal".
+function define_internal(name) {
+	Types[name] = "internal"
+}
+
+# Register the signed integer type of `size` bits (passed as a string),
+# and emit the Reader method decoding it at the current offset.
+function define_sint(size,    method, getter) {
+	method = "i" size
+	if (size == "64") {
+		# XXX: 2^53 - 1 must be enough for anyone.  BigInts are a PITA.
+		getter = "Number(this.getBigInt" size "(this.offset))"
+	} else {
+		getter = "this.getInt" size "(this.offset)"
+	}
+
+	define_internal(method)
+	CodegenDeserialize[method] = "\t%s = r." method "()\n"
+
+	print ""
+	print "\t" method "() {"
+	print "\t\tconst " method " = " getter
+	print "\t\tthis.offset += " (size / 8)
+	print "\t\treturn " method
+	print "\t}"
+}
+
+# Register the unsigned integer type of `size` bits (passed as a string),
+# and emit the Reader method decoding it at the current offset.
+function define_uint(size,    method, getter) {
+	method = "u" size
+	if (size == "64") {
+		# XXX: 2^53 - 1 must be enough for anyone.  BigInts are a PITA.
+		getter = "Number(this.getBigUint" size "(this.offset))"
+	} else {
+		getter = "this.getUint" size "(this.offset)"
+	}
+
+	define_internal(method)
+	CodegenDeserialize[method] = "\t%s = r." method "()\n"
+
+	print ""
+	print "\t" method "() {"
+	print "\t\tconst " method " = " getter
+	print "\t\tthis.offset += " (size / 8)
+	print "\t\treturn " method
+	print "\t}"
+}
+
+# Emit the file header and the exported Reader class: a DataView subclass
+# tracking a read offset, with one decoding method per built-in type.
+# The built-in types are registered along the way.
+function codegen_begin() {
+	print "// Code generated from " FILENAME ". DO NOT EDIT."
+	print ""
+	print "export class Reader extends DataView {"
+	print "\tconstructor() {"
+	print "\t\tsuper(...arguments)"
+	print "\t\tthis.offset = 0"
+	print "\t\tthis.decoder = new TextDecoder('utf-8', {fatal: true})"
+	print "\t}"
+	print ""
+	print "\tget empty() {"
+	print "\t\treturn this.byteLength <= this.offset"
+	print "\t}"
+	print ""
+	# require() makes sure `len` more bytes are available, and returns
+	# the current position relative to the underlying buffer.
+	print "\trequire(len) {"
+	print "\t\tif (this.byteLength - this.offset < len)"
+	print "\t\t\tthrow `Premature end of data`"
+	print "\t\treturn this.byteOffset + this.offset"
+	print "\t}"
+
+	define_internal("string")
+	CodegenDeserialize["string"] = "\t%s = r.string()\n"
+
+	# Strings are read as a 32-bit length followed by that many bytes,
+	# which get passed through the TextDecoder.
+	print ""
+	print "\tstring() {"
+	print "\t\tconst len = this.getUint32(this.offset)"
+	print "\t\tthis.offset += 4"
+	print "\t\tconst array = new Uint8Array("
+	print "\t\t\tthis.buffer, this.require(len), len)"
+	print "\t\tthis.offset += len"
+	print "\t\treturn this.decoder.decode(array)"
+	print "\t}"
+
+	define_internal("bool")
+	CodegenDeserialize["bool"] = "\t%s = r.bool()\n"
+
+	# Booleans occupy one byte, any non-zero value reads as true.
+	print ""
+	print "\tbool() {"
+	print "\t\tconst u8 = this.getUint8(this.offset)"
+	print "\t\tthis.offset += 1"
+	print "\t\treturn u8 != 0"
+	print "\t}"
+
+	define_sint("8")
+	define_sint("16")
+	define_sint("32")
+	define_sint("64")
+	define_uint("8")
+	define_uint("16")
+	define_uint("32")
+	define_uint("64")
+
+	# Close the Reader class.
+	print "}"
+}
+
+# Emit constant `name` as an exported const, under its lowerCamelCase name.
+function codegen_constant(name, value,    jsname) {
+	jsname = decapitalize(snaketocamel(name))
+	print ""
+	print "export const " jsname " = " value
+}
+
+# Add one `Name: value,` property line to the enum's "fields" fragment.
+function codegen_enum_value(name, subname, value, cg,    property) {
+	property = "\t" snaketocamel(subname) ": " value ",\n"
+	append(cg, "fields", property)
+}
+
+# Emit enumeration `name` as an exported frozen object made of the
+# properties collected in cg["fields"], and register its deserialization
+# template, which reads the value with Reader.i8().
+# `cg` is emptied before returning.
+# `i` is declared local so that clearing `cg` does not clobber a global.
+function codegen_enum(name, cg,    i) {
+	print ""
+	print "export const " name " = Object.freeze({"
+	print cg["fields"] "})"
+
+	CodegenDeserialize[name] = "\t%s = r.i8()\n"
+	for (i in cg)
+		delete cg[i]
+}
+
+# Declare the class field for the struct member described by `d`,
+# and append the statements reading it from Reader `r` to `cg`.
+function codegen_struct_field(d, cg,    prop, target, reader, typed) {
+	prop = decapitalize(snaketocamel(d["name"]))
+	target = "s." prop
+	reader = CodegenDeserialize[d["type"]]
+	append(cg, "fields", "\t" prop "\n")
+
+	if (!d["isarray"]) {
+		append(cg, "deserialize", sprintf(reader, target))
+		return
+	}
+
+	# Arrays start with their length, kept in a block-scoped constant.
+	append(cg, "deserialize",
+		"\t{\n" \
+		indent(sprintf(CodegenDeserialize["u32"], "const len")))
+
+	# Arrays of single bytes map directly onto typed array views.
+	if (d["type"] == "u8")
+		typed = "Uint8Array"
+	else if (d["type"] == "i8")
+		typed = "Int8Array"
+
+	if (typed) {
+		append(cg, "deserialize",
+			"\t\t" target " = new " typed "(\n" \
+			"\t\t\tr.buffer, r.require(len), len)\n" \
+			"\t\tr.offset += len\n" \
+			"\t}\n")
+	} else {
+		# Everything else is decoded one element at a time.
+		append(cg, "deserialize",
+			"\t\t" target " = new Array(len)\n" \
+			"\t}\n" \
+			"\tfor (let i = 0; i < " target ".length; i++)\n" \
+			indent(sprintf(reader, target "[i]")))
+	}
+}
+
+# Declare the class field holding the union tag described by `d`.
+function codegen_struct_tag(d, cg,    prop) {
+	# Only the declaration is added here:
+	# deserialization is already done by the containing union.
+	prop = decapitalize(snaketocamel(d["name"]))
+	append(cg, "fields", "\t" prop "\n")
+}
+
+# Emit struct `name` as an exported class consisting of the field
+# declarations and methods collected in `cg`, plus a static deserialize(r)
+# method wrapping the collected deserialization statements.
+# The type's deserialization template is registered, and `cg` is emptied.
+# `i` is declared local so that clearing `cg` does not clobber a global.
+function codegen_struct(name, cg,    i) {
+	print ""
+	print "export class " name " {"
+	print cg["fields"] cg["methods"]
+	print "\tstatic deserialize(r) {"
+	print "\t\tconst s = new " name "()"
+	print indent(cg["deserialize"]) "\t\treturn s"
+	print "\t}"
+	print "}"
+
+	CodegenDeserialize[name] = "\t%s = " name ".deserialize(r)\n"
+	for (i in cg)
+		delete cg[i]
+}
+
+# Remember the name and the type of the union's tag in `cg`.
+function codegen_union_tag(d, cg) {
+	cg["tagname"] = d["name"]
+	cg["tagtype"] = d["type"]
+}
+
+# Emit the class for case `casename` of union `name` from `scg`, giving it
+# a constructor that presets the tag, and add the matching `case` branch
+# to the union's "deserialize" fragment in `cg`.
+function codegen_union_struct(name, casename, cg, scg,     structname, tagconst) {
+	tagconst = cg["tagtype"] "." snaketocamel(casename)
+	append(scg, "methods",
+		"\n" \
+		"\tconstructor() {\n" \
+		"\t\tthis." decapitalize(snaketocamel(cg["tagname"])) \
+			" = " tagconst "\n" \
+		"\t}\n")
+
+	# And thus not all generated structs are present in Types.
+	structname = name snaketocamel(casename)
+	codegen_struct(structname, scg)
+
+	# The branch must only be built once codegen_struct() has registered
+	# the struct's deserialization template.
+	append(cg, "deserialize",
+		"\tcase " tagconst ":\n" \
+		"\t{\n" \
+		indent(sprintf(CodegenDeserialize[structname], "const s")) \
+		"\t\treturn s\n" \
+		"\t}\n")
+}
+
+# Emit the exported deserialize<name>(r) function for union `name`: it reads
+# the tag, then dispatches over the `case` branches collected in `cg`.
+# The type's deserialization template is registered, and `cg` is emptied.
+# `i` is declared local so that clearing `cg` does not clobber a global.
+function codegen_union(name, cg,    tagvar, i) {
+	tagvar = decapitalize(snaketocamel(cg["tagname"]))
+
+	print ""
+	print "export function deserialize" name "(r) {"
+	print sprintf(CodegenDeserialize[cg["tagtype"]], "const " tagvar) \
+		"\tswitch (" tagvar ") {"
+	print cg["deserialize"] "\tdefault:"
+	# The template literal has to interpolate the generated variable,
+	# whose name is held by the AWK variable `tagvar`.
+	print "\t\tthrow `Unknown " cg["tagtype"] " (${" tagvar "})`"
+	print "\t}"
+	print "}"
+
+	CodegenDeserialize[name] = "\t%s = deserialize" name "(r)\n"
+	for (i in cg)
+		delete cg[i]
+}
--- a/tools/lxdrgen.awk
+++ b/tools/lxdrgen.awk
@@ -0,0 +1,291 @@
+# lxdrgen.awk: an XDR-derived code generator for network protocols.
+#
+# Copyright (c) 2022, Přemysl Eric Janouch <p@janouch.name>
+# SPDX-License-Identifier: 0BSD
+#
+# Usage: env LC_ALL=C awk -f lxdrgen.awk -f lxdrgen-{c,go,mjs}.awk \
+#  -v PrefixCamel=Foo foo.lxdr > foo.{c,go,mjs} | {clang-format,gofmt,...}
+
+# --- Utilities ----------------------------------------------------------------
+
+# Convert CamelCase to snake_case: an underscore is inserted wherever
+# a lowercase letter is directly followed by an uppercase one,
+# and the result is lowercased.
+function cameltosnake(s,    head, tail) {
+	while (match(s, /[[:lower:]][[:upper:]]/)) {
+		# Split between the two matched characters.
+		head = substr(s, 1, RSTART)
+		tail = substr(s, RSTART + 1)
+		s = head "_" tolower(substr(tail, 1, 1)) substr(tail, 2)
+	}
+	return tolower(s)
+}
+
+# Convert snake_case (of any letter case) to UpperCamelCase: the first
+# character is uppercased, the rest lowercased, and then every underscore
+# followed by an alphanumeric is replaced by that character in uppercase.
+function snaketocamel(s,    at) {
+	s = toupper(substr(s, 1, 1)) tolower(substr(s, 2))
+	while ((at = match(s, /_[[:alnum:]]/)))
+		s = substr(s, 1, at - 1) toupper(substr(s, at + 1, 1)) \
+			substr(s, at + 2)
+	return s
+}
+
+# Lowercase the first character of `s`, provided that `s` contains
+# an uppercase letter directly followed by a lowercase one.
+# Other strings, such as all-uppercase ones, are returned unchanged.
+function decapitalize(s) {
+	if (!match(s, /[[:upper:]][[:lower:]]/))
+		return s
+	return tolower(substr(s, 1, 1)) substr(s, 2)
+}
+
+# Indent every line of `s` by one tab.  Empty input is returned as is,
+# and no tabs are left dangling at the very end of the result.
+function indent(s) {
+	if (s) {
+		gsub(/\n/, "\n\t", s)
+		sub(/\t*$/, "", s)
+		s = "\t" s
+	}
+	return s
+}
+
+# Concatenate `value` to the end of the string stored in a[key].
+function append(a, key, value,    old) {
+	old = a[key]
+	a[key] = old value
+}
+
+# --- Parsing ------------------------------------------------------------------
+
+# Report `message` along with the current input position, both as a comment
+# within the generated output and on standard error, then exit with failure.
+function fatal(message,    report) {
+	report = FILENAME ":" FNR ": fatal error: " message
+	print "// " report
+	print report > "/dev/stderr"
+	exit 1
+}
+
+# Discard input up to and including the next "*/", reading further records
+# as necessary.  Running out of input first is a fatal error.
+function skipcomment() {
+	for (;;) {
+		if (match($0, /[*]\//)) {
+			$0 = substr($0, RSTART + RLENGTH)
+			return
+		}
+		if (!(getline > 0))
+			break
+	}
+	fatal("unterminated block comment")
+}
+
+# Advance to the next token, store it in Token, and return it.
+# Tokens are identifiers, decimal integer literals, or any other single
+# character; whitespace, "//" comments and "/* */" comments are skipped.
+# An empty string is returned at the end of input.
+function nexttoken() {
+	do {
+		if (match($0, /^[[:space:]]+/)) {
+			$0 = substr($0, RLENGTH + 1)
+		} else if (match($0, /^\/\/.*/)) {
+			$0 = ""
+		} else if (match($0, /^\/[*]/)) {
+			$0 = substr($0, RLENGTH + 1)
+			skipcomment()
+		} else if (match($0, /^[[:alpha:]][[:alnum:]_]*/)) {
+			Token = substr($0, 1, RLENGTH)
+			$0 = substr($0, RLENGTH + 1)
+			return Token
+		# AWK implementations rarely support non-decimal notations
+		# in their implicit string-to-number conversions.
+		} else if (match($0, /^(0|-?[1-9][0-9]*)/)) {
+			Token = substr($0, 1, RLENGTH)
+			$0 = substr($0, RLENGTH + 1)
+			return Token
+		# The record is tested by length rather than by truthiness:
+		# input that looks like a number equal to zero, such as "-0",
+		# would otherwise count as empty and get silently dropped.
+		} else if (length($0)) {
+			Token = substr($0, 1, 1)
+			$0 = substr($0, 2)
+			return Token
+		}
+	} while (length($0) || getline > 0)
+	Token = ""
+	return Token
+}
+
+# Pass a truthy `v` through, and abort with a parse error otherwise.
+function expect(v) {
+	if (v)
+		return v
+
+	fatal("broken expectations at `" Token "' before `" $0 "'")
+	return v
+}
+
+# Consume the current token if it equals `what`, and tell whether it did.
+function accept(what) {
+	if (Token == what) {
+		nexttoken()
+		return 1
+	}
+	return 0
+}
+
+# Consume and return the current token if it begins with a letter,
+# or return 0 without consuming anything.
+function identifier(    name) {
+	if (Token ~ /^[[:alpha:]]/) {
+		name = Token
+		nexttoken()
+		return name
+	}
+	return 0
+}
+
+# Consume and return the current token if it begins like a decimal
+# integer literal, or return 0 without consuming anything.
+function number(    literal) {
+	if (Token ~ /^(0|-?[1-9])/) {
+		literal = Token
+		nexttoken()
+		return literal
+	}
+	return 0
+}
+
+# Read a numeric value: either the name of a previously defined constant,
+# which is replaced by its value, or an integer literal.
+function readnumber(    name) {
+	name = identifier()
+	if (name) {
+		if (!(name in Consts))
+			fatal("unknown constant: " name)
+		return Consts[name]
+	}
+	return expect(number())
+}
+
+# Try to parse `const NAME = VALUE`.  Returns 0 when the input does not
+# start with the `const` keyword; otherwise the constant is recorded
+# in Consts, handed to the code generator, and 1 is returned.
+function defconst(    name, value) {
+	if (!accept("const"))
+		return 0
+
+	name = expect(identifier())
+	expect(accept("="))
+	value = readnumber()
+
+	# Constants may only be defined once.
+	if (name in Consts)
+		fatal("constant redefined: " name)
+	Consts[name] = value
+
+	codegen_constant(name, value)
+	return 1
+}
+
+# Read a type reference: either an inline type definition, or the name
+# of an already known type.  Returns the type's name, or 0 when the input
+# contains neither.
+function readtype(    name) {
+	if ((name = deftype()))
+		return name
+	if (!(name = identifier()))
+		return 0
+
+	if (name in Types)
+		return name
+	fatal("unknown type: " name)
+	return name
+}
+
+# Parse `NAME { MEMBER [= VALUE], ... }` following the `enum` keyword,
+# passing each member and finally the whole type to the code generator.
+# Returns the name of the new type.
+function defenum(    enumname, member, ordinal, cg) {
+	# Make sure AWK treats this local as an array.
+	delete cg[0]
+
+	enumname = expect(identifier())
+	expect(accept("{"))
+	while (!accept("}")) {
+		member = expect(identifier())
+
+		# Members count up from the preceding one, starting at one,
+		# unless given an explicit value.
+		ordinal++
+		if (accept("="))
+			ordinal = readnumber() + 0
+
+		if (!ordinal)
+			fatal("enumeration values cannot be zero")
+		if (ordinal < -128 || ordinal > 127)
+			fatal("enumeration value out of range")
+		expect(accept(","))
+
+		# EnumValues keeps both a SUBSEP-separated list of member names
+		# per enum, and a counter per member to detect duplicates.
+		append(EnumValues, enumname, SUBSEP member)
+		if (EnumValues[enumname, member]++)
+			fatal("duplicate enum value: " member)
+		codegen_enum_value(enumname, member, ordinal, cg)
+	}
+
+	Types[enumname] = "enum"
+	codegen_enum(enumname, cg)
+	return enumname
+}
+
+# Parse one field declaration terminated by a semicolon.  For `void`,
+# 0 is returned and `out` stays untouched.  Otherwise out["type"],
+# out["name"] and out["isarray"] get filled in, and 1 is returned.
+function readfield(out) {
+	if (accept("void")) {
+		expect(accept(";"))
+		return 0
+	}
+
+	out["type"] = expect(readtype())
+	out["name"] = expect(identifier())
+	# TODO: Consider supporting XDR's VLA length limits here.
+	# TODO: Consider supporting XDR's fixed-length syntax for string limits.
+	out["isarray"] = accept("<") && expect(accept(">"))
+	expect(accept(";"))
+	return 1
+}
+
+# Parse `NAME { FIELD; ... }` following the `struct` keyword, passing each
+# non-void field and finally the whole type to the code generator.
+# Returns the name of the new type.
+function defstruct(    structname, field, cg) {
+	# Make sure AWK treats these locals as arrays.
+	delete cg[0]
+	delete field[0]
+
+	structname = expect(identifier())
+	expect(accept("{"))
+	while (!accept("}"))
+		if (readfield(field))
+			codegen_struct_field(field, cg)
+
+	Types[structname] = "struct"
+	codegen_struct(structname, cg)
+	return structname
+}
+
+# Parse `NAME switch (TAGTYPE TAGNAME) { case VALUE: FIELD; ... }` following
+# the `union` keyword.  Each case is generated as a struct of its own, made
+# of the tag and the fields that follow the case label.
+# Returns the name of the new type.
+function defunion(    name, tag, tagtype, tagvalue, cg, scg, d, a, i, unseen) {
+	# Make sure AWK treats these locals as arrays.
+	delete cg[0]
+	delete scg[0]
+	delete d[0]
+
+	name = expect(identifier())
+	expect(accept("switch"))
+	expect(accept("("))
+	tag["type"] = tagtype = expect(readtype())
+	tag["name"] = expect(identifier())
+	expect(accept(")"))
+
+	# Only enumerations may discriminate unions.
+	if (Types[tagtype] != "enum")
+		fatal("not an enum type: " tagtype)
+	codegen_union_tag(tag, cg)
+
+	# Count every member of the enumeration, so that cases can be checked
+	# against them.  The list in EnumValues starts with a separator,
+	# so the first element of the split is an empty string.
+	split(EnumValues[tagtype], a, SUBSEP)
+	for (i in a)
+		unseen[a[i]]++
+
+	expect(accept("{"))
+	while (!accept("}")) {
+		if (accept("case")) {
+			# A new case label concludes the struct of the previous case.
+			if (tagvalue)
+				codegen_union_struct(name, tagvalue, cg, scg)
+
+			tagvalue = expect(identifier())
+			expect(accept(":"))
+			# The counter is zero for unknown members,
+			# as well as for those already used up by an earlier case.
+			if (!unseen[tagvalue]--)
+				fatal("no such value or duplicate case: " tagtype "." tagvalue)
+			# Every case struct starts with the tag.
+			codegen_struct_tag(tag, scg)
+		} else if (tagvalue) {
+			if (readfield(d))
+				codegen_struct_field(d, scg)
+		} else {
+			fatal("union fields must fall under a case")
+		}
+	}
+	# Conclude the struct of the last case.
+	if (tagvalue)
+		codegen_union_struct(name, tagvalue, cg, scg)
+
+	# What remains non-zero in unseen[2..] is simply not recognized/allowed.
+	Types[name] = "union"
+	codegen_union(name, cg)
+	return name
+}
+
+# Try to parse a type definition introduced by one of the `enum`, `struct`
+# or `union` keywords.  Returns the name of the defined type, or 0 when
+# the current token is none of those keywords.
+function deftype(    keyword) {
+	keyword = Token
+	if (keyword != "enum" && keyword != "struct" && keyword != "union")
+		return 0
+
+	# Consume the keyword, then hand over to the respective parser.
+	nexttoken()
+	if (keyword == "enum")
+		return defenum()
+	if (keyword == "struct")
+		return defstruct()
+	return defunion()
+}
+
+# The sole rule of the script: once triggered by an input record,
+# it reads all remaining input through nexttoken() and parses it
+# as a sequence of semicolon-terminated constant and type definitions.
+{
+	# Derive lower and upper snake-case prefixes, each with a trailing
+	# underscore, from the optional PrefixCamel variable.
+	if (PrefixCamel) {
+		PrefixLower = tolower(cameltosnake(PrefixCamel)) "_"
+		PrefixUpper = toupper(cameltosnake(PrefixCamel)) "_"
+	}
+
+	# This is not in a BEGIN clause (even though it consumes all input),
+	# so that the code generator can insert the first FILENAME.
+	codegen_begin()
+
+	# An empty Token signals the end of input.
+	nexttoken()
+	while (Token != "") {
+		expect(defconst() || deftype())
+		expect(accept(";"))
+	}
+}
Author	SHA1	Message	Date
Přemysl Eric Janouch	035bfe5e81	Document the recently added scripts	2022-09-30 03:09:04 +02:00
Přemysl Eric Janouch	ebbe7a1672	Import protocol code generator from xK, add tests Also add a VIM syntax highlighting file. This also fixes some previously untriggered bugs.	2022-09-30 03:06:36 +02:00