Compare commits

...

2 Commits

Author SHA1 Message Date
Přemysl Eric Janouch 035bfe5e81
Document the recently added scripts 2022-09-30 03:09:04 +02:00
Přemysl Eric Janouch ebbe7a1672
Import protocol code generator from xK, add tests
Also add a VIM syntax highlighting file.

This also fixes some previously untriggered bugs.
2022-09-30 03:06:36 +02:00
10 changed files with 1745 additions and 0 deletions

View File

@ -53,6 +53,58 @@ foreach (name ${tests})
add_test (NAME test-${name} COMMAND test-${name})
endforeach ()
# --- Tools --------------------------------------------------------------------
# Test the AsciiDoc manual page generator for a successful parse,
# by rendering libertyxdr.adoc into a manual page as part of the default build.
set (ASCIIMAN ${PROJECT_SOURCE_DIR}/tools/asciiman.awk)
# The input is listed in DEPENDS by the same absolute path the command reads,
# so that the dependency does not hinge on how a relative path gets resolved.
add_custom_command (OUTPUT libertyxdr.7
	COMMAND env LC_ALL=C awk -f ${ASCIIMAN}
		"${PROJECT_SOURCE_DIR}/libertyxdr.adoc" > libertyxdr.7
	DEPENDS ${PROJECT_SOURCE_DIR}/libertyxdr.adoc ${ASCIIMAN}
	COMMENT "Generating man page for libertyxdr" VERBATIM)
add_custom_target (docs ALL DEPENDS libertyxdr.7)
# Test CMake script parsing, by feeding the file currently being processed
# through the AWK parser and dumper.  This uses the NAME/COMMAND signature,
# like the other add_test() invocations in this file.
add_test (NAME test-cmake-parser
	COMMAND env LC_ALL=C awk -f ${PROJECT_SOURCE_DIR}/tools/cmake-parser.awk
		-f ${PROJECT_SOURCE_DIR}/tools/cmake-dump.awk ${CMAKE_CURRENT_LIST_FILE})
# Test protocol code generation
set (lxdrgen_outputs)
set (lxdrgen_base "${PROJECT_BINARY_DIR}/lxdrgen.lxdr")

# Run the generator once per backend, capturing its standard output
# in a file named after the backend.
foreach (backend IN ITEMS c go mjs)
	add_custom_command (
		OUTPUT ${lxdrgen_base}.${backend}
		COMMAND env LC_ALL=C awk
			-f ${PROJECT_SOURCE_DIR}/tools/lxdrgen.awk
			-f ${PROJECT_SOURCE_DIR}/tools/lxdrgen-${backend}.awk
			-v PrefixCamel=ProtoGen
			${PROJECT_SOURCE_DIR}/tests/lxdrgen.lxdr
			> ${lxdrgen_base}.${backend}
		DEPENDS
			${PROJECT_SOURCE_DIR}/tools/lxdrgen.awk
			${PROJECT_SOURCE_DIR}/tools/lxdrgen-${backend}.awk
			${PROJECT_SOURCE_DIR}/tests/lxdrgen.lxdr
		COMMENT "Generating test protocol code (${backend})"
		VERBATIM)
	list (APPEND lxdrgen_outputs ${lxdrgen_base}.${backend})
endforeach ()
add_custom_target (test-lxdrgen-outputs ALL DEPENDS ${lxdrgen_outputs})

# The generated C file is marked as header-only, so it is not compiled
# on its own, and the binary directory is made available for #include.
set_property (SOURCE ${lxdrgen_base}.c PROPERTY HEADER_FILE_ONLY TRUE)
add_executable (test-lxdrgen tests/lxdrgen.c ${lxdrgen_base}.c)
target_include_directories (test-lxdrgen PUBLIC ${PROJECT_BINARY_DIR})
add_test (NAME test-lxdrgen-c COMMAND test-lxdrgen)
# Vet the generated Go code, if the Go toolchain can be found.
find_program (GO_EXECUTABLE go)
if (NOT GO_EXECUTABLE)
	message (WARNING "Cannot test generated protocol code for Go")
else ()
	add_test (test-lxdrgen-go ${GO_EXECUTABLE} vet ${lxdrgen_base}.go)
endif ()

# Syntax-check the generated module, if Node.js can be found.
find_program (NODE_EXECUTABLE node)
if (NOT NODE_EXECUTABLE)
	message (WARNING "Cannot test generated protocol code for Javascript")
else ()
	add_test (test-lxdrgen-mjs ${NODE_EXECUTABLE} -c ${lxdrgen_base}.mjs)
endif ()

View File

@ -17,6 +17,42 @@ All development is done on Linux, but other POSIX-compatible operating systems
should be supported as well. They have an extremely low priority, however, and
I'm not testing them at all, with the exception of OpenBSD.
Tools
-----
This project also hosts a number of supporting scripts written in portable AWK:
asciiman.awk::
A fallback manual page generator for AsciiDoc documents,
motivated by the hugeness of AsciiDoc's and Asciidoctor's dependency trees.
It uses the _man_ macro package.
cmake-parser.awk::
Parses the CMake language to the extent that is necessary to reliably
extract project versions. Its greatest limitation is its inability
to expand variables, which would require a full interpreter.
cmake-dump.awk::
This can be used in conjunction with the previous script to dump CMake
scripts in a normalized format for further processing.
lxdrgen.awk::
Protocol code generator for a variant of XDR,
which is link:libertyxdr.adoc[documented separately].
Successfully employed in https://git.janouch.name/p/xK[xK].
lxdrgen-c.awk::
LibertyXDR backend that builds on top of the C pseudolibrary.
lxdrgen-go.awk::
LibertyXDR backend for Go, supporting _encoding/json_ interfaces. It also
produces optimized JSON marshallers (however, note that the _json.Marshaler_
interface is bound to be underperforming, due to the amount of otherwise
avoidable memory allocations it necessitates).
lxdrgen-mjs.awk::
LibertyXDR backend for JavaScript, currently for decoding only.
It cuts a corner by not using BigInts, on par with `JSON.parse()`.
Contributing and Support
------------------------
Use https://git.janouch.name/p/liberty to report any bugs, request features,

108
libertyxdr.adoc Normal file
View File

@ -0,0 +1,108 @@
libertyxdr(7)
=============
:doctype: manpage
Name
----
LibertyXDR - an XDR-derived IDL and data serialization format
Description
-----------
*LibertyXDR* is an interface description language, as well as a data
serialization format, that has been largely derived from XDR, though notably
simplified.
Conventions
~~~~~~~~~~~
User-defined types should be named in *CamelCase*, field names in *snake_case*,
and constants in *SCREAMING_SNAKE_CASE*. Code generators will convert these to
whatever is appropriate in their target language.
Primitive data types
~~~~~~~~~~~~~~~~~~~~
Like in XDR, all data is serialized in the network byte order, i.e., big-endian.
* *void*: 0 bytes
+
This is a dummy type that cannot be assigned a field name.
* *bool*: 1 byte
+
This is a boolean value: 0 means _false_, any other value means _true_.
* *u8*, *u16*, *u32*, *u64*: 1, 2, 4, and 8 bytes respectively
+
These are unsigned integers.
* *i8*, *i16*, *i32*, *i64*: 1, 2, 4, and 8 bytes respectively
+
These are signed integers in two's complement.
* *string*: implicitly prefixed by its length as a *u32*,
then immediately followed by its contents, with no trailing NUL byte
+
This is a valid UTF-8 string without a byte order mark. Note that strings are
always unbounded, unlike in XDR.
Constants
~~~~~~~~~
At the top level of a document, outside other definitions, you can define
typeless integer constants:
const VERSION = 1;
The value can be either a name of another previously defined constant,
or an immediate decimal value, which may not contain leading zeros.
Enumerations
~~~~~~~~~~~~
An *enum* is an *i8* with uniquely named values, in their own namespace.
Values can be either specified explicitly, in the same way as with a constant,
or they can be left implicit, in which case names assume a value that is one
larger than their predecessor. Zero is reserved for internal use, thus
enumerations implicitly begin with a value of one. For example, these form
a sequence from one to three:
enum Vehicle { CAR, LORRY = 2, PLANE, };
Structures
~~~~~~~~~~
A *struct* is a sequence of fields, specified by their type, and their chosen
name. You can add a *<>* suffix to change a field to an array, in which case
it is implicitly preceded by a *u32* specifying its length in terms of its
elements.
Unlike in XDR, there is no padding between subsequent fields, and type
definitions can be arbitrarily syntactically nested, as in C.
struct StockReport {
u8 version; // Version of this report.
struct Item {
Vehicle kind; // The vehicle in question.
i32 count; // How many vehicles of that kind there are.
} items<>; // Reported items.
};
Unions
~~~~~~
A *union* is a kind of structure whose fields depend on the value of its first
and always-present field, which must be a tag *enum*:
union VehicleDetails switch (Vehicle kind) {
case CAR: void;
case LORRY: i8 axles;
case PLANE: i8 engines;
};
All possible enumeration values must be named, and there is no *case*
fall-through.
Framing
-------
Unless this role is already filled by, e.g., WebSocket, _LibertyXDR_ structures
should be prefixed by their byte length in the *u32* format, once serialized.
See also
--------
_XDR: External Data Representation Standard_, RFC 4506

21
libertyxdr.vim Normal file
View File

@ -0,0 +1,21 @@
" filetype.vim: au! BufNewFile,BufRead *.lxdr setf libertyxdr
" Do not load twice for the same buffer.
if exists('b:current_syntax')
	finish
endif

" The catch-all error rule comes first, the more specific rules follow it.
syntax match libertyxdrError "[^[:space:]:;,(){}<>=]\+"
syntax region libertyxdrBlockComment start=+/[*]+ end=+[*]/+
syntax match libertyxdrComment "//.*"
syntax match libertyxdrIdentifier "\<[[:alpha:]][[:alnum:]_]*\>"
syntax match libertyxdrNumber "\<0\>\|\(-\|\<\)[1-9][[:digit:]]*\>"
syntax keyword libertyxdrKeyword const enum struct union switch case
syntax keyword libertyxdrType bool u8 u16 u32 u64 i8 i16 i32 i64 string void

let b:current_syntax = "libertyxdr"

" Map our groups onto the standard highlighting groups.
highlight default link libertyxdrError Error
highlight default link libertyxdrBlockComment Comment
highlight default link libertyxdrComment Comment
highlight default link libertyxdrIdentifier Identifier
highlight default link libertyxdrNumber Number
highlight default link libertyxdrKeyword Statement
highlight default link libertyxdrType Type

123
tests/lxdrgen.c Normal file
View File

@ -0,0 +1,123 @@
/*
* tests/lxdrgen.c
*
* Copyright (c) 2022, Přemysl Eric Janouch <p@janouch.name>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
*/
#define PROGRAM_NAME "test"
#define PROGRAM_VERSION "0"
#include "../liberty.c"
#include "lxdrgen.lxdr.c"
static void
test_ser_deser_free (void)
{
hard_assert (PROTO_GEN_VERSION == 1);
enum { CASES = 3 };
struct proto_gen_struct a = {}, b = {};
a.u = xcalloc ((a.u_len = CASES + rand () % 100), sizeof *a.u);
for (size_t i = 0; i < a.u_len; i++)
{
union proto_gen_union *u = a.u + i;
switch (i % CASES)
{
case 0:
u->tag = PROTO_GEN_ENUM_NUMBERS;
u->numbers.a = rand () % UINT8_MAX;
u->numbers.b = rand () % UINT16_MAX;
u->numbers.c = rand () % UINT32_MAX;
u->numbers.d = rand () % UINT64_MAX;
u->numbers.e = rand () % UINT8_MAX;
u->numbers.f = rand () % UINT16_MAX;
u->numbers.g = rand () % UINT32_MAX;
u->numbers.h = rand () % UINT64_MAX;
break;
case 1:
u->tag = PROTO_GEN_ENUM_OTHERS;
u->others.foo = rand () % 2;
u->others.bar = str_make ();
for (int i = rand () % 0x30; i > 0; i--)
str_append_c (&u->others.bar, 0x30 + i);
break;
case 2:
u->tag = PROTO_GEN_ENUM_NOTHING;
break;
default:
hard_assert (!"unhandled case");
}
}
struct str buf = str_make ();
hard_assert (proto_gen_struct_serialize (&a, &buf));
struct msg_unpacker r = msg_unpacker_make (buf.str, buf.len);
hard_assert (proto_gen_struct_deserialize (&b, &r));
hard_assert (!msg_unpacker_get_available (&r));
str_free (&buf);
hard_assert (a.u_len == b.u_len);
for (size_t i = 0; i < a.u_len; i++)
{
union proto_gen_union *ua = a.u + i;
union proto_gen_union *ub = b.u + i;
hard_assert (ua->tag == ub->tag);
switch (ua->tag)
{
case PROTO_GEN_ENUM_NUMBERS:
hard_assert (ua->numbers.a == ub->numbers.a);
hard_assert (ua->numbers.b == ub->numbers.b);
hard_assert (ua->numbers.c == ub->numbers.c);
hard_assert (ua->numbers.d == ub->numbers.d);
hard_assert (ua->numbers.e == ub->numbers.e);
hard_assert (ua->numbers.f == ub->numbers.f);
hard_assert (ua->numbers.g == ub->numbers.g);
hard_assert (ua->numbers.h == ub->numbers.h);
break;
case PROTO_GEN_ENUM_OTHERS:
hard_assert (ua->others.foo == ub->others.foo);
hard_assert (ua->others.bar.len == ub->others.bar.len);
hard_assert (!memcmp (ua->others.bar.str, ub->others.bar.str,
ua->others.bar.len));
break;
case PROTO_GEN_ENUM_NOTHING:
break;
default:
hard_assert (!"unexpected case");
}
}
// Emulate partially deserialized data to test disposal of that.
for (size_t i = b.u_len - CASES; i < b.u_len; i++)
{
proto_gen_union_free (&b.u[i]);
memset (&b.u[i], 0, sizeof b.u[i]);
}
proto_gen_struct_free (&a);
proto_gen_struct_free (&b);
}
int
main (int argc, char *argv[])
{
struct test test;
test_init (&test, argc, argv);
test_add_simple (&test, "/ser-deser-free", NULL, test_ser_deser_free);
return test_run (&test);
}

23
tests/lxdrgen.lxdr Normal file
View File

@ -0,0 +1,23 @@
/*
 * tests/lxdrgen.lxdr: a test protocol for the generator
 */

// Typeless integer constants; NOISREV presumably exercises negative values.
const VERSION = 1;
const NOISREV = -1;

// TODO: Test failure paths, and in general go for full coverage.
// A single structure holding an array of unions, with the tag enumeration,
// the union, and the array field all declared in a nested fashion.
struct Struct {
	union Union switch (enum Enum {
		// One value taken from a constant, one explicit, one implicit.
		NUMBERS = VERSION,
		OTHERS = 2,
		NOTHING,
	} tag) {
	case NUMBERS:
		// All signed and unsigned integer widths.
		i8 a; i16 b; i32 c; i64 d;
		u8 e; u16 f; u32 g; u64 h;
	case OTHERS:
		bool foo;
		string bar;
	case NOTHING:
		void;
	} u<>;
};

324
tools/lxdrgen-c.awk Normal file
View File

@ -0,0 +1,324 @@
# lxdrgen-c.awk: C backend for lxdrgen.awk.
#
# Copyright (c) 2022, Přemysl Eric Janouch <p@janouch.name>
# SPDX-License-Identifier: 0BSD
#
# Neither *_new() nor *_destroy() functions are provided, because they'd only
# be useful for top-levels, and are merely extra malloc()/free() calls.
# Users are expected to reuse buffers.
#
# Similarly, no constructors are produced--those are easy to write manually.
#
# All arrays are deserialized zero-terminated, so u8<> and i8<> can be directly
# used as C strings.
#
# All types must be able to dispose partially zero values going from the back,
# i.e., in the reverse order of deserialization.
# Register a type under the given name as internal,
# and remember how that type is spelled in C.
function define_internal(name, ctype) {
	CodegenCType[name] = ctype
	Types[name] = "internal"
}
# Register an integer type, together with templates that pack it
# using str_pack_NAME(), and unpack it using msg_unpacker_NAME().
# The "%s" in templates stands for the C expression being processed.
function define_int(shortname, ctype) {
	define_internal(shortname, ctype)
	CodegenSerialize[shortname] = "\tstr_pack_" shortname "(w, %s);\n"
	CodegenDeserialize[shortname] = \
		"\tif (!msg_unpacker_" shortname "(r, &%s))\n\t\treturn false;\n"
}
# Register the signed integer type of the given bit width: iN as intN_t.
function define_sint(size) {
	define_int(("i" size), ("int" size "_t"))
}

# Register the unsigned integer type of the given bit width: uN as uintN_t.
function define_uint(size) {
	define_int(("u" size), ("uint" size "_t"))
}
# Register all primitive types with their C templates, and print the prologue
# of the generated file, which contains the string helper functions
# that the string templates call.
function codegen_begin() {
	define_sint("8")
	define_sint("16")
	define_sint("32")
	define_sint("64")
	define_uint("8")
	define_uint("16")
	define_uint("32")
	define_uint("64")

	# Of the types registered here, only strings get a disposal template.
	define_internal("string", "struct str")
	CodegenDispose["string"] = "\tstr_free(&%s);\n"
	CodegenSerialize["string"] = \
		"\tif (!proto_string_serialize(&%s, w))\n" \
		"\t\treturn false;\n"
	CodegenDeserialize["string"] = \
		"\tif (!proto_string_deserialize(&%s, r))\n" \
		"\t\treturn false;\n"

	# Booleans are written normalized to 0 or 1 through "!!",
	# and are read from a single byte the same way.
	define_internal("bool", "bool")
	CodegenSerialize["bool"] = \
		"\tstr_pack_u8(w, !!%s);\n"
	CodegenDeserialize["bool"] = \
		"\t{\n" \
		"\t\tuint8_t v = 0;\n" \
		"\t\tif (!msg_unpacker_u8(r, &v))\n" \
		"\t\t\treturn false;\n" \
		"\t\t%s = !!v;\n" \
		"\t}\n"

	print "// Code generated from " FILENAME ". DO NOT EDIT."
	print "// This file directly depends on liberty.c, but doesn't include it."
	print ""
	# Strings are prefixed by their length as a u32, and thus limited by it.
	print "static bool"
	print "proto_string_serialize(const struct str *s, struct str *w) {"
	print "\tif (s->len > UINT32_MAX)"
	print "\t\treturn false;"
	print "\tstr_pack_u32(w, s->len);"
	print "\tstr_append_str(w, s);"
	print "\treturn true;"
	print "}"
	print ""
	# The string is initialized before being validated, so on a validation
	# failure it is left for the caller to dispose of.
	print "static bool"
	print "proto_string_deserialize(struct str *s, struct msg_unpacker *r) {"
	print "\tuint32_t len = 0;"
	print "\tif (!msg_unpacker_u32(r, &len))"
	print "\t\treturn false;"
	print "\tif (msg_unpacker_get_available(r) < len)"
	print "\t\treturn false;"
	print "\t*s = str_make();"
	print "\tstr_append_data(s, r->data + r->offset, len);"
	print "\tr->offset += len;"
	print "\tif (!utf8_validate (s->str, s->len))"
	print "\t\treturn false;"
	print "\treturn true;"
	print "}"
}
# Print a constant as the sole member of an anonymous enum,
# preceded by an empty line.
function codegen_constant(name, value) {
	print ""
	print "enum { " (PrefixUpper name) " = " value " };"
}
# Accumulate one enumerator in cg["fields"]; its C name consists of
# the prefix, the upper-cased snake-case enum name, and the value name.
function codegen_enum_value(name, subname, value, cg) {
	append(cg, "fields", "\t" PrefixUpper toupper(cameltosnake(name)) \
		"_" subname " = " value ",\n")
}
# Print a C enum definition from the enumerators accumulated in cg,
# register templates that transfer it as a signed 8-bit integer,
# and reset cg for further use.
#
# Only name and cg are arguments, the remaining parameters are local variables.
function codegen_enum(name, cg, ctype, i) {
	ctype = "enum " PrefixLower cameltosnake(name)
	print ""
	print ctype " {"
	print cg["fields"] "};"

	# XXX: This should also check if it isn't out-of-range for any reason,
	# but our usage of sprintf() stands in the way a bit.
	CodegenSerialize[name] = "\tstr_pack_i8(w, %s);\n"
	# A value of zero is refused while reading.
	CodegenDeserialize[name] = \
		"\t{\n" \
		"\t\tint8_t v = 0;\n" \
		"\t\tif (!msg_unpacker_i8(r, &v) || !v)\n" \
		"\t\t\treturn false;\n" \
		"\t\t%s = v;\n" \
		"\t}\n"

	CodegenCType[name] = ctype
	# The iterator is a local, so that it cannot clobber a global "i".
	for (i in cg)
		delete cg[i]
}
# Add the union tag described by d to a struct of one of the union's cases:
# it gets declared, disposed of, and serialized like a regular field.
function codegen_struct_tag(d, cg, f) {
	f = "self->" d["name"]
	append(cg, "fields",
		"\t" CodegenCType[d["type"]] " " d["name"] ";\n")
	append(cg, "dispose",
		sprintf(CodegenDispose[d["type"]], f))
	append(cg, "serialize",
		sprintf(CodegenSerialize[d["type"]], f))
	# Do not deserialize here, that would be out of order.
}
# Generate the declaration, as well as disposal, serialization, and
# deserialization code for one struct field described by d, accumulating
# everything within cg.
#
# Arrays are represented by a u32 NAME_len counter and a NAME pointer to heap
# memory, which is allocated with one additional zeroed element at the end.
#
# Only d and cg are arguments, the remaining parameters are local variables.
function codegen_struct_field(d, cg, f, dispose, serialize, deserialize) {
	f = "self->" d["name"]
	dispose = CodegenDispose[d["type"]]
	serialize = CodegenSerialize[d["type"]]
	deserialize = CodegenDeserialize[d["type"]]
	if (!d["isarray"]) {
		append(cg, "fields", "\t" CodegenCType[d["type"]] " " d["name"] ";\n")
		append(cg, "dispose", sprintf(dispose, f))
		append(cg, "serialize", sprintf(serialize, f))
		append(cg, "deserialize", sprintf(deserialize, f))
		return
	}

	append(cg, "fields",
		"\t" CodegenCType["u32"] " " d["name"] "_len;\n" \
		"\t" CodegenCType[d["type"]] " *" d["name"] ";\n")

	# The pointer is checked, because the length is deserialized first,
	# and the allocation that follows it may have failed.
	if (dispose)
		append(cg, "dispose", "\tif (" f ")\n" \
			"\t\tfor (size_t i = 0; i < " f "_len; i++)\n" \
			indent(indent(sprintf(dispose, f "[i]"))))
	append(cg, "dispose", "\tfree(" f ");\n")

	# Byte arrays are transferred in bulk, anything else goes item by item.
	append(cg, "serialize", sprintf(CodegenSerialize["u32"], f "_len"))
	if (d["type"] == "u8" || d["type"] == "i8") {
		append(cg, "serialize",
			"\tstr_append_data(w, " f ", " f "_len);\n")
	} else if (serialize) {
		append(cg, "serialize",
			"\tfor (size_t i = 0; i < " f "_len; i++)\n" \
			indent(sprintf(serialize, f "[i]")))
	}

	# The length is a uint32_t, so "length + 1" needs to be computed
	# as a size_t: otherwise a length of UINT32_MAX would wrap around to
	# a zero-sized allocation, which the loop below would then write past.
	# Where size_t itself wraps around, deserialization is made to fail.
	append(cg, "deserialize", sprintf(CodegenDeserialize["u32"], f "_len") \
		"\tif (!((size_t) " f "_len + 1)\n" \
		"\t || !(" f " = calloc((size_t) " f "_len + 1, sizeof *" f ")))\n" \
		"\t\treturn false;\n")
	if (d["type"] == "u8" || d["type"] == "i8") {
		append(cg, "deserialize",
			"\tif (msg_unpacker_get_available(r) < " f "_len)\n" \
			"\t\treturn false;\n" \
			"\tmemcpy(" f ", r->data + r->offset, " f "_len);\n" \
			"\tr->offset += " f "_len;\n")
	} else if (deserialize) {
		append(cg, "deserialize",
			"\tfor (size_t i = 0; i < " f "_len; i++)\n" \
			indent(sprintf(deserialize, f "[i]")))
	}
}
# Print a C struct definition from the fields accumulated in cg, followed by
# its _free, _serialize, and _deserialize functions--each of them only when
# there is any code for it.  Templates calling the printed functions are
# registered under the struct's name, and cg is reset for further use.
#
# Only name and cg are arguments, the remaining parameters are local variables.
function codegen_struct(name, cg, ctype, funcname, i) {
	ctype = "struct " PrefixLower cameltosnake(name)
	print ""
	print ctype " {"
	print cg["fields"] "};"

	if (cg["dispose"]) {
		funcname = PrefixLower cameltosnake(name) "_free"
		print ""
		print "static void\n" funcname "(" ctype " *self) {"
		print cg["dispose"] "}"
		CodegenDispose[name] = "\t" funcname "(&%s);\n"
	}
	if (cg["serialize"]) {
		funcname = PrefixLower cameltosnake(name) "_serialize"
		print ""
		print "static bool\n" \
			funcname "(\n\t\tconst " ctype " *self, struct str *w) {"
		print cg["serialize"] "\treturn true;"
		print "}"
		CodegenSerialize[name] = "\tif (!" funcname "(&%s, w))\n" \
			"\t\treturn false;\n"
	}
	if (cg["deserialize"]) {
		funcname = PrefixLower cameltosnake(name) "_deserialize"
		print ""
		print "static bool\n" \
			funcname "(\n\t\t" ctype " *self, struct msg_unpacker *r) {"
		print cg["deserialize"] "\treturn true;"
		print "}"
		CodegenDeserialize[name] = "\tif (!" funcname "(&%s, r))\n" \
			"\t\treturn false;\n"
	}

	CodegenCType[name] = ctype
	# The iterator is a local, so that it cannot clobber a global "i".
	for (i in cg)
		delete cg[i]
}
# Declare the tag as the first member of the union,
# and remember its type and name for the generation of switch statements.
function codegen_union_tag(d, cg) {
	append(cg, "fields", "\t" CodegenCType[d["type"]] " " d["name"] ";\n")
	cg["tagname"] = d["name"]
	cg["tagtype"] = d["type"]
}
# Process one case of a union: scg holds the code accumulated for the case's
# struct, cg holds that of the union.  Cases with nothing to dispose of or
# deserialize merely have their label recorded in cg["structless"], the rest
# get a struct generated, together with a switch branch for each operation.
#
# Only name, casename, cg, and scg are arguments, the remaining parameters
# are local variables.
function codegen_union_struct( \
		name, casename, cg, scg, structname, fieldname, fullcasename, i) {
	# Don't generate obviously useless structs.
	fullcasename = toupper(cameltosnake(cg["tagtype"])) "_" casename
	if (!scg["dispose"] && !scg["deserialize"]) {
		append(cg, "structless", "\tcase " PrefixUpper fullcasename ":\n")
		# The iterator is a local, so that it cannot clobber a global "i".
		for (i in scg)
			delete scg[i]
		return
	}

	# And thus not all generated structs are present in Types.
	structname = name "_" casename
	fieldname = tolower(casename)
	codegen_struct(structname, scg)

	append(cg, "fields", "\t" CodegenCType[structname] " " fieldname ";\n")
	if (CodegenDispose[structname])
		append(cg, "dispose", "\tcase " PrefixUpper fullcasename ":\n" \
			indent(sprintf(CodegenDispose[structname], "self->" fieldname)) \
			"\t\tbreak;\n")

	# With no de/serialization code, this will simply recognize the tag.
	append(cg, "serialize", "\tcase " PrefixUpper fullcasename ":\n" \
		indent(sprintf(CodegenSerialize[structname], "self->" fieldname)) \
		"\t\tbreak;\n")
	append(cg, "deserialize", "\tcase " PrefixUpper fullcasename ":\n" \
		indent(sprintf(CodegenDeserialize[structname], "self->" fieldname)) \
		"\t\tbreak;\n")
}
# Print a C union definition from the members accumulated in cg, followed by
# its _free, _serialize, and _deserialize functions, all of which switch
# on the tag.  Templates calling the printed functions are registered
# under the union's name, and cg is reset for further use.
#
# Only name and cg are arguments, the remaining parameters are local variables.
function codegen_union(name, cg, f, ctype, funcname, i) {
	ctype = "union " PrefixLower cameltosnake(name)
	print ""
	print ctype " {"
	print cg["fields"] "};"

	f = "self->" cg["tagname"]
	if (cg["dispose"]) {
		funcname = PrefixLower cameltosnake(name) "_free"
		print ""
		print "static void\n" funcname "(" ctype " *self) {"
		print "\tswitch (" f ") {"
		# Cases without a struct share one branch that handles the tag alone.
		if (cg["structless"])
			print cg["structless"] \
				indent(sprintf(CodegenDispose[cg["tagtype"]], f)) "\t\tbreak;"
		# Unknown tags are ignored, rather than treated as an error.
		print cg["dispose"] "\tdefault:"
		print "\t\tbreak;"
		print "\t}"
		print "}"
		CodegenDispose[name] = "\t" funcname "(&%s);\n"
	}
	if (cg["serialize"]) {
		funcname = PrefixLower cameltosnake(name) "_serialize"
		print ""
		print "static bool\n" \
			funcname "(\n\t\tconst " ctype " *self, struct str *w) {"
		print "\tswitch (" f ") {"
		if (cg["structless"])
			print cg["structless"] \
				indent(sprintf(CodegenSerialize[cg["tagtype"]], f)) "\t\tbreak;"
		print cg["serialize"] "\tdefault:"
		print "\t\treturn false;"
		print "\t}"
		print "\treturn true;"
		print "}"
		CodegenSerialize[name] = "\tif (!" funcname "(&%s, w))\n" \
			"\t\treturn false;\n"
	}
	if (cg["deserialize"]) {
		funcname = PrefixLower cameltosnake(name) "_deserialize"
		print ""
		print "static bool\n" \
			funcname "(\n\t\t" ctype " *self, struct msg_unpacker *r) {"
		# The tag is read first, and only then it can be switched upon.
		print sprintf(CodegenDeserialize[cg["tagtype"]], f)
		print "\tswitch (" f ") {"
		if (cg["structless"])
			print cg["structless"] "\t\tbreak;"
		print cg["deserialize"] "\tdefault:"
		print "\t\treturn false;"
		print "\t}"
		print "\treturn true;"
		print "}"
		CodegenDeserialize[name] = "\tif (!" funcname "(&%s, r))\n" \
			"\t\treturn false;\n"
	}

	CodegenCType[name] = ctype
	# The iterator is a local, so that it cannot clobber a global "i".
	for (i in cg)
		delete cg[i]
}

541
tools/lxdrgen-go.awk Normal file
View File

@ -0,0 +1,541 @@
# lxdrgen-go.awk: Go backend for lxdrgen.awk.
#
# Copyright (c) 2022, Přemysl Eric Janouch <p@janouch.name>
# SPDX-License-Identifier: 0BSD
#
# This backend also enables proxying to other endpoints using JSON.
# Register a type under the given name as internal,
# and remember how that type is spelled in Go.
function define_internal(name, gotype) {
	CodegenGoType[name] = gotype
	Types[name] = "internal"
}
# Register the signed integer type of the given bit width, iN as intN,
# together with its JSON, serialization, and deserialization templates.
# Only size is an argument, the remaining parameters are local variables.
function define_sint(size, shortname, gotype) {
	gotype = "int" size
	shortname = "i" size
	define_internal(shortname, gotype)
	CodegenAppendJSON[shortname] = \
		"\tb = strconv.AppendInt(b, int64(%s), 10)\n"

	if (size != 8) {
		# Multibyte integers go through encoding/binary in big endian.
		CodegenSerialize[shortname] = "\tdata = binary.BigEndian.AppendUint" \
			size "(data, uint" size "(%s))\n"
		CodegenDeserialize[shortname] = \
			"\tif len(data) >= " (size / 8) " {\n" \
			"\t\t%s = " gotype "(binary.BigEndian.Uint" size "(data))\n" \
			"\t\tdata = data[" (size / 8) ":]\n" \
			"\t} else {\n" \
			"\t\treturn nil, false\n" \
			"\t}\n"
	} else {
		# Single bytes are appended and sliced off directly.
		CodegenSerialize[shortname] = "\tdata = append(data, uint8(%s))\n"
		CodegenDeserialize[shortname] = \
			"\tif len(data) >= 1 {\n" \
			"\t\t%s, data = int8(data[0]), data[1:]\n" \
			"\t} else {\n" \
			"\t\treturn nil, false\n" \
			"\t}\n"
	}
}
# Register the unsigned integer type of the given bit width, uN as uintN,
# together with its JSON, serialization, and deserialization templates.
# Only size is an argument, the remaining parameters are local variables.
function define_uint(size, shortname, gotype) {
	# Both []byte and []uint8 luckily marshal as base64-encoded JSON strings,
	# so there's no need to rename the type as an exception.
	gotype = "uint" size
	shortname = "u" size
	define_internal(shortname, gotype)
	CodegenAppendJSON[shortname] = \
		"\tb = strconv.AppendUint(b, uint64(%s), 10)\n"

	if (size != 8) {
		# Multibyte integers go through encoding/binary in big endian.
		CodegenSerialize[shortname] = \
			"\tdata = binary.BigEndian.AppendUint" size "(data, %s)\n"
		CodegenDeserialize[shortname] = \
			"\tif len(data) >= " (size / 8) " {\n" \
			"\t\t%s = binary.BigEndian.Uint" size "(data)\n" \
			"\t\tdata = data[" (size / 8) ":]\n" \
			"\t} else {\n" \
			"\t\treturn nil, false\n" \
			"\t}\n"
	} else {
		# Single bytes are appended and sliced off directly.
		CodegenSerialize[shortname] = "\tdata = append(data, %s)\n"
		CodegenDeserialize[shortname] = \
			"\tif len(data) >= 1 {\n" \
			"\t\t%s, data = data[0], data[1:]\n" \
			"\t} else {\n" \
			"\t\treturn nil, false\n" \
			"\t}\n"
	}
}
# Return the name of an unexported helper in generated code, which is
# the given name with a fixed "proto" prefix.
# Currently two outputs cannot coexist within the same package.
function codegen_private(name) {
	return ("proto" name)
}
# Register all primitive types with their Go templates, and print the prologue
# of the generated file: the package clause, imports, and helper functions
# for booleans, strings, and the JSON form of enumerations.
#
# The package name is taken from the GOPACKAGE environment variable,
# falling back to "main".  This function takes no arguments,
# funcname is a local variable.
function codegen_begin(    funcname) {
	define_sint("8")
	define_sint("16")
	define_sint("32")
	define_sint("64")
	define_uint("8")
	define_uint("16")
	define_uint("32")
	define_uint("64")
	define_internal("bool", "bool")
	define_internal("string", "string")

	# Cater to "go generate", for what it's worth.
	# Note that the environment is exposed by AWK in the ENVIRON array.
	CodegenPackage = ENVIRON["GOPACKAGE"]
	if (!CodegenPackage)
		CodegenPackage = "main"

	print "// Code generated from " FILENAME ". DO NOT EDIT."
	print ""
	print "package " CodegenPackage
	print ""
	print "import ("
	print "\t`encoding/base64`"
	print "\t`encoding/binary`"
	print "\t`encoding/json`"
	print "\t`errors`"
	print "\t`math`"
	print "\t`strconv`"
	print "\t`unicode/utf8`"
	print ")"
	print ""
	print "// This is a hack to always use the base64 import."
	print "var _ = base64.StdEncoding"
	print ""

	CodegenAppendJSON["bool"] = \
		"\tb = strconv.AppendBool(b, %s)\n"
	CodegenSerialize["bool"] = \
		"\tif %s {\n" \
		"\t\tdata = append(data, 1)\n" \
		"\t} else {\n" \
		"\t\tdata = append(data, 0)\n" \
		"\t}\n"

	funcname = codegen_private("ConsumeBoolFrom")
	print "// " funcname " tries to deserialize a boolean value"
	print "// from the beginning of a byte stream. When successful,"
	print "// it returns a subslice with any data that might follow."
	print "func " funcname "(data []byte, b *bool) ([]byte, bool) {"
	print "\tif len(data) < 1 {"
	print "\t\treturn nil, false"
	print "\t}"
	print "\tif data[0] != 0 {"
	print "\t\t*b = true"
	print "\t} else {"
	print "\t\t*b = false"
	print "\t}"
	print "\treturn data[1:], true"
	print "}"
	print ""
	CodegenDeserialize["bool"] = \
		"\tif data, ok = " funcname "(data, &%s); !ok {\n" \
		"\t\treturn nil, ok\n" \
		"\t}\n"

	# The length is widened before the comparison: math.MaxUint32 is
	# an untyped constant that overflows int where that has 32 bits,
	# which makes the direct comparison a compile-time error there.
	funcname = codegen_private("AppendStringTo")
	print "// " funcname " tries to serialize a string value,"
	print "// appending it to the end of a byte stream."
	print "func " funcname "(data []byte, s string) ([]byte, bool) {"
	print "\tif uint64(len(s)) > math.MaxUint32 {"
	print "\t\treturn nil, false"
	print "\t}"
	print "\tdata = binary.BigEndian.AppendUint32(data, uint32(len(s)))"
	print "\treturn append(data, s...), true"
	print "}"
	print ""
	CodegenSerialize["string"] = \
		"\tif data, ok = " funcname "(data, %s); !ok {\n" \
		"\t\treturn nil, ok\n" \
		"\t}\n"

	funcname = codegen_private("ConsumeStringFrom")
	print "// " funcname " tries to deserialize a string value"
	print "// from the beginning of a byte stream. When successful,"
	print "// it returns a subslice with any data that might follow."
	print "func " funcname "(data []byte, s *string) ([]byte, bool) {"
	print "\tif len(data) < 4 {"
	print "\t\treturn nil, false"
	print "\t}"
	print "\tlength := binary.BigEndian.Uint32(data)"
	print "\tif data = data[4:]; uint64(len(data)) < uint64(length) {"
	print "\t\treturn nil, false"
	print "\t}"
	print "\t*s = string(data[:length])"
	print "\tif !utf8.ValidString(*s) {"
	print "\t\treturn nil, false"
	print "\t}"
	print "\treturn data[length:], true"
	print "}"
	print ""
	CodegenDeserialize["string"] = \
		"\tif data, ok = " funcname "(data, &%s); !ok {\n" \
		"\t\treturn nil, ok\n" \
		"\t}\n"

	funcname = codegen_private("UnmarshalEnumJSON")
	print "// " funcname " converts a JSON fragment to an integer,"
	print "// ensuring that it's within the expected range of enum values."
	print "func " funcname "(data []byte) (int64, error) {"
	print "\tvar n int64"
	print "\tif err := json.Unmarshal(data, &n); err != nil {"
	print "\t\treturn 0, err"
	print "\t} else if n > math.MaxInt8 || n < math.MinInt8 {"
	print "\t\treturn 0, errors.New(`integer out of range`)"
	print "\t} else {"
	print "\t\treturn n, nil"
	print "\t}"
	print "}"
	print ""
}
# Print a constant as an untyped Go constant, with the prefix and
# a camel-case name, followed by an empty line.
function codegen_constant(name, value) {
	print "const " (PrefixCamel snaketocamel(name)) " = " value
	print ""
}
# Accumulate one enumerator in cg: its constant definition, its String()
# and UnmarshalJSON() branches, and its entry on the list of values
# that are marshalled to JSON by name.
# The goname parameter is a local variable.
function codegen_enum_value(name, subname, value, cg, goname) {
	goname = PrefixCamel name snaketocamel(subname)
	append(cg, "fields", "\t" goname " = " value "\n")
	append(cg, "stringer",
		"\tcase " goname ":\n\t\treturn `" snaketocamel(subname) "`\n")
	append(cg, "marshal", goname ",\n")
	append(cg, "unmarshal",
		"\tcase `" snaketocamel(subname) "`:\n\t\t*v = " goname "\n")
}
# Print a Go type for an enumeration, based on int8, from the values
# accumulated in cg, along with its String(), MarshalJSON(),
# and UnmarshalJSON() methods.  Templates transferring it as a single byte
# are registered under the enum's name, and cg is reset for further use.
#
# Only name and cg are arguments, the remaining parameters are local variables.
function codegen_enum(name, cg, gotype, fields, funcname, i) {
	gotype = PrefixCamel name
	print "type " gotype " int8"
	print ""
	print "const ("
	print cg["fields"] ")"
	print ""
	# Values without a name are stringified as numbers.
	print "func (v " gotype ") String() string {"
	print "\tswitch v {"
	print cg["stringer"] "\tdefault:"
	print "\t\treturn strconv.Itoa(int(v))"
	print "\t}"
	print "}"
	print ""

	# Turn the accumulated "Name,\n" entries into a single case label list,
	# with the last comma replaced by a colon.
	CodegenIsMarshaler[name] = 1
	fields = cg["marshal"]
	sub(/,\n$/, ":", fields)
	gsub(/\n/, "\n\t", fields)
	print "func (v " gotype ") MarshalJSON() ([]byte, error) {"
	print "\tswitch v {"
	print indent("case " fields)
	print "\t\treturn []byte(`\"` + v.String() + `\"`), nil"
	print "\t}"
	print "\treturn json.Marshal(int(v))"
	print "}"
	print ""

	# On input, both names and range-checked numbers are accepted.
	funcname = codegen_private("UnmarshalEnumJSON")
	print "func (v *" gotype ") UnmarshalJSON(data []byte) error {"
	print "\tvar s string"
	print "\tif json.Unmarshal(data, &s) == nil {"
	print "\t\t// Handled below."
	print "\t} else if n, err := " funcname "(data); err != nil {"
	print "\t\treturn err"
	print "\t} else {"
	print "\t\t*v = " gotype "(n)"
	print "\t\treturn nil"
	print "\t}"
	print ""
	print "\tswitch s {"
	print cg["unmarshal"] "\tdefault:"
	print "\t\treturn errors.New(`unrecognized value: ` + s)"
	print "\t}"
	print "\treturn nil"
	print "}"
	print ""

	# XXX: This should also check if it isn't out-of-range for any reason,
	# but our usage of sprintf() stands in the way a bit.
	CodegenSerialize[name] = "\tdata = append(data, uint8(%s))\n"
	CodegenDeserialize[name] = \
		"\tif len(data) >= 1 {\n" \
		"\t\t%s, data = " gotype "(data[0]), data[1:]\n" \
		"\t} else {\n" \
		"\t\treturn nil, false\n" \
		"\t}\n"

	CodegenGoType[name] = gotype
	# The iterator is a local, so that it cannot clobber a global "i".
	for (i in cg)
		delete cg[i]
}
# Return Go code appending the JSON form of expression f of the given type
# to the byte slice "b".  Types with an append template use that directly,
# everything else goes through a call that may fail.
# The marshal parameter is a local variable.
function codegen_marshal(type, f, marshal) {
	if (CodegenAppendJSON[type])
		return sprintf(CodegenAppendJSON[type], f)

	# Complex types are json.Marshalers, there's no need to json.Marshal(&f).
	marshal = CodegenIsMarshaler[type] \
		? (f ".MarshalJSON()") : ("json.Marshal(" f ")")
	return "\tif j, err := " marshal "; err != nil {\n" \
		"\t\treturn nil, err\n" \
		"\t} else {\n" \
		"\t\tb = append(b, j...)\n" \
		"\t}\n"
}
# Accumulate code in cg["marshal"] that appends one field, described by d,
# to the JSON object under construction, as ,"name":VALUE.
# Only d and cg are arguments, the remaining parameters are local variables.
function codegen_struct_field_marshal(d, cg, camel, f, key) {
	camel = snaketocamel(d["name"])
	f = "s." camel

	# All variants open with the same append of the comma and the quoted key,
	# and only differ in what immediately follows the colon.
	key = "\tb = append(b, `,\"" decapitalize(camel) "\":"
	if (!d["isarray"]) {
		append(cg, "marshal", key "`...)\n" codegen_marshal(d["type"], f))
	} else if (d["type"] == "u8") {
		# Note that we do not produce `null` for nil slices,
		# unlike encoding/json.  And arrays never get deserialized as such.
		append(cg, "marshal", key "\"`...)\n" \
			"\tb = append(b, base64.StdEncoding.EncodeToString(" f ")...)\n" \
			"\tb = append(b, '\"')\n")
	} else {
		append(cg, "marshal", key "[`...)\n" \
			"\tfor i := 0; i < len(" f "); i++ {\n" \
			"\t\tif i > 0 {\n" \
			"\t\t\tb = append(b, ',')\n" \
			"\t\t}\n" \
			indent(codegen_marshal(d["type"], f "[i]")) \
			"\t}\n" \
			"\tb = append(b, ']')\n")
	}
}
# Generate the declaration, as well as JSON marshalling, serialization, and
# deserialization code for one struct field described by d, accumulating
# everything within cg.  Arrays become Go slices, and are transferred
# with their length in front, as a u32.
#
# Only d and cg are arguments, the remaining parameters are local variables.
function codegen_struct_field(d, cg, camel, f, serialize, deserialize) {
	codegen_struct_field_marshal(d, cg)

	camel = snaketocamel(d["name"])
	f = "s." camel
	serialize = CodegenSerialize[d["type"]]
	deserialize = CodegenDeserialize[d["type"]]
	if (!d["isarray"]) {
		append(cg, "fields", "\t" camel " " CodegenGoType[d["type"]] \
			" `json:\"" decapitalize(camel) "\"`\n")
		append(cg, "serialize", sprintf(serialize, f))
		append(cg, "deserialize", sprintf(deserialize, f))
		return
	}

	append(cg, "fields", "\t" camel " []" CodegenGoType[d["type"]] \
		" `json:\"" decapitalize(camel) "\"`\n")

	# XXX: This should also check if it isn't out-of-range for any reason.
	append(cg, "serialize",
		sprintf(CodegenSerialize["u32"], "uint32(len(" f "))"))
	# Byte slices are appended in bulk, anything else goes item by item.
	if (d["type"] == "u8") {
		append(cg, "serialize",
			"\tdata = append(data, " f "...)\n")
	} else {
		append(cg, "serialize",
			"\tfor i := 0; i < len(" f "); i++ {\n" \
			indent(sprintf(serialize, f "[i]")) \
			"\t}\n")
	}

	# The length is read into a variable scoped to its own block.
	append(cg, "deserialize",
		"\t{\n" \
		"\t\tvar length uint32\n" \
		indent(sprintf(CodegenDeserialize["u32"], "length")))
	if (d["type"] == "u8") {
		# Byte slices are not copied, they alias the input data.
		append(cg, "deserialize",
			"\t\tif uint64(len(data)) < uint64(length) {\n" \
			"\t\t\treturn nil, false\n" \
			"\t\t}\n" \
			"\t\t" f ", data = data[:length], data[length:]\n" \
			"\t}\n")
	} else {
		# NOTE(review): the slice is allocated for the declared length
		# before any of its items are read--confirm whether untrusted input
		# should be able to request allocations of arbitrary size.
		append(cg, "deserialize",
			"\t\t" f " = make([]" CodegenGoType[d["type"]] ", length)\n" \
			"\t}\n" \
			"\tfor i := 0; i < len(" f "); i++ {\n" \
			indent(sprintf(deserialize, f "[i]")) \
			"\t}\n")
	}
}
# Generate the tag field of a union member struct: its JSON marshalling code,
# its Go field declaration with a json tag, and its binary serialization.
# The parameters following cg are local variables.
function codegen_struct_tag(d, cg, camel, f) {
	codegen_struct_field_marshal(d, cg)

	camel = snaketocamel(d["name"])
	f = "s." camel
	cg["fields"] = cg["fields"] "\t" camel " " CodegenGoType[d["type"]] \
		" `json:\"" decapitalize(camel) "\"`\n"
	cg["serialize"] = cg["serialize"] \
		sprintf(CodegenSerialize[d["type"]], f)

	# There is intentionally no deserialization code,
	# the containing union takes care of the tag itself.
}
# Print the Go definition of struct `name` from the fragments collected in cg,
# followed by whichever of the MarshalJSON, AppendTo and ConsumeFrom methods
# there are fragments for. The type is then registered in the Codegen* tables,
# so that later definitions can refer to it, and cg is emptied for reuse.
# The parameters following cg are local variables; this includes the loop
# variable i, which would otherwise overwrite a global of the same name.
function codegen_struct(name, cg, gotype, i) {
	gotype = PrefixCamel name
	print "type " gotype " struct {\n" cg["fields"] "}\n"

	if (cg["marshal"]) {
		CodegenIsMarshaler[name] = 1
		print "func (s *" gotype ") MarshalJSON() ([]byte, error) {"
		print "\tb := []byte{}"
		# Marshalling fragments all begin with a comma,
		# the very first one is overwritten by the opening brace.
		print cg["marshal"] "\tb[0] = '{'"
		print "\treturn append(b, '}'), nil"
		print "}"
		print ""
	}
	if (cg["serialize"]) {
		print "func (s *" gotype ") AppendTo(data []byte) ([]byte, bool) {"
		print "\tok := true"
		print cg["serialize"] "\treturn data, ok"
		print "}"
		print ""
		CodegenSerialize[name] = \
			"\tif data, ok = %s.AppendTo(data); !ok {\n" \
			"\t\treturn nil, ok\n" \
			"\t}\n"
	}
	if (cg["deserialize"]) {
		print "func (s *" gotype ") ConsumeFrom(data []byte) ([]byte, bool) {"
		print "\tok := true"
		print cg["deserialize"] "\treturn data, ok"
		print "}"
		print ""
		CodegenDeserialize[name] = \
			"\tif data, ok = %s.ConsumeFrom(data); !ok {\n" \
			"\t\treturn nil, ok\n" \
			"\t}\n"
	}

	CodegenGoType[name] = gotype
	for (i in cg)
		delete cg[i]
}
# Remember the name and the type of the union's tag in cg for later stages.
function codegen_union_tag(d, cg) {
	# Nothing is emitted for the tag as such,
	# it is implied from the type of struct stored in the interface.
	cg["tagname"] = d["name"]
	cg["tagtype"] = d["type"]
}
# Generate the Go struct for one case of union `name` from the fragments
# in scg, and add the corresponding switch cases to the union's "unmarshal",
# "serialize" and "deserialize" fragments in cg.
# The parameters following scg are local variables.
function codegen_union_struct(name, casename, cg, scg, structname, init) {
# The struct is generated directly through codegen_struct(),
# and thus not all generated structs are present in Types.
structname = name snaketocamel(casename)
codegen_struct(structname, scg)
# A composite literal of the case's struct with its tag field preset
# from the variable that the union's generated methods switch over.
init = CodegenGoType[structname] "{" snaketocamel(cg["tagname"]) \
": " decapitalize(snaketocamel(cg["tagname"])) "}"
append(cg, "unmarshal",
"\tcase " CodegenGoType[cg["tagtype"]] snaketocamel(casename) ":\n" \
"\t\ts := " init "\n" \
"\t\terr = json.Unmarshal(data, &s)\n" \
"\t\tu.Interface = &s\n")
append(cg, "serialize",
"\tcase *" CodegenGoType[structname] ":\n" \
indent(sprintf(CodegenSerialize[structname], "union")))
append(cg, "deserialize",
"\tcase " CodegenGoType[cg["tagtype"]] snaketocamel(casename) ":\n" \
"\t\ts := " init "\n" \
indent(sprintf(CodegenDeserialize[structname], "s")) \
"\t\tu.Interface = &s\n")
}
# Print the Go definition of union `name`: a struct wrapping an interface
# value, together with its MarshalJSON, UnmarshalJSON, AppendTo and
# ConsumeFrom methods, which dispatch over the cases collected in cg.
# The type is then registered in the Codegen* tables, and cg is emptied.
# The parameters following cg are local variables; this includes the loop
# variable i, which would otherwise overwrite a global of the same name.
function codegen_union(name, cg, gotype, tagfield, tagvar, i) {
	gotype = PrefixCamel name
	print "type " gotype " struct {"
	print "\tInterface any"
	print "}"
	print ""

	# This cannot be a pointer method, it wouldn't work recursively.
	CodegenIsMarshaler[name] = 1
	print "func (u " gotype ") MarshalJSON() ([]byte, error) {"
	print "\treturn u.Interface.(json.Marshaler).MarshalJSON()"
	print "}"
	print ""

	# Decoding from JSON is two-pass: first extract the tag alone,
	# then unmarshal into the struct that the tag selects.
	tagfield = snaketocamel(cg["tagname"])
	tagvar = decapitalize(tagfield)
	print "func (u *" gotype ") UnmarshalJSON(data []byte) (err error) {"
	print "\tvar t struct {"
	print "\t\t" tagfield " " CodegenGoType[cg["tagtype"]] \
		" `json:\"" tagvar "\"`"
	print "\t}"
	print "\tif err := json.Unmarshal(data, &t); err != nil {"
	print "\t\treturn err"
	print "\t}"
	print ""
	print "\tswitch " tagvar " := t." tagfield "; " tagvar " {"
	print cg["unmarshal"] "\tdefault:"
	print "\t\terr = errors.New(`unsupported value: ` + " tagvar ".String())"
	print "\t}"
	print "\treturn err"
	print "}"
	print ""

	# XXX: Consider changing the interface into an AppendTo/ConsumeFrom one,
	# that would eliminate these type case switches entirely.
	# On the other hand, it would make it possible to send unsuitable structs.
	print "func (u *" gotype ") AppendTo(data []byte) ([]byte, bool) {"
	print "\tok := true"
	print "\tswitch union := u.Interface.(type) {"
	print cg["serialize"] "\tdefault:"
	print "\t\treturn nil, false"
	print "\t}"
	print "\treturn data, ok"
	print "}"
	print ""
	CodegenSerialize[name] = \
		"\tif data, ok = %s.AppendTo(data); !ok {\n" \
		"\t\treturn nil, ok\n" \
		"\t}\n"

	# The tag is consumed here, the case structs only read their own fields.
	print "func (u *" gotype ") ConsumeFrom(data []byte) ([]byte, bool) {"
	print "\tok := true"
	print "\tvar " tagvar " " CodegenGoType[cg["tagtype"]]
	print sprintf(CodegenDeserialize[cg["tagtype"]], tagvar)
	print "\tswitch " tagvar " {"
	print cg["deserialize"] "\tdefault:"
	print "\t\treturn nil, false"
	print "\t}"
	print "\treturn data, ok"
	print "}"
	print ""
	CodegenDeserialize[name] = \
		"\tif data, ok = %s.ConsumeFrom(data); !ok {\n" \
		"\t\treturn nil, ok\n" \
		"\t}\n"

	CodegenGoType[name] = gotype
	for (i in cg)
		delete cg[i]
}

226
tools/lxdrgen-mjs.awk Normal file
View File

@ -0,0 +1,226 @@
# lxdrgen-mjs.awk: JavaScript backend for lxdrgen.awk.
#
# Copyright (c) 2022, Přemysl Eric Janouch <p@janouch.name>
# SPDX-License-Identifier: 0BSD
#
# This backend is currently for decoding the binary format only.
# (JSON is way too expensive to process and transfer.)
#
# Import the resulting script as a JavaScript module.
# Identifiers intentionally aren't prefixed.
# Register `name` in Types as a built-in type,
# which makes it pass the parser's check for known types.
function define_internal(name) {
	Types[name] = "internal"
}
# Register the signed integer type "i<size>", and print the Reader method
# of the same name, which decodes one such integer and advances the offset.
# size is the width in bits; shortname is a local variable.
function define_sint(size, shortname) {
	shortname = "i" size
	CodegenDeserialize[shortname] = "\t%s = r." shortname "()\n"
	define_internal(shortname)

	print ""
	print "\t" shortname "() {"
	# XXX: 64-bit values are converted to a Number.
	# 2^53 - 1 must be enough for anyone, BigInts are a PITA.
	print "\t\tconst " shortname " = " (size == "64" \
		? "Number(this.getBigInt" size "(this.offset))" \
		: "this.getInt" size "(this.offset)")
	print "\t\tthis.offset += " (size / 8)
	print "\t\treturn " shortname
	print "\t}"
}
# Register the unsigned integer type "u<size>", and print the Reader method
# of the same name, which decodes one such integer and advances the offset.
# size is the width in bits; shortname is a local variable.
function define_uint(size, shortname) {
	shortname = "u" size
	CodegenDeserialize[shortname] = "\t%s = r." shortname "()\n"
	define_internal(shortname)

	print ""
	print "\t" shortname "() {"
	# XXX: 64-bit values are converted to a Number.
	# 2^53 - 1 must be enough for anyone, BigInts are a PITA.
	print "\t\tconst " shortname " = " (size == "64" \
		? "Number(this.getBigUint" size "(this.offset))" \
		: "this.getUint" size "(this.offset)")
	print "\t\tthis.offset += " (size / 8)
	print "\t\treturn " shortname
	print "\t}"
}
# Print the file header and the Reader class, a DataView subclass that keeps
# track of a read offset and has one decoding method per built-in type.
# Each built-in type is registered in Types, along with a template
# in CodegenDeserialize that calls the respective method on a reader named r.
function codegen_begin() {
print "// Code generated from " FILENAME ". DO NOT EDIT."
print ""
print "export class Reader extends DataView {"
print "\tconstructor() {"
print "\t\tsuper(...arguments)"
print "\t\tthis.offset = 0"
print "\t\tthis.decoder = new TextDecoder('utf-8', {fatal: true})"
print "\t}"
print ""
print "\tget empty() {"
print "\t\treturn this.byteLength <= this.offset"
print "\t}"
print ""
# require() throws when fewer than len bytes remain, and otherwise returns
# the current position as an offset into the underlying buffer.
print "\trequire(len) {"
print "\t\tif (this.byteLength - this.offset < len)"
print "\t\t\tthrow `Premature end of data`"
print "\t\treturn this.byteOffset + this.offset"
print "\t}"
# Strings are a 32-bit unsigned length followed by that many bytes,
# which are decoded as UTF-8.
define_internal("string")
CodegenDeserialize["string"] = "\t%s = r.string()\n"
print ""
print "\tstring() {"
print "\t\tconst len = this.getUint32(this.offset)"
print "\t\tthis.offset += 4"
print "\t\tconst array = new Uint8Array("
print "\t\t\tthis.buffer, this.require(len), len)"
print "\t\tthis.offset += len"
print "\t\treturn this.decoder.decode(array)"
print "\t}"
# Booleans are a single byte, any non-zero value is true.
define_internal("bool")
CodegenDeserialize["bool"] = "\t%s = r.bool()\n"
print ""
print "\tbool() {"
print "\t\tconst u8 = this.getUint8(this.offset)"
print "\t\tthis.offset += 1"
print "\t\treturn u8 != 0"
print "\t}"
define_sint("8")
define_sint("16")
define_sint("32")
define_sint("64")
define_uint("8")
define_uint("16")
define_uint("32")
define_uint("64")
print "}"
}
# Print constant `name` as an exported const, after an empty line.
# The name is converted from snake_case to camelCase.
function codegen_constant(name, value) {
	# ORS stands for the empty line that precedes the definition.
	print ORS "export const " decapitalize(snaketocamel(name)) " = " value
}
# Add one "Name: value," property line for an enumeration value
# to the "fields" fragment in cg. The enum's own name isn't needed here.
function codegen_enum_value(name, subname, value, cg) {
	cg["fields"] = cg["fields"] "\t" snaketocamel(subname) ": " value ",\n"
}
# Print enum `name` as an exported frozen object made of the properties
# collected in cg["fields"], and register its deserialization template,
# which reads enum values using the reader's i8() method.
# cg is emptied afterwards. The loop variable i is declared as a local,
# so that it cannot overwrite a global variable of the same name.
function codegen_enum(name, cg, i) {
	print ""
	print "export const " name " = Object.freeze({"
	print cg["fields"] "})"

	CodegenDeserialize[name] = "\t%s = r.i8()\n"
	for (i in cg)
		delete cg[i]
}
# Generate the class field declaration and the decoding code for one regular
# struct field described by d, accumulating both in cg.
# Arrays are preceded by their length, which is read with the u32 template.
# The parameters following cg are local variables.
function codegen_struct_field(d, cg, camel, f, deserialize) {
	camel = decapitalize(snaketocamel(d["name"]))
	f = "s." camel
	deserialize = CodegenDeserialize[d["type"]]
	append(cg, "fields", "\t" camel "\n")

	if (!d["isarray"]) {
		append(cg, "deserialize", sprintf(deserialize, f))
		return
	}

	# The length constant lives in its own block,
	# which both of the branches below close again with "\t}\n".
	append(cg, "deserialize",
		"\t{\n" \
		indent(sprintf(CodegenDeserialize["u32"], "const len")))
	if (d["type"] == "u8" || d["type"] == "i8") {
		# Arrays of bytes become typed array views of the reader's buffer.
		append(cg, "deserialize",
			"\t\t" f " = new " (d["type"] == "u8" ? "Uint" : "Int") \
			"8Array(\n" \
			"\t\t\tr.buffer, r.require(len), len)\n" \
			"\t\tr.offset += len\n" \
			"\t}\n")
	} else {
		# Anything else is decoded element by element.
		append(cg, "deserialize",
			"\t\t" f " = new Array(len)\n" \
			"\t}\n" \
			"\tfor (let i = 0; i < " f ".length; i++)\n" \
			indent(sprintf(deserialize, f "[i]")))
	}
}
# Declare the tag field of a union member class.
function codegen_struct_tag(d, cg) {
	# There is intentionally no deserialization code,
	# the containing union takes care of the tag itself.
	cg["fields"] = cg["fields"] \
		"\t" decapitalize(snaketocamel(d["name"])) "\n"
}
# Print struct `name` as an exported class made of the field declarations
# and methods collected in cg, plus a static deserialize(r) method,
# which builds an instance by running the collected decoding code.
# The type's deserialization template is registered, and cg is emptied.
# The loop variable i is declared as a local, so that it cannot overwrite
# a global variable of the same name.
function codegen_struct(name, cg, i) {
	print ""
	print "export class " name " {"
	print cg["fields"] cg["methods"]
	print "\tstatic deserialize(r) {"
	print "\t\tconst s = new " name "()"
	print indent(cg["deserialize"]) "\t\treturn s"
	print "\t}"
	print "}"

	CodegenDeserialize[name] = "\t%s = " name ".deserialize(r)\n"
	for (i in cg)
		delete cg[i]
}
# Remember the name and the type of the union's tag in cg for later stages.
function codegen_union_tag(d, cg) {
	cg["tagname"] = d["name"]
	cg["tagtype"] = d["type"]
}
# Generate the class for one case of union `name` from the fragments in scg,
# and add the corresponding switch case to cg["deserialize"].
# structname is a local variable.
function codegen_union_struct(name, casename, cg, scg, structname) {
	# The class is generated directly through codegen_struct(),
	# and thus not all generated structs are present in Types.
	structname = name snaketocamel(casename)

	# Make the constructor preset the tag field to this case's enum value.
	scg["methods"] = scg["methods"] \
		"\n" \
		"\tconstructor() {\n" \
		"\t\tthis." decapitalize(snaketocamel(cg["tagname"])) \
		" = " cg["tagtype"] "." snaketocamel(casename) "\n" \
		"\t}\n"
	codegen_struct(structname, scg)

	cg["deserialize"] = cg["deserialize"] \
		"\tcase " cg["tagtype"] "." snaketocamel(casename) ":\n" \
		"\t{\n" \
		indent(sprintf(CodegenDeserialize[structname], "const s")) \
		"\t\treturn s\n" \
		"\t}\n"
}
# Print an exported deserialize<name>(r) function for union `name`,
# which reads the tag, and dispatches over the cases collected
# in cg["deserialize"], throwing on any unknown tag value.
# The type's deserialization template is registered, and cg is emptied.
# The parameters following cg are local variables; this includes the loop
# variable i, which would otherwise overwrite a global of the same name.
function codegen_union(name, cg, tagvar, i) {
	tagvar = decapitalize(snaketocamel(cg["tagname"]))

	print ""
	print "export function deserialize" name "(r) {"
	print sprintf(CodegenDeserialize[cg["tagtype"]], "const " tagvar) \
		"\tswitch (" tagvar ") {"
	print cg["deserialize"] "\tdefault:"
	# The template literal needs to interpolate the generated tag constant,
	# whose name is the value of tagvar, rather than the text "tagvar".
	print "\t\tthrow `Unknown " cg["tagtype"] " (${" tagvar "})`"
	print "\t}"
	print "}"

	CodegenDeserialize[name] = "\t%s = deserialize" name "(r)\n"
	for (i in cg)
		delete cg[i]
}

291
tools/lxdrgen.awk Normal file
View File

@ -0,0 +1,291 @@
# lxdrgen.awk: an XDR-derived code generator for network protocols.
#
# Copyright (c) 2022, Přemysl Eric Janouch <p@janouch.name>
# SPDX-License-Identifier: 0BSD
#
# Usage: env LC_ALL=C awk -f lxdrgen.awk -f lxdrgen-{c,go,mjs}.awk \
# -v PrefixCamel=Foo foo.lxdr > foo.{c,go,mjs} | {clang-format,gofmt,...}
# --- Utilities ----------------------------------------------------------------
# Convert CamelCase to snake_case: place an underscore at every transition
# from a lowercase letter to an uppercase one, and lowercase the result.
function cameltosnake(s) {
	# A match is always two characters long: keep the first one,
	# and put an underscore in front of the lowercased second one.
	while (match(s, /[[:lower:]][[:upper:]]/))
		s = substr(s, 1, RSTART) "_" \
			tolower(substr(s, RSTART + 1, 1)) substr(s, RSTART + 2)
	return tolower(s)
}
# Convert snake_case (in any letter case) to CamelCase: capitalize the first
# character, lowercase the rest, and then replace every underscore followed
# by an alphanumeric character with that character in uppercase.
function snaketocamel(s) {
	s = toupper(substr(s, 1, 1)) tolower(substr(s, 2))
	# A match is always two characters long:
	# drop the underscore, and capitalize what follows it.
	while (match(s, /_[[:alnum:]]/))
		s = substr(s, 1, RSTART - 1) \
			toupper(substr(s, RSTART + 1, 1)) substr(s, RSTART + 2)
	return s
}
# Lowercase the first character of s, provided that s contains an uppercase
# letter immediately followed by a lowercase one. Other strings,
# such as all-uppercase ones, are returned unchanged.
function decapitalize(s) {
	if (!match(s, /[[:upper:]][[:lower:]]/))
		return s
	return tolower(substr(s, 1, 1)) substr(s, 2)
}
# Indent all lines of s by one more tab. Values that are false in AWK,
# such as the empty string, are returned unchanged.
function indent(s) {
	if (s) {
		gsub(/\n/, "\n\t", s)
		# Do not leave a lone tab after the final newline.
		sub(/\t*$/, "", s)
		s = "\t" s
	}
	return s
}
# Concatenate value to the end of array element a[key],
# which starts out empty when it does not exist yet.
function append(a, key, value) {
	a[key] = a[key] value
}
# --- Parsing ------------------------------------------------------------------
# Report a fatal error at the current input file and line,
# and terminate with exit status 1. The message is printed both to standard
# output, in the form of a // comment, and to standard error.
function fatal(message) {
print "// " FILENAME ":" FNR ": fatal error: " message
print FILENAME ":" FNR ": fatal error: " message > "/dev/stderr"
exit 1
}
# Skip the remainder of a block comment, reading further input lines
# as necessary. On return, $0 holds whatever follows the terminating "*/".
# Running out of input before finding the terminator is a fatal error.
function skipcomment() {
do {
if (match($0, /[*]\//)) {
$0 = substr($0, RSTART + RLENGTH)
return
}
} while (getline > 0)
fatal("unterminated block comment")
}
# Read the next token from the input, store it in Token, and return it.
# Tokens are cut off the beginning of $0, and further input lines are read
# with getline whenever the current one runs out.
# Whitespace, // line comments, and /* block comments */ are skipped.
# A token is an identifier, a decimal integer, or any other single character.
# At the end of input, Token is set to the empty string.
function nexttoken() {
do {
if (match($0, /^[[:space:]]+/)) {
$0 = substr($0, RLENGTH + 1)
} else if (match($0, /^\/\/.*/)) {
$0 = ""
} else if (match($0, /^\/[*]/)) {
$0 = substr($0, RLENGTH + 1)
skipcomment()
} else if (match($0, /^[[:alpha:]][[:alnum:]_]*/)) {
Token = substr($0, 1, RLENGTH)
$0 = substr($0, RLENGTH + 1)
return Token
# AWK implementations rarely support non-decimal notations
# in their implicit string-to-number conversions.
} else if (match($0, /^(0|-?[1-9][0-9]*)/)) {
Token = substr($0, 1, RLENGTH)
$0 = substr($0, RLENGTH + 1)
return Token
} else if ($0) {
Token = substr($0, 1, 1)
$0 = substr($0, 2)
return Token
}
} while ($0 || getline > 0)
Token = ""
return Token
}
# Pass v through when it is true, and abort otherwise with a fatal error
# that shows the current token and the rest of the current line.
function expect(v) {
	if (v)
		return v
	fatal("broken expectations at `" Token "' before `" $0 "'")
	return v
}
# When the current token equals `what`, consume it and return 1.
# Return 0 otherwise, leaving the token in place.
function accept(what) {
	if (Token == what) {
		nexttoken()
		return 1
	}
	return 0
}
# When the current token starts with a letter, consume it and return it.
# Return 0 otherwise, leaving the token in place. v is a local variable.
function identifier( v) {
	if (Token ~ /^[[:alpha:]]/) {
		v = Token
		nexttoken()
		return v
	}
	return 0
}
# When the current token starts like a decimal integer, consume it and
# return it, in its string form. Return 0 otherwise, leaving the token
# in place. v is a local variable.
function number( v) {
	if (Token ~ /^(0|-?[1-9])/) {
		v = Token
		nexttoken()
		return v
	}
	return 0
}
# Read a number, given either literally, or as the name of a constant
# that has been defined earlier. Anything else is a fatal error.
# ident is a local variable.
function readnumber( ident) {
	ident = identifier()
	if (ident) {
		if (!(ident in Consts))
			fatal("unknown constant: " ident)
		return Consts[ident]
	}
	return expect(number())
}
# Parse a constant definition: "const" NAME "=" VALUE,
# where VALUE is anything that readnumber() accepts.
# Returns 0 when the current token isn't "const", without consuming it.
# Otherwise the constant is stored in Consts, passed to the backend,
# and 1 is returned. Redefining a constant is a fatal error.
# ident and num are local variables.
function defconst( ident, num) {
if (!accept("const"))
return 0
ident = expect(identifier())
expect(accept("="))
num = readnumber()
if (ident in Consts)
fatal("constant redefined: " ident)
Consts[ident] = num
codegen_constant(ident, num)
return 1
}
# Read a type, which is either an inline type definition, or the name
# of a type that is already known. Returns the type's name, or 0 when
# the current token cannot start a type. An unknown type name is a fatal
# error. ident is a local variable.
function readtype( ident) {
	ident = deftype()
	if (!ident) {
		ident = identifier()
		if (!ident)
			return 0
		if (!(ident in Types))
			fatal("unknown type: " ident)
	}
	return ident
}
# Parse an enum definition past its keyword: NAME "{" (IDENT ["=" VALUE] ",")* "}",
# pass every value and then the whole type to the backend,
# register the type in Types, and return its name.
# Values without an explicit number continue from the previous one,
# the first one defaulting to 1. They must be non-zero,
# and lie within the range of -128..127.
# All parameters are local variables.
function defenum( name, ident, value, cg) {
# Ensure that cg is an array before it gets passed on.
delete cg[0]
name = expect(identifier())
expect(accept("{"))
while (!accept("}")) {
ident = expect(identifier())
value = value + 1
if (accept("="))
value = readnumber() + 0
if (!value)
fatal("enumeration values cannot be zero")
if (value < -128 || value > 127)
fatal("enumeration value out of range")
expect(accept(","))
# EnumValues[name] lists all value names, each preceded by SUBSEP,
# while EnumValues[name, ident] counts occurrences to find duplicates.
append(EnumValues, name, SUBSEP ident)
if (EnumValues[name, ident]++)
fatal("duplicate enum value: " ident)
codegen_enum_value(name, ident, value, cg)
}
Types[name] = "enum"
codegen_enum(name, cg)
return name
}
# Parse one field declaration up to and including its semicolon:
# either "void", or TYPE NAME, optionally followed by "<>" to mark an array.
# Returns 0 for void, leaving `out` untouched. Otherwise returns 1,
# with out["type"], out["name"] and out["isarray"] filled in.
# nonvoid is a local variable.
function readfield(out, nonvoid) {
	if (accept("void")) {
		expect(accept(";"))
		return 0
	}

	out["type"] = expect(readtype())
	out["name"] = expect(identifier())
	# TODO: Consider supporting XDR's VLA length limits here.
	# TODO: Consider supporting XDR's fixed-length syntax for string limits.
	out["isarray"] = accept("<") && expect(accept(">"))
	expect(accept(";"))
	return 1
}
# Parse a struct definition past its keyword: NAME "{" FIELD* "}",
# pass every non-void field and then the whole type to the backend,
# register the type in Types, and return its name.
# All parameters are local variables.
function defstruct( name, d, cg) {
# Ensure that d and cg are arrays before they get passed on.
delete d[0]
delete cg[0]
name = expect(identifier())
expect(accept("{"))
while (!accept("}")) {
if (readfield(d))
codegen_struct_field(d, cg)
}
Types[name] = "struct"
codegen_struct(name, cg)
return name
}
# Parse a union definition past its keyword:
# NAME "switch" "(" TYPE TAGNAME ")" "{" ("case" VALUE ":" | FIELD)* "}",
# where the tag's type must be an enum. Every case makes the backend generate
# a struct consisting of the tag and of the fields that follow the case label.
# Finally, the union is passed to the backend and registered in Types,
# and its name is returned. All parameters are local variables.
function defunion( name, tag, tagtype, tagvalue, cg, scg, d, a, i, unseen) {
# Ensure that these are arrays before they get passed on.
delete cg[0]
delete scg[0]
delete d[0]
name = expect(identifier())
expect(accept("switch"))
expect(accept("("))
tag["type"] = tagtype = expect(readtype())
tag["name"] = expect(identifier())
expect(accept(")"))
if (Types[tagtype] != "enum")
fatal("not an enum type: " tagtype)
codegen_union_tag(tag, cg)
# Count the tag enum's values, so that cases can be checked against them.
# Since the list begins with a separator, a[1] is an empty string.
split(EnumValues[tagtype], a, SUBSEP)
for (i in a)
unseen[a[i]]++
expect(accept("{"))
while (!accept("}")) {
if (accept("case")) {
# A new case label concludes the struct of the preceding case.
if (tagvalue)
codegen_union_struct(name, tagvalue, cg, scg)
tagvalue = expect(identifier())
expect(accept(":"))
# A zero count means the value is unknown, or has already been used.
if (!unseen[tagvalue]--)
fatal("no such value or duplicate case: " tagtype "." tagvalue)
codegen_struct_tag(tag, scg)
} else if (tagvalue) {
if (readfield(d))
codegen_struct_field(d, scg)
} else {
fatal("union fields must fall under a case")
}
}
# Conclude the struct of the last case, if there were any cases at all.
if (tagvalue)
codegen_union_struct(name, tagvalue, cg, scg)
# Enum values from a[2..] that are still counted in unseen have no case,
# and so they are simply not recognized/allowed in this union.
Types[name] = "union"
codegen_union(name, cg)
return name
}
# Parse a type definition introduced by "enum", "struct" or "union",
# and return the new type's name. Returns 0 when the current token is none
# of these keywords, without consuming it.
function deftype() {
	return accept("enum") ? defenum() \
		: (accept("struct") ? defstruct() \
		: (accept("union") ? defunion() \
		: 0))
}
# The main rule: it starts with the first input record, and consumes all
# the remaining input itself through nexttoken(). The input is a sequence
# of constant and type definitions, each terminated by a semicolon.
{
# Derive snake_case prefixes from PrefixCamel, each with a trailing
# underscore. They aren't used in this file, presumably backends use them.
if (PrefixCamel) {
PrefixLower = tolower(cameltosnake(PrefixCamel)) "_"
PrefixUpper = toupper(cameltosnake(PrefixCamel)) "_"
}
# This is not in a BEGIN clause (even though it consumes all input),
# so that the code generator can insert the first FILENAME.
codegen_begin()
nexttoken()
while (Token != "") {
expect(defconst() || deftype())
expect(accept(";"))
}
}