liberty/tools/lxdrgen.awk
Přemysl Eric Janouch 717c301207
lxdrgen: fix decapitalization
decapitalize() is typically called on snaketocamel() output,
which always makes the first letter uppercase.
2023-06-28 16:24:59 +02:00

293 lines
6.2 KiB
Awk

# lxdrgen.awk: an XDR-derived code generator for network protocols.
#
# Copyright (c) 2022 - 2023, Přemysl Eric Janouch <p@janouch.name>
# SPDX-License-Identifier: 0BSD
#
# Usage: env LC_ALL=C awk -f lxdrgen.awk -f lxdrgen-{c,go,mjs}.awk \
# -v PrefixCamel=Foo foo.lxdr > foo.{c,go,mjs} | {clang-format,gofmt,...}
# --- Utilities ----------------------------------------------------------------
function cameltosnake(s) {
while (match(s, /[[:lower:]][[:upper:]]/)) {
s = substr(s, 1, RSTART) "_" \
tolower(substr(s, RSTART + 1, RLENGTH - 1)) \
substr(s, RSTART + RLENGTH)
}
return tolower(s)
}
function snaketocamel(s) {
s = toupper(substr(s, 1, 1)) tolower(substr(s, 2))
while (match(s, /_[[:alnum:]]/)) {
s = substr(s, 1, RSTART - 1) \
toupper(substr(s, RSTART + 1, RLENGTH - 1)) \
substr(s, RSTART + RLENGTH)
}
return s
}
function decapitalize(s) {
if (match(s, /^[[:upper:]][[:lower:]]/))
return tolower(substr(s, 1, 1)) substr(s, 2)
if (match(s, /^[[:upper:]]$/))
return tolower(s)
return s
}
function indent(s) {
if (!s)
return s
gsub(/\n/, "\n\t", s)
sub(/\t*$/, "", s)
return "\t" s
}
function append(a, key, value) {
a[key] = a[key] value
}
# --- Parsing ------------------------------------------------------------------
function fatal(message) {
print "// " FILENAME ":" FNR ": fatal error: " message
print FILENAME ":" FNR ": fatal error: " message > "/dev/stderr"
exit 1
}
function skipcomment() {
do {
if (match($0, /[*]\//)) {
$0 = substr($0, RSTART + RLENGTH)
return
}
} while (getline > 0)
fatal("unterminated block comment")
}
function nexttoken() {
do {
if (match($0, /^[[:space:]]+/)) {
$0 = substr($0, RLENGTH + 1)
} else if (match($0, /^\/\/.*/)) {
$0 = ""
} else if (match($0, /^\/[*]/)) {
$0 = substr($0, RLENGTH + 1)
skipcomment()
} else if (match($0, /^[[:alpha:]][[:alnum:]_]*/)) {
Token = substr($0, 1, RLENGTH)
$0 = substr($0, RLENGTH + 1)
return Token
# AWK implementations rarely support non-decimal notations
# in their implicit string-to-number conversions.
} else if (match($0, /^(0|-?[1-9][0-9]*)/)) {
Token = substr($0, 1, RLENGTH)
$0 = substr($0, RLENGTH + 1)
return Token
} else if ($0) {
Token = substr($0, 1, 1)
$0 = substr($0, 2)
return Token
}
} while ($0 || getline > 0)
Token = ""
return Token
}
function expect(v) {
if (!v)
fatal("broken expectations at `" Token "' before `" $0 "'")
return v
}
function accept(what) {
if (Token != what)
return 0
nexttoken()
return 1
}
function identifier( v) {
if (Token !~ /^[[:alpha:]]/)
return 0
v = Token
nexttoken()
return v
}
function number( v) {
if (Token !~ /^(0|-?[1-9])/)
return 0
v = Token
nexttoken()
return v
}
function readnumber( ident) {
ident = identifier()
if (!ident)
return expect(number())
if (!(ident in Consts))
fatal("unknown constant: " ident)
return Consts[ident]
}
function defconst( ident, num) {
if (!accept("const"))
return 0
ident = expect(identifier())
expect(accept("="))
num = readnumber()
if (ident in Consts)
fatal("constant redefined: " ident)
Consts[ident] = num
codegen_constant(ident, num)
return 1
}
function readtype( ident) {
ident = deftype()
if (ident)
return ident
ident = identifier()
if (!ident)
return 0
if (!(ident in Types))
fatal("unknown type: " ident)
return ident
}
function defenum( name, ident, value, cg) {
delete cg[0]
name = expect(identifier())
expect(accept("{"))
while (!accept("}")) {
ident = expect(identifier())
value = value + 1
if (accept("="))
value = readnumber() + 0
if (!value)
fatal("enumeration values cannot be zero")
if (value < -128 || value > 127)
fatal("enumeration value out of range")
expect(accept(","))
append(EnumValues, name, SUBSEP ident)
if (EnumValues[name, ident]++)
fatal("duplicate enum value: " ident)
codegen_enum_value(name, ident, value, cg)
}
Types[name] = "enum"
codegen_enum(name, cg)
return name
}
function readfield(out, nonvoid) {
nonvoid = !accept("void")
if (nonvoid) {
out["type"] = expect(readtype())
out["name"] = expect(identifier())
# TODO: Consider supporting XDR's VLA length limits here.
# TODO: Consider supporting XDR's fixed-length syntax for string limits.
out["isarray"] = accept("<") && expect(accept(">"))
}
expect(accept(";"))
return nonvoid
}
function defstruct( name, d, cg) {
delete d[0]
delete cg[0]
name = expect(identifier())
expect(accept("{"))
while (!accept("}")) {
if (readfield(d))
codegen_struct_field(d, cg)
}
Types[name] = "struct"
codegen_struct(name, cg)
return name
}
function defunion( name, tag, tagtype, tagvalue, cg, scg, d, a, i, unseen) {
delete cg[0]
delete scg[0]
delete d[0]
name = expect(identifier())
expect(accept("switch"))
expect(accept("("))
tag["type"] = tagtype = expect(readtype())
tag["name"] = expect(identifier())
expect(accept(")"))
if (Types[tagtype] != "enum")
fatal("not an enum type: " tagtype)
codegen_union_tag(tag, cg)
split(EnumValues[tagtype], a, SUBSEP)
for (i in a)
unseen[a[i]]++
expect(accept("{"))
while (!accept("}")) {
if (accept("case")) {
if (tagvalue)
codegen_union_struct(name, tagvalue, cg, scg)
tagvalue = expect(identifier())
expect(accept(":"))
if (!unseen[tagvalue]--)
fatal("no such value or duplicate case: " tagtype "." tagvalue)
codegen_struct_tag(tag, scg)
} else if (tagvalue) {
if (readfield(d))
codegen_struct_field(d, scg)
} else {
fatal("union fields must fall under a case")
}
}
if (tagvalue)
codegen_union_struct(name, tagvalue, cg, scg)
# What remains non-zero in unseen[2..] is simply not recognized/allowed.
Types[name] = "union"
codegen_union(name, cg)
return name
}
function deftype() {
if (accept("enum"))
return defenum()
if (accept("struct"))
return defstruct()
if (accept("union"))
return defunion()
return 0
}
{
if (PrefixCamel) {
PrefixLower = tolower(cameltosnake(PrefixCamel)) "_"
PrefixUpper = toupper(cameltosnake(PrefixCamel)) "_"
}
# This is not in a BEGIN clause (even though it consumes all input),
# so that the code generator can insert the first FILENAME.
codegen_begin()
nexttoken()
while (Token != "") {
expect(defconst() || deftype())
expect(accept(";"))
}
}