Compare commits

...

40 Commits

Author SHA1 Message Date
0f3ed14575 asciiman: support attribute passing 2022-09-30 18:13:01 +02:00
089593bb0f asciiman: render libertyxdr.adoc properly 2022-09-30 15:01:14 +02:00
035bfe5e81 Document the recently added scripts 2022-09-30 03:09:04 +02:00
ebbe7a1672 Import protocol code generator from xK, add tests
Also add a VIM syntax highlighting file.

This also fixes some previously untriggered bugs.
2022-09-30 03:06:36 +02:00
4c3837ae2c cmake-parser: clean up 2022-09-28 00:06:51 +02:00
af2756ee01 Add a rudimentary CMake script parser 2022-09-27 23:27:06 +02:00
688c458095 asciiman: fix the first line of output 2022-09-25 21:11:45 +02:00
34460ca715 asciiman: improve command escaping 2022-09-25 20:55:51 +02:00
9883caf849 Add a stupid AsciiDoc to manual page converter
Most of my projects that could need it make use of liberty.
2022-09-25 20:07:10 +02:00
22a121383f Allow for overriding feature test macros 2022-09-11 00:44:14 +02:00
0e89bb9f46 Add some consts to function arguments 2022-09-01 12:44:58 +02:00
63aed8f0fd Fix up the PEG change from the last commit
This is not a regular expression.
2022-08-14 19:09:52 +02:00
f545be725d Extend string syntax in config
And actually test the results of string parsing.
2022-08-14 18:14:21 +02:00
7e8e085c97 Remove pointless, wrong constant
C99 allows trailing commas.
2021-12-18 00:25:13 +01:00
782a9a5977 Import libpulse poller integration, add tests 2021-11-07 15:37:21 +01:00
34f86651f6 Update .gitignore 2021-10-30 03:31:08 +02:00
5dec46df2c Add clang-format configuration, clean up 2021-10-30 03:10:17 +02:00
1b9d89cab3 Use kqueue on Darwin as well
Since poll() is implemented in terms of kqueue() there,
it doesn't seem like this could have improved anything.

Besides man 3 ev, libevent code, and [1],
I haven't managed to find much relevant information.

[1] https://daniel.haxx.se/blog/2016/10/11/poll-on-mac-10-12-is-broken/
2021-09-29 12:07:25 +02:00
a3ad5e7751 Ignore empty XDG_*_DIRS env. variables
As the specification says we should.  GLib does this as well.

It is still possible to achieve an empty set by using ":",
which are two non-absolute paths that should be ignored.
GLib doesn't implement this.  Thus, we're now better than GLib.
2021-09-26 08:49:51 +02:00
960420df3e Escape DEL character in config_item_write_string() 2020-10-31 21:28:29 +01:00
d71c47f8ce CMakeLists.txt: omit end{if,foreach} expressions
Their usefulness was almost negative.
2020-10-29 15:32:26 +01:00
425ea57b17 CMakeLists.txt: clean up OpenBSD support
A few things might have changed.
2020-10-29 15:31:05 +01:00
8822d06091 Don't suppress -Wimplicit-fallthrough
Might have already been resolved by: 9494e8e da75b6f
2020-10-26 18:25:32 +01:00
9639777814 Fix validation of overlong UTF-8
It was too strict and Egyptian dicks didn't want to pass,
so we'll do it half-arsedly for a subset.
2020-10-24 19:09:09 +02:00
929229a1d7 Fix config PEG grammar to match strtoll() 2020-10-24 08:05:17 +02:00
53bcebc2f0 Split out utf8_validate_cp(), adhere to RFC 3629 2020-10-21 05:20:20 +02:00
b08cf6c29f Reject overlong UTF-8 sequences 2020-10-21 05:08:59 +02:00
69101eb155 Fix optional arguments in --help output
An equals sign is necessary.
2020-10-13 21:27:46 +02:00
9d14562f7e Improve the UTF-8 API
We need to be able to detect partial sequences.
2020-10-12 22:56:22 +02:00
9b72304963 Fix a memory leak in mpd_client_parse_line() 2020-10-12 02:07:15 +02:00
1cd9ba8d97 Import configuration test from degesch 2020-10-12 02:07:15 +02:00
7e5b6c5343 Fix crashes in the config parser
It had a duality between not requiring null-terminated input
and relying on it, depending on where you looked.
2020-10-12 02:07:14 +02:00
c2c5031538 Add remaining fuzzing entry points
Closes #1
2020-10-12 02:07:07 +02:00
df3f53bd5c Add a basic fuzzing framework using libFuzzer
Updates #1
2020-10-11 20:04:34 +02:00
e029aae1d3 Import xwrite(), cstr_set(), resolve_..._template()
From degesch and json-rpc-shell.
2020-10-10 04:31:52 +02:00
b9457c321f Rename cstr_transform() argument
It does not always have to be tolower().
2020-10-10 04:30:19 +02:00
2201becca4 Mark some issues 2020-10-10 04:29:41 +02:00
7023c51347 Get rid of CMake dev warnings 2020-10-02 06:47:34 +02:00
d21f8466b5 Bump copyright years 2020-10-02 06:43:16 +02:00
7f919025ee Add iscntrl_ascii()
It's too easy to miss the DEL character.
2020-10-02 06:31:46 +02:00
27 changed files with 3405 additions and 109 deletions

32
.clang-format Normal file
View File

@@ -0,0 +1,32 @@
# clang-format is fairly limited, and these rules are approximate:
# - array initializers can get terribly mangled with clang-format 12.0,
# - sometimes it still aligns with space characters,
# - struct name NL { NL ... NL } NL name; is unachievable.
BasedOnStyle: GNU
ColumnLimit: 80
IndentWidth: 4
TabWidth: 4
UseTab: ForContinuationAndIndentation
BreakBeforeBraces: Allman
SpaceAfterCStyleCast: true
AlignAfterOpenBracket: DontAlign
AlignOperands: DontAlign
AlignConsecutiveMacros: Consecutive
AllowAllArgumentsOnNextLine: false
AllowAllParametersOfDeclarationOnNextLine: false
IndentGotoLabels: false
# IncludeCategories has some potential, but it may also break the build.
# Note that the documentation says the value should be "Never".
SortIncludes: false
# This is a compromise, it generally works out aesthetically better.
BinPackArguments: false
# Unfortunately, this can't be told to align to column 40 or so.
SpacesBeforeTrailingComments: 2
# liberty-specific macro body wrappers.
MacroBlockBegin: "BLOCK_START"
MacroBlockEnd: "BLOCK_END"
ForEachMacros: ["LIST_FOR_EACH"]

2
.gitignore vendored
View File

@@ -7,3 +7,5 @@
/liberty.files
/liberty.creator*
/liberty.includes
/liberty.cflags
/liberty.cxxflags

View File

@@ -1,12 +1,12 @@
project (liberty C)
cmake_minimum_required (VERSION 2.8.5)
cmake_minimum_required (VERSION 2.8.12)
# Moar warnings
if ("${CMAKE_C_COMPILER_ID}" MATCHES "GNU" OR CMAKE_COMPILER_IS_GNUCC)
# -Wunused-function is pretty annoying here, as everything is static
set (wdisabled "-Wno-unused-function -Wno-implicit-fallthrough")
set (wdisabled "-Wno-unused-function")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99 -Wall -Wextra ${wdisabled}")
endif ("${CMAKE_C_COMPILER_ID}" MATCHES "GNU" OR CMAKE_COMPILER_IS_GNUCC)
endif ()
# Dependencies
set (CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
@@ -16,11 +16,9 @@ find_package (PkgConfig REQUIRED)
pkg_check_modules (libssl REQUIRED libssl libcrypto)
if ("${CMAKE_SYSTEM_NAME}" MATCHES "BSD")
include_directories (/usr/local/include)
link_directories (/usr/local/lib)
# Our POSIX version macros make these undefined
add_definitions (-D__BSD_VISIBLE=1 -D_BSD_SOURCE=1)
endif ("${CMAKE_SYSTEM_NAME}" MATCHES "BSD")
endif ()
set (common_libraries ${libssl_LIBRARIES})
include_directories (${libssl_INCLUDE_DIRS})
@@ -32,15 +30,81 @@ foreach (extra iconv rt)
find_library (extra_lib_${extra} ${extra})
if (extra_lib_${extra})
list (APPEND common_libraries ${extra})
endif (extra_lib_${extra})
endforeach (extra)
endif ()
endforeach ()
# Build some unit tests
include_directories (${PROJECT_SOURCE_DIR})
enable_testing ()
foreach (name liberty proto)
set (tests liberty proto)
pkg_check_modules (libpulse libpulse)
if (libpulse_FOUND)
list (APPEND tests pulse)
list (APPEND common_libraries ${libpulse_LIBRARIES})
include_directories (${libpulse_INCLUDE_DIRS})
link_directories (${libpulse_LIBRARY_DIRS})
endif ()
foreach (name ${tests})
add_executable (test-${name} tests/${name}.c ${common_sources})
add_threads (test-${name})
target_link_libraries (test-${name} ${common_libraries})
add_test (NAME test-${name} COMMAND test-${name})
endforeach (name)
endforeach ()
# --- Tools --------------------------------------------------------------------
# Test the AsciiDoc manual page generator for a successful parse
set (ASCIIMAN ${PROJECT_SOURCE_DIR}/tools/asciiman.awk)
add_custom_command (OUTPUT libertyxdr.7
COMMAND env LC_ALL=C awk -f ${ASCIIMAN}
"${PROJECT_SOURCE_DIR}/libertyxdr.adoc" > libertyxdr.7
DEPENDS libertyxdr.adoc ${ASCIIMAN}
COMMENT "Generating man page for libertyxdr" VERBATIM)
add_custom_target (docs ALL DEPENDS libertyxdr.7)
# Test CMake script parsing
add_test (test-cmake-parser
env LC_ALL=C awk -f ${PROJECT_SOURCE_DIR}/tools/cmake-parser.awk
-f ${PROJECT_SOURCE_DIR}/tools/cmake-dump.awk ${CMAKE_CURRENT_LIST_FILE})
# Test protocol code generation
set (lxdrgen_outputs)
set (lxdrgen_base "${PROJECT_BINARY_DIR}/lxdrgen.lxdr")
foreach (backend c go mjs)
list (APPEND lxdrgen_outputs ${lxdrgen_base}.${backend})
add_custom_command (OUTPUT ${lxdrgen_base}.${backend}
COMMAND env LC_ALL=C awk
-f ${PROJECT_SOURCE_DIR}/tools/lxdrgen.awk
-f ${PROJECT_SOURCE_DIR}/tools/lxdrgen-${backend}.awk
-v PrefixCamel=ProtoGen
${PROJECT_SOURCE_DIR}/tests/lxdrgen.lxdr
> ${lxdrgen_base}.${backend}
DEPENDS
${PROJECT_SOURCE_DIR}/tools/lxdrgen.awk
${PROJECT_SOURCE_DIR}/tools/lxdrgen-${backend}.awk
${PROJECT_SOURCE_DIR}/tests/lxdrgen.lxdr
COMMENT "Generating test protocol code (${backend})" VERBATIM)
endforeach ()
add_custom_target (test-lxdrgen-outputs ALL DEPENDS ${lxdrgen_outputs})
set_source_files_properties (${lxdrgen_base}.c
PROPERTIES HEADER_FILE_ONLY TRUE)
add_executable (test-lxdrgen tests/lxdrgen.c ${lxdrgen_base}.c)
target_include_directories (test-lxdrgen PUBLIC ${PROJECT_BINARY_DIR})
add_test (NAME test-lxdrgen-c COMMAND test-lxdrgen)
find_program (GO_EXECUTABLE go)
if (GO_EXECUTABLE)
add_test (test-lxdrgen-go ${GO_EXECUTABLE} vet ${lxdrgen_base}.go)
else ()
message (WARNING "Cannot test generated protocol code for Go")
endif ()
find_program (NODE_EXECUTABLE node)
if (NODE_EXECUTABLE)
add_test (test-lxdrgen-mjs ${NODE_EXECUTABLE} -c ${lxdrgen_base}.mjs)
else ()
message (WARNING "Cannot test generated protocol code for Javascript")
endif ()

View File

@@ -1,4 +1,4 @@
Copyright (c) 2014 - 2018, Přemysl Eric Janouch <p@janouch.name>
Copyright (c) 2014 - 2022, Přemysl Eric Janouch <p@janouch.name>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted.

View File

@@ -17,6 +17,42 @@ All development is done on Linux, but other POSIX-compatible operating systems
should be supported as well. They have an extremely low priority, however, and
I'm not testing them at all, with the exception of OpenBSD.
Tools
-----
This project also hosts a number of supporting scripts written in portable AWK:
asciiman.awk::
A fallback manual page generator for AsciiDoc documents,
motivated by the hugeness of AsciiDoc's and Asciidoctor's dependency trees.
It uses the _man_ macro package.
cmake-parser.awk::
Parses the CMake language to the extent that is necessary to reliably
extract project versions. Its greatest limitation is its inability
to expand variables, which would require a full interpreter.
cmake-dump.awk::
This can be used in conjunction with the previous script to dump CMake
scripts in a normalized format for further processing.
lxdrgen.awk::
Protocol code generator for a variant of XDR,
which is link:libertyxdr.adoc[documented separately].
Successfully employed in https://git.janouch.name/p/xK[xK].
lxdrgen-c.awk::
LibertyXDR backend that builds on top of the C pseudolibrary.
lxdrgen-go.awk::
LibertyXDR backend for Go, supporting _encoding/json_ interfaces. It also
produces optimized JSON marshallers (however, note that the _json.Marshaler_
interface is bound to be underperforming, due to the amount of otherwise
avoidable memory allocations it necessitates).
lxdrgen-mjs.awk::
LibertyXDR backend for Javascript, currently for decoding only.
It cuts a corner by not using BigInts, on par with `JSON.parse()`.
Contributing and Support
------------------------
Use https://git.janouch.name/p/liberty to report any bugs, request features,

View File

@@ -1,17 +1,17 @@
# Public Domain
find_package (PkgConfig REQUIRED)
pkg_check_modules (NCURSESW QUIET ncursesw)
pkg_check_modules (Ncursesw QUIET ncursesw)
# OpenBSD doesn't provide a pkg-config file
set (required_vars NCURSESW_LIBRARIES)
if (NOT NCURSESW_FOUND)
find_library (NCURSESW_LIBRARIES NAMES ncursesw)
find_path (NCURSESW_INCLUDE_DIRS ncurses.h)
list (APPEND required_vars NCURSESW_INCLUDE_DIRS)
endif (NOT NCURSESW_FOUND)
set (required_vars Ncursesw_LIBRARIES)
if (NOT Ncursesw_FOUND)
find_library (Ncursesw_LIBRARIES NAMES ncursesw)
find_path (Ncursesw_INCLUDE_DIRS ncurses.h)
list (APPEND required_vars Ncursesw_INCLUDE_DIRS)
endif (NOT Ncursesw_FOUND)
include (FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS (NCURSESW DEFAULT_MSG ${required_vars})
FIND_PACKAGE_HANDLE_STANDARD_ARGS (Ncursesw DEFAULT_MSG ${required_vars})
mark_as_advanced (NCURSESW_LIBRARIES NCURSESW_INCLUDE_DIRS)
mark_as_advanced (Ncursesw_LIBRARIES Ncursesw_INCLUDE_DIRS)

View File

@@ -1,10 +1,10 @@
# Public Domain
find_path (UNISTRING_INCLUDE_DIRS unistr.h)
find_library (UNISTRING_LIBRARIES NAMES unistring libunistring)
find_path (Unistring_INCLUDE_DIRS unistr.h)
find_library (Unistring_LIBRARIES NAMES unistring libunistring)
include (FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS (UNISTRING DEFAULT_MSG
UNISTRING_INCLUDE_DIRS UNISTRING_LIBRARIES)
FIND_PACKAGE_HANDLE_STANDARD_ARGS (Unistring DEFAULT_MSG
Unistring_INCLUDE_DIRS Unistring_LIBRARIES)
mark_as_advanced (UNISTRING_LIBRARIES UNISTRING_INCLUDE_DIRS)
mark_as_advanced (Unistring_LIBRARIES Unistring_INCLUDE_DIRS)

18
fuzz Executable file
View File

@@ -0,0 +1,18 @@
#!/bin/sh
# I'm not sure how to make maximum use of this invention
# Make sure to have llvm-symbolizer installed
clang -g -fsanitize=address,undefined,fuzzer -fno-sanitize-recover=all \
tests/fuzz.c -o fuzz-executor
fuzz () {
echo "`tput bold`-- Fuzzing $1`tput sgr0`"
mkdir -p /tmp/corpus-$1
./fuzz-executor -test=$1 -artifact_prefix=$1- \
-max_total_time=600 -timeout=1 /tmp/corpus-$1
}
if [ $# -gt 0 ]; then
for test in "$@"; do fuzz $test; done
else
for test in $(./fuzz-executor); do fuzz $test; done
fi

View File

@@ -148,7 +148,7 @@ irc_free_message (struct irc_message *msg)
static void
irc_process_buffer (struct str *buf,
void (*callback)(const struct irc_message *, const char *, void *),
void (*callback) (const struct irc_message *, const char *, void *),
void *user_data)
{
char *start = buf->str, *end = start + buf->len;
@@ -1321,7 +1321,6 @@ enum mpd_subsystem
#define XX(a, b, c) MPD_SUBSYSTEM_ ## a = (1 << b),
MPD_SUBSYSTEM_TABLE (XX)
#undef XX
MPD_SUBSYSTEM_MAX
};
static const char *mpd_subsystem_names[] =
@@ -1567,13 +1566,12 @@ mpd_client_parse_line (struct mpd_client *self, const char *line)
if (!strcmp (line, "list_OK"))
strv_append_owned (&self->data, NULL);
else if (mpd_client_parse_response (line, &response))
{
mpd_client_dispatch (self, &response);
free (response.current_command);
free (response.message_text);
}
else
strv_append (&self->data, line);
free (response.current_command);
free (response.message_text);
return true;
}

348
liberty-pulse.c Normal file
View File

@@ -0,0 +1,348 @@
/*
* liberty-pulse.c: PulseAudio mainloop abstraction
*
* Copyright (c) 2016 - 2021, Přemysl Eric Janouch <p@janouch.name>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
*/
#include <pulse/mainloop.h>
// --- PulseAudio mainloop abstraction -----------------------------------------
struct pa_io_event
{
LIST_HEADER (pa_io_event)
pa_mainloop_api *api; ///< Parent structure
struct poller_fd fd; ///< Underlying FD event
pa_io_event_cb_t dispatch; ///< Dispatcher
pa_io_event_destroy_cb_t free; ///< Destroyer
void *user_data; ///< User data
};
struct pa_time_event
{
LIST_HEADER (pa_time_event)
pa_mainloop_api *api; ///< Parent structure
struct poller_timer timer; ///< Underlying timer event
pa_time_event_cb_t dispatch; ///< Dispatcher
pa_time_event_destroy_cb_t free; ///< Destroyer
void *user_data; ///< User data
};
struct pa_defer_event
{
LIST_HEADER (pa_defer_event)
pa_mainloop_api *api; ///< Parent structure
struct poller_idle idle; ///< Underlying idle event
pa_defer_event_cb_t dispatch; ///< Dispatcher
pa_defer_event_destroy_cb_t free; ///< Destroyer
void *user_data; ///< User data
};
struct poller_pa
{
struct poller *poller; ///< The underlying event loop
pa_io_event *io_list; ///< I/O events
pa_time_event *time_list; ///< Timer events
pa_defer_event *defer_list; ///< Deferred events
};
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
static short
poller_pa_flags_to_events (pa_io_event_flags_t flags)
{
short result = 0;
if (flags & PA_IO_EVENT_ERROR) result |= POLLERR;
if (flags & PA_IO_EVENT_HANGUP) result |= POLLHUP;
if (flags & PA_IO_EVENT_INPUT) result |= POLLIN;
if (flags & PA_IO_EVENT_OUTPUT) result |= POLLOUT;
return result;
}
static pa_io_event_flags_t
poller_pa_events_to_flags (short events)
{
pa_io_event_flags_t result = 0;
if (events & POLLERR) result |= PA_IO_EVENT_ERROR;
if (events & POLLHUP) result |= PA_IO_EVENT_HANGUP;
if (events & POLLIN) result |= PA_IO_EVENT_INPUT;
if (events & POLLOUT) result |= PA_IO_EVENT_OUTPUT;
return result;
}
static struct timeval
poller_pa_get_current_time (void)
{
struct timeval tv;
#ifdef _POSIX_TIMERS
struct timespec tp;
hard_assert (clock_gettime (CLOCK_REALTIME, &tp) != -1);
tv.tv_sec = tp.tv_sec;
tv.tv_usec = tp.tv_nsec / 1000;
#else
gettimeofday (&tv, NULL);
#endif
return tv;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
static void
poller_pa_io_dispatcher (const struct pollfd *pfd, void *user_data)
{
pa_io_event *self = user_data;
self->dispatch (self->api, self,
pfd->fd, poller_pa_events_to_flags (pfd->revents), self->user_data);
}
static void
poller_pa_io_enable (pa_io_event *self, pa_io_event_flags_t events)
{
struct poller_fd *fd = &self->fd;
if (events)
poller_fd_set (fd, poller_pa_flags_to_events (events));
else
poller_fd_reset (fd);
}
static pa_io_event *
poller_pa_io_new (pa_mainloop_api *api, int fd_, pa_io_event_flags_t events,
pa_io_event_cb_t cb, void *userdata)
{
pa_io_event *self = xcalloc (1, sizeof *self);
self->api = api;
self->dispatch = cb;
self->user_data = userdata;
struct poller_pa *data = api->userdata;
self->fd = poller_fd_make (data->poller, fd_);
self->fd.user_data = self;
self->fd.dispatcher = poller_pa_io_dispatcher;
// FIXME: under x2go PA tries to register twice for the same FD,
// which fails with our curent poller implementation;
// we could maintain a list of { poller_fd, listeners } structures;
// or maybe we're doing something wrong, which is yet to be determined
poller_pa_io_enable (self, events);
LIST_PREPEND (data->io_list, self);
return self;
}
static void
poller_pa_io_free (pa_io_event *self)
{
if (self->free)
self->free (self->api, self, self->user_data);
struct poller_pa *data = self->api->userdata;
poller_fd_reset (&self->fd);
LIST_UNLINK (data->io_list, self);
free (self);
}
static void
poller_pa_io_set_destroy (pa_io_event *self, pa_io_event_destroy_cb_t cb)
{
self->free = cb;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
static void
poller_pa_time_dispatcher (void *user_data)
{
pa_time_event *self = user_data;
// XXX: the meaning of the time argument is undocumented,
// so let's just put current Unix time in there
struct timeval now = poller_pa_get_current_time ();
self->dispatch (self->api, self, &now, self->user_data);
}
static void
poller_pa_time_restart (pa_time_event *self, const struct timeval *tv)
{
struct poller_timer *timer = &self->timer;
if (tv)
{
struct timeval now = poller_pa_get_current_time ();
poller_timer_set (timer,
(tv->tv_sec - now.tv_sec) * 1000 +
(tv->tv_usec - now.tv_usec) / 1000);
}
else
poller_timer_reset (timer);
}
static pa_time_event *
poller_pa_time_new (pa_mainloop_api *api, const struct timeval *tv,
pa_time_event_cb_t cb, void *userdata)
{
pa_time_event *self = xcalloc (1, sizeof *self);
self->api = api;
self->dispatch = cb;
self->user_data = userdata;
struct poller_pa *data = api->userdata;
self->timer = poller_timer_make (data->poller);
self->timer.user_data = self;
self->timer.dispatcher = poller_pa_time_dispatcher;
poller_pa_time_restart (self, tv);
LIST_PREPEND (data->time_list, self);
return self;
}
static void
poller_pa_time_free (pa_time_event *self)
{
if (self->free)
self->free (self->api, self, self->user_data);
struct poller_pa *data = self->api->userdata;
poller_timer_reset (&self->timer);
LIST_UNLINK (data->time_list, self);
free (self);
}
static void
poller_pa_time_set_destroy (pa_time_event *self, pa_time_event_destroy_cb_t cb)
{
self->free = cb;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
static void
poller_pa_defer_dispatcher (void *user_data)
{
pa_defer_event *self = user_data;
self->dispatch (self->api, self, self->user_data);
}
static pa_defer_event *
poller_pa_defer_new (pa_mainloop_api *api,
pa_defer_event_cb_t cb, void *userdata)
{
pa_defer_event *self = xcalloc (1, sizeof *self);
self->api = api;
self->dispatch = cb;
self->user_data = userdata;
struct poller_pa *data = api->userdata;
self->idle = poller_idle_make (data->poller);
self->idle.user_data = self;
self->idle.dispatcher = poller_pa_defer_dispatcher;
poller_idle_set (&self->idle);
LIST_PREPEND (data->defer_list, self);
return self;
}
static void
poller_pa_defer_enable (pa_defer_event *self, int enable)
{
struct poller_idle *idle = &self->idle;
if (enable)
poller_idle_set (idle);
else
poller_idle_reset (idle);
}
static void
poller_pa_defer_free (pa_defer_event *self)
{
if (self->free)
self->free (self->api, self, self->user_data);
struct poller_pa *data = self->api->userdata;
poller_idle_reset (&self->idle);
LIST_UNLINK (data->defer_list, self);
free (self);
}
static void
poller_pa_defer_set_destroy (pa_defer_event *self,
pa_defer_event_destroy_cb_t cb)
{
self->free = cb;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
static void
poller_pa_quit (pa_mainloop_api *api, int retval)
{
(void) api;
(void) retval;
// This is not called from within libpulse
hard_assert (!"quitting the libpulse event loop is unimplemented");
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
static struct pa_mainloop_api g_poller_pa_template =
{
.io_new = poller_pa_io_new,
.io_enable = poller_pa_io_enable,
.io_free = poller_pa_io_free,
.io_set_destroy = poller_pa_io_set_destroy,
.time_new = poller_pa_time_new,
.time_restart = poller_pa_time_restart,
.time_free = poller_pa_time_free,
.time_set_destroy = poller_pa_time_set_destroy,
.defer_new = poller_pa_defer_new,
.defer_enable = poller_pa_defer_enable,
.defer_free = poller_pa_defer_free,
.defer_set_destroy = poller_pa_defer_set_destroy,
.quit = poller_pa_quit,
};
static struct pa_mainloop_api *
poller_pa_new (struct poller *self)
{
struct poller_pa *data = xcalloc (1, sizeof *data);
data->poller = self;
struct pa_mainloop_api *api = xmalloc (sizeof *api);
*api = g_poller_pa_template;
api->userdata = data;
return api;
}
static void
poller_pa_destroy (struct pa_mainloop_api *api)
{
struct poller_pa *data = api->userdata;
LIST_FOR_EACH (pa_io_event, iter, data->io_list)
poller_pa_io_free (iter);
LIST_FOR_EACH (pa_time_event, iter, data->time_list)
poller_pa_time_free (iter);
LIST_FOR_EACH (pa_defer_event, iter, data->defer_list)
poller_pa_defer_free (iter);
free (data);
free (api);
}

238
liberty.c
View File

@@ -1,7 +1,7 @@
/*
* liberty.c: the ultimate C unlibrary
*
* Copyright (c) 2014 - 2018, Přemysl Eric Janouch <p@janouch.name>
* Copyright (c) 2014 - 2022, Přemysl Eric Janouch <p@janouch.name>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted.
@@ -16,8 +16,13 @@
*
*/
#define _POSIX_C_SOURCE 199309L
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200112L
#endif
#ifndef _XOPEN_SOURCE
#define _XOPEN_SOURCE 600
#endif
#include <stdio.h>
#include <stddef.h>
@@ -731,6 +736,21 @@ set_blocking (int fd, bool blocking)
return prev;
}
static bool
xwrite (int fd, const char *data, size_t len, struct error **e)
{
size_t written = 0;
while (written < len)
{
ssize_t res = write (fd, data + written, len - written);
if (res >= 0)
written += res;
else if (errno != EINTR)
return error_set (e, "%s", strerror (errno));
}
return true;
}
static void
xclose (int fd)
{
@@ -1357,6 +1377,7 @@ struct poller_idle
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// The heap could definitely be made faster but we'll prefer simplicity
struct poller_timers
{
struct poller_timer **heap; ///< Min-heap of timers
@@ -1733,10 +1754,9 @@ poller_run (struct poller *self)
self->revents_len = 0;
}
#elif defined (BSD)
// Mac OS X's kqueue is fatally broken, or so I've been told; leaving it out.
// Otherwise this is sort of similar to the epoll version.
// Sort of similar to the epoll version. Let's hope Darwin isn't broken,
// that'd mean reimplementing this in terms of select() just because of Crapple.
#elif defined (BSD) || defined (__APPLE__)
#include <sys/types.h>
#include <sys/event.h>
@@ -2435,7 +2455,7 @@ write_queue_processed (struct write_queue *self, size_t len)
}
static bool
write_queue_is_empty (struct write_queue *self)
write_queue_is_empty (const struct write_queue *self)
{
return self->head == NULL;
}
@@ -2530,7 +2550,7 @@ msg_unpacker_make (const void *data, size_t len)
}
static size_t
msg_unpacker_get_available (struct msg_unpacker *self)
msg_unpacker_get_available (const struct msg_unpacker *self)
{
return self->len - self->offset;
}
@@ -2672,6 +2692,12 @@ strncasecmp_ascii (const char *a, const char *b, size_t n)
return 0;
}
static bool
iscntrl_ascii (int c)
{
return (c >= 0 && c < 32) || c == 0x7f;
}
static bool
isalpha_ascii (int c)
{
@@ -2700,63 +2726,67 @@ isspace_ascii (int c)
// --- UTF-8 -------------------------------------------------------------------
/// Return a pointer to the next UTF-8 character, or NULL on error
static const char *
utf8_next (const char *s, size_t len, int32_t *codepoint)
/// Return the value of the UTF-8 character at `*s` and advance the pointer
/// to the next one. Returns -2 if there is only a partial but possibly valid
/// character sequence, or -1 on other errors. Either way, `*s` is untouched.
static int32_t
utf8_decode (const char **s, size_t len)
{
// End of string, we go no further
if (!len)
return NULL;
return -1;
// Find out how long the sequence is (0 for ASCII)
unsigned mask = 0x80;
unsigned sequence_len = 0;
const uint8_t *p = (const uint8_t *) s;
const uint8_t *p = (const uint8_t *) *s, *end = p + len;
while ((*p & mask) == mask)
{
// Invalid start of sequence
if (mask == 0xFE)
return NULL;
return -1;
mask |= mask >> 1;
sequence_len++;
}
// In the middle of a character or the input is too short
if (sequence_len == 1 || sequence_len > len)
return NULL;
// In the middle of a character
// or an overlong sequence (subset, possibly MUTF-8, not supported)
if (sequence_len == 1 || *p == 0xC0 || *p == 0xC1)
return -1;
// Check the rest of the sequence
uint32_t cp = *p++ & ~mask;
while (sequence_len && --sequence_len)
{
if (p == end)
return -2;
if ((*p & 0xC0) != 0x80)
return NULL;
return -1;
cp = cp << 6 | (*p++ & 0x3F);
}
if (codepoint)
*codepoint = cp;
return (const char *) p;
*s = (const char *) p;
return cp;
}
static inline bool
utf8_validate_cp (int32_t cp)
{
// RFC 3629, CESU-8 not allowed
return cp >= 0 && cp <= 0x10FFFF && (cp < 0xD800 || cp > 0xDFFF);
}
/// Very rough UTF-8 validation, just makes sure codepoints can be iterated
static bool
utf8_validate (const char *s, size_t len)
{
const char *next;
while (len)
{
const char *end = s + len;
int32_t codepoint;
// TODO: better validations
if (!(next = utf8_next (s, len, &codepoint))
|| codepoint > 0x10FFFF)
return false;
len -= next - s;
s = next;
}
return true;
while ((codepoint = utf8_decode (&s, end - s)) >= 0
&& utf8_validate_cp (codepoint))
;
return s == end;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@@ -2780,12 +2810,12 @@ utf8_iter_next (struct utf8_iter *self, size_t *len)
return -1;
const char *old = self->s;
int32_t codepoint;
if (!soft_assert ((self->s = utf8_next (old, self->len, &codepoint))))
int32_t codepoint = utf8_decode (&self->s, self->len);
if (!soft_assert (codepoint >= 0))
{
// Invalid UTF-8
self->len = 0;
return -1;
return codepoint;
}
size_t advance = self->s - old;
@@ -2917,6 +2947,13 @@ base64_encode (const void *data, size_t len, struct str *output)
// --- Utilities ---------------------------------------------------------------
static void
cstr_set (char **s, char *new)
{
free (*s);
*s = new;
}
static void
cstr_split (const char *s, const char *delimiters, bool ignore_empty,
struct strv *out)
@@ -2946,10 +2983,10 @@ cstr_strip_in_place (char *s, const char *stripped_chars)
}
static void
cstr_transform (char *s, int (*tolower) (int c))
cstr_transform (char *s, int (*xform) (int c))
{
for (; *s; s++)
*s = tolower (*s);
*s = xform (*s);
}
static char *
@@ -2996,6 +3033,7 @@ iconv_xstrdup (iconv_t conv, char *in, size_t in_len, size_t *out_len)
char *in_ptr = in;
if (in_len == (size_t) -1)
// XXX: out_len will be one character longer than the string!
in_len = strlen (in) + 1;
while (iconv (conv, (char **) &in_ptr, &in_len,
@@ -3213,7 +3251,7 @@ get_xdg_config_dirs (struct strv *out)
str_free (&config_home);
const char *xdg_config_dirs;
if (!(xdg_config_dirs = getenv ("XDG_CONFIG_DIRS")))
if (!(xdg_config_dirs = getenv ("XDG_CONFIG_DIRS")) || !*xdg_config_dirs)
xdg_config_dirs = "/etc/xdg";
cstr_split (xdg_config_dirs, ":", true, out);
}
@@ -3238,7 +3276,7 @@ get_xdg_data_dirs (struct strv *out)
str_free (&data_home);
const char *xdg_data_dirs;
if (!(xdg_data_dirs = getenv ("XDG_DATA_DIRS")))
if (!(xdg_data_dirs = getenv ("XDG_DATA_DIRS")) || !*xdg_data_dirs)
xdg_data_dirs = "/usr/local/share/:/usr/share/";
cstr_split (xdg_data_dirs, ":", true, out);
}
@@ -3255,16 +3293,8 @@ resolve_relative_data_filename (const char *filename)
}
static char *
resolve_relative_runtime_filename (const char *filename)
resolve_relative_runtime_filename_finish (struct str path)
{
struct str path = str_make ();
const char *runtime_dir = getenv ("XDG_RUNTIME_DIR");
if (runtime_dir && *runtime_dir == '/')
str_append (&path, runtime_dir);
else
get_xdg_home_dir (&path, "XDG_DATA_HOME", ".local/share");
str_append_printf (&path, "/%s/%s", PROGRAM_NAME, filename);
// Try to create the file's ancestors;
// typically the user will want to immediately create a file in there
const char *last_slash = strrchr (path.str, '/');
@@ -3277,6 +3307,41 @@ resolve_relative_runtime_filename (const char *filename)
return str_steal (&path);
}
static char *
resolve_relative_runtime_filename (const char *filename)
{
struct str path = str_make ();
const char *runtime_dir = getenv ("XDG_RUNTIME_DIR");
if (runtime_dir && *runtime_dir == '/')
str_append (&path, runtime_dir);
else
get_xdg_home_dir (&path, "XDG_DATA_HOME", ".local/share");
str_append_printf (&path, "/%s/%s", PROGRAM_NAME, filename);
return resolve_relative_runtime_filename_finish (path);
}
/// This differs from resolve_relative_runtime_filename() in that we expect
/// the filename to be something like a pattern for mkstemp(), so the resulting
/// path can reside in a system-wide directory with no risk of a conflict.
/// However, we have to take care about permissions. Do we even need this?
static char *
resolve_relative_runtime_template (const char *template)
{
struct str path = str_make ();
const char *runtime_dir = getenv ("XDG_RUNTIME_DIR");
const char *tmpdir = getenv ("TMPDIR");
if (runtime_dir && *runtime_dir == '/')
str_append_printf (&path, "%s/%s", runtime_dir, PROGRAM_NAME);
else if (tmpdir && *tmpdir == '/')
str_append_printf (&path, "%s/%s.%d", tmpdir, PROGRAM_NAME, geteuid ());
else
str_append_printf (&path, "/tmp/%s.%d", PROGRAM_NAME, geteuid ());
str_append_printf (&path, "/%s", template);
return resolve_relative_runtime_filename_finish (path);
}
static char *
try_expand_tilde (const char *filename)
{
@@ -3729,7 +3794,7 @@ opt_handler_usage (const struct opt_handler *self, FILE *stream)
str_append_printf (&row, "--%s", opt->long_name);
if (opt->arg_hint)
str_append_printf (&row, (opt->flags & OPT_OPTIONAL_ARG)
? " [%s]" : " %s", opt->arg_hint);
? "[=%s]" : " %s", opt->arg_hint);
// TODO: keep the indent if there are multiple lines
if (row.len + 2 <= OPT_USAGE_ALIGNMENT_COLUMN)
@@ -4352,11 +4417,13 @@ socket_io_try_write (int socket_fd, struct str *wb)
// object = lws '{' entries endobj
// endobj = lws '}'
//
// string = lws '"' ('\\' escape / ![\\"] char)* '"'
// quoted = lws '"' (!["\\] char / '\\' escape)* '"'
// / lws '`' (![`] char)* '`'
// string = (quoted)+
// char = [\0-\177] # or any Unicode codepoint in the UTF-8 encoding
// escape = [\\"abfnrtv] / [xX][0-9A-Fa-f][0-9A-Fa-f]? / [0-7][0-7]?[0-7]?
//
// integer = lws '-'? [0-9]+ # whatever strtoll() accepts on your system
// integer = lws [-+]? [0-9]+ # whatever strtoll() accepts on your system
// null = lws 'null'
// boolean = lws 'yes' / lws 'YES' / lws 'no' / lws 'NO'
// / lws 'on' / lws 'ON' / lws 'off' / lws 'OFF'
@@ -4652,8 +4719,10 @@ config_item_write_string (struct str *output, const struct str *s)
else if (c == '\t') str_append (output, "\\t");
else if (c == '\\') str_append (output, "\\\\");
else if (c == '"') str_append (output, "\\\"");
else if (c < 32) str_append_printf (output, "\\x%02x", c);
else str_append_c (output, c);
else if (iscntrl_ascii (c))
str_append_printf (output, "\\x%02x", c);
else
str_append_c (output, c);
}
str_append_c (output, '"');
}
@@ -4977,10 +5046,10 @@ config_tokenizer_escape_sequence
}
static bool
config_tokenizer_string
(struct config_tokenizer *self, struct str *output, struct error **e)
config_tokenizer_dq_string (struct config_tokenizer *self, struct str *output,
struct error **e)
{
unsigned char c;
unsigned char c = config_tokenizer_advance (self);
while (self->len)
{
if ((c = config_tokenizer_advance (self)) == '"')
@@ -4994,6 +5063,44 @@ config_tokenizer_string
return false;
}
static bool
config_tokenizer_bt_string (struct config_tokenizer *self, struct str *output,
struct error **e)
{
unsigned char c = config_tokenizer_advance (self);
while (self->len)
{
if ((c = config_tokenizer_advance (self)) == '`')
return true;
str_append_c (output, c);
}
config_tokenizer_error (self, e, "premature end of string");
return false;
}
static bool
config_tokenizer_string (struct config_tokenizer *self, struct str *output,
struct error **e)
{
// Go-like strings, with C/AWK-like automatic concatenation
while (self->len)
{
bool ok = true;
if (isspace_ascii (*self->p) && *self->p != '\n')
config_tokenizer_advance (self);
else if (*self->p == '"')
ok = config_tokenizer_dq_string (self, output, e);
else if (*self->p == '`')
ok = config_tokenizer_bt_string (self, output, e);
else
break;
if (!ok)
return false;
}
return true;
}
static enum config_token
config_tokenizer_next (struct config_tokenizer *self, struct error **e)
{
@@ -5018,7 +5125,7 @@ config_tokenizer_next (struct config_tokenizer *self, struct error **e)
return CONFIG_T_ABORT;
case '"':
config_tokenizer_advance (self);
case '`':
str_reset (&self->string);
if (!config_tokenizer_string (self, &self->string, e))
return CONFIG_T_ABORT;
@@ -5030,18 +5137,21 @@ config_tokenizer_next (struct config_tokenizer *self, struct error **e)
return CONFIG_T_STRING;
}
char *end;
// Our input doesn't need to be NUL-terminated but we want to use strtoll()
char buf[48] = "", *end = buf;
size_t buf_len = MIN (sizeof buf - 1, self->len);
errno = 0;
self->integer = strtoll (self->p, &end, 10);
self->integer = strtoll (strncpy (buf, self->p, buf_len), &end, 10);
if (errno == ERANGE)
{
config_tokenizer_error (self, e, "integer out of range");
return CONFIG_T_ABORT;
}
if (end != self->p)
if (end != buf)
{
self->len -= end - self->p;
self->p = end;
self->len -= end - buf;
self->p += end - buf;
return CONFIG_T_INTEGER;
}
@@ -5054,7 +5164,7 @@ config_tokenizer_next (struct config_tokenizer *self, struct error **e)
str_reset (&self->string);
do
str_append_c (&self->string, config_tokenizer_advance (self));
while (config_tokenizer_is_word_char (*self->p));
while (self->len && config_tokenizer_is_word_char (*self->p));
if (!strcmp (self->string.str, "null"))
return CONFIG_T_NULL;

108
libertyxdr.adoc Normal file
View File

@@ -0,0 +1,108 @@
libertyxdr(7)
=============
:doctype: manpage
Name
----
LibertyXDR - an XDR-derived IDL and data serialization format
Description
-----------
*LibertyXDR* is an interface description language, as well as a data
serialization format, that has been largely derived from XDR, though notably
simplified.
Conventions
~~~~~~~~~~~
User-defined types should be named in *CamelCase*, field names in *snake_case*,
and constants in *SCREAMING_SNAKE_CASE*. Code generators will convert these to
whatever is appropriate in their target language.
Primitive data types
~~~~~~~~~~~~~~~~~~~~
Like in XDR, all data is serialized in the network byte order, i.e., big-endian.
* *void*: 0 bytes
+
This is a dummy type that cannot be assigned a field name.
* *bool*: 1 byte
+
This is a boolean value: 0 means _false_, any other value means _true_.
* *u8*, *u16*, *u32*, *u64*: 1, 2, 4, and 8 bytes respectively
+
These are unsigned integers.
* *i8*, *i16*, *i32*, *i64*: 1, 2, 4, and 8 bytes respectively
+
These are signed integers in two's complement.
* *string*: implicitly prefixed by its length as a *u32*,
then immediately followed by its contents, with no trailing NUL byte
+
This is a valid UTF-8 string without a byte order mark. Note that strings are
always unbounded, unlike in XDR.
Constants
~~~~~~~~~
At the top level of a document, outside other definitions, you can define
typeless integer constants:
const VERSION = 1;
The value can be either a name of another previously defined constant,
or an immediate decimal value, which may not contain leading zeros.
Enumerations
~~~~~~~~~~~~
An *enum* is an *i8* with uniquely named values, in their own namespace.
Values can be either specified explicitly, in the same way as with a constant,
or they can be left implicit, in which case names assume a value that is one
larger than their predecessor. Zero is reserved for internal use, thus
enumerations implicitly begin with a value of one. For example, these form
a sequence from one to three:
enum Vehicle { CAR, LORRY = 2, PLANE, };
Structures
~~~~~~~~~~
A *struct* is a sequence of fields, specified by their type, and their chosen
name. You can add a *<>* suffix to change a field to an array, in which case
it is implicitly preceded by a *u32* specifying its length in terms of its
elements.
Unlike in XDR, there is no padding between subsequent fields, and type
definitions can be arbitrarily syntactically nested, as in C.
struct StockReport {
u8 version; // Version of this report.
struct Item {
Vehicle kind; // The vehicle in question.
i32 count; // How many vehicle of that kind there are.
} items<>; // Reported items.
};
Unions
~~~~~~
A *union* is a kind of structure whose fields depend on the value of its first
and always-present field, which must be a tag *enum*:
union VehicleDetails switch (Vehicle kind) {
case CAR: void;
case LORRY: i8 axles;
case PLANE: i8 engines;
};
All possible enumeration values must be named, and there is no *case*
fall-through.
Framing
-------
Unless this role is already filled by, e.g., WebSocket, _LibertyXDR_ structures
should be prefixed by their byte length in the *u32* format, once serialized.
See also
--------
_XDR: External Data Representation Standard_, RFC 4506

21
libertyxdr.vim Normal file
View File

@@ -0,0 +1,21 @@
" filetype.vim: au! BufNewFile,BufRead *.lxdr setf libertyxdr
if exists("b:current_syntax")
finish
endif
syn match libertyxdrError "[^[:space:]:;,(){}<>=]\+"
syn region libertyxdrBlockComment start=+/[*]+ end=+[*]/+
syn match libertyxdrComment "//.*"
syn match libertyxdrIdentifier "\<[[:alpha:]][[:alnum:]_]*\>"
syn match libertyxdrNumber "\<0\>\|\(-\|\<\)[1-9][[:digit:]]*\>"
syn keyword libertyxdrKeyword const enum struct union switch case
syn keyword libertyxdrType bool u8 u16 u32 u64 i8 i16 i32 i64 string void
let b:current_syntax = "libertyxdr"
hi def link libertyxdrError Error
hi def link libertyxdrBlockComment Comment
hi def link libertyxdrComment Comment
hi def link libertyxdrIdentifier Identifier
hi def link libertyxdrNumber Number
hi def link libertyxdrKeyword Statement
hi def link libertyxdrType Type

View File

@@ -82,4 +82,3 @@ siphash (const unsigned char key[16], const unsigned char *m, size_t len)
return v0 ^ v1 ^ v2 ^ v3;
}

297
tests/fuzz.c Normal file
View File

@@ -0,0 +1,297 @@
/*
* tests/fuzz.c
*
* Copyright (c) 2020, Přemysl Eric Janouch <p@janouch.name>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
*/
#define PROGRAM_NAME "fuzz"
#define PROGRAM_VERSION "0"
#define LIBERTY_WANT_SSL
// The MPD client is a full wrapper and needs the network
#define LIBERTY_WANT_POLLER
#define LIBERTY_WANT_ASYNC
#define LIBERTY_WANT_PROTO_IRC
#define LIBERTY_WANT_PROTO_HTTP
#define LIBERTY_WANT_PROTO_SCGI
#define LIBERTY_WANT_PROTO_FASTCGI
#define LIBERTY_WANT_PROTO_WS
#define LIBERTY_WANT_PROTO_MPD
#include "../liberty.c"
#include "../liberty-tui.c"
static bool
app_is_character_in_locale (ucs4_t ch)
{
return ch < 128;
}
// --- UTF-8 -------------------------------------------------------------------
static void
test_utf8_validate (const uint8_t *data, size_t size)
{
utf8_validate ((const char *) data, size);
}
// --- Base 64 -----------------------------------------------------------------
static void
test_base64_decode (const uint8_t *data, size_t size)
{
struct str wrap = str_make ();
str_append_data (&wrap, data, size);
struct str out = str_make ();
base64_decode (wrap.str, true /* ignore_ws */, &out);
str_free (&out);
str_free (&wrap);
}
// --- IRC ---------------------------------------------------------------------
static void
test_irc_parse_message (const uint8_t *data, size_t size)
{
struct str wrap = str_make ();
str_append_data (&wrap, data, size);
struct irc_message msg;
irc_parse_message (&msg, wrap.str);
irc_free_message (&msg);
str_free (&wrap);
}
// --- HTTP --------------------------------------------------------------------
static void
test_http_parse_media_type (const uint8_t *data, size_t size)
{
struct str wrap = str_make ();
str_append_data (&wrap, data, size);
char *type = NULL;
char *subtype = NULL;
struct str_map parameters = str_map_make (free);
http_parse_media_type (wrap.str, &type, &subtype, &parameters);
free (type);
free (subtype);
str_map_free (&parameters);
str_free (&wrap);
}
static void
test_http_parse_upgrade (const uint8_t *data, size_t size)
{
struct str wrap = str_make ();
str_append_data (&wrap, data, size);
struct http_protocol *protocols = NULL;
http_parse_upgrade (wrap.str, &protocols);
LIST_FOR_EACH (struct http_protocol, iter, protocols)
http_protocol_destroy (iter);
str_free (&wrap);
}
// --- SCGI --------------------------------------------------------------------
static bool
test_scgi_parser_on_headers_read (void *user_data)
{
(void) user_data;
return true;
}
static bool
test_scgi_parser_on_content (void *user_data, const void *data, size_t len)
{
(void) user_data;
(void) data;
(void) len;
return true;
}
static void
test_scgi_parser_push (const uint8_t *data, size_t size)
{
struct scgi_parser parser = scgi_parser_make ();
parser.on_headers_read = test_scgi_parser_on_headers_read;
parser.on_content = test_scgi_parser_on_content;
scgi_parser_push (&parser, data, size, NULL);
scgi_parser_free (&parser);
}
// --- WebSockets --------------------------------------------------------------
static bool
test_ws_parser_on_frame_header (void *user_data, const struct ws_parser *self)
{
(void) user_data;
(void) self;
return true;
}
static bool
test_ws_parser_on_frame (void *user_data, const struct ws_parser *self)
{
(void) user_data;
(void) self;
return true;
}
static void
test_ws_parser_push (const uint8_t *data, size_t size)
{
struct ws_parser parser = ws_parser_make ();
parser.on_frame_header = test_ws_parser_on_frame_header;
parser.on_frame = test_ws_parser_on_frame;
ws_parser_push (&parser, data, size);
ws_parser_free (&parser);
}
// --- FastCGI -----------------------------------------------------------------
static bool
test_fcgi_parser_on_message (const struct fcgi_parser *parser, void *user_data)
{
(void) parser;
(void) user_data;
return true;
}
static void
test_fcgi_parser_push (const uint8_t *data, size_t size)
{
struct fcgi_parser parser = fcgi_parser_make ();
parser.on_message = test_fcgi_parser_on_message;
fcgi_parser_push (&parser, data, size);
fcgi_parser_free (&parser);
}
static void
test_fcgi_nv_parser_push (const uint8_t *data, size_t size)
{
struct str_map values = str_map_make (free);
struct fcgi_nv_parser nv_parser = fcgi_nv_parser_make ();
nv_parser.output = &values;
fcgi_nv_parser_push (&nv_parser, data, size);
fcgi_nv_parser_free (&nv_parser);
str_map_free (&values);
}
// --- Config ------------------------------------------------------------------
static void
test_config_item_parse (const uint8_t *data, size_t size)
{
struct config_item *item =
config_item_parse ((const char *) data, size, false, NULL);
if (item)
config_item_destroy (item);
}
// --- TUI ---------------------------------------------------------------------
static void
test_attrs_decode (const uint8_t *data, size_t size)
{
struct str wrap = str_make ();
str_append_data (&wrap, data, size);
attrs_decode (wrap.str);
str_free (&wrap);
}
// --- MPD ---------------------------------------------------------------------
static void
test_mpd_client_process_input (const uint8_t *data, size_t size)
{
struct poller poller;
poller_init (&poller);
struct mpd_client mpd = mpd_client_make (&poller);
str_append_data (&mpd.read_buffer, data, size);
mpd_client_process_input (&mpd);
mpd_client_free (&mpd);
poller_free (&poller);
}
// --- Main --------------------------------------------------------------------
typedef void (*fuzz_test_fn) (const uint8_t *data, size_t size);
static fuzz_test_fn generator = NULL;
void
LLVMFuzzerTestOneInput (const uint8_t *data, size_t size)
{
generator (data, size);
}
int
LLVMFuzzerInitialize (int *argcp, char ***argvp)
{
struct str_map targets = str_map_make (NULL);
#define REGISTER(name) str_map_set (&targets, #name, test_ ## name);
REGISTER (utf8_validate)
REGISTER (base64_decode)
REGISTER (irc_parse_message)
REGISTER (http_parse_media_type)
REGISTER (http_parse_upgrade)
REGISTER (scgi_parser_push)
REGISTER (ws_parser_push)
REGISTER (fcgi_parser_push)
REGISTER (fcgi_nv_parser_push)
REGISTER (config_item_parse)
REGISTER (attrs_decode)
REGISTER (mpd_client_process_input)
char **argv = *argvp, *option = "-test=", *name = NULL;
for (int i = 1; i < *argcp; i++)
if (!strncmp (argv[i], option, strlen (option)))
{
name = argv[i] + strlen (option);
memmove (argv + i, argv + i + 1, (*argcp - i) * sizeof *argv);
(*argcp)--;
}
if (!name)
{
struct str_map_iter iter = str_map_iter_make (&targets);
while (str_map_iter_next (&iter))
printf ("%s\n", iter.link->key);
exit (EXIT_FAILURE);
}
if (!(generator = str_map_find (&targets, name)))
{
fprintf (stderr, "Unknown test: %s\n", name);
exit (EXIT_FAILURE);
}
str_map_free (&targets);
return 0;
}

View File

@@ -1,7 +1,7 @@
/*
* tests/liberty.c
*
* Copyright (c) 2015 - 2016, Přemysl Eric Janouch <p@janouch.name>
* Copyright (c) 2015 - 2022, Přemysl Eric Janouch <p@janouch.name>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted.
@@ -326,10 +326,19 @@ test_str_map (void)
static void
test_utf8 (void)
{
const char valid [] = "2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm";
const char invalid[] = "\xf0\x90\x28\xbc";
soft_assert ( utf8_validate (valid, sizeof valid));
soft_assert (!utf8_validate (invalid, sizeof invalid));
const char *full = "\xc5\x99", *partial = full, *empty = full;
soft_assert (utf8_decode (&full, 2) == 0x0159);
soft_assert (utf8_decode (&partial, 1) == -2);
soft_assert (utf8_decode (&empty, 0) == -1);
const char valid_1[] = "2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm";
const char valid_2[] = "\xf0\x93\x82\xb9";
const char invalid_1[] = "\xf0\x90\x28\xbc";
const char invalid_2[] = "\xc0\x80";
soft_assert ( utf8_validate (valid_1, sizeof valid_1));
soft_assert ( utf8_validate (valid_2, sizeof valid_2));
soft_assert (!utf8_validate (invalid_1, sizeof invalid_1));
soft_assert (!utf8_validate (invalid_2, sizeof invalid_2));
struct utf8_iter iter = utf8_iter_make ("fóọ");
size_t ch_len;
@@ -604,6 +613,81 @@ test_connector (const void *user_data, struct test_connector_fixture *self)
connector_free (&connector);
}
// --- Configuration -----------------------------------------------------------
static void
on_test_config_foo_change (struct config_item *item)
{
*(bool *) item->user_data = item->value.boolean;
}
static bool
test_config_validate_nonnegative
(const struct config_item *item, struct error **e)
{
if (item->type == CONFIG_ITEM_NULL)
return true;
hard_assert (item->type == CONFIG_ITEM_INTEGER);
if (item->value.integer >= 0)
return true;
error_set (e, "must be non-negative");
return false;
}
static struct config_schema g_config_test[] =
{
{ .name = "foo",
.comment = "baz",
.type = CONFIG_ITEM_BOOLEAN,
.default_ = "off",
.on_change = on_test_config_foo_change },
{ .name = "bar",
.type = CONFIG_ITEM_INTEGER,
.validate = test_config_validate_nonnegative,
.default_ = "1" },
{ .name = "foobar",
.type = CONFIG_ITEM_STRING,
.default_ = "\"qux\\x01`\" \"\"`a`" },
{}
};
static void
test_config_load (struct config_item *subtree, void *user_data)
{
config_schema_apply_to_object (g_config_test, subtree, user_data);
}
static void
test_config (void)
{
struct config config = config_make ();
bool b = true;
config_register_module (&config, "top", test_config_load, &b);
config_load (&config, config_item_object ());
config_schema_call_changed (config.root);
hard_assert (b == false);
struct config_item *invalid = config_item_integer (-1);
hard_assert (!config_item_set_from (config_item_get (config.root,
"top.bar", NULL), invalid, NULL));
config_item_destroy (invalid);
hard_assert (!strcmp ("qux\001`a",
config_item_get (config.root, "top.foobar", NULL)->value.string.str));
struct str s = str_make ();
config_item_write (config.root, true, &s);
struct config_item *parsed = config_item_parse (s.str, s.len, false, NULL);
hard_assert (parsed);
config_item_destroy (parsed);
str_free (&s);
config_free (&config);
}
// --- Main --------------------------------------------------------------------
int
@@ -622,6 +706,7 @@ main (int argc, char *argv[])
test_add_simple (&test, "/utf-8", NULL, test_utf8);
test_add_simple (&test, "/base64", NULL, test_base64);
test_add_simple (&test, "/async", NULL, test_async);
test_add_simple (&test, "/config", NULL, test_config);
test_add (&test, "/connector", struct test_connector_fixture, NULL,
test_connector_fixture_init,

123
tests/lxdrgen.c Normal file
View File

@@ -0,0 +1,123 @@
/*
* tests/lxdrgen.c
*
* Copyright (c) 2022, Přemysl Eric Janouch <p@janouch.name>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
*/
#define PROGRAM_NAME "test"
#define PROGRAM_VERSION "0"
#include "../liberty.c"
#include "lxdrgen.lxdr.c"
static void
test_ser_deser_free (void)
{
hard_assert (PROTO_GEN_VERSION == 1);
enum { CASES = 3 };
struct proto_gen_struct a = {}, b = {};
a.u = xcalloc ((a.u_len = CASES + rand () % 100), sizeof *a.u);
for (size_t i = 0; i < a.u_len; i++)
{
union proto_gen_union *u = a.u + i;
switch (i % CASES)
{
case 0:
u->tag = PROTO_GEN_ENUM_NUMBERS;
u->numbers.a = rand () % UINT8_MAX;
u->numbers.b = rand () % UINT16_MAX;
u->numbers.c = rand () % UINT32_MAX;
u->numbers.d = rand () % UINT64_MAX;
u->numbers.e = rand () % UINT8_MAX;
u->numbers.f = rand () % UINT16_MAX;
u->numbers.g = rand () % UINT32_MAX;
u->numbers.h = rand () % UINT64_MAX;
break;
case 1:
u->tag = PROTO_GEN_ENUM_OTHERS;
u->others.foo = rand () % 2;
u->others.bar = str_make ();
for (int i = rand () % 0x30; i > 0; i--)
str_append_c (&u->others.bar, 0x30 + i);
break;
case 2:
u->tag = PROTO_GEN_ENUM_NOTHING;
break;
default:
hard_assert (!"unhandled case");
}
}
struct str buf = str_make ();
hard_assert (proto_gen_struct_serialize (&a, &buf));
struct msg_unpacker r = msg_unpacker_make (buf.str, buf.len);
hard_assert (proto_gen_struct_deserialize (&b, &r));
hard_assert (!msg_unpacker_get_available (&r));
str_free (&buf);
hard_assert (a.u_len == b.u_len);
for (size_t i = 0; i < a.u_len; i++)
{
union proto_gen_union *ua = a.u + i;
union proto_gen_union *ub = b.u + i;
hard_assert (ua->tag == ub->tag);
switch (ua->tag)
{
case PROTO_GEN_ENUM_NUMBERS:
hard_assert (ua->numbers.a == ub->numbers.a);
hard_assert (ua->numbers.b == ub->numbers.b);
hard_assert (ua->numbers.c == ub->numbers.c);
hard_assert (ua->numbers.d == ub->numbers.d);
hard_assert (ua->numbers.e == ub->numbers.e);
hard_assert (ua->numbers.f == ub->numbers.f);
hard_assert (ua->numbers.g == ub->numbers.g);
hard_assert (ua->numbers.h == ub->numbers.h);
break;
case PROTO_GEN_ENUM_OTHERS:
hard_assert (ua->others.foo == ub->others.foo);
hard_assert (ua->others.bar.len == ub->others.bar.len);
hard_assert (!memcmp (ua->others.bar.str, ub->others.bar.str,
ua->others.bar.len));
break;
case PROTO_GEN_ENUM_NOTHING:
break;
default:
hard_assert (!"unexpected case");
}
}
// Emulate partially deserialized data to test disposal of that.
for (size_t i = b.u_len - CASES; i < b.u_len; i++)
{
proto_gen_union_free (&b.u[i]);
memset (&b.u[i], 0, sizeof b.u[i]);
}
proto_gen_struct_free (&a);
proto_gen_struct_free (&b);
}
int
main (int argc, char *argv[])
{
struct test test;
test_init (&test, argc, argv);
test_add_simple (&test, "/ser-deser-free", NULL, test_ser_deser_free);
return test_run (&test);
}

23
tests/lxdrgen.lxdr Normal file
View File

@@ -0,0 +1,23 @@
/*
* tests/lxdrgen.lxdr: a test protocol for the generator
*/
const VERSION = 1;
const NOISREV = -1;
// TODO: Test failure paths, and in general go for full coverage.
struct Struct {
union Union switch (enum Enum {
NUMBERS = VERSION,
OTHERS = 2,
NOTHING,
} tag) {
case NUMBERS:
i8 a; i16 b; i32 c; i64 d;
u8 e; u16 f; u32 g; u64 h;
case OTHERS:
bool foo;
string bar;
case NOTHING:
void;
} u<>;
};

View File

@@ -77,7 +77,7 @@ test_irc (void)
static void
test_http_parser (void)
{
struct str_map parameters = str_map_make (NULL);
struct str_map parameters = str_map_make (free);
parameters.key_xfrm = tolower_ascii_strxfrm;
char *type = NULL;
@@ -88,9 +88,11 @@ test_http_parser (void)
soft_assert (!strcasecmp_ascii (subtype, "html"));
soft_assert (parameters.len == 1);
soft_assert (!strcmp (str_map_find (&parameters, "charset"), "utf-8"));
free (type);
free (subtype);
str_map_free (&parameters);
struct http_protocol *protocols;
struct http_protocol *protocols = NULL;
soft_assert (http_parse_upgrade ("websocket, HTTP/2.0, , ", &protocols));
soft_assert (!strcmp (protocols->name, "websocket"));

127
tests/pulse.c Normal file
View File

@@ -0,0 +1,127 @@
/*
* tests/pulse.c
*
* Copyright (c) 2021, Přemysl Eric Janouch <p@janouch.name>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
*/
#define PROGRAM_NAME "test"
#define PROGRAM_VERSION "0"
#define LIBERTY_WANT_POLLER
#include "../liberty.c"
#include "../liberty-pulse.c"
// --- Tests -------------------------------------------------------------------
enum
{
EVENT_IO = 1 << 0,
EVENT_TIME = 1 << 1,
EVENT_DEFER = 1 << 2,
EVENT_ALL = (1 << 3) - 1
};
static intptr_t g_events = 0;
static intptr_t g_destroys = 0;
static void
io_event_cb (pa_mainloop_api *a,
pa_io_event *e, int fd, pa_io_event_flags_t events, void *userdata)
{
(void) a; (void) e; (void) fd; (void) events;
g_events |= (intptr_t) userdata;
}
static void
io_event_destroy_cb (pa_mainloop_api *a, pa_io_event *e, void *userdata)
{
(void) a; (void) e;
g_destroys += (intptr_t) userdata;
}
static void
time_event_cb (pa_mainloop_api *a,
pa_time_event *e, const struct timeval *tv, void *userdata)
{
(void) a; (void) e; (void) tv;
g_events |= (intptr_t) userdata;
}
static void
time_event_destroy_cb (pa_mainloop_api *a, pa_time_event *e, void *userdata)
{
(void) a; (void) e;
g_destroys += (intptr_t) userdata;
}
static void
defer_event_cb (pa_mainloop_api *a, pa_defer_event *e, void *userdata)
{
(void) a; (void) e;
g_events |= (intptr_t) userdata;
}
static void
defer_event_destroy_cb (pa_mainloop_api *a, pa_defer_event *e, void *userdata)
{
(void) a; (void) e;
g_destroys += (intptr_t) userdata;
}
static void
test_pulse (void)
{
struct poller poller;
poller_init (&poller);
// Let's just get this over with, not aiming for high test coverage here
pa_mainloop_api *api = poller_pa_new (&poller);
pa_io_event *ie = api->io_new (api, STDOUT_FILENO, PA_IO_EVENT_OUTPUT,
io_event_cb, (void *) EVENT_IO);
api->io_set_destroy (ie, io_event_destroy_cb);
const struct timeval tv = poller_pa_get_current_time ();
pa_time_event *te = api->time_new (api, &tv,
time_event_cb, (void *) EVENT_TIME);
api->time_set_destroy (te, time_event_destroy_cb);
api->time_restart (te, &tv);
pa_defer_event *de = api->defer_new (api,
defer_event_cb, (void *) EVENT_DEFER);
api->defer_set_destroy (de, defer_event_destroy_cb);
api->defer_enable (api->defer_new (api,
defer_event_cb, (void *) EVENT_DEFER), false);
alarm (1);
while (g_events != EVENT_ALL)
poller_run (&poller);
poller_pa_destroy (api);
soft_assert (g_destroys == EVENT_ALL);
poller_free (&poller);
}
// --- Main --------------------------------------------------------------------
int
main (int argc, char *argv[])
{
struct test test;
test_init (&test, argc, argv);
test_add_simple (&test, "/pulse", NULL, test_pulse);
return test_run (&test);
}

245
tools/asciiman.awk Normal file
View File

@@ -0,0 +1,245 @@
# asciiman.awk: stupid AsciiDoc to manual page converter
#
# Copyright (c) 2022, Přemysl Eric Janouch <p@janouch.name>
# SPDX-License-Identifier: 0BSD
#
# This is not intended to produce great output, merely useful output.
# As such, input documents should restrict themselves as follows:
#
# - In-line formatting sequences must not overlap,
# cannot be escaped, and cannot span lines.
# - Heading underlines must match in byte length exactly.
# - Only a small subset of syntax is supported overall.
#
# Also beware that the output has only been tested with GNU troff.
# Attributes can be passed via environment variables starting with "asciidoc-".
function fatal(message) {
print ".\\\" " FILENAME ":" FNR ": fatal error: " message
print FILENAME ":" FNR ": fatal error: " message > "/dev/stderr"
exit 1
}
function getattribute(name) {
if (!(name in Attrs) && ("asciidoc-" name) in ENVIRON)
Attrs[name] = ENVIRON["asciidoc-" name]
return Attrs[name]
}
function expand(s, attr, v) {
# TODO: This should not expand unknown attribute names.
while (match(s, /[{][^{}]*[}]/)) {
s = substr(s, 1, RSTART - 1) \
getattribute(substr(s, RSTART + 1, RLENGTH - 2)) \
substr(s, RSTART + RLENGTH)
}
return s
}
function escape(s) {
gsub(/\\/, "\\\\", s)
gsub(/-/, "\\-", s)
sub(/^[.']/, "\\\\\\&&", s)
return s
}
function readattribute(line, attrname, attrvalue) {
if (match(line, /^:[^:]*: /)) {
attrname = substr(line, RSTART + 1, RLENGTH - 3)
attrvalue = substr(line, RSTART + RLENGTH)
Attrs[attrname] = expand(attrvalue)
return 1
}
}
NR == 1 {
nameline = $0
if (match(nameline, /[(][[:digit:]][)]$/)) {
name = substr(nameline, 1, RSTART - 1)
section = substr(nameline, RSTART + 1, RLENGTH - 2)
} else {
fatal("invalid header line")
}
getline
if (length(nameline) != length($0) || /[^=]/)
fatal("invalid header underline")
getline
while (readattribute($0))
getline
if ($0)
fatal("expected an empty line after the header")
# Requesting tbl(1), even though we currently do not support tables.
print "'\\\" t"
printf ".TH \"%s\" \"%s\" \"\" \"%s\"",
toupper(name), section, getattribute("mansource")
if (getattribute("manmanual"))
printf " \"%s\"", getattribute("manmanual")
print ""
# Hyphenation is indeed rather annoying, in particular with long links.
print ".nh"
}
function format(line, v) {
# Pass-through, otherwise useful for hacks, is a bit of a lie here,
# and formatting doesn't fully respect word boundaries.
while (line) {
if (match(line, /^[+][+][+][^+]+[+][+][+]/)) {
v = v substr(line, RSTART + 3, RLENGTH - 6)
} else if (match(line, /^__[^_]+__/)) {
v = v "\\fI" substr(line, RSTART + 2, RLENGTH - 4) "\\fP"
} else if (match(line, /^[*][*][^*]+[*][*]/)) {
v = v "\\fB" substr(line, RSTART + 2, RLENGTH - 4) "\\fP"
} else if (match(line, /^_[^_]+_/) &&
substr(line, RSTART + RLENGTH) !~ /^[[:alnum:]]/) {
v = v "\\fI" substr(line, RSTART + 1, RLENGTH - 2) "\\fP"
} else if (match(line, /^[*][^*]+[*]/) &&
substr(line, RSTART + RLENGTH) !~ /^[[:alnum:]]/) {
v = v "\\fB" substr(line, RSTART + 1, RLENGTH - 2) "\\fP"
} else {
v = v substr(line, 1, 1)
line = substr(line, 2)
continue
}
line = substr(line, RSTART + RLENGTH)
}
return v
}
function inline(line) {
if (!line) {
print ".sp"
return
}
line = format(escape(expand(line)))
# Strip empty URL descriptions, otherwise useful for demarking the end.
while (match(line, /[^[:space:]]+\[\]/)) {
line = substr(line, 1, RSTART + RLENGTH - 3) \
substr(line, RSTART + RLENGTH)
}
# Enable double-spacing after the end of a sentence.
gsub(/[.][[:space:]]+/, ".\n", line)
gsub(/[!][[:space:]]+/, "!\n", line)
gsub(/[?][[:space:]]+/, "?\n", line)
# Quote commands resulting from that, as well as from expand().
gsub(/\n[.]/, "\n\\\\\\&.", line)
gsub(/\n[']/, "\n\\\\\\&'", line)
sub(/[[:space:]]+[+]$/, "\n.br", line)
print line
}
# Returns 1 iff the left-over $0 should be processed further.
function process(firstline) {
if (readattribute(firstline))
return 0
if (getline <= 0) {
inline(firstline)
return 0
}
if (length(firstline) == length($0) && /^-+$/) {
print ".SH \"" escape(toupper(expand(firstline))) "\""
return 0
}
if (length(firstline) == length($0) && /^~+$/) {
print ".SS \"" escape(expand(firstline)) "\""
return 0
}
if (firstline ~ /^(-{4,}|[.]{4,})$/) {
print ".if n .RS 4"
print ".nf"
print ".fam C"
do {
print escape($0)
} while (getline > 0 && $0 != firstline)
print ".fam"
print ".fi"
print ".if n .RE"
return 0
}
if (firstline ~ /^\/{4,}$/) {
do {
print ".\\\" " $0
} while (getline > 0 && $0 != firstline)
return 0
}
if (match(firstline, /^\/\//)) {
print ".\\\" " firstline
return 1
}
# We generally assume these block end with a blank line.
if (match(firstline, /^[[:space:]]*[*][[:space:]]+/)) {
# Bullet magic copied over from AsciiDoc/Asciidoctor generators.
print ".RS 4"
print ".ie n \\{\\"
print "\\h'-04'\\(bu\\h'+03'\\c"
print ".\\}"
print ".el \\{\\"
print ".sp -1"
print ".IP \\(bu 2.3"
print ".\\}"
inline(substr(firstline, RSTART + RLENGTH))
while ($0) {
sub(/^[[:space:]]+/, "")
sub(/^[+]$/, "")
if (!process($0) && getline <= 0)
fatal("unexpected EOF")
if (match($0, /^[[:space:]]*[*][[:space:]]+/))
break
}
print ".RE"
print ".sp"
return !!$0
}
if (match(firstline, /^[[:space:]]+/)) {
print ".if n .RS 4"
print ".nf"
print ".fam C"
do {
print escape(substr(firstline, RLENGTH + 1))
firstline = $0
} while ($0 && getline > 0)
print ".fam"
print ".fi"
print ".if n .RE"
return 1
}
if (match(firstline, /::$/)) {
inline(substr(firstline, 1, RSTART - 1))
while (match($0, /::$/)) {
print ".br"
inline(substr($0, 1, RSTART - 1))
if (getline <= 0)
fatal("unexpected EOF")
}
print ".RS 4"
while ($0) {
sub(/^[[:space:]]+/, "")
sub(/^[+]$/, "")
if (!process($0) && getline <= 0)
fatal("unexpected EOF")
if (match($0, /::$/))
break
}
print ".RE"
print ".sp"
return !!$0
}
inline(firstline)
return 1
}
{
while (process($0)) {}
}

24
tools/cmake-dump.awk Normal file
View File

@@ -0,0 +1,24 @@
# cmake-dump.awk: dump parsed CMake scripts as tables
#
# Copyright (c) 2022, Přemysl Eric Janouch <p@janouch.name>
# SPDX-License-Identifier: 0BSD
#
# Parsed scripts are output in a table, with commands separated using ASCII
# Record Separators, and arguments using Unit Separators.
#
# Example usage: awk -f cmake-parser.awk -f cmake-dump.awk CMakeLists.txt \
# | sed 'y/\x1F\x1E\t\n/\t\n /' \
# | sed -n '/^project\t\([^\t]*\).*\tVERSION\t\([^\t]*\).*/{s//\1 \2/p;q;}'
function sanitize(s) {
if (s ~ /[\x1E\x1F]/)
fatal("conflicting ASCII control characters found in source")
return s
}
Command {
out = sanitize(Command)
for (i in Args)
out = out "\x1F" sanitize(Args[i])
printf "%s\x1E", out
}

252
tools/cmake-parser.awk Normal file
View File

@@ -0,0 +1,252 @@
# cmake-parser.awk: rudimentary CMake script parser
#
# Copyright (c) 2022, Přemysl Eric Janouch <p@janouch.name>
# SPDX-License-Identifier: 0BSD
#
# Implemented roughly according to the grammar described in cmake-language(7),
# which is self-conflicting, and not an accurate description.
#
# The result of parsing is stored in the case-normalized Command variable,
# and the Args array. These can be used by subsequent scripts.
function warning(message) {
print FILENAME ":" FNR ": warning: " message > "/dev/stderr"
}
function fatal(message) {
print FILENAME ":" FNR ": fatal error: " message > "/dev/stderr"
exit 1
}
function expect(v) {
if (!v && v == 0)
fatal("broken expectations at `" $0 "'")
return v
}
function literal(v) {
if (substr($0, 1, length(v)) != v)
return 0
$0 = substr($0, length(v) + 1)
return 1
}
function regexp(re) {
if (!match($0, "^" re))
return 0
$0 = substr($0, RLENGTH + 1)
return 1
}
function space() {
return regexp("[ \t]+")
}
function unbracket(len, v) {
do {
if (match($0, "]={" len "}]")) {
v = v substr($0, 1, RSTART - 1)
$0 = substr($0, RSTART + RLENGTH)
return v
}
v = v $0 RS
} while (getline > 0)
fatal("unterminated bracket")
}
function bracket_comment() {
if (!match($0, /^#\[=*\[/))
return 0
$0 = substr($0, RSTART + RLENGTH)
unbracket(RLENGTH - 3)
return 1
}
function line_ending() {
while (space() || bracket_comment()) {}
if (/^#/)
$0 = ""
return !$0
}
# ------------------------------------------------------------------------------
# While elementary expansion of previously set variables is implementable,
# it doesn't seem to be worth the effort.
function expand(s, v) {
v = s
while (match(v, /\\*[$](|ENV|CACHE)[{]/)) {
if (index(substr(v, RSTART), "$") % 2 != 0) {
warning("variable expansion is not supported: " s)
return s
}
v = substr(v, RSTART + RLENGTH)
}
return s
}
function escape_sequence( v) {
if (!literal("\\"))
return 0
if (literal("t")) return "\t"
if (literal("r")) return "\r"
if (literal("n")) return "\n"
# escape_semicolon isn't treated any specially here.
if (regexp("[A-Za-z0-9]"))
fatal("unsupported escape sequence")
if ($0) {
v = substr($0, 1, 1)
$0 = substr($0, 2)
return v
}
if (getline > 0)
return ""
fatal("premature end of file")
}
function quoted_argument( v, unescaped) {
if (!literal("\""))
return 0
v = ""
while (!literal("\"")) {
if (!$0) {
if (getline <= 0)
fatal("premature end of file")
v = v RS
} else if ((unescaped = escape_sequence())) {
if (unescaped == "\\" || unescaped == "$")
v = v "\\"
else if (unescaped == ";")
v = v "\\\\"
v = v unescaped
} else if (unescaped == "") {
# quoted_continuation
} else {
v = v substr($0, 1, 1)
$0 = substr($0, 2)
}
}
return v
}
function finalize_quoted(expanded, v) {
while (match(expanded, /\\./)) {
v = v substr(expanded, 1, RSTART - 1) \
substr(expanded, RSTART + 1, 1)
expanded = substr(expanded, RSTART + RLENGTH)
}
Args[++N] = v expanded
}
function unquoted_argument( v, unescaped) {
while (1) {
if (match($0, /^[^[:space:]()#"\\]+/)) {
v = v substr($0, RSTART, RLENGTH)
$0 = substr($0, RSTART + RLENGTH)
} else if ((unescaped = escape_sequence())) {
if (unescaped == "\\" || unescaped == "$" || unescaped == ";")
v = v "\\"
v = v unescaped
} else if (unescaped == "") {
fatal("unexpected backslash in an unquoted argument")
} else {
# unquoted_legacy is not supported.
return v
}
}
}
function finalize_unquoted(expanded, v) {
while (expanded) {
if (expanded ~ /^;/) {
if (v)
Args[++N] = v
v = ""
expanded = substr(expanded, 2)
} else if (expanded ~ /^\\./) {
v = v substr(expanded, 2, 1)
expanded = substr(expanded, 3)
} else {
v = v substr(expanded, 1, 1)
expanded = substr(expanded, 2)
}
}
if (v)
Args[++N] = v
}
# We keep and reprocess some escape sequences in here.
function argument( arg, expanded, v) {
if (regexp("\\[=*\\["))
Args[++N] = unbracket(RLENGTH - 2)
else if ((arg = quoted_argument()) || arg == "")
finalize_quoted(expand(arg))
else if ((arg = unquoted_argument()))
finalize_unquoted(expand(arg))
else
return 0
return 1
}
# ------------------------------------------------------------------------------
function identifier( v) {
if (!match($0, /^[A-Za-z_][A-Za-z0-9_]*/))
return 0
v = substr($0, 1, RLENGTH)
$0 = substr($0, RLENGTH + 1)
return v
}
function separation() {
if (space() || bracket_comment())
return 1
if (!line_ending())
return 0
if (getline > 0)
return 1
fatal("premature end of file")
}
function command_invocation( level) {
while (space()) {}
Command = identifier()
if (!Command)
return 0
while (space()) {}
Command = tolower(Command)
for (N in Args)
delete Args[N]
N = 0
expect(literal("("))
while (1) {
while (separation()) {}
if (literal(")")) {
if (!level--)
break
Args[++N] = ")"
continue
}
if (literal("(")) {
level++
Args[++N] = "("
continue
}
expect(argument())
if (!/^[()]/)
expect(separation())
}
return 1
}
{
command_invocation()
expect(line_ending())
}

324
tools/lxdrgen-c.awk Normal file
View File

@@ -0,0 +1,324 @@
# lxdrgen-c.awk: C backend for lxdrgen.awk.
#
# Copyright (c) 2022, Přemysl Eric Janouch <p@janouch.name>
# SPDX-License-Identifier: 0BSD
#
# Neither *_new() nor *_destroy() functions are provided, because they'd only
# be useful for top-levels, and are merely extra malloc()/free() calls.
# Users are expected to reuse buffers.
#
# Similarly, no constructors are produced--those are easy to write manually.
#
# All arrays are deserialized zero-terminated, so u8<> and i8<> can be directly
# used as C strings.
#
# All types must be able to dispose partially zero values going from the back,
# i.e., in the reverse order of deserialization.
function define_internal(name, ctype) {
Types[name] = "internal"
CodegenCType[name] = ctype
}
function define_int(shortname, ctype) {
define_internal(shortname, ctype)
CodegenSerialize[shortname] = \
"\tstr_pack_" shortname "(w, %s);\n"
CodegenDeserialize[shortname] = \
"\tif (!msg_unpacker_" shortname "(r, &%s))\n" \
"\t\treturn false;\n"
}
function define_sint(size) { define_int("i" size, "int" size "_t") }
function define_uint(size) { define_int("u" size, "uint" size "_t") }
function codegen_begin() {
define_sint("8")
define_sint("16")
define_sint("32")
define_sint("64")
define_uint("8")
define_uint("16")
define_uint("32")
define_uint("64")
define_internal("string", "struct str")
CodegenDispose["string"] = "\tstr_free(&%s);\n"
CodegenSerialize["string"] = \
"\tif (!proto_string_serialize(&%s, w))\n" \
"\t\treturn false;\n"
CodegenDeserialize["string"] = \
"\tif (!proto_string_deserialize(&%s, r))\n" \
"\t\treturn false;\n"
define_internal("bool", "bool")
CodegenSerialize["bool"] = \
"\tstr_pack_u8(w, !!%s);\n"
CodegenDeserialize["bool"] = \
"\t{\n" \
"\t\tuint8_t v = 0;\n" \
"\t\tif (!msg_unpacker_u8(r, &v))\n" \
"\t\t\treturn false;\n" \
"\t\t%s = !!v;\n" \
"\t}\n"
print "// Code generated from " FILENAME ". DO NOT EDIT."
print "// This file directly depends on liberty.c, but doesn't include it."
print ""
print "static bool"
print "proto_string_serialize(const struct str *s, struct str *w) {"
print "\tif (s->len > UINT32_MAX)"
print "\t\treturn false;"
print "\tstr_pack_u32(w, s->len);"
print "\tstr_append_str(w, s);"
print "\treturn true;"
print "}"
print ""
print "static bool"
print "proto_string_deserialize(struct str *s, struct msg_unpacker *r) {"
print "\tuint32_t len = 0;"
print "\tif (!msg_unpacker_u32(r, &len))"
print "\t\treturn false;"
print "\tif (msg_unpacker_get_available(r) < len)"
print "\t\treturn false;"
print "\t*s = str_make();"
print "\tstr_append_data(s, r->data + r->offset, len);"
print "\tr->offset += len;"
print "\tif (!utf8_validate (s->str, s->len))"
print "\t\treturn false;"
print "\treturn true;"
print "}"
}
function codegen_constant(name, value) {
print ""
print "enum { " PrefixUpper name " = " value " };"
}
function codegen_enum_value(name, subname, value, cg) {
append(cg, "fields",
"\t" PrefixUpper toupper(cameltosnake(name)) "_" subname \
" = " value ",\n")
}
function codegen_enum(name, cg, ctype) {
ctype = "enum " PrefixLower cameltosnake(name)
print ""
print ctype " {"
print cg["fields"] "};"
# XXX: This should also check if it isn't out-of-range for any reason,
# but our usage of sprintf() stands in the way a bit.
CodegenSerialize[name] = "\tstr_pack_i8(w, %s);\n"
CodegenDeserialize[name] = \
"\t{\n" \
"\t\tint8_t v = 0;\n" \
"\t\tif (!msg_unpacker_i8(r, &v) || !v)\n" \
"\t\t\treturn false;\n" \
"\t\t%s = v;\n" \
"\t}\n"
CodegenCType[name] = ctype
for (i in cg)
delete cg[i]
}
function codegen_struct_tag(d, cg, f) {
f = "self->" d["name"]
append(cg, "fields", "\t" CodegenCType[d["type"]] " " d["name"] ";\n")
append(cg, "dispose", sprintf(CodegenDispose[d["type"]], f))
append(cg, "serialize", sprintf(CodegenSerialize[d["type"]], f))
# Do not deserialize here, that would be out of order.
}
function codegen_struct_field(d, cg, f, dispose, serialize, deserialize) {
f = "self->" d["name"]
dispose = CodegenDispose[d["type"]]
serialize = CodegenSerialize[d["type"]]
deserialize = CodegenDeserialize[d["type"]]
if (!d["isarray"]) {
append(cg, "fields", "\t" CodegenCType[d["type"]] " " d["name"] ";\n")
append(cg, "dispose", sprintf(dispose, f))
append(cg, "serialize", sprintf(serialize, f))
append(cg, "deserialize", sprintf(deserialize, f))
return
}
append(cg, "fields",
"\t" CodegenCType["u32"] " " d["name"] "_len;\n" \
"\t" CodegenCType[d["type"]] " *" d["name"] ";\n")
if (dispose)
append(cg, "dispose", "\tif (" f ")\n" \
"\t\tfor (size_t i = 0; i < " f "_len; i++)\n" \
indent(indent(sprintf(dispose, f "[i]"))))
append(cg, "dispose", "\tfree(" f ");\n")
append(cg, "serialize", sprintf(CodegenSerialize["u32"], f "_len"))
if (d["type"] == "u8" || d["type"] == "i8") {
append(cg, "serialize",
"\tstr_append_data(w, " f ", " f "_len);\n")
} else if (serialize) {
append(cg, "serialize",
"\tfor (size_t i = 0; i < " f "_len; i++)\n" \
indent(sprintf(serialize, f "[i]")))
}
append(cg, "deserialize", sprintf(CodegenDeserialize["u32"], f "_len") \
"\tif (!(" f " = calloc(" f "_len + 1, sizeof *" f ")))\n" \
"\t\treturn false;\n")
if (d["type"] == "u8" || d["type"] == "i8") {
append(cg, "deserialize",
"\tif (msg_unpacker_get_available(r) < " f "_len)\n" \
"\t\treturn false;\n" \
"\tmemcpy(" f ", r->data + r->offset, " f "_len);\n" \
"\tr->offset += " f "_len;\n")
} else if (deserialize) {
append(cg, "deserialize",
"\tfor (size_t i = 0; i < " f "_len; i++)\n" \
indent(sprintf(deserialize, f "[i]")))
}
}
function codegen_struct(name, cg, ctype, funcname) {
ctype = "struct " PrefixLower cameltosnake(name)
print ""
print ctype " {"
print cg["fields"] "};"
if (cg["dispose"]) {
funcname = PrefixLower cameltosnake(name) "_free"
print ""
print "static void\n" funcname "(" ctype " *self) {"
print cg["dispose"] "}"
CodegenDispose[name] = "\t" funcname "(&%s);\n"
}
if (cg["serialize"]) {
funcname = PrefixLower cameltosnake(name) "_serialize"
print ""
print "static bool\n" \
funcname "(\n\t\tconst " ctype " *self, struct str *w) {"
print cg["serialize"] "\treturn true;"
print "}"
CodegenSerialize[name] = "\tif (!" funcname "(&%s, w))\n" \
"\t\treturn false;\n"
}
if (cg["deserialize"]) {
funcname = PrefixLower cameltosnake(name) "_deserialize"
print ""
print "static bool\n" \
funcname "(\n\t\t" ctype " *self, struct msg_unpacker *r) {"
print cg["deserialize"] "\treturn true;"
print "}"
CodegenDeserialize[name] = "\tif (!" funcname "(&%s, r))\n" \
"\t\treturn false;\n"
}
CodegenCType[name] = ctype
for (i in cg)
delete cg[i]
}
function codegen_union_tag(d, cg) {
cg["tagtype"] = d["type"]
cg["tagname"] = d["name"]
append(cg, "fields", "\t" CodegenCType[d["type"]] " " d["name"] ";\n")
}
function codegen_union_struct( \
name, casename, cg, scg, structname, fieldname, fullcasename) {
# Don't generate obviously useless structs.
fullcasename = toupper(cameltosnake(cg["tagtype"])) "_" casename
if (!scg["dispose"] && !scg["deserialize"]) {
append(cg, "structless", "\tcase " PrefixUpper fullcasename ":\n")
for (i in scg)
delete scg[i]
return
}
# And thus not all generated structs are present in Types.
structname = name "_" casename
fieldname = tolower(casename)
codegen_struct(structname, scg)
append(cg, "fields", "\t" CodegenCType[structname] " " fieldname ";\n")
if (CodegenDispose[structname])
append(cg, "dispose", "\tcase " PrefixUpper fullcasename ":\n" \
indent(sprintf(CodegenDispose[structname], "self->" fieldname)) \
"\t\tbreak;\n")
# With no de/serialization code, this will simply recognize the tag.
append(cg, "serialize", "\tcase " PrefixUpper fullcasename ":\n" \
indent(sprintf(CodegenSerialize[structname], "self->" fieldname)) \
"\t\tbreak;\n")
append(cg, "deserialize", "\tcase " PrefixUpper fullcasename ":\n" \
indent(sprintf(CodegenDeserialize[structname], "self->" fieldname)) \
"\t\tbreak;\n")
}
function codegen_union(name, cg, f, ctype, funcname) {
ctype = "union " PrefixLower cameltosnake(name)
print ""
print ctype " {"
print cg["fields"] "};"
f = "self->" cg["tagname"]
if (cg["dispose"]) {
funcname = PrefixLower cameltosnake(name) "_free"
print ""
print "static void\n" funcname "(" ctype " *self) {"
print "\tswitch (" f ") {"
if (cg["structless"])
print cg["structless"] \
indent(sprintf(CodegenDispose[cg["tagtype"]], f)) "\t\tbreak;"
print cg["dispose"] "\tdefault:"
print "\t\tbreak;"
print "\t}"
print "}"
CodegenDispose[name] = "\t" funcname "(&%s);\n"
}
if (cg["serialize"]) {
funcname = PrefixLower cameltosnake(name) "_serialize"
print ""
print "static bool\n" \
funcname "(\n\t\tconst " ctype " *self, struct str *w) {"
print "\tswitch (" f ") {"
if (cg["structless"])
print cg["structless"] \
indent(sprintf(CodegenSerialize[cg["tagtype"]], f)) "\t\tbreak;"
print cg["serialize"] "\tdefault:"
print "\t\treturn false;"
print "\t}"
print "\treturn true;"
print "}"
CodegenSerialize[name] = "\tif (!" funcname "(&%s, w))\n" \
"\t\treturn false;\n"
}
if (cg["deserialize"]) {
funcname = PrefixLower cameltosnake(name) "_deserialize"
print ""
print "static bool\n" \
funcname "(\n\t\t" ctype " *self, struct msg_unpacker *r) {"
print sprintf(CodegenDeserialize[cg["tagtype"]], f)
print "\tswitch (" f ") {"
if (cg["structless"])
print cg["structless"] "\t\tbreak;"
print cg["deserialize"] "\tdefault:"
print "\t\treturn false;"
print "\t}"
print "\treturn true;"
print "}"
CodegenDeserialize[name] = "\tif (!" funcname "(&%s, r))\n" \
"\t\treturn false;\n"
}
CodegenCType[name] = ctype
for (i in cg)
delete cg[i]
}

541
tools/lxdrgen-go.awk Normal file
View File

@@ -0,0 +1,541 @@
# lxdrgen-go.awk: Go backend for lxdrgen.awk.
#
# Copyright (c) 2022, Přemysl Eric Janouch <p@janouch.name>
# SPDX-License-Identifier: 0BSD
#
# This backend also enables proxying to other endpoints using JSON.
function define_internal(name, gotype) {
Types[name] = "internal"
CodegenGoType[name] = gotype
}
function define_sint(size, shortname, gotype) {
shortname = "i" size
gotype = "int" size
define_internal(shortname, gotype)
CodegenAppendJSON[shortname] = \
"\tb = strconv.AppendInt(b, int64(%s), 10)\n"
if (size == 8) {
CodegenSerialize[shortname] = "\tdata = append(data, uint8(%s))\n"
CodegenDeserialize[shortname] = \
"\tif len(data) >= 1 {\n" \
"\t\t%s, data = int8(data[0]), data[1:]\n" \
"\t} else {\n" \
"\t\treturn nil, false\n" \
"\t}\n"
return
}
CodegenSerialize[shortname] = \
"\tdata = binary.BigEndian.AppendUint" size "(data, uint" size "(%s))\n"
CodegenDeserialize[shortname] = \
"\tif len(data) >= " (size / 8) " {\n" \
"\t\t%s = " gotype "(binary.BigEndian.Uint" size "(data))\n" \
"\t\tdata = data[" (size / 8) ":]\n" \
"\t} else {\n" \
"\t\treturn nil, false\n" \
"\t}\n"
}
function define_uint(size, shortname, gotype) {
# Both []byte and []uint8 luckily marshal as base64-encoded JSON strings,
# so there's no need to rename the type as an exception.
shortname = "u" size
gotype = "uint" size
define_internal(shortname, gotype)
CodegenAppendJSON[shortname] = \
"\tb = strconv.AppendUint(b, uint64(%s), 10)\n"
if (size == 8) {
CodegenSerialize[shortname] = "\tdata = append(data, %s)\n"
CodegenDeserialize[shortname] = \
"\tif len(data) >= 1 {\n" \
"\t\t%s, data = data[0], data[1:]\n" \
"\t} else {\n" \
"\t\treturn nil, false\n" \
"\t}\n"
return
}
CodegenSerialize[shortname] = \
"\tdata = binary.BigEndian.AppendUint" size "(data, %s)\n"
CodegenDeserialize[shortname] = \
"\tif len(data) >= " (size / 8) " {\n" \
"\t\t%s = binary.BigEndian.Uint" size "(data)\n" \
"\t\tdata = data[" (size / 8) ":]\n" \
"\t} else {\n" \
"\t\treturn nil, false\n" \
"\t}\n"
}
# Currently two outputs cannot coexist within the same package.
function codegen_private(name) {
return "proto" name
}
function codegen_begin( funcname) {
define_sint("8")
define_sint("16")
define_sint("32")
define_sint("64")
define_uint("8")
define_uint("16")
define_uint("32")
define_uint("64")
define_internal("bool", "bool")
define_internal("string", "string")
# Cater to "go generate", for what it's worth.
CodegenPackage = ENV["GOPACKAGE"]
if (!CodegenPackage)
CodegenPackage = "main"
print "// Code generated from " FILENAME ". DO NOT EDIT."
print ""
print "package " CodegenPackage
print ""
print "import ("
print "\t`encoding/base64`"
print "\t`encoding/binary`"
print "\t`encoding/json`"
print "\t`errors`"
print "\t`math`"
print "\t`strconv`"
print "\t`unicode/utf8`"
print ")"
print ""
print "// This is a hack to always use the base64 import."
print "var _ = base64.StdEncoding"
print ""
CodegenAppendJSON["bool"] = \
"\tb = strconv.AppendBool(b, %s)\n"
CodegenSerialize["bool"] = \
"\tif %s {\n" \
"\t\tdata = append(data, 1)\n" \
"\t} else {\n" \
"\t\tdata = append(data, 0)\n" \
"\t}\n"
funcname = codegen_private("ConsumeBoolFrom")
print "// " funcname " tries to deserialize a boolean value"
print "// from the beginning of a byte stream. When successful,"
print "// it returns a subslice with any data that might follow."
print "func " funcname "(data []byte, b *bool) ([]byte, bool) {"
print "\tif len(data) < 1 {"
print "\t\treturn nil, false"
print "\t}"
print "\tif data[0] != 0 {"
print "\t\t*b = true"
print "\t} else {"
print "\t\t*b = false"
print "\t}"
print "\treturn data[1:], true"
print "}"
print ""
CodegenDeserialize["bool"] = \
"\tif data, ok = " funcname "(data, &%s); !ok {\n" \
"\t\treturn nil, ok\n" \
"\t}\n"
funcname = codegen_private("AppendStringTo")
print "// " funcname " tries to serialize a string value,"
print "// appending it to the end of a byte stream."
print "func " funcname "(data []byte, s string) ([]byte, bool) {"
print "\tif len(s) > math.MaxUint32 {"
print "\t\treturn nil, false"
print "\t}"
print "\tdata = binary.BigEndian.AppendUint32(data, uint32(len(s)))"
print "\treturn append(data, s...), true"
print "}"
print ""
CodegenSerialize["string"] = \
"\tif data, ok = " funcname "(data, %s); !ok {\n" \
"\t\treturn nil, ok\n" \
"\t}\n"
funcname = codegen_private("ConsumeStringFrom")
print "// " funcname " tries to deserialize a string value"
print "// from the beginning of a byte stream. When successful,"
print "// it returns a subslice with any data that might follow."
print "func " funcname "(data []byte, s *string) ([]byte, bool) {"
print "\tif len(data) < 4 {"
print "\t\treturn nil, false"
print "\t}"
print "\tlength := binary.BigEndian.Uint32(data)"
print "\tif data = data[4:]; uint64(len(data)) < uint64(length) {"
print "\t\treturn nil, false"
print "\t}"
print "\t*s = string(data[:length])"
print "\tif !utf8.ValidString(*s) {"
print "\t\treturn nil, false"
print "\t}"
print "\treturn data[length:], true"
print "}"
print ""
CodegenDeserialize["string"] = \
"\tif data, ok = " funcname "(data, &%s); !ok {\n" \
"\t\treturn nil, ok\n" \
"\t}\n"
funcname = codegen_private("UnmarshalEnumJSON")
print "// " funcname " converts a JSON fragment to an integer,"
print "// ensuring that it's within the expected range of enum values."
print "func " funcname "(data []byte) (int64, error) {"
print "\tvar n int64"
print "\tif err := json.Unmarshal(data, &n); err != nil {"
print "\t\treturn 0, err"
print "\t} else if n > math.MaxInt8 || n < math.MinInt8 {"
print "\t\treturn 0, errors.New(`integer out of range`)"
print "\t} else {"
print "\t\treturn n, nil"
print "\t}"
print "}"
print ""
}
function codegen_constant(name, value) {
print "const " PrefixCamel snaketocamel(name) " = " value
print ""
}
function codegen_enum_value(name, subname, value, cg, goname) {
goname = PrefixCamel name snaketocamel(subname)
append(cg, "fields",
"\t" goname " = " value "\n")
append(cg, "stringer",
"\tcase " goname ":\n" \
"\t\treturn `" snaketocamel(subname) "`\n")
append(cg, "marshal",
goname ",\n")
append(cg, "unmarshal",
"\tcase `" snaketocamel(subname) "`:\n" \
"\t\t*v = " goname "\n")
}
function codegen_enum(name, cg, gotype, fields, funcname) {
gotype = PrefixCamel name
print "type " gotype " int8"
print ""
print "const ("
print cg["fields"] ")"
print ""
print "func (v " gotype ") String() string {"
print "\tswitch v {"
print cg["stringer"] "\tdefault:"
print "\t\treturn strconv.Itoa(int(v))"
print "\t}"
print "}"
print ""
CodegenIsMarshaler[name] = 1
fields = cg["marshal"]
sub(/,\n$/, ":", fields)
gsub(/\n/, "\n\t", fields)
print "func (v " gotype ") MarshalJSON() ([]byte, error) {"
print "\tswitch v {"
print indent("case " fields)
print "\t\treturn []byte(`\"` + v.String() + `\"`), nil"
print "\t}"
print "\treturn json.Marshal(int(v))"
print "}"
print ""
funcname = codegen_private("UnmarshalEnumJSON")
print "func (v *" gotype ") UnmarshalJSON(data []byte) error {"
print "\tvar s string"
print "\tif json.Unmarshal(data, &s) == nil {"
print "\t\t// Handled below."
print "\t} else if n, err := " funcname "(data); err != nil {"
print "\t\treturn err"
print "\t} else {"
print "\t\t*v = " gotype "(n)"
print "\t\treturn nil"
print "\t}"
print ""
print "\tswitch s {"
print cg["unmarshal"] "\tdefault:"
print "\t\treturn errors.New(`unrecognized value: ` + s)"
print "\t}"
print "\treturn nil"
print "}"
print ""
# XXX: This should also check if it isn't out-of-range for any reason,
# but our usage of sprintf() stands in the way a bit.
CodegenSerialize[name] = "\tdata = append(data, uint8(%s))\n"
CodegenDeserialize[name] = \
"\tif len(data) >= 1 {\n" \
"\t\t%s, data = " gotype "(data[0]), data[1:]\n" \
"\t} else {\n" \
"\t\treturn nil, false\n" \
"\t}\n"
CodegenGoType[name] = gotype
for (i in cg)
delete cg[i]
}
function codegen_marshal(type, f, marshal) {
if (CodegenAppendJSON[type])
return sprintf(CodegenAppendJSON[type], f)
# Complex types are json.Marshalers, there's no need to json.Marshal(&f).
if (CodegenIsMarshaler[type])
marshal = f ".MarshalJSON()"
else
marshal = "json.Marshal(" f ")"
return \
"\tif j, err := " marshal "; err != nil {\n" \
"\t\treturn nil, err\n" \
"\t} else {\n" \
"\t\tb = append(b, j...)\n" \
"\t}\n"
}
function codegen_struct_field_marshal(d, cg, camel, f, marshal) {
camel = snaketocamel(d["name"])
f = "s." camel
if (!d["isarray"]) {
append(cg, "marshal",
"\tb = append(b, `,\"" decapitalize(camel) "\":`...)\n" \
codegen_marshal(d["type"], f))
return
}
# Note that we do not produce `null` for nil slices, unlike encoding/json.
# And arrays never get deserialized as such.
if (d["type"] == "u8") {
append(cg, "marshal",
"\tb = append(b, `,\"" decapitalize(camel) "\":\"`...)\n" \
"\tb = append(b, base64.StdEncoding.EncodeToString(" f ")...)\n" \
"\tb = append(b, '\"')\n")
return
}
append(cg, "marshal",
"\tb = append(b, `,\"" decapitalize(camel) "\":[`...)\n" \
"\tfor i := 0; i < len(" f "); i++ {\n" \
"\t\tif i > 0 {\n" \
"\t\t\tb = append(b, ',')\n" \
"\t\t}\n" \
indent(codegen_marshal(d["type"], f "[i]")) \
"\t}\n" \
"\tb = append(b, ']')\n")
}
function codegen_struct_field(d, cg, camel, f, serialize, deserialize) {
codegen_struct_field_marshal(d, cg)
camel = snaketocamel(d["name"])
f = "s." camel
serialize = CodegenSerialize[d["type"]]
deserialize = CodegenDeserialize[d["type"]]
if (!d["isarray"]) {
append(cg, "fields", "\t" camel " " CodegenGoType[d["type"]] \
" `json:\"" decapitalize(camel) "\"`\n")
append(cg, "serialize", sprintf(serialize, f))
append(cg, "deserialize", sprintf(deserialize, f))
return
}
append(cg, "fields", "\t" camel " []" CodegenGoType[d["type"]] \
" `json:\"" decapitalize(camel) "\"`\n")
# XXX: This should also check if it isn't out-of-range for any reason.
append(cg, "serialize",
sprintf(CodegenSerialize["u32"], "uint32(len(" f "))"))
if (d["type"] == "u8") {
append(cg, "serialize",
"\tdata = append(data, " f "...)\n")
} else {
append(cg, "serialize",
"\tfor i := 0; i < len(" f "); i++ {\n" \
indent(sprintf(serialize, f "[i]")) \
"\t}\n")
}
append(cg, "deserialize",
"\t{\n" \
"\t\tvar length uint32\n" \
indent(sprintf(CodegenDeserialize["u32"], "length")))
if (d["type"] == "u8") {
append(cg, "deserialize",
"\t\tif uint64(len(data)) < uint64(length) {\n" \
"\t\t\treturn nil, false\n" \
"\t\t}\n" \
"\t\t" f ", data = data[:length], data[length:]\n" \
"\t}\n")
} else {
append(cg, "deserialize",
"\t\t" f " = make([]" CodegenGoType[d["type"]] ", length)\n" \
"\t}\n" \
"\tfor i := 0; i < len(" f "); i++ {\n" \
indent(sprintf(deserialize, f "[i]")) \
"\t}\n")
}
}
function codegen_struct_tag(d, cg, camel, f) {
codegen_struct_field_marshal(d, cg)
camel = snaketocamel(d["name"])
f = "s." camel
append(cg, "fields", "\t" camel " " CodegenGoType[d["type"]] \
" `json:\"" decapitalize(camel) "\"`\n")
append(cg, "serialize", sprintf(CodegenSerialize[d["type"]], f))
# Do not deserialize here, that is already done by the containing union.
}
function codegen_struct(name, cg, gotype) {
gotype = PrefixCamel name
print "type " gotype " struct {\n" cg["fields"] "}\n"
if (cg["marshal"]) {
CodegenIsMarshaler[name] = 1
print "func (s *" gotype ") MarshalJSON() ([]byte, error) {"
print "\tb := []byte{}"
print cg["marshal"] "\tb[0] = '{'"
print "\treturn append(b, '}'), nil"
print "}"
print ""
}
if (cg["serialize"]) {
print "func (s *" gotype ") AppendTo(data []byte) ([]byte, bool) {"
print "\tok := true"
print cg["serialize"] "\treturn data, ok"
print "}"
print ""
CodegenSerialize[name] = \
"\tif data, ok = %s.AppendTo(data); !ok {\n" \
"\t\treturn nil, ok\n" \
"\t}\n"
}
if (cg["deserialize"]) {
print "func (s *" gotype ") ConsumeFrom(data []byte) ([]byte, bool) {"
print "\tok := true"
print cg["deserialize"] "\treturn data, ok"
print "}"
print ""
CodegenDeserialize[name] = \
"\tif data, ok = %s.ConsumeFrom(data); !ok {\n" \
"\t\treturn nil, ok\n" \
"\t}\n"
}
CodegenGoType[name] = gotype
for (i in cg)
delete cg[i]
}
function codegen_union_tag(d, cg) {
cg["tagtype"] = d["type"]
cg["tagname"] = d["name"]
# The tag is implied from the type of struct stored in the interface.
}
function codegen_union_struct(name, casename, cg, scg, structname, init) {
# And thus not all generated structs are present in Types.
structname = name snaketocamel(casename)
codegen_struct(structname, scg)
init = CodegenGoType[structname] "{" snaketocamel(cg["tagname"]) \
": " decapitalize(snaketocamel(cg["tagname"])) "}"
append(cg, "unmarshal",
"\tcase " CodegenGoType[cg["tagtype"]] snaketocamel(casename) ":\n" \
"\t\ts := " init "\n" \
"\t\terr = json.Unmarshal(data, &s)\n" \
"\t\tu.Interface = &s\n")
append(cg, "serialize",
"\tcase *" CodegenGoType[structname] ":\n" \
indent(sprintf(CodegenSerialize[structname], "union")))
append(cg, "deserialize",
"\tcase " CodegenGoType[cg["tagtype"]] snaketocamel(casename) ":\n" \
"\t\ts := " init "\n" \
indent(sprintf(CodegenDeserialize[structname], "s")) \
"\t\tu.Interface = &s\n")
}
function codegen_union(name, cg, gotype, tagfield, tagvar) {
gotype = PrefixCamel name
print "type " gotype " struct {"
print "\tInterface any"
print "}"
print ""
# This cannot be a pointer method, it wouldn't work recursively.
CodegenIsMarshaler[name] = 1
print "func (u " gotype ") MarshalJSON() ([]byte, error) {"
print "\treturn u.Interface.(json.Marshaler).MarshalJSON()"
print "}"
print ""
tagfield = snaketocamel(cg["tagname"])
tagvar = decapitalize(tagfield)
print "func (u *" gotype ") UnmarshalJSON(data []byte) (err error) {"
print "\tvar t struct {"
print "\t\t" tagfield " " CodegenGoType[cg["tagtype"]] \
" `json:\"" tagvar "\"`"
print "\t}"
print "\tif err := json.Unmarshal(data, &t); err != nil {"
print "\t\treturn err"
print "\t}"
print ""
print "\tswitch " tagvar " := t." tagfield "; " tagvar " {"
print cg["unmarshal"] "\tdefault:"
print "\t\terr = errors.New(`unsupported value: ` + " tagvar ".String())"
print "\t}"
print "\treturn err"
print "}"
print ""
# XXX: Consider changing the interface into an AppendTo/ConsumeFrom one,
# that would eliminate these type case switches entirely.
# On the other hand, it would make it possible to send unsuitable structs.
print "func (u *" gotype ") AppendTo(data []byte) ([]byte, bool) {"
print "\tok := true"
print "\tswitch union := u.Interface.(type) {"
print cg["serialize"] "\tdefault:"
print "\t\treturn nil, false"
print "\t}"
print "\treturn data, ok"
print "}"
print ""
CodegenSerialize[name] = \
"\tif data, ok = %s.AppendTo(data); !ok {\n" \
"\t\treturn nil, ok\n" \
"\t}\n"
print "func (u *" gotype ") ConsumeFrom(data []byte) ([]byte, bool) {"
print "\tok := true"
print "\tvar " tagvar " " CodegenGoType[cg["tagtype"]]
print sprintf(CodegenDeserialize[cg["tagtype"]], tagvar)
print "\tswitch " tagvar " {"
print cg["deserialize"] "\tdefault:"
print "\t\treturn nil, false"
print "\t}"
print "\treturn data, ok"
print "}"
print ""
CodegenDeserialize[name] = \
"\tif data, ok = %s.ConsumeFrom(data); !ok {\n" \
"\t\treturn nil, ok\n" \
"\t}\n"
CodegenGoType[name] = gotype
for (i in cg)
delete cg[i]
}

226
tools/lxdrgen-mjs.awk Normal file
View File

@@ -0,0 +1,226 @@
# lxdrgen-mjs.awk: Javascript backend for lxdrgen.awk.
#
# Copyright (c) 2022, Přemysl Eric Janouch <p@janouch.name>
# SPDX-License-Identifier: 0BSD
#
# This backend is currently for decoding the binary format only.
# (JSON is way too expensive to process and transfer.)
#
# Import the resulting script as a Javascript module.
# Identifiers intentionally aren't prefixed.
function define_internal(name) {
Types[name] = "internal"
}
function define_sint(size, shortname) {
shortname = "i" size
define_internal(shortname)
CodegenDeserialize[shortname] = "\t%s = r." shortname "()\n"
print ""
print "\t" shortname "() {"
if (size == "64") {
# XXX: 2^53 - 1 must be enough for anyone. BigInts are a PITA.
print "\t\tconst " shortname \
" = Number(this.getBigInt" size "(this.offset))"
} else {
print "\t\tconst " shortname " = this.getInt" size "(this.offset)"
}
print "\t\tthis.offset += " (size / 8)
print "\t\treturn " shortname
print "\t}"
}
function define_uint(size, shortname) {
shortname = "u" size
define_internal(shortname)
CodegenDeserialize[shortname] = "\t%s = r." shortname "()\n"
print ""
print "\t" shortname "() {"
if (size == "64") {
# XXX: 2^53 - 1 must be enough for anyone. BigInts are a PITA.
print "\t\tconst " shortname \
" = Number(this.getBigUint" size "(this.offset))"
} else {
print "\t\tconst " shortname " = this.getUint" size "(this.offset)"
}
print "\t\tthis.offset += " (size / 8)
print "\t\treturn " shortname
print "\t}"
}
function codegen_begin() {
print "// Code generated from " FILENAME ". DO NOT EDIT."
print ""
print "export class Reader extends DataView {"
print "\tconstructor() {"
print "\t\tsuper(...arguments)"
print "\t\tthis.offset = 0"
print "\t\tthis.decoder = new TextDecoder('utf-8', {fatal: true})"
print "\t}"
print ""
print "\tget empty() {"
print "\t\treturn this.byteLength <= this.offset"
print "\t}"
print ""
print "\trequire(len) {"
print "\t\tif (this.byteLength - this.offset < len)"
print "\t\t\tthrow `Premature end of data`"
print "\t\treturn this.byteOffset + this.offset"
print "\t}"
define_internal("string")
CodegenDeserialize["string"] = "\t%s = r.string()\n"
print ""
print "\tstring() {"
print "\t\tconst len = this.getUint32(this.offset)"
print "\t\tthis.offset += 4"
print "\t\tconst array = new Uint8Array("
print "\t\t\tthis.buffer, this.require(len), len)"
print "\t\tthis.offset += len"
print "\t\treturn this.decoder.decode(array)"
print "\t}"
define_internal("bool")
CodegenDeserialize["bool"] = "\t%s = r.bool()\n"
print ""
print "\tbool() {"
print "\t\tconst u8 = this.getUint8(this.offset)"
print "\t\tthis.offset += 1"
print "\t\treturn u8 != 0"
print "\t}"
define_sint("8")
define_sint("16")
define_sint("32")
define_sint("64")
define_uint("8")
define_uint("16")
define_uint("32")
define_uint("64")
print "}"
}
function codegen_constant(name, value) {
print ""
print "export const " decapitalize(snaketocamel(name)) " = " value
}
function codegen_enum_value(name, subname, value, cg) {
append(cg, "fields", "\t" snaketocamel(subname) ": " value ",\n")
}
function codegen_enum(name, cg) {
print ""
print "export const " name " = Object.freeze({"
print cg["fields"] "})"
CodegenDeserialize[name] = "\t%s = r.i8()\n"
for (i in cg)
delete cg[i]
}
function codegen_struct_field(d, cg, camel, f, deserialize) {
camel = decapitalize(snaketocamel(d["name"]))
f = "s." camel
append(cg, "fields", "\t" camel "\n")
deserialize = CodegenDeserialize[d["type"]]
if (!d["isarray"]) {
append(cg, "deserialize", sprintf(deserialize, f))
return
}
append(cg, "deserialize",
"\t{\n" \
indent(sprintf(CodegenDeserialize["u32"], "const len")))
if (d["type"] == "u8") {
append(cg, "deserialize",
"\t\t" f " = new Uint8Array(\n" \
"\t\t\tr.buffer, r.require(len), len)\n" \
"\t\tr.offset += len\n" \
"\t}\n")
return
}
if (d["type"] == "i8") {
append(cg, "deserialize",
"\t\t" f " = new Int8Array(\n" \
"\t\t\tr.buffer, r.require(len), len)\n" \
"\t\tr.offset += len\n" \
"\t}\n")
return
}
append(cg, "deserialize",
"\t\t" f " = new Array(len)\n" \
"\t}\n" \
"\tfor (let i = 0; i < " f ".length; i++)\n" \
indent(sprintf(deserialize, f "[i]")))
}
function codegen_struct_tag(d, cg) {
append(cg, "fields", "\t" decapitalize(snaketocamel(d["name"])) "\n")
# Do not deserialize here, that is already done by the containing union.
}
function codegen_struct(name, cg) {
print ""
print "export class " name " {"
print cg["fields"] cg["methods"]
print "\tstatic deserialize(r) {"
print "\t\tconst s = new " name "()"
print indent(cg["deserialize"]) "\t\treturn s"
print "\t}"
print "}"
CodegenDeserialize[name] = "\t%s = " name ".deserialize(r)\n"
for (i in cg)
delete cg[i]
}
function codegen_union_tag(d, cg) {
cg["tagtype"] = d["type"]
cg["tagname"] = d["name"]
}
function codegen_union_struct(name, casename, cg, scg, structname) {
append(scg, "methods",
"\n" \
"\tconstructor() {\n" \
"\t\tthis." decapitalize(snaketocamel(cg["tagname"])) \
" = " cg["tagtype"] "." snaketocamel(casename) "\n" \
"\t}\n")
# And thus not all generated structs are present in Types.
structname = name snaketocamel(casename)
codegen_struct(structname, scg)
append(cg, "deserialize",
"\tcase " cg["tagtype"] "." snaketocamel(casename) ":\n" \
"\t{\n" \
indent(sprintf(CodegenDeserialize[structname], "const s")) \
"\t\treturn s\n" \
"\t}\n")
}
function codegen_union(name, cg, tagvar) {
tagvar = decapitalize(snaketocamel(cg["tagname"]))
print ""
print "export function deserialize" name "(r) {"
print sprintf(CodegenDeserialize[cg["tagtype"]], "const " tagvar) \
"\tswitch (" tagvar ") {"
print cg["deserialize"] "\tdefault:"
print "\t\tthrow `Unknown " cg["tagtype"] " (${tagvar})`"
print "\t}"
print "}"
CodegenDeserialize[name] = "\t%s = deserialize" name "(r)\n"
for (i in cg)
delete cg[i]
}

291
tools/lxdrgen.awk Normal file
View File

@@ -0,0 +1,291 @@
# lxdrgen.awk: an XDR-derived code generator for network protocols.
#
# Copyright (c) 2022, Přemysl Eric Janouch <p@janouch.name>
# SPDX-License-Identifier: 0BSD
#
# Usage: env LC_ALL=C awk -f lxdrgen.awk -f lxdrgen-{c,go,mjs}.awk \
# -v PrefixCamel=Foo foo.lxdr > foo.{c,go,mjs} | {clang-format,gofmt,...}
# --- Utilities ----------------------------------------------------------------
function cameltosnake(s) {
while (match(s, /[[:lower:]][[:upper:]]/)) {
s = substr(s, 1, RSTART) "_" \
tolower(substr(s, RSTART + 1, RLENGTH - 1)) \
substr(s, RSTART + RLENGTH)
}
return tolower(s)
}
function snaketocamel(s) {
s = toupper(substr(s, 1, 1)) tolower(substr(s, 2))
while (match(s, /_[[:alnum:]]/)) {
s = substr(s, 1, RSTART - 1) \
toupper(substr(s, RSTART + 1, RLENGTH - 1)) \
substr(s, RSTART + RLENGTH)
}
return s
}
function decapitalize(s) {
if (match(s, /[[:upper:]][[:lower:]]/)) {
return tolower(substr(s, 1, 1)) substr(s, 2)
}
return s
}
function indent(s) {
if (!s)
return s
gsub(/\n/, "\n\t", s)
sub(/\t*$/, "", s)
return "\t" s
}
function append(a, key, value) {
a[key] = a[key] value
}
# --- Parsing ------------------------------------------------------------------
function fatal(message) {
print "// " FILENAME ":" FNR ": fatal error: " message
print FILENAME ":" FNR ": fatal error: " message > "/dev/stderr"
exit 1
}
function skipcomment() {
do {
if (match($0, /[*]\//)) {
$0 = substr($0, RSTART + RLENGTH)
return
}
} while (getline > 0)
fatal("unterminated block comment")
}
function nexttoken() {
do {
if (match($0, /^[[:space:]]+/)) {
$0 = substr($0, RLENGTH + 1)
} else if (match($0, /^\/\/.*/)) {
$0 = ""
} else if (match($0, /^\/[*]/)) {
$0 = substr($0, RLENGTH + 1)
skipcomment()
} else if (match($0, /^[[:alpha:]][[:alnum:]_]*/)) {
Token = substr($0, 1, RLENGTH)
$0 = substr($0, RLENGTH + 1)
return Token
# AWK implementations rarely support non-decimal notations
# in their implicit string-to-number conversions.
} else if (match($0, /^(0|-?[1-9][0-9]*)/)) {
Token = substr($0, 1, RLENGTH)
$0 = substr($0, RLENGTH + 1)
return Token
} else if ($0) {
Token = substr($0, 1, 1)
$0 = substr($0, 2)
return Token
}
} while ($0 || getline > 0)
Token = ""
return Token
}
function expect(v) {
if (!v)
fatal("broken expectations at `" Token "' before `" $0 "'")
return v
}
function accept(what) {
if (Token != what)
return 0
nexttoken()
return 1
}
function identifier( v) {
if (Token !~ /^[[:alpha:]]/)
return 0
v = Token
nexttoken()
return v
}
function number( v) {
if (Token !~ /^(0|-?[1-9])/)
return 0
v = Token
nexttoken()
return v
}
function readnumber( ident) {
ident = identifier()
if (!ident)
return expect(number())
if (!(ident in Consts))
fatal("unknown constant: " ident)
return Consts[ident]
}
function defconst( ident, num) {
if (!accept("const"))
return 0
ident = expect(identifier())
expect(accept("="))
num = readnumber()
if (ident in Consts)
fatal("constant redefined: " ident)
Consts[ident] = num
codegen_constant(ident, num)
return 1
}
function readtype( ident) {
ident = deftype()
if (ident)
return ident
ident = identifier()
if (!ident)
return 0
if (!(ident in Types))
fatal("unknown type: " ident)
return ident
}
function defenum( name, ident, value, cg) {
delete cg[0]
name = expect(identifier())
expect(accept("{"))
while (!accept("}")) {
ident = expect(identifier())
value = value + 1
if (accept("="))
value = readnumber() + 0
if (!value)
fatal("enumeration values cannot be zero")
if (value < -128 || value > 127)
fatal("enumeration value out of range")
expect(accept(","))
append(EnumValues, name, SUBSEP ident)
if (EnumValues[name, ident]++)
fatal("duplicate enum value: " ident)
codegen_enum_value(name, ident, value, cg)
}
Types[name] = "enum"
codegen_enum(name, cg)
return name
}
function readfield(out, nonvoid) {
nonvoid = !accept("void")
if (nonvoid) {
out["type"] = expect(readtype())
out["name"] = expect(identifier())
# TODO: Consider supporting XDR's VLA length limits here.
# TODO: Consider supporting XDR's fixed-length syntax for string limits.
out["isarray"] = accept("<") && expect(accept(">"))
}
expect(accept(";"))
return nonvoid
}
function defstruct( name, d, cg) {
delete d[0]
delete cg[0]
name = expect(identifier())
expect(accept("{"))
while (!accept("}")) {
if (readfield(d))
codegen_struct_field(d, cg)
}
Types[name] = "struct"
codegen_struct(name, cg)
return name
}
function defunion( name, tag, tagtype, tagvalue, cg, scg, d, a, i, unseen) {
delete cg[0]
delete scg[0]
delete d[0]
name = expect(identifier())
expect(accept("switch"))
expect(accept("("))
tag["type"] = tagtype = expect(readtype())
tag["name"] = expect(identifier())
expect(accept(")"))
if (Types[tagtype] != "enum")
fatal("not an enum type: " tagtype)
codegen_union_tag(tag, cg)
split(EnumValues[tagtype], a, SUBSEP)
for (i in a)
unseen[a[i]]++
expect(accept("{"))
while (!accept("}")) {
if (accept("case")) {
if (tagvalue)
codegen_union_struct(name, tagvalue, cg, scg)
tagvalue = expect(identifier())
expect(accept(":"))
if (!unseen[tagvalue]--)
fatal("no such value or duplicate case: " tagtype "." tagvalue)
codegen_struct_tag(tag, scg)
} else if (tagvalue) {
if (readfield(d))
codegen_struct_field(d, scg)
} else {
fatal("union fields must fall under a case")
}
}
if (tagvalue)
codegen_union_struct(name, tagvalue, cg, scg)
# What remains non-zero in unseen[2..] is simply not recognized/allowed.
Types[name] = "union"
codegen_union(name, cg)
return name
}
function deftype() {
if (accept("enum"))
return defenum()
if (accept("struct"))
return defstruct()
if (accept("union"))
return defunion()
return 0
}
{
if (PrefixCamel) {
PrefixLower = tolower(cameltosnake(PrefixCamel)) "_"
PrefixUpper = toupper(cameltosnake(PrefixCamel)) "_"
}
# This is not in a BEGIN clause (even though it consumes all input),
# so that the code generator can insert the first FILENAME.
codegen_begin()
nexttoken()
while (Token != "") {
expect(defconst() || deftype())
expect(accept(";"))
}
}