Add more stuff

And break the API. All in the name of progress!
2015-03-29 01:55:10 +01:00 · 2015-03-29 01:55:10 +01:00 · f20218e73e
commit f20218e73e
parent 2b3bc18269
6 changed files with 1764 additions and 41 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -27,7 +27,7 @@ set (common_sources ${PROJECT_BINARY_DIR}/liberty-config.h)

 # Build some unit tests
 enable_testing ()
-foreach (name liberty)
+foreach (name liberty proto)
 	add_executable (test-${name} tests/${name}.c ${common_sources})
 	target_link_libraries (test-${name} ${common_libraries})
 	add_test (test-${name} test-${name})
--- a/10
+++ b/10
@ -2,13 +2,21 @@ liberty
 =======

 `liberty' is a pseudolibrary of all the common C code I have written for various
-projects.  It can be thought of as a successor to my other C library, libxtnd.
+projects.  I used to copy-paste large swaths of code with minimal changes to it
+and it slowly became awfully painful to synchronize.  The project can be thought
+of as a successor to my other C library, libxtnd.

 You are supposed to import it as a git submodule and include the main source
 file directly everywhere you need it.  Everything is declared "static".  I have
 come to the conclusion that this style of C programming suits me the best, as it
 allows me to nearly forget about the mess that are header files.

+The API is intentionally unstable, which allows for easy refactoring.
+
+All development is done on Linux, but other POSIX-compatible operating systems
+should be supported as well.  They have an extremely low priority, however, and
+I'm not testing them at all.
+
 License
 -------
 `liberty' is written by Přemysl Janouch <p.janouch@gmail.com>.
--- a/liberty-proto.c
+++ b/liberty-proto.c
--- a/liberty.c
+++ b/liberty.c
@ -61,6 +61,7 @@
 #endif // ! NI_MAXSERV

 #ifdef LIBERTY_WANT_SSL
+#include <openssl/sha.h>
 #include <openssl/ssl.h>
 #include <openssl/err.h>
 #endif // LIBERTY_WANT_SSL
@ -544,6 +545,42 @@ str_remove_slice (struct str *self, size_t start, size_t length)
 		self->str = xrealloc (self->str, self->alloc >>= 2);
 }

+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+static void
+str_pack_u8 (struct str *self, uint8_t x)
+{
+	str_append_data (self, &x, 1);
+}
+
+static void
+str_pack_u16 (struct str *self, uint16_t x)  // appends x as 2 bytes, MSB first
+{
+	uint8_t tmp[2] = { x >> 8, x };
+	str_append_data (self, tmp, sizeof tmp);
+}
+
+static void
+str_pack_u32 (struct str *self, uint32_t x)
+{
+	uint32_t u = x;
+	uint8_t tmp[4] = { u >> 24, u >> 16, u >> 8, u };
+	str_append_data (self, tmp, sizeof tmp);
+}
+
+static void
+str_pack_u64 (struct str *self, uint64_t x)
+{
+	uint8_t tmp[8] =
+		{ x >> 56, x >> 48, x >> 40, x >> 32, x >> 24, x >> 16, x >> 8, x };
+	str_append_data (self, tmp, sizeof tmp);
+}
+
+#define str_pack_i8(self, x)   str_pack_u8  ((self), (uint8_t)  (x))
+#define str_pack_i16(self, x)  str_pack_u16 ((self), (uint16_t) (x))
+#define str_pack_i32(self, x)  str_pack_u32 ((self), (uint32_t) (x))
+#define str_pack_i64(self, x)  str_pack_u64 ((self), (uint64_t) (x))
+
 // --- Errors ------------------------------------------------------------------

 // Error reporting utilities.  Inspired by GError, only much simpler.
@ -1660,7 +1697,16 @@ msg_unpacker_u8 (struct msg_unpacker *self, uint8_t *value)
 }

 static bool
-msg_unpacker_i32 (struct msg_unpacker *self, int32_t *value)
+msg_unpacker_u16 (struct msg_unpacker *self, uint16_t *value)
+{
+	UNPACKER_INT_BEGIN
+	*value  // big-endian like str_pack_u16(): first byte is the high one
+		= (uint16_t) x[0] << 8 | (uint16_t) x[1];
+	return true;
+}
+
+static bool
+msg_unpacker_u32 (struct msg_unpacker *self, uint32_t *value)
 {
 	UNPACKER_INT_BEGIN
 	*value
@ -1681,10 +1727,22 @@ msg_unpacker_u64 (struct msg_unpacker *self, uint64_t *value)
 	return true;
 }

+#define msg_unpacker_i8(self, value)                                           \
+	msg_unpacker_u8  ((self), (uint8_t *) (value))
+#define msg_unpacker_i16(self, value)                                          \
+	msg_unpacker_u16 ((self), (uint16_t *) (value))
+#define msg_unpacker_i32(self, value)                                          \
+	msg_unpacker_u32 ((self), (uint32_t *) (value))
+#define msg_unpacker_i64(self, value)                                          \
+	msg_unpacker_u64 ((self), (uint64_t *) (value))
+
 #undef UNPACKER_INT_BEGIN

 // --- Message packer and writer -----------------------------------------------

+// Use str_pack_*() or other methods to append to the internal buffer, then
+// flush it to get a nice frame.  Handy for iovec.
+
 struct msg_writer
 {
 	struct str buf;                     ///< Holds the message data
@ -1698,28 +1756,6 @@ msg_writer_init (struct msg_writer *self)
 	str_append_data (&self->buf, "\x00\x00\x00\x00" "\x00\x00\x00\x00", 8);
 }

-static void
-msg_writer_u8 (struct msg_writer *self, uint8_t x)
-{
-	str_append_data (&self->buf, &x, 1);
-}
-
-static void
-msg_writer_i32 (struct msg_writer *self, int32_t x)
-{
-	uint32_t u = x;
-	uint8_t tmp[4] = { u >> 24, u >> 16, u >> 8, u };
-	str_append_data (&self->buf, tmp, sizeof tmp);
-}
-
-static void
-msg_writer_u64 (struct msg_writer *self, uint64_t x)
-{
-	uint8_t tmp[8] =
-		{ x >> 56, x >> 48, x >> 40, x >> 32, x >> 24, x >> 16, x >> 8, x };
-	str_append_data (&self->buf, tmp, sizeof tmp);
-}
-
 static void *
 msg_writer_flush (struct msg_writer *self, size_t *len)
 {
@ -1733,6 +1769,220 @@ msg_writer_flush (struct msg_writer *self, size_t *len)
 	return str_steal (&self->buf);
 }

+// --- ASCII -------------------------------------------------------------------
+
+static int
+tolower_ascii (int c)
+{
+	return c >= 'A' && c <= 'Z' ? c + ('a' - 'A') : c;
+}
+
+static size_t
+tolower_ascii_strxfrm (char *dest, const char *src, size_t n)
+{
+	size_t len = strlen (src);
+	while (n-- && (*dest++ = tolower_ascii (*src++)))
+		;
+	return len;
+}
+
+static int
+strcasecmp_ascii (const char *a, const char *b)
+{
+	int x;
+	while (*a || *b)
+		if ((x = tolower_ascii (*(const unsigned char *) a++)
+			- tolower_ascii (*(const unsigned char *) b++)))
+			return x;
+	return 0;
+}
+
+static bool
+isspace_ascii (int c)  // locale-independent isspace(): ' ' \t \n \v \f \r
+{
+	return c == ' ' || (c >= '\t' && c <= '\r');  // 9..13 = \t \n \v \f \r
+}
+
+// --- UTF-8 -------------------------------------------------------------------
+
+/// Return a pointer to the next UTF-8 character, or NULL on error
+// TODO: decode the sequence while we're at it
+static const char *
+utf8_next (const char *s, size_t len)
+{
+	// End of string, we go no further
+	if (!len)
+		return NULL;
+
+	// In the middle of a character -> error
+	const uint8_t *p = (const unsigned char *) s;
+	if ((*p & 0xC0) == 0x80)
+		return NULL;
+
+	// Find out how long the sequence is
+	unsigned mask = 0xC0;
+	unsigned tail_len = 0;
+	while ((*p & mask) == mask)
+	{
+		// Invalid start of sequence
+		if (mask == 0xFE)
+			return NULL;
+
+		mask |= mask >> 1;
+		tail_len++;
+	}
+
+	p++;
+
+	// Check the rest of the sequence
+	if (tail_len > --len)
+		return NULL;
+
+	while (tail_len--)
+		if ((*p++ & 0xC0) != 0x80)
+			return NULL;
+
+	return (const char *) p;
+}
+
+/// Very rough UTF-8 validation, just makes sure codepoints can be iterated
+// TODO: also validate the codepoints
+static bool
+utf8_validate (const char *s, size_t len)
+{
+	const char *next;
+	while (len)
+	{
+		if (!(next = utf8_next (s, len)))
+			return false;
+
+		len -= next - s;
+		s = next;
+	}
+	return true;
+}
+
+// --- Base 64 -----------------------------------------------------------------
+
+static uint8_t g_base64_table[256] =
+{
+	64, 64, 64, 64, 64, 64, 64, 64,  64, 64, 64, 64, 64, 64, 64, 64,
+	64, 64, 64, 64, 64, 64, 64, 64,  64, 64, 64, 64, 64, 64, 64, 64,
+	64, 64, 64, 64, 64, 64, 64, 64,  64, 64, 64, 62, 64, 64, 64, 63,
+	52, 53, 54, 55, 56, 57, 58, 59,  60, 61, 64, 64, 64,  0, 64, 64,
+	64,  0,  1,  2,  3,  4,  5,  6,   7,  8,  9, 10, 11, 12, 13, 14,
+	15, 16, 17, 18, 19, 20, 21, 22,  23, 24, 25, 64, 64, 64, 64, 64,
+	64, 26, 27, 28, 29, 30, 31, 32,  33, 34, 35, 36, 37, 38, 39, 40,
+	41, 42, 43, 44, 45, 46, 47, 48,  49, 50, 51, 64, 64, 64, 64, 64,
+
+	64, 64, 64, 64, 64, 64, 64, 64,  64, 64, 64, 64, 64, 64, 64, 64,
+	64, 64, 64, 64, 64, 64, 64, 64,  64, 64, 64, 64, 64, 64, 64, 64,
+	64, 64, 64, 64, 64, 64, 64, 64,  64, 64, 64, 64, 64, 64, 64, 64,
+	64, 64, 64, 64, 64, 64, 64, 64,  64, 64, 64, 64, 64, 64, 64, 64,
+	64, 64, 64, 64, 64, 64, 64, 64,  64, 64, 64, 64, 64, 64, 64, 64,
+	64, 64, 64, 64, 64, 64, 64, 64,  64, 64, 64, 64, 64, 64, 64, 64,
+	64, 64, 64, 64, 64, 64, 64, 64,  64, 64, 64, 64, 64, 64, 64, 64,
+	64, 64, 64, 64, 64, 64, 64, 64,  64, 64, 64, 64, 64, 64, 64, 64,
+};
+
+static inline bool
+base64_decode_group (const char **s, bool ignore_ws, struct str *output)
+{
+	uint8_t input[4];
+	size_t loaded = 0;
+	for (; loaded < 4; (*s)++)
+	{
+		if (!**s)
+			return loaded == 0;
+		if (!ignore_ws || !isspace_ascii (**s))
+			input[loaded++] = **s;
+	}
+
+	size_t len = 3;
+	if (input[0] == '=' || input[1] == '=')
+		return false;
+	if (input[2] == '=' && input[3] != '=')
+		return false;
+	if (input[2] == '=')
+		len--;
+	if (input[3] == '=')
+		len--;
+
+	uint8_t a = g_base64_table[input[0]];
+	uint8_t b = g_base64_table[input[1]];
+	uint8_t c = g_base64_table[input[2]];
+	uint8_t d = g_base64_table[input[3]];
+
+	if (((a | b) | (c | d)) & 0x40)
+		return false;
+
+	uint32_t block = a << 18 | b << 12 | c << 6 | d;
+	switch (len)
+	{
+	case 1:
+		str_append_c (output, block >> 16);
+		break;
+	case 2:
+		str_append_c (output, block >> 16);
+		str_append_c (output, block >> 8);
+		break;
+	case 3:
+		str_append_c (output, block >> 16);
+		str_append_c (output, block >> 8);
+		str_append_c (output, block);
+	}
+	return true;
+}
+
+static bool
+base64_decode (const char *s, bool ignore_ws, struct str *output)
+{
+	while (*s)
+		if (!base64_decode_group (&s, ignore_ws, output))
+			return false;
+	return true;
+}
+
+static void
+base64_encode (const void *data, size_t len, struct str *output)
+{
+	const char *alphabet =
+		"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+	const uint8_t *p = data;
+	size_t n_groups = len / 3;
+	size_t tail = len - n_groups * 3;
+	uint32_t group;
+
+	for (; n_groups--; p += 3)
+	{
+		group = p[0] << 16 | p[1] << 8 | p[2];
+		str_append_c (output, alphabet[(group >> 18) & 63]);
+		str_append_c (output, alphabet[(group >> 12) & 63]);
+		str_append_c (output, alphabet[(group >>  6) & 63]);
+		str_append_c (output, alphabet[ group        & 63]);
+	}
+
+	switch (tail)
+	{
+	case 2:
+		group = p[0] << 16 | p[1] << 8;
+		str_append_c (output, alphabet[(group >> 18) & 63]);
+		str_append_c (output, alphabet[(group >> 12) & 63]);
+		str_append_c (output, alphabet[(group >>  6) & 63]);
+		str_append_c (output, '=');
+		break;
+	case 1:
+		group = p[0] << 16;
+		str_append_c (output, alphabet[(group >> 18) & 63]);
+		str_append_c (output, alphabet[(group >> 12) & 63]);
+		str_append_c (output, '=');
+		str_append_c (output, '=');
+	default:
+		break;
+	}
+}
+
 // --- Utilities ---------------------------------------------------------------

 static void
@ -2575,3 +2825,7 @@ test_run (struct test *self)
 	str_map_free (&self->blacklist);
 	return 0;
 }
+
+// --- Protocol modules --------------------------------------------------------
+
+#include "liberty-proto.c"
--- a/tests/liberty.c
+++ b/tests/liberty.c
@ -236,21 +236,6 @@ test_error (void)

 // --- Hash map ----------------------------------------------------------------

-static int
-tolower_ascii (int c)
-{
-	return c >= 'A' && c <= 'Z' ? c + ('a' - 'A') : c;
-}
-
-static size_t
-tolower_strxfrm (char *dest, const char *src, size_t n)
-{
-	size_t len = strlen (src);
-	while (n-- && (*dest++ = tolower_ascii (*src++)))
-		;
-	return len;
-}
-
 static void
 free_counter (void *data)
 {
@ -280,7 +265,7 @@ test_str_map (void)
 	// Put two reference counted objects in the map under case-insensitive keys
 	struct str_map m;
 	str_map_init (&m);
-	m.key_xfrm = tolower_strxfrm;
+	m.key_xfrm = tolower_ascii_strxfrm;
 	m.free = free_counter;

 	int *a = make_counter ();
@ -323,6 +308,34 @@ test_str_map (void)
 	free_counter (b);
 }

+static void
+test_utf8 (void)
+{
+	const char valid  [] = "2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm";
+	const char invalid[] = "\xf0\x90\x28\xbc";
+	soft_assert ( utf8_validate (valid,   sizeof valid));
+	soft_assert (!utf8_validate (invalid, sizeof invalid));
+}
+
+static void
+test_base64 (void)
+{
+	char data[65];
+	for (size_t i = 0; i < N_ELEMENTS (data); i++)
+		data[i] = i;
+
+	struct str encoded;  str_init (&encoded);
+	struct str decoded;  str_init (&decoded);
+
+	base64_encode (data, sizeof data, &encoded);
+	soft_assert (base64_decode (encoded.str, false, &decoded));
+	soft_assert (decoded.len == sizeof data);
+	soft_assert (!memcmp (decoded.str, data, sizeof data));
+
+	str_free (&encoded);
+	str_free (&decoded);
+}
+
 // --- Main --------------------------------------------------------------------

 int
@ -338,6 +351,8 @@ main (int argc, char *argv[])
 	test_add_simple (&test, "/str",            NULL, test_str);
 	test_add_simple (&test, "/error",          NULL, test_error);
 	test_add_simple (&test, "/str-map",        NULL, test_str_map);
+	test_add_simple (&test, "/utf-8",          NULL, test_utf8);
+	test_add_simple (&test, "/base64",         NULL, test_base64);

 	// TODO: write tests for the rest of the library

--- a/tests/proto.c
+++ b/tests/proto.c
@ -0,0 +1,167 @@
+/*
+ * tests/proto.c
+ *
+ * Copyright (c) 2015, Přemysl Janouch <p.janouch@gmail.com>
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#define PROGRAM_NAME "test"
+#define PROGRAM_VERSION "0"
+
+#define LIBERTY_WANT_SSL
+
+#define LIBERTY_WANT_PROTO_IRC
+#define LIBERTY_WANT_PROTO_HTTP
+#define LIBERTY_WANT_PROTO_SCGI
+#define LIBERTY_WANT_PROTO_FASTCGI
+#define LIBERTY_WANT_PROTO_WS
+
+#include "../liberty.c"
+
+// --- Tests -------------------------------------------------------------------
+
+static void
+test_http_parser (void)
+{
+	struct str_map parameters;
+	str_map_init (&parameters);
+	parameters.key_xfrm = tolower_ascii_strxfrm;
+
+	char *type = NULL;
+	char *subtype = NULL;
+	soft_assert (http_parse_media_type ("TEXT/html; CHARset=\"utf\\-8\"",
+		&type, &subtype, &parameters));
+	soft_assert (!strcasecmp_ascii (type, "text"));
+	soft_assert (!strcasecmp_ascii (subtype, "html"));
+	soft_assert (parameters.len == 1);
+	soft_assert (!strcmp (str_map_find (&parameters, "charset"), "utf-8"));
+	str_map_free (&parameters);
+
+	struct http_protocol *protocols;
+	soft_assert (http_parse_upgrade ("websocket, HTTP/2.0, , ", &protocols));
+
+	soft_assert (!strcmp (protocols->name, "websocket"));
+	soft_assert (!protocols->version);
+
+	soft_assert (!strcmp (protocols->next->name, "HTTP"));
+	soft_assert (!strcmp (protocols->next->version, "2.0"));
+
+	soft_assert (!protocols->next->next);
+
+	LIST_FOR_EACH (struct http_protocol, iter, protocols)
+		http_protocol_destroy (iter);
+}
+
+static bool
+test_scgi_parser_on_headers_read (void *user_data)
+{
+	struct scgi_parser *parser = user_data;
+	soft_assert (parser->headers.len == 4);
+	soft_assert (!strcmp (str_map_find (&parser->headers,
+		"CONTENT_LENGTH"), "27"));
+	soft_assert (!strcmp (str_map_find (&parser->headers,
+		"SCGI"), "1"));
+	soft_assert (!strcmp (str_map_find (&parser->headers,
+		"REQUEST_METHOD"), "POST"));
+	soft_assert (!strcmp (str_map_find (&parser->headers,
+		"REQUEST_URI"), "/deepthought"));
+	return true;
+}
+
+static bool
+test_scgi_parser_on_content (void *user_data, const void *data, size_t len)
+{
+	(void) user_data;
+	soft_assert (!strncmp (data, "What is the answer to life?", len));
+	return true;
+}
+
+static void
+test_scgi_parser (void)
+{
+	struct scgi_parser parser;
+	scgi_parser_init (&parser);
+	parser.on_headers_read = test_scgi_parser_on_headers_read;
+	parser.on_content      = test_scgi_parser_on_content;
+	parser.user_data       = &parser;
+
+	// This is an example straight from the specification
+	const char example[] =
+		"70:"
+			"CONTENT_LENGTH" "\0" "27" "\0"
+			"SCGI" "\0" "1" "\0"
+			"REQUEST_METHOD" "\0" "POST" "\0"
+			"REQUEST_URI" "\0" "/deepthought" "\0"
+		","
+		"What is the answer to life?";
+
+	soft_assert (scgi_parser_push (&parser, example, sizeof example, NULL));
+	scgi_parser_free (&parser);
+}
+
+static bool
+test_websockets_on_frame_header (void *user_data, const struct ws_parser *self)
+{
+	(void) user_data;
+	soft_assert (self->is_fin);
+	soft_assert (self->is_masked);
+	soft_assert (self->opcode == WS_OPCODE_TEXT);
+	return true;
+}
+
+static bool
+test_websockets_on_frame (void *user_data, const struct ws_parser *self)
+{
+	(void) user_data;
+	soft_assert (self->input.len == self->payload_len);
+	soft_assert (!strncmp (self->input.str, "Hello", self->input.len));
+	return true;
+}
+
+static void
+test_websockets (void)
+{
+	char *accept = ws_encode_response_key ("dGhlIHNhbXBsZSBub25jZQ==");
+	soft_assert (!strcmp (accept, "s3pPLMBiTxaQ9kYGzzhZRbK+xOo="));
+	free (accept);
+
+	struct ws_parser parser;
+	ws_parser_init (&parser);
+	parser.on_frame_header = test_websockets_on_frame_header;
+	parser.on_frame        = test_websockets_on_frame;
+	parser.user_data       = &parser;
+
+	const char frame[] = "\x81\x85\x37\xfa\x21\x3d\x7f\x9f\x4d\x51\x58";
+	soft_assert (ws_parser_push (&parser, frame, sizeof frame - 1));
+	ws_parser_free (&parser);
+}
+
+// --- Main --------------------------------------------------------------------
+
+int
+main (int argc, char *argv[])
+{
+	struct test test;
+	test_init (&test, argc, argv);
+
+	test_add_simple (&test, "/http-parser",    NULL, test_http_parser);
+	test_add_simple (&test, "/scgi-parser",    NULL, test_scgi_parser);
+	test_add_simple (&test, "/websockets",     NULL, test_websockets);
+	// TODO: test FastCGI
+	// TODO: test IRC
+
+	return test_run (&test);
+}