From f20218e73ecae27f9e9f9bf34e9798080de6f93d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C5=99emysl=20Janouch?= Date: Sun, 29 Mar 2015 01:55:10 +0100 Subject: [PATCH] Add more stuff And break the API. All in the name of progress! --- CMakeLists.txt | 2 +- README | 10 +- liberty-proto.c | 1279 +++++++++++++++++++++++++++++++++++++++++++++++ liberty.c | 300 ++++++++++- tests/liberty.c | 47 +- tests/proto.c | 167 +++++++ 6 files changed, 1764 insertions(+), 41 deletions(-) create mode 100644 liberty-proto.c create mode 100644 tests/proto.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 75ef930..63f5f6b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,7 +27,7 @@ set (common_sources ${PROJECT_BINARY_DIR}/liberty-config.h) # Build some unit tests enable_testing () -foreach (name liberty) +foreach (name liberty proto) add_executable (test-${name} tests/${name}.c ${common_sources}) target_link_libraries (test-${name} ${common_libraries}) add_test (test-${name} test-${name}) diff --git a/README b/README index 7d20085..25e9543 100644 --- a/README +++ b/README @@ -2,13 +2,21 @@ liberty ======= `liberty' is a pseudolibrary of all the common C code I have written for various -projects. It can be thought of as a successor to my other C library, libxtnd. +projects. I used to copy-paste large swaths of code with minimal changes to it +and it slowly became awfully painful to synchronize. The project can be thought +of as a successor to my other C library, libxtnd. You are supposed to import it as a git submodule and include the main source file directly everywhere you need it. Everything is declared "static". I have come to the conclusion that this style of C programming suits me the best, as it allows me to nearly forget about the mess that are header files. +The API is intentionally unstable, which allows for easy refactoring. + +All development is done on Linux, but other POSIX-compatible operating systems +should be supported as well. 
They have an extremely low priority, however, and +I'm not testing them at all. + License ------- `liberty' is written by Přemysl Janouch . diff --git a/liberty-proto.c b/liberty-proto.c new file mode 100644 index 0000000..9207a4a --- /dev/null +++ b/liberty-proto.c @@ -0,0 +1,1279 @@ +/* + * liberty-proto.c: the ultimate C unlibrary: protocols + * + * Copyright (c) 2014 - 2015, Přemysl Janouch + * All rights reserved. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ * + */ + +// Mostly parsers and various utilities relating to various protocols + +#ifdef LIBERTY_WANT_PROTO_IRC + +// --- IRC utilities ----------------------------------------------------------- + +struct irc_message +{ + struct str_map tags; ///< IRC 3.2 message tags + char *prefix; ///< Message prefix + char *command; ///< IRC command + struct str_vector params; ///< Command parameters +}; + +static void +irc_parse_message_tags (const char *tags, struct str_map *out) +{ + struct str_vector v; + str_vector_init (&v); + split_str_ignore_empty (tags, ';', &v); + + for (size_t i = 0; i < v.len; i++) + { + char *key = v.vector[i], *equal_sign = strchr (key, '='); + if (equal_sign) + { + *equal_sign = '\0'; + str_map_set (out, key, xstrdup (equal_sign + 1)); + } + else + str_map_set (out, key, xstrdup ("")); + } + + str_vector_free (&v); +} + +static void +irc_parse_message (struct irc_message *msg, const char *line) +{ + str_map_init (&msg->tags); + msg->tags.free = free; + + msg->prefix = NULL; + msg->command = NULL; + str_vector_init (&msg->params); + + // IRC 3.2 message tags + if (*line == '@') + { + size_t tags_len = strcspn (++line, " "); + char *tags = xstrndup (line, tags_len); + irc_parse_message_tags (tags, &msg->tags); + free (tags); + + line += tags_len; + while (*line == ' ') + line++; + } + + // Prefix + if (*line == ':') + { + size_t prefix_len = strcspn (++line, " "); + msg->prefix = xstrndup (line, prefix_len); + line += prefix_len; + } + + // Command name + { + while (*line == ' ') + line++; + + size_t cmd_len = strcspn (line, " "); + msg->command = xstrndup (line, cmd_len); + line += cmd_len; + } + + // Arguments + while (true) + { + while (*line == ' ') + line++; + + if (*line == ':') + { + str_vector_add (&msg->params, ++line); + break; + } + + size_t param_len = strcspn (line, " "); + if (!param_len) + break; + + str_vector_add_owned (&msg->params, xstrndup (line, param_len)); + line += param_len; + } +} + +static void +irc_free_message 
(struct irc_message *msg) +{ + str_map_free (&msg->tags); + free (msg->prefix); + free (msg->command); + str_vector_free (&msg->params); +} + +static void +irc_process_buffer (struct str *buf, + void (*callback)(const struct irc_message *, const char *, void *), + void *user_data) +{ + char *start = buf->str, *end = start + buf->len; + for (char *p = start; p + 1 < end; p++) + { + // Split the input on newlines + if (p[0] != '\r' || p[1] != '\n') + continue; + + *p = 0; + + struct irc_message msg; + irc_parse_message (&msg, start); + callback (&msg, start, user_data); + irc_free_message (&msg); + + start = p + 2; + } + + // XXX: we might want to just advance some kind of an offset to avoid + // moving memory around unnecessarily. + str_remove_slice (buf, 0, start - buf->str); +} + +static int +irc_tolower (int c) +{ + if (c == '[') return '{'; + if (c == ']') return '}'; + if (c == '\\') return '|'; + if (c == '~') return '^'; + return c >= 'A' && c <= 'Z' ? c + ('a' - 'A') : c; +} + +static size_t +irc_strxfrm (char *dest, const char *src, size_t n) +{ + size_t len = strlen (src); + while (n-- && (*dest++ = irc_tolower (*src++))) + ; + return len; +} + +static int +irc_strcmp (const char *a, const char *b) +{ + int x; + while (*a || *b) + if ((x = irc_tolower (*a++) - irc_tolower (*b++))) + return x; + return 0; +} + +static int +irc_fnmatch (const char *pattern, const char *string) +{ + size_t pattern_size = strlen (pattern) + 1; + size_t string_size = strlen (string) + 1; + char x_pattern[pattern_size], x_string[string_size]; + irc_strxfrm (x_pattern, pattern, pattern_size); + irc_strxfrm (x_string, string, string_size); + return fnmatch (x_pattern, x_string, 0); +} + +#endif + +#ifdef LIBERTY_WANT_PROTO_HTTP + +// --- HTTP parsing ------------------------------------------------------------ + +// Basic tokenizer for HTTP header field values, to be used in various parsers. +// The input should already be unwrapped. 
+ +// Recommended literature: +// http://tools.ietf.org/html/rfc7230#section-3.2.6 +// http://tools.ietf.org/html/rfc7230#appendix-B +// http://tools.ietf.org/html/rfc5234#appendix-B.1 + +#define HTTP_TOKENIZER_CLASS(name, definition) \ + static inline bool \ + http_tokenizer_is_ ## name (int c) \ + { \ + return (definition); \ + } + +HTTP_TOKENIZER_CLASS (vchar, c >= 0x21 && c <= 0x7E) +HTTP_TOKENIZER_CLASS (delimiter, c && strchr ("\"(),/:;<=>?@[\\]{}", c)) // strchr() alone also matches NUL +HTTP_TOKENIZER_CLASS (whitespace, c == '\t' || c == ' ') +HTTP_TOKENIZER_CLASS (obs_text, c >= 0x80 && c <= 0xFF) + +HTTP_TOKENIZER_CLASS (tchar, + http_tokenizer_is_vchar (c) && !http_tokenizer_is_delimiter (c)) + +HTTP_TOKENIZER_CLASS (qdtext, + c == '\t' || c == ' ' || c == '!' + || (c >= 0x23 && c <= 0x5B) + || (c >= 0x5D && c <= 0x7E) + || http_tokenizer_is_obs_text (c)) + +HTTP_TOKENIZER_CLASS (quoted_pair, + c == '\t' || c == ' ' + || http_tokenizer_is_vchar (c) + || http_tokenizer_is_obs_text (c)) + +#undef HTTP_TOKENIZER_CLASS + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +enum http_tokenizer_token +{ + HTTP_T_EOF, ///< End of input + HTTP_T_ERROR, ///< Input error + + HTTP_T_TOKEN, ///< "token" + HTTP_T_QUOTED_STRING, ///< "quoted-string" + HTTP_T_DELIMITER, ///< "delimiters" + HTTP_T_WHITESPACE ///< RWS/OWS/BWS +}; + +struct http_tokenizer +{ + const unsigned char *input; ///< The input string + size_t input_len; ///< Length of the input + size_t offset; ///< Position in the input + + char delimiter; ///< The delimiter character + struct str string; ///< "token" / "quoted-string" content +}; + +static void +http_tokenizer_init (struct http_tokenizer *self, const char *input, size_t len) +{ + memset (self, 0, sizeof *self); + self->input = (const unsigned char *) input; + self->input_len = len; + + str_init (&self->string); +} + +static void +http_tokenizer_free (struct http_tokenizer *self) +{ + str_free (&self->string); +} + +static enum http_tokenizer_token
+http_tokenizer_quoted_string (struct http_tokenizer *self) +{ + bool quoted_pair = false; + while (self->offset < self->input_len) + { + int c = self->input[self->offset++]; + if (quoted_pair) + { + if (!http_tokenizer_is_quoted_pair (c)) + return HTTP_T_ERROR; + + str_append_c (&self->string, c); + quoted_pair = false; + } + else if (c == '\\') + quoted_pair = true; + else if (c == '"') + return HTTP_T_QUOTED_STRING; + else if (http_tokenizer_is_qdtext (c)) + str_append_c (&self->string, c); + else + return HTTP_T_ERROR; + } + + // Premature end of input + return HTTP_T_ERROR; +} + +static enum http_tokenizer_token +http_tokenizer_next (struct http_tokenizer *self, bool skip_ows) +{ + str_reset (&self->string); + if (self->offset >= self->input_len) + return HTTP_T_EOF; + + int c = self->input[self->offset++]; + + if (skip_ows) + while (http_tokenizer_is_whitespace (c)) + { + if (self->offset >= self->input_len) + return HTTP_T_EOF; + c = self->input[self->offset++]; + } + + if (c == '"') + return http_tokenizer_quoted_string (self); + + if (http_tokenizer_is_delimiter (c)) + { + self->delimiter = c; + return HTTP_T_DELIMITER; + } + + // Simple variable-length tokens + enum http_tokenizer_token result; + bool (*eater) (int c) = NULL; + if (http_tokenizer_is_whitespace (c)) + { + eater = http_tokenizer_is_whitespace; + result = HTTP_T_WHITESPACE; + } + else if (http_tokenizer_is_tchar (c)) + { + eater = http_tokenizer_is_tchar; + result = HTTP_T_TOKEN; + } + else + return HTTP_T_ERROR; + + str_append_c (&self->string, c); + while (self->offset < self->input_len) + { + if (!eater (c = self->input[self->offset])) + break; + + str_append_c (&self->string, c); + self->offset++; + } + return result; +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +static bool +http_parse_media_type_parameter + (struct http_tokenizer *t, struct str_map *parameters) +{ + bool result = false; + char *attribute = NULL; + + if (http_tokenizer_next 
(t, true) != HTTP_T_TOKEN) + goto end; + attribute = xstrdup (t->string.str); + + if (http_tokenizer_next (t, false) != HTTP_T_DELIMITER + || t->delimiter != '=') + goto end; + + switch (http_tokenizer_next (t, false)) + { + case HTTP_T_TOKEN: + case HTTP_T_QUOTED_STRING: + str_map_set (parameters, attribute, xstrdup (t->string.str)); + result = true; + default: + break; + } + +end: + free (attribute); + return result; +} + +/// Parser for "Content-Type". @a type and @a subtype may be non-NULL +/// even if the function fails. @a parameters should be case-insensitive. +static bool +http_parse_media_type (const char *media_type, + char **type, char **subtype, struct str_map *parameters) +{ + bool result = false; + struct http_tokenizer t; + http_tokenizer_init (&t, media_type, strlen (media_type)); + + if (http_tokenizer_next (&t, true) != HTTP_T_TOKEN) + goto end; + *type = xstrdup (t.string.str); + + if (http_tokenizer_next (&t, false) != HTTP_T_DELIMITER + || t.delimiter != '/') + goto end; + + if (http_tokenizer_next (&t, false) != HTTP_T_TOKEN) + goto end; + *subtype = xstrdup (t.string.str); + + while (true) + switch (http_tokenizer_next (&t, true)) + { + case HTTP_T_DELIMITER: + if (t.delimiter != ';') + goto end; + if (!http_parse_media_type_parameter (&t, parameters)) + goto end; + break; + case HTTP_T_EOF: + result = true; + default: + goto end; + } + +end: + http_tokenizer_free (&t); + return result; +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +struct http_protocol +{ + LIST_HEADER (struct http_protocol) + + char *name; ///< The protocol to upgrade to + char *version; ///< Version of the protocol, if any +}; + +static void +http_protocol_destroy (struct http_protocol *self) +{ + free (self->name); + free (self->version); + free (self); +} + +static bool +http_parse_upgrade (const char *upgrade, struct http_protocol **out) +{ + // HTTP grammar makes this more complicated than it should be + + bool result = false; 
+ struct http_protocol *list = NULL; + struct http_protocol *tail = NULL; + + struct http_tokenizer t; + http_tokenizer_init (&t, upgrade, strlen (upgrade)); + + enum { + STATE_PROTOCOL_NAME, + STATE_SLASH, + STATE_PROTOCOL_VERSION, + STATE_EXPECT_COMMA + } state = STATE_PROTOCOL_NAME; + struct http_protocol *proto = NULL; + + while (true) + switch (state) + { + case STATE_PROTOCOL_NAME: + switch (http_tokenizer_next (&t, false)) + { + case HTTP_T_DELIMITER: + if (t.delimiter != ',') + goto end; + case HTTP_T_WHITESPACE: + break; + case HTTP_T_TOKEN: + proto = xcalloc (1, sizeof *proto); + proto->name = xstrdup (t.string.str); + LIST_APPEND_WITH_TAIL (list, tail, proto); + state = STATE_SLASH; + break; + case HTTP_T_EOF: + result = true; + default: + goto end; + } + break; + case STATE_SLASH: + switch (http_tokenizer_next (&t, false)) + { + case HTTP_T_DELIMITER: + if (t.delimiter == '/') + state = STATE_PROTOCOL_VERSION; + else if (t.delimiter == ',') + state = STATE_PROTOCOL_NAME; + else + goto end; + break; + case HTTP_T_WHITESPACE: + state = STATE_EXPECT_COMMA; + break; + case HTTP_T_EOF: + result = true; + default: + goto end; + } + break; + case STATE_PROTOCOL_VERSION: + switch (http_tokenizer_next (&t, false)) + { + case HTTP_T_TOKEN: + proto->version = xstrdup (t.string.str); + state = STATE_EXPECT_COMMA; + break; + default: + goto end; + } + break; + case STATE_EXPECT_COMMA: + switch (http_tokenizer_next (&t, false)) + { + case HTTP_T_DELIMITER: + if (t.delimiter != ',') + goto end; + state = STATE_PROTOCOL_NAME; + case HTTP_T_WHITESPACE: + break; + case HTTP_T_EOF: + result = true; + default: + goto end; + } + } + +end: + if (result) + *out = list; + else + LIST_FOR_EACH (struct http_protocol, iter, list) + http_protocol_destroy (iter); + + http_tokenizer_free (&t); + return result; +} + +#endif + +#ifdef LIBERTY_WANT_PROTO_SCGI + +// --- SCGI -------------------------------------------------------------------- + +enum scgi_parser_state +{ + 
SCGI_READING_NETSTRING_LENGTH, ///< The length of the header netstring + SCGI_READING_NAME, ///< Header name + SCGI_READING_VALUE, ///< Header value + SCGI_READING_CONTENT ///< Incoming data +}; + +struct scgi_parser +{ + enum scgi_parser_state state; ///< Parsing state + struct str input; ///< Input buffer + + struct str_map headers; ///< Headers parsed + + size_t headers_len; ///< Length of the netstring contents + struct str name; ///< Header name so far + struct str value; ///< Header value so far + + /// Finished parsing request headers. + /// Return false to abort further processing of input. + bool (*on_headers_read) (void *user_data); + + /// Content available; len == 0 means end of file. + /// Return false to abort further processing of input. + bool (*on_content) (void *user_data, const void *data, size_t len); + + void *user_data; ///< User data passed to callbacks +}; + +static void +scgi_parser_init (struct scgi_parser *self) +{ + memset (self, 0, sizeof *self); + + str_init (&self->input); + str_map_init (&self->headers); + self->headers.free = free; + str_init (&self->name); + str_init (&self->value); +} + +static void +scgi_parser_free (struct scgi_parser *self) +{ + str_free (&self->input); + str_map_free (&self->headers); + str_free (&self->name); + str_free (&self->value); +} + +static bool +scgi_parser_push (struct scgi_parser *self, + const void *data, size_t len, struct error **e) +{ + if (!len) + { + if (self->state != SCGI_READING_CONTENT) + { + error_set (e, "premature EOF"); + return false; + } + + // Indicate end of file + return self->on_content (self->user_data, NULL, 0); + } + + // Notice that this madness is significantly harder to parse than FastCGI; + // this procedure could also be optimized significantly + str_append_data (&self->input, data, len); + + bool keep_running = true; + while (keep_running) + switch (self->state) + { + case SCGI_READING_NETSTRING_LENGTH: + { + if (self->input.len < 1) + return true; + + char digit = 
*self->input.str; + str_remove_slice (&self->input, 0, 1); // consume the digit or ':' + // XXX: this allows for omitting the netstring length altogether + if (digit == ':') + { + self->state = SCGI_READING_NAME; + break; + } + + if (digit < '0' || digit > '9') + { + error_set (e, "invalid header netstring"); + return false; + } + + size_t new_len = self->headers_len * 10 + (digit - '0'); + if (new_len < self->headers_len) + { + error_set (e, "header netstring is too long"); + return false; + } + self->headers_len = new_len; + break; + } + case SCGI_READING_NAME: + { + if (self->input.len < 1) + return true; + + char c = *self->input.str; + if (!self->headers_len) + { + // The netstring is ending but we haven't finished parsing it, + // or the netstring doesn't end with a comma + if (self->name.len || c != ',') + { + error_set (e, "invalid header netstring"); + return false; + } + self->state = SCGI_READING_CONTENT; + keep_running = self->on_headers_read (self->user_data); + } + else if (c != '\0') + str_append_c (&self->name, c); + else + self->state = SCGI_READING_VALUE; + if (self->headers_len) self->headers_len--; // a netstring byte was consumed + str_remove_slice (&self->input, 0, 1); + break; + } + case SCGI_READING_VALUE: + { + if (self->input.len < 1) + return true; + + char c = *self->input.str; + if (!self->headers_len) + { + // The netstring is ending but we haven't finished parsing it + error_set (e, "invalid header netstring"); + return false; + } + else if (c != '\0') + str_append_c (&self->value, c); + else + { + // We've got a name-value pair, let's put it in the map + str_map_set (&self->headers, + self->name.str, str_steal (&self->value)); + + str_reset (&self->name); + str_init (&self->value); + + self->state = SCGI_READING_NAME; + } + self->headers_len--; // a netstring byte was consumed + str_remove_slice (&self->input, 0, 1); + break; + } + case SCGI_READING_CONTENT: + keep_running = self->on_content + (self->user_data, self->input.str, self->input.len); + str_remove_slice (&self->input, 0, self->input.len); + return keep_running; + } + return false; +} + +#endif + +#ifdef 
LIBERTY_WANT_PROTO_FASTCGI + +// --- FastCGI ----------------------------------------------------------------- + +// Constants from the FastCGI specification document + +#define FCGI_HEADER_LEN 8 + +#define FCGI_VERSION_1 1 +#define FCGI_NULL_REQUEST_ID 0 +#define FCGI_KEEP_CONN 1 + +enum fcgi_type +{ + FCGI_BEGIN_REQUEST = 1, + FCGI_ABORT_REQUEST = 2, + FCGI_END_REQUEST = 3, + FCGI_PARAMS = 4, + FCGI_STDIN = 5, + FCGI_STDOUT = 6, + FCGI_STDERR = 7, + FCGI_DATA = 8, + FCGI_GET_VALUES = 9, + FCGI_GET_VALUES_RESULT = 10, + FCGI_UNKNOWN_TYPE = 11, + FCGI_MAXTYPE = FCGI_UNKNOWN_TYPE +}; + +enum fcgi_role +{ + FCGI_RESPONDER = 1, + FCGI_AUTHORIZER = 2, + FCGI_FILTER = 3 +}; + +enum fcgi_protocol_status +{ + FCGI_REQUEST_COMPLETE = 0, + FCGI_CANT_MPX_CONN = 1, + FCGI_OVERLOADED = 2, + FCGI_UNKNOWN_ROLE = 3 +}; + +#define FCGI_MAX_CONNS "FCGI_MAX_CONNS" +#define FCGI_MAX_REQS "FCGI_MAX_REQS" +#define FCGI_MPXS_CONNS "FCGI_MPXS_CONNS" + +// - - Message stream parser - - - - - - - - - - - - - - - - - - - - - - - - - - + +struct fcgi_parser; + +typedef void (*fcgi_message_fn) + (const struct fcgi_parser *parser, void *user_data); + +enum fcgi_parser_state +{ + FCGI_READING_HEADER, ///< Reading the fixed header portion + FCGI_READING_CONTENT, ///< Reading the message content + FCGI_READING_PADDING ///< Reading the padding +}; + +struct fcgi_parser +{ + enum fcgi_parser_state state; ///< Parsing state + struct str input; ///< Input buffer + + // The next block of fields is considered public: + + uint8_t version; ///< FastCGI protocol version + uint8_t type; ///< FastCGI record type + uint16_t request_id; ///< FastCGI request ID + struct str content; ///< Message data + + uint16_t content_length; ///< Message content length + uint8_t padding_length; ///< Message padding length + + fcgi_message_fn on_message; ///< Callback on message + void *user_data; ///< User data +}; + +static void +fcgi_parser_init (struct fcgi_parser *self) +{ + memset (self, 0, sizeof *self); + str_init 
(&self->input); + str_init (&self->content); +} + +static void +fcgi_parser_free (struct fcgi_parser *self) +{ + str_free (&self->input); + str_free (&self->content); +} + +static void +fcgi_parser_unpack_header (struct fcgi_parser *self) +{ + struct msg_unpacker unpacker; + msg_unpacker_init (&unpacker, self->input.str, self->input.len); + + bool success = true; + uint8_t reserved; + success &= msg_unpacker_u8 (&unpacker, &self->version); + success &= msg_unpacker_u8 (&unpacker, &self->type); + success &= msg_unpacker_u16 (&unpacker, &self->request_id); + success &= msg_unpacker_u16 (&unpacker, &self->content_length); + success &= msg_unpacker_u8 (&unpacker, &self->padding_length); + success &= msg_unpacker_u8 (&unpacker, &reserved); + hard_assert (success); + + str_remove_slice (&self->input, 0, unpacker.offset); +} + +static void +fcgi_parser_push (struct fcgi_parser *self, const void *data, size_t len) +{ + // This could be made considerably faster for high-throughput applications + // if we use a circular buffer instead of constantly calling memmove() + str_append_data (&self->input, data, len); + + while (true) + switch (self->state) + { + case FCGI_READING_HEADER: + if (self->input.len < FCGI_HEADER_LEN) + return; + + fcgi_parser_unpack_header (self); + self->state = FCGI_READING_CONTENT; + break; + case FCGI_READING_CONTENT: + if (self->input.len < self->content_length) + return; + + // Move an appropriate part of the input buffer to the content buffer + str_reset (&self->content); + str_append_data (&self->content, self->input.str, self->content_length); + str_remove_slice (&self->input, 0, self->content_length); + self->state = FCGI_READING_PADDING; + break; + case FCGI_READING_PADDING: + if (self->input.len < self->padding_length) + return; + + // Call the callback to further process the message + self->on_message (self, self->user_data); + + // Remove the padding from the input buffer + str_remove_slice (&self->input, 0, self->padding_length); + 
self->state = FCGI_READING_HEADER; + break; + } +} + +// - - Name-value pair parser - - - - - - - - - - - - - - - - - - - - - - - - - + +enum fcgi_nv_parser_state +{ + FCGI_NV_PARSER_NAME_LEN, ///< The first name length octet + FCGI_NV_PARSER_NAME_LEN_FULL, ///< Remaining name length octets + FCGI_NV_PARSER_VALUE_LEN, ///< The first value length octet + FCGI_NV_PARSER_VALUE_LEN_FULL, ///< Remaining value length octets + FCGI_NV_PARSER_NAME, ///< Reading the name + FCGI_NV_PARSER_VALUE ///< Reading the value +}; + +struct fcgi_nv_parser +{ + struct str_map *output; ///< Where the pairs will be stored + + enum fcgi_nv_parser_state state; ///< Parsing state + struct str input; ///< Input buffer + + uint32_t name_len; ///< Length of the name + uint32_t value_len; ///< Length of the value + + char *name; ///< The current name, 0-terminated + char *value; ///< The current value, 0-terminated +}; + +static void +fcgi_nv_parser_init (struct fcgi_nv_parser *self) +{ + memset (self, 0, sizeof *self); + str_init (&self->input); +} + +static void +fcgi_nv_parser_free (struct fcgi_nv_parser *self) +{ + str_free (&self->input); + free (self->name); + free (self->value); +} + +static void +fcgi_nv_parser_push (struct fcgi_nv_parser *self, const void *data, size_t len) +{ + // This could be optimized significantly; I'm not even trying + str_append_data (&self->input, data, len); + + while (true) + { + struct msg_unpacker unpacker; + msg_unpacker_init (&unpacker, self->input.str, self->input.len); + + switch (self->state) + { + uint8_t len; + uint32_t len_full; + + case FCGI_NV_PARSER_NAME_LEN: + if (!msg_unpacker_u8 (&unpacker, &len)) + return; + + if (len >> 7) + self->state = FCGI_NV_PARSER_NAME_LEN_FULL; + else + { + self->name_len = len; + str_remove_slice (&self->input, 0, unpacker.offset); + self->state = FCGI_NV_PARSER_VALUE_LEN; + } + break; + case FCGI_NV_PARSER_NAME_LEN_FULL: + if (!msg_unpacker_u32 (&unpacker, &len_full)) + return; + + self->name_len = len_full & ~(1U 
<< 31); + str_remove_slice (&self->input, 0, unpacker.offset); + self->state = FCGI_NV_PARSER_VALUE_LEN; + break; + case FCGI_NV_PARSER_VALUE_LEN: + if (!msg_unpacker_u8 (&unpacker, &len)) + return; + + if (len >> 7) + self->state = FCGI_NV_PARSER_VALUE_LEN_FULL; + else + { + self->value_len = len; + str_remove_slice (&self->input, 0, unpacker.offset); + self->state = FCGI_NV_PARSER_NAME; + } + break; + case FCGI_NV_PARSER_VALUE_LEN_FULL: + if (!msg_unpacker_u32 (&unpacker, &len_full)) + return; + + self->value_len = len_full & ~(1U << 31); + str_remove_slice (&self->input, 0, unpacker.offset); + self->state = FCGI_NV_PARSER_NAME; + break; + case FCGI_NV_PARSER_NAME: + if (self->input.len < self->name_len) + return; + + self->name = xmalloc (self->name_len + 1); + self->name[self->name_len] = '\0'; + memcpy (self->name, self->input.str, self->name_len); + str_remove_slice (&self->input, 0, self->name_len); + self->state = FCGI_NV_PARSER_VALUE; + break; + case FCGI_NV_PARSER_VALUE: + if (self->input.len < self->value_len) + return; + + self->value = xmalloc (self->value_len + 1); + self->value[self->value_len] = '\0'; + memcpy (self->value, self->input.str, self->value_len); + str_remove_slice (&self->input, 0, self->value_len); + self->state = FCGI_NV_PARSER_NAME_LEN; + + // The map takes ownership of the value + str_map_set (self->output, self->name, self->value); + free (self->name); + + self->name = NULL; + self->value = NULL; + break; + } + } +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +static void +fcgi_nv_convert_len (size_t len, struct str *output) +{ + if (len < 0x80) + str_pack_u8 (output, len); + else + { + len |= (uint32_t) 1 << 31; + str_pack_u32 (output, len); + } +} + +static void +fcgi_nv_convert (struct str_map *map, struct str *output) +{ + struct str_map_iter iter; + str_map_iter_init (&iter, map); + while (str_map_iter_next (&iter)) + { + const char *name = iter.link->key; + const char *value = 
iter.link->data; + size_t name_len = iter.link->key_length; + size_t value_len = strlen (value); + + fcgi_nv_convert_len (name_len, output); + fcgi_nv_convert_len (value_len, output); + str_append_data (output, name, name_len); + str_append_data (output, value, value_len); + } +} + +#endif + +#ifdef LIBERTY_WANT_PROTO_WS + +// --- WebSockets -------------------------------------------------------------- + +#define WS_GUID "258EAFA5-E914-47DA-95CA-C5AB0DC85B11" + +#define SEC_WS_KEY "Sec-WebSocket-Key" +#define SEC_WS_ACCEPT "Sec-WebSocket-Accept" +#define SEC_WS_PROTOCOL "Sec-WebSocket-Protocol" +#define SEC_WS_EXTENSIONS "Sec-WebSocket-Extensions" +#define SEC_WS_VERSION "Sec-WebSocket-Version" + +#define WS_MAX_CONTROL_PAYLOAD_LEN 125 + +static char * +ws_encode_response_key (const char *key) +{ + char *response_key = xstrdup_printf ("%s" WS_GUID, key); + unsigned char hash[SHA_DIGEST_LENGTH]; + SHA1 ((unsigned char *) response_key, strlen (response_key), hash); + free (response_key); + + struct str base64; + str_init (&base64); + base64_encode (hash, sizeof hash, &base64); + return str_steal (&base64); +} + +enum ws_status +{ + // Named according to the meaning specified in RFC 6455, section 11.2 + + WS_STATUS_NORMAL_CLOSURE = 1000, + WS_STATUS_GOING_AWAY = 1001, + WS_STATUS_PROTOCOL_ERROR = 1002, + WS_STATUS_UNSUPPORTED_DATA = 1003, + WS_STATUS_INVALID_PAYLOAD_DATA = 1007, + WS_STATUS_POLICY_VIOLATION = 1008, + WS_STATUS_MESSAGE_TOO_BIG = 1009, + WS_STATUS_MANDATORY_EXTENSION = 1010, + WS_STATUS_INTERNAL_SERVER_ERROR = 1011, + + // Reserved for internal usage + WS_STATUS_NO_STATUS_RECEIVED = 1005, + WS_STATUS_ABNORMAL_CLOSURE = 1006, + WS_STATUS_TLS_HANDSHAKE = 1015 +}; + +// - - Frame parser - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +enum ws_parser_state +{ + WS_PARSER_FIXED, ///< Parsing fixed length part + WS_PARSER_PAYLOAD_LEN_16, ///< Parsing extended payload length + WS_PARSER_PAYLOAD_LEN_64, ///< Parsing extended payload length + 
WS_PARSER_MASK, ///< Parsing masking-key + WS_PARSER_PAYLOAD ///< Parsing payload +}; + +enum ws_opcode +{ + // Non-control + WS_OPCODE_CONT = 0, + WS_OPCODE_TEXT = 1, + WS_OPCODE_BINARY = 2, + + // Control + WS_OPCODE_CLOSE = 8, + WS_OPCODE_PING = 9, + WS_OPCODE_PONG = 10 +}; + +static bool +ws_is_control_frame (int opcode) +{ + return opcode >= WS_OPCODE_CLOSE; +} + +struct ws_parser +{ + struct str input; ///< External input buffer + enum ws_parser_state state; ///< Parsing state + + unsigned is_fin : 1; ///< Final frame of a message? + unsigned is_masked : 1; ///< Is the frame masked? + unsigned reserved_1 : 1; ///< Reserved + unsigned reserved_2 : 1; ///< Reserved + unsigned reserved_3 : 1; ///< Reserved + enum ws_opcode opcode; ///< Opcode + uint32_t mask; ///< Frame mask + uint64_t payload_len; ///< Payload length + + bool (*on_frame_header) (void *user_data, const struct ws_parser *self); + + /// Callback for when a message is successfully parsed. + /// The actual payload is stored in "input", of length "payload_len". + bool (*on_frame) (void *user_data, const struct ws_parser *self); + + void *user_data; ///< User data for callbacks +}; + +static void +ws_parser_init (struct ws_parser *self) +{ + memset (self, 0, sizeof *self); + str_init (&self->input); +} + +static void +ws_parser_free (struct ws_parser *self) +{ + str_free (&self->input); +} + +static void +ws_parser_unmask (char *payload, uint64_t len, uint32_t mask) +{ + // This could be made faster. For example by reading the mask in + // native byte ordering and applying it directly here. 
+ + uint64_t end = len & ~(uint64_t) 3; + for (uint64_t i = 0; i < end; i += 4) + { + payload[i + 3] ^= mask & 0xFF; + payload[i + 2] ^= (mask >> 8) & 0xFF; + payload[i + 1] ^= (mask >> 16) & 0xFF; + payload[i ] ^= (mask >> 24) & 0xFF; + } + + switch (len - end) + { + case 3: + payload[end + 2] ^= (mask >> 8) & 0xFF; + case 2: + payload[end + 1] ^= (mask >> 16) & 0xFF; + case 1: + payload[end ] ^= (mask >> 24) & 0xFF; + } +} + +static bool +ws_parser_push (struct ws_parser *self, const void *data, size_t len) +{ + bool success = false; + str_append_data (&self->input, data, len); + + struct msg_unpacker unpacker; + msg_unpacker_init (&unpacker, self->input.str, self->input.len); + + while (true) + switch (self->state) + { + uint8_t u8; + uint16_t u16; + + case WS_PARSER_FIXED: + if (unpacker.len - unpacker.offset < 2) + goto need_data; + + (void) msg_unpacker_u8 (&unpacker, &u8); + self->is_fin = (u8 >> 7) & 1; + self->reserved_1 = (u8 >> 6) & 1; + self->reserved_2 = (u8 >> 5) & 1; + self->reserved_3 = (u8 >> 4) & 1; + self->opcode = u8 & 15; + + (void) msg_unpacker_u8 (&unpacker, &u8); + self->is_masked = (u8 >> 7) & 1; + self->payload_len = u8 & 127; + + if (self->payload_len == 127) + self->state = WS_PARSER_PAYLOAD_LEN_64; + else if (self->payload_len == 126) + self->state = WS_PARSER_PAYLOAD_LEN_16; + else + self->state = WS_PARSER_MASK; + break; + + case WS_PARSER_PAYLOAD_LEN_16: + if (!msg_unpacker_u16 (&unpacker, &u16)) + goto need_data; + self->payload_len = u16; + + self->state = WS_PARSER_MASK; + break; + + case WS_PARSER_PAYLOAD_LEN_64: + if (!msg_unpacker_u64 (&unpacker, &self->payload_len)) + goto need_data; + + self->state = WS_PARSER_MASK; + break; + + case WS_PARSER_MASK: + if (!self->is_masked) + goto end_of_header; + if (!msg_unpacker_u32 (&unpacker, &self->mask)) + goto need_data; + + end_of_header: + self->state = WS_PARSER_PAYLOAD; + if (!self->on_frame_header (self->user_data, self)) + goto fail; + break; + + case WS_PARSER_PAYLOAD: + // Move 
the buffer so that payload data is at the front + str_remove_slice (&self->input, 0, unpacker.offset); + + // And continue unpacking frames past the payload + msg_unpacker_init (&unpacker, self->input.str, self->input.len); + unpacker.offset = self->payload_len; + + if (self->input.len < self->payload_len) + goto need_data; + if (self->is_masked) + ws_parser_unmask (self->input.str, self->payload_len, self->mask); + if (!self->on_frame (self->user_data, self)) + goto fail; + + self->state = WS_PARSER_FIXED; + break; + } + +need_data: + success = true; +fail: + str_remove_slice (&self->input, 0, unpacker.offset); + return success; +} + +#endif diff --git a/liberty.c b/liberty.c index 163921a..a46bc0d 100644 --- a/liberty.c +++ b/liberty.c @@ -61,6 +61,7 @@ #endif // ! NI_MAXSERV #ifdef LIBERTY_WANT_SSL +#include #include #include #endif // LIBERTY_WANT_SSL @@ -544,6 +545,42 @@ str_remove_slice (struct str *self, size_t start, size_t length) self->str = xrealloc (self->str, self->alloc >>= 2); } +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +static void +str_pack_u8 (struct str *self, uint8_t x) +{ + str_append_data (self, &x, 1); +} + +static void +str_pack_u16 (struct str *self, uint64_t x) +{ + uint8_t tmp[2] = { x >> 8, x }; + str_append_data (self, tmp, sizeof tmp); +} + +static void +str_pack_u32 (struct str *self, uint32_t x) +{ + uint32_t u = x; + uint8_t tmp[4] = { u >> 24, u >> 16, u >> 8, u }; + str_append_data (self, tmp, sizeof tmp); +} + +static void +str_pack_u64 (struct str *self, uint64_t x) +{ + uint8_t tmp[8] = + { x >> 56, x >> 48, x >> 40, x >> 32, x >> 24, x >> 16, x >> 8, x }; + str_append_data (self, tmp, sizeof tmp); +} + +#define str_pack_i8(self, x) str_pack_u8 ((self), (uint8_t) (x)) +#define str_pack_i16(self, x) str_pack_u16 ((self), (uint16_t) (x)) +#define str_pack_i32(self, x) str_pack_u32 ((self), (uint32_t) (x)) +#define str_pack_i64(self, x) str_pack_u64 ((self), (uint64_t) (x)) + // --- Errors 
------------------------------------------------------------------ // Error reporting utilities. Inspired by GError, only much simpler. @@ -1660,7 +1697,16 @@ msg_unpacker_u8 (struct msg_unpacker *self, uint8_t *value) } static bool -msg_unpacker_i32 (struct msg_unpacker *self, int32_t *value) +msg_unpacker_u16 (struct msg_unpacker *self, uint16_t *value) +{ + UNPACKER_INT_BEGIN + *value + = (uint16_t) x[0] << 24 | (uint16_t) x[1] << 16; + return true; +} + +static bool +msg_unpacker_u32 (struct msg_unpacker *self, uint32_t *value) { UNPACKER_INT_BEGIN *value @@ -1681,10 +1727,22 @@ msg_unpacker_u64 (struct msg_unpacker *self, uint64_t *value) return true; } +#define msg_unpacker_i8(self, value) \ + msg_unpacker_u8 ((self), (uint8_t *) (value)) +#define msg_unpacker_i16(self, value) \ + msg_unpacker_u16 ((self), (uint16_t *) (value)) +#define msg_unpacker_i32(self, value) \ + msg_unpacker_u32 ((self), (uint32_t *) (value)) +#define msg_unpacker_i64(self, value) \ + msg_unpacker_u64 ((self), (uint64_t *) (value)) + #undef UNPACKER_INT_BEGIN // --- Message packer and writer ----------------------------------------------- +// Use str_pack_*() or other methods to append to the internal buffer, then +// flush it to get a nice frame. Handy for iovec. 
// --- ASCII -------------------------------------------------------------------

/// Convert an uppercase ASCII letter to lowercase, independently of the locale.
/// Any other value is returned unchanged.
static int
tolower_ascii (int c)
{
	return c >= 'A' && c <= 'Z' ? c + ('a' - 'A') : c;
}

/// A strxfrm()-like function that lowercases ASCII letters.
///
/// At most `n' characters are written to `dest', the terminating NUL included
/// when it fits.  The return value is the length of `src', so the output has
/// been truncated (and isn't terminated) whenever the result is >= `n'.
static size_t
tolower_ascii_strxfrm (char *dest, const char *src, size_t n)
{
	size_t len = strlen (src);
	// Stops either when out of space, or right after copying the NUL
	while (n-- && (*dest++ = tolower_ascii (*src++)))
		;
	return len;
}

/// Compare two strings while ignoring the case of ASCII letters.
/// Returns zero when they're equal, or the difference between the first pair
/// of lowercased characters (taken as unsigned char) that don't match.
static int
strcasecmp_ascii (const char *a, const char *b)
{
	int x;
	// When only one of the strings ends, its NUL differs from the character
	// in the other string, so we never read past either terminator
	while (*a || *b)
		if ((x = tolower_ascii (*(const unsigned char *) a++)
			- tolower_ascii (*(const unsigned char *) b++)))
			return x;
	return 0;
}

/// Tell whether `c' is whitespace, the same set of characters that isspace()
/// accepts in the "C" locale: space, \f, \n, \r, \t and \v
static bool
isspace_ascii (int c)
{
	return c == ' ' || c == '\f' || c == '\n'
		|| c == '\r' || c == '\t' || c == '\v';
}

// --- UTF-8 -------------------------------------------------------------------

/// Return a pointer to the next UTF-8 character, or NULL on error
/// (which includes `len' being zero, i.e. the end of the string).
/// Only the structure of the sequence is checked: a lead byte has to be
/// followed by as many continuation bytes as it announces.
// TODO: decode the sequence while we're at it
static const char *
utf8_next (const char *s, size_t len)
{
	// End of string, we go no further
	if (!len)
		return NULL;

	// In the middle of a character -> error
	const uint8_t *p = (const unsigned char *) s;
	if ((*p & 0xC0) == 0x80)
		return NULL;

	// Find out how long the sequence is: each leading one bit of the first
	// byte past the first one stands for one continuation byte
	unsigned mask = 0xC0;
	unsigned tail_len = 0;
	while ((*p & mask) == mask)
	{
		// Invalid start of sequence (0xFE and 0xFF)
		if (mask == 0xFE)
			return NULL;

		mask |= mask >> 1;
		tail_len++;
	}

	p++;

	// Check the rest of the sequence; it mustn't be cut off by the end
	// of the string and must only consist of continuation bytes
	if (tail_len > --len)
		return NULL;

	while (tail_len--)
		if ((*p++ & 0xC0) != 0x80)
			return NULL;

	return (const char *) p;
}

/// Very rough UTF-8 validation, just makes sure codepoints can be iterated
// TODO: also validate the codepoints
static bool
utf8_validate (const char *s, size_t len)
{
	const char *next;
	while (len)
	{
		if (!(next = utf8_next (s, len)))
			return false;

		len -= next - s;
		s = next;
	}
	return true;
}
64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 62, 64, 64, 64, 63, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 64, 64, 64, 0, 64, 64, + 64, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 64, 64, 64, 64, 64, + 64, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 64, 64, 64, 64, 64, + + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, +}; + +static inline bool +base64_decode_group (const char **s, bool ignore_ws, struct str *output) +{ + uint8_t input[4]; + size_t loaded = 0; + for (; loaded < 4; (*s)++) + { + if (!**s) + return loaded == 0; + if (!ignore_ws || !isspace_ascii (**s)) + input[loaded++] = **s; + } + + size_t len = 3; + if (input[0] == '=' || input[1] == '=') + return false; + if (input[2] == '=' && input[3] != '=') + return false; + if (input[2] == '=') + len--; + if (input[3] == '=') + len--; + + uint8_t a = g_base64_table[input[0]]; + uint8_t b = g_base64_table[input[1]]; + uint8_t c = g_base64_table[input[2]]; + uint8_t d = g_base64_table[input[3]]; + + if (((a | b) | (c | d)) & 0x40) + return false; + + uint32_t block = a << 18 | b << 12 | c << 6 | d; + switch (len) + { + case 1: + str_append_c (output, block >> 16); + break; + case 2: + str_append_c (output, block >> 16); + str_append_c (output, block >> 8); + break; + case 3: + str_append_c (output, block >> 16); + str_append_c (output, block >> 8); + str_append_c (output, block); + } + return true; +} + +static 
/// Append the Base64 encoding of `len' bytes at `data' to `output',
/// using the standard alphabet and '=' for padding, with no line breaks
static void
base64_encode (const void *data, size_t len, struct str *output)
{
	static const char alphabet[] =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

	const uint8_t *in = data;
	size_t rest = len % 3;

	// Whole groups: three bytes of input make four characters of output
	for (const uint8_t *end = in + (len - rest); in != end; in += 3)
	{
		uint32_t group = in[0] << 16 | in[1] << 8 | in[2];
		str_append_c (output, alphabet[(group >> 18) & 63]);
		str_append_c (output, alphabet[(group >> 12) & 63]);
		str_append_c (output, alphabet[(group >>  6) & 63]);
		str_append_c (output, alphabet[ group        & 63]);
	}

	// The last, incomplete group is filled with zero bits, and the characters
	// that would consist of nothing but the filling are replaced with padding
	if (rest == 2)
	{
		uint32_t group = in[0] << 16 | in[1] << 8;
		str_append_c (output, alphabet[(group >> 18) & 63]);
		str_append_c (output, alphabet[(group >> 12) & 63]);
		str_append_c (output, alphabet[(group >>  6) & 63]);
		str_append_c (output, '=');
	}
	else if (rest == 1)
	{
		uint32_t group = in[0] << 16;
		str_append_c (output, alphabet[(group >> 18) & 63]);
		str_append_c (output, alphabet[(group >> 12) & 63]);
		str_append_c (output, '=');
		str_append_c (output, '=');
	}
}
c + ('a' - 'A') : c; -} - -static size_t -tolower_strxfrm (char *dest, const char *src, size_t n) -{ - size_t len = strlen (src); - while (n-- && (*dest++ = tolower_ascii (*src++))) - ; - return len; -} - static void free_counter (void *data) { @@ -280,7 +265,7 @@ test_str_map (void) // Put two reference counted objects in the map under case-insensitive keys struct str_map m; str_map_init (&m); - m.key_xfrm = tolower_strxfrm; + m.key_xfrm = tolower_ascii_strxfrm; m.free = free_counter; int *a = make_counter (); @@ -323,6 +308,34 @@ test_str_map (void) free_counter (b); } +static void +test_utf8 (void) +{ + const char valid [] = "2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm"; + const char invalid[] = "\xf0\x90\x28\xbc"; + soft_assert ( utf8_validate (valid, sizeof valid)); + soft_assert (!utf8_validate (invalid, sizeof invalid)); +} + +static void +test_base64 (void) +{ + char data[65]; + for (size_t i = 0; i < N_ELEMENTS (data); i++) + data[i] = i; + + struct str encoded; str_init (&encoded); + struct str decoded; str_init (&decoded); + + base64_encode (data, sizeof data, &encoded); + soft_assert (base64_decode (encoded.str, false, &decoded)); + soft_assert (decoded.len == sizeof data); + soft_assert (!memcmp (decoded.str, data, sizeof data)); + + str_free (&encoded); + str_free (&decoded); +} + // --- Main -------------------------------------------------------------------- int @@ -338,6 +351,8 @@ main (int argc, char *argv[]) test_add_simple (&test, "/str", NULL, test_str); test_add_simple (&test, "/error", NULL, test_error); test_add_simple (&test, "/str-map", NULL, test_str_map); + test_add_simple (&test, "/utf-8", NULL, test_utf8); + test_add_simple (&test, "/base64", NULL, test_base64); // TODO: write tests for the rest of the library diff --git a/tests/proto.c b/tests/proto.c new file mode 100644 index 0000000..caa9cf2 --- /dev/null +++ b/tests/proto.c @@ -0,0 +1,167 @@ +/* + * tests/proto.c + * + * Copyright (c) 2015, Přemysl Janouch + * All rights reserved. 
+ * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#define PROGRAM_NAME "test" +#define PROGRAM_VERSION "0" + +#define LIBERTY_WANT_SSL + +#define LIBERTY_WANT_PROTO_IRC +#define LIBERTY_WANT_PROTO_HTTP +#define LIBERTY_WANT_PROTO_SCGI +#define LIBERTY_WANT_PROTO_FASTCGI +#define LIBERTY_WANT_PROTO_WS + +#include "../liberty.c" + +// --- Tests ------------------------------------------------------------------- + +static void +test_http_parser (void) +{ + struct str_map parameters; + str_map_init (¶meters); + parameters.key_xfrm = tolower_ascii_strxfrm; + + char *type = NULL; + char *subtype = NULL; + soft_assert (http_parse_media_type ("TEXT/html; CHARset=\"utf\\-8\"", + &type, &subtype, ¶meters)); + soft_assert (!strcasecmp_ascii (type, "text")); + soft_assert (!strcasecmp_ascii (subtype, "html")); + soft_assert (parameters.len == 1); + soft_assert (!strcmp (str_map_find (¶meters, "charset"), "utf-8")); + str_map_free (¶meters); + + struct http_protocol *protocols; + soft_assert (http_parse_upgrade ("websocket, HTTP/2.0, , ", &protocols)); + + soft_assert (!strcmp (protocols->name, "websocket")); + soft_assert (!protocols->version); + + soft_assert (!strcmp (protocols->next->name, "HTTP")); + soft_assert (!strcmp (protocols->next->version, "2.0")); + + soft_assert 
(!protocols->next->next); + + LIST_FOR_EACH (struct http_protocol, iter, protocols) + http_protocol_destroy (iter); +} + +static bool +test_scgi_parser_on_headers_read (void *user_data) +{ + struct scgi_parser *parser = user_data; + soft_assert (parser->headers.len == 4); + soft_assert (!strcmp (str_map_find (&parser->headers, + "CONTENT_LENGTH"), "27")); + soft_assert (!strcmp (str_map_find (&parser->headers, + "SCGI"), "1")); + soft_assert (!strcmp (str_map_find (&parser->headers, + "REQUEST_METHOD"), "POST")); + soft_assert (!strcmp (str_map_find (&parser->headers, + "REQUEST_URI"), "/deepthought")); + return true; +} + +static bool +test_scgi_parser_on_content (void *user_data, const void *data, size_t len) +{ + (void) user_data; + soft_assert (!strncmp (data, "What is the answer to life?", len)); + return true; +} + +static void +test_scgi_parser (void) +{ + struct scgi_parser parser; + scgi_parser_init (&parser); + parser.on_headers_read = test_scgi_parser_on_headers_read; + parser.on_content = test_scgi_parser_on_content; + parser.user_data = &parser; + + // This is an example straight from the specification + const char example[] = + "70:" + "CONTENT_LENGTH" "\0" "27" "\0" + "SCGI" "\0" "1" "\0" + "REQUEST_METHOD" "\0" "POST" "\0" + "REQUEST_URI" "\0" "/deepthought" "\0" + "," + "What is the answer to life?"; + + soft_assert (scgi_parser_push (&parser, example, sizeof example, NULL)); + scgi_parser_free (&parser); +} + +static bool +test_websockets_on_frame_header (void *user_data, const struct ws_parser *self) +{ + (void) user_data; + soft_assert (self->is_fin); + soft_assert (self->is_masked); + soft_assert (self->opcode == WS_OPCODE_TEXT); + return true; +} + +static bool +test_websockets_on_frame (void *user_data, const struct ws_parser *self) +{ + (void) user_data; + soft_assert (self->input.len == self->payload_len); + soft_assert (!strncmp (self->input.str, "Hello", self->input.len)); + return true; +} + +static void +test_websockets (void) +{ + char 
*accept = ws_encode_response_key ("dGhlIHNhbXBsZSBub25jZQ=="); + soft_assert (!strcmp (accept, "s3pPLMBiTxaQ9kYGzzhZRbK+xOo=")); + free (accept); + + struct ws_parser parser; + ws_parser_init (&parser); + parser.on_frame_header = test_websockets_on_frame_header; + parser.on_frame = test_websockets_on_frame; + parser.user_data = &parser; + + const char frame[] = "\x81\x85\x37\xfa\x21\x3d\x7f\x9f\x4d\x51\x58"; + soft_assert (ws_parser_push (&parser, frame, sizeof frame - 1)); + ws_parser_free (&parser); +} + +// --- Main -------------------------------------------------------------------- + +int +main (int argc, char *argv[]) +{ + struct test test; + test_init (&test, argc, argv); + + test_add_simple (&test, "/http-parser", NULL, test_http_parser); + test_add_simple (&test, "/scgi-parser", NULL, test_scgi_parser); + test_add_simple (&test, "/websockets", NULL, test_websockets); + // TODO: test FastCGI + // TODO: test IRC + + return test_run (&test); +}