diff --git a/demo-json-rpc-server.c b/demo-json-rpc-server.c index 8aa070b..2196217 100644 --- a/demo-json-rpc-server.c +++ b/demo-json-rpc-server.c @@ -25,6 +25,10 @@ #define print_debug_data ((void *) LOG_DEBUG) #define LIBERTY_WANT_SSL +#define LIBERTY_WANT_PROTO_HTTP +#define LIBERTY_WANT_PROTO_WS +#define LIBERTY_WANT_PROTO_SCGI +#define LIBERTY_WANT_PROTO_FASTCGI #include "config.h" #include "liberty/liberty.c" @@ -42,640 +46,7 @@ // --- Extensions to liberty --------------------------------------------------- -// These should be incorporated into the library ASAP - -#define UNPACKER_INT_BEGIN \ - if (self->len - self->offset < sizeof *value) \ - return false; \ - uint8_t *x = (uint8_t *) self->data + self->offset; \ - self->offset += sizeof *value; - -static bool -msg_unpacker_u16 (struct msg_unpacker *self, uint16_t *value) -{ - UNPACKER_INT_BEGIN - *value - = (uint16_t) x[0] << 24 | (uint16_t) x[1] << 16; - return true; -} - -static bool -msg_unpacker_u32 (struct msg_unpacker *self, uint32_t *value) -{ - UNPACKER_INT_BEGIN - *value - = (uint32_t) x[0] << 24 | (uint32_t) x[1] << 16 - | (uint32_t) x[2] << 8 | (uint32_t) x[3]; - return true; -} - -#undef UNPACKER_INT_BEGIN - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -// "msg_writer" should be rewritten on top of this - -static void -str_pack_u8 (struct str *self, uint8_t x) -{ - str_append_data (self, &x, 1); -} - -static void -str_pack_u16 (struct str *self, uint64_t x) -{ - uint8_t tmp[2] = { x >> 8, x }; - str_append_data (self, tmp, sizeof tmp); -} - -static void -str_pack_u32 (struct str *self, uint32_t x) -{ - uint32_t u = x; - uint8_t tmp[4] = { u >> 24, u >> 16, u >> 8, u }; - str_append_data (self, tmp, sizeof tmp); -} - -static void -str_pack_i32 (struct str *self, int32_t x) -{ - str_pack_u32 (self, (uint32_t) x); -} - -static void -str_pack_u64 (struct str *self, uint64_t x) -{ - uint8_t tmp[8] = - { x >> 56, x >> 48, x >> 40, x >> 32, x >> 24, x >> 16, x >> 8, x }; - str_append_data (self, tmp, sizeof tmp); -} - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -static int -tolower_ascii (int c) -{ - return c >= 'A' && c <= 'Z' ? c + ('a' - 'A') : c; -} - -static size_t -tolower_ascii_strxfrm (char *dest, const char *src, size_t n) -{ - size_t len = strlen (src); - while (n-- && (*dest++ = tolower_ascii (*src++))) - ; - return len; -} - -static int -strcasecmp_ascii (const char *a, const char *b) -{ - while (*a && tolower_ascii (*a) == tolower_ascii (*b)) - { - a++; - b++; - } - return *(const unsigned char *) a - *(const unsigned char *) b; -} - -static bool -isspace_ascii (int c) -{ - return c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v'; -} - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -/// Return a pointer to the next UTF-8 character, or NULL on error -// TODO: decode the sequence while we're at it -static const char * -utf8_next (const char *s, size_t len) -{ - // End of string, we go no further - if (!len) - return NULL; - - // In the middle of a character -> error - const uint8_t *p = (const unsigned char *) s; - if ((*p & 0xC0) == 0x80) - return NULL; - - // Find out how long the sequence is - unsigned mask = 0xC0; - unsigned tail_len = 0; - while ((*p & mask) == mask) - { - // Invalid start of sequence - if (mask == 0xFE) - return NULL; - - mask |= mask >> 1; - tail_len++; - } - - p++; - - // Check the rest of the sequence - if (tail_len > --len) - return NULL; - - while (tail_len--) - if ((*p++ & 0xC0) != 0x80) - return NULL; - - return (const char *) p; -} - -/// Very rough UTF-8 validation, just makes sure codepoints can be iterated -// TODO: also validate the codepoints -static bool -utf8_validate (const char *s, size_t len) -{ - const char *next; - while (len) - { - if (!(next = utf8_next (s, len))) - return false; - - len -= next - s; - s = next; - } - return true; -} - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -static uint8_t g_base64_table[256] = -{ - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 62, 64, 64, 64, 63, - 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 64, 64, 64, 0, 64, 64, - 64, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 64, 64, 64, 64, 64, - 64, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, - 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 64, 64, 64, 64, 64, - - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, -}; - -static inline bool -base64_decode_group (const char **s, bool ignore_ws, struct str *output) -{ - uint8_t input[4]; - size_t loaded = 0; - for (; loaded < 4; (*s)++) - { - if (!**s) - return loaded == 0; - if (!ignore_ws || !isspace_ascii (**s)) - input[loaded++] = **s; - } - - size_t len = 3; - if (input[0] == '=' || input[1] == '=') - return false; - if (input[2] == '=' && input[3] != '=') - return false; - if (input[2] == '=') - len--; - if (input[3] == '=') - len--; - - uint8_t a = g_base64_table[input[0]]; - uint8_t b = g_base64_table[input[1]]; - uint8_t c = g_base64_table[input[2]]; - uint8_t d = g_base64_table[input[3]]; - - if (((a | b) | (c | d)) & 0x40) - return false; - - uint32_t block = a << 18 | b << 12 | c << 6 | d; - switch (len) - { - case 1: - str_append_c (output, block >> 16); - break; - case 2: - str_append_c (output, block >> 16); - str_append_c (output, block >> 8); - break; - case 3: - str_append_c (output, block >> 16); - str_append_c (output, block >> 8); - str_append_c (output, block); - } - return true; -} - -static bool -base64_decode (const char *s, bool ignore_ws, struct str *output) -{ - while (*s) - if (!base64_decode_group (&s, ignore_ws, output)) - return false; - return true; -} - -static void -base64_encode (const void *data, size_t len, struct str *output) -{ - const char *alphabet = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - - const uint8_t *p = data; - size_t n_groups = len / 3; - size_t tail = len - n_groups * 3; - uint32_t group; - - for (; n_groups--; p += 3) - { - group = p[0] << 16 | p[1] << 8 | p[2]; - str_append_c (output, alphabet[(group >> 18) & 63]); - str_append_c (output, alphabet[(group >> 12) & 63]); - str_append_c (output, alphabet[(group >> 6) & 63]); - str_append_c (output, alphabet[ group & 63]); - } - - switch (tail) - { - case 2: - group = p[0] << 16 | p[1] << 8; - str_append_c (output, alphabet[(group >> 18) & 63]); - str_append_c (output, alphabet[(group >> 12) & 63]); - str_append_c (output, alphabet[(group >> 6) & 63]); - str_append_c (output, '='); - break; - case 1: - group = p[0] << 16; - str_append_c (output, alphabet[(group >> 18) & 63]); - str_append_c (output, alphabet[(group >> 12) & 63]); - str_append_c (output, '='); - str_append_c (output, '='); - default: - break; - } -} - -// --- HTTP parsing ------------------------------------------------------------ - -// Basic tokenizer for HTTP header field values, to be used in various parsers. -// The input should already be unwrapped. - -// Recommended literature: -// http://tools.ietf.org/html/rfc7230#section-3.2.6 -// http://tools.ietf.org/html/rfc7230#appendix-B -// http://tools.ietf.org/html/rfc5234#appendix-B.1 - -#define HTTP_TOKENIZER_CLASS(name, definition) \ - static inline bool \ - http_tokenizer_is_ ## name (int c) \ - { \ - return (definition); \ - } - -HTTP_TOKENIZER_CLASS (vchar, c >= 0x21 && c <= 0x7E) -HTTP_TOKENIZER_CLASS (delimiter, !!strchr ("\"(),/:;<=>?@[\\]{}", c)) -HTTP_TOKENIZER_CLASS (whitespace, c == '\t' || c == ' ') -HTTP_TOKENIZER_CLASS (obs_text, c >= 0x80 && c <= 0xFF) - -HTTP_TOKENIZER_CLASS (tchar, - http_tokenizer_is_vchar (c) && !http_tokenizer_is_delimiter (c)) - -HTTP_TOKENIZER_CLASS (qdtext, - c == '\t' || c == ' ' || c == '!' - || (c >= 0x23 && c <= 0x5B) - || (c >= 0x5D && c <= 0x7E) - || http_tokenizer_is_obs_text (c)) - -HTTP_TOKENIZER_CLASS (quoted_pair, - c == '\t' || c == ' ' - || http_tokenizer_is_vchar (c) - || http_tokenizer_is_obs_text (c)) - -#undef HTTP_TOKENIZER_CLASS - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -enum http_tokenizer_token -{ - HTTP_T_EOF, ///< Input error - HTTP_T_ERROR, ///< End of input - - HTTP_T_TOKEN, ///< "token" - HTTP_T_QUOTED_STRING, ///< "quoted-string" - HTTP_T_DELIMITER, ///< "delimiters" - HTTP_T_WHITESPACE ///< RWS/OWS/BWS -}; - -struct http_tokenizer -{ - const unsigned char *input; ///< The input string - size_t input_len; ///< Length of the input - size_t offset; ///< Position in the input - - char delimiter; ///< The delimiter character - struct str string; ///< "token" / "quoted-string" content -}; - -static void -http_tokenizer_init (struct http_tokenizer *self, const char *input, size_t len) -{ - memset (self, 0, sizeof *self); - self->input = (const unsigned char *) input; - self->input_len = len; - - str_init (&self->string); -} - -static void -http_tokenizer_free (struct http_tokenizer *self) -{ - str_free (&self->string); -} - -static enum http_tokenizer_token -http_tokenizer_quoted_string (struct http_tokenizer *self) -{ - bool quoted_pair = false; - while (self->offset < self->input_len) - { - int c = self->input[self->offset++]; - if (quoted_pair) - { - if (!http_tokenizer_is_quoted_pair (c)) - return HTTP_T_ERROR; - - str_append_c (&self->string, c); - quoted_pair = false; - } - else if (c == '\\') - quoted_pair = true; - else if (c == '"') - return HTTP_T_QUOTED_STRING; - else if (http_tokenizer_is_qdtext (c)) - str_append_c (&self->string, c); - else - return HTTP_T_ERROR; - } - - // Premature end of input - return HTTP_T_ERROR; -} - -static enum http_tokenizer_token -http_tokenizer_next (struct http_tokenizer *self, bool skip_ows) -{ - str_reset (&self->string); - if (self->offset >= self->input_len) - return HTTP_T_EOF; - - int c = self->input[self->offset++]; - - if (skip_ows) - while (http_tokenizer_is_whitespace (c)) - { - if (self->offset >= self->input_len) - return HTTP_T_EOF; - c = self->input[self->offset++]; - } - - if (c == '"') - return http_tokenizer_quoted_string (self); - - if (http_tokenizer_is_delimiter (c)) - { - self->delimiter = c; - return HTTP_T_DELIMITER; - } - - // Simple variable-length tokens - enum http_tokenizer_token result; - bool (*eater) (int c) = NULL; - if (http_tokenizer_is_whitespace (c)) - { - eater = http_tokenizer_is_whitespace; - result = HTTP_T_WHITESPACE; - } - else if (http_tokenizer_is_tchar (c)) - { - eater = http_tokenizer_is_tchar; - result = HTTP_T_TOKEN; - } - else - return HTTP_T_ERROR; - - str_append_c (&self->string, c); - while (self->offset < self->input_len) - { - if (!eater (c = self->input[self->offset])) - break; - - str_append_c (&self->string, c); - self->offset++; - } - return result; -} - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -static bool -http_parse_media_type_parameter - (struct http_tokenizer *t, struct str_map *parameters) -{ - bool result = false; - char *attribute = NULL; - - if (http_tokenizer_next (t, true) != HTTP_T_TOKEN) - goto end; - attribute = xstrdup (t->string.str); - - if (http_tokenizer_next (t, false) != HTTP_T_DELIMITER - || t->delimiter != '=') - goto end; - - switch (http_tokenizer_next (t, false)) - { - case HTTP_T_TOKEN: - case HTTP_T_QUOTED_STRING: - str_map_set (parameters, attribute, xstrdup (t->string.str)); - result = true; - default: - break; - } - -end: - free (attribute); - return result; -} - -/// Parser for "Content-Type". @a type and @a subtype may be non-NULL -/// even if the function fails. @a parameters should be case-insensitive. -static bool -http_parse_media_type (const char *media_type, - char **type, char **subtype, struct str_map *parameters) -{ - bool result = false; - struct http_tokenizer t; - http_tokenizer_init (&t, media_type, strlen (media_type)); - - if (http_tokenizer_next (&t, true) != HTTP_T_TOKEN) - goto end; - *type = xstrdup (t.string.str); - - if (http_tokenizer_next (&t, false) != HTTP_T_DELIMITER - || t.delimiter != '/') - goto end; - - if (http_tokenizer_next (&t, false) != HTTP_T_TOKEN) - goto end; - *subtype = xstrdup (t.string.str); - - while (true) - switch (http_tokenizer_next (&t, true)) - { - case HTTP_T_DELIMITER: - if (t.delimiter != ';') - goto end; - if (!http_parse_media_type_parameter (&t, parameters)) - goto end; - break; - case HTTP_T_EOF: - result = true; - default: - goto end; - } - -end: - http_tokenizer_free (&t); - return result; -} - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -struct http_protocol -{ - LIST_HEADER (struct http_protocol) - - char *name; ///< The protocol to upgrade to - char *version; ///< Version of the protocol, if any -}; - -static void -http_protocol_destroy (struct http_protocol *self) -{ - free (self->name); - free (self->version); - free (self); -} - -static bool -http_parse_upgrade (const char *upgrade, struct http_protocol **out) -{ - // HTTP grammar makes this more complicated than it should be - - bool result = false; - struct http_protocol *list = NULL; - struct http_protocol *tail = NULL; - - struct http_tokenizer t; - http_tokenizer_init (&t, upgrade, strlen (upgrade)); - - enum { - STATE_PROTOCOL_NAME, - STATE_SLASH, - STATE_PROTOCOL_VERSION, - STATE_EXPECT_COMMA - } state = STATE_PROTOCOL_NAME; - struct http_protocol *proto = NULL; - - while (true) - switch (state) - { - case STATE_PROTOCOL_NAME: - switch (http_tokenizer_next (&t, false)) - { - case HTTP_T_DELIMITER: - if (t.delimiter != ',') - goto end; - case HTTP_T_WHITESPACE: - break; - case HTTP_T_TOKEN: - proto = xcalloc (1, sizeof *proto); - proto->name = xstrdup (t.string.str); - LIST_APPEND_WITH_TAIL (list, tail, proto); - state = STATE_SLASH; - break; - case HTTP_T_EOF: - result = true; - default: - goto end; - } - break; - case STATE_SLASH: - switch (http_tokenizer_next (&t, false)) - { - case HTTP_T_DELIMITER: - if (t.delimiter == '/') - state = STATE_PROTOCOL_VERSION; - else if (t.delimiter == ',') - state = STATE_PROTOCOL_NAME; - else - goto end; - break; - case HTTP_T_WHITESPACE: - state = STATE_EXPECT_COMMA; - break; - case HTTP_T_EOF: - result = true; - default: - goto end; - } - break; - case STATE_PROTOCOL_VERSION: - switch (http_tokenizer_next (&t, false)) - { - case HTTP_T_TOKEN: - proto->version = xstrdup (t.string.str); - state = STATE_EXPECT_COMMA; - break; - default: - goto end; - } - break; - case STATE_EXPECT_COMMA: - switch (http_tokenizer_next (&t, false)) - { - case HTTP_T_DELIMITER: - if (t.delimiter != ',') - goto end; - state = STATE_PROTOCOL_NAME; - case HTTP_T_WHITESPACE: - break; - case HTTP_T_EOF: - result = true; - default: - goto end; - } - } - -end: - if (result) - *out = list; - else - LIST_FOR_EACH (struct http_protocol, iter, list) - http_protocol_destroy (iter); - - http_tokenizer_free (&t); - return result; -} +// Currently in sync, nothing to be moved. // --- libev helpers ----------------------------------------------------------- @@ -730,322 +101,6 @@ log_message_syslog (void *user_data, const char *quote, const char *fmt, // --- FastCGI ----------------------------------------------------------------- -// Constants from the FastCGI specification document - -#define FCGI_HEADER_LEN 8 - -#define FCGI_VERSION_1 1 -#define FCGI_NULL_REQUEST_ID 0 -#define FCGI_KEEP_CONN 1 - -enum fcgi_type -{ - FCGI_BEGIN_REQUEST = 1, - FCGI_ABORT_REQUEST = 2, - FCGI_END_REQUEST = 3, - FCGI_PARAMS = 4, - FCGI_STDIN = 5, - FCGI_STDOUT = 6, - FCGI_STDERR = 7, - FCGI_DATA = 8, - FCGI_GET_VALUES = 9, - FCGI_GET_VALUES_RESULT = 10, - FCGI_UNKNOWN_TYPE = 11, - FCGI_MAXTYPE = FCGI_UNKNOWN_TYPE -}; - -enum fcgi_role -{ - FCGI_RESPONDER = 1, - FCGI_AUTHORIZER = 2, - FCGI_FILTER = 3 -}; - -enum fcgi_protocol_status -{ - FCGI_REQUEST_COMPLETE = 0, - FCGI_CANT_MPX_CONN = 1, - FCGI_OVERLOADED = 2, - FCGI_UNKNOWN_ROLE = 3 -}; - -#define FCGI_MAX_CONNS "FCGI_MAX_CONNS" -#define FCGI_MAX_REQS "FCGI_MAX_REQS" -#define FCGI_MPXS_CONNS "FCGI_MPXS_CONNS" - -// - - Message stream parser - - - - - - - - - - - - - - - - - - - - - - - - - - - -struct fcgi_parser; - -typedef void (*fcgi_message_fn) - (const struct fcgi_parser *parser, void *user_data); - -enum fcgi_parser_state -{ - FCGI_READING_HEADER, ///< Reading the fixed header portion - FCGI_READING_CONTENT, ///< Reading the message content - FCGI_READING_PADDING ///< Reading the padding -}; - -struct fcgi_parser -{ - enum fcgi_parser_state state; ///< Parsing state - struct str input; ///< Input buffer - - // The next block of fields is considered public: - - uint8_t version; ///< FastCGI protocol version - uint8_t type; ///< FastCGI record type - uint16_t request_id; ///< FastCGI request ID - struct str content; ///< Message data - - uint16_t content_length; ///< Message content length - uint8_t padding_length; ///< Message padding length - - fcgi_message_fn on_message; ///< Callback on message - void *user_data; ///< User data -}; - -static void -fcgi_parser_init (struct fcgi_parser *self) -{ - memset (self, 0, sizeof *self); - str_init (&self->input); - str_init (&self->content); -} - -static void -fcgi_parser_free (struct fcgi_parser *self) -{ - str_free (&self->input); - str_free (&self->content); -} - -static void -fcgi_parser_unpack_header (struct fcgi_parser *self) -{ - struct msg_unpacker unpacker; - msg_unpacker_init (&unpacker, self->input.str, self->input.len); - - bool success = true; - uint8_t reserved; - success &= msg_unpacker_u8 (&unpacker, &self->version); - success &= msg_unpacker_u8 (&unpacker, &self->type); - success &= msg_unpacker_u16 (&unpacker, &self->request_id); - success &= msg_unpacker_u16 (&unpacker, &self->content_length); - success &= msg_unpacker_u8 (&unpacker, &self->padding_length); - success &= msg_unpacker_u8 (&unpacker, &reserved); - hard_assert (success); - - str_remove_slice (&self->input, 0, unpacker.offset); -} - -static void -fcgi_parser_push (struct fcgi_parser *self, const void *data, size_t len) -{ - // This could be made considerably faster for high-throughput applications - // if we use a circular buffer instead of constantly calling memmove() - str_append_data (&self->input, data, len); - - while (true) - switch (self->state) - { - case FCGI_READING_HEADER: - if (self->input.len < FCGI_HEADER_LEN) - return; - - fcgi_parser_unpack_header (self); - self->state = FCGI_READING_CONTENT; - break; - case FCGI_READING_CONTENT: - if (self->input.len < self->content_length) - return; - - // Move an appropriate part of the input buffer to the content buffer - str_reset (&self->content); - str_append_data (&self->content, self->input.str, self->content_length); - str_remove_slice (&self->input, 0, self->content_length); - self->state = FCGI_READING_PADDING; - break; - case FCGI_READING_PADDING: - if (self->input.len < self->padding_length) - return; - - // Call the callback to further process the message - self->on_message (self, self->user_data); - - // Remove the padding from the input buffer - str_remove_slice (&self->input, 0, self->padding_length); - self->state = FCGI_READING_HEADER; - break; - } -} - -// - - Name-value pair parser - - - - - - - - - - - - - - - - - - - - - - - - - - -enum fcgi_nv_parser_state -{ - FCGI_NV_PARSER_NAME_LEN, ///< The first name length octet - FCGI_NV_PARSER_NAME_LEN_FULL, ///< Remaining name length octets - FCGI_NV_PARSER_VALUE_LEN, ///< The first value length octet - FCGI_NV_PARSER_VALUE_LEN_FULL, ///< Remaining value length octets - FCGI_NV_PARSER_NAME, ///< Reading the name - FCGI_NV_PARSER_VALUE ///< Reading the value -}; - -struct fcgi_nv_parser -{ - struct str_map *output; ///< Where the pairs will be stored - - enum fcgi_nv_parser_state state; ///< Parsing state - struct str input; ///< Input buffer - - uint32_t name_len; ///< Length of the name - uint32_t value_len; ///< Length of the value - - char *name; ///< The current name, 0-terminated - char *value; ///< The current value, 0-terminated -}; - -static void -fcgi_nv_parser_init (struct fcgi_nv_parser *self) -{ - memset (self, 0, sizeof *self); - str_init (&self->input); -} - -static void -fcgi_nv_parser_free (struct fcgi_nv_parser *self) -{ - str_free (&self->input); - free (self->name); - free (self->value); -} - -static void -fcgi_nv_parser_push (struct fcgi_nv_parser *self, const void *data, size_t len) -{ - // This could be optimized significantly; I'm not even trying - str_append_data (&self->input, data, len); - - while (true) - { - struct msg_unpacker unpacker; - msg_unpacker_init (&unpacker, self->input.str, self->input.len); - - switch (self->state) - { - uint8_t len; - uint32_t len_full; - - case FCGI_NV_PARSER_NAME_LEN: - if (!msg_unpacker_u8 (&unpacker, &len)) - return; - - if (len >> 7) - self->state = FCGI_NV_PARSER_NAME_LEN_FULL; - else - { - self->name_len = len; - str_remove_slice (&self->input, 0, unpacker.offset); - self->state = FCGI_NV_PARSER_VALUE_LEN; - } - break; - case FCGI_NV_PARSER_NAME_LEN_FULL: - if (!msg_unpacker_u32 (&unpacker, &len_full)) - return; - - self->name_len = len_full & ~(1U << 31); - str_remove_slice (&self->input, 0, unpacker.offset); - self->state = FCGI_NV_PARSER_VALUE_LEN; - break; - case FCGI_NV_PARSER_VALUE_LEN: - if (!msg_unpacker_u8 (&unpacker, &len)) - return; - - if (len >> 7) - self->state = FCGI_NV_PARSER_VALUE_LEN_FULL; - else - { - self->value_len = len; - str_remove_slice (&self->input, 0, unpacker.offset); - self->state = FCGI_NV_PARSER_NAME; - } - break; - case FCGI_NV_PARSER_VALUE_LEN_FULL: - if (!msg_unpacker_u32 (&unpacker, &len_full)) - return; - - self->value_len = len_full & ~(1U << 31); - str_remove_slice (&self->input, 0, unpacker.offset); - self->state = FCGI_NV_PARSER_NAME; - break; - case FCGI_NV_PARSER_NAME: - if (self->input.len < self->name_len) - return; - - self->name = xmalloc (self->name_len + 1); - self->name[self->name_len] = '\0'; - memcpy (self->name, self->input.str, self->name_len); - str_remove_slice (&self->input, 0, self->name_len); - self->state = FCGI_NV_PARSER_VALUE; - break; - case FCGI_NV_PARSER_VALUE: - if (self->input.len < self->value_len) - return; - - self->value = xmalloc (self->value_len + 1); - self->value[self->value_len] = '\0'; - memcpy (self->value, self->input.str, self->value_len); - str_remove_slice (&self->input, 0, self->value_len); - self->state = FCGI_NV_PARSER_NAME_LEN; - - // The map takes ownership of the value - str_map_set (self->output, self->name, self->value); - free (self->name); - - self->name = NULL; - self->value = NULL; - break; - } - } -} - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -static void -fcgi_nv_convert_len (size_t len, struct str *output) -{ - if (len < 0x80) - str_pack_u8 (output, len); - else - { - len |= (uint32_t) 1 << 31; - str_pack_u32 (output, len); - } -} - -static void -fcgi_nv_convert (struct str_map *map, struct str *output) -{ - struct str_map_iter iter; - str_map_iter_init (&iter, map); - while (str_map_iter_next (&iter)) - { - const char *name = iter.link->key; - const char *value = iter.link->data; - size_t name_len = iter.link->key_length; - size_t value_len = strlen (value); - - fcgi_nv_convert_len (name_len, output); - fcgi_nv_convert_len (value_len, output); - str_append_data (output, name, name_len); - str_append_data (output, value, value_len); - } -} - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - enum fcgi_request_state { FCGI_REQUEST_PARAMS, ///< Reading headers @@ -1401,408 +456,8 @@ fcgi_muxer_push (struct fcgi_muxer *self, const void *data, size_t len) fcgi_parser_push (&self->parser, data, len); } -// --- SCGI -------------------------------------------------------------------- - -enum scgi_parser_state -{ - SCGI_READING_NETSTRING_LENGTH, ///< The length of the header netstring - SCGI_READING_NAME, ///< Header name - SCGI_READING_VALUE, ///< Header value - SCGI_READING_CONTENT ///< Incoming data -}; - -struct scgi_parser -{ - enum scgi_parser_state state; ///< Parsing state - struct str input; ///< Input buffer - - struct str_map headers; ///< Headers parsed - - size_t headers_len; ///< Length of the netstring contents - struct str name; ///< Header name so far - struct str value; ///< Header value so far - - /// Finished parsing request headers. - /// Return false to abort further processing of input. - bool (*on_headers_read) (void *user_data); - - /// Content available; len == 0 means end of file. - /// Return false to abort further processing of input. - bool (*on_content) (void *user_data, const void *data, size_t len); - - void *user_data; ///< User data passed to callbacks -}; - -static void -scgi_parser_init (struct scgi_parser *self) -{ - memset (self, 0, sizeof *self); - - str_init (&self->input); - str_map_init (&self->headers); - self->headers.free = free; - str_init (&self->name); - str_init (&self->value); -} - -static void -scgi_parser_free (struct scgi_parser *self) -{ - str_free (&self->input); - str_map_free (&self->headers); - str_free (&self->name); - str_free (&self->value); -} - -static bool -scgi_parser_push (struct scgi_parser *self, - const void *data, size_t len, struct error **e) -{ - if (!len) - { - if (self->state != SCGI_READING_CONTENT) - { - error_set (e, "premature EOF"); - return false; - } - - // Indicate end of file - return self->on_content (self->user_data, NULL, 0); - } - - // Notice that this madness is significantly harder to parse than FastCGI; - // this procedure could also be optimized significantly - str_append_data (&self->input, data, len); - - bool keep_running = true; - while (keep_running) - switch (self->state) - { - case SCGI_READING_NETSTRING_LENGTH: - { - if (self->input.len < 1) - return true; - - char digit = *self->input.str; - // XXX: this allows for omitting the netstring length altogether - if (digit == ':') - { - self->state = SCGI_READING_NAME; - break; - } - - if (digit < '0' || digit >= '9') - { - error_set (e, "invalid header netstring"); - return false; - } - - size_t new_len = self->headers_len * 10 + (digit - '0'); - if (new_len < self->headers_len) - { - error_set (e, "header netstring is too long"); - return false; - } - self->headers_len = new_len; - str_remove_slice (&self->input, 0, 1); - break; - } - case SCGI_READING_NAME: - { - if (self->input.len < 1) - return true; - - char c = *self->input.str; - if (!self->headers_len) - { - // The netstring is ending but we haven't finished parsing it, - // or the netstring doesn't end with a comma - if (self->name.len || c != ',') - { - error_set (e, "invalid header netstring"); - return false; - } - self->state = SCGI_READING_CONTENT; - keep_running = self->on_headers_read (self->user_data); - } - else if (c != '\0') - str_append_c (&self->name, c); - else - self->state = SCGI_READING_VALUE; - - str_remove_slice (&self->input, 0, 1); - break; - } - case SCGI_READING_VALUE: - { - if (self->input.len < 1) - return true; - - char c = *self->input.str; - if (!self->headers_len) - { - // The netstring is ending but we haven't finished parsing it - error_set (e, "invalid header netstring"); - return false; - } - else if (c != '\0') - str_append_c (&self->value, c); - else - { - // We've got a name-value pair, let's put it in the map - str_map_set (&self->headers, - self->name.str, str_steal (&self->value)); - - str_reset (&self->name); - str_init (&self->value); - - self->state = SCGI_READING_NAME; - } - - str_remove_slice (&self->input, 0, 1); - break; - } - case SCGI_READING_CONTENT: - keep_running = self->on_content - (self->user_data, self->input.str, self->input.len); - str_remove_slice (&self->input, 0, self->input.len); - return keep_running; - } - return false; -} - // --- WebSockets -------------------------------------------------------------- -#define WS_GUID "258EAFA5-E914-47DA-95CA-C5AB0DC85B11" - -#define SEC_WS_KEY "Sec-WebSocket-Key" -#define SEC_WS_ACCEPT "Sec-WebSocket-Accept" -#define SEC_WS_PROTOCOL "Sec-WebSocket-Protocol" -#define SEC_WS_EXTENSIONS "Sec-WebSocket-Extensions" -#define SEC_WS_VERSION "Sec-WebSocket-Version" - -#define WS_MAX_CONTROL_PAYLOAD_LEN 125 - -static char * -ws_encode_response_key (const char *key) -{ - char *response_key = xstrdup_printf ("%s" WS_GUID, key); - unsigned char hash[SHA_DIGEST_LENGTH]; - SHA1 ((unsigned char *) response_key, strlen (response_key), hash); - free (response_key); - - struct str base64; - str_init (&base64); - base64_encode (hash, sizeof hash, &base64); - return str_steal (&base64); -} - -enum ws_status -{ - // Named according to the meaning specified in RFC 6455, section 11.2 - - WS_STATUS_NORMAL_CLOSURE = 1000, - WS_STATUS_GOING_AWAY = 1001, - WS_STATUS_PROTOCOL_ERROR = 1002, - WS_STATUS_UNSUPPORTED_DATA = 1003, - WS_STATUS_INVALID_PAYLOAD_DATA = 1007, - WS_STATUS_POLICY_VIOLATION = 1008, - WS_STATUS_MESSAGE_TOO_BIG = 1009, - WS_STATUS_MANDATORY_EXTENSION = 1010, - WS_STATUS_INTERNAL_SERVER_ERROR = 1011, - - // Reserved for internal usage - WS_STATUS_NO_STATUS_RECEIVED = 1005, - WS_STATUS_ABNORMAL_CLOSURE = 1006, - WS_STATUS_TLS_HANDSHAKE = 1015 -}; - -// - - Frame parser - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -enum ws_parser_state -{ - WS_PARSER_FIXED, ///< Parsing fixed length part - WS_PARSER_PAYLOAD_LEN_16, ///< Parsing extended payload length - WS_PARSER_PAYLOAD_LEN_64, ///< Parsing extended payload length - WS_PARSER_MASK, ///< Parsing masking-key - WS_PARSER_PAYLOAD ///< Parsing payload -}; - -enum ws_opcode -{ - // Non-control - WS_OPCODE_CONT = 0, - WS_OPCODE_TEXT = 1, - WS_OPCODE_BINARY = 2, - - // Control - WS_OPCODE_CLOSE = 8, - WS_OPCODE_PING = 9, - WS_OPCODE_PONG = 10 -}; - -static bool -ws_is_control_frame (int opcode) -{ - return opcode >= WS_OPCODE_CLOSE; -} - -struct ws_parser -{ - struct str input; ///< External input buffer - enum ws_parser_state state; ///< Parsing state - - unsigned is_fin : 1; ///< Final frame of a message? - unsigned is_masked : 1; ///< Is the frame masked? - unsigned reserved_1 : 1; ///< Reserved - unsigned reserved_2 : 1; ///< Reserved - unsigned reserved_3 : 1; ///< Reserved - enum ws_opcode opcode; ///< Opcode - uint32_t mask; ///< Frame mask - uint64_t payload_len; ///< Payload length - - bool (*on_frame_header) (void *user_data, const struct ws_parser *self); - - /// Callback for when a message is successfully parsed. - /// The actual payload is stored in "input", of length "payload_len". - bool (*on_frame) (void *user_data, const struct ws_parser *self); - - void *user_data; ///< User data for callbacks -}; - -static void -ws_parser_init (struct ws_parser *self) -{ - memset (self, 0, sizeof *self); - str_init (&self->input); -} - -static void -ws_parser_free (struct ws_parser *self) -{ - str_free (&self->input); -} - -static void -ws_parser_unmask (struct ws_parser *self) -{ - // This could be made faster. For example by reading the mask in - // native byte ordering and applying it directly here. - - uint64_t end = self->payload_len & ~(uint64_t) 3; - for (uint64_t i = 0; i < end; i += 4) - { - self->input.str[i + 3] ^= self->mask & 0xFF; - self->input.str[i + 2] ^= (self->mask >> 8) & 0xFF; - self->input.str[i + 1] ^= (self->mask >> 16) & 0xFF; - self->input.str[i ] ^= (self->mask >> 24) & 0xFF; - } - - switch (self->payload_len - end) - { - case 3: - self->input.str[end + 2] ^= (self->mask >> 8) & 0xFF; - case 2: - self->input.str[end + 1] ^= (self->mask >> 16) & 0xFF; - case 1: - self->input.str[end ] ^= (self->mask >> 24) & 0xFF; - break; - } -} - -static bool -ws_parser_push (struct ws_parser *self, const void *data, size_t len) -{ - bool success = false; - str_append_data (&self->input, data, len); - - struct msg_unpacker unpacker; - msg_unpacker_init (&unpacker, self->input.str, self->input.len); - - while (true) - switch (self->state) - { - uint8_t u8; - uint16_t u16; - - case WS_PARSER_FIXED: - if (unpacker.len - unpacker.offset < 2) - goto need_data; - - (void) msg_unpacker_u8 (&unpacker, &u8); - self->is_fin = (u8 >> 7) & 1; - self->reserved_1 = (u8 >> 6) & 1; - self->reserved_2 = (u8 >> 5) & 1; - self->reserved_3 = (u8 >> 4) & 1; - self->opcode = u8 & 15; - - (void) msg_unpacker_u8 (&unpacker, &u8); - self->is_masked = (u8 >> 7) & 1; - self->payload_len = u8 & 127; - - if (self->payload_len == 127) - self->state = WS_PARSER_PAYLOAD_LEN_64; - else if (self->payload_len == 126) - self->state = WS_PARSER_PAYLOAD_LEN_16; - else - self->state = WS_PARSER_MASK; - break; - - case WS_PARSER_PAYLOAD_LEN_16: - if (!msg_unpacker_u16 (&unpacker, &u16)) - goto need_data; - self->payload_len = u16; - - self->state = WS_PARSER_MASK; - break; - - case WS_PARSER_PAYLOAD_LEN_64: - if (!msg_unpacker_u64 (&unpacker, &self->payload_len)) - goto need_data; - - self->state = WS_PARSER_MASK; - break; - - case WS_PARSER_MASK: - if (!self->is_masked) - goto end_of_header; - if (!msg_unpacker_u32 (&unpacker, &self->mask)) - goto need_data; - - end_of_header: - self->state = WS_PARSER_PAYLOAD; - if (!self->on_frame_header (self->user_data, self)) - goto fail; - break; - - case WS_PARSER_PAYLOAD: - // Move the buffer so that payload data is at the front - str_remove_slice (&self->input, 0, unpacker.offset); - - // And continue unpacking frames past the payload - msg_unpacker_init (&unpacker, self->input.str, self->input.len); - unpacker.offset = self->payload_len; - - if (self->input.len < self->payload_len) - goto need_data; - if (self->is_masked) - ws_parser_unmask (self); - if (!self->on_frame (self->user_data, self)) - goto fail; - - self->state = WS_PARSER_FIXED; - break; - } - -need_data: - success = true; -fail: - str_remove_slice (&self->input, 0, unpacker.offset); - return success; -} - -// - - Server handler - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // WebSockets aren't CGI-compatible, therefore we must handle the initial HTTP // handshake ourselves. Luckily it's not too much of a bother with http-parser. // Typically there will be a normal HTTP server in front of us, proxying the @@ -2016,7 +671,7 @@ ws_handler_on_frame (void *user_data, const struct ws_parser *parser) } bool result = self->on_message (self->user_data, self->message_opcode, - self->parser.input.str, self->parser.payload_len); + self->message_data.str, self->message_data.len); str_reset (&self->message_data); return result; } @@ -3643,152 +2298,6 @@ lock_pid_file (struct server_context *ctx, struct error **e) // --- Tests ------------------------------------------------------------------- -static void -test_utf8 (void) -{ - const char valid [] = "2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm"; - const char invalid[] = "\xf0\x90\x28\xbc"; - soft_assert ( utf8_validate (valid, sizeof valid)); - soft_assert (!utf8_validate (invalid, sizeof invalid)); -} - -static void -test_base64 (void) -{ - char data[65]; - for (size_t i = 0; i < N_ELEMENTS (data); i++) - data[i] = i; - - struct str encoded; str_init (&encoded); - struct str decoded; str_init (&decoded); - - base64_encode (data, sizeof data, &encoded); - soft_assert (base64_decode (encoded.str, false, &decoded)); - soft_assert (decoded.len == sizeof data); - soft_assert (!memcmp (decoded.str, data, sizeof data)); - - str_free (&encoded); - str_free (&decoded); -} - -static void -test_http_parser (void) -{ - struct str_map parameters; - str_map_init (¶meters); - parameters.key_xfrm = tolower_ascii_strxfrm; - - char *type = NULL; - char *subtype = NULL; - soft_assert (http_parse_media_type ("TEXT/html; CHARset=\"utf\\-8\"", - &type, &subtype, ¶meters)); - soft_assert (!strcasecmp_ascii (type, "text")); - soft_assert (!strcasecmp_ascii (subtype, "html")); - soft_assert (parameters.len == 1); - soft_assert (!strcmp (str_map_find (¶meters, "charset"), "utf-8")); - str_map_free (¶meters); - - struct http_protocol *protocols; - soft_assert (http_parse_upgrade ("websocket, HTTP/2.0, , ", &protocols)); - - soft_assert (!strcmp (protocols->name, "websocket")); - soft_assert (!protocols->version); - - soft_assert (!strcmp (protocols->next->name, "HTTP")); - soft_assert (!strcmp (protocols->next->version, "2.0")); - - soft_assert (!protocols->next->next); - - LIST_FOR_EACH (struct http_protocol, iter, protocols) - http_protocol_destroy (iter); -} - -static bool -test_scgi_parser_on_headers_read (void *user_data) -{ - struct scgi_parser *parser = user_data; - soft_assert (parser->headers.len == 4); - soft_assert (!strcmp (str_map_find (&parser->headers, - "CONTENT_LENGTH"), "27")); - soft_assert (!strcmp (str_map_find (&parser->headers, - "SCGI"), "1")); - soft_assert (!strcmp (str_map_find (&parser->headers, - "REQUEST_METHOD"), "POST")); - soft_assert (!strcmp (str_map_find (&parser->headers, - "REQUEST_URI"), "/deepthought")); - return true; -} - -static bool -test_scgi_parser_on_content (void *user_data, const void *data, size_t len) -{ - (void) user_data; - soft_assert (!strncmp (data, "What is the answer to life?", len)); - return true; -} - -static void -test_scgi_parser (void) -{ - struct scgi_parser parser; - scgi_parser_init (&parser); - parser.on_headers_read = test_scgi_parser_on_headers_read; - parser.on_content = test_scgi_parser_on_content; - parser.user_data = &parser; - - // This is an example straight from the specification - const char example[] = - "70:" - "CONTENT_LENGTH" "\0" "27" "\0" - "SCGI" "\0" "1" "\0" - "REQUEST_METHOD" "\0" "POST" "\0" - "REQUEST_URI" "\0" "/deepthought" "\0" - "," - "What is the answer to life?"; - - soft_assert (scgi_parser_push (&parser, example, sizeof example, NULL)); - scgi_parser_free (&parser); -} - -static bool -test_websockets_on_frame_header (void *user_data, const struct ws_parser *self) -{ - (void) user_data; - soft_assert (self->is_fin); - soft_assert (self->is_masked); - soft_assert (self->opcode == WS_OPCODE_TEXT); - return true; -} - -static bool -test_websockets_on_frame (void *user_data, const struct ws_parser *self) -{ - (void) user_data; - soft_assert (self->input.len == self->payload_len); - soft_assert (!strncmp (self->input.str, "Hello", self->input.len)); - return true; -} - -static void -test_websockets (void) -{ - char *accept = ws_encode_response_key ("dGhlIHNhbXBsZSBub25jZQ=="); - soft_assert (!strcmp (accept, "s3pPLMBiTxaQ9kYGzzhZRbK+xOo=")); - free (accept); - - struct ws_parser parser; - ws_parser_init (&parser); - parser.on_frame_header = test_websockets_on_frame_header; - parser.on_frame = test_websockets_on_frame; - parser.user_data = &parser; - - const char frame[] = "\x81\x85\x37\xfa\x21\x3d\x7f\x9f\x4d\x51\x58"; - soft_assert (ws_parser_push (&parser, frame, sizeof frame - 1)); - ws_parser_free (&parser); - - // TODO: test the server handler (happy path) -} - static void test_misc (void) { @@ -3808,15 +2317,10 @@ test_main (int argc, char *argv[]) struct test test; test_init (&test, argc, argv); - test_add_simple (&test, "/utf-8", NULL, test_utf8); - test_add_simple (&test, "/base64", NULL, test_base64); - test_add_simple (&test, "/http-parser", NULL, test_http_parser); - test_add_simple (&test, "/scgi-parser", NULL, test_scgi_parser); - test_add_simple (&test, "/websockets", NULL, test_websockets); - test_add_simple (&test, "/misc", NULL, test_misc); // TODO: write more tests + // TODO: test the server handler (happy path) return test_run (&test); } diff --git a/liberty b/liberty index 0876458..8c6d187 160000 --- a/liberty +++ b/liberty @@ -1 +1 @@ -Subproject commit 087645848baec5e59e4296817850bd5dd240cbb2 +Subproject commit 8c6d18757d2d4135963f3dbab6d2d5ec8c8b6af3