Steady progress

Renamed some constants, added basic UTF-8 validation.
This commit is contained in:
Přemysl Eric Janouch 2015-03-23 16:47:21 +01:00
parent 9b7dd630e3
commit 987eae5661
1 changed files with 126 additions and 41 deletions

View File

@ -144,6 +144,61 @@ isspace_ascii (int c)
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
/// Return a pointer to the next UTF-8 character, or NULL on error
static const char *
utf8_next (const char *s, size_t len)
{
// End of string, we go no further
if (!len)
return NULL;
// In the middle of a character -> error
const uint8_t *p = (const unsigned char *) s;
if ((*p & 0xC0) == 0x80)
return NULL;
// Find out how long the sequence is
unsigned mask = 0xC0;
unsigned tail_len = 0;
while ((*p & mask) == mask)
{
// Invalid start of sequence
if (mask == 0xFE)
return NULL;
mask |= mask >> 1;
tail_len++;
}
// Check the rest of the sequence
if (tail_len < --len)
return NULL;
while (tail_len--)
if ((*++p & 0xC0) != 0x80)
return NULL;
return (const char *) p;
}
/// Very rough UTF-8 validation, just makes sure codepoints can be iterated
static bool
utf8_validate (const char *s, size_t len)
{
const char *next;
while (len)
{
if (!(next = utf8_next (s, len)))
return false;
len -= next - s;
s = next;
}
return true;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
static uint8_t g_base64_table[256] = static uint8_t g_base64_table[256] =
{ {
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
@ -1536,23 +1591,22 @@ ws_encode_response_key (const char *key)
enum ws_status enum ws_status
{ {
// These names aren't really standard, just somewhat descriptive. // Named according to the meaning specified in RFC 6455, section 11.2
// The RFC isn't really much cleaner about their meaning.
WS_STATUS_NORMAL = 1000, WS_STATUS_NORMAL_CLOSURE = 1000,
WS_STATUS_GOING_AWAY = 1001, WS_STATUS_GOING_AWAY = 1001,
WS_STATUS_PROTOCOL = 1002, WS_STATUS_PROTOCOL_ERROR = 1002,
WS_STATUS_UNACCEPTABLE = 1003, WS_STATUS_UNSUPPORTED_DATA = 1003,
WS_STATUS_INCONSISTENT = 1007, WS_STATUS_INVALID_PAYLOAD_DATA = 1007,
WS_STATUS_POLICY = 1008, WS_STATUS_POLICY_VIOLATION = 1008,
WS_STATUS_TOO_BIG = 1009, WS_STATUS_MESSAGE_TOO_BIG = 1009,
WS_STATUS_EXTENSION = 1010, WS_STATUS_MANDATORY_EXTENSION = 1010,
WS_STATUS_UNEXPECTED = 1011, WS_STATUS_INTERNAL_SERVER_ERROR = 1011,
// Reserved for internal usage // Reserved for internal usage
WS_STATUS_MISSING = 1005, WS_STATUS_NO_STATUS_RECEIVED = 1005,
WS_STATUS_ABNORMAL = 1006, WS_STATUS_ABNORMAL_CLOSURE = 1006,
WS_STATUS_TLS = 1015 WS_STATUS_TLS_HANDSHAKE = 1015
}; };
// - - Frame parser - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - - Frame parser - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@ -1624,7 +1678,7 @@ ws_parser_free (struct ws_parser *self)
static void static void
ws_parser_unmask (struct ws_parser *self) ws_parser_unmask (struct ws_parser *self)
{ {
// Yes, this could be made faster. For example by reading the mask in // This could be made faster. For example by reading the mask in
// native byte ordering and applying it directly here. // native byte ordering and applying it directly here.
uint64_t end = self->payload_len & ~(uint64_t) 3; uint64_t end = self->payload_len & ~(uint64_t) 3;
@ -1747,9 +1801,10 @@ ws_parser_push (struct ws_parser *self, const void *data, size_t len)
enum ws_handler_state enum ws_handler_state
{ {
WS_HANDLER_HANDSHAKE, ///< Parsing HTTP WS_HANDLER_CONNECTING, ///< Parsing HTTP
WS_HANDLER_OPEN, ///< Parsing WebSockets frames WS_HANDLER_OPEN, ///< Parsing WebSockets frames
WS_HANDLER_CLOSING ///< Closing the connection WS_HANDLER_CLOSING, ///< Closing the connection
WS_HANDLER_CLOSED ///< Dead
}; };
struct ws_handler struct ws_handler
@ -1780,27 +1835,27 @@ struct ws_handler
ev_timer ping_timer; ///< Ping timer ev_timer ping_timer; ///< Ping timer
bool received_pong; ///< Received PONG since the last PING bool received_pong; ///< Received PONG since the last PING
// TODO: void (*on_handshake) (protocols) that will allow the user
// to choose any sub-protocol, if the client has provided any.
// TODO: "on_connected" after the handshake has finished?
/// Called upon reception of a single full message /// Called upon reception of a single full message
bool (*on_message) (void *user_data, bool (*on_message) (void *user_data,
enum ws_opcode type, const void *data, size_t len); enum ws_opcode type, const void *data, size_t len);
// TODO: void (*on_initialized) () that will allow the user to choose /// The connection has been closed. @a close_code may, or may not, be one
// any sub-protocol, if the client has provided any. /// of enum ws_status. The @a reason is never NULL.
/// The connection has been closed.
/// @a close_code may, or may not, be one of enum ws_status.
// NOTE: the "close_code" is what we receive from the remote endpoint,
// or one of 1005/1006/1015
// NOTE: the reason is an empty string if omitted
// TODO; also note that ideally, the handler should (be able to) first // TODO; also note that ideally, the handler should (be able to) first
// receive a notification about the connection being closed because of // receive a notification about the connection being closed because of
// an error (recv()) returns -1, and call on_close() in reaction. // an error (recv()) returns -1, and call on_close() in reaction.
// Actually, calling push() could work pretty fine for this.
void (*on_close) (void *user_data, int close_code, const char *reason); void (*on_close) (void *user_data, int close_code, const char *reason);
/// Write a chunk of data to the stream /// Write a chunk of data to the stream
void (*write_cb) (void *user_data, const void *data, size_t len); void (*write_cb) (void *user_data, const void *data, size_t len);
// TODO: close_cb // TODO: "close_cb"; to be used from a ping timer e.g.
void *user_data; ///< User data for callbacks void *user_data; ///< User data for callbacks
}; };
@ -1831,6 +1886,9 @@ ws_handler_fail (struct ws_handler *self, enum ws_status reason)
// some flag for the case that we're in the middle of ws_handler_push(), // some flag for the case that we're in the middle of ws_handler_push(),
// and/or add a mechanism to stop the caller from polling the socket for // and/or add a mechanism to stop the caller from polling the socket for
// reads). // reads).
// TODO: set the state to FAILED (not CLOSED as that means the TCP
// connection is closed) and wait until all is sent?
// TODO: make sure we don't send pings after the close
} }
// TODO: ws_handler_close() that behaves like ws_handler_fail() but doesn't // TODO: ws_handler_close() that behaves like ws_handler_fail() but doesn't
@ -1842,6 +1900,8 @@ static void
ws_handler_send (struct ws_handler *self, ws_handler_send (struct ws_handler *self,
enum ws_opcode opcode, const void *data, size_t len) enum ws_opcode opcode, const void *data, size_t len)
{ {
// TODO: make sure (just assert?) we're in the OPEN state
struct str header; struct str header;
str_init (&header); str_init (&header);
str_pack_u8 (&header, 0x80 | (opcode & 0x0F)); str_pack_u8 (&header, 0x80 | (opcode & 0x0F));
@ -1869,15 +1929,19 @@ ws_handler_on_frame_header (void *user_data, const struct ws_parser *parser)
{ {
struct ws_handler *self = user_data; struct ws_handler *self = user_data;
// Note that we aren't expected to send any close frame before closing the
// connection when the frame is unmasked
if (parser->reserved_1 || parser->reserved_2 || parser->reserved_3 if (parser->reserved_1 || parser->reserved_2 || parser->reserved_3
|| !parser->is_masked // client -> server payload must be masked || !parser->is_masked // client -> server payload must be masked
|| (ws_is_control_frame (parser->opcode) && || (ws_is_control_frame (parser->opcode) &&
(!parser->is_fin || parser->payload_len > WS_MAX_CONTROL_PAYLOAD_LEN)) (!parser->is_fin || parser->payload_len > WS_MAX_CONTROL_PAYLOAD_LEN))
|| (!ws_is_control_frame (parser->opcode) && || (!ws_is_control_frame (parser->opcode) &&
(self->expecting_continuation && parser->opcode != WS_OPCODE_CONT))) (self->expecting_continuation && parser->opcode != WS_OPCODE_CONT))
ws_handler_fail (self, WS_STATUS_PROTOCOL); || parser->payload_len >= 0x8000000000000000ULL)
ws_handler_fail (self, WS_STATUS_PROTOCOL_ERROR);
else if (parser->payload_len > self->max_payload_len) else if (parser->payload_len > self->max_payload_len)
ws_handler_fail (self, WS_STATUS_TOO_BIG); ws_handler_fail (self, WS_STATUS_MESSAGE_TOO_BIG);
else else
return true; return true;
return false; return false;
@ -1891,6 +1955,9 @@ ws_handler_on_control_frame
{ {
case WS_OPCODE_CLOSE: case WS_OPCODE_CLOSE:
// TODO: confirm the close // TODO: confirm the close
// TODO: change the state to CLOSING
// TODO: call "on_close"
// NOTE: the reason is an empty string if omitted
break; break;
case WS_OPCODE_PING: case WS_OPCODE_PING:
ws_handler_send_control (self, WS_OPCODE_PONG, ws_handler_send_control (self, WS_OPCODE_PONG,
@ -1901,9 +1968,9 @@ ws_handler_on_control_frame
self->received_pong = true; self->received_pong = true;
break; break;
default: default:
// TODO: shouldn't we rather fail on unknown control frames? // Unknown control frame
// But should we actually return false at any time? Yes? ws_handler_fail (self, WS_STATUS_PROTOCOL_ERROR);
break; return false;
} }
return true; return true;
} }
@ -1918,8 +1985,8 @@ ws_handler_on_frame (void *user_data, const struct ws_parser *parser)
// TODO: do this rather in "on_frame_header" // TODO: do this rather in "on_frame_header"
if (self->message_data.len + parser->payload_len > self->max_payload_len) if (self->message_data.len + parser->payload_len > self->max_payload_len)
{ {
ws_handler_fail (self, WS_STATUS_TOO_BIG); ws_handler_fail (self, WS_STATUS_MESSAGE_TOO_BIG);
return true; return false;
} }
if (!self->expecting_continuation) if (!self->expecting_continuation)
@ -1932,6 +1999,13 @@ ws_handler_on_frame (void *user_data, const struct ws_parser *parser)
if (!parser->is_fin) if (!parser->is_fin)
return true; return true;
if (self->message_opcode == WS_OPCODE_TEXT
&& !utf8_validate (self->parser.input.str, self->parser.input.len))
{
ws_handler_fail (self, WS_STATUS_INVALID_PAYLOAD_DATA);
return false;
}
bool result = self->on_message (self->user_data, self->message_opcode, bool result = self->on_message (self->user_data, self->message_opcode,
self->parser.input.str, self->parser.payload_len); self->parser.input.str, self->parser.payload_len);
str_reset (&self->message_data); str_reset (&self->message_data);
@ -1976,7 +2050,8 @@ ws_handler_init (struct ws_handler *self)
str_init (&self->message_data); str_init (&self->message_data);
self->ping_interval = 60; self->ping_interval = 60;
// This is still ridiculously high // This is still ridiculously high. Note that the most significant bit
// must always be zero, i.e. the protocol maximum is 0x7FFF FFFF FFFF FFFF.
self->max_payload_len = UINT32_MAX; self->max_payload_len = UINT32_MAX;
// Just so we can safely stop it // Just so we can safely stop it
@ -2124,6 +2199,8 @@ ws_handler_http_response (struct ws_handler *self, const char *status, ...)
str_vector_free (&v); str_vector_free (&v);
} }
// TODO: also set the connection to some FAILED state or anything that's neither
// CONNECTING nor OPEN
#define FAIL_HANDSHAKE(status, ...) \ #define FAIL_HANDSHAKE(status, ...) \
BLOCK_START \ BLOCK_START \
ws_handler_http_response (self, (status), __VA_ARGS__); \ ws_handler_http_response (self, (status), __VA_ARGS__); \
@ -2219,14 +2296,22 @@ ws_handler_finish_handshake (struct ws_handler *self)
static bool static bool
ws_handler_push (struct ws_handler *self, const void *data, size_t len) ws_handler_push (struct ws_handler *self, const void *data, size_t len)
{ {
if (self->state != WS_HANDLER_HANDSHAKE) if (!len)
{ {
// TODO: handle the case of len == 0: if (self->state == WS_HANDLER_OPEN)
// OPEN: "on_close" WS_STATUS_ABNORMAL self->on_close (self->user_data, WS_STATUS_ABNORMAL_CLOSURE, "");
// CLOSING: just close the connection else
return ws_parser_push (&self->parser, data, len); {
// TODO: anything to do besides just closing the connection?
}
self->state = WS_HANDLER_CLOSED;
return false;
} }
if (self->state != WS_HANDLER_CONNECTING)
return ws_parser_push (&self->parser, data, len);
// The handshake hasn't been done yet, process HTTP headers // The handshake hasn't been done yet, process HTTP headers
static const http_parser_settings http_settings = static const http_parser_settings http_settings =
{ {
@ -3182,7 +3267,7 @@ client_ws_on_message (void *user_data,
if (type != WS_OPCODE_TEXT) if (type != WS_OPCODE_TEXT)
{ {
ws_handler_fail (&self->handler, WS_STATUS_UNACCEPTABLE); ws_handler_fail (&self->handler, WS_STATUS_UNSUPPORTED_DATA);
return false; return false;
} }