config: implement string tokenizing

This commit is contained in:
Přemysl Eric Janouch 2015-05-02 04:58:08 +02:00
parent 83e159d945
commit c4ea0e28fd
1 changed files with 123 additions and 2 deletions

125
common.c
View File

@ -1009,6 +1009,120 @@ config_tokenizer_error (struct config_tokenizer *self,
str_free (&description);
}
// Consumes one or two hexadecimal digits (either letter case) from the
// tokenizer's current position and passes the byte they encode to
// str_append_c() on OUTPUT.  Returns false, without touching OUTPUT,
// when not even a single hexadecimal digit is available.
static bool
config_tokenizer_hexa_escape (struct config_tokenizer *self, struct str *output)
{
	unsigned char value = 0;
	int digits = 0;
	while (digits < 2 && self->len)
	{
		unsigned char ch = tolower_ascii (*self->p);
		unsigned char nibble;
		if (ch >= '0' && ch <= '9')
			nibble = ch - '0';
		else if (ch >= 'a' && ch <= 'f')
			nibble = ch - 'a' + 10;
		else
			break;

		// Two nibbles at most, so the accumulator cannot overflow a byte
		value = (value << 4) | nibble;
		config_tokenizer_advance (self);
		digits++;
	}

	if (digits == 0)
		return false;

	str_append_c (output, value);
	return true;
}
// Consumes one to three octal digits from the tokenizer's current position
// and passes the byte they encode to str_append_c() on OUTPUT.
//
// Returns false, without touching OUTPUT, when there is no octal digit at
// the current position, or when the digits would encode a value that does
// not fit in a single byte (anything above \377).  In the latter case the
// tokenizer is left pointing at the digit that caused the overflow.
static bool
config_tokenizer_octal_escape
	(struct config_tokenizer *self, struct str *output)
{
	int i;
	// Three octal digits carry nine bits; accumulate in a wider type so that
	// out-of-range sequences are detected rather than silently truncated
	unsigned code = 0;
	for (i = 0; self->len && i < 3; i++)
	{
		unsigned char c = *self->p;
		if (c < '0' || c > '7')
			break;

		code = (code << 3) | (unsigned) (c - '0');
		if (code > 0xFF)
			return false;

		config_tokenizer_advance (self);
	}

	if (!i)
		return false;

	str_append_c (output, (unsigned char) code);
	return true;
}
// Decodes the escape sequence that follows a backslash, the backslash itself
// having already been consumed by the caller.  Understands \" and \\, the
// single-letter escapes a, b, f, n, r, t and v, \x or \X followed by
// hexadecimal digits, and octal digits.  The decoded byte is passed to
// str_append_c() on OUTPUT.
//
// Returns false after reporting through config_tokenizer_error() when the
// input ends right here or when the sequence is not recognized.
static bool
config_tokenizer_escape_sequence
	(struct config_tokenizer *self, struct str *output, struct error **e)
{
	if (!self->len)
	{
		config_tokenizer_error (self, e, "premature end of escape sequence");
		return false;
	}

	unsigned char c = *self->p;

	// Numeric escapes are delegated to helpers that consume their own digits
	if (c == 'x' || c == 'X')
	{
		config_tokenizer_advance (self);
		if (!config_tokenizer_hexa_escape (self, output))
		{
			config_tokenizer_error (self, e, "invalid hexadecimal escape");
			return false;
		}
		return true;
	}

	unsigned char decoded;
	switch (c)
	{
	case '"':
	case '\\':
		// These two stand for themselves
		decoded = c;
		break;
	case 'a': decoded = '\a'; break;
	case 'b': decoded = '\b'; break;
	case 'f': decoded = '\f'; break;
	case 'n': decoded = '\n'; break;
	case 'r': decoded = '\r'; break;
	case 't': decoded = '\t'; break;
	case 'v': decoded = '\v'; break;
	default:
		// Anything else is only acceptable as an octal escape
		if (!config_tokenizer_octal_escape (self, output))
		{
			config_tokenizer_error (self, e, "unknown escape sequence");
			return false;
		}
		return true;
	}

	str_append_c (output, decoded);
	config_tokenizer_advance (self);
	return true;
}
// Reads the body of a double-quoted string; the opening quote has already
// been consumed by the caller.  Ordinary bytes are passed to str_append_c()
// on OUTPUT and backslash escapes are handed to
// config_tokenizer_escape_sequence().
//
// Returns true once the closing quote has been consumed.  Returns false when
// an escape sequence fails to decode, or, after reporting through
// config_tokenizer_error(), when the input ends before the closing quote.
static bool
config_tokenizer_string
	(struct config_tokenizer *self, struct str *output, struct error **e)
{
	for (;;)
	{
		if (!self->len)
			break;

		unsigned char c = config_tokenizer_advance (self);
		switch (c)
		{
		case '"':
			return true;
		case '\\':
			if (!config_tokenizer_escape_sequence (self, output, e))
				return false;
			break;
		default:
			str_append_c (output, c);
		}
	}

	config_tokenizer_error (self, e, "premature end of string");
	return false;
}
static enum config_token
config_tokenizer_next (struct config_tokenizer *self, struct error **e)
{
@ -1033,8 +1147,15 @@ config_tokenizer_next (struct config_tokenizer *self, struct error **e)
return CONFIG_T_ABORT;
case '"':
// TODO: string, validate as UTF-8
break;
config_tokenizer_advance (self);
str_reset (&self->string);
if (!config_tokenizer_string (self, &self->string, e))
return CONFIG_T_ABORT;
if (!utf8_validate (self->string.str, self->string.len))
{
config_tokenizer_error (self, e, "not a valid UTF-8 string");
return CONFIG_T_ABORT;
}
}
bool is_word = false;