config: implement string tokenizing
This commit is contained in:
parent
83e159d945
commit
c4ea0e28fd
125
common.c
125
common.c
|
@ -1009,6 +1009,120 @@ config_tokenizer_error (struct config_tokenizer *self,
|
|||
str_free (&description);
|
||||
}
|
||||
|
||||
static bool
|
||||
config_tokenizer_hexa_escape (struct config_tokenizer *self, struct str *output)
|
||||
{
|
||||
int i;
|
||||
unsigned char code = 0;
|
||||
|
||||
for (i = 0; self->len && i < 2; i++)
|
||||
{
|
||||
unsigned char c = tolower_ascii (*self->p);
|
||||
if (c >= '0' && c <= '9')
|
||||
code = (code << 4) | (c - '0');
|
||||
else if (c >= 'a' && c <= 'f')
|
||||
code = (code << 4) | (c - 'a' + 10);
|
||||
else
|
||||
break;
|
||||
|
||||
config_tokenizer_advance (self);
|
||||
}
|
||||
|
||||
if (!i)
|
||||
return false;
|
||||
|
||||
str_append_c (output, code);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
config_tokenizer_octal_escape
|
||||
(struct config_tokenizer *self, struct str *output)
|
||||
{
|
||||
int i;
|
||||
unsigned char code = 0;
|
||||
|
||||
for (i = 0; self->len && i < 3; i++)
|
||||
{
|
||||
unsigned char c = *self->p;
|
||||
if (c >= '0' && c <= '7')
|
||||
code = (code << 3) | (c - '0');
|
||||
else
|
||||
break;
|
||||
|
||||
config_tokenizer_advance (self);
|
||||
}
|
||||
|
||||
if (!i)
|
||||
return false;
|
||||
|
||||
str_append_c (output, code);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
config_tokenizer_escape_sequence
|
||||
(struct config_tokenizer *self, struct str *output, struct error **e)
|
||||
{
|
||||
if (!self->len)
|
||||
{
|
||||
config_tokenizer_error (self, e, "premature end of escape sequence");
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned char c;
|
||||
switch ((c = *self->p))
|
||||
{
|
||||
case '"': break;
|
||||
case '\\': break;
|
||||
case 'a': c = '\a'; break;
|
||||
case 'b': c = '\b'; break;
|
||||
case 'f': c = '\f'; break;
|
||||
case 'n': c = '\n'; break;
|
||||
case 'r': c = '\r'; break;
|
||||
case 't': c = '\t'; break;
|
||||
case 'v': c = '\v'; break;
|
||||
|
||||
case 'x':
|
||||
case 'X':
|
||||
config_tokenizer_advance (self);
|
||||
if (config_tokenizer_hexa_escape (self, output))
|
||||
return true;
|
||||
|
||||
config_tokenizer_error (self, e, "invalid hexadecimal escape");
|
||||
return false;
|
||||
|
||||
default:
|
||||
if (config_tokenizer_octal_escape (self, output))
|
||||
return true;
|
||||
|
||||
config_tokenizer_error (self, e, "unknown escape sequence");
|
||||
return false;
|
||||
}
|
||||
|
||||
str_append_c (output, c);
|
||||
config_tokenizer_advance (self);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
config_tokenizer_string
|
||||
(struct config_tokenizer *self, struct str *output, struct error **e)
|
||||
{
|
||||
unsigned char c;
|
||||
while (self->len)
|
||||
{
|
||||
if ((c = config_tokenizer_advance (self)) == '"')
|
||||
return true;
|
||||
if (c != '\\')
|
||||
str_append_c (output, c);
|
||||
else if (!config_tokenizer_escape_sequence (self, output, e))
|
||||
return false;
|
||||
}
|
||||
config_tokenizer_error (self, e, "premature end of string");
|
||||
return false;
|
||||
}
|
||||
|
||||
static enum config_token
|
||||
config_tokenizer_next (struct config_tokenizer *self, struct error **e)
|
||||
{
|
||||
|
@ -1033,8 +1147,15 @@ config_tokenizer_next (struct config_tokenizer *self, struct error **e)
|
|||
return CONFIG_T_ABORT;
|
||||
|
||||
case '"':
|
||||
// TODO: string, validate as UTF-8
|
||||
break;
|
||||
config_tokenizer_advance (self);
|
||||
str_reset (&self->string);
|
||||
if (!config_tokenizer_string (self, &self->string, e))
|
||||
return CONFIG_T_ABORT;
|
||||
if (!utf8_validate (self->string.str, self->string.len))
|
||||
{
|
||||
config_tokenizer_error (self, e, "not a valid UTF-8 string");
|
||||
return CONFIG_T_ABORT;
|
||||
}
|
||||
}
|
||||
|
||||
bool is_word = false;
|
||||
|
|
Loading…
Reference in New Issue