Compare commits

..

2 Commits

Author SHA1 Message Date
9639777814
Fix validation of overlong UTF-8
It was too strict and Egyptian dicks didn't want to pass,
so we'll do it half-arsedly for a subset.
2020-10-24 19:09:09 +02:00
929229a1d7
Fix config PEG grammar to match strtoll() 2020-10-24 08:05:17 +02:00
2 changed files with 7 additions and 9 deletions

View File

@ -2748,16 +2748,12 @@ utf8_decode (const char **s, size_t len)
}
// In the middle of a character
if (sequence_len == 1)
// or an overlong sequence (subset, possibly MUTF-8, not supported)
if (sequence_len == 1 || *p == 0xC0 || *p == 0xC1)
return -1;
// Check the rest of the sequence
uint32_t cp = *p++ & ~mask;
// Overlong sequence (possibly MUTF-8, not supported)
if (!cp && sequence_len)
return -1;
while (sequence_len && --sequence_len)
{
if (p == end)
@ -4421,7 +4417,7 @@ socket_io_try_write (int socket_fd, struct str *wb)
// char = [\0-\177] # or any Unicode codepoint in the UTF-8 encoding
// escape = [\\"abfnrtv] / [xX][0-9A-Fa-f][0-9A-Fa-f]? / [0-7][0-7]?[0-7]?
//
// integer = lws '-'? [0-9]+ # whatever strtoll() accepts on your system
// integer = lws [-+]? [0-9]+ # whatever strtoll() accepts on your system
// null = lws 'null'
// boolean = lws 'yes' / lws 'YES' / lws 'no' / lws 'NO'
// / lws 'on' / lws 'ON' / lws 'off' / lws 'OFF'

View File

@ -331,10 +331,12 @@ test_utf8 (void)
soft_assert (utf8_decode (&partial, 1) == -2);
soft_assert (utf8_decode (&empty, 0) == -1);
const char valid[] = "2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm";
const char valid_1[] = "2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm";
const char valid_2[] = "\xf0\x93\x82\xb9";
const char invalid_1[] = "\xf0\x90\x28\xbc";
const char invalid_2[] = "\xc0\x80";
soft_assert ( utf8_validate (valid, sizeof valid));
soft_assert ( utf8_validate (valid_1, sizeof valid_1));
soft_assert ( utf8_validate (valid_2, sizeof valid_2));
soft_assert (!utf8_validate (invalid_1, sizeof invalid_1));
soft_assert (!utf8_validate (invalid_2, sizeof invalid_2));