Reject overlong UTF-8 sequences

This commit is contained in:
Přemysl Eric Janouch 2020-10-21 04:52:24 +02:00
parent 69101eb155
commit b08cf6c29f
Signed by: p
GPG Key ID: A0420B94F92B9493
2 changed files with 11 additions and 4 deletions

View File

@ -2753,6 +2753,11 @@ utf8_decode (const char **s, size_t len)
// Check the rest of the sequence
uint32_t cp = *p++ & ~mask;
// Overlong sequence (possibly MUTF-8, not supported)
if (!cp && sequence_len)
return -1;
while (sequence_len && --sequence_len)
{
if (p == end)

View File

@ -331,10 +331,12 @@ test_utf8 (void)
soft_assert (utf8_decode (&partial, 1) == -2);
soft_assert (utf8_decode (&empty, 0) == -1);
const char valid [] = "2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm";
const char invalid[] = "\xf0\x90\x28\xbc";
soft_assert ( utf8_validate (valid, sizeof valid));
soft_assert (!utf8_validate (invalid, sizeof invalid));
const char valid[] = "2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm";
const char invalid_1[] = "\xf0\x90\x28\xbc";
const char invalid_2[] = "\xc0\x80";
soft_assert ( utf8_validate (valid, sizeof valid));
soft_assert (!utf8_validate (invalid_1, sizeof invalid_1));
soft_assert (!utf8_validate (invalid_2, sizeof invalid_2));
struct utf8_iter iter = utf8_iter_make ("fóọ");
size_t ch_len;