Compare commits

..

No commits in common. "53bcebc2f0bae3ba0bbcefb849bdb0ede0ea4385" and "69101eb1554ad2fca6de30cdbaccac076210d7e3" have entirely different histories.

2 changed files with 5 additions and 19 deletions

View File

@ -2753,11 +2753,6 @@ utf8_decode (const char **s, size_t len)
// Check the rest of the sequence // Check the rest of the sequence
uint32_t cp = *p++ & ~mask; uint32_t cp = *p++ & ~mask;
// Overlong sequence (possibly MUTF-8, not supported)
if (!cp && sequence_len)
return -1;
while (sequence_len && --sequence_len) while (sequence_len && --sequence_len)
{ {
if (p == end) if (p == end)
@ -2770,13 +2765,6 @@ utf8_decode (const char **s, size_t len)
return cp; return cp;
} }
/// Tell whether @a cp is a codepoint that may legally be encoded in UTF-8
/// as restricted by RFC 3629
static inline bool
utf8_validate_cp (int32_t cp)
{
	// Negative values and anything past U+10FFFF are rejected outright
	if (cp < 0 || cp > 0x10FFFF)
		return false;

	// The range U+D800..U+DFFF is excluded, which disallows CESU-8
	bool in_excluded_range = cp >= 0xD800 && cp <= 0xDFFF;
	return !in_excluded_range;
}
/// Very rough UTF-8 validation, just makes sure codepoints can be iterated /// Very rough UTF-8 validation, just makes sure codepoints can be iterated
static bool static bool
utf8_validate (const char *s, size_t len) utf8_validate (const char *s, size_t len)
@ -2784,7 +2772,7 @@ utf8_validate (const char *s, size_t len)
const char *end = s + len; const char *end = s + len;
int32_t codepoint; int32_t codepoint;
while ((codepoint = utf8_decode (&s, end - s)) >= 0 while ((codepoint = utf8_decode (&s, end - s)) >= 0
&& utf8_validate_cp (codepoint)) && codepoint <= 0x10FFFF /* TODO: better validations */)
; ;
return s == end; return s == end;
} }

View File

@ -332,11 +332,9 @@ test_utf8 (void)
soft_assert (utf8_decode (&empty, 0) == -1); soft_assert (utf8_decode (&empty, 0) == -1);
const char valid [] = "2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm"; const char valid [] = "2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm";
const char invalid_1[] = "\xf0\x90\x28\xbc"; const char invalid[] = "\xf0\x90\x28\xbc";
const char invalid_2[] = "\xc0\x80";
soft_assert ( utf8_validate (valid, sizeof valid)); soft_assert ( utf8_validate (valid, sizeof valid));
soft_assert (!utf8_validate (invalid_1, sizeof invalid_1)); soft_assert (!utf8_validate (invalid, sizeof invalid));
soft_assert (!utf8_validate (invalid_2, sizeof invalid_2));
struct utf8_iter iter = utf8_iter_make ("fóọ"); struct utf8_iter iter = utf8_iter_make ("fóọ");
size_t ch_len; size_t ch_len;