Compare commits
No commits in common. "53bcebc2f0bae3ba0bbcefb849bdb0ede0ea4385" and "69101eb1554ad2fca6de30cdbaccac076210d7e3" have entirely different histories.
53bcebc2f0
...
69101eb155
14
liberty.c
14
liberty.c
@@ -2753,11 +2753,6 @@ utf8_decode (const char **s, size_t len)
|
|||||||
|
|
||||||
// Check the rest of the sequence
|
// Check the rest of the sequence
|
||||||
uint32_t cp = *p++ & ~mask;
|
uint32_t cp = *p++ & ~mask;
|
||||||
|
|
||||||
// Overlong sequence (possibly MUTF-8, not supported)
|
|
||||||
if (!cp && sequence_len)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
while (sequence_len && --sequence_len)
|
while (sequence_len && --sequence_len)
|
||||||
{
|
{
|
||||||
if (p == end)
|
if (p == end)
|
||||||
@@ -2770,13 +2765,6 @@ utf8_decode (const char **s, size_t len)
|
|||||||
return cp;
|
return cp;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool
|
|
||||||
utf8_validate_cp (int32_t cp)
|
|
||||||
{
|
|
||||||
// RFC 3629, CESU-8 not allowed
|
|
||||||
return cp >= 0 && cp <= 0x10FFFF && (cp < 0xD800 || cp > 0xDFFF);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Very rough UTF-8 validation, just makes sure codepoints can be iterated
|
/// Very rough UTF-8 validation, just makes sure codepoints can be iterated
|
||||||
static bool
|
static bool
|
||||||
utf8_validate (const char *s, size_t len)
|
utf8_validate (const char *s, size_t len)
|
||||||
@@ -2784,7 +2772,7 @@ utf8_validate (const char *s, size_t len)
|
|||||||
const char *end = s + len;
|
const char *end = s + len;
|
||||||
int32_t codepoint;
|
int32_t codepoint;
|
||||||
while ((codepoint = utf8_decode (&s, end - s)) >= 0
|
while ((codepoint = utf8_decode (&s, end - s)) >= 0
|
||||||
&& utf8_validate_cp (codepoint))
|
&& codepoint <= 0x10FFFF /* TODO: better validations */)
|
||||||
;
|
;
|
||||||
return s == end;
|
return s == end;
|
||||||
}
|
}
|
||||||
|
@@ -331,12 +331,10 @@ test_utf8 (void)
|
|||||||
soft_assert (utf8_decode (&partial, 1) == -2);
|
soft_assert (utf8_decode (&partial, 1) == -2);
|
||||||
soft_assert (utf8_decode (&empty, 0) == -1);
|
soft_assert (utf8_decode (&empty, 0) == -1);
|
||||||
|
|
||||||
const char valid[] = "2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm";
|
const char valid [] = "2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm";
|
||||||
const char invalid_1[] = "\xf0\x90\x28\xbc";
|
const char invalid[] = "\xf0\x90\x28\xbc";
|
||||||
const char invalid_2[] = "\xc0\x80";
|
soft_assert ( utf8_validate (valid, sizeof valid));
|
||||||
soft_assert ( utf8_validate (valid, sizeof valid));
|
soft_assert (!utf8_validate (invalid, sizeof invalid));
|
||||||
soft_assert (!utf8_validate (invalid_1, sizeof invalid_1));
|
|
||||||
soft_assert (!utf8_validate (invalid_2, sizeof invalid_2));
|
|
||||||
|
|
||||||
struct utf8_iter iter = utf8_iter_make ("fóọ");
|
struct utf8_iter iter = utf8_iter_make ("fóọ");
|
||||||
size_t ch_len;
|
size_t ch_len;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user