Fix a nasty bug in utf8_next()

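Uppercase ASCII (in fact every single-byte character in the range 0x40-0x7F) was read incorrectly: the sequence-length mask started at 0xC0, so the final `& ~mask` also cleared bit 0x40 of plain ASCII bytes.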
This commit is contained in:
Přemysl Eric Janouch 2015-12-10 19:34:10 +01:00
parent 122ab355a6
commit 75d063e363

View File

@@ -2242,14 +2242,11 @@ utf8_next (const char *s, size_t len, int32_t *codepoint)
if (!len) if (!len)
return NULL; return NULL;
// In the middle of a character -> error // Find out how long the sequence is (0 for ASCII)
const uint8_t *p = (const unsigned char *) s; unsigned mask = 0x80;
if ((*p & 0xC0) == 0x80) unsigned sequence_len = 0;
return NULL;
// Find out how long the sequence is const uint8_t *p = (const uint8_t *) s;
unsigned mask = 0xC0;
unsigned tail_len = 0;
while ((*p & mask) == mask) while ((*p & mask) == mask)
{ {
// Invalid start of sequence // Invalid start of sequence
@@ -2257,15 +2254,16 @@ utf8_next (const char *s, size_t len, int32_t *codepoint)
return NULL; return NULL;
mask |= mask >> 1; mask |= mask >> 1;
tail_len++; sequence_len++;
} }
// Check the rest of the sequence // In the middle of a character or the input is too short
if (tail_len > --len) if (sequence_len == 1 || sequence_len > len)
return NULL; return NULL;
// Check the rest of the sequence
uint32_t cp = *p++ & ~mask; uint32_t cp = *p++ & ~mask;
while (tail_len--) while (sequence_len && --sequence_len)
{ {
if ((*p & 0xC0) != 0x80) if ((*p & 0xC0) != 0x80)
return NULL; return NULL;