Fix a nasty bug in utf8_next()
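Uppercase ASCII (and every other single-byte character in the range 0x40–0x7F) was read incorrectly: the initial mask of 0xC0 stripped bit 6 from the decoded codepoint, so e.g. 'A' (0x41) came out as 0x01. The mask now starts at 0x80.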
This commit is contained in:
parent
122ab355a6
commit
75d063e363
20
liberty.c
20
liberty.c
@ -2242,14 +2242,11 @@ utf8_next (const char *s, size_t len, int32_t *codepoint)
|
|||||||
if (!len)
|
if (!len)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
// In the middle of a character -> error
|
// Find out how long the sequence is (0 for ASCII)
|
||||||
const uint8_t *p = (const unsigned char *) s;
|
unsigned mask = 0x80;
|
||||||
if ((*p & 0xC0) == 0x80)
|
unsigned sequence_len = 0;
|
||||||
return NULL;
|
|
||||||
|
|
||||||
// Find out how long the sequence is
|
const uint8_t *p = (const uint8_t *) s;
|
||||||
unsigned mask = 0xC0;
|
|
||||||
unsigned tail_len = 0;
|
|
||||||
while ((*p & mask) == mask)
|
while ((*p & mask) == mask)
|
||||||
{
|
{
|
||||||
// Invalid start of sequence
|
// Invalid start of sequence
|
||||||
@ -2257,15 +2254,16 @@ utf8_next (const char *s, size_t len, int32_t *codepoint)
|
|||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
mask |= mask >> 1;
|
mask |= mask >> 1;
|
||||||
tail_len++;
|
sequence_len++;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check the rest of the sequence
|
// In the middle of a character or the input is too short
|
||||||
if (tail_len > --len)
|
if (sequence_len == 1 || sequence_len > len)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
// Check the rest of the sequence
|
||||||
uint32_t cp = *p++ & ~mask;
|
uint32_t cp = *p++ & ~mask;
|
||||||
while (tail_len--)
|
while (sequence_len && --sequence_len)
|
||||||
{
|
{
|
||||||
if ((*p & 0xC0) != 0x80)
|
if ((*p & 0xC0) != 0x80)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
Loading…
Reference in New Issue
Block a user