Reject overlong UTF-8 sequences
This commit is contained in:
@@ -2753,6 +2753,11 @@ utf8_decode (const char **s, size_t len)
 
 	// Check the rest of the sequence
 	uint32_t cp = *p++ & ~mask;
+
+	// Overlong sequence (possibly MUTF-8, not supported)
+	if (!cp && sequence_len)
+		return -1;
+
 	while (sequence_len && --sequence_len)
 	{
 		if (p == end)
@@ -331,10 +331,12 @@ test_utf8 (void)
 	soft_assert (utf8_decode (&partial, 1) == -2);
 	soft_assert (utf8_decode (&empty,   0) == -1);
 
-	const char valid  [] = "2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm";
-	const char invalid[] = "\xf0\x90\x28\xbc";
+	const char valid[] = "2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm";
+	const char invalid_1[] = "\xf0\x90\x28\xbc";
+	const char invalid_2[] = "\xc0\x80";
 	soft_assert ( utf8_validate (valid,     sizeof valid));
-	soft_assert (!utf8_validate (invalid, sizeof invalid));
+	soft_assert (!utf8_validate (invalid_1, sizeof invalid_1));
+	soft_assert (!utf8_validate (invalid_2, sizeof invalid_2));
 
 	struct utf8_iter iter = utf8_iter_make ("fóọ");
 	size_t ch_len;
		Reference in New Issue
	
	Block a user