UTF-8 decoder: Error out on overlong encodings
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
apio 2022-12-21 20:08:43 +01:00
parent fcefab4383
commit 16bf8b38ea
Signed by: apio
GPG Key ID: B8A7D06E42258954

View File

@ -21,6 +21,14 @@ static Result<usize> wide_char_length_as_utf8(wchar_t c)
return err(EILSEQ); return err(EILSEQ);
} }
// Returns how many bytes the UTF-8 encoding of `c` occupies (1 to 4), picking the
// answer purely from the magnitude of `c`.
//
// "Unchecked" means no validation is performed: any value above 0xffff reports 4,
// even if it is not a valid code point, and where wchar_t is signed a negative
// value reports 1. Callers are expected to pass an already-validated code point.
static inline usize wide_char_length_as_utf8_unchecked(wchar_t c)
{
    // Test the largest range first and fall through to the single-byte case.
    if (c > 0xffff) return 4;
    if (c > 0x7ff) return 3;
    if (c > 0x7f) return 2;
    return 1;
}
static Result<void> encode_wide_char_as_utf8(wchar_t c, char* result, usize& len) static Result<void> encode_wide_char_as_utf8(wchar_t c, char* result, usize& len)
{ {
len = TRY(wide_char_length_as_utf8(c)); len = TRY(wide_char_length_as_utf8(c));
@ -57,7 +65,7 @@ static Result<void> encode_wide_char_as_utf8(wchar_t c, char* result, usize& len
unreachable(); unreachable();
} }
static Result<wchar_t> encode_utf8_as_wide_char(const char* beg, usize& len) static Result<wchar_t> encode_utf8_as_wide_char_impl(const char* beg, usize& len)
{ {
usize utf8_len = TRY(utf8_char_length(*beg)); usize utf8_len = TRY(utf8_char_length(*beg));
if (utf8_len > len) return err(EILSEQ); // Unterminated sequence if (utf8_len > len) return err(EILSEQ); // Unterminated sequence
@ -102,6 +110,19 @@ static Result<wchar_t> encode_utf8_as_wide_char(const char* beg, usize& len)
unreachable(); unreachable();
} }
// Decodes the UTF-8 sequence starting at `beg` into a wide character and rejects
// overlong encodings.
//
// The actual decoding is delegated to encode_utf8_as_wide_char_impl(); any error it
// reports is propagated unchanged through TRY. On top of that, this wrapper fails
// with EILSEQ when the sequence used more bytes than the shortest UTF-8 form of the
// decoded value requires.
//
// NOTE(review): `len` is forwarded by reference to the impl; the comparison below
// presumes that after a successful decode it holds the byte length of the sequence
// that was consumed -- confirm against encode_utf8_as_wide_char_impl().
static Result<wchar_t> encode_utf8_as_wide_char(const char* beg, usize& len)
{
    wchar_t decoded = TRY(encode_utf8_as_wide_char_impl(beg, len));

    // The unchecked length helper is sufficient here: the value was produced by our
    // own decoder, which has already range-checked it.
    const usize shortest_len = wide_char_length_as_utf8_unchecked(decoded);

    // Exactly the minimal number of bytes: a well-formed, shortest-form encoding.
    if (len == shortest_len) return decoded;

    // Any other length is an overlong encoding, which must never be accepted.
    return err(EILSEQ);
}
Utf8StringDecoder::Utf8StringDecoder(const char* str) : m_str(str), m_byte_length(strlen(str)) Utf8StringDecoder::Utf8StringDecoder(const char* str) : m_str(str), m_byte_length(strlen(str))
{ {
} }