luna: Make Utf8String{De,En}coders return the number of units written (wide characters for the decoder, bytes for the encoder)
This lets mbstowcs() avoid a separate call to code_points(), which would otherwise parse the string a second time.
This commit is contained in:
parent
da805eec83
commit
00ee8314b3
@ -104,22 +104,15 @@ extern "C"
|
|||||||
__builtin_unreachable();
|
__builtin_unreachable();
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME: This is walking a UTF-8 string twice. Once to decode, and another to count code points.
|
|
||||||
size_t mbstowcs(wchar_t* buf, const char* src, size_t max)
|
size_t mbstowcs(wchar_t* buf, const char* src, size_t max)
|
||||||
{
|
{
|
||||||
if (max == 0) return 0;
|
if (max == 0) return 0;
|
||||||
|
|
||||||
Utf8StringDecoder decoder(src);
|
Utf8StringDecoder decoder(src);
|
||||||
|
|
||||||
auto rc = decoder.decode(buf, max);
|
if (!buf) { return decoder.code_points().value_or((size_t)-1); }
|
||||||
|
|
||||||
if (rc.has_error()) return (size_t)-1;
|
return decoder.decode(buf, max).value_or((size_t)-1);
|
||||||
|
|
||||||
size_t code_points = decoder.code_points().value_or(0);
|
|
||||||
|
|
||||||
if (code_points >= max) return max - 1;
|
|
||||||
|
|
||||||
return code_points;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void* malloc(size_t size)
|
void* malloc(size_t size)
|
||||||
|
@ -16,9 +16,9 @@ class Utf8StringDecoder
|
|||||||
Result<usize> code_points() const;
|
Result<usize> code_points() const;
|
||||||
|
|
||||||
// The caller must ensure that 'buf' is at least code_points() + a NULL wide.
|
// The caller must ensure that 'buf' is at least code_points() + a NULL wide.
|
||||||
Result<void> decode(wchar_t* buf) const;
|
Result<usize> decode(wchar_t* buf) const;
|
||||||
|
|
||||||
Result<void> decode(wchar_t* buf, usize max) const;
|
Result<usize> decode(wchar_t* buf, usize max) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const char* m_str;
|
const char* m_str;
|
||||||
@ -38,9 +38,9 @@ class Utf8StringEncoder
|
|||||||
Result<usize> byte_length() const;
|
Result<usize> byte_length() const;
|
||||||
|
|
||||||
// The caller must ensure that 'buf' is at least byte_length() + a NULL wide.
|
// The caller must ensure that 'buf' is at least byte_length() + a NULL wide.
|
||||||
Result<void> encode(char* buf) const;
|
Result<usize> encode(char* buf) const;
|
||||||
|
|
||||||
Result<void> encode(char* buf, usize max) const;
|
Result<usize> encode(char* buf, usize max) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const wchar_t* m_str;
|
const wchar_t* m_str;
|
||||||
|
@ -2,6 +2,8 @@
|
|||||||
#include <luna/CString.h>
|
#include <luna/CString.h>
|
||||||
#include <luna/Utf8.h>
|
#include <luna/Utf8.h>
|
||||||
|
|
||||||
|
// FIXME: Running out of space for a sequence should not be an error. (See mbstowcs(3) and wcstombs(3), case 2, when buf is not NULL.)
|
||||||
|
|
||||||
static_assert(WCHAR_MAX > 0x10ffff);
|
static_assert(WCHAR_MAX > 0x10ffff);
|
||||||
|
|
||||||
static Result<usize> utf8_char_length(char c)
|
static Result<usize> utf8_char_length(char c)
|
||||||
@ -146,9 +148,10 @@ Result<usize> Utf8StringDecoder::code_points() const
|
|||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
Result<void> Utf8StringDecoder::decode(wchar_t* buf, usize max) const
|
Result<usize> Utf8StringDecoder::decode(wchar_t* buf, usize max) const
|
||||||
{
|
{
|
||||||
const char* it = m_str;
|
const char* it = m_str;
|
||||||
|
wchar_t* const buf_start = buf;
|
||||||
|
|
||||||
while ((usize)(it - m_str) < m_byte_length && max--)
|
while ((usize)(it - m_str) < m_byte_length && max--)
|
||||||
{
|
{
|
||||||
@ -160,10 +163,10 @@ Result<void> Utf8StringDecoder::decode(wchar_t* buf, usize max) const
|
|||||||
|
|
||||||
*buf = 0;
|
*buf = 0;
|
||||||
|
|
||||||
return {};
|
return (usize)(buf - buf_start);
|
||||||
}
|
}
|
||||||
|
|
||||||
Result<void> Utf8StringDecoder::decode(wchar_t* buf) const
|
Result<usize> Utf8StringDecoder::decode(wchar_t* buf) const
|
||||||
{
|
{
|
||||||
return decode(buf, (usize)-1);
|
return decode(buf, (usize)-1);
|
||||||
}
|
}
|
||||||
@ -186,9 +189,10 @@ Result<usize> Utf8StringEncoder::byte_length() const
|
|||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
Result<void> Utf8StringEncoder::encode(char* buf, usize max) const
|
Result<usize> Utf8StringEncoder::encode(char* buf, usize max) const
|
||||||
{
|
{
|
||||||
const wchar_t* it = m_str;
|
const wchar_t* it = m_str;
|
||||||
|
char* const buf_start = buf;
|
||||||
|
|
||||||
while (*it && max > 1)
|
while (*it && max > 1)
|
||||||
{
|
{
|
||||||
@ -201,10 +205,10 @@ Result<void> Utf8StringEncoder::encode(char* buf, usize max) const
|
|||||||
|
|
||||||
*buf = 0;
|
*buf = 0;
|
||||||
|
|
||||||
return {};
|
return (usize)(buf - buf_start);
|
||||||
}
|
}
|
||||||
|
|
||||||
Result<void> Utf8StringEncoder::encode(char* buf) const
|
Result<usize> Utf8StringEncoder::encode(char* buf) const
|
||||||
{
|
{
|
||||||
return encode(buf, (usize)-1);
|
return encode(buf, (usize)-1);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user