Compare commits

...

2 Commits

Author SHA1 Message Date
e3ef29e80d
libc: Implement wcstombs()
All checks were successful
continuous-integration/drone/push Build is passing
2023-01-14 11:59:08 +01:00
00ee8314b3
luna: Make Utf8String{De,En}coders return the number of bytes written
This means we can avoid a call to code_points() in mbstowcs(),
which would parse a string twice.
2023-01-14 11:55:19 +01:00
4 changed files with 27 additions and 16 deletions

View File

@ -110,6 +110,9 @@ extern "C"
/* Convert a multibyte character string to a wide character string. */ /* Convert a multibyte character string to a wide character string. */
size_t mbstowcs(wchar_t* buf, const char* src, size_t max); size_t mbstowcs(wchar_t* buf, const char* src, size_t max);
/* Convert a wide character string to a multibyte character string. */
size_t wcstombs(char* buf, const wchar_t* src, size_t max);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@ -104,22 +104,26 @@ extern "C"
__builtin_unreachable(); __builtin_unreachable();
} }
// FIXME: This is walking a UTF-8 string twice. Once to decode, and another to count code points.
size_t mbstowcs(wchar_t* buf, const char* src, size_t max) size_t mbstowcs(wchar_t* buf, const char* src, size_t max)
{ {
if (max == 0) return 0; if (max == 0) return 0;
Utf8StringDecoder decoder(src); Utf8StringDecoder decoder(src);
auto rc = decoder.decode(buf, max); if (!buf) { return decoder.code_points().value_or((size_t)-1); }
if (rc.has_error()) return (size_t)-1; return decoder.decode(buf, max).value_or((size_t)-1);
}
size_t code_points = decoder.code_points().value_or(0); size_t wcstombs(char* buf, const wchar_t* src, size_t max)
{
if (max == 0) return 0;
if (code_points >= max) return max - 1; Utf8StringEncoder encoder(src);
return code_points; if (!buf) { return encoder.byte_length().value_or((size_t)-1); }
return encoder.encode(buf, max).value_or((size_t)-1);
} }
void* malloc(size_t size) void* malloc(size_t size)

View File

@ -16,9 +16,9 @@ class Utf8StringDecoder
Result<usize> code_points() const; Result<usize> code_points() const;
// The caller must ensure that 'buf' is at least code_points() + a NULL wide. // The caller must ensure that 'buf' is at least code_points() + a NULL wide.
Result<void> decode(wchar_t* buf) const; Result<usize> decode(wchar_t* buf) const;
Result<void> decode(wchar_t* buf, usize max) const; Result<usize> decode(wchar_t* buf, usize max) const;
private: private:
const char* m_str; const char* m_str;
@ -38,9 +38,9 @@ class Utf8StringEncoder
Result<usize> byte_length() const; Result<usize> byte_length() const;
// The caller must ensure that 'buf' is at least byte_length() + a NULL wide. // The caller must ensure that 'buf' is at least byte_length() + a NULL wide.
Result<void> encode(char* buf) const; Result<usize> encode(char* buf) const;
Result<void> encode(char* buf, usize max) const; Result<usize> encode(char* buf, usize max) const;
private: private:
const wchar_t* m_str; const wchar_t* m_str;

View File

@ -2,6 +2,8 @@
#include <luna/CString.h> #include <luna/CString.h>
#include <luna/Utf8.h> #include <luna/Utf8.h>
// FIXME: Not enough space for a sequence is not an error. (mbstowcs(3) and wcstombs(3), case 2 when buf is not NULL)
static_assert(WCHAR_MAX > 0x10ffff); static_assert(WCHAR_MAX > 0x10ffff);
static Result<usize> utf8_char_length(char c) static Result<usize> utf8_char_length(char c)
@ -146,9 +148,10 @@ Result<usize> Utf8StringDecoder::code_points() const
return len; return len;
} }
Result<void> Utf8StringDecoder::decode(wchar_t* buf, usize max) const Result<usize> Utf8StringDecoder::decode(wchar_t* buf, usize max) const
{ {
const char* it = m_str; const char* it = m_str;
wchar_t* const buf_start = buf;
while ((usize)(it - m_str) < m_byte_length && max--) while ((usize)(it - m_str) < m_byte_length && max--)
{ {
@ -160,10 +163,10 @@ Result<void> Utf8StringDecoder::decode(wchar_t* buf, usize max) const
*buf = 0; *buf = 0;
return {}; return (usize)(buf - buf_start);
} }
Result<void> Utf8StringDecoder::decode(wchar_t* buf) const Result<usize> Utf8StringDecoder::decode(wchar_t* buf) const
{ {
return decode(buf, (usize)-1); return decode(buf, (usize)-1);
} }
@ -186,9 +189,10 @@ Result<usize> Utf8StringEncoder::byte_length() const
return len; return len;
} }
Result<void> Utf8StringEncoder::encode(char* buf, usize max) const Result<usize> Utf8StringEncoder::encode(char* buf, usize max) const
{ {
const wchar_t* it = m_str; const wchar_t* it = m_str;
char* const buf_start = buf;
while (*it && max > 1) while (*it && max > 1)
{ {
@ -201,10 +205,10 @@ Result<void> Utf8StringEncoder::encode(char* buf, usize max) const
*buf = 0; *buf = 0;
return {}; return (usize)(buf - buf_start);
} }
Result<void> Utf8StringEncoder::encode(char* buf) const Result<usize> Utf8StringEncoder::encode(char* buf) const
{ {
return encode(buf, (usize)-1); return encode(buf, (usize)-1);
} }