libc: Implement mbstowcs() using Utf8StringDecoder
Some checks failed
continuous-integration/drone/push Build is failing

This commit is contained in:
apio 2023-01-06 21:01:37 +01:00
parent 8891304509
commit b851dcf9b9
Signed by: apio
GPG Key ID: B8A7D06E42258954
4 changed files with 46 additions and 6 deletions

View File

@ -27,6 +27,8 @@ typedef struct
long long rem; long long rem;
} lldiv_t; } lldiv_t;
#define MB_CUR_MAX 4
#ifdef __cplusplus #ifdef __cplusplus
extern "C" extern "C"
{ {
@ -98,6 +100,9 @@ extern "C"
void qsort(void*, size_t, size_t, int (*)(const void*, const void*)); void qsort(void*, size_t, size_t, int (*)(const void*, const void*));
void* bsearch(const void*, const void*, size_t, size_t, int (*)(const void*, const void*)); void* bsearch(const void*, const void*, size_t, size_t, int (*)(const void*, const void*));
/* Convert a multibyte character string to a wide character string. */
size_t mbstowcs(wchar_t* buf, const char* src, size_t max);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@ -1,5 +1,6 @@
#include <limits.h> #include <limits.h>
#include <luna/NumberParsing.h> #include <luna/NumberParsing.h>
#include <luna/Utf8.h>
#include <stdlib.h> #include <stdlib.h>
#include <sys/syscall.h> #include <sys/syscall.h>
#include <unistd.h> #include <unistd.h>
@ -99,4 +100,21 @@ extern "C"
syscall(SYS_exit); syscall(SYS_exit);
__builtin_unreachable(); __builtin_unreachable();
} }
/*
 * Convert the multibyte (UTF-8) string 'src' to a wide character string.
 *
 * buf: destination array, or a null pointer to only query the length.
 * src: multibyte string handed to Utf8StringDecoder.
 * max: capacity of 'buf' in wide characters, including room for the terminator
 *      (ignored when 'buf' is null).
 *
 * Returns the number of wide characters produced, not counting the terminator,
 * or (size_t)-1 if 'src' could not be decoded.
 */
size_t mbstowcs(wchar_t* buf, const char* src, size_t max)
{
    // Nothing can be stored in a zero-sized destination.
    if (buf && max == 0) return 0;

    Utf8StringDecoder decoder(src);

    // A null destination is a pure length query: 'max' is ignored, which is
    // what the common mbstowcs(NULL, src, 0) idiom relies on.
    if (!buf)
    {
        auto required = decoder.code_points();
        if (required.has_error()) return (size_t)-1;
        return required.value_or(0);
    }

    // decode() is documented as needing room for the code points plus a NULL,
    // so only max - 1 code points may be requested; this also matches the
    // max - 1 reported below when the input is truncated.
    auto rc = decoder.decode(buf, max - 1);
    if (rc.has_error()) return (size_t)-1;

    size_t code_points = decoder.code_points().value_or(0);
    if (code_points >= max) return max - 1;

    return code_points;
}
} }

View File

@ -18,6 +18,8 @@ class Utf8StringDecoder
// The caller must ensure that 'buf' is at least code_points() + a NULL wide. // The caller must ensure that 'buf' is at least code_points() + a NULL wide.
Result<void> decode(wchar_t* buf) const; Result<void> decode(wchar_t* buf) const;
Result<void> decode(wchar_t* buf, size_t max) const;
private: private:
const char* m_str; const char* m_str;
usize m_byte_length; usize m_byte_length;
@ -38,6 +40,8 @@ class Utf8StringEncoder
// The caller must ensure that 'buf' is at least byte_length() + a NULL wide. // The caller must ensure that 'buf' is at least byte_length() + a NULL wide.
Result<void> encode(char* buf) const; Result<void> encode(char* buf) const;
Result<void> encode(char* buf, size_t max) const;
private: private:
const wchar_t* m_str; const wchar_t* m_str;
usize m_code_points; usize m_code_points;

View File

@ -32,7 +32,9 @@ static inline usize wide_char_length_as_utf8_unchecked(wchar_t c)
static Result<void> encode_wide_char_as_utf8(wchar_t c, char* result, usize& len) static Result<void> encode_wide_char_as_utf8(wchar_t c, char* result, usize& len)
{ {
len = TRY(wide_char_length_as_utf8(c)); usize utf8_len = TRY(wide_char_length_as_utf8(c));
if (utf8_len > len) { return err(EILSEQ); }
u8* buf = (u8*)result; u8* buf = (u8*)result;
@ -144,11 +146,11 @@ Result<usize> Utf8StringDecoder::code_points() const
return len; return len;
} }
Result<void> Utf8StringDecoder::decode(wchar_t* buf) const Result<void> Utf8StringDecoder::decode(wchar_t* buf, size_t max) const
{ {
const char* it = m_str; const char* it = m_str;
while ((usize)(it - m_str) < m_byte_length) while ((usize)(it - m_str) < m_byte_length && max--)
{ {
usize len = m_byte_length - (usize)(it - m_str); // Remaining space usize len = m_byte_length - (usize)(it - m_str); // Remaining space
*buf = TRY(encode_utf8_as_wide_char(it, len)); *buf = TRY(encode_utf8_as_wide_char(it, len));
@ -161,6 +163,11 @@ Result<void> Utf8StringDecoder::decode(wchar_t* buf) const
return {}; return {};
} }
// Unbounded decode: forwards to decode(buf, max) with the largest size_t value
// as the limit, so the caller alone is responsible for 'buf' being big enough.
Result<void> Utf8StringDecoder::decode(wchar_t* buf) const
{
    const size_t no_limit = static_cast<size_t>(-1);
    return decode(buf, no_limit);
}
Utf8StringEncoder::Utf8StringEncoder(const wchar_t* str) : m_str(str), m_code_points(wcslen(str)) Utf8StringEncoder::Utf8StringEncoder(const wchar_t* str) : m_str(str), m_code_points(wcslen(str))
{ {
} }
@ -179,15 +186,16 @@ Result<usize> Utf8StringEncoder::byte_length() const
return len; return len;
} }
Result<void> Utf8StringEncoder::encode(char* buf) const Result<void> Utf8StringEncoder::encode(char* buf, size_t max) const
{ {
const wchar_t* it = m_str; const wchar_t* it = m_str;
while (*it) while (*it && max > 1)
{ {
usize len = 0; usize len = max - 1;
TRY(encode_wide_char_as_utf8(*it, buf, len)); TRY(encode_wide_char_as_utf8(*it, buf, len));
buf += len; buf += len;
max -= len;
it++; it++;
} }
@ -196,6 +204,11 @@ Result<void> Utf8StringEncoder::encode(char* buf) const
return {}; return {};
} }
// Unbounded encode: forwards to encode(buf, max) with the largest size_t value
// as the limit, so the caller alone is responsible for 'buf' being big enough.
Result<void> Utf8StringEncoder::encode(char* buf) const
{
    const size_t no_limit = static_cast<size_t>(-1);
    return encode(buf, no_limit);
}
Utf8StateDecoder::Utf8StateDecoder() : m_state_len(0), m_state_index(0) Utf8StateDecoder::Utf8StateDecoder() : m_state_len(0), m_state_index(0)
{ {
} }