libc: Implement mbstowcs() using Utf8StringDecoder

libc: Add ctype.h
2023-01-06 21:01:37 +01:00 · 2023-01-06 20:48:08 +01:00
7 changed files with 109 additions and 6 deletions
--- a/libc/CMakeLists.txt
+++ b/libc/CMakeLists.txt
@ -8,6 +8,7 @@ set(SOURCES
    src/errno.cpp
    src/string.cpp
    src/atexit.cpp
    src/ctype.cpp
 )
 if(${ARCH} STREQUAL "x86_64")
--- a/libc/include/ctype.h
+++ b/libc/include/ctype.h
@ -0,0 +1,31 @@
 /* ctype.h: Character handling functions. */
 /* Declares the <ctype.h> character classification and case-conversion entry points.
  * Every function takes a character code as an int and returns an int.
  * NOTE(review): semantics are expected to match the C standard library functions of the
  * same names; this header only declares them, so confirm against the implementation. */
 #ifndef _CTYPE_H
 #define _CTYPE_H
 /* Give the declarations C linkage when the header is included from C++. */
 #ifdef __cplusplus
 extern "C"
 {
 #endif
    /* Classification functions (is*). */
    int isalnum(int c);
    int isalpha(int c);
    int isascii(int c);
    int iscntrl(int c);
    int isdigit(int c);
    int isxdigit(int c);
    int isspace(int c);
    int ispunct(int c);
    int isprint(int c);
    int isgraph(int c);
    int islower(int c);
    int isupper(int c);
    int isblank(int c);
    /* Case conversion functions (to*). */
    int tolower(int c);
    int toupper(int c);
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/libc/include/stdlib.h
+++ b/libc/include/stdlib.h
@ -27,6 +27,8 @@ typedef struct
    long long rem;
 } lldiv_t;
 /* Maximum number of bytes in a single multibyte character.
  * NOTE(review): presumably 4 because the multibyte encoding handled by this libc is UTF-8 -- confirm. */
 #define MB_CUR_MAX 4
 #ifdef __cplusplus
 extern "C"
 {
@ -98,6 +100,9 @@ extern "C"
    void qsort(void*, size_t, size_t, int (*)(const void*, const void*));
    void* bsearch(const void*, const void*, size_t, size_t, int (*)(const void*, const void*));
    /* Convert a multibyte character string to a wide character string. */
    size_t mbstowcs(wchar_t* buf, const char* src, size_t max);
 #ifdef __cplusplus
 }
 #endif
--- a/libc/src/ctype.cpp
+++ b/libc/src/ctype.cpp
@ -0,0 +1,31 @@
 #include <ctype.h>
 #include <luna/CType.h>
 // Expands to the definition of a function 'name' taking an int character code and
 // returning whatever 'impl' returns for that same code.
 #define ctype_wrapper(name, impl) \
    int name(int c)               \
    {                             \
        return impl(c);           \
    }
 // clang-format off
 // Public <ctype.h> entry points with C linkage. Each line expands (via ctype_wrapper) to a
 // function that forwards its argument to the underscore-prefixed function named on the
 // right and returns the result.
 // NOTE(review): the underscore-prefixed functions are presumably provided by <luna/CType.h> -- confirm.
 extern "C"
 {
    ctype_wrapper(isalnum, _isalnum)
    ctype_wrapper(isalpha, _isalpha)
    ctype_wrapper(isascii, _isascii)
    ctype_wrapper(iscntrl, _iscntrl)
    ctype_wrapper(isdigit, _isdigit)
    ctype_wrapper(isxdigit, _isxdigit)
    ctype_wrapper(isspace, _isspace)
    ctype_wrapper(ispunct, _ispunct)
    ctype_wrapper(isprint, _isprint)
    ctype_wrapper(isgraph, _isgraph)
    ctype_wrapper(islower, _islower)
    ctype_wrapper(isupper, _isupper)
    ctype_wrapper(isblank, _isblank)
    ctype_wrapper(tolower, _tolower)
    ctype_wrapper(toupper, _toupper)
 }
 // clang-format on
--- a/libc/src/stdlib.cpp
+++ b/libc/src/stdlib.cpp
@ -1,5 +1,6 @@
 #include <limits.h>
 #include <luna/NumberParsing.h>
 #include <luna/Utf8.h>
 #include <stdlib.h>
 #include <sys/syscall.h>
 #include <unistd.h>
@ -99,4 +100,21 @@ extern "C"
        syscall(SYS_exit);
        __builtin_unreachable();
    }
    /* Convert the multibyte string 'src' into wide characters.
     *
     * buf: destination array, or a null pointer to only measure the string.
     * src: multibyte string to convert, decoded with Utf8StringDecoder.
     * max: maximum number of wide characters to decode into 'buf' (ignored when 'buf' is null).
     *
     * Returns (size_t)-1 if the decoder reports an error. With a null 'buf', returns the number
     * of code points in 'src'. Otherwise returns the number of code points in 'src', or max - 1
     * when 'src' holds at least 'max' code points. */
    size_t mbstowcs(wchar_t* buf, const char* src, size_t max)
    {
        // Nothing can be stored in a zero-sized destination.
        if (buf && max == 0) return 0;

        Utf8StringDecoder decoder(src);

        // POSIX extension: a null destination asks only for the required length, and 'max' is
        // ignored. Without this branch decode() would write through the null pointer.
        if (!buf)
        {
            auto length = decoder.code_points();
            if (length.has_error()) return (size_t)-1;
            return length.value_or(0);
        }

        auto rc = decoder.decode(buf, max);
        if (rc.has_error()) return (size_t)-1;

        size_t code_points = decoder.code_points().value_or(0);
        if (code_points >= max) return max - 1;

        return code_points;
    }
 }
--- a/luna/include/luna/Utf8.h
+++ b/luna/include/luna/Utf8.h
@ -18,6 +18,8 @@ class Utf8StringDecoder
    // The caller must ensure that 'buf' is at least code_points() + a NULL wide.
    Result<void> decode(wchar_t* buf) const;
    Result<void> decode(wchar_t* buf, size_t max) const;
  private:
    const char* m_str;
    usize m_byte_length;
@ -38,6 +40,8 @@ class Utf8StringEncoder
    // The caller must ensure that 'buf' is at least byte_length() + a NULL wide.
    Result<void> encode(char* buf) const;
    Result<void> encode(char* buf, size_t max) const;
  private:
    const wchar_t* m_str;
    usize m_code_points;
--- a/luna/src/Utf8.cpp
+++ b/luna/src/Utf8.cpp
@ -32,7 +32,9 @@ static inline usize wide_char_length_as_utf8_unchecked(wchar_t c)
 static Result<void> encode_wide_char_as_utf8(wchar_t c, char* result, usize& len)
 {
-    len = TRY(wide_char_length_as_utf8(c));
+    usize utf8_len = TRY(wide_char_length_as_utf8(c));
    if (utf8_len > len) { return err(EILSEQ); }
    u8* buf = (u8*)result;
@ -144,11 +146,11 @@ Result<usize> Utf8StringDecoder::code_points() const
    return len;
 }
-Result<void> Utf8StringDecoder::decode(wchar_t* buf) const
+Result<void> Utf8StringDecoder::decode(wchar_t* buf, size_t max) const
 {
    const char* it = m_str;
-    while ((usize)(it - m_str) < m_byte_length)
+    while ((usize)(it - m_str) < m_byte_length && max--)
    {
        usize len = m_byte_length - (usize)(it - m_str); // Remaining space
        *buf = TRY(encode_utf8_as_wide_char(it, len));
@ -161,6 +163,11 @@ Result<void> Utf8StringDecoder::decode(wchar_t* buf) const
    return {};
 }
 // Overload without a limit: forwards to decode(buf, max) with the largest size_t value, so
 // the code point limit never ends the decode loop in practice.
 Result<void> Utf8StringDecoder::decode(wchar_t* buf) const
 {
    const size_t no_limit = static_cast<size_t>(-1);
    return decode(buf, no_limit);
 }
 // Stores the wide string pointer and records its length, measured once with wcslen().
 Utf8StringEncoder::Utf8StringEncoder(const wchar_t* str)
    : m_str(str),
      m_code_points(wcslen(str))
 {
 }
@ -179,15 +186,16 @@ Result<usize> Utf8StringEncoder::byte_length() const
    return len;
 }
-Result<void> Utf8StringEncoder::encode(char* buf) const
+Result<void> Utf8StringEncoder::encode(char* buf, size_t max) const
 {
    const wchar_t* it = m_str;
-    while (*it)
+    while (*it && max > 1)
    {
-        usize len = 0;
+        usize len = max - 1;
        TRY(encode_wide_char_as_utf8(*it, buf, len));
        buf += len;
        max -= len;
        it++;
    }
@ -196,6 +204,11 @@ Result<void> Utf8StringEncoder::encode(char* buf) const
    return {};
 }
 // Overload without a size limit: forwards to encode(buf, max) with the largest size_t value.
 Result<void> Utf8StringEncoder::encode(char* buf) const
 {
    const size_t no_limit = static_cast<size_t>(-1);
    return encode(buf, no_limit);
 }
 // Starts with an empty decoding state: both the state length and the state index are zero.
 Utf8StateDecoder::Utf8StateDecoder()
    : m_state_len(0),
      m_state_index(0)
 {
 }
Author	SHA1	Message	Date
apio	b851dcf9b9	libc: Implement mbstowcs() using Utf8StringDecoder Some checks failed continuous-integration/drone/push Build is failing Details	2023-01-06 21:01:37 +01:00
apio	8891304509	libc: Add ctype.h	2023-01-06 20:48:08 +01:00