2022-12-18 12:04:40 +00:00
|
|
|
#pragma once
|
|
|
|
#include <luna/Result.h>
|
|
|
|
#include <luna/Types.h>
|
|
|
|
#include <stddef.h>
|
|
|
|
|
|
|
|
class Utf8StringDecoder
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
Utf8StringDecoder(const char* str);
|
|
|
|
|
|
|
|
usize byte_length() const
|
|
|
|
{
|
|
|
|
return m_byte_length;
|
|
|
|
}
|
|
|
|
|
|
|
|
Result<usize> code_points() const;
|
|
|
|
|
2022-12-18 13:34:50 +00:00
|
|
|
// The caller must ensure that 'buf' is at least code_points() + a NULL wide.
|
2023-01-14 10:55:19 +00:00
|
|
|
Result<usize> decode(wchar_t* buf) const;
|
2022-12-18 12:04:40 +00:00
|
|
|
|
2023-01-14 10:55:19 +00:00
|
|
|
Result<usize> decode(wchar_t* buf, usize max) const;
|
2023-01-06 20:01:37 +00:00
|
|
|
|
2022-12-18 12:04:40 +00:00
|
|
|
private:
|
|
|
|
const char* m_str;
|
|
|
|
usize m_byte_length;
|
|
|
|
};
|
|
|
|
|
2022-12-18 13:34:50 +00:00
|
|
|
class Utf8StringEncoder
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
Utf8StringEncoder(const wchar_t* str);
|
|
|
|
|
|
|
|
usize code_points() const
|
|
|
|
{
|
|
|
|
return m_code_points;
|
|
|
|
}
|
|
|
|
|
|
|
|
Result<usize> byte_length() const;
|
|
|
|
|
|
|
|
// The caller must ensure that 'buf' is at least byte_length() + a NULL wide.
|
2023-01-14 10:55:19 +00:00
|
|
|
Result<usize> encode(char* buf) const;
|
2022-12-18 13:34:50 +00:00
|
|
|
|
2023-01-14 10:55:19 +00:00
|
|
|
Result<usize> encode(char* buf, usize max) const;
|
2023-01-06 20:01:37 +00:00
|
|
|
|
2022-12-18 13:34:50 +00:00
|
|
|
private:
|
|
|
|
const wchar_t* m_str;
|
|
|
|
usize m_code_points;
|
|
|
|
};
|
|
|
|
|
2022-12-18 12:04:40 +00:00
|
|
|
class Utf8StateDecoder
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
Utf8StateDecoder();
|
|
|
|
|
2023-06-18 16:38:01 +00:00
|
|
|
Result<bool> feed(char c);
|
|
|
|
Result<wchar_t> extract();
|
2022-12-18 12:04:40 +00:00
|
|
|
void reset();
|
|
|
|
|
|
|
|
private:
|
|
|
|
char m_state[4];
|
|
|
|
usize m_state_len = 0;
|
|
|
|
usize m_state_index = 0;
|
2023-06-18 16:38:01 +00:00
|
|
|
wchar_t m_decoded_character;
|
|
|
|
bool m_has_character_ready { false };
|
2022-12-18 13:34:50 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
class Utf8Encoder
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
// Does not null-terminate. Returns the number of bytes written.
|
|
|
|
Result<usize> encode(wchar_t c, char buf[4]);
|
2023-01-02 12:07:29 +00:00
|
|
|
};
|