Luna/libluna/src/NumberParsing.cpp

101 lines
2.9 KiB
C++
Raw Normal View History

#include <limits.h>
#include <luna/CType.h>
#include <luna/NumberParsing.h>
2022-11-19 14:43:09 +00:00
// Maps a digit character to its numeric value: '0'-'9' yield 0-9, and letters yield 10 and up
// ('a'/'A' -> 10, 'b'/'B' -> 11, ...), regardless of case.
// No validation is performed: the character is assumed to be alphanumeric, so anything that is neither a decimal
// digit nor a lowercase letter is treated as an uppercase letter.
static isize parse_digit_unchecked(char c)
{
    if (_isdigit(c)) { return c - '0'; }

    // Letters continue counting right after the ten decimal digits.
    const char first_letter = _islower(c) ? 'a' : 'A';
    return (c - first_letter) + 10;
}
2022-11-19 14:43:09 +00:00
static bool is_valid_digit_for_base(int base, char c)
{
if (!_isalnum(c)) return false;
if (parse_digit_unchecked(c) >= (isize)base) return false;
return true;
}
// Parses an unsigned integer from the start of 'str' (no whitespace or sign handling is done here).
//
// str:    the text to parse; parsing begins at its first character.
// endptr: if not NULL, receives the address of the first character that was not consumed.
// base:   the numeric base. 0 means auto-detect: a "0x"/"0X" prefix selects base 16, a leading '0' selects
//         base 8, anything else selects base 10.
//
// Returns the parsed value. If the value does not fit in a usize, the maximum usize value is returned instead
// of a wrapped-around result; the remaining digits are still consumed so that *endptr ends up past the number.
static usize do_unsigned_parse(const char* str, const char** endptr, int base)
{
    const usize max_value = ~(usize)0;
    usize val = 0;
    bool overflowed = false;

    // 1. If base is zero or 16, the string may then include a "0x" prefix, and the number will be read in base 16;
    // otherwise, a zero base is taken as 10 (decimal) unless the next character is '0', in which case it is taken as
    // 8 (octal).
    if ((base == 0 || base == 16) && *str == '0')
    {
        // The prefix is only consumed when a hexadecimal digit actually follows it. Otherwise (e.g. "0xg" or a
        // lone "0x") just the leading "0" is a number, and parsing must stop at the 'x'.
        // str[1] is safe to read because str[0] is '0'; str[2] is only read when str[1] is 'x'/'X'.
        if (_tolower(str[1]) == 'x' && is_valid_digit_for_base(16, str[2]))
        {
            base = 16;
            str += 2;
        }
        else if (base == 0)
            base = 8;
        // The leading '0' is left in place: it is a valid digit in base 8 and 16 and is consumed by the loop below.
    }
    else if (base == 0)
        base = 10;

    // 2. The remainder of the string is converted to an unsigned long value in
    // the obvious manner, stopping at the first character which is not a
    // valid digit in the given base.
    while (is_valid_digit_for_base(base, *str))
    {
        const usize digit = (usize)parse_digit_unchecked(*str);

        if (!overflowed)
        {
            // val * base + digit fits exactly when val <= (max_value - digit) / base. The base is at least 1 here,
            // since no character is a valid digit for a smaller base.
            if (val > (max_value - digit) / (usize)base) overflowed = true;
            else
                val = ((usize)base * val) + digit;
        }

        str++;
    }

    // 3. If endptr is not NULL, this function stores the address of the first invalid character in *endptr.
    if (endptr) *endptr = str;

    return overflowed ? max_value : val;
}
// Parses an unsigned integer from 'str' in the given base (see do_unsigned_parse() for how 'base' is interpreted).
// Leading whitespace is skipped, and a single optional '+' or '-' sign is accepted; the sign is only skipped and
// does not affect the returned value.
// If 'endptr' is not NULL, it receives the address of the first character that was not consumed.
usize parse_unsigned_integer(const char* str, const char** endptr, int base)
{
    const char* cursor = str;

    // The string may begin with an arbitrary amount of white space (as determined by isspace(3)),
    for (; _isspace(*cursor); ++cursor) {}

    // followed by a single optional '+' or '-' sign.
    switch (*cursor)
    {
    case '+':
    case '-': ++cursor; break;
    default: break;
    }

    return do_unsigned_parse(cursor, endptr, base);
}
// Bounds used by parse_signed_integer() to clamp out-of-range results.
// NOTE(review): this assumes isize has the same range as long - confirm against the isize definition.
#define SSIZE_MAX LONG_MAX
// Written as (-SSIZE_MAX - 1) rather than as a negated literal, so every intermediate value is representable.
#define SSIZE_MIN (-SSIZE_MAX - (isize)1)
// Parses a signed integer from 'str' in the given base (see do_unsigned_parse() for how 'base' is interpreted).
// Leading whitespace is skipped, and a single optional '+' or '-' sign is honored.
// If 'endptr' is not NULL, it receives the address of the first character that was not consumed.
//
// Returns the parsed value. If the magnitude reported by do_unsigned_parse() is larger than SSIZE_MAX, the result
// is clamped: SSIZE_MIN for negative numbers, SSIZE_MAX otherwise.
isize parse_signed_integer(const char* str, const char** endptr, int base)
{
    const char* cursor = str;

    // The string may begin with an arbitrary amount of white space (as determined by isspace(3)),
    while (_isspace(*cursor)) { ++cursor; }

    // followed by a single optional '+' or '-' sign.
    const bool negative = (*cursor == '-');
    if (negative || *cursor == '+') { ++cursor; }

    const usize magnitude = do_unsigned_parse(cursor, endptr, base);

    if (magnitude <= SSIZE_MAX)
    {
        // The magnitude fits in an isize, so it can be converted (and negated) without overflowing.
        return negative ? -(isize)magnitude : (isize)magnitude;
    }

    // If an underflow occurs, this function returns SSIZE_MIN. If an overflow occurs, this function returns SSIZE_MAX.
    return negative ? SSIZE_MIN : SSIZE_MAX;
}
2022-11-20 08:28:17 +00:00
// Parses an unsigned integer starting at *str (same rules as parse_unsigned_integer()) and advances *str to the
// first character that was not consumed. Returns the parsed value.
usize scan_unsigned_integer(const char** str, int base)
{
    const char* const start = *str;
    const usize value = parse_unsigned_integer(start, str, base);
    return value;
}
// Parses a signed integer starting at *str (same rules as parse_signed_integer()) and advances *str to the
// first character that was not consumed. Returns the parsed value.
isize scan_signed_integer(const char** str, int base)
{
    const char* const start = *str;
    const isize value = parse_signed_integer(start, str, base);
    return value;
}