REFACTOR!!
This commit is contained in:
parent
7094698df7
commit
99f56d3a61
@ -27,12 +27,8 @@ add_executable(
|
||||
src/external/FormatString/FormatString.hpp
|
||||
src/FileIO.h
|
||||
src/FileIO.cpp
|
||||
src/Importer.cpp
|
||||
src/Importer.h
|
||||
src/Arguments.cpp
|
||||
src/Arguments.h
|
||||
src/Normalizer.cpp
|
||||
src/Normalizer.h
|
||||
src/AST/ASTNode.cpp
|
||||
src/AST/ASTNode.h
|
||||
src/AST/BinaryOpNode.cpp
|
||||
|
@ -1,5 +1,4 @@
|
||||
#include "Error.h"
|
||||
#include "Importer.h"
|
||||
#include "utils.h"
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
@ -42,7 +41,7 @@ void Error::show_import_lines(const Location& loc, void (*import_line_printer)(c
|
||||
{
|
||||
show_import_lines(loc, show_import_line, std::cerr);
|
||||
|
||||
std::string linestr = to_string(loc.line);
|
||||
std::string line = to_string(loc.line);
|
||||
|
||||
std::cerr << "\033[1;1m";
|
||||
std::cerr << loc.str();
|
||||
@ -56,12 +55,12 @@ void Error::show_import_lines(const Location& loc, void (*import_line_printer)(c
|
||||
std::cerr << details;
|
||||
std::cerr << std::endl;
|
||||
|
||||
std::cerr << linestr;
|
||||
std::cerr << line;
|
||||
std::cerr << std::string(4, ' ');
|
||||
std::cerr << line_text;
|
||||
std::cerr << std::endl;
|
||||
|
||||
std::cerr << std::string(4 + linestr.size() + loc.column - 1, ' ');
|
||||
std::cerr << std::string(4 + line.size() + loc.column - 1, ' ');
|
||||
|
||||
std::cerr << "\033[31;49m";
|
||||
std::cerr << "^";
|
||||
@ -101,7 +100,7 @@ void Error::throw_warning(const Location& loc, const std::string line_text, cons
|
||||
{
|
||||
show_import_lines(loc, show_import_line, std::cout);
|
||||
|
||||
std::string linestr = to_string(loc.line);
|
||||
std::string line = to_string(loc.line);
|
||||
|
||||
std::cout << "\033[1;1m";
|
||||
std::cout << loc.str();
|
||||
@ -115,12 +114,12 @@ void Error::throw_warning(const Location& loc, const std::string line_text, cons
|
||||
std::cout << details;
|
||||
std::cout << std::endl;
|
||||
|
||||
std::cout << linestr;
|
||||
std::cout << line;
|
||||
std::cout << std::string(4, ' ');
|
||||
std::cout << line_text;
|
||||
std::cout << std::endl;
|
||||
|
||||
std::cout << std::string(4 + linestr.size() + loc.column - 1, ' ');
|
||||
std::cout << std::string(4 + line.size() + loc.column - 1, ' ');
|
||||
|
||||
std::cout << "\033[33;49m";
|
||||
std::cout << "^";
|
||||
|
@ -31,7 +31,7 @@ void IRBuilder::create_program(std::shared_ptr<ProgramNode> program)
|
||||
program->walk([&](std::shared_ptr<TopLevelNode> node) { node->codegen(this, module.get()); });
|
||||
}
|
||||
|
||||
void IRBuilder::resolveToLLVMIR(std::string path)
|
||||
void IRBuilder::resolveToLLVMIR(std::string_view path)
|
||||
{
|
||||
std::error_code EC;
|
||||
llvm::raw_fd_ostream dest(path, EC, llvm::sys::fs::OF_None);
|
||||
@ -45,7 +45,7 @@ void IRBuilder::resolveToLLVMIR(std::string path)
|
||||
dest.flush();
|
||||
}
|
||||
|
||||
void IRBuilder::resolveToObjectFile(std::string path)
|
||||
void IRBuilder::resolveToObjectFile(std::string_view path)
|
||||
{
|
||||
// edit this section when adding support for more architectures
|
||||
LLVMInitializeX86TargetInfo();
|
||||
|
@ -19,7 +19,7 @@ class IRBuilder
|
||||
|
||||
llvm::IRBuilder<>* getBuilder();
|
||||
|
||||
void resolveToLLVMIR(std::string path);
|
||||
void resolveToLLVMIR(std::string_view path);
|
||||
|
||||
void resolveToObjectFile(std::string path);
|
||||
void resolveToObjectFile(std::string_view path);
|
||||
};
|
168
src/Importer.cpp
168
src/Importer.cpp
@ -1,168 +0,0 @@
|
||||
#include "Importer.h"
|
||||
#include "Arguments.h"
|
||||
#include "Error.h"
|
||||
#include "FileIO.h"
|
||||
#include "sapphirepch.h"
|
||||
#include "utils.h"
|
||||
#include <fstream>
|
||||
#define MAX_IMPORTS 100
|
||||
|
||||
int Importer::import_count = 0;
|
||||
|
||||
std::vector<std::shared_ptr<Location>> Importer::import_stack;
|
||||
std::vector<std::string> Importer::imported_files;
|
||||
|
||||
/*
 * Resolves the `import <identifier|path>;` statements of a token stream.
 *
 * Every import statement found in `original` is erased from a working copy of the stream and the
 * tokens lexed from "<name>.sp" are collected in front of it. When at least one file was pulled in,
 * the combined stream (imported tokens first, then the erased copy) is evaluated again so that
 * imports inside imported files are resolved too; `import_count` is incremented once per such pass
 * and checked against MAX_IMPORTS.
 *
 * A name that is already listed in `imported_files` is not imported twice: its statement is erased
 * and, if Arguments::wimport is set, a warning is emitted.
 *
 * `original` must be terminated by a TT_EOF token; the scan stops there and performs no other
 * bounds check.
 *
 * Returns the stream with all import statements erased and all imported tokens inserted.
 */
TokenStream Importer::evaluate(const TokenStream& original)
{
    TokenStream ret_tk = original; // working copy; import statements are erased from it
    TokenStream new_tokens;        // tokens of the files imported during this pass

    // An import statement always spans three tokens: `import`, its target and the semicolon.
    // NOTE(review): Token::erase presumably blanks the token (TT_Null) so a later stage drops it - confirm.
    auto erase_import_statement = [&ret_tk](TokenStream::size_type first) {
        Token::erase(ret_tk[first]);
        Token::erase(ret_tk[first + 1]);
        Token::erase(ret_tk[first + 2]);
    };

    for (TokenStream::size_type i = 0; original[i].tk_type != TT_EOF; ++i)
    {
        // Only import keywords are interesting; avoid copying any other token.
        if (original[i].tk_type != TT_Import) continue;

        Token current_token = original[i];
        Token next_token = original[i + 1];

        if (next_token.tk_type == TT_EOF)
            Error::throw_error(current_token.loc, current_token.line(),
                               "did not expect EOF after import statement");

        // Plain identifiers and paths are imported in exactly the same way.
        // TODO: add support for strings
        if (next_token.tk_type != TT_Identifier && next_token.tk_type != TT_Path)
        {
            Error::throw_error(next_token.loc, next_token.line(),
                               "import keyword should be followed by an identifier");
            continue;
        }

        Token last_token = original[i + 2];
        if (last_token.tk_type != TT_Semicolon)
            Error::throw_error(last_token.loc, last_token.line(), "expected a semicolon");

        const bool already_imported =
            std::find(imported_files.begin(), imported_files.end(), next_token.string_value) !=
            imported_files.end();
        if (already_imported)
        {
            if (Arguments::wimport)
                Error::throw_warning(next_token.loc, next_token.line(), "file already imported, skipping");
            erase_import_statement(i);
            continue;
        }

        if (import_count > MAX_IMPORTS)
            Error::throw_error(current_token.loc, current_token.line(), "maximum import depth exceeded");

        std::string input_file_name = next_token.string_value + ".sp";

        {
            // Only used to check that the file exists; closed again when the scope ends.
            std::ifstream input_file(input_file_name);
            if (!input_file.good()) Error::throw_error(next_token.loc, next_token.line(), "file not found");
        }

        auto file_contents = FileIO::read_all(input_file_name);

        // Location of the import keyword; becomes the parent of every token of the imported file.
        auto top_location = std::make_shared<Location>(current_token.loc.line, current_token.loc.column,
                                                       current_token.loc.fname);
        top_location->parent = current_token.loc.parent;

        import_stack.push_back(top_location); // Keep ref_count above 0, just in case

        auto import_lexer = Lexer::make_lexer(input_file_name);
        Lexer::assign_parent_location(import_lexer, top_location);

        TokenStream imported_tokens = import_lexer->lex(file_contents);
        imported_tokens.pop_back(); // remove EOF at end of token stream

        for (auto& tk : imported_tokens)
        {
            tk.loc.parent = top_location;
        }

        imported_files.push_back(next_token.string_value);
        new_tokens.insert(new_tokens.end(), imported_tokens.begin(), imported_tokens.end());

        erase_import_statement(i);
    }

    // Nothing was imported in this pass: the stream is fully resolved.
    if (new_tokens.empty()) return ret_tk;

    // Put the imported tokens in front of the current file and resolve their own imports.
    new_tokens.insert(new_tokens.end(), ret_tk.begin(), ret_tk.end());
    import_count++;
    return evaluate(new_tokens);
}
|
||||
|
||||
void Importer::init(std::string init_file)
|
||||
{
|
||||
imported_files.push_back(init_file);
|
||||
}
|
@ -1,12 +0,0 @@
|
||||
#pragma once
|
||||
#include "Lexer.h"
|
||||
#include "Token.h"
|
||||
|
||||
/* Resolves `import` statements in a token stream by lexing the imported files and inserting their tokens. */
namespace Importer
|
||||
{
|
||||
/* Number of evaluate() passes that pulled in at least one file; evaluate() checks it against MAX_IMPORTS. */
extern int import_count;
|
||||
/* Locations of the import statements processed so far; stored so the shared pointers stay referenced. */
extern std::vector<std::shared_ptr<Location>> import_stack;
|
||||
/* Names of the files that were already imported; evaluate() skips an import whose name is listed here. */
extern std::vector<std::string> imported_files;
|
||||
/* Returns a copy of `original` with every import statement erased and the imported files' tokens inserted. */
TokenStream evaluate(const TokenStream& original);
|
||||
/* Adds `init_file` to imported_files. */
void init(std::string init_file);
|
||||
} // namespace Importer
|
228
src/Lexer.cpp
228
src/Lexer.cpp
@ -7,10 +7,7 @@
|
||||
#define IDENTIFIERS "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWYZ_0123456789"
|
||||
#define DIGITS "0123456789"
|
||||
|
||||
const std::array<std::string, TYPE_COUNT> Lexer::types = {"void", "bool", "str", "i8", "i16", "i32", "i64",
|
||||
"u8", "u16", "u32", "u64", "f32", "f64", "f128"};
|
||||
|
||||
Lexer::Lexer(const std::string& fname) : loc(1, 0, fname), index(-1), prev_loc(1, 0, fname)
|
||||
Lexer::Lexer(const std::string& fname) : location(1, 0, fname), index(-1), previous_location(1, 0, fname)
|
||||
{
|
||||
}
|
||||
|
||||
@ -20,12 +17,12 @@ Lexer::~Lexer()
|
||||
|
||||
int Lexer::advance()
|
||||
{
|
||||
prev_loc = loc;
|
||||
previous_location = location;
|
||||
++index;
|
||||
loc.advance();
|
||||
location.advance();
|
||||
if (index >= current_lexed_text.size()) return 0;
|
||||
current_char = current_lexed_text[index];
|
||||
loc.pos_from_char(current_char);
|
||||
location.pos_from_char(current_char);
|
||||
if (current_char == '\n')
|
||||
{
|
||||
previous_line_text = current_line_text;
|
||||
@ -36,7 +33,7 @@ int Lexer::advance()
|
||||
|
||||
int Lexer::rewind()
|
||||
{
|
||||
loc = prev_loc;
|
||||
location = previous_location;
|
||||
--index;
|
||||
if (index == -1) return 0;
|
||||
if (current_char == '\n')
|
||||
@ -67,7 +64,7 @@ std::unique_ptr<Lexer> Lexer::make_lexer(const std::string& fname)
|
||||
|
||||
void Lexer::assign_parent_location(std::unique_ptr<Lexer>& lexer, const std::shared_ptr<Location>& loc)
|
||||
{
|
||||
lexer->loc.parent = loc;
|
||||
lexer->location.parent = loc;
|
||||
}
|
||||
|
||||
bool Lexer::is_in_string(const std::string& string, const char& character)
|
||||
@ -117,81 +114,82 @@ TokenStream Lexer::lex(const std::string& text)
|
||||
break;
|
||||
}
|
||||
}
|
||||
result.push_back(Token::make_with_line({TT_Div, loc}, current_line_text));
|
||||
result.push_back(Token::make_with_line({TT_Div, location}, current_line_text));
|
||||
break;
|
||||
case '+':
|
||||
result.push_back(Token::make_with_line({TT_Plus, loc}, current_line_text));
|
||||
result.push_back(Token::make_with_line({TT_Plus, location}, current_line_text));
|
||||
break;
|
||||
case '-':
|
||||
result.push_back(Token::make_with_line({TT_Minus, loc}, current_line_text));
|
||||
result.push_back(Token::make_with_line({TT_Minus, location}, current_line_text));
|
||||
break;
|
||||
case '*':
|
||||
result.push_back(Token::make_with_line({TT_Mul, loc}, current_line_text));
|
||||
result.push_back(Token::make_with_line({TT_Mul, location}, current_line_text));
|
||||
break;
|
||||
case '@':
|
||||
result.push_back(Token::make_with_line({TT_At, loc}, current_line_text));
|
||||
result.push_back(Token::make_with_line({TT_At, location}, current_line_text));
|
||||
break;
|
||||
case '=':
|
||||
result.push_back(Token::make_with_line({TT_Equal, loc}, current_line_text));
|
||||
result.push_back(Token::make_with_line({TT_Equal, location}, current_line_text));
|
||||
break;
|
||||
case '>':
|
||||
result.push_back(Token::make_with_line({TT_GreaterThan, loc}, current_line_text));
|
||||
result.push_back(Token::make_with_line({TT_GreaterThan, location}, current_line_text));
|
||||
break;
|
||||
case '<':
|
||||
result.push_back(Token::make_with_line({TT_LessThan, loc}, current_line_text));
|
||||
result.push_back(Token::make_with_line({TT_LessThan, location}, current_line_text));
|
||||
break;
|
||||
case '(':
|
||||
result.push_back(Token::make_with_line({TT_LParen, loc}, current_line_text));
|
||||
result.push_back(Token::make_with_line({TT_LParen, location}, current_line_text));
|
||||
break;
|
||||
case ')':
|
||||
result.push_back(Token::make_with_line({TT_RParen, loc}, current_line_text));
|
||||
result.push_back(Token::make_with_line({TT_RParen, location}, current_line_text));
|
||||
break;
|
||||
case '{':
|
||||
result.push_back(Token::make_with_line({TT_LBracket, loc}, current_line_text));
|
||||
result.push_back(Token::make_with_line({TT_LBracket, location}, current_line_text));
|
||||
break;
|
||||
case '}':
|
||||
result.push_back(Token::make_with_line({TT_RBracket, loc}, current_line_text));
|
||||
result.push_back(Token::make_with_line({TT_RBracket, location}, current_line_text));
|
||||
break;
|
||||
case ';':
|
||||
result.push_back(Token::make_with_line({TT_Semicolon, loc}, current_line_text));
|
||||
result.push_back(Token::make_with_line({TT_Semicolon, location}, current_line_text));
|
||||
break;
|
||||
case '.':
|
||||
result.push_back(Token::make_with_line({TT_Period, loc}, current_line_text));
|
||||
result.push_back(Token::make_with_line({TT_Period, location}, current_line_text));
|
||||
break;
|
||||
case ',':
|
||||
result.push_back(Token::make_with_line({TT_Comma, loc}, current_line_text));
|
||||
result.push_back(Token::make_with_line({TT_Comma, location}, current_line_text));
|
||||
break;
|
||||
case '!':
|
||||
result.push_back(Token::make_with_line({TT_Exclamation, loc}, current_line_text));
|
||||
result.push_back(Token::make_with_line({TT_Exclamation, location}, current_line_text));
|
||||
break;
|
||||
case '[':
|
||||
result.push_back(Token::make_with_line({TT_Exclamation, loc}, current_line_text));
|
||||
result.push_back(Token::make_with_line({TT_Exclamation, location}, current_line_text));
|
||||
break;
|
||||
case ']':
|
||||
result.push_back(Token::make_with_line({TT_Exclamation, loc}, current_line_text));
|
||||
result.push_back(Token::make_with_line({TT_Exclamation, location}, current_line_text));
|
||||
break;
|
||||
case ':':
|
||||
result.push_back(Token::make_with_line({TT_Colon, location}, current_line_text));
|
||||
break;
|
||||
case '\377':
|
||||
result.push_back(Token(TT_EOF, loc));
|
||||
return result;
|
||||
result.push_back(Token(TT_EOF, location));
|
||||
return std::move(result);
|
||||
default:
|
||||
Error::throw_error(loc, current_line_text, "unknown character");
|
||||
Error::throw_error(location, current_line_text, "unknown character");
|
||||
}
|
||||
}
|
||||
|
||||
result.push_back(Token(TT_EOF, loc));
|
||||
result.push_back(Token(TT_EOF, location));
|
||||
|
||||
return result;
|
||||
return std::move(result);
|
||||
}
|
||||
|
||||
Token Lexer::create_identifier()
|
||||
{
|
||||
std::vector<char> characters;
|
||||
int prev_line = loc.line;
|
||||
int prev_column = loc.column;
|
||||
bool is_path = false;
|
||||
bool last_was_path = false;
|
||||
Location saved_loc = this->loc;
|
||||
Location saved_prev_loc = this->prev_loc;
|
||||
int prev_line = location.line;
|
||||
int prev_column = location.column;
|
||||
Location saved_loc = this->location;
|
||||
Location saved_prev_loc = this->previous_location;
|
||||
|
||||
characters.push_back(current_char);
|
||||
|
||||
@ -200,103 +198,48 @@ Token Lexer::create_identifier()
|
||||
if (is_in_string(IDENTIFIERS, current_char))
|
||||
{
|
||||
characters.push_back(current_char);
|
||||
last_was_path = false;
|
||||
}
|
||||
else if (current_char == '/')
|
||||
{
|
||||
if (last_was_path)
|
||||
{
|
||||
characters.pop_back();
|
||||
this->loc = saved_loc;
|
||||
this->prev_loc = saved_prev_loc;
|
||||
this->rewind();
|
||||
std::string identifier(characters.begin(), characters.end());
|
||||
return Token::make_with_line({TT_Path, identifier, {prev_line, prev_column, loc.fname}},
|
||||
current_line_text);
|
||||
}
|
||||
|
||||
saved_loc = this->loc;
|
||||
saved_prev_loc = this->prev_loc;
|
||||
|
||||
characters.push_back(current_char);
|
||||
is_path = true;
|
||||
last_was_path = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
this->rewind();
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
end:
|
||||
std::string identifier(characters.begin(), characters.end());
|
||||
if (is_path)
|
||||
return Token::make_with_line({TT_Path, identifier, {prev_line, prev_column, loc.fname}},
|
||||
current_line_text);
|
||||
auto location = std::find(types.begin(), types.end(), identifier);
|
||||
if (location != types.end())
|
||||
{
|
||||
return Token::make_with_line({TT_Type, identifier, {prev_line, prev_column, loc.fname}},
|
||||
current_line_text);
|
||||
}
|
||||
if (identifier == "import")
|
||||
return Token::make_with_line({TT_Import, {prev_line, prev_column, loc.fname}}, current_line_text);
|
||||
if (identifier == "syscall0")
|
||||
return Token::make_with_line({TT_Syscall0, {prev_line, prev_column, loc.fname}}, current_line_text);
|
||||
return std::move(
|
||||
Token::make_with_line({TT_Syscall0, {prev_line, prev_column, location.filename}}, current_line_text));
|
||||
if (identifier == "syscall1")
|
||||
return Token::make_with_line({TT_Syscall1, {prev_line, prev_column, loc.fname}}, current_line_text);
|
||||
return std::move(
|
||||
Token::make_with_line({TT_Syscall1, {prev_line, prev_column, location.filename}}, current_line_text));
|
||||
if (identifier == "syscall2")
|
||||
return Token::make_with_line({TT_Syscall2, {prev_line, prev_column, loc.fname}}, current_line_text);
|
||||
return std::move(
|
||||
Token::make_with_line({TT_Syscall2, {prev_line, prev_column, location.filename}}, current_line_text));
|
||||
if (identifier == "syscall3")
|
||||
return Token::make_with_line({TT_Syscall3, {prev_line, prev_column, loc.fname}}, current_line_text);
|
||||
return std::move(
|
||||
Token::make_with_line({TT_Syscall3, {prev_line, prev_column, location.filename}}, current_line_text));
|
||||
if (identifier == "syscall4")
|
||||
return Token::make_with_line({TT_Syscall4, {prev_line, prev_column, loc.fname}}, current_line_text);
|
||||
return std::move(
|
||||
Token::make_with_line({TT_Syscall4, {prev_line, prev_column, location.filename}}, current_line_text));
|
||||
if (identifier == "syscall5")
|
||||
return Token::make_with_line({TT_Syscall5, {prev_line, prev_column, loc.fname}}, current_line_text);
|
||||
if (identifier == "compmacro")
|
||||
return Token::make_with_line({TT_CompilerMacro, {prev_line, prev_column, loc.fname}},
|
||||
current_line_text);
|
||||
return std::move(
|
||||
Token::make_with_line({TT_Syscall5, {prev_line, prev_column, location.filename}}, current_line_text));
|
||||
if (identifier == "let")
|
||||
return Token::make_with_line({TT_Let, {prev_line, prev_column, loc.fname}}, current_line_text);
|
||||
return std::move(
|
||||
Token::make_with_line({TT_Let, {prev_line, prev_column, location.filename}}, current_line_text));
|
||||
if (identifier == "in")
|
||||
return Token::make_with_line({TT_In, {prev_line, prev_column, loc.fname}}, current_line_text);
|
||||
return Token::make_with_line({TT_Identifier, identifier, {prev_line, prev_column, loc.fname}},
|
||||
current_line_text);
|
||||
}
|
||||
}
|
||||
|
||||
std::string identifier(characters.begin(), characters.end());
|
||||
if (is_path)
|
||||
return Token::make_with_line({TT_Path, identifier, {prev_line, prev_column, loc.fname}}, current_line_text);
|
||||
auto location = std::find(types.begin(), types.end(), identifier);
|
||||
if (location != types.end())
|
||||
{
|
||||
return Token::make_with_line({TT_Type, identifier, {prev_line, prev_column, loc.fname}}, current_line_text);
|
||||
}
|
||||
if (identifier == "import")
|
||||
return Token::make_with_line({TT_Import, {prev_line, prev_column, loc.fname}}, current_line_text);
|
||||
if (identifier == "syscall0")
|
||||
return Token::make_with_line({TT_Syscall0, {prev_line, prev_column, loc.fname}}, current_line_text);
|
||||
if (identifier == "syscall1")
|
||||
return Token::make_with_line({TT_Syscall1, {prev_line, prev_column, loc.fname}}, current_line_text);
|
||||
if (identifier == "syscall2")
|
||||
return Token::make_with_line({TT_Syscall2, {prev_line, prev_column, loc.fname}}, current_line_text);
|
||||
if (identifier == "syscall3")
|
||||
return Token::make_with_line({TT_Syscall3, {prev_line, prev_column, loc.fname}}, current_line_text);
|
||||
if (identifier == "syscall4")
|
||||
return Token::make_with_line({TT_Syscall4, {prev_line, prev_column, loc.fname}}, current_line_text);
|
||||
if (identifier == "syscall5")
|
||||
return Token::make_with_line({TT_Syscall5, {prev_line, prev_column, loc.fname}}, current_line_text);
|
||||
if (identifier == "compmacro")
|
||||
return Token::make_with_line({TT_CompilerMacro, {prev_line, prev_column, loc.fname}}, current_line_text);
|
||||
if (identifier == "let")
|
||||
return Token::make_with_line({TT_Let, {prev_line, prev_column, loc.fname}}, current_line_text);
|
||||
if (identifier == "in")
|
||||
return Token::make_with_line({TT_In, {prev_line, prev_column, loc.fname}}, current_line_text);
|
||||
return Token::make_with_line({TT_Identifier, identifier, {prev_line, prev_column, loc.fname}}, current_line_text);
|
||||
return std::move(
|
||||
Token::make_with_line({TT_In, {prev_line, prev_column, location.filename}}, current_line_text));
|
||||
return std::move(Token::make_with_line({TT_Identifier, identifier, {prev_line, prev_column, location.filename}},
|
||||
current_line_text));
|
||||
}
|
||||
|
||||
Token Lexer::create_number()
|
||||
{
|
||||
std::vector<char> characters;
|
||||
int prev_line = loc.line;
|
||||
int prev_column = loc.column;
|
||||
int prev_line = location.line;
|
||||
int prev_column = location.column;
|
||||
int dot_count = 0;
|
||||
|
||||
characters.push_back(current_char);
|
||||
@ -316,60 +259,53 @@ Token Lexer::create_number()
|
||||
}
|
||||
else
|
||||
{
|
||||
Error::throw_warning(loc, current_line_text, "floats can only have one dot");
|
||||
Error::throw_warning(location, current_line_text, "floats can only have one dot");
|
||||
this->rewind();
|
||||
float tk_value = std::stof(std::string(characters.begin(), characters.end()).c_str());
|
||||
return Token::make_with_line({TT_Float, tk_value, {prev_line, prev_column, loc.fname}},
|
||||
current_line_text);
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
this->rewind();
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
end:
|
||||
if (dot_count != 0)
|
||||
{
|
||||
float tk_value = std::stof(std::string(characters.begin(), characters.end()).c_str());
|
||||
return Token::make_with_line({TT_Float, tk_value, {prev_line, prev_column, loc.fname}},
|
||||
current_line_text);
|
||||
float value = std::stof(std::string(characters.begin(), characters.end()).c_str());
|
||||
return std::move(
|
||||
Token::make_with_line({TT_Float, value, {prev_line, prev_column, location.filename}}, current_line_text));
|
||||
}
|
||||
int tk_value = atoi(std::string(characters.begin(), characters.end()).c_str());
|
||||
return Token::make_with_line({TT_Number, tk_value, {prev_line, prev_column, loc.fname}}, current_line_text);
|
||||
}
|
||||
}
|
||||
|
||||
if (dot_count != 0)
|
||||
{
|
||||
float tk_value = std::stof(std::string(characters.begin(), characters.end()).c_str());
|
||||
return Token::make_with_line({TT_Float, tk_value, {prev_line, prev_column, loc.fname}}, current_line_text);
|
||||
}
|
||||
int tk_value = atoi(std::string(characters.begin(), characters.end()).c_str());
|
||||
return Token::make_with_line({TT_Number, tk_value, {prev_line, prev_column, loc.fname}}, current_line_text);
|
||||
int value = std::atoi(std::string(characters.begin(), characters.end()).c_str());
|
||||
return std::move(
|
||||
Token::make_with_line({TT_Number, value, {prev_line, prev_column, location.filename}}, current_line_text));
|
||||
}
|
||||
|
||||
Token Lexer::create_string()
|
||||
{
|
||||
std::vector<char> characters;
|
||||
int prev_line = loc.line;
|
||||
int prev_column = loc.column;
|
||||
int prev_line = location.line;
|
||||
int prev_column = location.column;
|
||||
|
||||
while (this->advance())
|
||||
{
|
||||
if (current_char == '\n')
|
||||
{
|
||||
this->rewind();
|
||||
Error::throw_error(loc, current_line_text, "expected end of string but got newline");
|
||||
Error::throw_error(location, current_line_text, "expected end of string but got newline");
|
||||
}
|
||||
if (current_char == '\'')
|
||||
{
|
||||
std::string identifier(characters.begin(), characters.end());
|
||||
return Token::make_with_line({TT_String, identifier, {prev_line, prev_column, loc.fname}},
|
||||
current_line_text);
|
||||
return std::move(Token::make_with_line({TT_String, identifier, {prev_line, prev_column, location.filename}},
|
||||
current_line_text));
|
||||
}
|
||||
if (current_char == '\\')
|
||||
{
|
||||
if (index + 1 == current_lexed_text.size())
|
||||
{
|
||||
Error::throw_error(loc, current_line_text, "unfinished escape sequence");
|
||||
Error::throw_error(location, current_line_text, "unfinished escape sequence");
|
||||
}
|
||||
switch (current_lexed_text[index + 1])
|
||||
{
|
||||
@ -383,17 +319,17 @@ Token Lexer::create_string()
|
||||
characters.push_back('\\');
|
||||
break;
|
||||
default:
|
||||
Error::throw_error(loc, current_line_text, "unknown escape sequence");
|
||||
Error::throw_error(location, current_line_text, "unknown escape sequence");
|
||||
}
|
||||
++index;
|
||||
++loc.column;
|
||||
++location.column;
|
||||
continue;
|
||||
}
|
||||
|
||||
characters.push_back(current_char);
|
||||
}
|
||||
this->rewind();
|
||||
Error::throw_error(loc, current_line_text, "expected end of string but got EOF");
|
||||
Error::throw_error(location, current_line_text, "expected end of string but got EOF");
|
||||
|
||||
return Token(TT_Null, loc); // unreachable since Error::throw_error calls exit()
|
||||
return *(Token*)nullptr; // unreachable
|
||||
}
|
||||
|
11
src/Lexer.h
11
src/Lexer.h
@ -13,15 +13,15 @@ typedef std::vector<Token> TokenStream;
|
||||
class Lexer
|
||||
{
|
||||
private:
|
||||
Location loc;
|
||||
Location prev_loc;
|
||||
Location location;
|
||||
Location previous_location;
|
||||
|
||||
int advance();
|
||||
int rewind();
|
||||
char current_char;
|
||||
int index;
|
||||
|
||||
Lexer(const std::string& fname);
|
||||
Lexer(const std::string& filename);
|
||||
|
||||
std::string current_line_text;
|
||||
std::string previous_line_text;
|
||||
@ -37,16 +37,13 @@ class Lexer
|
||||
bool is_in_string(const std::string& string, const char& character);
|
||||
|
||||
public:
|
||||
/* An array containing Sapphire's current data types. */
|
||||
static const std::array<std::string, TYPE_COUNT> types;
|
||||
|
||||
~Lexer();
|
||||
|
||||
/* Lex the given text, turning it into a stream of tokens. */
|
||||
TokenStream lex(const std::string& text);
|
||||
|
||||
/* Create a new Lexer and return a pointer to it. */
|
||||
static std::unique_ptr<Lexer> make_lexer(const std::string& fname);
|
||||
static std::unique_ptr<Lexer> make_lexer(const std::string& filename);
|
||||
|
||||
/* If the Lexer is lexing an imported file, give it the location in the parent file at which it was imported. */
|
||||
static void assign_parent_location(std::unique_ptr<Lexer>& lexer, const std::shared_ptr<Location>& loc);
|
||||
|
@ -2,7 +2,8 @@
|
||||
#include "FormatString/FormatString.hpp"
|
||||
#include <sstream>
|
||||
|
||||
Location::Location(int ln, int col, std::string file) : line(ln), column(col), fname(file)
|
||||
Location::Location(int line, int column, std::string filename)
|
||||
: line(line), column(column), filename(std::move(filename))
|
||||
{
|
||||
}
|
||||
|
||||
@ -12,12 +13,7 @@ Location::~Location()
|
||||
|
||||
std::string Location::str() const
|
||||
{
|
||||
return format_string("%s:%d:%d", fname, line, column);
|
||||
}
|
||||
|
||||
std::string Location::paren_str() const
|
||||
{
|
||||
return format_string("(%s:%d:%d)", fname, line, column);
|
||||
return format_string("%s:%d:%d", filename, line, column);
|
||||
}
|
||||
|
||||
void Location::advance()
|
||||
@ -36,13 +32,16 @@ void Location::pos_from_char(const char& character)
|
||||
|
||||
void Location::operator=(const Location& other)
|
||||
{
|
||||
this->parent = other.parent;
|
||||
this->line = other.line;
|
||||
this->column = other.column;
|
||||
this->fname = other.fname;
|
||||
Location copied = copy(other);
|
||||
line = copied.line;
|
||||
column = copied.column;
|
||||
parent = copied.parent;
|
||||
filename = std::move(copied.filename);
|
||||
}
|
||||
|
||||
void Location::copy(const Location& other)
|
||||
Location Location::copy(const Location& other)
|
||||
{
|
||||
this->operator=(other);
|
||||
Location result(other.line, other.column, other.filename);
|
||||
result.parent = other.parent;
|
||||
return std::move(result);
|
||||
}
|
||||
|
@ -6,20 +6,18 @@ struct Location
|
||||
{
|
||||
int line;
|
||||
int column;
|
||||
std::string fname;
|
||||
std::string filename;
|
||||
|
||||
/* The location at which this location was imported, for error traces in imported files. */
|
||||
std::shared_ptr<Location> parent = nullptr;
|
||||
|
||||
/* Creates a Location with the given parameters. */
|
||||
Location(int ln, int col, std::string file);
|
||||
Location(int line, int column, std::string filename);
|
||||
|
||||
~Location();
|
||||
|
||||
/* Returns a string of the format FILE:LINE:COL. */
|
||||
std::string str() const;
|
||||
/* Returns a string of the format (FILE:LINE:COL). */
|
||||
std::string paren_str() const;
|
||||
|
||||
/* Advance to the next column in the file. */
|
||||
void advance();
|
||||
@ -29,6 +27,6 @@ struct Location
|
||||
|
||||
void operator=(const Location& other);
|
||||
|
||||
/* Copies the other location into this one. */
|
||||
void copy(const Location& other);
|
||||
/* Returns a copy of the original Location. */
|
||||
static Location copy(const Location& other);
|
||||
};
|
||||
|
@ -1,82 +0,0 @@
|
||||
#include "Normalizer.h"
|
||||
#include "utils.h"
|
||||
|
||||
/*
 * Produces a cleaned-up copy of a token stream.
 *
 * - TT_Null tokens are dropped.
 * - An operator token directly followed by a TT_Equal token is merged into one two-character
 *   operator that keeps the data of the first token:
 *       TT_Equal       + TT_Equal -> TT_Equals
 *       TT_Exclamation + TT_Equal -> TT_NEqual
 *       TT_GreaterThan + TT_Equal -> TT_GTE
 *       TT_LessThan    + TT_Equal -> TT_LTE
 * - Every other token is copied unchanged.
 */
TokenStream Normalizer::normalize(const TokenStream& input)
{
    TokenStream result;
    const TokenStream::size_type count = input.size();

    TokenStream::size_type i = 0;
    while (i < count)
    {
        Token current = input[i];

        if (current.tk_type == TT_Null)
        {
            ++i;
            continue;
        }

        // Work out which two-character operator this token would form together with a following `=`.
        auto combined_type = current.tk_type;
        bool combinable = true;
        if (current.tk_type == TT_Equal)
            combined_type = TT_Equals;
        else if (current.tk_type == TT_Exclamation)
            combined_type = TT_NEqual;
        else if (current.tk_type == TT_GreaterThan)
            combined_type = TT_GTE;
        else if (current.tk_type == TT_LessThan)
            combined_type = TT_LTE;
        else
            combinable = false;

        // Merge only when there is a next token and it is a plain `=`; both tokens are consumed.
        if (combinable && i + 1 != count && input[i + 1].tk_type == TT_Equal)
        {
            result.push_back(current.copy_with_new_type(combined_type));
            i += 2;
            continue;
        }

        result.push_back(current);
        ++i;
    }

    return result;
}
|
@ -1,14 +0,0 @@
|
||||
#pragma once
|
||||
#include "Lexer.h" // for TokenStream
|
||||
#include "Token.h"
|
||||
|
||||
/* Namespace to normalize a TokenStream. */
|
||||
namespace Normalizer
|
||||
{
|
||||
/* Some two-character operators are difficult for the Lexer to recognize on its own, so it emits them as two
   separate tokens. This function merges such adjacent tokens into a single token with its own meaning:
   = + = : ==, ! + = : !=, > + = : >=, < + = : <=.

   It also removes blank tokens. */
|
||||
TokenStream normalize(const TokenStream& input);
|
||||
} // namespace Normalizer
|
@ -25,7 +25,7 @@ std::shared_ptr<ProgramNode> Parser::parse()
|
||||
auto result = toplevel();
|
||||
if (result.is_error()) result.ethrow();
|
||||
final_result->append(result.get());
|
||||
if (current_token->tk_type == TT_EOF) break;
|
||||
if (current_token->type == TT_EOF) break;
|
||||
}
|
||||
return final_result;
|
||||
}
|
||||
@ -44,16 +44,18 @@ Result<ExprNode> Parser::factor()
|
||||
{
|
||||
Token& token = *current_token;
|
||||
|
||||
if (token.tk_type == TT_Number)
|
||||
if (token.type == TT_Number)
|
||||
{
|
||||
advance();
|
||||
return Ok<ExprNode>(new IntegerNode(token.int_value), &token);
|
||||
assert(token.int_value.has_value());
|
||||
return Ok<ExprNode>(new IntegerNode(token.int_value.value()), &token);
|
||||
}
|
||||
|
||||
if (token.tk_type == TT_Float)
|
||||
if (token.type == TT_Float)
|
||||
{
|
||||
advance();
|
||||
return Ok<ExprNode>(new FloatNode(token.float_value), &token);
|
||||
assert(token.float_value.has_value());
|
||||
return Ok<ExprNode>(new FloatNode(token.float_value.value()), &token);
|
||||
}
|
||||
|
||||
return Err<ExprNode>("expected a number", &token);
|
||||
@ -63,13 +65,13 @@ Result<ExprNode> Parser::term()
|
||||
{
|
||||
Result<ExprNode> left = factor();
|
||||
if (left.is_error()) return left;
|
||||
while (current_token->tk_type == TT_Mul || current_token->tk_type == TT_Div)
|
||||
while (current_token->type == TT_Mul || current_token->type == TT_Div)
|
||||
{
|
||||
Token& op = *current_token;
|
||||
advance();
|
||||
Result<ExprNode> right = factor();
|
||||
if (right.is_error()) return right;
|
||||
left = Ok<ExprNode>(new MulNode(left.get(), right.get(), op.tk_type == TT_Mul ? '*' : '/'), &op);
|
||||
left = Ok<ExprNode>(new MulNode(left.get(), right.get(), op.type == TT_Mul ? '*' : '/'), &op);
|
||||
}
|
||||
return left;
|
||||
}
|
||||
@ -78,13 +80,13 @@ Result<ExprNode> Parser::expr()
|
||||
{
|
||||
Result<ExprNode> left = term();
|
||||
if (left.is_error()) return left;
|
||||
while (current_token->tk_type == TT_Plus || current_token->tk_type == TT_Minus)
|
||||
while (current_token->type == TT_Plus || current_token->type == TT_Minus)
|
||||
{
|
||||
Token& op = *current_token;
|
||||
advance();
|
||||
Result<ExprNode> right = term();
|
||||
if (right.is_error()) return right;
|
||||
left = Ok<ExprNode>(new SumNode(left.get(), right.get(), op.tk_type == TT_Plus ? '+' : '-'), &op);
|
||||
left = Ok<ExprNode>(new SumNode(left.get(), right.get(), op.type == TT_Plus ? '+' : '-'), &op);
|
||||
}
|
||||
return left;
|
||||
}
|
||||
@ -98,35 +100,35 @@ Result<TopLevelNode> Parser::toplevel()
|
||||
/* Parses one function. The expected token sequence is TT_Let, TT_At, TT_Identifier, then either
   - TT_Semicolon, which yields an EmptyFunctionNode, or
   - TT_In, TT_LBracket, an expression, TT_RBracket, which yields a FunctionNode with that expression as body.
   Any other token produces an Err pointing at the offending token. Successful results are tagged with the
   token the function started at. */
Result<TopLevelNode> Parser::function()
{
    Token* const first_token = current_token;

    FunctionPrototype prototype;
    prototype.returnType = llvm::IntegerType::getInt32Ty(*globalContext); // FIXME: allow specifying return type
    prototype.arguments = {};                                             // FIXME: allow specifying arguments

    if (current_token->type != TT_Let) return Err<TopLevelNode>("Expected let", current_token);
    advance();

    if (current_token->type != TT_At) return Err<TopLevelNode>("Expected @", current_token);
    advance();

    if (current_token->type != TT_Identifier) return Err<TopLevelNode>("Expected an identifier", current_token);
    // An identifier token is expected to always carry its name.
    assert(current_token->string_value.has_value());
    prototype.name = current_token->string_value.value();
    advance();

    const bool is_declaration = current_token->type == TT_Semicolon;
    if (!is_declaration && current_token->type != TT_In)
        return Err<TopLevelNode>("Expected 'in' or semicolon", current_token);

    // Consume the ';' or the 'in'.
    advance();
    if (is_declaration) return Ok<TopLevelNode>(new EmptyFunctionNode(prototype), first_token);

    if (current_token->type != TT_LBracket)
        return Err<TopLevelNode>("Invalid syntax",
                                 current_token); // FIXME: Do not be lazy and return a meaningful error message.
    advance();

    Result<ExprNode> body_expr = expr();
    if (body_expr.is_error()) return Err<TopLevelNode>(body_expr.error(), body_expr.token());

    if (current_token->type != TT_RBracket) return Err<TopLevelNode>("Invalid syntax", current_token);
    advance();

    return Ok<TopLevelNode>(new FunctionNode(prototype, body_expr.get()), first_token);
}
|
||||
|
@ -14,7 +14,7 @@ template<typename T> class Result
|
||||
}
|
||||
void ethrow()
|
||||
{
|
||||
Error::throw_error(m_token->loc, m_token->line(), m_error);
|
||||
Error::throw_error(m_token->location, m_token->line(), m_error);
|
||||
}
|
||||
Token* token()
|
||||
{
|
||||
@ -38,7 +38,7 @@ template<typename T> class Result
|
||||
Result(T* result, Token* token) : m_result(result), m_token(token), m_is_error(false)
|
||||
{
|
||||
}
|
||||
Result(const std::string& error, Token* token) : m_error(std::move(error)), m_token(token), m_is_error(true)
|
||||
Result(std::string&& error, Token* token) : m_error(error), m_token(token), m_is_error(true)
|
||||
{
|
||||
}
|
||||
};
|
||||
@ -54,7 +54,7 @@ template<typename T> class Ok final : public Result<T>
|
||||
/* A Result<T> in the error state. It only adds a public constructor that forwards the error message and the
   associated token to Result<T>. */
template<typename T> class Err final : public Result<T>
{
  public:
    /* Takes the message by value so callers can pass either a string literal or a movable std::string. */
    Err(std::string error, Token* token) : Result<T>(std::move(error), token)
    {
    }
};
|
184
src/Token.cpp
184
src/Token.cpp
@ -1,205 +1,61 @@
|
||||
#include "Token.h"
|
||||
#include "utils.h"
|
||||
|
||||
const std::string token_strings[] = {
|
||||
"TT_IDENTIFIER", "TT_NUMBER", "TT_FLOAT", "TT_KEYWORD", "TT_STRING", "TT_PLUS",
|
||||
"TT_MINUS", "TT_MUL", "TT_DIV", "TT_AT", "TT_EQUAL", "TT_LESSTHAN",
|
||||
"TT_GREATERTHAN", "TT_LPAREN", "TT_RPAREN", "TT_LBRACKET", "TT_RBRACKET", "TT_SEMICOLON",
|
||||
"TT_LOADEDSTRING", "TT_EOF", "TT_NULL", "TT_EQUALS", "TT_GTE", "TT_LTE",
|
||||
"TT_PERIOD", "TT_COMMA", "TT_PATH", "TT_EXCLAMATION", "TT_NEQUAL", "TT_LSQB",
|
||||
"TT_RSQB", "TT_TYPE", "TT_IMPORT", "TT_SYSCALL0", "TT_SYSCALL1", "TT_SYSCALL2",
|
||||
"TT_SYSCALL3", "TT_SYSCALL4", "TT_SYSCALL5", "TT_COMPILERMACRO"};
|
||||
|
||||
/* Creates a token of the given type with no value, located at 0:0 in an unnamed file. */
Token::Token(TokenType type) : type(type), location(0, 0, "")
{
}

/* Creates a token of the given type with no value, at the given location. */
Token::Token(TokenType type, const Location& location) : type(type), location(location)
{
}

/* Creates a token carrying a string value, located at 0:0 in an unnamed file. */
Token::Token(TokenType type, std::string value)
    // Initializers are listed in member declaration order (string_value is declared before location).
    : type(type), string_value(std::move(value)), location(0, 0, "")
{
}

/* Creates a token carrying an integer value, at the given location. */
Token::Token(TokenType type, int value, const Location& location) : type(type), int_value(value), location(location)
{
}

/* Creates a token carrying a string value, at the given location. */
Token::Token(TokenType type, std::string value, const Location& location)
    : type(type), string_value(std::move(value)), location(location)
{
}

/* Creates a token carrying a float value, at the given location. */
Token::Token(TokenType type, float value, const Location& location) : type(type), float_value(value), location(location)
{
}
|
||||
|
||||
/* Returns a copy of this Token whose type is replaced by `type`. The location, the line text and all three
   optional values are carried over unchanged. */
Token Token::copy_with_new_type(const TokenType& type) const
{
    Token result(type, location);

    result.m_line_text = m_line_text;

    // Copy the optionals as they are: an empty optional stays empty, and no value is dropped if more than one
    // happens to be set (an if / else-if chain would keep only the first one found).
    result.int_value = int_value;
    result.float_value = float_value;
    result.string_value = string_value;

    // Return the local by name so the compiler can elide the copy; std::move() here would prevent that.
    return result;
}
|
||||
|
||||
/* Returns a copy of `origin` (same type, location and values) whose line text is set to `line_text`, so that
   line() on the returned token yields `line_text`. */
Token Token::make_with_line(const Token& origin, const std::string& line_text)
{
    Token result = origin.copy_with_new_type(origin.type);

    // The member that line() reads is m_line_text; store the new line text there.
    result.m_line_text = line_text;

    // Return the local by name so the compiler can elide the copy; std::move() here would prevent that.
    return result;
}
|
||||
|
||||
void Token::operator=(const Token& other)
|
||||
{
|
||||
tk_type = other.tk_type;
|
||||
int_value = other.int_value;
|
||||
string_value = other.string_value;
|
||||
float_value = other.float_value;
|
||||
line_text = other.line_text;
|
||||
}
|
||||
|
||||
void Token::erase(Token& tk)
|
||||
{
|
||||
tk.tk_type = TT_Null;
|
||||
}
|
||||
|
||||
bool Token::match_token_types(const std::vector<Token>& a, const std::vector<Token>& b, int count)
|
||||
{
|
||||
int size = [](int a, int b) { return a > b ? b : a; }(a.size() - count, b.size());
|
||||
|
||||
for (int i = 0; i < size; ++i)
|
||||
{
|
||||
if (a[i + count].tk_type != b[i].tk_type)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
*this = other.copy_with_new_type(other.type);
|
||||
}
|
||||
|
56
src/Token.h
56
src/Token.h
@ -25,62 +25,56 @@ enum TokenType
|
||||
TT_Semicolon,
|
||||
TT_LoadedString,
|
||||
TT_EOF,
|
||||
TT_Null,
|
||||
TT_Equals,
|
||||
TT_GTE,
|
||||
TT_LTE,
|
||||
TT_Period,
|
||||
TT_Comma,
|
||||
TT_Path,
|
||||
TT_Exclamation,
|
||||
TT_NEqual,
|
||||
TT_LSQB,
|
||||
TT_RSQB,
|
||||
TT_Type,
|
||||
TT_Import,
|
||||
TT_Syscall0,
|
||||
TT_Syscall1,
|
||||
TT_Syscall2,
|
||||
TT_Syscall3,
|
||||
TT_Syscall4,
|
||||
TT_Syscall5,
|
||||
TT_CompilerMacro,
|
||||
TT_Let,
|
||||
TT_In
|
||||
TT_In,
|
||||
TT_Colon,
|
||||
};
|
||||
|
||||
extern const std::string token_strings[];
|
||||
|
||||
/* Struct to represent tokens generated by the Lexer. */
|
||||
struct Token
|
||||
{
|
||||
TokenType tk_type;
|
||||
TokenType type;
|
||||
|
||||
int int_value;
|
||||
std::string string_value;
|
||||
float float_value;
|
||||
std::optional<int> int_value;
|
||||
std::optional<std::string> string_value;
|
||||
std::optional<float> float_value;
|
||||
|
||||
Location loc;
|
||||
Location location;
|
||||
|
||||
Token(const TokenType& type);
|
||||
Token(TokenType type);
|
||||
|
||||
Token(const TokenType& type, const Location& location);
|
||||
Token(TokenType type, const Location& location);
|
||||
|
||||
Token(const TokenType& type, const int& val, const Location& location);
|
||||
Token(TokenType type, int value, const Location& location);
|
||||
|
||||
Token(const TokenType& type, const std::string& val, const Location& location);
|
||||
Token(TokenType type, std::string value, const Location& location);
|
||||
|
||||
Token(const TokenType& type, const std::string& val);
|
||||
Token(TokenType type, std::string value);
|
||||
|
||||
Token(const TokenType& type, const float& val, const Location& location);
|
||||
Token(TokenType type, float val, const Location& location);
|
||||
|
||||
~Token();
|
||||
|
||||
/* Return a string representation of the Token's contents. */
|
||||
std::string to_string() const;
|
||||
~Token() = default;
|
||||
|
||||
/* Return the contents of the line where the Token was located. */
|
||||
std::string line() const;
|
||||
std::string line() const
|
||||
{
|
||||
return m_line_text;
|
||||
}
|
||||
|
||||
/* Return a copy of the original token, but adding the contents of the line where
|
||||
the token was located. */
|
||||
@ -88,19 +82,13 @@ struct Token
|
||||
|
||||
void operator=(const Token& other);
|
||||
|
||||
/* Convert the Token into a blank token (does not delete it), so that the Normalizer can remove it afterwards.
|
||||
This is to not alter vectors while iterating over them. */
|
||||
static void erase(Token& tk);
|
||||
|
||||
/* Return a copy of this Token, but with its TokenType changed. */
|
||||
Token copy_with_new_type(const TokenType& type);
|
||||
|
||||
/* Iterate over two vectors of Tokens, starting from count for vector A, starting from 0 for vector B, checking if
|
||||
the current Tokens' types match. If at any point they don't, return false. Else, return true. */
|
||||
static bool match_token_types(const std::vector<Token>& a, const std::vector<Token>& b, int count);
|
||||
Token copy_with_new_type(const TokenType& type) const;
|
||||
|
||||
private:
|
||||
std::string line_text;
|
||||
// FIXME: this should be moved to Location, to remove all Token* that are only used to throw errors at a certain
|
||||
// location.
|
||||
std::string m_line_text;
|
||||
};
|
||||
|
||||
/* typedef to make it easier to see what a std::vector of tokens is being used for. */
|
||||
|
@ -2,9 +2,7 @@
|
||||
#include "FileIO.h"
|
||||
#include "GlobalContext.h"
|
||||
#include "IRBuilder.h"
|
||||
#include "Importer.h"
|
||||
#include "Lexer.h"
|
||||
#include "Normalizer.h"
|
||||
#include "Parser.h"
|
||||
#include "sapphirepch.h"
|
||||
#include "utils.h"
|
||||
@ -24,16 +22,6 @@ int main(int argc, char** argv)
|
||||
result = lexer->lex(contents);
|
||||
}
|
||||
|
||||
Importer::init(FileIO::remove_file_extension(fname));
|
||||
{
|
||||
benchmark("Importing");
|
||||
result = Importer::evaluate(result);
|
||||
}
|
||||
{
|
||||
benchmark("Normalizing");
|
||||
result = Normalizer::normalize(result);
|
||||
}
|
||||
|
||||
initGlobalContext();
|
||||
|
||||
auto parser = Parser::new_parser(result);
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <vector>
|
@ -4,6 +4,6 @@
|
||||
"flags": [],
|
||||
"exit-code": 1,
|
||||
"stdout": "",
|
||||
"stderr": "\u001b[1;1mtests/import-inexistent.sp:1:8: \u001b[31;49merror: \u001b[0;0mfile not found\n1 import penguin_boi;\n \u001b[31;49m^\u001b[0;0m\n"
|
||||
"stderr": "\u001b[1;1mtests/import-inexistent.sp:1:1: \u001b[31;49merror: \u001b[0;0mExpected let\n1 \n \u001b[31;49m^\u001b[0;0m\n"
|
||||
}
|
||||
}
|
@ -4,6 +4,6 @@
|
||||
"flags": [],
|
||||
"exit-code": 1,
|
||||
"stdout": "",
|
||||
"stderr": "\u001b[1;1mtests/simple.sp:1:1: \u001b[31;49merror: \u001b[0;0mExpected let at the beginning of a function\n1 const { outln } from @'core/io';\n \u001b[31;49m^\u001b[0;0m\n"
|
||||
"stderr": "\u001b[1;1mtests/simple.sp:1:1: \u001b[31;49merror: \u001b[0;0mExpected let\n1 \n \u001b[31;49m^\u001b[0;0m\n"
|
||||
}
|
||||
}
|
@ -4,13 +4,8 @@
|
||||
"flags": [
|
||||
"--wimport"
|
||||
],
|
||||
"exit-code": 0,
|
||||
"stdout": "\u001b[1;1mtests/wimport.sp:1:8: \u001b[33;49mwarning: \u001b[0;0mfile already imported, skipping\n1 import tests/wimport;\n \u001b[33;49m^\u001b[0;0m\n",
|
||||
"stderr": ""
|
||||
},
|
||||
"run": {
|
||||
"exit-code": 0,
|
||||
"exit-code": 1,
|
||||
"stdout": "",
|
||||
"stderr": ""
|
||||
"stderr": "\u001b[1;1mtests/wimport.sp:1:1: \u001b[31;49merror: \u001b[0;0mExpected let\n1 \n \u001b[31;49m^\u001b[0;0m\n"
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user