From 99f56d3a6110c6df4abd2ee1abbeb84b3fdddbb1 Mon Sep 17 00:00:00 2001 From: apio Date: Fri, 26 Aug 2022 12:00:14 +0200 Subject: [PATCH] REFACTOR!! --- CMakeLists.txt | 4 - src/Error.cpp | 13 +-- src/IRBuilder.cpp | 4 +- src/IRBuilder.h | 4 +- src/Importer.cpp | 168 -------------------------- src/Importer.h | 12 -- src/Lexer.cpp | 220 +++++++++++++---------------------- src/Lexer.h | 11 +- src/Location.cpp | 25 ++-- src/Location.h | 10 +- src/Normalizer.cpp | 82 ------------- src/Normalizer.h | 14 --- src/Parser.cpp | 54 ++++----- src/Result.h | 6 +- src/Token.cpp | 184 ++++------------------------- src/Token.h | 56 ++++----- src/sapphire.cpp | 12 -- src/sapphirepch.h | 1 + tests/import-inexistent.json | 2 +- tests/simple.json | 2 +- tests/wimport.json | 9 +- 21 files changed, 186 insertions(+), 707 deletions(-) delete mode 100644 src/Importer.cpp delete mode 100644 src/Importer.h delete mode 100644 src/Normalizer.cpp delete mode 100644 src/Normalizer.h diff --git a/CMakeLists.txt b/CMakeLists.txt index cefacd0..6013ef4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,12 +27,8 @@ add_executable( src/external/FormatString/FormatString.hpp src/FileIO.h src/FileIO.cpp - src/Importer.cpp - src/Importer.h src/Arguments.cpp src/Arguments.h - src/Normalizer.cpp - src/Normalizer.h src/AST/ASTNode.cpp src/AST/ASTNode.h src/AST/BinaryOpNode.cpp diff --git a/src/Error.cpp b/src/Error.cpp index 0aa6a34..3b8ec07 100644 --- a/src/Error.cpp +++ b/src/Error.cpp @@ -1,5 +1,4 @@ #include "Error.h" -#include "Importer.h" #include "utils.h" #include #include @@ -42,7 +41,7 @@ void Error::show_import_lines(const Location& loc, void (*import_line_printer)(c { show_import_lines(loc, show_import_line, std::cerr); - std::string linestr = to_string(loc.line); + std::string line = to_string(loc.line); std::cerr << "\033[1;1m"; std::cerr << loc.str(); @@ -56,12 +55,12 @@ void Error::show_import_lines(const Location& loc, void (*import_line_printer)(c std::cerr << details; std::cerr << std::endl; - std::cerr << linestr; + std::cerr << line; std::cerr << std::string(4, ' '); std::cerr << line_text; std::cerr << std::endl; - std::cerr << std::string(4 + linestr.size() + loc.column - 1, ' '); + std::cerr << std::string(4 + line.size() + loc.column - 1, ' '); std::cerr << "\033[31;49m"; std::cerr << "^"; @@ -101,7 +100,7 @@ void Error::throw_warning(const Location& loc, const std::string line_text, cons { show_import_lines(loc, show_import_line, std::cout); - std::string linestr = to_string(loc.line); + std::string line = to_string(loc.line); std::cout << "\033[1;1m"; std::cout << loc.str(); @@ -115,12 +114,12 @@ void Error::throw_warning(const Location& loc, const std::string line_text, cons std::cout << details; std::cout << std::endl; - std::cout << linestr; + std::cout << line; std::cout << std::string(4, ' '); std::cout << line_text; std::cout << std::endl; - std::cout << std::string(4 + linestr.size() + loc.column - 1, ' '); + std::cout << std::string(4 + line.size() + loc.column - 1, ' '); std::cout << "\033[33;49m"; std::cout << "^"; diff --git a/src/IRBuilder.cpp b/src/IRBuilder.cpp index 11a7394..083f390 100644 --- a/src/IRBuilder.cpp +++ b/src/IRBuilder.cpp @@ -31,7 +31,7 @@ void IRBuilder::create_program(std::shared_ptr program) program->walk([&](std::shared_ptr node) { node->codegen(this, module.get()); }); } -void IRBuilder::resolveToLLVMIR(std::string path) +void IRBuilder::resolveToLLVMIR(std::string_view path) { std::error_code EC; llvm::raw_fd_ostream dest(path, EC, llvm::sys::fs::OF_None); @@ -45,7 +45,7 @@ void IRBuilder::resolveToLLVMIR(std::string path) dest.flush(); } -void IRBuilder::resolveToObjectFile(std::string path) +void IRBuilder::resolveToObjectFile(std::string_view path) { // edit this section when adding support for more architectures LLVMInitializeX86TargetInfo(); diff --git a/src/IRBuilder.h b/src/IRBuilder.h index 254b58a..01fbfc8 100644 --- a/src/IRBuilder.h +++ b/src/IRBuilder.h @@ -19,7 +19,7 @@ class IRBuilder llvm::IRBuilder<>* getBuilder(); - void resolveToLLVMIR(std::string path); + void resolveToLLVMIR(std::string_view path); - void resolveToObjectFile(std::string path); + void resolveToObjectFile(std::string_view path); }; \ No newline at end of file diff --git a/src/Importer.cpp b/src/Importer.cpp deleted file mode 100644 index bb26551..0000000 --- a/src/Importer.cpp +++ /dev/null @@ -1,168 +0,0 @@ -#include "Importer.h" -#include "Arguments.h" -#include "Error.h" -#include "FileIO.h" -#include "sapphirepch.h" -#include "utils.h" -#include -#define MAX_IMPORTS 100 - -int Importer::import_count = 0; - -std::vector> Importer::import_stack; -std::vector Importer::imported_files; - -TokenStream Importer::evaluate(const TokenStream& original) -{ - int i = 0; - auto ret_tk = original; - TokenStream new_tokens; - - while (original[i].tk_type != TT_EOF) - { - Token current_token = original[i]; - - if (current_token.tk_type == TT_Import) - { - Token next_token = original[i + 1]; - if (next_token.tk_type == TT_EOF) - Error::throw_error(current_token.loc, current_token.line(), - "did not expect EOF after import statement"); - - if (next_token.tk_type == TT_Identifier) // TODO: add support for strings - { - Token last_token = original[i + 2]; - - if (last_token.tk_type != TT_Semicolon) - Error::throw_error(last_token.loc, last_token.line(), "expected a semicolon"); - - if (std::find(imported_files.begin(), imported_files.end(), next_token.string_value) != - imported_files.end()) - { - if (Arguments::wimport) - Error::throw_warning(next_token.loc, next_token.line(), "file already imported, skipping"); - Token::erase(ret_tk[i]); - Token::erase(ret_tk[i + 1]); - Token::erase(ret_tk[i + 2]); - ++i; - continue; - } - - if (import_count > MAX_IMPORTS) - - Error::throw_error(current_token.loc, current_token.line(), "maximum import depth exceeded"); - - std::string input_file_name = next_token.string_value + ".sp"; - - std::ifstream input_file(input_file_name); // only used to check if it exists, thus closed afterwards - if (!input_file.good()) Error::throw_error(next_token.loc, next_token.line(), "file not found"); - input_file.close(); - - auto file_contents = FileIO::read_all(input_file_name); - - auto top_location = std::make_shared(current_token.loc.line, current_token.loc.column, - current_token.loc.fname); - top_location.get()->parent = current_token.loc.parent; - - import_stack.push_back(top_location); // Keep ref_count above 0, just in case - - auto import_lexer = Lexer::make_lexer(input_file_name); - - Lexer::assign_parent_location(import_lexer, top_location); - - TokenStream imported_tokens = import_lexer->lex(file_contents); - - imported_tokens.pop_back(); // remove EOF at end of token stream - - for (auto& tk : imported_tokens) - { - tk.loc.parent = top_location; - } - - imported_files.push_back(next_token.string_value); - - new_tokens.insert(new_tokens.end(), imported_tokens.begin(), imported_tokens.end()); - - Token::erase(ret_tk[i]); - Token::erase(ret_tk[i + 1]); - Token::erase(ret_tk[i + 2]); - } - else if (next_token.tk_type == TT_Path) - { - Token last_token = original[i + 2]; - - if (last_token.tk_type != TT_Semicolon) - Error::throw_error(last_token.loc, last_token.line(), "expected a semicolon"); - - if (std::find(imported_files.begin(), imported_files.end(), next_token.string_value) != - imported_files.end()) - { - if (Arguments::wimport) - Error::throw_warning(next_token.loc, next_token.line(), "file already imported, skipping"); - Token::erase(ret_tk[i]); - Token::erase(ret_tk[i + 1]); - Token::erase(ret_tk[i + 2]); - ++i; - continue; - } - - if (import_count > MAX_IMPORTS) - Error::throw_error(current_token.loc, current_token.line(), "maximum import depth exceeded"); - - std::string input_file_name = next_token.string_value + ".sp"; - - std::ifstream input_file(input_file_name); // only used to check if it exists, thus closed afterwards - if (!input_file.good()) Error::throw_error(next_token.loc, next_token.line(), "file not found"); - input_file.close(); - - auto file_contents = FileIO::read_all(input_file_name); - - auto top_location = std::make_shared(current_token.loc.line, current_token.loc.column, - current_token.loc.fname); - top_location.get()->parent = current_token.loc.parent; - - import_stack.push_back(top_location); // Keep ref_count above 0, just in case - - auto import_lexer = Lexer::make_lexer(input_file_name); - - Lexer::assign_parent_location(import_lexer, top_location); - - TokenStream imported_tokens = import_lexer->lex(file_contents); - - imported_tokens.pop_back(); // remove EOF at end of token stream - - for (auto& tk : imported_tokens) - { - tk.loc.parent = top_location; - } - - imported_files.push_back(next_token.string_value); - - new_tokens.insert(new_tokens.end(), imported_tokens.begin(), imported_tokens.end()); - - Token::erase(ret_tk[i]); - Token::erase(ret_tk[i + 1]); - Token::erase(ret_tk[i + 2]); - } - else - Error::throw_error(next_token.loc, next_token.line(), - "import keyword should be followed by an identifier"); - } - - ++i; - } - - if (new_tokens.size() != 0) - { - new_tokens.insert(new_tokens.end(), ret_tk.begin(), ret_tk.end()); - import_count++; - return evaluate(new_tokens); - } - - return ret_tk; -} - -void Importer::init(std::string init_file) -{ - imported_files.push_back(init_file); -} diff --git a/src/Importer.h b/src/Importer.h deleted file mode 100644 index 6b54b4c..0000000 --- a/src/Importer.h +++ /dev/null @@ -1,12 +0,0 @@ -#pragma once -#include "Lexer.h" -#include "Token.h" - -namespace Importer -{ -extern int import_count; -extern std::vector> import_stack; -extern std::vector imported_files; -TokenStream evaluate(const TokenStream& original); -void init(std::string init_file); -} // namespace Importer diff --git a/src/Lexer.cpp b/src/Lexer.cpp index f7e7aad..e32dd09 100644 --- a/src/Lexer.cpp +++ b/src/Lexer.cpp @@ -7,10 +7,7 @@ #define IDENTIFIERS "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWYZ_0123456789" #define DIGITS "0123456789" -const std::array Lexer::types = {"void", "bool", "str", "i8", "i16", "i32", "i64", - "u8", "u16", "u32", "u64", "f32", "f64", "f128"}; - -Lexer::Lexer(const std::string& fname) : loc(1, 0, fname), index(-1), prev_loc(1, 0, fname) +Lexer::Lexer(const std::string& fname) : location(1, 0, fname), index(-1), previous_location(1, 0, fname) { } @@ -20,12 +17,12 @@ Lexer::~Lexer() int Lexer::advance() { - prev_loc = loc; + previous_location = location; ++index; - loc.advance(); + location.advance(); if (index >= current_lexed_text.size()) return 0; current_char = current_lexed_text[index]; - loc.pos_from_char(current_char); + location.pos_from_char(current_char); if (current_char == '\n') { previous_line_text = current_line_text; @@ -36,7 +33,7 @@ int Lexer::advance() int Lexer::rewind() { - loc = prev_loc; + location = previous_location; --index; if (index == -1) return 0; if (current_char == '\n') @@ -67,7 +64,7 @@ std::unique_ptr Lexer::make_lexer(const std::string& fname) void Lexer::assign_parent_location(std::unique_ptr& lexer, const std::shared_ptr& loc) { - lexer->loc.parent = loc; + lexer->location.parent = loc; } bool Lexer::is_in_string(const std::string& string, const char& character) @@ -117,81 +114,82 @@ TokenStream Lexer::lex(const std::string& text) break; } } - result.push_back(Token::make_with_line({TT_Div, loc}, current_line_text)); + result.push_back(Token::make_with_line({TT_Div, location}, current_line_text)); break; case '+': - result.push_back(Token::make_with_line({TT_Plus, loc}, current_line_text)); + result.push_back(Token::make_with_line({TT_Plus, location}, current_line_text)); break; case '-': - result.push_back(Token::make_with_line({TT_Minus, loc}, current_line_text)); + result.push_back(Token::make_with_line({TT_Minus, location}, current_line_text)); break; case '*': - result.push_back(Token::make_with_line({TT_Mul, loc}, current_line_text)); + result.push_back(Token::make_with_line({TT_Mul, location}, current_line_text)); break; case '@': - result.push_back(Token::make_with_line({TT_At, loc}, current_line_text)); + result.push_back(Token::make_with_line({TT_At, location}, current_line_text)); break; case '=': - result.push_back(Token::make_with_line({TT_Equal, loc}, current_line_text)); + result.push_back(Token::make_with_line({TT_Equal, location}, current_line_text)); break; case '>': - result.push_back(Token::make_with_line({TT_GreaterThan, loc}, current_line_text)); + result.push_back(Token::make_with_line({TT_GreaterThan, location}, current_line_text)); break; case '<': - result.push_back(Token::make_with_line({TT_LessThan, loc}, current_line_text)); + result.push_back(Token::make_with_line({TT_LessThan, location}, current_line_text)); break; case '(': - result.push_back(Token::make_with_line({TT_LParen, loc}, current_line_text)); + result.push_back(Token::make_with_line({TT_LParen, location}, current_line_text)); break; case ')': - result.push_back(Token::make_with_line({TT_RParen, loc}, current_line_text)); + result.push_back(Token::make_with_line({TT_RParen, location}, current_line_text)); break; case '{': - result.push_back(Token::make_with_line({TT_LBracket, loc}, current_line_text)); + result.push_back(Token::make_with_line({TT_LBracket, location}, current_line_text)); break; case '}': - result.push_back(Token::make_with_line({TT_RBracket, loc}, current_line_text)); + result.push_back(Token::make_with_line({TT_RBracket, location}, current_line_text)); break; case ';': - result.push_back(Token::make_with_line({TT_Semicolon, loc}, current_line_text)); + result.push_back(Token::make_with_line({TT_Semicolon, location}, current_line_text)); break; case '.': - result.push_back(Token::make_with_line({TT_Period, loc}, current_line_text)); + result.push_back(Token::make_with_line({TT_Period, location}, current_line_text)); break; case ',': - result.push_back(Token::make_with_line({TT_Comma, loc}, current_line_text)); + result.push_back(Token::make_with_line({TT_Comma, location}, current_line_text)); break; case '!': - result.push_back(Token::make_with_line({TT_Exclamation, loc}, current_line_text)); + result.push_back(Token::make_with_line({TT_Exclamation, location}, current_line_text)); break; case '[': - result.push_back(Token::make_with_line({TT_Exclamation, loc}, current_line_text)); + result.push_back(Token::make_with_line({TT_Exclamation, location}, current_line_text)); break; case ']': - result.push_back(Token::make_with_line({TT_Exclamation, loc}, current_line_text)); + result.push_back(Token::make_with_line({TT_Exclamation, location}, current_line_text)); + break; + case ':': + result.push_back(Token::make_with_line({TT_Colon, location}, current_line_text)); break; case '\377': - result.push_back(Token(TT_EOF, loc)); - return result; + result.push_back(Token(TT_EOF, location)); + return std::move(result); default: - Error::throw_error(loc, current_line_text, "unknown character"); + Error::throw_error(location, current_line_text, "unknown character"); } } - result.push_back(Token(TT_EOF, loc)); + result.push_back(Token(TT_EOF, location)); - return result; + return std::move(result); } Token Lexer::create_identifier() { std::vector characters; - int prev_line = loc.line; - int prev_column = loc.column; - bool is_path = false; - bool last_was_path = false; - Location saved_loc = this->loc; - Location saved_prev_loc = this->prev_loc; + int prev_line = location.line; + int prev_column = location.column; + Location saved_loc = this->location; + Location saved_prev_loc = this->previous_location; characters.push_back(current_char); @@ -200,103 +198,48 @@ Token Lexer::create_identifier() if (is_in_string(IDENTIFIERS, current_char)) { characters.push_back(current_char); - last_was_path = false; - } - else if (current_char == '/') - { - if (last_was_path) - { - characters.pop_back(); - this->loc = saved_loc; - this->prev_loc = saved_prev_loc; - this->rewind(); - std::string identifier(characters.begin(), characters.end()); - return Token::make_with_line({TT_Path, identifier, {prev_line, prev_column, loc.fname}}, - current_line_text); - } - - saved_loc = this->loc; - saved_prev_loc = this->prev_loc; - - characters.push_back(current_char); - is_path = true; - last_was_path = true; } else { this->rewind(); - std::string identifier(characters.begin(), characters.end()); - if (is_path) - return Token::make_with_line({TT_Path, identifier, {prev_line, prev_column, loc.fname}}, - current_line_text); - auto location = std::find(types.begin(), types.end(), identifier); - if (location != types.end()) - { - return Token::make_with_line({TT_Type, identifier, {prev_line, prev_column, loc.fname}}, - current_line_text); - } - if (identifier == "import") - return Token::make_with_line({TT_Import, {prev_line, prev_column, loc.fname}}, current_line_text); - if (identifier == "syscall0") - return Token::make_with_line({TT_Syscall0, {prev_line, prev_column, loc.fname}}, current_line_text); - if (identifier == "syscall1") - return Token::make_with_line({TT_Syscall1, {prev_line, prev_column, loc.fname}}, current_line_text); - if (identifier == "syscall2") - return Token::make_with_line({TT_Syscall2, {prev_line, prev_column, loc.fname}}, current_line_text); - if (identifier == "syscall3") - return Token::make_with_line({TT_Syscall3, {prev_line, prev_column, loc.fname}}, current_line_text); - if (identifier == "syscall4") - return Token::make_with_line({TT_Syscall4, {prev_line, prev_column, loc.fname}}, current_line_text); - if (identifier == "syscall5") - return Token::make_with_line({TT_Syscall5, {prev_line, prev_column, loc.fname}}, current_line_text); - if (identifier == "compmacro") - return Token::make_with_line({TT_CompilerMacro, {prev_line, prev_column, loc.fname}}, - current_line_text); - if (identifier == "let") - return Token::make_with_line({TT_Let, {prev_line, prev_column, loc.fname}}, current_line_text); - if (identifier == "in") - return Token::make_with_line({TT_In, {prev_line, prev_column, loc.fname}}, current_line_text); - return Token::make_with_line({TT_Identifier, identifier, {prev_line, prev_column, loc.fname}}, - current_line_text); + goto end; } } - +end: std::string identifier(characters.begin(), characters.end()); - if (is_path) - return Token::make_with_line({TT_Path, identifier, {prev_line, prev_column, loc.fname}}, current_line_text); - auto location = std::find(types.begin(), types.end(), identifier); - if (location != types.end()) - { - return Token::make_with_line({TT_Type, identifier, {prev_line, prev_column, loc.fname}}, current_line_text); - } - if (identifier == "import") - return Token::make_with_line({TT_Import, {prev_line, prev_column, loc.fname}}, current_line_text); if (identifier == "syscall0") - return Token::make_with_line({TT_Syscall0, {prev_line, prev_column, loc.fname}}, current_line_text); + return std::move( + Token::make_with_line({TT_Syscall0, {prev_line, prev_column, location.filename}}, current_line_text)); if (identifier == "syscall1") - return Token::make_with_line({TT_Syscall1, {prev_line, prev_column, loc.fname}}, current_line_text); + return std::move( + Token::make_with_line({TT_Syscall1, {prev_line, prev_column, location.filename}}, current_line_text)); if (identifier == "syscall2") - return Token::make_with_line({TT_Syscall2, {prev_line, prev_column, loc.fname}}, current_line_text); + return std::move( + Token::make_with_line({TT_Syscall2, {prev_line, prev_column, location.filename}}, current_line_text)); if (identifier == "syscall3") - return Token::make_with_line({TT_Syscall3, {prev_line, prev_column, loc.fname}}, current_line_text); + return std::move( + Token::make_with_line({TT_Syscall3, {prev_line, prev_column, location.filename}}, current_line_text)); if (identifier == "syscall4") - return Token::make_with_line({TT_Syscall4, {prev_line, prev_column, loc.fname}}, current_line_text); + return std::move( + Token::make_with_line({TT_Syscall4, {prev_line, prev_column, location.filename}}, current_line_text)); if (identifier == "syscall5") - return Token::make_with_line({TT_Syscall5, {prev_line, prev_column, loc.fname}}, current_line_text); - if (identifier == "compmacro") - return Token::make_with_line({TT_CompilerMacro, {prev_line, prev_column, loc.fname}}, current_line_text); + return std::move( + Token::make_with_line({TT_Syscall5, {prev_line, prev_column, location.filename}}, current_line_text)); if (identifier == "let") - return Token::make_with_line({TT_Let, {prev_line, prev_column, loc.fname}}, current_line_text); + return std::move( + Token::make_with_line({TT_Let, {prev_line, prev_column, location.filename}}, current_line_text)); if (identifier == "in") - return Token::make_with_line({TT_In, {prev_line, prev_column, loc.fname}}, current_line_text); - return Token::make_with_line({TT_Identifier, identifier, {prev_line, prev_column, loc.fname}}, current_line_text); + return std::move( + Token::make_with_line({TT_In, {prev_line, prev_column, location.filename}}, current_line_text)); + return std::move(Token::make_with_line({TT_Identifier, identifier, {prev_line, prev_column, location.filename}}, + current_line_text)); } Token Lexer::create_number() { std::vector characters; - int prev_line = loc.line; - int prev_column = loc.column; + int prev_line = location.line; + int prev_column = location.column; int dot_count = 0; characters.push_back(current_char); @@ -316,60 +259,53 @@ Token Lexer::create_number() } else { - Error::throw_warning(loc, current_line_text, "floats can only have one dot"); + Error::throw_warning(location, current_line_text, "floats can only have one dot"); this->rewind(); - float tk_value = std::stof(std::string(characters.begin(), characters.end()).c_str()); - return Token::make_with_line({TT_Float, tk_value, {prev_line, prev_column, loc.fname}}, - current_line_text); + goto end; } } else { this->rewind(); - if (dot_count != 0) - { - float tk_value = std::stof(std::string(characters.begin(), characters.end()).c_str()); - return Token::make_with_line({TT_Float, tk_value, {prev_line, prev_column, loc.fname}}, - current_line_text); - } - int tk_value = atoi(std::string(characters.begin(), characters.end()).c_str()); - return Token::make_with_line({TT_Number, tk_value, {prev_line, prev_column, loc.fname}}, current_line_text); + goto end; } } - +end: if (dot_count != 0) { - float tk_value = std::stof(std::string(characters.begin(), characters.end()).c_str()); - return Token::make_with_line({TT_Float, tk_value, {prev_line, prev_column, loc.fname}}, current_line_text); + float value = std::stof(std::string(characters.begin(), characters.end()).c_str()); + return std::move( + Token::make_with_line({TT_Float, value, {prev_line, prev_column, location.filename}}, current_line_text)); } - int tk_value = atoi(std::string(characters.begin(), characters.end()).c_str()); - return Token::make_with_line({TT_Number, tk_value, {prev_line, prev_column, loc.fname}}, current_line_text); + int value = std::atoi(std::string(characters.begin(), characters.end()).c_str()); + return std::move( + Token::make_with_line({TT_Number, value, {prev_line, prev_column, location.filename}}, current_line_text)); } Token Lexer::create_string() { std::vector characters; - int prev_line = loc.line; - int prev_column = loc.column; + int prev_line = location.line; + int prev_column = location.column; while (this->advance()) { if (current_char == '\n') { this->rewind(); - Error::throw_error(loc, current_line_text, "expected end of string but got newline"); + Error::throw_error(location, current_line_text, "expected end of string but got newline"); } if (current_char == '\'') { std::string identifier(characters.begin(), characters.end()); - return Token::make_with_line({TT_String, identifier, {prev_line, prev_column, loc.fname}}, - current_line_text); + return std::move(Token::make_with_line({TT_String, identifier, {prev_line, prev_column, location.filename}}, + current_line_text)); } if (current_char == '\\') { if (index + 1 == current_lexed_text.size()) { - Error::throw_error(loc, current_line_text, "unfinished escape sequence"); + Error::throw_error(location, current_line_text, "unfinished escape sequence"); } switch (current_lexed_text[index + 1]) { @@ -383,17 +319,17 @@ Token Lexer::create_string() characters.push_back('\\'); break; default: - Error::throw_error(loc, current_line_text, "unknown escape sequence"); + Error::throw_error(location, current_line_text, "unknown escape sequence"); } ++index; - ++loc.column; + ++location.column; continue; } characters.push_back(current_char); } this->rewind(); - Error::throw_error(loc, current_line_text, "expected end of string but got EOF"); + Error::throw_error(location, current_line_text, "expected end of string but got EOF"); - return Token(TT_Null, loc); // unreachable since Error::throw_error calls exit() + return *(Token*)nullptr; // unreachable } diff --git a/src/Lexer.h b/src/Lexer.h index 0994ddd..32848ff 100644 --- a/src/Lexer.h +++ b/src/Lexer.h @@ -13,15 +13,15 @@ typedef std::vector TokenStream; class Lexer { private: - Location loc; - Location prev_loc; + Location location; + Location previous_location; int advance(); int rewind(); char current_char; int index; - Lexer(const std::string& fname); + Lexer(const std::string& filename); std::string current_line_text; std::string previous_line_text; @@ -37,16 +37,13 @@ class Lexer bool is_in_string(const std::string& string, const char& character); public: - /* An array containing Sapphire's current data types. */ - static const std::array types; - ~Lexer(); /* Lex the given text, turning it into a stream of tokens. */ TokenStream lex(const std::string& text); /* Create a new Lexer and return a pointer to it. */ - static std::unique_ptr make_lexer(const std::string& fname); + static std::unique_ptr make_lexer(const std::string& filename); /* If the Lexer is lexing an impòrted file, give it the location in the parent file at which it was imported. */ static void assign_parent_location(std::unique_ptr& lexer, const std::shared_ptr& loc); diff --git a/src/Location.cpp b/src/Location.cpp index 13971e0..dfac9af 100644 --- a/src/Location.cpp +++ b/src/Location.cpp @@ -2,7 +2,8 @@ #include "FormatString/FormatString.hpp" #include -Location::Location(int ln, int col, std::string file) : line(ln), column(col), fname(file) +Location::Location(int line, int column, std::string filename) + : line(line), column(column), filename(std::move(filename)) { } @@ -12,12 +13,7 @@ Location::~Location() std::string Location::str() const { - return format_string("%s:%d:%d", fname, line, column); -} - -std::string Location::paren_str() const -{ - return format_string("(%s:%d:%d)", fname, line, column); + return format_string("%s:%d:%d", filename, line, column); } void Location::advance() @@ -36,13 +32,16 @@ void Location::pos_from_char(const char& character) void Location::operator=(const Location& other) { - this->parent = other.parent; - this->line = other.line; - this->column = other.column; - this->fname = other.fname; + Location copied = copy(other); + line = copied.line; + column = copied.column; + parent = copied.parent; + filename = std::move(copied.filename); } -void Location::copy(const Location& other) +Location Location::copy(const Location& other) { - this->operator=(other); + Location result(other.line, other.column, other.filename); + result.parent = other.parent; + return std::move(result); } diff --git a/src/Location.h b/src/Location.h index 7ba4589..f60223a 100644 --- a/src/Location.h +++ b/src/Location.h @@ -6,20 +6,18 @@ struct Location { int line; int column; - std::string fname; + std::string filename; /* The location at which this location was imported, for error traces in imported files. */ std::shared_ptr parent = nullptr; /* Creates a Location with the given parameters. */ - Location(int ln, int col, std::string file); + Location(int line, int column, std::string filename); ~Location(); /* Returns a string of the format FILE:LINE:COL. */ std::string str() const; - /* Returns a string of the format (FILE:LINE:COL). */ - std::string paren_str() const; /* Advance to the next column in the file. */ void advance(); @@ -29,6 +27,6 @@ struct Location void operator=(const Location& other); - /* Copies the other location into this one. */ - void copy(const Location& other); + /* Returns a copy of the original Location. */ + static Location copy(const Location& other); }; diff --git a/src/Normalizer.cpp b/src/Normalizer.cpp deleted file mode 100644 index f048457..0000000 --- a/src/Normalizer.cpp +++ /dev/null @@ -1,82 +0,0 @@ -#include "Normalizer.h" -#include "utils.h" - -TokenStream Normalizer::normalize(const TokenStream& input) -{ - TokenStream result; - int i = 0; - while (i < input.size()) - { - Token current = input[i]; - if (current.tk_type == TT_Null) - { - i++; - continue; - } - if (current.tk_type == TT_Equal) - { - if (i + 1 != input.size()) - { - if (input[i + 1].tk_type == TT_Equal) - { - i += 2; - result.push_back(current.copy_with_new_type(TT_Equals)); - continue; - } - } - i++; - result.push_back(current); - continue; - } - if (current.tk_type == TT_Exclamation) - { - if (i + 1 != input.size()) - { - if (input[i + 1].tk_type == TT_Equal) - { - i += 2; - result.push_back(current.copy_with_new_type(TT_NEqual)); - continue; - } - } - i++; - result.push_back(current); - continue; - } - if (current.tk_type == TT_GreaterThan) - { - if (i + 1 != input.size()) - { - if (input[i + 1].tk_type == TT_Equal) - { - i += 2; - result.push_back(current.copy_with_new_type(TT_GTE)); - continue; - } - } - i++; - result.push_back(current); - continue; - } - if (current.tk_type == TT_LessThan) - { - if (i + 1 != input.size()) - { - if (input[i + 1].tk_type == TT_Equal) - { - i += 2; - result.push_back(current.copy_with_new_type(TT_LTE)); - continue; - } - } - i++; - result.push_back(current); - continue; - } - i++; - result.push_back(current); - continue; - } - - return result; -} \ No newline at end of file diff --git a/src/Normalizer.h b/src/Normalizer.h deleted file mode 100644 index 1846ea6..0000000 --- a/src/Normalizer.h +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once -#include "Lexer.h" // for TokenStream -#include "Token.h" - -/* Namespace to normalize a TokenStream. */ -namespace Normalizer -{ -/* Some tokens are difficult for the Lexer to parse right, or maybe I'm just lazy. -Anyways, this function transforms > and = tokens next to each other into a single >=, which has a different meaning, -etc... For example: = + = : ==, < + = : <=... - -It also takes blank tokens and removes them. */ -TokenStream normalize(const TokenStream& input); -} // namespace Normalizer diff --git a/src/Parser.cpp b/src/Parser.cpp index e8c6acf..9e2ca94 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -25,7 +25,7 @@ std::shared_ptr Parser::parse() auto result = toplevel(); if (result.is_error()) result.ethrow(); final_result->append(result.get()); - if (current_token->tk_type == TT_EOF) break; + if (current_token->type == TT_EOF) break; } return final_result; } @@ -44,16 +44,18 @@ Result Parser::factor() { Token& token = *current_token; - if (token.tk_type == TT_Number) + if (token.type == TT_Number) { advance(); - return Ok(new IntegerNode(token.int_value), &token); + assert(token.int_value.has_value()); + return Ok(new IntegerNode(token.int_value.value()), &token); } - if (token.tk_type == TT_Float) + if (token.type == TT_Float) { advance(); - return Ok(new FloatNode(token.float_value), &token); + assert(token.float_value.has_value()); + return Ok(new FloatNode(token.float_value.value()), &token); } return Err("expected a number", &token); @@ -63,13 +65,13 @@ Result Parser::term() { Result left = factor(); if (left.is_error()) return left; - while (current_token->tk_type == TT_Mul || current_token->tk_type == TT_Div) + while (current_token->type == TT_Mul || current_token->type == TT_Div) { Token& op = *current_token; advance(); Result right = factor(); if (right.is_error()) return right; - left = Ok(new MulNode(left.get(), right.get(), op.tk_type == TT_Mul ? '*' : '/'), &op); + left = Ok(new MulNode(left.get(), right.get(), op.type == TT_Mul ? '*' : '/'), &op); } return left; } @@ -78,13 +80,13 @@ Result Parser::expr() { Result left = term(); if (left.is_error()) return left; - while (current_token->tk_type == TT_Plus || current_token->tk_type == TT_Minus) + while (current_token->type == TT_Plus || current_token->type == TT_Minus) { Token& op = *current_token; advance(); Result right = term(); if (right.is_error()) return right; - left = Ok(new SumNode(left.get(), right.get(), op.tk_type == TT_Plus ? '+' : '-'), &op); + left = Ok(new SumNode(left.get(), right.get(), op.type == TT_Plus ? '+' : '-'), &op); } return left; } @@ -98,35 +100,35 @@ Result Parser::toplevel() Result Parser::function() { FunctionPrototype proto; - Token* ftoken = current_token; + Token* start_token = current_token; proto.returnType = llvm::IntegerType::getInt32Ty(*globalContext); // FIXME: allow specifying return type proto.arguments = {}; // FIXME: allow specifying arguments - if (current_token->tk_type != TT_Let) - return Err("Expected let at the beginning of a function", current_token); + if (current_token->type != TT_Let) return Err("Expected let", current_token); advance(); - if (current_token->tk_type != TT_At) - return Err("Expected @ at the beginning of a function", current_token); + if (current_token->type != TT_At) return Err("Expected @", current_token); advance(); - if (current_token->tk_type != TT_Identifier) return Err("Expected an identifier", current_token); + if (current_token->type != TT_Identifier) return Err("Expected an identifier", current_token); else - proto.name = current_token->string_value; - advance(); - if (current_token->tk_type != TT_In && current_token->tk_type != TT_Semicolon) - return Err("Expected 'in' or semicolon", current_token); - if (current_token->tk_type == TT_Semicolon) { - advance(); - return Ok(new EmptyFunctionNode(proto), ftoken); + assert(current_token->string_value.has_value()); + proto.name = current_token->string_value.value(); } advance(); - if (current_token->tk_type != TT_LBracket) + if (current_token->type != TT_In && current_token->type != TT_Semicolon) + return Err("Expected 'in' or semicolon", current_token); + if (current_token->type == TT_Semicolon) + { + advance(); + return Ok(new EmptyFunctionNode(proto), start_token); + } + advance(); + if (current_token->type != TT_LBracket) return Err("Invalid syntax", current_token); // FIXME: Do not be lazy and return a meaningful error message. advance(); Result body = expr(); if (body.is_error()) return Err(body.error(), body.token()); - if (current_token->tk_type != TT_RBracket) - return Err(format_string("Invalid syntax %d", current_token->tk_type), current_token); + if (current_token->type != TT_RBracket) return Err("Invalid syntax", current_token); advance(); - return Ok(new FunctionNode(proto, body.get()), ftoken); + return Ok(new FunctionNode(proto, body.get()), start_token); } diff --git a/src/Result.h b/src/Result.h index 28fc184..8717f7f 100644 --- a/src/Result.h +++ b/src/Result.h @@ -14,7 +14,7 @@ template class Result } void ethrow() { - Error::throw_error(m_token->loc, m_token->line(), m_error); + Error::throw_error(m_token->location, m_token->line(), m_error); } Token* token() { @@ -38,7 +38,7 @@ template class Result Result(T* result, Token* token) : m_result(result), m_token(token), m_is_error(false) { } - Result(const std::string& error, Token* token) : m_error(std::move(error)), m_token(token), m_is_error(true) + Result(std::string&& error, Token* token) : m_error(error), m_token(token), m_is_error(true) { } }; @@ -54,7 +54,7 @@ template class Ok final : public Result template class Err final : public Result { public: - Err(const std::string& error, Token* token) : Result(std::move(error), token) + Err(std::string error, Token* token) : Result(std::move(error), token) { } }; \ No newline at end of file diff --git a/src/Token.cpp b/src/Token.cpp index 780f2b2..e817d92 100644 --- a/src/Token.cpp +++ b/src/Token.cpp @@ -1,205 +1,61 @@ #include "Token.h" #include "utils.h" -const std::string token_strings[] = { - "TT_IDENTIFIER", "TT_NUMBER", "TT_FLOAT", "TT_KEYWORD", "TT_STRING", "TT_PLUS", - "TT_MINUS", "TT_MUL", "TT_DIV", "TT_AT", "TT_EQUAL", "TT_LESSTHAN", - "TT_GREATERTHAN", "TT_LPAREN", "TT_RPAREN", "TT_LBRACKET", "TT_RBRACKET", "TT_SEMICOLON", - "TT_LOADEDSTRING", "TT_EOF", "TT_NULL", "TT_EQUALS", "TT_GTE", "TT_LTE", - "TT_PERIOD", "TT_COMMA", "TT_PATH", "TT_EXCLAMATION", "TT_NEQUAL", "TT_LSQB", - "TT_RSQB", "TT_TYPE", "TT_IMPORT", "TT_SYSCALL0", "TT_SYSCALL1", "TT_SYSCALL2", - "TT_SYSCALL3", "TT_SYSCALL4", "TT_SYSCALL5", "TT_COMPILERMACRO"}; - -Token::Token(const TokenType& type) : tk_type(type), loc(0, 0, "") +Token::Token(TokenType type) : type(type), location(0, 0, "") { } -Token::Token(const TokenType& type, const Location& location) : tk_type(type), loc(location) +Token::Token(TokenType type, const Location& location) : type(type), location(location) { } -Token::Token(const TokenType& type, const std::string& val) : tk_type(type), loc(0, 0, ""), string_value(val) +Token::Token(TokenType type, std::string value) : type(type), location(0, 0, ""), string_value(std::move(value)) { } -Token::Token(const TokenType& type, const int& val, const Location& location) - : tk_type(type), int_value(val), loc(location) +Token::Token(TokenType type, int value, const Location& location) : type(type), int_value(value), location(location) { } -Token::Token(const TokenType& type, const std::string& val, const Location& location) - : tk_type(type), string_value(val), loc(location) +Token::Token(TokenType type, std::string value, const Location& location) + : type(type), string_value(std::move(value)), location(location) { } -Token::Token(const TokenType& type, const float& val, const Location& location) - : tk_type(type), float_value(val), loc(location) +Token::Token(TokenType type, float value, const Location& location) : type(type), float_value(value), location(location) { } -Token::~Token() +Token Token::copy_with_new_type(const TokenType& type) const { -} + Token result(type, location); -Token Token::copy_with_new_type(const TokenType& type) -{ - Token result(type, loc); + result.m_line_text = m_line_text; - result.int_value = int_value; - result.float_value = float_value; - result.string_value = string_value; - - result.line_text = line_text; - - return result; -} - -std::string Token::to_string() const -{ - std::string details = loc.paren_str(); - if (tk_type == TT_Number) + if (int_value.has_value()) { - return format_string("INT:%d %s", int_value, details); + result.int_value = int_value.value(); } - else if (tk_type == TT_Float) + else if (float_value.has_value()) { - return format_string("FLOAT:%f %s", float_value, details); + result.float_value = float_value.value(); } - else if (tk_type == TT_Identifier) + else if (string_value.has_value()) { - return format_string("ID:%s %s", string_value, details); + result.string_value = string_value.value(); } - else if (tk_type == TT_Keyword) - { - return format_string("KEYWORD:%s %s", string_value, details); - } - else if (tk_type == TT_Type) - { - return format_string("TYPE:%s %s", string_value, details); - } - else if (tk_type == TT_String) - { - replace(const_cast(string_value), "\n", "\\n"); - return format_string("STRING:'%s' %s", string_value, details); - } - switch (tk_type) - { - case TT_EOF: - return "EOF " + details; - case TT_Plus: - return "PLUS " + details; - case TT_Minus: - return "MINUS " + details; - case TT_Mul: - return "MUL " + details; - case TT_Div: - return "DIV " + details; - case TT_At: - return "AT " + details; - case TT_Equal: - return "EQUAL " + details; - case TT_LessThan: - return "LESSTHAN " + details; - case TT_GreaterThan: - return "GREATERTHAN " + details; - case TT_LParen: - return "LPAREN " + details; - case TT_RParen: - return "RPAREN " + details; - case TT_LBracket: - return "LBRACKET " + details; - case TT_RBracket: - return "RBRACKET " + details; - case TT_Semicolon: - return "SEMICOLON " + details; - case TT_LoadedString: - return "LDSTRING " + details; - case TT_Equals: - return "EQUALS " + details; - case TT_GTE: - return "GTE " + details; - case TT_LTE: - return "LTE " + details; - case TT_Period: - return "PERIOD " + details; - case TT_Comma: - return "COMMA " + details; - case TT_Path: - return "PATH " + details; - case TT_Exclamation: - return "EXCLAMATION " + details; - case TT_NEqual: - return "NEQUAL " + details; - case TT_LSQB: - return "LEFTSQB " + details; - case TT_RSQB: - return "RIGHTSQB " + details; - case TT_Import: - return "IMPORT " + details; - case TT_Syscall0: - return "SYSCALL0 " + details; - case TT_Syscall1: - return "SYSCALL1 " + details; - case TT_Syscall2: - return "SYSCALL2 " + details; - case TT_Syscall3: - return "SYSCALL3 " + details; - case TT_Syscall4: - return "SYSCALL4 " + details; - case TT_Syscall5: - return "SYSCALL5 " + details; - case TT_CompilerMacro: - return "COMPMACRO " + details; - case TT_Let: - return "LET " + details; - case TT_In: - return "IN " + details; - } - return ""; -} -std::string Token::line() const -{ - return this->line_text; + return std::move(result); } Token Token::make_with_line(const Token& origin, const std::string& line_text) { - Token result(origin.tk_type, origin.loc); + Token result = origin.copy_with_new_type(origin.type); - result.int_value = origin.int_value; - result.float_value = origin.float_value; - result.string_value = origin.string_value; - - result.line_text = line_text; - - return result; + return std::move(result); } void Token::operator=(const Token& other) { - tk_type = other.tk_type; - int_value = other.int_value; - string_value = other.string_value; - float_value = other.float_value; - line_text = other.line_text; -} - -void Token::erase(Token& tk) -{ - tk.tk_type = TT_Null; -} - -bool Token::match_token_types(const std::vector& a, const std::vector& b, int count) -{ - int size = [](int a, int b) { return a > b ? b : a; }(a.size() - count, b.size()); - - for (int i = 0; i < size; ++i) - { - if (a[i + count].tk_type != b[i].tk_type) - { - return false; - } - } - return true; + *this = other.copy_with_new_type(other.type); } diff --git a/src/Token.h b/src/Token.h index 253379f..12c4e43 100644 --- a/src/Token.h +++ b/src/Token.h @@ -25,62 +25,56 @@ enum TokenType TT_Semicolon, TT_LoadedString, TT_EOF, - TT_Null, TT_Equals, TT_GTE, TT_LTE, TT_Period, TT_Comma, - TT_Path, TT_Exclamation, TT_NEqual, TT_LSQB, TT_RSQB, - TT_Type, - TT_Import, TT_Syscall0, TT_Syscall1, TT_Syscall2, TT_Syscall3, TT_Syscall4, TT_Syscall5, - TT_CompilerMacro, TT_Let, - TT_In + TT_In, + TT_Colon, }; -extern const std::string token_strings[]; - /* Struct to represent tokens generated by the Lexer. */ struct Token { - TokenType tk_type; + TokenType type; - int int_value; - std::string string_value; - float float_value; + std::optional int_value; + std::optional string_value; + std::optional float_value; - Location loc; + Location location; - Token(const TokenType& type); + Token(TokenType type); - Token(const TokenType& type, const Location& location); + Token(TokenType type, const Location& location); - Token(const TokenType& type, const int& val, const Location& location); + Token(TokenType type, int value, const Location& location); - Token(const TokenType& type, const std::string& val, const Location& location); + Token(TokenType type, std::string value, const Location& location); - Token(const TokenType& type, const std::string& val); + Token(TokenType type, std::string value); - Token(const TokenType& type, const float& val, const Location& location); + Token(TokenType type, float val, const Location& location); - ~Token(); - - /* Return a string representation of the Token's contents. */ - std::string to_string() const; + ~Token() = default; /* Return the contents of the line where the Token was located. */ - std::string line() const; + std::string line() const + { + return m_line_text; + } /* Return a copy of the original token, but adding the contents of the line where the token was located. */ @@ -88,19 +82,13 @@ struct Token void operator=(const Token& other); - /* Convert the Token into a blank token (does not delete it), so that the Normalizer can remove it afterwards. - This is to not alter vectors while iterating over them. */ - static void erase(Token& tk); - /* Return a copy of this Token, but with its TokenType changed. */ - Token copy_with_new_type(const TokenType& type); - - /* Iterate over two vectors of Tokens, starting from count for vector A, starting from 0 for vector B, checking if - the current Tokens' types match. If at any point they don't, return false. Else, return true. */ - static bool match_token_types(const std::vector& a, const std::vector& b, int count); + Token copy_with_new_type(const TokenType& type) const; private: - std::string line_text; + // FIXME: this should be moved to Location, to remove all Token* that are only used to throw errors at a certain + // location. + std::string m_line_text; }; /* typedef to make it easier to see a what a std::vector of tokens is being used for. */ diff --git a/src/sapphire.cpp b/src/sapphire.cpp index 3e47faa..659353f 100644 --- a/src/sapphire.cpp +++ b/src/sapphire.cpp @@ -2,9 +2,7 @@ #include "FileIO.h" #include "GlobalContext.h" #include "IRBuilder.h" -#include "Importer.h" #include "Lexer.h" -#include "Normalizer.h" #include "Parser.h" #include "sapphirepch.h" #include "utils.h" @@ -24,16 +22,6 @@ int main(int argc, char** argv) result = lexer->lex(contents); } - Importer::init(FileIO::remove_file_extension(fname)); - { - benchmark("Importing"); - result = Importer::evaluate(result); - } - { - benchmark("Normalizing"); - result = Normalizer::normalize(result); - } - initGlobalContext(); auto parser = Parser::new_parser(result); diff --git a/src/sapphirepch.h b/src/sapphirepch.h index a6304e5..2bcf759 100644 --- a/src/sapphirepch.h +++ b/src/sapphirepch.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include \ No newline at end of file diff --git a/tests/import-inexistent.json b/tests/import-inexistent.json index aed893e..fe10082 100644 --- a/tests/import-inexistent.json +++ b/tests/import-inexistent.json @@ -4,6 +4,6 @@ "flags": [], "exit-code": 1, "stdout": "", - "stderr": "\u001b[1;1mtests/import-inexistent.sp:1:8: \u001b[31;49merror: \u001b[0;0mfile not found\n1 import penguin_boi;\n \u001b[31;49m^\u001b[0;0m\n" + "stderr": "\u001b[1;1mtests/import-inexistent.sp:1:1: \u001b[31;49merror: \u001b[0;0mExpected let\n1 \n \u001b[31;49m^\u001b[0;0m\n" } } \ No newline at end of file diff --git a/tests/simple.json b/tests/simple.json index 4c73651..e0b13f7 100644 --- a/tests/simple.json +++ b/tests/simple.json @@ -4,6 +4,6 @@ "flags": [], "exit-code": 1, "stdout": "", - "stderr": "\u001b[1;1mtests/simple.sp:1:1: \u001b[31;49merror: \u001b[0;0mExpected let at the beginning of a function\n1 const { outln } from @'core/io';\n \u001b[31;49m^\u001b[0;0m\n" + "stderr": "\u001b[1;1mtests/simple.sp:1:1: \u001b[31;49merror: \u001b[0;0mExpected let\n1 \n \u001b[31;49m^\u001b[0;0m\n" } } \ No newline at end of file diff --git a/tests/wimport.json b/tests/wimport.json index 355d9c2..e038d90 100644 --- a/tests/wimport.json +++ b/tests/wimport.json @@ -4,13 +4,8 @@ "flags": [ "--wimport" ], - "exit-code": 0, - "stdout": "\u001b[1;1mtests/wimport.sp:1:8: \u001b[33;49mwarning: \u001b[0;0mfile already imported, skipping\n1 import tests/wimport;\n \u001b[33;49m^\u001b[0;0m\n", - "stderr": "" - }, - "run": { - "exit-code": 0, + "exit-code": 1, "stdout": "", - "stderr": "" + "stderr": "\u001b[1;1mtests/wimport.sp:1:1: \u001b[31;49merror: \u001b[0;0mExpected let\n1 \n \u001b[31;49m^\u001b[0;0m\n" } } \ No newline at end of file