diff --git a/.clang-format b/.clang-format index 43a67ab..01cf865 100644 --- a/.clang-format +++ b/.clang-format @@ -10,6 +10,6 @@ SpaceAfterTemplateKeyword: 'false' SpacesInCStyleCastParentheses: 'false' SpacesInSquareBrackets: 'false' TabWidth: '4' -UseTab: ForIndentation +UseTab: Never ... diff --git a/src/AST/ASTNode.h b/src/AST/ASTNode.h index b891d1b..6e65600 100644 --- a/src/AST/ASTNode.h +++ b/src/AST/ASTNode.h @@ -3,7 +3,7 @@ class ASTNode { -public: + public: ASTNode(); ~ASTNode(); }; diff --git a/src/AST/BinaryOpNode.cpp b/src/AST/BinaryOpNode.cpp index bf5d62f..c410af1 100644 --- a/src/AST/BinaryOpNode.cpp +++ b/src/AST/BinaryOpNode.cpp @@ -1,6 +1,6 @@ #include "BinaryOpNode.h" -BinaryOpNode::BinaryOpNode(std::shared_ptr left,std::shared_ptr right) +BinaryOpNode::BinaryOpNode(std::shared_ptr left, std::shared_ptr right) : left(left), right(right), ExprNode() { } diff --git a/src/AST/BinaryOpNode.h b/src/AST/BinaryOpNode.h index a476dd9..30e0542 100644 --- a/src/AST/BinaryOpNode.h +++ b/src/AST/BinaryOpNode.h @@ -3,10 +3,11 @@ class BinaryOpNode : public ExprNode { -protected: + protected: std::shared_ptr left; std::shared_ptr right; -public: - BinaryOpNode(std::shared_ptr left,std::shared_ptr right); + + public: + BinaryOpNode(std::shared_ptr left, std::shared_ptr right); ~BinaryOpNode(); }; \ No newline at end of file diff --git a/src/AST/ExprNode.cpp b/src/AST/ExprNode.cpp index 27533d2..281c985 100644 --- a/src/AST/ExprNode.cpp +++ b/src/AST/ExprNode.cpp @@ -1,16 +1,13 @@ #include "ExprNode.h" -ExprNode::ExprNode(std::shared_ptr child): child(child), ASTNode() +ExprNode::ExprNode(std::shared_ptr child) : child(child), ASTNode() { - } -ExprNode::ExprNode(): ASTNode() +ExprNode::ExprNode() : ASTNode() { - } -ExprNode::~ExprNode() +ExprNode::~ExprNode() { - } \ No newline at end of file diff --git a/src/AST/ExprNode.h b/src/AST/ExprNode.h index 7695727..1d60ce7 100644 --- a/src/AST/ExprNode.h +++ b/src/AST/ExprNode.h @@ -4,7 +4,8 @@ class ExprNode : public ASTNode { std::shared_ptr child; -public: + + public: ExprNode(std::shared_ptr child); ExprNode(); ~ExprNode(); diff --git a/src/AST/MulNode.cpp b/src/AST/MulNode.cpp index 73025f9..a53d423 100644 --- a/src/AST/MulNode.cpp +++ b/src/AST/MulNode.cpp @@ -1,7 +1,7 @@ #include "MulNode.h" -MulNode::MulNode(std::shared_ptr left,std::shared_ptr right, char op) - : BinaryOpNode(left,right), op(op) +MulNode::MulNode(std::shared_ptr left, std::shared_ptr right, char op) + : BinaryOpNode(left, right), op(op) { } diff --git a/src/AST/MulNode.h b/src/AST/MulNode.h index a195304..822222c 100644 --- a/src/AST/MulNode.h +++ b/src/AST/MulNode.h @@ -1,10 +1,11 @@ #pragma once #include "BinaryOpNode.h" -class MulNode final : public BinaryOpNode +class MulNode final : public BinaryOpNode { char op; -public: - MulNode(std::shared_ptr left,std::shared_ptr right, char op); + + public: + MulNode(std::shared_ptr left, std::shared_ptr right, char op); ~MulNode(); }; \ No newline at end of file diff --git a/src/AST/NumberNode.cpp b/src/AST/NumberNode.cpp index e30f840..0e025c5 100644 --- a/src/AST/NumberNode.cpp +++ b/src/AST/NumberNode.cpp @@ -1,2 +1 @@ #include "NumberNode.h" - diff --git a/src/AST/NumberNode.h b/src/AST/NumberNode.h index b7f9509..9b420e4 100644 --- a/src/AST/NumberNode.h +++ b/src/AST/NumberNode.h @@ -2,9 +2,9 @@ #include "ExprNode.h" #include -class NumberNode : public ExprNode +class NumberNode : public ExprNode { -public: + public: NumberNode(); ~NumberNode(); }; \ No newline at end of file diff --git a/src/AST/StatementNode.cpp b/src/AST/StatementNode.cpp index 0f059b9..3bfa61c 100644 --- a/src/AST/StatementNode.cpp +++ b/src/AST/StatementNode.cpp @@ -1,11 +1,9 @@ #include "StatementNode.h" -StatementNode::StatementNode(std::shared_ptr child): child(child), ASTNode() +StatementNode::StatementNode(std::shared_ptr child) : child(child), ASTNode() { - } -StatementNode::~StatementNode() +StatementNode::~StatementNode() { - } \ No newline at end of file diff --git a/src/AST/StatementNode.h b/src/AST/StatementNode.h index e28763d..f887290 100644 --- a/src/AST/StatementNode.h +++ b/src/AST/StatementNode.h @@ -4,7 +4,8 @@ class StatementNode : public ASTNode { std::shared_ptr child; -public: + + public: StatementNode(std::shared_ptr child); ~StatementNode(); }; \ No newline at end of file diff --git a/src/AST/SumNode.cpp b/src/AST/SumNode.cpp index ef10b62..0ef3a05 100644 --- a/src/AST/SumNode.cpp +++ b/src/AST/SumNode.cpp @@ -1,7 +1,7 @@ #include "SumNode.h" -SumNode::SumNode(std::shared_ptr left,std::shared_ptr right, char op) - : BinaryOpNode(left,right), op(op) +SumNode::SumNode(std::shared_ptr left, std::shared_ptr right, char op) + : BinaryOpNode(left, right), op(op) { } diff --git a/src/AST/SumNode.h b/src/AST/SumNode.h index ccc3844..3bd8fe1 100644 --- a/src/AST/SumNode.h +++ b/src/AST/SumNode.h @@ -1,10 +1,11 @@ #pragma once #include "BinaryOpNode.h" -class SumNode final : public BinaryOpNode +class SumNode final : public BinaryOpNode { char op; -public: - SumNode(std::shared_ptr left,std::shared_ptr right, char op); + + public: + SumNode(std::shared_ptr left, std::shared_ptr right, char op); ~SumNode(); }; \ No newline at end of file diff --git a/src/Arguments.cpp b/src/Arguments.cpp index cbbab35..2a3644a 100644 --- a/src/Arguments.cpp +++ b/src/Arguments.cpp @@ -9,19 +9,21 @@ bool Arguments::wimport; llvm::Triple Arguments::TargetTriple; std::string Arguments::cpu; -void Arguments::parse(int argc, char **argv) +void Arguments::parse(int argc, char** argv) { - try { - TCLAP::CmdLine command_line("The Sapphire compiler.",' ',"0.1"); + try + { + TCLAP::CmdLine command_line("The Sapphire compiler.", ' ', "0.1"); - TCLAP::UnlabeledValueArg input_fname_arg("file","Input file.",true,"test.sp","string"); + TCLAP::UnlabeledValueArg input_fname_arg("file", "Input file.", true, "test.sp", "string"); - TCLAP::ValueArg output_fname_arg("o","output","Output file.",false,"sp-output","string"); - TCLAP::ValueArg march_arg("","march","Architecture to compile for.",false,"native","string"); - TCLAP::ValueArg mcpu_arg("","mcpu","CPU to compile for.",false,"generic","string"); - TCLAP::ValueArg msystem_arg("","msystem","Operating System to compile for.",false,"native","string"); + TCLAP::ValueArg output_fname_arg("o", "output", "Output file.", false, "sp-output", "string"); + TCLAP::ValueArg march_arg("", "march", "Architecture to compile for.", false, "native", "string"); + TCLAP::ValueArg mcpu_arg("", "mcpu", "CPU to compile for.", false, "generic", "string"); + TCLAP::ValueArg msystem_arg("", "msystem", "Operating System to compile for.", false, "native", + "string"); - TCLAP::SwitchArg wimport_arg("","wimport","Show a warning when trying to import an already imported file."); + TCLAP::SwitchArg wimport_arg("", "wimport", "Show a warning when trying to import an already imported file."); command_line.add(wimport_arg); @@ -32,7 +34,7 @@ void Arguments::parse(int argc, char **argv) command_line.add(mcpu_arg); command_line.add(msystem_arg); - command_line.parse(argc,argv); + command_line.parse(argc, argv); input_fname = input_fname_arg.getValue(); output_fname = output_fname_arg.getValue(); @@ -40,9 +42,10 @@ void Arguments::parse(int argc, char **argv) cpu = mcpu_arg.getValue(); - setTriple(march_arg.getValue(),msystem_arg.getValue()); - - } catch (TCLAP::ArgException &e) { + setTriple(march_arg.getValue(), msystem_arg.getValue()); + } + catch (TCLAP::ArgException& e) + { Error::throw_error_without_location(e.error()); } } @@ -50,18 +53,18 @@ void Arguments::parse(int argc, char **argv) void Arguments::setTriple(const std::string& arch, const std::string& system) { std::string triple = llvm::sys::getDefaultTargetTriple(); - llvm::Triple tgTriple(triple); + llvm::Triple targetTriple(triple); - if(arch != "native") + if (arch != "native") { - tgTriple.setArchName(arch); + targetTriple.setArchName(arch); } - if(system != "native") + if (system != "native") { - tgTriple.setOSAndEnvironmentName(system); + targetTriple.setOSAndEnvironmentName(system); } - tgTriple.setVendor(llvm::Triple::VendorType::UnknownVendor); // let's leave it like that + targetTriple.setVendor(llvm::Triple::VendorType::UnknownVendor); // let's leave it like that - TargetTriple = tgTriple; + TargetTriple = targetTriple; } diff --git a/src/Arguments.h b/src/Arguments.h index 28bdf43..9d362bd 100644 --- a/src/Arguments.h +++ b/src/Arguments.h @@ -1,6 +1,6 @@ #pragma once -#include #include +#include struct Arguments { @@ -14,6 +14,7 @@ struct Arguments static std::string cpu; static llvm::Triple TargetTriple; -private: + + private: static void setTriple(const std::string& arch, const std::string& system); }; diff --git a/src/Error.cpp b/src/Error.cpp index 9f57414..23e1f48 100644 --- a/src/Error.cpp +++ b/src/Error.cpp @@ -1,13 +1,16 @@ #include "Error.h" -#include "StringConversion.h" #include "Importer.h" -#include +#include "StringConversion.h" #include +#include std::string Error::get_spaces(const int& num) { std::string output; - for(int i = 0; i < num; i++) { output += " "; } + for (int i = 0; i < num; i++) + { + output += " "; + } return output; } @@ -25,28 +28,29 @@ void Error::show_import_line(const Location& loc, std::ostream& output_stream) output_stream << std::endl; } -void Error::show_import_lines(const Location& loc, void(*import_line_printer)(const Location&, std::ostream&), std::ostream& stream) +void Error::show_import_lines(const Location& loc, void (*import_line_printer)(const Location&, std::ostream&), + std::ostream& stream) { std::vector locations; Location scanned_loc = loc; - while(scanned_loc.parent) + while (scanned_loc.parent) { locations.push_back(*scanned_loc.parent.get()); scanned_loc = *scanned_loc.parent.get(); } - std::reverse(locations.begin(),locations.end()); + std::reverse(locations.begin(), locations.end()); - for(const auto& location : locations) + for (const auto& location : locations) { - import_line_printer(location,stream); + import_line_printer(location, stream); } } [[noreturn]] void Error::throw_error(const Location& loc, const std::string line_text, const std::string& details) { - show_import_lines(loc,show_import_line,std::cerr); + show_import_lines(loc, show_import_line, std::cerr); std::string linestr = int_to_string(loc.line); @@ -68,7 +72,7 @@ void Error::show_import_lines(const Location& loc, void(*import_line_printer)(co std::cerr << std::endl; std::cerr << get_spaces(4 + linestr.size() + loc.column - 1); - + std::cerr << "\033[31;49m"; std::cerr << "^"; std::cerr << "\033[0;0m"; @@ -89,12 +93,11 @@ void Error::show_import_lines(const Location& loc, void(*import_line_printer)(co std::cerr << std::endl; exit(1); - } void Error::throw_warning(const Location& loc, const std::string line_text, const std::string& details) { - show_import_lines(loc,show_import_line,std::cout); + show_import_lines(loc, show_import_line, std::cout); std::string linestr = int_to_string(loc.line); @@ -116,7 +119,7 @@ void Error::throw_warning(const Location& loc, const std::string line_text, cons std::cout << std::endl; std::cout << get_spaces(4 + linestr.size() + loc.column - 1); - + std::cout << "\033[33;49m"; std::cout << "^"; std::cout << "\033[0;0m"; diff --git a/src/Error.h b/src/Error.h index 5339312..79e334d 100644 --- a/src/Error.h +++ b/src/Error.h @@ -3,15 +3,16 @@ namespace Error { - void show_import_line(const Location& loc, std::ostream& output_stream); +void show_import_line(const Location& loc, std::ostream& output_stream); - [[noreturn]] void throw_error(const Location& loc, const std::string line_text, const std::string& details); +[[noreturn]] void throw_error(const Location& loc, const std::string line_text, const std::string& details); - [[noreturn]] void throw_error_without_location(const std::string& details); +[[noreturn]] void throw_error_without_location(const std::string& details); - void throw_warning(const Location& loc, const std::string line_text, const std::string& details); +void throw_warning(const Location& loc, const std::string line_text, const std::string& details); - void show_import_lines(const Location& loc, void(*import_line_printer)(const Location&, std::ostream&), std::ostream& stream); +void show_import_lines(const Location& loc, void (*import_line_printer)(const Location&, std::ostream&), + std::ostream& stream); - std::string get_spaces(const int& num); -} +std::string get_spaces(const int& num); +} // namespace Error diff --git a/src/FileIO.cpp b/src/FileIO.cpp index 610e38c..a8c844e 100644 --- a/src/FileIO.cpp +++ b/src/FileIO.cpp @@ -1,34 +1,36 @@ #include "FileIO.h" #include "Error.h" +#include +#include +#include #include #include -#include -#include -#include std::string FileIO::read_all(const std::string& filename) { - if(std::filesystem::is_directory(std::filesystem::status(filename))) Error::throw_error_without_location("unable to open file "+ filename + ": Is a directory"); + if (std::filesystem::is_directory(std::filesystem::status(filename))) + Error::throw_error_without_location("unable to open file " + filename + ": Is a directory"); std::ifstream file; file.exceptions(std::ios::badbit | std::ios::failbit); try { file.open(filename); } - catch(const std::exception& e) + catch (const std::exception& e) { - Error::throw_error_without_location("unable to open file "+ filename + ": " + strerror(errno)); + Error::throw_error_without_location("unable to open file " + filename + ": " + strerror(errno)); return ""; } file.exceptions(std::ios::goodbit); std::vector file_chars; char fchar; - while (file.good()) { + while (file.good()) + { fchar = file.get(); - if(fchar != -1 ) file_chars.push_back(fchar); + if (fchar != -1) file_chars.push_back(fchar); } file.close(); - return std::string(file_chars.begin(),file_chars.end()); + return std::string(file_chars.begin(), file_chars.end()); } void FileIO::write_all(const std::string& filename, const std::string& contents) @@ -37,3 +39,8 @@ void FileIO::write_all(const std::string& filename, const std::string& contents) file << contents; file.close(); } + +std::string FileIO::remove_file_extension(const std::string& filename) +{ + return filename.substr(0, filename.find_last_of('.')); +} \ No newline at end of file diff --git a/src/FileIO.h b/src/FileIO.h index 8fcb77c..18c29e0 100644 --- a/src/FileIO.h +++ b/src/FileIO.h @@ -4,8 +4,10 @@ /* Namespace for simple file operations. */ namespace FileIO { - /* Helper function to read all of a file's contents. */ - std::string read_all(const std::string& filename); - /* Helper function to write a string to a file. */ - void write_all(const std::string& filename, const std::string& contents); -} +/* Helper function to read all of a file's contents. */ +std::string read_all(const std::string& filename); +/* Helper function to write a string to a file. */ +void write_all(const std::string& filename, const std::string& contents); +/* Return a filename without its extension. */ +std::string remove_file_extension(const std::string& filename); +} // namespace FileIO diff --git a/src/Importer.cpp b/src/Importer.cpp index 410a880..1f6cc7f 100644 --- a/src/Importer.cpp +++ b/src/Importer.cpp @@ -1,7 +1,7 @@ #include "Importer.h" +#include "Arguments.h" #include "Error.h" #include "FileIO.h" -#include "Arguments.h" #include #include #include @@ -18,139 +18,142 @@ TokenStream Importer::evaluate(const TokenStream& original) auto ret_tk = original; TokenStream new_tokens; - while(original[i].tk_type != TT_EOF) + while (original[i].tk_type != TT_EOF) { Token current_token = original[i]; - if(current_token.tk_type == TT_Import) + if (current_token.tk_type == TT_Import) { - Token next_token = original[i+1]; - if(next_token.tk_type == TT_EOF) - Error::throw_error(current_token.loc,current_token.line(),"did not expect EOF after import statement"); + Token next_token = original[i + 1]; + if (next_token.tk_type == TT_EOF) + Error::throw_error(current_token.loc, current_token.line(), + "did not expect EOF after import statement"); - if(next_token.tk_type == TT_Identifier) // TODO: add support for strings + if (next_token.tk_type == TT_Identifier) // TODO: add support for strings { - Token last_token = original[i+2]; + Token last_token = original[i + 2]; - if(last_token.tk_type != TT_Semicolon) - Error::throw_error(last_token.loc,last_token.line(),"expected a semicolon"); + if (last_token.tk_type != TT_Semicolon) + Error::throw_error(last_token.loc, last_token.line(), "expected a semicolon"); - if(std::find(imported_files.begin(),imported_files.end(),next_token.string_value) != imported_files.end()) + if (std::find(imported_files.begin(), imported_files.end(), next_token.string_value) != + imported_files.end()) { - if(Arguments::wimport) - Error::throw_warning(next_token.loc,next_token.line(),"file already imported, skipping"); + if (Arguments::wimport) + Error::throw_warning(next_token.loc, next_token.line(), "file already imported, skipping"); Token::erase(ret_tk[i]); - Token::erase(ret_tk[i+1]); - Token::erase(ret_tk[i+2]); + Token::erase(ret_tk[i + 1]); + Token::erase(ret_tk[i + 2]); ++i; continue; } - if(import_count > MAX_IMPORTS) - Error::throw_error(current_token.loc,current_token.line(),"maximum import depth exceeded"); + if (import_count > MAX_IMPORTS) + Error::throw_error(current_token.loc, current_token.line(), "maximum import depth exceeded"); std::string input_file_name = next_token.string_value + ".sp"; std::ifstream input_file(input_file_name); // only used to check if it exists, thus closed afterwards - if(!input_file.good()) - Error::throw_error(next_token.loc,next_token.line(),"file not found"); + if (!input_file.good()) Error::throw_error(next_token.loc, next_token.line(), "file not found"); input_file.close(); auto file_contents = FileIO::read_all(input_file_name); - auto top_location = std::make_shared(current_token.loc.line,current_token.loc.column,current_token.loc.fname); + auto top_location = std::make_shared(current_token.loc.line, current_token.loc.column, + current_token.loc.fname); top_location.get()->parent = current_token.loc.parent; import_stack.push_back(top_location); // Keep ref_count above 0, just in case auto import_lexer = Lexer::make_lexer(input_file_name); - Lexer::assign_parent_location(import_lexer,top_location); + Lexer::assign_parent_location(import_lexer, top_location); TokenStream imported_tokens = import_lexer->lex(file_contents); imported_tokens.pop_back(); // remove EOF at end of token stream - for(auto& tk : imported_tokens) + for (auto& tk : imported_tokens) { tk.loc.parent = top_location; } imported_files.push_back(next_token.string_value); - new_tokens.insert(new_tokens.end(),imported_tokens.begin(),imported_tokens.end()); + new_tokens.insert(new_tokens.end(), imported_tokens.begin(), imported_tokens.end()); Token::erase(ret_tk[i]); - Token::erase(ret_tk[i+1]); - Token::erase(ret_tk[i+2]); - } else if(next_token.tk_type == TT_Path) + Token::erase(ret_tk[i + 1]); + Token::erase(ret_tk[i + 2]); + } + else if (next_token.tk_type == TT_Path) { - Token last_token = original[i+2]; + Token last_token = original[i + 2]; - if(last_token.tk_type != TT_Semicolon) - Error::throw_error(last_token.loc,last_token.line(),"expected a semicolon"); + if (last_token.tk_type != TT_Semicolon) + Error::throw_error(last_token.loc, last_token.line(), "expected a semicolon"); - if(std::find(imported_files.begin(),imported_files.end(),next_token.string_value) != imported_files.end()) + if (std::find(imported_files.begin(), imported_files.end(), next_token.string_value) != + imported_files.end()) { - if(Arguments::wimport) - Error::throw_warning(next_token.loc,next_token.line(),"file already imported, skipping"); + if (Arguments::wimport) + Error::throw_warning(next_token.loc, next_token.line(), "file already imported, skipping"); Token::erase(ret_tk[i]); - Token::erase(ret_tk[i+1]); - Token::erase(ret_tk[i+2]); + Token::erase(ret_tk[i + 1]); + Token::erase(ret_tk[i + 2]); ++i; continue; } - if(import_count > MAX_IMPORTS) - Error::throw_error(current_token.loc,current_token.line(),"maximum import depth exceeded"); + if (import_count > MAX_IMPORTS) + Error::throw_error(current_token.loc, current_token.line(), "maximum import depth exceeded"); std::string input_file_name = next_token.string_value + ".sp"; std::ifstream input_file(input_file_name); // only used to check if it exists, thus closed afterwards - if(!input_file.good()) - Error::throw_error(next_token.loc,next_token.line(),"file not found"); + if (!input_file.good()) Error::throw_error(next_token.loc, next_token.line(), "file not found"); input_file.close(); auto file_contents = FileIO::read_all(input_file_name); - auto top_location = std::make_shared(current_token.loc.line,current_token.loc.column,current_token.loc.fname); + auto top_location = std::make_shared(current_token.loc.line, current_token.loc.column, + current_token.loc.fname); top_location.get()->parent = current_token.loc.parent; import_stack.push_back(top_location); // Keep ref_count above 0, just in case auto import_lexer = Lexer::make_lexer(input_file_name); - Lexer::assign_parent_location(import_lexer,top_location); + Lexer::assign_parent_location(import_lexer, top_location); TokenStream imported_tokens = import_lexer->lex(file_contents); imported_tokens.pop_back(); // remove EOF at end of token stream - for(auto& tk : imported_tokens) + for (auto& tk : imported_tokens) { tk.loc.parent = top_location; } imported_files.push_back(next_token.string_value); - new_tokens.insert(new_tokens.end(),imported_tokens.begin(),imported_tokens.end()); + new_tokens.insert(new_tokens.end(), imported_tokens.begin(), imported_tokens.end()); Token::erase(ret_tk[i]); - Token::erase(ret_tk[i+1]); - Token::erase(ret_tk[i+2]); - } else - Error::throw_error(next_token.loc,next_token.line(),"import keyword should be followed by an identifier"); - + Token::erase(ret_tk[i + 1]); + Token::erase(ret_tk[i + 2]); + } + else + Error::throw_error(next_token.loc, next_token.line(), + "import keyword should be followed by an identifier"); } ++i; - - } - if(new_tokens.size() != 0) + if (new_tokens.size() != 0) { - new_tokens.insert(new_tokens.end(),ret_tk.begin(),ret_tk.end()); + new_tokens.insert(new_tokens.end(), ret_tk.begin(), ret_tk.end()); import_count++; return evaluate(new_tokens); } diff --git a/src/Importer.h b/src/Importer.h index 8e7f3d1..6b54b4c 100644 --- a/src/Importer.h +++ b/src/Importer.h @@ -1,12 +1,12 @@ #pragma once -#include "Token.h" #include "Lexer.h" +#include "Token.h" namespace Importer { - extern int import_count; - extern std::vector> import_stack; - extern std::vector imported_files; - TokenStream evaluate(const TokenStream& original); - void init(std::string init_file); -} +extern int import_count; +extern std::vector> import_stack; +extern std::vector imported_files; +TokenStream evaluate(const TokenStream& original); +void init(std::string init_file); +} // namespace Importer diff --git a/src/Lexer.cpp b/src/Lexer.cpp index d1f30e3..c662f7a 100644 --- a/src/Lexer.cpp +++ b/src/Lexer.cpp @@ -7,10 +7,10 @@ #define IDENTIFIERS "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWYZ_0123456789" #define DIGITS "0123456789" -const std::array Lexer::types = {"void","bool","str","i8","i16","i32","i64","u8","u16","u32","u64","f32","f64","f128"}; +const std::array Lexer::types = {"void", "bool", "str", "i8", "i16", "i32", "i64", + "u8", "u16", "u32", "u64", "f32", "f64", "f128"}; -Lexer::Lexer(const std::string& fname) - : loc(1,0,fname), index(-1), prev_loc(1,0,fname) +Lexer::Lexer(const std::string& fname) : loc(1, 0, fname), index(-1), prev_loc(1, 0, fname) { } @@ -20,340 +20,369 @@ Lexer::~Lexer() int Lexer::advance() { - prev_loc = loc; - ++index; - loc.advance(); - if(index >= current_lexed_text.size()) return 0; - current_char = current_lexed_text[index]; - loc.pos_from_char(current_char); - if(current_char == '\n') - { - previous_line_text = current_line_text; - current_line_text = this->recalculate_current_line(current_lexed_text); - } - return 1; + prev_loc = loc; + ++index; + loc.advance(); + if (index >= current_lexed_text.size()) return 0; + current_char = current_lexed_text[index]; + loc.pos_from_char(current_char); + if (current_char == '\n') + { + previous_line_text = current_line_text; + current_line_text = this->recalculate_current_line(current_lexed_text); + } + return 1; } int Lexer::rewind() { - loc = prev_loc; - --index; - if(index == -1) return 0; - if(current_char == '\n') - { - current_line_text = previous_line_text; - } - current_char = current_lexed_text[index]; - return 1; + loc = prev_loc; + --index; + if (index == -1) return 0; + if (current_char == '\n') + { + current_line_text = previous_line_text; + } + current_char = current_lexed_text[index]; + return 1; } std::string Lexer::recalculate_current_line(const std::string& text) { - int idx = index; - std::string final_str; - ++idx; - while(idx != text.size() && text[idx] != '\n') - { - final_str += text[idx]; - ++idx; - } - return final_str; + int idx = index; + std::string final_str; + ++idx; + while (idx != text.size() && text[idx] != '\n') + { + final_str += text[idx]; + ++idx; + } + return final_str; } std::shared_ptr Lexer::make_lexer(const std::string& fname) { - return std::shared_ptr(new Lexer(fname)); // not using make_shared because the constructor is private + return std::shared_ptr(new Lexer(fname)); // not using make_shared because the constructor is private } void Lexer::assign_parent_location(std::shared_ptr& lexer, const std::shared_ptr& loc) { - lexer->loc.parent = loc; + lexer->loc.parent = loc; } bool Lexer::is_in_string(const std::string& string, const char& character) { - return string.find(character) != std::string::npos; + return string.find(character) != std::string::npos; } TokenStream Lexer::lex(const std::string& text) { - TokenStream result; - bool comment = false; - current_lexed_text = text; - current_line_text = this->recalculate_current_line(current_lexed_text); + TokenStream result; + bool comment = false; + current_lexed_text = text; + current_line_text = this->recalculate_current_line(current_lexed_text); - while(this->advance()) - { - if(this->current_char == '\n') comment = false; + while (this->advance()) + { + if (this->current_char == '\n') comment = false; - if(comment) continue; + if (comment) continue; - if(is_in_string(WHITESPACE,current_char)) continue; + if (is_in_string(WHITESPACE, current_char)) continue; - else if(is_in_string(LETTERS,current_char)) - { - result.push_back(create_identifier()); - } + else if (is_in_string(LETTERS, current_char)) + { + result.push_back(create_identifier()); + } - else if(is_in_string(DIGITS,current_char)) - { - result.push_back(create_number()); - } + else if (is_in_string(DIGITS, current_char)) + { + result.push_back(create_number()); + } - else if(current_char == '\'') - { - result.push_back(create_string()); - } + else if (current_char == '\'') + { + result.push_back(create_string()); + } - else switch(current_char) - { - case '/': - if(index + 1 != current_lexed_text.size()) - { - if(current_lexed_text[index+1] == '/') - { - comment = true; - break; - } - } - result.push_back(Token::make_with_line({TT_Div,loc},current_line_text)); - break; - case '+': - result.push_back(Token::make_with_line({TT_Plus,loc},current_line_text)); - break; - case '-': - result.push_back(Token::make_with_line({TT_Minus,loc},current_line_text)); - break; - case '*': - result.push_back(Token::make_with_line({TT_Mul,loc},current_line_text)); - break; - case '@': - result.push_back(Token::make_with_line({TT_At,loc},current_line_text)); - break; - case '=': - result.push_back(Token::make_with_line({TT_Equal,loc},current_line_text)); - break; - case '>': - result.push_back(Token::make_with_line({TT_GreaterThan,loc},current_line_text)); - break; - case '<': - result.push_back(Token::make_with_line({TT_LessThan,loc},current_line_text)); - break; - case '(': - result.push_back(Token::make_with_line({TT_LParen,loc},current_line_text)); - break; - case ')': - result.push_back(Token::make_with_line({TT_RParen,loc},current_line_text)); - break; - case '{': - result.push_back(Token::make_with_line({TT_RBracket,loc},current_line_text)); - break; - case '}': - result.push_back(Token::make_with_line({TT_LBracket,loc},current_line_text)); - break; - case ';': - result.push_back(Token::make_with_line({TT_Semicolon,loc},current_line_text)); - break; - case '.': - result.push_back(Token::make_with_line({TT_Period,loc},current_line_text)); - break; - case ',': - result.push_back(Token::make_with_line({TT_Comma,loc},current_line_text)); - break; - case '!': - result.push_back(Token::make_with_line({TT_Exclamation,loc},current_line_text)); - break; - case '[': - result.push_back(Token::make_with_line({TT_Exclamation,loc},current_line_text)); - break; - case ']': - result.push_back(Token::make_with_line({TT_Exclamation,loc},current_line_text)); - break; - default: - Error::throw_error(loc,current_line_text,"unknown character"); - } - } + else + switch (current_char) + { + case '/': + if (index + 1 != current_lexed_text.size()) + { + if (current_lexed_text[index + 1] == '/') + { + comment = true; + break; + } + } + result.push_back(Token::make_with_line({TT_Div, loc}, current_line_text)); + break; + case '+': + result.push_back(Token::make_with_line({TT_Plus, loc}, current_line_text)); + break; + case '-': + result.push_back(Token::make_with_line({TT_Minus, loc}, current_line_text)); + break; + case '*': + result.push_back(Token::make_with_line({TT_Mul, loc}, current_line_text)); + break; + case '@': + result.push_back(Token::make_with_line({TT_At, loc}, current_line_text)); + break; + case '=': + result.push_back(Token::make_with_line({TT_Equal, loc}, current_line_text)); + break; + case '>': + result.push_back(Token::make_with_line({TT_GreaterThan, loc}, current_line_text)); + break; + case '<': + result.push_back(Token::make_with_line({TT_LessThan, loc}, current_line_text)); + break; + case '(': + result.push_back(Token::make_with_line({TT_LParen, loc}, current_line_text)); + break; + case ')': + result.push_back(Token::make_with_line({TT_RParen, loc}, current_line_text)); + break; + case '{': + result.push_back(Token::make_with_line({TT_RBracket, loc}, current_line_text)); + break; + case '}': + result.push_back(Token::make_with_line({TT_LBracket, loc}, current_line_text)); + break; + case ';': + result.push_back(Token::make_with_line({TT_Semicolon, loc}, current_line_text)); + break; + case '.': + result.push_back(Token::make_with_line({TT_Period, loc}, current_line_text)); + break; + case ',': + result.push_back(Token::make_with_line({TT_Comma, loc}, current_line_text)); + break; + case '!': + result.push_back(Token::make_with_line({TT_Exclamation, loc}, current_line_text)); + break; + case '[': + result.push_back(Token::make_with_line({TT_Exclamation, loc}, current_line_text)); + break; + case ']': + result.push_back(Token::make_with_line({TT_Exclamation, loc}, current_line_text)); + break; + default: + Error::throw_error(loc, current_line_text, "unknown character"); + } + } - result.push_back(Token(TT_EOF,loc)); + result.push_back(Token(TT_EOF, loc)); - return result; + return result; } Token Lexer::create_identifier() { - std::vector characters; - int prev_line = loc.line; - int prev_column = loc.column; - bool is_path = false; - bool last_was_path = false; - Location saved_loc = this->loc; - Location saved_prev_loc = this->prev_loc; + std::vector characters; + int prev_line = loc.line; + int prev_column = loc.column; + bool is_path = false; + bool last_was_path = false; + Location saved_loc = this->loc; + Location saved_prev_loc = this->prev_loc; - characters.push_back(current_char); + characters.push_back(current_char); - while(this->advance()) - { - if(is_in_string(IDENTIFIERS,current_char)) - { - characters.push_back(current_char); - last_was_path = false; - } - else if(current_char == '/') - { - if(last_was_path) { - characters.pop_back(); - this->loc = saved_loc; - this->prev_loc = saved_prev_loc; - this->rewind(); - std::string identifier(characters.begin(), characters.end()); - return Token::make_with_line({TT_Path,identifier,{prev_line,prev_column,loc.fname}},current_line_text); - } + while (this->advance()) + { + if (is_in_string(IDENTIFIERS, current_char)) + { + characters.push_back(current_char); + last_was_path = false; + } + else if (current_char == '/') + { + if (last_was_path) + { + characters.pop_back(); + this->loc = saved_loc; + this->prev_loc = saved_prev_loc; + this->rewind(); + std::string identifier(characters.begin(), characters.end()); + return Token::make_with_line({TT_Path, identifier, {prev_line, prev_column, loc.fname}}, + current_line_text); + } - saved_loc = this->loc; - saved_prev_loc = this->prev_loc; - - characters.push_back(current_char); - is_path = true; - last_was_path = true; - } - else - { - this->rewind(); - std::string identifier(characters.begin(), characters.end()); - if(is_path) return Token::make_with_line({TT_Path,identifier,{prev_line,prev_column,loc.fname}},current_line_text); - auto location = std::find(types.begin(),types.end(),identifier); - if(location != types.end()) - { - return Token::make_with_line({TT_Type,identifier,{prev_line,prev_column,loc.fname}},current_line_text); - } - if (identifier == "import") return Token::make_with_line({TT_Import,{prev_line,prev_column,loc.fname}},current_line_text); - if (identifier == "syscall0") return Token::make_with_line({TT_Syscall0,{prev_line,prev_column,loc.fname}},current_line_text); - if (identifier == "syscall1") return Token::make_with_line({TT_Syscall1,{prev_line,prev_column,loc.fname}},current_line_text); - if (identifier == "syscall2") return Token::make_with_line({TT_Syscall2,{prev_line,prev_column,loc.fname}},current_line_text); - if (identifier == "syscall3") return Token::make_with_line({TT_Syscall3,{prev_line,prev_column,loc.fname}},current_line_text); - if (identifier == "syscall4") return Token::make_with_line({TT_Syscall4,{prev_line,prev_column,loc.fname}},current_line_text); - if (identifier == "syscall5") return Token::make_with_line({TT_Syscall5,{prev_line,prev_column,loc.fname}},current_line_text); - if( identifier == "compmacro" ) return Token::make_with_line({TT_CompilerMacro,{prev_line,prev_column,loc.fname}},current_line_text); - return Token::make_with_line({TT_Identifier,identifier,{prev_line,prev_column,loc.fname}},current_line_text); - } - } + saved_loc = this->loc; + saved_prev_loc = this->prev_loc; - std::string identifier(characters.begin(), characters.end()); - if(is_path) return Token::make_with_line({TT_Path,identifier,{prev_line,prev_column,loc.fname}},current_line_text); - auto location = std::find(types.begin(),types.end(),identifier); - if(location != types.end()) - { - return Token::make_with_line({TT_Type,identifier,{prev_line,prev_column,loc.fname}},current_line_text); - } - if (identifier == "import") return Token::make_with_line({TT_Import,{prev_line,prev_column,loc.fname}},current_line_text); - if (identifier == "syscall0") return Token::make_with_line({TT_Syscall0,{prev_line,prev_column,loc.fname}},current_line_text); - if (identifier == "syscall1") return Token::make_with_line({TT_Syscall1,{prev_line,prev_column,loc.fname}},current_line_text); - if (identifier == "syscall2") return Token::make_with_line({TT_Syscall2,{prev_line,prev_column,loc.fname}},current_line_text); - if (identifier == "syscall3") return Token::make_with_line({TT_Syscall3,{prev_line,prev_column,loc.fname}},current_line_text); - if (identifier == "syscall4") return Token::make_with_line({TT_Syscall4,{prev_line,prev_column,loc.fname}},current_line_text); - if (identifier == "syscall5") return Token::make_with_line({TT_Syscall5,{prev_line,prev_column,loc.fname}},current_line_text); - if( identifier == "compmacro" ) return Token::make_with_line({TT_CompilerMacro,{prev_line,prev_column,loc.fname}},current_line_text); - return Token::make_with_line({TT_Identifier,identifier,{prev_line,prev_column,loc.fname}},current_line_text); + characters.push_back(current_char); + is_path = true; + last_was_path = true; + } + else + { + this->rewind(); + std::string identifier(characters.begin(), characters.end()); + if (is_path) + return Token::make_with_line({TT_Path, identifier, {prev_line, prev_column, loc.fname}}, + current_line_text); + auto location = std::find(types.begin(), types.end(), identifier); + if (location != types.end()) + { + return Token::make_with_line({TT_Type, identifier, {prev_line, prev_column, loc.fname}}, + current_line_text); + } + if (identifier == "import") + return Token::make_with_line({TT_Import, {prev_line, prev_column, loc.fname}}, current_line_text); + if (identifier == "syscall0") + return Token::make_with_line({TT_Syscall0, {prev_line, prev_column, loc.fname}}, current_line_text); + if (identifier == "syscall1") + return Token::make_with_line({TT_Syscall1, {prev_line, prev_column, loc.fname}}, current_line_text); + if (identifier == "syscall2") + return Token::make_with_line({TT_Syscall2, {prev_line, prev_column, loc.fname}}, current_line_text); + if (identifier == "syscall3") + return Token::make_with_line({TT_Syscall3, {prev_line, prev_column, loc.fname}}, current_line_text); + if (identifier == "syscall4") + return Token::make_with_line({TT_Syscall4, {prev_line, prev_column, loc.fname}}, current_line_text); + if (identifier == "syscall5") + return Token::make_with_line({TT_Syscall5, {prev_line, prev_column, loc.fname}}, current_line_text); + if (identifier == "compmacro") + return Token::make_with_line({TT_CompilerMacro, {prev_line, prev_column, loc.fname}}, + current_line_text); + return Token::make_with_line({TT_Identifier, identifier, {prev_line, prev_column, loc.fname}}, + current_line_text); + } + } + + std::string identifier(characters.begin(), characters.end()); + if (is_path) + return Token::make_with_line({TT_Path, identifier, {prev_line, prev_column, loc.fname}}, current_line_text); + auto location = std::find(types.begin(), types.end(), identifier); + if (location != types.end()) + { + return Token::make_with_line({TT_Type, identifier, {prev_line, prev_column, loc.fname}}, current_line_text); + } + if (identifier == "import") + return Token::make_with_line({TT_Import, {prev_line, prev_column, loc.fname}}, current_line_text); + if (identifier == "syscall0") + return Token::make_with_line({TT_Syscall0, {prev_line, prev_column, loc.fname}}, current_line_text); + if (identifier == "syscall1") + return Token::make_with_line({TT_Syscall1, {prev_line, prev_column, loc.fname}}, current_line_text); + if (identifier == "syscall2") + return Token::make_with_line({TT_Syscall2, {prev_line, prev_column, loc.fname}}, current_line_text); + if (identifier == "syscall3") + return Token::make_with_line({TT_Syscall3, {prev_line, prev_column, loc.fname}}, current_line_text); + if (identifier == "syscall4") + return Token::make_with_line({TT_Syscall4, {prev_line, prev_column, loc.fname}}, current_line_text); + if (identifier == "syscall5") + return Token::make_with_line({TT_Syscall5, {prev_line, prev_column, loc.fname}}, current_line_text); + if (identifier == "compmacro") + return Token::make_with_line({TT_CompilerMacro, {prev_line, prev_column, loc.fname}}, current_line_text); + return Token::make_with_line({TT_Identifier, identifier, {prev_line, prev_column, loc.fname}}, current_line_text); } Token Lexer::create_number() { - std::vector characters; - int prev_line = loc.line; - int prev_column = loc.column; - int dot_count = 0; + std::vector characters; + int prev_line = loc.line; + int prev_column = loc.column; + int dot_count = 0; - characters.push_back(current_char); + characters.push_back(current_char); - while(this->advance()) - { - if(is_in_string(DIGITS,current_char)) - { - characters.push_back(current_char); - } - else if (current_char == '.') - { - if(dot_count == 0) - { - characters.push_back(current_char); - ++dot_count; - } - else { - Error::throw_warning(loc,current_line_text,"floats can only have one dot"); - this->rewind(); - float tk_value = std::stof(std::string(characters.begin(), characters.end()).c_str()); - return Token::make_with_line({TT_Float,tk_value,{prev_line,prev_column,loc.fname}},current_line_text); - } - } - else - { - this->rewind(); - if(dot_count != 0) - { - float tk_value = std::stof(std::string(characters.begin(), characters.end()).c_str()); - return Token::make_with_line({TT_Float,tk_value,{prev_line,prev_column,loc.fname}},current_line_text); - } - int tk_value = atoi(std::string(characters.begin(), characters.end()).c_str()); - return Token::make_with_line({TT_Number,tk_value,{prev_line,prev_column,loc.fname}},current_line_text); - } - } + while (this->advance()) + { + if (is_in_string(DIGITS, current_char)) + { + characters.push_back(current_char); + } + else if (current_char == '.') + { + if (dot_count == 0) + { + characters.push_back(current_char); + ++dot_count; + } + else + { + Error::throw_warning(loc, current_line_text, "floats can only have one dot"); + this->rewind(); + float tk_value = std::stof(std::string(characters.begin(), characters.end()).c_str()); + return Token::make_with_line({TT_Float, tk_value, {prev_line, prev_column, loc.fname}}, + current_line_text); + } + } + else + { + this->rewind(); + if (dot_count != 0) + { + float tk_value = std::stof(std::string(characters.begin(), characters.end()).c_str()); + return Token::make_with_line({TT_Float, tk_value, {prev_line, prev_column, loc.fname}}, + current_line_text); + } + int tk_value = atoi(std::string(characters.begin(), characters.end()).c_str()); + return Token::make_with_line({TT_Number, tk_value, {prev_line, prev_column, loc.fname}}, current_line_text); + } + } - if(dot_count != 0) - { - float tk_value = std::stof(std::string(characters.begin(), characters.end()).c_str()); - return Token::make_with_line({TT_Float,tk_value,{prev_line,prev_column,loc.fname}},current_line_text); - } - int tk_value = atoi(std::string(characters.begin(), characters.end()).c_str()); - return Token::make_with_line({TT_Number,tk_value,{prev_line,prev_column,loc.fname}},current_line_text); + if (dot_count != 0) + { + float tk_value = std::stof(std::string(characters.begin(), characters.end()).c_str()); + return Token::make_with_line({TT_Float, tk_value, {prev_line, prev_column, loc.fname}}, current_line_text); + } + int tk_value = atoi(std::string(characters.begin(), characters.end()).c_str()); + return Token::make_with_line({TT_Number, tk_value, {prev_line, prev_column, loc.fname}}, current_line_text); } Token Lexer::create_string() { - std::vector characters; - int prev_line = loc.line; - int prev_column = loc.column; + std::vector characters; + int prev_line = loc.line; + int prev_column = loc.column; - while(this->advance()) - { - if(current_char == '\n') - { - this->rewind(); - Error::throw_error(loc,current_line_text,"expected end of string but got newline"); - } - if(current_char == '\'') - { - std::string identifier(characters.begin(), characters.end()); - return Token::make_with_line({TT_String,identifier,{prev_line,prev_column,loc.fname}},current_line_text); - } - if(current_char == '\\') - { - if(index + 1 == current_lexed_text.size()) - { - Error::throw_error(loc,current_line_text,"unfinished escape sequence"); - } - switch(current_lexed_text[index+1]) - { - case 'n': - characters.push_back('\n'); - break; - case '\'': - characters.push_back('\''); - break; - case '\\': - characters.push_back('\\'); - break; - default: - Error::throw_error(loc,current_line_text,"unknown escape sequence"); - } - ++index; - ++loc.column; - continue; - } + while (this->advance()) + { + if (current_char == '\n') + { + this->rewind(); + Error::throw_error(loc, current_line_text, "expected end of string but got newline"); + } + if (current_char == '\'') + { + std::string identifier(characters.begin(), characters.end()); + return Token::make_with_line({TT_String, identifier, {prev_line, prev_column, loc.fname}}, + current_line_text); + } + if (current_char == '\\') + { + if (index + 1 == current_lexed_text.size()) + { + Error::throw_error(loc, current_line_text, "unfinished escape sequence"); + } + switch (current_lexed_text[index + 1]) + { + case 'n': + characters.push_back('\n'); + break; + case '\'': + characters.push_back('\''); + break; + case '\\': + characters.push_back('\\'); + break; + default: + Error::throw_error(loc, current_line_text, "unknown escape sequence"); + } + ++index; + ++loc.column; + continue; + } - characters.push_back(current_char); - } - this->rewind(); - Error::throw_error(loc,current_line_text,"expected end of string but got EOF"); + characters.push_back(current_char); + } + this->rewind(); + Error::throw_error(loc, current_line_text, "expected end of string but got EOF"); - return Token(TT_Null,loc); // unreachable since Error::throw_error calls exit() + return Token(TT_Null, loc); // unreachable since Error::throw_error calls exit() } diff --git a/src/Lexer.h b/src/Lexer.h index 6613fec..44bb0dc 100644 --- a/src/Lexer.h +++ b/src/Lexer.h @@ -1,53 +1,55 @@ #pragma once #include "Token.h" -#include -#include -#include #include +#include +#include +#include /* Let's redefine TokenStream, as if it wasn't already defined in Token.h*/ typedef std::vector TokenStream; /* The number of data types currently in Sapphire. */ #define TYPE_COUNT 14 -/* The Lexer for the Sapphire compiler. A Lexer reads source code from a file, and turns it into a stream of tokens the compiler can understand. */ +/* The Lexer for the Sapphire compiler. A Lexer reads source code from a file, and turns it into a stream of tokens the + * compiler can understand. */ class Lexer { -private: - Location loc; - Location prev_loc; + private: + Location loc; + Location prev_loc; - int advance(); - int rewind(); - char current_char; - int index; + int advance(); + int rewind(); + char current_char; + int index; - Lexer(const std::string& fname); + Lexer(const std::string& fname); - std::string current_line_text; - std::string previous_line_text; + std::string current_line_text; + std::string previous_line_text; - std::string current_lexed_text; + std::string current_lexed_text; - std::string recalculate_current_line(const std::string& text); + std::string recalculate_current_line(const std::string& text); - Token create_string(); - Token create_number(); - Token create_identifier(); + Token create_string(); + Token create_number(); + Token create_identifier(); - bool is_in_string(const std::string& string, const char& character); -public: - /* An array containing Sapphire's current data types. */ - static const std::array types; + bool is_in_string(const std::string& string, const char& character); - ~Lexer(); + public: + /* An array containing Sapphire's current data types. */ + static const std::array types; - /* Lex the given text, turning it into a stream of tokens. */ - TokenStream lex(const std::string& text); + ~Lexer(); - /* Create a new Lexer and return a pointer to it. */ - static std::shared_ptr make_lexer(const std::string& fname); + /* Lex the given text, turning it into a stream of tokens. */ + TokenStream lex(const std::string& text); - /* If the Lexer is lexing an impòrted file, give it the location in the parent file at which it was imported. */ - static void assign_parent_location(std::shared_ptr& lexer, const std::shared_ptr& loc); + /* Create a new Lexer and return a pointer to it. */ + static std::shared_ptr make_lexer(const std::string& fname); + + /* If the Lexer is lexing an impòrted file, give it the location in the parent file at which it was imported. */ + static void assign_parent_location(std::shared_ptr& lexer, const std::shared_ptr& loc); }; diff --git a/src/Location.cpp b/src/Location.cpp index a9caf6e..ca9bb36 100644 --- a/src/Location.cpp +++ b/src/Location.cpp @@ -2,8 +2,7 @@ #include "StringConversion.h" #include -Location::Location(int ln, int col, std::string file) - : line(ln), column(col), fname(file) +Location::Location(int ln, int col, std::string file) : line(ln), column(col), fname(file) { } @@ -13,43 +12,43 @@ Location::~Location() std::string Location::to_string() const { - std::ostringstream ss; - ss << fname; - ss << ":"; - ss << int_to_string(line); - ss << ":"; - ss << int_to_string(column); - return ss.str(); + std::ostringstream ss; + ss << fname; + ss << ":"; + ss << int_to_string(line); + ss << ":"; + ss << int_to_string(column); + return ss.str(); } std::string Location::to_parenthesized_string() const { - return "(" + this->to_string() + ")"; + return "(" + this->to_string() + ")"; } void Location::advance() { - ++column; + ++column; } void Location::pos_from_char(const char& character) { - if(character == '\n') - { - ++line; - column = 0; - } + if (character == '\n') + { + ++line; + column = 0; + } } -void Location::operator=(const Location &other) +void Location::operator=(const Location& other) { - this->parent = other.parent; - this->line = other.line; - this->column = other.column; - this->fname.assign(other.fname.c_str()); + this->parent = other.parent; + this->line = other.line; + this->column = other.column; + this->fname.assign(other.fname.c_str()); } -void Location::copy(const Location &other) +void Location::copy(const Location& other) { - this->operator=(other); + this->operator=(other); } diff --git a/src/Location.h b/src/Location.h index 62d3df9..9e722a6 100644 --- a/src/Location.h +++ b/src/Location.h @@ -1,35 +1,35 @@ #pragma once -#include #include +#include /* Struct to represent a location in a file. */ struct Location { - int line; - int column; - std::string fname; + int line; + int column; + std::string fname; - /* The location at which this location was imported, for error traces in imported files. */ - std::shared_ptr parent = nullptr; + /* The location at which this location was imported, for error traces in imported files. */ + std::shared_ptr parent = nullptr; - /* Creates a Location with the given parameters. */ - Location(int ln, int col, std::string file); + /* Creates a Location with the given parameters. */ + Location(int ln, int col, std::string file); - ~Location(); + ~Location(); - /* Returns a string of the format FILE:LINE:COL. */ - std::string to_string() const; - /* Returns a string of the format (FILE:LINE:COL). */ - std::string to_parenthesized_string() const; + /* Returns a string of the format FILE:LINE:COL. */ + std::string to_string() const; + /* Returns a string of the format (FILE:LINE:COL). */ + std::string to_parenthesized_string() const; - /* Advance to the next column in the file. */ - void advance(); + /* Advance to the next column in the file. */ + void advance(); - /* Advance to the next line if provided a newline. */ - void pos_from_char(const char& character); + /* Advance to the next line if provided a newline. */ + void pos_from_char(const char& character); - void operator=(const Location& other); + void operator=(const Location& other); - /* Copies the other location into this one. */ - void copy(const Location& other); + /* Copies the other location into this one. */ + void copy(const Location& other); }; diff --git a/src/Normalizer.cpp b/src/Normalizer.cpp index 559c7e6..2b69d27 100644 --- a/src/Normalizer.cpp +++ b/src/Normalizer.cpp @@ -2,80 +2,80 @@ TokenStream Normalizer::normalize(const TokenStream& input) { - TokenStream result; - int i = 0; - while (i < input.size()) - { - Token current = input[i]; - if(current.tk_type == TT_Null) - { - i++; - continue; - } - if(current.tk_type == TT_Equal) - { - if(i+1 != input.size()) - { - if(input[i+1].tk_type == TT_Equal) - { - i += 2; - result.push_back(current.copy_with_new_type(TT_Equals)); - continue; - } - } - i++; - result.push_back(current); - continue; - } - if(current.tk_type == TT_Exclamation) - { - if(i+1 != input.size()) - { - if(input[i+1].tk_type == TT_Equal) - { - i += 2; - result.push_back(current.copy_with_new_type(TT_NEqual)); - continue; - } - } - i++; - result.push_back(current); - continue; - } - if(current.tk_type == TT_GreaterThan) - { - if(i+1 != input.size()) - { - if(input[i+1].tk_type == TT_Equal) - { - i += 2; - result.push_back(current.copy_with_new_type(TT_GTE)); - continue; - } - } - i++; - result.push_back(current); - continue; - } - if(current.tk_type == TT_LessThan) - { - if(i+1 != input.size()) - { - if(input[i+1].tk_type == TT_Equal) - { - i += 2; - result.push_back(current.copy_with_new_type(TT_LTE)); - continue; - } - } - i++; - result.push_back(current); - continue; - } - i++; - result.push_back(current); - continue; - } + TokenStream result; + int i = 0; + while (i < input.size()) + { + Token current = input[i]; + if (current.tk_type == TT_Null) + { + i++; + continue; + } + if (current.tk_type == TT_Equal) + { + if (i + 1 != input.size()) + { + if (input[i + 1].tk_type == TT_Equal) + { + i += 2; + result.push_back(current.copy_with_new_type(TT_Equals)); + continue; + } + } + i++; + result.push_back(current); + continue; + } + if (current.tk_type == TT_Exclamation) + { + if (i + 1 != input.size()) + { + if (input[i + 1].tk_type == TT_Equal) + { + i += 2; + result.push_back(current.copy_with_new_type(TT_NEqual)); + continue; + } + } + i++; + result.push_back(current); + continue; + } + if (current.tk_type == TT_GreaterThan) + { + if (i + 1 != input.size()) + { + if (input[i + 1].tk_type == TT_Equal) + { + i += 2; + result.push_back(current.copy_with_new_type(TT_GTE)); + continue; + } + } + i++; + result.push_back(current); + continue; + } + if (current.tk_type == TT_LessThan) + { + if (i + 1 != input.size()) + { + if (input[i + 1].tk_type == TT_Equal) + { + i += 2; + result.push_back(current.copy_with_new_type(TT_LTE)); + continue; + } + } + i++; + result.push_back(current); + continue; + } + i++; + result.push_back(current); + continue; + } - return result; + return result; } \ No newline at end of file diff --git a/src/Normalizer.h b/src/Normalizer.h index 3db55b0..1846ea6 100644 --- a/src/Normalizer.h +++ b/src/Normalizer.h @@ -1,14 +1,14 @@ #pragma once -#include "Token.h" #include "Lexer.h" // for TokenStream +#include "Token.h" /* Namespace to normalize a TokenStream. */ namespace Normalizer { - /* Some tokens are difficult for the Lexer to parse right, or maybe I'm just lazy. - Anyways, this function transforms > and = tokens next to each other into a single >=, which has a different meaning, etc... - For example: = + = : ==, < + = : <=... - - It also takes blank tokens and removes them. */ - TokenStream normalize(const TokenStream& input); -} +/* Some tokens are difficult for the Lexer to parse right, or maybe I'm just lazy. +Anyways, this function transforms > and = tokens next to each other into a single >=, which has a different meaning, +etc... For example: = + = : ==, < + = : <=... + +It also takes blank tokens and removes them. */ +TokenStream normalize(const TokenStream& input); +} // namespace Normalizer diff --git a/src/Parser.cpp b/src/Parser.cpp index cc24011..465f10e 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -1,7 +1,6 @@ #include "Parser.h" -Parser::Parser(const TokenStream& tokens) - : tokens(tokens) +Parser::Parser(const TokenStream& tokens) : tokens(tokens) { } @@ -11,37 +10,39 @@ Parser::~Parser() std::shared_ptr Parser::new_parser(const TokenStream& tokens) { - return std::shared_ptr(new Parser(tokens)); // As always, not using std::make_shared 'cause constructor is private + return std::shared_ptr( + new Parser(tokens)); // As always, not using std::make_shared 'cause constructor is private } std::shared_ptr Parser::parse() { - auto result = walk_expr(); + auto result = walk_expr(); - if(result.is_error()) - { - result.ethrow(); - } + if (result.is_error()) + { + result.ethrow(); + } - return result.get(); + return result.get(); } Parser::ErrorOr Parser::walk_expr() { - return ErrorOr(new ExprNode()); // constructor does not want to accept a shared_ptr in the argument list, thats why im not using make_shared here + return ErrorOr(new ExprNode()); // constructor does not want to accept a shared_ptr in the argument + // list, thats why im not using make_shared here } Parser::ErrorOr Parser::walk_number() { - return ErrorOr(new NumberNode()); + return ErrorOr(new NumberNode()); } void Parser::save_current_position() { - saved_m_index = m_index; + saved_m_index = m_index; } void Parser::restore_current_position() { - m_index = saved_m_index; + m_index = saved_m_index; } diff --git a/src/Parser.h b/src/Parser.h index b54c371..a8c4000 100644 --- a/src/Parser.h +++ b/src/Parser.h @@ -1,62 +1,73 @@ #pragma once -#include -#include "Lexer.h" #include "AST/NumberNode.h" #include "Error.h" +#include "Lexer.h" #include +#include /* Parser class for the Sapphire compiler. */ class Parser { - /* Struct to store a parsing result which can be either a parsing error or a success, in which case it contains a pointer to the result. */ - template - struct ErrorOr - { - /* Return the stored pointer. */ - std::shared_ptr get() - { - assert(!m_is_error); - return m_ptr; - } + /* Struct to store a parsing result which can be either a parsing error or a success, in which case it contains a + * pointer to the result. */ + template struct ErrorOr + { + /* Return the stored pointer. */ + std::shared_ptr get() + { + assert(!m_is_error); + return m_ptr; + } - /* Call Error::throw_error() with the stored error's location, line text, and the error string provided to this struct instance. */ - void ethrow() - { - assert(m_is_error); - Error::throw_error(error_tok->loc,error_tok->line(),m_error); - } + /* Call Error::throw_error() with the stored error's location, line text, and the error string provided to this + * struct instance. */ + void ethrow() + { + assert(m_is_error); + Error::throw_error(error_tok->loc, error_tok->line(), m_error); + } - /* Construct a new successful ErrorOr with a heap-allocated pointer to the result class. */ - ErrorOr(T* ptr) : m_ptr(ptr), m_is_error(false) {} - /* Construct a new failed ErrorOr with the error details and the token where parsing failed. */ - ErrorOr(const std::string& error, const Token& error_tok) : m_error(error), m_is_error(true), error_tok(error_tok) {} + /* Construct a new successful ErrorOr with a heap-allocated pointer to the result class. */ + ErrorOr(T* ptr) : m_ptr(ptr), m_is_error(false) + { + } + /* Construct a new failed ErrorOr with the error details and the token where parsing failed. */ + ErrorOr(const std::string& error, const Token& error_tok) + : m_error(error), m_is_error(true), error_tok(error_tok) + { + } - /* Is this ErrorOr instance successful or failed? */ - bool is_error() { return m_is_error; } + /* Is this ErrorOr instance successful or failed? */ + bool is_error() + { + return m_is_error; + } - private: - bool m_is_error; - std::string m_error; - std::shared_ptr error_tok; - std::shared_ptr m_ptr; - }; -private: - Parser(const TokenStream& tokens); - TokenStream tokens; + private: + bool m_is_error; + std::string m_error; + std::shared_ptr error_tok; + std::shared_ptr m_ptr; + }; - ErrorOr walk_expr(); - ErrorOr walk_number(); + private: + Parser(const TokenStream& tokens); + TokenStream tokens; - int m_index; - int saved_m_index; + ErrorOr walk_expr(); + ErrorOr walk_number(); - void save_current_position(); - void restore_current_position(); -public: - ~Parser(); + int m_index; + int saved_m_index; - /* Construct a new Parser with the given TokenStream. */ - static std::shared_ptr new_parser(const TokenStream& tokens); - /* Parse the stored TokenStream and return the top-level node of the result Abstract Syntax Tree. */ - std::shared_ptr parse(); + void save_current_position(); + void restore_current_position(); + + public: + ~Parser(); + + /* Construct a new Parser with the given TokenStream. */ + static std::shared_ptr new_parser(const TokenStream& tokens); + /* Parse the stored TokenStream and return the top-level node of the result Abstract Syntax Tree. */ + std::shared_ptr parse(); }; diff --git a/src/StringConversion.cpp b/src/StringConversion.cpp index cc48712..48c3c9f 100644 --- a/src/StringConversion.cpp +++ b/src/StringConversion.cpp @@ -4,13 +4,13 @@ std::string int_to_string(const int& value) { char buffer[12]; - std::sprintf(buffer,"%d",value); + std::sprintf(buffer, "%d", value); return {buffer}; } std::string float_to_string(const float& value) { char buffer[50]; - std::sprintf(buffer,"%f",value); + std::sprintf(buffer, "%f", value); return {buffer}; } \ No newline at end of file diff --git a/src/Token.cpp b/src/Token.cpp index 92cf543..8a0b786 100644 --- a/src/Token.cpp +++ b/src/Token.cpp @@ -1,63 +1,26 @@ #include "Token.h" -#include "StringConversion.h" #include "FormatString/FormatString.hpp" +#include "StringConversion.h" #include "replace.h" const std::string token_strings[] = { - "TT_IDENTIFIER", - "TT_NUMBER", - "TT_FLOAT", - "TT_KEYWORD", - "TT_STRING", - "TT_PLUS", - "TT_MINUS", - "TT_MUL", - "TT_DIV", - "TT_AT", - "TT_EQUAL", - "TT_LESSTHAN", - "TT_GREATERTHAN", - "TT_LPAREN", - "TT_RPAREN", - "TT_LBRACKET", - "TT_RBRACKET", - "TT_SEMICOLON", - "TT_LOADEDSTRING", - "TT_EOF", - "TT_NULL", - "TT_EQUALS", - "TT_GTE", - "TT_LTE", - "TT_PERIOD", - "TT_COMMA", - "TT_PATH", - "TT_EXCLAMATION", - "TT_NEQUAL", - "TT_LSQB", - "TT_RSQB", - "TT_TYPE", - "TT_IMPORT", - "TT_SYSCALL0", - "TT_SYSCALL1", - "TT_SYSCALL2", - "TT_SYSCALL3", - "TT_SYSCALL4", - "TT_SYSCALL5", - "TT_COMPILERMACRO" -}; + "TT_IDENTIFIER", "TT_NUMBER", "TT_FLOAT", "TT_KEYWORD", "TT_STRING", "TT_PLUS", + "TT_MINUS", "TT_MUL", "TT_DIV", "TT_AT", "TT_EQUAL", "TT_LESSTHAN", + "TT_GREATERTHAN", "TT_LPAREN", "TT_RPAREN", "TT_LBRACKET", "TT_RBRACKET", "TT_SEMICOLON", + "TT_LOADEDSTRING", "TT_EOF", "TT_NULL", "TT_EQUALS", "TT_GTE", "TT_LTE", + "TT_PERIOD", "TT_COMMA", "TT_PATH", "TT_EXCLAMATION", "TT_NEQUAL", "TT_LSQB", + "TT_RSQB", "TT_TYPE", "TT_IMPORT", "TT_SYSCALL0", "TT_SYSCALL1", "TT_SYSCALL2", + "TT_SYSCALL3", "TT_SYSCALL4", "TT_SYSCALL5", "TT_COMPILERMACRO"}; -Token::Token(const TokenType& type) - : tk_type(type), loc(0,0,"") +Token::Token(const TokenType& type) : tk_type(type), loc(0, 0, "") { } -Token::Token(const TokenType& type, const Location& location) - : tk_type(type), loc(location) +Token::Token(const TokenType& type, const Location& location) : tk_type(type), loc(location) { } -Token::Token(const TokenType& type, const std::string& val) - : tk_type(type), loc(0,0,""), string_value(val) +Token::Token(const TokenType& type, const std::string& val) : tk_type(type), loc(0, 0, ""), string_value(val) { } @@ -82,7 +45,7 @@ Token::~Token() Token Token::copy_with_new_type(const TokenType& type) { - Token result(type,loc); + Token result(type, loc); result.int_value = int_value; result.float_value = float_value; @@ -96,99 +59,99 @@ Token Token::copy_with_new_type(const TokenType& type) std::string Token::to_string() const { std::string details = loc.to_parenthesized_string(); - if(tk_type == TT_Number) + if (tk_type == TT_Number) { - return format_string("INT:%d %s",int_value,details); - } + return format_string("INT:%d %s", int_value, details); + } else if (tk_type == TT_Float) { - return format_string("FLOAT:%f %s",float_value,details); + return format_string("FLOAT:%f %s", float_value, details); } else if (tk_type == TT_Identifier) { - return format_string("ID:%s %s",string_value,details); - } + return format_string("ID:%s %s", string_value, details); + } else if (tk_type == TT_Keyword) { - return format_string("KEYWORD:%s %s",string_value,details); + return format_string("KEYWORD:%s %s", string_value, details); } else if (tk_type == TT_Type) { - return format_string("TYPE:%s %s",string_value,details); + return format_string("TYPE:%s %s", string_value, details); } else if (tk_type == TT_String) { - replace(const_cast(string_value),"\n","\\n"); - return format_string("STRING:'%s' %s",string_value,details); + replace(const_cast(string_value), "\n", "\\n"); + return format_string("STRING:'%s' %s", string_value, details); } - switch(tk_type) + switch (tk_type) { - case TT_EOF: - return "EOF " + details; - case TT_Plus: - return "PLUS " + details; - case TT_Minus: - return "MINUS " + details; - case TT_Mul: - return "MUL " + details; - case TT_Div: - return "DIV " + details; - case TT_At: - return "AT " + details; - case TT_Equal: - return "EQUAL " + details; - case TT_LessThan: - return "LESSTHAN " + details; - case TT_GreaterThan: - return "GREATERTHAN " + details; - case TT_LParen: - return "LPAREN " + details; - case TT_RParen: - return "RPAREN " + details; - case TT_LBracket: - return "LBRACKET " + details; - case TT_RBracket: - return "RBRACKET " + details; - case TT_Semicolon: - return "SEMICOLON " + details; - case TT_LoadedString: - return "LDSTRING " + details; - case TT_Equals: - return "EQUALS " + details; - case TT_GTE: - return "GTE " + details; - case TT_LTE: - return "LTE " + details; - case TT_Period: - return "PERIOD " + details; - case TT_Comma: - return "COMMA " + details; - case TT_Path: - return "PATH " + details; - case TT_Exclamation: - return "EXCLAMATION " + details; - case TT_NEqual: - return "NEQUAL " + details; - case TT_LSQB: - return "LEFTSQB " + details; - case TT_RSQB: - return "RIGHTSQB " + details; - case TT_Import: - return "IMPORT " + details; - case TT_Syscall0: - return "SYSCALL0 " + details; - case TT_Syscall1: - return "SYSCALL1 " + details; - case TT_Syscall2: - return "SYSCALL2 " + details; - case TT_Syscall3: - return "SYSCALL3 " + details; - case TT_Syscall4: - return "SYSCALL4 " + details; - case TT_Syscall5: - return "SYSCALL5 " + details; - case TT_CompilerMacro: - return "COMPMACRO " + details; + case TT_EOF: + return "EOF " + details; + case TT_Plus: + return "PLUS " + details; + case TT_Minus: + return "MINUS " + details; + case TT_Mul: + return "MUL " + details; + case TT_Div: + return "DIV " + details; + case TT_At: + return "AT " + details; + case TT_Equal: + return "EQUAL " + details; + case TT_LessThan: + return "LESSTHAN " + details; + case TT_GreaterThan: + return "GREATERTHAN " + details; + case TT_LParen: + return "LPAREN " + details; + case TT_RParen: + return "RPAREN " + details; + case TT_LBracket: + return "LBRACKET " + details; + case TT_RBracket: + return "RBRACKET " + details; + case TT_Semicolon: + return "SEMICOLON " + details; + case TT_LoadedString: + return "LDSTRING " + details; + case TT_Equals: + return "EQUALS " + details; + case TT_GTE: + return "GTE " + details; + case TT_LTE: + return "LTE " + details; + case TT_Period: + return "PERIOD " + details; + case TT_Comma: + return "COMMA " + details; + case TT_Path: + return "PATH " + details; + case TT_Exclamation: + return "EXCLAMATION " + details; + case TT_NEqual: + return "NEQUAL " + details; + case TT_LSQB: + return "LEFTSQB " + details; + case TT_RSQB: + return "RIGHTSQB " + details; + case TT_Import: + return "IMPORT " + details; + case TT_Syscall0: + return "SYSCALL0 " + details; + case TT_Syscall1: + return "SYSCALL1 " + details; + case TT_Syscall2: + return "SYSCALL2 " + details; + case TT_Syscall3: + return "SYSCALL3 " + details; + case TT_Syscall4: + return "SYSCALL4 " + details; + case TT_Syscall5: + return "SYSCALL5 " + details; + case TT_CompilerMacro: + return "COMPMACRO " + details; } return ""; } @@ -200,7 +163,7 @@ std::string Token::line() const Token Token::make_with_line(const Token& origin, const std::string& line_text) { - Token result(origin.tk_type,origin.loc); + Token result(origin.tk_type, origin.loc); result.int_value = origin.int_value; result.float_value = origin.float_value; @@ -227,11 +190,11 @@ void Token::erase(Token& tk) bool Token::match_token_types(const std::vector& a, const std::vector& b, int count) { - int size = [](int a, int b){ return a > b ? b : a; }(a.size() - count,b.size()); + int size = [](int a, int b) { return a > b ? b : a; }(a.size() - count, b.size()); - for(int i = 0; i < size; ++i) + for (int i = 0; i < size; ++i) { - if(a[i+count].tk_type != b[i].tk_type) + if (a[i + count].tk_type != b[i].tk_type) { return false; } diff --git a/src/Token.h b/src/Token.h index 5a7abdf..89e401e 100644 --- a/src/Token.h +++ b/src/Token.h @@ -87,18 +87,18 @@ struct Token void operator=(const Token& other); - /* Convert the Token into a blank token (does not delete it), so that the Normalizer can remove it afterwards. + /* Convert the Token into a blank token (does not delete it), so that the Normalizer can remove it afterwards. This is to not alter vectors while iterating over them. */ static void erase(Token& tk); /* Return a copy of this Token, but with its TokenType changed. */ Token copy_with_new_type(const TokenType& type); - /* Iterate over two vectors of Tokens, starting from count for vector A, starting from 0 for vector B, checking if the current Tokens' types match. - If at any point they don't, return false. Else, return true. */ + /* Iterate over two vectors of Tokens, starting from count for vector A, starting from 0 for vector B, checking if + the current Tokens' types match. If at any point they don't, return false. Else, return true. */ static bool match_token_types(const std::vector& a, const std::vector& b, int count); -private: + private: std::string line_text; }; diff --git a/src/replace.cpp b/src/replace.cpp index b3f7423..3a59b0c 100644 --- a/src/replace.cpp +++ b/src/replace.cpp @@ -1,9 +1,9 @@ #include "replace.h" -bool replace(std::string& str, const std::string& from, const std::string& to) { - size_t start_pos = str.find(from); - if(start_pos == std::string::npos) - return false; - str.replace(start_pos, from.length(), to); - return true; +bool replace(std::string& str, const std::string& from, const std::string& to) +{ + size_t start_pos = str.find(from); + if (start_pos == std::string::npos) return false; + str.replace(start_pos, from.length(), to); + return true; } \ No newline at end of file diff --git a/src/sapphire.cpp b/src/sapphire.cpp index a9452e5..3a2168a 100644 --- a/src/sapphire.cpp +++ b/src/sapphire.cpp @@ -1,24 +1,26 @@ -#include "Lexer.h" +#include "Arguments.h" #include "FileIO.h" #include "Importer.h" -#include "Arguments.h" +#include "Lexer.h" #include "Normalizer.h" #include int main(int argc, char** argv) { - Arguments::parse(argc,argv); + Arguments::parse(argc, argv); std::string fname = Arguments::input_fname; std::string contents = FileIO::read_all(fname); - TokenStream res = Lexer::make_lexer(fname)->lex(contents); - Importer::init(fname.substr(0,fname.find_last_of('.'))); - res = Importer::evaluate(res); - res = Normalizer::normalize(res); + TokenStream result = Lexer::make_lexer(fname)->lex(contents); - for(int i = 0; i < res.size(); i++) + Importer::init(FileIO::remove_file_extension(fname)); + + result = Importer::evaluate(result); + result = Normalizer::normalize(result); + + for (int i = 0; i < result.size(); i++) { - std::cout << res[i].to_string() << std::endl; + std::cout << result[i].to_string() << std::endl; } std::cout << "Output filename: " << Arguments::output_fname << std::endl;