REFACTOR!!

apio 2022-08-26 12:00:14 +02:00
parent 7094698df7
commit 99f56d3a61
21 changed files with 186 additions and 707 deletions

View File

@@ -27,12 +27,8 @@ add_executable(
src/external/FormatString/FormatString.hpp
src/FileIO.h
src/FileIO.cpp
src/Importer.cpp
src/Importer.h
src/Arguments.cpp
src/Arguments.h
src/Normalizer.cpp
src/Normalizer.h
src/AST/ASTNode.cpp
src/AST/ASTNode.h
src/AST/BinaryOpNode.cpp

View File

@@ -1,5 +1,4 @@
#include "Error.h"
#include "Importer.h"
#include "utils.h"
#include <algorithm>
#include <iostream>
@@ -42,7 +41,7 @@ void Error::show_import_lines(const Location& loc, void (*import_line_printer)(c
{
show_import_lines(loc, show_import_line, std::cerr);
std::string linestr = to_string(loc.line);
std::string line = to_string(loc.line);
std::cerr << "\033[1;1m";
std::cerr << loc.str();
@@ -56,12 +55,12 @@ void Error::show_import_lines(const Location& loc, void (*import_line_printer)(c
std::cerr << details;
std::cerr << std::endl;
std::cerr << linestr;
std::cerr << line;
std::cerr << std::string(4, ' ');
std::cerr << line_text;
std::cerr << std::endl;
std::cerr << std::string(4 + linestr.size() + loc.column - 1, ' ');
std::cerr << std::string(4 + line.size() + loc.column - 1, ' ');
std::cerr << "\033[31;49m";
std::cerr << "^";
@@ -101,7 +100,7 @@ void Error::throw_warning(const Location& loc, const std::string line_text, cons
{
show_import_lines(loc, show_import_line, std::cout);
std::string linestr = to_string(loc.line);
std::string line = to_string(loc.line);
std::cout << "\033[1;1m";
std::cout << loc.str();
@@ -115,12 +114,12 @@ void Error::throw_warning(const Location& loc, const std::string line_text, cons
std::cout << details;
std::cout << std::endl;
std::cout << linestr;
std::cout << line;
std::cout << std::string(4, ' ');
std::cout << line_text;
std::cout << std::endl;
std::cout << std::string(4 + linestr.size() + loc.column - 1, ' ');
std::cout << std::string(4 + line.size() + loc.column - 1, ' ');
std::cout << "\033[33;49m";
std::cout << "^";

View File

@@ -31,7 +31,7 @@ void IRBuilder::create_program(std::shared_ptr<ProgramNode> program)
program->walk([&](std::shared_ptr<TopLevelNode> node) { node->codegen(this, module.get()); });
}
void IRBuilder::resolveToLLVMIR(std::string path)
void IRBuilder::resolveToLLVMIR(std::string_view path)
{
std::error_code EC;
llvm::raw_fd_ostream dest(path, EC, llvm::sys::fs::OF_None);
@@ -45,7 +45,7 @@ void IRBuilder::resolveToLLVMIR(std::string path)
dest.flush();
}
void IRBuilder::resolveToObjectFile(std::string path)
void IRBuilder::resolveToObjectFile(std::string_view path)
{
// edit this section when adding support for more architectures
LLVMInitializeX86TargetInfo();
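
For context, initializing a single LLVM target usually takes the full family of LLVM-C entry points, not just TargetInfo. A sketch of the complete X86 sequence, assuming the standard llvm-c/Target.h initializers:

// Each supported architecture has the same family of functions
// (e.g. LLVMInitializeAArch64TargetInfo and friends).
LLVMInitializeX86TargetInfo();
LLVMInitializeX86Target();
LLVMInitializeX86TargetMC();
LLVMInitializeX86AsmParser();
LLVMInitializeX86AsmPrinter();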

View File

@@ -19,7 +19,7 @@ class IRBuilder
llvm::IRBuilder<>* getBuilder();
void resolveToLLVMIR(std::string path);
void resolveToLLVMIR(std::string_view path);
void resolveToObjectFile(std::string path);
void resolveToObjectFile(std::string_view path);
};
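
One caveat with the new std::string_view signatures: llvm::raw_fd_ostream takes an llvm::StringRef, and not every LLVM release converts std::string_view to StringRef implicitly. A hedged sketch of an explicit conversion that relies only on the long-standing StringRef(const char*, size_t) constructor:

std::error_code EC;
llvm::raw_fd_ostream dest(llvm::StringRef(path.data(), path.size()), EC, llvm::sys::fs::OF_None);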

View File

@@ -1,168 +0,0 @@
#include "Importer.h"
#include "Arguments.h"
#include "Error.h"
#include "FileIO.h"
#include "sapphirepch.h"
#include "utils.h"
#include <fstream>
#define MAX_IMPORTS 100
int Importer::import_count = 0;
std::vector<std::shared_ptr<Location>> Importer::import_stack;
std::vector<std::string> Importer::imported_files;
TokenStream Importer::evaluate(const TokenStream& original)
{
int i = 0;
auto ret_tk = original;
TokenStream new_tokens;
while (original[i].tk_type != TT_EOF)
{
Token current_token = original[i];
if (current_token.tk_type == TT_Import)
{
Token next_token = original[i + 1];
if (next_token.tk_type == TT_EOF)
Error::throw_error(current_token.loc, current_token.line(),
"did not expect EOF after import statement");
if (next_token.tk_type == TT_Identifier) // TODO: add support for strings
{
Token last_token = original[i + 2];
if (last_token.tk_type != TT_Semicolon)
Error::throw_error(last_token.loc, last_token.line(), "expected a semicolon");
if (std::find(imported_files.begin(), imported_files.end(), next_token.string_value) !=
imported_files.end())
{
if (Arguments::wimport)
Error::throw_warning(next_token.loc, next_token.line(), "file already imported, skipping");
Token::erase(ret_tk[i]);
Token::erase(ret_tk[i + 1]);
Token::erase(ret_tk[i + 2]);
++i;
continue;
}
if (import_count > MAX_IMPORTS)
Error::throw_error(current_token.loc, current_token.line(), "maximum import depth exceeded");
std::string input_file_name = next_token.string_value + ".sp";
std::ifstream input_file(input_file_name); // only used to check if it exists, thus closed afterwards
if (!input_file.good()) Error::throw_error(next_token.loc, next_token.line(), "file not found");
input_file.close();
auto file_contents = FileIO::read_all(input_file_name);
auto top_location = std::make_shared<Location>(current_token.loc.line, current_token.loc.column,
current_token.loc.fname);
top_location.get()->parent = current_token.loc.parent;
import_stack.push_back(top_location); // Keep ref_count above 0, just in case
auto import_lexer = Lexer::make_lexer(input_file_name);
Lexer::assign_parent_location(import_lexer, top_location);
TokenStream imported_tokens = import_lexer->lex(file_contents);
imported_tokens.pop_back(); // remove EOF at end of token stream
for (auto& tk : imported_tokens)
{
tk.loc.parent = top_location;
}
imported_files.push_back(next_token.string_value);
new_tokens.insert(new_tokens.end(), imported_tokens.begin(), imported_tokens.end());
Token::erase(ret_tk[i]);
Token::erase(ret_tk[i + 1]);
Token::erase(ret_tk[i + 2]);
}
else if (next_token.tk_type == TT_Path)
{
Token last_token = original[i + 2];
if (last_token.tk_type != TT_Semicolon)
Error::throw_error(last_token.loc, last_token.line(), "expected a semicolon");
if (std::find(imported_files.begin(), imported_files.end(), next_token.string_value) !=
imported_files.end())
{
if (Arguments::wimport)
Error::throw_warning(next_token.loc, next_token.line(), "file already imported, skipping");
Token::erase(ret_tk[i]);
Token::erase(ret_tk[i + 1]);
Token::erase(ret_tk[i + 2]);
++i;
continue;
}
if (import_count > MAX_IMPORTS)
Error::throw_error(current_token.loc, current_token.line(), "maximum import depth exceeded");
std::string input_file_name = next_token.string_value + ".sp";
std::ifstream input_file(input_file_name); // only used to check if it exists, thus closed afterwards
if (!input_file.good()) Error::throw_error(next_token.loc, next_token.line(), "file not found");
input_file.close();
auto file_contents = FileIO::read_all(input_file_name);
auto top_location = std::make_shared<Location>(current_token.loc.line, current_token.loc.column,
current_token.loc.fname);
top_location.get()->parent = current_token.loc.parent;
import_stack.push_back(top_location); // Keep ref_count above 0, just in case
auto import_lexer = Lexer::make_lexer(input_file_name);
Lexer::assign_parent_location(import_lexer, top_location);
TokenStream imported_tokens = import_lexer->lex(file_contents);
imported_tokens.pop_back(); // remove EOF at end of token stream
for (auto& tk : imported_tokens)
{
tk.loc.parent = top_location;
}
imported_files.push_back(next_token.string_value);
new_tokens.insert(new_tokens.end(), imported_tokens.begin(), imported_tokens.end());
Token::erase(ret_tk[i]);
Token::erase(ret_tk[i + 1]);
Token::erase(ret_tk[i + 2]);
}
else
Error::throw_error(next_token.loc, next_token.line(),
"import keyword should be followed by an identifier");
}
++i;
}
if (new_tokens.size() != 0)
{
new_tokens.insert(new_tokens.end(), ret_tk.begin(), ret_tk.end());
import_count++;
return evaluate(new_tokens);
}
return ret_tk;
}
void Importer::init(std::string init_file)
{
imported_files.push_back(init_file);
}
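
The removed Importer probed for a file's existence by opening and immediately closing a std::ifstream. For reference, a shorter equivalent, assuming C++17's <filesystem> is available to the project:

#include <filesystem>
#include <string>

// Replaces the open-then-close std::ifstream existence probe.
bool file_exists(const std::string& name)
{
    return std::filesystem::exists(name);
}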

View File

@@ -1,12 +0,0 @@
#pragma once
#include "Lexer.h"
#include "Token.h"
namespace Importer
{
extern int import_count;
extern std::vector<std::shared_ptr<Location>> import_stack;
extern std::vector<std::string> imported_files;
TokenStream evaluate(const TokenStream& original);
void init(std::string init_file);
} // namespace Importer

View File

@@ -7,10 +7,7 @@
#define IDENTIFIERS "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789"
#define DIGITS "0123456789"
const std::array<std::string, TYPE_COUNT> Lexer::types = {"void", "bool", "str", "i8", "i16", "i32", "i64",
"u8", "u16", "u32", "u64", "f32", "f64", "f128"};
Lexer::Lexer(const std::string& fname) : loc(1, 0, fname), index(-1), prev_loc(1, 0, fname)
Lexer::Lexer(const std::string& fname) : location(1, 0, fname), index(-1), previous_location(1, 0, fname)
{
}
@@ -20,12 +17,12 @@ Lexer::~Lexer()
int Lexer::advance()
{
prev_loc = loc;
previous_location = location;
++index;
loc.advance();
location.advance();
if (index >= current_lexed_text.size()) return 0;
current_char = current_lexed_text[index];
loc.pos_from_char(current_char);
location.pos_from_char(current_char);
if (current_char == '\n')
{
previous_line_text = current_line_text;
@@ -36,7 +33,7 @@ int Lexer::advance()
int Lexer::rewind()
{
loc = prev_loc;
location = previous_location;
--index;
if (index == -1) return 0;
if (current_char == '\n')
@@ -67,7 +64,7 @@ std::unique_ptr<Lexer> Lexer::make_lexer(const std::string& fname)
void Lexer::assign_parent_location(std::unique_ptr<Lexer>& lexer, const std::shared_ptr<Location>& loc)
{
lexer->loc.parent = loc;
lexer->location.parent = loc;
}
bool Lexer::is_in_string(const std::string& string, const char& character)
@@ -117,81 +114,82 @@ TokenStream Lexer::lex(const std::string& text)
break;
}
}
result.push_back(Token::make_with_line({TT_Div, loc}, current_line_text));
result.push_back(Token::make_with_line({TT_Div, location}, current_line_text));
break;
case '+':
result.push_back(Token::make_with_line({TT_Plus, loc}, current_line_text));
result.push_back(Token::make_with_line({TT_Plus, location}, current_line_text));
break;
case '-':
result.push_back(Token::make_with_line({TT_Minus, loc}, current_line_text));
result.push_back(Token::make_with_line({TT_Minus, location}, current_line_text));
break;
case '*':
result.push_back(Token::make_with_line({TT_Mul, loc}, current_line_text));
result.push_back(Token::make_with_line({TT_Mul, location}, current_line_text));
break;
case '@':
result.push_back(Token::make_with_line({TT_At, loc}, current_line_text));
result.push_back(Token::make_with_line({TT_At, location}, current_line_text));
break;
case '=':
result.push_back(Token::make_with_line({TT_Equal, loc}, current_line_text));
result.push_back(Token::make_with_line({TT_Equal, location}, current_line_text));
break;
case '>':
result.push_back(Token::make_with_line({TT_GreaterThan, loc}, current_line_text));
result.push_back(Token::make_with_line({TT_GreaterThan, location}, current_line_text));
break;
case '<':
result.push_back(Token::make_with_line({TT_LessThan, loc}, current_line_text));
result.push_back(Token::make_with_line({TT_LessThan, location}, current_line_text));
break;
case '(':
result.push_back(Token::make_with_line({TT_LParen, loc}, current_line_text));
result.push_back(Token::make_with_line({TT_LParen, location}, current_line_text));
break;
case ')':
result.push_back(Token::make_with_line({TT_RParen, loc}, current_line_text));
result.push_back(Token::make_with_line({TT_RParen, location}, current_line_text));
break;
case '{':
result.push_back(Token::make_with_line({TT_LBracket, loc}, current_line_text));
result.push_back(Token::make_with_line({TT_LBracket, location}, current_line_text));
break;
case '}':
result.push_back(Token::make_with_line({TT_RBracket, loc}, current_line_text));
result.push_back(Token::make_with_line({TT_RBracket, location}, current_line_text));
break;
case ';':
result.push_back(Token::make_with_line({TT_Semicolon, loc}, current_line_text));
result.push_back(Token::make_with_line({TT_Semicolon, location}, current_line_text));
break;
case '.':
result.push_back(Token::make_with_line({TT_Period, loc}, current_line_text));
result.push_back(Token::make_with_line({TT_Period, location}, current_line_text));
break;
case ',':
result.push_back(Token::make_with_line({TT_Comma, loc}, current_line_text));
result.push_back(Token::make_with_line({TT_Comma, location}, current_line_text));
break;
case '!':
result.push_back(Token::make_with_line({TT_Exclamation, loc}, current_line_text));
result.push_back(Token::make_with_line({TT_Exclamation, location}, current_line_text));
break;
case '[':
result.push_back(Token::make_with_line({TT_Exclamation, loc}, current_line_text));
result.push_back(Token::make_with_line({TT_Exclamation, location}, current_line_text));
break;
case ']':
result.push_back(Token::make_with_line({TT_Exclamation, loc}, current_line_text));
result.push_back(Token::make_with_line({TT_Exclamation, location}, current_line_text));
break;
case ':':
result.push_back(Token::make_with_line({TT_Colon, location}, current_line_text));
break;
case '\377':
result.push_back(Token(TT_EOF, loc));
return result;
result.push_back(Token(TT_EOF, location));
return std::move(result);
default:
Error::throw_error(loc, current_line_text, "unknown character");
Error::throw_error(location, current_line_text, "unknown character");
}
}
result.push_back(Token(TT_EOF, loc));
result.push_back(Token(TT_EOF, location));
return result;
return std::move(result);
}
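
A note on the `return std::move(result);` lines introduced here (and below in create_identifier and create_number): returning a named local by value already moves or elides the copy, and wrapping it in std::move suppresses NRVO. The conventional form is simply:

TokenStream Lexer::lex(const std::string& text)
{
    TokenStream result;
    // ... push tokens ...
    return result; // NRVO / implicit move; std::move would only pessimize
}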
Token Lexer::create_identifier()
{
std::vector<char> characters;
int prev_line = loc.line;
int prev_column = loc.column;
bool is_path = false;
bool last_was_path = false;
Location saved_loc = this->loc;
Location saved_prev_loc = this->prev_loc;
int prev_line = location.line;
int prev_column = location.column;
Location saved_loc = this->location;
Location saved_prev_loc = this->previous_location;
characters.push_back(current_char);
@@ -200,103 +198,48 @@ Token Lexer::create_identifier()
if (is_in_string(IDENTIFIERS, current_char))
{
characters.push_back(current_char);
last_was_path = false;
}
else if (current_char == '/')
{
if (last_was_path)
{
characters.pop_back();
this->loc = saved_loc;
this->prev_loc = saved_prev_loc;
this->rewind();
std::string identifier(characters.begin(), characters.end());
return Token::make_with_line({TT_Path, identifier, {prev_line, prev_column, loc.fname}},
current_line_text);
}
saved_loc = this->loc;
saved_prev_loc = this->prev_loc;
characters.push_back(current_char);
is_path = true;
last_was_path = true;
}
else
{
this->rewind();
std::string identifier(characters.begin(), characters.end());
if (is_path)
return Token::make_with_line({TT_Path, identifier, {prev_line, prev_column, loc.fname}},
current_line_text);
auto location = std::find(types.begin(), types.end(), identifier);
if (location != types.end())
{
return Token::make_with_line({TT_Type, identifier, {prev_line, prev_column, loc.fname}},
current_line_text);
}
if (identifier == "import")
return Token::make_with_line({TT_Import, {prev_line, prev_column, loc.fname}}, current_line_text);
if (identifier == "syscall0")
return Token::make_with_line({TT_Syscall0, {prev_line, prev_column, loc.fname}}, current_line_text);
if (identifier == "syscall1")
return Token::make_with_line({TT_Syscall1, {prev_line, prev_column, loc.fname}}, current_line_text);
if (identifier == "syscall2")
return Token::make_with_line({TT_Syscall2, {prev_line, prev_column, loc.fname}}, current_line_text);
if (identifier == "syscall3")
return Token::make_with_line({TT_Syscall3, {prev_line, prev_column, loc.fname}}, current_line_text);
if (identifier == "syscall4")
return Token::make_with_line({TT_Syscall4, {prev_line, prev_column, loc.fname}}, current_line_text);
if (identifier == "syscall5")
return Token::make_with_line({TT_Syscall5, {prev_line, prev_column, loc.fname}}, current_line_text);
if (identifier == "compmacro")
return Token::make_with_line({TT_CompilerMacro, {prev_line, prev_column, loc.fname}},
current_line_text);
if (identifier == "let")
return Token::make_with_line({TT_Let, {prev_line, prev_column, loc.fname}}, current_line_text);
if (identifier == "in")
return Token::make_with_line({TT_In, {prev_line, prev_column, loc.fname}}, current_line_text);
return Token::make_with_line({TT_Identifier, identifier, {prev_line, prev_column, loc.fname}},
current_line_text);
goto end;
}
}
end:
std::string identifier(characters.begin(), characters.end());
if (is_path)
return Token::make_with_line({TT_Path, identifier, {prev_line, prev_column, loc.fname}}, current_line_text);
auto location = std::find(types.begin(), types.end(), identifier);
if (location != types.end())
{
return Token::make_with_line({TT_Type, identifier, {prev_line, prev_column, loc.fname}}, current_line_text);
}
if (identifier == "import")
return Token::make_with_line({TT_Import, {prev_line, prev_column, loc.fname}}, current_line_text);
if (identifier == "syscall0")
return Token::make_with_line({TT_Syscall0, {prev_line, prev_column, loc.fname}}, current_line_text);
return std::move(
Token::make_with_line({TT_Syscall0, {prev_line, prev_column, location.filename}}, current_line_text));
if (identifier == "syscall1")
return Token::make_with_line({TT_Syscall1, {prev_line, prev_column, loc.fname}}, current_line_text);
return std::move(
Token::make_with_line({TT_Syscall1, {prev_line, prev_column, location.filename}}, current_line_text));
if (identifier == "syscall2")
return Token::make_with_line({TT_Syscall2, {prev_line, prev_column, loc.fname}}, current_line_text);
return std::move(
Token::make_with_line({TT_Syscall2, {prev_line, prev_column, location.filename}}, current_line_text));
if (identifier == "syscall3")
return Token::make_with_line({TT_Syscall3, {prev_line, prev_column, loc.fname}}, current_line_text);
return std::move(
Token::make_with_line({TT_Syscall3, {prev_line, prev_column, location.filename}}, current_line_text));
if (identifier == "syscall4")
return Token::make_with_line({TT_Syscall4, {prev_line, prev_column, loc.fname}}, current_line_text);
return std::move(
Token::make_with_line({TT_Syscall4, {prev_line, prev_column, location.filename}}, current_line_text));
if (identifier == "syscall5")
return Token::make_with_line({TT_Syscall5, {prev_line, prev_column, loc.fname}}, current_line_text);
if (identifier == "compmacro")
return Token::make_with_line({TT_CompilerMacro, {prev_line, prev_column, loc.fname}}, current_line_text);
return std::move(
Token::make_with_line({TT_Syscall5, {prev_line, prev_column, location.filename}}, current_line_text));
if (identifier == "let")
return Token::make_with_line({TT_Let, {prev_line, prev_column, loc.fname}}, current_line_text);
return std::move(
Token::make_with_line({TT_Let, {prev_line, prev_column, location.filename}}, current_line_text));
if (identifier == "in")
return Token::make_with_line({TT_In, {prev_line, prev_column, loc.fname}}, current_line_text);
return Token::make_with_line({TT_Identifier, identifier, {prev_line, prev_column, loc.fname}}, current_line_text);
return std::move(
Token::make_with_line({TT_In, {prev_line, prev_column, location.filename}}, current_line_text));
return std::move(Token::make_with_line({TT_Identifier, identifier, {prev_line, prev_column, location.filename}},
current_line_text));
}
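
The goto-based rewrite above deduplicates two copies of a long keyword if-chain. A common alternative, shown here only as a sketch reusing names from this file, is a static lookup table:

#include <string>
#include <unordered_map>

// One lookup instead of a chain of string comparisons.
static const std::unordered_map<std::string, TokenType> keywords = {
    {"import", TT_Import},     {"let", TT_Let},           {"in", TT_In},
    {"compmacro", TT_CompilerMacro},
    {"syscall0", TT_Syscall0}, {"syscall1", TT_Syscall1}, {"syscall2", TT_Syscall2},
    {"syscall3", TT_Syscall3}, {"syscall4", TT_Syscall4}, {"syscall5", TT_Syscall5},
};

// auto it = keywords.find(identifier);
// if (it != keywords.end())
//     return Token::make_with_line({it->second, {prev_line, prev_column, location.filename}}, current_line_text);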
Token Lexer::create_number()
{
std::vector<char> characters;
int prev_line = loc.line;
int prev_column = loc.column;
int prev_line = location.line;
int prev_column = location.column;
int dot_count = 0;
characters.push_back(current_char);
@@ -316,60 +259,53 @@ Token Lexer::create_number()
}
else
{
Error::throw_warning(loc, current_line_text, "floats can only have one dot");
Error::throw_warning(location, current_line_text, "floats can only have one dot");
this->rewind();
float tk_value = std::stof(std::string(characters.begin(), characters.end()).c_str());
return Token::make_with_line({TT_Float, tk_value, {prev_line, prev_column, loc.fname}},
current_line_text);
goto end;
}
}
else
{
this->rewind();
if (dot_count != 0)
{
float tk_value = std::stof(std::string(characters.begin(), characters.end()).c_str());
return Token::make_with_line({TT_Float, tk_value, {prev_line, prev_column, loc.fname}},
current_line_text);
}
int tk_value = atoi(std::string(characters.begin(), characters.end()).c_str());
return Token::make_with_line({TT_Number, tk_value, {prev_line, prev_column, loc.fname}}, current_line_text);
goto end;
}
}
end:
if (dot_count != 0)
{
float tk_value = std::stof(std::string(characters.begin(), characters.end()).c_str());
return Token::make_with_line({TT_Float, tk_value, {prev_line, prev_column, loc.fname}}, current_line_text);
float value = std::stof(std::string(characters.begin(), characters.end()).c_str());
return std::move(
Token::make_with_line({TT_Float, value, {prev_line, prev_column, location.filename}}, current_line_text));
}
int tk_value = atoi(std::string(characters.begin(), characters.end()).c_str());
return Token::make_with_line({TT_Number, tk_value, {prev_line, prev_column, loc.fname}}, current_line_text);
int value = std::atoi(std::string(characters.begin(), characters.end()).c_str());
return std::move(
Token::make_with_line({TT_Number, value, {prev_line, prev_column, location.filename}}, current_line_text));
}
Token Lexer::create_string()
{
std::vector<char> characters;
int prev_line = loc.line;
int prev_column = loc.column;
int prev_line = location.line;
int prev_column = location.column;
while (this->advance())
{
if (current_char == '\n')
{
this->rewind();
Error::throw_error(loc, current_line_text, "expected end of string but got newline");
Error::throw_error(location, current_line_text, "expected end of string but got newline");
}
if (current_char == '\'')
{
std::string identifier(characters.begin(), characters.end());
return Token::make_with_line({TT_String, identifier, {prev_line, prev_column, loc.fname}},
current_line_text);
return std::move(Token::make_with_line({TT_String, identifier, {prev_line, prev_column, location.filename}},
current_line_text));
}
if (current_char == '\\')
{
if (index + 1 == current_lexed_text.size())
{
Error::throw_error(loc, current_line_text, "unfinished escape sequence");
Error::throw_error(location, current_line_text, "unfinished escape sequence");
}
switch (current_lexed_text[index + 1])
{
@@ -383,17 +319,17 @@ Token Lexer::create_string()
characters.push_back('\\');
break;
default:
Error::throw_error(loc, current_line_text, "unknown escape sequence");
Error::throw_error(location, current_line_text, "unknown escape sequence");
}
++index;
++loc.column;
++location.column;
continue;
}
characters.push_back(current_char);
}
this->rewind();
Error::throw_error(loc, current_line_text, "expected end of string but got EOF");
Error::throw_error(location, current_line_text, "expected end of string but got EOF");
return Token(TT_Null, loc); // unreachable since Error::throw_error calls exit()
return *(Token*)nullptr; // unreachable
}
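
The new `return *(Token*)nullptr;` is undefined behavior if it is ever reached. Since Error::throw_error terminates the process, marking it [[noreturn]] would let the compiler accept create_string without any trailing return. A sketch for Error.h, with the signature inferred from the calls above:

[[noreturn]] static void throw_error(const Location& loc, const std::string line_text, const std::string details);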

View File

@@ -13,15 +13,15 @@ typedef std::vector<Token> TokenStream;
class Lexer
{
private:
Location loc;
Location prev_loc;
Location location;
Location previous_location;
int advance();
int rewind();
char current_char;
int index;
Lexer(const std::string& fname);
Lexer(const std::string& filename);
std::string current_line_text;
std::string previous_line_text;
@@ -37,16 +37,13 @@ class Lexer
bool is_in_string(const std::string& string, const char& character);
public:
/* An array containing Sapphire's current data types. */
static const std::array<std::string, TYPE_COUNT> types;
~Lexer();
/* Lex the given text, turning it into a stream of tokens. */
TokenStream lex(const std::string& text);
/* Create a new Lexer and return a pointer to it. */
static std::unique_ptr<Lexer> make_lexer(const std::string& fname);
static std::unique_ptr<Lexer> make_lexer(const std::string& filename);
/* If the Lexer is lexing an imported file, give it the location in the parent file at which it was imported. */
static void assign_parent_location(std::unique_ptr<Lexer>& lexer, const std::shared_ptr<Location>& loc);
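
Typical usage of this interface, mirroring main.cpp further down in this diff:

auto lexer = Lexer::make_lexer(filename);
TokenStream tokens = lexer->lex(FileIO::read_all(filename));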

View File

@@ -2,7 +2,8 @@
#include "FormatString/FormatString.hpp"
#include <sstream>
Location::Location(int ln, int col, std::string file) : line(ln), column(col), fname(file)
Location::Location(int line, int column, std::string filename)
: line(line), column(column), filename(std::move(filename))
{
}
@@ -12,12 +13,7 @@ Location::~Location()
std::string Location::str() const
{
return format_string("%s:%d:%d", fname, line, column);
}
std::string Location::paren_str() const
{
return format_string("(%s:%d:%d)", fname, line, column);
return format_string("%s:%d:%d", filename, line, column);
}
void Location::advance()
@@ -36,13 +32,16 @@ void Location::pos_from_char(const char& character)
void Location::operator=(const Location& other)
{
this->parent = other.parent;
this->line = other.line;
this->column = other.column;
this->fname = other.fname;
Location copied = copy(other);
line = copied.line;
column = copied.column;
parent = copied.parent;
filename = std::move(copied.filename);
}
void Location::copy(const Location& other)
Location Location::copy(const Location& other)
{
this->operator=(other);
Location result(other.line, other.column, other.filename);
result.parent = other.parent;
return std::move(result);
}
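
Both operator= and the new copy above re-implement what the compiler-generated members already do for a struct of two ints, a std::string and a std::shared_ptr. A rule-of-zero sketch (other constructors omitted; note the defaulted operator= returns Location& rather than void):

struct Location
{
    int line;
    int column;
    std::string filename;
    std::shared_ptr<Location> parent = nullptr;
    // Generated members copy every field, parent included.
    Location(const Location& other) = default;
    Location& operator=(const Location& other) = default;
};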

View File

@@ -6,20 +6,18 @@ struct Location
{
int line;
int column;
std::string fname;
std::string filename;
/* The location at which this location was imported, for error traces in imported files. */
std::shared_ptr<Location> parent = nullptr;
/* Creates a Location with the given parameters. */
Location(int ln, int col, std::string file);
Location(int line, int column, std::string filename);
~Location();
/* Returns a string of the format FILE:LINE:COL. */
std::string str() const;
/* Returns a string of the format (FILE:LINE:COL). */
std::string paren_str() const;
/* Advance to the next column in the file. */
void advance();
@@ -29,6 +27,6 @@ struct Location
void operator=(const Location& other);
/* Copies the other location into this one. */
void copy(const Location& other);
/* Returns a copy of the original Location. */
static Location copy(const Location& other);
};

View File

@@ -1,82 +0,0 @@
#include "Normalizer.h"
#include "utils.h"
TokenStream Normalizer::normalize(const TokenStream& input)
{
TokenStream result;
int i = 0;
while (i < input.size())
{
Token current = input[i];
if (current.tk_type == TT_Null)
{
i++;
continue;
}
if (current.tk_type == TT_Equal)
{
if (i + 1 != input.size())
{
if (input[i + 1].tk_type == TT_Equal)
{
i += 2;
result.push_back(current.copy_with_new_type(TT_Equals));
continue;
}
}
i++;
result.push_back(current);
continue;
}
if (current.tk_type == TT_Exclamation)
{
if (i + 1 != input.size())
{
if (input[i + 1].tk_type == TT_Equal)
{
i += 2;
result.push_back(current.copy_with_new_type(TT_NEqual));
continue;
}
}
i++;
result.push_back(current);
continue;
}
if (current.tk_type == TT_GreaterThan)
{
if (i + 1 != input.size())
{
if (input[i + 1].tk_type == TT_Equal)
{
i += 2;
result.push_back(current.copy_with_new_type(TT_GTE));
continue;
}
}
i++;
result.push_back(current);
continue;
}
if (current.tk_type == TT_LessThan)
{
if (i + 1 != input.size())
{
if (input[i + 1].tk_type == TT_Equal)
{
i += 2;
result.push_back(current.copy_with_new_type(TT_LTE));
continue;
}
}
i++;
result.push_back(current);
continue;
}
i++;
result.push_back(current);
continue;
}
return result;
}

View File

@@ -1,14 +0,0 @@
#pragma once
#include "Lexer.h" // for TokenStream
#include "Token.h"
/* Namespace to normalize a TokenStream. */
namespace Normalizer
{
/* Some tokens are difficult for the Lexer to parse right, or maybe I'm just lazy.
Anyways, this function transforms > and = tokens next to each other into a single >=, which has a different meaning,
etc... For example: = + = : ==, < + = : <=...
It also takes blank tokens and removes them. */
TokenStream normalize(const TokenStream& input);
} // namespace Normalizer
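
Every branch in the removed normalize() implements the same rule, "token X followed by = collapses into one token". A table-driven sketch of that idea, using only the token types from this file:

#include <unordered_map>

static const std::unordered_map<TokenType, TokenType> merges_with_equal = {
    {TT_Equal, TT_Equals},       // = =  ->  ==
    {TT_Exclamation, TT_NEqual}, // ! =  ->  !=
    {TT_GreaterThan, TT_GTE},    // > =  ->  >=
    {TT_LessThan, TT_LTE},       // < =  ->  <=
};

// In the loop:
// auto it = merges_with_equal.find(current.tk_type);
// if (it != merges_with_equal.end() && i + 1 != input.size() && input[i + 1].tk_type == TT_Equal)
// {
//     result.push_back(current.copy_with_new_type(it->second));
//     i += 2;
//     continue;
// }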

View File

@@ -25,7 +25,7 @@ std::shared_ptr<ProgramNode> Parser::parse()
auto result = toplevel();
if (result.is_error()) result.ethrow();
final_result->append(result.get());
if (current_token->tk_type == TT_EOF) break;
if (current_token->type == TT_EOF) break;
}
return final_result;
}
@@ -44,16 +44,18 @@ Result<ExprNode> Parser::factor()
{
Token& token = *current_token;
if (token.tk_type == TT_Number)
if (token.type == TT_Number)
{
advance();
return Ok<ExprNode>(new IntegerNode(token.int_value), &token);
assert(token.int_value.has_value());
return Ok<ExprNode>(new IntegerNode(token.int_value.value()), &token);
}
if (token.tk_type == TT_Float)
if (token.type == TT_Float)
{
advance();
return Ok<ExprNode>(new FloatNode(token.float_value), &token);
assert(token.float_value.has_value());
return Ok<ExprNode>(new FloatNode(token.float_value.value()), &token);
}
return Err<ExprNode>("expected a number", &token);
@@ -63,13 +65,13 @@ Result<ExprNode> Parser::term()
{
Result<ExprNode> left = factor();
if (left.is_error()) return left;
while (current_token->tk_type == TT_Mul || current_token->tk_type == TT_Div)
while (current_token->type == TT_Mul || current_token->type == TT_Div)
{
Token& op = *current_token;
advance();
Result<ExprNode> right = factor();
if (right.is_error()) return right;
left = Ok<ExprNode>(new MulNode(left.get(), right.get(), op.tk_type == TT_Mul ? '*' : '/'), &op);
left = Ok<ExprNode>(new MulNode(left.get(), right.get(), op.type == TT_Mul ? '*' : '/'), &op);
}
return left;
}
@@ -78,13 +80,13 @@ Result<ExprNode> Parser::expr()
{
Result<ExprNode> left = term();
if (left.is_error()) return left;
while (current_token->tk_type == TT_Plus || current_token->tk_type == TT_Minus)
while (current_token->type == TT_Plus || current_token->type == TT_Minus)
{
Token& op = *current_token;
advance();
Result<ExprNode> right = term();
if (right.is_error()) return right;
left = Ok<ExprNode>(new SumNode(left.get(), right.get(), op.tk_type == TT_Plus ? '+' : '-'), &op);
left = Ok<ExprNode>(new SumNode(left.get(), right.get(), op.type == TT_Plus ? '+' : '-'), &op);
}
return left;
}
@@ -98,35 +100,35 @@ Result<TopLevelNode> Parser::toplevel()
Result<TopLevelNode> Parser::function()
{
FunctionPrototype proto;
Token* ftoken = current_token;
Token* start_token = current_token;
proto.returnType = llvm::IntegerType::getInt32Ty(*globalContext); // FIXME: allow specifying return type
proto.arguments = {}; // FIXME: allow specifying arguments
if (current_token->tk_type != TT_Let)
return Err<TopLevelNode>("Expected let at the beginning of a function", current_token);
if (current_token->type != TT_Let) return Err<TopLevelNode>("Expected let", current_token);
advance();
if (current_token->tk_type != TT_At)
return Err<TopLevelNode>("Expected @ at the beginning of a function", current_token);
if (current_token->type != TT_At) return Err<TopLevelNode>("Expected @", current_token);
advance();
if (current_token->tk_type != TT_Identifier) return Err<TopLevelNode>("Expected an identifier", current_token);
if (current_token->type != TT_Identifier) return Err<TopLevelNode>("Expected an identifier", current_token);
else
proto.name = current_token->string_value;
advance();
if (current_token->tk_type != TT_In && current_token->tk_type != TT_Semicolon)
return Err<TopLevelNode>("Expected 'in' or semicolon", current_token);
if (current_token->tk_type == TT_Semicolon)
{
advance();
return Ok<TopLevelNode>(new EmptyFunctionNode(proto), ftoken);
assert(current_token->string_value.has_value());
proto.name = current_token->string_value.value();
}
advance();
if (current_token->tk_type != TT_LBracket)
if (current_token->type != TT_In && current_token->type != TT_Semicolon)
return Err<TopLevelNode>("Expected 'in' or semicolon", current_token);
if (current_token->type == TT_Semicolon)
{
advance();
return Ok<TopLevelNode>(new EmptyFunctionNode(proto), start_token);
}
advance();
if (current_token->type != TT_LBracket)
return Err<TopLevelNode>("Invalid syntax",
current_token); // FIXME: Do not be lazy and return a meaningful error message.
advance();
Result<ExprNode> body = expr();
if (body.is_error()) return Err<TopLevelNode>(body.error(), body.token());
if (current_token->tk_type != TT_RBracket)
return Err<TopLevelNode>(format_string("Invalid syntax %d", current_token->tk_type), current_token);
if (current_token->type != TT_RBracket) return Err<TopLevelNode>("Invalid syntax", current_token);
advance();
return Ok<TopLevelNode>(new FunctionNode(proto, body.get()), ftoken);
return Ok<TopLevelNode>(new FunctionNode(proto, body.get()), start_token);
}
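
The factor/term/expr layering above is classic recursive descent, with operator precedence encoded in the call structure. The grammar it implements, inferred from the code rather than from any project documentation:

// expr     := term (('+' | '-') term)*        -- SumNode, lowest precedence
// term     := factor (('*' | '/') factor)*    -- MulNode
// factor   := NUMBER | FLOAT                  -- IntegerNode / FloatNode
// function := 'let' '@' IDENTIFIER (';' | 'in' '{' expr '}')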

View File

@@ -14,7 +14,7 @@ template<typename T> class Result
}
void ethrow()
{
Error::throw_error(m_token->loc, m_token->line(), m_error);
Error::throw_error(m_token->location, m_token->line(), m_error);
}
Token* token()
{
@@ -38,7 +38,7 @@ template<typename T> class Result
Result(T* result, Token* token) : m_result(result), m_token(token), m_is_error(false)
{
}
Result(const std::string& error, Token* token) : m_error(std::move(error)), m_token(token), m_is_error(true)
Result(std::string&& error, Token* token) : m_error(error), m_token(token), m_is_error(true)
{
}
};
@@ -54,7 +54,7 @@ template<typename T> class Ok final : public Result<T>
template<typename T> class Err final : public Result<T>
{
public:
Err(const std::string& error, Token* token) : Result<T>(std::move(error), token)
Err(std::string error, Token* token) : Result<T>(std::move(error), token)
{
}
};
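
A subtlety in the new constructor: `m_error(error)` copies even though the parameter is an rvalue reference, because a named rvalue reference is itself an lvalue. If the intent was to move, the sketch would be:

Result(std::string&& error, Token* token) : m_error(std::move(error)), m_token(token), m_is_error(true)
{
}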

View File

@@ -1,205 +1,61 @@
#include "Token.h"
#include "utils.h"
const std::string token_strings[] = {
"TT_IDENTIFIER", "TT_NUMBER", "TT_FLOAT", "TT_KEYWORD", "TT_STRING", "TT_PLUS",
"TT_MINUS", "TT_MUL", "TT_DIV", "TT_AT", "TT_EQUAL", "TT_LESSTHAN",
"TT_GREATERTHAN", "TT_LPAREN", "TT_RPAREN", "TT_LBRACKET", "TT_RBRACKET", "TT_SEMICOLON",
"TT_LOADEDSTRING", "TT_EOF", "TT_NULL", "TT_EQUALS", "TT_GTE", "TT_LTE",
"TT_PERIOD", "TT_COMMA", "TT_PATH", "TT_EXCLAMATION", "TT_NEQUAL", "TT_LSQB",
"TT_RSQB", "TT_TYPE", "TT_IMPORT", "TT_SYSCALL0", "TT_SYSCALL1", "TT_SYSCALL2",
"TT_SYSCALL3", "TT_SYSCALL4", "TT_SYSCALL5", "TT_COMPILERMACRO"};
Token::Token(const TokenType& type) : tk_type(type), loc(0, 0, "")
Token::Token(TokenType type) : type(type), location(0, 0, "")
{
}
Token::Token(const TokenType& type, const Location& location) : tk_type(type), loc(location)
Token::Token(TokenType type, const Location& location) : type(type), location(location)
{
}
Token::Token(const TokenType& type, const std::string& val) : tk_type(type), loc(0, 0, ""), string_value(val)
Token::Token(TokenType type, std::string value) : type(type), location(0, 0, ""), string_value(std::move(value))
{
}
Token::Token(const TokenType& type, const int& val, const Location& location)
: tk_type(type), int_value(val), loc(location)
Token::Token(TokenType type, int value, const Location& location) : type(type), int_value(value), location(location)
{
}
Token::Token(const TokenType& type, const std::string& val, const Location& location)
: tk_type(type), string_value(val), loc(location)
Token::Token(TokenType type, std::string value, const Location& location)
: type(type), string_value(std::move(value)), location(location)
{
}
Token::Token(const TokenType& type, const float& val, const Location& location)
: tk_type(type), float_value(val), loc(location)
Token::Token(TokenType type, float value, const Location& location) : type(type), float_value(value), location(location)
{
}
Token::~Token()
Token Token::copy_with_new_type(const TokenType& type) const
{
}
Token result(type, location);
Token Token::copy_with_new_type(const TokenType& type)
{
Token result(type, loc);
result.m_line_text = m_line_text;
result.int_value = int_value;
result.float_value = float_value;
result.string_value = string_value;
result.line_text = line_text;
return result;
}
std::string Token::to_string() const
{
std::string details = loc.paren_str();
if (tk_type == TT_Number)
if (int_value.has_value())
{
return format_string("INT:%d %s", int_value, details);
result.int_value = int_value.value();
}
else if (tk_type == TT_Float)
else if (float_value.has_value())
{
return format_string("FLOAT:%f %s", float_value, details);
result.float_value = float_value.value();
}
else if (tk_type == TT_Identifier)
else if (string_value.has_value())
{
return format_string("ID:%s %s", string_value, details);
result.string_value = string_value.value();
}
else if (tk_type == TT_Keyword)
{
return format_string("KEYWORD:%s %s", string_value, details);
}
else if (tk_type == TT_Type)
{
return format_string("TYPE:%s %s", string_value, details);
}
else if (tk_type == TT_String)
{
replace(const_cast<std::string&>(string_value), "\n", "\\n");
return format_string("STRING:'%s' %s", string_value, details);
}
switch (tk_type)
{
case TT_EOF:
return "EOF " + details;
case TT_Plus:
return "PLUS " + details;
case TT_Minus:
return "MINUS " + details;
case TT_Mul:
return "MUL " + details;
case TT_Div:
return "DIV " + details;
case TT_At:
return "AT " + details;
case TT_Equal:
return "EQUAL " + details;
case TT_LessThan:
return "LESSTHAN " + details;
case TT_GreaterThan:
return "GREATERTHAN " + details;
case TT_LParen:
return "LPAREN " + details;
case TT_RParen:
return "RPAREN " + details;
case TT_LBracket:
return "LBRACKET " + details;
case TT_RBracket:
return "RBRACKET " + details;
case TT_Semicolon:
return "SEMICOLON " + details;
case TT_LoadedString:
return "LDSTRING " + details;
case TT_Equals:
return "EQUALS " + details;
case TT_GTE:
return "GTE " + details;
case TT_LTE:
return "LTE " + details;
case TT_Period:
return "PERIOD " + details;
case TT_Comma:
return "COMMA " + details;
case TT_Path:
return "PATH " + details;
case TT_Exclamation:
return "EXCLAMATION " + details;
case TT_NEqual:
return "NEQUAL " + details;
case TT_LSQB:
return "LEFTSQB " + details;
case TT_RSQB:
return "RIGHTSQB " + details;
case TT_Import:
return "IMPORT " + details;
case TT_Syscall0:
return "SYSCALL0 " + details;
case TT_Syscall1:
return "SYSCALL1 " + details;
case TT_Syscall2:
return "SYSCALL2 " + details;
case TT_Syscall3:
return "SYSCALL3 " + details;
case TT_Syscall4:
return "SYSCALL4 " + details;
case TT_Syscall5:
return "SYSCALL5 " + details;
case TT_CompilerMacro:
return "COMPMACRO " + details;
case TT_Let:
return "LET " + details;
case TT_In:
return "IN " + details;
}
return "";
}
std::string Token::line() const
{
return this->line_text;
return std::move(result);
}
Token Token::make_with_line(const Token& origin, const std::string& line_text)
{
Token result(origin.tk_type, origin.loc);
Token result = origin.copy_with_new_type(origin.type);
result.int_value = origin.int_value;
result.float_value = origin.float_value;
result.string_value = origin.string_value;
result.line_text = line_text;
return result;
return std::move(result);
}
void Token::operator=(const Token& other)
{
tk_type = other.tk_type;
int_value = other.int_value;
string_value = other.string_value;
float_value = other.float_value;
line_text = other.line_text;
}
void Token::erase(Token& tk)
{
tk.tk_type = TT_Null;
}
bool Token::match_token_types(const std::vector<Token>& a, const std::vector<Token>& b, int count)
{
int size = [](int a, int b) { return a > b ? b : a; }(a.size() - count, b.size());
for (int i = 0; i < size; ++i)
{
if (a[i + count].tk_type != b[i].tk_type)
{
return false;
}
}
return true;
*this = other.copy_with_new_type(other.type);
}
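
A side note on copy_with_new_type: std::optional assigns through wholesale, engaged or empty, so the three has_value() branches can collapse into plain assignments:

Token Token::copy_with_new_type(const TokenType& type) const
{
    Token result(type, location);
    result.int_value = int_value;       // copies the optional, engaged or not
    result.float_value = float_value;
    result.string_value = string_value;
    result.m_line_text = m_line_text;
    return result;
}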

View File

@@ -25,62 +25,56 @@ enum TokenType
TT_Semicolon,
TT_LoadedString,
TT_EOF,
TT_Null,
TT_Equals,
TT_GTE,
TT_LTE,
TT_Period,
TT_Comma,
TT_Path,
TT_Exclamation,
TT_NEqual,
TT_LSQB,
TT_RSQB,
TT_Type,
TT_Import,
TT_Syscall0,
TT_Syscall1,
TT_Syscall2,
TT_Syscall3,
TT_Syscall4,
TT_Syscall5,
TT_CompilerMacro,
TT_Let,
TT_In
TT_In,
TT_Colon,
};
extern const std::string token_strings[];
/* Struct to represent tokens generated by the Lexer. */
struct Token
{
TokenType tk_type;
TokenType type;
int int_value;
std::string string_value;
float float_value;
std::optional<int> int_value;
std::optional<std::string> string_value;
std::optional<float> float_value;
Location loc;
Location location;
Token(const TokenType& type);
Token(TokenType type);
Token(const TokenType& type, const Location& location);
Token(TokenType type, const Location& location);
Token(const TokenType& type, const int& val, const Location& location);
Token(TokenType type, int value, const Location& location);
Token(const TokenType& type, const std::string& val, const Location& location);
Token(TokenType type, std::string value, const Location& location);
Token(const TokenType& type, const std::string& val);
Token(TokenType type, std::string value);
Token(const TokenType& type, const float& val, const Location& location);
Token(TokenType type, float val, const Location& location);
~Token();
/* Return a string representation of the Token's contents. */
std::string to_string() const;
~Token() = default;
/* Return the contents of the line where the Token was located. */
std::string line() const;
std::string line() const
{
return m_line_text;
}
/* Return a copy of the original token, but adding the contents of the line where
the token was located. */
@@ -88,19 +82,13 @@ struct Token
void operator=(const Token& other);
/* Convert the Token into a blank token (does not delete it), so that the Normalizer can remove it afterwards.
This is to not alter vectors while iterating over them. */
static void erase(Token& tk);
/* Return a copy of this Token, but with its TokenType changed. */
Token copy_with_new_type(const TokenType& type);
/* Iterate over two vectors of Tokens, starting from count for vector A, starting from 0 for vector B, checking if
the current Tokens' types match. If at any point they don't, return false. Else, return true. */
static bool match_token_types(const std::vector<Token>& a, const std::vector<Token>& b, int count);
Token copy_with_new_type(const TokenType& type) const;
private:
std::string line_text;
// FIXME: this should be moved to Location, to remove all Token* that are only used to throw errors at a certain
// location.
std::string m_line_text;
};
/* typedef to make it easier to see a what a std::vector of tokens is being used for. */

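With the value fields now std::optional, call sites either assert before access, as the Parser above does, or supply a fallback. A minimal usage sketch (the default of 0 is illustrative only):

if (token.type == TT_Number)
{
    int value = token.int_value.value_or(0);
}
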
View File

@@ -2,9 +2,7 @@
#include "FileIO.h"
#include "GlobalContext.h"
#include "IRBuilder.h"
#include "Importer.h"
#include "Lexer.h"
#include "Normalizer.h"
#include "Parser.h"
#include "sapphirepch.h"
#include "utils.h"
@@ -24,16 +22,6 @@ int main(int argc, char** argv)
result = lexer->lex(contents);
}
Importer::init(FileIO::remove_file_extension(fname));
{
benchmark("Importing");
result = Importer::evaluate(result);
}
{
benchmark("Normalizing");
result = Normalizer::normalize(result);
}
initGlobalContext();
auto parser = Parser::new_parser(result);

View File

@@ -3,6 +3,7 @@
#include <functional>
#include <iostream>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <vector>

View File

@@ -4,6 +4,6 @@
"flags": [],
"exit-code": 1,
"stdout": "",
"stderr": "\u001b[1;1mtests/import-inexistent.sp:1:8: \u001b[31;49merror: \u001b[0;0mfile not found\n1 import penguin_boi;\n \u001b[31;49m^\u001b[0;0m\n"
"stderr": "\u001b[1;1mtests/import-inexistent.sp:1:1: \u001b[31;49merror: \u001b[0;0mExpected let\n1 \n \u001b[31;49m^\u001b[0;0m\n"
}
}

View File

@@ -4,6 +4,6 @@
"flags": [],
"exit-code": 1,
"stdout": "",
"stderr": "\u001b[1;1mtests/simple.sp:1:1: \u001b[31;49merror: \u001b[0;0mExpected let at the beginning of a function\n1 const { outln } from @'core/io';\n \u001b[31;49m^\u001b[0;0m\n"
"stderr": "\u001b[1;1mtests/simple.sp:1:1: \u001b[31;49merror: \u001b[0;0mExpected let\n1 \n \u001b[31;49m^\u001b[0;0m\n"
}
}

View File

@@ -4,13 +4,8 @@
"flags": [
"--wimport"
],
"exit-code": 0,
"stdout": "\u001b[1;1mtests/wimport.sp:1:8: \u001b[33;49mwarning: \u001b[0;0mfile already imported, skipping\n1 import tests/wimport;\n \u001b[33;49m^\u001b[0;0m\n",
"stderr": ""
},
"run": {
"exit-code": 0,
"exit-code": 1,
"stdout": "",
"stderr": ""
"stderr": "\u001b[1;1mtests/wimport.sp:1:1: \u001b[31;49merror: \u001b[0;0mExpected let\n1 \n \u001b[31;49m^\u001b[0;0m\n"
}
}