/* Original implementation of the Sapphire compiler, in one file.

   WARNING: THIS IS LEGACY. This was the original thing I wrote to make a simple compiler,
   but it was clear it wasn't going to cut it.

   The code was difficult to read with everything scattered across the file, and I had to
   throw random forward declarations everywhere.

   It was also limited, since my intention was to make it output assembly code manually.
   As you can see, the definitions are for x86-64 Linux assembly.

   Now I'm using LLVM, which makes it a lot easier to do stuff.

   Eventually, I found that splitting the code into multiple files, using classes and such,
   would be much better and easier.

   So I began a rewrite of this compiler in the src/ directory, which turned out to be far
   easier to write and made me much more productive.

   The src/ compiler has come a long way since then, and this code is just kept around so
   that I can look at it in the future and facepalm at it.

   I'm aware of Git, and I'm using it, but I didn't think keeping this around would do any
   harm. It's very clearly separated from the rest of the code. */
|
|
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <array>
#include <fstream>
#include <iostream>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
|
|
|
|
// Upper bound on g_importCount checked by evaluate_imports before it reports
// "import tree too deep".
#define IMPORT_MAX_DEEP 100

// Runtime helper emitted into every program: returns in rax the length of the
// null-terminated string pointed to by rdi.
#define STRLEN "strlen: ; -- length of null-terminated string in rdi --\n xor rax, rax\n mov rcx, -1\n cld\n repne scasb\n mov rax, rcx\n add rax, 2\n neg rax\n ret\n"

// Runtime helper emitted into every program: writes the null-terminated string
// pointed to by rdi using syscall number 1 with rdi = 1.
#define PRINT "print: ; -- print null-terminated string in rdi --\n mov rsi, rdi\n call strlen\n mov rdx, rax\n mov rdi, 1\n mov rax, 1\n syscall\n ret\n"

// Assembler backend selection: exactly one of __FASM / __NASM must be defined.
#define __FASM

#ifdef __FASM
#define ASM_HEADER "format ELF64 executable 3\n"
#define ASM_TEXT_SECTION "segment readable executable\n"
#define ASM_DATA_SECTION "segment readable writable\n"
#define ASM_BSS_SECTION ""
#define ASM_ENTRY "entry start\n"
#define ASM_ENTRY_DECL "start:\n"
#elif defined(__NASM)
#define ASM_HEADER "BITS 64\n"
#define ASM_TEXT_SECTION "section .text\n"
#define ASM_DATA_SECTION "section .data\n"
#define ASM_BSS_SECTION "section .bss\n"
#define ASM_ENTRY "global _start\n"
#define ASM_ENTRY_DECL "_start:\n"
#else
// Without a backend none of the ASM_* macros exist; fail here with a clear
// message instead of with a cascade of undefined-identifier errors in main().
#error "No assembler backend selected: define __FASM or __NASM"
#endif
|
|
|
|
// Single-character classes used by the lexer. Each one is matched against a
// one-character C string.
std::regex letter = std::regex("[a-zA-Z_]");        // first character of an identifier
std::regex identifier = std::regex("[a-zA-Z0-9_]"); // any character of an identifier
std::regex number = std::regex("[0-9]");            // decimal digit
std::regex whitespace = std::regex("[\t \n]");      // characters skipped between tokens

// Incremented by evaluate_imports each time it re-runs on an expanded token stream.
int g_importCount{0};

// Text of the source line currently being lexed; copied into every Token as line_ctx.
std::string g_current_line{};

// Names recorded by main (input file without extension) and evaluate_imports
// (imported identifiers), used to reject duplicate imports.
std::vector<std::string> imported_files{};
|
|
|
|
// Kinds of token produced by the lexer. The enumerator order must stay in sync
// with the token_strings table, which is indexed by these values.
enum TokenType
{
    // Tokens that carry a value
    TT_Identifier,
    TT_Number,
    TT_Float,
    TT_Keyword,
    TT_String,

    // Operators
    TT_Plus,
    TT_Minus,
    TT_Mul,
    TT_Div,
    TT_At,
    TT_Equal,
    TT_LessThan,
    TT_GreaterThan,

    // Grouping and punctuation ('(' ')' '{' '}' ';')
    TT_LParen,
    TT_RParen,
    TT_LBracket,
    TT_RBracket,
    TT_Semicolon,

    // Special markers
    TT_LoadedString,
    TT_EOF, // appended by lex_tokens at end of input
    TT_Null // placeholder that evaluate_imports writes over consumed import statements
};
|
|
|
|
// Printable names for token kinds; tokentype_as_string indexes this table by
// the numeric value of the token type, so the order matters.
static std::string token_strings[]{
    "TT_IDENTIFIER",  "TT_NUMBER",      "TT_FLOAT",
    "TT_KEYWORD",     "TT_STRING",

    "TT_PLUS",        "TT_MINUS",       "TT_MUL",
    "TT_DIV",         "TT_AT",          "TT_EQUAL",
    "TT_LESSTHAN",    "TT_GREATERTHAN",

    "TT_LPAREN",      "TT_RPAREN",      "TT_LBRACKET",
    "TT_RBRACKET",    "TT_SEMICOLON",

    "TT_LOADEDSTRING", "TT_EOF",        "TT_NULL"};
|
|
|
|
struct Token
|
|
{
|
|
TokenType tk_type;
|
|
int int_value;
|
|
std::string string_value;
|
|
float float_value;
|
|
int line;
|
|
int column;
|
|
std::string fname;
|
|
std::string line_ctx;
|
|
Token(const TokenType& type, const int& lineno, const int& colno, const std::string& name)
|
|
: tk_type(type), line(lineno), column(colno), fname(name)
|
|
{
|
|
line_ctx = g_current_line;
|
|
}
|
|
|
|
Token(const TokenType& type, const int& val, const int& lineno, const int& colno, const std::string& name)
|
|
: tk_type(type), int_value(val), line(lineno), column(colno), fname(name)
|
|
{
|
|
line_ctx = g_current_line;
|
|
}
|
|
|
|
Token(const TokenType& type, const std::string& val, const int& lineno, const int& colno, const std::string& name)
|
|
: tk_type(type), string_value(val), line(lineno), column(colno), fname(name)
|
|
{
|
|
line_ctx = g_current_line;
|
|
}
|
|
|
|
Token(const TokenType& type, const float& val, const int& lineno, const int& colno, const std::string& name)
|
|
: tk_type(type), float_value(val), line(lineno), column(colno), fname(name)
|
|
{
|
|
line_ctx = g_current_line;
|
|
}
|
|
|
|
std::string to_string() const
|
|
{
|
|
char linestr[32];
|
|
sprintf(linestr,"%d",line);
|
|
char colstr[32];
|
|
sprintf(colstr,"%d",column);
|
|
if(tk_type == TT_Number)
|
|
{
|
|
char num[32];
|
|
sprintf(num,"%d",int_value);
|
|
return "INT:" + std::string(num) + " (" + fname + ":" + std::string(linestr) + ":" + std::string(colstr) + ")";
|
|
} else if (tk_type == TT_Float)
|
|
{
|
|
char num[64];
|
|
sprintf(num,"%f",float_value);
|
|
return "FLOAT:" + std::string(num) + " (" + fname + ":" + std::string(linestr) + ":" + std::string(colstr) + ")";
|
|
}
|
|
else if (tk_type == TT_Identifier){
|
|
return "ID:" + string_value + " (" + fname + ":" + std::string(linestr) + ":" + std::string(colstr) + ")";
|
|
} else if (tk_type == TT_Keyword){
|
|
return "KEYWORD:" + string_value + " (" + fname + ":" + std::string(linestr) + ":" + std::string(colstr) + ")";
|
|
} else if (tk_type == TT_String)
|
|
{
|
|
return "STRING:" + std::string("\'") + string_value + std::string("\'") + " (" + fname + ":" + std::string(linestr) + ":" + std::string(colstr) + ")";
|
|
}
|
|
std::string details = std::string(" (") + fname + ":" + std::string(linestr) + ":" + std::string(colstr) + ")";
|
|
switch(tk_type)
|
|
{
|
|
case TT_EOF:
|
|
return "EOF" + details;
|
|
case TT_Plus:
|
|
return "PLUS" + details;
|
|
case TT_Minus:
|
|
return "MINUS" + details;
|
|
case TT_Mul:
|
|
return "MUL" + details;
|
|
case TT_Div:
|
|
return "DIV" + details;
|
|
case TT_At:
|
|
return "AT" + details;
|
|
case TT_Equal:
|
|
return "EQUAL" + details;
|
|
case TT_LessThan:
|
|
return "LESSTHAN" + details;
|
|
case TT_GreaterThan:
|
|
return "GREATERTHAN" + details;
|
|
case TT_LParen:
|
|
return "LPAREN" + details;
|
|
case TT_RParen:
|
|
return "RPAREN" + details;
|
|
case TT_LBracket:
|
|
return "LBRACKET" + details;
|
|
case TT_RBracket:
|
|
return "RBRACKET" + details;
|
|
case TT_Semicolon:
|
|
return "SEMICOLON" + details;
|
|
case TT_LoadedString:
|
|
return "LDSTRING" + details;
|
|
}
|
|
return "";
|
|
}
|
|
};
|
|
|
|
std::string tokentype_as_string(const TokenType& type)
|
|
{
|
|
return token_strings[type];
|
|
}
|
|
|
|
// Base record for parsed statements; type_name is a free-form tag naming the
// kind of statement.
struct Sentence
{
    std::string type_name{};
};
|
|
|
|
struct Function : public Sentence
|
|
{
|
|
std::string type_name = "function";
|
|
std::vector<Token> fun_tokens;
|
|
Function(std::vector<Token> tokens) : fun_tokens(tokens){}
|
|
};
|
|
|
|
struct DeclVar : public Sentence
|
|
{
|
|
std::string type_name = "decl";
|
|
std::vector<Token> vtokens;
|
|
DeclVar(std::vector<Token> tokens) : vtokens(tokens){}
|
|
};
|
|
|
|
// A registered variable: its size and the identifier naming it.
struct Variable
{
    int size;
    std::string identifier;

    Variable(int var_size, std::string var_name)
        : size(var_size),
          identifier(var_name)
    {
    }
};
|
|
|
|
// A string constant to be emitted into the data section: `data` is the string's
// contents and `identifier` is the assembly label it is emitted under.
struct StringLiteral
{
    std::string data;
    std::string identifier;

    StringLiteral(std::string contents, std::string label)
        : data(contents),
          identifier(label)
    {
    }
};
|
|
|
|
std::array<std::string, 3> keywords{"out", "var","import"};
|
|
|
|
std::vector<Variable> registered_vars;
|
|
std::vector<StringLiteral> registered_strings;
|
|
|
|
std::string make_asm_string(StringLiteral str);
|
|
std::vector<Token> lex_tokens(const std::string&, const std::string&);
|
|
std::vector<Token> evaluate_imports(const std::string& text, const std::vector<Token>& tokens);
|
|
std::string read_file(const std::string&);
|
|
void compiler_error(const std::string& text, const int& line, const int& column, const std::string& fname, const std::string& details);
|
|
void compiler_warning(const std::string& text, const int& line, const int& column, const std::string& fname, const std::string& details);
|
|
std::vector<Sentence> parse_tokens(const std::vector<Token>& tokens);
|
|
|
|
int main(int argc, char** argv)
|
|
{
|
|
std::string fname;
|
|
if(argc < 2)
|
|
{
|
|
fname = "test.sp";
|
|
}
|
|
else fname = (const char*)argv[1];
|
|
std::string command = read_file(fname);
|
|
std::vector<Token> main_tokens = lex_tokens(command,fname);
|
|
imported_files.push_back(fname.substr(0,fname.find_last_of('.')));
|
|
main_tokens = evaluate_imports(command,main_tokens);
|
|
parse_tokens(main_tokens);
|
|
std::string assembly;
|
|
assembly += ASM_HEADER;
|
|
assembly += "; Assembly generated by the Sapphire compiler.\n";
|
|
assembly += ASM_TEXT_SECTION;
|
|
assembly += ASM_ENTRY;
|
|
assembly += STRLEN;
|
|
assembly += PRINT;
|
|
assembly += ASM_ENTRY_DECL;
|
|
assembly += "; -- exit with code 0 --\n";
|
|
assembly += " mov rax, 60\n";
|
|
assembly += " xor rdi, rdi\n";
|
|
assembly += " syscall\n";
|
|
if(registered_strings.size() != 0)
|
|
{
|
|
assembly += "\n";
|
|
assembly += ASM_DATA_SECTION;
|
|
for(auto asm_string : registered_strings)
|
|
{
|
|
assembly += make_asm_string(asm_string);
|
|
}
|
|
}
|
|
std::string outfile_basename = fname.substr(0,fname.find_last_of('.'));
|
|
std::ofstream outfile(outfile_basename + ".asm");
|
|
outfile << assembly;
|
|
outfile.close();
|
|
#ifdef __NASM
|
|
system(std::string("nasm -f elf64 " + outfile_basename + ".asm -o" + outfile_basename + ".o").c_str());
|
|
system(std::string("ld " + outfile_basename + ".o -o" + outfile_basename).c_str());
|
|
#else
|
|
system(std::string("fasm " + outfile_basename + ".asm").c_str());
|
|
system((std::string("chmod +x ") + outfile_basename).c_str());
|
|
#endif
|
|
std::cout << fname + " > " + std::string(outfile_basename) + "\n";
|
|
}
|
|
|
|
// Forward declarations of the token builders used by lex_tokens. Each one
// starts at text[index] and takes index, line and column by reference so the
// lexer's cursor can be advanced past the token.
Token construct_identifier(const std::string& text, int& index, int& line, int& column, const std::string& fname);
Token construct_number(const std::string& text, int& index, int& line, int& column, const std::string& fname);
Token construct_string(const std::string& text, int& index, int& line, int& column, const std::string& fname);
|
|
|
|
|
|
// Returns the full contents of the file `fname`, or an empty string when the
// file cannot be opened.
//
// The stream buffer is copied wholesale, so every byte is preserved. The
// previous char-by-char loop compared a `char` with -1 to detect end of file,
// which dropped 0xFF bytes where char is signed and appended a stray byte at
// EOF where char is unsigned.
std::string read_file(const std::string& fname)
{
    std::ifstream main_file(fname);
    if (!main_file.is_open())
    {
        return "";
    }

    std::ostringstream contents;
    contents << main_file.rdbuf();
    return contents.str();
}
|
|
|
|
// Returns the text that follows position `index`, up to but not including the
// next '\n' (or the end of `text`).
//
// Callers pass the index of a newline to get the line after it, or -1 to get
// the first line. Out-of-range positions are tolerated: any negative index is
// treated like -1, and an index at or past the last character yields "". The
// previous loop read out of bounds in both cases.
std::string recalculate_current_line(const std::string& text, int index)
{
    const std::size_t start = (index < 0) ? 0 : static_cast<std::size_t>(index) + 1;
    if (start >= text.size())
    {
        return std::string();
    }

    const std::size_t stop = text.find('\n', start);
    const std::size_t length = (stop == std::string::npos) ? std::string::npos : stop - start;
    return text.substr(start, length);
}
|
|
|
|
// Returns the line of `text` that ends just before position `index`: the text
// between the last '\n' strictly before `index` (or the start of `text`) and
// the next '\n' after that point.
//
// The previous backwards scan read text[-1] when index was 0 or 1, and it
// returned the first line without ever testing text[0], which was wrong when
// the text begins with a newline.
std::string rewind_current_line(const std::string& text, int index)
{
    std::size_t start = 0;
    if (index > 0)
    {
        const std::size_t previous_break = text.rfind('\n', static_cast<std::size_t>(index) - 1);
        if (previous_break != std::string::npos)
        {
            start = previous_break + 1;
        }
    }

    if (start >= text.size())
    {
        return std::string();
    }

    const std::size_t stop = text.find('\n', start);
    const std::size_t length = (stop == std::string::npos) ? std::string::npos : stop - start;
    return text.substr(start, length);
}
|
|
|
|
std::vector<Token> lex_tokens(const std::string& text, const std::string& fname)
|
|
{
|
|
int line = 1;
|
|
int column = 0;
|
|
int index = -1;
|
|
std::vector<Token> result;
|
|
bool comment = false;
|
|
g_current_line = recalculate_current_line(text,-1);
|
|
|
|
while(index < (int)text.size())
|
|
{
|
|
++index;
|
|
++column;
|
|
|
|
if(text[index] == '\n')
|
|
{
|
|
++line;
|
|
column = 0;
|
|
g_current_line = recalculate_current_line(text,index);
|
|
comment = false;
|
|
}
|
|
if(comment) continue;
|
|
char cstyle_char[2]{text[index],'\0'};
|
|
const char* character = (const char*)cstyle_char;
|
|
if(std::regex_match(character,whitespace))
|
|
{
|
|
continue;
|
|
}
|
|
else if(std::regex_match(character,letter))
|
|
{
|
|
result.push_back(construct_identifier(text,index,line,column,fname));
|
|
}
|
|
else if(std::regex_match(character,number))
|
|
{
|
|
result.push_back(construct_number(text,index,line,column,fname));
|
|
} else if (text[index] == '\'')
|
|
{
|
|
result.push_back(construct_string(text,index,line,column,fname));
|
|
} else if(index == text.size())
|
|
{
|
|
result.push_back(Token(TT_EOF,line,column,fname));
|
|
} else switch(text[index])
|
|
{
|
|
case '+':
|
|
result.push_back(Token(TT_Plus,line,column,fname));
|
|
break;
|
|
case '-':
|
|
result.push_back(Token(TT_Minus,line,column,fname));
|
|
break;
|
|
case '*':
|
|
result.push_back(Token(TT_Mul,line,column,fname));
|
|
break;
|
|
case '/':
|
|
if(index != text.size())
|
|
{
|
|
if(text[index+1] == '/')
|
|
{
|
|
comment = true;
|
|
break;
|
|
}
|
|
}
|
|
result.push_back(Token(TT_Div,line,column,fname));
|
|
break;
|
|
case '@':
|
|
result.push_back(Token(TT_At,line,column,fname));
|
|
break;
|
|
case '=':
|
|
result.push_back(Token(TT_Equal,line,column,fname));
|
|
break;
|
|
case '<':
|
|
result.push_back(Token(TT_LessThan,line,column,fname));
|
|
break;
|
|
case '>':
|
|
result.push_back(Token(TT_GreaterThan,line,column,fname));
|
|
break;
|
|
case '(':
|
|
result.push_back(Token(TT_LParen,line,column,fname));
|
|
break;
|
|
case ')':
|
|
result.push_back(Token(TT_RParen,line,column,fname));
|
|
break;
|
|
case '{':
|
|
result.push_back(Token(TT_LBracket,line,column,fname));
|
|
break;
|
|
case '}':
|
|
result.push_back(Token(TT_RBracket,line,column,fname));
|
|
break;
|
|
case ';':
|
|
result.push_back(Token(TT_Semicolon,line,column,fname));
|
|
break;
|
|
default:
|
|
compiler_error(g_current_line,line,column,fname,"unknown character");
|
|
break;
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
// Forward declaration; get_line is defined further down in this file.
std::string get_line(const std::string& str, int line_no);
|
|
|
|
// Builds an identifier or keyword token starting at text[index].
//
// Consumes the longest run of characters in [a-zA-Z0-9_] beginning at `index`.
// On return `index` and `column` refer to the LAST character of the run, so the
// caller's own increment moves on to the first character after the token. When
// text[index] is not an identifier character nothing is consumed, `index` and
// `column` are stepped back by one and a token with an empty name is returned.
//
// `line` and g_current_line are never modified: an identifier cannot contain a
// newline. The previous version consumed the character after the identifier,
// bumped `line` if it was '\n' and never undid that, so the caller counted the
// same newline twice.
//
// The token is TT_Keyword when the name is listed in `keywords`, otherwise
// TT_Identifier, and it carries the position of the token's first character.
Token construct_identifier(const std::string& text, int& index, int& line, int& column, const std::string& fname)
{
    const auto is_identifier_char = [](char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
    };

    const int start_line = line;
    const int start_column = column;
    const int text_size = static_cast<int>(text.size());

    // Scan ahead without consuming the character that ends the identifier.
    int cursor = (index < 0) ? 0 : index;
    const int first = cursor;
    while (cursor < text_size && is_identifier_char(text[cursor]))
    {
        ++cursor;
    }

    const std::string name = text.substr(static_cast<std::size_t>(first), static_cast<std::size_t>(cursor - first));

    // Leave the cursor on the last consumed character.
    const int consumed = cursor - first;
    index = first + consumed - 1;
    column += consumed - 1;

    const bool is_keyword = std::find(keywords.begin(), keywords.end(), name) != keywords.end();
    return Token(is_keyword ? TT_Keyword : TT_Identifier, name, start_line, start_column, fname);
}
|
|
|
|
// Builds a TT_Number or TT_Float token starting at text[index].
//
// Consumes decimal digits and at most one '.'; a second '.' ends the literal.
// On return `index` and `column` refer to the LAST consumed character, so the
// caller's own increment moves on to the first character after the literal.
// When text[index] is neither a digit nor '.', nothing is consumed, `index` and
// `column` are stepped back by one and an integer token with value 0 is
// returned.
//
// `line` and g_current_line are never modified: a number cannot contain a
// newline. The previous version consumed the character after the literal,
// bumped `line` if it was '\n' and never undid that, so the caller counted the
// same newline twice.
//
// A literal that does not fit in int / float is reported through compiler_error
// (the previous atoi call had undefined behaviour on overflow).
Token construct_number(const std::string& text, int& index, int& line, int& column, const std::string& fname)
{
    const int start_line = line;
    const int start_column = column;
    const int text_size = static_cast<int>(text.size());

    std::string literal;
    bool seen_dot = false;

    // Scan ahead without consuming the character that ends the literal.
    int cursor = (index < 0) ? 0 : index;
    const int first = cursor;
    while (cursor < text_size)
    {
        const char c = text[cursor];
        if (c >= '0' && c <= '9')
        {
            literal += c;
        }
        else if (c == '.' && !seen_dot)
        {
            literal += c;
            seen_dot = true;
        }
        else
        {
            break;
        }
        ++cursor;
    }

    // Leave the cursor on the last consumed character.
    const int consumed = cursor - first;
    index = first + consumed - 1;
    column += consumed - 1;

    try
    {
        if (seen_dot)
        {
            return Token(TT_Float, std::stof(literal), start_line, start_column, fname);
        }
        // An empty literal maps to 0, matching the previous atoi("") result.
        return Token(TT_Number, literal.empty() ? 0 : std::stoi(literal), start_line, start_column, fname);
    }
    catch (const std::out_of_range&)
    {
        compiler_error(g_current_line, start_line, start_column, fname, "number literal out of range");
    }

    // Not reached: compiler_error terminates the process.
    return Token(TT_Number, 0, start_line, start_column, fname);
}
|
|
|
|
// Builds a TT_String token from a single-quoted literal whose opening quote is
// at text[index].
//
// Supported escape sequences are \n, \' and \\. On success `index` and `column`
// refer to the closing quote. A raw newline, an unknown or unfinished escape
// sequence, or reaching the end of `text` before the closing quote is reported
// through compiler_error, which terminates the process.
//
// The end-of-text error now prints the current source line (g_current_line)
// like every other diagnostic; it used to print the whole file. The loop also
// no longer appends the string's terminating '\0' before reporting that error.
Token construct_string(const std::string& text, int& index, int& line, int& column, const std::string& fname)
{
    const int start_line = line;
    const int start_column = column;
    const int text_size = static_cast<int>(text.size());
    std::string contents;

    while (index < text_size)
    {
        ++index;
        ++column;
        if (index == text_size)
        {
            break; // ran out of input before the closing quote
        }

        const char current = text[index];

        if (current == '\n')
        {
            compiler_error(g_current_line, line, column, fname, "expected \', but got a newline");
        }

        if (current == '\'')
        {
            return Token(TT_String, contents, start_line, start_column, fname);
        }

        if (current == '\\')
        {
            if (index + 1 == text_size)
            {
                compiler_error(g_current_line, line, column, fname, "unfinished escape sequence");
            }
            switch (text[index + 1])
            {
                case 'n':
                    contents.push_back('\n');
                    break;
                case '\'':
                    contents.push_back('\'');
                    break;
                case '\\':
                    contents.push_back('\\');
                    break;
                default:
                    compiler_error(g_current_line, line, column, fname, "unknown escape sequence");
            }
            // Skip the character that followed the backslash.
            ++index;
            ++column;
            continue;
        }

        contents.push_back(current);
    }

    compiler_error(g_current_line, line, column, fname, "expected \', but got EOF");
    exit(127); // not reached: compiler_error exits first
}
|
|
|
|
// Forward declaration; get_spaces is defined further down in this file.
std::string get_spaces(int spacenum);
|
|
|
|
void compiler_error(const std::string& text, const int& line, const int& column, const std::string& fname, const std::string& details)
|
|
{
|
|
char linestr[32];
|
|
sprintf(linestr,"%d",line);
|
|
char colstr[32];
|
|
sprintf(colstr,"%d",column);
|
|
std::cerr << "\033[1;1m";
|
|
std::cerr << fname;
|
|
std::cerr << ":";
|
|
std::cerr << linestr;
|
|
std::cerr << ":";
|
|
std::cerr << colstr;
|
|
std::cerr << ": ";
|
|
std::cerr << "\033[31;49m";
|
|
std::cerr << "error: ";
|
|
std::cerr << "\033[0;0m";
|
|
std::cerr << details;
|
|
std::cerr << std::endl;
|
|
std::cerr << linestr;
|
|
std::cerr << get_spaces(4);
|
|
std::cerr << text;
|
|
std::cerr << std::endl;
|
|
std::cerr << get_spaces(4 + std::string(linestr).size());
|
|
std::cerr << get_spaces(column - 1);
|
|
std::cerr << "\033[31;49m";
|
|
std::cerr << "^";
|
|
std::cerr << "\033[0;0m";
|
|
std::cerr << std::endl;
|
|
exit(1);
|
|
}
|
|
|
|
void compiler_warning(const std::string& text, const int& line, const int& column, const std::string& fname, const std::string& details)
|
|
{
|
|
char linestr[32];
|
|
sprintf(linestr,"%d",line);
|
|
char colstr[32];
|
|
sprintf(colstr,"%d",column);
|
|
std::cout << "\033[1;1m";
|
|
std::cout << fname;
|
|
std::cout << ":";
|
|
std::cout << linestr;
|
|
std::cout << ":";
|
|
std::cout << colstr;
|
|
std::cout << ": ";
|
|
std::cout << "\033[33;49m";
|
|
std::cout << "warning: ";
|
|
std::cout << "\033[0;0m";
|
|
std::cout << details;
|
|
std::cout << std::endl;
|
|
std::cout << linestr;
|
|
std::cout << get_spaces(4);
|
|
std::cout << text;
|
|
std::cout << std::endl;
|
|
std::cout << get_spaces(4 + std::string(linestr).size());
|
|
std::cout << get_spaces(column - 1);
|
|
std::cout << "\033[33;49m";
|
|
std::cout << "^";
|
|
std::cout << "\033[0;0m";
|
|
std::cout << std::endl;
|
|
}
|
|
|
|
// Returns line number `line_no` (0-based) of `str`, without its newline.
// A negative `line_no` yields an empty string; a `line_no` past the last line
// yields the last line that could be read.
std::string get_line(const std::string& str, int line_no)
{
    std::istringstream reader(str);
    std::string current;

    // Read line_no + 1 lines; once the stream is exhausted, further reads fail
    // and leave `current` untouched.
    for (int remaining = line_no; remaining >= 0; --remaining)
    {
        std::getline(reader, current);
    }

    return current;
}
|
|
|
|
// Returns a string of `spacenum` space characters; zero or a negative count
// yields an empty string.
std::string get_spaces(int spacenum)
{
    if (spacenum <= 0)
    {
        return std::string();
    }
    return std::string(static_cast<std::size_t>(spacenum), ' ');
}
|
|
|
|
std::string make_asm_string(StringLiteral str)
|
|
{
|
|
std::string result;
|
|
result += str.identifier;
|
|
result += ": ; -- string literal -- \n";
|
|
result += " db ";
|
|
char code_point[4];
|
|
sprintf(code_point,"%d",str.data[0]);
|
|
result += std::string(code_point);
|
|
for(int i = 1; i < str.data.size(); i++)
|
|
{
|
|
char code_point[4];
|
|
sprintf(code_point,"%d",str.data[i]);
|
|
result += (", " + std::string(code_point));
|
|
}
|
|
result += ", 0";
|
|
result += "\n";
|
|
return result;
|
|
}
|
|
|
|
std::vector<Token> evaluate_imports(const std::string& text, const std::vector<Token>& tokens)
|
|
{
|
|
int i = 0;
|
|
std::vector<Token> ret_tk = tokens;
|
|
std::vector<Token> new_tokens;
|
|
while(tokens[i].tk_type != TT_EOF)
|
|
{
|
|
if (g_importCount > IMPORT_MAX_DEEP) {
|
|
compiler_error(tokens[i].line_ctx,tokens[i].line,tokens[i].column,tokens[i].fname,"import tree too deep");
|
|
}
|
|
if(tokens[i].tk_type == TT_Keyword && tokens[i].string_value == keywords[2])
|
|
{
|
|
if(tokens[i+1].tk_type == TT_EOF) {compiler_error(text,tokens[i].line,tokens[i].column,tokens[i].fname,"did not expect EOF after 'import' keyword");}
|
|
if(tokens[i+1].tk_type == TT_Identifier)
|
|
{
|
|
if(std::find(imported_files.begin(), imported_files.end(), tokens[i+1].string_value) != imported_files.end()) {
|
|
compiler_error(tokens[i+2].line_ctx,tokens[i+2].line,tokens[i+2].column,tokens[i+2].fname,"file already imported");
|
|
}
|
|
if(tokens[i+2].tk_type != TT_Semicolon) {compiler_error(text,tokens[i+2].line,tokens[i+2].column,tokens[i+2].fname,"expected a semicolon after import statement");}
|
|
std::ifstream ifile(tokens[i+1].string_value + ".sp");
|
|
|
|
if (!ifile.good()) {
|
|
compiler_error(tokens[i+1].line_ctx,tokens[i+1].line,tokens[i+1].column,tokens[i+1].fname,"file '" + tokens[i+1].string_value + ".sp' not found");
|
|
}
|
|
ifile.close();
|
|
|
|
std::string imported_file_contents = read_file(tokens[i+1].string_value + ".sp");
|
|
|
|
std::vector<Token> imported_tokens = lex_tokens(imported_file_contents,tokens[i+1].string_value + ".sp");
|
|
imported_tokens.pop_back(); // remove EOF at end of token stream
|
|
|
|
new_tokens.insert(new_tokens.end(),imported_tokens.begin(),imported_tokens.end());
|
|
ret_tk[i] = Token(TT_Null,ret_tk[i].line,ret_tk[i].column,ret_tk[i].fname); // remove import data
|
|
ret_tk[i+1] = Token(TT_Null,ret_tk[i+1].line,ret_tk[i+1].column,ret_tk[i+1].fname); // remove import data
|
|
ret_tk[i+2] = Token(TT_Null,ret_tk[i+2].line,ret_tk[i+2].column,ret_tk[i+2].fname); // remove import data
|
|
imported_files.push_back(tokens[i+1].string_value);
|
|
} else {
|
|
compiler_error(tokens[i+1].line_ctx,tokens[i+1].line,tokens[i+1].column,tokens[i+1].fname,"import statement must use an identifier");
|
|
}
|
|
}
|
|
++i;
|
|
}
|
|
if(new_tokens.size() != 0)
|
|
{
|
|
new_tokens.insert(new_tokens.end(),ret_tk.begin(),ret_tk.end());
|
|
++g_importCount;
|
|
return evaluate_imports(text,new_tokens);
|
|
}
|
|
return ret_tk;
|
|
}
|
|
|
|
std::vector<Sentence> parse_tokens(const std::vector<Token>& tokens)
|
|
{
|
|
int i = 0;
|
|
while(i < tokens.size())
|
|
{
|
|
if(tokens[i].tk_type == TT_Null)
|
|
{
|
|
++i;
|
|
continue;
|
|
}
|
|
std::cout << tokens[i].to_string() << std::endl;
|
|
++i;
|
|
}
|
|
return std::vector<Sentence>();
|
|
}
|