2022-06-08 18:16:14 +00:00
/* Original implementation of the Sapphire compiler, in one file.
WARNING: THIS IS LEGACY. This was the original thing I wrote to make a simple compiler, but it was clear it wasn't gonna cut it.
The code was difficult to read with everything scattered across the file, and I had to throw random forward declarations everywhere.
Also, it was limited, since my intention was to make it output assembly code manually. As you can see, the definitions are for x86-64 Linux assembly.
Now I'm using LLVM, which will make it a lot easier to do stuff.
Eventually, I found that splitting the code into multiple files, using classes and such, would be much better and easier.
So I began a rewrite of this compiler in the src/ directory, which I found was way easier to write and more productive.
The src/ compiler has come a long way since then, and this code is just kept around for me to be able to see it in the future and facepalm at it.
I'm aware of Git, I'm using it, but I didn't think keeping this would do any harm. It's very clearly separated from the rest of the code. */
2022-03-11 16:00:09 +00:00
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <array>
#include <fstream>
#include <iostream>
#include <iterator>
#include <regex>
#include <sstream>
#include <string>
#include <vector>
// Upper bound on g_importCount; evaluate_imports aborts with "import tree too deep" once it is exceeded.
#define IMPORT_MAX_DEEP 100

// Assembly runtime helpers that main() emits verbatim into the text section.
#define STRLEN "strlen: ; -- length of null-terminated string in rdi --\n xor rax, rax\n mov rcx, -1\n cld\n repne scasb\n mov rax, rcx\n add rax, 2\n neg rax\n ret\n"
#define PRINT "print: ; -- print null-terminated string in rdi --\n mov rsi, rdi\n call strlen\n mov rdx, rax\n mov rdi, 1\n mov rax, 1\n syscall\n ret\n"

// Assembler backend selection. FASM is only the default: it must not be forced
// when the build already asked for NASM, otherwise the FASM directives below
// would be emitted while main() invokes nasm/ld on the result.
#if defined(__FASM) && defined(__NASM)
#error "Select only one assembler backend: __FASM or __NASM"
#endif
#if !defined(__FASM) && !defined(__NASM)
#define __FASM
#endif

#ifdef __FASM
#define ASM_HEADER "format ELF64 executable 3\n"
#define ASM_TEXT_SECTION "segment readable executable\n"
#define ASM_DATA_SECTION "segment readable writable\n"
#define ASM_BSS_SECTION ""
#define ASM_ENTRY "entry start\n"
#define ASM_ENTRY_DECL "start:\n"
#elif defined(__NASM)
#define ASM_HEADER "BITS 64\n"
#define ASM_TEXT_SECTION "section .text\n"
#define ASM_DATA_SECTION "section .data\n"
#define ASM_BSS_SECTION "section .bss\n"
#define ASM_ENTRY "global _start\n"
#define ASM_ENTRY_DECL "_start:\n"
#endif
// Character classes used by the lexer. Each regex is matched against a
// one-character C string built from the character under the cursor.
// `letter` decides whether lex_tokens starts an identifier.
std : : regex letter ( " [a-zA-Z_] " ) ;
// `identifier` decides which characters construct_identifier keeps consuming.
std : : regex identifier ( " [a-zA-Z0-9_] " ) ;
// `number` is used by lex_tokens and construct_number to recognise digits.
std : : regex number ( " [0-9] " ) ;
// Characters matching `whitespace` are skipped by lex_tokens.
std : : regex whitespace ( " [ \t \n ] " ) ;
// Incremented by evaluate_imports each time it recurses; compared against IMPORT_MAX_DEEP.
int g_importCount = 0 ;
// Text of the source line currently being lexed; every Token copies it into line_ctx.
std : : string g_current_line ;
// Names already imported (main() seeds it with the input file name minus its extension).
std : : vector < std : : string > imported_files ;
/// Kinds of token produced by the lexer. The numeric values double as indices
/// into the token_strings table, so the order here must match that table.
enum TokenType
{
    TT_Identifier = 0, // name that is not a keyword
    TT_Number,         // integer literal
    TT_Float,          // literal containing a '.'
    TT_Keyword,        // name found in the keywords table
    TT_String,         // '...' literal
    TT_Plus,           // +
    TT_Minus,          // -
    TT_Mul,            // *
    TT_Div,            // /
    TT_At,             // @
    TT_Equal,          // =
    TT_LessThan,       // <
    TT_GreaterThan,    // >
    TT_LParen,         // (
    TT_RParen,         // )
    TT_LBracket,       // {
    TT_RBracket,       // }
    TT_Semicolon,      // ;
    TT_LoadedString,   // not produced by the lexer in this file
    TT_EOF,            // end of the token stream
    TT_Null            // placeholder that replaces consumed import tokens
};
// Printable name for each TokenType, indexed by the enumerator's numeric value
// (see tokentype_as_string). The entries must stay in the same order as the enum.
static std : : string token_strings [ ] = {
" TT_IDENTIFIER " ,
" TT_NUMBER " ,
" TT_FLOAT " ,
" TT_KEYWORD " ,
" TT_STRING " ,
" TT_PLUS " ,
" TT_MINUS " ,
" TT_MUL " ,
" TT_DIV " ,
" TT_AT " ,
" TT_EQUAL " ,
" TT_LESSTHAN " ,
" TT_GREATERTHAN " ,
" TT_LPAREN " ,
" TT_RPAREN " ,
" TT_LBRACKET " ,
" TT_RBRACKET " ,
" TT_SEMICOLON " ,
" TT_LOADEDSTRING " ,
" TT_EOF " ,
" TT_NULL "
} ;
// A lexed token: its kind, an optional payload (int, float or string), and the
// source position (file, line, column, text of the line) it was read from.
struct Token
{
TokenType tk_type ;
// Payload members. Each constructor sets at most one of them.
// NOTE(review): int_value and float_value are left uninitialized by the
// constructors that do not take them, yet Tokens are copied freely.
int int_value ;
std : : string string_value ;
float float_value ;
// Source position of the token.
int line ;
int column ;
std : : string fname ;
// Snapshot of g_current_line taken when the token was constructed.
std : : string line_ctx ;
// Token without payload (operators, punctuation, EOF, Null).
Token ( const TokenType & type , const int & lineno , const int & colno , const std : : string & name )
: tk_type ( type ) , line ( lineno ) , column ( colno ) , fname ( name )
{
line_ctx = g_current_line ;
}
// Token carrying an integer payload.
Token ( const TokenType & type , const int & val , const int & lineno , const int & colno , const std : : string & name )
: tk_type ( type ) , int_value ( val ) , line ( lineno ) , column ( colno ) , fname ( name )
{
line_ctx = g_current_line ;
}
// Token carrying a string payload (identifiers, keywords, string literals).
Token ( const TokenType & type , const std : : string & val , const int & lineno , const int & colno , const std : : string & name )
: tk_type ( type ) , string_value ( val ) , line ( lineno ) , column ( colno ) , fname ( name )
{
line_ctx = g_current_line ;
}
// Token carrying a float payload.
Token ( const TokenType & type , const float & val , const int & lineno , const int & colno , const std : : string & name )
: tk_type ( type ) , float_value ( val ) , line ( lineno ) , column ( colno ) , fname ( name )
{
line_ctx = g_current_line ;
}
// Builds a human-readable description of the token: a label for its kind,
// the payload where there is one, and the file/line/column it came from.
std : : string to_string ( ) const
{
// Line and column rendered as decimal text, reused by every branch below.
char linestr [ 32 ] ;
sprintf ( linestr , " %d " , line ) ;
char colstr [ 32 ] ;
sprintf ( colstr , " %d " , column ) ;
// Kinds with a payload are formatted first, each with its own label.
if ( tk_type = = TT_Number )
{
char num [ 32 ] ;
sprintf ( num , " %d " , int_value ) ;
return " INT: " + std : : string ( num ) + " ( " + fname + " : " + std : : string ( linestr ) + " : " + std : : string ( colstr ) + " ) " ;
} else if ( tk_type = = TT_Float )
{
char num [ 64 ] ;
sprintf ( num , " %f " , float_value ) ;
return " FLOAT: " + std : : string ( num ) + " ( " + fname + " : " + std : : string ( linestr ) + " : " + std : : string ( colstr ) + " ) " ;
}
else if ( tk_type = = TT_Identifier ) {
return " ID: " + string_value + " ( " + fname + " : " + std : : string ( linestr ) + " : " + std : : string ( colstr ) + " ) " ;
} else if ( tk_type = = TT_Keyword ) {
return " KEYWORD: " + string_value + " ( " + fname + " : " + std : : string ( linestr ) + " : " + std : : string ( colstr ) + " ) " ;
} else if ( tk_type = = TT_String )
{
return " STRING: " + std : : string ( " \' " ) + string_value + std : : string ( " \' " ) + " ( " + fname + " : " + std : : string ( linestr ) + " : " + std : : string ( colstr ) + " ) " ;
}
// Remaining kinds have no payload: a fixed label followed by the position.
std : : string details = std : : string ( " ( " ) + fname + " : " + std : : string ( linestr ) + " : " + std : : string ( colstr ) + " ) " ;
switch ( tk_type )
{
case TT_EOF :
return " EOF " + details ;
case TT_Plus :
return " PLUS " + details ;
case TT_Minus :
return " MINUS " + details ;
case TT_Mul :
return " MUL " + details ;
case TT_Div :
return " DIV " + details ;
case TT_At :
return " AT " + details ;
case TT_Equal :
return " EQUAL " + details ;
case TT_LessThan :
return " LESSTHAN " + details ;
case TT_GreaterThan :
return " GREATERTHAN " + details ;
case TT_LParen :
return " LPAREN " + details ;
case TT_RParen :
return " RPAREN " + details ;
case TT_LBracket :
return " LBRACKET " + details ;
case TT_RBracket :
return " RBRACKET " + details ;
case TT_Semicolon :
return " SEMICOLON " + details ;
case TT_LoadedString :
return " LDSTRING " + details ;
}
// TT_Null has no case above and ends up here.
return " " ;
}
} ;
std : : string tokentype_as_string ( const TokenType & type )
{
return token_strings [ type ] ;
}
/// Base record for a parsed statement; `type_name` labels the statement kind.
struct Sentence
{
    std::string type_name;
};
struct Function : public Sentence
{
std : : string type_name = " function " ;
std : : vector < Token > fun_tokens ;
Function ( std : : vector < Token > tokens ) : fun_tokens ( tokens ) { }
} ;
struct DeclVar : public Sentence
{
std : : string type_name = " decl " ;
std : : vector < Token > vtokens ;
DeclVar ( std : : vector < Token > tokens ) : vtokens ( tokens ) { }
} ;
/// A registered variable: its size and the name it is known by.
struct Variable
{
    int size;
    std::string identifier;

    /// @param var_size  value stored in `size`.
    /// @param var_name  value stored in `identifier`.
    Variable(int var_size, std::string var_name)
        : size(var_size),
          identifier(var_name)
    {
    }
};
/// A string constant destined for the data section: its bytes and the label
/// under which make_asm_string emits them.
struct StringLiteral
{
    std::string data;
    std::string identifier;

    /// @param contents  value stored in `data`.
    /// @param label     value stored in `identifier`.
    StringLiteral(std::string contents, std::string label)
        : data(contents),
          identifier(label)
    {
    }
};
// Reserved words: an identifier found in this table becomes a TT_Keyword token.
// evaluate_imports refers to the import keyword by position (keywords [ 2 ]).
std : : array < std : : string , 3 > keywords { " out " , " var " , " import " } ;
// Variables registered during compilation; nothing in this file adds to it.
std : : vector < Variable > registered_vars ;
// String literals that main() writes into the data section; nothing in this file adds to it.
std : : vector < StringLiteral > registered_strings ;
// Forward declarations for functions that main() uses before their definitions.
std : : string make_asm_string ( StringLiteral str ) ;
std : : vector < Token > lex_tokens ( const std : : string & , const std : : string & ) ;
std : : vector < Token > evaluate_imports ( const std : : string & text , const std : : vector < Token > & tokens ) ;
std : : string read_file ( const std : : string & ) ;
void compiler_error ( const std : : string & text , const int & line , const int & column , const std : : string & fname , const std : : string & details ) ;
void compiler_warning ( const std : : string & text , const int & line , const int & column , const std : : string & fname , const std : : string & details ) ;
std : : vector < Sentence > parse_tokens ( const std : : vector < Token > & tokens ) ;
// Compiler driver: lexes the input file, resolves imports, dumps the tokens,
// writes an assembly file next to the input and runs the assembler on it.
// The input path is argv [ 1 ], with a built-in default when no argument is given.
int main ( int argc , char * * argv )
{
std : : string fname ;
if ( argc < 2 )
{
fname = " test.sp " ;
}
else fname = ( const char * ) argv [ 1 ] ;
// NOTE(review): read_file signals failure only through its return value,
// which is not checked here, so a missing input is lexed like any other text.
std : : string command = read_file ( fname ) ;
std : : vector < Token > main_tokens = lex_tokens ( command , fname ) ;
// Register the main file (name without extension) so it cannot import itself.
imported_files . push_back ( fname . substr ( 0 , fname . find_last_of ( ' . ' ) ) ) ;
main_tokens = evaluate_imports ( command , main_tokens ) ;
// parse_tokens currently only prints the tokens; its return value is discarded.
parse_tokens ( main_tokens ) ;
// Assemble the output: header, text section with the runtime helpers, then
// an entry point whose only code is the exit syscall below.
std : : string assembly ;
assembly + = ASM_HEADER ;
assembly + = " ; Assembly generated by the Sapphire compiler. \n " ;
assembly + = ASM_TEXT_SECTION ;
assembly + = ASM_ENTRY ;
assembly + = STRLEN ;
assembly + = PRINT ;
assembly + = ASM_ENTRY_DECL ;
assembly + = " ; -- exit with code 0 -- \n " ;
assembly + = " mov rax, 60 \n " ;
assembly + = " xor rdi, rdi \n " ;
assembly + = " syscall \n " ;
// The data section is emitted only when string literals were registered.
if ( registered_strings . size ( ) ! = 0 )
{
assembly + = " \n " ;
assembly + = ASM_DATA_SECTION ;
for ( auto asm_string : registered_strings )
{
assembly + = make_asm_string ( asm_string ) ;
}
}
// Output files share the input's name with the extension replaced.
std : : string outfile_basename = fname . substr ( 0 , fname . find_last_of ( ' . ' ) ) ;
std : : ofstream outfile ( outfile_basename + " .asm " ) ;
outfile < < assembly ;
outfile . close ( ) ;
// NOTE(review): the file name is pasted into the shell commands unquoted and
// the results of system ( ) are ignored.
# ifdef __NASM
system ( std : : string ( " nasm -f elf64 " + outfile_basename + " .asm -o " + outfile_basename + " .o " ) . c_str ( ) ) ;
system ( std : : string ( " ld " + outfile_basename + " .o -o " + outfile_basename ) . c_str ( ) ) ;
# else
system ( std : : string ( " fasm " + outfile_basename + " .asm " ) . c_str ( ) ) ;
system ( ( std : : string ( " chmod +x " ) + outfile_basename ) . c_str ( ) ) ;
# endif
std : : cout < < fname + " > " + std : : string ( outfile_basename ) + " \n " ;
}
// Forward declarations of the token builders called by lex_tokens. Each takes
// the source text, the cursor state (index, line, column) by reference so it
// can advance it, and the file name used for positions.
Token construct_identifier ( const std : : string & , int & , int & , int & , const std : : string & ) ;
Token construct_number ( const std : : string & , int & , int & , int & , const std : : string & ) ;
Token construct_string ( const std : : string & , int & , int & , int & , const std : : string & ) ;
/**
 * Reads a whole file into a string.
 *
 * @param fname  path of the file to read.
 * @return the file contents, or an empty string when the file cannot be opened.
 */
std::string read_file(const std::string& fname)
{
    std::ifstream input(fname);
    if (!input.is_open())
    {
        return "";
    }
    // Copy straight from the stream buffer. Testing a `char` against -1 to
    // detect end-of-file would also discard genuine 0xFF bytes (or never see
    // EOF where `char` is unsigned); the iterator pair has neither problem.
    return std::string(std::istreambuf_iterator<char>(input), std::istreambuf_iterator<char>());
}
/**
 * Returns the text that follows position `index`, up to (not including) the
 * next newline or the end of `text`.
 *
 * Callers pass the index of a newline to get the line after it, or -1 to get
 * the first line.
 *
 * @param text   full source text.
 * @param index  position just before the first character to return.
 * @return the line, or an empty string when `index + 1` lies outside `text`.
 */
std::string recalculate_current_line(const std::string& text, int index)
{
    // Widen before adding so the largest int cannot overflow.
    const long long first = static_cast<long long>(index) + 1;
    if (first < 0 || first >= static_cast<long long>(text.size()))
    {
        // Nothing to read: stepping from here would run off either end of the text.
        return std::string();
    }
    const std::string::size_type start = static_cast<std::string::size_type>(first);
    const std::string::size_type stop = text.find('\n', start);
    return text.substr(start, stop == std::string::npos ? std::string::npos : stop - start);
}
/**
 * Returns the line that ends at position `index`: the text between the closest
 * newline strictly before `index` (or the start of `text`) and the next
 * newline after that point.
 *
 * Used when the lexer steps back over a newline and needs the previous line's
 * text again.
 *
 * @param text   full source text.
 * @param index  position to look back from, typically that of a newline.
 * @return the line's text without its terminating newline.
 */
std::string rewind_current_line(const std::string& text, int index)
{
    std::string::size_type line_start = 0;
    if (index > 0)
    {
        // rfind stops at the beginning by itself, so the search can never walk
        // to a negative position when no earlier newline exists.
        const std::string::size_type previous_newline =
            text.rfind('\n', static_cast<std::string::size_type>(index - 1));
        if (previous_newline != std::string::npos)
        {
            line_start = previous_newline + 1;
        }
    }
    const std::string::size_type line_end = text.find('\n', line_start);
    return text.substr(line_start, line_end == std::string::npos ? std::string::npos : line_end - line_start);
}
std : : vector < Token > lex_tokens ( const std : : string & text , const std : : string & fname )
{
int line = 1 ;
int column = 0 ;
int index = - 1 ;
std : : vector < Token > result ;
bool comment = false ;
g_current_line = recalculate_current_line ( text , - 1 ) ;
while ( index < ( int ) text . size ( ) )
{
+ + index ;
+ + column ;
if ( text [ index ] = = ' \n ' )
{
+ + line ;
column = 0 ;
g_current_line = recalculate_current_line ( text , index ) ;
comment = false ;
}
if ( comment ) continue ;
char cstyle_char [ 2 ] { text [ index ] , ' \0 ' } ;
const char * character = ( const char * ) cstyle_char ;
if ( std : : regex_match ( character , whitespace ) )
{
continue ;
}
else if ( std : : regex_match ( character , letter ) )
{
result . push_back ( construct_identifier ( text , index , line , column , fname ) ) ;
}
else if ( std : : regex_match ( character , number ) )
{
result . push_back ( construct_number ( text , index , line , column , fname ) ) ;
} else if ( text [ index ] = = ' \' ' )
{
result . push_back ( construct_string ( text , index , line , column , fname ) ) ;
} else if ( index = = text . size ( ) )
{
result . push_back ( Token ( TT_EOF , line , column , fname ) ) ;
} else switch ( text [ index ] )
{
case ' + ' :
result . push_back ( Token ( TT_Plus , line , column , fname ) ) ;
break ;
case ' - ' :
result . push_back ( Token ( TT_Minus , line , column , fname ) ) ;
break ;
case ' * ' :
result . push_back ( Token ( TT_Mul , line , column , fname ) ) ;
break ;
case ' / ' :
if ( index ! = text . size ( ) )
{
if ( text [ index + 1 ] = = ' / ' )
{
comment = true ;
break ;
}
}
result . push_back ( Token ( TT_Div , line , column , fname ) ) ;
break ;
case ' @ ' :
result . push_back ( Token ( TT_At , line , column , fname ) ) ;
break ;
case ' = ' :
result . push_back ( Token ( TT_Equal , line , column , fname ) ) ;
break ;
case ' < ' :
result . push_back ( Token ( TT_LessThan , line , column , fname ) ) ;
break ;
case ' > ' :
result . push_back ( Token ( TT_GreaterThan , line , column , fname ) ) ;
break ;
case ' ( ' :
result . push_back ( Token ( TT_LParen , line , column , fname ) ) ;
break ;
case ' ) ' :
result . push_back ( Token ( TT_RParen , line , column , fname ) ) ;
break ;
case ' { ' :
result . push_back ( Token ( TT_LBracket , line , column , fname ) ) ;
break ;
case ' } ' :
result . push_back ( Token ( TT_RBracket , line , column , fname ) ) ;
break ;
case ' ; ' :
result . push_back ( Token ( TT_Semicolon , line , column , fname ) ) ;
break ;
default :
compiler_error ( g_current_line , line , column , fname , " unknown character " ) ;
break ;
}
}
return result ;
}
// Forward declaration kept for code further down that calls get_line before its definition.
std::string get_line(const std::string&, int);

/**
 * Reads an identifier or keyword starting at text[index].
 *
 * On return `index` and `column` refer to the last character that belongs to
 * the token, so the caller's next increment lands on the character after it.
 * `line` is never changed: identifier characters cannot include a newline, and
 * the character that ends the token is only inspected, not consumed. (Consuming
 * and then un-reading a newline made the caller count that line twice.)
 *
 * Precondition: 0 <= index < text.size().
 *
 * @param text    full source text.
 * @param index   cursor position; in: first character, out: last character.
 * @param line    current line number (read only).
 * @param column  current column; advanced together with `index`.
 * @param fname   file name recorded in the token.
 * @return a TT_Keyword token when the name is in `keywords`, otherwise a
 *         TT_Identifier token, positioned at the first character.
 */
Token construct_identifier(const std::string& text, int& index, int& line, int& column, const std::string& fname)
{
    const int start_line = line;
    const int start_column = column;
    const int first = index;
    const int length = static_cast<int>(text.size());

    const auto is_identifier_char = [](char c) {
        const char one[2] = {c, '\0'};
        return std::regex_match(one, identifier);
    };

    std::string name;
    int next = first;
    while (next < length && is_identifier_char(text[next]))
    {
        name += text[next];
        ++next;
    }

    // Leave the cursor on the last consumed character.
    index = next - 1;
    column += (next - first) - 1;

    const bool is_keyword = std::find(keywords.begin(), keywords.end(), name) != keywords.end();
    return Token(is_keyword ? TT_Keyword : TT_Identifier, name, start_line, start_column, fname);
}
/**
 * Reads a numeric literal starting at text[index]: a run of digits containing
 * at most one '.'. A second '.' ends the literal and is left for the caller.
 *
 * On return `index` and `column` refer to the last character that belongs to
 * the literal. `line` is never changed: the character that ends the literal is
 * only inspected, not consumed, so a newline right after a number is counted
 * exactly once, by the caller.
 *
 * Precondition: 0 <= index < text.size() and text[index] is a digit (a literal
 * consisting of a lone '.' makes std::stof throw).
 *
 * @param text    full source text.
 * @param index   cursor position; in: first character, out: last character.
 * @param line    current line number (read only).
 * @param column  current column; advanced together with `index`.
 * @param fname   file name recorded in the token.
 * @return a TT_Float token when the literal contains a '.', otherwise a
 *         TT_Number token, positioned at the first character.
 */
Token construct_number(const std::string& text, int& index, int& line, int& column, const std::string& fname)
{
    const int start_line = line;
    const int start_column = column;
    const int first = index;
    const int length = static_cast<int>(text.size());

    const auto is_digit = [](char c) {
        const char one[2] = {c, '\0'};
        return std::regex_match(one, number);
    };

    std::string literal;
    bool has_dot = false;
    int next = first;
    while (next < length)
    {
        const char c = text[next];
        if (c == '.' && !has_dot)
        {
            has_dot = true;
        }
        else if (!is_digit(c))
        {
            break;
        }
        literal += c;
        ++next;
    }

    // Leave the cursor on the last consumed character.
    index = next - 1;
    column += (next - first) - 1;

    if (has_dot)
    {
        const float float_value = std::stof(literal);
        return Token(TT_Float, float_value, start_line, start_column, fname);
    }
    const int int_value = atoi(literal.c_str());
    return Token(TT_Number, int_value, start_line, start_column, fname);
}
/**
 * Reads a '...' string literal whose opening quote is at text[index].
 *
 * Supported escapes are \n, \' and \\. A raw newline, an unknown or unfinished
 * escape, or reaching the end of the text before the closing quote is reported
 * through compiler_error, which terminates the program.
 *
 * On success `index` and `column` refer to the closing quote.
 *
 * @param text    full source text.
 * @param index   cursor position; in: opening quote, out: closing quote.
 * @param line    current line number (read only).
 * @param column  current column; advanced together with `index`.
 * @param fname   file name recorded in the token.
 * @return a TT_String token holding the unescaped contents, positioned at the
 *         opening quote.
 */
Token construct_string(const std::string& text, int& index, int& line, int& column, const std::string& fname)
{
    const int start_line = line;
    const int start_column = column;
    const int length = static_cast<int>(text.size());
    std::string contents;

    // Stop before the end of the text so the terminator is never read as data.
    while (index + 1 < length)
    {
        ++index;
        ++column;
        const char current = text[index];

        if (current == '\n')
        {
            compiler_error(g_current_line, line, column, fname, "expected \', but got a newline");
        }
        if (current == '\'')
        {
            return Token(TT_String, contents, start_line, start_column, fname);
        }
        if (current == '\\')
        {
            if (index + 1 == length)
            {
                compiler_error(g_current_line, line, column, fname, "unfinished escape sequence");
            }
            switch (text[index + 1])
            {
                case 'n':
                    contents += '\n';
                    break;
                case '\'':
                    contents += '\'';
                    break;
                case '\\':
                    contents += '\\';
                    break;
                default:
                    compiler_error(g_current_line, line, column, fname, "unknown escape sequence");
            }
            // Skip the character that completed the escape.
            ++index;
            ++column;
            continue;
        }
        contents += current;
    }

    // Show only the offending line as context (not the whole file) and point
    // just past the last character, where the closing quote was expected.
    compiler_error(g_current_line, line, column + 1, fname, "expected \', but got EOF");
    exit(127); // not reached: compiler_error exits; kept so every path ends the function
}
// Forward declaration: get_spaces is defined further down in the file.
std : : string get_spaces ( int ) ;
// Prints a diagnostic to std::cerr and terminates the program with exit code 1.
// Output: a header made of fname, line, column, an "error:" label and `details`;
// then the line number followed by `text`; then a caret placed under `column`.
// `text` is expected to be the offending source line.
void compiler_error ( const std : : string & text , const int & line , const int & column , const std : : string & fname , const std : : string & details )
{
// Line and column rendered as decimal text.
char linestr [ 32 ] ;
sprintf ( linestr , " %d " , line ) ;
char colstr [ 32 ] ;
sprintf ( colstr , " %d " , column ) ;
// Header line; the \033 sequences are terminal escape codes around the label.
std : : cerr < < " \033 [1;1m " ;
std : : cerr < < fname ;
std : : cerr < < " : " ;
std : : cerr < < linestr ;
std : : cerr < < " : " ;
std : : cerr < < colstr ;
std : : cerr < < " : " ;
std : : cerr < < " \033 [31;49m " ;
std : : cerr < < " error: " ;
std : : cerr < < " \033 [0;0m " ;
std : : cerr < < details ;
std : : cerr < < std : : endl ;
// Source context: line number, a gap, then the text.
std : : cerr < < linestr ;
std : : cerr < < get_spaces ( 4 ) ;
std : : cerr < < text ;
std : : cerr < < std : : endl ;
// Caret line: pad past the line-number prefix, then up to the column.
std : : cerr < < get_spaces ( 4 + std : : string ( linestr ) . size ( ) ) ;
std : : cerr < < get_spaces ( column - 1 ) ;
std : : cerr < < " \033 [31;49m " ;
std : : cerr < < " ^ " ;
std : : cerr < < " \033 [0;0m " ;
std : : cerr < < std : : endl ;
exit ( 1 ) ;
}
// Prints a diagnostic to std::cout and returns; unlike compiler_error it does
// not terminate the program.
// Output: a header made of fname, line, column, a "warning:" label and `details`;
// then the line number followed by `text`; then a caret placed under `column`.
void compiler_warning ( const std : : string & text , const int & line , const int & column , const std : : string & fname , const std : : string & details )
{
// Line and column rendered as decimal text.
char linestr [ 32 ] ;
sprintf ( linestr , " %d " , line ) ;
char colstr [ 32 ] ;
sprintf ( colstr , " %d " , column ) ;
// Header line; the \033 sequences are terminal escape codes around the label.
std : : cout < < " \033 [1;1m " ;
std : : cout < < fname ;
std : : cout < < " : " ;
std : : cout < < linestr ;
std : : cout < < " : " ;
std : : cout < < colstr ;
std : : cout < < " : " ;
std : : cout < < " \033 [33;49m " ;
std : : cout < < " warning: " ;
std : : cout < < " \033 [0;0m " ;
std : : cout < < details ;
std : : cout < < std : : endl ;
// Source context: line number, a gap, then the text.
std : : cout < < linestr ;
std : : cout < < get_spaces ( 4 ) ;
std : : cout < < text ;
std : : cout < < std : : endl ;
// Caret line: pad past the line-number prefix, then up to the column.
std : : cout < < get_spaces ( 4 + std : : string ( linestr ) . size ( ) ) ;
std : : cout < < get_spaces ( column - 1 ) ;
std : : cout < < " \033 [33;49m " ;
std : : cout < < " ^ " ;
std : : cout < < " \033 [0;0m " ;
std : : cout < < std : : endl ;
}
/**
 * Returns line number `line_no` (0-based) of `str`, without its newline.
 *
 * A negative `line_no` yields an empty string. When `line_no` is past the last
 * line, the last line that could be read is returned.
 */
std::string get_line(const std::string& str, int line_no)
{
    std::istringstream reader(str);
    std::string current;
    // Read line_no + 1 lines; the last successful read is the answer.
    for (int remaining = line_no; remaining >= 0; --remaining)
    {
        std::getline(reader, current);
    }
    return current;
}
/**
 * Returns a string made of `spacenum` space characters; zero or a negative
 * count gives an empty string.
 */
std::string get_spaces(int spacenum)
{
    if (spacenum <= 0)
    {
        return std::string();
    }
    return std::string(static_cast<std::string::size_type>(spacenum), ' ');
}
std : : string make_asm_string ( StringLiteral str )
{
std : : string result ;
result + = str . identifier ;
result + = " : ; -- string literal -- \n " ;
result + = " db " ;
char code_point [ 4 ] ;
sprintf ( code_point , " %d " , str . data [ 0 ] ) ;
result + = std : : string ( code_point ) ;
for ( int i = 1 ; i < str . data . size ( ) ; i + + )
{
char code_point [ 4 ] ;
sprintf ( code_point , " %d " , str . data [ i ] ) ;
result + = ( " , " + std : : string ( code_point ) ) ;
}
result + = " , 0 " ;
result + = " \n " ;
return result ;
}
std : : vector < Token > evaluate_imports ( const std : : string & text , const std : : vector < Token > & tokens )
{
int i = 0 ;
std : : vector < Token > ret_tk = tokens ;
std : : vector < Token > new_tokens ;
while ( tokens [ i ] . tk_type ! = TT_EOF )
{
if ( g_importCount > IMPORT_MAX_DEEP ) {
compiler_error ( tokens [ i ] . line_ctx , tokens [ i ] . line , tokens [ i ] . column , tokens [ i ] . fname , " import tree too deep " ) ;
}
if ( tokens [ i ] . tk_type = = TT_Keyword & & tokens [ i ] . string_value = = keywords [ 2 ] )
{
if ( tokens [ i + 1 ] . tk_type = = TT_EOF ) { compiler_error ( text , tokens [ i ] . line , tokens [ i ] . column , tokens [ i ] . fname , " did not expect EOF after 'import' keyword " ) ; }
if ( tokens [ i + 1 ] . tk_type = = TT_Identifier )
{
if ( std : : find ( imported_files . begin ( ) , imported_files . end ( ) , tokens [ i + 1 ] . string_value ) ! = imported_files . end ( ) ) {
compiler_error ( tokens [ i + 2 ] . line_ctx , tokens [ i + 2 ] . line , tokens [ i + 2 ] . column , tokens [ i + 2 ] . fname , " file already imported " ) ;
}
if ( tokens [ i + 2 ] . tk_type ! = TT_Semicolon ) { compiler_error ( text , tokens [ i + 2 ] . line , tokens [ i + 2 ] . column , tokens [ i + 2 ] . fname , " expected a semicolon after import statement " ) ; }
std : : ifstream ifile ( tokens [ i + 1 ] . string_value + " .sp " ) ;
if ( ! ifile . good ( ) ) {
compiler_error ( tokens [ i + 1 ] . line_ctx , tokens [ i + 1 ] . line , tokens [ i + 1 ] . column , tokens [ i + 1 ] . fname , " file ' " + tokens [ i + 1 ] . string_value + " .sp' not found " ) ;
}
ifile . close ( ) ;
std : : string imported_file_contents = read_file ( tokens [ i + 1 ] . string_value + " .sp " ) ;
std : : vector < Token > imported_tokens = lex_tokens ( imported_file_contents , tokens [ i + 1 ] . string_value + " .sp " ) ;
imported_tokens . pop_back ( ) ; // remove EOF at end of token stream
new_tokens . insert ( new_tokens . end ( ) , imported_tokens . begin ( ) , imported_tokens . end ( ) ) ;
ret_tk [ i ] = Token ( TT_Null , ret_tk [ i ] . line , ret_tk [ i ] . column , ret_tk [ i ] . fname ) ; // remove import data
ret_tk [ i + 1 ] = Token ( TT_Null , ret_tk [ i + 1 ] . line , ret_tk [ i + 1 ] . column , ret_tk [ i + 1 ] . fname ) ; // remove import data
ret_tk [ i + 2 ] = Token ( TT_Null , ret_tk [ i + 2 ] . line , ret_tk [ i + 2 ] . column , ret_tk [ i + 2 ] . fname ) ; // remove import data
imported_files . push_back ( tokens [ i + 1 ] . string_value ) ;
} else {
compiler_error ( tokens [ i + 1 ] . line_ctx , tokens [ i + 1 ] . line , tokens [ i + 1 ] . column , tokens [ i + 1 ] . fname , " import statement must use an identifier " ) ;
}
}
+ + i ;
}
if ( new_tokens . size ( ) ! = 0 )
{
new_tokens . insert ( new_tokens . end ( ) , ret_tk . begin ( ) , ret_tk . end ( ) ) ;
+ + g_importCount ;
return evaluate_imports ( text , new_tokens ) ;
}
return ret_tk ;
}
std : : vector < Sentence > parse_tokens ( const std : : vector < Token > & tokens )
{
int i = 0 ;
while ( i < tokens . size ( ) )
{
if ( tokens [ i ] . tk_type = = TT_Null )
{
+ + i ;
continue ;
}
std : : cout < < tokens [ i ] . to_string ( ) < < std : : endl ;
+ + i ;
}
return std : : vector < Sentence > ( ) ;
}