Lots of rewrites.
Planning to use LLVM to make things easier. Also moved the stray Sapphire snippets into proper programs under an examples/ folder, and started a standard library even though the compiler isn't ready yet; it helps flesh out the examples.
This commit is contained in: parent 1ba46c44cf, commit f1668853dd
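The commit message above mentions a plan to move code generation to LLVM. Purely as an illustration and not part of this commit, here is a minimal sketch of what emitting IR through the LLVM C++ API could look like; the module name "sapphire" and the empty main function are placeholders:

    #include <llvm/IR/IRBuilder.h>
    #include <llvm/IR/LLVMContext.h>
    #include <llvm/IR/Module.h>
    #include <llvm/IR/Verifier.h>
    #include <llvm/Support/raw_ostream.h>

    int main() {
        llvm::LLVMContext context;
        llvm::Module module("sapphire", context);   // placeholder module name
        llvm::IRBuilder<> builder(context);

        // Emit the equivalent of an empty @main that just returns 0.
        auto* fn_type = llvm::FunctionType::get(builder.getInt32Ty(), /*isVarArg=*/false);
        auto* main_fn = llvm::Function::Create(fn_type, llvm::Function::ExternalLinkage, "main", &module);
        auto* entry = llvm::BasicBlock::Create(context, "entry", main_fn);
        builder.SetInsertPoint(entry);
        builder.CreateRet(builder.getInt32(0));

        llvm::verifyFunction(*main_fn);        // sanity-check the generated IR
        module.print(llvm::outs(), nullptr);   // dump textual IR to stdout
        return 0;
    }

Such a tool would need to be linked against LLVM (for example via llvm-config --cxxflags --ldflags --libs core); nothing in this commit wires that into the CMake build.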
@@ -25,6 +25,10 @@ add_executable(
    src/Arguments.h
    src/Normalizer.cpp
    src/Normalizer.h
    src/ASTNode.cpp
    src/ASTNode.h
    src/replace.cpp
    src/replace.h
)

target_include_directories(sapphirec PUBLIC src)
15 core/__internal/io/linux.sp Normal file
@@ -0,0 +1,15 @@
import core/linux;
import core/string;

namespace io {

    @out (str String) {
        linux.sys_write(1,(i8*)String,string.len(String));
    }

    @in (str) () {
        i8* buffer = i8*(256);
        linux.sys_read(0,buffer,256);
        return (str)buffer;
    }
}
23 core/__internal/linux/x64.sp Normal file
@@ -0,0 +1,23 @@
namespace linux {

    @sys_read (u32 fd, i8* buf, u64 size) {
        syscall3(0,fd,buf,size);
    }

    @sys_write (u32 fd, i8* buf, u64 size) {
        syscall3(1,fd,buf,size);
    }

    @sys_open (i8* name, i32 flags, u16 mode) {
        syscall3(2,name,flags,mode);
    }

    @sys_close (u32 fd) {
        syscall1(3,fd);
    }

    @sys_exit (i32 code) {
        syscall1(60,code);
    }

}
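For context, the syscall numbers used above are the x86-64 Linux ones (0 read, 1 write, 2 open, 3 close, 60 exit). As an illustration only, not part of this commit, the @sys_write and @sys_exit wrappers correspond to the following C++ written against glibc's syscall(2):

    #include <sys/syscall.h>
    #include <unistd.h>
    #include <cstring>

    int main() {
        const char* msg = "written via a raw syscall\n";
        // Mirrors @sys_write: syscall3(1, fd, buf, size) with fd = 1 (stdout).
        syscall(SYS_write, 1, msg, std::strlen(msg));
        // Mirrors @sys_exit: syscall1(60, code).
        syscall(SYS_exit, 0);
    }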
9 core/flow.sp Normal file
@@ -0,0 +1,9 @@
import core/linux;

namespace flow {

    @exit (i32 code) {
        linux.sys_exit(code);
    }

}
5 core/fs.sp Normal file
@@ -0,0 +1,5 @@
import core/linux;

namespace fs {

}
9 core/io.sp Normal file
@@ -0,0 +1,9 @@
import core/__internal/io/linux;

namespace io {

    @outln (str String) {
        out(string.concat(String,'\n'));
    }

}
1 core/linux.sp Normal file
@@ -0,0 +1 @@
import core/__internal/linux/x64;
33 core/string.sp Normal file
@@ -0,0 +1,33 @@
namespace string {

    @len (u64) (str string) {
        i8* ptr = (i8*)string;
        u64 length = 0;
        while(ptr[length] != 0)
        {
            length += 1;
        }
        return length;
    }

    @concat (str) (str a, str b) {
        u64 len_a = len(a);
        u64 len_b = len(b);
        u64 final_size = len_a + len_b;
        i8* chars = i8*(final_size + 1); // TODO: work on allocation
        clone(a,chars,len_a);
        clone(b,chars + len_a,len_b);
        chars[final_size] = 0;
        return (str)chars;
    }

    @clone (str string, i8* buffer, u64 max_copy_size) {
        u64 chars_cloned = 0;
        i8* ptr = (i8*)string;
        while(chars_cloned <= max_copy_size && ptr[chars_cloned] != 0)
        {
            buffer[chars_cloned] = ptr[chars_cloned];
            chars_cloned += 1;
        }
    }
}
5 examples/error.sp Normal file
@@ -0,0 +1,5 @@
import core/flow;

@main {
    flow.exit(1);
}
10 examples/file.sp Normal file
@@ -0,0 +1,10 @@
import core/io;
import core/fs;

@main {
    io.out('What\'s your name? ');
    str name = io.in();
    fs.File nameFile = fs.open('name.txt');
    nameFile.write(name);
    nameFile.close();
}
5 examples/hello-world.sp Normal file
@@ -0,0 +1,5 @@
import core/io;

@main {
    io.outln('Hello world!');
}
9 examples/input.sp Normal file
@@ -0,0 +1,9 @@
import core/io;

@main {
    io.out('What\'s your name? ');
    str name = io.in();
    io.out('Hello, ');
    io.out(name);
    io.out('!!');
}
8 examples/variables.sp Normal file
@@ -0,0 +1,8 @@
import core/io;

@main {
    i32 age = 64;
    io.out('I am ');
    io.out(age);
    io.outln(' years old.');
}
27 src/ASTNode.cpp Normal file
@@ -0,0 +1,27 @@
#include "ASTNode.h"

ASTNode::ASTNode()
{
}

ASTNode::~ASTNode()
{
}

BinaryOpNode::BinaryOpNode(std::shared_ptr<ASTNode> left,std::shared_ptr<ASTNode> right)
    : left(left), right(right)
{
}

BinaryOpNode::~BinaryOpNode()
{
}

PlusNode::PlusNode(std::shared_ptr<ASTNode> left,std::shared_ptr<ASTNode> right)
    : BinaryOpNode(left,right)
{
}

PlusNode::~PlusNode()
{
}
26 src/ASTNode.h Normal file
@@ -0,0 +1,26 @@
#pragma once
#include <memory>

class ASTNode
{
public:
    ASTNode();
    ~ASTNode();
};

class BinaryOpNode : public ASTNode
{
protected:
    std::shared_ptr<ASTNode> left;
    std::shared_ptr<ASTNode> right;
public:
    BinaryOpNode(std::shared_ptr<ASTNode> left,std::shared_ptr<ASTNode> right);
    ~BinaryOpNode();
};

class PlusNode final : public BinaryOpNode
{
public:
    PlusNode(std::shared_ptr<ASTNode> left,std::shared_ptr<ASTNode> right);
    ~PlusNode();
};
@@ -80,6 +80,61 @@ TokenStream Importer::evaluate(const TokenStream& original)

            new_tokens.insert(new_tokens.end(),imported_tokens.begin(),imported_tokens.end());

            Token::erase(ret_tk[i]);
            Token::erase(ret_tk[i+1]);
            Token::erase(ret_tk[i+2]);
        } else if(next_token.tk_type == TT_Path)
        {
            Token last_token = original[i+2];

            if(last_token.tk_type != TT_Semicolon)
                Error::throw_error(last_token.loc,last_token.line(),"expected a semicolon");

            if(std::find(imported_files.begin(),imported_files.end(),next_token.string_value) != imported_files.end())
            {
                if(Arguments::wimport)
                    Error::throw_warning(next_token.loc,next_token.line(),"file already imported, skipping");
                Token::erase(ret_tk[i]);
                Token::erase(ret_tk[i+1]);
                Token::erase(ret_tk[i+2]);
                ++i;
                continue;
            }

            if(import_count > MAX_IMPORTS)
                Error::throw_error(current_token.loc,current_token.line(),"maximum import depth exceeded");

            std::string input_file_name = next_token.string_value + ".sp";

            std::ifstream input_file(input_file_name); // only used to check if it exists, thus closed afterwards
            if(!input_file.good())
                Error::throw_error(next_token.loc,next_token.line(),"file not found");
            input_file.close();

            auto file_contents = FileIO::read_all(input_file_name);

            auto top_location = std::make_shared<Location>(current_token.loc.line,current_token.loc.column,current_token.loc.fname);
            top_location.get()->parent = current_token.loc.parent;

            import_stack.push_back(top_location); // Keep ref_count above 0, just in case

            auto import_lexer = Lexer::make_lexer(input_file_name);

            Lexer::assign_parent_location(import_lexer,top_location);

            TokenStream imported_tokens = import_lexer->lex(file_contents);

            imported_tokens.pop_back(); // remove EOF at end of token stream

            for(auto& tk : imported_tokens)
            {
                tk.loc.parent = top_location;
            }

            imported_files.push_back(next_token.string_value);

            new_tokens.insert(new_tokens.end(),imported_tokens.begin(),imported_tokens.end());

            Token::erase(ret_tk[i]);
            Token::erase(ret_tk[i+1]);
            Token::erase(ret_tk[i+2]);
@@ -154,6 +154,15 @@ TokenStream Lexer::lex(const std::string& text)
            case ';':
                result.push_back(Token::make_with_line({TT_Semicolon,loc},current_line_text));
                break;
            case '.':
                result.push_back(Token::make_with_line({TT_Period,loc},current_line_text));
                break;
            case ',':
                result.push_back(Token::make_with_line({TT_Comma,loc},current_line_text));
                break;
            case '!':
                result.push_back(Token::make_with_line({TT_Exclamation,loc},current_line_text));
                break;
            default:
                Error::throw_error(loc,current_line_text,"unknown character");
        }
@@ -169,6 +178,10 @@ Token Lexer::create_identifier()
    std::vector<char> characters;
    int prev_line = loc.line;
    int prev_column = loc.column;
    bool is_path = false;
    bool last_was_path = false;
    Location saved_loc = this->loc;
    Location saved_prev_loc = this->prev_loc;

    characters.push_back(current_char);
@@ -177,11 +190,31 @@ Token Lexer::create_identifier()
        if(is_in_string(IDENTIFIERS,current_char))
        {
            characters.push_back(current_char);
            last_was_path = false;
        }
        else if(current_char == '/')
        {
            if(last_was_path) {
                characters.pop_back();
                this->loc = saved_loc;
                this->prev_loc = saved_prev_loc;
                this->rewind();
                std::string identifier(characters.begin(), characters.end());
                return Token::make_with_line({TT_Path,identifier,{prev_line,prev_column,loc.fname}},current_line_text);
            }

            saved_loc = this->loc;
            saved_prev_loc = this->prev_loc;

            characters.push_back(current_char);
            is_path = true;
            last_was_path = true;
        }
        else
        {
            this->rewind();
            std::string identifier(characters.begin(), characters.end());
            if(is_path) return Token::make_with_line({TT_Path,identifier,{prev_line,prev_column,loc.fname}},current_line_text);
            auto location = std::find(keywords.begin(),keywords.end(),identifier);
            if(location != keywords.end())
            {
@@ -27,6 +27,21 @@ TokenStream Normalizer::normalize(const TokenStream& input)
            result.push_back(current);
            continue;
        }
        if(current.tk_type == TT_Exclamation)
        {
            if(i+1 != input.size())
            {
                if(input[i+1].tk_type == TT_Equal)
                {
                    i += 2;
                    result.push_back(current.copy_with_new_type(TT_NEqual));
                    continue;
                }
            }
            i++;
            result.push_back(current);
            continue;
        }
        if(current.tk_type == TT_GreaterThan)
        {
            if(i+1 != input.size())
@@ -4,13 +4,13 @@
std::string int_to_string(const int& value)
{
    char buffer[12];
-   sprintf(buffer,"%d",value);
+   std::sprintf(buffer,"%d",value);
    return {buffer};
}

std::string float_to_string(const float& value)
{
    char buffer[50];
-   sprintf(buffer,"%f",value);
+   std::sprintf(buffer,"%f",value);
    return {buffer};
}
@@ -1,6 +1,7 @@
#include "Token.h"
#include "StringConversion.h"
#include "FormatString/FormatString.hpp"
#include "replace.h"

const std::string token_strings[] = {
    "TT_IDENTIFIER",
@@ -27,6 +28,11 @@ const std::string token_strings[] = {
    "TT_EQUALS",
    "TT_GTE",
    "TT_LTE",
    "TT_PERIOD",
    "TT_COMMA",
    "TT_PATH",
    "TT_EXCLAMATION",
    "TT_NEQUAL"
};

Token::Token(const TokenType& type)
@@ -97,6 +103,7 @@ std::string Token::to_string() const
    }
    else if (tk_type == TT_String)
    {
        replace(const_cast<std::string&>(string_value),"\n","\\n");
        return format_string("STRING:'%s' %s",string_value,details);
    }
    switch(tk_type)
@@ -137,6 +144,16 @@ std::string Token::to_string() const
            return "GTE " + details;
        case TT_LTE:
            return "LTE " + details;
        case TT_Period:
            return "PERIOD " + details;
        case TT_Comma:
            return "COMMA " + details;
        case TT_Path:
            return "PATH " + details;
        case TT_Exclamation:
            return "EXCLAMATION " + details;
        case TT_NEqual:
            return "NEQUAL " + details;
    }
    return "";
}
@@ -175,7 +192,7 @@ void Token::erase(Token& tk)
    tk.tk_type = TT_Null;
}

-bool Token::match_token_types(const TokenStream& a, const TokenStream& b, int count)
+bool Token::match_token_types(const std::vector<Token>& a, const std::vector<Token>& b, int count)
{
    int size = [](int a, int b){ return a > b ? b : a; }(a.size() - count,b.size());
@@ -28,7 +28,12 @@ enum TokenType
    TT_Null,
    TT_Equals,
    TT_GTE,
-   TT_LTE
+   TT_LTE,
+   TT_Period,
+   TT_Comma,
+   TT_Path,
+   TT_Exclamation,
+   TT_NEqual
};

extern const std::string token_strings[];
@@ -69,7 +74,7 @@ struct Token

    Token copy_with_new_type(const TokenType& type);

-   static bool match_token_types(const TokenStream& a, const TokenStream& b, int count);
+   static bool match_token_types(const std::vector<Token>& a, const std::vector<Token>& b, int count);

private:
    std::string line_text;
9 src/replace.cpp Normal file
@@ -0,0 +1,9 @@
#include "replace.h"

bool replace(std::string& str, const std::string& from, const std::string& to) {
    size_t start_pos = str.find(from);
    if(start_pos == std::string::npos)
        return false;
    str.replace(start_pos, from.length(), to);
    return true;
}
4 src/replace.h Normal file
@@ -0,0 +1,4 @@
#pragma once
#include <string>

bool replace(std::string& str, const std::string& from, const std::string& to);
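Worth noting: replace() substitutes only the first occurrence of from, so Token::to_string above escapes only the first newline in a string token. Purely as a sketch, not part of this commit, a hypothetical replace_all helper would loop until find() stops matching:

    #include <string>

    // Hypothetical helper, not in the commit: replace every occurrence of `from`.
    bool replace_all(std::string& str, const std::string& from, const std::string& to) {
        if (from.empty())
            return false;
        bool replaced = false;
        size_t pos = 0;
        while ((pos = str.find(from, pos)) != std::string::npos) {
            str.replace(pos, from.length(), to);
            pos += to.length();  // skip past the inserted text to avoid re-matching it
            replaced = true;
        }
        return replaced;
    }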
26 test.asm (deleted)
@@ -1,26 +0,0 @@
format ELF64 executable 3
; Assembly generated by the Sapphire compiler.
segment readable executable
entry start
strlen: ; -- length of null-terminated string in rdi --
    xor rax, rax
    mov rcx, -1
    cld
    repne scasb
    mov rax, rcx
    add rax, 2
    neg rax
    ret
print: ; -- print null-terminated string in rdi --
    mov rsi, rdi
    call strlen
    mov rdx, rax
    mov rdi, 1
    mov rax, 1
    syscall
    ret
start:
    ; -- exit with code 0 --
    mov rax, 60
    xor rdi, rdi
    syscall