From 7522858897aeba4470b5313e57da6a3be2545407 Mon Sep 17 00:00:00 2001 From: apio Date: Fri, 15 Jul 2022 14:28:05 +0200 Subject: [PATCH] basic LLVM IR building and parsing --- CMakeLists.txt | 2 ++ examples/parser-test.sp | 2 +- src/AST/ASTNode.h | 7 ++++- src/AST/MulNode.cpp | 14 +++++++++ src/AST/MulNode.h | 2 ++ src/AST/NumberNode.h | 13 ++++++++ src/AST/SumNode.cpp | 14 +++++++++ src/AST/SumNode.h | 2 ++ src/IRBuilder.cpp | 42 +++++++++++++++++++++++++ src/IRBuilder.h | 23 ++++++++++++++ src/Parser.cpp | 70 ++++++++++++++++++++++++----------------- src/Parser.h | 59 +++++----------------------------- src/sapphire.cpp | 15 +++++---- tests/simple.sp | 2 +- 14 files changed, 177 insertions(+), 90 deletions(-) create mode 100644 src/IRBuilder.cpp create mode 100644 src/IRBuilder.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 060c378..d14ebab 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,6 +52,8 @@ add_executable( src/Parser.cpp src/Parser.h src/sapphirepch.h + src/IRBuilder.cpp + src/IRBuilder.h ) target_include_directories(sapphirec PUBLIC src) diff --git a/examples/parser-test.sp b/examples/parser-test.sp index 38b1214..4e1e45e 100644 --- a/examples/parser-test.sp +++ b/examples/parser-test.sp @@ -1 +1 @@ -1 + 2 * 4 \ No newline at end of file +1+2+3*5 \ No newline at end of file diff --git a/src/AST/ASTNode.h b/src/AST/ASTNode.h index 6e65600..08474f2 100644 --- a/src/AST/ASTNode.h +++ b/src/AST/ASTNode.h @@ -1,9 +1,14 @@ #pragma once -#include +#include "IRBuilder.h" +#include "llvm/IR/Value.h" + +class IRBuilder; class ASTNode { public: ASTNode(); ~ASTNode(); + + virtual llvm::Value* codegen(IRBuilder* generator) = 0; }; diff --git a/src/AST/MulNode.cpp b/src/AST/MulNode.cpp index a53d423..3662540 100644 --- a/src/AST/MulNode.cpp +++ b/src/AST/MulNode.cpp @@ -3,8 +3,22 @@ MulNode::MulNode(std::shared_ptr left, std::shared_ptr right, char op) : BinaryOpNode(left, right), op(op) { + assert(op == '*' || op == '/'); } MulNode::~MulNode() { +} + +llvm::Value* MulNode::codegen(IRBuilder* generator) +{ + switch (op) + { + case '*': + return generator->getBuilder()->CreateMul(left->codegen(generator), right->codegen(generator)); + case '/': + return generator->getBuilder()->CreateSDiv(left->codegen(generator), right->codegen(generator)); + default: + assert(0 && "unreachable"); + } } \ No newline at end of file diff --git a/src/AST/MulNode.h b/src/AST/MulNode.h index 822222c..5a20dcd 100644 --- a/src/AST/MulNode.h +++ b/src/AST/MulNode.h @@ -8,4 +8,6 @@ class MulNode final : public BinaryOpNode public: MulNode(std::shared_ptr left, std::shared_ptr right, char op); ~MulNode(); + + llvm::Value* codegen(IRBuilder* generator) override; }; \ No newline at end of file diff --git a/src/AST/NumberNode.h b/src/AST/NumberNode.h index d66424d..3367dad 100644 --- a/src/AST/NumberNode.h +++ b/src/AST/NumberNode.h @@ -39,6 +39,19 @@ template class BasicNumberNode : public NumberNode { return std::is_integral::value; } + + llvm::Value* codegen(IRBuilder* generator) override + { + if (is_integral()) + { + return llvm::ConstantInt::getSigned(llvm::IntegerType::getInt32Ty(generator->getBuilder()->getContext()), + value); + } + else + { + return llvm::ConstantFP::get(llvm::Type::getFloatTy(generator->getBuilder()->getContext()), value); + } + } }; using IntegerNode = BasicNumberNode; diff --git a/src/AST/SumNode.cpp b/src/AST/SumNode.cpp index 0ef3a05..aa06956 100644 --- a/src/AST/SumNode.cpp +++ b/src/AST/SumNode.cpp @@ -3,8 +3,22 @@ SumNode::SumNode(std::shared_ptr left, std::shared_ptr right, char op) : BinaryOpNode(left, right), op(op) { + assert(op == '+' || op == '-'); } SumNode::~SumNode() { +} + +llvm::Value* SumNode::codegen(IRBuilder* generator) +{ + switch (op) + { + case '+': + return generator->getBuilder()->CreateAdd(left->codegen(generator), right->codegen(generator)); + case '-': + return generator->getBuilder()->CreateSub(left->codegen(generator), right->codegen(generator)); + default: + assert(0 && "unreachable"); + } } \ No newline at end of file diff --git a/src/AST/SumNode.h b/src/AST/SumNode.h index 3bd8fe1..cffc97b 100644 --- a/src/AST/SumNode.h +++ b/src/AST/SumNode.h @@ -8,4 +8,6 @@ class SumNode final : public BinaryOpNode public: SumNode(std::shared_ptr left, std::shared_ptr right, char op); ~SumNode(); + + llvm::Value* codegen(IRBuilder* generator) override; }; \ No newline at end of file diff --git a/src/IRBuilder.cpp b/src/IRBuilder.cpp new file mode 100644 index 0000000..cc585d5 --- /dev/null +++ b/src/IRBuilder.cpp @@ -0,0 +1,42 @@ +#include "IRBuilder.h" +#include "Arguments.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/Verifier.h" + +IRBuilder::IRBuilder() +{ + context = std::make_unique(); + builder = std::unique_ptr>(new llvm::IRBuilder<>(*context)); + module = std::make_unique(Arguments::input_fname, *context); +} + +llvm::IRBuilder<>* IRBuilder::getBuilder() +{ + return builder.get(); +} + +void IRBuilder::create_main_function(std::shared_ptr expression) +{ + llvm::FunctionType* mainType = + llvm::FunctionType::get(llvm::IntegerType::getInt32Ty(*context), std::vector(), false); + llvm::Function* main = llvm::Function::Create(mainType, llvm::GlobalValue::ExternalLinkage, "main", module.get()); + + llvm::BasicBlock* entryBlock = llvm::BasicBlock::Create(*context, "entry", main); + builder->SetInsertPoint(entryBlock); + + llvm::Value* returnValue = expression->codegen(this); + + builder->CreateRet(returnValue); + llvm::verifyFunction(*main); +} + +std::string IRBuilder::getGeneratedIR() +{ + std::string str; + llvm::raw_string_ostream oss(str); + + module->print(oss, nullptr); + + return oss.str(); +} \ No newline at end of file diff --git a/src/IRBuilder.h b/src/IRBuilder.h new file mode 100644 index 0000000..98e883b --- /dev/null +++ b/src/IRBuilder.h @@ -0,0 +1,23 @@ +#pragma once +#include "AST/ASTNode.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" + +class ASTNode; + +class IRBuilder +{ + std::unique_ptr context; + std::unique_ptr> builder; + std::unique_ptr module; + + public: + IRBuilder(); + void create_main_function(std::shared_ptr expression); + + llvm::IRBuilder<>* getBuilder(); + + std::string getGeneratedIR(); +}; \ No newline at end of file diff --git a/src/Parser.cpp b/src/Parser.cpp index 4e53d9f..e129dea 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -1,13 +1,11 @@ #include "Parser.h" +#include "AST/MulNode.h" +#include "AST/SumNode.h" Parser::Parser(const TokenStream& tokens) : tokens(tokens) { } -Parser::~Parser() -{ -} - std::shared_ptr Parser::new_parser(const TokenStream& tokens) { return std::shared_ptr( @@ -16,45 +14,59 @@ std::shared_ptr Parser::new_parser(const TokenStream& tokens) std::shared_ptr Parser::parse() { - auto result = walk_expr(); - - if (result.is_error()) - { - result.ethrow(); - } - - return result.get(); + advance(); + return expr(); } -Parser::ErrorOr Parser::walk_expr() +int Parser::advance() { - return ErrorOr(new ExprNode()); // constructor does not want to accept a shared_ptr in the argument - // list, thats why im not using make_shared here + ++index; + if (index < tokens.size()) + { + current_token = &tokens[index]; + } + return 1; } -Parser::ErrorOr Parser::walk_number() +std::shared_ptr Parser::factor() { - Token& current_token = tokens[m_index++]; - if (current_token.tk_type == TT_Number) + Token token = *current_token; + + if (token.tk_type == TT_Number) { - return ErrorOr(new IntegerNode(current_token.int_value)); + advance(); + return std::make_shared(token.int_value); } - else if (current_token.tk_type == TT_Float) + + if (token.tk_type == TT_Float) { - return ErrorOr(new FloatNode(current_token.float_value)); - } - else - { - return ErrorOr("expected a number", current_token); + advance(); + return std::make_shared(token.float_value); } } -void Parser::save_current_position() +std::shared_ptr Parser::term() { - saved_m_index = m_index; + std::shared_ptr left = factor(); + while (current_token->tk_type == TT_Mul || current_token->tk_type == TT_Div) + { + Token op = *current_token; + advance(); + std::shared_ptr right = factor(); + left = std::make_shared(left, right, op.tk_type == TT_Mul ? '*' : '/'); + } + return left; } -void Parser::restore_current_position() +std::shared_ptr Parser::expr() { - m_index = saved_m_index; + std::shared_ptr left = term(); + while (current_token->tk_type == TT_Plus || current_token->tk_type == TT_Minus) + { + Token op = *current_token; + advance(); + std::shared_ptr right = term(); + left = std::make_shared(left, right, op.tk_type == TT_Plus ? '+' : '-'); + } + return left; } diff --git a/src/Parser.h b/src/Parser.h index f0cad10..4028add 100644 --- a/src/Parser.h +++ b/src/Parser.h @@ -1,5 +1,6 @@ #pragma once #include "AST/NumberNode.h" +#include "AST/SumNode.h" #include "Error.h" #include "Lexer.h" #include "sapphirepch.h" @@ -7,64 +8,18 @@ /* Parser class for the Sapphire compiler. */ class Parser { - /* Struct to store a parsing result which can be either a parsing error or a success, in which case it contains a - * pointer to the result. */ - template struct ErrorOr - { - /* Return the stored pointer. */ - std::shared_ptr get() - { - assert(!m_is_error); - return m_ptr; - } - - /* Call Error::throw_error() with the stored error's location, line text, and the error string provided to this - * struct instance. */ - void ethrow() - { - assert(m_is_error); - Error::throw_error(error_tok->loc, error_tok->line(), m_error); - } - - /* Construct a new successful ErrorOr with a heap-allocated pointer to the result class. */ - ErrorOr(T* ptr) : m_ptr(ptr), m_is_error(false) - { - } - /* Construct a new failed ErrorOr with the error details and the token where parsing failed. */ - ErrorOr(const std::string& error, const Token& error_tok) - : m_error(error), m_is_error(true), error_tok(error_tok) - { - } - - /* Is this ErrorOr instance successful or failed? */ - bool is_error() - { - return m_is_error; - } - - private: - bool m_is_error; - std::string m_error; - std::unique_ptr error_tok; - std::shared_ptr m_ptr; - }; - private: Parser(const TokenStream& tokens); TokenStream tokens; + int index = -1; + int advance(); + Token* current_token; - ErrorOr walk_expr(); - ErrorOr walk_number(); - - int m_index; - int saved_m_index; - - void save_current_position(); - void restore_current_position(); + std::shared_ptr factor(); + std::shared_ptr expr(); + std::shared_ptr term(); public: - ~Parser(); - /* Construct a new Parser with the given TokenStream. */ static std::shared_ptr new_parser(const TokenStream& tokens); /* Parse the stored TokenStream and return the top-level node of the result Abstract Syntax Tree. */ diff --git a/src/sapphire.cpp b/src/sapphire.cpp index 10340c7..6cd36a2 100644 --- a/src/sapphire.cpp +++ b/src/sapphire.cpp @@ -1,8 +1,10 @@ #include "Arguments.h" #include "FileIO.h" +#include "IRBuilder.h" #include "Importer.h" #include "Lexer.h" #include "Normalizer.h" +#include "Parser.h" #include "sapphirepch.h" #include "utils.h" @@ -31,12 +33,13 @@ int main(int argc, char** argv) result = Normalizer::normalize(result); } - for (int i = 0; i < result.size(); i++) - { - std::cout << result[i].to_string() << std::endl; - } + auto parser = Parser::new_parser(result); - std::cout << "Output filename: " << Arguments::output_fname << std::endl; + auto ast = parser->parse(); - std::cout << "Output target triple: " << Arguments::TargetTriple.getTriple() << std::endl; + IRBuilder builder; + + builder.create_main_function(ast); + + std::cout << builder.getGeneratedIR(); } diff --git a/tests/simple.sp b/tests/simple.sp index aae958a..122be2f 100644 --- a/tests/simple.sp +++ b/tests/simple.sp @@ -1,4 +1,4 @@ -const outln from @'core/io'; +const { outln } from @'core/io'; let @main in { outln('Hello, world!');