Skip to content

Instantly share code, notes, and snippets.

@kkestell
Last active October 31, 2024 13:00
Show Gist options
  • Save kkestell/e4db95e88d5289c0e336fa754114af56 to your computer and use it in GitHub Desktop.
Save kkestell/e4db95e88d5289c0e336fa754114af56 to your computer and use it in GitHub Desktop.
Compiler

Compiler

main.py

import textwrap

from ast_debug import debug_ast
from cfg_builder import CFGBuilder
from cfg_debug import debug_cfg
from llvm_compiler import LLVMCompiler
from llvm_generator import LLVMGenerator
from llvm_runner import LLVMRunner
from parser import Parser
from ssa_debug import debug_ssa
from ssa_transformer import SSATransformer
from symbol_table_builder import SymbolTableBuilder
from symbol_table_debug import debug_symbol_table
from tac_debug import debug_tac
from tac_optimizer import TACDeadCodeEliminator
from tac_transformer import TACTransformer
from utils_debug import format_header


def main() -> None:
    source = """
    function fib(n: int): int
        if n <= 1 then
            return n
        end
        return fib(n - 1) + fib(n - 2)
    end
    
    function main(): int
        return fib(10)
    end
    """

    print(format_header("Source Code"))
    print(textwrap.dedent(source).strip())

    parser = Parser()
    ast_program = parser.parse(source)
    print(format_header("AST"))
    print(debug_ast(ast_program))

    symbol_table_builder = SymbolTableBuilder()
    symbol_table = symbol_table_builder.build(ast_program)
    print(format_header("Symbol Table"))
    print(debug_symbol_table(symbol_table))

    tac_generator = TACTransformer()
    tac_program = tac_generator.generate(ast_program, symbol_table)
    print(format_header("Three-Address Code"))
    print(debug_tac(tac_program))

    tac_eliminator = TACDeadCodeEliminator()
    tac_program = tac_eliminator.eliminate(tac_program)
    print(format_header("Three-Address Code (After Dead Code Elimination)"))
    print(debug_tac(tac_program))

    cfg_builder = CFGBuilder()
    cfgs = cfg_builder.build(tac_program)
    print(format_header("Control Flow Graphs"))
    for cfg in cfgs:
        print(debug_cfg(cfg))

    ssa_transform = SSATransformer()
    ssa_program = ssa_transform.transform(tac_program, cfgs)
    print(format_header("Static Single Assignment"))
    print(debug_ssa(ssa_program))

    llvm_generator = LLVMGenerator()
    llvm_ir = llvm_generator.generate(ssa_program)
    print(format_header("LLVM IR"))
    print(llvm_ir)

    llvm_runner = LLVMRunner()
    result = llvm_runner.run_ir(llvm_generator.module)
    print(format_header("Result"))
    print(result)

    llvm_compiler = LLVMCompiler()
    llvm_compiler.create_executable(llvm_generator.module, "a.exe")
    print(format_header("Executable Generated"))
    print("a.exe")


if __name__ == "__main__":
    main()

Output

═ AST ═════════════════════════════════════════════════════════════════════════
{
  "functions": [
    {
      "name": "fib",
      "parameters": [
        {
          "name": "n",
          "type": {
            "name": "int"
          }
        }
      ],
      "return_type": {
        "name": "int"
      },
      "body": {
        "statements": [
          {
            "condition": {
              "operator": "LTE",
              "left": {
                "name": "n"
              },
              "right": {
                "value": 1
              }
            },
            "then_block": {
              "statements": [
                {
                  "expression": {
                    "name": "n"
                  }
                }
              ]
            },
            "else_block": null
          },
          {
            "expression": {
              "operator": "ADD",
              "left": {
                "name": "fib",
                "arguments": [
                  {
                    "operator": "SUB",
                    "left": {
                      "name": "n"
                    },
                    "right": {
                      "value": 1
                    }
                  }
                ]
              },
              "right": {
                "name": "fib",
                "arguments": [
                  {
                    "operator": "SUB",
                    "left": {
                      "name": "n"
                    },
                    "right": {
                      "value": 2
                    }
                  }
                ]
              }
            }
          }
        ]
      }
    },
    {
      "name": "main",
      "parameters": [],
      "return_type": {
        "name": "int"
      },
      "body": {
        "statements": [
          {
            "expression": {
              "name": "fib",
              "arguments": [
                {
                  "value": 10
                }
              ]
            }
          }
        ]
      }
    }
  ]
}

═ Symbol Table ════════════════════════════════════════════════════════════════
fib(n: int) -> int
main() -> int
→
  n: int
  →
    →
    ←
  ←
←
→
  →
  ←
←

═ Three-Address Code ══════════════════════════════════════════════════════════
procedure fib(n: int) -> int:
bb1:
  t1 = n <= 1
  if t1 goto bb2 else goto bb3
bb2:
  return n
  goto bb3
bb3:
  t2 = n - 1
  t3 = fib(t2)
  t4 = n - 2
  t5 = fib(t4)
  t6 = t3 + t5
  return t6
  t7 = 0
  return t7

procedure main() -> int:
bb4:
  t8 = fib(10)
  return t8
  t9 = 0
  return t9

═ Three-Address Code (After Dead Code Elimination) ════════════════════════════
procedure fib(n: int) -> int:
bb1:
  t1 = n <= 1
  if t1 goto bb2 else goto bb3
bb2:
  return n
bb3:
  t2 = n - 1
  t3 = fib(t2)
  t4 = n - 2
  t5 = fib(t4)
  t6 = t3 + t5
  return t6

procedure main() -> int:
bb4:
  t8 = fib(10)
  return t8

═ Control Flow Graphs ═════════════════════════════════════════════════════════
fib:
  entry: bb1, exit: bb3
  block bb1:
    flags: entry
    predecessors: []
    successors: ['bb2', 'bb3']
    instructions:
      t1 = n <= 1
      if t1 goto bb2 else goto bb3
  block bb2:
    predecessors: ['bb1']
    successors: []
    instructions:
      return n
  block bb3:
    flags: exit
    predecessors: ['bb1']
    successors: []
    instructions:
      t2 = n - 1
      t3 = fib(t2)
      t4 = n - 2
      t5 = fib(t4)
      t6 = t3 + t5
      return t6
main:
  entry: bb4, exit: bb4
  block bb4:
    flags: entry, exit
    predecessors: []
    successors: []
    instructions:
      t8 = fib(10)
      return t8

═ Static Single Assignment ════════════════════════════════════════════════════
proc fib(n: int) -> int:
block bb1:
  t1 = n <= TACConst(value=1)
  if t1 goto bb2 else goto bb3
block bb2:
  return n
block bb3:
  t2 = n - TACConst(value=1)
  t3 = fib(t2)
  t4 = n - TACConst(value=2)
  t5 = fib(t4)
  t6 = t3 + t5
  return t6

proc main() -> int:
block bb4:
  t8 = fib(TACConst(value=10))
  return t8

═ LLVM IR ═════════════════════════════════════════════════════════════════════
; ModuleID = "module"
target triple = "unknown-unknown-unknown"
target datalayout = ""

define i32 @"fib"(i32 %"n")
{
bb1:
  %"t1" = icmp sle i32 %"n", 1
  br i1 %"t1", label %"bb2", label %"bb3"
bb2:
  ret i32 %"n"
bb3:
  %"t2" = sub i32 %"n", 1
  %"t3" = call i32 @"fib"(i32 %"t2")
  %"t4" = sub i32 %"n", 2
  %"t5" = call i32 @"fib"(i32 %"t4")
  %"t6" = add i32 %"t3", %"t5"
  ret i32 %"t6"
}

define i32 @"main"()
{
bb4:
  %"t8" = call i32 @"fib"(i32 10)
  ret i32 %"t8"
}

═ Result ══════════════════════════════════════════════════════════════════════
55

═ Executable Generated ════════════════════════════════════════════════════════
a.exe
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment