Skip to content

Instantly share code, notes, and snippets.

@nicklockwood
Created May 18, 2020 22:58
Show Gist options
  • Save nicklockwood/303ea2cb52a0ebf0a1b1ec2b0c28ac88 to your computer and use it in GitHub Desktop.
Save nicklockwood/303ea2cb52a0ebf0a1b1ec2b0c28ac88 to your computer and use it in GitHub Desktop.
A simple one-file language parser and compiler test
import Foundation
/// Failures reported by the lexer while turning source text into tokens.
enum LexingError: Error, Equatable {
    /// Unrecognised text; the payload is the offending text (for example an
    /// identifier that is not a known keyword).
    case syntaxError(String)

    /// The input ended before a construct (such as a string literal) was closed.
    case unexpectedEOF
}
/// Failures reported by the parser while turning tokens into a `Program`.
enum ParsingError: Error {
    /// A required element was missing; the payload describes what was expected.
    case expected(String)

    /// A token appeared where it is not allowed.
    case unexpectedToken(Token)
}
/// A lexical token produced by the lexer.
enum Token: Equatable {
    // Keyword and punctuation tokens carry no payload.
    case print, lparen, rparen, comma

    /// A string literal together with its body.
    case string(StringLiteral)

    /// A lexing failure carried in-band so the `read…` helpers can return it
    /// through the same `Token?` channel as ordinary tokens.
    case error(LexingError)
}
/// A string literal as it appears in a program.
struct StringLiteral: Equatable {
    /// The literal's contents, without the surrounding quotes.
    var body: String
}
/// A single value passed to a statement.
enum Operand {
    /// A string-literal operand.
    case string(StringLiteral)
}
/// The argument list of a statement.
struct Arguments {
    /// The operands, in source order.
    var operands: [Operand]
}
/// A single statement of the language.
enum Statement {
    /// A `print(...)` call together with its arguments.
    case print(Arguments)
}
/// A parsed program: the root of the syntax tree.
struct Program {
    /// The program's statements, in source order.
    var statements: [Statement]
}
// MARK: Lexer
/// ASCII-only character classification used by the lexer.
extension Character {
    /// `true` only for the 26 ASCII uppercase letters `A`–`Z`.
    var isUppercaseLetter: Bool {
        return "ABCDEFGHIJKLMNOPQRSTUVWXYZ".contains(self)
    }

    /// `true` only for the 26 ASCII lowercase letters `a`–`z`.
    var isLowercaseLetter: Bool {
        return "abcdefghijklmnopqrstuvwxyz".contains(self)
    }

    /// `true` for an ASCII letter of either case.
    var isLetter: Bool {
        return isUppercaseLetter || isLowercaseLetter
    }

    /// `true` only for the ASCII digits `0`–`9`.
    var isDigit: Bool {
        return "0123456789".contains(self)
    }

    /// `true` for an ASCII letter or an ASCII digit.
    ///
    /// Built on `isDigit` rather than the standard library's Unicode-aware
    /// `isNumber`, which also accepts characters such as `½` or `٣` and would
    /// let them slip into identifiers.
    var isAlphaNumeric: Bool {
        return isLetter || isDigit
    }
}
/// Lexer primitives: each `read…` method consumes characters from the front of
/// the substring when they match and leaves the substring untouched otherwise.
extension Substring {
    /// Consumes and returns the first character if `matching` accepts it.
    mutating func read(_ matching: (Character) -> Bool) -> Character? {
        guard let next = first, matching(next) else { return nil }
        return removeFirst()
    }

    /// Consumes the longest non-empty leading run of characters accepted by
    /// `m`, or returns `nil` (consuming nothing) when the run is empty.
    mutating func read(oneOrMore m: (Character) -> Bool) -> String? {
        let run = prefix(while: m)
        guard !run.isEmpty else { return nil }
        removeFirst(run.count)
        return String(run)
    }

    /// Like `read(oneOrMore:)`, but yields an empty string for an empty run.
    mutating func read(zeroOrMore m: (Character) -> Bool) -> String {
        if let run = read(oneOrMore: m) {
            return run
        }
        return ""
    }

    /// Consumes the first character if it equals `char`; reports whether it did.
    mutating func read(_ char: Character) -> Bool {
        guard first == char else { return false }
        removeFirst()
        return true
    }

    /// Reads a `(` token.
    mutating func readLparen() -> Token? {
        guard read("(") else { return nil }
        return .lparen
    }

    /// Reads a `)` token.
    mutating func readRparen() -> Token? {
        guard read(")") else { return nil }
        return .rparen
    }

    /// Reads a `,` token.
    mutating func readComma() -> Token? {
        guard read(",") else { return nil }
        return .comma
    }

    /// Reads an identifier: one or more letters followed by any number of
    /// alphanumeric characters. `print` is the only recognised word; anything
    /// else becomes a `.error(.syntaxError)` token carrying the identifier.
    mutating func readIdentifier() -> Token? {
        guard let head = read(oneOrMore: { $0.isLetter }) else { return nil }
        let tail = read(zeroOrMore: { $0.isAlphaNumeric })
        let identifier = head + tail
        if identifier == "print" {
            return .print
        }
        return .error(.syntaxError(identifier))
    }

    /// Reads a double-quoted string literal.
    ///
    /// A backslash makes the following character part of the body verbatim
    /// (so `\"` yields `"` and `\\` yields `\`). Running out of input before
    /// the closing quote yields `.error(.unexpectedEOF)`.
    mutating func readString() -> Token? {
        guard read("\"") else { return nil }
        var body = ""
        while let char = popFirst() {
            switch char {
            case "\"":
                return .string(StringLiteral(body: body))
            case "\\":
                // The escaped character is taken literally; a trailing
                // backslash means the literal was never terminated.
                guard let literal = popFirst() else {
                    return .error(.unexpectedEOF)
                }
                body.append(literal)
            default:
                body.append(char)
            }
        }
        return .error(.unexpectedEOF)
    }

    /// Reads the next token, trying each token kind in turn; returns `nil`
    /// when none of them matches at the current position.
    mutating func readToken() -> Token? {
        if let token = readLparen() { return token }
        if let token = readRparen() { return token }
        if let token = readComma() { return token }
        if let token = readIdentifier() { return token }
        return readString()
    }
}
/// Splits `string` into tokens.
///
/// Whitespace (including newlines) between tokens is skipped, so statements
/// may be written on separate lines.
///
/// - Parameter string: The program source.
/// - Returns: The tokens in source order.
/// - Throws: The `LexingError` carried by an `.error` token, or
///   `LexingError.syntaxError` holding the first unrecognised character when
///   the remaining input does not start a token. Previously lexing stopped
///   silently there and the rest of the input was dropped.
func tokenize(_ string: String) throws -> [Token] {
    var tokens: [Token] = []
    var input = Substring(string)
    while true {
        // Whitespace only separates tokens; it never forms one.
        input = input.drop(while: { $0.isWhitespace })
        guard let token = input.readToken() else { break }
        if case let .error(error) = token {
            throw error
        }
        tokens.append(token)
    }
    // `readToken` gave up: that is only acceptable at the end of the input.
    if let unexpected = input.first {
        throw LexingError.syntaxError(String(unexpected))
    }
    return tokens
}
// MARK: Parser
/// Parser primitives: each `read…` method consumes tokens from the front of
/// the slice when they match and leaves the slice untouched otherwise.
extension ArraySlice where Element == Token {
    /// Consumes and returns the first token if `matching` accepts it.
    mutating func read(_ matching: (Token) -> Bool) -> Token? {
        guard let next = first, matching(next) else { return nil }
        return removeFirst()
    }

    /// Consumes the first token if it equals `token`; reports whether it did.
    mutating func read(_ token: Token) -> Bool {
        guard first == token else { return false }
        removeFirst()
        return true
    }

    /// Consumes `token`, or throws `ParsingError.expected` describing it.
    mutating func expect(_ token: Token) throws {
        if !read(token) {
            throw ParsingError.expected("\(token)")
        }
    }

    /// Reads one operand (currently only string literals), or returns `nil`
    /// when the next token does not start an operand.
    mutating func readOperand() throws -> Operand? {
        if case let .string(literal)? = first {
            removeFirst()
            return .string(literal)
        }
        return nil
    }

    /// Reads one or more comma-separated operands.
    ///
    /// - Throws: `ParsingError.expected("operand")` when the list is empty or
    ///   a comma is not followed by an operand.
    mutating func readArguments() throws -> Arguments {
        var operands: [Operand] = []
        repeat {
            guard let operand = try readOperand() else {
                throw ParsingError.expected("operand")
            }
            operands.append(operand)
        } while read(.comma)
        return Arguments(operands: operands)
    }

    /// Reads a `print(<arguments>)` statement, or returns `nil` when the next
    /// token is not `print`.
    mutating func readStatement() throws -> Statement? {
        if !read(.print) {
            return nil
        }
        try expect(.lparen)
        let statement = try Statement.print(readArguments())
        try expect(.rparen)
        return statement
    }
}
/// Tokenizes and parses `string` into a `Program`.
///
/// - Parameter string: The program source.
/// - Returns: The parsed program.
/// - Throws: Any error thrown by `tokenize`, any `ParsingError` raised while
///   reading a statement, or `ParsingError.unexpectedToken` when tokens remain
///   that do not start a statement. Previously such trailing tokens were
///   silently ignored.
func parse(_ string: String) throws -> Program {
    var statements: [Statement] = []
    var tokens = try ArraySlice(tokenize(string))
    while let statement = try tokens.readStatement() {
        statements.append(statement)
    }
    // `readStatement` returns nil both at the end of input and on a token that
    // cannot start a statement; only the former is a successful parse.
    if let unexpected = tokens.first {
        throw ParsingError.unexpectedToken(unexpected)
    }
    return Program(statements: statements)
}
// MARK: Compiler
/// Compilation context threaded `inout` through every `compile` method.
/// It currently holds no state.
struct Compiler {}
extension StringLiteral {
    /// Returns the literal's body escaped for use inside a C string literal
    /// (without the surrounding quotes).
    ///
    /// Backslashes, double quotes, newlines and carriage returns are escaped.
    /// Carriage returns were previously emitted raw, which leaves a line break
    /// in the middle of the generated C string literal.
    ///
    /// - Parameter c: The compilation context (unused here).
    func compile(_ c: inout Compiler) -> String {
        var escaped = ""
        // Walk scalars rather than Characters: "\r\n" is a single Character
        // and would otherwise match neither the "\r" nor the "\n" case.
        for scalar in body.unicodeScalars {
            switch scalar {
            case "\\":
                escaped += "\\\\"
            case "\"":
                escaped += "\\\""
            case "\n":
                escaped += "\\n"
            case "\r":
                escaped += "\\r"
            default:
                escaped.unicodeScalars.append(scalar)
            }
        }
        return escaped
    }
}
extension Operand {
    /// Returns the C expression for this operand; a string operand becomes a
    /// double-quoted C string literal.
    func compile(_ c: inout Compiler) -> String {
        switch self {
        case .string(let literal):
            let escapedBody = literal.compile(&c)
            return "\"" + escapedBody + "\""
        }
    }
}
extension Arguments {
    /// Returns the compiled operands as a C argument list separated by `", "`.
    func compile(_ c: inout Compiler) -> String {
        var compiledOperands: [String] = []
        for operand in operands {
            compiledOperands.append(operand.compile(&c))
        }
        return compiledOperands.joined(separator: ", ")
    }
}
extension Statement {
    /// Returns the C statement for this statement; `print` becomes a `printf`
    /// call whose arguments are the compiled operands.
    func compile(_ c: inout Compiler) -> String {
        switch self {
        case .print(let arguments):
            let argumentList = arguments.compile(&c)
            return "printf(" + argumentList + ");"
        }
    }
}
extension Program {
    /// Returns a complete C source file: an `#include <stdio.h>` line followed
    /// by an `int main()` whose body is the compiled statements, one per line.
    func compile(_ c: inout Compiler) -> String {
        var compiledStatements: [String] = []
        for statement in statements {
            compiledStatements.append(statement.compile(&c))
        }
        let mainBody = compiledStatements.joined(separator: "\n")
        let lines = ["#include <stdio.h>", "int main() {", mainBody, "}"]
        return lines.joined(separator: "\n")
    }
}
/// Compiles `program` to C source using a fresh `Compiler` context.
///
/// - Parameter program: The parsed program.
/// - Returns: The generated C source.
func compile(_ program: Program) throws -> String {
    var context = Compiler()
    let cSource = program.compile(&context)
    return cSource
}
// MARK: CLI
/// Runs a command through `/usr/bin/env` and waits for it to finish.
///
/// - Parameter args: The command name followed by its arguments.
/// - Returns: The command's termination status, or 127 if the process could
///   not be launched at all.
@discardableResult
func shell(_ args: String...) -> Int32 {
    let task = Process()
    // `executableURL` and `run()` replace the deprecated `launchPath` and
    // `launch()`; `launch()` raises an Objective-C exception on failure, which
    // Swift cannot catch, whereas `run()` throws.
    task.executableURL = URL(fileURLWithPath: "/usr/bin/env")
    task.arguments = args
    do {
        try task.run()
    } catch {
        let message = "shell: failed to launch \(args.joined(separator: " ")): \(error)\n"
        FileHandle.standardError.write(Data(message.utf8))
        return 127
    }
    task.waitUntilExit()
    return task.terminationStatus
}
// Demo driver: compile a one-line program to C, build it with clang, and run
// the resulting binary. Files are written to the user's Desktop.
let source = """
print("Hello World")
"""
// Errors propagate to the top level and end the script with the error
// description instead of being force-unwrapped.
let program = try parse(source)
let output = try compile(program)
let cPath = ("~/Desktop/test.c" as NSString).expandingTildeInPath
let oPath = ("~/Desktop/test.out" as NSString).expandingTildeInPath
let url = URL(fileURLWithPath: cPath)
try output.write(to: url, atomically: true, encoding: .utf8)
// Only run the binary when clang actually produced one; otherwise a stale or
// missing test.out would be executed.
let clangStatus = shell("clang", cPath, "-o", oPath)
guard clangStatus == 0 else {
    exit(clangStatus)
}
shell(oPath)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment