Created
May 18, 2020 22:58
-
-
Save nicklockwood/303ea2cb52a0ebf0a1b1ec2b0c28ac88 to your computer and use it in GitHub Desktop.
A simple one-file language parser and compiler test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Foundation | |
/// Failures reported while turning source text into tokens.
enum LexingError: Error, Equatable {
    /// Carries the offending text; the lexer passes the identifier it did not recognise.
    case syntaxError(String)

    /// The input ended too early; the lexer reports this for an unterminated string literal.
    case unexpectedEOF
}
/// Failures reported while turning tokens into a syntax tree.
enum ParsingError: Error {
    /// Something specific was required but missing; the payload describes it.
    case expected(String)

    /// A token appeared where it is not allowed.
    case unexpectedToken(Token)
}
/// The lexical units produced by the lexer.
enum Token: Equatable {
    /// The `print` keyword and the punctuation `(`, `)` and `,`.
    case print, lparen, rparen, comma

    /// A double-quoted string literal.
    case string(StringLiteral)

    /// A lexing failure carried in-band; `tokenize` turns it into a thrown error.
    case error(LexingError)
}
/// A string literal as it appears in the syntax tree.
struct StringLiteral: Equatable {
    /// The text between the quotes, after the lexer has processed backslash escapes.
    var body: String
}
/// A value that can be passed as an argument; only string literals exist so far.
enum Operand {
    /// A string-literal operand.
    case string(StringLiteral)
}
/// The argument list of a call.
struct Arguments {
    /// The operands in source order.
    var operands: [Operand]
}
/// A single executable statement; only `print(...)` exists so far.
enum Statement {
    /// A `print` call together with its arguments.
    case print(Arguments)
}
/// The root of the syntax tree.
struct Program {
    /// The program's statements in source order.
    var statements: [Statement]
}
// MARK: Lexer
/// ASCII-only character classes used by the lexer.
extension Character {
    /// `true` only for the ASCII capitals `A`...`Z`.
    var isUppercaseLetter: Bool {
        return "ABCDEFGHIJKLMNOPQRSTUVWXYZ".contains(self)
    }

    /// `true` only for the ASCII lowercase letters `a`...`z`.
    var isLowercaseLetter: Bool {
        return "abcdefghijklmnopqrstuvwxyz".contains(self)
    }

    /// `true` for an ASCII letter of either case.
    var isLetter: Bool {
        return isUppercaseLetter || isLowercaseLetter
    }

    /// `true` only for the ASCII digits `0`...`9`.
    var isDigit: Bool {
        return "0123456789".contains(self)
    }

    /// `true` for an ASCII letter or an ASCII digit.
    var isAlphaNumeric: Bool {
        // Use the ASCII `isDigit` above rather than the standard library's
        // Unicode-wide `isNumber`, which also accepts characters such as "½"
        // and would make this class inconsistent with `isLetter`.
        return isLetter || isDigit
    }
}
/// Lexing primitives. Each `read…` method consumes characters from the front
/// of the receiver on success and leaves it untouched when nothing matches.
extension Substring {
    /// Consumes and returns the first character if `matching` accepts it.
    mutating func read(_ matching: (Character) -> Bool) -> Character? {
        guard let next = first, matching(next) else {
            return nil
        }
        return removeFirst()
    }

    /// Consumes the longest non-empty run of characters accepted by `m`.
    /// - Returns: The consumed run, or `nil` if the first character is not accepted.
    mutating func read(oneOrMore m: (Character) -> Bool) -> String? {
        var matched = ""
        while let next = read(m) {
            matched.append(next)
        }
        return matched.isEmpty ? nil : matched
    }

    /// Consumes the longest (possibly empty) run of characters accepted by `m`.
    mutating func read(zeroOrMore m: (Character) -> Bool) -> String {
        return read(oneOrMore: m) ?? ""
    }

    /// Consumes the first character if it equals `char`.
    /// - Returns: `true` if a character was consumed.
    mutating func read(_ char: Character) -> Bool {
        guard first == char else {
            return false
        }
        removeFirst()
        return true
    }

    /// Reads a `(` token.
    mutating func readLparen() -> Token? {
        return read("(") ? .lparen : nil
    }

    /// Reads a `)` token.
    mutating func readRparen() -> Token? {
        return read(")") ? .rparen : nil
    }

    /// Reads a `,` token.
    mutating func readComma() -> Token? {
        return read(",") ? .comma : nil
    }

    /// Reads a word made of letters followed by letters or digits.
    /// - Returns: `.print` for the keyword, an `.error(.syntaxError)` token
    ///   carrying the word for anything else, or `nil` if the input does not
    ///   start with a letter.
    mutating func readIdentifier() -> Token? {
        guard let head = read(oneOrMore: { $0.isLetter }) else {
            return nil
        }
        let identifier = head + read(zeroOrMore: { $0.isAlphaNumeric })
        switch identifier {
        case "print":
            return .print
        default:
            return .error(.syntaxError(identifier))
        }
    }

    /// Reads a double-quoted string literal, processing backslash escapes.
    ///
    /// `\n` and `\t` decode to a newline and a tab; any other escaped
    /// character (including `\"` and `\\`) stands for itself.
    /// - Returns: A `.string` token, an `.error(.unexpectedEOF)` token if the
    ///   closing quote is missing, or `nil` if the input does not start with `"`.
    mutating func readString() -> Token? {
        guard read("\"") else { return nil }
        var body = ""
        var escaped = false
        while let char = popFirst() {
            if escaped {
                switch char {
                case "n":
                    body.append("\n")
                case "t":
                    body.append("\t")
                default:
                    body.append(char)
                }
                escaped = false
                continue
            }
            switch char {
            case "\"":
                return .string(StringLiteral(body: body))
            case "\\":
                escaped = true
            default:
                body.append(char)
            }
        }
        // Ran out of input before the closing quote.
        return .error(.unexpectedEOF)
    }

    /// Reads the next token of any kind, trying each reader in a fixed order.
    /// - Returns: `nil` if no reader matches the front of the input.
    mutating func readToken() -> Token? {
        if let token = readLparen() { return token }
        if let token = readRparen() { return token }
        if let token = readComma() { return token }
        if let token = readIdentifier() { return token }
        return readString()
    }
}
/// Splits `string` into tokens.
///
/// Whitespace between tokens is skipped. Whitespace inside a string literal
/// is unaffected because the literal is read as a single token.
/// - Parameter string: The source text to lex.
/// - Returns: The tokens in source order.
/// - Throws: The `LexingError` carried by an `.error` token, or
///   `LexingError.syntaxError` with the offending character when the input
///   contains something no token reader recognises.
func tokenize(_ string: String) throws -> [Token] {
    var tokens = [Token]()
    var input = Substring(string)
    while true {
        input = input.drop(while: { $0.isWhitespace })
        guard let offending = input.first else {
            return tokens
        }
        // A nil token with input remaining means unrecognised text. Report it
        // instead of silently truncating the token stream.
        guard let token = input.readToken() else {
            throw LexingError.syntaxError(String(offending))
        }
        if case let .error(error) = token {
            throw error
        }
        tokens.append(token)
    }
}
// MARK: Parser
/// Parsing primitives. Each method consumes tokens from the front of the
/// receiver on success.
extension ArraySlice where Element == Token {
    /// Consumes and returns the first token if `matching` accepts it.
    mutating func read(_ matching: (Token) -> Bool) -> Token? {
        guard let next = first, matching(next) else {
            return nil
        }
        return removeFirst()
    }

    /// Consumes the first token if it equals `token`.
    /// - Returns: `true` if a token was consumed.
    mutating func read(_ token: Token) -> Bool {
        guard first == token else {
            return false
        }
        removeFirst()
        return true
    }

    /// Consumes `token`, which must come next.
    /// - Throws: `ParsingError.expected` describing `token` if it is absent.
    mutating func expect(_ token: Token) throws {
        if !read(token) {
            throw ParsingError.expected("\(token)")
        }
    }

    /// Reads a single operand.
    /// - Returns: The operand, or `nil` if the next token is not a string literal.
    mutating func readOperand() throws -> Operand? {
        switch first {
        case .string(let literal)?:
            removeFirst()
            return .string(literal)
        default:
            return nil
        }
    }

    /// Reads one or more comma-separated operands.
    /// - Throws: `ParsingError.expected("operand")` if the list is empty or a
    ///   comma is not followed by an operand.
    mutating func readArguments() throws -> Arguments {
        var operands = [Operand]()
        repeat {
            guard let operand = try readOperand() else {
                throw ParsingError.expected("operand")
            }
            operands.append(operand)
        } while read(.comma)
        return Arguments(operands: operands)
    }

    /// Reads a `print(...)` statement.
    /// - Returns: The statement, or `nil` if the next token is not `print`.
    /// - Throws: `ParsingError` if the parentheses or the arguments are malformed.
    mutating func readStatement() throws -> Statement? {
        guard read(.print) else {
            return nil
        }
        try expect(.lparen)
        let printed = try readArguments()
        try expect(.rparen)
        return Statement.print(printed)
    }
}
/// Lexes and parses `string` into a `Program`.
/// - Parameter string: The source text.
/// - Returns: The parsed program.
/// - Throws: A `LexingError` from tokenizing, a `ParsingError` from a
///   malformed statement, or `ParsingError.unexpectedToken` when a token that
///   cannot start a statement follows the last complete one.
func parse(_ string: String) throws -> Program {
    var statements = [Statement]()
    var tokens = try ArraySlice(tokenize(string))
    while let statement = try tokens.readStatement() {
        statements.append(statement)
    }
    // `readStatement` returns nil for any token that cannot start a statement,
    // so leftover tokens are a syntax error rather than the end of the program.
    if let stray = tokens.first {
        throw ParsingError.unexpectedToken(stray)
    }
    return Program(statements: statements)
}
// MARK: Compiler
/// Compilation state passed `inout` through every `compile` call; it has no fields yet.
struct Compiler {}
extension StringLiteral {
    /// Renders `body` escaped for use inside a C string literal, without the
    /// surrounding quotes.
    func compile(_ c: inout Compiler) -> String {
        // Backslashes go first so the backslashes added by the later
        // replacements are not escaped a second time.
        let escapes: [(target: String, replacement: String)] = [
            ("\\", "\\\\"),
            ("\"", "\\\""),
            ("\n", "\\n"),
        ]
        var escaped = body
        for escape in escapes {
            escaped = escaped.replacingOccurrences(of: escape.target, with: escape.replacement)
        }
        return escaped
    }
}
extension Operand {
    /// Renders the operand as a C expression.
    func compile(_ c: inout Compiler) -> String {
        switch self {
        case .string(let literal):
            // The literal renders only its escaped body; add the quotes here.
            let escaped = literal.compile(&c)
            return "\"" + escaped + "\""
        }
    }
}
extension Arguments {
    /// Renders the operands as a C argument list separated by `", "`.
    func compile(_ c: inout Compiler) -> String {
        var rendered = [String]()
        for operand in operands {
            rendered.append(operand.compile(&c))
        }
        return rendered.joined(separator: ", ")
    }
}
extension Statement {
    /// Renders the statement as a line of C.
    ///
    /// A `print` becomes a `printf` call with a fixed format string holding
    /// one `%s` per operand, separated by single spaces. The operands are
    /// passed as data, so a `%` in user text is printed literally and every
    /// operand appears in the output. For a single operand without `%` the
    /// program's output is the same as passing it directly as the format.
    func compile(_ c: inout Compiler) -> String {
        switch self {
        case let .print(arguments):
            let format = Array(repeating: "%s", count: arguments.operands.count)
                .joined(separator: " ")
            // With no operands there is nothing to put after the format.
            guard !arguments.operands.isEmpty else {
                return "printf(\"\(format)\");"
            }
            return "printf(\"\(format)\", \(arguments.compile(&c)));"
        }
    }
}
extension Program {
    /// Renders the whole program as a C translation unit: the `stdio.h`
    /// include followed by a `main` containing one line per statement.
    func compile(_ c: inout Compiler) -> String {
        var body = [String]()
        for statement in statements {
            body.append(statement.compile(&c))
        }
        let lines = [
            "#include <stdio.h>",
            "int main() {",
            body.joined(separator: "\n"),
            "}",
        ]
        return lines.joined(separator: "\n")
    }
}
/// Compiles a parsed program to C source text.
/// - Parameter program: The program to compile.
/// - Returns: The generated C source.
func compile(_ program: Program) throws -> String {
    var state = Compiler()
    let cSource = program.compile(&state)
    return cSource
}
// MARK: CLI
/// Runs a command through `/usr/bin/env` and waits for it to finish.
/// - Parameter args: The command name followed by its arguments.
/// - Returns: The command's exit status, or 127 if the process could not be
///   launched at all.
@discardableResult
func shell(_ args: String...) -> Int32 {
    let task = Process()
    task.executableURL = URL(fileURLWithPath: "/usr/bin/env")
    task.arguments = args
    do {
        // `run()` reports a launch failure as a Swift error, whereas the
        // deprecated `launch()` raises an Objective-C exception that cannot
        // be caught here.
        try task.run()
    } catch {
        return 127
    }
    task.waitUntilExit()
    return task.terminationStatus
}
// Demo driver: compile a one-line program to C on the Desktop, build it with
// clang, and run the result.
let source = """
print("Hello World")
"""
let program = try! parse(source)
let output = try! compile(program)
let cPath = ("~/Desktop/test.c" as NSString).expandingTildeInPath
let oPath = ("~/Desktop/test.out" as NSString).expandingTildeInPath
let url = URL(fileURLWithPath: cPath)
try! output.write(to: url, atomically: true, encoding: .utf8)
// Run the binary only if clang succeeded; otherwise a stale or missing
// test.out would be executed.
if shell("clang", cPath, "-o", oPath) == 0 {
    shell(oPath)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment