Last active
August 29, 2015 14:07
-
-
Save makenowjust/2e1ab4194ada5150a2f7 to your computer and use it in GitHub Desktop.
The tiny compiler for x86_64 written in JavaScript (Node.js)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'use strict'; | |
function Class(name) { | |
return function me() { | |
if (!(this instanceof me)) return me.apply(new me(), arguments); | |
this.name = name; | |
[].slice.call(arguments).forEach(function (v, i) { this[i] = v; | |
}, this); | |
this.length = arguments.length; | |
return this; | |
}; | |
} | |
var | |
Program = Class('Program'), | |
Print = Class('Print'), | |
Assign = Class('Assign'), | |
BinOp = Class('BinOp'), | |
UnOp = Class('UnOp'), | |
Ident = Class('Ident'), | |
Integer = Class('Integer'); | |
function parse(src) { | |
var | |
line = 1; | |
return program(); | |
function program() { | |
var | |
list = []; | |
skip(); | |
while(!isEOF()) { | |
if (lineBreak()) continue; | |
list.push(print()); | |
if (!lineBreak()) throw Error('parse error (line at ' + line + ')'); | |
} | |
return Program(list); | |
} | |
function print() { | |
var | |
flag; | |
try { | |
flag = word('!'); | |
} catch (e) { } | |
return flag ? Print(addExpr()) : assign(); | |
} | |
function assign() { | |
var | |
id, val, src_ = src; | |
try { | |
id = ident(); | |
word('='); | |
val = assign(); | |
return Assign(id, val); | |
} catch (e) { | |
src = src_; | |
} | |
return addExpr(); | |
} | |
function addExpr() { | |
var | |
expr = mulExpr(), op, right; | |
for (;;) { | |
try { | |
op = word('+', '-'); | |
} catch (e) { | |
break; | |
} | |
right = mulExpr(); | |
expr = BinOp(op, expr, right); | |
} | |
return expr; | |
} | |
function mulExpr() { | |
var | |
expr = sign(), op, right; | |
for (;;) { | |
try { | |
op = word('*', '/', '%'); | |
} catch (e) { | |
break; | |
} | |
right = sign(); | |
expr = BinOp(op, expr, right); | |
} | |
return expr; | |
} | |
function sign() { | |
var | |
sign; | |
try { | |
sign = word('+', '-'); | |
} catch (e) { } | |
return sign ? UnOp(sign, primitive()) : primitive(); | |
} | |
function primitive() { | |
var | |
flag, expr; | |
skip(); | |
try { | |
flag = word('('); | |
} catch (e) { } | |
if (flag) { | |
expr = addExpr(); | |
word(')'); | |
return expr; | |
} | |
try { | |
return Ident(ident()); | |
} catch (e) { } | |
try { | |
return Integer(integer()); | |
} catch (e) { } | |
throw Error('expect ident, integer (line at ' + line + ')'); | |
} | |
function ident() { | |
var | |
c = src[0], id = ''; | |
if (isAlpha(c)) { | |
do { | |
id += c; | |
src = src.slice(1); | |
c = src[0]; | |
} while (isAlphaNum(c)) | |
return id; | |
} | |
throw Error('expect ident (line at ' + line + ')'); | |
} | |
function integer() { | |
var | |
c = src[0], int = ''; | |
if (isNum(c)) { | |
do { | |
int += c; | |
src = src.slice(1); | |
c = src[0]; | |
} while (isNum(c)) | |
return int; | |
} | |
throw Error('expect integer (line at ' + line + ')'); | |
} | |
function word() { | |
var | |
i; | |
skip(); | |
for (i = 0; i < arguments.length; i++) { | |
if (src.lastIndexOf(arguments[i], 0) === 0) { | |
src = src.slice(arguments[i].length); | |
return arguments[i]; | |
} | |
} | |
throw Error('expect ' + [].join.call(arguments, ', ') + '(line at ' + line + ')'); | |
} | |
function isEOF() { | |
return src === ''; | |
} | |
function lineBreak() { | |
skip(); | |
if (src[0] === '\n' || src === '') { | |
src = src.slice(1); | |
return true; | |
} | |
return false; | |
} | |
function skip() { | |
while (isSpace(src[0])) { | |
src = src.slice(1); | |
} | |
while (src[0] === '#') { | |
while (src[0] !== '\n') { | |
src = src.slice(1); | |
} | |
} | |
} | |
function isSpace(c) { | |
return c === ' ' || c === '\t'; | |
} | |
function isAlpha(c) { | |
return 'a' <= c && c <= 'z' || | |
'A' <= c && c <= 'Z'; | |
} | |
function isNum(c) { | |
return '0' <= c && c <= '9'; | |
} | |
function isAlphaNum(c) { | |
return isAlpha(c) || isNum(c); | |
} | |
} | |
var | |
OpCode = Class('OpCode'), | |
iota = 0, | |
INT = iota++, | |
LOAD = iota++, | |
STORE = iota++, | |
PRINT = iota++, | |
POP = iota++, | |
ADD = iota++, | |
SUB = iota++, | |
MUL = iota++, | |
DIV = iota++, | |
PLUS = iota++, | |
MINUS = iota++, | |
MOD = iota++; | |
function compile1(tree) { | |
var | |
count = 0, | |
env = {}, | |
code = [], | |
callback = {}; | |
callback.Program = function (list) { | |
list.forEach(function (tree) { | |
call(tree); | |
code.push(OpCode(POP)); | |
}); | |
}; | |
callback.Print = function (expr) { | |
call(expr); | |
code.push(OpCode(PRINT)); | |
}; | |
callback.Assign = function (id, expr) { | |
if (typeof env[id] === 'undefined') env[id] = count++; | |
call(expr); | |
code.push(OpCode(STORE, env[id])); | |
}; | |
callback.BinOp = function (op, l, r) { | |
call(l); | |
call(r); | |
switch (op) { | |
case '+': | |
code.push(OpCode(ADD)); | |
break; | |
case '-': | |
code.push(OpCode(SUB)); | |
break; | |
case '*': | |
code.push(OpCode(MUL)); | |
break; | |
case '/': | |
code.push(OpCode(DIV)); | |
break; | |
case '%': | |
code.push(OpCode(MOD)); | |
break; | |
} | |
}; | |
callback.UnOp = function (op, expr) { | |
call(expr); | |
switch (op) { | |
case '+': | |
code.push(OpCode(PLUS)); | |
break; | |
case '-': | |
code.push(OpCode(MINUS)); | |
break; | |
} | |
}; | |
callback.Ident = function (id) { | |
if (typeof env[id] === 'undefined') throw Error(id + ' is not defined'); | |
code.push(OpCode(LOAD, env[id])); | |
}; | |
callback.Integer = function (int) { | |
code.push(OpCode(INT, int)); | |
}; | |
function call(tree) { | |
callback[tree.name].apply(tree, tree); | |
}; | |
call(tree); | |
return code; | |
} | |
function compile2(code) { | |
var | |
asm = [], | |
env = {}, | |
registers = {}, | |
vstack = [], | |
stack = {}, | |
stackLength = 0; | |
'r12 r13 r14 r15'.split(' ').forEach(function (r) { | |
registers['%' + r] = false; | |
}); | |
code.forEach(function (c) { | |
var | |
reg1, reg2, | |
op = c[0], | |
opd1 = c[1]; | |
switch (op) { | |
case INT: | |
reg1 = findReg(); | |
vstack.push(reg1); | |
asm.push('movq $' + opd1 + ', ' + reg1); | |
break; | |
case LOAD: | |
reg1 = findReg(); | |
reg2 = env[opd1]; | |
if (!reg2) throw Error('error!'); | |
vstack.push(reg1); | |
asm.push('movq ' + reg2 + ', ' + reg1); | |
break; | |
case STORE: | |
reg1 = peekReg(); | |
reg2 = env[opd1]; | |
if (!reg2) { | |
reg2 = env[opd1] = findStack(); | |
stack[reg2] = 'var'; | |
} | |
asm.push('movq ' + reg1 + ', ' + reg2); | |
break; | |
case PRINT: | |
reg1 = peekReg(); | |
asm.push('movq ' + reg1 + ', %rsi'); | |
asm.push('push $680997'); // push "%d\n" | |
asm.push('movq %rsp, %rdi'); | |
asm.push('movq $0, %rax'); | |
asm.push('call printf'); | |
asm.push('pop %rax'); | |
break; | |
case POP: | |
freeReg(popReg()); | |
break; | |
case ADD: | |
reg2 = popReg(); | |
reg1 = peekReg(); | |
asm.push('addq ' + reg2 + ', ' + reg1); | |
freeReg(reg2); | |
break; | |
case SUB: | |
reg2 = popReg(); | |
reg1 = peekReg(); | |
asm.push('subq ' + reg2 + ', ' + reg1); | |
freeReg(reg2); | |
break; | |
case MUL: | |
reg2 = popReg(); | |
reg1 = peekReg(); | |
asm.push('imulq ' + reg2 + ', ' + reg1); | |
freeReg(reg2); | |
break; | |
case DIV: | |
reg2 = popReg(); | |
reg1 = peekReg(); | |
asm.push('movq ' + reg1 + ', %rax'); | |
asm.push('cqto'); | |
asm.push('idivq ' + reg2); | |
asm.push('movq %rax, ' + reg1); | |
freeReg(reg2); | |
break; | |
case PLUS: | |
break; | |
case MINUS: | |
reg1 = peekReg(); | |
asm.push('negq ' + reg1); | |
break; | |
case MOD: | |
reg2 = popReg(); | |
reg1 = peekReg(); | |
asm.push('movq ' + reg1 + ', %rax'); | |
asm.push('cqto'); | |
asm.push('idivq ' + reg2); | |
asm.push('movq %rdx, ' + reg1); | |
freeReg(reg2); | |
break; | |
} | |
}); | |
return '' + | |
'\t.global main\n' + | |
'main:\n' + | |
'\tpush %rbp\n' + | |
'\tmovq %rsp, %rbp\n' + | |
'\tsubq $' + (stackLength * 8) + ', %rsp\n' + | |
asm.map(function (a) { return '\t' + a; }).join('\n') + '\n' + | |
'\tmovq $0, %rax\n' + | |
'\tleave\n' + | |
'\tret\n'; | |
function findReg() { | |
var | |
reg, stk, i; | |
Object.keys(registers).forEach(function (r) { | |
if (!registers[r]) { | |
reg = r; | |
} | |
}); | |
if (reg) { | |
registers[reg] = true; | |
return reg; | |
} | |
if (stack.length) { | |
for (i = 0; i < stack.length; i++) { | |
reg = stack[i]; | |
if (reg in registers) { | |
stk = findStack(); | |
stack[i] = stk; | |
break; | |
} | |
} | |
asm.push('movq ' + reg + ', ' + stk + ' #findReg'); | |
return reg; | |
} else { | |
stk = findStack(); | |
return stk; | |
} | |
} | |
function findStack() { | |
var | |
i, stk; | |
for (i = 0; i < stackLength; i++) { | |
stk = '-' + ((i + 1) * 8) + '(%rbp)'; | |
if (!stack[stk]) { | |
stack[stk] = 'reg'; | |
return stk; | |
} | |
} | |
stk = '-' + (++stackLength * 8) + '(%rbp)'; | |
stack[stk] = 'reg'; | |
return stk; | |
} | |
function popReg() { | |
return vstack.pop(); | |
} | |
function freeReg(reg) { | |
if (reg in registers) { | |
registers[reg] = false; | |
} else { | |
stack[reg] = ''; | |
} | |
} | |
function peekReg() { | |
var | |
top = vstack[vstack.length - 1], reg; | |
if (!top) throw Error('error!'); | |
if (!(top in registers)) { | |
asm.push('mov ' + top + ', %r12 #peekReg'); | |
stack[top] = ''; | |
vstack = vstack.map(function (reg) { | |
var | |
stk; | |
if (reg === '%r12') { | |
stk = findStack(); | |
asm.push('mov ' + reg + ', ' + stk); | |
reg = stk; | |
} | |
return reg; | |
}); | |
vstack[vstack.length - 1] = top = '%r12'; | |
} | |
return top; | |
} | |
} | |
var | |
util = require('util'), | |
fs = require('fs'); | |
var | |
file = process.argv[2]; | |
var | |
src = fs.readFileSync(file, 'utf-8'), | |
tree = parse(src), | |
code1 = compile1(tree), | |
code2 = compile2(code1); | |
console.log(code2); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
node compiler.js "$1" > ".$1.s" | |
gcc -o "${1%.*}" ".$1.s" | |
rm ".$1.s" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# you can compile and run it by following: | |
# `./compile.sh example.calc && ./example` | |
a = -1 | |
b = 101 | |
! -(a * b) / 2 + (5 * 14 + (100 * (100 + 20) + 20) - (100 * 120 + 20) - 21) + 3 % 2 # 100 | |
b = 102 | |
! -(a * b) / 2 # 51 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment