Last active
May 10, 2016 07:18
-
-
Save alihammad-gist/533aea911a58664df49f6860e4c7599a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
namespace scanner { | |
type stateFn = { | |
(l: Lexer): stateFn | |
} | |
export enum itemType { | |
itemEof, | |
itemErr, | |
itemStart, | |
itemOp, | |
itemNum, // positive / negative ints and floats | |
itemLParen, | |
itemRParen, | |
}; | |
export type item = { | |
typ: itemType, | |
val: string, | |
}; | |
export class Lexer { | |
private start: number = 0; | |
private pos: number = 0; | |
private len: number; | |
private expr: string; | |
private onEmit: (i: item) => void; | |
constructor(expr: string, onEmit: (i: item) => void) { | |
this.onEmit = onEmit; | |
this.expr = expr.replace(/\s/g, ''); | |
this.len = this.expr.length; | |
} | |
next(): string { | |
if (this.pos < this.len) { | |
const s = this.expr[this.pos]; | |
this.pos += 1; | |
return s; | |
} else { | |
return undefined; | |
} | |
} | |
peek(): string { | |
return this.expr[this.pos]; | |
} | |
emit(typ: itemType) { | |
this.onEmit({ | |
typ, | |
val: this.expr.slice(this.start, this.pos) | |
}); | |
this.start = this.pos; | |
} | |
accept(chars: string) { | |
let p: number; | |
for (p = this.pos; p < this.len; p++) { | |
if (chars.indexOf(this.expr[p]) < 0) { | |
break; | |
} | |
} | |
this.pos = p; | |
} | |
lookBehind(): string { | |
return this.expr[this.pos - 1]; | |
} | |
lookAhead(): string { | |
return this.expr[this.pos + 1]; | |
} | |
backup() { | |
if (this.pos > 0) { | |
this.pos -= 1; | |
} | |
} | |
run() { | |
let lex = lexExprStart; | |
while (lex !== undefined) { | |
lex = lex(this); | |
} | |
} | |
}; | |
// state functions | |
const nums = '0123456789'; | |
const ops = '^*/+-'; | |
// start of expression | |
const lexExprStart: stateFn = (l) => { | |
const s = l.peek(); | |
if (s === undefined) { // un-expected end of file | |
l.emit(itemType.itemErr); | |
return undefined; | |
} else if (s === '(') { | |
return lexLParen; | |
} else if (couldBeNumber(s)) { | |
return lexNumber; | |
} else { // if its someother char | |
l.emit(itemType.itemErr); | |
return undefined; | |
} | |
} | |
// end of expression | |
const lexExprEnd: stateFn = (l) => { | |
const s = l.peek(); | |
if (s === undefined) { // end of file | |
l.emit(itemType.itemEof); | |
return undefined; | |
} else if (s === ')') { | |
return lexRParen; | |
} else if (isOperator(s)) { | |
return lexOp; | |
} else { | |
l.emit(itemType.itemErr); | |
return undefined; | |
} | |
} | |
const lexOp: stateFn = (l) => { | |
const s = l.next(); | |
if (isOperator(s)) { | |
l.emit(itemType.itemOp); | |
} else { | |
l.emit(itemType.itemErr); | |
return undefined; | |
} | |
return lexExprStart; | |
}; | |
const lexNumber: stateFn = (l) => { | |
let s = l.next(); | |
// could be signed number | |
if (s === '+' || s === '-') { | |
s = l.next(); | |
} | |
if (s === '.') { | |
// floating piont number starting with a '.' | |
s = l.next(); | |
if (s !== undefined && isNumber(s)) { | |
l.accept(nums); | |
l.emit(itemType.itemNum); | |
} else { | |
l.emit(itemType.itemErr); | |
return undefined; | |
} | |
} else if (isNumber(s)) { | |
// could be an integer | |
l.accept(nums); | |
// or could be a float eg. '0012.2132' or '0.222' starting with a number | |
if (l.peek() === '.') { | |
if (l.lookAhead() !== undefined && isNumber(l.lookAhead())) { | |
l.next(); // '.' | |
l.accept(nums); | |
} else { | |
l.emit(itemType.itemErr); | |
return undefined; | |
} | |
} | |
l.emit(itemType.itemNum); | |
} else { | |
l.emit(itemType.itemErr); | |
return undefined; | |
} | |
return lexExprEnd; | |
}; | |
const lexLParen: stateFn = (l) => { | |
const s = l.next(); | |
if (s === '(') { | |
l.emit(itemType.itemLParen); | |
return lexExprStart; | |
} else { | |
l.emit(itemType.itemErr); | |
return undefined; | |
} | |
} | |
const lexRParen: stateFn = (l) => { | |
const s = l.next(); | |
if (s === ')') { | |
l.emit(itemType.itemRParen); | |
return lexExprEnd; | |
} else { | |
l.emit(itemType.itemErr); | |
return undefined; | |
} | |
} | |
// helpers | |
/** | |
* checks if the character is a number | |
*/ | |
function isNumber(char: string): boolean { | |
const code = char.charCodeAt(0); | |
return code >= 48 && code <= 57; | |
} | |
/** | |
* checks if character is an operator | |
*/ | |
function isOperator(char: string) { | |
switch (char) { | |
case '^': | |
case '*': | |
case '/': | |
case '+': | |
case '-': return true; | |
default: return false; | |
} | |
} | |
/** | |
* checks the character could be start of a number | |
* like signed numbers +,- | |
* floating point number . | |
* positive number 0,1,2,3,4,5,6 | |
*/ | |
function couldBeNumber(char: string): boolean { | |
switch (char) { | |
case '.': | |
case '+': | |
case '-': return true; | |
default: | |
if (isNumber(char)) { | |
return true; | |
} else { | |
return false; | |
} | |
} | |
} | |
}; | |
namespace parser { | |
enum opAssoc { | |
left, | |
right, | |
} | |
type opType = { | |
assoc: opAssoc | |
prece: number | |
} | |
const opMap: { [a: string]: opType } = { | |
'^': { assoc: opAssoc.right, prece: 3 }, | |
'/': { assoc: opAssoc.left, prece: 2 }, | |
'*': { assoc: opAssoc.left, prece: 2 }, | |
'+': { assoc: opAssoc.left, prece: 1 }, | |
'-': { assoc: opAssoc.left, prece: 1 }, | |
} | |
// only binary operations | |
const opFunc: { [a: string]: (a: number, b: number) => number } = { | |
'^': (a, b) => Math.pow(a, b), | |
'/': (a, b) => a/b, | |
'*': (a, b) => a*b, | |
'+': (a, b) => a+b, | |
'-': (a, b) => a-b, | |
}; | |
export function shuntingYard(expr: string): scanner.item[] { | |
let outputQueue: scanner.item[] = [], | |
opStack: scanner.item[] = []; | |
new scanner.Lexer(expr, (i) => { | |
switch (i.typ) { | |
case scanner.itemType.itemOp: | |
pushOperator(i, opMap[i.val]); | |
break; | |
case scanner.itemType.itemNum: | |
outputQueue.push(i); | |
break; | |
case scanner.itemType.itemLParen: | |
opStack.push(i); | |
break; | |
case scanner.itemType.itemRParen: | |
pushRParen(); | |
break; | |
// error handling | |
case scanner.itemType.itemErr: | |
throw new Error('Invalid expression'); | |
} | |
}).run(); | |
// while there are still operators on opStack | |
while (opStack.length > 0) { | |
const op = opStack.pop(); | |
if (op.typ === scanner.itemType.itemLParen) { | |
throw new Error('Unmatched opening parenthesis'); | |
} else { | |
outputQueue.push(op); | |
} | |
} | |
function pushOperator(item: scanner.item, o1: opType) { | |
while (opStack.length > 0 && opStack[opStack.length - 1].typ != scanner.itemType.itemLParen) { | |
const o2 = opMap[opStack[opStack.length - 1].val]; | |
// if o1 is left associative and its precedence | |
// is less than or equal to that of o2 | |
const cond1 = o1.assoc === opAssoc.left && o1.prece <= o2.prece; | |
// or o1 is right associative and has precendence less | |
// than that of o2 | |
const cond2 = o2.assoc === opAssoc.right && o1.prece < o2.prece; | |
if (cond1 || cond2) { | |
// pop o2 off the opStack onto outputQueue | |
outputQueue.push(opStack.pop()); | |
} else { | |
break; | |
} | |
} | |
// at the end of iteration push o1 onto opStack | |
opStack.push(item); | |
} | |
function pushRParen() { | |
while (true) { | |
if (opStack.length > 0) { | |
if (opStack[opStack.length - 1].typ === scanner.itemType.itemLParen) { | |
// pop left paren off the opStack | |
// but not onto outputQueue | |
opStack.pop(); | |
break; | |
} else { | |
// pop operators off the opStack | |
// onto outputQueue | |
outputQueue.push(opStack.pop()); | |
} | |
} else { | |
throw new Error('Umatched closing parenthesis'); | |
} | |
} | |
} | |
return outputQueue; | |
} | |
export function parse(expr: string) { | |
const items: scanner.item[] = shuntingYard(expr); | |
console.log(items); | |
let stack: number[] = []; | |
for (let i = 0; i < items.length; i += 1) { | |
var token = items[i]; | |
switch (token.typ) { | |
case scanner.itemType.itemNum: | |
stack.push(Number(token.val)); | |
break; | |
case scanner.itemType.itemOp: | |
if(stack.length < 2) { | |
throw new Error('Issufficient numbers for binary operation'); | |
} | |
const b = stack.pop(), | |
a = stack.pop(); | |
stack.push(opFunc[token.val](a, b)); | |
break; | |
} | |
console.log(stack); | |
} | |
if (stack.length !== 1) { | |
throw new Error('Too few operators: stack.length=' + stack.length); | |
} | |
return stack.pop(); | |
} | |
export function prettyPrint(expr: string) :string { | |
let str: string = ''; | |
let items: scanner.item[] = []; | |
new scanner.Lexer(expr, (i) => { | |
items.push(i); | |
}).run(); | |
for (let i = 0; i < items.length; i+=1) { | |
if (items[i].typ === scanner.itemType.itemOp) { | |
str += ` ${items[i].val} `; | |
} else { | |
str += items[i].val; | |
} | |
} | |
return str; | |
} | |
} | |
console.log(parser.prettyPrint('2+3*3 + - 3)')); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import * as test from 'tape'; | |
import Lexer from './shanting_yard'; | |
type expectation = { | |
expr: string, | |
tokens: Lexer.item[], | |
eval: number | |
}; | |
const exprTable: expectation[] = [ | |
{ | |
expr: '3+3+(3 + (4 + 5))', | |
eval: 18, | |
tokens: [ | |
{typ: Lexer.itemType.itemNum, val:'3'}, | |
{typ: Lexer.itemType.itemOp, val:'+'}, | |
{typ: Lexer.itemType.itemNum, val:'3'}, | |
{typ: Lexer.itemType.itemOp, val:'+'}, | |
{typ: Lexer.itemType.itemLParen, val:'('}, | |
{typ: Lexer.itemType.itemNum, val:'3'}, | |
{typ: Lexer.itemType.itemOp, val:'+'}, | |
{typ: Lexer.itemType.itemLParen, val:'('}, | |
{typ: Lexer.itemType.itemNum, val:'4'}, | |
{typ: Lexer.itemType.itemOp, val:'+'}, | |
{typ: Lexer.itemType.itemNum, val:'5'}, | |
{typ: Lexer.itemType.itemRParen, val:')'}, | |
{typ: Lexer.itemType.itemRParen, val:')'}, | |
{typ: Lexer.itemType.itemEof, val:''}, | |
], | |
}, | |
{ | |
expr: '-1 - - 2 - + 3 + ( -0.3 * + .5)', | |
eval: -2.15, | |
tokens: [ | |
{typ: Lexer.itemType.itemNum, val:'-1'}, | |
{typ: Lexer.itemType.itemOp, val:'-'}, | |
{typ: Lexer.itemType.itemNum, val:'-2'}, | |
{typ: Lexer.itemType.itemOp, val:'-'}, | |
{typ: Lexer.itemType.itemNum, val:'+3'}, | |
{typ: Lexer.itemType.itemOp, val:'+'}, | |
{typ: Lexer.itemType.itemLParen, val:'('}, | |
{typ: Lexer.itemType.itemNum, val:'-0.3'}, | |
{typ: Lexer.itemType.itemOp, val:'*'}, | |
{typ: Lexer.itemType.itemNum, val:'+.5'}, | |
{typ: Lexer.itemType.itemRParen, val:')'}, | |
{typ: Lexer.itemType.itemEof, val:''}, | |
], | |
}, | |
{ | |
expr: '+0.2 / -0.', | |
eval: undefined, | |
tokens: [ | |
{typ: Lexer.itemType.itemNum, val: '+0.2'}, | |
{typ: Lexer.itemType.itemOp, val: '/'}, | |
{typ: Lexer.itemType.itemErr, val: '-0'}, | |
] | |
}, | |
{ | |
expr: '+110.2 + 2 + 3 + (23 / 22) / (3+3+3++)', | |
eval: undefined, | |
tokens: [ | |
{typ: Lexer.itemType.itemNum, val: '+110.2'}, | |
{typ: Lexer.itemType.itemOp, val: '+'}, | |
{typ: Lexer.itemType.itemNum, val: '2'}, | |
{typ: Lexer.itemType.itemOp, val: '+'}, | |
{typ: Lexer.itemType.itemNum, val: '3'}, | |
{typ: Lexer.itemType.itemOp, val: '+'}, | |
{typ: Lexer.itemType.itemLParen, val: '('}, | |
{typ: Lexer.itemType.itemNum, val: '23'}, | |
{typ: Lexer.itemType.itemOp, val: '/'}, | |
{typ: Lexer.itemType.itemNum, val: '22'}, | |
{typ: Lexer.itemType.itemRParen, val: ')'}, | |
{typ: Lexer.itemType.itemOp, val: '/'}, | |
{typ: Lexer.itemType.itemLParen, val:'('}, | |
{typ: Lexer.itemType.itemNum, val:'3'}, | |
{typ: Lexer.itemType.itemOp, val:'+'}, | |
{typ: Lexer.itemType.itemNum, val:'3'}, | |
{typ: Lexer.itemType.itemOp, val:'+'}, | |
{typ: Lexer.itemType.itemNum, val:'3'}, | |
{typ: Lexer.itemType.itemOp, val:'+'}, | |
{typ: Lexer.itemType.itemErr, val:'+)'}, | |
] | |
} | |
]; | |
test('lexing expressions', (t) => { | |
exprTable.forEach(v => { | |
let tokens: Lexer.item[] = []; | |
new Lexer.Lexer(v.expr, i => { | |
tokens.push(i); | |
}).run(); | |
t.deepEqual(tokens, v.tokens); | |
}) | |
t.end(); | |
}); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment