Last active
March 6, 2019 12:43
-
-
Save nixorn/f88fb194bd92b4fee3f351e689e99e0a to your computer and use it in GitHub Desktop.
Moo Haskell indent sample (mostly stolen from https://gist.github.com/nathan/d8d1adea38a1ef3a6d6a06552da641aa)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<html> | |
<head> | |
<meta content="text/html; charset=utf-8" http-equiv="content-type"> | |
</head> | |
<body> | |
<script type="text/javascript" src="./moo.js"> </script> | |
<script type="text/javascript"> | |
// Token rules for the toy Haskell-like sample language.
// moo matches plain strings as literal text and only RegExp values as
// patterns, and earlier rules win over later ones.
const lexer = moo.compile({
  plus: "+",
  equal: "=",
  minus: "-",
  power: "**",
  lbrace: "(",
  rbrace: ")",
  // Must be a RegExp: as the string "[0-9]*?" this rule only matched that
  // exact seven-character text, so digits fell through to `id`. It also has
  // to match at least one character and sit before `id`, because \w+ accepts
  // digits as well.
  integer: /[0-9]+/,
  ws: /[ \t]+/,
  nl: { match: /(?:\r\n?|\n)+/, lineBreaks: true },
  // `where` is declared as a keyword of the identifier rule so that it is
  // only recognised as a whole word; a bare "where" literal would also match
  // the first five characters of an identifier such as `whereabouts`.
  // Tokens for the keyword still have type "where".
  id: { match: /\w+/, type: moo.keywords({ where: "where" }) },
})
// example | |
// Lex a small sample with three nested `where` blocks (indented by 2, 4 and
// 6 spaces) followed by a top-level line, then log the resulting tokens.
// The leading spaces inside the string literals are significant: they are
// what the indentation pass turns into indent/dedent tokens.
const tokens = indented(lexer,
  'testIndent i = t1 i\n' +
  '  where t1 i = 2 + t2 i\n' +
  '    where t2 i = (t3 i) ** 2\n' +
  '      where t3 i = i - 3\n' +
  'testIndent 0 = 5\n'
)
// Drain the generator into an array so the whole stream is printed at once.
const result = [...tokens]
console.log(result)
// implementation | |
/**
 * Generator that runs `lexer` over `source` and converts leading whitespace
 * into block-structure tokens.
 *
 * Tokens coming from the lexer are passed through unchanged, except that
 * `ws` tokens are dropped and `nl` tokens are replaced by one of:
 *   - `{type: 'nl'}`     when the next non-blank line has the same indentation,
 *   - `{type: 'indent'}` when it is indented deeper,
 *   - one `{type: 'dedent'}` per block closed when it is indented less.
 * These synthetic tokens carry only a `type` property. Any blocks still open
 * at end of input are closed with trailing `dedent` tokens.
 *
 * @param {object} lexer - object whose `reset(source)` returns something with
 *   a `next()` method producing tokens (with `type` and `value`) and a falsy
 *   value at end of input.
 * @param {string} source - text handed to `lexer.reset`.
 * @yields {object} lexer tokens and synthetic nl/indent/dedent tokens.
 * @throws {Error} 'inconsistent indentation' when a line dedents to a width
 *   that does not match any enclosing block.
 */
function* indented(lexer, source) {
  const iter = peekable(lexer.reset(source))
  // Indentation widths of the enclosing blocks, innermost last.
  const stack = []
  // absorb initial blank lines and indentation
  let indent = iter.nextIndent()
  for (let tok = iter.next(); tok; tok = iter.next()) {
    // ignore whitespace within lines
    if (tok.type === 'ws') continue
    if (tok.type !== 'nl') {
      yield tok
      continue
    }
    const newIndent = iter.nextIndent()
    if (newIndent == null) break // eof
    if (newIndent === indent) {
      yield {type: 'nl'}
    } else if (newIndent > indent) {
      stack.push(indent)
      indent = newIndent
      yield {type: 'indent'}
    } else {
      while (newIndent < indent) {
        // Dedenting below the indentation of the very first line leaves no
        // block to close; fail before emitting a dedent that has no
        // matching indent.
        if (stack.length === 0) {
          throw new Error('inconsistent indentation')
        }
        indent = stack.pop()
        yield {type: 'dedent'}
      }
      if (newIndent !== indent) {
        throw new Error('inconsistent indentation')
      }
    }
  }
  // dedent remaining blocks at eof
  for (let i = stack.length; i--;) {
    yield {type: 'dedent'}
  }
}
/**
 * Wraps a token source in a one-token lookahead buffer.
 *
 * @param {object} lexer - anything with a `next()` method that returns the
 *   next token, or a falsy value once the input is exhausted.
 * @returns {{next: Function, peek: Function, nextIndent: Function}}
 *   `next()` consumes and returns the buffered token, `peek()` returns it
 *   without consuming, and `nextIndent()` skips blank lines and reports the
 *   indentation width of the next non-blank line.
 */
function peekable(lexer) {
  // The token that the next call to next()/peek() will hand out.
  let lookahead = lexer.next()
  return {
    next() {
      const current = lookahead
      lookahead = lexer.next()
      return current
    },
    peek() {
      return lookahead
    },
    /**
     * Consumes newline tokens and whitespace-only lines, then returns the
     * length of the leading `ws` token of the next line (consuming that
     * token), or 0 when the line starts with any other token (which is left
     * unconsumed). Returns undefined when the input ends first.
     */
    nextIndent() {
      while (true) {
        const head = this.peek()
        if (!head) return undefined
        switch (head.type) {
          case 'nl':
            this.next()
            break
          case 'ws': {
            const width = head.value.length
            this.next()
            const after = this.peek()
            // Trailing whitespace at end of input is not an indentation.
            if (!after) return undefined
            if (after.type !== 'nl') return width
            // Whitespace-only line: discard its newline and keep scanning.
            this.next()
            break
          }
          default:
            return 0
        }
      }
    },
  }
}
</script> | |
</body> | |
</html> | |
<!-- | |
result (captured from an earlier run; note that numeric literals are reported with type "id" in this capture):
[ | |
{ | |
"type": "id", | |
"value": "testIndent", | |
"text": "testIndent", | |
"offset": 0, | |
"lineBreaks": 0, | |
"line": 1, | |
"col": 1 | |
}, | |
{ | |
"type": "id", | |
"value": "i", | |
"text": "i", | |
"offset": 11, | |
"lineBreaks": 0, | |
"line": 1, | |
"col": 12 | |
}, | |
{ | |
"type": "equal", | |
"value": "=", | |
"text": "=", | |
"offset": 13, | |
"lineBreaks": 0, | |
"line": 1, | |
"col": 14 | |
}, | |
{ | |
"type": "id", | |
"value": "t1", | |
"text": "t1", | |
"offset": 15, | |
"lineBreaks": 0, | |
"line": 1, | |
"col": 16 | |
}, | |
{ | |
"type": "id", | |
"value": "i", | |
"text": "i", | |
"offset": 18, | |
"lineBreaks": 0, | |
"line": 1, | |
"col": 19 | |
}, | |
{ | |
"type": "indent" | |
}, | |
{ | |
"type": "where", | |
"value": "where", | |
"text": "where", | |
"offset": 22, | |
"lineBreaks": 0, | |
"line": 2, | |
"col": 3 | |
}, | |
{ | |
"type": "id", | |
"value": "t1", | |
"text": "t1", | |
"offset": 28, | |
"lineBreaks": 0, | |
"line": 2, | |
"col": 9 | |
}, | |
{ | |
"type": "id", | |
"value": "i", | |
"text": "i", | |
"offset": 31, | |
"lineBreaks": 0, | |
"line": 2, | |
"col": 12 | |
}, | |
{ | |
"type": "equal", | |
"value": "=", | |
"text": "=", | |
"offset": 33, | |
"lineBreaks": 0, | |
"line": 2, | |
"col": 14 | |
}, | |
{ | |
"type": "id", | |
"value": "2", | |
"text": "2", | |
"offset": 35, | |
"lineBreaks": 0, | |
"line": 2, | |
"col": 16 | |
}, | |
{ | |
"type": "plus", | |
"value": "+", | |
"text": "+", | |
"offset": 37, | |
"lineBreaks": 0, | |
"line": 2, | |
"col": 18 | |
}, | |
{ | |
"type": "id", | |
"value": "t2", | |
"text": "t2", | |
"offset": 39, | |
"lineBreaks": 0, | |
"line": 2, | |
"col": 20 | |
}, | |
{ | |
"type": "id", | |
"value": "i", | |
"text": "i", | |
"offset": 42, | |
"lineBreaks": 0, | |
"line": 2, | |
"col": 23 | |
}, | |
{ | |
"type": "indent" | |
}, | |
{ | |
"type": "where", | |
"value": "where", | |
"text": "where", | |
"offset": 48, | |
"lineBreaks": 0, | |
"line": 3, | |
"col": 5 | |
}, | |
{ | |
"type": "id", | |
"value": "t2", | |
"text": "t2", | |
"offset": 54, | |
"lineBreaks": 0, | |
"line": 3, | |
"col": 11 | |
}, | |
{ | |
"type": "id", | |
"value": "i", | |
"text": "i", | |
"offset": 57, | |
"lineBreaks": 0, | |
"line": 3, | |
"col": 14 | |
}, | |
{ | |
"type": "equal", | |
"value": "=", | |
"text": "=", | |
"offset": 59, | |
"lineBreaks": 0, | |
"line": 3, | |
"col": 16 | |
}, | |
{ | |
"type": "lbrace", | |
"value": "(", | |
"text": "(", | |
"offset": 61, | |
"lineBreaks": 0, | |
"line": 3, | |
"col": 18 | |
}, | |
{ | |
"type": "id", | |
"value": "t3", | |
"text": "t3", | |
"offset": 62, | |
"lineBreaks": 0, | |
"line": 3, | |
"col": 19 | |
}, | |
{ | |
"type": "id", | |
"value": "i", | |
"text": "i", | |
"offset": 65, | |
"lineBreaks": 0, | |
"line": 3, | |
"col": 22 | |
}, | |
{ | |
"type": "rbrace", | |
"value": ")", | |
"text": ")", | |
"offset": 66, | |
"lineBreaks": 0, | |
"line": 3, | |
"col": 23 | |
}, | |
{ | |
"type": "power", | |
"value": "**", | |
"text": "**", | |
"offset": 68, | |
"lineBreaks": 0, | |
"line": 3, | |
"col": 25 | |
}, | |
{ | |
"type": "id", | |
"value": "2", | |
"text": "2", | |
"offset": 71, | |
"lineBreaks": 0, | |
"line": 3, | |
"col": 28 | |
}, | |
{ | |
"type": "indent" | |
}, | |
{ | |
"type": "where", | |
"value": "where", | |
"text": "where", | |
"offset": 79, | |
"lineBreaks": 0, | |
"line": 4, | |
"col": 7 | |
}, | |
{ | |
"type": "id", | |
"value": "t3", | |
"text": "t3", | |
"offset": 85, | |
"lineBreaks": 0, | |
"line": 4, | |
"col": 13 | |
}, | |
{ | |
"type": "id", | |
"value": "i", | |
"text": "i", | |
"offset": 88, | |
"lineBreaks": 0, | |
"line": 4, | |
"col": 16 | |
}, | |
{ | |
"type": "equal", | |
"value": "=", | |
"text": "=", | |
"offset": 90, | |
"lineBreaks": 0, | |
"line": 4, | |
"col": 18 | |
}, | |
{ | |
"type": "id", | |
"value": "i", | |
"text": "i", | |
"offset": 92, | |
"lineBreaks": 0, | |
"line": 4, | |
"col": 20 | |
}, | |
{ | |
"type": "minus", | |
"value": "-", | |
"text": "-", | |
"offset": 94, | |
"lineBreaks": 0, | |
"line": 4, | |
"col": 22 | |
}, | |
{ | |
"type": "id", | |
"value": "3", | |
"text": "3", | |
"offset": 96, | |
"lineBreaks": 0, | |
"line": 4, | |
"col": 24 | |
}, | |
{ | |
"type": "dedent" | |
}, | |
{ | |
"type": "dedent" | |
}, | |
{ | |
"type": "dedent" | |
}, | |
{ | |
"type": "id", | |
"value": "testIndent", | |
"text": "testIndent", | |
"offset": 98, | |
"lineBreaks": 0, | |
"line": 5, | |
"col": 1 | |
}, | |
{ | |
"type": "id", | |
"value": "0", | |
"text": "0", | |
"offset": 109, | |
"lineBreaks": 0, | |
"line": 5, | |
"col": 12 | |
}, | |
{ | |
"type": "equal", | |
"value": "=", | |
"text": "=", | |
"offset": 111, | |
"lineBreaks": 0, | |
"line": 5, | |
"col": 14 | |
}, | |
{ | |
"type": "id", | |
"value": "5", | |
"text": "5", | |
"offset": 113, | |
"lineBreaks": 0, | |
"line": 5, | |
"col": 16 | |
} | |
] | |
--> | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment