Skip to content

Instantly share code, notes, and snippets.

@Heimdell
Last active November 11, 2016 19:54
Show Gist options
  • Save Heimdell/b5aaaea29503c6ef81345a7065ba1137 to your computer and use it in GitHub Desktop.
Save Heimdell/b5aaaea29503c6ef81345a7065ba1137 to your computer and use it in GitHub Desktop.
var StdLib = require('./stdlib.js')
/*
module for working with input streams (char iterators, in fact)
*/
var Stream = require('./stream.js')
/*
module for parsing an array of tokens from text (represented as "Stream")
*/
var Tokenizer = require('./tokenizer.js')
var Parsing = require('./parsing.js')
///////////////////////////////////////////////////////////////////////////////
var T = Tokenizer
/*
lexeme analyser for our ML-- language

T.any tries each tokenizer in order on the front of the stream, so the
order below matters: the three-char comment delimiter is tried before
the two-char one, and names/spaces are tried before the single-char
fallback.
*/
var lexemes = T.many(
T.any(
// comments (most specific delimiter first)
T.delimited('/*/', '/*/'),
T.delimited('/*', '*/'),
T.delimited('//', '\n'),
// strings (double-quoted, and backtick "splices" of raw JS)
T.string('"'),
T.string('`'),
// names (reserved words too; the parser tells them apart later)
T.while(T.isNameChar),
// spaces
T.while(T.isSpace),
// breakers like "(", ")", ...
T.singleChar
)
)
/*
predicate: is the given token one of ML--'s reserved words / punctuation?
Built once and shared by every P.reserved / P.notReserved rule below.
*/
var r = StdLib.ofSet([
"let", ";", "in", "=", "\\", "(", ")", "->", "{", "}", ":",
"if", "then", "else", "|",
"module", "exports", "imports", "as", "implementation",
"data"
])
var P = Parsing
var S = StdLib
/*
parsing scheme for ML--
*/
var syntax = (
{ "module" : P.rule
( P.seq
( P.reserved("module", r)
, "name"
, "exports"
, "imports"
, "implementation"
)
, xs => S.tag("module",
{ name: xs[1]
, exports: xs[2]
, imports: xs[3]
, body: xs[4]
})
)
, "implementation" : P.rule
( P.seq
( P.reserved("implementation", r)
, P.indented("binding", "{", ";", "}", r)
)
, xs => xs[1]
)
, "exports" : P.rule
( P.seq
( P.reserved("exports", r)
, P.indented("name", "{", ";", "}", r)
)
, xs => xs[1]
)
, "imports" : P.rule
( P.seq
( P.reserved("imports", r)
, P.indented("import", "{", ";", "}", r)
)
, xs => xs[1]
)
, "import" : P.rule
( P.seq
( "name"
, P.reserved("as", r)
, P.notReserved(r)
)
, xs => S.tag("import", {import: xs[0], as: xs[2]})
)
/*
= application, like "f a b" (in JS it will look as "f(a, b)")
*/
, "expr" : P.rule
( P.some("atomic")
, xs => S.tag("app", {pile: xs})
)
/*
= terminal
*/
, "atomic" : P.alt
( "let-expr"
, "if-expr"
, "lambda"
, "number"
, "string"
, "spliced"
, "name"
, "quoted-expr"
)
, "if-expr" : P.rule
( P.seq
( P.reserved("if", r)
, P.indented("if-case", "{", "|", "}", r)
)
, xs => S.tag("if", {cases: xs[1]})
)
, "if-case" : P.rule
( P.seq
( "expr"
, P.reserved("->", r)
, "expr"
)
, xs => S.tag("ifThen", {if: xs[0], then: xs[2]})
)
/* let ; foo = bar ; f x = g x ... in qux */
, "let-expr" : P.rule
( P.seq
( P.reserved("let", r)
, P.indented("binding", "{", ";", "}", r)
, P.reserved("in", r)
, "expr"
)
, xs => S.tag("let", {bindings : xs[1], body : xs[3]})
)
, "binding" : P.alt("data-decl", "var-binding")
, "data-decl" : P.rule
( P.seq
( P.reserved("data", r)
, P.notReserved(r)
, P.indented("ctor", "{", ";", "}", r)
)
, xs => S.tag("data", {name: xs[1], ctors: xs[2]})
)
, "ctor" : P.rule
( P.seq
( P.notReserved(r)
, P.many(P.notReserved(r))
)
, xs => S.tag("ctor", {name: xs[0], fields: xs[1]})
)
/* ; f x = g x */
, "var-binding" : P.rule
( P.seq
( "name"
, P.many("name")
, P.reserved("=", r)
, "expr"
)
, xs => S.tag("binding", {name: xs[0], args: xs[1], body: xs[3]})
)
/* \a b -> a */
, "lambda" : P.rule
( P.seq
( P.reserved("\\", r)
, P.many("name")
, P.reserved("->", r)
, "expr"
)
, xs => S.tag("lambda", {args: xs[1], body: xs[3]})
)
, "number" : P.rule(P.number(), xs => S.tag("num", {num: xs}))
, "string" : P.rule(P.string(), xs => S.tag("str", {str: xs}))
/* (...) */
, "quoted-expr" : P.rule
( P.seq
( P.reserved("(", r)
, "expr"
, P.reserved(")", r)
)
, xs => xs[1]
)
, "spliced" : P.rule
( P.satisfy("splicer", x => x.startsWith('`'))
, xs => S.tag("raw", {raw : xs.slice(1, xs.length - 1)})
)
, "name" : P.rule
( P.seq
( P.notReserved(r)
, P.many(P.seq
( P.reserved(":", r)
, P.notReserved(r)
))
)
, xs =>
{
xs[1] = xs[1].map(x => x[1])
xs[1].unshift(xs[0])
return S.tag("name", {chain : xs[1]})
}
)
}
)
/*
js transpiler from ML--, simple and dumb
(each handler of the `run` match scheme turns one AST node kind into a
JS source string)
*/
var Compiler = (
// mangle an ML-- identifier into a valid JS identifier, char by char
{ nameInJS : (name) =>
{
var chars = Array.prototype.slice.apply(name)
chars = chars.map(Compiler.nameCharInJS)
return chars.join("")
}
// predicate: may this char appear verbatim in a JS identifier?
, fineForName : StdLib.ofSet
( "qwertyuiopasdfghjklzxcvbnm"
+ "QWERTYUIOPASDFGHJKLZXCVBNM"
+ "1234567890_$"
)
// identifier-safe chars pass through; anything else becomes _<charCode>
, nameCharInJS : (char) =>
{
if (Compiler.fineForName(char))
return char
else
return `_${char.charCodeAt(0)}`
}
// "a:b:c" name chain -> "a.b.c" member access
, qualifiedInJS : (name) =>
name.chain.map(Compiler.nameInJS).join(".")
// walk the AST, emitting one JS source string per node kind
, run : function compile(ast) {
return S.match(ast,
// f a b => f(a, b)
{ app : (it) =>
{
it.pile = it.pile.map(compile)
var f = it.pile.shift()
var xs = it.pile
return xs.length
? `${f}(${xs})`
: `${f}`
}
// module => "use strict" + requires + body + module.exports lines
, module : (it) =>
{
var imports = it.imports.map(x =>
`var ${x.as} = require("./${Compiler.qualifiedInJS(x.import)}.js")`
).join(";\n")
var exports = it.exports.map(x =>
{
var name = Compiler.qualifiedInJS(x)
return `module.exports.${name} = ${name}`
}
).join(";\n")
var body = it.body.map(compile).join(";") + ";"
return '"use strict";' + imports + ";\n" + body + ";\n" + exports
}
// if-cases compile to a chain of ternaries, "undefined" as fallthrough
, if : (it) =>
{
var cases = it.cases.map(compile)
return cases.join(" ") + " undefined"
}
// one arm of the ternary chain: "cond? then :"
, ifThen : (it) =>
{
var if_ = compile(it.if)
var then_ = compile(it.then)
return `${if_}? ${then_} :`
}
, name : (it) =>
{
return it.chain.map(Compiler.nameInJS).join(".")
}
/*
the problem: "let" is an expression in ML-- and a statement in JS
solution: "(function() { whatever }) ()" in JS converts
statement "whatever" into an expression
*/
, let : (it) =>
{
var bindings = it.bindings.map(compile)
var body = compile(it.body)
return `(function() { ${bindings.join("; ")}; return ${body} })()`
}
/*
if we have arguments, produce a function, otherwise - var
functions with no args? Hmm. Either you put _ as a param and feed garbage
or I can modify tokenizer/parser to accept () as single token
*/
, binding : (it) =>
{
var name = compile(it.name)
var args = it.args.map(compile)
var body = compile(it.body)
return args.length
? `function ${name}(${args}) { return ${body} }`
: `var ${name} = ${body}`
}
, lambda : (it) =>
{
var args = it.args.map(compile)
var body = compile(it.body)
return `function (${args}) { return ${body} }`
}
, num : (it) => `${it.num}`
// string tokens still carry their quotes, so this emits a JS literal as-is
, str : (it) => `${it.str}`
// a data declaration compiles each constructor separately
, data : (it) => it.ctors.map(compile).join("; ")
// ctor => constructor function (or var), tag predicate, field accessors
, ctor : (it) =>
{
var fields = it.fields.map(Compiler.nameInJS)
var name = Compiler.nameInJS(it.name)
var predName = Compiler.nameInJS(`${it.name}?`)
// BUGFIX: build the record literal from the *mangled* field names so the
// keys match the mangled constructor parameters (the original used the
// raw ML-- names, emitting invalid JS for names needing escaping)
var assignment = fields.map(x => `${x}: ${x}`).join(", ")
var ctor = it.fields.length
? `function ${name}(${fields}) { return {tag: "${name}", ${assignment}}}`
: `var ${name} = {tag: "${name}"}`
// BUGFIX: compare the tag against the name as a string literal; the
// original emitted `o.tag === Name` (comparing against the constructor
// binding itself), so the predicate was always false
var pred = `function ${predName}(o) { return o.tag === "${name}" }`
// one accessor function per field (was shadowing `var fields`)
var accessors = fields.map(x => `function ${x} (o) { return o.${x}}`)
return `${ctor}; ${pred}; ${accessors.join("; ")}`
}
// raw splice: emit the captured JS verbatim
, raw : (it) => it.raw
// unknown nodes become a JS comment instead of crashing the transpile
, default : (it) => `/* ${JSON.stringify(it)} */`
})
}
})
/*
driver: tokenize test.ml--, parse it as a module, transpile to JS,
then evaluate the transpiled code in-process
*/
var s = Stream.ofFile('test.ml--')
// drop whitespace and comment tokens; the parser only wants real lexemes
var tokens = lexemes(s).tokens.filter(x => true
&& x.patch.trim() != ""
&& !x.patch.startsWith("/*")
&& !x.patch.startsWith("//")
)
// console.log(tokens.map(t => t.patch));
var parse = Parsing.parse("module", tokens, syntax)
if (parse.result) {
// assume for now that compiling never fails once parsing succeeded
var ast = parse.result
console.log("AST");
console.log("===");
console.log(require('util').inspect(ast, { depth: null }))
var js = Compiler.run(ast)
console.log("JS");
console.log("===");
console.log(js);
// NOTE(review): eval of generated code is fine for this toy/demo, but
// never feed untrusted ML-- source through this pipeline
var result = eval(js)
console.log();
console.log("EVAL");
console.log("====");
console.log(result);
} else {
// on failure, report the parse state plus the source location (line,
// column, text) of the token where parsing stopped
var place = (
{ line: tokens[parse.pos].line
, col: tokens[parse.pos].col
, patch: tokens[parse.pos].patch
}
)
console.log(parse, place);
}
var StdLib = require('./stdlib.js')
var Parsing = (
/*
runs given "p"-arser over the token "s"-tream with the "syntax" table,
starting at "pos"-ition; rule names (strings) are resolved through
"syntax" until an actual parser function is reached
later:
"front token" == s[pos]
*/
{ parse : (p, s, syntax, pos) =>
{
pos = pos || 0
while (syntax[p])
p = syntax[p]
if (p instanceof Function)
return p(s, syntax, pos)
return {error : {unknown_rule : p}, pos}
}
/*
"pure(x)" will always succeed and parse "x" out of anything
NOTE(review): the combinators below treat a falsy .result as failure,
so pure (and orDefault) should not be given falsy values
*/
, pure : (x) => (s, _syntax, pos) => ({result : x, pos})
/*
will succeed if the front token satisfies the "cond"-ition, producing the
front token and advancing the position one step forward
else will fail with msg
*/
, satisfy : (msg, cond) => (s, _syntax, pos) =>
{
if (pos >= s.length)
return {error : msg, pos}
if (cond(s[pos].patch))
return {result : s[pos].patch, pos : pos + 1}
else
return {error : msg, pos}
}
// a string-literal token still carries its opening quote
, string : () => Parsing.satisfy("string", x => x.startsWith('"'))
, token : (tok) => Parsing.satisfy(tok, x => x == tok)
// a number token is recognized by its first char only
, number : () => Parsing.satisfy("number", x => Parsing.isNumber(x[0]))
, isNumber : StdLib.ofSet("1234567890")
/*
will succeed if all "parsers" succeed in sequence, producing array of
their results in order
*/
, seq : (...parsers) => (s, syntax, pos) =>
{
var acc = []
for (var i = 0; i < parsers.length; i++) {
var res = Parsing.parse(parsers[i], s, syntax, pos)
if (!res.result)
return res
acc.push(res.result)
pos = res.pos
}
return {result : acc, pos}
}
/*
will succeed if any of the "parsers" succeeds
a branch that consumed input before failing (res.pos != pos) aborts the
whole alternative (committed choice), so its error is reported instead
of trying the remaining branches
*/
, alt : (...parsers) => (s, syntax, pos) =>
{
for (var i = 0; i < parsers.length; i++) {
// BUGFIX: removed leftover debug console.log of every branch tried
var res = Parsing.parse(parsers[i], s, syntax, pos)
if (res.result || res.pos != pos)
return res
}
return {error: "no alt", pos}
}
/*
always succeeds
will continuously apply given "parser" until it fails
correlates to '*' (Kleene star) operation from regular grammars
*/
, many : (parser) => (s, syntax, pos) =>
{
var acc = []
while (true) {
var res = Parsing.parse(parser, s, syntax, pos)
if (!res.result)
return {result : acc, pos}
acc.push(res.result)
pos = res.pos
}
}
/*
will continuously apply given "parser" until it fails
fails, if first application of "parser" fails
correlates to '+' (Kleene plus) operation from regular grammars
*/
, some : (parser) => Parsing.rule
( Parsing.seq
( parser
, Parsing.many(parser)
)
, xs =>
{
xs[1].unshift(xs[0])
return xs[1]
}
)
/*
if "parser" fails, produces some "def" value instead
*/
, orDefault : (def, parser) => Parsing.alt(parser, Parsing.pure(def))
/*
if "parser" succeeds, applies a "transform"-ation to its result
*/
, rule : (parser, transform) => (s, syntax, pos) =>
{
var res = Parsing.parse(parser, s, syntax, pos)
if (res.result)
res.result = transform(res.result)
return res
}
/*
produces the front token, if it does NOT satisfy the "reservedSet" predicate
*/
, notReserved : (reservedSet) => Parsing.satisfy("name", x => !reservedSet(x))
/*
produces "tok", if the front token is the same
and it satisfies the "reservedSet" predicate (this guards against typos
in the grammar: asking for an unreserved keyword throws at grammar-build
time, not at parse time)
*/
, reserved : (tok, reservedSet) =>
{
if (!reservedSet(tok))
throw `${tok} is not reserved!`
return Parsing.satisfy(tok, x => x == tok)
}
/*
parses `in_ (sep parser)* out`, e.g. "{ ; a ; b }", producing the list
of parser results (separators and braces dropped)
*/
, indented : (parser, in_, sep, out, r) => Parsing.rule
( Parsing.seq
( Parsing.reserved(in_, r)
, Parsing.many
( Parsing.rule
( Parsing.seq(Parsing.reserved(sep, r), parser)
, xs => xs[1]
)
)
, Parsing.reserved(out, r)
)
, xs => xs[1]
)
}
)
module.exports = Parsing
/*
build a tagged copy of "o" (does not mutate its argument)
*/
module.exports.tag = function (t, o) {
return Object.assign({}, {tag: t}, o)
}
/*
dispatch "o" to scheme[o.tag], falling back to scheme.default;
throws when neither exists
*/
module.exports.match = function (o, scheme) {
if (!scheme[o.tag])
if (!scheme["default"]) {
console.log(o);
throw "no <" + o.tag + "> for scheme " + Object.keys(scheme)
}
// BUGFIX: fall back to "default" whenever the tag has no handler; the
// original indexed scheme[o.tag || "default"], which called undefined
// when o.tag was set but unhandled (default was never consulted)
return (scheme[o.tag] || scheme["default"])(o)
}
/*
box("a", 1, "b", 2) => {a: 1, b: 2}
*/
module.exports.box = function () {
var res = {}
for (var i = 0; i < arguments.length; i += 2)
res[arguments[i]] = arguments[i + 1]
return res
}
var StdLib = (
/*
apply function "f" "count" times to "x" and put all sequent states
in array
iterate(0, f, x) = [x]
iterate(1, f, x) = [x, f(x)]
iterate(2, f, x) = [x, f(x), f(f(x))]
...
*/
{ iterate : (count, f, x) =>
{
var acc = [x]
for (var i = 0; i < count; i++) {
acc.push(x = f(x))
}
return acc
}
/*
apply function "f" "n" times to "x"
times(0, f, x) = x
times(1, f, x) = f(x)
times(2, f, x) = f(f(x))
times(n, f, x) = last(iterate(n, f, x))
...
*/
, times : (n, f, x) =>
StdLib.while(x, _ => n--, f)
/*
keep applying "f" to "thing" while cond(thing) holds; return the final
state
*/
, "while" : (thing, cond, f) =>
{
while (cond(thing))
thing = f(thing)
return thing
}
/*
transform array-like object into predicate with check its argument to
be present in source object
ofSet([1,2]) (1) == true
ofSet([1,2]) (3) == false
*/
, ofSet : (xs) =>
{
var o = {}
Array.prototype.slice.apply(xs).forEach(c => { o[c] = true })
return x => o[x]
}
/*
return a copy of "o" with a tag field set to "t"
BUGFIX/consistency: no longer mutates its argument — callers keep their
objects intact, matching the non-mutating tag helper in parsing.js
*/
, tag : (t, o) =>
Object.assign({}, o, {tag: t})
/*
given the object "o" and the "scheme", pass "o" to the method of "scheme"
with same name as o.tag, falling back to scheme.default; throws when
neither exists
*/
, match : (o, scheme) =>
{
if (!scheme[o.tag])
if (!scheme["default"]) {
throw `no ${o.tag} for scheme ${Object.keys(scheme)}`
}
return (scheme[o.tag] || scheme["default"])(o)
}
}
)
// CommonJS export of the StdLib helper object (stdlib.js)
module.exports = StdLib
var StdLib = require('./stdlib.js')
var Stream = (
/*
wrap a string into a source-stream: an immutable char iterator that
tracks line / column / offset
*/
{ ofText : (text) =>
({ text, line: 1, col: 1, offset: 0 })
/*
read a whole file and wrap its content into a source-stream
*/
, ofFile : (filename) =>
Stream.ofText(
require('fs').readFileSync(filename).toString()
)
/*
the char the iterator currently points at (undefined past the end)
*/
, currentChar : (stream) =>
stream.text[stream.offset]
/*
a fresh stream advanced one char forward, line/col kept up to date
*/
, step : (stream) =>
{
var onNewline = Stream.currentChar(stream) == '\n'
return Object.assign({}, stream,
{ line: onNewline ? stream.line + 1 : stream.line
, col: onNewline ? 1 : stream.col + 1
, offset: stream.offset + 1
})
}
/*
cut the text between two iterators: yields {token, stream} where the
token carries the cut "patch" plus the position it started at, or
undefined when the iterators coincide
*/
, between : (from, to) =>
{
if (from.offset == to.offset)
return undefined
var patch = from.text.slice(from.offset, to.offset)
return { token: Object.assign({patch}, from), stream: to }
}
/*
true once the iterator has run past the last char
*/
, eof : (stream) =>
Stream.currentChar(stream) === undefined
/*
cut the longest prefix starting at "from" whose chars all satisfy
"cond"
*/
, "while" : (from, cond) =>
{
var keepGoing = s => cond(Stream.currentChar(s)) && !Stream.eof(s)
var to = StdLib.while(from, keepGoing, Stream.step)
return Stream.between(from, to)
}
/*
does the text at the iterator position start with "patch"?
*/
, startsWith : (stream, patch) =>
stream.text.substr(stream.offset, patch.length) == patch
}
)
// CommonJS export of the Stream source-iterator object (stream.js)
module.exports = Stream
// test.ml--: sample input for the compiler driver above — defines a
// cons-list data type plus map/fold/show over it, then prints the list.
// ("//" comment tokens are filtered out before parsing, so these lines
// do not affect the fixture's behavior.)
module foo:bar
exports
{
}
imports
{
; runtime as r
}
implementation
{
; data List
{
; Cons head tail
; Nil
}
; List-map f list = r:match list (r:box
"Cons" (\it -> Cons (f it:head) (List-map f it:tail))
"Nil" (\_ -> Nil)
)
; List-fold f z list = r:match list (r:box
"Cons" (\it -> f it:head (List-fold f z it:tail))
"Nil" (\_ -> z)
)
; List-show show-elem list =
List-fold
(\x y -> add x (add " :: " y))
"[]"
(List-map show-elem list)
; add = `(function (x, y) { return x + y})`
; main = console:log (List-show (\x -> x) (Cons 1 (Cons 2 (Cons 3 Nil))))
}
var StdLib = require('./stdlib.js')
var Stream = require('./stream.js')
var Tokenizer = (
// chars that always form a single-char token of their own
{ isStandaloneChar : StdLib.ofSet("({[\\;,:]})")
, isSpace : StdLib.ofSet(" \n")
// a name char is anything that is neither a space nor a standalone char
, isNameChar : x =>
!(Tokenizer.isSpace(x) ||
Tokenizer.isStandaloneChar(x))
/*
matches `${start} .* ${end}`, so delimited("(*", "*)")
will match "(* this is an ocaml-style comment *) BUT NOT THIS", producing
"(* this is an ocaml-style comment *)"
*/
, delimited : (start, end) => (from) =>
{
if (Stream.startsWith(from, start))
{
var s = StdLib.times(start.length, Stream.step, from)
// BUGFIX: also stop at end-of-input; the original spun forever on an
// unterminated comment (an unclosed delimiter now yields a token that
// extends to the end of the text)
var body = StdLib.while(s, s => !Stream.eof(s) && !Stream.startsWith(s, end), Stream.step)
var to = StdLib.times(end.length, Stream.step, body)
return Stream.between(from, to)
}
}
/*
matches `${tagChar} ([-${tagChar}] | \.)* ${tagChar}`,
so string("'", "'")
will match "'this \'is\' a "string"' BUT NOT THIS", producing
"'this \'is\' a "string"'"
*/
, string : (tagChar) => (from) =>
{
if (Stream.startsWith(from, tagChar))
{
var s = Stream.step(from)
// a backslash escapes the next char, so consume two chars at once
var consume = c => Stream.currentChar(c) == '\\'?
Stream.step(Stream.step(c)) :
Stream.step(c)
// BUGFIX: also stop at end-of-input (an unterminated string literal
// used to loop forever)
var body = StdLib.while(s, s => !Stream.eof(s) && !Stream.startsWith(s, tagChar), consume)
var to = Stream.step(body)
return Stream.between(from, to)
}
}
/*
see Stream.while
*/
, while : (cond) => (from) => Stream.while(from, cond)
/*
matches, if any of "tokenizers" matches (first hit wins)
*/
, any : (...tokenizers) => (s) =>
{
for (var i = 0; i < tokenizers.length; i++) {
var current = tokenizers[i]
var result = current(s)
if (result)
return result
}
}
/*
continuously cuts patches from the front of the text with the given
tokenizer, collecting them into a token list
*/
, many : (tokenizer) => (from) =>
{
var to = from
var acc = []
var next
while (next = tokenizer(to)) {
acc.push(next.token)
to = next.stream
}
return { tokens : acc, stream : to}
}
/*
cut 1 char as a token
*/
, singleChar : (from) =>
!Stream.eof(from) &&
Stream.between(from, Stream.step(from))
}
)
// CommonJS export of the Tokenizer object (tokenizer.js)
module.exports = Tokenizer
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment