Last active
August 29, 2015 14:02
-
-
Save drslump/33a0f1b4960f8435b504 to your computer and use it in GitHub Desktop.
incremental parsing proof of concept for metascript
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
Incremental parser wrapper for the compiler service. | |
Designed to be used for integration on text editors, to keep an up to date | |
representation of the source code in its textual and AST forms. | |
// create compiler for some source code | |
compiler = Meta.compilerFromString(code) | |
// wrap the compiler with the incremental parser | |
parser = new IncrementalParser(compiler) | |
// for each change in the editor | |
parser.insertAt(line, column, newText) | |
parser.removeAt(line, column, charCount) | |
// to update the AST representation | |
parser.sync() | |
// when the source is out of sync for some reason | |
parser.reset(code) | |
// go crazy querying the compiler :) | |
compiler.root.forEachRecursive(...) | |
Algorithm overview: | |
- All source code modifications must be reported in order | |
- Line/column position for each action must take into account the previous | |
- Synchronizing: | |
- Update internal source text representation and update AST loc info | |
- Find a scoped block that can enclose all the edit actions | |
- Map block to a portion of source code text | |
- Replace block with the result of parsing the portion source code | |
- Run the compiler pipeline on the new block | |
Ideas for optimizations: | |
- Skip checkArity if we don't care about errors? | |
- Skip expand/resolve nested scoped blocks. Flag them and do it | |
only when we work inside one of them (or memoize them) | |
- Try to merge actions as they are reported | |
- Detect special expressions (would this work always with macros?): | |
- strings: update value (except for quote chars) | |
- numbers: update value | |
- tags: update value (check if scope needs updating) | |
- array/object: parse only the literal (if not closing char) | |
*/ | |
function IncrementalParser(compiler) { | |
var self = this; | |
self.compiler = compiler; | |
// Consume the source code and keep a copy | |
self.source = []; | |
compiler.reader.start(function (ln) { | |
self.source.push(ln); | |
}); | |
// Expose our version of the source to the parser | |
compiler.reader = { | |
name: compiler.reader.name, | |
error: null, | |
start: function (lineHandler) { | |
self.source.forEach(lineHandler); | |
} | |
}; | |
Object.defineProperty(compiler.reader, 'value', { | |
get: function () { | |
return self.source.join('\n') | |
} | |
}); | |
// Initialize the AST | |
self.reset(self.source); | |
} | |
// Discards any queued editions and generate a new AST | |
IncrementalParser.prototype.reset = function (source) { | |
this.source = Array.isArray(source) ? source : source.split("\n"); | |
this.actions = []; | |
// HACK: the compiler service should be refactored to make this simpler | |
this.compiler.root = null; | |
this.compiler.parser.initialize(this.compiler, this.compiler.parser.Expr); | |
this.compiler.ensureRoot(); | |
}; | |
IncrementalParser.prototype.insertAt = function (line, column, text) { | |
// TODO: merge previous action if possible | |
this.actions.push({ | |
type: 'insert', | |
line: line, | |
column: column, | |
text: text | |
}); | |
}; | |
// NOTE: Line endings are always counted as 1 character | |
IncrementalParser.prototype.removeAt = function (line, column, count) { | |
// TODO: merge previous action if possible | |
this.actions.push({ | |
type: 'remove', | |
line: line, | |
column: column, | |
count: count | |
}); | |
}; | |
IncrementalParser.prototype.sync = function () { | |
if (!this.actions.length) { | |
return; | |
} | |
var bounds = this.applyActions(this.actions, this.source, this.compiler.root), | |
min = bounds.start, | |
max = bounds.end; | |
// Bail out if the changes expand too many lines (should be faster) | |
if (max.line - min.line > this.source.length/2) { | |
console.log('Bail out: too many changes') | |
this.reset(this.source); | |
return; | |
} | |
// Calculate the minimum indentation for the edited region | |
var minIndent = this.getLineIndent(min.line); | |
for (var i = min.line + 1; i <= max.line; i++) { | |
this.source[i - 1].replace(/^(;|"""|''')|^(\s+)/, function (m0, m1, m2) { | |
minIndent = m1 ? minIndent : Math.min(minIndent, m2.length); | |
}); | |
} | |
// Bail out if the enclosing is going to be the module | |
if (minIndent === 0) { | |
console.log('Bail out: minIndent == 0') | |
this.reset(this.source); | |
return; | |
} | |
// Find an expression that encloses all the changes | |
// TODO: Refactor findScopedAt to only get the expression so we can | |
// call it only once here and then check scoped and boundaries | |
// in the loop. | |
var expr, bounds; | |
while (expr !== this.compiler.root) { | |
expr = this.findScopedAt(min); | |
// Verify it fits the boundaries (ie: binary operators) | |
bounds = this.getExprBoundaries(expr); | |
if (min.line < bounds.start.line || max.line > bounds.end.line || | |
min.line === bounds.start.line && min.column < bounds.start.column || | |
max.line === bounds.end.line && max.column > bounds.end.column) { | |
min = bounds.start; | |
continue; | |
} | |
// Can expression accommodate our minimum indentation? | |
if (minIndent < this.getLineIndent(min.line)) { | |
min = bounds.start; | |
continue; | |
} | |
min = bounds.start; | |
max = bounds.end; | |
break; | |
} | |
// Bail out if the changes expand too many lines | |
if (max.line - min.line > this.source.length/2) { | |
// TODO: At this point in the update it doesn't seem to be faster! | |
// console.log('Bail out: enclosing too big') | |
// this.reset(this.source.join('\n')); | |
// return; | |
} | |
// Obtain the source code for the expression | |
var portion = this.source.slice(min.line - 1, max.line); | |
portion[0] = portion[0].substring(min.column); | |
portion[portion.length-1] = portion[portion.length-1].substring(0, max.column); | |
console.log(portion.join('\n')); | |
// TODO: What should we do with errors? shall we map them? | |
var partial = this.parsePartial(portion.join('\n')); | |
// Normalize location info on replacement | |
partial.forEachRecursive(function (expr) { | |
expr.loc.start.line += min.line - 1; | |
expr.loc.end.line += min.line - 1; | |
if (expr.loc.start.line === min.line) { | |
expr.loc.start.column += min.column; | |
} | |
if (expr.loc.end.line === min.line) { | |
expr.loc.end.column += min.column; | |
} | |
}); | |
// Replace expression with the new version | |
expr.replaceWith(partial.at(0)); | |
// HACK: override root so we only compile the new contents | |
var root = this.compiler.root; | |
this.compiler.root = partial.at(0); | |
this.compiler.pipeline(); | |
this.compiler.root = root; | |
}; | |
IncrementalParser.prototype.applyActions = function (actions, source, ast) { | |
var min = { line: null, column: null }; | |
var max = { line: null, column: null }; | |
function expandBounds(line, column) { | |
if (min.line === null || line <= min.line) { | |
min.line = action.line; | |
if (min.column === null || line < min.line || column < min.column) { | |
min.column = column; | |
} | |
} | |
if (max.line === null || line >= max.line) { | |
max.line = line; | |
if (max.column === null || line > max.line || column > max.column) { | |
max.column = column; | |
} | |
} | |
} | |
// Apply all changes to the source text | |
while (actions.length > 0) { | |
var action = actions.shift(); | |
expandBounds(action.line, action.column); | |
var deltaLines = 0, deltaColumns = 0; | |
if (action.type === 'insert') { | |
var lines = action.text.split('\n'); | |
var keepRemain = source[action.line - 1].substring(action.column); | |
source[action.line - 1] = | |
source[action.line - 1].substr(0, action.column) + | |
lines.shift(); | |
if (lines.length) { | |
var args = [action.line - 1, 0].concat(lines); | |
source.splice.apply(source, args); | |
deltaLines += lines.length; | |
deltaColumns = 0; | |
} | |
source[ action.line - 1 + lines.length ] += keepRemain; | |
deltaColumns = source[ action.line - 1 + lines.length ].length - keepRemain.length; | |
if (!lines.length) { | |
deltaColumns -= action.column; | |
} | |
// Make sure the bounds reflect all the new inserted text | |
expandBounds(action.line + deltaLines, deltaLines ? deltaColumns : action.columns + deltaColumns); | |
} else if (action.type === 'remove') { | |
var ln = action.line - 1; | |
var col = action.column; | |
// TODO: Optimize by removing as much of the line as possible at once | |
while (action.count--) { | |
// Detect end of line as one character | |
if (source[ln].length <= col) { | |
deltaLines -= 1; | |
deltaColumns = col; | |
// Make sure we include the now folded line | |
expandBounds(action.line, action.column + source[ln + 1].length - 1); | |
// TODO: Handle last line | |
source[ln] += source[ln + 1]; | |
source.splice(ln + 1, 1); | |
continue; | |
} | |
source[ln] = source[ln].substring(0, col) + source[ln].substring(col + 1); | |
deltaColumns -= 1; | |
} | |
} else { | |
throw new Error('Action "' + action.type + '" not supported'); | |
} | |
// Adjust LOC after each action, it simplifies the algorithm | |
ast.forEachRecursive(function (expr) { | |
if (expr.loc.end.line < action.line) { | |
return; | |
} else if (expr.loc.end.line > action.line) { | |
expr.loc.end.line += deltaLines; | |
} else if (expr.loc.end.column <= action.column) { | |
expr.loc.end.line += deltaLines; | |
expr.loc.end.column += deltaColumns; | |
} | |
if (expr.loc.start.line < action.line) { | |
return; | |
} else if (expr.loc.start.line > action.line) { | |
expr.loc.start.line += deltaLines; | |
} else if (expr.loc.start.column >= action.column) { | |
expr.loc.start.line += deltaLines; | |
expr.loc.start.column += deltaColumns; | |
} | |
}); | |
} | |
return {start: min, end: max}; | |
}; | |
// Private method to generate a parse AST from a portion of source code | |
IncrementalParser.prototype.parsePartial = function (code) { | |
var root = this.compiler.root; | |
this.compiler.root = null; | |
var result = null; | |
try { | |
this.compiler.parser.initialize(this.compiler, this.compiler.parser.Expr); | |
this.compiler.parse(); | |
result = this.compiler.root; | |
} finally { | |
this.compiler.root = root; | |
} | |
return result; | |
}; | |
IncrementalParser.prototype.getLineIndent = function (line) { | |
var indent = 0; | |
var code = this.source[line - 1]; | |
while (indent < code.length && code.charAt(indent) === ' ') | |
indent++; | |
return indent; | |
}; | |
IncrementalParser.prototype.getExprBoundaries = function (expr) { | |
var start = {line: expr.loc.start.line, column: expr.loc.start.column}; | |
var end = {line: expr.loc.end.line, column: expr.loc.end.column}; | |
// TODO: This would probably work too for most cases without being recursive | |
expr.forEachRecursive(function (e) { | |
var loc = e.loc; | |
// Skip macro's code literals from other modules | |
if (loc.source !== expr.loc.source) return; | |
// Skip macro's code literals from same module (if they appear above the snippet) | |
if (loc.start.line < expr.loc.start.line) return; | |
if (loc.start.line < start.line | loc.start.line === start.line && loc.start.column < start.column) { | |
start.line = loc.start.line; | |
start.column = loc.start.column; | |
} | |
if (loc.end.line > end.line || loc.end.line === end.line && loc.end.column > end.column) { | |
end.line = loc.end.line; | |
end.column = loc.end.column; | |
} | |
}) | |
return {start: start, end: end}; | |
}; | |
IncrementalParser.prototype.findScopedAt = function (start) { | |
function walk(expr) { | |
if (expr.loc.start.line > start.line) return null; | |
var found = null; | |
for (var i=0; found === null && i<expr.count; i++) { | |
found = walk(expr.at(i)); | |
} | |
if (!found) { | |
if (expr.loc.start.line < start.line || expr.loc.start.line === start.line && expr.loc.start.column < start.column) { | |
if (expr.loc.end.line > start.line || expr.loc.end.line === start.line && expr.loc.end.column > start.column) { | |
found = expr | |
} | |
} | |
} | |
return found; | |
} | |
// First find the expression... | |
var root = this.compiler.root; | |
var leaf = walk(root); | |
// ... now bubble up to find a scoped parent | |
while (leaf !== null) { | |
if (leaf.sym.symbolData.opensNewScope) break; | |
leaf = leaf.parent; | |
} | |
// default to root if not found | |
return leaf || root; | |
}; | |
var Meta = require('./')() | |
var code = [ | |
'while true', | |
' var o = () ->', | |
' console.log (param + 10)', | |
'', | |
'foo(10)' | |
].join('\n'); | |
var ITERS = 10; | |
for (var i=0; i<ITERS; i++) { | |
var compiler = Meta.compilerFromString(code, '<incremental>'); | |
console.time('init') | |
var parser = new IncrementalParser(compiler); | |
console.timeEnd('init') | |
console.time('update') | |
parser.insertAt(2, 6, 'fo'); | |
parser.insertAt(2, 13, 'param'); | |
parser.removeAt(3, 25, 2); | |
parser.insertAt(3, 25, '2'); | |
parser.sync(); | |
console.timeEnd('update') | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment