Skip to content

Instantly share code, notes, and snippets.

@drslump
Last active August 29, 2015 14:02
Show Gist options
  • Save drslump/33a0f1b4960f8435b504 to your computer and use it in GitHub Desktop.
Save drslump/33a0f1b4960f8435b504 to your computer and use it in GitHub Desktop.
incremental parsing proof of concept for metascript
/*
Incremental parser wrapper for the compiler service.
Designed to be used for integration on text editors, to keep an up to date
representation of the source code in its textual and AST forms.
// create compiler for some source code
compiler = Meta.compilerFromString(code)
// wrap the compiler with the incremental parser
parser = new IncrementalParser(compiler)
// for each change in the editor
parser.insertAt(line, column, newText)
parser.removeAt(line, column, charCount)
// to update the AST representation
parser.sync()
// when the source is out of sync for some reason
parser.reset(code)
// go crazy querying the compiler :)
compiler.root.forEachRecursive(...)
Algorithm overview:
- All source code modifications must be reported in order
- Line/column position for each action must take into account the previous
- Synchronizing:
- Update internal source text representation and update AST loc info
- Find a scoped block that can enclose all the edit actions
- Map block to a portion of source code text
- Replace block with the result of parsing the portion source code
- Run the compiler pipeline on the new block
Ideas for optimizations:
- Skip checkArity if we don't care about errors?
- Skip expand/resolve nested scoped blocks. Flag them and do it
only when we work inside one of them (or memoize them)
- Try to merge actions as they are reported
- Detect special expressions (would this work always with macros?):
- strings: update value (except for quote chars)
- numbers: update value
- tags: update value (check if scope needs updating)
- array/object: parse only the literal (if not closing char)
*/
function IncrementalParser(compiler) {
var self = this;
self.compiler = compiler;
// Consume the source code and keep a copy
self.source = [];
compiler.reader.start(function (ln) {
self.source.push(ln);
});
// Expose our version of the source to the parser
compiler.reader = {
name: compiler.reader.name,
error: null,
start: function (lineHandler) {
self.source.forEach(lineHandler);
}
};
Object.defineProperty(compiler.reader, 'value', {
get: function () {
return self.source.join('\n')
}
});
// Initialize the AST
self.reset(self.source);
}
// Discards any queued editions and generate a new AST
IncrementalParser.prototype.reset = function (source) {
this.source = Array.isArray(source) ? source : source.split("\n");
this.actions = [];
// HACK: the compiler service should be refactored to make this simpler
this.compiler.root = null;
this.compiler.parser.initialize(this.compiler, this.compiler.parser.Expr);
this.compiler.ensureRoot();
};
IncrementalParser.prototype.insertAt = function (line, column, text) {
// TODO: merge previous action if possible
this.actions.push({
type: 'insert',
line: line,
column: column,
text: text
});
};
// NOTE: Line endings are always counted as 1 character
IncrementalParser.prototype.removeAt = function (line, column, count) {
// TODO: merge previous action if possible
this.actions.push({
type: 'remove',
line: line,
column: column,
count: count
});
};
IncrementalParser.prototype.sync = function () {
if (!this.actions.length) {
return;
}
var bounds = this.applyActions(this.actions, this.source, this.compiler.root),
min = bounds.start,
max = bounds.end;
// Bail out if the changes expand too many lines (should be faster)
if (max.line - min.line > this.source.length/2) {
console.log('Bail out: too many changes')
this.reset(this.source);
return;
}
// Calculate the minimum indentation for the edited region
var minIndent = this.getLineIndent(min.line);
for (var i = min.line + 1; i <= max.line; i++) {
this.source[i - 1].replace(/^(;|"""|''')|^(\s+)/, function (m0, m1, m2) {
minIndent = m1 ? minIndent : Math.min(minIndent, m2.length);
});
}
// Bail out if the enclosing is going to be the module
if (minIndent === 0) {
console.log('Bail out: minIndent == 0')
this.reset(this.source);
return;
}
// Find an expression that encloses all the changes
// TODO: Refactor findScopedAt to only get the expression so we can
// call it only once here and then check scoped and boundaries
// in the loop.
var expr, bounds;
while (expr !== this.compiler.root) {
expr = this.findScopedAt(min);
// Verify it fits the boundaries (ie: binary operators)
bounds = this.getExprBoundaries(expr);
if (min.line < bounds.start.line || max.line > bounds.end.line ||
min.line === bounds.start.line && min.column < bounds.start.column ||
max.line === bounds.end.line && max.column > bounds.end.column) {
min = bounds.start;
continue;
}
// Can expression accommodate our minimum indentation?
if (minIndent < this.getLineIndent(min.line)) {
min = bounds.start;
continue;
}
min = bounds.start;
max = bounds.end;
break;
}
// Bail out if the changes expand too many lines
if (max.line - min.line > this.source.length/2) {
// TODO: At this point in the update it doesn't seem to be faster!
// console.log('Bail out: enclosing too big')
// this.reset(this.source.join('\n'));
// return;
}
// Obtain the source code for the expression
var portion = this.source.slice(min.line - 1, max.line);
portion[0] = portion[0].substring(min.column);
portion[portion.length-1] = portion[portion.length-1].substring(0, max.column);
console.log(portion.join('\n'));
// TODO: What should we do with errors? shall we map them?
var partial = this.parsePartial(portion.join('\n'));
// Normalize location info on replacement
partial.forEachRecursive(function (expr) {
expr.loc.start.line += min.line - 1;
expr.loc.end.line += min.line - 1;
if (expr.loc.start.line === min.line) {
expr.loc.start.column += min.column;
}
if (expr.loc.end.line === min.line) {
expr.loc.end.column += min.column;
}
});
// Replace expression with the new version
expr.replaceWith(partial.at(0));
// HACK: override root so we only compile the new contents
var root = this.compiler.root;
this.compiler.root = partial.at(0);
this.compiler.pipeline();
this.compiler.root = root;
};
IncrementalParser.prototype.applyActions = function (actions, source, ast) {
var min = { line: null, column: null };
var max = { line: null, column: null };
function expandBounds(line, column) {
if (min.line === null || line <= min.line) {
min.line = action.line;
if (min.column === null || line < min.line || column < min.column) {
min.column = column;
}
}
if (max.line === null || line >= max.line) {
max.line = line;
if (max.column === null || line > max.line || column > max.column) {
max.column = column;
}
}
}
// Apply all changes to the source text
while (actions.length > 0) {
var action = actions.shift();
expandBounds(action.line, action.column);
var deltaLines = 0, deltaColumns = 0;
if (action.type === 'insert') {
var lines = action.text.split('\n');
var keepRemain = source[action.line - 1].substring(action.column);
source[action.line - 1] =
source[action.line - 1].substr(0, action.column) +
lines.shift();
if (lines.length) {
var args = [action.line - 1, 0].concat(lines);
source.splice.apply(source, args);
deltaLines += lines.length;
deltaColumns = 0;
}
source[ action.line - 1 + lines.length ] += keepRemain;
deltaColumns = source[ action.line - 1 + lines.length ].length - keepRemain.length;
if (!lines.length) {
deltaColumns -= action.column;
}
// Make sure the bounds reflect all the new inserted text
expandBounds(action.line + deltaLines, deltaLines ? deltaColumns : action.columns + deltaColumns);
} else if (action.type === 'remove') {
var ln = action.line - 1;
var col = action.column;
// TODO: Optimize by removing as much of the line as possible at once
while (action.count--) {
// Detect end of line as one character
if (source[ln].length <= col) {
deltaLines -= 1;
deltaColumns = col;
// Make sure we include the now folded line
expandBounds(action.line, action.column + source[ln + 1].length - 1);
// TODO: Handle last line
source[ln] += source[ln + 1];
source.splice(ln + 1, 1);
continue;
}
source[ln] = source[ln].substring(0, col) + source[ln].substring(col + 1);
deltaColumns -= 1;
}
} else {
throw new Error('Action "' + action.type + '" not supported');
}
// Adjust LOC after each action, it simplifies the algorithm
ast.forEachRecursive(function (expr) {
if (expr.loc.end.line < action.line) {
return;
} else if (expr.loc.end.line > action.line) {
expr.loc.end.line += deltaLines;
} else if (expr.loc.end.column <= action.column) {
expr.loc.end.line += deltaLines;
expr.loc.end.column += deltaColumns;
}
if (expr.loc.start.line < action.line) {
return;
} else if (expr.loc.start.line > action.line) {
expr.loc.start.line += deltaLines;
} else if (expr.loc.start.column >= action.column) {
expr.loc.start.line += deltaLines;
expr.loc.start.column += deltaColumns;
}
});
}
return {start: min, end: max};
};
// Private method to generate a parse AST from a portion of source code
IncrementalParser.prototype.parsePartial = function (code) {
var root = this.compiler.root;
this.compiler.root = null;
var result = null;
try {
this.compiler.parser.initialize(this.compiler, this.compiler.parser.Expr);
this.compiler.parse();
result = this.compiler.root;
} finally {
this.compiler.root = root;
}
return result;
};
IncrementalParser.prototype.getLineIndent = function (line) {
var indent = 0;
var code = this.source[line - 1];
while (indent < code.length && code.charAt(indent) === ' ')
indent++;
return indent;
};
IncrementalParser.prototype.getExprBoundaries = function (expr) {
var start = {line: expr.loc.start.line, column: expr.loc.start.column};
var end = {line: expr.loc.end.line, column: expr.loc.end.column};
// TODO: This would probably work too for most cases without being recursive
expr.forEachRecursive(function (e) {
var loc = e.loc;
// Skip macro's code literals from other modules
if (loc.source !== expr.loc.source) return;
// Skip macro's code literals from same module (if they appear above the snippet)
if (loc.start.line < expr.loc.start.line) return;
if (loc.start.line < start.line | loc.start.line === start.line && loc.start.column < start.column) {
start.line = loc.start.line;
start.column = loc.start.column;
}
if (loc.end.line > end.line || loc.end.line === end.line && loc.end.column > end.column) {
end.line = loc.end.line;
end.column = loc.end.column;
}
})
return {start: start, end: end};
};
IncrementalParser.prototype.findScopedAt = function (start) {
function walk(expr) {
if (expr.loc.start.line > start.line) return null;
var found = null;
for (var i=0; found === null && i<expr.count; i++) {
found = walk(expr.at(i));
}
if (!found) {
if (expr.loc.start.line < start.line || expr.loc.start.line === start.line && expr.loc.start.column < start.column) {
if (expr.loc.end.line > start.line || expr.loc.end.line === start.line && expr.loc.end.column > start.column) {
found = expr
}
}
}
return found;
}
// First find the expression...
var root = this.compiler.root;
var leaf = walk(root);
// ... now bubble up to find a scoped parent
while (leaf !== null) {
if (leaf.sym.symbolData.opensNewScope) break;
leaf = leaf.parent;
}
// default to root if not found
return leaf || root;
};
var Meta = require('./')()
var code = [
'while true',
' var o = () ->',
' console.log (param + 10)',
'',
'foo(10)'
].join('\n');
var ITERS = 10;
for (var i=0; i<ITERS; i++) {
var compiler = Meta.compilerFromString(code, '<incremental>');
console.time('init')
var parser = new IncrementalParser(compiler);
console.timeEnd('init')
console.time('update')
parser.insertAt(2, 6, 'fo');
parser.insertAt(2, 13, 'param');
parser.removeAt(3, 25, 2);
parser.insertAt(3, 25, '2');
parser.sync();
console.timeEnd('update')
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment