Last active
May 9, 2019 23:54
-
-
Save maxbrunsfeld/f56db205f993dc7bc71e8cad2fd5e120 to your computer and use it in GitHub Desktop.
Converting raw Tree-sitter syntax tree to a strongly typed structure (TypeScript example)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
root { | |
"name": { | |
"text": "foo" | |
}, | |
"parameters": { | |
"type": "parameters" | |
}, | |
"body": { | |
"statements": [ | |
{ | |
"condition": { | |
"text": "a" | |
}, | |
"consequence": { | |
"statements": [ | |
{ | |
"type": "print_statement", | |
"argument": [ | |
{ | |
"text": "b" | |
} | |
] | |
}, | |
{ | |
"type": "print_statement", | |
"argument": [ | |
{ | |
"text": "c" | |
} | |
] | |
} | |
] | |
}, | |
"alternatives": [ | |
{ | |
"condition": [ | |
{ | |
"text": "d" | |
} | |
], | |
"consequence": [ | |
{ | |
"statements": [ | |
{ | |
"type": "print_statement", | |
"argument": [ | |
{ | |
"text": "e" | |
} | |
] | |
} | |
] | |
} | |
] | |
}, | |
{ | |
"condition": [ | |
{ | |
"text": "g" | |
} | |
], | |
"consequence": [ | |
{ | |
"statements": [ | |
{ | |
"type": "print_statement", | |
"argument": [ | |
{ | |
"text": "h" | |
} | |
] | |
} | |
] | |
} | |
] | |
}, | |
{ | |
"condition": [ | |
{ | |
"text": "g" | |
} | |
], | |
"consequence": [ | |
{ | |
"statements": [ | |
{ | |
"type": "print_statement", | |
"argument": [ | |
{ | |
"text": "h" | |
} | |
] | |
} | |
] | |
} | |
] | |
} | |
] | |
} | |
] | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Hand-written code (not auto-generated) | |
function parseToStaticNode(sourceCode: String, language) { | |
// Get the raw syntax tree. | |
const parser = new Parser(); | |
parser.setLanguage(Python); | |
const tree = parser.parse(sourceCode); | |
const cursor = tree.walk(); | |
// Walk the syntax tree depth first, keeping a stack of field maps | |
const fieldMapStack = []; | |
let visitedChildren = false; | |
while (true) { | |
// Each node is visited twice: | |
// once on the way down, and once on the way up. | |
const node = cursor.currentNode; | |
// On the way up, all of the node's fields will have been collected | |
// into the field map. | |
if (visitedChildren) { | |
// Convert the field map into a strongly-typed node using the | |
// generated conversion function. | |
const typedNode = convertFieldMapToTypedNode(node, fieldMapStack.pop()); | |
// Insert the strongly-typed node into it's parent field map. | |
const parentFieldMap = fieldMapStack[fieldMapStack.length - 1]; | |
if (!parentFieldMap) return typedNode; | |
const fieldName = cursor.currentFieldName; | |
if (fieldName) { | |
if (!parentFieldMap[fieldName]) parentFieldMap[fieldName] = []; | |
parentFieldMap[fieldName].push(typedNode); | |
} | |
// If there is a next sibling, move forward in the tree. | |
if (cursor.gotoNextSibling()) { | |
fieldMapStack.push({type: cursor.currentNode.type}); | |
visitedChildren = false; | |
} | |
// If there is a parent, move up in the tree, otherwise terminate. | |
else if (!cursor.gotoParent()) { | |
return typedNode | |
} | |
} | |
// On the way down, push a new field map onto the stack. | |
else if (cursor.gotoFirstChild()) { | |
fieldMapStack.push({type: cursor.currentNode.type}); | |
} else { | |
visitedChildren = true; | |
} | |
} | |
} | |
// Auto-generated data types | |
class ParametersNode { | |
constructor () { | |
// TODO - add fields for parameters | |
} | |
} | |
class IdentifierNode { | |
text: String | |
constructor (text) { | |
this.text = text | |
} | |
} | |
class PrintStatementNode { | |
arguments: [IdentifierNode] | |
constructor (args) { | |
this.arguments = args | |
} | |
} | |
class IfStatementNode { | |
condition: IdentifierNode | |
consequence: BlockNode | |
alternatives: [ElifClause] | |
constructor (condition, consequence, alternatives) { | |
this.condition = condition | |
this.consequence = consequence | |
this.alternatives = alternatives | |
} | |
} | |
class FunctionDefinition { | |
name: IdentifierNode | |
parameters: ParametersNode | |
body: BlockNode | |
constructor (name, parameters, body) { | |
this.name = name | |
this.parameters = parameters | |
this.body = body | |
} | |
} | |
class BlockNode { | |
statements: [IfStatementNode | PrintStatementNode] | |
constructor (statements) { | |
this.statements = statements; | |
} | |
} | |
class ElifClause { | |
condition: IdentifierNode | |
consequence: BlockNode | |
constructor (condition, consequence) { | |
this.condition = condition; | |
this.consequence = consequence; | |
} | |
} | |
// Auto-generated conversion function | |
function convertFieldMapToTypedNode(node, fieldMap) { | |
switch (fieldMap.type) { | |
case 'if_statement': | |
return new IfStatementNode( | |
fieldMap.condition[0], | |
fieldMap.consequence[0], | |
fieldMap.alternative | |
); | |
case 'function_definition': | |
return new FunctionDefinition( | |
fieldMap.name[0], | |
fieldMap.parameters[0], | |
fieldMap.body[0] | |
); | |
case 'block': | |
return new BlockNode( | |
fieldMap.statement, | |
); | |
case 'elif_clause': | |
return new ElifClause( | |
fieldMap.condition, | |
fieldMap.consequence, | |
); | |
case 'identifier': | |
return new IdentifierNode(node.text); | |
default: | |
return fieldMap; | |
} | |
} | |
// Test program | |
const fs = require('fs'); | |
const Parser = require('.'); | |
const Python = require('../tree-sitter-python'); | |
const filePath = process.argv[2]; | |
const sourceCode = fs.readFileSync(filePath, 'utf8'); | |
const root = parseToStaticNode(sourceCode, Python); | |
console.log('root', JSON.stringify(root, null, 2)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def foo(bar, baz): | |
if a: | |
print b | |
print c | |
elif d: | |
print e | |
elif g: | |
print h | |
elif g: | |
print h |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment