Skip to content

Instantly share code, notes, and snippets.

@whosaysni
Last active September 9, 2018 01:09
Show Gist options
  • Save whosaysni/000fdf8feb2301d40e85365e99dbdbf9 to your computer and use it in GitHub Desktop.
Save whosaysni/000fdf8feb2301d40e85365e99dbdbf9 to your computer and use it in GitHub Desktop.
PoC code to display AST difference with corresponding source lines - ASTの差分をとり差異部分のソースコードと一緒に表示するPoC (SUPER DIRTY)
# coding: utf-8
import ast
import sys
from difflib import SequenceMatcher
class NodeDumper(object):
    """Render an AST as an indented text dump, one entry per output line.

    After ``build()`` has run, ``doc`` holds a list of
    ``((lineno, col_offset), text)`` tuples in output order. The position is
    that of the node the line belongs to; nodes without their own position
    (``Load``, ``arguments``, ...) inherit their parent's, and lines that
    belong to a position-less root such as ``Module`` carry ``(None, None)``.

    Args:
        node: Root ``ast.AST`` node to dump.
        indent_chars: String repeated once per nesting level as indentation.
        indent_level: Nesting level the dump starts at.
    """

    def __init__(self, node, indent_chars=' ', indent_level=0):
        self.node = node
        self.doc = []    # finished lines: ((lineno, col_offset), text)
        self.line = ''   # text of the line currently being assembled
        self.indent_level = indent_level
        self.indent_chars = indent_chars

    def add_to_line(self, *contents):
        """Append the given strings to the line being assembled."""
        self.line += ''.join(contents)

    def terminate_line(self, lc, *tail_contents):
        """Finish the pending line and store it in ``doc`` tagged with *lc*.

        Args:
            lc: ``(lineno, col_offset)`` tuple recorded next to the line.
            *tail_contents: Strings appended before the line is stored.
        """
        self.add_to_line(*tail_contents)
        indent = self.indent_chars * self.indent_level
        self.doc.append((lc, indent + self.line))
        self.line = ''

    def indent(self, level=1):
        """Increase the nesting level by *level*."""
        self.indent_level += level

    def dedent(self, level=1):
        """Decrease the nesting level by *level*."""
        self.indent_level -= level

    def build(self, node=None, parent_lno=None, parent_col=None):
        """Dump *node* and everything below it into ``doc``.

        Args:
            node: Node to dump; ``None`` means the root passed to the
                constructor.
            parent_lno: Line number used when *node* has none of its own.
            parent_col: Column offset used when *node* has none of its own.
        """
        if node is None:
            node = self.node
        lineno = getattr(node, 'lineno', None)
        col_offset = getattr(node, 'col_offset', None)
        # Compare against None rather than truthiness: column 0 is a valid
        # position and must be inherited like any other.
        if lineno is None:
            lineno = parent_lno
        if col_offset is None:
            col_offset = parent_col
        lc = lineno, col_offset
        self.add_to_line(node.__class__.__name__)
        if not node._fields:
            self.terminate_line(lc, '(),')
            return
        self.terminate_line(lc, '(')
        self.indent()
        for name in node._fields:
            value = getattr(node, name, None)
            # The "name: " prefix stays pending; whichever branch runs next
            # completes the line.
            self.add_to_line(name, ': ')
            if isinstance(value, ast.AST):
                self.build(value, parent_lno=lineno, parent_col=col_offset)
            elif isinstance(value, list):
                if not value:
                    self.terminate_line(lc, '[],')
                    continue
                self.terminate_line(lc, '[')
                self.indent()
                for item in value:
                    if isinstance(item, ast.AST):
                        self.build(
                            item, parent_lno=lineno, parent_col=col_offset)
                    else:
                        # Lists may hold plain values (None in kw_defaults
                        # or Dict.keys, str in Global.names). Recursing on
                        # None would restart from the root node.
                        self.terminate_line(lc, repr(item), ',')
                self.dedent()
                self.terminate_line(lc, '],')
            else:
                self.terminate_line(lc, repr(value), ',')
        self.dedent()
        self.terminate_line(lc, '),')
def _astdiff_print_ast(marker, entries):
    """Print dumped AST lines, each prefixed with *marker* and its position."""
    for (lno, col_offset), line in entries:
        # Lines of position-less nodes (e.g. Module) carry None; show 0
        # instead of letting the integer format spec raise.
        print('{} AST {:06d}:{:04d} - {}'.format(
            marker, lno or 0, col_offset or 0, line))


def _astdiff_print_src(template, slines, entries):
    """Print the source lines of *slines* spanned by the positions in *entries*."""
    lnos = [lno for (lno, _col), _line in entries if lno is not None]
    if not lnos:
        # Nothing in this range maps back to a source line.
        return
    first, last = min(lnos), max(lnos)
    for lno, text in enumerate(slines[first - 1:last], first):
        print(template.format(lno, text))


def astdiff(s1, s2):
    """Print an AST-level diff of two Python sources with matching source lines.

    Both sources are parsed and dumped with ``NodeDumper``; the dumps are
    compared line by line with ``SequenceMatcher``. For every opcode a header
    is printed, followed (for non-equal opcodes) by the affected AST dump
    lines and the source lines they span. ``<<<`` marks content from *s1*,
    ``>>>`` content from *s2*; a ``*`` after ``[SRC nnnnnn]`` marks lines
    shown from the *other* file at the same line numbers, for orientation
    only.

    Args:
        s1: First (old) Python source text.
        s2: Second (new) Python source text.

    Raises:
        SyntaxError: If either source cannot be parsed by ``ast.parse``.
    """
    slines1 = s1.splitlines()
    slines2 = s2.splitlines()
    nd1 = NodeDumper(ast.parse(s1))
    nd2 = NodeDumper(ast.parse(s2))
    nd1.build()
    nd2.build()
    seq1 = [line for _lc, line in nd1.doc]
    seq2 = [line for _lc, line in nd2.doc]
    sm = SequenceMatcher(None, seq1, seq2)
    print('=' * 60)
    for tag, l1, l2, r1, r2 in sm.get_opcodes():
        # l1:l2 index the first dump, r1:r2 the second one.
        left = nd1.doc[l1:l2]
        right = nd2.doc[r1:r2]
        if tag == 'equal':
            print('EQUAL: AST {:06d},{:04d} - {:04d},{:04d}'.format(
                l1, l2, r1, r2))
        elif tag == 'replace':
            print('REPLACED AST {:06d}:{:04d} - {:04d}:{:04d}'.format(
                l1, l2, r1, r2))
            _astdiff_print_ast('<<<', left)
            print('TO:')
            _astdiff_print_ast('>>>', right)
            print('-' * 60)
            _astdiff_print_src('<<< [SRC {:06d}] {}', slines1, left)
            _astdiff_print_src('>>> [SRC {:06d}] {}', slines2, right)
        elif tag == 'delete':
            print('DELETED: AST {:06d}:{:04d} - {:04d}:{:04d}'.format(
                l1, l2, r1, r2))
            _astdiff_print_ast('<<<', left)
            print('-' * 60)
            _astdiff_print_src('<<< [SRC {:06d}] {}', slines1, left)
            # Same line numbers in the second source, as approximate context.
            _astdiff_print_src('>>> [SRC {:06d}]*{}', slines2, left)
        elif tag == 'insert':
            print('INSERTED: AST {:06d}:{:04d} - {:04d}:{:04d}'.format(
                l1, l2, r1, r2))
            # Inserted entries exist only in the second dump.
            _astdiff_print_ast('>>>', right)
            print('-' * 60)
            # Same line numbers in the first source, as approximate context.
            _astdiff_print_src('<<< [SRC {:06d}]*{}', slines1, right)
            _astdiff_print_src('>>> [SRC {:06d}] {}', slines2, right)
        print('=' * 60)
if __name__ == '__main__':
    # Run the diff only when executed as a script, so importing this module
    # does not read sys.argv or open files as a side effect.
    if len(sys.argv) < 3:
        sys.exit('usage: {} FILE1 FILE2'.format(sys.argv[0]))
    with open(sys.argv[1]) as f1, open(sys.argv[2]) as f2:
        astdiff(f1.read(), f2.read())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment