Last active
September 9, 2018 01:09
-
-
Save whosaysni/000fdf8feb2301d40e85365e99dbdbf9 to your computer and use it in GitHub Desktop.
PoC code to display AST difference with corresponding source lines - ASTの差分をとり差異部分のソースコードと一緒に表示するPoC (SUPER DIRTY)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
import ast | |
import sys | |
from difflib import SequenceMatcher | |
class NodeDumper(object):
    """Flatten an AST into indented text lines tagged with source positions.

    After ``build()`` has run, ``doc`` holds one ``((lineno, col_offset), text)``
    tuple per dumped line, in output order.  Nodes that carry no position of
    their own inherit the position of the nearest enclosing node that has one;
    lines with no positioned ancestor are tagged ``(None, None)``.
    """

    def __init__(self, node, indent_chars=' ', indent_level=0):
        """Remember the root *node* and the indentation settings.

        node: root ``ast.AST`` node dumped when ``build()`` is called
            without an explicit node.
        indent_chars: string repeated once per indentation level.
        indent_level: indentation level the dump starts at.
        """
        self.node = node
        self.doc = []    # finished lines: ((lineno, col_offset), text)
        self.line = ''   # text of the line currently being assembled
        self.indent_level = indent_level
        self.indent_chars = indent_chars

    def add_to_line(self, *contents):
        """Append each string in *contents* to the pending line."""
        self.line += ''.join(contents)

    def terminate_line(self, lc, *tail_contents):
        """Finish the pending line and store it in ``doc`` tagged with *lc*.

        lc: ``(lineno, col_offset)`` pair recorded alongside the line.
        tail_contents: strings appended to the pending line before it is
            stored.
        """
        self.add_to_line(*tail_contents)
        indent = self.indent_chars * self.indent_level
        self.doc.append((lc, indent + self.line))
        self.line = ''

    def indent(self, level=1):
        """Increase the indentation level by *level*."""
        self.indent_level += level

    def dedent(self, level=1):
        """Decrease the indentation level by *level*."""
        self.indent_level -= level

    def build(self, node=None, parent_lno=None, parent_col=None):
        """Dump *node* (default: the root node) and its fields into ``doc``.

        node: AST node to dump; ``None`` selects ``self.node``.
        parent_lno, parent_col: position used when *node* has no
            ``lineno`` / ``col_offset`` attribute of its own.
        """
        if node is None:
            node = self.node
        lineno = getattr(node, 'lineno', None)
        col_offset = getattr(node, 'col_offset', None)
        # Compare against None rather than truthiness: column 0 is a valid
        # parent position and must still be inherited.
        if lineno is None:
            lineno = parent_lno
        if col_offset is None:
            col_offset = parent_col
        lc = (lineno, col_offset)
        self.add_to_line(node.__class__.__name__)
        node_vars = list(ast.iter_fields(node))
        if not node_vars:
            self.terminate_line(lc, '(),')
            return
        self.terminate_line(lc, '(')
        self.indent()
        for k, v in node_vars:
            self.add_to_line(k, ': ')
            if isinstance(v, ast.AST):
                self.build(v, parent_lno=lineno, parent_col=col_offset)
            elif isinstance(v, list):
                if not v:
                    self.terminate_line(lc, '[],')
                    continue
                self.terminate_line(lc, '[')
                self.indent()
                for item in v:
                    if isinstance(item, ast.AST):
                        self.build(
                            item,
                            parent_lno=lineno, parent_col=col_offset)
                    else:
                        # Some list fields hold plain values (e.g. the
                        # strings in Global.names); they have no _fields
                        # to recurse into, so dump their repr instead.
                        self.terminate_line(lc, repr(item), ',')
                self.dedent()
                self.terminate_line(lc, '],')
            else:
                self.terminate_line(lc, repr(v), ',')
        self.dedent()
        self.terminate_line(lc, '),')
def _print_ast_lines(marker, entries):
    """Print each dumped AST line in *entries* prefixed by *marker* and its position."""
    for (lno, col_offset), line in entries:
        # Lines without a known position (None) are shown as 0 so that the
        # integer format specs do not raise TypeError.
        print('{} AST {:06d}:{:04d} - {}'.format(
            marker, lno or 0, col_offset or 0, line))


def _print_src_lines(marker, flag, slines, lnos):
    """Print the lines of *slines* spanning the smallest to largest number in *lnos*."""
    # Drop unknown positions (None); 0 is dropped too because source line
    # numbers are 1-based and 0 would turn into a negative slice start.
    known = [lno for lno in lnos if lno]
    if not known:
        return
    first, last = min(known), max(known)
    for lno, text in enumerate(slines[first - 1:last], first):
        print('{} [SRC {:06d}]{}{}'.format(marker, lno, flag, text))


def astdiff(s1, s2):
    """Print the differences between the ASTs of two Python sources.

    Both sources are parsed and dumped to text lines with ``NodeDumper``;
    the two dumps are compared with ``SequenceMatcher``.  For every
    non-equal opcode the differing AST lines are printed, followed by the
    source lines they span.  ``<<<`` marks the first source (*s1*), ``>>>``
    the second (*s2*); a ``*`` after the ``[SRC ...]`` tag marks lines taken
    from the other source at the same line numbers, shown for context.

    s1: text of the first (old) Python source.
    s2: text of the second (new) Python source.

    Raises whatever ``ast.parse`` raises for invalid source (``SyntaxError``).
    """
    slines1 = s1.splitlines()
    slines2 = s2.splitlines()
    nd1 = NodeDumper(ast.parse(s1))
    nd2 = NodeDumper(ast.parse(s2))
    nd1.build()
    nd2.build()
    seq1 = [line for _, line in nd1.doc]
    seq2 = [line for _, line in nd2.doc]
    sm = SequenceMatcher(None, seq1, seq2)
    print('='*60)
    for tag, l1, l2, r1, r2 in sm.get_opcodes():
        # l1:l2 indexes the first dump, r1:r2 the second one.
        left = nd1.doc[l1:l2]
        right = nd2.doc[r1:r2]
        l_lnos = [lc[0] for lc, _ in left]
        r_lnos = [lc[0] for lc, _ in right]
        if tag == 'equal':
            print('EQUAL: AST {:06d},{:04d} - {:04d},{:04d}'.format(
                l1, l2, r1, r2))
        elif tag == 'replace':
            print('REPLACED AST {:06d}:{:04d} - {:04d}:{:04d}'.format(
                l1, l2, r1, r2))
            _print_ast_lines('<<<', left)
            print('TO:')
            _print_ast_lines('>>>', right)
            print('-'*60)
            _print_src_lines('<<<', ' ', slines1, l_lnos)
            _print_src_lines('>>>', ' ', slines2, r_lnos)
        elif tag == 'delete':
            print('DELETED: AST {:06d}:{:04d} - {:04d}:{:04d}'.format(
                l1, l2, r1, r2))
            _print_ast_lines('<<<', left)
            print('-'*60)
            _print_src_lines('<<<', ' ', slines1, l_lnos)
            # Nothing on the new side corresponds to the deletion; show the
            # same line range of the new source as context.
            _print_src_lines('>>>', '*', slines2, l_lnos)
        elif tag == 'insert':
            print('INSERTED: AST {:06d}:{:04d} - {:04d}:{:04d}'.format(
                l1, l2, r1, r2))
            # Inserted lines exist only in the second dump.
            _print_ast_lines('>>>', right)
            print('-'*60)
            # Nothing on the old side corresponds to the insertion; show the
            # same line range of the old source as context.
            _print_src_lines('<<<', '*', slines1, r_lnos)
            _print_src_lines('>>>', ' ', slines2, r_lnos)
    # NOTE(review): closing rule is printed once after all opcodes, mirroring
    # the opening rule above - confirm this matches the intended layout.
    print('='*60)
# Script entry: diff the ASTs of the two Python files named by the first and
# second command-line arguments.
with open(sys.argv[1]) as old_file:
    with open(sys.argv[2]) as new_file:
        old_source = old_file.read()
        new_source = new_file.read()
        astdiff(old_source, new_source)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment