Skip to content

Instantly share code, notes, and snippets.

@math314
Created May 26, 2014 11:18
Show Gist options
  • Select an option

  • Save math314/8590ec95bfb979067570 to your computer and use it in GitHub Desktop.

Select an option

Save math314/8590ec95bfb979067570 to your computer and use it in GitHub Desktop.
#coding=utf-8
import sys,os
import re
from collections import defaultdict
from itertools import *
def scc(edges,redges):
def dfs(v):
used.add(v)
if v in edges:
for t in edges[v]:
if t not in used:
dfs(t)
d1.append(v)
def rdfs(v,l):
used.add(v)
l.append(v)
if v in redges:
for t in redges[v]:
if t not in used:
rdfs(t,l)
used = set()
d1 = []
for v in edges:
if v not in used:
dfs(v)
used = set()
ret = []
for v in reversed(d1):
if v not in used:
l = []
rdfs(v,l)
ret.append(l)
return ret
def lexical_order_toporogycal_sort(_nodes,edges):
nodes = list(sorted(_nodes))
in_rank = {node:0 for node in nodes}
for es in edges:
for t in es:
in_rank[t] += 1
used = set()
ret = []
for _ in xrange(len(nodes)):
select = None
for node in nodes:
if node not in used and in_rank[node] == 0:
select = node
break
ret.apped(select)
used.add(select)
for t in edges[select]:
in_rank[t] -= 1
return ret
class Asm(object):
def __init__(self,address,binary,opecode,operand):
self.address = address
self.binary = binary
self.opecode = opecode
self.operand = operand
self.indent = 0
self.comment = ''
self.attribute = []
def next_address(self):
if self.opecode in ('ret','jmp'): return None
else: return self.address + len(self.binary)
def jump_to(self):
if not self.opecode.startswith('j'): return None
return int(self.operand.split()[0],16)
def __str__(self):
indent = ' ' * self.indent
address = "%x" % self.address
binary = ' '.join(['%02x' % i for i in self.binary])
opecode = self.opecode if self.opecode is not None else ''
operand = self.operand if self.operand is not None else ''
comment = self.comment if self.comment is not None else ''
attribute = str(self.attribute) if len(self.attribute) != 0 else ''
ret = []
# if 'if' in self.attribute:
# ret.append(' ' * (self.indent - 1) + 'if(state) {')
# elif 'for' in self.attribute:
# ret.append(' ' * (self.indent - 1) + 'for(init; cond; loop-expression) {')
# elif 'while' in self.attribute:
# ret.append(' ' * (self.indent - 1) + 'while(true) {')
# elif 'else' in self.attribute:
# ret.append(' ' * (self.indent - 1) + '} else {')
# end_blacket_indent = self.indent - 1
# for attr in self.attribute:
# if attr in ('end if','end for','end while'): end_blacket_indent += 1
# for attr in self.attribute:
# if attr in ('end if','end for','end while'):
# ret.append(' ' * end_blacket_indent + '} //' + attr);
# end_blacket_indent -= 1
ret.append("%s%s:\t%-22s\t%s\t%s; %s %s" % \
(indent,address,binary,opecode,operand,comment,attribute) \
)
# if self.opecode[0] == 'j' and len(self.attribute) == 0:
# ret[-1] += 'no attr?'
return '\n'.join(ret)
class AsmNode(object):
def __init__(self,nodes):
self._nodes = nodes
self._address_list = list(chain(node.address_list() for node in nodes))
self._next_to = self.init_next_to()
def init_next_to(self):
next_list = []
for node in self._nodes:
node_next = node.next_to()
for to in node_next:
if not self.contain_address(to):
next_list.append(to)
return next_list
def head_address(self):
return self._nodes[0].head_address()
def address_list(self):
return self._address_list
def contain_address(self,address):
return address in self._address_list
def next_to(self):
return self._next_to
class AsmLeaf(object):
def __init__(self,asms):
self._asms = asms
def head_address(self):
return self._asms[0].address
def address_list(self):
return [asm.address for asm in self._asms]
def next_to(self):
l = []
jump_to = self._asms[-1].jump_to()
if jump_to and jump_to not in self.address_list(): l.append(jump_to)
next_address = self._asms[-1].next_address()
if next_address: l.append(next_address)
return l
def __str__(self):
l = []
l.append('start address = %x' % self.head_address())
for asm in self._asms:
l.append(' ' + str(asm))
return '\n'.join(l)
class Function(object):
def __init__(self,asms):
pass
def load_assembler(file_name):
part = re.compile(r"\s(?P<address>\w+):\s+(?P<binary>(\w\w\s)+)\s+((?P<opecode>\w+)(\s+(?P<operand>.*))?)?$")
data = [i.strip('\n') for i in open(file_name)]
ret = []
i = 0
text_section = False
while i < len(data):
line = data[i]
if line == "Disassembly of section .text:":
text_section = True
i += 3
continue
if text_section:
if line == "":
break
m = part.match(line)
address = int(m.group("address"),16)
binary = [int(j,16) for j in m.group("binary").split()]
opecode = m.group("opecode")
operand = m.group("operand")
asm = Asm(address,binary,opecode,operand)
ret.append(asm)
i += 1
return ret
def divide_to_function(asms):
next_instruction_address = asms[0].address
current_graph = []
for asm in asms:
if next_instruction_address < asm.address:
yield current_graph
current_graph = []
next_instruction_address = asm.address
current_graph.append(asm)
if asm.opecode == 'ret':
continue
elif asm.opecode == 'jmp':
jump_to = int(asm.operand.split()[0],16)
next_instruction_address = max(next_instruction_address,jump_to)
elif asm.opecode.startswith('j'):
jump_to = int(asm.operand.split()[0],16)
to = max(jump_to,asm.next_address())
next_instruction_address = max(next_instruction_address,to)
else:
next_instruction_address = max(next_instruction_address,asm.next_address())
if current_graph:
yield current_graph
def divide_to_asm_block(asms):
feature_addresses = []
for asm in asms:
if asm.jump_to() is not None:
feature_addresses.append(asm.jump_to())
feature_addresses.append(asm.address + len(asm.binary))
feature_addresses = list(sorted(set(feature_addresses)))
i = 0
l = []
for asm in asms:
if i < len(feature_addresses) and feature_addresses[i] == asm.address:
#print i,"%x" % l[-1].address
yield AsmLeaf(l)
l = []
i += 1
l.append(asm)
if l:
yield AsmLeaf(l)
def asm_block_to_dot_format(asm_nodes, node_dict):
w = []
for k,v in node_dict.items():
w.append('"%x"[label="%s"];' % \
(k,''.join(i + '\\l' for i in str(v).split('\n')[1:])) \
)
w.append('"%s" -> "%x";' % ('start',asm_nodes[0].head_address()) )
for node in asm_nodes:
for to in node.next_to():
w.append('"%x" -> "%x";' % (node.head_address(),to) )
print 'digraph asm {\n node [shape = box];\n%s \n}' % '\n'.join([' ' + i for i in w])
def build_graph(asm_nodes):
if len(asm_nodes) == 1:
return AsmNode(asm_nodes)
edges = defaultdict(list)
redges = defaultdict(list)
head_address = asm_nodes[0].head_address()
def add_edge(v,t):
if v != head_address: edges[v].append(t)
if t != head_address: edges[t].append(v)
for node in asm_nodes:
for to in node.next_to():
add_edge(node.head_address(),to)
address_to_node = {node.head_address(): node for node in asm_nodes}
edges = dict(edges)
redges = dict(redges)
ret = []
for result_nodes_addresses in scc(edges,redges):
result_nodes = map(lambda address: address_to_node[address],result_nodes_addresses)
if len(result_nodes) == 1:
ret.append(result_nodes[0])
else:
ret.append(analyze(result_nodes))
ret.sort(key=lambda blocks: blocks.head_address())
# ret = lexical_order_toporogycal_sort(ret)
return AsmNode(ret)
def analyze_function(asms):
asm_nodes = [asm_block for asm_block in divide_to_asm_block(asms)]
node_dict = {}
for node in asm_nodes:
#print node
node_dict[node.head_address()] = node
asm_block_to_dot_format(asm_nodes, node_dict)
root = build_graph(asm_nodes)
return root
def main():
asms = load_assembler(sys.argv[1])
for func in divide_to_function(asms):
#print "-" * 30
analyzed = analyze_function(func)
#print "-" * 30
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment