Skip to content

Instantly share code, notes, and snippets.

@WanderingGlitch
Created November 12, 2019 01:30
Show Gist options
  • Save WanderingGlitch/a033beb47fe8676e91aaff810c363d46 to your computer and use it in GitHub Desktop.
Save WanderingGlitch/a033beb47fe8676e91aaff810c363d46 to your computer and use it in GitHub Desktop.
IDA 2to3 fixers to help move to the newer APIs
#!/usr/bin/env python
'''
This has some pretty gross hacks in it,
but it gives a general idea of what it is like to write a 2to3 fixer.

Basically, run it like this:
    ida2to3.py /path/to/your/script /path/to/idc_bc695.py
Give the output a once-over to make sure it didn't break too much, then:
    ida2to3.py -w /path/to/your/script /path/to/idc_bc695.py

Note that you could use this to move away from the old IDA APIs without moving to py3.
'''
import sys
import os
import re
from lib2to3.main import main as lib2to3_main
from lib2to3 import fixer_base
from lib2to3.fixer_util import ArgList
from lib2to3.fixer_util import Comma
from lib2to3.fixer_util import Name
from lib2to3.fixer_util import Number
from lib2to3.fixer_util import String
from lib2to3.fixer_util import attr_chain
from lib2to3.fixer_util import touch_import
from lib2to3.pgen2 import token
from lib2to3.pygram import python_symbols
from lib2to3.pytree import Leaf
from lib2to3.pytree import Node
# This is awful, but lets all of this live in a single file
def hook_refactor():
    """Make lib2to3 pick up the fixers defined in this very script.

    Replaces ``lib2to3.refactor.get_fixers_from_package`` with a wrapper that
    appends our fixer module names to whatever list the original returns, and
    registers each of those names in ``sys.modules`` as an alias of the
    ``__main__`` module.
    """
    from lib2to3 import refactor

    # Hooking refactor.get_all_fix_names as well would make us show up in the
    # fixer listing; that is skipped because it would have to be undone again
    # inside refactor.get_fixers_from_package.
    fixer_modules = (
        'ida_complex_functions',
        'ida_complex_renames',
        'ida_simple_renames',
        'ida_imports',
    )
    wrapped = refactor.get_fixers_from_package

    def hooked_get_fixers_from_package(fixer_pkg, *args, **kwargs):
        found = wrapped(fixer_pkg, *args, **kwargs)
        found.extend(fixer_modules)
        return found

    refactor.get_fixers_from_package = hooked_get_fixers_from_package

    # Presumably lib2to3 imports each fixer by module name and looks up the
    # matching Fix* class in it; aliasing to __main__ keeps everything in one
    # file -- confirm against lib2to3.refactor.
    for module_name in fixer_modules:
        sys.modules[module_name] = sys.modules['__main__']
# Install the hook at module load, i.e. before main() hands control to lib2to3.
hook_refactor()
# Plain renames: old name -> (package or None, new name).
# Filled by process_idc_bc695(); FixIdaImports also adds import aliases here.
OLD_NEW_MAP = {}
# Wrappers whose argument list changes:
# old name -> (package, new name, old argument names, new argument expressions).
COMPLICATED_FUNCTIONS = {}
# Old names whose `def` line process_idc_bc695() could not parse; the user has
# to convert uses of these by hand.
UNSUPPORTED_REPLACEMENTS = set()
class FixIdaImports(fixer_base.BaseFix):
    """Drop ``from idc import ...`` entries for names listed in OLD_NEW_MAP.

    The real PATTERN depends on OLD_NEW_MAP and is therefore built by
    generate_pattern() once that table has been filled.
    """
    run_order = 1
    # Placeholder that is never expected to match; see generate_pattern().
    PATTERN = '''power< 'this_should_never_match_anything' trailer< '.' 'sorry_if_it_does' > >'''

    @classmethod
    def generate_pattern(kls):
        """Rebuild PATTERN with four import shapes per name in OLD_NEW_MAP.

        If OLD_NEW_MAP is empty the placeholder PATTERN is kept, because
        joining zero alternatives would yield an empty pattern string.
        """
        templates = (
            # from idc import blah
            "import0=import_from< 'from' 'idc' 'import' attr_name={name!r} >",
            # from idc import this as that
            "import1=import_from< 'from' 'idc' 'import' "
            "import_as=import_as_name< {name!r} 'as' any* > >",
            # from idc import this, that, other
            "import2=import_from< 'from' 'idc' 'import' "
            "imports_as=import_as_names< any* {name!r} any* > >",
            # from idc import this as that, this2 as that2
            "import2=import_from< 'from' 'idc' 'import' "
            "imports_as=import_as_names< any* import_as_name< {name!r} 'as' any* > any* > >",
        )
        alternatives = [
            '\n{}\n'.format(template.format(name=name))
            for name in OLD_NEW_MAP
            for template in templates
        ]
        if alternatives:
            kls.PATTERN = '|'.join(alternatives)

    def _handle_import_as_name(self, node, import_node, sibling_value=None):
        """Remove one imported name (and an adjacent separator) from the tree.

        node          -- the matched import_from node (unused here).
        import_node   -- a Leaf (``Fatal``) or an import_as_name Node
                         (``Fatal as idc_Fatal``).
        sibling_value -- value of the separator leaf to remove with it
                         (``','`` inside import_as_names), or None.

        Returns False when import_node was left untouched. Otherwise returns
        the number of *preceding* siblings removed (0 or 1), so a caller that
        walks the parent's children by index can compensate.

        For ``x as y`` the alias ``y`` is recorded in OLD_NEW_MAP with the same
        target as ``x``, unless ``y`` already maps somewhere else.
        """
        if isinstance(import_node, Node):
            # Node(import_as_name, [Leaf(1, u'Fatal'), Leaf(1, u'as'), Leaf(1, u'idc_Fatal')])
            if len(import_node.children) != 3:
                print('Unable to process {!r} as it does not have 3 children'.format(import_node))
                return False
            attr_leaf, as_leaf, alias_leaf = import_node.children
            if not isinstance(attr_leaf, Leaf):
                raise Exception('We should not have reached here without the attr Leaf')
            if not isinstance(as_leaf, Leaf) or as_leaf.value != 'as':
                raise Exception('We should not have reached here without an "as" Leaf')
            if not isinstance(alias_leaf, Leaf):
                raise Exception('We should not have reached here without the alias being a Leaf')
            attr = attr_leaf.value
            alias = alias_leaf.value
            if attr not in OLD_NEW_MAP:
                return False
            if alias in OLD_NEW_MAP and OLD_NEW_MAP[alias] != OLD_NEW_MAP[attr]:
                print('Unable to process {!r} as it would be destructive to do so'.format(import_node))
                print(' {} maps to {} instead of {}'.format(alias, OLD_NEW_MAP[alias], OLD_NEW_MAP[attr]))
                return False
            OLD_NEW_MAP[alias] = OLD_NEW_MAP[attr]
        elif isinstance(import_node, Leaf):
            # Leaf(1, u'Fatal'); separator leaves such as ',' also end up here
            # and are skipped because they are not in OLD_NEW_MAP.
            if import_node.value not in OLD_NEW_MAP:
                return False

        retval = 0
        if import_node.next_sibling is None:
            # Last entry: the separator to drop is the one in front of it.
            if isinstance(import_node.prev_sibling, Leaf) and import_node.prev_sibling.value == sibling_value:
                retval += 1
                import_node.prev_sibling.remove()
        elif isinstance(import_node.next_sibling, Leaf) and import_node.next_sibling.value == sibling_value:
            import_node.next_sibling.remove()
        import_node.remove()
        return retval

    def transform(self, node, result):
        """Delete the matched import, or only the renamed names within it."""
        if 'import0' in result:
            if isinstance(node.next_sibling, Leaf) and node.next_sibling.value == '\n':
                node.next_sibling.remove()
            node.remove()
        elif 'import1' in result:
            import_node = result['import_as']
            # Keep the statement when the alias could not be processed, as
            # the import2 branch already does; previously it was deleted
            # even after reporting "Unable to process".
            # NOTE: unlike import0, the trailing newline leaf is left alone
            # (its removal was commented out upstream with a question mark).
            if self._handle_import_as_name(node, import_node) is not False:
                node.remove()
        elif 'import2' in result:
            import_nodes = result['imports_as']
            i = 0
            while i < len(import_nodes.children):
                import_node = import_nodes.children[i]
                retval = self._handle_import_as_name(node, import_node, sibling_value=',')
                # False: nothing removed, advance. Otherwise children shifted
                # left, so stay put (minus any removed preceding separator).
                i = i + 1 if retval is False else i - retval
            if len(import_nodes.children) == 0:
                # Every name was dropped, so the statement itself goes too
                # (again leaving the trailing newline leaf in place).
                node.remove()
        else:
            raise Exception('Should not have reached here')
class FixIdaComplexFunctions(fixer_base.BaseFix):
    """Rewrite calls to old wrappers whose argument list differs from the new API.

    Handles both ``OldName(args)`` and ``idc.OldName(args)`` for every name in
    COMPLICATED_FUNCTIONS, replacing the callee and rebuilding the arguments.
    """
    run_order = 2
    order = 'pre'
    PATTERN = '''
    power< attr=TOKEN arg_trailer=trailer< '(' args=any* ')' > >
    |
    power< idc='idc' trailer=trailer< '.' attr=TOKEN > arg_trailer=trailer< '(' args=any* ')' > >
    '''
    # Integer literals, including hex digits a-f.
    _NUMBER_RE = re.compile(r'^-?(?:0[xX][0-9a-fA-F]+|[0-9]+)$')
    # Strings, or our best guess at them.
    _STRING_RE = re.compile(r'''^(?:'|").*(?:'|")+$''')

    def match(self, node):
        """Return the pattern results only when the callee is a known wrapper."""
        results = super(FixIdaComplexFunctions, self).match(node)
        if not results:
            return False
        if results['attr'].value not in COMPLICATED_FUNCTIONS:
            return False
        return results

    @staticmethod
    def _split_arguments(arg_nodes):
        """Split the call's argument nodes into one entry per argument.

        Returns a list whose entries are either a tuple of nodes (arguments
        taken from an arglist) or a single node (a lone argument), or None
        when the trailer does not hold zero or one child.
        """
        if len(arg_nodes) == 0:
            return []
        if len(arg_nodes) != 1:
            return None
        arg_node = arg_nodes[0]
        if arg_node.type != python_symbols.arglist:
            return [arg_node]
        # Not the cleanest way, but needed in case of expressions in the arguments
        arguments = []
        current_arg = []
        for arg in arg_node.children:
            if isinstance(arg, Leaf) and arg.value == ',':
                arguments.append(tuple(current_arg))
                current_arg = []
            else:
                current_arg.append(arg)
        # A trailing comma leaves nothing pending; do not count it as an argument.
        if current_arg:
            arguments.append(tuple(current_arg))
        return arguments

    def transform(self, node, results):
        """Replace the callee name and rebuild the argument list in place.

        Arguments named in the old signature are copied from the call; any
        other entry of the new signature is emitted as a literal or as a name
        that gets imported. The call is left untouched (with a message) when
        its arguments cannot be mapped onto the old signature.
        """
        old_arguments = self._split_arguments(results['args'])
        if old_arguments is None:
            print('Expected {!r} to either have no arguments or exactly one'.format(node))
            return

        oldname_node = results['attr']
        oldname = oldname_node.value
        package, newname, oldargs, newargs = COMPLICATED_FUNCTIONS[oldname]
        if len(old_arguments) != len(oldargs):
            # Indexing old_arguments by position would fail or mis-assign.
            print('Unable to transform call to {}: expected {} argument(s) but found {}'.format(
                oldname, len(oldargs), len(old_arguments)))
            return

        if 'idc' in results:
            # idc.OldName(...) -> newname(...): drop the ".OldName" trailer
            # and put the new name where "idc" was.
            idc_node = results['idc']
            results['trailer'].remove()
            idc_node.replace(Name(newname, prefix=idc_node.prefix))
        else:
            oldname_node.replace(Name(newname, prefix=oldname_node.prefix))
        # The bare new name has to be importable, as in FixIdaComplexRenames.
        if package is not None:
            touch_import(package, newname, node)

        new_arguments = []
        for i, arg in enumerate(newargs):
            prefix = ' ' if i != 0 else None
            if i != 0:
                new_arguments.append(Comma())
            if arg in oldargs:
                old_arg = old_arguments[oldargs.index(arg)]
                if isinstance(old_arg, tuple):
                    new_arguments.extend(x.clone() for x in old_arg)
                else:
                    new_arguments.append(old_arg.clone())
            elif self._NUMBER_RE.match(arg):
                new_arguments.append(Number(arg, prefix=prefix))
            elif self._STRING_RE.match(arg):
                # Checked before the dotted-name case so that a quoted
                # literal containing '.' is not split into package/attribute.
                new_arguments.append(String(arg, prefix=prefix))
            elif '.' in arg:
                # Assuming anything with a . is idaapi.blah or some such
                arg_package, attr = arg.rsplit('.', 1)
                touch_import(arg_package, attr, node)
                new_arguments.append(Name(attr, prefix=prefix))
            else:
                # Else we assume it comes from idc
                touch_import('idc', arg, node)
                new_arguments.append(Name(arg, prefix=prefix))

        # ArgList supplies the surrounding parentheses again.
        results['arg_trailer'].replace(ArgList(new_arguments))
# Complex as in idc.blah
class FixIdaComplexRenames(fixer_base.BaseFix):
    """Turn ``idc.OldName`` into the bare new name recorded in OLD_NEW_MAP."""
    run_order = 3
    order = 'pre'
    PATTERN = '''power< idc='idc' trailer=trailer< '.' attr=TOKEN> any*>'''

    def match(self, node):
        """Return the pattern results for ``idc.<name>`` when <name> is in OLD_NEW_MAP.

        The remaining checks only re-verify the shape the pattern already
        implies and print a diagnostic if it is ever violated.
        """
        results = super(FixIdaComplexRenames, self).match(node)
        if not results:
            return False
        # blah = idc.python_on (or any other 'complicated' function) is not in
        # OLD_NEW_MAP, so it is rejected here.
        if results['attr'].value not in OLD_NEW_MAP:
            return False

        children = node.children
        if len(children) < 2:
            print('Should not have reached here with a power node with fewer than 2 children')
            return False

        first = children[0]
        if not (isinstance(first, Leaf) and first.value == 'idc'):
            print('Expected the first child to be "idc"')
            return False

        second = children[1]
        if not (isinstance(second, Node) and second.type == python_symbols.trailer):
            print('Expected the second child to be a trailer node')
            return False

        trailer_children = second.children
        if len(trailer_children) != 2:
            print('Expected the trailer node to have at exactly two children')
            print('It actually has {}'.format(len(trailer_children)))
            return False

        dot = trailer_children[0]
        if not (isinstance(dot, Leaf) and dot.value == '.'):
            print('Expected the first child of the trailer node to be "."')
            return False
        return results

    def transform(self, node, results):
        """Replace ``idc`` with the new name, drop ``.OldName``, import if needed."""
        idc_leaf = results['idc']
        package, newname = OLD_NEW_MAP[results['attr'].value]
        if package is not None:
            touch_import(package, newname, node)
        replacement = Name(newname, prefix=idc_leaf.prefix)
        results['trailer'].remove()
        idc_leaf.replace(replacement)
# Simple as in blah instead of idc.blah
class FixIdaSimpleRenames(fixer_base.BaseFix):
    """Rename bare old names (``blah`` rather than ``idc.blah``) via OLD_NEW_MAP."""
    run_order = 4
    _accept_type = token.NAME
    # Unsupported names already reported, so each one is printed only once.
    PRINTED = set()
    # Node types under which a name belongs to an import and is left alone.
    IMPORT_STATEMENTS = {
        python_symbols.import_stmt,
        python_symbols.import_name,
        python_symbols.import_from,
        python_symbols.import_as_name,
        python_symbols.import_as_names,
    }

    def match(self, node):
        """Return True for a name leaf in OLD_NEW_MAP outside imports and attribute access."""
        for ancestor in attr_chain(node, 'parent'):
            if ancestor.type in self.IMPORT_STATEMENTS:
                return False

        previous = node.prev_sibling
        if isinstance(previous, Leaf) and previous.value == '.':
            # Should have been handled already
            return False

        if not isinstance(node, Leaf):
            return False

        name = node.value
        if name in UNSUPPORTED_REPLACEMENTS:
            if name not in self.PRINTED:
                print('Sorry, you will have to manually handle this conversion: {!r}'.format(name))
                self.PRINTED.add(name)
            return False
        return name in OLD_NEW_MAP

    def transform(self, node, result):
        """Return a replacement name leaf, importing the new name when it has a package."""
        package, newname = OLD_NEW_MAP[node.value]
        if package is not None:
            touch_import(package, newname, node.parent)
        return Name(newname, prefix=node.prefix)
def process_idc_bc695(filepath):
    """Fill the rename tables from an ``idc_bc695.py`` compatibility file.

    Each line of the file is classified as follows:

    * blank lines, ``#`` comments and ``import``/``from`` lines are ignored;
    * ``def old(args): return [pkg.]new(args)`` goes to OLD_NEW_MAP when the
      argument text is unchanged, otherwise to COMPLICATED_FUNCTIONS;
    * any other ``def old(`` line is recorded in UNSUPPORTED_REPLACEMENTS;
    * ``OLD=[pkg.]NEW`` goes to OLD_NEW_MAP.

    Line endings are stripped before matching, so CRLF files and a final line
    without a newline are handled as well. Afterwards the FixIdaImports
    pattern is regenerated.

    Returns False (without reading anything) when *filepath* does not end in
    ``idc_bc695.py``, True otherwise. Raises Exception on a line that fits
    none of the shapes above.
    """
    if not filepath.endswith('idc_bc695.py'):
        return False

    # Compiled once rather than per line; raw strings avoid invalid-escape
    # warnings for \w, \( and friends.
    wrapper_re = re.compile(
        r'^def (?P<oldname>\w+)'
        r'\((?P<oldargs>(?:[^,\)]+?)(?:,[^,\)]+?)*)?\)'
        r': return (?:(?P<package>\w+)\.)?(?P<newname>\w+)'
        r'\((?P<newargs>(?:[^,\)]+?)(?:,[^,\)]+?)*)?\);?$'
    )
    wrapper_name_re = re.compile(r'^def (?P<oldname>\w+)\(')
    alias_re = re.compile(r'^(?P<oldname>\w+)=(?:(?P<package>\w+)\.)?(?P<newname>\w+)$')
    whitespace_re = re.compile(r'\s')
    decode_needed = sys.version_info > (3, 0)

    with open(filepath, 'rb') as file_h:
        oldlines = file_h.readlines()

    for line in oldlines:
        if decode_needed:
            line = line.decode('utf-8')
        # `line` is kept verbatim for messages; `text` is what gets matched.
        text = line.rstrip('\r\n')

        if not text or text.startswith('#'):
            continue
        if text.startswith(('import ', 'from ')):
            continue

        if text.startswith('def '):
            if '=' in text:
                # IDA's autogen doesn't do named arguments, but just in case
                print('Unable to transform lines like {!r} due to keywords'.format(line))
                continue
            match = wrapper_re.match(text)
            if match is None:
                match = wrapper_name_re.match(text)
                if match is None:
                    print('Sorry, you will have to manually handle this conversion: {!r}'.format(line))
                else:
                    UNSUPPORTED_REPLACEMENTS.add(match.group('oldname'))
                continue
            oldargs = match.group('oldargs')
            oldname = match.group('oldname')
            newargs = match.group('newargs')
            newname = match.group('newname')
            package = match.group('package')
            if package is None:
                # An unqualified target is taken to live in idc.
                package = 'idc'
            if oldargs == newargs:
                OLD_NEW_MAP[oldname] = (package, newname)
            else:
                oldargs = () if oldargs is None else tuple(x.strip() for x in oldargs.split(','))
                newargs = () if newargs is None else tuple(x.strip() for x in newargs.split(','))
                COMPLICATED_FUNCTIONS[oldname] = (package, newname, oldargs, newargs)
        elif '=' in text:
            if whitespace_re.search(text) is not None:
                # These are generally OLDNAME=NEWNAME, not expecting spaces
                print('Unable to transform lines like {!r} due to spaces'.format(line))
                continue
            match = alias_re.match(text)
            if match is None:
                print('Unable to transform, line did not match regex: {!r}'.format(line))
                continue
            # NOTE(review): unlike the def branch, a missing package stays
            # None here, so the fixers add no import for it -- confirm that
            # this asymmetry is intended.
            OLD_NEW_MAP[match.group('oldname')] = (match.group('package'), match.group('newname'))
        else:
            raise Exception('Do not know how to handle lines like {!r}'.format(line))

    FixIdaImports.generate_pattern()
    return True
def main():
    """Command-line entry point: load the rename tables, then run lib2to3.

    The last command-line argument must be the path to ``idc_bc695.py``; it is
    consumed here and everything else is left in ``sys.argv`` for lib2to3.
    With ``-h``/``--help`` our usage line is printed and lib2to3 is still
    invoked so that its own help is shown as well.
    """
    usage = 'Usage: {} [args to 2to3] /path/to/idc_bc695.py'.format(sys.argv[0])
    if len(sys.argv) == 1:
        print(usage)
        sys.exit(-1)

    wants_help = '-h' in sys.argv or '--help' in sys.argv
    if wants_help:
        # Fall through so we print out the help doc for 2to3 as well
        print(usage)
    else:
        mapping_path = sys.argv[-1]
        if not (mapping_path.endswith('idc_bc695.py') and os.path.isfile(mapping_path)):
            print('{} is not a valid idc_bc695.py file'.format(mapping_path))
            sys.exit(-1)
        process_idc_bc695(sys.argv.pop())

    sys.exit(lib2to3_main('lib2to3.fixes'))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment