Created
November 12, 2019 01:30
-
-
Save WanderingGlitch/a033beb47fe8676e91aaff810c363d46 to your computer and use it in GitHub Desktop.
IDA 2to3 fixers to help move from the old IDA APIs to the newer ones
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
''' | |
This has some pretty gross hacks in it | |
But gives a general idea what it is like to write a 2to3 fixer | |
Basically run like this: | |
ida2to3.py /path/to/your/script /path/to/idc_bc695.py | |
Give it a once over to make sure it didn't break too much, then: | |
ida2to3.py -w /path/to/your/script /path/to/idc_bc695.py | |
Note that you could use this to move away from the old IDA APIs without moving to py3 | |
''' | |
import sys | |
import os | |
import re | |
from lib2to3.main import main as lib2to3_main | |
from lib2to3 import fixer_base | |
from lib2to3.fixer_util import ArgList | |
from lib2to3.fixer_util import Comma | |
from lib2to3.fixer_util import Name | |
from lib2to3.fixer_util import Number | |
from lib2to3.fixer_util import String | |
from lib2to3.fixer_util import attr_chain | |
from lib2to3.fixer_util import touch_import | |
from lib2to3.pgen2 import token | |
from lib2to3.pygram import python_symbols | |
from lib2to3.pytree import Leaf | |
from lib2to3.pytree import Node | |
# This is awful, but lets all of this live in a single file
def hook_refactor():
    '''
    Make lib2to3 treat this very module as four extra fixer modules.

    Two things are patched:

    * ``lib2to3.refactor.get_fixers_from_package`` is wrapped so that the
      names in ``our_fixers`` are appended to whatever list it returns.
    * Each of those names is registered in ``sys.modules`` as an alias of
      the module that defines this function, so importing e.g.
      ``ida_imports`` yields this file.

    The alias targets ``sys.modules[__name__]`` rather than a hard-coded
    ``'__main__'`` so the hook also works when this file is imported instead
    of being run as a script. Calling the function more than once is a no-op
    after the first call, so fixer names are never appended twice.
    '''
    from lib2to3 import refactor

    # Already installed: do not wrap the wrapper again.
    if getattr(refactor.get_fixers_from_package, '_ida_hooked', False):
        return

    # If we hook refactor.get_all_fix_names, we could appear in the fixer list
    # Not doing that since we'd have to undo it in refactor.get_fixers_from_package
    our_fixers = [
        'ida_complex_functions',
        'ida_complex_renames',
        'ida_simple_renames',
        'ida_imports',
    ]
    original_get_fixers_from_package = refactor.get_fixers_from_package

    def hooked_get_fixers_from_package(fixer_pkg, *args, **kwargs):
        fixers = original_get_fixers_from_package(fixer_pkg, *args, **kwargs)
        fixers.extend(our_fixers)
        return fixers

    hooked_get_fixers_from_package._ida_hooked = True
    refactor.get_fixers_from_package = hooked_get_fixers_from_package

    this_module = sys.modules[__name__]
    for fixer_name in our_fixers:
        sys.modules[fixer_name] = this_module
# Install the lib2to3 hook at import time, before any fixer lookup happens.
hook_refactor()

# Old name -> (package, new name) for renames whose arguments are unchanged.
# Filled in by process_idc_bc695(); FixIdaImports also adds import aliases.
OLD_NEW_MAP = {}

# Old name -> (package, new name, old args, new args) for functions whose
# argument list differs between the old and the new API.
COMPLICATED_FUNCTIONS = {}

# Old names whose definitions could not be parsed; these need manual work.
UNSUPPORTED_REPLACEMENTS = set()
class FixIdaImports(fixer_base.BaseFix):
    '''
    Remove ``from idc import <old name>`` style imports.

    The real PATTERN depends on the contents of OLD_NEW_MAP, so it is built
    by generate_pattern() once that map has been populated. Aliased imports
    (``from idc import old as alias``) additionally register ``alias`` in
    OLD_NEW_MAP with the same target as ``old``.
    '''
    run_order = 1
    # Placeholder used until generate_pattern() installs the real pattern.
    PATTERN = '''power< 'this_should_never_match_anything' trailer< '.' 'sorry_if_it_does' > >'''

    @classmethod
    def generate_pattern(kls):
        '''
        Build PATTERN from the names currently in OLD_NEW_MAP.

        When the map is empty the placeholder PATTERN is left untouched,
        because an empty pattern string cannot be compiled.
        '''
        def _generate_pattern():
            # should remove any `from idc import blah`
            for name in OLD_NEW_MAP:
                # from idc import blah
                yield '''
                    import0=import_from< 'from' 'idc' 'import' attr_name={name!r} >
                '''.format(name=name)
                # from idc import this as that
                yield '''
                    import1=import_from< 'from' 'idc' 'import' import_as=import_as_name< {name!r} 'as' any* > >
                '''.format(name=name)
                # from idc import this, that, other
                yield '''
                    import2=import_from< 'from' 'idc' 'import' imports_as=import_as_names< any* {name!r} any* > >
                '''.format(name=name)
                # from idc import this as that, this2 as that2
                yield '''
                    import2=import_from< 'from' 'idc' 'import' imports_as=import_as_names< any* import_as_name< {name!r} 'as' any* > any* > >
                '''.format(name=name)

        alternatives = list(_generate_pattern())
        if alternatives:
            kls.PATTERN = '|'.join(alternatives)

    def _handle_import_as_name(self, node, import_node, sibling_value=None):
        '''
        Remove one imported name (plain Leaf or ``x as y`` Node) from its parent.

        ``sibling_value`` is the separator text (``','`` for name lists) that
        should be removed together with the name.

        Returns ``False`` when nothing was removed. Otherwise returns the
        number of children removed *before* ``import_node`` (0 or 1), so the
        caller can adjust its index into the parent's children. Callers must
        test with ``is False`` because ``0`` is a success value.
        '''
        if isinstance(import_node, Node):
            # Node(import_as_name, [Leaf(1, u'Fatal'), Leaf(1, u'as'), Leaf(1, u'idc_Fatal')])
            if len(import_node.children) != 3:
                print('Unable to process {!r} as it does not have 3 children'.format(import_node))
                return False
            attr_leaf, as_leaf, alias_leaf = import_node.children
            if not isinstance(attr_leaf, Leaf):
                raise Exception('We should not have reached here without the attr Leaf')
            if not isinstance(as_leaf, Leaf) or as_leaf.value != 'as':
                raise Exception('We should not have reached here without an "as" Leaf')
            if not isinstance(alias_leaf, Leaf):
                raise Exception('We should not have reached here without the alias being a Leaf')
            if attr_leaf.value not in OLD_NEW_MAP:
                return False
            if alias_leaf.value in OLD_NEW_MAP:
                if OLD_NEW_MAP[alias_leaf.value] != OLD_NEW_MAP[attr_leaf.value]:
                    alias = alias_leaf.value
                    attr = attr_leaf.value
                    print('Unable to process {!r} as it would be destructive to do so'.format(import_node))
                    print(' {} maps to {} instead of {}'.format(alias, OLD_NEW_MAP[alias], OLD_NEW_MAP[attr]))
                    return False
            # Later uses of the alias get renamed like the original name.
            OLD_NEW_MAP[alias_leaf.value] = OLD_NEW_MAP[attr_leaf.value]
        elif isinstance(import_node, Leaf):
            # Leaf(1, u'Fatal')
            if import_node.value not in OLD_NEW_MAP:
                return False

        retval = 0
        if import_node.next_sibling is None:
            # Last entry: drop the separator in front of it instead.
            if isinstance(import_node.prev_sibling, Leaf) and import_node.prev_sibling.value == sibling_value:
                retval += 1
                import_node.prev_sibling.remove()
        elif isinstance(import_node.next_sibling, Leaf) and import_node.next_sibling.value == sibling_value:
            import_node.next_sibling.remove()
        import_node.remove()
        return retval

    def transform(self, node, result):
        '''
        Drop the matched import statement, or only the matching names in it.

        The tree is edited in place; nothing is returned.
        '''
        if 'import0' in result:
            if isinstance(node.next_sibling, Leaf) and node.next_sibling.value == '\n':
                node.next_sibling.remove()
            node.remove()
        elif 'import1' in result:
            import_node = result['import_as']
            # If the alias could not be handled, a message was already
            # printed; keep the import rather than silently deleting it.
            if self._handle_import_as_name(node, import_node) is False:
                return
            node.remove()
        elif 'import2' in result:
            import_nodes = result['imports_as']
            i = 0
            while i < len(import_nodes.children):
                import_node = import_nodes.children[i]
                retval = self._handle_import_as_name(node, import_node, sibling_value=',')
                # Nothing removed: advance. Otherwise the children shifted
                # left, so stay put (or step back over a removed separator).
                i = i + 1 if retval is False else i - retval
            if len(import_nodes.children) == 0:
                node.remove()
        else:
            raise Exception('Should not have reached here')
class FixIdaComplexFunctions(fixer_base.BaseFix):
    '''
    Rewrite calls to old functions whose argument list changed.

    Handles both ``old(args)`` and ``idc.old(args)``. The replacement name
    and the old/new argument names come from COMPLICATED_FUNCTIONS.
    '''
    run_order = 2
    order = 'pre'
    PATTERN = '''
        power< attr=TOKEN arg_trailer=trailer< '(' args=any* ')' > >
        |
        power< idc='idc' trailer=trailer< '.' attr=TOKEN > arg_trailer=trailer< '(' args=any* ')' > >
    '''

    # Decimal, hexadecimal (including a-f digits) and simple float literals.
    _NUMBER_RE = re.compile(r'^-?(?:0[xX][0-9a-fA-F]+|[0-9]+(?:\.[0-9]+)?)$')
    # Strings, or our best guess at them
    _STRING_RE = re.compile(r'^(?:\'|").*(?:\'|")+$')

    def match(self, node):
        '''Return the pattern results only for names in COMPLICATED_FUNCTIONS.'''
        results = super(FixIdaComplexFunctions, self).match(node)
        if not results:
            return False
        if results['attr'].value not in COMPLICATED_FUNCTIONS:
            return False
        return results

    def _split_call_arguments(self, node, arg_nodes):
        '''
        Split the matched argument nodes into one entry per call argument.

        Returns a list whose entries are either a single node or a tuple of
        nodes (one argument of an ``arglist``), or ``None`` if the shape of
        ``arg_nodes`` is unexpected.
        '''
        old_arguments = []
        if len(arg_nodes) == 1:
            arg_node = arg_nodes[0]
            if arg_node.type == python_symbols.arglist:
                # Not the cleanest way, but needed in case of expressions in the arguments
                current_arg = []
                for arg in arg_node.children:
                    if isinstance(arg, Leaf) and arg.value == ',':
                        old_arguments.append(tuple(current_arg))
                        current_arg = []
                    else:
                        current_arg.append(arg)
                old_arguments.append(tuple(current_arg))
            else:
                old_arguments.append(arg_node)
        elif len(arg_nodes) != 0:
            print('Expected {!r} to either have no arguments or exactly one'.format(node))
            return None
        return old_arguments

    def transform(self, node, results):
        '''
        Replace the called name and rebuild the argument list.

        Arguments named in the old signature are copied from the call.
        Anything else in the new signature is emitted as a number, a string,
        a ``package.attr`` import, or a name imported from ``idc``. The tree
        is only modified once the whole new argument list has been built.
        '''
        old_arguments = self._split_call_arguments(node, results['args'])
        if old_arguments is None:
            return

        oldname_node = results['attr']
        package, newname, oldargs, newargs = COMPLICATED_FUNCTIONS[oldname_node.value]

        # Positional mapping only works when the call matches the old
        # signature; bail out before touching the tree otherwise.
        if len(old_arguments) != len(oldargs):
            print('Unable to convert call to {!r}: expected {} argument(s), found {}'.format(
                oldname_node.value, len(oldargs), len(old_arguments)))
            return

        new_arguments = []
        for i, arg in enumerate(newargs):
            prefix = ' ' if i != 0 else None
            if i != 0:
                new_arguments.append(Comma())
            if arg in oldargs:
                old_arg = old_arguments[oldargs.index(arg)]
                if isinstance(old_arg, tuple):
                    new_arguments.extend(x.clone() for x in old_arg)
                else:
                    new_arguments.append(old_arg.clone())
            # Literals are checked before the '.' test so that floats and
            # strings containing a dot are not mistaken for package.attr.
            elif self._NUMBER_RE.match(arg):
                new_arguments.append(Number(arg, prefix=prefix))
            elif self._STRING_RE.match(arg):
                new_arguments.append(String(arg, prefix=prefix))
            # Assuming anything with a . is idaapi.blah or some such
            elif '.' in arg:
                arg_package, attr = arg.rsplit('.', 1)
                touch_import(arg_package, attr, node)
                new_arguments.append(Name(attr, prefix=prefix))
            # Else we assume it comes from idc
            else:
                touch_import('idc', arg, node)
                new_arguments.append(Name(arg, prefix=prefix))

        # The replacement is emitted as a bare name, so it must be imported.
        if package is not None:
            touch_import(package, newname, node)

        if 'idc' in results:
            # idc.old(...) -> new(...): drop the '.old' trailer, rename 'idc'.
            idc_node = results['idc']
            trailer_node = results['trailer']
            newname_node = Name(newname, prefix=idc_node.prefix)
            trailer_node.remove()
            idc_node.replace(newname_node)
        else:
            newname_node = Name(newname, prefix=oldname_node.prefix)
            oldname_node.replace(newname_node)

        arg_trailer_node = results['arg_trailer']
        arg_trailer_node.replace(ArgList(new_arguments))
# Complex as in idc.blah
class FixIdaComplexRenames(fixer_base.BaseFix):
    '''
    Rename ``idc.<old name>`` attribute accesses using OLD_NEW_MAP.

    When the mapping names a package, ``idc.old`` becomes a bare ``new`` and
    ``from package import new`` is added. When it names no package, only the
    attribute is renamed (``idc.new``), so the result never refers to a name
    that has not been imported.
    '''
    run_order = 3
    order = 'pre'
    PATTERN = '''power< idc='idc' trailer=trailer< '.' attr=TOKEN> any*>'''

    def match(self, node):
        '''Return the pattern results for ``idc.<name>`` when name is in OLD_NEW_MAP.'''
        results = super(FixIdaComplexRenames, self).match(node)
        if not results:
            return False
        # if we do blah = idc.python_on (or any 'complicated' function)
        # then we end up returning False here
        if results['attr'].value not in OLD_NEW_MAP:
            return False
        # Needlessly reasserting what the pattern should have gotten us
        if len(node.children) < 2:
            print('Should not have reached here with a power node with fewer than 2 children')
            return False
        idc_node = node.children[0]
        if not isinstance(idc_node, Leaf) or idc_node.value != 'idc':
            print('Expected the first child to be "idc"')
            return False
        trailer_node = node.children[1]
        if not isinstance(trailer_node, Node) or trailer_node.type != python_symbols.trailer:
            print('Expected the second child to be a trailer node')
            return False
        if len(trailer_node.children) != 2:
            print('Expected the trailer node to have at exactly two children')
            print('It actually has {}'.format(len(trailer_node.children)))
            return False
        dot_node = trailer_node.children[0]
        if not isinstance(dot_node, Leaf) or dot_node.value != '.':
            print('Expected the first child of the trailer node to be "."')
            return False
        return results

    def transform(self, node, results):
        '''Apply the rename in place; nothing is returned.'''
        oldname_node = results['attr']
        idc_node = results['idc']
        trailer_node = results['trailer']
        package, newname = OLD_NEW_MAP[oldname_node.value]
        if package is None:
            # No package to import from: keep the existing 'idc.' qualifier
            # and rename only the attribute.
            oldname_node.replace(Name(newname, prefix=oldname_node.prefix))
            return
        touch_import(package, newname, node)
        newname_node = Name(newname, prefix=idc_node.prefix)
        trailer_node.remove()
        idc_node.replace(newname_node)
# Simple as in blah instead of idc.blah
class FixIdaSimpleRenames(fixer_base.BaseFix):
    '''
    Rename bare uses of old names (``blah`` rather than ``idc.blah``).

    Names inside import statements and names that directly follow a ``.``
    are skipped. Names listed in UNSUPPORTED_REPLACEMENTS are reported once
    and left alone.
    '''
    run_order = 4
    _accept_type = token.NAME
    # Unsupported names that have already been reported.
    PRINTED = set()
    # Node types that mark the name as part of an import statement.
    IMPORT_STATEMENTS = {
        python_symbols.import_stmt,
        python_symbols.import_name,
        python_symbols.import_from,
        python_symbols.import_as_name,
        python_symbols.import_as_names,
    }

    def match(self, node):
        '''Return True when ``node`` is a bare old name that should be renamed.'''
        for ancestor in attr_chain(node, 'parent'):
            if ancestor.type in self.IMPORT_STATEMENTS:
                return False

        previous = node.prev_sibling
        if isinstance(previous, Leaf) and previous.value == '.':
            # Should have been handled already
            return False

        if not isinstance(node, Leaf):
            return False

        identifier = node.value
        if identifier in UNSUPPORTED_REPLACEMENTS:
            if identifier not in self.PRINTED:
                print('Sorry, you will have to manually handle this conversion: {!r}'.format(identifier))
                self.PRINTED.add(identifier)
            return False
        return identifier in OLD_NEW_MAP

    def transform(self, node, result):
        '''Return the replacement Name leaf, adding its import when a package is known.'''
        enclosing = node.parent
        package, replacement = OLD_NEW_MAP[node.value]
        if package is not None:
            touch_import(package, replacement, enclosing)
        return Name(replacement, prefix=node.prefix)
def process_idc_bc695(filepath):
    '''
    Parse an ``idc_bc695.py`` compatibility file and fill the rename tables.

    Each line is classified as follows:

    * comments, blank lines and imports are ignored;
    * ``def old(args): return [pkg.]new(args)`` goes to OLD_NEW_MAP when the
      argument lists are textually identical, otherwise to
      COMPLICATED_FUNCTIONS (``pkg`` defaults to ``'idc'``);
    * any other ``def`` is recorded in UNSUPPORTED_REPLACEMENTS;
    * ``OLD=[pkg.]NEW`` goes to OLD_NEW_MAP.

    Line endings are normalised first, so CRLF files and a final line
    without a newline are parsed like any other line.

    Returns ``False`` without reading anything if ``filepath`` does not end
    in ``idc_bc695.py``; otherwise rebuilds the FixIdaImports pattern and
    returns ``True``.

    Raises ``Exception`` for a line that fits none of the categories above.
    '''
    if not filepath.endswith('idc_bc695.py'):
        return False

    # Raw strings avoid invalid-escape warnings; compiled once, not per line.
    full_def_re = re.compile(
        r'^def (?P<oldname>\w+)'
        r'\((?P<oldargs>(?:[^,\)]+?)(?:,[^,\)]+?)*)?\)'
        r': return (?:(?P<package>\w+)\.)?(?P<newname>\w+)'
        r'\((?P<newargs>(?:[^,\)]+?)(?:,[^,\)]+?)*)?\);?\n$'
    )
    def_name_re = re.compile(r'^def (?P<oldname>\w+)\(')
    whitespace_re = re.compile(r'\s')
    assignment_re = re.compile(r'^(?P<oldname>\w+)=(?:(?P<package>\w+)\.)?(?P<newname>\w+)\n$')

    with open(filepath, 'rb') as file_h:
        oldlines = file_h.readlines()

    for line in oldlines:
        if sys.version_info > (3, 0):
            line = line.decode('utf-8')
        # Every regex below expects exactly one trailing '\n'.
        line = line.rstrip('\r\n') + '\n'

        if line.startswith(('#', '\n')):
            pass
        elif line.startswith(('import ', 'from ')):
            pass
        elif line.startswith('def '):
            if '=' in line:
                # IDA's autogen doesn't do named arguments, but just in case
                print('Unable to transform lines like {!r} due to keywords'.format(line))
                continue
            match = full_def_re.match(line)
            if match is not None:
                oldargs = match.group('oldargs')
                oldname = match.group('oldname')
                newargs = match.group('newargs')
                newname = match.group('newname')
                package = match.group('package')
                if package is None:
                    package = 'idc'
                if oldargs == newargs:
                    OLD_NEW_MAP[oldname] = (package, newname)
                else:
                    if oldargs is None:
                        oldargs = ()
                    else:
                        oldargs = tuple(x.strip() for x in oldargs.split(','))
                    if newargs is None:
                        newargs = ()
                    else:
                        newargs = tuple(x.strip() for x in newargs.split(','))
                    COMPLICATED_FUNCTIONS[oldname] = (package, newname, oldargs, newargs)
            else:
                match = def_name_re.match(line)
                if match is None:
                    print('Sorry, you will have to manually handle this conversion: {!r}'.format(line))
                else:
                    UNSUPPORTED_REPLACEMENTS.add(match.group('oldname'))
        elif '=' in line:
            if whitespace_re.search(line[:-1]) is not None:
                # These are generally OLDNAME=NEWNAME, not expecting spaces
                print('Unable to transform lines like {!r} due to spaces'.format(line))
                continue
            match = assignment_re.match(line)
            if match is None:
                print('Unable to transform, line did not match regex: {!r}'.format(line))
                continue
            oldname = match.group('oldname')
            package = match.group('package')
            newname = match.group('newname')
            OLD_NEW_MAP[oldname] = (package, newname)
        else:
            raise Exception('Do not know how to handle lines like {!r}'.format(line))

    FixIdaImports.generate_pattern()
    return True
def main():
    '''
    Command-line entry point.

    The last command-line argument must be the path to ``idc_bc695.py``; it
    is consumed here, and the remaining arguments are left in ``sys.argv``
    for lib2to3's own main. With ``-h``/``--help`` the usage line is printed
    and lib2to3 is still invoked so its help text is shown too.
    '''
    usage = 'Usage: {} [args to 2to3] /path/to/idc_bc695.py'.format(sys.argv[0])

    if len(sys.argv) == 1:
        print(usage)
        sys.exit(-1)

    help_requested = '-h' in sys.argv or '--help' in sys.argv
    if help_requested:
        print(usage)
        # Fall through so we print out the help doc for 2to3 as well
    else:
        candidate = sys.argv[-1]
        if not (candidate.endswith('idc_bc695.py') and os.path.isfile(candidate)):
            print('{} is not a valid idc_bc695.py file'.format(candidate))
            sys.exit(-1)
        process_idc_bc695(sys.argv.pop())

    sys.exit(lib2to3_main('lib2to3.fixes'))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment