Last active
August 29, 2015 14:26
-
-
Save LexManos/04da409492922d845816 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Created on Thu Jan 19 16:29:03 2012 | |
Rewritten on Tue July 28 13:09:00 2015 | |
@author: Fesh0r, LexManos | |
@version: v7.0 | |
""" | |
import sys | |
import os | |
import fnmatch | |
import shutil | |
import re | |
import zipfile | |
import time | |
from contextlib import closing | |
from optparse import OptionParser | |
from pprint import pprint | |
""" | |
This processes a FernFlower output file and fixes some of the common decompiler mistakes,
making the output code cleaner and less erroneous.
This takes advantage of the reconstituted local variables and inner class attributes that are present | |
in MC release 1.8.2 and above. | |
Things that are cleaned: | |
Consecutive empty lines are condensed:
Line 1 | |
Line 2 | |
------------------------------------ | |
Line 1 | |
Line 2 | |
------------------------------------ | |
Trailing whitespace is removed: | |
' HELLO ' | |
' HELLO' | |
Decompile differences between machines related to double and floats, by removing trailing zeros: | |
0.0010D => 0.001D | |
Unnecessary calls to super with zero arguments, this is implied by the compiler.
'super();' => '' | |
Parameter names in abstract methods, since abstract methods have no LVT attribute, FF does not name them correctly.
' <T extends Object & Comparable<T>, V extends T> IBlockState func_177226_a(IProperty<T> var1, V var2);'
' <T extends Object & Comparable<T>, V extends T> IBlockState func_177226_a(IProperty<T> p_177226_1_, V p_177226_2_);'
Enum Members, Enums are mostly syntax sugar, FernFlower does a good job at decompiling most of it.
However it still leaves the first two parameters in code. So we fix that:
'LOGIN("LOGIN", 0, 1)' => 'LOGIN(1)' | |
If an Enum's value is an anonymous inner class, the compiler adds a 'null' parameter to the initializer. Unsure why but we need to strip this out.
'STONEBRICK("STONEBRICK", 2, 2, "stone_brick", "brick", (BlockSilverfish.NamelessClass1508106186)null) {' | |
'STONEBRICK(2, "stone_brick", "brick") {' | |
It also leaves those two parameters in the constructor arguments: | |
'EnumSomething(String p_i123_1_, int p_i123_2_, int p_i123_3_)' | |
'EnumSomething(int p_i123_3_)' | |
Synthetic methods, To support generics Java creates synthetic methods that bounce to concrete methods.
We scan for these methods that do nothing more than bounce with potential typecasting, and remove them
if the target method has the same name. This heavily relies on the mapping data having the correct mappings | |
'// \$FF: synthetic method' | |
'public Object call() { | |
' return (Object)this.call();' | |
'}' | |
Fernflower does not properly add generic parameters to anonymous inner class declarations. | |
I can't think of a good way to fix this generically, so we fix it for the classes | |
used in Minecraft, Function, Predicate, and Comparator | |
'new Predicate() {' => 'new Predicate<String, ItemStack>() {' | |
""" | |
# Regex fragment for a Java identifier. '.' is accepted after the first
# character, so dotted (qualified) names match as well.
_JAVA_IDENTIFIER = r'[a-zA-Z_$][\w_$\.]*'
# Alternation of Java modifier keywords; interpolated into the 'method_def' pattern.
_MODIFIERS = r'public|protected|private|static|abstract|final|native|synchronized|transient|volatile|strictfp'
# Access modifiers only.
# NOTE(review): no active pattern visible in this file references this name - confirm before removing.
_MODIFIERS_INIT = r'public|protected|private'
# One '<type> varN' parameter: captures 'type' (optionally with a generic
# '<...>' suffix and/or ' ...' varargs marker), the numeric 'id' of the var
# name, and the optional separator 'end' (',? ').
_PARAMETERS_VAR = r'(?:(?P<type>(?:[^ ,])+(?:<.*>)?(?: \.\.\.)?) var(?P<id>\d+)(?P<end>,? )?)'
# One '<type> <name>' parameter: same shape as _PARAMETERS_VAR but the name
# is any _JAVA_IDENTIFIER, captured as 'name'.
_PARAMETERS = r'(?:(?P<type>(?:[^ ,])+(?:<.*>)?(?: \.\.\.)?) (?P<name>' + _JAVA_IDENTIFIER + r')(?P<end>,? )?)'
# Pre-compiled patterns used by _process_data, keyed by a short name.
# Entries that are commented out belong to clean-up passes that are currently
# disabled; they are kept here verbatim for reference.
_REGEXP = {
    # Typecast marker, e.g. '(Object)'
    'typecast': re.compile(r'\([\w\.]+\)'),
    # Remove repeated blank lines
    'newlines': re.compile(r'^\n{2,}', re.MULTILINE),
    # Normalize line ending to unix style
    'normlines': re.compile(r'\r?\n', re.MULTILINE),
    # Remove trailing whitespace
    # NOTE(review): not referenced by the visible code, which uses str.rstrip() instead.
    'trailing': re.compile(r'[ \t]+$'),
    # strip trailing 0 from doubles and floats to fix decompile differences on OSX
    # 0.0010D => 0.001D
    #'trailingzero': re.compile(r'(?P<value>[0-9]+\.[0-9]*[1-9])0+(?P<type>[DdFfEe])'),
    # Remove unnecessary calls to super()
    #'empty_super': re.compile(r'^ +super\(\);\n'),
    # Cleanup the argument names on abstract methods
    'abstract': re.compile(r' (?P<method>func_(?P<number>\d+)_[a-zA-Z_]+)\((?P<arguments>' + _PARAMETERS_VAR + r'+)\)(?: throws (?:[\w$.]+,? ?)+)?;$'),
    # Single parts of parameter lists
    'params_var': re.compile(_PARAMETERS_VAR),
    # Cleanup enum syntax sugar not being removed properly
    #'enum_member': re.compile(r'^(?P<indent> +)(?P<name>' + _JAVA_IDENTIFIER + r')\("(?P=name)", \d+(?P<sep>[,\)] *)(?P<end>.+)'),
    #
    # Enum declarations, used to find constructors
    #'enum_class': re.compile(r' enum (?P<name>' + _JAVA_IDENTIFIER + r') '),
    #
    # Enum constructor with sugar arguments
    #'enum_init': re.compile(r'^(?P<indent> +)(?P<modifiers>(?:(?:public|protected|private) )*)(?P<name>' + _JAVA_IDENTIFIER + r')\(String p_(?P<id>i\d+)_1_, int p_i\d+_2_(?:, )*(?P<end>.+)'),
    #
    # Empty enum ending
    #'enum_empty': re.compile(r'\)\s*(?:throws (?:[\w$.]+,? ?)+)?\s*\{\s*\}\s*$'),
    #
    # Enum anon classes add a random 'null' argument at the end.. No clue where this comes from
    #'enum_anon': re.compile(r'(?:, )*(?:\([\w\.]+\))*null\) \{'),
    #
    # Enum $VALUES field
    #'enum_values': re.compile(r'^\s*private static final (?P<name>' + _JAVA_IDENTIFIER + r')\[\] \$VALUES = new (?P=name)\[\]\{.*?\};'),
    #
    # Fernflower nameless classes scattered all over the place, no clue why....
    #'nameless': re.compile(r'(?:, )*\([\w\.]+(NamelessClass\d+|SwitchHelper)\)null\)'),
    # Synthetic markers
    # NOTE(review): the visible code compares the marker strings directly and does not use this pattern.
    'syn_marker': re.compile(r'^\s*// \$FF: (synthetic|bridge) method$'),
    # Method definition: captures 'modifiers', 'return', 'method' and the raw 'arguments' list
    'method_def': re.compile(r'^\s*(?P<modifiers>(?:(?:' + _MODIFIERS + r') )*)(?P<return>.+?) (?P<method>.+?)\((?P<arguments>' + _PARAMETERS + r'*)\)\s*(?:throws (?:[\w$.]+,? ?)+)?\s*\{'),
    # Method call on this/super, optionally returned: captures 'target' and the
    # 'arguments' list (plain identifiers, each optionally prefixed by a typecast)
    'syn_call': re.compile(r'^\s*(?P<return>return )?(this|super)\.(?P<target>.+)\((?P<arguments>(?:(?:(?:\([\w\.]+\))?[a-zA-Z_$][\w_$]*)(?:, )*)*)\);'),
    # Function generic method
    #'apply_def': re.compile(r'^\s*public (?P<return>.+?) apply\((?P<type>[^ ,]+(?:<.*>)?) p_apply_1_\)'),
    #
    # Predicate generic method
    #'predicate_def': re.compile(r'^\s*public boolean apply\((?P<type>[^ ,]+(?:<.*>)?) p_apply_1_\)'),
    #
    # Comparator generic method
    #'compare_def': re.compile(r'^\s*public int compare\((?P<type>[^ ,]+(?:<.*>)?) p_compare_1_, '),
    #
    # TypeAdapter generic method
    #'write_def': re.compile(r'^\s*public void write\(JsonWriter p_write_1_, (?P<type>[^ ,]+(?:<.*>)?) p_write_2_\)'),
    #
    # SimpleChannelInboundHandler generic method
    #'channelRead0_def': re.compile(r'^\s*(public|protected) void channelRead0\(ChannelHandlerContext p_channelRead0_1_, (?P<type>[^ ,]+(?:<.*>)?) p_channelRead0_2_\)'),
    #
    # GenericFutureListener generic method
    #'operationComplete_def': re.compile(r'^\s*public void operationComplete\((?P<type>[^ ,]+(?:<.*>)?) p_operationComplete_1_\)'),
    #
    # FutureCallback generic method
    #'onSuccess_def': re.compile(r'^\s*public void onSuccess\((?P<type>[^ ,]+(?:<.*>)?) p_onSuccess_1_\)'),
    #
    # CacheLoader generic method
    #'load_def': re.compile(r'^\s*public (?P<return>.+?) load\((?P<type>[^ ,]+(?:<.*>)?) p_load_1_\)'),
}
class Error(Exception):
    """Base exception type of this script; ParseError derives from it."""
class ParseError(Error):
    """Error subclass named for parse failures.

    NOTE(review): nothing in the visible code raises it - confirm it is
    still needed by callers outside this file.
    """
def fffix(srcdir):
    """Clean every '*.java' file found under *srcdir*, in place.

    The tree is walked with os.walk (symbolic links to directories are
    followed) and each matching file is handed to _process_file.
    """
    walker = os.walk(srcdir, followlinks=True)
    for dirpath, _dirnames, filenames in walker:
        java_names = fnmatch.filter(filenames, '*.java')
        for name in java_names:
            full_path = os.path.join(dirpath, name)
            _process_file(os.path.normpath(full_path))
def fffix_zip_dir(src_file, dest_dir):
    """Extract the zip archive *src_file* into *dest_dir*, cleaning Java sources.

    *dest_dir* is removed (via reallyrmtree) and recreated first. Every entry
    is then written below it; entries whose name ends in '.java' are passed
    through _process_data before being written, all other entries are copied
    unchanged.

    Args:
        src_file: Path of the zip archive to read.
        dest_dir: Directory to (re)create and extract into.
    """
    reallyrmtree(dest_dir)
    os.makedirs(dest_dir)
    # Hand ZipFile the path rather than an open file object: ZipFile.close()
    # only closes the underlying file when it opened it itself, so the
    # previous open(src_file, 'rb') handle was leaked.
    with closing(zipfile.ZipFile(src_file, 'r')) as archive:
        for info in archive.infolist():
            dest_file = os.path.join(dest_dir, info.filename)

            # Directory entries carry no data; opening them as a file fails.
            if info.filename.endswith('/'):
                if not os.path.isdir(dest_file):
                    os.makedirs(dest_file)
                continue

            data = archive.read(info.filename)
            if info.filename.endswith('.java'):
                class_name = os.path.splitext(os.path.basename(info.filename))[0]
                if isinstance(data, str):
                    # Python 2: entry data is already a (byte) str.
                    data = _process_data(data, class_name)
                else:
                    # Python 3: entry data is bytes, but the patterns are
                    # text patterns, so round-trip through UTF-8.
                    data = _process_data(data.decode('utf-8'), class_name).encode('utf-8')

            parent = os.path.dirname(dest_file)
            if not os.path.exists(parent):
                os.makedirs(parent)
            with open(dest_file, 'wb') as f:
                f.write(data)
def _process_file(src_file): | |
if not os.path.splitext(src_file)[1] == '.java': | |
return | |
class_name = os.path.splitext(os.path.basename(src_file))[0] | |
tmp_file = src_file + '.tmp' | |
with open(src_file, 'r') as fh: | |
orig = fh.read() | |
buf = _process_data(orig, class_name) | |
if not buf == orig: | |
with open(tmp_file, 'w') as fh: | |
fh.write(buf) | |
shutil.move(tmp_file, src_file) | |
# Comment lines FernFlower places directly above compiler-generated methods.
_SYNTHETIC_MARKERS = ('// $FF: synthetic method', '// $FF: bridge method')


def _parameter_names(arguments):
    """Return the parameter names of a Java parameter list such as 'int a, Map<K, V> b'.

    The list is split on commas that are not nested inside '<...>', and the
    last space-separated token of each piece is taken as the name, so generic
    types containing ', ' and varargs ('String ... a') are handled.
    """
    pieces = []
    current = []
    depth = 0
    for char in arguments:
        if char == '<':
            depth += 1
        elif char == '>':
            depth -= 1
        if char == ',' and depth == 0:
            pieces.append(''.join(current))
            current = []
        else:
            current.append(char)
    pieces.append(''.join(current))
    return [piece.strip().rsplit(' ', 1)[-1] for piece in pieces if piece.strip()]


def _rename_abstract_params(match):
    """Rewrite the 'varN' parameters of an 'abstract' pattern match to 'p_<number>_<N>_'."""
    number = match.group('number')

    def rename(param):
        end = param.group('end')
        return '%s p_%s_%s_%s' % (param.group('type'), number, param.group('id'), end if end is not None else '')

    arguments = match.group('arguments')
    renamed = _REGEXP['params_var'].sub(rename, arguments)
    return match.group(0).replace(arguments, renamed)


def _process_data(data, class_name):
    """Clean up the text of one decompiled Java source file and return it.

    Passes applied, line by line:
      * line endings are normalized to '\\n';
      * lines consisting only of 'super();' are blanked;
      * a method preceded by a '// $FF: synthetic method' / '// $FF: bridge
        method' marker is removed when it is a three-line method whose body
        only calls this./super. with the same method name and the same
        arguments (typecasts ignored); a marked three-line block whose body
        starts with 'this(' is removed as well;
      * 'varN' parameter names in abstract 'func_<number>_*' declarations are
        renamed to 'p_<number>_<N>_';
      * trailing whitespace is stripped and runs of blank lines are condensed.

    Mismatching synthetic methods are reported on stdout and left in place.

    NOTE: the enum-sugar, '$VALUES', nameless-class cast, trailing-zero and
    anonymous-class generic fix-ups described in the module docstring were
    already disabled (commented out) and have been dropped from this function;
    their patterns are still kept, commented out, in _REGEXP.

    Args:
        data: Full text of the source file.
        class_name: File name without extension; currently unused.

    Returns:
        The cleaned text, with '\\n' line endings.
    """
    buf = _REGEXP['normlines'].sub(r'\n', data).split('\n')
    total = len(buf)

    for idx, line in enumerate(buf):
        line_s = line.strip()

        # Remove unnecessary super calls
        if line_s == 'super();':
            line = ''

        if line_s in _SYNTHETIC_MARKERS:
            i = idx + 1
            # A method may carry both markers; skip the second one.
            if i < total and buf[i].strip() in _SYNTHETIC_MARKERS:
                i += 1
            # The candidate needs three lines (signature, body, '}'); a marker
            # too close to the end of the input cannot be one, so skip it
            # instead of indexing past the end of the buffer.
            if i + 2 < total:
                method = _REGEXP['method_def'].match(buf[i])
                body = _REGEXP['syn_call'].match(buf[i + 1])
                end = buf[i + 2].strip() == '}'
                if method and body and end:
                    if method.group('method') == body.group('target'):
                        args1 = ', '.join(_parameter_names(method.group('arguments')))
                        args2 = '' if body.group('arguments') is None else _REGEXP['typecast'].sub('', body.group('arguments'))
                        if args1 == args2:
                            # Blank the marker(s) and the whole method; the
                            # emptied lines are condensed at the end.
                            line = buf[i - 1] = buf[i] = buf[i + 1] = buf[i + 2] = ''
                        else:
                            print('MISMATCH ARGS %s' % buf[i])
                            print(' %s' % args1)
                            print('MISMATCH ARGS %s' % buf[i + 1])
                            print(' %s' % args2)
                    else:
                        print('MISMATCH TARGET %s %s' % (method.group('method'), body.group('target')))
                elif buf[i].endswith(') {') and buf[i + 1].lstrip().startswith('this(') and end:
                    line = buf[i - 1] = buf[i] = buf[i + 1] = buf[i + 2] = ''

        # Cleanup the argument names on abstract methods
        line = _REGEXP['abstract'].sub(_rename_abstract_params, line)

        # Trim trailing whitespace
        buf[idx] = line.rstrip()

    # Condense any consecutive empty lines
    return _REGEXP['newlines'].sub(r'\n', '\n'.join(buf))
def main():
    """Command-line entry point.

    With one positional argument the directory tree is fixed in place via
    fffix; with two, the zip archive given first is extracted and fixed into
    the directory given second via fffix_zip_dir. Any other argument count
    prints 'src_dir required' to stderr and exits with status 1.
    """
    usage = 'usage: %prog [options] src [dest]'
    version = '%prog 7.0'
    parser = OptionParser(version=version, usage=usage)
    _options, args = parser.parse_args()
    if len(args) == 1:
        fffix(args[0])
    elif len(args) == 2:
        fffix_zip_dir(args[0], args[1])
    else:
        # sys.stderr.write emits exactly what 'print >> sys.stderr' did, but
        # is valid syntax on both Python 2 and Python 3.
        sys.stderr.write('src_dir required\n')
        sys.exit(1)
def reallyrmtree(path):
    """Remove the directory tree at *path*, retrying on Windows.

    On non-Windows platforms the tree is removed with a single
    shutil.rmtree call if it exists. On Windows (sys.platform starting with
    'win') removal is attempted up to 20 times with rmtree_onerror as error
    handler, stopping as soon as os.stat reports the path gone.

    Raises:
        OSError: with errno.EPERM, on Windows only, if the path still exists
            after all attempts.
    """
    # Imported locally because the module header does not import errno; the
    # raise below previously failed with NameError instead of OSError.
    import errno

    if not sys.platform.startswith('win'):
        if os.path.exists(path):
            shutil.rmtree(path)
        return

    attempts = 0
    try:
        # os.stat raises OSError once the path no longer exists, which ends the loop.
        while os.stat(path) and attempts < 20:
            shutil.rmtree(path, onerror=rmtree_onerror)
            attempts += 1
    except OSError:
        pass

    # raise OSError if the path still exists even after trying really hard
    try:
        os.stat(path)
    except OSError:
        pass
    else:
        raise OSError(errno.EPERM, "Failed to remove: '" + path + "'", path)
def rmtree_onerror(func, path, _, delay=0.5):
    """shutil.rmtree 'onerror' handler: make *path* writable, wait, retry once.

    Args:
        func: The function that failed; it is called again with *path*.
        path: The path the failed call was operating on.
        _: Exception information supplied by shutil.rmtree; ignored.
        delay: Seconds to sleep before retrying (default 0.5, the value that
            was previously hard-coded).

    An OSError raised by the retry is swallowed; the caller (reallyrmtree)
    checks afterwards whether the tree is really gone.
    """
    # Imported locally because the module header does not import stat; the
    # chmod below previously failed with NameError.
    import stat

    if not os.access(path, os.W_OK):
        os.chmod(path, stat.S_IWUSR)
    time.sleep(delay)
    try:
        func(path)
    except OSError:
        pass
# Run the command-line entry point only when executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment