Skip to content

Instantly share code, notes, and snippets.

@LexManos
Last active August 29, 2015 14:26
Show Gist options
  • Save LexManos/04da409492922d845816 to your computer and use it in GitHub Desktop.
Save LexManos/04da409492922d845816 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 19 16:29:03 2012
Rewritten on Tue July 28 13:09:00 2015
@author: Fesh0r, LexManos
@version: v7.0
"""
import errno
import fnmatch
import os
import re
import shutil
import stat
import sys
import time
import zipfile
from contextlib import closing
from optparse import OptionParser
from pprint import pprint
"""
This processes a FernFlower output file and fixes some of the common decompiler mistakes,
making the output code cleaner and less erroneous.
This takes advantage of the reconstituted local variables and inner class attributes that are present
in MC release 1.8.2 and above.
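Things that are cleaned (the trailing-zero, enum and anonymous-class generic passes described below are currently commented out in the code):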
Consecutive empty lines are condensed:
Line 1
Line 2
------------------------------------
Line 1
Line 2
------------------------------------
Trailing whitespace is removed:
' HELLO '
' HELLO'
Decompile differences between machines related to double and floats, by removing trailing zeros:
0.0010D => 0.001D
Unnecessary calls to super with zero arguments; these are implied by the compiler.
'super();' => ''
Parameter names in abstract methods: since abstract methods have no LVT attribute, FF does not name them correctly.
' <T extends Object & Comparable<T>, V extends T> IBlockState func_177226_a(IProperty<T> var1, V var2);'
' <T extends Object & Comparable<T>, V extends T> IBlockState func_177226_a(IProperty<T> p_177226_1_, V p_177226_2_);'
Enum members: enums are mostly syntactic sugar, and FernFlower does a good job of decompiling most of it.
However, it still leaves the first two parameters in code, so we fix that:
'LOGIN("LOGIN", 0, 1)' => 'LOGIN(1)'
If an Enum's value is an anonymous inner class, the compiler adds a 'null' parameter to the initializer. Unsure why, but we need to strip this out.
'STONEBRICK("STONEBRICK", 2, 2, "stone_brick", "brick", (BlockSilverfish.NamelessClass1508106186)null) {'
'STONEBRICK(2, "stone_brick", "brick") {'
It also leaves those two parameters in the constructor arguments:
'EnumSomething(String p_i123_1_, int p_i123_2_, int p_i123_3_)'
'EnumSomething(int p_i123_3_)'
Synthetic methods: to support generics, Java creates synthetic methods that bounce to concrete methods.
We scan for these methods that do nothing more than bounce with potential typecasting, and remove them
if the target method has the same name. This heavily relies on the mapping data having the correct mappings.
'// \$FF: synthetic method'
'public Object call() {
' return (Object)this.call();'
'}'
Fernflower does not properly add generic parameters to anonymous inner class declarations.
I can't think of a good way to fix this generically, so we fix it for the classes
used in Minecraft, Function, Predicate, and Comparator
'new Predicate() {' => 'new Predicate<String, ItemStack>() {'
"""
# Regex fragment for a Java identifier: a letter, '_' or '$' followed by any
# mix of word characters, '$' and '.' (so dotted names match as well).
_JAVA_IDENTIFIER = r'[a-zA-Z_$][\w_$\.]*'
# Alternation of Java modifier keywords; spliced into the 'method_def' pattern.
_MODIFIERS = r'public|protected|private|static|abstract|final|native|synchronized|transient|volatile|strictfp'
# Alternation of the access modifiers only.
# NOTE(review): not referenced by any active code visible in this file.
_MODIFIERS_INIT = r'public|protected|private'
# One parameter whose name is 'var<N>'. Named groups: 'type' (the declared
# type, optionally with generics and a ' ...' varargs suffix), 'id' (the N)
# and 'end' (the optional ', ' or ' ' that follows the parameter).
_PARAMETERS_VAR = r'(?:(?P<type>(?:[^ ,])+(?:<.*>)?(?: \.\.\.)?) var(?P<id>\d+)(?P<end>,? )?)'
# Same shape as _PARAMETERS_VAR, but the name may be any Java identifier and is
# captured in the 'name' group.
_PARAMETERS = r'(?:(?P<type>(?:[^ ,])+(?:<.*>)?(?: \.\.\.)?) (?P<name>' + _JAVA_IDENTIFIER + r')(?P<end>,? )?)'
# Compiled patterns used by the cleanup code, keyed by a short name.
# Entries that are commented out belong to cleanup passes that are currently
# disabled; they are kept here for reference.
_REGEXP = {
    # Typecast marker such as '(Object)'; stripped from call arguments before
    # they are compared with the declared parameter names
    'typecast': re.compile(r'\([\w\.]+\)'),
    # Remove repeated blank lines
    'newlines': re.compile(r'^\n{2,}', re.MULTILINE),
    # Normalize line endings to unix style
    'normlines': re.compile(r'\r?\n', re.MULTILINE),
    # Remove trailing whitespace
    # NOTE(review): not referenced by the active code visible in this file,
    # which strips trailing whitespace with str.rstrip() instead.
    'trailing': re.compile(r'[ \t]+$'),
    # Strip trailing 0 from doubles and floats to fix decompile differences on OSX
    # 0.0010D => 0.001D
    #'trailingzero': re.compile(r'(?P<value>[0-9]+\.[0-9]*[1-9])0+(?P<type>[DdFfEe])'),
    # Remove unnecessary calls to super()
    #'empty_super': re.compile(r'^ +super\(\);\n'),
    # Cleanup the argument names on abstract methods: matches a
    # 'func_<number>_<suffix>(<varN parameters>) [throws ...];' declaration
    'abstract': re.compile(r' (?P<method>func_(?P<number>\d+)_[a-zA-Z_]+)\((?P<arguments>' + _PARAMETERS_VAR + r'+)\)(?: throws (?:[\w$.]+,? ?)+)?;$'),
    # Single parts of parameter lists
    'params_var': re.compile(_PARAMETERS_VAR),
    # Cleanup enum syntax sugar not being removed properly
    #'enum_member': re.compile(r'^(?P<indent> +)(?P<name>' + _JAVA_IDENTIFIER + r')\("(?P=name)", \d+(?P<sep>[,\)] *)(?P<end>.+)'),
    #
    # Enum declarations, used to find constructors
    #'enum_class': re.compile(r' enum (?P<name>' + _JAVA_IDENTIFIER + r') '),
    #
    # Enum constructor with sugar arguments
    #'enum_init': re.compile(r'^(?P<indent> +)(?P<modifiers>(?:(?:public|protected|private) )*)(?P<name>' + _JAVA_IDENTIFIER + r')\(String p_(?P<id>i\d+)_1_, int p_i\d+_2_(?:, )*(?P<end>.+)'),
    #
    # Empty enum ending
    #'enum_empty': re.compile(r'\)\s*(?:throws (?:[\w$.]+,? ?)+)?\s*\{\s*\}\s*$'),
    #
    # Enum anon classes add a random 'null' argument at the end.. No clue where this comes from
    #'enum_anon': re.compile(r'(?:, )*(?:\([\w\.]+\))*null\) \{'),
    #
    # Enum $VALUES field
    #'enum_values': re.compile(r'^\s*private static final (?P<name>' + _JAVA_IDENTIFIER + r')\[\] \$VALUES = new (?P=name)\[\]\{.*?\};'),
    #
    # Fernflower nameless classes scattered all over the place, no clue why....
    #'nameless': re.compile(r'(?:, )*\([\w\.]+(NamelessClass\d+|SwitchHelper)\)null\)'),
    # Synthetic markers
    # NOTE(review): not referenced by the active code visible in this file,
    # which compares the marker comments as literal strings.
    'syn_marker': re.compile(r'^\s*// \$FF: (synthetic|bridge) method$'),
    # Method definition: optional modifiers, return type, method name,
    # parameter list, optional throws clause and the opening brace
    'method_def': re.compile(r'^\s*(?P<modifiers>(?:(?:' + _MODIFIERS + r') )*)(?P<return>.+?) (?P<method>.+?)\((?P<arguments>' + _PARAMETERS + r'*)\)\s*(?:throws (?:[\w$.]+,? ?)+)?\s*\{'),
    # Method call: a single '[return ]this.<target>(<args>);' or
    # 'super.<target>(<args>);' statement whose arguments are plain
    # identifiers, each optionally preceded by a typecast
    'syn_call': re.compile(r'^\s*(?P<return>return )?(this|super)\.(?P<target>.+)\((?P<arguments>(?:(?:(?:\([\w\.]+\))?[a-zA-Z_$][\w_$]*)(?:, )*)*)\);'),
    # Function generic method
    #'apply_def': re.compile(r'^\s*public (?P<return>.+?) apply\((?P<type>[^ ,]+(?:<.*>)?) p_apply_1_\)'),
    #
    # Predicate generic method
    #'predicate_def': re.compile(r'^\s*public boolean apply\((?P<type>[^ ,]+(?:<.*>)?) p_apply_1_\)'),
    #
    # Comparator generic method
    #'compare_def': re.compile(r'^\s*public int compare\((?P<type>[^ ,]+(?:<.*>)?) p_compare_1_, '),
    #
    # TypeAdapter generic method
    #'write_def': re.compile(r'^\s*public void write\(JsonWriter p_write_1_, (?P<type>[^ ,]+(?:<.*>)?) p_write_2_\)'),
    #
    # SimpleChannelInboundHandler generic method
    #'channelRead0_def': re.compile(r'^\s*(public|protected) void channelRead0\(ChannelHandlerContext p_channelRead0_1_, (?P<type>[^ ,]+(?:<.*>)?) p_channelRead0_2_\)'),
    #
    # GenericFutureListener generic method
    #'operationComplete_def': re.compile(r'^\s*public void operationComplete\((?P<type>[^ ,]+(?:<.*>)?) p_operationComplete_1_\)'),
    #
    # FutureCallback generic method
    #'onSuccess_def': re.compile(r'^\s*public void onSuccess\((?P<type>[^ ,]+(?:<.*>)?) p_onSuccess_1_\)'),
    #
    # CacheLoader generic method
    #'load_def': re.compile(r'^\s*public (?P<return>.+?) load\((?P<type>[^ ,]+(?:<.*>)?) p_load_1_\)'),
}
class Error(Exception):
    """Root of the exception types defined by this module."""
class ParseError(Error):
    """Specialised ``Error``.

    Presumably meant to signal a parsing failure; nothing in the code visible
    in this file raises it.
    """
def fffix(srcdir):
    """Clean every ``*.java`` file found beneath *srcdir*, in place.

    The tree is walked with symlinked directories followed; each matching
    file's normalised path is handed to ``_process_file``.
    """
    for dirpath, _dirnames, filenames in os.walk(srcdir, followlinks=True):
        java_names = [name for name in filenames if fnmatch.fnmatch(name, '*.java')]
        for name in java_names:
            _process_file(os.path.normpath(os.path.join(dirpath, name)))
def fffix_zip_dir(src_file, dest_dir):
    """Extract the zip archive *src_file* into a fresh *dest_dir*, cleaning Java sources.

    Any existing *dest_dir* is removed first (``reallyrmtree``) and recreated.
    Every archive member is written below *dest_dir* under its archive name;
    members whose name ends in ``.java`` are passed through ``_process_data``
    before being written, everything else is copied unchanged.

    Args:
        src_file: path of the zip archive to read.
        dest_dir: directory to (re)create and fill.
    """
    reallyrmtree(dest_dir)
    os.makedirs(dest_dir)

    # Give ZipFile the path rather than an already-open file object: ZipFile
    # only closes file handles it opened itself, so the previous
    # ZipFile(open(...)) form leaked the handle.
    with closing(zipfile.ZipFile(src_file, 'r')) as archive:
        for info in archive.infolist():
            dest_file = os.path.join(dest_dir, info.filename)

            # Directory members carry no data; opening their path for writing
            # would fail, so just make sure the directory exists.
            if info.filename.endswith('/'):
                if not os.path.isdir(dest_file):
                    os.makedirs(dest_file)
                continue

            data = archive.read(info.filename)
            if info.filename.endswith('.java'):
                class_name = os.path.splitext(os.path.basename(info.filename))[0]
                data = _process_data(data, class_name)

            parent = os.path.dirname(dest_file)
            if not os.path.exists(parent):
                os.makedirs(parent)
            with open(dest_file, 'wb') as f:
                f.write(data)
def _process_file(src_file):
    """Clean one ``.java`` file in place; paths with any other extension are ignored.

    The file is rewritten only when the cleaned text differs from what was
    read. The new text is first written to ``<src_file>.tmp`` and then moved
    over the original.
    """
    extension = os.path.splitext(src_file)[1]
    if extension != '.java':
        return

    class_name = os.path.splitext(os.path.basename(src_file))[0]

    with open(src_file, 'r') as source:
        original = source.read()

    cleaned = _process_data(original, class_name)
    if cleaned == original:
        return

    scratch = src_file + '.tmp'
    with open(scratch, 'w') as sink:
        sink.write(cleaned)
    shutil.move(scratch, src_file)
# Marker comments FernFlower emits on the line(s) before a synthetic or bridge method.
_SYNTHETIC_MARKERS = ('// $FF: synthetic method', '// $FF: bridge method')


def _declared_param_names(arguments):
    """Return the parameter names of a Java parameter list, joined by ', '.

    Only commas outside generic brackets separate parameters, so a type such
    as ``Map<K, V>`` stays in one piece. The name is the last
    whitespace-separated token of each parameter, which also covers the
    ``Type ... name`` varargs form. An empty list yields ''.
    """
    pieces = []
    current = []
    depth = 0
    for char in arguments:
        if char == '<':
            depth += 1
        elif char == '>':
            depth -= 1
        if char == ',' and depth == 0:
            pieces.append(''.join(current))
            current = []
        else:
            current.append(char)
    pieces.append(''.join(current))
    return ', '.join(piece.split()[-1] for piece in pieces if piece.strip())


def _rename_abstract_params(match):
    """Rewrite the ``varN`` parameters of one 'abstract' match to ``p_<number>_N_``."""
    number = match.group('number')

    def rename(param):
        return '%s p_%s_%s_%s' % (param.group('type'), number, param.group('id'), param.group('end') or '')

    old_args = match.group('arguments')
    new_args = _REGEXP['params_var'].sub(rename, old_args)
    return match.group(0).replace(old_args, new_args)


def _process_data(data, class_name):
    """Return the text of one decompiled Java file with FernFlower artifacts cleaned up.

    The following is done, line by line:
      * line endings are normalised to unix style;
      * lines consisting only of ``super();`` are blanked;
      * a method announced by a synthetic/bridge marker comment is removed
        (marker, declaration, body line and closing brace) when its single
        statement forwards to ``this``/``super`` under the same method name
        with the same arguments (typecasts ignored), or when the declaration
        ends in ``) {`` and the single statement starts with ``this(``;
        forwarding methods whose name or arguments differ are reported on
        stdout and kept;
      * ``varN`` parameters of abstract ``func_<number>_...`` declarations are
        renamed to ``p_<number>_N_``;
      * trailing whitespace is stripped and runs of blank lines are condensed.

    The enum and anonymous-class generic passes of earlier versions are
    disabled; their patterns remain, commented out, in ``_REGEXP``.

    Args:
        data: full text of the Java source file.
        class_name: name of the class in the file; currently unused.

    Returns:
        The cleaned text.
    """
    lines = _REGEXP['normlines'].sub(r'\n', data).split('\n')
    total = len(lines)

    for idx, line in enumerate(lines):
        stripped = line.strip()

        # Remove unnecessary super calls
        if stripped == 'super();':
            line = ''

        if stripped in _SYNTHETIC_MARKERS:
            i = idx + 1
            # A method can carry both markers; skip over the second one
            if i < total and lines[i].strip() in _SYNTHETIC_MARKERS:
                i += 1

            # A removable method needs a declaration, one body line and a
            # closing brace after the marker. A marker closer than that to
            # the end of the file is left alone instead of indexing past it.
            if i + 2 < total:
                method = _REGEXP['method_def'].match(lines[i])
                body = _REGEXP['syn_call'].match(lines[i + 1])
                end = lines[i + 2].strip() == '}'

                if method and body and end:
                    if method.group('method') == body.group('target'):
                        args1 = _declared_param_names(method.group('arguments'))
                        args2 = _REGEXP['typecast'].sub('', body.group('arguments') or '')
                        if args1 == args2:
                            # `line` covers the marker at idx; lines[i - 1] covers a second marker
                            line = lines[i - 1] = lines[i] = lines[i + 1] = lines[i + 2] = ''
                        else:
                            print('MISMATCH ARGS %s' % lines[i])
                            print(' %s' % args1)
                            print('MISMATCH ARGS %s' % lines[i + 1])
                            print(' %s' % args2)
                    else:
                        print('MISMATCH TARGET %s %s' % (method.group('method'), body.group('target')))
                elif lines[i].endswith(') {') and lines[i + 1].lstrip().startswith('this(') and end:
                    line = lines[i - 1] = lines[i] = lines[i + 1] = lines[i + 2] = ''

        # Cleanup the argument names on abstract methods
        line = _REGEXP['abstract'].sub(_rename_abstract_params, line)

        # Trim trailing whitespace
        lines[idx] = line.rstrip()

    # Condense any consecutive empty lines
    return _REGEXP['newlines'].sub(r'\n', '\n'.join(lines))
def main():
    """Command-line entry point.

    With one positional argument the directory it names is cleaned in place
    (``fffix``). With two, the archive named first is extracted and cleaned
    into the directory named second (``fffix_zip_dir``). Any other number of
    arguments writes 'src_dir required' to stderr and exits with status 1.
    """
    usage = 'usage: %prog [options] src [dest]'
    version = '%prog 7.0'
    parser = OptionParser(version=version, usage=usage)
    # No options beyond optparse's built-in --help/--version are defined, so
    # only the positional arguments matter.
    _, args = parser.parse_args()

    if len(args) == 1:
        fffix(args[0])
    elif len(args) == 2:
        fffix_zip_dir(args[0], args[1])
    else:
        # sys.stderr.write prints the same text on Python 2 and 3, whereas the
        # `print >> sys.stderr` form raises TypeError on Python 3.
        sys.stderr.write('src_dir required\n')
        sys.exit(1)
def reallyrmtree(path):
    """Remove the directory tree at *path*, retrying on Windows.

    On platforms whose ``sys.platform`` does not start with 'win' the tree is
    removed once if it exists. On Windows removal is attempted up to 20 times
    with ``rmtree_onerror`` as the error handler, stopping as soon as the
    path can no longer be stat'ed.

    Args:
        path: directory to remove; a missing path is not an error.

    Raises:
        OSError: on Windows, with errno ``EPERM``, if *path* still exists
            after all attempts. Elsewhere, whatever ``shutil.rmtree`` raises.
    """
    if not sys.platform.startswith('win'):
        if os.path.exists(path):
            shutil.rmtree(path)
        return

    attempts = 0
    try:
        while os.stat(path) and attempts < 20:
            shutil.rmtree(path, onerror=rmtree_onerror)
            attempts += 1
    except OSError:
        # os.stat raising means the path is gone, which is the goal
        pass

    # raise OSError if the path still exists even after trying really hard
    if os.path.exists(path):
        raise OSError(errno.EPERM, "Failed to remove: '" + path + "'", path)
def rmtree_onerror(func, path, _, delay=0.5):
    """Error handler for ``shutil.rmtree``: make *path* writable, wait, retry once.

    Args:
        func: the function that failed on *path*; it is called again.
        path: the path *func* failed on.
        _: exception information passed by ``shutil.rmtree``; ignored.
        delay: seconds to sleep before the retry (defaults to the previously
            hard-coded 0.5).

    An ``OSError`` from the retry is deliberately swallowed: this handler is
    best-effort and leaves it to the caller to check whether removal worked.
    """
    if not os.access(path, os.W_OK):
        os.chmod(path, stat.S_IWUSR)
    time.sleep(delay)
    try:
        func(path)
    except OSError:
        pass
# Run the command-line interface only when this file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment