zhuowei · December 10, 2015 14:28
diff --git a/identvtable.py b/identvtable.py
 """Identify vtables from a C++ ELF application. Tailored for Minecraft-pi.

 Requires the pyelftools library from https://bitbucket.org/eliben/pyelftools

 Copyright (C) 2013 Zhuowei Zhang

 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA."""

 from __future__ import print_function

 from sys import argv
 import struct

 from elftools.elf.elffile import *
 from elftools.common.utils import *

 # Class names consulted by findBaseClassName(): an existing method name whose
 # class part is in this list is kept instead of a newly generated name.
 baseClassList = ["Entity", "Tile", "GameMode", "Screen"]

 def parseTypeInfoName(lastString):
 	"""Extract the type name from a length-prefixed typeinfo name string.

 	Typeinfo names are formatted as (ASCII int with length)TypeName,
 	e.g. "3Pig". Thanks, https://gist.github.com/2174255

 	Args:
 		lastString: candidate string to examine.

 	Returns:
 		The text after the leading digits when those digits, read as a
 		decimal number, equal the length of that text and are greater
 		than 1; None in every other case (no digit prefix, digits only,
 		empty string, length mismatch).
 	"""
 	# Count the leading digits; they form the declared length.
 	prefixLength = 0
 	for ch in lastString:
 		if not ch.isdigit():
 			break
 		prefixLength += 1
 	else:
 		# Empty string, or nothing but digits: there is no name part.
 		return None

 	if prefixLength == 0:
 		return None

 	try:
 		supposedLength = int(lastString[:prefixLength])
 	except ValueError:
 		# isdigit() accepts some characters (e.g. superscript digits)
 		# that int() cannot convert.
 		return None

 	name = lastString[prefixLength:]
 	if supposedLength > 1 and len(name) == supposedLength:
 		return name
 	return None

 def findTypeInfoNames(mcElfFile):
 	"""Scan the .rodata section for strings that look like typeinfo names.

 	Runs of printable ASCII bytes (32..126) that end with a NUL byte are
 	handed to parseTypeInfoName(); a run interrupted by any other byte is
 	discarded. Diagnostic information is printed along the way.

 	Args:
 		mcElfFile: object providing get_section_by_name(); the returned
 			section must provide data() and the 'sh_offset' / 'sh_addr'
 			header fields.

 	Returns:
 		dict mapping the address of each accepted string (sh_addr plus
 		the string's offset inside the section) to the name returned by
 		parseTypeInfoName().
 	"""
 	dataSection = mcElfFile.get_section_by_name(".rodata")
 	print("rodata section found:", dataSection)

 	dataBegin = dataSection['sh_offset']
 	dataAddr = dataSection['sh_addr']
 	print(hex(dataBegin), hex(dataAddr))

 	dataDiff = dataAddr - dataBegin
 	print("data diff is:", dataDiff)

 	typeInfos = {}
 	currentChars = []
 	stringBeginLoc = -1

 	# A bytearray yields integers on both Python 2 and Python 3, whereas
 	# indexing the raw section data yields str on 2 and int on 3.
 	for position, code in enumerate(bytearray(dataSection.data())):
 		if code == 0 and currentChars:
 			typeInfo = parseTypeInfoName("".join(currentChars))
 			if typeInfo is not None:
 				# section offset + sh_offset + (sh_addr - sh_offset)
 				address = stringBeginLoc + dataAddr
 				typeInfos[address] = typeInfo
 				print(hex(address), typeInfo)
 			currentChars = []
 		elif 32 <= code <= 126:
 			if not currentChars:
 				stringBeginLoc = position
 			currentChars.append(chr(code))
 		else:
 			# Non-printable byte: whatever was collected is not a string.
 			currentChars = []

 	return typeInfos

 def findTypeinfoPointers(mcElfFile, typeInfoNames):
 	"""Locate typeinfo records and the vtables referencing them in .rodata.

 	The section is read as little-endian 32-bit words (trailing bytes that
 	do not fill a word are ignored). Two passes are made:

 	1. A word equal to a key of typeInfoNames marks a typeinfo record; the
 	   address of the word just before it is remembered with that name.
 	2. A word equal to one of those remembered addresses, preceded by two
 	   zero words, starts a vtable. Words are collected from there up to
 	   (not including) the next zero word or the end of the section.

 	Args:
 		mcElfFile: object providing get_section_by_name(); the returned
 			section must provide data() and the 'sh_offset' / 'sh_addr'
 			header fields.
 		typeInfoNames: dict mapping string addresses to type names.

 	Returns:
 		list of dicts with the keys "type" (type name), "data" (a list
 		starting with 0 followed by the collected words) and "offset"
 		(byte offset inside the section of the word preceding the first
 		collected word).
 	"""
 	dataSection = mcElfFile.get_section_by_name(".rodata")
 	print("Rodata section found:", dataSection)
 	dataStream = dataSection.data()
 	dataBegin = dataSection['sh_offset']
 	dataAddr = dataSection['sh_addr']

 	dataDiff = dataAddr - dataBegin
 	print("data diff is:", dataDiff)

 	print(len(dataStream))

 	# Floor division keeps the repeat count an integer on Python 3, and
 	# unpack_from() tolerates any number of left-over bytes, so no fixed
 	# amount of padding has to be assumed.
 	wordCount = len(dataStream) // 4
 	wordFormat = "<" + str(wordCount) + "I"
 	print(wordFormat)
 	allIntegers = struct.unpack_from(wordFormat, dataStream)

 	# Pass 1: find the typeinfo records.
 	typeInfos = {}
 	for loc, value in enumerate(allIntegers):
 		typeInfo = typeInfoNames.get(value)
 		if typeInfo is None:
 			continue
 		recordOffset = (loc * 4) + dataBegin - 4
 		print("Found one typeinfo!", hex(value), hex(recordOffset), typeInfo)
 		typeInfos[recordOffset + dataDiff] = typeInfo

 	# Pass 2: find the vtables that point at those records.
 	vtables = []
 	for loc, value in enumerate(allIntegers):
 		typeInfo = typeInfos.get(value)
 		if typeInfo is None:
 			continue
 		# Two real preceding words are required; with loc < 2 a negative
 		# index would silently read from the end of the section instead.
 		if loc < 2 or allIntegers[loc - 2] != 0 or allIntegers[loc - 1] != 0:
 			continue
 		print("Found one vtable!", hex(value), typeInfo)
 		vtableData = [0]
 		vtableOffset = loc
 		# Stop at the first zero word, or at the end of the section when
 		# the table is not zero-terminated.
 		while vtableOffset < wordCount and allIntegers[vtableOffset] != 0:
 			vtableData.append(allIntegers[vtableOffset])
 			vtableOffset += 1
 		vtables.append({"type": typeInfo, "data": vtableData, "offset": ((loc - 1) * 4)})

 	for x in vtables:
 		print(hex(x["offset"] + dataBegin), x["type"])
 	return vtables

 def generateNewName(type, i):
 	"""Return the placeholder method name "<type>::func_<i>"."""
 	suffix = "::func_" + str(i)
 	return type + suffix

 def findBaseClassName(existingName, newName):
 	"""Choose between an already assigned method name and a new one.

 	The class part of existingName is the text before its first "::".

 	Args:
 		existingName: name of the form "Class::method".
 		newName: freshly generated candidate name.

 	Returns:
 		existingName when its class part is listed in baseClassList,
 		otherwise newName. A name without "::" has no class part and
 		therefore always yields newName.
 	"""
 	className, separator, _ = existingName.partition("::")
 	# partition() reports a missing separator explicitly; slicing with the
 	# -1 that find() returns would drop the last character instead and
 	# could match a base class by accident.
 	if separator and className in baseClassList:
 		return existingName
 	return newName

 def generateMethodDict(vtables, textBegin, textEnd, dataBegin):
 	"""Map every address stored in the vtables to its generated names.

 	For each vtable the words of "data" are visited from index 1 onwards.
 	A word outside [textBegin, textEnd] is reported and ends processing of
 	that vtable; names recorded for its earlier words are kept. Finally
 	hex(offset + dataBegin) and the type are printed for every vtable in
 	which no such word was found.

 	Args:
 		vtables: list of dicts with the keys "type", "data", "offset".
 		textBegin: lowest accepted address (inclusive).
 		textEnd: highest accepted address (inclusive).
 		dataBegin: value added to each "offset" when printing.

 	Returns:
 		dict mapping an address to the list of names generated for it by
 		generateNewName(type, index).
 	"""
 	methodDict = {}
 	acceptedVtables = []

 	for vtable in vtables:
 		entries = vtable["data"]
 		for slot in range(1, len(entries)):
 			functionPointer = entries[slot]
 			if not (textBegin <= functionPointer <= textEnd):
 				print("Found invalid vtable entry: ", hex(functionPointer), vtable["type"], hex(vtable["offset"] + dataBegin))
 				break
 			names = methodDict.setdefault(functionPointer, [])
 			names.append(generateNewName(vtable["type"], slot))
 		else:
 			# Reached only when the loop above was not cut short.
 			acceptedVtables.append(vtable)

 	for accepted in acceptedVtables:
 		print(hex(accepted["offset"] + dataBegin), accepted["type"])
 	return methodDict
 			

 def annotateDecomp(mcElfFile, decompInput, decompOutput, vtables):
 	"""Copy a disassembly listing, labelling addresses found in vtables.

 	Every input line is written to decompOutput unchanged. When the text
 	before the first ":\\t" of a line is a hexadecimal address present in
 	the dictionary built by generateMethodDict(), a line of the form
 	"\\n<address> <name, name, ...>" is written in front of it.

 	Args:
 		mcElfFile: object providing get_section_by_name() for ".text"
 			and ".rodata"; the sections must provide the 'sh_addr',
 			'sh_size' and 'sh_offset' header fields.
 		decompInput: iterable of text lines.
 		decompOutput: writable text file object.
 		vtables: list of vtable dicts as accepted by generateMethodDict().
 	"""
 	textSection = mcElfFile.get_section_by_name(".text")
 	textBegin = textSection['sh_addr']
 	textEnd = textBegin + textSection['sh_size']

 	dataSection = mcElfFile.get_section_by_name(".rodata")
 	dataBegin = dataSection['sh_offset']
 	dataEnd = dataSection['sh_addr'] + dataSection['sh_size']
 	print(hex(textBegin), hex(textEnd))

 	# NOTE(review): the upper bound passed on is the end of .rodata, not
 	# textEnd, so addresses up to the end of .rodata are accepted. This
 	# looks deliberate (it presumes .rodata lies above .text) - confirm.
 	methodDict = generateMethodDict(vtables, textBegin, dataEnd, dataBegin)

 	for line in decompInput:
 		addressText, separator, _ = line.partition(":\t")
 		# Only lines that really contain the separator can carry an
 		# address; slicing with find()'s -1 would instead chop the last
 		# character off separator-less lines and try to parse the rest.
 		if separator:
 			try:
 				addr = int(addressText.lstrip(), 16)
 			except ValueError:
 				addr = None
 			methodNames = methodDict.get(addr)
 			if methodNames is not None:
 				print("\n{0:8x} <{1}>".format(addr, ", ".join(methodNames)), file=decompOutput)
 		decompOutput.write(line)

 #print(methodDict)


 vtables = None

 if len(argv) != 4:
 	# SystemExit with a message prints it to stderr and exits with status
 	# 1, so a wrong invocation is reported as a failure and does not
 	# depend on the exit() helper installed by the site module.
 	raise SystemExit("Usage: python identvtable.py minecraft-pi originaldecomp.txt annotateddecomp.txt")

 # The ELF file must stay open while its sections are being read.
 with open(argv[1], "rb") as mcExecutable:
 	mcElfFile = ELFFile(mcExecutable)
 	typeInfos = findTypeInfoNames(mcElfFile)
 	vtables = findTypeinfoPointers(mcElfFile, typeInfos)

 	with open(argv[2], "r") as decompInput, open(argv[3], "w") as decompOutput:
 		annotateDecomp(mcElfFile, decompInput, decompOutput, vtables)
	"""Identify vtables from a C++ ELF application. Tailored for Minecraft-pi.

	Requires the pyelftools library from https://bitbucket.org/eliben/pyelftools

	Copyright (C) 2013 Zhuowei Zhang

	This program is free software; you can redistribute it and/or
	modify it under the terms of the GNU General Public License
	as published by the Free Software Foundation; either version 2
	of the License, or (at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program; if not, write to the Free Software
	Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA."""

	from __future__ import print_function

	from sys import argv
	import struct

	from elftools.elf.elffile import *
	from elftools.common.utils import *

	# Class names consulted by findBaseClassName(): an existing method name whose
	# class part is in this list is kept instead of a newly generated name.
	baseClassList = ["Entity", "Tile", "GameMode", "Screen"]

	def parseTypeInfoName(lastString):
		"""Extract the type name from a length-prefixed typeinfo name string.

		Typeinfo names are formatted as (ASCII int with length)TypeName,
		e.g. "3Pig". Thanks, https://gist.github.com/2174255

		Args:
			lastString: candidate string to examine.

		Returns:
			The text after the leading digits when those digits, read as a
			decimal number, equal the length of that text and are greater
			than 1; None in every other case (no digit prefix, digits only,
			empty string, length mismatch).
		"""
		# Count the leading digits; they form the declared length.
		prefixLength = 0
		for ch in lastString:
			if not ch.isdigit():
				break
			prefixLength += 1
		else:
			# Empty string, or nothing but digits: there is no name part.
			return None

		if prefixLength == 0:
			return None

		try:
			supposedLength = int(lastString[:prefixLength])
		except ValueError:
			# isdigit() accepts some characters (e.g. superscript digits)
			# that int() cannot convert.
			return None

		name = lastString[prefixLength:]
		if supposedLength > 1 and len(name) == supposedLength:
			return name
		return None

	def findTypeInfoNames(mcElfFile):
		"""Scan the .rodata section for strings that look like typeinfo names.

		Runs of printable ASCII bytes (32..126) that end with a NUL byte are
		handed to parseTypeInfoName(); a run interrupted by any other byte is
		discarded. Diagnostic information is printed along the way.

		Args:
			mcElfFile: object providing get_section_by_name(); the returned
				section must provide data() and the 'sh_offset' / 'sh_addr'
				header fields.

		Returns:
			dict mapping the address of each accepted string (sh_addr plus
			the string's offset inside the section) to the name returned by
			parseTypeInfoName().
		"""
		dataSection = mcElfFile.get_section_by_name(".rodata")
		print("rodata section found:", dataSection)

		dataBegin = dataSection['sh_offset']
		dataAddr = dataSection['sh_addr']
		print(hex(dataBegin), hex(dataAddr))

		dataDiff = dataAddr - dataBegin
		print("data diff is:", dataDiff)

		typeInfos = {}
		currentChars = []
		stringBeginLoc = -1

		# A bytearray yields integers on both Python 2 and Python 3, whereas
		# indexing the raw section data yields str on 2 and int on 3.
		for position, code in enumerate(bytearray(dataSection.data())):
			if code == 0 and currentChars:
				typeInfo = parseTypeInfoName("".join(currentChars))
				if typeInfo is not None:
					# section offset + sh_offset + (sh_addr - sh_offset)
					address = stringBeginLoc + dataAddr
					typeInfos[address] = typeInfo
					print(hex(address), typeInfo)
				currentChars = []
			elif 32 <= code <= 126:
				if not currentChars:
					stringBeginLoc = position
				currentChars.append(chr(code))
			else:
				# Non-printable byte: whatever was collected is not a string.
				currentChars = []

		return typeInfos

	def findTypeinfoPointers(mcElfFile, typeInfoNames):
		"""Locate typeinfo records and the vtables referencing them in .rodata.

		The section is read as little-endian 32-bit words (trailing bytes that
		do not fill a word are ignored). Two passes are made:

		1. A word equal to a key of typeInfoNames marks a typeinfo record; the
		   address of the word just before it is remembered with that name.
		2. A word equal to one of those remembered addresses, preceded by two
		   zero words, starts a vtable. Words are collected from there up to
		   (not including) the next zero word or the end of the section.

		Args:
			mcElfFile: object providing get_section_by_name(); the returned
				section must provide data() and the 'sh_offset' / 'sh_addr'
				header fields.
			typeInfoNames: dict mapping string addresses to type names.

		Returns:
			list of dicts with the keys "type" (type name), "data" (a list
			starting with 0 followed by the collected words) and "offset"
			(byte offset inside the section of the word preceding the first
			collected word).
		"""
		dataSection = mcElfFile.get_section_by_name(".rodata")
		print("Rodata section found:", dataSection)
		dataStream = dataSection.data()
		dataBegin = dataSection['sh_offset']
		dataAddr = dataSection['sh_addr']

		dataDiff = dataAddr - dataBegin
		print("data diff is:", dataDiff)

		print(len(dataStream))

		# Floor division keeps the repeat count an integer on Python 3, and
		# unpack_from() tolerates any number of left-over bytes, so no fixed
		# amount of padding has to be assumed.
		wordCount = len(dataStream) // 4
		wordFormat = "<" + str(wordCount) + "I"
		print(wordFormat)
		allIntegers = struct.unpack_from(wordFormat, dataStream)

		# Pass 1: find the typeinfo records.
		typeInfos = {}
		for loc, value in enumerate(allIntegers):
			typeInfo = typeInfoNames.get(value)
			if typeInfo is None:
				continue
			recordOffset = (loc * 4) + dataBegin - 4
			print("Found one typeinfo!", hex(value), hex(recordOffset), typeInfo)
			typeInfos[recordOffset + dataDiff] = typeInfo

		# Pass 2: find the vtables that point at those records.
		vtables = []
		for loc, value in enumerate(allIntegers):
			typeInfo = typeInfos.get(value)
			if typeInfo is None:
				continue
			# Two real preceding words are required; with loc < 2 a negative
			# index would silently read from the end of the section instead.
			if loc < 2 or allIntegers[loc - 2] != 0 or allIntegers[loc - 1] != 0:
				continue
			print("Found one vtable!", hex(value), typeInfo)
			vtableData = [0]
			vtableOffset = loc
			# Stop at the first zero word, or at the end of the section when
			# the table is not zero-terminated.
			while vtableOffset < wordCount and allIntegers[vtableOffset] != 0:
				vtableData.append(allIntegers[vtableOffset])
				vtableOffset += 1
			vtables.append({"type": typeInfo, "data": vtableData, "offset": ((loc - 1) * 4)})

		for x in vtables:
			print(hex(x["offset"] + dataBegin), x["type"])
		return vtables

	def generateNewName(type, i):
		"""Return the placeholder method name "<type>::func_<i>"."""
		suffix = "::func_" + str(i)
		return type + suffix

	def findBaseClassName(existingName, newName):
		"""Choose between an already assigned method name and a new one.

		The class part of existingName is the text before its first "::".

		Args:
			existingName: name of the form "Class::method".
			newName: freshly generated candidate name.

		Returns:
			existingName when its class part is listed in baseClassList,
			otherwise newName. A name without "::" has no class part and
			therefore always yields newName.
		"""
		className, separator, _ = existingName.partition("::")
		# partition() reports a missing separator explicitly; slicing with the
		# -1 that find() returns would drop the last character instead and
		# could match a base class by accident.
		if separator and className in baseClassList:
			return existingName
		return newName

	def generateMethodDict(vtables, textBegin, textEnd, dataBegin):
		"""Map every address stored in the vtables to its generated names.

		For each vtable the words of "data" are visited from index 1 onwards.
		A word outside [textBegin, textEnd] is reported and ends processing of
		that vtable; names recorded for its earlier words are kept. Finally
		hex(offset + dataBegin) and the type are printed for every vtable in
		which no such word was found.

		Args:
			vtables: list of dicts with the keys "type", "data", "offset".
			textBegin: lowest accepted address (inclusive).
			textEnd: highest accepted address (inclusive).
			dataBegin: value added to each "offset" when printing.

		Returns:
			dict mapping an address to the list of names generated for it by
			generateNewName(type, index).
		"""
		methodDict = {}
		acceptedVtables = []

		for vtable in vtables:
			entries = vtable["data"]
			for slot in range(1, len(entries)):
				functionPointer = entries[slot]
				if not (textBegin <= functionPointer <= textEnd):
					print("Found invalid vtable entry: ", hex(functionPointer), vtable["type"], hex(vtable["offset"] + dataBegin))
					break
				names = methodDict.setdefault(functionPointer, [])
				names.append(generateNewName(vtable["type"], slot))
			else:
				# Reached only when the loop above was not cut short.
				acceptedVtables.append(vtable)

		for accepted in acceptedVtables:
			print(hex(accepted["offset"] + dataBegin), accepted["type"])
		return methodDict


	def annotateDecomp(mcElfFile, decompInput, decompOutput, vtables):
		"""Copy a disassembly listing, labelling addresses found in vtables.

		Every input line is written to decompOutput unchanged. When the text
		before the first ":\\t" of a line is a hexadecimal address present in
		the dictionary built by generateMethodDict(), a line of the form
		"\\n<address> <name, name, ...>" is written in front of it.

		Args:
			mcElfFile: object providing get_section_by_name() for ".text"
				and ".rodata"; the sections must provide the 'sh_addr',
				'sh_size' and 'sh_offset' header fields.
			decompInput: iterable of text lines.
			decompOutput: writable text file object.
			vtables: list of vtable dicts as accepted by generateMethodDict().
		"""
		textSection = mcElfFile.get_section_by_name(".text")
		textBegin = textSection['sh_addr']
		textEnd = textBegin + textSection['sh_size']

		dataSection = mcElfFile.get_section_by_name(".rodata")
		dataBegin = dataSection['sh_offset']
		dataEnd = dataSection['sh_addr'] + dataSection['sh_size']
		print(hex(textBegin), hex(textEnd))

		# NOTE(review): the upper bound passed on is the end of .rodata, not
		# textEnd, so addresses up to the end of .rodata are accepted. This
		# looks deliberate (it presumes .rodata lies above .text) - confirm.
		methodDict = generateMethodDict(vtables, textBegin, dataEnd, dataBegin)

		for line in decompInput:
			addressText, separator, _ = line.partition(":\t")
			# Only lines that really contain the separator can carry an
			# address; slicing with find()'s -1 would instead chop the last
			# character off separator-less lines and try to parse the rest.
			if separator:
				try:
					addr = int(addressText.lstrip(), 16)
				except ValueError:
					addr = None
				methodNames = methodDict.get(addr)
				if methodNames is not None:
					print("\n{0:8x} <{1}>".format(addr, ", ".join(methodNames)), file=decompOutput)
			decompOutput.write(line)

	#print(methodDict)


	vtables = None

	if len(argv) != 4:
		# SystemExit with a message prints it to stderr and exits with status
		# 1, so a wrong invocation is reported as a failure and does not
		# depend on the exit() helper installed by the site module.
		raise SystemExit("Usage: python identvtable.py minecraft-pi originaldecomp.txt annotateddecomp.txt")

	# The ELF file must stay open while its sections are being read.
	with open(argv[1], "rb") as mcExecutable:
		mcElfFile = ELFFile(mcExecutable)
		typeInfos = findTypeInfoNames(mcElfFile)
		vtables = findTypeinfoPointers(mcElfFile, typeInfos)

		with open(argv[2], "r") as decompInput, open(argv[3], "w") as decompOutput:
			annotateDecomp(mcElfFile, decompInput, decompOutput, vtables)