Last active
May 17, 2017 00:18
-
-
Save Cimbali/2aea56e01959cb4bf801 to your computer and use it in GitHub Desktop.
Fix assembler source for PEBS usage (in python, cf. http://stackoverflow.com/a/35694557/1387346)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
""" Fix assembler source for PEBS usage | |
By default, prints fixed file to stdout. | |
command line options: | |
"-o" -- output file (instead of stdout) | |
"-p" -- output patch instead of fixed file | |
"-i" -- in-place, overwrite input file(s) | |
"-q" -- suppress warnings | |
"-v" -- more verbose | |
command line arguments: list of .s files to process | |
suggested usage: | |
change build to produce .s files | |
FROM: | |
cc [options] -c foo.c | |
TO: | |
cc [options] -S foo.c | |
pebsfixup -i foo.s | |
cc -c foo.s | |
suggested compiler options: | |
[probably only really needed if push/pop required.] | |
(1) use either of | |
-O2 -fno-optimize-sibling-calls | |
-O1 | |
(2) use -mno-omit-leaf-frame-pointer | |
(3) use -mno-red-zone [probably not required in any case] | |
NOTES: | |
(1) red zones are only really useful for leaf functions (i.e. if fncA calls | |
fncB, fncA's red zone would be clobbered) | |
(2) pushing onto the stack isn't a problem if there is a formal stack frame | |
(3) the push is okay if the function has no more than six arguments (i.e. | |
does _not_ use positive offsets from %rsp to access them) | |
Credits to Craig Estey, author of the original Perl implementation | |
see http://stackoverflow.com/a/35694557/1387346 | |
""" | |
import getopt | |
import time | |
import sys | |
import os | |
import re | |
# Diagnostic output streams shared by the whole module. They start out as
# plain flags and are replaced by file objects in main().
cmt = False # print only verbose
msg = True # print if non-quiet
err = True # always print

# Role of each register that may serve as scratch for a fix, keyed by the
# identifier that 'rgx' extracts.
reg_type = {
    # temporary registers
    '11': 'T',
    '10': 'T',
    # argument registers, sixth down to first
    '9': 'A6',
    '8': 'A5',
    'c': 'A4',
    'd': 'A3',
    'si': 'A2',
    'di': 'A1',
    # callee preserved
    '15': 'P',
    '14': 'P',
    '13': 'P',
    '12': 'P',
}

# Candidate scratch registers, most preferred first. %r10 is deliberately
# left out for now -- see the ABI notes at the bottom of the file.
reg_pref = [
    "11",
    "9", "8", "c", "d", "si", "di",
    "15", "14", "13", "12",
]

# Regular expressions

# symbol declarations (".type name, @kind")
symbol_type = re.compile(r"\s+\.type\s+([^,]+),\s*(\S+)")

# label (function entry point)
label = re.compile(r"([a-zA-Z_][a-zA-Z0-9_\.]*):")

# register operand; the captured group is a unique identifier:
# a, b, c, d, di, si, bp, sp or the number of an r8-r15 register
rgx = re.compile(r"%[re]?([a-d]|[ds]i|[0-9]+|[sb]p)[xlhwdb]?\b")

# return (function exit point), optionally carrying a rep prefix
ret = re.compile(r"\s+(rep[a-z]*\s+)?ret")

# stack pointer used with a numeric offset as a source operand
# (i.e. for argument passing)
stk = re.compile(r"\b[0-9]+\(%[re]?spl?\)\s*,")

# every flavour of mov (including cmov) whose source is a memory operand
memmov = re.compile(r"\s+(c?mov[a-z]*)\s+(.*\(.*\).*),(.*)")

# Reverse mapping of 'rgx': identifier -> register names, ordered as
# 64-bit, 32-bit, 16-bit and 8-bit.
reg_name = {
    "a": "rax eax ax al".split(),
    "b": "rbx ebx bx bl".split(),
    "c": "rcx ecx cx cl".split(),
    "d": "rdx edx dx dl".split(),
    "si": "rsi esi si sil".split(),
    "di": "rdi edi di dil".split(),
    "bp": "rbp ebp bp bpl".split(),
    "sp": "rsp esp sp spl".split(),
}
# r8..r15 all follow one scheme: the bare name, then the d/w/b suffixes.
reg_name.update(
    (str(num), ["r{}{}".format(num, suffix) for suffix in ("", "d", "w", "b")])
    for num in range(8, 16)
)
def regfull(rid, bits = 64):
    """ Return the full name of a register based on its identifier
    as the regex 'rgx' extracts it.

    rid  -- register identifier, a key of 'reg_name' (e.g. "a", "di", "11")
    bits -- width of the wanted register name: 64 (default), 32, 16 or 8

    Returns the AT&T-style name, '%' prefix included (e.g. "%r11d").
    Raises ValueError for an unsupported width. An unknown identifier is
    reported on the 'err' stream and terminates the program with status 6.
    """
    # position of each width inside the reg_name entries
    column = {64: 0, 32: 1, 16: 2, 8: 3}
    try:
        bid = column[bits]
    except (KeyError, TypeError):
        raise ValueError("No such bit-width for registers: {}".format(bits)) from None

    try:
        return "%" + reg_name[rid][bid]
    except (KeyError, TypeError):
        # Report the identifier that failed to resolve (the message used to
        # show the compiled 'rgx' pattern object instead).
        print("ERROR: no such register id: {}".format(rid), file=err)
        sys.exit(6)
def regfix(fnc):
    """ Given a function object, find a temporary register it doesn't use.

    fnc -- function record built by parse_file; 'usecount' (register id ->
           number of uses) and 'stkargs' (lines using %rsp with a numeric
           offset) are read.

    Candidates are tried in the order of 'reg_pref'. A callee-preserved
    ('P') register has to be saved with a push, which moves %rsp; as the
    module notes say, that is only safe when the function does _not_ address
    memory through offsets from %rsp. Such registers are therefore skipped
    whenever 'stkargs' is non-empty.

    Returns the register identifier, or None (after a warning on the 'msg'
    stream) if no candidate qualifies.
    """
    push_is_safe = not fnc['stkargs']

    for reg in reg_pref:
        if reg in fnc['usecount']:
            # already in use somewhere in the function
            continue
        if reg_type[reg] == 'P' and not push_is_safe:
            # would need push/pop, which would shift the %rsp offsets
            continue
        return reg

    print("WARNING: unable to locate usable fixup register\n", file=msg)
    return None
def parse_file(f, lines, sym_type, functions):
    """ Scan a file of assembly, collecting symbols and per-function facts.

    f         -- iterable of text lines (an open .s file)
    lines     -- list, receives every line of f with its newline removed
    sym_type  -- dict, receives symbol name -> type taken from the
                 ".type name, @type" directives (leading '@' stripped)
    functions -- dict, receives one record per label:
                   'lno'      0-based line number of the label
                   'usecount' register id -> number of occurrences
                   'retlist'  line numbers of return instructions
                   'fixlist'  loads to rewrite, each a dict with the keys
                              'lno', 'ins', 'lhs' and 'rhs'
                   'stkargs'  line numbers using %rsp with a numeric offset

    Every label opens a new record, whether or not a .type directive
    declared it a function.
    """
    # Throw-away containers: instructions found before the first label have
    # no function to belong to, but must not crash the scan.
    usecount, retlist, fixlist, stkargs = {}, [], [], []

    for lno, raw in enumerate(f):
        line = raw.rstrip("\n")
        lines.append(line)

        if line.lstrip().startswith('.'):
            # look for ".type blah, @function"
            type_def = symbol_type.match(line)
            if type_def:
                name = type_def.group(1)
                kind = type_def.group(2).lstrip('@')
                sym_type[name] = kind
                print("TYPE: {} --> {}".format(name, kind), file=cmt)
            # directives and dot-prefixed local labels hold no instruction
            continue

        # look for "label:"
        symbol = label.fullmatch(line)
        if symbol:
            # fresh containers; the lines that follow are attributed to them
            usecount, retlist, fixlist, stkargs = {}, [], [], []
            functions[symbol.group(1)] = {
                'lno': lno,
                'usecount': usecount,
                'retlist': retlist,
                'fixlist': fixlist,
                'stkargs': stkargs,
            }
            continue

        # remember all used registers
        for reg in rgx.findall(line):
            usecount[reg] = usecount.get(reg, 0) + 1

        # handle returns
        if ret.match(line):
            retlist.append(lno)
            continue

        # handle stack uses through an offset from %rsp
        if stk.search(line):
            stkargs.append(lno)
            continue

        # find mov with memory on lhs
        mem = memmov.match(line)
        if mem is None:
            continue
        ins, lhs, rhs = mem.groups()

        dest = rgx.search(rhs)
        if dest is None:
            # No rgx match: a register we're not interested in, e.g. xmm*
            continue

        # The address may be computed from several registers; a fix is
        # needed when the destination is one of them.
        if dest.group(1) in rgx.findall(lhs):
            fixlist.append({'lno': lno, 'ins': ins, 'lhs': lhs, 'rhs': rhs})
def fixfnc(fncname, fnc, lines, diff):
    """ Fix a given function.

    fncname -- label of the function, used in diagnostics only
    fnc     -- function record built by parse_file
    lines   -- list of the file's lines, edited in place; an entry that
               receives inserted code becomes several lines joined by "\\n"
    diff    -- dict, receives line number -> ([old lines], [new lines]) for
               each modification, as consumed by print_patch

    Each load listed in fnc['fixlist'] is replaced by a 'lea' of the address
    into a scratch register followed by the load through that register. If
    the scratch register is callee-preserved, a push is added after the
    function label and a pop before every return.

    Exits with status 4 when fixes are needed but no scratch register is
    available, and with status 5 when the register would have to be saved
    but the function has no return point.
    """
    fixlist = fnc['fixlist']
    retlist = fnc['retlist']

    print("FUNCTION: {} RET: {} FIX: {}".format(fncname,
        len(retlist), len(fixlist)), file=cmt)

    # find the fix register; regfix returns None when there is none
    fixrid = regfix(fnc)
    if fixrid is None:
        fixrtype = fixreg = None
        if fixlist:
            print("ERROR {} fixes needed but no fix register"
                .format(len(fixlist)), file=err)
            sys.exit(4)
    else:
        fixrtype = reg_type[fixrid]
        fixreg = regfull(fixrid)
        print(" FIXREG --> {} (TYPE: {})".format(fixreg,fixrtype), file=cmt)

    # check number of return points
    if len(retlist) == 0:
        print("WARNING function '{}' has no return points\n"
            "Suggest recompiling with correct options."
            .format(fncname), file=msg)

        if len(fixlist) == 0:
            print("working around because function needs no fixups",
                file=msg)
        elif fixrtype != "P":
            print("working around because fixup reg does not need to be saved",
                file=msg)
        else:
            # the fix count is now substituted into the message
            print("ERROR impossible fixup: {} fixes needed, "
                "register needs to be saved but no return points!"
                .format(len(fixlist)), file=err)
            sys.exit(5)

    # show stats on register usage in function, least used first
    usecount = fnc['usecount']
    for reg in sorted(usecount, key=usecount.get):
        typ = reg_type.get(reg, "?")
        print(" {:2} used {} times (TYPE: {})".format(reg, usecount[reg], typ),
            file=cmt)

    # individual fixups
    for fix in fixlist:
        lno = fix['lno']
        add = [ "\tlea\t{}, {}".format(fix['lhs'], fixreg),
                "\t{}\t({}), {}".format(fix['ins'], fixreg, fix['rhs'])]
        print("\n".join([
            "Replace line {}: '{}' with:".format(lno, lines[lno])
            ] + add), file=cmt)
        diff[lno] = ([lines[lno]], add)
        lines[lno] = "\n".join(add)

    if fixlist and fixrtype == "P":
        # fix the function prolog
        add = "\tpush\t{}".format(fixreg)
        diff[fnc['lno']] = ([], [add])
        print("Insert push after line {}: {}".format(fnc['lno'], add), file=cmt)
        lines[fnc['lno']] += "\n" + add

        # fix the function return points
        for retpoint in retlist:
            add = "\tpop\t{}".format(fixreg)
            # NOTE(review): this entry is keyed one line above the return,
            # whereas the other entries use the modified line itself --
            # confirm against print_patch.
            diff[retpoint - 1] = ([lines[retpoint]], [add, lines[retpoint]])
            print("Insert pop after line {}: {}".format(retpoint, add), file=cmt)
            lines[retpoint] = add + "\n" + lines[retpoint]
def fixup_file(infile, outfile, as_patch):
    """ Process a given assembly file.

    infile   -- path of the .s file to read
    outfile  -- open text stream that receives the result
    as_patch -- if true, write a patch (with ---/+++ header lines) instead
                of the complete fixed file
    """
    print("processing {} ...".format(infile), file=msg)
    print("-" * 78, file=cmt)
    print("FILE: {}".format(infile), file=cmt)

    sym_type, functions = {}, {}
    lines, diff = [], {}

    with open(infile, "r") as source:
        parse_file(source, lines, sym_type, functions)

    fixtot = sum(len(record['fixlist']) for record in functions.values())
    if fixtot:
        print("File {} needs {} fixups".format(infile, fixtot), file=msg)

    # handle the functions in the order they appear in the file
    in_file_order = sorted(functions.items(), key=lambda item: item[1]['lno'])
    for name, record in in_file_order:
        fixfnc(name, record, lines, diff)

    if not as_patch:
        print("\n".join(lines), file=outfile)
        return

    modified = time.ctime(os.stat(infile).st_mtime)
    print("--- {} {}".format(infile, modified), file=outfile)
    print("+++ {} {}".format(infile, time.ctime()), file=outfile)
    if diff:
        print_patch(outfile, lines, diff)
def print_patch(outfile, lines, diff):
    """ Given context lines (lines) and diff a mapping:
    first modified lno (on old file) -> ([old lines], [new lines]),
    print the patch that transforms old to new.

    outfile -- text stream the hunks are printed to
    lines   -- list of lines that the context is read from
    diff    -- the mapping described above

    Each hunk is printed as an "@@ -a,b +c,d @@" header followed by lines
    prefixed with ' ' (context), '-' (removed) or '+' (added). A
    modification whose successor lies at most 2 * ctx lines further is not
    printed on its own: its lines stay in the buffer and go out with the
    next hunk.

    The [new lines] lists held in diff are modified in place: a leading
    entry equal to an old line is popped from them.
    """
    # number of (possibly merged) context lines
    ctx = 3
    shift = 0 # Cumulative difference of lines between old and new.
    out = [] # Lines (context and modified) of the current patch block.
    add = 0 # Difference of lines removed and inserted in out.
    past = 0 # 1 past the last item shown, i.e. the first non-shown.
    modln = sorted(diff) # Sorted list of modified lines.
    # Pair every modified line with a flag telling whether the following
    # modification is far enough away for the hunk to be closed after this
    # one; sys.maxsize stands in for the successor of the last entry.
    for l, show in [(l, nextl > l + 2 * ctx) for l, nextl
            in zip(modln, modln[1:] + [sys.maxsize])]:
        # remove context lines of the previous modification
        # if they clash with the current modification
        while past > l:
            past -= 1
            out.pop()
        # context lines before
        for ll in range(max(l - ctx, past), l):
            out.append(' ' + lines[ll])
        # one (atomic) set of removed lines, and added lines
        old, new = diff[l]
        add += len(new) - len(old)
        for o in old:
            if new and o == new[0]:
                # the old line is also the first new line: show it once as
                # context and drop it from the added lines
                new.pop(0)
                c = ' '
            else:
                c = '-'
            out.append(c + o)
        for n in new:
            out.append('+' + n)
        # context lines after, until past (excluded)
        past = l + add + ctx + 1
        for ll in range(past - ctx, past):
            out.append(' ' + lines[ll])
        # print iff next set of lines is not contiguous
        if show:
            print("\n".join(["@@ -{},{} +{},{} @@".format(
                past - len(out) + 1, len(out),
                past - len(out) + 1 + shift, len(out) + add)
                ] + out), file=outfile)
            # carry the line-count change over, then start an empty hunk
            shift += add
            out, add = ([], 0)
def usage():
    """ Print the command-line synopsis, then the module docstring, which
    holds the detailed explanations.
    """
    synopsis = ("\nUsage: {} [[-o|--output] file] [-p|--patch] [-i] [-v | -q] "
                "file.s [file.s [...]]\n")
    program = sys.argv[0]
    print(synopsis.format(program))
    print(__doc__)
def main():
    """ Parse arguments, set output streams, and start processing files.

    Exit statuses set here: 1 for a bad option or a non-.s argument, 2 for
    -i combined with -o or -p, 3 for -q combined with -v. Asking for help,
    or passing no file at all, prints the usage and exits with status 0.
    """
    global cmt, msg, err

    inplace = False
    mkpatch = False
    outfile = "-"

    try:
        opts, args = getopt.getopt(sys.argv[1:], "ho:piqv", ["help", "output=", "patch"])
    except getopt.GetoptError as exc:
        # Do not bind the exception to 'err': that name is declared global
        # here, and the target of an except clause is deleted when the
        # clause ends.
        print(exc, file=sys.stderr)
        usage()
        sys.exit(1)

    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o == "-q":
            msg = False
        elif o == "-v":
            cmt = True
        elif o in ("-p", "--patch"):
            mkpatch = True
        elif o in ("-o", "--output"):
            outfile = a
        elif o == "-i":
            inplace = True

    if not args:
        usage()
        sys.exit()
    elif any(os.path.splitext(name)[1] != ".s" for name in args):
        print("Pass only assembly files (*.s) as arguments", file=sys.stderr)
        sys.exit(1)
    elif inplace and (outfile != "-" or mkpatch):
        print("Can't use -i with -o or -p!", file=sys.stderr)
        sys.exit(2)
    elif cmt and not msg:
        print("Can't be both quiet and verbose!", file=sys.stderr)
        sys.exit(3)

    with open(os.devnull, 'w') as silent:
        # set output streams: a disabled level writes to the null device
        cmt = sys.stderr if cmt else silent
        msg = sys.stderr if msg else silent
        err = sys.stderr if err else silent

        # different approach based on output strategy
        if inplace:
            for f in args:
                tmp = f + ".tmp"
                with open(tmp, "w") as out:
                    fixup_file(f, out, mkpatch)
                # Swap the files only once the temporary one is closed;
                # os.replace also overwrites an existing target on Windows.
                os.replace(tmp, f)
        elif outfile == "-":
            # sys.stdout is used as is, so that it is not closed behind
            # the interpreter's back
            for f in args:
                fixup_file(f, sys.stdout, mkpatch)
        else:
            with open(outfile, "w") as out:
                for f in args:
                    fixup_file(f, out, mkpatch)


if __name__ == '__main__':
    main()
# ------------------------------------------------------------------------------ | |
# x86 ABI register usage in function calls: | |
# | |
# %rax temporary register; | |
# with variable arguments passes information about the number of vector | |
# registers used; 1st return register | |
# Preserved: No | |
# | |
# %rbx callee-saved register; optionally used as base pointer | |
# Preserved: Yes | |
# | |
# %rcx used to pass 4th integer argument to functions | |
# Preserved: No | |
# | |
# %rdx used to pass 3rd argument to functions; 2nd return register | |
# Preserved: No | |
# | |
# %rsp stack pointer | |
# Preserved: Yes | |
# | |
# %rbp callee-saved register; optionally used as frame pointer | |
# Preserved: Yes | |
# | |
# %rsi used to pass 2nd argument to functions | |
# Preserved: No | |
# | |
# %rdi used to pass 1st argument to functions | |
# Preserved: No | |
# | |
# %r8 used to pass 5th argument to functions | |
# Preserved: No | |
# | |
# %r9 used to pass 6th argument to functions | |
# Preserved: No | |
# | |
# %r10 temporary register, used for passing a function's static chain pointer | |
# Preserved: No | |
# | |
# %r11 temporary register | |
# Preserved: No | |
# | |
# %r12-r15 callee-saved registers | |
# Preserved: Yes | |
# | |
# %xmm0-%xmm1 used to pass and return floating point arguments | |
# Preserved: No | |
# | |
# %xmm2-%xmm7 used to pass floating point arguments | |
# Preserved: No | |
# | |
# %xmm8-%xmm15 temporary registers | |
# Preserved: No | |
# | |
# %mmx0-%mmx7 temporary registers | |
# Preserved: No | |
# | |
# %st0,%st1 temporary registers; used to return long double arguments | |
# Preserved: No | |
# | |
# %st2-%st7 temporary registers | |
# Preserved: No | |
# | |
# %fs Reserved for system (as thread specific data register) | |
# Preserved: No | |
# | |
# mxcsr SSE2 control and status word partial | |
# x87 SW x87 status word | |
# Preserved: No | |
# | |
# x87 CW x87 control word | |
# Preserved: Yes |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment