Cimbali · May 17, 2017 00:18
diff --git a/pebsfixup.py b/pebsfixup.py
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """ Fix assembler source for PEBS usage
    By default, prints fixed file to stdout.

    command line options:
      "-o" -- output file (instead of stdout)
      "-p" -- output patch instead of fixed file
      "-i" -- in-place, overwrite input file(s)

      "-q" -- suppress warnings
      "-v" -- more verbose

    command line arguments: list of .s files to process


    suggested usage:
      change build to produce .s files
      FROM:
        cc [options] -c foo.c
      TO:
        cc [options] -S foo.c
        pebsfixup -i foo.s
        cc -c foo.s

    suggested compiler options:
    [probably only really needed if push/pop required.]
      (1) use either of
          -O2 -fno-optimize-sibling-calls
          -O1
      (2) use -mno-omit-leaf-frame-pointer
      (3) use -mno-red-zone [probably not required in any case]

    NOTES:
      (1) red zones are only really useful for leaf functions (i.e. if fncA calls
          fncB, fncA's red zone would be clobbered)
      (2) pushing onto the stack isn't a problem if there is a formal stack frame
      (3) the push is okay if the function has no more than six arguments (i.e.
          does _not_ use positive offsets from %rsp to access them)

    Credits to and original Perl implementation from Craig Estey
    see http://stackoverflow.com/a/35694557/1387346
 """


 import getopt
 import time
 import sys
 import os
 import re


 # output files, global, will be initialized in main
 # Output streams, module-global. They start out as on/off flags and are
 # rebound to file objects in main().
 cmt = False  # verbose-only output
 msg = True   # output shown unless quiet
 err = True   # output that is always shown


 # Registers usable as scratch register for our fix, keyed by the identifier
 # that 'rgx' extracts, mapped to their role in a function call.
 reg_type = {
    # temporary registers
    "11": "T",
    "10": "T",
    # argument registers (A<n> is the n-th argument)
    "9": "A6",
    "8": "A5",
    "c": "A4",
    "d": "A3",
    "si": "A2",
    "di": "A1",
    # callee preserved
    "15": "P",
    "14": "P",
    "13": "P",
    "12": "P",
 }

 # In order of preference. Should we use %r10? -- see notes in ABI at bottom
 reg_pref = "11 9 8 c d si di 15 14 13 12".split()


 # Regular expressions
 # symbol declarations: ".type name, @kind"
 symbol_type = re.compile(r"\s+\.type\s+([^,]+),\s*(\S+)")
 # label (function entry point)
 label = re.compile(r"([a-zA-Z_][a-zA-Z0-9_\.]*):")
 # match register, return a unique identifier: a,b,c,d,di,si,sp,bp or numeric
 rgx = re.compile(r"%[re]?([a-d]|[ds]i|[0-9]+|[sb]p)[xlhwdb]?\b")
 # return (function exit point), optionally behind a rep* prefix
 ret = re.compile(r"\s+(rep[a-z]*\s+)?ret")
 # use stack pointer with a numeric offset (i.e. for argument passing)
 stk = re.compile(r"\b[0-9]+\(%[re]?spl?\)\s*,")
 # all types of mov instructions, from memory
 memmov = re.compile(r"\s+(c?mov[a-z]*)\s+(.*\(.*\).*),(.*)")

 # Reverse mapping of the 'rgx' regular expression: identifier -> register
 # names in 64, 32, 16 and 8 bit width (in that order).
 reg_name = {}
 reg_name.update({
    rid: ["r{}x".format(rid), "e{}x".format(rid), rid + "x", rid + "l"]
    for rid in ("a", "b", "c", "d")
 })
 reg_name.update({
    rid: ["r" + rid, "e" + rid, rid, rid + "l"]
    for rid in ("si", "di", "bp", "sp")
 })
 reg_name.update({
    str(num): ["r{}{}".format(num, suffix) for suffix in ("", "d", "w", "b")]
    for num in range(8, 16)
 })


 def regfull(rid, bits = 64):
    """ Return the full name of a register based on its identifier
        as the regex 'rgx' extracts it.

        rid  -- register identifier, a key of 'reg_name' (e.g. "a", "si", "11")
        bits -- bit-width of the wanted register name: 64 (default), 32, 16
                or 8

        Returns the name prefixed with "%", e.g. "%rax" or "%r11d".
        Raises ValueError if bits is not a supported width.
        If rid is unknown, reports it on 'err' and exits with status 6.
    """
    # position of each width in the lists stored in reg_name
    widths = (64, 32, 16, 8)
    try:
        bid = widths.index(bits)
    except ValueError:
        raise ValueError(
            "No such bit-width for registers: {}".format(bits)) from None

    try:
        return "%" + reg_name[rid][bid]
    except (KeyError, TypeError):
        # report the identifier that failed, not the regex it came from
        print("ERROR: no such register id: {}".format(rid), file=err)
        sys.exit(6)


 def regfix(fnc):
    """ Given a function object, find a temporary register it doesn't use.

        fnc -- function record as built by parse_file; 'usecount' (registers
               the function mentions) and 'stkargs' (lines accessing the
               stack through %rsp offsets) are read

        Candidates are tried in 'reg_pref' order. Using a callee preserved
        register (type 'P') means saving it with a push, which moves %rsp, so
        those are only eligible when the function has no such stack accesses.

        Returns the register identifier, or None (after a warning on 'msg')
        if nothing can be found.
    """
    used = fnc['usecount']
    has_stack_args = len(fnc['stkargs']) > 0

    for reg in reg_pref:
        if reg in used:
            continue
        if reg_type[reg] == 'P' and has_stack_args:
            # push/pop would shift the %rsp-relative offsets
            continue
        return reg

    print("WARNING: unable to locate usable fixup register\n", file=msg)
    return None


 def parse_file(f, lines, sym_type, functions):
    """ Given a file of assembly, extract symbols into sym_type and infos
        about functions into functions, using a lot of regular expressions.

        f         -- iterable of text lines (e.g. an open file)
        lines     -- list, receives every line of f without trailing newline
        sym_type  -- dict, receives name -> type (leading '@' removed) for
                     every ".type name, @type" directive
        functions -- dict, receives label -> record for every "label:" line

        Each record holds:
          'lno'      -- 0-based line number of the label
          'usecount' -- register identifier -> number of mentions
          'retlist'  -- line numbers of return instructions
          'stkargs'  -- line numbers of accesses through an %rsp offset
          'fixlist'  -- loads whose destination register is also used to
                        compute the address, as dicts with the keys
                        'lno', 'ins', 'lhs' and 'rhs'

        Lines starting with '.' (directives, '.L' local labels) never open a
        record. Instructions found before the first label belong to no
        function and are only stored in lines.
    """
    current = None  # record of the function being scanned, if any

    for lno, raw in enumerate(f):
        line = raw.rstrip("\n")
        lines.append(line)

        if line.lstrip().startswith('.'):
            # look for ".type blah, @function"
            type_def = symbol_type.match(line)
            if type_def:
                name = type_def.group(1)
                kind = type_def.group(2).lstrip('@')
                sym_type[name] = kind
                print("TYPE: {} --> {}".format(name, kind), file=cmt)
            continue

        # look for "label:"
        symbol = label.fullmatch(line)
        if symbol:
            current = {
                'lno': lno,
                'usecount': {},
                'retlist': [],
                'fixlist': [],
                'stkargs': [],
            }
            functions[symbol.group(1)] = current
            continue

        if current is None:
            # nothing to attribute this line to
            continue

        # remember all used registers
        usecount = current['usecount']
        for reg in rgx.findall(line):
            usecount[reg] = usecount.get(reg, 0) + 1

        # handle returns
        if ret.match(line):
            current['retlist'].append(lno)
            continue

        # handle stack uses through %rsp offsets
        if stk.search(line):
            current['stkargs'].append(lno)
            continue

        # find mov with memory on lhs
        mem = memmov.match(line)
        if not mem:
            continue
        ins, lhs, rhs = mem.group(1), mem.group(2), mem.group(3)

        dest = rgx.search(rhs)
        if dest is None:
            # No rgx match: a register we're not interested in, e.g. xmm*
            continue

        # may use several registers to compute load address
        if dest.group(1) in rgx.findall(lhs):
            # overlap!
            current['fixlist'].append(
                {'lno': lno, 'ins': ins, 'lhs': lhs, 'rhs': rhs})


 def fixfnc(fncname, fnc, lines, diff):
    """ Fix a given function.

        fncname -- name of the function, used in messages only
        fnc     -- function record as built by parse_file
        lines   -- list of the file's lines; fixed lines are rewritten in
                   place (a rewritten entry may hold several lines joined
                   by newlines)
        diff    -- dict, receives for every change:
                   first modified lno (on old file) -> ([old lines], [new lines])

        Every load listed in fnc['fixlist'] is replaced by a lea into a
        scratch register followed by the load through that register. If the
        scratch register is callee preserved, a push is added after the
        function label and a pop before every return.

        Exits with status 4 if fixes are needed but no register is free, and
        with status 5 if the register would have to be saved but the function
        has no return point.
    """

    fixlist = fnc['fixlist']
    retlist = fnc['retlist']
    print("FUNCTION: {}  RET: {} FIX: {}".format(fncname,
                len(retlist), len(fixlist)), file=cmt)

    # find fix register fixrid, may be None
    fixrid = regfix(fnc)
    if fixrid is None:
        fixrtype = fixreg = None
        if fixlist:
            print("ERROR {} fixes needed but no fix register"
                    .format(len(fixlist)), file=err)
            sys.exit(4)
    else:
        fixrtype = reg_type[fixrid]
        fixreg = regfull(fixrid)
        print("  FIXREG --> {} (TYPE: {})".format(fixreg, fixrtype), file=cmt)

    # check number of return points
    if not retlist:
        print("WARNING function '{}' has no return points\n" \
            "Suggest recompiling with correct options."
            .format(fncname), file=msg)

        if not fixlist:
            print("working around because function needs no fixups",
                file=msg)

        elif fixrtype != "P":
            print("working around because fixup reg does not need to be saved",
                file=msg)

        else:
            print("ERROR impossible fixup: {} fixes needed, " \
                "register needs to be saved but no return points!"
                .format(len(fixlist)), file=err)
            sys.exit(5)

    # show stats on register usage in function
    usecount = fnc['usecount']
    for reg in sorted(usecount, key = lambda r: usecount[r]):
        typ = reg_type.get(reg, "?")
        print("  {:2} used {} times (TYPE: {})".format(reg, usecount[reg], typ),
            file=cmt)

    # individual fixups
    for fix in fixlist:
        lno = fix['lno']
        add = [ "\tlea\t{}, {}".format(fix['lhs'], fixreg),
                "\t{}\t({}), {}".format(fix['ins'], fixreg, fix['rhs'])]

        print("\n".join([
            "Replace line {}: '{}' with:".format(lno, lines[lno])
        ] + add), file=cmt)
        diff[lno] = ([lines[lno]], add)

        lines[lno] = "\n".join(add)

    if fixlist and fixrtype == "P":
        # fix the function prolog
        entry = fnc['lno']
        add = "\tpush\t{}".format(fixreg)

        # recorded as "label -> label, push" so that a patch places the push
        # after the label line rather than before it
        diff[entry] = ([lines[entry]], [lines[entry], add])
        print("Insert push after line {}: {}".format(entry, add), file=cmt)

        lines[entry] += "\n" + add

        # fix the function return points
        for retpoint in retlist:
            add = "\tpop\t{}".format(fixreg)

            # keyed on the return line itself: it is the first modified line
            diff[retpoint] = ([lines[retpoint]], [add, lines[retpoint]])
            print("Insert pop before line {}: {}".format(retpoint, add),
                file=cmt)

            lines[retpoint] = add + "\n" + lines[retpoint]


 def fixup_file(infile, outfile, as_patch):
    """ Process a given assembly file.

        infile   -- path of the assembly file to read
        outfile  -- writable text stream receiving the result
        as_patch -- if true, write a patch (two header lines, then the hunks
                    if anything changed) instead of the whole fixed file
    """
    print("processing {} ...".format(infile), file=msg)

    print("-" * 78, file=cmt)
    print("FILE: {}".format(infile), file=cmt)

    lines, diff = [], {}
    sym_type, functions = {}, {}

    with open(infile, "r") as src:
        parse_file(src, lines, sym_type, functions)

    fixtot = 0
    for info in functions.values():
        fixtot += len(info['fixlist'])
    if fixtot:
        print("File {} needs {} fixups".format(infile, fixtot), file=msg)

    # fix functions in the order they appear in the file
    in_file_order = sorted(functions.items(), key=lambda item: item[1]['lno'])
    for name, info in in_file_order:
        fixfnc(name, info, lines, diff)

    if not as_patch:
        print("\n".join(lines), file=outfile)
        return

    print("--- {} {}".format(infile, time.ctime(os.stat(infile).st_mtime)), file=outfile)
    print("+++ {} {}".format(infile, time.ctime()), file=outfile)

    if diff:
        print_patch(outfile, lines, diff)


 def print_patch(outfile, lines, diff):
    """ Given context lines (lines) and diff a mapping:
        first modified lno (on old file) -> ([old lines], [new lines]),
        print the patch that transforms old to new as unified-diff hunks.

        outfile -- writable text stream
        lines   -- lines of the file indexed by 0-based old line number; only
                   entries outside the spans listed in diff are read
        diff    -- the modifications; it is not altered

        Modifications separated by at most twice the context size share one
        hunk. Lines that start or end both the old and the new side of a
        modification are printed as context.
    """
    # number of context lines around a modification
    ctx = 3

    total = len(lines)
    modln = sorted(diff)  # Sorted list of modified lines.

    shift = 0     # Cumulative difference of lines between old and new.
    out   = []    # Lines (context and modified) of the current patch block.
    n_old = 0     # Number of lines of out that exist in the old file.
    n_new = 0     # Number of lines of out that exist in the new file.
    first = 0     # Old lno of the first line of out.
    past  = 0     # 1 past the last old line in out, i.e. the first non-shown.
    fresh = True  # No patch block is currently being built.

    for l, nextl in zip(modln, modln[1:] + [None]):
        old, new = diff[l]

        if fresh:
            first = past = max(l - ctx, 0)
            fresh = False

        # context lines before (or between two merged modifications)
        for ll in range(past, min(l, total)):
            out.append(' ' + lines[ll])
            n_old += 1
            n_new += 1

        # lines shared by the start or the end of both sides are unchanged
        common = min(len(old), len(new))
        head = 0
        while head < common and old[head] == new[head]:
            head += 1
        tail = 0
        while tail < common - head and old[-1 - tail] == new[-1 - tail]:
            tail += 1

        out.extend(' ' + o for o in old[:head])
        out.extend('-' + o for o in old[head:len(old) - tail])
        out.extend('+' + n for n in new[head:len(new) - tail])
        out.extend(' ' + o for o in old[len(old) - tail:])
        n_old += len(old)
        n_new += len(new)
        past = max(past, l + len(old))

        # keep building if the next modification's context would touch ours
        if nextl is not None and nextl - past <= 2 * ctx:
            continue

        # context lines after, clipped to the end of the file
        for ll in range(past, min(past + ctx, total)):
            out.append(' ' + lines[ll])
            n_old += 1
            n_new += 1

        # hunk header uses 1-based line numbers
        print("\n".join(["@@ -{},{} +{},{} @@".format(
            first + 1,         n_old,
            first + 1 + shift, n_new)
        ] + out), file=outfile)

        shift += n_new - n_old
        out, n_old, n_new, fresh = [], 0, 0, True


 def usage():
    """ Print the command line synopsis, then the module docstring,
        which holds the detailed explanations.
    """
    synopsis = ("\nUsage: {} [[-o|--output] file] [-p|--patch] [-i] [-v | -q] "
                "file.s [file.s [...]]\n")
    print(synopsis.format(sys.argv[0]))
    print(__doc__)


 def main():
    """ Parse arguments, set output streams, and start processing files.

        Rebinds the module-level 'cmt', 'msg' and 'err' from flags to file
        objects (standard error, or the null device when disabled).

        Exit status: 1 for bad options or non-.s arguments, 2 for -i combined
        with -o or -p, 3 for -q combined with -v.
    """
    global cmt, msg, err
    inplace = False
    mkpatch = False
    outfile = "-"

    try:
        opts, args = getopt.getopt(sys.argv[1:], "ho:piqv", ["help", "output=", "patch"])
    except getopt.GetoptError as exc:
        # not named 'err': that would rebind the global stream of that name
        print(exc, file=sys.stderr)
        usage()
        sys.exit(1)

    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit()

        elif o == "-q":
            msg = False
        elif o == "-v":
            cmt = True

        elif o in ("-p", "--patch"):
            mkpatch = True
        elif o in ("-o", "--output"):
            outfile = a
        elif o == "-i":
            inplace = True


    if not args:
        usage()
        sys.exit()

    elif set(ext for basename, ext in map(os.path.splitext, args)) != {".s"}:
        print("Pass only assembly files (*.s) as arguments", file=sys.stderr)
        sys.exit(1)

    elif inplace and (outfile != "-" or mkpatch):
        print("Can't use -i with -o or -p!", file=sys.stderr)
        sys.exit(2)

    elif cmt and not msg:
        print("Can't be both quiet and verbose!", file=sys.stderr)
        sys.exit(3)

    with open(os.devnull, 'w') as silent:
        # set output streams
        cmt = sys.stderr if cmt else silent
        msg = sys.stderr if msg else silent
        err = sys.stderr if err else silent

        # different approach based on output strategy
        if inplace:
            for f in args:
                with open(f + ".tmp", "w") as out:
                    fixup_file(f, out, mkpatch)
                # unlike os.rename, also overwrites an existing file on Windows
                os.replace(f + ".tmp", f)
        elif outfile == "-":
            # standard output is not ours to close
            for f in args:
                fixup_file(f, sys.stdout, mkpatch)
        else:
            with open(outfile, "w") as out:
                for f in args:
                    fixup_file(f, out, mkpatch)


 if __name__ == '__main__':
    main()

 # ------------------------------------------------------------------------------
 # x86 ABI register usage in function calls:
 #
 #   %rax temporary register;
 #        with variable arguments passes information about the number of vector
 #        registers used; 1st return register
 #   Preserved: No
 #
 #   %rbx callee-saved register; optionally used as base pointer
 #   Preserved: Yes
 #
 #   %rcx used to pass 4th integer argument to functions
 #   Preserved: No
 #
 #   %rdx used to pass 3rd argument to functions; 2nd return register
 #   Preserved: No
 #
 #   %rsp stack pointer
 #   Preserved: Yes
 #
 #   %rbp callee-saved register; optionally used as frame pointer
 #   Preserved: Yes
 #
 #   %rsi used to pass 2nd argument to functions
 #   Preserved: No
 #
 #   %rdi used to pass 1st argument to functions
 #   Preserved: No
 #
 #   %r8 used to pass 5th argument to functions
 #   Preserved: No
 #
 #   %r9 used to pass 6th argument to functions
 #   Preserved: No
 #
 #   %r10 temporary register, used for passing a function's static chain pointer
 #   Preserved: No
 #
 #   %r11 temporary register
 #   Preserved: No
 #
 #   %r12-r15 callee-saved registers
 #   Preserved: Yes
 #
 #   %xmm0-%xmm1 used to pass and return floating point arguments
 #   Preserved: No
 #
 #   %xmm2-%xmm7 used to pass floating point arguments
 #   Preserved: No
 #
 #   %xmm8-%xmm15 temporary registers
 #   Preserved: No
 #
 #   %mmx0-%mmx7 temporary registers
 #   Preserved: No
 #
 #   %st0,%st1 temporary registers; used to return long double arguments
 #   Preserved: No
 #
 #   %st2-%st7 temporary registers
 #   Preserved: No
 #
 #   %fs Reserved for system (as thread specific data register)
 #   Preserved: No
 #
 #   mxcsr SSE2 control and status word partial
 #   x87 SW x87 status word
 #   Preserved: No
 #
 #   x87 CW x87 control word
 #   Preserved: Yes
	#!/usr/bin/env python3
	# -*- coding: utf-8 -*-
	""" Fix assembler source for PEBS usage
	By default, prints fixed file to stdout.

	command line options:
	"-o" -- output file (instead of stdout)
	"-p" -- output patch instead of fixed file
	"-i" -- in-place, overwrite input file(s)

	"-q" -- suppress warnings
	"-v" -- more verbose

	command line arguments: list of .s files to process


	suggested usage:
	change build to produce .s files
	FROM:
	cc [options] -c foo.c
	TO:
	cc [options] -S foo.c
	pebsfixup -i foo.s
	cc -c foo.s

	suggested compiler options:
	[probably only really needed if push/pop required.]
	(1) use either of
	-O2 -fno-optimize-sibling-calls
	-O1
	(2) use -mno-omit-leaf-frame-pointer
	(3) use -mno-red-zone [probably not required in any case]

	NOTES:
	(1) red zones are only really useful for leaf functions (i.e. if fncA calls
	fncB, fncA's red zone would be clobbered)
	(2) pushing onto the stack isn't a problem if there is a formal stack frame
	(3) the push is okay if the function has no more than six arguments (i.e.
	does _not_ use positive offsets from %rsp to access them)

	Credits to and original Perl implementation from Craig Estey
	see http://stackoverflow.com/a/35694557/1387346
	"""


	import getopt
	import time
	import sys
	import os
	import re


	# output files, global, will be initialized in main
	# Output streams, module-global. They start out as on/off flags and are
	# rebound to file objects in main().
	cmt = False  # verbose-only output
	msg = True   # output shown unless quiet
	err = True   # output that is always shown


	# Registers usable as scratch register for our fix, keyed by the identifier
	# that 'rgx' extracts, mapped to their role in a function call.
	reg_type = {
	    '11':'T', '10':'T', # temporary registers.
	    '9':'A6', '8':'A5', 'c':'A4', 'd':'A3', 'si':'A2', 'di':'A1', # arguments
	    '15':'P', '14':'P', '13':'P', '12':'P' # callee preserved
	}

	# In order of preference. Should we use %r10? -- see notes in ABI at bottom
	reg_pref = ["11", "9", "8", "c", "d", "si", "di", "15", "14", "13", "12"]


	# Regular expressions
	# symbol declarations: ".type name, @kind"
	symbol_type = re.compile(r"\s+\.type\s+([^,]+),\s*(\S+)")
	# label (function entry point)
	label = re.compile(r"([a-zA-Z_][a-zA-Z0-9_\.]*):")
	# match register, return a unique identifier: a,b,c,d,di,si,sp,bp or numeric
	# (plain '|' alternation; an escaped '\|' would only match a literal bar)
	rgx = re.compile(r"%[re]?([a-d]|[ds]i|[0-9]+|[sb]p)[xlhwdb]?\b")
	# return (function exit point), optionally behind a rep* prefix
	ret = re.compile(r"\s+(rep[a-z]*\s+)?ret")
	# use stack pointer with a numeric offset (i.e. for argument passing)
	stk = re.compile(r"\b[0-9]+\(%[re]?spl?\)\s*,")
	# all types of mov instructions, from memory
	memmov = re.compile(r"\s+(c?mov[a-z]*)\s+(.*\(.*\).*),(.*)")

	# Reverse mapping of the 'rgx' regular expression: identifier -> register
	# names in 64, 32, 16 and 8 bit width (in that order).
	reg_name = {
	    "a" : ["rax", "eax",  "ax",   "al"],
	    "b" : ["rbx", "ebx",  "bx",   "bl"],
	    "c" : ["rcx", "ecx",  "cx",   "cl"],
	    "d" : ["rdx", "edx",  "dx",   "dl"],
	    "si": ["rsi", "esi",  "si",   "sil"],
	    "di": ["rdi", "edi",  "di",   "dil"],
	    "bp": ["rbp", "ebp",  "bp",   "bpl"],
	    "sp": ["rsp", "esp",  "sp",   "spl"],
	    "8" : ["r8",  "r8d",  "r8w",  "r8b"],
	    "9" : ["r9",  "r9d",  "r9w",  "r9b"],
	    "10": ["r10", "r10d", "r10w", "r10b"],
	    "11": ["r11", "r11d", "r11w", "r11b"],
	    "12": ["r12", "r12d", "r12w", "r12b"],
	    "13": ["r13", "r13d", "r13w", "r13b"],
	    "14": ["r14", "r14d", "r14w", "r14b"],
	    "15": ["r15", "r15d", "r15w", "r15b"]
	}


	def regfull(rid, bits = 64):
	    """ Return the full name of a register based on its identifier
	        as the regex 'rgx' extracts it.

	        rid  -- register identifier, a key of 'reg_name' (e.g. "a", "si", "11")
	        bits -- bit-width of the wanted register name: 64 (default), 32, 16
	                or 8

	        Returns the name prefixed with "%", e.g. "%rax" or "%r11d".
	        Raises ValueError if bits is not a supported width.
	        If rid is unknown, reports it on 'err' and exits with status 6.
	    """
	    # position of each width in the lists stored in reg_name
	    widths = (64, 32, 16, 8)
	    try:
	        bid = widths.index(bits)
	    except ValueError:
	        raise ValueError(
	            "No such bit-width for registers: {}".format(bits)) from None

	    try:
	        return "%" + reg_name[rid][bid]
	    except (KeyError, TypeError):
	        # report the identifier that failed, not the regex it came from
	        print("ERROR: no such register id: {}".format(rid), file=err)
	        sys.exit(6)


	def regfix(fnc):
	    """ Given a function object, find a temporary register it doesn't use.

	        fnc -- function record as built by parse_file; 'usecount' (registers
	               the function mentions) and 'stkargs' (lines accessing the
	               stack through %rsp offsets) are read

	        Candidates are tried in 'reg_pref' order. Using a callee preserved
	        register (type 'P') means saving it with a push, which moves %rsp, so
	        those are only eligible when the function has no such stack accesses.

	        Returns the register identifier, or None (after a warning on 'msg')
	        if nothing can be found.
	    """
	    used = fnc['usecount']
	    has_stack_args = len(fnc['stkargs']) > 0

	    for reg in reg_pref:
	        if reg in used:
	            continue
	        if reg_type[reg] == 'P' and has_stack_args:
	            # push/pop would shift the %rsp-relative offsets
	            continue
	        return reg

	    print("WARNING: unable to locate usable fixup register\n", file=msg)
	    return None


	def parse_file(f, lines, sym_type, functions):
	    """ Given a file of assembly, extract symbols into sym_type and infos
	        about functions into functions, using a lot of regular expressions.

	        f         -- iterable of text lines (e.g. an open file)
	        lines     -- list, receives every line of f without trailing newline
	        sym_type  -- dict, receives name -> type (leading '@' removed) for
	                     every ".type name, @type" directive
	        functions -- dict, receives label -> record for every "label:" line

	        Each record holds:
	          'lno'      -- 0-based line number of the label
	          'usecount' -- register identifier -> number of mentions
	          'retlist'  -- line numbers of return instructions
	          'stkargs'  -- line numbers of accesses through an %rsp offset
	          'fixlist'  -- loads whose destination register is also used to
	                        compute the address, as dicts with the keys
	                        'lno', 'ins', 'lhs' and 'rhs'

	        Lines starting with '.' (directives, '.L' local labels) never open a
	        record. Instructions found before the first label belong to no
	        function and are only stored in lines.
	    """
	    current = None  # record of the function being scanned, if any

	    for lno, raw in enumerate(f):
	        line = raw.rstrip("\n")
	        lines.append(line)

	        if line.lstrip().startswith('.'):
	            # look for ".type blah, @function"
	            type_def = symbol_type.match(line)
	            if type_def:
	                name = type_def.group(1)
	                kind = type_def.group(2).lstrip('@')
	                sym_type[name] = kind
	                print("TYPE: {} --> {}".format(name, kind), file=cmt)
	            continue

	        # look for "label:"
	        symbol = label.fullmatch(line)
	        if symbol:
	            current = {
	                'lno': lno,
	                'usecount': {},
	                'retlist': [],
	                'fixlist': [],
	                'stkargs': [],
	            }
	            functions[symbol.group(1)] = current
	            continue

	        if current is None:
	            # nothing to attribute this line to
	            continue

	        # remember all used registers
	        usecount = current['usecount']
	        for reg in rgx.findall(line):
	            usecount[reg] = usecount.get(reg, 0) + 1

	        # handle returns
	        if ret.match(line):
	            current['retlist'].append(lno)
	            continue

	        # handle stack uses through %rsp offsets
	        if stk.search(line):
	            current['stkargs'].append(lno)
	            continue

	        # find mov with memory on lhs
	        mem = memmov.match(line)
	        if not mem:
	            continue
	        ins, lhs, rhs = mem.group(1), mem.group(2), mem.group(3)

	        dest = rgx.search(rhs)
	        if dest is None:
	            # No rgx match: a register we're not interested in, e.g. xmm*
	            continue

	        # may use several registers to compute load address
	        if dest.group(1) in rgx.findall(lhs):
	            # overlap!
	            current['fixlist'].append(
	                {'lno': lno, 'ins': ins, 'lhs': lhs, 'rhs': rhs})


	def fixfnc(fncname, fnc, lines, diff):
	    """ Fix a given function.

	        fncname -- name of the function, used in messages only
	        fnc     -- function record as built by parse_file
	        lines   -- list of the file's lines; fixed lines are rewritten in
	                   place (a rewritten entry may hold several lines joined
	                   by newlines)
	        diff    -- dict, receives for every change:
	                   first modified lno (on old file) -> ([old lines], [new lines])

	        Every load listed in fnc['fixlist'] is replaced by a lea into a
	        scratch register followed by the load through that register. If the
	        scratch register is callee preserved, a push is added after the
	        function label and a pop before every return.

	        Exits with status 4 if fixes are needed but no register is free, and
	        with status 5 if the register would have to be saved but the function
	        has no return point.
	    """

	    fixlist = fnc['fixlist']
	    retlist = fnc['retlist']
	    print("FUNCTION: {} RET: {} FIX: {}".format(fncname,
	                len(retlist), len(fixlist)), file=cmt)

	    # find fix register fixrid, may be None
	    fixrid = regfix(fnc)
	    if fixrid is None:
	        fixrtype = fixreg = None
	        if fixlist:
	            print("ERROR {} fixes needed but no fix register"
	                    .format(len(fixlist)), file=err)
	            sys.exit(4)
	    else:
	        fixrtype = reg_type[fixrid]
	        fixreg = regfull(fixrid)
	        print(" FIXREG --> {} (TYPE: {})".format(fixreg, fixrtype), file=cmt)

	    # check number of return points
	    if not retlist:
	        print("WARNING function '{}' has no return points\n" \
	            "Suggest recompiling with correct options."
	            .format(fncname), file=msg)

	        if not fixlist:
	            print("working around because function needs no fixups",
	                file=msg)

	        elif fixrtype != "P":
	            print("working around because fixup reg does not need to be saved",
	                file=msg)

	        else:
	            print("ERROR impossible fixup: {} fixes needed, " \
	                "register needs to be saved but no return points!"
	                .format(len(fixlist)), file=err)
	            sys.exit(5)

	    # show stats on register usage in function
	    usecount = fnc['usecount']
	    for reg in sorted(usecount, key = lambda r: usecount[r]):
	        typ = reg_type.get(reg, "?")
	        print(" {:2} used {} times (TYPE: {})".format(reg, usecount[reg], typ),
	            file=cmt)

	    # individual fixups
	    for fix in fixlist:
	        lno = fix['lno']
	        add = [ "\tlea\t{}, {}".format(fix['lhs'], fixreg),
	                "\t{}\t({}), {}".format(fix['ins'], fixreg, fix['rhs'])]

	        print("\n".join([
	            "Replace line {}: '{}' with:".format(lno, lines[lno])
	        ] + add), file=cmt)
	        diff[lno] = ([lines[lno]], add)

	        lines[lno] = "\n".join(add)

	    if fixlist and fixrtype == "P":
	        # fix the function prolog
	        entry = fnc['lno']
	        add = "\tpush\t{}".format(fixreg)

	        # recorded as "label -> label, push" so that a patch places the push
	        # after the label line rather than before it
	        diff[entry] = ([lines[entry]], [lines[entry], add])
	        print("Insert push after line {}: {}".format(entry, add), file=cmt)

	        lines[entry] += "\n" + add

	        # fix the function return points
	        for retpoint in retlist:
	            add = "\tpop\t{}".format(fixreg)

	            # keyed on the return line itself: it is the first modified line
	            diff[retpoint] = ([lines[retpoint]], [add, lines[retpoint]])
	            print("Insert pop before line {}: {}".format(retpoint, add),
	                file=cmt)

	            lines[retpoint] = add + "\n" + lines[retpoint]


	def fixup_file(infile, outfile, as_patch):
	    """ Process a given assembly file.

	        infile   -- path of the assembly file to read
	        outfile  -- writable text stream receiving the result
	        as_patch -- if true, write a patch (two header lines, then the hunks
	                    if anything changed) instead of the whole fixed file
	    """
	    print("processing {} ...".format(infile), file=msg)

	    print("-" * 78, file=cmt)
	    print("FILE: {}".format(infile), file=cmt)

	    lines, diff = [], {}
	    sym_type, functions = {}, {}

	    with open(infile, "r") as src:
	        parse_file(src, lines, sym_type, functions)

	    fixtot = 0
	    for info in functions.values():
	        fixtot += len(info['fixlist'])
	    if fixtot:
	        print("File {} needs {} fixups".format(infile, fixtot), file=msg)

	    # fix functions in the order they appear in the file
	    in_file_order = sorted(functions.items(), key=lambda item: item[1]['lno'])
	    for name, info in in_file_order:
	        fixfnc(name, info, lines, diff)

	    if not as_patch:
	        print("\n".join(lines), file=outfile)
	        return

	    print("--- {} {}".format(infile, time.ctime(os.stat(infile).st_mtime)), file=outfile)
	    print("+++ {} {}".format(infile, time.ctime()), file=outfile)

	    if diff:
	        print_patch(outfile, lines, diff)


	def print_patch(outfile, lines, diff):
	    """ Given context lines (lines) and diff a mapping:
	        first modified lno (on old file) -> ([old lines], [new lines]),
	        print the patch that transforms old to new as unified-diff hunks.

	        outfile -- writable text stream
	        lines   -- lines of the file indexed by 0-based old line number; only
	                   entries outside the spans listed in diff are read
	        diff    -- the modifications; it is not altered

	        Modifications separated by at most twice the context size share one
	        hunk. Lines that start or end both the old and the new side of a
	        modification are printed as context.
	    """
	    # number of context lines around a modification
	    ctx = 3

	    total = len(lines)
	    modln = sorted(diff)  # Sorted list of modified lines.

	    shift = 0     # Cumulative difference of lines between old and new.
	    out   = []    # Lines (context and modified) of the current patch block.
	    n_old = 0     # Number of lines of out that exist in the old file.
	    n_new = 0     # Number of lines of out that exist in the new file.
	    first = 0     # Old lno of the first line of out.
	    past  = 0     # 1 past the last old line in out, i.e. the first non-shown.
	    fresh = True  # No patch block is currently being built.

	    for l, nextl in zip(modln, modln[1:] + [None]):
	        old, new = diff[l]

	        if fresh:
	            first = past = max(l - ctx, 0)
	            fresh = False

	        # context lines before (or between two merged modifications)
	        for ll in range(past, min(l, total)):
	            out.append(' ' + lines[ll])
	            n_old += 1
	            n_new += 1

	        # lines shared by the start or the end of both sides are unchanged
	        common = min(len(old), len(new))
	        head = 0
	        while head < common and old[head] == new[head]:
	            head += 1
	        tail = 0
	        while tail < common - head and old[-1 - tail] == new[-1 - tail]:
	            tail += 1

	        out.extend(' ' + o for o in old[:head])
	        out.extend('-' + o for o in old[head:len(old) - tail])
	        out.extend('+' + n for n in new[head:len(new) - tail])
	        out.extend(' ' + o for o in old[len(old) - tail:])
	        n_old += len(old)
	        n_new += len(new)
	        past = max(past, l + len(old))

	        # keep building if the next modification's context would touch ours
	        if nextl is not None and nextl - past <= 2 * ctx:
	            continue

	        # context lines after, clipped to the end of the file
	        for ll in range(past, min(past + ctx, total)):
	            out.append(' ' + lines[ll])
	            n_old += 1
	            n_new += 1

	        # hunk header uses 1-based line numbers
	        print("\n".join(["@@ -{},{} +{},{} @@".format(
	            first + 1,         n_old,
	            first + 1 + shift, n_new)
	        ] + out), file=outfile)

	        shift += n_new - n_old
	        out, n_old, n_new, fresh = [], 0, 0, True


	def usage():
	    """ Explain how to use this script: print the command line synopsis,
	        then the module docstring, which holds the details.
	    """
	    # plain '|' separators: a backslash before them would be printed as is
	    print("\nUsage: {} [[-o|--output] file] [-p|--patch] [-i] [-v | -q] " \
	            "file.s [file.s [...]]\n".format(sys.argv[0]))
	    print(__doc__)


	def main():
	    """ Parse arguments, set output streams, and start processing files.

	        Rebinds the module-level 'cmt', 'msg' and 'err' from flags to file
	        objects (standard error, or the null device when disabled).

	        Exit status: 1 for bad options or non-.s arguments, 2 for -i combined
	        with -o or -p, 3 for -q combined with -v.
	    """
	    global cmt, msg, err
	    inplace = False
	    mkpatch = False
	    outfile = "-"

	    try:
	        opts, args = getopt.getopt(sys.argv[1:], "ho:piqv", ["help", "output=", "patch"])
	    except getopt.GetoptError as exc:
	        # not named 'err': that would rebind the global stream of that name
	        print(exc, file=sys.stderr)
	        usage()
	        sys.exit(1)

	    for o, a in opts:
	        if o in ("-h", "--help"):
	            usage()
	            sys.exit()

	        elif o == "-q":
	            msg = False
	        elif o == "-v":
	            cmt = True

	        elif o in ("-p", "--patch"):
	            mkpatch = True
	        elif o in ("-o", "--output"):
	            outfile = a
	        elif o == "-i":
	            inplace = True


	    if not args:
	        usage()
	        sys.exit()

	    elif set(ext for basename, ext in map(os.path.splitext, args)) != {".s"}:
	        print("Pass only assembly files (*.s) as arguments", file=sys.stderr)
	        sys.exit(1)

	    elif inplace and (outfile != "-" or mkpatch):
	        print("Can't use -i with -o or -p!", file=sys.stderr)
	        sys.exit(2)

	    elif cmt and not msg:
	        print("Can't be both quiet and verbose!", file=sys.stderr)
	        sys.exit(3)

	    with open(os.devnull, 'w') as silent:
	        # set output streams
	        cmt = sys.stderr if cmt else silent
	        msg = sys.stderr if msg else silent
	        err = sys.stderr if err else silent

	        # different approach based on output strategy
	        if inplace:
	            for f in args:
	                with open(f + ".tmp", "w") as out:
	                    fixup_file(f, out, mkpatch)
	                # unlike os.rename, also overwrites an existing file on Windows
	                os.replace(f + ".tmp", f)
	        elif outfile == "-":
	            # standard output is not ours to close
	            for f in args:
	                fixup_file(f, sys.stdout, mkpatch)
	        else:
	            with open(outfile, "w") as out:
	                for f in args:
	                    fixup_file(f, out, mkpatch)


	if __name__ == '__main__':
	    main()

	# ------------------------------------------------------------------------------
	# x86 ABI register usage in function calls:
	#
	# %rax temporary register;
	# with variable arguments passes information about the number of vector
	# registers used; 1st return register
	# Preserved: No
	#
	# %rbx callee-saved register; optionally used as base pointer
	# Preserved: Yes
	#
	# %rcx used to pass 4th integer argument to functions
	# Preserved: No
	#
	# %rdx used to pass 3rd argument to functions; 2nd return register
	# Preserved: No
	#
	# %rsp stack pointer
	# Preserved: Yes
	#
	# %rbp callee-saved register; optionally used as frame pointer
	# Preserved: Yes
	#
	# %rsi used to pass 2nd argument to functions
	# Preserved: No
	#
	# %rdi used to pass 1st argument to functions
	# Preserved: No
	#
	# %r8 used to pass 5th argument to functions
	# Preserved: No
	#
	# %r9 used to pass 6th argument to functions
	# Preserved: No
	#
	# %r10 temporary register, used for passing a function's static chain pointer
	# Preserved: No
	#
	# %r11 temporary register
	# Preserved: No
	#
	# %r12-r15 callee-saved registers
	# Preserved: Yes
	#
	# %xmm0-%xmm1 used to pass and return floating point arguments
	# Preserved: No
	#
	# %xmm2-%xmm7 used to pass floating point arguments
	# Preserved: No
	#
	# %xmm8-%xmm15 temporary registers
	# Preserved: No
	#
	# %mmx0-%mmx7 temporary registers
	# Preserved: No
	#
	# %st0,%st1 temporary registers; used to return long double arguments
	# Preserved: No
	#
	# %st2-%st7 temporary registers
	# Preserved: No
	#
	# %fs Reserved for system (as thread specific data register)
	# Preserved: No
	#
	# mxcsr SSE2 control and status word partial
	# x87 SW x87 status word
	# Preserved: No
	#
	# x87 CW x87 control word
	# Preserved: Yes