-
-
Save zliuva/1084476 to your computer and use it in GitHub Desktop.
; A minimal Mach-o x64 executable for OS X (also see below Mountain Lion version) | |
; | |
; $ nasm -f bin -o tiny_hello tiny_hello.s | |
; $ chmod +x tiny_hello | |
; $ ./tiny_hello | |
; Hello, World! | |
; $ | |
; c.f. | |
; http://osxbook.com/blog/2009/03/15/crafting-a-tiny-mach-o-executable/ ( the original tiny mach-o executable ) | |
; http://feiri.de/macho/ ( improved version, using a very clever trick to put code inside the load command itself) | |
; ---------------------------------------------------------------------------
; Flat-binary Mach-O x86-64 image (NASM syntax, assemble with `-f bin`).
; The file has no ORG, so every label value equals its offset in the file;
; the single segment maps the file at vmaddr 0, so label values are also the
; run-time addresses used below.
; ---------------------------------------------------------------------------

; Constants (for readability)
%define MH_MAGIC_64                 0xfeedfacf
%define CPU_ARCH_ABI64              0x01000000
%define CPU_TYPE_I386               0x00000007
%define CPU_TYPE_X86_64             (CPU_ARCH_ABI64 | CPU_TYPE_I386)
%define CPU_SUBTYPE_LIB64           0x80000000
%define CPU_SUBTYPE_I386_ALL        0x00000003
%define MH_EXECUTE                  0x2
%define MH_NOUNDEFS                 0x1
%define LC_SEGMENT_64               0x19
%define LC_UNIXTHREAD               0x5
%define VM_PROT_READ                0x1
%define VM_PROT_WRITE               0x2
%define VM_PROT_EXECUTE             0x4
%define x86_THREAD_STATE64          0x4
; The count field is expressed in uint32_t units: the 64-bit thread state
; below holds 21 quadwords = 42 dwords.
%define x86_THREAD_STATE64_COUNT    42
%define SYSCALL_CLASS_SHIFT         24
%define SYSCALL_CLASS_MASK          (0xFF << SYSCALL_CLASS_SHIFT)
%define SYSCALL_NUMBER_MASK         (~SYSCALL_CLASS_MASK)
%define SYSCALL_CLASS_UNIX          2
%define SYSCALL_CONSTRUCT_UNIX(syscall_number) \
        ((SYSCALL_CLASS_UNIX << SYSCALL_CLASS_SHIFT) | \
        (SYSCALL_NUMBER_MASK & (syscall_number)))
%define SYS_exit                    1
%define SYS_write                   4

; Layout sizes used to pad the code that is hidden inside the load commands
%define SEGNAME_BYTES               16      ; segname field of the segment command
%define RAX_RDX_BYTES               32      ; rax, rbx, rcx, rdx slots (4 * 8)
%define R8_R9_BYTES                 16      ; r8, r9 slots (2 * 8)

; NASM directives, emit no bytes
; Use RIP-relative addressing for x64
BITS 64
DEFAULT REL

; Mach-O header
        DD      MH_MAGIC_64                                 ; magic
        DD      CPU_TYPE_X86_64                             ; cputype
        DD      CPU_SUBTYPE_LIB64 | CPU_SUBTYPE_I386_ALL    ; cpusubtype
        DD      MH_EXECUTE                                  ; filetype
        DD      2                                           ; ncmds
        DD      end - load_commands                         ; sizeofcmds
        DD      MH_NOUNDEFS                                 ; flags
        DD      0x0                                         ; reserved

; Load commands
load_commands:

; Segment command (no sections)
segment_cmd:
        DD      LC_SEGMENT_64                               ; cmd
        DD      segment_cmd_end - segment_cmd               ; cmdsize (72)
; The message is stored in the 16-byte segname field
hello_str:
        DB      'Hello, World!', 0x0A                       ; segname
len     EQU     $-hello_str
        times   SEGNAME_BYTES-($-hello_str) DB 0            ; zero-fill rest of segname
        DQ      0x0                                         ; vmaddr
        DQ      0x1000                                      ; vmsize
        DQ      0                                           ; fileoff
        DQ      filesize                                    ; filesize
        DD      VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE ; maxprot
        DD      VM_PROT_READ | VM_PROT_EXECUTE              ; initprot
        DD      0x0                                         ; nsects
        DD      0x0                                         ; flags
segment_cmd_end:

; UNIX thread command carrying the initial register state
thread_cmd:
        DD      LC_UNIXTHREAD                               ; cmd
        DD      thread_cmd_end - thread_cmd                 ; cmdsize (184)
        DD      x86_THREAD_STATE64                          ; flavor
        DD      x86_THREAD_STATE64_COUNT                    ; count

; The next 32 bytes are the rax, rbx, rcx, rdx slots of the thread state.
; Somehow the OS X loader clears rax-rdx regardless of the initial state, so
; these bytes are reused to hold code. The program does not fit in 32 bytes,
; so the exit syscall lives in the r8/r9 slots and is reached with a jump.
;
; The instructions use explicit 32-bit destinations and a short jump so that
; their encoded size does not depend on the NASM version or -O level, and the
; slot is filled with TIMES instead of a hand-counted pad. If the code ever
; outgrows the slot the TIMES count turns negative and NASM refuses to build.
start:
        ; rdi (fd) and rsi (buffer) are already set by the initial state below
        mov     edx, len                                    ; rdx = byte count (32-bit write zero-extends)
        mov     eax, SYSCALL_CONSTRUCT_UNIX(SYS_write)      ; rax = write syscall number
        syscall
        jmp     short next
        times   RAX_RDX_BYTES-($-start) DB 0                ; pad to the end of the rdx slot

        DQ      0x01, hello_str, 0x00, 0x00                 ; rdi = STDOUT, rsi = address of hello_str, rbp, rsp

; The r8 and r9 slots (16 bytes) hold the exit syscall
next:
        xor     edi, edi                                    ; rdi = exit status 0
        mov     eax, SYSCALL_CONSTRUCT_UNIX(SYS_exit)       ; rax = exit syscall number
        syscall
        times   R8_R9_BYTES-($-next) DB 0                   ; pad to the end of the r9 slot

        DQ      0x00, 0x00, 0x00, 0x00, 0x00, 0x00          ; r10, r11, r12, r13, r14, r15
        DQ      start, 0x00, 0x00, 0x00, 0x00               ; rip, rflags, cs, fs, gs
thread_cmd_end:
end:
filesize EQU    $-$$
; A minimal Mach-o x64 executable for OS X (using LC_MAIN in Mountain Lion) | |
; | |
; $ nasm -O0 -f bin -o tiny_hello tiny_hello.s | |
; $ chmod +x tiny_hello | |
; $ ./tiny_hello | |
; Hello, World! | |
; $ | |
; c.f. | |
; http://osxbook.com/blog/2009/03/15/crafting-a-tiny-mach-o-executable/ ( the original tiny mach-o executable ) | |
; http://feiri.de/macho/ ( improved version, using a very clever trick to put code inside the load command itself) | |
; ---------------------------------------------------------------------------
; Flat-binary Mach-O x86-64 image (NASM syntax, assemble with `-f bin`).
; The file has no ORG, so every label value equals its offset in the file.
; ---------------------------------------------------------------------------

; Constants (for readability)
%define MH_MAGIC_64                 0xfeedfacf
%define CPU_ARCH_ABI64              0x01000000
%define CPU_TYPE_I386               0x00000007
%define CPU_TYPE_X86_64             (CPU_ARCH_ABI64 | CPU_TYPE_I386)
%define CPU_SUBTYPE_LIB64           0x80000000
%define CPU_SUBTYPE_I386_ALL        0x00000003
%define MH_EXECUTE                  0x2
%define MH_NOUNDEFS                 0x1
%define LC_REQ_DYLD                 0x80000000
%define LC_LOAD_DYLIB               0xc
%define LC_LOAD_DYLINKER            0xe
%define LC_SEGMENT_64               0x19
%define LC_MAIN                     (0x28 | LC_REQ_DYLD)
%define VM_PROT_READ                0x1
%define VM_PROT_WRITE               0x2
%define VM_PROT_EXECUTE             0x4
%define SYSCALL_CLASS_SHIFT         24
%define SYSCALL_CLASS_MASK          (0xFF << SYSCALL_CLASS_SHIFT)
%define SYSCALL_NUMBER_MASK         (~SYSCALL_CLASS_MASK)
%define SYSCALL_CLASS_UNIX          2
%define SYSCALL_CONSTRUCT_UNIX(syscall_number) \
        ((SYSCALL_CLASS_UNIX << SYSCALL_CLASS_SHIFT) | \
        (SYSCALL_NUMBER_MASK & (syscall_number)))
%define SYS_exit                    1
%define SYS_write                   4
%define SEGNAME_BYTES               16      ; segname field of the segment command

; NASM directives, emit no bytes
; Use RIP-relative addressing for x64
BITS 64
DEFAULT REL

; Mach-O header
        DD      MH_MAGIC_64                                 ; magic
        DD      CPU_TYPE_X86_64                             ; cputype
        DD      CPU_SUBTYPE_LIB64 | CPU_SUBTYPE_I386_ALL    ; cpusubtype
        DD      MH_EXECUTE                                  ; filetype
        DD      4                                           ; ncmds
        DD      start - load_commands                       ; sizeofcmds
        DD      MH_NOUNDEFS                                 ; flags
        DD      0x0                                         ; reserved

; Load commands
load_commands:

; Segment command (no sections)
segment_cmd:
        DD      LC_SEGMENT_64                               ; cmd
        DD      segment_cmd_end - segment_cmd               ; cmdsize (72)
; The message is stored in the 16-byte segname field
hello_str:
        DB      'Hello, World!', 0x0A                       ; segname
len     EQU     $-hello_str
        times   SEGNAME_BYTES-($-hello_str) DB 0            ; zero-fill rest of segname
        DQ      0x0                                         ; vmaddr
        DQ      0x1000                                      ; vmsize
        DQ      0                                           ; fileoff
        DQ      filesize                                    ; filesize
        DD      VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE ; maxprot
        DD      VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE ; initprot
        DD      0x0                                         ; nsects
        DD      0x0                                         ; flags
segment_cmd_end:

; In Mountain Lion, LC_MAIN replaces LC_UNIXTHREAD.
; LC_MAIN has a fixed size of 24 bytes, so code cannot be embedded in the
; command itself; the code is simply appended after the load commands.

; dyld is required in order to use LC_MAIN
dylinker_cmd:
        DD      LC_LOAD_DYLINKER                            ; cmd
        DD      dylinker_cmd_end - dylinker_cmd             ; cmdsize (28, multiple of 4)
        DD      dylinker_name - dylinker_cmd                ; lc_str.offset (12)
dylinker_name:
        DB      '/usr/lib/dyld', 0x00
        align   4, DB 0                                     ; pad command to a multiple of 4
dylinker_cmd_end:

; must link to libSystem
dylib_cmd:
        DD      LC_LOAD_DYLIB                               ; cmd
        DD      dylib_cmd_end - dylib_cmd                   ; cmdsize (52)
        DD      dylib_name - dylib_cmd                      ; lc_str.offset (24)
        DD      2                                           ; timestamp
        DD      0xA90300                                    ; current_version
        DD      0x010000                                    ; compatibility_version
dylib_name:
        DB      '/usr/lib/libSystem.B.dylib', 0x00
        align   4, DB 0                                     ; pad command to a multiple of 4
dylib_cmd_end:

; Entry point command
main_cmd:
        DD      LC_MAIN                                     ; cmd
        DD      main_cmd_end - main_cmd                     ; cmdsize (24)
        DQ      start                                       ; entry point (file offset of the code)
        DQ      0                                           ; stack size
main_cmd_end:

;-----------------------------------------------------------------------
; start -- entry point named by LC_MAIN; it is entered like a C main()
;          and returns to the dyld glue code.
; Out:   eax = 0 (value handed back to the caller as main's result)
; Clobb: rax, rdx, rsi, rdi, plus whatever the syscall instruction clobbers
;-----------------------------------------------------------------------
start:
        mov     edi, 0x01                                   ; rdi = STDOUT
        lea     rsi, [hello_str]                            ; rsi = &hello_str, RIP-relative (DEFAULT REL)
        mov     edx, len                                    ; rdx = byte count
        mov     eax, SYSCALL_CONSTRUCT_UNIX(SYS_write)      ; rax = write syscall number
        syscall
        ; An explicit exit syscall is optional here, since the glue code in
        ; dyld takes care of exiting once this routine returns:
        ;       xor     edi, edi
        ;       mov     eax, SYSCALL_CONSTRUCT_UNIX(SYS_exit)
        ;       syscall
        ; rax still holds the result of write at this point; clear it so the
        ; value returned from "main" is 0 rather than the byte count.
        xor     eax, eax
        ret                                                 ; return from "main"; the glue code does the rest
end:
filesize EQU    $-$$
Mach-O validation has been stricter since 10.10.5. The executable needs to be at least 4096 bytes so that it is not killed by the kernel. Zero padding will do the trick (bye bye, my 232-byte 32-bit executables :-( ). Interestingly, the change appeared because Apple fixed a Mach-O jailbreak vulnerability starting with iOS 8.4.1 (the kernel code seems to be shared, at least to some extent).
Trying to get this working on 10.11 - just adding some padding (i.e. "times 4096-(
tiny works on my 10.14.2 MacBook.
I don't understand why you use x86_EXCEPTION_STATE64_COUNT. The Apple header files define that as 4 but then you redefine it as 42. Since this is a x86_THREAD_STATE64 flavor, shouldn't it just be x86_THREAD_STATE64_COUNT which is 21?
However, when I set it to 21, I get Killed: 9 I'm noticing but not understanding that 42 is 2 * 21.
The cmdsize looks correct.
Any ideas?
@Olsonist what code did you add to get it working on 10.14?
I get Killed: 9
man ld
gives us some info as to why
-pagezero_size size
By default the linker creates an unreadable segment starting at address zero named
__PAGEZERO. Its existence will cause a bus error if a NULL pointer is dereferenced.
The argument size is a hexadecimal number with an optional leading 0x. If size is zero,
the linker will not generate a page zero segment. By default on 32-bit architectures
the page zero size is 4KB. On 64-bit architectures, the default size is 4GB. The ppc64
architecture has some special cases. Since Mac OS X 10.4 did not support 4GB page zero
programs, the default page zero size for ppc64 will be 4KB unless -macosx_version_min is
10.5 or later. Also, the -mdynamic-no-pic codegen model for ppc64 will only work if the
code is placed in the lower 2GB of the address space, so the if the linker detects any
such code, the page zero size is set to 4KB and then a new unreadable trailing segment
is created after the code, filling up the lower 4GB.
@asauber I wanted to know why it was 42 rather than 21. The answer comes from loader.h:
./Developer/CommandLineTools/SDKs/MacOSX11.3.sdk/usr/include/mach-o/loader.h
/* uint32_t count count of uint32_t's in thread state */
I've tested the code in my environment, but I got Killed: 9. My environment is Mac OS X 10.10.5.