Skip to content

Instantly share code, notes, and snippets.

@zliuva
Last active August 20, 2024 10:32
Show Gist options
  • Save zliuva/1084476 to your computer and use it in GitHub Desktop.
Save zliuva/1084476 to your computer and use it in GitHub Desktop.
A minimal Mach-o x64 executable for OS X
; A minimal Mach-o x64 executable for OS X (also see below Mountain Lion version)
;
; $ nasm -f bin -o tiny_hello tiny_hello.s
; $ chmod +x tiny_hello
; $ ./tiny_hello
; Hello, World!
; $
; c.f.
; http://osxbook.com/blog/2009/03/15/crafting-a-tiny-mach-o-executable/ ( the original tiny mach-o executable )
; http://feiri.de/macho/ ( improved version, using a very clever trick to put code inside the load command itself)
; Constants (for readability). Names follow the Mach-O / Mach kernel headers.
;
; Composite values are wrapped in parentheses so that each macro expands to a
; single operand: without them, `CPU_TYPE_X86_64 & 0xff` would expand to
; `CPU_ARCH_ABI64 | CPU_TYPE_I386 & 0xff` and bind the `&` to the wrong term.

; --- mach_header_64 fields ---
%define MH_MAGIC_64                 0xfeedfacf
%define CPU_ARCH_ABI64              0x01000000
%define CPU_TYPE_I386               0x00000007
%define CPU_TYPE_X86_64             (CPU_ARCH_ABI64 | CPU_TYPE_I386)
%define CPU_SUBTYPE_LIB64           0x80000000
%define CPU_SUBTYPE_I386_ALL        0x00000003
%define MH_EXECUTE                  0x2
%define MH_NOUNDEFS                 0x1

; --- load command identifiers ---
%define LC_SEGMENT_64               0x19
%define LC_UNIXTHREAD               0x5

; --- memory protection bits ---
%define VM_PROT_READ                0x1
%define VM_PROT_WRITE               0x2
%define VM_PROT_EXECUTE             0x4

; --- thread state ---
%define x86_THREAD_STATE64          0x4
; `count` is measured in uint32_t units: the register image below is
; 21 quadwords = 42 uint32_t.
%define x86_THREAD_STATE64_COUNT    42
; Historical name used by the LC_UNIXTHREAD command in this file. The value is
; the *thread* state count, not an exception state count; kept as an alias so
; existing uses keep assembling.
%define x86_EXCEPTION_STATE64_COUNT x86_THREAD_STATE64_COUNT

; --- syscall numbering: class in bits 24..31, number in the remaining bits ---
%define SYSCALL_CLASS_SHIFT         24
%define SYSCALL_CLASS_MASK          (0xFF << SYSCALL_CLASS_SHIFT)
%define SYSCALL_NUMBER_MASK         (~SYSCALL_CLASS_MASK)
%define SYSCALL_CLASS_UNIX          2
%define SYSCALL_CONSTRUCT_UNIX(syscall_number) \
((SYSCALL_CLASS_UNIX << SYSCALL_CLASS_SHIFT) | \
(SYSCALL_NUMBER_MASK & (syscall_number)))
%define SYS_exit                    1
%define SYS_write                   4
; Assembler setup: directives only, no bytes are emitted for these two lines.
; 64-bit mode, with RIP-relative as the default form for memory operands.
        bits    64
        default rel

; mach_header_64: eight 32-bit fields at the very start of the file.
        dd      MH_MAGIC_64                                 ; magic
        dd      CPU_TYPE_X86_64                             ; cputype
        dd      CPU_SUBTYPE_LIB64 | CPU_SUBTYPE_I386_ALL    ; cpusubtype
        dd      MH_EXECUTE                                  ; filetype
        dd      2                                           ; ncmds (LC_SEGMENT_64, LC_UNIXTHREAD)
        dd      end - load_commands                         ; sizeofcmds: bytes from first command to end of file
        dd      MH_NOUNDEFS                                 ; flags
        dd      0                                           ; reserved
; Load commands start here; the header's sizeofcmds is measured from this label.
load_commands:

; LC_SEGMENT_64 with no sections: one segment at vmaddr 0 backed by the file
; from offset 0 for `filesize` bytes.
        dd      LC_SEGMENT_64                   ; cmd
        dd      72                              ; cmdsize

; The 16-byte segname field doubles as storage for the message.
hello_str:
        db      'Hello, World!', 0x0A           ; segname bytes 0..13 (the message)
len     equ     $ - hello_str                   ; message length in bytes
        times   16 - len db 0x00                ; zero-fill the rest of segname

        dq      0                               ; vmaddr
        dq      0x1000                          ; vmsize
        dq      0                               ; fileoff
        dq      filesize                        ; filesize
        dd      VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE ; maxprot
        dd      VM_PROT_READ | VM_PROT_EXECUTE  ; initprot
        dd      0                               ; nsects
        dd      0                               ; flags
; LC_UNIXTHREAD: supplies the initial register state of the main thread.
        dd      LC_UNIXTHREAD                   ; cmd
        dd      184                             ; cmdsize = 16-byte command header + 21 registers * 8 bytes
        dd      x86_THREAD_STATE64              ; flavor
        dd      x86_EXCEPTION_STATE64_COUNT     ; count in uint32_t units (42 = 21 quadwords * 2);
                                                ; the macro name is historical, the value is the thread-state count

; Register image: 21 quadwords, laid out below in this order
;   rax rbx rcx rdx | rdi rsi rbp rsp | r8 r9 | r10..r15 | rip rflags cs fs gs
;
; Slots rax-rdx (4 * 8 = 32 bytes):
;   The OS X loader clears rax-rdx regardless of the initial state, so these
;   32 bytes are free to hold code. The whole program does not fit in 32 bytes
;   in every encoding, so it is split: the write syscall lives here and jumps
;   to the exit syscall stored in the r8/r9 slots.
;
; Layout safety:
;   Encodings (and therefore code size) differ between NASM versions and -O
;   levels; historically the padding after the code had to be hand-adjusted
;   (e.g. 24 bytes of code with 64-bit movs and a short jump, 27 bytes with a
;   long jump). Here the code uses 32-bit register forms and an explicit short
;   jump, and each region is padded with TIMES up to its slot boundary. If the
;   code ever outgrows a slot the TIMES count goes negative and the build
;   fails instead of silently shifting every following register (including
;   rip).
start:
        ; rdi (fd) and rsi (buffer) are preloaded through the initial state below
        mov     edx, len                                ; byte count; 32-bit write zero-extends into rdx
        mov     eax, SYSCALL_CONSTRUCT_UNIX(SYS_write)
        syscall
        jmp     short next                              ; skip over the rdi/rsi/rbp/rsp slots
        times   32 - ($ - start) db 0x00                ; pad to the end of the rdx slot

        dq      0x01, hello_str, 0x00, 0x00             ; rdi = STDOUT, rsi = address of hello_str, rbp, rsp

; Slots r8 and r9 (2 * 8 = 16 bytes) hold the exit syscall; their initial
; state is never used as data.
next:
        xor     edi, edi                                ; exit status 0
        mov     eax, SYSCALL_CONSTRUCT_UNIX(SYS_exit)
        syscall
        times   16 - ($ - next) db 0x00                 ; pad to the end of the r9 slot

        dq      0x00, 0x00, 0x00, 0x00, 0x00, 0x00      ; r10, r11, r12, r13, r14, r15
        dq      start, 0x00, 0x00, 0x00, 0x00           ; rip, rflags, cs, fs, gs
end:
filesize EQU $-$$
; A minimal Mach-o x64 executable for OS X (using LC_MAIN in Mountain Lion)
;
; $ nasm -O0 -f bin -o tiny_hello tiny_hello.s
; $ chmod +x tiny_hello
; $ ./tiny_hello
; Hello, World!
; $
; c.f.
; http://osxbook.com/blog/2009/03/15/crafting-a-tiny-mach-o-executable/ ( the original tiny mach-o executable )
; http://feiri.de/macho/ ( improved version, using a very clever trick to put code inside the load command itself)
; Constants (for readability). Names follow the Mach-O / Mach kernel headers.
;
; Composite values are wrapped in parentheses so that each macro expands to a
; single operand: without them, `LC_MAIN >> 28` would expand to
; `0x28 | LC_REQ_DYLD >> 28` and shift only the second term.

; --- mach_header_64 fields ---
%define MH_MAGIC_64                 0xfeedfacf
%define CPU_ARCH_ABI64              0x01000000
%define CPU_TYPE_I386               0x00000007
%define CPU_TYPE_X86_64             (CPU_ARCH_ABI64 | CPU_TYPE_I386)
%define CPU_SUBTYPE_LIB64           0x80000000
%define CPU_SUBTYPE_I386_ALL        0x00000003
%define MH_EXECUTE                  0x2
%define MH_NOUNDEFS                 0x1

; --- load command identifiers ---
%define LC_REQ_DYLD                 0x80000000
%define LC_LOAD_DYLIB               0xc
%define LC_LOAD_DYLINKER            0xe
%define LC_SEGMENT_64               0x19
%define LC_MAIN                     (0x28 | LC_REQ_DYLD)

; --- memory protection bits ---
%define VM_PROT_READ                0x1
%define VM_PROT_WRITE               0x2
%define VM_PROT_EXECUTE             0x4

; --- thread state (not referenced by the LC_MAIN variant; kept for parity) ---
%define x86_THREAD_STATE64          0x4
; The count is in uint32_t units; 42 is the thread-state count.
%define x86_THREAD_STATE64_COUNT    42
; Historical, misleading name for the same value; kept as an alias.
%define x86_EXCEPTION_STATE64_COUNT x86_THREAD_STATE64_COUNT

; --- syscall numbering: class in bits 24..31, number in the remaining bits ---
%define SYSCALL_CLASS_SHIFT         24
%define SYSCALL_CLASS_MASK          (0xFF << SYSCALL_CLASS_SHIFT)
%define SYSCALL_NUMBER_MASK         (~SYSCALL_CLASS_MASK)
%define SYSCALL_CLASS_UNIX          2
%define SYSCALL_CONSTRUCT_UNIX(syscall_number) \
((SYSCALL_CLASS_UNIX << SYSCALL_CLASS_SHIFT) | \
(SYSCALL_NUMBER_MASK & (syscall_number)))
%define SYS_exit                    1
%define SYS_write                   4
; Assembler setup: directives only, no bytes are emitted for these two lines.
; 64-bit mode, with RIP-relative as the default form for memory operands.
        bits    64
        default rel

; mach_header_64: eight 32-bit fields at the very start of the file.
        dd      MH_MAGIC_64                                 ; magic
        dd      CPU_TYPE_X86_64                             ; cputype
        dd      CPU_SUBTYPE_LIB64 | CPU_SUBTYPE_I386_ALL    ; cpusubtype
        dd      MH_EXECUTE                                  ; filetype
        dd      4                                           ; ncmds (segment, dylinker, dylib, main)
        dd      start - load_commands                       ; sizeofcmds: commands end where the code begins
        dd      MH_NOUNDEFS                                 ; flags
        dd      0                                           ; reserved
; Load commands start here; the header's sizeofcmds is measured from this label.
load_commands:

; LC_SEGMENT_64 with no sections: one segment at vmaddr 0 backed by the file
; from offset 0 for `filesize` bytes.
        dd      LC_SEGMENT_64                   ; cmd
        dd      72                              ; cmdsize

; The 16-byte segname field doubles as storage for the message.
hello_str:
        db      'Hello, World!', 0x0A           ; segname bytes 0..13 (the message)
len     equ     $ - hello_str                   ; message length in bytes
        times   16 - len db 0x00                ; zero-fill the rest of segname

        dq      0                               ; vmaddr
        dq      0x1000                          ; vmsize
        dq      0                               ; fileoff
        dq      filesize                        ; filesize
        dd      VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE ; maxprot
        dd      VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE ; initprot
        dd      0                               ; nsects
        dd      0                               ; flags
; Mountain Lion replaces LC_UNIXTHREAD with LC_MAIN. LC_MAIN has a fixed size
; of 24 bytes, so there is no room to hide code inside the command itself;
; the code is simply appended after the last load command.

; LC_MAIN only works through dyld, so the dynamic linker must be named.
        dd      LC_LOAD_DYLINKER                ; cmd
        dd      28                              ; cmdsize (must be a multiple of 4)
        dd      12                              ; lc_str.offset: the path follows these three dwords
        db      '/usr/lib/dyld'                 ; 13 path bytes
        times   3 db 0x00                       ; zero bytes bringing the command up to 28

; libSystem must be linked as well.
        dd      LC_LOAD_DYLIB                   ; cmd
        dd      52                              ; cmdsize
        dd      24                              ; lc_str.offset: the path follows these six dwords
        dd      2                               ; timestamp
        dd      0xA90300                        ; current_version
        dd      0x010000                        ; compatibility_version
        db      '/usr/lib/libSystem.B.dylib'    ; 26 path bytes
        times   2 db 0x00                       ; zero bytes bringing the command up to 52

; Entry point command.
        dd      LC_MAIN                         ; cmd
        dd      24                              ; cmdsize
        dq      start                           ; entry point (value of the `start` label)
        dq      0                               ; stack size
; Entry point named by LC_MAIN. The code is reached from dyld's glue code like
; a C `main`, and whatever is in eax at `ret` is handed back to that glue as
; the return value of "main".
;
; Clobbers: rax, rdx, rsi, rdi, flags (plus whatever the syscall clobbers).
; Code size may vary slightly with the NASM version; nothing depends on it
; because `filesize` is computed from the final position.
start:
        mov     edi, 0x01                               ; fd = STDOUT (32-bit write zero-extends)
        lea     rsi, [rel hello_str]                    ; buffer; RIP-relative, so independent of the load address
        mov     edx, len                                ; byte count
        mov     eax, SYSCALL_CONSTRUCT_UNIX(SYS_write)
        syscall

        ; An explicit exit syscall is optional here: returning lets the dyld
        ; glue terminate the process. The syscall leaves its result in rax
        ; (the byte count on success), so clear it first; otherwise the
        ; process would report a non-zero exit status after a successful run.
        xor     eax, eax                                ; return 0 from "main"
        ret                                             ; back to the glue code, which takes care of the rest
end:
filesize EQU $-$$
@CBSears
Copy link

CBSears commented Mar 31, 2022

@asauber I wanted to know why it was 42 rather than 21. The answer comes from loader.h:

./Developer/CommandLineTools/SDKs/MacOSX11.3.sdk/usr/include/mach-o/loader.h

/* uint32_t count count of uint32_t's in thread state */

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment