Below is the handwritten LLVM IR for SP_2SWAP and SP_ROT
SP_2SWAP:
kernel.SP_2SWAP:
; 2SWAP: exchange the top two pairs of cells on the data stack, in place.
;   stack before (top first): A B C D
;   stack after  (top first): C D A B
; The stack pointer stored at %SP.ptr.ptr is only read; it is never written.
; Register-level equivalent:
;   pop %eax    ; A
;   pop %ebx    ; B
;   pop %ecx    ; C
;   pop %edx    ; D
;   push %ebx
;   push %eax
;   push %edx
;   push %ecx
; Slots are addressed with getelementptr in units of %cell instead of a
; ptrtoint / add 8 / inttoptr round trip, so no cell size is hard-coded and
; the derived pointers stay visibly based on the stack pointer.
; NOTE(review): each step is one %cell, i.e. 8 bytes only if %cell is a
; 64-bit type -- confirm against the definition of %cell.

; current data stack pointer (address of the top cell, A)
%SP.SP_2SWAP = load %cell.ptr* %SP.ptr.ptr

; addresses of the four topmost cells
%A.addr.ptr.SP_2SWAP = getelementptr %cell.ptr %SP.SP_2SWAP, i32 0
%B.addr.ptr.SP_2SWAP = getelementptr %cell.ptr %SP.SP_2SWAP, i32 1
%C.addr.ptr.SP_2SWAP = getelementptr %cell.ptr %SP.SP_2SWAP, i32 2
%D.addr.ptr.SP_2SWAP = getelementptr %cell.ptr %SP.SP_2SWAP, i32 3

; read all four cells before any slot is overwritten
%A.cell.SP_2SWAP = load %cell* %A.addr.ptr.SP_2SWAP
%B.cell.SP_2SWAP = load %cell* %B.addr.ptr.SP_2SWAP
%C.cell.SP_2SWAP = load %cell* %C.addr.ptr.SP_2SWAP
%D.cell.SP_2SWAP = load %cell* %D.addr.ptr.SP_2SWAP

; write the permuted values back, deepest slot first
store %cell %B.cell.SP_2SWAP, %cell* %D.addr.ptr.SP_2SWAP ; slot D <- B (push %ebx)
store %cell %A.cell.SP_2SWAP, %cell* %C.addr.ptr.SP_2SWAP ; slot C <- A (push %eax)
store %cell %D.cell.SP_2SWAP, %cell* %B.addr.ptr.SP_2SWAP ; slot B <- D (push %edx)
store %cell %C.cell.SP_2SWAP, %cell* %A.addr.ptr.SP_2SWAP ; slot A <- C (push %ecx)
br label %kernel.NEXT
SP_ROT:
kernel.SP_ROT:
; ROT: rotate the three topmost cells of the data stack, in place.
;   stack before (top first): A B C
;   stack after  (top first): C A B
; The stack pointer stored at %SP.ptr.ptr is only read; it is never written.
; Register-level equivalent:
;   pop %eax    ; A
;   pop %ebx    ; B
;   pop %ecx    ; C
;   push %ebx
;   push %eax
;   push %ecx
; Slots are addressed with getelementptr in units of %cell instead of a
; ptrtoint / add 8 / inttoptr round trip, so no cell size is hard-coded and
; the derived pointers stay visibly based on the stack pointer.
; NOTE(review): each step is one %cell, i.e. 8 bytes only if %cell is a
; 64-bit type -- confirm against the definition of %cell.

; load the memory address that %SP.ptr.ptr resolves to (address of cell A)
%SP.SP_ROT = load %cell.ptr* %SP.ptr.ptr

; addresses of the three topmost cells
%A.addr.ptr.SP_ROT = getelementptr %cell.ptr %SP.SP_ROT, i32 0
%B.addr.ptr.SP_ROT = getelementptr %cell.ptr %SP.SP_ROT, i32 1
%C.addr.ptr.SP_ROT = getelementptr %cell.ptr %SP.SP_ROT, i32 2

; read all three cells before any slot is overwritten
%A.cell.SP_ROT = load %cell* %A.addr.ptr.SP_ROT ; %eax
%B.cell.SP_ROT = load %cell* %B.addr.ptr.SP_ROT ; %ebx
%C.cell.SP_ROT = load %cell* %C.addr.ptr.SP_ROT ; %ecx

; write the rotated values back, deepest slot first
store %cell %B.cell.SP_ROT, %cell* %C.addr.ptr.SP_ROT ; slot C <- B (push %ebx)
store %cell %A.cell.SP_ROT, %cell* %B.addr.ptr.SP_ROT ; slot B <- A (push %eax)
store %cell %C.cell.SP_ROT, %cell* %A.addr.ptr.SP_ROT ; slot A <- C (push %ecx)
br label %kernel.NEXT
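x86-64 assembly output for SP_2SWAP and SP_ROT. Note that SP_2SWAP performs only its first store itself and then jumps into the tail of SP_ROT (LBB13_18): the remaining three stores of both blocks write the same registers to the same stack offsets, so the compiler emitted them once and shares them: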
## Compiled body of SP_2SWAP (AT&T syntax: source operand first).
## Values as loaded below, with A the top cell of the data stack:
##   rax = stack pointer, rsi = A, rdi = B, rcx = C, rdx = D
## Only the store into slot D is done here; the other three stores are
## the instructions at LBB13_18, which this block jumps to.
LBB13_11: ## %kernel.SP_2SWAP
## in Loop: Header=BB13_1 Depth=1
movq (%rbx), %rax ## rax = pointer read from (%rbx); matches the IR's load of the stack pointer
movq 16(%rax), %rcx ## rcx = C, cell at offset 16
movq 24(%rax), %rdx ## rdx = D, cell at offset 24
movq (%rax), %rsi ## rsi = A, cell at offset 0 (top)
movq 8(%rax), %rdi ## rdi = B, cell at offset 8
movq %rdi, 24(%rax) ## slot D <- B
jmp LBB13_18 ## continue with: slot C <- rsi (A), slot B <- rdx (D), slot A <- rcx (C)
## Compiled body of SP_ROT (AT&T syntax: source operand first).
## Values as loaded below, with A the top cell of the data stack:
##   rax = stack pointer, rdx = A, rsi = B, rcx = C
LBB13_17: ## %kernel.SP_ROT
## in Loop: Header=BB13_1 Depth=1
movq (%rbx), %rax ## rax = pointer read from (%rbx); matches the IR's load of the stack pointer
movq 16(%rax), %rcx ## rcx = C, cell at offset 16
movq (%rax), %rdx ## rdx = A, cell at offset 0 (top)
movq 8(%rax), %rsi ## rsi = B, cell at offset 8
## Three-store tail. Entered by fall-through from the loads above and by
## the jmp at the end of LBB13_11 (SP_2SWAP); each entry path has already
## placed the values to be stored in rsi, rdx and rcx, with rax = stack pointer.
LBB13_18: ## %kernel.SP_ROT
## in Loop: Header=BB13_1 Depth=1
movq %rsi, 16(%rax) ## cell at offset 16 <- rsi
movq %rdx, 8(%rax) ## cell at offset 8 <- rdx
movq %rcx, (%rax) ## cell at offset 0 (top) <- rcx
jmp LBB13_1 ## back to the loop header BB13_1 (presumably the IR's br to %kernel.NEXT)