-
-
Save kekyo/1dfb79cd5298f4cb12d4bec999568b0a to your computer and use it in GitHub Desktop.
| .code | |
| ; struct | |
| ; { | |
| ; uint64_t Frame; | |
| ; uint64_t Rbx; | |
| ; uint64_t Rsp; | |
| ; uint64_t Rbp; | |
| ; uint64_t Rsi; | |
| ; uint64_t Rdi; | |
| ; uint64_t R12; | |
| ; uint64_t R13; | |
| ; uint64_t R14; | |
| ; uint64_t R15; | |
| ; uint64_t Rip; | |
| ; uint32_t MxCsr; | |
| ; uint16_t FpCsr; | |
| ; uint16_t Spare; | |
| ; uint128_t Xmm6; | |
| ; uint128_t Xmm7; | |
| ; uint128_t Xmm8; | |
| ; uint128_t Xmm9; | |
| ; uint128_t Xmm10; | |
| ; uint128_t Xmm11; | |
| ; uint128_t Xmm12; | |
| ; uint128_t Xmm13; | |
| ; uint128_t Xmm14; | |
| ; uint128_t Xmm15; | |
| ; } *jmp_buf; | |
| ; int setjmp(jmp_buf env); // setjmp(rcx) | |
| ; Records the caller's register state in the buffer at rcx and returns 0. | |
| ; ABI: Microsoft x64 (first argument in rcx). | |
| ; In: rcx = env, laid out as in the struct comment at the top of this file | |
| ; rdx = stored verbatim into env.Frame (the one-argument prototype | |
| ; does not define it; this routine never interprets it) | |
| ; Out: eax = 0 | |
| ; Clobbers: r8 | |
| ; Stack: none used; reads only its own return address at [rsp]. | |
| ; The xmm saves use movdqu so env does not have to be 16-byte aligned | |
| ; (movdqa raises #GP on an unaligned address). | |
| mysetjmp proc | |
| mov [rcx], rdx ; Frame | |
| mov [rcx+8], rbx ; Rbx | |
| lea r8, [rsp+8] ; caller's rsp once our return address is popped | |
| mov [rcx+10h], r8 ; Rsp | |
| mov [rcx+18h], rbp ; Rbp | |
| mov [rcx+20h], rsi ; Rsi | |
| mov [rcx+28h], rdi ; Rdi | |
| mov [rcx+30h], r12 ; R12 | |
| mov [rcx+38h], r13 ; R13 | |
| mov [rcx+40h], r14 ; R14 | |
| mov [rcx+48h], r15 ; R15 | |
| mov r8, [rsp] ; our return address = the resume point | |
| mov [rcx+50h], r8 ; Rip | |
| stmxcsr dword ptr [rcx+58h] ; MxCsr | |
| fnstcw word ptr [rcx+5ch] ; FpCsr (x87 control word); Spare at +5eh is left untouched | |
| movdqu [rcx+60h], xmm6 ; Xmm6 | |
| movdqu [rcx+70h], xmm7 ; Xmm7 | |
| movdqu [rcx+80h], xmm8 ; Xmm8 | |
| movdqu [rcx+90h], xmm9 ; Xmm9 | |
| movdqu [rcx+0a0h], xmm10 ; Xmm10 | |
| movdqu [rcx+0b0h], xmm11 ; Xmm11 | |
| movdqu [rcx+0c0h], xmm12 ; Xmm12 | |
| movdqu [rcx+0d0h], xmm13 ; Xmm13 | |
| movdqu [rcx+0e0h], xmm14 ; Xmm14 | |
| movdqu [rcx+0f0h], xmm15 ; Xmm15 | |
| xor eax, eax ; direct call: return 0 | |
| ret | |
| mysetjmp endp | |
| ; void longjmp(jmp_buf env, int retval); // longjmp(rcx, edx) | |
| ; Reloads the register state stored in the buffer at rcx and jumps to the | |
| ; saved Rip, so execution continues as if the call that filled the buffer | |
| ; had just returned. Never returns to its own caller. | |
| ; ABI: Microsoft x64 (arguments in rcx, edx). | |
| ; In: rcx = env, laid out as in the struct comment at the top of this file | |
| ; edx = retval | |
| ; Out: eax = retval, or 1 when retval is 0, so the resumed code can always | |
| ; tell this apart from a direct return of 0 | |
| ; Clobbers: rdx, r8, flags | |
| ; The xmm loads use movdqu so env does not have to be 16-byte aligned. | |
| mylongjmp proc | |
| mov eax, 1 ; value handed back when retval == 0 | |
| test edx, edx | |
| cmovnz eax, edx ; nonzero retval is handed back unchanged | |
| mov rdx, [rcx] ; Frame (nothing below reads it) | |
| mov rbx, [rcx+8] ; Rbx | |
| mov rbp, [rcx+18h] ; Rbp | |
| mov rsi, [rcx+20h] ; Rsi | |
| mov rdi, [rcx+28h] ; Rdi | |
| mov r12, [rcx+30h] ; R12 | |
| mov r13, [rcx+38h] ; R13 | |
| mov r14, [rcx+40h] ; R14 | |
| mov r15, [rcx+48h] ; R15 | |
| ldmxcsr dword ptr [rcx+58h] ; MxCsr | |
| fnclex ; clear pending x87 exceptions so the new control word cannot unmask one | |
| fldcw word ptr [rcx+5ch] ; FpCsr | |
| movdqu xmm6, [rcx+60h] ; Xmm6 | |
| movdqu xmm7, [rcx+70h] ; Xmm7 | |
| movdqu xmm8, [rcx+80h] ; Xmm8 | |
| movdqu xmm9, [rcx+90h] ; Xmm9 | |
| movdqu xmm10, [rcx+0a0h] ; Xmm10 | |
| movdqu xmm11, [rcx+0b0h] ; Xmm11 | |
| movdqu xmm12, [rcx+0c0h] ; Xmm12 | |
| movdqu xmm13, [rcx+0d0h] ; Xmm13 | |
| movdqu xmm14, [rcx+0e0h] ; Xmm14 | |
| movdqu xmm15, [rcx+0f0h] ; Xmm15 | |
| mov r8, [rcx+50h] ; Rip (resume address) | |
| mov rsp, [rcx+10h] ; Rsp, restored last so env is fully read before the stack moves | |
| jmp r8 | |
| mylongjmp endp | |
| end |
@evjeesm Thank you! Wow, I completely forgot the background of this implementation (I even forgot gist existed! ;)
Maybe it was an experiment to implement an exception handler in one of my projects IL2C, or maybe it was a note I made while thinking about the validity of sjlj within the Windows kernel mode driver.
In both cases, I don't remember referring to the internal implementation of sjlj in the standard C runtime libraries, and I think I designed it on my own, relying on my knowledge of coroutine implementations and pre-emptive multi-threaded implementations from the Z80 and 8086 days, long ago. So there must be something missing compared to the standard sjlj implementation, which may have led to what you have shown.
I am glad to hear from you!
I just got into assembly lately and was curious about the working behind 'setjmp'.
Your gist was one of the first links that popped up in the search ))
So I just explore things by re-implementing and disassembling stuff.
Oh, I think I get it, this IL2C is about enabling c# for embedded devices, impressive!
I did explore primitive coro and try/catch constructs in C using setjmp and ucontext.
Very powerful mechanism!
TL;DR: some of my observations:
Well, as far as I understand, standard setjmp on x86_64-linux-gnu saves only 7 general purpose registers + rip (return address).
(rbx, r12-r15, rsp, rbp) are the only registers that should be preserved between the calls.
Another thing worth mentioning is that their implementation does primitive encryption of 'rsp' and 'rbp' before storing them in the 'jmp_buf' (rotate right by 17, followed by XOR with a random 8-byte key stored in TLS) and decrypts them on longjmp.
Well, as far as I understand, standard setjmp on x86_64-linux-gnu saves only 7 general purpose registers + rip (return address).
That's interesting. I think the implementation of sjlj depends on the ABI specification of the processor and OS. In other words, in the case of the x86-64 linux kernel and libc, it seems that the ABI is based on the assumption that only those registers are preserved.
If I remember correctly, the xmm register group should also be saved in the case of Windows.
I think that narrowing down the sjlj (the register group that should be saved in the C language implementation) to a minimum makes sense in terms of saving storage space and reducing load/store time.
their implementation does primitive encryption
I see, so this is how they defend against attacks, that's surprising too! As longjmp has the property of being able to jump anywhere, it's practically the same as an attack by stack corruption.
Very interesting implementation, Cozy!
I've noticed that you save extra registers here compared to the standard C implementation.
The original __jmp_buf, apart from the signal-mask fields, takes 64 bytes; I assume they save only eight 64-bit registers. Does that make sense?