-
-
Save wh5a/502907 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
gcc a.c -g -o a | |
objdump -S a > a.s | |
*/ | |
typedef int (*func_t)(); | |
static int f(int arg) { | |
int a = 0, b = 1, c = 2; | |
int nested() { | |
int d = 3; | |
return b + d + arg; | |
} | |
return nested(); | |
} | |
int main() | |
{ | |
return f(4); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
08048374 <nested.1219>: | |
static int f(int arg) { | |
int a = 0, b = 1, c = 2; | |
int nested() { | |
8048374: 55 push %ebp | |
8048375: 89 e5 mov %esp,%ebp | |
8048377: 83 ec 10 sub $0x10,%esp | |
804837a: 89 c8 mov %ecx,%eax | |
int d = 3; | |
804837c: c7 45 fc 03 00 00 00 movl $0x3,-0x4(%ebp) | |
return b + d + arg; | |
8048383: 8b 50 04 mov 0x4(%eax),%edx | |
8048386: 03 55 fc add -0x4(%ebp),%edx | |
8048389: 8b 00 mov (%eax),%eax | |
804838b: 8d 04 02 lea (%edx,%eax,1),%eax | |
} | |
804838e: c9 leave | |
804838f: c3 ret | |
08048390 <f>: | |
static int f(int arg) { | |
8048390: 55 push %ebp | |
8048391: 89 e5 mov %esp,%ebp | |
8048393: 83 ec 10 sub $0x10,%esp | |
8048396: 8b 45 08 mov 0x8(%ebp),%eax | |
8048399: 89 45 f0 mov %eax,-0x10(%ebp) | |
int a = 0, b = 1, c = 2; | |
804839c: c7 45 fc 00 00 00 00 movl $0x0,-0x4(%ebp) | |
80483a3: c7 45 f4 01 00 00 00 movl $0x1,-0xc(%ebp) | |
80483aa: c7 45 f8 02 00 00 00 movl $0x2,-0x8(%ebp) | |
int nested() { | |
int d = 3; | |
return b + d + arg; | |
} | |
return nested(); | |
80483b1: 8d 45 f0 lea -0x10(%ebp),%eax | |
80483b4: 89 c1 mov %eax,%ecx | |
80483b6: e8 b9 ff ff ff call 8048374 <nested.1219> | |
} | |
80483bb: c9 leave | |
80483bc: c3 ret |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
gcc b.c -g -o b | |
objdump -S b > b.s | |
*/ | |
typedef int (*func_t)(); | |
int g(func_t h) { | |
return h(); | |
} | |
static int f(int arg) { | |
int a = 0, b = 1, c = 2; | |
int nested() { | |
int d = 3; | |
return b + d + arg; | |
} | |
return g(nested); | |
} | |
int main() | |
{ | |
return f(4); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
08048374 <g>: | |
int g(func_t h) { | |
8048374: 55 push %ebp | |
8048375: 89 e5 mov %esp,%ebp | |
8048377: 83 ec 08 sub $0x8,%esp | |
return h(); | |
804837a: 8b 45 08 mov 0x8(%ebp),%eax | |
804837d: ff d0 call *%eax | |
} | |
804837f: c9 leave | |
8048380: c3 ret | |
08048381 <nested.1222>: | |
static int f(int arg) { | |
int a = 0, b = 1, c = 2; | |
int nested() { | |
8048381: 55 push %ebp | |
8048382: 89 e5 mov %esp,%ebp | |
8048384: 83 ec 10 sub $0x10,%esp | |
8048387: 89 c8 mov %ecx,%eax | |
int d = 3; | |
8048389: c7 45 fc 03 00 00 00 movl $0x3,-0x4(%ebp) | |
return b + d + arg; | |
8048390: 8b 50 04 mov 0x4(%eax),%edx | |
8048393: 03 55 fc add -0x4(%ebp),%edx | |
8048396: 8b 00 mov (%eax),%eax | |
8048398: 8d 04 02 lea (%edx,%eax,1),%eax | |
} | |
804839b: c9 leave | |
804839c: c3 ret | |
0804839d <f>: | |
static int f(int arg) { | |
804839d: 55 push %ebp | |
804839e: 89 e5 mov %esp,%ebp | |
80483a0: 53 push %ebx | |
80483a1: 83 ec 34 sub $0x34,%esp | |
;; copy arg | |
80483a4: 8b 45 08 mov 0x8(%ebp),%eax | |
80483a7: 89 45 dc mov %eax,-0x24(%ebp) | |
;; point eax to on-stack trampoline | |
80483aa: 8d 45 dc lea -0x24(%ebp),%eax | |
80483ad: 83 c0 08 add $0x8,%eax | |
;; point edx to the frame | |
80483b0: 8d 55 dc lea -0x24(%ebp),%edx | |
;; write the trampoline's first instruction's opcode (0xb9 mov ecx) | |
80483b3: c6 00 b9 movb $0xb9,(%eax) | |
;; write the instruction's operand (the frame %ecx is supposed to point to) | |
80483b6: 89 50 01 mov %edx,0x1(%eax) | |
;; calculate the offset for the relative jump | |
80483b9: b9 81 83 04 08 mov $0x8048381,%ecx | |
80483be: 8d 50 0a lea 0xa(%eax),%edx | |
80483c1: 89 cb mov %ecx,%ebx | |
80483c3: 29 d3 sub %edx,%ebx | |
80483c5: 89 da mov %ebx,%edx | |
;; write the second instruction's opcode (0xe9 jmp) | |
80483c7: c6 40 05 e9 movb $0xe9,0x5(%eax) | |
;; write the instruction's operand (which will jump to nested) | |
80483cb: 89 50 06 mov %edx,0x6(%eax) | |
int a = 0, b = 1, c = 2; | |
80483ce: c7 45 f4 00 00 00 00 movl $0x0,-0xc(%ebp) | |
80483d5: c7 45 e0 01 00 00 00 movl $0x1,-0x20(%ebp) | |
80483dc: c7 45 f0 02 00 00 00 movl $0x2,-0x10(%ebp) | |
int nested() { | |
int d = 3; | |
return b + d + arg; | |
} | |
return g(nested); | |
80483e3: 8d 45 dc lea -0x24(%ebp),%eax | |
80483e6: 83 c0 08 add $0x8,%eax | |
;; pass the trampoline address to g | |
80483e9: 89 04 24 mov %eax,(%esp) | |
80483ec: e8 83 ff ff ff call 8048374 <g> | |
} | |
80483f1: 83 c4 34 add $0x34,%esp | |
80483f4: 5b pop %ebx | |
80483f5: 5d pop %ebp | |
80483f6: c3 ret |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
DISCLAIMER: All my knowledge is obtained by reverse engineering the assembly code. I didn't read gcc's source code or the paper "Lexical Closures for C++". Please leave a comment if there's anything wrong. | |
GCC supports a limited form of <a href="http://gcc.gnu.org/onlinedocs/gcc/Nested-Functions.html">nested functions</a> through the use of trampolines. I got interested in its implementation after reading jserv's <a href="http://blog.linux.org.tw/~jserv/archives/2010/07/gcc_nested_func.html">blog post</a>. There are some errors in it. For example, you're not supposed to return a function pointer to a nested function (we'll see why below). | |
First, let's see a simple example. | |
<script src="http://gist.github.com/502907.js?file=a.c"></script> | |
and the generated assembly | |
<script src="http://gist.github.com/502907.js?file=a.s"></script> | |
What can we tell from the assembly of nested()? | |
1. The nested function is compiled as if it's defined at the top level (with some twist), rather than inlined in the enclosing function. | |
2. It has its own stack frame for storing its local data, just as any other function. | |
3. Data in the outer lexical scope is stored in a separate "stack frame", whose base is pointed to by %ecx, whereas regular stack frames are referenced by %ebp. | |
To understand how the stack was set up for nested(), we must look at the assembly of f(): | |
4. No trampoline was generated in this example, because nested() wasn't taken address of. | |
5. The stack layout (before f calls nested) is pretty much like this: | |
<pre> | |
| arg | | |
| saved %eip | | |
| saved %ebp | | |
ebp --> +--------------------+ | |
| a | | |
| c | f's stack frame | |
+--------------------+--------------------------\ | |
| b | shared stack frame | | |
+--------------------+ frame created for nested | |
| arg' | copied arguments | | |
ecx --> +--------------------+--------------------------/ | |
</pre> | |
f()'s local variables are allocated together, whereas f()'s variables shared by nested() are allocated below them, and accessible with %ecx. | |
So far so good. How do we maintain the invariant that nested() can access its outer lexical scope through %ecx, if we're to pass a pointer to nested() around and later make an indirect call through the pointer? That's where trampolines come in. | |
Below is a slightly modified program | |
<script src="http://gist.github.com/502907.js?file=b.c"></script> | |
and its assembly | |
<script src="http://gist.github.com/502907.js?file=b.s"></script> | |
nested() is unchanged, and g() is uninteresting, so we'll focus on f(). The stack frame before f calls g is now like this: | |
<pre> | |
| arg | | |
| saved %eip | | |
| saved %ebp | | |
ebp --> +--------------------+ | |
| a | | |
| c | f's stack frame | |
+--------------------+ | |
| jmp [nested] | | |
| mov ___ %ecx | trampoline for nested | |
nested --> +--------|-----------+ | |
______________| | |
| +--------------------+--------------------------\ | |
| | b | shared stack frame | | |
| +--------------------+ frame created for nested | |
| | arg' | copied arguments | | |
--> +--------------------+--------------------------/ | |
</pre> | |
The function pointer to nested() doesn't really point to the function's machine code. It instead points to a two-instruction trampoline created on f()'s stack. The trampoline does two things: first restores the %ecx invariant, then jumps to the real address of nested(). | |
Whew! What did we learn? Everything nested() needs is stored on stack along with f()'s data. No heap is involved and least amount of data is copied, so nested functions are implemented very efficiently in gcc. The downside is that we still need to maintain the LIFO nature of a stack. We cannot really go nuts and pretend we can do functional programming as the examples in jserv's blog. To quote gcc's manual: | |
<blockquote>If you try to call the nested function through its address after the containing function has exited, all hell will break loose. If you try to call it after a containing scope level has exited, and if it refers to some of the variables that are no longer in scope, you may be lucky, but it's not wise to take the risk. If, however, the nested function does not refer to anything that has gone out of scope, you should be safe.</blockquote> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment