Last active
December 19, 2015 04:29
-
-
Save TikiTDO/5897771 to your computer and use it in GitHub Desktop.
short example of abstract compilation performed on JS
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Lex Config for C - http://www.quut.com/c/ANSI-C-grammar-l.html | |
Bison Config for C - http://www.quut.com/c/ANSI-C-grammar-y.html | |
ECMA Spec - http://www.ecma-international.org/ecma-262/5.1/#contents | |
# Organization | |
# Statements (Executes an operation) | |
# Expressions (Evaluates to a value) | |
js = <<js | |
function hello() { | |
var hello = "Hello"; | |
world = "World"; | |
console.log(hello + " " + world); | |
} | |
hello(); | |
js | |
# Lexical Tokens | |
function | |
identifier (hello) | |
open_param | |
close_param | |
open_curly | |
var_decl | |
identifier (hello) | |
equals | |
string ("Hello") | |
semicolon | |
identifier (world) | |
equals | |
string ("World") | |
semicolon | |
identifier (console) | |
dot_operator | |
identifier (log) | |
open_param | |
identifier (hello) | |
plus_sign | |
string (" ") | |
plus_sign | |
identifier (world) | |
close_param | |
semicolon | |
close_curly | |
identifier (hello) | |
open_param | |
close_param | |
semicolon | |
# Parsing | |
# Program :: Source Elements | |
# Source Elements :: Source Element | |
# :: Source Elements -> Source Element | |
require 'rkelly' | |
RKelly::Parser.new.parse(js) | |
# => | |
#<SourceElementsNode | |
@value=[ | |
#<FunctionDeclNode | |
@line=1, @value="hello", @arguments=[], @function_body=#<FunctionBodyNode | |
@value=#<SourceElementsNode | |
@value=[ | |
#<VarStatementNode | |
@line=2, @value=[ | |
#<VarDeclNode | |
@constant=false, @line=2, @name="hello", @value=#<AssignExprNode | |
@line=2, @value=#<StringNode | |
@line=2, @value="\"Hello\"">>> | |
]>, | |
#<ExpressionStatementNode | |
@line=2, @value=#<OpEqualNode | |
@left=#<ResolveNode | |
@line=2, @value="world">, | |
@value=#<StringNode | |
@line=2, @value="\"World\"">>>, | |
#<ExpressionStatementNode | |
@line=3, @value=#<FunctionCallNode | |
@arguments=#<ArgumentsNode | |
@line=3, @value=[ | |
#<AddNode | |
@left=#<AddNode | |
@left=#<ResolveNode | |
@line=4, @value="hello">, | |
@value=#<StringNode | |
@line=4, @value="\" \"">>, | |
@value=#<ResolveNode | |
@line=4, @value="world">> | |
]>, | |
@value=#<DotAccessorNode | |
@accessor="log", @line=3, @value=#<ResolveNode | |
@line=3, @value="console">>>> | |
]>>>, | |
#<ExpressionStatementNode | |
@line=5, @value=#<FunctionCallNode | |
@arguments=#<ArgumentsNode | |
@line=5, @value=[]>, | |
@value=#<ResolveNode | |
@line=5, @value="hello">>> | |
]> | |
# Compilation | |
1. Allocate space A to store the program, Ax to store program metadata | |
2. Allocate space B to store a function, Bx to store function metadata | |
3. Create a reference in Ax :hello => B | |
4. Write function preamble to B (This also initializes Bx on each call) | |
5. Write (Create a reference in Bx :hello => "Hello") to B | |
6. Write (Create a reference in Ax :world => "World") to B | |
7. Write (Query metadata for console, save to temp1) to B | |
8. Write (Query temp1 for log, save to temp2) to B | |
9. Write (Query metadata for :hello (Bx[:hello]), save to temp3) to B | |
10. Write (Push temp3 onto stack) to B | |
11. Write (Push " " onto stack) to B | |
12. Write (Query metadata for :world (Ax[:world]), save to temp4) to B | |
13. Write (Push temp4 onto stack) to B | |
14. Write (Call temp2) to B | |
15. Write function cleanup to B | |
16. Write (Query metadata for :hello (Ax[:hello]), save to temp5) to A | |
17. Write (Call temp5) to A | |
## Example Disassembly | |
# Source Code | |
#include "stdio.h" | |
char *foo () { | |
return "Hello World"; | |
} | |
int main () { | |
printf(foo()); | |
} | |
## Compiled Object | |
# Section Dump | |
a.o: file format elf64-x86-64 | |
Contents of section .text: | |
0000 554889e5 b8000000 005dc355 4889e5b8 UH.......].UH... | |
0010 00000000 e8000000 004889c7 b8000000 .........H...... | |
0020 00e80000 00005dc3 ......]. | |
Contents of section .rodata: | |
0000 48656c6c 6f20576f 726c6400 Hello World. | |
Contents of section .comment: | |
0000 00474343 3a202847 4e552920 342e382e .GCC: (GNU) 4.8. | |
0010 30203230 31333034 31312028 70726572 0 20130411 (prer | |
0020 656c6561 73652900 elease). | |
Contents of section .eh_frame: | |
0000 14000000 00000000 017a5200 01781001 .........zR..x.. | |
0010 1b0c0708 90010000 1c000000 1c000000 ................ | |
0020 00000000 0b000000 00410e10 8602430d .........A....C. | |
0030 06460c07 08000000 1c000000 3c000000 .F..........<... | |
0040 00000000 1d000000 00410e10 8602430d .........A....C. | |
0050 06580c07 08000000 .X...... | |
# Disassembly | |
Disassembly of section .text: | |
0000000000000000 <foo>: | |
0: 55 push %rbp | |
1: 48 89 e5 mov %rsp,%rbp | |
4: b8 00 00 00 00 mov $0x0,%eax | |
9: 5d pop %rbp | |
a: c3 retq | |
000000000000000b <main>: | |
b: 55 push %rbp | |
c: 48 89 e5 mov %rsp,%rbp | |
f: b8 00 00 00 00 mov $0x0,%eax | |
14: e8 00 00 00 00 callq 19 <main+0xe> | |
19: 48 89 c7 mov %rax,%rdi | |
1c: b8 00 00 00 00 mov $0x0,%eax | |
21: e8 00 00 00 00 callq 26 <main+0x1b> | |
26: 5d pop %rbp | |
27: c3 retq | |
# Relocation Dump (Used to set execution values in linker) | |
RELOCATION RECORDS FOR [.text]: | |
OFFSET TYPE VALUE | |
0000000000000005 R_X86_64_32 .rodata | |
0000000000000015 R_X86_64_PC32 foo-0x0000000000000004 | |
0000000000000022 R_X86_64_PC32 printf-0x0000000000000004 | |
RELOCATION RECORDS FOR [.eh_frame]: | |
OFFSET TYPE VALUE | |
0000000000000020 R_X86_64_PC32 .text | |
0000000000000040 R_X86_64_PC32 .text+0x000000000000000b | |
# Optimized Object | |
Disassembly of section .text: | |
0000000000000000 <foo>: | |
0: b8 00 00 00 00 mov $0x0,%eax | |
5: c3 retq | |
Disassembly of section .text.startup: | |
0000000000000000 <main>: | |
0: bf 00 00 00 00 mov $0x0,%edi | |
5: 31 c0 xor %eax,%eax | |
7: e9 00 00 00 00 jmpq c <main+0xc> | |
# Optimized Relocation Data | |
RELOCATION RECORDS FOR [.text]: | |
OFFSET TYPE VALUE | |
0000000000000001 R_X86_64_32 .rodata.str1.1 | |
RELOCATION RECORDS FOR [.text.startup]: | |
OFFSET TYPE VALUE | |
0000000000000001 R_X86_64_32 .rodata.str1.1 | |
0000000000000008 R_X86_64_PC32 printf-0x0000000000000004 | |
RELOCATION RECORDS FOR [.eh_frame]: | |
OFFSET TYPE VALUE | |
0000000000000020 R_X86_64_PC32 .text | |
0000000000000034 R_X86_64_PC32 .text.startup | |
# Symbol Table | |
SYMBOL TABLE: | |
0000000000000000 l df *ABS* 0000000000000000 a.c | |
0000000000000000 l d .text 0000000000000000 .text | |
0000000000000000 l d .data 0000000000000000 .data | |
0000000000000000 l d .bss 0000000000000000 .bss | |
0000000000000000 l d .rodata 0000000000000000 .rodata | |
0000000000000000 l d .note.GNU-stack 0000000000000000 .note.GNU-stack | |
0000000000000000 l d .eh_frame 0000000000000000 .eh_frame | |
0000000000000000 l d .comment 0000000000000000 .comment | |
0000000000000000 g F .text 000000000000000b foo | |
000000000000000b g F .text 000000000000001d main | |
0000000000000000 *UND* 0000000000000000 printf | |
# Linked Object | |
0000000000400500 <foo>: | |
400500: 55 push %rbp | |
400501: 48 89 e5 mov %rsp,%rbp | |
400504: b8 b4 05 40 00 mov $0x4005b4,%eax | |
400509: 5d pop %rbp | |
40050a: c3 retq | |
000000000040050b <main>: | |
40050b: 55 push %rbp | |
40050c: 48 89 e5 mov %rsp,%rbp | |
40050f: b8 00 00 00 00 mov $0x0,%eax | |
400514: e8 e7 ff ff ff callq 400500 <foo> | |
400519: 48 89 c7 mov %rax,%rdi | |
40051c: b8 00 00 00 00 mov $0x0,%eax | |
400521: e8 ba fe ff ff callq 4003e0 <printf@plt> | |
400526: 5d pop %rbp | |
400527: c3 retq | |
400528: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1) | |
40052f: 00 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment