Created
November 7, 2024 13:30
-
-
Save georgemorgan/65aa08cb872e3dabff23a87a63581411 to your computer and use it in GitHub Desktop.
Annotated hotspots for the HVM3 optimization challenge
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-------------------------------------------------------------------------------- | |
-- Metadata | |
-------------------------------------------------------------------------------- | |
Invocation: /usr/bin/cg_annotate --auto=yes cachegrind.out.1943 | |
Command: ./hvm3 | |
Events recorded: Ir | |
Events shown: Ir | |
Event sort order: Ir | |
Threshold: 0.1% | |
Annotation: on | |
-------------------------------------------------------------------------------- | |
-- Summary | |
-------------------------------------------------------------------------------- | |
Ir_____________________ | |
12,532,702,431 (100.0%) PROGRAM TOTALS | |
-------------------------------------------------------------------------------- | |
-- File:function summary | |
-------------------------------------------------------------------------------- | |
Ir____________________________ file:function | |
< 10,217,333,009 (81.5%, 81.5%) /home/george/HVML.c: | |
10,217,332,125 (81.5%) reduce | |
< 2,315,257,429 (18.5%, 100.0%) ???: | |
2,315,256,936 (18.5%) __aarch64_ldadd8_relax | |
-------------------------------------------------------------------------------- | |
-- Function:file summary | |
-------------------------------------------------------------------------------- | |
Ir____________________________ function:file | |
> 10,217,332,125 (81.5%, 81.5%) reduce:/home/george/HVML.c | |
> 2,315,256,936 (18.5%, 100.0%) __aarch64_ldadd8_relax:??? | |
-------------------------------------------------------------------------------- | |
-- Annotated source file: /home/george/HVML.c | |
-------------------------------------------------------------------------------- | |
Ir___________________ | |
570,426,090 (4.6%) <unknown (line 0)> | |
-- line 41 ---------------------------------------- | |
. #define SUB 0x07 | |
. | |
. #define VOID 0x00000000000000 | |
. | |
. // Initialization | |
. // -------------- | |
. | |
. Heap* new_heap() { | |
3 (0.0%) Heap* heap = malloc(sizeof(Heap)); | |
3 (0.0%) heap->stk = malloc((1ULL << 32) * sizeof(Term)); | |
4 (0.0%) heap->mem = malloc((1ULL << 32) * sizeof(ATerm)); | |
4 (0.0%) heap->ini = malloc(sizeof(a64)); | |
4 (0.0%) heap->end = malloc(sizeof(a64)); | |
4 (0.0%) heap->itr = malloc(sizeof(a64)); | |
3 (0.0%) atomic_store_explicit(heap->ini, 0, memory_order_relaxed); | |
2 (0.0%) atomic_store_explicit(heap->end, 1, memory_order_relaxed); | |
1 (0.0%) atomic_store_explicit(heap->itr, 0, memory_order_relaxed); | |
. return heap; | |
. } | |
. | |
. Term new_term(Tag tag, Lab lab, Loc loc) { | |
. Term tag_enc = tag; | |
. Term lab_enc = ((Term)lab) << 8; | |
67,108,886 (0.5%) Term loc_enc = ((Term)loc) << 32; | |
469,762,254 (3.7%) return tag_enc | lab_enc | loc_enc; | |
. } | |
. | |
. Tag get_tag(Term x) { | |
117,440,608 (0.9%) return x & 0xFF; | |
. } | |
. | |
. Lab get_lab(Term x) { | |
. return (x >> 8) & 0xFFFFFF; | |
. } | |
. | |
. Loc get_loc(Term x) { | |
436,208,026 (3.5%) return (x >> 32) & 0xFFFFFFFF; | |
. } | |
. | |
. Loc get_key(Term term) { | |
. switch (get_tag(term)) { | |
. case VAR: return get_loc(term) + 0; | |
. case DP0: return get_loc(term) + 0; | |
50,331,693 (0.4%) case DP1: return get_loc(term) + 1; | |
. default: return 0; | |
. } | |
. } | |
. | |
. Loc get_ini(Heap* heap) { | |
. return atomic_load_explicit(heap->ini, memory_order_relaxed); | |
. } | |
. | |
. Loc get_end(Heap* heap) { | |
2 (0.0%) return atomic_load_explicit(heap->end, memory_order_relaxed); | |
. } | |
. | |
. Loc get_itr(Heap* heap) { | |
4 (0.0%) return atomic_load_explicit(heap->itr, memory_order_relaxed); | |
. } | |
. | |
. void set_ini(Heap* heap, Loc value) { | |
2 (0.0%) atomic_store_explicit(heap->ini, value, memory_order_relaxed); | |
. } | |
. | |
. void set_end(Heap* heap, Loc value) { | |
1 (0.0%) atomic_store_explicit(heap->end, value, memory_order_relaxed); | |
. } | |
. | |
. void set_itr(Heap* heap, Loc value) { | |
2 (0.0%) atomic_store_explicit(heap->itr, value, memory_order_relaxed); | |
. } | |
. | |
. // Memory | |
. // ------ | |
. | |
. Term swap(Heap* heap, Loc loc, Term term) { | |
. return atomic_exchange_explicit(&heap->mem[loc], term, memory_order_relaxed); | |
. } | |
. | |
. Term got(Heap* heap, Loc loc) { | |
805,307,112 (6.4%) return atomic_load_explicit(&heap->mem[loc], memory_order_relaxed); | |
. } | |
. | |
. void set(Heap* heap, Loc loc, Term term) { | |
1,627,391,237 (13.0%) atomic_store_explicit(&heap->mem[loc], term, memory_order_relaxed); | |
. } | |
. | |
. Term take(Heap* heap, Loc loc) { | |
. return swap(heap, loc, VOID); | |
. } | |
. | |
. // Allocation | |
. // ---------- | |
. | |
. Loc alloc_node(Heap* heap, Loc arity) { | |
1,006,633,290 (8.0%) return atomic_fetch_add_explicit(heap->end, arity, memory_order_relaxed); | |
. } | |
. | |
. Loc inc_itr(Heap* heap) { | |
352,321,824 (2.8%) return atomic_fetch_add_explicit(heap->itr, 1, memory_order_relaxed); | |
. } | |
. | |
. // Stringification | |
. // --------------- | |
. | |
. void print_tag(Tag tag) { | |
. switch (tag) { | |
. case SUB: printf("SUB"); break; | |
-- line 148 ---------------------------------------- | |
-- line 189 ---------------------------------------- | |
. // (λx(body) a) | |
. // ------------ APP_LAM | |
. // x <- a | |
. // body | |
. Term reduce_app_lam(Heap* heap, Term app, Term lam) { | |
. inc_itr(heap); | |
. Loc app_loc = get_loc(app); | |
. Loc lam_loc = get_loc(lam); | |
33,554,482 (0.3%) Term arg = got(heap, app_loc + 1); | |
33,554,482 (0.3%) Term bod = got(heap, lam_loc + 1); | |
. set(heap, lam_loc + 0, arg); | |
. return bod; | |
. } | |
. | |
. // ({a b} c) | |
. // --------------- APP_SUP | |
. // & {x0 x1} = c | |
. // {(a x0) (b x1)} | |
. Term reduce_app_sup(Heap* heap, Term app, Term sup) { | |
. inc_itr(heap); | |
. Loc app_loc = get_loc(app); | |
. Loc sup_loc = get_loc(sup); | |
33,554,430 (0.3%) Term arg = got(heap, app_loc + 1); | |
. Term tm0 = got(heap, sup_loc + 0); | |
33,554,430 (0.3%) Term tm1 = got(heap, sup_loc + 1); | |
. Loc du0 = alloc_node(heap, 3); | |
. Loc su0 = alloc_node(heap, 2); | |
. Loc ap0 = alloc_node(heap, 2); | |
. Loc ap1 = alloc_node(heap, 2); | |
. set(heap, du0 + 0, new_term(SUB, 0, 0)); | |
33,554,430 (0.3%) set(heap, du0 + 1, new_term(SUB, 0, 0)); | |
33,554,430 (0.3%) set(heap, du0 + 2, arg); | |
. set(heap, ap0 + 0, tm0); | |
33,554,430 (0.3%) set(heap, ap0 + 1, new_term(DP0, 0, du0)); | |
. set(heap, ap1 + 0, tm1); | |
33,554,430 (0.3%) set(heap, ap1 + 1, new_term(DP1, 0, du0)); | |
. set(heap, su0 + 0, new_term(APP, 0, ap0)); | |
33,554,430 (0.3%) set(heap, su0 + 1, new_term(APP, 0, ap1)); | |
. return new_term(SUP, 0, su0); | |
. } | |
. | |
. // & {x y} = * | |
. // ----------- DUP_ERA | |
. // x <- * | |
. // y <- * | |
. Term reduce_dup_era(Heap* heap, Term dup, Term era) { | |
-- line 234 ---------------------------------------- | |
-- line 246 ---------------------------------------- | |
. // r <- λx0(f0) | |
. // s <- λx1(f1) | |
. // x <- {x0 x1} | |
. Term reduce_dup_lam(Heap* heap, Term dup, Term lam) { | |
. inc_itr(heap); | |
. Loc dup_loc = get_loc(dup); | |
. Tag dup_num = get_tag(dup) == DP0 ? 0 : 1; | |
. Loc lam_loc = get_loc(lam); | |
33,554,456 (0.3%) Term bod = got(heap, lam_loc + 1); | |
. Loc du0 = alloc_node(heap, 3); | |
. Loc lm0 = alloc_node(heap, 2); | |
. Loc lm1 = alloc_node(heap, 2); | |
. Loc su0 = alloc_node(heap, 2); | |
. set(heap, du0 + 0, new_term(SUB, 0, 0)); | |
33,554,456 (0.3%) set(heap, du0 + 1, new_term(SUB, 0, 0)); | |
33,554,456 (0.3%) set(heap, du0 + 2, bod); | |
. set(heap, lm0 + 0, new_term(SUB, 0, 0)); | |
67,108,912 (0.5%) set(heap, lm0 + 1, new_term(DP0, 0, du0)); | |
. set(heap, lm1 + 0, new_term(SUB, 0, 0)); | |
33,554,456 (0.3%) set(heap, lm1 + 1, new_term(DP1, 0, du0)); | |
. set(heap, su0 + 0, new_term(VAR, 0, lm0)); | |
33,554,456 (0.3%) set(heap, su0 + 1, new_term(VAR, 0, lm1)); | |
. set(heap, dup_loc + 0, new_term(LAM, 0, lm0)); | |
33,554,456 (0.3%) set(heap, dup_loc + 1, new_term(LAM, 0, lm1)); | |
. set(heap, lam_loc + 0, new_term(SUP, 0, su0)); | |
. return got(heap, dup_loc + dup_num); | |
. } | |
. | |
. // & {x y} = {a b} | |
. // --------------- DUP_SUP | |
. // x <- a | |
. // y <- b | |
. Term reduce_dup_sup(Heap* heap, Term dup, Term sup) { | |
. inc_itr(heap); | |
. Loc dup_loc = get_loc(dup); | |
. Tag dup_num = get_tag(dup) == DP0 ? 0 : 1; | |
. Loc sup_loc = get_loc(sup); | |
. Term tm0 = got(heap, sup_loc + 0); | |
16,777,240 (0.1%) Term tm1 = got(heap, sup_loc + 1); | |
. set(heap, dup_loc + 0, tm0); | |
16,777,240 (0.1%) set(heap, dup_loc + 1, tm1); | |
. return got(heap, dup_loc + dup_num); | |
. } | |
. | |
21 (0.0%) Term reduce(Heap* heap, Term term) { | |
3 (0.0%) Term* path = heap->stk; | |
. Loc spos = 0; | |
318,767,415 (2.5%) Term next = term; | |
. while (1) { | |
. Tag tag = get_tag(next); | |
. Lab lab = get_lab(next); | |
. Loc loc = get_loc(next); | |
1,962,936,119 (15.7%) switch (tag) { | |
. case APP: { | |
134,217,824 (1.1%) path[spos++] = next; | |
. next = got(heap, loc + 0); | |
. continue; | |
. } | |
. case DP0: | |
. case DP1: { | |
. Loc key = get_key(next); | |
. Term sub = got(heap, key); | |
301,990,158 (2.4%) if (get_tag(sub) == SUB) { | |
100,663,392 (0.8%) path[spos++] = next; | |
50,331,696 (0.4%) next = got(heap, loc + 2); | |
. continue; | |
. } else { | |
. next = sub; | |
. continue; | |
. } | |
. } | |
. case VAR: { | |
. Loc key = get_key(next); | |
. Term sub = got(heap, key); | |
33,554,507 (0.3%) if (get_tag(sub) == SUB) { | |
. break; | |
. } else { | |
. next = sub; | |
. continue; | |
. } | |
. } | |
. default: { | |
117,440,610 (0.9%) if (spos == 0) { | |
. break; | |
. } else { | |
234,881,216 (1.9%) Term prev = path[--spos]; | |
. Tag ptag = get_tag(prev); | |
. Lab plab = get_lab(prev); | |
. Loc ploc = get_loc(prev); | |
369,099,040 (2.9%) switch (ptag) { | |
. case APP: { | |
268,435,700 (2.1%) switch (tag) { | |
. case ERA: next = reduce_app_era(heap, prev, next); continue; | |
. case LAM: next = reduce_app_lam(heap, prev, next); continue; | |
. case SUP: next = reduce_app_sup(heap, prev, next); continue; | |
. default: break; | |
. } | |
. break; | |
. } | |
. case DP0: | |
. case DP1: { | |
218,104,000 (1.7%) switch (tag) { | |
. case ERA: next = reduce_dup_era(heap, prev, next); continue; | |
. case LAM: next = reduce_dup_lam(heap, prev, next); continue; | |
. case SUP: next = reduce_dup_sup(heap, prev, next); continue; | |
. default: break; | |
. } | |
. break; | |
. } | |
. default: break; | |
. } | |
. break; | |
. } | |
. } | |
. } | |
1 (0.0%) if (spos == 0) { | |
. return next; | |
. } else { | |
. Term host = path[--spos]; | |
. Tag htag = get_tag(host); | |
. Lab hlab = get_lab(host); | |
. Loc hloc = get_loc(host); | |
. switch (htag) { | |
. case APP: { | |
-- line 369 ---------------------------------------- | |
-- line 377 ---------------------------------------- | |
. } | |
. default: { | |
. return 0; | |
. } | |
. } | |
. } | |
. } | |
. return 0; | |
24 (0.0%) } | |
. | |
18 (0.0%) Term normal(Heap* heap, Term term) { | |
6 (0.0%) Term wnf = reduce(heap, term); | |
. Tag tag = get_tag(wnf); | |
. Lab lab = get_lab(wnf); | |
. Loc loc = get_loc(wnf); | |
20 (0.0%) switch (tag) { | |
. case APP: { | |
. Term fun; | |
. Term arg; | |
. fun = got(heap, loc + 0); | |
. fun = normal(heap, fun); | |
. arg = got(heap, loc + 1); | |
. arg = normal(heap, arg); | |
. set(heap, loc + 0, fun); | |
. set(heap, loc + 1, arg); | |
. return wnf; | |
. } | |
. case LAM: { | |
. Term bod; | |
2 (0.0%) bod = got(heap, loc + 1); | |
. bod = normal(heap, bod); | |
. set(heap, loc + 1, bod); | |
. return wnf; | |
. } | |
. case SUP: { | |
. Term tm0; | |
. Term tm1; | |
. tm0 = got(heap, loc + 0); | |
-- line 414 ---------------------------------------- | |
-- line 431 ---------------------------------------- | |
. val = got(heap, loc + 2); | |
. val = normal(heap, val); | |
. set(heap, loc + 2, val); | |
. return wnf; | |
. } | |
. default: | |
. return wnf; | |
. } | |
18 (0.0%) } | |
. | |
. // Main | |
. // ---- | |
. | |
. static void inject_P24(Heap* heap) { | |
. set_ini(heap, 0x000000000); | |
. set_end(heap, 0x0000000f1); | |
. set_itr(heap, 0x000000000); | |
-- line 447 ---------------------------------------- | |
-- line 683 ---------------------------------------- | |
. set(heap, 0x0000000eb, new_term(SUB,0x000000,0x000000000)); | |
. set(heap, 0x0000000ec, new_term(VAR,0x000000,0x0000000e9)); | |
. set(heap, 0x0000000ed, new_term(SUB,0x000000,0x000000000)); | |
. set(heap, 0x0000000ee, new_term(LAM,0x000000,0x0000000ef)); | |
. set(heap, 0x0000000ef, new_term(SUB,0x000000,0x000000000)); | |
. set(heap, 0x0000000f0, new_term(VAR,0x000000,0x0000000ed)); | |
. } | |
. | |
6 (0.0%) int main() { | |
. Heap* heap = new_heap(); | |
. inject_P24(heap); | |
. | |
2 (0.0%) clock_t start = clock(); | |
. | |
. // Normalize and get interaction count | |
. Term root = got(heap, 0); | |
2 (0.0%) normal(heap, root); | |
2 (0.0%) clock_t end = clock(); | |
. | |
3 (0.0%) printf("Itrs: %u\n", get_itr(heap)); | |
10 (0.0%) double time_spent = (double)(end - start) / CLOCKS_PER_SEC * 1000; | |
3 (0.0%) printf("Size: %u nodes\n", get_end(heap)); | |
5 (0.0%) printf("Time: %.2f seconds\n", time_spent / 1000.0); | |
6 (0.0%) printf("MIPS: %.2f\n", (get_itr(heap) / 1000000.0) / (time_spent / 1000.0)); | |
. | |
2 (0.0%) free(heap->stk); | |
2 (0.0%) free(heap->mem); | |
2 (0.0%) free(heap->ini); | |
2 (0.0%) free(heap->end); | |
2 (0.0%) free(heap->itr); | |
2 (0.0%) free(heap); | |
7 (0.0%) return 0; | |
. } | |
. | |
-------------------------------------------------------------------------------- | |
-- Annotation summary | |
-------------------------------------------------------------------------------- | |
Ir___________________ | |
9,646,906,919 (77.0%) annotated: files known & above threshold & readable, line numbers known | |
570,426,090 (4.6%) annotated: files known & above threshold & readable, line numbers unknown | |
0 unannotated: files known & above threshold & two or more non-identical | |
0 unannotated: files known & above threshold & unreadable | |
111,993 (0.0%) unannotated: files known & below threshold | |
2,315,257,429 (18.5%) unannotated: files unknown | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment