Skip to content

Instantly share code, notes, and snippets.

@georgemorgan
Created November 7, 2024 13:30
Show Gist options
  • Save georgemorgan/65aa08cb872e3dabff23a87a63581411 to your computer and use it in GitHub Desktop.
Save georgemorgan/65aa08cb872e3dabff23a87a63581411 to your computer and use it in GitHub Desktop.
Annotated hotspots for the HVM3 optimization challenge
--------------------------------------------------------------------------------
-- Metadata
--------------------------------------------------------------------------------
Invocation: /usr/bin/cg_annotate --auto=yes cachegrind.out.1943
Command: ./hvm3
Events recorded: Ir
Events shown: Ir
Event sort order: Ir
Threshold: 0.1%
Annotation: on
--------------------------------------------------------------------------------
-- Summary
--------------------------------------------------------------------------------
Ir_____________________
12,532,702,431 (100.0%) PROGRAM TOTALS
--------------------------------------------------------------------------------
-- File:function summary
--------------------------------------------------------------------------------
Ir____________________________ file:function
< 10,217,333,009 (81.5%, 81.5%) /home/george/HVML.c:
10,217,332,125 (81.5%) reduce
< 2,315,257,429 (18.5%, 100.0%) ???:
2,315,256,936 (18.5%) __aarch64_ldadd8_relax
--------------------------------------------------------------------------------
-- Function:file summary
--------------------------------------------------------------------------------
Ir____________________________ function:file
> 10,217,332,125 (81.5%, 81.5%) reduce:/home/george/HVML.c
> 2,315,256,936 (18.5%, 100.0%) __aarch64_ldadd8_relax:???
--------------------------------------------------------------------------------
-- Annotated source file: /home/george/HVML.c
--------------------------------------------------------------------------------
Ir___________________
570,426,090 (4.6%) <unknown (line 0)>
-- line 41 ----------------------------------------
. #define SUB 0x07
.
. #define VOID 0x00000000000000
.
. // Initialization
. // --------------
.
. Heap* new_heap() {
3 (0.0%) Heap* heap = malloc(sizeof(Heap));
3 (0.0%) heap->stk = malloc((1ULL << 32) * sizeof(Term));
4 (0.0%) heap->mem = malloc((1ULL << 32) * sizeof(ATerm));
4 (0.0%) heap->ini = malloc(sizeof(a64));
4 (0.0%) heap->end = malloc(sizeof(a64));
4 (0.0%) heap->itr = malloc(sizeof(a64));
3 (0.0%) atomic_store_explicit(heap->ini, 0, memory_order_relaxed);
2 (0.0%) atomic_store_explicit(heap->end, 1, memory_order_relaxed);
1 (0.0%) atomic_store_explicit(heap->itr, 0, memory_order_relaxed);
. return heap;
. }
.
. Term new_term(Tag tag, Lab lab, Loc loc) {
. Term tag_enc = tag;
. Term lab_enc = ((Term)lab) << 8;
67,108,886 (0.5%) Term loc_enc = ((Term)loc) << 32;
469,762,254 (3.7%) return tag_enc | lab_enc | loc_enc;
. }
.
. Tag get_tag(Term x) {
117,440,608 (0.9%) return x & 0xFF;
. }
.
. Lab get_lab(Term x) {
. return (x >> 8) & 0xFFFFFF;
. }
.
. Loc get_loc(Term x) {
436,208,026 (3.5%) return (x >> 32) & 0xFFFFFFFF;
. }
.
. Loc get_key(Term term) {
. switch (get_tag(term)) {
. case VAR: return get_loc(term) + 0;
. case DP0: return get_loc(term) + 0;
50,331,693 (0.4%) case DP1: return get_loc(term) + 1;
. default: return 0;
. }
. }
.
. Loc get_ini(Heap* heap) {
. return atomic_load_explicit(heap->ini, memory_order_relaxed);
. }
.
. Loc get_end(Heap* heap) {
2 (0.0%) return atomic_load_explicit(heap->end, memory_order_relaxed);
. }
.
. Loc get_itr(Heap* heap) {
4 (0.0%) return atomic_load_explicit(heap->itr, memory_order_relaxed);
. }
.
. void set_ini(Heap* heap, Loc value) {
2 (0.0%) atomic_store_explicit(heap->ini, value, memory_order_relaxed);
. }
.
. void set_end(Heap* heap, Loc value) {
1 (0.0%) atomic_store_explicit(heap->end, value, memory_order_relaxed);
. }
.
. void set_itr(Heap* heap, Loc value) {
2 (0.0%) atomic_store_explicit(heap->itr, value, memory_order_relaxed);
. }
.
. // Memory
. // ------
.
. Term swap(Heap* heap, Loc loc, Term term) {
. return atomic_exchange_explicit(&heap->mem[loc], term, memory_order_relaxed);
. }
.
. Term got(Heap* heap, Loc loc) {
805,307,112 (6.4%) return atomic_load_explicit(&heap->mem[loc], memory_order_relaxed);
. }
.
. void set(Heap* heap, Loc loc, Term term) {
1,627,391,237 (13.0%) atomic_store_explicit(&heap->mem[loc], term, memory_order_relaxed);
. }
.
. Term take(Heap* heap, Loc loc) {
. return swap(heap, loc, VOID);
. }
.
. // Allocation
. // ----------
.
. Loc alloc_node(Heap* heap, Loc arity) {
1,006,633,290 (8.0%) return atomic_fetch_add_explicit(heap->end, arity, memory_order_relaxed);
. }
.
. Loc inc_itr(Heap* heap) {
352,321,824 (2.8%) return atomic_fetch_add_explicit(heap->itr, 1, memory_order_relaxed);
. }
.
. // Stringification
. // ---------------
.
. void print_tag(Tag tag) {
. switch (tag) {
. case SUB: printf("SUB"); break;
-- line 148 ----------------------------------------
-- line 189 ----------------------------------------
. // (λx(body) a)
. // ------------ APP_LAM
. // x <- a
. // body
. Term reduce_app_lam(Heap* heap, Term app, Term lam) {
. inc_itr(heap);
. Loc app_loc = get_loc(app);
. Loc lam_loc = get_loc(lam);
33,554,482 (0.3%) Term arg = got(heap, app_loc + 1);
33,554,482 (0.3%) Term bod = got(heap, lam_loc + 1);
. set(heap, lam_loc + 0, arg);
. return bod;
. }
.
. // ({a b} c)
. // --------------- APP_SUP
. // & {x0 x1} = c
. // {(a x0) (b x1)}
. Term reduce_app_sup(Heap* heap, Term app, Term sup) {
. inc_itr(heap);
. Loc app_loc = get_loc(app);
. Loc sup_loc = get_loc(sup);
33,554,430 (0.3%) Term arg = got(heap, app_loc + 1);
. Term tm0 = got(heap, sup_loc + 0);
33,554,430 (0.3%) Term tm1 = got(heap, sup_loc + 1);
. Loc du0 = alloc_node(heap, 3);
. Loc su0 = alloc_node(heap, 2);
. Loc ap0 = alloc_node(heap, 2);
. Loc ap1 = alloc_node(heap, 2);
. set(heap, du0 + 0, new_term(SUB, 0, 0));
33,554,430 (0.3%) set(heap, du0 + 1, new_term(SUB, 0, 0));
33,554,430 (0.3%) set(heap, du0 + 2, arg);
. set(heap, ap0 + 0, tm0);
33,554,430 (0.3%) set(heap, ap0 + 1, new_term(DP0, 0, du0));
. set(heap, ap1 + 0, tm1);
33,554,430 (0.3%) set(heap, ap1 + 1, new_term(DP1, 0, du0));
. set(heap, su0 + 0, new_term(APP, 0, ap0));
33,554,430 (0.3%) set(heap, su0 + 1, new_term(APP, 0, ap1));
. return new_term(SUP, 0, su0);
. }
.
. // & {x y} = *
. // ----------- DUP_ERA
. // x <- *
. // y <- *
. Term reduce_dup_era(Heap* heap, Term dup, Term era) {
-- line 234 ----------------------------------------
-- line 246 ----------------------------------------
. // r <- λx0(f0)
. // s <- λx1(f1)
. // x <- {x0 x1}
. Term reduce_dup_lam(Heap* heap, Term dup, Term lam) {
. inc_itr(heap);
. Loc dup_loc = get_loc(dup);
. Tag dup_num = get_tag(dup) == DP0 ? 0 : 1;
. Loc lam_loc = get_loc(lam);
33,554,456 (0.3%) Term bod = got(heap, lam_loc + 1);
. Loc du0 = alloc_node(heap, 3);
. Loc lm0 = alloc_node(heap, 2);
. Loc lm1 = alloc_node(heap, 2);
. Loc su0 = alloc_node(heap, 2);
. set(heap, du0 + 0, new_term(SUB, 0, 0));
33,554,456 (0.3%) set(heap, du0 + 1, new_term(SUB, 0, 0));
33,554,456 (0.3%) set(heap, du0 + 2, bod);
. set(heap, lm0 + 0, new_term(SUB, 0, 0));
67,108,912 (0.5%) set(heap, lm0 + 1, new_term(DP0, 0, du0));
. set(heap, lm1 + 0, new_term(SUB, 0, 0));
33,554,456 (0.3%) set(heap, lm1 + 1, new_term(DP1, 0, du0));
. set(heap, su0 + 0, new_term(VAR, 0, lm0));
33,554,456 (0.3%) set(heap, su0 + 1, new_term(VAR, 0, lm1));
. set(heap, dup_loc + 0, new_term(LAM, 0, lm0));
33,554,456 (0.3%) set(heap, dup_loc + 1, new_term(LAM, 0, lm1));
. set(heap, lam_loc + 0, new_term(SUP, 0, su0));
. return got(heap, dup_loc + dup_num);
. }
.
. // & {x y} = {a b}
. // --------------- DUP_SUP
. // x <- a
. // y <- b
. Term reduce_dup_sup(Heap* heap, Term dup, Term sup) {
. inc_itr(heap);
. Loc dup_loc = get_loc(dup);
. Tag dup_num = get_tag(dup) == DP0 ? 0 : 1;
. Loc sup_loc = get_loc(sup);
. Term tm0 = got(heap, sup_loc + 0);
16,777,240 (0.1%) Term tm1 = got(heap, sup_loc + 1);
. set(heap, dup_loc + 0, tm0);
16,777,240 (0.1%) set(heap, dup_loc + 1, tm1);
. return got(heap, dup_loc + dup_num);
. }
.
21 (0.0%) Term reduce(Heap* heap, Term term) {
3 (0.0%) Term* path = heap->stk;
. Loc spos = 0;
318,767,415 (2.5%) Term next = term;
. while (1) {
. Tag tag = get_tag(next);
. Lab lab = get_lab(next);
. Loc loc = get_loc(next);
1,962,936,119 (15.7%) switch (tag) {
. case APP: {
134,217,824 (1.1%) path[spos++] = next;
. next = got(heap, loc + 0);
. continue;
. }
. case DP0:
. case DP1: {
. Loc key = get_key(next);
. Term sub = got(heap, key);
301,990,158 (2.4%) if (get_tag(sub) == SUB) {
100,663,392 (0.8%) path[spos++] = next;
50,331,696 (0.4%) next = got(heap, loc + 2);
. continue;
. } else {
. next = sub;
. continue;
. }
. }
. case VAR: {
. Loc key = get_key(next);
. Term sub = got(heap, key);
33,554,507 (0.3%) if (get_tag(sub) == SUB) {
. break;
. } else {
. next = sub;
. continue;
. }
. }
. default: {
117,440,610 (0.9%) if (spos == 0) {
. break;
. } else {
234,881,216 (1.9%) Term prev = path[--spos];
. Tag ptag = get_tag(prev);
. Lab plab = get_lab(prev);
. Loc ploc = get_loc(prev);
369,099,040 (2.9%) switch (ptag) {
. case APP: {
268,435,700 (2.1%) switch (tag) {
. case ERA: next = reduce_app_era(heap, prev, next); continue;
. case LAM: next = reduce_app_lam(heap, prev, next); continue;
. case SUP: next = reduce_app_sup(heap, prev, next); continue;
. default: break;
. }
. break;
. }
. case DP0:
. case DP1: {
218,104,000 (1.7%) switch (tag) {
. case ERA: next = reduce_dup_era(heap, prev, next); continue;
. case LAM: next = reduce_dup_lam(heap, prev, next); continue;
. case SUP: next = reduce_dup_sup(heap, prev, next); continue;
. default: break;
. }
. break;
. }
. default: break;
. }
. break;
. }
. }
. }
1 (0.0%) if (spos == 0) {
. return next;
. } else {
. Term host = path[--spos];
. Tag htag = get_tag(host);
. Lab hlab = get_lab(host);
. Loc hloc = get_loc(host);
. switch (htag) {
. case APP: {
-- line 369 ----------------------------------------
-- line 377 ----------------------------------------
. }
. default: {
. return 0;
. }
. }
. }
. }
. return 0;
24 (0.0%) }
.
18 (0.0%) Term normal(Heap* heap, Term term) {
6 (0.0%) Term wnf = reduce(heap, term);
. Tag tag = get_tag(wnf);
. Lab lab = get_lab(wnf);
. Loc loc = get_loc(wnf);
20 (0.0%) switch (tag) {
. case APP: {
. Term fun;
. Term arg;
. fun = got(heap, loc + 0);
. fun = normal(heap, fun);
. arg = got(heap, loc + 1);
. arg = normal(heap, arg);
. set(heap, loc + 0, fun);
. set(heap, loc + 1, arg);
. return wnf;
. }
. case LAM: {
. Term bod;
2 (0.0%) bod = got(heap, loc + 1);
. bod = normal(heap, bod);
. set(heap, loc + 1, bod);
. return wnf;
. }
. case SUP: {
. Term tm0;
. Term tm1;
. tm0 = got(heap, loc + 0);
-- line 414 ----------------------------------------
-- line 431 ----------------------------------------
. val = got(heap, loc + 2);
. val = normal(heap, val);
. set(heap, loc + 2, val);
. return wnf;
. }
. default:
. return wnf;
. }
18 (0.0%) }
.
. // Main
. // ----
.
. static void inject_P24(Heap* heap) {
. set_ini(heap, 0x000000000);
. set_end(heap, 0x0000000f1);
. set_itr(heap, 0x000000000);
-- line 447 ----------------------------------------
-- line 683 ----------------------------------------
. set(heap, 0x0000000eb, new_term(SUB,0x000000,0x000000000));
. set(heap, 0x0000000ec, new_term(VAR,0x000000,0x0000000e9));
. set(heap, 0x0000000ed, new_term(SUB,0x000000,0x000000000));
. set(heap, 0x0000000ee, new_term(LAM,0x000000,0x0000000ef));
. set(heap, 0x0000000ef, new_term(SUB,0x000000,0x000000000));
. set(heap, 0x0000000f0, new_term(VAR,0x000000,0x0000000ed));
. }
.
6 (0.0%) int main() {
. Heap* heap = new_heap();
. inject_P24(heap);
.
2 (0.0%) clock_t start = clock();
.
. // Normalize and get interaction count
. Term root = got(heap, 0);
2 (0.0%) normal(heap, root);
2 (0.0%) clock_t end = clock();
.
3 (0.0%) printf("Itrs: %u\n", get_itr(heap));
10 (0.0%) double time_spent = (double)(end - start) / CLOCKS_PER_SEC * 1000;
3 (0.0%) printf("Size: %u nodes\n", get_end(heap));
5 (0.0%) printf("Time: %.2f seconds\n", time_spent / 1000.0);
6 (0.0%) printf("MIPS: %.2f\n", (get_itr(heap) / 1000000.0) / (time_spent / 1000.0));
.
2 (0.0%) free(heap->stk);
2 (0.0%) free(heap->mem);
2 (0.0%) free(heap->ini);
2 (0.0%) free(heap->end);
2 (0.0%) free(heap->itr);
2 (0.0%) free(heap);
7 (0.0%) return 0;
. }
.
--------------------------------------------------------------------------------
-- Annotation summary
--------------------------------------------------------------------------------
Ir___________________
9,646,906,919 (77.0%) annotated: files known & above threshold & readable, line numbers known
570,426,090 (4.6%) annotated: files known & above threshold & readable, line numbers unknown
0 unannotated: files known & above threshold & two or more non-identical
0 unannotated: files known & above threshold & unreadable
111,993 (0.0%) unannotated: files known & below threshold
2,315,257,429 (18.5%) unannotated: files unknown
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment