Last active
September 18, 2024 21:03
-
-
Save maedoc/9575bd9467b1aabe1d968090bf24fa9b to your computer and use it in GitHub Desktop.
At -O3 the compiler optimizes away the loop and switch when the op sequence is compile-time data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// https://godbolt.org/z/7zPrK8567 -O3 only
/* One instruction of the accumulator program interpreted by run(). */
typedef struct op_t {
    /* Which operation to perform. */
    const enum { ADD, SUB, DIV, MUL, SQUARE, READ, WRITE } tag;
    /* Immediate operand, used by the arithmetic ops when c is null. */
    const float b;
    /*
     * Optional memory cell. When non-null it supplies the operand for the
     * arithmetic ops and is the source/destination of READ/WRITE.
     */
    float *c;
} op_t;
typedef struct seq_t { | |
const int n_ops; | |
const op_t *ops; | |
} seq_t; | |
/* In-place addition: *a becomes *a + b. */
static void add(float *a, float b)
{
    *a += b;
}
/* In-place subtraction: *a becomes *a - b. */
static void sub(float *a, float b)
{
    *a -= b;
}
/* In-place division: *a becomes *a / b. */
static void div(float *a, float b)
{
    *a /= b;
}
/* In-place multiplication: *a becomes *a * b. */
static void mul(float *a, float b)
{
    *a *= b;
}
/*
 * In-place squaring: *a becomes (*a) * (*a).
 * b is ignored; it is accepted only so that the signature matches the other
 * arithmetic helpers.
 */
static void square(float *a, float b)
{
    (void)b;
    *a = (*a) * (*a);
}
static float run(const seq_t *seq, const float a0) { | |
float a = a0; | |
for (int i=0; i<seq->n_ops; i++) { | |
op_t o = seq->ops[i]; | |
float val = o.c ? *o.c : o.b; | |
switch(o.tag) { | |
case ADD: add(&a, val); break; | |
case SUB: sub(&a, val); break; | |
case DIV: div(&a, val); break; | |
case MUL: mul(&a, val); break; | |
case SQUARE: square(&a, val); break; | |
case READ: a = *o.c; | |
case WRITE: *o.c = a; | |
} | |
} | |
return a; | |
} | |
float bistable(float x) { | |
float x2; | |
const op_t ops[] = { | |
{.tag=SQUARE}, | |
{.tag=WRITE, .c=&x2}, | |
{.tag=MUL, .b=x}, | |
{.tag=DIV, .b=-3.0f}, | |
{.tag=ADD, .b=x}, | |
{.tag=SQUARE}, | |
{.tag=ADD, .c=&x2}, | |
}; | |
const seq_t seq = {.n_ops = 7, .ops = ops}; | |
return run(&seq, x); | |
} | |
/*
 * Direct evaluation of (x - x^3/3)^2 + x^2, written as plain arithmetic.
 * Returns the value of that expression for the given x.
 */
float bistable1(float x)
{
    const float x2 = x * x;
    /* Same operation order as the original: (x2 * x) / -3.0f, then + x. */
    const float a = x + x2 * x / -3.0f;

    return a * a + x2;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compiler output for bistable(): straight-line scalar SSE code, with no loop
# or switch left. In the notes below, x is the value in xmm0 on entry.
bistable:
        movaps  xmm3, xmm0                  # xmm3 = x
        movaps  xmm1, xmm0                  # xmm1 = x
        mulss   xmm3, xmm0                  # xmm3 = x*x            (x2)
        mulss   xmm1, xmm3                  # xmm1 = x*x2
        divss   xmm1, DWORD PTR .LC0[rip]   # xmm1 = x*x2 / .LC0
        addss   xmm1, xmm0                  # xmm1 = x*x2/.LC0 + x  (a)
        movaps  xmm0, xmm1                  # xmm0 = a
        mulss   xmm0, xmm1                  # xmm0 = a*a
        addss   xmm0, xmm3                  # xmm0 = a*a + x2
        ret
# Compiler output for bistable1(): the instruction sequence is identical to
# the one emitted for bistable. In the notes below, x is the value in xmm0 on
# entry.
bistable1:
        movaps  xmm3, xmm0                  # xmm3 = x
        movaps  xmm1, xmm0                  # xmm1 = x
        mulss   xmm3, xmm0                  # xmm3 = x*x            (x2)
        mulss   xmm1, xmm3                  # xmm1 = x*x2
        divss   xmm1, DWORD PTR .LC0[rip]   # xmm1 = x*x2 / .LC0
        addss   xmm1, xmm0                  # xmm1 = x*x2/.LC0 + x  (a)
        movaps  xmm0, xmm1                  # xmm0 = a
        mulss   xmm0, xmm1                  # xmm0 = a*a
        addss   xmm0, xmm3                  # xmm0 = a*a + x2
        ret
# Divisor constant used by both functions: -1069547520 is 0xC0400000 as a
# 32-bit pattern, which is the IEEE-754 single-precision encoding of -3.0.
.LC0:
        .long   -1069547520
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment