- Constant materialization
- Sign extensions
- Comparisons
- Math optimisations
- Inefficient float loads
- FP comparison branchiness
- FP constant materialisation
- Unnecessary `and` after `feq.d`
- Unnecessary FP conversions
- Dead branch instruction
C
long a() { return 0x94BFE000; }
GCC RV64GC
li a0,0x4A5FF000 # lui only
slli a0,a0,1
ret
Clang RV64GC
lui a0, 74
addiw a0, a0, 1535
slli a0, a0, 13
ret
C
long a(long b) {
int c = ++b;
return c;
}
GCC
addiw a0,a0,1
ret
Clang
slli a0, a0, 32
addi a1, zero, 1
slli a1, a1, 32
add a0, a0, a1
srai a0, a0, 32
ret
C
int a(int b) {
int c = b * b, d = c % b, e = !d;
return e;
}
GCC
mulw a5,a0,a0
remw a0,a5,a0
seqz a0,a0
ret
Clang
mul a1, a0, a0
remw a0, a1, a0
slli a0, a0, 32
srli a0, a0, 32
seqz a0, a0
ret
C
int a(int b) {
int c = 6472 >> b;
return c;
}
GCC
li a5,8192
addiw a5,a5,-1720
sraw a0,a5,a0
ret
Clang
lui a1, 2
addiw a1, a1, -1720
srlw a0, a1, a0
slli a0, a0, 32
srli a0, a0, 32
ret
C
int a(int b) {
int c = 8 / b;
return c;
}
GCC
li a5,8
divw a0,a5,a0
ret
Clang
addi a1, zero, 8
div a0, a1, a0
sext.w a0, a0
ret
C
int a(int b) {
int c = -57, d = c << b;
return d;
}
GCC
li a5,-57
sllw a0,a5,a0
ret
Clang
addi a1, zero, 1
slli a1, a1, 32
addi a1, a1, -57
sllw a0, a1, a0
ret
Maybe it's just a sign-extension issue, maybe it's more:
C
int a(int b) {
int c = ~b;
--c;
return c;
}
GCC
not a0,a0
addiw a0,a0,-1
ret
Clang
addi a1, zero, 1
slli a1, a1, 32
addi a1, a1, -2
subw a0, a1, a0
ret
C
int a(int b) {
short c = -b;
char d = -c;
return d;
}
GCC
andi a0,a0,0xff
ret
Clang
slli a0, a0, 16
neg a0, a0
srli a0, a0, 16
neg a0, a0
andi a0, a0, 255
ret
C
long a(long b) {
int c = -b;
return c;
}
GCC RV64GC
negw a0,a0
ret
Clang
slli a0, a0, 32
neg a0, a0
srai a0, a0, 32
ret
C
int a(int b) {
int c = 0 >= b, d = -c;
return d;
}
GCC
slti a0,a0,1
subw a0,zero,a0
ret
Clang
add a1, zero, a0
addi a2, zero, 1
addi a0, zero, -1
blt a1, a2, .LBB0_2
mv a0, zero
.LBB0_2:
ret
C
int a(short b) {
double c = 7 >= b;
++c;
return c;
}
GCC RV64GC
slti a0,a0,8
addi a0,a0,1
ret
Clang RV64GC
add a1, zero, a0
addi a2, zero, 8
addi a0, zero, 2
blt a1, a2, .LBB0_2
# %bb.1:
addi a0, zero, 1
.LBB0_2:
ret
Unnecessary `seqz`:
C
float a(double b, int c) {
float d = c ?: b;
return d;
}
GCC RV64GC
beq a0,zero,.L2
fcvt.d.w fa0,a0
.L2:
fcvt.s.d fa0,fa0
ret
Clang RV64GC
seqz a1, a0
bnez a1, .LBB0_2
# %bb.1:
fcvt.d.l fa0, a0
.LBB0_2:
fcvt.s.d fa0, fa0
ret
C
int a(short b) {
short c = 0 <= b;
return c;
}
GCC RV32GC
not a0,a0
srli a0,a0,31
ret
Clang RV32GC
not a0, a0
lui a1, 8
and a0, a0, a1
srli a0, a0, 15
ret
C
int a(int b) {
int c = ~b--, d = c - b;
return d;
}
GCC
slli a0,a0,1
neg a0,a0
ret
Clang
not a1, a0
sub a0, a1, a0
addi a0, a0, 1
ret
C
int a(int b) {
int c = b + b, d = b == c;
return d;
}
GCC
seqz a0,a0
ret
Clang
slli a1, a0, 1
xor a0, a0, a1
seqz a0, a0
ret
C
int a(short b) {
char c = b, e = -b;
short d = !c;
int f = e ? d : e;
char g = f ?: 7;
return g;
}
GCC RV32GC
li a0,7
ret
Clang RV32GC
neg a1, a0
andi a1, a1, 255
bnez a1, .LBB0_3
# %bb.1:
addi a0, zero, 7
bnez a1, .LBB0_4
.LBB0_2:
ret
.LBB0_3:
andi a0, a0, 255
seqz a1, a0
addi a0, zero, 7
beqz a1, .LBB0_2
.LBB0_4:
add a0, zero, a1
ret
C
char a(char b, short c) {
int d = ++b;
char e = b ?: c;
short f = e ? c : d;
return f;
}
GCC RV64GC
andi a0,a1,0xff
ret
Clang RV32GC
addi a3, a0, 1
andi a0, a3, 255
add a2, zero, a1
beq a0, a3, .LBB0_3
# %bb.1:
andi a2, a2, 255
bnez a2, .LBB0_4
.LBB0_2:
andi a0, a0, 255
ret
.LBB0_3:
add a2, zero, a0
andi a2, a2, 255
beqz a2, .LBB0_2
.LBB0_4:
add a0, zero, a1
andi a0, a0, 255
ret
C
int a;
float b() {
double c = a == a;
return c;
}
GCC
lui a5,%hi(.LC0)
flw fa0,%lo(.LC0)(a5)
ret
Clang
lui a0, %hi(.LCPI0_0)
addi a0, a0, %lo(.LCPI0_0)
flw fa0, 0(a0)
ret
C
double a(float b) {
long c = !b;
return c;
}
GCC
fmv.s.x fa5,zero
feq.s a5,fa0,fa5
fcvt.d.w fa0,a5
ret
Clang
fmv.w.x ft0, zero
feq.s a0, fa0, ft0
bnez a0, .LBB0_2
# %bb.1:
fcvt.d.w fa0, zero
ret
.LBB0_2:
lui a0, %hi(.LCPI0_0)
addi a0, a0, %lo(.LCPI0_0)
fld fa0, 0(a0)
ret
C
float a(int b, int c) {
float d = b >= c;
return d;
}
GCC
bge a0,a1,.L3
fmv.s.x fa0,zero
ret
.L3:
lui a5,%hi(.LC0)
flw fa0,%lo(.LC0)(a5)
ret
Clang
slt a0, a0, a1
xori a0, a0, 1
bnez a0, .LBB0_2
# %bb.1:
fmv.w.x fa0, zero
ret
.LBB0_2:
lui a0, %hi(.LCPI0_0)
addi a0, a0, %lo(.LCPI0_0)
flw fa0, 0(a0)
ret
C
float a(float b) {
long c = b, d = !c;
return d;
}
GCC
fcvt.l.s a5,fa0,rtz
seqz a5,a5
fcvt.s.l fa0,a5
ret
Clang
fcvt.l.s a0, fa0, rtz
seqz a0, a0
bnez a0, .LBB0_2
# %bb.1:
fmv.w.x fa0, zero
ret
.LBB0_2:
lui a0, %hi(.LCPI0_0)
addi a0, a0, %lo(.LCPI0_0)
flw fa0, 0(a0)
ret
Bad round-tripping:
C
double a(float b) {
char c = b == b;
return c;
}
GCC
feq.s a5,fa0,fa0
fcvt.d.wu fa0,a5
ret
Clang
feq.s a0, fa0, fa0
and a0, a0, a0
bnez a0, .LBB0_2
# %bb.1:
fcvt.d.w fa0, zero
ret
.LBB0_2:
lui a0, %hi(.LCPI0_0)
addi a0, a0, %lo(.LCPI0_0)
fld fa0, 0(a0)
ret
Even though GCC doesn't do this either, we could materialise small FP constants with `addi xn, zero, imm12` followed by a single int-to-FP conversion instruction. Even if that were slower on some microarchitectures, we could still use it for `-Os`.
C
float test(float a) {
return a + 1.0;
}
Clang
lui a1, %hi(.LCPI0_0)
addi a1, a1, %lo(.LCPI0_0)
flw ft0, 0(a1)
fmv.w.x ft1, a0
fadd.s ft0, ft1, ft0
fmv.x.w a0, ft0
ret
C
long a(double b) {
long c = b <= b;
return c;
}
GCC
feq.d a0,fa0,fa0
ret
Clang
feq.d a0, fa0, fa0
and a0, a0, a0
ret
C
float a(long b) {
double c = ~b;
return c;
}
GCC
not a0,a0
fcvt.s.w fa0,a0
ret
Clang
not a0, a0
fcvt.d.w ft0, a0
fcvt.s.d fa0, ft0
ret
C
int a(char b, int c, short d) {
double e = d;
int f = e ? c : e;
return f;
}
GCC
li a0,0
beq a2,zero,.L2
mv a0,a1
.L2:
ret
Clang
seqz a0, a2
bnez a0, .LBB0_2
# %bb.1:
fcvt.d.l ft0, a1
fcvt.l.d a0, ft0, rtz
ret
.LBB0_2:
fcvt.d.l ft0, a2
fcvt.l.d a0, ft0, rtz
ret
C
float a(char b) {
double c = b;
return c;
}
GCC
fcvt.s.wu fa0,a0
ret
Clang
fcvt.d.wu ft0, a0
fcvt.s.d fa0, ft0
ret
C
char b(char c, short d) {
short e = c ? d : a;
int f = c ? e : 2;
return f;
}
GCC
bnez a0,.L2
li a1,2
.L2:
andi a0,a1,0xff
ret
Clang
beqz a0, .LBB0_3
# %bb.1:
beqz a0, .LBB0_4
.LBB0_2:
andi a0, a1, 255
ret
.LBB0_3:
lui a1, %hi(a)
lw a1, %lo(a)(a1)
bnez a0, .LBB0_2
.LBB0_4:
addi a1, zero, 2
andi a0, a1, 255
ret