Skip to content

Instantly share code, notes, and snippets.

@pamaury
Created February 19, 2025 10:43
Show Gist options
  • Save pamaury/f03b58bd92229e2ee767cfa999017388 to your computer and use it in GitHub Desktop.
Save pamaury/f03b58bd92229e2ee767cfa999017388 to your computer and use it in GitHub Desktop.

Before (without minsize; the 64-bit logical right shift is expanded inline inside the loop):

100083ca <udiv64_slow>:
udiv64_slow():
/proc/self/cwd/sw/device/lib/base/math.c:14
100083ca:                 1141                  addi    sp,sp,-16
100083cc:                 c622                  sw      s0,12(sp)
100083ce:                 4881                  li      a7,0
100083d0:                 4801                  li      a6,0
100083d2:                 4381                  li      t2,0
100083d4:                 4e01                  li      t3,0
100083d6:                 03f00f93              li      t6,63
100083da:                 00159293              slli    t0,a1,0x1
100083de:                 537d                  li      t1,-1
/proc/self/cwd/sw/device/lib/base/math.c:28
100083e0:       /-------> fe0f8793              addi    a5,t6,-32
100083e4:       |     /-- 0007c963              bltz    a5,100083f6 <udiv64_slow+0x2c>
100083e8:       |     |   01f5deb3              srl     t4,a1,t6
100083ec:       |  /--|-- a829                  j       10008406 <udiv64_slow+0x3c>
100083ee:       |  |  |   c0001073              unimp
100083f2:       |  |  |   c0001073              unimp
100083f6:       |  |  \-> 01f55eb3              srl     t4,a0,t6
100083fa:       |  |      ffffc793              not     a5,t6
100083fe:       |  |      00f297b3              sll     a5,t0,a5
10008402:       |  |      00feeeb3              or      t4,t4,a5
10008406:       |  \----> 01f3d793              srli    a5,t2,0x1f
1000840a:       |         0e06                  slli    t3,t3,0x1
1000840c:       |         00fe6e33              or      t3,t3,a5
10008410:       |         0386                  slli    t2,t2,0x1
10008412:       |         001ef793              andi    a5,t4,1
10008416:       |         0077eeb3              or      t4,a5,t2
/proc/self/cwd/sw/device/lib/base/math.c:34
1000841a:       |  /----- 00de0963              beq     t3,a3,1000842c <udiv64_slow+0x62>
1000841e:       |  |      00de37b3              sltu    a5,t3,a3
10008422:       |  |  /-- a039                  j       10008430 <udiv64_slow+0x66>
10008424:       |  |  |   c0001073              unimp
10008428:       |  |  |   c0001073              unimp
1000842c:       |  \--|-> 00ceb7b3              sltu    a5,t4,a2
10008430:       |     \-> 0017cf13              xori    t5,a5,1
10008434:       |         01f8d793              srli    a5,a7,0x1f
10008438:       |         0806                  slli    a6,a6,0x1
1000843a:       |         00f86833              or      a6,a6,a5
1000843e:       |         0886                  slli    a7,a7,0x1
10008440:       |         41e007b3              neg     a5,t5
10008444:       |         00d7f433              and     s0,a5,a3
10008448:       |         8ff1                  and     a5,a5,a2
1000844a:       |         40fe83b3              sub     t2,t4,a5
1000844e:       |         00feb7b3              sltu    a5,t4,a5
10008452:       |         408e0433              sub     s0,t3,s0
10008456:       |         40f40e33              sub     t3,s0,a5
/proc/self/cwd/sw/device/lib/base/math.c:25
1000845a:       |         1ffd                  addi    t6,t6,-1
/proc/self/cwd/sw/device/lib/base/math.c:34
1000845c:       |         01e8e8b3              or      a7,a7,t5
/proc/self/cwd/sw/device/lib/base/math.c:25
10008460:       \-------- f86f90e3              bne     t6,t1,100083e0 <udiv64_slow+0x16>
/proc/self/cwd/sw/device/lib/base/math.c:40
10008464:             /-- c709                  beqz    a4,1000846e <udiv64_slow+0xa4>
/proc/self/cwd/sw/device/lib/base/math.c:41
10008466:             |   00772023              sw      t2,0(a4)
1000846a:             |   01c72223              sw      t3,4(a4)
/proc/self/cwd/sw/device/lib/base/math.c:43
1000846e:             \-> 8546                  mv      a0,a7
10008470:                 85c2                  mv      a1,a6
10008472:                 4432                  lw      s0,12(sp)
10008474:                 0141                  addi    sp,sp,16
10008476:                 8082                  ret
10008478:                 c0001073              unimp
1000847c:                 c0001073              unimp

After (with minsize, and a manual implementation of __lshrdi3 marked as noinline, so the shift becomes a call to __lshrdi3):

10008350 <udiv64_slow>:
udiv64_slow():
/proc/self/cwd/sw/device/lib/base/math.c:56
10008350:                    7179                       addi    sp,sp,-48
10008352:                    d606                       sw      ra,44(sp)
10008354:                    d422                       sw      s0,40(sp)
10008356:                    d226                       sw      s1,36(sp)
10008358:                    d04a                       sw      s2,32(sp)
1000835a:                    ce4e                       sw      s3,28(sp)
1000835c:                    cc52                       sw      s4,24(sp)
1000835e:                    ca56                       sw      s5,20(sp)
10008360:                    c85a                       sw      s6,16(sp)
10008362:                    c65e                       sw      s7,12(sp)
10008364:                    c462                       sw      s8,8(sp)
10008366:                    c266                       sw      s9,4(sp)
10008368:                    c06a                       sw      s10,0(sp)
1000836a:                    893a                       mv      s2,a4
1000836c:                    8bb6                       mv      s7,a3
1000836e:                    8ab2                       mv      s5,a2
10008370:                    89ae                       mv      s3,a1
10008372:                    8a2a                       mv      s4,a0
10008374:                    4501                       li      a0,0
10008376:                    4581                       li      a1,0
10008378:                    4c81                       li      s9,0
1000837a:                    4b01                       li      s6,0
1000837c:                    03f00d13           li      s10,63
10008380:                    5c7d                       li      s8,-1
/proc/self/cwd/sw/device/lib/base/math.c:67
10008382:       /----------- 078d0563           beq     s10,s8,100083ec <udiv64_slow+0x9c>
/proc/self/cwd/sw/device/lib/base/math.c:68
10008386:       |  /-------> 01f55613           srli    a2,a0,0x1f
1000838a:       |  |         0586                       slli    a1,a1,0x1
1000838c:       |  |         00c5e4b3           or      s1,a1,a2
10008390:       |  |         00151413           slli    s0,a0,0x1
/proc/self/cwd/sw/device/lib/base/math.c:70
10008394:       |  |         8552                       mv      a0,s4
10008396:       |  |         85ce                       mv      a1,s3
10008398:       |  |         866a                       mv      a2,s10
1000839a:       |  |         3741                       jal     1000831a <__lshrdi3>
1000839c:       |  |         8905                       andi    a0,a0,1
1000839e:       |  |         008565b3           or      a1,a0,s0
/proc/self/cwd/sw/device/lib/base/math.c:76
100083a2:       |  |  /----- 01748963           beq     s1,s7,100083b4 <udiv64_slow+0x64>
100083a6:       |  |  |      0174b533           sltu    a0,s1,s7
100083aa:       |  |  |  /-- a039                       j       100083b8 <udiv64_slow+0x68>
100083ac:       |  |  |  |   c0001073           unimp
100083b0:       |  |  |  |   c0001073           unimp
100083b4:       |  |  \--|-> 0155b533           sltu    a0,a1,s5
100083b8:       |  |     \-> 00154613           xori    a2,a0,1
100083bc:       |  |         01fcd513           srli    a0,s9,0x1f
100083c0:       |  |         0b06                       slli    s6,s6,0x1
100083c2:       |  |         00ab6b33           or      s6,s6,a0
100083c6:       |  |         0c86                       slli    s9,s9,0x1
100083c8:       |  |         40c00533           neg     a0,a2
100083cc:       |  |         017576b3           and     a3,a0,s7
100083d0:       |  |         01557733           and     a4,a0,s5
100083d4:       |  |         40e58533           sub     a0,a1,a4
100083d8:       |  |         00e5b5b3           sltu    a1,a1,a4
100083dc:       |  |         8c95                       sub     s1,s1,a3
100083de:       |  |         40b485b3           sub     a1,s1,a1
100083e2:       |  |         00ccecb3           or      s9,s9,a2
/proc/self/cwd/sw/device/lib/base/math.c:67
100083e6:       |  |         1d7d                       addi    s10,s10,-1
100083e8:       |  \-------- f98d1fe3           bne     s10,s8,10008386 <udiv64_slow+0x36>
/proc/self/cwd/sw/device/lib/base/math.c:82
100083ec:       \--------/-X 00090563           beqz    s2,100083f6 <udiv64_slow+0xa6>
/proc/self/cwd/sw/device/lib/base/math.c:83
100083f0:                |   864a                       mv      a2,s2
100083f2:                |   c208                       sw      a0,0(a2)
100083f4:                |   c24c                       sw      a1,4(a2)
/proc/self/cwd/sw/device/lib/base/math.c:85
100083f6:                \-> 8566                       mv      a0,s9
100083f8:                    85da                       mv      a1,s6
100083fa:                    50b2                       lw      ra,44(sp)
100083fc:                    5422                       lw      s0,40(sp)
100083fe:                    5492                       lw      s1,36(sp)
10008400:                    5902                       lw      s2,32(sp)
10008402:                    49f2                       lw      s3,28(sp)
10008404:                    4a62                       lw      s4,24(sp)
10008406:                    4ad2                       lw      s5,20(sp)
10008408:                    4b42                       lw      s6,16(sp)
1000840a:                    4bb2                       lw      s7,12(sp)
1000840c:                    4c22                       lw      s8,8(sp)
1000840e:                    4c92                       lw      s9,4(sp)
10008410:                    4d02                       lw      s10,0(sp)
10008412:                    6145                       addi    sp,sp,48
10008414:                    8082                       ret
10008416:                    c0001073           unimp
1000841a:                    c0001073           unimp
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment