Skip to content

Instantly share code, notes, and snippets.

@clausecker
Last active December 6, 2015 08:49
Show Gist options
  • Select an option

  • Save clausecker/ab520f95961e47d0c15d to your computer and use it in GitHub Desktop.

Select an option

Save clausecker/ab520f95961e47d0c15d to your computer and use it in GitHub Desktop.
.section .text
.type bcd2dpd_mul,@function
.globl bcd2dpd_mul
# bcd2dpd_mul: convert three packed BCD digits to a 10-bit DPD declet
# with multiplication tricks
#
# Syntax: GNU as, AT&T operand order, x86-64.
# In:     edi = 0000 abcd efgh iklm (BCD digits abcd, efgh, iklm).
#         The bit diagrams below assume valid BCD digits and zero bits
#         above bit 11; other inputs are not handled specially.
# Out:    eax = DPD declet in bits 9..0
# Clobb:  rax, rsi, rdi, flags
#
# The lookup table is reached through a RIP-relative base address, so
# the code does not need an absolute 32-bit relocation and can be linked
# into position-independent executables and shared objects.
.align 8
bcd2dpd_mul:
mov %edi,%eax # = 0000 abcd efgh iklm
shl %al # = 0000 abcd fghi klm0
shr %eax # = 0000 0abc dfgh iklm
test $0x880,%edi # fast path for a = e = 0
jnz 1f
ret # eax = bcd fgh i klm is already the result
.align 8
1: and $0x888,%edi # = 0000 a000 e000 i000
imul $0x49,%edi # = 0ae0 aei0 ei00 i000
lea lookup-8(%rip),%rsi # table base, biased by -8 since u = aei >= 2 here
shr $7,%edi # = 0000 0000 ae0a ei0e
and $0x1c,%edi # = 0000 0000 000a ei00 (= 4 * u, one table row is 4 bytes)
add %rsi,%rdi # rdi = &tab[u-2]; rsi is free again after this
mov %eax,%esi
and $0x66,%esi # q = 0000 0000 0fg0 0kl0
imul (%rdi),%si # v = q * tab[u-2][0]
and $0x397,%eax # r = 0000 00bc d00h 0klm
xor %esi,%eax # w = r ^ v
or 2(%rdi),%ax # x = w | tab[u-2][1]
and $0x3ff,%eax # = 0000 00xx xxxx xxxx
ret
.size bcd2dpd_mul,.-bcd2dpd_mul
.section .rodata
# Factor / or-mask table used by the slow path of bcd2dpd_mul.
# One 4-byte row per case u = aei (the three high bits of the BCD
# digits), for u = 2..7. The code addresses a row as lookup - 8 + 4 * u,
# so no rows exist for u = 0 and u = 1 (those take the fast path).
#   first value:  factor that shuffles fg and kl around as needed
#   second value: mask that sets the bits indicating which case we have
# Rows are aligned to four bytes so that a single read brings both
# values of a case into the cache.
.balign 4
.type lookup,@object
lookup:
.short 0x0011, 0x000a # u = 2 (aei = 010)
.short 0x0000, 0x004e # u = 3 (aei = 011)
.short 0x0081, 0x000c # u = 4 (aei = 100)
.short 0x0008, 0x002e # u = 5 (aei = 101)
.short 0x0081, 0x000e # u = 6 (aei = 110)
.short 0x0000, 0x006e # u = 7 (aei = 111)
.size lookup,.-lookup
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment