Skip to content

Instantly share code, notes, and snippets.

@dgryski
Created October 29, 2016 11:42
Show Gist options
  • Save dgryski/7a44b85ffecdbc3696c88841463ebefe to your computer and use it in GitHub Desktop.
Save dgryski/7a44b85ffecdbc3696c88841463ebefe to your computer and use it in GitHub Desktop.
from peachpy import *
from peachpy.x86_64 import *
# Arguments of the generated routine.
# NOTE(review): `r` is declared as pointer-to-const, yet the routine stores
# its result back through that pointer -- confirm whether a non-const pointer
# type was intended.
r = Argument(ptr(const_uint64_t))
bits = Argument(ptr(const_uint64_t))
hashes = Argument(ptr(const_uint16_t))
length = Argument(size_t)

# querySSE(r, bits, hashes, length)
#
# Loads a 64-byte accumulator from `r` (four 16-byte XMM registers), then for
# each of the `length` 16-bit values h read from `hashes`, ANDs the 64-byte
# block located at `bits + h * 64` into the accumulator.  The accumulator is
# written back to `r` at the end.
#
# MOVAPS and ANDPS with a memory operand fault on addresses that are not
# 16-byte aligned, so both `r` and `bits` must be 16-byte aligned.
#
# NOTE(review): only SSE instructions are emitted; the `isa.fma3` requirement
# in the target looks unnecessary -- confirm before relaxing it.
# NOTE(review): the declared return type is `float_`, but RETURN() is given no
# value -- confirm what callers expect.
with Function("querySSE", (r, bits, hashes, length), float_, target=uarch.default + isa.fma3) as function:
    reg_r = GeneralPurposeRegister64()
    reg_bits = GeneralPurposeRegister64()
    reg_hashes = GeneralPurposeRegister64()
    reg_length = GeneralPurposeRegister64()

    LOAD.ARGUMENT(reg_r, r)
    LOAD.ARGUMENT(reg_bits, bits)
    LOAD.ARGUMENT(reg_hashes, hashes)
    LOAD.ARGUMENT(reg_length, length)

    # The 64-byte accumulator, held in four XMM registers.
    xmm_scalar_r0 = XMMRegister()
    xmm_scalar_r1 = XMMRegister()
    xmm_scalar_r2 = XMMRegister()
    xmm_scalar_r3 = XMMRegister()

    MOVAPS(xmm_scalar_r0, [reg_r])
    MOVAPS(xmm_scalar_r1, [reg_r+16])
    MOVAPS(xmm_scalar_r2, [reg_r+32])
    MOVAPS(xmm_scalar_r3, [reg_r+48])

    idx = GeneralPurposeRegister64()

    scalar_loop = Loop()

    # The loop below is a do-while: without this guard a zero `length` would
    # read hashes[0] out of bounds and wrap the counter around to 2**64 - 1.
    TEST(reg_length, reg_length)
    JZ(scalar_loop.end)

    with scalar_loop:
        # Zero-extend the next 16-bit hash into idx: clear the full register,
        # then load only its low word.
        XOR(idx, idx)
        MOV(idx.as_word, [reg_hashes])
        # idx = bits + hash * 64, i.e. the address of the selected 64-byte block.
        SHL(idx, 6)
        ADD(idx, reg_bits)

        ANDPS(xmm_scalar_r0, [idx])
        ANDPS(xmm_scalar_r1, [idx+16])
        ANDPS(xmm_scalar_r2, [idx+32])
        ANDPS(xmm_scalar_r3, [idx+48])

        # Advance to the next uint16 hash and count down the remaining ones.
        ADD(reg_hashes, 2)
        SUB(reg_length, 1)
        JNZ(scalar_loop.begin)

    # Write the accumulated result back to r.
    MOVAPS([reg_r], xmm_scalar_r0)
    MOVAPS([reg_r+16], xmm_scalar_r1)
    MOVAPS([reg_r+32], xmm_scalar_r2)
    MOVAPS([reg_r+48], xmm_scalar_r3)

    RETURN()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment