Created: April 19, 2019 21:56
Save gamozolabs/7e6fa7ca00256b2bb826af1a8bea8572 to your computer and use it in GitHub Desktop.
Compare coverage implementation in the old Vectorized Emulator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Generate a hash using the values in Zmm0 and return Zmm0
///
/// Runs four rounds of an xorshift-style mix independently on each
/// 32-bit lane of Zmm0, after seeding with a fixed random constant.
///
/// Clobbers Zmm1!!!
fn hash_zmm0(outasm: &mut falkasm::AsmStream, conststore: &mut ConstStore)
{
    /* Merge-masked k1: only the vector lanes enabled in k1 get updated */
    let kmask = Operand::KmaskRegister(KmaskType::Merge(KmaskReg::K1));

    /* Memory operands (Membc — presumably broadcast loads; confirm) for
     * the shift amounts and the seed. add_const() returns an index into
     * a dword constant table based at R10, hence the `* 4` to scale the
     * index into a byte displacement. */
    let thirt = Membc(Some(R10), None, conststore.add_const(13)
        .unwind("Out of room for constant storage") as i64 * 4);
    let sevent = Membc(Some(R10), None, conststore.add_const(17)
        .unwind("Out of room for constant storage") as i64 * 4);
    /* NOTE(review): this local is named `five` but holds 15. Canonical
     * xorshift32 uses the triple (13, 17, 5), and the round comment
     * below says `<< 15` — name, value, and convention disagree.
     * Behavior is left unchanged here; confirm whether 15 is intended. */
    let five = Membc(Some(R10), None, conststore.add_const(15)
        .unwind("Out of room for constant storage") as i64 * 4);
    /* Fixed seed xored in before mixing */
    let rval = Membc(Some(R10), None, conststore.add_const(0xd21e9c0c)
        .unwind("Out of room for constant storage") as i64 * 4);

    /* Xor in a random value to start the hash */
    outasm.vpxord(&[Vreg(Zmm0), kmask, Vreg(Zmm0), rval]);

    /* Four mixing rounds; each shift result lands in Zmm1 (the clobber)
     * and is xored back into Zmm0 */
    for _ in 0..4 {
        /* zmm0 ^= zmm0 << 13 */
        outasm.vpsllvd(&[Vreg(Zmm1), kmask, Vreg(Zmm0), thirt]);
        outasm.vpxord(&[Vreg(Zmm0), kmask, Vreg(Zmm0), Vreg(Zmm1)]);

        /* zmm0 ^= zmm0 >> 17 */
        outasm.vpsrlvd(&[Vreg(Zmm1), kmask, Vreg(Zmm0), sevent]);
        outasm.vpxord(&[Vreg(Zmm0), kmask, Vreg(Zmm0), Vreg(Zmm1)]);

        /* zmm0 ^= zmm0 << 15 */
        outasm.vpsllvd(&[Vreg(Zmm1), kmask, Vreg(Zmm0), five]);
        outasm.vpxord(&[Vreg(Zmm0), kmask, Vreg(Zmm0), Vreg(Zmm1)]);
    }
}
/// Insert the 16 per-lane dword hashes in Zmm0 into the shared coverage
/// bitmap, and record which vector lanes produced new coverage.
///
/// Frame-layout assumptions (inferred from the offsets below — TODO
/// confirm against the caller that sets up this stack frame):
///   - rsp + 0x10            : 64-byte scratch area the hashes spill to
///   - rsp + 64*4 + 0x10     : per-lane "hit new coverage" bitmap
///   - rsp + 64*4 + 8 + 0x10 : 8-byte pointer to the coverage table
///
/// Clobbers rax, rbx, rcx, rdx.
fn hash_coverage_zmm0(outasm: &mut falkasm::AsmStream)
{
    let kmask = Operand::KmaskRegister(KmaskType::Merge(KmaskReg::K1));

    /* Store the hashes to the stack */
    outasm.vmovdqa32(&[Mem(Some(Rsp), None, 0x10), kmask, Vreg(Zmm0)]);

    /* Get the register mask (active-lane mask) into eax */
    /* kmovw eax, k1 */
    outasm.raw_bytes(b"\xc5\xf8\x93\xc1");

    /* Dword counter (lane index) */
    outasm.xor(&[Reg(Rcx), Reg(Rcx)]);

    outasm.label("daloop");

    /* Shift the kmask by 1. If the carry is not set then this component
     * is disabled and we should skip it.
     */
    outasm.shr(&[Reg(Rax), Imm(1)]);
    outasm.jnc(&[BranchShort("next_iter")]);

    /* Get the hash into edx (lane rcx of the spilled vector) */
    outasm.mov(&[Reg(Rdx), Mem(Some(Rsp), Some((Rcx, 4)), 0x10)]);

    /* Get the pointer to the hash coverage table.
     * Temporarily switch the assembler to 64-bit mode so this is a full
     * 8-byte pointer load, then drop back to 32-bit mode. */
    outasm.set_mode(AsmMode::Bits64);
    outasm.mov(&[Reg(Rbx), Mem(Some(Rsp), None, 64*4 + 8 + 0x10)]);
    outasm.set_mode(AsmMode::Bits32);

    /* Mask off the 'hash' we created for insertion into the
     * table. Currently we allocate 16 MiB for the table,
     * this translates to 128 Mbits which we can insert
     * into the table
     */
    outasm.and(&[Reg(Rdx), Imm(0x7FFFFFF)]);

    /* Insert the hash into the table, this table is shared
     * and this operation is not atomic so it's possible
     * there are races. But this is fine, it just means
     * we might double report things.
     */
    outasm.bts(&[Mem(Some(Rbx), None, 0), Reg(Rdx)]);

    /* If the entry was already in the table we skip reporting
     * up new coverage information
     */
    outasm.jc(&[BranchShort("already_in_table")]);

    /* Now we want to set a bit reporting that this vector
     * component hit new coverage
     */
    outasm.bts(&[Mem(Some(Rsp), None, 64 * 4 + 0x10), Reg(Rcx)]);

    outasm.label("already_in_table");

    /* If the result of the shift was zero, we're all done
     * (no more enabled lanes remain in the mask) */
    outasm.label("next_iter");
    outasm.test(&[Reg(Rax), Reg(Rax)]);
    outasm.jz(&[BranchShort("done")]);

    /* Advance destination pointer by 1 and loop again */
    outasm.add(&[Reg(Rcx), Imm(1)]);
    outasm.jmp(&[BranchShort("daloop")]);

    outasm.label("done");
}
{
    // Compare coverage: emit a JIT stub that, for a 32-bit compare of
    // Zmm5 vs Zmm6, counts how many of the four bytes match per lane,
    // and reports a hash of (match-count, Eip[, path hash]) as coverage
    // whenever the lanes disagree with each other.
    //
    // NOTE(review): `conststore`, `kmask`, `tmpkmask`, `jitcache`,
    // `LIGHT_PATH_MASK` and `COMPUTE_PATH_HASH` come from the enclosing
    // scope, which is not visible here.
    let mut outasm = falkasm::AsmStream::new(AsmMode::Bits32);
    outasm.set_vecwidth(falkasm::VecWidth::Width512);

    // Dword constants, indexed off R10 (index scaled by 4 into a byte
    // displacement, same scheme as hash_zmm0 above).
    let path_mask = Membc(Some(R10), None, conststore.add_const(LIGHT_PATH_MASK)
        .unwind("Out of room for constant storage") as i64 * 4);
    let one = Membc(Some(R10), None, conststore.add_const(1)
        .unwind("Out of room for constant storage") as i64 * 4);
    // Per-byte masks used to compare each byte position of the dword
    let mask_000000ff = Membc(Some(R10), None, conststore.add_const(0xff)
        .unwind("Out of room for constant storage") as i64 * 4);
    let mask_0000ff00 = Membc(Some(R10), None, conststore.add_const(0xff00)
        .unwind("Out of room for constant storage") as i64 * 4);
    let mask_00ff0000 = Membc(Some(R10), None, conststore.add_const(0xff0000)
        .unwind("Out of room for constant storage") as i64 * 4);
    let mask_ff000000 = Membc(Some(R10), None, conststore.add_const(0xff000000)
        .unwind("Out of room for constant storage") as i64 * 4);

    // Zmm2 = per-lane count of matching bytes, starts at zero
    outasm.vpxord(&[Vreg(Zmm2), kmask, Vreg(Zmm2), Vreg(Zmm2)]);

    // For each byte position: isolate the byte in both operands
    // (Zmm5/Zmm6), compare for equality into tmpkmask, and use that
    // mask to add 1 to the matching lanes' counters in Zmm2.
    outasm.vpandd(&[Vreg(Zmm0), kmask, Vreg(Zmm5), mask_000000ff]);
    outasm.vpandd(&[Vreg(Zmm1), kmask, Vreg(Zmm6), mask_000000ff]);
    outasm.vpcmpeqd(&[tmpkmask, kmask, Vreg(Zmm0), Vreg(Zmm1)]);
    outasm.vpaddd(&[Vreg(Zmm2), tmpkmask, Vreg(Zmm2), one]);

    outasm.vpandd(&[Vreg(Zmm0), kmask, Vreg(Zmm5), mask_0000ff00]);
    outasm.vpandd(&[Vreg(Zmm1), kmask, Vreg(Zmm6), mask_0000ff00]);
    outasm.vpcmpeqd(&[tmpkmask, kmask, Vreg(Zmm0), Vreg(Zmm1)]);
    outasm.vpaddd(&[Vreg(Zmm2), tmpkmask, Vreg(Zmm2), one]);

    outasm.vpandd(&[Vreg(Zmm0), kmask, Vreg(Zmm5), mask_00ff0000]);
    outasm.vpandd(&[Vreg(Zmm1), kmask, Vreg(Zmm6), mask_00ff0000]);
    outasm.vpcmpeqd(&[tmpkmask, kmask, Vreg(Zmm0), Vreg(Zmm1)]);
    outasm.vpaddd(&[Vreg(Zmm2), tmpkmask, Vreg(Zmm2), one]);

    outasm.vpandd(&[Vreg(Zmm0), kmask, Vreg(Zmm5), mask_ff000000]);
    outasm.vpandd(&[Vreg(Zmm1), kmask, Vreg(Zmm6), mask_ff000000]);
    outasm.vpcmpeqd(&[tmpkmask, kmask, Vreg(Zmm0), Vreg(Zmm1)]);
    outasm.vpaddd(&[Vreg(Zmm2), tmpkmask, Vreg(Zmm2), one]);

    /* Determine if all values are the same: broadcast one lane of Zmm2
     * (presumably the low dword) to all lanes of Zmm0, then compare
     * against Zmm2 itself */
    outasm.vpbroadcastd(&[Vreg(Zmm0), kmask, Vreg(Zmm2)]);
    outasm.vpcmpeqd(&[
        tmpkmask, kmask, Vreg(Zmm0), Vreg(Zmm2)]);

    /* kxorw k2, k2, k1 — k2 becomes zero iff every active (k1) lane
     * compared equal; the raw bytes imply tmpkmask is k2 (confirm) */
    outasm.raw_bytes(b"\xc5\xec\x47\xd1");
    /* kortestw k2, k2 — sets ZF when k2 is all zero */
    outasm.raw_bytes(b"\xc5\xf8\x98\xd2");
    outasm.jz(&[BranchNear("regs_match")]);

    /* Convert the matching byte count into a hash */
    outasm.vmovdqa32(&[Vreg(Zmm0), kmask, Vreg(Zmm2)]);
    hash_zmm0(&mut outasm, conststore);

    /* Xor in Eip to the hash (register file based at R9; offset 8*64
     * presumably selects the Eip lanes — TODO confirm layout) */
    outasm.vpxord(&[Vreg(Zmm0), kmask, Vreg(Zmm0),
                    Membc(Some(R9), None, 8 * 64)]);

    /* Rehash zmm0 to make sure Eip is shuffled in well */
    hash_zmm0(&mut outasm, conststore);

    if COMPUTE_PATH_HASH {
        /* Mask the path hash (kept in Zmm30) to limit the number of
         * unique paths */
        outasm.vpandd(&[Vreg(Zmm1), kmask, Vreg(Zmm30), path_mask]);
        /* Xor in the path hash */
        outasm.vpxord(&[Vreg(Zmm0), kmask, Vreg(Zmm0), Vreg(Zmm1)]);
    }

    /* Perform hash coverage on the value in Zmm0 */
    hash_coverage_zmm0(&mut outasm);

    outasm.label("regs_match");
    outasm.ret(&[]);

    // Register the finished stub in the JIT cache under this
    // (operand size, registers) key
    jitcache.insert_callable_cache(
        (MemOpSize::Bits32, Some(Zmm31), Zmm31, Some(Zmm31)), outasm);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.