Last active
October 18, 2016 23:30
-
-
Save danking/312d73cc5f60fddf44dda7cf4e2c8d9e to your computer and use it in GitHub Desktop.
now handling NA
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <x86intrin.h> | |
#include <avxintrin.h> | |
#include <avx2intrin.h> | |
#include <popcntintrin.h> | |
#include <stdint.h> | |
#include <stdio.h> | |
__m256i allones = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF }; | |
__m256i rightAllele = { 0x5555555555555555, 0x5555555555555555, 0x5555555555555555, 0x5555555555555555 }; | |
void ibs256(uint64_t* result, __m256i x, __m256i y) { | |
__m256i nxor = _mm256_xor_si256(_mm256_xor_si256(x, y), allones); | |
// if both bits are one, then the genotype is missing | |
__m256i xna = _mm256_and_si256(_mm256_srli_si256(x, 1), x); | |
__m256i yna = _mm256_and_si256(_mm256_srli_si256(y, 1), y); | |
// if either sample is missing a genotype, we ignore that genotype pair | |
__m256i na = _mm256_or_si256(xna, yna); | |
// 1. shift the left alleles over the right ones | |
// 2. and the alleles | |
// 3. mask to the right ones | |
__m256i ibs2 = _mm256_and_si256(_mm256_and_si256(_mm256_and_si256(_mm256_srli_si256(nxor, 1), nxor), rightAllele), na); | |
// 4. popcnt | |
uint64_t ibs2sum = _mm_popcnt_u64(_mm256_extract_epi64(ibs2, 0)); | |
ibs2sum += _mm_popcnt_u64(_mm256_extract_epi64(ibs2, 1)); | |
ibs2sum += _mm_popcnt_u64(_mm256_extract_epi64(ibs2, 2)); | |
ibs2sum += _mm_popcnt_u64(_mm256_extract_epi64(ibs2, 3)); | |
// 1. shift the left alleles over the right ones | |
// 2. or the alleles | |
// 3. mask to the right ones | |
__m256i ibs1 = _mm256_and_si256(_mm256_and_si256(_mm256_xor_si256(_mm256_srli_si256(nxor, 1), nxor), rightAllele), na); | |
// 4. popcnt | |
uint64_t ibs1sum = _mm_popcnt_u64(_mm256_extract_epi64(ibs1, 0)); | |
ibs1sum += _mm_popcnt_u64(_mm256_extract_epi64(ibs1, 1)); | |
ibs1sum += _mm_popcnt_u64(_mm256_extract_epi64(ibs1, 2)); | |
ibs1sum += _mm_popcnt_u64(_mm256_extract_epi64(ibs1, 3)); | |
// 1. shift the left alleles over the right ones | |
// 2. or the alleles | |
// 3. negate the alleles | |
// 4. mask to the right ones | |
__m256i ibs0 = _mm256_and_si256(_mm256_andnot_si256(rightAllele, _mm256_or_si256(_mm256_srli_si256(nxor, 1), nxor)), na); | |
// 5. popcnt | |
uint64_t ibs0sum = _mm_popcnt_u64(_mm256_extract_epi64(ibs0, 0)); | |
ibs0sum += _mm_popcnt_u64(_mm256_extract_epi64(ibs0, 1)); | |
ibs0sum += _mm_popcnt_u64(_mm256_extract_epi64(ibs0, 2)); | |
ibs0sum += _mm_popcnt_u64(_mm256_extract_epi64(ibs0, 3)); | |
*(result + 0) += ibs0sum; | |
*(result + 1) += ibs1sum; | |
*(result + 2) += ibs2sum; | |
} | |
uint8_t bytepopcnt(uint8_t x) { | |
uint8_t cnt = 0; | |
for (int i = 0; i < 8; ++i) { | |
cnt += (x >> i) & 0xFF; | |
} | |
return cnt; | |
} | |
void ibsVec(uint64_t* result, uint64_t length, uint8_t* x, uint8_t* y) { | |
uint64_t i; | |
for (i = 0; i < (length - 63); i += 64) { | |
__m256i x256 = _mm256_set_epi8(*x , *(x+1) , *(x+2) , *(x+3), | |
*(x+4) , *(x+5) , *(x+6) , *(x+7), | |
*(x+8) , *(x+9) , *(x+10) , *(x+11), | |
*(x+12) , *(x+13) , *(x+14) , *(x+15), | |
*(x+16) , *(x+17) , *(x+18) , *(x+19), | |
*(x+20) , *(x+21) , *(x+22) , *(x+23), | |
*(x+24) , *(x+25) , *(x+26) , *(x+27), | |
*(x+28) , *(x+29) , *(x+30) , *(x+31)); | |
__m256i y256 = _mm256_set_epi8(*y , *(y+1) , *(y+2) , *(y+3), | |
*(y+4) , *(y+5) , *(y+6) , *(y+7), | |
*(y+8) , *(y+9) , *(y+10) , *(y+11), | |
*(y+12) , *(y+13) , *(y+14) , *(y+15), | |
*(y+16) , *(y+17) , *(y+18) , *(y+19), | |
*(y+20) , *(y+21) , *(y+22) , *(y+23), | |
*(y+24) , *(y+25) , *(y+26) , *(y+27), | |
*(y+28) , *(y+29) , *(y+30) , *(y+31)); | |
ibs256(result, x256, y256); | |
} | |
while (i < length) { | |
uint8_t rightAllele8 = 0xFF; | |
uint8_t xb = *(x+i); | |
uint8_t yb = *(y+i); | |
uint8_t nxor = ~(xb ^ yb); | |
uint8_t xna = (xb >> 1) & xb; | |
uint8_t yna = (yb >> 1) & yb; | |
uint8_t na = xna | yna; | |
*(result+2) += bytepopcnt(((nxor >> 1) & nxor) & na & rightAllele8); | |
*(result+1) += bytepopcnt(((nxor >> 1) ^ nxor) & na & rightAllele8); | |
*(result+0) += bytepopcnt(~((nxor >> 1) | nxor) & na & rightAllele8); | |
} | |
} | |
int main(int argc, char** argv) { | |
if (argc != 2) { | |
return -1; | |
} | |
int itr = atoi(argv[1]); | |
uint64_t arr[3] = { 0, 0, 0 }; | |
for (int i = 0; i < itr; ++i ) { | |
uint64_t x0 = 0; //(((uint64_t)rand()) << 32) | rand(); | |
uint64_t x1 = 0;//(((uint64_t)rand()) << 32) | rand(); | |
uint64_t x2 = 0; //(((uint64_t)rand()) << 32) | rand(); | |
uint64_t x3 = 0;//(((uint64_t)rand()) << 32) | rand(); | |
uint64_t y0 = 0;//(((uint64_t)rand()) << 32) | rand(); | |
uint64_t y1 = 0;//(((uint64_t)rand()) << 32) | rand(); | |
uint64_t y2 = 0;//(((uint64_t)rand()) << 32) | rand(); | |
uint64_t y3 = 0;//(((uint64_t)rand()) << 32) | rand(); | |
__m256i x = _mm256_set_epi64x(x0, x1, x2, x3); | |
__m256i y = _mm256_set_epi64x(y0, y1, y2, y3); | |
ibs256(arr, x, y); | |
} | |
printf("%llu", arr[0]); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment