Skip to content

Instantly share code, notes, and snippets.

@danking
Last active October 18, 2016 23:30
Show Gist options
  • Save danking/312d73cc5f60fddf44dda7cf4e2c8d9e to your computer and use it in GitHub Desktop.
Save danking/312d73cc5f60fddf44dda7cf4e2c8d9e to your computer and use it in GitHub Desktop.
now handling NA
#include <x86intrin.h>
#include <avxintrin.h>
#include <avx2intrin.h>
#include <popcntintrin.h>
#include <stdint.h>
#include <stdio.h>
__m256i allones = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF };
__m256i rightAllele = { 0x5555555555555555, 0x5555555555555555, 0x5555555555555555, 0x5555555555555555 };
void ibs256(uint64_t* result, __m256i x, __m256i y) {
__m256i nxor = _mm256_xor_si256(_mm256_xor_si256(x, y), allones);
// if both bits are one, then the genotype is missing
__m256i xna = _mm256_and_si256(_mm256_srli_si256(x, 1), x);
__m256i yna = _mm256_and_si256(_mm256_srli_si256(y, 1), y);
// if either sample is missing a genotype, we ignore that genotype pair
__m256i na = _mm256_or_si256(xna, yna);
// 1. shift the left alleles over the right ones
// 2. and the alleles
// 3. mask to the right ones
__m256i ibs2 = _mm256_and_si256(_mm256_and_si256(_mm256_and_si256(_mm256_srli_si256(nxor, 1), nxor), rightAllele), na);
// 4. popcnt
uint64_t ibs2sum = _mm_popcnt_u64(_mm256_extract_epi64(ibs2, 0));
ibs2sum += _mm_popcnt_u64(_mm256_extract_epi64(ibs2, 1));
ibs2sum += _mm_popcnt_u64(_mm256_extract_epi64(ibs2, 2));
ibs2sum += _mm_popcnt_u64(_mm256_extract_epi64(ibs2, 3));
// 1. shift the left alleles over the right ones
// 2. or the alleles
// 3. mask to the right ones
__m256i ibs1 = _mm256_and_si256(_mm256_and_si256(_mm256_xor_si256(_mm256_srli_si256(nxor, 1), nxor), rightAllele), na);
// 4. popcnt
uint64_t ibs1sum = _mm_popcnt_u64(_mm256_extract_epi64(ibs1, 0));
ibs1sum += _mm_popcnt_u64(_mm256_extract_epi64(ibs1, 1));
ibs1sum += _mm_popcnt_u64(_mm256_extract_epi64(ibs1, 2));
ibs1sum += _mm_popcnt_u64(_mm256_extract_epi64(ibs1, 3));
// 1. shift the left alleles over the right ones
// 2. or the alleles
// 3. negate the alleles
// 4. mask to the right ones
__m256i ibs0 = _mm256_and_si256(_mm256_andnot_si256(rightAllele, _mm256_or_si256(_mm256_srli_si256(nxor, 1), nxor)), na);
// 5. popcnt
uint64_t ibs0sum = _mm_popcnt_u64(_mm256_extract_epi64(ibs0, 0));
ibs0sum += _mm_popcnt_u64(_mm256_extract_epi64(ibs0, 1));
ibs0sum += _mm_popcnt_u64(_mm256_extract_epi64(ibs0, 2));
ibs0sum += _mm_popcnt_u64(_mm256_extract_epi64(ibs0, 3));
*(result + 0) += ibs0sum;
*(result + 1) += ibs1sum;
*(result + 2) += ibs2sum;
}
uint8_t bytepopcnt(uint8_t x) {
uint8_t cnt = 0;
for (int i = 0; i < 8; ++i) {
cnt += (x >> i) & 0xFF;
}
return cnt;
}
void ibsVec(uint64_t* result, uint64_t length, uint8_t* x, uint8_t* y) {
uint64_t i;
for (i = 0; i < (length - 63); i += 64) {
__m256i x256 = _mm256_set_epi8(*x , *(x+1) , *(x+2) , *(x+3),
*(x+4) , *(x+5) , *(x+6) , *(x+7),
*(x+8) , *(x+9) , *(x+10) , *(x+11),
*(x+12) , *(x+13) , *(x+14) , *(x+15),
*(x+16) , *(x+17) , *(x+18) , *(x+19),
*(x+20) , *(x+21) , *(x+22) , *(x+23),
*(x+24) , *(x+25) , *(x+26) , *(x+27),
*(x+28) , *(x+29) , *(x+30) , *(x+31));
__m256i y256 = _mm256_set_epi8(*y , *(y+1) , *(y+2) , *(y+3),
*(y+4) , *(y+5) , *(y+6) , *(y+7),
*(y+8) , *(y+9) , *(y+10) , *(y+11),
*(y+12) , *(y+13) , *(y+14) , *(y+15),
*(y+16) , *(y+17) , *(y+18) , *(y+19),
*(y+20) , *(y+21) , *(y+22) , *(y+23),
*(y+24) , *(y+25) , *(y+26) , *(y+27),
*(y+28) , *(y+29) , *(y+30) , *(y+31));
ibs256(result, x256, y256);
}
while (i < length) {
uint8_t rightAllele8 = 0xFF;
uint8_t xb = *(x+i);
uint8_t yb = *(y+i);
uint8_t nxor = ~(xb ^ yb);
uint8_t xna = (xb >> 1) & xb;
uint8_t yna = (yb >> 1) & yb;
uint8_t na = xna | yna;
*(result+2) += bytepopcnt(((nxor >> 1) & nxor) & na & rightAllele8);
*(result+1) += bytepopcnt(((nxor >> 1) ^ nxor) & na & rightAllele8);
*(result+0) += bytepopcnt(~((nxor >> 1) | nxor) & na & rightAllele8);
}
}
int main(int argc, char** argv) {
if (argc != 2) {
return -1;
}
int itr = atoi(argv[1]);
uint64_t arr[3] = { 0, 0, 0 };
for (int i = 0; i < itr; ++i ) {
uint64_t x0 = 0; //(((uint64_t)rand()) << 32) | rand();
uint64_t x1 = 0;//(((uint64_t)rand()) << 32) | rand();
uint64_t x2 = 0; //(((uint64_t)rand()) << 32) | rand();
uint64_t x3 = 0;//(((uint64_t)rand()) << 32) | rand();
uint64_t y0 = 0;//(((uint64_t)rand()) << 32) | rand();
uint64_t y1 = 0;//(((uint64_t)rand()) << 32) | rand();
uint64_t y2 = 0;//(((uint64_t)rand()) << 32) | rand();
uint64_t y3 = 0;//(((uint64_t)rand()) << 32) | rand();
__m256i x = _mm256_set_epi64x(x0, x1, x2, x3);
__m256i y = _mm256_set_epi64x(y0, y1, y2, y3);
ibs256(arr, x, y);
}
printf("%llu", arr[0]);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment