Last active
          August 29, 2015 14:05 
        
      - 
      
- 
        Save blackball/962e36d303db147638f5 to your computer and use it in GitHub Desktop. 
    simple binary feature with robust probabilistic algorithm could be very effective.
  
        
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | #include "aligned-mem.h" | |
| #include <stdlib.h> | |
| void * | |
| aligned_malloc(size_t sz, size_t align) { | |
| if (align & (align-1)) | |
| return 0; | |
| void *m = malloc(sz + align + sizeof(void *)); | |
| void **p = (void **)((size_t)(m + align + sizeof(void*)) & ~(align-1)); | |
| p[-1] = m; | |
| return (void *)p; | |
| } | |
| void | |
| aligned_free(void *p) { | |
| if (p) free(((void **)p)[-1]); | |
| } | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | #include "aligned-mem.h" /* consider using SIMD */ | |
| /* assume 32 */ | |
| typedef unsigned int uint32_t; | |
| struct bvec_t { | |
| size_t size; | |
| uint32_t v[1]; | |
| }; | |
| struct bvec_t * | |
| bvec_new(size_t size) { | |
| size_t n = ((size + 31) & -32) >> 5; | |
| struct bvec_t *bv = aligned_malloc(sizeof(*bv) + n, 32); | |
| memset(bv, 0, sizeof(*bv) + n); | |
| if (bv) bv->size = size; | |
| return bv; | |
| } | |
| #define bvec_set(bv, i) \ | |
| ((bv)->v[(size_t)(i)>>5] |= (1U << ((i) & 31))) | |
| #define bvec_clear(bv, i) \ | |
| ((bv)->v[(size_t)(i)>>5] &= ~(1U << ((i) & 31))) | |
| #define bvec_at(bv, i) \ | |
| (((bv)->v[(size_t)(i) >> 5] & (1u << ((i) & 31))) >> ((i) & 31)) | |
| void | |
| bvec_free(struct bvec_t **p) { | |
| if (p) return ; | |
| aligned_free(*p); | |
| *p = NULL; | |
| } | |
| static uint32_t | |
| popcnt32(uint32_t i) { | |
| i = i - ((i >> 1) & 0x55555555); | |
| i = (i & 0x33333333) + ((i >> 2) & 0x33333333); | |
| return (((i + (i >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24; | |
| } | |
| /* hamming weight: popcnt(b ^ bv0) */ | |
| size_t | |
| bvec_hamming(const struct bvec_t *a, const struct bvec_t *b) { | |
| size_t sz = 0, i = 0; | |
| const size_t n = a->size >> 5; | |
| for (; i < n; ++i) | |
| sz += popcnt32(a->v[i] ^ b->v[i]); | |
| i = a->size - (n << 5); | |
| if (i) sz += popcnt32((a->v[n] ^ b->v[n]) << (32 - i)); | |
| return sz; | |
| } | |
| size_t | |
| bvec_count(const struct bvec_t *bv) { | |
| size_t sz = 0, i = 0; | |
| const size_t n = bv->size >> 5; | |
| for (; i < n; ++i) | |
| sz += popcnt32(bv->v[i]); | |
| i = bv->size - (n << 5); | |
| if (i) sz += popcnt32(bv->v[n] << (32 - i)); | |
| return sz; | |
| } | |
| /* c = a & b */ | |
| void | |
| bvec_and(const struct bvec_t *a, const struct bvec_t *b, struct bvec_t *c) { | |
| const size_t n = a->size >> 5; | |
| for (size_t i = 0; i < n; ++i) | |
| c->v[i] = a->v[i] & b->v[i]; | |
| if (a->size > (n << 5)) | |
| c->v[n] = a->v[n] & b->v[n]; | |
| } | |
| /* c = a | b */ | |
| void | |
| bvec_or(const struct bvec_t *a, const struct bvec_t *b, struct bvec_t *c) { | |
| const size_t n = a->size >> 5; | |
| for (size_t i = 0; i < n; ++i) | |
| c->v[i] = a->v[i] | b->v[i]; | |
| if (a->size > (n << 5)) | |
| c->v[n] = a->v[n] | b->v[n]; | |
| } | |
| /* popcnt(a & b) */ | |
| size_t | |
| bvec_and_count(const struct bvec_t *a, const struct bvec_t *b) { | |
| size_t sz = 0; | |
| const size_t n = a->size >> 5; | |
| size_t i = 0; | |
| for (; i < n; ++i) | |
| sz += popcnt32(a->v[i] & b->v[i]); | |
| i = a->size - (n << 5); | |
| if (i) sz += popcnt32((a->v[n] & b->v[n]) << (32 - i)); | |
| return sz; | |
| } | |
| void | |
| bvec_not(struct bvec_t *a) { | |
| const size_t n = a->size >> 5; | |
| for (size_t i = 0; i < n; ++i) { | |
| a->v[i] = ~(a->v[i]); | |
| } | |
| if (a->size > (n << 5)) | |
| a->v[n] = ~(a->v[n]); | |
| } | |
| /* T = popcnt(a&b) / popcnt(a|b) = popcnt(a&b) / (popcnt(a) + popcnt(b) - popcnt(a&b)) */ | |
| double | |
| bvec_tanimono(const struct bvec_t *a, const struct bvec_t *b) { | |
| const size_t na = bvec_count(a); | |
| const size_t nb = bvec_count(b); | |
| const size_t nab = bvec_and_count(a, b); | |
| printf("%u, %u, %u\n", na, nb, nab); | |
| if (nab == na + nb) | |
| return 1.0; | |
| return nab / (double)(na + nb - nab); | |
| } | |
| static void | |
| bvec_print(const struct bvec_t *a) { | |
| for (size_t i = 0; i < a->size; ++i) { | |
| putchar('0' + bvec_at(a, i)); | |
| } | |
| putchar('\n'); | |
| } | |
| int | |
| main(int ac, char **av) { | |
| struct bvec_t *bv = bvec_new(33); | |
| struct bvec_t *bb = bvec_new(33); | |
| bvec_set(bv, 0); | |
| printf("%u\n", bvec_count(bv)); | |
| //bvec_clear(bv, 0); | |
| printf("%u\n", bvec_hamming(bv, bb)); | |
| printf("%lf\n", bvec_tanimono(bv, bb)); | |
| bvec_print(bv); | |
| bvec_not(bv); | |
| bvec_print(bv); | |
| bvec_not(bb); | |
| bvec_print(bb); | |
| printf("%u\n", bvec_count(bv)); | |
| printf("%u\n", bvec_count(bb)); | |
| printf("%lf\n", bvec_tanimono(bv, bb)); | |
| bvec_free(&bb); | |
| bvec_free(&bv); | |
| return 0; | |
| } | 
  
    Sign up for free
    to join this conversation on GitHub.
    Already have an account?
    Sign in to comment