Created
March 25, 2014 15:19
-
-
Save wasade/9764043 to your computer and use it in GitHub Desktop.
Rapid subsampling
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <gsl/gsl_rng.h> | |
#include <math.h> | |
#include "subsample.h" | |
/* subsample_col | |
* | |
* desc: subsample an array of COO objects, of length size, to a sum of (n) | |
* | |
* inputs: COO* list - an array of COO objects | |
* COO* ss_result - store the subsampled result | |
* size - length of the array | |
* colsize - length of the dense column | |
* n - sum of values following subsample | |
* output: ss_result - a subsampled array of COO objects | |
*/ | |
void subsample_col(COO* list, COO* ss_result, unsigned int size, unsigned int colsize, unsigned int n) { | |
int *expanded; | |
int i; | |
int *sums; | |
int expanded_size = 0; | |
expanded = expand_by_index(list, size, &expanded_size, COL); | |
permutation(expanded, expanded_size, n); | |
sums = (int*)calloc(colsize, sizeof(int)); | |
for(i = 0; i < fminl(expanded_size, n); i++) { | |
sums[expanded[i]] += 1; | |
} | |
for(i = 0; i < size; i++) { | |
ROW_AT(ss_result, i) = ROW_AT(list, i); | |
COLUMN_AT(ss_result, i) = COLUMN_AT(list, i); | |
VALUE_AT(ss_result, i) = sums[ROW_AT(list, i)]; | |
} | |
free(expanded); | |
free(sums); | |
} | |
/* expand_by_index | |
* | |
* desc: expands a vector to indices. ie, [0 5 2] becomes [1 1 1 1 1 2 2] | |
* | |
* inputs: | |
* list an array of COO | |
* nz_count number of non-zero elements in list | |
* expanded_size size of the expanded array | |
* axis axis to work over | |
* | |
* outputs: | |
* returns the expanded array and the variable expanded_size holds the size | |
* of that array | |
*/ | |
int* expand_by_index(COO* list, unsigned int nz_count, int *expanded_size, int axis) { | |
int *expanded; | |
int i,j,offset = 0; | |
for(i = 0; i < nz_count; i++) | |
*expanded_size += VALUE_AT(list, i); | |
expanded = (int*)malloc((*expanded_size) * sizeof(int)); | |
for(i = 0; i < nz_count; i++) { | |
for(j = 0; j < VALUE_AT(list, i); j++) { | |
if(axis == COL) | |
expanded[offset] = ROW_AT(list, i); | |
else | |
expanded[offset] = COLUMN_AT(list, i); | |
offset++; | |
} | |
} | |
return expanded; | |
} | |
/* permutation | |
* | |
* desc: make a random permutation of an array | |
* | |
* inputs: | |
* indices array to be permuted | |
* size size of the array | |
* to_keep number of elements to permute | |
* | |
* outputs: | |
* indices holds the permuted array | |
*/ | |
void permutation(int *indices, unsigned int size, unsigned int to_keep) | |
{ | |
int i, c, t; | |
if(to_keep > size) | |
to_keep = size; | |
if(to_keep == 0) | |
return; | |
for(i = 0; i < to_keep; i++) { | |
c = gsl_rng_uniform_int(RNG, size-i); | |
/* swap */ | |
t = indices[i]; | |
indices[i] = indices[i+c]; | |
indices[i+c] = t; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment