Skip to content

Instantly share code, notes, and snippets.

@aolo2
Last active December 10, 2018 12:32
Show Gist options
  • Select an option

  • Save aolo2/dc1bb93a47737301fe99a8910417f568 to your computer and use it in GitHub Desktop.

Select an option

Save aolo2/dc1bb93a47737301fe99a8910417f568 to your computer and use it in GitHub Desktop.
#include "utils.c"
#include <errno.h>
#include <sys/sysinfo.h>
const uint32_t DIAGONAL_CUTOFF = 10; // NOTE: diagonals with this or more 2-word matches are checked
const uint32_t DIAGONAL_OFFSET = 32;
const uint32_t MAX_GOOD_PLOTS = 100;
const uint32_t EXPECTED_GOOD_DIAGONAL_PIECES = 20;
const int32_t RESCORE_CUTOFF = 50;
typedef struct
{
int32_t start[676];
uint32_t *positions;
uint32_t length;
char *string;
} dict;
typedef struct
{
uint32_t from_line;
uint32_t to_line;
char *haystack;
uint32_t *line_start;
} string_work;
typedef struct
{
dict *haystack;
dict needle;
uint32_t from_dict;
uint32_t to_dict;
} compare_work;
typedef struct
{
char *alice;
char *bob;
uint32_t alice_len;
uint32_t bob_len;
uint32_t left_bound;
uint32_t right_bound;
} madina_pack;
static inline uint32_t
to_number(char a, char b)
{
return((a - 'A') * 26 + b - 'A');
}
static dict
dict_from_one_string(char *string, uint32_t len)
{
vector pre_dict[676];
for (uint32_t i = 0; i < 676; ++i)
{
pre_dict[i] = init_vector(10);
}
for (uint32_t i = 0; i < len - 1; ++i)
{
uint32_t pair = to_number(string[i], string[i + 1]);
push_back(pre_dict + pair, i);
}
uint32_t dict_head = 0;
dict d =
{
.start = { -1 },
.positions = (uint32_t *) malloc(sizeof(uint32_t) * len),
.length = len,
.string = string
};
for (uint32_t i = 0; i < 676; ++i)
{
d.start[i] = dict_head;
for (uint32_t j = 0; j < pre_dict[i].size; ++j)
{
d.positions[dict_head++] = pre_dict[i].data[j];
}
free_vector(pre_dict + i);
}
int32_t last_good_val = len;
for (int32_t i = 675; i >= 0; --i)
{
if (d.start[i] == -1)
{
d.start[i] = last_good_val;
}
else
{
last_good_val = d.start[i];
}
}
return(d);
}
static void *
dicts_from_many_lines(void *arg)
{
string_work sw = *(string_work *) arg;
uint32_t from_line = sw.from_line;
uint32_t to_line = sw.to_line;
char *haystack = sw.haystack;
uint32_t *line_start = sw.line_start;
uint32_t line_count = to_line - from_line;
dict *dicts = (dict *) malloc(sizeof(dict) * line_count);
for (uint32_t i = 0; i < line_count; ++i)
{
for (uint32_t j = 0; j < 676; ++j)
{
dicts[i].start[j] = -1; // NOTE: no such kmer
}
}
for (uint32_t i = from_line; i < to_line; ++i)
{
// NOTE: one string
uint32_t string_start = line_start[i];
uint32_t local_dict_idx = i - from_line;
vector pre_dict[676];
for (uint32_t j = 0; j < 676; ++j)
{
pre_dict[j] = init_vector(10);
}
uint32_t string_len = 0;
for (uint32_t j = string_start; haystack[j + 1]; ++j)
{
uint32_t pair = to_number(haystack[j], haystack[j + 1]);
push_back(pre_dict + pair, j - string_start);
++string_len;
}
uint32_t dict_head = 0;
dicts[local_dict_idx].positions = (uint32_t *) malloc(sizeof(uint32_t) * string_len);
dicts[local_dict_idx].length = string_len;
dicts[local_dict_idx].string = haystack + string_start;
for (uint32_t j = 0; j < 676 - 1; ++j)
{
uint32_t last_head = dict_head;
for (uint32_t k = 0; k < pre_dict[j].size; ++k)
{
dicts[local_dict_idx].positions[dict_head++] = pre_dict[j].data[k];
}
if (dict_head != last_head)
{
dicts[local_dict_idx].start[j] = last_head;
}
free_vector(pre_dict + j);
}
int32_t last_good_val = string_len;
for (int32_t j = 675; j >= 0; --j)
{
if (dicts[local_dict_idx].start[j] == -1)
{
dicts[local_dict_idx].start[j] = last_good_val;
}
else
{
last_good_val = dicts[local_dict_idx].start[j];
}
}
// assert(dict_head + 1 == string_end - string_start);
}
return((void *) dicts);
}
static inline int32_t
score_blosum(char a, char b)
{
return(a == b ? 4 : -3);
}
static void *
compare_dicts(void *arg)
{
compare_work work = *(compare_work *) arg;
dict *haystack = work.haystack;
dict needle = work.needle;
uint32_t from_dict = work.from_dict;
uint32_t to_dict = work.to_dict;
// madina_pack ret[MAX_GOOD_PLOTS];
// uint32_t madina_head = 0;
// vector *diagonal_dots = (vector *) malloc(2 * MAX_SEQUENCE_LEN * sizeof(vector));
diagonal_info *short_info = (diagonal_info *) malloc(2 * MAX_SEQUENCE_LEN * sizeof(diagonal_info));
diagonal_segment *segments = (diagonal_segment *) malloc(EXPECTED_GOOD_DIAGONAL_PIECES * sizeof(diagonal_segment));
uint32_t segments_cap = EXPECTED_GOOD_DIAGONAL_PIECES;
uint32_t segments_head = 0;
for (uint32_t i = 0; i < 2 * MAX_SEQUENCE_LEN; ++i)
{
// diagonal_dots[i] = init_vector(10);
short_info[i].min = UINT32_MAX;
short_info[i].max = 0;
short_info[i].count = 0;
}
for (uint32_t i = from_dict; i < to_dict; ++i)
{
dict d = haystack[i];
for (uint32_t j = 0; j < 676 - 1; ++j)
{
if (needle.start[j] != needle.start[j + 1] && d.start[j] != d.start[j + 1])
{
for (int32_t k1 = needle.start[j]; k1 < needle.start[j + 1]; ++k1)
{
uint32_t p1 = needle.positions[k1];
for (int32_t k2 = d.start[j]; k2 < d.start[j + 1]; ++k2)
{
uint32_t p2 = d.positions[k2];
int32_t diagonal = (int32_t) p2 - (int32_t) p1;
uint32_t idx_on_diagonal = p1;
if (diagonal < 0)
{
idx_on_diagonal = p2;
diagonal = (int32_t) d.length - diagonal - 1;
}
++(short_info[diagonal].count);
short_info[diagonal].min = MIN(idx_on_diagonal, short_info[diagonal].min);
short_info[diagonal].max = MAX(idx_on_diagonal, short_info[diagonal].max);
// push_back(diagonal_dots + diagonal, idx_on_diagonal);
}
}
}
}
for (uint32_t j = 0; j < needle.length + d.length + 1; ++j)
{
if (short_info[j].count >= DIAGONAL_CUTOFF)
{
int32_t rescored_score = 0;
uint32_t diag_min = short_info[j].min;
uint32_t diag_max = short_info[j].max;
uint32_t diag_start_x = j < d.length ? j : 0;
uint32_t diag_start_y = j >= d.length ? d.length - 1 : 0;
uint32_t good_segments_here = 1; // NOTE: we are sure 'cause >= DIAGONAL_CUTOFF
int32_t score;
diagonal_segment current_segment =
{
.diagonal_index = j,
.from = diag_min,
.to = diag_min + 1
};
for (uint32_t k = diag_min; k <= diag_max + 1; ++k)
{
score = score_blosum(needle.string[diag_start_y + k], d.string[diag_start_x + k]);
rescored_score += score;
if (score > 0)
{
// NOTE: same symbols, WILL CHANGE (FIXME)
if (current_segment.to < k)
{
// NOTE: start new segment
current_segment.from = k;
current_segment.to = k + 1;
}
else
{
// NOTE: add to last segment
++(current_segment.to);
}
}
else
{
// NOTE: save last segment
insert_segment(&segments, &segments_head, &segments_cap, current_segment);
}
}
if (rescored_score < RESCORE_CUTOFF)
{
// NOTE: set back segment HEAD by good_segments_here for reuse
segments_head -= good_segments_here;
}
}
// NOTE: reset vectors without reallocating memory
// reset_vector(diagonal_dots + j);
reset_info(short_info + j);
}
// TODO: madina
// combine(segments, segments_head)
segments_head = 0;
}
// for (uint32_t i = 0; i < 2 * MAX_SEQUENCE_LEN; ++i)
// {
// free_vector(diagonal_dots + i);
// }
// free(diagonal_dots);
free(short_info);
free(segments);
return(NULL);
}
int32_t
main(int32_t argc, char **argv)
{
char *haystack;
read_result read = read_fasta(argv[1], &haystack);
printf("* Bytes read: %d\n* Line count: %d\n", read.total_len, read.line_count);
uint32_t total_len = read.total_len;
uint32_t n_threads = get_nprocs_conf();
uint32_t *line_start = read.line_start;
uint32_t line_count = read.line_count;
string_work tasks[n_threads];
pthread_t threads[n_threads];
struct timespec start, finish;
double elapsed;
printf("* Running on %d threads\n", n_threads);
printf("* Dictionary creation started\n");
for (uint32_t i = 0; i < n_threads - 1; ++i)
{
tasks[i].from_line = i * (line_count / n_threads);
tasks[i].to_line = (i + 1) * (line_count / n_threads);
tasks[i].haystack = haystack;
tasks[i].line_start = line_start;
printf("* Thread %d started for strings %d-%d\n", i + 1, tasks[i].from_line, tasks[i].to_line);
}
// NOTE: last thread
{
tasks[n_threads - 1].from_line = (n_threads - 1) * (line_count / n_threads);
tasks[n_threads - 1].to_line = line_count;
tasks[n_threads - 1].haystack = haystack;
tasks[n_threads - 1].line_start = line_start;
printf("* Thread %d started for strings %d-%d\n", n_threads, tasks[n_threads - 1].from_line, tasks[n_threads - 1].to_line);
}
dict *all_dicts = (dict *) malloc(line_count * sizeof(dict));
clock_gettime(CLOCK_MONOTONIC, &start);
for (uint32_t i = 0; i < n_threads; ++i)
{
pthread_create(&threads[i], NULL, dicts_from_many_lines, (void *) (tasks + i));
}
for (uint32_t i = 0; i < n_threads; ++i)
{
uint32_t n_lines = tasks[i].to_line - tasks[i].from_line;
void *ret;
pthread_join(threads[i], &ret);
memcpy(all_dicts + tasks[i].from_line, (dict *) ret, n_lines * sizeof(dict));
free(ret);
}
clock_gettime(CLOCK_MONOTONIC, &finish);
elapsed = (finish.tv_sec - start.tv_sec);
elapsed += (finish.tv_nsec - start.tv_nsec) / 1000000000.0;
printf("> Dictionary creation done (%ld MB) in %.3f seconds\n", (sizeof(dict) * line_count + sizeof(uint32_t) * total_len) / 1024 / 1024, elapsed);
// NOTE: All that was pre-processing. Real code starts here
// MSIIG(A)... ...WDLGEQTC(A)...
char *needle = "MSIIGTRLQNDKSDTYSAGPCYAGGCSAFTPRGTCGKDWDLGEQTCSGFCTSQPLCARIKKTQVCGLRYSSKGKDPLVSAEWDSRGAPYVRCTYDADLIDTQAQVDQFVSMFGESPSLAERYCMRGVKNTAGELVSRVSSDADPAGGWCRKWYSAHRGPDQDAALGSFCIKNPGAADC";
uint32_t needle_len = strlen(needle);
dict needle_dict = dict_from_one_string(needle, needle_len);
printf("* Using needle \"%.10s...\"\n", needle);
compare_work compare_tasks[n_threads];
for (uint32_t i = 0; i < n_threads - 1; ++i)
{
compare_tasks[i].from_dict = i * (line_count / n_threads);
compare_tasks[i].to_dict = (i + 1) * (line_count / n_threads);
compare_tasks[i].haystack = all_dicts;
compare_tasks[i].needle = needle_dict;
printf("* Thread %d started for dictionaries %d-%d\n", i + 1, compare_tasks[i].from_dict, compare_tasks[i].to_dict);
}
// NOTE: last thread
{
compare_tasks[n_threads - 1].from_dict = (n_threads - 1) * (line_count / n_threads);
compare_tasks[n_threads - 1].to_dict = line_count;
compare_tasks[n_threads - 1].haystack = all_dicts;
compare_tasks[n_threads - 1].needle = needle_dict;
printf("* Thread %d started for dictionaries %d-%d\n", n_threads, compare_tasks[n_threads - 1].from_dict, compare_tasks[n_threads - 1].to_dict);
}
clock_gettime(CLOCK_MONOTONIC, &start);
for (uint32_t i = 0; i < n_threads; ++i)
{
pthread_create(&threads[i], NULL, compare_dicts, (void *) (compare_tasks + i));
}
for (uint32_t i = 0; i < n_threads; ++i)
{
pthread_join(threads[i], NULL);
}
clock_gettime(CLOCK_MONOTONIC, &finish);
elapsed = (finish.tv_sec - start.tv_sec);
elapsed += (finish.tv_nsec - start.tv_nsec) / 1000000000.0;
printf("> Finished comparing to all dictionaries in %.3f seconds\n", elapsed);
free(needle_dict.positions);
// =======================================================
for (uint32_t i = 0; i < line_count; ++i)
{
free(all_dicts[i].positions);
}
free(all_dicts);
free(line_start);
free(haystack);
return(0);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment