Skip to content

Instantly share code, notes, and snippets.

@aolo2
Last active December 10, 2018 12:32
Show Gist options
  • Select an option

  • Save aolo2/466c43072d4332a4716780087c7f9266 to your computer and use it in GitHub Desktop.

Select an option

Save aolo2/466c43072d4332a4716780087c7f9266 to your computer and use it in GitHub Desktop.
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include <pthread.h>
#include <assert.h>
#include "vector.c"
#define MIN(a,b) (((a)<(b))?(a):(b))
#define MAX(a,b) (((a)>(b))?(a):(b))
const uint32_t MAX_SEQUENCE_LEN = 50000;
const uint32_t MAX_FASTA_LINE = 256;
const int32_t MATCH_SCORE_B62[] = {4, 4, 9, 6, 5, 6, 6, 8, 4, -1, 5, 4, 5, 6, -1, 7, 5, 5, 4, 5, -1, 4, 11, -1, 7, 4};
// NOTE: match(a, a) = MATCH_SCORE_B62[a - 'A']
typedef struct
{
int32_t result;
char *haystack;
char *needle;
} search;
typedef struct
{
uint32_t total_len;
uint32_t line_count;
uint32_t *line_start;
} read_result;
typedef struct
{
uint32_t diagonal_index;
uint32_t from;
uint32_t to;
} diagonal_segment;
typedef struct
{
uint32_t count;
uint32_t min;
uint32_t max;
} diagonal_info;
typedef struct
{
uint32_t left_bound;
uint32_t right_bound;
} gap_result;
// TODO: madina
// combine(diagonal_segment *diagonals, uint8_t nel)
static inline void
reset_info(diagonal_info *info)
{
info->count = 0;
info->min = UINT32_MAX;
info->max = 0;
}
static inline void
insert_segment(diagonal_segment **data, uint32_t *cap, uint32_t *head, diagonal_segment elem)
{
if (head >= cap)
{
*cap *= VECTOR_SIZE_MULTIPLIER;
*data = (diagonal_segment *) realloc(*data, sizeof(diagonal_segment) * (*cap));
}
*data[*head++] = elem;
}
static uint32_t
count_lines(FILE *f)
{
uint32_t nlines = 0;
char line[MAX_FASTA_LINE];
while(fgets(line, MAX_FASTA_LINE, f) != NULL)
{
++nlines;
}
rewind(f);
return(nlines);
}
static read_result
read_fasta(char *filename, char **buffer)
{
FILE *file;
uint32_t size;
file = fopen(filename, "r");
if (!file)
{
printf("# File could not be opened\n# Exiting\n");
exit(0);
}
fseek(file, 0L, SEEK_END);
size = ftell(file);
rewind(file);
*buffer = (char *) malloc(sizeof(char) * size);
if (!*buffer)
{
fclose(file);
printf("# Malloc failed\n# Exiting\n");
exit(0);
}
char *seq_buffer = (char *) calloc(MAX_SEQUENCE_LEN, sizeof(char));
char *line = NULL;
uint32_t seq_len = 0;
uint32_t bytes_read = 0;
uint32_t current_line = 0;
uint32_t line_count = count_lines(file);
uint32_t *line_start = (uint32_t *) malloc(line_count * sizeof(uint32_t));
int32_t read;
uint64_t len;
while ((read = getline(&line, &len, file)) != -1)
{
if (line[0] == '>')
{
if (seq_buffer[0])
{
// NOTE: whole sequence in seq_buffer
strncpy(*buffer + bytes_read, seq_buffer, seq_len + 1);
line_start[current_line++] = bytes_read;
bytes_read += seq_len + 1;
memset((void *) seq_buffer, 0x00, seq_len);
seq_len = 0;
}
else
{
continue;
}
}
else
{
strncpy(seq_buffer + seq_len, line, read - 1);
seq_len += read - 1;
}
free(line);
line = NULL;
}
// NOTE: last string
strncpy(*buffer + bytes_read, seq_buffer, seq_len + 1);
bytes_read += seq_len + 1;
++current_line;
*buffer = (char *) realloc((void *) *buffer, bytes_read);
free(line);
free(seq_buffer);
fclose(file);
assert(current_line <= line_count);
line_start = (uint32_t *) realloc((void *) line_start, current_line * sizeof(uint32_t));
read_result res =
{
.total_len = bytes_read,
.line_count = current_line,
.line_start = line_start
};
return(res);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment