Last active
December 10, 2018 12:32
-
-
Save aolo2/466c43072d4332a4716780087c7f9266 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <stdio.h> | |
| #include <stdint.h> | |
| #include <string.h> | |
| #include <stdlib.h> | |
| #include <pthread.h> | |
| #include <assert.h> | |
| #include "vector.c" | |
| #define MIN(a,b) (((a)<(b))?(a):(b)) | |
| #define MAX(a,b) (((a)>(b))?(a):(b)) | |
| const uint32_t MAX_SEQUENCE_LEN = 50000; | |
| const uint32_t MAX_FASTA_LINE = 256; | |
| const int32_t MATCH_SCORE_B62[] = {4, 4, 9, 6, 5, 6, 6, 8, 4, -1, 5, 4, 5, 6, -1, 7, 5, 5, 4, 5, -1, 4, 11, -1, 7, 4}; | |
| // NOTE: match(a, a) = MATCH_SCORE_B62[a - 'A'] | |
| typedef struct | |
| { | |
| int32_t result; | |
| char *haystack; | |
| char *needle; | |
| } search; | |
| typedef struct | |
| { | |
| uint32_t total_len; | |
| uint32_t line_count; | |
| uint32_t *line_start; | |
| } read_result; | |
| typedef struct | |
| { | |
| uint32_t diagonal_index; | |
| uint32_t from; | |
| uint32_t to; | |
| } diagonal_segment; | |
| typedef struct | |
| { | |
| uint32_t count; | |
| uint32_t min; | |
| uint32_t max; | |
| } diagonal_info; | |
| typedef struct | |
| { | |
| uint32_t left_bound; | |
| uint32_t right_bound; | |
| } gap_result; | |
| // TODO: madina | |
| // combine(diagonal_segment *diagonals, uint8_t nel) | |
| static inline void | |
| reset_info(diagonal_info *info) | |
| { | |
| info->count = 0; | |
| info->min = UINT32_MAX; | |
| info->max = 0; | |
| } | |
| static inline void | |
| insert_segment(diagonal_segment **data, uint32_t *cap, uint32_t *head, diagonal_segment elem) | |
| { | |
| if (head >= cap) | |
| { | |
| *cap *= VECTOR_SIZE_MULTIPLIER; | |
| *data = (diagonal_segment *) realloc(*data, sizeof(diagonal_segment) * (*cap)); | |
| } | |
| *data[*head++] = elem; | |
| } | |
| static uint32_t | |
| count_lines(FILE *f) | |
| { | |
| uint32_t nlines = 0; | |
| char line[MAX_FASTA_LINE]; | |
| while(fgets(line, MAX_FASTA_LINE, f) != NULL) | |
| { | |
| ++nlines; | |
| } | |
| rewind(f); | |
| return(nlines); | |
| } | |
| static read_result | |
| read_fasta(char *filename, char **buffer) | |
| { | |
| FILE *file; | |
| uint32_t size; | |
| file = fopen(filename, "r"); | |
| if (!file) | |
| { | |
| printf("# File could not be opened\n# Exiting\n"); | |
| exit(0); | |
| } | |
| fseek(file, 0L, SEEK_END); | |
| size = ftell(file); | |
| rewind(file); | |
| *buffer = (char *) malloc(sizeof(char) * size); | |
| if (!*buffer) | |
| { | |
| fclose(file); | |
| printf("# Malloc failed\n# Exiting\n"); | |
| exit(0); | |
| } | |
| char *seq_buffer = (char *) calloc(MAX_SEQUENCE_LEN, sizeof(char)); | |
| char *line = NULL; | |
| uint32_t seq_len = 0; | |
| uint32_t bytes_read = 0; | |
| uint32_t current_line = 0; | |
| uint32_t line_count = count_lines(file); | |
| uint32_t *line_start = (uint32_t *) malloc(line_count * sizeof(uint32_t)); | |
| int32_t read; | |
| uint64_t len; | |
| while ((read = getline(&line, &len, file)) != -1) | |
| { | |
| if (line[0] == '>') | |
| { | |
| if (seq_buffer[0]) | |
| { | |
| // NOTE: whole sequence in seq_buffer | |
| strncpy(*buffer + bytes_read, seq_buffer, seq_len + 1); | |
| line_start[current_line++] = bytes_read; | |
| bytes_read += seq_len + 1; | |
| memset((void *) seq_buffer, 0x00, seq_len); | |
| seq_len = 0; | |
| } | |
| else | |
| { | |
| continue; | |
| } | |
| } | |
| else | |
| { | |
| strncpy(seq_buffer + seq_len, line, read - 1); | |
| seq_len += read - 1; | |
| } | |
| free(line); | |
| line = NULL; | |
| } | |
| // NOTE: last string | |
| strncpy(*buffer + bytes_read, seq_buffer, seq_len + 1); | |
| bytes_read += seq_len + 1; | |
| ++current_line; | |
| *buffer = (char *) realloc((void *) *buffer, bytes_read); | |
| free(line); | |
| free(seq_buffer); | |
| fclose(file); | |
| assert(current_line <= line_count); | |
| line_start = (uint32_t *) realloc((void *) line_start, current_line * sizeof(uint32_t)); | |
| read_result res = | |
| { | |
| .total_len = bytes_read, | |
| .line_count = current_line, | |
| .line_start = line_start | |
| }; | |
| return(res); | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment