/**
 * A simple GAWK extension with functions to manipulate DNA.
 *
 * Pierre Lindenbaum PhD - Institut du Thorax. Nantes. France.
 *
 * 2020
 */
|
#include <stdio.h> |
|
#include <assert.h> |
|
#include <errno.h> |
|
#include <stdlib.h> |
|
#include <string.h> |
|
#include <sys/types.h> |
|
#include <sys/stat.h> |
|
#include <unistd.h> |
|
|
|
#include "gawkapi.h" |
|
static const gawk_api_t *api; /* for convenience macros to work */ |
|
static awk_ext_id_t ext_id; |
|
static const char *ext_version = "sequence extension: version 1.0"; |
|
|
|
|
|
// #define DEBUG fprintf(stderr,"[DEBUG]%s:%d\n",__FILE__,__LINE__) |
|
|
|
int plugin_is_GPL_compatible; |
|
|
|
/*
 * Return the complement of one nucleotide, preserving its case.
 * Any character other than A, C, G or T (upper or lower case) becomes 'N'.
 */
static char complement_base(char base) {
    switch (base) {
    case 'a': return 't';
    case 'A': return 'T';
    case 't': return 'a';
    case 'T': return 'A';
    case 'g': return 'c';
    case 'G': return 'C';
    case 'c': return 'g';
    case 'C': return 'G';
    default:  return 'N';
    }
}

/*
 * Shared implementation of reverse(), complement() and reverse_complement().
 *
 * Reads the first awk argument as a string and builds a new string of the
 * same length in which the characters are optionally read back-to-front
 * (reverse) and/or replaced by their complementary base (complement).
 *
 * result     - value slot supplied by gawk; always initialised here.
 * reverse    - when true, the input is traversed from its last character.
 * complement - when true, each character goes through complement_base().
 *
 * Returns `result`. If the argument is missing or is not a string, ERRNO is
 * set and `result` holds the null string. On success the new buffer is
 * handed over to gawk through make_malloced_string().
 */
static awk_value_t *modify_sequence(awk_value_t *result, awk_bool_t reverse, awk_bool_t complement) {
    awk_value_t param;
    char *text = NULL;
    size_t len;
    size_t i;

    make_null_string(result);

    if (!get_argument(0, AWK_STRING, &param)) {
        update_ERRNO_string("sequence: missing required STRING argument");
        return result;
    }

    len = param.str_value.len;

    /*
     * Allocate one extra byte for a terminating NUL: gawk expects string
     * buffers handed over with make_malloced_string() to be NUL-terminated.
     * It also keeps the request non-zero for an empty input, so a NULL
     * return from a zero-byte malloc cannot be mistaken for out-of-memory.
     */
    emalloc(text, char *, len + 1, "sequence");

    for (i = 0; i < len; i++) {
        /* when reversing, output position i reads from the mirrored index */
        char c = param.str_value.str[reverse ? (len - 1) - i : i];

        if (complement) {
            c = complement_base(c);
        }
        text[i] = c;
    }
    text[len] = '\0';

    make_malloced_string(text, len, result);

    return result;
}
|
|
|
/*
 * awk-callable reverse_complement(seq): delegates to modify_sequence() with
 * both the reverse and the complement transformations enabled.
 * nargs and unused are required by the extension-function signature but are
 * not consulted.
 */
static awk_value_t *do_reverse_complement(int nargs, awk_value_t *result, struct awk_ext_func *unused) {
    const awk_bool_t want_reverse = awk_true;
    const awk_bool_t want_complement = awk_true;

    (void) nargs;
    (void) unused;

    return modify_sequence(result, want_reverse, want_complement);
}
|
/*
 * awk-callable reverse(seq): delegates to modify_sequence() with only the
 * reverse transformation enabled, so characters are copied unchanged in
 * back-to-front order.
 * nargs and unused are required by the extension-function signature but are
 * not consulted.
 */
static awk_value_t *do_reverse(int nargs, awk_value_t *result, struct awk_ext_func *unused) {
    const awk_bool_t want_reverse = awk_true;
    const awk_bool_t want_complement = awk_false;

    (void) nargs;
    (void) unused;

    return modify_sequence(result, want_reverse, want_complement);
}
|
|
|
/*
 * awk-callable complement(seq): delegates to modify_sequence() with only the
 * complement transformation enabled, so each base is swapped for its
 * partner while the order of characters is kept.
 * nargs and unused are required by the extension-function signature but are
 * not consulted.
 */
static awk_value_t *do_complement(int nargs, awk_value_t *result, struct awk_ext_func *unused) {
    const awk_bool_t want_reverse = awk_false;
    const awk_bool_t want_complement = awk_true;

    (void) nargs;
    (void) unused;

    return modify_sequence(result, want_reverse, want_complement);
}
|
|
|
static awk_bool_t init_sequence() { |
|
return awk_true; |
|
} |
|
static awk_bool_t (*init_func)(void) = init_sequence; |
|
|
|
static awk_ext_func_t func_table[] = { |
|
{ "reverse_complement", do_reverse_complement, 1, 1, awk_true, NULL }, |
|
{ "reverse", do_reverse, 1, 1, awk_true, NULL }, |
|
{ "complement", do_complement, 1, 1, awk_true, NULL }, |
|
}; |
|
|
|
/* define the dl_load function using the boilerplate macro */ |
|
|
|
dl_load_func(func_table, sequence, "") |
|
|