Last active
June 23, 2022 07:35
-
-
Save lindenb/c4b1dd33045175e1ffa56650734f93ec to your computer and use it in GitHub Desktop.
https://www.biostars.org/p/9528296/ How to search the human genome for sequences that differ from a given sequence by a set number of mismatches? C
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <string.h> | |
#include <stdlib.h> | |
#include <ctype.h> | |
#define MAX_SEQ_LENGTH 100 | |
static char compl(char c) { | |
switch(c) { | |
case 'A': return 'T'; | |
case 'T': return 'A'; | |
case 'G': return 'C'; | |
case 'C': return 'G'; | |
default:return 'N'; | |
} | |
} | |
int main(int argc,char** argv) | |
{ | |
size_t i=0; | |
size_t position=0; | |
size_t qlen; | |
char* query; | |
size_t maxdiff=0; | |
size_t win_size=0; | |
char seqName[MAX_SEQ_LENGTH]; | |
int c; | |
if(argc!=3) { | |
fprintf(stderr,"usage %s query maxdiff < in.fa\n",argv[0]); | |
return EXIT_FAILURE; | |
} | |
query=strdup(argv[1]); | |
qlen = strlen(query); | |
maxdiff = (size_t)atoi(argv[2]); | |
if(qlen==0 ||maxdiff >= qlen) { | |
fprintf(stderr,"check parameters"); | |
return EXIT_FAILURE; | |
} | |
for(i=0;i<qlen;i++) query[i]=toupper(query[i]); | |
char *window = (char*)malloc((qlen+1)*sizeof(char)); | |
memset((void*)window,0,(qlen+1)*sizeof(char)); | |
seqName[0]=0; | |
while((c=fgetc(stdin))!=EOF) | |
{ | |
if(c=='>') | |
{ | |
i=0UL; | |
while((c=fgetc(stdin))!=EOF && c!='\n') | |
{ | |
seqName[i++]=c; | |
} | |
seqName[i]=0; | |
position=0; | |
win_size=0; | |
continue; | |
} | |
if(isspace(c)) continue; | |
window[win_size++]=toupper(c); | |
position++; | |
if(win_size==qlen) { | |
size_t count=0UL; | |
char strand='+'; | |
for(i=0;i< qlen && count<= maxdiff;i++) { | |
if(query[i]!=window[i]) count++; | |
} | |
if(count > maxdiff) { | |
strand = '-'; | |
count=0UL; | |
for(i=0;i< qlen && count<= maxdiff;i++) { | |
if(query[i]!=compl(window[(qlen-1)-i])) count++; | |
} | |
} | |
if(count <= maxdiff && i==qlen) { | |
fprintf(stdout,"%s\t%d\t%c\t%s\t%s\t%d\n",seqName,position+1,strand,query,window,count); | |
} | |
memmove((void*)window,&window[1],sizeof(char)*(qlen-1)); | |
win_size--; | |
} | |
} | |
return EXIT_SUCCESS; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment