Created
June 8, 2011 06:44
-
-
Save lindenb/1013908 to your computer and use it in GitHub Desktop.
Adds a UCSC 'bin' column
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Author: Pierre Lindenbaum PhD (original source is from Jim Kent: http://genomewiki.ucsc.edu/index.php/Bin_indexing_system )
 * Motivation: Adds a UCSC 'bin' column, see http://biostar.stackexchange.com/questions/8943/get-rs-number-based-on-position
 * Compilation: gcc bin.c
 * Execute: echo -e "chr1\t10326\t10327\trs112750067" | ./a.out
 * History: updated so it prints all the bins overlapping a range, at every depth
 */
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
/**
 * Prints on stdout the id of the bin `binId` followed, depth-first, by the
 * ids of all of its descendant bins that overlap the half-open range
 * [chromStart, chromEnd). Ids are separated by commas.
 *
 * The bin passed in is always printed; only its children are tested for
 * overlap. Each bin is split into 8 equal children, down to level 4.
 *
 * An empty or inverted range (chromEnd <= chromStart) is treated as the
 * single base at chromStart.
 *
 * @param chromStart    0-based start of the queried range (inclusive)
 * @param chromEnd      end of the queried range (exclusive)
 * @param binId         id of the bin to print
 * @param level         depth of this bin (0 for the root)
 * @param binRowStart   id of the first bin of this level
 * @param rowIndex      index of this bin inside its level
 * @param binRowCount   number of bins in this level
 * @param genomicPos    genomic start of this bin
 * @param genomicLength number of bases covered by this bin
 * @param first         1 to suppress the leading comma (first id printed)
 */
static void binsInRange(
    int chromStart,
    int chromEnd,
    int binId,
    int level,
    int binRowStart,
    int rowIndex,
    int binRowCount,
    int genomicPos,
    int genomicLength,
    int first
    )
{
    enum { DEEPEST_LEVEL = 4, CHILDREN_PER_BIN = 8 };

    if(first!=1) fputc(',',stdout);
    printf("%d",binId);
    if(level<DEEPEST_LEVEL)
    {
        int i;
        int childLength=genomicLength/CHILDREN_PER_BIN;
        int childBinRowCount=binRowCount*CHILDREN_PER_BIN;
        /* the ids of the next level start right after the ids of this level */
        int childRowBinStart=binRowStart+binRowCount;
        int firstChildIndex=rowIndex*CHILDREN_PER_BIN;
        int firstChildBin=childRowBinStart+firstChildIndex;
        for(i=0;i<CHILDREN_PER_BIN;++i)
        {
            /* the child covers [childStart, childEnd) */
            int childStart=genomicPos+i*childLength;
            int childEnd=childStart+childLength;
            /*
             * Half-open comparison: a range starting exactly at childEnd,
             * or ending exactly at childStart, does not overlap the child.
             * The chromStart<childStart guard keeps the child containing
             * chromStart when the range is empty or inverted.
             */
            if( chromStart>=childEnd ||
                (chromStart<childStart && chromEnd<=childStart) )
            {
                continue;
            }
            binsInRange(
                chromStart,
                chromEnd,
                firstChildBin+i,
                level+1,
                childRowBinStart,
                firstChildIndex+i,
                childBinRowCount,
                childStart,
                childLength,
                0
                );
        }
    }
}
/**
 * Prints on stdout the comma-separated list of bin ids produced by
 * binsInRange() for the range chromStart..chromEnd, starting the descent
 * at the root bin (id 0, level 0, a single bin in its level) which covers
 * 536870912 bases from position 0.
 */
static void bins(int chromStart,int chromEnd)
{
    /* span of the root bin: 2^29 = 536870912 bases */
    const int rootSpan = 1 << 29;

    binsInRange(
        chromStart,
        chromEnd,
        0,        /* binId */
        0,        /* level */
        0,        /* binRowStart */
        0,        /* rowIndex */
        1,        /* binRowCount */
        0,        /* genomicPos */
        rootSpan, /* genomicLength */
        1         /* first: no leading comma */
        );
}
/**
 * Reads one line of arbitrary length from `in`.
 *
 * The terminating '\n' is consumed but not stored. A last line that is
 * not terminated by '\n' is returned as well.
 *
 * @param in stream to read from
 * @return a malloc'ed, NUL-terminated string that the caller must free(),
 *         or NULL when no character could be read (end of input).
 *         On allocation failure a message is written to stderr and the
 *         program exits with EXIT_FAILURE.
 */
static char* readline(FILE* in)
{
    char *buf=NULL;
    size_t length=0;
    size_t capacity=0;
    int c;
    while((c=fgetc(in))!=EOF)
    {
        /* always keep room for the incoming character plus the final NUL */
        if(length+2>capacity)
        {
            /* grow geometrically instead of calling realloc per character */
            size_t newCapacity=(capacity==0 ? 128 : capacity*2);
            char *grown=NULL;
            if(newCapacity>capacity) /* false only if the size wrapped around */
            {
                grown=realloc(buf,newCapacity);
            }
            if(grown==NULL)
            {
                fputs("Out of memory\n",stderr);
                exit(EXIT_FAILURE);
            }
            buf=grown;
            capacity=newCapacity;
        }
        if(c=='\n')
        {
            break;
        }
        buf[length++]=(char)c;
    }
    /* buf is still NULL only when EOF was hit before any character */
    if(buf!=NULL)
    {
        buf[length]='\0';
    }
    return buf;
}
/**
 * Reads tab-delimited lines from stdin and echoes them on stdout with an
 * extra column inserted after the third one: the comma-separated list of
 * bins printed by bins() for the range given by column 2 (start) and
 * column 3 (end).
 *
 * Lines with fewer than three columns are echoed without a bin column.
 * Columns are split on every single tab, so empty columns are preserved
 * and never shift the position of the start/end columns.
 *
 * @return 0 once stdin is exhausted
 */
int main(int argc,char** argv)
{
    char *line;
    (void)argc;
    (void)argv;
    while((line=readline(stdin))!=NULL)
    {
        int chromStart=0;
        int chromEnd=0;
        int col=0;
        char *field=line;
        while(field!=NULL)
        {
            /*
             * Split by hand instead of strtok(): strtok() merges runs of
             * tabs, which silently drops empty columns.
             */
            char *tab=strchr(field,'\t');
            if(tab!=NULL) *tab='\0';
            switch(col)
            {
                case 0: fputs(field,stdout);break;
                case 1: chromStart=atoi(field);printf("\t%s",field);break;
                case 2: chromEnd=atoi(field);printf("\t%s\t",field);bins(chromStart,chromEnd);break;
                default:printf("\t%s",field);break;
            }
            field=(tab!=NULL ? tab+1 : NULL);
            ++col;
        }
        fputs("\n",stdout);
        free(line);
    }
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment