crazyhottommy · August 29, 2015 14:16
diff --git a/batch_convert_faidx.sh b/batch_convert_faidx.sh
 #! /usr/bin
 # put the coordinates in a bed file

 infile=$1
 while read chr start end 
 do
 samtools faidx ref.fasta $chr:$start-$end >> test.fa
 done <$infile
diff --git a/coordinates2seq.py b/coordinates2seq.py
 # Use the cruzdb python package https://github.com/brentp/cruzdb
 from cruzdb import sequence 
 print sequence.sequence(‘hg19’,’chr1’, 100000, 100010)
 # cactaagcaca
 # 1 based!
diff --git a/coordinates2sequences.sh b/coordinates2sequences.sh
 # download twoBitToFa here http://hgdownload.cse.ucsc.edu/admin/exe/  
 twoBitToFa http://hgdownload.cse.ucsc.edu/gbdb/hg19/hg19.2bit test.fa -seq=chr1 -start=100000 -end=100010
 cat test.fa
 >chr1:100000-100010
 actaagcaca
 # Note that it is 0 based cooridnates system

 # Use betools get fasta http://bedtools.readthedocs.org/en/latest/content/tools/getfasta.html
 # you need a fasta file of the whole genome, and a bed file containing the cooridnates 
 bedtools getfasta -fi UCSC_hg19_genome.fa -bed input.bed -fo out.fa
 cat out.fa
 >chr1:100000-100010
 actaagcaca
 # it is also 0 based 

 # Use samtools faidx
 # First, index your fasta file (only have to do this once) 
 samtools faidx reference.fa  
 # then extract the sequences you want 
 samtools faidx ref.fasta chr1:100000-100010
 >chr1:100000-100010
 cactaagcaca
 # note that it is 1 based !
	#! /usr/bin
	# put the coordinates in a bed file

	infile=$1
	while read chr start end
	do
	samtools faidx ref.fasta $chr:$start-$end >> test.fa
	done <$infile
	# Use the cruzdb python package https://github.com/brentp/cruzdb
	from cruzdb import sequence
	print sequence.sequence(‘hg19’,’chr1’, 100000, 100010)
	# cactaagcaca
	# 1 based!
	# download twoBitToFa here http://hgdownload.cse.ucsc.edu/admin/exe/
	twoBitToFa http://hgdownload.cse.ucsc.edu/gbdb/hg19/hg19.2bit test.fa -seq=chr1 -start=100000 -end=100010
	cat test.fa
	>chr1:100000-100010
	actaagcaca
	# Note that it is 0 based cooridnates system

	# Use betools get fasta http://bedtools.readthedocs.org/en/latest/content/tools/getfasta.html
	# you need a fasta file of the whole genome, and a bed file containing the cooridnates
	bedtools getfasta -fi UCSC_hg19_genome.fa -bed input.bed -fo out.fa
	cat out.fa
	>chr1:100000-100010
	actaagcaca
	# it is also 0 based

	# Use samtools faidx
	# First, index your fasta file (only have to do this once)
	samtools faidx reference.fa
	# then extract the sequences you want
	samtools faidx ref.fasta chr1:100000-100010
	>chr1:100000-100010
	cactaagcaca
	# note that it is 1 based !