Skip to content

Instantly share code, notes, and snippets.

@knmkr
Created July 9, 2016 14:32
Show Gist options
  • Save knmkr/e38b4ef07dff3884885c1fa7a141016f to your computer and use it in GitHub Desktop.
Save knmkr/e38b4ef07dff3884885c1fa7a141016f to your computer and use it in GitHub Desktop.
misc.
# GRCh37.p13
# $ wget -r ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Homo_sapiens/GRCh37.p13/Primary_Assembly/assembled_chromosomes/FASTA/
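# $ for x in {1..22} X Y; do gzip -dc ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Homo_sapiens/GRCh37.p13/Primary_Assembly/assembled_chromosomes/FASTA/chr${x}.fa.gz >> GRCh37.p13.fa; done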
# UCSC hg18 (Build36.1) -- http://hgdownload.cse.ucsc.edu/goldenPath/hg18/chromosomes/
# $ wget -r -l1 'ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/chromosomes/'
# $ for x in {1..22} X Y; do gzip -dc hgdownload.cse.ucsc.edu/goldenPath/hg18/chromosomes/chr${x}.fa.gz >> hg18.fa; done
# UCSC hg17 (Build35) -- http://hgdownload.cse.ucsc.edu/goldenPath/hg17/chromosomes/
# $ wget -r -l1 'ftp://hgdownload.cse.ucsc.edu/goldenPath/hg17/chromosomes/'
# $ for x in {1..22} X Y; do gzip -dc hgdownload.cse.ucsc.edu/goldenPath/hg17/chromosomes/chr${x}.fa.gz >> hg17.fa; done
@knmkr
Copy link
Author

knmkr commented Jul 9, 2016

# rCRS.fa
# http://www.ncbi.nlm.nih.gov/nuccore/251831106?report=fasta

@knmkr
Copy link
Author

knmkr commented Jul 9, 2016

$ # hg19_rCRS.fa

$ wget -r -l1 'ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/chromosomes/'
$ for x in {1..22} X Y; do gzip -dc hgdownload.cse.ucsc.edu/goldenPath/hg19/chromosomes/chr${x}.fa.gz >> hg19.fa; done
$ echo ">chrM" > chrM.fa
$ tail -n+2 rCRS.fa >> chrM.fa
$ cat chrM.fa hg19.fa| gzip > hg19_rCRS.fa.gz

@knmkr
Copy link
Author

knmkr commented Jul 9, 2016

VCF header ##contig lines for hg19_rCRS.fa (chrM replaced by rCRS):

##contig=<ID=chrM,length=16569>
##contig=<ID=chr1,length=249250621>
##contig=<ID=chr2,length=243199373>
##contig=<ID=chr3,length=198022430>
##contig=<ID=chr4,length=191154276>
##contig=<ID=chr5,length=180915260>
##contig=<ID=chr6,length=171115067>
##contig=<ID=chr7,length=159138663>
##contig=<ID=chr8,length=146364022>
##contig=<ID=chr9,length=141213431>
##contig=<ID=chr10,length=135534747>
##contig=<ID=chr11,length=135006516>
##contig=<ID=chr12,length=133851895>
##contig=<ID=chr13,length=115169878>
##contig=<ID=chr14,length=107349540>
##contig=<ID=chr15,length=102531392>
##contig=<ID=chr16,length=90354753>
##contig=<ID=chr17,length=81195210>
##contig=<ID=chr18,length=78077248>
##contig=<ID=chr19,length=59128983>
##contig=<ID=chr20,length=63025520>
##contig=<ID=chr21,length=48129895>
##contig=<ID=chr22,length=51304566>
##contig=<ID=chrX,length=155270560>
##contig=<ID=chrY,length=59373566>

@knmkr
Copy link
Author

knmkr commented Jul 10, 2016

$ while read line; do echo ${#line}; done < chrM.fa| tail -n+2| awk '{total = total + $1} END{print total}'
16569

@knmkr
Copy link
Author

knmkr commented Jul 10, 2016

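$ gzip -dc hg19_rCRS.fa.gz > hg19_rCRS.fa  # faidx needs plain or bgzip-compressed FASTA; plain gzip cannot be indexed
$ samtools faidx hg19_rCRS.fa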

@knmkr
Copy link
Author

knmkr commented Jul 10, 2016

$ cat hg19_rCRS.region.bed
chrM    0       16569
chr1    0       249250621
chr2    0       243199373
chr3    0       198022430
chr4    0       191154276
chr5    0       180915260
chr6    0       171115067
chr7    0       159138663
chr8    0       146364022
chr9    0       141213431
chr10   0       135534747
chr11   0       135006516
chr12   0       133851895
chr13   0       115169878
chr14   0       107349540
chr15   0       102531392
chr16   0       90354753
chr17   0       81195210
chr18   0       78077248
chr19   0       59128983
chr20   0       63025520
chr21   0       48129895
chr22   0       51304566
chrX    0       155270560
chrY    0       59373566
$ gzip -dc genome.gvcf.gz| break_blocks --region-file hg19_rCRS.region.bed --ref hg19_rCRS.fa| bgzip -c > genome.vcf.bgz

@knmkr
Copy link
Author

knmkr commented Jul 10, 2016

$ tabix -p vcf genome.vcf.bgz

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment