Created
July 9, 2016 14:32
-
-
Save knmkr/e38b4ef07dff3884885c1fa7a141016f to your computer and use it in GitHub Desktop.
misc.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# GRCh37.p13
# $ wget -r ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Homo_sapiens/GRCh37.p13/Primary_Assembly/assembled_chromosomes/FASTA/
# $ for x in {1..22} X Y; do gzip -dc chr${x}.fa.gz >> GRCh37.p13.fa; done
# UCSC hg18 (Build36.1) -- http://hgdownload.cse.ucsc.edu/goldenPath/hg18/chromosomes/
# $ wget -r -l1 'ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/chromosomes/'
# $ for x in {1..22} X Y; do gzip -dc hgdownload.cse.ucsc.edu/goldenPath/hg18/chromosomes/chr${x}.fa.gz >> hg18.fa; done
# UCSC hg17 (Build35) -- http://hgdownload.cse.ucsc.edu/goldenPath/hg17/chromosomes/
# $ wget -r -l1 'ftp://hgdownload.cse.ucsc.edu/goldenPath/hg17/chromosomes/'
# $ for x in {1..22} X Y; do gzip -dc hgdownload.cse.ucsc.edu/goldenPath/hg17/chromosomes/chr${x}.fa.gz >> hg17.fa; done
Author
knmkr
commented
Jul 9, 2016
$ # hg19_rCRS.fa
$ wget -r -l1 'ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/chromosomes/'
$ for x in {1..22} X Y; do gzip -dc hgdownload.cse.ucsc.edu/goldenPath/hg19/chromosomes/chr${x}.fa.gz >> hg19.fa; done
$ echo ">chrM" > chrM.fa
$ tail -n+2 rCRS.fa >> chrM.fa
$ cat chrM.fa hg19.fa| gzip > hg19_rCRS.fa.gz
hg19_rCRS.fa
##contig=<ID=chrM,length=16569>
##contig=<ID=chr1,length=249250621>
##contig=<ID=chr2,length=243199373>
##contig=<ID=chr3,length=198022430>
##contig=<ID=chr4,length=191154276>
##contig=<ID=chr5,length=180915260>
##contig=<ID=chr6,length=171115067>
##contig=<ID=chr7,length=159138663>
##contig=<ID=chr8,length=146364022>
##contig=<ID=chr9,length=141213431>
##contig=<ID=chr10,length=135534747>
##contig=<ID=chr11,length=135006516>
##contig=<ID=chr12,length=133851895>
##contig=<ID=chr13,length=115169878>
##contig=<ID=chr14,length=107349540>
##contig=<ID=chr15,length=102531392>
##contig=<ID=chr16,length=90354753>
##contig=<ID=chr17,length=81195210>
##contig=<ID=chr18,length=78077248>
##contig=<ID=chr19,length=59128983>
##contig=<ID=chr20,length=63025520>
##contig=<ID=chr21,length=48129895>
##contig=<ID=chr22,length=51304566>
##contig=<ID=chrX,length=155270560>
##contig=<ID=chrY,length=59373566>
$ while read line; do echo ${#line}; done < chrM.fa| tail -n+2| awk '{total = total + $1} END{print total}'
16569
$ gzip -dc hg19_rCRS.fa.gz > hg19_rCRS.fa
$ samtools faidx hg19_rCRS.fa
$ cat hg19_rCRS.region.bed
chrM 0 16569
chr1 0 249250621
chr2 0 243199373
chr3 0 198022430
chr4 0 191154276
chr5 0 180915260
chr6 0 171115067
chr7 0 159138663
chr8 0 146364022
chr9 0 141213431
chr10 0 135534747
chr11 0 135006516
chr12 0 133851895
chr13 0 115169878
chr14 0 107349540
chr15 0 102531392
chr16 0 90354753
chr17 0 81195210
chr18 0 78077248
chr19 0 59128983
chr20 0 63025520
chr21 0 48129895
chr22 0 51304566
chrX 0 155270560
chrY 0 59373566
$ gzip -dc genome.gvcf.gz| break_blocks --region-file hg19_rCRS.region.bed --ref hg19_rCRS.fa| bgzip -c > genome.vcf.bgz
$ tabix -p vcf genome.vcf.bgz
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment