This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR040/ERR040131/ERR040131_1.fastq.gz | |
wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR040/ERR040131/ERR040131_2.fastq.gz | |
wget ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/195/955/GCF_000195955.2_ASM19595v2/GCF_000195955.2_ASM19595v2_genomic.fna.gz | |
gunzip GCF_000195955.2_ASM19595v2_genomic.fna.gz | |
mv GCF_000195955.2_ASM19595v2_genomic.fna H37Rv.fa | |
# Get software | |
wget https://github.com/samtools/samtools/releases/download/1.9/samtools-1.9.tar.bz2 && tar -xvf samtools-1.9.tar.bz2 && cd samtools-1.9/ && make && cd .. | |
wget https://github.com/samtools/bcftools/releases/download/1.9/bcftools-1.9.tar.bz2 && tar -xvf bcftools-1.9.tar.bz2 && cd bcftools-1.9/ && make && cd .. | |
git clone https://github.com/lh3/bwa.git && cd bwa && make && cd ../ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
import sys | |
import subprocess | |
import argparse | |
import random | |
import os | |
rand_generator = random.SystemRandom() | |
def get_random_file(prefix = None,extension=None): | |
randint = rand_generator.randint(1,999999) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
cd ~/data/ | |
rm -r gwas/* | |
wget http://pathogenseq.lshtm.ac.uk/downloads/new_gwas.tgz | |
tar -xvf new_gwas.tgz | |
rm new_gwas.tgz |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
import sys | |
for l in sys.stdin: | |
accessions = l.rstrip().split("_") | |
for accession in accessions: | |
if len(accession)==9: | |
dir1 = accession[:6] | |
sys.stdout.write("ftp://ftp.sra.ebi.ac.uk/vol1/fastq/%s/%s/%s_1.fastq.gz\n" % (dir1,accession,accession)) | |
sys.stdout.write("ftp://ftp.sra.ebi.ac.uk/vol1/fastq/%s/%s/%s_2.fastq.gz\n" % (dir1,accession,accession)) | |
elif len(accession)==10: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
rsync -am --include='*.gatk.vcf.gz' --include='*/' --exclude='*' /mnt/storage/jody/ena/vcf/ . |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# VCF file = test.filt.vcf.gz | |
# replace where necessary | |
(printf "chr\npos\nref\n"; bcftools query -l test.filt.vcf.gz) | datamash transpose > test.filt.mat.bin | |
bcftools query -f '%CHROM\t%POS\t%REF[\t%GT]\n' test.filt.vcf.gz | tr '|' '/' | sed 's/\.\/\./N/g' | sed 's/0\/0/0/g' | sed 's/.\/./1/g' >> test.filt.mat.bin |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
rm -r genomics_db | |
gatk GenomicsDBImport --genomicsdb-workspace-path genomics_db -L Chromosome --sample-name-map test.map --reader-threads 10 --max-num-intervals-to-import-in-parallel 10 | |
gatk --java-options "-Xmx40g" GenotypeGVCFs -R ~/refgenome/MTB-h37rv_asm19595v2-eg18.fa -V gendb://genomics_db -O test.raw.vcf.gz | |
bcftools view -V indels test.raw.vcf.gz | bcftools filter -e 'GT="het"' -S . | awk 'length($4)==1 || $0~/^#/' | tr '|' '/' | tr '*' '.' | bcftools view -a | bcftools view -c 1 -Oz -o test.filt.vcf.gz | |
bcf2fasta.py test.filt.vcf.gz ~/refgenome/MTB-h37rv_asm19595v2-eg18.fa test.snps.fa |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
cd /tmp/ | |
git clone https://github.com/jodyphelan/pathogen-profiler.git | |
cd pathogen-profiler | |
python setup.py install | |
cd ../ | |
rm -fr pathogen-profiler |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR209/009/SRR2099969/SRR2099969_1.fastq.gz | |
wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR209/009/SRR2099969/SRR2099969_2.fastq.gz | |
wget ftp://ftp.ensemblgenomes.org/pub/release-32/bacteria//fasta/bacteria_0_collection/mycobacterium_tuberculosis_h37rv/dna/Mycobacterium_tuberculosis_h37rv.ASM19595v2.dna.toplevel.fa.gz | |
gunzip Mycobacterium_tuberculosis_h37rv.ASM19595v2.dna.toplevel.fa.gz | |
mv Mycobacterium_tuberculosis_h37rv.ASM19595v2.dna.toplevel.fa H37Rv.fa | |
# Get software | |
wget https://github.com/samtools/samtools/releases/download/1.9/samtools-1.9.tar.bz2 && tar -xvf samtools-1.9.tar.bz2 && cd samtools-1.9/ && make && cd .. | |
wget https://github.com/samtools/bcftools/releases/download/1.9/bcftools-1.9.tar.bz2 && tar -xvf bcftools-1.9.tar.bz2 && cd bcftools-1.9/ && make && cd .. | |
git clone https://github.com/lh3/bwa.git && cd bwa && make && cd ../ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
echo $1 > $1.sample_name.txt \ | |
&& ebiDownload.pl $1.sample_name.txt | parallel wget \ | |
&& tb-profiler profile -1 ${1}_1.fastq.gz -2 ${1}_2.fastq.gz -t 5 -p $1 \ | |
&& bcftools mpileup -f /home/ubuntu/refgenome/MTB-h37rv_asm19595v2-eg18.fa bam/$1.bam -a DP,AD -g 10 -B -Q 0 -q 0 -I | bcftools call -mg 10 -V indels -Oz -o vcf/$1.bcftools.g.vcf.gz \ | |
&& gatk HaplotypeCaller -I bam/$1.bam -R /home/ubuntu/refgenome/MTB-h37rv_asm19595v2-eg18.fa -O vcf/$1.gatk.vcf.gz -ERC GVCF \ | |
&& delly call -g /home/ubuntu/refgenome/MTB-h37rv_asm19595v2-eg18.fa -o vcf/$1.delly.bcf bam/$1.bam \ | |
&& rm ${1}_1.fastq.gz ${1}_2.fastq.gz bam/${1}.* |