Skip to content

Instantly share code, notes, and snippets.

View jodyphelan's full-sized avatar

Jody Phelan jodyphelan

  • London, UK
View GitHub Profile
wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR040/ERR040131/ERR040131_1.fastq.gz
wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR040/ERR040131/ERR040131_2.fastq.gz
wget ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/195/955/GCF_000195955.2_ASM19595v2/GCF_000195955.2_ASM19595v2_genomic.fna.gz
gunzip GCF_000195955.2_ASM19595v2_genomic.fna.gz
mv GCF_000195955.2_ASM19595v2_genomic.fna H37Rv.fa
# Get software
wget https://github.com/samtools/samtools/releases/download/1.9/samtools-1.9.tar.bz2 && tar -xvf samtools-1.9.tar.bz2 && cd samtools-1.9/ && make && cd ..
wget https://github.com/samtools/bcftools/releases/download/1.9/bcftools-1.9.tar.bz2 && tar -xvf bcftools-1.9.tar.bz2 && cd bcftools-1.9/ && make && cd ..
git clone https://github.com/lh3/bwa.git && cd bwa && make && cd ../
sudo mount -o remount,size=26386392k tmpfs /run
sudo mount -o remount,size=40386396k tmpfs /run/user/506/
bedtools genomecov -ibam bam/por1_fastq.bam -bga | awk '$4==0' | bedtools intersect -a - -b ~/miniconda3/envs/dev/share/tbprofiler/tbdb.bed | bedtools subtract -a - -b test2.vcf
test_env=testing
mamba create -y -n $test_env python bwa "samtools>=1.12" "bcftools>=1.12" requests snpEff jq samclip gatk4 trimmomatic parallel "delly>=1.1.7" "freebayes==1.3.6" tqdm minimap2 bedtools jinja2 samclip kmc pilon lofreq filelock mash docxtpl dsk pysam rich-argparse joblib iqtree pydantic tomli itol-config sourmash
conda activate $test_env
pip install rich_argparse
mamba install flask celery redis sqlalchemy psycopg2
pip install redis flask_sqlalchemy flask_login flask_session flask_wtf email_validator
pip install --force-reinstall git+https://github.com/jodyphelan/TBProfiler.git@dev
pip install --force-reinstall git+https://github.com/jodyphelan/pathogen-profiler.git@dev
bcftools view -V indels merged.raw.vcf.gz | setGT.py | bcftools view -a | bcftools filter -e 'GT="het"' -S . | bcftools view -i 'F_PASS(GT!="mis")>0.9' | bcftools view -c 1 | bcftools norm -f ../reference/Mycobacterium_abscessus.7396_7_44.dna.toplevel.fa | bcftools view -Oz -o merged.filt.vcf.gz
import sys
import argparse
import csv
import subprocess
from tqdm import tqdm
import gzip
def num_mismatches(str_x,str_y):
mismatches = 0
for char_x,char_y in zip(str_x,str_y):
import sys
import gzip
import os.path
import subprocess
from collections import defaultdict
import random
import argparse
rand_generator = random.SystemRandom()
Biological_sample Replicate + calling params number of snps across replicates num snps in run
por8 por8B_mq0_bq5 7 7
por8 por8A_mq60_bq0 7 7
por8 por8C_mq0_bq15 7 7
por8 por8B_mq10_bq5 7 7
por8 por8A_mq40_bq20 7 7
por8 por8C_mq30_bq15 7 7
por8 por8C_mq10_bq20 7 7
por8 por8A_mq30_bq0 7 7
por8 por8A_mq60_bq15 7 7
"""
I want to loop through a file that contains:
Rv0001 10
Rv0001 20
Rv0002 50
Rv0003 70
and I want something that looks like this:
{
"Rv0001": [10,20],
def get_barcode_performance(csv_file, tree_file, lineage):
meta = []
for row in csv.DictReader(open(args.csv)):
meta.append(row)
try:
t = ete3.Tree(args.tree,format=1)
except:
t = ete3.Tree(args.tree)