Skip to content

Instantly share code, notes, and snippets.

@DrYak
Last active April 3, 2020 10:05
Show Gist options
  • Save DrYak/f08be11d8aedbd391c5ef320ffc9e85c to your computer and use it in GitHub Desktop.
Save DrYak/f08be11d8aedbd391c5ef320ffc9e85c to your computer and use it in GitHub Desktop.
Testing V-pipe SARS-CoV-2
# Keep the whole test under ~/V-test: the Miniconda prefix and the activate
# script below are both addressed as ~/V-test/..., so the working directory
# has to be that same directory no matter where these commands are started.
mkdir -p ~/V-test && cd ~/V-test
######################
### ###
### Miniconda3 ###
### ###
######################
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
# -b for batch (no question asked), -p to choose the installation prefix
bash Miniconda3-latest-Linux-x86_64.sh -b -p ~/V-test/miniconda3
## Mac OS X (same prefix as on Linux, so the activation below works unchanged):
#curl -O https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
#sh Miniconda3-latest-MacOSX-x86_64.sh -b -p ~/V-test/miniconda3
# mind the dot (=source)
. ~/V-test/miniconda3/bin/activate
# bioconda packages rely on conda-forge for their dependencies, so both
# channels are listed (conda-forge first, i.e. with the higher priority)
conda create -n V-pipe -c conda-forge -c bioconda snakemake conda
conda activate V-pipe
# We will let snakemake --use-conda handle
# installation and download of V-pipe dependencies
##################
###   V-pipe   ###
##################
# Fetch the sars-cov2 branch of V-pipe, then work from inside the checkout.
git clone --branch sars-cov2 https://github.com/cbg-ethz/V-pipe.git
cd V-pipe
################
### ###
### test ###
### ###
################
# Go to NCBI SRA and download the runs with references SRR10903401 and SRR10903402
# Create a directory samples and store them inside, with a date level
# (20200102) between each sample directory and its raw_data directory.
mkdir -p samples/{SRR10903401,SRR10903402}/20200102/raw_data
# Show the resulting layout; fall back to find where tree is not installed.
if command -v tree >/dev/null; then
  tree samples
else
  find samples
fi
## verify that the samples have the following directory structure,
## the _R1 and _R2 suffixes on paired-ends are important:
# samples
# ├── SRR10903401
# │   └── 20200102
# │       └── raw_data
# │           ├── wuhan2_R1.fastq
# │           └── wuhan2_R2.fastq
# └── SRR10903402
#     └── 20200102
#         └── raw_data
#             ├── wuhan1_R1.fastq
#             └── wuhan1_R2.fastq
### SNV + local (shorah)
# Open V-pipe's configuration in an editor and enter the settings listed below.
# NOTE(review): the heading mentions "local", yet the listing sets
# local = False — confirm which one is intended.
nano vpipe.config
#[input]
#reference = references/NC_045512.2.fasta
#
#[output]
#snv = True
#local = False
#global = False
#
#[general]
#aligner = bwa
# Dry run (-n): check which tasks would be run, printing their shell commands.
snakemake --snakefile vpipe.snake -n --printshellcmds --cores 2
# on the first run V-pipe will also generate the sample collection table
nano samples.tsv
## V-pipe's default read length is 200; give any other read length in the
## third column (separate the columns with tabs). The rows are shown as
## comments, like the vpipe.config listing, so they are not run as commands;
## they must name the same samples/dates as the directories under samples/:
#SRR10903401 20200102 150
#SRR10903402 20200102 150
# run analysis:
snakemake -s vpipe.snake --use-conda -p --cores 2
## the necessary dependencies will be downloaded and installed in conda environments managed by snakemake
### Results:
## Data in: variants/minority_variants.tsv
## Shorah data in: {sample}/variants/SNVs/REGION_1/
###################
### ###
### Cluster ###
### ###
###################
# https://snakemake.readthedocs.io/en/stable/executing/cluster-cloud.html#cluster-execution
# https://scicomp.ethz.ch/wiki/Using_the_batch_system
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
# -b for batch (no question asked), -p to choose the installation prefix
bash Miniconda3-latest-Linux-x86_64.sh -b -p ~/miniconda3
# bioconda packages rely on conda-forge for their dependencies, so both
# channels are listed (conda-forge first, i.e. with the higher priority)
~/miniconda3/bin/conda install -c conda-forge -c bioconda snakemake
# beware of occasional version conflict...
# ...update an outdated miniconda base installation
~/miniconda3/bin/conda update conda
# ...and/or install elsewhere (a separate environment under ~/V-pipe_conda)
~/miniconda3/bin/conda create -p ~/V-pipe_conda -c conda-forge -c bioconda conda snakemake
# check that the snakemake of that separate environment runs
~/V-pipe_conda/bin/snakemake --version
git clone -b sars-cov2 https://github.com/cbg-ethz/V-pipe.git
cd V-pipe
# Storage-related snakemake options used below:
#
# --create-envs-only
# only download and install dependencies, do not run yet
#
# --conda-prefix DIR
# store the conda environments of dependencies in that directory
# (so they can be shared and re-used between multiple instances of V-pipe)
~/miniconda3/bin/snakemake --snakefile vpipe.snake \
  --use-conda --conda-prefix ~/snake-envs \
  --create-envs-only
# cluster LSF dispatching: jobs are submitted through bsub (--jobs 2)
~/miniconda3/bin/snakemake --snakefile vpipe.snake \
  --use-conda --conda-prefix ~/snake-envs \
  --printshellcmds --cluster bsub --jobs 2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment