Last active
November 4, 2020 00:51
-
-
Save bretonics/f6c57474bb077d2acd293092cf7fa449 to your computer and use it in GitHub Desktop.
Template file to run Cell Ranger on Sun Grid Engine (SGE)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash -l | |
# cellranger.qsub - A template file to run Cell Ranger on Sun Grid Engine (SGE) | |
# Andrés Bretón ~ http://andresbreton.com, [email protected] | |
# Template from https://gist.github.com/bretonics/f6c57474bb077d2acd293092cf7fa449#file-cellranger-qsub | |
# ================================================================================ | |
# JOB | |
# Project Information | |
investigator="" | |
date="" | |
project="${date}_${investigator}" | |
# Load modules
# The array expansion is quoted so each module name reaches `module load`
# as exactly one argument, with no word splitting or globbing.
modules=(bcl2fastq cellranger)
module load "${modules[@]}"
# Jobmode "sge" or path to custom sge.template | |
mode="${HOME}/scscore/Templates/cellranger/sge.template" # cannot contain '~' | |
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
# mkfastq Pipeline | |
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
# mkfastq demultiplexing output directory name
id=$date | |
# Absolute path to sequencing run folder | |
run="${HOME}/scscore/Sequencing/${project}" | |
# Path to simple CSV file | |
csv="${run}/${project}.csv" | |
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
# Counts Pipeline | |
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
# Expected number of recovered cells | |
numcells="3000" | |
# Transcriptome references: | |
# GRCh38-1.2.0, hg19-1.2.0, mm10-1.2.0, mm10-2.1.0, hg19_and_mm10-1.2.0, hg19_and_mm10-2.1.0, ercc92-1.2.0 | |
transcriptome="" | |
# List of sample names for each library in sequencing run
#######################################
# Print the sample names found in a simple CSV sample sheet, one per line.
# The first row is treated as the header and skipped. The second
# comma-separated column is taken as the sample name, spaces are replaced
# with underscores, a trailing carriage return is stripped, and rows without
# a second column, empty names and repeated names are dropped.
# Arguments: $1 - path to the CSV sample sheet
# Outputs:   sample names on stdout; an error message on stderr
# Returns:   0 on success, 1 if the sample sheet cannot be read
#######################################
read_sample_names() {
  local csv_file=$1
  if [[ ! -r "${csv_file}" ]]; then
    printf 'Cannot read sample sheet: %s\n' "${csv_file}" >&2
    return 1
  fi
  awk -F, '
    NR > 1 && NF >= 2 {
      name = $2
      sub(/\r$/, "", name)
      gsub(/ /, "_", name)
      if (name != "" && !seen[name]++) print name
    }' "${csv_file}"
}

# Collect the names into an array. Names never contain whitespace (spaces are
# converted to underscores above), so existing unquoted ${samples[@]}
# expansions keep yielding one word per sample.
samples=()
while IFS= read -r sample_name; do
  samples+=("${sample_name}")
done < <(read_sample_names "${csv}")
# FASTQs path folder | |
fastqs="outs/fastq_path/" | |
# Absolute path to the Cell Ranger compatible transcriptome reference | |
ref="${HOME}/scscore/References/cellranger/${transcriptome}" | |
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
# Setup job artifacts | |
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
# Create the log and job directories when they are not already present.
for artifact_dir in logs jobs; do
  [ -d "${artifact_dir}" ] || mkdir "${artifact_dir}"
done

# Keep track of information related to the current job.
# job_field prints one "# LABEL: value" line followed by a blank line.
job_field() { echo -e "# $1: $2\n"; }
banner_rule="# ================================================================================"

echo -e "${banner_rule}\n"
job_field "JOB NAME" "$JOB_NAME"
job_field "USER" "$USER"
job_field "PWD" "$(pwd)"
job_field "HOST" "$HOSTNAME"
job_field "JOB ID" "$JOB_ID"
job_field "TASK ID" "$SGE_TASK_ID"
job_field "START" "$(date)"
echo -e "${banner_rule}\n\n# "
module list
echo -e "\n\n\n\n"
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
# Calls | |
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
# Run mkfastq pipeline (demultiplex BCL files)
# The command is kept in an array so arguments containing spaces stay intact.
mkfastq_cmd=(cellranger mkfastq "--id=${id}" "--run=${run}" "--csv=${csv}" "--jobmode=${mode}")
echo -e "#================================================================================\n"
echo -e "# RUNNING mkfastq:\n"
echo -e "# ${mkfastq_cmd[*]}\n"
echo -e "#================================================================================\n\n\n"
"${mkfastq_cmd[@]}"
# Capture the exit status right away: every later command (echo, test, if)
# overwrites $?, so checking it further down would never see a failure.
mkfastq_status=$?
echo -e "# ================================================================================\n\n\n"

# Check mkfastq pipeline finished properly
if [ "${mkfastq_status}" -ne 0 ]; then
  echo -e "mkfastq failed.\n"
  exit 126
fi

# Check if mkfastq demultiplexing directory exists for sequencing run "id".
# Quoting matters here: an empty id must fail the test instead of passing it.
if [ ! -d "${id}" ]; then
  echo -e "Could not change to $id directory.\n"
  echo -e "Check if current directory contains project $id from mkfastq call.\n"
  echo -e "Currently at $(pwd)\n"
  exit 126
fi

echo -e "Changing to $id"
cd "${id}" || exit 126
echo -e "Changed to $(pwd)\n\n\n"

# Normalise the sample list: works whether `samples` is a whitespace-separated
# string or an array of names.
read -r -a sample_list <<< "${samples[*]}"

echo -e "#================================================================================\n"
echo -e "# RUNNING counts on:\n"
echo -e "# ${sample_list[*]}\n"
echo -e "#================================================================================\n"

# Run counts pipeline, remembering how many samples failed
count_failures=0
for sample in "${sample_list[@]}"; do
  count_cmd=(cellranger count "--id=${sample}" "--sample=${sample}" "--fastqs=${fastqs}" "--transcriptome=${ref}" "--expect-cells=${numcells}" "--jobmode=${mode}")
  echo -e "\n\n"
  echo -e "# RUNNING counts for $sample of [${sample_list[*]}]:\n"
  echo -e "# ================================================================================\n"
  echo -e "${count_cmd[*]}\n\n\n"
  if ! "${count_cmd[@]}"; then
    echo -e "cellranger count failed for ${sample}.\n"
    count_failures=$((count_failures + 1))
  fi
done
echo -e "# ================================================================================\n\n\n"

# Clean up and finalize, only when every count run succeeded
if [ "${count_failures}" -eq 0 ]; then
  echo -e "Cleaning up...\n"
  # NOTE(review): this runs from inside the ${id} directory (see the cd above),
  # so the relative paths below resolve there — confirm that is where the
  # .mro/.qsub files and the ${project} destination are expected to be.
  mv "__${date}.mro" "${date}.qsub" "${project}"
else
  echo -e "Skipping clean up: ${count_failures} count run(s) failed.\n"
fi
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
# End job message | |
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
# Closing banner: finish time, user and the SGE-provided slot count and
# temporary directory. end_field prints one "# LABEL: value" line followed by
# a blank line.
end_field() { echo -e "# $1: $2\n"; }
closing_rule="# ================================================================================"

echo -e "\n\n\n"
echo -e "${closing_rule}\n"
end_field "END" "$(date)"
end_field "USER" "$USER"
end_field "CORES/THREADS" "$NSLOTS"
end_field "TEMPORARY DIRECTORY" "$TMPDIR"
echo -e "${closing_rule}\n\n\n\n"
# ================================================================================ | |
# ENVIRONMENTAL VARIABLES AVAILABLE | |
# JOB_ID Current job ID | |
# JOB_NAME Current job name | |
# NSLOTS The number of slots (threads or processors) requested by a job | |
# HOSTNAME Name of execution host | |
# SGE_TASK_ID Array Job task index number | |
# SGE_TASK_STEPSIZE The step size of the array job specification | |
# SGE_TASK_FIRST The index number of the first array job task | |
# SGE_TASK_LAST The index number of the last array job task | |
# TMPDIR The absolute path to the job's temporary working directory | |
#------------------------------------------------------------------------------- | |
# COMPUTING DIRECTIVES | |
# Specify hard time limit for the job; default time is 12 hours | |
#$ -l h_rt=48:00:00 | |
# Memory | |
# Request a node with at least 128GB of memory (16 cores x 8GB per core = 128GB total) | |
#$ -l mem_total=128G # Request a node that has at least 128G of total memory | |
#$ -l mem_per_core=8G # Request a node with at least 8 GB of memory per core | |
# Request a parallel environment with 16 cores | |
#$ -pe omp 16 | |
# Request my job to run on Buy-in Compute group hardware project has access to | |
#$ -l buyin | |
#------------------------------------------------------------------------------- | |
# ACTION DIRECTIVES | |
# Set SCC project | |
#$ -P project_name | |
# Job name | |
#$ -N job_name | |
# Specify the output file name | |
#$ -o "logs/$JOB_NAME.stdout" | |
# Specify stderr output | |
#$ -e "logs/$JOB_NAME.stderr" | |
# Send an email (by default no email is sent). The possible values are - | |
# job begins (b), ends (e), is aborted (a), is suspended (s), or never (n) - default | |
#$ -m beas | |
# Email address to send email
#$ -M user_email | |
# All current environment variables should be exported to the batch job | |
#$ -V | |
# Set runtime environment variable for SGE Cluster Mode (sge.template) | |
#$ -v SGE_CLUSTER_NAME=SGE | |
#$ -v SGE_CELL=default | |
# Run in current directory | |
#$ -cwd |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash -l | |
# cellranger.qsub - A template file to run Cell Ranger on Sun Grid Engine (SGE) | |
# Andrés Bretón ~ http://andresbreton.com, [email protected] | |
# Template from https://gist.github.com/bretonics/f6c57474bb077d2acd293092cf7fa449# | |
#------------------------------------------------------------------------------- | |
# ENVIRONMENTAL VARIABLES AVAILABLE | |
# JOB_ID Current job ID | |
# JOB_NAME Current job name | |
# NSLOTS The number of slots (threads or processors) requested by a job | |
# HOSTNAME Name of execution host | |
# SGE_TASK_ID Array Job task index number | |
# SGE_TASK_STEPSIZE The step size of the array job specification | |
# SGE_TASK_FIRST The index number of the first array job task | |
# SGE_TASK_LAST The index number of the last array job task | |
# TMPDIR The absolute path to the job's temporary working directory | |
#------------------------------------------------------------------------------- | |
# COMPUTING DIRECTIVES | |
# Specify hard time limit for the job | |
# The job will be aborted if it runs longer than this time | |
# The default time is 12 hours | |
#$ -l h_rt=24:00:00 | |
# Memory | |
# Request a node with at least 128GB of memory (16 cores x 8GB per core = 128GB total) | |
#$ -l mem_total=128G # Request a node that has at least 128G of total memory | |
#$ -l mem_per_core=8G # Request a node with at least 8 GB of memory per core | |
# Request a parallel environment with 16 cores | |
#$ -pe omp 16 | |
# Request my job to run on Buy-in Compute group hardware project has access to | |
#$ -l buyin | |
#------------------------------------------------------------------------------- | |
# ACTION DIRECTIVES | |
# Set SCC project | |
#$ -P project_name | |
# Job name | |
#$ -N job_name | |
# Merge the error and output streams into a single file | |
#$ -j y | |
# Specify the output file name | |
#$ -o $JOB_NAME.qlog | |
# Send an email (by default no email is sent). The possible values are - | |
# job begins (b), ends (e), is aborted (a), is suspended (s), or never (n) - default | |
#$ -m eas | |
# Email address to send email
#$ -M user_email | |
# All current environment variables should be exported to the batch job | |
#$ -V | |
# Set runtime environment variable for SGE Cluster Mode (sge.template) | |
#$ -v SGE_CLUSTER_NAME=SGE | |
#$ -v SGE_CELL=default | |
# Run in current directory | |
#$ -cwd | |
#-------------------------------------------------------------------------------- | |
# JOB
# Counts-only job: runs `cellranger count` for every listed sample from inside
# an existing mkfastq output directory.

# Project name
project=""
# Path to sequencing run folder (defined here because csv below depends on it)
run="../Sequencing/${project}"
# mkfastq demultiplexing output directory name
id=$project
# Path to simple CSV file
csv="${run}/${project}.csv"
# List of sample names for each library in sequencing run (whitespace separated)
samples=""
# FASTQs path folder
fastqs="outs/fastq_path/"
# Transcriptome references:
# GRCh38-1.2.0, hg19-1.2.0, mm10-1.2.0, mm10-2.1.0, hg19_and_mm10-1.2.0, hg19_and_mm10-2.1.0, ercc92-1.2.0
transcriptome=""
# Path to the Cell Ranger compatible transcriptome reference.
# ${HOME} is used because '~' is not expanded inside double quotes.
ref="${HOME}/References/cellranger/refdata-cellranger-${transcriptome}"
# Jobmode "sge" or path to custom sge.template.
# A relative path is resolved from inside the ${id} directory, because the
# count calls run after the cd below.
mode="../sge.template"

#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Load modules
modules=(bcl2fastq cellranger)
module load "${modules[@]}"
# Save environment to file
env > "${JOB_NAME}.env"

#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Keep track of information related to the current job
echo "================================================================================"
echo "Start: $(date)"
echo "User: $USER"
echo "Host: $HOSTNAME"
echo "Job name: $JOB_NAME"
echo "Job ID: $JOB_ID"
echo "Task ID: $SGE_TASK_ID"
echo "PWD: $(pwd)"
module list
echo "================================================================================"
printf '\n\n\n'

#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# CALLS
# Check if mkfastq demultiplexing directory exists for sequencing run "id".
# The expansion is quoted so an empty id fails the test instead of passing it.
if [ ! -d "${id}" ]; then
  echo "Could not change to $id directory."
  echo "Did mkfastq work?"
  echo "Check if current directory contains project $id from mkfastq call."
  echo "Currently at $(pwd)"
  exit 126
fi

echo ""
echo "Changing to $id"
cd "${id}" || exit 126
echo "Changed to $(pwd)"
printf '\n\n'

# Normalise the sample list: works whether `samples` was filled in as a
# whitespace-separated string or as an array.
read -r -a sample_list <<< "${samples[*]}"
echo "Running counts on: ${sample_list[*]}"
echo "================================================================================"

# Run counts pipeline
for sample in "${sample_list[@]}"; do
  # The job mode comes from `mode` above; the command is an array so
  # arguments containing spaces stay intact.
  count_cmd=(cellranger count "--sample=${sample}" "--id=${sample}" "--fastqs=${fastqs}" "--transcriptome=${ref}" "--jobmode=${mode}")
  echo ""
  echo "Running counts for $sample: ${count_cmd[*]}"
  printf '\n\n'
  "${count_cmd[@]}"
done

#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# End job message
printf '\n\n\n'
echo "================================================================================"
echo "End: $(date)"
echo "User: $USER"
echo "Cores/Threads: $NSLOTS"
echo "Temporary Directory: $TMPDIR"
echo "================================================================================"
echo ""
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash -l | |
# cellranger.qsub - A template file to run Cell Ranger on Sun Grid Engine (SGE) | |
# Andrés Bretón ~ http://andresbreton.com, [email protected] | |
# Template from https://gist.github.com/bretonics/f6c57474bb077d2acd293092cf7fa449# | |
#------------------------------------------------------------------------------- | |
# ENVIRONMENTAL VARIABLES AVAILABLE | |
# JOB_ID Current job ID | |
# JOB_NAME Current job name | |
# NSLOTS The number of slots (threads or processors) requested by a job | |
# HOSTNAME Name of execution host | |
# SGE_TASK_ID Array Job task index number | |
# SGE_TASK_STEPSIZE The step size of the array job specification | |
# SGE_TASK_FIRST The index number of the first array job task | |
# SGE_TASK_LAST The index number of the last array job task | |
# TMPDIR The absolute path to the job's temporary working directory | |
#------------------------------------------------------------------------------- | |
# COMPUTING DIRECTIVES | |
# Specify hard time limit for the job | |
# The job will be aborted if it runs longer than this time | |
# The default time is 12 hours | |
#$ -l h_rt=24:00:00 | |
# Memory | |
# Request a node with at least 128GB of memory (16 cores x 8GB per core = 128GB total) | |
#$ -l mem_total=128G # Request a node that has at least 128G of total memory | |
#$ -l mem_per_core=8G # Request a node with at least 8 GB of memory per core | |
# Request a parallel environment with 16 cores | |
#$ -pe omp 16 | |
# Request my job to run on Buy-in Compute group hardware project has access to | |
#$ -l buyin | |
#------------------------------------------------------------------------------- | |
# ACTION DIRECTIVES | |
# Set SCC project | |
#$ -P project_name | |
# Job name | |
#$ -N job_name | |
# Merge the error and output streams into a single file | |
#$ -j y | |
# Specify the output file name | |
#$ -o $JOB_NAME.qlog | |
# Send an email (by default no email is sent). The possible values are - | |
# job begins (b), ends (e), is aborted (a), is suspended (s), or never (n) - default | |
#$ -m eas | |
# Email address to send email
#$ -M user_email | |
# All current environment variables should be exported to the batch job | |
#$ -V | |
# Set runtime environment variable for SGE Cluster Mode (sge.template) | |
#$ -v SGE_CLUSTER_NAME=SGE | |
#$ -v SGE_CELL=default | |
# Run in current directory | |
#$ -cwd | |
#-------------------------------------------------------------------------------- | |
# JOB
# mkfastq-only job: demultiplexes the BCL files of one sequencing run.

# Project name
project=""
# Path to sequencing run folder
run="../Sequencing/${project}"
# mkfastq demultiplexing output directory name
id=$project
# Path to simple CSV file
csv="${run}/${project}.csv"
# Jobmode "sge" or path to custom sge.template
mode="../sge.template"

#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Load modules
modules=(bcl2fastq cellranger)
module load "${modules[@]}"
# Save environment to file
env > "${JOB_NAME}.env"

# Keep track of information related to the current job
echo "================================================================================"
echo "Start: $(date)"
echo "User: $USER"
echo "Host: $HOSTNAME"
echo "Job name: $JOB_NAME"
echo "Job ID: $JOB_ID"
echo "Task ID: $SGE_TASK_ID"
echo "PWD: $(pwd)"
module list
echo "================================================================================"
printf '\n\n\n'

#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# CALLS
# Run mkfastq pipeline (demultiplex BCL files).
# The job mode comes from `mode` above; the command is an array so arguments
# containing spaces stay intact.
mkfastq_cmd=(cellranger mkfastq "--run=${run}" "--id=${id}" "--csv=${csv}" "--jobmode=${mode}")
echo "Running mkfastq: ${mkfastq_cmd[*]}"
echo "================================================================================"
echo ""
"${mkfastq_cmd[@]}"
# Remember the pipeline result; the echoes below would otherwise overwrite $?.
mkfastq_status=$?

#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# End job message
printf '\n\n\n'
echo "================================================================================"
echo "End: $(date)"
echo "User: $USER"
echo "Cores/Threads: $NSLOTS"
echo "Temporary Directory: $TMPDIR"
echo "================================================================================"
echo ""

# Finish with the mkfastq exit status so a failed run is not reported as a
# successful job.
exit "${mkfastq_status}"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash -l | |
# sge.template - Template file for Cell Ranger cluster mode job submissions | |
# Andrés Bretón ~ http://andresbreton.com, [email protected] | |
# Template from https://gist.github.com/bretonics/f6c57474bb077d2acd293092cf7fa449#file-sge-template | |
#------------------------------------------------------------------------------- | |
# MRO (MARTIAN RUNTIME) ENVIRONMENTAL VARIABLES AVAILABLE | |
# __MRO_JOB_NAME__ Job name composed of the sample ID and stage being executed (Required) | |
# __MRO_JOB_WORKDIR__ Specify the absolute path to the directory where the job should execute. | |
# __MRO_ACCOUNT__ Charge resources to specific accounts by passing mrp's environment account. | |
# __MRO_THREADS__ Number of threads required by the stage | |
# __MRO_MEM_GB__ Amount of memory in GB required by the stage | |
# __MRO_MEM_MB__ -or- in MB | |
# __MRO_MEM_GB_PER_THREAD__ Amount of memory in GB required per thread in multi-threaded stages | |
# __MRO_MEM_MB_PER_THREAD__ -or- in MB | |
# __MRO_STDOUT__ Paths to the _stdout metadata files for the stage (Required) | |
# __MRO_STDERR__ -or- to the _stderr metadata files for the stage (Required) | |
# __MRO_CMD__ Bourne shell command to run the stage code (Required) | |
#------------------------------------------------------------------------------- | |
# SGE ENVIRONMENTAL VARIABLES AVAILABLE | |
# JOB_ID Current job ID | |
# JOB_NAME Current job name | |
# NSLOTS The number of slots (threads or processors) requested by a job | |
# HOSTNAME Name of execution host | |
# SGE_TASK_ID Array Job task index number | |
# SGE_TASK_STEPSIZE The step size of the array job specification | |
# SGE_TASK_FIRST The index number of the first array job task | |
# SGE_TASK_LAST The index number of the last array job task | |
# TMPDIR The absolute path to the job's temporary working directory | |
#------------------------------------------------------------------------------- | |
# COMPUTING DIRECTIVES | |
# Specify hard time limit for the job | |
# The job will be aborted if it runs longer than this time | |
# The default time is 12 hours | |
#$ -l h_rt=24:00:00 | |
# Memory | |
# Request a node with at least __MRO_MEM_GB__ of free memory | |
#$ -l mem_free=__MRO_MEM_GB__G | |
# Request a parallel environment with __MRO_THREADS__ cores | |
#$ -pe omp __MRO_THREADS__ | |
#------------------------------------------------------------------------------- | |
# ACTION DIRECTIVES | |
# Set SCC project | |
#$ -P my_project | |
# Job name | |
#$ -N __MRO_JOB_NAME__ | |
# Stdout output | |
#$ -o __MRO_STDOUT__ | |
# Stderr output | |
#$ -e __MRO_STDERR__ | |
# Send an email (by default no email is sent). The possible values are - | |
# job begins (b), ends (e), is aborted (a), is suspended (s), or never (n) – default | |
#$ -m as | |
# Email address to send email
#$ -M user_email | |
# All current environment variables should be exported to the batch job | |
#$ -V | |
# Set runtime environment variable for SGE Cluster Mode (sge.template) | |
#$ -v SGE_CLUSTER_NAME=SGE | |
# Run in current directory | |
#$ -cwd | |
# Interpreting shell for the job | |
#$ -S "/usr/bin/env bash" | |
#------------------------------------------------------------------------------- | |
# JOB
# The double-underscore MRO tokens below are template placeholders that are
# replaced textually before this script is submitted; they are not shell
# variables. They are therefore written without a leading '$', which would
# make the shell expand the substituted text as a variable name.
# Keep track of information related to the current job
echo "# ================================================================================"
echo "# JOB NAME: __MRO_JOB_NAME__"
echo "# START: $(date)"
echo "# PWD: $(pwd)"
echo "# USER: $USER"
echo "# HOST: $HOSTNAME"
echo "# JOB ID: $JOB_ID"
echo "# TASK ID: $SGE_TASK_ID"
echo "# STDOUT: __MRO_STDOUT__"
echo "# STDERR: __MRO_STDERR__"
echo "#"
echo "# [REQUESTED]"
echo "# FREEMEM: __MRO_MEM_GB__G"
echo "# THREADS: __MRO_THREADS__"
echo "# ================================================================================"
printf '\n\n\n'

#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# STAGE CALL
echo "#--------------------------------------------------------------------------------"
echo "# Running __MRO_JOB_NAME__:"
echo "# __MRO_CMD__"
echo "#--------------------------------------------------------------------------------"
printf '\n\n'
__MRO_CMD__
# Remember the stage result; the echoes below would otherwise overwrite $?.
stage_status=$?

#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# End job message
printf '\n\n\n'
echo "#================================================================================"
echo "# End: $(date)"
echo "# User: $USER"
echo "# Cores/Threads: $NSLOTS"
echo "# Temporary Directory: $TMPDIR"
echo "#================================================================================"
echo ""

# Finish with the stage command's exit status instead of the last echo's.
exit "${stage_status}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment