Created
November 18, 2017 08:29
-
-
Save sxv/012b3db4e679675a3a26e3bb43c64f51 to your computer and use it in GitHub Desktop.
make fastq; align; make expression table;
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# usage:
#   python 3reads.py /batch/dir1/ /batch/dir2/        # run all steps on two batches
#   python 3reads.py -2 -3 -loadAndKeep /batch/dir    # run steps 2 and 3 and use STAR --loadAndKeep (-keep is an alias)
#   python 3reads.py /batch/dir1 /batch/dir2/ -13     # run steps 1 and 3 on two batches
import os
import sys

try:
    from shlex import quote as shell_quote  # Python 3.3+
except ImportError:
    from pipes import quote as shell_quote  # Python 2 fallback
# Directory holding the helper scripts run by each pipeline step.
# The trailing slash is kept as-is in case other code concatenates onto it.
base_dir = '/home/sxv/code/s7s/'

# Helper script paths.  os.path.join avoids the doubled separator that
# "'%s/name' % base_dir" produces when base_dir already ends in "/".
MAKE_FASTQ = os.path.join(base_dir, 'make_fastq.sh')
ALIGN = os.path.join(base_dir, 'align_smart-3seq.sh')
MAKE_EXPRESSION_TABLE = os.path.join(base_dir, 'make_expression_table.R')
# Command-line parsing.
#   * an argument starting with "-" is an option:
#       -loadAndKeep / -keep  switch ALIGN to the loadAndKeep align script
#       -<chars>              select pipeline steps; several such options are
#                             concatenated ("-2 -3" selects steps 2 and 3)
#   * any other non-empty argument is a batch directory.
# With no step option at all, every step ('123') runs.
steps = '123'
batch_dirs = []
requested_steps = []  # step selections given explicitly, in command-line order
for arg in sys.argv[1:]:
    if not arg:
        # An empty argument is neither an option nor a usable directory;
        # indexing arg[0] on it would raise IndexError.
        continue
    if not arg.startswith('-'):
        batch_dirs.append(arg)
    elif arg in ('-loadAndKeep', '-keep'):
        ALIGN = os.path.join(base_dir, 'align_smart-3seq.loadAndKeep.sh')
    else:
        requested_steps.append(arg[1:])
if requested_steps:
    # Tracking explicit selections separately keeps the default '123' from
    # doubling as a "nothing selected yet" marker, so "-123 -2" accumulates
    # instead of silently discarding the first option.
    steps = ''.join(requested_steps)
def make_fastq():
    """Step 1: run the make_fastq script in the current batch directory.

    Operates on the module-level ``batch_dir`` (set by the driver loop) and
    ``MAKE_FASTQ``.  The script is copied into the batch directory, run there
    with ``.`` as its argument, and every file matching ``*gz`` is then moved
    into a ``fastq/`` subdirectory.

    Paths are shell-quoted so directories containing spaces or quotes work.
    A failing command is reported on stderr; the remaining commands still run.
    """
    target = shell_quote(batch_dir)
    script = shell_quote(MAKE_FASTQ)
    commands = (
        "cp %s %s" % (script, target),
        "cd %s && bash %s ." % (target, script),
        # The glob is intentionally left unquoted so bash expands it.
        "cd %s && mkdir -p fastq && mv *gz fastq" % target,
    )
    for command in commands:
        # Quote the whole script so bash -c receives it as one argument.
        status = os.system("bash -c %s" % shell_quote(command))
        if status != 0:
            sys.stderr.write(
                "make_fastq: command failed with status %s: %s\n"
                % (status, command)
            )
def align():
    """Step 2: run the align script on the batch's FASTQ files.

    Operates on the module-level ``batch_dir`` (set by the driver loop) and
    ``ALIGN``.  The script is copied into ``<batch_dir>/fastq/``, run there
    with the genome directory and ``*fastq.gz`` as arguments, and files
    matching ``fastq/*ba?`` and ``fastq/*log`` are then moved into ``bam/``.

    Paths are shell-quoted so directories containing spaces or quotes work.
    A failing command is reported on stderr; the remaining commands still run.
    """
    # Presumably a STAR genome index (the path and usage notes suggest so).
    genome_dir = '/media/stroma-common/genome/hg38/star/dbsnp147_gencode25-68/'
    target = shell_quote(batch_dir)
    # Trailing separator keeps cp from creating a plain file named "fastq".
    fastq_dir = shell_quote(os.path.join(batch_dir, 'fastq', ''))
    script = shell_quote(ALIGN)
    commands = (
        "cp %s %s" % (script, fastq_dir),
        # Globs are intentionally left unquoted so bash expands them.
        "cd %s && bash %s %s *fastq.gz" % (fastq_dir, script, genome_dir),
        "cd %s && mkdir -p bam && mv fastq/*ba? fastq/*log bam/" % target,
    )
    for command in commands:
        # Quote the whole script so bash -c receives it as one argument.
        status = os.system("bash -c %s" % shell_quote(command))
        if status != 0:
            sys.stderr.write(
                "align: command failed with status %s: %s\n"
                % (status, command)
            )
def make_expression_table():
    """Step 3: build the expression table from the batch's BAM files.

    Operates on the module-level ``batch_dir`` (set by the driver loop),
    ``MAKE_EXPRESSION_TABLE`` and ``base_dir``.  In ``<batch_dir>/bam`` it:

    1. copies in the R script and runs it with ``--no-rlog``, the GTF
       annotation path and ``*bam``;
    2. converts sheet 1 of ``gene_expression.xlsx`` to ``raw_reads.csv``
       with ``xlsx2csv``;
    3. copies ``raw_reads.csv`` to ``<project>.raw_reads.csv`` next to it and
       into ``<base_dir>/results/``.

    ``<project>`` is the final component of the batch directory path.
    Paths are shell-quoted so directories containing spaces or quotes work.
    A failing command is reported on stderr; the remaining commands still run.
    """
    annotation = '/media/stroma-common/genome/hg38/gencode.v25.annotation.gtf'
    bam_dir = shell_quote(os.path.join(batch_dir, 'bam'))
    script = shell_quote(MAKE_EXPRESSION_TABLE)
    # abspath normalises trailing or repeated slashes and resolves ".", which
    # a manual split('/') would turn into an empty or misleading name.
    project_name = os.path.basename(os.path.abspath(batch_dir))
    project_csv = project_name + '.raw_reads.csv'
    # Joined explicitly so it works whether or not base_dir ends in "/".
    results_csv = os.path.join(base_dir, 'results', project_csv)
    commands = (
        "cp %s %s" % (script, bam_dir),
        # The glob is intentionally left unquoted so bash expands it.
        "cd %s && Rscript %s --no-rlog %s *bam" % (bam_dir, script, annotation),
        "cd %s && xlsx2csv -s 1 gene_expression.xlsx raw_reads.csv" % bam_dir,
        "cd %s && cp raw_reads.csv %s && cp raw_reads.csv %s"
        % (bam_dir, shell_quote(project_csv), shell_quote(results_csv)),
    )
    for command in commands:
        # Quote the whole script so bash -c receives it as one argument.
        status = os.system("bash -c %s" % shell_quote(command))
        if status != 0:
            sys.stderr.write(
                "make_expression_table: command failed with status %s: %s\n"
                % (status, command)
            )
# Driver: for every batch directory, run the selected steps in pipeline order.
# The loop variable must stay the module-level name ``batch_dir`` because the
# step functions read it as a global.
pipeline = (
    ('1', make_fastq),
    ('2', align),
    ('3', make_expression_table),
)
for batch_dir in batch_dirs:
    for step_id, run_step in pipeline:
        if step_id in steps:
            run_step()
# Further optional steps can be added as extra (flag, function) entries above.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment