Skip to content

Instantly share code, notes, and snippets.

@hanfeisun
Last active December 14, 2015 21:19
Show Gist options
  • Save hanfeisun/5150128 to your computer and use it in GitHub Desktop.
Save hanfeisun/5150128 to your computer and use it in GitHub Desktop.
def create_workflow(args, conf):
"""
:type conf:ChiLinConfig
"""
root_workflow = Workflow("Main")
ShellCommand("prepare","mkdir -p {output}",output = conf.target_dir).attach_to(root_workflow)
groom_workflow = attach(root_workflow, Workflow("Groom"))
rawQC_workflow = attach(root_workflow, Workflow("RawQC"))
map_workflow = attach(root_workflow, Workflow("ReadsMapping"))
peakcall_workflow = attach(root_workflow, Workflow("PeakCall"))
annotation_workflow = attach(root_workflow, Workflow("Annotation"))
motifscan_workflow = attach(root_workflow, Workflow("MotifScan"))
reportsummary_workflow = attach(root_workflow, Workflow("ReportSummary"))
clean_workflow = attach(root_workflow, Workflow("Clean"))
def step1_prepare_groom_format():
"""
Return the file name pair (raw, target) that can't be groomed
"""
not_groomed = []
for raw, target in conf.sample_map:
if re.search(r"\.bam", raw, re.I):
attach(groom_workflow,
ShellCommand("groom","{tool} -i {input} -fq {output}",
tool = "bamToFastq",
input = raw,
output = target + ".fastq"))
elif re.search(r"\.(fastq|fq)", raw, re.I):
attach(groom_workflow,
ShellCommand("groom","cp {input} {output}",
input = raw,
output = target + ".fastq"))
else:
print(raw, " is neither fastq nor bam file. Skip grooming.")
not_groomed.append(raw,target)
return not_groomed
def step2_prepare_raw_QC():
parse_result_container = []
for raw, target in conf.sample_map:
fastqc_run = attach(rawQC_workflow,
ShellCommand("fastQC","{tool} {input} --extract -t {param[threads]} -o {output[target_dir]}",
input = target + ".fastq",
output= {"target_dir": conf.target_dir,
"summary": target + "_fastqc/fastqc_data.txt"},
tool="fastqc",
param={"threads": 4}).update(param=conf.items("fastqc")))
fastqc_parse = attach(rawQC_workflow,
PythonCommand("fastQC_draw", python_fastqc_parse,
input = fastqc_run.output["summary"],
param = {"id": basename(target)}
))
parse_result_container.append(fastqc_parse)
attach(rawQC_workflow,
PythonCommand("fastQC_draw", python_fastqc_dist_draw,
input = {"db": resource_filename('chilin2', 'db/ChiLinQC.db')},
output= {"rfile": conf.target_prefix + "_historical_draw.r",
"pdf": conf.target_prefix + "_historical_draw.pdf"},
param = LazyCollector(
parse_result_container,
medians = lambda pr: pr.result["median"],
ids = lambda pr: pr.param["id"])))
def step3_prepare_bowtie_map():
for raw, done in conf.sample_map:
attach(map_workflow,
ShellCommand(
"bowtie",
"{tool} -p {param[threads]} -S -m {param[max_align]} \
{param[genome_index]} {input[fastq]} {output[sam]}",
input = {"genome_dir": os.path.dirname(conf.get_path("lib", "genome_index")),
"fastq": done + ".fastq"},
output = {"sam": done + ".sam"},
tool="bowtie",
param={"threads":4,
"max_align":1,
"genome_index": conf.get_path("lib", "genome_index")}).update(param=conf.items("bowtie")))
def step4a_prepare_macs2_peakcall():
for raw,done in conf.sample_map:
attach(peakcall_workflow,
ShellCommand(
"sam_to_bam",
"{tool} view -bt {input[chrom_len]} {input[sam]} -i {output[bam]}",
tool="samtools",
input={"sam":done + ".sam"},
output={"bam":done+ ".bam"}))
attach(peakcall_workflow,
ShellCommand(
"merge_bam",
"{tool} merge {output[merged]} {input[bams]}",
tool = "samtools",
input = {"bams": " ".join(done+".bam" for _, done in conf.treatment_map)},
output= {"merged": conf.target_prefix + "_treatment.bam"}))
attach(peakcall_workflow,
ShellCommand(
"merge_bam",
"{tool} merge {output[merged]} {input[bams]}",
tool = "samtools",
input = {"bams": " ".join(done+".bam" for _, done in conf.control_map)},
output= {"merged": conf.target_prefix + "_control.bam"}))
attach(peakcall_workflow,
ShellCommand(
"macs2_callpeak",
"{tool} callpeak -B -q 0.01 --keep-dup {param[keep_dup]} --shiftsize={param[shiftsize]} --nomodel \
-t {input[treat]} -c {input[control]} -n {param[description]}",
tool = "macs2",
input = {"treat": conf.target_prefix + "_treatment.bam",
"control": conf.target_prefix + "_control.bam"},
output = {"peaks": conf.target_prefix + "_peaks.bed",
"summit": conf.target_prefix + "_summits.bed",
"treat_bdg": conf.target_prefix + "treat_pileup.bdg",
"ENCODE": conf.target_prefix + "_peaks.encodePeak",
"peaks_xls": conf.target_prefix + "_peaks.xls",
"control_bdg": conf.target_prefix + "_control_pileup.bdg"},
param = {"keep_dup": 1})).update(param=conf.items("macs2"))
attach(peakcall_workflow,
ShellCommand(
"bdg_to_bw",
"{tool} {input[bdg]} {input[chrom_len]} {output[bw]}",
tool="bedGraphToBigWig",
input={"bdg": conf.target_prefix + ".bdg",
"chrom_len": conf.get("lib", "chrom_len")},
output={"bw": conf.target_prefix + ".bw"}
)
)
def step4b_prepare_macs2_peakscall_on_rep():
for raw, target in conf.treatment_map:
attach(peakcall_workflow,
ShellCommand(
"macs2_callpeak",
"{tool} callpeak -B -q 0.01 --keep-dup {param[keep_dup]} --shiftsize={param[shiftsize]} --nomodel \
-t {input[treat]} -c {input[control]} -n {param[description]}",
tool = "macs2",
input = {"treat": target + ".bam",
"control": conf.target_prefix + "_control.bam"},
output = {"peaks": target + "_peaks.bed",
"summit": target + "_summits.bed",
"treat_bdg": target + "treat_pileup.bdg",
"ENCODE": target + "_peaks.encodePeak",
"peaks_xls": target + "_peaks.xls",
"control_bdg": target + "_control_pileup.bdg"},
param = {"keep_dup": 1})).update(param=conf.items("macs2"))
def step5_prepare_ceas_annotation():
attach(annotation_workflow,
ShellCommand(
"ceas",
"{tool} --name {param[description]} -b {input[bed]} -w {input[bw]}",
tool="ceasBW",
input={"bed": conf.target_prefix + "_peaks.bed",
"bw": conf.target_prefix + ".bw"},
output={"piechart": ""}))
def step6_prepare_phast_conservation_annotation():
attach(annotation_workflow,
ShellCommand(
"conservation",
)
)
def step7_prepare_mdseqpos_annotation():
attach(motifscan_workflow,
ShellCommand(
"motif",
))
def step8_prepare_report_summary():
attach(clean_workflow,
ShellCommand(
"report",
)
)
step1_prepare_groom_format()
step2_prepare_raw_QC()
step3_prepare_bowtie_map()
step4a_prepare_macs2_peakcall()
step4b_prepare_macs2_peakscall_on_rep()
step5_prepare_ceas_annotation()
step6_prepare_phast_conservation_annotation()
step7_prepare_mdseqpos_annotation()
step8_prepare_report_summary()
return root_workflow
def main(args=None):
args = parse_args(args)
print("Arguments:", args)
conf = ChiLinConfig(args.config)
main_workflow = create_workflow(args, conf)
main_workflow.set_option(verbose_level=args.verbose_level, dry_run=args.dry_run, resume=args.resume)
main_workflow.invoke()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment