Created
April 13, 2013 07:51
-
-
Save hanfeisun/5377516 to your computer and use it in GitHub Desktop.
Two duplicate function
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def _peaks_parse(input): | |
total = 0 | |
fc20n = 0 | |
fc10n = 0 | |
peaks_info = {} | |
with open(input) as peaks_xls: | |
for line in peaks_xls: | |
if line.startswith('# tags after filtering in treatment'): | |
# tags after filtering in treatment: 13438948 | |
peaks_info["uniloc"] = int(line.strip().split(":")[1]) | |
if line.startswith('# d'): | |
peaks_info["distance"] = int(line.strip().split("=")[1]) | |
if line.strip() != "" and not line.startswith("#") and not line.startswith("chr\t"): | |
l = line.strip().split("\t") | |
total += 1 | |
## column 7th denotes fold change value | |
fc = float(l[7]) | |
if fc >= 20: | |
fc20n += 1 | |
if fc >= 10: | |
fc10n += 1 | |
peaks_info["totalpeak"] = total | |
peaks_info["peaksge20"] = fc20n | |
peaks_info["peaksge10"] = fc10n | |
peaks_info["peaksge20ratio"] = peaks_info["peaksge20"] / peaks_info["totalpeak"] | |
peaks_info["peaksge10ratio"] = peaks_info["peaksge10"] / peaks_info["totalpeak"] | |
return peaks_info | |
def _macs2_summary_parse(input={"macs2_peaks_xls": ""}, param={"id": ""}): | |
"""Basic statistic of peak calling result.""" | |
name = 'dataset' + param["id"] | |
shift_size = 0 | |
with open(input["macs2_peaks_xls"], "rU") as fhd: | |
fold_enrichment_list = [] | |
for i in fhd: | |
i = i.strip() | |
if i.startswith("# qvalue cutoff"): | |
q_value_cutoff = float(i.split('=')[1]) | |
continue | |
if i.startswith("#") or i.startswith("chr\t") or not i: | |
continue | |
if i.startswith("# d"): # parse shift-size, # d = | |
shift_size = int(i.strip().split("=")[1]) / 2 | |
fold_enrichment = float(i.split("\t")[7]) #8th column is fold change | |
fold_enrichment_list.append(fold_enrichment) | |
fold_greater_than_10_peaks = [x for x in fold_enrichment_list if x >= 10] | |
total_peak_count = len(fold_enrichment_list) | |
fold_greater_than_10_peaks_count = len(fold_greater_than_10_peaks) | |
peaks_summary = [name, q_value_cutoff, total_peak_count, fold_greater_than_10_peaks_count, shift_size] | |
return {"peaks_summary": peaks_summary, "fold_gt_10_peaks_count": fold_greater_than_10_peaks_count} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment