Created
September 10, 2025 00:33
-
-
Save andrewparkermorgan/1a78ed978c3a1a79ea404fe8750ef2d7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #! /usr/bin/env python3 | |
| import os | |
| import sys | |
| import re | |
| import subprocess as sp | |
| import argparse as ap | |
| import logging | |
| MALE_CHROMS = r'Y$' | |
| FEMALE_CHROMS = r'X$' | |
| REGION_TYPES = ["A","X","Y"] | |
| query_regions = { _: [] for _ in REGION_TYPES } | |
| parser = ap.ArgumentParser("Use relative read depth on autosomes, X and Y chromosomes to confirm sex chromosome karyotype.") | |
| parser.add_argument("-r","--regions", type = ap.FileType("rU"), | |
| help = "bed file of regions to use for coverage estimation") | |
| parser.add_argument("bam", nargs = "+", | |
| help = "bam file of alignments for which to do the calculation") | |
| args = parser.parse_args() | |
| logging.basicConfig(level = logging.INFO) | |
| logging.info("Reading list of query regions from <{}>...".format(args.regions.name)) | |
| for line in args.regions: | |
| pieces = line.strip().split() | |
| if len(pieces) < 3: | |
| continue | |
| if pieces[0].startswith("#"): | |
| continue | |
| chrom, pos_start, pos_end = pieces[0], int(pieces[1]), int(pieces[2])+1 | |
| region_str = "{}:{}-{}".format(chrom, pos_start, pos_end) | |
| if re.search(MALE_CHROMS, chrom): | |
| query_regions["Y"].append( region_str ) | |
| elif re.search(FEMALE_CHROMS, chrom): | |
| query_regions["X"].append( region_str ) | |
| else: | |
| query_regions["A"].append( region_str ) | |
| for rty in REGION_TYPES: | |
| logging.info("\t{}: {} regions".format(rty, len(query_regions[rty]))) | |
| logging.info("Processing {} bam files ...".format(len(args.bam))) | |
| print("#bam","A","X","Y","XA","YX", sep = "\t") | |
| for this_bam in args.bam: | |
| logging.info("\t{} ...".format(this_bam)) | |
| covg_by_region = { _: float("nan") for _ in REGION_TYPES } | |
| for rty in REGION_TYPES: | |
| n_regions = 0 | |
| covg_mean_sum = 0.0 | |
| for region_str in query_regions[rty]: | |
| cmd = "samtools coverage -H -r {} {}".format(region_str, this_bam) | |
| proc = sp.run(cmd, stdout = sp.PIPE, stderr = sp.PIPE, shell = True, text = True) | |
| for line in proc.stdout.strip().split("\n"): | |
| rez = line.strip().split() | |
| if len(rez) < 9: | |
| break | |
| n_regions += 1 | |
| covg_mean_sum += float(rez[6]) | |
| covg_by_region[rty] = covg_mean_sum/float(n_regions) | |
| covg_A = covg_by_region["A"] | |
| covg_X = covg_by_region["X"] | |
| covg_Y = covg_by_region["Y"] | |
| print(os.path.basename(this_bam), covg_A, covg_X, covg_Y, covg_X/covg_A, covg_Y/covg_X, sep = "\t") | |
| logging.info("Done.\n") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment