Skip to content

Instantly share code, notes, and snippets.

@andrewparkermorgan
Created September 10, 2025 00:33
Show Gist options
  • Save andrewparkermorgan/1a78ed978c3a1a79ea404fe8750ef2d7 to your computer and use it in GitHub Desktop.
Save andrewparkermorgan/1a78ed978c3a1a79ea404fe8750ef2d7 to your computer and use it in GitHub Desktop.
#! /usr/bin/env python3
import os
import sys
import re
import subprocess as sp
import argparse as ap
import logging
MALE_CHROMS = r'Y$'
FEMALE_CHROMS = r'X$'
REGION_TYPES = ["A","X","Y"]
query_regions = { _: [] for _ in REGION_TYPES }
parser = ap.ArgumentParser("Use relative read depth on autosomes, X and Y chromosomes to confirm sex chromosome karyotype.")
parser.add_argument("-r","--regions", type = ap.FileType("rU"),
help = "bed file of regions to use for coverage estimation")
parser.add_argument("bam", nargs = "+",
help = "bam file of alignments for which to do the calculation")
args = parser.parse_args()
logging.basicConfig(level = logging.INFO)
logging.info("Reading list of query regions from <{}>...".format(args.regions.name))
for line in args.regions:
pieces = line.strip().split()
if len(pieces) < 3:
continue
if pieces[0].startswith("#"):
continue
chrom, pos_start, pos_end = pieces[0], int(pieces[1]), int(pieces[2])+1
region_str = "{}:{}-{}".format(chrom, pos_start, pos_end)
if re.search(MALE_CHROMS, chrom):
query_regions["Y"].append( region_str )
elif re.search(FEMALE_CHROMS, chrom):
query_regions["X"].append( region_str )
else:
query_regions["A"].append( region_str )
for rty in REGION_TYPES:
logging.info("\t{}: {} regions".format(rty, len(query_regions[rty])))
logging.info("Processing {} bam files ...".format(len(args.bam)))
print("#bam","A","X","Y","XA","YX", sep = "\t")
for this_bam in args.bam:
logging.info("\t{} ...".format(this_bam))
covg_by_region = { _: float("nan") for _ in REGION_TYPES }
for rty in REGION_TYPES:
n_regions = 0
covg_mean_sum = 0.0
for region_str in query_regions[rty]:
cmd = "samtools coverage -H -r {} {}".format(region_str, this_bam)
proc = sp.run(cmd, stdout = sp.PIPE, stderr = sp.PIPE, shell = True, text = True)
for line in proc.stdout.strip().split("\n"):
rez = line.strip().split()
if len(rez) < 9:
break
n_regions += 1
covg_mean_sum += float(rez[6])
covg_by_region[rty] = covg_mean_sum/float(n_regions)
covg_A = covg_by_region["A"]
covg_X = covg_by_region["X"]
covg_Y = covg_by_region["Y"]
print(os.path.basename(this_bam), covg_A, covg_X, covg_Y, covg_X/covg_A, covg_Y/covg_X, sep = "\t")
logging.info("Done.\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment