Created
July 27, 2023 09:13
-
-
Save PoisonAlien/aa07107d61741e7a036e6a5c38189d52 to your computer and use it in GitHub Desktop.
subset a bigWig file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
#Script to subset a bigWig file for user specific loci | |
#MIT License (Anand Mayakonda; [email protected]) | |
function usage (){ | |
echo "Subset a bigWig file for genomic loci. | |
Requires UCSC kentutils bigWigToBedGraph and bedGraphToBigWig to be installed | |
Binaries available from: https://hgdownload.soe.ucsc.edu/admin/exe/ | |
positional arguments: | |
<bigWig> Input bigWig file | |
<REGION> Genomic region to subset. e.g: chr6:31125776-31144789 | |
optional arguments : | |
-o Output basename. Default <REGION> | |
-r UCSC ref genome build. Default hg19 | |
Usage: bwview [options] <bigWig> <REGION> | |
Example: bwview -r hg38 foo.bw chr6:31125776-31144789 | |
" 1>&2 | |
} | |
DB="hg19" | |
out="" | |
bigWig="" | |
REGION="" | |
while getopts "h?r:o:" OPTION | |
do | |
case "${OPTION}" in | |
h) | |
usage | |
exit 1 | |
;; | |
r) | |
DB="$OPTARG" | |
;; | |
o) | |
out="$OPTARG" | |
;; | |
?) | |
unkOpt="$OPTARG" | |
echo -e "Unknown option: ${unkOpt}" | |
usage | |
exit | |
;; | |
esac | |
done | |
bigWig="${@:$((${OPTIND})):1}" | |
REGION="${@:$((${OPTIND}+1)):1}" | |
if [[ -z "${bigWig}" ]] | |
then | |
usage | |
echo "Missing <bigWig>" 1>&2 | |
exit 1 | |
fi | |
if [[ -z "${REGION}" ]] | |
then | |
usage | |
echo "Missing <REGION>" 1>&2 | |
exit 1 | |
fi | |
#Check if required kent utils exist | |
if [[ -z `which bigWigToBedGraph` ]]; then | |
echo -e "bigWigToBedGraph not found! Get it from here: https://hgdownload.soe.ucsc.edu/admin/exe/" | |
exit | |
fi | |
if [[ -z `which bedGraphToBigWig` ]]; then | |
echo -e "bedGraphToBigWig not found! Get it from here: https://hgdownload.soe.ucsc.edu/admin/exe/" | |
exit | |
fi | |
chr=$(echo ${REGION} | cut -f 1 -d ':') | |
loc=$(echo ${REGION} | cut -f 2 -d ':') | |
start=$(echo ${loc} | cut -f 1 -d '-' | sed 's/,//g') | |
end=$(echo ${loc} | cut -f 2 -d '-' | sed 's/,//g') | |
#Below chunk from ucsc fetchChroSizes | |
echo "Fetching chrom sizes.." 1>&2 | |
if [[ -z "${out}" ]]; then | |
out="${chr}_${start}_${end}" | |
fi | |
export DB | |
if [ -z "${DB}" ]; then | |
usage | |
fi | |
DONE=0 | |
export DONE | |
url="http://hgdownload.soe.ucsc.edu/goldenPath/${DB}/bigZips/${DB}.chrom.sizes" | |
if [ "$DB" == "hg19" -o "$DB" == "hg38" ] ; then | |
#printf "INFO: Using latest patch release of genome for hg19/hg38\n" 1>&2 | |
url="http://hgdownload.soe.ucsc.edu/goldenPath/${DB}/bigZips/latest/${DB}.chrom.sizes" | |
fi | |
tmpResult=${DB}.tmp.sizes$$ | |
curl ${url} \ | |
-s > ${tmpResult} 2> /dev/null | |
if [ $? -ne 0 -o ! -s "${tmpResult}" ]; then | |
printf "WARNING: curl command failed\n" 1>&2 | |
rm -f "${tmpResult}" | |
else | |
# | |
if [[ ${chr} != chr* ]]; then | |
echo "Removing chr prefix from chrom.sizes file.." 1>&2 | |
cat ${tmpResult} | sed 's/^chr//' > ${DB}.chrom.sizes | |
else | |
cat ${tmpResult} > ${DB}.chrom.sizes | |
fi | |
rm -f "${tmpResult}" | |
DONE=1 | |
fi | |
#Convert bigWig to bedGraph to bigWIg | |
bigWigToBedGraph -chrom=${chr} -start=${start} -end=${end} ${bigWig} ${out}.bedgraph | |
bedGraphToBigWig ${out}.bedgraph ${DB}.chrom.sizes ${out}.bw | |
if [[ -f ${out}.bw ]];then | |
echo "Done!" 1>&2 | |
fi | |
rm -f ${out}.bedgraph | |
rm -f ${DB}.chrom.sizes |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment