Skip to content

Instantly share code, notes, and snippets.

@mojaveazure
Last active January 7, 2016 23:51
Show Gist options
  • Save mojaveazure/7336b7ff5d8b150490ad to your computer and use it in GitHub Desktop.
Save mojaveazure/7336b7ff5d8b150490ad to your computer and use it in GitHub Desktop.
Extract regions from a BED file and format it for use with ANGSD and ANGSD-wrapper
#!/bin/bash
set -e
set -o pipefail
# Extract regions from a BED file and format it for use with ANGSD and ANGSD-wrapper
function Usage() {
echo -e "\
Usage: $(basename $0) BED_FILE NUMBER [OUTFILE] [SEED] \n\
Where: BED_FILE is a BED file to extract regions from \n\
NUMBER is the number of regions to extract \n\
OUTFILE is an optional name for the output file \n\
(defaults to $(pwd -P)/RegionsFromBed.txt) \n\
SEED is an optional seed value, otherwise generated randomly \n\
" >&2
exit 1
}
# Check to see if we have enough arguments
if [[ "$#" -lt 2 ]]; then Usage; fi
# Assign our arguments
BED_FILE=$1 # What is our BED file?
NUMBER=$2 # How many regions are we extracting?
OUTFILE=${3:-$(pwd -P)/RegionsFromBed.txt} # What is our output file?
SEED=${4:-${RANDOM}} # What is our seed?
# Some basic checks
if [[ -f "${BED_FILE}" ]]; then LINES=$(wc -l "${BED_FILE}"); else echo "Failed to find ${BED_FILE}, exiting" >&2; exit 1; fi # Make sure the BED file exists. If so, find out how many lines there are. If not, exit with error
if [[ "${NUMBER}" > "${LINES}" ]]; then echo "${NUMBER} is greater than ${LINES} in ${BED_FILE}, exiting..." >&2; exit 1; fi # Make sure we're not extracting more regions than listed in the BED_FILE
if [[ -f "${OUTFILE}" ]]; then echo "${OUTFILE} exists, will overwrite in five seconds..." >&2; sleep 5; rm -f "${OUTFILE}"; fi # If a file of the same name as our output file, warn of overwrite
declare -a COLLECTED=()
COUNTER=0
# Start the extraction process
for iteration in $(seq "${NUMBER}") # For the current iteration
do
PREVIOUS=true
while PREVIOUS == true
do
ITER="$((${RANDOM}%${LINES}))"
if [[ "${COLLECTED[@]}" =~ "${ITER}" ]]
then
PREVIOUS=true
else
COLLECTED["${COUNTER}"]="${ITER}"
let "COUNTER += 1"
PREVIOUS=false
fi
done
INFO="$(head -"${ITER}" "${BED_FILE}" | tail -1)"
CONTIG="$(cut -f 1 -d '\t' ${INFO})"
START="$(cut -f 2 -d '\t' ${INFO})"
END="$(cut -f 3 -d '\t' ${INFO})"
echo "${CONTIG}:${START}-${END}" >> "${OUTFILE}"
done
echo -e "SEED:\t${SEED}" > $(dirname "${OUTFILE}")/seed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment