Skip to content

Instantly share code, notes, and snippets.

@crazyhottommy
Last active May 9, 2016 21:26
Show Gist options
  • Save crazyhottommy/49c6c5fb89d62ef7eaac4224fa8d9313 to your computer and use it in GitHub Desktop.
Save crazyhottommy/49c6c5fb89d62ef7eaac4224fa8d9313 to your computer and use it in GitHub Desktop.
cat ref.gene.txt
chr1 1736 4272 DDX11L1 +
chr1 4224 19233 WASH7P -
chr1 4224 7502 LOC100288778 -
chr1 7231 7299 MIR6859-1 -
chr1 7231 7299 MIR6859-2 -
chr1 7231 7299 MIR6859-3 -
chr1 7231 7299 MIR6859-4 -
# This first part does not take in consideration the genes orientation
regions=$1
refgenes=$2
closestBed -D ref -id -a $regions -b $refgenes > up.stream
closestBed -D ref -iu -a $regions -b $refgenes > down.stream
cat down.stream up.stream > total
sort -k1,1 -k2,2n -k6,6n -k8,8 -u total | awk '{if($10<0){print $0"\t""L"}; if($10>0){print $0"\t""R"}; if($10==0){print $0"\t""I"}}' > tmp.total
cat tmp.total > no_strands_$regions.hits
rm *stream tmp.* total


# This second part takes into account the genes orientation

regions=$1
refgenes=$2
echo $refgenes
# get overlapping hits
overlap=$(closestBed -d  -iu -a  $regions -b  $refgenes | awk '$10==0' > overlap.hits)
echo $overlap
# get positive hits
awk '$5 == "+"' $refgenes > positive.tmp
pos=$(closestBed -io -d -a $regions -b positive.tmp > pos.hits)
echo $pos
# get negative hits
awk '$5 == "-"' $refgenes > negative.tmp
neg=$(closestBed -io -d -a $regions -b negative.tmp > neg.hits)
echo $neg
# merge all the results
cat overlap.hits pos.hits neg.hits > $regions.hits
# clean up the directory
rm overlap.hits pos.hits neg.hits *.tmp
# sort the file
sort -k1,1 -k2,2n -k6,6n -k7,7 -u $regions.hits -o $regions.hits
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment