Created
July 22, 2014 21:17
-
-
Save ShujiaHuang/fbee0a58554075ed5bf0 to your computer and use it in GitHub Desktop.
Find the Overlap region between in the two file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Author : Shujia Huang | |
# Date : 2013-01-10 | |
#!/usr/bin/perl | |
use strict; | |
use warnings; | |
use Getopt::Long; | |
my ( $posFile, $regionFile ); | |
GetOptions ( | |
"i=s" => \$posFile, | |
"r=s" => \$regionFile, | |
); | |
Usage() unless $posFile && $regionFile; | |
my %region; | |
my %index; | |
GetRegion ( $regionFile, \%region, \%index ); | |
open IN, $posFile or die "Cannot open file $posFile: $!\n"; | |
while ( <IN> ){ | |
#chr10 105186 105186 T C chr10 105186 T C ...... | |
chomp; | |
my @tmp = split; | |
my $refId = $tmp[0]; | |
next if ( !exists $index{$refId} ); | |
my $flag = 1; | |
my $isOverlap = 0; | |
for( my $i = $index{$refId}; $i < @{$region{$refId}}; ++$i ) { | |
next if ( $tmp[1] > $region{$refId}->[$i][1] ); | |
last if ( $tmp[2] < $region{$refId}->[$i][0] ); | |
if ( $flag ) { | |
$flag = 0; | |
$index{$refId} = $i; | |
} | |
$isOverlap = 1; | |
last; | |
#$length=OverlapLength($tmp[1],$tmp[2],$region{$refId}->[$i][0],$region{$refId}->[$i][1]); | |
} | |
if ( $isOverlap ) { | |
print join "\t", @tmp ; print "\n"; | |
} | |
} | |
close IN; | |
################################################################### | |
################################################################### | |
sub GetRegion { | |
my ( $file, $region, $index ) = @_; | |
open I, $file or die "Cannot open file $file : $!\n"; | |
while ( <I> ) { | |
# chr1 mRNA 65298905 65432187 - NM_002227 JAK1 | |
chomp; | |
my @tmp = split; | |
push ( @{ $$region{$tmp[0]} }, [ @tmp[2,3], $_ ] ); | |
} | |
close I; | |
for my $refId ( keys %$region ) { | |
@{ $$region{$refId} } = sort {$a->[0] <=> $b->[0]} @{ $$region{$refId} }; | |
$$index{ $refId } = 0; | |
} | |
} | |
sub OverlapLength { | |
my ($reg1,$reg2,$tar1,$tar2)= @_; | |
my $length = 0; | |
if($tar1>=$reg1 && $tar1<=$reg2 && $tar2>=$reg2) | |
{ | |
$length=$reg2-$tar1+1; | |
}elsif($tar1>=$reg1 && $tar1<=$reg2 && $tar2<=$reg2) | |
{ | |
$length=$tar2-$tar1+1; | |
}elsif($tar2>=$reg1 && $tar2<=$reg2 && $tar1<=$reg1) | |
{ | |
$length=$tar2-$reg1+1; | |
}else | |
{ | |
$length=$reg2-$reg1+1; | |
} | |
return $length; | |
} | |
################################################################ | |
sub IsOvlap { | |
# | |
my ( $start1, $end1, $start2, $end2 ) = @_; | |
my $isOvlp = 0; | |
$isOvlp = 1 if ( $end1 >= $start2 and $start1 <= $end2 ); | |
return $isOvlp; | |
} | |
sub OvlapReg { | |
# Reture the overlap region. | |
my ( $start1, $end1, $start2, $end2 ) = @_; | |
my ( $start, $end ) = ( 0, 0 ); | |
if ( $end1 >= $start2 and $end1 <= $end2 and $start1 <= $start2 ) { | |
$start = $start2; | |
$end = $end1; | |
} | |
elsif ( $end1 >= $start2 and $end1 <= $end2 and $start1 > $start2 ) { | |
die "The end position is bigger than start position : $end1 > $start1\n" if ( $start1 > $end1 ); | |
$start = $start1; | |
$end = $end1; | |
} | |
elsif ( $start1 <= $start2 and $end1 > $end2 ) { | |
die "The end position is bigger than start position : $end2 > $start2\n" if ( $start2 > $end2 ); | |
$start = $start2; | |
$end = $end2; | |
} | |
elsif ( $start1 <= $end2 and $end1 > $end2 ) { | |
$start = $start1; | |
$end = $end2; | |
} | |
return ( $start, $end ); | |
} | |
sub Usage { | |
print STDERR <<U; | |
Version : 0.0.1 ( 2012-01-10 ) | |
Author : Shujia Huang | |
Created : 2013/01/10 | |
Last Modify : | |
Usage : perl $0 [Options] -i [cout infile] > output | |
Options : | |
-i [str] Position file. Should be sorted! ( Find the positions which in the Region file "-r". ) | |
-r [str] Region file. Not required sorting. | |
U | |
exit(0); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment