Created
April 26, 2012 16:34
-
-
Save gingi/2500840 to your computer and use it in GitHub Desktop.
A script for smoothing BED files.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/perl | |
use strict; | |
use warnings; | |
use Readonly; | |
use List::MoreUtils qw/any/; | |
use Getopt::Long; | |
use Pod::Usage; | |
Readonly my $CHR_COLUMN => 0; | |
Readonly my $START_COLUMN => 1; | |
Readonly my $END_COLUMN => 2; | |
Readonly my $SCORE_COLUMN => 3; | |
Readonly my $DEFAULT_WINDOW_SIZE => 5e3; | |
Readonly my $TALLY_COUNT => 1; | |
Readonly my $TALLY_ADD => 2; | |
MAIN: { | |
my ($window_size, $input_file, $output_file, $help, $man, $tally); | |
my ($infh, $outfh); | |
GetOptions( | |
"window|w=s" => \$window_size, | |
"input|i=s" => \$input_file, | |
"output|o=s" => \$output_file, | |
"help|h" => \$help, | |
"man|m" => \$man, | |
"count|c" => sub { $tally = $TALLY_COUNT }, | |
"add|a" => sub { $tally = $TALLY_ADD } | |
) or pod2usage(-verbose => 1); | |
pod2usage(-verbose => 1) if ($help); | |
pod2usage(-verbose => 2) if ($man); | |
$window_size ||= $DEFAULT_WINDOW_SIZE; | |
$input_file ||= $ARGV[0]; | |
$output_file ||= $ARGV[1]; | |
if (defined($input_file)) { | |
die "Cannot find input file $input_file.\n" unless (-e $input_file); | |
die "Cannot open input file $input_file.\n" unless (-r $input_file); | |
open($infh, '<', $input_file) | |
or die "Can't open input file $input_file: $!\n"; | |
} else { | |
$infh = \*STDIN; | |
} | |
if (defined($output_file)) { | |
open($outfh, '>', $output_file) | |
or die "Can't open output file $output_file: $!\n"; | |
} else { | |
$outfh = \*STDOUT; | |
} | |
combine_intervals( | |
input => $infh, | |
output => $outfh, | |
window_size => $window_size, | |
tally => $tally | |
); | |
} | |
sub combine_intervals { | |
my %params = ref($_[0]) eq 'HASH' ? %{ $_[0] } : @_; | |
my $infh = $params{input}; | |
my $current_chr = undef; | |
my $window | |
= window($params{output}, $params{window_size}, $params{tally}); | |
while (my $line = <$infh>) { | |
chomp $line; | |
my @row = split(/\s+/, $line); | |
my $interval_size = $row[$END_COLUMN] - $row[$START_COLUMN] + 1; | |
if (!defined $current_chr || $row[$CHR_COLUMN] ne $current_chr) { | |
if ($window->buffer) { | |
$window->print; | |
} | |
$window->init($row[$CHR_COLUMN]); | |
$current_chr = $row[$CHR_COLUMN]; | |
} | |
$window->add_value($row[$START_COLUMN], $row[$END_COLUMN], | |
$row[$SCORE_COLUMN]); | |
while ($window->end < $row[$END_COLUMN]) { | |
$window->print; | |
$window->shift; | |
$window->add_value($row[$START_COLUMN], $row[$END_COLUMN], | |
$row[$SCORE_COLUMN]); | |
} | |
} | |
if ($window->buffer) { | |
$window->print; | |
} | |
} | |
sub window { | |
my ($outfh, $size, $tally) = @_; | |
my $window_start = 0; | |
my $window_end = $size - 1; | |
my $score = 0; | |
my $chr = 5; | |
my $buffer = 0; | |
my $package = "Window"; | |
no strict 'refs'; | |
*{"${package}::init"} = sub { | |
my ($self, $c) = @_; | |
$chr = $c; | |
$window_start = 0; | |
$window_end = $size - 1; | |
$buffer = 1; | |
}; | |
*{"${package}::score"} = sub {$score}; | |
*{"${package}::start"} = sub {$window_start}; | |
*{"${package}::end"} = sub {$window_end}; | |
*{"${package}::shift"} = sub { | |
$window_start += $size; | |
$window_end += $size; | |
$score = 0; | |
$buffer = 1; | |
}; | |
if ($tally) { | |
*{"${package}::print"} = sub { | |
printf $outfh "%-5s %9d %9d %6d\n", $chr, $window_start, | |
$window_end, | |
$score; | |
$buffer = 0; | |
}; | |
my $func | |
= ($tally == $TALLY_COUNT) | |
? sub { $score++; } | |
: sub { $score += $_[0] }; | |
*{"${package}::add_value"} = sub { | |
my ($self, $istart, $iend, $value) = @_; | |
$func->($value); | |
}; | |
} else { | |
*{"${package}::print"} = sub { | |
printf $outfh "%-5s %9d %9d %6.5f\n", $chr, $window_start, | |
$window_end, | |
$score; | |
$buffer = 0; | |
}; | |
*{"${package}::add_value"} = sub { | |
my ($self, $istart, $iend, $iscore) = @_; | |
if ($iscore =~ m/[^\d\.]/) { | |
$iscore = 0; | |
} | |
my $length = $iend - $istart + 1; | |
if ($window_end < $iend) { | |
$iscore *= ($window_end - $istart + 1) / $length; | |
} elsif ($window_start < $iend && $window_start > $iend) { | |
$iscore *= ($iend - $window_end + 1) / $length; | |
} | |
$score += $iscore; | |
$buffer = 1; | |
}; | |
} | |
*{"${package}::buffer"} = sub {$buffer}; | |
use strict 'refs'; | |
return bless {}, $package; | |
} | |
=head1 NAME | |
smooth.pl - Smooths interval data for visualization. | |
=head1 SYNOPSIS | |
perl smooth.pl [options] input.bed output.bed | |
perl smooth.pl [options] < input.bed > output.bed | |
perl smooth.pl [options] | |
Options: | |
-h --help | |
-m --man | |
-w --window | |
-i --input | |
-o --output | |
-c --count | |
-a --add | |
=head1 OPTIONS | |
B<-h --help> | |
Print a brief help message and exit. | |
B<-m --man> | |
Print the man page and exit. | |
B<-w --window> | |
Specify the interval window size (Default: 5,000 bp). | |
B<-i --input> | |
An input BED file. If not specified, will use STDIN. | |
B<-o --output> | |
An output BED file. If not specified, will use STDOUT. | |
B<-c --count> | |
Count the intervals without using the value. This option is useful when the value is non-numeric and just the presence of intervals are meant to be significant (e.g., this region is a gene). | |
B<-a --add> | |
Use the numeric value to obtain the sum over a given window. | |
=head1 DESCRIPTION | |
B<smooth.pl> | |
Takes input (in BED format) of arbitrary values across dense genomic | |
intervals and generates new BED output of uniform length intervals wher the | |
values are smoothed using either a count (if the source value is boolean, | |
meaning it either exists or doesn't) or sum of numeric values. | |
=head1 AUTHOR | |
Shiran Pasternak ([email protected]) | |
=head1 COPYRIGHT | |
Copyright (c) 2012 Cold Spring Harbor Laboratory. | |
=cut |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment