Skip to content

Instantly share code, notes, and snippets.

@zhoujj2013
Created July 16, 2014 08:11
Show Gist options
  • Save zhoujj2013/2f44ba0c0437f0241cc4 to your computer and use it in GitHub Desktop.
Save zhoujj2013/2f44ba0c0437f0241cc4 to your computer and use it in GitHub Desktop.
convert sam to wig format
#!/usr/bin/perl -w
use strict;
use Data::Dumper;
use Getopt::Long;
# Written by [email protected]
# convert sam to wig format
my ($bin,$mq,$normalize, $chunk_size);
GetOptions(
"bin:i"=>\$bin,
"minMQ:i"=>\$mq,
"norm"=>\$normalize,
"chunk_size:i"=>\$chunk_size
);
$chunk_size ||= 500;
$mq ||= 30;
$bin ||= 50;
my $j = 1;
my %h;
my $chr = "#";
my $pre_end = 0;
while(<>){
next if(/^@/);
my @t = split /\t/;
next if($t[4] < $mq);
my $c = $t[2]; # chromosome name
my $s = $t[3]; # start coordinate
if($j%500 == 0){
#print STDERR Dumper(sort {$a <=> $b} keys %h);
output(\%h,$s,$bin);
}
# check chromosome
if($chr eq "#"){
$chr = $c;
print "variableStep chrom=$chr";
unless($bin eq 1){
print " span=$bin\n";
}else{
print "\n";
}
}elsif($chr ne $c){
$chr = $c;
#print STDERR Dumper(sort {$a <=> $b} keys %h);
output(\%h,$pre_end,$bin);
print "variableStep chrom=$chr";
unless($bin eq 1){
print " span=$bin\n";
}else{
print "\n";
}
$chunk_size = 1;
$j = 1;
}
my $map_str = $t[5];
my @map_num = /(\d+)M/g;
my $mapped_len = sum(\@map_num);
#print "$mapped_len\n";
foreach my $step (1 .. $mapped_len){
my $index = $step - 1 + $s;
$h{$index}++;
}
$pre_end = $mapped_len - 1 + $s;
$j++;
# check reverse/forward mapped, I didn't use this.
#if($t[1] & 16) {
# print "Reverse\n";
#}else{
# print "Forward\n";
#}
}
sub output{
my ($hh, $end, $bin) = @_;
my @h_sorted = sort {$a <=> $b} keys %$hh;
my @dep; # pos index
for(my $i=0; $i < scalar(@h_sorted); $i++){
last if($h_sorted[$i] >= $end);
if($i == 0 || scalar(@dep) == 0){
push @dep,$h_sorted[$i];
}elsif($h_sorted[$i] - $dep[0] + 1 > $bin){
# get the true dep
my @true_dep;
foreach(@dep){
push @true_dep,$hh->{$_};
delete $hh->{$_};
}
my $avg_dep = sprintf "%.3f", sum(\@true_dep)/scalar(@dep);
print "$dep[0] $avg_dep\n";
@dep = ();
push @dep,$h_sorted[$i];
}elsif($h_sorted[$i] - $dep[0] + 1 <= $bin){
push @dep,$h_sorted[$i];
}
}
if(scalar(@dep) > 0){
my @true_dep;
foreach(@dep){
push @true_dep,$hh->{$_};
delete $hh->{$_};
}
my $avg_dep = sprintf "%.3f", sum(\@true_dep)/scalar(@dep);
print "$dep[0] $avg_dep\n";
@dep = ();
}
}
sub sum{
my $arr = shift;
my $all = 0;
foreach(@$arr){
$all += $_;
}
return $all;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment