Skip to content

Instantly share code, notes, and snippets.

@lh3
Created July 19, 2013 18:07
Show Gist options
  • Save lh3/6041132 to your computer and use it in GitHub Desktop.
Save lh3/6041132 to your computer and use it in GitHub Desktop.
Given a SAM containing multiple hits and the AS tag, choose one with the maximum AS.
#!/usr/bin/env perl
use strict;
use warnings;
&main;
sub main {
my $last = '';
my @lines;
while (<>) {
if (/^@/) {
print;
} else {
my @t = split("\t");
if ($t[1] & 4) {
&pick_line(\@lines) if @lines;
print;
next;
}
my $readno = $t[1] & 0xc0;
my $qname = "$t[0]|$readno";
my $score;
$score = $1 if /\tAS:i:([-\d]+)/;
die unless defined($score); # this script only works when there is the AS field
if ($qname ne $last) {
&pick_line(\@lines) if @lines;
$last = $qname;
}
push(@lines, [$score, \@t]);
}
}
&pick_line(\@lines) if @lines;
}
sub pick_line {
my ($lines) = @_;
my ($max, $max2, $max_t) = (-2000000000, -2000000000, undef);
for my $t (@$lines) {
my $score = $t->[0];
if ($max < $score) {
$max2 = $max;
$max = $score; $max_t = $t->[1];
} elsif ($max2 < $score) {
$max2 = $score;
}
}
die unless defined($max_t);
$max_t->[4] = 0 if $max == $max2; # force mapQ=0 if top 2 have equal score. FIXME: not using PE info
@$lines = ();
print join("\t", @$max_t);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment