Skip to content

Instantly share code, notes, and snippets.

@mmterpstra
Last active April 2, 2020 10:36
Show Gist options
  • Save mmterpstra/73561fec04c4742f072f376676d7c3eb to your computer and use it in GitHub Desktop.
Save mmterpstra/73561fec04c4742f072f376676d7c3eb to your computer and use it in GitHub Desktop.
Tool that tries to remove samplenames from bam/sam samplenames are extracted from SM field in header using picard and samtools
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
#Needs samtools binary/picard jarfiles hardcoded in script
my $use = <<"END1";
Use $0 PICARDJAR FILE(s)
Tries to remove samplenames from bam/sam samplenames are extracted from SM field in header
PICARDJAR Needs picard jarfile path in script
FILE Needs *.bam
END1
if (scalar(@ARGV)==0){
print $use;
exit(0);
}
my $sampleCount = 0;
my %sampleNameToSampleCount;
my $picardjar = shift(@ARGV);
for my $bamfile (@ARGV){
next if(not -e $bamfile);
my $header=ExtractSamHeader(\$bamfile);
my @sampleNames = GetSampleNames(\$header);
my $bamOutputName = 'Sample_'.$sampleCount;
if(scalar(@sampleNames) > 1){
my $upperindex =($sampleCount + scalar(@sampleNames) - 1);
$bamOutputName = $bamOutputName.'_to_'.$upperindex;
}
$bamOutputName = $bamOutputName.'.bam';
for my $sampleName (@sampleNames){
my $newSampleName = 'sample'.$sampleCount;
$sampleNameToSampleCount{$sampleName}{$newSampleName} = $bamOutputName;
ReplaceSampleName(\$sampleName,\$newSampleName,\$header);
$sampleCount++;
}
ReplaceSamHeaderAndRenameFile(\$bamfile, \$bamOutputName, \$header, $picardjar);
}
warn Dumper(\%sampleNameToSampleCount);
exit(0);
sub ExtractSamHeader{
my $samtoolsViewCmd = "samtools view -H ".${$_[0]};
warn $samtoolsViewCmd."\n";
my $header = join("",CmdRunner($samtoolsViewCmd));#the header isn't chomped just raw results might get strange results
return $header;
}
sub GetSampleNames{
my @lines = split("\n",${$_[0]});
#warn ${$_[0]}."\n";
my %sampleNames;
for my $line (@lines){
if($line =~ /^\@RG.*SM:(\S*)/){
$line =~ /^\@RG.*SM:(\S*)/;
my $sampleName = $1;
$sampleNames{$sampleName}++;
}
}
warn '##'.join(':',keys(%sampleNames))."##\n";
return keys(%sampleNames);
}
sub SamtoolsIndexBam {
my $samtoolsIndexCmd = "samtools index ".${$_[0]};
warn CmdRunner($samtoolsIndexCmd);
}
sub ReplaceSampleName{
#(\$header)
${$_[2]}=~ s/${$_[0]}/${$_[1]}/g;
#warn $$_[2];
}
sub ReplaceSamHeaderAndRenameFile{
open my $out, '>'. ${$_[1]}.'.samheader.sam';
print $out "${$_[2]}";
my $PicardCmd = "java -jar $_[3] ReplaceSamHeader HEADER=".${$_[1]}.".samheader.sam I=".${$_[0]}." O=".${$_[1]};
warn $PicardCmd."\n";
warn CmdRunner($PicardCmd);
}
sub CmdRunner {
my $ret;
my $cmd = join(" ",@_);
warn localtime( time() ). " [INFO] system call:'". $cmd."'.\n";
@{$ret} = `($cmd )2>&1`;
if ($? == -1) {
die localtime( time() ). " [ERROR] failed to execute: $!\n";
}elsif ($? & 127) {
die localtime( time() ). " [ERROR] " .sprintf "child died with signal %d, %s coredump",
($? & 127), ($? & 128) ? 'with' : 'without';
}elsif ($? != 0) {
die localtime( time() ). " [ERROR] " .sprintf "child died with signal %d, %s coredump",
($? & 127), ($? & 128) ? 'with' : 'without';
}else {
warn localtime( time() ). " [INFO] " . sprintf "child exited with value %d\n", $? >> 8;
}
return @{$ret};
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment