Created
September 11, 2012 12:46
-
-
Save johandahlberg/3698172 to your computer and use it in GitHub Desktop.
A Picard class for replacing read group sample names according to a translation table in a whitespace-separated file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package net.sf.picard.sam; | |
import java.io.BufferedReader; | |
import java.io.DataInputStream; | |
import java.io.File; | |
import java.io.FileInputStream; | |
import java.io.InputStreamReader; | |
import java.util.HashMap; | |
import java.util.List; | |
import java.util.Map; | |
import net.sf.picard.cmdline.CommandLineProgram; | |
import net.sf.picard.cmdline.Option; | |
import net.sf.picard.cmdline.StandardOptionDefinitions; | |
import net.sf.picard.cmdline.Usage; | |
import net.sf.picard.io.IoUtil; | |
import net.sf.picard.util.Log; | |
import net.sf.samtools.SAMFileHeader; | |
import net.sf.samtools.SAMFileHeader.SortOrder; | |
import net.sf.samtools.SAMFileReader; | |
import net.sf.samtools.SAMFileWriter; | |
import net.sf.samtools.SAMFileWriterFactory; | |
import net.sf.samtools.SAMReadGroupRecord; | |
import net.sf.samtools.SAMRecord; | |
public class ReplaceSampleNameInReadGroup extends CommandLineProgram{ | |
@Usage(programVersion="1.0") | |
public String USAGE = "Replaces the read group sample names as defined in the input csv file."; | |
@Option(shortName= StandardOptionDefinitions.INPUT_SHORT_NAME, doc="Input file (bam or sam).") | |
public File INPUT = null; | |
@Option(shortName=StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc="Output file (bam or sam).") | |
public File OUTPUT = null; | |
@Option(shortName=StandardOptionDefinitions.SORT_ORDER_SHORT_NAME, optional=true, | |
doc="Optional sort order to output in. If not supplied OUTPUT is in the same order as INPUT.") | |
public SortOrder SORT_ORDER; | |
@Option(shortName="TR",doc="Whitespace separated translation file with current names in first column and the new names in the second column.") | |
public File TRANSLATION_FILE; | |
private final Log log = Log.getInstance(ReplaceSampleNameInReadGroup.class); | |
/** Required main method implementation. */ | |
public static void main(final String[] argv) { | |
new ReplaceSampleNameInReadGroup().instanceMainWithExit(argv); | |
} | |
protected int doWork() { | |
IoUtil.assertFileIsReadable(INPUT); | |
IoUtil.assertFileIsReadable(TRANSLATION_FILE); | |
IoUtil.assertFileIsWritable(OUTPUT); | |
//Initialize the translation file | |
Map<String, String> translationTable = initializeTranslationTable(TRANSLATION_FILE); | |
SAMFileReader in = new SAMFileReader(INPUT); | |
// create the new header and output file | |
final SAMFileHeader inHeader = in.getFileHeader(); | |
final SAMFileHeader outHeader = inHeader.clone(); | |
List<SAMReadGroupRecord> readGroups = outHeader.getReadGroups(); | |
for (SAMReadGroupRecord samReadGroupRecord : readGroups) { | |
// Change from the old sample name to the new. | |
samReadGroupRecord.setSample(translationTable.get(samReadGroupRecord.getSample())); | |
} | |
outHeader.setReadGroups(readGroups); | |
if (SORT_ORDER != null) outHeader.setSortOrder(SORT_ORDER); | |
final SAMFileWriter outWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(outHeader, | |
outHeader.getSortOrder() == inHeader.getSortOrder(), | |
OUTPUT); | |
for (final SAMRecord read : in) { | |
outWriter.addAlignment(read); | |
} | |
// cleanup | |
in.close(); | |
outWriter.close(); | |
return 0; | |
} | |
private Map<String, String> initializeTranslationTable(File transFile) { | |
Map<String,String> translations = new HashMap<String,String>(); | |
try{ | |
FileInputStream fstream = new FileInputStream(transFile); | |
DataInputStream in = new DataInputStream(fstream); | |
BufferedReader br = new BufferedReader(new InputStreamReader(in)); | |
String strLine; | |
while((strLine = br.readLine()) != null) { | |
String key = strLine.split("\\s+")[0]; | |
String value = strLine.split("\\s+")[1]; | |
translations.put(key, value); | |
} | |
} | |
catch (Exception e) | |
{ | |
System.err.println("Error: " + e.getMessage()); | |
} | |
return translations; | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment