Created
June 16, 2016 23:14
-
-
Save BrunoGrandePhD/679f649e133099d16bd6fc190e3bfc51 to your computer and use it in GitHub Desktop.
A simple R script that converts an ICGC simple somatic mutation file into the TCGA MAF format so that it can be re-annotated with maf2maf (see the vcf2maf project).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Attach readr (read_tsv / write_tsv) and dplyr (%>%, transmute, filter).
# Startup messages are suppressed so the script runs quietly from the
# command line.
suppressPackageStartupMessages({
  library(readr)
  library(dplyr)
})
# Names of the MAF columns, in the order they are written to the output file.
# Columns that are not derived from the input are added later as empty columns.
maf_cols <- c(
  "Hugo_Symbol",
  "Entrez_Gene_Id",
  "Center",
  "NCBI_Build",
  "Chromosome",
  "Start_Position",
  "End_Position",
  "Strand",
  "Variant_Classification",
  "Variant_Type",
  "Reference_Allele",
  "Tumor_Seq_Allele1",
  "Tumor_Seq_Allele2",
  "dbSNP_RS",
  "dbSNP_Val_Status",
  "Tumor_Sample_Barcode",
  "Matched_Norm_Sample_Barcode",
  "Match_Norm_Seq_Allele1",
  "Match_Norm_Seq_Allele2",
  "Tumor_Validation_Allele1",
  "Tumor_Validation_Allele2",
  "Match_Norm_Validation_Allele1",
  "Match_Norm_Validation_Allele2",
  "Verification_Status",
  "Validation_Status",
  "Mutation_Status",
  "Sequencing_Phase",
  "Sequence_Source",
  "Validation_Method",
  "Score",
  "BAM_File",
  "Sequencer",
  "Tumor_Sample_UUID",
  "Matched_Norm_Sample_UUID"
)
# Command-line interface: Rscript <script> <input.tsv> <output.tsv>
#   args[1]  path of the ICGC simple mutation TSV file to read
#   args[2]  path of the MAF-formatted TSV file to write
args <- commandArgs(trailingOnly = TRUE)

# Fail early with a usage message instead of letting read_tsv() choke on an
# NA path. The check is skipped in interactive sessions so that the example
# assignments below can still be uncommented and used by hand.
if (length(args) < 2L && !interactive()) {
  stop("Usage: Rscript <script> <input.tsv> <output.tsv>", call. = FALSE)
}

input <- args[1]
output <- args[2]
# input <- "simple_somatic_mutation.open.MALY-DE.tsv"
# output <- "mutations.MALY-DE.tsv"

# Read every column as character. Values are passed through to the output
# untouched, so type guessing gains nothing and can lose data: a chromosome
# column guessed as numeric turns non-numeric names into NA, and numeric
# positions may be re-formatted when written back.
raw_mutations <- read_tsv(input, col_types = cols(.default = col_character()))
# Lookup from the input `mutation_type` label to the MAF `Variant_Type` code.
# Any label that is not listed here has no mapping and its rows are dropped.
variant_type_map <- c(
  "single base substitution" = "SNP",
  "deletion of <=200bp" = "DEL",
  "insertion of <=200bp" = "INS"
)

# Rename the input columns to their MAF counterparts. transmute() keeps only
# the columns named here; every variant is reported on the "+" strand.
# NOTE(review): if the input repeats a mutation on several rows (e.g. one per
# annotated consequence), the output will contain duplicate rows -- confirm
# whether a distinct() step is wanted before re-annotation.
mutations <- raw_mutations %>%
  transmute(
    NCBI_Build = assembly_version,
    Chromosome = chromosome,
    Start_Position = chromosome_start,
    End_Position = chromosome_end,
    Strand = "+",
    Reference_Allele = reference_genome_allele,
    Tumor_Seq_Allele1 = mutated_from_allele,
    Tumor_Seq_Allele2 = mutated_to_allele,
    Tumor_Sample_Barcode = submitted_sample_id,
    Matched_Norm_Sample_Barcode = submitted_matched_sample_id,
    # Unmapped or missing mutation types index to NA in the lookup vector.
    Variant_Type = unname(variant_type_map[as.character(mutation_type)])
  ) %>%
  # Keep only SNP / DEL / INS rows.
  filter(!is.na(Variant_Type))
# MAF columns that were not produced by the conversion above are added as
# all-NA columns so the output carries the complete MAF header.
remaining_cols <- setdiff(maf_cols, colnames(mutations))
for (col in remaining_cols) {
  mutations[[col]] <- NA
}

# Put the columns into MAF order, then write the table as TSV with missing
# values rendered as empty fields.
mutations <- mutations[maf_cols]
write_tsv(mutations, output, na = "")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment