Skip to content

Instantly share code, notes, and snippets.

@BrunoGrandePhD
Created June 16, 2016 23:14
Show Gist options
  • Save BrunoGrandePhD/679f649e133099d16bd6fc190e3bfc51 to your computer and use it in GitHub Desktop.
Save BrunoGrandePhD/679f649e133099d16bd6fc190e3bfc51 to your computer and use it in GitHub Desktop.
A simple R script to convert an ICGC simple mutations file into the TCGA MAF format for re-annotation with maf2maf (see vcf2maf)
suppressPackageStartupMessages({
library(readr)
library(dplyr)
})
# Names of the output columns, in the order they are written.
# The script later adds any of these that the conversion did not populate
# (as empty columns) and reorders the table to match this vector.
maf_cols <- c(
  # Gene / centre / genome build
  "Hugo_Symbol",
  "Entrez_Gene_Id",
  "Center",
  "NCBI_Build",
  # Genomic location
  "Chromosome",
  "Start_Position",
  "End_Position",
  "Strand",
  # Variant description
  "Variant_Classification",
  "Variant_Type",
  "Reference_Allele",
  "Tumor_Seq_Allele1",
  "Tumor_Seq_Allele2",
  "dbSNP_RS",
  "dbSNP_Val_Status",
  # Sample identifiers
  "Tumor_Sample_Barcode",
  "Matched_Norm_Sample_Barcode",
  # Matched-normal and validation alleles
  "Match_Norm_Seq_Allele1",
  "Match_Norm_Seq_Allele2",
  "Tumor_Validation_Allele1",
  "Tumor_Validation_Allele2",
  "Match_Norm_Validation_Allele1",
  "Match_Norm_Validation_Allele2",
  # Status fields
  "Verification_Status",
  "Validation_Status",
  "Mutation_Status",
  # Sequencing metadata
  "Sequencing_Phase",
  "Sequence_Source",
  "Validation_Method",
  "Score",
  "BAM_File",
  "Sequencer",
  # Sample UUIDs
  "Tumor_Sample_UUID",
  "Matched_Norm_Sample_UUID"
)
# Command-line interface: two positional arguments.
#   1. input  - path to the ICGC simple mutations TSV file to convert
#   2. output - path of the MAF-style TSV file to write
# Any further arguments are ignored.
args <- commandArgs(trailingOnly = TRUE)

# Fail early with a usage message. Without this check a missing argument
# leaves `input`/`output` as NA and the script only fails later, inside the
# read or write call, with an error that does not mention the real cause.
if (length(args) < 2) {
  stop(
    "Usage: Rscript <script.R> <input_icgc_mutations.tsv> <output_maf.tsv>",
    call. = FALSE
  )
}

input <- args[1]
output <- args[2]

# Example values, handy when stepping through the script interactively:
# input <- "simple_somatic_mutation.open.MALY-DE.tsv"
# output <- "mutations.MALY-DE.tsv"
# Read every column as text instead of letting readr guess the types.
# Guessing only inspects the leading rows, which is unsafe for this data:
#   * a chromosome column that starts with numeric names is typed as a number,
#     so later values such as "X", "Y" or "MT" fail to parse and become NA;
#   * an allele column that happens to contain only "T" is guessed as logical;
#   * positions read as doubles can be written back in scientific notation.
# None of the columns are used arithmetically, so passing them through as
# strings reproduces the input values verbatim in the output.
raw_mutations <- read_tsv(input, col_types = cols(.default = col_character()))

# Input columns the conversion below depends on. Check them up front so a
# file with a different layout produces an error that names what is missing.
icgc_cols <- c(
  "assembly_version", "chromosome", "chromosome_start", "chromosome_end",
  "reference_genome_allele", "mutated_from_allele", "mutated_to_allele",
  "submitted_sample_id", "submitted_matched_sample_id", "mutation_type"
)
missing_cols <- setdiff(icgc_cols, colnames(raw_mutations))
if (length(missing_cols) > 0) {
  stop(
    "Input file is missing required column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Supported values of `mutation_type` and the Variant_Type each one maps to.
# Rows with any other (or missing) mutation type are dropped.
variant_types <- c(
  "single base substitution" = "SNP",
  "deletion of <=200bp" = "DEL",
  "insertion of <=200bp" = "INS"
)

mutations <- raw_mutations %>%
  # `%in%` never yields NA, so rows with a missing mutation_type are
  # excluded here as well.
  filter(mutation_type %in% names(variant_types)) %>%
  transmute(
    NCBI_Build = assembly_version,
    Chromosome = chromosome,
    Start_Position = chromosome_start,
    End_Position = chromosome_end,
    Strand = "+",
    Reference_Allele = reference_genome_allele,
    Tumor_Seq_Allele1 = mutated_from_allele,
    Tumor_Seq_Allele2 = mutated_to_allele,
    Tumor_Sample_Barcode = submitted_sample_id,
    Matched_Norm_Sample_Barcode = submitted_matched_sample_id,
    # Named-vector lookup; unname() keeps the column a plain character vector.
    Variant_Type = unname(variant_types[mutation_type])
  )
# Output columns that the conversion did not populate: create them all at
# once, filled with NA.
remaining_cols <- maf_cols[!(maf_cols %in% names(mutations))]
mutations[remaining_cols] <- NA

# Put the columns in the order given by `maf_cols`, then write the table as
# tab-separated text; NA values are written as empty fields.
mutations <- mutations[maf_cols]
write_tsv(mutations, output, na = "")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment