Skip to content

Instantly share code, notes, and snippets.

@vjcitn
Last active October 23, 2024 12:10
Show Gist options
  • Save vjcitn/a8733bbaf5c91caddb69938467211c76 to your computer and use it in GitHub Desktop.
Save vjcitn/a8733bbaf5c91caddb69938467211c76 to your computer and use it in GitHub Desktop.
create data.frames to help merge AlphaMissense pathogenicity scores to MAF objects from maftools
#' from a MAF instance create a key with ref, alt and positional information,
#' return in a data.frame with key and gene symbol
#' @param mafinst instance of S4 class MAF from maftools
#' @param altfield character(1) name of field to use for mutation substitution
#' @param donorfield character(1) name of field to use for donor
#' @param uniprot_ok character() or NULL, defaults to NULL, if non-NULL, data to be returned will
#' only include records with UNIPROT id matching an element in this vector
#' @export
maf2key = function(mafinst, altfield="Tumor_Seq_Allele2", donorfield="Tumor_Sample_Barcode",
uniprot_ok=NULL) {
stopifnot(is(mafinst, "MAF"))
ld = slot(mafinst, "data")
stopifnot(all(c("Chromosome", "Start_Position", "Reference_Allele", altfield) %in% names(ld)))
alt = ld[[altfield]]
ldclean = ld |> dplyr::filter(nchar(Reference_Allele)==1L,
nchar(local(alt))==1L)
alt = ldclean[[altfield]]
donorid = ldclean[[donorfield]]
key = with(ldclean, paste(Chromosome, Start_Position, Reference_Allele, alt, sep=":"))
gene = ldclean$Hugo_Symbol
tmp = data.frame(key, gene, donor=donorid)
requireNamespace("org.Hs.eg.db")
db = org.Hs.eg.db::org.Hs.eg.db
up = AnnotationDbi::select(db, keys=tmp$gene, keytype="SYMBOL", columns="UNIPROT")
tmp = dplyr::left_join(dplyr::mutate(up, gene=SYMBOL), tmp, by="gene", relationship="many-to-many")
if (!is.null(uniprot_ok)) {
tmp = tmp |> dplyr::filter(UNIPROT %in% local(uniprot_ok))
}
tmp
}
#' with AlphaMissenseR data query, build a key for merging pathogenicity score to MAF
#' information from donors, based on gene symbol with mutation
#' @param symbol character(1) gene symbol, will be mapped to UNIPROT ids using org.Hs.eg.db
#' @param build character(1) "hg19" or "hg38"
#' @param chrsub character(1) defaults to "" (remove UCSC-type chr prefix), use "chr" for prefixed chr name.
#' @examples
#' amk = am_key_by_gene("ABCA10", "hg19")
#' dim(amk)
#' head(amk)
#' @export
am_key_by_gene = function(symbol, build, chrsub="") {
stopifnot( build %in% c("hg19", "hg38"))
dat = AlphaMissenseR::am_data(build)
requireNamespace("org.Hs.eg.db")
db = org.Hs.eg.db::org.Hs.eg.db
up = AnnotationDbi::select(db, keys=symbol, keytype="SYMBOL", columns="UNIPROT")
if (nrow(up)==0) {
message(sprintf("no uniprot for symbol %s, returning NULL", symbol))
return(NULL)
}
hold = dat |> dplyr::filter(uniprot_id %in% local(up$UNIPROT)) |> as.data.frame()
key = with(hold, paste(gsub("chr", chrsub, CHROM), POS, REF, ALT, sep=":"))
data.frame(key, symbol, uniprot=hold$uniprot_id, am_score=hold$am_pathogenicity)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment