Last active
October 23, 2024 12:10
-
-
Save vjcitn/a8733bbaf5c91caddb69938467211c76 to your computer and use it in GitHub Desktop.
create data.frames to help merge AlphaMissense pathogenicity scores to MAF objects from maftools
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' from a MAF instance create a key with ref, alt and positional information, | |
#' return in a data.frame with key and gene symbol | |
#' @param mafinst instance of S4 class MAF from maftools | |
#' @param altfield character(1) name of field to use for mutation substitution | |
#' @param donorfield character(1) name of field to use for donor | |
#' @param uniprot_ok character() or NULL, defaults to NULL, if non-NULL, data to be returned will | |
#' only include records with UNIPROT id matching an element in this vector | |
#' @export | |
maf2key = function(mafinst, altfield="Tumor_Seq_Allele2", donorfield="Tumor_Sample_Barcode", | |
uniprot_ok=NULL) { | |
stopifnot(is(mafinst, "MAF")) | |
ld = slot(mafinst, "data") | |
stopifnot(all(c("Chromosome", "Start_Position", "Reference_Allele", altfield) %in% names(ld))) | |
alt = ld[[altfield]] | |
ldclean = ld |> dplyr::filter(nchar(Reference_Allele)==1L, | |
nchar(local(alt))==1L) | |
alt = ldclean[[altfield]] | |
donorid = ldclean[[donorfield]] | |
key = with(ldclean, paste(Chromosome, Start_Position, Reference_Allele, alt, sep=":")) | |
gene = ldclean$Hugo_Symbol | |
tmp = data.frame(key, gene, donor=donorid) | |
requireNamespace("org.Hs.eg.db") | |
db = org.Hs.eg.db::org.Hs.eg.db | |
up = AnnotationDbi::select(db, keys=tmp$gene, keytype="SYMBOL", columns="UNIPROT") | |
tmp = dplyr::left_join(dplyr::mutate(up, gene=SYMBOL), tmp, by="gene", relationship="many-to-many") | |
if (!is.null(uniprot_ok)) { | |
tmp = tmp |> dplyr::filter(UNIPROT %in% local(uniprot_ok)) | |
} | |
tmp | |
} | |
#' with AlphaMissenseR data query, build a key for merging pathogenicity score to MAF | |
#' information from donors, based on gene symbol with mutation | |
#' @param symbol character(1) gene symbol, will be mapped to UNIPROT ids using org.Hs.eg.db | |
#' @param build character(1) "hg19" or "hg38" | |
#' @param chrsub character(1) defaults to "" (remove UCSC-type chr prefix), use "chr" for prefixed chr name. | |
#' @examples | |
#' amk = am_key_by_gene("ABCA10", "hg19") | |
#' dim(amk) | |
#' head(amk) | |
#' @export | |
am_key_by_gene = function(symbol, build, chrsub="") { | |
stopifnot( build %in% c("hg19", "hg38")) | |
dat = AlphaMissenseR::am_data(build) | |
requireNamespace("org.Hs.eg.db") | |
db = org.Hs.eg.db::org.Hs.eg.db | |
up = AnnotationDbi::select(db, keys=symbol, keytype="SYMBOL", columns="UNIPROT") | |
if (nrow(up)==0) { | |
message(sprintf("no uniprot for symbol %s, returning NULL", symbol)) | |
return(NULL) | |
} | |
hold = dat |> dplyr::filter(uniprot_id %in% local(up$UNIPROT)) |> as.data.frame() | |
key = with(hold, paste(gsub("chr", chrsub, CHROM), POS, REF, ALT, sep=":")) | |
data.frame(key, symbol, uniprot=hold$uniprot_id, am_score=hold$am_pathogenicity) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment