Last active
September 30, 2024 09:42
-
-
Save d0choa/499c98bd205b39c98304ee603d034546 to your computer and use it in GitHub Desktop.
Supporting evidence on 2021 FDA approvals
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Drug (brand name) | Sponsor | Properties | Indication | DrugId | DiseaseId | TA | Manual disease mapping | ChemblCheck | |
---|---|---|---|---|---|---|---|---|---|
Vericiguat (Verquvo) | Merck & Co./Bayer | sGC stimulator | Chronic heart failure | CHEMBL4066936 | EFO_0001645 | Cardiovascular | fuzzy | ||
Cabotegravir; rilpivirine (Cabenuva Kit) | ViiV | INSTI and an NNRTI | HIV-1 infection | CHEMBL2403238 | EFO_0000180 | Infectious | exact | ||
Voclosporin (Lupkynis) | Aurinia | Calcineurin inhibitor | Lupus nephritis | CHEMBL2218919 | EFO_0002690 | Nephrology | exact | ||
Tepotinib (Tepmetko) | EMD Serono | MET kinase inhibitor | NSCLC | CHEMBL3402762 | EFO_0003060 | Oncology | exact | ||
Umbralisib (Ukoniq) | TG Therapeutics | PI3Kδ and CK1ε inhibitor | MZL, follicular lymphoma | CHEMBL3948730 | EFO_1000630 | Oncology | exact | ||
Evinacumab (Evkeeza) | Regeneron | ANGPTL3-targeted mAb | HoFH | CHEMBL3545191 | Orphanet_391665 | Metabolic | exact | ||
Trilaciclib (Cosela) | G1 Therapeutics | CDK4 and CDK6 kinase inhibitor | Chemotherapy-induced myelosuppression | CHEMBL3894860 | EFO_0000702 | Oncology | NA | ||
Casimersen (Amondys 45) | Sarepta | Exon 45-skipping ASO | DMD | CHEMBL4297566 | Orphanet_98896 | Other | exact | ||
Fosdenopterin (Nulibry) | BridgeBio | cPMP | MoCD type A | CHEMBL2338675 | Orphanet_308386 | Other | exact | ||
Melphalan flufenamide (Pepaxto) | Oncopeptides | Peptide-conjugated alkylating drug | Multiple myeloma | CHEMBL4303060 | EFO_0001378 | Oncology | exact | ||
Dexmethylphenidate; serdexmethylphenidate (Azstarys) | Commave Therapeutics | CNS stimulant | ADHD | CHEMBL827 | EFO_0003888 | Psychiatric | exact | ||
Tivozanib (Fotivda) | Aveo | VEGFR kinase inhibitor | Renal cell carcinoma | CHEMBL1289494 | EFO_0000681 | Oncology | exact | ||
Ponesimod (Ponvory) | J&J | S1P receptor modulator | Relapsing multiple sclerosis | CHEMBL1096146 | EFO_0003885 | Other | fuzzy | ||
Dasiglucagon (Zegalogue) | Zealand Pharma | Glucagon receptor agonist | Severe hypoglycaemia | CHEMBL4297741 | EFO_0001360 | Metabolic | exact | ||
Viloxazine (Qelbree) | Supernus | SNRI | ADHD | CHEMBL306700 | EFO_0003888 | Psychiatric | exact | ||
Drospirenone; estetrol (Nextstellis) | Mayne Pharma | Spironolactone and oestrogen analogues | To prevent pregnancy | CHEMBL1509 | NA | Reproductive | NA | ||
Dostarlimab (Jemperli) | GlaxoSmithKline | PD1-targeted mAb | Endometrial cancer | CHEMBL4298124 | MONDO_0011962 | Oncology | exact | ||
Loncastuximab tesirine (Zynlonta) | ADC Therapeutics | CD19-targeted ADC | B-cell lymphoma | CHEMBL4297778 | EFO_0000403 | Oncology | exact | ||
Pegcetacoplan (Empaveli) | Apellis | Complement protein C3 inhibitor | PNH | CHEMBL4298211 | Orphanet_447 | Other | exact | ||
Amivantamab (Rybrevant) | J&J | EGFR×METR bispecific antibody | EGFR exon 20-mutated NSCLC | CHEMBL4297774 | EFO_0003060 | Oncology | fuzzy | ||
Piflufolastat F-18 (Pylarify) | Progenics | Radiolabelled PSMA imaging agent | Prostate cancer imaging | NA | NA | Imaging | NA | ||
Infigratinib (Truseltiq) | BridgeBio | FGFR2 kinase inhibitor | FGFR2-mutated bile duct cancer | CHEMBL1852688 | EFO_0005540 | Oncology | fuzzy | ||
Sotorasib (Lumakras) | Amgen | KRAS-G12C inhibitor | KRASG12C-mutated NSCLC | CHEMBL4535757 | EFO_0003060 | Oncology | fuzzy | TRUE | |
Olanzapine; samidorphan (Lybalvi) | Alkermes | Atypical antipsychotic and opioid antagonist | Schizophrenia and bipolar I disorder | CHEMBL715 | EFO_0000692 | Psychiatric | exact | ||
Ibrexafungerp (Brexafemme) | Scynexis | Triterpenoid antifungal | Vulvovaginal candidiasis | CHEMBL4297513 | EFO_0007543 | Infectious | exact | ||
Aducanumab (Aduhelm) | Biogen/Eisai | Amyloid-β-targeted mAb | Alzheimer’s disease | CHEMBL3039540 | EFO_0000249 | Other | exact | ||
Asparaginase erwinia chrysanthemi (Rylaze) | Jazz | Recombinant asparagine-specific enzyme | ALL and LBL, in patients allergic to E. coli-derived products | CHEMBL1863514 | EFO_0000220 | Oncology | fuzzy | ||
Finerenone (Kerendia) | Bayer | Non-steroidal MR antagonist | CKD with type 2 diabetes | CHEMBL2181927 | EFO_0000401 | Other | exact | ||
Fexinidazole (Fexinidazole) | Sanofi/DNDi | Nitroimidazole antimicrobial | Sleeping sickness | CHEMBL1631694 | DOID_10113 | Infectious | exact | ||
Belumosudil (Rezurock) | Kadmon | ROCK2 kinase inhibitor | Chronic GVHD | CHEMBL4594302 | MONDO_0013730 | Other | exact | ||
Odevixibat (Bylvay) | Albireo | IBAT inhibitor | Pruritus in PFIC | CHEMBL4297588 | Orphanet_172 | Other | exact | TRUE | |
Anifrolumab (Saphnelo) | AstraZeneca | IFNAR-targeted mAb | SLE | CHEMBL2364653 | EFO_0002690 | Other | exact | ||
Avalglucosidase alfa (Nexviazyme) | Sanofi | Recombinant α-glucosidase | Pompe disease | CHEMBL4594320 | Orphanet_365 | Other | fuzzy | ||
Belzutifan (Welireg) | Merck & Co. | HIF-2α inhibitor | von Hippel-Lindau disease | CHEMBL4585668 | Orphanet_892 | Oncology | exact | TRUE | |
Difelikefalin (Korsuva) | Cara Therapeutics | κ-Opioid receptor agonist | Pruritus associated with CKD | CHEMBL3989915 | EFO_0003884 | Other | fuzzy | ||
Lonapegsomatropin (Skytrofa) | Ascendis Pharma | PEGylated human growth hormone | Growth failure due to GHD | CHEMBL4298185 | HP_0001510 | Other | NA | TRUE | |
Mobocertinib (Exkivity) | Takeda | EGFR kinase inhibitor | EGFR exon 20-mutated NSCLC | CHEMBL4650319 | EFO_0003060 | Oncology | fuzzy | ||
Tisotumab vedotin (Tivdak) | Seagen/Genmab | Tissue-factor-directed ADC | Cervical cancer | CHEMBL4297841 | MONDO_0002974 | Oncology | exact | ||
Atogepant (Qulipta) | AbbVie | CGRP receptor antagonist | Episodic migraine | CHEMBL3991065 | EFO_0003821 | Other | fuzzy | ||
Maralixibat (Livmarli) | Mirum | IBAT inhibitor | Pruritus in Alagille syndrome | CHEMBL363392 | Orphanet_52 | Other | fuzzy | ||
Avacopan (Tavneos) | ChemoCentryx | Complement 5a receptor antagonist | ANCA-associated vasculitis | CHEMBL3989871 | EFO_0004826 | Cardiovascular | exact | ||
Asciminib (Scemblix) | Novartis | ABL/BCR–ABL1 kinase inhibitor | Ph+ CML | CHEMBL4208229 | EFO_0000339 | Oncology | fuzzy | ||
Ropeginterferon alfa-2b (Besremi) | Pharmaessentia | PEGylated interferon α-2b | Polycythaemia vera | CHEMBL4297819 | EFO_0002429 | Oncology | exact | ||
Vosoritide (Voxzogo) | Biomarin | CNP analogue | Achondroplasia | CHEMBL3707276 | Orphanet_15 | Other | exact | ||
Maribavir (Livtencity) | Takeda | CMV pUL97 kinase inhibitor | Post-transplant CMV infection | CHEMBL515408 | EFO_0001062 | Infectious | fuzzy | ||
Pafolacianine (Cytalux) | On Target Labs | Fluorescent FR imaging agent | Ovarian cancer imaging | CHEMBL4297412 | MONDO_0008170 | Imaging | exact | ||
Efgartigimod alfa (Vyvgart) | Argenx | FcRn-binding Fc fragment | Myasthenia gravis | CHEMBL4297551 | EFO_0004991 | Other | exact | ||
Tezepelumab (Tezspire) | AstraZeneca/Amgen | TSLP-targeted mAb | Severe asthma | CHEMBL3707229 | EFO_0000270 | Respiratory | exact | ||
Inclisiran (Leqvio) | Novartis/Alnylam | PCSK9-targeted siRNA | HeFH or ASCVD | CHEMBL3990033 | MONDO_0021661 | Cardiovascular | fuzzy | ||
Tralokinumab (Adbry) | LEO Pharma | IL-13-targeted mAb | Atopic dermatitis | CHEMBL1743081 | EFO_0000274 | Dermatology | fuzzy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library("tidyverse") | |
library("sparklyr") | |
library("sparklyr.nested") | |
library("cowplot") | |
library("ggsci") | |
# Spark configuration ----
config <- spark_config()
# Requester-pays settings used when reading Google Cloud Storage datasets
config[["spark.hadoop.fs.gs.requester.pays.mode"]] <- "AUTO" # nolint
config[["spark.hadoop.fs.gs.requester.pays.project.id"]] <- "open-targets-eu-dev" # nolint
# Open the Spark connection (YARN master)
sc <- spark_connect(master = "yarn", config = config)
# Approvals as reported in the NRDD article.
# memory = FALSE: the table is registered in Spark but not cached in memory.
gs_approvals <- "gs://ot-team/dochoa/2021_approvals.csv"
approvals <- spark_read_csv(sc, path = gs_approvals, memory = FALSE)

# Datasource metadata, collected into a local data frame
ds_names <- collect(
  spark_read_csv(
    sc,
    path = "gs://ot-team/dochoa/datasourceMetadata.csv",
    memory = FALSE
  )
)
# Platform data locations ----
# Every dataset lives under <bucket>/<release>/output/etl/parquet/<dataset>/.
# paste0() is used instead of paste(..., sep = "") for plain concatenation.
gs_path <- "gs://open-targets-data-releases/"
data_release <- "21.11"

all_evidence_path <- paste0(
  gs_path, data_release, "/output/etl/parquet/evidence/"
)
moa_path <- paste0(
  gs_path, data_release, "/output/etl/parquet/mechanismOfAction/"
)
ass_indirectby_ds_path <- paste0(
  gs_path, data_release, "/output/etl/parquet/associationByDatasourceIndirect/"
)
disease_path <- paste0(
  gs_path, data_release, "/output/etl/parquet/diseases/"
)
interaction_path <- paste0(
  gs_path, data_release, "/output/etl/parquet/interaction/"
)
disease2phenotype_path <- paste0(
  gs_path, data_release, "/output/etl/parquet/diseaseToPhenotype/"
)
# Mechanisms of action ----
# Manually curated drug -> target pairs used to fill gaps in the Platform
# MoA data. A drug appears once per target, so names may repeat.
ammend_moas <- list(
  "CHEMBL4594302" = "ENSG00000134318",
  "CHEMBL4297741" = "ENSG00000215644",
  "CHEMBL4297774" = "ENSG00000146648",
  "CHEMBL4297774" = "ENSG00000105976",
  "CHEMBL4298185" = "ENSG00000112964", # chembl missing in platform
  "CHEMBL4650319" = "ENSG00000146648",
  "CHEMBL1863514" = "ENSG00000166183",
  "CHEMBL4594320" = "ENSG00000171298"
)
# One row per curated pair: drug id from the list names, target id from
# the list values
new_moas <- data.frame(
  chemblIds = names(ammend_moas),
  targetId = unlist(ammend_moas, use.names = FALSE)
)
# Copy the curated pairs to Spark, replacing any table of the same name
new_moas <- sdf_copy_to(sc, new_moas, overwrite = TRUE)

# Available MoAs + amended ones: the Platform table is flattened to one
# (chemblIds, targetId) pair per row, then the curated pairs are appended
moa <- sdf_bind_rows(
  spark_read_parquet(sc, moa_path, memory = FALSE) %>%
    select(chemblIds, targets) %>%
    sdf_explode(chemblIds) %>%
    sdf_explode(targets) %>%
    rename(targetId = targets) %>%
    sdf_distinct(),
  new_moas
)
# Platform associations, indirect (by datasource)
ass_indirectby_ds <- spark_read_parquet(sc, ass_indirectby_ds_path)

# Approvals joined to their targets and to the target-disease associations.
# Left joins keep drugs that have no target or no association; the result
# is collected into a local data frame.
ass <- approvals %>%
  rename(diseaseId = DiseaseId) %>%
  left_join(moa, by = c(DrugId = "chemblIds")) %>%
  left_join(ass_indirectby_ds, by = c("diseaseId", "targetId")) %>%
  collect()
# Molecular interactions ----
# Distinct IntAct target pairs with both partners present and a scoring
# value above 0.42
interactions <- spark_read_parquet(sc, interaction_path, memory = FALSE) %>%
  filter(
    sourceDatabase == "intact",
    !is.na(targetA),
    !is.na(targetB),
    scoring > 0.42
  ) %>%
  select(targetA, targetB) %>%
  sdf_distinct()

# Datasource x drug pairs where a protein interacting with the drug target
# (targetB) is associated with the approved indication
interactors_ass <- approvals %>%
  rename(diseaseId = DiseaseId) %>%
  inner_join(moa, by = c(DrugId = "chemblIds")) %>%
  inner_join(interactions, by = c(targetId = "targetA")) %>%
  inner_join(
    ass_indirectby_ds,
    by = c("diseaseId", targetB = "targetId")
  ) %>%
  select(datasourceId, Drug_brand_name) %>%
  sdf_distinct() %>%
  collect() %>%
  mutate(interactionAssociation = TRUE)
# Additional phenotype curation ----
# Manually curated disease -> related phenotype pairs, appended later to the
# Platform disease-to-phenotype data. A disease may be listed more than once.
ammend_phenotypes <- list(
  # Microalbuminuria (biomarker of CKD)
  "EFO_0000401" = "HP_0012594",
  # glycodeoxycholate sulfate (one of the bile acids that cause pruritus)
  "Orphanet_172" = "EFO_0005653",
  "Orphanet_52" = "EFO_0005653",
  # achondroplasia -> body height
  "Orphanet_15" = "EFO_0004339",
  "Orphanet_15" = "Orphanet_329191",
  # von Hippel-Lindau -> renal carcinoma
  "Orphanet_892" = "EFO_0000681",
  "EFO_0001360" = "MONDO_0018582",
  # growth delay -> height
  "HP_0001510" = "EFO_0004339",
  # CAD -> myocardial infarction
  "EFO_0001645" = "EFO_0000612"
)
# One row per curated pair: disease id from the list names, phenotype id
# from the list values
new_phenotypes <- data.frame(
  diseaseId = names(ammend_phenotypes),
  phenotype = unlist(ammend_phenotypes, use.names = FALSE)
)
# Copy the curated pairs to Spark, replacing any table of the same name
new_phenotypes <- sdf_copy_to(sc, new_phenotypes, overwrite = TRUE)

# Platform disease-to-phenotype pairs
disease2phenotype <- spark_read_parquet(
  sc, disease2phenotype_path,
  memory = FALSE
) %>%
  select(diseaseId = disease, phenotype) %>%
  sdf_distinct()

# Datasource x drug pairs where the drug target is associated with a
# phenotype (Platform or curated) of the approved indication
phenotype_ass <- approvals %>%
  rename(diseaseId = DiseaseId) %>%
  inner_join(moa, by = c(DrugId = "chemblIds")) %>%
  inner_join(
    sdf_bind_rows(disease2phenotype, new_phenotypes),
    by = "diseaseId"
  ) %>%
  inner_join(
    ass_indirectby_ds,
    by = c(phenotype = "diseaseId", "targetId")
  ) %>%
  select(datasourceId, Drug_brand_name) %>%
  sdf_distinct() %>%
  collect() %>%
  mutate(phenotypeAssociation = TRUE)
# Data to plot ----
# One row per datasource x drug with the direct association score and the
# flags for support through interacting proteins or related phenotypes.
data2plot <- ass %>%
  select(datasourceId, Drug_brand_name, score) %>%
  # all datasource x drug combinations; the missing ones score 0
  complete(datasourceId, Drug_brand_name) %>%
  mutate(score = replace_na(score, 0)) %>%
  filter(!is.na(datasourceId)) %>%
  # therapeutic area of each drug
  left_join(
    distinct(select(ass, Drug_brand_name, TA)),
    by = "Drug_brand_name"
  ) %>%
  # drugs for which no target is known
  left_join(
    ass %>%
      transmute(Drug_brand_name, noTarget = is.na(targetId)) %>%
      distinct(),
    by = "Drug_brand_name"
  ) %>%
  # support through interacting proteins
  left_join(interactors_ass, by = c("datasourceId", "Drug_brand_name")) %>%
  # support through related phenotypes
  left_join(phenotype_ass, by = c("datasourceId", "Drug_brand_name")) %>%
  mutate(
    # a direct association also counts as interaction / phenotype support
    interactionAssociation = ifelse(score > 0, TRUE, interactionAssociation),
    phenotypeAssociation = ifelse(score > 0, TRUE, phenotypeAssociation),
    # drugs without a target get no score and their own facet
    score = ifelse(noTarget, NA, score),
    TA = ifelse(noTarget, "No human target", TA),
    TA = fct_other(
      TA,
      keep = c("Oncology", "No human target"),
      other_level = "Other indication"
    ),
    TA = fct_relevel(
      TA,
      c("Oncology", "Other indication", "No human target")
    )
  ) %>%
  filter(
    !(datasourceId %in% c(
      "chembl", "expression_atlas", "sysbio", "europepmc",
      "phenodigm", "reactome", "phewas_catalog"
    ))
  ) %>%
  mutate(
    # drug score used only to order the drugs on the axis
    rankscore = replace_na(score, 0) +
      ifelse(!is.na(interactionAssociation), 0.01, 0) +
      ifelse(!is.na(phenotypeAssociation), 0.03, 0),
    Drug_brand_name = fct_rev(fct_reorder(
      Drug_brand_name, rankscore, mean,
      na.rm = TRUE, .desc = TRUE
    ))
  ) %>%
  group_by(
    datasourceId,
    Drug_brand_name,
    TA,
    noTarget,
    interactionAssociation,
    phenotypeAssociation
  ) %>%
  # max() over only-NA scores returns -Inf with a warning; the warning is
  # silenced here and the -Inf is turned back into NA just below
  summarise(score = suppressWarnings(max(score, na.rm = TRUE))) %>%
  mutate(score = ifelse(score < 0, NA, score)) %>%
  left_join(ds_names, by = "datasourceId") %>%
  mutate(
    datasourceName = factor(
      datasourceName,
      levels = ds_names$datasourceName
    ),
    datasourceType = factor(
      datasourceType,
      levels = c(
        "Somatic", "Functional genomics (cancer)",
        "Rare mendelian", "Common disease"
      )
    )
  )
# Symbols to overlay in the plot ----
# Long table with one row per tile and kind of support ("interaction" or
# "phenotype"); tiles without that support are dropped.
# pivot_longer() replaces the superseded gather(); values_drop_na = TRUE
# stands in for the former filter(!is.na(value)).
overlay_data <- data2plot %>%
  ungroup() %>%
  select(
    datasourceName,
    datasourceType,
    Drug_brand_name,
    TA,
    interactionAssociation,
    phenotypeAssociation
  ) %>%
  pivot_longer(
    cols = c(interactionAssociation, phenotypeAssociation),
    names_to = "overlay",
    values_to = "value",
    values_drop_na = TRUE
  ) %>%
  # keep the column-wise stacking gather() produced (all interaction rows,
  # then all phenotype rows) so the points are drawn in the same order
  arrange(overlay) %>%
  mutate(
    overlay = str_replace_all(overlay, "Association", ""),
    overlaySize = ifelse(overlay == "phenotype", 3, 1),
    overlaySymbol = as.character(ifelse(overlay == "phenotype", 1, 16))
  )
# Plotting ----
# Heatmap of direct association scores (datasource x drug), overlaid with
# symbols for support through related phenotypes or interacting proteins,
# faceted by therapeutic area and datasource type.
output <- ggplot(
  data2plot,
  aes(x = datasourceName, y = Drug_brand_name)
) +
  geom_tile(aes(fill = score), color = "white") +
  geom_point(
    data = overlay_data,
    aes(shape = overlay, size = overlaySize)
  ) +
  scale_fill_material(
    "blue",
    na.value = "grey90",
    name = "Direct association"
  ) +
  scale_shape_manual(
    name = "Supported by:",
    breaks = c("phenotype", "interaction"),
    labels = c(
      "Direct or related phenotype",
      "Direct or interacting protein"
    ),
    values = c(1, 16)
  ) +
  # point sizes are taken as-is from overlaySize
  scale_size_identity() +
  facet_grid(TA ~ datasourceType, scales = "free", space = "free") +
  theme_cowplot(font_size = 12) +
  # Title kept disabled:
  # labs(
  #   title = "Supporting evidence on 2021 FDA drug approvals",
  #   subtitle = "Target-Disease evidence from Open Targets"
  #   # caption =
  #   # "Source: Nat Reviews Drug Discovery 10.1038/d41573-022-00001-9"
  # ) +
  theme(
    plot.background = element_rect(fill = "white"),
    strip.background = element_blank(),
    legend.direction = "horizontal",
    legend.box = "vertical",
    legend.position = c(-0.7, -0.16),
    legend.justification = c(0, 0),
    axis.ticks = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title = element_blank(),
    axis.line = element_blank(),
    text = element_text(family = "sans")
  ) +
  guides(
    fill = guide_colourbar(
      title.position = "top",
      title.hjust = 0.5,
      barwidth = 8,
      frame.colour = "black",
      ticks.colour = "black",
      order = 2
    ),
    shape = guide_legend(
      title.position = "top",
      direction = "vertical",
      order = 1
    )
  )

# Write the figure to disk
ggsave(
  "/home/ochoa/2021_approvals.pdf",
  plot = output,
  width = 9,
  height = 11
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library("tidyverse") | |
library("sparklyr") | |
library("sparklyr.nested") | |
library("cowplot") | |
library("ggsci") | |
# Spark configuration ----
config <- spark_config()
# Requester-pays settings used when reading Google Cloud Storage datasets
config[["spark.hadoop.fs.gs.requester.pays.mode"]] <- "AUTO" # nolint
config[["spark.hadoop.fs.gs.requester.pays.project.id"]] <- "open-targets-eu-dev" # nolint
# Open the Spark connection (local master)
sc <- spark_connect(master = "local", config = config)
# Approvals as reported in the NRDD article.
# memory = FALSE: the table is registered in Spark but not cached in memory.
gs_approvals <- "gs://ot-team/dochoa/2021_approvals.csv"
approvals <- spark_read_csv(sc, path = gs_approvals, memory = FALSE)

# Datasource metadata, collected into a local data frame
ds_names <- collect(
  spark_read_csv(
    sc,
    path = "gs://ot-team/dochoa/datasourceMetadata.csv",
    memory = FALSE
  )
)
# Platform data locations ----
# Every dataset lives under <bucket>/<release>/output/etl/parquet/<dataset>/.
# paste0() is used instead of paste(..., sep = "") for plain concatenation.
gs_path <- "gs://open-targets-data-releases/"
data_release <- "21.11"

all_evidence_path <- paste0(
  gs_path, data_release, "/output/etl/parquet/evidence/"
)
moa_path <- paste0(
  gs_path, data_release, "/output/etl/parquet/mechanismOfAction/"
)
ass_indirectby_ds_path <- paste0(
  gs_path, data_release, "/output/etl/parquet/associationByDatasourceIndirect/"
)
disease_path <- paste0(
  gs_path, data_release, "/output/etl/parquet/diseases/"
)
interaction_path <- paste0(
  gs_path, data_release, "/output/etl/parquet/interaction/"
)
disease2phenotype_path <- paste0(
  gs_path, data_release, "/output/etl/parquet/diseaseToPhenotype/"
)
# Mechanisms of action ----
# Manually curated drug -> target pairs used to fill gaps in the Platform
# MoA data. A drug appears once per target, so names may repeat.
ammend_moas <- list(
  "CHEMBL4594302" = "ENSG00000134318",
  "CHEMBL4297741" = "ENSG00000215644",
  "CHEMBL4297774" = "ENSG00000146648",
  "CHEMBL4297774" = "ENSG00000105976",
  "CHEMBL4298185" = "ENSG00000112964", # chembl missing in platform
  "CHEMBL4650319" = "ENSG00000146648",
  "CHEMBL1863514" = "ENSG00000166183",
  "CHEMBL4594320" = "ENSG00000171298"
)
# One row per curated pair: drug id from the list names, target id from
# the list values
new_moas <- data.frame(
  chemblIds = names(ammend_moas),
  targetId = unlist(ammend_moas, use.names = FALSE)
)
# Copy the curated pairs to Spark, replacing any table of the same name
new_moas <- sdf_copy_to(sc, new_moas, overwrite = TRUE)

# Available MoAs + amended ones: the Platform table is flattened to one
# (chemblIds, targetId) pair per row, then the curated pairs are appended
moa <- sdf_bind_rows(
  spark_read_parquet(sc, moa_path, memory = FALSE) %>%
    select(chemblIds, targets) %>%
    sdf_explode(chemblIds) %>%
    sdf_explode(targets) %>%
    rename(targetId = targets) %>%
    sdf_distinct(),
  new_moas
)
# Platform associations, indirect (by datasource)
ass_indirectby_ds <- spark_read_parquet(sc, ass_indirectby_ds_path)

# Approvals joined to their targets and to the target-disease associations.
# Left joins keep drugs that have no target or no association; the result
# is collected into a local data frame.
ass <- approvals %>%
  rename(diseaseId = DiseaseId) %>%
  left_join(moa, by = c(DrugId = "chemblIds")) %>%
  left_join(ass_indirectby_ds, by = c("diseaseId", "targetId")) %>%
  collect()
# Molecular interactions ----
# Distinct IntAct target pairs with both partners present and a scoring
# value above 0.42
interactions <- spark_read_parquet(sc, interaction_path, memory = FALSE) %>%
  filter(
    sourceDatabase == "intact",
    !is.na(targetA),
    !is.na(targetB),
    scoring > 0.42
  ) %>%
  select(targetA, targetB) %>%
  sdf_distinct()

# Datasource x drug pairs where a protein interacting with the drug target
# (targetB) is associated with the approved indication
interactors_ass <- approvals %>%
  rename(diseaseId = DiseaseId) %>%
  inner_join(moa, by = c(DrugId = "chemblIds")) %>%
  inner_join(interactions, by = c(targetId = "targetA")) %>%
  inner_join(
    ass_indirectby_ds,
    by = c("diseaseId", targetB = "targetId")
  ) %>%
  select(datasourceId, Drug_brand_name) %>%
  sdf_distinct() %>%
  collect() %>%
  mutate(interactionAssociation = TRUE)
# Additional phenotype curation ----
# Manually curated disease -> related phenotype pairs, appended later to the
# Platform disease-to-phenotype data. A disease may be listed more than once.
ammend_phenotypes <- list(
  # Microalbuminuria (biomarker of CKD)
  "EFO_0000401" = "HP_0012594",
  # glycodeoxycholate sulfate (one of the bile acids that cause pruritus)
  "Orphanet_172" = "EFO_0005653",
  "Orphanet_52" = "EFO_0005653",
  # achondroplasia -> body height
  "Orphanet_15" = "EFO_0004339",
  "Orphanet_15" = "Orphanet_329191",
  # von Hippel-Lindau -> renal carcinoma
  "Orphanet_892" = "EFO_0000681",
  "EFO_0001360" = "MONDO_0018582",
  # growth delay -> height
  "HP_0001510" = "EFO_0004339",
  # CAD -> myocardial infarction
  "EFO_0001645" = "EFO_0000612"
)
# One row per curated pair: disease id from the list names, phenotype id
# from the list values
new_phenotypes <- data.frame(
  diseaseId = names(ammend_phenotypes),
  phenotype = unlist(ammend_phenotypes, use.names = FALSE)
)
# Copy the curated pairs to Spark, replacing any table of the same name
new_phenotypes <- sdf_copy_to(sc, new_phenotypes, overwrite = TRUE)

# Platform disease-to-phenotype pairs
disease2phenotype <- spark_read_parquet(
  sc, disease2phenotype_path,
  memory = FALSE
) %>%
  select(diseaseId = disease, phenotype) %>%
  sdf_distinct()

# Datasource x drug pairs where the drug target is associated with a
# phenotype (Platform or curated) of the approved indication
phenotype_ass <- approvals %>%
  rename(diseaseId = DiseaseId) %>%
  inner_join(moa, by = c(DrugId = "chemblIds")) %>%
  inner_join(
    sdf_bind_rows(disease2phenotype, new_phenotypes),
    by = "diseaseId"
  ) %>%
  inner_join(
    ass_indirectby_ds,
    by = c(phenotype = "diseaseId", "targetId")
  ) %>%
  select(datasourceId, Drug_brand_name) %>%
  sdf_distinct() %>%
  collect() %>%
  mutate(phenotypeAssociation = TRUE)
# Data to plot ----
# One row per datasource x drug with the direct association score and the
# flags for support through interacting proteins or related phenotypes.
data2plot <- ass %>%
  select(datasourceId, Drug_brand_name, score) %>%
  # all datasource x drug combinations; the missing ones score 0
  complete(datasourceId, Drug_brand_name) %>%
  mutate(score = replace_na(score, 0)) %>%
  filter(!is.na(datasourceId)) %>%
  # therapeutic area of each drug
  left_join(
    distinct(select(ass, Drug_brand_name, TA)),
    by = "Drug_brand_name"
  ) %>%
  # drugs for which no target is known
  left_join(
    ass %>%
      transmute(Drug_brand_name, noTarget = is.na(targetId)) %>%
      distinct(),
    by = "Drug_brand_name"
  ) %>%
  # support through interacting proteins
  left_join(interactors_ass, by = c("datasourceId", "Drug_brand_name")) %>%
  # support through related phenotypes
  left_join(phenotype_ass, by = c("datasourceId", "Drug_brand_name")) %>%
  mutate(
    # a direct association also counts as interaction / phenotype support
    interactionAssociation = ifelse(score > 0, TRUE, interactionAssociation),
    phenotypeAssociation = ifelse(score > 0, TRUE, phenotypeAssociation),
    # drugs without a target get no score and their own facet
    score = ifelse(noTarget, NA, score),
    TA = ifelse(noTarget, "No human target", TA),
    TA = fct_other(
      TA,
      keep = c("Oncology", "No human target"),
      other_level = "Other indication"
    ),
    TA = fct_relevel(
      TA,
      c("Oncology", "Other indication", "No human target")
    )
  ) %>%
  filter(
    !(datasourceId %in% c(
      "chembl", "expression_atlas", "sysbio", "europepmc",
      "phenodigm", "reactome", "phewas_catalog"
    ))
  ) %>%
  mutate(
    # drug score used only to order the drugs on the axis
    rankscore = replace_na(score, 0) +
      ifelse(!is.na(interactionAssociation), 0.01, 0) +
      ifelse(!is.na(phenotypeAssociation), 0.03, 0),
    Drug_brand_name = fct_rev(fct_reorder(
      Drug_brand_name, rankscore, mean,
      na.rm = TRUE, .desc = TRUE
    ))
  ) %>%
  group_by(
    datasourceId,
    Drug_brand_name,
    TA,
    noTarget,
    interactionAssociation,
    phenotypeAssociation
  ) %>%
  # max() over only-NA scores returns -Inf with a warning; the warning is
  # silenced here and the -Inf is turned back into NA just below
  summarise(score = suppressWarnings(max(score, na.rm = TRUE))) %>%
  mutate(score = ifelse(score < 0, NA, score)) %>%
  left_join(ds_names, by = "datasourceId") %>%
  mutate(
    datasourceName = factor(
      datasourceName,
      levels = ds_names$datasourceName
    ),
    datasourceType = factor(
      datasourceType,
      levels = c(
        "Somatic", "Functional genomics (cancer)",
        "Rare mendelian", "Common disease"
      )
    )
  )
# Evidence category per drug ----
# Collapses a `data2plot`-like table into one row per drug (and per any
# extra grouping column) and evidence category that applies to it.
#
# plot_data: table with Drug_brand_name, TA, score, noTarget,
#   interactionAssociation and phenotypeAssociation columns.
# ...: extra grouping columns besides Drug_brand_name and TA.
# datasource_type_label: when not NULL, stored in a constant
#   `datasourceType` column (used for the "Any" summary, which has no
#   datasourceType of its own).
#
# Returns a long table with columns Drug_brand_name, TA, datasourceType,
# evidence and value, keeping only the rows where value is TRUE. The
# categories are made mutually exclusive with the precedence
# direct score > related phenotype > interacting protein.
summarise_support <- function(plot_data, ..., datasource_type_label = NULL) {
  support <- plot_data %>%
    mutate(score = replace_na(score, 0)) %>%
    group_by(Drug_brand_name, TA, ...) %>%
    summarise(
      noTarget = any(noTarget),
      interactionAssociation = any(interactionAssociation),
      phenotypeAssociation = any(phenotypeAssociation),
      # TRUE when any datasource in the group has a direct association
      score = max(score, na.rm = TRUE) > 0
    )
  if (!is.null(datasource_type_label)) {
    support <- mutate(support, datasourceType = datasource_type_label)
  }
  support %>%
    mutate(
      noTarget = replace_na(noTarget, FALSE),
      phenotypeAssociation = replace_na(phenotypeAssociation, FALSE) & !score,
      interactionAssociation =
        replace_na(interactionAssociation, FALSE) &
          !score & !phenotypeAssociation,
      noEvidence = !(
        interactionAssociation | phenotypeAssociation | score | noTarget
      )
    ) %>%
    # pivot_longer() replaces the superseded gather()
    pivot_longer(
      cols = !c(Drug_brand_name, TA, datasourceType),
      names_to = "evidence",
      values_to = "value"
    ) %>%
    filter(value)
}

# Values per data source type
briefplotdata <- summarise_support(data2plot, datasourceType)
# Values across any data source
briefplotdataAny <- summarise_support(
  data2plot,
  datasource_type_label = "Any"
)
# Brief plot ----
# One tile per drug and datasource type, coloured by the strongest kind of
# genetic support, faceted by therapeutic area.
output <- bind_rows(briefplotdataAny, briefplotdata) %>%
  # bind_rows() keeps the grouping of its first input. Without ungroup()
  # the factor helpers below run once per group, and the final level order
  # would depend on which levels the first group happens to contain.
  ungroup() %>%
  mutate(
    # fct_relevel() takes the levels through `...`; it has no `levels`
    # argument
    datasourceType = fct_relevel(
      datasourceType,
      "Any",
      "Somatic",
      "Functional genomics (cancer)",
      "Rare mendelian",
      "Common disease"
    ),
    evidence = fct_relevel(
      evidence,
      "score",
      "phenotypeAssociation",
      "interactionAssociation",
      "noTarget",
      "noEvidence"
    ),
    evidence = fct_recode(
      evidence,
      "Direct" = "score",
      "Close phenotype" = "phenotypeAssociation",
      "Interacting protein" = "interactionAssociation",
      "No human target" = "noTarget",
      "Not available" = "noEvidence"
    )
  ) %>%
  # tile position along x: drugs sorted by therapeutic area and evidence,
  # numbered within each datasource type
  arrange(TA, desc(evidence)) %>%
  group_by(datasourceType) %>%
  mutate(rn = row_number()) %>%
  # "Not available" is drawn with the NA colour and left out of the legend
  mutate(evidence = replace(evidence, evidence == "Not available", NA)) %>%
  ggplot(aes(
    x = rn,
    y = fct_rev(datasourceType),
    fill = fct_rev(evidence)
  )) +
  geom_tile(color = "white", height = .8, size = 0.5) +
  facet_grid(
    ~TA,
    scales = "free",
    space = "free"
  ) +
  # scale_fill_npg(name = "Genetic support", na.value = "grey90") +
  scale_fill_manual(
    name = "Genetic support",
    values = c("#3C5488FF", "#00A087FF", "#4DBBD5FF", "grey60"),
    breaks = c(
      "Direct", "Close phenotype",
      "Interacting protein", "No human target"
    ),
    na.value = "grey90"
  ) +
  scale_y_discrete(
    name = "Genetic data source",
    labels = function(x) str_wrap(x, width = 12)
  ) +
  theme_cowplot(font_size = 11) +
  theme(
    plot.background = element_rect(fill = "white"),
    strip.background = element_blank(),
    axis.ticks = element_blank(),
    legend.position = "bottom",
    axis.text.x = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 0, l = 0)),
    axis.line = element_blank(),
    text = element_text(family = "sans"),
    panel.spacing = unit(-0.5, "lines")
  )

# Write the figure to disk (the trailing empty argument was removed)
ggsave(
  "/home/ochoa/2021_approvals_brief.pdf",
  plot = output,
  width = 6.5,
  height = 3.5,
  dpi = 400
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
datasourceId | datasourceName | datasourceType | |
---|---|---|---|
cancer_gene_census | CGC (COSMIC) | Somatic | |
intogen | IntOgen | Somatic | |
cancer_biomarkers | Cancer Biomarkers (CGI) | Somatic | |
crispr | Project Score | Functional genomics (cancer) | |
slapenrich | SlapEnrich | Functional genomics (cancer) | |
progeny | Progeny | Functional genomics (cancer) | |
eva_somatic | ClinVar (Somatic) | Somatic | |
ot_genetics_portal | OT Genetics Portal | Common disease | |
phewas_catalog | Phewas Catalog | Common disease | |
eva | ClinVar | Rare mendelian | |
clingen | Clingen | Rare mendelian | |
genomics_england | GEL PanelApp | Rare mendelian | |
orphanet | Orphanet | Rare mendelian | |
gene2phenotype | gene2phenotype | Rare mendelian | |
uniprot_literature | Uniprot (gene-disease) | Rare mendelian | |
uniprot_variants | Uniprot (variants) | Rare mendelian | |
reactome | Reactome | Functional genomics (cancer) | |
phenodigm | Mouse model (phenodigm) | Mouse model | |
europepmc | Literature (EPMC) | Literature | |
expression_atlas | ExpressionAtlas (Diff expression) | Differential Expression | |
chembl | drugs | Drugs |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Datasources with a direct association, one ";"-separated string per drug.
# Datasource ids containing "eva" are reported as "clinvar".
directSources <- ass %>%
  filter(
    !(datasourceId %in% c(
      "chembl", "expression_atlas", "sysbio", "europepmc",
      "phenodigm", "reactome", "phewas_catalog"
    )),
    !is.na(datasourceId)
  ) %>%
  mutate(datasourceId = str_replace(datasourceId, "eva", "clinvar")) %>%
  group_by(Drug_brand_name) %>%
  summarise(directSources = paste(unique(datasourceId), collapse = ";"))
# Overall evidence category per drug (the "Any" row of the brief plot).
# `output` is a ggplot object when it comes from the brief-plot script, so
# the table is taken from its `data` slot; a plain data frame is used
# as-is. In the plot data, "Not available" has already been set to NA.
summary_source <- if (inherits(output, "ggplot")) output$data else output
summaryResults <- summary_source %>%
  # drop any grouping so select() returns only the two requested columns
  ungroup() %>%
  filter(datasourceType == "Any") %>%
  select(Drug_brand_name, evidence)
# Related phenotypes supporting each drug, with their names and datasources.
# The collected `phenotype_ass` table only keeps datasourceId and
# Drug_brand_name, so the phenotype join is rebuilt here on Spark (the same
# way `intDf` rebuilds the interaction join) to keep the `phenotype` column
# and to allow the join against the Spark diseases table.
closePhenotypes <- approvals %>%
  rename(diseaseId = DiseaseId) %>%
  inner_join(moa, by = c("DrugId" = "chemblIds")) %>%
  inner_join(
    disease2phenotype %>%
      sdf_bind_rows(new_phenotypes),
    by = c("diseaseId")
  ) %>%
  inner_join(
    ass_indirectby_ds,
    by = c("phenotype" = "diseaseId", "targetId")
  ) %>%
  select(Drug_brand_name, datasourceId, phenotype) %>%
  sdf_distinct() %>%
  # phenotype names from the Platform diseases table
  left_join(
    spark_read_parquet(sc, disease_path) %>%
      select(phenotype = id, phenotypeName = name),
    by = "phenotype"
  ) %>%
  collect() %>%
  mutate(datasourceId = datasourceId %>% str_replace("eva", "clinvar")) %>%
  filter(
    !(datasourceId %in% c(
      "chembl", "expression_atlas", "sysbio", "europepmc",
      "phenodigm", "reactome", "phewas_catalog"
    ))
  ) %>%
  distinct() %>%
  group_by(Drug_brand_name) %>%
  summarise(
    closePhenotypeIds = paste(unique(phenotype), collapse = ";"),
    closePhenotypeNames = paste(unique(phenotypeName), collapse = ";"),
    closePhenotypeDataSources = paste(unique(datasourceId), collapse = ";")
  )
# Platform target annotation
target_path <- paste(
  gs_path, data_release, "/output/etl/parquet/target/",
  sep = ""
)

# Interacting proteins supporting each drug: ids, approved symbols and the
# datasources that associate them with the approved indication
intDf <- approvals %>%
  rename(diseaseId = DiseaseId) %>%
  inner_join(moa, by = c(DrugId = "chemblIds")) %>%
  inner_join(interactions, by = c(targetId = "targetA")) %>%
  inner_join(
    ass_indirectby_ds,
    by = c("diseaseId", targetB = "targetId")
  ) %>%
  # approved symbol of the interacting protein
  left_join(
    select(
      spark_read_parquet(sc, target_path),
      targetB = id, approvedSymbol
    ),
    by = "targetB"
  ) %>%
  select(Drug_brand_name, targetB, datasourceId, approvedSymbol) %>%
  collect() %>%
  mutate(datasourceId = str_replace(datasourceId, "eva", "clinvar")) %>%
  filter(
    !(datasourceId %in% c(
      "chembl", "expression_atlas", "sysbio", "europepmc",
      "phenodigm", "reactome", "phewas_catalog"
    ))
  ) %>%
  distinct() %>%
  group_by(Drug_brand_name) %>%
  summarise(
    interactingIds = paste(unique(targetB), collapse = ";"),
    interactingSymbols = paste(unique(approvedSymbol), collapse = ";"),
    interactingDataSources = paste(unique(datasourceId), collapse = ";")
  )
# Final per-drug table: drug metadata, targets, overall evidence category
# and the datasources / phenotypes / interactors behind it.
out <- ass %>%
  group_by(
    Drug_brand_name, Sponsor, DrugId, Indication, diseaseId, Properties
  ) %>%
  # `ass` has one row per drug x target x datasource, so each target id is
  # listed once via unique(), like every other summary in this script
  summarise(targetIds = paste(unique(targetId), collapse = ";")) %>%
  ungroup() %>%
  left_join(summaryResults, by = "Drug_brand_name") %>%
  left_join(directSources, by = "Drug_brand_name") %>%
  left_join(closePhenotypes, by = "Drug_brand_name") %>%
  left_join(intDf, by = "Drug_brand_name")

write_csv(out, "/home/ochoa/2021_approvals_output.csv")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment