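# Gist "mesh" (explodecomputer/717fa4cfd3dd236a5e6f)
# Created October 9, 2015 16:04
#
# The remaining lines of this header are GitHub page text that was captured
# together with the script; they are kept as comments so the file parses as R.
#   Save explodecomputer/717fa4cfd3dd236a5e6f to your computer and use it in GitHub Desktop.
#   This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
#   Learn more about bidirectional Unicode characters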
# Exploratory check: query PubMed for a single PMID and inspect what RISmed
# returns (hit count, PMID, authors, MeSH terms).

# Attach RISmed here because the functions below are used before the script's
# later library() calls are reached.
library(RISmed)

query <- 17554300

# run the search
search <- EUtilsSummary(query, type = "esearch", db = "pubmed", retmax = 30000)
QueryCount(search)  # how many hits
summary(search)     # number of hits and search terms used

records <- EUtilsGet(search)  # get the results from search
PMID(records)
Author(records)[[1]]

# MeSH entry of the first returned record
a <- Mesh(records)[[1]]
class(a)
# NOTE(review): the original had a dangling `[[1]]` on its own line here, which
# is not valid R; possibly `a[[1]]` was intended -- confirm before restoring.
# Download the MeSH terms for the first (up to) ten entries of `pmid`.
# NOTE(review): `pmid` is not defined in this part of the script -- presumably
# a vector of PubMed IDs created elsewhere; confirm.
#
# head() is used instead of pmid[1:10] so that fewer than ten IDs does not
# produce NA queries. RISmed functions are namespace-qualified because the
# package is only attached further down the script.
mesh <- lapply(head(pmid, 10), function(id) {
  print(id)  # progress indicator
  hits <- RISmed::EUtilsSummary(
    id,
    type = "esearch", db = "pubmed", retmax = 30000
  )
  records <- RISmed::EUtilsGet(hits)  # get the results from search
  RISmed::Mesh(records)[[1]]          # MeSH entry of the first returned record
})
# plyr supplies ddply(), .(), mutate() and rbind.fill();
# RISmed supplies the PubMed helpers (EUtilsSummary(), EUtilsGet(), Mesh(), ...).
library(plyr)
library(RISmed)
# For every PMID for which there is only ONE phenotype, download the mesh terms
# ... (placeholder in the original: the code for this case is not included here)
# For every PMID for which there are multiple phenotypes, download the mesh terms

# Keep one row per distinct (PubmedID, Phenotype) pair. The pasted `code` key
# is stored as a new column on gwas_catalog.
gwas_catalog$code <- paste(gwas_catalog$PubmedID, gwas_catalog$Phenotype)
g <- subset(gwas_catalog, !duplicated(code))

# Number of distinct phenotypes per PMID; `n` holds the PMIDs with more than one.
tab <- table(g$PubmedID)
n <- names(tab[tab > 1])
head(sort(tab, decreasing = TRUE))  # PMIDs with the most phenotypes

# Phenotype / PMID pairs restricted to the multi-phenotype PMIDs.
g2 <- subset(g, PubmedID %in% n, select = c(Phenotype, PubmedID))
# Download the MeSH terms for every multi-phenotype PMID in g2.
# The result is a list named by PMID. A lookup that fails yields NA (with a
# warning) instead of aborting the whole download; the next step of the script
# keeps only the elements that are data frames, so such entries are dropped.
multi_pmids <- unique(g2$PubmedID)  # computed once, reused for the names

mesh <- lapply(multi_pmids, function(id) {
  print(id)  # progress indicator
  tryCatch(
    {
      hits <- EUtilsSummary(
        id,
        type = "esearch", db = "pubmed", retmax = 30000
      )
      records <- EUtilsGet(hits)  # get the results from search
      Mesh(records)[[1]]          # MeSH entry of the first returned record
    },
    error = function(e) {
      warning(
        "MeSH lookup failed for PMID ", id, ": ", conditionMessage(e),
        call. = FALSE
      )
      NA
    }
  )
})
names(mesh) <- multi_pmids
# Keep only the PMIDs whose lookup returned a data frame, tag every row with
# its PMID (taken from the list names), and stack everything into one table.
# vapply() always returns a logical vector, so an empty `mesh` gives an empty
# `mesh2` rather than an invalid list subscript.
has_terms <- vapply(mesh, is.data.frame, logical(1))
mesh2 <- mesh[has_terms]

# seq_along() skips the loop entirely when mesh2 is empty (1:length() would
# iterate over c(1, 0)); rep() keeps the assignment valid for 0-row frames.
for (i in seq_along(mesh2)) {
  mesh2[[i]]$PubmedID <- rep(names(mesh2)[i], nrow(mesh2[[i]]))
}

x <- rbind.fill(mesh2)
# One row per PMID: take the first row of each group without its Heading
# column and add Headflat, all of that PMID's headings joined by tabs.
# The remaining columns therefore hold the values of the group's first row only.
# NOTE(review): x1 is reassigned further down in this script, so this result is
# only available until then.
x1 <- ddply(x, .(PubmedID), function(d) {
  flat <- paste(d$Heading, collapse = "\t")
  # Drop Heading by name (the original dropped column 1 by position);
  # drop = FALSE keeps a data frame even if only one column remains.
  first <- d[1, setdiff(names(d), "Heading"), drop = FALSE]
  first$Headflat <- flat
  first
})
# Remove MeSH headings that occur in more than `heading_cutoff` rows of x,
# then attach the phenotypes recorded for each PMID.
heading_cutoff <- 10

tab <- table(x$Heading)                  # occurrences of each heading in x
n <- names(tab[tab > heading_cutoff])    # headings above the cutoff
x1 <- subset(x, !Heading %in% n)

# The original had an unfinished `x2 <- ddply` here, which only assigned the
# ddply function itself and was overwritten by the merge below; it is removed.

# merge() pairs every remaining heading row with every g2 row sharing its PMID.
x2 <- merge(x1, g2, by = "PubmedID")
# GitHub page footer captured with the script (kept as comments):
#   Sign up for free to join this conversation on GitHub.
#   Already have an account? Sign in to comment