explodecomputer · October 9, 2015 16:04
diff --git a/mesh.R b/mesh.R
 # Exploratory check: look up a single PubMed record by PMID and inspect it.
 # RISmed is attached here so the calls below do not depend on a library()
 # line that appears further down the script.
 library(RISmed)

 query <- 17554300
 # run the search
 search <- EUtilsSummary(query, type="esearch",db = "pubmed",retmax=30000)
 QueryCount(search) # how many hits
 summary(search) # number of hits and search terms used
 records <- EUtilsGet(search) # get the results from search

 PMID(records)
 Author(records)[[1]]
 # keep the first element of Mesh(records) and check what kind of object it is
 a <- Mesh(records)[[1]]
 class(a)



 # [[1]]   (stray console output; commented out because a bare [[1]] is a syntax error)


 # Trial run on the first ten entries of pmid: one PubMed lookup per PMID,
 # keeping the first element of Mesh() for each.
 mesh <- lapply(pmid[1:10], function(id)
 {
 	print(id)
 	hits <- EUtilsSummary(id, type="esearch", db="pubmed", retmax=30000)
 	fetched <- EUtilsGet(hits)
 	Mesh(fetched)[[1]]
 })


 library(plyr)
 library(RISmed)


 # For every PMID for which there is only ONE phenotype, download the mesh terms

 # TODO: the single-phenotype download is not implemented yet (this was a bare
 # `...` placeholder, which raises an error when evaluated at top level).



 # For every PMID for which there are multiple phenotypes, download the mesh terms
 # `code` is PubmedID and Phenotype pasted together; g keeps the first row of
 # each distinct code.
 gwas_catalog$code <- paste(gwas_catalog$PubmedID, gwas_catalog$Phenotype)
 g <- subset(gwas_catalog, !duplicated(code))

 # tab counts rows of g per PubmedID; n holds the PubmedIDs (as character
 # names) that occur more than once.
 tab <- table(g$PubmedID)
 n <- names(tab[tab>1])
 # TRUE rather than T: T is an ordinary variable that can be reassigned
 head(sort(tab, decreasing=TRUE))

 g2 <- subset(g, PubmedID %in% n, select=c(Phenotype, PubmedID))

 # One PubMed lookup per unique PubmedID in g2, keeping the first element of
 # Mesh() for each. A lookup that raises an error is reported as a warning and
 # yields NA for that PMID, so one failed request does not discard the results
 # already downloaded.
 mesh <- lapply(unique(g2$PubmedID), function(x)
 {
 	print(x)
 	tryCatch({
 		search <- EUtilsSummary(x, type="esearch",db = "pubmed",retmax=30000)
 		records <- EUtilsGet(search) # get the results from search
 		Mesh(records)[[1]]
 	}, error = function(e) {
 		warning("MeSH download failed for PMID ", x, ": ", conditionMessage(e), call. = FALSE)
 		NA
 	})
 })


 # Label each result with the PubmedID it was requested for.
 names(mesh) <- unique(g2$PubmedID)
 # Keep only the entries that are data frames. vapply() always returns a
 # logical vector, whereas sapply() returns list() for empty input and would
 # make the subsetting fail.
 mesh2 <- mesh[vapply(mesh, is.data.frame, logical(1))]


 # Record on every data frame which PubmedID it belongs to (taken from the
 # list names). seq_along() makes the loop a no-op when mesh2 is empty;
 # 1:length(mesh2) would iterate over 1 and 0 in that case.
 for(i in seq_along(mesh2))
 {
 	mesh2[[i]]$PubmedID <- names(mesh2)[i]
 }

 # Stack all per-PMID data frames into one.
 x <- rbind.fill(mesh2)

 # One row per PubmedID: take the first row of the group without its first
 # column (presumably Heading -- confirm column order) and add Headflat, all of
 # the group's Heading values joined by tabs.
 x1 <- ddply(x, .(PubmedID), function(d)
 {
 	flat <- paste(d$Heading, collapse="\t")
 	# drop=FALSE keeps a data frame even if only one column remains
 	d <- d[1, -1, drop=FALSE]
 	d$Headflat <- flat
 	d
 })


 # Count how often each Heading value occurs in x.
 # NOTE(review): tab, n and x1 are reassigned here, replacing their earlier values.
 tab <- table(x$Heading)
 # Headings that occur more than 10 times.
 n <- names(tab[tab > 10])
 # Keep only the rows of x whose Heading is not one of those.
 x1 <- subset(x, ! Heading %in% n)

 # Join the rows of x1 with the phenotypes in g2 on PubmedID.
 # (An unfinished `x2 <- ddply` line, which only stored the ddply function in
 # x2 before being overwritten here, was removed.)
 x2 <- merge(x1, g2, by="PubmedID")
	# Exploratory check: look up a single PubMed record by PMID and inspect it.
	# RISmed is attached here so the calls below do not depend on a library()
	# line that appears further down the script.
	library(RISmed)

	query <- 17554300
	# run the search
	search <- EUtilsSummary(query, type="esearch",db = "pubmed",retmax=30000)
	QueryCount(search) # how many hits
	summary(search) # number of hits and search terms used
	records <- EUtilsGet(search) # get the results from search

	PMID(records)
	Author(records)[[1]]
	# keep the first element of Mesh(records) and check what kind of object it is
	a <- Mesh(records)[[1]]
	class(a)



	# [[1]]   (stray console output; commented out because a bare [[1]] is a syntax error)


	# Trial run on the first ten entries of pmid: one PubMed lookup per PMID,
	# keeping the first element of Mesh() for each.
	mesh <- lapply(pmid[1:10], function(id)
	{
		print(id)
		hits <- EUtilsSummary(id, type="esearch", db="pubmed", retmax=30000)
		fetched <- EUtilsGet(hits)
		Mesh(fetched)[[1]]
	})


	library(plyr)
	library(RISmed)


	# For every PMID for which there is only ONE phenotype, download the mesh terms

	# TODO: the single-phenotype download is not implemented yet (this was a bare
	# `...` placeholder, which raises an error when evaluated at top level).



	# For every PMID for which there are multiple phenotypes, download the mesh terms
	# `code` is PubmedID and Phenotype pasted together; g keeps the first row of
	# each distinct code.
	gwas_catalog$code <- paste(gwas_catalog$PubmedID, gwas_catalog$Phenotype)
	g <- subset(gwas_catalog, !duplicated(code))

	# tab counts rows of g per PubmedID; n holds the PubmedIDs (as character
	# names) that occur more than once.
	tab <- table(g$PubmedID)
	n <- names(tab[tab>1])
	# TRUE rather than T: T is an ordinary variable that can be reassigned
	head(sort(tab, decreasing=TRUE))

	g2 <- subset(g, PubmedID %in% n, select=c(Phenotype, PubmedID))

	# One PubMed lookup per unique PubmedID in g2, keeping the first element of
	# Mesh() for each. A lookup that raises an error is reported as a warning and
	# yields NA for that PMID, so one failed request does not discard the results
	# already downloaded.
	mesh <- lapply(unique(g2$PubmedID), function(x)
	{
		print(x)
		tryCatch({
			search <- EUtilsSummary(x, type="esearch",db = "pubmed",retmax=30000)
			records <- EUtilsGet(search) # get the results from search
			Mesh(records)[[1]]
		}, error = function(e) {
			warning("MeSH download failed for PMID ", x, ": ", conditionMessage(e), call. = FALSE)
			NA
		})
	})


	# Label each result with the PubmedID it was requested for.
	names(mesh) <- unique(g2$PubmedID)
	# Keep only the entries that are data frames. vapply() always returns a
	# logical vector, whereas sapply() returns list() for empty input and would
	# make the subsetting fail.
	mesh2 <- mesh[vapply(mesh, is.data.frame, logical(1))]


	# Record on every data frame which PubmedID it belongs to (taken from the
	# list names). seq_along() makes the loop a no-op when mesh2 is empty;
	# 1:length(mesh2) would iterate over 1 and 0 in that case.
	for(i in seq_along(mesh2))
	{
		mesh2[[i]]$PubmedID <- names(mesh2)[i]
	}

	# Stack all per-PMID data frames into one.
	x <- rbind.fill(mesh2)

	# One row per PubmedID: take the first row of the group without its first
	# column (presumably Heading -- confirm column order) and add Headflat, all of
	# the group's Heading values joined by tabs.
	x1 <- ddply(x, .(PubmedID), function(d)
	{
		flat <- paste(d$Heading, collapse="\t")
		# drop=FALSE keeps a data frame even if only one column remains
		d <- d[1, -1, drop=FALSE]
		d$Headflat <- flat
		d
	})


	# Count how often each Heading value occurs in x.
	# NOTE(review): tab, n and x1 are reassigned here, replacing their earlier values.
	tab <- table(x$Heading)
	# Headings that occur more than 10 times.
	n <- names(tab[tab > 10])
	# Keep only the rows of x whose Heading is not one of those.
	x1 <- subset(x, ! Heading %in% n)

	# Join the rows of x1 with the phenotypes in g2 on PubmedID.
	# (An unfinished `x2 <- ddply` line, which only stored the ddply function in
	# x2 before being overwritten here, was removed.)
	x2 <- merge(x1, g2, by="PubmedID")