Skip to content

Instantly share code, notes, and snippets.

@cigrainger
Created April 20, 2014 19:44
Show Gist options
  • Select an option

  • Save cigrainger/11123281 to your computer and use it in GitHub Desktop.

Select an option

Save cigrainger/11123281 to your computer and use it in GitHub Desktop.
.libPaths("C:/Users/graingec/R/win-library")
library(dplyr)
library(tm)
library(lda)
library(wordcloud)
# Build the English-only abstract table and cache it on disk.
# NOTE(review): load() is expected to bring a data frame called `patents` into
# the workspace -- confirm against the contents of abstractslang.rdata.
load("./data/abstractslang.rdata")

# Strip everything except letters, digits and spaces from the language code
# before comparing it with "EN".
patents$APPLN_ABSTRACT_LG <- gsub(
  "[^[:alnum:] ]", "", patents$APPLN_ABSTRACT_LG
)
patents <- filter(patents, APPLN_ABSTRACT_LG == "EN")

# Drop the "<IMAGE>" placeholder first, and case-insensitively. It used to be
# removed after tolower(), so the upper-case pattern could never match and the
# punctuation strip below then left a stray "image" token in the text.
patents$appln_abstract <- gsub(
  "<IMAGE>", "", patents$appln_abstract, ignore.case = TRUE
)
patents$appln_abstract <- tolower(patents$appln_abstract)
patents$appln_abstract <- gsub("[^[:alnum:] ]", "", patents$appln_abstract)

# Keep only the join key and the cleaned text. gsub() returns character, so
# appln_id is a character column from here on.
patents <- select(patents, appln_id, appln_abstract)
patents$appln_id <- gsub("[^[:alnum:] ]", "", patents$appln_id)
save(patents, file = "./data/abstractsenglish.rdata")
# Word Clouds
# Restrict the CPC table to codes starting with "Y02" and attach the cleaned
# abstracts to it.
# NOTE(review): load() is expected to provide a data frame called `cpc_codes`
# with `cpc_code` and `appln_id` columns -- confirm against cpc_codes.rdata.
load("./data/cpc_codes.rdata")
cpc_codes <- mutate(cpc_codes, subclass = substr(cpc_code, 1, 3))
cpc_codes <- filter(cpc_codes, subclass == "Y02")
# appln_id is cast to character so it has the same type as patents$appln_id,
# which is character after its gsub() clean-up; subsec is the 5-character
# prefix used to group the word clouds.
cpc_codes <- mutate(
  cpc_codes,
  appln_id = as.character(appln_id),
  subsec = substr(cpc_code, 1, 5)
)
# No `by` is given, so the join uses every column name the two tables share.
wcpatents <- inner_join(cpc_codes, patents)
save(wcpatents, cpc_codes, file = "wcabstract.rdata")
# Create one data frame per distinct sub-section code in the current
# environment, each named after its code (e.g. `Y02B1`) and holding the
# matching rows of `wcpatents`. The codes are computed once up front rather
# than twice per iteration, and iterating over the values themselves means an
# empty code vector skips the loop instead of walking 1:0.
subsec_codes <- unique(cpc_codes$subsec)
for (subsec_code in subsec_codes) {
  assign(subsec_code, filter(wcpatents, subsec == subsec_code))
}

# Sub-sections to plot, in output order. Every name listed here must have been
# created by the loop above; a missing one fails with "object not found".
cloudlist <- list(
  Y02B1, Y02B2, Y02B3, Y02B4, Y02B5, Y02B6, Y02B7, Y02B8, Y02B9,
  Y02C1, Y02C2,
  Y02E1, Y02E2, Y02E3, Y02E4, Y02E5, Y02E6, Y02E7,
  Y02T1, Y02T3, Y02T5, Y02T7, Y02T9
)
# Render a word cloud of the abstracts in `x` to
# ./wordclouds/abstracts/<subsec>.png (1280 x 800).
#
# x: data frame with columns `subsec`, `appln_id` and `appln_abstract`. The
#    first `subsec` value names the output file.
# Returns the value of dev.off(), as before.
wordcloudpng <- function(x){
  name <- x$subsec[1]

  # Keep one row per application id so no abstract is counted more than once.
  abstracts <- select(x, appln_id, appln_abstract)
  abstracts <- abstracts[!duplicated(abstracts$appln_id), ]

  # `corpus` rather than `c`, so the local no longer shadows base::c().
  corpus <- Corpus(VectorSource(abstracts$appln_abstract))
  corpus <- tm_map(corpus, removeWords, stopwords("english"))
  # Also drop a few terms that the script treats as uninformative.
  corpus <- tm_map(
    corpus, removeWords, c("method", "device", "system", "apparatus")
  )

  # Create the output directory if it is missing, so png() can open the file.
  out_dir <- "./wordclouds/abstracts"
  dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
  png(file.path(out_dir, paste0(name, ".png")), width = 1280, height = 800)

  # Close the device even if wordcloud() fails, so a failed plot cannot leave
  # a dangling PNG device open for the next call.
  device_closed <- FALSE
  on.exit(if (!device_closed) dev.off(), add = TRUE)

  wordcloud(corpus)

  device_closed <- TRUE
  dev.off()
}
# Write one PNG per sub-section table. Iterating over the list elements
# directly (instead of 1:length(cloudlist)) does nothing for an empty list
# rather than indexing positions 1 and 0.
for (subsec_patents in cloudlist) {
  wordcloudpng(subsec_patents)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment