Created
April 9, 2012 15:09
-
-
Save josefslerka/2344144 to your computer and use it in GitHub Desktop.
Wordcloud pro Vodafone
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Word cloud of the most frequent terms in a text corpus.
#
# Input:  `mydata.vectors`, a character vector with one document per element.
#         It must be defined before this script runs (see the example below).
# Output: "wordcloud_packages.png" (1024x768 px) in the working directory.
library(tm)
library(wordcloud)
library(RColorBrewer)

# The corpus must be supplied here, for example:
# mydata.vectors <- character(0)
# mydata.vectors <- c("Dobre dopoledne Mateji,pokud jste pri registraci zvolil Moravskoslezsky kraj, zobrazila se vam v aplikaci informace, ze soutez probehne 30.1. Vcera 31.1. sef gangu navstivil Olomoucky a Zlinsky kraj. Zvoleny kraj naleznete v aplikaci v zalozce \"Souboj kraju\".Kazdopadne dnes hrajeme o 75 telefonu Samsung GALAXY mini. Online kolo zacina jiz ve 13:00.Drzim palce, Sandra, O2 Guru", mydata.vectors)

# Fail early with a clear message instead of an "object not found" error
# from deep inside VectorSource().
if (!exists("mydata.vectors")) {
  stop(
    "`mydata.vectors` is not defined; assign a character vector of ",
    "documents before running this script.",
    call. = FALSE
  )
}

corpus <- Corpus(VectorSource(mydata.vectors))

# tolower() is a plain base function, so it has to be wrapped in
# content_transformer(); otherwise tm_map() replaces the documents with bare
# character vectors and TermDocumentMatrix() fails later on.
corpus <- tm_map(corpus, content_transformer(tolower))
corpus <- tm_map(corpus, removePunctuation)
corpus <- tm_map(corpus, removeNumbers)

# Remove generic (English) and custom stopwords. The custom entries are Czech
# words written without diacritics plus brand/account names. Punctuation has
# already been stripped at this point, so entries must not contain any.
my_stopwords <- c(
  stopwords("english"),
  "mam", "prosim", "vodafonepece", "kdy", "bych", "vodafonu", "vodafone",
  "vodafonecz", "dobry den", "se", "na", "v", "co", "ze", "o", "je", "k",
  "z", "proti", "tam", "taky", "ted", "ocz", "mel", "jde", "dekuji",
  "nekdo", "treba", "tomu", "jestli", "snad", "neco", "den", "mozna",
  "neni", "byly", "si", "dnes", "cz", "timto", "budes", "budem", "byli",
  "jses", "muj", "svym", "ta", "tomto", "tohle", "tuto", "tyto", "jej",
  "zda", "proc", "mate", "tato", "kam", "tohoto", "kdo", "kteri", "mi",
  "nam", "tom", "tomuto", "mit", "nic", "proto", "kterou", "byla", "toho",
  "protoze", "asi", "ho", "nasi", "napiste", "re", "rt", "coz", "tim",
  "takze", "svych", "jeji", "svymi", "jste", "aj", "tu", "tedy", "teto",
  "bylo", "kde", "ke", "prave", "ji", "nad", "nejsou", "ci", "pod", "tema",
  "mezi", "pres", "ty", "pak", "vam", "ani", "kdyz", "vsak", "ne", "jsem",
  "tento", "aby", "jsme", "pred", "pta", "jejich", "byl", "jeste", "az",
  "bez", "take", "pouze", "prvni", "vase", "ktera", "nas", "novy", "pokud",
  "muze", "jeho", "sve", "jine", "zpravy", "nove", "neni", "vas", "jen",
  "podle", "zde", "clanek", "uz", "byt", "vice", "bude", "jiz", "nez",
  "ktery", "by", "ktere", "co", "nebo", "ten", "tak", "ma", "pri", "od",
  "po", "jsou", "jak", "dalsi", "ale", "si", "ve", "to", "jako", "za",
  "zpet", "ze", "do", "pro", "je", "na"
)
corpus <- tm_map(corpus, removeWords, my_stopwords)

# Term frequencies across the whole corpus, most frequent first.
ap.tdm <- TermDocumentMatrix(corpus)
ap.m <- as.matrix(ap.tdm)
ap.v <- sort(rowSums(ap.m), decreasing = TRUE)
ap.d <- data.frame(word = names(ap.v), freq = ap.v)

# How many terms occur once, twice, ... (helps when choosing min.freq).
table(ap.d$freq)

pal2 <- brewer.pal(8, "Dark2")

# Render to PNG. The device is closed in `finally` so that a failure inside
# wordcloud() does not leave a dangling graphics device behind.
png("wordcloud_packages.png", width = 1024, height = 768)
invisible(tryCatch(
  wordcloud(
    ap.d$word, ap.d$freq,
    scale = c(10, .2), min.freq = 3,
    max.words = 150, random.order = FALSE, rot.per = .15, colors = pal2
  ),
  finally = dev.off()
))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment