-
-
Save jan-glx/8e582a60bfee435447db7e042c7cf6ff to your computer and use it in GitHub Desktop.
Create a wordcloud of your google search history
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Script to make a word cloud of your Google searches. Get your Google search
# history at https://myactivity.google.com/more-activity . This script assumes
# the exported JSON files are in a 'Searches' subfolder of the working
# directory.
library(jsonlite)
library(rlist)
library(magrittr)
library(stringi)
library(wordcloud)
library(tm)
library(SnowballC)

# Only pick up JSON files: anything else lying around in the folder would make
# fromJSON() fail.
search_files <- list.files(
  "Searches",
  pattern = "\\.json$",
  ignore.case = TRUE,
  full.names = TRUE
)
if (length(search_files) == 0) {
  stop("No JSON files found in the 'Searches' subfolder.", call. = FALSE)
}

# Parse every file into nested lists, then concatenate three levels of nesting
# so that each remaining element exposes a `query` field for list.mapv().
# NOTE(review): the three flattening rounds presumably mirror the nesting of
# the export format -- confirm against a current export.
queries <- lapply(search_files, fromJSON, simplifyDataFrame = FALSE) %>%
  do.call("c", .) %>%
  do.call("c", .) %>%
  do.call("c", .) %>%
  list.mapv(.$query) %>%
  stri_trans_tolower %>%
  removeWords(stopwords("english")) %>%
  # removeWords(stopwords("german")) %>%
  `[`(., !grepl(pattern = "[-][>]", x = .)) # filter some gmaps stuff

# Split each query on whitespace. Removing stop words leaves gaps behind, so
# empty tokens are dropped; tokens containing ':' are dropped as well.
words <- stri_split_regex(queries, "\\s+") %>%
  unlist() %>%
  `[`(., . != "") %>%
  `[`(., !grepl(pattern = "[:]", x = .)) # filter out google search parameters
# words %<>% wordStem

# Word frequencies, most frequent first.
word_table <- table(words) %>%
  sort(decreasing = TRUE)

# Ten-step colour ramp from red to blue, passed to wordcloud() as `colors`.
pal <- colorRampPalette(c("red", "blue"))(10)
wordcloud(
  names(word_table),
  word_table,
  scale = c(3, 1),
  min.freq = 10,
  colors = pal,
  random.order = TRUE,
  max.words = 150
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment