Skip to content

Instantly share code, notes, and snippets.

@cigrainger
Created February 20, 2014 12:12
Show Gist options
  • Select an option

  • Save cigrainger/9112214 to your computer and use it in GitHub Desktop.

Select an option

Save cigrainger/9112214 to your computer and use it in GitHub Desktop.
# Load required packages
library(dplyr)
library(reshape2)
# Move into the project folder and restore the saved workspace objects
# (presumably including the long-form `data` frame used below -- confirm).
setwd('~/AeroFS/Googlestuff/chris/')
load(file = "longform.rdata")
# Store the month column as Date
data[["month"]] <- as.Date(data[["month"]])
# Spot-check a random sample of word-country combinations against Google Trends.
# NOTE(review): the draw below is unseeded, so the hand-entered `trends` flags
# only line up with the one sample they were originally coded for; re-running
# the script pairs them with different rows. Confirm before trusting the count.
comb.sample <- select(data, country, word, country.name)
comb.sample <- unique(comb.sample)
random.sample <- sample(row.names(comb.sample), 50)
comb.sample <- comb.sample[random.sample, ]
# Hand-coded flags, one per sampled row (presumably 1 = the combination shows
# up in Google Trends -- TODO confirm the coding).
comb.sample$trends <- c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1)
# Attach the flags to the full data. With no `by`, the join matches on every
# column name the two tables share.
sample.data <- inner_join(data, comb.sample)
# Reduce to one row per word/country/flag holding the maximum observed value.
# `%>%` is used instead of `%.%`, which was deprecated in dplyr 0.2 and is no
# longer available in current dplyr releases.
sample.data <- sample.data %>%
  select(word, country, value, trends) %>%
  group_by(word, country, trends) %>%
  summarise(value = max(value))
# Count of sampled combinations flagged 1 whose maximum value is 0
length(sample.data$word[sample.data$trends == 1 & sample.data$value == 0])
# Repeat the spot check on word-country combinations from a fixed list of
# countries (named `english.countries`; names are matched against
# `country.name` exactly, so they must be spelled as they appear in the data).
# 'canada' was previously misspelled 'canda', which silently dropped it.
english.countries <- c('india','nigeria','united kingdom','south sudan','tanzania','kenya','canada','ghana','australia','zambia','sudan')
# `%>%` is used instead of `%.%`, which was deprecated in dplyr 0.2 and is no
# longer available in current dplyr releases.
english.data <- data %>%
  select(country, word, country.name) %>%
  filter(country.name %in% english.countries)
english.data <- unique(english.data)
# NOTE(review): this draw is unseeded, so the hand-entered `trends` flags
# below only match the sample they were originally coded for; re-running (or
# changing the country list) pairs them with different rows.
english.sample <- sample(row.names(english.data), 20)
english.data <- english.data[english.sample, ]
# Hand-coded flags, one per sampled row (presumably 1 = the combination shows
# up in Google Trends -- TODO confirm the coding).
english.data$trends <- c(0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0)
# Attach the flags to the full data. With no `by`, the join matches on every
# column name the two tables share.
english.data <- inner_join(data, english.data)
# Reduce to one row per word/country/flag holding the maximum observed value.
english.data <- english.data %>%
  select(word, country, value, trends) %>%
  group_by(word, country, trends) %>%
  summarise(value = max(value))
# Count of sampled combinations flagged 1 whose maximum value is 0
length(english.data$word[english.data$trends == 1 & english.data$value == 0])
# Largest `value` per country/keyword pair, per country, and per keyword
max.country.word <- summarise(
  group_by(data, country, word),
  value = max(value)
)
max.country <- summarise(
  group_by(data, country),
  value = max(value)
)
max.word <- summarise(
  group_by(data, word),
  value = max(value)
)
# One subset per country/keyword pair, kept as separate data frames for the
# comparison plots
gbr.primarycircuit <- filter(data, country == 'gbr' & word == 'primary circuit')
chn.biologicaltreatment <- filter(data, country == 'chn' & word == 'biological treatment')
jpn.carcontrol <- filter(data, country == 'jpn' & word == 'car control')
ind.nitrogencarbon <- filter(data, country == 'ind' & word == 'nitrogen carbon')
fra.gasoil <- filter(data, country == 'fra' & word == 'gas oil')
nga.constructionmachine <- filter(data, country == 'nga' & word == 'construction machine')
gbr.combustionwaste <- filter(data, country == 'gbr' & word == 'combustion waste')
rus.systemsolar <- filter(data, country == 'rus' & word == 'system solar')
# Write every object above to a single file that comparison.rmd loads
save(
  list = c(
    "max.country",
    "max.word",
    "max.country.word",
    "gbr.primarycircuit",
    "chn.biologicaltreatment",
    "jpn.carcontrol",
    "ind.nitrogencarbon",
    "fra.gasoil",
    "nga.constructionmachine",
    "gbr.combustionwaste",
    "rus.systemsolar"
  ),
  file = 'tempdata.rdata'
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment