cigrainger · February 20, 2014 12:12
diff --git a/gistfile1.r b/gistfile1.r
 # Load required packages
 library(dplyr)
 library(reshape2)

 # Load data
 # Switch to the project folder, then restore the objects stored in
 # longform.rdata (presumably including `data` -- the rest of the script
 # relies on it).
 setwd('~/AeroFS/Googlestuff/chris/')
 load(file = "longform.rdata")
 # Convert the month column to Date class.
 data[["month"]] <- as.Date(data[["month"]])

 # Get a random sample of word-country combinations to check against google trends
 # Reduce `data` to its distinct (country, word, country.name) rows and draw
 # 50 of them by row name.
 comb.sample <- select(data, country, word, country.name)
 comb.sample <- unique(comb.sample)
 random.sample <- sample(row.names(comb.sample), 50)
 comb.sample <- comb.sample[random.sample, ]
 # Hand-entered 0/1 flags, one per sampled row.
 # NOTE(review): sample() is not seeded, so a re-run draws different rows and
 # these flags will not line up with them -- confirm how they were recorded.
 comb.sample$trends <- c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1)
 # Attach the flags to the full data. No `by` is given, so the join uses the
 # columns the two tables have in common.
 sample.data <- inner_join(data, comb.sample)
 # Maximum value per (word, country, trends) combination. Plain verb calls are
 # used instead of dplyr's old `%.%` chain operator, which later dplyr
 # releases dropped; the steps and their order are unchanged.
 sample.data <- select(sample.data, word, country, value, trends)
 sample.data <- group_by(sample.data, word, country, trends)
 sample.data <- summarise(sample.data, value = max(value))
 # Count the combinations flagged trends == 1 whose maximum value is 0.
 length(sample.data$word[sample.data$trends == 1 & sample.data$value == 0])

 # Repeat the sample check for a fixed list of country names.
 # 'canada' was misspelled 'canda', so it could never match in the %in%
 # filter below.
 # NOTE(review): assumes country.name holds lower-case names like the other
 # entries in this list -- confirm against the data.
 english.countries <- c('india','nigeria','united kingdom','south sudan','tanzania','kenya','canada','ghana','australia','zambia','sudan')
 # Distinct (country, word, country.name) rows restricted to those countries.
 # Plain verb calls are used instead of dplyr's old `%.%` chain operator,
 # which later dplyr releases dropped.
 english.data <- select(data, country, word, country.name)
 english.data <- filter(english.data, country.name %in% english.countries)
 english.data <- unique(english.data)
 # Draw 20 of those rows by row name.
 english.sample <- sample(row.names(english.data), 20)
 english.data <- english.data[english.sample, ]
 # Hand-entered 0/1 flags, one per sampled row.
 # NOTE(review): sample() is not seeded, so a re-run draws different rows and
 # these flags will not line up with them -- confirm how they were recorded.
 english.data$trends <- c(0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0)
 # Attach the flags to the full data (join on the shared columns), then take
 # the maximum value per (word, country, trends) combination.
 english.data <- inner_join(data, english.data)
 english.data <- select(english.data, word, country, value, trends)
 english.data <- group_by(english.data, word, country, trends)
 english.data <- summarise(english.data, value = max(value))
 # Count the combinations flagged trends == 1 whose maximum value is 0.
 length(english.data$word[english.data$trends == 1 & english.data$value == 0])

 # Check maximum values for keywords/countries and create dfs for comparison plots
 # Largest `value` per country/word pair, per country, and per word.
 max.country.word <- summarise(
   group_by(data, country, word),
   value = max(value)
 )
 max.country <- summarise(
   group_by(data, country),
   value = max(value)
 )
 max.word <- summarise(
   group_by(data, word),
   value = max(value)
 )
 # One subset of `data` per country-code / keyword pair; both conditions must
 # hold for a row to be kept.
 gbr.primarycircuit <- filter(data, country == 'gbr' & word == 'primary circuit')
 chn.biologicaltreatment <- filter(data, country == 'chn' & word == 'biological treatment')
 jpn.carcontrol <- filter(data, country == 'jpn' & word == 'car control')
 ind.nitrogencarbon <- filter(data, country == 'ind' & word == 'nitrogen carbon')
 fra.gasoil <- filter(data, country == 'fra' & word == 'gas oil')
 nga.constructionmachine <- filter(data, country == 'nga' & word == 'construction machine')
 gbr.combustionwaste <- filter(data, country == 'gbr' & word == 'combustion waste')
 rus.systemsolar <- filter(data, country == 'rus' & word == 'system solar')

 # Save file for use in comparison.rmd
 # Write the three summary tables and the eight per-pair subsets to
 # tempdata.rdata, naming the objects through save()'s `list` argument.
 save(
   list = c(
     "max.country", "max.word", "max.country.word",
     "gbr.primarycircuit", "chn.biologicaltreatment", "jpn.carcontrol",
     "ind.nitrogencarbon", "fra.gasoil", "nga.constructionmachine",
     "gbr.combustionwaste", "rus.systemsolar"
   ),
   file = 'tempdata.rdata'
 )
	# Load required packages
	library(dplyr)
	library(reshape2)

	# Load data
	# Switch to the project folder, then restore the objects stored in
	# longform.rdata (presumably including `data` -- the rest of the script
	# relies on it).
	setwd('~/AeroFS/Googlestuff/chris/')
	load(file = "longform.rdata")
	# Convert the month column to Date class.
	data[["month"]] <- as.Date(data[["month"]])

	# Get a random sample of word-country combinations to check against google trends
	# Reduce `data` to its distinct (country, word, country.name) rows and draw
	# 50 of them by row name.
	comb.sample <- select(data, country, word, country.name)
	comb.sample <- unique(comb.sample)
	random.sample <- sample(row.names(comb.sample), 50)
	comb.sample <- comb.sample[random.sample, ]
	# Hand-entered 0/1 flags, one per sampled row.
	# NOTE(review): sample() is not seeded, so a re-run draws different rows and
	# these flags will not line up with them -- confirm how they were recorded.
	comb.sample$trends <- c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1)
	# Attach the flags to the full data. No `by` is given, so the join uses the
	# columns the two tables have in common.
	sample.data <- inner_join(data, comb.sample)
	# Maximum value per (word, country, trends) combination. Plain verb calls are
	# used instead of dplyr's old `%.%` chain operator, which later dplyr
	# releases dropped; the steps and their order are unchanged.
	sample.data <- select(sample.data, word, country, value, trends)
	sample.data <- group_by(sample.data, word, country, trends)
	sample.data <- summarise(sample.data, value = max(value))
	# Count the combinations flagged trends == 1 whose maximum value is 0.
	length(sample.data$word[sample.data$trends == 1 & sample.data$value == 0])

	# Repeat the sample check for a fixed list of country names.
	# 'canada' was misspelled 'canda', so it could never match in the %in%
	# filter below.
	# NOTE(review): assumes country.name holds lower-case names like the other
	# entries in this list -- confirm against the data.
	english.countries <- c('india','nigeria','united kingdom','south sudan','tanzania','kenya','canada','ghana','australia','zambia','sudan')
	# Distinct (country, word, country.name) rows restricted to those countries.
	# Plain verb calls are used instead of dplyr's old `%.%` chain operator,
	# which later dplyr releases dropped.
	english.data <- select(data, country, word, country.name)
	english.data <- filter(english.data, country.name %in% english.countries)
	english.data <- unique(english.data)
	# Draw 20 of those rows by row name.
	english.sample <- sample(row.names(english.data), 20)
	english.data <- english.data[english.sample, ]
	# Hand-entered 0/1 flags, one per sampled row.
	# NOTE(review): sample() is not seeded, so a re-run draws different rows and
	# these flags will not line up with them -- confirm how they were recorded.
	english.data$trends <- c(0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0)
	# Attach the flags to the full data (join on the shared columns), then take
	# the maximum value per (word, country, trends) combination.
	english.data <- inner_join(data, english.data)
	english.data <- select(english.data, word, country, value, trends)
	english.data <- group_by(english.data, word, country, trends)
	english.data <- summarise(english.data, value = max(value))
	# Count the combinations flagged trends == 1 whose maximum value is 0.
	length(english.data$word[english.data$trends == 1 & english.data$value == 0])

	# Check maximum values for keywords/countries and create dfs for comparison plots
	# Largest `value` per country/word pair, per country, and per word.
	max.country.word <- summarise(
		group_by(data, country, word),
		value = max(value)
	)
	max.country <- summarise(
		group_by(data, country),
		value = max(value)
	)
	max.word <- summarise(
		group_by(data, word),
		value = max(value)
	)
	# One subset of `data` per country-code / keyword pair; both conditions must
	# hold for a row to be kept.
	gbr.primarycircuit <- filter(data, country == 'gbr' & word == 'primary circuit')
	chn.biologicaltreatment <- filter(data, country == 'chn' & word == 'biological treatment')
	jpn.carcontrol <- filter(data, country == 'jpn' & word == 'car control')
	ind.nitrogencarbon <- filter(data, country == 'ind' & word == 'nitrogen carbon')
	fra.gasoil <- filter(data, country == 'fra' & word == 'gas oil')
	nga.constructionmachine <- filter(data, country == 'nga' & word == 'construction machine')
	gbr.combustionwaste <- filter(data, country == 'gbr' & word == 'combustion waste')
	rus.systemsolar <- filter(data, country == 'rus' & word == 'system solar')

	# Save file for use in comparison.rmd
	# Write the three summary tables and the eight per-pair subsets to
	# tempdata.rdata, naming the objects through save()'s `list` argument.
	save(
		list = c(
			"max.country", "max.word", "max.country.word",
			"gbr.primarycircuit", "chn.biologicaltreatment", "jpn.carcontrol",
			"ind.nitrogencarbon", "fra.gasoil", "nga.constructionmachine",
			"gbr.combustionwaste", "rus.systemsolar"
		),
		file = 'tempdata.rdata'
	)
No results found