flovv · October 25, 2016 05:31
diff --git a/digitalTransformation.R b/digitalTransformation.R
 #devtools::install_github("ropengov/rtimes")
 #devtools::install_github("chgrl/diezeit")
 library(rtimes)
 library(diezeit)

 require(plyr)
 require(lubridate)
 require(ggthemes)
 require(ggplot2)


 ###################

 # Convert a list of API result records into a data frame, one row per record.
 #
 # l: list of records; each record is a (possibly nested) list of fields.
 # Returns a data frame whose columns are the union of all flattened field
 # names; fields a record lacks are NA. The rows are combined through a single
 # matrix, so all columns share one type (character whenever any field is text).
 dataframeFromResult <- function(l) {
   # Flatten each record to a named vector; NULL fields become NA so that
   # unlist() does not silently drop them.
   flat <- lapply(l, function(x) {
     # vapply() always yields a logical vector, even for an empty record
     # (sapply() returns list() there, which is not a valid subscript).
     x[vapply(x, is.null, logical(1))] <- NA
     unlist(x)
   })
   keys <- unique(unlist(lapply(flat, names)))
   # Align every record to the full key set; keys missing from a record index
   # as NA and are then relabelled with the proper column names.
   rows <- lapply(flat, function(v) {
     # An empty record flattens to NULL, which cannot carry names.
     if (is.null(v)) {
       v <- character(0)
     }
     setNames(v[keys], keys)
   })
   # Keep strings as character (not factor) on every R version, so callers
   # can convert columns with as.numeric()/as.Date() directly.
   data.frame(do.call(rbind, rows), stringsAsFactors = FALSE)
 }

 ############# NYT
 # Fetch one page of as_search() results and return them as a data frame.
 #
 # q: search query; page: result page number;
 # begin_date / end_date: date bounds, in the same format as the defaults;
 # NYTIMES_AS_KEY: API key handed to as_search(). Defaults to the global
 # variable of the same name. (The former default `NYTIMES_AS_KEY =
 # NYTIMES_AS_KEY` referred to the argument itself and failed with a
 # recursive default argument error whenever the key was omitted.)
 getArticles <- function(q, page, begin_date = "19800101", end_date = "20161010",
                         NYTIMES_AS_KEY = get("NYTIMES_AS_KEY", envir = globalenv())) {
   res <- as_search(q = q, page = page, begin_date = begin_date,
                    end_date = end_date, key = NYTIMES_AS_KEY)
   # Flatten the records in res$data into one row each.
   dataframeFromResult(res$data)
 }

 # Run as_search() for a query and return only the `meta` element of the
 # response.
 #
 # q: search query;
 # begin_date / end_date: date bounds, in the same format as the defaults;
 # NYTIMES_AS_KEY: API key handed to as_search(). Defaults to the global
 # variable of the same name. (The former default `NYTIMES_AS_KEY =
 # NYTIMES_AS_KEY` referred to the argument itself and failed with a
 # recursive default argument error whenever the key was omitted.)
 howManyArticles <- function(q, begin_date = "19800101", end_date = "20161010",
                             NYTIMES_AS_KEY = get("NYTIMES_AS_KEY", envir = globalenv())) {
   res <- as_search(q = q, begin_date = begin_date, end_date = end_date,
                    key = NYTIMES_AS_KEY)
   res$meta
 }

 ############### ZEIT
 # Query zeit_search("content", ...) for `term` (up to 1000 matches) and return
 # the matches as a data frame with an added `date` column (class Date)
 # derived from `release_date`.
 getZeitResults <- function(term) {
   ##limitSearch <- zeit_search("content", term, print=FALSE, limit=3)
   unlimited <- zeit_search("content", term, print = FALSE, limit = 1000)

   dff <- dataframeFromResult(unlimited$matches)
   # Keep only the part of release_date before the first "T". Base sub() is
   # used because stringr (str_split_fixed) is never loaded by this script.
   dff$date <- as.Date(sub("T.*$", "", dff$release_date))
   dff
 }


 ############################# NYT data set
 q <- '"digital transformation"'
 pages <- 0:9

 # Fail early with a clear message if the API key has not been defined.
 if (!exists("NYTIMES_AS_KEY")) {
   stop("NYTIMES_AS_KEY is not defined; set it to your NYT API key first.",
        call. = FALSE)
 }

 # One row per page request. stringsAsFactors = FALSE keeps the query and the
 # key as plain strings; expand.grid() turns them into factors by default.
 dat <- expand.grid(page = pages, q = q, NYTIMES_AS_KEY = NYTIMES_AS_KEY,
                    stringsAsFactors = FALSE)
 # Call getArticles() once per row of dat and combine the results.
 allArticles <- mdply(dat, getArticles)

 #################


 # Derive the publication year and count the articles per year.
 allArticles$PubYear <- year(allArticles$pub_date)
 dd <- ddply(allArticles, .(PubYear), summarise, N = length(pub_date))

 # Yearly counts with a smoothed trend line (axis label typo "aricles" fixed).
 ggplot(dd, aes(PubYear, N)) +
   geom_point() +
   geom_smooth() +
   theme_economist(base_size = 12) +
   ylab("N - Number of articles per year in the NYT") +
   xlab("Publication Year")


 ###################
 ### ZEIT

 # Fetch the ZEIT matches, derive the publication year, count per year.
 out <- getZeitResults("digitale Transformation")
 out$PubYear <- year(out$date)
 dd <- ddply(out, .(PubYear), summarise, N = length(date))

 # Same layout as the NYT plot. The point layer is added once (it was
 # duplicated), and the theme is applied once so base_size = 12 is not
 # discarded by a second theme_economist() call. Label typo "aricles" fixed.
 ggplot(dd, aes(PubYear, N)) +
   geom_point() +
   geom_smooth() +
   theme_economist(base_size = 12) +
   ylab("N - Number of articles per year in the ZEIT") +
   xlab("Publication Year")


 ############# Hackernews API calls
 require(httr)
 require(reshape2)

 res <- GET('http://hn.algolia.com/api/v1/search?query="digital%20transformation"&tags=story&hitsPerPage=50')
 # Stop on an HTTP error instead of trying to tabulate an error body.
 stop_for_status(res)
 cont <- content(res, "parsed")
 # dataframeFromResult() is the converter defined in this script; the
 # dataframeFromJSON() called here before is not defined anywhere in it.
 df <- dataframeFromResult(cont$hits)

 # Go through as.character() so the numbers are parsed from their text even
 # when the columns are factors (as.numeric() on a factor gives level codes).
 df$points <- as.numeric(as.character(df$points))
 df$created_at <- as.Date(df$created_at)
 ggplot(df, aes(created_at, points)) +
   geom_smooth() +
   geom_point(size = 1) +
   theme_economist()

 df$year <- year(df$created_at)
 df$num_comments <- as.numeric(as.character(df$num_comments))
 # na.rm = TRUE: a field missing from one hit is NA and should not turn the
 # whole year's total into NA.
 dd <- ddply(df, .(year), summarise,
             Points = sum(points, na.rm = TRUE),
             Comments = sum(num_comments, na.rm = TRUE),
             Articles = length(year))
 # Long format: one row per (year, measure) so each measure gets its own facet.
 mm <- melt(dd, id.vars = "year")
 ggplot(mm, aes(year, value)) +
   geom_point(size = 2, color = "red") +
   geom_line() +
   theme_economist() +
   facet_grid(~variable) +
   ggtitle("Hackernews search for digital transformation")
	#devtools::install_github("ropengov/rtimes")
	#devtools::install_github("chgrl/diezeit")
	library(rtimes)
	library(diezeit)

	require(plyr)
	require(lubridate)
	require(ggthemes)
	require(ggplot2)


	###################

	# Convert a list of API result records into a data frame, one row per record.
	#
	# l: list of records; each record is a (possibly nested) list of fields.
	# Returns a data frame whose columns are the union of all flattened field
	# names; fields a record lacks are NA. The rows are combined through a single
	# matrix, so all columns share one type (character whenever any field is text).
	dataframeFromResult <- function(l) {
	  # Flatten each record to a named vector; NULL fields become NA so that
	  # unlist() does not silently drop them.
	  flat <- lapply(l, function(x) {
	    # vapply() always yields a logical vector, even for an empty record
	    # (sapply() returns list() there, which is not a valid subscript).
	    x[vapply(x, is.null, logical(1))] <- NA
	    unlist(x)
	  })
	  keys <- unique(unlist(lapply(flat, names)))
	  # Align every record to the full key set; keys missing from a record index
	  # as NA and are then relabelled with the proper column names.
	  rows <- lapply(flat, function(v) {
	    # An empty record flattens to NULL, which cannot carry names.
	    if (is.null(v)) {
	      v <- character(0)
	    }
	    setNames(v[keys], keys)
	  })
	  # Keep strings as character (not factor) on every R version, so callers
	  # can convert columns with as.numeric()/as.Date() directly.
	  data.frame(do.call(rbind, rows), stringsAsFactors = FALSE)
	}

	############# NYT
	# Fetch one page of as_search() results and return them as a data frame.
	#
	# q: search query; page: result page number;
	# begin_date / end_date: date bounds, in the same format as the defaults;
	# NYTIMES_AS_KEY: API key handed to as_search(). Defaults to the global
	# variable of the same name. (The former default `NYTIMES_AS_KEY =
	# NYTIMES_AS_KEY` referred to the argument itself and failed with a
	# recursive default argument error whenever the key was omitted.)
	getArticles <- function(q, page, begin_date = "19800101", end_date = "20161010",
	                        NYTIMES_AS_KEY = get("NYTIMES_AS_KEY", envir = globalenv())) {
	  res <- as_search(q = q, page = page, begin_date = begin_date,
	                   end_date = end_date, key = NYTIMES_AS_KEY)
	  # Flatten the records in res$data into one row each.
	  dataframeFromResult(res$data)
	}

	# Run as_search() for a query and return only the `meta` element of the
	# response.
	#
	# q: search query;
	# begin_date / end_date: date bounds, in the same format as the defaults;
	# NYTIMES_AS_KEY: API key handed to as_search(). Defaults to the global
	# variable of the same name. (The former default `NYTIMES_AS_KEY =
	# NYTIMES_AS_KEY` referred to the argument itself and failed with a
	# recursive default argument error whenever the key was omitted.)
	howManyArticles <- function(q, begin_date = "19800101", end_date = "20161010",
	                            NYTIMES_AS_KEY = get("NYTIMES_AS_KEY", envir = globalenv())) {
	  res <- as_search(q = q, begin_date = begin_date, end_date = end_date,
	                   key = NYTIMES_AS_KEY)
	  res$meta
	}

	############### ZEIT
	# Query zeit_search("content", ...) for `term` (up to 1000 matches) and return
	# the matches as a data frame with an added `date` column (class Date)
	# derived from `release_date`.
	getZeitResults <- function(term) {
	  ##limitSearch <- zeit_search("content", term, print=FALSE, limit=3)
	  unlimited <- zeit_search("content", term, print = FALSE, limit = 1000)

	  dff <- dataframeFromResult(unlimited$matches)
	  # Keep only the part of release_date before the first "T". Base sub() is
	  # used because stringr (str_split_fixed) is never loaded by this script.
	  dff$date <- as.Date(sub("T.*$", "", dff$release_date))
	  dff
	}


	############################# NYT data set
	q <- '"digital transformation"'
	pages <- 0:9

	# Fail early with a clear message if the API key has not been defined.
	if (!exists("NYTIMES_AS_KEY")) {
	  stop("NYTIMES_AS_KEY is not defined; set it to your NYT API key first.",
	       call. = FALSE)
	}

	# One row per page request. stringsAsFactors = FALSE keeps the query and the
	# key as plain strings; expand.grid() turns them into factors by default.
	dat <- expand.grid(page = pages, q = q, NYTIMES_AS_KEY = NYTIMES_AS_KEY,
	                   stringsAsFactors = FALSE)
	# Call getArticles() once per row of dat and combine the results.
	allArticles <- mdply(dat, getArticles)

	#################


	# Derive the publication year and count the articles per year.
	allArticles$PubYear <- year(allArticles$pub_date)
	dd <- ddply(allArticles, .(PubYear), summarise, N = length(pub_date))

	# Yearly counts with a smoothed trend line (axis label typo "aricles" fixed).
	ggplot(dd, aes(PubYear, N)) +
	  geom_point() +
	  geom_smooth() +
	  theme_economist(base_size = 12) +
	  ylab("N - Number of articles per year in the NYT") +
	  xlab("Publication Year")


	###################
	### ZEIT

	# Fetch the ZEIT matches, derive the publication year, count per year.
	out <- getZeitResults("digitale Transformation")
	out$PubYear <- year(out$date)
	dd <- ddply(out, .(PubYear), summarise, N = length(date))

	# Same layout as the NYT plot. The point layer is added once (it was
	# duplicated), and the theme is applied once so base_size = 12 is not
	# discarded by a second theme_economist() call. Label typo "aricles" fixed.
	ggplot(dd, aes(PubYear, N)) +
	  geom_point() +
	  geom_smooth() +
	  theme_economist(base_size = 12) +
	  ylab("N - Number of articles per year in the ZEIT") +
	  xlab("Publication Year")


	############# Hackernews API calls
	require(httr)
	require(reshape2)

	res <- GET('http://hn.algolia.com/api/v1/search?query="digital%20transformation"&tags=story&hitsPerPage=50')
	# Stop on an HTTP error instead of trying to tabulate an error body.
	stop_for_status(res)
	cont <- content(res, "parsed")
	# dataframeFromResult() is the converter defined in this script; the
	# dataframeFromJSON() called here before is not defined anywhere in it.
	df <- dataframeFromResult(cont$hits)

	# Go through as.character() so the numbers are parsed from their text even
	# when the columns are factors (as.numeric() on a factor gives level codes).
	df$points <- as.numeric(as.character(df$points))
	df$created_at <- as.Date(df$created_at)
	ggplot(df, aes(created_at, points)) +
	  geom_smooth() +
	  geom_point(size = 1) +
	  theme_economist()

	df$year <- year(df$created_at)
	df$num_comments <- as.numeric(as.character(df$num_comments))
	# na.rm = TRUE: a field missing from one hit is NA and should not turn the
	# whole year's total into NA.
	dd <- ddply(df, .(year), summarise,
	            Points = sum(points, na.rm = TRUE),
	            Comments = sum(num_comments, na.rm = TRUE),
	            Articles = length(year))
	# Long format: one row per (year, measure) so each measure gets its own facet.
	mm <- melt(dd, id.vars = "year")
	ggplot(mm, aes(year, value)) +
	  geom_point(size = 2, color = "red") +
	  geom_line() +
	  theme_economist() +
	  facet_grid(~variable) +
	  ggtitle("Hackernews search for digital transformation")