Skip to content

Instantly share code, notes, and snippets.

@ttunguz
Created July 15, 2014 11:10
Show Gist options
  • Save ttunguz/7ba8ec7e2588736e86f6 to your computer and use it in GitHub Desktop.
Save ttunguz/7ba8ec7e2588736e86f6 to your computer and use it in GitHub Desktop.
library(ggplot2)
library(reshape)
library(scales)
### INSTRUCTIONS
# 1. Save the tweet_activity_metrics.csv file in the same local directory as this script
# 2. Change the time zone specification in the third IMPORT DATA statement to your own time zone
# 3. For each top_n_for_hour or top_n_for_day call, test for statistical significance by comparing the result of the top_n function with the result of the matrix t test
# IMPORT DATA
# Load the analytics export; the relative path is resolved against the working directory.
data = read.csv("tweet_activity_metrics.csv")
# Parse the raw timestamps as Europe/London wall-clock time.
# NOTE(review): presumably the export is in UTC, which differs from London during summer time -- confirm.
data$time = as.POSIXct(data$time, tz="Europe/London")
# Re-express the timestamps as text in the target time zone (this is the statement to edit).
data$time = format(data$time, tz="America/Los_Angeles")
# Hour of day (0-23) read straight from the converted timestamp text.
data$hour = as.POSIXlt(data$time)$hour
# Keep only tweets posted from 05:00 up to 20:59 in the target time zone.
data = subset(data, hour > 4 & hour < 21)
## BASICS
## GENERAL PLOTS FOR HOUR
# Jittered scatter of every tweet's engagement rate against its posting hour.
ggplot(data) +
  geom_point(aes(data$hour, data$engagement.rate), size = 5, alpha = 0.7, colour = "red", position = "jitter") +
  xlab("") +
  ylab("Engagement Rate") +
  ggtitle("Engagement Rate by Time of Day") +
  theme(panel.grid.major.y = element_line(colour = "gray"), panel.grid.minor.x = element_blank(), plot.title = element_text(size = rel(2))) +
  theme(legend.text = element_text(size = 18), axis.text = element_text(size = 24)) +
  expand_limits(y = 0) +
  theme(panel.background = element_rect(fill = "white"), axis.title.y = element_text(size = 24)) +
  scale_colour_manual(values = c("red", "dodgerblue4")) +
  theme(legend.position = "bottom") +
  theme(strip.text.x = element_text(size = 14), strip.background = element_rect(fill = "white")) +
  theme(text = element_text(family = "News Gothic MT", face = "bold")) +
  # Watermark pinned to the bottom-right corner of the panel.
  annotate("text", x = Inf, y = -Inf, label = "tomtunguz.com", hjust = 1.1, vjust = -1.1, col = "gray", cex = 6, fontface = "bold", alpha = 0.8) +
  scale_y_continuous(labels = percent_format())
# ggsave() with no plot argument writes the most recently created plot.
ggsave("era_by_tod.png", dpi = 300, width = 9, height = 6)
### TIME OF DAY FUNCTIONS
# Return the rows of `df` with the largest `average` values, best first.
#
# df: data frame with a numeric `average` column.
# n:  number of rows wanted. Values below 0 are treated as 0; values above
#     nrow(df) return every row.
# Returns a data frame with at most `n` rows, sorted by descending `average`.
top_n_for_hour = function(df, n){
  ordered = df[order(df$average, decreasing=TRUE), , drop=FALSE]
  # seq_len() avoids the 1:n pitfalls: 1:0 would select row 1, and an n larger
  # than nrow(df) would pad the result with all-NA rows.
  keep = seq_len(min(max(n, 0), nrow(ordered)))
  return (ordered[keep, , drop=FALSE])
}
# Pairwise Welch t-tests of `field` between posting hours.
#
# df:        data frame with an `hour` column and a numeric column named by `field`.
# field:     name (string) of the metric column to compare.
# top_hours: hours of interest; only pairs where both hours are in this set are returned.
# hours:     hours to test against each other (defaults to the 5:20 window used by the script).
#
# Returns a long data frame with columns `hour1`, `variable` (the two hours, as
# factors) and `value` (the p-value), restricted to pairs with p < 0.05. Each
# significant pair appears in both orders. Pairs where either hour has fewer
# than two non-missing observations, or where t.test() cannot be computed, are
# left out.
time_of_day_t = function(df, field, top_hours, hours = 5:20){
  if (!(field %in% names(df))) {
    stop("column '", field, "' not found in df", call. = FALSE)
  }
  hours = unique(hours)
  labels = as.character(hours)
  n_hours = length(hours)
  # Pull the metric out once per hour as a plain numeric vector.
  samples = lapply(hours, function(h) {
    values = df[[field]][which(df$hour == h)]
    values[!is.na(values)]
  })
  p_values = matrix(NA_real_, nrow = n_hours, ncol = n_hours)
  for (i in seq_len(n_hours)) {
    for (j in seq_len(n_hours)) {
      if (length(samples[[i]]) > 1 && length(samples[[j]]) > 1) {
        # t.test() stops on e.g. constant data; record NA instead of aborting
        # the whole comparison matrix.
        p_values[i, j] = tryCatch(
          t.test(samples[[i]], samples[[j]])$p.value,
          error = function(e) NA_real_
        )
      }
    }
  }
  # Long format, column by column: one row per (hour1, variable) pair.
  x = data.frame(
    hour1 = factor(rep(labels, times = n_hours)),
    variable = factor(rep(labels, each = n_hours), levels = labels),
    value = as.vector(p_values)
  )
  x = x[!is.na(x$value) & x$value < 0.05, , drop = FALSE]
  x = x[x$hour1 %in% top_hours & x$variable %in% top_hours, , drop = FALSE]
  return (x)
}
# Add a numeric `top` column: 1 for rows whose key is in `top_hours`, 0 otherwise.
#
# hourly_data: summary data frame keyed by an `hour` or a `weekday` column.
# top_hours:   key values (hours or weekday numbers) to flag.
# key:         name of the key column. When NULL, `hour` is used if present,
#              otherwise `weekday`, so the day-of-week summaries are flagged too
#              instead of silently getting top = 0 everywhere.
# Returns `hourly_data` with the extra `top` column.
designate_top = function(hourly_data, top_hours, key = NULL){
  if (is.null(key)) {
    key = intersect(c("hour", "weekday"), names(hourly_data))[1]
  }
  # rep() keeps this valid for a 0-row data frame.
  hourly_data$top = rep(0, nrow(hourly_data))
  if (!is.na(key)) {
    hourly_data$top[hourly_data[[key]] %in% top_hours] = 1
  }
  return (hourly_data)
}
## RETWEETS BY HOUR
# Mean retweets and number of posts for each posting hour.
# NOTE(review): ddply(), .() and summarise look like plyr functions, and no library(plyr) call is visible here -- confirm it is attached.
rt_by_hour = ddply(
  data, .(hour), summarise,
  average = mean(retweets),
  count_posts = length(Tweet.id)
)
# Take the three hours with the highest mean and flag them for highlighting.
top_retweet_hours = top_n_for_hour(rt_by_hour, 3)$hour
rt_by_hour = designate_top(rt_by_hour, top_retweet_hours)
# Significant (p < 0.05) pairwise differences among the top hours.
retweet_t = time_of_day_t(data, "retweets", top_retweet_hours)
## 8 and 9 are the best hours
ggplot(rt_by_hour) +
  geom_bar(aes(hour, average, fill = top), stat = "identity") +
  xlab("Time of Day") +
  ylab("Retweets per Post") +
  ggtitle("Best Time of Day to Maximize RT") +
  theme(panel.grid.major.y = element_line(colour = "gray"), panel.grid.minor.x = element_blank(), plot.title = element_text(size = rel(2))) +
  theme(legend.text = element_text(size = 18), axis.text = element_text(size = 24)) +
  expand_limits(y = 0) +
  theme(panel.background = element_rect(fill = "white"), axis.title.y = element_text(size = 24)) +
  # top = 0 maps to the mid colour (dodgerblue), top = 1 to red.
  scale_fill_gradient2(low = "dodgerblue", high = "red", mid = "dodgerblue") +
  theme(legend.position = "bottom") +
  theme(strip.text.x = element_text(size = 14), strip.background = element_rect(fill = "white")) +
  theme(text = element_text(family = "News Gothic MT", face = "bold")) +
  annotate("text", x = Inf, y = -Inf, label = "tomtunguz.com", hjust = 1.1, vjust = -1.1, col = "gray", cex = 6, fontface = "bold", alpha = 0.8) +
  # The later legend.position setting wins, so the legend is hidden.
  theme(legend.position = "none")
ggsave("time_of_day_max_rt.png", dpi = 300, width = 12, height = 9)
## IMPRESSIONS BY HOUR
# Mean impressions and number of posts for each posting hour.
imp_by_hour = ddply(
  data, .(hour), summarise,
  average = mean(impressions),
  count = length(Tweet.id)
)
# Flag the five hours with the highest mean impressions.
top_imp_hours = top_n_for_hour(imp_by_hour, 5)$hour
imp_by_hour = designate_top(imp_by_hour, top_imp_hours)
# Significant (p < 0.05) pairwise differences among the top hours.
imp_t = time_of_day_t(data, "impressions", top_imp_hours)
ggplot(imp_by_hour) +
  geom_bar(aes(hour, average, fill = top), stat = "identity") +
  xlab("Time of Day") +
  ylab("Impressions") +
  ggtitle("Best Time of Day to Maximize Impressions") +
  theme(panel.grid.major.y = element_line(colour = "gray"), panel.grid.minor.x = element_blank(), plot.title = element_text(size = rel(2))) +
  theme(legend.text = element_text(size = 18), axis.text = element_text(size = 24)) +
  expand_limits(y = 0) +
  theme(panel.background = element_rect(fill = "white"), axis.title.y = element_text(size = 24)) +
  scale_colour_manual(values = c("red", "dodgerblue4")) +
  theme(legend.position = "bottom") +
  theme(strip.text.x = element_text(size = 14), strip.background = element_rect(fill = "white")) +
  theme(text = element_text(family = "News Gothic MT", face = "bold")) +
  # top = 0 maps to the mid colour (dodgerblue), top = 1 to red.
  scale_fill_gradient2(low = "dodgerblue", high = "red", mid = "dodgerblue") +
  annotate("text", x = Inf, y = -Inf, label = "tomtunguz.com", hjust = 1.1, vjust = -1.1, col = "gray", cex = 6, fontface = "bold", alpha = 0.8) +
  # The later legend.position setting wins, so the legend is hidden.
  theme(legend.position = "none")
ggsave("time_of_day_max_imp.png", dpi = 300, width = 12, height = 9)
## ENGAGEMENT RATE BY HOUR
# Mean engagement rate for each posting hour.
er_by_hour = ddply(
  data, .(hour), summarise,
  average = mean(engagement.rate)
)
# Flag the eleven hours with the highest mean engagement rate.
top_er_hours = top_n_for_hour(er_by_hour, 11)$hour
er_by_hour = designate_top(er_by_hour, top_er_hours)
# Significant (p < 0.05) pairwise differences among the top hours.
er_t = time_of_day_t(data, "engagement.rate", top_er_hours)
ggplot(er_by_hour) +
  geom_bar(aes(hour, average, fill = top), stat = "identity") +
  xlab("Time of Day") +
  ylab("Engagement Rate") +
  ggtitle("Best Time of Day to Maximize Engagement Rate") +
  theme(panel.grid.major.y = element_line(colour = "gray"), panel.grid.minor.x = element_blank(), plot.title = element_text(size = rel(2))) +
  theme(legend.text = element_text(size = 18), axis.text = element_text(size = 24)) +
  expand_limits(y = 0) +
  theme(panel.background = element_rect(fill = "white"), axis.title.y = element_text(size = 24)) +
  theme(legend.position = "bottom") +
  theme(strip.text.x = element_text(size = 14), strip.background = element_rect(fill = "white")) +
  theme(text = element_text(family = "News Gothic MT", face = "bold")) +
  annotate("text", x = Inf, y = -Inf, label = "tomtunguz.com", hjust = 1.1, vjust = -1.1, col = "gray", cex = 6, fontface = "bold", alpha = 0.8) +
  # top = 0 maps to the mid colour (dodgerblue), top = 1 to red.
  scale_fill_gradient2(low = "dodgerblue", high = "red", mid = "dodgerblue") +
  # The later legend.position setting wins, so the legend is hidden.
  theme(legend.position = "none") +
  scale_y_continuous(labels = percent_format())
ggsave("er_by_tod.png", dpi = 300, width = 12, height = 9)
## URL CLICKS BY HOUR
# Per-tweet click-through rate: URL clicks divided by impressions.
data$url.ctr = data$url.clicks/data$impressions
# Pooled click-through rate per hour (total clicks over total impressions),
# stored under both `average` and `url.ctr`.
url_by_hour = ddply(
  data, .(hour), summarise,
  average = sum(url.clicks)/sum(impressions),
  url.ctr = sum(url.clicks)/sum(impressions)
)
# Flag the four hours with the highest pooled click rate.
top_url_hours = top_n_for_hour(url_by_hour, 4)$hour
url_by_hour = designate_top(url_by_hour, top_url_hours)
# Significant (p < 0.05) pairwise differences among the top hours.
url_t = time_of_day_t(data, "url.ctr", top_url_hours)
ggplot(url_by_hour) +
  geom_bar(aes(hour, average, fill = top), stat = "identity") +
  xlab("Time of Day") +
  ylab("Click Rate") +
  ggtitle("Best Time of Day to Maximize Click Rate") +
  theme(panel.grid.major.y = element_line(colour = "gray"), panel.grid.minor.x = element_blank(), plot.title = element_text(size = rel(2))) +
  theme(legend.text = element_text(size = 18), axis.text = element_text(size = 24)) +
  expand_limits(y = 0) +
  theme(panel.background = element_rect(fill = "white"), axis.title.y = element_text(size = 24)) +
  theme(legend.position = "bottom") +
  theme(strip.text.x = element_text(size = 14), strip.background = element_rect(fill = "white")) +
  theme(text = element_text(family = "News Gothic MT", face = "bold")) +
  annotate("text", x = Inf, y = -Inf, label = "tomtunguz.com", hjust = 1.1, vjust = -1.1, col = "gray", cex = 6, fontface = "bold", alpha = 0.8) +
  # top = 0 maps to the mid colour (dodgerblue), top = 1 to red.
  scale_fill_gradient2(low = "dodgerblue", high = "red", mid = "dodgerblue") +
  # The later legend.position setting wins, so the legend is hidden.
  theme(legend.position = "none") +
  scale_y_continuous(labels = percent_format())
ggsave("url_click_by_tod.png", dpi = 300, width = 12, height = 9)
## FOLLOWS BY HOUR
# Mean follows per tweet for each posting hour.
follows_by_hour = ddply(
  data, .(hour), summarise,
  average = mean(follows)
)
# Flag the four hours with the highest mean follows.
top_follows_hours = top_n_for_hour(follows_by_hour, 4)$hour
follows_by_hour = designate_top(follows_by_hour, top_follows_hours)
# Significant (p < 0.05) pairwise differences among the top hours.
follows_t = time_of_day_t(data, "follows", top_follows_hours)
ggplot(follows_by_hour) +
  geom_bar(aes(hour, average, fill = top), stat = "identity") +
  xlab("Time of Day") +
  ylab("Follows") +
  ggtitle("Best Time of Day to New Follows") +
  theme(panel.grid.major.y = element_line(colour = "gray"), panel.grid.minor.x = element_blank(), plot.title = element_text(size = rel(2))) +
  theme(legend.text = element_text(size = 18), axis.text = element_text(size = 24)) +
  expand_limits(y = 0) +
  theme(panel.background = element_rect(fill = "white"), axis.title.y = element_text(size = 24)) +
  theme(legend.position = "bottom") +
  theme(strip.text.x = element_text(size = 14), strip.background = element_rect(fill = "white")) +
  theme(text = element_text(family = "News Gothic MT", face = "bold")) +
  annotate("text", x = Inf, y = -Inf, label = "tomtunguz.com", hjust = 1.1, vjust = -1.1, col = "gray", cex = 6, fontface = "bold", alpha = 0.8) +
  # top = 0 maps to the mid colour (dodgerblue), top = 1 to red.
  scale_fill_gradient2(low = "dodgerblue", high = "red", mid = "dodgerblue") +
  # The later legend.position setting wins, so the legend is hidden.
  theme(legend.position = "none") +
  # NOTE(review): the y axis is formatted as a percentage although the label says "Follows" -- confirm this is intended.
  scale_y_continuous(labels = percent_format())
ggsave("follow_rate_by_tod.png", dpi = 300, width = 12, height = 9)
### DAY OF WEEK ANALYSIS
## DAY OF WEEK FUNCTIONS
# Derive two day-of-week columns from the timestamp text in data$time:
#   day     - weekday name as returned by weekdays()
#   weekday - the POSIXlt `wday` field (0-6, with 0 = Sunday)
data[["day"]] = weekdays(as.Date(data[["time"]]))
data[["weekday"]] = as.POSIXlt(data[["time"]])$wday
# Pairwise Welch t-tests of `field` between days of the week.
#
# df:       data frame with a numeric `weekday` column (0-6) and a numeric
#           column named by `field`.
# field:    name (string) of the metric column to compare.
# top_days: weekday numbers of interest; only pairs where both days are in this
#           set are returned.
#
# Returns a long data frame with columns `day`, `variable` (the two weekday
# numbers, as factors labelled "0".."6") and `value` (the p-value), restricted
# to pairs with p < 0.05. Each significant pair appears in both orders. Pairs
# where either day has fewer than two non-missing observations, or where
# t.test() cannot be computed, are left out.
weekday_t = function(df, field, top_days){
  if (!(field %in% names(df))) {
    stop("column '", field, "' not found in df", call. = FALSE)
  }
  days = 0:6
  # Label rows/columns with the weekday numbers themselves so they can be
  # matched against `top_days`.
  labels = as.character(days)
  n_days = length(days)
  # Both sides of every comparison are selected on the numeric `weekday`
  # column; the `day` column holds weekday names and never equals a number.
  samples = lapply(days, function(d) {
    values = df[[field]][which(df$weekday == d)]
    values[!is.na(values)]
  })
  p_values = matrix(NA_real_, nrow = n_days, ncol = n_days)
  for (i in seq_len(n_days)) {
    for (j in seq_len(n_days)) {
      if (length(samples[[i]]) > 1 && length(samples[[j]]) > 1) {
        # t.test() stops on e.g. constant data; record NA instead of aborting
        # the whole comparison matrix.
        p_values[i, j] = tryCatch(
          t.test(samples[[i]], samples[[j]])$p.value,
          error = function(e) NA_real_
        )
      }
    }
  }
  # Long format, column by column: one row per (day, variable) pair.
  x = data.frame(
    day = factor(rep(labels, times = n_days)),
    variable = factor(rep(labels, each = n_days), levels = labels),
    value = as.vector(p_values)
  )
  x = x[!is.na(x$value) & x$value < 0.05, , drop = FALSE]
  x = x[x$day %in% top_days & x$variable %in% top_days, , drop = FALSE]
  return (x)
}
# Return the rows of `df` with the largest `average` values, best first.
#
# df: data frame with a numeric `average` column.
# n:  number of rows wanted. Values below 0 are treated as 0; values above
#     nrow(df) return every row (e.g. asking for 7 days when fewer are present).
# Returns a data frame with at most `n` rows, sorted by descending `average`.
top_n_for_day = function(df, n){
  ordered = df[order(df$average, decreasing=TRUE), , drop=FALSE]
  # seq_len() avoids the 1:n pitfalls: 1:0 would select row 1, and an n larger
  # than nrow(df) would pad the result with all-NA rows.
  keep = seq_len(min(max(n, 0), nrow(ordered)))
  return (ordered[keep, , drop=FALSE])
}
## PLOT HIGH LEVEL DATA
# Jittered scatter of every tweet's engagement rate against its weekday number.
# Columns are referenced by bare name inside aes(); ggplot(data) supplies the data.
ggplot(data) +
  geom_point(aes(weekday, engagement.rate), size = 5, alpha = 0.7, colour = "dodgerblue", position = "jitter") +
  xlab("") +
  ylab("Engagement Rate") +
  ggtitle("Engagement Rate by Day of Week") +
  theme(panel.grid.major.y = element_line(colour = "gray"), panel.grid.minor.x = element_blank(), plot.title = element_text(size = rel(2))) +
  theme(legend.text = element_text(size = 18), axis.text = element_text(size = 24)) +
  expand_limits(y = 0) +
  theme(panel.background = element_rect(fill = "white"), axis.title.y = element_text(size = 24)) +
  scale_colour_manual(values = c("red", "dodgerblue4")) +
  theme(legend.position = "bottom") +
  theme(strip.text.x = element_text(size = 14), strip.background = element_rect(fill = "white")) +
  theme(text = element_text(family = "News Gothic MT", face = "bold")) +
  annotate("text", x = Inf, y = -Inf, label = "tomtunguz.com", hjust = 1.1, vjust = -1.1, col = "gray", cex = 6, fontface = "bold", alpha = 0.8) +
  scale_y_continuous(labels = percent_format())
# Box plot of impressions for each weekday number.
# The outlier colour is a fixed geom parameter, not an aesthetic, so it is set
# once outside aes().
ggplot(data) +
  geom_boxplot(aes(weekday, impressions, group = weekday), fill = "orange", colour = "gray", outlier.colour = "gray50", outlier.size = 3) +
  xlab("") +
  ylab("Impressions") +
  ggtitle("Impressions by Day of Week") +
  theme(panel.grid.major.y = element_line(colour = "gray"), panel.grid.minor.x = element_blank(), plot.title = element_text(size = rel(2))) +
  theme(legend.text = element_text(size = 18), axis.text = element_text(size = 24)) +
  expand_limits(y = 0) +
  theme(panel.background = element_rect(fill = "white"), axis.title.y = element_text(size = 24)) +
  scale_colour_manual(values = c("red", "dodgerblue4")) +
  theme(legend.position = "bottom") +
  theme(strip.text.x = element_text(size = 14), strip.background = element_rect(fill = "white")) +
  theme(text = element_text(family = "News Gothic MT", face = "bold")) +
  annotate("text", x = Inf, y = -Inf, label = "tomtunguz.com", hjust = 1.1, vjust = -1.1, col = "gray", cex = 6, fontface = "bold", alpha = 0.8) +
  scale_y_continuous(labels = comma_format())
## URL CLICKS BY DAY
# Per-tweet click-through rate: URL clicks divided by impressions.
data$url.ctr = data$url.clicks/data$impressions
# Pooled click-through rate per weekday (total clicks over total impressions),
# stored under both `average` and `url.ctr`, plus the number of tweets.
url_by_day = ddply(
  data, .(weekday), summarise,
  average = sum(url.clicks)/sum(impressions),
  url.ctr = sum(url.clicks)/sum(impressions),
  count = length(url.clicks)
)
# Rank the weekdays by pooled click rate (up to 7 requested).
top_url_day = top_n_for_day(url_by_day, 7)$weekday
url_by_day = designate_top(url_by_day, top_url_day)
# Significant (p < 0.05) pairwise differences among those days.
day_url_t = weekday_t(data, "url.ctr", top_url_day)
ggplot(url_by_day) +
  geom_bar(aes(weekday, average, fill = top), stat = "identity") +
  # The x axis shows weekday numbers, not hours.
  xlab("Day of Week") +
  ylab("Click Rate") +
  ggtitle("Best Day of Week to Maximize Click Rate") +
  theme(panel.grid.major.y = element_line(colour = "gray"), panel.grid.minor.x = element_blank(), plot.title = element_text(size = rel(2))) +
  theme(legend.text = element_text(size = 18), axis.text = element_text(size = 24)) +
  expand_limits(y = 0) +
  theme(panel.background = element_rect(fill = "white"), axis.title.y = element_text(size = 24)) +
  theme(legend.position = "bottom") +
  theme(strip.text.x = element_text(size = 14), strip.background = element_rect(fill = "white")) +
  theme(text = element_text(family = "News Gothic MT", face = "bold")) +
  annotate("text", x = Inf, y = -Inf, label = "tomtunguz.com", hjust = 1.1, vjust = -1.1, col = "gray", cex = 6, fontface = "bold", alpha = 0.8) +
  scale_fill_gradient2(low = "dodgerblue", high = "red", mid = "dodgerblue") +
  # The later legend.position setting wins, so the legend is hidden.
  theme(legend.position = "none") +
  scale_y_continuous(labels = percent_format())
## ENGAGEMENT RATE BY DAY
# Mean engagement rate for each weekday number.
er_by_day = ddply(
  data, .(weekday), summarise,
  average = mean(engagement.rate)
)
# Rank the weekdays by mean engagement rate (up to 7 requested).
top_er_day = top_n_for_day(er_by_day, 7)$weekday
er_by_day = designate_top(er_by_day, top_er_day)
# Significant (p < 0.05) pairwise differences among those days.
day_er_t = weekday_t(data, "engagement.rate", top_er_day)
ggplot(er_by_day) +
  geom_bar(aes(weekday, average, fill = top), stat = "identity") +
  # The x axis shows weekday numbers, not hours.
  xlab("Day of Week") +
  ylab("Engagement Rate") +
  ggtitle("Best Day of Week to Maximize Engagement Rate") +
  theme(panel.grid.major.y = element_line(colour = "gray"), panel.grid.minor.x = element_blank(), plot.title = element_text(size = rel(2))) +
  theme(legend.text = element_text(size = 18), axis.text = element_text(size = 24)) +
  expand_limits(y = 0) +
  theme(panel.background = element_rect(fill = "white"), axis.title.y = element_text(size = 24)) +
  theme(legend.position = "bottom") +
  theme(strip.text.x = element_text(size = 14), strip.background = element_rect(fill = "white")) +
  theme(text = element_text(family = "News Gothic MT", face = "bold")) +
  annotate("text", x = Inf, y = -Inf, label = "tomtunguz.com", hjust = 1.1, vjust = -1.1, col = "gray", cex = 6, fontface = "bold", alpha = 0.8) +
  scale_fill_gradient2(low = "dodgerblue", high = "red", mid = "dodgerblue") +
  # The later legend.position setting wins, so the legend is hidden.
  theme(legend.position = "none") +
  scale_y_continuous(labels = percent_format())
## IMPRESSIONS BY DAY
# Mean impressions for each weekday number.
imp_by_day = ddply(
  data, .(weekday), summarise,
  average = mean(impressions)
)
# Rank the weekdays by mean impressions (up to 7 requested).
top_imp_day = top_n_for_day(imp_by_day, 7)$weekday
imp_by_day = designate_top(imp_by_day, top_imp_day)
# Significant (p < 0.05) pairwise differences among those days.
day_imp_t = weekday_t(data, "impressions", top_imp_day)
ggplot(imp_by_day) +
  geom_bar(aes(weekday, average, fill = top), stat = "identity") +
  # Labels describe what is actually plotted: mean impressions per weekday.
  xlab("Day of Week") +
  ylab("Impressions") +
  ggtitle("Best Day of Week to Maximize Impressions") +
  theme(panel.grid.major.y = element_line(colour = "gray"), panel.grid.minor.x = element_blank(), plot.title = element_text(size = rel(2))) +
  theme(legend.text = element_text(size = 18), axis.text = element_text(size = 24)) +
  expand_limits(y = 0) +
  theme(panel.background = element_rect(fill = "white"), axis.title.y = element_text(size = 24)) +
  theme(legend.position = "bottom") +
  theme(strip.text.x = element_text(size = 14), strip.background = element_rect(fill = "white")) +
  theme(text = element_text(family = "News Gothic MT", face = "bold")) +
  annotate("text", x = Inf, y = -Inf, label = "tomtunguz.com", hjust = 1.1, vjust = -1.1, col = "gray", cex = 6, fontface = "bold", alpha = 0.8) +
  scale_fill_gradient2(low = "dodgerblue", high = "red", mid = "dodgerblue") +
  # The later legend.position setting wins, so the legend is hidden.
  theme(legend.position = "none") +
  scale_y_continuous(labels = comma_format())
### CORRELATIONS
# Pearson correlations (cor() default) between per-tweet metrics.
cor(data$impressions, data$engagement.rate)
cor(data$retweets, data$impressions)
cor(data$replies, data$impressions)
# Character count of each tweet's own text; nchar() is vectorised, so indexing
# with [1] would have copied the first tweet's length into every row.
data$tweet.length = nchar(as.character(data$Tweet.text))
@manualwise
Copy link

Hey Tom,
thanks for this great script. Clemens and I took the time to make it a bit easier to understand, as we feel that this script can be very useful for people who might not have a lot of experience with scripting.

We've extracted a few variables to facilitate configuration, made the charts show your chosen time zone, and improved the documentation to make it easier to get started with the script.

The updated gist can be found here:
https://gist.github.com/manualwise/c92fda9d884f47f43794

Thanks again,
Manuel

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment