Skip to content

Instantly share code, notes, and snippets.

@vsbuffalo
Created May 22, 2014 20:30
Show Gist options
  • Save vsbuffalo/de4656058337e95683e2 to your computer and use it in GitHub Desktop.
Save vsbuffalo/de4656058337e95683e2 to your computer and use it in GitHub Desktop.
Visualize your mentions over time
library(ggplot2)
library(lubridate)
library(dplyr)
library(reshape2)
# Own handle; rows for this handle are filtered out just before plotting.
myname <- "@vsbuffalo"

# Read tweets.csv (first row is the header), keeping strings as character
# vectors rather than factors.
d <- read.csv(file = "tweets.csv", header = TRUE, stringsAsFactors = FALSE)
# Extract the first "@handle" from each element of a character vector.
#
# A handle is "@" followed by one or more of [a-zA-Z0-9_]. Surrounding
# punctuation or text (e.g. "(@user):") is stripped.
#
# Args:
#   x: character vector of tokens.
#
# Returns:
#   A character vector the same length as `x`. Elements that contain no valid
#   handle (a lone "@", "@!", NA, ...) are NA_character_ instead of being
#   passed through unchanged, so they cannot be mistaken for a user name.
extractMentions <- function(x) {
  x <- as.character(x)
  handles <- rep(NA_character_, length(x))
  hits <- regexpr("@[a-zA-Z0-9_]+", x, perl = TRUE)
  # regexpr() reports -1 for "no match" and NA for NA input.
  found <- !is.na(hits) & hits > 0L
  # regmatches() returns only the matched substrings, in input order.
  handles[found] <- regmatches(x, hits)
  handles
}
# Count @-mentions per tweet.
#
# Args:
#   x: character vector of tweet texts.
#
# Returns:
#   An integer matrix with one row per element of `x` and one column per
#   distinct mentioned handle (columns sorted and named by handle). Cell
#   [i, j] is the number of times handle j is mentioned in tweet i. When no
#   handle is found at all the result is a matrix with zero columns.
getMentions <- function(x) {
  # Split on any whitespace (not just spaces) so that mentions separated by
  # newlines or tabs end up in separate tokens.
  words <- strsplit(as.character(x), "[[:space:]]+")
  mentions <- lapply(words, function(w) {
    handles <- extractMentions(w[grepl("@", w, fixed = TRUE)])
    # Ignore tokens for which no handle could be extracted.
    handles[!is.na(handles)]
  })
  all_users <- sort(unique(as.character(unlist(mentions))))
  # Tabulate every tweet against the same set of users so all rows line up.
  counts <- lapply(mentions, function(m) {
    tabulate(match(m, all_users), nbins = length(all_users))
  })
  # Building the matrix explicitly keeps a well-defined shape even when there
  # are no tweets or no mentions.
  matrix(as.integer(unlist(counts)),
         nrow = length(mentions), ncol = length(all_users),
         byrow = TRUE, dimnames = list(NULL, all_users))
}
# Collapse date-times to the first day of their month.
#
# Args:
#   x: anything as.POSIXlt() accepts (e.g. POSIXct or date strings).
#
# Returns:
#   A Date vector in which every element has its day-of-month set to 1.
monyr <- function(x) {
  broken_down <- as.POSIXlt(x)
  # Overwrite the day-of-month field; year and month are left untouched.
  broken_down$mday <- 1
  as.Date(broken_down)
}
# Tweet-by-user mention counts and the total number of mentions per user.
mentions <- getMentions(d$text)
csums_mentions <- colSums(mentions)

# Tweak how many folks you see here:
# mentions_subset <- mentions[, csums_mentions > quantile(csums_mentions, probs=0.98), drop=FALSE]
# drop = FALSE keeps a matrix (and the "@user" column name) even when only a
# single user clears the threshold.
mentions_subset <- mentions[, csums_mentions > 50, drop = FALSE]

# NOTE(review): the trailing "*!" in the order string looks like footnote
# markers copied from lubridate's format table -- confirm that this string
# parses the timestamps in your archive.
times <- parse_date_time(d$timestamp, "ymd_hms z*!")
dm <- data.frame(time = times, year = year(times), month = month(times))
dm <- cbind(dm, mentions_subset)

# Long format: one row per (tweet, user) pair with the mention count in `value`.
dmelt <- melt(dm, id.vars = c("time", "year", "month"))

# Monthly mention totals per user. `%>%` replaces the original `%.%` pipe,
# which dplyr deprecated and later removed.
xx <- dmelt %>%
  group_by(monyr = monyr(time), variable) %>%
  summarise(ntweets = sum(value)) %>%
  ungroup()

# One line per mentioned user, excluding our own handle.
p <- ggplot(xx[xx$variable != myname, ])
p <- p + geom_line(aes(x = monyr, y = ntweets, group = variable, color = variable))
p <- p + xlab("") + ylab("tweets per month") + scale_color_discrete("mentions")
p
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment