This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the tweets stored in the database into from_db.
# NOTE(review): presumably requires a backend to have been registered first
# (see the register_*_backend() calls) -- confirm against twitteR docs.
from_db = load_tweets_db()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Search Twitter for "#rstats" (requesting n = 500 results) and hand the
# result to store_tweets_db() for persistence.
tweets = searchTwitter("#rstats", n = 500)
store_tweets_db(tweets)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Register a database backend. The three calls below are alternatives:
# pick the one that matches your setup.
# Use an existing DBI connection object
register_db_backend(dbi_connection)
# or create a sqlite connection
register_sqlite_backend("/path/to/sqlite/file")
# or create a mysql connection
register_mysql_backend("my_database", "hostname", "username", "password")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
load("code2013.rda") # 6028 tweets
filtered_tweets = strip_retweets(code2013) # 5006 tweets
# Pull the text out of every remaining tweet. vapply() (rather than sapply())
# guarantees a character vector, even if filtered_tweets is empty.
statuses = vapply(filtered_tweets, function(x) x$getText(), character(1))
# Read in the TIOBE data
tiobe = read.csv("tiobe.csv", stringsAsFactors = FALSE)
# Lowercased language names, used for case-insensitive token matching
tiobe_langs = tolower(tiobe[, "lang"])
# Looking at the TIOBE listings and some of the tweet data, massage some of the entries
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The #code2013 rank is simply the row position in code2013_lang_table.
# seq_len() stays correct (empty) when the table has zero rows.
code2013_lang_table$code2013_rank = seq_len(nrow(code2013_lang_table))
# The matched tokens are lowercase (they were filtered against the lowercased
# TIOBE names), so compare against lowercased names here too; otherwise any
# mixed-case entry in tiobe.csv would yield an NA rank.
# NOTE(review): assumes the row order of tiobe is the TIOBE ranking -- confirm.
code2013_lang_table$tiobe_rank = match(
  code2013_lang_table$code2013_langs,
  tolower(tiobe[, "lang"])
)
# Make a scatterplot of the ranking differences
png(file = "code2013_tiobe_scatter.png", width = 640, height = 640)
# Explicit print(): ggplot objects are only drawn when printed, which does not
# happen automatically when this script is run through source().
print(
  ggplot(code2013_lang_table,
         aes(x = code2013_rank, y = tiobe_rank, color = code2013_tier)) +
    geom_text(aes(label = code2013_langs), size = 3) +
    ylab("TIOBE Rank") + xlab("#code2013 rank") +
    ggtitle("#code2013 vs TIOBE rankings")
)
dev.off()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ggplot2)
# Horizontal bar chart of mention counts per language, written to a PNG file.
png(file = "code2013_tiobe.png", width = 640, height = 640)
# Explicit print(): ggplot objects are only drawn when printed, which does not
# happen automatically when this script is run through source().
print(
  ggplot(code2013_lang_table,
         aes(x = code2013_langs, y = Count, fill = code2013_tier)) +
    geom_bar(stat = "identity") +
    xlab("Language") + ylab("Count") +
    ggtitle("#code2013 Languages Sorted By TIOBE Rankings") +
    coord_flip()
)
dev.off()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# tokenize each status. split on comma period or whitespace
status_tokens = strsplit(statuses, ",|\\.|\\s+")
# Keep only the tokens that are TIOBE language names. lapply() always returns
# a list, whereas sapply() would collapse to a matrix whenever every status
# happens to contain the same number of matches.
matching_tokens = lapply(status_tokens, function(x) {
  x[x %in% tiobe_langs]
})
# Now have the languages mentioned in #code2013 which are in TIOBE
code2013_langs = unlist(matching_tokens)
# Frequency table of the mentions, most frequent first
code2013_lang_table = as.data.frame(sort(table(code2013_langs), decreasing = TRUE))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# I want to convert this all to lowercase but there are 67 with weird encodings
# Lowercase every status individually; a status for which tolower() raises an
# error is recorded as NULL instead of aborting the whole run.
lowered = lapply(statuses, function(status) {
  tryCatch(tolower(status), error = function(e) NULL)
})
failed = vapply(lowered, is.null, logical(1))
# Positions (in statuses) of the entries that could not be lowercased
bad_statuses = as.numeric(which(failed))
# Lowercased text of all remaining statuses, in their original order
lowercase_statuses = as.character(unlist(lowered[!failed], use.names = FALSE))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read in the TIOBE data
tiobe = read.csv("tiobe.csv", stringsAsFactors = FALSE)
# Lowercased copy of the "lang" column
tiobe_langs = tolower(tiobe[, "lang"])
# Looking at the TIOBE listings and some of the tweet data, massage some of the entries
# here. This won't be perfect but will help a little bit

#' Replace a pattern in a vector of status texts, ignoring case
#'
#' Thin wrapper around gsub() with ignore.case = TRUE.
#'
#' @param statuses Character vector of texts to modify.
#' @param was Pattern to look for. It is passed to gsub() as a regular
#'   expression, so regex metacharacters must be escaped by the caller.
#' @param is Replacement text.
#' @return A character vector the same length as `statuses` with every
#'   case-insensitive match of `was` replaced by `is`.
replace_statuses = function(statuses, was, is) {
  gsub(was, is, statuses, ignore.case = TRUE)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
load("code2013.rda")
# Find/remove the tweets flagged as retweets
# vapply() always returns a logical vector; sapply() returns list() for an
# empty input, which would make which() fail.
is_retweets = which(vapply(code2013, function(tweet) tweet$getIsRetweet(), logical(1)))
# Negative indexing with an empty index vector would drop every element, so
# only subset when at least one retweet was found.
filtered_tweets = if (length(is_retweets) > 0) code2013[-is_retweets] else code2013