Jeff Gentry geoffjentry

153 followers · 0 following

Cambridge, MA

View GitHub Profile

Recently created

Least recently created

Recently updated

Least recently updated

geoffjentry / gist:9178556

Last active August 29, 2015 13:56

# Retrieve tweets through load_tweets_db() and keep them in from_db
from_db <- load_tweets_db()

geoffjentry / gist:9178541

Created February 23, 2014 22:54

	# Search Twitter for the "#rstats" hashtag, requesting 500 results
	tweets <- searchTwitter("#rstats", n = 500)

	# Hand the search results to store_tweets_db()
	store_tweets_db(tweets)

geoffjentry / gist:9178523

Created February 23, 2014 22:53

	# Three alternative ways to register a database backend (see the "or"
	# comments below); the arguments shown are placeholders.
	# Register an already-created connection object (dbi_connection is not
	# defined in this snippet)
	register_db_backend(dbi_connection)

	# or create a sqlite connection

	register_sqlite_backend("/path/to/sqlite/file")

	# or create a mysql connection

	register_mysql_backend("my_database", "hostname", "username", "password")

geoffjentry / gist:8620150

Created January 25, 2014 17:33

	# Load the saved #code2013 tweets; the code below expects this to define
	# the `code2013` object
	load("code2013.rda") # 6028 tweets

	# Drop retweets, then pull the text out of every remaining tweet.
	# vapply() guarantees a character vector even when no tweets remain
	# (sapply() would return an empty list in that case).
	filtered_tweets <- strip_retweets(code2013) # 5006 tweets
	statuses <- vapply(filtered_tweets, function(x) x$getText(), character(1))

	# Read in the TIOBE data
	tiobe <- read.csv("tiobe.csv", stringsAsFactors = FALSE)
	# Lowercased language names, for case-insensitive comparison with tweets
	tiobe_langs <- tolower(tiobe[, "lang"])

	# Looking at the TIOBE listings and some of the tweet data, massage some of the entries

geoffjentry / gist:8230289

Last active January 2, 2016 01:29

	# Rank languages by their row position in code2013_lang_table.
	# seq_len() yields an empty sequence for an empty table, where 1:nrow()
	# would produce c(1, 0) and break the assignment.
	code2013_lang_table$code2013_rank <- seq_len(nrow(code2013_lang_table))

	# TIOBE rank = row position of the language in `tiobe`. Compare in lower
	# case on both sides so mixed-case names in the TIOBE file still match
	# (unmatched languages get NA).
	code2013_lang_table$tiobe_rank <- match(
	  tolower(code2013_lang_table$code2013_langs),
	  tolower(tiobe[, "lang"])
	)

	# Make a scatterplot of the ranking differences
	png(file = "code2013_tiobe_scatter.png", width = 640, height = 640)
	# A ggplot object is only drawn when printed; print() explicitly so the
	# PNG is not left blank when this runs via source() or inside a function.
	print(
	  ggplot(
	    code2013_lang_table,
	    aes(x = code2013_rank, y = tiobe_rank, color = code2013_tier)
	  ) +
	    geom_text(aes(label = code2013_langs), size = 3) +
	    ylab("TIOBE Rank") + xlab("#code2013 rank") +
	    ggtitle("#code2013 vs TIOBE rankings")
	)
	dev.off()

geoffjentry / gist:8226975

Created January 2, 2014 21:09

	library(ggplot2)

	# Horizontal bar chart of per-language counts, written to a PNG file
	png(file = "code2013_tiobe.png", width = 640, height = 640)
	# A ggplot object is only drawn when printed; print() explicitly so the
	# PNG is not left blank when this runs via source() or inside a function.
	print(
	  ggplot(
	    code2013_lang_table,
	    aes(x = code2013_langs, y = Count, fill = code2013_tier)
	  ) +
	    geom_bar(stat = "identity") +
	    xlab("Language") + ylab("Count") +
	    ggtitle("#code2013 Languages Sorted By TIOBE Rankings") +
	    coord_flip()
	)
	dev.off()

geoffjentry / gist:8226512

Created January 2, 2014 20:46

create data.frame

	# tokenize each status. split on comma period or whitespace
	# The alternation bar must stay unescaped: "\|" is not a valid escape in
	# an R string literal, so the previous pattern could not even be parsed.
	status_tokens <- strsplit(statuses, ",|\\.|\\s+")

	# Keep only the tokens that name a TIOBE language. lapply() always
	# returns a list, whereas sapply() changes shape with the input.
	matching_tokens <- lapply(status_tokens, function(x) x[x %in% tiobe_langs])

	# Now have the languages mentioned in #code2013 which are in TIOBE
	code2013_langs <- unlist(matching_tokens)
	# Frequency table of the mentions, most frequent language first
	code2013_lang_table <- as.data.frame(
	  sort(table(code2013_langs), decreasing = TRUE)
	)

geoffjentry / gist:8226425

Created January 2, 2014 20:42

remove weird encodings

	# I want to convert this all to lowercase but there are 67 with weird encodings
	# bad_statuses collects the indices of statuses that tolower() fails on;
	# lowercase_statuses collects the lowercased text of all the others.
	bad_statuses = numeric()
	lowercase_statuses = character()
	for (i in seq_along(statuses)) {
	# try() captures a tolower() failure as a "try-error" object instead of
	# aborting the loop; silent=TRUE suppresses the error message
	tl = try(tolower(statuses[[i]]), silent=TRUE)
	if (inherits(tl, "try-error")) {
	bad_statuses = c(bad_statuses, i)
	} else {
	lowercase_statuses = c(lowercase_statuses, tl)
	}
	# NOTE(review): the closing brace of the for loop is not visible in this
	# excerpt — confirm against the full gist

geoffjentry / gist:8226310

Created January 2, 2014 20:35

massage data

	# Load the TIOBE listing, keeping text columns as plain character vectors
	tiobe <- read.csv(file = "tiobe.csv", stringsAsFactors = FALSE)
	# Lowercased copy of the "lang" column
	tiobe_langs <- tolower(tiobe[, "lang"])

	# Helper for massaging entries after comparing the TIOBE listings with
	# some of the tweet data. This won't be perfect but will help a little bit.
	# Replaces every case-insensitive match of the pattern `was` in `statuses`
	# with `is` and returns the modified character vector.
	replace_statuses <- function(statuses, was, is) {
	  gsub(pattern = was, replacement = is, x = statuses, ignore.case = TRUE)
	}

geoffjentry / gist:8226180

Last active March 2, 2021 06:36

Remove retweets

	# Load the saved tweets; the code below expects this to define `code2013`
	load("code2013.rda")

	# Find/remove the tweets flagged as retweets.
	# vapply() always yields a logical vector, so which() also works when
	# code2013 is empty (sapply() would hand it an empty list and fail).
	is_retweets <- which(
	  vapply(code2013, function(x) x$getIsRetweet(), logical(1))
	)

	# Keep every position that was not flagged. setdiff() handles the
	# "no retweets" case directly, so no special-casing of an empty index
	# vector (which negative indexing would need) is required.
	filtered_tweets <- code2013[setdiff(seq_along(code2013), is_retweets)]