Skip to content

Instantly share code, notes, and snippets.

@pengelbrecht
Created September 30, 2014 15:58
Show Gist options
  • Save pengelbrecht/9763dfec14ba31d33075 to your computer and use it in GitHub Desktop.
Save pengelbrecht/9763dfec14ba31d33075 to your computer and use it in GitHub Desktop.
R Cohort Analysis Example
require(RMySQL)
require(ggplot2)
require(scales)
myusername = "peter"
mypassword = "sekret"
system('ssh -f [email protected] -L 3306:localhost:3306 -N -o ExitOnForwardFailure=yes')
con <- dbConnect(MySQL(),
user=myusername, password=mypassword,
dbname="mainframe_production", host="127.0.0.1", port=3306)
query = "
SELECT date_format(companies.activate_at, '%Y-%m') AS cohort, PERIOD_DIFF(date_format(r.revenue_month, '%Y%m'), date_format(companies.activate_at, '%Y%m')) AS age, round(sum(r.sum_to_vat_cents + r.sum_no_vat_cents)/100) AS revenue
FROM revenues AS r
JOIN companies ON companies.id = r.company_id
WHERE r.kind = 'subscription' AND revenue_month < date_format(now(), '%Y-%m-01')
GROUP BY cohort, age
HAVING age >= 0 AND cohort >= '2014-01'
ORDER BY cohort
"
rs <- dbSendQuery(con, query)
mcd <- fetch(rs, n=-1)
ggplot(mcd, aes(x=age, y=revenue, group=cohort, colour=cohort)) +
geom_line(size = 1.5) +
geom_point(size=3, shape=21, solid=FALSE, fill="white") +
ylab("Subscription Revenue") + xlab("Age in months") +
theme_minimal() +
theme(axis.text.x = element_text(size = 15), axis.text.y = element_text(size = 15)) +
scale_y_continuous(labels = comma)
dbClearResult(rs)
dbDisconnect(con)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment