Created
February 24, 2014 16:11
-
-
Save jdavidson/9191278 to your computer and use it in GitHub Desktop.
Analysis of follow on rates by investor class
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ggplot2) | |
library(ggthemes) | |
library(plyr) | |
library(dplyr) | |
library(lubridate) | |
library(scales) | |
library(data.table) | |
library(reshape2) | |
options(scipen=999) | |
options(stringsAsFactors = FALSE) | |
# fix strange difference in units from diff | |
my.diff <- function(x, lag=1) { | |
n <- length(x) | |
round(difftime(x[(1+lag):n], x[1:(n-lag)], units="days") / 30) | |
} | |
rounds <- read.csv("2014-01-06-crunchbase_monthly_export_rounds.csv") | |
# dedup | |
rounds <- data.table(rounds) | |
setkeyv(rounds, c("company_name", "funded_at", "funding_round_type")) | |
rounds <- unique(rounds) | |
rounds <- subset(rounds, funded_month != "1960-01") | |
# fix strange date data | |
rounds$funded_at <- ymd(paste(rounds$funded_month, "01", sep="-")) | |
rounds <- arrange(rounds, funded_at) | |
rounds <- rounds[, id := seq_along(funded_at), by=company_name] | |
rounds <- rounds[, diff := c(my.diff(funded_at), NA), by=company_name] | |
# clean up rounds | |
rounds <- filter(rounds, company_country_code == "USA", company_state_code != "") | |
rounds <- filter(rounds, !is.na(raised_amount_usd)) | |
rounds$round_raised_amount_usd <- cut(rounds$raised_amount_usd, breaks=c(0, 1000000, 5000000, 10000000, Inf), right=FALSE) | |
rounds$round_raised_amount_usd <- mapvalues(rounds $round_raised_amount_usd, from = c("[0,1e+06)", "[1e+06,5e+06)", "[5e+06,1e+07)", "[1e+07,Inf)"), to = c("$0-1M", "$1-5M", "$5-10M", "$10+")) | |
categories <- read.csv("categories.csv") | |
names(categories)[1] <- c("company_category_code") | |
rounds <- data.table(inner_join(rounds, select(categories, company_category_code, broad_category))) | |
rounds <- filter(rounds, broad_category %in% c("enterprise", "consumer")) | |
# | |
investments <- read.csv("2014-01-06-crunchbase_monthly_export_investments.csv") | |
investments <- subset(investments, funded_month != "1960-01") | |
investments$funded_at <- ymd(paste(investments$funded_month, "01", sep="-")) | |
investments <- data.table(investments) | |
setkeyv(investments, c("company_name", "funded_at", "funding_round_type", "investor_name")) | |
investments <- unique(investments) | |
investments$funded_month <- floor_date(ymd(paste(investments $funded_month, "01", sep="-")), "month") | |
investments$funded_year <- floor_date(investments $funded_month, "year") | |
investments$investor_type <- unlist(lapply(strsplit(investments$investor_permalink, "/"), function(x) {x[2]})) | |
investments$investor_type <- as.factor(sub("-organization", "", investments$investor_type)) | |
investments <- filter(investments, funding_round_type %in% c("venture", "angel", "crowdfunding", "series-a", "series-b", "series-c+")) | |
investors <- filter(investments, funded_at > ymd("2008-01-01"), investor_type == "financial") %.% group_by(investor_name) %.% summarise(total_rounds=n()) | |
investors_rounds <- filter(investments, funded_at > ymd("2008-01-01"), investor_type == "financial") %.% group_by(investor_name, funding_round_type) %.% summarise(rounds=n()) | |
investors_rounds <- join(investors_rounds, investors) | |
investors_rounds <- transform(investors_rounds, perc_rounds = rounds / total_rounds) | |
seed_funds <- as.data.frame(filter(investors_rounds, total_rounds > 20, funding_round_type == "angel", perc_rounds > .2)) %.% arrange(desc(perc_rounds)) %.% head(100) | |
top_institutional_investors <- as.data.frame(filter(investments, funded_at > ymd("2008-01-01"), investor_type == "financial", !(investor_name %in% seed_funds$investor_name))) %.% group_by(investor_name) %.% summarise(total_rounds=n()) %.% arrange(desc(total_rounds)) %.% head(100) | |
investments <- join(investments, select(rounds, company_name,funded_at,funding_round_type,diff)) | |
seed_fund_success <- filter(investments, funding_round_type == "angel", funded_at > ymd("2008-01-01"), investor_type == "financial") %.% group_by(seed_fund = investor_name %in% seed_funds$investor_name, year=year(funded_at)) %.% summarise(rounds=n(), follow_on=sum(!is.na(diff))) %.% transform(follow_on_rate = follow_on / rounds) | |
ggplot(filter(seed_fund_success, year < 2014), aes(x=as.factor(year), y= follow_on_rate, fill=seed_fund)) + geom_bar(stat="identity", position="dodge") + scale_y_continuous(labels = percent_format()) | |
top_fund_success <- filter(investments, funding_round_type == "angel", funded_at > ymd("2006-01-01"), investor_type == "financial") %.% group_by(top_fund = investor_name %in% top_institutional_investors$investor_name, year=year(funded_at)) %.% summarise(rounds=n(), follow_on=sum(!is.na(diff))) %.% transform(follow_on_rate = follow_on / rounds) | |
ggplot(filter(top_fund_success, year < 2014), aes(x=as.factor(year), y= follow_on_rate, fill= top_fund)) + geom_bar(stat="identity", position="dodge") + scale_y_continuous(labels = percent_format()) | |
investments <- transform(investments, fund_class = ifelse(investor_name %in% top_institutional_investors$investor_name, "top_fund", ifelse(investor_name %in% seed_funds$investor_name, "seed_fund", as.character(investor_type)))) | |
success <- filter(investments, funding_round_type == "angel", funded_at > ymd("2008-01-01"), investor_type %in% c("financial", "person")) %.% group_by(fund_class, year=year(funded_at)) %.% summarise(rounds=n(), follow_on=sum(!is.na(diff))) %.% transform(follow_on_rate = follow_on / rounds) | |
aplot <- ggplot(filter(success, year < 2013), aes(x=as.factor(year), y= follow_on_rate, fill= fund_class)) + geom_bar(stat="identity", position="dodge") + scale_y_continuous(labels = percent_format()) + ggtitle("Angel Follow On Rate") + xlab("") + ylab("Follow On Rate") | |
ggsave("angel-follow-on.png", aplot, width=640 / 72, height=400 / 72, dpi=72) | |
investor_type_success <- filter(investments, funding_round_type == "angel", funded_at > ymd("2008-01-01")) %.% group_by(investor_type, year=year(funded_at)) %.% summarise(rounds=n(), follow_on=sum(!is.na(diff))) %.% transform(follow_on_rate = follow_on / rounds) | |
tplot <- ggplot(filter(investor_type_success, year < 2014), aes(x=as.factor(year), y= follow_on_rate, fill= investor_type)) + geom_bar(stat="identity", position="dodge") + scale_y_continuous(labels = percent_format()) + ggtitle("Angel Follow On Rate") + xlab("") + ylab("Follow On Rate") | |
ggsave("angel-follow-on-type.png", tplot, width=640 / 72, height=400 / 72, dpi=72) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment