Created
April 1, 2014 00:35
-
-
Save jdavidson/9905430 to your computer and use it in GitHub Desktop.
Analysis of crunchbase acquisition data.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ggplot2) | |
library(ggthemes) | |
library(plyr) | |
library(dplyr) | |
library(lubridate) | |
library(scales) | |
library(data.table) | |
library(reshape2) | |
options(scipen=999) | |
options(stringsAsFactors = FALSE) | |
companies <- read.csv("2014-03-04-crunchbase_monthly_export_companies.csv") | |
companies <- data.table(companies) | |
companies$founded_month <- floor_date(ymd(paste(companies$founded_month, "01", sep="-")), "month") | |
companies$founded_year <- floor_date(companies$founded_month, "year") | |
categories <- read.csv("categories.csv") | |
companies <- data.table(inner_join(companies, select(categories, category_code, broad_category))) | |
companies <- filter(companies, broad_category %in% c("enterprise", "consumer")) | |
# clean up companies | |
companies <- filter(companies, country_code == "USA", state_code != "", founded_year > ymd("2002-01-01")) | |
companies$region <- toupper(gsub(" - Other", "", companies$region)) | |
companies$city <- toupper(gsub("[^[:alnum:]///' ]", "", companies$city)) | |
companies[region == "SF BAY"]$state_code <- "CA" | |
companies[region == "NEW YORK"]$state_code <- "NY" | |
companies[region == "LOS ANGELES"]$state_code <- "CA" | |
companies <- filter(companies, !region %in% c("UNKNOWN", "TBD")) | |
rounds <- read.csv("2014-03-04-crunchbase_monthly_export_rounds.csv") | |
# dedup | |
rounds <- data.table(rounds) | |
setkeyv(rounds, c("company_name", "funded_at", "funding_round_type")) | |
rounds <- unique(rounds) | |
rounds <- subset(rounds, funded_month != "1960-01") | |
# fix strange date data | |
rounds$funded_at <- ymd(paste(rounds$funded_month, "01", sep="-")) | |
# clean up rounds | |
rounds <- filter(rounds, company_country_code == "USA", company_state_code != "") | |
categories <- read.csv("categories.csv") | |
names(categories)[1] <- c("company_category_code") | |
rounds <- data.table(inner_join(rounds, select(categories, company_category_code, broad_category))) | |
rounds <- filter(rounds, broad_category %in% c("enterprise", "consumer")) | |
rounds <- filter(rounds, !is.na(raised_amount_usd)) | |
rounds <- filter(rounds, funding_round_type %in% c("venture", "angel", "series-a", "series-b", "series-c+")) | |
last_funding_round_type <- rounds %.% arrange(company_name, funded_at) %.% group_by(company_name) %.% summarise(last_funding_round_type=last(funding_round_type)) | |
setnames(last_funding_round_type, "company_name", "name") | |
companies <- join(companies, last_funding_round_type) | |
companies <- filter(companies, !is.na(last_funding_round_type)) | |
acquisitions <- read.csv("2014-03-04-crunchbase_monthly_export_acquisitions.csv") | |
acquisitions <- data.table(acquisitions) | |
acquisitions$acquired_month <- floor_date(ymd(paste(acquisitions$acquired_month, "01", sep="-")), "month") | |
acquisitions$acquired_year <- floor_date(acquisitions$acquired_month, "year") | |
acquisitions <- filter(acquisitions, price_currency_code == "USD" || is.na(price_currency_code)) | |
acquisitions$price_amount <- as.numeric(acquisitions$price_amount) | |
setnames(acquisitions, "company_name", "name") | |
companies <- join(companies, select(acquisitions, name, acquirer_name, acquired_at, acquired_month, acquired_year, price_amount, price_currency_code)) | |
year_status <- companies %.% group_by(founded_year=founded_year) %.% summarise(acquired=sum(!is.na(acquired_at)), companies=n()) | |
ggplot(year_status, aes(x=founded_year, y=acquired/companies)) + geom_point() | |
year_status <- filter(companies, acquired_year > ymd("2005-01-01")) %.% group_by(acquired_year=acquired_year) %.% summarise(companies=n()) | |
acqplot <- ggplot(year_status, aes(x=acquired_year, y=companies)) + geom_point() + ylim(0, max(year_status$companies)) + xlab("Acquisition Year") + ylab("Acquisitions") + ggtitle("Crunchbase Covered Acquisitions") | |
ggsave("acquisitions.png", acqplot, width=640 / 72, height=400 / 72, dpi=72) | |
top_categories <- companies %.% group_by(category_code=category_code) %.% summarise(companies=n()) %.% arrange(desc(companies)) %.% head(9) | |
category_status <- filter(companies, category_code %in% top_categories$category_code) %.% group_by(founded_year=founded_year, category_code=category_code) %.% summarise(acquired=sum(!is.na(acquired_at)), companies=n()) | |
ggplot(category_status, aes(x=founded_year, y=acquired/companies, group=category_code, color=category_code)) + geom_point() | |
category_status <- filter(companies, category_code %in% top_categories$category_code, founded_year > ymd("2006-01-01"), founded_year < ymd("2011-01-01")) %.% group_by(category_code=category_code) %.% summarise(acquired=sum(!is.na(acquired_at)), companies=n()) | |
catplot <- ggplot(category_status, aes(x=category_code, y=acquired/companies)) + geom_bar(stat="identity") + ylab("Acquisition Rate") + xlab("Category") + ggtitle("Category Acquisition Rates") | |
ggsave("category_acquisition.png", catplot, width=640 / 72, height=400 / 72, dpi=72) | |
category_status <- companies %.% group_by(founded_year=founded_year, broad_category=broad_category) %.% summarise(acquired=sum(!is.na(acquired_at)), companies=n()) | |
ggplot(category_status, aes(x=founded_year, y=acquired/companies, group=broad_category, color=broad_category)) + geom_point() | |
catplot <- ggplot(filter(category_status, founded_year < ymd("2013-01-01")), aes(x=founded_year, y=acquired/companies, fill=broad_category)) + geom_bar(stat="identity", position="dodge") + ylab("Acquisition Rate") + xlab("Founded Year") + ggtitle("Category Acquisition Rates Over Time") | |
ggsave("category_year_acquisition.png", catplot, width=640 / 72, height=400 / 72, dpi=72) | |
category_status <- companies %.% group_by(broad_category=broad_category) %.% summarise(acquired=sum(!is.na(acquired_at)), companies=n()) | |
ggplot(category_status, aes(x=broad_category, y=acquired/companies)) + geom_bar(stat="identity") | |
category_status.pt <- melt(data.frame(category_status), "broad_category") | |
ggplot(category_status.pt, aes(x=broad_category, y=value, fill=variable)) + geom_bar(stat="identity", position="dodge") | |
last_funding_round_type_total <- companies %.% group_by(founded_year=founded_year, last_funding_round_type=last_funding_round_type) %.% summarise(companies=n(), acquired=sum(!is.na(acquired_at)), reported=sum(!is.na(price_amount)), price_amount=sum(price_amount, na.rm=T)) | |
ggplot(filter(last_funding_round_type_total, last_funding_round_type %in% c("series-a", "series-b", "series-c+")), aes(x=founded_year, y=acquired/companies, color=last_funding_round_type, group=last_funding_round_type)) + geom_point() | |
companies <- transform(companies, delta=year(acquired_year) - year(founded_year)) | |
age <- filter(companies, acquired_year > ymd("2005-01-01"), delta >= 0) %.% | |
group_by(acquired_year=acquired_year, founded_year=founded_year, delta=delta) %.% | |
summarise(companies=n()) | |
ggplot(age, aes(x=acquired_year, y=companies, fill=as.factor(founded_year))) + geom_bar(stat="identity") | |
ggplot(age, aes(x=acquired_year, y=companies, fill=as.factor(delta))) + geom_bar(stat="identity") | |
age_summary <- filter(companies, acquired_year > ymd("2005-01-01"), delta >= 0) %.% | |
group_by(acquired_year=acquired_year) %.% | |
summarise(companies=n(), avg_delta=mean(delta), med_delta=as.double(median(delta))) | |
ageplot <- ggplot(filter(age_summary, acquired_year > ymd("2008-01-01")), aes(x=acquired_year, y=avg_delta)) + geom_point() + geom_smooth(method="lm") + ylab("Average LifeTime (Years)") + xlab("Acquisition Year") | |
ggsave("avg_acquisition.png", ageplot, width=640 / 72, height=400 / 72, dpi=72) | |
category_age_summary <- filter(companies, acquired_year > ymd("2005-01-01"), delta >= 0) %.% | |
group_by(acquired_year=acquired_year, broad_category=broad_category) %.% | |
summarise(companies=n(), avg_delta=mean(delta), med_delta=as.double(median(delta))) | |
cageplot <- ggplot(filter(category_age_summary, acquired_year > ymd("2008-01-01")), aes(x=acquired_year, y=avg_delta, color=broad_category)) + geom_point() + geom_smooth(method="lm") + ylab("Average LifeTime (Years)") + xlab("Acquisition Year") | |
ggsave("avg_category_acquisition_lifetime.png", cageplot, width=640 / 72, height=400 / 72, dpi=72) | |
avg_delta=mean(delta), med_delta=as.double(median(delta) | |
price_summary <- filter(companies, acquired_year > ymd("2005-01-01"), !is.na(price_amount)) %.% | |
group_by(acquired_year=acquired_year) %.% | |
summarise(companies=n(), | |
raised=sum(!is.na(funding_total_usd)), | |
price_amount=sum(price_amount), | |
funding_total_usd=sum(funding_total_usd, na.rm=T), | |
avg_price_amount=mean(price_amount), | |
avg_funding_total_usd=mean(funding_total_usd), | |
med_price_amount=as.double(median(price_amount)), | |
med_funding_total_usd=as.double(median(funding_total_usd))) | |
sub_price_summary <- select(filter(price_summary, acquired_year > ymd("2008-01-01")), acquired_year, avg_price_amount, med_price_amount) | |
sub_price_summary.pt <- melt(data.frame(sub_price_summary), "acquired_year") | |
priceplot <- ggplot(sub_price_summary.pt, aes(x=year(acquired_year), y=value / 1e6, color=variable)) + geom_point() + ylab("$ (Millions)") + xlab("Acquisition Year") + scale_y_continuous(labels=dollar_format()) + ggtitle("Avg vs Median Acquisition Price") | |
ggsave("avg_acquisition.png", priceplot, width=640 / 72, height=400 / 72, dpi=72) | |
momplot <- ggplot(filter(price_summary, acquired_year > ymd("2008-01-01")), aes(x=year(acquired_year), y=avg_price_amount / avg_funding_total_usd)) + geom_point() + ylab("Cash on Cash Multiple") + xlab("Acquisition Year") + ylim(0, 40) + ggtitle("Cash on Cash Multiples") | |
ggsave("money_multiple.png", momplot, width=640 / 72, height=400 / 72, dpi=72) | |
acqplot <- ggplot(filter(price_summary, acquired_year > ymd("2008-01-01")), aes(x=acquired_year, y=companies)) + geom_point() + ylab("Acquisitions") + xlab("Acquisition Year") + ggtitle("Companies with Reported Acquisitions") + ylim(0, max(price_summary$companies)) | |
ggsave("reported_acquisitions.png", acqplot, width=640 / 72, height=400 / 72, dpi=72) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment