Last active
August 29, 2015 14:18
-
-
Save jwinternheimer/707915a78c76a9e98ae4 to your computer and use it in GitHub Desktop.
Churn Rate Exploration
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(data.table) | |
library(dplyr) | |
library(tidyr) | |
library(ggplot2) | |
## Read CSV from Looker
# Load the trailing churn export and relabel its columns by position.
churn <- read.table(
  "~/Downloads/trailing_churn.csv",
  sep = ",",
  header = TRUE
)
names(churn) <- c("date", "awesome", "business", "overall")

## Format Data to correct types
churn$date <- as.Date(churn$date, format = "%Y-%m-%d")
# Strip the "%" sign from every rate column and convert the text to numeric.
churn[c("overall", "awesome", "business")] <- lapply(
  churn[c("overall", "awesome", "business")],
  function(rate) as.numeric(sub("%", "", rate))
)
## Tidy data for analysis
# Reshape to long form: every non-date column becomes rows, with the column
# name stored in `type` and its value in `churn_rate`.
churn_rates <- gather(churn, key = type, value = churn_rate, -date)
## Plot churn rates over time
# One line per plan type with a smoothed trend drawn on top. The y-axis
# breaks run from the lowest to the highest observed rate in steps of 0.25,
# rounded to one decimal place.
churn_plot <- ggplot(
  data = churn_rates,
  mapping = aes(x = date, y = churn_rate, color = type)
) +
  geom_line(stat = "identity", size = 1) +
  geom_smooth(size = 1) +
  fte_theme() +
  labs(title = "Churn Rates by Date", x = "Date", y = "Churn Rate") +
  scale_y_continuous(
    breaks = with(
      churn_rates,
      round(
        seq(from = min(churn_rate), to = max(churn_rate), by = 0.25),
        digits = 1
      )
    )
  )
## Plot churn rate distributions
# Overlaid, semi-transparent density curves of the churn rate, one per plan
# type. The x-axis breaks span the observed range in steps of 0.25, rounded
# to one decimal place.
churn_density <- ggplot(
  data = churn_rates,
  mapping = aes(x = churn_rate, fill = type)
) +
  geom_density(alpha = 0.5) +
  fte_theme() +
  labs(title = "Churn Density by Plan Type", x = "Churn Rate") +
  scale_x_continuous(
    breaks = with(
      churn_rates,
      round(
        seq(from = min(churn_rate), to = max(churn_rate), by = 0.25),
        digits = 1
      )
    )
  )
## Plot box plots of churn rates
# One box per plan type summarising its churn rates. The y-axis breaks span
# the observed range in steps of 0.25, rounded to one decimal place.
churn_boxplots <- ggplot(
  data = churn_rates,
  mapping = aes(x = type, y = churn_rate)
) +
  geom_boxplot(width = 0.5) +
  fte_theme() +
  labs(title = "Churn Rates by Plan Type", x = "Type", y = "Churn Rate") +
  scale_y_continuous(
    breaks = with(
      churn_rates,
      round(
        seq(from = min(churn_rate), to = max(churn_rate), by = 0.25),
        digits = 1
      )
    )
  )
###########################################################
## Churn by Month Paying
###########################################################

## Import and Tidy Data
churn_by_paying <- read.table(
  "~/Downloads/churn_by_months_paying.csv",
  sep = ",",
  header = TRUE
)
# Keep the first 17 columns, then label them by position as the date plus
# months paying "0" through "15". Truncating first means no column is ever
# left with a missing name.
churn_by_paying <- churn_by_paying[, 1:17]
names(churn_by_paying) <- c("date", as.character(0:15))

## Gather Churn Rates
# Long form: one row per (date, month_paying) pair. Any "%" sign is stripped
# from the rate before it is converted to numeric, and date becomes a Date.
churn_by_month_paying <- churn_by_paying %>%
  gather(key = month_paying, value = churn_rate, -date) %>%
  mutate(
    churn_rate = as.numeric(sub("%", "", churn_rate)),
    date = as.Date(date, format = "%Y-%m-%d")
  )
## Average Churn Rate by Month Paying
# Mean and median churn rate for each months-paying bucket, across all dates.
# Every step must end in %>%; without it the chain stops early and
# group_by() is called with no data.
# NOTE(review): mean()/median() return NA for a bucket that contains any NA
# churn_rate; pass na.rm = TRUE if gaps are expected in the export.
average_churn_by_paying <- churn_by_month_paying %>%
  select(-date) %>%
  group_by(month_paying) %>%
  summarise(
    avg_churn = mean(churn_rate),
    med_churn = median(churn_rate)
  )
## Plot Average Churn Rate by Month Paying
# Bar chart of the mean churn rate per months-paying bucket, with each value
# printed above its bar.
# month_paying holds the month labels as text, so a discrete axis would sort
# them alphabetically ("0", "1", "10", ..., "2"). `limits` pins the bars to
# numeric order 0-15.
average_churn_plot <- ggplot(
  average_churn_by_paying,
  aes(x = month_paying, y = avg_churn)
) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(avg_churn, 2)), vjust = -0.2) +
  fte_theme() +
  ylim(0, 80) +
  scale_x_discrete(limits = as.character(0:15)) +
  labs(
    x = "Months Paying",
    y = "Average Churn Rate",
    title = "Average Churn Rate by Months Paying"
  )
## Plot Median Churn Rate by Month Paying
# Bar chart of the median churn rate per months-paying bucket, with each
# value printed above its bar.
# month_paying holds the month labels as text, so a discrete axis would sort
# them alphabetically ("0", "1", "10", ..., "2"). `limits` pins the bars to
# numeric order 0-15.
median_churn_plot <- ggplot(
  average_churn_by_paying,
  aes(x = month_paying, y = med_churn)
) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(med_churn, 2)), vjust = -0.2) +
  fte_theme() +
  ylim(0, 80) +
  scale_x_discrete(limits = as.character(0:15)) +
  labs(
    x = "Months Paying",
    y = "Median Churn Rate",
    title = "Median Churn Rate by Months Paying"
  )
###########################################################
## ggplot Theme
###########################################################

## ggplot Theme
# Build the grey chart theme that every plot in this script adds with `+`.
#
# Takes no arguments. Returns theme_bw() at base size 9 with the overrides
# below applied on top.
#
# brewer.pal() and unit() are called through their package namespaces, so
# the function works even when RColorBrewer and grid are not attached.
#
# NOTE: the plotting statements earlier in this script call fte_theme(), so
# this definition must be evaluated before they are run.
fte_theme <- function() {
  # Generate the colors for the chart procedurally with RColorBrewer
  greys <- RColorBrewer::brewer.pal(n = 9, name = "Greys")
  bg_color <- greys[2]
  grid_color <- greys[3]
  axis_text_color <- greys[6]
  axis_title_color <- greys[7]
  title_color <- greys[9]

  # Begin construction of chart
  theme_bw(base_size = 9) +
    theme(
      # Set the entire chart region to a light gray color
      panel.background = element_rect(fill = bg_color, color = bg_color),
      plot.background = element_rect(fill = bg_color, color = bg_color),
      panel.border = element_rect(color = bg_color),

      # Format the grid: thin horizontal major lines only, no ticks
      panel.grid.major = element_line(color = grid_color, size = .25),
      panel.grid.minor = element_blank(),
      axis.ticks = element_blank(),
      panel.grid.major.x = element_blank(),
      panel.grid.minor.x = element_blank(),

      # Format the legend (it stays visible; nothing here hides it)
      legend.background = element_rect(fill = bg_color),
      legend.text = element_text(size = 15, color = axis_title_color),

      # Set title and axis labels, and format these and tick marks
      plot.title = element_text(color = title_color, size = 20, vjust = 1.25),
      axis.text.x = element_text(size = 10, color = axis_text_color),
      axis.text.y = element_text(size = 10, color = axis_text_color),
      axis.title.x = element_text(
        size = 15, color = axis_title_color, vjust = 0
      ),
      axis.title.y = element_text(
        size = 15, color = axis_title_color, vjust = 1.25
      ),

      # Plot margins
      plot.margin = grid::unit(c(0.35, 0.2, 0.3, 0.35), "cm")
    )
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment