Skip to content

Instantly share code, notes, and snippets.

@ESeufert
Last active June 25, 2025 12:46
Show Gist options
  • Save ESeufert/84c289461480b085aa63eff71db7b5eb to your computer and use it in GitHub Desktop.
Save ESeufert/84c289461480b085aa63eff71db7b5eb to your computer and use it in GitHub Desktop.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter
## create the retention curve and cohort size vectors
## r[ k ] is the share of a cohort still active k days after it was onboarded
## ( r[ 0 ] = 1 is the onboarding day itself ); c[ i ] is the size of the cohort
## whose first active day is calendar day i
r = np.array( [ 1, 0.75, 0.5, 0.3, 0.2, 0.15, 0.12 ] ) ## retention rates
c = np.array( [ 500, 600, 1000, 400, 350 ] ) ## cohort sizes
D_r = len( r )
D_c = len( c )
## the last cohort starts at index D_c - 1 and is followed for D_r days
calendar_days = D_c + D_r - 1
## create the banded retention matrix, Z_banded
## row i is the retention curve shifted right by i days and zero everywhere else
Z_banded = np.zeros( ( D_c, calendar_days ) ) ## shape ( D_c, D_c + D_r - 1 )
for i in range( D_c ):
    start_idx = i
    ## i + D_r never exceeds calendar_days here; the min() is only a safety clamp
    end_idx = min( i + D_r, calendar_days )
    Z_banded[ i, start_idx:end_idx ] = r[ :end_idx - start_idx ]
## create the DAU_banded matrix and get the total DAU per calendar day
## broadcasting scales row i of Z_banded by the size of cohort i
DAU_banded = ( c[ :, np.newaxis ] ) * Z_banded
## column sums: every cohort's active users on each calendar day
total_DAU = DAU_banded.sum( axis=0 )
## plot
## three vertically stacked panels on a white figure, plus one pastel colour per cohort
fig, axs = plt.subplots( nrows=3, ncols=1, figsize=( 12, 20 ), facecolor='white' )
colors = plt.get_cmap( 'Pastel1' )( np.linspace( 0, 1, num=D_c ) )
## stacked bar chart of DAU per cohort, with total
## each cohort's bars sit on top of the cohorts drawn before it;
## bottoms holds the running height of the stack for every calendar day
days = np.arange( 1, calendar_days + 1 )
bottoms = np.zeros( calendar_days )
for i, cohort_dau in enumerate( DAU_banded ):
    axs[ 0 ].bar( days, cohort_dau, bottom=bottoms, label=f'Cohort {i+1}', color=colors[ i ] )
    ## write the cohort's DAU in the middle of each non-empty segment
    for day, base, dau in zip( days, bottoms, cohort_dau ):
        if dau > 0:
            axs[ 0 ].text( day, base + dau / 2, f'{int(dau)}',
                           ha='center', va='center', fontsize=8 )
    ## raise the stack only after this cohort's labels have been placed
    bottoms += cohort_dau
## total DAU label just above the top of each stack
for day, total in zip( days, total_DAU ):
    axs[ 0 ].text( day, total + 10, f'DAU: {int(total)}',
                   ha='center', va='bottom', fontsize=10, fontweight='bold' )
## 10% headroom so the total labels stay inside the axes
axs[ 0 ].set_ylim( 0, total_DAU.max() * 1.1 )
axs[ 0 ].set_title( 'DAU by Cohort Over Time', fontsize=16, fontweight='bold' )
axs[ 0 ].set_xlabel( 'Calendar Day' )
axs[ 0 ].set_ylabel( 'Daily Active Users' )
axs[ 0 ].grid( True )
axs[ 0 ].set_axisbelow( True ) ## keep grid lines behind the bars
axs[ 0 ].legend()
## retention curve
## retention rate for each day since onboarding, with the y axis shown as percentages
retention_ax = axs[ 1 ]
days_since_onboarding = np.arange( D_r ) + 1
retention_ax.plot( days_since_onboarding, r, marker='o' )
retention_ax.set_title( 'Retention Curve', fontsize=16, fontweight='bold' )
retention_ax.set( xlabel='Days After Onboarding', ylabel='Retention Rate' )
## r is stored as fractions of 1, hence xmax=1
percent_ticks = PercentFormatter( xmax=1, decimals=0 )
retention_ax.yaxis.set_major_formatter( percent_ticks )
retention_ax.grid( True )
## cohorts
## one bar per cohort, numbered from 1, showing the cohort's size
cohort_ax = axs[ 2 ]
cohort_numbers = np.arange( D_c ) + 1
cohort_ax.bar( cohort_numbers, c, color='lightsteelblue' )
cohort_ax.set_title( 'Cohort Sizes', fontsize=16, fontweight='bold' )
cohort_ax.set( xlabel='Cohort', ylabel='Size' )
cohort_ax.grid( True )
## fit the three panels to the figure, then display it
fig.tight_layout()
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment