Skip to content

Instantly share code, notes, and snippets.

@karamanbk
Last active June 3, 2020 05:35
Show Gist options
  • Save karamanbk/03dc42f3000d21886fea145f67eca1a9 to your computer and use it in GitHub Desktop.
Save karamanbk/03dc42f3000d21886fea145f67eca1a9 to your computer and use it in GitHub Desktop.
# Rebuild the retention table with crosstab(): one row per customer and one
# count column per InvoiceYearMonth value, then attach each customer's
# first-purchase month.
tx_retention = (
    pd.crosstab(tx_user_purchase['CustomerID'], tx_user_purchase['InvoiceYearMonth'])
    .reset_index()
    .merge(tx_min_purchase[['CustomerID', 'MinPurchaseYearMonth']], on='CustomerID')
)
# Prefix every column except the trailing MinPurchaseYearMonth with 'm_'.
# NOTE: the leading CustomerID column is prefixed as well ('m_CustomerID').
new_column_names = ['m_' + str(label) for label in tx_retention.columns[:-1]]
new_column_names += ['MinPurchaseYearMonth']
tx_retention.columns = new_column_names
# Build one retention row per cohort, where a cohort is the set of customers
# whose MinPurchaseYearMonth equals the selected month. Each row holds:
#   - NaN for every month before the cohort month,
#   - 'TotalUserCount': the number of customers in the cohort,
#   - 1 for the cohort month itself,
#   - for every later month, the share of the cohort (rounded to 2 decimals)
#     with at least one purchase in that month's 'm_<month>' column.
retention_array = []
for i, selected_month in enumerate(months):
    # Months preceding the cohort month carry no retention value.
    retention_data = dict.fromkeys(months[:i], np.nan)

    # Select the cohort once; every later month is measured against it.
    cohort = tx_retention[tx_retention['MinPurchaseYearMonth'] == selected_month]
    total_user_count = len(cohort)
    retention_data['TotalUserCount'] = total_user_count
    retention_data[selected_month] = 1

    for next_month in months[i + 1:]:
        if total_user_count == 0:
            # Empty cohort: the rate is undefined, so avoid dividing by zero.
            retention_data[next_month] = np.nan
            continue
        # Count customers with any purchase in the month rather than summing
        # the counts, so several purchases by one customer count only once.
        retained_user_count = (cohort['m_' + str(next_month)] > 0).sum()
        retention_data[next_month] = np.round(retained_user_count / total_user_count, 2)
    retention_array.append(retention_data)
# Assemble the cohort rows into the final retention table, labelling each row
# with its cohort month. This rebinds tx_retention, replacing the
# per-customer table built earlier.
tx_retention = pd.DataFrame(retention_array, index=months)

# Bare expression: shows the cohort-based retention table when this is the
# last statement of a notebook cell.
tx_retention
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment