Last active
July 12, 2018 16:13
-
-
Save JulianNorton/2d49d0529500db56ba262fce0fb1acc3 to your computer and use it in GitHub Desktop.
retention analysis by cohort
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
data_alpha, data_beta, org_first_seen = [], [], []

# Load every row of data2.csv into data_alpha as a list of column strings.
# On Python 3 the csv module needs a text-mode file opened with newline='';
# a binary-mode ('rb') handle makes csv.reader raise
# "iterator should return strings, not bytes".
# The file is opened exactly once, inside a context manager, so no handle
# is left open after the rows have been read.
with open('data2.csv', newline='') as csv_file:
    data_alpha.extend(csv.reader(csv_file))
def parse_data(data):
    """Reduce one raw row to ``[order_month, order_status, org_id]``.

    Only columns 1, 3 and 4 of ``data`` are read:

    * column 1 (ordered date) is cut down to characters 1..7, i.e. the
      ``yyyy-mm`` part; character 0 is skipped (presumably a leading
      space in the export -- confirm against the data file).
    * column 3 (order status) loses its first character and is
      lower-cased.
    * column 4 (org id) is passed through untouched.
    """
    ordered_date, order_status, org_id = data[1], data[3], data[4]
    order_month = ordered_date[1:8]
    normalised_status = order_status[1:].lower()
    return [order_month, normalised_status, org_id]
def generate_clean_data(raw_data, clean_data):
    """Return a new list holding ``parse_data(row)`` for each raw row.

    ``clean_data`` is accepted for call compatibility only: its incoming
    value is never read or mutated, and a freshly built list is returned.
    """
    return [parse_data(row) for row in raw_data]
def add_to_cohort(data, existing_cohort, cohort_date):
    """Tell whether a cleaned row should add its org to a cohort.

    ``data`` is indexed as ``[order_date, order_status, org_id]``.
    Returns True only when the order date equals ``cohort_date``, the
    status is exactly ``'completed'`` and the org id is not already in
    ``existing_cohort``; otherwise returns False.
    """
    order_date, order_status, org_id = data[0], data[1], data[2]
    if order_date != cohort_date or order_status != 'completed':
        return False
    return org_id not in existing_cohort
# Replace data_beta with the cleaned version of every raw row in data_alpha.
data_beta = generate_clean_data(raw_data=data_alpha, clean_data=data_beta)
def generate_cohort(data_beta, cohort, date):
    """Fill ``cohort`` in place with the org ids that qualify for ``date``.

    Each row of ``data_beta`` is offered to ``add_to_cohort`` together with
    the cohort built so far; when the row is accepted, its org id
    (``item[2]``) is appended to ``cohort``. Nothing is returned -- the
    caller's list is mutated.
    """
    for item in data_beta:
        # Rely on truthiness rather than comparing the result to True.
        if add_to_cohort(item, cohort, date):
            cohort.append(item[2])
# Build one cohort per month from the cleaned rows. generate_cohort appends
# qualifying org ids to the list it is given.
april_cohort, may_cohort, june_cohort = [], [], []
generate_cohort(data_beta, april_cohort, '2018-04')
generate_cohort(data_beta, may_cohort, '2018-05')
generate_cohort(data_beta, june_cohort, '2018-06')

print(len(april_cohort))
print(len(may_cohort))
print(len(june_cohort))

# Count the April orgs that also appear in the May cohort. The May ids are
# put in a set once so each membership test does not rescan the whole list.
may_orgs = set(may_cohort)
returning_orgs_april_may = sum(1 for org in april_cohort if org in may_orgs)

print('Retention is')
# The division has to happen inside the print() call: print() returns None
# on Python 3, so dividing its result raises TypeError after printing only
# the raw count.
if april_cohort:
    print(float(returning_orgs_april_may) / float(len(april_cohort)))
else:
    # An empty April cohort has nobody to retain; report 0.0 instead of
    # failing with ZeroDivisionError.
    print(0.0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment