Last active
September 29, 2020 14:14
-
-
Save merltron-pa/f28354583e8f002ede83c4ee49c18a52 to your computer and use it in GitHub Desktop.
multitouch_article_pt3
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Linear multi-touch attribution.
# For every row of ga_raw_data with at least one submitted application, the
# conversion count is divided by the rank of the converting session in that
# user's history, and that share is credited to the utm_medium of each session
# selected for the user. The result is collected in attributed_conversion_df
# with the columns listed below.
# Assumes ga_raw_data['date'] is datetime-like, since it is compared against
# pd.to_datetime(row['date']) -- TODO confirm against the loading code.
columns = ['date', 'utm_medium', 'conversions']

# Per-conversion frames are gathered here and concatenated once after the
# loop: DataFrame.append no longer exists in pandas 2.x, and appending inside
# the loop re-copies the accumulated frame on every iteration.
attributed_frames = []

# Only rows with at least 1 conversion start an attribution pass.
converting_rows = ga_raw_data[ga_raw_data['submitted_applications'] > 0]

for index, row in converting_rows.iterrows():
    # The conversion row plus every session of this user dated on or before
    # it. .copy() makes the column assignments below write to an independent
    # frame instead of a slice of ga_raw_data (avoids SettingWithCopyWarning).
    single_conversion_df = ga_raw_data[
        (ga_raw_data['date'] <= pd.to_datetime(row['date']))
        & (ga_raw_data['user_id'] == row['user_id'])
    ].copy()

    # Sort by date, then by number of conversions, both ascending.
    single_conversion_df = single_conversion_df.sort_values(
        by=['date', 'submitted_applications'], ascending=True
    )

    # Rank the sessions in the user history (1 = first session).
    single_conversion_df['occurrences'] = (
        single_conversion_df.groupby('user_id').cumcount() + 1
    )

    # Locate the converting session inside the sorted history.
    is_conversion_row = (
        (single_conversion_df['submitted_applications'] == row['submitted_applications'])
        & (single_conversion_df['date'] == row['date'])
        & (single_conversion_df['utm_medium'] == row['utm_medium'])
    )

    # Rank of the converting session, used as the divisor for the linear split.
    # NOTE(review): same-day sessions that sort after the conversion row are
    # still in single_conversion_df and receive a share too, so the attributed
    # total can exceed the conversion count in that case -- confirm whether
    # those rows should be dropped.
    occurrences = single_conversion_df.loc[is_conversion_row, 'occurrences'].iloc[0]

    # Share of the conversion(s) credited to each session.
    conversion_per_row = row['submitted_applications'] / occurrences
    single_conversion_df['conversions'] = float(conversion_per_row)

    # Stamp every session with the date of the conversion it contributed to.
    # (The original assigned the 'occurrences' rank here, which filled the
    # date column with session counts instead of dates.)
    single_conversion_df['date'] = row['date']

    # Keep only the columns of the attributed dataframe.
    single_conversion_df = single_conversion_df.filter(items=columns)
    attributed_frames.append(single_conversion_df)

# Build the final frame once; fall back to an empty frame with the expected
# columns when no row had a conversion.
if attributed_frames:
    attributed_conversion_df = pd.concat(attributed_frames)
else:
    attributed_conversion_df = pd.DataFrame(columns=columns)

attributed_conversion_df.head(50)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment