Skip to content

Instantly share code, notes, and snippets.

@merltron-pa
Last active September 29, 2020 14:14
Show Gist options
  • Save merltron-pa/f28354583e8f002ede83c4ee49c18a52 to your computer and use it in GitHub Desktop.
Save merltron-pa/f28354583e8f002ede83c4ee49c18a52 to your computer and use it in GitHub Desktop.
multitouch_article_pt3
# Linear multi-touch attribution: each conversion is split evenly across the
# sessions in the converting user's history, and every share is reported
# under the date of the conversion.
# Reads `ga_raw_data` (uses its 'date', 'user_id', 'utm_medium' and
# 'submitted_applications' columns) and builds `attributed_conversion_df`
# with the columns listed below.
columns = ['date', 'utm_medium', 'conversions']

# Per-conversion frames are collected in a list and concatenated once at the
# end: DataFrame.append was removed in pandas 2.0 and re-copied the whole
# accumulated frame on every call.
attributed_frames = []

# Loop over all the rows of the raw GA dataframe.
for index, row in ga_raw_data.iterrows():
    # Only rows with at least one conversion trigger an attribution.
    if not (row['submitted_applications'] > 0):
        continue

    # The conversion row plus every session of the same user dated on or
    # before the conversion. .copy() makes this an independent frame so the
    # column assignments below do not write into a slice of ga_raw_data.
    in_history = (
        (ga_raw_data['date'] <= pd.to_datetime(row['date']))
        & (ga_raw_data['user_id'] == row['user_id'])
    )
    single_conversion_df = ga_raw_data[in_history].copy()

    # Sort by date and number of conversions, ascending.
    single_conversion_df = single_conversion_df.sort_values(
        by=['date', 'submitted_applications'], ascending=True
    )

    # Rank all the sessions in the user history (1 = first session).
    single_conversion_df['occurrences'] = (
        single_conversion_df.groupby('user_id').cumcount() + 1
    )

    # The rank of the conversion row is used as the number of sessions that
    # share the conversion.
    # NOTE(review): rows on the conversion date that sort after the
    # conversion row stay in the history and also receive a share, so the
    # shares can add up to more than the conversion count - confirm whether
    # the history should be cut off at this rank.
    is_conversion_row = (
        (single_conversion_df['submitted_applications']
         == row['submitted_applications'])
        & (single_conversion_df['date'] == row['date'])
        & (single_conversion_df['utm_medium'] == row['utm_medium'])
    )
    occurrences = single_conversion_df.loc[
        is_conversion_row, 'occurrences'
    ].iloc[0]

    # Divide the conversion(s) evenly by the number of sessions and attach
    # that share to each row of the history.
    conversion_per_row = row['submitted_applications'] / occurrences
    single_conversion_df['conversions'] = float(conversion_per_row)

    # Report every share under the conversion date. The original selected
    # the 'occurrences' column here, which overwrote the date with a session
    # rank.
    single_conversion_df['date'] = row['date']

    # Keep only the columns of the attributed dataframe.
    attributed_frames.append(single_conversion_df.filter(items=columns))

# pd.concat raises on an empty list, so fall back to an empty frame with the
# expected columns when no row had a conversion.
if attributed_frames:
    attributed_conversion_df = pd.concat(attributed_frames)
else:
    attributed_conversion_df = pd.DataFrame(columns=columns)
attributed_conversion_df.head(50)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment