Skip to content

Instantly share code, notes, and snippets.

@ravishchawla
Created June 6, 2019 17:36
Show Gist options
  • Save ravishchawla/bad1bd7fb9da3406c1ed1625c6442ac6 to your computer and use it in GitHub Desktop.
Save ravishchawla/bad1bd7fb9da3406c1ed1625c6442ac6 to your computer and use it in GitHub Desktop.
"""Cleaning the *Transcript* dataset"""
# One-hot encode the event type: one indicator column per distinct event value.
transcript_event = transcript['event'].str.get_dummies()
# Prefix every indicator column with "event_" and replace spaces with
# underscores so the resulting names are valid, uniform identifiers.
transcript_event.columns = [
    'event_' + event_name.replace(' ', '_')
    for event_name in transcript_event.columns
]
# standardize "offer id" column names
def transcript_value_clean(x_dict):
    """Return a copy of *x_dict* with the key 'offer id' renamed to 'offer_id'.

    The input mapping is left untouched: the rename happens on a shallow
    copy, so dicts still referenced elsewhere (for example the cells of the
    column this function is applied to) are not modified as a side effect.

    Args:
        x_dict: Mapping that may contain an 'offer id' key.

    Returns:
        A new dict with the same items, where an 'offer id' entry (if any)
        is stored under 'offer_id' instead.
    """
    cleaned = dict(x_dict)
    if 'offer id' in cleaned:
        # pop + assign mirrors "assign then delete": the value ends up under
        # 'offer_id' and the spaced key is gone.
        cleaned['offer_id'] = cleaned.pop('offer id')
    return cleaned
# Expand each "value" dict into its own columns after normalizing the offer-id
# key. Reusing transcript's index keeps the rows aligned in the axis=1 concat
# below even when transcript does not carry a default RangeIndex.
transcript_values = pd.DataFrame(
    transcript['value'].apply(transcript_value_clean).tolist(),
    index=transcript.index,
)
# Flag rows that carry a reward (1 = present, 0 = missing).
transcript_values['is_reward'] = transcript_values['reward'].notna().astype(int)
# merge amount and reward columns
transcript_values['is_amount'] = transcript_values['amount'].notna().astype(int)
# Take the reward when it is present, otherwise fall back to the amount.
# Vectorized fillna replaces a row-wise apply that indexed each row with
# x[0] / x[1]; integer keys on a label-indexed Series are deprecated.
transcript_values['amount'] = transcript_values['reward'].fillna(transcript_values['amount'])
# filling null offer ids with "0"
transcript_values['has_offer'] = transcript_values['offer_id'].notna().astype(int)
transcript_values['offer_id'] = transcript_values['offer_id'].fillna('0')
# Attach the expanded value columns and the event indicators, then drop the
# raw columns they replace ('reward' has been folded into 'amount').
transcript = pd.concat([transcript, transcript_values, transcript_event], axis=1)
transcript = transcript.drop(['value', 'event', 'reward'], axis=1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment