ravishchawla · June 6, 2019 17:36
diff --git a/starbucks_post_5.py b/starbucks_post_5.py
 '''Cleaning the *Transcript* dataset'''
 # One-hot encode the event type. Each indicator column is named
 # "event_<type>", with spaces in the type replaced by underscores.
 transcript_event = transcript['event'].str.get_dummies().rename(
     columns=lambda col: 'event_' + col.replace(' ', '_')
 )

 # standardize "offer id" column names
 def transcript_value_clean(x_dict):
     '''Return a copy of an event payload with 'offer id' renamed to 'offer_id'.

     The input dict is left untouched, so the raw payloads stored in the
     source column are not rewritten as a side effect of cleaning.

     Args:
         x_dict: dict payload that may use either the 'offer id' or the
             'offer_id' spelling for its offer key.

     Returns:
         A new dict with the same entries, where any 'offer id' entry is
         stored under 'offer_id' instead.
     '''
     cleaned = dict(x_dict)
     if 'offer id' in cleaned:
         # pop-then-assign puts 'offer_id' after the remaining keys
         cleaned['offer_id'] = cleaned.pop('offer id')
     return cleaned

 # Expand each event's payload dict into its own columns. Reusing transcript's
 # index keeps the rows aligned in the concat below even when transcript does
 # not carry a default RangeIndex.
 transcript_values = pd.DataFrame(
     list(transcript['value'].apply(transcript_value_clean)),
     index=transcript.index,
 )
 transcript_values['is_reward'] = transcript_values['reward'].notna().astype(int)

 # merge amount and reward columns: is_amount has to be captured first,
 # because 'amount' is then overwritten with reward-where-present, else amount
 transcript_values['is_amount'] = transcript_values['amount'].notna().astype(int)
 transcript_values['amount'] = transcript_values['reward'].fillna(transcript_values['amount'])

 # flag rows that carry an offer id, then fill null offer ids with "0"
 transcript_values['has_offer'] = transcript_values['offer_id'].notna().astype(int)
 transcript_values['offer_id'] = transcript_values['offer_id'].fillna('0')

 # attach the expanded payload and event indicator columns, then drop the raw
 # columns they replace ('reward' was folded into 'amount' above)
 transcript = pd.concat([transcript, transcript_values, transcript_event], axis=1)
 transcript = transcript.drop(columns=['value', 'event', 'reward'])
	'''Cleaning the Transcript dataset'''
	# One-hot encode the event type. Each indicator column is named
	# "event_<type>", with spaces in the type replaced by underscores.
	transcript_event = transcript['event'].str.get_dummies().rename(
		columns=lambda col: 'event_' + col.replace(' ', '_')
	)

	# standardize "offer id" column names
	def transcript_value_clean(x_dict):
		'''Return a copy of an event payload with 'offer id' renamed to 'offer_id'.

		The input dict is left untouched, so the raw payloads stored in the
		source column are not rewritten as a side effect of cleaning.

		Args:
			x_dict: dict payload that may use either the 'offer id' or the
				'offer_id' spelling for its offer key.

		Returns:
			A new dict with the same entries, where any 'offer id' entry is
			stored under 'offer_id' instead.
		'''
		cleaned = dict(x_dict)
		if 'offer id' in cleaned:
			# pop-then-assign puts 'offer_id' after the remaining keys
			cleaned['offer_id'] = cleaned.pop('offer id')
		return cleaned

	# Expand each event's payload dict into its own columns. Reusing transcript's
	# index keeps the rows aligned in the concat below even when transcript does
	# not carry a default RangeIndex.
	transcript_values = pd.DataFrame(
		list(transcript['value'].apply(transcript_value_clean)),
		index=transcript.index,
	)
	transcript_values['is_reward'] = transcript_values['reward'].notna().astype(int)

	# merge amount and reward columns: is_amount has to be captured first,
	# because 'amount' is then overwritten with reward-where-present, else amount
	transcript_values['is_amount'] = transcript_values['amount'].notna().astype(int)
	transcript_values['amount'] = transcript_values['reward'].fillna(transcript_values['amount'])

	# flag rows that carry an offer id, then fill null offer ids with "0"
	transcript_values['has_offer'] = transcript_values['offer_id'].notna().astype(int)
	transcript_values['offer_id'] = transcript_values['offer_id'].fillna('0')

	# attach the expanded payload and event indicator columns, then drop the raw
	# columns they replace ('reward' was folded into 'amount' above)
	transcript = pd.concat([transcript, transcript_values, transcript_event], axis=1)
	transcript = transcript.drop(columns=['value', 'event', 'reward'])