Dzaky Widya Putra dzakyputra

Software Engineer

dzakyputra / capstone_project_starbucks_9.py

Created June 17, 2020 00:40

	query = """
	SELECT
	complete.offer_type,
	complete.complete_without_view_income,
	complete_view.complete_with_view_income

	FROM
	(SELECT
	offer_type,
	AVG(income) complete_without_view_income

dzakyputra / capstone_project_starbucks_8.py

Created June 17, 2020 00:36

	# Find the spending distribution
	spending_distribution = transcript[transcript['event'] == 'transaction'].merge(profile, left_on='person', right_on='id')

	# Histogram for the Male
	plt.hist(spending_distribution[spending_distribution['gender'] == 'M']['amount'],
	range=(0, 40),
	alpha=0.5,
	bins=40,
	label='Male')

dzakyputra / capstone_project_starbucks_7.py

Created June 17, 2020 00:34

	# Calculate the average spending for each gender
	avg_spending = transcript[transcript['event'] == 'transaction'].merge(profile, left_on='person', right_on='id') \
	.groupby('gender', as_index=False)['amount'] \
	.mean() \
	.rename(columns={'amount': 'average_spending_per_transaction'}) \
	.sort_values('average_spending_per_transaction')

	avg_spending['gender'] = avg_spending['gender'].map({'F': 'Female', 'M': 'Male', 'O': 'Others'})

	# Start the visualization process

dzakyputra / capstone_project_starbucks_7.py

Created June 17, 2020 00:30

	query = """
	SELECT
	complete.offer_type,
	complete.gender,
	complete.complete_without_view,
	complete_view.complete_with_view,
	(complete.complete_without_view + complete_view.complete_with_view) total_complete

	FROM
	(SELECT

dzakyputra / capstone_project_starbucks_6.py

Created June 17, 2020 00:23

	query = """
	SELECT
	complete.offer_id,
	complete.offer_type,
	total_completed,
	total_completed_without_view,
	ROUND(((1.0total_completed_without_view) / (1.0total_completed))*100, 2) as total_completed_without_view_ratio,
	100 - ROUND(((1.0total_completed_without_view) / (1.0total_completed))*100, 2) as total_completed_with_view_ratio,
	`loss ($)`
	FROM

dzakyputra / capstone_project_starbucks_5.py

Created June 17, 2020 00:13

	# Reference
	offer_reference = {}
	offer_duration_reference = {}

	for i,j in zip(portfolio['id'], portfolio['offer_type']):
	offer_reference[i] = j

	for i,j in zip(portfolio['id'], portfolio['duration']):
	offer_duration_reference[i] = j*24

dzakyputra / capstone_project_starbucks_4.py

Created June 17, 2020 00:07

	# Profile preprocessing

	# Convert the became_member_on into datetime type
	profile['became_member_on'] = pd.to_datetime(profile['became_member_on'], format='%Y%m%d')

	# Create a new column with the value of the difference days between the column became_member_on and the max days
	profile['difference_days'] = (profile['became_member_on'].max() - profile['became_member_on']).dt.days


	## Find the median and mode

dzakyputra / capstone_project_starbucks_3.py

Created June 16, 2020 23:59

	## Transcript preprocessing

	# Convert the dictionary value into columns and concatenate with the current dataframe
	value = pd.io.json.json_normalize(transcript['value'])
	transcript = pd.concat([transcript, value], axis=1).drop(columns=['value'])

	# Merge the offer_id column and offer id collumn so that it only has one column
	transcript['offer_id'] = np.where(pd.isnull(transcript['offer_id']), transcript['offer id'], transcript['offer_id'])
	transcript.drop(columns=['offer id'], inplace=True)

dzakyputra / capstone_project_starbucks_2.py

Last active June 17, 2020 00:01

	# Portfolio preprocessing

	values = []

	# Iterate the dataframe
	for index, row in portfolio.iterrows():

	# Set the default value
	new_value = [0,0,0,0]

dzakyputra / capstone_project_starbucks_1.py

Last active June 17, 2020 00:00

	from matplotlib.colors import ListedColormap
	from tqdm import tqdm

	import matplotlib.patches as mpatches
	import matplotlib.pyplot as plt
	import pandasql as pdsql
	import seaborn as sns
	import pandas as pd
	import numpy as np
	import math