Cyrus atriptoparadise

Let's make it happen

atriptoparadise / Data Preparation1.txt

Created July 27, 2019 13:10

	# Total Amount per Date, ordered chronologically (one column, indexed by Date).
	df_timeseries = pd.DataFrame(df.groupby(['Date'])['Amount'].sum()).sort_index(axis=0)

	# Modelling frame: the same series with the target column renamed to "y".
	data = df_timeseries.rename(columns={"Amount": "y"})

	# Drop the last row of the series. The original note says this removes the
	# data after 2019-06, since the next half year of 2019 is what gets predicted.
	# NOTE(review): this removes exactly one row regardless of its date -- confirm
	# that the final period is the only one past 2019-06.
	data = data.loc[data.index[:-1]]

	# Add lagged copies of the target as features: lag_7, lag_8, ..., lag_47.
	# range() excludes its upper bound, so lag_48 is NOT created.
	# NOTE(review): the original comment said "up to 48"; use range(7, 49) if a
	# 48-step lag is actually wanted.
	for i in range(7, 48):
	    data["lag_{}".format(i)] = data.y.shift(i)

atriptoparadise / gist:96ed28124c4ae05aa2b9f577ddcb554f

Created July 27, 2019 13:09

	# plotFor5(Territory): the visible part builds the per-date NetAmount series
	# for the rows of the global `df` whose TerritoryID equals `Territory`.
	# NOTE(review): this excerpt is truncated -- the final `for` loop has no body
	# and the function body is not indented under `def`, so it will not run as shown.
	def plotFor5(Territory):
	# Keep only the rows belonging to the requested territory.
	df_model = df[df['TerritoryID'] == Territory]
	# Sum NetAmount per Date and sort the result by date index.
	df_timeseries = pd.DataFrame(df_model.groupby(['Date'])['NetAmount'].sum()).sort_index(axis=0)
	# One-column frame holding the target, renamed to "y".
	data = pd.DataFrame(df_timeseries.NetAmount)
	data.columns = ["y"]

	# Drop the last row (per the original note: the data after 2019-06).
	# NOTE(review): removes exactly one row regardless of its date -- confirm.
	data = data.loc[data.index[:-1]]
	# Lag loop over i = 7..47 (range's upper bound is exclusive, so 48 is not
	# included despite the original "up to 48" wording). Loop body is missing
	# from this excerpt.
	for i in range(7, 48):

atriptoparadise / Stacking Model

Last active March 20, 2020 16:48

	def modelingFor5(Account):
	    """Build the per-date NetAmount target series for one account.

	    Reads the module-level ``df`` and keeps the rows whose ``Account``
	    column equals ``Account``.

	    NOTE(review): only the data-preparation head of this function is
	    visible in this excerpt; the modelling steps and the return value
	    are not shown.
	    """
	    # Keep only the rows belonging to the requested account.
	    df_model = df[df['Account'] == Account]
	    # Sum NetAmount per Date and sort the result by date index.
	    df_timeseries = pd.DataFrame(df_model.groupby(['Date'])['NetAmount'].sum()).sort_index(axis=0)

	    # One-column frame holding the target, renamed to "y".
	    data = pd.DataFrame(df_timeseries.NetAmount)
	    data.columns = ["y"]

	    # Drop the last row (per the original note: the data after 2019-06).
	    # NOTE(review): removes exactly one row regardless of its date -- confirm.
	    data = data.loc[data.index[:-1]]

atriptoparadise / XGB

Created July 26, 2019 21:05

	# XGB: fit a default-parameter XGBRegressor on the scaled training features.
	# fit() returns the estimator itself, so `xgb` is the fitted model.
	xgb = XGBRegressor().fit(X_train_scaled, y_train)

	# Show the fitted model's results, with intervals and anomalies enabled.
	plotModelResults(
	    xgb,
	    X_train=X_train_scaled,
	    X_test=X_test_scaled,
	    plot_intervals=True,
	    plot_anomalies=True,
	)

atriptoparadise / gist:ea009ef33ef226e62c93c60e8728af82

Created July 26, 2019 20:45

Data Preparation

	# Load the records from the database; loadData also drops rows that fail
	# some simple requirements.
	raw_records = loadData('***')

	# Some accounts stopped selling products after 2019. They do not need a
	# forecast, so they are removed here (threshold 0.06).
	active_records = dropAccByRatio2019(raw_records, 0.06)

	# Remove account/product combinations that have fewer than 6 records.
	df = dropAccPrdLessRecord(active_records, 6)

	# Preview the first rows of the cleaned frame.
	df.head()

atriptoparadise / Data Preparation.txt

Last active July 27, 2019 13:09

Data Preparation

	# Drop rows with missing values once and reuse the result for both the
	# target and the feature matrix (previously dropna() was computed twice).
	complete_rows = data.dropna()
	y = complete_rows.y
	X = complete_rows.drop(columns=['y'])

	# Reserve 30% of data for testing
	X_train, X_test, y_train, y_test = timeseries_train_test_split(X, y, test_size=0.3)

	# Fit the scaler on the training split only, then apply that same fitted
	# transform to the test split.
	X_train_scaled = scaler.fit_transform(X_train)
	X_test_scaled = scaler.transform(X_test)

	# Linear Regression
	# NOTE(review): only the estimator construction is visible in this excerpt.
	lr = LinearRegression()