This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sum the transaction amounts per date and order the result chronologically.
# `df` is the transaction table prepared elsewhere in this file.
df_timeseries = df.groupby(['Date'])['Amount'].sum().to_frame().sort_index(axis=0)

# Keep only the aggregated amount and call it "y": it is the forecasting target.
data = pd.DataFrame(df_timeseries.Amount)
data.columns = ["y"]

# Drop the final row of the series. The original note says this removes the
# data after 2019-06 so that the next half year of 2019 can be predicted.
# NOTE(review): the code removes exactly one row (the last index entry) --
# confirm that this is the only period after 2019-06.
data = data.loc[data.index[:-1]]

# Add lagged copies of the target as features: lag_7, lag_8, ..., lag_47.
# range(7, 48) excludes 48, so the largest lag is 47 steps. The original
# comment describes a step as one month; that depends on the granularity of
# the 'Date' column -- TODO confirm.
for i in range(7, 48):
    data["lag_{}".format(i)] = data.y.shift(i)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def plotFor5(Territory): | |
df_model = df[df['TerritoryID'] == Territory] | |
df_timeseries = pd.DataFrame(df_model.groupby(['Date'])['NetAmount'].sum()).sort_index(axis=0) | |
data = pd.DataFrame(df_timeseries.NetAmount) | |
data.columns = ["y"] | |
# Drop data after 2019-06 | |
data = data.loc[data.index[:-1]] | |
# Adding the lag of the target variable from 7 steps back up to 48 months ago | |
for i in range(7, 48): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def modelingFor5(Account):
    """Prepare the per-date 'NetAmount' target series for a single account.

    Reads the module-level ``df`` table, keeps the rows whose 'Account'
    column equals ``Account``, sums 'NetAmount' per 'Date', sorts by date,
    renames the column to "y" and drops the last row.

    NOTE(review): only the first statements of this function are visible in
    this excerpt. The remaining steps (presumably feature building and model
    fitting, judging by the name) are not shown and must be restored from the
    full source.
    """
    # Restrict the transaction table to the requested account.
    df_model = df[df['Account'] == Account]

    # One summed NetAmount per date, in chronological order.
    df_timeseries = df_model.groupby(['Date'])['NetAmount'].sum().to_frame().sort_index(axis=0)

    # The single remaining column is the forecasting target "y".
    data = pd.DataFrame(df_timeseries.NetAmount)
    data.columns = ["y"]

    # Drop the final row; the original note calls this "drop data after
    # 2019-06" -- assumes the last index entry is the only such period.
    data = data.loc[data.index[:-1]]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# XGB
# Fit an XGBRegressor with its default hyperparameters on the scaled training
# features. XGBRegressor is imported elsewhere (presumably xgboost's
# scikit-learn style wrapper -- confirm against the import block).
xgb = XGBRegressor()
xgb.fit(X_train_scaled, y_train)

# plotModelResults is a helper defined elsewhere in this file; here it is asked
# to draw the fitted model with both the interval and the anomaly options on.
plotModelResults(xgb, X_train=X_train_scaled, X_test=X_test_scaled, plot_intervals=True, plot_anomalies=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# loadData is a project helper; per the original note it reads the data from
# the author's database and drops some rows according to a few simple rules.
df = loadData('***')

# Some accounts stopped selling products after 2019. They are dropped because
# no prediction is needed for them.
# NOTE(review): 0.06 is presumably the ratio threshold used by the helper --
# confirm its exact meaning in dropAccByRatio2019.
df = dropAccByRatio2019(df, 0.06)

# Drop account/product combinations that have fewer than 6 records
# (threshold semantics as described by the original comment -- verify in
# dropAccPrdLessRecord).
df = dropAccPrdLessRecord(df, 6)

# Notebook-style preview of the first rows of the filtered table.
df.head()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Remove every row that contains a NaN (presumably the leading rows for which
# the lag features are undefined). The filtered frame is computed once and
# reused for both the target and the features.
data_complete = data.dropna()
y = data_complete.y
X = data_complete.drop(['y'], axis=1)

# Reserve 30% of data for testing
# (timeseries_train_test_split is a helper defined elsewhere in this file).
X_train, X_test, y_train, y_test = timeseries_train_test_split(X, y, test_size=0.3)

# The scaler is fitted on the training features and then applied unchanged to
# the test features, so the test rows do not influence the scaling parameters.
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Linear Regression
# NOTE(review): the excerpt ends here; the fit/plot calls for `lr` are not shown.
lr = LinearRegression()