Skip to content

Instantly share code, notes, and snippets.

# Total Amount per Date, as a one-column frame sorted by its Date index.
df_timeseries = df.groupby(['Date'])['Amount'].sum().to_frame().sort_index(axis=0)
# Modelling frame: the same series with the target column named "y".
data = df_timeseries.rename(columns={"Amount": "y"})
# Drop the last row (original note: data after 2019-06 is excluded because the
# next half year of 2019 is what we want to predict). The explicit copy keeps
# the column assignments below from writing into a slice of another frame.
data = data.iloc[:-1].copy()
# Add lagged copies of the target as features. range(7, 48) yields lags 7
# through 47 steps (months, per the original note); lag 48 is NOT included.
for i in range(7, 48):
    data["lag_{}".format(i)] = data.y.shift(i)
# Builds the per-date NetAmount target series for a single territory.
# NOTE(review): the body's indentation is missing in this copy, and the final
# `for` loop has no body before the next `def` — the snippet appears truncated,
# so whatever the function does after the loop header is not visible here.
def plotFor5(Territory):
# Keep only the rows of the module-level `df` whose TerritoryID equals `Territory`.
df_model = df[df['TerritoryID'] == Territory]
# Sum NetAmount per Date and sort the result by its Date index.
df_timeseries = pd.DataFrame(df_model.groupby(['Date'])['NetAmount'].sum()).sort_index(axis=0)
# One-column frame holding the target, renamed to "y".
data = pd.DataFrame(df_timeseries.NetAmount)
data.columns = ["y"]
# Drop the last row (original note: drop data after 2019-06).
data = data.loc[data.index[:-1]]
# Presumably adds lags of the target as features: range(7, 48) covers lags 7
# through 47 steps (the original note says "up to 48 months"; 48 is excluded).
for i in range(7, 48):
# Builds the per-date NetAmount target series for a single account.
# NOTE(review): the body's indentation is missing in this copy and no model
# fitting or return statement is visible — the snippet appears truncated.
def modelingFor5(Account):
# Keep only the rows of the module-level `df` whose Account equals `Account`.
df_model = df[df['Account'] == Account]
# Sum NetAmount per Date and sort the result by its Date index.
df_timeseries = pd.DataFrame(df_model.groupby(['Date'])['NetAmount'].sum()).sort_index(axis=0)
# One-column frame holding the target, renamed to "y".
data = pd.DataFrame(df_timeseries.NetAmount)
data.columns = ["y"]
# Drop the last row (original note: drop data after 2019-06).
data = data.loc[data.index[:-1]]
# XGB: fit an XGBRegressor with its default settings on the scaled training
# features and the training target.
xgb = XGBRegressor()
xgb.fit(X_train_scaled, y_train)
# Hand the fitted model and both scaled splits to plotModelResults, with the
# interval and anomaly options switched on.
plotModelResults(
    xgb,
    X_train=X_train_scaled,
    X_test=X_test_scaled,
    plot_intervals=True,
    plot_anomalies=True,
)
# loadData reads the data from the author's database and, per the original
# note, drops some rows according to a few simple requirements.
df = loadData('***')
# Some accounts stopped selling products after 2019; they are dropped because
# they do not need to be predicted. NOTE(review): 0.06 is presumably the ratio
# threshold used by dropAccByRatio2019 — confirm against its definition.
df = dropAccByRatio2019(df, 0.06)
# Drop accounts and products with fewer than 6 records.
df = dropAccPrdLessRecord(df, 6)
# Preview the first rows of the cleaned frame.
df.head()
@atriptoparadise
atriptoparadise / Data Preparation.txt
Last active July 27, 2019 13:09
Data Preparation
# Remove rows containing missing values once and reuse the result for both the
# target and the feature matrix (the original recomputed dropna() for each).
complete_rows = data.dropna()
y = complete_rows.y
X = complete_rows.drop(['y'], axis=1)
# Reserve 30% of data for testing
X_train, X_test, y_train, y_test = timeseries_train_test_split(X, y, test_size=0.3)
# Fit the scaler on the training split only, then apply that same fitted
# transform to the test split.
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Linear Regression
lr = LinearRegression()