ranpelta · July 18, 2020 08:29
diff --git a/entire code.py b/entire code.py
 import pandas as pd
 import numpy as np
 import keras
 import matplotlib.pyplot as plt
 from sklearn.preprocessing import MinMaxScaler
 from pandas.plotting import register_matplotlib_converters
 register_matplotlib_converters()
 from keras.preprocessing.sequence import TimeseriesGenerator
 from keras.models import Sequential
 from keras.layers import Dense
 from keras.layers import LSTM

 # Load the full dataset.
 # NOTE: unpickling can execute arbitrary code - only load pickle files from a trusted source.
 df = pd.read_pickle(r'C:\....\data.pkl')
 y_col = 'y'  # name of the target column, i.e. what we want to predict

 # Chronological split: the last 10% (0.1) of the rows are held out as test data.
 test_size = int(len(df) * 0.1)
 # .copy() makes train/test independent frames, which avoids pandas'
 # SettingWithCopyWarning if they are modified later.
 train = df.iloc[:-test_size, :].copy()
 test = df.iloc[-test_size:, :].copy()


 X_train = train.drop(y_col, axis=1).copy()
 # Double brackets keep y as a one-column DataFrame (single brackets would return a Series).
 y_train = train[[y_col]].copy()

 # Scale features and target to the range [0, 1]; both scalers are fitted on the training data only.
 Xscaler = MinMaxScaler(feature_range=(0, 1))
 Xscaler.fit(X_train)
 scaled_X_train = Xscaler.transform(X_train)
 Yscaler = MinMaxScaler(feature_range=(0, 1))
 Yscaler.fit(y_train)
 scaled_y_train = Yscaler.transform(y_train)
 scaled_y_train = scaled_y_train.reshape(-1)  # drop the second dimension: (n, 1) -> (n,)

 # TimeseriesGenerator pairs the window data[i - length:i] with targets[i].
 # Shifting the targets one step forward (pad the front with 0, drop the last
 # value) makes each window's target the y of the window's LAST row instead of
 # the y of the row that follows the window.
 scaled_y_train = np.insert(scaled_y_train, 0, 0)
 scaled_y_train = np.delete(scaled_y_train, -1)

 n_input = 25  # number of consecutive rows (timesteps) in each input window
 n_features = X_train.shape[1]  # number of predictor columns (everything except y_col)
 b_size = 32  # number of windows in each batch
 generator = TimeseriesGenerator(scaled_X_train, scaled_y_train, length=n_input, batch_size=b_size)

 model = Sequential()
 model.add(LSTM(150, activation='relu', input_shape=(n_input, n_features)))
 model.add(Dense(1))
 model.compile(optimizer='adam', loss='mse')

 # NOTE(review): fit_generator is the legacy entry point; newer Keras releases
 # accept the generator directly in model.fit - confirm against the installed version.
 model.fit_generator(generator, epochs=5)

 X_test = test.drop(y_col, axis=1).copy()
 scaled_X_test = Xscaler.transform(X_test)  # reuse the scaler fitted on the training data
 # The generator requires a targets array, but predict() ignores it, so zeros act as a placeholder.
 test_generator = TimeseriesGenerator(scaled_X_test, np.zeros(len(X_test)), length=n_input, batch_size=b_size)

 y_pred_scaled = model.predict(test_generator)
 y_pred = Yscaler.inverse_transform(y_pred_scaled)  # back to the original units of y
 # Prediction k is computed from test rows k .. k + n_input - 1 and, because of
 # the target shift used in training, estimates y at row k + n_input - 1.
 # The matching ground truth is therefore values[n_input - 1:-1] (same length as
 # y_pred); values[n_input:] would compare each prediction with the NEXT row's y.
 results = pd.DataFrame({'y_true': test[y_col].values[n_input - 1:-1], 'y_pred': y_pred.ravel()})
	import pandas as pd
	import numpy as np
	import keras
	import matplotlib.pyplot as plt
	from sklearn.preprocessing import MinMaxScaler
	from pandas.plotting import register_matplotlib_converters
	register_matplotlib_converters()
	from keras.preprocessing.sequence import TimeseriesGenerator
	from keras.models import Sequential
	from keras.layers import Dense
	from keras.layers import LSTM

	# Load the full dataset.
	# NOTE: unpickling can execute arbitrary code - only load pickle files from a trusted source.
	df = pd.read_pickle(r'C:\....\data.pkl')
	y_col = 'y'  # name of the target column, i.e. what we want to predict

	# Chronological split: the last 10% (0.1) of the rows are held out as test data.
	test_size = int(len(df) * 0.1)
	# .copy() makes train/test independent frames, which avoids pandas'
	# SettingWithCopyWarning if they are modified later.
	train = df.iloc[:-test_size, :].copy()
	test = df.iloc[-test_size:, :].copy()


	X_train = train.drop(y_col, axis=1).copy()
	# Double brackets keep y as a one-column DataFrame (single brackets would return a Series).
	y_train = train[[y_col]].copy()

	# Scale features and target to the range [0, 1]; both scalers are fitted on the training data only.
	Xscaler = MinMaxScaler(feature_range=(0, 1))
	Xscaler.fit(X_train)
	scaled_X_train = Xscaler.transform(X_train)
	Yscaler = MinMaxScaler(feature_range=(0, 1))
	Yscaler.fit(y_train)
	scaled_y_train = Yscaler.transform(y_train)
	scaled_y_train = scaled_y_train.reshape(-1)  # drop the second dimension: (n, 1) -> (n,)

	# TimeseriesGenerator pairs the window data[i - length:i] with targets[i].
	# Shifting the targets one step forward (pad the front with 0, drop the last
	# value) makes each window's target the y of the window's LAST row instead of
	# the y of the row that follows the window.
	scaled_y_train = np.insert(scaled_y_train, 0, 0)
	scaled_y_train = np.delete(scaled_y_train, -1)

	n_input = 25  # number of consecutive rows (timesteps) in each input window
	n_features = X_train.shape[1]  # number of predictor columns (everything except y_col)
	b_size = 32  # number of windows in each batch
	generator = TimeseriesGenerator(scaled_X_train, scaled_y_train, length=n_input, batch_size=b_size)

	model = Sequential()
	model.add(LSTM(150, activation='relu', input_shape=(n_input, n_features)))
	model.add(Dense(1))
	model.compile(optimizer='adam', loss='mse')

	# NOTE(review): fit_generator is the legacy entry point; newer Keras releases
	# accept the generator directly in model.fit - confirm against the installed version.
	model.fit_generator(generator, epochs=5)

	X_test = test.drop(y_col, axis=1).copy()
	scaled_X_test = Xscaler.transform(X_test)  # reuse the scaler fitted on the training data
	# The generator requires a targets array, but predict() ignores it, so zeros act as a placeholder.
	test_generator = TimeseriesGenerator(scaled_X_test, np.zeros(len(X_test)), length=n_input, batch_size=b_size)

	y_pred_scaled = model.predict(test_generator)
	y_pred = Yscaler.inverse_transform(y_pred_scaled)  # back to the original units of y
	# Prediction k is computed from test rows k .. k + n_input - 1 and, because of
	# the target shift used in training, estimates y at row k + n_input - 1.
	# The matching ground truth is therefore values[n_input - 1:-1] (same length as
	# y_pred); values[n_input:] would compare each prediction with the NEXT row's y.
	results = pd.DataFrame({'y_true': test[y_col].values[n_input - 1:-1], 'y_pred': y_pred.ravel()})