# Gist MauroCE/c88fb18849bc1c38f470ce535457e17b (last active January 22, 2018 21:18).
# Note from the GitHub page: this file contains bidirectional Unicode text that may be
# interpreted or compiled differently than what appears below. To review, open the file
# in an editor that reveals hidden Unicode characters.
# Import relevant modules
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.layers import Dense, LSTM
from keras.models import Sequential
from sklearn.model_selection import train_test_split
# Experiment settings
batch_size = 100
epochs = 50
timesteps = 5
features = 2
samples = 3000

# Build a synthetic two-feature series on a shared axis of `samples` points in [0, 100].
# Seed first so the noise added below is reproducible.
np.random.seed(1)
time_axis = np.linspace(0, 100, samples).reshape(-1, 1)

# Feature 1: a deliberately messy mix of sines and cosines.
squared_sine = np.sin(time_axis)**2
slow_sine = 2 * np.sin(time_axis / 4)
cubed_cosine = 4 * np.cos(time_axis / 2)**3
array1 = squared_sine + slow_sine - cubed_cosine + np.cos(time_axis)

# Feature 2: a plain sine wave.
array2 = np.sin(time_axis)

# Stack the two columns and add uniform noise in [-0.2, 0.2) to both,
# giving a (samples, features) array, i.e. 3000 x 2.
noise = np.random.uniform(-0.2, 0.2, size=(samples, features))
array = np.hstack((array1, array2)) + noise
# Define a function that takes an array and returns a pd.DataFrame with one column per
# variable per time step (lagged inputs followed by forecast targets).
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Reframe a time series as a table of lagged input and forecast columns.

    Each row of the result lines up the observations at times
    t-n_in, ..., t-1 (inputs) with those at t, ..., t+n_out-1 (targets).

    Example: if data has 2 columns representing 2 features and we specify
    n_in=3, n_out=2, the returned DataFrame has the columns
    ["var1(t-3)", "var2(t-3)", "var1(t-2)", "var2(t-2)", "var1(t-1)",
    "var2(t-1)", "var1(t)", "var2(t)", "var1(t+1)", "var2(t+1)"].

    Args:
        data: Observations ordered in time, in any form accepted by
            ``pd.DataFrame`` (2-D array, list of rows, flat list, 1-D array).
            One row per time step, one column per variable.
        n_in: Number of lagged time steps to include as inputs.
        n_out: Number of time steps, starting at t, to include as targets.
        dropnan: If True, drop the rows that contain NaN; shifting leaves
            NaN in the first n_in rows and the last n_out-1 rows.

    Returns:
        A pd.DataFrame with (n_in + n_out) * n_vars columns named
        "var<j>(t-<i>)", "var<j>(t)" and "var<j>(t+<i>)".
    """
    df = pd.DataFrame(data)
    # Take the variable count from the frame itself rather than from the
    # input's type: a list of multi-column rows or a 1-D array would
    # otherwise get the wrong count (or raise) and break the column naming.
    n_vars = df.shape[1]
    cols, names = [], []
    # Input sequence (t-n_in, ..., t-1)
    for lag in range(n_in, 0, -1):
        cols.append(df.shift(lag))
        names += ['var%d(t-%d)' % (j + 1, lag) for j in range(n_vars)]
    # Forecast sequence (t, t+1, ..., t+n_out-1)
    for step in range(n_out):
        cols.append(df.shift(-step))
        if step == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, step) for j in range(n_vars)]
    # Put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # Drop the rows made incomplete by shifting
    if dropnan:
        agg = agg.dropna()
    return agg
# Use the function to create the data. Want to use the previous `timesteps` observations
# to predict the next 2, i.e. use t-5, ..., t-1 (for both variables) to predict
# t and t+1 (for both variables).
n_out = 2
data = series_to_supervised(array, n_in=timesteps, n_out=n_out)

# Separate the "X" data from the "Y" data. series_to_supervised emits all lagged input
# columns first (timesteps * features of them), followed by the target columns, so a
# positional split stays correct if `timesteps` or `features` is changed.
n_input_cols = timesteps * features
X = data.iloc[:, :n_input_cols]
Y = data.iloc[:, n_input_cols:]

# Separate into training and testing sets. shuffle=False keeps the rows in time order.
X_train, X_test, Y_train, Y_test = train_test_split(X.values, Y.values, test_size=0.33, shuffle=False)

# Need to reshape into (samples, timesteps, features). Each row is laid out as
# [var1(t-5), var2(t-5), var1(t-4), ...], so a plain reshape groups the values per timestep.
X_train = X_train.reshape(X_train.shape[0], timesteps, features)
X_test = X_test.reshape(X_test.shape[0], timesteps, features)
# Build the Keras model as a stack of layers
model = Sequential([
    # return_sequences=True so this layer passes its output at every timestep on to the next LSTM
    LSTM(30, input_shape=(timesteps, features), return_sequences=True),
    LSTM(20),
    # 4 linear outputs: want to predict t and t+1 for both features
    Dense(4, activation='linear'),
])

# Compile with mean squared error loss and the RMSprop optimizer
model.compile(optimizer='rmsprop', loss='mse')

# Train and keep the returned history object
history = model.fit(
    X_train,
    Y_train,
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

# Predict on the held-out test inputs
pred = model.predict(X_test, batch_size=batch_size)
# To check if something works, we can plot predictions against the true values.
# Column order of pred / Y_test is: var1(t), var2(t), var1(t+1), var2(t+1).
plot_titles = [
    'First 200 predictions for 1 step ahead (t) for first feature/column',
    'First 200 predictions for 1 step ahead (t) for second feature/column',
    'First 200 predictions for 2 step ahead (t+1) for first feature/column',
    'First 200 predictions for 2 step ahead (t+1) for second feature/column',
]
for column, title in enumerate(plot_titles):
    # Start a new figure for each target column; without it all eight curves land on
    # the same axes and every title overwrites the previous one.
    plt.figure()
    plt.plot(pred[:200, column], label='pred')
    plt.plot(Y_test[:200, column], label='true')
    plt.legend()
    plt.title(title)
# Display the figures when run as a plain script
plt.show()
# (GitHub page footer: "Sign up for free to join this conversation on GitHub.
# Already have an account? Sign in to comment.")