Skip to content

Instantly share code, notes, and snippets.

@hnykda
Last active August 21, 2020 01:33
Show Gist options
  • Save hnykda/c362f0ad488e3b289394 to your computer and use it in GitHub Desktop.
Save hnykda/c362f0ad488e3b289394 to your computer and use it in GitHub Desktop.
Predicting sequences of vectors (regression) in Keras using RNN - LSTM (danielhnyk.cz)
import pandas as pd
from random import random
# A triangle wave: 1..9 rising followed by 10..2 falling, repeated 100 times.
flow = 100 * (list(range(1, 10)) + list(range(10, 1, -1)))

# Both columns start from the same wave; "b" is then delayed by 9 steps,
# which leaves NaN in its first 9 rows.
pdata = pd.DataFrame({"a": flow, "b": flow})
pdata["b"] = pdata["b"].shift(9)

# Skip the first 10 rows (this drops the NaN rows created by the shift) and
# multiply everything by one random factor from [0, 1) -- the original
# author's "some noise".
data = pdata.iloc[10:] * random()
import numpy as np
def _load_data(data, n_prev = 100):
"""
data should be pd.DataFrame()
"""
docX, docY = [], []
for i in range(len(data)-n_prev):
docX.append(data.iloc[i:i+n_prev].as_matrix())
docY.append(data.iloc[i+n_prev].as_matrix())
alsX = np.array(docX)
alsY = np.array(docY)
return alsX, alsY
def train_test_split(df, test_size=0.1):
    """Split a DataFrame into training and testing window sets.

    The rows are split in order (no shuffling): the leading
    ``1 - test_size`` fraction is used for training and the remainder for
    testing. Each part is then windowed independently by ``_load_data``
    with its default window length.

    Args:
        df: a ``pd.DataFrame`` of consecutive time steps.
        test_size: fraction of the rows reserved for the test part.

    Returns:
        ``((X_train, y_train), (X_test, y_test))`` as produced by
        ``_load_data`` for each part.
    """
    # int() keeps the split index usable as an iloc bound even where
    # round() returns a float (Python 2).
    ntrn = int(round(len(df) * (1 - test_size)))
    X_train, y_train = _load_data(df.iloc[0:ntrn])
    X_test, y_test = _load_data(df.iloc[ntrn:])
    return (X_train, y_train), (X_test, y_test)
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.layers.recurrent import LSTM
# Build, train and evaluate a small LSTM regressor on the generated data.
# Size of each input/output vector; `data` has the two columns "a" and "b".
in_out_neurons = 2
# Width of the recurrent layer.
hidden_neurons = 50
model = Sequential()
# NOTE(review): the positional arguments presumably follow the old Keras
# (input size, output size) layer signature -- confirm against the installed
# Keras version.
model.add(LSTM(in_out_neurons, hidden_neurons, return_sequences=False))
model.add(Dense(hidden_neurons, in_out_neurons))
# Linear output activation with a mean-squared-error loss: a regression setup.
model.add(Activation("linear"))
model.compile(loss="mean_squared_error", optimizer="rmsprop")
(X_train, y_train), (X_test, y_test) = train_test_split(data) # retrieve data
model.fit(X_train, y_train, batch_size=700, nb_epoch=10, validation_split=0.05)
predicted = model.predict(X_test)
# Root-mean-square error per output column (mean taken over axis 0).
# The value is not used again in the visible script.
rmse = np.sqrt(((predicted - y_test) ** 2).mean(axis=0))
# and maybe plot it
# Write predictions and the matching targets to CSV files for inspection.
pd.DataFrame(predicted).to_csv("predicted.csv")
pd.DataFrame(y_test).to_csv("test_data.csv")
@dare0021
Copy link

dare0021 commented Jun 29, 2016

I changed the Keras 0.2 code above to work on Keras 1.0.3 with TensorFlow.

import pandas as pd
from random import random

# Triangle wave 1..9, 10..2 repeated 100 times (1800 samples).
flow = (list(range(1, 10)) + list(range(10, 1, -1))) * 100

# Column "a" is the wave itself; column "b" is the same wave delayed by
# 9 steps (its first 9 entries become NaN).
pdata = pd.DataFrame({"a": flow})
pdata["b"] = pdata["a"].shift(9)

# Discard the first 10 rows, which include the NaN rows, then scale the whole
# frame by a single random factor in [0, 1) ("some noise" in the original).
data = pdata.iloc[10:].mul(random())

import numpy as np

def _load_data(data, n_prev = 100):
    """
    data should be pd.DataFrame()
    """

    docX, docY = [], []
    for i in range(len(data)-n_prev):
        docX.append(data.iloc[i:i+n_prev].as_matrix())
        docY.append(data.iloc[i+n_prev].as_matrix())
    alsX = np.array(docX)
    alsY = np.array(docY)

    return alsX, alsY

def train_test_split(df, test_size=0.1):
    """
    Divide df, in row order, into a leading training part and a trailing
    testing part, and window each part with _load_data.

    test_size is the fraction of rows that goes to the testing part.
    Returns ((X_train, y_train), (X_test, y_test)).
    """
    train_fraction = 1 - test_size
    split_at = int(round(len(df) * train_fraction))

    head, tail = df.iloc[:split_at], df.iloc[split_at:]
    train_X, train_y = _load_data(head)
    test_X, test_y = _load_data(tail)

    return (train_X, train_y), (test_X, test_y)

from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.layers.recurrent import LSTM

# Build, train and evaluate a small LSTM regressor on the generated data.
# Size of each output vector; `data` has the two columns "a" and "b".
in_out_neurons = 2
# Width of the recurrent layer.
hidden_neurons = 50

model = Sequential()

# input_shape is (window length, values per step): 100 matches the default
# n_prev of _load_data, 2 matches the two data columns.
model.add(LSTM(hidden_neurons, input_shape=(100, 2)))
model.add(Dense(in_out_neurons))
# Linear output activation with a mean-squared-error loss: a regression setup.
model.add(Activation("linear"))

# NOTE(review): 'accuracy' is tracked alongside an MSE regression loss; it is
# probably not a meaningful metric here -- confirm before relying on it.
model.compile(loss="mean_squared_error",
    optimizer="rmsprop",
    metrics=['accuracy'])

(X_train, y_train), (X_test, y_test) = train_test_split(data)

# NOTE(review): the test split is passed as validation data during training
# and is then used again for the final evaluation below.
model.fit(X_train, y_train, batch_size=700, nb_epoch=50, validation_data=(X_test, y_test), verbose=1)
# With metrics configured, score is indexed as [loss, accuracy] below.
score = model.evaluate(X_test, y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])

predicted = model.predict(X_test, batch_size=700)

# and maybe plot it
# Write predictions and the matching targets to CSV files for inspection.
pd.DataFrame(predicted).to_csv("predicted.csv")
pd.DataFrame(y_test).to_csv("test_data.csv")

Produces some pretty rubbish results at 10 epochs, but gets pretty good with 20.
I'm still a beginner, so I'm not sure whether this constitutes overfitting or not.

With 20 epochs
epoch 20
With 50 epochs
epoch 50

@hnykda
Copy link
Author

hnykda commented Mar 24, 2017

There is new code for Keras 2, written by my friend, at the beginning of the original blog post: http://danielhnyk.cz/predicting-sequences-vectors-keras-using-rnn-lstm/

@sonmeznezahat
Copy link

Hi, does anyone have an idea about how to apply this to categorical data? I have sequential data: the first column is the event code, which is categorical, and the second column is its sequence number.

Any help?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment