# Time series forecasting based on multiple time series, including the original one
# This script is based on the following examples and discussions:
# https://gist.github.com/lukovkin/1aefa4509e066690b892
# https://groups.google.com/forum/#!topic/keras-users/9GsDwkSdqBg
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import theano
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM, SimpleRNN, GRU
# Generate training data
#
# One time series is a COS function, influenced by a separate scale-signal time series: a set of multipliers (aka scales)
# for the COS function that changes periodically. Furthermore, to validate that the LSTM can spot changes that influence the
# time series ahead of time (i.e. changes acting as leading indicators), the COS time series is set up to adjust its scale
# with a 25-step delay after the scale-signal time series changes.
length = 3000  # Time series length
scales = [0.5, 1, 1.5]  # By how much the COS function can be scaled
scale_step = 100  # How frequently to change the scale factor
steps_ahead = 25  # How far ahead the scale factor changes before the COS time series scale should change
df = pd.DataFrame(columns=['Series', 'Scale Signal'])
scale_signal = 1  # initial settings
scale = 1
for i in range(length):
    if (i + steps_ahead) % scale_step == 0:
        scale_signal = scales[random.randint(0, 2)]
    if i % scale_step == 0:
        scale = scale_signal
    df.loc[i, 'Series'] = np.cos(i / 4.0) * scale
    df.loc[i, 'Scale Signal'] = scale_signal
# Prepare and format data for training
data = df.values
examples = 200  # how far back to look (look-back window length)
y_examples = 100  # how many steps forward to predict (forecast horizon)
nb_samples = len(data) - examples - y_examples
# Inputs: sliding windows of `examples` past steps over both columns
input_list = [np.expand_dims(np.atleast_2d(data[i:examples + i, :]), axis=0) for i in xrange(nb_samples)]
input_mat = np.concatenate(input_list, axis=0)  # shape: (nb_samples, examples, features)
# Use the tail of the series as the test data
df_test = pd.DataFrame(df[-examples:])
test_data = df_test.values
test_input_list = [np.expand_dims(np.atleast_2d(test_data[len(test_data) - examples:len(test_data), :]), axis=0) for i in xrange(1)]
test_input_mat = np.concatenate(test_input_list, axis=0)
# Targets: the next `y_examples` values of the first column in the df dataframe
target_list = [np.atleast_2d(data[i + examples:examples + i + y_examples, 0]) for i in xrange(nb_samples)]
target_mat = np.concatenate(target_list, axis=0)  # shape: (nb_samples, y_examples)
# Set up model
features = input_mat.shape[2]
hidden = 128
model = Sequential()
model.add(LSTM(hidden, input_shape=(examples, features)))
model.add(Dropout(.2))
model.add(Dense(y_examples))
model.add(Activation('linear'))
model.compile(loss='mse', optimizer='rmsprop')
# Train
hist = model.fit(input_mat, target_mat, nb_epoch=100, batch_size=100, validation_split=0.05, show_accuracy=False)
# Get and plot the predicted data and the validation loss
predicted = model.predict(test_input_mat)
df_val_loss = pd.DataFrame(hist.history['val_loss'])
df_val_loss.plot()
df_predicted = pd.DataFrame(predicted).T
df_predicted.columns = ['Predicted']
df_result = pd.concat([df, df_predicted], ignore_index=True)
df_result.plot()
plt.show()
Hi @DSA101, thank you for sharing this example. I tried this LSTM model for time series prediction on real power system data, and the results turned out well. The LSTM cannot predict the sparse spikes in the original data, but I suppose that is acceptable, since there seems to be no way to predict those spikes reliably anyway.

However, I don't quite understand how to set the correct number of training and testing samples. In this code, the test input is 200 steps long and is used to predict the next 100 points. In practice, if the series has 100,000 points, would we still use such a short look-back window? More generally, I don't fully understand the roles of examples (look_back) and y_examples (steps_forward) and how they work; they seem quite different from the usual training/test split in shallow machine learning. Is there a rule of thumb for choosing good values for examples and y_examples?

One more question: it seems that you used a stateless LSTM model. Why not use a stateful LSTM instead?
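For anyone puzzling over the same question, here is a minimal sketch of how examples (the look-back window) and y_examples (the forecast horizon) turn one long series into many supervised samples. It is not part of the original gist; the make_windows helper and the toy array are invented for illustration. Each sample is simply a window of examples past steps paired with the y_examples steps that follow it, so even a 100,000-step series yields roughly 100,000 - examples - y_examples samples no matter how short the window is.

# Sketch only (assumed helper, not from the gist): slice a multivariate series
# into (look_back, features) input windows and (steps_forward,) target windows.
import numpy as np

def make_windows(data, look_back, steps_forward):
    # data: array of shape (timesteps, features); column 0 is the series to forecast
    nb_samples = len(data) - look_back - steps_forward
    X = np.stack([data[i:i + look_back, :] for i in range(nb_samples)])
    y = np.stack([data[i + look_back:i + look_back + steps_forward, 0] for i in range(nb_samples)])
    return X, y

# Toy usage with the same window sizes as the gist (200 steps back, 100 ahead)
toy = np.random.rand(3000, 2)   # 3000 timesteps, 2 features
X, y = make_windows(toy, 200, 100)
print(X.shape, y.shape)         # (2700, 200, 2) (2700, 100)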
Example output: