hhl60492 · November 27, 2017 19:10
diff --git a/gpr.py b/gpr.py
 import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt

 # read in the csv
 df = pd.read_csv("eth-cad-max.csv")

 # get the prices col slice from df
 prices = np.array(df['price'])
 H = 0.55
 start = 600
 # pick a point in the time series to start
 holdout = 0.05 # 5 percent holdout set to test the Gaussian Process predictions
 scaler = 1
 prices = prices / scaler
 temp = prices[start:]
 stop = int((1-holdout) * len(prices))

 # assume our time series is a function of the form y = f(x), where x is the date or time index
 train_y = prices[start:stop]
 test_y = prices[stop:]
 train_x = np.linspace(1,len(train_y),len(train_y))
 test_x = np.linspace(1,len(test_y),len(test_y))

 # assuming our time series is a Fractional Brownian Process (FBM), define the kernel function
 def FBM_kernel(x1,x2, H):
    sigma = np.zeros([len(x1), len(x2)])
    for i in range(len(x1)):
        for j in range(len(x2)):
            sigma[i,j] = 0.5 * (np.power(np.absolute(x1[i]), 2*H) +
                np.power(np.absolute(x2[j]), 2 * H) - np.power(np.absolute(x1[i] - x2[j]), 2 * H))
    return sigma

 # here's where the magic happens

 # training covariance
 xx = FBM_kernel(train_y,train_y, H)
 # training and testing covariance
 xx_t = FBM_kernel(train_y,test_y, H)
 x_tx = FBM_kernel(test_y,train_y, H)

 # testing covariance
 x_tx_t = FBM_kernel(test_y,test_y, H)

 # compute prediction
 noise_sigma = 0
 V_inv = np.linalg.inv(xx + noise_sigma * np.diag(xx))

 # test set mean
 foo = np.matmul(x_tx,V_inv)
 mu_test = np.matmul(foo, train_y)
 print(mu_test)
 foo = np.matmul(x_tx,V_inv)
 sigma_test = x_tx_t - np.matmul(foo,xx_t)
 print()
 plt.figure()

 plt.plot(stop + test_x,mu_test)
 plt.fill_between(stop + test_x, mu_test-2*np.diagonal(sigma_test), mu_test+2*np.diagonal(sigma_test), color = 'grey', label = '95% confidence region')
 plt.scatter(stop + test_x,mu_test, label='Predicted mean')
 plt.plot(stop + test_x, test_y, 'r--', label = 'Actual Price')

 plt.xlabel('Time step (days)')
 plt.ylabel('CAD$')
 plt.legend(loc = 2)
 plt.show()
 # test set variance
	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt

	# read in the csv
	df = pd.read_csv("eth-cad-max.csv")

	# get the prices col slice from df
	prices = np.array(df['price'])
	H = 0.55
	start = 600
	# pick a point in the time series to start
	holdout = 0.05 # 5 percent holdout set to test the Gaussian Process predictions
	scaler = 1
	prices = prices / scaler
	temp = prices[start:]
	stop = int((1-holdout) * len(prices))

	# assume our time series is a function of the form y = f(x), where x is the date or time index
	train_y = prices[start:stop]
	test_y = prices[stop:]
	train_x = np.linspace(1,len(train_y),len(train_y))
	test_x = np.linspace(1,len(test_y),len(test_y))

	# assuming our time series is a Fractional Brownian Process (FBM), define the kernel function
	def FBM_kernel(x1,x2, H):
	sigma = np.zeros([len(x1), len(x2)])
	for i in range(len(x1)):
	for j in range(len(x2)):
	sigma[i,j] = 0.5 * (np.power(np.absolute(x1[i]), 2*H) +
	np.power(np.absolute(x2[j]), 2 * H) - np.power(np.absolute(x1[i] - x2[j]), 2 * H))
	return sigma

	# here's where the magic happens

	# training covariance
	xx = FBM_kernel(train_y,train_y, H)
	# training and testing covariance
	xx_t = FBM_kernel(train_y,test_y, H)
	x_tx = FBM_kernel(test_y,train_y, H)

	# testing covariance
	x_tx_t = FBM_kernel(test_y,test_y, H)

	# compute prediction
	noise_sigma = 0
	V_inv = np.linalg.inv(xx + noise_sigma * np.diag(xx))

	# test set mean
	foo = np.matmul(x_tx,V_inv)
	mu_test = np.matmul(foo, train_y)
	print(mu_test)
	foo = np.matmul(x_tx,V_inv)
	sigma_test = x_tx_t - np.matmul(foo,xx_t)
	print()
	plt.figure()

	plt.plot(stop + test_x,mu_test)
	plt.fill_between(stop + test_x, mu_test-2np.diagonal(sigma_test), mu_test+2np.diagonal(sigma_test), color = 'grey', label = '95% confidence region')
	plt.scatter(stop + test_x,mu_test, label='Predicted mean')
	plt.plot(stop + test_x, test_y, 'r--', label = 'Actual Price')

	plt.xlabel('Time step (days)')
	plt.ylabel('CAD$')
	plt.legend(loc = 2)
	plt.show()
	# test set variance