Created
November 27, 2017 19:10
-
-
Save hhl60492/0d1b99314a662e0c6c7d918caf20b0b5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
# read in the csv | |
df = pd.read_csv("eth-cad-max.csv") | |
# get the prices col slice from df | |
prices = np.array(df['price']) | |
H = 0.55 | |
start = 600 | |
# pick a point in the time series to start | |
holdout = 0.05 # 5 percent holdout set to test the Gaussian Process predictions | |
scaler = 1 | |
prices = prices / scaler | |
temp = prices[start:] | |
stop = int((1-holdout) * len(prices)) | |
# assume our time series is a function of the form y = f(x), where x is the date or time index | |
train_y = prices[start:stop] | |
test_y = prices[stop:] | |
train_x = np.linspace(1,len(train_y),len(train_y)) | |
test_x = np.linspace(1,len(test_y),len(test_y)) | |
# assuming our time series is a Fractional Brownian Process (FBM), define the kernel function | |
def FBM_kernel(x1,x2, H): | |
sigma = np.zeros([len(x1), len(x2)]) | |
for i in range(len(x1)): | |
for j in range(len(x2)): | |
sigma[i,j] = 0.5 * (np.power(np.absolute(x1[i]), 2*H) + | |
np.power(np.absolute(x2[j]), 2 * H) - np.power(np.absolute(x1[i] - x2[j]), 2 * H)) | |
return sigma | |
# here's where the magic happens | |
# training covariance | |
xx = FBM_kernel(train_y,train_y, H) | |
# training and testing covariance | |
xx_t = FBM_kernel(train_y,test_y, H) | |
x_tx = FBM_kernel(test_y,train_y, H) | |
# testing covariance | |
x_tx_t = FBM_kernel(test_y,test_y, H) | |
# compute prediction | |
noise_sigma = 0 | |
V_inv = np.linalg.inv(xx + noise_sigma * np.diag(xx)) | |
# test set mean | |
foo = np.matmul(x_tx,V_inv) | |
mu_test = np.matmul(foo, train_y) | |
print(mu_test) | |
foo = np.matmul(x_tx,V_inv) | |
sigma_test = x_tx_t - np.matmul(foo,xx_t) | |
print() | |
plt.figure() | |
plt.plot(stop + test_x,mu_test) | |
plt.fill_between(stop + test_x, mu_test-2*np.diagonal(sigma_test), mu_test+2*np.diagonal(sigma_test), color = 'grey', label = '95% confidence region') | |
plt.scatter(stop + test_x,mu_test, label='Predicted mean') | |
plt.plot(stop + test_x, test_y, 'r--', label = 'Actual Price') | |
plt.xlabel('Time step (days)') | |
plt.ylabel('CAD$') | |
plt.legend(loc = 2) | |
plt.show() | |
# test set variance |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment