-
-
Save usametov/3684fea1bf3138d050521e4a13326fcd to your computer and use it in GitHub Desktop.
import pandas as pd | |
import matplotlib.pyplot as plt | |
from fbprophet import Prophet | |
from datetime import datetime | |
import psycopg2 | |
from sqlalchemy import create_engine | |
# Forecast the per-bar high price of volume bars with Prophet, using
# changepoints stored in the database, and save the forecast plot as a PNG.
#
# Alternative input: read the bars from a Parquet file instead of the database.
# df = pd.read_parquet('./BTCUSD-dec12-may21-volume-bars.parquet')

# NOTE(review): the credentials part of this URL ("[email protected]") looks like
# a placeholder left by e-mail obfuscation on the page this script was copied
# from -- restore the real password@host before running.
alchemyEngine = create_engine('postgresql+psycopg2://clj_user:[email protected]/volumebars', pool_recycle=3600)
pair = 'BTCUSD'

# The connection is only needed for the two reads below; the context manager
# releases it even when a query raises.
with alchemyEngine.connect() as dbConnection:
    # One row per volume bar: rows are grouped by group_cumsum.
    df = pd.read_sql(f'select sum(volume) as volume, min(price) as low, max(price) as high, max(time) as time from volume_bars where pair=\'{pair}\' group by group_cumsum', dbConnection)
    changepoints_df = pd.read_sql(f'select time from changepoints where pair =\'{pair}\' and spread > 777', dbConnection)

# Prophet expects a frame with a 'ds' (timestamp) and a 'y' (value) column.
# Work on an explicit copy: adding a column to a bare df[[...]] selection
# triggers pandas' SettingWithCopyWarning.
selected_columns = df[['high', 'time']].copy()
# `time` is treated as epoch milliseconds.
selected_columns['ds'] = pd.to_datetime(selected_columns['time'], unit='ms')
df_reordered = selected_columns.rename(columns={'high': 'y'})[['ds', 'y']]
# Show the resulting DataFrame
print(df_reordered.tail(5))

changepoints_df['ds'] = pd.to_datetime(changepoints_df['time'], unit='ms')
changepoints_df = changepoints_df.drop('time', axis=1)
# Series.tolist() yields pandas Timestamps; going through `.values.tolist()`
# hands the conversion to NumPy, which turns datetime64[ns] into plain integers.
changepoints = changepoints_df['ds'].tolist()

# NOTE(review): Prophet's documentation says changepoint_range is not used when
# explicit changepoints are supplied -- confirm whether it is still wanted here.
model = Prophet(seasonality_mode='multiplicative', changepoint_prior_scale=1, changepoints=changepoints, changepoint_range=.9)
model.fit(df_reordered)

# 8 steps of 30 minutes each, i.e. 4 hours past the last bar ('h' would be hourly).
future = model.make_future_dataframe(periods=8, freq='30min')
forecast = model.predict(future)
print(forecast[['ds', 'yhat_lower', 'yhat', 'yhat_upper', 'trend']].tail(8))

fig = model.plot(forecast, xlabel='Date',
                 ylabel=r'yhat')
plt.title('price prediction - high')
# Timestamped file name so successive runs do not overwrite each other.
plt.savefig(f"prophet-high-{datetime.now().strftime('%Y%m%d-%H%M%S')}.png")
# Train and evaluate an XGBoost classifier that predicts whether a volume bar
# closes above its open, using lagged RSI, ADX and SAR-based trend features.

# Load the packages used below
library(arrow)    # read_parquet
library(TTR)      # RSI, ADX, SAR
library(xgboost)  # xgb.DMatrix, xgboost, xgb.cv, xgb.importance
#TODO: extract function from the code below
# Read data from a Parquet file
parquet_file <- "./BTCUSD-dec12-may21-volume-bars.parquet"
data <- arrow::read_parquet(parquet_file)
#make a data frame
df = as.data.frame(data)
colnames(df)
#build features, indicators and target class
rsi = RSI(df$close, n=14, maType="WMA")
adx = data.frame(ADX(df[,c("high","low","close")]))
sar = SAR(df[,c("high","low")], accel = c(0.02, 0.2))
# Column names are case-sensitive: every other reference uses lower-case
# `close`; `df$Close` is NULL, which left `trend` empty and made matrix()
# below recycle the target values into the feature column.
trend = df$close - sar
#create a lag
# Shift every feature down by one row so that row i only carries indicator
# values from bar i-1, while the target below is computed from bar i itself.
rsi = c(NA,head(rsi,-1))
adx$ADX = c(NA,head(adx$ADX,-1))
trend = c(NA,head(trend,-1))
price = df$close-df$open
#target variable
# 1 when the bar closed above its open, 0 otherwise.
class = ifelse(price > 0,1,0)
# Create a Matrix
model_df = data.frame(class,rsi,adx$ADX,trend)
model = matrix(c(class,rsi,adx$ADX,trend), nrow=length(class))
# Drop the rows that have no indicator value yet (lag and warm-up periods).
model = na.omit(model)
colnames(model) = c("class","rsi","adx","trend")
# Split data into train and test sets
train_size = 2/3
# floor() keeps the split point an integer; with a fractional value the
# sequence (breakpoint+1):nrow(model) stops short of the last row.
breakpoint = floor(nrow(model) * train_size)
training_data = model[1:breakpoint,]
test_data = model[(breakpoint+1):nrow(model),]
# Split data training and test data into X and Y
X_train = training_data[,2:4]
Y_train = training_data[,1]
class(X_train)[1]; class(Y_train)
X_test = test_data[,2:4]
Y_test = test_data[,1]
class(X_test)[1]; class(Y_test)
# Train the xgboost model using the "xgboost" function
dtrain = xgb.DMatrix(data = X_train, label = Y_train)
# `nrounds` is the full argument name; `nround` only worked through R's
# partial argument matching.
xgModel = xgboost(data = dtrain, nrounds = 5, objective = "binary:logistic")
# Using cross validation (reuses the dtrain built above)
cv = xgb.cv(data = dtrain, nrounds = 10, nfold = 5, objective = "binary:logistic")
# Make the predictions on the test data
preds = predict(xgModel, X_test)
# Determine the size of the prediction vector
print(length(preds))
# Limit display of predictions to the first 6
print(head(preds))
# Measuring model performance
# Share of test rows whose thresholded prediction differs from the label.
error_value = mean(as.numeric(preds > 0.5) != Y_test)
print(paste("test-error=", error_value))
prediction = as.numeric(preds > 0.5)
print(head(prediction))
# View feature importance from the learnt model
importance_matrix = xgb.importance(model = xgModel)
print(importance_matrix)
#TODO: set device to print plots to png
# View the trees from a model
# xgb.plot.tree(model = xgModel)
# View only the first tree in the XGBoost model
# xgb.plot.tree(model = xgModel, n_first_tree = 1)
#re-train Prophet from existing checkpoint:
import numpy as np
class StanInit:
    """Callable holding initial parameter values taken from a fitted model.

    The constructor reads ``model.params`` and stores the mean of each of
    the entries ``k``, ``m``, ``sigma_obs``, ``delta`` and ``beta``. Calling
    the instance returns that dictionary, so the instance can be passed as
    the ``init`` argument of ``Prophet.fit`` to start fitting from the
    previous model's parameters.
    """

    def __init__(self, model):
        """Collect averaged parameters from ``model.params``.

        Args:
            model: Object exposing a ``params`` mapping with the keys
                ``k``, ``m``, ``sigma_obs``, ``delta`` and ``beta``
                (presumably an already fitted Prophet model).
        """
        self.params = {
            # These three are reduced to a single number each.
            'k': np.mean(model.params['k']),
            'm': np.mean(model.params['m']),
            'sigma_obs': np.mean(model.params['sigma_obs']),
            # These two are averaged along axis 0 only (presumably the
            # sample axis), keeping one value per column.
            'delta': np.mean(model.params['delta'], axis=0),
            'beta': np.mean(model.params['beta'], axis=0),
        }

    def __call__(self):
        """Return the stored initial-parameter dictionary."""
        return self.params
# Fine-tuning: fit a new model, starting from the previous model's parameters.
# Fit a fresh Prophet model, passing a StanInit built from prev_model as the
# `init` argument of fit().
# NOTE(review): prev_model is not defined anywhere in this file -- presumably
# the previously fitted Prophet model; confirm. `df` presumably has to be the
# frame with 'ds' and 'y' columns, not the raw query result -- verify.
model2 = Prophet().fit(df, init=StanInit(prev_model))
# Interactive Plotly versions of the forecast, component and seasonality plots.
# NOTE(review): `py` is not defined in this file -- presumably
# `import plotly.offline as py` (iplot renders inside a notebook); confirm.
#use plotly
from fbprophet.plot import (
plot_plotly,
plot_components_plotly,
plot_forecast_component_plotly,
plot_seasonality_plotly
)
# Forecast plot; trend=True presumably overlays the trend line -- confirm.
fig = plot_plotly(model, forecast, trend=True)
py.iplot(fig)
#components
fig = plot_components_plotly(model, forecast ,
figsize=(800, 175))
py.iplot(fig)
#seasonality
# NOTE(review): this requires the model to have a seasonality named 'yearly';
# the model built earlier in this file does not request one explicitly -- verify.
fig = plot_seasonality_plotly(model, 'yearly')
py.iplot(fig)
# Save a fitted Prophet model to a JSON checkpoint and load it back.
# Imported here because the file's import block provides none of these names.
import json
from fbprophet.serialize import model_from_json, model_to_json

#how to save a checkpoint:
with open('volume-bars-model.json', 'w') as file_out:
    # The serialized model is written through json.dump, so the loader below
    # must read it back with json.load before handing it to model_from_json.
    json.dump(model_to_json(model), file_out)
#how to load checkpoint
with open('volume-bars-model.json', 'r') as file_in:
    model = model_from_json(json.load(file_in))
# Then we just use the loaded model:
# predict() is called without a dataframe here -- presumably Prophet then
# predicts over the history stored in the model; confirm against the docs.
forecast = model.predict()
# Draw the forecast with Prophet's built-in plot method.
fig = model.plot(forecast)