-
-
Save usametov/3684fea1bf3138d050521e4a13326fcd to your computer and use it in GitHub Desktop.
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| from fbprophet import Prophet | |
| from datetime import datetime | |
| import psycopg2 | |
| from sqlalchemy import create_engine | |
# Forecast the per-bar high price of one trading pair with Prophet.
#
# Pipeline: load aggregated volume bars from Postgres, reshape them into the
# two-column (ds, y) frame that Prophet fits on, fit with changepoints read
# from the `changepoints` table, forecast a few periods ahead, save the plot.

# Alternative input: read the bars from a Parquet file instead of the DB.
# df = pd.read_parquet('./BTCUSD-dec12-may21-volume-bars.parquet')

# NOTE(review): the credentials/host part of this URL looks mangled in this
# copy of the script ("[email protected]") -- confirm the real DSN before running.
alchemyEngine = create_engine('postgresql+psycopg2://clj_user:[email protected]/volumebars', pool_recycle=3600)
pair = 'BTCUSD'

# Run both queries inside one `with` block so the connection is released
# even if a query raises.
with alchemyEngine.connect() as dbConnection:
    # One row per volume bar (rows sharing a group_cumsum value).
    df = pd.read_sql(f'select sum(volume) as volume, min(price) as low, max(price) as high, max(time) as time from volume_bars where pair=\'{pair}\' group by group_cumsum', dbConnection)
    changepoints_df = pd.read_sql(f'select time from changepoints where pair =\'{pair}\' and spread > 777', dbConnection)

# Prophet is fitted on two columns: `ds` (timestamp) and `y` (value).
# `time` is converted from epoch milliseconds; the bar's `high` is the series
# being forecast. Building a new frame, rather than assigning into a column
# slice of `df`, avoids pandas' SettingWithCopyWarning.
df_reordered = pd.DataFrame({
    'ds': pd.to_datetime(df['time'], unit='ms'),
    'y': df['high'],
})

# Show the tail of the training frame
print(df_reordered.tail(5))

# Explicit changepoint dates, kept as pandas Timestamps.
changepoints = pd.to_datetime(changepoints_df['time'], unit='ms').tolist()

# NOTE(review): per the Prophet docs `changepoint_range` is not used when
# `changepoints` is given explicitly -- confirm whether it is still wanted.
model = Prophet(
    seasonality_mode='multiplicative',
    changepoint_prior_scale=1,
    changepoints=changepoints,
    changepoint_range=.9,
)
model.fit(df_reordered)

# Forecast 8 steps past the end of the history at 30-minute spacing
# (use freq='h' for hourly steps).
future = model.make_future_dataframe(periods=8, freq='30min')
forecast = model.predict(future)
print(forecast[['ds', 'yhat_lower', 'yhat', 'yhat_upper', 'trend']].tail(8))

# Plot history + forecast and save it under a timestamped file name.
fig = model.plot(forecast, xlabel='Date', ylabel=r'yhat')
plt.title('price prediction - high')
plt.savefig(f"prophet-high-{datetime.now().strftime('%Y%m%d-%H%M%S')}.png")
# Train and evaluate an XGBoost up/down classifier on volume-bar data.
#
# Features are one-bar-lagged technical indicators (RSI, ADX, close minus
# parabolic SAR); the target is 1 when a bar closes above its open, else 0.

library(arrow)    # read_parquet
library(TTR)      # RSI, ADX, SAR
library(xgboost)  # xgb.DMatrix, xgboost, xgb.cv, xgb.importance

#TODO: extract function from the code below

# Read data from a Parquet file
parquet_file <- "./BTCUSD-dec12-may21-volume-bars.parquet"
data <- arrow::read_parquet(parquet_file)

# make a data frame
df = as.data.frame(data)
colnames(df)

# build features, indicators and target class
rsi = RSI(df$close, n=14, maType="WMA")
adx = data.frame(ADX(df[,c("high","low","close")]))
sar = SAR(df[,c("high","low")], accel = c(0.02, 0.2))
# The column is `close` (lower case), as used everywhere else in this script;
# `df$Close` would be NULL and leave the trend feature empty.
trend = df$close - sar

# create a lag: shift every feature down one bar so that row t only holds
# indicator values computed from bar t-1
rsi = c(NA,head(rsi,-1))
adx$ADX = c(NA,head(adx$ADX,-1))
trend = c(NA,head(trend,-1))

# target variable: 1 if the bar closed above its open, else 0
price = df$close-df$open
class = ifelse(price > 0,1,0)

# Create a numeric matrix with named columns and drop the rows that contain
# NA (indicator warm-up period and the lag row)
model = cbind(class = class, rsi = rsi, adx = adx$ADX, trend = trend)
model = na.omit(model)

# Split data into train and test sets (first 2/3 of the rows for training).
# floor() keeps the split index an integer, so the last row is not dropped
# from the test range.
train_size = 2/3
breakpoint = floor(nrow(model) * train_size)
training_data = model[1:breakpoint,]
test_data = model[(breakpoint+1):nrow(model),]

# Split training and test data into X (features) and Y (target)
X_train = training_data[,2:4] ; Y_train = training_data[,1]
class(X_train)[1]; class(Y_train)
X_test = test_data[,2:4] ; Y_test = test_data[,1]
class(X_test)[1]; class(Y_test)

# Train the xgboost model using the "xgboost" function
dtrain = xgb.DMatrix(data = X_train, label = Y_train)
xgModel = xgboost(data = dtrain, nrounds = 5, objective = "binary:logistic")

# Using cross validation (same training matrix, 5 folds)
cv = xgb.cv(data = dtrain, nrounds = 10, nfold = 5, objective = "binary:logistic")

# Make the predictions on the test data
preds = predict(xgModel, X_test)

# Determine the size of the prediction vector
print(length(preds))

# Limit display of predictions to the first 6
print(head(preds))

# Measuring model performance: share of test rows whose thresholded
# prediction (cut-off 0.5) differs from the true class
prediction = as.numeric(preds > 0.5)
error_value = mean(prediction != Y_test)
print(paste("test-error=", error_value))
print(head(prediction))

# View feature importance from the learnt model
importance_matrix = xgb.importance(model = xgModel)
print(importance_matrix)

#TODO: set device to print plots to png
# View the trees from a model
# xgb.plot.tree(model = xgModel)
# View only the first tree in the XGBoost model
# xgb.plot.tree(model = xgModel, n_first_tree = 1)
#re-train Prophet from existing checkpoint:
import numpy as np
class StanInit:
    """Callable that returns initial parameter values taken from a fitted model.

    The constructor reads ``model.params`` and averages each entry:
    ``k``, ``m`` and ``sigma_obs`` are reduced to a single mean value, while
    ``delta`` and ``beta`` are averaged along axis 0 so they keep one value
    per column. Calling the instance returns the resulting dict.

    Intended to be passed as ``init=`` when fitting a new model, so that
    fitting starts from the previous model's parameters.

    Args:
        model: An object whose ``params`` attribute maps the keys ``'k'``,
            ``'m'``, ``'sigma_obs'``, ``'delta'`` and ``'beta'`` to
            array-likes (presumably a fitted Prophet model -- confirm).

    Raises:
        KeyError: If any of the five keys is missing from ``model.params``.
    """

    def __init__(self, model):
        fitted = model.params
        self.params = {
            'k': np.mean(fitted['k']),
            'm': np.mean(fitted['m']),
            'sigma_obs': np.mean(fitted['sigma_obs']),
            # axis=0: average over rows, keep one value per column
            'delta': np.mean(fitted['delta'], axis=0),
            'beta': np.mean(fitted['beta'], axis=0),
        }

    def __call__(self):
        """Return the dict of initial values computed in the constructor."""
        return self.params
# Fine-tuning: fit a fresh Prophet model on `df`, passing a StanInit built
# from a previous model as the `init` argument.
# NOTE(review): `prev_model` is not defined in the visible code -- presumably
# an already-fitted Prophet model; confirm. `df` must be the Prophet training
# frame (columns `ds` and `y`) -- verify which frame is meant here.
model2 = Prophet().fit(df, init=StanInit(prev_model))
# Interactive Plotly versions of the forecast plots.
# `py` is plotly's offline module; iplot() is its notebook-inline renderer
# (presumably this snippet is meant for a Jupyter session -- confirm).
import plotly.offline as py

from fbprophet.plot import (
    plot_plotly,
    plot_components_plotly,
    plot_forecast_component_plotly,
    plot_seasonality_plotly,
)

# forecast, with the trend line drawn as well
fig = plot_plotly(model, forecast, trend=True)
py.iplot(fig)

# components
fig = plot_components_plotly(model, forecast, figsize=(800, 175))
py.iplot(fig)

# seasonality
# NOTE(review): assumes `model` was fitted with a 'yearly' seasonality -- confirm.
fig = plot_seasonality_plotly(model, 'yearly')
py.iplot(fig)
# Save a fitted model to a JSON checkpoint file and load it back.
import json

from fbprophet.serialize import model_from_json, model_to_json

# how to save a checkpoint:
with open('volume-bars-model.json', 'w') as file_out:
    json.dump(model_to_json(model), file_out)

# how to load checkpoint
with open('volume-bars-model.json', 'r') as file_in:
    model = model_from_json(json.load(file_in))

# then we just use it: predict() is called without a dataframe here, and the
# result is plotted
forecast = model.predict()
fig = model.plot(forecast)