Last active
June 4, 2023 19:44
-
-
Save usametov/3684fea1bf3138d050521e4a13326fcd to your computer and use it in GitHub Desktop.
prophet
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datetime import datetime

import matplotlib.pyplot as plt
import pandas as pd
import psycopg2
from fbprophet import Prophet
from sqlalchemy import create_engine, text
# Fit a Prophet model on the per-bar "high" price of one trading pair and save
# a plot of the forecast for the next 8 half-hour steps.

# Read the Parquet file (offline alternative to the database query below)
# df = pd.read_parquet('./BTCUSD-dec12-may21-volume-bars.parquet')

# Read from db.
# NOTE(review): the credential/host part of this URL ("[email protected]") looks
# mangled by e-mail obfuscation on the page this was copied from -- restore the
# real "password@host" before running.
alchemyEngine = create_engine('postgresql+psycopg2://clj_user:[email protected]/volumebars', pool_recycle=3600)
pair = 'BTCUSD'

# Both queries run inside one connection that is closed as soon as the data is
# loaded.  The pair is passed as a bound parameter instead of being formatted
# into the SQL string.
with alchemyEngine.connect() as dbConnection:
    df = pd.read_sql(
        text(
            'select sum(volume) as volume, min(price) as low, max(price) as high, '
            'max(time) as time from volume_bars where pair = :pair group by group_cumsum'
        ),
        dbConnection,
        params={'pair': pair},
    )
    changepoints_df = pd.read_sql(
        text('select time from changepoints where pair = :pair and spread > 777'),
        dbConnection,
        params={'pair': pair},
    )

# Build the two-column frame Prophet is fitted on: 'ds' (timestamp) and 'y'
# (value).  `time` is converted from epoch milliseconds.  A new frame is
# constructed rather than assigning into a slice of `df`, which avoids pandas'
# SettingWithCopyWarning, and the columns are named explicitly instead of by
# position.
df_reordered = pd.DataFrame({
    'ds': pd.to_datetime(df['time'], unit='ms'),
    'y': df['high'],
})
# GROUP BY gives no ordering guarantee; sort so that tail() shows the latest bars.
df_reordered = df_reordered.sort_values('ds').reset_index(drop=True)

# Show the resulting DataFrame
print(df_reordered.tail(5))

# Explicit changepoints: timestamps (epoch milliseconds) from the changepoints
# table, kept as pandas Timestamps rather than round-tripped through raw values.
changepoints = pd.to_datetime(changepoints_df['time'], unit='ms').tolist()

# NOTE(review): per the Prophet docs, changepoint_range is presumably not used
# when explicit changepoints are given -- confirm and drop it if so.
model = Prophet(
    seasonality_mode='multiplicative',
    changepoint_prior_scale=1,
    changepoints=changepoints,
    changepoint_range=.9,
)
model.fit(df_reordered)

# 8 future steps at a 30-minute interval (use freq='h' for hourly steps).
future = model.make_future_dataframe(periods=8, freq='30min')
forecast = model.predict(future)
print(forecast[['ds', 'yhat_lower', 'yhat', 'yhat_upper', 'trend']].tail(8))

fig = model.plot(forecast, xlabel='Date', ylabel=r'yhat')
plt.title('price prediction - high')
# Timestamped file name so successive runs do not overwrite each other.
plt.savefig(f"prophet-high-{datetime.now().strftime('%Y%m%d-%H%M%S')}.png")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Train and evaluate an XGBoost classifier that predicts whether a bar closes
# above its open (class 1) or not (class 0), using lagged RSI, ADX and a
# SAR-based trend feature.

# Load the packages used below: arrow reads the Parquet file, TTR provides
# RSI/ADX/SAR, xgboost provides the model and its helpers.
library(arrow)
library(TTR)
library(xgboost)

#TODO: extract function from the code below

# Read data from a Parquet file
parquet_file <- "./BTCUSD-dec12-may21-volume-bars.parquet"
data <- arrow::read_parquet(parquet_file)

#make a data frame
df = as.data.frame(data)
colnames(df)

#build features, indicators and target class
rsi = RSI(df$close, n=14, maType="WMA")
adx = data.frame(ADX(df[,c("high","low","close")]))
sar = SAR(df[,c("high","low")], accel = c(0.02, 0.2))
# The column is lower-case "close" everywhere else in this script; df$Close
# would be NULL and silently produce an empty trend vector.
trend = df$close - sar

#create a lag: shift every feature by one bar so that a row only carries
#indicator values from the previous bar
rsi = c(NA,head(rsi,-1))
adx$ADX = c(NA,head(adx$ADX,-1))
trend = c(NA,head(trend,-1))
price = df$close-df$open

#target variable: 1 when the bar closed above its open, 0 otherwise
class = ifelse(price > 0,1,0)

# Create a Matrix (rows containing NA, e.g. indicator warm-up rows, are dropped)
model_df = data.frame(class,rsi,adx$ADX,trend)
model = as.matrix(model_df)
model = na.omit(model)
colnames(model) = c("class","rsi","adx","trend")

# Split data into train and test sets (chronological split, no shuffling)
train_size = 2/3
# floor() keeps the split index an integer; a fractional index would make the
# test range stop one row short of nrow(model).
breakpoint = floor(nrow(model) * train_size)
training_data = model[1:breakpoint,]
test_data = model[(breakpoint+1):nrow(model),]

# Split data training and test data into X and Y
X_train = training_data[,2:4] ; Y_train = training_data[,1]
class(X_train)[1]; class(Y_train)
X_test = test_data[,2:4] ; Y_test = test_data[,1]
class(X_test)[1]; class(Y_test)

# Train the xgboost model using the "xgboost" function
dtrain = xgb.DMatrix(data = X_train, label = Y_train)
xgModel = xgboost(data = dtrain, nrounds = 5, objective = "binary:logistic")

# Using cross validation (reuses the same training DMatrix)
cv = xgb.cv(data = dtrain, nrounds = 10, nfold = 5, objective = "binary:logistic")

# Make the predictions on the test data
preds = predict(xgModel, X_test)

# Determine the size of the prediction vector
print(length(preds))

# Limit display of predictions to the first 6
print(head(preds))

# Measuring model performance: threshold the predictions at 0.5 and report
# the share of test rows that were misclassified
prediction = as.numeric(preds > 0.5)
error_value = mean(prediction != Y_test)
print(paste("test-error=", error_value))
print(head(prediction))

# View feature importance from the learnt model
importance_matrix = xgb.importance(model = xgModel)
print(importance_matrix)

#TODO: set device to print plots to png
# View the trees from a model
# xgb.plot.tree(model = xgModel)
# View only the first tree in the XGBoost model
# xgb.plot.tree(model = xgModel, n_first_tree = 1)
# Interactive Plotly views of a Prophet model and its forecast.
# Expects `model` and `forecast` to already exist in the session.
from fbprophet.plot import (
    plot_plotly,
    plot_components_plotly,
    plot_forecast_component_plotly,
    plot_seasonality_plotly
)

# `py` was never imported in this snippet, so `py.iplot(fig)` raised NameError;
# the figures are displayed through their own `show()` method instead.
# NOTE(review): assumes the plot helpers return Plotly figures (plotly >= 4).

# forecast with the trend line overlaid
fig = plot_plotly(model, forecast, trend=True)
fig.show()

#components
fig = plot_components_plotly(model, forecast,
                             figsize=(800, 175))
fig.show()

#seasonality
fig = plot_seasonality_plotly(model, 'yearly')
fig.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
#re-train Prophet from existing checkpoint:
import numpy as np
class StanInit:
def init(self, model):
self.params = {
'k': np.mean(model.params['k']),
'm': np.mean(model.params['m']),
'sigma_obs':
np.mean(model.params['sigma_obs']),
'delta': np.mean(model.params['delta'],
axis=0),
'beta': np.mean(model.params['beta'], axis=0)
}
def call(self):
return self.params
# fine-tuning: fit a new Prophet model, passing the previous model's averaged
# parameters as the `init` argument of fit()
model2 = Prophet().fit(df, init=StanInit(prev_model))