usametov · June 4, 2023 19:44 · usametov · May 23, 2023 · usametov · May 23, 2023
diff --git a/prophet1.py b/prophet1.py
 import pandas as pd
 import matplotlib.pyplot as plt
 from fbprophet import Prophet
 from datetime import datetime
 import psycopg2
 from sqlalchemy import create_engine

 # Forecast the 'high' price of one trading pair with Prophet and save the plot.
 #
 # Offline alternative to the database read below:
 # df = pd.read_parquet('./BTCUSD-dec12-may21-volume-bars.parquet')

 pair = 'BTCUSD'

 # NOTE(review): the credentials are embedded in the URL literal; consider
 # loading them from the environment instead.
 alchemyEngine = create_engine('postgresql+psycopg2://clj_user:[email protected]/volumebars', pool_recycle=3600)

 # Run both queries inside a context manager so the connection is released
 # even if a query raises (it was previously never closed).
 with alchemyEngine.connect() as dbConnection:
     # One row per group_cumsum bucket: total volume, lowest and highest
     # price, and the latest timestamp found in the bucket.
     df = pd.read_sql(f'select sum(volume) as volume, min(price) as low, max(price) as high, max(time) as time from volume_bars where pair=\'{pair}\' group by group_cumsum', dbConnection)
     # Timestamps of the changepoints stored for this pair with spread > 777.
     changepoints_df = pd.read_sql(f'select time from changepoints where pair =\'{pair}\' and spread > 777', dbConnection)

 # Prophet's input frame: 'ds' holds the timestamp and 'y' the value to
 # forecast (here the bar high). 'time' is converted from epoch
 # milliseconds. Building a new frame, instead of adding a column to a
 # slice of df, avoids pandas' SettingWithCopyWarning.
 df_reordered = pd.DataFrame({
     'ds': pd.to_datetime(df['time'], unit='ms'),
     'y': df['high'],
 })

 # Show the tail of the training frame
 print(df_reordered.tail(5))

 # Read the changepoints from the converted Series so they are passed on as
 # pandas Timestamps; going through .values.tolist() hands back whatever
 # numpy produces for datetime64 data (plain integers at ns resolution).
 changepoints = pd.to_datetime(changepoints_df['time'], unit='ms').tolist()

 # NOTE: per the Prophet documentation changepoint_range is not used when
 # explicit changepoints are supplied; the argument is kept unchanged here.
 model = Prophet(seasonality_mode='multiplicative', changepoint_prior_scale=1, changepoints=changepoints, changepoint_range=.9)
 model.fit(df_reordered)

 # Append 8 future rows spaced 30 minutes apart (freq='h' would be hourly).
 future = model.make_future_dataframe(periods=8, freq='30min')
 forecast = model.predict(future)

 # The last 8 rows correspond to the 8 appended future periods.
 print(forecast[['ds', 'yhat_lower', 'yhat', 'yhat_upper', 'trend']].tail(8))
 fig = model.plot(forecast, xlabel='Date', ylabel='yhat')

 plt.title('price prediction - high')
 # Timestamp the file name so successive runs do not overwrite each other.
 plt.savefig(f"prophet-high-{datetime.now().strftime('%Y%m%d-%H%M%S')}.png")
diff --git a/xgboost.R b/xgboost.R
 # Train and evaluate an XGBoost up/down classifier on volume-bar indicators.

 # arrow reads the Parquet file, TTR supplies RSI/ADX/SAR and xgboost supplies
 # xgb.DMatrix/xgboost/xgb.cv/xgb.importance. TTR and xgboost were used below
 # without being attached, so they are loaded explicitly here.
 library(arrow)
 library(TTR)
 library(xgboost)


 #TODO: extract function from the code below
 # Read data from a Parquet file
 parquet_file <- "./BTCUSD-dec12-may21-volume-bars.parquet"

 data <- arrow::read_parquet(parquet_file)

 # make a data frame
 df = as.data.frame(data)
 colnames(df)

 # build features, indicators and target class
 rsi = RSI(df$close, n=14, maType="WMA")
 adx = data.frame(ADX(df[,c("high","low","close")]))
 sar = SAR(df[,c("high","low")], accel = c(0.02, 0.2))
 # Every other reference uses the lower-case column name; df$Close is NULL,
 # which silently turned the trend feature into an empty vector.
 trend = df$close - sar

 # create a lag: shift each feature down by one row so that a row's features
 # come from the previous bar
 rsi = c(NA,head(rsi,-1))
 adx$ADX = c(NA,head(adx$ADX,-1))
 trend = c(NA,head(trend,-1))
 price = df$close-df$open

 # target variable: 1 when the bar closed above its open, otherwise 0
 class = ifelse(price > 0,1,0)

 # Create a Matrix (one column per variable) and drop rows containing NA
 model = matrix(c(class,rsi,adx$ADX,trend), nrow=length(class))
 model = na.omit(model)
 colnames(model) = c("class","rsi","adx","trend")

 # Split data into train and test sets
 train_size = 2/3
 # floor() keeps the split index integral. With a fractional breakpoint the
 # sequence (breakpoint+1):nrow(model) is made of fractional indices that are
 # truncated on subsetting, which dropped the last row whenever nrow(model)
 # was not a multiple of 3.
 breakpoint = floor(nrow(model) * train_size)
 # Fail early instead of building reversed or empty index ranges.
 stopifnot(breakpoint >= 1, breakpoint < nrow(model))

 # drop = FALSE keeps the result a matrix even when a split has a single row
 training_data = model[1:breakpoint, , drop = FALSE]
 test_data = model[(breakpoint+1):nrow(model), , drop = FALSE]

 # Split data training and test data into X and Y
 X_train = training_data[,2:4, drop = FALSE] ; Y_train = training_data[,1]
 class(X_train)[1]; class(Y_train)

 X_test = test_data[,2:4, drop = FALSE] ; Y_test = test_data[,1]
 class(X_test)[1]; class(Y_test)

 # Train the xgboost model using the "xgboost" function
 # (nrounds is the full argument name; nround only worked by partial matching)
 dtrain = xgb.DMatrix(data = X_train, label = Y_train)
 xgModel = xgboost(data = dtrain, nrounds = 5, objective = "binary:logistic")

 # Using cross validation on the same training matrix
 cv = xgb.cv(data = dtrain, nrounds = 10, nfold = 5, objective = "binary:logistic")

 # Make the predictions on the test data
 preds = predict(xgModel, X_test)

 # Determine the size of the prediction vector
 print(length(preds))

 # Limit display of predictions to the first 6
 print(head(preds))

 # Measuring model performance: share of test rows whose thresholded
 # prediction differs from the label
 prediction = as.numeric(preds > 0.5)
 error_value = mean(prediction != Y_test)
 print(paste("test-error=", error_value))

 print(head(prediction))

 # View feature importance from the learnt model
 importance_matrix = xgb.importance(model = xgModel)
 print(importance_matrix)

 #TODO: set device to print plots to png
 # View the trees from a model
 # xgb.plot.tree(model = xgModel)
 # View only the first tree in the XGBoost model
 # xgb.plot.tree(model = xgModel, n_first_tree = 1)
	import pandas as pd
	import matplotlib.pyplot as plt
	from fbprophet import Prophet
	from datetime import datetime
	import psycopg2
	from sqlalchemy import create_engine

	# Forecast the 'high' price of one trading pair with Prophet and save the plot.
	#
	# Offline alternative to the database read below:
	# df = pd.read_parquet('./BTCUSD-dec12-may21-volume-bars.parquet')

	pair = 'BTCUSD'

	# NOTE(review): the credentials are embedded in the URL literal; consider
	# loading them from the environment instead.
	alchemyEngine = create_engine('postgresql+psycopg2://clj_user:[email protected]/volumebars', pool_recycle=3600)

	# Run both queries inside a context manager so the connection is released
	# even if a query raises (it was previously never closed).
	with alchemyEngine.connect() as dbConnection:
	    # One row per group_cumsum bucket: total volume, lowest and highest
	    # price, and the latest timestamp found in the bucket.
	    df = pd.read_sql(f'select sum(volume) as volume, min(price) as low, max(price) as high, max(time) as time from volume_bars where pair=\'{pair}\' group by group_cumsum', dbConnection)
	    # Timestamps of the changepoints stored for this pair with spread > 777.
	    changepoints_df = pd.read_sql(f'select time from changepoints where pair =\'{pair}\' and spread > 777', dbConnection)

	# Prophet's input frame: 'ds' holds the timestamp and 'y' the value to
	# forecast (here the bar high). 'time' is converted from epoch
	# milliseconds. Building a new frame, instead of adding a column to a
	# slice of df, avoids pandas' SettingWithCopyWarning.
	df_reordered = pd.DataFrame({
	    'ds': pd.to_datetime(df['time'], unit='ms'),
	    'y': df['high'],
	})

	# Show the tail of the training frame
	print(df_reordered.tail(5))

	# Read the changepoints from the converted Series so they are passed on as
	# pandas Timestamps; going through .values.tolist() hands back whatever
	# numpy produces for datetime64 data (plain integers at ns resolution).
	changepoints = pd.to_datetime(changepoints_df['time'], unit='ms').tolist()

	# NOTE: per the Prophet documentation changepoint_range is not used when
	# explicit changepoints are supplied; the argument is kept unchanged here.
	model = Prophet(seasonality_mode='multiplicative', changepoint_prior_scale=1, changepoints=changepoints, changepoint_range=.9)
	model.fit(df_reordered)

	# Append 8 future rows spaced 30 minutes apart (freq='h' would be hourly).
	future = model.make_future_dataframe(periods=8, freq='30min')
	forecast = model.predict(future)

	# The last 8 rows correspond to the 8 appended future periods.
	print(forecast[['ds', 'yhat_lower', 'yhat', 'yhat_upper', 'trend']].tail(8))
	fig = model.plot(forecast, xlabel='Date', ylabel='yhat')

	plt.title('price prediction - high')
	# Timestamp the file name so successive runs do not overwrite each other.
	plt.savefig(f"prophet-high-{datetime.now().strftime('%Y%m%d-%H%M%S')}.png")
	# Train and evaluate an XGBoost up/down classifier on volume-bar indicators.

	# arrow reads the Parquet file, TTR supplies RSI/ADX/SAR and xgboost supplies
	# xgb.DMatrix/xgboost/xgb.cv/xgb.importance. TTR and xgboost were used below
	# without being attached, so they are loaded explicitly here.
	library(arrow)
	library(TTR)
	library(xgboost)


	#TODO: extract function from the code below
	# Read data from a Parquet file
	parquet_file <- "./BTCUSD-dec12-may21-volume-bars.parquet"

	data <- arrow::read_parquet(parquet_file)

	# make a data frame
	df = as.data.frame(data)
	colnames(df)

	# build features, indicators and target class
	rsi = RSI(df$close, n=14, maType="WMA")
	adx = data.frame(ADX(df[,c("high","low","close")]))
	sar = SAR(df[,c("high","low")], accel = c(0.02, 0.2))
	# Every other reference uses the lower-case column name; df$Close is NULL,
	# which silently turned the trend feature into an empty vector.
	trend = df$close - sar

	# create a lag: shift each feature down by one row so that a row's features
	# come from the previous bar
	rsi = c(NA,head(rsi,-1))
	adx$ADX = c(NA,head(adx$ADX,-1))
	trend = c(NA,head(trend,-1))
	price = df$close-df$open

	# target variable: 1 when the bar closed above its open, otherwise 0
	class = ifelse(price > 0,1,0)

	# Create a Matrix (one column per variable) and drop rows containing NA
	model = matrix(c(class,rsi,adx$ADX,trend), nrow=length(class))
	model = na.omit(model)
	colnames(model) = c("class","rsi","adx","trend")

	# Split data into train and test sets
	train_size = 2/3
	# floor() keeps the split index integral. With a fractional breakpoint the
	# sequence (breakpoint+1):nrow(model) is made of fractional indices that are
	# truncated on subsetting, which dropped the last row whenever nrow(model)
	# was not a multiple of 3.
	breakpoint = floor(nrow(model) * train_size)
	# Fail early instead of building reversed or empty index ranges.
	stopifnot(breakpoint >= 1, breakpoint < nrow(model))

	# drop = FALSE keeps the result a matrix even when a split has a single row
	training_data = model[1:breakpoint, , drop = FALSE]
	test_data = model[(breakpoint+1):nrow(model), , drop = FALSE]

	# Split data training and test data into X and Y
	X_train = training_data[,2:4, drop = FALSE] ; Y_train = training_data[,1]
	class(X_train)[1]; class(Y_train)

	X_test = test_data[,2:4, drop = FALSE] ; Y_test = test_data[,1]
	class(X_test)[1]; class(Y_test)

	# Train the xgboost model using the "xgboost" function
	# (nrounds is the full argument name; nround only worked by partial matching)
	dtrain = xgb.DMatrix(data = X_train, label = Y_train)
	xgModel = xgboost(data = dtrain, nrounds = 5, objective = "binary:logistic")

	# Using cross validation on the same training matrix
	cv = xgb.cv(data = dtrain, nrounds = 10, nfold = 5, objective = "binary:logistic")

	# Make the predictions on the test data
	preds = predict(xgModel, X_test)

	# Determine the size of the prediction vector
	print(length(preds))

	# Limit display of predictions to the first 6
	print(head(preds))

	# Measuring model performance: share of test rows whose thresholded
	# prediction differs from the label
	prediction = as.numeric(preds > 0.5)
	error_value = mean(prediction != Y_test)
	print(paste("test-error=", error_value))

	print(head(prediction))

	# View feature importance from the learnt model
	importance_matrix = xgb.importance(model = xgModel)
	print(importance_matrix)

	#TODO: set device to print plots to png
	# View the trees from a model
	# xgb.plot.tree(model = xgModel)
	# View only the first tree in the XGBoost model
	# xgb.plot.tree(model = xgModel, n_first_tree = 1)