marcosan93 · November 12, 2021 01:44
diff --git a/classification_crypto_backtest_fit.py b/classification_crypto_backtest_fit.py
 def fitModels(new_df, tuning=False):
    """
    Fits every candidate classifier to the dataframe and returns them.

    new_df: dataframe whose 'decision' column is the target; every other
        column is used as a feature.
    tuning: when True, each model is grid searched (scored with 'f1_macro')
        over its entry in the parameter table and the best estimator found
        is kept. When False, each model is fit with its constructor settings.

    Returns a dictionary mapping each model name to its fitted estimator.
    """
    # Models
    models = {
        "adaboost": AdaBoostClassifier(random_state=11),
        "gradboost": GradientBoostingClassifier(random_state=11),
        "randomforest": RandomForestClassifier(random_state=11),
        "knn": KNeighborsClassifier(),
        "logreg": LogisticRegression(solver='liblinear'),
        "naivebayes": GaussianNB(),
        "svm": SVC(),
        # Seeded like the other tree-based models so repeated runs agree
        "dectree": DecisionTreeClassifier(random_state=11)
    }

    # Model parameters (to be tuned if needed)
    params = {
        "adaboost": {
            "n_estimators": [50, 100, 200],
            "learning_rate": [1.0, .1, .01]
        },
        "gradboost": {
            "n_estimators": [50, 100, 200],
            "learning_rate": [1.0, .1, .01]
        },
        "randomforest": {
            "n_estimators": [50, 100, 200],
            "criterion": ['gini', 'entropy']
        },
        "knn": {
            "n_neighbors": [5, 10, 20, 50]
        },
        # The liblinear solver only supports the l1 and l2 penalties; listing
        # "elasticnet" or "none" here only produced failed fits and warnings.
        "logreg": {
            "penalty": ["l1", "l2"]
        },
        # Nothing to tune: an empty grid evaluates the default model once
        "naivebayes": {},
        "svm": {
            "kernel": ["linear", "poly", "rbf", "sigmoid"]
        },
        "dectree": {
            "criterion": ['gini', 'entropy'],
            "splitter": ['best', "random"]
        }
    }

    # Fitted models
    fitted = {}

    # Features are every column except the target
    X = new_df.drop('decision', axis=1)

    y = new_df['decision']

    # Fitting each model
    for model_name, clf in models.items():

        if tuning:

            # Grid searching the model if wanted
            search = GridSearchCV(
                clf,
                params[model_name],
                verbose=3,
                scoring='f1_macro'
            )

            search.fit(X, y)

            # Keep only the winning estimator, not the search wrapper
            clf = search.best_estimator_

        else:
            clf.fit(X, y)

        # Adding each fitted model to the fitted dictionary
        fitted[model_name] = clf

    return fitted
  
  
 def backtestModels(ticker, days_to_backtest, days_to_train, api_key):
    """
    Backtests multiple classification models based on the given crypto ticker. Range of
    backtest is dependent on days_to_backtest. Training data is dependent on days_to_train.

    ticker: passed through to getCryptoPrice to select the asset.
    days_to_backtest: added to days_to_train to size the price request.
    days_to_train: number of rows in the first (smallest) training window.
    api_key: passed through to getCryptoPrice.

    Returns a dataframe with one column per fitted model plus 'random_predict'
    and 'buy_hold', holding the exponentiated cumulative sum of each
    strategy's daily log returns.
    """

    # Getting Data
    df = getCryptoPrice(
        api_key=api_key,
        ticker=ticker,
        n_days=days_to_backtest+days_to_train
    )

    # One small frame of classifications per step. They are joined once after
    # the loop: DataFrame.append was removed in pandas 2.0 and re-copied the
    # whole accumulated frame on every iteration.
    daily_preds = []

    # Iterating through the DF starting from the least amount of days to train on
    for i in tqdm(range(days_to_train, len(df)+1)):

        # Training DF with new data added everyday
        train_df = df[:i]

        # Transforming the Training DF
        t_df, last_val = transformData(train_df)

        # Rebalancing the data
        t_df = balanceDecisions(t_df)

        # Fitting models
        fitted_models = fitModels(t_df)

        # Setting the new index to classify (one day after last_val's index)
        pred = pd.DataFrame(index=last_val.index+pd.offsets.Day(1))

        # Making classifications for the very next day
        for model_name, model in fitted_models.items():

            pred[model_name] = model.predict(last_val)

        daily_preds.append(pred)

    # pd.concat rejects an empty list, so fall back to an empty frame when the
    # loop never ran
    pred_df = pd.concat(daily_preds) if daily_preds else pd.DataFrame()

    # Random strategy
    pred_df['random_predict'] = random.choices([1,0], k=len(pred_df))

    # The Buy and Hold Strategy
    pred_df['buy_hold'] = 1

    # Getting log returns from the original DF
    log_returns = df['Open'].tail(len(pred_df)).apply(np.log).diff()

    # Dropping any Nans and compensating for lookahead bias
    pred_df = pred_df.shift(1).dropna()

    # Performing the backtest (rows are matched on the index)
    returns = pred_df.multiply(log_returns, axis=0)

    # Inversing the log returns and getting daily portfolio balance
    performance = returns.cumsum().apply(np.exp)

    return performance
	def fitModels(new_df, tuning=False):
	    """
	    Fits every candidate classifier to the dataframe and returns them.

	    new_df: dataframe whose 'decision' column is the target; every other
	        column is used as a feature.
	    tuning: when True, each model is grid searched (scored with 'f1_macro')
	        over its entry in the parameter table and the best estimator found
	        is kept. When False, each model is fit with its constructor settings.

	    Returns a dictionary mapping each model name to its fitted estimator.
	    """
	    # Models
	    models = {
	        "adaboost": AdaBoostClassifier(random_state=11),
	        "gradboost": GradientBoostingClassifier(random_state=11),
	        "randomforest": RandomForestClassifier(random_state=11),
	        "knn": KNeighborsClassifier(),
	        "logreg": LogisticRegression(solver='liblinear'),
	        "naivebayes": GaussianNB(),
	        "svm": SVC(),
	        # Seeded like the other tree-based models so repeated runs agree
	        "dectree": DecisionTreeClassifier(random_state=11)
	    }

	    # Model parameters (to be tuned if needed)
	    params = {
	        "adaboost": {
	            "n_estimators": [50, 100, 200],
	            "learning_rate": [1.0, .1, .01]
	        },
	        "gradboost": {
	            "n_estimators": [50, 100, 200],
	            "learning_rate": [1.0, .1, .01]
	        },
	        "randomforest": {
	            "n_estimators": [50, 100, 200],
	            "criterion": ['gini', 'entropy']
	        },
	        "knn": {
	            "n_neighbors": [5, 10, 20, 50]
	        },
	        # The liblinear solver only supports the l1 and l2 penalties; listing
	        # "elasticnet" or "none" here only produced failed fits and warnings.
	        "logreg": {
	            "penalty": ["l1", "l2"]
	        },
	        # Nothing to tune: an empty grid evaluates the default model once
	        "naivebayes": {},
	        "svm": {
	            "kernel": ["linear", "poly", "rbf", "sigmoid"]
	        },
	        "dectree": {
	            "criterion": ['gini', 'entropy'],
	            "splitter": ['best', "random"]
	        }
	    }

	    # Fitted models
	    fitted = {}

	    # Features are every column except the target
	    X = new_df.drop('decision', axis=1)

	    y = new_df['decision']

	    # Fitting each model
	    for model_name, clf in models.items():

	        if tuning:

	            # Grid searching the model if wanted
	            search = GridSearchCV(
	                clf,
	                params[model_name],
	                verbose=3,
	                scoring='f1_macro'
	            )

	            search.fit(X, y)

	            # Keep only the winning estimator, not the search wrapper
	            clf = search.best_estimator_

	        else:
	            clf.fit(X, y)

	        # Adding each fitted model to the fitted dictionary
	        fitted[model_name] = clf

	    return fitted


	def backtestModels(ticker, days_to_backtest, days_to_train, api_key):
	    """
	    Backtests multiple classification models based on the given crypto ticker. Range of
	    backtest is dependent on days_to_backtest. Training data is dependent on days_to_train.

	    ticker: passed through to getCryptoPrice to select the asset.
	    days_to_backtest: added to days_to_train to size the price request.
	    days_to_train: number of rows in the first (smallest) training window.
	    api_key: passed through to getCryptoPrice.

	    Returns a dataframe with one column per fitted model plus 'random_predict'
	    and 'buy_hold', holding the exponentiated cumulative sum of each
	    strategy's daily log returns.
	    """

	    # Getting Data
	    df = getCryptoPrice(
	        api_key=api_key,
	        ticker=ticker,
	        n_days=days_to_backtest+days_to_train
	    )

	    # One small frame of classifications per step. They are joined once after
	    # the loop: DataFrame.append was removed in pandas 2.0 and re-copied the
	    # whole accumulated frame on every iteration.
	    daily_preds = []

	    # Iterating through the DF starting from the least amount of days to train on
	    for i in tqdm(range(days_to_train, len(df)+1)):

	        # Training DF with new data added everyday
	        train_df = df[:i]

	        # Transforming the Training DF
	        t_df, last_val = transformData(train_df)

	        # Rebalancing the data
	        t_df = balanceDecisions(t_df)

	        # Fitting models
	        fitted_models = fitModels(t_df)

	        # Setting the new index to classify (one day after last_val's index)
	        pred = pd.DataFrame(index=last_val.index+pd.offsets.Day(1))

	        # Making classifications for the very next day
	        for model_name, model in fitted_models.items():

	            pred[model_name] = model.predict(last_val)

	        daily_preds.append(pred)

	    # pd.concat rejects an empty list, so fall back to an empty frame when the
	    # loop never ran
	    pred_df = pd.concat(daily_preds) if daily_preds else pd.DataFrame()

	    # Random strategy
	    pred_df['random_predict'] = random.choices([1,0], k=len(pred_df))

	    # The Buy and Hold Strategy
	    pred_df['buy_hold'] = 1

	    # Getting log returns from the original DF
	    log_returns = df['Open'].tail(len(pred_df)).apply(np.log).diff()

	    # Dropping any Nans and compensating for lookahead bias
	    pred_df = pred_df.shift(1).dropna()

	    # Performing the backtest (rows are matched on the index)
	    returns = pred_df.multiply(log_returns, axis=0)

	    # Inversing the log returns and getting daily portfolio balance
	    performance = returns.cumsum().apply(np.exp)

	    return performance
No results found