Skip to content

Instantly share code, notes, and snippets.

@marcosan93
Last active November 12, 2021 01:44
Show Gist options
  • Select an option

  • Save marcosan93/d1df198874f590ce823afb0bc8e4c9e0 to your computer and use it in GitHub Desktop.

Select an option

Save marcosan93/d1df198874f590ce823afb0bc8e4c9e0 to your computer and use it in GitHub Desktop.
def fitModels(new_df, tuning: bool = False) -> dict:
    """
    Fit a fixed set of classifiers to the dataframe and return them.

    new_df must contain a 'decision' column, which is used as the target;
    every other column is used as a feature.

    When tuning is True, each model is grid searched over its entry in the
    parameter grid (scored with macro F1) and the best estimator found is
    kept. Otherwise each model is fitted with its constructor settings.

    Returns a dictionary mapping each model name to its fitted estimator.
    """
    # Models
    models = {
        "adaboost": AdaBoostClassifier(random_state=11),
        "gradboost": GradientBoostingClassifier(random_state=11),
        "randomforest": RandomForestClassifier(random_state=11),
        "knn": KNeighborsClassifier(),
        "logreg": LogisticRegression(solver='liblinear'),
        "naivebayes": GaussianNB(),
        "svm": SVC(),
        # Seeded like the other tree-based models so repeated runs agree.
        "dectree": DecisionTreeClassifier(random_state=11),
    }

    # Model parameters (only consulted when tuning is requested)
    params = {
        "adaboost": {
            "n_estimators": [50, 100, 200],
            "learning_rate": [1.0, .1, .01],
        },
        "gradboost": {
            "n_estimators": [50, 100, 200],
            "learning_rate": [1.0, .1, .01],
        },
        "randomforest": {
            "n_estimators": [50, 100, 200],
            "criterion": ['gini', 'entropy'],
        },
        "knn": {
            "n_neighbors": [5, 10, 20, 50],
        },
        "logreg": {
            # The liblinear solver only supports the l1 and l2 penalties;
            # any other value makes every fit for that candidate fail.
            "penalty": ["l1", "l2"],
        },
        "naivebayes": {},
        "svm": {
            "kernel": ["linear", "poly", "rbf", "sigmoid"],
        },
        "dectree": {
            "criterion": ['gini', 'entropy'],
            "splitter": ['best', "random"],
        },
    }

    # Features and target
    X = new_df.drop('decision', axis=1)
    y = new_df['decision']

    # Fitting each model
    fitted = {}
    for model_name, clf in models.items():
        if tuning:
            # Grid search the model and keep only the refitted best estimator
            search = GridSearchCV(
                clf,
                params[model_name],
                verbose=3,
                scoring='f1_macro'
            )
            search.fit(X, y)
            clf = search.best_estimator_
        else:
            clf.fit(X, y)

        fitted[model_name] = clf

    return fitted
def backtestModels(ticker, days_to_backtest: int, days_to_train: int, api_key):
    """
    Backtests multiple classification models based on the given crypto ticker.

    Price data covering days_to_backtest + days_to_train days is requested
    from getCryptoPrice. Starting with the first days_to_train rows, the
    training window grows by one row per step; at every step the models are
    refitted and asked to classify the day after the window.

    Alongside the fitted models, two baseline columns are added:
    'random_predict' (a random 1/0 per row) and 'buy_hold' (always 1).

    Returns a DataFrame with one column per strategy holding the exponential
    of the cumulative sum of (prediction * log return of 'Open').
    """
    # Getting Data
    df = getCryptoPrice(
        api_key=api_key,
        ticker=ticker,
        n_days=days_to_backtest + days_to_train
    )

    # One single-step prediction frame per iteration. They are concatenated
    # once after the loop: DataFrame.append no longer exists in pandas 2.x
    # and re-copied the whole frame on every call.
    daily_preds = []

    # Iterating through the DF starting from the least amount of days to train on
    for i in tqdm(range(days_to_train, len(df) + 1)):
        # Training DF with new data added everyday
        train_df = df[:i]

        # Transforming the Training DF
        t_df, last_val = transformData(train_df)

        # Rebalancing the data
        t_df = balanceDecisions(t_df)

        # Fitting models
        fitted_models = fitModels(t_df)

        # Setting the new index to classify (one day after last_val's index)
        pred = pd.DataFrame(index=last_val.index + pd.offsets.Day(1))

        # Making classifications for the very next day
        for model_name, model in fitted_models.items():
            pred[model_name] = model.predict(last_val)

        daily_preds.append(pred)

    # pd.concat raises on an empty list, so fall back to an empty frame when
    # the loop never ran (fewer rows returned than days_to_train).
    pred_df = pd.concat(daily_preds) if daily_preds else pd.DataFrame()

    # Random strategy
    pred_df['random_predict'] = random.choices([1, 0], k=len(pred_df))

    # The Buy and Hold Strategy
    pred_df['buy_hold'] = 1

    # Getting log returns from the original DF
    log_returns = df['Open'].tail(len(pred_df)).apply(np.log).diff()

    # Moving every prediction one row later (compensating for lookahead
    # bias) and dropping the NaN row the shift leaves behind
    pred_df = pred_df.shift(1).dropna()

    # Performing the backtest (rows are matched on index)
    returns = pred_df.multiply(log_returns, axis=0)

    # Inversing the log returns and getting daily portfolio balance
    performance = returns.cumsum().apply(np.exp)

    return performance
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment