Skip to content

Instantly share code, notes, and snippets.

@raddy
Created October 29, 2013 23:57
Show Gist options
  • Select an option

  • Save raddy/7224845 to your computer and use it in GitHub Desktop.

Select an option

Save raddy/7224845 to your computer and use it in GitHub Desktop.
Example: quick-and-dirty random forest (RF) classifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.cross_validation import train_test_split
import pandas as pd
import numpy as np
def training_to_XY(hi, side='buy'):
    """Build the feature frame ``X`` and the label series ``Y``.

    Parameters
    ----------
    hi : pandas.DataFrame
        Training rows. The columns read here are ``implied_size``,
        ``implied_trd``, ``bidsize1``, ``asksize1``, ``prev_tick``, ``tick``
        and ``{Ask,Bid}{Price,Size}{1..5}``.
    side : str
        ``"buy"`` treats the ask book as the side being traded into (the
        "face") and the bid book as the supporting side. Any other value
        is handled as a sell and swaps the two books.

    Returns
    -------
    (X, Y)
        ``X`` has the columns ``implied_size``, ``Through2``, ``Through5``,
        ``Support2``, ``Support5``, ``bsize``, ``asize``, ``face``,
        ``supporting``, ``prev`` and ``bigsize`` (in that order);
        ``Y`` is ``hi.tick``.

    Notes
    -----
    ``deltas_in_face`` is defined outside this block; it is called with the
    implied trade price, a 2-D array of interleaved price/size columns for
    levels 1-5, and ``1`` for the ask book or ``-1`` for the bid book.
    """
    # Each book is tagged with the sign handed to deltas_in_face for it.
    ask_book = ('Ask', 1)
    bid_book = ('Bid', -1)
    if side == "buy":
        (face_name, face_sign), (supp_name, supp_sign) = ask_book, bid_book
    else:
        (face_name, face_sign), (supp_name, supp_sign) = bid_book, ask_book

    def book_values(prefix):
        # Price1, Size1, Price2, Size2, ... Price5, Size5 for one book side.
        cols = []
        for level in range(1, 6):
            cols.append('%sPrice%d' % (prefix, level))
            cols.append('%sSize%d' % (prefix, level))
        # .loc replaces the removed .ix indexer; selection is label-based.
        return hi.loc[:, cols].values

    X = pd.DataFrame(hi.implied_size)
    X['Through2'] = hi.implied_trd - hi[face_name + 'Price2']
    X['Through5'] = hi.implied_trd - hi[face_name + 'Price5']
    X['Support2'] = hi.implied_trd - hi[supp_name + 'Price2']
    X['Support5'] = hi.implied_trd - hi[supp_name + 'Price5']
    X['bsize'] = hi.bidsize1
    X['asize'] = hi.asksize1
    X['face'] = deltas_in_face(hi.implied_trd, book_values(face_name), face_sign)
    X['supporting'] = deltas_in_face(hi.implied_trd, book_values(supp_name), supp_sign)
    X['prev'] = hi.prev_tick
    # NOTE(review): both sides compare against asksize1, exactly as the
    # original did. For sells this looks like it may have been meant to be
    # bidsize1 -- confirm before changing, since it alters the feature.
    X['bigsize'] = hi.implied_size > hi.asksize1

    Y = hi.tick
    return X, Y
def quick_eval(X, Y):
    """Fit a random forest on a train/test split and report how it did.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature frame; ``X.values`` is used for fitting and ``X.columns``
        labels the feature importances.
    Y : pandas.Series
        Labels; ``Y.values`` is used for fitting.

    Side effects
    ------------
    Prints the confusion matrix on the held-out 40% of rows, draws it as a
    matrix plot, then prints the feature importances sorted ascending.
    Returns ``None``.
    """
    X_train, X_test, Y_train, Y_test = train_test_split(
        X.values, Y.values, random_state=0, test_size=.4)

    # compute_importances was dropped from RandomForestClassifier;
    # feature_importances_ is available after fit without it.
    forest = RandomForestClassifier(n_estimators=100)
    Y_pred = forest.fit(X_train, Y_train).predict(X_test)

    cm = confusion_matrix(Y_test, Y_pred)
    print(cm)

    # NOTE(review): the plotting names below are not imported anywhere in
    # this file; presumably it is run in a pylab-style session -- confirm.
    matshow(cm)
    title('Confusion matrix')
    colorbar()
    ylabel('True label')
    xlabel('Predicted label')
    show()

    # list(...) so the frame is built from a concrete sequence on Python 3,
    # where zip returns an iterator; column 1 holds the importance values.
    importances = pd.DataFrame(list(zip(X.columns, forest.feature_importances_)))
    print(importances.sort_values(by=1))
# Evaluate one model per trade side using the frames stored in the HDF5 file.
training_store = pd.HDFStore('training_slugger.h5')
try:
    for store_key, trade_side in (('buys', 'buy'), ('sells', 'sell')):
        X, Y = training_to_XY(training_store[store_key], trade_side)
        quick_eval(X, Y)
finally:
    # Close the store even when feature building or evaluation raises.
    training_store.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment