Created
October 21, 2018 12:27
-
-
Save kmlebedev/8511ee5f7a70957cac51a46d1ca029e8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from pandas_datareader import data | |
from sklearn.model_selection import train_test_split | |
from sklearn.preprocessing import MinMaxScaler | |
from sklearn.neural_network import MLPClassifier | |
from sklearn.metrics import classification_report,confusion_matrix | |
from datetime import datetime | |
def stat(X_test, predictions, y_test, position_sizing, accuracy):
    """Print back-test statistics comparing predicted and actual prices.

    Parameters
    ----------
    X_test : pd.DataFrame
        Test feature rows; the first column is the entry (current) price.
    predictions : array-like
        Model predictions, scaled prices (price * ``accuracy``).
    y_test : pd.Series
        True targets, scaled prices (price * ``accuracy``).
    position_sizing : number
        Percent of capital risked per trade.
    accuracy : number
        Scale factor that was applied to prices to form integer class labels.
    """
    # .ix was removed in pandas 1.0 -- positional access via .iloc instead.
    X_start = X_test.iloc[:, 0]
    Trade_test = (y_test / accuracy - X_start)
    Trade_pred = (predictions / accuracy - X_start)
    Trade_diff = pd.concat([Trade_test, Trade_pred, X_start, y_test / accuracy], axis=1)
    summ = 0          # cumulative profit fraction over all trades
    summ_n = 0        # cumulative loss fraction from mispredicted trends
    trend = 0         # number of trades evaluated
    trend_false = 0   # trades where the predicted direction was wrong
    for _, row in Trade_diff.iterrows():
        actual, predicted, start = row.values[0], row.values[1], row.values[2]
        trend += 1
        if actual > 0 and predicted > 0:
            # both actual and predicted moves are up: long trade wins
            summ += (actual / start) * position_sizing / 100
        elif actual < 0 and predicted < 0:
            # both moves are down: short trade wins (actual is negative)
            summ -= (actual / start) * position_sizing / 100
        else:
            # direction mispredicted: the absolute move is booked as a loss
            summ -= (abs(actual) / start) * position_sizing / 100
            summ_n += (abs(actual) / start) * position_sizing / 100
            trend_false += 1
    print("count {}, average absolute {}% , profit {}% , loss {}% , trend failing {}%".format(
        len(Trade_diff),
        round((predictions - y_test).abs().div(predictions).sum() / predictions.size * 100, 2),
        round(summ * 100, 2), round(summ_n * 100, 2), round(trend_false / trend * 100, 2)))
# stock = data.DataReader('SBER', 'moex', start='2004-01-01', end='2018-09-30')
# stock_TQBR = stock[stock.BOARDID == 'TQBR']
# X = stock_TQBR['CLOSE'].dropna()
# SBER_040101_181002
# Daily candle CSV exported from Finam, e.g.:
# http://export.finam.ru/MGNT_080101_181002.csv?market=1&em=17086&code=MGNT&apply=0&df=1&mf=0&yf=2008&from=01.01.2008&dt=2&mt=9&yt=2018&to=09.10.2018&p=8&f=MGNT_080101_181002&e=.csv&cn=MGNT&dtf=1&tmf=1&MSOR=1&mstime=on&mstimever=1&sep=1&sep2=1&datf=5&at=1
X = pd.read_csv(
    '~/Downloads/MGNT_080101_181009.csv',
    parse_dates=[0], index_col=0
)['<CLOSE>'].astype(float)

accuracy = 10           # price scale: class labels are price * accuracy, cast to int
prediction_days = 10    # forecast horizon in trading days
window_days = 200       # number of lagged closes used as features
test_tail_days = 0      # extra most-recent days forced into the test set
predic_tail_days = 5    # final days to predict (their targets are not known yet)

# Sliding window of closes: column w holds the close shifted w days back.
X_window = pd.concat(
    [X.shift(w) for w in range(0, window_days, 1)], axis=1
)
# Add day of week
# X_dayofweek = X.index.dayofweek.astype(int)
# for d in range(5):
#     X_window.insert(loc=d+1, column='dayofweek{}'.format(d), value=np.where(X_dayofweek==d, 1, 0))

# Target: the close `prediction_days` ahead, scaled to an integer class label.
y = X.shift(-prediction_days) * accuracy

# NOTE(review): shuffle=True on a time series lets the model train on prices
# from the future of its test rows, so the reported results are optimistic --
# confirm this leakage is intended before trusting the back-test numbers.
X_train, X_test, y_train, y_test = train_test_split(
    X_window.iloc[window_days - 1:-prediction_days - test_tail_days],
    y.iloc[window_days - 1:-prediction_days - test_tail_days].astype(int),
    test_size=0.15,
    shuffle=True
)
# DataFrame/Series.append was removed in pandas 2.0 -- use pd.concat instead.
X_test = pd.concat([X_test, X_window.iloc[-prediction_days - test_tail_days:-prediction_days]])
y_test = pd.concat([y_test, y.iloc[-prediction_days - test_tail_days:-prediction_days].astype(int)])
X_predic = X_window[-predic_tail_days:]

# Normalize every lag column with the single global price min/max.  The original
# code fitted MinMaxScaler on one column (X reshaped to (-1, 1)) and then
# transformed 200-column frames; recent scikit-learn rejects that feature-count
# mismatch, so the equivalent (x - min) / (max - min) is applied directly.
x_min, x_max = X.min(), X.max()
X_train_norm = (X_train - x_min) / (x_max - x_min)
# Reverse
# X_train_norm = scaler.transform(X_train.reindex(index=X_train.index[::-1]))
# y = y.reindex(index=y.index[::-1])
X_test_norm = (X_test - x_min) / (x_max - x_min)
X_predic_norm = (X_predic - x_min) / (x_max - x_min)

# train our model
mlp = MLPClassifier(activation='relu', solver='lbfgs', max_iter=1100, alpha=0.015,
                    hidden_layer_sizes=(375,),  # one hidden layer of 375 units; (375) was an int
                    random_state=1)
mlp.fit(X_train_norm, y_train)  # fixed: original called non-existent mlp.fiat(...)
predictions = mlp.predict(X_test_norm)
new_predictions = mlp.predict(X_predic_norm)
stat(X_test, predictions, y_test, 25, accuracy)
# print((predictions-y_test).sort_index().tail()/100)
# print((predictions-y_test).div(predictions).abs().mul(100) .describe())
# print((predictions - y_test).abs().div(predictions).sum() / predictions.size * 100)
# mlp = MLPClassifier(activation='relu', solver='lbfgs',max_iter=1000, alpha=0.0001, hidden_layer_sizes=(200)
# 2.7549654896139275
# logistic
# total % 273.2222664780387, negativ % 144.6650856196263, trend % 30.252100840336134
# relu test_size 0.5 alpha=0.015 hidden_layer_sizes=375 max_iter=1100
# count 596, average absolute 1.83% , profit 707.8% , loss 13.86% , trend failing 8.56%
# count 598, average absolute 1.88% , profit 723.24% , loss 22.78% , trend failing 10.87%
# relu test_size 0.75 alpha=0.015 hidden_layer_sizes = 375 max_iter 1000
# count 894, average absolute 2.38% , profit 1004.97% , loss 66.52% , trend failing 12.98%
# relu test_size 0.75 alpha=0.015 hidden_layer_sizes = 375 max_iter 1100
# count 894, average absolute 2.11% , profit 996.95% , loss 60.53% , trend failing 13.31%
# w160 count 862, average absolute 2.17% , profit 964.68% , loss 57.2% , trend failing 14.15%
# w180 count 848, average absolute 2.2% , profit 969.73% , loss 46.88% , trend failing 12.74%
# w220 count 820, average absolute 2.64% , profit 916.49% , loss 76.91% , trend failing 15.24%
# 25 days
# count 883, average absolute 3.15% , profit 1700.48% , loss 86.18% , trend failing 11.1%
# 20 days random
# count 886, average absolute 2.47% , profit 1627.53% , loss 26.85% , trend failing 8.47%
# count 886, average absolute 2.65% , profit 1528.17% , loss 50.05% , trend failing 10.72%
# count 591, average absolute 1.94% , profit 1045.88% , loss 21.53% , trend failing 8.97%
# 15 days
# count 594, average absolute 1.8% , profit 365.24% , loss 10.05% , trend failing 10.1%
# count 594, average absolute 1.95% , profit 395.5% , loss 4.95% , trend failing 5.89%
# 10 days
# count 894, average absolute 2.46% , profit 970.95% , loss 81.8% , trend failing 14.99%
# count 596, average absolute 1.75% , profit 281.79% , loss 7.06% , trend failing 9.9%
# count 597, average absolute 1.73% , profit 287.0% , loss 8.33% , trend failing 12.06%
# dayofweek count 596, average absolute 4.07% , profit 642.56% , loss 77.19% , trend failing 17.79%
# dayofweek count 596, average absolute 4.38% , profit 582.84% , loss 88.09% , trend failing 20.13%
# 5 days random
# count 838, average absolute 2.3% , profit 522.07% , loss 91.74% , trend failing 21.72%
# count 838, average absolute 2.09% , profit 539.57% , loss 84.15% , trend failing 19.21%
# w250 count 803, average absolute 2.47% , profit 438.98% , loss 121.09% , trend failing 24.41%
# w180 count 852, average absolute 2.29% , profit 558.84% , loss 81.27% , trend failing 20.07%
# MGNT
# count 1243, average absolute 2.31% , profit 1580.98% , loss 56.97% , trend failing 11.91%
# count 621, average absolute 1.62% , profit 764.6% , loss 20.59% , trend failing 10.31%
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment