-
-
Save greencoder/ab37304b6d47e6d1e55b4adf96ea7b47 to your computer and use it in GitHub Desktop.
from __future__ import print_function | |
import numpy as np | |
import pandas as pd | |
import talib as ta | |
import pandas_datareader as web | |
from sklearn.linear_model import LinearRegression | |
from sklearn.linear_model import Ridge | |
from sklearn.model_selection import cross_val_score | |
from sklearn.model_selection import train_test_split | |
from sklearn.naive_bayes import GaussianNB | |
from sklearn.pipeline import make_pipeline | |
from sklearn.preprocessing import StandardScaler | |
from sklearn.tree import DecisionTreeRegressor | |
def make_prediction(quotes_df, estimator):
    """Fit ``estimator`` on technical indicators and predict the next close.

    Indicator columns (moving averages, Bollinger Bands, RSI and the daily
    percent change of the close) are added to a copy of ``quotes_df``. The
    estimator is scored with walk-forward cross-validation, refit on every
    complete row, and then asked to predict the close that follows the last
    row of ``quotes_df``.

    Args:
        quotes_df: DataFrame with 'Open', 'High', 'Low', 'Close' and 'Volume'
            columns. Rows are assumed to be in chronological order (oldest
            first), because the last row is treated as "today" and the next
            row's close is used as the training target.
        estimator: Object with ``fit`` and ``predict`` methods that
            ``cross_val_score`` can score. It is fitted in place.

    Returns:
        Whatever ``estimator.predict`` returns for the single most recent row
        (for scikit-learn regressors, an array holding one value).

    Raises:
        ValueError: If no row has both a complete set of indicators and a
            following close to train on.
    """
    # Imported here so the function is self-contained with respect to the
    # module-level import block.
    from sklearn.model_selection import TimeSeriesSplit

    # Make a copy of the dataframe so we don't modify the original
    df = quotes_df.copy()

    # Add the five, twenty and fifty day moving average technical indicators
    df['MA_5'] = ta.MA(df['Close'].values, timeperiod=5, matype=0)
    df['MA_20'] = ta.MA(df['Close'].values, timeperiod=20, matype=0)
    df['MA_50'] = ta.MA(df['Close'].values, timeperiod=50, matype=0)

    # Add the Bollinger Bands technical indicators
    df['BOL_Upp'], df['BOL_Mid'], df['BOL_Low'] = ta.BBANDS(
        df['Close'].values, timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)

    # Add the Relative strength index technical indicator
    df['RSI'] = ta.RSI(df['Close'].values, 14)

    # Add the Simple Moving Average (Fast & Slow) technical indicators
    df['SMA_Fast'] = ta.SMA(df['Close'].values, 5)
    df['SMA_Slow'] = ta.SMA(df['Close'].values, 20)

    # Add the percent change of the daily closing price
    df['ClosingPctChange'] = df['Close'].pct_change()

    # Get today's record (the last record) so we can predict it later. Do this
    # before we add the 'NextClose' column so we don't have to drop it later
    df_today = df.iloc[-1:, :].copy()

    # Create a column of the next day's closing prices so we can train on it
    # and then eventually predict the value
    df['NextClose'] = df['Close'].shift(-1)

    # Get rid of the rows that have NaNs: the indicator warm-up rows at the
    # start and the final row, which has no next close yet
    df.dropna(inplace=True)
    if df.empty:
        raise ValueError('quotes_df does not contain enough history to '
                         'compute the indicators and a next-day close')

    # Decide which features to use for our regression. This will allow us to
    # tweak things during testing.
    # NOTE(review): MA_20, SMA_Slow and BOL_Mid all look like the same 20-period
    # simple moving average (matype=0), so these columns are probably
    # duplicates of each other - confirm and consider dropping two of them.
    features_to_fit = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_20',
                       'MA_50', 'RSI', 'SMA_Fast', 'SMA_Slow', 'BOL_Upp',
                       'BOL_Mid', 'BOL_Low', 'ClosingPctChange']

    # Create our features and target
    X = df[features_to_fit]
    y = df['NextClose']

    # Score with walk-forward splits: every fold trains on earlier rows and is
    # tested on later ones. A shuffled split would let the model train on rows
    # from the future of the rows it is scored on, which inflates the score
    # for time-series data.
    cv = cross_val_score(estimator, X, y, cv=TimeSeriesSplit(n_splits=10))

    # With no explicit scoring, cross_val_score uses the estimator's own
    # score method (R^2 for scikit-learn regressors). The 'Accuracy:' label is
    # kept so the printed output format does not change.
    print('Accuracy:', cv.mean())

    # Fit the regressor with the full dataset to be used with predictions
    estimator.fit(X, y)

    # Predict the close that follows today's record
    X_new = df_today[features_to_fit]
    next_price_prediction = estimator.predict(X_new)

    # Return the predicted closing price
    return next_price_prediction
# Choose which company to predict
symbol = 'AAPL'

# Import OHLCV data using DataReader. No start/end dates are passed, so the
# library's default date range applies.
# NOTE(review): the 'google' data source has been deprecated in newer
# pandas_datareader releases - confirm it still works with the installed
# version before relying on this call.
quotes_df = web.data.DataReader(symbol, 'google')

# make_prediction returns the output of estimator.predict for a single row,
# so the one predicted value is taken with [0] before formatting it.

# Predict the next closing price using linear regression
print('Unscaled Linear Regression:')
linreg = LinearRegression()
print('Predicted Closing Price: %.2f\n' % make_prediction(quotes_df, linreg)[0])

# Predict the next closing price using linear regression with scaled features
print('Scaled Linear Regression:')
pipe = make_pipeline(StandardScaler(), LinearRegression())
print('Predicted Closing Price: %.2f\n' % make_prediction(quotes_df, pipe)[0])

# Predict the next closing price using ridge regression
print('Unscaled Ridge Regression:')
ridge = Ridge()
print('Predicted Closing Price: %.2f\n' % make_prediction(quotes_df, ridge)[0])

# Predict the next closing price using ridge regression and scaled features
print('Scaled Ridge Regression:')
ridge_pipe = make_pipeline(StandardScaler(), Ridge())
print('Predicted Closing Price: %.2f\n' % make_prediction(quotes_df, ridge_pipe)[0])

# Predict the next closing price using decision tree regression
print('Unscaled Decision Tree Regressor:')
tree = DecisionTreeRegressor()
print('Predicted Closing Price: %.2f\n' % make_prediction(quotes_df, tree)[0])

# Gaussian Naive Bayes is a classifier, so fitting it to continuous closing
# prices fails with "ValueError: Unknown label type". Report that instead of
# letting the script crash on its last step.
print('Unscaled Gaussian Naive Bayes:')
nb = GaussianNB()
try:
    nb_prediction = make_prediction(quotes_df, nb)
except ValueError as err:
    print('Skipped: GaussianNB cannot be fitted to continuous prices (%s)\n' % err)
else:
    print('Predicted Closing Price: %.2f\n' % nb_prediction[0])
If anyone comes across this gist and is interested in trading notes and implementation ideas, please let me know. I'm getting high accuracy on my test set, but real-world performance is not great.
The Gaussian Naive Bayes predictor is defined on line 117, but is not passed to 'make_prediction' on line 118.
Correcting that results in a runtime error: 'ValueError: Unknown label type....'
Can you fix this?
I want to implement a project to predict the points scored by each player in the coming gameweek. I am targeting Fantasy Premier League (FPL) for this. What I want is a model that uses previous data, based on some features, to predict the total points a player will score. Example: consider a player, Lukaku. Suppose I have data for Lukaku for 28 gameweeks and I want to find out how many points he would score in the 29th gameweek (considering factors like goals scored, assists, minutes played, etc.).
I had thought about implementing linear regression, for which I can train the model on the 28 gameweeks, but then I don't know how to construct the input for the 29th week in order to predict the points for that week.
Can you help me with this?
I have a technical paper on a Bayesian approach, but it still needs to be coded. Want to collaborate on it? :)
Adapted from here:
https://www.reddit.com/r/stocks/comments/5mfdjk/howto_technical_trading_using_python_and_machine/