-
-
Save grx7/f2e22d768e057aadf7bbef9f14215251 to your computer and use it in GitHub Desktop.
SKLearn Linear Regression Stock Price Prediction
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function | |
import numpy as np | |
import pandas as pd | |
import talib as ta | |
import pandas_datareader as web | |
from sklearn.linear_model import LinearRegression | |
from sklearn.linear_model import Ridge | |
from sklearn.model_selection import cross_val_score | |
from sklearn.model_selection import train_test_split | |
from sklearn.naive_bayes import GaussianNB | |
from sklearn.pipeline import make_pipeline | |
from sklearn.preprocessing import StandardScaler | |
from sklearn.tree import DecisionTreeRegressor | |
def make_prediction(quotes_df, estimator):
    """Predict the next closing price from a frame of daily quotes.

    Technical indicators (moving averages, Bollinger Bands, RSI and the daily
    percent change) are derived from the 'Close' column. ``estimator`` is
    scored with ten-fold cross-validation, fitted on every usable row, and
    then asked to predict from the most recent row.

    Args:
        quotes_df: DataFrame with at least 'Open', 'High', 'Low', 'Close' and
            'Volume' columns. The last row is treated as "today", so rows are
            presumably ordered oldest to newest -- confirm against the data
            source. The frame is copied, never modified.
        estimator: object accepted by ``cross_val_score`` that also offers
            ``fit`` and ``predict``. It is fitted in place.

    Returns:
        The result of ``estimator.predict`` for the single most recent row
        (the callers in this file format it as one number).

    Raises:
        ValueError: if ``quotes_df`` is empty, if no row has a complete set of
            features plus a next-day close, or if the most recent row has
            missing feature values (too little history for the indicators).
    """
    if quotes_df.empty:
        raise ValueError('quotes_df contains no rows')

    # Make a copy of the dataframe so we don't modify the original
    df = quotes_df.copy()
    close = df['Close'].values

    # Add the five, twenty and fifty day moving average technical indicators
    df['MA_5'] = ta.MA(close, timeperiod=5, matype=0)
    df['MA_20'] = ta.MA(close, timeperiod=20, matype=0)
    df['MA_50'] = ta.MA(close, timeperiod=50, matype=0)

    # Add the Bollinger Bands technical indicators
    df['BOL_Upp'], df['BOL_Mid'], df['BOL_Low'] = ta.BBANDS(
        close, timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)

    # Add the Relative strength index technical indicator
    df['RSI'] = ta.RSI(close, 14)

    # Add the Simple Moving Average (Fast & Slow) technical indicators
    df['SMA_Fast'] = ta.SMA(close, 5)
    df['SMA_Slow'] = ta.SMA(close, 20)

    # Add the percent change of the daily closing price
    df['ClosingPctChange'] = df['Close'].pct_change()

    # Decide which features to use for our regression. This will allow us to
    # tweak things during testing
    features_to_fit = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_20',
                       'MA_50', 'RSI', 'SMA_Fast', 'SMA_Slow', 'BOL_Upp',
                       'BOL_Mid', 'BOL_Low', 'ClosingPctChange']

    # Get today's record (the last record) so we can predict from it later.
    # Do this before we add the 'NextClose' column: today's row has no next
    # close and would otherwise be removed together with the NaN rows.
    X_new = df.iloc[-1:, :][features_to_fit].copy()
    if X_new.isnull().values.any():
        raise ValueError(
            'the most recent row has missing feature values; more price '
            'history is needed to compute the indicators')

    # Create a column of the next day's closing prices so we can train on it
    df['NextClose'] = df['Close'].shift(-1)

    # Drop rows with NaNs, but only look at the columns the model actually
    # uses so that gaps in unrelated columns don't throw away training data
    df.dropna(subset=features_to_fit + ['NextClose'], inplace=True)
    if df.empty:
        raise ValueError(
            'no complete training rows remain after dropping NaNs; more '
            'price history is needed to compute the indicators')

    # Create our features and target
    X = df[features_to_fit]
    y = df['NextClose']

    # Do ten-fold cross-validation on the same rows the final model is fitted
    # on. The printed value is the mean of the estimator's default score.
    # NOTE(review): with an integer cv the folds are presumably contiguous,
    # unshuffled blocks of rows for regressors -- confirm in the
    # cross_val_score docs, and consider a time-ordered splitter.
    cv = cross_val_score(estimator, X, y, cv=10)
    print('Accuracy:', cv.mean())

    # Fit the estimator with the full dataset to be used with predictions
    estimator.fit(X, y)

    # Predict and return the next closing price from today's record
    return estimator.predict(X_new)
# Choose which company to predict
symbol = 'AAPL'

# Import daily OHLCV data from Google using DataReader. No start/end dates are
# passed, so the date range is whatever the library defaults to.
# NOTE(review): the 'google' source is believed to have been removed from
# newer pandas-datareader releases -- confirm and pick a supported source.
quotes_df = web.data.DataReader(symbol, 'google')

# The estimators to compare: plain and feature-scaled linear/ridge regression
# plus a decision tree. The names stay at module level as before.
linreg = LinearRegression()
pipe = make_pipeline(StandardScaler(), LinearRegression())
ridge = Ridge()
ridge_pipe = make_pipeline(StandardScaler(), Ridge())
tree = DecisionTreeRegressor()

# Predict the next closing price with each regressor in turn.
for label, estimator in (
        ('Unscaled Linear Regression:', linreg),
        ('Scaled Linear Regression:', pipe),
        ('Unscaled Ridge Regression:', ridge),
        ('Scaled Ridge Regression:', ridge_pipe),
        ('Unscaled Decision Tree Regressor:', tree),
):
    print(label)
    # make_prediction returns a one-element array; take the element so that
    # '%.2f' formats a plain number rather than an array.
    print('Predicted Closing Price: %.2f\n'
          % make_prediction(quotes_df, estimator)[0])

# Predict the next closing price using Gaussian Naive Bayes.
# NOTE(review): GaussianNB is a classifier and is expected to reject the
# continuous price target with a ValueError -- confirm. The failure is
# reported instead of silently showing the decision tree's number again.
print('Unscaled Gaussian Naive Bayes:')
nb = GaussianNB()
try:
    print('Predicted Closing Price: %.2f\n'
          % make_prediction(quotes_df, nb)[0])
except ValueError as err:
    print('Gaussian Naive Bayes could not produce a prediction: %s\n' % err)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment