Skip to content

Instantly share code, notes, and snippets.

@grx7
Forked from greencoder/predict.py
Created May 3, 2018 18:18
Show Gist options
  • Save grx7/f2e22d768e057aadf7bbef9f14215251 to your computer and use it in GitHub Desktop.
Save grx7/f2e22d768e057aadf7bbef9f14215251 to your computer and use it in GitHub Desktop.
SKLearn Linear Regression Stock Price Prediction
from __future__ import print_function
import numpy as np
import pandas as pd
import talib as ta
import pandas_datareader as web
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
def _add_technical_indicators(df):
    """Add the TA-Lib indicator columns and the daily pct-change column to *df* in place."""
    # NOTE(review): TA-Lib is believed to accept only float64 ("double") input
    # arrays; the cast keeps integer-typed closes working -- confirm.
    close = df['Close'].values.astype('float64')

    # Five, twenty and fifty day moving averages
    df['MA_5'] = ta.MA(close, timeperiod=5, matype=0)
    df['MA_20'] = ta.MA(close, timeperiod=20, matype=0)
    df['MA_50'] = ta.MA(close, timeperiod=50, matype=0)

    # Bollinger Bands (upper, middle, lower)
    df['BOL_Upp'], df['BOL_Mid'], df['BOL_Low'] = ta.BBANDS(
        close, timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)

    # Relative strength index
    df['RSI'] = ta.RSI(close, 14)

    # Simple moving averages (fast & slow)
    df['SMA_Fast'] = ta.SMA(close, 5)
    df['SMA_Slow'] = ta.SMA(close, 20)

    # Percent change of the daily closing price
    df['ClosingPctChange'] = df['Close'].pct_change()


def make_prediction(quotes_df, estimator):
    """Fit *estimator* on historical quotes and predict the next closing price.

    Technical indicators are added to a copy of ``quotes_df``; the estimator
    is trained to map each day's features to the following day's close and is
    then asked to predict from the most recent row.

    Args:
        quotes_df: DataFrame with 'Open', 'High', 'Low', 'Close' and 'Volume'
            columns, one row per trading day (presumably oldest first --
            verify against the data source). It is not modified.
        estimator: Object accepted by ``cross_val_score`` that provides
            ``fit`` and ``predict``. It is fitted in place on all usable rows.

    Returns:
        Whatever ``estimator.predict`` returns for the single most recent row.

    Raises:
        ValueError: If no row has every feature and a next-day close, e.g.
            because the history is too short for the 50-day moving average.

    Side effects:
        Prints the mean ten-fold cross-validation score.
    """
    # Features used for the regression; edit this list to experiment.
    # ('MA_5' is computed but deliberately not listed here.)
    features_to_fit = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_20', 'MA_50',
                       'RSI', 'SMA_Fast', 'SMA_Slow', 'BOL_Upp', 'BOL_Mid', 'BOL_Low',
                       'ClosingPctChange']

    # Work on a copy so the caller's dataframe is left untouched
    df = quotes_df.copy()
    _add_technical_indicators(df)

    # Keep the latest row for the final prediction. It is taken before the
    # 'NextClose' column exists, so it never needs that column dropped.
    df_today = df.iloc[-1:, :].copy()

    # Target: the following day's close (NaN for the last row)
    df['NextClose'] = df['Close'].shift(-1)

    # Drop rows missing a feature or the target. Restricting the check to the
    # columns the model uses keeps an unrelated NaN column from discarding
    # otherwise usable rows.
    df = df.dropna(subset=features_to_fit + ['NextClose'])
    if df.empty:
        raise ValueError(
            'No complete training rows: more than 50 rows of quotes are '
            'needed to compute the indicators and the next-day target')

    # Features and target
    X = df[features_to_fit]
    y = df['NextClose']

    # Ten-fold cross-validation over every usable row. cross_val_score does
    # its own train/test splitting, so no separate hold-out split is needed.
    # The printed value is the estimator's default score; for scikit-learn
    # regressors that is presumably R^2 rather than a classification accuracy.
    cv_scores = cross_val_score(estimator, X, y, cv=10)
    print('Accuracy:', cv_scores.mean())

    # Fit on the full dataset for the actual prediction
    estimator.fit(X, y)

    # Predict the next close from the most recent row
    X_new = df_today[features_to_fit]
    next_price_prediction = estimator.predict(X_new)

    return next_price_prediction
# Choose which company to predict
symbol = 'AAPL'

# Download OHLCV quotes with DataReader. No start/end dates are passed, so the
# date range is whatever the library defaults to.
# NOTE(review): the 'google' source appears to have been retired in later
# pandas-datareader releases -- confirm and switch to a supported source.
quotes_df = web.data.DataReader(symbol, 'google')

# Each section below prints a cross-validation score followed by the predicted
# next-day closing price. make_prediction returns a one-element array, so the
# value is indexed with [0] before formatting; newer NumPy deprecates treating
# a size-1 array as a scalar.

# Linear regression
print('Unscaled Linear Regression:')
linreg = LinearRegression()
print('Predicted Closing Price: %.2f\n' % make_prediction(quotes_df, linreg)[0])

# Linear regression with scaled features
print('Scaled Linear Regression:')
pipe = make_pipeline(StandardScaler(), LinearRegression())
print('Predicted Closing Price: %.2f\n' % make_prediction(quotes_df, pipe)[0])

# Ridge regression
print('Unscaled Ridge Regression:')
ridge = Ridge()
print('Predicted Closing Price: %.2f\n' % make_prediction(quotes_df, ridge)[0])

# Ridge regression with scaled features
print('Scaled Ridge Regression:')
ridge_pipe = make_pipeline(StandardScaler(), Ridge())
print('Predicted Closing Price: %.2f\n' % make_prediction(quotes_df, ridge_pipe)[0])

# Decision tree regression
print('Unscaled Decision Tree Regressor:')
tree = DecisionTreeRegressor()
print('Predicted Closing Price: %.2f\n' % make_prediction(quotes_df, tree)[0])

# Gaussian Naive Bayes. The estimator passed is `nb`, so this section really
# exercises Naive Bayes rather than re-running the decision tree.
# NOTE(review): GaussianNB is a classifier and scikit-learn is expected to
# reject continuous price targets with ValueError -- confirm. The failure is
# reported instead of aborting the script.
print('Unscaled Gaussian Naive Bayes:')
nb = GaussianNB()
try:
    print('Predicted Closing Price: %.2f\n' % make_prediction(quotes_df, nb)[0])
except ValueError as err:
    print('Gaussian Naive Bayes could not be fitted: %s\n' % err)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment