Skip to content

Instantly share code, notes, and snippets.

@PatrickAlphaC
Last active February 11, 2020 22:24
Show Gist options
  • Select an option

  • Save PatrickAlphaC/97dcc47032aaff0248202bdca74e07ce to your computer and use it in GitHub Desktop.

Select an option

Save PatrickAlphaC/97dcc47032aaff0248202bdca74e07ce to your computer and use it in GitHub Desktop.
Transform data to be digestible by scikit-learn
from datetime import datetime

# Position in the adjusted-close series compared against "today" when
# transform_data builds a label; also the minimum history length (plus one)
# a ticker needs in order to be kept.
TRADING_DAYS_IN_2019 = 252

# Position in the adjusted-close series that is treated as "today".
TODAY_NUMERIC_VALUE = 0

# Only rows whose index is earlier than this moment are used as features.
end = datetime(2019, 2, 7)

# Number of columns in the feature matrix allocated by transform_data.
shape = 40000

# Row counts handed to transform_data for the training and test sets.
train_valid_tickers = 2392
test_valid_tickers = 800
def transform_data(test_or_train_set, valid_tickers):
    """Turn per-ticker price tables into feature and label arrays.

    Every item of ``test_or_train_set`` is read as ``item[0]`` (a
    pandas-style price table with a ``'5. adjusted close'`` column) and
    ``item[1]`` (metadata holding a ``'2. Symbol'`` entry).  Items whose
    table has ``TRADING_DAYS_IN_2019 + 1`` rows or fewer are skipped.

    For each item that is kept:

    * the label is whether the adjusted close at position
      ``TODAY_NUMERIC_VALUE`` is greater than the adjusted close at
      position ``TRADING_DAYS_IN_2019``;
    * the features are all cell values of the rows whose index is earlier
      than the module-level ``end``, flattened and written left-aligned
      into a zero-filled row of width ``shape``.

    Args:
        test_or_train_set: Iterable of ``(price_table, metadata)`` items.
        valid_tickers: Exact number of items expected to pass the length
            filter; it fixes the number of rows of the returned arrays.

    Returns:
        A tuple ``(x_set, y_set, ticker_map)`` where ``x_set`` is a
        ``(valid_tickers, shape)`` float array, ``y_set`` is a
        ``(valid_tickers,)`` array of labels and ``ticker_map`` is the list
        of symbols in the same row order.

    Raises:
        IndexError: If more than ``valid_tickers`` items pass the filter, or
            an item yields more than ``shape`` feature values.
        ValueError: If fewer than ``valid_tickers`` items pass the filter.
    """
    x_set = np.zeros((valid_tickers, shape))
    y_set = []
    ticker_map = []
    row = 0

    for ticker in test_or_train_set:
        frame = ticker[0]
        if len(frame) <= TRADING_DAYS_IN_2019 + 1:
            # Not enough history to compare against the older close.
            continue

        # Convert the column once instead of once per comparison operand.
        closes = frame['5. adjusted close'].to_numpy()
        label = closes[TODAY_NUMERIC_VALUE] > closes[TRADING_DAYS_IN_2019]

        values = frame.loc[frame.index < end].values.flatten()

        if row >= valid_tickers:
            raise IndexError(
                f"more than valid_tickers={valid_tickers} tickers have "
                f"enough history; x_set cannot hold row {row}"
            )
        if len(values) > shape:
            raise IndexError(
                f"ticker at row {row} has {len(values)} feature values, "
                f"which exceeds the row width shape={shape}"
            )

        # One slice assignment replaces the element-by-element copy; the
        # columns past len(values) keep their zero padding.
        x_set[row, :len(values)] = values

        y_set.append(label)
        ticker_map.append(ticker[1]['2. Symbol'])
        row += 1

    if row != valid_tickers:
        raise ValueError(
            f"expected {valid_tickers} tickers with enough history, "
            f"found {row}"
        )

    return x_set, np.asarray(y_set).reshape((valid_tickers,)), ticker_map
# Build the feature matrix, label vector and symbol list for the training set.
(x_train_set, y_train_set, train_ticker_map) = transform_data(
    train_set,
    train_valid_tickers,
)

# Same transformation for the held-out test set.
(x_test_set, y_test_set, test_ticker_map) = transform_data(
    test_set,
    test_valid_tickers,
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment