Last active
February 11, 2020 22:24
-
-
Save PatrickAlphaC/97dcc47032aaff0248202bdca74e07ce to your computer and use it in GitHub Desktop.
Transform data to be digestible by scikit-learn
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datetime import datetime

# Positional offset used when building labels in transform_data: the adjusted
# close at TODAY_NUMERIC_VALUE is compared with the one at this position.
TRADING_DAYS_IN_2019 = 252
# Positional index of the row treated as "today" in each ticker's price table.
TODAY_NUMERIC_VALUE = 0

# Only rows whose index is strictly earlier than this date become features.
end = datetime.strptime('2019-02-07', '%Y-%m-%d')

# Width of every feature row in the matrix built by transform_data; rows with
# fewer values stay zero-padded on the right.
shape = 40000

# Row counts passed to transform_data for the train and test matrices.
# NOTE(review): each must equal the number of tickers that pass the length
# filter in transform_data, otherwise its final reshape fails — confirm
# against the data actually loaded.
train_valid_tickers = 2392
test_valid_tickers = 800
def transform_data(test_or_train_set, valid_tickers):
    """Turn per-ticker price tables into a feature matrix and label vector.

    Each element of ``test_or_train_set`` is read as ``ticker[0]`` (a price
    table exposing ``.loc``, ``.index``, ``.values`` and a
    ``'5. adjusted close'`` column — presumably a pandas DataFrame, confirm
    against the loader) and ``ticker[1]`` (a mapping holding ``'2. Symbol'``).

    Tickers whose table has ``TRADING_DAYS_IN_2019 + 1`` rows or fewer are
    skipped. For every remaining ticker:

    * the label is whether the adjusted close at position
      ``TODAY_NUMERIC_VALUE`` is greater than the one at position
      ``TRADING_DAYS_IN_2019``;
    * the features are all cells of the rows whose index is earlier than the
      module-level ``end``, flattened into one row and zero-padded on the
      right up to the module-level ``shape``.

    Args:
        test_or_train_set: iterable of ``(price_table, metadata)`` items.
        valid_tickers: number of tickers expected to pass the length filter;
            it fixes the row count of the returned arrays.

    Returns:
        A tuple ``(x_set, y_set, ticker_map)``: a ``(valid_tickers, shape)``
        float array, a ``(valid_tickers,)`` boolean label array, and the list
        of symbols in the same row order.

    Raises:
        IndexError: if a ticker yields more than ``shape`` feature values, or
            more tickers pass the filter than ``valid_tickers`` rows exist.
        ValueError: if the number of tickers that pass the filter differs
            from ``valid_tickers`` (the final reshape cannot succeed).
    """
    x_set = np.zeros((valid_tickers, shape))
    y_set = []
    ticker_map = []
    row = 0
    for ticker in test_or_train_set:
        prices = ticker[0]
        # Too little history to look TRADING_DAYS_IN_2019 rows away: skip.
        if len(prices) <= TRADING_DAYS_IN_2019 + 1:
            continue

        closes = prices['5. adjusted close'].to_numpy()
        y_set.append(closes[TODAY_NUMERIC_VALUE] > closes[TRADING_DAYS_IN_2019])

        values = prices.loc[prices.index < end].values.flatten()
        if values.size > shape:
            # Keep the exception type the element-by-element copy used to
            # raise when it ran past the end of the row.
            raise IndexError(
                f"ticker yields {values.size} feature values but each row "
                f"holds only {shape}"
            )
        # One vectorized copy; the rest of the row keeps its zero padding.
        x_set[row, :values.size] = values
        row += 1
        ticker_map.append(ticker[1]['2. Symbol'])

    # reshape doubles as a sanity check that exactly valid_tickers qualified.
    return x_set, np.asarray(y_set).reshape((valid_tickers,)), ticker_map
# Build the feature matrix, label vector and symbol list for each split.
x_train_set, y_train_set, train_ticker_map = transform_data(train_set, train_valid_tickers)
x_test_set, y_test_set, test_ticker_map = transform_data(test_set, test_valid_tickers)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment