Created
January 7, 2018 23:22
-
-
Save dast1/0fd4e3aa3888f6b0de020dc433f7b746 to your computer and use it in GitHub Desktop.
This is a multiprocessing-enabled batch downloader for Yahoo stock prices.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Choose Ticker Universe (example: Run russell3000.py)
# NOTE(review): exec() runs the universe script inside this module's namespace,
# so only point it at a trusted local file. The script presumably defines the
# `Russell3000` sequence that the work-splitting code below reads -- confirm.
with open("russell3000(v.1.0).py") as universe_file:
    # The context manager closes the file handle as soon as the source is read.
    exec(universe_file.read())
# Import Libraries | |
import datetime as dt | |
import pandas_datareader as web | |
import multiprocessing as multi | |
import numpy as np | |
import math | |
# Build Yahoo fetch data function
def fetch_data(symbol):
    """Download the data for *symbol* from the 'yahoo' source and save it as CSV.

    The query window comes from the module-level ``start`` and ``end`` values,
    and the result is written to ``EOD_data/<symbol>.csv``.

    Args:
        symbol: Ticker symbol handed to ``web.DataReader``; also used as the
            CSV file name.

    Raises:
        Whatever ``web.DataReader`` or ``DataFrame.to_csv`` raise; errors are
        not handled here so the caller can decide how to report them.
    """
    import os  # local import: the file's import block does not provide os

    f_path = 'EOD_data/'
    # Create the output folder on first use; without it every to_csv call
    # fails and the download is misreported as a failure by the caller.
    os.makedirs(f_path, exist_ok=True)
    df = web.DataReader(symbol, 'yahoo', start, end)
    df.to_csv(f_path + symbol + '.csv')
# Build looping function
def loop(subset):
    """Fetch each ticker in *subset* in order, printing one status line per ticker.

    Args:
        subset: Sequence of ticker symbol strings to download.

    A failed download is reported and the loop moves on to the next ticker.
    """
    # enumerate() supplies the position directly; the previous
    # subset.index(ticker) lookups rescanned the list on every use and
    # reported the wrong position for a ticker that appears twice.
    for position, ticker in enumerate(subset):
        try:
            fetch_data(ticker)
        except Exception:
            # Exception (rather than a bare except) lets KeyboardInterrupt and
            # SystemExit propagate so a worker can still be stopped.
            print(ticker + '(%d): failed!' % position)
        else:
            print(ticker + '(%d): success!' % position)
        # NOTE(review): this stops every worker after the ticker at index 5,
        # i.e. at most six tickers per subset. It looks like a leftover
        # debugging limit -- confirm before relying on a full download.
        if position == 5:
            break  # break here:
# Set default start date and end date
# These stay at module level because fetch_data() reads them as globals.
start = dt.datetime(1980, 1, 1)
end = dt.datetime.today()

# Split work among CPU workers
# One less than the CPU count (presumably to leave a core for the parent),
# but never fewer than one worker so a single-core machine does not end up
# dividing by zero below.
num_workers = max(multi.cpu_count() - 1, 1)
# Ceiling division yields at most num_workers chunks that together cover every
# ticker. Rounding to nearest could produce more chunks than workers (silently
# dropping the tail) or fewer (IndexError when indexing by worker number).
chunk_size = max(math.ceil(len(Russell3000) / num_workers), 1)
par_start_idx = list(range(0, len(Russell3000), chunk_size))
par_end_idx = par_start_idx[1:]
par_end_idx.append(len(Russell3000))

# Bring it all together
# The guard keeps child processes that re-import this module from launching
# workers of their own.
if __name__ == '__main__':
    workers = []
    for first, last in zip(par_start_idx, par_end_idx):
        chunk = Russell3000[first:last]
        p = multi.Process(target=loop, args=(chunk,))
        p.start()
        workers.append(p)
    # Wait for every worker so the script only finishes once all downloads
    # have been attempted.
    for p in workers:
        p.join()
    # Tidy the namespace. Loop variables are not deleted by name here because
    # they do not exist when there was nothing to split.
    del start, end, par_end_idx, par_start_idx
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment