Created
February 8, 2014 20:28
-
-
Save dougvk/8889701 to your computer and use it in GitHub Desktop.
Ingest, scrub, and store intra-day Yahoo Finance ticker information in a pandas DataFrame
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Import and process Yahoo Finance quote data.
"""
from cStringIO import StringIO | |
import requests | |
from datetime import datetime | |
from pandas import DataFrame | |
from pandas.io.parsers import read_table | |
# Module-level state for the ingest script.

# Output accumulator: the reshaping loop at the bottom of this script appends
# one single-row DataFrame per ticker to this list.
ticker_dfs = []
# Label identifying this data source. Not read anywhere in this script;
# presumably consumed by whatever executes it -- confirm.
source_label = 'yahoo_hist'
# Re-bind `tickers` from the current namespace. This raises KeyError when no
# `tickers` name exists, so it effectively asserts that the list was supplied.
# NOTE(review): presumably the caller injects `tickers` before executing this
# script -- confirm against the harness.
tickers = locals()['tickers']
# Current date in UTC.
d = datetime.utcnow().date()
# Midnight (00:00:00) of that date as a naive datetime; used as the index of
# every per-ticker DataFrame built below.
today = datetime.combine(d, datetime.min.time())
# Column names applied to the downloaded CSV, in the same order as the
# 'ohgvp' field codes requested in the URL.
# NOTE(review): 'p' is believed to be Yahoo's previous-close code, so 'close'
# may hold the prior session's close -- confirm.
columns = ['open', 'high', 'low', 'volume', 'close']
# Helper function for 'ingest_yahoo_hist' method
def build_yahoo_hist_url(ticker, fields='ohgvp'):
    """Return the Yahoo quotes.csv download URL for ``ticker``.

    Args:
        ticker: Value substituted verbatim into the ``s=`` query parameter.
            The caller in this file passes several symbols joined with '+'.
        fields: Format codes substituted verbatim into the ``f=`` query
            parameter. Defaults to 'ohgvp', the value that used to be
            hard-coded, so existing callers get exactly the same URL.

    Returns:
        The URL as a string. No escaping is applied to either argument.
    """
    url = 'http://download.finance.yahoo.com/d/quotes.csv?s={}&f={}'
    return url.format(ticker, fields)
# Request the data and store in the original orientation by ticker.
# Yahoo limits requests to 200 tickers, so the list is fetched in batches.
# Rows are collected per response (rather than concatenating raw bodies) so
# that a body without a trailing newline cannot fuse its last row with the
# first row of the next batch.
csv_lines = []
for idx in range(0, len(tickers), 200):
    ticker_list = '+'.join(tickers[idx:idx + 200])
    url = build_yahoo_hist_url(ticker_list)
    response = requests.get(url)
    # Fail loudly on an HTTP error instead of parsing an error page as CSV.
    response.raise_for_status()
    csv_lines.extend(response.content.splitlines())

# Make the line delimiter just '\n' because pandas can't handle '\n\r'.
content = '\n'.join(csv_lines)

# read_table requires a buffer to read from
strio = StringIO(content)
ticker_df = read_table(
    strio,
    lineterminator='\n',
    names=columns,
    sep=',',
    index_col=False,
)
# Label each row with its ticker symbol. This assumes the service returned
# exactly one row per requested ticker, in request order.
ticker_df.index = tickers

# Reshape each row of the ticker df into its own per-ticker df with an index
# of today. (Panels might have suited this, but a plain list of frames is
# what gets produced.)
# iterrows() walks the rows positionally and yields (ticker, row Series); it
# replaces the `.ix[i]` lookup, which no longer exists in current pandas.
for name, row in ticker_df.iterrows():
    # A Series becomes a one-column frame; transposing turns it back into a
    # single row whose columns are open/high/low/volume/close.
    df = DataFrame(row).T.copy()
    df.index = [today]
    # Remember which ticker this frame belongs to, since the index is now
    # the date rather than the symbol.
    df.name = name
    ticker_dfs.append(df)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment