Skip to content

Instantly share code, notes, and snippets.

@scubamut
Last active August 30, 2025 09:52
Show Gist options
  • Save scubamut/cff1987dcf9aeb0393a371397358acc0 to your computer and use it in GitHub Desktop.
Save scubamut/cff1987dcf9aeb0393a371397358acc0 to your computer and use it in GitHub Desktop.
def get_etf_data(symbols, period='2y'):
    """Download price history and metadata for each ETF symbol.

    For every symbol a ``yf.Ticker`` is created and its history for
    ``period`` is requested. A symbol whose history is empty, or whose
    download raises, is reported on stdout and left out of the result, so
    one bad ticker never aborts the whole batch.

    Args:
        symbols: Iterable of ticker symbols.
        period: Look-back period passed to ``Ticker.history``.

    Returns:
        dict: Maps each successfully downloaded symbol to
        ``{'price_data': <history DataFrame>, 'info': <info dict>}``.
        ``info`` is ``{}`` when the metadata lookup fails.
    """
    data = {}
    for symbol in symbols:
        try:
            ticker = yf.Ticker(symbol)
            hist = ticker.history(period=period)
            if hist.empty:
                print(f" ❌ {symbol}: No data")
                continue
            # Metadata is optional: keep the price history even if the info
            # lookup fails. Catch Exception (not a bare except) so that
            # KeyboardInterrupt / SystemExit still propagate.
            try:
                info = ticker.info
            except Exception:
                info = {}
            data[symbol] = {'price_data': hist, 'info': info}
        except Exception as e:
            # Best-effort download: report the failing symbol and move on.
            print(f" ❌ {symbol}: Error: {str(e)[:50]}...")
    return data
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
import logging
def get_data(symbols, start_date, end_date=None, frequency='daily'):
    """
    Fetch historical price data for one or more ETFs using yfinance.

    Args:
        symbols (str or list of str): Ticker symbol(s) of the ETF(s).
        start_date (str): Start date for data retrieval (YYYY-MM-DD).
        end_date (str, optional): End date for data retrieval (YYYY-MM-DD).
            Passed to ``yf.download`` unchanged; when None, yfinance's own
            default end date applies.
        frequency (str, optional): Data frequency ('daily', 'weekly',
            'monthly'). Defaults to 'daily'.

    Returns:
        tuple: ``(data_df, data_dict)``.
            data_df: DataFrame with two-level columns ordered
                (symbol, field). Columns that are entirely NaN and rows
                containing any NaN are dropped, so the index starts at the
                first date on which every remaining symbol has data.
            data_dict: Dictionary mapping each remaining symbol to a
                DataFrame of that symbol's fields, indexed by date
                (typically 'Close', 'High', 'Low', 'Open', 'Volume';
                prices are adjusted because auto_adjust=True is requested).
        An empty DataFrame and an empty dict are returned when nothing
        usable was downloaded or yfinance reports a missing ticker.

    Raises:
        ValueError: If ``frequency`` is not supported, or if ``start_date``
            is on or after ``end_date``.
    """
    intervals = {'daily': '1d', 'weekly': '1wk', 'monthly': '1mo'}
    if frequency not in intervals:
        raise ValueError(
            f"Unsupported frequency {frequency!r}; expected one of {sorted(intervals)}"
        )
    if end_date is not None and pd.Timestamp(start_date) >= pd.Timestamp(end_date):
        raise ValueError(
            f"start_date ({start_date}) must be earlier than end_date ({end_date})"
        )

    try:
        data = yf.download(
            symbols,
            start=start_date,
            end=end_date,
            interval=intervals[frequency],
            progress=False,
            auto_adjust=True,
        )
    except yf.exceptions.YFTickerMissingError as yfe:
        logging.error("Yahoo Finance error for %s: %s", symbols, yfe)
        return pd.DataFrame(), {}

    if data is None or data.empty:
        return pd.DataFrame(), {}

    if not isinstance(data.columns, pd.MultiIndex):
        # Some yfinance versions return flat columns for a single ticker;
        # lift them to (field, symbol) so the code below handles one shape.
        single = symbols if isinstance(symbols, str) else list(symbols)[0]
        data.columns = pd.MultiIndex.from_product([data.columns, [single]])

    # Drop symbols with no data at all, then rows where any symbol is missing.
    data_df = data.dropna(axis=1, how='all').dropna()
    if 'Close' not in data_df.columns.get_level_values(0):
        return pd.DataFrame(), {}

    # Symbols that survived the dropna (junk ETFs are gone).
    kept_symbols = list(data_df['Close'].columns)
    # Build the per-symbol frames while columns are still (field, symbol).
    data_dict = {s: data_df.xs(s, axis=1, level=1) for s in kept_symbols}

    data_df.columns = data_df.columns.swaplevel(0, 1)
    return data_df, data_dict
if __name__ == '__main__':
    # Example usage: three demo downloads (single symbol, several symbols
    # at weekly frequency, several symbols with the default end date).
    logging.basicConfig(level=logging.ERROR)
    start_date = '1990-01-01'
    end_date = '2023-03-31'  # pass None to use get_data's default end date

    # Single ETF test
    symbols_single = 'SPY'
    data_df, data_dict = get_data(symbols_single, start_date, end_date, frequency='daily')
    data_single = data_df
    if not data_single.empty:
        print("Data for single ETF:\n", data_single.head(2))
        print(f"columns: {data_single.columns}")
        print(f"index: {data_single.index}")
    else:
        print("No data for single ETF")
    print('\n===================================\n\n')

    # Multiple ETFs test:
    symbols_multiple = ['SPY', 'QQQ', 'BND']
    data_multiple, data_dict = get_data(symbols_multiple, start_date, end_date, frequency='weekly')
    if not data_multiple.empty:
        print("\nData for multiple ETFs:\n", data_multiple)
        print(f"columns: {data_multiple.columns}")
        print(f"index: {data_multiple.index}")
    else:
        print("No data for multiple ETFs")

    # Test with default end date: end_date is deliberately omitted so the
    # function's own default is exercised (the original passed end_date
    # here, which made this case identical to a fixed-end daily run).
    print('\n===================================\n\n')
    data_yesterday, data_dict = get_data(symbols_multiple, start_date, frequency='daily')
    if not data_yesterday.empty:
        print("\nData with default end date:\n", data_yesterday)
        print(f"columns: {data_yesterday.columns}")
        print(f"index: {data_yesterday.index}")
    else:
        print("No data with default end date")
from __future__ import print_function
import pandas as pd
import yfinance as yf
# Alternative ticker list, kept for reference:
# tickers = ['VCVSX','VWINX','VWEHX','VGHCX','VFIIX','VWAHX','FGOVX','FFXSX']
tickers = ['BND', 'VOO', 'VEA', 'VWO', 'VB', 'IWN', 'GOVT', 'BIL']

# Date window handed to yf.download.
start, end = '2000-01-01', '2016-12-31'

# Download every ticker for the window, then keep only the rows in which
# no column is missing.
df = yf.download(tickers, start=start, end=end)
df = df.dropna()

# Peek at the first three rows (only displays when run as a notebook cell).
df.head(3)
import pandas as pd
import yfinance as yf
# DAILY: download QQQ with adjusted prices and no progress bar. No start/end
# is given, so yfinance's default date range applies.
QQQ_d_df = yf.download(tickers="QQQ", auto_adjust=True, progress=False)

# WEEKLY: first row of each "W" bin.
# NOTE(review): weekly takes first() while every other frequency below takes
# last() -- confirm this asymmetry is intended.
QQQ_w_df = QQQ_d_df.resample(rule="W").first()

# MONTHLY ("ME"), QUARTERLY ("QE"), YEARLY ("YE"): last row of each bin.
# last() picks the final value of every column, so this is an end-of-period
# snapshot rather than an OHLC aggregation.
QQQ_m_df, QQQ_q_df, QQQ_y_df = (
    QQQ_d_df.resample(rule=period_rule).last()
    for period_rule in ("ME", "QE", "YE")
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment