Skip to content

Instantly share code, notes, and snippets.

@jweinst1
Last active May 7, 2026 01:34
Show Gist options
  • Select an option

  • Save jweinst1/3c3eb26c76d765dc4784309d07d3298f to your computer and use it in GitHub Desktop.

Select an option

Save jweinst1/3c3eb26c76d765dc4784309d07d3298f to your computer and use it in GitHub Desktop.
Snippets to get ticker lists from the iShares catalogue holdings CSVs.
import requests
import csv
import io
def get_ishares_tickers(etf_ticker: str = "IVV") -> list[str]:
    """
    Fetch current constituents of a major iShares US equity ETF from its
    public holdings CSV.

    Pure Python + built-in csv module (no pandas). Handles the messy
    pre-header junk and the huge legal disclaimer iShares appends.

    Args:
        etf_ticker: ETF symbol, case-insensitive. Supported:
            IVV  -> S&P 500
            IWM  -> Russell 2000
            IWB  -> Russell 1000
            IWV  -> Russell 3000
            IJH  -> Core S&P Mid-Cap
            IJR  -> Core S&P Small-Cap
            ITOT -> Core S&P Total U.S. Stock Market

    Returns:
        Sorted, de-duplicated tickers normalized for yfinance
        (e.g. BRK.B -> BRK-B).

    Raises:
        ValueError: if the ETF is not in the internal mapping.
        requests.HTTPError: if both download URL formats fail.
    """
    etf_ticker = etf_ticker.upper().strip()
    # ticker -> (product_id, slug, filename_base)
    # To add a new ETF: go to its iShares product page -> the URL contains
    # /products/XXXXX/slug-here/
    etf_map = {
        "IVV": ("239726", "ishares-core-sp-500-etf", "IVV"),
        "IWM": ("239710", "ishares-russell-2000-etf", "IWM"),
        "IWB": ("239707", "ishares-russell-1000-etf", "IWB"),
        "IWV": ("239714", "ishares-russell-3000-etf", "IWV"),
        "IJH": ("239763", "ishares-core-sp-midcap-etf", "IJH"),
        "IJR": ("239774", "ishares-core-sp-smallcap-etf", "IJR"),
        "ITOT": ("239724", "ishares-core-sp-total-us-stock-market-etf", "ITOT"),
        # Example of how to add more:
        # "IWF": ("239706", "ishares-russell-1000-growth-etf", "IWF"),
    }
    if etf_ticker not in etf_map:
        supported = ", ".join(sorted(etf_map))
        raise ValueError(
            f"ETF '{etf_ticker}' not in mapping yet.\n"
            f"Currently supported: {supported}\n"
            "Add it to the etf_map dict (product ID + slug from the iShares product page URL)."
        )
    product_id, slug, base = etf_map[etf_ticker]
    text = _fetch_holdings_csv(product_id, slug, base)
    clean_tickers = _parse_holdings_csv(text)
    print(f"✅ Successfully parsed {len(clean_tickers)} {etf_ticker} tickers")
    return clean_tickers


def _fetch_holdings_csv(product_id: str, slug: str, base: str) -> str:
    """Download the raw holdings CSV text, trying the modern URL format first."""
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "Accept": "text/csv",
    }
    prefix = (
        f"https://www.ishares.com/us/products/{product_id}/{slug}/"
        "1467271812596.ajax?fileType=csv&fileName="
    )
    # Modern format (most ETFs)
    resp = requests.get(f"{prefix}{base}_holdings&dataType=fund",
                        headers=headers, timeout=20)
    if resp.status_code != 200:
        # Fallback for a few older-style ETFs (e.g. original IWM)
        resp = requests.get(f"{prefix}{base}", headers=headers, timeout=20)
    resp.raise_for_status()
    return resp.text


def _parse_holdings_csv(text: str) -> list[str]:
    """Extract clean, yfinance-ready tickers from an iShares holdings CSV.

    Skips the preamble above the real header row, stops at the trailing
    legal disclaimer, drops junk rows (blanks, cash placeholders, RTYM
    futures), and maps '.' share-class separators to '-' (BRK.B -> BRK-B).
    """
    lines = [line.strip() for line in text.splitlines() if line.strip()]
    # Find the start of the real data (header row that contains "Ticker")
    start_idx = 0
    for i, line in enumerate(lines):
        if line.startswith('"Ticker"') or ('Ticker' in line and 'Name' in line):
            start_idx = i
            break
    else:
        # Fallback: first line with many commas looks like tabular data
        for i, line in enumerate(lines):
            if line.count(',') > 8:
                start_idx = i
                break
    # Parse only the data section with the csv module
    reader = csv.reader(io.StringIO('\n'.join(lines[start_idx:])))
    tickers: list[str] = []
    header = next(reader, None)  # consume the header row
    if header:
        # Find the Ticker column index (handles slight column-name variations)
        ticker_col = next(
            (i for i, col in enumerate(header)
             if col and ('Ticker' in col or 'Symbol' in col)),
            0,
        )
        for row in reader:
            if not row or len(row) <= ticker_col:
                continue
            ticker = row[ticker_col].strip()
            # Stop when we hit the legal disclaimer text
            if (ticker.startswith('The content contained herein') or
                    ticker.startswith('©') or
                    ticker.startswith('Holdings subject to change') or
                    'BlackRock' in ticker):
                break
            # Skip junk rows: blanks/cash placeholders (len < 2 also covers
            # the empty string and '-') and RTYM index futures
            if len(ticker) < 2 or ticker == '-' or ticker.startswith('RTYM'):
                continue
            # Clean for yfinance
            tickers.append(ticker.replace('.', '-'))
    return sorted(set(tickers))
def get_sp500_tickers() -> list[str]:
    """
    Fetch current S&P 500 constituents from the iShares IVV ETF holdings CSV.

    Thin wrapper around get_ishares_tickers("IVV"): the previous body was a
    line-for-line copy of that function's fetch/parse logic, so delegate
    instead of duplicating.  (The progress message now reports "IVV" rather
    than "S&P 500"; the returned data is identical.)

    Returns:
        Sorted, de-duplicated tickers normalized for yfinance
        (BRK.B -> BRK-B).

    Raises:
        requests.HTTPError: if the holdings CSV cannot be downloaded.
    """
    return get_ishares_tickers("IVV")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment