Instantly share code, notes, and snippets.
Last active
May 7, 2026 01:34
-
Star
1
(1)
You must be signed in to star a gist -
Fork
0
(0)
You must be signed in to fork a gist
-
-
Save jweinst1/3c3eb26c76d765dc4784309d07d3298f to your computer and use it in GitHub Desktop.
Snippets to get ticker lists from the iShares catalogue holdings CSVs.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import requests | |
| import csv | |
| import io | |
def get_ishares_tickers(etf_ticker: str = "IVV") -> list[str]:
    """
    Fetch current constituents of a major iShares US equity ETF from its
    holdings CSV on ishares.com.

    Pure Python + built-in csv module (no pandas). Handles the metadata
    preamble above the real header row and the huge legal disclaimer below
    the data rows. Tickers are normalised for yfinance (BRK.B -> BRK-B).

    Parameters
    ----------
    etf_ticker : str
        A supported iShares ETF ticker (case-insensitive):
        IVV  -> S&P 500
        IWM  -> Russell 2000
        IWB  -> Russell 1000
        IWV  -> Russell 3000
        IJH  -> Core S&P Mid-Cap
        IJR  -> Core S&P Small-Cap
        ITOT -> Core S&P Total U.S. Stock Market

    Returns
    -------
    list[str]
        Sorted, de-duplicated constituent tickers.

    Raises
    ------
    ValueError
        If the ETF is not in the internal product mapping.
    requests.HTTPError
        If neither download URL variant returns HTTP 200.
    """
    etf_ticker = etf_ticker.upper().strip()
    # ticker -> (product_id, slug, filename_base)
    # To add a new ETF: go to its iShares product page; the URL contains
    # /products/XXXXX/slug-here/
    etf_map = {
        "IVV": ("239726", "ishares-core-sp-500-etf", "IVV"),
        "IWM": ("239710", "ishares-russell-2000-etf", "IWM"),
        "IWB": ("239707", "ishares-russell-1000-etf", "IWB"),
        "IWV": ("239714", "ishares-russell-3000-etf", "IWV"),
        "IJH": ("239763", "ishares-core-sp-midcap-etf", "IJH"),
        "IJR": ("239774", "ishares-core-sp-smallcap-etf", "IJR"),
        "ITOT": ("239724", "ishares-core-sp-total-us-stock-market-etf", "ITOT"),
        # Example of how to add more:
        # "IWF": ("239706", "ishares-russell-1000-growth-etf", "IWF"),
    }
    if etf_ticker not in etf_map:
        supported = ", ".join(sorted(etf_map))
        raise ValueError(
            f"ETF '{etf_ticker}' not in mapping yet.\n"
            f"Currently supported: {supported}\n"
            "Add it to the etf_map dict (product ID + slug from the iShares product page URL)."
        )
    product_id, slug, base = etf_map[etf_ticker]
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "Accept": "text/csv",
    }
    # Modern download endpoint (works for most ETFs).
    url = (
        f"https://www.ishares.com/us/products/{product_id}/{slug}/"
        f"1467271812596.ajax?fileType=csv&fileName={base}_holdings&dataType=fund"
    )
    resp = requests.get(url, headers=headers, timeout=20)
    if resp.status_code != 200:
        # Fallback for a few older-style ETFs (e.g. original IWM).
        url = (
            f"https://www.ishares.com/us/products/{product_id}/{slug}/"
            f"1467271812596.ajax?fileType=csv&fileName={base}"
        )
        resp = requests.get(url, headers=headers, timeout=20)
    resp.raise_for_status()
    clean_tickers = _parse_ishares_holdings(resp.text)
    print(f"✅ Successfully parsed {len(clean_tickers)} {etf_ticker} tickers")
    return clean_tickers


def _parse_ishares_holdings(text: str) -> list[str]:
    """Extract a sorted, de-duplicated ticker list from a raw iShares holdings CSV.

    The file carries a metadata preamble before the real header row and a long
    legal disclaimer after the data rows; both are skipped.
    """
    lines = [line.strip() for line in text.splitlines() if line.strip()]
    # Locate the real header row (the one containing "Ticker").
    start_idx = 0
    for i, line in enumerate(lines):
        if line.startswith('"Ticker"') or ('Ticker' in line and 'Name' in line):
            start_idx = i
            break
    else:
        # Fallback: the first line with many commas is probably the header.
        for i, line in enumerate(lines):
            if line.count(',') > 8:
                start_idx = i
                break
    # Parse only the data section with the csv module.
    reader = csv.reader(io.StringIO('\n'.join(lines[start_idx:])))
    tickers: list[str] = []
    header = next(reader, None)  # skip header row
    if header:
        # Find the ticker column index (handles slight column-name variations).
        ticker_col = next(
            (i for i, col in enumerate(header) if col and ('Ticker' in col or 'Symbol' in col)),
            0,
        )
        for row in reader:
            if not row or len(row) <= ticker_col:
                continue
            ticker = row[ticker_col].strip()
            # Stop when we hit the legal disclaimer text.
            if (ticker.startswith(('The content contained herein', '©',
                                   'Holdings subject to change'))
                    or 'BlackRock' in ticker):
                break
            # Skip junk rows. BUGFIX: the previous `len(ticker) < 2` filter
            # silently dropped valid single-letter constituents (V, F, T, C,
            # K, O, D, A, ...), so every ETF list came back incomplete.
            if not ticker or ticker == '-':
                continue
            tickers.append(ticker.replace('.', '-'))  # BRK.B -> BRK-B for yfinance
    return sorted(set(tickers))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_sp500_tickers() -> list[str]:
    """
    Fetch current S&P 500 constituents from the iShares IVV ETF holdings CSV.

    Pure Python + built-in csv module (no pandas). Handles the metadata
    preamble above the real header row and the huge legal disclaimer below
    the data rows. Tickers are normalised for yfinance (BRK.B -> BRK-B).

    Returns
    -------
    list[str]
        Sorted, de-duplicated S&P 500 tickers.

    Raises
    ------
    requests.HTTPError
        If the holdings download does not return HTTP 200.
    """
    url = "https://www.ishares.com/us/products/239726/ishares-core-sp-500-etf/1467271812596.ajax?fileType=csv&fileName=IVV_holdings&dataType=fund"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "Accept": "text/csv",
    }
    resp = requests.get(url, headers=headers, timeout=20)
    resp.raise_for_status()
    text = resp.text
    lines = [line.strip() for line in text.splitlines() if line.strip()]
    # Find the start of the real data (header row that contains "Ticker").
    start_idx = 0
    for i, line in enumerate(lines):
        if line.startswith('"Ticker"') or ('Ticker' in line and 'Name' in line):
            start_idx = i
            break
    else:
        # Fallback: the first line with many commas is probably the header.
        for i, line in enumerate(lines):
            if line.count(',') > 8:
                start_idx = i
                break
    # Parse only the data section with the csv module.
    reader = csv.reader(io.StringIO('\n'.join(lines[start_idx:])))
    tickers: list[str] = []
    header = next(reader, None)  # skip header row
    if header:
        # Find the ticker column index (handles slight column-name variations).
        ticker_col = next(
            (i for i, col in enumerate(header) if col and ('Ticker' in col or 'Symbol' in col)),
            0,
        )
        for row in reader:
            if not row or len(row) <= ticker_col:
                continue
            ticker = row[ticker_col].strip()
            # Stop when we hit the legal disclaimer text.
            if (ticker.startswith(('The content contained herein', '©',
                                   'Holdings subject to change'))
                    or 'BlackRock' in ticker):
                break
            # Skip junk rows. BUGFIX: the previous `len(ticker) < 2` filter
            # silently dropped valid single-letter S&P 500 constituents
            # (V, F, T, C, K, O, D, A, ...), so the list came back incomplete.
            if (not ticker or
                    ticker == '-' or
                    ticker.startswith('RTYM')):  # index futures, not equities
                continue
            tickers.append(ticker.replace('.', '-'))  # BRK.B -> BRK-B for yfinance
    clean_tickers = sorted(set(tickers))
    print(f"✅ Successfully parsed {len(clean_tickers)} S&P 500 tickers")
    return clean_tickers
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment