Instantly share code, notes, and snippets.
Last active
May 7, 2026 01:34
-
Star
1
(1)
You must be signed in to star a gist -
Fork
0
(0)
You must be signed in to fork a gist
-
-
Save jweinst1/3c3eb26c76d765dc4784309d07d3298f to your computer and use it in GitHub Desktop.
Snippets to get ticker lists from the iShares catalogue holdings CSVs.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import requests | |
| import csv | |
| import io | |
def get_ishares_tickers(etf_ticker: str = "IVV") -> list[str]:
    """
    Fetch current constituents of a major iShares US equity ETF from its
    holdings CSV on ishares.com.

    Pure Python + built-in csv module (no pandas). Handles the metadata
    preamble above the real header row and the huge legal disclaimer below
    the data rows. Tickers are normalised for yfinance (BRK.B -> BRK-B).

    Parameters
    ----------
    etf_ticker : str
        A supported iShares ETF ticker (case-insensitive):
        IVV  -> S&P 500
        IWM  -> Russell 2000
        IWB  -> Russell 1000
        IWV  -> Russell 3000
        IJH  -> Core S&P Mid-Cap
        IJR  -> Core S&P Small-Cap
        ITOT -> Core S&P Total U.S. Stock Market

    Returns
    -------
    list[str]
        Sorted, de-duplicated constituent tickers.

    Raises
    ------
    ValueError
        If the ETF is not in the internal product mapping.
    requests.HTTPError
        If neither download URL variant returns HTTP 200.
    """
    etf_ticker = etf_ticker.upper().strip()
    # ticker -> (product_id, slug, filename_base)
    # To add a new ETF: go to its iShares product page; the URL contains
    # /products/XXXXX/slug-here/
    etf_map = {
        "IVV": ("239726", "ishares-core-sp-500-etf", "IVV"),
        "IWM": ("239710", "ishares-russell-2000-etf", "IWM"),
        "IWB": ("239707", "ishares-russell-1000-etf", "IWB"),
        "IWV": ("239714", "ishares-russell-3000-etf", "IWV"),
        "IJH": ("239763", "ishares-core-sp-midcap-etf", "IJH"),
        "IJR": ("239774", "ishares-core-sp-smallcap-etf", "IJR"),
        "ITOT": ("239724", "ishares-core-sp-total-us-stock-market-etf", "ITOT"),
        # Example of how to add more:
        # "IWF": ("239706", "ishares-russell-1000-growth-etf", "IWF"),
    }
    if etf_ticker not in etf_map:
        supported = ", ".join(sorted(etf_map))
        raise ValueError(
            f"ETF '{etf_ticker}' not in mapping yet.\n"
            f"Currently supported: {supported}\n"
            "Add it to the etf_map dict (product ID + slug from the iShares product page URL)."
        )
    product_id, slug, base = etf_map[etf_ticker]
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "Accept": "text/csv",
    }
    # Modern download endpoint (works for most ETFs).
    url = (
        f"https://www.ishares.com/us/products/{product_id}/{slug}/"
        f"1467271812596.ajax?fileType=csv&fileName={base}_holdings&dataType=fund"
    )
    resp = requests.get(url, headers=headers, timeout=20)
    if resp.status_code != 200:
        # Fallback for a few older-style ETFs (e.g. original IWM).
        url = (
            f"https://www.ishares.com/us/products/{product_id}/{slug}/"
            f"1467271812596.ajax?fileType=csv&fileName={base}"
        )
        resp = requests.get(url, headers=headers, timeout=20)
    resp.raise_for_status()
    clean_tickers = _parse_ishares_holdings(resp.text)
    print(f"✅ Successfully parsed {len(clean_tickers)} {etf_ticker} tickers")
    return clean_tickers


def _parse_ishares_holdings(text: str) -> list[str]:
    """Extract a sorted, de-duplicated ticker list from a raw iShares holdings CSV.

    The file carries a metadata preamble before the real header row and a long
    legal disclaimer after the data rows; both are skipped.
    """
    lines = [line.strip() for line in text.splitlines() if line.strip()]
    # Locate the real header row (the one containing "Ticker").
    start_idx = 0
    for i, line in enumerate(lines):
        if line.startswith('"Ticker"') or ('Ticker' in line and 'Name' in line):
            start_idx = i
            break
    else:
        # Fallback: the first line with many commas is probably the header.
        for i, line in enumerate(lines):
            if line.count(',') > 8:
                start_idx = i
                break
    # Parse only the data section with the csv module.
    reader = csv.reader(io.StringIO('\n'.join(lines[start_idx:])))
    tickers: list[str] = []
    header = next(reader, None)  # skip header row
    if header:
        # Find the ticker column index (handles slight column-name variations).
        ticker_col = next(
            (i for i, col in enumerate(header) if col and ('Ticker' in col or 'Symbol' in col)),
            0,
        )
        for row in reader:
            if not row or len(row) <= ticker_col:
                continue
            ticker = row[ticker_col].strip()
            # Stop when we hit the legal disclaimer text.
            if (ticker.startswith(('The content contained herein', '©',
                                   'Holdings subject to change'))
                    or 'BlackRock' in ticker):
                break
            # Skip junk rows. BUGFIX: the previous `len(ticker) < 2` filter
            # silently dropped valid single-letter constituents (V, F, T, C,
            # K, O, D, A, ...), so every ETF list came back incomplete.
            if not ticker or ticker == '-':
                continue
            tickers.append(ticker.replace('.', '-'))  # BRK.B -> BRK-B for yfinance
    return sorted(set(tickers))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_sp500_tickers() -> list[str]:
    """
    Fetch current S&P 500 constituents from the iShares IVV ETF holdings CSV.

    Pure Python + built-in csv module (no pandas). Handles the metadata
    preamble above the real header row and the huge legal disclaimer below
    the data rows. Tickers are normalised for yfinance (BRK.B -> BRK-B).

    Returns
    -------
    list[str]
        Sorted, de-duplicated S&P 500 tickers.

    Raises
    ------
    requests.HTTPError
        If the holdings download does not return HTTP 200.
    """
    url = "https://www.ishares.com/us/products/239726/ishares-core-sp-500-etf/1467271812596.ajax?fileType=csv&fileName=IVV_holdings&dataType=fund"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "Accept": "text/csv",
    }
    resp = requests.get(url, headers=headers, timeout=20)
    resp.raise_for_status()
    text = resp.text
    lines = [line.strip() for line in text.splitlines() if line.strip()]
    # Find the start of the real data (header row that contains "Ticker").
    start_idx = 0
    for i, line in enumerate(lines):
        if line.startswith('"Ticker"') or ('Ticker' in line and 'Name' in line):
            start_idx = i
            break
    else:
        # Fallback: the first line with many commas is probably the header.
        for i, line in enumerate(lines):
            if line.count(',') > 8:
                start_idx = i
                break
    # Parse only the data section with the csv module.
    reader = csv.reader(io.StringIO('\n'.join(lines[start_idx:])))
    tickers: list[str] = []
    header = next(reader, None)  # skip header row
    if header:
        # Find the ticker column index (handles slight column-name variations).
        ticker_col = next(
            (i for i, col in enumerate(header) if col and ('Ticker' in col or 'Symbol' in col)),
            0,
        )
        for row in reader:
            if not row or len(row) <= ticker_col:
                continue
            ticker = row[ticker_col].strip()
            # Stop when we hit the legal disclaimer text.
            if (ticker.startswith(('The content contained herein', '©',
                                   'Holdings subject to change'))
                    or 'BlackRock' in ticker):
                break
            # Skip junk rows. BUGFIX: the previous `len(ticker) < 2` filter
            # silently dropped valid single-letter S&P 500 constituents
            # (V, F, T, C, K, O, D, A, ...), so the list came back incomplete.
            if (not ticker or
                    ticker == '-' or
                    ticker.startswith('RTYM')):  # index futures, not equities
                continue
            tickers.append(ticker.replace('.', '-'))  # BRK.B -> BRK-B for yfinance
    clean_tickers = sorted(set(tickers))
    print(f"✅ Successfully parsed {len(clean_tickers)} S&P 500 tickers")
    return clean_tickers
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment