# Source: GitHub Gist @18182324 (18113037c8e10eb972417089baaf86da), last active May 29, 2025.
# SEC EDGAR red-flag keyword scanner — shared as a public gist snippet.
import requests
import json
import time
from bs4 import BeautifulSoup
# ================================================
# CONFIGURATION
# ================================================
# Your SEC EDGAR user agent string
# SEC requires a descriptive User-Agent (name + contact) on all requests.
HEADERS = {'User-Agent': 'Your Name ([email protected])'}
# List of tickers you want to scan
TICKERS = ['LUCY', 'CAPR', 'PLTR']
# Red flag keywords to scan for
# NOTE: matched case-insensitively against filing text (see scan_filing_for_keywords),
# so entries should be lowercase to match reliably.
KEYWORDS = [
"reversal", "reversals", "off-balance sheet", "special purpose entity",
"related party", "nonrecurring", "inventory write-off", "EBBS",
"insider sales", "auditor disagreement", "goodwill", "charge", "R&D write-off"
]
# SEC API base URL
# Submissions endpoint; formatted with a zero-padded 10-digit CIK.
SEC_TICKER_LOOKUP = "https://data.sec.gov/submissions/CIK{}.json"
# NOTE(review): SEC_FILINGS_URL is never used in this script — URLs are built
# inline in get_latest_10k_exhibit_urls. Kept for reference.
SEC_FILINGS_URL = "https://www.sec.gov/Archives/edgar/data/{}/{}"
# ================================================
# UTILITY FUNCTIONS
# ================================================
def get_cik(ticker):
    """Return the zero-padded 10-digit CIK for *ticker*, or None if not found.

    Downloads SEC's full ticker-to-CIK mapping (company_tickers.json) and
    scans it for a case-insensitive ticker match.

    Parameters
    ----------
    ticker : str
        Stock ticker symbol, e.g. "PLTR".

    Returns
    -------
    str | None
        10-character zero-padded CIK string, or None when no match exists.
    """
    # Fix: the original used an f-string with no placeholders.
    url = "https://www.sec.gov/files/company_tickers.json"
    r = requests.get(url, headers=HEADERS)
    # Fix: fail loudly on HTTP errors instead of trying to JSON-decode an error page.
    r.raise_for_status()
    data = r.json()
    # The mapping is keyed by arbitrary indices; each value holds ticker + cik_str.
    for item in data.values():
        if item['ticker'].lower() == ticker.lower():
            return str(item['cik_str']).zfill(10)
    return None
def get_latest_10k_exhibit_urls(cik):
    """Return Exhibit 99 (.htm) URLs from the recent 10-K / 8-K filings of *cik*.

    For each recent filing whose form type is 10-K or 8-K, the filing's
    index.json directory listing is fetched and the first file whose name
    contains 'ex99' and ends in '.htm' is collected.

    NOTE(review): the original gist lost its indentation; this reconstruction
    assumes the ``break`` ends the inner exhibit loop (first matching exhibit
    per filing) and the sleep rate-limits the outer filing loop — confirm
    against the author's intent.

    Parameters
    ----------
    cik : str
        Zero-padded 10-digit CIK (as returned by get_cik).

    Returns
    -------
    list[str]
        Absolute URLs of the matching exhibit documents.
    """
    url = SEC_TICKER_LOOKUP.format(cik)
    r = requests.get(url, headers=HEADERS)
    # Fix: surface HTTP errors instead of decoding an error page as JSON.
    r.raise_for_status()
    data = r.json()
    filings = data['filings']['recent']
    urls = []
    for i, form in enumerate(filings['form']):
        if form in ('10-K', '8-K'):
            # Accession numbers are dashed in the API but undashed in archive paths.
            accession = filings['accessionNumber'][i].replace("-", "")
            index_url = f"https://www.sec.gov/Archives/edgar/data/{int(cik)}/{accession}/index.json"
            filing_data = requests.get(index_url, headers=HEADERS).json()
            # Fix: renamed loop variable from `file` (shadowed a legacy builtin name).
            for entry in filing_data['directory']['item']:
                if 'ex99' in entry['name'].lower() and entry['name'].endswith('.htm'):
                    urls.append(
                        f"https://www.sec.gov/Archives/edgar/data/{int(cik)}/{accession}/{entry['name']}"
                    )
                    break  # first matching exhibit per filing is enough
        time.sleep(0.2)  # be polite to SEC's rate limits between filings
    return urls
def scan_filing_for_keywords(url):
    """Download the filing at *url* and return the KEYWORDS present in its text.

    The HTML is stripped to plain text with BeautifulSoup and lowercased, so
    matching is case-insensitive for lowercase keywords (uppercase KEYWORDS
    entries such as "EBBS" will never match — see CONFIGURATION note).

    Parameters
    ----------
    url : str
        Absolute URL of an exhibit document (.htm).

    Returns
    -------
    list[str]
        Every keyword from KEYWORDS found as a substring of the filing text.
    """
    r = requests.get(url, headers=HEADERS)
    # Fix: fail loudly on HTTP errors rather than scanning an error page.
    r.raise_for_status()
    text = BeautifulSoup(r.text, 'html.parser').get_text().lower()
    return [kw for kw in KEYWORDS if kw in text]
# ================================================
# MAIN SCANNING LOGIC
# ================================================
# For each ticker: resolve its CIK, collect exhibit URLs from recent
# 10-K/8-K filings, scan each exhibit for red-flag keywords, and score
# the ticker by the number of DISTINCT keywords found.
watchlist = []
for ticker in TICKERS:
    cik = get_cik(ticker)
    if not cik:
        print(f"[!] CIK not found for {ticker}")
        continue
    exhibit_urls = get_latest_10k_exhibit_urls(cik)
    red_flags = []
    for url in exhibit_urls:
        red_flags.extend(scan_filing_for_keywords(url))
        time.sleep(0.5)  # rate-limit between document downloads
    # Fix: de-duplicate once instead of building set(red_flags) twice.
    unique_flags = set(red_flags)
    if unique_flags:
        watchlist.append((ticker, len(unique_flags), list(unique_flags)))
# Sort tickers by red flag score, most concerning first.
watchlist.sort(key=lambda entry: entry[1], reverse=True)
# ================================================
# DISPLAY WATCHLIST
# ================================================
print("\n🔍 Red Flag Watchlist (Most Concerning at Top):")
for ticker, score, flags in watchlist:
    print(f"{ticker}: {score} red flags → {', '.join(flags)}")
# (End of gist content.)