Last active
May 29, 2025 16:56
-
-
Save 18182324/18113037c8e10eb972417089baaf86da to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import json | |
import time | |
from bs4 import BeautifulSoup | |
# ================================================
# CONFIGURATION
# ================================================

# SEC EDGAR requires a descriptive User-Agent that identifies the requester;
# replace with your real name and e-mail address before running.
HEADERS = {'User-Agent': 'Your Name ([email protected])'}

# List of tickers you want to scan
TICKERS = ['LUCY', 'CAPR', 'PLTR']

# Red flag keywords to scan for
# NOTE(review): scan_filing_for_keywords lowercases the filing text before
# matching, so the uppercase "EBBS" entry (and the "R&D" prefix) can only
# match if compared case-insensitively — confirm intent.
KEYWORDS = [
    "reversal", "reversals", "off-balance sheet", "special purpose entity",
    "related party", "nonrecurring", "inventory write-off", "EBBS",
    "insider sales", "auditor disagreement", "goodwill", "charge", "R&D write-off"
]

# SEC API URL templates; "{}" slots are filled with the CIK / path pieces.
# SEC_FILINGS_URL appears unused in this file — possibly kept for future use.
SEC_TICKER_LOOKUP = "https://data.sec.gov/submissions/CIK{}.json"
SEC_FILINGS_URL = "https://www.sec.gov/Archives/edgar/data/{}/{}"
# ================================================ | |
# UTILITY FUNCTIONS | |
# ================================================ | |
def get_cik(ticker):
    """Look up the SEC CIK (Central Index Key) for a stock ticker.

    Downloads the SEC's master ticker-to-CIK mapping and scans it for a
    case-insensitive ticker match.

    Args:
        ticker: Stock ticker symbol (e.g. "PLTR"); matched case-insensitively.

    Returns:
        The CIK as a 10-character zero-padded string, or None when the
        ticker is not present in the SEC mapping.

    Raises:
        requests.HTTPError: if the SEC endpoint returns an error status.
    """
    # Plain string literal: the original used an f-string with no placeholders.
    url = "https://www.sec.gov/files/company_tickers.json"
    # timeout prevents the script from hanging forever on a stalled connection.
    r = requests.get(url, headers=HEADERS, timeout=30)
    r.raise_for_status()  # fail loudly instead of trying to parse an error page
    data = r.json()
    wanted = ticker.lower()  # hoist the invariant conversion out of the loop
    for item in data.values():
        if item['ticker'].lower() == wanted:
            return str(item['cik_str']).zfill(10)
    return None
def get_latest_10k_exhibit_urls(cik):
    """Collect Exhibit 99.x URLs from a company's recent 10-K / 8-K filings.

    Fetches the company's recent-filings index from data.sec.gov, then for
    each 10-K or 8-K filing reads that filing's directory listing and keeps
    the first ``.htm`` file whose name contains ``ex99``.

    Args:
        cik: Zero-padded 10-digit CIK string (as returned by get_cik).

    Returns:
        List of absolute sec.gov URLs to Exhibit 99 HTML documents
        (at most one per matching filing).

    Raises:
        requests.HTTPError: if the submissions endpoint returns an error status.
    """
    url = SEC_TICKER_LOOKUP.format(cik)
    # timeout prevents the script from hanging on a stalled connection.
    r = requests.get(url, headers=HEADERS, timeout=30)
    r.raise_for_status()
    filings = r.json()['filings']['recent']
    urls = []
    # The submissions payload is column-oriented: filings['form'][i] and
    # filings['accessionNumber'][i] describe the same filing.
    for i, form in enumerate(filings['form']):
        if form not in ('10-K', '8-K'):
            continue  # guard clause: only annual reports and current reports
        accession = filings['accessionNumber'][i].replace("-", "")
        index_url = f"https://www.sec.gov/Archives/edgar/data/{int(cik)}/{accession}/index.json"
        filing_data = requests.get(index_url, headers=HEADERS, timeout=30).json()
        for entry in filing_data['directory']['item']:
            if 'ex99' in entry['name'].lower() and entry['name'].endswith('.htm'):
                urls.append(f"https://www.sec.gov/Archives/edgar/data/{int(cik)}/{accession}/{entry['name']}")
                break  # keep only the first Exhibit 99 per filing
        time.sleep(0.2)  # stay under SEC EDGAR's fair-access rate limit
    return urls
def scan_filing_for_keywords(url):
    """Download one filing document and return the red-flag keywords it contains.

    Args:
        url: Absolute URL of an HTML filing document on sec.gov.

    Returns:
        List of entries from KEYWORDS found (case-insensitively) in the
        filing's visible text, in KEYWORDS order.

    Raises:
        requests.HTTPError: if the document request returns an error status.
    """
    r = requests.get(url, headers=HEADERS, timeout=30)  # avoid hanging on a stalled connection
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')
    text = soup.get_text().lower()
    # BUG FIX: the text is lowercased, so each keyword must be lowercased too —
    # otherwise uppercase entries like "EBBS" (and "R&D write-off") never match.
    return [kw for kw in KEYWORDS if kw.lower() in text]
# ================================================ | |
# MAIN SCANNING LOGIC | |
# ================================================ | |
# Build the watchlist: for each ticker, resolve its CIK, pull recent
# exhibit documents, and accumulate the distinct red-flag keywords found.
watchlist = []
for ticker in TICKERS:
    cik = get_cik(ticker)
    if not cik:
        print(f"[!] CIK not found for {ticker}")
        continue
    found_flags = set()  # a set deduplicates as we go
    for exhibit_url in get_latest_10k_exhibit_urls(cik):
        found_flags.update(scan_filing_for_keywords(exhibit_url))
        time.sleep(0.5)  # be polite to sec.gov between document fetches
    if found_flags:
        watchlist.append((ticker, len(found_flags), list(found_flags)))

# Rank tickers: highest number of distinct red flags first.
watchlist.sort(key=lambda entry: entry[1], reverse=True)

# ================================================
# DISPLAY WATCHLIST
# ================================================
print("\n🔍 Red Flag Watchlist (Most Concerning at Top):")
for ticker, score, flags in watchlist:
    print(f"{ticker}: {score} red flags → {', '.join(flags)}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment