Last active
March 16, 2026 19:16
-
-
Save nahamsec/6f2aac6288568c9d4e78d1bd216e861e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| HackerOne AI/LLM Timeline Scanner | |
| =================================== | |
| Uses the official HackerOne Hacker API to find when public bug bounty programs | |
| added AI / Chatbot / LLM to their scope. Produces a month-by-month trend. | |
| Created by @NahamSec using Claude AI | |
| Youtube.com/NahamSec | |
| Checks BOTH: | |
| - Structured scopes (target list) — with created_at dates | |
| - Policy text (free-form description) — catches programs like HubSpot | |
| Usage: | |
| pip install requests | |
| python bb_ai_changelog_scraper.py --username nahamsec --token YOUR_TOKEN | |
| # Or set as environment variables | |
| export H1_USERNAME=nahamsec | |
| export H1_TOKEN=your_token | |
| python bb_ai_changelog_scraper.py | |
| # Test with a few programs first | |
| python bb_ai_changelog_scraper.py --max-programs 20 | |
| # Fast mode: also pull from bounty-targets-data for comparison | |
| python bb_ai_changelog_scraper.py --include-bounty-targets | |
| Output: | |
| - ai_bounty_programs.csv (one row per AI scope item, with dates) | |
| - ai_bounty_programs.json (full structured data) | |
| - ai_timeline.csv (month-by-month trend data) | |
| - Console: program list + trend chart | |
| """ | |
| import requests | |
| import json | |
| import re | |
| import time | |
| import csv | |
| import sys | |
| import os | |
| import argparse | |
| from datetime import datetime | |
| from collections import defaultdict, OrderedDict | |
| # ============================================================================= | |
| # AI KEYWORD DETECTION | |
| # ============================================================================= | |
| # --- SCOPE ITEM PATTERNS --- | |
| # Applied to: asset_identifier, asset_type, instruction text | |
| # These indicate a real AI feature/product in the program's target list. | |
| SCOPE_KEYWORD_PATTERNS = [ | |
| # AI product/feature labels | |
| r'\bai[\s/]+ml\s+feature', # AI/ML Features | |
| r'\bai\s+feature', # AI Features | |
| r'\bml\s+feature', # ML Features | |
| r'\bai\s+scope\b', # AI Scope | |
| r'\bai[\s-]?powered\b', # AI-Powered / AI Powered | |
| r'\bgenai\b', # GenAI | |
| r'\bgenerative\s+ai\b', # Generative AI | |
| # AI product types | |
| r'\bai\s*chatbot\b', # AI ChatBot | |
| r'\bchatbot\b', # Chatbot | |
| r'\bchat[\s-]?bot\b', # Chat Bot / Chat-Bot | |
| r'\bcopilot\b', # Copilot | |
| r'\bco-pilot\b', # Co-pilot | |
| r'\bai\s+assistant\b', # AI Assistant | |
| r'\bai\s+agent\b', # AI Agent | |
| # LLM references | |
| r'\bllm\b', # LLM | |
| r'\blarge\s+language\s+model\b', # Large Language Model | |
| # Specific AI products | |
| r'\bchatgpt\b', # ChatGPT | |
| r'\bopenai\b', # OpenAI | |
| r'\bgpt[-\s]?\d\b', # GPT-4, GPT 4 | |
| # AI security | |
| r'\bprompt\s+injection\b', # Prompt Injection | |
| ] | |
| SCOPE_PATTERN = re.compile('|'.join(SCOPE_KEYWORD_PATTERNS), re.IGNORECASE) | |
| # --- DOMAIN PATTERNS --- | |
| # Applied to: asset_identifier (URLs/domains) | |
| # Catches domains like company.ai, ai.company.com, chat-ai.company.com | |
| DOMAIN_AI_PATTERNS = [ | |
| r'\.ai$', # ends in .ai (e.g. company.ai) | |
| r'\.ai[:/]', # .ai followed by : or / (e.g. company.ai/path) | |
| r'[./]ai\.', # .ai. or /ai. subdomain (e.g. ai.company.com) | |
| r'[-.]ai[-.]', # -ai- or -ai. in domain (e.g. chat-ai.company.com) | |
| ] | |
| DOMAIN_PATTERN = re.compile('|'.join(DOMAIN_AI_PATTERNS), re.IGNORECASE) | |
| # --- POLICY TEXT PATTERNS --- | |
| # Applied to: free-form policy/description text | |
| # Same core keywords but used with false-positive filtering | |
| POLICY_KEYWORD_PATTERNS = [ | |
| r'\bai[\s/]+ml\s+feature', # AI/ML Features | |
| r'\bai\s+feature', # AI Features | |
| r'\bml\s+feature', # ML Features | |
| r'\bai\s+scope\b', # AI Scope | |
| r'\bai[\s-]?powered\b', # AI-Powered | |
| r'\bgenai\b', # GenAI | |
| r'\bgenerative\s+ai\b', # Generative AI | |
| r'\bai\s*chatbot\b', # AI ChatBot | |
| r'\bchatbot\b', # Chatbot | |
| r'\bchat[\s-]?bot\b', # Chat Bot | |
| r'\bcopilot\b', # Copilot | |
| r'\bco-pilot\b', # Co-pilot | |
| r'\bai\s+assistant\b', # AI Assistant | |
| r'\bai\s+agent\b', # AI Agent | |
| r'\bllm\b', # LLM | |
| r'\blarge\s+language\s+model\b', # Large Language Model | |
| r'\bprompt\s+injection\b', # Prompt Injection | |
| ] | |
| POLICY_PATTERN = re.compile('|'.join(POLICY_KEYWORD_PATTERNS), re.IGNORECASE) | |
| # --- FALSE POSITIVE BLOCKLIST --- | |
| # Phrases where AI keywords appear in context of reporting rules, tool usage | |
| # policies, or quality warnings — NOT about AI features being in scope. | |
| FALSE_POSITIVE_PHRASES = [ | |
| # Report quality / AI slop warnings | |
| r'ai\s+slop', | |
| r'ai\s+generated\s+report', | |
| r'auto[\s-]?generated', | |
| r'generated\s+(largely\s+)?by\s+(llm|ai|chatgpt)', | |
| r'ai\s+hallucinate', | |
| r'hallucinated', | |
| # Policy warnings about using AI tools for reporting | |
| r'don.t\s+leak.*ai\s+service', | |
| r'leak.*to\s+(any\s+)?(saas|ai\s+service|chatgpt|llm)', | |
| r'(do\s+not|don.t|never)\s+(use|share|upload|submit|leak).*\b(ai|chatgpt|llm|gpt)\b', | |
| r'ai\s+services?\s+like\s+chatgpt', | |
| r'services?\s+such\s+as\s+(large\s+language\s+model|ai|llm|chatgpt)', | |
| r'(run|use)\s+(locally|on\s+your\s+own)', | |
| r'browser\s+plugin.*translation', | |
| r'large\s+language\s+model.{0,30}(leak|local|own\s+hardware)', | |
| # Report generation disclaimers | |
| r'(report|submission).{0,40}(generated|written|created)\s+(by|using|with)\s+(ai|llm|chatgpt|gpt)', | |
| r'(ai|llm|chatgpt).{0,30}(generated|written|created)\s+(report|submission)', | |
| r'without\s+careful\s+review', | |
| r'additional\s+work\s+on\s+our\s+side', | |
| r'invalid\s+report', | |
| # General tool usage disclaimers | |
| r'(use|using)\s+(of\s+)?(ai|llm).{0,30}(prohibited|not\s+allowed|forbidden|banned)', | |
| r'(ai|llm).{0,20}(tool|service|platform).{0,30}(prohibited|not\s+allowed|forbidden|banned)', | |
| ] | |
| FALSE_POSITIVE_PATTERN = re.compile('|'.join(FALSE_POSITIVE_PHRASES), re.IGNORECASE) | |
def check_scope_keywords(text):
    """
    Scan scope-item text (asset_identifier + asset_type + instruction)
    for AI/LLM keywords.

    Returns a sorted list of distinct, lower-cased matched keywords
    (empty list for empty/None input).
    """
    if not text:
        return []
    hits = {m.group().strip().lower() for m in SCOPE_PATTERN.finditer(text)}
    return sorted(hits)
def check_domain_ai(domain):
    """
    Decide whether a domain/URL looks AI-related (.ai TLD, "ai." label, etc.).

    Returns the matched fragment (whitespace-stripped) or None when the
    input is empty or contains no AI-looking pattern.
    """
    if not domain:
        return None
    hit = DOMAIN_PATTERN.search(domain)
    return hit.group().strip() if hit else None
def check_policy_keywords(text):
    """
    Scan free-form policy text for AI/LLM keywords.

    Returns a sorted list of distinct, lower-cased matched keywords
    (empty list for empty/None input). Callers are expected to filter
    false positives separately via is_false_positive_context().
    """
    if not text:
        return []
    hits = {m.group().strip().lower() for m in POLICY_PATTERN.finditer(text)}
    return sorted(hits)
def is_false_positive_context(text):
    """
    Tell whether *text* mentions AI only in a false-positive context —
    reporting rules, tool-usage policies, or report-quality warnings —
    rather than AI features actually being in scope.

    Empty/None input is never a false positive (returns False).
    """
    if not text:
        return False
    return FALSE_POSITIVE_PATTERN.search(text) is not None
| # ============================================================================= | |
| # HACKERONE API CLIENT | |
| # ============================================================================= | |
class HackerOneAPI:
    """Official HackerOne Hacker API v1 client.

    Thin wrapper over `requests` using HTTP basic auth (username, API token),
    a fixed inter-request sleep as a crude rate limiter, and Retry-After-aware
    handling of 429 responses. `request_count` tracks every HTTP request
    issued (including retries) so the caller can report usage at the end.
    """
    BASE_URL = "https://api.hackerone.com/v1/hackers"
    def __init__(self, username, token, delay=0.1):
        # Basic-auth credential pair, passed to requests on every call.
        self.auth = (username, token)
        # Seconds slept before each request (simple client-side throttling).
        self.delay = delay
        self.session = requests.Session()
        self.session.headers.update({
            'Accept': 'application/json',
        })
        # Total HTTP requests made, including rate-limit retries.
        self.request_count = 0
    def _get(self, endpoint, params=None):
        """Make authenticated GET request with rate limiting.

        Returns the decoded JSON body on HTTP 200, or None on any other
        failure. 429 responses are retried (recursively) after honoring the
        Retry-After header; 401 aborts the whole program via sys.exit since
        nothing can proceed without valid credentials.
        """
        time.sleep(self.delay)
        self.request_count += 1
        url = f"{self.BASE_URL}{endpoint}"
        try:
            resp = self.session.get(url, auth=self.auth, params=params, timeout=30)
            if resp.status_code == 429:
                # NOTE(review): assumes Retry-After is numeric seconds — an
                # HTTP-date value would crash int(); confirm against the API.
                wait = int(resp.headers.get('Retry-After', 60))
                print(f"\n [Rate limited] Waiting {wait}s...")
                time.sleep(wait)
                # Recursive retry; each retry also sleeps and bumps the count.
                return self._get(endpoint, params)
            if resp.status_code == 401:
                print(f"\n [!] Authentication failed. Check your username and token.")
                sys.exit(1)
            if resp.status_code != 200:
                return None
            return resp.json()
        except requests.exceptions.RequestException as e:
            print(f"\n [Error] {e}")
            return None
    def _get_paginated(self, endpoint, params=None):
        """Fetch all pages from a paginated endpoint.

        Follows JSON:API-style `links.next` URLs until exhausted. Query
        params are only sent with the first request — the `next` URL already
        embeds them. Returns the concatenated `data` arrays; on any error
        mid-way the pages collected so far are returned (best-effort).
        """
        all_data = []
        url = f"{self.BASE_URL}{endpoint}"
        page_params = params or {}
        while url:
            time.sleep(self.delay)
            self.request_count += 1
            try:
                resp = self.session.get(url, auth=self.auth, params=page_params, timeout=30)
                if resp.status_code == 429:
                    wait = int(resp.headers.get('Retry-After', 60))
                    print(f"\n [Rate limited] Waiting {wait}s...")
                    time.sleep(wait)
                    continue  # retry the same page
                if resp.status_code != 200:
                    break  # give up; return what we collected so far
                data = resp.json()
                items = data.get('data', [])
                all_data.extend(items)
                # Follow pagination
                next_url = data.get('links', {}).get('next')
                if next_url:
                    url = next_url
                    page_params = {}  # URL already has params
                else:
                    break
            except requests.exceptions.RequestException:
                break
        return all_data
    def get_programs(self):
        """Fetch all public bounty programs with policy text."""
        print(" Fetching programs via API...", flush=True)
        programs = self._get_paginated('/programs', params={
            'page[size]': 100,
        })
        print(f" Got {len(programs)} programs", flush=True)
        return programs
    def get_structured_scopes(self, handle):
        """Fetch structured scopes for a program (includes created_at dates)."""
        scopes = self._get_paginated(f'/programs/{handle}/structured_scopes', params={
            'page[size]': 100,
        })
        return scopes
| # ============================================================================= | |
| # BOUNTY TARGETS DATA (optional supplement) | |
| # ============================================================================= | |
# Canonical raw-GitHub URL for arkadiyt/bounty-targets-data's HackerOne dump.
# (Fixed: the previous value pointed at "raw.githubusercontent.com", a mangled
# mirror-proxy hostname instead of raw.githubusercontent.com.)
BOUNTY_TARGETS_URL = "https://raw.githubusercontent.com/arkadiyt/bounty-targets-data/main/data/hackerone_data.json"
def fetch_bounty_targets():
    """Fetch HackerOne data from the bounty-targets-data repo.

    Returns the parsed JSON list on HTTP 200, or None on any failure —
    this source is strictly best-effort supplemental data, so all errors
    (network, timeout, bad JSON) are reported and swallowed.
    """
    print(" Downloading bounty-targets-data...")
    try:
        resp = requests.get(BOUNTY_TARGETS_URL, timeout=120)
        if resp.status_code == 200:
            data = resp.json()
            print(f" Got {len(data)} programs from bounty-targets-data")
            return data
    except Exception as e:
        print(f" [!] Failed: {e}")
    return None
| # ============================================================================= | |
| # SCANNER | |
| # ============================================================================= | |
def scan_programs(api, max_programs=None, include_bounty_targets=False):
    """
    Scan all HackerOne programs for AI/LLM in scope and policy.

    Args:
        api: HackerOneAPI client (uses get_programs / get_structured_scopes).
        max_programs: optional cap on how many programs to scan (testing aid).
        include_bounty_targets: also consult the bounty-targets-data dump for
            programs whose official API data showed no AI signal.

    Returns:
        List of per-program finding dicts (only programs with >= 1 AI match),
        each carrying policy matches, scope matches, and the earliest AI
        scope creation date when known.
    """
    results = []

    # Phase 1: fetch every program via the API (policy text comes back inline).
    programs = api.get_programs()
    if not programs:
        print(" [!] No programs returned from API")
        return results
    if max_programs:
        programs = programs[:max_programs]

    # Phase 1b: optionally index the bounty-targets-data dump by handle.
    bt_data = {}
    if include_bounty_targets:
        bt_raw = fetch_bounty_targets()
        if bt_raw:
            for p in bt_raw:
                handle = p.get('handle', '')
                if handle:
                    bt_data[handle] = p

    total = len(programs)
    print(f"\n Scanning {total} programs for AI/LLM keywords...", flush=True)
    print(f" (checking policy text + structured scopes with dates)\n", flush=True)

    for i, program_data in enumerate(programs):
        attrs = program_data.get('attributes', {})
        handle = attrs.get('handle', '')
        name = attrs.get('name', handle)
        policy = attrs.get('policy', '') or ''
        state = attrs.get('state', '')
        offers_bounties = attrs.get('offers_bounties', False)
        if not handle:
            continue

        # HackerOne: "soft_launched" = private/invite-only, anything else = public.
        is_public = state != 'soft_launched'

        # Progress line every 50 programs; private handles are redacted.
        if (i + 1) % 50 == 0 or i == 0:
            progress_name = handle if is_public else '[REDACTED]'
            print(f" [{i+1}/{total}] {progress_name} ({'public' if is_public else 'private'})...", flush=True)

        ai_findings = {
            'program': name,
            'handle': handle,
            'url': f"https://hackerone.com/{handle}",
            'offers_bounties': offers_bounties,
            'state': state,
            'visibility': 'public' if is_public else 'private',
            'policy_matches': [],
            'scope_matches': [],
            'earliest_ai_date': None,
        }
        has_ai = False

        # --- Check policy text (fast — already in memory) ---
        policy_matches = check_policy_keywords(policy)
        if policy_matches:
            # Grab an ~160-char context snippet per keyword, skipping
            # false-positive contexts (reporting rules, AI-slop warnings).
            snippets = []
            real_matches = []
            for kw in policy_matches:
                kw_pattern = re.compile(re.escape(kw), re.IGNORECASE)
                for m in kw_pattern.finditer(policy):
                    start = max(0, m.start() - 80)
                    end = min(len(policy), m.end() + 80)
                    snippet = policy[start:end].replace('\n', ' ').strip()
                    if is_false_positive_context(snippet):
                        continue  # Skip — reporting rules, not AI in scope
                    if snippet not in [s.strip('.') for s in snippets]:
                        snippets.append(f"...{snippet}...")
                        real_matches.append(kw)
                        break  # one snippet per keyword is enough
            if real_matches:
                has_ai = True
                ai_findings['policy_matches'] = sorted(set(real_matches))
                ai_findings['policy_snippets'] = snippets[:5]

        # --- Check structured scopes (API call — with created_at dates) ---
        scopes = api.get_structured_scopes(handle)
        for scope_data in scopes:
            scope_attrs = scope_data.get('attributes', {})
            asset_id = scope_attrs.get('asset_identifier', '') or ''
            asset_type = scope_attrs.get('asset_type', '') or ''
            instruction = scope_attrs.get('instruction', '') or ''
            eligible = scope_attrs.get('eligible_for_bounty', False)
            created_at = scope_attrs.get('created_at', '') or ''
            updated_at = scope_attrs.get('updated_at', '') or ''

            # Check 1: keyword match in asset_id + asset_type + instruction
            search_text = f"{asset_id} {asset_type} {instruction}"
            kw_matches = check_scope_keywords(search_text)
            # Check 2: domain pattern match (.ai TLD, ai. subdomain, etc.)
            domain_match = check_domain_ai(asset_id)

            matches = list(kw_matches)
            if domain_match:
                matches.append(f".ai domain ({domain_match})")
            if matches:
                # Instruction text that only talks about reporting rules is noise.
                if is_false_positive_context(instruction):
                    continue
                has_ai = True
                scope_date = created_at[:10] if created_at else ''
                ai_findings['scope_matches'].append({
                    'keywords': matches,
                    'asset_identifier': asset_id,
                    'asset_type': asset_type,
                    'instruction': instruction[:300],
                    'eligible_for_bounty': eligible,
                    'created_at': scope_date,
                    'updated_at': (updated_at[:10] if updated_at else ''),
                })
                # Track earliest AI scope date (ISO date strings sort lexically).
                if scope_date:
                    if not ai_findings['earliest_ai_date'] or scope_date < ai_findings['earliest_ai_date']:
                        ai_findings['earliest_ai_date'] = scope_date

        # --- Fall back to bounty-targets-data when the API showed nothing ---
        if not has_ai and handle in bt_data:
            bt_program = bt_data[handle]
            targets = bt_program.get('targets', {}) or {}
            for target in (targets.get('in_scope', []) or []):
                asset_id = target.get('asset_identifier', '') or ''
                asset_type = target.get('asset_type', '') or ''
                instruction = target.get('instruction', '') or ''
                search_text = f"{asset_id} {asset_type} {instruction}"
                kw_matches = check_scope_keywords(search_text)
                domain_match = check_domain_ai(asset_id)
                matches = list(kw_matches)
                if domain_match:
                    matches.append(f".ai domain ({domain_match})")
                if matches:
                    has_ai = True
                    ai_findings['scope_matches'].append({
                        'keywords': matches,
                        'asset_identifier': asset_id,
                        'asset_type': asset_type,
                        'instruction': instruction[:300],
                        'eligible_for_bounty': target.get('eligible_for_bounty', False),
                        'created_at': '',  # this source carries no dates
                        'updated_at': '',
                        'source': 'bounty-targets-data',
                    })

        if has_ai:
            results.append(ai_findings)
            all_kw = set(ai_findings['policy_matches'])
            for s in ai_findings['scope_matches']:
                all_kw.update(s['keywords'])
            sources = []
            if ai_findings['policy_matches']:
                sources.append('policy')
            if ai_findings['scope_matches']:
                sources.append('scope')
            date_str = ai_findings['earliest_ai_date'] or 'no date'
            vis = ai_findings['visibility'].upper()
            display_name = '[REDACTED]' if not is_public else name
            print(f" ✓ [{vis}] {display_name} [{', '.join(sources)}] "
                  f"keywords: {', '.join(sorted(all_kw))} | date: {date_str}", flush=True)

    # Summary
    public_count = len([r for r in results if r['visibility'] == 'public'])
    private_count = len([r for r in results if r['visibility'] == 'private'])
    print(f"\n Summary: {len(results)} programs with AI/LLM "
          f"({public_count} public, {private_count} private)")
    return results
| # ============================================================================= | |
| # OUTPUT: TREND TIMELINE | |
| # ============================================================================= | |
def print_trend_timeline(results):
    """Render the month-by-month AI adoption trend as a console bar chart.

    Programs without a scope creation date are listed separately at the
    bottom; private program names are redacted everywhere.
    """
    dated = []
    undated = []
    for entry in results:
        when = entry.get('earliest_ai_date')
        if when:
            label = entry['program'] if entry.get('visibility') == 'public' else '[REDACTED]'
            dated.append((label, when))
        else:
            undated.append(entry)
    if not dated:
        print("\n No date data available for timeline.")
        return

    # Bucket program names by "YYYY-MM".
    monthly = defaultdict(list)
    for label, when in dated:
        monthly[when[:7]].append(label)

    peak = max(len(v) for v in monthly.values())
    print("\n" + "=" * 80)
    print(" AI/LLM ADOPTION TIMELINE — HACKERONE BUG BOUNTY PROGRAMS")
    print("=" * 80)
    print(f"\n {'Month':<10} {'New':>4} {'Total':>6} {'Bar':<30} Programs")
    print(f" {'─' * 76}")

    running = 0
    for month in sorted(monthly):
        added = monthly[month]
        running += len(added)
        # Bar scaled to the busiest month, capped at 30 chars.
        bar = '█' * int((len(added) / max(peak, 1)) * 30)
        shown = ', '.join(added[:3])
        if len(added) > 3:
            shown += f", +{len(added) - 3} more"
        print(f" {month:<10} {len(added):>4} {running:>6} {bar:<30} {shown}")

    print(f"\n {'─' * 76}")
    print(f" {'TOTAL':<10} {running:>4}")

    if undated:
        print(f"\n + {len(undated)} programs with AI in scope but no scope creation date")
        for entry in undated[:10]:
            shown = entry['program'] if entry.get('visibility') == 'public' else '[REDACTED]'
            print(f" - {shown} (found in: {'policy' if entry['policy_matches'] else 'scope'})")
        if len(undated) > 10:
            print(f" ... and {len(undated) - 10} more")
def print_program_list(results):
    """Dump every matched program with its keywords, snippets, and scope items.

    Programs are ordered chronologically by earliest AI date (undated ones
    last, ties broken by name); private programs have name/URL redacted.
    """
    print("\n" + "=" * 80)
    print(" ALL HACKERONE PROGRAMS WITH AI/CHATBOT/LLM")
    print("=" * 80)
    if not results:
        print("\n No programs found.\n")
        return
    print(f"\n Total: {len(results)} programs\n")

    def sort_key(entry):
        # 'zzzz' pushes undated programs after any real ISO date.
        return (entry.get('earliest_ai_date') or 'zzzz', entry['program'])

    for entry in sorted(results, key=sort_key):
        keywords = set(entry.get('policy_matches', []))
        for item in entry.get('scope_matches', []):
            keywords.update(item.get('keywords', []))
        hidden = entry.get('visibility', '') != 'public'
        vis_tag = f"[{entry.get('visibility', 'unknown').upper()}]"
        shown = '[REDACTED]' if hidden else entry['program']

        print(f" {'─' * 70}")
        print(f" {vis_tag} {shown}")
        if not hidden:
            print(f" https://hackerone.com/{entry['handle']}")
        if entry.get('earliest_ai_date'):
            print(f" AI scope added: {entry['earliest_ai_date']}")
        print(f" Keywords: {', '.join(sorted(keywords))}")
        if entry.get('policy_matches'):
            print(f" Found in policy: {', '.join(entry['policy_matches'])}")
            for snippet in entry.get('policy_snippets', [])[:2]:
                print(f" \"{snippet}\"")
        if entry.get('scope_matches'):
            print(f" Scope items ({len(entry['scope_matches'])}):")
            for item in entry['scope_matches'][:5]:
                bounty = ' [BOUNTY]' if item.get('eligible_for_bounty') else ''
                date = f" (added: {item['created_at']})" if item.get('created_at') else ''
                print(f" • {item['asset_identifier']} ({item['asset_type']}){bounty}{date}")
                if item.get('instruction'):
                    print(f" {item['instruction'][:100]}")
        print()
def save_csv(results, filename):
    """Save results to CSV — one row per keyword match, with separate rows
    for policy matches vs scope matches.

    Fixes vs previous version:
    - The confirmation message now actually prints the output filename.
    - Policy rows fall back to 'Unknown' when earliest_ai_date is None
      (the key is always present, so a .get() default never fired and
      blank dates were written).
    """
    with open(filename, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow([
            'Program Name', 'Handle', 'Visibility', 'Date AI Was Introduced',
            'Keyword Matched', 'Where (Policy or Scope)'
        ])
        for r in results:
            # One row per policy keyword match.
            for kw in r.get('policy_matches', []):
                writer.writerow([
                    r['program'],
                    r['handle'],
                    r.get('visibility', 'unknown'),
                    r.get('earliest_ai_date') or 'Unknown',
                    kw,
                    'Policy',
                ])
            # One row per scope keyword match; prefer the item's own date.
            for s in r.get('scope_matches', []):
                scope_date = s.get('created_at', '') or r.get('earliest_ai_date', '') or 'Unknown'
                for kw in s.get('keywords', []):
                    writer.writerow([
                        r['program'],
                        r['handle'],
                        r.get('visibility', 'unknown'),
                        scope_date,
                        kw,
                        'Scope',
                    ])
    print(f" Programs CSV: {filename}")
def save_timeline_csv(results, filename):
    """Save the month-by-month adoption timeline to CSV.

    Columns: month (YYYY-MM), new programs that month, cumulative total,
    and a '; '-joined list of program names added. Programs without an
    earliest_ai_date are excluded. (Fixed: the confirmation message now
    prints the actual output filename.)
    """
    dated = [(r['program'], r['earliest_ai_date'])
             for r in results if r.get('earliest_ai_date')]
    monthly = defaultdict(list)
    for name, date_str in dated:
        monthly[date_str[:7]].append(name)
    with open(filename, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Month', 'New Programs', 'Cumulative Total', 'Programs Added'])
        cumulative = 0
        for month in sorted(monthly.keys()):
            programs = monthly[month]
            cumulative += len(programs)
            writer.writerow([month, len(programs), cumulative, '; '.join(programs)])
    print(f" Timeline CSV: {filename}")
def save_json(results, filename):
    """Save the full scan (summary + timeline + per-program detail) to JSON.

    Fixed: the confirmation message now prints the actual output filename.
    """
    dated = [(r['program'], r['earliest_ai_date'])
             for r in results if r.get('earliest_ai_date')]
    monthly = defaultdict(list)
    for name, date_str in dated:
        monthly[date_str[:7]].append(name)
    output = {
        'scan_date': datetime.now().isoformat(),
        'platform': 'HackerOne',
        'summary': {
            'total_programs_with_ai': len(results),
            'programs_with_dates': len(dated),
            'programs_without_dates': len(results) - len(dated),
        },
        # Month -> {count, program names}, in chronological key order.
        'timeline': {month: {
            'count': len(programs),
            'programs': programs,
        } for month, programs in sorted(monthly.items())},
        'programs': results,
    }
    with open(filename, 'w', encoding='utf-8') as f:
        # default=str keeps any stray non-JSON value serializable.
        json.dump(output, f, indent=2, ensure_ascii=False, default=str)
    print(f" Full JSON: {filename}")
| # ============================================================================= | |
| # MAIN | |
| # ============================================================================= | |
def main():
    """CLI entry point: parse arguments, verify credentials, scan, report, save.

    Exits with status 1 when credentials are missing or the initial API
    probe fails. Writes <output>.csv, ai_timeline.csv, and <output>.json.
    """
    parser = argparse.ArgumentParser(
        description='Find when HackerOne programs added AI/LLM to scope (uses official API)'
    )
    parser.add_argument('--username', type=str,
                        default=os.environ.get('H1_USERNAME', ''),
                        help='HackerOne API username (or set H1_USERNAME env var)')
    parser.add_argument('--token', type=str,
                        default=os.environ.get('H1_TOKEN', ''),
                        help='HackerOne API token (or set H1_TOKEN env var)')
    parser.add_argument('--max-programs', type=int, default=None,
                        help='Max programs to scan (for testing)')
    parser.add_argument('--delay', type=float, default=0.1,
                        help='Delay between API requests in seconds (default: 0.1)')
    parser.add_argument('--output', type=str, default='ai_bounty_programs',
                        help='Output filename prefix')
    parser.add_argument('--include-bounty-targets', action='store_true',
                        help='Also check bounty-targets-data repo for extra coverage')
    args = parser.parse_args()

    # Credentials are mandatory — bail out early with usage help.
    if not args.username or not args.token:
        print("Error: HackerOne API credentials required.")
        print()
        print(" Option 1: Pass as arguments")
        print(" python bb_ai_changelog_scraper.py --username YOUR_USER --token YOUR_TOKEN")
        print()
        print(" Option 2: Set environment variables")
        print(" export H1_USERNAME=your_username")
        print(" export H1_TOKEN=your_token")
        print(" python bb_ai_changelog_scraper.py")
        print()
        print(" Get your API token at: https://hackerone.com/settings/api_token/edit")
        sys.exit(1)

    print("=" * 80)
    print(" HackerOne AI/LLM Timeline Scanner")
    print(" Using official HackerOne Hacker API (policy text + scope dates)")
    print(f" Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("=" * 80)

    api = HackerOneAPI(args.username, args.token, delay=args.delay)

    # Verify credentials with a minimal one-item request before the long scan.
    print("\n Verifying API credentials...")
    test = api._get('/programs', params={'page[size]': 1})
    if test is None:
        print(" [!] API request failed. Check your credentials.")
        sys.exit(1)
    print(" ✓ Credentials valid\n")

    # Scan
    results = scan_programs(
        api,
        max_programs=args.max_programs,
        include_bounty_targets=args.include_bounty_targets,
    )

    # Console report first, then the three output files.
    print_program_list(results)
    print_trend_timeline(results)
    print("\n── Saving files ──────────────────────────────────────────────")
    save_csv(results, f"{args.output}.csv")
    # Timeline filename is fixed (matches the module docstring), not prefixed.
    save_timeline_csv(results, "ai_timeline.csv")
    save_json(results, f"{args.output}.json")

    print(f"\n API requests made: {api.request_count}")
    print(f" Completed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f" Total programs with AI/LLM: {len(results)}")
    print()
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment