Last active
June 1, 2025 16:51
-
-
Save ccerv1/7995ff87009dfe4ad6ebfc49658cb78a to your computer and use it in GitHub Desktop.
Look up the SBOM for a given repo
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # python sbom.py --analyze-test-repos | |
| import requests | |
| from typing import List, Dict, Tuple | |
| import os | |
| from datetime import datetime | |
| # Token read once, at import time, from the GITHUB_TOKEN environment variable (None when unset). | |
| GITHUB_TOKEN = os.getenv('GITHUB_TOKEN') | |
| # Sample repositories that main() iterates over when --analyze-test-repos is passed. | |
| TEST_REPOS = [ | |
| 'https://github.com/opensource-observer/oss-directory', | |
| 'https://github.com/wevm/viem', | |
| 'https://github.com/ethereum/remix-project', | |
| 'https://github.com/libp2p/go-libp2p', | |
| 'https://github.com/libp2p/js-libp2p' | |
| ] | |
class GitHubSBOMFetcher:
    """Fetch a repository's dependency graph from GitHub and render it as an SBOM.

    Dependencies are read through GitHub's GraphQL API (the
    ``dependencyGraphManifests`` connection) and can be converted into a
    minimal CycloneDX 1.4 document with :meth:`generate_sbom`.

    Diagnostic messages are written to stderr so that stdout can carry the
    SBOM JSON without being polluted.
    """

    def __init__(self, github_token: str = None, timeout: float = 30.0):
        """Initialize the SBOM fetcher with a GitHub token.

        Args:
            github_token: Token sent in the ``Authorization`` header. Falls
                back to the ``GITHUB_TOKEN`` environment variable when omitted.
            timeout: Seconds to wait for the GraphQL request before giving up.

        Raises:
            ValueError: If no token was passed and ``GITHUB_TOKEN`` is unset.
        """
        self.github_token = github_token or os.getenv('GITHUB_TOKEN')
        if not self.github_token:
            raise ValueError("GitHub token is required. Set GITHUB_TOKEN environment variable or pass it to the constructor.")
        self.graphql_url = "https://api.github.com/graphql"
        self.timeout = timeout
        self.headers = {
            # Use the token resolved above, so a token passed to the
            # constructor is honoured even when the environment has none.
            "Authorization": f"Bearer {self.github_token}",
            "Content-Type": "application/json",
        }

    @staticmethod
    def _log(message: str) -> None:
        """Write a diagnostic line to stderr, keeping stdout free for SBOM output."""
        import sys

        print(message, file=sys.stderr)

    def _extract_repo_info(self, repo_url: str) -> Tuple[str, str]:
        """Extract owner and repo name from a GitHub repository reference.

        Accepted forms are HTTP(S) URLs (``https://github.com/owner/repo``,
        optionally followed by extra path segments, a trailing slash or a
        ``.git`` suffix), SSH URLs (``git@github.com:owner/repo.git``) and the
        ``owner/repo`` shorthand.

        Returns:
            An ``(owner, repo)`` tuple.

        Raises:
            ValueError: If an owner and a repository name cannot be found.
        """
        url = repo_url.strip()
        if url.startswith(('http://', 'https://')):
            # Drop scheme, query string and fragment; the first remaining
            # segment is the host, the next two are owner and repository.
            location = url.split('://', 1)[1].split('?', 1)[0].split('#', 1)[0]
            segments = [part for part in location.split('/') if part][1:]
        elif url.startswith('git@'):
            _, _, path = url.partition(':')
            segments = [part for part in path.split('/') if part]
        else:
            # Assume format is "owner/repo"
            segments = url.split('/')
            if len(segments) != 2:
                raise ValueError("Invalid repository URL format")

        if len(segments) < 2:
            raise ValueError("Invalid repository URL format")

        owner, repo = segments[0], segments[1]
        # Strip only a trailing ".git"; a blanket replace would mangle names
        # such as "user.github.io".
        if repo.endswith('.git'):
            repo = repo[:-len('.git')]
        if not owner or not repo:
            raise ValueError("Invalid repository URL format")
        return owner, repo

    def get_source_type(self) -> str:
        """Return the source type for the SBOM."""
        return "github"

    def fetch_dependencies(self, repo_url: str) -> List[Dict]:
        """Fetch repository dependencies using GitHub's GraphQL API.

        Args:
            repo_url: Repository reference accepted by ``_extract_repo_info``.

        Returns:
            One dict per dependency, carrying the fields requested in the
            query plus a ``source_type`` key. An empty list is returned when
            the request fails, GraphQL reports errors, or the repository has
            no manifests; the reason is logged to stderr.

        Raises:
            ValueError: If ``repo_url`` cannot be parsed.
        """
        owner, repo = self._extract_repo_info(repo_url)
        query = """
        query($owner: String!, $repo: String!) {
          repository(owner: $owner, name: $repo) {
            dependencyGraphManifests(first: 100) {
              nodes {
                filename
                dependencies {
                  nodes {
                    hasDependencies
                    packageName
                    packageManager
                    packageUrl
                    requirements
                    relationship
                  }
                }
              }
            }
          }
        }
        """
        variables = {
            "owner": owner,
            "repo": repo,
        }
        try:
            response = requests.post(
                self.graphql_url,
                headers=self.headers,
                json={"query": query, "variables": variables},
                # Without a timeout a stalled connection would hang forever.
                timeout=self.timeout,
            )
            response.raise_for_status()
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            # Best effort: network, HTTP and JSON-decoding failures are
            # reported and turned into "no dependencies".
            self._log(f"Error fetching dependencies for {owner}/{repo}: {str(e)}")
            return []

        self._log(f"\nFetching dependencies for {owner}/{repo}")
        if not isinstance(data, dict):
            self._log(f"No repository data found for {owner}/{repo}")
            return []
        if "errors" in data:
            self._log(f"GraphQL errors: {data['errors']}")
            return []

        # JSON null arrives as None, so ``or {}`` is used instead of a
        # ``.get(key, {})`` default, which would not cover that case.
        repository = (data.get("data") or {}).get("repository")
        if not repository:
            self._log(f"No repository data found for {owner}/{repo}")
            return []

        manifests = (repository.get("dependencyGraphManifests") or {}).get("nodes")
        if not manifests:
            self._log(f"No dependency manifests found for {owner}/{repo}")
            return []

        # Extract all dependencies from all manifests
        source_type = self.get_source_type()
        all_dependencies = []
        for manifest in manifests:
            if not manifest:
                continue
            deps = (manifest.get("dependencies") or {}).get("nodes") or []
            self._log(f"Manifest {manifest.get('filename')}: {len(deps)} dependencies")
            # Tag every dependency with where it came from.
            all_dependencies.extend(
                dict(dep, source_type=source_type) for dep in deps if dep
            )
        return all_dependencies

    def generate_sbom(self, repo_url: str) -> Dict:
        """Generate a complete SBOM for the repository.

        Args:
            repo_url: Repository reference accepted by ``_extract_repo_info``.

        Returns:
            A dict shaped like a CycloneDX 1.4 document: one ``library``
            component per fetched dependency, with the package manager,
            requirements and relationship recorded as properties. The
            metadata timestamp is timezone-aware UTC.
        """
        from datetime import timezone

        dependencies = self.fetch_dependencies(repo_url)
        components = [
            {
                "type": "library",
                "name": dep.get("packageName"),
                "purl": dep.get("packageUrl"),
                "properties": [
                    {"name": "packageManager", "value": dep.get("packageManager")},
                    {"name": "requirements", "value": dep.get("requirements")},
                    {"name": "relationship", "value": dep.get("relationship")},
                ],
            }
            for dep in dependencies
        ]
        return {
            "bomFormat": "CycloneDX",
            "specVersion": "1.4",
            "version": 1,
            "metadata": {
                # An aware timestamp carries its UTC offset; utcnow() is
                # naive and deprecated.
                "timestamp": datetime.now(timezone.utc).isoformat(),
                "tools": [{
                    "vendor": "GitHub",
                    "name": "Dependency Graph API",
                    "version": "1.0",
                }],
            },
            "components": components,
        }
def analyze_dependencies_by_relationship(dependencies: List[Dict]) -> Dict[str, int]:
    """Count dependencies per relationship type.

    Entries lacking a ``relationship`` key are tallied under ``"unknown"``.
    Keys appear in the result in the order they are first encountered.
    """
    tally = {}
    for entry in dependencies:
        kind = entry["relationship"] if "relationship" in entry else "unknown"
        if kind in tally:
            tally[kind] += 1
        else:
            tally[kind] = 1
    return tally
def main():
    """Command-line entry point for the SBOM fetcher.

    With ``--analyze-test-repos`` the dependencies of every repository in
    ``TEST_REPOS`` are fetched and summarised by relationship type. With
    ``--repo_url`` a single SBOM is generated and written to ``--output``
    (or printed to stdout). With neither option the help text is shown.

    Any error is reported on stderr and the process exits with status 1.
    """
    import argparse
    import json
    import sys

    parser = argparse.ArgumentParser(description='Generate SBOM for a GitHub repository')
    parser.add_argument('--repo_url', help='GitHub repository URL (e.g., https://github.com/owner/repo)')
    parser.add_argument('--output', '-o', help='Output file path (default: stdout)')
    parser.add_argument('--analyze-test-repos', action='store_true', help='Analyze all test repositories')
    args = parser.parse_args()

    # Nothing requested: show help before building the fetcher, so a missing
    # token does not turn a plain invocation into an error.
    if not (args.analyze_test_repos or args.repo_url):
        parser.print_help()
        return

    try:
        fetcher = GitHubSBOMFetcher()
        if args.analyze_test_repos:
            print("\nAnalyzing test repositories:")
            print("-" * 50)
            for repo_url in TEST_REPOS:
                print(f"\nRepository: {repo_url}")
                dependencies = fetcher.fetch_dependencies(repo_url)
                relationship_counts = analyze_dependencies_by_relationship(dependencies)
                print("Dependencies by relationship type:")
                for relationship, count in relationship_counts.items():
                    print(f" {relationship}: {count}")
                print(f"Total dependencies: {len(dependencies)}")
                print("-" * 50)
        else:
            sbom = fetcher.generate_sbom(args.repo_url)
            if args.output:
                # Explicit encoding keeps the file identical across platforms.
                with open(args.output, 'w', encoding='utf-8') as f:
                    json.dump(sbom, f, indent=2)
            else:
                print(json.dumps(sbom, indent=2))
    except Exception as e:
        # Top-level boundary: report on stderr so stdout stays parseable, and
        # use sys.exit since the bare exit() helper is only provided by `site`.
        print(f"Error: {str(e)}", file=sys.stderr)
        sys.exit(1)
| # Run the CLI only when this file is executed as a script, not when it is imported. | |
| if __name__ == "__main__": | |
| main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment