Skip to content

Instantly share code, notes, and snippets.

@ccerv1
Last active June 1, 2025 16:51
Show Gist options
  • Select an option

  • Save ccerv1/7995ff87009dfe4ad6ebfc49658cb78a to your computer and use it in GitHub Desktop.

Select an option

Save ccerv1/7995ff87009dfe4ad6ebfc49658cb78a to your computer and use it in GitHub Desktop.
Look up the SBOM for a given repo
# python sbom.py --analyze-test-repos
import requests
from typing import List, Dict, Tuple
import os
from datetime import datetime
# Personal access token read once at import time; None when the variable is unset.
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")

# Repositories analyzed by the --analyze-test-repos command-line flag.
TEST_REPOS = [
    "https://github.com/opensource-observer/oss-directory",
    "https://github.com/wevm/viem",
    "https://github.com/ethereum/remix-project",
    "https://github.com/libp2p/go-libp2p",
    "https://github.com/libp2p/js-libp2p",
]
class GitHubSBOMFetcher:
    """Fetch a repository's dependencies from GitHub and render them as an SBOM.

    Dependencies are read from the ``dependencyGraphManifests`` field of
    GitHub's GraphQL API and can be wrapped in a CycloneDX-style document
    via :meth:`generate_sbom`.
    """

    # Seconds to wait for the GraphQL endpoint. Without an explicit timeout
    # ``requests`` waits forever on a stalled connection.
    REQUEST_TIMEOUT = 30

    def __init__(self, github_token: str = None):
        """Initialize the SBOM fetcher with a GitHub token.

        Args:
            github_token: Token used for the ``Authorization`` header. When
                omitted (or empty) the ``GITHUB_TOKEN`` environment variable
                is used instead.

        Raises:
            ValueError: If no token is passed and the environment variable
                is not set.
        """
        self.github_token = github_token or os.getenv('GITHUB_TOKEN')
        if not self.github_token:
            raise ValueError("GitHub token is required. Set GITHUB_TOKEN environment variable or pass it to the constructor.")
        self.graphql_url = "https://api.github.com/graphql"
        self.headers = {
            # Use the resolved token so a constructor argument is honoured,
            # not only the value found in the environment at import time.
            "Authorization": f"Bearer {self.github_token}",
            "Content-Type": "application/json",
        }

    def _extract_repo_info(self, repo_url: str) -> Tuple[str, str]:
        """Extract the owner and repository name from a repository reference.

        Accepted forms are HTTP(S) URLs (``https://host/owner/repo``, with an
        optional ``.git`` suffix, trailing slash, or deeper path such as
        ``/tree/main``), SSH remotes (``git@host:owner/repo.git``) and the
        short ``owner/repo`` form.

        Args:
            repo_url: The repository reference to parse.

        Returns:
            An ``(owner, repo)`` tuple.

        Raises:
            ValueError: If an owner and a repository name cannot be found.
        """
        repo_url = repo_url.strip()
        if repo_url.startswith(('http://', 'https://')):
            # Drop the scheme, any query string/fragment, and then the host;
            # the first two remaining path segments are owner and repo.
            location = repo_url.split('://', 1)[1]
            location = location.split('#', 1)[0].split('?', 1)[0]
            segments = [part for part in location.split('/') if part][1:]
        elif repo_url.startswith('git@'):
            if ':' not in repo_url:
                raise ValueError("Invalid repository URL format")
            path = repo_url.split(':', 1)[1]
            segments = [part for part in path.split('/') if part]
        else:
            # Assume format is "owner/repo"
            segments = repo_url.split('/')
            if len(segments) != 2:
                raise ValueError("Invalid repository URL format")

        if len(segments) < 2:
            raise ValueError("Invalid repository URL format")

        owner, repo = segments[0], segments[1]
        # Strip only a trailing ".git"; a blanket replace would also mangle
        # names such as "user.github.io".
        if repo.endswith('.git'):
            repo = repo[:-len('.git')]
        if not owner or not repo:
            raise ValueError("Invalid repository URL format")
        return owner, repo

    def get_source_type(self) -> str:
        """Return the source type label attached to every dependency."""
        return "github"

    def fetch_dependencies(self, repo_url: str) -> List[Dict]:
        """Fetch repository dependencies using GitHub's GraphQL API.

        Progress and problems are reported with ``print``. Request failures,
        GraphQL errors and missing data are treated as "no dependencies".

        Args:
            repo_url: Repository reference understood by
                :meth:`_extract_repo_info`.

        Returns:
            One dict per dependency, carrying the queried fields plus a
            ``source_type`` key. Empty when nothing could be fetched.

        Raises:
            ValueError: If ``repo_url`` cannot be parsed.
        """
        owner, repo = self._extract_repo_info(repo_url)
        query = """
        query($owner: String!, $repo: String!) {
          repository(owner: $owner, name: $repo) {
            dependencyGraphManifests(first: 100) {
              nodes {
                filename
                dependencies {
                  nodes {
                    hasDependencies
                    packageName
                    packageManager
                    packageUrl
                    requirements
                    relationship
                  }
                }
              }
            }
          }
        }
        """
        variables = {
            "owner": owner,
            "repo": repo,
        }
        try:
            response = requests.post(
                self.graphql_url,
                headers=self.headers,
                json={"query": query, "variables": variables},
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            # Transport/HTTP failures and undecodable bodies are best-effort:
            # report and carry on with an empty result.
            print(f"Error fetching dependencies for {owner}/{repo}: {str(e)}")
            return []

        print(f"\nFetching dependencies for {owner}/{repo}")
        if not isinstance(data, dict):
            print(f"No repository data found for {owner}/{repo}")
            return []
        if "errors" in data:
            print(f"GraphQL errors: {data['errors']}")
            return []

        # Any level of the payload may be null, so fall back to empty
        # containers instead of chaining .get() on None.
        repository = (data.get("data") or {}).get("repository")
        if not repository:
            print(f"No repository data found for {owner}/{repo}")
            return []
        manifests = (repository.get("dependencyGraphManifests") or {}).get("nodes")
        if not manifests:
            print(f"No dependency manifests found for {owner}/{repo}")
            return []

        # Extract all dependencies from all manifests
        source_type = self.get_source_type()
        all_dependencies = []
        for manifest in manifests:
            if not manifest:
                continue
            nodes = (manifest.get("dependencies") or {}).get("nodes") or []
            deps = [dep for dep in nodes if dep]
            print(f"Manifest {manifest.get('filename')}: {len(deps)} dependencies")
            # Add source_type to each dependency
            all_dependencies.extend(
                {**dep, "source_type": source_type} for dep in deps
            )
        return all_dependencies

    def generate_sbom(self, repo_url: str) -> Dict:
        """Generate a CycloneDX-style SBOM document for the repository.

        Args:
            repo_url: Repository reference understood by
                :meth:`_extract_repo_info`.

        Returns:
            A dict with ``bomFormat``, ``specVersion``, ``version``,
            ``metadata`` and one ``components`` entry per fetched dependency
            (an empty list when nothing could be fetched).

        Raises:
            ValueError: If ``repo_url`` cannot be parsed.
        """
        # Local import: the file only imports the ``datetime`` class.
        from datetime import timezone

        dependencies = self.fetch_dependencies(repo_url)
        sbom = {
            "bomFormat": "CycloneDX",
            "specVersion": "1.4",
            "version": 1,
            "metadata": {
                # Timezone-aware UTC timestamp; utcnow() is deprecated and
                # yields a string without an offset.
                "timestamp": datetime.now(timezone.utc).isoformat(),
                "tools": [{
                    "vendor": "GitHub",
                    "name": "Dependency Graph API",
                    "version": "1.0",
                }],
            },
            "components": [],
        }
        for dep in dependencies:
            component = {
                "type": "library",
                "name": dep.get("packageName"),
                "purl": dep.get("packageUrl"),
                "properties": [
                    {
                        "name": "packageManager",
                        "value": dep.get("packageManager"),
                    },
                    {
                        "name": "requirements",
                        "value": dep.get("requirements"),
                    },
                    {
                        "name": "relationship",
                        "value": dep.get("relationship"),
                    },
                ],
            }
            sbom["components"].append(component)
        return sbom
def analyze_dependencies_by_relationship(dependencies: List[Dict]) -> Dict[str, int]:
    """Tally dependencies per relationship label.

    Entries that have no ``relationship`` key are grouped under
    ``"unknown"``. Labels appear in the result in first-seen order.
    """
    tally = {}
    for entry in dependencies:
        label = entry["relationship"] if "relationship" in entry else "unknown"
        if label in tally:
            tally[label] += 1
        else:
            tally[label] = 1
    return tally
def main():
    """Command-line entry point for the SBOM fetcher.

    With ``--analyze-test-repos`` it prints per-relationship dependency
    counts for every repository in ``TEST_REPOS``. With ``--repo_url`` it
    writes the generated SBOM as JSON to ``--output`` (or stdout). With
    neither it prints the usage help.

    Exits with status 1, after reporting the error on stderr, when anything
    fails.
    """
    import argparse
    import json
    import sys

    parser = argparse.ArgumentParser(description='Generate SBOM for a GitHub repository')
    parser.add_argument('--repo_url', help='GitHub repository URL (e.g., https://github.com/owner/repo)')
    parser.add_argument('--output', '-o', help='Output file path (default: stdout)')
    parser.add_argument('--analyze-test-repos', action='store_true', help='Analyze all test repositories')
    args = parser.parse_args()

    # Nothing requested: show usage before demanding a token, so a bare
    # invocation explains itself instead of failing on a missing GITHUB_TOKEN.
    if not (args.analyze_test_repos or args.repo_url):
        parser.print_help()
        return

    try:
        fetcher = GitHubSBOMFetcher()
        if args.analyze_test_repos:
            print("\nAnalyzing test repositories:")
            print("-" * 50)
            for repo_url in TEST_REPOS:
                print(f"\nRepository: {repo_url}")
                dependencies = fetcher.fetch_dependencies(repo_url)
                relationship_counts = analyze_dependencies_by_relationship(dependencies)
                print("Dependencies by relationship type:")
                for relationship, count in relationship_counts.items():
                    print(f" {relationship}: {count}")
                print(f"Total dependencies: {len(dependencies)}")
                print("-" * 50)
        else:
            sbom = fetcher.generate_sbom(args.repo_url)
            if args.output:
                with open(args.output, 'w', encoding='utf-8') as f:
                    json.dump(sbom, f, indent=2)
            else:
                print(json.dumps(sbom, indent=2))
    except Exception as e:
        # Top-level boundary: report on stderr and signal failure to the shell.
        # sys.exit is used because the bare exit() helper is only injected by
        # the site module and is not guaranteed to exist.
        print(f"Error: {str(e)}", file=sys.stderr)
        sys.exit(1)
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment