Last active
May 11, 2025 17:22
-
-
Save BigLep/4031aaf7fcfaf26fed74759d4d910d2f to your computer and use it in GitHub Desktop.
Find "filecoin-project" crates that should have team ownership. This is in support of https://github.com/filecoin-project/github-mgmt/issues/104. This was generated using Claude Code.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import requests | |
import csv | |
import time | |
import sys | |
def search_crates(query, page=1, per_page=100):
    """Search the crates.io API for crates matching ``query``, one page at a time.

    Args:
        query: Free-text search term. It is passed as a query parameter so
            that it is URL-encoded before being sent.
        page: Page number to request.
        per_page: Number of results requested per page.

    Returns:
        The decoded JSON response, or ``None`` when the request fails, the
        server keeps answering with a non-200 status, or the body is not
        valid JSON.
    """
    url = "https://crates.io/api/v1/crates"
    # Let requests build the query string: interpolating the raw term into
    # the URL breaks as soon as it contains a space, '&', '#', etc.
    params = {
        'q': query,
        'page': page,
        'per_page': per_page,
        'sort': 'alphabetical',
    }
    headers = {
        'User-Agent': 'FilecoinCratesScanner/1.0 (educational purposes)',
        'Accept': 'application/json'
    }
    # Without a timeout a stalled connection would block the scan forever.
    timeout_seconds = 30

    try:
        response = requests.get(
            url, params=params, headers=headers, timeout=timeout_seconds
        )
        if response.status_code != 200:
            print(f"Error searching crates: {response.status_code}")
            print(f"Response: {response.text}")
            if response.status_code not in (429, 403):
                return None
            # Treated as rate limiting: back off once, then retry once.
            print("Rate limited, waiting 60 seconds...")
            time.sleep(60)
            response = requests.get(
                url, params=params, headers=headers, timeout=timeout_seconds
            )
            if response.status_code != 200:
                return None
        return response.json()
    except (requests.RequestException, ValueError) as exc:
        # Network failures, timeouts and undecodable bodies are reported the
        # same way as HTTP errors so the caller can move on to the next query.
        print(f"Request failed while searching crates: {exc}")
        return None
def fetch_crate_owners(crate_name):
    """Fetch the owner logins of a single crate from the crates.io API.

    Args:
        crate_name: Name of the crate whose owners should be looked up.

    Returns:
        A list of owner login strings. The list is empty when the request
        fails, the server keeps answering with a non-200 status, or the
        response cannot be decoded. Entries without a login are skipped.
    """
    url = f"https://crates.io/api/v1/crates/{crate_name}/owners"
    headers = {
        'User-Agent': 'FilecoinCratesScanner/1.0 (educational purposes)',
        'Accept': 'application/json'
    }
    # Without a timeout a stalled connection would block the scan forever.
    timeout_seconds = 30

    try:
        response = requests.get(url, headers=headers, timeout=timeout_seconds)
        if response.status_code != 200:
            print(f"Error fetching owners for {crate_name}: {response.status_code}")
            if response.status_code not in (429, 403):
                return []
            # Treated as rate limiting: back off once, then retry once.
            print(f"Rate limited, waiting 30 seconds before retrying owner fetch for {crate_name}...")
            time.sleep(30)
            response = requests.get(url, headers=headers, timeout=timeout_seconds)
            if response.status_code != 200:
                return []
        owners = response.json().get('users') or []
    except (requests.RequestException, ValueError) as exc:
        # Network failures, timeouts and undecodable bodies yield "no owners"
        # instead of aborting the whole scan.
        print(f"Request failed while fetching owners for {crate_name}: {exc}")
        return []

    # Drop entries without a login so callers can safely join the result.
    return [owner['login'] for owner in owners if owner.get('login')]
def check_repo_org(repo_url, org_name="filecoin-project"):
    """Return True when ``repo_url`` points into the given GitHub organization.

    The comparison is case-insensitive. A missing/empty URL, or one that
    does not mention ``github.com``, is never a match.
    """
    if not repo_url:
        return False

    normalized_url = repo_url.lower()
    if "github.com" not in normalized_url:
        return False

    # HTTPS form, SSH form (host:org/...), and an org path ending in ".git".
    candidate_fragments = (
        f"github.com/{org_name}/",
        f"github.com:{org_name}/",
        f"github.com/{org_name}.git",
    )
    for fragment in candidate_fragments:
        if fragment.lower() in normalized_url:
            return True
    return False
def find_matching_crates(search_queries=None, org_name="filecoin-project"):
    """Find crates whose repository belongs to the given GitHub organization.

    Every search term is paged through via ``search_crates``. A crate is kept
    when ``check_repo_org`` accepts its repository URL, and each crate name is
    reported at most once even if several search terms return it.

    Args:
        search_queries: Iterable of search terms. Defaults to
            ``["filecoin", "fvm"]`` when omitted.
        org_name: GitHub organization the crate repository must belong to.

    Returns:
        A list of dicts with the keys ``name``, ``repository``, ``owners``
        (comma-separated logins) and ``downloads``.
    """
    # A None sentinel avoids sharing one mutable default list across calls.
    if search_queries is None:
        search_queries = ["filecoin", "fvm"]

    matching_crates = []
    # Names already recorded; gives O(1) de-duplication across pages/queries.
    seen_names = set()

    print(f"Searching for crates with keywords {search_queries} from GitHub organization: {org_name}")
    for query in search_queries:
        print(f"\nSearching for '{query}'...")
        page = 1
        page_count = 1  # Default to at least 1 page
        while page <= page_count:
            print(f"Fetching page {page}...")
            data = search_crates(query, page)
            if not data:
                print(f"No data returned for query '{query}' page {page}, moving to next query")
                break

            # Tolerate a missing or null 'crates' entry.
            crates = data.get('crates') or []

            # The first page tells us how many pages there are in total.
            if page == 1:
                total_crates = (data.get('meta') or {}).get('total', 0)
                per_page = len(crates)
                if per_page > 0:
                    # Ceiling division: a partial last page still counts.
                    page_count = (total_crates + per_page - 1) // per_page
                print(f"Total crates for search term '{query}': {total_crates} (across {page_count} pages)")

            if not crates:
                print(f"No crates found for query '{query}' page {page}, moving to next query")
                break

            print(f"Processing {len(crates)} crates on page {page}...")
            for crate in crates:
                repository = crate.get('repository')
                if not check_repo_org(repository, org_name):
                    continue

                crate_name = crate.get('name')
                if crate_name in seen_names:
                    continue
                seen_names.add(crate_name)
                print(f"Found matching crate: {crate_name}")

                # Owners are only fetched for crates that match the org.
                print(f"Fetching owners for {crate_name}...")
                owners = fetch_crate_owners(crate_name)
                # Skip empty/None logins so the join cannot raise TypeError.
                owners_str = ", ".join(owner for owner in owners if owner)

                matching_crates.append({
                    'name': crate_name,
                    'repository': repository,
                    'owners': owners_str,
                    'downloads': crate.get('downloads', 0)
                })

                # Be nice to the API: pause after each owners request.
                time.sleep(1)

            page += 1
            # Be nice to the API - longer delay between pages
            time.sleep(2)

    print(f"\nFound {len(matching_crates)} unique crates from {org_name}")
    return matching_crates
def main():
    """Scan crates.io and write the matching crates to a CSV file.

    Command-line arguments (both optional):
        1. GitHub organization name (default ``filecoin-project``).
        2. Comma-separated search terms (default ``filecoin,fvm``).

    The result is written to ``<org_name>-crates.csv`` in the current
    directory with the columns ``crateName``, ``crateRepo``, ``crateOwners``
    and ``crateDownloads``.
    """
    org_name = "filecoin-project"
    search_queries = ["filecoin", "fvm"]

    if len(sys.argv) > 1:
        org_name = sys.argv[1]
    if len(sys.argv) > 2:
        # Trim whitespace and drop blank entries ("a, b" or "a,,b"); a blank
        # term would otherwise be sent to the API as an empty search.
        requested = [term.strip() for term in sys.argv[2].split(',')]
        requested = [term for term in requested if term]
        if requested:
            search_queries = requested

    matching_crates = find_matching_crates(search_queries, org_name)

    # Save to CSV file; an explicit encoding keeps the output identical
    # regardless of the platform's default locale.
    output_file = f"{org_name}-crates.csv"
    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        fieldnames = ['crateName', 'crateRepo', 'crateOwners', 'crateDownloads']
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(
            {
                'crateName': crate['name'],
                'crateRepo': crate['repository'],
                'crateOwners': crate['owners'],
                'crateDownloads': crate['downloads'],
            }
            for crate in matching_crates
        )

    print(f"Results saved to {output_file}")
# Run the scan only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Here are the scripts used to update crate ownership according to the guidance provided in filecoin-project/github-mgmt#104 (comment)