Last active
February 10, 2025 19:26
-
-
Save zwned/5525359723a2a7cc7363425cc3ed6636 to your computer and use it in GitHub Desktop.
Summarizes GitHub stars with ChatGPT
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
import csv | |
import openai | |
from math import ceil | |
from argparse import ArgumentParser | |
from github import Github | |
from github.GithubException import RateLimitExceededException, GithubException | |
from urllib3 import Retry | |
# System prompt sent with every OpenAI request: query_openai() passes it as the
# "system" message. It instructs the model to answer with "Summary:" and
# "Category:" lines, which are the prefixes parse_openai_response() looks for.
GLOBAL_PROMPT = """
You are a cybersecurity expert specializing in penetration testing, red teaming, and security research.
When provided with a GitHub repository, return both:
1. A brief summary of the repository’s purpose and functionality.
2. A category in 1-3 words (e.g., Reconnaissance, Exploitation, Persistence, Post-Exploitation, Privilege Escalation, Command and Control).
Your response **must** be formatted as follows:
---
Summary: <Provide a concise summary>
Category: <Provide a category>
---
Avoid using quotation marks. Ensure responses are on a single line with no extra line breaks.
"""
def starred_repos(user):
    """Yield every repository starred by ``user``.

    Args:
        user: An object exposing ``get_starred()`` that returns an iterable
            of repositories (for PyGithub, a paginated list).

    Yields:
        Each starred repository, in the order the API returns them.
    """
    # Iterate the result directly instead of computing page numbers by hand.
    # The previous implementation assumed exactly 30 items per page, which
    # skips or repeats work when the client is configured with a different
    # page size; the paginated list already walks its own pages.
    yield from user.get_starred()
def get_github_repo_info(gh_client, repo_full_name):
    """Look up a repository's star count and last-update timestamp.

    Args:
        gh_client: Authenticated GitHub client providing ``get_repo``.
        repo_full_name: Repository identifier passed to ``get_repo``.

    Returns:
        ``(stargazers_count, updated_at as an ISO 8601 string)`` on success,
        or ``(None, None)`` when the GitHub API call fails; the failure is
        reported on stderr.
    """
    try:
        details = gh_client.get_repo(repo_full_name)
        star_count = details.stargazers_count
        updated_iso = details.updated_at.isoformat()  # ISO 8601 timestamp
    except RateLimitExceededException:
        # Listed before the general GithubException handler so rate limiting
        # gets its own message.
        sys.stderr.write(f"Error: GitHub API rate limit exceeded while fetching data for {repo_full_name}\n")
        return None, None
    except GithubException as e:
        sys.stderr.write(f"Warning: Failed to fetch repository data for {repo_full_name} (Error: {e})\n")
        return None, None
    return star_count, updated_iso
def config_retry(backoff_factor=1.0, total=8):
    """Build a urllib3 ``Retry`` policy with exponential backoff.

    Args:
        backoff_factor: Backoff factor handed to ``Retry``.
        total: Total number of retries handed to ``Retry``.

    Returns:
        A ``Retry`` instance configured with the given values.

    Note:
        This overwrites the class attribute ``Retry.DEFAULT_BACKOFF_MAX``,
        so the new ceiling applies to every ``Retry`` in the process.
    """
    # Raise the backoff ceiling to backoff_factor * 2 ** (total - 1).
    backoff_ceiling = backoff_factor * 2 ** (total - 1)
    Retry.DEFAULT_BACKOFF_MAX = backoff_ceiling
    policy = Retry(total=total, backoff_factor=backoff_factor)
    return policy
def query_openai(client, repo, model="gpt-4", temperature=0.7):
    """Ask the OpenAI chat API for a summary and category of a repository.

    Args:
        client: OpenAI client exposing ``chat.completions.create``.
        repo: Repository reference (the caller passes the HTML URL) that is
            interpolated into the user prompt.
        model: Chat model name; defaults to the previously hard-coded
            ``"gpt-4"``.
        temperature: Sampling temperature; defaults to the previously
            hard-coded ``0.7``.

    Returns:
        ``(summary, category)`` as parsed by ``parse_openai_response``, or
        ``("Error", "Error")`` when the API call raises ``openai.OpenAIError``
        (the error is reported on stderr).
    """
    prompt = f"Provide both a summary and a category for the following GitHub repository: {repo}"
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": GLOBAL_PROMPT},
                {"role": "user", "content": prompt},
            ],
            temperature=temperature,
        )
    except openai.OpenAIError as e:
        sys.stderr.write(f"Error querying OpenAI: {e}\n")
        return "Error", "Error"

    # The message content can be None (and choices can be empty); treat both
    # as an empty reply so the parser's defaults are returned instead of
    # crashing on ``None.strip()`` or an IndexError.
    content = response.choices[0].message.content if response.choices else None
    return parse_openai_response((content or "").strip())
def parse_openai_response(response):
    """Extract the summary and category from a model reply.

    Recognises lines beginning with ``Summary:`` or ``Category:`` (leading
    whitespace is ignored). A reply that puts both fields on one line, e.g.
    ``Summary: ... Category: ...``, is split at the ``Category:`` label.

    Args:
        response: Raw reply text; ``None`` or an empty string is allowed.

    Returns:
        ``(summary, category)``. Fields that are not found keep the defaults
        ``"Unknown"`` and ``"Uncategorized"``. If a label appears on several
        lines, the last occurrence wins.
    """
    summary_label = "Summary:"
    category_label = "Category:"
    summary = "Unknown"
    category = "Uncategorized"

    for raw_line in (response or "").splitlines():
        line = raw_line.strip()
        if line.startswith(summary_label):
            # Slice off only the leading label; str.replace would also delete
            # any later "Summary:" that is part of the text itself.
            remainder = line[len(summary_label):]
            # Handle the single-line form "Summary: ... Category: ...".
            summary_text, found, category_text = remainder.partition(category_label)
            summary = summary_text.strip()
            if found:
                category = category_text.strip()
        elif line.startswith(category_label):
            category = line[len(category_label):].strip()

    return summary, category
def parse_args():
    """Read the script's command-line options from ``sys.argv``.

    Returns:
        The parsed namespace with ``gh_user``, ``gh_token``, ``openai_token``
        (all required) and ``csv_file`` (optional, ``None`` when omitted).
    """
    cli = ArgumentParser(description="Export a GitHub user's starred repositories to CSV")

    # Mandatory options, registered in the order they appear in --help.
    mandatory = (
        ("--gh_user", "GitHub username"),
        ("--gh_token", "GitHub personal access token (required)"),
        ("--openai_token", "OpenAI API token (required)"),
    )
    for flag, description in mandatory:
        cli.add_argument(flag, required=True, help=description)

    cli.add_argument("--csv_file", help="Optional CSV file to write output instead of stdout")
    return cli.parse_args()
def main():
    """Export a user's starred repositories as CSV rows.

    Parses the command line, creates the OpenAI and GitHub clients, then
    writes one row per starred repository (URL, summary, category, stars,
    last updated) to the file given by ``--csv_file`` or to stdout.
    """
    args = parse_args()

    # Initialize OpenAI client once
    openai_client = openai.OpenAI(api_key=args.openai_token)

    # Initialize GitHub client (authentication required)
    gh_client = Github(args.gh_token, retry=config_retry())
    user = gh_client.get_user(args.gh_user)

    # Open CSV file or default to stdout
    if args.csv_file:
        csv_file = open(args.csv_file, mode="w", newline="", encoding="utf-8")
    else:
        csv_file = sys.stdout
        sys.stdout.reconfigure(line_buffering=True)  # Immediate output when using stdout

    try:
        writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL)

        # Header row
        writer.writerow(["Repository URL", "Summary", "Category", "Stars", "Last Updated"])

        # Process each starred repository
        for repo in starred_repos(user):
            repo_url = repo.html_url
            repo_full_name = repo.full_name

            # Fetch summary and category in one request
            summary, category = query_openai(openai_client, repo_url)
            stars, last_updated = get_github_repo_info(gh_client, repo_full_name)
            writer.writerow((repo_url, summary, category, stars, last_updated))
    finally:
        # Close the output file even if processing fails part-way, so rows
        # already written are flushed and the handle is not leaked. stdout is
        # deliberately left open.
        if args.csv_file:
            csv_file.close()
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment