Skip to content

Instantly share code, notes, and snippets.

@zwned
Last active February 10, 2025 19:26
Show Gist options
  • Save zwned/5525359723a2a7cc7363425cc3ed6636 to your computer and use it in GitHub Desktop.
Save zwned/5525359723a2a7cc7363425cc3ed6636 to your computer and use it in GitHub Desktop.
Summarizes GitHub stars with ChatGPT
#!/usr/bin/env python
import sys
import csv
import openai
from math import ceil
from argparse import ArgumentParser
from github import Github
from github.GithubException import RateLimitExceededException, GithubException
from urllib3 import Retry
# System prompt sent with every ChatGPT request (see query_openai). It asks the
# model for a "Summary:" line and a "Category:" line, which are the prefixes
# parse_openai_response() searches for when extracting the two fields.
GLOBAL_PROMPT = """
You are a cybersecurity expert specializing in penetration testing, red teaming, and security research.
When provided with a GitHub repository, return both:
1. A brief summary of the repository’s purpose and functionality.
2. A category in 1-3 words (e.g., Reconnaissance, Exploitation, Persistence, Post-Exploitation, Privilege Escalation, Command and Control).
Your response **must** be formatted as follows:
---
Summary: <Provide a concise summary>
Category: <Provide a category>
---
Avoid using quotation marks. Ensure responses are on a single line with no extra line breaks.
"""
def starred_repos(user):
    """Yield every repository starred by the given GitHub user.

    Args:
        user: An object exposing ``get_starred()`` that returns an iterable of
            repositories (a PyGithub user in this script).

    Yields:
        Each starred repository, in the order the API returns them.
    """
    # Iterate the result directly instead of computing page numbers by hand.
    # The previous implementation assumed 30 items per page, which skips or
    # duplicates repositories when the client uses a different per_page, and
    # it spent an extra request on totalCount before fetching anything.
    yield from user.get_starred()
def get_github_repo_info(gh_client, repo_full_name):
    """Look up a repository's star count and last-update timestamp.

    Args:
        gh_client: Authenticated GitHub client exposing ``get_repo``.
        repo_full_name: Repository identifier passed to ``get_repo``.

    Returns:
        ``(stars, updated_at)`` where ``updated_at`` is an ISO 8601 string, or
        ``(None, None)`` when the lookup fails (the failure is reported on
        stderr).
    """
    try:
        repository = gh_client.get_repo(repo_full_name)
        star_count = repository.stargazers_count
        updated_iso = repository.updated_at.isoformat()  # ISO 8601 timestamp
    except RateLimitExceededException:
        # Must be handled before GithubException, which is listed second so the
        # more specific rate-limit message wins.
        sys.stderr.write(f"Error: GitHub API rate limit exceeded while fetching data for {repo_full_name}\n")
    except GithubException as e:
        sys.stderr.write(f"Warning: Failed to fetch repository data for {repo_full_name} (Error: {e})\n")
    else:
        return star_count, updated_iso
    # Either handler above falls through to the shared "no data" result.
    return None, None
def config_retry(backoff_factor=1.0, total=8):
    """Build a urllib3 ``Retry`` policy with exponential backoff.

    The backoff cap is raised to ``backoff_factor * 2 ** (total - 1)`` so the
    final retry is not clipped by urllib3's default maximum.

    Args:
        backoff_factor: Multiplier applied to the exponential delay.
        total: Maximum number of retries.

    Returns:
        A configured ``Retry`` instance.
    """
    backoff_max = backoff_factor * 2 ** (total - 1)
    # Kept for backward compatibility: older urllib3 releases read the cap from
    # this class attribute when computing the delay.
    Retry.DEFAULT_BACKOFF_MAX = backoff_max
    try:
        # Newer urllib3 takes the cap per instance; the class attribute is only
        # the argument's default there, so assigning it above would not affect
        # instances created afterwards.
        return Retry(total=total, backoff_factor=backoff_factor, backoff_max=backoff_max)
    except TypeError:
        # urllib3 without the backoff_max keyword: rely on the class attribute.
        return Retry(total=total, backoff_factor=backoff_factor)
def query_openai(client, repo):
    """Ask the chat model for a summary and category of one repository.

    Args:
        client: OpenAI client exposing ``chat.completions.create``.
        repo: Repository reference interpolated into the user prompt (the
            caller passes the repository URL).

    Returns:
        ``(summary, category)`` as parsed by ``parse_openai_response``, or
        ``("Error", "Error")`` when the API call raises ``openai.OpenAIError``
        (the error is reported on stderr).
    """
    prompt = f"Provide both a summary and a category for the following GitHub repository: {repo}"
    try:
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "system", "content": GLOBAL_PROMPT},
                      {"role": "user", "content": prompt}],
            temperature=0.7
        )
    except openai.OpenAIError as e:
        sys.stderr.write(f"Error querying OpenAI: {e}\n")
        return "Error", "Error"

    # The message content may be None (e.g. the model returned no text);
    # treat that as an empty reply so the parser's defaults apply instead of
    # crashing the whole export with AttributeError on .strip().
    content = response.choices[0].message.content
    result = (content or "").strip()
    # Extract summary and category
    return parse_openai_response(result)
def parse_openai_response(response):
    """Extract the summary and category from a model reply.

    The reply is expected to contain a line starting with ``Summary:`` and a
    line starting with ``Category:``. Leading/trailing whitespace on each line
    is ignored, and a reply that places both fields on one line
    (``Summary: ... Category: ...``) is also understood.

    Args:
        response: Raw text returned by the model.

    Returns:
        ``(summary, category)``. Fields that are not found default to
        ``"Unknown"`` and ``"Uncategorized"`` respectively.
    """
    summary_label = "Summary:"
    category_label = "Category:"
    summary = "Unknown"
    category = "Uncategorized"
    for raw_line in response.split("\n"):
        # Strip first so indented lines and "\r\n" line endings still match.
        line = raw_line.strip()
        if line.startswith(summary_label):
            # Slice off only the leading label; str.replace would also delete
            # any later "Summary:" occurring inside the summary text itself.
            remainder = line[len(summary_label):]
            # Single-line replies carry the category after the summary.
            text, marker, inline_category = remainder.partition(category_label)
            summary = text.strip()
            if marker:
                category = inline_category.strip()
        elif line.startswith(category_label):
            category = line[len(category_label):].strip()
    return summary, category
def parse_args():
    """Build the CLI parser and return the parsed command-line namespace."""
    cli = ArgumentParser(description="Export a GitHub user's starred repositories to CSV")

    # Mandatory options, registered in declaration order.
    required_options = (
        ("--gh_user", "GitHub username"),
        ("--gh_token", "GitHub personal access token (required)"),
        ("--openai_token", "OpenAI API token (required)"),
    )
    for flag, help_text in required_options:
        cli.add_argument(flag, required=True, help=help_text)

    # The only optional switch: where to send the CSV.
    cli.add_argument("--csv_file", help="Optional CSV file to write output instead of stdout")

    return cli.parse_args()
def main():
    """Export a user's starred repositories, with AI summaries, as CSV.

    Reads the command-line options, then for every starred repository writes
    one row containing its URL, the model-generated summary and category, the
    star count and the last-updated timestamp. Output goes to ``--csv_file``
    when given, otherwise to stdout.
    """
    args = parse_args()
    # Initialize OpenAI Client once
    openai_client = openai.OpenAI(api_key=args.openai_token)
    # Initialize GitHub Client (authentication required)
    gh_client = Github(args.gh_token, retry=config_retry())
    user = gh_client.get_user(args.gh_user)

    # Open CSV file or default to stdout
    if args.csv_file:
        csv_file = open(args.csv_file, mode="w", newline="", encoding="utf-8")
    else:
        csv_file = sys.stdout
        sys.stdout.reconfigure(line_buffering=True)  # Immediate output when using stdout

    # try/finally guarantees the output file is closed (and buffered rows are
    # flushed) even if an API call raises part-way through the export.
    try:
        writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
        # Print header row
        writer.writerow(["Repository URL", "Summary", "Category", "Stars", "Last Updated"])
        # Process each starred repository
        for repo in starred_repos(user):
            repo_url = repo.html_url
            repo_full_name = repo.full_name
            # Fetch summary and category in one request
            summary, category = query_openai(openai_client, repo_url)
            stars, last_updated = get_github_repo_info(gh_client, repo_full_name)
            writer.writerow((repo_url, summary, category, stars, last_updated))
    finally:
        # Close only a file we opened ourselves; never close stdout.
        if args.csv_file:
            csv_file.close()


# Run the export only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment