zwned · February 10, 2025 19:26
diff --git a/summarize_stars.py b/summarize_stars.py
 #!/usr/bin/env python

 import sys
 import csv
 import openai
 from math import ceil
 from argparse import ArgumentParser
 from github import Github
 from github.GithubException import RateLimitExceededException, GithubException
 from urllib3 import Retry

 # System prompt sent as the "system" message of every request made by
 # query_openai. It fixes the "Summary:" / "Category:" reply layout that
 # parse_openai_response scans for.
 # NOTE(review): the closing "single line" instruction may conflict with the
 # two-line template above it -- confirm which one is intended.
 GLOBAL_PROMPT = """
 You are a cybersecurity expert specializing in penetration testing, red teaming, and security research.

 When provided with a GitHub repository, return both:
 1. A brief summary of the repository’s purpose and functionality.
 2. A category in 1-3 words (e.g., Reconnaissance, Exploitation, Persistence, Post-Exploitation, Privilege Escalation, Command and Control).

 Your response **must** be formatted as follows:
 ---
 Summary: <Provide a concise summary>
 Category: <Provide a category>
 ---

 Avoid using quotation marks. Ensure responses are on a single line with no extra line breaks.
 """

 def starred_repos(user):
    """Yield every repository starred by ``user``.

    Args:
        user: Object whose ``get_starred()`` returns an iterable listing of
            repositories (``main`` passes the result of
            ``gh_client.get_user(...)``).

    Yields:
        Each starred repository, in the order the listing produces them.
    """
    # Iterate the listing itself rather than deriving a page count from a
    # hard-coded page size of 30: that arithmetic dropped repositories
    # whenever the real page size was smaller, and ``totalCount`` was read
    # only to feed it.
    yield from user.get_starred()

 def get_github_repo_info(gh_client, repo_full_name):
    """Look up a repository's star count and last-update timestamp.

    Args:
        gh_client: Client exposing ``get_repo(full_name)``.
        repo_full_name: Repository identifier handed to ``get_repo``.

    Returns:
        ``(stargazers_count, updated_at as an ISO 8601 string)``, or
        ``(None, None)`` when the lookup raises a GitHub exception; the
        failure is reported on stderr.
    """
    try:
        details = gh_client.get_repo(repo_full_name)
        star_count = details.stargazers_count
        updated_iso = details.updated_at.isoformat()
    except RateLimitExceededException:
        # Listed before GithubException so rate limiting gets its own message.
        sys.stderr.write(f"Error: GitHub API rate limit exceeded while fetching data for {repo_full_name}\n")
        return None, None
    except GithubException as e:
        sys.stderr.write(f"Warning: Failed to fetch repository data for {repo_full_name} (Error: {e})\n")
        return None, None
    else:
        return star_count, updated_iso

 def config_retry(backoff_factor=1.0, total=8):
    """Build a urllib3 ``Retry`` policy with exponential backoff.

    The backoff ceiling is raised to ``backoff_factor * 2 ** (total - 1)``.

    Args:
        backoff_factor: Multiplier applied to the delay between attempts.
        total: Maximum number of retries.

    Returns:
        A ``Retry`` built with ``total`` and ``backoff_factor`` (and the
        computed ceiling, where the installed urllib3 accepts it).
    """
    backoff_cap = backoff_factor * 2 ** (total - 1)

    # Kept for older urllib3 releases, which have no per-instance setting and
    # read the ceiling from this class attribute.
    Retry.DEFAULT_BACKOFF_MAX = backoff_cap

    try:
        # urllib3 2.x binds the default of ``backoff_max`` when the class is
        # defined, so the assignment above does not reach new instances; the
        # ceiling has to be passed explicitly.
        return Retry(total=total, backoff_factor=backoff_factor, backoff_max=backoff_cap)
    except TypeError:
        # Older urllib3: ``backoff_max`` is not a constructor keyword.
        return Retry(total=total, backoff_factor=backoff_factor)

 def query_openai(client, repo):
    """Ask the chat model for a summary and a category of one repository.

    Args:
        client: Object exposing ``chat.completions.create`` (``main`` passes
            an ``openai.OpenAI`` instance).
        repo: Value interpolated into the user prompt (``main`` passes the
            repository's HTML URL).

    Returns:
        ``(summary, category)`` as extracted by ``parse_openai_response``,
        or ``("Error", "Error")`` when the API call raises
        ``openai.OpenAIError`` (the error is written to stderr).
    """
    prompt = f"Provide both a summary and a category for the following GitHub repository: {repo}"

    # Only the network call can raise OpenAIError, so only it is guarded.
    try:
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "system", "content": GLOBAL_PROMPT},
                      {"role": "user", "content": prompt}],
            temperature=0.7
        )
    except openai.OpenAIError as e:
        sys.stderr.write(f"Error querying OpenAI: {e}\n")
        return "Error", "Error"

    # The message content can be None (no text returned); treat that as an
    # empty reply so the parser's defaults apply instead of crashing on
    # ``None.strip()``.
    content = response.choices[0].message.content
    return parse_openai_response((content or "").strip())

 def parse_openai_response(response):
    """Extract the summary and category from a model reply.

    Each line is checked for a leading ``Summary:`` or ``Category:`` label
    (surrounding whitespace ignored). If a label appears on several lines,
    the last one wins.

    Args:
        response: Reply text, lines separated by ``"\\n"``.

    Returns:
        ``(summary, category)``; a missing label yields ``"Unknown"`` or
        ``"Uncategorized"`` respectively.
    """
    summary = "Unknown"
    category = "Uncategorized"

    for raw_line in response.split("\n"):
        # Strip first so indented lines and "\r\n" endings still match.
        line = raw_line.strip()
        if line.startswith("Summary:"):
            # Slice off only the leading label; str.replace would also delete
            # any later "Summary:" inside the text itself.
            summary = line[len("Summary:"):].strip()
        elif line.startswith("Category:"):
            category = line[len("Category:"):].strip()

    return summary, category

 def parse_args():
    """Build the command-line parser and parse ``sys.argv``.

    Returns:
        Namespace with ``gh_user``, ``gh_token``, ``openai_token`` (all
        required) and ``csv_file`` (``None`` when not given).
    """
    cli = ArgumentParser(description="Export a GitHub user's starred repositories to CSV")

    # Mandatory options, registered in the order they appear in --help.
    mandatory = (
        ("--gh_user", "GitHub username"),
        ("--gh_token", "GitHub personal access token (required)"),
        ("--openai_token", "OpenAI API token (required)"),
    )
    for flag, description in mandatory:
        cli.add_argument(flag, required=True, help=description)

    cli.add_argument("--csv_file", help="Optional CSV file to write output instead of stdout")

    parsed = cli.parse_args()
    return parsed

 def main():
    """Export a GitHub user's starred repositories as CSV rows.

    Parses the command line, builds the OpenAI and GitHub clients, then
    writes a header plus one row per starred repository (URL, summary,
    category, stars, last updated) to ``--csv_file`` or, when that option is
    absent, to stdout.
    """
    args = parse_args()

    # Initialize OpenAI Client once
    openai_client = openai.OpenAI(api_key=args.openai_token)

    # Initialize GitHub Client (authentication required)
    gh_client = Github(args.gh_token, retry=config_retry())
    user = gh_client.get_user(args.gh_user)

    # Open CSV file or default to stdout
    if args.csv_file:
        csv_file = open(args.csv_file, mode="w", newline="", encoding="utf-8")
    else:
        csv_file = sys.stdout
        sys.stdout.reconfigure(line_buffering=True)  # Immediate output when using stdout

    # try/finally so the output file is closed even when an API call or a
    # write fails part-way through the export.
    try:
        writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL)

        # Print header row
        writer.writerow(["Repository URL", "Summary", "Category", "Stars", "Last Updated"])

        # Process each starred repository
        for repo in starred_repos(user):
            repo_url = repo.html_url
            repo_full_name = repo.full_name

            # Fetch summary and category in one request
            summary, category = query_openai(openai_client, repo_url)
            stars, last_updated = get_github_repo_info(gh_client, repo_full_name)

            writer.writerow((repo_url, summary, category, stars, last_updated))
    finally:
        # Only close what this function opened; stdout is left alone.
        if args.csv_file:
            csv_file.close()

 # Run the export only when this file is executed as a script, not on import.
 if __name__ == "__main__":
    main()
	#!/usr/bin/env python

	import sys
	import csv
	import openai
	from math import ceil
	from argparse import ArgumentParser
	from github import Github
	from github.GithubException import RateLimitExceededException, GithubException
	from urllib3 import Retry

	# System prompt sent as the "system" message of every request made by
	# query_openai. It fixes the "Summary:" / "Category:" reply layout that
	# parse_openai_response scans for.
	# NOTE(review): the closing "single line" instruction may conflict with the
	# two-line template above it -- confirm which one is intended.
	GLOBAL_PROMPT = """
	You are a cybersecurity expert specializing in penetration testing, red teaming, and security research.

	When provided with a GitHub repository, return both:
	1. A brief summary of the repository’s purpose and functionality.
	2. A category in 1-3 words (e.g., Reconnaissance, Exploitation, Persistence, Post-Exploitation, Privilege Escalation, Command and Control).

	Your response must be formatted as follows:
	---
	Summary: <Provide a concise summary>
	Category: <Provide a category>
	---

	Avoid using quotation marks. Ensure responses are on a single line with no extra line breaks.
	"""

	def starred_repos(user):
	    """Yield every repository starred by ``user``.

	    Args:
	        user: Object whose ``get_starred()`` returns an iterable listing of
	            repositories (``main`` passes the result of
	            ``gh_client.get_user(...)``).

	    Yields:
	        Each starred repository, in the order the listing produces them.
	    """
	    # Iterate the listing itself rather than deriving a page count from a
	    # hard-coded page size of 30: that arithmetic dropped repositories
	    # whenever the real page size was smaller, and ``totalCount`` was read
	    # only to feed it.
	    yield from user.get_starred()

	def get_github_repo_info(gh_client, repo_full_name):
	    """Look up a repository's star count and last-update timestamp.

	    Args:
	        gh_client: Client exposing ``get_repo(full_name)``.
	        repo_full_name: Repository identifier handed to ``get_repo``.

	    Returns:
	        ``(stargazers_count, updated_at as an ISO 8601 string)``, or
	        ``(None, None)`` when the lookup raises a GitHub exception; the
	        failure is reported on stderr.
	    """
	    try:
	        repo = gh_client.get_repo(repo_full_name)
	        return repo.stargazers_count, repo.updated_at.isoformat()  # ISO 8601 timestamp
	    except RateLimitExceededException:
	        # Listed before GithubException so rate limiting gets its own message.
	        sys.stderr.write(f"Error: GitHub API rate limit exceeded while fetching data for {repo_full_name}\n")
	        return None, None
	    except GithubException as e:
	        sys.stderr.write(f"Warning: Failed to fetch repository data for {repo_full_name} (Error: {e})\n")
	        return None, None

	def config_retry(backoff_factor=1.0, total=8):
	    """Build a urllib3 ``Retry`` policy with exponential backoff.

	    The backoff ceiling is raised to ``backoff_factor * 2 ** (total - 1)``.

	    Args:
	        backoff_factor: Multiplier applied to the delay between attempts.
	        total: Maximum number of retries.

	    Returns:
	        A ``Retry`` built with ``total`` and ``backoff_factor`` (and the
	        computed ceiling, where the installed urllib3 accepts it).
	    """
	    backoff_cap = backoff_factor * 2 ** (total - 1)

	    # Kept for older urllib3 releases, which have no per-instance setting and
	    # read the ceiling from this class attribute.
	    Retry.DEFAULT_BACKOFF_MAX = backoff_cap

	    try:
	        # urllib3 2.x binds the default of ``backoff_max`` when the class is
	        # defined, so the assignment above does not reach new instances; the
	        # ceiling has to be passed explicitly.
	        return Retry(total=total, backoff_factor=backoff_factor, backoff_max=backoff_cap)
	    except TypeError:
	        # Older urllib3: ``backoff_max`` is not a constructor keyword.
	        return Retry(total=total, backoff_factor=backoff_factor)

	def query_openai(client, repo):
	    """Ask the chat model for a summary and a category of one repository.

	    Args:
	        client: Object exposing ``chat.completions.create`` (``main`` passes
	            an ``openai.OpenAI`` instance).
	        repo: Value interpolated into the user prompt (``main`` passes the
	            repository's HTML URL).

	    Returns:
	        ``(summary, category)`` as extracted by ``parse_openai_response``,
	        or ``("Error", "Error")`` when the API call raises
	        ``openai.OpenAIError`` (the error is written to stderr).
	    """
	    prompt = f"Provide both a summary and a category for the following GitHub repository: {repo}"

	    # Only the network call can raise OpenAIError, so only it is guarded.
	    try:
	        response = client.chat.completions.create(
	            model="gpt-4",
	            messages=[{"role": "system", "content": GLOBAL_PROMPT},
	                      {"role": "user", "content": prompt}],
	            temperature=0.7
	        )
	    except openai.OpenAIError as e:
	        sys.stderr.write(f"Error querying OpenAI: {e}\n")
	        return "Error", "Error"

	    # The message content can be None (no text returned); treat that as an
	    # empty reply so the parser's defaults apply instead of crashing on
	    # ``None.strip()``.
	    content = response.choices[0].message.content
	    return parse_openai_response((content or "").strip())

	def parse_openai_response(response):
	    """Extract the summary and category from a model reply.

	    Each line is checked for a leading ``Summary:`` or ``Category:`` label
	    (surrounding whitespace ignored). If a label appears on several lines,
	    the last one wins.

	    Args:
	        response: Reply text, lines separated by ``"\\n"``.

	    Returns:
	        ``(summary, category)``; a missing label yields ``"Unknown"`` or
	        ``"Uncategorized"`` respectively.
	    """
	    summary = "Unknown"
	    category = "Uncategorized"

	    for raw_line in response.split("\n"):
	        # Strip first so indented lines and "\r\n" endings still match.
	        line = raw_line.strip()
	        if line.startswith("Summary:"):
	            # Slice off only the leading label; str.replace would also delete
	            # any later "Summary:" inside the text itself.
	            summary = line[len("Summary:"):].strip()
	        elif line.startswith("Category:"):
	            category = line[len("Category:"):].strip()

	    return summary, category

	def parse_args():
	    """Build the command-line parser and parse ``sys.argv``.

	    Returns:
	        Namespace with ``gh_user``, ``gh_token``, ``openai_token`` (all
	        required) and ``csv_file`` (``None`` when not given).
	    """
	    parser = ArgumentParser(description="Export a GitHub user's starred repositories to CSV")

	    parser.add_argument("--gh_user", required=True, help="GitHub username")
	    parser.add_argument("--gh_token", required=True, help="GitHub personal access token (required)")
	    parser.add_argument("--openai_token", required=True, help="OpenAI API token (required)")
	    parser.add_argument("--csv_file", help="Optional CSV file to write output instead of stdout")

	    return parser.parse_args()

	def main():
	    """Export a GitHub user's starred repositories as CSV rows.

	    Parses the command line, builds the OpenAI and GitHub clients, then
	    writes a header plus one row per starred repository (URL, summary,
	    category, stars, last updated) to ``--csv_file`` or, when that option is
	    absent, to stdout.
	    """
	    args = parse_args()

	    # Initialize OpenAI Client once
	    openai_client = openai.OpenAI(api_key=args.openai_token)

	    # Initialize GitHub Client (authentication required)
	    gh_client = Github(args.gh_token, retry=config_retry())
	    user = gh_client.get_user(args.gh_user)

	    # Open CSV file or default to stdout
	    if args.csv_file:
	        csv_file = open(args.csv_file, mode="w", newline="", encoding="utf-8")
	    else:
	        csv_file = sys.stdout
	        sys.stdout.reconfigure(line_buffering=True)  # Immediate output when using stdout

	    # try/finally so the output file is closed even when an API call or a
	    # write fails part-way through the export.
	    try:
	        writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL)

	        # Print header row
	        writer.writerow(["Repository URL", "Summary", "Category", "Stars", "Last Updated"])

	        # Process each starred repository
	        for repo in starred_repos(user):
	            repo_url = repo.html_url
	            repo_full_name = repo.full_name

	            # Fetch summary and category in one request
	            summary, category = query_openai(openai_client, repo_url)
	            stars, last_updated = get_github_repo_info(gh_client, repo_full_name)

	            writer.writerow((repo_url, summary, category, stars, last_updated))
	    finally:
	        # Only close what this function opened; stdout is left alone.
	        if args.csv_file:
	            csv_file.close()

	# Run the export only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()