Skip to content

Instantly share code, notes, and snippets.

@bplunkert
Created August 22, 2024 13:27
Show Gist options
  • Save bplunkert/5c25927711ac02cdbd905ecbc610b59e to your computer and use it in GitHub Desktop.
Save bplunkert/5c25927711ac02cdbd905ecbc610b59e to your computer and use it in GitHub Desktop.
GitHub Star Summarizer
"""
Gets a list of a user's starred repos, reads descriptions for all the repos, categorizes, summarizes and quantifies the user's interests.
"""
import requests
import os
from openai import OpenAI
# Shared OpenAI client used by the summarization helpers; the API key is
# read from the OPENAI_API_KEY environment variable (None when unset).
client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
def get_starred_repositories(username, github_token=None, per_page=100, timeout=30):
    """Fetch every repository starred by a GitHub user.

    Pages through ``/users/{username}/starred`` until an empty page is
    returned and concatenates the results.

    Args:
        username: GitHub login whose starred repositories are listed.
        github_token: Optional token sent in the ``Authorization`` header;
            when falsy the requests are made unauthenticated.
        per_page: Number of repositories requested per page. Clamped to the
            1-100 range; 100 is the largest page size the endpoint is
            documented to accept, so fewer round trips are needed.
        timeout: Seconds to wait for each HTTP request before giving up, so
            a stalled connection cannot hang the script indefinitely.

    Returns:
        A list with the decoded JSON object of every starred repository.

    Raises:
        Exception: If any page request returns a status other than 200.
    """
    url = f"https://api.github.com/users/{username}/starred"
    headers = {'Authorization': f'token {github_token}'} if github_token else {}
    # Keep the page size inside the range the API honours.
    per_page = max(1, min(int(per_page), 100))

    repos = []
    page = 1
    while True:
        response = requests.get(
            url,
            headers=headers,
            params={'page': page, 'per_page': per_page},
            timeout=timeout,
        )
        if response.status_code != 200:
            raise Exception(f"Error fetching starred repositories: {response.status_code}")
        page_repos = response.json()
        if not page_repos:
            # An empty page means we have walked past the last result.
            break
        repos.extend(page_repos)
        page += 1
    return repos
def get_descriptions_and_urls_from_starred_repos(starred_repos):
    """Pair each described repository's description with its HTML URL.

    Args:
        starred_repos: Iterable of repository mappings; the 'description'
            and 'html_url' keys are read from each one.

    Returns:
        A list of ``(description, url)`` tuples in input order. Entries
        whose description is missing or falsy are left out; a missing
        'html_url' is reported as an empty string.
    """
    return [
        (entry.get('description', ''), entry.get('html_url', ''))
        for entry in starred_repos
        if entry.get('description', '')
    ]
# Instruction header placed ahead of the repository list in every request.
# It asks for a JSON object keyed by category, which matches the
# ``json_object`` response format requested below.
_SUMMARY_PROMPT_HEADER = """
Based on the following list of project descriptions and their URLs, summarize the user's interests in JSON format.
The JSON should have categories as keys, and for each category, include the count of repositories and an array of repository URLs.
Example schema:
{
"category1": {
"count": count1,
"urls": ["url1", "url2", ...]
},
"category2": {
"count": count2,
"urls": ["url3", "url4", ...]
},
...
}
Descriptions and URLs:
"""


def summarize_descriptions_with_gpt4(descriptions_and_urls, model="gpt-4o-mini"):
    """Ask the chat model to categorize repositories into a JSON summary.

    Args:
        descriptions_and_urls: Iterable of ``(description, url)`` pairs that
            are appended to the instruction prompt, one entry per pair.
        model: Name of the chat model to call; defaults to the model the
            script has always used.

    Returns:
        The model's reply text with surrounding whitespace removed. The
        request asks for a JSON object, but the text is returned unparsed.

    Raises:
        RuntimeError: If the first choice carries no message content, which
            would otherwise surface as an opaque ``AttributeError``.
    """
    entries = [
        f"\nDescription: {description}\nURL: {url}\n"
        for description, url in descriptions_and_urls
    ]
    # Join once instead of growing the prompt with repeated concatenation.
    prompt = _SUMMARY_PROMPT_HEADER + "".join(entries)

    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "user", "content": prompt}
        ],
        model=model,
        response_format={"type": "json_object"},
    )
    content = chat_completion.choices[0].message.content
    if content is None:
        raise RuntimeError("The model returned no message content to summarize.")
    return content.strip()
def rolling_summarize(descriptions_and_urls, chunk_size=30):
    """Summarize repositories chunk by chunk, then merge the chunk summaries.

    Each chunk of at most ``chunk_size`` pairs is summarized separately and
    printed as it completes. When more than one chunk was produced, the
    chunk summaries are summarized once more to form the final result.

    Args:
        descriptions_and_urls: Sequence of ``(description, url)`` pairs.
        chunk_size: Maximum number of pairs sent in one request.

    Returns:
        The final summary text. For empty input this is ``"{}"`` and no
        request is made; for a single chunk it is that chunk's summary,
        which avoids a redundant second request that would only see the
        summary text without its URLs paired to descriptions.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")
    if not descriptions_and_urls:
        # Nothing to categorize: skip the API call and return an empty object.
        return "{}"

    chunk_summaries = []
    starts = range(0, len(descriptions_and_urls), chunk_size)
    for chunk_number, start in enumerate(starts, start=1):
        chunk = descriptions_and_urls[start:start + chunk_size]
        print(f"Summarizing chunk {chunk_number}...")
        chunk_summary = summarize_descriptions_with_gpt4(chunk)
        chunk_summaries.append(chunk_summary)
        print(f"\n--- Summary for chunk {chunk_number} ---\n")
        print(chunk_summary)
        print("\n----------------------------------------\n")

    if len(chunk_summaries) == 1:
        return chunk_summaries[0]

    # Summarize all chunk summaries; each one is passed as a "description"
    # with an empty URL because the summaries already embed their URLs.
    return summarize_descriptions_with_gpt4([(summary, "") for summary in chunk_summaries])
def main():
    """Interactive entry point: prompt for a user, fetch stars, print a summary.

    Prompts for a GitHub username and an optional token, fetches the user's
    starred repositories, and prints the summary of their descriptions.
    Returns early with a message when no username is given or when none of
    the repositories has a description.
    """
    # Local import: only the interactive entry point needs it.
    from getpass import getpass

    username = input("Enter GitHub username: ").strip()
    if not username:
        # An empty login would only produce a failing API request.
        print("No GitHub username provided.")
        return

    # Read the token without echoing it so the secret does not end up on
    # screen or in terminal scrollback.
    github_token = getpass("Enter GitHub token (optional, press Enter to skip): ").strip() or None

    print(f"Fetching starred repositories for user '{username}'...")
    starred_repos = get_starred_repositories(username, github_token)
    print(f"Retrieved {len(starred_repos)} starred repositories.")

    descriptions_and_urls = get_descriptions_and_urls_from_starred_repos(starred_repos)
    if not descriptions_and_urls:
        print("No descriptions found for the starred repositories.")
        return

    print("Summarizing the user's interests...")
    final_summary = rolling_summarize(descriptions_and_urls)
    print("\nFinal Summary of All Chunk Summaries:\n")
    print(final_summary)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment