Created
August 22, 2024 13:27
-
-
Save bplunkert/5c25927711ac02cdbd905ecbc610b59e to your computer and use it in GitHub Desktop.
GitHub Star Summarizer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Gets a list of a user's starred repos, reads the descriptions of all the repos, then categorizes, summarizes and quantifies the user's interests.
"""
import requests | |
import os | |
from openai import OpenAI | |
# Module-level OpenAI client; its API key is looked up in the
# OPENAI_API_KEY environment variable (None when the variable is unset).
client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
def get_starred_repositories(username, github_token=None, per_page=100, timeout=10):
    """Return every repository starred by a GitHub user.

    Requests successive pages of ``/users/{username}/starred`` and stops at
    the first page that comes back empty.

    Args:
        username: GitHub login whose starred repositories are fetched.
        github_token: Optional token sent in the ``Authorization`` header;
            when falsy the requests are sent without that header.
        per_page: Number of repositories requested per page. Larger pages
            mean fewer round trips; the returned list is the same.
        timeout: Seconds to wait for each HTTP request before giving up, so
            a stalled connection cannot hang the program indefinitely.

    Returns:
        A list containing the decoded JSON items of all pages, in the order
        the API returned them.

    Raises:
        RuntimeError: If any page responds with a status other than 200.
    """
    url = f"https://api.github.com/users/{username}/starred"
    headers = {'Authorization': f'token {github_token}'} if github_token else {}
    repos = []
    page = 1
    while True:
        response = requests.get(
            url,
            headers=headers,
            params={'page': page, 'per_page': per_page},
            timeout=timeout,
        )
        if response.status_code != 200:
            raise RuntimeError(
                f"Error fetching starred repositories: {response.status_code}"
            )
        page_repos = response.json()
        # An empty page means the previous page was the last one.
        if not page_repos:
            break
        repos.extend(page_repos)
        page += 1
    return repos
def get_descriptions_and_urls_from_starred_repos(starred_repos):
    """Collect ``(description, html_url)`` pairs for repos that have a description.

    Repositories whose ``description`` is missing, ``None`` or empty are
    skipped; a missing ``html_url`` is reported as an empty string.
    """
    candidate_pairs = (
        (entry.get('description', ''), entry.get('html_url', ''))
        for entry in starred_repos
    )
    return [(text, link) for text, link in candidate_pairs if text]
def summarize_descriptions_with_gpt4(descriptions_and_urls, model="gpt-4o-mini"):
    """Ask the chat model to categorize repositories and return its JSON text.

    Builds one prompt from the fixed instructions plus a
    ``Description: ... / URL: ...`` entry for each pair, sends it as a single
    user message with a JSON-object response format, and returns the reply.

    Args:
        descriptions_and_urls: Iterable of ``(description, url)`` pairs.
        model: Name of the chat model to call.

    Returns:
        The model's reply as a string with surrounding whitespace removed.

    Raises:
        RuntimeError: If the reply carries no message content.
    """
    instructions = """
    Based on the following list of project descriptions and their URLs, summarize the user's interests in JSON format.
    The JSON should have categories as keys, and for each category, include the count of repositories and an array of repository URLs.
    Example schema:
    {
        "category1": {
            "count": count1,
            "urls": ["url1", "url2", ...]
        },
        "category2": {
            "count": count2,
            "urls": ["url3", "url4", ...]
        },
        ...
    }
    Descriptions and URLs:
    """
    # Join once instead of growing the prompt with += inside a loop.
    entries = "".join(
        f"\nDescription: {description}\nURL: {url}\n"
        for description, url in descriptions_and_urls
    )
    prompt = instructions + entries
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "user", "content": prompt}
        ],
        model=model,
        response_format={"type": "json_object"},
    )
    content = chat_completion.choices[0].message.content
    # NOTE(review): the content field appears to be optional in the API
    # response (confirm); fail with a clear message rather than an
    # AttributeError on None.strip().
    if content is None:
        raise RuntimeError("OpenAI response contained no message content")
    return content.strip()
def rolling_summarize(descriptions_and_urls, chunk_size=30):
    """Summarize the pairs chunk by chunk, then combine the chunk summaries.

    Each slice of ``chunk_size`` pairs is summarized with
    ``summarize_descriptions_with_gpt4`` and the result is printed. When more
    than one chunk was produced, the chunk summaries are summarized once more
    (each passed as a description with an empty URL) to obtain the final
    result.

    Args:
        descriptions_and_urls: Sequence of ``(description, url)`` pairs.
        chunk_size: Maximum number of pairs sent per summarization request.

    Returns:
        The final summary string. ``"{}"`` when there is nothing to
        summarize, and the single chunk's summary when everything fits in
        one chunk (a second pass would only cost another request).

    Raises:
        ValueError: If ``chunk_size`` is less than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")
    if not descriptions_and_urls:
        # Nothing to send to the model; report an empty JSON object.
        return "{}"

    chunk_summaries = []
    chunk_starts = range(0, len(descriptions_and_urls), chunk_size)
    for chunk_number, start in enumerate(chunk_starts, start=1):
        chunk = descriptions_and_urls[start:start + chunk_size]
        print(f"Summarizing chunk {chunk_number}...")
        chunk_summary = summarize_descriptions_with_gpt4(chunk)
        chunk_summaries.append(chunk_summary)
        print(f"\n--- Summary for chunk {chunk_number} ---\n")
        print(chunk_summary)
        print("\n----------------------------------------\n")

    if len(chunk_summaries) == 1:
        return chunk_summaries[0]

    # Summarize all chunk summaries
    return summarize_descriptions_with_gpt4(
        [(summary, "") for summary in chunk_summaries]
    )
def main():
    """Prompt for a GitHub user, fetch their stars and print an interest summary.

    Reads the username and an optional token from the terminal, fetches the
    starred repositories, extracts their descriptions and URLs, and prints
    the rolling summary. Returns early when no username is entered or when
    none of the starred repositories has a description.
    """
    from getpass import getpass

    username = input("Enter GitHub username: ").strip()
    if not username:
        # An empty login would produce a malformed API URL.
        print("A GitHub username is required.")
        return
    # getpass keeps the token from being echoed to the terminal.
    github_token = getpass("Enter GitHub token (optional, press Enter to skip): ").strip() or None
    print(f"Fetching starred repositories for user '{username}'...")
    starred_repos = get_starred_repositories(username, github_token)
    print(f"Retrieved {len(starred_repos)} starred repositories.")
    descriptions_and_urls = get_descriptions_and_urls_from_starred_repos(starred_repos)
    if not descriptions_and_urls:
        print("No descriptions found for the starred repositories.")
        return
    print("Summarizing the user's interests...")
    final_summary = rolling_summarize(descriptions_and_urls)
    print("\nFinal Summary of All Chunk Summaries:\n")
    print(final_summary)
# Run the interactive summarizer only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment