Last active
September 11, 2024 17:39
-
-
Save manuelsh/7099b900f2846c796310c597e88e572e to your computer and use it in GitHub Desktop.
Get GitHub star counts for the tools listed in the e2b-dev/awesome-ai-agents README.md file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re

import pandas as pd
import requests
# Read the whole README into memory so the tool entries can be parsed from it.
file_path = 'README.md'
# Decode explicitly as UTF-8: without an encoding argument open() falls back to
# the platform's locale encoding, which can fail on non-ASCII README content.
with open(file_path, 'r', encoding='utf-8') as file:
    file_content = file.read()
# Parse tool entries from the markdown, keeping only those that link to GitHub
def extract_tool_info_github_only(markdown_text):
    """Extract GitHub-hosted tool entries from README-style markdown.

    A tool entry starts at a heading of the form ``## [ToolName](URL)``. Within
    the text that follows it (up to the next such heading), the line right
    after ``### Category`` is taken as the category and the line right after
    ``### Description`` as the description.

    Args:
        markdown_text: The markdown document as a single string.

    Returns:
        A list of ``[name, url, category, description]`` lists, one for each
        entry whose URL contains ``https://github.com``. A missing category
        becomes ``"N/A"`` and a missing description becomes
        ``"No description available"``.
    """
    # Splitting on a pattern with two capture groups yields
    # [preamble, name, url, body, name, url, body, ...].
    pieces = re.split(r'## \[(.*?)\]\((.*?)\)', markdown_text)

    # Capture up to the next newline OR the end of the text. The section body
    # is stripped before searching, so a value sitting on the section's last
    # line has no trailing newline and must still be matched.
    category_pattern = re.compile(r'### Category\n([^\n]*)')
    description_pattern = re.compile(r'### Description\n([^\n]*)')

    tool_data = []
    for raw_name, raw_url, raw_body in zip(pieces[1::3], pieces[2::3], pieces[3::3]):
        url = raw_url.strip()
        # Only proceed if the URL is a GitHub URL
        if "https://github.com" not in url:
            continue

        body = raw_body.strip()
        category_match = category_pattern.search(body)
        description_match = description_pattern.search(body)

        category = category_match.group(1).strip() if category_match else "N/A"
        description = (
            description_match.group(1).strip()
            if description_match
            else "No description available"
        )
        tool_data.append([raw_name.strip(), url, category, description])
    return tool_data
# Parse the README text into rows of [name, url, category, description]
tools_info = extract_tool_info_github_only(file_content)

# Load the rows into a DataFrame with one named column per field
column_names = ['Name', 'GitHub URL', 'Category', 'Description']
df_tools = pd.DataFrame(data=tools_info, columns=column_names)

# Show the first few rows as a quick sanity check
print(df_tools.head())
# Function to get the star count from GitHub API
def get_repo_stars(owner, repo, token=None, timeout=10):
    """Return the star count of a GitHub repository via the REST API.

    Args:
        owner: Owner segment of the repository path.
        repo: Repository name segment of the repository path.
        token: Optional token; when given it is sent as
            ``Authorization: token <token>``.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled connection cannot block the script indefinitely.

    Returns:
        The ``stargazers_count`` field of the JSON response (0 if the field is
        absent) when the API answers with status 200; otherwise ``None`` after
        printing the failing status code.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}"
    headers = {'Authorization': f'token {token}'} if token else {}

    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to retrieve data for {owner}/{repo}: {response.status_code}")
        return None

    return response.json().get('stargazers_count', 0)
# Add a new column for the stars; rows that cannot be resolved keep None
df_tools['Stars'] = None

# Iterate through the dataframe and update the star counts
for index, github_url in df_tools['GitHub URL'].items():
    if pd.isna(github_url) or "github.com" not in github_url:
        continue

    # Take the first two path segments after the host as owner/repo. This
    # tolerates a trailing slash and deeper links such as
    # ".../owner/repo/tree/main", which an exact two-segment check would
    # silently skip.
    repo_path = github_url.partition("github.com/")[2].strip("/")
    segments = repo_path.split("/")
    if len(segments) < 2 or not segments[0] or not segments[1]:
        continue
    owner, repo = segments[:2]

    try:
        df_tools.at[index, 'Stars'] = get_repo_stars(owner, repo)
    except Exception as e:
        # Best effort: one failing lookup must not abort the whole run.
        print(f"Error fetching stars for {github_url}: {e}")
        df_tools.at[index, 'Stars'] = 'Error'
# Write the table (including the raw 'Stars' values) to a CSV file
output_csv_path = "tools_info_with_stars.csv"
df_tools.to_csv(output_csv_path, index=False)
print(f"Updated data saved to {output_csv_path}")

# Coerce 'Stars' to numbers; anything non-numeric becomes NaN
df_tools['Stars'] = pd.to_numeric(df_tools['Stars'], errors='coerce')

# Order by star count, highest first, and renumber the rows
df_tools_sorted = df_tools.sort_values(by='Stars', ascending=False)
df_tools_sorted = df_tools_sorted.reset_index(drop=True)

# Print the ranked table as markdown
report_columns = ['Name', 'Stars', 'GitHub URL', 'Description', 'Category']
print(df_tools_sorted[report_columns].to_markdown(index=False))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This would be the output: