Last active
January 12, 2025 10:58
-
-
Save callahantiff/0ae1c00df9bec7228be3f6bda5466d73 to your computer and use it in GitHub Desktop.
GitHub API Repository Search (Python 3.6.2)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3.6.2 | |
# -*- coding: utf-8 -*- | |
# run script from command line via python3 keyword_search_github_repositories.py | |
import click | |
import datetime | |
import time | |
from github import Github | |
from github.GithubException import RateLimitExceededException | |
from tqdm import tqdm | |
def search_github(auth: Github, keyword: str) -> list:
    """Search the GitHub API for repositories using an input keyword.

    Args:
        auth: A Github authenticate object.
        keyword: A keyword string.

    Returns:
        A nested list of GitHub repositories returned for a keyword. Each result list contains the repository name,
        url, and description.

    Raises:
        RateLimitExceededException: If a result still cannot be fetched after several rate-limit back-offs.
    """
    # The GitHub search API only serves the first 1,000 matches of any query,
    # even when the reported total is larger.
    max_search_results = 1000
    # Number of times a single result is attempted before giving up.
    max_attempts = 5

    print('Searching GitHub using keyword: {}'.format(keyword))

    # set-up query
    # NOTE(review): qualifiers are joined with a literal '+'; confirm the client library
    # forwards this to the API as intended.
    query = keyword + '+in:readme+in:description'
    results = auth.search_repositories(query, 'stars', 'desc')

    # print results
    total = results.totalCount
    print(f'Found {total} repo(s)')

    results_list = []
    for index in tqdm(range(min(total, max_search_results))):
        for attempt in range(1, max_attempts + 1):
            try:
                repo = results[index]
                results_list.append([repo.name, repo.url, repo.description])
                break
            except RateLimitExceededException:
                if attempt == max_attempts:
                    raise
                # back off before retrying the same result
                time.sleep(60)
        # throttle consecutive requests
        time.sleep(2)

    return results_list
@click.command()
@click.option('--token', prompt='Please enter your GitHub Access Token')
@click.option('--keywords', prompt='Please enter the keywords separated by a comma')
@click.option('--filename', prompt='Please provide the file path')
def main(token: str, keywords: str, filename: str) -> None:
    """Search GitHub for each keyword and write the results to a tab-separated text file.

    Args:
        token: A GitHub access token used to authenticate with the API.
        keywords: A comma-separated string of keywords; each keyword is searched separately.
        filename: A path prefix for the output file. The generated file name is appended directly to it, so a
            directory must include its trailing path separator.

    Returns:
        None. One line per repository is written as: keyword, name, url, description.
    """
    # initialize and authenticate GitHub API
    auth = Github(token)

    # search a list of keywords, ignoring empty entries such as those produced by "a,,b"
    search_list = [keyword.strip() for keyword in keywords.split(',') if keyword.strip()]

    # search repositories on GitHub
    github_results = dict()
    for position, key in enumerate(search_list):
        # pause between consecutive searches only, not after the final one
        if position > 0:
            time.sleep(120)
        github_results[key] = search_github(auth, key)

    # write out results
    timestamp = datetime.datetime.now()
    formatted_date = timestamp.strftime('%d%b%Y')
    full_filename = filename.strip() + 'GitHub_Search_Results_' + formatted_date + '.txt'
    print('Writing search results to: {}'.format(full_filename))

    # explicit UTF-8 so non-ASCII descriptions are written regardless of the platform default
    with open(full_filename, 'w', encoding='utf-8') as f_out:
        for key in tqdm(github_results):
            for res in github_results[key]:
                # one record per line
                f_out.write('\t'.join([key, str(res[0]), str(res[1]), str(res[2])]) + '\n')


if __name__ == '__main__':
    main()
python -m pip install tqdm click pygithub
python -m pip install tqdm click pygithub
Thanks so much @Abdur-rahmaanJ!
@callahantiff This could be a nice project of its own, btw.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Note: the sleep timing could be improved by experimenting with how sensitive the API is to request frequency.