Skip to content

Instantly share code, notes, and snippets.

@elijahbenizzy
Last active August 16, 2023 19:39
Show Gist options
  • Save elijahbenizzy/0b0957bfb009c0fed5f5d5717007344b to your computer and use it in GitHub Desktop.
Save elijahbenizzy/0b0957bfb009c0fed5f5d5717007344b to your computer and use it in GitHub Desktop.
def stargazer_url(
    stars_by_repo: Dict[str, int], per_page: int = 100
) -> Parallelizable[str]:
    """Generate one GitHub stargazers API URL per result page of every repository.

    The page count for a repository is its star count divided by the page
    size, rounded up; pages are numbered starting at 1. A repository with
    zero stars produces no URLs.

    :param stars_by_repo: The star count for each repo, keyed by the repository
        path that is interpolated into the URL (``owner/name`` for the GitHub API).
    :param per_page: The number of results per page. Values above 100 are
        lowered to 100, GitHub's maximum page size, so that the computed page
        count matches what the API actually returns per request.
    :return: A generator of stargazer URLs, one per page per repo.
    :raises ValueError: If ``per_page`` is smaller than 1. Because this is a
        generator, the error surfaces when it is first advanced.
    """
    if per_page < 1:
        raise ValueError(f"per_page must be at least 1, got {per_page}")
    # GitHub serves at most 100 results per page regardless of the requested
    # size; sizing the pages with a larger number would request too few pages
    # and silently drop stargazers.
    page_size = min(per_page, 100)
    for repo_name, stars in stars_by_repo.items():
        # Ceiling division: a partially filled last page still needs a request.
        num_pages = (stars + page_size - 1) // page_size
        for page in range(num_pages):
            yield f"https://api.github.com/repos/{repo_name}/stargazers?page={page + 1}&per_page={page_size}"
def stargazers(stargazer_url: str, github_api_key: str) -> pd.DataFrame:
    """Fetch one page of stargazers from the GitHub API.

    :param stargazer_url: URL of the stargazers page to request
    :param github_api_key: API key for GitHub, sent as a token in the
        ``Authorization`` header
    :return: A DataFrame with one row per stargazer on the page and the columns
        ``user`` (the login) and ``starred_at`` (a naive ``datetime`` parsed
        from the API's ``Z``-suffixed timestamp). An empty page yields an empty
        DataFrame that still has both columns.
    :raises requests.HTTPError: If the API answers with an unsuccessful status
    :raises requests.Timeout: If the API does not respond within the timeout
    """
    headers = {
        "Authorization": f"token {github_api_key}",
        # The "star" media type makes the API include the starred_at timestamp
        # alongside each user.
        "Accept": "application/vnd.github.v3.star+json",
    }
    # requests waits indefinitely by default; bound the wait so a stalled
    # connection cannot hang the whole run.
    response = requests.get(stargazer_url, headers=headers, timeout=30)
    response.raise_for_status()  # Raise an exception for unsuccessful requests
    data = response.json()
    records = [
        {
            "user": datum["user"]["login"],
            "starred_at": datetime.strptime(datum["starred_at"], "%Y-%m-%dT%H:%M:%SZ"),
        }
        for datum in data
    ]
    # Pin the columns so that an empty page still produces the same schema as
    # a populated one.
    return pd.DataFrame.from_records(records, columns=["user", "starred_at"])
@save_to.csv(path=value("unique_stargazers.csv"))
def unique_stargazers(stargazers: Collect[pd.DataFrame]) -> pd.DataFrame:
    """Aggregates all pages of stargazers into one row per user.

    Rows are sorted by ``starred_at`` before grouping, so each user keeps the
    earliest star time found across all collected pages.

    NOTE(review): the decorator presumably persists the returned frame to
    ``unique_stargazers.csv`` -- confirm against the framework.

    :param stargazers: The collected per-page DataFrames, each with ``user``
        and ``starred_at`` columns
    :return: A DataFrame indexed by ``user`` with a single ``starred_at``
        column; empty (but with the same index name and column) when there are
        no stargazers at all.
    """
    # Drop empty pages up front: they contribute no rows, and a page without
    # columns would otherwise break the sort below when every page is empty.
    frames = [page for page in stargazers if not page.empty]
    if not frames:
        # pd.concat raises ValueError on an empty sequence; return an empty
        # frame shaped like the normal result instead.
        return pd.DataFrame(
            columns=["starred_at"], index=pd.Index([], name="user")
        )
    df = pd.concat(frames)
    unique = df.sort_values("starred_at").groupby("user").first()
    return unique
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment