Last active
August 16, 2023 19:39
-
-
Save elijahbenizzy/0b0957bfb009c0fed5f5d5717007344b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def stargazer_url(
    stars_by_repo: Dict[str, int], per_page: int = 100
) -> Parallelizable[str]:
    """Generates one stargazers-API URL per result page for each repository.

    The number of pages for a repository is its star count divided by the
    page size, rounded up. The GitHub REST API serves at most 100 items per
    page, so larger ``per_page`` values are capped at 100; otherwise too few
    pages would be generated and stargazers would be silently skipped.

    :param stars_by_repo: The star count for each repo, keyed by the repository
        path that is placed after ``/repos/`` in the API URL
    :param per_page: The number of results per page (capped at 100)
    :return: A generator of paginated stargazer URLs, pages numbered from 1.
    :raises ValueError: If ``per_page`` is less than 1 (raised when the
        generator is first advanced).
    """
    if per_page < 1:
        raise ValueError(f"per_page must be a positive integer, got {per_page}")
    # GitHub ignores page sizes above 100, so the page count must be computed
    # with the size the API will actually use.
    page_size = min(per_page, 100)
    for repo_name, stars in stars_by_repo.items():
        # Ceiling division: a partially filled last page still needs a request.
        num_pages = (stars + page_size - 1) // page_size
        for page in range(1, num_pages + 1):
            yield f"https://api.github.com/repos/{repo_name}/stargazers?page={page}&per_page={page_size}"


def stargazers(stargazer_url: str, github_api_key: str) -> pd.DataFrame:
    """Fetches one page of stargazers from the GitHub API.

    :param stargazer_url: URL of a single page of a repository's stargazers
    :param github_api_key: API key for GitHub
    :return: A DataFrame with one row per stargazer on the page and the
        columns ``user`` (GitHub login) and ``starred_at`` (parsed timestamp).
        The columns are present even when the page is empty.
    :raises requests.HTTPError: If the API responds with an error status.
    """
    headers = {
        "Authorization": f"token {github_api_key}",
        # The "star+json" media type is what makes each item carry the
        # "starred_at" and nested "user" fields read below.
        "Accept": "application/vnd.github.v3.star+json",
    }
    # requests has no default timeout; without one a stalled connection would
    # block this node forever.
    response = requests.get(stargazer_url, headers=headers, timeout=30)
    response.raise_for_status()  # Raise an exception for unsuccessful requests
    records = [
        {
            "user": datum["user"]["login"],
            "starred_at": datetime.strptime(datum["starred_at"], "%Y-%m-%dT%H:%M:%SZ"),
        }
        for datum in response.json()
    ]
    # Pin the columns so an empty page still yields a frame that downstream
    # code can sort and group by name.
    return pd.DataFrame.from_records(records, columns=["user", "starred_at"])


@save_to.csv(path=value("unique_stargazers.csv"))
def unique_stargazers(stargazers: Collect[pd.DataFrame]) -> pd.DataFrame:
    """Aggregates all stargazer pages into one row per user.

    When a user appears more than once, the row with the earliest
    ``starred_at`` is kept.

    :param stargazers: Per-page stargazer DataFrames with ``user`` and
        ``starred_at`` columns
    :return: A DataFrame indexed by ``user`` holding each user's earliest
        ``starred_at``; empty when no stargazers were collected.
    """
    # Drop empty pages: they add no rows and may lack the expected columns.
    frames = [frame for frame in stargazers if not frame.empty]
    if frames:
        combined = pd.concat(frames)
    else:
        # pd.concat raises on an empty list, so fall back to an empty frame
        # with the expected columns and let the same pipeline run.
        combined = pd.DataFrame(columns=["user", "starred_at"])
    # Sorting first makes groupby(...).first() pick the earliest star per user.
    return combined.sort_values("starred_at").groupby("user").first()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment