Skip to content

Instantly share code, notes, and snippets.

@malfet
Last active June 8, 2022 00:11
Show Gist options
  • Save malfet/4f35321b0c9315bcd7116c7b54d83372 to your computer and use it in GitHub Desktop.
Save malfet/4f35321b0c9315bcd7116c7b54d83372 to your computer and use it in GitHub Desktop.
Trying to fetch all PR commits using the GraphQL API
#!/usr/bin/env python3
# Trying to use graphQL to fetch all commits associated with PR https://github.com/pytorch/pytorch/pull/77471
# Expect to get 700+ commits, but for some reason pagination ends after the first 250
# Needs valid PAT passed via GITHUB_TOKEN environment variable
# GraphQL query for one page (up to 100 entries) of commit OIDs belonging to
# pytorch/pytorch pull request 77471. `$cursor` selects the position to resume
# from; the response also carries `pageInfo` (hasNextPage / endCursor) and the
# connection's `totalCount`.
GH_GET_COMMITS = """
query($cursor: String) {
repository(owner: "pytorch", name: "pytorch") {
pullRequest(number: 77471) {
commits(first: 100, after: $cursor) {
nodes {
commit {
oid
}
}
pageInfo {
hasNextPage
endCursor
}
totalCount
}
}
}
}
"""
def gh_graphql(query, **kwargs):
    """POST a GraphQL query to the GitHub API and return the decoded JSON reply.

    Args:
        query: GraphQL query text.
        **kwargs: Values sent as the query's ``variables`` object.

    Returns:
        The parsed JSON response body.

    Raises:
        RuntimeError: If the ``GITHUB_TOKEN`` environment variable is unset or
            empty, or if the response contains a top-level ``errors`` entry.
    """
    import json
    import os
    from urllib.request import Request, urlopen

    token = os.getenv("GITHUB_TOKEN")
    if not token:
        # Fail fast: otherwise the Authorization header would carry the
        # literal string "token None" and the problem would only show up as a
        # failed HTTP request.
        raise RuntimeError("GITHUB_TOKEN environment variable must be set to a valid GitHub token")

    headers = {"Accept": "application/vnd.github.v3+json", "Authorization": f"token {token}"}
    data = json.dumps({"query": query, "variables": kwargs}).encode()
    with urlopen(Request("https://api.github.com/graphql", data=data, headers=headers)) as conn:
        rc = json.load(conn)
    # The response body can report query failures in an "errors" key;
    # turn those into an exception.
    if "errors" in rc:
        raise RuntimeError(f"GraphQL query {query}, args {kwargs} failed: {rc['errors']}")
    return rc
if __name__ == "__main__":
    # Page through the PR's commits, printing for each page: its index, the
    # number of nodes returned, the pageInfo object and the reported totalCount.
    # The first request passes a null cursor (None -> JSON null) rather than
    # an empty string, since "" is not a cursor value.
    cursor = None
    for pg_idx in range(10):  # hard cap of 10 pages
        rc = gh_graphql(GH_GET_COMMITS, cursor=cursor)["data"]["repository"]["pullRequest"]["commits"]
        print(pg_idx, len(rc["nodes"]), rc["pageInfo"], rc["totalCount"])
        if not rc["pageInfo"]["hasNextPage"]:
            break
        # Resume the next request from where this page ended.
        cursor = rc["pageInfo"]["endCursor"]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment