Skip to content

Instantly share code, notes, and snippets.

@deadblue
Last active April 3, 2019 00:24
Show Gist options
  • Save deadblue/8435a7beb10c57915d488ec8846b2505 to your computer and use it in GitHub Desktop.
Save deadblue/8435a7beb10c57915d488ec8846b2505 to your computer and use it in GitHub Desktop.
Script to fetch all users who starred a repository
# -*- coding: utf-8 -*-
__author__ = 'deadblue'
import logging
# Configure logging once at import time: INFO level, with each record
# rendered as "[HH:MM:SS] message".
logging.basicConfig(
    format='[%(asctime)s] %(message)s',
    datefmt='%H:%M:%S',
    level=logging.INFO,
)
import csv
import os
import sys
import requests
# GraphQL query for one page of stargazers of the 996icu/996.ICU repository.
# The single %s placeholder is filled with the arguments of the `stargazers`
# connection (page size and, optionally, a cursor). Each node carries the
# stargazer's id, login, createdAt timestamp and the total count of their
# non-fork repositories; `pageInfo` tells whether another page exists and
# which cursor to continue from.
_GQL_TEMPLATE_ = '''
query {
repository(owner:"996icu", name:"996.ICU") {
stargazers(%s) {
nodes {
id
login
createdAt
repositories(isFork:false) {
totalCount
}
}
pageInfo {
hasNextPage
endCursor
}
}
}
}
'''
# Module-level logger, named after this module.
_logger = logging.getLogger(__name__)
def _print_usage(prog):
    """Print a one-line banner and the command-line synopsis to stdout.

    Args:
        prog: Program name substituted into the synopsis line.
    """
    usage_lines = (
        '>> Script for fetching stargazers of [996.icu] repository <<',
        'Usage:',
        ' %s <Your-Github-Access-Token> [Output-File]' % prog,
    )
    for text in usage_lines:
        print(text)
def _query_stargazers(token, limit, after=None):
    """Fetch one page of stargazers through the GitHub GraphQL API.

    Args:
        token: GitHub access token, sent as a bearer token.
        limit: Page size, passed as the ``first`` argument of the connection.
        after: Cursor to continue from (the ``endCursor`` of the previous
            page), or None to request the first page.

    Returns:
        The ``stargazers`` object of the response: a dict holding the
        ``nodes`` and ``pageInfo`` entries requested by ``_GQL_TEMPLATE_``.

    Raises:
        requests.HTTPError: The API answered with a 4xx/5xx status.
        RuntimeError: The response body reports GraphQL errors or contains
            no repository data.
    """
    # Build the connection arguments; a cursor is only sent when continuing.
    if after is None:
        cond = 'first:%d' % limit
    else:
        cond = 'first:%d, after:"%s"' % (limit, after)
    gql = _GQL_TEMPLATE_ % cond
    # Query the data. Without a timeout a stalled connection would block the
    # script forever.
    resp = requests.post(
        'https://api.github.com/graphql',
        json={'query': gql},
        headers={'Authorization': 'bearer %s' % token},
        timeout=30
    )
    # Surface HTTP-level failures (e.g. a rejected token) with their status
    # instead of an opaque KeyError on the lookup below.
    resp.raise_for_status()
    result = resp.json()
    # Query-level failures are reported in an "errors" member of the body.
    if result.get('errors'):
        raise RuntimeError(
            'GitHub GraphQL query failed: %s' % (result['errors'],)
        )
    repository = (result.get('data') or {}).get('repository')
    if repository is None:
        raise RuntimeError('GitHub GraphQL response contains no repository data')
    return repository['stargazers']
def _main(prog, token=None, outfile=None):
    """Fetch all stargazers page by page and write them to a CSV file.

    Args:
        prog: Program name shown in the usage text.
        token: GitHub access token. When None, the usage text is printed and
            the process exits with status 1.
        outfile: Path of the CSV file to write. Defaults to ``996.csv`` in
            the current working directory.
    """
    # Check arguments.
    if token is None:
        _print_usage(prog)
        # sys.exit rather than the bare exit(): the latter is injected by the
        # site module for interactive sessions and is not always available.
        sys.exit(1)
    if outfile is None:
        outfile = os.path.join(os.getcwd(), '996.csv')
    # Start fetching.
    _logger.info('Start fetching...')
    # newline='' hands line-ending control to the csv module; without it
    # every row is followed by a blank line on Windows.
    with open(outfile, 'w', newline='', encoding='utf-8') as fp:
        cw = csv.DictWriter(fp, fieldnames=['id', 'login', 'createAt', 'repoCount'])
        # Write a human-readable header row instead of the raw field names.
        cw.writerow({
            'id': 'ID',
            'login': 'User Name',
            'createAt': 'Create Time',
            'repoCount': 'Non-Fork Repository'
        })
        count, after = 0, None
        while True:
            # GitHub allows no more than 100 items per request.
            data = _query_stargazers(token, 100, after)
            nodes = data['nodes']
            # Append this page to the CSV file.
            cw.writerows(
                {
                    'id': node['id'],
                    'login': node['login'],
                    'createAt': node['createdAt'],
                    'repoCount': node['repositories']['totalCount']
                }
                for node in nodes
            )
            count += len(nodes)
            # Flush once per page so an interrupted run keeps what it fetched.
            fp.flush()
            page_info = data['pageInfo']
            # Stop after the last page.
            if not page_info['hasNextPage']:
                break
            # Continue from where this page ended.
            after = page_info['endCursor']
            _logger.info('Fetch accounts => %d', count)
    _logger.info('Fetch done, total count: %d', count)
if __name__ == '__main__':
    # Script entry point: the program name goes first, every remaining
    # command-line argument is forwarded positionally (token, output file).
    script_name = sys.argv[0]
    cli_args = sys.argv[1:]
    _main(script_name, *cli_args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment