Created
September 4, 2014 23:53
-
-
Save leventov/9e00036ec2f3abbf4a55 to your computer and use it in GitHub Desktop.
См. http://habrahabr.ru/post/235689/. Инструкция по применению в первом комментарии.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
# Star threshold; it is also the base name of the JSON data file read below.
MIN_STARS = 700

data_path = '{}.json'.format(MIN_STARS)
with open(data_path) as json_file:
    repos = json.load(json_file)

# Group the repos by language and count how many repos share each 'top' value.
repos_by_language = {}
by_top = {}
for entry in repos:
    top_value = entry['top']
    if top_value in by_top:
        by_top[top_value] += 1
    else:
        by_top[top_value] = 1
    language = entry['language']
    if language not in repos_by_language:
        repos_by_language[language] = []
    repos_by_language[language].append(entry)

repo_count = len(repos)
print('{} repos'.format(repo_count))

# One HTML table row per 'top' value; every '.' in the row becomes ','.
row_template = '<tr><td>{}</td><td>{} - {:.1f}%</td></tr>'
for top_value, occurrences in by_top.items():
    row = row_template.format(top_value, occurrences, occurrences / repo_count * 100)
    print(row.replace('.', ','))

# Russian noun (genitive plural) used in headings for each ranking field.
russian = dict(
    top='авторов',
    total_commits='коммитов',
    total_additions='добавлений',
    total_changes='изменений',
    stars='звездочек',
)
def ending(n):
    """Return the Russian plural ending to append to "проект" for the count *n*.

    Returns '' (e.g. 1, 21, 101), 'а' (e.g. 2, 23, 104) or 'ов' (e.g. 5, 11,
    20, 112).
    """
    # The "teens" exception applies to the last two digits, so it must also
    # cover 111-119, 211-219, ... and not only 11-19.
    if 10 < n % 100 < 20:
        return 'ов'
    last_digit = n % 10
    if last_digit == 1:
        return ''
    if 2 <= last_digit <= 4:
        return 'а'
    return 'ов'
def top_and_print(repos, keys, where=None, top=5, reverse=False):
    """Print an HTML table with the leading repos ranked by the fields in *keys*.

    repos   -- iterable of repo dicts.
    keys    -- field names forming the sort key; keys[0] also selects the
               noun used in the heading.
    where   -- optional dict of field -> required value; only repos matching
               every pair are kept. When truthy, the heading also gets the
               ' с одним автором' suffix.
    top     -- maximum number of rows to print.
    reverse -- False (default) puts the largest values first, True the
               smallest.

    Prints 'No repos with <where>' instead of a table when nothing is left.
    """
    if where:
        candidates = [
            r for r in repos
            if all(r[field] == wanted for field, wanted in where.items())
        ]
    else:
        candidates = repos

    # The flag is inverted on purpose: descending order is the default.
    ranked = sorted(candidates, key=lambda r: tuple(r[k] for k in keys), reverse=not reverse)
    leaders = ranked[:top]

    if not leaders:
        print('No repos with {}'.format(where))
        return

    suffix = ' с одним автором' if where else ''
    print('<h4>Топ {} проект{} по количеству {}{}</h4><table>'.format(top, ending(top), russian[keys[0]],
                                                                     suffix))
    print('<tr><th>Проект</th><th>Язык</th><th>Звездочки</th><th>Коммиты</th>'
          '<th>Добавления</th><th>Изменения</th><th>Авторы</th></tr>')

    row_template = ('<tr>'
                    '<td><a href="https://github.com/{}">{}</a></td>'
                    '<td>{}</td>'
                    '<td>{}</td>'
                    '<td>{}</td>'
                    '<td>{}</td>'
                    '<td>{}</td>'
                    '<td>{}</td>'
                    '</tr>')
    columns = ('name', 'name', 'language', 'stars', 'total_commits',
               'total_additions', 'total_changes', 'top')
    for r in leaders:
        print(row_template.format(*[r[column] for column in columns]))
    print('</table>')
    print()
# Overall rankings across all languages.
for ranking_key in ('top', 'total_commits', 'total_additions', 'total_changes'):
    top_and_print(repos, [ranking_key], top=20)
# Rankings restricted to repos whose 'top' value is 1.
for ranking_key in ('stars', 'total_commits'):
    top_and_print(repos, [ranking_key], where={'top': 1}, top=7)

total_repos = len(repos)
lang_stats = {}

# One collapsible section per language, largest language first.
languages_by_size = sorted(repos_by_language.items(), key=lambda e: len(e[1]), reverse=True)
# NOTE: the loop deliberately keeps rebinding the module-level name `repos`,
# exactly as the script has always done.
for lang, repos in languages_by_size:
    print('<spoiler title="{}">'.format(lang))
    print()

    n = len(repos)
    share_line = '{} проект{} - {:.1f}%'.format(n, ending(n), n / total_repos * 100)
    print(share_line.replace('.', ','))
    print()

    average_top = sum(r['top'] for r in repos) / n
    # Only the first '.' (the decimal point) is replaced; the final full stop stays.
    print('Среднее количество авторов: {:.1f}.'.format(average_top).replace('.', ',', 1))
    lang_stats[lang] = (n, average_top)
    print()

    for ranking_key in ('top', 'total_commits', 'total_additions', 'total_changes'):
        top_and_print(repos, [ranking_key])
    for ranking_key in ('stars', 'total_commits'):
        top_and_print(repos, [ranking_key], where={'top': 1}, top=3)
    print('</spoiler>')

# Summary table: one row per language, ordered by repo count (descending).
summary_row = '<tr><td>{}</td><td>{} - {:.1f}%</td><td>{:.1f}</td></tr>'
for lang, stats in sorted(lang_stats.items(), key=lambda e: e[1][0], reverse=True):
    lang_count, lang_average = stats
    print(summary_row.format(lang, lang_count, lang_count / total_repos * 100, lang_average).replace('.', ','))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
# Star threshold; it is also the base name of the JSON data file.
MIN_STARS = 700
# Share of a repo-wide total that the leading contributors must reach.
THRESHOLD = 0.9

data_path = '{}.json'.format(MIN_STARS)
with open(data_path) as json_file:
    repos = json.load(json_file)

# Per-contributor metrics that are summed and compared against THRESHOLD.
keys = ['commits', 'additions', 'changes']
def sum_contributions(contributors):
    """Sum every metric listed in ``keys`` over (login, stats) pairs.

    *contributors* is an iterable of 2-item sequences whose second item maps
    metric name -> number. Returns a dict metric -> total.
    """
    totals = {}
    for metric in keys:
        totals[metric] = sum(pair[1][metric] for pair in contributors)
    return totals
# For every repo find the smallest number of leading contributors who together
# reach THRESHOLD of the repo total on at least two metrics, then store that
# number as repo['top'] and the satisfied metrics as repo['criteria'].
for repo in repos:
    contributors = repo['contributors'].items()
    totals = sum_contributions(contributors)
    # Contributors ordered from largest to smallest, separately for each metric.
    ranked_by = {
        metric: sorted(contributors, key=lambda pair: pair[1][metric], reverse=True)
        for metric in keys
    }

    found_size = 0
    found_criteria = []
    for size in range(1, len(contributors) + 1):
        for metric in keys:
            leader_sums = sum_contributions(ranked_by[metric][:size])
            satisfied = [
                m for m in keys
                if totals[m] > 0 and leader_sums[m] / totals[m] >= THRESHOLD
            ]
            # The ranking metric itself must pass, plus at least one more.
            if metric in satisfied and len(satisfied) >= 2:
                found_size = size
                found_criteria = satisfied
                break
        if found_size:
            break

    # 0 and [] are stored when no group of contributors qualifies.
    repo['top'] = found_size
    repo['criteria'] = sorted(found_criteria)

with open('{}.json'.format(MIN_STARS), 'w') as json_file:
    json.dump(repos, json_file, sort_keys=True, indent=4, separators=(',', ': '))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
# Star threshold; it is also the base name of the JSON files.
MIN_STARS = 700

# Read through a context manager so the data file is closed right after
# loading instead of staying open until garbage collection.
with open('{}.json'.format(MIN_STARS)) as json_file:
    repos = json.load(json_file)

# Names of the repos that have no contributor statistics recorded.
empty = [repo['name'] for repo in repos if not repo['contributors']]

with open('{}-empty.json'.format(MIN_STARS), 'w') as json_file:
    json.dump(empty, json_file, sort_keys=True, indent=4, separators=(',', ': '))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from github3 import login | |
import json | |
from time import sleep | |
# github3.py session used by the functions and the main section of this script.
# NOTE(review): 'login' / 'pass' look like placeholders for real credentials — confirm.
gh = login('login', 'pass')
def all_repos_by_stars(min_stars):
    """Search GitHub for repositories that have more than *min_stars* stars.

    One search is issued per creation-date window. Results are merged by full
    name, so a repository matched by two overlapping windows is kept once.

    Returns a dict mapping ``full_name`` to the repository object taken from
    the search result.
    """
    date_ranges = (
        '<2008-01-02',
        '2008-01-01..2009-01-02',
        '2009-01-01..2010-01-02',
        '2010-01-01..2011-01-02',
        '2011-01-01..2012-01-02',
        '2012-01-01..2013-01-02',
        '2013-01-01..2014-01-02',
        '>2014-01-01',
    )
    found = {}
    for date_range in date_ranges:
        # Pause once before the search when the search quota is nearly used up.
        if gh.rate_limit()['resources']['search']['remaining'] < 10:
            print('Throttling requests, sleeping for 60 seconds')
            sleep(60)
        query = 'created:{} stars:>{}'.format(date_range, min_stars)
        for res in gh.search_repositories(query):
            repo = res.repository
            found[repo.full_name] = repo
    print('Found {} repos'.format(len(found)))
    return found
def parse_repos(repos):
    """Collect contributor statistics for every repository in *repos*.

    *repos* maps a name to a repository object exposing ``full_name``,
    ``language``, ``stargazers`` and ``iter_contributor_statistics()``.

    Returns a list with one dict per repository: its name, language, stars,
    the totals over all contributors and a ``contributors`` dict keyed by
    login. A progress line is printed for each repository.
    """
    repo_total = len(repos)
    parsed = []
    for position, repo in enumerate(repos.values(), 1):
        print('{}% {}'.format(round(position / repo_total * 100), repo.full_name))
        contributors = {}
        summary = {
            'name': repo.full_name,
            'language': repo.language,
            'stars': repo.stargazers,
            'total_commits': 0,
            'total_additions': 0,
            'total_changes': 0,
            'contributors': contributors,
        }
        stats_iter = iter(repo.iter_contributor_statistics())
        while True:
            # Errors such as "'NoneType' object has no attribute 'get'" show up
            # here from time to time (github3.py bug or connection problems?).
            # They are printed and iteration continues.
            try:
                stats = next(stats_iter)
                net_added = 0
                churn = 0
                for week in stats.weeks:
                    added, deleted = int(week['a']), int(week['d'])
                    net_added += added - deleted
                    churn += added + deleted
                summary['total_commits'] += stats.total
                summary['total_additions'] += net_added
                summary['total_changes'] += churn
                contributors[stats.author.login] = {
                    'commits': stats.total,
                    'additions': net_added,
                    'changes': churn,
                }
            except StopIteration:
                break
            except Exception as e:
                print(e)
        parsed.append(summary)
    return parsed
if __name__ == '__main__':
    min_stars = 700
    # True: retry only the repos listed in '<min_stars>-empty.json'.
    # False: run the full search and rewrite '<min_stars>.json' from scratch.
    parse_empty = True
    if parse_empty:
        with open('{}-empty.json'.format(min_stars)) as json_file:
            all_empty = json.load(json_file)

        # Presumably budgets two API requests per repo with one kept in reserve.
        # Clamp at zero: with a remaining quota of 0 or 1 the raw value is -1,
        # and a negative slice bound would select almost the whole list
        # instead of nothing.
        repos_able_to_process = max(0, (gh.rate_limit()['rate']['remaining'] // 2) - 1)
        print('Rate limits allow to process {} repos'.format(repos_able_to_process))
        empty_to_process = list(all_empty[:repos_able_to_process])

        repos = {}
        for i, full_name in enumerate(empty_to_process):
            print('{}% {}'.format(round(i / len(empty_to_process) * 100), full_name))
            owner, repo = full_name.split('/')
            repos[full_name] = gh.repository(owner, repo)
        result = parse_repos(repos)

        with open('{}.json'.format(min_stars)) as json_file:
            old_result = json.load(json_file)
        new_repos = {repo['name']: repo for repo in old_result}
        for repo_result in result:
            name = repo_result['name']
            if repo_result['contributors']:
                new_repos[name] = repo_result
            else:
                # Still no statistics: take it off the processed list so the
                # name stays in the "empty" file for another attempt.
                empty_to_process.remove(name)
        with open('{}.json'.format(min_stars), 'w') as json_file:
            json.dump(list(new_repos.values()), json_file, sort_keys=True, indent=4, separators=(',', ': '))

        # Whatever is left in empty_to_process now has data; drop it from the
        # "empty" list.
        for processed in empty_to_process:
            all_empty.remove(processed)
        with open('{}-empty.json'.format(min_stars), 'w') as json_file:
            json.dump(all_empty, json_file, sort_keys=True, indent=4, separators=(',', ': '))
    else:
        result = parse_repos(all_repos_by_stars(min_stars))
        with open('{}.json'.format(min_stars), 'w') as json_file:
            json.dump(result, json_file, sort_keys=True, indent=4, separators=(',', ': '))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from github3 import login | |
# Log in and print the rate-limit information reported for the session.
gh = login('login', 'pass')
rate_limit_info = gh.rate_limit()
print(rate_limit_info)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import csv | |
# Star threshold; it is also the base name of the JSON input and CSV output.
MIN_STARS = 700

# Read through a context manager so the input file is closed after loading.
with open('{}.json'.format(MIN_STARS)) as json_file:
    repos = json.load(json_file)

# newline='' is what the csv module requires for files passed to a writer;
# without it, platforms that translate '\n' emit an extra blank line per row.
with open('{}.csv'.format(MIN_STARS), 'w', newline='') as csv_file:
    field_names = ['name', 'language', 'stars', 'total_commits', 'total_additions', 'total_changes', 'top', 'criteria']
    # extrasaction='ignore' drops repo keys that are not listed in field_names.
    writer = csv.DictWriter(csv_file, field_names, extrasaction='ignore')
    writer.writeheader()
    writer.writerows(repos)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Зависимость - github3.py, среда Python 3
login('login', 'pass')
10) Скопипастить прямо в статью на Хабре. Знаю, что все очень криво, но лениво вылизывать.