-
-
Save non/bc0cac3053b4cb532b09 to your computer and use it in GitHub Desktop.
| #!/usr/bin/env python | |
| # -*- coding: utf-8 -*- | |
| # | |
| # by Erik Osheim | |
| # | |
| # Reads README.md, and writes a README.md.new. If the format of | |
| # README.md changes, this script may need modifications. | |
| # | |
| # Currently it rewrites each section, doing the following: | |
| # 1. alphabetizing | |
| # 2. querying GitHub for watchers, stars, days since active | |
| # 3. formatting the link title to show this info | |
| # 4. bolding projects with lots of stars | |
| # | |
| # Once README.md has the stars/days info in the links, the | |
| # repo_regex will need slight modification. | |
| # | |
| # Currently, the lack of OAuth2 + GH developer keys means you exceed | |
| # GitHub's hourly limit before you even finish. I have no idea how to | |
| # set up OAuth2 so I'm going to post this as a Gist and see if someone | |
| # else has an idea. | |
| import datetime | |
| import json | |
| import random | |
| import re | |
| import urllib2 | |
# Line classifiers for README.md. Every pattern is matched against a raw line
# as read from the file, so each one expects the trailing newline.
empty_regex = re.compile(r"^ *\n$")  # blank line (spaces only)
section_regex = re.compile(r"^## (.+)\n$")  # "## Title" section heading
# "* [name](link) - description" list entry -> (name, link, description)
repo_regex = re.compile(r"^\* \[(.+?)\]\((.+?)\) - (.+)\n$")
end_regex = re.compile(r"^# .+\n$")  # top-level heading ending the rewritten region
# GitHub repository URL -> (owner, name). The dot is escaped so that only the
# real host matches, and one optional trailing slash is kept out of the name.
github_regex = re.compile(r"^https://github\.com/(.+?)/(.+?)/?$")
# Read the whole README into memory as a list of lines (newlines kept).
# The context manager closes the input file even if reading fails.
with open('README.md', 'r') as inf:
    lines = list(inf)

# The rewritten copy goes to a separate file so the original is never
# clobbered. It stays open because the rest of the script writes to it.
outf = open('README.md.new', 'w')
# use fake to avoid hitting github API
# useful when over the rate limit (i.e. always)
fake = True


# TODO: need to do Oauth2 stuff here to avoid GitHub's rate limit.
def query(owner, name):
    """Look up activity statistics for the GitHub repository owner/name.

    Returns a (watchers, stars, days) tuple, where days is the number of
    whole days between the repository's ``updated_at`` timestamp and now,
    or None when GitHub answers with an HTTP error (for example when the
    rate limit is exceeded or the repository does not exist).

    When the module-level ``fake`` flag is true no request is made and a
    tuple of random values is returned instead.
    """
    if fake:
        return (random.randint(1, 100), random.randint(1, 1000), random.randint(1, 300))

    try:
        u = urllib2.urlopen('https://api.github.com/repos/%s/%s' % (owner, name))
        try:
            j = json.load(u)
        finally:
            # Release the connection even if the body is not valid JSON.
            u.close()
    except urllib2.HTTPError:
        print("%s/%s: ERROR" % (owner, name))
        return None

    # updated_at carries a literal "Z" suffix, i.e. it is a UTC timestamp, so
    # it must be compared against the current UTC time. Using local time skews
    # the result by the UTC offset and can even yield a negative day count.
    t = datetime.datetime.strptime(j['updated_at'], "%Y-%m-%dT%H:%M:%SZ")
    dt = datetime.datetime.utcnow() - t
    print("%s/%s: ok" % (owner, name))
    # NOTE(review): GitHub's REST API reportedly mirrors the star count in
    # watchers_count; subscribers_count may be the intended field -- confirm.
    return (j['watchers_count'], j['stargazers_count'], dt.days)
def flush_section(outf, section, sdesc, repos):
    """Write one README section to outf, alphabetized and annotated.

    outf    -- writable file-like object receiving the output
    section -- the section heading line, written verbatim
    sdesc   -- optional section description line, or None/empty to omit it
    repos   -- list of (name, link, description) tuples

    Entries are written in case-insensitive name order. For links that point
    at GitHub the title gains the star count and days since last activity,
    and projects with more than 500 stars are set in bold. Entries that are
    not on GitHub, or whose lookup fails, get "?" placeholders instead.
    """
    outf.write(section)
    outf.write('\n')
    if sdesc:
        outf.write(sdesc)
        outf.write('\n')

    # sorted() rather than list.sort() so the caller's list is left untouched.
    for name, link, rdesc in sorted(repos, key=lambda t: t[0].lower()):
        title = '%s ★ ? ⧗ ?' % (name,)
        m = github_regex.match(link)
        res = query(m.group(1), m.group(2)) if m else None
        if res is not None:
            _watchers, stars, days = res
            title = '%s ★ %d ⧗ %d' % (name, stars, int(days))
            if stars > 500:
                # Markdown bold needs double asterisks; a single pair only
                # produces emphasis (italics).
                title = '**' + title + '**'
        outf.write('* [%s](%s) - %s\n' % (title, link, rdesc))
    outf.write('\n')
# Parser state for the single pass over README.md:
#   started  -- the first "## " section heading has been seen
#   finished -- the "# " heading that ends the rewritten region has been seen
#   section, sdesc, repos -- heading line, optional description line and
#                            collected entries of the section being read
started = False
finished = False
section = None
sdesc = None
repos = []

for line in lines:
    if finished:
        # Everything after the rewritten region is copied through untouched.
        outf.write(line)
    elif started:
        if end_regex.match(line):
            flush_section(outf, section, sdesc, repos)
            outf.write(line)
            finished = True
        elif empty_regex.match(line):
            # Blank lines are dropped here; flush_section re-inserts spacing.
            continue
        elif section_regex.match(line):
            flush_section(outf, section, sdesc, repos)
            section = line
            sdesc = None
            repos = []
        else:
            m = repo_regex.match(line)
            if m:
                name, link, rdesc = m.groups()
                repos.append((name, link, rdesc))
            elif sdesc is None:
                # The first unrecognized line in a section is its description.
                sdesc = line
            else:
                raise Exception("cannot parse %r" % line)
    else:
        if section_regex.match(line):
            section = line
            started = True
        else:
            # Preamble before the first section is copied through untouched.
            outf.write(line)

# If the file ends while a section is still open (no closing "# " heading),
# that section has not been written yet; emit it so it is not lost.
if started and not finished:
    flush_section(outf, section, sdesc, repos)

# Close explicitly so all buffered output reaches README.md.new.
outf.close()
@bzz seems pretty simple! Good tip.
Does this mean anyone wanting to contribute would have to set up auth tokens? Maybe this is something better handled by a bot somewhere? Dunno, just tossing out thoughts since I'd like to lower the barrier to contribution as much as possible.
Perhaps I am overthinking it and this could be run by someone (like a collaborator) once in a while.
(I am under the assumption that this is run per-PR by a potential contributor - though it's late for me and this is making less sense now... maybe it's just run once in a while locally)
@longcao -- i think this is maybe something that we would run ourselves periodically to keep things up-to-date.
(although i could imagine a mode where it only runs for "empty" entries, and that could possibly run without authentication.)
@bzz Thanks again for your help! I felt really blocked, although getting basic auth working was super easy.
@longcao I have a PR for this, see: lauris/awesome-scala#139
@bzz Aha! That's a great idea, thanks! I'll try that out and see how it goes.