non · July 28, 2015 22:57 · non · Jul 29, 2015 · longcao · Jul 29, 2015
diff --git a/updater.py b/updater.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # by Erik Osheim
 #
 # Reads README.md, and writes a README.md.new. If the format of
 # README.md changes, this script may need modifications.
 #
 # Currently it rewrites each section, doing the following:
 #  1. alphabetizing
 #  2. querying GitHub for watchers, stars, days since active
 #  3. formatting the link title to show this info
 #  4. bolding projects with lots of stars
 #
 # Once README.md has the stars/days info in the links, the
 # repo_regex will need slight modification.
 #
 # Currently, the lack of OAuth2 + GH developer keys means you exceed
 # GitHub's hourly limit before you even finish. I have no idea how to
 # set up OAuth2 so I'm going to post this as a Gist and see if someone
 # else has an idea.

 import datetime
 import json
 import random
 import re
 import urllib2

 # Line-level patterns for README.md. Each one (except github_regex, which
 # is applied to a link target) expects the trailing newline that file
 # iteration leaves on a line.
 empty_regex = re.compile(r"^ *\n$")
 section_regex = re.compile(r"^## (.+)\n$")
 repo_regex = re.compile(r"^\* \[(.+?)\]\((.+?)\) - (.+)\n$")
 end_regex = re.compile(r"^# .+\n$")
 # Dots are escaped so that only the literal host github.com matches.
 github_regex = re.compile(r"^https://github\.com/(.+?)/(.+)$")

 # Read the whole README up front; the with-block closes the handle even
 # if reading fails part-way.
 with open('README.md', 'r') as inf:
     lines = list(inf)

 # Output handle used by the rewriting loop further down the script.
 outf = open('README.md.new', 'w')

 # use fake to avoid hitting github API
 # useful when over the rate limit (i.e. always)
 fake = True

 # TODO: need to do Oauth2 stuff here to avoid GitHub's rate limit.
 def query(owner, name):
     """Return (watchers, stars, days since last update) for a GitHub repo.

     When the module-level ``fake`` flag is true, no request is made and
     three random integers are returned instead. Otherwise the GitHub
     repos API is queried for ``owner/name``.

     Returns None when the request fails (HTTP error status or a
     network-level failure), after printing an ERROR line.
     """
     if fake:
         return (random.randint(1, 100), random.randint(1, 1000), random.randint(1, 300))
     try:
         u = urllib2.urlopen('https://api.github.com/repos/%s/%s' % (owner, name))
         try:
             j = json.load(u)
         finally:
             # Release the connection even if the body is not valid JSON.
             u.close()
     except urllib2.URLError:
         # URLError is the base class of HTTPError, so this covers HTTP
         # error statuses as well as DNS/connection failures.
         print("%s/%s: ERROR" % (owner, name))
         return None
     t = datetime.datetime.strptime(j['updated_at'], "%Y-%m-%dT%H:%M:%SZ")
     # The timestamp carries a 'Z' (UTC) suffix, so it must be compared
     # against the current UTC time rather than local time.
     dt = datetime.datetime.utcnow() - t
     print("%s/%s: ok" % (owner, name))
     return (j['watchers_count'], j['stargazers_count'], dt.days)

 def flush_section(outf, section, sdesc, repos):
     """Write one README section to ``outf`` with sorted, annotated entries.

     outf    -- writable file object receiving the output.
     section -- the section heading line (written as-is, then a newline).
     sdesc   -- optional section description line, or None.
     repos   -- list of (name, link, description) tuples.

     Entries are written in case-insensitive name order. For links that
     match ``github_regex`` and for which ``query`` returns data, the link
     title gains the star count and days-since-update; titles with more
     than 500 stars are wrapped in Markdown bold. All other entries get
     '?' placeholders.
     """
     outf.write(section)
     outf.write('\n')
     if sdesc:
         outf.write(sdesc)
         outf.write('\n')
     # sorted() rather than list.sort() so the caller's list is not mutated.
     for name, link, rdesc in sorted(repos, key=lambda t: t[0].lower()):
         m = github_regex.match(link)
         res = query(m.group(1), m.group(2)) if m else None
         if res is None:
             # Not a GitHub link, or the lookup failed: placeholder stats.
             outf.write('* [%s ★ ? ⧗ ?](%s) - %s\n' % (name, link, rdesc))
             continue
         _watchers, stars, days = res
         title = '%s ★ %d ⧗ %d' % (name, stars, int(days))
         if stars > 500:
             # Markdown bold is '**text**'; single asterisks only italicize.
             title = '**' + title + '**'
         outf.write('* [%s](%s) - %s\n' % (title, link, rdesc))
     outf.write('\n')

 # Rewrite README.md line by line:
 #  - before the first '## ' heading, lines are copied through unchanged;
 #  - between the first '## ' heading and the next '# ' heading, lines are
 #    collected per section and emitted via flush_section;
 #  - after that '# ' heading, lines are copied through unchanged again.
 started = False
 finished = False
 section = None
 sdesc = None
 repos = []
 try:
     for line in lines:
         if finished:
             outf.write(line)
         elif started:
             if end_regex.match(line):
                 flush_section(outf, section, sdesc, repos)
                 outf.write(line)
                 finished = True
             elif empty_regex.match(line):
                 # Blank lines are dropped; flush_section adds its own.
                 continue
             elif section_regex.match(line):
                 flush_section(outf, section, sdesc, repos)
                 section = line
                 sdesc = None
                 repos = []
             else:
                 m = repo_regex.match(line)
                 if m:
                     name, link, rdesc = m.groups()
                     repos.append((name, link, rdesc))
                 elif sdesc is None:
                     # First non-entry line in a section is its description.
                     sdesc = line
                 else:
                     raise Exception("cannot parse %r" % line)
         else:
             if section_regex.match(line):
                 section = line
                 started = True
             else:
                 outf.write(line)
     # If the file ends while still inside the sections (no closing '# '
     # heading), the last collected section would otherwise be lost.
     if started and not finished:
         flush_section(outf, section, sdesc, repos)
 finally:
     # Always close so buffered output reaches README.md.new.
     outf.close()
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-
	#
	# by Erik Osheim
	#
	# Reads README.md, and writes a README.md.new. If the format of
	# README.md changes, this script may need modifications.
	#
	# Currently it rewrites each section, doing the following:
	# 1. alphabetizing
	# 2. querying GitHub for watchers, stars, days since active
	# 3. formatting the link title to show this info
	# 4. bolding projects with lots of stars
	#
	# Once README.md has the stars/days info in the links, the
	# repo_regex will need slight modification.
	#
	# Currently, the lack of OAuth2 + GH developer keys means you exceed
	# GitHub's hourly limit before you even finish. I have no idea how to
	# set up OAuth2 so I'm going to post this as a Gist and see if someone
	# else has an idea.

	import datetime
	import json
	import random
	import re
	import urllib2

	# Line-level patterns for README.md. Each one (except github_regex, which
	# is applied to a link target) expects the trailing newline that file
	# iteration leaves on a line.
	empty_regex = re.compile(r"^ *\n$")
	section_regex = re.compile(r"^## (.+)\n$")
	repo_regex = re.compile(r"^\* \[(.+?)\]\((.+?)\) - (.+)\n$")
	end_regex = re.compile(r"^# .+\n$")
	# Dots are escaped so that only the literal host github.com matches.
	github_regex = re.compile(r"^https://github\.com/(.+?)/(.+)$")

	# Read the whole README up front; the with-block closes the handle even
	# if reading fails part-way.
	with open('README.md', 'r') as inf:
	    lines = list(inf)

	# Output handle used by the rewriting loop further down the script.
	outf = open('README.md.new', 'w')

	# use fake to avoid hitting github API
	# useful when over the rate limit (i.e. always)
	fake = True

	# TODO: need to do Oauth2 stuff here to avoid GitHub's rate limit.
	def query(owner, name):
	    """Return (watchers, stars, days since last update) for a GitHub repo.

	    When the module-level ``fake`` flag is true, no request is made and
	    three random integers are returned instead. Otherwise the GitHub
	    repos API is queried for ``owner/name``.

	    Returns None when the request fails (HTTP error status or a
	    network-level failure), after printing an ERROR line.
	    """
	    if fake:
	        return (random.randint(1, 100), random.randint(1, 1000), random.randint(1, 300))
	    try:
	        u = urllib2.urlopen('https://api.github.com/repos/%s/%s' % (owner, name))
	        try:
	            j = json.load(u)
	        finally:
	            # Release the connection even if the body is not valid JSON.
	            u.close()
	    except urllib2.URLError:
	        # URLError is the base class of HTTPError, so this covers HTTP
	        # error statuses as well as DNS/connection failures.
	        print("%s/%s: ERROR" % (owner, name))
	        return None
	    t = datetime.datetime.strptime(j['updated_at'], "%Y-%m-%dT%H:%M:%SZ")
	    # The timestamp carries a 'Z' (UTC) suffix, so it must be compared
	    # against the current UTC time rather than local time.
	    dt = datetime.datetime.utcnow() - t
	    print("%s/%s: ok" % (owner, name))
	    return (j['watchers_count'], j['stargazers_count'], dt.days)

	def flush_section(outf, section, sdesc, repos):
	    """Write one README section to ``outf`` with sorted, annotated entries.

	    outf    -- writable file object receiving the output.
	    section -- the section heading line (written as-is, then a newline).
	    sdesc   -- optional section description line, or None.
	    repos   -- list of (name, link, description) tuples.

	    Entries are written in case-insensitive name order. For links that
	    match ``github_regex`` and for which ``query`` returns data, the link
	    title gains the star count and days-since-update; titles with more
	    than 500 stars are wrapped in Markdown bold. All other entries get
	    '?' placeholders.
	    """
	    outf.write(section)
	    outf.write('\n')
	    if sdesc:
	        outf.write(sdesc)
	        outf.write('\n')
	    # sorted() rather than list.sort() so the caller's list is not mutated.
	    for name, link, rdesc in sorted(repos, key=lambda t: t[0].lower()):
	        m = github_regex.match(link)
	        res = query(m.group(1), m.group(2)) if m else None
	        if res is None:
	            # Not a GitHub link, or the lookup failed: placeholder stats.
	            outf.write('* [%s ★ ? ⧗ ?](%s) - %s\n' % (name, link, rdesc))
	            continue
	        _watchers, stars, days = res
	        title = '%s ★ %d ⧗ %d' % (name, stars, int(days))
	        if stars > 500:
	            # Markdown bold is '**text**'; the markers must not be empty
	            # strings, and single asterisks would only italicize.
	            title = '**' + title + '**'
	        outf.write('* [%s](%s) - %s\n' % (title, link, rdesc))
	    outf.write('\n')

	# Rewrite README.md line by line:
	#  - before the first '## ' heading, lines are copied through unchanged;
	#  - between the first '## ' heading and the next '# ' heading, lines are
	#    collected per section and emitted via flush_section;
	#  - after that '# ' heading, lines are copied through unchanged again.
	started = False
	finished = False
	section = None
	sdesc = None
	repos = []
	try:
	    for line in lines:
	        if finished:
	            outf.write(line)
	        elif started:
	            if end_regex.match(line):
	                flush_section(outf, section, sdesc, repos)
	                outf.write(line)
	                finished = True
	            elif empty_regex.match(line):
	                # Blank lines are dropped; flush_section adds its own.
	                continue
	            elif section_regex.match(line):
	                flush_section(outf, section, sdesc, repos)
	                section = line
	                sdesc = None
	                repos = []
	            else:
	                m = repo_regex.match(line)
	                if m:
	                    name, link, rdesc = m.groups()
	                    repos.append((name, link, rdesc))
	                elif sdesc is None:
	                    # First non-entry line in a section is its description.
	                    sdesc = line
	                else:
	                    raise Exception("cannot parse %r" % line)
	        else:
	            if section_regex.match(line):
	                section = line
	                started = True
	            else:
	                outf.write(line)
	    # If the file ends while still inside the sections (no closing '# '
	    # heading), the last collected section would otherwise be lost.
	    if started and not finished:
	        flush_section(outf, section, sdesc, repos)
	finally:
	    # Always close so buffered output reaches README.md.new.
	    outf.close()
No results found