batemapf · March 30, 2017 16:43
diff --git a/gh_commits.py b/gh_commits.py
 #! /usr/bin/env python
 import requests
 import os, json
 import datetime as dt

 class DataSet:
     """In-memory list of records with small inspection and export helpers.

     Attributes:
       created -- local timestamp taken when the instance was built.
       data    -- list of records; ``keys`` expects the first record to be
                  a dict and the JSON helpers expect serialisable records.
     """

     def __init__(self):
         self.created = dt.datetime.now()
         self.data = []

     def length(self):
         """Return the number of records currently held in ``data``."""
         return len(self.data)

     def keys(self):
         """Return the keys of the first record.

         An empty data set yields an empty key view instead of raising
         IndexError, so callers can always iterate the result.
         """
         if not self.data:
             return {}.keys()
         return self.data[0].keys()

     def print_item(self, index):
         """Pretty-print the record at ``index`` as indented JSON.

         Returns None (the result of ``print``). Raises IndexError when
         ``index`` is out of range.
         """
         return print(json.dumps(self.data[index], indent=4))

     def dump_json(self, name):
         """Write ``data`` as indented JSON to '<name>_<UTC timestamp>.json'.

         Returns the number of characters written.
         """
         filename = '{}_{}.json'.format(name, dt.datetime.utcnow())
         with open(filename, 'w') as outfile:
             return outfile.write(json.dumps(self.data, indent=4))

 class GitHubDataSet(DataSet):
     """DataSet whose ``data`` is fetched from a GitHub API endpoint.

     Construction performs the HTTP requests: one to read the response
     headers, then one per page of results (or a single one when the
     response is not paginated).
     """
     base_url = 'https://api.github.com'

     def __init__(self, endpoint, params=None):
         """Fetch ``endpoint`` (a path such as '/orgs/18f/repos') into ``data``.

         ``params`` is an optional dict of query parameters that is sent
         with every request made by this instance.
         """
         # Run the base initialiser so ``created`` exists on this subclass too.
         super().__init__()
         # A ``{}`` default would be one dict shared by every instance.
         self.params = {} if params is None else params
         self.endpoint = endpoint
         self.token = os.getenv('GITHUB_TOKEN')
         # This first request only supplies the headers used by get_data
         # to detect pagination.
         self.headers = self.github_get().headers
         self.data = self.get_data()

     def github_get(self, diff_endpoint=None):
         """GET ``base_url`` + ``diff_endpoint`` and return the response.

         ``self.endpoint`` is used when ``diff_endpoint`` is falsy.
         ``self.params`` is always sent as the query parameters.
         """
         headers = {}
         # An unset token would otherwise be sent as the literal
         # "token None"; send no Authorization header instead.
         if self.token:
             headers['Authorization'] = 'token {}'.format(self.token)
         return requests.get(
             '{}{}'.format(self.base_url, diff_endpoint or self.endpoint),
             headers=headers,
             params=self.params,
         )

     @staticmethod
     def _last_page_url(link_header):
         """Return the rel="last" URL from a Link header value, or None."""
         import re

         if not link_header:
             return None
         pairs = re.findall(r'<([^>]*)>\s*;\s*rel="([^"]*)"', link_header)
         for url, rel in pairs:
             if rel == 'last':
                 return url
         return None

     def get_data(self):
         """Return the decoded JSON body, concatenating all pages if paginated.

         Pagination is detected from the ``Link`` header captured in
         ``self.headers``. When it names a last page, pages 1..last are
         requested and their items are returned as one flat list.
         Otherwise the endpoint is fetched once and its JSON is returned
         unchanged.
         """
         from urllib.parse import parse_qs, urlsplit

         last_url = self._last_page_url(self.headers.get('Link'))
         page_values = []
         if last_url is not None:
             # Look the page number up by name; its position in the query
             # string is not fixed once other parameters are present.
             page_values = parse_qs(urlsplit(last_url).query).get('page', [])

         # Handle non-paginated data.
         if not page_values:
             return self.github_get().json()

         # Handle paginated data.
         last_page = int(page_values[0])
         if last_url.startswith(self.base_url):
             path = last_url[len(self.base_url):].split('?', 1)[0]
         else:
             path = self.endpoint
         items = []
         # GitHub page numbers start at 1, so request 1..last inclusive.
         for page in range(1, last_page + 1):
             response = self.github_get(
                 diff_endpoint='{}?page={}'.format(path, page)
             )
             items.extend(response.json())
         return items

 def check_rate_limit():
     """Return the 'remaining' count of the 'core' resource from /rate_limit.

     The request is authenticated with the GITHUB_TOKEN environment
     variable when it is set.
     """
     token = os.getenv('GITHUB_TOKEN')
     # An unset token would otherwise be sent as the literal "token None".
     headers = {'Authorization': 'token {}'.format(token)} if token else {}
     response = requests.get(
         'https://api.github.com/rate_limit',
         headers=headers,
     )
     return response.json()['resources']['core']['remaining']

 def get_org_repos(team):
     """Build a GitHubDataSet for the '/orgs/<team>/repos' endpoint.

     ``team`` is the plain-language team name, e.g. 18f.
     """
     endpoint = '/orgs/{}/repos'.format(team)
     return GitHubDataSet(endpoint)

 def get_bulk_commit_history(weeks, repos, org='18F'):
     """Fetch the commit history of every repo over the last ``weeks`` weeks.

     weeks -- number of prior weeks of history sought.
     repos -- object whose ``data`` attribute holds repo dicts with a
              'name' key, e.g. a GitHubDataSet of an organisation's repos.
     org   -- owner used in the '/repos/<org>/<name>/commits' path;
              defaults to '18F', the value that was previously hard-coded.

     Returns a tuple ``(datasets, end, start)``: a list with one
     GitHubDataSet per repo, then the end and start dates of the window.
     The window ends on the most recent Sunday strictly before today.
     """
     # Read the clock once so both uses agree even across midnight.
     today = dt.date.today()
     end = today - dt.timedelta(days=today.weekday() + 1)
     start = end - dt.timedelta(days=weeks * 7)
     datasets = [
         GitHubDataSet(
             '/repos/{}/{}/commits'.format(org, r['name']),
             # A separate dict per repo, since each data set keeps its own.
             params={'since': start, 'until': end},
         )
         for r in repos.data
     ]
     return datasets, end, start

 def get_18fs_commits(weeks):
     """Write the last ``weeks`` weeks of commits for the 18f org to JSON.

     Prints the remaining API call count, fetches the commit history of
     every repo returned for '18f', and dumps one list of commits per
     repo to a 'commits_<timestamp>.json' file.
     """
     print('{} calls remaining...'.format(check_rate_limit()))
     commits, _end, _start = get_bulk_commit_history(
         weeks, get_org_repos('18f')
     )
     to_file = DataSet()
     to_file.data = [i.data for i in commits]
     to_file.dump_json('commits')
     # length is a method; without the call its bound-method repr was
     # printed instead of the repo count.
     print('History for {} repos written to file.'.format(to_file.length()))

 # Top-level statement: runs whenever this module is executed or imported,
 # requesting 52 weeks of commit history.
 get_18fs_commits(52)
	#! /usr/bin/env python
	import requests
	import os, json
	import datetime as dt

	class DataSet:
	    """In-memory list of records with small inspection and export helpers.

	    Attributes:
	      created -- local timestamp taken when the instance was built.
	      data    -- list of records; ``keys`` expects the first record to be
	                 a dict and the JSON helpers expect serialisable records.
	    """

	    def __init__(self):
	        self.created = dt.datetime.now()
	        self.data = []

	    def length(self):
	        """Return the number of records currently held in ``data``."""
	        return len(self.data)

	    def keys(self):
	        """Return the keys of the first record.

	        An empty data set yields an empty key view instead of raising
	        IndexError, so callers can always iterate the result.
	        """
	        if not self.data:
	            return {}.keys()
	        return self.data[0].keys()

	    def print_item(self, index):
	        """Pretty-print the record at ``index`` as indented JSON.

	        Returns None (the result of ``print``). Raises IndexError when
	        ``index`` is out of range.
	        """
	        return print(json.dumps(self.data[index], indent=4))

	    def dump_json(self, name):
	        """Write ``data`` as indented JSON to '<name>_<UTC timestamp>.json'.

	        Returns the number of characters written.
	        """
	        filename = '{}_{}.json'.format(name, dt.datetime.utcnow())
	        with open(filename, 'w') as outfile:
	            return outfile.write(json.dumps(self.data, indent=4))

	class GitHubDataSet(DataSet):
	    """DataSet whose ``data`` is fetched from a GitHub API endpoint.

	    Construction performs the HTTP requests: one to read the response
	    headers, then one per page of results (or a single one when the
	    response is not paginated).
	    """
	    base_url = 'https://api.github.com'

	    def __init__(self, endpoint, params=None):
	        """Fetch ``endpoint`` (a path such as '/orgs/18f/repos') into ``data``.

	        ``params`` is an optional dict of query parameters that is sent
	        with every request made by this instance.
	        """
	        # Run the base initialiser so ``created`` exists on this subclass too.
	        super().__init__()
	        # A ``{}`` default would be one dict shared by every instance.
	        self.params = {} if params is None else params
	        self.endpoint = endpoint
	        self.token = os.getenv('GITHUB_TOKEN')
	        # This first request only supplies the headers used by get_data
	        # to detect pagination.
	        self.headers = self.github_get().headers
	        self.data = self.get_data()

	    def github_get(self, diff_endpoint=None):
	        """GET ``base_url`` + ``diff_endpoint`` and return the response.

	        ``self.endpoint`` is used when ``diff_endpoint`` is falsy.
	        ``self.params`` is always sent as the query parameters.
	        """
	        headers = {}
	        # An unset token would otherwise be sent as the literal
	        # "token None"; send no Authorization header instead.
	        if self.token:
	            headers['Authorization'] = 'token {}'.format(self.token)
	        return requests.get(
	            '{}{}'.format(self.base_url, diff_endpoint or self.endpoint),
	            headers=headers,
	            params=self.params,
	        )

	    @staticmethod
	    def _last_page_url(link_header):
	        """Return the rel="last" URL from a Link header value, or None."""
	        import re

	        if not link_header:
	            return None
	        pairs = re.findall(r'<([^>]*)>\s*;\s*rel="([^"]*)"', link_header)
	        for url, rel in pairs:
	            if rel == 'last':
	                return url
	        return None

	    def get_data(self):
	        """Return the decoded JSON body, concatenating all pages if paginated.

	        Pagination is detected from the ``Link`` header captured in
	        ``self.headers``. When it names a last page, pages 1..last are
	        requested and their items are returned as one flat list.
	        Otherwise the endpoint is fetched once and its JSON is returned
	        unchanged.
	        """
	        from urllib.parse import parse_qs, urlsplit

	        last_url = self._last_page_url(self.headers.get('Link'))
	        page_values = []
	        if last_url is not None:
	            # Look the page number up by name; its position in the query
	            # string is not fixed once other parameters are present.
	            page_values = parse_qs(urlsplit(last_url).query).get('page', [])

	        # Handle non-paginated data.
	        if not page_values:
	            return self.github_get().json()

	        # Handle paginated data.
	        last_page = int(page_values[0])
	        if last_url.startswith(self.base_url):
	            path = last_url[len(self.base_url):].split('?', 1)[0]
	        else:
	            path = self.endpoint
	        items = []
	        # GitHub page numbers start at 1, so request 1..last inclusive.
	        for page in range(1, last_page + 1):
	            response = self.github_get(
	                diff_endpoint='{}?page={}'.format(path, page)
	            )
	            items.extend(response.json())
	        return items

	def check_rate_limit():
	    """Return the 'remaining' count of the 'core' resource from /rate_limit.

	    The request is authenticated with the GITHUB_TOKEN environment
	    variable when it is set.
	    """
	    token = os.getenv('GITHUB_TOKEN')
	    # An unset token would otherwise be sent as the literal "token None".
	    headers = {'Authorization': 'token {}'.format(token)} if token else {}
	    response = requests.get(
	        'https://api.github.com/rate_limit',
	        headers=headers,
	    )
	    return response.json()['resources']['core']['remaining']

	def get_org_repos(team):
	    """Build a GitHubDataSet for the '/orgs/<team>/repos' endpoint.

	    ``team`` is the plain-language team name, e.g. 18f.
	    """
	    endpoint = '/orgs/{}/repos'.format(team)
	    return GitHubDataSet(endpoint)

	def get_bulk_commit_history(weeks, repos, org='18F'):
	    """Fetch the commit history of every repo over the last ``weeks`` weeks.

	    weeks -- number of prior weeks of history sought.
	    repos -- object whose ``data`` attribute holds repo dicts with a
	             'name' key, e.g. a GitHubDataSet of an organisation's repos.
	    org   -- owner used in the '/repos/<org>/<name>/commits' path;
	             defaults to '18F', the value that was previously hard-coded.

	    Returns a tuple ``(datasets, end, start)``: a list with one
	    GitHubDataSet per repo, then the end and start dates of the window.
	    The window ends on the most recent Sunday strictly before today.
	    """
	    # Read the clock once so both uses agree even across midnight.
	    today = dt.date.today()
	    end = today - dt.timedelta(days=today.weekday() + 1)
	    start = end - dt.timedelta(days=weeks * 7)
	    datasets = [
	        GitHubDataSet(
	            '/repos/{}/{}/commits'.format(org, r['name']),
	            # A separate dict per repo, since each data set keeps its own.
	            params={'since': start, 'until': end},
	        )
	        for r in repos.data
	    ]
	    return datasets, end, start

	def get_18fs_commits(weeks):
	    """Write the last ``weeks`` weeks of commits for the 18f org to JSON.

	    Prints the remaining API call count, fetches the commit history of
	    every repo returned for '18f', and dumps one list of commits per
	    repo to a 'commits_<timestamp>.json' file.
	    """
	    print('{} calls remaining...'.format(check_rate_limit()))
	    commits, _end, _start = get_bulk_commit_history(
	        weeks, get_org_repos('18f')
	    )
	    to_file = DataSet()
	    to_file.data = [i.data for i in commits]
	    to_file.dump_json('commits')
	    # length is a method; without the call its bound-method repr was
	    # printed instead of the repo count.
	    print('History for {} repos written to file.'.format(to_file.length()))

	# Top-level statement: runs whenever this module is executed or imported,
	# requesting 52 weeks of commit history.
	get_18fs_commits(52)
No results found