Created
November 12, 2017 01:02
-
-
Save dmsimard/4f9177486dd8ae02d6c31ac1898e06ef to your computer and use it in GitHub Desktop.
clone-everything
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from requests.auth import HTTPBasicAuth | |
from urlparse import parse_qsl, urlsplit | |
import git | |
import json | |
import os | |
import shutil | |
# GitHub applies much stricter rate limits to non-authenticated requests.
# Create a token at https://github.com/settings/tokens and export it as
# GITHUB_USER / GITHUB_TOKEN; the credentials are read from the environment
# so they never have to be committed to this file. Both default to "".
USER = os.environ.get("GITHUB_USER", "")
TOKEN = os.environ.get("GITHUB_TOKEN", "")

# Base URL of the GitHub API endpoint for organizations.
API = "https://api.github.com/orgs/"

# Organizations whose repositories are listed and cloned.
ORGANIZATIONS = [
    "openstack",
    "openstack-infra"
]

# Repositories that must not be cloned (existing clones are removed).
# TODO: Load YAML and look for "/home/gerrit2/acls/openstack/retired.config"
# https://github.com/openstack-infra/project-config/blob/master/gerrit/projects.yaml
BLACKLIST = [
    "openstack/deb-"
]

# Directory under which each repository is cloned as <org>/<repo>.
CLONE_DIR = "/data/hound/git"

# Values copied verbatim into the generated config
# ('max-concurrent-indexers' and 'dbpath').
MAX_CONCURRENT_INDEXERS = 4
DBPATH = "data"
def _github_auth():
    """Return basic-auth credentials, or None to query GitHub anonymously."""
    # USER and TOKEN are optional module-level settings. Look them up lazily
    # so that a missing or empty value falls back to anonymous requests
    # instead of raising NameError.
    user = globals().get('USER')
    token = globals().get('TOKEN')
    if user and token:
        return HTTPBasicAuth(user, token)
    return None


def _get_repo_page(org, page):
    """Fetch one page (up to 100 entries) of an organization's repositories.

    Raises requests.HTTPError when GitHub answers with an error status
    (rate limiting, bad credentials, unknown organization, ...), so the
    error body is never mistaken for a list of repositories.
    """
    url = API + org + '/repos?per_page=100&page=%s' % page
    resp = requests.get(url, auth=_github_auth())
    resp.raise_for_status()
    return resp


def _is_blacklisted(repo):
    """Return True if the repository is excluded by BLACKLIST.

    An entry excludes a repository when it equals the repository's full
    ("org/name") or short name, or when it is a prefix of the full name
    (e.g. "openstack/deb-" excludes every "openstack/deb-*" repository).
    """
    full_name = repo['full_name']
    if full_name in BLACKLIST or repo['name'] in BLACKLIST:
        return True
    # Ignore empty entries: "" is a prefix of every name.
    prefixes = tuple(entry for entry in BLACKLIST if entry)
    return bool(prefixes) and full_name.startswith(prefixes)


def _sync_repo(repo, path):
    """Clone the repository into path, or pull if path already exists.

    Git failures are reported and skipped so that one broken repository
    does not abort the whole run.
    """
    name = repo['full_name']
    if os.path.exists(path):
        print("Updating %s from %s" % (name, repo['clone_url']))
        try:
            git.Repo(path).remotes.origin.pull()
        except (git.exc.GitCommandError,
                git.exc.InvalidGitRepositoryError) as e:
            print('Error updating %s: %s' % (name, str(e)))
    else:
        print("Cloning %s from %s" % (name, repo['clone_url']))
        try:
            git.Repo.clone_from(repo['clone_url'], path,
                                branch=repo['default_branch'])
        except git.exc.GitCommandError as e:
            print('Error cloning %s: %s' % (name, str(e)))


def generate_org_repo_config(org):
    """Mirror every repository of a GitHub organization and describe it.

    Lists the organization's repositories through the GitHub API, clones
    (or updates) each one under CLONE_DIR and removes local copies of
    blacklisted repositories.

    org: name of the GitHub organization, e.g. "openstack".

    Returns a dict mapping each repository's full name ("org/name") to its
    config entry ('url', 'url-pattern', 'enable-poll-updates', 'ref').

    Raises requests.HTTPError if a GitHub API request fails.
    """
    first_page = _get_repo_page(org, 1)

    # The "last" relation of the Link header carries the number of pages;
    # it is absent when all results fit on a single page.
    last_link = first_page.links.get('last')
    if last_link:
        query = dict(parse_qsl(urlsplit(last_link['url']).query))
        pages = int(query.get('page', 1))
    else:
        pages = 1
    print("Retrieving %s pages of 100 results for %s..." % (pages, org))

    repos = {}
    base_url = 'https://github.com/%s/blob/%s/{path}{anchor}'
    for page in range(1, pages + 1):
        # Page 1 was already downloaded to discover the page count.
        resp = first_page if page == 1 else _get_repo_page(org, page)
        for repo in resp.json():
            name = repo['full_name']
            path = os.path.join(CLONE_DIR, name)

            # If the repo is blacklisted, we're not going to clone it
            # and delete it if it exists
            if _is_blacklisted(repo):
                if os.path.exists(path):
                    shutil.rmtree(path)
                continue

            repos[name] = {
                # Point at the local clone, wherever CLONE_DIR is.
                'url': "file://%s" % path,
                'url-pattern': {
                    'base-url': base_url % (name, repo['default_branch']),
                    'anchor': '#L{line}'
                },
                'enable-poll-updates': False,
                # https://github.com/etsy/hound/pull/224
                'ref': repo['default_branch'],
            }
            _sync_repo(repo, path)
    return repos
if __name__ == "__main__":
    # Collect the repository entries of every configured organization.
    all_repos = {}
    for organization in ORGANIZATIONS:
        org_repos = generate_org_repo_config(organization)
        all_repos.update(org_repos)

    # Assemble the final config document and serialize it with stable
    # key ordering.
    hound_config = {
        'repos': all_repos,
        'dbpath': DBPATH,
        'max-concurrent-indexers': MAX_CONCURRENT_INDEXERS
    }
    serialized = json.dumps(hound_config, indent=2, sort_keys=True)

    with open('/data/hound/config.json', 'w') as config_file:
        config_file.write(serialized)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment