Skip to content

Instantly share code, notes, and snippets.

@dmsimard
Created November 12, 2017 01:02
Show Gist options
  • Save dmsimard/4f9177486dd8ae02d6c31ac1898e06ef to your computer and use it in GitHub Desktop.
Save dmsimard/4f9177486dd8ae02d6c31ac1898e06ef to your computer and use it in GitHub Desktop.
clone-everything
import requests
from requests.auth import HTTPBasicAuth
from urlparse import parse_qsl, urlsplit
import git
import json
import os
import shutil
# GitHub has increased rate-limiting for non-authenticated requests, so the
# API calls are made with a user name and a personal access token.
# See tokens here: https://github.com/settings/tokens
# The credentials are read from the environment (GITHUB_USER / GITHUB_TOKEN)
# so that no secret has to be written into this file; both default to an
# empty string when the variables are not set.
USER = os.environ.get("GITHUB_USER", "")
TOKEN = os.environ.get("GITHUB_TOKEN", "")

# Base URL of the GitHub organization API; the organization name is appended.
API = "https://api.github.com/orgs/"

# GitHub organizations whose repositories are mirrored and indexed.
ORGANIZATIONS = [
    "openstack",
    "openstack-infra",
]

# Repositories to skip: a blacklisted repository is not cloned, and an
# already existing local clone of it is removed.
# NOTE(review): the entry below looks like a name prefix rather than a full
# repository name -- confirm that the matching code treats it as one.
# TODO: Load YAML and look for "/home/gerrit2/acls/openstack/retired.config"
# https://github.com/openstack-infra/project-config/blob/master/gerrit/projects.yaml
BLACKLIST = [
    "openstack/deb-",
]

# Directory under which every repository is cloned as <org>/<repo>.
CLONE_DIR = "/data/hound/git"

# Values copied verbatim into the generated configuration file under the
# 'max-concurrent-indexers' and 'dbpath' keys.
MAX_CONCURRENT_INDEXERS = 4
DBPATH = "data"
def _github_auth():
    """Return basic-auth credentials for the GitHub API, or None.

    USER and TOKEN are optional module-level settings.  When either one is
    undefined or empty the requests are sent anonymously instead of failing
    with a NameError or sending empty credentials.
    """
    user = globals().get('USER')
    token = globals().get('TOKEN')
    if user and token:
        return HTTPBasicAuth(user, token)
    return None


def _page_count(resp):
    """Return the number of result pages announced by the 'last' link.

    A response without a 'last' link is treated as a single page.
    """
    last = resp.links.get('last')
    if not last:
        return 1
    # Look the 'page' parameter up by name rather than by position so the
    # order of the query parameters does not matter.
    query = dict(parse_qsl(urlsplit(last['url']).query))
    return int(query.get('page', 1))


def _is_blacklisted(repo):
    """Tell whether a repository entry matches one of the BLACKLIST prefixes.

    Every BLACKLIST entry is treated as a prefix of either the full name
    ("org/repo") or the short name ("repo"); an exact name is therefore
    matched as well.
    """
    prefixes = tuple(BLACKLIST)
    return (repo['full_name'].startswith(prefixes) or
            repo['name'].startswith(prefixes))


def _sync_clone(name, repo, path):
    """Pull the local clone at path, or create it when it does not exist.

    Git failures are reported and swallowed so that a single broken
    repository does not stop the processing of the remaining ones.
    """
    if os.path.exists(path):
        print("Updating %s from %s" % (name, repo['clone_url']))
        try:
            # Opening the repository is part of the guarded section: the
            # path may exist without being a usable git repository.
            git.Repo(path).remotes.origin.pull()
        except (git.exc.GitCommandError,
                git.exc.InvalidGitRepositoryError) as e:
            print('Error updating %s: %s' % (name, str(e)))
    else:
        print("Cloning %s from %s" % (name, repo['clone_url']))
        try:
            git.Repo.clone_from(repo['clone_url'], path,
                                branch=repo['default_branch'])
        except git.exc.GitCommandError as e:
            print('Error cloning %s: %s' % (name, str(e)))


def generate_org_repo_config(org):
    """Mirror the repositories of a GitHub organization and describe them.

    Every repository of the organization is cloned below CLONE_DIR (or
    pulled when a clone already exists).  Repositories matching BLACKLIST
    are skipped and their local clone, if any, is deleted.

    Args:
        org: Name of the GitHub organization.

    Returns:
        A dict mapping each repository full name ("org/repo") to its
        configuration entry ('url', 'url-pattern', 'enable-poll-updates'
        and 'ref').

    Raises:
        requests.exceptions.HTTPError: If the GitHub API answers with an
            error status (for instance when the rate limit is exceeded).
    """
    auth = _github_auth()
    url = API + org + '/repos?per_page=100'

    # The first request both reveals the amount of pages and already
    # carries the first page of results.
    resp = requests.get(url, auth=auth)
    resp.raise_for_status()
    pages = _page_count(resp)
    print("Retrieving %s pages of 100 results for %s..." % (pages, org))

    base_url = 'https://github.com/%s/blob/%s/{path}{anchor}'
    repos = {}
    for page in range(1, pages + 1):
        if page > 1:
            resp = requests.get(url + '&page=%s' % page, auth=auth)
            resp.raise_for_status()
        for repo in resp.json():
            name = repo['full_name']
            path = os.path.join(CLONE_DIR, name)

            # If the repo is blacklisted, we're not going to clone it
            # and delete it if it exists
            if _is_blacklisted(repo):
                if os.path.exists(path):
                    shutil.rmtree(path)
                continue

            repos[name] = {
                # Derived from CLONE_DIR (expected to be an absolute path)
                # so the URL always points at the actual clone.
                'url': "file://%s" % path,
                'url-pattern': {
                    'base-url': base_url % (name, repo['default_branch']),
                    'anchor': '#L{line}',
                },
                'enable-poll-updates': False,
                # https://github.com/etsy/hound/pull/224
                'ref': repo['default_branch'],
            }

            _sync_clone(name, repo, path)
    return repos
if __name__ == "__main__":
    # Merge the repository entries of every organization into one mapping.
    merged = {}
    for org_name in ORGANIZATIONS:
        org_repos = generate_org_repo_config(org_name)
        merged.update(org_repos)

    # Top-level document that is written out as the configuration file.
    settings = {
        'repos': merged,
        'dbpath': DBPATH,
        'max-concurrent-indexers': MAX_CONCURRENT_INDEXERS,
    }

    with open('/data/hound/config.json', 'w') as handle:
        serialized = json.dumps(settings, sort_keys=True, indent=2)
        handle.write(serialized)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment