Skip to content

Instantly share code, notes, and snippets.

@Raynos
Created April 19, 2014 22:02
Show Gist options
  • Save Raynos/11098914 to your computer and use it in GitHub Desktop.
An npm mirror
#!/usr/bin/env python
from traceback import print_exc
import urllib2
import hashlib
import os.path
import os
import json
# Presumably the base URL of the upstream npm registry.
# NOTE(review): no code visible in this file reads this constant -- the fetch
# functions use a hard-coded registry URL instead; confirm it is still needed.
REGISTRY_URL = 'https://registry.npmjs.org'
# Local directory the mirror is written into: the index files and one
# sub-directory per package are created underneath it.
MIRROR_PATH = '/var/lib/mirror/npm'
# Base URL that tarball links in the stored package metadata are rewritten to.
MIRROR_URL = 'https://archive.uber.com/npm'
def request(url):
    """Open *url* and return the response object.

    The request is sent with a fixed ``User-agent`` header and a 60 second
    timeout. The response is returned still open; this function does not
    close it.
    """
    headers = {'User-agent': 'node/0.8.11 linux x64'}
    return urllib2.urlopen(urllib2.Request(url, headers=headers), timeout=60)
def get_package_index():
    """Download the registry's document list and store it in the mirror.

    Fetches the registry's ``_all_docs`` listing, keeps every row whose key is
    non-empty and does not start with ``_``, and writes two files under
    ``MIRROR_PATH``:

    * ``revisions.json`` -- list of ``[name, revision]`` pairs
    * ``index.json`` -- compact list of the names only

    Returns:
        A list of ``(name, revision)`` tuples.

    Raises:
        Nothing is caught here: network errors from ``request``, parse errors
        from ``json.load`` and file errors from ``open`` all propagate.
    """
    res = request('http://isaacs.iriscouch.com/registry/_all_docs')
    try:
        rows = json.load(res)['rows']
    finally:
        # Release the HTTP connection even when the payload cannot be parsed.
        res.close()

    packages = [
        (row['key'], row['value']['rev'])
        for row in rows
        if row['key'] and not row['key'].startswith('_')
    ]
    with open(os.path.join(MIRROR_PATH, 'revisions.json'), 'w') as fd:
        fd.write(json.dumps(packages))

    names = [entry[0] for entry in packages]
    with open(os.path.join(MIRROR_PATH, 'index.json'), 'w') as fd:
        fd.write(json.dumps(names, separators=(',', ':')))
    return packages
def get_package_info(package):
    """Fetch the raw registry document for *package*.

    Args:
        package: Package name; it is UTF-8 encoded before being placed in
            the request URL.

    Returns:
        The response body as returned by ``read()``, or ``None`` when the
        request or the read fails (the traceback is printed in that case).
    """
    try:
        res = request('http://isaacs.iriscouch.com/registry/%s' % package.encode('utf8'))
        try:
            return res.read()
        finally:
            # Always release the connection, including when read() fails.
            res.close()
    except Exception:
        # Best-effort by design: report the failure and let the caller skip
        # this package. ``Exception`` (not a bare ``except``) is caught so
        # that KeyboardInterrupt and SystemExit still stop the run.
        print_exc()
        return None
def check_sha(path, expected_digest):
    """Return whether the SHA-1 of the file at *path* equals *expected_digest*.

    Args:
        path: File to hash; it is opened in binary mode.
        expected_digest: Hex digest string to compare against. The comparison
            is an exact string comparison with ``hexdigest()`` output.

    Returns:
        ``True`` when the digests are equal, ``False`` otherwise.

    Raises:
        IOError / OSError: if the file cannot be opened or read.
    """
    chunk_size = 64 * 1024
    sha = hashlib.sha1()
    with open(path, 'rb') as fd:
        # Hash chunk by chunk so a large tarball is never held in memory whole.
        for chunk in iter(lambda: fd.read(chunk_size), b''):
            sha.update(chunk)
    return sha.hexdigest() == expected_digest
def _read_stored_revision(package_path):
    """Return the ``_rev`` stored in the mirrored package.json, or ``None``.

    ``None`` is returned when the file does not exist, does not contain valid
    JSON, or has no ``_rev`` entry, so the caller re-syncs the package instead
    of aborting the whole run.
    """
    meta_path = os.path.join(package_path, 'package.json')
    if not os.path.exists(meta_path):
        return None
    try:
        # ``with`` closes the handle; it was previously left open.
        with open(meta_path, 'r') as fd:
            stored = json.load(fd)
    except ValueError:
        # Unparseable file, e.g. truncated by an interrupted earlier run.
        return None
    if not isinstance(stored, dict):
        return None
    return stored.get('_rev')


def _download(url, dest):
    """Stream the body of *url* into the file *dest* and close the response."""
    res = request(url)
    try:
        try:
            with open(dest, 'wb') as fd:
                # Copy in chunks rather than buffering the whole tarball.
                for chunk in iter(lambda: res.read(64 * 1024), b''):
                    fd.write(chunk)
        except Exception:
            # Do not leave a truncated tarball behind after a failed transfer.
            if os.path.exists(dest):
                os.remove(dest)
            raise
    finally:
        res.close()


def main():
    """Synchronise the local mirror under ``MIRROR_PATH`` with the registry.

    Steps, per package returned by ``get_package_index()``:

    1. Skip the package when the ``_rev`` stored in its mirrored
       ``package.json`` equals the revision from the index.
    2. Fetch the package document and, for every entry in ``versions``,
       rewrite the tarball URL to point under ``MIRROR_URL``, write the
       version's metadata to ``<package dir>/<version>`` and download the
       tarball unless a copy with the expected SHA-1 is already present.
    3. Write the (rewritten) package document to ``package.json``.

    Progress and errors are reported with ``print``; the calls use the
    parenthesised single-argument form, which prints the same text as the
    print statement under Python 2.

    NOTE(review): ``package.json`` is written with the new ``_rev`` even when
    some tarballs failed to download or verify, so step 1 skips the package on
    later runs until its revision changes -- confirm this is intended.
    NOTE(review): package names, version keys and tarball file names come from
    the registry response and are joined into filesystem paths without
    validation.
    """
    if not os.path.exists(MIRROR_PATH):
        os.makedirs(MIRROR_PATH)

    print('Downloading package list')
    index = get_package_index()

    print('Updating repository')
    for package, revision in index:
        if package in ('error: forbidden', 'registry/jDataView'):
            continue

        package_path = os.path.join(MIRROR_PATH, package.encode('utf8'))
        stored_rev = _read_stored_revision(package_path)
        if stored_rev is not None and stored_rev == revision:
            continue

        info = get_package_info(package)
        if not info:
            print('Unable to get info for %s' % package)
            continue

        if not os.path.exists(package_path):
            os.makedirs(package_path)

        info = json.loads(info)
        if 'versions' not in info:
            print('No versions field in package info for %s' % package)
            continue

        # Iterate over a snapshot: entries are deleted from info['versions']
        # inside the loop when their tarball is missing upstream.
        for version, versioninfo in list(info['versions'].items()):
            try:
                disturl = versioninfo['dist']['tarball']
                digest = versioninfo['dist']['shasum']
                tarname = disturl.rsplit('/', 1)[1]
                tarpath = os.path.join(package_path, tarname)

                # Point the stored metadata at the mirror instead of upstream.
                info['versions'][version]['dist']['tarball'] = '%s/%s/%s' % (MIRROR_URL, package, tarname)
                with open(os.path.join(package_path, version), 'w') as fd:
                    fd.write(json.dumps(info['versions'][version], separators=(',', ':')))

                if os.path.exists(tarpath) and check_sha(tarpath, digest):
                    continue

                # Some documents carry this host in the tarball URL; it is
                # replaced with the public registry host before downloading.
                disturl = disturl.replace('packages:5984', 'registry.npmjs.org')
                print('GET %s' % disturl)
                try:
                    _download(disturl, tarpath)
                except urllib2.HTTPError as e:
                    if e.code == 404:
                        print('Unable to download %s, removing from package.json' % disturl)
                        del info['versions'][version]
                        continue
                    # Other HTTP errors fall through to the SHA check below:
                    # a missing file makes it raise (reported by the outer
                    # handler) and a stale file is reported and removed.

                if not check_sha(tarpath, digest):
                    print('ERROR: SHA1 mismatch for %s' % tarpath)
                    os.remove(tarpath)
                    continue
            except Exception as e:
                # One broken version must not stop the rest of the package.
                print('ERROR Unable to get %s %s: %s' % (package, version, str(e)))

        info = json.dumps(info, separators=(',', ':'))
        with open(os.path.join(package_path, 'package.json'), 'w') as fd:
            fd.write(info)
# Run the mirror update only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment