Created
April 19, 2014 22:02
-
-
Save Raynos/11098914 to your computer and use it in GitHub Desktop.
An npm mirror
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from traceback import print_exc | |
import urllib2 | |
import hashlib | |
import os.path | |
import os | |
import json | |
# Base URL of the npm registry.
# NOTE(review): not referenced by the code visible in this file, which
# fetches metadata from isaacs.iriscouch.com instead -- confirm before removing.
REGISTRY_URL = 'https://registry.npmjs.org'
# Local directory that receives the index files, per-package metadata and tarballs.
MIRROR_PATH = '/var/lib/mirror/npm'
# Base URL substituted into each version's `dist.tarball` link.
# NOTE(review): presumably this URL serves the contents of MIRROR_PATH -- confirm.
MIRROR_URL = 'https://archive.uber.com/npm'
def request(url, timeout=60):
    """Open *url* with a node-style User-agent header and return the response.

    Args:
        url: Absolute URL to fetch.
        timeout: Socket timeout in seconds passed to ``urllib2.urlopen``.
            Defaults to 60, the value that used to be hard-coded.

    Returns:
        The file-like response object produced by ``urllib2.urlopen``. The
        caller is responsible for reading and closing it.

    Raises:
        urllib2.HTTPError: the server answered with an error status.
        urllib2.URLError: the connection could not be established.
    """
    headers = {'User-agent': 'node/0.8.11 linux x64'}
    req = urllib2.Request(url, headers=headers)
    return urllib2.urlopen(req, timeout=timeout)
def get_package_index():
    """Download the registry's document list and store it under MIRROR_PATH.

    Two files are (re)written on every call:

    * ``revisions.json`` -- list of ``[name, revision]`` pairs.
    * ``index.json`` -- compact list of package names only.

    Rows with an empty key or a key starting with ``_`` are dropped.

    Returns:
        A list of ``(name, revision)`` tuples, one per kept row.

    Raises:
        Whatever ``request`` raises on network failure, ``ValueError`` if the
        response is not valid JSON, and ``IOError`` if the files cannot be
        written.
    """
    res = request('http://isaacs.iriscouch.com/registry/_all_docs')
    try:
        rows = json.load(res)['rows']
    finally:
        # Release the connection even when the body is not valid JSON.
        res.close()

    packages = [
        (row['key'], row['value']['rev'])
        for row in rows
        if row['key'] and not row['key'].startswith('_')
    ]

    with open(os.path.join(MIRROR_PATH, 'revisions.json'), 'w') as fd:
        json.dump(packages, fd)

    names = [entry[0] for entry in packages]
    with open(os.path.join(MIRROR_PATH, 'index.json'), 'w') as fd:
        json.dump(names, fd, separators=(',', ':'))

    return packages
def get_package_info(package):
    """Fetch the raw registry document for *package*.

    Args:
        package: Package name as a unicode string.

    Returns:
        The response body as a byte string, or ``None`` when the fetch failed
        (the traceback is printed in that case).
    """
    # Imported locally so this function does not depend on a new file-level import.
    import urllib

    try:
        # Percent-encode the UTF-8 name (including '/') so that names with
        # reserved or non-ASCII characters still form a valid URL.
        quoted = urllib.quote(package.encode('utf8'), safe='')
        res = request('http://isaacs.iriscouch.com/registry/%s' % quoted)
        try:
            return res.read()
        finally:
            res.close()
    except Exception:
        # `except Exception` rather than a bare `except` so that Ctrl-C
        # (KeyboardInterrupt) and SystemExit still stop the script.
        print_exc()
        return None
def check_sha(path, expected_digest):
    """Return True when the SHA-1 of the file at *path* equals *expected_digest*.

    The file is hashed in fixed-size chunks so that large tarballs are never
    loaded into memory in one piece.

    Args:
        path: Path of the file to hash.
        expected_digest: Hexadecimal SHA-1 digest to compare against.

    Returns:
        bool: whether the computed hex digest equals *expected_digest*.

    Raises:
        IOError: if *path* cannot be opened or read.
    """
    sha = hashlib.sha1()
    with open(path, 'rb') as fd:
        # iter(callable, sentinel) keeps reading until read() returns b'' (EOF).
        for chunk in iter(lambda: fd.read(65536), b''):
            sha.update(chunk)
    return sha.hexdigest() == expected_digest
def _stored_revision(package_path):
    """Return the `_rev` saved in the mirrored package.json, or None if unusable."""
    manifest_path = os.path.join(package_path, 'package.json')
    if not os.path.exists(manifest_path):
        return None
    try:
        with open(manifest_path, 'r') as fd:
            return json.load(fd)['_rev']
    except (IOError, ValueError, KeyError, TypeError):
        # Unreadable, truncated or incomplete cache entry: treat it as stale
        # so the package is fetched again instead of aborting the whole run.
        return None


def main():
    """Synchronise the local mirror under MIRROR_PATH with the registry.

    For every package in the index whose stored `_rev` differs from the
    registry's revision:

    1. fetch the package document;
    2. for each version, rewrite `dist.tarball` to point at MIRROR_URL, write
       the version's metadata to ``<package dir>/<version>``, and download the
       tarball unless a copy with a matching SHA-1 already exists;
    3. write the rewritten document to ``<package dir>/package.json``.

    Versions whose tarball answers 404 are removed from the document. Any
    other per-version failure is printed and the loop moves on.
    """
    if not os.path.exists(MIRROR_PATH):
        os.makedirs(MIRROR_PATH)

    print('Downloading package list')
    index = get_package_index()

    print('Updating repository')
    for package, revision in index:
        if package in ('error: forbidden', 'registry/jDataView'):
            continue

        package_path = os.path.join(MIRROR_PATH, package.encode('utf8'))

        # Skip packages whose mirrored copy already has the current revision.
        stored = _stored_revision(package_path)
        if stored is not None and stored == revision:
            continue

        raw_info = get_package_info(package)
        if not raw_info:
            print('Unable to get info for %s' % package)
            continue

        if not os.path.exists(package_path):
            os.makedirs(package_path)

        try:
            info = json.loads(raw_info)
        except ValueError:
            # One undecodable response must not abort the whole mirror run.
            print('Unable to parse info for %s' % package)
            continue

        if 'versions' not in info:
            print('No versions field in package info for %s' % package)
            continue

        # Iterate over a snapshot: entries may be deleted inside the loop.
        for version, versioninfo in list(info['versions'].items()):
            try:
                disturl = versioninfo['dist']['tarball']
                digest = versioninfo['dist']['shasum']
                tarname = disturl.rsplit('/', 1)[1]
                tarpath = os.path.join(package_path, tarname)

                # Point the published metadata at the mirror, not upstream.
                versioninfo['dist']['tarball'] = '%s/%s/%s' % (
                    MIRROR_URL, package, tarname)
                with open(os.path.join(package_path, version), 'w') as fd:
                    fd.write(json.dumps(versioninfo, separators=(',', ':')))

                # A tarball that is already present and intact needs no download.
                if os.path.exists(tarpath) and check_sha(tarpath, digest):
                    continue

                if disturl.find('packages:5984') != -1:
                    disturl = disturl.replace('packages:5984',
                                              'registry.npmjs.org')

                print('GET %s' % disturl)
                try:
                    res = request(disturl)
                    try:
                        payload = res.read()
                    finally:
                        res.close()
                    with open(tarpath, 'wb') as fd:
                        fd.write(payload)
                except urllib2.HTTPError as e:
                    if e.code == 404:
                        print('Unable to download %s, removing from package.json' % disturl)
                        del info['versions'][version]
                        continue
                    # Other HTTP errors fall through on purpose: the checksum
                    # test below either rejects a stale file or raises, and
                    # the generic handler then reports the failure.

                if not check_sha(tarpath, digest):
                    print('ERROR: SHA1 mismatch for %s' % tarpath)
                    os.remove(tarpath)
                    continue
            except Exception as e:
                print('ERROR Unable to get %s %s: %s' % (package, version, str(e)))

        with open(os.path.join(package_path, 'package.json'), 'w') as fd:
            fd.write(json.dumps(info, separators=(',', ':')))
# Run the mirror synchronisation only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment