Quick and dirty Rust distribution (rustup) mirror script (progress output is currently garbled by the added download parallelism)
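# Overview (descriptive note, not in the original): this script mirrors the
# pieces of static.rust-lang.org that rustup needs -- the channel manifests,
# the per-target dist packages, and the rustup-init binaries -- into
# ./rustup-repo/, keeps a pickled state db alongside them, and emits a dated
# *.update shell script listing everything removed/updated since the previous
# run, so the delta can be replayed on an offline mirror host.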
from __future__ import print_function
import hashlib
import os
import pickle
import re
import sys
import time
from datetime import datetime, timedelta
from multiprocessing import cpu_count
from multiprocessing.dummy import Pool as ThreadPool

import colorama
import requests
import toml
from colorama import Fore

colorama.init()

if sys.version_info >= (3,):
    raw_input = input
class TargetUrl(object):
    def __init__(self, url, hash):
        super(TargetUrl, self).__init__()
        self.url = str(url)
        self.hash = str(hash)

    def __repr__(self):
        # url/hash are already native str here, so no decoding is needed
        return 'TargetUrl({}, {})'.format(self.url, self.hash)
class Target(object):
    def __init__(self, nm, urls):
        super(Target, self).__init__()
        self.name = str(nm)
        self.urls = tuple(urls)

    def __str__(self):
        return self.name

    __repr__ = __str__

    # Compared by name so that `t in pkg.targets` membership tests work
    # across manifests (without __eq__ they would compare by identity)
    def __eq__(self, o):
        return self.name == o.name

    def __ne__(self, o):
        return self.name != o.name

    def __hash__(self):
        return hash(self.name)
class Version(object):
    _V2_MANIFEST = re.compile(r'^\(([\da-fA-F]+)\s+(\d{4}-\d{2}-\d{2})\)$')
    _V1_MANIFEST = re.compile(r'^(\d+\.\d+\.\d+)(-.+)?$')

    def __init__(self, s):
        super(Version, self).__init__()
        s = str(s).strip()
        v = s.split(' ', 1)
        m = self._V1_MANIFEST.match(v[0].strip())
        if not m:
            raise ValueError('Unknown version format')
        if len(v) == 2:
            hm = self._V2_MANIFEST.match(v[1].strip())
            if not hm:
                raise ValueError('Unknown v2 format')
            self.hash = hm.group(1)
            self.date = datetime.strptime(hm.group(2), '%Y-%m-%d')
        self.tag = m.group(2)[1:] if m.group(2) else ''
        self.major, self.minor, self.build = tuple(
            int(_) for _ in m.group(1).split('.'))

    def __hash__(self):
        return hash((self.major, self.minor, self.build, self.tag))

    def __eq__(self, o):
        return self.major == o.major and self.minor == o.minor \
            and self.build == o.build and self.tag == o.tag

    def __ne__(self, o):
        return self.major != o.major or self.minor != o.minor \
            or self.build != o.build or self.tag != o.tag

    def __gt__(self, o):
        return self.major > o.major or self.major == o.major and (
            self.minor > o.minor or self.minor == o.minor and (
                self.build > o.build or
                self.build == o.build and self.tag > o.tag))

    def __ge__(self, o):
        return self == o or self > o

    def __lt__(self, o):
        return self.major < o.major or self.major == o.major and (
            self.minor < o.minor or self.minor == o.minor and (
                self.build < o.build or
                self.build == o.build and self.tag < o.tag))

    def __le__(self, o):
        return self == o or self < o

    def __str__(self):
        s = '{}.{}.{}{}'.format(
            self.major, self.minor, self.build,
            '' if not self.tag else '-{}'.format(self.tag))
        if hasattr(self, 'hash'):
            s += ' ({} {})'.format(self.hash, self.date.strftime('%Y-%m-%d'))
        return s
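# Example (added for illustration; the hash/date pair below is made up):
# the class accepts both the bare v1 form used by the rustup manifest and
# the v2 form with a trailing "(<hash> <date>)" part used by channel
# manifests.
assert Version('0.25.1') < Version('1.27.0 (3eda71b00 2018-06-19)')
assert str(Version('1.27.0-beta.3')) == '1.27.0-beta.3'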
class Package(object):
    def __init__(self, nm, data):
        super(Package, self).__init__()
        self._parse(nm, data)

    def _parse(self, nm, data):
        self.name = nm
        self.version = Version(data[u'version'])
        self.targets = []
        targs = data[u'target']
        for target in filter(lambda t: targs[t][u'available'], targs):
            trg = targs[target]
            prefixes = (s[:-3] for s in trg if s.endswith(u'url'))
            self.targets.append(Target(
                target.strip(),
                (TargetUrl(
                    trg[pfx + u'url'].strip(),
                    trg[pfx + u'hash'].strip().lower()) for pfx in prefixes)
            ))

    def __str__(self):
        return 'Package(%s-%s)' % (self.name, self.version)

    def __repr__(self):
        return 'Package(%s)\n %s' % (
            self.name,
            ',\n '.join(
                '%r: %r' % (k.upper(), getattr(self, k))
                for k in dir(self)
                if not k.startswith('_')))

    def __hash__(self):
        return hash(self.name)

    def __eq__(self, o):
        return self.name == o.name and self.version == o.version

    def __ne__(self, o):
        return self.name != o.name or self.version != o.version

    def __gt__(self, o):
        return self.name > o.name \
            or self.name == o.name and self.version > o.version

    def __ge__(self, o):
        return self.name >= o.name \
            or self.name == o.name and self.version >= o.version

    def __lt__(self, o):
        return self.name < o.name \
            or self.name == o.name and self.version < o.version

    def __le__(self, o):
        return self.name <= o.name \
            or self.name == o.name and self.version <= o.version
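# Example (added for illustration; a made-up, minimal slice of a v2 manifest
# entry, just to show the shape _parse() expects -- real manifests carry many
# more targets and fields; the digests are placeholders):
_example_pkg = Package(u'rust', {
    u'version': u'1.27.0 (3eda71b00 2018-06-19)',
    u'target': {
        u'x86_64-unknown-linux-gnu': {
            u'available': True,
            u'url': u'https://static.rust-lang.org/dist/rust-1.27.0-x86_64-unknown-linux-gnu.tar.gz',
            u'hash': u'0' * 64,  # placeholder digest
            u'xz_url': u'https://static.rust-lang.org/dist/rust-1.27.0-x86_64-unknown-linux-gnu.tar.xz',
            u'xz_hash': u'0' * 64,  # placeholder digest
        },
    },
})
# One available target, with one TargetUrl per "*url"/"*hash" pair
assert len(_example_pkg.targets) == 1
assert len(_example_pkg.targets[0].urls) == 2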
STATIC_RUST_URL = 'static.rust-lang.org'
URL_TEMPLATE = 'https://{}/{{}}'.format(STATIC_RUST_URL)
CHANNEL_MANIFEST_PATH_TEMPLATE = 'dist/channel-rust-{}.toml'
RUSTUP_MANIFEST_PATH = 'rustup/release-stable.toml'
HASH_EXT = '.sha256'
SIG_EXT = '.asc'
V1_EXTS = ('', HASH_EXT)
V2_EXTS = ('', HASH_EXT, SIG_EXT)
DEST_DIR = 'rustup-repo'
CACHED_DB_PATH = os.path.join(DEST_DIR, 'cached.db')
DIST_DIR = os.path.join(DEST_DIR, STATIC_RUST_URL)
# Substrings of target triples that should not be mirrored
TARGET_SKIPS = (
    'apple',
    'android',
    'emscripten',
    'wasm',
    's390x',
    'netbsd',
    'freebsd',
    'fuchsia',
    'sparc',
    'solaris',
    'redox'
)
# (channel name, minimum Last-Modified advance before re-mirroring, skips)
CHANNELS = (
    ('stable', timedelta(seconds=1), TARGET_SKIPS),
    ('beta', timedelta(weeks=25), ()),
    ('nightly', timedelta(weeks=6), TARGET_SKIPS)
)
def _format_size(s):
    SIZES = ('B', 'KiB', 'MiB', 'GiB')
    f = 0
    s = float(s)
    # Cap at GiB so huge sizes can't index past the table
    while s >= 1024 and f < len(SIZES) - 1:
        s /= 1024
        f += 1
    return (s, SIZES[f])

def get_url_changed_date(url):
    return datetime.strptime(
        requests.head(url).headers.get('Last-Modified'),
        '%a, %d %b %Y %H:%M:%S %Z')
def _download(url, out):
    print('Downloading `{}`...'.format(url))
    outdir = os.path.dirname(out)
    if not os.path.isdir(outdir):
        try:
            os.makedirs(outdir)
        except OSError:
            # Another pool worker may have created it first
            pass
    r = requests.get(url, timeout=20, stream=True)
    # Raise requests.HTTPError on 4xx/5xx so callers can handle 404s
    r.raise_for_status()
    total = int(r.headers.get('content-length') or 0)
    with open(out, 'wb') as of:
        done = 0
        for chunk in r.iter_content(chunk_size=102400):
            of.write(chunk)
            done += len(chunk)
            if total:
                print(' {:6.2f}%'.format(float(done) / total * 100), end='\r')
    print(' {:7.2f} {}'.format(*_format_size(done)))
def download_v1_pkg(fname, hname, out_dir):
    def remove_v1_package():
        for f in (fname, hname):
            if f:
                os.remove(os.path.join(out_dir, f))

    while True:
        for f in (fname, hname):
            ffpath = os.path.join(out_dir, f)
            # Skip already downloaded files and continue to validation
            if os.path.isfile(ffpath):
                continue
            while True:
                try:
                    _download(URL_TEMPLATE.format(f), ffpath)
                    break
                except requests.Timeout:
                    print('Timed out on {}. Retrying...'.format(f))
                    time.sleep(5)
                except requests.HTTPError as ex:
                    if ex.response.status_code == 404:
                        print('{} could not be found. Skipping.'.format(f))
                        return
                    print('HTTP error {}'.format(ex))
        print('Verifying v1 package `{}`... '.format(fname), end='')
        with open(os.path.join(out_dir, hname), 'rb') as hf:
            hd = hf.read()
        try:
            if fname.endswith('.exe'):
                # Hash files for the Windows installers carry only the digest
                h = hd.decode('ascii').strip()
            else:
                h, fn = tuple(
                    _.strip() for _ in hd.decode('ascii').strip().split())
                if os.path.basename(fn) != os.path.basename(fname):
                    print('{}FAILED: file name mismatch. Retrying.{}'.format(
                        Fore.YELLOW, Fore.RESET))
                    remove_v1_package()
                    continue
            h = h.lower()
        except ValueError:
            print('{}FAILED: hash file parsing failed. Retrying.{}'.format(
                Fore.YELLOW, Fore.RESET))
            remove_v1_package()
            continue
        with open(os.path.join(out_dir, fname), 'rb') as pf:
            hr = hashlib.sha256()
            for chunk in iter(lambda: pf.read(1024), b''):
                hr.update(chunk)
        if h != hr.hexdigest():
            print('{}FAILED: hash mismatch. Retrying.{}'.format(
                Fore.YELLOW, Fore.RESET))
            remove_v1_package()
            continue
        # Everything's good
        print('{}OK{}'.format(Fore.GREEN, Fore.RESET))
        break
def download_v2_pkg(fname, expected_hash, out_dir):
    def remove_v2_package():
        for ext in V2_EXTS:
            ffext = '{}{}'.format(fname, ext)
            os.remove(os.path.join(out_dir, ffext))

    while True:
        for ext in V2_EXTS:
            ffext = '{}{}'.format(fname, ext)
            ffpath = os.path.join(out_dir, ffext)
            # Skip already downloaded files and continue to validation
            if os.path.isfile(ffpath):
                continue
            while True:
                try:
                    _download(URL_TEMPLATE.format(ffext), ffpath)
                    break
                except requests.Timeout:
                    print('{}Timed out on {}. Retrying...{}'.format(
                        Fore.YELLOW, ffext, Fore.RESET))
                    time.sleep(5)
                except requests.HTTPError as ex:
                    if ex.response.status_code == 404:
                        print('{}{} could not be found. Skipping.{}'.format(
                            Fore.YELLOW, ffext, Fore.RESET))
                        return
                    print('{}HTTP error: {}{}'.format(
                        Fore.RED, ex, Fore.RESET))
        print('Verifying v2 package `{}`... '.format(
            os.path.basename(fname)), end='')
        with open(os.path.join(
                out_dir, '{}{}'.format(fname, HASH_EXT)), 'rb') as hf:
            hd = hf.read()
        try:
            h, fn = tuple(
                _.strip() for _ in hd.decode('ascii').strip().split())
            h = h.lower()
        except ValueError:
            print('{}FAILED: hash file parsing failed. Retrying.{}'.format(
                Fore.YELLOW, Fore.RESET))
            remove_v2_package()
            continue
        if fn != os.path.basename(fname):
            print('{}FAILED: file name mismatch. Retrying.{}'.format(
                Fore.YELLOW, Fore.RESET))
            remove_v2_package()
            continue
        if expected_hash and h != expected_hash:
            print('{}FAILED: expected hash mismatch. Retrying.{}'.format(
                Fore.YELLOW, Fore.RESET))
            remove_v2_package()
            continue
        with open(os.path.join(out_dir, fname), 'rb') as pf:
            hr = hashlib.sha256()
            for chunk in iter(lambda: pf.read(512000), b''):
                hr.update(chunk)
        if h != hr.hexdigest():
            print('{}FAILED: hash mismatch. Retrying.{}'.format(
                Fore.YELLOW, Fore.RESET))
            remove_v2_package()
            continue
        # TODO: Verify sig
        print('{}OK{}'.format(Fore.GREEN, Fore.RESET))
        break
def write_diff_file(ufname, removed, updated):
    # `url` below already contains the package path, so only {1} is needed
    # (the old "{1}{0}" form doubled the path)
    DIFF_LINE_TEMPLATE = 'rget -O "{0}" "{1}"'

    def get_diff_line(pkg):
        url = URL_TEMPLATE.format(pkg)
        res = requests.head(url)
        if res.status_code != 200:
            return '#:{} {}'.format(res.status_code, url)
        return DIFF_LINE_TEMPLATE.format(pkg, url)

    print('Writing updated diff to {}...'.format(ufname))
    with open(ufname, 'w') as uf:
        # File header
        print('#!/bin/sh', file=uf)
        print('alias "rget=wget --retry-connrefused --waitretry=1 '
              '--read-timeout=20 --timeout=15 -t 0"', file=uf)
        # Removed list
        print('# --- Removed ---', file=uf)
        for pkg in removed:
            print('rm -f "{}"'.format(pkg), file=uf)
        # Updated dirs
        print('# --- Updated ---', file=uf)
        for dn in sorted(set(os.path.dirname(pkg) for pkg in updated)):
            print('mkdir -p "{}"'.format(dn), file=uf)
        # Updated list
        for i, diff_line in enumerate(
                ThreadPool(cpu_count()).imap_unordered(
                    get_diff_line, updated), 1):
            print(diff_line, file=uf)
            print('{:3d}%'.format(int(i * 100 / len(updated))), end='\r')
    print('\nDone.')
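# For reference (added note): the emitted update file is a /bin/sh script
# shaped roughly like this (paths illustrative, derived from the code above):
#
#   #!/bin/sh
#   alias "rget=wget --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 -t 0"
#   # --- Removed ---
#   rm -f "dist/channel-rust-nightly.toml"
#   # --- Updated ---
#   mkdir -p "dist"
#   rget -O "dist/channel-rust-nightly.toml" "https://static.rust-lang.org/dist/channel-rust-nightly.toml"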
def update_v1_channel(manifest_path, update_delta, ch_fname_func, targets, db):
    changed = get_url_changed_date(URL_TEMPLATE.format(manifest_path))
    if changed - db['updated'] >= update_delta:
        manifest = toml.loads(
            requests.get(
                URL_TEMPLATE.format(manifest_path)).content.decode('utf-8'))
        assert manifest.get(u'schema-version', u'') == u'1', (
            "Unsupported manifest version `{}'".format(
                manifest.get(u'schema-version', u'')))
        nv = Version(manifest[u'version'])
        ov = db['version']
        if nv > ov:
            removed = []
            updated = []
            for target in db['targets']:
                fname = ch_fname_func(ov, target)
                removed.append(tuple('%s%s' % (fname, ext) for ext in V1_EXTS))
            ntargets = tuple(targets)
            for target in ntargets:
                fname = ch_fname_func(nv, target)
                updated.append(tuple('%s%s' % (fname, ext) for ext in V1_EXTS))
            return (changed, nv, ntargets, removed, updated)
    return (changed, db['version'], db['targets'], (), ())
def update_v2_channel(manifest_path, update_delta, skips, db):
    changed = get_url_changed_date(URL_TEMPLATE.format(manifest_path))
    if changed - db['updated'] >= update_delta:
        print('Updating components...')
        manifest = toml.loads(
            requests.get(
                URL_TEMPLATE.format(manifest_path)).content.decode('utf-8'))
        assert manifest.get(u'manifest-version', u'') == u'2', (
            "Unsupported manifest version `{}'".format(
                manifest.get(u'manifest-version', u'')))
        packages = {}
        removed = []
        updated = []
        for pkg, pkg_desc in manifest[u'pkg'].items():
            np = Package(pkg, pkg_desc)
            op = db['pkgs'].get(pkg, None)
            if op:
                if np < op:
                    print('Package {} went back (from {} to {})'.format(
                        pkg, op.version, np.version))
                    break
                removed_targets = tuple(
                    t for t in op.targets if t not in np.targets or
                    any(s in t.name for s in skips))
                if removed_targets:
                    print('{}WARNING{}: The following targets were removed from {}:'.format(
                        Fore.YELLOW, Fore.RESET, pkg))
                    for t in removed_targets:
                        print(' - {}'.format(t.name))
                    if raw_input('Continue? [y/N] ').strip().lower() != 'y':
                        break
                if np > op:
                    # On a version bump every old target gets replaced
                    removed_targets = op.targets
                # Remove old targets
                for target in removed_targets:
                    for u in target.urls:
                        fname = u.url.replace(URL_TEMPLATE.format(''), '')
                        removed.append((fname, u.hash))
            if op and np == op:
                # Construct updated target list
                updated_targets = (
                    t for t in np.targets
                    if t not in op.targets and all(
                        s not in t.name for s in skips))
            else:
                # Add all new targets
                updated_targets = (
                    t for t in np.targets
                    if all(s not in t.name for s in skips))
            # Add updated targets
            for target in updated_targets:
                for u in target.urls:
                    fname = u.url.replace(URL_TEMPLATE.format(''), '')
                    updated.append((fname, u.hash))
            packages[pkg] = np
        else:
            # for/else: only reached when no `break` above aborted the scan,
            # i.e. the whole new package set can be committed
            return (changed, packages, removed, updated)
    return (changed, db['pkgs'], (), ())
def get_rustup_filename(ver, target):
    return 'rustup/archive/{}/{}/rustup-init{}'.format(
        str(ver), target, '.exe' if 'windows' in target else '')
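# Example (added for illustration; made-up version number): Windows targets
# get the .exe suffix, everything else is a bare rustup-init
assert (get_rustup_filename(Version('0.25.1'), 'x86_64-pc-windows-msvc') ==
        'rustup/archive/0.25.1/x86_64-pc-windows-msvc/rustup-init.exe')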
# Load db from cache
print('Loading cached db...')
try:
    with open(CACHED_DB_PATH, 'rb') as dbf:
        db = pickle.load(dbf)
except Exception:
    # First run or corrupt cache: start from an empty state
    db = {
        'rustup': {
            'updated': datetime(1970, 1, 1),
            'version': Version('0.0.0'),
            'targets': []
        },
        'dist': {}
    }
print('Done.')
updated = []
removed = []
pool = ThreadPool(cpu_count())

print('--- Checking for channel updates...')
for channel, delta, skips in CHANNELS:
    repfile = CHANNEL_MANIFEST_PATH_TEMPLATE.format(channel)
    print('=========================')
    print(repfile)
    print('=========================')
    try:
        ch_db = db['dist'][channel]
    except KeyError:
        ch_db = db['dist'][channel] = {
            'updated': datetime(1970, 1, 1),
            'pkgs': {}
        }
    (ch_db['updated'], ch_db['pkgs'],
     ch_removed, ch_updated) = update_v2_channel(
         repfile, delta, skips, ch_db)
    if not ch_removed and not ch_updated:
        print('No changes for {}'.format(channel))
    else:
        # Add removed files to list
        for f, _ in ch_removed:
            for ext in V2_EXTS:
                removed.append('{}{}'.format(f, ext))
        # Add updated files to list
        for f, h in ch_updated:
            for ext in V2_EXTS:
                updated.append('{}{}'.format(f, ext))
        for _ in pool.imap_unordered(
                lambda t: download_v2_pkg(*t, out_dir=DIST_DIR), ch_updated):
            pass
        # Update the manifest file itself (drop stale local copies first,
        # since download_v2_pkg() skips files that already exist on disk)
        for ext in V2_EXTS:
            if ch_removed:
                removed.append('{}{}'.format(repfile, ext))
            updated.append('{}{}'.format(repfile, ext))
            stale = os.path.join(DIST_DIR, '{}{}'.format(repfile, ext))
            if os.path.isfile(stale):
                os.remove(stale)
        download_v2_pkg(repfile, None, DIST_DIR)
print('--- Checking for rustup updates...')
ru_db = db['rustup']
(ru_db['updated'], ru_db['version'], ru_db['targets'],
 ru_removed, ru_updated) = update_v1_channel(
     RUSTUP_MANIFEST_PATH, timedelta(seconds=1), get_rustup_filename,
     (t.name for t in db['dist']['stable']['pkgs']['cargo'].targets), ru_db)
if not ru_removed and not ru_updated:
    print('No changes for rustup')
else:
    # Update removed list
    for f, h in ru_removed:
        removed.append(f)
        removed.append(h)
    # Update updated list and download packages
    for f, h in ru_updated:
        updated.append(f)
        updated.append(h)
    for _ in pool.imap_unordered(
            lambda t: download_v1_pkg(*t, out_dir=DIST_DIR), ru_updated):
        pass
    # Update the manifest
    if ru_removed:
        removed.append(RUSTUP_MANIFEST_PATH)
    updated.append(RUSTUP_MANIFEST_PATH)
    _download(URL_TEMPLATE.format(RUSTUP_MANIFEST_PATH),
              os.path.join(DIST_DIR, RUSTUP_MANIFEST_PATH))
if not updated:
    print('Local db is up to date. Nothing to do.')
else:
    write_diff_file(os.path.join(DEST_DIR, '{}.update'.format(
        datetime.now().strftime('%Y%m%d-%H%M%S'))), removed, updated)
print('Updating cached db...')
# Make sure the repo dir exists even if nothing was downloaded this run
if not os.path.isdir(DEST_DIR):
    os.makedirs(DEST_DIR)
with open(CACHED_DB_PATH, 'wb') as dbf:
    pickle.dump(db, dbf)
print('Done.')
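# Usage note (added, not part of the original script): one way to expose the
# mirror to rustup clients is any static file server rooted at
# rustup-repo/static.rust-lang.org, e.g. (Python 3.7+; host/port illustrative):
#
#   import functools
#   from http.server import HTTPServer, SimpleHTTPRequestHandler
#   handler = functools.partial(
#       SimpleHTTPRequestHandler, directory='rustup-repo/static.rust-lang.org')
#   HTTPServer(('', 8000), handler).serve_forever()
#
# Clients then point rustup's RUSTUP_DIST_SERVER / RUSTUP_UPDATE_ROOT
# environment variables at that server instead of static.rust-lang.org.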