Last active
September 22, 2016 16:28
-
-
Save jjhelmus/869d6827ac8e0275437e7643989974e4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3
""" Find conda packages which use a binary prefix. """
import argparse
import bz2
import json
import os
import shutil
import tarfile
import urllib.request

try:
    from packaging.version import parse as parse_version
except ImportError:
    from pip._vendor.packaging.version import parse as parse_version
def get_channel_index(
        channel,
        platforms=('linux-64', 'osx-64', 'win-32', 'win-64', 'linux-32')):
    """ Return the channel index for all platforms.

    Downloads ``repodata.json.bz2`` for each platform of ``channel`` from
    conda.anaconda.org and merges the package records into one dictionary.

    channel : name of the channel, e.g. 'conda-forge'.
    platforms : platform subdirectories to fetch. Defaults to the five
        platforms this script has always checked.

    Returns a dict keyed by ``platform + filename`` whose values are the
    package records from the repodata, each with an added 'url' key holding
    the download location of the package.
    """
    channel_url = 'https://conda.anaconda.org/%s' % channel
    index = {}
    # find all packages in the channel one platform at a time
    for platform in platforms:
        url = '%s/%s/repodata.json.bz2' % (channel_url, platform)
        # the context manager closes the connection once the payload is read
        with urllib.request.urlopen(url) as response:
            compressed = response.read()
        repodata = json.loads(bz2.decompress(compressed).decode('utf-8'))
        for fn, info in repodata['packages'].items():
            # add a download url to the package; a record lacking 'subdir'
            # is assumed to live in the directory it was listed under
            subdir = info.get('subdir', platform)
            info['url'] = channel_url + '/' + subdir + '/' + fn
            # prefix the key with the platform so records fetched for
            # different platforms stay distinct
            index[platform + fn] = info
    return index
def find_latest_versions(index, package_name):
    """ Return the latest version and packages from a conda channel index.

    index : dict of package records, each having 'name' and 'version' keys.
    package_name : name of the package to look up.

    Returns ``(latest_ver, entries)`` where ``latest_ver`` is the version
    string of the newest release exactly as it appears in the index and
    ``entries`` is the list of records carrying that version. When the index
    holds no record for ``package_name`` the result is ``(None, [])``.
    """
    valid = [v for v in index.values() if v['name'] == package_name]
    if not valid:
        # max() of an empty sequence raises ValueError; report "nothing
        # found" instead so callers can skip the package
        return None, []
    try:
        # compare parsed versions but keep the raw string, the normalised
        # form (e.g. '2016.09' -> '2016.9') may match no record at all
        newest = max(valid, key=lambda v: parse_version(v['version']))
        latest_ver = newest['version']
    except ValueError:
        # NOTE(review): packaging's InvalidVersion derives from ValueError;
        # fall back to sorting versions by string if one cannot be parsed
        latest_ver = max(v['version'] for v in valid)
    entries = [v for v in valid if v['version'] == latest_ver]
    return latest_ver, entries
def parse_arguments():
    """ Parse command line arguments.

    Returns the argparse namespace with the attributes ``packages`` (list of
    names, empty to check everything), ``skip`` (path or None), ``verb``
    (bool), ``channel`` (str), ``json`` (path or None) and ``directory``
    (package cache location, ``pkg_cache`` under the current working
    directory by default).
    """
    parser = argparse.ArgumentParser(
        description="Find conda packages which use a prefix")
    parser.add_argument(
        'packages', nargs='*',
        help=('Name of packages to check, leave blank to check all packages '
              'on the channel'))
    parser.add_argument(
        '--skip', '-s', action='store', help=(
            'file containing list of packages to skip when checking for '
            'prefixes'))
    parser.add_argument(
        '--verb', '-v', action='store_true', help='verbose output')
    parser.add_argument(
        '--channel', '-c', action='store', default='conda-forge',
        help='Conda channel to check. Default is conda-forge')
    # the help text used to say "outdated packages", which is not what this
    # script writes to the file
    parser.add_argument(
        '--json', action='store',
        help='Save packages which use a binary prefix to json file.')
    parser.add_argument(
        '--directory', '-d', action='store',
        default=os.path.join(os.getcwd(), 'pkg_cache'),
        help='where to store packages')
    return parser.parse_args()
def _download_to_cache(url, filename):
    """ Download url to filename without ever leaving a partial file there. """
    partial = filename + '.part'
    # stream to a side file first: the cache lookup only tests for existence,
    # so a truncated archive under the final name would be reused forever
    with urllib.request.urlopen(url) as response, open(partial, 'wb') as f:
        shutil.copyfileobj(response, f)
    os.replace(partial, filename)


def _has_binary_prefix(filename):
    """ Return True if the package archive lists a binary prefix file. """
    with tarfile.open(filename) as tf:
        try:
            member = tf.getmember('info/has_prefix')
        except KeyError:
            # no info/has_prefix member, the package records no prefix files
            return False
        return b' binary ' in tf.extractfile(member).read()


def find_prefix_packages(index, package_names, verbose, cache_dir):
    """ Return a list of packages which use a prefix.

    index : channel index mapping keys to package records with 'name',
        'version', 'platform', 'md5' and 'url' entries.
    package_names : iterable of package names to check.
    verbose : when true, also report packages without a binary prefix.
    cache_dir : existing directory in which downloaded packages are kept.

    For the latest version of every package, one archive per platform is
    downloaded (unless already cached) and inspected. Progress and a summary
    are printed. Returns the sorted names of the packages for which at least
    one platform uses a binary prefix.
    """
    pkgs_with_bin_prefix = []
    pkgs_with_no_bin_prefix = []

    for package_name in sorted(package_names):
        _, entries = find_latest_versions(index, package_name)
        if not entries:
            print(package_name + " : Missing any entries. Skipping...")
            continue

        uses_prefix = []
        for platform in sorted({e['platform'] for e in entries}):
            platform_entries = [e for e in entries
                                if e['platform'] == platform]
            # pick the entry with the smallest md5 so we try the same
            # package each time
            chosen = min(platform_entries, key=lambda e: e['md5'])
            url = chosen['url']
            filename = os.path.join(
                cache_dir, platform + '-' + url.split('/')[-1])

            # Download if not in cache
            if not os.path.exists(filename):
                print("Downloading:", filename)
                _download_to_cache(url, filename)

            # determine if package uses a binary prefix
            uses_prefix.append(_has_binary_prefix(filename))

        if any(uses_prefix):
            print(package_name, "uses a binary prefix")
            pkgs_with_bin_prefix.append(package_name)
        else:
            pkgs_with_no_bin_prefix.append(package_name)
            if verbose:
                print(package_name, "does NOT use a binary prefix")

    print("Uses a binary prefix:", len(pkgs_with_bin_prefix))
    print("Does NOT use a binary prefix:", len(pkgs_with_no_bin_prefix))
    print("Total:", len(pkgs_with_bin_prefix) + len(pkgs_with_no_bin_prefix))
    return pkgs_with_bin_prefix
def main():
    """ main function

    Parses the command line, fetches the channel index, checks the selected
    packages for binary prefixes and optionally writes the names of the
    packages which use one to a json file.
    """
    args = parse_arguments()

    # create somewhere to store downloaded packages; exist_ok avoids the
    # check-then-create race of testing for the directory first
    os.makedirs(args.directory, exist_ok=True)

    # determine package names to check
    index = get_channel_index(args.channel)
    package_names = set(args.packages)
    if not package_names:  # no package names given on command line
        package_names = {v['name'] for v in index.values()}

    # remove skipped packages
    if args.skip is not None:
        with open(args.skip) as f:
            pkgs_to_skip = {line.strip() for line in f}
        package_names = package_names - pkgs_to_skip

    # find packages which use a binary prefix
    pkgs_with_bin_prefix = find_prefix_packages(
        index, package_names, args.verb, args.directory)

    # save pkgs_with_bin_prefix to json formatted file if specified
    if args.json is not None:
        with open(args.json, 'w') as f:
            json.dump(pkgs_with_bin_prefix, f)
# run the check only when executed as a script, not when imported
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Made a tweak to your gist to handle this case of the binary prefix.