Last active
September 14, 2020 13:59
-
-
Save bouweandela/13ab56bee8778a1eb5b82091e4fdd443 to your computer and use it in GitHub Desktop.
Script to list licenses of conda/CRAN/Julia packages
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
"""Tool to list licenses of conda/CRAN/Julia packages. | |
Example usage: | |
conda activate some-environment | |
python list_licenses.py -l '[^L]*GPL' Unknown | |
will list all packages installed through conda with a GPL or Unknown license. | |
To see how a package enters your dependency tree, use the command | |
`conda-tree whoneeds --tree --small package-name`. | |
""" | |
import argparse | |
import json | |
import logging | |
import os | |
import re | |
from pathlib import Path | |
import sh | |
# Logger shared by every function in this script.
logger = logging.getLogger("licenses")

# Root of the conda environment to inspect, taken from the environment.
# Indexing os.environ raises KeyError when CONDA_PREFIX is not set.
CONDA_PREFIX = Path(os.environ["CONDA_PREFIX"])
# Directory inside the conda environment that is searched for R packages.
R_PREFIX = CONDA_PREFIX.joinpath("lib", "R")
# Per-user Julia directory ("~" is expanded to the home directory).
JULIA_PREFIX = Path("~", ".julia").expanduser()
def get_conda_packages(packages=None, conda_prefix=CONDA_PREFIX):
    """Iterate over conda packages and the license recorded for each.

    The package list comes from ``conda list --prefix <conda_prefix> --json``;
    the license is read from the matching ``conda-meta/<dist_name>.json``
    file inside the prefix.

    Args:
        packages: Optional container of package names to report. When it is
            empty or ``None`` every listed package is reported.
        conda_prefix: Path of the conda environment to inspect.

    Yields:
        ``(package name, license)`` tuples. The license is ``"Unknown"``
        when the metadata has no ``"license"`` key. Packages without a
        metadata file are skipped after logging a warning.
    """
    metadata_root = conda_prefix / "conda-meta"
    listing = sh.conda.list("--prefix", conda_prefix, "--json")
    for entry in json.loads(listing.stdout):
        name = entry["name"]
        if packages and name not in packages:
            continue
        record = metadata_root / f"{entry['dist_name']}.json"
        if record.exists():
            info = json.loads(record.read_bytes())
            yield name, info.get("license", "Unknown")
        else:
            logger.warning(
                "Could not find metadata for package %s, "
                "did you install it with conda?",
                name,
            )
def get_r_packages(packages=None):
    """Yield tuples of (package name, license) for R packages.

    Every sub-directory of ``R_PREFIX / "library"`` is treated as one
    installed package whose license is the ``License:`` field of its
    ``DESCRIPTION`` file.

    Args:
        packages: Optional container of package names to report. When it is
            empty or ``None`` every package is reported.

    Yields:
        ``(package name, license)`` tuples in alphabetical order of the
        directory names. The license is ``"Unknown"`` when ``DESCRIPTION``
        has no ``License:`` field. Entries without a ``DESCRIPTION`` file
        are skipped after logging a warning. Nothing is yielded when the
        library directory does not exist.
    """
    meta_dir = R_PREFIX / "library"
    if not meta_dir.is_dir():
        # No R library in this environment: nothing to report.
        return
    for path in sorted(meta_dir.iterdir()):
        # Use the full directory name: ``path.stem`` would cut dotted
        # package names such as "data.table" down to "data".
        pkg = path.name
        if packages and pkg not in packages:
            continue
        meta_file = path / "DESCRIPTION"
        if not meta_file.is_file():
            logger.warning(
                "Could not find metadata for R package %s at %s",
                pkg,
                meta_file,
            )
            continue
        # Anchor at the start of a line so that only the field named exactly
        # "License" is matched, not e.g. "License_is_FOSS:" or text that
        # merely contains "License: " somewhere in another field.
        match = re.search(
            r"^License:[ \t]*(.*)$",
            meta_file.read_text(errors="replace"),
            flags=re.MULTILINE,
        )
        license_name = match.group(1).strip() if match else "Unknown"
        yield pkg, license_name
def license_from_file(license_file):
    """Try to figure out what license is in a file.

    The external command line tools ``licensee`` and ``identify_license``
    are tried in that order, each only if it is found on the PATH. Exit
    codes 0 and 1 are both accepted from either tool.

    Args:
        license_file: Path of the license file to classify.

    Returns:
        The license name reported by the first tool that recognises the
        file, or ``"Unknown, see <license_file>"`` when no tool is available
        or none of them produces a usable answer.
    """
    if sh.which("licensee"):
        stdout = sh.licensee(license_file, _ok_code=[0, 1]).stdout.decode(
            errors="ignore"
        )
        # The answer is expected on a line of the form "License: <name>";
        # a missing line is treated like "Not detected" instead of crashing.
        match = re.search(r"License: (.*)", stdout)
        if match and match.group(1) != "Not detected":
            return match.group(1)
    if sh.which("identify_license"):
        stdout = sh.identify_license(
            license_file, _ok_code=[0, 1]
        ).stdout.decode(errors="ignore")
        # The answer is expected on a line of the form "<path>: <name>".
        # The path is escaped because it may contain regex metacharacters.
        match = re.search(
            rf"{re.escape(str(license_file))}: (.*)", stdout
        )
        if match and match.group(1):
            # Return the whole captured name, not just its first character.
            return match.group(1)
    return f"Unknown, see {license_file}"
def get_julia_packages(packages=None):
    """Yield tuples of (package name, license) for Julia packages.

    Two sources are reported:

    1. Every directory in ``JULIA_PREFIX / "packages"``. The license is
       taken from a "licensed under the ..." sentence in the package's
       license file, falling back to ``license_from_file``.
    2. The conda packages in ``JULIA_PREFIX / "conda" / "3"``, when that
       directory contains a ``conda-meta`` directory.

    Args:
        packages: Optional container of package names to report. When it is
            empty or ``None`` every package is reported.

    Yields:
        ``(package name, license)`` tuples. Packages for which no license
        file can be located are skipped after logging a warning.
    """
    meta_dir = JULIA_PREFIX / "packages"
    # Julia may not be installed at all; treat that as "no Julia packages".
    package_dirs = sorted(meta_dir.iterdir()) if meta_dir.is_dir() else []
    for path in package_dirs:
        pkg = path.name
        if packages and pkg not in packages:
            continue
        # Inspect the first sub-directory of the package directory
        # (presumably one sub-directory per installed version -- TODO
        # confirm). A default is passed to next() because a bare
        # StopIteration inside a generator turns into a RuntimeError.
        pkg_dir = next(
            (entry for entry in sorted(path.glob("*")) if entry.is_dir()),
            None,
        )
        if pkg_dir is None:
            logger.warning(
                "Could not find license for Julia package %s at %s",
                pkg,
                path,
            )
            continue
        candidates = (
            pkg_dir / filename
            for filename in ("License.md", "LICENSE.md", "LICENSE")
        )
        license_file = next(
            (candidate for candidate in candidates if candidate.is_file()),
            None,
        )
        if license_file is None:
            # Report the directory that was searched rather than only the
            # last file name that happened to be tried.
            logger.warning(
                "Could not find license for Julia package %s at %s",
                pkg,
                pkg_dir,
            )
            continue
        license_match = re.search(
            "licensed under the (.*)",
            license_file.read_text(errors="ignore"),
        )
        if license_match:
            license_name = license_match.group(1).rstrip(":")
        else:
            license_name = license_from_file(license_file)
        yield pkg, license_name
    # TODO: figure out if this conda environment is actually used or
    # if PyCall uses the active environment
    conda_prefix = JULIA_PREFIX / "conda" / "3"
    # Only query conda when the Julia-managed environment really exists;
    # otherwise `conda list --prefix` would be run on a missing prefix.
    if (conda_prefix / "conda-meta").is_dir():
        yield from get_conda_packages(packages, conda_prefix)
def load_ignore(filename):
    """Load the list of packages to ignore from file.

    The file holds one package name per line. Everything from a ``#`` to
    the end of the line is a comment, and blank lines are skipped.

    Args:
        filename: Path of the ignore file, or ``None`` to ignore nothing.

    Returns:
        The set of package names found in the file (empty when ``filename``
        is ``None``).
    """
    ignore = set()
    if filename is not None:
        # The context manager closes the file even if reading fails.
        with Path(filename).open() as file:
            for line in file:
                pkg = line.split("#")[0].strip()
                if pkg:
                    ignore.add(pkg)
    logger.debug("Ignoring %s", sorted(ignore))
    return ignore
def main():
    """Run the program.

    Parses the command line, then prints one section each for the Conda, R
    and Julia packages. A section lists every package that is not ignored
    and whose license matches, from the start of the string, at least one
    of the requested regular expressions.
    """
    args = parse_args()
    logger.setLevel(args.log_level.upper())
    ignore = load_ignore(args.ignore)
    packages = set(args.packages)

    # A bare string must count as ONE pattern; iterating over it would try
    # every character as its own regex (".*" -> "." and the invalid "*").
    licenses = (
        [args.licenses] if isinstance(args.licenses, str) else args.licenses
    )
    # Compile once up front: invalid expressions fail before any output and
    # the patterns are not re-parsed for every package.
    patterns = [re.compile(pattern) for pattern in licenses]

    repositories = (
        ("conda", get_conda_packages),
        ("r", get_r_packages),
        ("julia", get_julia_packages),
    )
    for repo, get_licences in repositories:
        print(f"# List of {repo.title()} packages:")
        for pkg, license_name in get_licences(packages):
            if pkg in ignore:
                continue
            if any(pattern.match(license_name) for pattern in patterns):
                print(pkg, license_name)
        print("")
def parse_args():
    """Parse command line arguments.

    Returns:
        The ``argparse.Namespace`` built from ``sys.argv`` with the
        attributes ``licenses`` (list of regular expressions), ``packages``
        (package names), ``ignore`` (path of the ignore file or ``None``)
        and ``log_level`` (one of the lower-case level names).
    """
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument(
        "-l",
        "--licenses",
        # Must be a list: with nargs="*" the value is iterated pattern by
        # pattern, and a plain ".*" string would be split into characters.
        default=[".*"],
        nargs="*",
        help="Only display licenses that match these regular expressions",
    )
    parser.add_argument(
        "-p",
        "--packages",
        default=set(),
        nargs="*",
        help="Only display licenses for these packages",
    )
    parser.add_argument(
        "-i",
        "--ignore",
        default=None,
        help="Ignore packages listed in this file.",
    )
    parser.add_argument(
        "-ll",
        "--log-level",
        default="warning",
        choices=["debug", "info", "warning", "error"],
        help="Set the verbosity of the log messages",
    )
    args = parser.parse_args()
    return args
if __name__ == "__main__":
    # Each record shows: timestamp, process id, level padded to 8 columns,
    # logger name and line number, then a tab and the message.
    log_format = (
        "%(asctime)s [%(process)d] %(levelname)-8s "
        "%(name)s,%(lineno)s\t%(message)s"
    )
    logging.basicConfig(format=log_format)
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment