Skip to content

Instantly share code, notes, and snippets.

@bouweandela
Last active September 14, 2020 13:59
Show Gist options
  • Save bouweandela/13ab56bee8778a1eb5b82091e4fdd443 to your computer and use it in GitHub Desktop.
Save bouweandela/13ab56bee8778a1eb5b82091e4fdd443 to your computer and use it in GitHub Desktop.
Script to list licenses of conda/CRAN/Julia packages
#!/usr/bin/env python3
"""Tool to list licenses of conda/CRAN/Julia packages.
Example usage:
conda activate some-environment
python list_licenses.py -l '[^L]*GPL' Unknown
will list all packages installed through conda with a GPL or Unknown license.
To see how a package enters your dependency tree, use the command
`conda-tree whoneeds --tree --small package-name`.
"""
import argparse
import json
import logging
import os
import re
from pathlib import Path
import sh
# Module-level logger; main() sets its level from the --log-level option.
logger = logging.getLogger("licenses")
# Root of the conda environment to inspect. This is read at import time, so
# importing the module raises KeyError when CONDA_PREFIX is not set
# (presumably it is set by `conda activate` -- see the module docstring).
CONDA_PREFIX = Path(os.environ["CONDA_PREFIX"])
# R installation inside that environment; get_r_packages() scans its
# "library" subdirectory.
R_PREFIX = CONDA_PREFIX / "lib" / "R"
# Per-user Julia directory; get_julia_packages() scans its "packages"
# subdirectory and the "conda/3" environment below it.
JULIA_PREFIX = Path("~/.julia").expanduser()
def get_conda_packages(packages=None, conda_prefix=CONDA_PREFIX):
    """Yield tuples of (package name, license) for Conda packages.

    The package list comes from ``conda list --prefix <conda_prefix> --json``;
    the license of each package is read from the matching
    ``conda-meta/<dist_name>.json`` file inside the environment.

    Args:
        packages: Optional collection of package names. When non-empty, only
            these packages are reported; when empty or None, all are.
        conda_prefix: Root directory of the conda environment to inspect.

    Yields:
        ``(name, license_name)`` tuples. ``license_name`` is ``"Unknown"``
        when the metadata has no usable license entry.
    """
    meta_dir = Path(conda_prefix) / "conda-meta"
    listing = sh.conda.list("--prefix", str(conda_prefix), "--json").stdout
    for pkg in json.loads(listing):
        name = pkg["name"]
        if packages and name not in packages:
            continue
        meta_file = meta_dir / f"{pkg['dist_name']}.json"
        if not meta_file.exists():
            # Packages listed by conda but installed by another tool (e.g.
            # pip) have no conda-meta entry.
            logger.warning(
                "Could not find metadata for package %s, "
                "did you install it with conda?",
                name,
            )
            continue
        meta = json.loads(meta_file.read_bytes())
        # The key may be absent, null or an empty string. All three mean
        # "Unknown"; a None here would make the regex matching in main()
        # raise TypeError.
        license_name = meta.get("license") or "Unknown"
        yield name, license_name
def get_r_packages(packages=None, r_prefix=None):
    """Yield tuples of (package name, license) for R packages.

    Every directory in ``<r_prefix>/library`` that contains a ``DESCRIPTION``
    file is treated as an installed R package; its license is taken from the
    ``License:`` field of that file.

    Args:
        packages: Optional collection of package names. When non-empty, only
            these packages are reported; when empty or None, all are.
        r_prefix: Root of the R installation. Defaults to the module-level
            ``R_PREFIX``.

    Yields:
        ``(name, license_name)`` tuples in sorted directory order.
        ``license_name`` is ``"Unknown"`` when ``DESCRIPTION`` has no
        ``License:`` field.
    """
    if r_prefix is None:
        r_prefix = R_PREFIX
    meta_dir = Path(r_prefix) / "library"
    if not meta_dir.is_dir():
        # No R installation at this prefix: nothing to report.
        return
    for path in sorted(meta_dir.iterdir()):
        # Use the full directory name: R package names often contain dots
        # (e.g. "data.table"), which Path.stem would cut off as a suffix.
        pkg = path.name
        if packages and pkg not in packages:
            continue
        meta_file = path / "DESCRIPTION"
        if not meta_file.is_file():
            # Not an installed package (stray file or directory).
            continue
        match = re.search(
            r"^License:[ \t]*(.*)$",
            meta_file.read_text(errors="replace"),
            re.MULTILINE,
        )
        license_name = match.group(1).strip() if match else "Unknown"
        yield pkg, license_name
def license_from_file(license_file):
    """Try to figure out what license is in a file.

    Two optional external command line tools are tried in order, each only
    if it is found on the PATH: ``licensee`` and then ``identify_license``.

    Args:
        license_file: Path of the license file to inspect.

    Returns:
        The license name reported by the first tool that detects one,
        otherwise the string ``"Unknown, see <license_file>"``.
    """
    if sh.which("licensee"):
        stdout = sh.licensee(license_file, _ok_code=[0, 1]).stdout.decode(
            errors="ignore"
        )
        match = re.search("License: (.*)", stdout)
        # No "License:" line, or an explicit "Not detected": try the next
        # tool instead of failing.
        if match and match.group(1) != "Not detected":
            return match.group(1)
    if sh.which("identify_license"):
        stdout = sh.identify_license(
            license_file, _ok_code=[0, 1]
        ).stdout.decode(errors="ignore")
        # The path is matched literally, so it has to be escaped: file
        # names routinely contain regex metacharacters such as "." or "+".
        match = re.search(f"{re.escape(str(license_file))}: (.*)", stdout)
        if match and match.group(1):
            # Return the whole captured name, not just its first character.
            return match.group(1)
    return f"Unknown, see {license_file}"
def get_julia_packages(packages=None, julia_prefix=None):
    """Yield tuples of (package name, license) for Julia packages.

    Each directory in ``<julia_prefix>/packages`` is treated as a package
    whose sources live in a subdirectory. The license is read from the
    first of ``License.md``, ``LICENSE.md`` or ``LICENSE`` found there: if
    the text contains "licensed under the <name>", that name is used,
    otherwise ``license_from_file`` is asked. Afterwards the packages of the
    conda environment at ``<julia_prefix>/conda/3`` are yielded as well, if
    that environment exists.

    Args:
        packages: Optional collection of package names. When non-empty, only
            these packages are reported; when empty or None, all are.
        julia_prefix: Root of the Julia user directory. Defaults to the
            module-level ``JULIA_PREFIX``.

    Yields:
        ``(name, license_name)`` tuples.
    """
    if julia_prefix is None:
        julia_prefix = JULIA_PREFIX
    julia_prefix = Path(julia_prefix)

    meta_dir = julia_prefix / "packages"
    # A machine without Julia has no packages directory; treat it as empty
    # instead of failing.
    pkg_paths = sorted(meta_dir.iterdir()) if meta_dir.is_dir() else []
    for path in pkg_paths:
        if not path.is_dir():
            continue
        pkg = path.name
        if packages and pkg not in packages:
            continue
        # Take the first subdirectory in sorted order. An empty package
        # directory is skipped: calling next() on an empty iterator here
        # would surface as RuntimeError inside a generator.
        version_dirs = sorted(
            child for child in path.iterdir() if child.is_dir()
        )
        if not version_dirs:
            logger.warning(
                "Could not find an installed version of Julia package %s "
                "at %s",
                pkg,
                path,
            )
            continue
        pkg_dir = version_dirs[0]
        for filename in "License.md", "LICENSE.md", "LICENSE":
            license_file = pkg_dir / filename
            if license_file.is_file():
                break
        else:
            logger.warning(
                "Could not find license for Julia package %s at %s",
                pkg,
                license_file,
            )
            continue
        license_match = re.search(
            "licensed under the (.*)",
            license_file.read_text(errors="replace"),
        )
        if license_match:
            license_name = license_match.group(1).rstrip(":")
        else:
            license_name = license_from_file(license_file)
        yield pkg, license_name

    # TODO: figure out if this conda environment is actually used or
    # if PyCall uses the active environment
    conda_prefix = julia_prefix / "conda" / "3"
    # Only query conda when that environment exists; `conda list` fails on
    # a prefix that does not.
    if (conda_prefix / "conda-meta").is_dir():
        yield from get_conda_packages(packages, conda_prefix)
def load_ignore(filename):
    """Load the list of packages to ignore from file.

    The file lists one package name per line. Everything after a ``#`` is a
    comment, and blank lines are skipped.

    Args:
        filename: Path of the ignore file, or None for no ignore list.

    Returns:
        A set of package names; empty when ``filename`` is None.
    """
    ignore = set()
    if filename is not None:
        # Use a context manager so the file handle is closed as soon as
        # reading is done.
        with Path(filename).open() as handle:
            for line in handle:
                pkg = line.split("#")[0].strip()
                if pkg:
                    ignore.add(pkg)
    logger.debug("Ignoring %s", sorted(ignore))
    return ignore
def main():
    """Run the program.

    Parses the command line and then, for each package source in turn
    (conda, R, Julia), prints a heading followed by every package that is
    not on the ignore list and whose license matches at least one of the
    requested regular expressions. Patterns are applied with ``re.match``,
    i.e. anchored at the start of the license name.
    """
    args = parse_args()
    logger.setLevel(args.log_level.upper())
    ignore = load_ignore(args.ignore)
    packages = set(args.packages)

    # --licenses may arrive as one bare string. Iterating that would use its
    # individual characters as patterns ("." and "*"), and "*" alone is not
    # a valid regular expression.
    licenses = args.licenses
    if isinstance(licenses, str):
        licenses = [licenses]
    # Compile once up front instead of once per package.
    patterns = [re.compile(pattern) for pattern in licenses]

    for repo in "conda", "r", "julia":
        # Look up get_conda_packages / get_r_packages / get_julia_packages.
        get_licences = globals()[f"get_{repo}_packages"]
        print(f"# List of {repo.title()} packages:")
        for pkg, license_name in get_licences(packages):
            if pkg in ignore:
                continue
            if any(pattern.match(license_name) for pattern in patterns):
                print(pkg, license_name)
        print("")
def parse_args(argv=None):
    """Parse command line arguments.

    Args:
        argv: Optional list of argument strings. When None, argparse reads
            ``sys.argv[1:]``.

    Returns:
        The parsed ``argparse.Namespace`` with the attributes ``licenses``
        (list of regular expressions), ``packages``, ``ignore`` (file name
        or None) and ``log_level``.
    """
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument(
        "-l",
        "--licenses",
        # Must be a list, like the values nargs="*" produces; a bare string
        # would later be iterated character by character.
        default=[".*"],
        nargs="*",
        help="Only display licenses that match these regular expressions",
    )
    parser.add_argument(
        "-p",
        "--packages",
        default=set(),
        nargs="*",
        help="Only display licenses for these packages",
    )
    parser.add_argument(
        "-i",
        "--ignore",
        default=None,
        help="Ignore packages listed in this file.",
    )
    parser.add_argument(
        "-ll",
        "--log-level",
        default="warning",
        choices=["debug", "info", "warning", "error"],
    )
    args = parser.parse_args(argv)
    return args
if __name__ == "__main__":
    # Each record shows the timestamp, process id, padded level name, logger
    # name and line number, followed by a tab and the message.
    log_format = (
        "%(asctime)s [%(process)d] %(levelname)-8s "
        "%(name)s,%(lineno)s\t%(message)s"
    )
    logging.basicConfig(format=log_format)
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment