Skip to content

Instantly share code, notes, and snippets.

@riga
Created July 19, 2021 10:45
Show Gist options
  • Save riga/18c85152e292dcb098b3333c91b30040 to your computer and use it in GitHub Desktop.
Save riga/18c85152e292dcb098b3333c91b30040 to your computer and use it in GitHub Desktop.
Helpers for loading remote files from CERN EOS.
# coding: utf-8
"""
Helpers for loading remote files from CERN EOS.
"""
import os
import subprocess
import six
# URL template for downloading a single file from a shared directory on CERN EOS (via CERNBox).
# Placeholders: {dir_hash} is the hash of the shared directory, {path} the directory inside that
# share and {files} the name of the file to fetch. download_from_eos() fills {path} and {files}
# with fully percent-encoded values.
EOS_URL = "https://cernbox.cern.ch/index.php/s/{dir_hash}/download?path={path}&files={files}"
def wget(url, path):
    """
    Generic function to download a remote file located at *url* to a local *path* via wget.

    *path* is expanded (user home and environment variables) and made absolute first. Missing,
    intermediate directories are created, and an existing local file is removed beforehand so
    that it is overwritten.

    Raises an *Exception* when *path* refers to an existing directory, when the wget executable
    cannot be started, or when wget exits with a non-zero code. In the latter case the last
    lines of wget's stderr output are appended to the error message.
    """
    # expand and normalize the target path
    path = os.path.normpath(os.path.abspath(os.path.expandvars(os.path.expanduser(path))))
    if os.path.isdir(path):
        raise Exception("path '{}' refers to a local, existing directory".format(path))

    # create the parent directory, remove the file if existing
    parent = os.path.dirname(path)
    if not os.path.exists(parent):
        try:
            os.makedirs(parent)
        except OSError:
            # a concurrent process may have created the directory in the meantime, which is
            # fine; anything else is a real error
            if not os.path.isdir(parent):
                raise
    elif os.path.exists(path):
        os.remove(path)

    # build the wget command and run it
    cmd = ["wget", "-O", path, url]
    try:
        # communicate() continuously drains both pipes; merely waiting on a process whose output
        # is piped but never read can block forever once the OS pipe buffer is full, and wget
        # writes its progress log to stderr
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        _, err = proc.communicate()
    except Exception as e:
        # e.g. the wget executable is not installed
        raise Exception("download of url '{}' failed: {}".format(url, e))

    if proc.returncode != 0:
        # start with the same text check_call would have reported
        reason = str(subprocess.CalledProcessError(proc.returncode, cmd))

        # stderr is bytes on python 3 and str on python 2
        if not isinstance(err, str):
            err = err.decode("utf-8", "replace")

        # the actual cause is at the end of the log, so only keep the last few lines
        tail = "\n".join(err.strip().splitlines()[-10:])
        if tail:
            reason = "{}\n{}".format(reason, tail)

        raise Exception("download of url '{}' failed: {}".format(url, reason))
def download_from_eos(dir_hash, file_name, path):
    """
    Fetches a single file from CERN EOS and saves it locally at *path*. The remote file is
    identified by *dir_hash* (i.e. the hash of a public, shared directory such as
    "KAkUpZjhEbzi2Uy") and by *file_name*, which is given relative to that shared directory and
    may contain subdirectories. The download itself is delegated to :py:func:`wget`. Examples:

    .. code-block:: python

        download_from_eos("KAkUpZjhEbzi2Uy", "file.root", "my/data/dir/file.root")
        download_from_eos("KAkUpZjhEbzi2Uy", "dir/file2.root", "my/data/dir/file2.root")
    """
    # separate the remote directory from the bare file name, falling back to the root
    # directory "/" when the file name has no directory part
    remote_dir, remote_file = os.path.split(file_name)
    if not remote_dir:
        remote_dir = "/"

    # percent-encode both parts completely (no character is considered safe) and insert them
    # into the url template
    url_quote = six.moves.urllib.parse.quote
    url = EOS_URL.format(
        dir_hash=dir_hash,
        path=url_quote(remote_dir, safe=""),
        files=url_quote(remote_file, safe=""),
    )

    # run the actual download
    wget(url, path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment