Created
October 31, 2019 09:24
-
-
Save helenst/1d6bbbc8bf30df1a3e14a638ae3121e6 to your computer and use it in GitHub Desktop.
Include directory entries in the tarfile and allow a top-level directory to be specified
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/python_client/src/wellcome_storage_service/downloader.py b/python_client/src/wellcome_storage_service/downloader.py | |
index 6a78c76b..fc10f56e 100644 | |
--- a/python_client/src/wellcome_storage_service/downloader.py | |
+++ b/python_client/src/wellcome_storage_service/downloader.py | |
@@ -3,6 +3,7 @@ | |
import abc | |
import os | |
import tarfile | |
+import time | |
try: | |
from collections.abc import ABC | |
@@ -45,7 +46,7 @@ def download_bag(storage_manifest, out_dir): | |
) | |
-def download_compressed_bag(storage_manifest, out_path): | |
+def download_compressed_bag(storage_manifest, out_path, top_level_directory=''): | |
""" | |
Download all the files in a bag to a compressed archive. | |
@@ -58,17 +59,33 @@ def download_compressed_bag(storage_manifest, out_path): | |
provider = _choose_provider(location) | |
with tarfile.open(out_path, "w:gz") as tf: | |
+ # Keeps track of which directories have been added to the tar file | |
+ dirnames = set() | |
for manifest_file in _all_files(storage_manifest): | |
fileobj = provider.get_fileobj( | |
location=location, manifest_file=manifest_file | |
) | |
- tarinfo = tarfile.TarInfo(name=manifest_file["name"]) | |
+ name_in_tar = os.path.join(top_level_directory, manifest_file["name"]) | |
+ | |
+ # Ensure all parent directories exist in tar | |
+ name = name_in_tar | |
+ while name: | |
+ name = os.path.dirname(name) | |
+ if name and name not in dirnames: | |
+ tarinfo = tarfile.TarInfo(name) | |
+ tarinfo.type = tarfile.DIRTYPE | |
+ tarinfo.mode = 0o755 | |
+ tarinfo.mtime = time.time() | |
+ tf.addfile(tarinfo=tarinfo) | |
+ dirnames.add(name) | |
+ | |
+ tarinfo = tarfile.TarInfo(name=name_in_tar) | |
tarinfo.size = manifest_file["size"] | |
+ tarinfo.mtime = time.time() | |
tf.addfile(tarinfo=tarinfo, fileobj=fileobj) | |
- | |
class AbstractProvider(object): | |
""" | |
Abstract class for a downloader. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment