Skip to content

Instantly share code, notes, and snippets.

@knil-sama
Last active November 22, 2022 14:41
Show Gist options
  • Select an option

  • Save knil-sama/c0536477b7d0ca2328479d7ce9972bc0 to your computer and use it in GitHub Desktop.

Select an option

Save knil-sama/c0536477b7d0ca2328479d7ce9972bc0 to your computer and use it in GitHub Desktop.
import s3fs
import sys
# Expects AWS credentials to be available in the environment (they are resolved by s3fs/botocore).
def fetch_filtered_filekeys(bucket_name: str, dir_name: str, prefix_file: str = "", suffix_file: str = "") -> list:
    """Return the keys of the files under ``bucket_name/dir_name`` matching a name filter.

    The directory is searched recursively (sub-directories included).

    Args:
        bucket_name: Name of the S3 bucket.
        dir_name: Directory (key prefix) inside the bucket to search.
        prefix_file: Keep only files whose *file name* (last path component)
            starts with this string. The empty default keeps everything.
        suffix_file: Keep only files whose file name ends with this string
            (e.g. an extension). The empty default keeps everything.

    Returns:
        A list of the matching file keys, as returned by the filesystem
        (full ``bucket/dir/.../file`` paths).
    """
    S3 = s3fs.S3FileSystem()
    s3_dataset_path = "/".join([bucket_name, dir_name])
    # find() returns a flat list of every file key below the path.  walk() on
    # fsspec-based s3fs yields (root, dirs, files) tuples like os.walk, which
    # have no startswith/endswith and made the original filter raise.
    list_available_file = []
    for key in S3.find(s3_dataset_path):
        # Filter on the file name only: the full key always begins with the
        # bucket name, so a file-name prefix could never match against it.
        file_name = key.rsplit("/", 1)[-1]
        if file_name.startswith(prefix_file) and file_name.endswith(suffix_file):
            list_available_file.append(key)
    return list_available_file
if __name__ == "__main__":
    # usage python3 count_files_s3_dir_and_subdir.py <bucket_name> <dir_name> [prefix_file] [suffix_file]
    cli_args = sys.argv[1:]
    if not 2 <= len(cli_args) <= 4:
        # Exit with a readable message instead of a TypeError from a bad call.
        sys.exit(
            "usage: python3 {} <bucket_name> <dir_name> [prefix_file] [suffix_file]".format(sys.argv[0])
        )
    # sys.argv is a list, so it must be unpacked positionally with `*`;
    # `**` requires a mapping and raises TypeError on a list.
    print(len(fetch_filtered_filekeys(*cli_args)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment