turtlemonvh · January 22, 2019 22:28
diff --git a/s3_nested_data_counts.py b/s3_nested_data_counts.py
 import boto3
 from collections import Counter

 """
 Use this when your keys use "/" in a directory-like structure and you want to expand the list of prefixes level by level.
 Similar to `tree -L 2 prefix/` in *nix.
 """

 s3 = boto3.client('s3')
 bucket_name = "XXX" # s3 bucket name
 # Prefix to look under in the bucket.
 # NOTE: S3 groups keys by the first "/" found *after* this prefix, so "data" returns
 # "data/" itself as the first level; use "data/" if the first level should be the
 # children of that "directory".
 starting_prefix = "YYY"

 # A single list_objects_v2 call returns only one page of results (at most 1000 entries),
 # so use a paginator to walk every page instead of silently truncating the listing.
 paginator = s3.get_paginator('list_objects_v2')


 def _iter_child_prefixes(prefix):
     """Yield each common prefix ("sub-directory") found directly under *prefix*.

     Every page of the listing is followed. A prefix that has no sub-prefixes
     yields nothing: S3 omits the 'CommonPrefixes' key from the response in that
     case, so it is read with a default instead of being indexed directly.
     """
     pages = paginator.paginate(Bucket=bucket_name, Delimiter="/", Prefix=prefix)
     for page in pages:
         for entry in page.get('CommonPrefixes', []):
             yield entry['Prefix']


 # Get the prefixes on the first level (lazy: nothing is requested until iterated)
 prefixes = _iter_child_prefixes(starting_prefix)

 # Expand the list of prefixes with the next level into a flattened list
 # Note that you can chain another _iter_child_prefixes() pass like this one to expand more levels
 expanded_prefixes = (
     child
     for prefix in prefixes
     for child in _iter_child_prefixes(prefix)
 )

 # Count how many times each 2nd level value shows up.
 # Helpful if your data is set up like "{PREFIX}/{UUID}/{DATE}" and you want to see the number of unique values of UUID for each DATE.
 # Common prefixes end with "/", so the last real path component is at index -2 after splitting.
 leaf_counts = Counter(p.split("/")[-2] for p in expanded_prefixes)
 # Print explicitly: a bare expression only displays its value in a REPL/notebook, not in a script.
 print(leaf_counts)
	import boto3
	from collections import Counter

	"""
	Use this when your keys use "/" in a directory-like structure and you want to expand the list of prefixes level by level.
	Similar to `tree -L 2 prefix/` in *nix.
	"""

	s3 = boto3.client('s3')
	bucket_name = "XXX" # s3 bucket name
	# Prefix to look under in the bucket.
	# NOTE: S3 groups keys by the first "/" found *after* this prefix, so "data" returns
	# "data/" itself as the first level; use "data/" if the first level should be the
	# children of that "directory".
	starting_prefix = "YYY"

	# A single list_objects_v2 call returns only one page of results (at most 1000 entries),
	# so use a paginator to walk every page instead of silently truncating the listing.
	paginator = s3.get_paginator('list_objects_v2')


	def _iter_child_prefixes(prefix):
	    """Yield each common prefix ("sub-directory") found directly under *prefix*.

	    Every page of the listing is followed. A prefix that has no sub-prefixes
	    yields nothing: S3 omits the 'CommonPrefixes' key from the response in that
	    case, so it is read with a default instead of being indexed directly.
	    """
	    pages = paginator.paginate(Bucket=bucket_name, Delimiter="/", Prefix=prefix)
	    for page in pages:
	        for entry in page.get('CommonPrefixes', []):
	            yield entry['Prefix']


	# Get the prefixes on the first level (lazy: nothing is requested until iterated)
	prefixes = _iter_child_prefixes(starting_prefix)

	# Expand the list of prefixes with the next level into a flattened list
	# Note that you can chain another _iter_child_prefixes() pass like this one to expand more levels
	expanded_prefixes = (
	    child
	    for prefix in prefixes
	    for child in _iter_child_prefixes(prefix)
	)

	# Count how many times each 2nd level value shows up.
	# Helpful if your data is set up like "{PREFIX}/{UUID}/{DATE}" and you want to see the number of unique values of UUID for each DATE.
	# Common prefixes end with "/", so the last real path component is at index -2 after splitting.
	leaf_counts = Counter(p.split("/")[-2] for p in expanded_prefixes)
	# Print explicitly: a bare expression only displays its value in a REPL/notebook, not in a script.
	print(leaf_counts)