Mlawrence95 · July 27, 2020 22:54
diff --git a/read_csv_from_aws_s3_targz.python b/read_csv_from_aws_s3_targz.python
 # checked against python 3.7.3, pandas 0.24.2, s3fs 0.4.2
 import tarfile
 import io
 import s3fs 

 import pandas as pd

 # Where the gzipped tarball lives in S3, and which CSV member to pull out of it.
 tar_path = "s3://my-bucket/debug.tar.gz"  # path in s3
 metadata_path = "debug/metadata.csv"  # path inside of the tar file

 s3 = s3fs.S3FileSystem()

 # The archive is read as a gzip stream directly from S3, so nothing is written
 # to local disk. NOTE(review): the original remark here was truncated ("this is
 # ... in my experience") -- presumably "slow"; confirm. The approach does work.
 with s3.open(tar_path, 'rb') as debug_tar:
     with tarfile.open(mode='r:gz', fileobj=debug_tar) as tar:
         # extractfile() raises KeyError when the member is missing and returns
         # None when the member is not a regular file (e.g. a directory).
         member = tar.extractfile(metadata_path)
         if member is None:
             raise ValueError(
                 f"{metadata_path!r} in {tar_path} is not a regular file"
             )
         with member:
             csv_contents = member.read()

 # The CSV bytes are fully in memory at this point, so parsing can happen after
 # the tar and S3 handles have been closed.
 df = pd.read_csv(io.BytesIO(csv_contents), encoding='utf8')
	# checked against python 3.7.3, pandas 0.24.2, s3fs 0.4.2
	import tarfile
	import io
	import s3fs

	import pandas as pd

	# Where the gzipped tarball lives in S3, and which CSV member to pull out of it.
	tar_path = "s3://my-bucket/debug.tar.gz"  # path in s3
	metadata_path = "debug/metadata.csv"  # path inside of the tar file

	s3 = s3fs.S3FileSystem()

	# The archive is read as a gzip stream directly from S3, so nothing is written
	# to local disk. NOTE(review): the original remark here was truncated ("this is
	# ... in my experience") -- presumably "slow"; confirm. The approach does work.
	with s3.open(tar_path, 'rb') as debug_tar:
	    with tarfile.open(mode='r:gz', fileobj=debug_tar) as tar:
	        # extractfile() raises KeyError when the member is missing and returns
	        # None when the member is not a regular file (e.g. a directory).
	        member = tar.extractfile(metadata_path)
	        if member is None:
	            raise ValueError(
	                f"{metadata_path!r} in {tar_path} is not a regular file"
	            )
	        with member:
	            csv_contents = member.read()

	# The CSV bytes are fully in memory at this point, so parsing can happen after
	# the tar and S3 handles have been closed.
	df = pd.read_csv(io.BytesIO(csv_contents), encoding='utf8')
No results found