Last active
June 5, 2023 11:40
-
-
Save ghandic/dbde264a0d666a415bbf1bdcc3645aec to your computer and use it in GitHub Desktop.
Load a CSV from S3 directly into memory, and write a CSV to S3 directly from memory, by extending the pd.DataFrame class
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import boto3 | |
import pandas as pd | |
from io import StringIO | |
class S3DataFrame(pd.DataFrame):
    """
    A ``pd.DataFrame`` that can be written to, and read from, S3 as CSV
    without touching the local filesystem.

    # Make a dataframe and upload it as csv
    s3df = S3DataFrame({'h1':[1], 'h2':[2]})
    s3df.to_s3(Bucket='bucket-name',
               Key='file-key-on-s3', # The name of the file when it is stored in s3
               SSEKMSKeyId='kms-id') # note: the kms should be in the same region as the bucket

    # Download the same csv into DataFrame
    s3df2 = S3DataFrame.from_s3(Bucket='bucket-name', Key='file-key-on-s3')
    """

    # One client shared by all instances and by the ``from_s3`` classmethod.
    client = boto3.client('s3')  # May need region but seems to work without

    @property
    def _constructor(self):
        # pandas calls this hook when an operation (slicing, copying, ...)
        # builds a new frame; returning the subclass keeps ``to_s3``
        # available on the result instead of decaying to ``pd.DataFrame``.
        return type(self)

    def to_s3(self, Bucket, Key, SSEKMSKeyId):
        """Sends csv to S3 from memory.

        The frame is serialised with ``to_csv(index=False)`` (so the index is
        not stored) and uploaded with ``ServerSideEncryption='aws:kms'``.

        Args:
            Bucket: Name of the destination bucket.
            Key: Object key to store the CSV under.
            SSEKMSKeyId: KMS key id passed to ``put_object`` for encryption.

        Returns:
            The response returned by the client's ``put_object`` call.
        """
        # With no target, ``to_csv`` returns the CSV text directly. Encode it
        # explicitly as UTF-8 so the upload matches the decode in ``from_s3``.
        body = self.to_csv(index=False).encode('utf-8')
        return self.client.put_object(
            Bucket=Bucket,
            Key=Key,
            Body=body,
            SSEKMSKeyId=SSEKMSKeyId,
            ServerSideEncryption='aws:kms',
        )

    @classmethod
    def from_s3(cls, Bucket, Key):
        """Collects csv into memory from S3.

        Args:
            Bucket: Name of the source bucket.
            Key: Object key of the CSV to download.

        Returns:
            An instance of ``cls`` parsed from the object's UTF-8 CSV content.
        """
        res = cls.client.get_object(Bucket=Bucket, Key=Key)['Body'].read()
        frame = pd.read_csv(StringIO(res.decode('utf-8')))
        # ``read_csv`` yields a plain DataFrame; wrap it so the caller gets an
        # object that still has ``to_s3``.
        return cls(frame)
if __name__ == "__main__":
    # Placeholder S3 coordinates for the demo; replace with real values.
    BUCKET_NAME, FILE_KEY, KMS_KEY_ID = 'bucket-name', 'test.csv', 'kms-id'

    # Build a one-row frame and push it to S3 as a KMS-encrypted CSV.
    s3df = S3DataFrame({'h1': [1], 'h2': [2]})
    s3df.to_s3(
        Bucket=BUCKET_NAME,
        Key=FILE_KEY,
        SSEKMSKeyId=KMS_KEY_ID,
    )

    # Pull the same object back down into a new frame.
    s3df2 = S3DataFrame.from_s3(Bucket=BUCKET_NAME, Key=FILE_KEY)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks for the contribution to the community. This saved my day.