AWS Lambda function used in a Medium article
import boto3
import pandas as pd
from io import StringIO


def write_dataframe_to_csv_on_s3(dataframe, filename, bucket):
    """Write a DataFrame as a tab-separated CSV to an S3 bucket."""
    # Serialize the DataFrame into an in-memory text buffer
    csv_buffer = StringIO()
    dataframe.to_csv(csv_buffer, sep="\t")
    # Upload the buffer contents to the target S3 object
    s3_resource = boto3.resource("s3")
    s3_resource.Object(bucket, filename).put(Body=csv_buffer.getvalue())


def lambda_handler(event, context):
    """Read a CSV from S3, aggregate it with pandas, and write the result back to S3."""
    try:
        # Get variables from the invocation event
        BUCKET = event.get('bucket')
        KEY = event.get('file_key')
        OUTPUT = event.get('output_file')
        GROUP = event.get('group')
        COLUMN = event.get('column')
        # Set up a client to fetch the input file from S3
        s3_client = boto3.client('s3')
        response = s3_client.get_object(Bucket=BUCKET, Key=KEY)
        csv_file = response["Body"]
        # Load the CSV as a pandas DataFrame
        df = pd.read_csv(csv_file, index_col=0, low_memory=False)
        # Group by GROUP, average COLUMN, and sort the means in descending order
        df_groupby = pd.DataFrame(
            df.groupby(GROUP)[COLUMN].mean().sort_values(ascending=False)
        ).round(2)
        # Save the aggregated DataFrame to the same S3 bucket
        write_dataframe_to_csv_on_s3(df_groupby, OUTPUT, BUCKET)
        return {
            'statusCode': 200,
            'message': 'Success!'}
    except Exception as error:
        return {
            'statusCode': 400,
            'body': f'Error, bad request: {error}'}
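
A minimal sketch of how the handler could be invoked locally for testing. The bucket name, object keys, and column names below are placeholders, not values from the original gist; substitute the ones that match your own S3 data.

# Hypothetical test event; all values here are assumptions for illustration.
test_event = {
    'bucket': 'my-example-bucket',               # assumed S3 bucket name
    'file_key': 'input/data.csv',                # assumed key of the input CSV
    'output_file': 'output/mean_by_group.tsv',   # assumed key for the result file
    'group': 'category',                         # assumed column to group by
    'column': 'value'                            # assumed numeric column to average
}

# The context argument is unused by the handler, so None is enough for a local run.
print(lambda_handler(test_event, None))
# Expected on success: {'statusCode': 200, 'message': 'Success!'}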