Last active
July 22, 2022 16:35
-
-
Save em-shea/ab32607b78c8e41c808b5dd82df855ca to your computer and use it in GitHub Desktop.
A Lambda function that scans a given DynamoDB table and writes the data to S3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import json | |
import boto3 | |
from datetime import datetime | |
# AWS SDK (boto3) handles for the DynamoDB table that gets scanned and the
# S3 bucket that receives the export file.
# Both names are read from environment variables (set in the SAM template).
s3 = boto3.resource('s3')
bucket = s3.Bucket(os.environ['BUCKET_NAME'])
dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table(os.environ['TABLE_NAME'])
def lambda_handler(event, context):
    """Export the DynamoDB table contents to a dated JSON file in S3.

    Scans the whole table, stamps every item with today's date as
    ``ReportingDate`` (used for filtering in QuickSight), and uploads the
    result to the bucket as ``<YYYY-MM-DD>.json``.

    Args:
        event: Lambda invocation event; not read by this handler.
        context: Lambda runtime context; not read by this handler.

    Returns:
        None. The S3 upload result is deliberately not returned.
    """
    # Scan DynamoDB contents and save to array
    # In my case, my DynamoDB table contains data on my app's subscribers
    all_subscribers_data = scan_subscribers_table()

    todays_date = format_date(datetime.today())

    # Add ReportingDate to each row of data for filtering in QuickSight
    data_rows = convert_to_rows(all_subscribers_data, todays_date)

    # Save JSON data file to S3
    write_to_s3(data_rows, todays_date)
def scan_subscribers_table():
    """Return every item in the table, following scan pagination to the end."""
    subscribers = []
    scan_kwargs = {}

    while True:
        page = table.scan(**scan_kwargs)
        subscribers.extend(page['Items'])

        # A page without LastEvaluatedKey is the final one.
        if 'LastEvaluatedKey' not in page:
            return subscribers

        # Resume the next scan where this page stopped.
        scan_kwargs = {'ExclusiveStartKey': page['LastEvaluatedKey']}
def convert_to_rows(all_subscribers_data, todays_date):
    """Stamp each item with ``ReportingDate`` and return the items as a new list.

    The items themselves are updated in place; the returned list holds the
    same item objects in their original order.
    """
    def stamp(record):
        # Set (or overwrite) the reporting date on this item.
        record['ReportingDate'] = todays_date
        return record

    return [stamp(record) for record in all_subscribers_data]
def _json_default(value):
    """Convert DynamoDB-specific Python values that ``json`` cannot encode.

    boto3's resource API hands back DynamoDB numbers as ``decimal.Decimal``
    and set attributes as Python sets; neither is JSON serializable by
    default. Anything else unsupported still raises ``TypeError``.
    """
    # Local import so this block does not depend on the file's import list.
    from decimal import Decimal

    if isinstance(value, Decimal):
        # Keep whole numbers as ints so they are not written as e.g. 3.0.
        if value.is_finite() and value == value.to_integral_value():
            return int(value)
        return float(value)
    if isinstance(value, (set, frozenset)):
        return list(value)
    raise TypeError(
        f'Object of type {type(value).__name__} is not JSON serializable'
    )


# Save JSON data file to S3
# The name of the file is set to the current date
def write_to_s3(data_rows, todays_date):
    """Upload ``data_rows`` to the bucket as UTF-8 JSON named ``<todays_date>.json``.

    Args:
        data_rows: List of item dicts to serialize.
        todays_date: Date string used as the object key (without extension).

    Returns:
        The result of ``bucket.put_object``.

    Raises:
        TypeError: If an item contains a value that is neither natively JSON
            serializable nor a ``Decimal``/set.
    """
    payload = json.dumps(data_rows, default=_json_default).encode('UTF-8')
    return bucket.put_object(Body=payload, Key=f'{todays_date}.json')
def format_date(date_object):
    """Format a date/datetime as a ``YYYY-MM-DD`` string."""
    return date_object.strftime('%Y-%m-%d')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment