Last active
September 2, 2021 18:21
-
-
Save jspinella/cf653d0b124850fed51b6f528c77dd3d to your computer and use it in GitHub Desktop.
AWS Lambda container image to convert Access files to CSV files with Python 3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Convert a table from an accdb file to CSV and upload it to an AWS S3 bucket.
import os, subprocess, urllib.request, requests, zipfile, boto3 | |
from bs4 import BeautifulSoup | |
from lxml import etree | |
def handler(event, context):  # event and context are unused here, but a real-world implementation will probably need them
    """Export one table of an Access database to CSV and upload it to S3.

    Changes into ``/tmp``, runs ``mdb-export`` on ``DATABASE``/``TABLE`` with
    its output redirected to a CSV file, then uploads that same file to
    ``S3_BUCKET`` under the key ``S3_FILE_NAME``.

    Args:
        event: Lambda invocation payload (unused).
        context: Lambda runtime context (unused).

    Raises:
        subprocess.CalledProcessError: if ``mdb-export`` exits with a
            non-zero status, so a failed export is never uploaded.
    """
    # cd into Lambda's writable directory (allows up to 512MB of files)
    os.chdir('/tmp')

    # todo: download the accdb file from S3 or the Internet

    # convert the accdb table to CSV
    DATABASE = "yourFile.accdb"  # yourFile.mdb should work as well
    TABLE = "tableInAccdbToConvert"  # e.g. "MyAccessTable"

    # based on code here: http://okfnlabs.org/handbook/data/patterns/liberating-access-databases/
    # which loops through all tables in the accdb file;
    # here only a single table is converted to CSV
    filename = TABLE.replace(' ', '_') + '.csv'
    print(f'Converting {TABLE} to CSV format...')
    with open(filename, 'wb') as f:
        # check=True: fail loudly instead of uploading an empty/partial CSV
        subprocess.run(['mdb-export', DATABASE, TABLE], stdout=f, check=True)

    # upload CSV file to S3
    # NOTE(review): credentials are hard-coded placeholders; consider relying
    # on the Lambda execution role instead of embedding keys in source.
    s3 = boto3.client(
        's3',
        region_name='us-east-1',
        aws_access_key_id='yourAccessKeyId',
        aws_secret_access_key='yourAccessKeyValue',
    )
    S3_BUCKET = "yourS3BucketName"
    S3_FILE_NAME = "export.csv"  # override file name of CSV in S3 here
    print(f"Uploading {S3_FILE_NAME} to S3 bucket {S3_BUCKET}")
    # Upload the file that was actually written above: `filename` has spaces
    # replaced by underscores, so f"{TABLE}.csv" may not exist on disk.
    response = s3.upload_file(filename, S3_BUCKET, S3_FILE_NAME)
    print(f"S3 response: {response}")
    print("Done!")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
FROM public.ecr.aws/lambda/python:3.8

# install dependencies
# mdbtools depends on unixODBC-devel and gcc-c++
# we start by enabling the EPEL package repository, which hosts the mdbtools package
# the yum cache is removed in the same layer so it does not bloat the image
RUN yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm && \
    yum update -y && \
    yum install -y mdbtools gcc-c++ unixODBC-devel && \
    yum clean all && \
    rm -rf /var/cache/yum

# install python dependencies
# done before copying app.py so this layer is reused when only the code changes
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt --target "${LAMBDA_TASK_ROOT}"

# add the python code to the Docker image
COPY app.py ${LAMBDA_TASK_ROOT}

# trigger Lambda handler
CMD [ "app.handler" ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
beautifulsoup4==4.9.3 | |
boto3==1.18.30 | |
botocore==1.21.30 | |
bs4==0.0.1 | |
certifi==2021.5.30 | |
charset-normalizer==2.0.4 | |
idna==3.2 | |
jmespath==0.10.0 | |
lxml==4.6.3 | |
numpy==1.21.2 | |
pandas==1.3.2 | |
pyodbc==4.0.32 | |
python-dateutil==2.8.2 | |
pytz==2021.1 | |
requests==2.26.0 | |
s3transfer==0.5.0 | |
six==1.16.0 | |
soupsieve==2.2.1 | |
urllib3==1.26.6 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment