Created
November 12, 2018 10:14
-
-
Save JD-V/a9b780ed894778a04663587322c43199 to your computer and use it in GitHub Desktop.
Split a large CSV file by quarter and push each part to S3
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import boto3 | |
def splitandpushtos3(x, bucket='my-bucket'):
    """Write one group to a local CSV file and upload that file to S3.

    The file is written to the current working directory as
    ``<x.name>.csv`` and uploaded under the same name as the object key.

    Args:
        x: Object providing ``to_csv`` and a ``name`` attribute. In this
            script it is one group handed over by ``groupby(...).apply``,
            where ``name`` is expected to hold the group key -- confirm
            if the function is called from anywhere else.
        bucket: Name of the destination S3 bucket. Defaults to the
            bucket that was previously hard-coded.
    """
    # Build the file name once; it doubles as the S3 object key.
    filename = '{}.csv'.format(x.name)

    # Store the file locally.
    x.to_csv(filename)

    # Push to S3 (boto3 needs AWS credentials, e.g. a configured aws-cli).
    s3 = boto3.resource('s3')
    s3.Bucket(bucket).upload_file(filename, filename)
# Read the CSV file.
df = pd.read_csv('input.csv')

# Convert the date strings to datetimes. errors='coerce' turns values that
# do not match the format into NaT instead of raising.
df['created_at'] = pd.to_datetime(
    df['created_at'], format='%Y-%m-%d %H:%M:%S', errors='coerce'
)

# Group the rows by quarter. The result gets its own name so that `df`
# keeps referring to the DataFrame rather than to a GroupBy object.
# NOTE(review): rows whose created_at was coerced to NaT presumably fall
# into no group and therefore end up in no output file -- confirm that
# dropping them is intended.
quarterly = df.groupby(pd.PeriodIndex(df['created_at'], freq='Q'))

# Store each quarter locally and push it to AWS S3 as well.
quarterly.apply(splitandpushtos3)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment