Skip to content

Instantly share code, notes, and snippets.

@eoinsha
Created March 19, 2021 15:41
Show Gist options
  • Save eoinsha/cad92b827737ddde7c628a1a9a8ea87a to your computer and use it in GitHub Desktop.
Save eoinsha/cad92b827737ddde7c628a1a9a8ea87a to your computer and use it in GitHub Desktop.
# Serve a GetObject request through a transforming proxy: relay the stored
# object when the presigned fetch finds it, and when a missing `.parquet` key
# is requested, build the Parquet payload on the fly from the `.csv` object
# that shares the same key stem.
# NOTE(review): `event`, `s3_client`, `bucket_name`, `requests`, `pd`,
# `botocore`, `Path` and `urlparse` are expected to be defined outside this
# snippet -- confirm against the enclosing module/handler.
obj_get_ctx = event['getObjectContext']
request_route = obj_get_ctx['outputRoute']
request_token = obj_get_ctx['outputToken']
obj_url = obj_get_ctx['inputS3Url']
requested_url = event['userRequest']['url']
# URL path with the leading slash removed; its `.csv` variant is used below
# as the object key for the fallback lookup.
path = Path(urlparse(requested_url).path).relative_to('/')

# First attempt to read directly using the signed URL provided.
# A (connect, read) timeout in seconds is passed because `requests` otherwise
# waits indefinitely on a stalled connection, leaving the caller hanging
# until the function itself is killed.
response = requests.get(obj_url, timeout=(3.05, 30))
resp = {'StatusCode': response.status_code}

if response.status_code == 404 and path.suffix == '.parquet':
    # Load CSV and convert to Parquet.
    csv_key = str(path.with_suffix('.csv'))
    try:
        csv_body = s3_client.get_object(Bucket=bucket_name, Key=csv_key)['Body']
        resp['Body'] = pd.read_csv(csv_body).to_parquet()
        # The conversion succeeded, so replace the 404 recorded above.
        resp['StatusCode'] = 200
    except botocore.exceptions.ClientError as error:
        # If the CSV could not be read, propagate that error
        resp['ErrorCode'] = error.response['Error']['Code']
        resp['StatusCode'] = error.response['ResponseMetadata']['HTTPStatusCode']
        resp['ErrorMessage'] = error.response['Error']['Message']
else:
    # Every other outcome (success or failure) is relayed unchanged: the
    # status code captured above plus the raw payload of the fetch.
    resp['Body'] = response.content

# Exactly one response is written per request, whichever branch filled `resp`.
s3_client.write_get_object_response(
    RequestRoute=request_route,
    RequestToken=request_token,
    **resp
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment