Last active
July 6, 2020 16:07
-
-
Save onefoursix/a9e2a92269e2446f76dcd8c37ce774bd to your computer and use it in GitHub Desktop.
SDC Groovy Script to get S3 File
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import com.amazonaws.services.s3.AmazonS3; | |
import com.amazonaws.services.s3.model.S3Object; | |
import com.amazonaws.services.s3.model.S3ObjectInputStream; | |
// For every incoming record: download the S3 object it names into a private
// local directory and forward the record with the local location attached.
//
// Reads from each record:
//   S3_Bucket - bucket name passed to getObject
//   S3_Path   - indexed with [0], so presumably a list whose first element is
//               the object key -- TODO confirm against the upstream stage
// Reads from the pipeline:
//   sdc.userParams["TMP_FILE_DIR"] - parent directory for the downloads
//   sdc.state['s3']                - object exposing getObject(bucket, key)
// Adds to each successful record:
//   fileDir  - absolute path of the directory holding the download
//   fileName - name of the downloaded file inside fileDir
// Any failure sends the record to the error stream instead of the output.
records = sdc.records
for (record in records) {
    S3ObjectInputStream s3is = null
    FileOutputStream fos = null
    File tmpFile = null
    try {
        // Get the S3 bucket from the record
        def s3Bucket = record.value['S3_Bucket']

        // Get the S3 file path from the record
        def s3FilePath = record.value['S3_Path'][0]

        // Extract the S3 file name from the full file path
        def fileName = s3FilePath.substring(s3FilePath.lastIndexOf('/') + 1)
        if (fileName.isEmpty()) {
            // A key ending in '/' has no file name to download to
            throw new IllegalArgumentException("S3 path has no file name: " + s3FilePath)
        }

        // Create a private directory for each file to be downloaded
        def millis = System.currentTimeMillis()
        def dir = new File(sdc.userParams["TMP_FILE_DIR"] + "/" + fileName + "-" + millis)
        // mkdirs also creates missing parents; fail loudly if the directory
        // still does not exist instead of failing later with a vaguer error
        if (!dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Could not create directory " + dir.getAbsolutePath())
        }
        def fileDir = dir.getAbsolutePath()

        // Generate full path for file download
        def filePath = fileDir + "/" + fileName

        // Download the S3 object (assumed to be a gzip file) and write it to the local file system
        def s3object = sdc.state['s3'].getObject(s3Bucket, s3FilePath)
        s3is = s3object.getObjectContent()

        // We'll write the file with a .tmp suffix and trim the suffix when the file is fully written
        tmpFile = new File(filePath + ".tmp")
        fos = new FileOutputStream(tmpFile)
        byte[] buffer = new byte[1024]
        int readLength
        // read() signals end of stream with -1
        while ((readLength = s3is.read(buffer)) != -1) {
            fos.write(buffer, 0, readLength)
        }

        // Close the output before renaming so every byte is on disk first
        fos.close()
        fos = null

        // Trim the .tmp suffix now that the file is fully written
        if (!tmpFile.renameTo(new File(filePath))) {
            throw new IOException("Could not rename " + tmpFile.getAbsolutePath() + " to " + filePath)
        }

        // Add the local file and path to the record
        record.value["fileDir"] = fileDir
        record.value["fileName"] = fileName

        // Send the record to the next stage
        sdc.output.write(record)
    } catch (Exception e) {
        sdc.log.error(e.toString(), e)
        sdc.error.write(record, e.toString())
    } finally {
        // Close each stream on its own so a failure (or a stream that was
        // never opened) cannot prevent the other from being closed
        try {
            s3is?.close()
        } catch (Exception e) {
            sdc.log.error("Failed to close S3 input stream: " + e.toString(), e)
        }
        try {
            fos?.close()
        } catch (Exception e) {
            sdc.log.error("Failed to close local output stream: " + e.toString(), e)
        }
        // After a successful rename the .tmp file no longer exists; if it is
        // still there the download failed, so remove the partial file
        if (tmpFile != null && tmpFile.exists()) {
            tmpFile.delete()
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment