-
-
Save mlapida/1166b18651a185e21a08 to your computer and use it in GitHub Desktop.
import boto3 | |
import logging | |
import json | |
import gzip | |
import urllib | |
import time | |
from StringIO import StringIO | |
# Root logger: the handler reports through it at INFO level and above.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Created once at import time so every call to the handler in this process
# shares the same S3 client.
s3 = boto3.client('s3')
# Field order of a default-format VPC Flow Log record. Used only as a fallback
# when the subscription filter did not attach 'extractedFields' to an event.
_FLOW_LOG_FIELDS = (
    'version', 'account_id', 'interface_id', 'srcaddr', 'dstaddr',
    'srcport', 'dstport', 'protocol', 'packets', 'bytes', 'start', 'end',
    'action', 'log_status',
)


def _decode_awslogs(event):
    """Return the log payload carried in event['awslogs']['data'] as a dict.

    The payload is base64-decoded, gunzipped and parsed as JSON.
    """
    # Imported here so the handler does not depend on the Python-2-only
    # StringIO import at the top of the file.
    import base64
    import io

    compressed = base64.b64decode(event['awslogs']['data'])
    with gzip.GzipFile(fileobj=io.BytesIO(compressed)) as stream:
        return json.loads(stream.read().decode('utf-8'))


def _flow_fields(log_event):
    """Return the flow log fields of one log event as a dict."""
    fields = log_event.get('extractedFields')
    if fields is None:
        # No filter pattern was applied: split the raw space-delimited record.
        # NOTE(review): assumes the default flow log format; a custom format
        # needs a matching field tuple.
        fields = dict(zip(_FLOW_LOG_FIELDS, log_event['message'].split()))
    return fields


def _to_cef(fields):
    """Format one flow log record as a newline-terminated CEF line."""
    return (
        "CEF:0|AWS CloudWatch|FlowLogs|1.0"
        "|src={srcaddr}|spt={srcport}|dst={dstaddr}|dpt={dstport}"
        "|proto={protocol}|start={start}|end={end}|out={bytes}\n"
    ).format(**fields)


def lambda_handler(event, context):
    """Convert a batch of flow log events to CEF lines and upload them to S3.

    Args:
        event: Invocation event; must contain event['awslogs']['data'], a
            base64-encoded, gzip-compressed JSON document with a 'logEvents'
            list.
        context: Lambda context object (unused).

    Returns:
        None.

    Raises:
        KeyError: If the event has no 'awslogs' entry, the payload has no
            'logEvents', or a record lacks one of the fields written to the
            CEF line.
    """
    # set the name of the S3 bucket
    bucketS3 = 'test-flowlogs'
    folderS3 = 'ArcSight'
    prefixS3 = 'AW1Logs_'
    temp_path = '/tmp/file'

    # decode, unzip and parse the log data
    payload = _decode_awslogs(event)

    # Create the S3 file key.
    # NOTE(review): the key has one-second resolution, so two batches handled
    # within the same second would write to the same key.
    key = folderS3 + '/' + prefixS3 + str(int(time.time())) + ".log"

    # Transform every record before touching the file, so a malformed record
    # fails the invocation without leaving a half-written temp file behind.
    cef_lines = [_to_cef(_flow_fields(entry)) for entry in payload['logEvents']]

    # 'with' guarantees the file is flushed and closed before the upload.
    with open(temp_path, 'w') as temp_file:
        temp_file.writelines(cef_lines)

    # write the file to S3; upload_file returns None, so log the destination
    s3.upload_file(temp_path, bucketS3, key)
    logger.info("Uploaded %d flow log records to s3://%s/%s",
                len(cef_lines), bucketS3, key)
Not sure if it's better/faster, but any reason why you didn't use StringIO for the body and use s3.put_object(Body=io.getvalue()) instead of actually writing the temp file?
Just not sure if the actual write is faster vs. keeping it in the StringIO object.
What should be written for the entire line, in your opinion?
I've been debugging this; it's kind of out of date.
-
Need to change
from StringIO import StringIO
...to...
from io import StringIO -
Now I get this error:
Traceback (most recent call last):
File "/var/task/lambda_function.py", line 22, in lambda_handler
outEvent = str(event['awslogs']['data'])
KeyError: 'awslogs'
Any ideas on why?
Thanks
shane
{
"awslogs": {
"data": "2 006714643595 eni-265afe94 181.214.87.239 172.31.23.164 46008 3394 6 1 40 1518511341 1518511357 REJECT OK"
}
}
put this sample data
Where did you get the sample values?
Howdy @mlapida,
Your code is pretty cool, except for line 40, where the key extractedFields is now missing; you should use only the message key, which holds the entire line.