Last active
July 5, 2019 16:03
-
-
Save statik/1172a9ced6023b3a18bf37c634f83832 to your computer and use it in GitHub Desktop.
serverless siem fixes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
LambdaSortGDFindings: | |
Type: 'AWS::Lambda::Function' | |
Condition: AuditAccountPrimary # only in primary audit region, next to S3 bucket | |
Properties: | |
Code: | |
ZipFile: | | |
import boto3, csv, os, json | |
s3_resource = boto3.resource('s3') | |
s3_client = boto3.client('s3') | |
def sanitize_key_names(finding):
    """Replace '::' with '_' in every dict key of a decoded GuardDuty finding.

    AWS Athena does not allow column names to contain special characters
    other than underscores. Some GuardDuty event findings contain JSON dicts
    with '::' in the key names. AWS Glue will crawl these findings and create
    schemas that make Athena fail with HIVE_METASTORE errors.

    As of July 2019, the 5 GuardDuty finding types that contain these fields
    are:
        stealth_iamuser_cloudtrailloggingdisabled
        recon_iamuser_maliciousipcaller
        stealth_iamuser_loggingconfigurationmodified
        unauthorizedaccess_iamuser_consolelogin
        privilegeescalation_iamuser_administrativepermissions

    To view the problematic fields, you can use jq:
        cat finding.json | jq '.detail.service.action.awsApiCallAction.affectedResources'

    This will show output like:
        {
          "AWS::IAM::User": "GeneratedFindingIAMUser",
          "AWS::IAM::Role": "GeneratedFindingIAMRole"
        }

    Args:
        finding: A decoded JSON value. Dicts and lists are walked recursively
            and modified in place; any other value is left untouched.

    Returns:
        The object that was passed in (the same instance), with every '::'
        in every nested dict key replaced by '_'. If a renamed key collides
        with an existing key, the renamed entry overwrites it.
    """
    if isinstance(finding, dict):
        # Iterate over a snapshot of the keys: renaming entries while
        # iterating the live dict can skip keys or raise RuntimeError.
        for key in list(finding):
            value = sanitize_key_names(finding[key])
            if isinstance(key, str) and "::" in key:
                del finding[key]
                finding[key.replace("::", "_")] = value
    elif isinstance(finding, list):
        # Containers are fixed up in place, so the return value is not needed.
        for entry in finding:
            sanitize_key_names(entry)
    return finding
def lambda_handler(event, context):
    """Split GuardDuty finding objects by finding type and re-write them to S3.

    For every S3 record in the event, the referenced object is downloaded and
    parsed as a run of back-to-back JSON objects. Each finding is passed
    through sanitize_key_names, grouped by its detail.type value, and each
    group is written as newline-delimited JSON to
    raw/by_finding_type/<type with '/' replaced by '_'>/<partition>/<name>.json
    in the same bucket. <partition> is the part of the source key between its
    first two path components and the file name.

    Args:
        event: S3 event notification payload; must contain a 'Records' list.
        context: Lambda context object (unused).

    Returns:
        The string 'Processed: <n> logs', where <n> is the total number of
        findings read across all records.
    """
    # Local import so the handler does not depend on the file-level imports.
    from urllib.parse import unquote_plus

    print(event)
    record_count = 0
    for record in event['Records']:
        bucket = record['s3']['bucket']['name']
        # S3 event notifications deliver the object key URL-encoded.
        object_key = unquote_plus(record['s3']['object']['key'])
        key_parts = object_key.split('/')
        file_name = key_parts[-1]
        partition = '/'.join(key_parts[2:-1])

        response = s3_client.get_object(Bucket=bucket, Key=object_key)
        raw = response['Body'].read().decode('utf-8')
        # NOTE: relies on the objects being concatenated with no separator,
        # so that every '}{' marks a boundary between two findings.
        findings_list = json.loads('[' + raw.replace('}{', '},\n{') + ']')
        record_count += len(findings_list)

        # Group the sanitized findings by finding type.
        output = {}
        for item in findings_list:
            fixed_item = sanitize_key_names(item)
            output.setdefault(fixed_item['detail']['type'], []).append(fixed_item)

        for finding_type, items in output.items():
            print(file_name)
            s3_path = (
                'raw/by_finding_type/'
                + '_'.join(finding_type.split('/'))
                + '/' + partition
                + '/' + file_name + '.json'
            )
            # One JSON document per line.
            body = ''.join(json.dumps(version) + '\n' for version in items)
            s3_resource.Bucket(bucket).put_object(Key=s3_path, Body=body)
    return 'Processed: ' + str(record_count) + ' logs'
Handler: index.lambda_handler
# python3.6 is a retired Lambda runtime and can no longer be deployed;
# the inline handler uses nothing specific to that version.
Runtime: python3.12
Description: 'Function sorts findings by type and places them into appropriate bucket'
MemorySize: 128
Timeout: 300
Role:
  Fn::GetAtt:
    - LambdaSortRole
    - Arn
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment