Last active
July 11, 2023 13:20
-
-
Save rdkls/b0cea7202067976525bb962b2788a735 to your computer and use it in GitHub Desktop.
athena table create statement for one bucket holding multiple load balancer access logs (default example is limited to one table per alb)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Note the projection for api/alb name is an enum not 'injected' to enable querying on it | |
CREATE EXTERNAL TABLE IF NOT EXISTS alb_logs ( | |
type string, | |
time string, | |
elb string, | |
client_ip string, | |
client_port int, | |
target_ip string, | |
target_port int, | |
request_processing_time double, | |
target_processing_time double, | |
response_processing_time double, | |
elb_status_code int, | |
target_status_code string, | |
received_bytes bigint, | |
sent_bytes bigint, | |
request_verb string, | |
request_url string, | |
request_proto string, | |
user_agent string, | |
ssl_cipher string, | |
ssl_protocol string, | |
target_group_arn string, | |
trace_id string, | |
domain_name string, | |
chosen_cert_arn string, | |
matched_rule_priority string, | |
request_creation_time string, | |
actions_executed string, | |
redirect_url string, | |
lambda_error_reason string, | |
target_port_list string, | |
target_status_code_list string, | |
classification string, | |
classification_reason string | |
) | |
PARTITIONED BY | |
( | |
api_name STRING, | |
day STRING | |
) | |
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe' | |
WITH SERDEPROPERTIES ( | |
'serialization.format' = '1', | |
'input.regex' = | |
'([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \"([^ ]*) (.*) (- |[^ ]*)\" \"([^\"]*)\" ([A-Z0-9-_]+) ([A-Za-z0-9.-]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^\"]*)\" ([-.0-9]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^ ]*)\" \"([^\s]+?)\" \"([^\s]+)\" \"([^ ]*)\" \"([^ ]*)\"') | |
LOCATION 's3://MY_ALB_ACCESS_LOGS/' | |
TBLPROPERTIES | |
( | |
"projection.enabled" = "true", | |
"projection.api_name.type" = "enum", | |
"projection.api_name.values" = "a,list,of,toplevel,folders,each,lb,creates", | |
"projection.day.type" = "date", | |
"projection.day.range" = "2022/12/05,NOW", | |
"projection.day.format" = "yyyy/MM/dd", | |
"projection.day.interval" = "1", | |
"projection.day.interval.unit" = "DAYS", | |
"storage.location.template" = "s3://MY_ALB_ACCESS_LOGS/${api_name}/AWSLogs/MY_AWS_ACCOUNT_ID/elasticloadbalancing/ap-southeast-2/${day}" | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment