-
-
Save sharmaansh21/3d81b38a9863ec6a2217a08653e7a158 to your computer and use it in GitHub Desktop.
Athena Table for ALB Logs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| -- This creates an athena table that can parse ALB logs. | |
| -- Advantage of this over others are this works when the log ends with a trailing space | |
| -- plus it also breaks the http request into route and params for easier grouping | |
| CREATE EXTERNAL TABLE IF NOT EXISTS alb_logs ( | |
| type string, | |
| timestamp string, | |
| elb string, | |
| client_ip string, | |
| client_port int, | |
| target_ip string, | |
| target_port int, | |
| request_processing_time double, | |
| target_processing_time double, | |
| response_processing_time double, | |
| elb_status_code string, | |
| target_status_code string, | |
| received_bytes bigint, | |
| sent_bytes bigint, | |
| request_verb string, | |
| protocol string, | |
| host string, | |
| port int, | |
| route string, | |
| params string, | |
| httpversion string, | |
| user_agent string, | |
| ssl_cipher string, | |
| ssl_protocol string, | |
| target_group_arn string, | |
| trace_id string ) | |
| PARTITIONED BY(year string, month string, day string) | |
| ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe' | |
| WITH SERDEPROPERTIES ( | |
| 'serialization.format' = '1','input.regex' = | |
| '(.*?) (.*?) (.*?) ([0-9.]+):([0-9]*) ([0-9.]+):([0-9]*) ([.0-9]*) ([.0-9]*) ([.0-9]*) (-|[0-9]*) (-|[0-9]*) ([-0-9]*) ([-0-9]*) "(.*?) (.*?)://(.*?):([0-9]+)([^? ]*)(\\x3f?.*?) (.*?)" "(.*?)" (.*?) (.*?) (.*?) "(.*?)" *$' | |
| ) | |
| LOCATION 's3://bucket/api-alb/external/AWSLogs/accountid/elasticloadbalancing/us-west-2/' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment