Last active
June 6, 2019 15:29
-
-
Save angrychimp/a56d68271c4e56f6ed184682d9edcc83 to your computer and use it in GitHub Desktop.
Creating Application ELB Athena tables (updated as of June 2019)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Create the partitioned table | |
| CREATE EXTERNAL TABLE IF NOT EXISTS {SCHEMA}.{TABLE_NAME} ( | |
| type string, | |
| time string, | |
| alb_id string, | |
| alb_name string, | |
| client_ip string, | |
| client_port int, | |
| target_ip string, | |
| request_processing_time double, | |
| target_processing_time double, | |
| response_processing_time double, | |
| elb_status_code int, | |
| target_status_code string, | |
| received_bytes int, | |
| sent_bytes int, | |
| request_verb string, | |
| request_url string, | |
| request_proto string, | |
| user_agent string, | |
| ssl_cipher string, | |
| ssl_protocol string, | |
| target_group_arn string, | |
| trace_id string, | |
| hostname string, | |
| ssl_certificate string, | |
| matched_rule_priority int, | |
| request_creation_time string, | |
| actions_executed string, | |
| redirect_url string, | |
| error_reason string | |
| ) | |
| PARTITIONED BY(dt date) | |
| ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe' | |
| WITH SERDEPROPERTIES ( | |
| 'serialization.format' = '1', | |
| 'input.regex' = '([^ ]+) ([^ ]+) (app/([^ ]+)/[^ ]+) ([0-9.]+):([0-9]+) ([0-9.:-]+) ([0-9.-]+) ([0-9.-]+) ([0-9.-]+) ([0-9-]+) ([0-9-]+) ([0-9-]+) ([0-9-]+) \"([^ ]*) ([^ ]*) ([^ ]*)\" \"([^\"]*)\" ([^ ]+) ([^ ]+) ([^ ]+) \"([^ ]+)\" \"([^ ]+)\" \"([^ ]+)\" ([0-9-]+) ([^ ]+) \"([^ ]+)\" \"([^ ]+)\" \"([^ ]+)\".*' | |
| ) LOCATION 's3://{BUCKET_NAME}/AWSLogs/{ACCOUNT_ID}/elasticloadbalancing/{REGION}/'; | |
| # Create a partition | |
| ALTER TABLE sampledb.appelb add partition (dt=date('2019-01-01')) | |
| LOCATION "s3://{BUCKET_NAME}/AWSLogs/{ACCOUNT_ID}/elasticloadbalancing/{REGION}/2019/01/01/"; |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This assumes you have permission to read the S3 prefix (required for Athena - it uses your permissions). This is based on the current app ELB log structure where SNI is deployed. This format will not work for classic ELBs.