Skip to content

Instantly share code, notes, and snippets.

@sharmaansh21
Forked from szinck/alb_logs.sql
Created August 16, 2017 10:13
Show Gist options
  • Select an option

  • Save sharmaansh21/3d81b38a9863ec6a2217a08653e7a158 to your computer and use it in GitHub Desktop.

Select an option

Save sharmaansh21/3d81b38a9863ec6a2217a08653e7a158 to your computer and use it in GitHub Desktop.
Athena Table for ALB Logs
-- This creates an athena table that can parse ALB logs.
-- Advantage of this over others are this works when the log ends with a trailing space
-- plus it also breaks the http request into route and params for easier grouping
CREATE EXTERNAL TABLE IF NOT EXISTS alb_logs (
type string,
timestamp string,
elb string,
client_ip string,
client_port int,
target_ip string,
target_port int,
request_processing_time double,
target_processing_time double,
response_processing_time double,
elb_status_code string,
target_status_code string,
received_bytes bigint,
sent_bytes bigint,
request_verb string,
protocol string,
host string,
port int,
route string,
params string,
httpversion string,
user_agent string,
ssl_cipher string,
ssl_protocol string,
target_group_arn string,
trace_id string )
PARTITIONED BY(year string, month string, day string)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = '1','input.regex' =
'(.*?) (.*?) (.*?) ([0-9.]+):([0-9]*) ([0-9.]+):([0-9]*) ([.0-9]*) ([.0-9]*) ([.0-9]*) (-|[0-9]*) (-|[0-9]*) ([-0-9]*) ([-0-9]*) "(.*?) (.*?)://(.*?):([0-9]+)([^? ]*)(\\x3f?.*?) (.*?)" "(.*?)" (.*?) (.*?) (.*?) "(.*?)" *$'
)
LOCATION 's3://bucket/api-alb/external/AWSLogs/accountid/elasticloadbalancing/us-west-2/'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment