Created
January 1, 2016 20:33
-
-
Save xyu/5a1ee488a286627ac043 to your computer and use it in GitHub Desktop.
Hive query to create table for parsing Nginx logs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Table for raw NGINX logs
--
-- Each line of the underlying text files is matched against the input.regex
-- SerDe property below. The ten capture groups of that pattern map, in order,
-- onto the ten columns declared here:
--   1 remote_addr      digits and dots only, so IPv6 addresses do not match
--   2 remote_user      any run of non-space characters
--   3 time_local       the text between the square brackets
--   4 http_verb        first token of the quoted request line
--   5 url              second token of the quoted request line
--   6 http_ver         third token of the quoted request line
--   7 status           digits
--   8 body_bytes_sent  digits
--   9 http_referer     contents of the first trailing quoted field
--  10 http_user_agent  contents of the second trailing quoted field
CREATE EXTERNAL TABLE IF NOT EXISTS `raw_nginx_logs` (
    `remote_addr`     string,
    `remote_user`     string,
    `time_local`      string,
    `http_verb`       string,
    `url`             string,
    `http_ver`        string,
    `status`          int,
    `body_bytes_sent` int,
    `http_referer`    string,
    `http_user_agent` string
)
-- Use RegEx to parse each line
-- The regex value ends with an escaped quote that belongs to the pattern and
-- is then followed by the closing quote of the literal itself. Leaving that
-- closing quote off makes the literal swallow the next property and fails
-- with a ParseException (mismatched input output expecting a closing paren).
-- NOTE(review): output.format.string is presumably only relevant when
-- writing through the SerDe - confirm it is still needed for this read-only
-- external table.
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    "input.regex" = "([0-9\\.]+) - ([^ ]*) \\[([^\\]]*)\\] \"([^ ]*) ([^ ]*) ([^ ]*)\" ([0-9]*) ([0-9]*) \"(.*)\" \"(.*)\"",
    "output.format.string" = "%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s %10$s"
)
-- This also supports gzip, just have to have a .gz extension
STORED AS TEXTFILE
-- Be explicit where we are going to store this table
LOCATION 'hdfs://nameservice/user/hive/warehouse/raw_nginx_logs';
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I tried it and received this error:
FAILED: ParseException line 17:3 mismatched input 'output' expecting ) near '"([0-9\\.]+) - ([^ ]*) \\[([^\\]]*)\\] \"([^ ]*) ([^ ]*) ([^ ]*)\" ([0-9]*) ([0-9]*) \"(.*)\" \"(.*)\",