Skip to content

Instantly share code, notes, and snippets.

@yuyasugano
Last active May 15, 2020 04:52
Show Gist options
  • Save yuyasugano/7a1166c2d5ee3d848cb84a9c1c06d341 to your computer and use it in GitHub Desktop.
Save yuyasugano/7a1166c2d5ee3d848cb84a9c1c06d341 to your computer and use it in GitHub Desktop.
Data Analytics lesson example
-- Registers an external table over raw access-log files stored under an S3 prefix.
-- Replace the <Database>, <Table> and <Bucket> placeholders before running.
--
-- Every log line is split into nine STRING columns by the RegexSerDe pattern
-- below: one capture group per column, in declaration order. The trailing
-- referer/agent pair sits inside an optional group, so lines that end after
-- the size field still match.
-- NOTE(review): the pattern looks like the Apache common/combined log layout
-- (host identity user [time] "request" status size "referer" "agent");
-- confirm against the actual log producer.
--
-- `user` and `time` are back-quoted because both appear on the Hive / Athena
-- DDL reserved-keyword lists; left bare they can make this statement fail to
-- parse. Quoting does not change the resulting column names.
--
-- The table is partitioned by year/month/day/hour, but this statement does not
-- register any partitions itself.
-- NOTE(review): presumably partitions are added afterwards (ALTER TABLE ... ADD
-- PARTITION or MSCK REPAIR TABLE); confirm how the S3 prefix is laid out.
CREATE EXTERNAL TABLE IF NOT EXISTS <Database>.<Table> (
    host STRING,
    identity STRING,
    `user` STRING,
    `time` STRING,
    request STRING,
    status STRING,
    size STRING,
    referer STRING,
    agent STRING
)
PARTITIONED BY (
    year INT,
    month INT,
    day INT,
    hour INT
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    "input.regex" = "([^ ]*) ([^ ]*) ([^ ]*) (-|\\[[^\\]]*\\]) ([^ \"]*|\"[^\"]*\") (-|[0-9]*) (-|[0-9]*)(?: ([^ \"]*|\"[^\"]*\") ([^ \"]*|\"[^\"]*\"))?",
    "output.format.string" = "%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s"
)
LOCATION 's3://<Bucket>/'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment