Last active
April 25, 2020 18:45
-
-
Save amwolff/0ed5f951e86c6bace72dcb9e62c2f1ea to your computer and use it in GitHub Desktop.
Apache Hive test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SET hive.exec.dynamic.partition.mode = nonstrict; | |
SET hive.exec.max.dynamic.partitions = 2000; | |
SET hive.exec.max.dynamic.partitions.pernode = 500; | |
USE wolffartu; | |
DROP TABLE IF EXISTS temp_Vehicles PURGE; | |
CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS temp_Vehicles ( | |
id BIGINT, | |
ts TIMESTAMP, | |
nr_radia SMALLINT, | |
nb SMALLINT, | |
numer_lini VARCHAR(3), | |
war_trasy CHAR(1), | |
kierunek CHAR(1), | |
id_kursu SMALLINT, | |
lp_przyst TINYINT, | |
droga_plan SMALLINT, | |
droga_wyko SMALLINT, | |
dlugosc DECIMAL(7, 5), | |
szerokosc DECIMAL(7, 5), | |
prev_dlugosc DECIMAL(7, 5), | |
prev_szerokosc DECIMAL(7, 5), | |
odchylenie SMALLINT, | |
odchylenie_str CHAR(9), | |
stan TINYINT, | |
plan_godz_rozp CHAR(5), | |
nast_id_kursu SMALLINT, | |
nast_plan_godz_rozp CHAR(5), | |
nast_num_lini VARCHAR(3), | |
nast_war_trasy CHAR(1), | |
nast_kierunek CHAR(1), | |
ile_sek_do_odjazdu SMALLINT, | |
typ_pojazdu CHAR(1), | |
transport CHAR(1), | |
cechy VARCHAR(3), | |
opis_tabl STRING, | |
nast_opis_tabl STRING, | |
wektor DECIMAL(5, 2)) | |
ROW FORMAT DELIMITED | |
FIELDS TERMINATED BY ',' | |
STORED AS TEXTFILE | |
LOCATION '/user/wolffartu/input' | |
TBLPROPERTIES ('skip.header.line.count' = '1', 'timestamp.formats' = 'yyyy-MM-dd HH:mm:ss.SSSSSS+00', 'serialization.null.format' = '""'); | |
DROP TABLE IF EXISTS Vehicles PURGE; | |
CREATE TABLE Vehicles ( | |
route VARCHAR(3), | |
deviation SMALLINT) | |
COMMENT '1-7.05.2019 public transportation vehicles (Olsztyn/Poland)' | |
PARTITIONED BY(day TINYINT) | |
CLUSTERED BY(route) INTO 4 BUCKETS | |
STORED AS ORC; | |
INSERT OVERWRITE TABLE Vehicles PARTITION(day) | |
SELECT IF(t.numer_lini IS NOT NULL, t.numer_lini, t.nast_num_lini), t.odchylenie, from_unixtime(unix_timestamp(t.ts), 'd') FROM temp_Vehicles t WHERE ile_sek_do_odjazdu <= 0; | |
SELECT t.route as Route, t.day as Day, t.average as Average FROM ( | |
SELECT d.route, d.day, d.average, DENSE_RANK() OVER (PARTITION BY d.day ORDER BY d.average DESC) as rank FROM ( | |
SELECT route, day, AVG(deviation) AS average FROM Vehicles GROUP BY route, day) as d) as t | |
WHERE t.rank < 6 ORDER BY Day ASC, Average DESC; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment