Skip to content

Instantly share code, notes, and snippets.

@amwolff
Last active April 25, 2020 18:45
Show Gist options
  • Save amwolff/0ed5f951e86c6bace72dcb9e62c2f1ea to your computer and use it in GitHub Desktop.
Save amwolff/0ed5f951e86c6bace72dcb9e62c2f1ea to your computer and use it in GitHub Desktop.
Apache Hive test
SET hive.exec.dynamic.partition.mode = nonstrict;
SET hive.exec.max.dynamic.partitions = 2000;
SET hive.exec.max.dynamic.partitions.pernode = 500;
USE wolffartu;
DROP TABLE IF EXISTS temp_Vehicles PURGE;
CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS temp_Vehicles (
id BIGINT,
ts TIMESTAMP,
nr_radia SMALLINT,
nb SMALLINT,
numer_lini VARCHAR(3),
war_trasy CHAR(1),
kierunek CHAR(1),
id_kursu SMALLINT,
lp_przyst TINYINT,
droga_plan SMALLINT,
droga_wyko SMALLINT,
dlugosc DECIMAL(7, 5),
szerokosc DECIMAL(7, 5),
prev_dlugosc DECIMAL(7, 5),
prev_szerokosc DECIMAL(7, 5),
odchylenie SMALLINT,
odchylenie_str CHAR(9),
stan TINYINT,
plan_godz_rozp CHAR(5),
nast_id_kursu SMALLINT,
nast_plan_godz_rozp CHAR(5),
nast_num_lini VARCHAR(3),
nast_war_trasy CHAR(1),
nast_kierunek CHAR(1),
ile_sek_do_odjazdu SMALLINT,
typ_pojazdu CHAR(1),
transport CHAR(1),
cechy VARCHAR(3),
opis_tabl STRING,
nast_opis_tabl STRING,
wektor DECIMAL(5, 2))
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION '/user/wolffartu/input'
TBLPROPERTIES ('skip.header.line.count' = '1', 'timestamp.formats' = 'yyyy-MM-dd HH:mm:ss.SSSSSS+00', 'serialization.null.format' = '""');
DROP TABLE IF EXISTS Vehicles PURGE;
CREATE TABLE Vehicles (
route VARCHAR(3),
deviation SMALLINT)
COMMENT '1-7.05.2019 public transportation vehicles (Olsztyn/Poland)'
PARTITIONED BY(day TINYINT)
CLUSTERED BY(route) INTO 4 BUCKETS
STORED AS ORC;
INSERT OVERWRITE TABLE Vehicles PARTITION(day)
SELECT IF(t.numer_lini IS NOT NULL, t.numer_lini, t.nast_num_lini), t.odchylenie, from_unixtime(unix_timestamp(t.ts), 'd') FROM temp_Vehicles t WHERE ile_sek_do_odjazdu <= 0;
SELECT t.route as Route, t.day as Day, t.average as Average FROM (
SELECT d.route, d.day, d.average, DENSE_RANK() OVER (PARTITION BY d.day ORDER BY d.average DESC) as rank FROM (
SELECT route, day, AVG(deviation) AS average FROM Vehicles GROUP BY route, day) as d) as t
WHERE t.rank < 6 ORDER BY Day ASC, Average DESC;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment