Last active
August 3, 2016 00:02
-
-
Save yuta-imai/e7f47b865d6dbe072548575a72adca4c to your computer and use it in GitHub Desktop.
Test script for Hive with ORC. It mounts the S3 data set provided at https://amplab.cs.berkeley.edu/benchmark/ as an external table, then imports the data from the external table into an internal table.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- External table over the benchmark "rankings" data stored on S3.
-- The data is read as SequenceFiles whose row fields are separated by commas.
-- IF NOT EXISTS keeps the script re-runnable: a second run no longer fails
-- because the table is already defined. NOTE: an already existing table with
-- a different definition is left untouched.
CREATE EXTERNAL TABLE IF NOT EXISTS rankings_external (
    pageURL     VARCHAR(300),
    pageRank    INT,
    avgDuration INT
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS SEQUENCEFILE
LOCATION 's3a://big-data-benchmark/pavlo/sequence/1node/rankings/';
-- Internal table with the same three columns as rankings_external,
-- stored in ORC format. It is the target of the import from the external table.
-- IF NOT EXISTS keeps the script re-runnable: a second run no longer fails
-- because the table is already defined. NOTE: an already existing table with
-- a different definition is left untouched.
CREATE TABLE IF NOT EXISTS rankings (
    pageURL     VARCHAR(300),
    pageRank    INT,
    avgDuration INT
)
STORED AS ORC;
-- Load all rows from the external table into the ORC table, replacing the
-- ORC table's current contents (OVERWRITE), so the load can be repeated.
-- Columns are listed explicitly, in the target table's column order, instead
-- of SELECT *: the insert matches columns by position, so a column added to
-- or reordered in rankings_external must not silently change what is loaded.
INSERT OVERWRITE TABLE rankings
SELECT
    pageURL,
    pageRank,
    avgDuration
FROM rankings_external;
-- Ten pageRank values that occur most often in rankings, with their row counts.
-- pagerank is added as a secondary sort key so that ties in cnt are broken
-- the same way on every run; without it the rows kept by LIMIT 10 can vary.
SELECT
    pagerank,
    COUNT(*) AS cnt
FROM rankings
GROUP BY pagerank
ORDER BY cnt DESC, pagerank
LIMIT 10;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment