Created
May 7, 2017 04:44
-
-
Save jamesrajendran/ee21a7329450cfed0a1a78e961216fcc to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
hive> set mapreduce.framework.name=local | |
display hive database name: set hive.cli.print.current.db=true; | |
DESCRIBE EXTENDED husn_small; --to get statistics | |
Analyze table husn_small compute statistics; | |
create table snpn(sn String, pn String) | |
LOAD DATA INPATH 'hdfs://127200813master.eap.g4ihos.itcs.hpecorp.net:8020/user/centos7/test_data/snpn' append INTO TABLE snpn | |
----------------------ORC in hive performance tuning-------------------------------------- | |
TEZ | |
CREATE TABLE A_ORC (customerID int, name string, age int, address string ) STORED AS ORC tblproperties (“orc.compress" = “SNAPPY”); | |
INSERT INTO TABLE A_ORC SELECT * FROM A; | |
set hive.vectorized.execution.enabled = true; | |
set hive.vectorized.execution.reduce.enabled = true; | |
data should be stored in ORC format | |
--enbale CBO--------------- | |
set hive.cbo.enbale=true; | |
set hive.compute.query.using.stats=true; | |
set hive.stats.fetch.column.stats=true; | |
set hive.stats.fetch.partition.stats=true; | |
analyze table tweets compute statistics for columns; | |
----window function------------ | |
SELECT * FROM | |
(SELECT *, RANK() over (partition by sessionID,order by timestamp desc) as rank FROM clicks) ranked_clicks | |
WHERE ranked_clicks.rank=1; | |
-----avoid global sort------------------------ | |
SELECT id, name, salary, dept FROM employee | |
DISTRIBUTE BY dept --custom partition like | |
SORT BY id ASC, name DESC; | |
--------------------parallel ------------- | |
<name>hive.exec.parallel</name> | |
<name>hive.exec.parallel.thread.number</name> | |
<value>8</value> | |
---------------- multi group by single reducer------------------ | |
<name>hive.multigroupby.singlereducer</name> | |
<value>true</value> | |
<description> | |
Whether to optimize multi group by query to generate single M/R job plan. If the multi group by query has | |
common group by keys, it will be optimized to generate single M/R job. | |
--------------------bucketing---------------------------- | |
uses hash partitioning | |
useful when cardinality of data is high | |
result in fixed number of files as opposed to Partitioning | |
it's similar to clustering. | |
recommended not to use | |
-----------------functions----------------- | |
show functions | |
show functions ".*date.*" | |
describe function <function name> | |
describe function extended <function name> | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment