--****0.13.0 to 0.14.0****
--1. hive-7784 非必须
--hive-8715 这张表在0.13.1环境已经存在,升级过程只需要加一个索引
CREATE TABLE IF NOT EXISTS `PART_COL_STATS` (
`CS_ID` bigint(20) NOT NULL,
`DB_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`TABLE_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`PARTITION_NAME` varchar(767) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
接下来使用下面的步骤完成“helloworld”版本库的初始化。 | |
本地建立一个Git版本库。 | |
$ mkdir helloworld | |
$ cd helloworld | |
$ git init | |
然后在版本库中添加示例文件,如README.md文件,内容同前。 | |
$ git add README.md | |
$ git commit -m "README for this project." | |
为版本库添加名为origin的远程版本库。 | |
$ git remote add origin git@github.com:gotgithub/helloworld.git
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Rebuilds `result` from web_sales rows whose ws_bill_customer_sk is NULL,
-- giving a row count and summed ws_ext_sales_price per
-- (channel, col_name, d_year, d_qoy, i_category).
-- NOTE(review): the visible script ended without a statement terminator; a
-- semicolon was added. Confirm no trailing clause (e.g. LIMIT) was lost.
SET mapred.reduce.tasks=500;

-- IF EXISTS keeps the script re-runnable when `result` has not been created yet.
DROP TABLE IF EXISTS result;

CREATE TABLE result AS
SELECT
    channel,
    col_name,
    d_year,
    d_qoy,
    i_category,
    COUNT(*) AS sales_cnt,
    SUM(ext_sales_price) AS sales_amt
FROM (
    -- Tag each qualifying sale with constant channel / column labels.
    SELECT
        'web' AS channel,
        'ws_bill_customer_sk' AS col_name,
        d_year,
        d_qoy,
        i_category,
        ws_ext_sales_price AS ext_sales_price
    FROM web_sales
    JOIN item
        ON (web_sales.ws_item_sk = item.i_item_sk)
    JOIN date_dim
        ON (web_sales.ws_sold_date_sk = date_dim.d_date_sk)
    WHERE ws_bill_customer_sk IS NULL
) foo
GROUP BY channel, col_name, d_year, d_qoy, i_category
ORDER BY channel, col_name, d_year, d_qoy, i_category;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Rebuilds `result` from web_sales rows whose ws_bill_customer_sk is NULL,
-- giving a row count and summed ws_ext_sales_price per
-- (channel, col_name, d_year, d_qoy, i_category). Only the first 100 groups
-- in sort order are kept.
SET mapred.reduce.tasks=500;

-- IF EXISTS keeps the script re-runnable when `result` has not been created yet.
DROP TABLE IF EXISTS result;

CREATE TABLE result AS
SELECT
    channel,
    col_name,
    d_year,
    d_qoy,
    i_category,
    COUNT(*) AS sales_cnt,
    SUM(ext_sales_price) AS sales_amt
FROM (
    -- Tag each qualifying sale with constant channel / column labels.
    SELECT
        'web' AS channel,
        'ws_bill_customer_sk' AS col_name,
        d_year,
        d_qoy,
        i_category,
        ws_ext_sales_price AS ext_sales_price
    FROM web_sales
    JOIN item
        ON (web_sales.ws_item_sk = item.i_item_sk)
    JOIN date_dim
        ON (web_sales.ws_sold_date_sk = date_dim.d_date_sk)
    WHERE ws_bill_customer_sk IS NULL
) foo
GROUP BY channel, col_name, d_year, d_qoy, i_category
ORDER BY channel, col_name, d_year, d_qoy, i_category
LIMIT 100;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
程序行为: | |
在这一小时里,GC线程的CPU占用率一直是100%,占用了系统大量CPU,并且不停地进行Full GC,但进程并没有立即因OOM错误退出;大约一小时后才会报OOM错误退出。
程序在干什么: | |
这是一个Hive客户端程序。有一些SQL需要处理上亿个文件,程序启动后要遍历所有文件并缓存到内存中,以生成SQL job所需要的计算任务,
因而文件数太多会导致堆内存不足。希望这种情况下直接报OOM退出即可。
jdk: | |
jdk是1.7.0_55,不过换到jdk1.8.0_91,症状类似,都是1小时才报错oom。 |