thanoojgithub · March 4, 2016 07:17
diff --git a/row_number() ranking in Hive - DE-DUPLICATION b/row_number() ranking in Hive - DE-DUPLICATION
 ubuntu@ubuntu:~$ jps
 3330 Jps
 ubuntu@ubuntu:~$ start-dfs.sh
 16/03/03 21:47:17 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
 Starting namenodes on [localhost]
 localhost: starting namenode, logging to /home/ubuntu/softwares/hadoop-2.7.2/logs/hadoop-ubuntu-namenode-ubuntu.out
 localhost: starting datanode, logging to /home/ubuntu/softwares/hadoop-2.7.2/logs/hadoop-ubuntu-datanode-ubuntu.out
 Starting secondary namenodes [0.0.0.0]
 0.0.0.0: starting secondarynamenode, logging to /home/ubuntu/softwares/hadoop-2.7.2/logs/hadoop-ubuntu-secondarynamenode-ubuntu.out
 16/03/03 21:47:34 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
 ubuntu@ubuntu:~$ start-yarn.sh
 starting yarn daemons
 starting resourcemanager, logging to /home/ubuntu/softwares/hadoop-2.7.2/logs/yarn-ubuntu-resourcemanager-ubuntu.out
 localhost: starting nodemanager, logging to /home/ubuntu/softwares/hadoop-2.7.2/logs/yarn-ubuntu-nodemanager-ubuntu.out

 ubuntu@ubuntu:~$ jps
 4198 NodeManager
 3488 NameNode
 6932 Jps
 3853 SecondaryNameNode
 4058 ResourceManager
 3622 DataNode

 ubuntu@ubuntu:~$ hive
 Logging initialized using configuration in file:/home/ubuntu/softwares/hive-1.2.1/conf/hive-log4j.properties

 hive> show schemas;
 OK
 default
 thanooj
 Time taken: 1.161 seconds, Fetched: 2 row(s)
 hive> use  thanooj;
 OK
 Time taken: 0.026 seconds

 hive> create EXTERNAL table thanooj.cust_credit (ID INT, NAME STRING, doa STRING, loation STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n';

 hive> LOAD DATA LOCAL INPATH '/home/ubuntu/input/input/cust_credit.txt' OVERWRITE INTO TABLE thanooj.cust_credit;

 hive> set hive.cli.print.header=true;

 hive> select * from thanooj.cust_credit;
 OK
 cust_credit.id	cust_credit.name	cust_credit.doa	cust_credit.location
 1	sriram	2015-10-12	ayodhya
 2	seeta	2015-09-12	midhila
 3	lakshman	2015-11-12	ayodhya
 4	bharata	2015-12-12	ayodhya
 5	sathrugna	2015-12-12	ayodhya
 6	hanuma	2015-11-12	ayodhya
 7	sriram	2015-10-12	ayodhya
 8	seeta	2015-09-12	midhila
 9	lakshman	2015-11-12	ayodhya
 10	bharata	2015-12-12	ayodhya
 11	sathrugna	2015-12-12	ayodhya
 12	hanuma	2015-11-12	ayodhya
 Time taken: 0.081 seconds, Fetched: 12 row(s)
 hive>  SELECT * FROM (SELECT *,row_number() over (partition by name) AS row_nm FROM thanooj.cust_credit)c WHERE c.row_nm = 1;
 Query ID = ubuntu_20160303220426_04a07d31-f04a-4ae0-b133-d7cef22b557c
 Total jobs = 1
 Launching Job 1 out of 1
 Number of reduce tasks not specified. Estimated from input data size: 1
 In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
 In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
 In order to set a constant number of reducers:
  set mapreduce.job.reduces=<number>
 Starting Job = job_1457070465534_0002, Tracking URL = http://ubuntu:8088/proxy/application_1457070465534_0002/
 Kill Command = /home/ubuntu/softwares/hadoop-2.7.2/bin/hadoop job  -kill job_1457070465534_0002
 Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1
 2016-03-03 22:04:33,171 Stage-1 map = 0%,  reduce = 0%
 2016-03-03 22:04:46,716 Stage-1 map = 100%,  reduce = 0%, Cumulative CPU 1.04 sec
 2016-03-03 22:04:52,984 Stage-1 map = 100%,  reduce = 100%, Cumulative CPU 3.15 sec
 MapReduce Total cumulative CPU time: 3 seconds 150 msec
 Ended Job = job_1457070465534_0002
 MapReduce Jobs Launched: 
 Stage-Stage-1: Map: 1  Reduce: 1   Cumulative CPU: 3.15 sec   HDFS Read: 9522 HDFS Write: 188 SUCCESS
 Total MapReduce CPU Time Spent: 3 seconds 150 msec
 OK
 10	bharata	2015-12-12	ayodhya	1
 12	hanuma	2015-11-12	ayodhya	1
 9	lakshman	2015-11-12	ayodhya	1
 11	sathrugna	2015-12-12	ayodhya	1
 8	seeta	2015-09-12	midhila	1
 7	sriram	2015-10-12	ayodhya	1
 Time taken: 27.702 seconds, Fetched: 6 row(s)
	ubuntu@ubuntu:~$ jps
	3330 Jps
	ubuntu@ubuntu:~$ start-dfs.sh
	16/03/03 21:47:17 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
	Starting namenodes on [localhost]
	localhost: starting namenode, logging to /home/ubuntu/softwares/hadoop-2.7.2/logs/hadoop-ubuntu-namenode-ubuntu.out
	localhost: starting datanode, logging to /home/ubuntu/softwares/hadoop-2.7.2/logs/hadoop-ubuntu-datanode-ubuntu.out
	Starting secondary namenodes [0.0.0.0]
	0.0.0.0: starting secondarynamenode, logging to /home/ubuntu/softwares/hadoop-2.7.2/logs/hadoop-ubuntu-secondarynamenode-ubuntu.out
	16/03/03 21:47:34 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
	ubuntu@ubuntu:~$ start-yarn.sh
	starting yarn daemons
	starting resourcemanager, logging to /home/ubuntu/softwares/hadoop-2.7.2/logs/yarn-ubuntu-resourcemanager-ubuntu.out
	localhost: starting nodemanager, logging to /home/ubuntu/softwares/hadoop-2.7.2/logs/yarn-ubuntu-nodemanager-ubuntu.out

	ubuntu@ubuntu:~$ jps
	4198 NodeManager
	3488 NameNode
	6932 Jps
	3853 SecondaryNameNode
	4058 ResourceManager
	3622 DataNode

	ubuntu@ubuntu:~$ hive
	Logging initialized using configuration in file:/home/ubuntu/softwares/hive-1.2.1/conf/hive-log4j.properties

	hive> show schemas;
	OK
	default
	thanooj
	Time taken: 1.161 seconds, Fetched: 2 row(s)
	hive> use thanooj;
	OK
	Time taken: 0.026 seconds

	hive> create EXTERNAL table thanooj.cust_credit (ID INT, NAME STRING, doa STRING, loation STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n';

	hive> LOAD DATA LOCAL INPATH '/home/ubuntu/input/input/cust_credit.txt' OVERWRITE INTO TABLE thanooj.cust_credit;

	hive> set hive.cli.print.header=true;

	hive> select * from thanooj.cust_credit;
	OK
	cust_credit.id cust_credit.name cust_credit.doa cust_credit.location
	1 sriram 2015-10-12 ayodhya
	2 seeta 2015-09-12 midhila
	3 lakshman 2015-11-12 ayodhya
	4 bharata 2015-12-12 ayodhya
	5 sathrugna 2015-12-12 ayodhya
	6 hanuma 2015-11-12 ayodhya
	7 sriram 2015-10-12 ayodhya
	8 seeta 2015-09-12 midhila
	9 lakshman 2015-11-12 ayodhya
	10 bharata 2015-12-12 ayodhya
	11 sathrugna 2015-12-12 ayodhya
	12 hanuma 2015-11-12 ayodhya
	Time taken: 0.081 seconds, Fetched: 12 row(s)
	hive> SELECT * FROM (SELECT *,row_number() over (partition by name) AS row_nm FROM thanooj.cust_credit)c WHERE c.row_nm = 1;
	Query ID = ubuntu_20160303220426_04a07d31-f04a-4ae0-b133-d7cef22b557c
	Total jobs = 1
	Launching Job 1 out of 1
	Number of reduce tasks not specified. Estimated from input data size: 1
	In order to change the average load for a reducer (in bytes):
	set hive.exec.reducers.bytes.per.reducer=<number>
	In order to limit the maximum number of reducers:
	set hive.exec.reducers.max=<number>
	In order to set a constant number of reducers:
	set mapreduce.job.reduces=<number>
	Starting Job = job_1457070465534_0002, Tracking URL = http://ubuntu:8088/proxy/application_1457070465534_0002/
	Kill Command = /home/ubuntu/softwares/hadoop-2.7.2/bin/hadoop job -kill job_1457070465534_0002
	Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1
	2016-03-03 22:04:33,171 Stage-1 map = 0%, reduce = 0%
	2016-03-03 22:04:46,716 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 1.04 sec
	2016-03-03 22:04:52,984 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 3.15 sec
	MapReduce Total cumulative CPU time: 3 seconds 150 msec
	Ended Job = job_1457070465534_0002
	MapReduce Jobs Launched:
	Stage-Stage-1: Map: 1 Reduce: 1 Cumulative CPU: 3.15 sec HDFS Read: 9522 HDFS Write: 188 SUCCESS
	Total MapReduce CPU Time Spent: 3 seconds 150 msec
	OK
	10 bharata 2015-12-12 ayodhya 1
	12 hanuma 2015-11-12 ayodhya 1
	9 lakshman 2015-11-12 ayodhya 1
	11 sathrugna 2015-12-12 ayodhya 1
	8 seeta 2015-09-12 midhila 1
	7 sriram 2015-10-12 ayodhya 1
	Time taken: 27.702 seconds, Fetched: 6 row(s)