hive> CREATE TABLE thanooj.docs (line STRING);
OK
Time taken: 0.06 seconds
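The CREATE TABLE above assumes the thanooj database already exists. If it does not, it can be created first; a one-line sketch (not part of the recorded session):

CREATE DATABASE IF NOT EXISTS thanooj;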
hive> LOAD DATA LOCAL INPATH '/home/ubuntu/input/abc.txt' OVERWRITE INTO TABLE THANOOJ.docs;
Loading data to table thanooj.docs
Table thanooj.docs stats: [numFiles=1, numRows=0, totalSize=57, rawDataSize=0]
OK
Time taken: 0.161 seconds
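LOAD DATA LOCAL copies abc.txt from the local filesystem into the table's warehouse directory on HDFS (confirmed further down with hadoop fs -cat). As an alternative, an EXTERNAL table can be pointed at an existing HDFS directory so Hive reads the files in place without copying them; a sketch where the table name and LOCATION are hypothetical:

CREATE EXTERNAL TABLE thanooj.docs_ext (line STRING)
LOCATION '/user/ubuntu/input/docs';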
hive> select * from thanooj.docs;
OK
to be or not to be
to be or not to be
to be or not to be
Time taken: 0.057 seconds, Fetched: 3 row(s)
hive> SELECT split(line, ' ') AS word FROM docs;
OK
["to","be","or","not","to","be"]
["to","be","or","not","to","be"]
["to","be","or","not","to","be"]
Time taken: 0.069 seconds, Fetched: 3 row(s)
hive> SELECT explode(split(line, ' ')) AS word FROM docs;
OK
to
be
or
not
to
be
to
be
or
not
to
be
to
be
or
not
to
be
Time taken: 0.079 seconds, Fetched: 18 row(s)
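split() turns each line into an array of tokens, and explode() flattens those arrays into one row per word. The same flattening is commonly written with LATERAL VIEW, which is required whenever other columns are selected alongside the exploded one; a minimal sketch of the word count in that form (not part of the recorded session):

SELECT t.word, count(1) AS count
FROM thanooj.docs
LATERAL VIEW explode(split(line, ' ')) t AS word
GROUP BY t.word;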
hive> CREATE TABLE thanooj.word_counts AS SELECT word, count(word) AS count FROM (SELECT explode(split(line, ' ')) AS word FROM thanooj.docs) w GROUP BY word ORDER BY word;
Query ID = ubuntu_20160111174054_e5685e76-c6f3-4bcf-93f0-5f566e473654
Total jobs = 2
Launching Job 1 out of 2
Number of reduce tasks not specified. Estimated from input data size: 1
In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
  set mapreduce.job.reduces=<number>
Starting Job = job_1452510713608_0011, Tracking URL = http://ubuntu:8088/proxy/application_1452510713608_0011/
Kill Command = /usr/local/hadoop2/bin/hadoop job -kill job_1452510713608_0011
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1
2016-01-11 17:41:00,586 Stage-1 map = 0%, reduce = 0%
2016-01-11 17:41:07,125 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 1.12 sec
2016-01-11 17:41:14,613 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 1.93 sec
MapReduce Total cumulative CPU time: 1 seconds 930 msec
Ended Job = job_1452510713608_0011
Launching Job 2 out of 2
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
  set mapreduce.job.reduces=<number>
Starting Job = job_1452510713608_0012, Tracking URL = http://ubuntu:8088/proxy/application_1452510713608_0012/
Kill Command = /usr/local/hadoop2/bin/hadoop job -kill job_1452510713608_0012
Hadoop job information for Stage-2: number of mappers: 1; number of reducers: 1
2016-01-11 17:41:25,553 Stage-2 map = 0%, reduce = 0%
2016-01-11 17:41:31,045 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 0.67 sec
2016-01-11 17:41:37,431 Stage-2 map = 100%, reduce = 100%, Cumulative CPU 1.52 sec
MapReduce Total cumulative CPU time: 1 seconds 520 msec
Ended Job = job_1452510713608_0012
Moving data to: hdfs://localhost:54310/user/hive/warehouse/thanooj.db/word_counts
Table thanooj.word_counts stats: [numFiles=1, numRows=4, totalSize=21, rawDataSize=17]
MapReduce Jobs Launched:
Stage-Stage-1: Map: 1 Reduce: 1 Cumulative CPU: 1.93 sec HDFS Read: 7200 HDFS Write: 181 SUCCESS
Stage-Stage-2: Map: 1 Reduce: 1 Cumulative CPU: 1.52 sec HDFS Read: 4516 HDFS Write: 96 SUCCESS
Total MapReduce CPU Time Spent: 3 seconds 450 msec
OK
Time taken: 44.291 seconds
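Two MapReduce jobs are launched because the query needs two shuffle phases: the GROUP BY aggregation runs in Stage-1, and the global ORDER BY, which forces a single reducer, runs in Stage-2. The stage plan can be inspected without running the query; a sketch (not part of the recorded session):

EXPLAIN
SELECT word, count(word) AS count
FROM (SELECT explode(split(line, ' ')) AS word FROM thanooj.docs) w
GROUP BY word ORDER BY word;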
hive> select * from thanooj.word_counts;
OK
be 6
not 3
or 3
to 6
Time taken: 0.068 seconds, Fetched: 4 row(s)
hive>
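The word_counts table can be queried like any other table, for example to list words by frequency instead of alphabetically; a sketch (not part of the recorded session), with the count column backtick-quoted because it shares its name with the count() function:

SELECT word, `count` FROM thanooj.word_counts ORDER BY `count` DESC;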
ubuntu@ubuntu:~/input$ hadoop fs -ls
ubuntu@ubuntu:~/input$ hadoop fs -ls /
Found 2 items
drwx-wx-wx - ubuntu supergroup 0 2016-01-11 17:22 /tmp
drwxr-xr-x - ubuntu supergroup 0 2016-01-11 17:13 /user
ubuntu@ubuntu:~/input$ hadoop fs -ls /user
Found 1 items
drwxr-xr-x - ubuntu supergroup 0 2016-01-11 17:13 /user/hive
ubuntu@ubuntu:~/input$ hadoop fs -mkdir /user/ubuntu
ubuntu@ubuntu:~/input$ hadoop fs -ls
ubuntu@ubuntu:~/input$ hadoop fs -ls /user/hive
Found 1 items
drwxr-xr-x - ubuntu supergroup 0 2016-01-11 17:14 /user/hive/warehouse
ubuntu@ubuntu:~/input$ hadoop fs -ls /user/hive/warehouse
Found 2 items
drwxr-xr-x - ubuntu supergroup 0 2016-01-11 17:13 /user/hive/warehouse/docs
drwxr-xr-x - ubuntu supergroup 0 2016-01-11 17:41 /user/hive/warehouse/thanooj.db
ubuntu@ubuntu:~/input$ hadoop fs -ls /user/hive/warehouse/thanooj.db
Found 2 items
drwxr-xr-x - ubuntu supergroup 0 2016-01-11 17:28 /user/hive/warehouse/thanooj.db/docs
drwxr-xr-x - ubuntu supergroup 0 2016-01-11 17:41 /user/hive/warehouse/thanooj.db/word_counts
ubuntu@ubuntu:~/input$ hadoop fs -ls /user/hive/warehouse/thanooj.db/docs
Found 1 items
-rwxr-xr-x 1 ubuntu supergroup 57 2016-01-11 17:28 /user/hive/warehouse/thanooj.db/docs/abc.txt
ubuntu@ubuntu:~/input$ hadoop fs -cat /user/hive/warehouse/thanooj.db/docs/abc.txt
to be or not to be
to be or not to be
to be or not to be
ubuntu@ubuntu:~/input$ hadoop fs -ls /user/hive/warehouse/thanooj.db/word_counts
Found 1 items
-rwxr-xr-x 1 ubuntu supergroup 21 2016-01-11 17:41 /user/hive/warehouse/thanooj.db/word_counts/000000_0
ubuntu@ubuntu:~/input$ hadoop fs -cat /user/hive/warehouse/thanooj.db/word_counts/000000_0
16/01/11 17:57:59 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
be6
not3
or3
to6
ubuntu@ubuntu:~/input$
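In the raw HDFS file, the words and counts appear run together (be6, not3, ...) because Hive's default text format separates fields with the non-printing Ctrl-A (\001) character, which cat does not display. If a plainly readable output file is wanted, the result table can be created with an explicit field delimiter; a sketch with a hypothetical table name (not part of the recorded session):

CREATE TABLE thanooj.word_counts_tsv
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
AS SELECT word, count(word) AS count
FROM (SELECT explode(split(line, ' ')) AS word FROM thanooj.docs) w
GROUP BY word ORDER BY word;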
NOTE
In both examples, the files were tokenized into words using the simplest possible approach: splitting on whitespace boundaries. This approach doesn't properly handle punctuation, doesn't recognize that singular and plural forms are the same word, and so on. Still, it is good enough for our purposes here. The virtue of the Java API is the ability to customize and fine-tune every detail of an algorithm's implementation; however, most of the time you don't need that level of control, and managing all of those details slows you down considerably.
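A slightly more robust tokenization can be expressed directly in HiveQL by lower-casing each line and splitting on runs of non-alphanumeric characters, which strips most punctuation. This is still only a sketch (it does nothing about plurals or stemming) and is not part of the recorded session:

SELECT word, count(word) AS count
FROM (SELECT explode(split(lower(line), '[^a-z0-9]+')) AS word FROM thanooj.docs) w
WHERE word != ''
GROUP BY word
ORDER BY word;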