File Formats in Apache HIVE
[mapr@maprdemo work]$ cat /home/mapr/work/cust_credit_ext.txt
1,sriram,2015-10-12,ayodhya
2,seeta,2015-09-12,midhila
3,lakshman,2015-11-12,ayodhya
4,bharata,2015-12-12,ayodhya
5,sathrugna,2015-12-12,ayodhya
6,hanuma,2015-11-12,ayodhya
7,sriram,2015-10-12,ayodhya
8,seeta,2015-09-12,midhila
9,lakshman,2015-11-12,ayodhya
10,bharata,2015-12-12,ayodhya
11,sathrugna,2015-12-12,ayodhya
12,hanuma,2015-11-12,ayodhya
[mapr@maprdemo work]$ ls -ltr
total 4
-rwxrwxrwx 1 mapr mapr 349 May 17 21:33 cust_credit_ext.txt
[mapr@maprdemo ~]$ hadoop fs -mkdir /user/mapr/work
[mapr@maprdemo ~]$ hadoop fs -mkdir /user/mapr/work/input
[mapr@maprdemo ~]$ hadoop fs -put /home/mapr/work/cust_credit_ext.txt /user/mapr/work/input/
[mapr@maprdemo ~]$ hadoop fs -ls /user/mapr/work/input/
Found 1 items
-rwxr-xr-x 1 mapr mapr 349 2017-05-17 22:00 /user/mapr/work/input/cust_credit_ext.txt
[mapr@maprdemo ~]$
--TEXTFILE
hive> CREATE EXTERNAL TABLE thanooj.cust_credit_ext (id int, name string, doa string, location string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' STORED AS TEXTFILE LOCATION '/home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit/';
OK
Time taken: 0.064 seconds
hive> desc cust_credit_ext;
OK
id int
name string
doa string
location string
Time taken: 0.09 seconds, Fetched: 4 row(s)
hive> select * from thanooj.cust_credit_ext;
OK
1 sriram 2015-10-12 ayodhya
2 seeta 2015-09-12 midhila
3 lakshman 2015-11-12 ayodhya
4 bharata 2015-12-12 ayodhya
5 sathrugna 2015-12-12 ayodhya
6 hanuma 2015-11-12 ayodhya
7 sriram 2015-10-12 ayodhya
8 seeta 2015-09-12 midhila
9 lakshman 2015-11-12 ayodhya
10 bharata 2015-12-12 ayodhya
11 sathrugna 2015-12-12 ayodhya
12 hanuma 2015-11-12 ayodhya
Time taken: 0.294 seconds, Fetched: 12 row(s)
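-- NOTE: cust_credit_ext is EXTERNAL, so DROP TABLE would remove only the
-- metastore entry and leave the data files under LOCATION intact; a minimal
-- sketch (not run in this session):
-- DROP TABLE thanooj.cust_credit_ext;   -- files under the LOCATION remain in HDFS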
hive> select * from cust_credit cc join cust_debit cd on cc.id<>cd.id;
FAILED: SemanticException [Error 10017]: Line 1:51 Both left and right aliases encountered in JOIN 'id'
hive> select * from cust_credit cc join cust_debit cd on cc.id != cd.id;
FAILED: SemanticException [Error 10017]: Line 1:51 Both left and right aliases encountered in JOIN 'id'
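-- NOTE: Hive (at least through 1.2.x) accepts only equality conditions in
-- JOIN ... ON, hence both errors above. A workaround sketch, assuming the
-- cust_debit table referenced above: CROSS JOIN, with the inequality moved
-- into WHERE (this evaluates the full cross product, so mind the table sizes):
select cc.*, cd.*
from cust_credit cc
cross join cust_debit cd
where cc.id <> cd.id;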
hive> select id, collect_set(location)[0] as c_location from thanooj.cust_credit group by id;
Query ID = ubuntu_20160303231512_dd77dba6-8284-4b6f-9f71-68c9e68844ff
Total jobs = 1
Launching Job 1 out of 1
Number of reduce tasks not specified. Estimated from input data size: 1
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapreduce.job.reduces=<number>
Starting Job = job_1457070465534_0004, Tracking URL = http://ubuntu:8088/proxy/application_1457070465534_0004/
Kill Command = /home/ubuntu/softwares/hadoop-2.7.2/bin/hadoop job -kill job_1457070465534_0004
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1
2016-03-03 23:15:24,829 Stage-1 map = 0%, reduce = 0%
2016-03-03 23:16:05,390 Stage-1 map = 100%, reduce = 0%
2016-03-03 23:16:16,297 Stage-1 map = 0%, reduce = 0%
2016-03-03 23:17:16,694 Stage-1 map = 0%, reduce = 0%
2016-03-03 23:17:24,082 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 1.15 sec
2016-03-03 23:17:32,035 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 3.3 sec
MapReduce Total cumulative CPU time: 3 seconds 300 msec
Ended Job = job_1457070465534_0004
MapReduce Jobs Launched:
Stage-Stage-1: Map: 1 Reduce: 1 Cumulative CPU: 3.3 sec HDFS Read: 8635 HDFS Write: 123 SUCCESS
Total MapReduce CPU Time Spent: 3 seconds 300 msec
OK
id c_location
1 ayodhya
2 midhila
3 ayodhya
4 ayodhya
5 ayodhya
6 ayodhya
7 ayodhya
8 midhila
9 ayodhya
10 ayodhya
11 ayodhya
12 ayodhya
Time taken: 140.36 seconds, Fetched: 12 row(s)
hive> select id, location as c_location from thanooj.cust_credit group by id;
FAILED: SemanticException Line 0:-1 Expression not in GROUP BY key 'location'
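-- NOTE: every non-aggregated SELECT column must appear in GROUP BY. Two ways
-- to fix the query above, sketched:
select id, location as c_location from thanooj.cust_credit group by id, location;
-- or aggregate the column instead, as the collect_set() query did earlier:
-- select id, max(location) as c_location from thanooj.cust_credit group by id;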
hive> INSERT OVERWRITE LOCAL DIRECTORY '/home/ubuntu/test/' SELECT * from thanooj.cust_credit;
Query ID = ubuntu_20160318053609_42c6d022-d4f2-4736-9e3d-3c55b0acfe0f
Total jobs = 1
Launching Job 1 out of 1
Number of reduce tasks is set to 0 since there is no reduce operator
Starting Job = job_1458300898384_0002, Tracking URL = http://ubuntu:8088/proxy/application_1458300898384_0002/
Kill Command = /home/ubuntu/softwares/hadoop-2.7.2/bin/hadoop job -kill job_1458300898384_0002
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 0
2016-03-18 05:36:19,267 Stage-1 map = 0%, reduce = 0%
2016-03-18 05:36:37,026 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 1.51 sec
MapReduce Total cumulative CPU time: 1 seconds 510 msec
Ended Job = job_1458300898384_0002
Copying data to local directory /home/ubuntu/test
Copying data to local directory /home/ubuntu/test
MapReduce Jobs Launched:
Stage-Stage-1: Map: 1 Cumulative CPU: 1.51 sec HDFS Read: 3928 HDFS Write: 349 SUCCESS
Total MapReduce CPU Time Spent: 1 seconds 510 msec
OK
Time taken: 28.393 seconds
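-- NOTE: a plain INSERT OVERWRITE [LOCAL] DIRECTORY writes Hive's default
-- delimiters (Ctrl-A between fields). From Hive 0.11 onward the export can
-- carry its own row format; a sketch:
INSERT OVERWRITE LOCAL DIRECTORY '/home/ubuntu/test/'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
SELECT * FROM thanooj.cust_credit;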
--SEQUENCEFILE
hive> create table cust_credit_sec (id int, name string, doa string, location string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' stored as SEQUENCEFILE;
OK
Time taken: 0.412 seconds
hive> insert overwrite table cust_credit_sec select * from cust_credit;
Query ID = ubuntu_20160318065702_c4eb35c4-79aa-4d10-9773-be8c9a4058b6
Total jobs = 3
Launching Job 1 out of 3
Number of reduce tasks is set to 0 since there is no reduce operator
Starting Job = job_1458300898384_0007, Tracking URL = http://ubuntu:8088/proxy/application_1458300898384_0007/
Kill Command = /home/ubuntu/softwares/hadoop-2.7.2/bin/hadoop job -kill job_1458300898384_0007
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 0
2016-03-18 06:57:11,419 Stage-1 map = 0%, reduce = 0%
2016-03-18 06:57:27,226 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 2.12 sec
MapReduce Total cumulative CPU time: 2 seconds 120 msec
Ended Job = job_1458300898384_0007
Stage-4 is selected by condition resolver.
Stage-3 is filtered out by condition resolver.
Stage-5 is filtered out by condition resolver.
Moving data to: hdfs://localhost:9000/home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit_sec/.hive-staging_hive_2016-03-18_06-57-02_702_1026185545452775762-1/-ext-10000
Loading data to table thanooj.cust_credit_sec
Table thanooj.cust_credit_sec stats: [numFiles=1, numRows=12, totalSize=580, rawDataSize=337]
MapReduce Jobs Launched:
Stage-Stage-1: Map: 1 Cumulative CPU: 2.12 sec HDFS Read: 4336 HDFS Write: 660 SUCCESS
Total MapReduce CPU Time Spent: 2 seconds 120 msec
OK
Time taken: 25.934 seconds
hive> ! hadoop fs -cat /home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit_sec/000000_1;
log4j: reset attribute= "false".
log4j: Threshold ="null".
log4j: Level value for root is [INFO].
log4j: root level set to INFO
log4j: Class name: [org.apache.log4j.ConsoleAppender]
log4j: Parsing layout of class: "org.apache.log4j.PatternLayout"
log4j: Setting property [conversionPattern] to [%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n].
log4j: Adding appender named [console] to category [root].
2016-03-18 06:57:51 WARN NativeCodeLoader:62 - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
SEQ"org.apache.hadoop.io.BytesWritableorg.apache.hadoop.io.Texte%6�
a�?�d� 1,sriram,2015-10-12,ayodhya2,seeta,2015-09-12,midhila"3,lakshman,2015-11-12,ayodhya!4,bharata,2015-12-12,ayodhya#5,sathrugna,2015-12-12,ayodhya 6,hanuma,2015-11-12,ayodhya ,sriram,2015-10-12,ayodhya8,seeta,2015-09-12,midhila"9,lakshman,2015-11-12,ayodhya"10,bharata,2015-12-12,ayodhya$11,sathrugna,2015-12-12,ayodhya!12,hanuma,2015-11-12,ayodhya
hive> ! hadoop fs -text /home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit_sec/000000_1;
log4j: reset attribute= "false".
log4j: Threshold ="null".
log4j: Level value for root is [INFO].
log4j: root level set to INFO
log4j: Class name: [org.apache.log4j.ConsoleAppender]
log4j: Parsing layout of class: "org.apache.log4j.PatternLayout"
log4j: Setting property [conversionPattern] to [%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n].
log4j: Adding appender named [console] to category [root].
2016-03-18 06:58:30 WARN NativeCodeLoader:62 - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
1,sriram,2015-10-12,ayodhya
2,seeta,2015-09-12,midhila
3,lakshman,2015-11-12,ayodhya
4,bharata,2015-12-12,ayodhya
5,sathrugna,2015-12-12,ayodhya
6,hanuma,2015-11-12,ayodhya
7,sriram,2015-10-12,ayodhya
8,seeta,2015-09-12,midhila
9,lakshman,2015-11-12,ayodhya
10,bharata,2015-12-12,ayodhya
11,sathrugna,2015-12-12,ayodhya
12,hanuma,2015-11-12,ayodhya
hive> show create table thanooj.cust_credit_sec;
OK
CREATE TABLE `thanooj.cust_credit_sec`(
`id` int,
`name` string,
`doa` string,
`location` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.SequenceFileInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'
LOCATION
'hdfs://localhost:9000/home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit_sec'
TBLPROPERTIES (
'COLUMN_STATS_ACCURATE'='true',
'numFiles'='1',
'numRows'='12',
'rawDataSize'='337',
'totalSize'='376',
'transient_lastDdlTime'='1458308265')
Time taken: 0.055 seconds, Fetched: 20 row(s)
hive> describe extended thanooj.cust_credit_sec;
OK
id int
name string
doa string
location string
Detailed Table Information Table(tableName:cust_credit_sec, dbName:thanooj, owner:ubuntu, createTime:1458307933, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:id, type:int, comment:null), FieldSchema(name:name, type:string, comment:null), FieldSchema(name:doa, type:string, comment:null), FieldSchema(name:location, type:string, comment:null)], location:hdfs://localhost:9000/home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit_sec, inputFormat:org.apache.hadoop.mapred.SequenceFileInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=1}), bucketCols:[], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{}), storedAsSubDirectories:false), partitionKeys:[], parameters:{numFiles=1, COLUMN_STATS_ACCURATE=true, transient_lastDdlTime=1458308265, numRows=12, totalSize=376, rawDataSize=337}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)
Time taken: 0.075 seconds, Fetched: 6 row(s)
hive> describe formatted thanooj.cust_credit_sec;
OK
# col_name data_type comment
id int
name string
doa string
location string
# Detailed Table Information
Database: thanooj
Owner: ubuntu
CreateTime: Fri Mar 18 06:32:13 PDT 2016
LastAccessTime: UNKNOWN
Protect Mode: None
Retention: 0
Location: hdfs://localhost:9000/home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit_sec
Table Type: MANAGED_TABLE
Table Parameters:
COLUMN_STATS_ACCURATE true
numFiles 1
numRows 12
rawDataSize 337
totalSize 376
transient_lastDdlTime 1458308265
# Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.SequenceFileInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
serialization.format 1
Time taken: 0.062 seconds, Fetched: 34 row(s)
hive> SET hive.exec.compress.output=true;
hive> SET mapred.max.split.size=256000000;
hive> SET mapred.output.compression.type=BLOCK;
hive> SET mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;
-- (alternatively: SET mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec;)
hive> SET hive.exec.dynamic.partition.mode=nonstrict;
hive> SET hive.exec.dynamic.partition=true;
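-- NOTE: hive.exec.compress.output together with a BLOCK compression type
-- mainly pays off for SequenceFile output. With the settings above in effect,
-- re-running the insert would rewrite cust_credit_sec block-compressed; a
-- sketch using the tables already created above:
insert overwrite table thanooj.cust_credit_sec select * from thanooj.cust_credit;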
--PARQUET
hive> create table cust_credit_parquet (id int, name string, doa string, location string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS PARQUET;
OK
Time taken: 0.325 seconds
hive> insert overwrite table cust_credit_parquet select * from cust_credit;
Query ID = ubuntu_20160328061221_6d885d6a-cd20-40f5-842d-09025df628c4
Total jobs = 3
Launching Job 1 out of 3
Number of reduce tasks is set to 0 since there is no reduce operator
Starting Job = job_1459170227047_0001, Tracking URL = http://ubuntu:8088/proxy/application_1459170227047_0001/
Kill Command = /home/ubuntu/softwares/hadoop-2.7.2/bin/hadoop job -kill job_1459170227047_0001
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 0
2016-03-28 06:12:32,197 Stage-1 map = 0%, reduce = 0%
2016-03-28 06:12:53,069 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 2.44 sec
MapReduce Total cumulative CPU time: 2 seconds 440 msec
Ended Job = job_1459170227047_0001
Stage-4 is selected by condition resolver.
Stage-3 is filtered out by condition resolver.
Stage-5 is filtered out by condition resolver.
Moving data to: hdfs://localhost:9000/home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit_parquet/.hive-staging_hive_2016-03-28_06-12-21_397_2155480560380901326-1/-ext-10000
Loading data to table thanooj.cust_credit_parquet
Table thanooj.cust_credit_parquet stats: [numFiles=1, numRows=12, totalSize=769, rawDataSize=48]
MapReduce Jobs Launched:
Stage-Stage-1: Map: 1 Cumulative CPU: 2.44 sec HDFS Read: 4451 HDFS Write: 852 SUCCESS
Total MapReduce CPU Time Spent: 2 seconds 440 msec
OK
Time taken: 34.265 seconds
hive> select * from thanooj.cust_credit_parquet;
OK
SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
SLF4J: Defaulting to no-operation (NOP) logger implementation
SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details.
1 sriram 2015-10-12 ayodhya
2 seeta 2015-09-12 midhila
3 lakshman 2015-11-12 ayodhya
4 bharata 2015-12-12 ayodhya
5 sathrugna 2015-12-12 ayodhya
6 hanuma 2015-11-12 ayodhya
7 sriram 2015-10-12 ayodhya
8 seeta 2015-09-12 midhila
9 lakshman 2015-11-12 ayodhya
10 bharata 2015-12-12 ayodhya
11 sathrugna 2015-12-12 ayodhya
12 hanuma 2015-11-12 ayodhya
Time taken: 0.073 seconds, Fetched: 12 row(s)
hive> ! hadoop fs -cat /home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit_parquet/000000_2;
log4j: reset attribute= "false".
log4j: Threshold ="null".
log4j: Level value for root is [INFO].
log4j: root level set to INFO
log4j: Class name: [org.apache.log4j.ConsoleAppender]
log4j: Parsing layout of class: "org.apache.log4j.PatternLayout"
log4j: Setting property [conversionPattern] to [%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n].
log4j: Adding appender named [console] to category [root].
2016-03-28 06:15:33 WARN NativeCodeLoader:62 - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
PAR1ll,
��L
sriramseetlakshmanbharata sathrugnahanuma,srirambharata��"
ppL
2015-10-12
2015-09-12
2015-11-12
2015-12-12,
2015-12-12
2015-09-12�K�,,Layodhyamidhila,midhilaayodhya�\H
hive_schema%id
%name%
%doa%
%location%Lid��<
&�
name��&�<srirambharata&�
doa��&�<
2015-12-12
2015-09-12&�
location��&�<midhilaayodhya�(parquet-mr version 1.6.0HPAR1
hive> ! hadoop fs -text /home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit_parquet/000000_2;
log4j: reset attribute= "false".
log4j: Threshold ="null".
log4j: Level value for root is [INFO].
log4j: root level set to INFO
log4j: Class name: [org.apache.log4j.ConsoleAppender]
log4j: Parsing layout of class: "org.apache.log4j.PatternLayout"
log4j: Setting property [conversionPattern] to [%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n].
log4j: Adding appender named [console] to category [root].
2016-03-28 06:17:45 WARN NativeCodeLoader:62 - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
PAR1ll,
��L
sriramseetlakshmanbharata sathrugnahanuma,srirambharata��"
ppL
2015-10-12
2015-09-12
2015-11-12
2015-12-12,
2015-12-12
2015-09-12�K�,,Layodhyamidhila,midhilaayodhya�\H
hive_schema%id
%name%
%doa%
%location%Lid��<
&�
name��&�<srirambharata&�
doa��&�<
2015-12-12
2015-09-12&�
location��&�<midhilaayodhya�(parquet-mr version 1.6.0HPAR1
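-- NOTE: unlike SequenceFile, Parquet has no decoder behind `hadoop fs -text`
-- in this setup, so both -cat and -text above print raw bytes. A sketch for
-- inspecting the file with the parquet-tools CLI instead (assumption: the
-- parquet-tools command is installed and can resolve the HDFS path):
parquet-tools schema hdfs://localhost:9000/home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit_parquet/000000_2
parquet-tools head hdfs://localhost:9000/home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit_parquet/000000_2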
--AVRO SERDE
hive> use thanooj;
OK
Time taken: 0.009 seconds
hive> set hive.exec.compress.output=true;
hive> SET mapred.output.compression.codec=com.hadoop.compression.lzo.LzopCodec;
hive> CREATE TABLE cust_credit_avro (id int, name string, doa string, location string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' STORED AS AVRO;
OK
Time taken: 0.093 seconds
hive> show create table cust_credit_avro;
OK
CREATE TABLE `cust_credit_avro`(
`id` int COMMENT '',
`name` string COMMENT '',
`doa` string COMMENT '',
`location` string COMMENT '')
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
LOCATION
'hdfs://localhost:9000/home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit_avro'
TBLPROPERTIES (
'transient_lastDdlTime'='1459321615')
Time taken: 0.059 seconds, Fetched: 16 row(s)
hive> describe extended cust_credit_avro;
OK
id int
name string
doa string
location string
Detailed Table Information Table(tableName:cust_credit_avro, dbName:thanooj, owner:ubuntu, createTime:1459321615, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:id, type:int, comment:null), FieldSchema(name:name, type:string, comment:null), FieldSchema(name:doa, type:string, comment:null), FieldSchema(name:location, type:string, comment:null)], location:hdfs://localhost:9000/home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit_avro, inputFormat:org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.avro.AvroSerDe, parameters:{serialization.format=,, field.delim=,, line.delim=
}), bucketCols:[], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{}), storedAsSubDirectories:false), partitionKeys:[], parameters:{transient_lastDdlTime=1459321615}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)
Time taken: 0.11 seconds, Fetched: 7 row(s)
hive> insert overwrite table cust_credit_avro select * from cust_credit;
Query ID = ubuntu_20160330000846_a0e2bc20-9ceb-4276-9a3b-199ec5bdd4aa
Total jobs = 3
Launching Job 1 out of 3
Number of reduce tasks is set to 0 since there is no reduce operator
Starting Job = job_1459320620757_0002, Tracking URL = http://ubuntu:8088/proxy/application_1459320620757_0002/
Kill Command = /home/ubuntu/softwares/hadoop-2.7.2/bin/hadoop job -kill job_1459320620757_0002
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 0
2016-03-30 00:08:54,040 Stage-1 map = 0%, reduce = 0%
2016-03-30 00:09:00,304 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 1.97 sec
MapReduce Total cumulative CPU time: 1 seconds 970 msec
Ended Job = job_1459320620757_0002
Stage-4 is selected by condition resolver.
Stage-3 is filtered out by condition resolver.
Stage-5 is filtered out by condition resolver.
Moving data to: hdfs://localhost:9000/home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit_avro/.hive-staging_hive_2016-03-30_00-08-46_861_1164406953482225834-1/-ext-10000
Loading data to table thanooj.cust_credit_avro
Table thanooj.cust_credit_avro stats: [numFiles=1, numRows=12, totalSize=502, rawDataSize=0]
MapReduce Jobs Launched:
Stage-Stage-1: Map: 1 Cumulative CPU: 1.97 sec HDFS Read: 5000 HDFS Write: 565 SUCCESS
Total MapReduce CPU Time Spent: 1 seconds 970 msec
OK
Time taken: 14.809 seconds
hive> select * from cust_credit_avro;
OK
1 sriram 2015-10-12 ayodhya
2 seeta 2015-09-12 midhila
3 lakshman 2015-11-12 ayodhya
4 bharata 2015-12-12 ayodhya
5 sathrugna 2015-12-12 ayodhya
6 hanuma 2015-11-12 ayodhya
7 sriram 2015-10-12 ayodhya
8 seeta 2015-09-12 midhila
9 lakshman 2015-11-12 ayodhya
10 bharata 2015-12-12 ayodhya
11 sathrugna 2015-12-12 ayodhya
12 hanuma 2015-11-12 ayodhya
Time taken: 0.133 seconds, Fetched: 12 row(s)
hive> ! hadoop fs -ls /home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db;
log4j: reset attribute= "false".
log4j: Threshold ="null".
log4j: Level value for root is [INFO].
log4j: root level set to INFO
log4j: Class name: [org.apache.log4j.ConsoleAppender]
log4j: Parsing layout of class: "org.apache.log4j.PatternLayout"
log4j: Setting property [conversionPattern] to [%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n].
log4j: Adding appender named [console] to category [root].
2016-03-30 00:25:30 WARN NativeCodeLoader:62 - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Found 5 items
drwx-wx-wx - ubuntu supergroup 0 2016-03-03 23:12 /home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit
drwx-wx-wx - ubuntu supergroup 0 2016-03-30 00:09 /home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit_avro
drwx-wx-wx - ubuntu supergroup 0 2016-03-30 00:23 /home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit_orc
drwx-wx-wx - ubuntu supergroup 0 2016-03-28 06:12 /home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit_parquet
drwx-wx-wx - ubuntu supergroup 0 2016-03-18 06:57 /home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit_sec
hive> ! hadoop fs -ls /home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit_avro;
log4j: reset attribute= "false".
log4j: Threshold ="null".
log4j: Level value for root is [INFO].
log4j: root level set to INFO
log4j: Class name: [org.apache.log4j.ConsoleAppender]
log4j: Parsing layout of class: "org.apache.log4j.PatternLayout"
log4j: Setting property [conversionPattern] to [%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n].
log4j: Adding appender named [console] to category [root].
2016-03-30 00:25:55 WARN NativeCodeLoader:62 - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Found 1 items
-rwx-wx-wx 1 ubuntu supergroup 502 2016-03-30 00:08 /home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit_avro/000000_0
hive> ! hadoop fs -cat /home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit_avro/000000_0;
log4j: reset attribute= "false".
log4j: Threshold ="null".
log4j: Level value for root is [INFO].
log4j: root level set to INFO
log4j: Class name: [org.apache.log4j.ConsoleAppender]
log4j: Parsing layout of class: "org.apache.log4j.PatternLayout"
log4j: Setting property [conversionPattern] to [%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n].
log4j: Adding appender named [console] to category [root].
2016-03-30 00:26:21 WARN NativeCodeLoader:62 - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Objavro.codecdeflateavro.schema�{"type":"record","name":"cust_credit_avro","namespace":"thanooj","fields":[{"name":"id","type":["null","int"],"default":null},{"name":"name","type":["null","string"],"default":null},{"name":"doa","type":["null","string"],"default":null},{"name":"location","type":["null","string"],"default":null}]}{=���QQ�S�����K
�0E�R�"R� ! 8�r�(}�&�D�P����@�`9tF�ת�����;�:��J?�_���v!\�,���u��+ȑ)�9�M�k��V�� ��7w��@����~[��
��=��{=���QQ�S��
hive> ! hadoop fs -text /home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit_avro/000000_0;
log4j: reset attribute= "false".
log4j: Threshold ="null".
log4j: Level value for root is [INFO].
log4j: root level set to INFO
log4j: Class name: [org.apache.log4j.ConsoleAppender]
log4j: Parsing layout of class: "org.apache.log4j.PatternLayout"
log4j: Setting property [conversionPattern] to [%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n].
log4j: Adding appender named [console] to category [root].
2016-03-30 00:26:31 WARN NativeCodeLoader:62 - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
{"id":{"int":1},"name":{"string":"sriram"},"doa":{"string":"2015-10-12"},"location":{"string":"ayodhya"}}
{"id":{"int":2},"name":{"string":"seeta"},"doa":{"string":"2015-09-12"},"location":{"string":"midhila"}}
{"id":{"int":3},"name":{"string":"lakshman"},"doa":{"string":"2015-11-12"},"location":{"string":"ayodhya"}}
{"id":{"int":4},"name":{"string":"bharata"},"doa":{"string":"2015-12-12"},"location":{"string":"ayodhya"}}
{"id":{"int":5},"name":{"string":"sathrugna"},"doa":{"string":"2015-12-12"},"location":{"string":"ayodhya"}}
{"id":{"int":6},"name":{"string":"hanuma"},"doa":{"string":"2015-11-12"},"location":{"string":"ayodhya"}}
{"id":{"int":7},"name":{"string":"sriram"},"doa":{"string":"2015-10-12"},"location":{"string":"ayodhya"}}
{"id":{"int":8},"name":{"string":"seeta"},"doa":{"string":"2015-09-12"},"location":{"string":"midhila"}}
{"id":{"int":9},"name":{"string":"lakshman"},"doa":{"string":"2015-11-12"},"location":{"string":"ayodhya"}}
{"id":{"int":10},"name":{"string":"bharata"},"doa":{"string":"2015-12-12"},"location":{"string":"ayodhya"}}
{"id":{"int":11},"name":{"string":"sathrugna"},"doa":{"string":"2015-12-12"},"location":{"string":"ayodhya"}}
{"id":{"int":12},"name":{"string":"hanuma"},"doa":{"string":"2015-11-12"},"location":{"string":"ayodhya"}}
--ORC
hive> CREATE TABLE cust_credit_orc (id int, name string, doa string, location string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' STORED AS ORC tblproperties ("orc.compress"="ZLIB");
OK
Time taken: 0.092 seconds
hive> show create table cust_credit_orc;
OK
CREATE TABLE `cust_credit_orc`(
`id` int,
`name` string,
`doa` string,
`location` string)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
'hdfs://localhost:9000/home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit_orc'
TBLPROPERTIES (
'orc.compress'='ZLIB',
'transient_lastDdlTime'='1459322556')
Time taken: 0.087 seconds, Fetched: 17 row(s)
hive> describe extended cust_credit_orc;
OK
id int
name string
doa string
location string
Detailed Table Information Table(tableName:cust_credit_orc, dbName:thanooj, owner:ubuntu, createTime:1459322556, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:id, type:int, comment:null), FieldSchema(name:name, type:string, comment:null), FieldSchema(name:doa, type:string, comment:null), FieldSchema(name:location, type:string, comment:null)], location:hdfs://localhost:9000/home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit_orc, inputFormat:org.apache.hadoop.hive.ql.io.orc.OrcInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.ql.io.orc.OrcSerde, parameters:{serialization.format=,, field.delim=,, line.delim=
}), bucketCols:[], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{}), storedAsSubDirectories:false), partitionKeys:[], parameters:{orc.compress=ZLIB, transient_lastDdlTime=1459322556}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)
Time taken: 0.065 seconds, Fetched: 7 row(s)
hive> insert overwrite table cust_credit_orc select * from cust_credit;
Query ID = ubuntu_20160330002316_91bd31a1-cb05-4e17-adec-13d330ad606e
Total jobs = 1
Launching Job 1 out of 1
Number of reduce tasks is set to 0 since there's no reduce operator
Starting Job = job_1459320620757_0003, Tracking URL = http://ubuntu:8088/proxy/application_1459320620757_0003/
Kill Command = /home/ubuntu/softwares/hadoop-2.7.2/bin/hadoop job -kill job_1459320620757_0003
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 0
2016-03-30 00:23:23,196 Stage-1 map = 0%, reduce = 0%
2016-03-30 00:23:41,855 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 1.64 sec
MapReduce Total cumulative CPU time: 1 seconds 640 msec
Ended Job = job_1459320620757_0003
Stage-4 is selected by condition resolver.
Stage-3 is filtered out by condition resolver.
Stage-5 is filtered out by condition resolver.
Moving data to: hdfs://localhost:9000/home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit_orc/.hive-staging_hive_2016-03-30_00-23-16_290_118468332717631894-1/-ext-10000
Loading data to table thanooj.cust_credit_orc
Table thanooj.cust_credit_orc stats: [numFiles=1, numRows=12, totalSize=703, rawDataSize=3348]
MapReduce Jobs Launched:
Stage-Stage-1: Map: 1 Cumulative CPU: 1.64 sec HDFS Read: 4643 HDFS Write: 784 SUCCESS
Total MapReduce CPU Time Spent: 1 seconds 640 msec
OK
Time taken: 26.912 seconds
hive> select * from cust_credit_orc;
OK
1 sriram 2015-10-12 ayodhya
2 seeta 2015-09-12 midhila
3 lakshman 2015-11-12 ayodhya
4 bharata 2015-12-12 ayodhya
5 sathrugna 2015-12-12 ayodhya
6 hanuma 2015-11-12 ayodhya
7 sriram 2015-10-12 ayodhya
8 seeta 2015-09-12 midhila
9 lakshman 2015-11-12 ayodhya
10 bharata 2015-12-12 ayodhya
11 sathrugna 2015-12-12 ayodhya
12 hanuma 2015-11-12 ayodhya
Time taken: 0.087 seconds, Fetched: 12 row(s)
hive> ! hadoop fs -cat /home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit_orc/000000_2;
log4j: reset attribute= "false".
log4j: Threshold ="null".
log4j: Level value for root is [INFO].
log4j: root level set to INFO
log4j: Class name: [org.apache.log4j.ConsoleAppender]
log4j: Parsing layout of class: "org.apache.log4j.PatternLayout"
log4j: Setting property [conversionPattern] to [%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n].
log4j: Adding appender named [console] to category [root].
2016-03-30 00:34:24 WARN NativeCodeLoader:62 - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
ORC
P-
�PG
!
"
bharatasriram�PL���bf``R��Q���2204�5��54��
��l��
I
"
"
ayodhyamidhila�P �
F
T 1T 1
Fv�VL�A��� �pX������<ny�E�f'e֔��
B
K�
03204�5��542�
�,C8�@@@�Fwayodhyamidhila��A
�@
�a��j7�v��gp�Dd�V���[L>�gr|���/S<��E:�x���oT�_�`:�cw�����슒]��u#!�'���9�S�{���u�>�~��o�[<N��*�b�� `����b�`����Jq�(�p�'e$%�$
�e%�J,�)夹��
Mu
,u
�� lC# ��R"
T"�ŞX���Q�(Ğ������(�( :�`��,���,0�Q��Q�@�GI��G�����E�5>9?�BB(e����
��0l�c����`�
����$ !1�ȕ��Q�bO�H,J,Ib+.�,J̕X�S�Isq��X� A؆F@$��D�D��=�2?%�2Q�=73%#3'QbP�a����"
(j0��ORC
hive> ! hadoop fs -text /home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit_orc/000000_2;
log4j: reset attribute= "false".
log4j: Threshold ="null".
log4j: Level value for root is [INFO].
log4j: root level set to INFO
log4j: Class name: [org.apache.log4j.ConsoleAppender]
log4j: Parsing layout of class: "org.apache.log4j.PatternLayout"
log4j: Setting property [conversionPattern] to [%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n].
log4j: Adding appender named [console] to category [root].
2016-03-30 00:34:36 WARN NativeCodeLoader:62 - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
ORC
P-
�PG
!
"
bharatasriram�PL���bf``R��Q���2204�5��54��
��l��
I
"
"
ayodhyamidhila�P �
F
T 1T 1
Fv�VL�A��� �pX������<ny�E�f'e֔��
B
K�
03204�5��542�
�,C8�@@@�Fwayodhyamidhila��A
�@
�a��j7�v��gp�Dd�V���[L>�gr|���/S<��E:�x���oT�_�`:�cw�����슒]��u#!�'���9�S�{���u�>�~��o�[<N��*�b�� `����b�`����Jq�(�p�'e$%�$
�e%�J,�)夹��
Mu
,u
�� lC# ��R"
T"�ŞX���Q�(Ğ������(�( :�`��,���,0�Q��Q�@�GI��G�����E�5>9?�BB(e����
��0l�c����`�
����$ !1�ȕ��Q�bO�H,J,Ib+.�,J̕X�S�Isq��X� A؆F@$��D�D��=�2?%�2Q�=73%#3'QbP�a����"
(j0��ORC
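-- NOTE: ORC is also a binary format, so -cat and -text both print raw bytes.
-- Hive ships an ORC dump utility that prints a file's schema, stripes and
-- column statistics; a sketch, assuming the orcfiledump service available in
-- Hive 1.x (run from the shell, not the hive prompt):
hive --orcfiledump /home/ubuntu/softwares/hive-1.2.1/warehouse/thanooj.db/cust_credit_orc/000000_2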
hive>
--REFERENCES
https://cwiki.apache.org/confluence/display/Hive/SerDe
https://cwiki.apache.org/confluence/display/Hive/FileFormats
https://blog.twitter.com/2013/dremel-made-simple-with-parquet
https://acadgild.com/blog/file-formats-in-apache-hive/
https://github.com/thanoojgithub/Hadoop_files/blob/master/HIVE_File_Format.pdf