Download storm-hive-examples-<version>.jar from Maven Central, or build it from https://github.com/apache/storm/tree/v1.2.1/examples/storm-hive-examples
Note: Make sure the jar version matches the Storm version running on your cluster.
We will save records with the following fields into a Hive table:
{"id","name","phone","street","city","state"}
beeline>
-- Create the target database if it is missing (safe to re-run) and switch to it.
CREATE DATABASE IF NOT EXISTS stormdb;
USE stormdb;

-- Non-partitioned table stored as ORC, with one column per record field.
CREATE TABLE `storm_person` (
    `id`     INT,
    `name`   STRING,
    `phone`  STRING,
    `street` STRING,
    `city`   STRING,
    `state`  STRING
)
ROW FORMAT SERDE
    'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
STORED AS
    INPUTFORMAT  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat';
1.1) Give the 'storm' user access to the table/database.
For example, with HDFS ACLs (alternatively, grant access through Ranger):
# Grant 'storm' rwx on the database directory and everything already inside it
# (access ACL, applied recursively with -R), and on anything created later
# (default ACL). A default ACL alone does not cover directories that already exist.
hdfs dfs -setfacl -R -m user:storm:rwx,default:user:storm:rwx /warehouse/tablespace/managed/hive/stormdb.db
1.2) Submit storm topology
# Positional arguments after the class name, as used here:
#   Hive metastore Thrift URI, database, table, topology name.
# Replace the metastore host with the one for your cluster.
/usr/hdp/current/storm-client/bin/storm jar \
    ./storm-hive-examples-1.2.1.3.1.0.0-78.jar \
    org.apache.storm.hive.bolt.HiveTopology \
    'thrift://c4114-node3.coelab.cloudera.com:9083' \
    stormdb \
    storm_person \
    storm_person_topology
1.3) Test the output from hive
-- Count the rows currently in the table. A global aggregate always returns
-- exactly one row, so no LIMIT is needed.
beeline> SELECT COUNT(*) FROM stormdb.storm_person;
beeline>
-- Ensure the database exists and select it.
CREATE DATABASE IF NOT EXISTS stormdb;
USE stormdb;

-- ORC table partitioned by state and then city.
CREATE TABLE `storm_person_partTable` (
    `id`     INT,
    `name`   STRING,
    `phone`  STRING,
    `street` STRING
)
PARTITIONED BY (
    `state` STRING,
    `city`  STRING
)
ROW FORMAT SERDE
    'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
STORED AS
    INPUTFORMAT  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat';
1.1) Give the 'storm' user access to the table/database. For example, with HDFS ACLs (alternatively, grant access through Ranger):
# Grant 'storm' rwx on the database directory and everything already inside it
# (access ACL, applied recursively with -R), and on anything created later
# (default ACL). A default ACL alone does not cover directories that already exist.
hdfs dfs -setfacl -R -m user:storm:rwx,default:user:storm:rwx /warehouse/tablespace/managed/hive/stormdb.db
1.2) Submit storm topology
# Positional arguments after the class name, as used here:
#   Hive metastore Thrift URI, database, table, topology name.
# Replace the metastore host with the one for your cluster.
/usr/hdp/current/storm-client/bin/storm jar \
    ./storm-hive-examples-1.2.1.3.1.0.0-78.jar \
    org.apache.storm.hive.bolt.HiveTopologyPartitioned \
    'thrift://c4114-node3.coelab.cloudera.com:9083' \
    stormdb \
    storm_person_partTable \
    storm_person_Part_table_topology
1.3) Test the output from hive
-- Count the rows currently in the table. A global aggregate always returns
-- exactly one row, so no LIMIT is needed.
beeline> SELECT COUNT(*) FROM stormdb.storm_person_partTable;
beeline>
-- Ensure the database exists and select it.
CREATE DATABASE IF NOT EXISTS stormdb;
USE stormdb;

-- Transactional ORC table partitioned by state and then city.
CREATE TABLE `storm_person_partTransTable` (
    `id`     INT,
    `name`   STRING,
    `phone`  STRING,
    `street` STRING
)
PARTITIONED BY (
    `state` STRING,
    `city`  STRING
)
STORED AS ORC
TBLPROPERTIES ('transactional' = 'true');
1.1) Give the 'storm' user access to the table/database. For example, with HDFS ACLs (alternatively, grant access through Ranger):
# Grant 'storm' rwx on the database directory and everything already inside it
# (access ACL, applied recursively with -R), and on anything created later
# (default ACL). A default ACL alone does not cover directories that already exist.
hdfs dfs -setfacl -R -m user:storm:rwx,default:user:storm:rwx /warehouse/tablespace/managed/hive/stormdb.db
1.2) Submit storm topology
# Positional arguments after the class name, as used here:
#   Hive metastore Thrift URI, database, table, topology name.
# Replace the metastore host with the one for your cluster.
/usr/hdp/current/storm-client/bin/storm jar \
    ./storm-hive-examples-1.2.1.3.1.0.0-78.jar \
    org.apache.storm.hive.bolt.HiveTopologyPartitioned \
    'thrift://c4114-node3.coelab.cloudera.com:9083' \
    stormdb \
    storm_person_partTransTable \
    storm_person_partTransTable_topology
1.3) Test the output from hive
-- Count the rows currently in the table. A global aggregate always returns
-- exactly one row, so no LIMIT is needed.
beeline> SELECT COUNT(*) FROM stormdb.storm_person_partTransTable;