Skip to content

Instantly share code, notes, and snippets.

@mannharleen
Created September 10, 2017 13:46
Show Gist options
  • Select an option

  • Save mannharleen/b540d7e2732cb9617a4495b93a4c7b2e to your computer and use it in GitHub Desktop.

Select an option

Save mannharleen/b540d7e2732cb9617a4495b93a4c7b2e to your computer and use it in GitHub Desktop.
Convert file types using Hive
-- Convert a table between file formats (Avro -> Parquet -> ORC) using Hive.
-- create an Avro-backed table and load a few sample rows
create table if not exists t_avro (col1 int) stored as avro;
insert into table t_avro values (1),(2),(3),(4);
-- *INFO* to create the Avro table from a custom schema (.avsc), use one of:
-- Option 1: point at a schema file on HDFS (columns are derived from the schema)
--   create table t_avro stored as avro
--     tblproperties('avro.schema.url'='/user/hive/schemas/t_avro/t_avro.avsc');
-- Option 2: inline schema literal. Note: each Avro field must be declared as a
--   {"name": ..., "type": ...} pair, and Avro primitive types are lowercase
--   ("int", not "Int"):
--   create table t_avro stored as avro
--     tblproperties('avro.schema.literal'='{
--       "name": "t_avro_schema",
--       "type": "record",
--       "fields": [ { "name": "col1", "type": "int" } ]
--     }');
--
-- create a Parquet table from the existing table (CTAS copies the data)
create table t_parquet stored as parquet as select * from t_avro;
-- create an ORC table from the existing table
create table t_orc stored as orc as select * from t_parquet;
-- verify the schema of the Parquet data files Hive created;
-- `dfs` is the Hive CLI built-in HDFS escape (a bare `hdfs dfs` shell command
-- would not run inside a Hive script)
dfs -get /user/hive/warehouse/t_parquet/* /home/cloudera/t_parquet/;
-- then, from a regular shell (not Hive), inspect a downloaded file:
--   parquet-tools schema <file>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment