Priyansh121096 · October 10, 2023 01:24
diff --git a/local_iceberg_table_using_trino.txt b/local_iceberg_table_using_trino.txt
 Requirements: Linux, Trino
 Topics:
 - Apache Iceberg (https://iceberg.apache.org/)
 - Trino (https://trino.io/)
 - MySQL (https://www.mysql.com/)
 - Hive metastore (https://hive.apache.org/)

 # Install and configure mysql
 # https://www.digitalocean.com/community/tutorials/how-to-install-mysql-on-ubuntu-20-04


 # Create a mysql user for hive metastore and grant it access to "metastore"
 # https://gridscale.io/en/community/tutorials/create-a-mysql-user/#:~:text=Creating%20a%20new%20user%20is,any%20database%20or%20table%20yet.


 # Download hive-standalone-metastore 
 # Installation instructions: https://github.com/naushadh/hive-metastore/blob/main/Dockerfile (don't do the postgres part as we're using mysql instead)


 # Configure hive-metastore to use mysql
 # https://data-flair.training/blogs/configure-hive-metastore-to-mysql/


 # Init schema
 $HIVE_HOME/bin/schematool -dbType mysql -initSchema


 # Create a warehouse dir (this is where your iceberg table's data/metadata will be stored).
 # In prod, you'd use an s3 bucket but for local dev, a local directory is fine.
 mkdir $HOME/iceberg_warehouse


 # Start metastore
 $HIVE_HOME/bin/start-metastore


 # Bring up Trino with an iceberg catalog
 # Trino building and running locally: https://github.com/trinodb/trino/
 # Connector config: https://trino.io/docs/current/connector/iceberg.html#general-configuration


 # Connect to trino via cli client
 $TRINO_HOME/client/trino-cli/target/trino-cli-*-executable.jar


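 # Create a schema in the iceberg catalog
 # NOTE: shell variables such as $HOME are not expanded inside SQL, so spell out the absolute path of the warehouse dir.
 trino> CREATE SCHEMA iceberg.foo
     -> WITH (location = 'file:///home/<your-user>/iceberg_warehouse');
 trino> USE iceberg.foo;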


 # Create an Iceberg table
 trino> CREATE TABLE iceberg.foo.bar (
    ->     c1 INTEGER,
    ->     c2 DATE,
    ->     c3 DOUBLE
    -> )
    -> WITH (
    ->     format = 'PARQUET'
    -> );
    
    
 # Insert sample data
 trino:foo> INSERT INTO bar VALUES (1, Date('2020-07-31'), 5.6); 

 # The data and metadata directories for your table should've been created in $HOME/iceberg_warehouse
 # Have fun :)
	Requirements: Linux, Trino
	Topics:
	- Apache Iceberg (https://iceberg.apache.org/)
	- Trino (https://trino.io/)
	- MySQL (https://www.mysql.com/)
	- Hive metastore (https://hive.apache.org/)

	# Install and configure mysql
	# https://www.digitalocean.com/community/tutorials/how-to-install-mysql-on-ubuntu-20-04


	# Create a mysql user for hive metastore and grant it access to "metastore"
	# https://gridscale.io/en/community/tutorials/create-a-mysql-user/#:~:text=Creating%20a%20new%20user%20is,any%20database%20or%20table%20yet.


	# Download hive-standalone-metastore
	# Installation instructions: https://github.com/naushadh/hive-metastore/blob/main/Dockerfile (don't do the postgres part as we're using mysql instead)


	# Configure hive-metastore to use mysql
	# https://data-flair.training/blogs/configure-hive-metastore-to-mysql/


	# Init schema
	$HIVE_HOME/bin/schematool -dbType mysql -initSchema


	# Create a warehouse dir (this is where your iceberg table's data/metadata will be stored).
	# In prod, you'd use an s3 bucket but for local dev, a local directory is fine.
	mkdir $HOME/iceberg_warehouse


	# Start metastore
	$HIVE_HOME/bin/start-metastore


	# Bring up Trino with an iceberg catalog
	# Trino building and running locally: https://github.com/trinodb/trino/
	# Connector config: https://trino.io/docs/current/connector/iceberg.html#general-configuration


	# Connect to trino via cli client
	$TRINO_HOME/client/trino-cli/target/trino-cli-*-executable.jar


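	# Create a schema in the iceberg catalog
	# NOTE: shell variables such as $HOME are not expanded inside SQL, so spell out the absolute path of the warehouse dir.
	trino> CREATE SCHEMA iceberg.foo
	-> WITH (location = 'file:///home/<your-user>/iceberg_warehouse');
	trino> USE iceberg.foo;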


	# Create an Iceberg table
	trino> CREATE TABLE iceberg.foo.bar (
	-> c1 INTEGER,
	-> c2 DATE,
	-> c3 DOUBLE
	-> )
	-> WITH (
	-> format = 'PARQUET'
	-> );


	# Insert sample data
	trino:foo> INSERT INTO bar VALUES (1, Date('2020-07-31'), 5.6);

	# The data and metadata directories for your table should've been created in $HOME/iceberg_warehouse
	# Have fun :)