Created
December 20, 2021 16:49
-
-
Save naren-dremio/91f324484e4d0a7243e996f4d432fdf6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install Hive 3.1.2 (binary tarball) under the Homebrew Cellar prefix.
export HIVE_HOME=/opt/homebrew/Cellar/hive/apache-hive-3.1.2-bin
wget https://dlcdn.apache.org/hive/hive-3.1.2/apache-hive-3.1.2-bin.tar.gz
mkdir -p /opt/homebrew/Cellar/hive && tar xvf apache-hive-3.1.2-bin.tar.gz -C /opt/homebrew/Cellar/hive
#hive-site.xml ($HIVE_HOME/conf/hive-site.xml)
<?xml version="1.0"?>
<configuration>
  <property>
    <name>hive.querylog.location</name>
    <!-- NOTE(review): this property expects a directory; a path ending in
         hive.log looks like a file — confirm the intended log location. -->
    <value>/Users/apache-hive-3.1.2-bin/log/hive.log</value>
  </property>
  <property>
    <name>hive.querylog.enable.plan.progress</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.log.explain.output</name>
    <value>false</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <!-- Database name must match the one created during MySQL prep
         ("create database metastore;"), otherwise schematool initializes
         a second, empty schema in a different database. -->
    <value>jdbc:mysql://localhost:3306/metastore?createDatabaseIfNotExist=true</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.cj.jdbc.Driver</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>root</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>root</value>
  </property>
  <property>
    <!-- Skip strict metastore schema version checks (dev convenience). -->
    <name>hive.metastore.schema.verification</name>
    <value>false</value>
  </property>
  <property>
    <!-- Warehouse root on the single-node HDFS started below. -->
    <name>hive.metastore.warehouse.dir</name>
    <value>hdfs://localhost:9000/user/hive/warehouse</value>
  </property>
</configuration>
#Install Hadoop 3.3.1 (binary tarball) under the Homebrew Cellar prefix.
export HADOOP_HOME=/opt/homebrew/Cellar/hadoop/3.3.1/libexec
wget https://dlcdn.apache.org/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz
mkdir -p /opt/homebrew/Cellar/hadoop && tar xvf hadoop-3.3.1.tar.gz -C /opt/homebrew/Cellar/hadoop
#core-site.xml ($HADOOP_HOME/etc/hadoop/core-site.xml):
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <!-- Default filesystem: single-node HDFS NameNode on port 9000. -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
  </property>
</configuration>
#hdfs-site.xml ($HADOOP_HOME/etc/hadoop/hdfs-site.xml):
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <!-- Single-node cluster: one replica per block. -->
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
</configuration>
#yarn-site.xml ($HADOOP_HOME/etc/hadoop/yarn-site.xml):
<?xml version="1.0"?>
<configuration>
  <!-- Enable the shuffle service required by MapReduce on YARN. -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <!-- Environment variables NodeManager passes through to containers. -->
  <property>
    <name>yarn.nodemanager.env-whitelist</name>
    <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
  </property>
</configuration>
#mapred-site.xml ($HADOOP_HOME/etc/hadoop/mapred-site.xml):
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <!-- Run MapReduce jobs on YARN rather than the local runner. -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <!-- Classpath for MR applications; $HADOOP_MAPRED_HOME is expanded by
       YARN at container launch (see env-whitelist in yarn-site.xml). -->
  <property>
    <name>mapreduce.application.classpath</name>
    <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
  </property>
</configuration>
#Add Iceberg 0.12.1 runtime jars to the Hive and Spark classpaths.
# NOTE(review): $SPARK_HOME is never exported in this doc — set it to your
# Spark installation before running the second pair of commands.
cd $HIVE_HOME/lib
wget https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-hive-runtime/0.12.1/iceberg-hive-runtime-0.12.1.jar
cd $SPARK_HOME/jars
wget https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime/0.12.1/iceberg-spark-runtime-0.12.1.jar
# Prep for Hadoop: passwordless SSH to localhost, then format HDFS and
# start all daemons (NameNode, DataNode, ResourceManager, NodeManager).
ssh-keygen
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
# enable remote login on macOS (System Settings > Sharing > Remote Login)
$HADOOP_HOME/bin/hdfs namenode -format
$HADOOP_HOME/sbin/start-all.sh
# Prepare the Hive metastore backing database (MySQL) and start the metastore.
brew install mysql
mysql -uroot
mysql> create database metastore;
mysql> ALTER USER 'root'@'localhost' IDENTIFIED BY 'root';
# Initialize the metastore schema with schematool (reads the JDBC settings
# from hive-site.xml). Do NOT also `source` hive-schema-3.1.0.mysql.sql by
# hand — initializing twice fails on already-existing tables.
$HIVE_HOME/bin/schematool -initSchema -dbType mysql
# Run the metastore service in the background (thrift://localhost:9083).
$HIVE_HOME/bin/hive --service metastore &
#notebook
import findspark

findspark.init()
from pyspark.sql.session import SparkSession

# Local Spark session wired to the Hive metastore (thrift://localhost:9083)
# with Iceberg's SQL extensions. Two catalogs are configured:
#   spark_catalog — SparkSessionCatalog backed by Hive (default tables)
#   local         — a dedicated Iceberg SparkCatalog, also Hive-backed
spark = (
    SparkSession.builder.master("local").appName("Iceberg Demo")
    .config("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")
    .config("spark.sql.catalog.spark_catalog.type", "hive")
    .config("spark.sql.catalog.spark_catalog", "org.apache.iceberg.spark.SparkSessionCatalog")
    .config("spark.sql.catalog.local", "org.apache.iceberg.spark.SparkCatalog")
    .config("spark.sql.catalog.local.type", "hive")
    .config("spark.sql.catalog.local.uri", "thrift://localhost:9083")
    .config("hive.metastore.uris", "thrift://localhost:9083")
    .getOrCreate()
)

# IF EXISTS makes the drop a no-op on the first run instead of raising
# an AnalysisException. SQLContext is deprecated; spark.sql is the
# modern entry point for SQL statements.
spark.sql("DROP TABLE IF EXISTS default.iceberg_sample")
spark.sql("CREATE TABLE default.iceberg_sample (id bigint, data string) USING iceberg")
spark.sql("INSERT INTO default.iceberg_sample VALUES (1, 'a'), (2, 'b'), (3, 'c')")
df = spark.sql("SELECT * FROM default.iceberg_sample")
df.show()

# Bulk-load rows 4..9999. Each INSERT commits a separate Iceberg snapshot,
# so one-row-per-INSERT means ~10k commits and ~10k tiny data files; batching
# multi-row VALUES produces the same table content with far fewer commits.
BATCH = 500
for start in range(4, 10000, BATCH):
    rows = ", ".join(
        "({i}, '{i}a')".format(i=i) for i in range(start, min(start + BATCH, 10000))
    )
    spark.sql("INSERT INTO default.iceberg_sample VALUES " + rows)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment