Step 1) Define the following environment variables and aliases in ~/.bash_profile or ~/.bashrc:
export SPARK_CONF_DIR='/Users/dixitm/Workspace/conf/spark-conf-dir'
# DATA_PLATFORM_ROOT: Local root dir where spark catalog & metastore is setup
export DATA_PLATFORM_ROOT="/Users/dixitm/Workspace/data/local-data-platform"
export ZEPPELIN_HOME="/opt/zeppelin/current"
export ZEPPELIN_NOTEBOOK_DIR="${HOME}/ZeppelinNotebooks"  # tilde does not expand inside double quotes, so use ${HOME}
alias zeppelin="${ZEPPELIN_HOME}/bin/zeppelin-daemon.sh"
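Reload the shell config and pre-create the directories referenced above. A minimal sketch; the mkdir targets are assumptions based on the paths defined here:
source ~/.bashrc   # or: source ~/.bash_profile
# Pre-create the catalog and notebook directories (assumed layout)
mkdir -p "${DATA_PLATFORM_ROOT}/catalog" "${ZEPPELIN_NOTEBOOK_DIR}"
# Verify the exports took effect
env | grep -E 'SPARK_CONF_DIR|DATA_PLATFORM_ROOT|ZEPPELIN'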
Step 2) Download the Zeppelin binary package from: https://zeppelin.apache.org/docs/latest/quickstart/install.html#downloading-binary-package
Step 3) Extract the downloaded package into the directory: /opt/zeppelin
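For example, assuming the 0.10.1 "bin-all" tarball sits in ~/Downloads (both the filename and the download location are assumptions; adjust to what you actually downloaded):
sudo mkdir -p /opt/zeppelin
# Extract into /opt/zeppelin (sudo only needed if your user cannot write there)
sudo tar -xzf ~/Downloads/zeppelin-0.10.1-bin-all.tgz -C /opt/zeppelin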
Step 4) Create a symlink named current pointing to the extracted directory:
cd /opt/zeppelin
ln -s zeppelin-0.10.1-bin-all current
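A quick sanity check that the link points where you expect:
readlink /opt/zeppelin/current   # should print: zeppelin-0.10.1-bin-all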
Step 5) Add the content below to the file ${ZEPPELIN_HOME}/conf/zeppelin-env.sh:
export USE_HADOOP=false
export ZEPPELIN_SPARK_USEHIVECONTEXT=false
export SPARK_SUBMIT_OPTIONS="--conf spark.driver.extraJavaOptions=-Dderby.system.home=${DATA_PLATFORM_ROOT}/catalog --conf spark.sql.catalogImplementation=hive"
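Zeppelin ships conf/zeppelin-env.sh.template, which you can copy as a starting point. A quick way to confirm the options expand correctly (assumes DATA_PLATFORM_ROOT is already exported in your shell):
cd "${ZEPPELIN_HOME}/conf"
cp -n zeppelin-env.sh.template zeppelin-env.sh   # only if the file does not exist yet
# Print the expanded spark-submit options; the Derby path should come out absolute
bash -c 'source ./zeppelin-env.sh && echo "$SPARK_SUBMIT_OPTIONS"'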
Step 1) Create the spark-conf directory (the path SPARK_CONF_DIR points to) with the following files:
├── log4j.properties
└── spark-env.sh
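The directory has to exist before the files can be written into it; mkdir -p is safe to re-run:
mkdir -p "${SPARK_CONF_DIR}"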
Step 2) Paste the lines below into spark-env.sh (Spark sources this file at every launch, so spark-defaults.conf is regenerated on each run):
echo "spark.hadoop.hive.exec.dynamic.partition true" > "${SPARK_CONF_DIR}/spark-defaults.conf"
echo "spark.hadoop.hive.exec.dynamic.partition.mode nonstrict" >> "${SPARK_CONF_DIR}/spark-defaults.conf"
echo "spark.driver.extraJavaOptions -Dderby.system.home=${DATA_PLATFORM_ROOT}/catalog" >> "${SPARK_CONF_DIR}/spark-defaults.conf"
echo "spark.sql.warehouse.dir ${DATA_PLATFORM_ROOT}/catalog" >> "${SPARK_CONF_DIR}/spark-defaults.conf"
echo "spark.sql.catalogImplementation hive" >> "${SPARK_CONF_DIR}/spark-defaults.conf"
Step 3) Paste the lines below into log4j.properties:
log4j.rootCategory=WARN, stderr
# Our custom SalesIntel settings. In development we want everything including debug to stdout
log4j.logger.io.salesintel=DEBUG,stdout
log4j.additivity.io.salesintel=false
# Set all apache log messages to ERROR -- this should be changed to WARN or INFO to debug problems with spark
log4j.logger.org.apache=ERROR
###
# The only message we can't control is the warning caused by parquet:
# SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
# SLF4J: Defaulting to no-operation (NOP) logger implementation
# SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details.
#
# Explained here: https://issues.apache.org/jira/browse/PARQUET-369
log4j.appender.stderr=org.apache.log4j.ConsoleAppender
log4j.appender.stderr.target=System.err
log4j.appender.stderr.layout=org.apache.log4j.PatternLayout
log4j.appender.stderr.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.target=System.out
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
# Settings to quiet third party logs that are too verbose
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
log4j.logger.org.apache.parquet=ERROR
log4j.logger.parquet=ERROR
# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR
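A rough end-to-end check, assuming a local Spark install with spark-shell on the PATH: with SPARK_CONF_DIR exported, startup output should drop to WARN and above, and the catalog should report hive.
# Pipe one expression through the REPL and exit; expect "hive" in the output
echo 'println(spark.conf.get("spark.sql.catalogImplementation"))' | spark-shell --master 'local[*]'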
Step 1) Start and stop Zeppelin using the zeppelin alias defined earlier:
zeppelin start
zeppelin stop
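zeppelin-daemon.sh also accepts status and restart. Once started, the web UI listens on port 8080 by default (assuming no port override in zeppelin-env.sh):
zeppelin status
# Expect an HTTP status line back from the UI
curl -sI http://localhost:8080 | head -n 1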