Skip to content

Instantly share code, notes, and snippets.

@gbraccialli
Last active February 18, 2018 01:47
Show Gist options
  • Save gbraccialli/7d5c00e2f71eb6b9aa3c8121473f3844 to your computer and use it in GitHub Desktop.
Save gbraccialli/7d5c00e2f71eb6b9aa3c8121473f3844 to your computer and use it in GitHub Desktop.
jupyter_spark.sh
#option 1 - start jupyter using pyspark
# pyspark starts its driver with $PYSPARK_DRIVER_PYTHON and hands it
# $PYSPARK_DRIVER_PYTHON_OPTS, so the "shell" comes up as a notebook server.
export PYSPARK_DRIVER_PYTHON=/mnt/lib/python/anaconda2/bin/ipython \
       PYSPARK_DRIVER_PYTHON_OPTS="notebook"
# NOTE(review): queue3 is presumably a site-specific YARN queue - adjust for your cluster.
pyspark --queue queue3
#option 2 - vanilla jupyter with jars
# Run from a plain Jupyter kernel: describe the Spark install and the extra
# jar through environment variables, let findspark make pyspark importable,
# then start a local session that has the PostgreSQL JDBC driver available.
import os

# Single source for the jar path that every setting below refers to.
jdbc_jar = "/tmp/shared/postgresql-42.2.1.jar"
jars_option = "--jars " + jdbc_jar

# The environment has to be in place before findspark.init() runs and
# before the session (and its JVM) is created.
os.environ.update({
    "SPARK_HOME": "/Downloads/spark-2.2.1-bin-hadoop2.7/",
    "SPARK_CLASSPATH": jdbc_jar,
    "PYSPARK_SUBMIT_ARGS": jars_option + " pyspark-shell",
    "SPARK_SUBMIT_OPTIONS": jars_option,
})

# findspark.init() must come before the pyspark imports below.
import findspark
findspark.init()

import pyspark
from pyspark.sql import SparkSession

# Same jar registered under each jar/classpath-related Spark property.
builder = SparkSession.builder.master("local[*]").appName("jupyter2")
for conf_key in (
    "spark.jars",
    "spark.driver.extraClassPath",
    "spark.executor.extraClassPath",
    "spark.repl.local.jars",
):
    builder = builder.config(conf_key, jdbc_jar)
spark = builder.getOrCreate()
#configs - jupyter notebook server settings (jupyter_notebook_config.py)
cat jupyter_notebook_config.py
# Jupyter Notebook server settings.
# `c` is not defined in this snippet; presumably it is the config object that
# Jupyter provides when it loads jupyter_notebook_config.py - TODO confirm.

# Very high cap on the IOPub data rate so large cell outputs are not throttled.
c.NotebookApp.iopub_data_rate_limit = 10000000000
# Bind to every network interface so the server is reachable from other hosts.
c.NotebookApp.ip = '0.0.0.0'
# Root directory the notebook file browser serves.
c.NotebookApp.notebook_dir = u'/home/hadoop/notebooks'
# Do not try to launch a browser on start-up.
c.NotebookApp.open_browser = False
# Insist on a password (the server also listens on all interfaces, see above).
c.NotebookApp.password_required = True
# Salted hash of the login password ("sha1:<salt>:<digest>").
# NOTE(review): this hash is published with the snippet - generate your own
# (e.g. with notebook.auth.passwd()) instead of reusing it.
c.NotebookApp.password = u'sha1:4ba49e39fac1:f0e97c8bedea3286d99e0135d6cb37904096b543'
# TCP port the server listens on.
c.NotebookApp.port = 7777
# Front-end tweak: switch off automatic insertion of closing brackets and
# quotes in code cells. The options are stored under CodeCell -> cm_config of
# the 'notebook' config section (presumably the CodeMirror editor options).
from notebook.services.config import ConfigManager

editor_options = {"autoCloseBrackets": False, "autoCloseQuotes": False}
c = ConfigManager()
c.update('notebook', {"CodeCell": {"cm_config": editor_options}})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment