Last active
November 21, 2017 21:39
-
-
Save gbraccialli/097d712c5b98312766b56d5bd5b19de8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install and launch the jupyter-scala kernel (run these in a terminal).
# NOTE(review): original paste pointed at a proxy mirror (github.com);
# restored to the canonical upstream repository.
git clone https://github.com/jupyter-scala/jupyter-scala.git
cd jupyter-scala   # the launcher script lives inside the cloned repo
./jupyter-scala
# inside jupyter — run the Scala snippet below in a notebook cell
// Run inside a jupyter-scala notebook cell.
// The `$exclude` / `$ivy` / `$profile` magic imports are Ammonite/jupyter-scala
// kernel syntax for declaring dependencies — they do not compile under plain scalac.
import $exclude.`org.slf4j:slf4j-log4j12`, $ivy.`org.slf4j:slf4j-nop:1.7.21` // for cleaner logs: drop log4j binding, use the no-op logger
import $profile.`hadoop-2.6` // select the Hadoop 2.6 dependency profile
import $ivy.`org.apache.spark::spark-sql:2.1.0` // adjust spark version - spark >= 2.0
import $ivy.`org.apache.hadoop:hadoop-aws:2.6.4` // S3A filesystem support (matches the hadoop profile version)
import $ivy.`org.jupyter-scala::spark:0.4.2` // for JupyterSparkSession (SparkSession aware of the jupyter-scala kernel)

import org.apache.spark._
import org.apache.spark.sql._
import jupyter.spark.session._

// Build the session through JupyterSparkSession so the kernel can ship the
// notebook's compiled classes to Spark executors.
val spark = JupyterSparkSession.builder() // important - call this rather than SparkSession.builder()
  .jupyter() // this method must be called straightaway after builder()
  // .yarn("/etc/hadoop/conf") // optional, for Spark on YARN - argument is the Hadoop conf directory
  // .emr("2.6.4") // on AWS ElasticMapReduce, this adds aws-related jars to the spark jar list
  .master("local") // change to "yarn-client" on YARN
  // .config("spark.executor.instances", "10")
  // .config("spark.executor.memory", "3g")
  // .config("spark.hadoop.fs.s3a.access.key", awsCredentials._1)
  // .config("spark.hadoop.fs.s3a.secret.key", awsCredentials._2)
  .appName("notebook")
  .getOrCreate()

// Bring Dataset/DataFrame encoders and column syntax ($"col", .toDF, …) into scope.
import spark.implicits._
//import spark.sqlContext.implicits._
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment