@mingfang
Created April 29, 2019 13:28
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%classpath add mvn \n",
"com.google.protobuf protobuf-java 3.7.1\n",
"com.google.guava guava 20.0\n",
"io.netty netty-all 4.1.17.Final\n",
"org.apache.hadoop hadoop-common 2.7.3\n",
"org.apache.hadoop hadoop-client 2.7.3\n",
"org.apache.spark spark-sql_2.11 2.3.3\n",
"org.alluxio alluxio-core-client-runtime 2.0.0-preview"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import java.io._\n",
"val jars = ClasspathManager.getJars().toArray\n",
" .filter(x => \n",
" x.toString.contains(\"/guava\") || \n",
" x.toString.contains(\"/protobuf\")|| \n",
" x.toString.contains(\"alluxio\")||\n",
" x.toString.contains(\"/grpc\") ||\n",
" x.toString.contains(\"/opencensus\")\n",
" )\n",
" .mkString(\",\")\n",
"new PrintWriter(new File(\"jars.txt\" )){write(jars); close()}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys.process._\n",
"val ip = \"hostname -i\"!!"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%spark --noUI\n",
"import org.apache.spark.sql.SparkSession\n",
"import scala.io.Source\n",
"val cp = Source.fromFile(\"jars.txt\").getLines.mkString\n",
"val spark = SparkSession.builder()\n",
" .appName(\"Simple Application\")\n",
" .master(\"spark://spark-master.spark.svc.cluster.local:7077\")\n",
" .config(\"spark.driver.host\", \"250.2.146.2\")\n",
" .config(\"spark.driver.userClassPathFirst\", \"true\")\n",
" .config(\"spark.executor.userClassPathFirst\", \"true\")\n",
" .config(\"spark.jars\", cp)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"val sc = spark.sparkContext"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"spark.sparkContext.hadoopConfiguration.set(\"fs.alluxio.impl\", \"alluxio.hadoop.FileSystem\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"val textFile = spark.sparkContext.textFile(\"alluxio://alluxio-master.alluxio.svc.cluster.local:19998/TitanicPassengersTrainData.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"textFile.count()"
]
},
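{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"// Optional sketch (not part of the original gist): read the same Alluxio file\n",
"// as a DataFrame. Assumes the CSV has a header row; adjust the options as needed.\n",
"val df = spark.read\n",
"  .option(\"header\", \"true\")\n",
"  .option(\"inferSchema\", \"true\")\n",
"  .csv(\"alluxio://alluxio-master.alluxio.svc.cluster.local:19998/TitanicPassengersTrainData.csv\")\n",
"df.show(5)"
]
},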
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"spark.close()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Scala",
"language": "scala",
"name": "scala"
},
"language_info": {
"codemirror_mode": "text/x-scala",
"file_extension": ".scala",
"mimetype": "",
"name": "Scala",
"nbconverter_exporter": "",
"version": "2.11.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}