Skip to content

Instantly share code, notes, and snippets.

@alexarchambault
Last active October 3, 2018 14:21
Show Gist options
  • Select an option

  • Save alexarchambault/92b11406b7e586e7cb8032d2151af30b to your computer and use it in GitHub Desktop.

Select an option

Save alexarchambault/92b11406b7e586e7cb8032d2151af30b to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
]
},
{
"data": {
"text/plain": [
"\u001b[32mimport \u001b[39m\u001b[36m$ivy.$ \n",
"\u001b[39m\n",
"\u001b[32mimport \u001b[39m\u001b[36m$ivy.$ \n",
"\n",
"\u001b[39m\n",
"\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql._\u001b[39m"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import $ivy.`org.apache.spark::spark-sql:2.3.1`\n",
"import $ivy.`sh.almond::almond-spark:0.1.8`\n",
"\n",
"import org.apache.spark.sql._"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Getting spark JARs\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"log4j:WARN No appenders could be found for logger (org.eclipse.jetty.util.log).\n",
"log4j:WARN Please initialize the log4j system properly.\n",
"log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Creating SparkSession\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[log redacted]\n",
"18/10/03 16:14:52 INFO Executor: Starting executor ID driver on host localhost\n",
"18/10/03 16:14:52 INFO Executor: Using REPL class URI: http://127.0.1.1:33731\n",
"18/10/03 16:14:53 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 33811.\n",
"18/10/03 16:14:53 INFO NettyBlockTransferService: Server created on 192.168.0.15:33811\n",
"18/10/03 16:14:53 INFO BlockManager: Using org.apache.spark.storage.RandomBlockReplicationPolicy for block replication policy\n",
"18/10/03 16:14:53 INFO BlockManagerMaster: Registering BlockManager BlockManagerId(driver, 192.168.0.15, 33811, None)\n",
"18/10/03 16:14:53 INFO BlockManagerMasterEndpoint: Registering block manager 192.168.0.15:33811 with 867.6 MB RAM, BlockManagerId(driver, 192.168.0.15, 33811, None)\n",
"18/10/03 16:14:53 INFO BlockManagerMaster: Registered BlockManager BlockManagerId(driver, 192.168.0.15, 33811, None)\n",
"18/10/03 16:14:53 INFO BlockManager: Initialized BlockManager: BlockManagerId(driver, 192.168.0.15, 33811, None)\n"
]
},
{
"data": {
"text/html": [
"<a href=\"http://192.168.0.15:4040\">Spark UI</a>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"\u001b[36mspark\u001b[39m: \u001b[32mSparkSession\u001b[39m = org.apache.spark.sql.SparkSession@5e7015b1"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"val spark = {\n",
" NotebookSparkSession.builder()\n",
" .master(\"local\")\n",
" .appName(\"Demo App\")\n",
" .getOrCreate()\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[log redacted]\n"
]
},
{
"data": {
"text/html": [
"<script>\n",
"var comm = Jupyter.notebook.kernel.comm_manager.new_comm('cancel-stage-432cf627-be46-43d7-80fa-62316d04dcf2', {});\n",
"\n",
"function cancelStage(stageId) {\n",
" console.log('Cancelling stage ' + stageId);\n",
" comm.send({ 'stageId': stageId });\n",
"}\n",
"</script>\n",
" "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
" <span style=\"float: left;\">load at cmd2.sc:5</span>\n",
"</div>\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div class=\"progress\">\n",
" <div class=\"progress-bar\" role=\"progressbar\" style=\"background-color: blue; width: 100%\" aria-valuenow=\"100\" aria-valuemin=\"0\" aria-valuemax=\"100\">\n",
" 1 / 1\n",
" </div>\n",
" <div class=\"progress-bar\" role=\"progressbar\" style=\"background-color: red; width: 0%\" aria-valuenow=\"0\" aria-valuemin=\"0\" aria-valuemax=\"100\"></div>\n",
"</div>\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[log redacted]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[log redacted]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[log redacted]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"18/10/03 16:14:57 INFO SparkContext: Created broadcast 2 from load at cmd2.sc:5\n",
"18/10/03 16:14:57 INFO FileSourceScanExec: Planning scan with bin packing, max size: 8388634 bytes, open cost is considered as scanning 4194304 bytes.\n"
]
},
{
"data": {
"text/plain": [
"\u001b[36mdf\u001b[39m: \u001b[32mDataFrame\u001b[39m = [1;\"a\": string]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"val df = spark\n",
" .read\n",
" .format(\"csv\")\n",
" .option(\"header\", \"true\")\n",
" .load(\"data.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Scala (2.11)",
"language": "scala",
"name": "scala211"
},
"language_info": {
"codemirror_mode": "text/x-scala",
"file_extension": ".scala",
"mimetype": "text/x-scala",
"name": "scala",
"nbconvert_exporter": "script",
"version": "2.11.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment