@itayw
Created January 21, 2016 07:50
beaker running apache-spark-node
FROM beakernotebook/beaker
MAINTAINER "Itay Weinberger <[email protected]>"
# Install Node.js 5.x; the base (FROM) image still ships Node 0.12
ENV NODE_VERSION v5.1.0
RUN wget --no-check-certificate https://nodejs.org/dist/$NODE_VERSION/node-$NODE_VERSION-linux-x64.tar.gz && \
    tar -C /usr/local --strip-components 1 -xzf node-$NODE_VERSION-linux-x64.tar.gz && \
    rm node-$NODE_VERSION-linux-x64.tar.gz
# Spark ENV vars
ENV SPARK_VERSION 1.6.0
ENV SPARK_VERSION_STRING spark-$SPARK_VERSION-bin-hadoop2.6
ENV SPARK_DOWNLOAD_URL http://d3kbcqa49mib13.cloudfront.net/$SPARK_VERSION_STRING.tgz
ENV SPARK_HOME /usr/local/spark
# Download and extract Spark
RUN wget $SPARK_DOWNLOAD_URL && \
    mkdir -p $SPARK_HOME && \
    tar xvf $SPARK_VERSION_STRING.tgz -C /tmp && \
    cp -rf /tmp/$SPARK_VERSION_STRING/* $SPARK_HOME/ && \
    rm -rf /tmp/$SPARK_VERSION_STRING && \
    rm $SPARK_VERSION_STRING.tgz
# Set ASSEMBLY_JAR
ENV ASSEMBLY_JAR $SPARK_HOME/lib/spark-assembly-$SPARK_VERSION-hadoop2.6.0.jar
# Install apache-spark-node
RUN cd /home/beaker/src/core/config/plugins/eval/node/app && \
    npm install apache-spark-node
# Assign a volume for notebooks; make sure you set the correct permissions on the external volume
RUN mkdir /notebooks && \
    chown beaker:beaker /notebooks
VOLUME /notebooks
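To build and run this image (a sketch, not part of the original gist: it assumes the beakernotebook/beaker base image supplies its own CMD/ENTRYPOINT, that Beaker listens on its default port 8800, and the image/directory names are placeholders):

# Build from the directory containing this Dockerfile
docker build -t beaker-spark-node .

# Give the external notebooks directory permissions matching the
# in-container beaker user, per the comment above (the UID:GID here is
# hypothetical -- check with `docker run --rm beaker-spark-node id beaker`)
mkdir -p ~/notebooks
sudo chown 1000:1000 ~/notebooks

# Run, inheriting the base image's startup command; 8800 is assumed to be
# Beaker's default HTTP port
docker run -d -p 8800:8800 -v ~/notebooks:/notebooks beaker-spark-node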
{
  "beaker": "2",
  "evaluators": [
    {
      "name": "HTML",
      "plugin": "HTML",
      "view": {
        "cm": {
          "mode": "htmlmixed"
        }
      }
    },
    {
      "name": "JavaScript",
      "plugin": "JavaScript",
      "view": {
        "cm": {
          "mode": "javascript",
          "background": "#FFE0F0"
        }
      }
    },
    {
      "name": "Node",
      "plugin": "Node",
      "view": {
        "cm": {
          "mode": "javascript"
        }
      },
      "deferred": {
        "promise": {
          "$$state": {
            "status": 0,
            "pending": [
              [
                {
                  "promise": {
                    "$$state": {
                      "status": 0,
                      "pending": [
                        [
                          {
                            "promise": {
                              "$$state": {
                                "status": 0
                              }
                            }
                          },
                          null,
                          null,
                          null
                        ]
                      ]
                    }
                  }
                },
                null,
                null,
                null
              ]
            ]
          }
        }
      }
    }
  ],
  "cells": [
    {
      "id": "markdownFWtsuF",
      "type": "markdown",
      "body": [
"#Beaker Running Apache Spark with NodeJS",
"",
"The following notebook uses [`apache-spark-node`](https://github.com/henridf/apache-spark-node) by [@henridf](https://github.com/henridf).",
"Code sections below demonstrate initialization and additional features as taken from `/test`.",
"",
"The top section is an initialization script which sets *global* `spark` and `sqlContext` vars which can be used throughout the notebook."
      ],
      "evaluatorReader": false
    },
    {
      "id": "coder3sdNr",
      "type": "code",
      "evaluator": "Node",
      "input": {
        "body": [
          "var args = [\"--class\", \"org.apache.spark.repl.Main\",",
          " \"shark-shell\"];",
          "",
          "global.spark = require('apache-spark-node')",
          "global.sqlContext = spark(args, process.env.ASSEMBLY_JAR).sqlContext;",
          "",
          "//Watch your stderr tab and see Spark starting."
        ]
      },
      "output": {
        "state": {},
        "selectedType": "Text",
        "hidden": true,
        "result": "[object Object]",
        "height": 51
      },
      "evaluatorReader": true,
      "lineCount": 7
    },
    {
      "id": "codeYNeaVG",
      "type": "code",
      "evaluator": "Node",
      "input": {
        "body": [
          "//This section will output results to stdout.",
          "",
          "var df = sqlContext.createDataFrame([{\"name\":\"Michael\"}, {\"name\":\"Andy\", \"age\":30}, {\"name\":\"Justin\", \"age\": 19}]);",
          "df.show(); //watch your stdout"
        ]
      },
      "output": {
        "state": {},
        "result": "undefined",
        "selectedType": "Text",
        "height": 56
      },
      "evaluatorReader": true,
      "lineCount": 4
    },
    {
      "id": "codenNyKzu",
      "type": "code",
      "evaluator": "Node",
      "input": {
        "body": [
"//This section will output results on in the results pane",
"var df = sqlContext.createDataFrame([{\"name\":\"Michael\"}, {\"name\":\"Andy\", \"age\":30}, {\"name\":\"Justin\", \"age\": 19}]);",
"df.jvm_obj.showString(20, true).split(\"\\n\");"
]
},
"output": {
"state": {},
"result": "+----+-------+,| age| name|,+----+-------+,|null|Michael|,| 30| Andy|,| 19| Justin|,+----+-------+,",
"selectedType": "Text",
"height": 56
},
"evaluatorReader": true,
"lineCount": 3
},
{
"id": "codeJBFrnK",
"type": "code",
"evaluator": "Node",
"input": {
"body": [
"//Using sqlContext",
"",
" var people = [{\"name\":\"Michael\"},",
" {\"name\":\"Andy\", \"age\":30},",
" {\"name\":\"Justin\", \"age\":19}];",
"",
"var df = sqlContext.createDataFrame(people);",
"df.registerTempTable(\"people\");",
"//notice we're using the Sync option",
"sqlContext.sql(\"SELECT name FROM people WHERE age >= 13 AND age <= 19\").collectSync(",
" function(err, res) {",
" res",
" }",
");",
""
]
},
"output": {
"state": {},
"result": "Justin",
"selectedType": "Text",
"height": 56
},
"evaluatorReader": true,
"lineCount": 15
}
],
"namespace": {}
}
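For reference, the same API surface exercised in the notebook cells above can also run as a plain Node script inside the container (a minimal sketch; `sketch.js` is a hypothetical file name, and `ASSEMBLY_JAR` comes from the Dockerfile's ENV):

// sketch.js -- hypothetical standalone script; run with `node sketch.js`
// inside the container so that process.env.ASSEMBLY_JAR is set
var args = ["--class", "org.apache.spark.repl.Main", "shark-shell"];
var spark = require('apache-spark-node');
var sqlContext = spark(args, process.env.ASSEMBLY_JAR).sqlContext;

var people = [{"name": "Michael"},
              {"name": "Andy", "age": 30},
              {"name": "Justin", "age": 19}];

var df = sqlContext.createDataFrame(people);
df.show();                        // prints the table to stdout
df.registerTempTable("people");

// Same synchronous collect used in the last notebook cell
sqlContext.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
          .collectSync(function(err, res) {
            if (err) throw err;
            console.log(res);     // expect a row containing "Justin"
          });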
@tobilg
tobilg commented Jan 21, 2016

I think the Dockerfile is missing either its ENTRYPOINT or its CMD. Could you please add some info on how to run this from the command line? Thanks!
