Skip to content

Instantly share code, notes, and snippets.

@maasg
Created November 26, 2016 11:25
Show Gist options
  • Select an option

  • Save maasg/e470654d15a73a1cc1a280e37561a8a5 to your computer and use it in GitHub Desktop.

Select an option

Save maasg/e470654d15a73a1cc1a280e37561a8a5 to your computer and use it in GitHub Desktop.
Simple TimeSeries Aggregation using Spark
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "ts-aggregate",
"user_save_timestamp": "1970-01-01T01:00:00.000Z",
"auto_save_timestamp": "1970-01-01T01:00:00.000Z",
"language_info": {
"name": "scala",
"file_extension": "scala",
"codemirror_mode": "text/x-scala"
},
"trusted": true,
"customLocalRepo": null,
"customRepos": null,
"customDeps": null,
"customImports": null,
"customArgs": null,
"customSparkConf": null
},
"cells": [
{
"metadata": {
"id": "CB653B6E8C804F67B75372D99DC8159D"
},
"cell_type": "markdown",
"source": "Created with the Spark Notebook at http://spark-notebook.io/"
},
{
"metadata": {
"id": "6EEDC917094540AF8D1C373853B0C5CE"
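},
"cell_type": "markdown",
"source": "### Aggregate time series by day and hour\n\nSum the values of each series id per day of year and hour of day. The grouping key is `(id, dayOfYear, hourOfDay)`, which has no year component, so input spanning more than one year would merge readings that share a day-of-year number in different years."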
},
{
"metadata": {
"trusted": true,
"input_collapsed": false,
"collapsed": false,
"id": "61C8F9656D6C40798F45AA817E658979"
},
"cell_type": "code",
"source": "import org.joda.time.DateTime",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "import org.joda.time.DateTime\n"
},
{
"metadata": {},
"data": {
"text/html": ""
},
"output_type": "execute_result",
"execution_count": 1,
"time": "Took: 563 milliseconds, at 2016-11-26 12:8"
}
]
},
{
"metadata": {
"trusted": true,
"input_collapsed": false,
"collapsed": false,
"id": "405112F985984B7582D85316F181D79B"
},
"cell_type": "code",
"source": "val sampleData = Seq(\"p1,38.1,2016-11-26T11:15:10\",\n \"p1,39.1,2016-11-26T11:16:10\",\n \"p1,35.8,2016-11-26T11:17:10\",\n \"p1,34.1,2016-11-26T11:18:10\",\n \"p2,37.2,2016-11-26T11:16:00\",\n \"p2,31.2,2016-11-27T11:17:00\",\n \"p2,31.6,2016-11-27T11:17:00\",\n \"p1,39.4,2016-11-26T12:15:10\",\n \"p2,36.3,2016-11-27T10:10:10\",\n \"p1,39.5,2016-11-27T12:15:00\",\n \"p3,36.1,2016-11-26T11:15:10\") ",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "sampleData: Seq[String] = List(p1,38.1,2016-11-26T11:15:10, p1,39.1,2016-11-26T11:16:10, p1,35.8,2016-11-26T11:17:10, p1,34.1,2016-11-26T11:18:10, p2,37.2,2016-11-26T11:16:00, p2,31.2,2016-11-27T11:17:00, p2,31.6,2016-11-27T11:17:00, p1,39.4,2016-11-26T12:15:10, p2,36.3,2016-11-27T10:10:10, p1,39.5,2016-11-27T12:15:00, p3,36.1,2016-11-26T11:15:10)\n"
},
{
"metadata": {},
"data": {
"text/html": ""
},
"output_type": "execute_result",
"execution_count": 18,
"time": "Took: 523 milliseconds, at 2016-11-26 12:22"
}
]
},
{
"metadata": {
"trusted": true,
"input_collapsed": false,
"collapsed": false,
"id": "8B62AE133DBE4156836FDFE3B845DE99"
},
"cell_type": "code",
"source": "val sampleDataRdd = sparkContext.parallelize(sampleData)\n",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "sampleDataRdd: org.apache.spark.rdd.RDD[String] = ParallelCollectionRDD[4] at parallelize at <console>:74\n"
},
{
"metadata": {},
"data": {
"text/html": ""
},
"output_type": "execute_result",
"execution_count": 19,
"time": "Took: 527 milliseconds, at 2016-11-26 12:22"
}
]
},
{
"metadata": {
"trusted": true,
"input_collapsed": false,
"collapsed": false,
"id": "8E9147C5D2404892B39748343E0D27B7"
},
"cell_type": "code",
"source": "val records = sampleDataRdd.map{line => \n val parts = line.split(\",\")\n val id = parts(0)\n val value = parts(1).toDouble\n val dateTime = DateTime.parse(parts(2))\n val doy = dateTime.getDayOfYear\n val hod = dateTime.getHourOfDay\n ((id, doy, hod), value)\n }\n ",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "records: org.apache.spark.rdd.RDD[((String, Int, Int), Double)] = MapPartitionsRDD[5] at map at <console>:76\n"
},
{
"metadata": {},
"data": {
"text/html": ""
},
"output_type": "execute_result",
"execution_count": 20,
"time": "Took: 457 milliseconds, at 2016-11-26 12:22"
}
]
},
{
"metadata": {
"trusted": true,
"input_collapsed": false,
"collapsed": false,
"id": "16CD7357DAEE4DDEB7458DF605CA99E1"
},
"cell_type": "code",
"source": "val aggregatedRecords = records.reduceByKey(_ + _)",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "aggregatedRecords: org.apache.spark.rdd.RDD[((String, Int, Int), Double)] = ShuffledRDD[6] at reduceByKey at <console>:78\n"
},
{
"metadata": {},
"data": {
"text/html": ""
},
"output_type": "execute_result",
"execution_count": 21,
"time": "Took: 467 milliseconds, at 2016-11-26 12:22"
}
]
},
{
"metadata": {
"trusted": true,
"input_collapsed": false,
"collapsed": false,
"presentation": {
"tabs_state": "{\n \"tab_id\": \"#tab1333318160-0\"\n}",
"pivot_chart_state": "{\n \"hiddenAttributes\": [],\n \"menuLimit\": 200,\n \"cols\": [],\n \"rows\": [],\n \"vals\": [],\n \"exclusions\": {},\n \"inclusions\": {},\n \"unusedAttrsVertical\": 85,\n \"autoSortUnusedAttrs\": false,\n \"inclusionsInfo\": {},\n \"aggregatorName\": \"Count\",\n \"rendererName\": \"Table\"\n}"
},
"id": "39EE6D314EB24E8D8D3BD9F907DF9C64"
},
"cell_type": "code",
"source": "aggregatedRecords.collect",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "res24: Array[((String, Int, Int), Double)] = Array(((p1,331,11),147.10000000000002), ((p2,332,11),62.8), ((p2,331,11),37.2), ((p1,332,12),39.5), ((p2,332,10),36.3), ((p1,331,12),39.4), ((p3,331,11),36.1))\n"
},
{
"metadata": {},
"data": {
"text/html": "<div>\n <script data-this=\"{&quot;dataId&quot;:&quot;anon82cf6e02d3ba6815e1860fbfc5f5509a&quot;,&quot;dataInit&quot;:[],&quot;genId&quot;:&quot;1333318160&quot;}\" type=\"text/x-scoped-javascript\">/*<![CDATA[*/req(['../javascripts/notebook/playground','../javascripts/notebook/magic/tabs'], \n function(playground, _magictabs) {\n // data ==> data-this (in observable.js's scopedEval) ==> this in JS => { dataId, dataInit, ... }\n // this ==> scope (in observable.js's scopedEval) ==> this.parentElement ==> div.container below (toHtml)\n\n playground.call(data,\n this\n ,\n {\n \"f\": _magictabs,\n \"o\": {}\n }\n \n \n \n );\n }\n );/*]]>*/</script>\n <div>\n <div>\n <ul class=\"nav nav-tabs\" id=\"ul1333318160\"><li>\n <a href=\"#tab1333318160-0\"><i class=\"fa fa-table\"/></a>\n </li><li>\n <a href=\"#tab1333318160-1\"><i class=\"fa fa-bar-chart\"/></a>\n </li><li>\n <a href=\"#tab1333318160-2\"><i class=\"fa fa-pie-chart\"/></a>\n </li><li>\n <a href=\"#tab1333318160-3\"><i class=\"fa fa-cubes\"/></a>\n </li></ul>\n\n <div class=\"tab-content\" id=\"tab1333318160\"><div class=\"tab-pane\" id=\"tab1333318160-0\">\n <div>\n <script 
data-this=\"{&quot;dataId&quot;:&quot;anon2a7d00799f3b34c9058792532aa49ab9&quot;,&quot;dataInit&quot;:[{&quot;_1&quot;:{&quot;_1&quot;:&quot;p1&quot;,&quot;_2&quot;:331,&quot;_3&quot;:11},&quot;_2&quot;:147.10000000000002},{&quot;_1&quot;:{&quot;_1&quot;:&quot;p2&quot;,&quot;_2&quot;:332,&quot;_3&quot;:11},&quot;_2&quot;:62.8},{&quot;_1&quot;:{&quot;_1&quot;:&quot;p2&quot;,&quot;_2&quot;:331,&quot;_3&quot;:11},&quot;_2&quot;:37.2},{&quot;_1&quot;:{&quot;_1&quot;:&quot;p1&quot;,&quot;_2&quot;:332,&quot;_3&quot;:12},&quot;_2&quot;:39.5},{&quot;_1&quot;:{&quot;_1&quot;:&quot;p2&quot;,&quot;_2&quot;:332,&quot;_3&quot;:10},&quot;_2&quot;:36.3},{&quot;_1&quot;:{&quot;_1&quot;:&quot;p1&quot;,&quot;_2&quot;:331,&quot;_3&quot;:12},&quot;_2&quot;:39.4},{&quot;_1&quot;:{&quot;_1&quot;:&quot;p3&quot;,&quot;_2&quot;:331,&quot;_3&quot;:11},&quot;_2&quot;:36.1}],&quot;genId&quot;:&quot;162068253&quot;}\" type=\"text/x-scoped-javascript\">/*<![CDATA[*/req(['../javascripts/notebook/playground','../javascripts/notebook/magic/tableChart'], \n function(playground, _magictableChart) {\n // data ==> data-this (in observable.js's scopedEval) ==> this in JS => { dataId, dataInit, ... 
}\n // this ==> scope (in observable.js's scopedEval) ==> this.parentElement ==> div.container below (toHtml)\n\n playground.call(data,\n this\n ,\n {\n \"f\": _magictableChart,\n \"o\": {\"headers\":[\"_1\",\"_2\"],\"width\":600,\"height\":400}\n }\n \n \n \n );\n }\n );/*]]>*/</script>\n <div>\n <span class=\"chart-total-item-count\"><p data-bind=\"text: value\"><script data-this=\"{&quot;valueId&quot;:&quot;anone82404e49bf0e2126bc6226dbd24b65e&quot;,&quot;initialValue&quot;:&quot;7&quot;}\" type=\"text/x-scoped-javascript\">/*<![CDATA[*/\nreq(\n['observable', 'knockout'],\nfunction (O, ko) {\n ko.applyBindings({\n value: O.makeObservable(valueId, initialValue)\n },\n this\n );\n});\n /*]]>*/</script></p> entries total</span>\n <span class=\"chart-sampling-warning\"><p data-bind=\"text: value\"><script data-this=\"{&quot;valueId&quot;:&quot;anon111653b2ccc5a106198b53e4914b3db1&quot;,&quot;initialValue&quot;:&quot;&quot;}\" type=\"text/x-scoped-javascript\">/*<![CDATA[*/\nreq(\n['observable', 'knockout'],\nfunction (O, ko) {\n ko.applyBindings({\n value: O.makeObservable(valueId, initialValue)\n },\n this\n );\n});\n /*]]>*/</script></p></span>\n <div>\n </div>\n </div></div>\n </div><div class=\"tab-pane\" id=\"tab1333318160-1\">\n <div>\n <script 
data-this=\"{&quot;dataId&quot;:&quot;anon5654aa244c6df4b6d9ff8acd429c8b5b&quot;,&quot;dataInit&quot;:[{&quot;_1&quot;:{&quot;_1&quot;:&quot;p1&quot;,&quot;_2&quot;:331,&quot;_3&quot;:11},&quot;_2&quot;:147.10000000000002},{&quot;_1&quot;:{&quot;_1&quot;:&quot;p2&quot;,&quot;_2&quot;:332,&quot;_3&quot;:11},&quot;_2&quot;:62.8},{&quot;_1&quot;:{&quot;_1&quot;:&quot;p2&quot;,&quot;_2&quot;:331,&quot;_3&quot;:11},&quot;_2&quot;:37.2},{&quot;_1&quot;:{&quot;_1&quot;:&quot;p1&quot;,&quot;_2&quot;:332,&quot;_3&quot;:12},&quot;_2&quot;:39.5},{&quot;_1&quot;:{&quot;_1&quot;:&quot;p2&quot;,&quot;_2&quot;:332,&quot;_3&quot;:10},&quot;_2&quot;:36.3},{&quot;_1&quot;:{&quot;_1&quot;:&quot;p1&quot;,&quot;_2&quot;:331,&quot;_3&quot;:12},&quot;_2&quot;:39.4},{&quot;_1&quot;:{&quot;_1&quot;:&quot;p3&quot;,&quot;_2&quot;:331,&quot;_3&quot;:11},&quot;_2&quot;:36.1}],&quot;genId&quot;:&quot;2078702285&quot;}\" type=\"text/x-scoped-javascript\">/*<![CDATA[*/req(['../javascripts/notebook/playground','../javascripts/notebook/magic/barChart'], \n function(playground, _magicbarChart) {\n // data ==> data-this (in observable.js's scopedEval) ==> this in JS => { dataId, dataInit, ... 
}\n // this ==> scope (in observable.js's scopedEval) ==> this.parentElement ==> div.container below (toHtml)\n\n playground.call(data,\n this\n ,\n {\n \"f\": _magicbarChart,\n \"o\": {\"x\":\"_1\",\"y\":\"_2\",\"width\":600,\"height\":400}\n }\n \n \n \n );\n }\n );/*]]>*/</script>\n <div>\n <span class=\"chart-total-item-count\"><p data-bind=\"text: value\"><script data-this=\"{&quot;valueId&quot;:&quot;anona6bb01e4c36570d3cc6149ac6f92bd7f&quot;,&quot;initialValue&quot;:&quot;7&quot;}\" type=\"text/x-scoped-javascript\">/*<![CDATA[*/\nreq(\n['observable', 'knockout'],\nfunction (O, ko) {\n ko.applyBindings({\n value: O.makeObservable(valueId, initialValue)\n },\n this\n );\n});\n /*]]>*/</script></p> entries total</span>\n <span class=\"chart-sampling-warning\"><p data-bind=\"text: value\"><script data-this=\"{&quot;valueId&quot;:&quot;anon058947ccefffbb8a5c33a727861f93d3&quot;,&quot;initialValue&quot;:&quot;&quot;}\" type=\"text/x-scoped-javascript\">/*<![CDATA[*/\nreq(\n['observable', 'knockout'],\nfunction (O, ko) {\n ko.applyBindings({\n value: O.makeObservable(valueId, initialValue)\n },\n this\n );\n});\n /*]]>*/</script></p></span>\n <div>\n </div>\n </div></div>\n </div><div class=\"tab-pane\" id=\"tab1333318160-2\">\n <div>\n <script 
data-this=\"{&quot;dataId&quot;:&quot;anon6126379f4f7833a2e6df5eeb3b0cb0dd&quot;,&quot;dataInit&quot;:[{&quot;_1&quot;:{&quot;_1&quot;:&quot;p1&quot;,&quot;_2&quot;:331,&quot;_3&quot;:11},&quot;_2&quot;:147.10000000000002},{&quot;_1&quot;:{&quot;_1&quot;:&quot;p2&quot;,&quot;_2&quot;:332,&quot;_3&quot;:11},&quot;_2&quot;:62.8},{&quot;_1&quot;:{&quot;_1&quot;:&quot;p2&quot;,&quot;_2&quot;:331,&quot;_3&quot;:11},&quot;_2&quot;:37.2},{&quot;_1&quot;:{&quot;_1&quot;:&quot;p1&quot;,&quot;_2&quot;:332,&quot;_3&quot;:12},&quot;_2&quot;:39.5},{&quot;_1&quot;:{&quot;_1&quot;:&quot;p2&quot;,&quot;_2&quot;:332,&quot;_3&quot;:10},&quot;_2&quot;:36.3},{&quot;_1&quot;:{&quot;_1&quot;:&quot;p1&quot;,&quot;_2&quot;:331,&quot;_3&quot;:12},&quot;_2&quot;:39.4},{&quot;_1&quot;:{&quot;_1&quot;:&quot;p3&quot;,&quot;_2&quot;:331,&quot;_3&quot;:11},&quot;_2&quot;:36.1}],&quot;genId&quot;:&quot;1463578223&quot;}\" type=\"text/x-scoped-javascript\">/*<![CDATA[*/req(['../javascripts/notebook/playground','../javascripts/notebook/magic/pieChart'], \n function(playground, _magicpieChart) {\n // data ==> data-this (in observable.js's scopedEval) ==> this in JS => { dataId, dataInit, ... 
}\n // this ==> scope (in observable.js's scopedEval) ==> this.parentElement ==> div.container below (toHtml)\n\n playground.call(data,\n this\n ,\n {\n \"f\": _magicpieChart,\n \"o\": {\"series\":\"_1\",\"p\":\"_2\",\"width\":600,\"height\":400}\n }\n \n \n \n );\n }\n );/*]]>*/</script>\n <div>\n <span class=\"chart-total-item-count\"><p data-bind=\"text: value\"><script data-this=\"{&quot;valueId&quot;:&quot;anona0a8edf3b3cd0583ecd90ef3bccce889&quot;,&quot;initialValue&quot;:&quot;7&quot;}\" type=\"text/x-scoped-javascript\">/*<![CDATA[*/\nreq(\n['observable', 'knockout'],\nfunction (O, ko) {\n ko.applyBindings({\n value: O.makeObservable(valueId, initialValue)\n },\n this\n );\n});\n /*]]>*/</script></p> entries total</span>\n <span class=\"chart-sampling-warning\"><p data-bind=\"text: value\"><script data-this=\"{&quot;valueId&quot;:&quot;anonae646d82375e3f1b1ce3e951c663dba7&quot;,&quot;initialValue&quot;:&quot;&quot;}\" type=\"text/x-scoped-javascript\">/*<![CDATA[*/\nreq(\n['observable', 'knockout'],\nfunction (O, ko) {\n ko.applyBindings({\n value: O.makeObservable(valueId, initialValue)\n },\n this\n );\n});\n /*]]>*/</script></p></span>\n <div>\n </div>\n </div></div>\n </div><div class=\"tab-pane\" id=\"tab1333318160-3\">\n <div>\n <script 
data-this=\"{&quot;dataId&quot;:&quot;anonac7b0aacc4144efd8b71b5d491773660&quot;,&quot;dataInit&quot;:[{&quot;_1&quot;:{&quot;_1&quot;:&quot;p1&quot;,&quot;_2&quot;:331,&quot;_3&quot;:11},&quot;_2&quot;:147.10000000000002},{&quot;_1&quot;:{&quot;_1&quot;:&quot;p2&quot;,&quot;_2&quot;:332,&quot;_3&quot;:11},&quot;_2&quot;:62.8},{&quot;_1&quot;:{&quot;_1&quot;:&quot;p2&quot;,&quot;_2&quot;:331,&quot;_3&quot;:11},&quot;_2&quot;:37.2},{&quot;_1&quot;:{&quot;_1&quot;:&quot;p1&quot;,&quot;_2&quot;:332,&quot;_3&quot;:12},&quot;_2&quot;:39.5},{&quot;_1&quot;:{&quot;_1&quot;:&quot;p2&quot;,&quot;_2&quot;:332,&quot;_3&quot;:10},&quot;_2&quot;:36.3},{&quot;_1&quot;:{&quot;_1&quot;:&quot;p1&quot;,&quot;_2&quot;:331,&quot;_3&quot;:12},&quot;_2&quot;:39.4},{&quot;_1&quot;:{&quot;_1&quot;:&quot;p3&quot;,&quot;_2&quot;:331,&quot;_3&quot;:11},&quot;_2&quot;:36.1}],&quot;genId&quot;:&quot;919389&quot;}\" type=\"text/x-scoped-javascript\">/*<![CDATA[*/req(['../javascripts/notebook/playground','../javascripts/notebook/magic/pivotChart'], \n function(playground, _magicpivotChart) {\n // data ==> data-this (in observable.js's scopedEval) ==> this in JS => { dataId, dataInit, ... 
}\n // this ==> scope (in observable.js's scopedEval) ==> this.parentElement ==> div.container below (toHtml)\n\n playground.call(data,\n this\n ,\n {\n \"f\": _magicpivotChart,\n \"o\": {\"width\":600,\"height\":400,\"derivedAttributes\":{},\"extraOptions\":{}}\n }\n \n \n \n );\n }\n );/*]]>*/</script>\n <div>\n <span class=\"chart-total-item-count\"><p data-bind=\"text: value\"><script data-this=\"{&quot;valueId&quot;:&quot;anon456626a329e63d5a3ac3d7ccb635399d&quot;,&quot;initialValue&quot;:&quot;7&quot;}\" type=\"text/x-scoped-javascript\">/*<![CDATA[*/\nreq(\n['observable', 'knockout'],\nfunction (O, ko) {\n ko.applyBindings({\n value: O.makeObservable(valueId, initialValue)\n },\n this\n );\n});\n /*]]>*/</script></p> entries total</span>\n <span class=\"chart-sampling-warning\"><p data-bind=\"text: value\"><script data-this=\"{&quot;valueId&quot;:&quot;anonc84cf87e06d18eab08dd6f9f200d21c9&quot;,&quot;initialValue&quot;:&quot;&quot;}\" type=\"text/x-scoped-javascript\">/*<![CDATA[*/\nreq(\n['observable', 'knockout'],\nfunction (O, ko) {\n ko.applyBindings({\n value: O.makeObservable(valueId, initialValue)\n },\n this\n );\n});\n /*]]>*/</script></p></span>\n <div>\n </div>\n </div></div>\n </div></div>\n </div>\n </div></div>"
},
"output_type": "execute_result",
"execution_count": 22,
"time": "Took: 633 milliseconds, at 2016-11-26 12:22"
}
]
},
{
"metadata": {
"trusted": true,
"input_collapsed": false,
"collapsed": true,
"id": "9B4F553F4ADE428B8B756A8243E9531C"
},
"cell_type": "code",
"source": "",
"outputs": []
}
],
"nbformat": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment