Created
November 26, 2016 11:25
-
-
Save maasg/e470654d15a73a1cc1a280e37561a8a5 to your computer and use it in GitHub Desktop.
Simple TimeSeries Aggregation using Spark
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "metadata": { | |
| "name": "ts-aggregate", | |
| "user_save_timestamp": "1970-01-01T01:00:00.000Z", | |
| "auto_save_timestamp": "1970-01-01T01:00:00.000Z", | |
| "language_info": { | |
| "name": "scala", | |
| "file_extension": "scala", | |
| "codemirror_mode": "text/x-scala" | |
| }, | |
| "trusted": true, | |
| "customLocalRepo": null, | |
| "customRepos": null, | |
| "customDeps": null, | |
| "customImports": null, | |
| "customArgs": null, | |
| "customSparkConf": null | |
| }, | |
| "cells": [ | |
| { | |
| "metadata": { | |
| "id": "CB653B6E8C804F67B75372D99DC8159D" | |
| }, | |
| "cell_type": "markdown", | |
| "source": "Created with the Spark Notebook at http://spark-notebook.io/" | |
| }, | |
| { | |
| "metadata": { | |
| "id": "6EEDC917094540AF8D1C373853B0C5CE" | |
| }, | |
| "cell_type": "markdown", | |
| "source": "### Aggregate time series by id, day of year, and hour of day" | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "input_collapsed": false, | |
| "collapsed": false, | |
| "id": "61C8F9656D6C40798F45AA817E658979" | |
| }, | |
| "cell_type": "code", | |
| "source": "import org.joda.time.DateTime", | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": "import org.joda.time.DateTime\n" | |
| }, | |
| { | |
| "metadata": {}, | |
| "data": { | |
| "text/html": "" | |
| }, | |
| "output_type": "execute_result", | |
| "execution_count": 1, | |
| "time": "Took: 563 milliseconds, at 2016-11-26 12:8" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "input_collapsed": false, | |
| "collapsed": false, | |
| "id": "405112F985984B7582D85316F181D79B" | |
| }, | |
| "cell_type": "code", | |
| "source": "val sampleData = Seq(\"p1,38.1,2016-11-26T11:15:10\",\n \"p1,39.1,2016-11-26T11:16:10\",\n \"p1,35.8,2016-11-26T11:17:10\",\n \"p1,34.1,2016-11-26T11:18:10\",\n \"p2,37.2,2016-11-26T11:16:00\",\n \"p2,31.2,2016-11-27T11:17:00\",\n \"p2,31.6,2016-11-27T11:17:00\",\n \"p1,39.4,2016-11-26T12:15:10\",\n \"p2,36.3,2016-11-27T10:10:10\",\n \"p1,39.5,2016-11-27T12:15:00\",\n \"p3,36.1,2016-11-26T11:15:10\") ", | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": "sampleData: Seq[String] = List(p1,38.1,2016-11-26T11:15:10, p1,39.1,2016-11-26T11:16:10, p1,35.8,2016-11-26T11:17:10, p1,34.1,2016-11-26T11:18:10, p2,37.2,2016-11-26T11:16:00, p2,31.2,2016-11-27T11:17:00, p2,31.6,2016-11-27T11:17:00, p1,39.4,2016-11-26T12:15:10, p2,36.3,2016-11-27T10:10:10, p1,39.5,2016-11-27T12:15:00, p3,36.1,2016-11-26T11:15:10)\n" | |
| }, | |
| { | |
| "metadata": {}, | |
| "data": { | |
| "text/html": "" | |
| }, | |
| "output_type": "execute_result", | |
| "execution_count": 18, | |
| "time": "Took: 523 milliseconds, at 2016-11-26 12:22" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "input_collapsed": false, | |
| "collapsed": false, | |
| "id": "8B62AE133DBE4156836FDFE3B845DE99" | |
| }, | |
| "cell_type": "code", | |
| "source": "val sampleDataRdd = sparkContext.parallelize(sampleData)\n", | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": "sampleDataRdd: org.apache.spark.rdd.RDD[String] = ParallelCollectionRDD[4] at parallelize at <console>:74\n" | |
| }, | |
| { | |
| "metadata": {}, | |
| "data": { | |
| "text/html": "" | |
| }, | |
| "output_type": "execute_result", | |
| "execution_count": 19, | |
| "time": "Took: 527 milliseconds, at 2016-11-26 12:22" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "input_collapsed": false, | |
| "collapsed": false, | |
| "id": "8E9147C5D2404892B39748343E0D27B7" | |
| }, | |
| "cell_type": "code", | |
| "source": "val records = sampleDataRdd.map{line => \n val parts = line.split(\",\")\n val id = parts(0)\n val value = parts(1).toDouble\n val dateTime = DateTime.parse(parts(2))\n val doy = dateTime.getDayOfYear\n val hod = dateTime.getHourOfDay\n ((id, doy, hod), value)\n }\n ", | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": "records: org.apache.spark.rdd.RDD[((String, Int, Int), Double)] = MapPartitionsRDD[5] at map at <console>:76\n" | |
| }, | |
| { | |
| "metadata": {}, | |
| "data": { | |
| "text/html": "" | |
| }, | |
| "output_type": "execute_result", | |
| "execution_count": 20, | |
| "time": "Took: 457 milliseconds, at 2016-11-26 12:22" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "input_collapsed": false, | |
| "collapsed": false, | |
| "id": "16CD7357DAEE4DDEB7458DF605CA99E1" | |
| }, | |
| "cell_type": "code", | |
| "source": "val aggregatedRecords = records.reduceByKey(_ + _)", | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": "aggregatedRecords: org.apache.spark.rdd.RDD[((String, Int, Int), Double)] = ShuffledRDD[6] at reduceByKey at <console>:78\n" | |
| }, | |
| { | |
| "metadata": {}, | |
| "data": { | |
| "text/html": "" | |
| }, | |
| "output_type": "execute_result", | |
| "execution_count": 21, | |
| "time": "Took: 467 milliseconds, at 2016-11-26 12:22" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "input_collapsed": false, | |
| "collapsed": false, | |
| "presentation": { | |
| "tabs_state": "{\n \"tab_id\": \"#tab1333318160-0\"\n}", | |
| "pivot_chart_state": "{\n \"hiddenAttributes\": [],\n \"menuLimit\": 200,\n \"cols\": [],\n \"rows\": [],\n \"vals\": [],\n \"exclusions\": {},\n \"inclusions\": {},\n \"unusedAttrsVertical\": 85,\n \"autoSortUnusedAttrs\": false,\n \"inclusionsInfo\": {},\n \"aggregatorName\": \"Count\",\n \"rendererName\": \"Table\"\n}" | |
| }, | |
| "id": "39EE6D314EB24E8D8D3BD9F907DF9C64" | |
| }, | |
| "cell_type": "code", | |
| "source": "aggregatedRecords.collect", | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": "res24: Array[((String, Int, Int), Double)] = Array(((p1,331,11),147.10000000000002), ((p2,332,11),62.8), ((p2,331,11),37.2), ((p1,332,12),39.5), ((p2,332,10),36.3), ((p1,331,12),39.4), ((p3,331,11),36.1))\n" | |
| }, | |
| { | |
| "metadata": {}, | |
| "data": { | |
| "text/html": "<div>\n <script data-this=\"{"dataId":"anon82cf6e02d3ba6815e1860fbfc5f5509a","dataInit":[],"genId":"1333318160"}\" type=\"text/x-scoped-javascript\">/*<![CDATA[*/req(['../javascripts/notebook/playground','../javascripts/notebook/magic/tabs'], \n function(playground, _magictabs) {\n // data ==> data-this (in observable.js's scopedEval) ==> this in JS => { dataId, dataInit, ... }\n // this ==> scope (in observable.js's scopedEval) ==> this.parentElement ==> div.container below (toHtml)\n\n playground.call(data,\n this\n ,\n {\n \"f\": _magictabs,\n \"o\": {}\n }\n \n \n \n );\n }\n );/*]]>*/</script>\n <div>\n <div>\n <ul class=\"nav nav-tabs\" id=\"ul1333318160\"><li>\n <a href=\"#tab1333318160-0\"><i class=\"fa fa-table\"/></a>\n </li><li>\n <a href=\"#tab1333318160-1\"><i class=\"fa fa-bar-chart\"/></a>\n </li><li>\n <a href=\"#tab1333318160-2\"><i class=\"fa fa-pie-chart\"/></a>\n </li><li>\n <a href=\"#tab1333318160-3\"><i class=\"fa fa-cubes\"/></a>\n </li></ul>\n\n <div class=\"tab-content\" id=\"tab1333318160\"><div class=\"tab-pane\" id=\"tab1333318160-0\">\n <div>\n <script data-this=\"{"dataId":"anon2a7d00799f3b34c9058792532aa49ab9","dataInit":[{"_1":{"_1":"p1","_2":331,"_3":11},"_2":147.10000000000002},{"_1":{"_1":"p2","_2":332,"_3":11},"_2":62.8},{"_1":{"_1":"p2","_2":331,"_3":11},"_2":37.2},{"_1":{"_1":"p1","_2":332,"_3":12},"_2":39.5},{"_1":{"_1":"p2","_2":332,"_3":10},"_2":36.3},{"_1":{"_1":"p1","_2":331,"_3":12},"_2":39.4},{"_1":{"_1":"p3","_2":331,"_3":11},"_2":36.1}],"genId":"162068253"}\" type=\"text/x-scoped-javascript\">/*<![CDATA[*/req(['../javascripts/notebook/playground','../javascripts/notebook/magic/tableChart'], \n function(playground, _magictableChart) {\n // data ==> data-this (in observable.js's scopedEval) ==> this in JS => { dataId, dataInit, ... 
}\n // this ==> scope (in observable.js's scopedEval) ==> this.parentElement ==> div.container below (toHtml)\n\n playground.call(data,\n this\n ,\n {\n \"f\": _magictableChart,\n \"o\": {\"headers\":[\"_1\",\"_2\"],\"width\":600,\"height\":400}\n }\n \n \n \n );\n }\n );/*]]>*/</script>\n <div>\n <span class=\"chart-total-item-count\"><p data-bind=\"text: value\"><script data-this=\"{"valueId":"anone82404e49bf0e2126bc6226dbd24b65e","initialValue":"7"}\" type=\"text/x-scoped-javascript\">/*<![CDATA[*/\nreq(\n['observable', 'knockout'],\nfunction (O, ko) {\n ko.applyBindings({\n value: O.makeObservable(valueId, initialValue)\n },\n this\n );\n});\n /*]]>*/</script></p> entries total</span>\n <span class=\"chart-sampling-warning\"><p data-bind=\"text: value\"><script data-this=\"{"valueId":"anon111653b2ccc5a106198b53e4914b3db1","initialValue":""}\" type=\"text/x-scoped-javascript\">/*<![CDATA[*/\nreq(\n['observable', 'knockout'],\nfunction (O, ko) {\n ko.applyBindings({\n value: O.makeObservable(valueId, initialValue)\n },\n this\n );\n});\n /*]]>*/</script></p></span>\n <div>\n </div>\n </div></div>\n </div><div class=\"tab-pane\" id=\"tab1333318160-1\">\n <div>\n <script data-this=\"{"dataId":"anon5654aa244c6df4b6d9ff8acd429c8b5b","dataInit":[{"_1":{"_1":"p1","_2":331,"_3":11},"_2":147.10000000000002},{"_1":{"_1":"p2","_2":332,"_3":11},"_2":62.8},{"_1":{"_1":"p2","_2":331,"_3":11},"_2":37.2},{"_1":{"_1":"p1","_2":332,"_3":12},"_2":39.5},{"_1":{"_1":"p2","_2":332,"_3":10},"_2":36.3},{"_1":{"_1":"p1","_2":331,"_3":12},"_2":39.4},{"_1":{"_1":"p3","_2":331,"_3":11},"_2":36.1}],"genId":"2078702285"}\" type=\"text/x-scoped-javascript\">/*<![CDATA[*/req(['../javascripts/notebook/playground','../javascripts/notebook/magic/barChart'], \n function(playground, _magicbarChart) {\n // data ==> data-this (in observable.js's scopedEval) ==> this in JS => { dataId, dataInit, ... 
}\n // this ==> scope (in observable.js's scopedEval) ==> this.parentElement ==> div.container below (toHtml)\n\n playground.call(data,\n this\n ,\n {\n \"f\": _magicbarChart,\n \"o\": {\"x\":\"_1\",\"y\":\"_2\",\"width\":600,\"height\":400}\n }\n \n \n \n );\n }\n );/*]]>*/</script>\n <div>\n <span class=\"chart-total-item-count\"><p data-bind=\"text: value\"><script data-this=\"{"valueId":"anona6bb01e4c36570d3cc6149ac6f92bd7f","initialValue":"7"}\" type=\"text/x-scoped-javascript\">/*<![CDATA[*/\nreq(\n['observable', 'knockout'],\nfunction (O, ko) {\n ko.applyBindings({\n value: O.makeObservable(valueId, initialValue)\n },\n this\n );\n});\n /*]]>*/</script></p> entries total</span>\n <span class=\"chart-sampling-warning\"><p data-bind=\"text: value\"><script data-this=\"{"valueId":"anon058947ccefffbb8a5c33a727861f93d3","initialValue":""}\" type=\"text/x-scoped-javascript\">/*<![CDATA[*/\nreq(\n['observable', 'knockout'],\nfunction (O, ko) {\n ko.applyBindings({\n value: O.makeObservable(valueId, initialValue)\n },\n this\n );\n});\n /*]]>*/</script></p></span>\n <div>\n </div>\n </div></div>\n </div><div class=\"tab-pane\" id=\"tab1333318160-2\">\n <div>\n <script data-this=\"{"dataId":"anon6126379f4f7833a2e6df5eeb3b0cb0dd","dataInit":[{"_1":{"_1":"p1","_2":331,"_3":11},"_2":147.10000000000002},{"_1":{"_1":"p2","_2":332,"_3":11},"_2":62.8},{"_1":{"_1":"p2","_2":331,"_3":11},"_2":37.2},{"_1":{"_1":"p1","_2":332,"_3":12},"_2":39.5},{"_1":{"_1":"p2","_2":332,"_3":10},"_2":36.3},{"_1":{"_1":"p1","_2":331,"_3":12},"_2":39.4},{"_1":{"_1":"p3","_2":331,"_3":11},"_2":36.1}],"genId":"1463578223"}\" type=\"text/x-scoped-javascript\">/*<![CDATA[*/req(['../javascripts/notebook/playground','../javascripts/notebook/magic/pieChart'], \n function(playground, _magicpieChart) {\n // data ==> data-this (in observable.js's scopedEval) ==> this in JS => { dataId, dataInit, ... 
}\n // this ==> scope (in observable.js's scopedEval) ==> this.parentElement ==> div.container below (toHtml)\n\n playground.call(data,\n this\n ,\n {\n \"f\": _magicpieChart,\n \"o\": {\"series\":\"_1\",\"p\":\"_2\",\"width\":600,\"height\":400}\n }\n \n \n \n );\n }\n );/*]]>*/</script>\n <div>\n <span class=\"chart-total-item-count\"><p data-bind=\"text: value\"><script data-this=\"{"valueId":"anona0a8edf3b3cd0583ecd90ef3bccce889","initialValue":"7"}\" type=\"text/x-scoped-javascript\">/*<![CDATA[*/\nreq(\n['observable', 'knockout'],\nfunction (O, ko) {\n ko.applyBindings({\n value: O.makeObservable(valueId, initialValue)\n },\n this\n );\n});\n /*]]>*/</script></p> entries total</span>\n <span class=\"chart-sampling-warning\"><p data-bind=\"text: value\"><script data-this=\"{"valueId":"anonae646d82375e3f1b1ce3e951c663dba7","initialValue":""}\" type=\"text/x-scoped-javascript\">/*<![CDATA[*/\nreq(\n['observable', 'knockout'],\nfunction (O, ko) {\n ko.applyBindings({\n value: O.makeObservable(valueId, initialValue)\n },\n this\n );\n});\n /*]]>*/</script></p></span>\n <div>\n </div>\n </div></div>\n </div><div class=\"tab-pane\" id=\"tab1333318160-3\">\n <div>\n <script data-this=\"{"dataId":"anonac7b0aacc4144efd8b71b5d491773660","dataInit":[{"_1":{"_1":"p1","_2":331,"_3":11},"_2":147.10000000000002},{"_1":{"_1":"p2","_2":332,"_3":11},"_2":62.8},{"_1":{"_1":"p2","_2":331,"_3":11},"_2":37.2},{"_1":{"_1":"p1","_2":332,"_3":12},"_2":39.5},{"_1":{"_1":"p2","_2":332,"_3":10},"_2":36.3},{"_1":{"_1":"p1","_2":331,"_3":12},"_2":39.4},{"_1":{"_1":"p3","_2":331,"_3":11},"_2":36.1}],"genId":"919389"}\" type=\"text/x-scoped-javascript\">/*<![CDATA[*/req(['../javascripts/notebook/playground','../javascripts/notebook/magic/pivotChart'], \n function(playground, _magicpivotChart) {\n // data ==> data-this (in observable.js's scopedEval) ==> this in JS => { dataId, dataInit, ... 
}\n // this ==> scope (in observable.js's scopedEval) ==> this.parentElement ==> div.container below (toHtml)\n\n playground.call(data,\n this\n ,\n {\n \"f\": _magicpivotChart,\n \"o\": {\"width\":600,\"height\":400,\"derivedAttributes\":{},\"extraOptions\":{}}\n }\n \n \n \n );\n }\n );/*]]>*/</script>\n <div>\n <span class=\"chart-total-item-count\"><p data-bind=\"text: value\"><script data-this=\"{"valueId":"anon456626a329e63d5a3ac3d7ccb635399d","initialValue":"7"}\" type=\"text/x-scoped-javascript\">/*<![CDATA[*/\nreq(\n['observable', 'knockout'],\nfunction (O, ko) {\n ko.applyBindings({\n value: O.makeObservable(valueId, initialValue)\n },\n this\n );\n});\n /*]]>*/</script></p> entries total</span>\n <span class=\"chart-sampling-warning\"><p data-bind=\"text: value\"><script data-this=\"{"valueId":"anonc84cf87e06d18eab08dd6f9f200d21c9","initialValue":""}\" type=\"text/x-scoped-javascript\">/*<![CDATA[*/\nreq(\n['observable', 'knockout'],\nfunction (O, ko) {\n ko.applyBindings({\n value: O.makeObservable(valueId, initialValue)\n },\n this\n );\n});\n /*]]>*/</script></p></span>\n <div>\n </div>\n </div></div>\n </div></div>\n </div>\n </div></div>" | |
| }, | |
| "output_type": "execute_result", | |
| "execution_count": 22, | |
| "time": "Took: 633 milliseconds, at 2016-11-26 12:22" | |
| } | |
| ] | |
| }, | |
| { | |
| "metadata": { | |
| "trusted": true, | |
| "input_collapsed": false, | |
| "collapsed": true, | |
| "id": "9B4F553F4ADE428B8B756A8243E9531C" | |
| }, | |
| "cell_type": "code", | |
| "source": "", | |
| "outputs": [] | |
| } | |
| ], | |
| "nbformat": 4 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment