Created
September 18, 2015 14:09
-
-
Save tijptjik/300abf8cb4cc229a3f84 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": { | |
"collapsed": false, | |
"twolaSignature": "snapshot-selector", | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "from twola.datastore.snapshot import Snapshot\n%hub SnapshotSelector\n\nimport ast, vincent, random\nimport pandas as pd\nimport numpy as np", | |
"execution_count": 4, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "raw = Snapshot('/Users/twola-data/chelm/55f6ffa38a7fb3e025e94bfc/data/monday1.h5')\nfollowerCounts = raw.tbl[raw.tables[0]]\ncounts = [x['followers_count'] for x in followerCounts]\n\ntweets = raw.tbl[raw.tables[1]]\ntweetLength = [len(x['text'].split(' ')) for x in tweets]\ntweetCount, division = np.histogram(tweetLength)\ntweetHist = list(tweetCount)\n\n#print len(tweetLength), len(counts)", | |
"execution_count": 7, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df = pd.DataFrame({'followers': counts}) #, 'tweets': tweetLength})\nbins = [0, 25, 50, 100, 200, 400, 800, 1600, 3600, 6400, 128000]\n\ngrouped = df.groupby(pd.cut(df.followers, bins)).count()", | |
"execution_count": 10, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"width": 475, | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "vincent.core.initialize_notebook()\n\nbar = vincent.Bar(grouped)\nbar.width = 400\nbar.height = 200\nbar.colors(brew='Set3')\nbar.axis_titles(x='', y='')\nbar.display()", | |
"execution_count": 13, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": "<IPython.core.display.HTML object>", | |
"text/html": "\n <script>\n \n function vct_load_lib(url, callback){\n if(typeof d3 !== 'undefined' &&\n url === 'http://d3js.org/d3.v3.min.js'){\n callback()\n }\n var s = document.createElement('script');\n s.src = url;\n s.async = true;\n s.onreadystatechange = s.onload = callback;\n s.onerror = function(){\n console.warn(\"failed to load library \" + url);\n };\n document.getElementsByTagName(\"head\")[0].appendChild(s);\n };\n var vincent_event = new CustomEvent(\n \"vincent_libs_loaded\",\n {bubbles: true, cancelable: true}\n );\n \n function load_all_libs(){\n console.log('Loading Vincent libs...')\n vct_load_lib('http://d3js.org/d3.v3.min.js', function(){\n vct_load_lib('http://d3js.org/d3.geo.projection.v0.min.js', function(){\n vct_load_lib('http://wrobstory.github.io/d3-cloud/d3.layout.cloud.js', function(){\n vct_load_lib('http://wrobstory.github.io/vega/vega.v1.3.3.js', function(){\n window.dispatchEvent(vincent_event);\n });\n });\n });\n });\n };\n if(typeof define === \"function\" && define.amd){\n if (window['d3'] === undefined ||\n window['topojson'] === undefined){\n require.config(\n {paths: {\n d3: 'http://d3js.org/d3.v3.min',\n topojson: 'http://d3js.org/topojson.v1.min'\n }\n }\n );\n require([\"d3\"], function(d3){\n console.log('Loading Vincent from require.js...')\n window.d3 = d3;\n require([\"topojson\"], function(topojson){\n window.topojson = topojson;\n load_all_libs();\n });\n });\n } else {\n load_all_libs();\n };\n }else{\n console.log('Require.js not found, loading manually...')\n load_all_libs();\n };\n\n </script>" | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": "<IPython.core.display.HTML object>", | |
"text/html": "<div id=\"visd17d2a3b684e45578295d086238c0d31\"></div>\n<script>\n ( function() {\n var _do_plot = function() {\n if (typeof vg === 'undefined') {\n window.addEventListener('vincent_libs_loaded', _do_plot)\n return;\n }\n vg.parse.spec({\"axes\": [{\"scale\": \"x\", \"title\": \"\", \"type\": \"x\"}, {\"scale\": \"y\", \"title\": \"\", \"type\": \"y\"}], \"data\": [{\"name\": \"table\", \"values\": [{\"col\": \"followers\", \"idx\": \"(0, 25]\", \"val\": 437}, {\"col\": \"followers\", \"idx\": \"(25, 50]\", \"val\": 298}, {\"col\": \"followers\", \"idx\": \"(50, 100]\", \"val\": 464}, {\"col\": \"followers\", \"idx\": \"(100, 200]\", \"val\": 838}, {\"col\": \"followers\", \"idx\": \"(200, 400]\", \"val\": 1169}, {\"col\": \"followers\", \"idx\": \"(400, 800]\", \"val\": 1198}, {\"col\": \"followers\", \"idx\": \"(800, 1600]\", \"val\": 894}, {\"col\": \"followers\", \"idx\": \"(1600, 3600]\", \"val\": 556}, {\"col\": \"followers\", \"idx\": \"(3600, 6400]\", \"val\": 230}, {\"col\": \"followers\", \"idx\": \"(6400, 128000]\", \"val\": 356}]}, {\"name\": \"stats\", \"source\": \"table\", \"transform\": [{\"keys\": [\"data.idx\"], \"type\": \"facet\"}, {\"type\": \"stats\", \"value\": \"data.val\"}]}], \"height\": 200, \"legends\": [], \"marks\": [{\"from\": {\"data\": \"table\", \"transform\": [{\"keys\": [\"data.col\"], \"type\": \"facet\"}, {\"height\": \"data.val\", \"point\": \"data.idx\", \"type\": \"stack\"}]}, \"marks\": [{\"properties\": {\"enter\": {\"fill\": {\"field\": \"data.col\", \"scale\": \"color\"}, \"width\": {\"band\": true, \"offset\": -1, \"scale\": \"x\"}, \"x\": {\"field\": \"data.idx\", \"scale\": \"x\"}, \"y\": {\"field\": \"y\", \"scale\": \"y\"}, \"y2\": {\"field\": \"y2\", \"scale\": \"y\"}}}, \"type\": \"rect\"}], \"type\": \"group\"}], \"padding\": \"auto\", \"scales\": [{\"domain\": {\"data\": \"table\", \"field\": \"data.idx\"}, \"name\": \"x\", \"range\": \"width\", \"type\": \"ordinal\", \"zero\": false}, {\"domain\": {\"data\": \"stats\", \"field\": \"sum\"}, \"name\": \"y\", \"nice\": true, \"range\": \"height\"}, {\"domain\": {\"data\": \"table\", \"field\": \"data.col\"}, \"name\": \"color\", \"range\": [\"#8dd3c7\", \"#ffffb3\", \"#bebada\", \"#fb8072\", \"#80b1d3\", \"#fdb462\", \"#b3de69\", \"#fccde5\", \"#d9d9d9\", \"#bc80bd\", \"#ccebc5\", \"#ffed6f\"], \"type\": \"ordinal\"}], \"width\": 400}, function(chart) {\n chart({el: \"#visd17d2a3b684e45578295d086238c0d31\"}).update();\n });\n };\n _do_plot();\n })();\n</script>\n<style>.vega canvas {width: 100%;}</style>\n " | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"width": 350, | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "\nbar = vincent.Bar(tweetHist)\nbar.width = 300\nbar.height = 200\nbar.colors(brew='Set3')\nbar.axis_titles(y='', x='')\nbar.display()\n", | |
"execution_count": 14, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": "<IPython.core.display.HTML object>", | |
"text/html": "<div id=\"visd8b5908ca39e433d82bc5c8a5fa4acc8\"></div>\n<script>\n ( function() {\n var _do_plot = function() {\n if (typeof vg === 'undefined') {\n window.addEventListener('vincent_libs_loaded', _do_plot)\n return;\n }\n vg.parse.spec({\"axes\": [{\"scale\": \"x\", \"title\": \"\", \"type\": \"x\"}, {\"scale\": \"y\", \"title\": \"\", \"type\": \"y\"}], \"data\": [{\"name\": \"table\", \"values\": [{\"col\": \"data\", \"idx\": 0, \"val\": 414}, {\"col\": \"data\", \"idx\": 1, \"val\": 698}, {\"col\": \"data\", \"idx\": 2, \"val\": 1020}, {\"col\": \"data\", \"idx\": 3, \"val\": 863}, {\"col\": \"data\", \"idx\": 4, \"val\": 1282}, {\"col\": \"data\", \"idx\": 5, \"val\": 1160}, {\"col\": \"data\", \"idx\": 6, \"val\": 584}, {\"col\": \"data\", \"idx\": 7, \"val\": 428}, {\"col\": \"data\", \"idx\": 8, \"val\": 71}, {\"col\": \"data\", \"idx\": 9, \"val\": 14}]}, {\"name\": \"stats\", \"source\": \"table\", \"transform\": [{\"keys\": [\"data.idx\"], \"type\": \"facet\"}, {\"type\": \"stats\", \"value\": \"data.val\"}]}], \"height\": 200, \"legends\": [], \"marks\": [{\"from\": {\"data\": \"table\", \"transform\": [{\"keys\": [\"data.col\"], \"type\": \"facet\"}, {\"height\": \"data.val\", \"point\": \"data.idx\", \"type\": \"stack\"}]}, \"marks\": [{\"properties\": {\"enter\": {\"fill\": {\"field\": \"data.col\", \"scale\": \"color\"}, \"width\": {\"band\": true, \"offset\": -1, \"scale\": \"x\"}, \"x\": {\"field\": \"data.idx\", \"scale\": \"x\"}, \"y\": {\"field\": \"y\", \"scale\": \"y\"}, \"y2\": {\"field\": \"y2\", \"scale\": \"y\"}}}, \"type\": \"rect\"}], \"type\": \"group\"}], \"padding\": \"auto\", \"scales\": [{\"domain\": {\"data\": \"table\", \"field\": \"data.idx\"}, \"name\": \"x\", \"range\": \"width\", \"type\": \"ordinal\", \"zero\": false}, {\"domain\": {\"data\": \"stats\", \"field\": \"sum\"}, \"name\": \"y\", \"nice\": true, \"range\": \"height\"}, {\"domain\": {\"data\": \"table\", \"field\": \"data.col\"}, \"name\": \"color\", \"range\": [\"#8dd3c7\", \"#ffffb3\", \"#bebada\", \"#fb8072\", \"#80b1d3\", \"#fdb462\", \"#b3de69\", \"#fccde5\", \"#d9d9d9\", \"#bc80bd\", \"#ccebc5\", \"#ffed6f\"], \"type\": \"ordinal\"}], \"width\": 300}, function(chart) {\n chart({el: \"#visd8b5908ca39e433d82bc5c8a5fa4acc8\"}).update();\n });\n };\n _do_plot();\n })();\n</script>\n<style>.vega canvas {width: 100%;}</style>\n " | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "full = Snapshot('/Users/twola-data/chelm/55f730b88a6aa0596128cdc0/data/fullmsg.h5')", | |
"execution_count": 90, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "\"\"\"Split text and count words in an array. \"\"\"\n\nwc = {\n \"monday\": 0,\n \"coffee\": 0,\n \"work\": 0\n}\n\ntexts = [r for oid, r in full]\n\nfor t in texts:\n if 'text' in t.keys(): \n words = t['text'].split(' ')\n if 'monday' in words:\n wc['monday'] += 1\n elif 'coffee' in words:\n wc['coffee'] += 1\n elif 'work' in words:\n wc['work'] += 1\n ", | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"ename": "NameError", | |
"evalue": "name 'full' is not defined", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-5-e726a485e62c>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 7\u001b[0m }\n\u001b[1;32m 8\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0mtexts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mr\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0moid\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mr\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mfull\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mt\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtexts\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mNameError\u001b[0m: name 'full' is not defined" | |
], | |
"output_type": "error" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"width": 275, | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": " \ndonut = vincent.Pie(wc, outer_radius=100, inner_radius=50)\ndonut.width = 100\ndonut.height = 100\ndonut.colors(brew=\"Set2\")\ndonut.legend('Tweet Match')\n#print donut.to_json()", | |
"execution_count": 89, | |
"outputs": [ | |
{ | |
"execution_count": 89, | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": "<vincent.charts.Pie at 0x10fe4e610>", | |
"text/html": "<div id=\"vis25d98018c20a4abdafb712c74472550c\"></div>\n<script>\n ( function() {\n var _do_plot = function() {\n if (typeof vg === 'undefined') {\n window.addEventListener('vincent_libs_loaded', _do_plot)\n return;\n }\n vg.parse.spec({\"axes\": [], \"data\": [{\"name\": \"table\", \"values\": [{\"col\": \"data\", \"idx\": \"coffee\", \"val\": 202}, {\"col\": \"data\", \"idx\": \"monday\", \"val\": 64}, {\"col\": \"data\", \"idx\": \"work\", \"val\": 2218}]}], \"height\": 100, \"legends\": [{\"fill\": \"color\", \"offset\": 0, \"properties\": {}, \"title\": \"Tweet Match\"}], \"marks\": [{\"from\": {\"data\": \"table\", \"transform\": [{\"type\": \"pie\", \"value\": \"data.val\"}]}, \"properties\": {\"enter\": {\"endAngle\": {\"field\": \"endAngle\"}, \"fill\": {\"field\": \"data.idx\", \"scale\": \"color\"}, \"innerRadius\": {\"value\": 50}, \"outerRadius\": {\"value\": 100}, \"startAngle\": {\"field\": \"startAngle\"}, \"stroke\": {\"value\": \"white\"}, \"x\": {\"group\": \"width\", \"mult\": 0.5}, \"y\": {\"group\": \"height\", \"mult\": 0.5}}}, \"type\": \"arc\"}], \"padding\": \"auto\", \"scales\": [{\"domain\": {\"data\": \"table\", \"field\": \"data.idx\"}, \"name\": \"color\", \"range\": [\"#66c2a5\", \"#fc8d62\", \"#8da0cb\", \"#e78ac3\", \"#a6d854\", \"#ffd92f\", \"#e5c494\", \"#b3b3b3\"], \"type\": \"ordinal\"}], \"width\": 100}, function(chart) {\n chart({el: \"#vis25d98018c20a4abdafb712c74472550c\"}).update();\n });\n };\n _do_plot();\n })();\n</script>\n<style>.vega canvas {width: 100%;}</style>\n " | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"collapsed": false, | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "%hub search", | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"execution_count": 6, | |
"output_type": "execute_result", | |
"data": { | |
"application/javascript": "\nvar twolaCheck = function() {\n if(window.Twola) {\n if(IPython.notebook.kernel.is_connected() == true) {\n clearInterval(poller);\n window.Twola.search(element, \"\");\n }\n }\n}\nvar poller = setInterval(twolaCheck, 100);\n", | |
"text/plain": "<IPython.core.display.Javascript object>" | |
}, | |
"metadata": {} | |
} | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"name": "python2", | |
"display_name": "Python 2", | |
"language": "python" | |
}, | |
"language_info": { | |
"mimetype": "text/x-python", | |
"nbconvert_exporter": "python", | |
"name": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.10", | |
"file_extension": ".py", | |
"codemirror_mode": { | |
"version": 2, | |
"name": "ipython" | |
} | |
} | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment