Skip to content

Instantly share code, notes, and snippets.

View saptarshiguha's full-sized avatar

Saptarshi Guha saptarshiguha

View GitHub Profile
## In https://github.com/rstudio/htmltools/blob/master/R/html_print.R#L41
## html_print doesn't have an option on how and where to write the html
## it always creates another temporary directory and writes the HTML there
## This bypasses that with an ugly recursive copy ...
# Pick a fresh temp path for rbokeh output, publish it through the
# "rbokehViewDir" option, and create it on disk if nothing exists there yet.
local({
  view_dir <- tempfile("rbokehviz")
  options(rbokehViewDir = view_dir)
  if (!file.exists(view_dir)) {
    dir.create(view_dir)
  }
})
options(viewer = (function(){
buildingR <- function(excludeLibs=c(),exclude=NULL,iterate=TRUE,verbose=1,nameof="Rfolder-test",destpath=sprintf("/user/%s/",USER)){
library(Rhipe)
rhinit()
## if(USER=="") print("=================USER is empty=====================")
local({
tfolder <- sprintf("%s/Rdist",tempdir())
## delete folder if it exists!
dir.create(tfolder)
execu <- if ("package:Rhipe" %in% search()) rhoptions()$RhipeMapReduce else sprintf("/home/%s/software/R_LIBS/Rhipe/bin/RhipeMapReduce",USER)
import ujson as json
import matplotlib.pyplot as plt
from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client
import datetime
def whatTime():
print datetime.datetime.now()
## Get the v4 main pings for Beta users during 07/15 ... 08/31 I need
This file has been truncated, but you can view the full file.
{"nbformat_minor": 0, "cells": [{"source": "### switching defaults in Windows10", "cell_type": "markdown", "metadata": {}}, {"execution_count": 1, "cell_type": "code", "source": "import ujson as json\nimport boto\nimport binascii\nimport ujson as json\n\nfrom moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history\nsc.defaultParallelism\nconn = boto.connect_s3()\n%pylab inline", "outputs": [{"output_type": "stream", "name": "stdout", "text": "Populating the interactive namespace from numpy and matplotlib\n"}], "metadata": {"scrolled": true, "collapsed": false, "trusted": true}}, {"source": "## We will use the premade sample, maybe that is faster?", "cell_type": "markdown", "metadata": {}}, {"execution_count": 2, "cell_type": "code", "source": "pings2 = get_clients_history (sc)", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": 3, "cell_type": "code", "source": "def BetaFilter(x):\n client_id, p = x\n if p[u'type'] == u'main'
This file has been truncated, but you can view the full file.
{"nbformat_minor": 0, "cells": [{"source": "### switching defaults in Windows10", "cell_type": "markdown", "metadata": {}}, {"execution_count": 1, "cell_type": "code", "source": "import ujson as json\nimport boto\nimport binascii\nimport ujson as json\n\nfrom moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history\nsc.defaultParallelism\nconn = boto.connect_s3()\n%pylab inline\npings2 = get_clients_history (sc)", "outputs": [{"output_type": "stream", "name": "stdout", "text": "Populating the interactive namespace from numpy and matplotlib\n"}], "metadata": {"scrolled": true, "collapsed": false, "trusted": true}}, {"source": "## We will use the premade sample, maybe that is faster?", "cell_type": "markdown", "metadata": {}}, {"execution_count": 2, "cell_type": "code", "source": "def BetaFilter(x):\n client_id, p = x\n if p[u'type'] == u'main' and p[u'application'][u'channel'] == 'beta'and p[u'application'][u'name']=='Firefox'\\\n and p[u'application'][u'vendor
{"nbformat_minor": 0, "cells": [{"source": "### switching defaults in Windows10", "cell_type": "markdown", "metadata": {}}, {"execution_count": null, "cell_type": "code", "source": "import ujson as json\nimport boto\nimport binascii\nimport ujson as json\n\nfrom moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history\noutBucketName = \"s3://sguhaoutputs/win10a/\"\n%pylab inline", "outputs": [], "metadata": {"scrolled": true, "collapsed": false, "trusted": false}}, {"source": "### Basics", "cell_type": "markdown", "metadata": {}}, {"source": " Let's see how many parallel workers we have at our disposal:", "cell_type": "markdown", "metadata": {}}, {"execution_count": 2, "cell_type": "code", "source": "sc.defaultParallelism\nconn = boto.connect_s3()", "outputs": [], "metadata": {"collapsed": false, "trusted": false}}, {"source": "We will need all the pings because we need to stitch", "cell_type": "markdown", "metadata": {}}, {"execution_count": 8, "cell_type": "code", "s
{"nbformat_minor": 0, "cells": [{"source": "### switching defaults in Windows10", "cell_type": "markdown", "metadata": {}}, {"execution_count": null, "cell_type": "code", "source": "import ujson as json\nimport boto\nimport binascii\nimport ujson as json\n\nfrom moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history\noutBucketName = \"s3://sguhaoutputs/win10a/\"\n%pylab inline", "outputs": [], "metadata": {"scrolled": true, "collapsed": false, "trusted": false}}, {"source": "### Basics", "cell_type": "markdown", "metadata": {}}, {"source": " Let's see how many parallel workers we have at our disposal:", "cell_type": "markdown", "metadata": {}}, {"execution_count": 2, "cell_type": "code", "source": "sc.defaultParallelism\nconn = boto.connect_s3()", "outputs": [], "metadata": {"collapsed": false, "trusted": false}}, {"source": "We will need all the pings because we need to stitch", "cell_type": "markdown", "metadata": {}}, {"execution_count": 8, "cell_type": "code", "s
{"nbformat_minor": 0, "cells": [{"source": "### switching defaults in Windows10", "cell_type": "markdown", "metadata": {}}, {"execution_count": null, "cell_type": "code", "source": "import ujson as json\nimport boto\nimport binascii\nimport ujson as json\n\nfrom moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history\noutBucketName = \"s3://sguhaoutputs/win10a/\"\n%pylab inline", "outputs": [], "metadata": {"scrolled": true, "collapsed": false, "trusted": false}}, {"source": "### Basics", "cell_type": "markdown", "metadata": {}}, {"source": " Let's see how many parallel workers we have at our disposal:", "cell_type": "markdown", "metadata": {}}, {"execution_count": 2, "cell_type": "code", "source": "sc.defaultParallelism\nconn = boto.connect_s3()", "outputs": [], "metadata": {"collapsed": false, "trusted": false}}, {"source": "We will need all the pings because we need to stitch", "cell_type": "markdown", "metadata": {}}, {"execution_count": 8, "cell_type": "code", "s
{"nbformat_minor": 0, "cells": [{"source": "### switching defaults in Windows10", "cell_type": "markdown", "metadata": {}}, {"execution_count": null, "cell_type": "code", "source": "import ujson as json\nimport boto\nimport binascii\nimport ujson as json\n\nfrom moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history\noutBucketName = \"s3://sguhaoutputs/win10a/\"\n%pylab inline", "outputs": [], "metadata": {"scrolled": true, "collapsed": false, "trusted": false}}, {"source": "### Basics", "cell_type": "markdown", "metadata": {}}, {"source": " Let's see how many parallel workers we have at our disposal:", "cell_type": "markdown", "metadata": {}}, {"execution_count": 2, "cell_type": "code", "source": "sc.defaultParallelism\nconn = boto.connect_s3()", "outputs": [], "metadata": {"collapsed": false, "trusted": false}}, {"source": "We will need all the pings because we need to stitch", "cell_type": "markdown", "metadata": {}}, {"execution_count": 8, "cell_type": "code", "s
{"nbformat_minor": 0, "cells": [{"source": "### switching defaults in Windows10", "cell_type": "markdown", "metadata": {}}, {"execution_count": 1, "cell_type": "code", "source": "import ujson as json\nimport boto\nimport binascii\nimport ujson as json\n\nfrom moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history\n\n%pylab inline", "outputs": [{"output_type": "stream", "name": "stdout", "text": "Populating the interactive namespace from numpy and matplotlib\n"}], "metadata": {"scrolled": true, "collapsed": false, "trusted": false}}, {"source": "### Basics", "cell_type": "markdown", "metadata": {}}, {"source": " Let's see how many parallel workers we have at our disposal:", "cell_type": "markdown", "metadata": {}}, {"execution_count": 2, "cell_type": "code", "source": "sc.defaultParallelism", "outputs": [{"execution_count": 2, "output_type": "execute_result", "data": {"text/plain": "240"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": false}}, {"source