---
title: Churn Analyses for mozilla100 vs mozilla111
author: Saptarshi Guha <[email protected]>
date: "`r format(Sys.time(), '%B %d, %Y')`"
output:
  html_document:
    toc: true
    toc_float: true
    mathjax: default
    self_contained: true
---
require("base")
local R = terralib.includecstring [[
#include <rterra.h>
]]
function R.ptable(w) for key,value in pairs(w) do print(key,value) end end
R.malloc = stdlib.malloc
R.free = stdlib.free
R.cincludesearchpath = {}
R.types = { NILSXP = 0, SYMSXP = 1, LISTSXP = 2,
## Are we running on the local (macOS) laptop or on a remote host?
islocaltolaptopq <- grepl("darwin", R.version['platform'])
library(mozaws)
aws.init(ec2key = "20161025-dataops-dev"
  ,localpubkey = if(islocaltolaptopq) "~/mz/awsssh.pub" else "~/software/custom/awsssh.pub"
  ,opts = list(loguri = "s3://mozilla-metrics/share/logs/"
    ,s3bucket = "mozilla-metrics/share/bootscriptsAndR"
    ,timeout = as.character(as.integer(1440))
    ,ec2attributes = "InstanceProfile='telemetry-spark-cloudformation-TelemetrySparkInstanceProfile-1SATUBVEXG7E3'"
    ,configfile = "https://s3-us-west-2.amazonaws.com/telemetry-spark-emr-2/configuration/configuration.json"
  ))
################################################################################
## PySpark Invocation
## submit code using /usr/lib/spark/bin/spark-submit review.py
################################################################################
import os,sys
print([os.environ.get('PYSPARK_PYTHON','missing'),os.environ.get('PYSPARK_DRIVER_PYTHON','missing')])
import pyspark
import py4j
from pyspark import SparkContext
from pyspark.sql import SQLContext
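The header above only imports the PySpark entry points; a script submitted via spark-submit still has to construct the contexts itself. A minimal sketch of that step, assuming the Spark 1.x-era API the imports suggest (the app name and the commented-out path are placeholders, not from the original gist):

from pyspark import SparkContext
from pyspark.sql import SQLContext

sc = SparkContext(appName="churn-review")  # app name is illustrative
sqlContext = SQLContext(sc)

# e.g. load a dataset into a DataFrame for the churn queries (path is a placeholder):
# frame = sqlContext.read.parquet("s3://example-bucket/some-dataset/")

sc.stop()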
Unique # of users pinging within k days since the given date:

| Date         |                  0 |   1 |   2 |   3 |   4 |    5 |    6 |    7 |    8 |    9 |   10 |   11 |   12 |   13 |   14 |
|--------------+--------------------+-----+-----+-----+-----+------+------+------+------+------+------+------+------+------+------|
| '2017-02-01' |                100 | 130 | 300 | 500 | 800 | 1700 | 2100 | 2200 | 2300 | 2350 | 2375 | 2400 | 2410 | 2450 | 2490 |
| '2017-02-15' | 40 (= 2490 - 2450) | ... |     |     |     |      |      |      |      |      |      |      |      |      |      |
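A self-contained sketch of the expanding-window distinct-user count that fills one row of this table, using made-up ping data (the actual analysis ran over telemetry pings in PySpark; client ids and dates here are purely illustrative):

from datetime import date, timedelta

# Made-up ping data: client id -> set of dates on which it pinged.
pings = {
    "c1": {date(2017, 2, 1)},
    "c2": {date(2017, 2, 2), date(2017, 2, 15)},
    "c3": {date(2017, 2, 5)},
    "c4": {date(2017, 2, 15)},
}

def unique_within(pings, start, ndays=15):
    """Cumulative count of distinct clients pinging within 0..ndays-1 days of `start`."""
    seen, counts = set(), []
    for k in range(ndays):
        day = start + timedelta(days=k)
        seen.update(cid for cid, days in pings.items() if day in days)
        counts.append(len(seen))
    return counts

# One row of the table: counts for days 0..14 since 2017-02-01.
print(unique_within(pings, date(2017, 2, 1)))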