Skip to content

Instantly share code, notes, and snippets.

@datawookie
Last active March 17, 2018 03:43
Show Gist options
  • Save datawookie/df8d37b80c2928ce3f30b02d88ad3cf4 to your computer and use it in GitHub Desktop.
Save datawookie/df8d37b80c2928ce3f30b02d88ad3cf4 to your computer and use it in GitHub Desktop.
Files for Productivity Hacks talk.
# CONFIGURATION ---------------------------------------------------------------
# Every tunable setting for the simulation is gathered in this one block.
# NOTE(review): the companion shell scripts appear to rewrite the RATIO line
# with sed, matching on the name followed by an equals sign -- keep that
# line's layout (plain `=` assignment, one setting per line).
RATIO = 0.75 # Ratio of needle length to line spacing.
SAMPLES = 500000 # Number of times that the needle is dropped.
SEED = 13 # Random seed for repeatability.
# -----------------------------------------------------------------------------
# CONFIGURATION ---------------------------------------------------------------
# Settings for the Buffon's needle simulation below.
# NOTE(review): the companion shell scripts appear to rewrite the RATIO line
# with sed, matching on the name followed by an equals sign -- keep that
# line's layout (plain `=` assignment, one setting per line).
# The estimate printed at the end of the script divides by the number of
# crossings using the short-needle relationship, which holds only for ratios
# up to 1.
RATIO = 0.75 # Ratio of needle length to line spacing.
SAMPLES = 500000 # Number of times that the needle is dropped.
SEED = 13 # Random seed for repeatability.
# -----------------------------------------------------------------------------
#' Simulate a single drop of Buffon's needle.
#'
#' The needle's angle relative to the lines is drawn uniformly from
#' [0, pi / 2], then a second uniform draw on [0, 1] stands in for the
#' position of the needle's centre. The needle crosses a line when that
#' second draw is no larger than `ratio * sin(angle)`.
#'
#' @param ratio Ratio of needle length to line spacing. Defaults to the
#'   script-level RATIO setting (looked up when the function is called), so
#'   existing zero-argument calls behave exactly as before.
#' @return A single logical value: TRUE if the needle crosses a line.
buffon <- function(ratio = RATIO) {
  # Sample angle of needle with respect to lines.
  theta <- runif(1, 0, pi / 2)
  # Sample the location of the needle's centre. Does the needle cross a line?
  # The angle is drawn before the position so that, for a given seed, the
  # random number stream is consumed in the same order as it always was.
  runif(1) <= ratio * sin(theta)
}
# Fix the random number stream so that repeated runs give the same answer.
set.seed(SEED)
# Drop the needle SAMPLES times; each element records whether it crossed.
cross <- replicate(SAMPLES, buffon())
#
# Estimate pi from the observed number of crossings.
#
pi_estimate <- 2 * SAMPLES * RATIO / sum(cross)
cat(
  sprintf(
    "ratio = %.3f / %6d samples -> estimate = %.6f\n",
    RATIO, SAMPLES, pi_estimate
  )
)
# Read the DEBUG flag from the environment, defaulting to FALSE when unset.
#
# as.logical() gives NA for any string it does not recognise (for example "",
# "1" or "yes"), and an NA flag makes a later `if (DEBUG)` fail. isTRUE()
# maps every such value to FALSE so DEBUG is always a plain TRUE or FALSE.
DEBUG <- isTRUE(as.logical(Sys.getenv("DEBUG", unset = "FALSE")))
print(DEBUG)
# CONFIGURATION ---------------------------------------------------------------
# 1. Get value from environment.
#
# Sys.getenv() returns a character string and gives "" if the variable is
# not set, so the raw value cannot be used as a flag yet.
#
DEBUG <- Sys.getenv("DEBUG")
# 2. Impose a reasonable default.
#
# Fall back to "FALSE" when the variable is not set, then convert to logical.
# as.logical() gives NA for strings it does not recognise (for example "",
# "1" or "yes"), which would make `if (DEBUG)` fail, so isTRUE() maps those
# to FALSE as well.
#
DEBUG <- isTRUE(as.logical(Sys.getenv("DEBUG", unset = "FALSE")))
# SMTP credentials.
#
# Taken from the environment so that they never appear in the script. Each
# is "" if the corresponding variable is not set.
#
SMTP_USER <- Sys.getenv("SMTP_USER")
SMTP_PASS <- Sys.getenv("SMTP_PASS")
# -----------------------------------------------------------------------------
# CONFIGURATION ---------------------------------------------------------------
# All settings are collected here, at the top of the script.
N <- 1000 # Number of samples.
K <- 10 # Number of folds.
DEBUG <- FALSE # Emit progress messages when TRUE.
RECIPIENTS <- c("[email protected]", "[email protected]") # Who receives the results.
SMTP_SERVER <- "smtp.example.com"
SMTP_PORT <- 587
# NOTE(review): credentials are hard-coded in the source. Prefer reading them
# from the environment (e.g. Sys.getenv("SMTP_PASS")) so that they stay out
# of version control.
SMTP_USER <- "admin"
SMTP_PASS <- "13*y2YQaV%p5"
# -----------------------------------------------------------------------------
# Rest of script goes here...
#
# exists() takes the *name* of a variable as a string. Passing RECIPIENTS
# itself made it look for an object named after an email address, not for
# the variable. Also skip sending when the recipient list is empty.
if (exists("RECIPIENTS") && length(RECIPIENTS) > 0) {
  if (DEBUG) message("Sending email! ✉")
  # Send email.
}
# Greet whoever is named in the NAME environment variable.
# When that variable is not set, fall back to greeting "World".
name <- Sys.getenv("NAME", unset = "World")
message("Hello ", name, "!")
# The name to greet is held in a variable so it can be changed in one place.
name <- "World"
message("Hello ", name, "!")
# Simplest version: emit a fixed greeting. message() writes to stderr.
message("Hello World!")
#!/usr/bin/env Rscript
# The shebang line above lets this file be run directly as a program (once
# it has been made executable) instead of being passed to R by hand.
# Emit a fixed greeting. message() writes to stderr.
message("Hello World!")

Some files for a talk about Productivity Hacks.

Setup:

Launch a t2.2xlarge instance (8 cores) on AWS.

  1. Start a tmux session. Create two horizontal panes (Ctrl-b ").
  2. sudo apt update && sudo apt install -y htop r-base
  3. wget -O buffon-needle.R http://bit.ly/2HieqWc
#!/bin/bash
# Run the simulation once for each needle/spacing ratio, one after another.
# sed substitutes the ratio setting on the fly and pipes the result to R; the
# script file on disk is not modified.
for ratio in 0.25 0.50 0.75 1.00 1.25 1.50
do
  sed "s/\(RATIO =\).*/\1 ${ratio}/" buffon-needle.R | R --slave
done
#!/bin/bash
# Run the simulation for every ratio in parallel, then block until all of
# the runs have finished.
#
# NOTE(review): the sequence starts at 0; confirm that the R script copes
# with a ratio at which the needle never crosses a line.
for ratio in $(seq 0 0.125 1)
do
  # Background the pipeline directly in this shell. Wrapping it in a subshell,
  # as in "( ... & )", makes the job a child of that subshell rather than of
  # this script, so the `wait` below would have nothing to wait for and the
  # script would exit while R was still running.
  sed "s/\(RATIO = \).*/\1${ratio}/" buffon-needle.R | R --slave &
done
# Wait for every background job started above.
wait
#!/bin/bash
# Run the simulation for each ratio produced by seq (0 to 1 in steps of
# 0.25), one run after another.
script=buffon-needle.R
for ratio in $(seq 0 0.25 1)
do
  # Substitute the ratio setting on the fly; the file itself is not modified.
  sed "s/\(RATIO =\).*/\1 ${ratio}/" "${script}" | R --slave
done
#!/bin/bash
# Run the simulation once with an overridden ratio. sed substitutes the
# setting on the fly and pipes the result to R; the file on disk is untouched.
ratio=0.5
sed "s/\(RATIO =\).*/\1 ${ratio}/" buffon-needle.R | R --slave
# Debugging switch.
DEBUG <- FALSE
#
# Some code goes here...
#
# Size of the analysis.
N <- 1000 # Number of samples.
K <- 10 # Number of folds.
#
# Some more code goes here...
#
# Addresses that receive the results.
RECIPIENTS <- c(
  "[email protected]",
  "[email protected]"
)
#
# Still more code goes here...
#
# Mail server connection details and credentials.
SMTP_SERVER <- "smtp.example.com"
SMTP_PORT <- 587
SMTP_USER <- "admin"
SMTP_PASS <- "13*y2YQaV%p5"
# 1. Load libraries
library(dplyr)
library(tidyr)
library(purrr)
# 2. Load data
#
# Read the data set from titanic.csv in the working directory.
titanic <- read.csv(file = "titanic.csv")
# 3. Prepare data
#
# Drop every row that has a missing value in any column.
titanic <- na.omit(object = titanic)
# 4. Do analysis
#
# Fit a binomial GLM of `survived` on all of the remaining columns.
model <- glm(survived ~ ., data = titanic, family = binomial)
# 5. Disseminate results
#
# - Write to CSV/XLS or database
# - Send email
# 6. Clean up
#
# Remove the data from the workspace now that the model has been fitted.
rm(list = "titanic")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment