Skip to content

Instantly share code, notes, and snippets.

@benjamin-chan
benjamin-chan / stataMP.bat
Last active April 8, 2016 16:24
stataMP.bat
set STATATMP=E:\Share\Temp\chanb\StataMP
E:\Share\Applications\Stata14\StataMP-64.exe /UseRegistryStartin
@benjamin-chan
benjamin-chan / loadPkg.R
Last active July 1, 2016 20:48
R function to install and load packages
loadPkg <- function (pkg) {
# pkg should be an as.character() object
repos <- "http://cloud.r-project.org"
if (!require(pkg, character.only=TRUE)) {
install.packages(pkg, dependencies=TRUE, repos=repos)
}
require(pkg, character.only=TRUE)
}
# loadPkg("data.table")
# loadPkg("xtable")
@benjamin-chan
benjamin-chan / colorPalette.R
Created July 30, 2015 22:00
CHSE color palette
colorPalette <- function () {
c(rgb( 1, 67, 134, maxColorValue=255),
rgb(119, 120, 123, maxColorValue=255),
rgb(139, 184, 234, maxColorValue=255),
rgb(188, 190, 192, maxColorValue=255),
rgb( 94, 122, 162, maxColorValue=255),
rgb(223, 122, 28, maxColorValue=255))
}
@benjamin-chan
benjamin-chan / getHCPCS.R
Created September 3, 2015 18:25
Get Alpha-Numeric HCPCS from CMS
year <- 13
url <- sprintf("https://www.cms.gov/Medicare/Coding/HCPCSReleaseCodeSets/Downloads/%02danweb.zip",
year)
f <- tempfile()
download.file(url, f)
file.info(f)
unzip(f, list=TRUE)
unzip(f, exdir=tempdir())
list.files(tempdir())
f <- file.path(tempdir(), sprintf("HCPC20%02d_A-N.txt", year))
@benjamin-chan
benjamin-chan / makeMetadata.R
Last active May 23, 2016 23:20
Function to create metadata object
makeMetadata <- function(D, note=NULL) {
if (is.data.frame(D)) {
list(objectName = deparse(substitute(D)),
timeStamp = sprintf("%s", Sys.time()),
objectSize = format(object.size(D), units="auto"),
note = note,
rowCount = nrow(D),
colCount = ncol(D),
colNames = names(D),
colClasses = sapply(D, class),
@benjamin-chan
benjamin-chan / ruca.R
Last active October 14, 2015 20:01
Rural-Urban Commuting Area Codes (RUCAs)
# Source: http://depts.washington.edu/uwruca/ruca-data.php
# Code definitions: http://depts.washington.edu/uwruca/ruca-codes.php
url <- "http://depts.washington.edu/uwruca/ruca_data/2006%20Complete%20Excel%20RUCA%20file.xls.zip"
path <- tempdir()
f <- tempfile()
download.file(url, f)
unzip(f, list=TRUE)
filenames <- unzip(f, list=TRUE)[, "Name"]
isValidFile <- grep("^[0-9a-z]", filenames, ignore.case=TRUE)
unzip(f, files=filenames[isValidFile], exdir=path)
@benjamin-chan
benjamin-chan / flow.R
Last active July 7, 2016 16:46
Create a linear flow diagram
flow <- function (x, switch=NULL) {
# `x` is a character vector of items to diagram
# switch is an optional vector (logical or integer)
# specifying if the element of `x` is run (TRUE or 1) or not (FALSE or 0)
# Usage:
# > flow(c("Part 1", "Part 2", "...", "Part N"),
# + c(TRUE, FALSE, ..., TRUE))
require(DiagrammeR, quietly=TRUE)
require(devtools, quietly=TRUE)
if (packageVersion("devtools") >= "1.12.0") {
@benjamin-chan
benjamin-chan / parallelGLM.R
Last active October 22, 2015 03:22
Example of parallelization of glm()
if (!require(devtools)) {install.packages("devtools")}
library(devtools)
source_gist("https://gist.github.com/benjamin-chan/3b59313e8347fffea425")
loadPkg("doParallel")
loadPkg("data.table")
J <- 30 # This is the number of models to fit
N <- 2E5 # This is the size of the dataset
i <- rep(1:N, each=J)
D <- data.table(i, # id
library(data.table)
nBins <- 25
breaks <- seq(0, 1, length.out=nBins+1)
midpoints <- breaks[1:nBins] + (breaks[2:(nBins+1)] - breaks[1:nBins]) / 2
D <- data.table(x = runif(1e4))
D <- D[, xBinned := cut(x, breaks=breaks, labels=sprintf("%.2f", midpoints))]
D <- D[,
.(.N,
label=as.character(xBinned),
min = min(x),
@benjamin-chan
benjamin-chan / parallelGLMTuning.md
Last active November 19, 2015 23:57
Tuning parallel processing of GLM model fitting

Parallel GLM tuning

Benjamin Chan
r Sys.time()

Clear the workspace environment and load packages.

rm(list=ls())
gc()