Benjamin Chan benjamin-chan

benjamin-chan / stataMP.bat

Last active April 8, 2016 16:24

stataMP.bat

	set STATATMP=E:\Share\Temp\chanb\StataMP
	E:\Share\Applications\Stata14\StataMP-64.exe /UseRegistryStartin

benjamin-chan / loadPkg.R

Last active July 1, 2016 20:48

R function to install and load packages

	loadPkg <- function (pkg) {
	# pkg should be an as.character() object
	repos <- "http://cloud.r-project.org"
	if (!require(pkg, character.only=TRUE)) {
	install.packages(pkg, dependencies=TRUE, repos=repos)
	}
	require(pkg, character.only=TRUE)
	}
	# loadPkg("data.table")
	# loadPkg("xtable")

benjamin-chan / colorPalette.R

Created July 30, 2015 22:00

CHSE color palette

benjamin-chan / getHCPCS.R

Created September 3, 2015 18:25

Get Alpha-Numeric HCPCS from CMS

	year <- 13
	url <- sprintf("https://www.cms.gov/Medicare/Coding/HCPCSReleaseCodeSets/Downloads/%02danweb.zip",
	year)
	f <- tempfile()
	download.file(url, f)
	file.info(f)
	unzip(f, list=TRUE)
	unzip(f, exdir=tempdir())
	list.files(tempdir())
	f <- file.path(tempdir(), sprintf("HCPC20%02d_A-N.txt", year))

benjamin-chan / makeMetadata.R

Last active May 23, 2016 23:20

Function to create metadata object

	makeMetadata <- function(D, note=NULL) {
	if (is.data.frame(D)) {
	list(objectName = deparse(substitute(D)),
	timeStamp = sprintf("%s", Sys.time()),
	objectSize = format(object.size(D), units="auto"),
	note = note,
	rowCount = nrow(D),
	colCount = ncol(D),
	colNames = names(D),
	colClasses = sapply(D, class),

benjamin-chan / ruca.R

Last active October 14, 2015 20:01

Rural-Urban Commuting Area Codes (RUCAs)

	# Source: http://depts.washington.edu/uwruca/ruca-data.php
	# Code definitions: http://depts.washington.edu/uwruca/ruca-codes.php
	url <- "http://depts.washington.edu/uwruca/ruca_data/2006%20Complete%20Excel%20RUCA%20file.xls.zip"
	path <- tempdir()
	f <- tempfile()
	download.file(url, f)
	unzip(f, list=TRUE)
	filenames <- unzip(f, list=TRUE)[, "Name"]
	isValidFile <- grep("^[0-9a-z]", filenames, ignore.case=TRUE)
	unzip(f, files=filenames[isValidFile], exdir=path)

benjamin-chan / flow.R

Last active July 7, 2016 16:46

Create a linear flow diagram

	flow <- function (x, switch=NULL) {
	# `x` is a character vector of items to diagram
	# switch is an optional vector (logical or integer)
	# specifying if the element of `x` is run (TRUE or 1) or not (FALSE or 0)
	# Usage:
	# > flow(c("Part 1", "Part 2", "...", "Part N"),
	# + c(TRUE, FALSE, ..., TRUE))
	require(DiagrammeR, quietly=TRUE)
	require(devtools, quietly=TRUE)
	if (packageVersion("devtools") >= "1.12.0") {

benjamin-chan / parallelGLM.R

Last active October 22, 2015 03:22

Example of parallelization of glm()

	if (!require(devtools)) {install.packages("devtools")}
	library(devtools)
	source_gist("https://gist.github.com/benjamin-chan/3b59313e8347fffea425")
	loadPkg("doParallel")
	loadPkg("data.table")

	J <- 30 # This is the number of models to fit
	N <- 2E5 # This is the size of the dataset
	i <- rep(1:N, each=J)
	D <- data.table(i, # id

benjamin-chan / binning.R

Last active October 21, 2015 02:15

	library(data.table)
	nBins <- 25
	breaks <- seq(0, 1, length.out=nBins+1)
	midpoints <- breaks[1:nBins] + (breaks[2:(nBins+1)] - breaks[1:nBins]) / 2
	D <- data.table(x = runif(1e4))
	D <- D[, xBinned := cut(x, breaks=breaks, labels=sprintf("%.2f", midpoints))]
	D <- D[,
	.(.N,
	label=as.character(xBinned),
	min = min(x),

benjamin-chan / parallelGLMTuning.md

Last active November 19, 2015 23:57

Tuning parallel processing of GLM model fitting

Parallel GLM tuning

Benjamin Chan
r Sys.time()

Clear the workspace environment and load packages.

rm(list=ls())
gc()