Benjamin Chan
r Sys.time()
Clear the workspace environment and load packages.
rm(list=ls())
gc()
set STATATMP=E:\Share\Temp\chanb\StataMP | |
E:\Share\Applications\Stata14\StataMP-64.exe /UseRegistryStartin |
loadPkg <- function (pkg) { | |
# pkg should be an as.character() object | |
repos <- "http://cloud.r-project.org" | |
if (!require(pkg, character.only=TRUE)) { | |
install.packages(pkg, dependencies=TRUE, repos=repos) | |
} | |
require(pkg, character.only=TRUE) | |
} | |
# loadPkg("data.table") | |
# loadPkg("xtable") |
colorPalette <- function () { | |
c(rgb( 1, 67, 134, maxColorValue=255), | |
rgb(119, 120, 123, maxColorValue=255), | |
rgb(139, 184, 234, maxColorValue=255), | |
rgb(188, 190, 192, maxColorValue=255), | |
rgb( 94, 122, 162, maxColorValue=255), | |
rgb(223, 122, 28, maxColorValue=255)) | |
} |
year <- 13 | |
url <- sprintf("https://www.cms.gov/Medicare/Coding/HCPCSReleaseCodeSets/Downloads/%02danweb.zip", | |
year) | |
f <- tempfile() | |
download.file(url, f) | |
file.info(f) | |
unzip(f, list=TRUE) | |
unzip(f, exdir=tempdir()) | |
list.files(tempdir()) | |
f <- file.path(tempdir(), sprintf("HCPC20%02d_A-N.txt", year)) |
makeMetadata <- function(D, note=NULL) { | |
if (is.data.frame(D)) { | |
list(objectName = deparse(substitute(D)), | |
timeStamp = sprintf("%s", Sys.time()), | |
objectSize = format(object.size(D), units="auto"), | |
note = note, | |
rowCount = nrow(D), | |
colCount = ncol(D), | |
colNames = names(D), | |
colClasses = sapply(D, class), |
# Source: http://depts.washington.edu/uwruca/ruca-data.php | |
# Code definitions: http://depts.washington.edu/uwruca/ruca-codes.php | |
url <- "http://depts.washington.edu/uwruca/ruca_data/2006%20Complete%20Excel%20RUCA%20file.xls.zip" | |
path <- tempdir() | |
f <- tempfile() | |
download.file(url, f) | |
unzip(f, list=TRUE) | |
filenames <- unzip(f, list=TRUE)[, "Name"] | |
isValidFile <- grep("^[0-9a-z]", filenames, ignore.case=TRUE) | |
unzip(f, files=filenames[isValidFile], exdir=path) |
flow <- function (x, switch=NULL) { | |
# `x` is a character vector of items to diagram | |
# switch is an optional vector (logical or integer) | |
# specifying if the element of `x` is run (TRUE or 1) or not (FALSE or 0) | |
# Usage: | |
# > flow(c("Part 1", "Part 2", "...", "Part N"), | |
# + c(TRUE, FALSE, ..., TRUE)) | |
require(DiagrammeR, quietly=TRUE) | |
require(devtools, quietly=TRUE) | |
if (packageVersion("devtools") >= "1.12.0") { |
if (!require(devtools)) {install.packages("devtools")} | |
library(devtools) | |
source_gist("https://gist.github.com/benjamin-chan/3b59313e8347fffea425") | |
loadPkg("doParallel") | |
loadPkg("data.table") | |
J <- 30 # This is the number of models to fit | |
N <- 2E5 # This is the size of the dataset | |
i <- rep(1:N, each=J) | |
D <- data.table(i, # id |
library(data.table) | |
nBins <- 25 | |
breaks <- seq(0, 1, length.out=nBins+1) | |
midpoints <- breaks[1:nBins] + (breaks[2:(nBins+1)] - breaks[1:nBins]) / 2 | |
D <- data.table(x = runif(1e4)) | |
D <- D[, xBinned := cut(x, breaks=breaks, labels=sprintf("%.2f", midpoints))] | |
D <- D[, | |
.(.N, | |
label=as.character(xBinned), | |
min = min(x), |