Benjamin Chan
r Sys.time()
Clear the workspace environment and load packages.
rm(list=ls())
gc()| set STATATMP=E:\Share\Temp\chanb\StataMP | |
| E:\Share\Applications\Stata14\StataMP-64.exe /UseRegistryStartin |
| loadPkg <- function (pkg) { | |
| # pkg should be an as.character() object | |
| repos <- "http://cloud.r-project.org" | |
| if (!require(pkg, character.only=TRUE)) { | |
| install.packages(pkg, dependencies=TRUE, repos=repos) | |
| } | |
| require(pkg, character.only=TRUE) | |
| } | |
| # loadPkg("data.table") | |
| # loadPkg("xtable") |
| colorPalette <- function () { | |
| c(rgb( 1, 67, 134, maxColorValue=255), | |
| rgb(119, 120, 123, maxColorValue=255), | |
| rgb(139, 184, 234, maxColorValue=255), | |
| rgb(188, 190, 192, maxColorValue=255), | |
| rgb( 94, 122, 162, maxColorValue=255), | |
| rgb(223, 122, 28, maxColorValue=255)) | |
| } |
| year <- 13 | |
| url <- sprintf("https://www.cms.gov/Medicare/Coding/HCPCSReleaseCodeSets/Downloads/%02danweb.zip", | |
| year) | |
| f <- tempfile() | |
| download.file(url, f) | |
| file.info(f) | |
| unzip(f, list=TRUE) | |
| unzip(f, exdir=tempdir()) | |
| list.files(tempdir()) | |
| f <- file.path(tempdir(), sprintf("HCPC20%02d_A-N.txt", year)) |
| makeMetadata <- function(D, note=NULL) { | |
| if (is.data.frame(D)) { | |
| list(objectName = deparse(substitute(D)), | |
| timeStamp = sprintf("%s", Sys.time()), | |
| objectSize = format(object.size(D), units="auto"), | |
| note = note, | |
| rowCount = nrow(D), | |
| colCount = ncol(D), | |
| colNames = names(D), | |
| colClasses = sapply(D, class), |
| # Source: http://depts.washington.edu/uwruca/ruca-data.php | |
| # Code definitions: http://depts.washington.edu/uwruca/ruca-codes.php | |
| url <- "http://depts.washington.edu/uwruca/ruca_data/2006%20Complete%20Excel%20RUCA%20file.xls.zip" | |
| path <- tempdir() | |
| f <- tempfile() | |
| download.file(url, f) | |
| unzip(f, list=TRUE) | |
| filenames <- unzip(f, list=TRUE)[, "Name"] | |
| isValidFile <- grep("^[0-9a-z]", filenames, ignore.case=TRUE) | |
| unzip(f, files=filenames[isValidFile], exdir=path) |
| flow <- function (x, switch=NULL) { | |
| # `x` is a character vector of items to diagram | |
| # switch is an optional vector (logical or integer) | |
| # specifying if the element of `x` is run (TRUE or 1) or not (FALSE or 0) | |
| # Usage: | |
| # > flow(c("Part 1", "Part 2", "...", "Part N"), | |
| # + c(TRUE, FALSE, ..., TRUE)) | |
| require(DiagrammeR, quietly=TRUE) | |
| require(devtools, quietly=TRUE) | |
| if (packageVersion("devtools") >= "1.12.0") { |
| if (!require(devtools)) {install.packages("devtools")} | |
| library(devtools) | |
| source_gist("https://gist.github.com/benjamin-chan/3b59313e8347fffea425") | |
| loadPkg("doParallel") | |
| loadPkg("data.table") | |
| J <- 30 # This is the number of models to fit | |
| N <- 2E5 # This is the size of the dataset | |
| i <- rep(1:N, each=J) | |
| D <- data.table(i, # id |
| library(data.table) | |
| nBins <- 25 | |
| breaks <- seq(0, 1, length.out=nBins+1) | |
| midpoints <- breaks[1:nBins] + (breaks[2:(nBins+1)] - breaks[1:nBins]) / 2 | |
| D <- data.table(x = runif(1e4)) | |
| D <- D[, xBinned := cut(x, breaks=breaks, labels=sprintf("%.2f", midpoints))] | |
| D <- D[, | |
| .(.N, | |
| label=as.character(xBinned), | |
| min = min(x), |