This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | functions { | |
| // lower bound is a, upper bound is b, rv is x, mean is mu, sd is sigma | |
| vector xi(vector x, real mu, real sigma) { | |
| return((x - mu)./sigma); | |
| } | |
| real alpha(real a, real mu, real sigma) { | |
| real out; | |
| out = (a==negative_infinity())? negative_infinity(): (a - mu)/sigma; | |
| return(out); | |
| } | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | functions { | |
| real binormal_cdf(real z1, real z2, real rho) { | |
| if (z1 != 0 || z2 != 0) { | |
| real denom = fabs(rho) < 1.0 ? sqrt((1 + rho) * (1 - rho)) : not_a_number(); | |
| real a1 = (z2 / z1 - rho) / denom; | |
| real a2 = (z1 / z2 - rho) / denom; | |
| real product = z1 * z2; | |
| real delta = product < 0 || (product == 0 && (z1 + z2) < 0); | |
| return 0.5 * (Phi(z1) + Phi(z2) - delta) - owens_t(z1, a1) - owens_t(z2, a2); | |
| } | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | library(rstan); library(dplyr); library(ggplot2); library(reshape2) | |
| options(mc.cores = parallel::detectCores()) | |
| set.seed(49) | |
| # Observations | |
| N <- 2000 | |
| # Draw errors | |
| rho <- 0.5 | |
| Sigma <- matrix(c(1, rho, rho, 1), 2, 2) | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | data { | |
| int N; | |
| int PX; // dimension of exogenous covariates | |
| int PN; // dimension of endogenous covariates | |
| int PZ; // dimension of instruments | |
| matrix[N, PX] X_exog; // exogenous covariates | |
| matrix[N, PN] X_endog; // engogenous covariates | |
| matrix[N, PZ] Z; // instruments | |
| vector[N] Y_outcome; // outcome variable | |
| int<lower=0,upper=1> run_estimation; // simulate (0) or estimate (1) | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | data { | |
| int N; // number of rows | |
| int T; // number of inidvidual-choice sets/task combinations | |
| int I; // number of Individuals | |
| int P; // number of covariates | |
| vector<lower = 0, upper = 1>[N] choice; // binary indicator for choice | |
| matrix[N, P] X; // product attributes | |
| int task[T]; // index for tasks | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | data { | |
| int N; // number of rows | |
| int T; // number of inidvidual-choice sets/task combinations | |
| int I; // number of Individuals | |
| int P; // number of covariates | |
| vector<lower = 0, upper = 1>[N] choice; // binary indicator for choice | |
| matrix[N, P] X; // product attributes | |
| int task[T]; // index for tasks | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | library(dplyr) | |
| # Let's make some fake data! | |
| # These will contain true labels and 500 binary features | |
| # We'll follow the general approach here: http://modernstatisticalworkflow.blogspot.com/2018/03/1000-labels-and-4500-observations-have.html | |
| # but rather than using a penalized likelihood estimator for the label-activity probabilities, we'll approximate | |
| # it by taking the average of the frequency of activity within label and 0.5. | |
| N <- 100000 | |
| I <- 30000 | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # This illustrates why Neal's Funnel really messes with us when we're trying | |
| # to estimate random effects models using maximum likelihood. Ie. why we need | |
| # approximations with lme4, INLA, or need to go full Bayes. The mode is an | |
| # awful one. | |
| # Let's simulate some data from a really simple model with 10 | |
| # random intercepts whose variance you want to estimate | |
| N <- 1000 | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # This gist uses the classifier defined in this post: http://modernstatisticalworkflow.blogspot.com/2018/03/1000-labels-and-4500-observations-have.html | |
| # applied with this approximation: https://gist.github.com/khakieconomics/0325c054b1499d5037a1de5d1014645a | |
| # to Kaggle's credit card fraud data--a fun rare-case problem. In a cross-validation exercise with 50 randomly selected hold-out | |
| # sets, it appears perform similarly (or perhaps better) than others' attempts using random forests and neural networks. | |
| # The upside of course is that it estimates/generates predictions in a couple of seconds. | |
| library(tidyverse); library(reticulate); library(pROC) | |
| # Download some data from Kaggle | |
| system("kaggle datasets download -d mlg-ulb/creditcardfraud -p ~/Documents/kagglestuff") | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | library(rstanarm); library(tidyverse) | |
| options(mc.cores = parallel::detectCores()) | |
| set.seed(42) | |
| data("radon") | |
| head(treatment_sample) | |
| # Some levels have no variance in the outcomes, making likelihood estimates impossible | |
| # Adding a tiny bit of noise fixes the problem | |
| radon$log_uranium <- rnorm(nrow(radon), radon$log_uranium, 0.05) |