Last active
February 1, 2021 14:44
-
-
Save bbdaniels/210b7206d4b2e67cceb09698068a6846 to your computer and use it in GitHub Desktop.
Selection simulations
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // Selection bias for normally-distributed effect with choice | |
| // Setup | |
| qui { | |
| clear all | |
| cls | |
| version 13 | |
| // Reproducibility with bit.ly/stata-random | |
| set seed 422698 // Timestamp: 2021-01-22 16:41:57 UTC | |
| // Request parameter values | |
| cap prog drop selection_setup | |
| prog def selection_setup | |
| di in smcl `"{ red : Welcome to the selection bias simulation program.}"' | |
| di in smcl `"{ red : Please enter the requested parameter values:}"' | |
| di `"Enter number of observations (integer)"' _request(n) | |
| di `"Enter true effect size (real)"' _request(t) | |
| di `"Enter number of iterations (integer)"' _request(it) | |
| end | |
| } | |
| // Request parameters | |
| selection_setup | |
| qui { | |
| // Create simulation program | |
| cap prog drop selection | |
| prog def selection , rclass // Allow statistics output | |
| qui { | |
| syntax , [pause] [exit] [clear] // Allow pausing and breaking | |
| if "`exit'" == "" preserve // Reset Stata state after finishing program run | |
| // Set parameters for simulation | |
| local n = ${n} // Number of observations | |
| local t = ${t} // True β of treatment effect | |
| local v = 1 // True standard deviation of treatment effect | |
| local s = 1 // True standard deviation of potential outcomes | |
| // Potential outcomes | |
| `clear' | |
| set obs `n' // Create simulation observations | |
| gen id = _n // ID variable | |
| lab var id "Unique ID" | |
| gen y_0 = rnormal(0,`s') // Potential outcome 0: untreated | |
| lab var y_0 "Untreated Potential Outcome" | |
| gen y_1 = y_0 + rnormal(`t',`v') // Potential outcome 1: treated | |
| lab var y_1 "Treated Potential Outcome" | |
| // Allow choice if positive returns | |
| gen return = y_1 - y_0 | |
| lab var return "Individual Gain to Education" | |
| gen education = (return > 0) // Choose education | |
| lab var education "Educated" | |
| gen y = y_0 // Observed outcome | |
| replace y = y_1 if (education == 1) | |
| lab var y "Observed Outcome" | |
| // Create randomization | |
| gen y_rand = y_0 // Observed outcome under randomization | |
| gen education_rand = (rnormal() > 0) | |
| lab var education_rand "Randomized Treatment" | |
| replace y_rand = y_1 if (education_rand == 1) | |
| lab var y_rand "Randomized Observed Outcome" | |
| // Regression time | |
| sum return // Investigate true β | |
| return scalar true = `r(mean)' | |
| reg y education // Investigate observed β | |
| mat results = r(table) // Get regression results | |
| local beta = results[1,1] | |
| local pval = results[4,1] | |
| // Push results to main memory | |
| return scalar sigb = (`pval' < 0.05) | |
| return scalar beta = `beta' | |
| return matrix results = results | |
| // Regression time (randomization) | |
| reg y_rand education_rand // Investigate observed β | |
| mat results = r(table) // Get regression results | |
| local beta_rand = results[1,1] | |
| local pval_rand = results[4,1] | |
| // Push results to main memory | |
| return scalar sigb_rand = (`pval_rand' < 0.05) | |
| return scalar beta_rand = `beta_rand' | |
| return matrix results_rand = results | |
| // Pause or here if we want to see microdata | |
| pause on | |
| `pause' // Type "BREAK" in console to quit, "q" to continue | |
| `exit' // Ends here if [exit] option called | |
| } | |
| end | |
| // Multiple simulations | |
| clear | |
| tempfile sims | |
| } | |
| simulate r(true) r(beta) r(beta_rand) r(sigb) r(sigb_rand) /// | |
| , reps(${it}) saving(`sims') seed(858488) /// Timestamp: 2021-01-22 17:25:50 UTC | |
| : selection , clear | |
| qui { | |
| // Set up graphing | |
| noi di in smcl `"{ red : Compiling results visualization...}"' _newline _newline | |
| use `sims' , clear | |
| qui su _sim_2 | |
| local m2 = r(min) | |
| qui su _sim_3 | |
| local m3 = r(min) | |
| local min = min(`m2',`m3') | |
| // Visualize | |
| tw /// | |
| (histogram _sim_2 , w(0.05) lc(none) fc(red%75) barwidth(0.04) start(`min') freq) /// | |
| (histogram _sim_2 if (_sim_4 == 1), w(0.05) lc(black) fc(none) barwidth(0.04) start(`min') freq) /// | |
| (histogram _sim_1 , w(0.05) lc(none) fc(black%50) barwidth(0.04) start(`min') freq) /// | |
| , legend(on order(0 "Effect Size With Choice:" 1 "Estimated Returns" 2 "Significant Estimates" 3 "True Returns") /// | |
| ring(1) pos(12) r(1) symxsize(small) region(lc(none))) /// | |
| nodraw saving(g1.gph , replace) | |
| // Visualize with randomization | |
| tw /// | |
| (histogram _sim_3 , w(0.05) lc(none) fc(red%75) barwidth(0.045) start(`min') freq) /// | |
| (histogram _sim_3 if (_sim_5 == 1), w(0.05) lc(black) fc(none) barwidth(0.045) start(`min') freq) /// | |
| (histogram _sim_1 , w(0.05) lc(none) fc(black%50) barwidth(0.04) start(`min') freq) /// | |
| , legend(on order(0 "Effect Size With Randomization:" 1 "Estimated Returns" 2 "Significant Estimates" 3 "True Returns") /// | |
| ring(1) pos(12) r(1) symxsize(small) region(lc(none))) /// | |
| nodraw saving(g2.gph , replace) | |
| // Graph | |
| graph combine g1.gph g2.gph , c(1) xcom | |
| !rm g1.gph g2.gph | |
| } | |
| // End of dofile |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // Selection bias for normally-distributed effect with choice | |
| // Setup | |
| clear | |
| cls | |
| version 13 | |
| // Reproducibility with bit.ly/stata-random | |
| set seed 422698 // Timestamp: 2021-01-22 16:41:57 UTC | |
| // Create simulation program | |
| cap prog drop selection | |
| prog def selection , rclass // Allow statistics output | |
| syntax , [pause] [exit] [clear] // Allow pausing and breaking | |
| if "`exit'" == "" preserve // Reset Stata state after finishing program run | |
| // Set parameters for simulation | |
| local n = 1000 // Number of observations | |
| local t = 0 // True β of treatment effect | |
| local v = 1 // True standard deviation of treatment effect | |
| local s = 1 // True standard deviation of potential outcomes | |
| // Potential outcomes | |
| `clear' | |
| set obs `n' // Create simulation observations | |
| gen id = _n // ID variable | |
| lab var id "Unique ID" | |
| gen y_0 = rnormal(0,`s') // Potential outcome 0: untreated | |
| lab var y_0 "Untreated Potential Outcome" | |
| gen y_1 = y_0 + rnormal(`t',`v') // Potential outcome 1: treated | |
| lab var y_1 "Treated Potential Outcome" | |
| // Allow choice if positive returns | |
| gen return = y_1 - y_0 | |
| lab var return "Individual Gain to Education" | |
| gen education = (return > 0) // Choose education | |
| lab var education "Educated" | |
| gen y = y_0 // Observed outcome | |
| replace y = y_1 if (education == 1) | |
| lab var y "Observed Outcome" | |
| // Create randomization | |
| gen y_rand = y_0 // Observed outcome under randomization | |
| gen education_rand = (rnormal() > 0) | |
| lab var education_rand "Randomized Treatment" | |
| replace y_rand = y_1 if (education_rand == 1) | |
| lab var y_rand "Randomized Observed Outcome" | |
| // Regression time | |
| sum return // Investigate true β | |
| return scalar true = `r(mean)' | |
| reg y education // Investigate observed β | |
| mat results = r(table) // Get regression results | |
| local beta = results[1,1] | |
| // Push results to main memory | |
| return scalar beta = `beta' | |
| return matrix results = results | |
| // Regression time (randomization) | |
| reg y_rand education_rand // Investigate observed β | |
| mat results = r(table) // Get regression results | |
| local beta_rand = results[1,1] | |
| // Push results to main memory | |
| return scalar beta_rand = `beta_rand' | |
| return matrix results_rand = results | |
| // Pause or here if we want to see microdata | |
| pause on | |
| `pause' // Type "BREAK" in console to quit, "q" to continue | |
| `exit' // Ends here if [exit] option called | |
| end | |
| // Single simulation run | |
| selection | |
| matlist r(results) // View returned estimates | |
| di r(beta) // View estimated beta | |
| di r(true) // View true beta | |
| // Single simulation run with microdata | |
| selection, exit | |
| // Multiple simulations | |
| clear | |
| tempfile sims | |
| simulate r(true) r(beta) r(beta_rand) /// | |
| , reps(100) saving(`sims') seed(858488) /// Timestamp: 2021-01-22 17:25:50 UTC | |
| : selection , clear | |
| use `sims' , clear | |
| // Visualize | |
| tw /// | |
| (histogram _sim_2 , w(0.05) lc(none) fc(red) barwidth(0.04) start(-0.2) freq) /// | |
| (histogram _sim_1 , w(0.05) lc(none) fc(gs12) barwidth(0.04) start(-0.2) freq) /// | |
| , legend(on c(1) order(0 "With Choice:" 1 "Estimated Returns" 2 "True Returns") ring(0) pos(1)) | |
| // Visualize with randomization | |
| tw /// | |
| (histogram _sim_3 , w(0.05) lc(none) fc(red%75) barwidth(0.045) start(-0.2) freq) /// | |
| (histogram _sim_1 , w(0.05) lc(none) fc(black%75) barwidth(0.04) start(-0.2) freq) /// | |
| , legend(on c(1) order(0 "With Randomization:" 1 "Estimated Returns" 2 "True Returns") ring(0) pos(1)) | |
| // End of dofile |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| Simulation 2: | |
| All for a more structured wage equation. | |
| Lets take a more complicated function, | |
| so that your `skills’ are √sθ. | |
| Here, θ is exogenous ability. | |
| The benefits of being educated w^e=a+√sθ and, | |
| in fact, s itself will depend on θ, | |
| so that w^e=a+√(s(θ)θ). | |
| So. people choose s to max. w^e | |
| and the costs of education are given by rs, | |
| where r is the interest rate. So, simulation does the following: | |
| Step 1: Draw θ from a distribution | |
| Step 2: Let the user define the interest rate between 0 and 1 | |
| Step 3: Assign an optimal s to everyone, which just max. w^e - rs, which implies that s*=θ/(4r^2) | |
| Step 4: Construct a regression by adding some noise | |
| Step 5: Regress w^e on s | |
| Then, modifications and questions for understanding this better could be: | |
| Q1. What would happen if s was allocated randomly? | |
| (problem here is that s is not either 0 or 1, | |
| so perhaps see if we can bound r to make things more sensible) | |
| Q2. Would increasing the sample size solve the problem? | |
| */ | |
| // Start of dofile | |
| // Setup | |
| clear | |
| cls | |
| version 13 | |
| // Reproducibility with bit.ly/stata-random | |
| set seed 155966 // Timestamp: 2021-01-22 18:25:30 UTC | |
| set obs 100 // allow x education levels | |
| gen education = _n-1 | |
| tsset education | |
| gen r = 0.05 // Interest rate (1=100%) | |
| local n = 5 // number of individuals | |
| local colors = "maroon navy dkgreen black dkorange" | |
| forv i = 1/`n' { | |
| local theta`i' = runiform() | |
| gen payoff`i' = sqrt(education*`theta`i'') | |
| gen margin`i' = d.payoff`i' | |
| gen chosen`i' = margin`i' > r | |
| gen opt`i' = f.chosen`i' != chosen`i' & education != 99 | |
| local color : word `i' of `colors' | |
| local graphs = "`graphs' " /// | |
| + "(line payoff`i' education , lc(`color'))" /// | |
| + "(line payoff`i' education if chosen`i' == 1 , lw(vthick) lc(`color'))" /// | |
| + "(scatter payoff`i' education if opt`i' == 1 , mc(black) msize(large))" /// | |
| local graphs2 = "`graphs2' " /// | |
| + "(line margin`i' education if chosen`i' == 1, lc(`color') lp(dash))" /// | |
| + "(line margin`i' education if chosen`i' == 0, lc(gs14) lp(dash))" | |
| } | |
| tempfile g1 g2 | |
| tw `graphs' /// | |
| , title("Returns") xtit(" ") /// | |
| saving(`g1'.gph, replace) nodraw | |
| tw `graphs2' if education > 10 /// | |
| , title("Marginal Returns") xtit(" ") yline(0.05) /// | |
| saving(`g2'.gph, replace) nodraw ylab(0 "0" 0.05 "R") | |
| graph combine `g1'.gph `g2'.gph , c(1) xcom ysize(7) | |
| // End of dofile |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment