Skip to content

Instantly share code, notes, and snippets.

/* Order joined by the BY variables (county, degree) ahead of the transpose;
   the sorted rows are written back over joined itself */
proc sort
    data = joined
    out  = joined;
    by county degree;
run;
/* And transpose! */
proc transpose data = joined out = constraints(drop = _NAME_) prefix = con;
by county degree;
/* target_data is the data set you want to suppress
dimensions is a pipe-separated list of dimensions to suppress */
%macro constraints(target_data, dimensions);
/* Count the number of dimensions we're working with */
%let num_dims = %eval(%sysfunc(count(&dimensions, |)) + 1);
/* Build a macro variable array with the dimension names */
/* Minimize sum of cells suppressed, with a small punishment for suppressing an unnecessary number of cells (e.g., suppressing all 0's) */
minimize target = (sum{i in dec_set} values[i] * choices[i]) + (sum{i in dec_set} choices[i] * .0001);
/* Primary suppressed cells must be suppressed in solution */
con primary_con {i in dec_set}: choices[i] >= primary[i];
/* These decision variables govern whether we suppress 0 or 2+ cells within each grouping constraint */
var choose_zero {con_set} binary;
/* For each grouping constraint, we must have 0 cells suppressed if we're choosing to have 0 cells suppressed */
con zero_con {i in con_set}: (sum{j in dec_set} choices[j] * constraints[i, j]) - (1 - choose_zero[i]) * 100000000000 <= 0;
/* For each grouping constraint, we must have 2+ cells suppressed if we're choosing to have 2+ cells suppressed */
/* We need constraints for every combination of dimensions you can build by leaving out one dimension at a time.
This is pretty simple in a 2-dimensional dataset. */
proc sql;
create table
levels
/* Constraints across the degree dimension */
as select distinct
/* target_data is the data set you want to suppress
target_var is the variable you want to suppress
dimensions is a pipe-separated list of dimensions to suppress
cutoff is the highest value you want to suppress */
%macro opt_suppress(target_data, target_var, dimensions, cutoff);
/* Count the number of dimensions we're working with */
%let num_dims = %eval(%sysfunc(count(&dimensions, |)) + 1);
/* Load the comma-delimited names file into names_raw (four fields per record) */
data names_raw;
    infile "C:\Users\dbatten\Desktop\Name Data.csv"
        delimiter = ','
        dsd
        truncover
        lrecl = 1000;
    /* The colon modifier reads each field up to the delimiter using the given informat */
    input
        first_name :$50.
        last_name  :$50.
        race       :$3.
        number     :5.
    ;
run;
proc means noprint data = names_raw;
class first_name last_name race;
var number;
/* Import the raw names file: comma-delimited, four fields per record */
data names_raw;
    infile "C:\Users\dbatten\Desktop\Name Data.csv"
        delimiter = ','
        dsd
        truncover
        lrecl = 1000;
    /* Modified list input: each field is read up to the delimiter with its informat */
    input
        first_name :$50.
        last_name  :$50.
        race       :$3.
        number     :5.
    ;
run;
/* Aggregate it at every possible level */
left daughter right daughter split var split point status prediction
1 2 3 3 2.45 1 0
2 0 0 0 0.00 -1 1
3 4 5 4 1.75 1 0
4 6 7 3 5.05 1 0
5 8 9 3 4.85 1 0
6 10 11 4 1.65 1 0