Created
May 21, 2015 18:36
-
-
Save daynebatten/b7c19420a76bb3517634 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* target_data is the data set you want to suppress | |
| target_var is the variable you want to suppress | |
| dimensions is a pipe-separated list of dimensions to suppress | |
| cutoff is the highest value you want to suppress */ | |
| %macro opt_suppress(target_data, target_var, dimensions, cutoff); | |
| /* Count the number of dimensions we're working with */ | |
| %let num_dims = %eval(%sysfunc(count(&dimensions, |)) + 1); | |
| /* Build a macro variable array with the dimension names */ | |
| %do i = 1 %to &num_dims; | |
| %let dim&i = %sysfunc(scan(&dimensions, &i, |)); | |
| %end; | |
| /* We need constraints for every combination of dimensions you can build by leaving out one dimension at a time. | |
| This is pretty simple in a 2-dimensional dataset. */ | |
| proc sql; | |
| create table | |
| levels | |
| %do i = 1 %to &num_dims; | |
| /* If this is the first loop through, we don't need the "union all"... */ | |
| %if &i = 1 %then %do; | |
| as | |
| %end; | |
| %else %do; | |
| union all | |
| %end; | |
| select distinct | |
| /* Include every dimension but the one we're leaving out. That one will just get a placeholder. */ | |
| %do j = 1 %to &num_dims; | |
| %if &j = &i %then %do; | |
| 'dummy' as &&dim&j | |
| %end; | |
| %else %do; | |
| &&dim&j | |
| %end; | |
| /* Add a comma unless it's the last dimension in the list */ | |
| %if &j < &num_dims %then %do; | |
| , | |
| %end; | |
| %end; | |
| from | |
| &target_data | |
| %end; | |
| /* Semicolon for the end of our SQL statement */ | |
| ; | |
| quit; | |
| /* Assign numbers to our soon-to-be constraints */ | |
| data levels; | |
| set levels; | |
| con = _n_; | |
| run; | |
| /* Merge our raw data with our constraint categories to determine which constraints apply to which data points */ | |
| proc sql; | |
| create table | |
| joined | |
| as select | |
| t.*, | |
| con, | |
| case when | |
| %do i = 1 %to &num_dims; | |
| /* If it's not the first loop, we need an "or" */ | |
| %if &i > 1 %then %do; | |
| or | |
| %end; | |
| /* We'll want to evaluate on a match across any set of all dimensions but one */ | |
| /* Start by opening our paren to keep the logic right */ | |
| ( | |
| %do j = 1 %to &num_dims; | |
| /* Need an "and" unless this is the first loop */ | |
| %if &j > 1 %then %do; | |
| and | |
| %end; | |
| /* If this is the dimension we're leaving out, look for a placeholder */ | |
| /* Otherwise, look for a real match on the dimension */ | |
| %if &i = &j %then %do; | |
| l.&&dim&j = 'dummy' and t.&&dim&j ~= 'ALL' | |
| %end; | |
| %else %do; | |
| l.&&dim&j = t.&&dim&j | |
| %end; | |
| %end; | |
| /* Close our paren */ | |
| ) | |
| %end; | |
| then 1 else 0 end as included | |
| from | |
| &target_data as t | |
| left join | |
| levels as l | |
| on | |
| 1 = 1; | |
| quit; | |
| /* Sort so we can transpose */ | |
| proc sort data = joined; | |
| by | |
| %do i = 1 %to &num_dims; | |
| &&dim&i | |
| %end; | |
| ; | |
| run; | |
| /* And transpose! */ | |
| proc transpose data = joined out = constraints(drop = _NAME_) prefix = con; | |
| by | |
| %do i = 1 %to &num_dims; | |
| &&dim&i | |
| %end; | |
| ; | |
| id con; | |
| var included; | |
| run; | |
| /* Sort target data to match our constraints */ | |
| proc sort data = &target_data; | |
| by | |
| %do i = 1 %to &num_dims; | |
| &&dim&i | |
| %end; | |
| ; | |
| run; | |
| /* Create a primary suppression constraint */ | |
| data primary; | |
| set &target_data; | |
| if &target_var <= &cutoff and &target_var > 0 then lock = 1; | |
| else lock = 0; | |
| keep lock; | |
| run; | |
| /* Figure out how many rows we have... */ | |
| proc sql noprint; | |
| select | |
| count(*) | |
| into | |
| :num_rows | |
| from | |
| &target_data; | |
| quit; | |
| /* Figure out how many columns we have... */ | |
| proc sql noprint; | |
| select | |
| max(con) | |
| into | |
| :num_cons | |
| from | |
| levels; | |
| quit; | |
| /* Start optimizing */ | |
| proc optmodel; | |
| /* Number of deicisions (N of our data), and number of constraints */ | |
| set dec_set = 1..&num_rows; | |
| set con_set = 1..&num_cons; | |
| /* Values are the original data, primary will store our primary suppression constraint, constraints the grouping constraints */ | |
| number values {dec_set}; | |
| number primary {dec_set}; | |
| number constraints {con_set, dec_set}; | |
| /* Choices is our main suppressed / unsuppressed choice variable */ | |
| var choices {dec_set} binary; | |
| /* These decisions variables govern whether we suppress 0 or 2+ cells within each grouping constraint */ | |
| var choose_zero {con_set} binary; | |
| /* Read in data... */ | |
| read data constraints into [i = _n_] {j in con_set} <constraints[j, i] = col("con"||j)>; | |
| read data &target_data into [i = _n_] values[i] = col("&target_var"); | |
| read data primary into [i = _n_] primary[i] = col("lock"); | |
| /* Minimize sum of cells suppressed, with a small punishment for suppressing an unnecssary number of cells (e.g., suppressing all 0's) */ | |
| minimize target = (sum{i in dec_set} values[i] * choices[i]) + (sum{i in dec_set} choices[i] * .0001); | |
| /* Primary suppressed cells must be suppressed in solution */ | |
| con primary_con {i in dec_set}: choices[i] >= primary[i]; | |
| /* For each grouping constraint, we must have 0 cells suppressed if we're choosing to have 0 cells suppressed */ | |
| con zero_con {i in con_set}: (sum{j in dec_set} choices[j] * constraints[i, j]) - (1 - choose_zero[i]) * 100000000000 <= 0; | |
| /* For each grouping constraint, we must have 2+ cells suppressed if we're choosing to have 2+ cells supppressed */ | |
| con two_more_con {i in con_set}: (sum{j in dec_set} choices[j] * constraints[i, j]) + choose_zero[i] * 100000000000 >= 2; | |
| solve with milp; | |
| create data outdata(keep = chosen) from [i] = dec_set col("chosen") = choices[i]; | |
| quit; | |
| /* Apply decisions to original data */ | |
| data &target_data; | |
| set &target_data; | |
| set outdata; | |
| if chosen = 1 then &target_var = .; | |
| drop chosen; | |
| run; | |
| %mend; | |
| %opt_suppress(testset,num,degree|county,1); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment