Skip to content

Instantly share code, notes, and snippets.

@daynebatten
Created May 21, 2015 18:36
Show Gist options
  • Select an option

  • Save daynebatten/b7c19420a76bb3517634 to your computer and use it in GitHub Desktop.

Select an option

Save daynebatten/b7c19420a76bb3517634 to your computer and use it in GitHub Desktop.
/* target_data is the data set you want to suppress
target_var is the variable you want to suppress
dimensions is a pipe-separated list of dimensions to suppress
cutoff is the highest value you want to suppress */
%macro opt_suppress(target_data, target_var, dimensions, cutoff);
/* Count the number of dimensions we're working with */
%let num_dims = %eval(%sysfunc(count(&dimensions, |)) + 1);
/* Build a macro variable array with the dimension names */
%do i = 1 %to &num_dims;
%let dim&i = %sysfunc(scan(&dimensions, &i, |));
%end;
/* We need constraints for every combination of dimensions you can build by leaving out one dimension at a time.
This is pretty simple in a 2-dimensional dataset. */
proc sql;
create table
levels
%do i = 1 %to &num_dims;
/* If this is the first loop through, we don't need the "union all"... */
%if &i = 1 %then %do;
as
%end;
%else %do;
union all
%end;
select distinct
/* Include every dimension but the one we're leaving out. That one will just get a placeholder. */
%do j = 1 %to &num_dims;
%if &j = &i %then %do;
'dummy' as &&dim&j
%end;
%else %do;
&&dim&j
%end;
/* Add a comma unless it's the last dimension in the list */
%if &j < &num_dims %then %do;
,
%end;
%end;
from
&target_data
%end;
/* Semicolon for the end of our SQL statement */
;
quit;
/* Assign numbers to our soon-to-be constraints */
data levels;
set levels;
con = _n_;
run;
/* Merge our raw data with our constraint categories to determine which constraints apply to which data points */
proc sql;
create table
joined
as select
t.*,
con,
case when
%do i = 1 %to &num_dims;
/* If it's not the first loop, we need an "or" */
%if &i > 1 %then %do;
or
%end;
/* We'll want to evaluate on a match across any set of all dimensions but one */
/* Start by opening our paren to keep the logic right */
(
%do j = 1 %to &num_dims;
/* Need an "and" unless this is the first loop */
%if &j > 1 %then %do;
and
%end;
/* If this is the dimension we're leaving out, look for a placeholder */
/* Otherwise, look for a real match on the dimension */
%if &i = &j %then %do;
l.&&dim&j = 'dummy' and t.&&dim&j ~= 'ALL'
%end;
%else %do;
l.&&dim&j = t.&&dim&j
%end;
%end;
/* Close our paren */
)
%end;
then 1 else 0 end as included
from
&target_data as t
left join
levels as l
on
1 = 1;
quit;
/* Sort so we can transpose */
proc sort data = joined;
by
%do i = 1 %to &num_dims;
&&dim&i
%end;
;
run;
/* And transpose! */
proc transpose data = joined out = constraints(drop = _NAME_) prefix = con;
by
%do i = 1 %to &num_dims;
&&dim&i
%end;
;
id con;
var included;
run;
/* Sort target data to match our constraints */
proc sort data = &target_data;
by
%do i = 1 %to &num_dims;
&&dim&i
%end;
;
run;
/* Create a primary suppression constraint */
data primary;
set &target_data;
if &target_var <= &cutoff and &target_var > 0 then lock = 1;
else lock = 0;
keep lock;
run;
/* Figure out how many rows we have... */
proc sql noprint;
select
count(*)
into
:num_rows
from
&target_data;
quit;
/* Figure out how many columns we have... */
proc sql noprint;
select
max(con)
into
:num_cons
from
levels;
quit;
/* Start optimizing */
proc optmodel;
/* Number of deicisions (N of our data), and number of constraints */
set dec_set = 1..&num_rows;
set con_set = 1..&num_cons;
/* Values are the original data, primary will store our primary suppression constraint, constraints the grouping constraints */
number values {dec_set};
number primary {dec_set};
number constraints {con_set, dec_set};
/* Choices is our main suppressed / unsuppressed choice variable */
var choices {dec_set} binary;
/* These decisions variables govern whether we suppress 0 or 2+ cells within each grouping constraint */
var choose_zero {con_set} binary;
/* Read in data... */
read data constraints into [i = _n_] {j in con_set} <constraints[j, i] = col("con"||j)>;
read data &target_data into [i = _n_] values[i] = col("&target_var");
read data primary into [i = _n_] primary[i] = col("lock");
/* Minimize sum of cells suppressed, with a small punishment for suppressing an unnecssary number of cells (e.g., suppressing all 0's) */
minimize target = (sum{i in dec_set} values[i] * choices[i]) + (sum{i in dec_set} choices[i] * .0001);
/* Primary suppressed cells must be suppressed in solution */
con primary_con {i in dec_set}: choices[i] >= primary[i];
/* For each grouping constraint, we must have 0 cells suppressed if we're choosing to have 0 cells suppressed */
con zero_con {i in con_set}: (sum{j in dec_set} choices[j] * constraints[i, j]) - (1 - choose_zero[i]) * 100000000000 <= 0;
/* For each grouping constraint, we must have 2+ cells suppressed if we're choosing to have 2+ cells supppressed */
con two_more_con {i in con_set}: (sum{j in dec_set} choices[j] * constraints[i, j]) + choose_zero[i] * 100000000000 >= 2;
solve with milp;
create data outdata(keep = chosen) from [i] = dec_set col("chosen") = choices[i];
quit;
/* Apply decisions to original data */
data &target_data;
set &target_data;
set outdata;
if chosen = 1 then &target_var = .;
drop chosen;
run;
%mend;
%opt_suppress(testset,num,degree|county,1);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment