Skip to content

Instantly share code, notes, and snippets.

@daynebatten
Created May 21, 2015 18:34
Show Gist options
  • Select an option

  • Save daynebatten/770c15294a2c8d908bc8 to your computer and use it in GitHub Desktop.

Select an option

Save daynebatten/770c15294a2c8d908bc8 to your computer and use it in GitHub Desktop.
/* Build one constraint group for every combination of dimensions obtained by
   leaving out one dimension at a time. With the two dimensions used here
   (county, degree) that is one group per distinct county plus one group per
   distinct degree. The literal 'dummy' marks the dimension that was left out. */
proc sql;
    create table levels as
        /* One group per county: the degree dimension is left out */
        select distinct county, 'dummy' as degree
            from testset
        union all
        /* One group per degree: the county dimension is left out */
        select distinct 'dummy' as county, degree
            from testset;
quit;
/* Give every constraint group a sequential id: con = 1, 2, 3, ... in row order */
data levels;
    set levels;
    /* Sum statement: con is retained, starts at 0 and grows by 1 per row,
       so it always equals the observation number. */
    con + 1;
run;
/* Pair every data row with every constraint group (the ON 1 = 1 condition makes
   the join a full cross product) and flag, in INCLUDED, whether the row belongs
   to the group:
     - a county group (degree = 'dummy') contains the rows of that county whose
       degree is not 'ALL';
     - a degree group (county = 'dummy') contains the rows of that degree whose
       county is not 'ALL'. */
proc sql;
    create table joined as
        select
            t.*,
            l.con,
            case
                when (l.degree = 'dummy' and t.county = l.county and t.degree ne 'ALL')
                  or (l.county = 'dummy' and t.degree = l.degree and t.county ne 'ALL')
                then 1
                else 0
            end as included
        from testset as t
        left join levels as l
            on 1 = 1;
quit;
/* PROC TRANSPOSE with a BY statement needs its input ordered by the BY variables */
proc sort data = joined;
    by county degree;
run;
/* Pivot to one row per (county, degree) cell with one 0/1 column per constraint
   group; columns are named from the group id with the prefix "con"
   (con1, con2, ...). */
proc transpose
        data   = joined
        out    = constraints(drop = _NAME_)
        prefix = con;
    by county degree;
    id con;
    var included;
run;
/* Put the raw data in the same (county, degree) order as the constraints table,
   so that row i refers to the same cell in both */
proc sort data = testset;
    by county degree;
run;
/* Primary suppression flag, one row per cell in testset order:
   lock = 1 when 0 < num <= 1, otherwise 0 (a missing num yields 0). */
data primary(keep = lock);
    set testset;
    lock = (0 < num <= 1);
run;
/* Start optimizing: choose which cells to suppress so that every primary cell is
   suppressed and every constraint group has either zero or at least two
   suppressed cells, while suppressing as little total value as possible. */
proc optmodel;
    /* Index sets: one decision per row of testset, one constraint group per row
       of levels. Both are populated from the data below rather than hard-coded,
       so the model follows the size of the input. (levels.con equals the row
       number, which is also the suffix of the con<N> columns in constraints.) */
    set dec_set;
    set con_set;

    /* Values are the original data, primary stores the primary suppression flag,
       constraints is the 0/1 membership of each cell in each constraint group */
    number values {dec_set};
    number primary {dec_set};
    number constraints {con_set, dec_set};

    /* Choices is our main suppressed / unsuppressed choice variable */
    var choices {dec_set} binary;
    /* These decision variables govern whether we suppress 0 or 2+ cells within
       each grouping constraint (1 = suppress none, 0 = suppress at least two) */
    var choose_zero {con_set} binary;

    /* Read in data. The first two reads also fill the index sets; all reads key
       on the observation number, so testset, primary and constraints must share
       the same row order. */
    read data testset into dec_set = [i = _n_] values[i] = col("num");
    read data levels into con_set = [k = _n_];
    read data constraints into [i = _n_] {j in con_set} <constraints[j, i] = col("con"||j)>;
    read data primary into [i = _n_] primary[i] = col("lock");

    /* Minimize the sum of suppressed values, with a small penalty per suppressed
       cell so that unnecessary cells (e.g., all the 0's) are not suppressed */
    minimize target = (sum{i in dec_set} values[i] * choices[i]) + (sum{i in dec_set} choices[i] * .0001);

    /* Primary suppressed cells must be suppressed in the solution */
    con primary_con {i in dec_set}: choices[i] >= primary[i];

    /* If choose_zero[i] = 1 the group may contain no suppressed cells; otherwise
       the bound is the group size, i.e. no restriction. Using the group size as
       the big-M coefficient (instead of an arbitrarily huge constant) keeps the
       model numerically well scaled, so solver integer tolerance on choose_zero
       cannot switch the constraint off. */
    con zero_con {i in con_set}:
        (sum{j in dec_set} choices[j] * constraints[i, j])
            <= (1 - choose_zero[i]) * (sum{j in dec_set} constraints[i, j]);

    /* If choose_zero[i] = 0 the group must contain at least two suppressed cells;
       with choose_zero[i] = 1 the right-hand side drops to 0. */
    con two_more_con {i in con_set}:
        (sum{j in dec_set} choices[j] * constraints[i, j]) >= 2 * (1 - choose_zero[i]);

    solve with milp;

    /* Export one row per decision, in dec_set order. The solver may return values
       such as 0.9999999 for a binary variable, so round to an exact 0 / 1. */
    create data outdata(keep = chosen) from [i] = dec_set col("chosen") = round(choices[i].sol);
quit;
/* Apply our decisions to the original data: blank out NUM for every cell the
   optimizer chose to suppress. The two SET statements read testset and outdata
   side by side, so row i of outdata must describe row i of testset. */
data testset;
    set testset;
    set outdata;
    /* CHOSEN comes from a MILP solver and may be only approximately 1
       (e.g. 0.9999999), so test against a threshold rather than for equality. */
    if chosen > 0.5 then num = .;
    drop chosen;
run;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment