Skip to content

Instantly share code, notes, and snippets.

@statgeek
Created January 18, 2021 23:28
Show Gist options
  • Save statgeek/feedc3fc520cb0d2018ca2a8cab241d8 to your computer and use it in GitHub Desktop.
Save statgeek/feedc3fc520cb0d2018ca2a8cab241d8 to your computer and use it in GitHub Desktop.
SAS - drop variables whose percentage of missing values exceeds a given threshold
%macro drop_missing_pct(input_dsn = , output_dsn=, pct = , id_vars=);
    /*
       Copy INPUT_DSN to OUTPUT_DSN, dropping every variable whose percentage
       of missing values is greater than PCT.

       Parameters
         input_dsn  = input data set name
         output_dsn = output data set name
         pct        = missing percent (0-100 scale, as reported by PROC FREQ);
                      variables with a percentage of missing above this value
                      are dropped
         id_vars    = space delimited list of variables that you do not want
                      to include in the analysis, such as ID variables
                      (may be left blank)

       Side effects
         - creates/replaces the formats $MISSFMT and NMISSFMT
         - creates/replaces the work data sets TEMP and LONG
         - ends with ODS SELECT ALL, regardless of the selection in effect
           before the call

       Block comments are used throughout because statement-style comments
       inside a macro are still scanned by the macro processor.
    */

    /* Keep the list local and start it empty: SELECT ... INTO leaves the   */
    /* macro variable untouched when no row qualifies, so without this a     */
    /* stale value from an outer scope would be used (or the reference would */
    /* not resolve at all).                                                  */
    %local drop_var_list;
    %let drop_var_list = ;

    /* Formats that collapse every value into Missing / Not Missing.        */
    /* Note: NMISSFMT maps only the standard numeric missing value (.).     */
    proc format;
        value $ missfmt ' '="Missing" other="Not Missing";
        value nmissfmt .="Missing" other="Not Missing";
    run;

    /* Suppress displayed output so the results do not get too messy, and   */
    /* capture the one-way frequency tables in the data set TEMP.           */
    ods select none;
    ods table onewayfreqs=temp;

    /* Count missing / non-missing per variable. The DROP= option is only   */
    /* generated when id_vars is non-empty.                                 */
    proc freq data=&input_dsn.
        %if %length(&id_vars) %then %do;
            (drop = &id_vars)
        %end;
        ;
        table _all_ / missing;
        format _numeric_ nmissfmt. _character_ $missfmt.;
    run;

    ods select all;

    /* Reshape the ODS output to one row per variable and category.         */
    data long;
        length variable $32. variable_value $50.;
        set temp;
        /* TABLE holds text such as "Table Age"; the second word is the name */
        Variable=scan(table, 2);
        /* formatted value (Missing / Not Missing) of that variable's column */
        Variable_Value=strip(trim(vvaluex(variable)));
        presentation=catt(frequency, " (", trim(put(percent/100, percent7.1)), ")");
        keep variable variable_value frequency percent cum: presentation;
        label variable='Variable' variable_value='Variable Value';
    run;

    /* Not required for the drop logic; only tidies LONG for display.       */
    proc sort data=long;
        by variable;
    run;

    /* Select variables that are more than PCT percent missing.             */
    proc sql noprint;
        select variable into :drop_var_list separated by " "
            from long
            where variable_value = 'Missing' and percent > &pct;
    quit;

    /* Write the output data set; emit DROP only when something qualified.  */
    data &output_dsn;
        set &input_dsn;
        %if %length(&drop_var_list) %then %do;
            drop &drop_var_list;
        %end;
    run;

    /* Clean up - uncomment after testing. */
    /* proc sql; */
    /* drop table long; */
    /* drop table temp; */
    /* quit; */
%mend drop_missing_pct;
/*-------------------------------------------------------------------------------------------------
  Example usage
-------------------------------------------------------------------------------------------------*/

/* Build a demo copy of SASHELP.CLASS with some values blanked out. */
data class;
    set sashelp.class;
    label age = "Fancy Age Label";

    /* every 14-year-old loses height, weight and sex */
    if age = 14 then do;
        call missing(height, weight, sex);
    end;

    /* Alfred additionally loses sex, age and height */
    if name = 'Alfred' then do;
        call missing(sex, age, height);
    end;
run;

/* Drop variables that are more than 20 percent missing; Name is excluded from the analysis. */
%drop_missing_pct(input_dsn = class, output_dsn = want, pct = 20, id_vars = Name);

/* Check output: list the variables that remain in WANT. */
proc contents data = want;
run;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment