Created
January 18, 2021 23:28
-
-
Save statgeek/feedc3fc520cb0d2018ca2a8cab241d8 to your computer and use it in GitHub Desktop.
SAS - drop variables with a percentage missing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
  drop_missing_pct
  Copies INPUT_DSN to OUTPUT_DSN, dropping every variable whose percentage
  of missing values is strictly greater than PCT.

  Parameters
    input_dsn  = input data set name (required)
    output_dsn = output data set name (required)
    pct        = missing-percent threshold (required). It is compared directly
                 against the PERCENT column of the PROC FREQ output, which this
                 code treats as a 0-100 value (it is divided by 100 before the
                 PERCENT format is applied), so pass 20 for twenty percent.
    id_vars    = optional space-delimited list of variables excluded from the
                 missing-value analysis (for example ID variables). They are
                 never candidates for dropping.

  Side effects
    Creates the formats $MISSFMT and NMISSFMT and the data sets TEMP and LONG
    (the clean-up step at the bottom is left commented out, as in the
    original). ODS selection is reset to ALL after the PROC FREQ step.
*/
%macro drop_missing_pct(input_dsn = , output_dsn=, pct = , id_vars=);
    /* Keep the drop list local and start it empty: SELECT ... INTO assigns  */
    /* nothing when no row qualifies, so without this the DROP statement     */
    /* would see an unresolved reference or a stale value from another call. */
    %local drop_var_list;
    %let drop_var_list = ;

    /* Fail early with a readable message instead of a downstream syntax error. */
    %if %length(&input_dsn) = 0 or %length(&output_dsn) = 0 or %length(&pct) = 0 %then %do;
        %put ERROR: drop_missing_pct requires input_dsn=, output_dsn= and pct=.;
        %return;
    %end;

    /* Formats that collapse every value into Missing / Not Missing. */
    proc format;
        value $missfmt ' '="Missing" other="Not Missing";
        value nmissfmt .="Missing" other="Not Missing";
    run;

    /* Suppress printed output; only the ODS table captured below is needed. */
    ods select none;

    /* Capture the one-way frequency tables into data set TEMP. */
    ods table onewayfreqs=temp;

    /* Count missing / non-missing per variable. The MISSING option makes    */
    /* missing values count toward the reported percentages.                 */
    proc freq data=&input_dsn.
        /* Only emit the DROP= option when there is something to exclude. */
        %if %length(&id_vars) %then %do;
            (drop = &id_vars)
        %end;
        ;
        table _all_ / missing;
        format _numeric_ nmissfmt. _character_ $missfmt.;
    run;

    ods select all;

    /* Reshape the ODS output to one row per variable / formatted value. */
    data long;
        length variable $32. variable_value $50.;
        set temp;
        /* TABLE holds text such as "Table Age"; the second word is the name. */
        Variable=scan(table, 2);
        /* Formatted value (Missing / Not Missing) of the column named in VARIABLE. */
        Variable_Value=strip(trim(vvaluex(variable)));
        presentation=catt(frequency, " (", trim(put(percent/100, percent7.1)), ")");
        keep variable variable_value frequency percent cum: presentation;
        label variable='Variable' variable_value='Variable Value';
    run;

    /* Ordering is for display purposes only; the selection below does not need it. */
    proc sort data=long;
        by variable;
    run;

    /* Collect the variables whose missing percentage exceeds the threshold. */
    proc sql noprint;
        select variable into :drop_var_list separated by " "
            from long where variable_value = 'Missing' and percent > &pct;
    quit;

    /* Write the output, dropping the selected variables (if any). */
    data &output_dsn;
        set &input_dsn;
        %if %length(&drop_var_list) %then %do;
            drop &drop_var_list;
        %end;
    run;

    /* clean up - uncomment after testing */
    /* proc sql; */
    /* drop table long; */
    /* drop table temp; */
    /* quit; */
%mend;
/***************************************************************************************************
 Example Usage
 Builds a copy of SASHELP.CLASS with missing values injected, runs the macro
 with a 20 percent threshold while excluding NAME from the analysis, and then
 lists the variables that remain in the output data set.
***************************************************************************************************/
data class;
    set sashelp.class;
    /* Blank out several variables for every 14-year-old. */
    if age=14 then
        call missing(height, weight, sex);
    /* Blank out several variables for one named row. */
    if name='Alfred' then
        call missing(sex, age, height);
    label age="Fancy Age Label";
run;

%drop_missing_pct(input_dsn = class, output_dsn = want, pct = 20, id_vars = Name);

/* check output */
proc contents data=want;
run;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment