Created
April 16, 2015 18:42
-
-
Save josephlei/37e60921af9ff4733f27 to your computer and use it in GitHub Desktop.
SAS Macro definition for splitting one large file into smaller ones based on a key column containing discrete values
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
OPTIONS NOMPRINT MCOMPILENOTE=ALL NOSYMBOLGEN NOMLOGIC; | |
DATA SASUSER.COUNTY_SAWS_REF; | |
LENGTH COUNTY $13 SAWS $3; | |
INFILE DATALINES DSD DLM=","; | |
INPUT COUNTY $ SAWS $; | |
DATALINES; | |
ALAMEDA,CW | |
ALPINE,CIV | |
AMADOR,CIV | |
BUTTE,CIV | |
CALAVERAS,CIV | |
COLUSA,CIV | |
CONTRACOSTA,CW | |
DELNORTE,CIV | |
ELDORADO,CIV | |
FRESNO,CW | |
GLENN,CIV | |
HUMBOLDT,CIV | |
IMPERIAL,CIV | |
INYO,CIV | |
KERN,CIV | |
KINGS,CIV | |
LAKE,CIV | |
LASSEN,CIV | |
LOSANGELES,LDR | |
MADERA,CIV | |
MARIN,CIV | |
MARIPOSA,CIV | |
MENDOCINO,CIV | |
MERCED,CIV | |
MODOC,CIV | |
MONO,CIV | |
MONTEREY,CIV | |
NAPA,CIV | |
NEVADA,CIV | |
ORANGE,CW | |
PLACER,CW | |
PLUMAS,CIV | |
RIVERSIDE,CIV | |
SACRAMENTO,CW | |
SANBENITO,CIV | |
SANBERNARDINO,CIV | |
SANDIEGO,CW | |
SANFRANCISCO,CW | |
SANJOAQUIN,CIV | |
SANLUISOBISPO,CW | |
SANMATEO,CW | |
SANTABARBARA,CW | |
SANTACLARA,CW | |
SANTACRUZ,CW | |
SHASTA,CIV | |
SIERRA,CIV | |
SISKIYOU,CIV | |
SOLANO,CW | |
SONOMA,CW | |
STANISLAUS,CIV | |
SUTTER,CIV | |
TEHAMA,CIV | |
TRINITY,CIV | |
TULARE,CW | |
TUOLUMNE,CIV | |
VENTURA,CW | |
YOLO,CW | |
YUBA,CIV | |
; | |
RUN; | |
*CREATE MVARS; | |
DATA _NULL_; | |
SET SASUSER.COUNTY_SAWS_REF END=EOF; | |
CALL SYMPUTX("SAWSREF"||LEFT(_N_), SAWS,"G"); | |
CALL SYMPUTX("COUNTYREF"||LEFT(_N_), LEFT(COUNTY),"G"); | |
*CREATE MACRO VAR "SAWSREF1 TO SAWSREF58" THAT HOLDS CIV, CW, LDR; | |
*CREATE MACRO VARS "COUNTYREF1 TO COUNTYREF58 THAT HOLDS COMPRESSED COUNTY NAMES; | |
*QA POINT, IF COUNTREFS (_N_ VAR) > 58 DISCRETE VALUES WILL TRIGGER A WARNING; | |
IF EOF THEN | |
DO; | |
CALL SYMPUTX("COUNTREFS", _N_); | |
IF _N_ > 58 THEN | |
PUT "ERROR: TOO MANY MVARS, EXAMINE CAREFULLY!"; | |
END; | |
RUN; | |
%MACRO EXPORTCOUNTIES(SETNAME2=,OUTPATH2=G:\); | |
%DO I=1 %TO 58; | |
PROC EXPORT DATA=&&COUNTYREF&I | |
OUTFILE="&OUTPATH2\&SETNAME2._&&SAWSREF&I.._&&COUNTYREF&I...CSV" | |
DBMS=CSV REPLACE; | |
RUN; | |
%END; | |
%MEND; | |
%macro split ( | |
data = /*Name of SAS data set to split */ , | |
var = /*Name of the variable you wish to split "by" */ , | |
expected= 58 /*How many separate files are you expecting? */ , | |
setname = output /*What is the name of the output "set" */ , | |
outpath = G:\ /*Output dir, for example C:\Users\*/, | |
countysplit=N /*Are these split files to be exported by county? This option includes consortia specifiers (CW, CIV, LDR)*/, | |
export=N | |
); | |
*validate split key, eliminate zeros, null values, anything shorter than length 2; | |
data splitme errors; | |
set &data; | |
if length(&var) GE 2 then output splitme; | |
else output errors; | |
run; | |
*from "validated data," create discrete data set with unique values of split keys; | |
proc sort data=splitme (keep=&var) out=getkeysfromhere nodupkey; | |
by &var; | |
run; | |
*modify "getkeysfromhere" data set to include a compressed key; | |
data getkeysfromhere; | |
set getkeysfromhere end=eof; | |
length compressedkey $20; | |
compressedkey=upcase(compress(&var,,"nk")); | |
call symputx("splitvar"||left(_n_), compressedkey); | |
*create macro var "numsplitvar" that holds the number of distinct keys; | |
*QA point, any splitvar with >58 discrete values will trigger a warning; | |
if eof then do; | |
call symputx("numsplitvar", _n_); | |
if _n_ > &expected then put "ERROR: TOO MANY OUTPUTS, EXAMINE CAREFULLY!"; | |
end; | |
run; | |
*QA debug point; | |
%put _user_; | |
data | |
%do i=1 %to &numsplitvar; | |
&&splitvar&i | |
%end; | |
; | |
set splitme end=eof; | |
select (upcase(compress(&var,,"nk"))); | |
%do i=1 %to &numsplitvar; | |
when ("&&splitvar&i") output &&splitvar&i; | |
%end; | |
OTHERWISE ; | |
END; | |
run; | |
%if %sysfunc(upcase(&export))=Y %then %do; | |
%IF %sysfunc(upcase(&COUNTYSPLIT=Y)) %THEN %EXPORTCOUNTIES(SETNAME2=&SETNAME, OUTPATH2=&OUTPATH); | |
%ELSE %DO I=1 %TO &NUMSPLITVAR; | |
PROC EXPORT DATA=&&SPLITVAR&I | |
OUTFILE="&OUTPATH.\&SETNAME._&&SPLITVAR&I...CSV" | |
DBMS=CSV REPLACE; | |
RUN; | |
%END; | |
%end; | |
%MEND SPLIT; | |
%SPLIT(DATA=WORK.SETONE, VAR=COUNTY, EXPECTED=58, SETNAME=SETONERESULTS,COUNTYSPLIT=Y, EXPORT=Y, outpath=G:\); | |
/********************************************************************************************** | |
DOCUMENTATION AND CHANGE LOG: | |
24FEB2015, ADDED ABILITY TO USE CONSORTIA MODIFIERS VIA EXPORTCOUNTIES CODE | |
24FEB2015, SPECIAL CHAR PROBLEM RESOLVED VIA COMPRESS WITH NK FLAGS | |
08APR2015, NON COUNTY EXPORT MISSING A BACKSLASH, FIXED IN %ELSE %DO STATEMENT, OK TO GO | |
**********************************************************************************************/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment