Created
November 18, 2019 16:59
-
-
Save beugley/c62f4cd496c32e2684f3a98c59b79a27 to your computer and use it in GitHub Desktop.
SAS script to convert a data set to text format
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
*options mprint mlogic symbolgen; | |
options errorabend fullstimer compress=binary; | |
options nofmterr validvarname=any; | |
/* | |
** SAS script to convert a data set to text-delimited format. The output file | |
** will be written to either the same directory where the SAS data set resides | |
** or to a user-specified file. | |
** | |
** sysparm must contain 5 :-separated values: | |
** 1) the name of a SAS data set, including full path and extension. | |
** 2) the name of the output file; leave this null and the output will | |
** be written to the same directory where the SAS data set resides using | |
** the same name, but with an extension that reflects the delimiter, | |
** instead of .sas7bdat. | |
** 3) the number of rows in the subset; specify MAX or nothing to convert | |
** all rows. | |
** 4) the delimiter for the output file. Valid options are "comma", "tab", | |
** "ctrl-A", and "vertical-bar". Default is tab. | |
** 5) Optional unique path and name to use for temp data set. If set, then | |
** it must be unique for concurrent runs. If not set, then the temp | |
** data set will be put in the WORK dir. | |
** Examples: /prod/user/sam/lob/directory/file.sas7bdat:/tmp/output.tsv:10000:: | |
** /prod/user/sam/lob/directory/file.sas7bdat::MAX:: | |
** /prod/user/sam/lob/directory/file.sas7bdat::10000:ctrl-A: | |
*/ | |
%put sysparm: "&sysparm"; | |
%let SASFILE = %sysfunc(scan(&sysparm,1,:,m)); | |
%let OUTFILE = %sysfunc(scan(&sysparm,2,:,m)); | |
%let ROWCOUNT = %sysfunc(scan(&sysparm,3,:,m)); | |
%let DELIMITER = %sysfunc(scan(&sysparm,4,:,m)); | |
%let TEMP_DS = %sysfunc(scan(&sysparm,5,:,m)); | |
%put SASFILE: "&SASFILE"; | |
%put OUTFILE: "&OUTFILE"; | |
%put ROWCOUNT: "&ROWCOUNT"; | |
%put DELIMITER: "&DELIMITER"; | |
%put TEMP_DS: "&TEMP_DS"; | |
/* | |
** Parse the path, filename, and data set name from &SASFILE. | |
*/ | |
%let FILENAME = %sysfunc(scan(&SASFILE,-1,/)); | |
%let DSLEN = %eval(%sysfunc(find(&FILENAME,.sas7bdat))-1); | |
%let DSNAME = %sysfunc(substr(&FILENAME,1,&DSLEN)); | |
%let PATHLEN = %eval(%sysfunc(find(&SASFILE,&FILENAME))-1); | |
%let PATHNAME = %sysfunc(substr(&SASFILE,1,&PATHLEN)); | |
%put FILENAME: &FILENAME; | |
%put DSNAME: &DSNAME; | |
%put PATHNAME: &PATHNAME; | |
%macro set_delimiter; | |
%global EXTENSION; | |
%if ("&DELIMITER" = "" or "&DELIMITER" = "tab") %then | |
%do; | |
%let DELIMITER = '09'x; | |
%let EXTENSION = tsv; | |
%end; | |
%else %if ("&DELIMITER" = "ctrl-A") %then | |
%do; | |
%let DELIMITER = '01'x; | |
%let EXTENSION = txt; | |
%end; | |
%else %if ("&DELIMITER" = "vertical-bar") %then | |
%do; | |
%let DELIMITER = '|'; | |
%let EXTENSION = txt; | |
%end; | |
%else %if ("&DELIMITER" = "comma") %then | |
%do; | |
%let DELIMITER = ','; | |
%let EXTENSION = csv; | |
%end; | |
%else | |
%do; | |
%put ERROR: Invalid delimiter '&DELIMITER'; | |
%abort abend 5; | |
%end; | |
%mend set_delimiter; | |
%set_delimiter; | |
%put DELIMITER: "&DELIMITER"; | |
%put EXTENSION: "&EXTENSION"; | |
%macro set_outfile; | |
%if ("&OUTFILE" = "") %then | |
%do; | |
%let OUTFILE = &PATHNAME./&DSNAME..&EXTENSION; | |
%end; | |
%mend set_outfile; | |
%set_outfile; | |
%put OUTFILE: "&OUTFILE"; | |
%macro set_rowcount; | |
%if ("&ROWCOUNT" = "") %then | |
%do; | |
%let ROWCOUNT = MAX; | |
%end; | |
%mend set_rowcount; | |
%set_rowcount; | |
%put ROWCOUNT: "&ROWCOUNT"; | |
%macro set_temp_ds; | |
%global TEMP_DSNAME; | |
%if ("&TEMP_DS" = "") %then | |
%do; | |
%let TEMP_DSNAME = WORK.&DSNAME; | |
%end; | |
%else | |
%do; | |
%let TEMP_DSNAME = %sysfunc(scan(&TEMP_DS,-1,/)); | |
%let TEMP_PATHLEN = %eval(%sysfunc(find(&TEMP_DS,&TEMP_DSNAME))-1); | |
%let TEMP_PATHNAME = %sysfunc(substr(&TEMP_DS,1,&TEMP_PATHLEN)); | |
%let TEMP_DSNAME=WORKDIR.&TEMP_DSNAME; | |
libname WORKDIR "&TEMP_PATHNAME"; | |
%end; | |
%mend set_temp_ds; | |
%set_temp_ds; | |
%put TEMP_DSNAME: "&TEMP_DSNAME"; | |
libname SAM "&PATHNAME"; | |
/* | |
** If an index is missing or the data set is otherwise damaged, fix it before | |
** continuing. | |
** COMMENTED-OUT BECAUSE THIS FAILS IF THE SAS DATA SET WAS CREATED ON A | |
** DIFFERENT ARCHITECTURE. | |
proc datasets library=SAM; | |
repair &DSNAME; | |
quit; | |
*/ | |
/* | |
** Get number of variables, numeric-variables, and character-variables in the | |
** data set. | |
*/ | |
proc sql noprint; | |
select strip(putn(count(*),'6.')) into :NUM_COLUMNS | |
from sashelp.vcolumn | |
where libname = 'SAM' | |
and upper(memname) = upper("&DSNAME"); | |
select strip(putn(count(*),'6.')) into :NUM_NUMERIC_COLUMNS | |
from sashelp.vcolumn | |
where libname = 'SAM' | |
and upper(memname) = upper("&DSNAME") | |
and type = 'num'; | |
select strip(putn(count(*),'6.')) into :NUM_CHAR_COLUMNS | |
from sashelp.vcolumn | |
where libname = 'SAM' | |
and upper(memname) = upper("&DSNAME") | |
and type = 'char'; | |
quit; | |
%put NUM_COLUMNS: &NUM_COLUMNS; | |
%put NUM_NUMERIC_COLUMNS: &NUM_NUMERIC_COLUMNS; | |
%put NUM_CHAR_COLUMNS: &NUM_CHAR_COLUMNS; | |
/* | |
** Get lists of variables, numeric-variables, and character-variables. | |
*/ | |
proc sql noprint; | |
select name into :COLS1 - :COLS&NUM_COLUMNS | |
from sashelp.vcolumn | |
where libname = 'SAM' | |
and upper(memname) = upper("&DSNAME") | |
order by varnum; | |
select name into :NUMERIC_COLS1 - :NUMERIC_COLS&NUM_NUMERIC_COLUMNS | |
from sashelp.vcolumn | |
where libname = 'SAM' | |
and upper(memname) = upper("&DSNAME") | |
and type = 'num' | |
order by varnum; | |
select name into :CHAR_COLS1 - :CHAR_COLS&NUM_CHAR_COLUMNS | |
from sashelp.vcolumn | |
where libname = 'SAM' | |
and upper(memname) = upper("&DSNAME") | |
and type = 'char' | |
order by varnum; | |
quit; | |
/* | |
** Create a temporary version of the data set with all delimiters, | |
** carriage-returns, and line-feeds converted to spaces. | |
** Use the BEST32 format for all numeric variables to ensure no loss of | |
** precision when exporting to text-delimited format. | |
** Remove formats from all character variables to ensure that the actual value | |
** is exported. | |
*/ | |
%macro ConvertSpecialChars; | |
data &TEMP_DSNAME; | |
set SAM.&DSNAME(obs=&ROWCOUNT); | |
%do I = 1 %to &NUM_NUMERIC_COLUMNS; | |
format "&&NUMERIC_COLS&I"n BEST32.; | |
%end; | |
%do I = 1 %to &NUM_CHAR_COLUMNS; | |
format "&&CHAR_COLS&I"n; | |
"&&CHAR_COLS&I"n = translate("&&CHAR_COLS&I"n,' ',&DELIMITER.||'0a'x||'0d'x); | |
%end; | |
run; | |
%mend ConvertSpecialChars; | |
%ConvertSpecialChars; | |
/* | |
** Export the converted data set to TSV format. | |
** NOTE: This has been replaced by the %export macro because of proc-export | |
** limitations. | |
proc export data=&TEMP_DSNAME | |
outfile="&OUTFILE" | |
dbms=tab | |
replace; | |
run; | |
*/ | |
/* | |
** Export the converted data set to text-delimited format. | |
** This steps exists because proc-export has a limit of LRECL=32767 which fails | |
** on data sets with very large numbers/lengths of columns. | |
*/ | |
%macro export; | |
data _null_; | |
set &TEMP_DSNAME; | |
file "&OUTFILE" delimiter=&DELIMITER DSD DROPOVER LRECL=5000000; | |
* Print column names on the first iteration; | |
if _n_ = 1 then | |
do; | |
put | |
%do I = 1 %to &NUM_COLUMNS; | |
"&&COLS&I" | |
%if (&I < &NUM_COLUMNS) %then | |
%do; | |
&DELIMITER | |
%end; | |
%end; | |
; | |
end; | |
* Print all variables; | |
put (_all_) (+0); | |
run; | |
%mend export; | |
%export; | |
proc sql; | |
drop table &TEMP_DSNAME; | |
quit; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment