purtysql.sas
%macro purtySQL (data=, var=, out=, file=, outfile=);
%*---------------------------------------------------------------------------;
%* Richard A. DeVenezia, 3/1999;
%*---------------------------------------------------------------------------;
%* Beautify SQL code that is stored in a data set or a file;
%* This code assumes the SQL is correct but badly formatted;
%* unmatched quotes, escaped quote characters and tokens > 200 characters
%* will return undetermined but probably wrong results;
%* data - name of data set containing SQL source;
%* var - name of variable in data set containing SQL source;
%* out - name of data set to contain pretty SQL;
%* file - name of file to read SQL from;
%* outfile - name of file to write pretty SQL to;
%local this;
%let this=purtySQL;
%*---------------------------------------------------------------------------;
%* argument check;
%if ((%superq(data) ne ) and (%superq(file) ne )) %then %do;
%put &this: Please specify only data= or file=. ;
%goto ByeBye;
%end;
%if ((%superq(file) ne ) and (%superq(var) ne )) %then %do;
%put &this: Note: var= option is ignored when file= specified.;
%end;
%if ((%superq(outfile) ne ) and (%superq(out) eq )) %then %do;
%let out=WORK.PURTYSQL;
%end;
%if ((%superq(data)=) and (%superq(file)=))
or ((%superq(var)^=) and (%superq(data)=))
or ((%superq(out)=) and (%superq(outfile)=))
%then %do;
%put &this: All arguments (data= | file=, var= and out= | outfile=) must be specified. ;
%goto ByeBye;
%end;
%*---------------------------------------------------------------------------;
%* read in a file if requested;
%if (%superq(file) ne ) %then %do;
filename _Source_ "&file";
%if NOT %sysfunc (FEXIST (_SOURCE_)) %then %do;
%put &this: File &file does not exist;
%goto ByeBye;
%end;
%let data = __sql__;
%let var = text;
data &data (keep=text);
length text $200;
infile _SOURCE_ length=len lrecl=5000;
input @1 dummy $1 @;
at = 1;
do while (len > 0) ;
if len > 200 then textL = 200; else textL = len;
input @at text $varying. textL @;
output;
len = len - 200;
at = at + 200;
end;
input;
text = '';
output;
run;
filename _SOURCE_;
%if &SYSERR ne 0 %then %goto ByeBye;
%end;
%if NOT %sysfunc (EXIST (&data)) %then %goto ByeBye;
* options nomprint;
%*---------------------------------------------------------------------------;
%* determine length of variable;
%let dsid = %sysfunc (open (&data));
%let mlength = %sysfunc (varlen (&dsid, %sysfunc (varnum (&dsid, &var))));
%let dsid = %sysfunc (close (&dsid));
%*---------------------------------------------------------------------------;
%* Incoming data contains the SQL query and can wrap lines;
%* break the query into tokens;
%* assume no token is more than 200 characters;
%* Note: query in a dataset can have tokens split across rows,
%* a query in a file assumes NO line breaks within a token
%* (and is enforced by the file reader by placing a blank row between each
%* line read from the file);
data &out (label="One row per token" where=(token ne '') keep=token);
array line [5000] $200 _temporary_;
do while (not end);
set &data(rename=(&var=text)) end=end;
n ++ 1;
text = translate (text, ' ', '090A0D1A'x);
line [n] = text;
end;
length L token $200 _ch_ $1;
retain token '';
* length ul $200;
_p_ = 1;
_i_ = 1;
nC = n * &mlength;
inQ = ' ';
do while (_p_ < nC);
link L;
done = 0;
do while ((_p_+_i_ <= nC) and (^done));
done = (_i_ > 200) ;
if done then do;
link L;
done = 0;
end;
if ^done then
done = (substr (L, _i_, 1) ^= ' ') ;
if ^done then _i_ ++ 1;
end;
link L;
_i0_ = _i_;
_ch_ = substr (L, _i_, 1);
if _ch_ = '"' then link DQ; else
if _ch_ = "'" then link SQ; else
link TOKEN;
end;
STOP;
TOKEN:
do while (_p_+_i_ <= nC and _ch_ not in (' ' ','));
_i_ ++ 1;
_ch_ = substr (L, _i_, 1);
if _ch_ = '"' then do;
token = substr (L, _i0_, _i_-_i0_);
output;
_i0_ = _i_;
link DQ;
_i0_ = _i_;
end;
else
if _ch_ = "'" then do;
token = substr (L, _i0_, _i_-_i0_);
output;
_i0_ = _i_;
link SQ;
_i0_ = _i_;
end;
end;
token = substr (L, _i0_, _i_-_i0_+1);
output;
if (substr (L, _i_, 1) = ',') then
_i_ ++ 1;
return;
DQ:
_i_ ++ 1;
do while (_p_+_i_ <= nC and substr (L, _i_, 1) ^= '"');
_i_ ++ 1;
end;
token = substr (L, _i0_, _i_-_i0_+1);
output;
_i_ ++ 1;
return;
SQ:
_i_ ++ 1;
do while (_p_+_i_ <= nC and substr (L, _i_, 1) ^= "'");
_i_ ++ 1;
end;
token = substr (L, _i0_, _i_-_i0_+1);
output;
_i_ ++ 1;
return;
L:
_p_ ++ ( _i_ - 1) ;
_r_ = INT ((_p_ - 1) / &mlength) + 1;
_c_ = MOD ((_p_ - 1) , &mlength) + 1;
L = substr (line [_r_], _c_);
x = &mlength - ( _c_ - 1 ) + 1;
xi = 1;
do while (x < 200);
substr (L, x) = line [_r_ + xi ];
x ++ &mlength;
end;
_i_ = 1;
*uL = '~' || L;
return;
run;
%*---------------------------------------------------------------------------;
%* reassemble the tokens in readable SQL;
data &out; %* / debug;
set &out end=end;
length holdtext $200;
retain in indent holdtext infunc;
if token in ('CREATE' 'SELECT' 'FROM' 'WHERE' 'HAVING' 'GROUP' 'ORDER') then do;
if holdtext ne '' then do;
if indent then
textout = repeat (' ', indent-1) || holdtext;
else
textout = holdtext;
output;
holdtext = '';
end;
in = token;
indent = 0;
inc = 2;
textout = token;
output;
end;
else
if in in ('SELECT' 'FROM' 'GROUP' 'ORDER') then do;
if not infunc then infunc = index (token, '(');
if index (token, ',') = 0 then do;
holdtext = trim(holdtext) || ' ' || token;
if infunc and index (token, ')') then
infunc = 0;
end;
else
if infunc then do;
holdtext = trim(holdtext) || ' ' || token;
if index (token, ')') then
infunc = 0;
end;
if index (token, ',') and not infunc then do;
textout = trim (holdtext) || ' ' || token;
if indent then
textout = repeat (' ', indent-1) || textout;
output;
holdtext = '';
end;
end;
else
if in in ('WHERE' 'HAVING') then do;
if token = 'AND' then do;
textout = holdtext;
output;
textout = token;
output;
holdtext = '';
end;
else do;
holdtext = trim(holdtext) || ' ' || token;
end;
end;
else do;
if indent then
textout = repeat (' ', indent-1) || token;
else
textout = token;
output;
end;
indent+inc;
if end then do;
if holdtext ne '' then do;
textout=holdtext;
output;
end;
textout=token;
output;
end;
keep textout;
run;
%if (%superq(outfile) ne ) %then %do;
data _null_;
set &out;
file "&outfile";
len = length (textout);
put textout $varying. len;
run;
%end;
%ByeBye:
%mend;