Download purtysql.sas purtysql.sasSubmit a comment

%macro purtySQL (data=, var=, out=, file=, outfile=);

  %*---------------------------------------------------------------------------;
  %* Richard A. DeVenezia, 3/1999;
  %*---------------------------------------------------------------------------;
  %* Beautify SQL code that is stored in a data set or a file;
  %* This code assumes the SQL is correct but badly formatted;
  %* unmatched quotes, escaped quote characters and tokens > 200 characters
  %* will return undetermined but probably wrong results;

  %* data - name of data set containing SQL source;
  %* var  - name of variable in data set containing SQL source;
  %* out  - name of data set to contain pretty SQL;
  %* file - name of file to read SQL from;
  %* outfile - name of file to write pretty SQL to;


  %local this;
  %let this=purtySQL;

  %*---------------------------------------------------------------------------;
  %* argument check;

  %if ((%superq(data) ne ) and (%superq(file) ne )) %then %do;
    %put &this: Please specify only data= or file=. ;
    %goto ByeBye;
  %end;

  %if ((%superq(file) ne ) and (%superq(var) ne )) %then %do;
    %put &this: Note: var= option is ignored when file= specified.;
  %end;

  %if ((%superq(outfile) ne ) and (%superq(out) eq )) %then %do;
    %let out=WORK.PURTYSQL;
  %end;

  %if  ((%superq(data)=) and (%superq(file)=))
    or ((%superq(var)^=) and (%superq(data)=))
    or ((%superq(out)=)  and (%superq(outfile)=))
  %then %do;
    %put &this: All arguments (data= | file=, var= and out= | outfile=) must be specified. ;
    %goto ByeBye;
  %end;

  %*---------------------------------------------------------------------------;
  %* read in a file if requested;

  %if (%superq(file) ne ) %then %do;

    filename _Source_ "&file";

    %if NOT %sysfunc (FEXIST (_SOURCE_)) %then %do;
      %put &this: File &file does not exist;
      %goto ByeBye;
    %end;

    %let data = __sql__;
    %let var = text;
    data &data (keep=text);
      length text $200;
      infile _SOURCE_ length=len lrecl=5000;

      input @1 dummy $1 @;

      at = 1;

      do while (len > 0) ;
        if len > 200 then textL = 200; else textL = len;

        input @at text $varying. textL @;

        output;

        len = len - 200;
        at  = at  + 200;
      end;

      input;

      text = '';
      output;
   run;

    filename _SOURCE_;

    %if &SYSERR ne 0 %then %goto ByeBye;
  %end;

  %if NOT %sysfunc (EXIST (&data)) %then %goto ByeBye;

* options nomprint;

  %*---------------------------------------------------------------------------;
  %* determine length of variable;

  %let dsid = %sysfunc (open (&data));
  %let mlength = %sysfunc (varlen (&dsid, %sysfunc (varnum (&dsid, &var))));
  %let dsid = %sysfunc (close (&dsid));

  %*---------------------------------------------------------------------------;
  %* Incoming data contains the SQL query and can wrap lines;
  %* break the query into tokens;
  %* assume no token is more than 200 characters;
  %* Note: query in a dataset can have tokens split across rows,
  %* a query in a file assumes NO line breaks within a token
  %* (and is enforced by the file reader by placing a blank row between each
  %*  line read from the file);

  data &out (label="One row per token" where=(token ne '') keep=token);

    array line [5000] $200 _temporary_;

    do while (not end);
      set &data(rename=(&var=text)) end=end;
      n ++ 1;
      text = translate (text, '    ', '090A0D1A'x);
      line [n] = text;
    end;

    length L token $200 _ch_ $1;
    retain token '';
*   length ul $200;

    _p_ = 1;
    _i_ = 1;
    nC  = n * &mlength;
    inQ = ' ';

    do while (_p_ < nC);

      link L;

      done = 0;
      do while ((_p_+_i_ <= nC) and (^done));
        done = (_i_ > 200) ;
        if done then do;
          link L;
          done = 0;
        end;

        if ^done then
          done = (substr (L, _i_, 1) ^= ' ') ;

        if ^done then _i_ ++ 1;
      end;

      link L;

      _i0_ = _i_;
      _ch_ = substr (L, _i_, 1);

      if _ch_ = '"' then link DQ; else
      if _ch_ = "'" then link SQ; else
                         link TOKEN;
    end;

    STOP;

TOKEN:
    do while (_p_+_i_ <= nC and _ch_ not in (' ' ','));
      _i_ ++ 1;
      _ch_ = substr (L, _i_, 1);
      if _ch_ = '"' then do;
        token = substr (L, _i0_, _i_-_i0_);
        output;

        _i0_ = _i_;
        link DQ;
        _i0_ = _i_;
      end;
      else
      if _ch_ = "'" then do;
        token = substr (L, _i0_, _i_-_i0_);
        output;

        _i0_ = _i_;
        link SQ;
        _i0_ = _i_;
      end;
    end;

    token = substr (L, _i0_, _i_-_i0_+1);
    output;

    if (substr (L, _i_, 1) = ',') then
      _i_ ++ 1;
return;

DQ:
    _i_ ++ 1;
    do while (_p_+_i_ <= nC and substr (L, _i_, 1) ^= '"');
      _i_ ++ 1;
    end;

    token = substr (L, _i0_, _i_-_i0_+1);
    output;

    _i_ ++ 1;
return;

SQ:
    _i_ ++ 1;
    do while (_p_+_i_ <= nC and substr (L, _i_, 1) ^= "'");
      _i_ ++ 1;
    end;

    token = substr (L, _i0_, _i_-_i0_+1);
    output;

    _i_ ++ 1;
return;

L:
    _p_ ++ ( _i_ - 1) ;

    _r_ = INT ((_p_ - 1) / &mlength) + 1;
    _c_ = MOD ((_p_ - 1) , &mlength) + 1;

    L = substr (line [_r_], _c_);
    x = &mlength - ( _c_ - 1 ) + 1;
    xi = 1;
    do while (x < 200);
      substr (L, x) = line [_r_ + xi ];
      x ++ &mlength;
    end;
    _i_ = 1;

*uL = '~' || L;

return;

  run;

  %*---------------------------------------------------------------------------;
  %* reassemble the tokens in readable SQL;

  data &out; %* / debug;
    set &out end=end;
    length holdtext $200;
    retain in indent holdtext infunc;

    if token in ('CREATE' 'SELECT' 'FROM' 'WHERE' 'HAVING' 'GROUP' 'ORDER') then do;
      if holdtext ne '' then do;
        if indent then
          textout = repeat (' ', indent-1) || holdtext;
        else
          textout = holdtext;

        output;

        holdtext = '';
      end;

      in = token;
      indent = 0;
      inc = 2;

      textout = token;

      output;
    end;
    else
    if in in ('SELECT' 'FROM' 'GROUP' 'ORDER') then do;
      if not infunc then infunc = index (token, '(');

      if index (token, ',') = 0 then do;
        holdtext = trim(holdtext) || ' ' || token;
        if infunc and index (token, ')') then
          infunc = 0;
      end;
      else
      if infunc then do;
        holdtext = trim(holdtext) || ' ' || token;
        if index (token, ')') then
          infunc = 0;
      end;

      if index (token, ',') and not infunc then do;
        textout = trim (holdtext) || ' ' || token;
        if indent then
          textout = repeat (' ', indent-1) || textout;

        output;
        holdtext = '';
      end;
    end;
    else
    if in in ('WHERE' 'HAVING') then do;
      if token = 'AND' then do;
        textout = holdtext;
        output;
        textout = token;
        output;
        holdtext = '';
      end;
      else do;
        holdtext = trim(holdtext) || ' ' || token;
      end;
    end;
    else do;
      if indent then
        textout = repeat (' ', indent-1) || token;
      else
        textout = token;

      output;
    end;

    indent+inc;

    if end then do;
      if holdtext ne '' then do;
        textout=holdtext;
        output;
      end;
      textout=token;
      output;
    end;

    keep textout;
  run;

  %if (%superq(outfile) ne ) %then %do;
    data _null_;
      set &out;
      file "&outfile";
      len = length (textout);
      put textout $varying. len;
    run;
  %end;

%ByeBye:

%mend;