/*----- * group: Data in * purpose: Import a dBase III file into a SAS data set */ %macro read_dbf (filename, ds, keep_del=0, showflds=0); /* Author: Richard A. DeVenezia * Date: 18 May 1993 * * 02/23/96 RAD change date variables to default to mmddyy10. * format and informat; * 10/24/96 RAD change method used to skip header portion * (use cut -c -) * 4/ 9/98 RAD ensure variable names are valid SAS names * 1/ 4/00 RAD file descriptor date of last update year item is * 100+ for year 2000+. Mod with 100 and rely on SAS * YEARCUTOFF option * 11/13/00 RAD rewrite data input portion to remove reliance on UNIX cut */ /* Read a DBASE III(+) file into a SAS dataset * filename - file in operating system assumed to be in .dbf format * ds - destination SAS dataset * keep_del - 0 to ignore deleted records, 1 to retain deleted records * * DBASE III structures * * File Descriptor: * Ofs Len Field * 0 1 dBASE III version number (3 ls bits) * 1 3 date of last update (YY MM DD in pib1. format) * 4 4 number of records (longint - pib4.) * 8 2 length of header structure (int - pib2.) * [header + field descriptions ] * 10 2 length of each record (int - pib2.) [deleted flag + fields] * 12 20 reserved * * Field Descriptors (32 chars each) immediately follow header * (number of fields implied from length of header structure) * Ofs Len Field * 0 11 field name (variable) zero terminated * 11 1 field type (C N F L D M) * C - Character: Ascii * N/F - Numeric: -.0123456789 * L - Logical: ?YyTtNnFf * D - Date: yyyymmdd * M - Memo: 10 digit pointer for .dbt file * 12 4 field data address (do not care) * 16 1 field length (short - pib1.) * 17 1 decimals (digits to right of decimal point) (short - pib1.) * 18 14 reserved * * immediately following field descriptors * '00'x'0D'x - dBASE III * '0D'x - dBASE III+ * header length field should take care of skipping over these positions * * * all data is stored in character format (i.e. no binary integer or * floating point formats), numerics are stored in ascii representation */ %local notes mprint symgen; %let notes = %sysfunc (getoption(NOTES)); %let mprint = %sysfunc (getoption(MPRINT)); %let symgen = %sysfunc (getoption(SYMBOLGEN)); options nonotes nomprint nosymbolgen; %local filename ds keep_del showflds; %local i type; %if (%quote(&filename)=) or (%quote(&ds)=) %then %do; %put usage: read_dbf(filename, ds); %goto ByeBye; %end; %* * Read File Descriptor and each Field Descriptor *; %local hdrLen nFields recLen numRex; %local maxLenV; %* maximum length of SAS variable name in output dataset; %if (&sysver >= 7) %then %let maxLenV = 11; %* dBase field names can be 11 characters at most; %else %let maxLenV = 8; %* SAS variable names can be 8 characters at most; data &DS (keep=i name varname type length decimals ) ; infile "&filename" unbuffered recfm=n; * dBASE file descriptor (32 bytes); input version pib1. u1 pib1. %* YY - Note: value is 100+ for year 2000+ ; u2 pib1. %* MM ; u3 pib1. %* DD ; %*numRex pib4. (works only on Intel based SAS); rx1 pib1. rx2 pib1. rx3 pib1. rx4 pib1. %*hdrLen pib2. (works only on Intel based SAS); hd1 pib1. hd2 pib1. %*recLen pib2. (works only on Intel based SAS); rl1 pib1. rl2 pib1. reserved $char20. ; version = mod (version, 8); if version ^= 3 then stop; numRex = rx1 + 256 * (rx2 + 256 * (rx3 + 256 * rx4)); hdrLen = hd1 + 256 * (hd2); recLen = rl1 + 256 * (rl2); * u1 = mod (u1, 100); * updated = mdy (u2, u3, u1); * format updated date7.; numFlds = (hdrLen - 33) / 32; * fileSize = hdrLen + recLen*numRex + 1; call symput ('nFields',trim(left(put(numFlds,8.)))); call symput ('hdrLen', trim(left(put(hdrLen,8.)))); call symput ('recLen', trim(left(put(recLen,8.)))); call symput ('numRex', trim(left(put(numRex,8.)))); put 'NOTE:' hdrLen= recLen= numFlds= numRex=; * read each dBASE field descriptor (32 bytes each); * field name will be mapped to a SAS eight character variable name; array _var [2000] $&maxLenV _temporary_; length varname $&maxLenV; do i = 1 to numFlds; input name $CHARZB11. %* convert null chars to blanks; type $CHAR1. reserve1 $CHAR4. length pib1. decimals pib1. reserve2 $CHAR14. ; if type='F' then type='N'; * map first word of field name to variable name; * change any non alphanumeric to underscore; varname = upcase(name); varname = scan (varname, 1, ' '); if not ('A' <= substr (varname,1,1) <= 'Z') then varname = '_' || varname; do j = 1 to length (varname); if not ('A' <= substr (varname,j,1) <= 'Z' or '0' <= substr (varname,j,1) <= '9') then substr (varname,j,1) = '_'; end; * check if this SAS varname has been used already; found = 0; do j = 1 to 2000 while (_var[j] ^= "" and not found); found = ( _var[j] = varname ); end; if not found then _var[j]=varname; else do; do k = 1 to 2000 while (found); n = left(put(k,4.)); varname = substr(varname,1,8-length(n)) || n; found = 0; do j = 1 to 2000 while (_var[j] ^= "" and not found); found = ( _var[j] = varname ); end; end; _var[j] = varname; end; output; end; stop; run; %if (&syserr ^= 0) %then %goto ByeBye; %* * localize all macro variables that describe each variable in .dbf file; *; %do i = 1 %to &nFields; %local var&i label&i type&i len&i dec&i ; %end; %* * now populate those macro variables with values *; data _null_; set &ds; n = left(put(i,8.)); call symput ('var' ||n, trim(varname)); call symput ('label'||n, trim(name)); call symput ('type' ||n, trim(type)); call symput ('len' ||n, trim(left(put(length,8.)))); call symput ('dec' ||n, trim(left(put(decimals,8.)))); run; %if (&syserr ^= 0) %then %goto ByeBye; %if (&showflds) %then %do; %* report file structure; %put &filename; %put hdrLen=&hdrLen recLen=&recLen numRex=&numRex; %do i=1 %to &nfields; %put %substr(&i..%str( ),1,3) &&var&i &&label&i &&type&i &&len&i &&dec&i; %end; %end; %* * read data part of .dbf file *; %local type; data &ds; infile "&filename" unbuffered recfm=n; attrib deleted length=$1; %do i=1 %to &nFields; %let type=&&type&i; ATTRIB &&var&i label="&&label&i" %if (&type=D) %then length=4 format=mmddyy10. informat=mmddyy10.; %else %if (&type=L) %then length=3 format=1. ; %else %if (&type=C) %then length=$&&len&i; %else %if (&type=N) %then format=&&len&i...&&dec&i; ; %end; attrib source length=$150 label='Data source'; retain source "&filename"; input @&hdrLen deleted $CHAR1.; do i = 1 to &numRex; input deleted $CHAR1. ; %if (not &keep_del) %then %do; if deleted ne ' ' then do; %* since this record is tagged as deleted in the .dbf and the macro * was invoked indicating deleted records are not to be saved, * skip over the rest of the record; input +(&recLen-2) dummy $CHAR1.; drop dummy; goto NextRcrd; end; %end; input %do i=1 %to &nFields; %let type=&&type&i; %if (&type=D) %then &&var&i yymmdd8. ; %else %if (&type=L) %then l&i $CHAR1. ; %else %if (&type=C) %then &&var&i $CHAR&&len&i...; %else %if (&type=N) %then &&var&i &&len&i...&&dec&i; %else %if (&type=M) %then &&var&i $CHAR&&len&i...; %end; ; %do i=1 %to &nfields; %let type=&&type&i; %if (&type=L) %then %do; if (upcase(l&i) in ("T" "Y")) then &&var&i=1; else if (upcase(l&i) in ("F" "N")) then &&var&i=0; drop l&i; %end; %else %if (&type=M) %then %do; drop &&var&i; %end; %end; NextRcrd: %if (&keep_del) %then %do; output; %end; %else %do; if deleted = ' ' then output; %end; end; stop; drop deleted; run; %ByeBye: options &symgen &mprint ¬es; %mend read_dbf;