SAS, handle Macro output - macros

I took a SAS macro from the SAS website in order to list all the files in a folder.
This is the full reference: http://support.sas.com/kb/25/074.html .
And that's the code:
/* Lists the members of directory DIR whose extension equals EXT           */
/* (case-insensitive). Each matching member name is written to the log.    */
/*   dir - directory to scan                                               */
/*   ext - extension to match, without the dot; when blank, every member   */
/*         that has an extension is listed                                 */
%macro drive(dir,ext);
  /* Keep the work variables out of the caller's symbol tables */
  %local filrf rc did memcnt name i;
  %let filrf=mydir;
  /* Assigns the fileref of mydir to the directory and opens the directory */
  %let rc=%sysfunc(filename(filrf,&dir));
  %let did=%sysfunc(dopen(&filrf));
  /* DOPEN returns 0 when the directory cannot be opened: stop cleanly */
  %if &did = 0 %then %do;
    %put Directory &dir cannot be opened or does not exist;
    %let rc=%sysfunc(filename(filrf));
    %return;
  %end;
  /* Returns the number of members in the directory */
  %let memcnt=%sysfunc(dnum(&did));
  /* Loops through entire directory */
  %do i = 1 %to &memcnt;
    /* Returns the extension from each file (text after the last dot) */
    %let name=%qscan(%qsysfunc(dread(&did,&i)),-1,.);
    /* Checks to see if file contains an extension: without a dot the   */
    /* "extension" returned above is the whole member name              */
    %if %qupcase(%qsysfunc(dread(&did,&i))) ne %qupcase(&name) %then %do;
      /* Checks to see if the extension matches the parameter value */
      /* If condition is true prints the full name to the log */
      %if (%superq(ext) ne and %qupcase(&name) = %qupcase(&ext)) or
          (%superq(ext) = and %superq(name) ne) %then %do;
        %put %qsysfunc(dread(&did,&i));
      %end;
    %end;
  %end;
  /* Closes the directory and releases the fileref */
  %let rc=%sysfunc(dclose(&did));
  %let rc=%sysfunc(filename(filrf));
%mend drive;
/* First parameter is the directory of where your files are stored. */
/* Second parameter is the extension you are looking for. */
/* Leave 2nd parameter blank if you want a list of all the files. */
%drive(c:\,sas)
This macro (obviously) works fine; the problem is that it writes the results to the log.
I need to put those results into a SAS dataset in order to schedule other operations.
How can I do it?
Thanks in advance.

First off, if you can make a pipe, you can do:
/* Run the Windows DIR command through a pipe and read its output, */
/* one line per file, into data set MYFILES.                       */
filename dirlist pipe "dir /b c:\*.sas";

data myfiles;
  length fullname $512;
  infile dirlist truncover lrecl=512;
  /* Each record holds one line of the command output */
  input fullname $512.;
  /* Keep only the part after the last backslash */
  filename = scan(fullname, -1, '\');
run;
If you do need to use that macro due to system restrictions, you can't just take the output from it directly as it doesn't do anything other than print to the log. You'll need to do one of two things:
Use PROC PRINTTO to redirect the log to a file, which you can then parse
Change the line of code that actually does something. %put %qsysfunc(dread(&did,&i)); If you change this to, say,
filename = dread(&did,&i);
output;
then you call the macro from inside a data step and you can use the results. Realistically you might be better off just running all of that in a datastep and not bothering with the macro - it's more complicated than it needs to be in order to be macro-only; in a datastep that's shorter.

Related

How do I read multiple excel files in SAS using a macro?

I would like to create one dataset within SAS by importing multiple excel files. The excel files have the same variables within and are saved in the same directory, the files all have consistent names:
excel_20150101
excel_20150201
How would I write this in SAS?
This macro will loop through all Excel files in a directory and import their contents into datasets named DS1, DS2, DS3, ..., DS400 (if there are 400 Excel files). Please make sure to keep only Excel (.xlsx) files in that directory.
options merror mlogic mprint symbolgen spool;

/* Imports the Excel files found in directory DIR, one data set per file.  */
/*   dir - directory that holds the workbooks                              */
/*   ext - extension to import, without the dot; when blank every member   */
/*         of the directory is imported                                    */
/* Output data sets are named DS<n>, where <n> is the position of the      */
/* member in the directory listing.                                        */
%macro drive(dir,ext);
  %local filrf rc did name i;
  /* Assigns a fileref to the directory and opens the directory */
  %let rc=%sysfunc(filename(filrf,&dir));
  %let did=%sysfunc(dopen(&filrf));
  /* DOPEN returns 0 when the directory cannot be opened */
  %if &did = 0 %then %do;
    %put Directory &dir cannot be opened or does not exist;
    %let rc=%sysfunc(filename(filrf));
    %return;
  %end;
  /* Loops through entire directory */
  %do i = 1 %to %sysfunc(dnum(&did));
    /* Retrieve name and import each Excel file */
    %let name=%qsysfunc(dread(&did,&i));
    /* Skip members whose extension differs from EXT (when EXT is given) */
    %if %length(%superq(ext)) = 0
        or %qupcase(%qscan(&name,-1,.)) = %qupcase(&ext) %then %do;
      proc import
        out=DS&i
        /* Read from the directory that was actually scanned */
        datafile="&dir\&name"
        dbms=XLSX replace;
        getnames=yes;
      run;
    %end;
  %end;
  /* Closes the directory and clear the fileref */
  %let rc=%sysfunc(dclose(&did));
  %let rc=%sysfunc(filename(filrf));
%mend drive;
/* First parameter is the directory of where your files are stored. */
/* Second parameter is the extension you are looking for. */
%drive(Y:\Excel,xlsx);
here is how you could create a dataset with all the file name and their path
%let windowpath=/data/exports/Analytics/Users/psamson/Travel_project/PCA/;
/* Builds data set FILE: one row per .xlsx member of &windowpath.      */
/*   file - member name as returned by DREAD                           */
/*   name - first 25 characters of the member name                     */
data file;
  length file $200 name $28 fdir $500;
  rc = filename("myfile", "&windowpath");
  DirId = dopen("myfile");
  /* DOPEN returns 0 when the directory cannot be opened */
  if DirId > 0 then do;
    memcount = dnum(DirId);
    do i = 1 to memcount;
      /* Read the member name once and reuse it */
      file = strip(dread(DirId, i));
      fdir = cats("&windowpath./", file);
      rc2 = filename("fdir", fdir);
      /* Only members that FOPEN can open are considered            */
      /* NOTE(review): presumably meant to skip sub-directories     */
      fDirId = fopen("fdir");
      if fDirId > 0 then do;
        name = substr(file, 1, 25);
        /* Keep only Excel workbooks: the extension after the last  */
        /* dot must be xlsx, in any case                            */
        if index(file, '.') and lowcase(scan(file, -1, '.')) = 'xlsx' then output;
        /* Close only a handle that was actually opened */
        rc2 = fclose(fDirId);
      end;
      rc2 = filename("fdir", "");
    end;
    rc = dclose(DirId);
  end;
  else put "ERROR: cannot open directory &windowpath";
  rc = filename("myfile");
  keep file name; /* keep the 2 variables used to validate and drive the import */
run;
From this list, loop through the file table to call your import, as in the macro above!
So this is going to be interesting. When you import data from Excel the types are not controlled and you have little control. So I'm 99% certain you'll run into an issue combining data because the types will not align. I would recommend converting all your files to CSV (via batch script) and then importing all the CSV's at once.
Here's a macro I wrote that imports all:
%*Creates a list of all files in the DIR directory with the specified extension (EXT);
/* One row per matching file is appended to data set LIST with the        */
/* variables DIR, NAME, PATH (DIR\NAME) and THE_NAME (NAME up to its      */
/* first dot). A member whose name contains no dot is treated as a        */
/* sub-directory and scanned recursively.                                 */
%macro list_files(dir,ext);
  /* FILE_NAME is declared local as well: the macro calls itself */
  %local filrf rc did name file_name i;
  %let rc=%sysfunc(filename(filrf,&dir));
  %let did=%sysfunc(dopen(&filrf));
  %if &did eq 0 %then
    %do;
      %put Directory &dir cannot be open or does not exist;
      /* Release the fileref before leaving so that it does not leak */
      %let rc=%sysfunc(filename(filrf));
      %return;
    %end;
  %do i = 1 %to %sysfunc(dnum(&did));
    %let name=%qsysfunc(dread(&did,&i));
    %if %qupcase(%qscan(&name,-1,.)) = %upcase(&ext) %then
      %do;
        %put &dir\&name;
        %let file_name = %qscan(&name,1,.);
        %put &file_name;
        data _tmp;
          /* PATH is sized for DIR + separator + NAME; CATX would      */
          /* otherwise give it a default length of 200                 */
          length dir $512 name $100 path $613 the_name $100;
          dir=symget("dir");
          name=symget("name");
          path = catx('\',dir,name);
          /* Text before the first dot */
          the_name = scan(name,1,'.');
        run;
        proc append base=list data=_tmp force;
        run;
        proc sql;
          drop table _tmp;
        quit;
      %end;
    %else %if %qscan(&name,2,.) = %then
      %do;
        /* No dot in the name: descend into it as a directory */
        %list_files(&dir\&name,&ext)
      %end;
  %end;
  %let rc=%sysfunc(dclose(&did));
  %let rc=%sysfunc(filename(filrf));
%mend list_files;
%*Imports one XLSX workbook: PATH is its directory, FILE_NAME the workbook, DATASET_NAME the data set to create or replace;
%macro import_file(path, file_name, dataset_name);
  proc import dbms=xlsx replace
      out=&dataset_name
      datafile="&path.\&file_name.";
  run;
%mend import_file;
*Create the list of files, in this case all XLSX files;
%list_files(c:\_localData\temp, xlsx);

%*Call macro once for each entry in the list table created from the %list_files() macro;
data _null_;
  set list;
  /* Sized explicitly: a character result assigned to a new variable  */
  /* defaults to length 200 and could cut a long path short           */
  length string $1024 dsname $32;
  /* test01..test99 as before; larger counts keep all their digits    */
  /* instead of overflowing the Z2. format                            */
  if _n_ < 100 then dsname = cats('test', put(_n_, z2.));
  else dsname = cats('test', _n_);
  /* %NRSTR defers the macro call until the generated code runs,      */
  /* after this DATA step has finished                                */
  string = cats('%nrstr(%import_file)(', dir, ',', name, ',', dsname, ');');
  call execute(string);
run;
Instead of this however, I recommend converting all files to CSV and then importing those.
https://gist.github.com/statgeek/878e585102c14e01581f55dbe972d27e
And then importing all the CSV's at once into a single file:
https://blogs.sas.com/content/sasdummy/2018/10/09/read-multiple-text-files/

SAS Error trying to loop through multiple datasets

I'm trying to run some code which will hopefully concatenate multiple months or years worth of data. I am trying to figure out when a field was populated with a value. I.e. there is field XYZ in my data set and it is populated with value A in November 2016. If I run my code from Jan - Dec I would like a new field populated with the date that SAS encounters a non-blank value in that field.
Here's my code:
/* Turn on macro debugging output so that generated code and macro    */
/* variable resolution are written to the log.                        */
options mprint symbolgen source mlogic merror syntaxcheck ;
/* append_monthly: steps month by month from iStart_date up to        */
/* iEnd_date (both SAS date values). For each month it points libref  */
/* NOTE at "my.qualifiers.fords.note<YYMM>" and runs a DATA step      */
/* that creates WORK.NEW from NOTE.FILE.                              */
/* NEW is re-created on every iteration, so only one month is kept.   */
%macro append_monthly(iStart_date=, iEnd_date=);
%local tmp_date i;
/* Align the start date with the first day of its month */
%let tmp_date = %sysfunc(intnx(month,&iStart_date,0,beginning)) ;
%do %while (&tmp_date le &iEnd_date);
/* Current month formatted as YYMM, e.g. 1605 */
%let i = %sysfunc(sum(&tmp_date),yymmn4.);
%put &i.;
/* Move on to the first day of the following month */
%let tmp_date = %sysfunc(intnx(month,&tmp_date,1,beginning)) ;
/* The same libref NOTE is re-assigned on every iteration */
libname note "my.qualifiers.fords.note&i." disp=shr;
data new ;
set note.file ;
/* NOTE(review): this is a macro %IF, evaluated while the code is     */
/* generated; it compares the text ln_note_crbur_date_delinq with the */
/* text '' and does not test the data set variable. The semicolon     */
/* ends the %THEN clause, so the assignment is generated without one  */
/* (MPRINT shows "spc_cmt_date = 1605 run;").                         */
%if ln_note_crbur_date_delinq ne '' %then spc_cmt_date = &i.;
run;
%end;
%mend;
/* Process May 2016 through October 2016 */
%append_monthly(iStart_date=%sysfunc(mdy(5,1,2016)), iEnd_date=%sysfunc(mdy(10,1,2016)) );
LIBNAME _ALL_ CLEAR;
Here's a sample from log with errors :
SYMBOLGEN: Macro variable TMP_DATE resolves to 20606
SYMBOLGEN: Macro variable IEND_DATE resolves to 20728
MLOGIC(APPEND_MONTHLY): %DO %WHILE(&tmp_date le &iEnd_date) condition is TRUE; loop will iterate again.
MLOGIC(APPEND_MONTHLY): %LET (variable name is I)
SYMBOLGEN: Macro variable TMP_DATE resolves to 20606
MLOGIC(APPEND_MONTHLY): %PUT &i.
SYMBOLGEN: Macro variable I resolves to 1606
1606
MLOGIC(APPEND_MONTHLY): %LET (variable name is TMP_DATE)
SYMBOLGEN: Macro variable TMP_DATE resolves to 20606
MPRINT(APPEND_MONTHLY): spc_cmt_date = 1605 run;
SYMBOLGEN: Macro variable I resolves to 1606
MPRINT(APPEND_MONTHLY): libname note "my.qualifiers.fords.note1606" disp=shr;
ERROR: Unable to clear or re-assign the library NOTE because it is still in use.
ERROR: Error in the LIBNAME statement.
NOTE: The SAS System stopped processing this step because of errors.
WARNING: The data set WORK.NEW may be incomplete. When this step was stopped there were 0 observations and 622 variables.
WARNING: Data set WORK.NEW was not replaced because this step was stopped.
NOTE: The DATA statement used 0.01 CPU seconds and 49483K.
NOTE: The address space has used a maximum of 4292K below the line and 240388K above the line.
I can't figure out why this isn't working. Maybe this could work using Proc append.
Basically, I just want my output with a field that returns a date in the form of YYMM for when field ln_note_crbur_date_delinq was non-blank.
Any help would be greatly appreciated
I'd guess the reason for your error is that the handle is not being cleared on your source file before the next libname statement tries to re-assign.
An easy fix would be to use a different alias (libref) each time, as follows:
libname note&i "my.qualifiers.fords.note&i." disp=shr;
Then adjust your data step like so:
data new ;
set note&i..file ;
The next part appears to be confusion between macro logic and data step. Simply remove the % symbols as follows:
if ln_note_crbur_date_delinq ne '' then spc_cmt_date = &i.;
Finally, add a proc append before the %end as follows:
proc append base=work.final data=new; run;
If work.final does not exist, it will be created in the same format as new.
EDIT:
following discussion in comments, here is a revised approach:
/* append_monthly: concatenates the monthly FILE data sets from          */
/* iStart_date to iEnd_date (SAS date values) into WORK.NEW.             */
/* One libref NOTE<YYMM> is assigned per month. SPC_CMT_DATE receives    */
/* the YYMM of the library a row came from whenever                      */
/* LN_NOTE_CRBUR_DATE_DELINQ is non-blank.                               */
%macro append_monthly(iStart_date=, iEnd_date=);
  %local tmp_date i set_statement;
  /* Align the start date with the first day of its month */
  %let tmp_date = %sysfunc(intnx(month,&iStart_date,0,beginning)) ;
  %do %while (&tmp_date le &iEnd_date);
    /* Current month formatted as YYMM */
    %let i = %sysfunc(sum(&tmp_date),yymmn4.);
    %let tmp_date = %sysfunc(intnx(month,&tmp_date,1,beginning)) ;
    /* The member must be qualified with the libref assigned below */
    %let set_statement=&set_statement note&i..file;
    libname note&i "my.qualifiers.fords.note&i." disp=shr;
  %end;
  /* Nothing to read when the start date is after the end date */
  %if %length(&set_statement) = 0 %then %do;
    %put WARNING: append_monthly - no month between iStart_date and iEnd_date.;
    %return;
  %end;
  data new ;
    /* INDSNAME= holds LIBREF.MEMBER of the data set the current row   */
    /* was read from, e.g. NOTE1605.FILE                               */
    set &set_statement indsname=_source;
    /* Take the YYMM from the libref; &i would be the last month for   */
    /* every row                                                       */
    if ln_note_crbur_date_delinq ne '' then
      spc_cmt_date = input(substr(scan(_source, 1, '.'), 5), 4.);
  run;
%mend;
%append_monthly(iStart_date=%sysfunc(mdy(5,1,2016)), iEnd_date=%sysfunc(mdy(10,1,2016)) );
LIBNAME _ALL_ CLEAR;

Get the ith word in a macro variable list

%let TableList = TableA TableH TableB TableG;
Words in &TableList are separated by ' '.
How can I retrieve certain word to do the following?
I do not know the number of words in the tablelist and would like to get the nth word from the list.
Given i = 4,
data &&table&i.; /* &&table&i. will resolve to TableG */
set have;
[..];
run;
I would have used the same %sysfunc(scan) trick as @mjsqu. To answer your remaining question, getting the last word when you don't know the number of words in the list, the easiest way I can think of is to use an array, as below:
%let all=word1 word2 word3 word4 word5;

/* test: stores the number of blank-separated words of &all in the     */
/* macro variable Num_of_words (0 when &all is empty).                 */
%macro test;
  /* Create the result globally unless a variable of that name is      */
  /* already visible, in which case that one is updated                */
  %if not %symexist(Num_of_words) %then %global Num_of_words;
  %if %length(%superq(all)) = 0 %then %let Num_of_words = 0;
  /* COUNTW counts the words directly; only blanks separate words */
  %else %let Num_of_words = %sysfunc(countw(%superq(all), %str( )));
%mend;
%test;
Now you know the total number of words so can find out the last word as well.
The short answer is to use the %scan function:
%put %scan(&tablelist,4,%str( ));
The third argument specifies that %scan should count only spaces as delimiters. Otherwise, it will also treat all of the following characters as delimiters by default:
. < ( + & ! $ * ) ; ^ - / , % |
Given the list you have, you can use a %do loop to add the macro variables to a list:
/* initialise a counter macro variable */
%let k = 1;
/* iterate through tablelist while a value is found; creates table1, table2, ... */
%do %while (%scan(&tablelist,&k,%str( )) ne );
  %let table&k = %scan(&tablelist,&k,%str( ));
  /* %let stores text, so the arithmetic needs %eval; without it k     */
  /* becomes "1 + 1" and the next table&k is not a valid name          */
  %let k = %eval(&k + 1);
%end;
%let i = 4;
%put &&table&i;
N.B. this code only works inside a macro definition (that is, a block of code delimited by %macro and %mend statements).
If you're doing this for the purpose of selecting on the fly one word from the list, you should just make a macro, not try to set up macro variables. Too much extra work to do all that business to make the various macro variables versus a one-line macro.
%let tableList=TableA TableB TableC TableD;

/* selectTable: returns the K-th blank-separated word of &tableList.   */
/* Only a blank is used as delimiter, so two-level names such as       */
/* lib.table are returned whole.                                       */
%macro selectTable(k=);
%scan(&tablelist,&k,%str( ))
%mend selectTable;

data %selectTable(k=4);
  set sashelp.class;
run;

sas macros for incrementing date

My codes are:
/* NOTE(review): the LIBNAME line has no terminating semicolon, so the */
/* statement runs on into the OPTIONS line; no libref is given either. */
libname " Cp/mydata"
options ;
/* Defaults for the macro calls below */
%let yyyymmdd=20050210;
%let offset=0;
%let startrange=0;
%let endrange=0;
/* MACRO FOR INCREMENTING THE DATE */
/* base: starting from date YYYYMMDD, looks for the first existing     */
/* data set AQ.CO_<yyyymmdd> and emits its name as text for the        */
/* caller's statement. Progress is reported with %PUT.                 */
%macro base(yyyymmdd=, offset=);
%local date x ds; /* declare macro variables with local scope */
%let date=%sysfunc(mdy(%substr(&yyyymmdd,5,2)
,%substr(&yyyymmdd,7,2)
,%substr(&yyyymmdd,1,4))); /* convert yyyymmdd to SAS date */
/* NOTE(review): loopout is not listed in the %local statement above */
%let loopout=100;/* hardcoded - number of times to check whether ds exists */
%do x=&offset %to &loopout; /* begin loop */
/* convert &date to yyyymmdd format */
/* The candidate is &date shifted by &offset days; &x is not used here */
%let ds=AQ.CO_%sysfunc(intnx(day,&date,&offset),yymmddn8.);
%if %sysfunc(exist( &ds )) %then %do;
%put &ds exists!;
&ds /* write out the dataset, if it exists */
%let x=&loopout; /* exit loop */
%end;
%else %do;
%put &ds does not exist - checking subsequent day;
/* Appends the text +1 to &date; %let performs no arithmetic.          */
/* NOTE(review): presumably the expression is evaluated when passed to */
/* INTNX above - confirm.                                              */
%let date=&date+1;
%end;
%end;
%mend;
/* loop: generates a complete DATA step that creates data set X from   */
/* the data set found by %base plus every existing AQ.CO_<yyyymmdd>    */
/* whose date lies STARTRANGE..ENDRANGE days from YYYYMMDD.            */
/* Because a whole step is generated, the macro has to be called as a  */
/* statement of its own, not inside another SET statement.             */
%macro loop(yyyymmdd=, startrange=, endrange=);
%local date x ds;
/* Convert yyyymmdd to a SAS date value */
%let date=%sysfunc(mdy(%substr(&yyyymmdd,5,2)
,%substr(&yyyymmdd,7,2)
,%substr(&yyyymmdd,1,4)));
data x;
/* NOTE(review): the keyword SET appears twice on the next line */
set set %base(yyyymmdd=&yyyymmdd, offset=0)
/* loop through each specific dataset, checking first whether it exists.. */
%do x=&startrange %to &endrange;
%let ds=AQ.CO_%sysfunc(intnx(day,&date,&x),yymmddn8.);
%if %sysfunc(exist( &ds )) %then %do;
&ds
%end;
%end;
/* Terminates the generated SET statement */
;
run;
%mend;
This was the error generated when I tried to run this macro.
data temp;
58 set %loop(yyyymmdd=&yyyymmdd, startrange=&startrange,
58 ! endrange=&endrange);
ERROR: File WORK.DATA.DATA does not exist.
ERROR: File WORK.X.DATA does not exist.
AQ.CO_20050210 does not exist - checking subsequent day
AQ.CO_20050211 does not exist - checking subsequent day
AQ.CO_20050212 exists!
NOTE: The system stopped processing this step because of errors.
I want help on two things:
1) Here, I'm trying to increment my date by 1 or 2 or so on if that date is not there in my original dataset. Please help to make this macro work fine.
2)I would like to have another column ie work.date in my data that will have 0 or 1(1 if the specified date yyyymmdd exist in our original data and 0 if I'm incrementing). Please make the specified changes in my macro.
Thanks in advance!!
I wasn't quite sure exactly what your %base() macro was trying to achieve but there were a couple of things I noticed.
First try turning on option mprint; to help with debugging. If you still need more debugging info you can also try turning on the following options (I'd suggest 1 at a time until you know which ones you need):
option symbolgen macrogen mlogic;
Secondly, you have set set instead of just set in your example code. I don't think that is helping any =).
When I tried the code quickly on my machine I noticed I was getting a strange error (different from yours) when I called the %base() macro. It seemed like an error that shouldn't be occurring so I wrapped the call in an %unquote() function just to make sure and I started to receive the error your post mentioned. You may want to try this as well:
set %unquote(%base(yyyymmdd=&yyyymmdd, offset=0))
Normally the %unquote() function isn't required unless you are explicitly using macro quoting functions and getting strange errors, but SAS macros sometimes seem to have a mind of their own. I only ever add this when I know it is required.
Also, your libname call is missing a semicolon at the end of the line.
Finally, some advice on working with dates in the SAS macro language. Don't keep converting between the date value, and the formatted value. It will make your code bigger, more error prone and more difficult to read. I know because I used to do it that way too. Try instead to always work with variables that contain the actual date value (by using the result from %sysfunc(mdy()) ) and then if you need a formatted value then create a new variable (eg. %let yyyymmdd = %sysfunc(putn(&mydate),yymmddn8.);. When you pass values from one macro to another, don't pass the formatted values even if it seems easier, pass the actual values.
Making the above changes removed all errors on my machine. Final code:
/* A LIBNAME statement needs a libref before the path.                 */
/* NOTE(review): AQ is assumed because every data set referenced by    */
/* the macros below is AQ.CO_<date> - confirm the intended libref.     */
libname AQ " Cp/mydata";
/* Defaults for the macro calls below */
%let yyyymmdd=20050210;
%let offset=0;
%let startrange=0;
%let endrange=0;
/* MACRO FOR INCREMENTING THE DATE */
/* base: emits the name of the first existing data set                 */
/* AQ.CO_<yyyymmdd>, checking the dates YYYYMMDD+OFFSET,               */
/* YYYYMMDD+OFFSET+1, ... up to YYYYMMDD+100 days.                     */
/*   yyyymmdd - start date as an 8-digit yyyymmdd string               */
/*   offset   - number of days to skip before the first check          */
/* The name is emitted as text for the caller's statement; nothing is  */
/* emitted when no data set is found.                                  */
%macro base(yyyymmdd=, offset=);
  %local date x ds loopout; /* declare macro variables with local scope */
  %let date=%sysfunc(mdy(%substr(&yyyymmdd,5,2)
                        ,%substr(&yyyymmdd,7,2)
                        ,%substr(&yyyymmdd,1,4))); /* convert yyyymmdd to SAS date */
  %let loopout=100; /* hardcoded - number of times to check whether ds exists */
  %do x=&offset %to &loopout; /* begin loop */
    /* Candidate is the start date plus x days; the start date itself  */
    /* is left unchanged                                               */
    %let ds=AQ.CO_%sysfunc(intnx(day,&date,&x),yymmddn8.);
    %if %sysfunc(exist( &ds )) %then %do;
      %put &ds exists!;
      &ds /* write out the dataset, if it exists */
      %return; /* first hit wins */
    %end;
    %put &ds does not exist - checking subsequent day;
  %end;
  %put WARNING: base - no AQ.CO_ data set found within &loopout days of &yyyymmdd..;
%mend;
/* loop: builds data set X from the data set returned by %base plus    */
/* every existing AQ.CO_<yyyymmdd> dated STARTRANGE..ENDRANGE days     */
/* from YYYYMMDD.                                                      */
%macro loop(yyyymmdd=, startrange=, endrange=);
  %local basedate dayoff candidate mm dd yyyy;
  /* Split the yyyymmdd string and turn it into a SAS date value */
  %let mm=%substr(&yyyymmdd,5,2);
  %let dd=%substr(&yyyymmdd,7,2);
  %let yyyy=%substr(&yyyymmdd,1,4);
  %let basedate=%sysfunc(mdy(&mm,&dd,&yyyy));
  data x;
    set %unquote( %base(yyyymmdd=&yyyymmdd, offset=0))
    /* add each data set of the requested range that exists */
    %do dayoff=&startrange %to &endrange;
      %let candidate=AQ.CO_%sysfunc(intnx(day,&basedate,&dayoff),yymmddn8.);
      %if %sysfunc(exist( &candidate )) %then %do;
        &candidate
      %end;
    %end;
    ;
  run;
%mend;
%loop(yyyymmdd=&yyyymmdd, startrange=&startrange, endrange=&endrange);
Seems to me that your solution is quite complex.
But I believe that at least one issue is the variable x in your second macro (%loop): I do not see where you define it.
You can probably do all of this much easier, IF you do not need to limit the loopout. If you just want all datasets beyond the offset, you can simplify all this by making use of the SASHELP library to find the datasets you need. And then just loop over that result.
DEPRECATED REPLY BELOW, misread the need
You are partially reinventing the wheel, have a deeper look at the intnx and intck functions.
http://support.sas.com/documentation/cdl/en/etsug/60372/HTML/default/viewer.htm#etsug_tsdata_sect038.htm
https://support.sas.com/documentation/cdl/en/lrdict/64316/HTML/default/viewer.htm#a000212700.htm

SAS: put format in macro

I am trying to create a new variable by assigning a format to an existing variable. I'm doing this from within a macro. I'm getting the following error: ": Expecting a format name." Any thoughts on how to resolve? Thanks!
/* iterlist: executes a code block once for each item of a list.         */
/*   code - code to run, wrapped in %nrstr(); every ? in it is replaced  */
/*          by the current item                                          */
/*   list - items, separated by the default %scan delimiters             */
%macro iterlist
(
 code =
,list =
)
;
  %local i cntitem codeprp;
  %*** ASSIGN EACH ITEM IN THE LIST TO AN INDEXED MACRO VARIABLE &&ITEM&I ;
  %let i = 1;
  %do %while (%cmpres(%scan(&list., &i.)) ne );
    %local item&i.;
    %let item&i. = %cmpres(%scan(&list., &i.));
    %let i = %eval(&i. + 1);
  %end;
  %*** STORE THE COUNT OF THE NUMBER OF ITEMS IN A MACRO VARIABLE: &CNTITEM;
  %let cntitem = %eval(&i. - 1);
  %*** EXPRESS CODE, REPLACING TOKENS WITH ELEMENTS OF THE LIST, IN SEQUENCE;
  %do i = 1 %to &cntitem.;
    /* The ? is replaced by the still-quoted reference to item i, which  */
    /* is resolved when the code is unquoted on the next line            */
    %let codeprp = %qsysfunc(tranwrd(&code.,?,%nrstr(&&item&i..)));
    %unquote(&codeprp.)
  %end;
%mend iterlist;
/* set the list of variables to iterate thru */
%let mylist = v1 v2 v3 v4;
/* create a contents table to look up format info to assign in macro below*/
/* OUT=CONTENTS holds one row per variable of a.recode1 */
proc contents data=a.recode1 noprint out=contents;
run;
/* macro to create freq and chisq tables for each var */
/* runfreqs: for variable VARIABL, cross-tabulates it with IMPROVED,    */
/* looks up its format in CONTENTS and builds &variabl._3 with the      */
/* formatted value in CLASSVALUE.                                       */
%macro runfreqs (variabl = );
/* Writes the counts to &variabl._1 and the statistics to &variabl.chisq */
proc freq data=a.recode1 noprint ;
tables &variabl.*improved /out=&variabl._1 chisq;
output out=&variabl.chisq n pchi ;
run;
/* do some more stuff with the freq tables, then grab format for variable from contents */
/* Stores the FORMAT value of the matching CONTENTS row in &classformat. */
/* NOTE(review): presumably the value arrives padded with trailing       */
/* blanks and without the closing period of a format - confirm.          */
data _null_;
set contents;
if name="&variabl." then CALL SYMPUT("classformat", format);
run;
data &variabl._3;
length classvalue $ 30 ;
/* NOTE(review): &variabl._2 is not created in the code shown here */
set &variabl._2; ;
/* output a new var using the macro variable for format that we pulled from contents above. Here's where the error occurs. */
classvalue=put(class, %quote(&classformat.));
run;
%mend runfreqs;
* run the macro, iterating thru var list and creating freq tables;
%ITERLIST(list = &mylist., code = %nrstr(%runfreqs(variabl = ?);));
Just guessing, the line
classvalue=put(class, %quote(&classformat.));
should be
classvalue=put(class, &classformat..);
Two periods are needed: the first is "eaten" by the macro processor, which treats it as the end of the macro variable name, and the second completes the format name.
I believe you won't need %quote() in your case - format name cannot contain strings quoted by %quote().
EDIT: Again not tried, just based on the code I see you also need to change CALL SYMPUT("classformat", format);
to CALL SYMPUTX("classformat", format);
CALL SYMPUTX() is advanced version of CALL SYMPUT(), it removes trailing blanks in macro variable value while the original version keeps blanks. Effectively this will be same as your solution, just simpler.
So the problem is indeed with extra blanks between format name and the period.
No idea why this works and vasja's idea wouldn't, but the problem was clearly with the period on the end of the format name (or perhaps some extra white space?). I changed the data step to add the period before the SYMPUT call:
/* Looks up the format of variable &variabl. in CONTENTS and stores it, */
/* with its closing period, in the macro variable classformat.          */
data _null_;
  set contents;
  where name = "&variabl.";
  /* CALL SYMPUTX strips the blanks that CALL SYMPUT would keep.        */
  /* A variable without a format is reported instead of producing the   */
  /* invalid format name "."                                            */
  if not missing(format) then call symputx("classformat", cats(format, '.'));
  else put "WARNING: no format is assigned to &variabl., classformat not set";
run;