Macro %descriptive
1. Description
The macro DESCRIPTIVE will intake SAS datasets and outputs descriptive statistics required for a table. The percentage calculation is assumed to be computed based on population total. Mean, Median, Min, Max, Q1 and Q3 can be estimated using this macro.
2. Dependencies
2.1 Global Macro Variables : INTABLE – The input dataset in which population count and percentage is calculated.TABLEFILT – The filer condition from input dataset in which population count and percentage is calculated.
2.2 Global Macros - N/A
3. Macro Parameters
Macro parameter | Description | Example |
---|---|---|
INDSIN (R) | The name of the input dataset which contains the variable for computing descriptive statistics. | INDSIN=ADAM.ADSL , Default value: None |
DSINFILT (O) | Any filter condition that needs to be applied in the input dataset. | DSINFILT=FASFL EQ ‘Y’ , Default value: None |
TRTFMT (O) | Intakes defined formats or to create dummy values inside columns when data is not present for all treatment groups as per mock shell. | TRTFMT=TRTN., Default value: None |
VAR (R) | Required variable for computing descriptive statistics. | VAR=AGE , Default value: None |
BY (O) | This parameter is used to present the descriptive statistics on the basis of any by variables. | BY=VISIT , Default value: None |
COLBY (R) | Specifies the variable names corresponding to column headers in mock shell. | COLBY=TRT01A , Default value: None |
DISPLAYTEXT (O) | The display text of variables in which descriptive statistics are computed. | DISPLAYTEXT=%STR(AGE (YEARS)) , Default value: None |
DISPORDER (R) | The order of descriptive statistics to be presented. To split the statistics into next line, ‘ | ’ can be specified. |
MINMAXDEC (R) | Decimal place that needs to be presented for minimum and maximum. Decimal places of other statistics such as mean, median will be updated accordingly. | MINMAXDEC=0 , Default value: None |
ORD (R) | Used for traceability, and varies from 1 to n for each macro call inside a table generation. | ORD=1 , Default value: None |
APPEND (O) | Y/N option to mention whether to append the dataset created from the macro with finaltable dataset in the work library. If N, the dataset created from previous call is discarded and a new dataset is formed. | APPEND=N , Default value: None |
(R): Required (O): Optional
4. Assumptions
- The dataset and table filter for computing N should be defined as global macro variable as INTABLE and TABLEFILT
- All necessary formats must be specified before the macro call if required. There should be a numeric format for ordering which should be specified in this parameter and a character format which has the same name as numeric format and end with suffix “s”.
5. Sample Call
1. %DESCRIPTIVE(INDSIN=ADAM.ADSL,
DSINFILT=FASFL EQ "Y",
VAR=AGE,
COLBY=TRT01A,
TRTFMT=TRTN,
DISPLAYTEXT=%STR(AGE (YEARS)),
DISPORDER=%STR(N|MEAN (SD)|MEDIAN|MIN, MAX),
MINMAXDEC=0,
ORD=1,
APPEND=N)
2. %DESCRIPTIVE(INDSIN=ADEG
DSINFILT=FASFL EQ "Y" AND PARAMCD EQ "QTCFAG" AND FIND(TRT01A,"EXPANSION","I"),
VAR=AVAL,
BY=VISIT,
COLBY=TRT01A,
TRTFMT=,
DISPLAYTEXT=,
DISPORDER=%STR(N|MEAN (SD)|MEDIAN|MIN, MAX),
MINMAXDEC=1,
ORD=1,
APPEND=Y);
6. Macro Code
%macro descriptive(indsin=,
dsinfilt=,
var=,
by=,
colby=,
trtfmt=,
displaytext=,
disporder=,
minmaxdec=,
ord=,
append=);
ods select none;
/*VarExist macro is invoked to check if a particular variable exists in the dataset*/
%macro VarExist(ds, var);
%local rc dsid result;
%let dsid = %sysfunc(open(&ds));
%if %sysfunc(var(&dsid, &var)) > 0 %then %do;
%let result = 1;
%put NOTE: Var &var exists in &ds;
%end;
%else %do;
%let result = 0;
%put NOTE: Var &var not exists in &ds;
%end;
%let rc = %sysfunc(close(&dsid));
&result
%mend VarExist;
/*Checking if indsin parameter is specified. Giving a user info otherwise*/
%if %bquote(&indsin.) eq %str() %then
%do;
%put %str(WAR)NING: Required parameter indsin is not given.Kindly update.;
%return;
%end;
/*Checking if required parameters are specified. Aborting the macro otherwise*/
%if %bquote(&colby.) eq %str() %then
%do;
%put %str(WAR)NING: Required parameter colby is not given.Kindly update.;
%return;
%end;
%if %bquote(&var.) eq %str() %then
%do;
%put %str(WAR)NING: Required parameter var is not given.Kindly update.;
%return;
%end;
%if %bquote(&disporder.) eq %str() %then
%do;
%put %str(WAR)NING: Required parameter disporder is not given.Kindly update.;
%return;
%end;
%if %bquote(&minmaxdec.) eq %str() %then
%do;
%put %str(WAR)NING: Required parameter minmaxdec is not given.Kindly update.;
%return;
%end;
%if %bquote(&ord.) eq %str() %then
%do;
%put %str(WAR)NING: Required parameter ord is not given.Kindly update.;
%return;
%end;
/*dsin macro variable is created to store the input dataset if it is from work library or some other libraries*/
%if %sysfunc(countw(&indsin.,%str(.))) eq 2 %then %do;
data __%scan(&indsin.,2,%str(.));
set &indsin.;
run;
%let dsin = %scan(&indsin.,2,%str(.));
%end;
%else %do;
data __&indsin.;
set &indsin.;
run;
%let dsin = &indsin.;
%end;
/*Appending the dataset again to set the total column with corresponding counts*/
data &dsin._a;
set __&dsin.(where=(&dsinfilt.)) __&dsin.(where=(&dsinfilt.) in=b);
if b then do;
&colby. = "Total";
&colby.n = 99;
end;
run;
/*Sorthing the dataset and calculating the descriptive statistics*/
proc sort data = &dsin._a out = &dsin._1;
by %if %str(&by.) ne %str() %then %do; &by.n &by. %end; &colby.n &colby. ;
run;
proc means data = &dsin._1 completetypes;
%if %str(&by.) ne %str() %then %do;
by &by.n &by.;
%end;
/*If the column headers in the mock shell is not present in data, preloadfmt is used to load all values*/
class &colby.n %if
%str(&trtfmt.) ne %str() %then %do;/preloadfmt %end; ;
%if %str(&trtfmt.) ne %str() %then %do;
format &colby.n &trtfmt..;
%end;
var &var.;
output out = &dsin._2(where=(_type_ ne 0)) n=n nmiss=nmiss mean=mean std=std
median=median min=min max=max stderr=stderr lclm=lclm uclm=uclm q1=q1 q3=q3;
run;
/*Formatting the dataset with correct decimal places for each statistics*/
data &dsin._3(drop=min1 max1 mean1 median1 lclm1 uclm1 q11 q31);
set &dsin._2(rename=(min=min1 max=max1 mean=mean1 median=median1 lclm=lclm1 uclm=uclm1 q1=q11 q3=q31));
min = strip(put(min1,12.&minmaxdec.));
max = strip(put(max1,12.&minmaxdec.));
mean = strip(put(mean1,12.%eval(&minmaxdec.+1)));
median = strip(put(median1,12.&minmaxdec.));
if not missing(std) then sd = strip(put(std,12.%eval(&minmaxdec.+1)));
else sd = "NA";
lclm = strip(put(lclm1,12.&minmaxdec.));
uclm = strip(put(uclm1,12.&minmaxdec.));
q1 = strip(put(q11,12.&minmaxdec.));
q3 = strip(put(q31,12.&minmaxdec.));
if n eq 0 then call missing(min,max,mean,median,sd,lclm,uclm,q1,q3);
run;
/*Changing the alphanumeric and some other standard symbols to some rare symbols. These symbols
are reverted in a later stage. This is done to avoid issue which macro processing */
%let disporder = %sysfunc(tranwrd(%bquote(&disporder.),±,+));
%let disporder = %sysfunc(tranwrd(%bquote(&disporder.),%bquote(,),%str(^)));
%let disporder = %sysfunc(tranwrd(%bquote(&disporder.),%bquote(STD),%str(SD)));
/*Do loop is running to iterate the statistics in each line*/
%do k = 1 %to %sysfunc(countw(%bquote(&disporder.),%str(|)));
data &dsin._3__&k.;
set &dsin._3;
%let jvar = %scan(%bquote(&disporder.),&k.,%str(|));
__dummy = symget("jvar");
/*Identifying the number of statistics within each line and a nested loop is specified for iterating the same*/
%do i = 1 %to %sysfunc(countw(%bquote(&jvar.)));
/*Taking the different descriptive statistics in a single line into the variable str1,str2 etc*/
str&i. = scan(__dummy,&i.);
%if &i. eq 1 %then
%do;
/*Checking if there are mulitple descriptive statistics present in a single line by taking the position of delimiters and store
in str variables */
call missing(col&i.,col2&i.,col);
col&i. = find(__dummy,scan(__dummy,%eval(&i.+1)),find(__dummy,scan(__dummy ,&i.)));
col2&i. = find(__dummy,scan(__dummy,&i.))+%length(%scan(%bquote(&jvar.),&i .));
col = col&i.-col2&i.;
if col ne 0 then do;
str%eval(&i.+%sysfunc(countw(%bquote(&jvar.))))=substr(__dummy,find(__du mmy,scan(__dummy,&i.))+%length(%scan(%bquote(&jvar.),&i.)),col);
end;
else do;
str%eval(&i.+%sysfunc(countw(%bquote(&jvar.))))="";
end;
%end;
/*For the last descriptive statistics in a line, assigning the stat to str variables*/
%else %if &i. eq %sysfunc(countw(%bquote(&jvar.))) %then
%do;
str%eval(&i.+%sysfunc(countw(%bquote(&jvar.))))=substr(__dummy,find(__dumm y,scan(__dummy,&i.),find(__dummy,scan(__dummy,%eval(&i.-1))))+%length(%scan(%bquot e(&jvar.),&i.)) );
%end;
/*For the descriptive statistics in the middle in a line, assigning the stat to str variables*/
%else
%do;
call missing(col&i.,col2&i.,col);
col&i. = find(__dummy,scan(__dummy,%eval(&i.+1)),find(__dummy,scan(__dummy ,&i.)) );
col2&i. = find(__dummy,scan(__dummy,&i.))+%length(%scan(%bquote(&jvar.),&i .));
col = col&i.-col2&i.;
str%eval(&i.+%sysfunc(countw(%bquote(&jvar.))))=substr(__dummy,find(__dumm y,scan(__dummy,&i.),find(__dummy,scan(__dummy,%eval(&i.-1))))+%length(%scan(%bquot e(&jvar.),&i.)),
col);
%end;
%end;
/*Identifying the value of the corresponding statistics in str variables and store it inside value variable*/
value = cat(
%do i = 1 %to %sysfunc(countw(%bquote(&jvar.)));
strip(vvaluex(str&i.)),compbl(str%eval(&i.+%sysfunc(countw(&jvar.)))) %if &i. ne %sysfunc(countw(&jvar.)) %then ,;
%end;);
run;
%end;
/*post processing of data by spacing and transforming the values are required*/
data &dsin._5(keep= &colby.n __dummy value %if %str(&by.) ne %str() %then %do; &by.n &by. %end;);
set &dsin._3__:;
__dummy=tranwrd(__dummy,"STD","SD");
__dummy=cat(" ",__dummy);
if missing(compress(value,,"kad")) then call missing(value);
run;
proc sort data = &dsin._5;
by %if %str(&by.) ne %str() %then %do; &by.n &by. %end; __dummy;
run;
/*Transposing the data so that the column headers will come as variables*/
proc transpose data = &dsin._5 out = &dsin._6(drop=_name_);
by %if %str(&by.) ne %str() %then %do; &by.n &by. %end; __dummy;
id &colby.n;
var value;
run;
/*Final post processing of the data to display the category appropriately based on by variables*/
data head;
length statistic $200.;
statistic = "&displaytext.";
run;
%if %str(&by.) eq %str() %then %do;
data &dsin._7;
length statistic $200.;
set head &dsin._6;
if not missing(__dummy) then statistic = __dummy;
else statistic = statistic;
run;
%end;
%else %do;
data &dsin._6a;
set &dsin._6;
by &by.n &by.;
if first.&by.n then do;
statistic = &by.;
output;
end;
statistic=cat(" ",strip(__dummy));
output;
run;
data &dsin._7;
set &dsin._6a;
run;
%end;
/*Reverting the alpha numeric and commas pre-processed earlier to its original value*/
data &dsin._7a;
set &dsin._7;
%do i = 1 %to %sysfunc(countw(%bquote(&disporder.),%str(|)));
if strip(statistic) = "%scan(%sysfunc(tranwrd(%bquote(&disporder.),%bquote(,),%str(^))),&i.,%str(|))"
then statn = &i.;
%end;
run;
proc sort data = &dsin._7a;
by %if %str(&by.) ne %str() %then %do; &by.n %end; statn;
run;
/*Idnetifying the number of variables in the column header to rename it to col values*/
proc sql noprint;
select name into : memvar separated by " " from dictionary.columns where
lowcase(libname) eq "work" and lowcase(memname) eq "%lowcase(&dsin._7a)" and lowcase(name) not in ("&by.","&by.n","statistic","__dummy","statn");
quit;
data &dsin._7b;
set &dsin._7a;
%if %str(&by.) ne %str() %then %do;
if statistic=&by. then call missing(_99,%sysfunc(tranwrd(&memvar.,%str( ),%bquote(,))));
%end;
run;
/*Renaming the column headers to col values*/
data &dsin._8(rename=(%do i = 1 %to %sysfunc(countw(&memvar.));
%scan(&memvar.,&i.,%str( )) = col&i.
%end;
));
set &dsin._7b;
%do i = 1 %to %sysfunc(countw(&memvar.));
if strip(upcase(statistic)) eq "N" and missing(%scan(&memvar.,&i.,%str( ))) then %scan(&memvar.,&i.,%str( )) = "0";
%end;
run;
/*Identifying the label of column headers to store them into variable labels in final dataset */
proc sort data = &intable.(where=(&tablefilt.)) out = headlbl nodupkey;
by &colby.n;
run;
%if %str(&trtfmt.) ne %str() %then %do;
proc format library=work.formats cntlout=fmtt;
run;
proc sql noprint;
select strip(label) into: headvarlb separated by "**" from fmtt where upcase(fmtname) eq "%upcase(&trtfmt.S)";
quit;
%end;
%else %do;
proc sql noprint;
select &colby. into : headvarlb separated by "**" from headlbl;
quit;
%end;
proc sql noprint;
select count(*) into : tot from &intable. where &tablefilt.;
%do i = 1 %to %sysfunc(countw(&headvarlb.,%str(**)));
select count(*) into :div&i. from &intable. where &tablefilt. and &colby. eq "%scan(&headvarlb.,&i.,%str(**))";
%end;
quit;
/*Replacing the alphanumeric/special characters and assigning label to column headers along with N*/
data &dsin._9(drop=i __dummy);
retain %if %str(&by.) ne %str() %then %do; &by. &by.n %end; statistic col:;
set &dsin._8;
array chars[*] _character_;
do i = 1 to dim(chars);
chars[i] = tranwrd(tranwrd(tranwrd(chars[i],"+","±"),"( ","("),"^",",");
end;
ord = &ord.;
%do i = 1 %to %sysfunc(countw(&headvarlb.,%str(**)));
label col&i. = "%scan(&headvarlb.,&i.,%str(**)) N=(%cmpres(&&div&i..))";
%end;
label col%eval(%sysfunc(countw(&headvarlb.,%str(**)))+1) = "Total (N=%cmpres(&tot.))";
run;
%let datct = &syslast.;
/*Appending the dataset with work.finaltable based on the value in macro parameter append*/
%if %str(&append.) eq %str(Y) %then
%do;
%if %sysfunc(exist(finaltable)) eq 0 %then
%do;
%put %str(WAR)NING: Append parameter is given as Y but the dataset finaltable does not exist.;
%return;
%end;
data finaltable;
length statistic $200.;
set finaltable &datct.;
run;
%end;
%else %if %str(&append.) eq %str(YS) %then
%do;
%if %sysfunc(exist(finaltable)) eq 0 %then
%do;
%put %str(WAR)NING: Append parameter is given as Y but the dataset finaltable does not exist.;
%return;
%end;
data dummy;
statistic = "";
run;
data finaltable;
length statistic $200.;
set finaltable dummy &datct.;
run;
%end;
%else
%do;
data finaltable;
set &datct.;
run;
%end;
/*Deleting the intermediate datasets*/
/*proc datasets library = work memtype=data;*/
/* delete head &dsin._:;*/
/*quit;*/
ods select all;
%mend descriptive;