Macro %descriptive

1. Description

The macro DESCRIPTIVE will intake SAS datasets and outputs descriptive statistics required for a table. The percentage calculation is assumed to be computed based on population total. Mean, Median, Min, Max, Q1 and Q3 can be estimated using this macro.

2. Dependencies

2.1 Global Macro Variables : INTABLE – The input dataset in which population count and percentage is calculated.TABLEFILT – The filer condition from input dataset in which population count and percentage is calculated.

2.2 Global Macros - N/A

3. Macro Parameters

Macro parameter Description Example
INDSIN (R) The name of the input dataset which contains the variable for computing descriptive statistics. INDSIN=ADAM.ADSL , Default value: None
DSINFILT (O) Any filter condition that needs to be applied in the input dataset. DSINFILT=FASFL EQ ‘Y’ , Default value: None
TRTFMT (O) Intakes defined formats or to create dummy values inside columns when data is not present for all treatment groups as per mock shell. TRTFMT=TRTN., Default value: None
VAR (R) Required variable for computing descriptive statistics. VAR=AGE , Default value: None
BY (O) This parameter is used to present the descriptive statistics on the basis of any by variables. BY=VISIT , Default value: None
COLBY (R) Specifies the variable names corresponding to column headers in mock shell. COLBY=TRT01A , Default value: None
DISPLAYTEXT (O) The display text of variables in which descriptive statistics are computed. DISPLAYTEXT=%STR(AGE (YEARS)) , Default value: None
DISPORDER (R) The order of descriptive statistics to be presented. To split the statistics into next line, ’ ’ can be specified.
MINMAXDEC (R) Decimal place that needs to be presented for minimum and maximum. Decimal places of other statistics such as mean, median will be updated accordingly. MINMAXDEC=0 , Default value: None
ORD (R) Used for traceability, and varies from 1 to n for each macro call inside a table generation. ORD=1 , Default value: None
APPEND (O) Y/N option to mention whether to append the dataset created from the macro with finaltable dataset in the work library. If N, the dataset created from previous call is discarded and a new dataset is formed. APPEND=N , Default value: None

(R): Required (O): Optional

4. Assumptions

  1. The dataset and table filter for computing N should be defined as global macro variable as INTABLE and TABLEFILT
  2. All necessary formats must be specified before the macro call if required. There should be a numeric format for ordering which should be specified in this parameter and a character format which has the same name as numeric format and end with suffix “s”.

5. Sample Call

1. %DESCRIPTIVE(INDSIN=ADAM.ADSL,
                    DSINFILT=FASFL EQ "Y",
                    VAR=AGE,
                    COLBY=TRT01A,
                    TRTFMT=TRTN,
                    DISPLAYTEXT=%STR(AGE (YEARS)),
                    DISPORDER=%STR(N|MEAN (SD)|MEDIAN|MIN, MAX),
                  MINMAXDEC=0,
                    ORD=1,
                  APPEND=N)
2. %DESCRIPTIVE(INDSIN=ADEG
                    DSINFILT=FASFL EQ "Y" AND PARAMCD EQ "QTCFAG" AND                              FIND(TRT01A,"EXPANSION","I"),
                  VAR=AVAL,
                    BY=VISIT,
                    COLBY=TRT01A,
                    TRTFMT=,
                    DISPLAYTEXT=,
                    DISPORDER=%STR(N|MEAN (SD)|MEDIAN|MIN, MAX),
                    MINMAXDEC=1,
                    ORD=1,
                    APPEND=Y);
  

6. Macro Code

    %macro descriptive(indsin=,
                       dsinfilt=,
                       var=,
                       by=,
                       colby=,
                       trtfmt=,
                       displaytext=,
                       disporder=,
                       minmaxdec=,
                       ord=,
                       append=);
    
    ods select none;
    
    /*VarExist macro is invoked to check if a particular variable exists in the     dataset*/
    
    %macro VarExist(ds, var);
        %local rc dsid result;
        %let dsid = %sysfunc(open(&ds));
     
        %if %sysfunc(var(&dsid, &var)) > 0 %then %do;
            %let result = 1;
            %put NOTE: Var &var exists in &ds;
        %end;
        %else %do;
            %let result = 0;
            %put NOTE: Var &var not exists in &ds;
        %end;
     
        %let rc = %sysfunc(close(&dsid));
        &result
    %mend VarExist;
    
    /*Checking if indsin parameter is specified. Giving a user info otherwise*/
    
    %if %bquote(&indsin.) eq %str() %then 
    %do;
      %put %str(WAR)NING: Required parameter indsin is not given.Kindly update.;
      %return;
    %end;
    
    /*Checking if required parameters are specified. Aborting the macro otherwise*/
    
    %if %bquote(&colby.) eq %str() %then 
    %do;
      %put %str(WAR)NING: Required parameter colby is not given.Kindly update.;
      %return;
    %end;
    
    %if %bquote(&var.) eq %str() %then 
    %do;
      %put %str(WAR)NING: Required parameter var is not given.Kindly update.;
      %return;
    %end;
    
    %if %bquote(&disporder.) eq %str() %then 
    %do;
      %put %str(WAR)NING: Required parameter disporder is not given.Kindly update.;
      %return;
    %end;
    
    %if %bquote(&minmaxdec.) eq %str() %then 
    %do;
      %put %str(WAR)NING: Required parameter minmaxdec is not given.Kindly update.;
      %return;
    %end;
    
    %if %bquote(&ord.) eq %str() %then 
    %do;
      %put %str(WAR)NING: Required parameter ord is not given.Kindly update.;
      %return;
    %end;
    
    /*dsin macro variable is created to store the input dataset if it is from work     library or some other libraries*/
    
    %if %sysfunc(countw(&indsin.,%str(.))) eq 2 %then %do;
      data __%scan(&indsin.,2,%str(.));
          set &indsin.;
        run;
    
        %let dsin = %scan(&indsin.,2,%str(.));
    %end;
    
    %else %do;
      data __&indsin.;
        set &indsin.;
      run;
    
      %let dsin = &indsin.;
    %end;
    
    /*Appending the dataset again to set the total column with corresponding counts*/
    
    data &dsin._a;
      set __&dsin.(where=(&dsinfilt.)) __&dsin.(where=(&dsinfilt.) in=b);
        if b then do;
          &colby. = "Total";
            &colby.n = 99;
        end;
    run;
    
    /*Sorthing the dataset and calculating the descriptive statistics*/
    
    proc sort data = &dsin._a out = &dsin._1;
      by %if %str(&by.) ne %str() %then %do; &by.n &by. %end; &colby.n &colby. ;
    run;
    
    proc means data = &dsin._1 completetypes;
        %if %str(&by.) ne %str() %then %do;  
          by &by.n &by.;
        %end;
    /*If the column headers in the mock shell is not present in data, preloadfmt is     used to load all values*/
        class &colby.n  %if 
          %str(&trtfmt.) ne %str() %then %do;/preloadfmt %end; ;
        %if %str(&trtfmt.) ne %str() %then %do;
          format &colby.n &trtfmt..;
        %end;
        var &var.;
        output out = &dsin._2(where=(_type_ ne 0)) n=n nmiss=nmiss mean=mean std=std 
       median=median min=min max=max stderr=stderr lclm=lclm uclm=uclm q1=q1 q3=q3;
    run;
    
    /*Formatting the dataset with correct decimal places for each statistics*/
    
    data &dsin._3(drop=min1 max1 mean1 median1 lclm1 uclm1 q11 q31);
      set &dsin._2(rename=(min=min1 max=max1 mean=mean1 median=median1 lclm=lclm1     uclm=uclm1 q1=q11 q3=q31));
      min = strip(put(min1,12.&minmaxdec.));
      max = strip(put(max1,12.&minmaxdec.));
      mean = strip(put(mean1,12.%eval(&minmaxdec.+1)));
      median = strip(put(median1,12.&minmaxdec.));
      if not missing(std) then sd = strip(put(std,12.%eval(&minmaxdec.+1)));
      else sd = "NA";
      lclm = strip(put(lclm1,12.&minmaxdec.));
      uclm = strip(put(uclm1,12.&minmaxdec.));
      q1 = strip(put(q11,12.&minmaxdec.));
      q3 = strip(put(q31,12.&minmaxdec.));
      if n eq 0 then call missing(min,max,mean,median,sd,lclm,uclm,q1,q3);
    run;
    
    /*Changing the alphanumeric and some other standard symbols to some rare symbols.     These symbols
      are reverted in a later stage. This is done to avoid issue which macro     processing */
    
    %let disporder = %sysfunc(tranwrd(%bquote(&disporder.),±,+));
    %let disporder = %sysfunc(tranwrd(%bquote(&disporder.),%bquote(,),%str(^)));
    %let disporder = %sysfunc(tranwrd(%bquote(&disporder.),%bquote(STD),%str(SD)));
    
    /*Do loop is running to iterate the statistics in each line*/
    
    %do k = 1 %to %sysfunc(countw(%bquote(&disporder.),%str(|)));
    
    data &dsin._3__&k.;
      set &dsin._3;
          %let jvar = %scan(%bquote(&disporder.),&k.,%str(|));
        __dummy = symget("jvar");
    /*Identifying the number of statistics within each line and a nested loop is     specified for iterating the same*/
            %do i = 1 %to %sysfunc(countw(%bquote(&jvar.)));
    /*Taking the different descriptive statistics in a single line into the variable     str1,str2 etc*/
                str&i. = scan(__dummy,&i.);
                %if &i. eq 1 %then 
                %do;
    /*Checking if there are mulitple descriptive statistics present in a single line     by taking the position of delimiters and store
                 in str variables */
                    call missing(col&i.,col2&i.,col);
                    col&i. = find(__dummy,scan(__dummy,%eval(&i.+1)),find(__dummy,scan(__dummy    ,&i.)));
                    col2&i. = find(__dummy,scan(__dummy,&i.))+%length(%scan(%bquote(&jvar.),&i    .));
                    col = col&i.-col2&i.;
                    if col ne 0 then do;
                      str%eval(&i.+%sysfunc(countw(%bquote(&jvar.))))=substr(__dummy,find(__du    mmy,scan(__dummy,&i.))+%length(%scan(%bquote(&jvar.),&i.)),col);
                    end;
                    else do;
                      str%eval(&i.+%sysfunc(countw(%bquote(&jvar.))))="";
                    end;
                %end;
    /*For the last descriptive statistics in a line, assigning the stat to str     variables*/
                %else %if &i. eq %sysfunc(countw(%bquote(&jvar.))) %then
                %do;
                    str%eval(&i.+%sysfunc(countw(%bquote(&jvar.))))=substr(__dummy,find(__dumm    y,scan(__dummy,&i.),find(__dummy,scan(__dummy,%eval(&i.-1))))+%length(%scan(%bquot    e(&jvar.),&i.)) );
                %end;
    /*For the descriptive statistics in the middle in a line, assigning the stat to     str variables*/
                %else 
                %do;
                    call missing(col&i.,col2&i.,col);
                    col&i. = find(__dummy,scan(__dummy,%eval(&i.+1)),find(__dummy,scan(__dummy    ,&i.)) );
                    col2&i. = find(__dummy,scan(__dummy,&i.))+%length(%scan(%bquote(&jvar.),&i    .));
                    col = col&i.-col2&i.;
                    str%eval(&i.+%sysfunc(countw(%bquote(&jvar.))))=substr(__dummy,find(__dumm    y,scan(__dummy,&i.),find(__dummy,scan(__dummy,%eval(&i.-1))))+%length(%scan(%bquot    e(&jvar.),&i.)),
                   col);
                %end;
        %end;
    /*Identifying the value of the corresponding statistics in str variables and store     it inside value variable*/
            value = cat(
            %do i = 1 %to %sysfunc(countw(%bquote(&jvar.)));
              strip(vvaluex(str&i.)),compbl(str%eval(&i.+%sysfunc(countw(&jvar.)))) %if     &i. ne %sysfunc(countw(&jvar.)) %then ,;
          %end;);
    run;
    %end;
    
    /*post processing of data by spacing and transforming the values are required*/
    data &dsin._5(keep= &colby.n __dummy value %if %str(&by.) ne %str() %then %do;     &by.n &by. %end;);
      set &dsin._3__:;
      __dummy=tranwrd(__dummy,"STD","SD");
      __dummy=cat("  ",__dummy);
      if missing(compress(value,,"kad")) then call missing(value);
    run;
    
    proc sort data = &dsin._5;
      by %if %str(&by.) ne %str() %then %do; &by.n &by. %end; __dummy;
    run;
    
    /*Transposing the data so that the column headers will come as variables*/
    
    proc transpose data = &dsin._5 out = &dsin._6(drop=_name_);
      by %if %str(&by.) ne %str() %then %do; &by.n &by. %end; __dummy;
        id &colby.n;
        var value;
    run;
    
    /*Final post processing of the data to display the category appropriately based on     by variables*/
    
    data head;
      length statistic $200.;
        statistic = "&displaytext.";
    run;
    
    %if %str(&by.) eq %str() %then %do;
      data &dsin._7;
        length statistic $200.;
        set head &dsin._6;
        if not missing(__dummy) then statistic = __dummy;
        else statistic = statistic;
      run;
    %end;
    %else %do;
      data &dsin._6a;
        set &dsin._6;
        by &by.n &by.;
        if first.&by.n then do;
          statistic = &by.;
          output;
        end;
        statistic=cat("  ",strip(__dummy));
        output;
      run;
    
      data &dsin._7;
        set &dsin._6a;
      run;
    %end;
    
    /*Reverting the alpha numeric and commas pre-processed earlier to its original     value*/
    
    data &dsin._7a;
      set &dsin._7;
      %do i = 1 %to %sysfunc(countw(%bquote(&disporder.),%str(|)));
      if strip(statistic) =     "%scan(%sysfunc(tranwrd(%bquote(&disporder.),%bquote(,),%str(^))),&i.,%str(|))"
        then statn = &i.;
      %end;
    run;
    
    proc sort data = &dsin._7a;
      by %if %str(&by.) ne %str() %then %do; &by.n %end; statn;
    run;
    
    /*Idnetifying the number of variables in the column header to rename it to col     values*/
    
    proc sql noprint;
      select name into : memvar separated by " " from dictionary.columns where
        lowcase(libname) eq "work" and lowcase(memname) eq "%lowcase(&dsin._7a)" and     lowcase(name) not in ("&by.","&by.n","statistic","__dummy","statn");
    quit;
    
    data &dsin._7b;
      set &dsin._7a;
      %if %str(&by.) ne %str() %then %do;
        if statistic=&by. then call missing(_99,%sysfunc(tranwrd(&memvar.,%str(     ),%bquote(,))));
      %end;
    run;
    
    /*Renaming the column headers to col values*/
    
    data &dsin._8(rename=(%do i = 1 %to %sysfunc(countw(&memvar.));
                     %scan(&memvar.,&i.,%str( )) = col&i.
                     %end;
           ));
      set &dsin._7b;
      %do i = 1 %to %sysfunc(countw(&memvar.));
        if strip(upcase(statistic)) eq "N" and missing(%scan(&memvar.,&i.,%str( )))     then %scan(&memvar.,&i.,%str( )) = "0";
      %end;
    run;
    
    /*Identifying the label of column headers to store them into variable labels in     final dataset */
    
    proc sort data = &intable.(where=(&tablefilt.)) out = headlbl nodupkey;
      by &colby.n;
    run;
    
    %if %str(&trtfmt.) ne %str() %then %do;
      proc format library=work.formats cntlout=fmtt;
      run;
    
      proc sql noprint;
        select strip(label) into: headvarlb separated by "**" from fmtt where     upcase(fmtname) eq "%upcase(&trtfmt.S)";
      quit;
    %end;
    
    %else %do;
      proc sql noprint;
        select &colby. into : headvarlb separated by "**" from headlbl;
      quit;
    %end;
    
    proc sql noprint;
      select count(*) into : tot from &intable. where &tablefilt.;
      %do i = 1 %to %sysfunc(countw(&headvarlb.,%str(**)));
        select count(*) into :div&i. from &intable. where &tablefilt. and &colby. eq     "%scan(&headvarlb.,&i.,%str(**))";
      %end;
    quit;
    
    /*Replacing the alphanumeric/special characters and assigning label to column     headers along with N*/
    
    data &dsin._9(drop=i __dummy);
      retain %if %str(&by.) ne %str() %then %do; &by. &by.n %end; statistic col:;
      set &dsin._8;
        array chars[*] _character_;
        do i = 1 to dim(chars);
          chars[i] = tranwrd(tranwrd(tranwrd(chars[i],"+","±"),"( ","("),"^",",");
        end;
        ord = &ord.;
        %do i = 1 %to %sysfunc(countw(&headvarlb.,%str(**)));
          label col&i. = "%scan(&headvarlb.,&i.,%str(**)) N=(%cmpres(&&div&i..))";
        %end;
        label col%eval(%sysfunc(countw(&headvarlb.,%str(**)))+1) = "Total     (N=%cmpres(&tot.))";
    run;
    
    %let datct = &syslast.;
    
    /*Appending the dataset with work.finaltable based on the value in macro parameter     append*/
    
    %if %str(&append.) eq %str(Y) %then
    %do;
      %if %sysfunc(exist(finaltable)) eq 0 %then
      %do;
        %put %str(WAR)NING: Append parameter is given as Y but the dataset finaltable     does not exist.;
        %return;
      %end;
      data finaltable;
        length statistic $200.;
        set finaltable &datct.;
      run;
    %end;
    
    %else %if %str(&append.) eq %str(YS) %then
    %do;
      %if %sysfunc(exist(finaltable)) eq 0 %then
      %do;
        %put %str(WAR)NING: Append parameter is given as Y but the dataset finaltable     does not exist.;
        %return;
      %end;
    
      data dummy;
        statistic = "";
      run;
    
      data finaltable;
        length statistic $200.;
        set finaltable dummy &datct.;
      run;
    %end;
     
    %else 
    %do;
      data finaltable;
        set &datct.;
      run;
    %end;
    
    /*Deleting the intermediate datasets*/
    
    /*proc datasets library = work memtype=data;*/
    /*  delete head &dsin._:;*/
    /*quit;*/
    
    
    ods select all;
    
    %mend descriptive;

Back