Macro %Wordsplit

1. Description

The macro WORDSPLIT splits a long text variable into a set of shorter variables, each no longer than a user-specified length. The text is not simply cut at the specified length; it is broken between words.

2. Dependencies

2.1 Global Macro Variables - N/A

2.2 Global Macros - N/A

3. Macro Parameters

Macro parameter Description Example
INDAT (R) The name of the input dataset. INDAT=AE , Default value: None
OUTDAT (R) The name of the output dataset. OUTDAT=AE2 , Default value: None
SPLITVAR (R) The name of the variable that needs to be split. SPLITVAR=AECOM , Default value: None
OUTVAR (O) The name of output variable which stores the additional text values. OUTVAR=COM , Default value: COM
MAXLEN (O) The maximum length of the output variable(s). MAXLEN=200 , Default value: 200
SPACE (O) Y/N option indicating whether repeated spaces are removed. If SPACE=Y, double spaces in the data are collapsed to a single space. SPACE=Y , Default value: Y
LABEL (O) Prefix label for the newly created variable(s). When left at the default value of 1, no label is assigned. LABEL=NEW VARIABLE , Default value: 1

(R): Required (O): Optional

4. Assumptions

  1. The macro determines how many variables need to be created to accommodate the additional text. The variables are named sequentially, using the default value or the value passed through the parameter OUTVAR as the prefix.

5. Sample Call

1.  %WORDSPLIT(INDAT=AE,
               OUTDAT=AE2,
               SPLITVAR=AECOM,
               OUTVAR=AECOM);

2.  %WORDSPLIT(INDAT=CM,
               OUTDAT=CM2,
               SPLITVAR=CMDECOD,
               OUTVAR=CMDCD,
               MAXLEN=200,
               SPACE=Y,
               LABEL=1);

6. Macro Code

    %macro wordsplit(indat=,outdat=,splitvar=,outvar=com,maxlen=200,label=1,space=Y);
      /*------------------------------------------------------------------------
        WORDSPLIT: split a character variable into pieces of at most MAXLEN
        characters, breaking between words where a blank is available.

        Parameters
          indat    (required) input dataset
          outdat   (required) output dataset
          splitvar (required) character variable to split
          outvar   prefix of the created variables: OUTVAR, OUTVAR1, OUTVAR2, ...
          maxlen   maximum length of each created variable
          label    label prefix for the created variables, 1 means no label
          space    Y collapses repeated blanks with COMPBL before splitting
                   (compared case-insensitively)

        Behaviour
          - Writes an ERROR message to the log and returns without creating
            OUTDAT when a required parameter is missing, INDAT does not exist,
            or SPLITVAR is absent or numeric.
          - When the longest value fits in MAXLEN a single variable OUTVAR is
            created and SPLITVAR is kept unchanged.
          - Otherwise pieces are peeled off SPLITVAR one variable per pass.
            In this path the working copy of SPLITVAR is dropped at the end
            and the untouched text remains available as xSPLITVAR.
            NOTE(review): the two paths keep different source columns,
            confirm this is the intended output layout.
          - A run of more than MAXLEN characters without a blank is cut at
            MAXLEN so that every pass consumes text and the loop terminates.
      ------------------------------------------------------------------------*/

      /* Keep all working macro variables out of the scope of the caller */
      %local dopen dclose vnum vtype dim len i;

      /* ---- Parameter validation, same order and messages as before ---- */
      %if &splitvar eq  %then %do;
        %put ERROR: splitvar is missing in macro call;
        %return;
      %end;

      %if &indat eq  %then %do;
        %put ERROR: Input dataset is missing in macro call;
        %return;
      %end;

      %if %sysfunc(exist(&indat)) ne 1 %then %do;
        %put ERROR:Input dataset does not exist ;
        %return;
      %end;

      /* Look up SPLITVAR in the input dataset and read its type (C or N) */
      %let dopen=%sysfunc(open(&indat));
      %let vnum=%sysfunc(varnum(&dopen,&splitvar));
      %if &vnum gt 0 %then %let vtype=%sysfunc(vartype(&dopen,&vnum));
      %let dclose=%sysfunc(close(&dopen));

      %if &vnum eq 0 %then %do;
        %put ERROR: Input variable does not exist;
        %return;
      %end;

      %if &vtype=N %then %do;
        %put ERROR: Input variable type is Numeric;
        %return;
      %end;

      %if &outdat eq  %then %do;
        %put ERROR: Output dataset is missing in macro call;
        %return;
      %end;

      /* Accept SPACE=y as well as SPACE=Y */
      %let space=%upcase(&space);

      /* Longest value of SPLITVAR, used as the upper bound on the passes */
      proc sql noprint;
        select max(length(&splitvar)) into :dim
        from &indat;
      quit;
      %let dim=&dim; /* strip the leading blanks left by INTO */

      /* Working copy of the input, xSPLITVAR preserves the original text */
      data &outdat;
        set &indat;
        x&splitvar=&splitvar;
      run;

      /* ---- Short path: every value already fits in one variable ---- */
      /* Compared against MAXLEN (was a hard-coded 200, which truncated  */
      /* text whenever MAXLEN was smaller than the longest value)        */
      %if &dim le &maxlen %then %do;
        data &outdat;
          length &outvar $&maxlen.;
          set &outdat;
          %if &space=Y %then %do;
            &outvar=compbl(strip(x&splitvar));
          %end;
          %else %do;
            &outvar=strip(x&splitvar);
          %end;
          /* SUPERQ keeps words such as OR or AND in the label from being */
          /* evaluated as operators                                       */
          %if %superq(label) ne 1 %then %do;
            label &outvar.="&label" ;
          %end;
          drop x&splitvar;
        run;
        %return;
      %end;

      /* ---- Long path: peel one piece off SPLITVAR per pass ---- */
      %do i=0 %to &dim;

        /* Longest remaining text decides whether this is the last pass */
        proc sql noprint;
          select max(length(&splitvar)) into :len
          from &outdat;
        quit;

        %if &len le &maxlen. %then %do;
          /* Last pass: store the remainder, give piece 0 its final name */
          data &outdat;
            length &outvar&i $&maxlen.;
            set &outdat;
            %if &space=Y %then %do;
              &outvar&i=compbl(strip(&splitvar));
            %end;
            %else %do;
              &outvar&i=strip(&splitvar);
            %end;
            rename &outvar.0=&outvar;
            %if %superq(label) ne 1 %then %do;
              label &outvar.0="&label"  &outvar&i="&label &i";
            %end;
            drop &splitvar;
          run;
          %return;
        %end;
        %else %do;
          /* Intermediate pass: SPLITVAR is overwritten with the text that */
          /* is still to be distributed                                    */
          data &outdat;
            %if %superq(label) ne 1 %then %do;
              attrib &outvar&i length=$&maxlen. label="&label &i";
            %end;
            %else %do;
              length &outvar&i $&maxlen.;
            %end;
            set &outdat;
            %if &space=Y %then %do;
              &splitvar=compbl(strip(&splitvar));
            %end;
            %else %do;
              &splitvar=strip(&splitvar);
            %end;

            /* Cut exactly at MAXLEN when that position is a blank, the   */
            /* text already fits, or a word ends exactly at MAXLEN        */
            if substr(&splitvar,&maxlen.,1) eq " "
               or length(&splitvar) le &maxlen.
               or (~cmiss(substr(&splitvar,&maxlen.,1))
                   and cmiss(substr(&splitvar,&maxlen.+1,1))) then do;
              &outvar&i=substr(&splitvar,1,&maxlen.);
              &splitvar=substr(&splitvar,&maxlen.+1);
            end;
            else do;
              /* Walk back from MAXLEN-1 to position 1 for the nearest    */
              /* blank. The search stops before position 0, which is not  */
              /* a valid SUBSTR argument                                  */
              _ws_cut=0;
              do _ws_i=1 to &maxlen.-1;
                if substr(&splitvar,&maxlen.-_ws_i,1) eq " " then do;
                  _ws_cut=&maxlen.-_ws_i;
                  leave;
                end;
              end;
              /* No blank in the first MAXLEN characters: cut the word at */
              /* MAXLEN so the remaining text always gets shorter         */
              if _ws_cut eq 0 then _ws_cut=&maxlen.;
              &outvar&i=substr(&splitvar,1,_ws_cut);
              &splitvar=substr(&splitvar,_ws_cut+1);
            end;

            /* Private temporaries never reach the output dataset and do  */
            /* not collide with an input column named i                   */
            drop _ws_i _ws_cut;
          run;
        %end;
      %end;
    %mend wordsplit;

Back