Macro %Wordsplit
1. Description
The macro WORDSPLIT splits a long text variable into a set of shorter variables, each no longer than a user-specified maximum length. The text is not simply cut at the specified length; each break is placed between words.
2. Dependencies
2.1 Global Macro Variables - N/A
2.2 Global Macros - N/A
3. Macro Parameters
Macro parameter | Description | Example |
---|---|---|
INDAT (R) | The name of the input dataset. | INDAT=AE , Default value: None |
OUTDAT (R) | The name of the output dataset. | OUTDAT=AE2 , Default value: None |
SPLITVAR (R) | The name of the variable that needs to be split. | SPLITVAR=AECOM , Default value: None |
OUTVAR (O) | The name of output variable which stores the additional text values. | OUTVAR=COM , Default value: COM |
MAXLEN (O) | The maximum length of the output variable(s). | MAXLEN=200 , Default value: 200 |
SPACE (O) | Y/N option to indicate whether to remove double space or not. If SPACE =Y, double spaces from data will be removed. | SPACE=Y , Default value: Y |
LABEL (O) | Prefix label for newly created variable(s). When left at the default value 1, no labels are assigned. | LABEL=NEW VARIABLE , Default value: 1 |
(R): Required (O): Optional
4. Assumptions
- The macro determines how many variables need to be created to accommodate the text. The first variable is named with the value of the OUTVAR parameter (default COM); each additional variable is named by appending a sequential number to that prefix (for example COM1, COM2).
5. Sample Call
1. %WORDSPLIT(INDAT=AE,
OUTDAT=AE2,
SPLITVAR=AECOM,
OUTVAR=AECOM);
2. %WORDSPLIT(INDAT=CM,
OUTDAT=CM2,
SPLITVAR=CMDECOD,
OUTVAR=CMDCD,
MAXLEN=200,
SPACE=Y,
LABEL=1);
6. Macro Code
%macro wordsplit(indat=,outdat=,splitvar=,outvar=com,maxlen=200,label=1,space=Y);
  /*--------------------------------------------------------------------------
    WORDSPLIT - split a long character variable into several shorter ones,
    breaking between words.

    Parameters
      indat    (required) input dataset.
      outdat   (required) output dataset.
      splitvar (required) character variable in INDAT that is to be split.
      outvar   (optional) name of the first output variable and prefix of the
               following ones (OUTVAR, OUTVAR1, OUTVAR2, ...). Default COM.
      maxlen   (optional) maximum length of each output variable. Default 200.
      label    (optional) label prefix for the new variables. The value 1
               (default) means that no label is assigned.
      space    (optional) Y collapses runs of blanks with COMPBL, any other
               value only strips leading and trailing blanks. Default Y.
               The comparison is not case sensitive.

    Output
      - If no value of SPLITVAR is longer than MAXLEN, OUTDAT holds the input
        variables plus OUTVAR (one variable, nothing is split).
      - Otherwise OUTDAT holds OUTVAR, OUTVAR1, ... and a copy of the original
        text named x<SPLITVAR>. SPLITVAR itself is consumed and dropped.

    Notes
      - A word that is longer than MAXLEN cannot be broken at a blank and is
        cut at exactly MAXLEN characters.
      - The working copy is named x<SPLITVAR>, so that name must be a valid
        and unused variable name in INDAT.
  --------------------------------------------------------------------------*/

  /* Keep every helper macro variable local so the caller is not modified. */
  %local dopen dclose vnum vtype dim len i;

  /* ---- Parameter validation (guard clauses, same order as before) ---- */

  /* Custom message when splitvar is missing in macro call */
  %if %length(&splitvar) eq 0 %then %do;
    %put ERROR: splitvar is missing in macro call;
    %return;
  %end;

  /* Custom message when Input dataset is missing in macro call */
  %if %length(&indat) eq 0 %then %do;
    %put ERROR: Input dataset is missing in macro call;
    %return;
  %end;

  /* Custom message when Input dataset does not exist */
  %if %sysfunc(exist(&indat)) ne 1 %then %do;
    %put ERROR:Input dataset does not exist ;
    %return;
  %end;

  /* Look up the variable and its type, then release the dataset handle. */
  %let dopen=%sysfunc(open(&indat));
  %let vnum=%sysfunc(varnum(&dopen,&splitvar));
  %if &vnum gt 0 %then %let vtype=%sysfunc(vartype(&dopen,&vnum));
  %let dclose=%sysfunc(close(&dopen));

  %if &vnum eq 0 %then %do;
    %put ERROR: Input variable does not exist;
    %return;
  %end;

  %if &vtype=N %then %do;
    %put ERROR: Input variable type is Numeric;
    %return;
  %end;

  /* Custom message when Output dataset is missing in macro call */
  %if %length(&outdat) eq 0 %then %do;
    %put ERROR: Output dataset is missing in macro call;
    %return;
  %end;

  /* ---- Processing ---- */

  /* Longest value of the input variable: decides whether a split is needed
     and bounds the number of loop iterations. */
  proc sql noprint;
    select max(length(&splitvar)) into :dim
    from &indat;
  quit;

  /* Create the output dataset with a working copy of the original text. */
  data &outdat;
    set &indat;
    x&splitvar=&splitvar;
  run;

  /* Nothing to split: every value already fits into MAXLEN characters.
     The threshold is MAXLEN (it used to be a hard-coded 200). */
  %if &dim le &maxlen. %then %do;
    data &outdat;
      length &outvar $&maxlen.;
      set &outdat;
      %if %upcase(&space)=Y %then %do;
        &outvar=compbl(strip(x&splitvar));
      %end;
      %else %do;
        &outvar=strip(x&splitvar);
      %end;
      /* SUPERQ keeps words such as AND or OR inside the label from being
         evaluated as operators. */
      %if %superq(label) ne 1 %then %do;
        label &outvar.="&label" ;
      %end;
      drop x&splitvar;
    run;
    %return;
  %end;

  /* Each pass moves at least one character per over-long row from SPLITVAR
     into a new output variable, so at most DIM passes are required. */
  %do i=0 %to &dim;

    /* Length of the longest remaining text decides whether this is the
       last pass. */
    proc sql noprint;
      select max(length(&splitvar)) into :len
      from &outdat;
    quit;

    %if &len le &maxlen. %then %do;
      /* Last pass: the remainder fits, store it and finish. */
      data &outdat;
        length &outvar&i $&maxlen.;
        set &outdat;
        %if %upcase(&space)=Y %then %do;
          &outvar&i=compbl(strip(&splitvar));
        %end;
        %else %do;
          &outvar&i=strip(&splitvar);
        %end;
        /* The first chunk is delivered under the plain OUTVAR name. */
        rename &outvar.0=&outvar;
        %if %superq(label) ne 1 %then %do;
          label &outvar.0="&label" &outvar&i="&label &i";
        %end;
        drop &splitvar;
      run;
      %return;
    %end;

    /* Intermediate pass: cut one chunk of at most MAXLEN characters off the
       front of SPLITVAR and keep the rest in SPLITVAR for the next pass. */
    data &outdat;
      %if %superq(label) ne 1 %then %do;
        attrib &outvar&i length=$&maxlen. label="&label &i";
      %end;
      %else %do;
        length &outvar&i $&maxlen.;
      %end;
      set &outdat;
      %if %upcase(&space)=Y %then %do;
        &splitvar=compbl(strip(&splitvar));
      %end;
      %else %do;
        &splitvar=strip(&splitvar);
      %end;

      /* A clean cut at MAXLEN is possible when position MAXLEN is a blank,
         the whole text fits, or a word ends exactly at position MAXLEN. */
      if substr(&splitvar,&maxlen.,1) eq " " or length(&splitvar) le &maxlen. or (~cmiss(substr(&splitvar,&maxlen.,1)) and cmiss(substr(&splitvar,&maxlen.+1,1))) then do;
        &outvar&i=substr(&splitvar,1,&maxlen.);
        &splitvar=substr(&splitvar,&maxlen.+1);
      end;
      else do;
        /* Walk backwards from position MAXLEN-1 to the closest blank.
           The scan stops at position 1 so SUBSTR never sees position 0. */
        do _wsplit_pos=1 to &maxlen.-1;
          if substr(&splitvar,&maxlen.-_wsplit_pos,1) eq " " then do;
            &outvar&i=substr(&splitvar,1,&maxlen.-_wsplit_pos);
            &splitvar=substr(&splitvar,&maxlen.-_wsplit_pos+1);
            leave;
          end;
        end;
        /* Loop ran to completion: the first MAXLEN characters hold no blank
           (one very long word). Cut at MAXLEN so the text keeps shrinking. */
        if _wsplit_pos ge &maxlen. then do;
          &outvar&i=substr(&splitvar,1,&maxlen.);
          &splitvar=substr(&splitvar,&maxlen.+1);
        end;
      end;
      /* Scratch index must not reach the output or clash with user data. */
      drop _wsplit_pos;
    run;
  %end;
%mend wordsplit;