29.6 Coarsened Exact Matching
Steps from Gray King’s slides International Methods Colloquium talk 2015
Temporarily coarsen \(X\)
Apply exact matching to the coarsened \(X, C(X)\)
sort observation into strata, each with unique values of \(C(X)\)
prune stratum with 0 treated or 0 control units
Pass on original (uncoarsened) units except those pruned
Properties:
Monotonic imbalance bounding (MIB) matching method
- maximum imbalance between the treated and control chosen ex ante
meets congruence principle
robust to measurement error
can be implemented with multiple imputation
works well for multi-category treatments
Assumptions:
- Ignorability (i.e., no omitted variable bias)
More detail in (Iacus, King, and Porro 2012)
Example by package’s authors
library(cem)
data(LeLonde)
Le <- data.frame(na.omit(LeLonde)) # remove missing data
# treated and control groups
tr <- which(Le$treated==1)
ct <- which(Le$treated==0)
ntr <- length(tr)
nct <- length(ct)
# unadjusted, biased difference in means
mean(Le$re78[tr]) - mean(Le$re78[ct])
#> [1] 759.0479
# pre-treatment covariates
vars <-
c(
"age",
"education",
"black",
"married",
"nodegree",
"re74",
"re75",
"hispanic",
"u74",
"u75",
"q1"
)
# overall imbalance statistics
imbalance(group=Le$treated, data=Le[vars]) # L1 = 0.902
#>
#> Multivariate Imbalance Measure: L1=0.902
#> Percentage of local common support: LCS=5.8%
#>
#> Univariate Imbalance Measures:
#>
#> statistic type L1 min 25% 50% 75%
#> age -0.252373042 (diff) 5.102041e-03 0 0 0.0000 -1.0000
#> education 0.153634710 (diff) 8.463851e-02 1 0 1.0000 1.0000
#> black -0.010322734 (diff) 1.032273e-02 0 0 0.0000 0.0000
#> married -0.009551495 (diff) 9.551495e-03 0 0 0.0000 0.0000
#> nodegree -0.081217371 (diff) 8.121737e-02 0 -1 0.0000 0.0000
#> re74 -18.160446880 (diff) 5.551115e-17 0 0 284.0715 806.3452
#> re75 101.501761679 (diff) 5.551115e-17 0 0 485.6310 1238.4114
#> hispanic -0.010144756 (diff) 1.014476e-02 0 0 0.0000 0.0000
#> u74 -0.045582186 (diff) 4.558219e-02 0 0 0.0000 0.0000
#> u75 -0.065555292 (diff) 6.555529e-02 0 0 0.0000 0.0000
#> q1 7.494021189 (Chi2) 1.067078e-01 NA NA NA NA
#> max
#> age -6.0000
#> education 1.0000
#> black 0.0000
#> married 0.0000
#> nodegree 0.0000
#> re74 -2139.0195
#> re75 490.3945
#> hispanic 0.0000
#> u74 0.0000
#> u75 0.0000
#> q1 NA
# drop other variables that are not pre - treatmentt matching variables
todrop <- c("treated", "re78")
imbalance(group=Le$treated, data=Le, drop=todrop)
#>
#> Multivariate Imbalance Measure: L1=0.902
#> Percentage of local common support: LCS=5.8%
#>
#> Univariate Imbalance Measures:
#>
#> statistic type L1 min 25% 50% 75%
#> age -0.252373042 (diff) 5.102041e-03 0 0 0.0000 -1.0000
#> education 0.153634710 (diff) 8.463851e-02 1 0 1.0000 1.0000
#> black -0.010322734 (diff) 1.032273e-02 0 0 0.0000 0.0000
#> married -0.009551495 (diff) 9.551495e-03 0 0 0.0000 0.0000
#> nodegree -0.081217371 (diff) 8.121737e-02 0 -1 0.0000 0.0000
#> re74 -18.160446880 (diff) 5.551115e-17 0 0 284.0715 806.3452
#> re75 101.501761679 (diff) 5.551115e-17 0 0 485.6310 1238.4114
#> hispanic -0.010144756 (diff) 1.014476e-02 0 0 0.0000 0.0000
#> u74 -0.045582186 (diff) 4.558219e-02 0 0 0.0000 0.0000
#> u75 -0.065555292 (diff) 6.555529e-02 0 0 0.0000 0.0000
#> q1 7.494021189 (Chi2) 1.067078e-01 NA NA NA NA
#> max
#> age -6.0000
#> education 1.0000
#> black 0.0000
#> married 0.0000
#> nodegree 0.0000
#> re74 -2139.0195
#> re75 490.3945
#> hispanic 0.0000
#> u74 0.0000
#> u75 0.0000
#> q1 NA
automated coarsening
mat <-
cem(
treatment = "treated",
data = Le,
drop = "re78",
keep.all = TRUE
)
#>
#> Using 'treated'='1' as baseline group
mat
#> G0 G1
#> All 392 258
#> Matched 95 84
#> Unmatched 297 174
# mat$w
coarsening by explicit user choice
# categorial variables
levels(Le$q1) # grouping option
#> [1] "agree" "disagree" "neutral"
#> [4] "no opinion" "strongly agree" "strongly disagree"
q1.grp <-
list(
c("strongly agree", "agree"),
c("neutral", "no opinion"),
c("strongly disagree", "disagree")
) # if you want ordered categories
# continuous variables
table(Le$education)
#>
#> 3 4 5 6 7 8 9 10 11 12 13 14 15
#> 1 5 4 6 12 55 106 146 173 113 19 9 1
educut <- c(0, 6.5, 8.5, 12.5, 17) # use cutpoints
mat1 <-
cem(
treatment = "treated",
data = Le,
drop = "re78",
cutpoints = list(education = educut),
grouping = list(q1 = q1.grp)
)
#>
#> Using 'treated'='1' as baseline group
mat1
#> G0 G1
#> All 392 258
#> Matched 158 115
#> Unmatched 234 143
Can also use progressive coarsening method to control the number of matches.
cem
can also handle some missingness.