25.2 Krippendorff’s Alpha
Krippendorff's alpha quantifies agreement among raters by relating the observed disagreement to the disagreement expected by chance:
\[ \alpha = 1 - \frac{D_o}{D_e} = 1 - \frac{\text{observed disagreement}}{\text{expected disagreement}} \]
Rules of thumb (Shelley and Krippendorff 1984):
\(\alpha \ge 0.8\) = good reliability
\(\alpha \ge 0.667\) = acceptable lower limit
library(irr) # kripp.alpha() and icc() come from the irr package

# ratings of 12 subjects by 4 raters; each group of four values is one
# subject (the matrix is filled column-wise), NA marks a missing rating
nmm <-
  matrix(
    c(
      1, 1, NA, 1,
      2, 2, 3, 2,
      3, 3, 3, 3,
      3, 3, 3, 3,
      2, 2, 2, 2,
      1, 2, 3, 4,
      4, 4, 4, 4,
      1, 1, 2, 1,
      2, 2, 2, 2,
      NA, 5, 5, 5,
      NA, NA, 1, 1,
      NA, NA, 3, NA
    ),
    nrow = 4
  )
# first assume the default nominal classification
kripp.alpha(nmm)
## Krippendorff's alpha
##
## Subjects = 12
## Raters = 4
## alpha = 0.743
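# the remaining results were presumably produced with the other metrics;
# next, the ordinal metric
kripp.alpha(nmm, method = "ordinal")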
## Krippendorff's alpha
##
## Subjects = 12
## Raters = 4
## alpha = 0.815
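# interval metric
kripp.alpha(nmm, method = "interval")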
## Krippendorff's alpha
##
## Subjects = 12
## Raters = 4
## alpha = 0.849
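# ratio metric
kripp.alpha(nmm, method = "ratio")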
## Krippendorff's alpha
##
## Subjects = 12
## Raters = 4
## alpha = 0.797
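The returned object keeps the coefficient itself, so the rule-of-thumb cutoffs above can be checked programmatically. A minimal sketch, assuming the list returned by kripp.alpha() stores the coefficient in its value element:
ka <- kripp.alpha(nmm, method = "nominal")
ka$value >= 0.8   # good reliability?
ka$value >= 0.667 # at or above the acceptable lower limit?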
25.2.1 Kendall’s W
Kendall's W measures concordance among raters for ordinal (rank-order) ratings.
rtr1 <- c(1, 6, 3, 2, 5, 4)
rtr2 <- c(1, 5, 6, 2, 4, 3)
rtr3 <- c(2, 3, 6, 5, 4, 1)
ratings <- cbind(rtr1, rtr2, rtr3) # 6 subjects (rows) x 3 raters (columns)
library(DescTools)
KendallW(ratings, test = TRUE)
##
## Kendall's coefficient of concordance W
##
## data: ratings
## Kendall chi-squared = 8.5238, df = 5, subjects = 6, raters = 3, p-value
## = 0.1296
## alternative hypothesis: W is greater 0
## sample estimates:
## W
## 0.568254
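As a cross-check, W can be reproduced from its definition for untied rankings,
\[ W = \frac{12S}{m^2(n^3 - n)} \]
where \(S\) is the sum of squared deviations of the subjects' rank sums from their mean, \(m\) the number of raters and \(n\) the number of subjects. A minimal sketch using the ratings above (no ties, so no tie correction is needed):
m <- ncol(ratings) # raters
n <- nrow(ratings) # subjects
rank.sums <- rowSums(ratings) # each rater's column already contains ranks 1..6
S <- sum((rank.sums - mean(rank.sums))^2)
W <- 12 * S / (m^2 * (n^3 - n))
W               # 0.568254, matching KendallW()
m * (n - 1) * W # 8.5238, the chi-squared statistic reported above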
25.2.2 Intraclass correlation coefficients
25.2.2.1 Continuous scales
Decisions when calling icc():
model:
  "oneway" when subjects are random effects
  "twoway" when subjects and raters are random effects
type:
  "agreement" when differences in mean ratings among raters are of interest
  "consistency" (the default) otherwise
unit:
  "single" for a single rating
  "average" for the mean of several ratings
data(anxiety) # example data from the irr package: 20 subjects, 3 raters
icc(anxiety,
model = "twoway", # can be "oneway"
type = "agreement", # can be "consistency"
unit = "single" # can be "average"
)
## Single Score Intraclass Correlation
##
## Model: twoway
## Type : agreement
##
## Subjects = 20
## Raters = 3
## ICC(A,1) = 0.198
##
## F-Test, H0: r0 = 0 ; H1: r0 > 0
## F(19,39.7) = 1.83 , p = 0.0543
##
## 95%-Confidence Interval for ICC Population Values:
## -0.039 < ICC < 0.494
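If the reported measurement will be the mean of the three raters' scores rather than a single rating, the same call with unit = "average" gives the agreement ICC for averaged ratings; a sketch (output not shown):
icc(anxiety,
    model = "twoway",
    type = "agreement",
    unit = "average" # reliability of the mean of the 3 ratings
)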
The ICC() function in the DescTools package reports all six Shrout and Fleiss (1979) variants at once:
rtr1 <- c(9, 6, 8, 7, 10, 6)
rtr2 <- c(2, 1, 4, 1, 5, 2)
rtr3 <- c(5, 3, 6, 2, 6, 4)
rtr4 <- c(8, 2, 8, 6, 9, 7)
ratings <- cbind(rtr1, rtr2, rtr3, rtr4) # 6 subjects (rows) x 4 raters (columns)
DescTools::ICC(ratings)
##
## Intraclass correlation coefficients
## type est F-val df1 df2 p-val lwr.ci upr.ci
## Single_raters_absolute ICC1 0.166 1.79 5 18 0.164769 NA NA
## Single_random_raters ICC2 0.290 11.03 5 15 0.000135 NA NA
## Single_fixed_raters ICC3 0.715 11.03 5 15 0.000135 NA NA
## Average_raters_absolute ICC1k 0.443 1.79 5 18 0.164769 NA NA
## Average_random_raters ICC2k 0.620 11.03 5 15 0.000135 NA NA
## Average_fixed_raters ICC3k 0.909 11.03 5 15 0.000135 NA NA
##
## Number of subjects = 6 Number of raters = 4
References
Shelley, Mack, and Klaus Krippendorff. 1984. “Content Analysis: An Introduction to Its Methodology.” Journal of the American Statistical Association 79 (385): 240. https://doi.org/10.2307/2288384.
Shrout, Patrick E., and Joseph L. Fleiss. 1979. “Intraclass Correlations: Uses in Assessing Rater Reliability.” Psychological Bulletin 86 (2): 420–28. https://doi.org/10.1037/0033-2909.86.2.420.