4.11 模糊匹配

近似字符串匹配 (Approximate String Matching) 也叫模糊匹配 (Fuzzy Matching)，指在允许少量插入、删除或替换的前提下，查找与给定模式相近的字符串。

Base R 提供四个模糊匹配函数：`agrep()` 返回匹配元素的下标或取值，`agrepl()` 返回逻辑向量，`aregexec()` 返回匹配（含子表达式）的位置和长度，`adist()` 计算字符串之间的广义 Levenshtein 距离。

# agrep() returns the indices of the elements of `x` that contain an
# approximate match to `pattern`.
agrep(pattern = "lasy", x = "1 lazy 2")
## [1] 1
# Disallow substitutions: only the element that literally contains "lasy"
# still matches. The argument is spelled out as `max.distance` rather than
# relying on partial matching of `max`.
agrep("lasy", c(" 1 lazy 2", "1 lasy 2"), max.distance = list(sub = 0))
## [1] 2
# An integer max.distance caps the number of edits; matching is
# case-sensitive unless ignore.case = TRUE is given.
agrep("laysy", c("1 lazy", "1", "1 LAZY"), max.distance = 2)
## [1] 1
# value = TRUE returns the matching elements themselves instead of indices.
agrep("laysy", c("1 lazy", "1", "1 LAZY"), max.distance = 2, value = TRUE)
## [1] "1 lazy"
# Ignoring case lets the upper-case element match as well.
agrep("laysy", c("1 lazy", "1", "1 LAZY"), max.distance = 2, ignore.case = TRUE)
## [1] 1 3
# agrepl() is the logical counterpart: one TRUE/FALSE per element of `x`.
agrepl(pattern = "lasy", x = "1 lazy 2")
## [1] TRUE
# Same candidate strings as in the agrep() examples.
x <- c("1 lazy", "1", "1 LAZY")

# aregexec() reports, per element of `text`, where the approximate match
# starts and how long it is; -1 marks "no match".
aregexec(pattern = "laysy", text = x, max.distance = 2)
## [[1]]
## [1] 3
## attr(,"match.length")
## [1] 4
## 
## [[2]]
## [1] -1
## attr(,"match.length")
## [1] -1
## 
## [[3]]
## [1] -1
## attr(,"match.length")
## [1] -1
# With parenthesised groups, the first position/length describes the whole
# match and the following ones describe each sub-expression.
aregexec(pattern = "(lay)(sy)", text = x, max.distance = 2)
## [[1]]
## [1] 3 3 5
## attr(,"match.length")
## [1] 4 2 2
## 
## [[2]]
## [1] -1
## attr(,"match.length")
## [1] -1
## 
## [[3]]
## [1] -1
## attr(,"match.length")
## [1] -1
# Ignoring case additionally matches the upper-case third element.
aregexec(
  pattern = "(lay)(sy)", text = x,
  max.distance = 2, ignore.case = TRUE
)
## [[1]]
## [1] 3 3 6
## attr(,"match.length")
## [1] 4 3 1
## 
## [[2]]
## [1] -1
## attr(,"match.length")
## [1] -1
## 
## [[3]]
## [1] 3 3 6
## attr(,"match.length")
## [1] 4 3 1
# Keep the match data so regmatches() can extract the matched substrings
# (whole match first, then one entry per sub-expression).
m <- aregexec(pattern = "(lay)(sy)", text = x, max.distance = 2)
regmatches(x, m)
## [[1]]
## [1] "lazy" "la"   "zy"  
## 
## [[2]]
## character(0)
## 
## [[3]]
## character(0)
# Levenshtein (edit) distance between two strings; background:
# https://en.wikipedia.org/wiki/Levenshtein_distance
adist(x = "kitten", y = "sitting")
##      [,1]
## [1,]    3
# counts = TRUE attaches a "counts" attribute that breaks the distance down
# into insertions, deletions and substitutions.
drop(
  attr(adist(x = "kitten", y = "sitting", counts = TRUE), which = "counts")
)
## ins del sub 
##   1   0   2
# The "trafos" attribute spells out the edit sequence for every pair of
# strings, as shown in the output below.
attr(adist(x = c("kitten", "sitting"), counts = TRUE), which = "trafos")
##      [,1]      [,2]     
## [1,] "MMMMMM"  "SMMMSMI"
## [2,] "SMMMSMD" "MMMMMMM"
# Same strings as in the agrep() examples: the full-string distance counts
# the surrounding "1 " and " 2" as well.
adist(x = "lasy", y = "1 lazy 2")
##      [,1]
## [1,]    5
# partial = TRUE gives the "partial approximate match" distance, the kind
# of matching agrep() uses.
adist(x = "lasy", y = "1 lazy 2", partial = TRUE)
##      [,1]
## [1,]    1

案例

`help.search()`：在默认设置下，它借助 `agrep()` 对帮助文档进行模糊匹配，因此关键词存在少量拼写偏差时仍能找到相关条目。