Skip to content

Instantly share code, notes, and snippets.

@jmbarbone
Created November 30, 2022 01:59
Show Gist options
  • Save jmbarbone/9b71c99782734c3131d9d00800a060c2 to your computer and use it in GitHub Desktop.
Save jmbarbone/9b71c99782734c3131d9d00800a060c2 to your computer and use it in GitHub Desktop.
Example of the differences between the return values of R's regular expression matching functions
# Sample words to search.
x <- c("apple", "banana", "orange", "pear", "grape")
# A vowel immediately followed by a non-vowel. Parentheses are literal inside
# a bracket expression, so they are kept out of the negated class; including
# them would also reject "(" and ")" as the second character.
p <- "[aeiou][^aeiou]"
# First match in each element of x: returns an integer vector of positions.
regexpr(pattern = p, text = x)
#> [1] 1 2 1 3 3
#> attr(,"match.length")
#> [1] 2 2 2 2 2
#> attr(,"index.type")
#> [1] "chars"
#> attr(,"useBytes")
#> [1] TRUE
# First match in each element of x: returns a list of integer values.
regexec(pattern = p, text = x)
#> [[1]]
#> [1] 1
#> attr(,"match.length")
#> [1] 2
#> attr(,"index.type")
#> [1] "chars"
#> attr(,"useBytes")
#> [1] TRUE
#>
#> [[2]]
#> [1] 2
#> attr(,"match.length")
#> [1] 2
#> attr(,"index.type")
#> [1] "chars"
#> attr(,"useBytes")
#> [1] TRUE
#>
#> [[3]]
#> [1] 1
#> attr(,"match.length")
#> [1] 2
#> attr(,"index.type")
#> [1] "chars"
#> attr(,"useBytes")
#> [1] TRUE
#>
#> [[4]]
#> [1] 3
#> attr(,"match.length")
#> [1] 2
#> attr(,"index.type")
#> [1] "chars"
#> attr(,"useBytes")
#> [1] TRUE
#>
#> [[5]]
#> [1] 3
#> attr(,"match.length")
#> [1] 2
#> attr(,"index.type")
#> [1] "chars"
#> attr(,"useBytes")
#> [1] TRUE
# Every match in each element of x: returns a list of integer vectors.
gregexpr(pattern = p, text = x)
#> [[1]]
#> [1] 1
#> attr(,"match.length")
#> [1] 2
#> attr(,"index.type")
#> [1] "chars"
#> attr(,"useBytes")
#> [1] TRUE
#>
#> [[2]]
#> [1] 2 4
#> attr(,"match.length")
#> [1] 2 2
#> attr(,"index.type")
#> [1] "chars"
#> attr(,"useBytes")
#> [1] TRUE
#>
#> [[3]]
#> [1] 1 3
#> attr(,"match.length")
#> [1] 2 2
#> attr(,"index.type")
#> [1] "chars"
#> attr(,"useBytes")
#> [1] TRUE
#>
#> [[4]]
#> [1] 3
#> attr(,"match.length")
#> [1] 2
#> attr(,"index.type")
#> [1] "chars"
#> attr(,"useBytes")
#> [1] TRUE
#>
#> [[5]]
#> [1] 3
#> attr(,"match.length")
#> [1] 2
#> attr(,"index.type")
#> [1] "chars"
#> attr(,"useBytes")
#> [1] TRUE
# Every match in each element of x: returns a list of integer matrices.
gregexec(pattern = p, text = x)
#> [[1]]
#> [,1]
#> [1,] 1
#> attr(,"match.length")
#> [,1]
#> [1,] 2
#> attr(,"useBytes")
#> [1] TRUE
#> attr(,"index.type")
#> [1] "chars"
#>
#> [[2]]
#> [,1] [,2]
#> [1,] 2 4
#> attr(,"match.length")
#> [,1] [,2]
#> [1,] 2 2
#> attr(,"useBytes")
#> [1] TRUE
#> attr(,"index.type")
#> [1] "chars"
#>
#> [[3]]
#> [,1] [,2]
#> [1,] 1 3
#> attr(,"match.length")
#> [,1] [,2]
#> [1,] 2 2
#> attr(,"useBytes")
#> [1] TRUE
#> attr(,"index.type")
#> [1] "chars"
#>
#> [[4]]
#> [,1]
#> [1,] 3
#> attr(,"match.length")
#> [,1]
#> [1,] 2
#> attr(,"useBytes")
#> [1] TRUE
#> attr(,"index.type")
#> [1] "chars"
#>
#> [[5]]
#> [,1]
#> [1,] 3
#> attr(,"match.length")
#> [,1]
#> [1,] 2
#> attr(,"useBytes")
#> [1] TRUE
#> attr(,"index.type")
#> [1] "chars"
# Extract the matched text from regexpr() positions: a character vector.
regmatches(x = x, m = regexpr(pattern = p, text = x))
#> [1] "ap" "an" "or" "ar" "ap"
# Extract the matched text from regexec() positions: a list of character
# vectors, one per element of x.
regmatches(x = x, m = regexec(pattern = p, text = x))
#> [[1]]
#> [1] "ap"
#>
#> [[2]]
#> [1] "an"
#>
#> [[3]]
#> [1] "or"
#>
#> [[4]]
#> [1] "ar"
#>
#> [[5]]
#> [1] "ap"
# Extract the matched text from gregexpr() positions: a list of character
# vectors.
regmatches(x = x, m = gregexpr(pattern = p, text = x))
#> [[1]]
#> [1] "ap"
#>
#> [[2]]
#> [1] "an" "an"
#>
#> [[3]]
#> [1] "or" "an"
#>
#> [[4]]
#> [1] "ar"
#>
#> [[5]]
#> [1] "ap"
# Extract the matched text from gregexec() positions: a list of character
# matrices.
regmatches(x = x, m = gregexec(pattern = p, text = x))
#> [[1]]
#> [,1]
#> [1,] "ap"
#>
#> [[2]]
#> [,1] [,2]
#> [1,] "an" "an"
#>
#> [[3]]
#> [,1] [,2]
#> [1,] "or" "an"
#>
#> [[4]]
#> [,1]
#> [1,] "ar"
#>
#> [[5]]
#> [,1]
#> [1,] "ap"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment