Last active
June 16, 2017 16:16
-
-
Save mpereira/7c48cb7ff31517a247146a6e81068eb2 to your computer and use it in GitHub Desktop.
Returns a seq of [start end] offsets for delimited, case-insensitive occurrences of a substring in s.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defn substring-offsets
  "Returns a seq of [start end] offsets (end exclusive) for each
  case-insensitive occurrence of substring in s that is delimited on both
  sides, or nil when there is no such occurrence.

  An occurrence is reported only when it starts at the beginning of s or is
  preceded by a character accepted by `delimiter?`, and it ends at the end
  of s or is followed by a character accepted by `delimiter?`. `delimiter?`
  is defined outside this function and is called with a one-character
  string.

  Offsets are returned in ascending order and index into the lower-cased
  copy of s. Reported occurrences never overlap. Returns nil when s is nil
  or when substring is nil or empty."
  [s substring]
  (when (and s (seq substring))
    (let [s (.toLowerCase s)
          substring (.toLowerCase substring)
          s-length (count s)
          substring-length (count substring)]
      ;; Accumulate into a vector so matches keep their left-to-right order
      ;; (conj onto nil or a list would prepend and reverse the result).
      (loop [offset 0
             offsets []]
        (let [substring-start-offset (.indexOf s substring offset)]
          (if (= -1 substring-start-offset)
            ;; seq turns an empty vector into nil, so "no matches" stays nil.
            (seq offsets)
            ;; .indexOf only reports complete occurrences, so the end offset
            ;; can never exceed s-length and needs no clamping.
            (let [substring-end-offset (+ substring-start-offset substring-length)
                  beginning-of-string? (zero? substring-start-offset)
                  end-of-string? (= s-length substring-end-offset)
                  char-before-substring (when-not beginning-of-string?
                                          (subs s
                                                (dec substring-start-offset)
                                                substring-start-offset))
                  char-after-substring (when-not end-of-string?
                                         (subs s
                                               substring-end-offset
                                               (inc substring-end-offset)))]
              (if (and (or beginning-of-string? (delimiter? char-before-substring))
                       (or end-of-string? (delimiter? char-after-substring)))
                ;; Delimited hit: record it and resume right after it.
                (recur substring-end-offset
                       (conj offsets
                             [substring-start-offset substring-end-offset]))
                ;; Hit embedded in a longer token: skip it but keep scanning,
                ;; since later occurrences may still be delimited.
                (recur (inc substring-start-offset) offsets)))))))))
;; Usage example: find the offsets of "the" in a sentence. Matching is
;; case-insensitive, so the leading "The" is a candidate as well.
;; NOTE(review): the result recorded below presumes `delimiter?` accepts a
;; single space; `delimiter?` is not defined in this snippet -- confirm.
(substring-offsets "The quick brown fox jumps over the lazy dog" "the") | |
;; => ([0 3] [31 34]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment