Created
January 4, 2025 01:56
-
-
Save xiongtx/592928b79661bbe8bb9a04598cc5599c to your computer and use it in GitHub Desktop.
Minimum window containing all terms
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defn valid-window? | |
[terms m] | |
(every? #(>= (get m % 0) 1) terms)) | |
(defn full-position | |
[coll idx] | |
(first (nth coll idx))) | |
(defn min-window | |
"Given a set of terms and sequence of tokens, find the minimum window (in | |
terms of token positions) that contains all the terms. | |
The idea is to start with the first window containing all terms from the | |
left, then shrink the window on the left when it's valid and grow it on the | |
right when it's not. This way, we evaluate a sequence of sliding windows | |
from left to right." | |
[terms tokens] | |
(let [terms-set (set terms) | |
relevant-tokens (->> tokens | |
(map-indexed vector) | |
(filter #(contains? terms-set (second %))) | |
vec)] | |
(when (= (set (map second relevant-tokens)) terms-set) | |
(let [max-idx (dec (count relevant-tokens)) | |
full-pos (partial full-position relevant-tokens)] | |
(loop [m {(second (first relevant-tokens)) 1} | |
p1 0 | |
p2 0 | |
min-p1 0 | |
min-p2 max-idx] | |
(if (valid-window? terms m) | |
(let [[min-p1 min-p2] (if (< (- (full-pos p2) (full-pos p1)) | |
(- (full-pos min-p2) (full-pos min-p1))) | |
[p1 p2] | |
[min-p1 min-p2]) | |
[_ t] (nth relevant-tokens p1)] | |
(recur (update m t dec) | |
(inc p1) | |
p2 | |
min-p1 | |
min-p2)) | |
(if (< p2 max-idx) | |
(let [[_ new-t] (nth relevant-tokens (inc p2))] | |
(recur (update m new-t (fnil inc 0)) | |
p1 | |
(inc p2) | |
min-p1 | |
min-p2)) | |
[(full-pos min-p1) (full-pos min-p2)]))))))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment