Created
May 5, 2021 00:31
-
-
Save camsaul/3bcbf832ca81b1e5e1f40559b494f9fd to your computer and use it in GitHub Desktop.
Finding a line in a file (binary search vs other strategies)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defn unsorted [password] | |
(let [lines (atom 0)] | |
(with-open [reader (java.io.BufferedReader. (java.io.FileReader. (io/file (io/resource "common_passwords.txt"))))] | |
{:bad? (boolean | |
(some | |
(fn [a-password] | |
(swap! lines inc) | |
(= a-password password)) | |
(iterator-seq (.. reader lines iterator)))) | |
:n @lines}))) | |
(defn- find-str-start ^Long [^java.io.RandomAccessFile file ^long end ^long mid] | |
(.seek file mid) | |
#_(print " CHAR:") | |
(loop [str-start mid, #_n2 #_0] | |
(cond | |
(>= str-start end) | |
(do | |
#_(println) | |
#_(println (format "No match, Str-Start %d is >= End %d" str-start end)) | |
nil) | |
(let [ch (char (.readByte file))] | |
#_(print (format " %d => %s" str-start (u/colorize :blue (pr-str ch)))) | |
(= ch \newline)) | |
(do | |
#_(println) | |
(inc str-start)) | |
#_(> n2 100) | |
#_(throw (ex-info "Infinite loop!" {:mid mid, :str-start str-start, :n2 n2})) | |
:else | |
(recur (inc str-start) #_(inc n2))))) | |
(defn- compare-characters [^java.io.RandomAccessFile file ^String password ^long str-start ^long n] | |
(loop [i 0] | |
(let [ | |
password-ch (if (>= i (count password)) | |
\newline | |
(.charAt password i)) | |
str-ch (char (.readByte file))] | |
#_(println (format " COMPARE %s [%s] => %s %s => %s" | |
(u/colorize :magenta i) | |
(u/colorize :yellow (+ str-start i)) | |
(u/colorize :blue (pr-str password-ch)) | |
(u/colorize :magenta (pr-str str-ch)) | |
(let [comparison (compare password-ch str-ch)] | |
(u/colorize (cond | |
(neg? comparison) :red | |
(pos? comparison) :green) | |
comparison)))) | |
(cond | |
(= password-ch str-ch \newline) | |
{:bad? true, :n n} | |
(= password-ch \newline) | |
[-1 i] | |
(= str-ch \newline) | |
[1 i] | |
:else | |
(let [comparison (compare password-ch str-ch)] | |
(if (zero? comparison) | |
(recur (inc i)) | |
[comparison i])))))) | |
(defn sorted [^String password] | |
(let [file (java.io.RandomAccessFile. (io/file (io/resource "common_passwords_sorted.txt")) "r") | |
end (.length file)] | |
(loop [start 0, end (dec end), n 0] | |
(let [rnge (- end start)] | |
(if-not (pos? rnge) | |
(do | |
#_(println (format "Range is zero. Start = %d End = %d Range = %d N = %d" start end rnge n)) | |
{:bad? false, :n n}) | |
(let [mid (long (+ start (/ rnge 2)))] | |
#_(when (> n 100) | |
(throw (ex-info "Infinite loop!" {:start start, :end end, :mid mid, :n n}))) | |
#_(println (format "RANGE => %s - %s [range = %s; mid = %s] n = %d" | |
(u/colorize :magenta start) | |
(u/colorize :magenta end) | |
(u/colorize :cyan rnge) | |
(u/colorize :yellow mid) n)) | |
;; read backward from middle until we reach a newline or zero | |
(let [str-start (find-str-start file end mid)] | |
(if-not str-start | |
(recur start mid (inc n)) | |
(let [result (compare-characters file password str-start n)] | |
(if-not (vector? result) | |
result | |
(let [[comparison i] result] | |
(if (neg? comparison) | |
(recur start (dec (long str-start)) (inc n)) | |
(recur (+ (long str-start) (long i)) end (inc n)))))))))))))) | |
(defn sorted-old [password] | |
(let [file (java.io.RandomAccessFile. (io/file (io/resource "common_passwords_sorted.txt")) "r") | |
end (.length file)] | |
(loop [start 0, end (dec end), n 0] | |
(let [rnge (- end start)] | |
(if-not (pos? rnge) | |
{:bad? false, :n n} | |
(let [mid (long (+ start (/ rnge 2)))] | |
;; read backward from middle until we reach a newline or zero | |
(let [str-start (loop [str-start mid] | |
(.seek file str-start) | |
(cond | |
(zero? str-start) str-start | |
(< str-start start) false | |
(= (char (.readByte file)) \newline) (inc str-start) | |
:else (recur (dec str-start))))] | |
(if-not str-start | |
{:bad? false, :n n} | |
(let [str-start (long str-start)] | |
(.seek file str-start) | |
(let [s (.readLine file) | |
comparison (compare password s)] | |
(cond | |
(neg? comparison) (recur start (dec str-start) (inc n)) | |
(pos? comparison) (recur (+ str-start (count s)) end (inc n)) | |
:else {:bad? true, :n n}))))))))))) | |
(defn arrays [^String password] | |
(let [^"[Ljava.lang.String;" a (into-array String (str/split-lines (slurp (io/file (io/resource "common_passwords_sorted.txt")))))] | |
{:bad? (pos? (java.util.Arrays/binarySearch a password))})) | |
(defn profile-all [password] | |
(letfn [(with-milliseconds [thunk] | |
(let [start (System/currentTimeMillis)] | |
(dotimes [_ 999] | |
(thunk)) | |
(assoc (thunk) :time (u/format-milliseconds (/ (- (System/currentTimeMillis) start) 1000.0)))))] | |
{:linear (with-milliseconds #(unsorted password)) | |
:binary-new (with-milliseconds #(sorted password)) | |
:binary-old (with-milliseconds #(sorted-old password)) | |
:arrays (with-milliseconds #(arrays password))})) | |
(profile-all "toucans") | |
;; -> | |
{:linear {:bad? false, :n 12692, :time "1.6 ms"}, | |
:binary-new {:bad? false, :n 17, :time "94.0 µs"}, | |
:binary-old {:bad? false, :n 14, :time "106.0 µs"}, | |
:arrays {:bad? false, :time "2.5 ms"}} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment