Skip to content

Instantly share code, notes, and snippets.

@camsaul
Created May 5, 2021 00:31
Show Gist options
  • Save camsaul/3bcbf832ca81b1e5e1f40559b494f9fd to your computer and use it in GitHub Desktop.
Save camsaul/3bcbf832ca81b1e5e1f40559b494f9fd to your computer and use it in GitHub Desktop.
Finding a line in a file (binary search vs other strategies)
(defn unsorted [password]
(let [lines (atom 0)]
(with-open [reader (java.io.BufferedReader. (java.io.FileReader. (io/file (io/resource "common_passwords.txt"))))]
{:bad? (boolean
(some
(fn [a-password]
(swap! lines inc)
(= a-password password))
(iterator-seq (.. reader lines iterator))))
:n @lines})))
(defn- find-str-start ^Long [^java.io.RandomAccessFile file ^long end ^long mid]
(.seek file mid)
#_(print " CHAR:")
(loop [str-start mid, #_n2 #_0]
(cond
(>= str-start end)
(do
#_(println)
#_(println (format "No match, Str-Start %d is >= End %d" str-start end))
nil)
(let [ch (char (.readByte file))]
#_(print (format " %d => %s" str-start (u/colorize :blue (pr-str ch))))
(= ch \newline))
(do
#_(println)
(inc str-start))
#_(> n2 100)
#_(throw (ex-info "Infinite loop!" {:mid mid, :str-start str-start, :n2 n2}))
:else
(recur (inc str-start) #_(inc n2)))))
(defn- compare-characters [^java.io.RandomAccessFile file ^String password ^long str-start ^long n]
(loop [i 0]
(let [
password-ch (if (>= i (count password))
\newline
(.charAt password i))
str-ch (char (.readByte file))]
#_(println (format " COMPARE %s [%s] => %s %s => %s"
(u/colorize :magenta i)
(u/colorize :yellow (+ str-start i))
(u/colorize :blue (pr-str password-ch))
(u/colorize :magenta (pr-str str-ch))
(let [comparison (compare password-ch str-ch)]
(u/colorize (cond
(neg? comparison) :red
(pos? comparison) :green)
comparison))))
(cond
(= password-ch str-ch \newline)
{:bad? true, :n n}
(= password-ch \newline)
[-1 i]
(= str-ch \newline)
[1 i]
:else
(let [comparison (compare password-ch str-ch)]
(if (zero? comparison)
(recur (inc i))
[comparison i]))))))
(defn sorted [^String password]
(let [file (java.io.RandomAccessFile. (io/file (io/resource "common_passwords_sorted.txt")) "r")
end (.length file)]
(loop [start 0, end (dec end), n 0]
(let [rnge (- end start)]
(if-not (pos? rnge)
(do
#_(println (format "Range is zero. Start = %d End = %d Range = %d N = %d" start end rnge n))
{:bad? false, :n n})
(let [mid (long (+ start (/ rnge 2)))]
#_(when (> n 100)
(throw (ex-info "Infinite loop!" {:start start, :end end, :mid mid, :n n})))
#_(println (format "RANGE => %s - %s [range = %s; mid = %s] n = %d"
(u/colorize :magenta start)
(u/colorize :magenta end)
(u/colorize :cyan rnge)
(u/colorize :yellow mid) n))
;; read backward from middle until we reach a newline or zero
(let [str-start (find-str-start file end mid)]
(if-not str-start
(recur start mid (inc n))
(let [result (compare-characters file password str-start n)]
(if-not (vector? result)
result
(let [[comparison i] result]
(if (neg? comparison)
(recur start (dec (long str-start)) (inc n))
(recur (+ (long str-start) (long i)) end (inc n))))))))))))))
(defn sorted-old [password]
(let [file (java.io.RandomAccessFile. (io/file (io/resource "common_passwords_sorted.txt")) "r")
end (.length file)]
(loop [start 0, end (dec end), n 0]
(let [rnge (- end start)]
(if-not (pos? rnge)
{:bad? false, :n n}
(let [mid (long (+ start (/ rnge 2)))]
;; read backward from middle until we reach a newline or zero
(let [str-start (loop [str-start mid]
(.seek file str-start)
(cond
(zero? str-start) str-start
(< str-start start) false
(= (char (.readByte file)) \newline) (inc str-start)
:else (recur (dec str-start))))]
(if-not str-start
{:bad? false, :n n}
(let [str-start (long str-start)]
(.seek file str-start)
(let [s (.readLine file)
comparison (compare password s)]
(cond
(neg? comparison) (recur start (dec str-start) (inc n))
(pos? comparison) (recur (+ str-start (count s)) end (inc n))
:else {:bad? true, :n n})))))))))))
(defn arrays [^String password]
(let [^"[Ljava.lang.String;" a (into-array String (str/split-lines (slurp (io/file (io/resource "common_passwords_sorted.txt")))))]
{:bad? (pos? (java.util.Arrays/binarySearch a password))}))
(defn profile-all [password]
(letfn [(with-milliseconds [thunk]
(let [start (System/currentTimeMillis)]
(dotimes [_ 999]
(thunk))
(assoc (thunk) :time (u/format-milliseconds (/ (- (System/currentTimeMillis) start) 1000.0)))))]
{:linear (with-milliseconds #(unsorted password))
:binary-new (with-milliseconds #(sorted password))
:binary-old (with-milliseconds #(sorted-old password))
:arrays (with-milliseconds #(arrays password))}))
(profile-all "toucans")
;; ->
{:linear {:bad? false, :n 12692, :time "1.6 ms"},
:binary-new {:bad? false, :n 17, :time "94.0 µs"},
:binary-old {:bad? false, :n 14, :time "106.0 µs"},
:arrays {:bad? false, :time "2.5 ms"}}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment