Last active
October 20, 2017 13:51
-
-
Save egri-nagy/41c637b9ed4bfe034f13f52b6334a8b0 to your computer and use it in GitHub Desktop.
Burrows-Wheeler Transform
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; simple implementation of the Burrows-Wheeler transform (used in bzip2) | |
;; https://en.wikipedia.org/wiki/Burrows-Wheeler_transform | |
(defn rotations | |
"All rotations (cyclic permutations) of a string s." | |
[s] | |
(let [n (count s)] | |
(reduce (fn [rotations i] | |
(conj rotations | |
(apply str | |
(take n (drop i (cycle s)))))) | |
[] | |
(range (count s))))) | |
(defn b-w-t | |
"The Burrows-Wheeler transform of a string s. It produces a single | |
permuted string." | |
[s] | |
(apply str (map last (sort (rotations s))))) | |
(defn reverse-b-w-t | |
"Reverse transformation of the Burrows-Wheeler Transform. Returns all | |
rotations of the original string." | |
[s] | |
(reduce (fn [ss _] | |
(sort | |
(map (comp (partial apply str) cons) | |
s | |
ss))) | |
(sort (map str s)) | |
(range (dec (count s))))) | |
(defn r-b-w-t | |
"If the string s was encoded with start character ^ and end | |
character |, then this function finds the original string." | |
[s] | |
(filter #(= \^ (first %)) | |
(reverse-b-w-t s))) | |
(b-w-t "^banana|") | |
(r-b-w-t "|bnn^aaa") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment