Last active
February 16, 2020 16:35
-
-
Save mikeananev/6f0a80d244e5aa29c62a3c798380e61e to your computer and use it in GitHub Desktop.
Clojure RLE encoder/decoder
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| (ns org.rssys.rle.core | |
| (:gen-class) | |
| (:require [org.rssys.rle.naive :as rle1] | |
| [org.rssys.rle.lazy :as rle2])) | |
| (defn -main | |
| "entry point to program." | |
| [& args] | |
| (condp = (first args) | |
| "e" (do | |
| (println "compress mode.") | |
| (rle1/compress-file (second args) (first (drop 2 args)))) | |
| "d" (do | |
| (println "decompress mode") | |
| (rle1/decompress-file (second args) (first (drop 2 args)))) | |
| "e2" (do | |
| (println "compress (lazy) mode.") | |
| (rle2/compress-file (second args) (first (drop 2 args)))) | |
| (do | |
| (println "help: <e|e2|d> <infile> <outfile>"))) | |
| (flush) | |
| (System/exit 0)) | |
| (comment | |
| (require '[hashp.core]) | |
| (compress-file "a1.txt" "a1.rle") | |
| (decompress-file "a1.rle" "a2.txt")) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| :mvn/repos {"clojars" {:url "https://repo.clojars.org/"} | |
| "central" {:url "https://repo1.maven.org/maven2/"}} | |
| ;; don't change target/classes (do not remove it from :paths and | |
| ;; do not rename it, otherwise edit build.clj) | |
| :paths ["src" "resources" "target/classes"] | |
| :deps { | |
| org.clojure/clojure {:mvn/version "1.10.1"} | |
| } | |
| :build { | |
| ;; uncomment :java-source-paths and :javac-options if you need to compile java sources | |
| ;; :java-source-paths "java-src" | |
| ;; ( default options: ["-cp" "src:target/classes" "-target" "1.8" "-source" "1.8" "-Xlint:-options"] ) | |
| ;; :javac-options ["-cp" "src:target/classes" "-target" "1.8" "-source" "1.8" "-Xlint:-options"] | |
| ;; uncomment this if you want to specify uberjar name, | |
| ;; otherwise it will be artefact-version-standalone.jar | |
| ;;:uberjar-name "rle.jar" | |
| :omit-source true | |
| :main "org.rssys.rle.core" | |
| :group-id "org.rssys" | |
| :artifact-id "rle" | |
| :artifact-version "0.1.0-SNAPSHOT" | |
| } | |
| :aliases { | |
| :run {:extra-paths ["resources"] | |
| :jvm-opts ["-Duser.timezone=UTC" | |
| "-DLOG4J_CONFIGURATION_FILE=resources/log4j2.xml"] | |
| :main-opts ["--main" "org.rssys.rle.core"]} | |
| ;; to run repl: clojure -R:bg -A:repl | |
| :repl {:extra-deps {criterium {:mvn/version "0.4.5"} | |
| nrepl {:mvn/version "0.6.0"} | |
| healthsamurai/matcho {:mvn/version "0.3.3"} | |
| hashp {:mvn/version "0.1.1"}} | |
| :jvm-opts ["-Duser.timezone=UTC"] | |
| :extra-paths ["dev/src" "resources" "test"] | |
| :main-opts ["--main" "nrepl.cmdline"]} | |
| :test {:extra-deps {lambdaisland/kaocha {:mvn/version "0.0-573"} | |
| lambdaisland/kaocha-cloverage {:mvn/version "0.0-41"} | |
| healthsamurai/matcho {:mvn/version "0.3.3"}} | |
| :extra-paths ["resources" "test" "test/resources"] | |
| :jvm-opts ["-Duser.timezone=UTC" | |
| ;;"-DLOG4J_CONFIGURATION_FILE=test/resources/log4j2-test.xml" | |
| ] | |
| :main-opts ["--main" "kaocha.runner"]} | |
| :bg {:extra-deps {badigeon/badigeon {:git/url "https://github.com/EwenG/badigeon.git" | |
| :sha "c5d7d8f9c44fee2f193ef924cdf8a485aee539c5" | |
| }}} | |
| ;; build uberjar (compile java, clj files): clojure -R:bg -A:uberjar | |
| :uberjar {:main-opts ["-e" "(load-file,\"dev/src/build.clj\"),(build/clean),(build/uberjar)"]} | |
| ;; compile java classes only: clojure -R:bg -A:javac | |
| :javac {:main-opts ["-e" "(load-file,\"dev/src/build.clj\"),(build/compile-java)"]} | |
| ;; run on Java 9+ to build standalone app: clojure -R:bg -A:standalone | |
| :standalone {:main-opts ["-e" "(load-file,\"dev/src/build.clj\"),(build/clean),(build/standalone)"]} | |
| } | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| (ns org.rssys.rle.lazy | |
| (:require [clojure.java.io :as io]) | |
| (:import (java.io InputStream OutputStream) | |
| (java.util Arrays))) | |
| ;; byte-seq creates a lazy chunked sequence of bytes from an InputStream. | |
| ;; It takes the optimal chunk "size" to model a physical resource | |
| ;; (for a disk this would be the block size). | |
| (defn byte-seq [^InputStream is size] | |
| (let [ib (byte-array size)] | |
| ((fn step [] | |
| (lazy-seq | |
| (let [n (.read is ib)] | |
| (when (not= -1 n) | |
| (let [cb (chunk-buffer size)] | |
| (dotimes [i size] (chunk-append cb (aget ib i))) | |
| (chunk-cons (chunk cb) (step)))))))))) | |
| (defn byte-seq2 | |
| [^InputStream is] | |
| (lazy-seq | |
| (let [b (.read is)] | |
| (when (not= -1 b) | |
| (cons b (byte-seq2 is)))))) | |
| ;; RLE encoding transducer | |
| (def rle-encoder | |
| (comp | |
| (partition-by identity) | |
| (map #(vector (count %) (first %))) | |
| (map (fn [x] (if (= 1 (first x)) (second x) x))) | |
| (partition-by vector?) | |
| (map (fn [x] (if (vector? (first x)) x [(- (count x)) (into [] x)]))))) | |
| (defn write-bytes-array | |
| "# write encoded bytes to output stream | |
| if `len` > 0 then ba should contain one repeat byte. | |
| if `len` < 0 then ba should contain non repeat bytes. | |
| if ba > -128/127 then ba splits on blocks 128/127 bytes length." | |
| [^OutputStream out-stream ba len] | |
| (if (not (empty? ba)) | |
| (cond | |
| (pos-int? len) (if (>= len Byte/MAX_VALUE) | |
| (let [m (mod len Byte/MAX_VALUE)] | |
| (dotimes [n (quot len Byte/MAX_VALUE)] | |
| (.write out-stream (byte-array [(byte Byte/MAX_VALUE) (first ba)]))) | |
| (when (> m 0) (.write out-stream (byte-array [(byte m) (first ba)])))) | |
| (.write out-stream (byte-array [(byte len) (first ba)]))) | |
| (neg-int? len) (if (>= Byte/MIN_VALUE len) | |
| (let [int-len (- Byte/MIN_VALUE) | |
| int-coll (partition int-len ba) | |
| rest-len (- (- len) (* (count int-coll) int-len))] | |
| (doseq [next-coll int-coll] | |
| (.write out-stream (byte-array (into [(byte Byte/MIN_VALUE)] next-coll)))) | |
| (when (> rest-len 0) | |
| (.write out-stream (byte-array (into [(byte (- rest-len))] (drop (* (count int-coll) int-len) ba)))))) | |
| (.write out-stream (byte-array (into [(byte len)] ba))))))) | |
| (defn compress-file | |
| [^String infile ^String outfile] | |
| (with-open [in-stream (io/input-stream infile) | |
| out-stream (io/output-stream outfile)] | |
| (let [bs (byte-seq2 in-stream)] | |
| (doseq [item (eduction rle-encoder bs)] | |
| (if (coll? (first item)) | |
| (doseq [i item] | |
| (write-bytes-array out-stream [(second i)] (first i))) | |
| (write-bytes-array out-stream (second item) (first item))))))) | |
| (defn decompress-file | |
| "RLE decompressor" | |
| [in-filename out-filename] | |
| (let [out-bytes-counter (atom 0)] | |
| (with-open [in-stream (io/input-stream in-filename) | |
| out-stream (io/output-stream out-filename)] | |
| (loop [next-byte (.read in-stream)] | |
| (if (= next-byte -1) | |
| (printf "decompressed size: %d\n" @out-bytes-counter) | |
| (do | |
| (if (neg-int? (unchecked-byte next-byte)) | |
| (let [ba (byte-array (- (unchecked-byte next-byte))) | |
| len (.read in-stream ba)] | |
| (.write out-stream ba) | |
| (swap! out-bytes-counter + len)) | |
| (let [len (unchecked-byte next-byte) | |
| ba (byte-array len) | |
| data (.read in-stream) | |
| _ (Arrays/fill ba (unchecked-byte data))] | |
| (.write out-stream ba) | |
| (swap! out-bytes-counter + len))) | |
| (recur (.read in-stream)))))))) | |
| (comment | |
| (compress-file "LICENSE" "abc.rle") | |
| (decompress-file "abc.rle" "abc.txt") | |
| (compress-file "abc.txt" "abc.rle") | |
| (def s1 "abccchhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhq") | |
| (def e (eduction rle-encoder (.getBytes s1))) | |
| (def s2 (apply str (map char (rle-decode e)))) | |
| (= s1 s2)) | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| (ns org.rssys.rle.naive | |
| (:require [clojure.java.io :as io]) | |
| (:import (java.util Arrays))) | |
| (def EOF -1) | |
| (defn write-bytes-array | |
| [^java.io.OutputStream out-stream ba repeated-bytes?] | |
| (if (not (empty? ba)) | |
| (let [bytes-count (if repeated-bytes? (count ba) (- (count ba)))] | |
| (if repeated-bytes? | |
| (.write out-stream (byte-array (into [(byte bytes-count)] [(first ba)]))) | |
| (.write out-stream (byte-array (into [(byte bytes-count)] ba)))) | |
| (if repeated-bytes? | |
| 2 | |
| (inc (count ba)))) | |
| 0)) | |
| (defn repeat-detected? | |
| [out-bytes-array next-byte] | |
| (= next-byte (peek out-bytes-array))) | |
| (defn compress-file | |
| "naive RLE compressor" | |
| [in-filename out-filename] | |
| (let [in-bytes-counter (atom 0) | |
| out-bytes-counter (atom 0)] | |
| (with-open [in-stream (io/input-stream in-filename) | |
| out-stream (io/output-stream out-filename)] | |
| (loop [next-byte (.read in-stream) | |
| out-bytes-array [] | |
| repeat-found? false] | |
| (if (= next-byte EOF) | |
| (swap! out-bytes-counter + (write-bytes-array out-stream out-bytes-array repeat-found?)) | |
| (do | |
| (swap! in-bytes-counter inc) | |
| (if (and (not repeat-found?) (repeat-detected? out-bytes-array next-byte)) | |
| (do | |
| (swap! out-bytes-counter + (write-bytes-array out-stream (pop out-bytes-array) repeat-found?)) | |
| (recur (.read in-stream) [next-byte next-byte] true)) | |
| (if (and (not repeat-found?) (not (repeat-detected? out-bytes-array next-byte)) (< Byte/MIN_VALUE (- (count out-bytes-array)))) | |
| (recur (.read in-stream) (conj out-bytes-array next-byte) false) | |
| (if (and (not repeat-found?) (>= Byte/MIN_VALUE (- (count out-bytes-array)))) | |
| (do | |
| (swap! out-bytes-counter + (write-bytes-array out-stream out-bytes-array repeat-found?)) | |
| (recur (.read in-stream) [next-byte] false)) | |
| (if (and repeat-found? (repeat-detected? out-bytes-array next-byte) (>= (count out-bytes-array) Byte/MAX_VALUE)) | |
| (do | |
| (swap! out-bytes-counter + (write-bytes-array out-stream out-bytes-array repeat-found?)) | |
| (recur (.read in-stream) [next-byte] true)) | |
| (if (and repeat-found? (repeat-detected? out-bytes-array next-byte) (< (count out-bytes-array) Byte/MAX_VALUE)) | |
| (recur (.read in-stream) (conj out-bytes-array next-byte) true) | |
| (if (and repeat-found? (not (repeat-detected? out-bytes-array next-byte))) | |
| (do | |
| (swap! out-bytes-counter + (write-bytes-array out-stream out-bytes-array repeat-found?)) | |
| (recur (.read in-stream) [next-byte] false)) | |
| (println "error in compressor!"))))))))))) | |
| (printf "source size: %d, compressed size: %d, ratio: %.3f\n" @in-bytes-counter @out-bytes-counter (/ (float @out-bytes-counter) @in-bytes-counter)))) | |
| (defn decompress-file | |
| "RLE decompressor" | |
| [in-filename out-filename] | |
| (let [out-bytes-counter (atom 0)] | |
| (with-open [in-stream (io/input-stream in-filename) | |
| out-stream (io/output-stream out-filename)] | |
| (loop [next-byte (.read in-stream)] | |
| (if (= next-byte EOF) | |
| (printf "decompressed size: %d\n" @out-bytes-counter) | |
| (do | |
| (if (neg-int? (unchecked-byte next-byte)) | |
| (let [ba (byte-array (- (unchecked-byte next-byte))) | |
| len (.read in-stream ba)] | |
| (.write out-stream ba) | |
| (swap! out-bytes-counter + len)) | |
| (let [len (unchecked-byte next-byte) | |
| ba (byte-array len) | |
| data (.read in-stream) | |
| _ (Arrays/fill ba (unchecked-byte data))] | |
| (.write out-stream ba) | |
| (swap! out-bytes-counter + len))) | |
| (recur (.read in-stream)))))))) | |
| (comment | |
| (decompress-file "abc.rle" "abc.txt") | |
| (decompress-file "a.dat2" "a2.jar")) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| (ns org.rssys.rle.naive-test | |
| (:require [clojure.test :refer [deftest testing is]] | |
| [matcho.core :refer [match]] | |
| [org.rssys.rle.lazy :as sut] | |
| [clojure.java.io :as io])) | |
| (deftest compress-file-test | |
| (testing "1 byte" | |
| (let [s "a" | |
| infile "a.txt" | |
| outfile "a.rle" | |
| decfile "a2.txt"] | |
| (spit infile s) | |
| (sut/compress-file infile outfile) | |
| (sut/decompress-file outfile decfile) | |
| (is (= s (slurp decfile))) | |
| (io/delete-file infile) | |
| (io/delete-file outfile) | |
| (io/delete-file decfile))) | |
| (testing "5 bytes" | |
| (let [s "abcde" | |
| infile "a.txt" | |
| outfile "a.rle" | |
| decfile "a2.txt"] | |
| (spit infile s) | |
| (sut/compress-file infile outfile) | |
| (sut/decompress-file outfile decfile) | |
| (is (= s (slurp decfile))) | |
| (io/delete-file infile) | |
| (io/delete-file outfile) | |
| (io/delete-file decfile))) | |
| (testing "2 equal bytes" | |
| (let [s "aa" | |
| infile "a.txt" | |
| outfile "a.rle" | |
| decfile "a2.txt"] | |
| (spit infile s) | |
| (sut/compress-file infile outfile) | |
| (sut/decompress-file outfile decfile) | |
| (is (= s (slurp decfile))) | |
| (io/delete-file infile) | |
| (io/delete-file outfile) | |
| (io/delete-file decfile))) | |
| (testing "15 bytes" | |
| (let [s "1234567890abcdf" | |
| infile "a.txt" | |
| outfile "a.rle" | |
| decfile "a2.txt"] | |
| (spit infile s) | |
| (sut/compress-file infile outfile) | |
| (sut/decompress-file outfile decfile) | |
| (is (= s (slurp decfile))) | |
| (io/delete-file infile) | |
| (io/delete-file outfile) | |
| (io/delete-file decfile))) | |
| (testing "15 equal bytes" | |
| (let [s (apply str (repeat 15 "e")) | |
| infile "a.txt" | |
| outfile "a.rle" | |
| decfile "a2.txt"] | |
| (spit infile s) | |
| (sut/compress-file infile outfile) | |
| (sut/decompress-file outfile decfile) | |
| (is (= s (slurp decfile))) | |
| (io/delete-file infile) | |
| (io/delete-file outfile) | |
| (io/delete-file decfile))) | |
| (testing "129 equal bytes" | |
| (let [s (apply str (repeat 129 "e")) | |
| infile "a.txt" | |
| outfile "a.rle" | |
| decfile "a2.txt"] | |
| (spit infile s) | |
| (sut/compress-file infile outfile) | |
| (sut/decompress-file outfile decfile) | |
| (is (= s (slurp decfile))) | |
| (io/delete-file infile) | |
| (io/delete-file outfile) | |
| (io/delete-file decfile))) | |
| (testing "128 not equal bytes" | |
| (let [s (apply str (repeat 64 "ea")) | |
| infile "a.txt" | |
| outfile "a.rle" | |
| decfile "a2.txt"] | |
| (spit infile s) | |
| (sut/compress-file infile outfile) | |
| (sut/decompress-file outfile decfile) | |
| (is (= s (slurp decfile))) | |
| (io/delete-file infile) | |
| (io/delete-file outfile) | |
| (io/delete-file decfile))) | |
| (testing "130 not equal bytes" | |
| (let [s (apply str (repeat 65 "ea")) | |
| infile "a.txt" | |
| outfile "a.rle" | |
| decfile "a2.txt"] | |
| (spit infile s) | |
| (sut/compress-file infile outfile) | |
| (sut/decompress-file outfile decfile) | |
| (is (= s (slurp decfile))) | |
| (io/delete-file infile) | |
| (io/delete-file outfile) | |
| (io/delete-file decfile))) | |
| ) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment