Skip to content

Instantly share code, notes, and snippets.

@mikeananev
Last active April 8, 2024 19:38
Show Gist options
  • Save mikeananev/39f95b17f443de026951f97cf5c8f243 to your computer and use it in GitHub Desktop.
Save mikeananev/39f95b17f443de026951f97cf5c8f243 to your computer and use it in GitHub Desktop.
Adaptive LZW compression in Clojure
(ns coder ;; org.rssys.lzw
(:require [clojure.java.io :as io])
(:import (java.io InputStream OutputStream)
(com.github.jinahya.bit.io StreamByteInput StreamByteOutput DefaultBitOutput DefaultBitInput)))
;; Width threshold for LZW codes: the encoder and decoder widen codes by one
;; bit only while the current width is <= this value, so the widest code
;; actually used is MAX-BITS-LENGTH + 1 bits.
(def MAX-BITS-LENGTH 18)
;; End-of-data code: the encoder writes it after the last data code. It also
;; bounds the initial dictionaries, which are seeded from (range 0 (inc EOF)).
(def EOF 256)
(defn lzw-encode-stream
  "Compresses every byte of `in-stream` with variable-width LZW and writes the
  bit-packed codes to `out-stream`.

  Codes start at 9 bits. When the dictionary has no free code left at the
  current width and the width is still <= MAX-BITS-LENGTH, the all-ones code
  of the current width is written as a widen marker and the width grows by
  one bit. Once the width can no longer grow, the dictionary is frozen. The
  EOF code is written after the last data code.

  The input is consumed until `.read` returns -1; `.available` is not
  consulted because it only reports bytes readable without blocking and may
  be 0 long before the end of the stream. An empty input produces no output.

  Neither stream is closed and nothing is flushed here; the caller owns
  `out-stream`.
  NOTE(review): no align call is issued on the bit output, so trailing bits
  of the EOF code that do not fill a whole octet presumably never reach
  `out-stream` - confirm against the bit-io documentation.

  Returns nil."
  [^InputStream in-stream ^OutputStream out-stream]
  (let [first-sym (.read in-stream)]
    ;; -1 on the very first read means the input is empty: nothing to encode.
    (when-not (neg? first-sym)
      (let [out-bit-stream (DefaultBitOutput. (StreamByteOutput. out-stream))
            ;; initial dictionary: each single symbol 0..EOF maps to its own code
            init-table (into {} (map (fn [x] [[x] x])) (range 0 (inc EOF)))]
        (loop [table init-table
               buf [first-sym]
               bits 9]
          (let [next-byte (.read in-stream)]
            (if (neg? next-byte)
              ;; end of input: flush the pending phrase, then the EOF code
              (do
                (.writeInt out-bit-stream true (int bits) (int (get table buf)))
                (.writeInt out-bit-stream true (int bits) (int EOF)))
              (let [candidate (conj buf next-byte)]
                (if (contains? table candidate)
                  ;; phrase is known: keep extending it
                  (recur table candidate bits)
                  (let [size (count table)
                        ;; all-ones code of the current width, reserved as widen marker
                        widen-code (dec (bit-shift-left 1 bits))
                        room? (> widen-code size)
                        widen? (and (not room?) (>= MAX-BITS-LENGTH bits))
                        new-bits (if widen? (inc bits) bits)]
                    (when widen?
                      ;; the marker itself is still written at the old width
                      (.writeInt out-bit-stream true (int bits) (int widen-code)))
                    (.writeInt out-bit-stream true (int new-bits) (int (get table buf)))
                    (recur (if (or room? widen?)
                             (assoc table candidate size)
                             table)
                           [next-byte]
                           new-bits)))))))))))
(defn lzw-decode-stream
  "Decompresses variable-width LZW codes read from `in-stream` and writes the
  decoded bytes to `out-stream`.

  Codes start at 9 bits. The all-ones code of the current width is treated as
  a widen marker: the width grows by one bit (while it is still
  <= MAX-BITS-LENGTH) and the real code is then read at the new width.
  Decoding stops at the EOF code or when the bit stream runs out, whichever
  comes first; `.available` is not consulted. An input that is empty, or whose
  first code is EOF, produces no output.

  Only java.io.EOFException from the bit reader is treated as end of data;
  any other exception (for example a failing write) propagates to the caller.
  NOTE(review): assumes bit-io signals exhaustion of the underlying stream
  with java.io.EOFException - confirm against the bit-io documentation.

  The dictionary stops growing once it holds every code available at the
  final width, mirroring the point where the encoder freezes its table.

  Neither stream is closed or flushed here. Returns nil."
  [^InputStream in-stream ^OutputStream out-stream]
  (let [in-bit-stream (DefaultBitInput. (StreamByteInput. in-stream))
        ;; Reads one unsigned code of `bits` bits; nil once the stream runs out.
        read-code (fn [bits]
                    (try
                      (.readInt in-bit-stream true (int bits))
                      (catch java.io.EOFException _ nil)))
        first-code (read-code 9)]
    (when (and first-code (not= EOF first-code))
      ;; the first code is always a single literal byte
      (.write out-stream (int first-code))
      (loop [table (into [] (map vector) (range 0 (inc EOF)))
             old-code first-code
             sym first-code
             bits 9]
        (let [raw-code (read-code bits)
              widen? (and raw-code (= raw-code (dec (bit-shift-left 1 bits))))
              new-bits (if (and widen? (>= MAX-BITS-LENGTH bits)) (inc bits) bits)
              ;; after a widen marker the real code follows at the new width
              next-code (if widen? (read-code new-bits) raw-code)]
          (when (and next-code (not= EOF next-code))
            (let [prefix (get table old-code)
                  ;; unknown code: it can only be the entry about to be added,
                  ;; i.e. the previous phrase plus its own first symbol
                  entry (or (get table next-code) (conj prefix sym))
                  new-sym (first entry)]
              (doseq [b entry] (.write out-stream (int b)))
              (recur (if (< (count table) (dec (bit-shift-left 1 new-bits)))
                       (conj table (conj prefix new-sym))
                       table)
                     next-code
                     new-sym
                     new-bits))))))))
(defn compress-file
  "Compresses the file named by `:infile` into the file named by `:outfile`
  using `lzw-encode-stream`. Both values are passed through `str` before use.
  Prints the input name first; both files are closed when done."
  [{:keys [^String infile ^String outfile]}]
  (let [src (str infile)
        dst (str outfile)]
    (println "Compressing file:" src)
    (with-open [in-stream (io/input-stream src)
                out-stream (io/output-stream dst)]
      (lzw-encode-stream in-stream out-stream))))
(defn decompress-file
  "Decompresses the file named by `:infile` into the file named by `:outfile`
  using `lzw-decode-stream`. Both values are passed through `str` before use.
  Prints the input name first; both files are closed when done."
  [{:keys [^String infile ^String outfile]}]
  (let [src (str infile)
        dst (str outfile)]
    (println "Decompressing file:" src)
    (with-open [in-stream (io/input-stream src)
                out-stream (io/output-stream dst)]
      (lzw-decode-stream in-stream out-stream))))
;; REPL scratchpad (never evaluated on load): compresses "LICENSE" into
;; "a.lzw", then decompresses "a.lzw" into "LICENSE.txt".
(comment
(compress-file {:infile "LICENSE" :outfile "a.lzw"})
(decompress-file {:infile "a.lzw" :outfile "LICENSE.txt"})
)
;; deps.edn for the LZW coder: declares the Clojure version, the bit-io
;; library used for bit-level stream I/O, and a :repl alias.
{
:mvn/repos {"clojars" {:url "https://repo.clojars.org/"}}
;; "." is on the classpath, so source files in the project root are loadable
:paths ["." "src" "resources" "target/classes"]
:deps {org.clojure/clojure {:mvn/version "1.10.1"}
com.github.jinahya/bit-io {:mvn/version "2.0.4"}}
:aliases {
;; to run repl: clojure -A:repl
:repl {:extra-deps {
nrepl/nrepl {:mvn/version "0.6.0"}
hashp/hashp {:mvn/version "0.1.1"}}
:jvm-opts ["-Duser.timezone=UTC"]
:extra-paths ["dev/src" "resources" "test"]
:main-opts ["--main" "nrepl.cmdline"]}
}
}
@mikeananev
Copy link
Author

mikeananev commented Nov 5, 2022

Install Clojure (on macOS with Homebrew): brew install clojure

To compress 'a.txt' to 'a.lzw' run from bash:

clojure -Sdeps '{:deps {org.rssys.lzw/lzw {:git/url "https://gist.github.com/mikeananev/39f95b17f443de026951f97cf5c8f243" :sha "69aace6ffe51e0d6800c1c45d7c82913b8ffaa92"}}}' -X coder/compress-file :infile a.txt :outfile a.lzw

To decompress 'a.lzw' to 'a-decompressed.txt' run from bash:

clojure -Sdeps '{:deps {org.rssys.lzw/lzw {:git/url "https://gist.github.com/mikeananev/39f95b17f443de026951f97cf5c8f243" :sha "69aace6ffe51e0d6800c1c45d7c82913b8ffaa92"}}}' -X coder/decompress-file :infile a.lzw :outfile a-decompressed.txt

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment