glv · January 19, 2024 19:41
diff --git a/_TheClojureStateMonad.md b/_TheClojureStateMonad.md
diff --git a/archive_monad.clj b/archive_monad.clj
 ;; A nozip writer that uses the state monad to track state.
 (ns nozip.archive-monad
  (:use clojure.algo.monads))

 ;; Note: this file contains many notes to myself, remnants of false
 ;; starts, and exploratory/diagnosis code that I used while trying
 ;; to figure out what was going on.  I'll clean that up shortly.

 ;; Structure of a NoZip archive (big-endian):
 ;;
 ;; HEADER:
 ;;     "NOZA"          magic number
 ;;     4-byte int      number of files in archive
 ;;
 ;; FILE ENTRIES (one per file)
 ;;     4-byte int      length of file name
 ;;     string          file name
 ;;     4-byte int      length of file data
 ;;     data            file data, uncompressed
 ;;
 ;; TRAILER
 ;;     "NOZADIR"       directory intro
 ;;     4-byte int      number of directory entries
 ;;
 ;;   DIRECTORY ENTRIES (one per file)
 ;;     4-byte int      length of file name
 ;;     string          file name
 ;;     4-byte int      byte offset of file entry from start of archive
 ;;
 ;;   TAIL
 ;;     "NOZATAIL"      tail intro
 ;;     4-byte int      byte offset of start of trailer from start of archive

 (comment
  ;; This is a discussion of the problems with m-seq and an
  ;; exploration of how to produce one that works properly.
  
  ;; Here's the algo.monads definition of `m-seq`:
  
  (defmonadfn m-seq
  "'Executes' the monadic values in ms and returns a sequence of the
   basic values contained in them."
  [ms]
  (reduce (fn [q p]
            (m-bind p (fn [x]
                        (m-bind q (fn [y]
                                    (m-result (cons x y)))) )))
          (m-result '())
          (reverse ms)))

  ;; This definition is clearly based on Haskell's `sequence`
  ;; function, which is defined so (slightly reformatted):
  ;;
  ;; ```haskell
  ;; sequence = foldr mcons (return [])
  ;;   where
  ;;     mcons p q = p >>= \x ->
  ;;                          q >>= \y -> 
  ;;                                   return (x : y)
  ;; ```
  ;;
  ;; But in Clojure, that definition has some problems:
  ;; 
  ;; 1. Clojure doesn't have foldr, so instead we do a reduce (with
  ;;    swapped arguments) over the reversed list.  It would probably
  ;;    be better and more natural to do a reduce seeded with an empty
  ;;    vector and use conj instead of cons.
  ;; 2. Haskell's `foldr` is lazy, but Clojure's `reduce` is not.  So
  ;;    if this is used in a deferred-execution monad like `state-m`,
  ;;    then the reduce happens when `domonad` is being processed,
  ;;    rather than later.  As far as I can tell this *shouldn't* have
  ;;    any real practical effects, since the result of the reduction
  ;;    is a giant recursive function that then has to be evaluated
  ;;    later. But that leads to the next point:
  ;; 3. Because Haskell is lazy and has proper tail-call elimination,
  ;;    returning a giant recursive function from this doesn't have
  ;;    negative consequences in terms of either stack depth or (I
  ;;    think) code size.  But in Clojure, with `reduce` being strict
  ;;    and no TCE, it's bad in both ways.
  ;;
  ;; In Clojure, this should return a monadic value that (in
  ;; `state-m`) performs the reduce when it is called, in a more
  ;; straightforward way.  (For that matter, some of the other
  ;; functions---definitely `m-map`, and probably `m-chain` and
  ;; `m-reduce`---should be rewritten similarly.  Also, looking at
  ;; it more closely, `m-until`, `m-when`, and `m-when-not` might have
  ;; a different kind of problem, where their conditions are evaluated
  ;; at domonad time rather than being deferred.)
  ;;    
  (defmonadfn m-seq
    [ms]
    (reduce (fn [q p]
              (m-bind p (fn [x]
                          (m-bind q (fn [y]
                                      (m-result (cons x y)))))))
            (m-result '())
            (reverse ms)))

  ;; for reference:

  (defn m-result [v]                    ; in state-m
    (fn [s] [v s]))

  (defn m-bind [mv f]                   ; in state-m
    (fn [s]
      (let [[v ss] (mv s)]
        ((f v) ss))))

  (defmonadfn m-fmap
    [f mv]
    (m-bind mv (fn [x] (m-result (f x)))))
  
  ;; m-result: (fn m-result-state [v]
  ;;             (fn [s] [v s])
  (defmonadfn m-seq
    [ms]
    (m-bind (m-result ms)
            (fn [_] (m-bind )
              (reduce (fn [q p]
                        ;; Here I need to
                        ;;
                        ;; 1. bind q to unwrap it from the result
                        ;; 
                        ;; 2. force evaluation of p somehow in a way that
                        ;;    doesn't assume it's a state-m value
                        ;;
                        ;; 3. conj the two and wrap in m-result
                        ;;
                        ;; I *think* the first step is basically
                        ;; m-bind, except that in the state monad it
                        ;; returns a function.  How do I force that to
                        ;; be evaluated?  It almost seems as though,
                        ;; in a language without TCE, we need a third
                        ;; required monadic primitive: something to
                        ;; force evaluation, so that an iterative
                        ;; implementation of some things can work
                        ;; properly.
                        ;; 
                        )
                      (m-result [])
                      ms))))
  )

 (comment
  ;; This explains how the current m-map and m-seq functions evaluate
  ;; in the context of state-m.
  
  ;; assuming
  (defmonadfn twice [n]
    (m-result (+ n n)))

  ;; then, in the state monad, (twice 2) returns
  (fn [s] [4 s])
  
  ;; so (m-map twice [1 2]) evaluates to this:
  (m-seq '((fn [s] [2 s]) (fn [s] [4 s])))

  ;; which in turn evaluates to the following.  (Not exactly; I'm
  ;; using `let` to show the effects of closure scope.  But this is
  ;; the basic idea.)
  (fn [s]                                ; result of second iteration
    (let [p (fn [s] [2 s])
          [v ss] (p s)]
      (((fn [x]
          (let [q (fn [s]                ; q on reduce's second iteration
                                        ; (i.e., result of first iteration)
                    (let [p (fn [s] [4 s])
                          [v ss] (p s)]
                      (((fn [x]
                          (let [q (fn [s] ['() s])] ; <-- q on reduce's first iteration
                            (m-bind q (fn [y]
                                        (m-result (cons x y))))))
                        v)
                       ss)))]
            (m-bind q (fn [y]
                        (m-result (cons x y))))))
        v)
       ss)))

  ;; It would be better if it evaluated to this:
  (fn [s]
    (m-result 
     (reduce (fn [[s v] mv]
               (let [[r ss] (mv s)]
                 [ss (conj v r)]))
             [s []]
             '((fn [s] [2 s]) (fn [s] [4 s])))))
  
  )

 (defmonadfn state-m-seq
  "Iterative replacement for m-seq that is specific to the state monad.
   Returns a state-monad value which, applied to a state, runs each
   monadic value in ms in order, feeding the state produced by one
   into the next, and yields [vector-of-results final-state]."
  [ms]
  (fn [initial-state]
    (let [run-step (fn [[results state] mv]
                     (let [[result next-state] (mv state)]
                       [(conj results result) next-state]))]
      (reduce run-step [[] initial-state] ms))))

 (defmonadfn state-m-map
  "Maps f (a function returning a state-monad value) over xs and
   sequences the resulting monadic values with state-m-seq."
  [f xs]
  (let [monadic-values (map f xs)]
    (state-m-seq monadic-values)))

 (defmacro >>>
  "Expands to a state-monad function that evaluates forms in order
   when it is applied to a state, and returns a pair of the last
   form's value and that same, untouched state."
  [& forms]
  `(fn [state#]
     [(do ~@forms) state#]))

 (defn stack-depth
  "Diagnostic helper.  Prints label, a colon and the number of frames
   in a freshly captured stack trace, then returns that number."
  [label]
  (let [probe (Throwable.)
        frames (.getStackTrace (.fillInStackTrace probe))
        depth (count frames)]
    (println (str label ": " depth))
    depth))

 (defmonadfn flush-buffer
  "Flushes the buffer to the channel and clears the buffer.
   Returns (wrapped with m-result) the number of bytes written.

   Note: the write is performed when this function is called, not
   when the returned monadic value is later run; other functions in
   this file call it purely for that side effect."
  [buffer channel]
  (m-result
   (do
     (.flip buffer)
     ;; A single .write call is allowed to consume only part of the
     ;; buffer, so keep writing until it is drained; otherwise the
     ;; .clear below would silently discard the unwritten bytes.
     (let [written (loop [total 0]
                     (if (.hasRemaining buffer)
                       (recur (+ total (long (.write channel buffer))))
                       total))]
       (.clear buffer)
       written))))

 (defmonadfn write-archive-header
  "Returns a state-monad value that puts the \"NOZA\" magic and
   number-of-files (as an int) into the :buffer from the state and
   flushes it to the :channel from the state.  Its result is the
   value produced by flush-buffer (the number of bytes written)."
  [number-of-files]
  (domonad
   [buffer (fetch-val :buffer)
    channel (fetch-val :channel)
    :let [_ (doto buffer
              (.put (.getBytes "NOZA"))
              (.putInt number-of-files))]
    ;; Bind the flush as a monadic step.  Using it as the domonad
    ;; result expression would wrap the monadic value itself with
    ;; m-result, yielding a nested monadic value instead of a count.
    bytes-written (flush-buffer buffer channel)]
   bytes-written))

 ;; This doesn't work, because the work of the monad doesn't happen
 ;; here.  This function:
 ;; 1. opens the file and channel
 ;; 2. builds a monadic value (the function returned from the domonad
 ;;    call) but DOES NOT EVALUATE IT!!
 ;; 3. closes the channel and file
 ;; 4. returns the monadic value
 ;;
 ;; Finally, that monadic value is evaluated as part of the builder
 ;; evaluation in write-archive.  By the time we call .transferTo,
 ;; from-file and from-channel have already been closed.
 ;; Deliberately broken variant, kept as an illustration.  with-open
 ;; encloses the domonad form, so it only spans the construction of
 ;; the monadic value; the .transferTo call sits inside that value and
 ;; is not executed within the with-open scope.
 (defmonadfn write-file-contents-wrong [file]
  (with-open [from-file (java.io.FileInputStream. file)
              from-channel (.getChannel from-file)]
    (domonad
     [to-channel (fetch-val :channel)]
     (.transferTo from-channel 0 (.length file) to-channel))))

 (defmonadfn write-file-contents
  "Copies the contents of file (uncompressed) directly to the output
   channel taken from the state's :channel.
   Returns the number of bytes written."
  [file]
  (domonad
   [to-channel (fetch-val :channel)]
   (with-open [from-file (java.io.FileInputStream. file)
               from-channel (.getChannel from-file)]
     (let [total (long (.length file))]
       ;; transferTo may move fewer bytes than requested, so repeat
       ;; from the current offset until everything has been copied.
       ;; A call that transfers nothing ends the loop, so a file that
       ;; shrank after its length was read cannot spin forever.
       (loop [written 0]
         (let [n (if (< written total)
                   (long (.transferTo from-channel written (- total written) to-channel))
                   0)]
           (if (pos? n)
             (recur (+ written n))
             written)))))))

 (defmonadfn write-file-entry
  "Returns a state-monad value that writes one file entry: the name
   length, the name bytes and the file length go through the state's
   :buffer, followed by the file contents.  It then appends
   [file-name byte-offset] to the state's :entries, where byte-offset
   is the channel position read before the entry is written.  The
   monadic result is true."
  [file]
  (domonad
   [buffer (fetch-val :buffer)
    channel (fetch-val :channel)
    :let [file-name (.getName file)
          ;; Encode once and record the length of the encoded bytes.
          ;; The character count differs from the byte count for names
          ;; with multi-byte characters, which would make the length
          ;; field disagree with the bytes that follow it.
          name-bytes (.getBytes file-name)
          byte-offset (.position channel)
          _ (doto buffer
              (.putInt (count name-bytes))
              (.put name-bytes)
              (.putInt (.length file)))]
    _ (flush-buffer buffer channel)
    _ (write-file-contents file)
    _ (update-val :entries #(conj % [file-name byte-offset]))]
   true))

 (defmonadfn write-trailer-entry
  "Appends one directory entry (name length, name bytes, byte offset)
   to buffer, first calling flush-buffer when fewer bytes remain than
   the entry needs.  Returns buffer wrapped with m-result.

   Both the recorded length and the space check use the encoded byte
   count of the name, which can exceed its character count."
  [channel buffer [file-name byte-offset]]
  (m-result
   (let [name-bytes (.getBytes file-name)
         name-length (count name-bytes)]
     ;; 8 = two 4-byte ints (name length and byte offset).
     (when (< (.remaining buffer) (+ 8 name-length))
       (flush-buffer buffer channel))
     (doto buffer
       (.putInt name-length)
       (.put name-bytes)
       (.putInt byte-offset)))))

 ;; Builds the state-monad value that writes the archive trailer: the
 ;; "NOZADIR" intro and the entry count, one directory entry per item
 ;; in the state's :entries, then the "NOZATAIL" intro and the trailer
 ;; offset.  The monadic result is true.
 (defmonadfn write-archive-trailer []
  (domonad
   [buffer (fetch-val :buffer)
    channel (fetch-val :channel)
    entries (fetch-val :entries)
    ;; The channel position is read before this function puts anything
    ;; into the buffer.
    :let [trailer-offset (.position channel)
          _ (doto buffer
              (.put (.getBytes "NOZADIR"))
              (.putInt (count entries)))]
    _ (state-m-map (partial write-trailer-entry channel buffer) entries)
    ;; 12 = the 8 characters of "NOZATAIL" plus a 4-byte int.  The
    ;; value returned by flush-buffer is discarded here; it is called
    ;; only for its effect on the buffer and channel.
    :let [_ (when (< (.remaining buffer) 12)
              (flush-buffer buffer channel))
          _ (doto buffer
              (.put (.getBytes "NOZATAIL"))
              (.putInt trailer-offset))]
    _ (flush-buffer buffer channel)]
   true))

 (defn write-archive
  "Writes a NoZip archive of files to output-channel, which must be an
   already open, writable channel.  The work is expressed as a single
   state-monad value that is then run against an initial state holding
   the channel, a 2048-byte buffer and an empty entry list.
   Returns nil."
  [output-channel files]
  (with-monad state-m
    (let [initial-state {:channel output-channel
                         :buffer (.clear (java.nio.ByteBuffer/allocate 2048))
                         :entries []}
          archive-writer (domonad
                          [_ (write-archive-header (count files))
                           _ (state-m-map write-file-entry files)
                           _ (write-archive-trailer)]
                          nil)]
      (archive-writer initial-state)
      nil)))
diff --git a/archive_simple.clj b/archive_simple.clj
 ;; A nozip writer that uses functional decomposition to track state (on the stack).
 (ns nozip.archive-simple)

 (defn flush-buffer
  "Writes everything currently in buffer to channel, then clears the
   buffer for reuse.  Returns the total number of bytes written."
  [buffer channel]
  (.flip buffer)
  ;; A single .write call is allowed to consume only part of the
  ;; buffer, so keep writing until it is drained; otherwise the .clear
  ;; below would silently discard the unwritten bytes.
  (let [written (loop [total 0]
                  (if (.hasRemaining buffer)
                    (recur (+ total (long (.write channel buffer))))
                    total))]
    (.clear buffer)
    written))

 (defn write-archive-header
  "Puts the \"NOZA\" magic and number-of-files (as an int) into buffer
   and flushes buffer to channel.  Returns what flush-buffer returns."
  [channel buffer number-of-files]
  (let [magic (.getBytes "NOZA")]
    (.put buffer magic)
    (.putInt buffer number-of-files)
    (flush-buffer buffer channel)))

 (defn write-file-contents
  "Copies the contents of file (uncompressed) directly to to-channel.
   Returns the number of bytes written."
  [to-channel file]
  (with-open [from-file (java.io.FileInputStream. file)
              from-channel (.getChannel from-file)]
    (let [total (long (.length file))]
      ;; transferTo may move fewer bytes than requested, so repeat from
      ;; the current offset until everything has been copied.  A call
      ;; that transfers nothing ends the loop, so a file that shrank
      ;; after its length was read cannot spin forever.
      (loop [written 0]
        (let [n (if (< written total)
                  (long (.transferTo from-channel written (- total written) to-channel))
                  0)]
          (if (pos? n)
            (recur (+ written n))
            written))))))

 (defn write-file-entry
  "Writes one file entry to channel: the name length, the name bytes
   and the file length go through buffer, followed by the file
   contents.  Returns [file-name byte-offset], where byte-offset is the
   channel position read before the entry is written."
  [channel buffer file]
  (let [file-name (.getName file)
        ;; Encode once and record the length of the encoded bytes.  The
        ;; character count differs from the byte count for names with
        ;; multi-byte characters, which would make the length field
        ;; disagree with the bytes that follow it.
        name-bytes (.getBytes file-name)
        byte-offset (.position channel)]
    (doto buffer
      (.putInt (count name-bytes))
      (.put name-bytes)
      (.putInt (.length file)))
    (flush-buffer buffer channel)
    (write-file-contents channel file)
    (vector file-name byte-offset)))

 (defn write-trailer-entry
  "Appends one directory entry (name length, name bytes, byte offset)
   to buffer, first flushing buffer to channel when fewer bytes remain
   than the entry needs.  Returns buffer.

   Both the recorded length and the space check use the encoded byte
   count of the name, which can exceed its character count."
  [channel buffer [file-name byte-offset]]
  (let [name-bytes (.getBytes file-name)
        name-length (count name-bytes)]
    ;; 8 = two 4-byte ints (name length and byte offset).
    (when (< (.remaining buffer) (+ 8 name-length))
      (flush-buffer buffer channel))
    (doto buffer
      (.putInt name-length)
      (.put name-bytes)
      (.putInt byte-offset))))

 (defn write-archive-trailer
  "Writes the archive trailer to channel through buffer: the
   \"NOZADIR\" intro and entry count, one directory entry per element
   of file-info-entries, then the \"NOZATAIL\" intro and the channel
   position that was read on entry.  Returns what the final
   flush-buffer call returns."
  [channel buffer file-info-entries]
  (let [trailer-offset (.position channel)
        ensure-room (fn [needed]
                      (when (< (.remaining buffer) needed)
                        (flush-buffer buffer channel)))]
    (.put buffer (.getBytes "NOZADIR"))
    (.putInt buffer (count file-info-entries))
    (doseq [entry file-info-entries]
      (write-trailer-entry channel buffer entry))
    ;; 12 = the 8 characters of "NOZATAIL" plus a 4-byte int.
    (ensure-room 12)
    (.put buffer (.getBytes "NOZATAIL"))
    (.putInt buffer trailer-offset)
    (flush-buffer buffer channel)))

 (defn write-archive
  "Writes a NoZip archive of files to output-channel using a 2048-byte
   buffer: header first, then every file entry in order, then the
   trailer built from the [name offset] pairs the entries returned.
   Returns nil."
  [output-channel files]
  (let [buffer (.clear (java.nio.ByteBuffer/allocate 2048))
        write-entry (fn [file]
                      (write-file-entry output-channel buffer file))]
    (write-archive-header output-channel buffer (count files))
    ;; vec forces every entry to be written before the trailer starts.
    (write-archive-trailer output-channel buffer (vec (map write-entry files)))
    nil))
diff --git a/archive_var.clj b/archive_var.clj
 ;; A nozip writer that uses a var to track state.
 (ns nozip.archive-var)

 ;; Dynamic var holding the writer state as a map.  Its root value is
 ;; an empty map; write-archive rebinds it to a map with :channel,
 ;; :buffer and :entries keys, and write-file-entry updates :entries
 ;; with set!.
 (def ^:dynamic archive-state {})

 (defn flush-buffer
  "Writes everything currently in buffer to channel, then clears the
   buffer for reuse.  Returns the total number of bytes written."
  [buffer channel]
  (.flip buffer)
  ;; A single .write call is allowed to consume only part of the
  ;; buffer, so keep writing until it is drained; otherwise the .clear
  ;; below would silently discard the unwritten bytes.
  (let [written (loop [total 0]
                  (if (.hasRemaining buffer)
                    (recur (+ total (long (.write channel buffer))))
                    total))]
    (.clear buffer)
    written))

 (defn write-archive-header
  "Puts the \"NOZA\" magic and number-of-files (as an int) into the
   :buffer of archive-state and flushes it to the :channel of
   archive-state.  Returns what flush-buffer returns."
  [number-of-files]
  (let [{:keys [buffer channel]} archive-state]
    (.put buffer (.getBytes "NOZA"))
    (.putInt buffer number-of-files)
    (flush-buffer buffer channel)))

 (defn write-file-contents
  "Copies the contents of file (uncompressed) directly to the :channel
   of archive-state.  Returns the number of bytes written."
  [file]
  (let [to-channel (:channel archive-state)]
    (with-open [from-file (java.io.FileInputStream. file)
                from-channel (.getChannel from-file)]
      (let [total (long (.length file))]
        ;; transferTo may move fewer bytes than requested, so repeat
        ;; from the current offset until everything has been copied.
        ;; A call that transfers nothing ends the loop, so a file that
        ;; shrank after its length was read cannot spin forever.
        (loop [written 0]
          (let [n (if (< written total)
                    (long (.transferTo from-channel written (- total written) to-channel))
                    0)]
            (if (pos? n)
              (recur (+ written n))
              written)))))))

 (defn write-file-entry
  "Writes one file entry using the :buffer and :channel of
   archive-state: name length, name bytes and file length, followed by
   the file contents.  Then sets archive-state to a copy whose :entries
   has [file-name byte-offset] appended, where byte-offset is the
   channel position read before the entry is written.  Because it uses
   set!, it must run while archive-state has a thread-local binding."
  [file]
  (let [buffer (:buffer archive-state)
        channel (:channel archive-state)
        file-name (.getName file)
        ;; Encode once and record the length of the encoded bytes.  The
        ;; character count differs from the byte count for names with
        ;; multi-byte characters, which would make the length field
        ;; disagree with the bytes that follow it.
        name-bytes (.getBytes file-name)
        byte-offset (.position channel)]
    (doto buffer
      (.putInt (count name-bytes))
      (.put name-bytes)
      (.putInt (.length file)))
    (flush-buffer buffer channel)
    (write-file-contents file)
    (set! archive-state
          (update-in archive-state [:entries] conj [file-name byte-offset]))))

 (defn write-trailer-entry
  "Appends one directory entry (name length, name bytes, byte offset)
   to buffer, first flushing buffer to channel when fewer bytes remain
   than the entry needs.  Returns buffer.

   Both the recorded length and the space check use the encoded byte
   count of the name, which can exceed its character count."
  [channel buffer [file-name byte-offset]]
  (let [name-bytes (.getBytes file-name)
        name-length (count name-bytes)]
    ;; 8 = two 4-byte ints (name length and byte offset).
    (when (< (.remaining buffer) (+ 8 name-length))
      (flush-buffer buffer channel))
    (doto buffer
      (.putInt name-length)
      (.put name-bytes)
      (.putInt byte-offset))))

 (defn write-archive-trailer
  "Writes the archive trailer using the :buffer, :channel and :entries
   of archive-state: the \"NOZADIR\" intro and entry count, one
   directory entry per element of :entries, then the \"NOZATAIL\" intro
   and the channel position that was read on entry.  Returns what the
   final flush-buffer call returns."
  []
  (let [{:keys [buffer channel entries]} archive-state
        trailer-offset (.position channel)
        ensure-room (fn [needed]
                      (when (< (.remaining buffer) needed)
                        (flush-buffer buffer channel)))]
    (.put buffer (.getBytes "NOZADIR"))
    (.putInt buffer (count entries))
    (doseq [entry entries]
      (write-trailer-entry channel buffer entry))
    ;; 12 = the 8 characters of "NOZATAIL" plus a 4-byte int.
    (ensure-room 12)
    (.put buffer (.getBytes "NOZATAIL"))
    (.putInt buffer trailer-offset)
    (flush-buffer buffer channel)))

 (defn write-archive
  "Writes a NoZip archive of files to output-channel.  Binds
   archive-state to a fresh map holding the channel, a 2048-byte buffer
   and an empty entry list, then writes the header, every file entry in
   order, and the trailer.  Returns nil."
  [output-channel files]
  (let [initial-state {:channel output-channel
                       :buffer (.clear (java.nio.ByteBuffer/allocate 2048))
                       :entries []}]
    (binding [archive-state initial-state]
      (write-archive-header (count files))
      (doseq [file files]
        (write-file-entry file))
      (write-archive-trailer))
    nil))
diff --git a/core.clj b/core.clj
 ;; A driver program that allows testing any of the three nozip writers.
 (ns nozip.core
  (:require nozip.archive-monad
            nozip.archive-simple
            nozip.archive-var))

 (defn choose-version
  "Maps version-name (\"monad\", \"simple\" or \"var\") to the
   write-archive function of the corresponding namespace.  Throws
   IllegalArgumentException naming the accepted values for anything
   else, including nil."
  [version-name]
  (case version-name
    "monad" nozip.archive-monad/write-archive
    "simple" nozip.archive-simple/write-archive
    "var" nozip.archive-var/write-archive
    ;; Explicit default: same exception type that case throws on its
    ;; own, but with a message that says what was expected.
    (throw (IllegalArgumentException.
            (str "Unknown writer version: " (pr-str version-name)
                 " (expected \"monad\", \"simple\", or \"var\")")))))

 (defn basic-run
  "Opens \"archive.nozip\" for writing, calls write-archive-fn with the
   file's channel and files, and closes the channel and stream
   afterwards.  Returns what write-archive-fn returns."
  [write-archive-fn files]
  (with-open [out-stream (java.io.FileOutputStream. "archive.nozip")
              out-channel (.getChannel out-stream)]
    (write-archive-fn out-channel files)))

 (defn benchmark-run
  "Opens \"/dev/null\" for writing and, inside `time`, calls
   write-archive-fn with its channel and the first 10000 elements of
   files repeated cyclically.  Returns what write-archive-fn returns."
  [write-archive-fn files]
  (with-open [sink-stream (java.io.FileOutputStream. "/dev/null")
              sink-channel (.getChannel sink-stream)]
    (time
     (write-archive-fn sink-channel
                       (take 10000 (cycle files))))))

 (defn run-with
  "Resolves the writer named by the first element of args, lists the
   readable, non-hidden, non-directory files in the directory named by
   the second element, and calls run-fn with the writer and the files.
   Throws IllegalArgumentException when an argument is missing or the
   directory cannot be listed."
  [run-fn args]
  ;; work around dangerous default behaviour in Clojure
  (alter-var-root #'*read-eval* (constantly false))
  (let [[version-name dir-name] args
        _ (when-not (and version-name dir-name)
            (throw (IllegalArgumentException.
                    "Usage: <monad|simple|var> <directory>")))
        write-archive-fn (choose-version version-name)
        dir (java.io.File. dir-name)
        file-filter (reify java.io.FileFilter
                      (accept [this path]
                        (and (not (.isDirectory path))
                             (not (.isHidden path))
                             (.canRead path))))
        files (.listFiles dir file-filter)]
    ;; listFiles returns nil (not an empty array) when dir does not
    ;; denote a directory or cannot be read.  Without this check the
    ;; writer would silently produce an archive with zero files.
    (when (nil? files)
      (throw (IllegalArgumentException.
              (str "Not a readable directory: " dir-name))))
    (run-fn write-archive-fn files)))

 ;; Passes args to run-with together with basic-run, so the selected
 ;; writer is run once through basic-run.
 (defn -main [& args]
  (run-with basic-run args))

 ;; Passes args to run-with together with benchmark-run, so the
 ;; selected writer is run through the timed benchmark-run.
 (defn benchmark [& args]
  (run-with benchmark-run args))
	;; A nozip writer that uses the state monad to track state.
	(ns nozip.archive-monad
	(:use clojure.algo.monads))

	;; Note: this file contains many notes to myself, remnants of false
	;; starts, and exploratory/diagnosis code that I used while trying
	;; to figure out what was going on. I'll clean that up shortly.

	;; Structure of a NoZip archive (big-endian):
	;;
	;; HEADER:
	;; "NOZA" magic number
	;; 4-byte int number of files in archive
	;;
	;; FILE ENTRIES (one per file)
	;; 4-byte int length of file name
	;; string file name
	;; 4-byte int length of file data
	;; data file data, uncompressed
	;;
	;; TRAILER
	;; "NOZADIR" directory intro
	;; 4-byte int number of directory entries
	;;
	;; DIRECTORY ENTRIES (one per file)
	;; 4-byte int length of file name
	;; string file name
	;; 4-byte int byte offset of file entry from start of archive
	;;
	;; TAIL
	;; "NOZATAIL" tail intro
	;; 4-byte int byte offset of start of trailer from start of archive

	(comment
	;; This is a discussion of the problems with m-seq and an
	;; exploration of how to produce one that works properly.

	;; Here's the algo.monads definition of `m-seq`:

	(defmonadfn m-seq
	"'Executes' the monadic values in ms and returns a sequence of the
	basic values contained in them."
	[ms]
	(reduce (fn [q p]
	(m-bind p (fn [x]
	(m-bind q (fn [y]
	(m-result (cons x y)))) )))
	(m-result '())
	(reverse ms)))

	;; This definition is clearly based on Haskell's `sequence`
	;; function, which is defined so (slightly reformatted):
	;;
	;; ```haskell
	;; sequence = foldr mcons (return [])
	;; where
	;; mcons p q = p >>= \x ->
	;; q >>= \y ->
	;; return (x : y)
	;; ```
	;;
	;; But in Clojure, that definition has some problems:
	;;
	;; 1. Clojure doesn't have foldr, so instead we do a reduce (with
	;; swapped arguments) over the reversed list. It would probably
	;; be better and more natural to do a reduce seeded with an empty
	;; vector and use conj instead of cons.
	;; 2. Haskell's `foldr` is lazy, but Clojure's `reduce` is not. So
	;; if this is used in a deferred-execution monad like `state-m`,
	;; then the reduce happens when `domonad` is being processed,
	;; rather than later. As far as I can tell this shouldn't have
	;; any real practical effects, since the result of the reduction
	;; is a giant recursive function that then has to be evaluated
	;; later. But that leads to the next point:
	;; 3. Because Haskell is lazy and has proper tail-call elimination,
	;; returning a giant recursive function from this doesn't have
	;; negative consequences in terms of either stack depth or (I
	;; think) code size. But in Clojure, with `reduce` being strict
	;; and no TCE, it's bad in both ways.
	;;
	;; In Clojure, this should return a monadic value that (in
	;; `state-m`) performs the reduce when it is called, in a more
	;; straightforward way. (For that matter, some of the other
	;; functions---definitely `m-map`, and probably `m-chain` and
	;; `m-reduce`---should be rewritten similarly. Also, looking at
	;; it more closely, `m-until`, `m-when`, and `m-when-not` might have
	;; a different kind of problem, where their conditions are evaluated
	;; at domonad time rather than being deferred.)
	;;
	(defmonadfn m-seq
	[ms]
	(reduce (fn [q p]
	(m-bind p (fn [x]
	(m-bind q (fn [y]
	(m-result (cons x y)))))))
	(m-result '())
	(reverse ms)))

	;; for reference:

	(defn m-result [v] ; in state-m
	(fn [s] [v s]))

	(defn m-bind [mv f] ; in state-m
	(fn [s]
	(let [[v ss] (mv s)]
	((f v) ss))))

	(defmonadfn m-fmap
	[f mv]
	(m-bind mv (fn [x] (m-result (f x)))))

	;; m-result: (fn m-result-state [v]
	;; (fn [s] [v s])
	(defmonadfn m-seq
	[ms]
	(m-bind (m-result ms)
	(fn [_] (m-bind )
	(reduce (fn [q p]
	;; Here I need to
	;;
	;; 1. bind q to unwrap it from the result
	;;
	;; 2. force evaluation of p somehow in a way that
	;; doesn't assume it's a state-m value
	;;
	;; 3. conj the two and wrap in m-result
	;;
	;; I think the first step is basically
	;; m-bind, except that in the state monad it
	;; returns a function. How do I force that to
	;; be evaluated? It almost seems as though,
	;; in a language without TCE, we need a third
	;; required monadic primitive: something to
	;; force evaluation, so that an iterative
	;; implementation of some things can work
	;; properly.
	;;
	)
	(m-result [])
	ms))))
	)

	(comment
	;; This explains how the current m-map and m-seq functions evaluate
	;; in the context of state-m.

	;; assuming
	(defmonadfn twice [n]
	(m-result (+ n n)))

	;; then, in the state monad, (twice 2) returns
	(fn [s] [4 s])

	;; so (m-map twice [1 2]) evaluates to this:
	(m-seq '((fn [s] [2 s]) (fn [s] [4 s])))

	;; which in turn evaluates to the following. (Not exactly; I'm
	;; using `let` to show the effects of closure scope. But this is
	;; the basic idea.)
	(fn [s] ; result of second iteration
	(let [p (fn [s] [2 s])
	[v ss] (p s)]
	(((fn [x]
	(let [q (fn [s] ; q on reduce's second iteration
	; (i.e., result of first iteration)
	(let [p (fn [s] [4 s])
	[v ss] (p s)]
	(((fn [x]
	(let [q (fn [s] ['() s])] ; <-- q on reduce's first iteration
	(m-bind q (fn [y]
	(m-result (cons x y))))))
	v)
	ss)))]
	(m-bind q (fn [y]
	(m-result (cons x y))))))
	v)
	ss)))

	;; It would be better if it evaluated to this:
	(fn [s]
	(m-result
	(reduce (fn [[s v] mv]
	(let [[r ss] (mv s)]
	[ss (conj v r)]))
	[s []]
	'((fn [s] [2 s]) (fn [s] [4 s])))))

	)

	(defmonadfn state-m-seq
	  "Iterative replacement for m-seq that is specific to the state monad.
	   Returns a state-monad value which, applied to a state, runs each
	   monadic value in ms in order, feeding the state produced by one
	   into the next, and yields [vector-of-results final-state]."
	  [ms]
	  (fn [initial-state]
	    (let [run-step (fn [[results state] mv]
	                     (let [[result next-state] (mv state)]
	                       [(conj results result) next-state]))]
	      (reduce run-step [[] initial-state] ms))))

	(defmonadfn state-m-map
	  "Maps f (a function returning a state-monad value) over xs and
	   sequences the resulting monadic values with state-m-seq."
	  [f xs]
	  (let [monadic-values (map f xs)]
	    (state-m-seq monadic-values)))

	(defmacro >>>
	  "Expands to a state-monad function that evaluates forms in order
	   when it is applied to a state, and returns a pair of the last
	   form's value and that same, untouched state."
	  [& forms]
	  `(fn [state#]
	     [(do ~@forms) state#]))

	(defn stack-depth
	  "Diagnostic helper.  Prints label, a colon and the number of frames
	   in a freshly captured stack trace, then returns that number."
	  [label]
	  (let [probe (Throwable.)
	        frames (.getStackTrace (.fillInStackTrace probe))
	        depth (count frames)]
	    (println (str label ": " depth))
	    depth))

	(defmonadfn flush-buffer
	  "Flushes the buffer to the channel and clears the buffer.
	   Returns (wrapped with m-result) the number of bytes written.

	   Note: the write is performed when this function is called, not
	   when the returned monadic value is later run; other functions in
	   this file call it purely for that side effect."
	  [buffer channel]
	  (m-result
	   (do
	     (.flip buffer)
	     ;; A single .write call is allowed to consume only part of the
	     ;; buffer, so keep writing until it is drained; otherwise the
	     ;; .clear below would silently discard the unwritten bytes.
	     (let [written (loop [total 0]
	                     (if (.hasRemaining buffer)
	                       (recur (+ total (long (.write channel buffer))))
	                       total))]
	       (.clear buffer)
	       written))))

	(defmonadfn write-archive-header
	  "Returns a state-monad value that puts the \"NOZA\" magic and
	   number-of-files (as an int) into the :buffer from the state and
	   flushes it to the :channel from the state.  Its result is the
	   value produced by flush-buffer (the number of bytes written)."
	  [number-of-files]
	  (domonad
	   [buffer (fetch-val :buffer)
	    channel (fetch-val :channel)
	    :let [_ (doto buffer
	              (.put (.getBytes "NOZA"))
	              (.putInt number-of-files))]
	    ;; Bind the flush as a monadic step.  Using it as the domonad
	    ;; result expression would wrap the monadic value itself with
	    ;; m-result, yielding a nested monadic value instead of a count.
	    bytes-written (flush-buffer buffer channel)]
	   bytes-written))

	;; This doesn't work, because the work of the monad doesn't happen
	;; here. This function:
	;; 1. opens the file and channel
	;; 2. builds a monadic value (the function returned from the domonad
	;; call) but DOES NOT EVALUATE IT!!
	;; 3. closes the channel and file
	;; 4. returns the monadic value
	;;
	;; Finally, that monadic value is evaluated as part of the builder
	;; evaluation in write-archive. By the time we call .transferTo,
	;; from-file and from-channel have already been closed.
	;; Deliberately broken variant, kept as an illustration.  with-open
	;; encloses the domonad form, so it only spans the construction of
	;; the monadic value; the .transferTo call sits inside that value and
	;; is not executed within the with-open scope.
	(defmonadfn write-file-contents-wrong [file]
	(with-open [from-file (java.io.FileInputStream. file)
	from-channel (.getChannel from-file)]
	(domonad
	[to-channel (fetch-val :channel)]
	(.transferTo from-channel 0 (.length file) to-channel))))

	(defmonadfn write-file-contents
	  "Copies the contents of file (uncompressed) directly to the output
	   channel taken from the state's :channel.
	   Returns the number of bytes written."
	  [file]
	  (domonad
	   [to-channel (fetch-val :channel)]
	   (with-open [from-file (java.io.FileInputStream. file)
	               from-channel (.getChannel from-file)]
	     (let [total (long (.length file))]
	       ;; transferTo may move fewer bytes than requested, so repeat
	       ;; from the current offset until everything has been copied.
	       ;; A call that transfers nothing ends the loop, so a file that
	       ;; shrank after its length was read cannot spin forever.
	       (loop [written 0]
	         (let [n (if (< written total)
	                   (long (.transferTo from-channel written (- total written) to-channel))
	                   0)]
	           (if (pos? n)
	             (recur (+ written n))
	             written)))))))

	(defmonadfn write-file-entry
	  "Returns a state-monad value that writes one file entry: the name
	   length, the name bytes and the file length go through the state's
	   :buffer, followed by the file contents.  It then appends
	   [file-name byte-offset] to the state's :entries, where byte-offset
	   is the channel position read before the entry is written.  The
	   monadic result is true."
	  [file]
	  (domonad
	   [buffer (fetch-val :buffer)
	    channel (fetch-val :channel)
	    :let [file-name (.getName file)
	          ;; Encode once and record the length of the encoded bytes.
	          ;; The character count differs from the byte count for names
	          ;; with multi-byte characters, which would make the length
	          ;; field disagree with the bytes that follow it.
	          name-bytes (.getBytes file-name)
	          byte-offset (.position channel)
	          _ (doto buffer
	              (.putInt (count name-bytes))
	              (.put name-bytes)
	              (.putInt (.length file)))]
	    _ (flush-buffer buffer channel)
	    _ (write-file-contents file)
	    _ (update-val :entries #(conj % [file-name byte-offset]))]
	   true))

	(defmonadfn write-trailer-entry
	  "Appends one directory entry (name length, name bytes, byte offset)
	   to buffer, first calling flush-buffer when fewer bytes remain than
	   the entry needs.  Returns buffer wrapped with m-result.

	   Both the recorded length and the space check use the encoded byte
	   count of the name, which can exceed its character count."
	  [channel buffer [file-name byte-offset]]
	  (m-result
	   (let [name-bytes (.getBytes file-name)
	         name-length (count name-bytes)]
	     ;; 8 = two 4-byte ints (name length and byte offset).
	     (when (< (.remaining buffer) (+ 8 name-length))
	       (flush-buffer buffer channel))
	     (doto buffer
	       (.putInt name-length)
	       (.put name-bytes)
	       (.putInt byte-offset)))))

	;; Builds the state-monad value that writes the archive trailer: the
	;; "NOZADIR" intro and the entry count, one directory entry per item
	;; in the state's :entries, then the "NOZATAIL" intro and the trailer
	;; offset.  The monadic result is true.
	(defmonadfn write-archive-trailer []
	(domonad
	[buffer (fetch-val :buffer)
	channel (fetch-val :channel)
	entries (fetch-val :entries)
	;; The channel position is read before this function puts anything
	;; into the buffer.
	:let [trailer-offset (.position channel)
	_ (doto buffer
	(.put (.getBytes "NOZADIR"))
	(.putInt (count entries)))]
	_ (state-m-map (partial write-trailer-entry channel buffer) entries)
	;; 12 = the 8 characters of "NOZATAIL" plus a 4-byte int.  The
	;; value returned by flush-buffer is discarded here; it is called
	;; only for its effect on the buffer and channel.
	:let [_ (when (< (.remaining buffer) 12)
	(flush-buffer buffer channel))
	_ (doto buffer
	(.put (.getBytes "NOZATAIL"))
	(.putInt trailer-offset))]
	_ (flush-buffer buffer channel)]
	true))

	(defn write-archive
	  "Writes a NoZip archive of files to output-channel, which must be an
	   already open, writable channel.  The work is expressed as a single
	   state-monad value that is then run against an initial state holding
	   the channel, a 2048-byte buffer and an empty entry list.
	   Returns nil."
	  [output-channel files]
	  (with-monad state-m
	    (let [initial-state {:channel output-channel
	                         :buffer (.clear (java.nio.ByteBuffer/allocate 2048))
	                         :entries []}
	          archive-writer (domonad
	                          [_ (write-archive-header (count files))
	                           _ (state-m-map write-file-entry files)
	                           _ (write-archive-trailer)]
	                          nil)]
	      (archive-writer initial-state)
	      nil)))
	;; A nozip writer that uses functional decomposition to track state (on the stack).
	(ns nozip.archive-simple)

	(defn flush-buffer
	  "Writes everything currently in buffer to channel, then clears the
	   buffer for reuse.  Returns the total number of bytes written."
	  [buffer channel]
	  (.flip buffer)
	  ;; A single .write call is allowed to consume only part of the
	  ;; buffer, so keep writing until it is drained; otherwise the .clear
	  ;; below would silently discard the unwritten bytes.
	  (let [written (loop [total 0]
	                  (if (.hasRemaining buffer)
	                    (recur (+ total (long (.write channel buffer))))
	                    total))]
	    (.clear buffer)
	    written))

	(defn write-archive-header
	  "Puts the \"NOZA\" magic and number-of-files (as an int) into buffer
	   and flushes buffer to channel.  Returns what flush-buffer returns."
	  [channel buffer number-of-files]
	  (let [magic (.getBytes "NOZA")]
	    (.put buffer magic)
	    (.putInt buffer number-of-files)
	    (flush-buffer buffer channel)))

	(defn write-file-contents
	  "Copies the contents of file (uncompressed) directly to to-channel.
	   Returns the number of bytes written."
	  [to-channel file]
	  (with-open [from-file (java.io.FileInputStream. file)
	              from-channel (.getChannel from-file)]
	    (let [total (long (.length file))]
	      ;; transferTo may move fewer bytes than requested, so repeat from
	      ;; the current offset until everything has been copied.  A call
	      ;; that transfers nothing ends the loop, so a file that shrank
	      ;; after its length was read cannot spin forever.
	      (loop [written 0]
	        (let [n (if (< written total)
	                  (long (.transferTo from-channel written (- total written) to-channel))
	                  0)]
	          (if (pos? n)
	            (recur (+ written n))
	            written))))))

	(defn write-file-entry
	  "Writes one file entry to channel: the name length, the name bytes
	   and the file length go through buffer, followed by the file
	   contents.  Returns [file-name byte-offset], where byte-offset is the
	   channel position read before the entry is written."
	  [channel buffer file]
	  (let [file-name (.getName file)
	        ;; Encode once and record the length of the encoded bytes.  The
	        ;; character count differs from the byte count for names with
	        ;; multi-byte characters, which would make the length field
	        ;; disagree with the bytes that follow it.
	        name-bytes (.getBytes file-name)
	        byte-offset (.position channel)]
	    (doto buffer
	      (.putInt (count name-bytes))
	      (.put name-bytes)
	      (.putInt (.length file)))
	    (flush-buffer buffer channel)
	    (write-file-contents channel file)
	    (vector file-name byte-offset)))

	(defn write-trailer-entry
	  "Appends one directory entry (name length, name bytes, byte offset)
	   to buffer, first flushing buffer to channel when fewer bytes remain
	   than the entry needs.  Returns buffer.

	   Both the recorded length and the space check use the encoded byte
	   count of the name, which can exceed its character count."
	  [channel buffer [file-name byte-offset]]
	  (let [name-bytes (.getBytes file-name)
	        name-length (count name-bytes)]
	    ;; 8 = two 4-byte ints (name length and byte offset).
	    (when (< (.remaining buffer) (+ 8 name-length))
	      (flush-buffer buffer channel))
	    (doto buffer
	      (.putInt name-length)
	      (.put name-bytes)
	      (.putInt byte-offset))))

	(defn write-archive-trailer
	  "Writes the archive trailer to channel through buffer: the
	   \"NOZADIR\" intro and entry count, one directory entry per element
	   of file-info-entries, then the \"NOZATAIL\" intro and the channel
	   position that was read on entry.  Returns what the final
	   flush-buffer call returns."
	  [channel buffer file-info-entries]
	  (let [trailer-offset (.position channel)
	        ensure-room (fn [needed]
	                      (when (< (.remaining buffer) needed)
	                        (flush-buffer buffer channel)))]
	    (.put buffer (.getBytes "NOZADIR"))
	    (.putInt buffer (count file-info-entries))
	    (doseq [entry file-info-entries]
	      (write-trailer-entry channel buffer entry))
	    ;; 12 = the 8 characters of "NOZATAIL" plus a 4-byte int.
	    (ensure-room 12)
	    (.put buffer (.getBytes "NOZATAIL"))
	    (.putInt buffer trailer-offset)
	    (flush-buffer buffer channel)))

	(defn write-archive
	  "Writes a NoZip archive of files to output-channel using a 2048-byte
	   buffer: header first, then every file entry in order, then the
	   trailer built from the [name offset] pairs the entries returned.
	   Returns nil."
	  [output-channel files]
	  (let [buffer (.clear (java.nio.ByteBuffer/allocate 2048))
	        write-entry (fn [file]
	                      (write-file-entry output-channel buffer file))]
	    (write-archive-header output-channel buffer (count files))
	    ;; vec forces every entry to be written before the trailer starts.
	    (write-archive-trailer output-channel buffer (vec (map write-entry files)))
	    nil))
	;; A nozip writer that uses a var to track state.
	(ns nozip.archive-var)

	;; Dynamic var holding the writer state as a map.  Its root value is
	;; an empty map; write-archive rebinds it to a map with :channel,
	;; :buffer and :entries keys, and write-file-entry updates :entries
	;; with set!.
	(def ^:dynamic archive-state {})

	(defn flush-buffer
	  "Writes everything currently in buffer to channel, then clears the
	   buffer for reuse.  Returns the total number of bytes written."
	  [buffer channel]
	  (.flip buffer)
	  ;; A single .write call is allowed to consume only part of the
	  ;; buffer, so keep writing until it is drained; otherwise the .clear
	  ;; below would silently discard the unwritten bytes.
	  (let [written (loop [total 0]
	                  (if (.hasRemaining buffer)
	                    (recur (+ total (long (.write channel buffer))))
	                    total))]
	    (.clear buffer)
	    written))

	(defn write-archive-header
	  "Puts the \"NOZA\" magic and number-of-files (as an int) into the
	   :buffer of archive-state and flushes it to the :channel of
	   archive-state.  Returns what flush-buffer returns."
	  [number-of-files]
	  (let [{:keys [buffer channel]} archive-state]
	    (.put buffer (.getBytes "NOZA"))
	    (.putInt buffer number-of-files)
	    (flush-buffer buffer channel)))

	(defn write-file-contents
	  "Copies the contents of file (uncompressed) directly to the :channel
	   of archive-state.  Returns the number of bytes written."
	  [file]
	  (let [to-channel (:channel archive-state)]
	    (with-open [from-file (java.io.FileInputStream. file)
	                from-channel (.getChannel from-file)]
	      (let [total (long (.length file))]
	        ;; transferTo may move fewer bytes than requested, so repeat
	        ;; from the current offset until everything has been copied.
	        ;; A call that transfers nothing ends the loop, so a file that
	        ;; shrank after its length was read cannot spin forever.
	        (loop [written 0]
	          (let [n (if (< written total)
	                    (long (.transferTo from-channel written (- total written) to-channel))
	                    0)]
	            (if (pos? n)
	              (recur (+ written n))
	              written)))))))

	(defn write-file-entry
	  "Writes one file entry using the :buffer and :channel of
	   archive-state: name length, name bytes and file length, followed by
	   the file contents.  Then sets archive-state to a copy whose :entries
	   has [file-name byte-offset] appended, where byte-offset is the
	   channel position read before the entry is written.  Because it uses
	   set!, it must run while archive-state has a thread-local binding."
	  [file]
	  (let [buffer (:buffer archive-state)
	        channel (:channel archive-state)
	        file-name (.getName file)
	        ;; Encode once and record the length of the encoded bytes.  The
	        ;; character count differs from the byte count for names with
	        ;; multi-byte characters, which would make the length field
	        ;; disagree with the bytes that follow it.
	        name-bytes (.getBytes file-name)
	        byte-offset (.position channel)]
	    (doto buffer
	      (.putInt (count name-bytes))
	      (.put name-bytes)
	      (.putInt (.length file)))
	    (flush-buffer buffer channel)
	    (write-file-contents file)
	    (set! archive-state
	          (update-in archive-state [:entries] conj [file-name byte-offset]))))

	(defn write-trailer-entry
	  "Appends one directory entry (name length, name bytes, byte offset)
	   to buffer, first flushing buffer to channel when fewer bytes remain
	   than the entry needs.  Returns buffer.

	   Both the recorded length and the space check use the encoded byte
	   count of the name, which can exceed its character count."
	  [channel buffer [file-name byte-offset]]
	  (let [name-bytes (.getBytes file-name)
	        name-length (count name-bytes)]
	    ;; 8 = two 4-byte ints (name length and byte offset).
	    (when (< (.remaining buffer) (+ 8 name-length))
	      (flush-buffer buffer channel))
	    (doto buffer
	      (.putInt name-length)
	      (.put name-bytes)
	      (.putInt byte-offset))))

	(defn write-archive-trailer
	  "Writes the archive trailer using the :buffer, :channel and :entries
	   of archive-state: the \"NOZADIR\" intro and entry count, one
	   directory entry per element of :entries, then the \"NOZATAIL\" intro
	   and the channel position that was read on entry.  Returns what the
	   final flush-buffer call returns."
	  []
	  (let [{:keys [buffer channel entries]} archive-state
	        trailer-offset (.position channel)
	        ensure-room (fn [needed]
	                      (when (< (.remaining buffer) needed)
	                        (flush-buffer buffer channel)))]
	    (.put buffer (.getBytes "NOZADIR"))
	    (.putInt buffer (count entries))
	    (doseq [entry entries]
	      (write-trailer-entry channel buffer entry))
	    ;; 12 = the 8 characters of "NOZATAIL" plus a 4-byte int.
	    (ensure-room 12)
	    (.put buffer (.getBytes "NOZATAIL"))
	    (.putInt buffer trailer-offset)
	    (flush-buffer buffer channel)))

	(defn write-archive
	  "Writes a NoZip archive of files to output-channel.  Binds
	   archive-state to a fresh map holding the channel, a 2048-byte buffer
	   and an empty entry list, then writes the header, every file entry in
	   order, and the trailer.  Returns nil."
	  [output-channel files]
	  (let [initial-state {:channel output-channel
	                       :buffer (.clear (java.nio.ByteBuffer/allocate 2048))
	                       :entries []}]
	    (binding [archive-state initial-state]
	      (write-archive-header (count files))
	      (doseq [file files]
	        (write-file-entry file))
	      (write-archive-trailer))
	    nil))
	;; A driver program that allows testing any of the three nozip writers.
	(ns nozip.core
	(:require nozip.archive-monad
	nozip.archive-simple
	nozip.archive-var))

	(defn choose-version
	  "Maps version-name (\"monad\", \"simple\" or \"var\") to the
	   write-archive function of the corresponding namespace.  Throws
	   IllegalArgumentException naming the accepted values for anything
	   else, including nil."
	  [version-name]
	  (case version-name
	    "monad" nozip.archive-monad/write-archive
	    "simple" nozip.archive-simple/write-archive
	    "var" nozip.archive-var/write-archive
	    ;; Explicit default: same exception type that case throws on its
	    ;; own, but with a message that says what was expected.
	    (throw (IllegalArgumentException.
	            (str "Unknown writer version: " (pr-str version-name)
	                 " (expected \"monad\", \"simple\", or \"var\")")))))

	(defn basic-run
	  "Opens \"archive.nozip\" for writing, calls write-archive-fn with the
	   file's channel and files, and closes the channel and stream
	   afterwards.  Returns what write-archive-fn returns."
	  [write-archive-fn files]
	  (with-open [out-stream (java.io.FileOutputStream. "archive.nozip")
	              out-channel (.getChannel out-stream)]
	    (write-archive-fn out-channel files)))

	(defn benchmark-run
	  "Opens \"/dev/null\" for writing and, inside `time`, calls
	   write-archive-fn with its channel and the first 10000 elements of
	   files repeated cyclically.  Returns what write-archive-fn returns."
	  [write-archive-fn files]
	  (with-open [sink-stream (java.io.FileOutputStream. "/dev/null")
	              sink-channel (.getChannel sink-stream)]
	    (time
	     (write-archive-fn sink-channel
	                       (take 10000 (cycle files))))))

	(defn run-with
	  "Resolves the writer named by the first element of args, lists the
	   readable, non-hidden, non-directory files in the directory named by
	   the second element, and calls run-fn with the writer and the files.
	   Throws IllegalArgumentException when an argument is missing or the
	   directory cannot be listed."
	  [run-fn args]
	  ;; work around dangerous default behaviour in Clojure
	  ;; (the var is clojure.core/*read-eval*; a bare `read-eval` symbol
	  ;; does not resolve and fails at compile time)
	  (alter-var-root #'*read-eval* (constantly false))
	  (let [[version-name dir-name] args
	        _ (when-not (and version-name dir-name)
	            (throw (IllegalArgumentException.
	                    "Usage: <monad|simple|var> <directory>")))
	        write-archive-fn (choose-version version-name)
	        dir (java.io.File. dir-name)
	        file-filter (reify java.io.FileFilter
	                      (accept [this path]
	                        (and (not (.isDirectory path))
	                             (not (.isHidden path))
	                             (.canRead path))))
	        files (.listFiles dir file-filter)]
	    ;; listFiles returns nil (not an empty array) when dir does not
	    ;; denote a directory or cannot be read.  Without this check the
	    ;; writer would silently produce an archive with zero files.
	    (when (nil? files)
	      (throw (IllegalArgumentException.
	              (str "Not a readable directory: " dir-name))))
	    (run-fn write-archive-fn files)))

	;; Passes args to run-with together with basic-run, so the selected
	;; writer is run once through basic-run.
	(defn -main [& args]
	(run-with basic-run args))

	;; Passes args to run-with together with benchmark-run, so the
	;; selected writer is run through the timed benchmark-run.
	(defn benchmark [& args]
	(run-with benchmark-run args))