Created
February 18, 2014 05:19
-
-
Save danlentz/9065034 to your computer and use it in GitHub Desktop.
Clojure mmap utilx
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Copyright (c) Chris Houser, April 2008. All rights reserved. | |
; The use and distribution terms for this software are covered by the | |
; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) | |
; which can be found in the file epl-v10.html at the root of this distribution. | |
; By using this software in any fashion, you are agreeing to be bound by | |
; the terms of this license. | |
; You must not remove this notice, or any other, from this software. | |
; Functions for memory-mapping files, plus some functions that use a | |
; mmapped file for "normal" activities -- slurp, load-file, etc. | |
(ns | |
#^{:author "Chris Houser", | |
:doc "Functions for memory-mapping files, plus some functions that use a | |
mmaped file for \"normal\" activies -- slurp, load-file, etc."} | |
clojure.contrib.mmap | |
(:refer-clojure :exclude (slurp load-file)) | |
(:import (java.nio ByteBuffer CharBuffer) | |
(java.io PushbackReader InputStream InputStreamReader | |
FileInputStream))) | |
;(set! *warn-on-reflection* true) | |
;; Map mode handed to FileChannel.map by mmap: mappings are read-only.
;; NOTE(review): this var is public. If it is meant to be private, the
;; :private metadata has to be attached to the symbol being defined.
(def READ_ONLY
  java.nio.channels.FileChannel$MapMode/READ_ONLY)
(defn mmap
  "Memory-map the file named f. Returns a read-only ByteBuffer covering the
  whole file.

  f is passed straight to the FileInputStream constructor, so it may be a
  path string or a java.io.File.

  The underlying stream and channel are closed before returning. The
  mapping does not depend on the channel that created it, so the returned
  buffer stays valid."
  [f]
  ;; with-open closes the stream (and therefore its channel) even if .map
  ;; throws; previously both were leaked on every call.
  (with-open [stream (FileInputStream. f)]
    (let [channel (.getChannel stream)]
      (.map channel READ_ONLY 0 (.size channel)))))
(defn slurp
  "Memory-maps the file named by f and decodes its bytes as UTF-8.

  Returns the decoded text as a java.nio.CharBuffer (a CharSequence), not
  a java.lang.String; call str on the result if a String is required."
  [#^String f]
  (let [utf8    (java.nio.charset.Charset/forName "UTF-8")
        decoder (.newDecoder utf8)]
    (.decode decoder (mmap f))))
(defn buffer-stream
  "Returns an InputStream for a ByteBuffer, such as returned by mmap.

  The stream reads from buf's current position and advances it, so the
  buffer and the stream share state. available reports the bytes left in
  the buffer.

  read follows the java.io.InputStream contract:
    ()               -> next byte as an int in 0..255, or -1 at end of data
    (dst)            -> same as (dst 0 (count dst))
    (dst offset len) -> number of bytes copied into dst, 0 when len is 0,
                        or -1 at end of data"
  [#^ByteBuffer buf]
  (proxy [InputStream] []
    (available [] (.remaining buf))
    (read
      ;; ByteBuffer.get returns a signed byte; mask it so that values
      ;; >= 0x80 are not returned as negatives (0xFF would look like EOF).
      ([] (if (.hasRemaining buf)
            (bit-and (.get buf) 0xff)
            -1))
      ;; proxy routes every overload of read to this fn, so the
      ;; single-array overload needs its own arity.
      ([dst] (cond
               (zero? (count dst)) 0
               (not (.hasRemaining buf)) -1
               :else (let [actlen (min (.remaining buf) (count dst))]
                       (.get buf dst 0 actlen)
                       actlen)))
      ([dst offset len] (cond
                          ;; a zero-length request is not end-of-stream
                          (zero? len) 0
                          (not (.hasRemaining buf)) -1
                          :else (let [actlen (min (.remaining buf) len)]
                                  (.get buf dst offset actlen)
                                  actlen))))))
(defn load-file
  "Like clojure.core/load-file, but uses mmap internally.

  Memory-maps the file named by f, reads it as UTF-8 text and evaluates
  every form in it with load-reader. Returns whatever load-reader returns.
  The reader is closed when loading finishes or throws."
  [f]
  ;; Decode explicitly as UTF-8 rather than with the platform default
  ;; charset, matching slurp in this namespace.
  (with-open [rdr (PushbackReader.
                    (InputStreamReader. (buffer-stream (mmap f)) "UTF-8"))]
    (load-reader rdr)))
(comment | |
(alias 'mmap 'clojure.contrib.mmap) | |
(alias 'core 'clojure.core) | |
;--- | |
; zip_filter.clj is 95KB | |
(def tf "/home/chouser/build/clojure/src/clj/clojure/core.clj") | |
(println "\nload-file" tf) | |
(time (dotimes [_ 5] (core/load-file tf))) ; 5420.177813 msecs | |
(time (dotimes [_ 5] (mmap/load-file tf))) ; 7946.854434 msecs -- not so good | |
;--- | |
; kern.log.0 is 961KB | |
(def tf "/var/log/kern.log.0") | |
(println "\nslurp" tf) | |
(time (dotimes [_ 10] (.length (core/slurp tf)))) ; 435.767226 msecs | |
(time (dotimes [_ 10] (.length (mmap/slurp tf)))) ; 93.176858 msecs | |
;--- | |
; kern.log.0 is 961KB | |
(def tf "/var/log/kern.log.0") | |
(println "\nregex slurp large" tf) | |
(time (dotimes [_ 10] (count (re-seq #"EXT3.*" (core/slurp tf))))) ; 416 | |
(time (dotimes [_ 10] (count (re-seq #"EXT3.*" (mmap/slurp tf))))) ; 101 | |
;--- | |
; mmap.clj is about 3.1KB | |
(def tf "/home/chouser/proj/clojure-contrib/src/clojure/contrib/mmap.clj") | |
(println "\nregex slurp small" tf) | |
(time (dotimes [_ 1000] (count (re-seq #"defn \S*" (core/slurp tf))))) ; 308 | |
(time (dotimes [_ 1000] (count (re-seq #"defn \S*" (mmap/slurp tf))))) ; 198 | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment