Created
November 17, 2009 00:43
-
-
Save minimal/236492 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Discussed in detail at http://technomancy.us/130 | |
(ns wide-finder | |
"A basic map/reduce approach to the wide finder using agents. | |
Optimized for being idiomatic and readable rather than speed." | |
(:use [clojure.contrib.duck-streams :only [reader]])) | |
;; Pattern for a GET request whose path begins with digits; the single
;; capture group holds those digits and is used as the page key.
(def re #"GET /(\d+)")
(defn inc-or-init
  "Return 1 when i is nil (or false), i.e. the first hit for a key;
  otherwise return i incremented by one."
  [i]
  (if-not i
    1
    (inc i)))
(defn count-line
  "Return counts with the tally for the page requested in line bumped by one.
  A line that does not match `re` leaves counts unchanged."
  [counts line]
  (let [[_ hit :as match] (re-find re line)]
    (if match
      ;; hit is the captured digit string; a missing key starts at 1.
      (update-in counts [hit] inc-or-init)
      counts)))
(defn count-lines
  "Fold every line in lines into counts with count-line and return the
  resulting map. An empty lines collection returns counts as given."
  [counts lines]
  (loop [acc counts
         remaining (seq lines)]
    (if remaining
      (recur (count-line acc (first remaining)) (next remaining))
      acc)))
(defn find-widely
  "Return a map of pages to hit counts in filename.

  The lines of the file are dealt round-robin to n agents, each starting
  from an empty map and tallying its share with count-line. Once every
  agent has drained its queue the per-agent maps are summed with
  merge-with +. The reader on filename is closed before returning, even
  if dispatching throws."
  [filename n]
  ;; Each agent begins as an empty map. Realize the seq up front so the
  ;; set of agents is fixed before any work is sent.
  (let [agents (doall (map agent (repeat n {})))]
    ;; For each line in the file, send an agent the job of counting it.
    ;; dorun forces the whole file to be read while the reader is open.
    (with-open [rdr (reader filename)]
      (dorun (map #(send %1 count-line %2)
                  (cycle agents) ; infinite seq of all agents
                  (line-seq rdr))))
    ;; Wait for every agent to finish its queued work.
    (apply await agents)
    ;; Reduce the results into a single count value.
    (apply merge-with + (map deref agents))))
(defn find-widely-chunked
  "Return a map of pages to hit counts in filename, sending work to the
  agents in chunks rather than one line at a time.

  The lines of the file are grouped into chunks of chunksize lines (the
  final chunk may be shorter) and the chunks are dealt round-robin to n
  agents, each tallying a chunk with count-lines. The per-agent maps are
  summed with merge-with +. The reader on filename is closed before
  returning, even if dispatching throws."
  [filename n chunksize]
  ;; Each agent begins as an empty map. Realize the seq up front so the
  ;; set of agents is fixed before any work is sent.
  (let [agents (doall (map agent (repeat n {})))]
    (with-open [rdr (reader filename)]
      ;; For each chunk of lines, send an agent the job of counting it.
      ;; The nil pad makes partition keep the trailing partial chunk;
      ;; the two-argument form would drop it and under-count the file.
      (dorun (map #(send %1 count-lines %2)
                  (cycle agents) ; infinite seq of all agents
                  (partition chunksize chunksize nil (line-seq rdr)))))
    ;; Wait for every agent to finish its queued work.
    (apply await agents)
    ;; Reduce the results into a single count value.
    (apply merge-with + (map deref agents))))
(defn find-widely-single
  "Non parallel version: return a map of pages to hit counts in filename
  by reducing count-line over every line on the calling thread.
  The reader on filename is closed before returning, even on failure."
  [filename]
  ;; reduce is eager, so the file is fully consumed inside with-open.
  (with-open [rdr (reader filename)]
    (reduce count-line {} (line-seq rdr))))
(defn testall
  "Time the 2-agent, 4-agent and single-threaded counters against the same
  log file, printing each labelled result after its elapsed time.
  Returns a vector of the three println results."
  []
  (let [log "n:/tmp/log.txt"]
    ;; Vector elements are evaluated left to right, so the runs stay ordered.
    [(println "parallel 2 core" (time (find-widely log 2)))
     (println "parallel 4 cores" (time (find-widely log 4)))
     (println "non parallel" (time (find-widely-single log)))]))
;; Run the basic comparison, then time the chunked and single-threaded
;; counters against the second log file.
(testall)
(let [log2 "n:/tmp/log2.txt"]
  (println "chunked parallel" (time (find-widely-chunked log2 4 50)))
  (println "non parallel" (time (find-widely-single log2))))
;; "Elapsed time: 915.114642 msecs"
;; non parallel {1 16681, 2 16677, 3 16710, 4 16332, 5 16581, 6 16693, 7 16575, 8 16746, 9 16741, 10 16525} | |
;; "Elapsed time: 3214.297911 msecs" | |
;; parallel {1 16681, 2 16677, 3 16710, 4 16332, 5 16581, 6 16693, 7 16575, 8 16746, 9 16741, 10 16525} | |
;; "Elapsed time: 4546.566562 msecs" | |
;; parallel 2 core {1 16681, 2 16677, 3 16710, 4 16332, 5 16581, 6 16693, 7 16575, 8 16746, 9 16741, 10 16525} | |
;; "Elapsed time: 3575.291225 msecs" | |
;; parallel 4 cores {1 16681, 2 16677, 3 16710, 4 16332, 5 16581, 6 16693, 7 16575, 8 16746, 9 16741, 10 16525} | |
;; "Elapsed time: 909.037236 msecs" | |
;; non parallel {1 16681, 2 16677, 3 16710, 4 16332, 5 16581, 6 16693, 7 16575, 8 16746, 9 16741, 10 16525} | |
;; "Elapsed time: 540.495585 msecs" | |
;; chunked parallel 4 cores {1 16681, 2 16677, 3 16710, 4 16332, 5 16581, 6 16693, 7 16575, 8 16746, 9 16741, 10 16525} | |
;; "Elapsed time: 5421.381623 msecs" | |
;; 40MB log chunked parallel {1 167009, 2 167039, 3 166517, 4 166548, 5 166321, 6 166705, 7 166552, 8 166615, 9 166281, 10 167314} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment