Monoids for map/reduce in scheme
(use-modules (srfi srfi-1) (ice-9 threads))

;; some functions to generate page content
(define page-contents (list "foo" "bar" "baz" "qux"))

(define generate-page
  (lambda (contents times)
    (cond
      ((zero? times) '())
      (else (append contents (generate-page contents (- times 1)))))))

(define generate-document
  (lambda (page number-of-pages)
    (cond
      ((zero? number-of-pages) '())
      (else (cons page (generate-document page (- number-of-pages 1)))))))

;; combine pages into a single page
(define combine-pages
  (lambda (page1 page2)
    (append page1 page2)))

;; mapper
(define count-words
  (lambda (page)
    (length page)))

;; slow mapper
(define count-words-slow
  (lambda (page)
    (begin
      (usleep 500) ; pause 500 microseconds to simulate expensive work
      (length page))))

;; reducer
(define combine-wordcounts
  (lambda (c1 c2)
    (+ c1 c2)))
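
;; Note: both reducers form monoids, which is what makes map/reduce
;; work. append is associative with identity '(), and + is associative
;; with identity 0, so a reduction may be regrouped (per page, per
;; chunk, or across threads) without changing the result:
;;   (combine-wordcounts (combine-wordcounts a b) c)
;;     = (combine-wordcounts a (combine-wordcounts b c))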

;; Naive way: first join all pages, then count the words
(define single-effort
  (lambda (document)
    (count-words (reduce combine-pages '() document))))

;; Map/Reduce way: first count the words per page, then sum
(define combined-effort
  (lambda (document)
    (let ((wordcounts (map count-words document)))
      (reduce combine-wordcounts 0 wordcounts))))

;; Parallel Map/Reduce way: first count the words per page in parallel, then sum
(define parallel-effort
  (lambda (document)
    (let ((wordcounts (par-map count-words document)))
      (reduce combine-wordcounts 0 wordcounts))))
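
;; Sketch of a variant: Guile's n-par-map (also in (ice-9 threads))
;; works like par-map but caps the number of threads at n, which makes
;; it easy to experiment with the thread count.
(define parallel-effort-n
  (lambda (document n)
    (let ((wordcounts (n-par-map n count-words document)))
      (reduce combine-wordcounts 0 wordcounts))))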

;; To try this out, start guile and enter these commands:
;; (load "mapreduce.scm")
;; (define page (generate-page page-contents 1000))
;; (define document (generate-document page 1000))
;; ,time (single-effort document)
;; ,time (combined-effort document)
;; ,time (parallel-effort document)
;; The parallel effort is currently slower because guile's par-map is not
;; well optimised: it spawns no more threads than there are hardware
;; processors, and its internal mutexes and locks add heavy per-task
;; overhead. Slow.
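
;; Sketch (assumption: fewer, larger tasks amortise par-map's per-task
;; locking overhead): because the reducers are monoids we can regroup
;; freely, so split the document into chunks, reduce each chunk to a
;; partial count in parallel, then combine the partial counts.
(define chunk
  (lambda (lst size)
    (if (null? lst)
        '()
        (let ((n (min size (length lst))))
          (cons (take lst n) (chunk (drop lst n) size))))))

(define chunked-effort
  (lambda (document chunk-size)
    (let ((partials (par-map
                      (lambda (pages)
                        (reduce combine-wordcounts 0 (map count-words pages)))
                      (chunk document chunk-size))))
      (reduce combine-wordcounts 0 partials))))
;; e.g. ,time (chunked-effort document 100)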