Created
October 25, 2019 10:46
-
-
Save vyzo/b14faeb04c1b91ab2353b0a7c7fdfcba to your computer and use it in GitHub Desktop.
wc with hand-rolled vector-backed string hash table (that doesn't resize)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(import :std/net/bio | |
:std/net/bio/output | |
:std/net/bio/file | |
:std/os/fdio | |
:std/sort | |
:std/srfi/1) | |
(export main) | |
(declare (not safe)) | |
;; shim to use an output buffer on raw file descriptors | |
(def (fd-output-drain! buf need) | |
(fd-output-write (&output-buffer-e buf) 0 (&output-buffer-wlo buf) buf) | |
(set! (&output-buffer-wlo buf) 0)) | |
(def (fd-output-write bytes start end buf) | |
(let ((fd (&file-output-buffer-fd buf)) | |
(bytes (&output-buffer-e buf))) | |
(_write fd bytes start end))) | |
(def (open-stdout-buffer (bufsz 4096)) | |
(make-file-output-buffer (make-u8vector bufsz) 0 bufsz | |
fd-output-drain! | |
fd-output-write | |
1)) | |
(def (read-next-word buf) | |
(let lp ((chars [])) | |
(let (next (bio-read-char buf)) | |
(cond | |
((eof-object? next) | |
(if (null? chars) | |
next | |
(list->string (reverse! chars)))) | |
((or (eq? next #\space) (eq? next #\newline)) | |
(list->string (reverse! chars))) | |
(else | |
(lp (cons next chars))))))) | |
(def (main path) | |
(let ((words (make-string-table 1000000)) | |
(buf (open-file-input-buffer path 8192))) | |
(let lp () | |
(let (word (read-next-word buf)) | |
(unless (eof-object? word) | |
(string-table-update! words word fx1+ 0) | |
(lp)))) | |
(let (obuf (open-stdout-buffer 8192)) | |
(for-each (lambda (x) | |
(bio-write-string (car x) obuf) | |
(bio-write-char #\space obuf) | |
(bio-write-string (number->string (cdr x)) obuf) | |
(bio-write-char #\newline obuf)) | |
(sort! (string-table->list words) | |
(lambda (a b) (> (cdr a) (cdr b))))) | |
(bio-force-output obuf)))) | |
;;; faster hash tables; quick and dirty vector backed implementation that doesn't resize | |
(def (make-string-table size) | |
(make-vector size #f)) | |
(def (string-table-update! tab str update default) | |
(let* ((h (##string=?-hash str)) | |
(idx (fxmodulo h (vector-length tab)))) | |
(cond | |
((vector-ref tab idx) | |
=> (lambda (sth) | |
(cond | |
((string? (car sth)) | |
(if (equal? (car sth) str) | |
(set! (cdr sth) (update (cdr sth))) | |
(vector-set! tab idx (list (cons str (update default)) sth)))) | |
((assoc str sth) | |
=> (lambda (p) | |
(set! (cdr p) (update (cdr p))))) | |
(else | |
(vector-set! tab idx (cons (cons str (update default)) sth)))))) | |
(else | |
(vector-set! tab idx (cons str (update default))))))) | |
(def (string-table->list tab) | |
(let (end (vector-length tab)) | |
(let lp ((i 0) (r [])) | |
(if (fx< i end) | |
(cond | |
((vector-ref tab i) | |
=> (lambda (sth) | |
(if (string? (car sth)) | |
(lp (fx1+ i) (cons sth r)) | |
(lp (fx1+ i) (foldl cons r sth))))) | |
(else | |
(lp (fx1+ i) r))) | |
r)))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment