Skip to content

Instantly share code, notes, and snippets.

@vyzo
Created October 25, 2019 10:46
Show Gist options
  • Save vyzo/b14faeb04c1b91ab2353b0a7c7fdfcba to your computer and use it in GitHub Desktop.
Save vyzo/b14faeb04c1b91ab2353b0a7c7fdfcba to your computer and use it in GitHub Desktop.
wc with hand-rolled vector-backed string hash table (that doesn't resize)
(import :std/net/bio
:std/net/bio/output
:std/net/bio/file
:std/os/fdio
:std/sort
:std/srfi/1)
(export main)
(declare (not safe))
;; shim to use an output buffer on raw file descriptors
(def (fd-output-drain! buf need)
(fd-output-write (&output-buffer-e buf) 0 (&output-buffer-wlo buf) buf)
(set! (&output-buffer-wlo buf) 0))
(def (fd-output-write bytes start end buf)
(let ((fd (&file-output-buffer-fd buf))
(bytes (&output-buffer-e buf)))
(_write fd bytes start end)))
(def (open-stdout-buffer (bufsz 4096))
(make-file-output-buffer (make-u8vector bufsz) 0 bufsz
fd-output-drain!
fd-output-write
1))
(def (read-next-word buf)
(let lp ((chars []))
(let (next (bio-read-char buf))
(cond
((eof-object? next)
(if (null? chars)
next
(list->string (reverse! chars))))
((or (eq? next #\space) (eq? next #\newline))
(list->string (reverse! chars)))
(else
(lp (cons next chars)))))))
(def (main path)
(let ((words (make-string-table 1000000))
(buf (open-file-input-buffer path 8192)))
(let lp ()
(let (word (read-next-word buf))
(unless (eof-object? word)
(string-table-update! words word fx1+ 0)
(lp))))
(let (obuf (open-stdout-buffer 8192))
(for-each (lambda (x)
(bio-write-string (car x) obuf)
(bio-write-char #\space obuf)
(bio-write-string (number->string (cdr x)) obuf)
(bio-write-char #\newline obuf))
(sort! (string-table->list words)
(lambda (a b) (> (cdr a) (cdr b)))))
(bio-force-output obuf))))
;;; faster hash tables; quick and dirty vector backed implementation that doesn't resize
(def (make-string-table size)
(make-vector size #f))
(def (string-table-update! tab str update default)
(let* ((h (##string=?-hash str))
(idx (fxmodulo h (vector-length tab))))
(cond
((vector-ref tab idx)
=> (lambda (sth)
(cond
((string? (car sth))
(if (equal? (car sth) str)
(set! (cdr sth) (update (cdr sth)))
(vector-set! tab idx (list (cons str (update default)) sth))))
((assoc str sth)
=> (lambda (p)
(set! (cdr p) (update (cdr p)))))
(else
(vector-set! tab idx (cons (cons str (update default)) sth))))))
(else
(vector-set! tab idx (cons str (update default)))))))
(def (string-table->list tab)
(let (end (vector-length tab))
(let lp ((i 0) (r []))
(if (fx< i end)
(cond
((vector-ref tab i)
=> (lambda (sth)
(if (string? (car sth))
(lp (fx1+ i) (cons sth r))
(lp (fx1+ i) (foldl cons r sth)))))
(else
(lp (fx1+ i) r)))
r))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment