Created
October 26, 2019 21:11
-
-
Save vyzo/c881086663ebd93c8cac86c3b7971850 to your computer and use it in GitHub Desktop.
wc using a string buffer and all the other tricks
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(import :std/net/bio | |
:std/net/bio/output | |
:std/net/bio/file | |
:std/os/fdio | |
:std/sort | |
:std/srfi/1) | |
(export main) | |
(declare (not safe)) | |
;; shim to use an output buffer on raw file descriptors | |
(def (fd-output-drain! buf need) | |
(fd-output-write (&output-buffer-e buf) 0 (&output-buffer-wlo buf) buf) | |
(set! (&output-buffer-wlo buf) 0)) | |
(def (fd-output-write bytes start end buf) | |
(let ((fd (&file-output-buffer-fd buf)) | |
(bytes (&output-buffer-e buf))) | |
(_write fd bytes start end))) | |
(def (open-stdout-buffer (bufsz 4096)) | |
(make-file-output-buffer (make-u8vector bufsz) 0 bufsz | |
fd-output-drain! | |
fd-output-write | |
1)) | |
(def str-buffer (make-string 1024)) | |
(def (read-next-word buf) | |
(let lp ((i 0)) | |
(let (next (bio-read-char buf)) | |
(cond | |
((eof-object? next) | |
(if (fx> i 0) | |
(substring str-buffer 0 i) | |
next)) | |
((or (eq? next #\space) (eq? next #\newline)) | |
(substring str-buffer 0 i)) | |
(else | |
(string-set! str-buffer i next) | |
(lp (fx1+ i))))))) | |
(def (main path) | |
(let ((words (make-string-table 1000000)) | |
(buf (open-file-input-buffer path 8192))) | |
(let lp () | |
(let (word (read-next-word buf)) | |
(unless (eof-object? word) | |
(string-table-update! words word fx1+ 0) | |
(lp)))) | |
(let (obuf (open-stdout-buffer 8192)) | |
(for-each (lambda (x) | |
(bio-write-string (car x) obuf) | |
(bio-write-char #\space obuf) | |
(bio-write-string (number->string (cdr x)) obuf) | |
(bio-write-char #\newline obuf)) | |
(sort! (string-table->list words) | |
(lambda (a b) (> (cdr a) (cdr b))))) | |
(bio-force-output obuf)))) | |
;;; faster hash tables; quick and dirty vector backed implementation that doesn't resize | |
(def (make-string-table size) | |
(make-vector size #f)) | |
(def (string-table-update! tab str update default) | |
(let* ((h (##string=?-hash str)) | |
(idx (fxmodulo h (vector-length tab)))) | |
(cond | |
((vector-ref tab idx) | |
=> (lambda (sth) | |
(cond | |
((string? (car sth)) | |
(if (equal? (car sth) str) | |
(set! (cdr sth) (update (cdr sth))) | |
(vector-set! tab idx (list (cons str (update default)) sth)))) | |
((assoc str sth) | |
=> (lambda (p) | |
(set! (cdr p) (update (cdr p))))) | |
(else | |
(vector-set! tab idx (cons (cons str (update default)) sth)))))) | |
(else | |
(vector-set! tab idx (cons str (update default))))))) | |
(def (string-table->list tab) | |
(let (end (vector-length tab)) | |
(let lp ((i 0) (r [])) | |
(if (fx< i end) | |
(cond | |
((vector-ref tab i) | |
=> (lambda (sth) | |
(if (string? (car sth)) | |
(lp (fx1+ i) (cons sth r)) | |
(lp (fx1+ i) (foldl cons r sth))))) | |
(else | |
(lp (fx1+ i) r))) | |
r)))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment