Created
October 24, 2019 23:02
-
-
Save vyzo/c650a23226f377400286813b1c49bb1b to your computer and use it in GitHub Desktop.
wc with tries and raw devices; atrocities!
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(import :gerbil/gambit/ports | |
:std/net/bio | |
:std/sort) | |
(export main) | |
(declare (not safe)) | |
(def +nl+ | |
(char->integer #\newline)) | |
(def +space+ | |
(char->integer #\space)) | |
;; words are represented as lists of u8s in reverse | |
(def (read-next-word buf) | |
(let lp ((bytes [])) | |
(let (next (bio-read-u8 buf)) | |
(cond | |
((eof-object? next) | |
(if (null? bytes) | |
next | |
bytes)) | |
((or (eq? next +space+) (eq? next +nl+)) | |
(if (null? bytes) | |
(lp bytes) | |
bytes)) | |
(else | |
(lp (cons next bytes))))))) | |
(def (show word count) | |
(let (u8v (list->u8vector word)) | |
(write-u8vector u8v) | |
(write-u8 +space+) | |
(write count) | |
(write-u8 +nl+))) | |
(def (main path) | |
(let ((words (make-trie)) | |
(buf (open-file-input-buffer path))) | |
(let lp () | |
(let (word (read-next-word buf)) | |
(unless (eof-object? word) | |
(trie-update! words word fx1+ 0) | |
(lp)))) | |
(for-each (lambda (x) (show (car x) (cdr x))) | |
(sort! (trie->list words) | |
(lambda (a b) (> (cdr a) (cdr b))))))) | |
;;;; quick and dirty trie implementation | |
(def trie-length 128) | |
(def (make-trie) | |
(make-vector trie-length #f)) | |
(defstruct leaf (value) | |
final: #t unchecked: #t) | |
(def (trie-update! trie word update default) | |
(match word | |
([u8 . rest] | |
(cond | |
((vector-ref trie u8) | |
=> (lambda (sth) | |
(cond | |
((vector? sth) | |
(if (null? rest) | |
(let (new-sth (cons (make-leaf (update default)) sth)) | |
(vector-set! trie u8 new-sth)) | |
(trie-update! sth rest update default))) | |
((pair? sth) | |
(if (null? rest) | |
(let (leaf (car sth)) | |
(&leaf-value-set! leaf (update (&leaf-value leaf)))) | |
(trie-update! (cdr sth) rest update default))) | |
((leaf? sth) | |
(if (null? rest) | |
(&leaf-value-set! sth (update (&leaf-value sth))) | |
(let* ((new-trie (make-trie)) | |
(new-sth (cons sth new-trie))) | |
(vector-set! trie u8 new-sth) | |
(trie-update! new-trie rest update default)))) | |
(else | |
(if (null? rest) | |
(let (new-sth (make-leaf (update default))) | |
(vector-set! trie u8 new-sth)) | |
(let (new-trie (make-trie)) | |
(vector-set! trie u8 new-trie) | |
(trie-update! new-trie rest update default))))))) | |
(else | |
(if (null? rest) | |
(vector-set! trie u8 (make-leaf (update default))) | |
(let (new-trie (make-trie)) | |
(vector-set! trie u8 new-trie) | |
(trie-update! new-trie rest update default)))))))) | |
(def (trie->list trie) | |
(let recur ((trie trie) (word []) (r [])) | |
(if trie | |
(let lp ((i 0) (r r)) | |
(if (fx< i trie-length) | |
(let ((word* (cons i word)) | |
(sth (vector-ref trie i))) | |
(cond | |
((vector? sth) | |
(lp (fx1+ i) | |
(recur sth word* r))) | |
((pair? sth) | |
(lp (fx1+ i) | |
(recur (cdr sth) word* (cons (cons word* (&leaf-value (car sth))) r)))) | |
((leaf? sth) | |
(lp (fx1+ i) | |
(cons (cons word* (&leaf-value sth)) r))) | |
(else | |
(lp (fx1+ i) r)))) | |
r)) | |
r))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment