Created
October 6, 2022 21:08
-
-
Save Metaxal/695a6d135a72560db283562d518e49a8 to your computer and use it in GitHub Desktop.
fast load large amount of data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#lang racket | |
(require racket/fasl | |
racket/fixnum) | |
;;; Author: Laurent Orseau | |
;;; License: [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) or | |
;;; [MIT license](http://opensource.org/licenses/MIT) at your option. | |
#| Usage:
This needs to be done only once (unless the output format is changed):
$ racket bigfasl.rkt write
WARNING: This will write ~6GB to disk!
Then:
$ racket bigfasl.rkt read
On my machine:
Write:
cpu time: 313596 real time: 313843 gc time: 1078
Read:
|#
;; Directory in which the main submodule creates and reads the .fasl files.
(define dir "/tmp/bigfasl")

;; Number of files produced by the `write` command.
;; Lower this (e.g. to 10000) for smaller batches.
(define n-files 100000)
;; fasl-file->value : path-string? -> any/c
;; Opens the file `f` for input, decodes its fasl-encoded content with
;; `fasl->s-exp`, and returns the decoded value.
;; `call-with-input-file*` closes the port when the reader finishes or escapes.
(define (fasl-file->value f)
  (call-with-input-file* f
    (λ (in-port)
      (fasl->s-exp in-port))))
;; write-fasl-to-file : any/c path-string? -> void?
;; Serializes `v` with `s-exp->fasl` into the file `f`.
;; The file is opened with the default `#:exists` mode of
;; `call-with-output-file*`, so writing to an already existing file
;; raises an error.
(define (write-fasl-to-file v f)
  (define (emit-to port)
    (s-exp->fasl v port))
  (call-with-output-file* f emit-to))
;; The output format is in lists because fasl doesn't accept fxvectors | |
;; generate-file-content : -> (list/c (listof integer) (listof (listof integer)))
;; Builds one random payload made of two lists of the same length `count`,
;; where `count` is drawn from [20, 119]:
;;   1. `count` random integers in [0, 9];
;;   2. `count` rows, each holding 150 random integers in [0, 2^30 - 1].
(define (generate-file-content)
  (define count (+ 20 (random 100)))
  (define upper-bound (expt 2 30))
  ;; The short list is drawn first, then the rows, so random numbers are
  ;; consumed in a fixed order.
  (define small-values
    (for/list ([i (in-range count)])
      (random 10)))
  (define rows
    (for/list ([i (in-range count)])
      (for/list ([j (in-range 150)])
        (random upper-bound))))
  (list small-values rows))
;============; | |
;=== Main ===; | |
;============; | |
(module+ main
  ;; Command-line entry point.
  ;;   write -- regenerate `dir` and fill it with `n-files` random fasl files
  ;;   read  -- load every file found in `dir`, then convert the lists to
  ;;            vectors and fxvectors
  ;; Any other argument list prints a usage line on stderr instead of
  ;; silently doing nothing.

  (define usage-text "Usage: racket bigfasl.rkt (write | read)\n")

  ;; Generate the data and write it to disk.
  ;; Any existing `dir` is deleted first, so every file path is fresh.
  (define (run-write)
    (delete-directory/files dir #:must-exist? #f)
    (make-directory* dir)
    (printf "Writing to disk in ~a. Please be patient.\n" dir)
    (time
     (for ([i (in-range n-files)])
       (define f (build-path dir (format "~a.fasl" i)))
       (write-fasl-to-file (generate-file-content) f))))

  ;; Read the data from disk and swap the lists for vectors and fxvectors.
  ;; Both phases are wrapped in `time` so their cost is reported separately.
  (define (run-read)
    ;; Fail with an actionable message rather than a raw filesystem error
    ;; when the data has not been generated yet.
    (unless (directory-exists? dir)
      (raise-user-error
       'bigfasl
       "directory ~a does not exist; run the `write` command first"
       dir))
    (displayln "Reading from disk")
    (define res
      (time
       (for/list ([f (in-list (directory-list dir #:build? #t))])
         (fasl-file->value f))))
    (displayln "Converting to vectors and fxvectors")
    ;; The converted data is not used afterwards; only the timing matters.
    (void
     (time
      (for/vector #:length (length res) ([p (in-list res)])
        (vector (apply fxvector (first p))
                (for/vector #:length (length (second p))
                            ([ctxs (in-list (second p))])
                  (apply fxvector ctxs))))))
    (displayln "Finished"))

  (match (current-command-line-arguments)
    [(vector "write") (run-write)]
    [(vector "read") (run-read)]
    [_ (eprintf usage-text)]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment