Last active
February 2, 2017 21:09
-
-
Save c-cube/347e3b42e081beb4a3ed3d42751bdbf2 to your computer and use it in GitHub Desktop.
memory map and iterators
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #require "bigstring";; | |
| #require "sequence";; | |
| (* the goal is to count how many times each character occurs in the file *) | |
| (* path of the input file; both snippets below pass it to [Bigstring.with_map_file] *) | |
| let file = "foo.txt";; | |
| (* simple way *) | |
| (* Count occurrences of each character with [Sequence.count]. | |
| Sequence pipelines only run when consumed, so the list is built inside | |
| the callback: the mapped buffer is read while [with_map_file] is still | |
| in scope rather than after it has returned. *) | |
| Bigstring.with_map_file file | |
| (fun map -> | |
| Bigstring.to_seq map | |
| (* chars are immediate values, so [==] is a valid equality here, | |
| and [Char.code] gives a distinct hash for each distinct char *) | |
| |> Sequence.count ~eq:(==) ~hash:Char.code | |
| |> Sequence.to_list);; | |
| (* more efficient way (counting with an array) *) | |
| (* Tally every byte of the mapped file into a 256-slot table indexed by | |
| character code, then pair each slot with its character. *) | |
| Bigstring.with_map_file file | |
| (fun mapped -> | |
| let counts = Array.make 256 0 in | |
| (* increment the slot belonging to one character *) | |
| let bump ch = | |
| let code = Char.code ch in | |
| counts.(code) <- succ counts.(code) | |
| in | |
| Sequence.iter bump (Bigstring.to_seq mapped); | |
| counts) | |
| |> Array.mapi (fun code n -> (Char.chr code, n)) | |
| |> Array.to_list;; | |
| - : (char * int) list = [('\000', 0); ('\001', 1); ('\002', 0); ('\003', 0); ('\004', 0); ('\005', 1); ('\006', 0); ('\007', 0); ('\b', 16); ('\t', 522); ('\n', 2099229); | |
| ('\011', 1); ('\012', 3); ('\r', 0); ('\014', 3); ('\015', 0); ('\016', 5); | |
| ('\017', 2); ('\018', 6); ('\019', 4); ('\020', 18); ('\021', 11); | |
| ('\022', 0); ('\023', 0); ('\024', 0); ('\025', 2); ('\026', 0); ('\027', 0); | |
| ('\028', 2); ('\029', 0); ('\030', 5); ('\031', 0); (' ', 20840762); | |
| ('!', 59367); ('"', 100185); ('#', 2108037); ('$', 5307); ('%', 14740); | |
| ('&', 9896); ('\'', 923550); ('(', 85067); (')', 109244); ('*', 51687); | |
| ('+', 76874); (',', 617766); ('-', 4439814); ('.', 474605); ('/', 337361); | |
| ('0', 1313849); ('1', 2274585); ('2', 1664401); ('3', 939300); ('4', 766156); | |
| ('5', 771176); ('6', 403695); ('7', 434962); ('8', 436393); ('9', 429836); | |
| (':', 4677759); (';', 15915); ('<', 2108126); ('=', 40917); ('>', 2123814); | |
| ('?', 250588); ('@', 78918); ('A', 188558); ('B', 104461); ('C', 362273); | |
| ('D', 139689); ('E', 350802); ('F', 44410); ('G', 47290); ('H', 35732); | |
| ('I', 81485); ('J', 44832); ('K', 22029); ('L', 63064); ('M', 121858); | |
| ('N', 116143); ('O', 100993); ('P', 362956); ('Q', 13608); ('R', 199619); | |
| ('S', 111420); ('T', 315954); ('U', 41387); ('V', 21828); ('W', 22347); | |
| ('X', 15129); ('Y', 24217); ('Z', 9483); ('[', 9168); ('\\', 16248); | |
| (']', 11030); ('^', 27633); ('_', 450510); ('`', 11821); ('a', 8722743); | |
| ('b', 1269696); ('c', ...); ...] | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment