Skip to content

Instantly share code, notes, and snippets.

@c-cube
Last active February 2, 2017 21:09
Show Gist options
  • Select an option

  • Save c-cube/347e3b42e081beb4a3ed3d42751bdbf2 to your computer and use it in GitHub Desktop.

Select an option

Save c-cube/347e3b42e081beb4a3ed3d42751bdbf2 to your computer and use it in GitHub Desktop.
memory map and iterators
#require "bigstring";;
#require "sequence";;
(* the goal is to count how many times each character occurs in the file *)
(* path of the input file; both snippets below pass it to
   [Bigstring.with_map_file] *)
let file = "foo.txt";;
(* simple way: let [Sequence.count] tally the occurrences of each character.
   A sequence is an iterator that only runs when it is consumed, so the
   result is converted to a list inside the callback, while the memory map
   handed out by [Bigstring.with_map_file] is still in scope; only the
   finished [(char * int) list] leaves the callback. *)
Bigstring.with_map_file file
  (fun map ->
    Bigstring.to_seq map
    (* [Char.equal] rather than physical equality [(==)]: same answer on
       characters, without relying on their runtime representation *)
    |> Sequence.count ~eq:Char.equal ~hash:Char.code
    |> Sequence.to_list);;
(* faster variant: tally into a 256-slot array indexed by character code
   instead of going through [Sequence.count] *)
let tally =
  Bigstring.with_map_file file (fun map ->
    let tally = Array.make 256 0 in
    (* increment the slot that belongs to character [ch] *)
    let bump ch =
      let code = Char.code ch in
      tally.(code) <- tally.(code) + 1
    in
    Sequence.iter bump (Bigstring.to_seq map);
    tally)
in
(* pair every slot with the character whose code is the slot index *)
Sequence.to_list
  (Sequence.mapi (fun code n -> (Char.chr code, n)) (Sequence.of_array tally));;
- : (char * int) list = [('\000', 0); ('\001', 1); ('\002', 0); ('\003', 0); ('\004', 0); ('\005', 1); ('\006', 0); ('\007', 0); ('\b', 16); ('\t', 522); ('\n', 2099229);
('\011', 1); ('\012', 3); ('\r', 0); ('\014', 3); ('\015', 0); ('\016', 5);
('\017', 2); ('\018', 6); ('\019', 4); ('\020', 18); ('\021', 11);
('\022', 0); ('\023', 0); ('\024', 0); ('\025', 2); ('\026', 0); ('\027', 0);
('\028', 2); ('\029', 0); ('\030', 5); ('\031', 0); (' ', 20840762);
('!', 59367); ('"', 100185); ('#', 2108037); ('$', 5307); ('%', 14740);
('&', 9896); ('\'', 923550); ('(', 85067); (')', 109244); ('*', 51687);
('+', 76874); (',', 617766); ('-', 4439814); ('.', 474605); ('/', 337361);
('0', 1313849); ('1', 2274585); ('2', 1664401); ('3', 939300); ('4', 766156);
('5', 771176); ('6', 403695); ('7', 434962); ('8', 436393); ('9', 429836);
(':', 4677759); (';', 15915); ('<', 2108126); ('=', 40917); ('>', 2123814);
('?', 250588); ('@', 78918); ('A', 188558); ('B', 104461); ('C', 362273);
('D', 139689); ('E', 350802); ('F', 44410); ('G', 47290); ('H', 35732);
('I', 81485); ('J', 44832); ('K', 22029); ('L', 63064); ('M', 121858);
('N', 116143); ('O', 100993); ('P', 362956); ('Q', 13608); ('R', 199619);
('S', 111420); ('T', 315954); ('U', 41387); ('V', 21828); ('W', 22347);
('X', 15129); ('Y', 24217); ('Z', 9483); ('[', 9168); ('\\', 16248);
(']', 11030); ('^', 27633); ('_', 450510); ('`', 11821); ('a', 8722743);
('b', 1269696); ('c', ...); ...]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment