Created
January 1, 2010 06:47
-
-
Save rapha/267064 to your computer and use it in GitHub Desktop.
word count in OCaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open Batteries
(** [files path] enumerates the paths found under [path].

    When [Shell.is_directory path] is false the result is the singleton
    enumeration holding [path] itself. Otherwise every entry returned by
    [Shell.readdir path] is prefixed with [path ^ "/"], explored
    recursively, and the per-entry enumerations are flattened into one. *)
let rec files path =
  if not (Shell.is_directory path) then Enum.singleton path
  else
    let prefix = path ^ "/" in
    let entries = Array.enum (Shell.readdir path) in
    Enum.flatten (map (fun entry -> files (prefix ^ entry)) entries)
(** [read_file filename] returns the result of [IO.read_all] applied to the
    input that [File.with_file_in] provides for [filename]. *)
let read_file filename =
  File.with_file_in filename (fun input -> IO.read_all input)
(** [words text] splits [text] on every run of characters that are not
    ASCII letters, underscores, apostrophes or hyphens, and enumerates the
    resulting pieces after passing each through [String.lowercase].

    The hyphen is written last in the bracket expression so that [Str]
    reads it literally. [Str] has no escape syntax inside brackets, so the
    earlier backslash-hyphen spelling also added a literal backslash to the
    set, which made backslashes count as word characters. *)
let words =
  Str.split (Str.regexp "[^a-zA-Z_'-]+") |- List.enum |- map String.lowercase
(** [count_into table] is a consumer of word enumerations: for each word it
    looks up the current tally in [table] (taking 0 when the word has no
    binding yet) and stores that tally plus one back under the same key. *)
let count_into table =
  let bump word =
    let seen = Hashtbl.find_default table word 0 in
    Hashtbl.replace table word (succ seen)
  in
  iter bump
(* Entry point: tally the words of every file reachable from the path given
   as the first command-line argument, then print one "word<TAB>count" line
   per distinct word, in ascending order of count. *)
let () =
  (* Without an argument, [Sys.argv.(1)] would fail with a bare
     [Invalid_argument]; report the expected usage instead. The exit status
     stays at 2, the status the uncaught exception used to produce. *)
  if Array.length Sys.argv < 2 then begin
    Printf.eprintf "usage: %s PATH\n" Sys.argv.(0);
    exit 2
  end;
  let table = Hashtbl.create 4096 in
  Sys.argv.(1) |> files |> map read_file |> iter (words |- count_into table);
  table |> Hashtbl.enum |> List.of_enum
  |> List.fast_sort (fun (_, count1) (_, count2) -> Standard.compare count1 count2)
  |> List.iter (fun (word, count) -> Printf.printf "%s\t%d\n" word count)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment