Skip to content

Instantly share code, notes, and snippets.

@erhangundogan
Created October 7, 2021 13:55
Show Gist options
  • Select an option

  • Save erhangundogan/3a3a76d436131fb0645190e0965a03ea to your computer and use it in GitHub Desktop.

Select an option

Save erhangundogan/3a3a76d436131fb0645190e0965a03ea to your computer and use it in GitHub Desktop.
Search and index files blazing fast
(** [extract_words str] finds every match of the PCRE pattern [\w+] in [str]
    and returns the matches, in match order, as [(word, pos)] pairs, where
    [word] is the matched text and [pos] is the offset in [str] at which the
    match starts. *)
let extract_words str =
  let word_re = Re.Pcre.regexp "\\w+" in
  (* Group 0 is the whole match. *)
  let to_entry m =
    let word = Re.Group.get m 0 in
    let pos = Re.Group.start m 0 in
    (word, pos)
  in
  str |> Re.all word_re |> List.map to_entry
(** [contains s1 s2] is [true] when [s2] occurs somewhere in [s1] as a
    literal substring, and [false] otherwise. *)
let contains s1 s2 =
  (* [Str.regexp_string] matches [s2] literally, so regex metacharacters in
     [s2] have no special meaning. *)
  let needle = Str.regexp_string s2 in
  match Str.search_forward needle s1 0 with
  | _ -> true
  | exception Not_found -> false
(** [search htbl] prompts on standard output, reads one line from standard
    input, and prints every binding of [htbl] whose key contains that line as
    a substring, in the form [key (row: R, col: C)].
    @raise End_of_file if standard input is exhausted before a line is read. *)
let search htbl =
  print_string "Please enter your search string: ";
  let search_string = read_line () in
  let report key (row, col) =
    if contains key search_string then
      Printf.printf "%s (row: %d, col: %d)\n" key row col
  in
  Hashtbl.iter report htbl
(** [create_stream channel] is a stream that yields the lines of [channel]
    one at a time, each obtained with [input_line], and ends once
    [input_line] raises [End_of_file]. *)
let create_stream channel =
  (* The element index passed by [Stream.from] is not needed. *)
  let next_line _ =
    match input_line channel with
    | line -> Some line
    | exception End_of_file -> None
  in
  Stream.from next_line
(** [process_file filename] reads [filename] line by line and returns a hash
    table that maps each word found by [extract_words] to a position
    [(row, col)]. [row] is the 1-based number of the line the word was found
    on and [col] is the position [extract_words] reports for the word within
    that line. A word that occurs several times gets one binding per
    occurrence, because bindings are added with [Hashtbl.add].

    The input channel is closed before returning, including when reading or
    indexing raises.
    @raise Sys_error if [filename] cannot be opened. *)
let process_file filename =
  let words_hash_table = Hashtbl.create 1 in
  let in_channel = open_in filename in
  (* The counter lives outside [add_words] so that it keeps increasing across
     lines instead of restarting at 1 for each of them. *)
  let row = ref 0 in
  let add_words line =
    incr row;
    let current_row = !row in
    extract_words line
    |> List.iter (fun (word, col) ->
           Hashtbl.add words_hash_table word (current_row, col))
  in
  Fun.protect
    ~finally:(fun () -> close_in_noerr in_channel)
    (fun () ->
      Stream.iter add_words (create_stream in_channel);
      words_hash_table)
(* Entry point: indexes the file named by the first command-line argument,
   then runs one interactive search over the resulting index. When no file
   name is given, prints a usage message to standard error and exits with
   status 2 instead of failing with an out-of-bounds array access. *)
let () =
  if Array.length Sys.argv < 2 then begin
    Printf.eprintf "Usage: %s <file>\n" Sys.executable_name;
    exit 2
  end;
  print_endline "Indexing...";
  Sys.argv.(1) |> process_file |> search
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment