Created
October 7, 2021 13:55
-
-
Save erhangundogan/3a3a76d436131fb0645190e0965a03ea to your computer and use it in GitHub Desktop.
Search and index files blazingly fast
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(** [extract_words str] finds every match of the pattern [\w+] in [str] and
    returns them in order of appearance as [(word, pos)] pairs, where [pos]
    is the start position of the match as reported by [Re.Group.start]. *)
let extract_words str =
  let word_re = Re.Pcre.regexp "\\w+" in
  (* Group 0 is the whole match: take both its text and where it begins. *)
  let describe m = (Re.Group.get m 0, Re.Group.start m 0) in
  str |> Re.all word_re |> List.map describe
(** [contains s1 s2] is [true] when [s2] occurs literally somewhere in [s1]
    and [false] otherwise. [s2] is matched as plain text, not as a regular
    expression. *)
let contains s1 s2 =
  let needle = Str.regexp_string s2 in
  (* Only success or failure matters; the match position is discarded. *)
  match Str.search_forward needle s1 0 with
  | _ -> true
  | exception Not_found -> false
(** [search htbl] prompts on standard output for a search string, reads one
    line from standard input, and prints every binding of [htbl] whose key
    contains that string, together with its row and column.

    [htbl] maps a word to a [(row, col)] pair. Bindings are visited in the
    order chosen by [Hashtbl.iter]. *)
let search htbl =
  let () = print_string "Please enter your search string: " in
  let search_string = read_line () in
  (* Print a binding only when its key contains the requested text. *)
  let report key (row, col) =
    if contains key search_string then
      Printf.printf "%s (row: %d, col: %d)\n" key row col
  in
  Hashtbl.iter report htbl
(** [create_stream channel] wraps the input channel [channel] in a stream
    that yields its lines one at a time, as returned by [input_line], and
    ends once [input_line] raises [End_of_file]. The channel is not closed
    by the stream. *)
let create_stream channel =
  (* The element index supplied by [Stream.from] is not needed. *)
  let next_line _ =
    match input_line channel with
    | line -> Some line
    | exception End_of_file -> None
  in
  Stream.from next_line
(** [process_file filename] reads [filename] line by line and returns a hash
    table that maps each word found by [extract_words] to a [(row, col)]
    pair. [row] is the 1-based number of the line the word was read from and
    [col] is the position returned by [extract_words] for that word.

    A word that occurs several times gets one binding per occurrence, since
    [Hashtbl.add] keeps earlier bindings instead of replacing them.

    The input channel is closed on both the normal and the exceptional path.
    Any exception raised while reading or indexing is re-raised after the
    channel has been closed; [open_in] itself may raise [Sys_error]. *)
let process_file filename =
  let words_hash_table = Hashtbl.create 1 in
  let in_channel = open_in filename in
  (* The line counter must live outside [add_words]: allocating it inside
     the callback would reset it for every line, so every word would be
     recorded with row 1. *)
  let row = ref 0 in
  let add_words line =
    incr row;
    let current_row = !row in
    extract_words line
    |> List.iter (fun (word, col) ->
           Hashtbl.add words_hash_table word (current_row, col))
  in
  try
    Stream.iter add_words (create_stream in_channel);
    close_in in_channel;
    words_hash_table
  with e ->
    (* Use the non-raising close so a failure while closing cannot hide the
       exception that is being propagated. *)
    close_in_noerr in_channel;
    raise e
(** Program entry point: index the file named by the first command-line
    argument, then run one interactive search against the index.

    When no file argument is supplied, a usage message is written to
    standard error and the program exits with status 1, rather than failing
    with an uncaught [Invalid_argument] from the out-of-bounds access to
    [Sys.argv]. *)
let () =
  if Array.length Sys.argv < 2 then begin
    Printf.eprintf "Usage: %s <file>\n" Sys.argv.(0);
    exit 1
  end;
  print_endline "Indexing...";
  Sys.argv.(1)
  |> process_file
  |> search
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment