Skip to content

Instantly share code, notes, and snippets.

@ytomino
Last active April 18, 2022 19:03
Show Gist options
  • Save ytomino/add1f09660c88b15a0f08f0627145041 to your computer and use it in GitHub Desktop.
Save ytomino/add1f09660c88b15a0f08f0627145041 to your computer and use it in GitHub Desktop.
Sorting URLs
let split_domain = String.split_on_char '.';;
let url_re =
Str.regexp "^\\([^:]*://\\)?\\([^/]*\\)\\(/[^?]*\\)?\\(\\?.*\\)?$";;
let split_url x =
let protocol, domain, path, query =
if Str.string_match url_re x 0 then (
let protocol = try Str.matched_group 1 x with Not_found -> "" in
let domain = Str.matched_group 2 x in
let path = try Str.matched_group 3 x with Not_found -> "" in
let query = try Str.matched_group 4 x with Not_found -> "" in
protocol, domain, path, query
) else "", x, "", ""
in
protocol, domain, List.rev (split_domain domain), path, query
let contents_and_clear buf =
let r = Buffer.contents buf in
Buffer.clear buf;
r;;
let rec input_line_and_eol buf c =
match input_char c with
| '\n' ->
contents_and_clear buf, true
| e ->
Buffer.add_char buf e;
input_line_and_eol buf c
| exception End_of_file when Buffer.length buf > 0 ->
contents_and_clear buf, false;;
let xs, eol =
let buf = Buffer.create 128 in
let rec read_inputs buf xs eol =
match input_line_and_eol buf stdin with
| x, eol ->
let xs = split_url x :: xs in
if eol then read_inputs buf xs true else
List.rev xs, false
| exception End_of_file ->
List.rev xs, eol
in read_inputs buf [] false;;
let compare_url a b =
let a_protocol, _, a_rev_domain, a_path, a_query = a in
let b_protocol, _, b_rev_domain, b_path, b_query = b in
let r = compare a_rev_domain b_rev_domain in
if r <> 0 then r else
let r = String.compare a_path b_path in
if r <> 0 then r else
let r = String.compare a_query b_query in
if r <> 0 then r else
let r = String.compare a_protocol b_protocol in r;;
let sorted = List.stable_sort compare_url xs in
let out =
List.fold_left (fun out x ->
if out then print_newline ();
let protocol, domain, _, path, query = x in
print_string protocol;
print_string domain;
print_string path;
print_string query;
true
) false sorted
in
if out && eol then print_newline();;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment