Last active
March 15, 2016 22:24
-
-
Save aantron/34271c1a4d051d6c966f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
let take limit html = | |
let spaces = Str.regexp "[ \t\r\n]+" in | |
let rec repeat acc n v = if n > 0 then repeat (v::acc) (n - 1) v else acc in | |
let filter_signals = | |
(0, 0) | |
|> Markup.transform (fun ((seen, unclosed_elements) as state) signal -> | |
match signal with | |
| `Text ss -> | |
let tokens = ss |> String.concat "" |> Str.split spaces in | |
(* Count tokens in the current `Text signal, accumulating into seen, up | |
to limit. If limit is reached, create a new signal containing only | |
the tokens up to limit. Otherwise, keep the original signal. *) | |
let rec text_to_emit acc seen = function | |
| [] -> signal, seen | |
| token::rest when seen < limit -> | |
text_to_emit (token::acc) (seen + 1) rest | |
| _::_ -> `Text [List.rev acc |> String.concat " "], seen | |
in | |
let signal, seen = text_to_emit [] seen tokens in | |
(* If limit was not reached, emit the current text signal, and | |
continue. Otherwise, if limit is reached, emit the current | |
(potentially truncated) signal, followed by an `End_element signal | |
for each unclosed element, and stop. *) | |
if seen < limit then | |
[signal], Some (seen, unclosed_elements) | |
else | |
signal::(repeat [] unclosed_elements `End_element), None | |
| `Start_element _ -> [signal], Some (seen, unclosed_elements + 1) | |
| `End_element -> [signal], Some (seen, unclosed_elements - 1) | |
| _ -> [signal], Some state) | |
in | |
Markup.(string html | |
|> parse_html |> signals |> filter_signals |> write_html |> to_string) | |
let () = " | |
<p>foo</p> | |
<p> | |
bar <a href='/'>click here</a> | |
</p>" | |
|> take 3 | |
|> print_endline | |
(* Output: | |
<p>foo</p> | |
<p> | |
bar <a href="/">click</a></p> | |
*) | |
(* Compile and run: | |
ocamlfind opt -linkpkg -package markup -package str take.ml && ./a.out | |
*) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment