Skip to content

Instantly share code, notes, and snippets.

@DmitrySoshnikov
Created April 19, 2011 19:55
Show Gist options
  • Save DmitrySoshnikov/929448 to your computer and use it in GitHub Desktop.
Save DmitrySoshnikov/929448 to your computer and use it in GitHub Desktop.
main([]) ->
%% Entry point; this clause matches only an empty argument list.
%% Runs strip_html/1 on the sample tree from example_data/0 and logs the result.
%%
%% Problem statement
%% -----------------
%% Given a parsed HTML tree in the following format:
%%
%% Types:
%%   HTML      = [Element]
%%   Element   = {Tag, [Attribute], [Element | Text]}
%%   Tag       = atom()    % e.g. 'a', 'pre', 'p'
%%   Attribute = {Name, Value}
%%   Name      = atom()
%%   Value     = string()
%%   Text      = iolist()
%%
%% write a function with the following signature:
%%
%%   strip_html(HTML) -> string().
%%
%% which strips the HTML formatting and returns only
%% the raw text contents of the given HTML tree.
%%
%% NOTE(review): the call below passes a single Element tuple, and the logged
%% value shown is a nested list of strings rather than one flat string();
%% ?LOG is not defined in the visible code — confirm where it comes from.
Res = strip_html(example_data()),
?LOG(Res). % expected: [["An example text",["Test",["V"]],"Data"]]
%% Sample parsed-HTML tree used by main/1.
%%
%% Shape: an 'a' element (with an href attribute) wrapping three levels of
%% nested 'b' elements. The tree is assembled from the innermost element
%% outwards so that each level of nesting is visible on its own line.
example_data() ->
    Innermost = {b, [], ["V"]},
    Middle = {b, [], ["Test", Innermost]},
    Outer = {b, [], ["An example text", Middle, "Data"]},
    {a, [{href, "http://fprog.ru/"}], [Outer]}.
%% Collects the text held under one element of the tree.
%%
%% Takes a single {Tag, Attributes, Children} tuple; the tag and attributes
%% are ignored. Each child that is a list is treated as text and kept as is;
%% any other child is processed recursively as a nested element. The result
%% mirrors the nesting of the tree (one sub-list per nested element), so it
%% is a deep list of strings rather than a single flat string.
strip_html({_Tag, _Attrs, Children}) ->
    Strip = fun
        (Text) when is_list(Text) -> Text;
        (Element) -> strip_html(Element)
    end,
    lists:map(Strip, Children).
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment