Created
February 27, 2012 20:18
-
-
Save seancribbs/1926748 to your computer and use it in GitHub Desktop.
Module to simplify reading and manipulating XML in Erlang
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%% @doc Uses SAX to convert an XML document into a simple nested-tuple
%% structure. Ignores namespaces.
-module(xmlsimple). | |
-export([file/1, | |
string/1, | |
emit/1, | |
emit_file/2]). | |
-include_lib("xmerl/include/xmerl.hrl"). | |
%% Options passed to xmerl_sax_parser:stream/2 by string/1. Parser events are
%% folded by event/3 into an accumulator that starts as the empty list, and
%% continuation/1 is the callback the parser uses to ask for more input.
-define(SAX, [
    {continuation_fun, fun continuation/1},
    {event_fun, fun event/3},
    {event_state, []},
    {file_type, normal},
    {encoding, utf8},
    skip_external_dtd
]).
%% An XML element as produced by the parser and accepted by emit/1: the tag
%% name, its attributes as a property list, and its children (nested elements
%% and text) in document order.
-type tag() :: {
    TagName :: atom(),
    Attributes :: [proplists:property()],
    Children :: [tag() | string()]
}.
%% A one-argument test used with lists:splitwith/2 while unwinding the
%% parser stack.
-type predicate() :: fun((term()) -> boolean()).
%% The attribute is `-export_type'; the misspelt `-export_types' is accepted
%% as an arbitrary user attribute and silently exports nothing.
-export_type([tag/0]).
%% @doc Parses an XML document from a file.
%% The whole file is read into memory and handed to {@link string/1}, whose
%% result is returned unchanged. When the file cannot be read, the
%% `{error, Reason}' tuple from `file:read_file/1' is returned instead.
%% Parse failures are therefore NOT `{error, _}': they are whatever
%% `xmerl_sax_parser:stream/2' reports, which its documentation describes as
%% `{Tag, Location, Reason, EndTags, EventState}' (e.g. `Tag = fatal_error').
-spec file(file:name()) ->
    {ok, tag() | [tag()]}
    | {error, term()}
    | {Tag :: atom(), Location :: term(), Reason :: term(),
       EndTags :: term(), EventState :: term()}.
file(Filename) ->
    case file:read_file(Filename) of
        {ok, Contents} ->
            string(Contents);
        ReadError ->
            ReadError
    end.
%% @doc Parses an XML document from a list or binary.
%% Returns `{ok, Element}' when the parser's final state holds exactly one
%% element, and `{ok, State}' with the raw final state otherwise.
%% Anything else `xmerl_sax_parser:stream/2' returns is passed through
%% untouched. Its documentation describes failures as the 5-tuple
%% `{Tag, Location, Reason, EndTags, EventState}' (e.g. `Tag = fatal_error'),
%% so callers must not rely on an `{error, _}' shape for parse errors.
-spec string(iodata()) ->
    {ok, tag() | [tag()]}
    | {error, term()}
    | {Tag :: atom(), Location :: term(), Reason :: term(),
       EndTags :: term(), EventState :: term()}.
string(Input) when is_binary(Input); is_list(Input) ->
    case xmerl_sax_parser:stream(Input, ?SAX) of
        {ok, [Root], _Rest} ->
            %% A single element on the stack: unwrap it for convenience.
            {ok, Root};
        {ok, Stack, _Rest} ->
            {ok, Stack};
        Failure ->
            Failure
    end.
%% @doc Writes an Erlang nested-tuple as described by {@link tag/0}
%% into an XML document in a file.
%% `Doc' is serialised with {@link emit/1} and the result is written to
%% `Filename' with `file:write_file/2', whose return value is returned.
-spec emit_file(file:name(), tag() | [tag()]) -> ok | {error, term()}.
emit_file(Filename, Doc) ->
    %% Pass the variable Doc; the lowercase atom `doc' would serialise a
    %% literal <doc/> element regardless of the argument.
    file:write_file(Filename, emit(Doc)).
%% @doc Serialises one element, or a list of elements, shaped like
%% {@link tag/0} into an XML document using `xmerl:export_simple/2' with the
%% `xmerl_xml' callback module. A non-list argument is wrapped in a
%% one-element list first.
-spec emit(tag() | [tag()]) -> iodata().
emit(Docs) when is_list(Docs) ->
    xmerl:export_simple(Docs, xmerl_xml);
emit(Doc) ->
    emit([Doc]).
%% Continuation callback for the SAX parser: always supplies an empty binary
%% as the next chunk and hands the continuation state back unchanged.
-spec continuation(term()) -> {binary(), term()}.
continuation(ContinuationState) ->
    NoMoreInput = <<>>,
    {NoMoreInput, ContinuationState}.
%% SAX event callback. The state is a stack (most recent item first) holding:
%%   * `{Tag, Attrs, undefined}' for an element whose end tag is still pending,
%%   * `{Tag, Attrs, Children}' for a completed element,
%%   * the atom `cdata' marking the start of an open CDATA section,
%%   * strings of character data.
%% Events not listed below leave the state untouched.
-spec event(xmerl_sax_parser:event(), tuple(), term()) -> term().
event({startElement, _Uri, LocalName, _QName, Attrs}, _Location, State) ->
    %% NOTE(review): list_to_atom/1 on document-supplied names creates atoms
    %% that are never garbage collected; only parse trusted input this way.
    Tag = list_to_atom(LocalName),
    AttrPList = lists:map(fun attr_to_pair/1, Attrs),
    %% `undefined' children mark the element as still open.
    [{Tag, AttrPList, undefined} | State];
event({endElement, _Uri, LocalName, _QName}, _Location, State) ->
    %% The atom already exists: the matching startElement created it.
    Tag = list_to_existing_atom(LocalName),
    %% Everything pushed since the element was opened is one of its children.
    {Children, [{Tag, Attrs, _} | Stack]} =
        lists:splitwith(tag_predicate(Tag), State),
    %% The stack is newest-first, so reverse to restore document order.
    [{Tag, Attrs, lists:reverse(Children)} | Stack];
event(startCDATA, _Location, State) ->
    [cdata | State];
event(endCDATA, _Location, State) ->
    %% Collapse all text pushed since the `cdata' marker into one string.
    {Text, [cdata | Stack]} = lists:splitwith(fun cdata_predicate/1, State),
    [lists:flatten(lists:reverse(Text)) | Stack];
event({characters, String}, _Location, [Chars | Rest]) when is_list(Chars) ->
    %% Merge with the text already on top of the stack. The merged string
    %% replaces that entry; pushing it on top of the old entry would emit the
    %% earlier text twice.
    [Chars ++ String | Rest];
event({characters, String}, _Location, [_ | _] = State) ->
    [String | State];
event(_Event, _Location, State) ->
    State.
%% Turns a 4-tuple SAX attribute into a `{Name, Value}' property, converting
%% the third element (the name) to an atom and keeping the fourth (the value)
%% as is. The first two elements are ignored.
-spec attr_to_pair(tuple()) -> proplists:property().
attr_to_pair({_, _, Name, Value}) ->
    Key = list_to_atom(Name),
    {Key, Value}.
%% Builds the predicate used with lists:splitwith/2 to collect the children
%% of the element named `T': it returns `true' for every stack item until it
%% reaches the still-open element `{T, _, undefined}', where it returns
%% `false'. Completed elements carry a list in the third position, so an
%% already-closed child that happens to share the name (as in
%% `<a><a/></a>') is correctly treated as a child rather than as the opener.
-spec tag_predicate(atom()) -> predicate().
tag_predicate(T) ->
    fun({Tag, _Attrs, undefined}) when Tag =:= T -> false;
       (_) -> true
    end.
%% Predicate for lists:splitwith/2 when closing a CDATA section: `true' for
%% every stack item except the `cdata' marker itself.
-spec cdata_predicate(term()) -> boolean().
cdata_predicate(Item) ->
    Item =/= cdata.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment