Last active
July 7, 2020 18:50
-
-
Save thiagomajesk/52b70b3a736d1664451130bb228a9a4a to your computer and use it in GitHub Desktop.
XML parsing benchmark
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Dependencies: {:saxy, "~> 1.2"}, {:sax_map, "~> 0.2"}, {:benchee, "~> 1.0"}
# HTTPoison (:httpoison) is also required to download the FEED and COMPLEX samples.
# | |
# Samples | |
# | |
# Small inline XML document (a product catalog) used as the "SIMPLE XML"
# benchmark input. Besides nested elements with attributes and repeated
# sibling tags, it contains an XML comment and several processing
# instructions, so parsers are exercised on more than plain elements.
simple = """ | |
<?xml version="1.0"?> | |
<?xml-stylesheet href="catalog.xsl" type="text/xsl"?> | |
<catalog> | |
<product description="Cardigan Sweater" product_image="cardigan.jpg"> | |
<catalog_item gender="Men's"> | |
<item_number>QWZ5671</item_number> | |
<price>39.95</price> | |
<!--Yet another comment--> | |
<size description="Medium"> | |
<color_swatch image="red_cardigan.jpg">Red</color_swatch> | |
<color_swatch image="burgundy_cardigan.jpg">Burgundy</color_swatch> | |
</size> | |
<size description="Large"> | |
<color_swatch image="red_cardigan.jpg">Red</color_swatch> | |
<color_swatch image="burgundy_cardigan.jpg">Burgundy</color_swatch> | |
</size> | |
</catalog_item> | |
<catalog_item gender="Women's"> | |
<item_number>RRX9856</item_number> | |
<price>42.50</price> | |
<?xml-stylesheet href="catalog.xsl" type="text/xsl"?> | |
<size description="Small"> | |
<?xml-stylesheet href="catalog.xsl" type="text/xsl"?> | |
<color_swatch image="red_cardigan.jpg">Red</color_swatch> | |
<color_swatch image="navy_cardigan.jpg">Navy</color_swatch> | |
<color_swatch image="burgundy_cardigan.jpg">Burgundy</color_swatch> | |
</size> | |
<size description="Medium"> | |
<color_swatch image="red_cardigan.jpg">Red</color_swatch> | |
<color_swatch image="navy_cardigan.jpg">Navy</color_swatch> | |
<color_swatch image="burgundy_cardigan.jpg">Burgundy</color_swatch> | |
<color_swatch image="black_cardigan.jpg">Black</color_swatch> | |
</size> | |
<size description="Large"> | |
<color_swatch image="navy_cardigan.jpg">Navy</color_swatch> | |
<color_swatch image="black_cardigan.jpg">Black</color_swatch> | |
</size> | |
<size description="Extra Large"> | |
<color_swatch image="burgundy_cardigan.jpg">Burgundy</color_swatch> | |
<color_swatch image="black_cardigan.jpg">Black</color_swatch> | |
</size> | |
</catalog_item> | |
</product> | |
</catalog> | |
""" | |
# Download the two larger benchmark inputs. Matching on `status_code: 200`
# makes the script stop right here with a MatchError when a download fails or
# is answered with a redirect/error page, instead of handing a non-XML body to
# the benchmarks below.
%{status_code: 200, body: feed} = HTTPoison.get!("https://www.gamespot.com/feeds/mashup/")

%{status_code: 200, body: complex} =
  HTTPoison.get!(
    "http://aiweb.cs.washington.edu/research/projects/xmltk/xmldata/data/treebank/treebank_e.xml"
  )
# | |
# Saxy.SimpleForm based implementation | |
# | |
defmodule Parser do
  @moduledoc """
  Converts `Saxy.SimpleForm` nodes into plain nested maps.

  Every element becomes a `tag => value` entry whose value is the element's
  text, a map built from its child elements, or `%{}` when it has no content.
  Sibling elements that share a tag are collected into a list in document
  order. Attributes are discarded.
  """

  @doc """
  Converts a list of simple-form nodes into a map, a string, or `%{}`.

  `nodes` is a list of `{tag, attrs, children}` tuples and text binaries.

    * When the list contains elements, a map is returned and any text between
      the elements is ignored (the map form has no slot for mixed content).
    * When the list contains only text, the text is returned. Several
      fragments are concatenated. Whitespace-only fragments that begin with a
      newline are treated as indentation and skipped.
    * An empty list (or only indentation) yields `%{}`.

  ## Examples

      iex> Parser.parse([{"a", [], [{"b", [], ["1"]}, {"b", [], ["2"]}]}])
      %{"a" => %{"b" => ["1", "2"]}}

      iex> Parser.parse(["39.95"])
      "39.95"

  """
  @spec parse(list()) :: map() | String.t()
  def parse(nodes) when is_list(nodes) do
    case Enum.split_with(nodes, &element?/1) do
      {[], fragments} -> parse_text(fragments)
      {elements, _ignored_text} -> parse_elements(elements)
    end
  end

  # True for simple-form element tuples, false for text and anything else.
  defp element?({_tag, _attrs, _children}), do: true
  defp element?(_other), do: false

  # Builds the tag => value map. Walking the elements in reverse lets repeated
  # tags be prepended to their list while still ending up in document order.
  defp parse_elements(elements) do
    elements
    |> Enum.reverse()
    |> Enum.reduce(%{}, fn {tag, _attrs, children}, acc ->
      value = parse(children)

      Map.update(acc, tag, value, fn
        later when is_list(later) -> [value | later]
        later -> [value, later]
      end)
    end)
  end

  # Collapses text-only content into a single value.
  defp parse_text(fragments) do
    case Enum.reject(fragments, &indentation?/1) do
      [] -> %{}
      [only] -> only
      several -> IO.iodata_to_binary(several)
    end
  end

  # Only whitespace-only text that starts with a newline counts as indentation;
  # text such as "\nhello" carries real content and must be kept.
  defp indentation?("\n" <> _ = text), do: String.trim(text) == ""
  defp indentation?(_other), do: false
end
# | |
# Tests | |
# | |
# Compare both parsers on the inline sample and on the downloaded feed.
Benchee.run(
  %{
    "SAXMap.from_string" => &SAXMap.from_string/1,
    "Parser.parse" => fn xml ->
      # Saxy produces the simple-form tree; Parser turns it into nested maps.
      {:ok, simple_form} = Saxy.SimpleForm.parse_string(xml)
      Parser.parse([simple_form])
    end
  },
  inputs: %{"SIMPLE XML" => simple, "FEED XML" => feed},
  time: 10,
  memory_time: 2
)
# | |
# Tests with complex files | |
# | |
# SAXMap.from_string is excluded from this run because it cannot process large files:
# on my hardware the benchmark had not finished after more than 15 minutes.
# Benchmark only the Saxy-based parser against the large downloaded document.
Benchee.run(
  %{
    "Parser.parse" => fn xml ->
      # Saxy produces the simple-form tree; Parser turns it into nested maps.
      {:ok, simple_form} = Saxy.SimpleForm.parse_string(xml)
      Parser.parse([simple_form])
    end
  },
  inputs: %{"COMPLEX" => complex},
  time: 10,
  memory_time: 2
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment