Last active
November 25, 2023 20:09
-
-
Save miikka/30d996ed52d0e5854af9 to your computer and use it in GitHub Desktop.
Using Hypothesis to generate XML documents from a RELAX NG schema
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import string | |
from lxml import etree | |
from lxml.etree import QName | |
from lxml.builder import E | |
from hypothesis import strategies as st | |
# Namespace URI of RELAX NG schema elements; combined with a local name via
# QName(NS, ...) to compare against the tags of parsed schema nodes.
NS = "http://relaxng.org/ns/structure/1.0"
def pairsum(pairs):
    """Concatenate a sequence of pairs of lists, component-wise.

    Args:
        pairs: Iterable of pairs ``(first, second)`` where both members
            are lists (or other iterables).

    Returns:
        A tuple ``(firsts, seconds)`` of two new lists: all ``first``
        members concatenated in order, and all ``second`` members
        concatenated in order. An empty input gives ``([], [])``.
    """
    firsts = []
    seconds = []
    # A single pass with extend() is linear; sum(lists, []) re-copies the
    # accumulated list on every addition and needs two passes over `pairs`.
    for pair in pairs:
        firsts.extend(pair[0])
        seconds.extend(pair[1])
    return (firsts, seconds)
def parse_grammar(root):
    """Split a RELAX NG ``<grammar>`` element into start pattern and defines.

    Args:
        root: The parsed ``<grammar>`` element.

    Returns:
        A tuple ``(start, defs)``. ``start`` is the single pattern element
        found inside ``<start>`` (``None`` if the grammar has no
        ``<start>``). ``defs`` maps each ``<define>`` name to the single
        pattern element it contains.

    Raises:
        AssertionError: If a child of the grammar does not contain exactly
            one pattern, if ``<start>`` occurs more than once, or if the
            same name is defined twice.
    """
    start = None
    defs = {}
    # Iterate the element directly; getchildren() is deprecated in lxml.
    for child in root:
        # XML comments carry no grammar information; skip them here the same
        # way the generator functions in this module do.
        if child.tag is etree.Comment:
            continue
        # Ignore comments next to the wrapped pattern as well, so that they
        # are neither counted nor mistaken for the pattern itself.
        patterns = [node for node in child if node.tag is not etree.Comment]
        assert len(patterns) == 1
        pattern = patterns[0]
        if child.tag == QName(NS, 'start'):
            assert start is None
            start = pattern
        elif child.tag == QName(NS, 'define'):
            name = child.get('name')
            assert name not in defs
            defs[name] = pattern
    return (start, defs)
@st.composite
def gen_children(draw, root, defs):
    """Strategy: generate content for each child pattern of *root*, in order.

    Args:
        draw: Hypothesis draw function supplied by ``st.composite``.
        root: Schema element whose child patterns are instantiated.
        defs: Mapping of ``<define>`` names to pattern elements, passed
            through to ``trees``.

    Returns:
        A tuple ``(attrs, children)`` holding the concatenated attribute
        pairs and the concatenated child nodes produced by ``trees`` for
        every non-comment child of *root*.
    """
    attrs = []
    children = []
    # Iterate the element directly; getchildren() is deprecated in lxml.
    for child_tag in root:
        if child_tag.tag is etree.Comment:
            continue
        attr, child = draw(trees(child_tag, defs))
        attrs.extend(attr)
        children.extend(child)
    return (attrs, children)
@st.composite
def attributes(draw, root, defs):
    """Strategy: generate one ``(name, value)`` pair for an ``<attribute>``.

    Args:
        draw: Hypothesis draw function supplied by ``st.composite``.
        root: The ``<attribute>`` schema element.
        defs: Mapping of ``<define>`` names to pattern elements (not used
            by this function).

    Returns:
        A tuple ``(name, value)``. The name comes from the ``name``
        attribute, a ``<name>`` child, or is random for ``<anyName>``.
        The value is random text for ``<text>`` (or when no value pattern
        is given), or one of the listed alternatives for ``<choice>``.

    Raises:
        Exception: If *root* has a child pattern that is not handled.
        AssertionError: If no attribute name could be determined.
    """
    def draw_text():
        return draw(st.text(alphabet=string.ascii_letters, min_size=1))

    name = root.get('name')
    if name is not None:
        # RELAX NG strips leading/trailing whitespace from name attributes.
        name = name.strip()
    value = None

    # Iterate the element directly; getchildren() is deprecated in lxml.
    for child_tag in root:
        if child_tag.tag is etree.Comment:
            continue
        if child_tag.tag == QName(NS, 'name'):
            # The content of <name> is whitespace-stripped as well.
            text = child_tag.text
            name = text.strip() if text is not None else None
        elif child_tag.tag == QName(NS, 'anyName'):
            name = draw_text()
        elif child_tag.tag == QName(NS, 'text'):
            value = draw_text()
        elif child_tag.tag == QName(NS, 'choice'):
            values = []
            for value_tag in child_tag:
                if value_tag.tag is etree.Comment:
                    continue
                # An empty <value/> has text None but denotes the empty
                # string; None is not a usable attribute value.
                values.append(value_tag.text or '')
            value = draw(st.sampled_from(values))
        else:
            raise Exception(
                'How to attribute {}?'.format(etree.tostring(root))
            )

    # No value is same as <text/>
    if value is None:
        value = draw_text()

    assert name is not None
    return (name, value)
@st.composite
def trees(draw, root, defs):
    """Strategy: generate content matching the RELAX NG pattern *root*.

    Args:
        draw: Hypothesis draw function supplied by ``st.composite``.
        root: Schema element describing the pattern to instantiate.
        defs: Mapping of ``<define>`` names to their pattern elements,
            used to resolve ``<ref>``.

    Returns:
        A tuple ``(attrs, children)``. ``attrs`` is a list of
        ``(name, value)`` attribute pairs destined for the enclosing
        element; ``children`` is a list of generated elements and text
        strings.

    Raises:
        Exception: If *root* is a pattern this generator does not handle.
        KeyError: If a ``<ref>`` names a definition missing from *defs*.
    """
    if root.tag == QName(NS, 'element'):
        attrs, children = draw(gen_children(root, defs))
        return ([], [E(root.get('name'), dict(attrs), *children)])
    elif root.tag == QName(NS, 'optional'):
        return pairsum(draw(st.lists(gen_children(root, defs), max_size=1)))
    elif root.tag == QName(NS, 'zeroOrMore'):
        # No average_size: that argument was removed from st.lists in
        # Hypothesis 4.0 and passing it raises TypeError.
        return pairsum(draw(st.lists(gen_children(root, defs))))
    elif root.tag == QName(NS, 'oneOrMore'):
        return pairsum(draw(st.lists(gen_children(root, defs), min_size=1)))
    elif root.tag == QName(NS, 'choice'):
        # Comments are element children in lxml, so filter them out before
        # building the alternatives.
        alternatives = [
            trees(option, defs)
            for option in root
            if option.tag is not etree.Comment
        ]
        return draw(st.one_of(*alternatives))
    elif root.tag == QName(NS, 'interleave'):
        # XXX(miikka) Not 100% if shuffling is valid way to implement
        # <interleave>. Maybe I should write a randomized test for this...
        attrs, children = draw(gen_children(root, defs))
        draw(st.randoms()).shuffle(children)
        return (attrs, children)
    elif root.tag == QName(NS, 'group'):
        return draw(gen_children(root, defs))
    elif root.tag == QName(NS, 'ref'):
        # defs stores the defined pattern itself, so it must be instantiated
        # with trees(); gen_children() would only generate that pattern's
        # children and silently drop e.g. the defined <element>.
        return draw(trees(defs[root.get('name')], defs))
    elif root.tag == QName(NS, 'text'):
        # XXX(miikka) Should generate more than just ASCII letters!
        return ([], [draw(st.text(alphabet=string.ascii_letters))])
    elif root.tag == QName(NS, 'empty'):
        return ([], [])
    elif root.tag == QName(NS, 'attribute'):
        attr = draw(attributes(root, defs))
        return ([attr], [])
    else:
        raise Exception('What to do with {}?'.format(etree.tostring(root)))
@st.composite
def xml(draw, root):
    """Strategy: generate one XML element from the RELAX NG schema *root*.

    *root* is either a ``<grammar>`` element, which is split into its start
    pattern and definitions by ``parse_grammar``, or a bare pattern that is
    used as the start pattern with no definitions.

    Returns the single generated element. Asserts that the start pattern
    produced no stray attributes and exactly one node.
    """
    is_grammar = root.tag == QName(NS, 'grammar')
    start, defs = parse_grammar(root) if is_grammar else (root, {})

    top_attrs, nodes = draw(trees(start, defs))
    assert top_attrs == []
    assert len(nodes) == 1
    return nodes[0]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from hypothesis import given | |
from lxml import etree | |
import pytest | |
import schema | |
# Minimal schema: a <root> element holding a single empty <a> element.
A_SCHEMA = etree.fromstring(
    """
<element name="root" xmlns="http://relaxng.org/ns/structure/1.0">
<element name="a"><empty /></element>
</element>
"""
)
@given(schema.xml(A_SCHEMA))
def test_simple_schema(tree):
    """A document generated from A_SCHEMA is <root> with one <a> child."""
    # Driven by @given like the other tests: calling .example() outside an
    # interactive session triggers a Hypothesis warning.
    assert tree.tag == 'root'
    # list(element) replaces the deprecated getchildren().
    children = list(tree)
    assert len(children) == 1
    assert children[0].tag == 'a'
# Schema exercising <text>, <zeroOrMore>, <optional>, <choice> and <group>.
B_SCHEMA = etree.fromstring(
    """
<element name="root" xmlns="http://relaxng.org/ns/structure/1.0">
<element name="a">
<text/>
</element>
<zeroOrMore>
<element name="b"><empty/></element>
</zeroOrMore>
<optional>
<element name="c"><empty/></element>
</optional>
<choice>
<element name="d"><empty/></element>
<group>
<element name="e"><empty/></element>
<element name="f"><empty/></element>
</group>
</choice>
</element>
"""
)
@given(schema.xml(B_SCHEMA))
def test_complex_schema(tree):
    """Every document generated from B_SCHEMA validates against B_SCHEMA."""
    validator = etree.RelaxNG(B_SCHEMA)
    document_is_valid = validator.validate(tree)
    assert document_is_valid
# Schema for a <root> element carrying a free-text attribute "a".
ATTR_SCHEMA = etree.fromstring(
    """
<element name="root" xmlns="http://relaxng.org/ns/structure/1.0">
<attribute name="a"><text/></attribute>
</element>
"""
)
@given(schema.xml(ATTR_SCHEMA))
def test_attributes(tree):
    """Documents generated from ATTR_SCHEMA always carry attribute "a"."""
    attribute_value = tree.get('a')
    assert attribute_value is not None
# Grammar-form schema: <start> produces <foo>; the "bar" definition is
# declared but not referenced from <start>.
GRAMMAR_SCHEMA = etree.fromstring(
    """
<grammar xmlns="http://relaxng.org/ns/structure/1.0">
<start><element name="foo"><empty/></element></start>
<define name="bar">
<element name="bar"><empty/></element>
</define>
</grammar>
"""
)
@given(schema.xml(GRAMMAR_SCHEMA))
def test_grammar(tree):
    """Documents generated from GRAMMAR_SCHEMA validate against it."""
    validator = etree.RelaxNG(GRAMMAR_SCHEMA)
    document_is_valid = validator.validate(tree)
    assert document_is_valid
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment