Skip to content

Instantly share code, notes, and snippets.

@miikka
Last active November 25, 2023 20:09
Show Gist options
  • Save miikka/30d996ed52d0e5854af9 to your computer and use it in GitHub Desktop.
Save miikka/30d996ed52d0e5854af9 to your computer and use it in GitHub Desktop.
Using Hypothesis to generate XML based on RELAX-NG
import string
from lxml import etree
from lxml.etree import QName
from lxml.builder import E
from hypothesis import strategies as st
NS = "http://relaxng.org/ns/structure/1.0"
def pairsum(pairs):
    """Merge a list of ``(list, list)`` pairs into a single pair of lists.

    The first members of all pairs are concatenated in order, and so are the
    second members.  An empty input yields ``([], [])``.
    """
    firsts = sum([pair[0] for pair in pairs], [])
    seconds = sum([pair[1] for pair in pairs], [])
    return (firsts, seconds)
def parse_grammar(root):
    """Split a RELAX NG ``<grammar>`` element into its start pattern and defines.

    XML comments are ignored, both directly under ``<grammar>`` and inside
    ``<start>`` / ``<define>``, matching how the pattern generators in this
    module treat comments.

    Args:
        root: The ``<grammar>`` element of a parsed RELAX NG schema.

    Returns:
        A ``(start, defs)`` tuple.  ``start`` is the single pattern element
        found inside ``<start>`` (``None`` if the grammar has no ``<start>``)
        and ``defs`` maps every ``<define>`` name to its single pattern
        element.

    Raises:
        AssertionError: If a grammar child does not contain exactly one
            pattern, if ``<start>`` appears more than once, or if a define
            name is repeated.
    """
    start = None
    defs = {}
    for child in root:
        # Comments carry no grammar information; without this guard they
        # would trip the "exactly one pattern" assertion below.
        if child.tag is etree.Comment:
            continue
        patterns = [node for node in child if node.tag is not etree.Comment]
        assert len(patterns) == 1
        if child.tag == QName(NS, 'start'):
            assert start is None
            start = patterns[0]
        elif child.tag == QName(NS, 'define'):
            name = child.get('name')
            assert name not in defs
            defs[name] = patterns[0]
        # Any other grammar child is checked by the assertion above and then
        # ignored.
    return (start, defs)
@st.composite
def gen_children(draw, root, defs):
    """Strategy combining the generated output of every child pattern of *root*.

    Each non-comment child of ``root`` is drawn through ``trees`` in document
    order; the attribute lists and the child-node lists of all draws are
    concatenated.

    Drawn value: an ``(attrs, children)`` tuple of two lists.
    """
    all_attrs, all_nodes = [], []
    for pattern in root:
        if pattern.tag is not etree.Comment:
            new_attrs, new_nodes = draw(trees(pattern, defs))
            all_attrs += new_attrs
            all_nodes += new_nodes
    return (all_attrs, all_nodes)
@st.composite
def attributes(draw, root, defs):
    """Strategy drawing one ``(name, value)`` pair for an ``<attribute>`` pattern.

    The name starts as the pattern's ``name`` attribute and may be replaced by
    a ``<name>`` child (its text) or an ``<anyName>`` child (random letters).
    The value comes from a ``<text>`` child (random letters) or a ``<choice>``
    child (one of the texts of its non-comment children).  If no child set a
    value, random letters are used.  Comment children are skipped.

    ``defs`` is accepted for signature parity with the other strategies and is
    not used here.

    Raises:
        Exception: If the pattern has a child of any other kind.
        AssertionError: If no name could be determined.
    """
    # Non-empty ASCII-letter text, used for generated names and values.
    letters = st.text(alphabet=string.ascii_letters, min_size=1)
    name_tag = QName(NS, 'name')
    any_name_tag = QName(NS, 'anyName')
    text_tag = QName(NS, 'text')
    choice_tag = QName(NS, 'choice')

    attr_name = root.get('name')
    attr_value = None
    for node in root:
        tag = node.tag
        if tag is etree.Comment:
            continue
        if tag == name_tag:
            attr_name = node.text
        elif tag == any_name_tag:
            attr_name = draw(letters)
        elif tag == text_tag:
            attr_value = draw(letters)
        elif tag == choice_tag:
            options = [
                option.text for option in node
                if option.tag is not etree.Comment
            ]
            attr_value = draw(st.sampled_from(options))
        else:
            raise Exception(
                'How to attribute {}?'.format(etree.tostring(root))
            )

    # No value is same as <text/>
    if attr_value is None:
        attr_value = draw(letters)
    assert attr_name is not None
    return (attr_name, attr_value)
@st.composite
def trees(draw, root, defs):
    """Strategy generating XML content matching the RELAX NG pattern *root*.

    Args:
        draw: Draw function supplied by ``st.composite``.
        root: A RELAX NG pattern element (``element``, ``optional``,
            ``zeroOrMore``, ``oneOrMore``, ``choice``, ``interleave``,
            ``group``, ``ref``, ``text``, ``empty`` or ``attribute``).
        defs: Mapping from define name to its pattern element, used to
            resolve ``<ref>``.

    Drawn value:
        An ``(attrs, children)`` tuple.  ``attrs`` is a list of
        ``(name, value)`` attribute pairs meant for the enclosing element and
        ``children`` is a list of generated child nodes (elements or text
        strings).

    Raises:
        Exception: If ``root`` is a pattern this generator does not support.
        KeyError: If a ``<ref>`` names a define missing from ``defs``.
    """
    if root.tag == QName(NS, 'element'):
        attrs, children = draw(gen_children(root, defs))
        # The element consumes the attributes produced by its content, so
        # nothing is passed further up.
        return ([], [E(root.get('name'), *children, dict(attrs))])
    elif root.tag == QName(NS, 'optional'):
        return pairsum(draw(st.lists(gen_children(root, defs), max_size=1)))
    elif root.tag == QName(NS, 'zeroOrMore'):
        # ``average_size`` is not passed: current Hypothesis releases reject
        # that argument.
        return pairsum(draw(st.lists(gen_children(root, defs))))
    elif root.tag == QName(NS, 'oneOrMore'):
        return pairsum(draw(st.lists(gen_children(root, defs), min_size=1)))
    elif root.tag == QName(NS, 'choice'):
        # Comment nodes are not alternatives; passing one to trees() would
        # end in the "What to do with" error below.
        alternatives = [
            trees(option, defs) for option in root
            if option.tag is not etree.Comment
        ]
        return draw(st.one_of(*alternatives))
    elif root.tag == QName(NS, 'interleave'):
        # XXX(miikka) Not 100% if shuffling is valid way to implement
        # <interleave>. Maybe I should write a randomized test for this...
        attrs, children = draw(gen_children(root, defs))
        draw(st.randoms()).shuffle(children)
        return (attrs, children)
    elif root.tag == QName(NS, 'group'):
        return draw(gen_children(root, defs))
    elif root.tag == QName(NS, 'ref'):
        # ``defs`` stores the defined pattern itself, so it must be expanded
        # with trees(); gen_children() would only generate the pattern's
        # children and drop e.g. a referenced <element>.
        return draw(trees(defs[root.get('name')], defs))
    elif root.tag == QName(NS, 'text'):
        # XXX(miikka) Should generate more than just ASCII letters!
        return ([], [draw(st.text(alphabet=string.ascii_letters))])
    elif root.tag == QName(NS, 'empty'):
        return ([], [])
    elif root.tag == QName(NS, 'attribute'):
        attr = draw(attributes(root, defs))
        return ([attr], [])
    else:
        raise Exception('What to do with {}?'.format(etree.tostring(root)))
@st.composite
def xml(draw, root):
    """Strategy producing one XML element that follows the RELAX NG schema *root*.

    ``root`` may be a ``<grammar>`` element, in which case its start pattern
    and defines are extracted with ``parse_grammar``, or a bare pattern, which
    is used as the start pattern with no defines.

    Raises:
        AssertionError: If the start pattern produces attributes at the top
            level or does not produce exactly one node.
    """
    is_grammar = root.tag == QName(NS, 'grammar')
    start, defs = parse_grammar(root) if is_grammar else (root, {})

    attrs, objs = draw(trees(start, defs))
    # A document has exactly one root node and nowhere to hang stray
    # attributes.
    assert attrs == []
    assert len(objs) == 1
    return objs[0]
from hypothesis import given
from lxml import etree
import pytest
import schema
# Schema: <root> containing exactly one empty <a>.
A_SCHEMA = etree.fromstring("""
<element name="root" xmlns="http://relaxng.org/ns/structure/1.0">
<element name="a"><empty /></element>
</element>
""")


def test_simple_schema():
    """A single example from the fixed schema is ``<root>`` with one ``<a>``."""
    document = schema.xml(A_SCHEMA).example()
    assert document.tag == 'root'
    kids = list(document)
    assert len(kids) == 1
    assert kids[0].tag == 'a'
# Schema exercising text, zeroOrMore, optional, choice and group patterns.
B_SCHEMA = """
<element name="root" xmlns="http://relaxng.org/ns/structure/1.0">
<element name="a">
<text/>
</element>
<zeroOrMore>
<element name="b"><empty/></element>
</zeroOrMore>
<optional>
<element name="c"><empty/></element>
</optional>
<choice>
<element name="d"><empty/></element>
<group>
<element name="e"><empty/></element>
<element name="f"><empty/></element>
</group>
</choice>
</element>
"""
B_SCHEMA = etree.fromstring(B_SCHEMA)
# Compile the validator once instead of once per generated example.
_B_VALIDATOR = etree.RelaxNG(B_SCHEMA)


@given(schema.xml(B_SCHEMA))
def test_complex_schema(tree):
    """Every generated document validates against the schema it came from."""
    # Attach the validator's error log so a failure says why the document
    # was rejected.
    assert _B_VALIDATOR.validate(tree), _B_VALIDATOR.error_log
# Schema: <root> carrying a free-text attribute "a".
ATTR_SCHEMA = etree.fromstring("""
<element name="root" xmlns="http://relaxng.org/ns/structure/1.0">
<attribute name="a"><text/></attribute>
</element>
""")


@given(schema.xml(ATTR_SCHEMA))
def test_attributes(tree):
    """The attribute declared in the schema is present on generated roots."""
    generated = tree.get('a')
    assert generated is not None
# Grammar-style schema: a start pattern plus one define that is never referenced.
GRAMMAR_SCHEMA = etree.fromstring("""
<grammar xmlns="http://relaxng.org/ns/structure/1.0">
<start><element name="foo"><empty/></element></start>
<define name="bar">
<element name="bar"><empty/></element>
</define>
</grammar>
""")
# Compile the validator once instead of once per generated example.
_GRAMMAR_VALIDATOR = etree.RelaxNG(GRAMMAR_SCHEMA)


@given(schema.xml(GRAMMAR_SCHEMA))
def test_grammar(tree):
    """Documents generated from a ``<grammar>`` schema validate against it."""
    # Attach the validator's error log so a failure says why the document
    # was rejected.
    assert _GRAMMAR_VALIDATOR.validate(tree), _GRAMMAR_VALIDATOR.error_log
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment