jstimpfle · April 19, 2017 18:05
diff --git a/htmltagsreplace.py b/htmltagsreplace.py
 """Replace magic HTML tags with the result of function calls.

 For now, only self-closing tags (<TAGNAME .../>) are supported.  This is for
 simplicity and also because otherwise we'd have to decide how to check for
 proper nesting and to handle body arguments (lazy or strict call order, ... ?)

 We don't even try to parse valid HTML here. We're just looking for
 the next occurrence of "<TAGNAME" for any given TAGNAME.
 This is easy to implement and better for performance.

 Unfortunately that also means that things like

  <p someattr="someval" someotherattr="foo <TAGNAME bar="baz"/>">

 get replaced as well, which might be undesirable.
 """

 import io
 import re


 class Invalid(Exception):
    """Raised when a magic tag is malformed or its function misbehaves."""


 def htmltagsreplace(fdict, html):
    """Replace self-closing magic tags in *html* with function results.

    Every occurrence of ``<TAGNAME attr="value" ... />`` whose TAGNAME is a
    key of *fdict* is replaced by ``fdict[TAGNAME](attr="value", ...)``.
    No real HTML parsing is done, and attribute values are handed to the
    function verbatim (HTML entities are not decoded).

    Args:
        fdict: dict mapping alphanumeric tag names to callables.  Each
            callable receives the tag's attributes as keyword arguments
            and must return a str.
        html: the text to scan.

    Returns:
        The text with every magic tag replaced.  When no magic tag occurs
        (or *fdict* is empty), *html* itself is returned.

    Raises:
        Invalid: if an attribute is repeated within one tag, if a tag is
            not terminated by ``/>``, or if a tag function returns
            something that is not a str.
    """
    # Kept as asserts so callers keep seeing AssertionError on misuse.
    assert isinstance(fdict, dict)
    assert isinstance(html, str)
    assert all(tagname.isalnum() for tagname in fdict)

    if not fdict:
        # An empty alternation would match "<" anywhere; nothing to replace.
        return html

    # Compile once; matching with an explicit position avoids copying the
    # remainder of the text (html[i:]) for every attribute and tag.
    tagpat = re.compile(r'<(%s)\b' % '|'.join(re.escape(key) for key in fdict))
    attrpat = re.compile(r'\s+([a-zA-Z_][a-zA-Z0-9_-]*)="([^"]*)"')
    closepat = re.compile(r'\s*/>')

    out = io.StringIO()
    pos = 0
    while True:
        # Resume the search at the end of the previously consumed tag.
        # Scanning on from the end of the tag *name* instead would find
        # magic tags inside attribute values that were already consumed.
        tag = tagpat.search(html, pos)
        if tag is None:
            break
        out.write(html[pos:tag.start()])
        tagname = tag.group(1)
        pos = tag.end()

        attrs = {}
        while True:
            attr = attrpat.match(html, pos)
            if attr is None:
                break
            key = attr.group(1)
            val = attr.group(2)  # XXX: want to replace HTML entities?
            if key in attrs:
                raise Invalid('Attribute "%s" used multiple times in magic tag <%s ...>' % (key, tagname))
            attrs[key] = val
            pos = attr.end()

        close = closepat.match(html, pos)
        if close is None:
            raise Invalid('Missing close sequence (/>) in use of magic tag <%s ...>' % (tagname,))
        pos = close.end()

        result = fdict[tagname](**attrs)
        if not isinstance(result, str):
            raise Invalid('Call to magic tag function <%s ...> did not return a string: %s' % (tagname, result))
        out.write(result)

    if pos == 0:
        # No tag matched: hand back the input unchanged.
        return html
    out.write(html[pos:])
    return out.getvalue()


 def testit():
    """Print the result of expanding a small sample document (manual demo)."""
    import datetime

    def expand_foo(**args):
        # 'bar' is required; a missing 'baz' is rendered as the text "None".
        return 'FOO' + args['bar'] + str(args.get('baz'))

    def expand_date():
        return str(datetime.datetime.now())

    handlers = {'foo': expand_foo, 'date': expand_date}
    sample = """
 <p>abc<foo bar="42" baz="43" /></p>
 <p><foo bar="xyz"/></p>
 <p>Date: <date/></p>
 """
    print(htmltagsreplace(handlers, sample))


 # Script entry point: run the demo only when executed directly, not on import.
 if __name__ == '__main__':
    testit()
	"""Replace magic HTML tags with the result of function calls.

	For now, only self-closing tags (<TAGNAME .../>) are supported. This is for
	simplicity and also because otherwise we'd have to decide how to check for
	proper nesting and to handle body arguments (lazy or strict call order, ... ?)

	We don't even try to parse valid HTML here. We're just looking for
	the next occurrence of "<TAGNAME" for any given TAGNAME.
	This is easy to implement and better for performance.

	Unfortunately that also means that things like

	<p someattr="someval" someotherattr="foo <TAGNAME bar="baz"/>">

	get replaced as well, which might be undesirable.
	"""

	import io
	import re


	class Invalid(Exception):
	    """Raised when a magic tag is malformed or its function misbehaves."""


	def htmltagsreplace(fdict, html):
	    """Replace self-closing magic tags in *html* with function results.

	    Every occurrence of ``<TAGNAME attr="value" ... />`` whose TAGNAME is a
	    key of *fdict* is replaced by ``fdict[TAGNAME](attr="value", ...)``.
	    No real HTML parsing is done, and attribute values are handed to the
	    function verbatim (HTML entities are not decoded).

	    Args:
	        fdict: dict mapping alphanumeric tag names to callables.  Each
	            callable receives the tag's attributes as keyword arguments
	            and must return a str.
	        html: the text to scan.

	    Returns:
	        The text with every magic tag replaced.  When no magic tag occurs
	        (or *fdict* is empty), *html* itself is returned.

	    Raises:
	        Invalid: if an attribute is repeated within one tag, if a tag is
	            not terminated by ``/>``, or if a tag function returns
	            something that is not a str.
	    """
	    # Kept as asserts so callers keep seeing AssertionError on misuse.
	    assert isinstance(fdict, dict)
	    assert isinstance(html, str)
	    assert all(tagname.isalnum() for tagname in fdict)

	    if not fdict:
	        # An empty alternation would match "<" anywhere; nothing to replace.
	        return html

	    # The alternation separator must be a bare "|" and the attribute
	    # pattern needs its "*" quantifiers; otherwise only single-character
	    # names/values match and multiple tag names never match at all.
	    tagpat = re.compile(r'<(%s)\b' % '|'.join(re.escape(key) for key in fdict))
	    attrpat = re.compile(r'\s+([a-zA-Z_][a-zA-Z0-9_-]*)="([^"]*)"')
	    closepat = re.compile(r'\s*/>')

	    out = io.StringIO()
	    pos = 0
	    while True:
	        # Resume the search at the end of the previously consumed tag so
	        # that magic tags inside consumed attribute values are not
	        # expanded a second time.
	        tag = tagpat.search(html, pos)
	        if tag is None:
	            break
	        out.write(html[pos:tag.start()])
	        tagname = tag.group(1)
	        pos = tag.end()

	        attrs = {}
	        while True:
	            # Matching at an explicit position avoids copying html[pos:].
	            attr = attrpat.match(html, pos)
	            if attr is None:
	                break
	            key = attr.group(1)
	            val = attr.group(2)  # XXX: want to replace HTML entities?
	            if key in attrs:
	                raise Invalid('Attribute "%s" used multiple times in magic tag <%s ...>' % (key, tagname))
	            attrs[key] = val
	            pos = attr.end()

	        close = closepat.match(html, pos)
	        if close is None:
	            raise Invalid('Missing close sequence (/>) in use of magic tag <%s ...>' % (tagname,))
	        pos = close.end()

	        result = fdict[tagname](**attrs)
	        if not isinstance(result, str):
	            raise Invalid('Call to magic tag function <%s ...> did not return a string: %s' % (tagname, result))
	        out.write(result)

	    if pos == 0:
	        # No tag matched: hand back the input unchanged.
	        return html
	    out.write(html[pos:])
	    return out.getvalue()


	def testit():
	    """Print the result of expanding a small sample document (manual demo)."""
	    import datetime
	    fdict = {
	        # 'bar' is required; a missing 'baz' is rendered as the text "None".
	        'foo': lambda **args: 'FOO' + args['bar'] + str(args.get('baz')),
	        'date': lambda: str(datetime.datetime.now()),
	    }
	    result = htmltagsreplace(fdict, """
	<p>abc<foo bar="42" baz="43" /></p>
	<p><foo bar="xyz"/></p>
	<p>Date: <date/></p>
	""")
	    print(result)


	# Script entry point: run the demo only when executed directly, not on import.
	if __name__ == '__main__':
	    testit()