|
"""Replace magic HTML tags with the result of function calls. |
|
|
|
For now, only self-closing tags (<TAGNAME .../>) are supported. This is for |
|
simplicity and also because otherwise we'd have to decide how to check for |
|
proper nesting and to handle body arguments (lazy or strict call order, ... ?) |
|
|
|
We don't even try to parse valid HTML here. We're just looking for |
|
the next occurrence of "<TAGNAME" for any given TAGNAME. |
|
This is easy to implement and better for performance. |
|
|
|
Unfortunately that also means that things like |
|
|
|
<p someattr="someval" someotherattr="foo <TAGNAME bar="baz"/>"> |
|
|
|
get replaced as well, which might be undesirable. |
|
""" |
|
|
|
import io |
|
import re |
|
|
|
|
|
class Invalid(Exception):
    """Raised when a magic tag in the input cannot be processed.

    htmltagsreplace() raises this for a repeated attribute, a missing
    closing "/>" sequence, or a tag function that returns a non-string.
    """
|
|
|
|
|
# One attribute: leading whitespace, a name, and a double-quoted value that
# contains no double quote.
_ATTR_RE = re.compile(r'\s+([a-zA-Z_][a-zA-Z0-9_-]*)="([^"]*)"')

# The self-closing terminator, optionally preceded by whitespace.
_CLOSE_RE = re.compile(r'\s*/>')


def htmltagsreplace(fdict, html):
    """Replace self-closing magic tags in *html* with function results.

    Every occurrence of ``<TAGNAME attr="value" ... />`` whose TAGNAME is a
    key of *fdict* is replaced by ``fdict[TAGNAME](attr="value", ...)``.
    Attribute values are passed verbatim (no HTML entity decoding).

    Args:
        fdict: dict mapping alphanumeric tag names to callables. Each
            callable receives the tag's attributes as keyword arguments
            and must return a str.
        html: the text to scan. It is not parsed as HTML; the scan simply
            looks for the next "<TAGNAME" in the text.

    Returns:
        The text with all magic tags replaced. If no magic tag occurs
        (or *fdict* is empty), *html* itself is returned.

    Raises:
        Invalid: if an attribute is repeated within one tag, if the tag is
            not terminated by "/>", or if a tag function does not return
            a str.
        AssertionError: if the argument types are wrong or a tag name is
            not alphanumeric.
    """
    assert isinstance(fdict, dict)
    assert isinstance(html, str)
    assert all(tagname.isalnum() for tagname in fdict)

    if not fdict:
        # An empty alternation would match after every "<", so bail out.
        return html

    tag_re = re.compile(r'<(%s)\b' % '|'.join(re.escape(key) for key in fdict))

    out = io.StringIO()
    pos = 0
    while True:
        # Resume the search *after* the previously consumed tag, so that a
        # magic tag name appearing inside an attribute value of a tag we
        # already handled is not expanded a second time.
        tag = tag_re.search(html, pos)
        if tag is None:
            break
        out.write(html[pos:tag.start()])
        tagname = tag.group(1)
        pos = tag.end()

        # Matching with an explicit start position avoids copying the rest
        # of the document for every attribute.
        attrs = {}
        while True:
            attr = _ATTR_RE.match(html, pos)
            if attr is None:
                break
            key = attr.group(1)
            val = attr.group(2)  # XXX: want to replace HTML entities?
            if key in attrs:
                raise Invalid('Attribute "%s" used multiple times in magic tag <%s ...>' %(key, tagname))
            attrs[key] = val
            pos = attr.end()

        close = _CLOSE_RE.match(html, pos)
        if close is None:
            raise Invalid('Missing close sequence (/>) in use of magic tag <%s ...>' %(tagname,))
        pos = close.end()

        result = fdict[tagname](**attrs)
        if not isinstance(result, str):
            raise Invalid('Call to magic tag function <%s ...> did not return a string: %s' %(tagname, result))
        out.write(result)

    if pos == 0:
        # No magic tag was found; hand back the input unchanged.
        return html
    out.write(html[pos:])
    return out.getvalue()
|
|
|
|
|
def testit():
    """Run htmltagsreplace() on a small sample and print the result."""
    import datetime

    def render_foo(**args):
        # 'bar' is required; a missing 'baz' shows up as the text "None".
        return 'FOO' + args['bar'] + str(args.get('baz'))

    def render_date():
        return str(datetime.datetime.now())

    sample = """
<p>abc<foo bar="42" baz="43" /></p>
<p><foo bar="xyz"/></p>
<p>Date: <date/></p>
"""
    handlers = {'foo': render_foo, 'date': render_date}
    print(htmltagsreplace(handlers, sample))
|
|
|
|
|
# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    testit()