xflr6 · June 5, 2022 10:11
diff --git a/xpath.py b/xpath.py
 """Use advanced XPath features of lxml (see also scrapy parsel)."""

 from __future__ import annotations

 from typing import Optional
 import urllib.request

 import lxml.etree
 import lxml.html


 def register_xpath(func=None, *, ns_uri: Optional[str] = None):
    """Register *func* as an lxml XPath extension function and return it.

    The function is stored in ``lxml.etree.FunctionNamespace(ns_uri)`` under
    its ``__name__`` with underscores replaced by hyphens, so ``has_class``
    becomes callable from XPath as ``has-class``.

    Usable as a bare decorator (``@register_xpath``), as a parameterised
    decorator (``@register_xpath(ns_uri='...')``) or as a plain call
    (``register_xpath(func, ns_uri='...')``).

    :param func: the callable to register; omit it to get a decorator back.
    :param ns_uri: namespace URI handed to ``FunctionNamespace``; ``None``
        is passed through unchanged.
    :returns: *func* itself, or a decorator when *func* is omitted.
    """
    if func is None:
        # Parameterised decorator form: defer registration until the
        # decorated function is supplied.
        return lambda decorated: register_xpath(decorated, ns_uri=ns_uri)
    ns = lxml.etree.FunctionNamespace(ns_uri)
    # XPath function names conventionally use hyphens, not underscores.
    name = func.__name__.replace('_', '-')
    ns[name] = func
    return func


 @register_xpath
 def has_class(context, *args) -> bool:
    """XPath extension ``has-class(name, ...)``: test the node's classes.

    Returns ``True`` when the context node has a ``class`` attribute whose
    space-normalised value contains every given name as a space-delimited
    token.  At least one name is required: with none, the generated XPath
    predicate is incomplete.

    :param context: lxml extension-function context; only its
        ``context_node`` is used.
    :param args: class names to look for (each is converted with ``str``).
    """
    # Names are bound as XPath variables ($cls0, $cls1, ...) instead of
    # being spliced into a quoted literal, so a name containing a double
    # quote can neither break nor alter the expression.
    cls_contains = ('contains(concat(" ", normalize-space(@class), " "),'
                    ' concat(" ", ${}, " "))')
    variables = {f'cls{i}': str(cls) for i, cls in enumerate(args)}
    has_cls = ' and '.join(cls_contains.format(name) for name in variables)
    xpath = f'self::*[@class and {has_cls}]'
    return bool(context.context_node.xpath(xpath, **variables))


 class XpathEval(lxml.etree.XPathDocumentEvaluator):
    """Document-level XPath evaluator with convenient defaults.

    Unless overridden by keyword arguments, the EXSLT ``set`` and ``re``
    namespace prefixes are bound and ``smart_strings`` is disabled.
    The parsed tree is kept on the instance as ``tree``.
    """

    # Keyword defaults applied in __init__ when the caller omits them.
    _defaults = {'namespaces': {'set': 'http://exslt.org/sets',
                                're': 'http://exslt.org/regular-expressions'},
                 'smart_strings': False}

    @classmethod
    def from_url(cls, url: str, *, xml: bool = False, **kwargs) -> XpathEval:
        """Fetch *url*, parse the response and return an evaluator for it.

        :param url: address opened with ``urllib.request.urlopen``.
        :param xml: parse with the XML parser instead of the HTML parser.
        :param kwargs: forwarded to the class constructor.
        """
        # lxml has no ``lxml.xml`` module; the generic XML parser entry
        # point is ``lxml.etree.parse``.
        parse_func = lxml.etree.parse if xml else lxml.html.parse
        with urllib.request.urlopen(url) as f:
            tree = parse_func(f)
        return cls(tree, **kwargs)

    def __init__(self, tree: lxml.etree._ElementTree,
                 *args, **kwargs) -> None:
        """Create the evaluator for *tree*, filling in ``_defaults``.

        Explicit keyword arguments take precedence over the defaults.
        """
        merged = {**self._defaults, **kwargs}
        super().__init__(tree, *args, **merged)
        self.tree = tree


 if __name__ == '__main__':
    # Demo: list the link texts filed under the "E" and "I" headings of a
    # Wikipedia category page, using has-class() and EXSLT re:test().
    query = ''.join(['//div[has-class("mw-category")]',
                     '//h3[re:test(., "^[EI]$")]',
                     '/following-sibling::ul/li/a/text()'])

    evaluator = XpathEval.from_url(
        'https://en.wikipedia.org/wiki/Category:Monty_Python_songs')

    titles = evaluator(query)
    print(titles)
	"""Use advanced XPath features of lxml (see also scrapy parsel)."""

	from __future__ import annotations

	from typing import Optional
	import urllib.request

	import lxml.etree
	import lxml.html


	def register_xpath(func=None, *, ns_uri: Optional[str] = None):
	    """Register *func* as an lxml XPath extension function and return it.

	    The function is stored in ``lxml.etree.FunctionNamespace(ns_uri)``
	    under its ``__name__`` with underscores replaced by hyphens.

	    Works as ``@register_xpath``, ``@register_xpath(ns_uri='...')`` or
	    ``register_xpath(func, ns_uri='...')``.

	    :param func: the callable to register; omit it to get a decorator.
	    :param ns_uri: namespace URI handed to ``FunctionNamespace``.
	    :returns: *func* itself, or a decorator when *func* is omitted.
	    """
	    if func is None:
	        # Parameterised decorator form: register once the function arrives.
	        return lambda decorated: register_xpath(decorated, ns_uri=ns_uri)
	    ns = lxml.etree.FunctionNamespace(ns_uri)
	    name = func.__name__.replace('_', '-')
	    ns[name] = func
	    return func


	@register_xpath
	def has_class(context, *args) -> bool:
	    """XPath extension ``has-class(name, ...)``: test the node's classes.

	    Returns ``True`` when the context node has a ``class`` attribute whose
	    space-normalised value contains every given name as a space-delimited
	    token.  At least one name is required.

	    :param context: lxml extension-function context; only its
	        ``context_node`` is used.
	    :param args: class names to look for (each is converted with ``str``).
	    """
	    # Bind names as XPath variables rather than splicing them into a
	    # quoted literal, so quotes in a name cannot alter the expression.
	    cls_contains = ('contains(concat(" ", normalize-space(@class), " "),'
	                    ' concat(" ", ${}, " "))')
	    variables = {f'cls{i}': str(cls) for i, cls in enumerate(args)}
	    has_cls = ' and '.join(cls_contains.format(name) for name in variables)
	    xpath = f'self::*[@class and {has_cls}]'
	    return bool(context.context_node.xpath(xpath, **variables))


	class XpathEval(lxml.etree.XPathDocumentEvaluator):
	    """Document-level XPath evaluator with convenient defaults.

	    Unless overridden by keyword arguments, the EXSLT ``set`` and ``re``
	    namespace prefixes are bound and ``smart_strings`` is disabled.
	    The parsed tree is kept on the instance as ``tree``.
	    """

	    # Keyword defaults applied in __init__ when the caller omits them.
	    _defaults = {'namespaces': {'set': 'http://exslt.org/sets',
	                                're': 'http://exslt.org/regular-expressions'},
	                 'smart_strings': False}

	    @classmethod
	    def from_url(cls, url: str, *, xml: bool = False, **kwargs) -> XpathEval:
	        """Fetch *url*, parse the response and return an evaluator for it.

	        :param url: address opened with ``urllib.request.urlopen``.
	        :param xml: parse with the XML parser instead of the HTML parser.
	        :param kwargs: forwarded to the class constructor.
	        """
	        # lxml has no ``lxml.xml`` module; use ``lxml.etree.parse`` for XML.
	        parse_func = lxml.etree.parse if xml else lxml.html.parse
	        with urllib.request.urlopen(url) as f:
	            tree = parse_func(f)
	        return cls(tree, **kwargs)

	    def __init__(self, tree: lxml.etree._ElementTree,
	                 *args, **kwargs) -> None:
	        """Create the evaluator for *tree*, filling in ``_defaults``.

	        Explicit keyword arguments take precedence over the defaults.
	        """
	        for kw, value in self._defaults.items():
	            kwargs.setdefault(kw, value)
	        super().__init__(tree, *args, **kwargs)
	        self.tree = tree


	if __name__ == '__main__':
	    # Demo: print the link texts filed under the "E" and "I" headings of
	    # a Wikipedia category page.
	    url = 'https://en.wikipedia.org/wiki/Category:Monty_Python_songs'

	    xpath = ('//div[has-class("mw-category")]'
	             '//h3[re:test(., "^[EI]$")]'
	             '/following-sibling::ul/li/a/text()')

	    e = XpathEval.from_url(url)

	    print(e(xpath))