-
-
Save juanriaza/c5c6ba77b00c68d41f77 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""XPath extension functions for lxml, inspired by: | |
https://gist.github.com/shirk3y/458224083ce5464627bc | |
Usage: | |
import xpathfuncs; xpathfuncs.setup() | |
""" | |
import string | |
from lxml import etree | |
# XPath predicate template: true when the context node's ``class`` attribute
# contains the whitespace-delimited token substituted for ``{}``.
CLASS_EXPR = "contains(concat(' ', normalize-space(@class), ' '), ' {} ')"

# XPath template for a case-insensitive ``contains``. The first ``{}`` is the
# haystack expression (ASCII-lower-cased through ``translate``), the second is
# the needle expression.
# ``string.ascii_uppercase``/``ascii_lowercase`` are used instead of
# ``string.uppercase``/``lowercase``: the latter do not exist on Python 3 and
# are locale-dependent on Python 2.
ICONTAINS_EXPR = "contains(translate({}, %r, %r), {})" % (
    string.ascii_uppercase,
    string.ascii_lowercase,
)

# Registry mapping XPath function names to Python callables; filled in by
# ``register`` and read by ``setup``.
FUNCTIONS = {}
def register(func):
    """Decorator that records ``func`` in ``FUNCTIONS`` as an XPath function.

    The registry key is the function's ``__name__`` with every underscore
    replaced by a hyphen (``has_class`` becomes ``has-class``). The function
    itself is returned unchanged, so it stays callable from Python.
    """
    xpath_name = func.__name__.replace('_', '-')
    FUNCTIONS[xpath_name] = func
    return func
def setup():
    """Install every function collected in ``FUNCTIONS`` into lxml.

    Each entry is assigned, under its registry name, into the namespace
    object returned by ``etree.FunctionNamespace(None)``.
    """
    namespace = etree.FunctionNamespace(None)
    for xpath_name in FUNCTIONS:
        namespace[xpath_name] = FUNCTIONS[xpath_name]
@register
def has_class(context, *classes):
    """Return True if the context node carries every given CSS class.

    This lxml extension allows to select by CSS class more easily.

    The ``class`` attribute is split on XML whitespace (space, tab, CR, LF),
    exactly like XPath ``normalize-space()``, and each requested class must
    appear as a whole token. The test is done in Python rather than by
    formatting the names into an XPath expression, so class names containing
    quotes cannot break or alter the expression.

    Args:
        context: lxml XPath evaluation context; only ``context.context_node``
            is used.
        *classes: one or more class names that must all be present.

    Returns:
        bool: True when the node has a ``class`` attribute containing every
        requested class, False otherwise.

    Raises:
        ValueError: if called without any class name.

    >>> ns = etree.FunctionNamespace(None)
    >>> ns['has-class'] = has_class
    >>> root = etree.XML('''
    ... <a>
    ...     <b class="one first text">I</b>
    ...     <b class="two text">LOVE</b>
    ...     <b class="three text">CSS</b>
    ... </a>
    ... ''')
    >>> len(root.xpath('//b[has-class("text")]'))
    3
    >>> len(root.xpath('//b[has-class("one")]'))
    1
    >>> len(root.xpath('//b[has-class("text", "first")]'))
    1
    >>> len(root.xpath('//b[not(has-class("first"))]'))
    2
    >>> len(root.xpath('//b[has-class("not-exists")]'))
    0
    """
    if not classes:
        # The previous implementation produced an invalid XPath expression
        # here; fail with an explicit message instead.
        raise ValueError("has-class() requires at least one class name")

    attr = context.context_node.get('class')
    if attr is None:
        return False

    # Mirror XPath normalize-space(): treat tab/CR/LF as spaces, drop empty
    # tokens (leading, trailing and repeated whitespace), then pad with a
    # space on each side so every class can be matched as " name ".
    for ws in '\t\r\n':
        attr = attr.replace(ws, ' ')
    padded = ' ' + ' '.join(token for token in attr.split(' ') if token) + ' '

    return all(' {} '.format(name) in padded for name in classes)
@register
def lower_case(context, s):
    """Naive lower case function.

    Returns ``s.lower()``; ``context`` is accepted for the lxml extension
    function signature but not used. ``s`` must therefore be an object with
    a ``lower()`` method (a string).

    >>> ns = etree.FunctionNamespace(None)
    >>> ns['lower-case'] = lower_case
    >>> root = etree.XML('<root/>')
    >>> root.xpath('lower-case("FOO")')
    'foo'
    """
    lowered = s.lower()
    return lowered
@register
def upper_case(context, s):
    """Naive upper case function.

    Returns ``s.upper()``; ``context`` is accepted for the lxml extension
    function signature but not used. ``s`` must therefore be an object with
    an ``upper()`` method (a string).

    >>> ns = etree.FunctionNamespace(None)
    >>> ns['upper-case'] = upper_case
    >>> root = etree.XML('<root/>')
    >>> root.xpath('upper-case("foo")')
    'FOO'
    """
    return s.upper()
@register
def icontains(context, value, text):
    """Like contains but ignores case.

    The comparison is done in Python on lower-cased strings instead of
    building an XPath expression with ``repr()``. ``repr()`` output is not a
    valid XPath literal for strings with a ``u`` prefix, backslash escapes,
    or both quote characters. This also avoids the Python 2-only
    ``basestring`` name.

    Args:
        context: lxml XPath evaluation context (unused, passed through on
            recursion).
        value: the haystack. Either a string, an element (its XPath
            ``string()`` value is searched), or a list of those as produced
            by a node-set argument.
        text: the needle string.

    Returns:
        bool: True if ``text`` occurs in ``value`` ignoring case. For a list,
        True if it occurs in any item; an empty list yields False.

    >>> ns = etree.FunctionNamespace(None)
    >>> ns['icontains'] = icontains
    >>> root = etree.XML('<root><a>foo</a><a>\\'BAR\\'</a></root>')
    >>> root.xpath('icontains("FoOo","foO")')
    True
    >>> root.xpath('icontains(.,"thing")')
    False
    >>> root.xpath('icontains(.,"FOO")')
    True
    >>> root.xpath('icontains(./a,"bar")')
    True
    >>> root.xpath('icontains(./a/text(),"FOO")')
    True
    >>> root.xpath('icontains(normalize-space(a),"F\\'OO")')
    False
    """
    if isinstance(value, list):
        # Node-set argument: match if any member (element or string) matches.
        return any(icontains(context, item, text) for item in value)

    if etree.iselement(value):
        # Search the element's full text content, as XPath string() sees it.
        haystack = value.xpath('string()')
    else:
        haystack = value

    return text.lower() in haystack.lower()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment