Skip to content

Instantly share code, notes, and snippets.

@jbpotonnier
Created June 5, 2012 21:10
Show Gist options
  • Save jbpotonnier/2877976 to your computer and use it in GitHub Desktop.
Save jbpotonnier/2877976 to your computer and use it in GitHub Desktop.
Paths in ul-li
bin
local
include
lib
man
.Python
*.pyc
*.swp
<ul>
<li>Animals
<ul>
<li>Birds</li>
<li>Mammals
<ul>
<li>Elephant</li>
<li><a href="#">Mouse</a></li>
</ul>
</li>
<li>Reptiles</li>
</ul>
</li>
<li>Plants
<ul>
<li>Flowers
<ul>
<li>Rose</li>
<li>Tulip</li>
</ul>
</li>
<li>Trees</li>
</ul>
</li>
</ul>
PyZen==0.3.2
lxml==2.3.4
nose==1.1.2
rednose==0.3.3
from lxml import etree
from nose.tools import assert_equal
from tree import tree_paths
def test_should_find_tree_paths():
    """Check the paths that tree_paths builds for the animals.html fixture.

    Verifies both the set and the order of the generated path strings, and
    that a path resolves to the element that directly carries the label
    (the <a> element for "Mouse", not its enclosing <li>).
    """
    with open('animals.html') as f:
        tree = etree.parse(f)

    paths = tree_paths(tree)

    expected_paths = [
        '/Animals',
        '/Animals/Birds',
        '/Animals/Mammals',
        '/Animals/Mammals/Elephant',
        '/Animals/Mammals/Mouse',
        '/Animals/Reptiles',
        '/Plants',
        '/Plants/Flowers',
        '/Plants/Flowers/Rose',
        '/Plants/Flowers/Tulip',
        '/Plants/Trees',
    ]
    # keys() is a list on Python 2 but a view object on Python 3, and a view
    # never compares equal to a list; materialise it so the comparison is
    # meaningful on both.
    assert_equal(expected_paths, list(paths.keys()))

    element = paths['/Animals/Mammals/Mouse']
    assert_equal('a', element.tag)
from collections import OrderedDict
def path_to(elem):
    """Return the list of text labels leading to ``elem``.

    The labels are collected from ``elem`` and each of its ancestors whose
    first text node is non-blank, in the order the XPath query returns them,
    with surrounding whitespace stripped.

    :param elem: an element exposing lxml's ``xpath()`` method.
    :returns: list of stripped label strings.
    """
    labelled = elem.xpath(
        './ancestor-or-self::*[boolean(normalize-space(text()))]')

    labels = []
    for node in labelled:
        # The predicate above tests the element's *first text node*, which is
        # not always ``node.text``: in ``<li><a>Mouse</a> (small)</li>`` it is
        # the tail of <a>, and ``li.text`` is None.  Fall back to the text
        # node the predicate actually matched instead of crashing on None.
        label = node.text or node.xpath('string(text())')
        labels.append(label.strip())
    return labels
def path_as_string(path):
    """Render an iterable of label strings as a slash-separated path.

    The result always starts with a slash, so an empty ``path`` gives ``'/'``.
    """
    separator = '/'
    joined = separator.join(path)
    return separator + joined
def tree_paths(tree):
    """Map the path string of every labelled element in ``tree`` to the element.

    An element is considered labelled when its first text node is non-blank.
    Keys are built with ``path_to`` and ``path_as_string`` and are inserted
    in the order the XPath query returns the elements.  If two elements
    yield the same path, the later one replaces the stored element while the
    key keeps its original position.

    :param tree: a tree or element exposing lxml's ``xpath()`` method.
    :returns: ``OrderedDict`` of path string -> element.
    """
    labelled_elements = tree.xpath('.//*[boolean(normalize-space(text()))]')

    paths = OrderedDict()
    for element in labelled_elements:
        key = path_as_string(path_to(element))
        paths[key] = element
    return paths
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment