Skip to content

Instantly share code, notes, and snippets.

@jeroenbourgois
Created January 23, 2011 16:20
Show Gist options
  • Save jeroenbourgois/792182 to your computer and use it in GitHub Desktop.
Save jeroenbourgois/792182 to your computer and use it in GitHub Desktop.
import lxml
import urllib
from lxml.html import fromstring
from lxml import etree
# vars
# Address of the page that the script below downloads and parses.
url = "http://www.makro.be/Content/assortiment/benzinestation/benzineprijzen/1/index.jsp?stat=0"
# methods
# parse it the xpath way
# -->
def select_n_print_xpath(doc):
    """Print the text of the cells matched by the XPath query.

    The query matches the last ``td`` of the last ``tr`` of every ``table``
    that is a direct child of a ``div`` whose ``class`` attribute is exactly
    ``boxContent``.

    Args:
        doc: A parsed document exposing ``xpath(query)``; each returned
            element must provide ``text_content()``.

    Returns:
        None. One line is written to stdout per matched element; nothing is
        printed when the query matches nothing.
    """
    elements = doc.xpath('//div[@class="boxContent"]/table/tr[last()]/td[last()]')
    for e in elements:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(e.text_content())
# or even better
# throw a css selector at it
# but then you can't use any of the pseudo selectors :(
def select_n_print_css(doc):
    """Print the text of every cell matched by the CSS selector.

    The selector ``div.boxContent table tr td`` matches every ``td`` nested
    anywhere inside a ``div`` with class ``boxContent``. Unlike
    ``select_n_print_xpath`` it is not limited to the last cell of the last
    row, so it normally prints more lines.

    Args:
        doc: A parsed document exposing ``cssselect(selector)``; each
            returned element must provide ``text_content()``.

    Returns:
        None. One line is written to stdout per matched element; nothing is
        printed when the selector matches nothing.
    """
    elements = doc.cssselect('div.boxContent table tr td')
    for e in elements:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(e.text_content())
# urlopen lives in urllib.request on Python 3 and directly in urllib on
# Python 2; import it from whichever location this interpreter provides.
try:
    from urllib.request import urlopen
except ImportError:
    from urllib import urlopen
from contextlib import closing

# get that sweet html!
# closing() releases the HTTP response even if read() raises.
with closing(urlopen(url)) as response:
    content = response.read()
# throw it into the doc
doc = fromstring(content)
select_n_print_xpath(doc)
# Alternative: print every cell via the CSS selector instead.
#select_n_print_css(doc)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment