mwicat · August 2, 2013 09:46
diff --git a/lxml_ex.py b/lxml_ex.py
 import json
 from lxml.html import fromstring
 from urllib import urlopen

 def get_tree(page):
    """Download one page of the Production tutorials listing and parse it.

    page is an integer formatted into the listing URL with %d.
    Returns whatever fromstring() builds from the response body.
    """
    response = urlopen('http://audio.tutsplus.com/category/tutorials/production/page/%d' % page)
    try:
        # Read inside try/finally so the connection is closed explicitly
        # rather than whenever the response object gets garbage collected.
        body = response.read()
    finally:
        response.close()
    return fromstring(body)

 def extract_art(head):
    """Return (href, text) for the element that head.find('a') yields."""
    anchor = head.find('a')
    return anchor.get('href'), anchor.text

 def get_arts(tree):
    """Run extract_art on every element matching 'h1.post_title' in tree.

    Returns the results as a list, in the order cssselect() yields them.
    """
    collected = []
    for heading in tree.cssselect('h1.post_title'):
        collected.append(extract_art(heading))
    return collected

 # arts = []
 # for i in range(1, 48):
 #     print i
 #     arts.extend(get_arts(get_tree(i)))

 # json.dump(arts, open('arts.json', 'w'))

 # Load the entries saved in arts.json; the with-block closes the file
 # handle as soon as the JSON has been parsed.
 with open('arts.json') as arts_file:
    arts = json.load(arts_file)

 def shorten(text):
    """Drop every 'Quick Tip:' and 'How to' from text, then strip whitespace."""
    # Order matters only for parity with the original chained replace calls.
    for phrase in ('Quick Tip:', 'How to'):
        text = text.replace(phrase, '')
    return text.strip()

 arts = sorted([[shorten(text), link] for link, text in arts])

 print '<ol>'
 for text, link in arts:
    t = '<li><a href="%s">%s</a></li>' % (link, text)
    print t.encode('utf-8')
 print '</ol>'
	import json
	from lxml.html import fromstring
	from urllib import urlopen

	def get_tree(page):
	    """Download one page of the Production tutorials listing and parse it.

	    page is an integer formatted into the listing URL with %d.
	    Returns whatever fromstring() builds from the response body.
	    """
	    response = urlopen('http://audio.tutsplus.com/category/tutorials/production/page/%d' % page)
	    try:
	        # Read inside try/finally so the connection is closed explicitly
	        # rather than whenever the response object gets garbage collected.
	        body = response.read()
	    finally:
	        response.close()
	    return fromstring(body)

	def extract_art(head):
	    """Return (href, text) for the element that head.find('a') yields."""
	    a = head.find('a')
	    link = a.get('href')
	    text = a.text
	    return link, text

	def get_arts(tree):
	    """Run extract_art on every element matching 'h1.post_title' in tree.

	    Returns the results as a list, in the order cssselect() yields them.
	    """
	    arts = [extract_art(head) for head in tree.cssselect('h1.post_title')]
	    return arts

	# arts = []
	# for i in range(1, 48):
	# print i
	# arts.extend(get_arts(get_tree(i)))

	# json.dump(arts, open('arts.json', 'w'))

	# Load the entries saved in arts.json; the with-block closes the file
	# handle as soon as the JSON has been parsed.
	with open('arts.json') as arts_file:
	    arts = json.load(arts_file)

	def shorten(text):
	    """Drop every 'Quick Tip:' and 'How to' from text, then strip whitespace."""
	    return text.replace('Quick Tip:', '').replace('How to', '').strip()

	arts = sorted([[shorten(text), link] for link, text in arts])

	print '<ol>'
	for text, link in arts:
	t = '<li><a href="%s">%s</a></li>' % (link, text)
	print t.encode('utf-8')
	print '</ol>'
No results found