bcdejp · December 27, 2014 08:02
diff --git a/lxml_sample.py b/lxml_sample.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-

 import requests
 import lxml.html

 URL = "http://make.bcde.jp/category/1/"

 # Fetch the web page (HTML) and parse it into an lxml element tree.
 # A timeout is passed because requests otherwise waits indefinitely
 # for an unresponsive server.
 req = requests.get(URL, timeout=10)
 root = lxml.html.fromstring(req.text)

 # Print the target of every link. Selecting the href attribute directly
 # skips <a> elements that have no href (e.g. named anchors); indexing
 # attrib['href'] on those would raise KeyError.
 for href in root.xpath('//a/@href'):
     print(href)

 # Print the leading text of every <h1> (text before its first child
 # element; None when the element has no such text).
 for h1 in root.xpath('//h1'):
     print(h1.text)

 # Look up an element by id and print only the text directly enclosed by
 # the tag. get_element_by_id raises KeyError when the id is absent.
 content1 = root.get_element_by_id('content1').text
 print(content1)

 # Look up an element by id and print all text inside it, including the
 # text of nested elements.
 content = root.get_element_by_id('content').text_content()
 print(content)
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-

	import requests
	import lxml.html

	URL = "http://make.bcde.jp/category/1/"

	# Fetch the web page (HTML) and parse it into an lxml element tree.
	# A timeout is passed because requests otherwise waits indefinitely
	# for an unresponsive server.
	req = requests.get(URL, timeout=10)
	root = lxml.html.fromstring(req.text)

	# Print the target of every link. Selecting the href attribute directly
	# skips <a> elements that have no href (e.g. named anchors); indexing
	# attrib['href'] on those would raise KeyError.
	for href in root.xpath('//a/@href'):
	    print(href)

	# Print the leading text of every <h1> (text before its first child
	# element; None when the element has no such text).
	for h1 in root.xpath('//h1'):
	    print(h1.text)

	# Look up an element by id and print only the text directly enclosed by
	# the tag. get_element_by_id raises KeyError when the id is absent.
	content1 = root.get_element_by_id('content1').text
	print(content1)

	# Look up an element by id and print all text inside it, including the
	# text of nested elements.
	content = root.get_element_by_id('content').text_content()
	print(content)