YoshihitoAso · February 27, 2014 11:31
diff --git a/gistfile1.py b/gistfile1.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-

 from pyquery import PyQuery
 import json

 # Load the archive index page; `url` is the prefix that identifies the
 # article links we want to follow.
 q = PyQuery(url='http://mainichi.jp/select/biz/archive/')
 url = 'http://mainichi.jp/select/news/'

 # Collect the href of every anchor whose target contains the article prefix.
 # The attribute value is tested as-is: passing it through str() raises
 # UnicodeEncodeError on Python 2 for non-ASCII hrefs, and anchors without an
 # href are simply skipped.
 links = []
 for elem in q.find('a'):
     href = PyQuery(elem).attr('href')
     if href and url in href:
         links.append(href)

 # Download every collected article, concatenate the text of its paragraphs
 # and append the result to mainichi.txt as one JSON object per line.
 for link in links:
     print(link)
     q3 = PyQuery(url=link)

     # Gather the text of each <p> found inside every div.NewsBody element.
     paragraphs = []
     for news_body in q3.find("div.NewsBody"):
         for pelem in PyQuery(news_body).find('p'):
             paragraphs.append(PyQuery(pelem).text())
     text = "".join(paragraphs)

     article = {"url": link, "text": text}
     print(article)
     with open('mainichi.txt', 'a') as outfile:
         json.dump(article, outfile)
         # End the record with a newline; without a separator the appended
         # objects run together and cannot be parsed back one by one.
         outfile.write("\n")
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-

	from pyquery import PyQuery
	import json

	# Load the archive index page; `url` is the prefix that identifies the
	# article links we want to follow.
	q = PyQuery(url='http://mainichi.jp/select/biz/archive/')
	url = 'http://mainichi.jp/select/news/'

	# Collect the href of every anchor whose target contains the article prefix.
	# The attribute value is tested as-is: passing it through str() raises
	# UnicodeEncodeError on Python 2 for non-ASCII hrefs, and anchors without an
	# href are simply skipped.
	links = []
	for elem in q.find('a'):
	    href = PyQuery(elem).attr('href')
	    if href and url in href:
	        links.append(href)

	# Download every collected article, concatenate the text of its paragraphs
	# and append the result to mainichi.txt as one JSON object per line.
	for link in links:
	    print(link)
	    q3 = PyQuery(url=link)

	    # Gather the text of each <p> found inside every div.NewsBody element.
	    paragraphs = []
	    for news_body in q3.find("div.NewsBody"):
	        for pelem in PyQuery(news_body).find('p'):
	            paragraphs.append(PyQuery(pelem).text())
	    text = "".join(paragraphs)

	    article = {"url": link, "text": text}
	    print(article)
	    with open('mainichi.txt', 'a') as outfile:
	        json.dump(article, outfile)
	        # End the record with a newline; without a separator the appended
	        # objects run together and cannot be parsed back one by one.
	        outfile.write("\n")