582033 · August 9, 2016 09:33 · 582033 · Aug 10, 2016
diff --git a/python b/python
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 import scrapy
 from scrapy.spider import Spider#, Rule
 from itjuzi.items import ItjuziItem

 from itjuzi.settings import *


 class spider(Spider):
    """Spider that collects company names from the itjuzi.com company listing.

    Crawling starts at page 1 of the listing. After each page is parsed the
    spider follows the pagination link whose text matches the "next page"
    pattern, and stops when no such link is present.
    """

    name = "itjuzi"
    # Scrapy reads ``allowed_domains``; the previous ``allow_domains``
    # spelling was not recognised, so the domain restriction never applied.
    allowed_domains = [
        "www.itjuzi.com"
    ]
    # Alias kept so anything that read the old attribute name still works.
    allow_domains = allowed_domains
    start_urls = [
        "http://www.itjuzi.com/company?page=1"
    ]

    def parse(self, response):
        """Yield one item per company title, then a request for the next page.

        :param response: the downloaded listing page.
        :returns: a generator of ``ItjuziItem`` objects (``company_name``
            set to the title text), followed by at most one
            ``scrapy.Request`` for the next listing page.
        """
        # Only successful responses are parsed; anything else yields nothing.
        if response.status != 200:
            return

        for company_name in response.xpath('//p[@class="title"]/a/span/text()').extract():
            item = ItjuziItem()
            item['company_name'] = company_name
            yield item

        # Must be initialised up front: when no pagination link matches
        # (e.g. on the last page) the check below would otherwise hit an
        # unbound local variable.
        next_page = None
        for link in response.xpath('//div[contains(@class, "ui-pagechange")]/a'):
            if link.xpath('text()').re(u'.*下一页.*'):
                hrefs = link.xpath('@href').extract()
                # An anchor without an href is skipped instead of raising
                # IndexError.
                if hrefs:
                    next_page = hrefs[0]

        if next_page:
            # Single-argument print() behaves the same on Python 2 and 3.
            print("next_page: %s" % next_page)
            # urljoin leaves absolute URLs untouched and resolves relative
            # hrefs against the current page URL. The callback has to be the
            # bound method itself, not its name as a string.
            yield scrapy.Request(response.urljoin(next_page), callback=self.parse)
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-
	import scrapy
	from scrapy.spider import Spider#, Rule
	from itjuzi.items import ItjuziItem

	from itjuzi.settings import *


	class spider(Spider):
	    """Spider that collects company names from the itjuzi.com company listing.

	    Crawling starts at page 1 of the listing. After each page is parsed the
	    spider follows the pagination link whose text matches the "next page"
	    pattern, and stops when no such link is present.
	    """

	    name = "itjuzi"
	    # Scrapy reads ``allowed_domains``; the previous ``allow_domains``
	    # spelling was not recognised, so the domain restriction never applied.
	    allowed_domains = [
	        "www.itjuzi.com"
	    ]
	    # Alias kept so anything that read the old attribute name still works.
	    allow_domains = allowed_domains
	    start_urls = [
	        "http://www.itjuzi.com/company?page=1"
	    ]

	    def parse(self, response):
	        """Yield one item per company title, then a request for the next page.

	        :param response: the downloaded listing page.
	        :returns: a generator of ``ItjuziItem`` objects (``company_name``
	            set to the title text), followed by at most one
	            ``scrapy.Request`` for the next listing page.
	        """
	        # Only successful responses are parsed; anything else yields nothing.
	        if response.status != 200:
	            return

	        for company_name in response.xpath('//p[@class="title"]/a/span/text()').extract():
	            item = ItjuziItem()
	            item['company_name'] = company_name
	            yield item

	        # Must be initialised up front: when no pagination link matches
	        # (e.g. on the last page) the check below would otherwise hit an
	        # unbound local variable.
	        next_page = None
	        for link in response.xpath('//div[contains(@class, "ui-pagechange")]/a'):
	            # The pattern's ``*`` quantifiers had been stripped from this
	            # copy; restored so the link text alone is enough to match.
	            if link.xpath('text()').re(u'.*下一页.*'):
	                hrefs = link.xpath('@href').extract()
	                # An anchor without an href is skipped instead of raising
	                # IndexError.
	                if hrefs:
	                    next_page = hrefs[0]

	        if next_page:
	            # Single-argument print() behaves the same on Python 2 and 3.
	            print("next_page: %s" % next_page)
	            # urljoin leaves absolute URLs untouched and resolves relative
	            # hrefs against the current page URL. The callback has to be the
	            # bound method itself, not its name as a string.
	            yield scrapy.Request(response.urljoin(next_page), callback=self.parse)
No results found