yihyang · August 15, 2016 05:10
diff --git a/scrapy-example.py b/scrapy-example.py
 import scrapy
 import os.path

 class EmailSpider(scrapy.Spider):
    """Spider that appends one comma-separated line per target div to a file.

    For every ``//div[@class="target"]`` element on a crawled page, the first
    ``div.a/div.b`` and ``div.c/div.d`` matches are extracted, stripped and
    appended as ``"<ab>, <cd>"`` to ``output/test.txt`` under the current
    working directory.  The XPath expressions are placeholders and need to be
    customized for the real target site.
    """

    name = "test"
    allowed_domains = ["test.com"]
    # Start URLs must carry a scheme: Scrapy refuses to build a request for a
    # bare 'test.com/1' ("Missing scheme in request url").
    start_urls = ['http://test.com/1', 'http://test.com/2']

    def parse(self, response):
        """Extract the a/b and c/d values of each target div and append them.

        Called by Scrapy for each response of the URLs in ``start_urls``.

        Args:
            response: the downloaded page; only its ``xpath`` method and
                ``url`` attribute are used here.

        Target divs missing either nested div are skipped (with a warning)
        instead of aborting the whole page.  Nothing is opened or written
        when the page yields no complete row.  The ``output`` directory is
        not created here and must already exist.
        """
        lines = []
        # Loop through the target divs (customize the XPath per site).
        for box in response.xpath('//div[@class="target"]'):
            ab = box.xpath('div[@class="a"]/div[@class="b"]').extract_first()
            cd = box.xpath('div[@class="c"]/div[@class="d"]').extract_first()
            if ab is None or cd is None:
                # extract_first() yields None when nothing matched, where
                # extract()[0] would raise IndexError and drop every
                # remaining row of the page.
                self.logger.warning(
                    'Skipping incomplete target div on %s', response.url)
                continue
            lines.append(ab.strip() + ', ' + cd.strip() + '\n')

        if not lines:
            return

        # Open the output file once per response rather than once per row.
        out_path = os.path.join(os.path.abspath(''), 'output', 'test.txt')
        with open(out_path, 'a') as f:
            f.writelines(lines)
	import scrapy
	import os.path

	class EmailSpider(scrapy.Spider):
	    """Spider that appends one comma-separated line per target div to a file.

	    For every ``//div[@class="target"]`` element on a crawled page, the
	    first ``div.a/div.b`` and ``div.c/div.d`` matches are extracted,
	    stripped and appended as ``"<ab>, <cd>"`` to ``output/test.txt`` under
	    the current working directory.  The XPath expressions are placeholders
	    and need to be customized for the real target site.
	    """

	    name = "test"
	    allowed_domains = ["test.com"]
	    # Start URLs must carry a scheme: Scrapy refuses to build a request
	    # for a bare 'test.com/1' ("Missing scheme in request url").
	    start_urls = ['http://test.com/1', 'http://test.com/2']

	    def parse(self, response):
	        """Extract the a/b and c/d values of each target div and append them.

	        Called by Scrapy for each response of the URLs in ``start_urls``.

	        Args:
	            response: the downloaded page; only its ``xpath`` method and
	                ``url`` attribute are used here.

	        Target divs missing either nested div are skipped (with a
	        warning) instead of aborting the whole page.  Nothing is opened
	        or written when the page yields no complete row.  The ``output``
	        directory is not created here and must already exist.
	        """
	        lines = []
	        # Loop through the target divs (customize the XPath per site).
	        for box in response.xpath('//div[@class="target"]'):
	            ab = box.xpath('div[@class="a"]/div[@class="b"]').extract_first()
	            cd = box.xpath('div[@class="c"]/div[@class="d"]').extract_first()
	            if ab is None or cd is None:
	                # extract_first() yields None when nothing matched, where
	                # extract()[0] would raise IndexError and drop every
	                # remaining row of the page.
	                self.logger.warning(
	                    'Skipping incomplete target div on %s', response.url)
	                continue
	            lines.append(ab.strip() + ', ' + cd.strip() + '\n')

	        if not lines:
	            return

	        # Open the output file once per response rather than once per row.
	        out_path = os.path.join(os.path.abspath(''), 'output', 'test.txt')
	        with open(out_path, 'a') as f:
	            f.writelines(lines)
No results found