Skip to content

Instantly share code, notes, and snippets.

@titanjer
Created February 16, 2015 12:09
Show Gist options
  • Save titanjer/958df064330227fabebd to your computer and use it in GitHub Desktop.
from scrapy.contrib.spiders import CrawlSpider
from scraper.items import ProductItem
from scrapy import log
import re
import json
# curl http://192.168.1.100:6800/schedule.json -d project=scraper -d spider=importjl -d setting=STORE=24 -d setting=RQ_QUEUE=scraper
class ImportjlSpider(CrawlSpider):
    """Re-import previously scraped items from a JSON-lines (.jl) feed.

    Each non-blank line of the response body is parsed as a JSON object and
    re-emitted as a ProductItem — e.g. to replay a feed exported by another
    spider via the scrapyd items endpoint (see the curl example above).
    """

    name = 'importjl'
    start_urls = [
        # 'http://10.8.0.1:6800/items/scraper/dwidigitalcameras.jl'
    ]

    def parse(self, response):
        """Yield one ProductItem per JSON line in the response body.

        Blank lines are skipped: a .jl feed ends with a newline, so the
        final element of split('\\n') is '' and json.loads('') would raise
        ValueError.
        """
        self.log('> %s' % response.body, level=log.INFO)
        for line in response.body.split('\n'):
            if not line.strip():
                # Skip the trailing empty line (and any stray blanks).
                continue
            self.log('> %s' % line, level=log.INFO)
            item = ProductItem(json.loads(line))
            if item.get('description'):
                # Collapse runs of newlines; guard against a missing/empty
                # description (re.sub(None) would raise TypeError).
                item['description'] = re.sub(r'\n+', '\n', item['description'])
            self.log('> %s' % item['product_number'], level=log.INFO)
            yield item
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment