A custom item exporter for Scrapy, together with a file-based pipeline that opens one output file per spider and writes each scraped item as a formatted line.
from os.path import join

from scrapy import signals
from scrapy.contrib.exporter import BaseItemExporter  # moved to scrapy.exporters in Scrapy >= 1.0

from pipeline_base import StorePipeline


class CustomItemExporter(BaseItemExporter):

    def __init__(self, file, **kwargs):
        self._configure(kwargs, dont_fail=True)
        self.file = file

    @staticmethod
    def format_output(item):
        # add code here to build the output line you want for each item
        pass

    def export_item(self, item):
        output = '%s\n' % self.format_output(item)
        self.file.write(output)


class FsLinesPipeline(StorePipeline):

    def __init__(self, data_path):
        self.files = {}
        self.data_path = data_path

    @classmethod
    def from_crawler(cls, crawler):
        settings = crawler.settings
        pipeline = cls(data_path=settings.get('DATA_PATH'))
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        # one output file per spider, named by the spider's output_file attribute
        file = open(join(self.data_path, spider.output_file), 'w+b')
        self.files[spider] = file
        self.exporter = CustomItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
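A minimal usage sketch follows. It assumes the code above lives in a module such as myproject.pipelines, that the project defines a DATA_PATH setting, and that each spider sets an output_file attribute; the field names in the example exporter (title, url) and all module/file names are hypothetical placeholders, not part of the original gist.

# Hypothetical concrete exporter: emits tab-separated values for two assumed item fields.
class TabDelimitedItemExporter(CustomItemExporter):

    @staticmethod
    def format_output(item):
        # 'title' and 'url' are example field names
        return '\t'.join([item['title'], item['url']])


# settings.py (sketch) -- enable the pipeline and point it at an output directory
DATA_PATH = '/tmp/scrapy-output'            # directory the pipeline writes into
ITEM_PIPELINES = {
    'myproject.pipelines.FsLinesPipeline': 300,
}


# Each spider must declare the filename the pipeline should write to.
import scrapy

class ExampleSpider(scrapy.Spider):
    name = 'example'
    output_file = 'example_items.txt'       # consumed by FsLinesPipeline.spider_opened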