Created
January 8, 2015 07:36
-
-
Save 2shou/befe2b1b4c5eebaadac8 to your computer and use it in GitHub Desktop.
Custom item exporter for Scrapy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scrapy.contrib.exporter import BaseItemExporter | |
from scrapy import signals, log | |
from pipeline_base import StorePipeline | |
from os.path import join | |
class CustomItemExporter(BaseItemExporter):
    """Item exporter that writes one formatted line per item to a file.

    Fill in ``format_output`` to control the text emitted for each item.
    """

    def __init__(self, file, **kwargs):
        """Configure the exporter and remember the output file.

        Args:
            file: File-like object with a ``write`` method; receives the
                exported lines.
            **kwargs: Exporter options, handed to ``_configure`` with
                ``dont_fail=True``.
        """
        self._configure(kwargs, dont_fail=True)
        self.file = file

    @staticmethod
    def format_output(item):
        """Return the text to write for ``item``.

        Placeholder: as written it returns ``None``, so ``export_item``
        emits the literal line ``None`` until this is implemented.
        """
        # add code to generate output you want to
        pass

    def export_item(self, item):
        """Write the formatted form of ``item`` plus a trailing newline."""
        # Fix: the original referenced the undefined name ``sef`` here,
        # which raised NameError on every exported item.
        formatted = self.format_output(item)
        # Wrap in a one-element tuple so a tuple-valued result is rendered
        # as a whole instead of being unpacked as %-format arguments.
        self.file.write('%s\n' % (formatted,))
class FsLinesPipeline(StorePipeline):
    """Pipeline that exports each scraped item as a line in a spider's file.

    The output file is ``<data_path>/<spider.output_file>``; it is opened
    when the spider opens and closed when the spider closes.
    """

    def __init__(self, data_path):
        """Create the pipeline.

        Args:
            data_path: Directory under which spider output files are opened.
        """
        # Open file handles keyed by spider.
        self.files = {}
        self.data_path = data_path

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from crawler settings and hook spider signals.

        Reads the ``DATA_PATH`` setting and connects ``spider_opened`` /
        ``spider_closed`` to the matching crawler signals.
        """
        settings = crawler.settings
        pipeline = cls(data_path=settings.get('DATA_PATH'))
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        """Open the spider's output file and start an exporter on it."""
        # NOTE(review): the file is opened in binary mode while the exporter
        # writes ``str`` values -- fine on Python 2; confirm before porting.
        out_file = open(join(self.data_path, spider.output_file), 'w+b')
        self.files[spider] = out_file
        # Fix: the original instantiated ``DelimitedItemExporter``, a name
        # that is neither imported nor defined in this file; the exporter
        # defined here is ``CustomItemExporter``.
        self.exporter = CustomItemExporter(out_file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish exporting and close the spider's output file."""
        try:
            self.exporter.finish_exporting()
        finally:
            # Close the handle even when finishing the export fails, so the
            # file is not leaked and does not linger in ``self.files``.
            out_file = self.files.pop(spider)
            out_file.close()

    def process_item(self, item, spider):
        """Export ``item`` through the active exporter and pass it along."""
        self.exporter.export_item(item)
        return item
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment