Last active
August 29, 2018 12:38
-
-
Save matiskay/3903347aa7da1d928b81 to your computer and use it in GitHub Desktop.
A Scrapy item pipeline that stores scraped items using dataset (https://dataset.readthedocs.org/en/latest/)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import dataset | |
class DatasetPipeline(object):
    """Scrapy item pipeline that stores every scraped item via ``dataset``.

    The connection URI and the target table name are read from the crawler
    settings ``DATASET_URI`` and ``DATASET_TABLE`` (see ``from_crawler``).
    """

    def __init__(self, dataset_uri, dataset_table):
        """Remember where to connect and which table to write to.

        Args:
            dataset_uri: Connection URI later passed to ``dataset.connect``.
            dataset_table: Name of the table that items are inserted into.
        """
        self.dataset_uri = dataset_uri
        self.dataset_table = dataset_table

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the crawler's settings.

        ``DATASET_TABLE`` falls back to ``'items'`` when it is not set;
        ``DATASET_URI`` has no fallback and is ``None`` when missing.
        """
        return cls(
            dataset_uri=crawler.settings.get('DATASET_URI'),
            dataset_table=crawler.settings.get('DATASET_TABLE', 'items'),
        )

    def open_spider(self, spider):
        """Open the database connection when the spider starts."""
        self.db = dataset.connect(self.dataset_uri)

    def close_spider(self, spider):
        """Spider-close hook; intentionally does nothing."""
        pass

    def process_item(self, item, spider):
        """Insert ``item`` into the configured table and pass it on.

        Returns:
            The same ``item`` object. Scrapy hands the return value of each
            pipeline stage to the next one, so returning nothing would make
            later stages and exporters receive ``None``.
        """
        self.db[self.dataset_table].insert(item)
        return item
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Please add `return item` at the end of `process_item`.