Skip to content

Instantly share code, notes, and snippets.

@matiskay
Last active August 29, 2018 12:38
Show Gist options
  • Save matiskay/3903347aa7da1d928b81 to your computer and use it in GitHub Desktop.
Save matiskay/3903347aa7da1d928b81 to your computer and use it in GitHub Desktop.
import dataset
class DatasetPipeline(object):
    """Scrapy item pipeline that stores every scraped item via ``dataset``.

    The connection URI and the destination table name are read from the
    crawler settings ``DATASET_URI`` and ``DATASET_TABLE`` (the table name
    falls back to ``'items'`` when the setting is absent).
    """

    def __init__(self, dataset_uri, dataset_table):
        """Remember where to connect and which table to write to.

        Args:
            dataset_uri: Connection URI passed to ``dataset.connect``.
            dataset_table: Name of the table items are inserted into.
        """
        self.dataset_uri = dataset_uri
        self.dataset_table = dataset_table
        # Set by open_spider(); kept as None until then so close_spider()
        # is safe even if the spider never opened successfully.
        self.db = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the settings of ``crawler``."""
        return cls(
            dataset_uri=crawler.settings.get('DATASET_URI'),
            dataset_table=crawler.settings.get('DATASET_TABLE', 'items'),
        )

    def open_spider(self, spider):
        """Open the database connection when the spider starts."""
        self.db = dataset.connect(self.dataset_uri)

    def close_spider(self, spider):
        """Release the database connection when the spider finishes."""
        db = getattr(self, 'db', None)
        # Only call close() when the handle provides one, so a connection
        # object without that method is simply left alone.
        if db is not None and hasattr(db, 'close'):
            db.close()
        self.db = None

    def process_item(self, item, spider):
        """Insert ``item`` into the configured table and pass it on.

        Returns:
            The same ``item``, so that any later pipeline stage still
            receives it instead of ``None``.
        """
        self.db[self.dataset_table].insert(item)
        return item
@strubbi77
Copy link

Please add `return item` to the end of `process_item`.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment