-
-
Save Jeffallan/33e6511e7e82ada5e537f95cb70af903 to your computer and use it in GitHub Desktop.
Scrapy MySQL pipeline. A mirror of the asynchronous MySQL pipeline snippet.
Copy and paste it directly into pipelines.py. Database credentials are stored in settings.py. Based on http://snipplr.com/view/66986/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import MySQLdb.cursors | |
from twisted.enterprise import adbapi | |
from scrapy.xlib.pydispatch import dispatcher | |
from scrapy import signals | |
from scrapy.utils.project import get_project_settings | |
from scrapy import log | |
# Project settings, loaded once when this module is imported. MySQLPipeline
# uses this object for its DB_* connection parameters.
SETTINGS = get_project_settings()
class MySQLPipeline(object):
    """Scrapy item pipeline that writes to MySQL through a Twisted adbapi pool.

    Each item triggers an interaction on an ``adbapi.ConnectionPool``;
    ``process_item`` hands the item back right away and failures are reported
    through an errback. Connection parameters are read from the ``DB_HOST``,
    ``DB_PORT``, ``DB_USER``, ``DB_PASSWD`` and ``DB_DB`` settings.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from *crawler*.

        The crawler's own settings object is forwarded so that overrides
        applied to this crawl are used, instead of only the module-level
        ``SETTINGS`` snapshot taken at import time.
        """
        return cls(crawler.stats, crawler.settings)

    def __init__(self, stats, settings=None):
        """Open the connection pool and register the shutdown hook.

        Args:
            stats: stats collector; ``inc_value`` is called on it for every
                successful insert.
            settings: mapping-like object providing the ``DB_*`` keys.
                Defaults to the module-level ``SETTINGS`` so existing
                ``MySQLPipeline(stats)`` calls keep working.
        """
        if settings is None:
            settings = SETTINGS
        # Instantiate DB
        self.dbpool = adbapi.ConnectionPool(
            'MySQLdb',
            cursorclass=MySQLdb.cursors.DictCursor,
            **self._connection_kwargs(settings)
        )
        self.stats = stats
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    @staticmethod
    def _connection_kwargs(settings):
        """Return the MySQLdb connection keyword arguments taken from *settings*."""
        port = settings['DB_PORT']
        if port is not None:
            # Overrides given on the command line arrive as strings, while
            # the driver expects an integer port.
            port = int(port)
        return {
            'host': settings['DB_HOST'],
            'user': settings['DB_USER'],
            'passwd': settings['DB_PASSWD'],
            'port': port,
            'db': settings['DB_DB'],
            'charset': 'utf8',
            'use_unicode': True,
        }

    def spider_closed(self, spider):
        """Cleanup function, called after crawling has finished to close open
        objects.

        Closes the ConnectionPool.
        """
        self.dbpool.close()

    def process_item(self, item, spider):
        """Schedule the insert for *item* on the pool and return *item* unchanged.

        The item is returned without waiting for the interaction to finish;
        a failure is routed to ``_handle_error``.
        """
        query = self.dbpool.runInteraction(self._insert_record, item)
        query.addErrback(self._handle_error)
        return item

    def _insert_record(self, tx, item):
        """Run the INSERT inside the interaction and count it when it succeeds.

        Args:
            tx: transaction/cursor object supplied by ``runInteraction``.
            item: the scraped item.
        """
        # NOTE(review): placeholder statement -- it inserts constants and does
        # not use `item`; `table` is also a reserved word in MySQL. Replace it
        # with a parameterized INSERT for your own table and item fields.
        result = tx.execute(
            """ INSERT INTO table VALUES (1,2,3)"""
        )
        if result > 0:
            self.stats.inc_value('database/items_added')

    def _handle_error(self, e):
        """Log a failure raised by the database interaction."""
        log.err(e)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Add these to your settings.py file.
# Database settings read by MySQLPipeline when it opens its connection pool.
# NOTE(review): the values below look like placeholders -- replace with your own.
DB_HOST = 'localhost'  # passed to the driver as `host`
DB_PORT = 3306  # passed to the driver as `port`
DB_USER = 'user'  # passed to the driver as `user`
DB_PASSWD = 'password'  # passed to the driver as `passwd`
DB_DB = 'database'  # passed to the driver as `db`
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment