Skip to content

Instantly share code, notes, and snippets.

@clemfromspace
Created November 24, 2017 04:34
Show Gist options
  • Save clemfromspace/2edb88a79de3d6dde0d93c68354db385 to your computer and use it in GitHub Desktop.
Save clemfromspace/2edb88a79de3d6dde0d93c68354db385 to your computer and use it in GitHub Desktop.
Running a Scrapy spider from a Celery task
from billiard.context import Process
from scrapy.crawler import Crawler
from scrapy import signals
from scrapy.utils.project import get_project_settings
from twisted.internet import reactor
from celery_app import app
class CrawlerProcess(Process):
    """Child process that runs one Scrapy crawl inside its own Twisted reactor.

    The crawl is executed in ``run()``, i.e. in the child process once
    ``start()`` has been called. Presumably a separate process is used because
    a Twisted reactor cannot be restarted within a long-lived worker process --
    TODO confirm against the deployment.
    """

    def __init__(self, spider):
        """Build the crawler for ``spider`` and arrange for reactor shutdown.

        Args:
            spider: A spider instance. Its class is given to ``Crawler`` and
                the instance itself is later passed to ``Crawler.crawl``.
        """
        Process.__init__(self)
        settings = get_project_settings()
        self.crawler = Crawler(spider.__class__, settings)
        # Normal termination path: stop the reactor once the spider has closed.
        self.crawler.signals.connect(reactor.stop, signal=signals.spider_closed)
        self.spider = spider
        # Failure captured from the crawl's Deferred, if any (see _abort).
        self._failure = None

    def run(self):
        """Run the crawl and block until the reactor has stopped.

        Raises:
            Exception: Whatever error made the crawl's Deferred fail, re-raised
                after the reactor has shut down so the process exits non-zero
                instead of hanging.
        """
        # Start the crawl from inside the running reactor: a crawl that fails
        # immediately can then still stop the reactor from its errback.
        reactor.callWhenRunning(self._start_crawl)
        reactor.run()
        if self._failure is not None:
            self._failure.raiseException()

    def _start_crawl(self):
        """Kick off the crawl and hook up failure handling."""
        # NOTE(review): in Scrapy >= 1.0, Crawler.crawl() forwards its
        # arguments to the spider class constructor rather than accepting a
        # spider instance -- confirm this call matches the Scrapy version used.
        deferred = self.crawler.crawl(self.spider)
        deferred.addErrback(self._abort)

    def _abort(self, failure):
        """Remember the crawl failure and stop the reactor."""
        from twisted.internet.error import ReactorNotRunning

        self._failure = failure
        try:
            reactor.stop()
        except ReactorNotRunning:
            # The spider_closed handler already requested the shutdown.
            pass
@app.task
def crawl(spider_klass, *args, **kwargs):
    """Celery task: instantiate a spider and crawl it in a child process.

    Args:
        spider_klass: Callable (normally a spider class) used to build the
            spider.
        *args: Positional arguments forwarded to ``spider_klass``.
        **kwargs: Keyword arguments forwarded to ``spider_klass``.

    Raises:
        RuntimeError: If the crawl process terminates with a non-zero exit
            code, so a crashed or killed crawl is not reported as a success.
    """
    spider = spider_klass(*args, **kwargs)
    process = CrawlerProcess(spider)
    process.start()
    # Block the task until the child process (and therefore the crawl) ends.
    process.join()
    if process.exitcode != 0:
        raise RuntimeError(
            "Crawler process exited with code {}".format(process.exitcode)
        )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment