@Irio
Created April 14, 2017 12:22
# Runs a Scrapy spider in a separate process and collects the scraped items
# through a multiprocessing queue. Note: this targets the pre-1.0 Scrapy API
# (scrapy.conf, scrapy.project, and scrapy.xlib.pydispatch were later removed).
import multiprocessing
from multiprocessing import Queue

from scrapy import project, signals
from scrapy.conf import settings
from scrapy.crawler import CrawlerProcess
from scrapy.xlib.pydispatch import dispatcher


class CrawlerWorker(multiprocessing.Process):
    def __init__(self, spider, result_queue):
        multiprocessing.Process.__init__(self)
        self.result_queue = result_queue
        self.crawler = CrawlerProcess(settings)
        if not hasattr(project, 'crawler'):
            self.crawler.install()
        self.crawler.configure()
        self.items = []
        self.spider = spider
        # Append every item as it passes through the item pipeline.
        dispatcher.connect(self._item_passed, signals.item_passed)

    def _item_passed(self, item):
        self.items.append(item)

    def run(self):
        self.crawler.crawl(self.spider)
        self.crawler.start()
        self.crawler.stop()
        # Hand the collected items back to the parent process.
        self.result_queue.put(self.items)


# `yield` is only valid inside a function, so the driver code must live in a
# generator (the function name here is illustrative). MySpider and myArgs are
# placeholders for your own spider class and its arguments.
def run_crawler():
    result_queue = Queue()
    crawler = CrawlerWorker(MySpider(myArgs), result_queue)
    crawler.start()
    for item in result_queue.get():
        yield item
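
The modules imported above (scrapy.conf, scrapy.project, scrapy.xlib.pydispatch) no longer exist in current Scrapy releases, so the snippet will not run as-is on 1.x or later. Below is a minimal sketch of the same pattern against the modern API; the helper names (_crawl, run_spider) are illustrative, and MySpider is assumed to be your own spider class.

from multiprocessing import Process, Queue

from scrapy import signals
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings


def _crawl(spider_cls, result_queue):
    # Runs one crawl in this (child) process and queues the scraped items.
    items = []

    def _item_scraped(item, **kwargs):
        items.append(item)

    process = CrawlerProcess(get_project_settings())
    crawler = process.create_crawler(spider_cls)
    crawler.signals.connect(_item_scraped, signal=signals.item_scraped)
    process.crawl(crawler)
    process.start()  # blocks until the crawl finishes
    result_queue.put(items)


def run_spider(spider_cls):
    # Run the crawl in a separate process and return the collected items.
    result_queue = Queue()
    worker = Process(target=_crawl, args=(spider_cls, result_queue))
    worker.start()
    items = result_queue.get()
    worker.join()
    return items


# items = run_spider(MySpider)

Running each crawl in its own process is what makes this pattern useful: Twisted's reactor can only be started once per process, so a fresh child process sidesteps the ReactorNotRestartable error when crawling repeatedly.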