Skip to content

Instantly share code, notes, and snippets.

@ownport
Created August 23, 2015 12:41
Show Gist options
  • Save ownport/6209982820999f06f817 to your computer and use it in GitHub Desktop.
Save ownport/6209982820999f06f817 to your computer and use it in GitHub Desktop.
Scrapy: Spiders runner
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Scrapy Spiders runner
#
# Based on:
# - http://kirankoduru.github.io/python/multiple-scrapy-spiders.html
# - https://github.com/kirankoduru/scrapy-programmatically
#
# updated on 2015-08-23
#
import scrapy
from twisted.internet import reactor
from scrapy import log, signals
from scrapy.crawler import Crawler
from scrapy.utils.project import get_project_settings
# Spiders that are running.
# spider_start() appends each spider it launches and spider_stop() removes
# it again; spider_stop() stops the reactor once this list is empty.
RUNNING_SPIDERS = []
def spider_start(spider):
    """Create a dedicated crawler for one spider and start it.

    The created spider object is recorded in ``RUNNING_SPIDERS`` so that
    ``spider_stop`` can tell when the last one has finished.

    Args:
        spider: Value handed to ``crawler.spiders.create()``. The script
            passes the entries of ``crawler.spiders.list()`` (presumably
            spider names -- confirm against the Scrapy version in use).

    Returns:
        None.
    """
    # The module-level ``settings`` is only bound by the ``__main__`` block.
    # Fall back to the project settings so the function also works when this
    # module is imported instead of being executed as a script.
    try:
        crawler_settings = settings
    except NameError:
        crawler_settings = get_project_settings()

    crawler = Crawler(crawler_settings)
    # stop reactor when spider closes
    crawler.signals.connect(spider_stop, signal=signals.spider_closed)
    crawler.configure()

    new_spider = crawler.spiders.create(spider)
    # Track the spider before crawling starts so spider_stop() can remove it.
    RUNNING_SPIDERS.append(new_spider)
    crawler.crawl(new_spider)
    crawler.start()
def spider_stop(spider):
    """Forget a finished spider and stop the reactor after the last one.

    Connected to the ``spider_closed`` signal by ``spider_start``.

    Args:
        spider: The spider object to remove from ``RUNNING_SPIDERS``.

    Raises:
        ValueError: If ``spider`` is not present in ``RUNNING_SPIDERS``
            (raised by ``list.remove``).
    """
    RUNNING_SPIDERS.remove(spider)
    if RUNNING_SPIDERS:
        # Other spiders are still being tracked; keep the reactor running.
        return
    reactor.stop()
if __name__ == '__main__':
    # Script entry point: start every spider the project exposes and keep
    # the Twisted reactor running until spider_stop() stops it.
    log.start()
    # spider_start() reads this module-level name, so it must stay global.
    settings = get_project_settings()

    # This crawler is only used to enumerate the project's spiders; each
    # spider gets its own crawler inside spider_start().
    crawler = Crawler(settings)
    crawler.configure()
    for spider in crawler.spiders.list():
        spider_start(spider)

    # If no spider was started, nothing would ever call reactor.stop() and
    # reactor.run() would block forever, so only enter the event loop when
    # at least one spider is being tracked.
    if RUNNING_SPIDERS:
        reactor.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment