Created
August 23, 2015 12:41
-
-
Save ownport/6209982820999f06f817 to your computer and use it in GitHub Desktop.
Scrapy: Spiders runner
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Scrapy Spiders runner
#
# Based on:
# - http://kirankoduru.github.io/python/multiple-scrapy-spiders.html
# - https://github.com/kirankoduru/scrapy-programmatically
#
# updated on 2015-08-23
#
import scrapy | |
from twisted.internet import reactor | |
from scrapy import log, signals | |
from scrapy.crawler import Crawler | |
from scrapy.utils.project import get_project_settings | |
# Spiders that have been started and have not been removed by spider_stop yet.
RUNNING_SPIDERS = []
def spider_start(spider, settings=None):
    """Create a crawler for one spider and start it.

    A new ``Crawler`` is built, ``spider_stop`` is connected to its
    ``spider_closed`` signal, and the spider created through
    ``crawler.spiders.create`` is recorded in ``RUNNING_SPIDERS`` before the
    crawl is started.

    :param spider: value passed to ``crawler.spiders.create`` (the script
        entry point passes the items of ``crawler.spiders.list()``).
    :param settings: settings object handed to ``Crawler``. When omitted, the
        module-level ``settings`` is used if it exists; otherwise
        ``get_project_settings()`` is called.
    """
    if settings is None:
        # The module-level ``settings`` is only bound when this file runs as
        # a script; look it up defensively so a call made after importing
        # the module does not fail with NameError.
        settings = globals().get('settings')
    if settings is None:
        settings = get_project_settings()

    crawler = Crawler(settings)
    # stop reactor when spider closes
    crawler.signals.connect(spider_stop, signal=signals.spider_closed)
    crawler.configure()

    new_spider = crawler.spiders.create(spider)
    # Track the spider before starting it so spider_stop can remove it later.
    RUNNING_SPIDERS.append(new_spider)
    crawler.crawl(new_spider)
    crawler.start()
def spider_stop(spider):
    """Forget a tracked spider and stop the reactor once none remain.

    Connected to the ``spider_closed`` signal by ``spider_start``.

    :param spider: the spider to remove from ``RUNNING_SPIDERS``;
        ``list.remove`` raises ``ValueError`` if it is not tracked.
    """
    RUNNING_SPIDERS.remove(spider)
    if RUNNING_SPIDERS:
        # Other spiders are still tracked; keep the reactor running.
        return
    reactor.stop()
if __name__ == '__main__':
    # Script entry point: start every spider the project lists, then run the
    # Twisted reactor until spider_stop shuts it down.
    log.start()
    settings = get_project_settings()

    # This crawler is only used to enumerate the project's spiders;
    # spider_start builds a separate crawler for each of them.
    crawler = Crawler(settings)
    crawler.configure()
    for spider in crawler.spiders.list():
        spider_start(spider)

    # reactor.stop() is only ever called from spider_stop, so with no spider
    # started reactor.run() would block forever. Skip it in that case.
    if RUNNING_SPIDERS:
        reactor.run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment