# Get the next task from some task queue, then create and start a crawler.
# Note: this uses the legacy Scrapy 0.x API (CrawlerSettings, settings.overrides,
# BaseSpider), which later Scrapy releases replaced with CrawlerProcess/Spider.
from scrapy.crawler import Crawler
from scrapy.settings import CrawlerSettings

def start_crawler():
    settings = CrawlerSettings()
    # Disable the telnet console and web service so parallel crawler
    # processes do not collide on the same ports
    settings.overrides['TELNETCONSOLE_ENABLED'] = False
    settings.overrides['WEBSERVICE_ENABLED'] = False
    crawler = Crawler(settings)
    crawler.configure()
    url = get_url_from_some_queue()
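
The queue helper is left undefined above. A minimal sketch of what `get_url_from_some_queue()` could look like, assuming a plain in-process Python 2 Queue of pending URLs (a real deployment might read from Redis, a database table, or a message broker instead):

import Queue  # stdlib queue; Python 2, matching the legacy Scrapy API above

# Hypothetical job queue, fed by whatever system dispatches crawl tasks
task_queue = Queue.Queue()
task_queue.put("http://www.dmoz.org/")

def get_url_from_some_queue():
    # Block until a job is available, then hand its URL to the crawler
    return task_queue.get()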

from scrapy.spider import BaseSpider

class AnotherSpider(BaseSpider):
    name = "dmoz"
    # One domain (job) per spider
    start_urls = [
        "http://www.dmoz.org/",
    ]
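
To honour the "one domain (job) per spider" comment, the URL pulled from the queue has to reach the spider instance. A sketch of the wiring under the same legacy API, reusing the names defined above (setting start_urls on the instance is an assumption; the original gist hard-codes it on the class):

from twisted.internet import reactor

def crawl_one_job():
    settings = CrawlerSettings()
    settings.overrides['TELNETCONSOLE_ENABLED'] = False
    settings.overrides['WEBSERVICE_ENABLED'] = False
    crawler = Crawler(settings)
    crawler.configure()
    spider = AnotherSpider()
    spider.start_urls = [get_url_from_some_queue()]  # one domain per spider
    crawler.crawl(spider)
    crawler.start()

# Scrapy runs on Twisted, so the reactor drives the crawl to completion
crawl_one_job()
reactor.run()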

from scrapy.spider import BaseSpider

class DmozSpider(BaseSpider):
    name = "dmoz"
    start_urls = [
        "http://www.dmoz.org/Computers/Programming/Languages/Python/Books/",
        "http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/",
    ]

    def parse(self, response):
        # The original gist ends at this signature; body left as a stub
        pass
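
The original snippet cuts off at the parse signature, so the stub above is empty. A minimal body loosely following the old Scrapy 0.x tutorial might look like this (HtmlXPathSelector and the Python 2 print statement are assumptions tied to that era, not part of the original gist):

from scrapy.selector import HtmlXPathSelector

# Inside DmozSpider, replacing the stub above
def parse(self, response):
    # Pull the title, link and description out of each directory entry
    hxs = HtmlXPathSelector(response)
    for site in hxs.select('//ul/li'):
        title = site.select('a/text()').extract()
        link = site.select('a/@href').extract()
        desc = site.select('text()').extract()
        print title, link, desc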