Created
May 17, 2010 21:18
-
-
Save rmax/404240 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scrapy.core import signals | |
from scrapy import log | |
from scrapy.xlib.pydispatch import dispatcher | |
import time | |
class ElapsedTimeMiddleware(object):
    """Log how long each spider took to crawl.

    Connects to the ``spider_opened`` and ``spider_closed`` signals. A start
    time is recorded when a spider opens, and the elapsed wall-clock time
    (from ``time.time()``) is logged when that spider closes.
    """

    def __init__(self):
        # Maps id(spider) -> {'name': ..., 'start_time': ...} for every
        # spider that has been opened but not yet closed.
        self._registry = {}
        dispatcher.connect(self.spider_opened, signal=signals.spider_opened)
        dispatcher.connect(self.spider_closed, signal=signals.spider_closed)

    def spider_opened(self, spider):
        """Record the name and start time of ``spider``.

        ``spider`` must expose a ``domain_name`` attribute.
        """
        # `id` is used as the key so the registry does not keep a reference
        # to the spider object itself.
        self._registry[id(spider)] = {
            'name': spider.domain_name,
            'start_time': time.time(),
        }

    def spider_closed(self, spider, reason):
        """Log the elapsed crawl time of ``spider`` and forget about it.

        ``reason`` is stored alongside the timing data. Spiders that were
        never registered by ``spider_opened`` are ignored silently.
        """
        # Remove the entry up front (single lookup) so that it is released
        # even if the logging call below raises.
        registry = self._registry.pop(id(spider), None)
        if registry is None:
            # @@@: spider did not get registered on the opened event?
            return

        registry['end_time'] = time.time()
        registry['reason'] = reason

        # Do something with the collected data: here it is just logged.
        elapsed = registry['end_time'] - registry['start_time']
        log.msg('<%s> crawled in %s seconds' % (registry['name'], elapsed))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Project settings: register the middleware under its dotted path, mapped to
# the integer 500.
SPIDER_MIDDLEWARES = {
    'myproject.middleware.ElapsedTimeMiddleware': 500,
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment