Skip to content

Instantly share code, notes, and snippets.

@rmax
Created May 17, 2010 21:18
Show Gist options
  • Save rmax/404240 to your computer and use it in GitHub Desktop.
Save rmax/404240 to your computer and use it in GitHub Desktop.
from scrapy.core import signals
from scrapy import log
from scrapy.xlib.pydispatch import dispatcher
import time
class ElapsedTimeMiddleware(object):
    """Log the wall-clock time each spider spent between open and close.

    The middleware subscribes to the ``spider_opened`` and ``spider_closed``
    signals. On open it records the spider's ``domain_name`` and the current
    time; on close it computes the elapsed seconds and writes them to the
    log via ``log.msg``.
    """

    def __init__(self):
        # Maps id(spider) -> {'name': ..., 'start_time': ...} for every
        # spider that has been opened but not yet closed.
        self._registry = {}
        dispatcher.connect(self.spider_opened, signal=signals.spider_opened)
        dispatcher.connect(self.spider_closed, signal=signals.spider_closed)

    def spider_opened(self, spider):
        """Record the name and start time of ``spider``."""
        # Key by `id` so the registry does not keep a reference to the
        # spider object itself.
        self._registry[id(spider)] = {
            'name': spider.domain_name,
            'start_time': time.time(),
        }

    def spider_closed(self, spider, reason):
        """Log how long ``spider`` ran and drop its registry entry.

        ``reason`` is the close reason delivered with the signal; it is
        stored next to the timing data. Spiders for which no open event
        was recorded are ignored.
        """
        # Remove the entry up front: the registry is not needed afterwards,
        # and popping first guarantees it is released even if the reporting
        # below raises.
        registry = self._registry.pop(id(spider), None)
        if registry is None:
            # No open event was recorded for this spider, so there is no
            # start time to measure from.
            return

        registry['end_time'] = time.time()
        registry['reason'] = reason
        # do something with the data
        elapsed = registry['end_time'] - registry['start_time']
        log.msg('<%s> crawled in %s seconds' % (registry['name'], elapsed))
# Spider-middleware setting: maps the dotted path of the middleware class to
# the integer 500.
# NOTE(review): presumably this belongs in the project's settings module and
# 500 is the middleware's ordering value -- confirm against the Scrapy docs.
SPIDER_MIDDLEWARES = {"myproject.middleware.ElapsedTimeMiddleware": 500}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment