Skip to content

Instantly share code, notes, and snippets.

@ziplokk1
Created July 10, 2017 01:19
Show Gist options
  • Save ziplokk1/6226d67951a0d0bbf12e14161a96b2e5 to your computer and use it in GitHub Desktop.
Save ziplokk1/6226d67951a0d0bbf12e14161a96b2e5 to your computer and use it in GitHub Desktop.
import time
from scrapy.downloadermiddlewares.retry import RetryMiddleware
from scrapy.utils.response import response_status_message
class PauseOnStatusMiddleware(RetryMiddleware):
    """Pause, then retry, requests whose response status is throttled.

    When a response's status code is listed in the ``TIMEOUT_STATUS_CODES``
    setting (for example a 503 from Amazon), the middleware sleeps for
    ``RETRY_TIMEOUT`` seconds (falling back to ``TIMEOUT``), bumps the
    request's ``cookiejar`` meta key and hands the request to
    ``RetryMiddleware._retry``.
    """

    # Default pause in seconds, used when RETRY_TIMEOUT is not configured.
    TIMEOUT = 2.5

    def __init__(self, settings):
        """Read the pause configuration from ``settings``.

        :param settings: settings object providing ``getlist`` and
            ``getfloat``; ``TIMEOUT_STATUS_CODES`` and ``RETRY_TIMEOUT``
            are read from it.
        """
        RetryMiddleware.__init__(self, settings)
        self.timeout_status_codes = {
            int(code) for code in settings.getlist('TIMEOUT_STATUS_CODES')
        }
        # getfloat() coerces string values (e.g. ``-s RETRY_TIMEOUT=5`` on
        # the command line) so time.sleep() always receives a number.
        self.TIMEOUT = settings.getfloat('RETRY_TIMEOUT', self.TIMEOUT)

    def process_response(self, request, response, spider):
        """Pause and retry ``request`` if ``response`` has a listed status.

        :param request: the request that produced ``response``.
        :param response: the response being processed.
        :param spider: the spider the request belongs to.
        :returns: the value of ``self._retry(...)`` when it is truthy,
            otherwise ``response`` unchanged.
        """
        if request.meta.get('dont_retry', False):
            return response
        if response.status not in self.timeout_status_codes:
            return response

        # Switch to the next cookie jar so the retry does not reuse the
        # cookies of the attempt that was rejected.
        # NOTE(review): assumes 'cookiejar' is an integer key; a non-numeric
        # value would raise TypeError here -- confirm against callers.
        request.meta['cookiejar'] = request.meta.get('cookiejar', 0) + 1

        # NOTE(review): time.sleep() blocks the calling thread; presumably
        # this stalls every in-flight request while waiting -- confirm that
        # pausing the whole crawl is the intended behaviour.
        time.sleep(self.TIMEOUT)

        reason = response_status_message(response.status)
        return self._retry(request, reason, spider) or response
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment