-
-
Save orenshk/84b3760484b7330db4205c2de10f77ce to your computer and use it in GitHub Desktop.
import json
import logging
import random
import time

import boto3
from botocore.exceptions import ClientError
class BotoBackoff(object):
    """
    Wrap a client for an AWS service such that every call is backed by
    exponential backoff with jitter.

    Attribute access is proxied to the underlying boto3 client. Non-callable
    attributes are returned unchanged; callables are returned wrapped so that
    a ``ClientError`` whose message contains "Rate exceeded" is retried after
    a randomized sleep. Any other error propagates to the caller unchanged.

    Retries are unbounded: the wrapper never gives up on a rate-limit error.
    ``max_retries`` only caps the backoff exponent, so a single sleep is at
    most ``min_sleep_time * 2 ** max_retries`` seconds.

    Examples:
        >>> ecs = BotoBackoff('ecs')
        >>> ecs.list_tasks(cluster='my-cluster')

    Args:
        service (str): Name of AWS Service to wrap.
        min_sleep_time (float): The base unit of sleep time, in seconds, used
            when a call is rate limited.
        max_retries (int): The largest backoff exponent. Once the retry
            counter exceeds it, the counter is reset to a random value in
            ``[1, max_retries]`` and retrying continues.
    """

    def __init__(self, service, min_sleep_time=1e-2, max_retries=15):
        self._service = boto3.client(service)
        self.min_sleep_time = min_sleep_time
        self.max_retries = max_retries
        # NOTE(review): LOGGER_NAME is not defined in the visible source; it
        # is presumably a module-level constant -- confirm it exists.
        self.logger = logging.getLogger(LOGGER_NAME)

    @staticmethod
    def _describe_arguments(api_kwargs):
        """Render call arguments for debug logging without ever raising."""
        try:
            # default=str is deliberate: this output is only for log readers,
            # and values such as datetime or bytes must not break a retry.
            return json.dumps(
                api_kwargs, indent=4, separators=(',', ': '), default=str
            )
        except (TypeError, ValueError):
            # Non-string nested keys or circular references.
            return repr(api_kwargs)

    def __getattr__(self, item):
        """
        Proxy ``item`` to the wrapped client, adding backoff to callables.

        Raises:
            AttributeError: If the wrapped client has no such attribute, or
                if the client itself was never set on this instance.
        """
        if item == '_service':
            # __getattr__ only runs when normal lookup fails, so reaching this
            # point means _service was never assigned (e.g. __init__ failed or
            # was bypassed by copy/unpickle). Looking it up again through
            # self._service would recurse without bound.
            raise AttributeError(item)

        fn = getattr(self._service, item)
        if not callable(fn):
            return fn

        def call_with_backoff(*api_args, **api_kwargs):
            # Positional arguments are forwarded too: client helpers such as
            # get_paginator/get_waiter are normally called positionally.
            num_retries = 0
            # Guard against max_retries < 1, which would make randint(1, 0)
            # raise ValueError below.
            retry_cap = max(1, self.max_retries)
            while True:
                try:
                    self.logger.debug('BotoBackoff Calling %s', fn)
                    return fn(*api_args, **api_kwargs)
                except ClientError as err:
                    if "Rate exceeded" not in str(err):
                        # let the caller handle every other error.
                        raise

                    # We were rate limited: sleep for a bit, then try again.
                    # The retry counter determines the sleep time:
                    #   min_sleep_time * random.randint(1, 2 ** num_retries)
                    # which is at most min_sleep_time * 2 ** max_retries.
                    # After max_retries we can't give up, so the counter is
                    # reset to a random value to avoid collision with other
                    # threads that are backing off at the same time.
                    num_retries += 1
                    if num_retries > retry_cap:
                        num_retries = random.randint(1, retry_cap)
                    sleep_time = self.min_sleep_time * random.randint(1, 2 ** num_retries)

                    self.logger.debug(
                        "%s Hit retry limit, sleeping for %s seconds", item, sleep_time
                    )
                    if self.logger.isEnabledFor(logging.DEBUG):
                        # Only serialize the arguments when they will be logged.
                        self.logger.debug(
                            "arguments: %s", self._describe_arguments(api_kwargs)
                        )
                    self.logger.error(err)
                    time.sleep(sleep_time)

        return call_with_backoff
> # After max_retries, we can't give up
Therefore that variable is not the maximum number of retries.
This code implements infinite retries.
Also, boto automatically retries a certain number of times (the default is usually 4 retries), so each "retry" here likely amounts to 5 attempts.
> Also, boto automatically retries a certain number of times (usually the default is 4 retries). so each "retry" here is likely 5 tries each time
Can you elaborate on this a bit? Thanks
@ElijahLynn
Boto already has backoff and jittering preconfigured, although it is not very customizable yet. If you want to use the above method I would set the max_attempts to 0.
When the rate is exceeded, is there a way to capture the revised payload (`UnprocessedItems`), or do we need to retry writing the whole payload again?
Just a note, missing random, time, and json imports
Thanks for the snippet, very useful and effective.