Created
April 19, 2021 22:04
-
-
Save s-hertel/b87bad21adea00e4a91fd68a307e5fc6 to your computer and use it in GitHub Desktop.
Example of a generic retry wrapper in the reboot module
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/lib/ansible/plugins/action/reboot.py b/lib/ansible/plugins/action/reboot.py | |
index 64397b12fa..12ecdaee0b 100644 | |
--- a/lib/ansible/plugins/action/reboot.py | |
+++ b/lib/ansible/plugins/action/reboot.py | |
@@ -12,6 +12,7 @@ from datetime import datetime, timedelta | |
from ansible.errors import AnsibleError, AnsibleConnectionFailure | |
from ansible.module_utils._text import to_native, to_text | |
+from ansible.module_utils.api import retry_with_delays_and_condition | |
from ansible.module_utils.common.validation import check_type_list, check_type_str | |
from ansible.plugins.action import ActionBase | |
from ansible.utils.display import Display | |
@@ -279,46 +280,57 @@ class ActionModule(ActionBase): | |
display.vvv("{action}: system successfully rebooted".format(action=self._task.action)) | |
+ def do_on_retry(self, action, description, connection): | |
+ def handle_exception(exception, delay): | |
+ if isinstance(exception, AnsibleConnectionFailure): | |
+ try: | |
+ connection.reset() | |
+ except AnsibleConnectionFailure: | |
+ pass | |
+ | |
+ if description: | |
+ try: | |
+ error = to_text(exception).splitlines()[-1] | |
+ except IndexError as e: | |
+ error = to_text(e) | |
+ display.debug("{action}: {desc} fail '{err}', retrying in {sleep:.4} seconds...".format( | |
+ action=action, | |
+ desc=description, | |
+ err=error, | |
+ sleep=delay)) | |
+ return handle_exception | |
+ | |
+ def generate_exponential_backoff(self, reboot_timeout): | |
+ max_end_time = datetime.utcnow() + timedelta(seconds=reboot_timeout) | |
+ max_fail_sleep = 12 | |
+ total_time = 0 | |
+ fail_count = 0 | |
+ | |
+ while total_time < reboot_timeout: | |
+ random_int = random.randint(0, 1000) / 1000 | |
+ fail_sleep = 2 ** fail_count + random_int | |
+ if fail_sleep > max_fail_sleep: | |
+ fail_sleep = max_fail_sleep + random_int | |
+ | |
+ total_time += fail_sleep | |
+ fail_count += 1 | |
+ yield fail_sleep | |
+ | |
def do_until_success_or_timeout(self, action, reboot_timeout, action_desc, distribution, action_kwargs=None): | |
max_end_time = datetime.utcnow() + timedelta(seconds=reboot_timeout) | |
if action_kwargs is None: | |
action_kwargs = {} | |
- fail_count = 0 | |
- max_fail_sleep = 12 | |
- | |
- while datetime.utcnow() < max_end_time: | |
- try: | |
- action(distribution=distribution, **action_kwargs) | |
- if action_desc: | |
- display.debug('{action}: {desc} success'.format(action=self._task.action, desc=action_desc)) | |
- return | |
- except Exception as e: | |
- if isinstance(e, AnsibleConnectionFailure): | |
- try: | |
- self._connection.reset() | |
- except AnsibleConnectionFailure: | |
- pass | |
- # Use exponential backoff with a max timout, plus a little bit of randomness | |
- random_int = random.randint(0, 1000) / 1000 | |
- fail_sleep = 2 ** fail_count + random_int | |
- if fail_sleep > max_fail_sleep: | |
- | |
- fail_sleep = max_fail_sleep + random_int | |
- if action_desc: | |
- try: | |
- error = to_text(e).splitlines()[-1] | |
- except IndexError as e: | |
- error = to_text(e) | |
- display.debug("{action}: {desc} fail '{err}', retrying in {sleep:.4} seconds...".format( | |
- action=self._task.action, | |
- desc=action_desc, | |
- err=error, | |
- sleep=fail_sleep)) | |
- fail_count += 1 | |
- time.sleep(fail_sleep) | |
- | |
- raise TimedOutException('Timed out waiting for {desc} (timeout={timeout})'.format(desc=action_desc, timeout=reboot_timeout)) | |
+ try: | |
+ retry_with_delays_and_condition( | |
+ backoff_iterator=self.generate_exponential_backoff(reboot_timeout), | |
+ retry_condition=lambda x: True, | |
+ do_on_retry=self.do_on_retry(self._task.action, action_desc, self._connection) | |
+ )(action)(distribution=distribution, **action_kwargs) | |
+ except Exception as e: | |
+ raise TimedOutException('Timed out waiting for {desc} (timeout={timeout})'.format(desc=action_desc, timeout=reboot_timeout)) | |
+ if action_desc: | |
+ display.debug('{action}: {desc} success'.format(action=self._task.action, desc=action_desc)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment