#!/usr/bin/python -tt

import sys
import time
import atexit
import logging
import requests.exceptions

sys.path.append("/usr/share/fence")
from fencing import *
from fencing import fail_usage, is_executable, run_command, run_delay

#BEGIN_VERSION_GENERATION
RELEASE_VERSION="4.0.21.35-b44dc"
BUILD_DATE="(built Tue Dec 15 12:45:56 AEDT 2015)"
REDHAT_COPYRIGHT="Copyright (C) Red Hat, Inc. 2004-2010 All rights reserved."
#END_VERSION_GENERATION

override_status = ""
nova = None

EVACUABLE_TAG = "evacuable"
TRUE_TAGS = ['true']
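
# Overview: rather than powering off hardware, this agent tells Nova that a
# compute node is down and then reschedules (evacuates) the instances that
# were running on it.  "Power status" is therefore derived from the state of
# the node's nova-compute service, not from a real power device.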

def get_power_status(_, options):
    global override_status

    status = "unknown"
    logging.debug("get action: " + options["--action"])

    if len(override_status):
        logging.debug("Pretending we're " + override_status)
        return override_status

    if nova:
        try:
            services = nova.services.list(host=options["--plug"])

            for service in services:
                logging.debug("Status of %s is %s" % (service.binary, service.state))
                if service.binary == "nova-compute":
                    if service.state == "up":
                        status = "on"
                    elif service.state == "down":
                        status = "off"
                    else:
                        logging.debug("Unknown status detected from nova: " + service.state)
                    break
        except requests.exceptions.ConnectionError as err:
            logging.warning("Nova connection failed: " + str(err))
    return status

# NOTE(sbauza): We mimic the host-evacuate module since it's only a contrib
# module which is not stable
def _server_evacuate(server, on_shared_storage):
    success = False
    error_message = ""
    try:
        logging.debug("Resurrecting instance: %s" % server)
        (response, dictionary) = nova.servers.evacuate(server=server, on_shared_storage=on_shared_storage)

        if response is None:
            error_message = "No response while evacuating instance"
        elif response.status_code == 200:
            success = True
            error_message = response.reason
        else:
            error_message = response.reason

    except Exception as e:
        error_message = "Error while evacuating instance: %s" % e

    return {
        "uuid": server,
        "accepted": success,
        "reason": error_message,
        }

def _is_server_evacuable(server, evac_flavors, evac_images):
    if server.flavor.get('id') in evac_flavors:
        return True
    if server.image.get('id') in evac_images:
        return True
    logging.debug("Instance %s is not evacuable" % server.id)
    return False

def _get_evacuable_flavors():
    result = []
    flavors = nova.flavors.list()
    # Since the detailed view for all flavors doesn't provide the extra specs,
    # we need to call each of the flavors to get them.
    for flavor in flavors:
        # get_keys() returns the flavor's extra specs; the tag may be absent
        tag = flavor.get_keys().get(EVACUABLE_TAG)
        if tag and tag.strip().lower() in TRUE_TAGS:
            result.append(flavor.id)
    return result

def _get_evacuable_images():
    result = []
    images = nova.images.list(detailed=True)
    for image in images:
        if hasattr(image, 'metadata'):
            # The tag may be absent from the image metadata
            tag = image.metadata.get(EVACUABLE_TAG)
            if tag and tag.strip().lower() in TRUE_TAGS:
                result.append(image.id)
    return result
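
# Usage note: with --instance-filtering enabled, only instances whose flavor
# or image carries the "evacuable" tag are rescheduled.  A flavor or image is
# typically tagged with commands along these lines (IDs are placeholders):
#
#   nova flavor-key <flavor-id> set evacuable=true
#   glance image-update --property evacuable=true <image-id>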

def _host_evacuate(options):
    result = True
    servers = nova.servers.list(search_opts={'host': options["--plug"], 'all_tenants': 1})

    if options["--instance-filtering"] == "False":
        logging.debug("Evacuating all images and flavors")
        evacuables = servers
    else:
        logging.debug("Filtering images and flavors")
        flavors = _get_evacuable_flavors()
        images = _get_evacuable_images()
        # Identify all evacuable servers
        evacuables = [server for server in servers
                      if _is_server_evacuable(server, flavors, images)]

    if options["--no-shared-storage"] != "False":
        on_shared_storage = False
    else:
        on_shared_storage = True

    for server in evacuables:
        logging.debug("Processing %s" % server)
        if hasattr(server, 'id'):
            response = _server_evacuate(server.id, on_shared_storage)
            if response["accepted"]:
                logging.debug("Evacuated %s from %s: %s" %
                              (response["uuid"], options["--plug"], response["reason"]))
            else:
                logging.error("Evacuation of %s on %s failed: %s" %
                              (response["uuid"], options["--plug"], response["reason"]))
                result = False
        else:
            logging.error("Could not evacuate instance: %s" % server.to_dict())
            # Should a malformed instance result in a failed evacuation?
            # result = False

    return result

def set_attrd_status(host, status, options):
    logging.debug("Setting fencing status for %s to %s" % (host, status))
    run_command(options, "attrd_updater -p -n evacuate -Q -N %s -U %s" % (host, status))
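
# Usage note: in record-only mode the agent stores the evacuation request as
# a node attribute instead of acting on it; another component watching that
# attribute is then expected to perform the actual evacuation.  The pending
# value can be inspected with something like:
#
#   attrd_updater -p -n evacuate -Q -N <host>
#
# (<host> is a placeholder for the compute node's name.)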

def set_power_status(_, options):
    global override_status

    override_status = ""
    logging.debug("set action: " + options["--action"])

    if not nova:
        return

    if options["--action"] == "on":
        if get_power_status(_, options) == "on":
            # Forcing the service back up in case it was disabled
            nova.services.enable(options["--plug"], 'nova-compute')
            try:
                # Forcing the host back up
                nova.services.force_down(
                    options["--plug"], "nova-compute", force_down=False)
            except Exception as e:
                # In theory, if force_down=False fails, that's for the exact
                # same possible reasons as force_down=True below,
                # eg. either an incompatible version or an old client.
                # Since it's about forcing back to a default value, there is
                # no real worry in considering it still okay even if the
                # command failed.
                logging.info("Exception from attempt to force "
                             "host back up via nova API: "
                             "%s: %s" % (e.__class__.__name__, e))
        else:
            # Pretend we're 'on' so that the fencing library doesn't loop forever waiting for the node to boot
            override_status = "on"
        return

    try:
        nova.services.force_down(
            options["--plug"], "nova-compute", force_down=True)
    except Exception as e:
        # Something went wrong when we tried to force the host down.
        # That could come from either an incompatible API version,
        # eg. UnsupportedVersion or VersionNotFoundForAPIMethod,
        # or because novaclient is old and doesn't include force_down yet,
        # eg. AttributeError.
        # In that case, fall back to waiting for Nova to catch the right state.
        logging.error("Exception from attempt to force host down via nova API: "
                      "%s: %s" % (e.__class__.__name__, e))

    # need to wait for nova to update its internal status or we
    # cannot call host-evacuate
    while get_power_status(_, options) != "off":
        # Loop forever if need be.
        #
        # Some callers (such as Pacemaker) will have a timer
        # running and kill us if necessary
        logging.debug("Waiting for nova to update its internal state for %s" % options["--plug"])
        time.sleep(1)

    if not _host_evacuate(options):
        sys.exit(1)

    return
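
# get_plugs_list() below feeds the fencing library's "list" action: every
# hypervisor known to Nova is reported as a fenceable "plug", under its FQDN
# and, when --domain is set, under its short name as well.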
def get_plugs_list(_, options):
    result = {}

    if nova:
        hypervisors = nova.hypervisors.list()
        for hypervisor in hypervisors:
            longhost = hypervisor.hypervisor_hostname
            if options["--domain"] != "":
                shorthost = longhost.replace("." + options["--domain"], "")
                result[longhost] = ("", None)
                result[shorthost] = ("", None)
            else:
                result[longhost] = ("", None)
    return result

def define_new_opts():
    all_opt["endpoint-type"] = {
        "getopt" : "e:",
        "longopt" : "endpoint-type",
        "help" : "-e, --endpoint-type=[endpoint] Nova Endpoint type (publicURL, internalURL, adminURL)",
        "required" : "0",
        "shortdesc" : "Nova Endpoint type",
        "default" : "internalURL",
        "order": 1,
    }
    all_opt["tenant-name"] = {
        "getopt" : "t:",
        "longopt" : "tenant-name",
        "help" : "-t, --tenant-name=[tenant] Keystone Admin Tenant",
        "required" : "0",
        "shortdesc" : "Keystone Admin Tenant",
        "default" : "",
        "order": 1,
    }
    all_opt["auth-url"] = {
        "getopt" : "k:",
        "longopt" : "auth-url",
        "help" : "-k, --auth-url=[url] Keystone Admin Auth URL",
        "required" : "0",
        "shortdesc" : "Keystone Admin Auth URL",
        "default" : "",
        "order": 1,
    }
    all_opt["domain"] = {
        "getopt" : "d:",
        "longopt" : "domain",
        "help" : "-d, --domain=[string] DNS domain in which hosts live, useful when the cluster uses short names and nova uses FQDN",
        "required" : "0",
        "shortdesc" : "DNS domain in which hosts live",
        "default" : "",
        "order": 5,
    }
    all_opt["record-only"] = {
        "getopt" : "r:",
        "longopt" : "record-only",
        "help" : "--record-only Record the target as needing evacuation but do not initiate it yet",
        "required" : "0",
        "shortdesc" : "Only record the target as needing evacuation",
        "default" : "False",
        "order": 5,
    }
    all_opt["instance-filtering"] = {
        "getopt" : "",
        "longopt" : "instance-filtering",
        "help" : "--instance-filtering Only evacuate instances created from images and flavors with evacuable=true",
        "required" : "0",
        "shortdesc" : "Only evacuate flagged instances",
        "default" : "False",
        "order": 5,
    }
    all_opt["no-shared-storage"] = {
        "getopt" : "",
        "longopt" : "no-shared-storage",
        "help" : "--no-shared-storage Disable functionality for shared storage",
        "required" : "0",
        "shortdesc" : "Disable functionality for dealing with shared storage",
        "default" : "False",
        "order": 5,
    }
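
# Command-line sketch (all values are placeholders; the script is typically
# installed as fence_compute): telling Nova that compute-0 is down and
# evacuating its instances could look roughly like:
#
#   fence_compute --username=admin --password=secret --tenant-name=admin \
#       --auth-url=http://keystone.example.com:5000/v2.0 \
#       --domain=example.com --plug=compute-0 --action=off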

def main():
    global override_status
    global nova
    atexit.register(atexit_handler)

    device_opt = ["login", "passwd", "tenant-name", "auth-url", "fabric_fencing", "on_target",
                  "no_login", "no_password", "port", "domain", "no-shared-storage", "endpoint-type",
                  "record-only", "instance-filtering"]
    define_new_opts()
    all_opt["shell_timeout"]["default"] = "180"

    options = check_input(device_opt, process_input(device_opt))

    docs = {}
    docs["shortdesc"] = "Fence agent for the automatic resurrection of OpenStack compute instances"
    docs["longdesc"] = "Used to tell Nova that compute nodes are down and to reschedule flagged instances"
    docs["vendorurl"] = ""
    show_docs(options, docs)

    run_delay(options)

    try:
        from novaclient import client as nova_client
    except ImportError:
        fail_usage("nova not found or not accessible")

    # Potentially we should make this a pacemaker feature
    if options["--action"] != "list" and options["--domain"] != "" and "--plug" in options:
        options["--plug"] = options["--plug"] + "." + options["--domain"]

    if options["--record-only"] in ["2", "Disabled", "disabled"]:
        sys.exit(0)

    elif options["--record-only"] in ["1", "True", "true", "Yes", "yes"]:
        if options["--action"] == "on":
            set_attrd_status(options["--plug"], "no", options)
            sys.exit(0)

        elif options["--action"] in ["off", "reboot"]:
            set_attrd_status(options["--plug"], "yes", options)
            sys.exit(0)

        elif options["--action"] in ["monitor", "status"]:
            sys.exit(0)

    # The first argument is the Nova client version
    nova = nova_client.Client('2',
                              options["--username"],
                              options["--password"],
                              options["--tenant-name"],
                              options["--auth-url"],
                              endpoint_type=options["--endpoint-type"])

    if options["--action"] in ["off", "reboot"]:
        # Pretend we're 'on' so that the fencing library will always call set_power_status(off)
        override_status = "on"

    if options["--action"] == "on":
        # Pretend we're 'off' so that the fencing library will always call set_power_status(on)
        override_status = "off"

    result = fence_action(None, options, set_power_status, get_power_status, get_plugs_list, None)
    sys.exit(result)

if __name__ == "__main__":
    main()