Created
July 7, 2021 20:07
-
-
Save rus-kilian/e1e6b7f3febfc75534c9af1b165db915 to your computer and use it in GitHub Desktop.
Aruba API to prometheus exporter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import yaml | |
import os.path | |
import pprint | |
import re | |
import time | |
import logging | |
import argparse | |
import requests | |
from requests.exceptions import Timeout | |
import urllib3 # For disabling SSL warnings | |
import socket | |
from prometheus_client import Gauge, Summary | |
from threading import Thread | |
# systemd socket activation | |
from prometheus_client import start_http_server | |
from prometheus_client.exposition import MetricsHandler | |
from prometheus_client.registry import REGISTRY | |
# Debian bullseye has _ThreadingSimpleServer renamed to ThreadingWSGIServer | |
try: | |
from prometheus_client.exposition import ( | |
ThreadingWSGIServer as _ThreadingSimpleServer, | |
) | |
except ImportError: | |
from prometheus_client.exposition import _ThreadingSimpleServer | |
# Load the exporter configuration. The system-wide file takes precedence over
# the per-user fallback in the home directory; the first file found is used.
config = {}
_config_candidates = (
    "/etc/aruba_exporter.yaml",
    # expanduser() also works when $HOME is unset (it falls back to the
    # password database), where os.environ["HOME"] would raise KeyError.
    os.path.join(os.path.expanduser("~"), ".config.yaml"),
)
for _config_path in _config_candidates:
    if not os.path.isfile(_config_path):
        continue
    with open(_config_path, "r") as stream:
        try:
            # An empty YAML document parses to None; keep config a dict so
            # later "key in config" checks do not fail with TypeError.
            config = yaml.safe_load(stream) or {}
        except yaml.YAMLError as exc:
            print(exc)
            raise SystemExit(1)
    break
else:
    # Neither candidate exists.
    print("No config.yaml")
    raise SystemExit(1)
# Command line interface. ArgumentDefaultsHelpFormatter adds each option's
# default value to its --help text.
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument(
    "--debug",
    "-d",
    action="store_true",
    default=False,
    dest="debug",
    help="Run debug mode",
)
parser.add_argument(
    "--listen-port",
    default=9425,
    type=int,
    help="The port the exporter will listen on",
)
parser.add_argument(
    "--delay",
    default=120,
    type=int,
    help="The refresh delay the exporter will wait between runs",
)
args = parser.parse_args()
pp = pprint.PrettyPrinter(indent=4)

# File descriptor of the socket handed over by systemd socket activation.
SYSTEMD_FIRST_SOCKET_FD = 3
# Content type of the latest text format
CONTENT_TYPE_LATEST = "text/plain; version=0.0.1; charset=utf-8"

debug = args.debug
logger = logging.getLogger(__name__)

# Console handler; logger and handler are switched to DEBUG only with --debug.
ch = logging.StreamHandler()
if debug:
    logger.setLevel(logging.DEBUG)
    ch.setLevel(logging.DEBUG)
formatter = logging.Formatter(
    "%(asctime)s - %(name)s/%(threadName)s - %(levelname)s - %(message)s"
)
ch.setFormatter(formatter)
logger.addHandler(ch)

# UIDARUBA session token per target, filled in by target_login().
session_cookies = {}

# NOTE: certificate verification is switched off for every request sent
# through the shared session, and the resulting urllib3 warnings are silenced.
urllib3.disable_warnings()
http_session = requests.Session()
http_session.verify = False
# --- exporter self-monitoring ------------------------------------------------
Aruba_collect = Summary(
    "aruba_collect",
    "Aruba poller details collecting and processing AP and Client stats",
)
Aruba_API_calls_sent = Gauge(
    "aruba_api_calls_sent",
    "Aruba API calls sent",
    labelnames=["script", "md", "method", "transport"],
)
Aruba_module_collect = Gauge(
    "aruba_module_collect",
    "Aruba poller details per module",
    labelnames=["module"],
)
Aruba_module_collect_AP = Gauge(
    "aruba_module_collect_ap",
    "Aruba poller details per module per AP",
    labelnames=["module", "ap"],
)

# --- controller datapath / control plane -------------------------------------
Aruba_DP_DMA_Counters = Gauge(
    "aruba_dp_dma_counter",
    "Aruba datapath dma queue counters",
    labelnames=["md", "queue", "processor"],
)
Aruba_CP_BWM_Table = Gauge(
    "aruba_cp_bwm_table",
    "Aruba control plane bandwidth table",
    labelnames=["md", "queue", "status"],
)
Aruba_DP_Frame_Counters = Gauge(
    "aruba_dp_frame_counters",
    "Aruba datapath frame counters",
    labelnames=["md", "slot", "key"],
)

# --- VLAN distribution -------------------------------------------------------
Aruba_Vlan = Gauge(
    "aruba_vlan",
    "Aruba Vlan distribution details for wireless users connected to MD",
    labelnames=["md", "vlan"],
)
Aruba_Vlan_APgroup = Gauge(
    "aruba_vlan_apgroup",
    "Aruba Vlan distribution details for wireless users connected to MD within certain AP group",
    labelnames=["md", "vlan", "group"],
)

# --- access points -----------------------------------------------------------
Aruba_AP_count = Gauge(
    "aruba_ap_up_count",
    "Aruba AP Database status up per MD",
    labelnames=["md"],
)
Aruba_AP_Uptime = Gauge(
    "aruba_ap_uptime",
    "Aruba AP Uptime",
    labelnames=["md", "ap"],
)
Aruba_AP_PoE_degraded = Gauge(
    "aruba_ap_poe_degraded",
    "Aruba AP with PoE handshake issues",
    labelnames=["md"],
)
Aruba_AP_Channel = Gauge(
    "aruba_ap_chan",
    "Aruba AP channel",
    labelnames=["md", "ap", "radio"],
)
Aruba_AP_PoE = Gauge(
    "aruba_ap_poe",
    "Aruba AP PoE handshake",
    labelnames=["ap"],
)
Aruba_AP_bootstraps = Gauge(
    "aruba_ap_bootstraps",
    "Aruba AP bootstraps",
    labelnames=["md", "ap"],
)
Aruba_AP_reboots = Gauge(
    "aruba_ap_reboots",
    "Aruba AP reboots",
    labelnames=["md", "ap"],
)
Aruba_AP_keepalive = Gauge(
    "aruba_ap_keepalive",
    "Aruba AP keepalives",
    labelnames=["md", "ap", "status"],
)
Aruba_AP_power_update = Gauge(
    "aruba_ap_power_update",
    "Aruba AP power updates",
    labelnames=["ap", "status"],
)
Aruba_AP_config = Gauge(
    "aruba_ap_config",
    "Aruba AP config sync",
    labelnames=["md", "ap", "status"],
)
Aruba_AP_crash = Gauge(
    "aruba_ap_crash",
    "Aruba AP crash report",
    labelnames=["md", "ap"],
)
Aruba_AP_health = Gauge(
    "aruba_ap_health",
    "Aruba AP IP health check",
    labelnames=["md", "ap", "status"],
)
Aruba_AP_channel_status = Gauge(
    "aruba_ap_channel",
    "Aruba AP channel status check",
    labelnames=["ap", "channel", "status", "service"],
)
Aruba_AP_radio_status = Gauge(
    "aruba_ap_radio_stats",
    "Aruba AP radio status",
    labelnames=["ap", "radio", "status"],
)

# --- clients -----------------------------------------------------------------
Aruba_clients_connected = Gauge(
    "aruba_clients_connected",
    "Aruba clients connected to AP",
    labelnames=["md", "ap"],
)
Aruba_client_status = Gauge(
    "aruba_client_stats",
    "Aruba client status",
    labelnames=["client", "status"],
)

# Leading run of digits, optionally followed by a single non-digit character.
ap_chan_re = re.compile(r"^(\d+)[^\d]?")
class SocketInheritingHTTPServer(_ThreadingSimpleServer):
    """A HttpServer subclass that takes over an inherited socket from systemd.

    Args:
        address_info: ``(host, port)`` tuple handed to the base server. It is
            never bound, because the inherited socket is used instead.
        handler: request handler class passed to the base server.
        fd: file descriptor of the inherited socket to serve on.
        bind_and_activate: when true, ``server_activate()`` is called on the
            inherited socket; binding is always skipped.
    """

    def __init__(self, address_info, handler, fd, bind_and_activate=True):
        super().__init__(address_info, handler, bind_and_activate=False)
        logger.debug("http server init complete - passing socket")
        # The base constructor already created its own (unbound) socket.
        # Close it before swapping in the inherited one so that its file
        # descriptor is not leaked.
        self.socket.close()
        self.socket = socket.fromfd(fd, self.address_family, self.socket_type)
        if bind_and_activate:
            # NOTE: systemd provides ready-bound sockets, so we only need to activate:
            logger.debug("http server activating")
            self.server_activate()
        else:
            logger.debug("http server NOT activated")
def target_login(target):
    """Log in to the API on *target* and remember the returned session token.

    Args:
        target: host name or address of the controller to log in to.

    Returns:
        The ``UIDARUBA`` token on success (it is also stored in
        ``session_cookies[target]``), ``False`` when the request times out,
        the server answers with an error status, or the answer carries no
        usable token.
    """
    global config
    global session_cookies
    logger.debug("Logging in to %s and add to session_cookies" % target)
    login_url = "https://" + target + ":4343/v1/api/login"

    def count_login(method):
        # One series per outcome ("login" / "login_failed").
        Aruba_API_calls_sent.labels(
            script="aruba_exporter.py", md=target, transport="http", method=method
        ).inc()

    # Initiate login with authentication, and persistently store cookies
    try:
        login_response = http_session.get(
            login_url,
            params={"username": config["login"], "password": config["password"]},
            timeout=(1, 3),
        )
    except Timeout:
        logger.error("Timeout while logging in to %s" % target)
        return False

    # A requests Response is falsy for HTTP error status codes.
    if not login_response:
        # FIXME: add retry
        logger.error(
            "Login failed with HTTP status code %d" % login_response.status_code
        )
        count_login("login_failed")
        return False

    # Login to store UIDARUBA
    try:
        http_session_arubauid = login_response.json()["_global_result"]["UIDARUBA"]
    except (ValueError, KeyError, TypeError):
        # A successful status without a token is still a failed login; report
        # it the same way instead of letting the exception escape.
        logger.error(
            "Login to %s returned no UIDARUBA (HTTP status code %d)"
            % (target, login_response.status_code)
        )
        count_login("login_failed")
        return False

    logger.debug(
        "Logged in sucessfully with HTTP status code %d" % login_response.status_code
    )
    logger.debug("Received UIDARUBA: " + http_session_arubauid)
    session_cookies[target] = http_session_arubauid
    count_login("login")
    return http_session_arubauid
def showcli(target, command, retries=3):
    """Run a CLI command on *target* through the API and return the JSON answer.

    Args:
        target: controller to query. A login is performed first when no
            session token is cached for it.
        command: CLI command string to execute.
        retries: maximum number of request attempts. Timeouts, empty answers
            and re-logins after a 401 each use up one attempt.

    Returns:
        The decoded JSON answer, or ``None`` when the login fails, the answer
        is not valid JSON, an unhandled HTTP status is received or all
        attempts are used up.
    """
    global session_cookies
    if target not in session_cookies and not target_login(target):
        return None
    show_command_url = "https://" + target + ":4343/v1/configuration/showcommand"

    def count_call(method):
        Aruba_API_calls_sent.labels(
            script="aruba_exporter.py", md=target, transport="http", method=method
        ).inc()

    for _attempt in range(retries):
        # FIXME: for now we don't track individual commands to spare prometheus series...
        count_call("showcommand")
        try:
            show_command_response = http_session.get(
                show_command_url,
                params={
                    "json": "1",
                    "command": command,
                    # Read on every attempt so a re-login's new token is used.
                    "UIDARUBA": session_cookies[target],
                },
                timeout=(3, 8),
                headers={"Connection": "close"},
            )
        except Timeout:
            count_call("showcommand_timeout")
            logger.warning(
                'Timeout while executing "%s" on %s. Retrying...' % (command, target)
            )
            continue
        finally:
            # Drop pooled connections after every attempt, whatever its outcome.
            http_session.close()

        status = show_command_response.status_code
        if status == 200:
            if show_command_response.text == "":
                logger.warning("Empty response received. Retrying.")
                # let's retry
                time.sleep(3)
                continue
            try:
                return show_command_response.json()
            except ValueError:
                count_call("showcommand_invalid_json")
                logger.error("Invalid response received! Not JSON:")
                logger.error(show_command_response.text)
                return None

        if status == 401:
            logger.error("Unauthenticated on %s. Retrying." % target)
            count_call("showcommand_auth_invalid")
            if not target_login(target):
                return None
            # Retry inside this loop rather than recursing with a fresh retry
            # budget, so a target that keeps answering 401 cannot loop forever.
            continue

        count_call("showcommand_error_%d" % status)
        logger.error("Received unhandled error code: %d on %s" % (status, target))
        return None

    logger.error('Retries exceeded with Timeout for "%s" on %s' % (command, target))
    return None
def get_controllers(target):
    """Return ``{ip: name}`` for every switch of type ``MD`` known to *target*.

    The list comes from ``show switches``; entries of any other type are
    skipped. An empty dict is returned when the command yields nothing usable.
    """
    found = {}
    logger.debug("retrieving controlers managed by %s" % target)
    response = showcli(target, "show switches")
    if not response or "All Switches" not in response:
        return found
    for entry in response["All Switches"]:
        if entry["Type"] != "MD":
            logger.debug("Ignoring type %s" % entry["Type"])
            continue
        address = entry["IP Address"]
        label = entry["Name"]
        logger.debug("Adding %s (%s)" % (address, label))
        found[address] = label
    return found
def uptime2sec(upstring):
    """Convert an uptime string such as ``"1d:2h:3m:4s"`` to seconds.

    Args:
        upstring: colon-separated elements, each a number followed by a
            one-letter unit (``d``, ``h``, ``m`` or ``s``).

    Returns:
        The total number of seconds as an int. Empty elements (including an
        empty string) and elements with an unknown unit contribute nothing.

    Raises:
        ValueError: if the part of an element before its unit is not an integer.
    """
    seconds_per_unit = {"s": 1, "m": 60, "h": 60 * 60, "d": 60 * 60 * 24}
    uptime = 0
    for element in upstring.split(":"):
        element = element.strip()
        if not element:
            # "".split(":") yields [""]; int("") would raise, so skip blanks.
            continue
        uptime += int(element[:-1]) * seconds_per_unit.get(element[-1], 0)
    return uptime
def ap_db(target, group=""):
    """Return the APs with status "up" known to *target*, keyed by AP name.

    Args:
        target: controller to query.
        group: optional AP group to restrict the query to.

    Returns:
        ``{name: {"ap_type", "group", "status", "uptime", "standby",
        "ip_address", "flags", "switch"}}``; empty when nothing usable is
        returned. ``uptime`` is converted to seconds with ``uptime2sec``.
    """
    if group == "":
        logger.debug("collecting ap database on %s" % target)
        command = "show ap database status up"
    else:
        logger.debug("collecting ap database for group %s on %s" % (group, target))
        command = "show ap database status up group %s" % group
    response = showcli(target, command)
    if not response or "AP Database" not in response:
        return {}
    aps = {}
    for row in response["AP Database"]:
        name = row["Name"]
        record = {"ap_type": row["AP Type"], "group": row["Group"]}
        # The status column holds the state and the uptime, separated by a blank.
        status_fields = row["Status"].split(" ")
        record["status"] = status_fields[0]
        record["uptime"] = uptime2sec(status_fields[1])
        record["standby"] = row["Standby IP"]
        record["ip_address"] = row["IP Address"]
        record["flags"] = row["Flags"]
        record["switch"] = row["Switch IP"]
        aps[name] = record
    return aps
def cp_bwcontracts(target):
    """Return ``{contract id (int): contract name}`` from ``show cp-bwcontracts``.

    An empty dict is returned when *target* yields nothing usable.
    """
    logger.debug("collecting controlplane bandwidth contracts on %s" % target)
    response = showcli(target, "show cp-bwcontracts")
    if response and "CP bw contracts" in response:
        return {
            int(row["Id"]): row["Contract"] for row in response["CP bw contracts"]
        }
    return {}
# A line made up only of "=", "-" and whitespace: a separator in the CLI output.
section_re = re.compile(r"^[=\s-]+$")
# One table row: cpu, contract id, rate in pps, policed, credits, queued bytes/packets.
bwm_cp_re = re.compile(
    r"^(\d+)\s+(\d+)\s+(\d+)\s+pps\s+(\d+)\s+(\d+)\s+(\d+)\/(\d+)\s*"
)


def bwm_cp_table(target):
    """Parse ``show datapath cp-bwm table`` on *target*.

    Only rows between the second and third separator line are evaluated.

    Returns:
        ``{contract id: {"rate_pps", "policed", "credits", "queued_bytes",
        "queued_packets"}}`` with int values; empty when nothing usable is
        returned.
    """
    logger.debug("collecting datapath cp-bwm table on %s" % target)
    response = showcli(target, "show datapath cp-bwm table")
    table = {}
    if not response or "_data" not in response:
        return table
    field_names = ("rate_pps", "policed", "credits", "queued_bytes", "queued_packets")
    separators_seen = 0
    for line in response["_data"]:
        if section_re.match(line):
            separators_seen += 1
        if separators_seen != 2:
            continue
        row = bwm_cp_re.match(line)
        if row is None:
            continue
        # group(1) is the cpu column, which is not exported.
        values = [int(row.group(index)) for index in range(3, 8)]
        table[int(row.group(2))] = dict(zip(field_names, values))
    return table
# Header row announcing the slot number the following counters belong to.
slot_re = re.compile(r"^\|\s*Slot\s*\|\s*(\d+)\s*\|")
# Counter row: description text followed by a numeric value before the closing "|".
dp_f_counters = re.compile(r"^.*\|\s+(\S[^\|]+\S)\s+(\d+)\s+\|$")


def dp_frame_counters(target):
    """Parse ``show datapath frame counters`` on *target*.

    Returns:
        ``{slot (int): {description: value (int)}}``; lines seen before the
        first slot header are ignored. Empty when nothing usable is returned.
    """
    logger.debug("collecting datapath frame counters on %s" % target)
    response = showcli(target, "show datapath frame counters")
    per_slot = {}
    if not response or "_data" not in response:
        return per_slot
    current_slot = None
    for line in response["_data"]:
        header = slot_re.match(line)
        if header:
            current_slot = int(header.group(1))
            per_slot[current_slot] = {}
        if current_slot is None:
            continue
        counter = dp_f_counters.match(line)
        if counter:
            per_slot[current_slot][counter.group(1)] = int(counter.group(2))
    return per_slot
# Row with three numeric columns: queue, cp_full, np_full.
dp_dma_re = re.compile(r"^(\d+)\s+(\d+)\s+(\d+)\s*")


def datapath_dma_counters(target):
    """Parse ``show datapath debug dma counters`` on *target*.

    Only rows between the second and third separator line are evaluated.

    Returns:
        ``{queue (int): {"cp_full": int, "np_full": int}}``; empty when
        nothing usable is returned.
    """
    logger.debug("collecting datapath debug dma counters on %s" % target)
    response = showcli(target, "show datapath debug dma counters")
    counters = {}
    if not response or "_data" not in response:
        return counters
    separators_seen = 0
    for line in response["_data"]:
        if section_re.match(line):
            separators_seen += 1
        if separators_seen != 2:
            continue
        row = dp_dma_re.match(line)
        if row:
            counters[int(row.group(1))] = {
                "cp_full": int(row.group(2)),
                "np_full": int(row.group(3)),
            }
    return counters
def ap_association(target, group=""):
    """Count the entries of the association table on *target* per VLAN id.

    Args:
        target: controller to query.
        group: optional AP group to restrict the query to.

    Returns:
        ``{vlan-id: number of associations}``; empty when nothing usable is
        returned.
    """
    if group == "":
        logger.debug("collecting ap association on %s" % target)
        command = "show ap association"
    else:
        logger.debug("collecting ap association for group %s on %s" % (group, target))
        command = "show ap association ap-group %s" % group
    response = showcli(target, command)
    per_vlan = {}
    if response and "Association Table" in response:
        for row in response["Association Table"]:
            vlan_id = row["vlan-id"]
            per_vlan[vlan_id] = per_vlan.get(vlan_id, 0) + 1
    return per_vlan
def ap_client_table(target, ap):
    """Return the debug client table of *ap* as seen by *target*, keyed by MAC.

    Returns:
        ``{mac: {"ACK_SNR", "state", "health", "ps_qlen", "tx_retries"}}``.
        Rows without a MAC are skipped. ``None`` is returned when there is no
        client table in the answer or when any row cannot be processed (the
        raw answer is pretty-printed in that case).
    """
    logger.debug("collecting ap debug client table for AP %s on %s" % (ap, target))
    response = showcli(target, "show ap debug client-table ap-name %s" % ap)
    if not response or "Client Table" not in response:
        return None
    clients = {}
    try:
        for row in response["Client Table"]:
            mac = row["MAC"]
            if mac is None:
                # don't add empty client
                continue
            clients[mac] = {
                "ACK_SNR": row["Last_ACK_SNR"],
                "state": row["Assoc_State"],
                "health": row["Client health (C/R)"],
                "ps_qlen": row["PS_Qlen"],
                "tx_retries": row["Tx_Retries"],
            }
    except Exception:
        pp.pprint(response)
        return None
    return clients
def ap_poe(target, ap):
    """Return the "Power Supply" value from the power management stats of *ap*.

    The first matching attribute in a section whose name contains
    "AP Power Mgmt Status" wins; ``None`` is returned when there is none.
    """
    logger.debug("collecting ap PoE stats for AP %s on %s" % (ap, target))
    response = showcli(target, "show ap power-mgmt-statistics ap-name %s" % ap)
    if not response:
        return None
    for section, rows in response.items():
        if "AP Power Mgmt Status" not in section:
            continue
        for row in rows:
            if "Power Supply" in row["Attr"]:
                return row["Value"]
    return None
# Section heading of the form 'AP <name> <section title>'; group 1 is the title.
ap_detail_key = re.compile(r'^\s*AP\s["a-z0-9A-Z,-]+\s+([^\s].*[^\s])+\s*$')
# Section title of a per-radio block; group 1 is the radio number.
ap_radio_oper_info = re.compile(r"Radio (\d) Operating Information")


def ap_detail(target, ap):
    """Collect selected values from ``show ap details advanced`` for *ap*.

    Returns:
        A dict that may contain ``"messages"`` (per message name the
        ``Acknowledged``/``New``/``Total`` counts), ``"reboots"``,
        ``"bootstraps"`` and ``"channel<radio>"`` entries, depending on which
        sections the answer holds. ``None`` when no answer was received.
    """
    logger.debug("collecting ap details advanced for AP %s on %s" % (ap, target))
    response = showcli(target, "show ap details advanced ap-name %s" % ap)
    if not response:
        logger.error("No output received!")
        return None
    details = {}
    for heading in response:
        matched = ap_detail_key.match(heading)
        if not matched:
            logger.debug('Ignoring key "%s"' % heading)
            continue
        section = matched.group(1)
        if section == "AP to Switch Message Counts":
            rows = response[heading]
            details["messages"] = {}
            for msg in rows:
                details["messages"][msg["Message"]] = {}
                counts = details["messages"][msg["Message"]]
                counts["Acknowledged"] = msg["Acknowledged"]
                counts["New"] = msg["New"]
                counts["Total"] = msg["Total"]
            continue
        if section == "Operating Information":
            for row in response[heading]:
                if row["Item"] == "Reboots":
                    details["reboots"] = row["Value"]
                elif row["Item"] == "Bootstraps":
                    details["bootstraps"] = row["Value"]
            continue
        # Anything else is only of interest if it is a per-radio section.
        radio_match = ap_radio_oper_info.match(section)
        if radio_match:
            radio = int(radio_match.group(1))
            for row in response[heading]:
                if row["Item"] == "Channel":
                    details["channel%d" % radio] = row["Value"]
    return details
# "<n> (<total>)" as found in the "Bootstraps (Total)" column.
bootstraps_re = re.compile(r"^(\d+)\s+\((\d+)\s*\)")


def ap_debug_counters(target, ap):
    """Return config, crash, reboot and bootstrap counters of *ap*.

    Returns:
        A dict with ``configs_ack``, ``configs_sent``, ``crash`` and
        ``reboots``, plus ``bootstraps`` and ``bootstraps_total`` when the
        "Bootstraps (Total)" column has the expected format. ``None`` when
        the answer holds no counter row.
    """
    logger.debug("collecting ap debug counters for AP %s on %s" % (ap, target))
    command = "show ap debug counters ap-name %s" % ap
    data = showcli(target, command)
    if not data or "AP Counters" not in data:
        return None
    rows = data["AP Counters"]
    if not rows:
        # An empty table would otherwise raise IndexError on rows[0].
        logger.error("Received empty AP counters for %s on %s" % (ap, target))
        return None
    ret = rows[0]
    items = {
        "configs_ack": ret["Configs Acked"],
        "configs_sent": ret["Configs Sent"],
        "crash": ret["Crash"],
        "reboots": ret["Reboots"],
    }
    b = bootstraps_re.match(ret["Bootstraps (Total)"])
    if b:
        items["bootstraps"] = b.group(1)
        items["bootstraps_total"] = b.group(2)
    return items
# "<pct>% ... <a>/<b>" as found in the "1 min Loss" column.
health_loss = re.compile(r"^([0-9\.]+)%\s+[^0-9]+([0-9]+)\/([0-9]+)[^0-9]*$")


def ap_ip_health(target, ap):
    """Return the one-minute IP health-check figures of *ap*.

    Returns:
        A dict with ``avg_rtt`` and, when the loss column has the expected
        format, ``loss_pct`` and ``loss_pkt``. ``None`` when the answer holds
        no health-check status or the status list is empty.
    """
    logger.debug("collecting ap ip health-check for AP %s on %s" % (ap, target))
    response = showcli(target, "show ap ip health-check ap-name %s" % ap)
    if not response or "AP Health-Check Status" not in response:
        return None
    if response["AP Health-Check Status"] == []:
        logger.error(
            "Received empty AP health check status for %s on %s" % (ap, target)
        )
        return None
    first = response["AP Health-Check Status"][0]
    result = {"avg_rtt": first["1 min Avg RTT"]}
    lost = health_loss.match(first["1 min Loss"])
    if lost:
        result["loss_pct"] = lost.group(1)
        result["loss_pkt"] = lost.group(2)
    return result
def ap_rf_verbose(target, ap):
    """Return the ARM RF summary of *ap* as seen by *target*.

    Returns:
        A dict with ``channels`` (per channel: ``noise``, ``mac-err``,
        ``phy-err``, ``retry``) and ``ht_vht_channels`` (per channel range:
        ``bandwidth`` and integer ``interference``), plus ``cur_chan`` and
        ``bcn_fail`` when present in the answer. ``None`` when the answer has
        no "Channel Summary".
    """
    logger.debug("collecting ap arm rf-summary for AP %s on %s" % (ap, target))
    command = "show ap arm rf-summary ap-name %s" % ap
    data = showcli(target, command)
    if not data or "Channel Summary" not in data:
        return None
    items = {}
    if "Cur Chan: cca_ibss/cca_obss/cca_intf" in data:
        items["cur_chan"] = data["Cur Chan: cca_ibss/cca_obss/cca_intf"]
    # Accept the key with or without a trailing blank: the previous code
    # tested one spelling but read the other, so it could never succeed.
    for bcn_key in (
        "Bcn fail/Bstuck reset/Scan rej(l) ",
        "Bcn fail/Bstuck reset/Scan rej(l)",
    ):
        if bcn_key in data:
            items["bcn_fail"] = data[bcn_key]
            break
    items["channels"] = {}
    items["ht_vht_channels"] = {}
    for c in data["Channel Summary"]:
        # Other columns of the row (cov-idx, intf_idx, util) are not exported.
        items["channels"][c["channel"]] = {
            "noise": c["noise"],
            "mac-err": c["mac-err"],
            "phy-err": c["phy-err"],
            "retry": c["retry"],
        }
    # The HT/VHT table is optional here; its absence no longer raises KeyError.
    for c in data.get("HT/VHT Channel Summary", []):
        items["ht_vht_channels"][c["Channel range"]] = {
            "bandwidth": c["Bandwidth"],
            "interference": int(c["Total interference index"]),
        }
    return items
def ap_debug_radio_stats(target, ap, radio):
    """Return the advanced radio stats of *radio* on *ap* as ``{parameter: value}``.

    ``None`` is returned when no answer was received, an empty dict when the
    answer has no "RADIO Stats" table.
    """
    logger.debug(
        "collecting ap debug radio-stats (radio %d) for AP %s on %s"
        % (radio, ap, target)
    )
    response = showcli(
        target,
        "show ap debug radio-stats ap-name %s radio %d advanced" % (ap, radio),
    )
    if not response:
        return None
    if "RADIO Stats" not in response:
        return {}
    return {row["Parameter"]: row["Value"] for row in response["RADIO Stats"]}
def ap_debug_client_stats(target, client):
    """Return the debug station stats of *client* (a MAC) as ``{parameter: value}``.

    ``None`` is returned when no answer was received, an empty dict when the
    answer has no "Station Stats" table.
    """
    logger.debug(
        "collecting ap debug client-stats for client-mac %s on %s" % (client, target)
    )
    response = showcli(target, "show ap debug client-stats client-mac %s" % client)
    if not response:
        return None
    if "Station Stats" not in response:
        return {}
    return {row["Parameter"]: row["Value"] for row in response["Station Stats"]}
def lookup_user(md, client):
    """Return the distinct AP names *client* has sessions on according to *md*.

    The names keep the order in which they first appear in the user table.
    ``None`` is returned when the answer has no "Users" table.
    """
    logger.debug("Searching for %s on %s" % (client, md))
    response = showcli(md, "show user-table mac %s" % client)
    if not response or "Users" not in response:
        return None
    seen_aps = []
    for session in response["Users"]:
        name = session["AP name"]
        if name in seen_aps:
            continue
        logger.debug("Adding %s to APs user %s is on" % (name, client))
        seen_aps.append(name)
    return seen_aps
def collect_stats(modname, offset):
    """Record the seconds elapsed since *offset* for *modname*; return the current time."""
    now = time.time()
    elapsed = now - offset
    Aruba_module_collect.labels(module=modname).set(elapsed)
    return now
def collect_stats_per_ap(modname, ap, offset):
    """Record the seconds elapsed since *offset* for *modname* on *ap*; return the current time."""
    now = time.time()
    elapsed = now - offset
    Aruba_module_collect_AP.labels(module=modname, ap=ap).set(elapsed)
    return now
# MAIN #
# Gauge value exported for each PoE handshake result; anything else maps to 0.
_POE_LEVELS = {"POE-AF": 1, "POE-AT": 2, "POE-BT": 3, "None": -1}


def _export_datapath_stats(md):
    """Export DMA, control plane bandwidth and frame counters of controller *md*."""
    logger.debug("Fetching controlplane and bandwidth stats for %s" % md)
    dp_dma_c = datapath_dma_counters(md)
    if dp_dma_c:
        for q in dp_dma_c:
            for p in dp_dma_c[q]:
                logger.debug(
                    "Adding DP DMA counter for %s, queue: %s, processor: %s, value: %d"
                    % (md, q, p, dp_dma_c[q][p])
                )
                Aruba_DP_DMA_Counters.labels(md=md, queue=q, processor=p).set(
                    dp_dma_c[q][p]
                )
    cp_bwcon = cp_bwcontracts(md)
    md_cp_bwm = bwm_cp_table(md)
    if md_cp_bwm:
        for c in md_cp_bwm:
            # Label with the contract name; fall back to the numeric id when
            # the name is unknown instead of aborting the run with KeyError.
            queue = cp_bwcon.get(c, c)
            for q in md_cp_bwm[c]:
                logger.debug(
                    "Adding CP BWM table entry for %s, class/queue: %s, entry: %s, value: %d"
                    % (md, c, q, md_cp_bwm[c][q])
                )
                Aruba_CP_BWM_Table.labels(md=md, queue=queue, status=q).set(
                    md_cp_bwm[c][q]
                )
    dp_f_c = dp_frame_counters(md)
    if dp_f_c:
        for slot in dp_f_c:
            for (k, v) in dp_f_c[slot].items():
                logger.debug(
                    'Adding datapath frame counter in slot %d, "%s": %d' % (slot, k, v)
                )
                Aruba_DP_Frame_Counters.labels(md=md, slot=slot, key=k).set(v)


def _export_client_stats(md):
    """Export per-VLAN association counts and sampled client stats of controller *md*."""
    logger.debug("Fetching vlan associations for %s" % md)
    md_assoc = ap_association(md)
    if md_assoc:
        for vlan, value in md_assoc.items():
            logger.debug("Adding %d to associations for %s on %s" % (value, vlan, md))
            Aruba_Vlan.labels(md=md, vlan=vlan).set(value)
    for c in config.get("aruba_sample_clients") or ():
        # check if this MD actually has this user...
        if not lookup_user(md, c):
            continue
        data = ap_debug_client_stats(md, c)
        if not data:
            continue
        for s in config["aruba_client_fields"]:
            # Same policy as for radio fields: a configured field that is
            # missing in the answer is skipped, not fatal.
            if s in data:
                Aruba_client_status.labels(client=c, status=s).set(data[s])
            else:
                logger.debug(
                    'Ignoring missing "%s" in client stats for %s on %s' % (s, c, md)
                )


def _export_ap_inventory(mm, all_mds):
    """Export AP counts, uptimes and per-group VLAN usage; return ``{ap: md}``."""
    logger.debug("Fetching ap database for counters")
    ap_in_db = ap_db(mm)
    if ap_in_db:
        logger.debug(
            "Found total %d APs in database - registered in mm" % len(ap_in_db)
        )
        for ip, md in all_mds.items():
            on_md = [v for v in ap_in_db.values() if v["switch"] == ip]
            ap_count = len(on_md)
            logger.debug(
                "Found total %d APs in database connected to %s" % (ap_count, md)
            )
            Aruba_AP_count.labels(md=md).set(ap_count)
            ap_poe_degraded = sum(1 for v in on_md if "r" in v["flags"])
            # Always write the value: setting it only when non-zero left the
            # gauge stuck at its last value once all APs had recovered.
            Aruba_AP_PoE_degraded.labels(md=md).set(ap_poe_degraded)
    aps = {}
    for group in config["aruba_ap_groups"]:
        ap_in_group = ap_db(mm, group)
        if not ap_in_group:
            continue
        group_mds = []
        for ap, details in ap_in_group.items():
            md = all_mds.get(details["switch"])
            if md is None:
                logger.warning(
                    "AP %s is connected to unknown switch %s - skipping"
                    % (ap, details["switch"])
                )
                continue
            Aruba_AP_Uptime.labels(md=md, ap=ap).set(details["uptime"])
            aps[ap] = md
            if md not in group_mds:
                group_mds.append(md)
        # NOTE(review): the source this was reconstructed from had lost its
        # indentation, so it is unclear whether the association query ran per
        # AP or once per group. Querying every controller that hosts APs of
        # the group exactly once produces the same gauges without repeats.
        for md in group_mds:
            assoc = ap_association(md, group)
            if assoc:
                for k, v in assoc.items():
                    Aruba_Vlan_APgroup.labels(md=md, vlan=k, group=group).set(v)
    return aps


def _export_ap_stats(md, ap):
    """Export PoE, client, detail, counter, health, RF and radio stats of one AP."""
    intermediate = time.time()
    poe = ap_poe(md, ap)
    if poe:
        logger.debug('Obtained PoE for AP %s as "%s"' % (ap, poe))
        Aruba_AP_PoE.labels(ap=ap).set(_POE_LEVELS.get(poe, 0))
    intermediate = collect_stats_per_ap("get_ab_poe", ap, intermediate)

    # FIXME: select min/max/avg SNR, qlen and txretries?
    client_table = ap_client_table(md, ap)
    if client_table:
        Aruba_clients_connected.labels(md=md, ap=ap).set(len(client_table))
        # XXX: just select a few clients to not clutter prometheus... (and respect privacy)
        sample_clients = config.get("aruba_sample_clients") or ()
        for c, stats in client_table.items():
            if c not in sample_clients:
                continue
            for field in ("ACK_SNR", "ps_qlen", "tx_retries"):
                if field in stats:
                    Aruba_client_status.labels(client=c, status=field).set(stats[field])
            if "health" in stats:
                health = stats["health"].split("/")
                Aruba_client_status.labels(client=c, status="health1").set(health[0])
                Aruba_client_status.labels(client=c, status="health2").set(health[1])
    else:
        Aruba_clients_connected.labels(md=md, ap=ap).set(0)
    intermediate = collect_stats_per_ap("get_ap_client_table", ap, intermediate)

    ap_chan = []
    ret = ap_detail(md, ap)
    if ret:
        # ap_detail() only returns the sections present in the answer, so
        # every value is exported only when it is actually there.
        for radio in (0, 1):
            channel_key = "channel%d" % radio
            if channel_key in ret:
                ap_chan.append(ret[channel_key])
                Aruba_AP_Channel.labels(md=md, ap=ap, radio=radio).set(
                    ret[channel_key]
                )
        if "bootstraps" in ret:
            Aruba_AP_bootstraps.labels(md=md, ap=ap).set(ret["bootstraps"])
        if "reboots" in ret:
            Aruba_AP_reboots.labels(md=md, ap=ap).set(ret["reboots"])
        messages = ret.get("messages", {})
        for s, v in messages.get("KEEPALIVE", {}).items():
            Aruba_AP_keepalive.labels(md=md, ap=ap, status=s).set(v)
        for s, v in messages.get("PWR_EVENT_UPDATE", {}).items():
            Aruba_AP_power_update.labels(ap=ap, status=s).set(v)
    intermediate = collect_stats_per_ap("get_ap_detail", ap, intermediate)

    ret = ap_debug_counters(md, ap)
    if ret:
        Aruba_AP_config.labels(md=md, ap=ap, status="ACK").set(ret["configs_ack"])
        Aruba_AP_config.labels(md=md, ap=ap, status="sent").set(ret["configs_sent"])
        # The bootstrap figures are absent when the column could not be parsed.
        for status in ("bootstraps", "bootstraps_total"):
            if status in ret:
                Aruba_AP_config.labels(md=md, ap=ap, status=status).set(ret[status])
        Aruba_AP_crash.labels(md=md, ap=ap).set(0 if ret["crash"] == "N" else 1)
    intermediate = collect_stats_per_ap("get_ap_debug_counters", ap, intermediate)

    ret = ap_ip_health(md, ap)
    if ret:
        for k in ret:
            Aruba_AP_health.labels(md=md, ap=ap, status=k).set(ret[k])
    intermediate = collect_stats_per_ap("get_ap_ip_health", ap, intermediate)

    ret = ap_rf_verbose(md, ap)
    if ret:
        for c in ret["channels"]:
            in_service = "y" if c in ap_chan else "n"
            for s in ret["channels"][c]:
                _chan = ret["channels"][c][s]
                # Keep only the leading digits when the value starts with some.
                _c = ap_chan_re.match(_chan)
                if _c:
                    _chan = _c.group(1)
                Aruba_AP_channel_status.labels(
                    ap=ap, channel=c, status=s, service=in_service
                ).set(_chan)

    for radio in [0, 1]:
        ret = ap_debug_radio_stats(md, ap, radio)
        if ret:
            for s in config["aruba_radio_fields"]:
                if s in ret:
                    Aruba_AP_radio_status.labels(ap=ap, radio=radio, status=s).set(
                        ret[s]
                    )
                else:
                    logger.debug(
                        'Ignoring missing "%s" in AP %s radio stats for radio %d'
                        % (s, ap, radio)
                    )
        # FIXME: if AP555, we might also want to look at radio 2...
        intermediate = collect_stats_per_ap(
            "get_ap_rf_verbose_radio_%d" % radio, ap, intermediate
        )


def check_aruba():
    """Run one full collection cycle and update all exported metrics.

    The host configured as ``aruba_mm`` is asked for its controllers; the
    controllers are then queried for datapath and client stats, and every AP
    of the configured AP groups for its own stats. The time taken by each
    stage and by the whole cycle is exported as well.
    """
    mm = config["aruba_mm"]
    start_time = time.time()
    all_mds = get_controllers(mm)
    intermediate = collect_stats("get_controllers", start_time)

    for md in all_mds.values():
        _export_datapath_stats(md)
    # Measured from the end of the previous stage like every other stage
    # (it used to be measured from start_time and so included get_controllers).
    intermediate = collect_stats("get_dp_cp_bwm_stats", intermediate)

    for md in all_mds.values():
        _export_client_stats(md)
    intermediate = collect_stats("ap_associations", intermediate)

    aps = _export_ap_inventory(mm, all_mds)
    collect_stats("get_ab_db", intermediate)

    for ap, md in aps.items():
        _export_ap_stats(md, ap)

    elapsed = time.time() - start_time
    logger.debug("Time spent: %d" % elapsed)
    Aruba_collect.observe(elapsed)
class ArubaGatherer(Thread):
    """Thread that calls ``check_aruba()`` in an endless loop.

    After every run, successful or not, it sleeps for ``args.delay`` seconds.
    """

    def __init__(self):
        super().__init__(name="ArubaGatherer")

    def run(self):
        logger.debug("Starting Aruba data gather thread")
        while True:
            try:
                logger.debug("Running check_aruba in thread")
                check_aruba()
                logger.debug("Done: Running check_aruba in thread")
            except Exception:
                # Failures are logged and otherwise ignored: the next run
                # starts after the usual delay.
                logger.error("Error getting stats", exc_info=True)
            pause = args.delay
            logger.debug("Sleeping in Aruba thread for %d s" % pause)
            time.sleep(pause)
if __name__ == "__main__":
    logger.debug("Starting Aruba gatherer thread")
    aruba_gatherer = ArubaGatherer()
    aruba_gatherer.start()
    # ...and now serve the registry contents so that we can consume it..
    if os.environ.get("LISTEN_PID", None) == str(os.getpid()):
        # systemd socket activation will need that httpd is waiting for socket
        # to be passed - while collection still updates in the background
        # inherit the socket
        logger.debug(
            "Starting systemd socket activation http server on %d" % args.listen_port
        )
        CustomMetricsHandler = MetricsHandler.factory(REGISTRY)
        server_args = [("localhost", args.listen_port), CustomMetricsHandler]
        httpd = SocketInheritingHTTPServer(*server_args, fd=SYSTEMD_FIRST_SOCKET_FD)
        # Use the module logger, not the logging module's root-level helpers:
        # logging.info() implicitly installs a root handler, after which every
        # record of this module would be emitted a second time.
        logger.info(
            "aruba_exporter started for socket activation on fd %s"
            % (SYSTEMD_FIRST_SOCKET_FD,)
        )
        try:
            logger.info(
                "aruba_exporter httpd running on socket fd %s"
                % (SYSTEMD_FIRST_SOCKET_FD,)
            )
            httpd.serve_forever()
        except KeyboardInterrupt:
            httpd.socket.close()
    else:
        # start the server normally
        # Start up the server to expose the metrics.
        logger.debug("Starting http server on %d" % args.listen_port)
        start_http_server(args.listen_port)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment