Created
December 31, 2015 16:58
-
-
Save giovtorres/6581bb529437eed7c64c to your computer and use it in GitHub Desktop.
Push slurm cpu and memory cluster utilization to a carbon (graphite) instance for graphing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# vim: set ts=4 sw=4 et
"""
send_cluster_util.py - A script that gets CPU and memory utilization
for a SLURM cluster and submits it directly to carbon's
pickle listener port.

http://graphite.readthedocs.org/en/latest/feeding-carbon.html#the-pickle-protocol
"""
import pickle | |
import socket | |
import struct | |
import sys | |
import time | |
import pyslurm | |
import hostlist | |
__author__ = "Giovanni Torres"

# Address of the carbon instance that main() connects to.
CARBON_SERVER = "127.0.0.1"
# TCP port used together with CARBON_SERVER; the payload sent there is a
# length-prefixed pickle (see the link in the module docstring).
CARBON_PICKLE_PORT = 2004
# Seconds to sleep between two samples; main() hands this to run().
DELAY = 30

# Create the pyslurm handles once at import time so they can be reused for
# every sample instead of being rebuilt on each cycle.
try:
    pyslurmnode = pyslurm.node()
    pyslurmjob = pyslurm.job()
except ValueError as e:
    # Without the handles nothing can be collected: exit, handing the
    # exception to sys.exit().
    sys.exit(e)
def get_cluster_util(nodes=None):
    """Sum CPU and memory capacity and allocation over the cluster's nodes.

    Only nodes whose state string contains "ALLOCATED", "MIXED" or "IDLE"
    are counted; nodes in any other state are skipped entirely.

    Args:
        nodes: Optional mapping of node name to a node-info dict providing
            the keys "state", "cpus", "alloc_cpus", "real_memory" and
            "alloc_memory".  When omitted, the mapping is fetched from the
            module-level ``pyslurmnode`` handle.

    Returns:
        A tuple ``(total_cpus_avail, total_cpus_alloc, total_memory_avail,
        total_memory_alloc)``, or ``None`` when the node data could not be
        fetched (``pyslurmnode.get()`` raised ``ValueError``).
    """
    if nodes is None:
        try:
            nodes = pyslurmnode.get()
        except ValueError:
            # Callers must be prepared for None: there is no data to report.
            return None

    total_cpus_avail = 0
    total_cpus_alloc = 0
    total_memory_avail = 0
    total_memory_alloc = 0

    for nodeinfo in nodes.values():
        # Missing or None fields are treated as empty/zero so that a single
        # incomplete node record cannot abort the whole sample.
        state = nodeinfo.get("state") or ""
        cpus = nodeinfo.get("cpus") or 0
        alloc_cpus = nodeinfo.get("alloc_cpus") or 0
        real_memory = nodeinfo.get("real_memory") or 0
        alloc_memory = nodeinfo.get("alloc_memory") or 0

        if "ALLOCATED" in state:
            # A fully allocated node has all of its memory counted as in use.
            # NOTE(review): presumably because no further job can be placed
            # on it - confirm this is the intended accounting.
            memory_in_use = real_memory
        elif "MIXED" in state:
            memory_in_use = alloc_memory
        elif "IDLE" in state:
            memory_in_use = 0
        else:
            continue

        total_memory_avail += real_memory
        total_memory_alloc += memory_in_use
        total_cpus_avail += cpus
        total_cpus_alloc += alloc_cpus

    return (total_cpus_avail, total_cpus_alloc,
            total_memory_avail, total_memory_alloc)
def _utilization_metrics(now, util):
    """Build carbon metric tuples from a (cpus_avail, cpus_alloc, mem_avail, mem_alloc) tuple."""
    cpus_avail, cpus_alloc, memory_avail, memory_alloc = util
    metrics = []
    # A percentage is undefined when there is no capacity at all (no node in
    # a counted state); omit the data point instead of dividing by zero.
    if cpus_avail:
        metrics.append(('cluster.slurm_cpu_util.gauge-percent_cpu_util',
                        (now, cpus_alloc * 100 / cpus_avail)))
    if memory_avail:
        metrics.append(('cluster.slurm_mem_util.gauge-percent_mem_util',
                        (now, memory_alloc * 100 / memory_avail)))
    return metrics


def run(sock, delay):
    """Sample cluster utilization and push it to carbon in an endless loop.

    Every cycle the CPU and memory utilization percentages are computed from
    get_cluster_util(), pickled as a list of ``(path, (timestamp, value))``
    tuples, prefixed with the payload length packed as ``'!L'`` and written
    to ``sock``.  A cycle whose SLURM query failed, or that has no capacity
    to report against, sends nothing.

    Args:
        sock: Connected socket-like object; only ``sendall`` is used.
        delay: Seconds to sleep between two samples.

    This function does not return; it ends only through an exception
    (for example KeyboardInterrupt or a socket error).
    """
    while True:
        now = int(time.time())
        util = get_cluster_util()
        if util is None:
            # get_cluster_util() signals a failed SLURM query with None;
            # skip this sample and retry on the next cycle.
            sys.stderr.write("Could not fetch node data from SLURM; "
                             "skipping this sample\n")
        else:
            metrics = _utilization_metrics(now, util)
            if metrics:
                package = pickle.dumps(metrics, 1)
                size = struct.pack('!L', len(package))
                sock.sendall(size)
                sock.sendall(package)
        time.sleep(delay)
def main():
    """Connect to carbon and stream cluster utilization until interrupted.

    Opens a TCP connection to CARBON_SERVER:CARBON_PICKLE_PORT and hands the
    socket to run() together with DELAY.  The socket is closed on every exit
    path.

    Raises:
        SystemExit: with an explanatory message when the connection cannot
            be established, or with status 0 after CTRL-C.
    """
    sock = socket.socket()
    try:
        try:
            sock.connect((CARBON_SERVER, CARBON_PICKLE_PORT))
        except socket.error:
            # Adjacent literals instead of a backslash continuation inside
            # the string, which would embed the continuation line's leading
            # whitespace in the message.
            raise SystemExit("Couldn't connect to %s on port %d. "
                             "Is carbon-cache running?"
                             % (CARBON_SERVER, CARBON_PICKLE_PORT))
        try:
            run(sock, DELAY)
        except KeyboardInterrupt:
            sys.stderr.write("\nExiting on CTRL-c\n")
            sys.exit(0)
    finally:
        # Runs for SystemExit as well, so the connection is always released.
        sock.close()
# Start the collector only when the file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment