Created
February 2, 2011 21:48
-
-
Save agaoglu/808523 to your computer and use it in GitHub Desktop.
Ganglia metric module for NVIDIA GPU monitoring
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
# Module-level list of metric descriptor dicts; metric_init() replaces it.
descriptors = []
def getString():
    """Run ``nvidia-smi`` for GPU 0 and return the tail of its report.

    Returns:
        The last 23 lines of ``nvidia-smi -q --gpu=0`` as a single string,
        or the literal string "Error" if the pipe cannot be opened or read.
    """
    command = "nvidia-smi -q --gpu=0 | tail -23"
    try:
        pipe = os.popen(command, 'r')
    except (IOError, OSError):
        return "Error"
    try:
        return pipe.read()
    except (IOError, OSError):
        return "Error"
    finally:
        # Close explicitly instead of relying on garbage collection to
        # release the pipe.
        pipe.close()
def readl(key, output=None):
    """Return the value reported on the first usable line starting with *key*.

    Args:
        key: Label expected at the start of a (whitespace-stripped) line,
            e.g. 'Temperature' or 'Fan Speed'.
        output: Optional report text to search. When omitted, the text is
            obtained by calling getString().

    Returns:
        The text following the first ':' on the matching line. For the
        'Temperature' key this is everything before the first 'C',
        stripped; for any other key it is the value with a trailing '%'
        (and surrounding whitespace) removed. Returns None when no line
        starting with *key* carries a non-empty value.
    """
    if output is None:
        output = getString()
    for line in output.split('\n'):
        line = line.strip()
        if not line.startswith(key):
            continue
        _, sep, value = line.partition(':')
        value = value.strip()
        if not sep or not value:
            # A line that merely begins with the key (a section heading
            # without ':' or with nothing after it) holds no reading;
            # keep scanning instead of failing on it.
            continue
        if key == 'Temperature':
            return value.split('C')[0].strip()
        # Drop the percent sign only if it is actually there, so a bare
        # number is not truncated.
        return value.rstrip('%').strip()
    return None
def Gpu_Temp(name):
    """Metric callback: the 'Temperature' reading converted to an int.

    ``name`` is accepted but not used.
    """
    reading = readl('Temperature')
    return int(reading)
def Fan_Speed(name):
    """Metric callback: the 'Fan Speed' reading converted to an int.

    ``name`` is accepted but not used.
    """
    reading = readl('Fan Speed')
    return int(reading)
def Gpu_Util(name):
    """Metric callback: the 'GPU' reading converted to an int.

    ``name`` is accepted but not used.
    """
    reading = readl('GPU')
    return int(reading)
def Mem_Util(name):
    """Metric callback: the 'Memory' reading converted to an int.

    ``name`` is accepted but not used.
    """
    reading = readl('Memory')
    return int(reading)
def metric_init(params):
    """Build the four GPU metric descriptors and publish them.

    The descriptors are stored in the module-level ``descriptors`` list and
    also returned. ``params`` is accepted but not used.
    """
    global descriptors

    # (metric name, callback, units, description) -- every other field is
    # identical across the four metrics.
    specs = (
        ('Gpu_Temperature', Gpu_Temp, 'C', 'GPU Temperature'),
        ('Fan_Speed', Fan_Speed, '%', 'Fan Speed'),
        ('Gpu_Utilization', Gpu_Util, '%', 'GPU GPU Utilization'),
        ('Memory_Utilization', Mem_Util, '%', 'GPU Memory Utilization'),
    )

    descriptors = [
        {
            'name': metric_name,
            'call_back': callback,
            'time_max': 90,
            'value_type': 'uint',
            'units': unit,
            'slope': 'both',
            'format': '%u',
            'description': summary,
            'groups': 'gpu',
        }
        for metric_name, callback, unit, summary in specs
    ]
    return descriptors
def metric_cleanup():
    """Clean up the metric module; there is nothing to release."""
    return None
# Testing: run this file directly to print the current value of every metric.
if __name__ == '__main__':
    metric_init({})
    for d in descriptors:
        v = d['call_back']('')
        # The parenthesised single-argument form prints the same text
        # whether ``print`` is a statement or a function.
        print('value for %s is %u' % (d['name'], v))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Declares the module named "python_module", loaded from modpython.so.
# NOTE(review): the params value looks like a placeholder path; presumably
# it should be the directory that contains the gpuwatch module -- confirm.
modules { | |
module { | |
name = "python_module" | |
path = "modpython.so" | |
params = "/path/to/dir/of/gpuwatch" | |
} | |
} | |
# Declares the module named "gpuwatch" with language "python".
modules { | |
module { | |
name = "gpuwatch" | |
language = "python" | |
} | |
} | |
# One collection group covering the four GPU metrics. Each metric name here
# matches a 'name' value in the descriptors built by metric_init():
# Gpu_Temperature, Fan_Speed, Gpu_Utilization, Memory_Utilization.
# NOTE(review): collect_every and time_threshold are presumably in seconds;
# confirm against the gmond.conf documentation.
collection_group { | |
collect_every = 20 | |
time_threshold = 20 | |
metric { | |
name = "Gpu_Temperature" | |
title = "Gpu Temperature" | |
} | |
metric { | |
name = "Fan_Speed" | |
title = "Fan Speed Percentage" | |
} | |
metric { | |
name = "Gpu_Utilization" | |
title = "Gpu Utilization Percentage" | |
} | |
metric { | |
name = "Memory_Utilization" | |
title = "Memory Utilization Percentage" | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
See our post for some details: http://goo.gl/g2zpS