An example of logging NVIDIA GPU power consumption using Python.
""" Descr: | |
A how-to log power consumption of nvidia gpus in a system using python. | |
After obtaining results, user can plot them using matplotlib. | |
author: | |
Ioannis Athanasiadis (supernlogn) | |
""" | |
import numpy as np | |
from matplotlib import pyplot as plt | |
import subprocess | |
import time, datetime | |
import re | |
def read_nvidia_power(timeDuration, timeStep=1, numGPUs=2, logFile=""): | |
""" | |
Descr: | |
Function used to return nvidia times per timeStep(default = 1). | |
Args: | |
timeDuration: duration in seconds. | |
timeStep: the measurement time step in seconds (integer only > 1). | |
numGPUs: number of nvidia GPUs in the system. | |
logFile: file path to write results live. | |
Returns: | |
A numpy array of 1+numGPUs collumns | |
1-st collumn are the timestamps of the measurements. | |
2-nd collumn are the power measurements in Watts of the 1st GPU. | |
3-rd collumn are the power measurements in Watts of the 2nd GPU. | |
... | |
(numGPUs+1)-th collumn are the power measurements in Watts of the (numGPUs)-th GPU. | |
""" | |
time_start = time.time() | |
timeStep = int(timeStep) | |
p = subprocess.Popen("nvidia-smi -l " + str(timeStep), shell=True, stdout=subprocess.PIPE) | |
pattern = re.compile("[0-9]*?[W]{1,1}[\s]{1,1}[/]{1,1}[\s]{1,1}[0-9]*?[W]{1,1}") | |
if( logFile != "" ): | |
output_f = open(logFile, "w") | |
else: | |
output_f = None | |
measurements = [] | |
i = 0 | |
with p.stdout as f: | |
while( time.time() - time_start < timeDuration ): | |
time.sleep(1) | |
s = "123123123123123" | |
while( len(s) >= 10 and time.time() - time_start < timeDuration ): | |
s = f.readline().decode() # for python3+ | |
# s = f.readline() # for python2 | |
m = re.findall(pattern, s) | |
if( len(m) != 0 ): | |
measurement = float(m[0].partition('W /')[0]) | |
if( i == 0 ): | |
measurements.append([datetime.datetime.now(), measurement]) | |
elif( i == numGPUs-1 and output_f != None): | |
measurements[-1].append(measurement) | |
output_f.write(str(measurements[-1]) +"\n") | |
else: | |
measurements[-1].append(measurement) | |
i = (i+1)%numGPUs | |
# if the last row of measurements is incomplete, then use -1.0 to fill it | |
if( not len(measurements) < 2 ): | |
while( len(measurements[-1]) != len(measurements[-2]) ): | |
measurements[-1].append(-1.0) | |
p.kill() | |
if( output_f != None ): | |
output_f.close() | |
return np.array(measurements) | |
if __name__ == "__main__": | |
measurements = read_nvidia_power(20, logFile="nv-power.txt") | |
plt.plot(measurements[0,:], measurements[1,:]) | |
plt.show() |
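If you prefer not to parse nvidia-smi's text report at all, roughly the same measurement can be taken directly through NVML. A minimal sketch, assuming the nvidia-ml-py package (import name pynvml) is installed; nvmlDeviceGetPowerUsage reports milliwatts:

# Sketch: sample GPU power through NVML instead of parsing nvidia-smi text.
# Assumes the nvidia-ml-py package (import name: pynvml) is installed.
import time
import datetime
import pynvml

def read_nvidia_power_nvml(time_duration, time_step=1.0):
    """Return a list of [timestamp, gpu0_watts, gpu1_watts, ...] rows."""
    pynvml.nvmlInit()
    try:
        handles = [pynvml.nvmlDeviceGetHandleByIndex(i)
                   for i in range(pynvml.nvmlDeviceGetCount())]
        rows = []
        t_start = time.time()
        while time.time() - t_start < time_duration:
            # nvmlDeviceGetPowerUsage returns milliwatts; convert to watts
            row = [datetime.datetime.now()]
            row += [pynvml.nvmlDeviceGetPowerUsage(h) / 1000.0 for h in handles]
            rows.append(row)
            time.sleep(time_step)
        return rows
    finally:
        pynvml.nvmlShutdown()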
If anybody is still finding this from searches, nvidia-smi
also supports streaming CSV output every N milliseconds:
$ nvidia-smi -lms 10 --query-gpu=pstate,power.management,power.draw,power.draw.average,power.draw.instant,power.limit,power.default_limit,power.min_limit,power.max_limit,temperature.gpu,temperature.memory,memory.used,memory.total,memory.free,clocks.current.sm,clocks.current.memory --format=csv
pstate, power.management, power.draw [W], power.draw.average [W], power.draw.instant [W], power.limit [W], power.default_limit [W], power.min_limit [W], power.max_limit [W], temperature.gpu, temperature.memory, memory.used [MiB], memory.total [MiB], memory.free [MiB], clocks.current.sm [MHz], clocks.current.memory [MHz]
P0, Enabled, 216.48 W, 216.48 W, 213.25 W, 350.00 W, 310.00 W, 200.00 W, 350.00 W, 89, 96, 78751 MiB, 81559 MiB, 2479 MiB, 585 MHz, 1593 MHz
P0, Enabled, 216.48 W, 216.48 W, 213.25 W, 350.00 W, 310.00 W, 200.00 W, 350.00 W, 89, 94, 78751 MiB, 81559 MiB, 2479 MiB, 615 MHz, 1593 MHz
P0, Enabled, 216.48 W, 216.48 W, 213.25 W, 350.00 W, 310.00 W, 200.00 W, 350.00 W, 88, 94, 78751 MiB, 81559 MiB, 2479 MiB, 630 MHz, 1593 MHz
P0, Enabled, 215.27 W, 215.27 W, 208.87 W, 350.00 W, 310.00 W, 200.00 W, 350.00 W, 88, 96, 78751 MiB, 81559 MiB, 2479 MiB, 630 MHz, 1593 MHz
P0, Enabled, 215.27 W, 215.27 W, 208.87 W, 350.00 W, 310.00 W, 200.00 W, 350.00 W, 89, 94, 78751 MiB, 81559 MiB, 2479 MiB, 660 MHz, 1593 MHz
P0, Enabled, 215.27 W, 215.27 W, 208.87 W, 350.00 W, 310.00 W, 200.00 W, 350.00 W, 89, 96, 78751 MiB, 81559 MiB, 2479 MiB, 645 MHz, 1593 MHz
P0, Enabled, 215.27 W, 215.27 W, 208.87 W, 350.00 W, 310.00 W, 200.00 W, 350.00 W, 88, 94, 78751 MiB, 81559 MiB, 2479 MiB, 660 MHz, 1593 MHz
P0, Enabled, 215.27 W, 215.27 W, 208.87 W, 350.00 W, 310.00 W, 200.00 W, 350.00 W, 89, 93, 78751 MiB, 81559 MiB, 2479 MiB, 585 MHz, 1593 MHz
P0, Enabled, 214.90 W, 214.90 W, 209.45 W, 350.00 W, 310.00 W, 200.00 W, 350.00 W, 89, 94, 78751 MiB, 81559 MiB, 2479 MiB, 510 MHz, 1593 MHz
P0, Enabled, 214.90 W, 214.90 W, 209.45 W, 350.00 W, 310.00 W, 200.00 W, 350.00 W, 88, 94, 78751 MiB, 81559 MiB, 2479 MiB, 525 MHz, 1593 MHz
P0, Enabled, 214.90 W, 214.90 W, 209.45 W, 350.00 W, 310.00 W, 200.00 W, 350.00 W, 88, 96, 78751 MiB, 81559 MiB, 2479 MiB, 540 MHz, 1593 MHz
P0, Enabled, 214.90 W, 214.90 W, 209.45 W, 350.00 W, 310.00 W, 200.00 W, 350.00 W, 88, 95, 78751 MiB, 81559 MiB, 2479 MiB, 555 MHz, 1593 MHz
P0, Enabled, 214.90 W, 214.90 W, 209.45 W, 350.00 W, 310.00 W, 200.00 W, 350.00 W, 88, 95, 78751 MiB, 81559 MiB, 2479 MiB, 495 MHz, 1593 MHz
P0, Enabled, 214.31 W, 214.31 W, 190.39 W, 350.00 W, 310.00 W, 200.00 W, 350.00 W, 88, 94, 78751 MiB, 81559 MiB, 2479 MiB, 495 MHz, 1593 MHz
P0, Enabled, 214.31 W, 214.31 W, 190.39 W, 350.00 W, 310.00 W, 200.00 W, 350.00 W, 88, 94, 78751 MiB, 81559 MiB, 2479 MiB, 750 MHz, 1593 MHz
P0, Enabled, 214.31 W, 214.31 W, 190.39 W, 350.00 W, 310.00 W, 200.00 W, 350.00 W, 88, 95, 78751 MiB, 81559 MiB, 2479 MiB, 1125 MHz, 1593 MHz
P0, Enabled, 214.31 W, 214.31 W, 190.39 W, 350.00 W, 310.00 W, 200.00 W, 350.00 W, 88, 93, 78751 MiB, 81559 MiB, 2479 MiB, 1380 MHz, 1593 MHz
P0, Enabled, 214.31 W, 214.31 W, 190.39 W, 350.00 W, 310.00 W, 200.00 W, 350.00 W, 88, 94, 78751 MiB, 81559 MiB, 2479 MiB, 1545 MHz, 1593 MHz
P0, Enabled, 206.43 W, 206.43 W, 126.68 W, 350.00 W, 310.00 W, 200.00 W, 350.00 W, 88, 94, 78751 MiB, 81559 MiB, 2479 MiB, 1650 MHz, 1593 MHz
P0, Enabled, 206.43 W, 206.43 W, 126.68 W, 350.00 W, 310.00 W, 200.00 W, 350.00 W, 89, 93, 78751 MiB, 81559 MiB, 2479 MiB, 1425 MHz, 1593 MHz
All available fields are listed by nvidia-smi --help-query-gpu.
You can also ask for output with no units (--format=csv,nounits) for easier importing into metrics systems:
$ nvidia-smi -lms 10 --query-gpu=pstate,power.management,power.draw,power.draw.average,power.draw.instant,power.limit,power.default_limit,power.min_limit,power.max_limit,temperature.gpu,temperature.memory,memory.used,memory.total,memory.free,clocks.current.sm,clocks.current.memory --format=csv,nounits
pstate, power.management, power.draw [W], power.draw.average [W], power.draw.instant [W], power.limit [W], power.default_limit [W], power.min_limit [W], power.max_limit [W], temperature.gpu, temperature.memory, memory.used [MiB], memory.total [MiB], memory.free [MiB], clocks.current.sm [MHz], clocks.current.memory [MHz]
P0, Enabled, 187.70, 187.70, 127.95, 350.00, 310.00, 200.00, 350.00, 88, 94, 78751, 81559, 2479, 975, 1593
P0, Enabled, 187.70, 187.70, 127.95, 350.00, 310.00, 200.00, 350.00, 88, 93, 78751, 81559, 2479, 1275, 1593
P0, Enabled, 187.70, 187.70, 127.95, 350.00, 310.00, 200.00, 350.00, 88, 93, 78751, 81559, 2479, 1515, 1593
P0, Enabled, 187.70, 187.70, 127.95, 350.00, 310.00, 200.00, 350.00, 88, 93, 78751, 81559, 2479, 1635, 1593
P0, Enabled, 187.70, 187.70, 127.95, 350.00, 310.00, 200.00, 350.00, 88, 93, 78751, 81559, 2479, 1695, 1593
P0, Enabled, 196.40, 196.40, 271.26, 350.00, 310.00, 200.00, 350.00, 89, 93, 78751, 81559, 2479, 1215, 1593
P0, Enabled, 196.40, 196.40, 271.26, 350.00, 310.00, 200.00, 350.00, 90, 93, 78751, 81559, 2479, 945, 1593
P0, Enabled, 196.40, 196.40, 271.26, 350.00, 310.00, 200.00, 350.00, 89, 93, 78751, 81559, 2479, 600, 1593
P0, Enabled, 196.40, 196.40, 271.26, 350.00, 310.00, 200.00, 350.00, 89, 93, 78751, 81559, 2479, 600, 1593
P0, Enabled, 196.40, 196.40, 271.26, 350.00, 310.00, 200.00, 350.00, 89, 95, 78751, 81559, 2479, 600, 1593
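For reference, here is a minimal sketch of consuming such a stream from Python with the standard csv module; the queried fields and the 100 ms interval below are just illustrative choices, not anything required by nvidia-smi:

# Sketch: stream nounits CSV from nvidia-smi and parse it with the csv module.
# The queried fields and the 100 ms interval are illustrative, not required.
import csv
import subprocess

cmd = ["nvidia-smi", "-lms", "100",
       "--query-gpu=timestamp,index,power.draw",
       "--format=csv,noheader,nounits"]

proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, text=True)
try:
    for row in csv.reader(proc.stdout):
        # each row is one GPU sample: timestamp, GPU index, power draw in watts
        timestamp, gpu_index, power_w = (field.strip() for field in row)
        print(timestamp, gpu_index, float(power_w))
except KeyboardInterrupt:
    pass
finally:
    proc.terminate()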
Hello,
this script was based on Python 2. Try the updated one: I changed line #45 to decode the subprocess output. Tell me if you still have issues.