-
-
Save leuc/e45f4dc64dc1db870e4bad1c436228bb to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3 | |
# | |
# SPDX-License-Identifier: GPL-3.0-or-later | |
# | |
# amdgpu_metrics.py decode amdgpu metrics from sysfs | |
# Copyright (C) 2021 leuc | |
# | |
# This program is free software: you can redistribute it and/or modify it under the | |
# terms of the GNU Affero General Public License as published by the Free Software | |
# Foundation, either version 3 of the License, or (at your option) any later | |
# version. | |
# | |
# This program is distributed in the hope that it will be useful, but WITHOUT ANY | |
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A | |
# PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. | |
# | |
# You should have received a copy of the GNU Affero General Public License along | |
# with this program. If not, see <https://www.gnu.org/licenses/>. | |
import argparse | |
import ctypes | |
from json import dumps | |
from enum import IntFlag | |
COMMON_HEADER_SIZE = 4 | |
class ThrottleStatus(IntFlag):
    """Throttler bit names, one flag per bit position.

    Iterating an instance (or calling ``active()``) yields the *names* of
    the flags that are set, in declaration order; ``str()`` joins those
    names with ``', '``.
    """

    # linux/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
    PPT0 = 1 << 0
    PPT1 = 1 << 1
    PPT2 = 1 << 2
    PPT3 = 1 << 3
    SPL = 1 << 4
    FPPT = 1 << 5
    SPPT = 1 << 6
    SPPT_APU = 1 << 7
    TDC_GFX = 1 << 16
    TDC_SOC = 1 << 17
    TDC_MEM = 1 << 18
    TDC_VDD = 1 << 19
    TDC_CVIP = 1 << 20
    EDC_CPU = 1 << 21
    EDC_GFX = 1 << 22
    APCC = 1 << 23
    TEMP_GPU = 1 << 32
    TEMP_CORE = 1 << 33
    TEMP_MEM = 1 << 34
    TEMP_EDGE = 1 << 35
    TEMP_HOTSPOT = 1 << 36
    TEMP_SOC = 1 << 37
    TEMP_VR_GFX = 1 << 38
    TEMP_VR_SOC = 1 << 39
    TEMP_VR_MEM0 = 1 << 40
    TEMP_VR_MEM1 = 1 << 41
    TEMP_LIQUID0 = 1 << 42
    TEMP_LIQUID1 = 1 << 43
    VRHOT0 = 1 << 44
    VRHOT1 = 1 << 45
    PROCHOT_CPU = 1 << 46
    PROCHOT_GFX = 1 << 47
    PPM = 1 << 56
    FIT = 1 << 57

    def active(self):
        """Return a generator over the names of all flags set in this value."""
        declared = type(self).__members__.items()
        return (name for name, flag in declared if flag._value_ & self.value)

    def __iter__(self):
        """Iterate over the names of the set flags (same as ``active()``)."""
        return self.active()

    def __str__(self):
        """Return the set flag names as a comma-separated string."""
        return u', '.join(self.active())
class GpuMetrics(ctypes.Structure):
    """Base class for all gpu_metrics table layouts.

    An instance is built directly from a bytes-like object
    (``SomeStruct(buf)``); the bytes are copied into the structure.
    Iterating an instance yields ``(field_name, value)`` pairs in declaration
    order, with array fields converted to plain lists so the result can be
    passed to ``dict()`` / ``json.dumps``.

    Raises:
        ValueError: (from ``from_buffer_copy``) if *buf* is shorter than the
            structure.
    """

    # The versioned structs below are decoded from the bytes that FOLLOW the
    # 4-byte common header, so their fields do not start on the same
    # boundary as in the kernel struct.  Letting ctypes apply natural
    # alignment would therefore insert padding that is not in the data
    # (e.g. shifting every uint64 by 4 bytes).  Pack tightly instead; the
    # kernel structs carry their own explicit padding members.
    _pack_ = 1
    # Python 3.14+ warns when _pack_ is used without an explicit layout;
    # older versions ignore this attribute.
    _layout_ = 'ms'

    def __new__(cls, buf):
        return cls.from_buffer_copy(buf)

    def __init__(self, data):
        # All state comes from the buffer copied in __new__; swallow the
        # positional argument so ctypes does not treat it as a field value.
        pass

    @staticmethod
    def _to_python(value):
        """Return ctypes arrays as lists; pass every other value through."""
        if isinstance(value, ctypes.Array):
            return list(value)
        return value

    def __iter__(self):
        return ((f[0], self._to_python(getattr(self, f[0])))
                for f in self._fields_)

    def __str__(self):
        lines = [u'{}: {}'.format(name, value) for name, value in self]
        return u'> {}\n'.format(type(self).__name__) + u'\n'.join(lines)


class MetricsTableHeader(GpuMetrics):
    """Common 4-byte header that precedes every metrics table."""

    _fields_ = [
        ('structure_size', ctypes.c_uint16),
        ('format_revision', ctypes.c_uint8),
        ('content_revision', ctypes.c_uint8),
    ]


# AMD GPU metrics defined in
# linux/drivers/gpu/drm/amd/include/kgd_pp_interface.h
#
# Every struct below describes the table WITHOUT the leading common header.
# Per-core / per-L3 / per-HBM-instance values are fixed-size arrays.


class GpuMetrics_v1_0(GpuMetrics):
    """gpu_metrics v1.0 layout (dGPU)."""

    _fields_ = [
        # NOTE(review): in v1.0 a uint64 directly follows the 4-byte header,
        # so a 64-bit kernel build places 4 bytes of compiler padding here.
        # This assumes an 8-byte-aligned uint64 (x86-64, arm64, ...) --
        # confirm before relying on it for 32-bit kernels.
        ('alignment_padding', ctypes.c_uint32),
        ('system_clock_counter', ctypes.c_uint64),
        ('temperature_edge', ctypes.c_uint16),
        ('temperature_hotspot', ctypes.c_uint16),
        ('temperature_mem', ctypes.c_uint16),
        ('temperature_vrgfx', ctypes.c_uint16),
        ('temperature_vrsoc', ctypes.c_uint16),
        ('temperature_vrmem', ctypes.c_uint16),
        ('average_gfx_activity', ctypes.c_uint16),
        ('average_umc_activity', ctypes.c_uint16),
        ('average_mm_activity', ctypes.c_uint16),
        ('average_socket_power', ctypes.c_uint16),
        ('energy_accumulator', ctypes.c_uint32),
        ('average_gfxclk_frequency', ctypes.c_uint16),
        ('average_socclk_frequency', ctypes.c_uint16),
        ('average_uclk_frequency', ctypes.c_uint16),
        ('average_vclk0_frequency', ctypes.c_uint16),
        ('average_dclk0_frequency', ctypes.c_uint16),
        ('average_vclk1_frequency', ctypes.c_uint16),
        ('average_dclk1_frequency', ctypes.c_uint16),
        ('current_gfxclk', ctypes.c_uint16),
        ('current_socclk', ctypes.c_uint16),
        ('current_uclk', ctypes.c_uint16),
        ('current_vclk0', ctypes.c_uint16),
        ('current_dclk0', ctypes.c_uint16),
        ('current_vclk1', ctypes.c_uint16),
        ('current_dclk1', ctypes.c_uint16),
        ('throttle_status', ctypes.c_uint32),
        ('current_fan_speed', ctypes.c_uint16),
        ('pcie_link_width', ctypes.c_uint8),
        ('pcie_link_speed', ctypes.c_uint8),
    ]


class GpuMetrics_v1_1(GpuMetrics):
    """gpu_metrics v1.1 layout (dGPU)."""

    _fields_ = [
        ('temperature_edge', ctypes.c_uint16),
        ('temperature_hotspot', ctypes.c_uint16),
        ('temperature_mem', ctypes.c_uint16),
        ('temperature_vrgfx', ctypes.c_uint16),
        ('temperature_vrsoc', ctypes.c_uint16),
        ('temperature_vrmem', ctypes.c_uint16),
        ('average_gfx_activity', ctypes.c_uint16),
        ('average_umc_activity', ctypes.c_uint16),
        ('average_mm_activity', ctypes.c_uint16),
        ('average_socket_power', ctypes.c_uint16),
        ('energy_accumulator', ctypes.c_uint64),
        ('system_clock_counter', ctypes.c_uint64),
        ('average_gfxclk_frequency', ctypes.c_uint16),
        ('average_socclk_frequency', ctypes.c_uint16),
        ('average_uclk_frequency', ctypes.c_uint16),
        ('average_vclk0_frequency', ctypes.c_uint16),
        ('average_dclk0_frequency', ctypes.c_uint16),
        ('average_vclk1_frequency', ctypes.c_uint16),
        ('average_dclk1_frequency', ctypes.c_uint16),
        ('current_gfxclk', ctypes.c_uint16),
        ('current_socclk', ctypes.c_uint16),
        ('current_uclk', ctypes.c_uint16),
        ('current_vclk0', ctypes.c_uint16),
        ('current_dclk0', ctypes.c_uint16),
        ('current_vclk1', ctypes.c_uint16),
        ('current_dclk1', ctypes.c_uint16),
        ('throttle_status', ctypes.c_uint32),
        ('current_fan_speed', ctypes.c_uint16),
        ('pcie_link_width', ctypes.c_uint16),
        ('pcie_link_speed', ctypes.c_uint16),
        ('padding', ctypes.c_uint16),
        ('gfx_activity_acc', ctypes.c_uint32),
        ('mem_activity_acc', ctypes.c_uint32),
        # One entry per HBM instance (NUM_HBM_INSTANCES == 4 in the kernel).
        ('temperature_hbm', ctypes.c_uint16 * 4),
    ]


class GpuMetrics_v1_2(GpuMetrics):
    """gpu_metrics v1.2 layout: v1.1 plus the firmware timestamp."""

    _fields_ = GpuMetrics_v1_1._fields_ + [
        ('firmware_timestamp', ctypes.c_uint64),
    ]


class GpuMetrics_v1_3(GpuMetrics):
    """gpu_metrics v1.3 layout: v1.2 plus voltages and independent throttle bits."""

    _fields_ = GpuMetrics_v1_2._fields_ + [
        ('voltage_soc', ctypes.c_uint16),
        ('voltage_gfx', ctypes.c_uint16),
        ('voltage_mem', ctypes.c_uint16),
        ('padding1', ctypes.c_uint16),
        ('indep_throttle_status', ctypes.c_uint64),
    ]


class GpuMetrics_v2_0(GpuMetrics):
    """gpu_metrics v2.0 layout (APU)."""

    _fields_ = [
        # NOTE(review): same situation as v1.0 -- a uint64 right after the
        # 4-byte header implies 4 bytes of compiler padding on 64-bit
        # kernels.  Confirm for 32-bit builds.
        ('alignment_padding', ctypes.c_uint32),
        ('system_clock_counter', ctypes.c_uint64),
        ('temperature_gfx', ctypes.c_uint16),
        ('temperature_soc', ctypes.c_uint16),
        ('temperature_core', ctypes.c_uint16 * 8),
        ('temperature_l3', ctypes.c_uint16 * 2),
        ('average_gfx_activity', ctypes.c_uint16),
        ('average_mm_activity', ctypes.c_uint16),
        ('average_socket_power', ctypes.c_uint16),
        ('average_cpu_power', ctypes.c_uint16),
        ('average_soc_power', ctypes.c_uint16),
        ('average_gfx_power', ctypes.c_uint16),
        ('average_core_power', ctypes.c_uint16 * 8),
        ('average_gfxclk_frequency', ctypes.c_uint16),
        ('average_socclk_frequency', ctypes.c_uint16),
        ('average_uclk_frequency', ctypes.c_uint16),
        ('average_fclk_frequency', ctypes.c_uint16),
        ('average_vclk_frequency', ctypes.c_uint16),
        ('average_dclk_frequency', ctypes.c_uint16),
        ('current_gfxclk', ctypes.c_uint16),
        ('current_socclk', ctypes.c_uint16),
        ('current_uclk', ctypes.c_uint16),
        ('current_fclk', ctypes.c_uint16),
        ('current_vclk', ctypes.c_uint16),
        ('current_dclk', ctypes.c_uint16),
        ('current_coreclk', ctypes.c_uint16 * 8),
        ('current_l3clk', ctypes.c_uint16 * 2),
        ('throttle_status', ctypes.c_uint32),
        ('fan_pwm', ctypes.c_uint16),
        ('padding', ctypes.c_uint16),
    ]


class GpuMetrics_v2_1(GpuMetrics):
    """gpu_metrics v2.1 layout (APU)."""

    _fields_ = [
        ('temperature_gfx', ctypes.c_uint16),
        ('temperature_soc', ctypes.c_uint16),
        ('temperature_core', ctypes.c_uint16 * 8),
        ('temperature_l3', ctypes.c_uint16 * 2),
        ('average_gfx_activity', ctypes.c_uint16),
        ('average_mm_activity', ctypes.c_uint16),
        ('system_clock_counter', ctypes.c_uint64),
        ('average_socket_power', ctypes.c_uint16),
        ('average_cpu_power', ctypes.c_uint16),
        ('average_soc_power', ctypes.c_uint16),
        ('average_gfx_power', ctypes.c_uint16),
        ('average_core_power', ctypes.c_uint16 * 8),
        ('average_gfxclk_frequency', ctypes.c_uint16),
        ('average_socclk_frequency', ctypes.c_uint16),
        ('average_uclk_frequency', ctypes.c_uint16),
        ('average_fclk_frequency', ctypes.c_uint16),
        ('average_vclk_frequency', ctypes.c_uint16),
        ('average_dclk_frequency', ctypes.c_uint16),
        ('current_gfxclk', ctypes.c_uint16),
        ('current_socclk', ctypes.c_uint16),
        ('current_uclk', ctypes.c_uint16),
        ('current_fclk', ctypes.c_uint16),
        ('current_vclk', ctypes.c_uint16),
        ('current_dclk', ctypes.c_uint16),
        ('current_coreclk', ctypes.c_uint16 * 8),
        ('current_l3clk', ctypes.c_uint16 * 2),
        ('throttle_status', ctypes.c_uint32),
        ('fan_pwm', ctypes.c_uint16),
        ('padding', ctypes.c_uint16 * 3),
    ]


class GpuMetrics_v2_2(GpuMetrics):
    """gpu_metrics v2.2 layout: v2.1 plus the independent throttle bits."""

    _fields_ = GpuMetrics_v2_1._fields_ + [
        ('indep_throttle_status', ctypes.c_uint64),
    ]
if __name__ == "__main__":
    # Decoder for each supported (format_revision, content_revision) pair
    # announced by the common header.
    METRICS_BY_REVISION = {
        (1, 0): GpuMetrics_v1_0,
        (1, 1): GpuMetrics_v1_1,
        (1, 2): GpuMetrics_v1_2,
        (1, 3): GpuMetrics_v1_3,
        (2, 0): GpuMetrics_v2_0,
        (2, 1): GpuMetrics_v2_1,
        (2, 2): GpuMetrics_v2_2,
    }

    parser = argparse.ArgumentParser()
    parser.add_argument('files', nargs='+',
                        help='Path to gpu_metrics file under /sys')
    parser.add_argument('-j', '--json',
                        help='Format output as JSON', action="store_true")
    args = parser.parse_args()

    for filename in args.files:
        with open(filename, mode='rb') as fh:
            header = MetricsTableHeader(fh.read(COMMON_HEADER_SIZE))
            # Validate with real exceptions: `assert` disappears under -O.
            if header.structure_size <= COMMON_HEADER_SIZE:
                raise ValueError("{}: invalid structure_size {}".format(
                    filename, header.structure_size))
            # structure_size counts the header that was already consumed.
            body_size = header.structure_size - COMMON_HEADER_SIZE
            buf = fh.read(body_size)
            if len(buf) != body_size:
                raise ValueError(
                    "{}: truncated metrics table ({} of {} bytes)".format(
                        filename, len(buf) + COMMON_HEADER_SIZE,
                        header.structure_size))
            if fh.read() != b'':  # should be empty
                raise ValueError(
                    "{}: unexpected data after metrics table".format(filename))

        revision = (header.format_revision, header.content_revision)
        try:
            metrics_type = METRICS_BY_REVISION[revision]
        except KeyError:
            raise ValueError("Unsupported metrics v{}.{}".format(
                header.format_revision, header.content_revision)) from None
        metrics = metrics_type(buf)

        # The ThrottleStatus names are the ASIC-independent throttler bits,
        # so decode `indep_throttle_status` whenever the table carries it.
        # The legacy `throttle_status` word is ASIC dependent; it is only
        # used as a fallback for tables without the independent field.
        ts = ThrottleStatus(getattr(metrics, 'indep_throttle_status',
                                    metrics.throttle_status))

        if args.json:
            print(dumps(dict(
                [("path", filename)] +
                list(header) +
                list(metrics) +
                [('throttle_status_flags', list(ts))]
            )))
        else:
            print(filename)
            print(header)
            print(metrics)
            print("throttle_status_flags:", ts)
I suspect the "full" power profiles for 7900 XTX are simply not in the kernel tree ... yet.
in comparison Navi 10 Radeon RX 5600 power profile with kernel v6.0.9
cat /sys/class/drm/card0/device/pp_power_profile_mode
PROFILE_INDEX(NAME) CLOCK_TYPE(NAME) FPS MinFreqType MinActiveFreqType MinActiveFreq BoosterFreqType BoosterFreq PD_Data_limit_c PD_Data_error_coeff PD_Data_error_rate_coeff
0 BOOTUP_DEFAULT*:
0( GFXCLK) 0 5 1 0 4 800 4587520 -65536 0
1( SOCCLK) 0 5 1 0 3 800 1310720 -6553 0
2( MEMLK) 0 5 1 0 4 800 327680 -65536 0
1 3D_FULL_SCREEN :
0( GFXCLK) 0 5 1 0 4 650 3932160 -6553 -65536
1( SOCCLK) 0 5 1 850 4 800 1310720 -6553 0
2( MEMLK) 0 5 4 850 4 800 327680 -65536 0
2 POWER_SAVING :
0( GFXCLK) 0 5 1 0 3 0 5898240 -65536 0
1( SOCCLK) 0 5 1 0 3 0 1310720 -6553 0
2( MEMLK) 0 5 1 0 3 0 1966080 -65536 0
3 VIDEO :
0( GFXCLK) 0 5 1 0 4 500 4587520 -65536 0
1( SOCCLK) 0 5 1 0 4 500 1310720 -6553 0
2( MEMLK) 0 5 1 0 4 500 1966080 -65536 0
4 VR :
0( GFXCLK) 0 5 4 1000 4 800 4587520 -65536 0
1( SOCCLK) 0 5 1 0 4 800 327680 -65536 0
2( MEMLK) 0 5 1 0 4 800 327680 -65536 0
5 COMPUTE :
0( GFXCLK) 0 5 4 1000 3 0 3932160 -65536 -65536
1( SOCCLK) 0 5 4 850 3 0 327680 -65536 -32768
2( MEMLK) 0 5 4 850 3 0 327680 -65536 -32768
6 CUSTOM :
0( GFXCLK) 0 5 1 0 4 800 4587520 -65536 0
1( SOCCLK) 0 5 1 0 3 800 1310720 -6553 0
2( MEMLK) 0 5 1 0 4 800 327680 -65536 0
Sorry, I was trying to save some space and only pasted the one that was in use, here's the full thing:
❯ cat /sys/class/drm/card0/device/pp_power_profile_mode
PROFILE_INDEX(NAME) CLOCK_TYPE(NAME) FPS MinActiveFreqType MinActiveFreq BoosterFreqType BoosterFreq PD_Data_limit_c PD_Data_error_coeff PD_Data_error_rate_coeff
0 BOOTUP_DEFAULT*:
0( GFXCLK) 0 1 0 4 800 4587520 -65536 0
1( FCLK) 0 3 0 1 0 3276800 -65536 -6553
1 3D_FULL_SCREEN :
0( GFXCLK) 0 0 1200 4 650 3932160 -3276 -65536
1( FCLK) 0 3 0 3 0 1310720 -6553 -6553
2 POWER_SAVING :
0( GFXCLK) 0 1 0 3 0 5898240 -65536 0
1( FCLK) 0 1 0 1 0 3407872 -65536 -6553
3 VIDEO :
0( GFXCLK) 0 1 0 4 500 4587520 -65536 0
1( FCLK) 0 1 0 1 0 3473408 -65536 -6553
4 VR :
0( GFXCLK) 0 2 1000 1 0 3276800 0 0
1( FCLK) 0 3 0 3 0 1310720 -6553 -6553
5 COMPUTE :
0( GFXCLK) 0 2 1000 1 0 3932160 0 0
1( FCLK) 0 3 0 3 0 1310720 -6553 -6553
6 CUSTOM :
0( GFXCLK) 0 1 0 4 800 4587520 -65536 0
1( FCLK) 0 3 0 1 0 3276800 -65536 -6553
Not sure if the missing SOCCLK is expected.
It's "good enough for now", so I'll probably just wait for some more driver updates to come down the pipe and see what shakes out.
try manually setting the power profile to 3D_FULL_SCREEN
as root
echo manual > /sys/class/drm/card0/device/power_dpm_force_performance_level
echo "1" > /sys/class/drm/card0/device/pp_power_profile_mode
performance difference might only show with a real benchmark...
Thanks, I used the only benchmark I have installed atm, ffxiv endwalker, and it seemed to bump the score by a little bit, tho it's mostly CPU bottlenecked. The main thing I noticed is the GPU stayed higher clocked and pulled max watts most of the time rather than dropping off as soon as possible. Still goes back to normal during idle / at desktop, so I'll set this as the default for awhile. Thanks !
I still see the ever-changing "throttle_status_flags", like PPT0, PPT1, PPT2, FPPT, TDC_CVIP, but I'm pretty sure those are red herrings and not really indicative of a real problem, or any actual throttling.
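In the kernel structs, temperature_hbm, temperature_core, temperature_l3, average_core_power, current_coreclk, current_l3clk and padding are arrays, not scalars.
Therefore, it looks like the data decoded by the script (first dump below) is misaligned; compare it with the second dump.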
> MetricsTableHeader
structure_size: 128
format_revision: 2
content_revision: 2
> GpuMetrics_v2_2
temperature_gfx: 3850
temperature_soc: 3925
temperature_core: 3850
temperature_l3: 3975
average_gfx_activity: 3875
average_mm_activity: 5250
system_clock_counter: 17287498960675
average_socket_power: 0
average_socket_power: 0
average_cpu_power: 62303
average_soc_power: 17492
average_gfx_power: 19098
average_core_power: 0
average_gfxclk_frequency: 11
average_socclk_frequency: 6978
average_uclk_frequency: 1744
average_fclk_frequency: 65535
average_vclk_frequency: 0
average_dclk_frequency: 353
current_gfxclk: 0
current_socclk: 8886
current_uclk: 351
current_fclk: 350
current_vclk: 341
current_dclk: 343
current_coreclk: 400
current_l3clk: 400
throttle_status: 104857599
fan_pwm: 400
padding: 65535
indep_throttle_status: 450359988533068176
throttle_status_flags: PPT0, PPT1, PPT2, PPT3, SPL, FPPT, SPPT, SPPT_APU, TDC_GFX, TDC_SOC, TDC_MEM, TDC_VDD, TDC_CVIP, EDC_CPU
V2_2(
gpu_metrics_v2_2 {
common_header: metrics_table_header {
structure_size: 128,
format_revision: 2,
content_revision: 2,
},
temperature_gfx: 4050,
temperature_soc: 3950,
temperature_core: [
3875,
3925,
3900,
4000,
4200,
4000,
5225,
4050,
],
temperature_l3: [
4125,
0,
],
average_gfx_activity: 2,
average_mm_activity: 0,
system_clock_counter: 82267147835201,
average_socket_power: 14,
average_cpu_power: 8300,
average_soc_power: 2587,
average_gfx_power: 65535,
average_core_power: [
0,
396,
0,
403,
380,
339,
4438,
471,
],
average_gfxclk_frequency: 401,
average_socclk_frequency: 401,
average_uclk_frequency: 65535,
average_fclk_frequency: 1599,
average_vclk_frequency: 400,
average_dclk_frequency: 65535,
current_gfxclk: 1900,
current_socclk: 975,
current_uclk: 6,
current_fclk: 1600,
current_vclk: 400,
current_dclk: 400,
current_coreclk: [
0,
3560,
0,
3560,
3560,
3560,
4450,
3560,
],
current_l3clk: [
4450,
0,
],
throttle_status: 0,
fan_pwm: 0,
padding: [
65535,
65535,
65535,
],
indep_throttle_status: 0,
},
)
https://github.com/Umio-Yasuno/libdrm-amdgpu-sys-rs/blob/main/examples/gpu_metrics.rs
@leuc amdgpu_metrics.py will give partially incorrect results because `_pack_ = 1` is not set on the ctypes structures.
Hi, sorry, I should have provided that bit, kernel is 6.1.2-3-cachyos (the default kernel here uses the BORE scheduler iirc)
The power profile is 0-BOOTUP_DEFAULT, which appears to have the max values the card supports, and looks like:
I've come to the conclusion that the lack of "maximum GO" on the GPU was simply due to specific CPU cores being maxed out, so while stats may have shown "15% CPU load", it was really saying "most of your cores are idle but holy hell one is going to need a medic" . There are some bits of data that seem off, even from the other repo you linked (thanks for that by the way, very good tools!)
I'm still seeing the throttling messages bounce around, but it doesn't appear to actually be throttling the card when forcing it into a high GPU/low CPU load state, so probably just a red herring. Anywho, if there's any other info you want for S&Gs, lemme know. Thanks for the tool ^_^