#!/bin/bash
set -e -o errexit -o pipefail -o nounset
###################################
# This script can be used by itself, but it's recommended that you read
# a tutorial on the Proxmox forum first: https://forum.proxmox.com/threads/hey-proxmox-community-lets-talk-about-resources-isolation.124256/
###################################
# Do not modify these variables (set by Proxmox when calling the script)
vmId="$1"
runPhase="$2"
idleVmId="990"
idleVm="no"
hostAllowedCpus=""
qemuAllowedCpus=""
vmCpus=""
cpuGovernor=""
echo "Running $runPhase on VM=$vmId"
# vCPU pinning should be done 1:1 between guest and host, especially on systems using NUMA and/or CCDs.
# On a 5900X the core config, as seen in lscpu -e, looks like the following:
# CCX #0:
#  - NUMA: node 0
#  - CPU: 0-5, 12-17 (SMT threads/host CPU#)
#  - CORE: 0-5
# CCX #1:
#  - NUMA: node 1
#  - CPU: 6-11, 18-23
#  - CORE: 6-11
# "lstopo" shouldn't be used here, as it has a bug when RAM is not NUMA but L3 is: https://github.com/open-mpi/hwloc/issues/430
#
# Picking the mapping can be semi-automated with scripts that take NUMA etc. into account, but every system is different,
# so it's better to consciously tune it. Some scripts are here: https://github.com/64kramsystem/qemu-pinning#one-vcpus-per-corethread-except-one-core
# There are some unexplored ideas also at https://github.com/rokups/rokups.github.io/blob/master/pages/gaming-vm-performance.md
#
# Useful commands while debugging this code:
#  List running tasks with their current affinity (the "]" filters out kthreads):
#    ps -T -e -o psr,pid,ppid,pgid,sid,comm,cmd | grep -P '^\s+(6|7|8|9|10|11|18|19|20|21|22|23)' | grep -v -P '\]$' | sort | cut -c-$COLUMNS
#  Track cgroups resources usage: systemd-cgtop
#  See tree of cgroups: systemd-cgls
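# Example of deriving the pin list from the topology (the exact lscpu column set below is an assumption,
# check what your lscpu version supports):
#   lscpu -e=CPU,CORE,NODE    # each CORE appears twice - the two SMT siblings of that core
# Picking the 2nd CCX above would give core/sibling pairs 6+18, 7+19, ... i.e. a vmCpus list like
# "6,18,7,19,8,20,9,21,10,22,11,23".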
# Gets QEMU parent process PID for the current VM
getQemuPID () {
    local qemuParentPid=$(cat /run/qemu-server/$vmId.pid)
    if [[ -z $qemuParentPid ]]; then
        echo "ERROR: failed to get QEMU parent PID for VM=$vmId"
        return 1
    fi
    echo $qemuParentPid
}
# Gets the last logical CPU (thread) of the system
getLastCpu () {
    echo $(( $(nproc --all) - 1 ))
}
# Pins a vCPU to a host logical CPU (thread)
# The thread SHOULD be a single one, but it can be any taskset list
#
# Since cgroups v2 (used by Proxmox) does NOT allow moving tasks/threads to
# sub-scopes, affinity has to be set per-process with taskset here.
#
# Params: vCPU# hostThread#orList
pinVCpu () {
    local vCpuNum=$1
    local hostThreadNum="$2"
    local qemuParentPid=$(getQemuPID)
    local vCpuTaskPid=$(grep "^CPU $vCpuNum/KVM\$" /proc/$qemuParentPid/task/*/comm | cut -d '/' -f5)
    if [[ -z $vCpuTaskPid ]]; then
        echo "ERROR: failed to get Task PID for vCPU $vCpuNum"
        return 1
    fi
    echo "Pinning VM $vmId (PPID=$qemuParentPid) vCPU $vCpuNum (TPID=$vCpuTaskPid) to host thread(s) $hostThreadNum"
    taskset --cpu-list --pid "$hostThreadNum" $vCpuTaskPid
}
# Pins all non-vCPU QEMU threads (io, emulator, rcu) to host logical CPU(s)
# Here the parameter SHOULD probably be a list, unlike in pinVCpu
#
# Since cgroups v2 (used by Proxmox) does NOT allow moving tasks/threads to
# sub-scopes, affinity has to be set per-process with taskset here.
#
# Params: hostThread#orList
pinNonVCpuTasks () {
    local hostThreadNum="$1"
    local qemuParentPid=$(getQemuPID)
    local nonVCpuTaskPids=$(grep -v -P "^CPU \d" /proc/$qemuParentPid/task/*/comm | cut -d '/' -f5)
    while IFS= read -r tpid; do
        local taskComm=$(cat /proc/$qemuParentPid/task/$tpid/comm)
        echo "Pinning VM $vmId (PPID=$qemuParentPid) non-vCPU task \"$taskComm\" (TPID=$tpid) to host thread(s) $hostThreadNum"
        taskset --cpu-list --pid "$hostThreadNum" $tpid || true
    done <<< "$nonVCpuTaskPids"
}
# Kernel threads (so-called "kthreads") aren't grouped under any of the cgroups. Thus,
# to control their affinity, manual pinning is needed.
# There are hacky ways to identify kthreads, like parsing "ps", but the proper way to do
# that is to actually check the thread type. All kernel threads are marked with the PF_KTHREAD
# flag (see https://elixir.bootlin.com/linux/v6.3-rc6/source/include/linux/sched.h#L1740)
#
# Params: hostThread#orList
pinKthreads () {
    local hostThreadNum="$1"
    echo "Attempting to pin all kthreads to $hostThreadNum..."
    local procStat=""
    local pid=""
    local comm=""
    for statFile in /proc/[0-9]*/stat; do
        # This CAN sometimes fail due to TOCTOU
        procStat=""
        2>/dev/null read -a procStat < $statFile || true
        if [[ -z "${procStat[0]}" ]]; then continue; fi
        # Ignore non-kthreads
        flags="${procStat[8]}"
        if (( ($flags & 0x00200000) != 0x00200000 )); then continue; fi
        pid="${procStat[0]}"
        comm="${procStat[1]:1:-1}"
        # This CAN fail for some kthreads that are needed on specific CPUs
        if taskset --cpu-list --pid "$hostThreadNum" $pid > /dev/null 2>&1; then
            echo "Pinned kthread \"$comm\" (PID=$pid) to host thread(s) $hostThreadNum"
        fi
    done
}
# Most IRQs can be moved away from the threads running vCPUs, as servicing them there can
# cause jitter when vCPU threads get rescheduled. This function is not perfect as it doesn't set a mask
# for not-yet-triggered IRQs (/proc/irq/default_smp_affinity). However, this shouldn't
# be needed: unless the VM is started on boot, most if not all busy IRQs will have
# been triggered by now.
#
# Params: hostThread#orList
pinIrqs () {
    local hostThreadNum="$1"
    echo "Pinning IRQs to host thread(s) $hostThreadNum..."
    for irqAffLst in /proc/irq/*/smp_affinity_list; do
        local irqNum=$(echo "$irqAffLst" | grep -o -E '[0-9]+')
        if echo "$hostThreadNum" > $irqAffLst 2> /dev/null; then
            echo "Pinned IRQ $irqNum to host thread(s) $hostThreadNum"
        fi
    done
}
# Sets governor/scaling for a host logical CPU (thread)
# Params: hostThread# desiredGovernor
setGovernor () {
    local hostCpu=$1
    local reqGov="$2"
    local curGov=$(cat /sys/devices/system/cpu/cpu$hostCpu/cpufreq/scaling_governor)
    if [[ -z "$curGov" ]]; then
        echo "ERROR: failed to query governor for CPU $hostCpu"
        return 1
    fi
    if [[ "$reqGov" == "$curGov" ]]; then
        echo "CPU $hostCpu: requested governor $reqGov - it is already set"
        return
    fi
    echo "CPU $hostCpu: changing governor from $curGov to $reqGov"
    echo "$reqGov" > /sys/devices/system/cpu/cpu$hostCpu/cpufreq/scaling_governor
}
# Sets governor/scaling on a range of host CPUs (threads). Range is inclusive.
# Params: hostThreadFrom# hostThreadTo# desiredGovernor
setGovernorRange () {
    for (( i=$1; i<=$2; i++ )); do
        setGovernor $i "$3"
    done
}
# Resets governor/scaling to the default state
resetGovernor () {
    echo "Resetting CPU governor to default"
    service cpufrequtils restart
}
# Puts a host CPU (thread) into the offline or online state
# Params: hostThread# desiredState{0,1}
setCpuState () {
    local hostCpu=$1
    local reqState=$2
    local curState=$(cat /sys/devices/system/cpu/cpu$hostCpu/online)
    if [[ -z "$curState" ]]; then
        echo "ERROR: failed to read online status for CPU $hostCpu"
        return 1
    fi
    if [[ "$reqState" == "$curState" ]]; then
        echo "CPU $hostCpu: requested state $reqState - it is already set"
        return
    fi
    echo -n "CPU $hostCpu: changing state from $curState to $reqState... "
    echo $reqState > /sys/devices/system/cpu/cpu$hostCpu/online
    if [[ $? -eq 0 ]]; then
        echo "[OK]"
    else
        echo "[FAILED]"
        return 1
    fi
}
# Puts a range of host CPUs (threads) into the offline or online state. Range is inclusive.
# Params: hostThreadFrom# hostThreadTo# desiredState{0,1}
setCpuStateRange () {
    for (( i=$1; i<=$2; i++ )); do
        setCpuState $i $3
    done
}
tidyCaches () {
    echo -n "Tidying caches... "
    sync
    echo 3 > /proc/sys/vm/drop_caches
    echo 1 > /proc/sys/vm/compact_memory
    echo "[OK]"
}
# Sets cgroup slice or scope CPU isolation
# Params: sliceOrScopeName hostThreadsList (e.g. 11,12,13-19)
setCgroupAllowedCpus () {
    local entity="$1"
    local allowedCpus="$2"
    echo "Forcing \"$entity\" cgroup to only use CPU(s) $allowedCpus"
    systemctl set-property --runtime -- "$entity" "AllowedCPUs=$allowedCpus"
}
# Sets logical CPUs (threads) which can be used by processes on the host
# Params: hostThreadsList (e.g. 11,12,13-19)
setHostAllowedCpus () {
    echo "Setting host userland CPU constraint to $1"
    setCgroupAllowedCpus "init.scope" "$1"
    setCgroupAllowedCpus "system.slice" "$1"
    setCgroupAllowedCpus "user.slice" "$1"
}
# Sets logical CPUs (threads) which can be used by QEMU processes
# Params: hostThreadsList (e.g. 11,12,13-19)
setQemuAllowedCpus () {
    echo "Setting QEMU default CPU constraint to $1"
    setCgroupAllowedCpus "qemu.slice" "$1"
}
# Makes sure that a decoupled slice for some QEMU VMs exists
# This will only do something the first time a VM starts
# Params: <none>
ensureQemuDecoupledSlice () {
    if [[ -d "/sys/fs/cgroup/qemu-decoupled.slice" ]]; then
        return 0
    fi
    echo "Creating decoupled QEMU cgroup"
    mkdir /sys/fs/cgroup/qemu-decoupled.slice
    # The slice itself MUST be allowed to run on ALL CPUs. The reason
    # for that is we will move vCPUs to an isolated set of cores BUT
    # put the emulator and iothread(s) on the shared CPUs. Since cgroups v2
    # doesn't allow a thread/task to be in a different cgroup than the
    # parent, these tasks must stay in the qemu-decoupled.slice but with
    # different affinity
    local lastCPU=$(getLastCpu)
    setCgroupAllowedCpus "qemu-decoupled.slice" "0-$lastCPU"
}
# Moves the VM to an isolated cgroup, outside of the OS user/system/init groups, as well
# as away from the standard qemu.slice used by Proxmox; see systemd-cgls
#
# All processes from the host run under system.slice and user.slice, while all QEMU machines run
# under qemu.slice. Proxmox actually hardcodes that slice in their startup code:
# https://github.com/proxmox/qemu-server/blob/79f5ca393ab3608ff2e82c929167f079f964a505/PVE/QemuServer.pm#L5892-L5893
# This means that restricting "setQemuAllowedCpus" to the 1st CCX makes it impossible to pin vCPU
# threads to the 2nd CCX (taskset will fail), as the parent slice where the thread/service is
# running will enforce the 1st-CCX-only AllowedCPUs. The only way around this I found is to migrate
# the VM scope (each one gets a separate one named <VMID>.scope) to a different scope which isn't
# under any of the standard slices. However, this is not supported by systemd, as confirmed by one
# of the systemd authors: https://www.spinics.net/lists/systemd-devel/msg04072.html but cgroups can
# be used directly (albeit without any guarantees).
#
# Params: <none>
decoupleQemuVm () {
    ensureQemuDecoupledSlice
    local vmScope="/sys/fs/cgroup/qemu-decoupled.slice/$vmId.scope"
    if [[ ! -d "$vmScope" ]]; then
        echo "Creating cgroups scope for VMID=$vmId at $vmScope"
        mkdir "$vmScope"
    fi
    local qemuParentPid=$(getQemuPID)
    echo "Migrating VMID=$vmId PPID=$qemuParentPid to scope $vmScope"
    echo $qemuParentPid > "$vmScope/cgroup.procs"
}
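# Example commands to verify the migration (cgroup v2 interface; paths follow from the code above):
#   systemd-cgls                                                        # <VMID>.scope should now appear under qemu-decoupled.slice
#   cat /sys/fs/cgroup/qemu-decoupled.slice/<VMID>.scope/cgroup.procs   # should list the QEMU parent PID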
# Starts/stops the "idle" Windows VM to force very low GPU power states
setIdleVm () {
    echo "Setting idle VM to $1"
    qm "$1" "$idleVmId"
}
# Since updates around 2023/03/20-22, GPUs and some other PCIe devices will only work once.
# When the VM is turned off and on again it just black-screens and never boots. This is a
# workaround for that issue.
#
# Params: <none>
resetVmPciDevices () {
    echo "Resetting VM PCI devices..."
    local pciAddrFun=''
    local vmPciDevices=$(grep -E '^hostpci[0-9]+:' "/etc/pve/qemu-server/$vmId.conf" | grep -o -E '[0-9a-f]+:[0-9a-f]+:[0-9a-f]+(\.[0-9]*)?')
    while IFS= read -r pciAddr; do
        # Single-function (mostly SR-IOV or vGPU) device
        if echo "$pciAddr" | grep -F '.' > /dev/null; then
            echo "Removing PCI device function at $pciAddr"
            echo 1 > "/sys/bus/pci/devices/$pciAddr/remove" || true
            continue
        fi
        # Whole device specified => remove all functions
        for pciAddrFunRm in /sys/bus/pci/devices/$pciAddr.*/remove; do
            pciAddrFun=$(echo $pciAddrFunRm | grep -o -E '\.[0-9]*')
            echo "Removing PCI device $pciAddr function $pciAddrFun"
            echo 1 > "$pciAddrFunRm" || true
            # This is absolutely required. Attempting to remove one function CAN
            # remove all of them, but it's not instantaneous. However, if you hit
            # such a case and try to manually do /remove on another function while
            # the first is being removed, a "general protection fault" will happen
            # in the subsequent "pci_stop_and_remove_bus_device_locked()"
            while [[ -f "$pciAddrFunRm" ]]; do
                sleep 1
                echo "Still waiting for $pciAddrFunRm..."
            done
        done
    done <<< "$vmPciDevices"
    echo "Re-scanning PCI devices..."
    echo 1 > /sys/bus/pci/rescan
    # rescan is asynchronous; if we wanted to be 100% correct here we should wait
    # for /sys entries to appear, but a 2-second delay is good enough
    sleep 2
}
parseConfig () {
    echo "Parsing config"
    idleVm=`grep 'idleVm=' "/etc/pve/qemu-server/${vmId}.conf" | sed 's/.*=//'`
    idleVmId=${idleVm}
    echo "idleVm=${idleVm}"
    miscCpus=`grep 'miscCpus=' "/etc/pve/qemu-server/${vmId}.conf" | sed 's/.*=//'`
    echo "miscCpus=${miscCpus}"
    vmCpus=`grep 'vmCpus=' "/etc/pve/qemu-server/${vmId}.conf" | sed 's/.*=//'`
    echo "vmCpus=${vmCpus}"
    cpuGovernor=`grep 'cpuGovernor=' "/etc/pve/qemu-server/${vmId}.conf" | sed 's/.*=//'`
    echo "cpuGovernor=${cpuGovernor}"
    echo "Finished parsing config"
}
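# parseConfig expects the tuning knobs to be present as commented lines in the VM config
# (/etc/pve/qemu-server/<vmid>.conf). Example values, taken from the discussion below:
#   #idleVm=no
#   #vmCpus=2,10,3,11,4,12,5,13
#   #miscCpus=0,1,8,9
#   #cpuGovernor=performance
# The leading "#" keeps Proxmox from parsing them as VM options (they end up in the VM notes),
# while the greps above still match them.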
parseConfig
case "$runPhase" in
    pre-start)
        # Stop idle VM, drop caches & compact memory for hugepages
        if [[ ${idleVm} != "no" ]]; then
            setIdleVm shutdown
        fi
        tidyCaches
        resetVmPciDevices
        ;;
    # Designate the 2nd CCD (cores 6-11, threads 6-11+18-23) to the VM and the 1st CCD to host/housekeeping stuff
    # All modifications should be done in post-start, as doing them in pre-start would execute them even
    # if the VM fails to start (and thus post-stop will never be called)
    post-start)
        # This will inform cgroups via systemd to not use the 2nd CCX, effectively constraining the host to the 1st CCX.
        # This isn't perfect as it will not stop kthreads. "cset" used to mostly work for kthreads (except for things like Docker &
        # ZFS), but it doesn't work with cgroups v2: https://forum.proxmox.com/threads/cset-failing-pve7.95613/
        # I have no idea about any alternatives besides the CPU hotplug hack (see below)
        # WARNING: THIS MUST BE DONE BEFORE ANY OTHER PINNING. Manipulating slice/scope CPU lists will reset
        # any manual pinning due to a systemd bug/design choice: https://github.com/systemd/systemd/issues/23748
        # The "setQemuAllowedCpus" will be overwritten for just this VM by "decoupleQemuVm" later.
        setHostAllowedCpus "${miscCpus}"
        setQemuAllowedCpus "${miscCpus}"
        # Forcefully move all tasks (user space & kthreads) off the 2nd CCX by offlining those CPUs temporarily
        echo "Offlining to-be pinned CPUs to move tasks away..."
        for cpu in ${vmCpus//,/ }
        do
            setCpuState ${cpu} 0
        done
        # Move kernel threads & IRQs away from vCPU threads
        # Doing this while the CPUs are offlined makes it easier, as
        # nothing is actively running on them
        pinIrqs "${miscCpus}"
        pinKthreads "${miscCpus}"
        # Bring the second CCX online - nothing should be scheduled on it due to the host & QEMU constraints from above
        echo "Onlining to-be pinned CPUs..."
        for cpu in ${vmCpus//,/ }
        do
            setCpuState ${cpu} 1
        done
        # Set frequency scaling to performance mode
        for cpu in ${vmCpus//,/ }
        do
            setGovernor ${cpu} ${cpuGovernor}
        done
        # Stats generation causes jitter in VR
        sysctl vm.stat_interval=120
        # Migrate this VM to a separate isolation group (TLDR: see systemd-cgls)
        # An alternative hacky way to do that would be to iterate over all currently running VMs and
        # taskset their affinity to the 1st CCX, but a new VM starting while this one is running would
        # break this. So, it's better to isolate the whole qemu.slice with the exception of this VM. That
        # requires the VM process to be moved to a non-qemu.slice
        decoupleQemuVm
        # Pin vCPUs to the correct threads - this is crucial.
        # Since SMT/HT is enabled and proper SMT topology is passed to the guest, the vCPUs need to be pinned
        # to the correct host logical CPUs. QEMU assigns vCPUs sequentially; i.e. vCPU0 == 1st thread of the
        # first core, vCPU1 == 2nd thread of the first core, vCPU2 == 1st thread of the second core, etc.
        # In Linux (at least on this system according to lscpu -e) CPU0 is the 1st thread of the first core, with
        # CPU12 being the 2nd/SMT thread of the first core. For the 2nd CCX it's a 6+18, 7+19, 8+20, etc.
        # mapping.
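        # Concretely, with the example mapping above (vmCpus=6,18,7,19,8,20,9,21,10,22,11,23) the loop
        # below pins vCPU0->6, vCPU1->18 (its SMT sibling), vCPU2->7, vCPU3->19, and so on.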
        vCpu=0
        for cpu in ${vmCpus//,/ }
        do
            pinVCpu ${vCpu} ${cpu}
            ((vCpu=vCpu+1))
        done
        # Move all QEMU threads (emulator, iothread) of this VM to the 1st CCX. This is pretty dumb. The IOThread should
        # probably be pinned to a single core, but we're counting on the host scheduler being smart.
        # To do static pinning here QMP needs to be used to query the types of threads:
        # https://wiki.qemu.org/Documentation/QMP
        pinNonVCpuTasks "${miscCpus}"
        ;;
    pre-stop)
        ;;
    post-stop)
        if ! compgen -G "/run/qemu-server/*.pid" > /dev/null; then
            echo "No other pinned VM running, restoring defaults"
            lastCpu=$(getLastCpu)
            # Allow kthreads, IRQs, host & QEMU to use all CPUs again
            pinKthreads "0-$lastCpu"
            pinIrqs "0-$lastCpu"
            setHostAllowedCpus "0-$lastCpu"
            setQemuAllowedCpus "0-$lastCpu"
            # Restore default scaling
            resetGovernor
            # Restore default virtual mem stats frequency
            sysctl vm.stat_interval=1
        fi
        # Start idle VM
        resetVmPciDevices
        if [[ ${idleVm} != "no" ]]; then
            setIdleVm start
        fi
        ;;
    *)
        echo "Unknown run phase \"$runPhase\"!"
        ;;
esac
echo "Finished $runPhase on VM=$vmId"
Depends on what you want to achieve.
If you don't want to deep-dive into how it works, you basically want to specify:
idleVm is a VM with a minimal configuration that runs Windows with drivers installed, stripped of pretty much everything and with networking disabled - this minimizes the power consumption of the GPU, which is pretty power-hungry when not handled by a driver.
vmCpus - specifies the host cores/threads to which the vCPUs are pinned; requires additional SMT configuration.
miscCpus - additional CPUs that handle I/O, etc.
cpuGovernor - sets the governor for vmCpus
Example from a Ryzen 9 5950X:
idleVm=991 #set it to "no" to disable starting idleVm
vmCpus=8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31
miscCpus=0,1,16,17
cpuGovernor=performance
Tell me what you want to achieve and I'll try to provide you with the configuration needed.
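For reference, hooking the script up to a VM boils down to saving it as a snippet and pointing the VM at it, along these lines (assuming the default "local" storage with snippets enabled; the file name is just the one used later in this thread):
cp pinning-hook-generic.sh /var/lib/vz/snippets/
chmod +x /var/lib/vz/snippets/pinning-hook-generic.sh
qm set <vmid> --hookscript local:snippets/pinning-hook-generic.sh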
Hi! I'm just curious - does this script work with a single socket? I have a Xeon E5-2696 v3 and I'd love to try it. I read the tutorial on the Proxmox forum but I don't fully understand how it works. I want to run 2 gaming VMs and 1 NAS VM - can you provide a specific script for this? With my basic taskset script I always had problems with stuttering.
Hello,
Actually I migrated my old bare-metal system (i7-4790K / Asus Ranger) to a new one (i9-9900K / Supermicro) and "virtualized" my Windows system by creating a Windows VM and directly passing through the original bare-metal SSD/HDD disks to this VM.
I have another VM running a NAS.
So 99% of the time I only have 2 VMs running.
Since the migration from bare metal to VM, I have USB and audio issues...
I bought a dedicated PCIe sound card to pass through, and did the same with a PCIe USB card.
But I still face audio and USB issues...
I have a USB splitter to switch my keyboard between 2 computers. I don't know why, but most of the time, when I switch it to the other PC (a laptop) it works, but when I switch back to the VM it is undetected, and I have to unplug/replug the USB keyboard to make it work again...
Regarding the sound, I have a Logitech G Pro X Wireless with its USB dongle; since I migrated to the VM, I have little audio artefacts.
I can't use VoiceMeeter anymore as it has random audio drops, no matter what settings I use...
So I thought it may be related to VM performance issues...
I have 2 Nvidia GPU cards installed on the motherboard; one is passed through to Windows, the other one is passed through to the NAS VM...
So to be honest I don't know if this script could help me or not.
Just a quick tip, maybe it helps: don't use all your cores/threads if running multiple VMs.
I just reduced the Windows VM to 8 cores.
@flynuxbgz
It should work just fine with your CPU. It doesn't care about sockets. It's even better this way, one less thing to worry about :)
@OrpheeGT
Since I guess you care the most about Windows VM latency, it's the only configuration we're going to modify. Your CPU has 8 cores / 16 threads. Let's assign 4c/8t to it. Leave the other VMs to the host scheduler; it can be a little overprovisioned, depending on other factors.
I'd suggest the following:
Open the VM configuration file (/etc/pve/qemu-server/<vmid>.conf) and:
- Add a hookscript entry to your VM's configuration to call the script - just point it to wherever you want to keep the script:
hookscript: local:snippets/pinning-hook-generic.sh
- You have a single socket, so no need to worry about the NUMA configuration mentioned in the original post:
numa: 0
- Add:
args: -cpu 'host,topoext=on' -smp '8,sockets=1,cores=4,threads=2,maxcpus=8'
- make sure cores is set to
cores: 16
- Add the following lines on top:
#idleVm=no
#
#vmCpus=2,10,3,11,4,12,5,13
#
#miscCpus=0,1,8,9
#
#cpuGovernor=performance
What it does is assign cores 2,3,4,5 together with their second logical threads 10,11,12,13 to the VM's vCPUs, attach the VM's IO/misc threads to cores 0,1 (threads 0,1,8,9), and set the CPU governor to performance for those cores. The second most important modification is the 'args: -cpu...' setting, which is responsible for telling QEMU, and thus the guest Windows, how to treat the mapped cores (as 4 physical cores with 2 threads each). I wouldn't worry about the rest of the VMs, they will be fine on their own :)
The script also reverts the settings once you turn off the configured VM.
Thanks for your help!
I tried your suggestion, but it seems something went wrong.
At VM start, after a few seconds, the CPU gets stuck at 50% and the VM freezes on the Proxmox BIOS boot logo.
# cat 106.conf
#idleVm=no
#
#vmCpus=2,10,3,11,4,12,5,13
#
#miscCpus=0,1,8,9
#
#cpuGovernor=performance
hookscript: local:snippets/pinning-hook-generic.sh
args: -cpu 'host,topoext=on' -smp '8,sockets=1,cores=4,threads=2,maxcpus=8'
agent: 1
balloon: 0
bios: ovmf
boot: order=sata0
cores: 16
cpu: host
efidisk0: local-lvm:vm-106-disk-0,efitype=4m,pre-enrolled-keys=1,size=4M
hostpci0: 0000:02:00,pcie=1
hostpci1: 0000:04:00,pcie=1
hostpci2: 0000:06:00,pcie=1
machine: pc-q35-7.2
memory: 16384
meta: creation-qemu=7.1.0,ctime=1671272507
name: Windows
net0: virtio=XX:XX:XX:XX:XX,bridge=vmbr0,firewall=1
numa: 0
onboot: 1
ostype: win10
sata0: /dev/disk/by-id/ata-SanDisk_xxxxx,size=250059096K
sata1: /dev/disk/by-id/ata-ST4000DM004xxxxx,size=3907018584K
scsihw: virtio-scsi-single
smbios1: uuid=caba2457-4cbc-xxxxx
sockets: 1
tablet: 0
usb0: host=1b1c:1c07
vmgenid: ef2404c1-d939-xxxxx
Edit : Proxmox syslog
Jul 07 00:01:14 proxmox pvedaemon[2755]: <root@pam> update VM 106: -cores 16 -delete vcpus,affinity,cpuunits,cpulimit,cpu -numa 0 -sockets 1
Jul 07 00:01:14 proxmox pvedaemon[2755]: cannot delete 'vcpus' - not set in current configuration!
Jul 07 00:01:14 proxmox pvedaemon[2755]: cannot delete 'affinity' - not set in current configuration!
Jul 07 00:01:14 proxmox pvedaemon[2755]: cannot delete 'cpuunits' - not set in current configuration!
Jul 07 00:01:14 proxmox pvedaemon[2755]: cannot delete 'cpulimit' - not set in current configuration!
Jul 07 00:01:41 proxmox pvedaemon[6493]: start VM 106: UPID:proxmox:0000195D:00012AB0:64A739C5:qmstart:106:root@pam:
Jul 07 00:01:41 proxmox pvedaemon[2753]: <root@pam> starting task UPID:proxmox:0000195D:00012AB0:64A739C5:qmstart:106:root@pam:
Jul 07 00:01:41 proxmox kernel: pinning-hook-ge (6494): drop_caches: 3
Jul 07 00:01:41 proxmox kernel: vfio-pci 0000:02:00.0: vgaarb: changed VGA decodes: olddecodes=none,decodes=io+mem:owns=none
Jul 07 00:01:41 proxmox kernel: pci 0000:02:00.0: Removing from iommu group 21
Jul 07 00:01:41 proxmox kernel: pci 0000:02:00.1: Removing from iommu group 22
Jul 07 00:01:41 proxmox kernel: pci 0000:04:00.0: Removing from iommu group 23
Jul 07 00:01:41 proxmox kernel: pci 0000:06:00.0: Removing from iommu group 24
Jul 07 00:01:41 proxmox kernel: pci 0000:02:00.0: [10de:13c0] type 00 class 0x030000
Jul 07 00:01:41 proxmox kernel: pci 0000:02:00.0: reg 0x10: [mem 0x62000000-0x62ffffff]
Jul 07 00:01:41 proxmox kernel: pci 0000:02:00.0: reg 0x14: [mem 0x50000000-0x5fffffff 64bit pref]
Jul 07 00:01:41 proxmox kernel: pci 0000:02:00.0: reg 0x1c: [mem 0x60000000-0x61ffffff 64bit pref]
Jul 07 00:01:41 proxmox kernel: pci 0000:02:00.0: reg 0x24: [io 0x5000-0x507f]
Jul 07 00:01:41 proxmox kernel: pci 0000:02:00.0: reg 0x30: [mem 0x63000000-0x6307ffff pref]
Jul 07 00:01:41 proxmox kernel: pci 0000:02:00.0: 63.008 Gb/s available PCIe bandwidth, limited by 8.0 GT/s PCIe x8 link at 0000:00:01.1 (capable of 126.016 Gb/s with 8.0 GT/s PCIe x16 link)
Jul 07 00:01:41 proxmox kernel: pci 0000:02:00.0: vgaarb: VGA device added: decodes=io+mem,owns=none,locks=none
Jul 07 00:01:41 proxmox kernel: pci 0000:02:00.0: Adding to iommu group 21
Jul 07 00:01:41 proxmox kernel: pci 0000:02:00.1: [10de:0fbb] type 00 class 0x040300
Jul 07 00:01:41 proxmox kernel: pci 0000:02:00.1: reg 0x10: [mem 0x63080000-0x63083fff]
Jul 07 00:01:41 proxmox kernel: pci 0000:02:00.1: Adding to iommu group 22
Jul 07 00:01:41 proxmox kernel: pci 0000:04:00.0: [1102:0012] type 00 class 0x040300
Jul 07 00:01:41 proxmox kernel: pci 0000:04:00.0: reg 0x10: [mem 0x86304000-0x86307fff 64bit]
Jul 07 00:01:41 proxmox kernel: pci 0000:04:00.0: reg 0x18: [mem 0x86300000-0x86303fff 64bit]
Jul 07 00:01:41 proxmox kernel: pci 0000:04:00.0: supports D2
Jul 07 00:01:41 proxmox kernel: pci 0000:04:00.0: Adding to iommu group 23
Jul 07 00:01:41 proxmox kernel: pci 0000:06:00.0: [1912:0014] type 00 class 0x0c0330
Jul 07 00:01:41 proxmox kernel: pci 0000:06:00.0: reg 0x10: [mem 0x86200000-0x86201fff 64bit]
Jul 07 00:01:41 proxmox kernel: pci 0000:06:00.0: PME# supported from D0 D3hot D3cold
Jul 07 00:01:41 proxmox kernel: pci 0000:06:00.0: Adding to iommu group 24
Jul 07 00:01:41 proxmox kernel: pcieport 0000:00:01.1: BAR 15: assigned [mem 0x4008000000-0x401fffffff 64bit pref]
Jul 07 00:01:41 proxmox kernel: pci 0000:02:00.0: BAR 1: assigned [mem 0x4010000000-0x401fffffff 64bit pref]
Jul 07 00:01:41 proxmox kernel: pci 0000:02:00.0: BAR 3: assigned [mem 0x4008000000-0x4009ffffff 64bit pref]
Jul 07 00:01:41 proxmox kernel: pci 0000:02:00.0: BAR 0: assigned [mem 0x50000000-0x50ffffff]
Jul 07 00:01:41 proxmox kernel: pci 0000:02:00.0: BAR 6: assigned [mem 0x51000000-0x5107ffff pref]
Jul 07 00:01:41 proxmox kernel: pci 0000:02:00.1: BAR 0: assigned [mem 0x51080000-0x51083fff]
Jul 07 00:01:41 proxmox kernel: pci 0000:02:00.0: BAR 5: assigned [io 0x5000-0x507f]
Jul 07 00:01:41 proxmox kernel: pci 0000:04:00.0: BAR 0: assigned [mem 0x86300000-0x86303fff 64bit]
Jul 07 00:01:41 proxmox kernel: pci 0000:04:00.0: BAR 2: assigned [mem 0x86304000-0x86307fff 64bit]
Jul 07 00:01:41 proxmox kernel: pci 0000:06:00.0: BAR 0: assigned [mem 0x86200000-0x86201fff 64bit]
Jul 07 00:01:41 proxmox kernel: pci 0000:0a:00.0: PCI bridge to [bus 0b]
Jul 07 00:01:41 proxmox kernel: vfio-pci 0000:02:00.0: vgaarb: changed VGA decodes: olddecodes=io+mem,decodes=none:owns=none
Jul 07 00:01:41 proxmox kernel: pci 0000:02:00.1: D0 power state depends on 0000:02:00.0
Jul 07 00:01:41 proxmox kernel: xhci_hcd 0000:06:00.0: failed to load firmware renesas_usb_fw.mem, fallback to ROM
Jul 07 00:01:41 proxmox kernel: xhci_hcd 0000:06:00.0: xHCI Host Controller
Jul 07 00:01:41 proxmox kernel: xhci_hcd 0000:06:00.0: new USB bus registered, assigned bus number 3
Jul 07 00:01:41 proxmox kernel: xhci_hcd 0000:06:00.0: hcc params 0x014051cf hci version 0x100 quirks 0x0000001100000410
Jul 07 00:01:41 proxmox kernel: xhci_hcd 0000:06:00.0: xHCI Host Controller
Jul 07 00:01:41 proxmox kernel: xhci_hcd 0000:06:00.0: new USB bus registered, assigned bus number 4
Jul 07 00:01:41 proxmox kernel: xhci_hcd 0000:06:00.0: Host supports USB 3.0 SuperSpeed
Jul 07 00:01:41 proxmox kernel: usb usb3: New USB device found, idVendor=1d6b, idProduct=0002, bcdDevice= 5.15
Jul 07 00:01:41 proxmox kernel: usb usb3: New USB device strings: Mfr=3, Product=2, SerialNumber=1
Jul 07 00:01:41 proxmox kernel: usb usb3: Product: xHCI Host Controller
Jul 07 00:01:41 proxmox kernel: usb usb3: Manufacturer: Linux 5.15.108-1-pve xhci-hcd
Jul 07 00:01:41 proxmox kernel: usb usb3: SerialNumber: 0000:06:00.0
Jul 07 00:01:41 proxmox kernel: hub 3-0:1.0: USB hub found
Jul 07 00:01:41 proxmox kernel: hub 3-0:1.0: 4 ports detected
Jul 07 00:01:41 proxmox kernel: usb usb4: We don't know the algorithms for LPM for this host, disabling LPM.
Jul 07 00:01:41 proxmox kernel: usb usb4: New USB device found, idVendor=1d6b, idProduct=0003, bcdDevice= 5.15
Jul 07 00:01:41 proxmox kernel: usb usb4: New USB device strings: Mfr=3, Product=2, SerialNumber=1
Jul 07 00:01:41 proxmox kernel: usb usb4: Product: xHCI Host Controller
Jul 07 00:01:41 proxmox kernel: usb usb4: Manufacturer: Linux 5.15.108-1-pve xhci-hcd
Jul 07 00:01:41 proxmox kernel: usb usb4: SerialNumber: 0000:06:00.0
Jul 07 00:01:41 proxmox kernel: hub 4-0:1.0: USB hub found
Jul 07 00:01:41 proxmox kernel: hub 4-0:1.0: 4 ports detected
Jul 07 00:01:42 proxmox kernel: usb 3-1: new high-speed USB device number 2 using xhci_hcd
Jul 07 00:01:42 proxmox kernel: usb 3-1: New USB device found, idVendor=2109, idProduct=2815, bcdDevice= 7.04
Jul 07 00:01:42 proxmox kernel: usb 3-1: New USB device strings: Mfr=1, Product=2, SerialNumber=0
Jul 07 00:01:42 proxmox kernel: usb 3-1: Product: USB2.0 Hub
Jul 07 00:01:42 proxmox kernel: usb 3-1: Manufacturer: VIA Labs, Inc.
Jul 07 00:01:42 proxmox kernel: hub 3-1:1.0: USB hub found
Jul 07 00:01:42 proxmox kernel: hub 3-1:1.0: 4 ports detected
Jul 07 00:01:42 proxmox kernel: usb 4-1: new SuperSpeed USB device number 2 using xhci_hcd
Jul 07 00:01:42 proxmox kernel: usb 4-1: New USB device found, idVendor=2109, idProduct=0815, bcdDevice= 7.04
Jul 07 00:01:42 proxmox kernel: usb 4-1: New USB device strings: Mfr=1, Product=2, SerialNumber=0
Jul 07 00:01:42 proxmox kernel: usb 4-1: Product: USB3.0 Hub
Jul 07 00:01:42 proxmox kernel: usb 4-1: Manufacturer: VIA Labs, Inc.
Jul 07 00:01:42 proxmox kernel: hub 4-1:1.0: USB hub found
Jul 07 00:01:42 proxmox kernel: hub 4-1:1.0: 4 ports detected
Jul 07 00:01:42 proxmox kernel: usb 3-2: new full-speed USB device number 3 using xhci_hcd
Jul 07 00:01:42 proxmox kernel: usb 3-2: New USB device found, idVendor=1b1c, idProduct=1b8b, bcdDevice= 3.20
Jul 07 00:01:42 proxmox kernel: usb 3-2: New USB device strings: Mfr=1, Product=2, SerialNumber=3
Jul 07 00:01:42 proxmox kernel: usb 3-2: Product: CORSAIR SCIMITAR RGB ELITE Gaming Mouse
Jul 07 00:01:42 proxmox kernel: usb 3-2: Manufacturer: Corsair
Jul 07 00:01:42 proxmox kernel: usb 3-2: SerialNumber: 1203701FAF5D1C045DD7433BF5001C05
Jul 07 00:01:42 proxmox kernel: input: Corsair CORSAIR SCIMITAR RGB ELITE Gaming Mouse as /devices/pci0000:00/0000:00:1c.2/0000:06:00.0/usb3/3-2/3-2:1.0/0003:1B1C:1B8B.000A/input/input15
Jul 07 00:01:42 proxmox kernel: input: Corsair CORSAIR SCIMITAR RGB ELITE Gaming Mouse Consumer Control as /devices/pci0000:00/0000:00:1c.2/0000:06:00.0/usb3/3-2/3-2:1.0/0003:1B1C:1B8B.000A/input/input16
Jul 07 00:01:43 proxmox kernel: input: Corsair CORSAIR SCIMITAR RGB ELITE Gaming Mouse as /devices/pci0000:00/0000:00:1c.2/0000:06:00.0/usb3/3-2/3-2:1.0/0003:1B1C:1B8B.000A/input/input17
Jul 07 00:01:43 proxmox kernel: input: Corsair CORSAIR SCIMITAR RGB ELITE Gaming Mouse as /devices/pci0000:00/0000:00:1c.2/0000:06:00.0/usb3/3-2/3-2:1.0/0003:1B1C:1B8B.000A/input/input18
Jul 07 00:01:43 proxmox kernel: input: Corsair CORSAIR SCIMITAR RGB ELITE Gaming Mouse Keyboard as /devices/pci0000:00/0000:00:1c.2/0000:06:00.0/usb3/3-2/3-2:1.0/0003:1B1C:1B8B.000A/input/input19
Jul 07 00:01:43 proxmox kernel: hid-generic 0003:1B1C:1B8B.000A: input,hiddev0,hidraw3: USB HID v1.11 Mouse [Corsair CORSAIR SCIMITAR RGB ELITE Gaming Mouse] on usb-0000:06:00.0-2/input0
Jul 07 00:01:43 proxmox kernel: hid-generic 0003:1B1C:1B8B.000B: hiddev1,hidraw4: USB HID v1.11 Device [Corsair CORSAIR SCIMITAR RGB ELITE Gaming Mouse] on usb-0000:06:00.0-2/input1
Jul 07 00:01:43 proxmox systemd-logind[2372]: Watching system buttons on /dev/input/event6 (Corsair CORSAIR SCIMITAR RGB ELITE Gaming Mouse Consumer Control)
Jul 07 00:01:43 proxmox systemd-logind[2372]: Watching system buttons on /dev/input/event9 (Corsair CORSAIR SCIMITAR RGB ELITE Gaming Mouse Keyboard)
Jul 07 00:01:43 proxmox kernel: usb 3-3: new full-speed USB device number 4 using xhci_hcd
Jul 07 00:01:43 proxmox kernel: usb 3-1.2: new full-speed USB device number 5 using xhci_hcd
Jul 07 00:01:43 proxmox kernel: usb 3-1.2: New USB device found, idVendor=046d, idProduct=0aba, bcdDevice= 1.00
Jul 07 00:01:43 proxmox kernel: usb 3-1.2: New USB device strings: Mfr=3, Product=4, SerialNumber=0
Jul 07 00:01:43 proxmox kernel: usb 3-1.2: Product: PRO X Wireless Gaming Headset
Jul 07 00:01:43 proxmox kernel: usb 3-1.2: Manufacturer: Logitech
Jul 07 00:01:43 proxmox kernel: input: Logitech PRO X Wireless Gaming Headset Consumer Control as /devices/pci0000:00/0000:00:1c.2/0000:06:00.0/usb3/3-1/3-1.2/3-1.2:1.3/0003:046D:0ABA.000C/input/input20
Jul 07 00:01:43 proxmox kernel: input: Logitech PRO X Wireless Gaming Headset as /devices/pci0000:00/0000:00:1c.2/0000:06:00.0/usb3/3-1/3-1.2/3-1.2:1.3/0003:046D:0ABA.000C/input/input22
Jul 07 00:01:43 proxmox kernel: hid-generic 0003:046D:0ABA.000C: input,hiddev2,hidraw5: USB HID v1.11 Device [Logitech PRO X Wireless Gaming Headset] on usb-0000:06:00.0-1.2/input3
Jul 07 00:01:43 proxmox systemd[1]: Reached target Sound Card.
Jul 07 00:01:43 proxmox systemd[2917]: Reached target Sound Card.
Jul 07 00:01:44 proxmox kernel: pinning-hook-ge (6494): drop_caches: 3
Jul 07 00:01:44 proxmox kernel: vfio-pci 0000:02:00.0: vgaarb: changed VGA decodes: olddecodes=none,decodes=io+mem:owns=none
Jul 07 00:01:44 proxmox kernel: pci 0000:02:00.0: Removing from iommu group 21
Jul 07 00:01:44 proxmox kernel: pci 0000:02:00.1: Removing from iommu group 22
Jul 07 00:01:44 proxmox kernel: pci 0000:04:00.0: Removing from iommu group 23
Jul 07 00:01:44 proxmox kernel: xhci_hcd 0000:06:00.0: remove, state 4
Jul 07 00:01:44 proxmox kernel: usb usb4: USB disconnect, device number 1
Jul 07 00:01:44 proxmox kernel: usb 4-1: USB disconnect, device number 2
Jul 07 00:01:44 proxmox kernel: xhci_hcd 0000:06:00.0: USB bus 4 deregistered
Jul 07 00:01:44 proxmox kernel: xhci_hcd 0000:06:00.0: remove, state 1
Jul 07 00:01:44 proxmox kernel: usb usb3: USB disconnect, device number 1
Jul 07 00:01:44 proxmox systemd[2917]: Stopped target Sound Card.
Jul 07 00:01:44 proxmox systemd[1]: Stopped target Sound Card.
Jul 07 00:01:44 proxmox kernel: usb 3-1.4: new full-speed USB device number 6 using xhci_hcd
Jul 07 00:01:44 proxmox kernel: usb 3-1.4: hub failed to enable device, error -108
Jul 07 00:01:44 proxmox kernel: usb 3-1-port4: attempt power cycle
Jul 07 00:01:44 proxmox kernel: usb 3-1-port4: failed to disable port power
Jul 07 00:01:44 proxmox kernel: usb 3-1.2: USB disconnect, device number 5
Jul 07 00:01:45 proxmox kernel: usb 3-1: USB disconnect, device number 2
Jul 07 00:01:45 proxmox kernel: usb 3-2: USB disconnect, device number 3
Jul 07 00:01:46 proxmox kernel: xhci_hcd 0000:06:00.0: USB bus 3 deregistered
Jul 07 00:01:46 proxmox kernel: pci 0000:06:00.0: Removing from iommu group 24
Jul 07 00:01:46 proxmox kernel: pci 0000:02:00.0: [10de:13c0] type 00 class 0x030000
Jul 07 00:01:46 proxmox kernel: pci 0000:02:00.0: reg 0x10: [mem 0x50000000-0x50ffffff]
Jul 07 00:01:46 proxmox kernel: pci 0000:02:00.0: reg 0x14: [mem 0x4010000000-0x401fffffff 64bit pref]
Jul 07 00:01:46 proxmox kernel: pci 0000:02:00.0: reg 0x1c: [mem 0x4008000000-0x4009ffffff 64bit pref]
Jul 07 00:01:46 proxmox kernel: pci 0000:02:00.0: reg 0x24: [io 0x5000-0x507f]
Jul 07 00:01:46 proxmox kernel: pci 0000:02:00.0: reg 0x30: [mem 0x63000000-0x6307ffff pref]
Jul 07 00:01:46 proxmox kernel: pci 0000:02:00.0: 63.008 Gb/s available PCIe bandwidth, limited by 8.0 GT/s PCIe x8 link at 0000:00:01.1 (capable of 126.016 Gb/s with 8.0 GT/s PCIe x16 link)
Jul 07 00:01:46 proxmox kernel: pci 0000:02:00.0: vgaarb: VGA device added: decodes=io+mem,owns=none,locks=none
Jul 07 00:01:46 proxmox kernel: pci 0000:02:00.0: Adding to iommu group 21
Jul 07 00:01:46 proxmox kernel: pci 0000:02:00.1: [10de:0fbb] type 00 class 0x040300
Jul 07 00:01:46 proxmox kernel: pci 0000:02:00.1: reg 0x10: [mem 0x51080000-0x51083fff]
Jul 07 00:01:46 proxmox kernel: pci 0000:02:00.1: Adding to iommu group 22
Jul 07 00:01:46 proxmox kernel: pci 0000:04:00.0: [1102:0012] type 00 class 0x040300
Jul 07 00:01:46 proxmox kernel: pci 0000:04:00.0: reg 0x10: [mem 0x86300000-0x86303fff 64bit]
Jul 07 00:01:46 proxmox kernel: pci 0000:04:00.0: reg 0x18: [mem 0x86304000-0x86307fff 64bit]
Jul 07 00:01:46 proxmox kernel: pci 0000:04:00.0: supports D2
Jul 07 00:01:46 proxmox kernel: pci 0000:04:00.0: Adding to iommu group 23
Jul 07 00:01:46 proxmox kernel: pci 0000:06:00.0: [1912:0014] type 00 class 0x0c0330
Jul 07 00:01:46 proxmox kernel: pci 0000:06:00.0: reg 0x10: [mem 0x86200000-0x86201fff 64bit]
Jul 07 00:01:46 proxmox kernel: pci 0000:06:00.0: PME# supported from D0 D3hot D3cold
Jul 07 00:01:46 proxmox kernel: pci 0000:06:00.0: Adding to iommu group 24
Jul 07 00:01:46 proxmox kernel: pci 0000:02:00.0: BAR 1: assigned [mem 0x4010000000-0x401fffffff 64bit pref]
Jul 07 00:01:46 proxmox kernel: pci 0000:02:00.0: BAR 3: assigned [mem 0x4008000000-0x4009ffffff 64bit pref]
Jul 07 00:01:46 proxmox kernel: pci 0000:02:00.0: BAR 0: assigned [mem 0x50000000-0x50ffffff]
Jul 07 00:01:46 proxmox kernel: pci 0000:02:00.0: BAR 6: assigned [mem 0x51000000-0x5107ffff pref]
Jul 07 00:01:46 proxmox kernel: pci 0000:02:00.1: BAR 0: assigned [mem 0x51080000-0x51083fff]
Jul 07 00:01:46 proxmox kernel: pci 0000:02:00.0: BAR 5: assigned [io 0x5000-0x507f]
Jul 07 00:01:46 proxmox kernel: pci 0000:04:00.0: BAR 0: assigned [mem 0x86300000-0x86303fff 64bit]
Jul 07 00:01:46 proxmox kernel: pci 0000:04:00.0: BAR 2: assigned [mem 0x86304000-0x86307fff 64bit]
Jul 07 00:01:46 proxmox kernel: pci 0000:06:00.0: BAR 0: assigned [mem 0x86200000-0x86201fff 64bit]
Jul 07 00:01:46 proxmox kernel: pci 0000:0a:00.0: PCI bridge to [bus 0b]
Jul 07 00:01:46 proxmox kernel: vfio-pci 0000:02:00.0: vgaarb: changed VGA decodes: olddecodes=io+mem,decodes=none:owns=none
Jul 07 00:01:46 proxmox kernel: pci 0000:02:00.1: D0 power state depends on 0000:02:00.0
Jul 07 00:01:46 proxmox kernel: xhci_hcd 0000:06:00.0: failed to load firmware renesas_usb_fw.mem, fallback to ROM
Jul 07 00:01:46 proxmox kernel: xhci_hcd 0000:06:00.0: xHCI Host Controller
Jul 07 00:01:46 proxmox kernel: xhci_hcd 0000:06:00.0: new USB bus registered, assigned bus number 3
Jul 07 00:01:46 proxmox kernel: xhci_hcd 0000:06:00.0: hcc params 0x014051cf hci version 0x100 quirks 0x0000001100000410
Jul 07 00:01:46 proxmox kernel: xhci_hcd 0000:06:00.0: xHCI Host Controller
Jul 07 00:01:46 proxmox kernel: xhci_hcd 0000:06:00.0: new USB bus registered, assigned bus number 4
Jul 07 00:01:46 proxmox kernel: xhci_hcd 0000:06:00.0: Host supports USB 3.0 SuperSpeed
Jul 07 00:01:46 proxmox kernel: usb usb3: New USB device found, idVendor=1d6b, idProduct=0002, bcdDevice= 5.15
Jul 07 00:01:46 proxmox kernel: usb usb3: New USB device strings: Mfr=3, Product=2, SerialNumber=1
Jul 07 00:01:46 proxmox kernel: usb usb3: Product: xHCI Host Controller
Jul 07 00:01:46 proxmox kernel: usb usb3: Manufacturer: Linux 5.15.108-1-pve xhci-hcd
Jul 07 00:01:46 proxmox kernel: usb usb3: SerialNumber: 0000:06:00.0
Jul 07 00:01:46 proxmox kernel: hub 3-0:1.0: USB hub found
Jul 07 00:01:46 proxmox kernel: hub 3-0:1.0: 4 ports detected
Jul 07 00:01:46 proxmox kernel: usb usb4: We don't know the algorithms for LPM for this host, disabling LPM.
Jul 07 00:01:46 proxmox kernel: usb usb4: New USB device found, idVendor=1d6b, idProduct=0003, bcdDevice= 5.15
Jul 07 00:01:46 proxmox kernel: usb usb4: New USB device strings: Mfr=3, Product=2, SerialNumber=1
Jul 07 00:01:46 proxmox kernel: usb usb4: Product: xHCI Host Controller
Jul 07 00:01:46 proxmox kernel: usb usb4: Manufacturer: Linux 5.15.108-1-pve xhci-hcd
Jul 07 00:01:46 proxmox kernel: usb usb4: SerialNumber: 0000:06:00.0
Jul 07 00:01:46 proxmox kernel: hub 4-0:1.0: USB hub found
Jul 07 00:01:46 proxmox kernel: hub 4-0:1.0: 4 ports detected
Jul 07 00:01:46 proxmox kernel: usb 3-1: new high-speed USB device number 2 using xhci_hcd
Jul 07 00:01:47 proxmox kernel: usb 3-1: New USB device found, idVendor=2109, idProduct=2815, bcdDevice= 7.04
Jul 07 00:01:47 proxmox kernel: usb 3-1: New USB device strings: Mfr=1, Product=2, SerialNumber=0
Jul 07 00:01:47 proxmox kernel: usb 3-1: Product: USB2.0 Hub
Jul 07 00:01:47 proxmox kernel: usb 3-1: Manufacturer: VIA Labs, Inc.
Jul 07 00:01:47 proxmox kernel: hub 3-1:1.0: USB hub found
Jul 07 00:01:47 proxmox kernel: hub 3-1:1.0: 4 ports detected
Jul 07 00:01:47 proxmox kernel: usb 4-1: new SuperSpeed USB device number 2 using xhci_hcd
Jul 07 00:01:47 proxmox kernel: usb 4-1: New USB device found, idVendor=2109, idProduct=0815, bcdDevice= 7.04
Jul 07 00:01:47 proxmox kernel: usb 4-1: New USB device strings: Mfr=1, Product=2, SerialNumber=0
Jul 07 00:01:47 proxmox kernel: usb 4-1: Product: USB3.0 Hub
Jul 07 00:01:47 proxmox kernel: usb 4-1: Manufacturer: VIA Labs, Inc.
Jul 07 00:01:47 proxmox kernel: hub 4-1:1.0: USB hub found
Jul 07 00:01:47 proxmox kernel: hub 4-1:1.0: 4 ports detected
Jul 07 00:01:47 proxmox kernel: usb 3-2: new full-speed USB device number 3 using xhci_hcd
Jul 07 00:01:47 proxmox kernel: usb 3-2: New USB device found, idVendor=1b1c, idProduct=1b8b, bcdDevice= 3.20
Jul 07 00:01:47 proxmox kernel: usb 3-2: New USB device strings: Mfr=1, Product=2, SerialNumber=3
Jul 07 00:01:47 proxmox kernel: usb 3-2: Product: CORSAIR SCIMITAR RGB ELITE Gaming Mouse
Jul 07 00:01:47 proxmox kernel: usb 3-2: Manufacturer: Corsair
Jul 07 00:01:47 proxmox kernel: usb 3-2: SerialNumber: 1203701FAF5D1C045DD7433BF5001C05
Jul 07 00:01:47 proxmox kernel: input: Corsair CORSAIR SCIMITAR RGB ELITE Gaming Mouse as /devices/pci0000:00/0000:00:1c.2/0000:06:00.0/usb3/3-2/3-2:1.0/0003:1B1C:1B8B.000D/input/input23
Jul 07 00:01:47 proxmox kernel: input: Corsair CORSAIR SCIMITAR RGB ELITE Gaming Mouse Consumer Control as /devices/pci0000:00/0000:00:1c.2/0000:06:00.0/usb3/3-2/3-2:1.0/0003:1B1C:1B8B.000D/input/input24
Jul 07 00:01:47 proxmox kernel: input: Corsair CORSAIR SCIMITAR RGB ELITE Gaming Mouse as /devices/pci0000:00/0000:00:1c.2/0000:06:00.0/usb3/3-2/3-2:1.0/0003:1B1C:1B8B.000D/input/input25
Jul 07 00:01:47 proxmox kernel: input: Corsair CORSAIR SCIMITAR RGB ELITE Gaming Mouse as /devices/pci0000:00/0000:00:1c.2/0000:06:00.0/usb3/3-2/3-2:1.0/0003:1B1C:1B8B.000D/input/input26
Jul 07 00:01:47 proxmox kernel: input: Corsair CORSAIR SCIMITAR RGB ELITE Gaming Mouse Keyboard as /devices/pci0000:00/0000:00:1c.2/0000:06:00.0/usb3/3-2/3-2:1.0/0003:1B1C:1B8B.000D/input/input27
Jul 07 00:01:47 proxmox kernel: hid-generic 0003:1B1C:1B8B.000D: input,hiddev0,hidraw3: USB HID v1.11 Mouse [Corsair CORSAIR SCIMITAR RGB ELITE Gaming Mouse] on usb-0000:06:00.0-2/input0
Jul 07 00:01:47 proxmox kernel: hid-generic 0003:1B1C:1B8B.000E: hiddev1,hidraw4: USB HID v1.11 Device [Corsair CORSAIR SCIMITAR RGB ELITE Gaming Mouse] on usb-0000:06:00.0-2/input1
Jul 07 00:01:47 proxmox systemd-logind[2372]: Watching system buttons on /dev/input/event9 (Corsair CORSAIR SCIMITAR RGB ELITE Gaming Mouse Keyboard)
Jul 07 00:01:47 proxmox systemd-logind[2372]: Watching system buttons on /dev/input/event6 (Corsair CORSAIR SCIMITAR RGB ELITE Gaming Mouse Consumer Control)
Jul 07 00:01:47 proxmox kernel: usb 3-3: new full-speed USB device number 4 using xhci_hcd
Jul 07 00:01:48 proxmox kernel: usb 3-1.2: new full-speed USB device number 5 using xhci_hcd
Jul 07 00:01:48 proxmox kernel: usb 3-1.2: New USB device found, idVendor=046d, idProduct=0aba, bcdDevice= 1.00
Jul 07 00:01:48 proxmox kernel: usb 3-1.2: New USB device strings: Mfr=3, Product=4, SerialNumber=0
Jul 07 00:01:48 proxmox kernel: usb 3-1.2: Product: PRO X Wireless Gaming Headset
Jul 07 00:01:48 proxmox kernel: usb 3-1.2: Manufacturer: Logitech
Jul 07 00:01:48 proxmox kernel: input: Logitech PRO X Wireless Gaming Headset Consumer Control as /devices/pci0000:00/0000:00:1c.2/0000:06:00.0/usb3/3-1/3-1.2/3-1.2:1.3/0003:046D:0ABA.000F/input/input28
Jul 07 00:01:48 proxmox kernel: input: Logitech PRO X Wireless Gaming Headset as /devices/pci0000:00/0000:00:1c.2/0000:06:00.0/usb3/3-1/3-1.2/3-1.2:1.3/0003:046D:0ABA.000F/input/input30
Jul 07 00:01:48 proxmox kernel: hid-generic 0003:046D:0ABA.000F: input,hiddev2,hidraw5: USB HID v1.11 Device [Logitech PRO X Wireless Gaming Headset] on usb-0000:06:00.0-1.2/input3
Jul 07 00:01:48 proxmox systemd[1]: Reached target Sound Card.
Jul 07 00:01:48 proxmox systemd[2917]: Reached target Sound Card.
Jul 07 00:01:48 proxmox kernel: usb 3-1.4: new full-speed USB device number 6 using xhci_hcd
Jul 07 00:01:48 proxmox kernel: usb 3-1.4: New USB device found, idVendor=1050, idProduct=0407, bcdDevice= 4.35
Jul 07 00:01:48 proxmox kernel: usb 3-1.4: New USB device strings: Mfr=1, Product=2, SerialNumber=0
Jul 07 00:01:48 proxmox kernel: usb 3-1.4: Product: Yubikey 4 OTP+U2F+CCID
Jul 07 00:01:48 proxmox kernel: usb 3-1.4: Manufacturer: Yubico
Jul 07 00:01:48 proxmox kernel: input: Yubico Yubikey 4 OTP+U2F+CCID as /devices/pci0000:00/0000:00:1c.2/0000:06:00.0/usb3/3-1/3-1.4/3-1.4:1.0/0003:1050:0407.0010/input/input31
Jul 07 00:01:48 proxmox kernel: xhci_hcd 0000:06:00.0: remove, state 4
Jul 07 00:01:48 proxmox kernel: usb usb4: USB disconnect, device number 1
Jul 07 00:01:48 proxmox kernel: usb 4-1: USB disconnect, device number 2
Jul 07 00:01:48 proxmox kernel: xhci_hcd 0000:06:00.0: USB bus 4 deregistered
Jul 07 00:01:48 proxmox kernel: xhci_hcd 0000:06:00.0: remove, state 1
Jul 07 00:01:48 proxmox kernel: usb usb3: USB disconnect, device number 1
Jul 07 00:01:48 proxmox systemd[2917]: Stopped target Sound Card.
Jul 07 00:01:48 proxmox systemd[1]: Stopped target Sound Card.
Jul 07 00:01:48 proxmox kernel: hid-generic 0003:1050:0407.0010: input,hidraw6: USB HID v1.10 Keyboard [Yubico Yubikey 4 OTP+U2F+CCID] on usb-0000:06:00.0-1.4/input0
Jul 07 00:01:48 proxmox kernel: usb 3-1.2: USB disconnect, device number 5
Jul 07 00:01:48 proxmox systemd[2917]: Reached target Smart Card.
Jul 07 00:01:49 proxmox kernel: usb 3-1.4: USB disconnect, device number 6
Jul 07 00:01:50 proxmox kernel: usb 3-1: USB disconnect, device number 2
Jul 07 00:01:50 proxmox kernel: usb 3-2: USB disconnect, device number 3
Jul 07 00:01:50 proxmox kernel: xhci_hcd 0000:06:00.0: USB bus 3 deregistered
Jul 07 00:01:51 proxmox systemd[1]: Started 106.scope.
Jul 07 00:01:51 proxmox systemd-udevd[6539]: Using default interface naming scheme 'v247'.
Jul 07 00:01:51 proxmox systemd-udevd[6539]: ethtool: autonegotiation is unset or enabled, the speed and duplex are not writable.
Jul 07 00:01:51 proxmox kernel: device tap106i0 entered promiscuous mode
Jul 07 00:01:51 proxmox systemd-udevd[6539]: ethtool: autonegotiation is unset or enabled, the speed and duplex are not writable.
Jul 07 00:01:51 proxmox systemd-udevd[6539]: ethtool: autonegotiation is unset or enabled, the speed and duplex are not writable.
Jul 07 00:01:51 proxmox systemd-udevd[6518]: ethtool: autonegotiation is unset or enabled, the speed and duplex are not writable.
Jul 07 00:01:51 proxmox systemd-udevd[6518]: Using default interface naming scheme 'v247'.
Jul 07 00:01:51 proxmox kernel: vmbr0: port 3(fwpr106p0) entered blocking state
Jul 07 00:01:51 proxmox kernel: vmbr0: port 3(fwpr106p0) entered disabled state
Jul 07 00:01:51 proxmox kernel: device fwpr106p0 entered promiscuous mode
Jul 07 00:01:51 proxmox kernel: vmbr0: port 3(fwpr106p0) entered blocking state
Jul 07 00:01:51 proxmox kernel: vmbr0: port 3(fwpr106p0) entered forwarding state
Jul 07 00:01:51 proxmox kernel: fwbr106i0: port 1(fwln106i0) entered blocking state
Jul 07 00:01:51 proxmox kernel: fwbr106i0: port 1(fwln106i0) entered disabled state
Jul 07 00:01:51 proxmox kernel: device fwln106i0 entered promiscuous mode
Jul 07 00:01:51 proxmox kernel: fwbr106i0: port 1(fwln106i0) entered blocking state
Jul 07 00:01:51 proxmox kernel: fwbr106i0: port 1(fwln106i0) entered forwarding state
Jul 07 00:01:51 proxmox kernel: fwbr106i0: port 2(tap106i0) entered blocking state
Jul 07 00:01:51 proxmox kernel: fwbr106i0: port 2(tap106i0) entered disabled state
Jul 07 00:01:51 proxmox kernel: fwbr106i0: port 2(tap106i0) entered blocking state
Jul 07 00:01:51 proxmox kernel: fwbr106i0: port 2(tap106i0) entered forwarding state
Jul 07 00:01:53 proxmox kernel: vfio-pci 0000:02:00.0: vfio_ecap_init: hiding ecap 0x1e@0x258
Jul 07 00:01:53 proxmox kernel: vfio-pci 0000:02:00.0: vfio_ecap_init: hiding ecap 0x19@0x900
Jul 07 00:01:55 proxmox kernel: smpboot: CPU 2 is now offline
Jul 07 00:01:55 proxmox kernel: smpboot: CPU 10 is now offline
Jul 07 00:01:55 proxmox kernel: smpboot: CPU 3 is now offline
Jul 07 00:01:55 proxmox kernel: smpboot: CPU 11 is now offline
Jul 07 00:01:55 proxmox kernel: smpboot: CPU 4 is now offline
Jul 07 00:01:55 proxmox kernel: smpboot: CPU 12 is now offline
Jul 07 00:01:55 proxmox kernel: smpboot: CPU 5 is now offline
Jul 07 00:01:55 proxmox kernel: smpboot: CPU 13 is now offline
Jul 07 00:01:56 proxmox kernel: smpboot: Booting Node 0 Processor 2 APIC 0x4
Jul 07 00:01:56 proxmox kernel: smpboot: Booting Node 0 Processor 10 APIC 0x5
Jul 07 00:01:56 proxmox kernel: smpboot: Booting Node 0 Processor 3 APIC 0x6
Jul 07 00:01:56 proxmox kernel: smpboot: Booting Node 0 Processor 11 APIC 0x7
Jul 07 00:01:56 proxmox kernel: smpboot: Booting Node 0 Processor 4 APIC 0x8
Jul 07 00:01:56 proxmox kernel: smpboot: Booting Node 0 Processor 12 APIC 0x9
Jul 07 00:01:56 proxmox kernel: smpboot: Booting Node 0 Processor 5 APIC 0xa
Jul 07 00:01:57 proxmox kernel: smpboot: Booting Node 0 Processor 13 APIC 0xb
Jul 07 00:01:57 proxmox systemd[1]: 106.scope: Succeeded.
Jul 07 00:01:57 proxmox systemd[1]: 106.scope: Consumed 3.831s CPU time.
Jul 07 00:01:57 proxmox kernel: select_fallback_rq: 9 callbacks suppressed
Jul 07 00:01:57 proxmox kernel: process 6703 (CPU 0/KVM) no longer affine to cpu2
Jul 07 00:01:57 proxmox kernel: smpboot: CPU 2 is now offline
Jul 07 00:01:57 proxmox kernel: smpboot: CPU 10 is now offline
Jul 07 00:01:57 proxmox kernel: smpboot: CPU 3 is now offline
Jul 07 00:01:57 proxmox kernel: smpboot: CPU 11 is now offline
Jul 07 00:01:57 proxmox kernel: smpboot: CPU 4 is now offline
Jul 07 00:01:57 proxmox kernel: smpboot: CPU 12 is now offline
Jul 07 00:01:57 proxmox kernel: smpboot: CPU 5 is now offline
Jul 07 00:01:57 proxmox kernel: smpboot: CPU 13 is now offline
Jul 07 00:01:58 proxmox pvedaemon[2755]: VM 106 qmp command failed - VM 106 qmp command 'guest-ping' failed - got timeout
Jul 07 00:01:58 proxmox kernel: smpboot: Booting Node 0 Processor 2 APIC 0x4
Jul 07 00:01:58 proxmox kernel: smpboot: Booting Node 0 Processor 10 APIC 0x5
Jul 07 00:01:58 proxmox kernel: smpboot: Booting Node 0 Processor 3 APIC 0x6
Jul 07 00:01:58 proxmox kernel: smpboot: Booting Node 0 Processor 11 APIC 0x7
Jul 07 00:01:59 proxmox kernel: smpboot: Booting Node 0 Processor 4 APIC 0x8
Jul 07 00:01:59 proxmox kernel: smpboot: Booting Node 0 Processor 12 APIC 0x9
Jul 07 00:01:59 proxmox kernel: smpboot: Booting Node 0 Processor 5 APIC 0xa
Jul 07 00:01:59 proxmox kernel: smpboot: Booting Node 0 Processor 13 APIC 0xb
Jul 07 00:01:59 proxmox pvedaemon[2753]: <root@pam> end task UPID:proxmox:0000195D:00012AB0:64A739C5:qmstart:106:root@pam: OK
Jul 07 00:02:16 proxmox kernel: usb 1-7: reset full-speed USB device number 2 using xhci_hcd
Jul 07 00:02:18 proxmox pvedaemon[2754]: VM 106 qmp command failed - VM 106 qmp command 'guest-ping' failed - got timeout
Jul 07 00:02:37 proxmox pvedaemon[2753]: VM 106 qmp command failed - VM 106 qmp command 'guest-ping' failed - got timeout
Jul 07 00:03:26 proxmox pvedaemon[2755]: <root@pam> successful auth for user 'root@pam'
Jul 07 00:03:27 proxmox pvedaemon[2754]: VM 106 qmp command failed - VM 106 qmp command 'guest-ping' failed - got timeout
Jul 07 00:03:37 proxmox pvedaemon[8261]: shutdown VM 106: UPID:proxmox:00002045:000157F9:64A73A39:qmshutdown:106:root@pam:
Jul 07 00:03:37 proxmox pvedaemon[2753]: <root@pam> starting task UPID:proxmox:00002045:000157F9:64A73A39:qmshutdown:106:root@pam:
Edit 2:
Comparing with a normal default VM boot:
pvedaemon[4304]: start VM 106: UPID:pve-gen8:000010D0:00005919:64A73C4F:qmstart:106:root@pam:
pvedaemon[2754]: <root@pam> starting task UPID:pve-gen8:000010D0:00005919:64A73C4F:qmstart:106:root@pam:
systemd[1]: Started 106.scope.
systemd-udevd[4320]: Using default interface naming scheme 'v247'.
systemd-udevd[4320]: ethtool: autonegotiation is unset or enabled, the speed and duplex are not writable.
kernel: device tap106i0 entered promiscuous mode
systemd-udevd[4323]: Using default interface naming scheme 'v247'.
systemd-udevd[4323]: ethtool: autonegotiation is unset or enabled, the speed and duplex are not writable.
systemd-udevd[4323]: ethtool: autonegotiation is unset or enabled, the speed and duplex are not writable.
systemd-udevd[4320]: ethtool: autonegotiation is unset or enabled, the speed and duplex are not writable.
kernel: vmbr0: port 3(fwpr106p0) entered blocking state
kernel: vmbr0: port 3(fwpr106p0) entered disabled state
kernel: device fwpr106p0 entered promiscuous mode
kernel: vmbr0: port 3(fwpr106p0) entered blocking state
kernel: vmbr0: port 3(fwpr106p0) entered forwarding state
kernel: fwbr106i0: port 1(fwln106i0) entered blocking state
kernel: fwbr106i0: port 1(fwln106i0) entered disabled state
kernel: device fwln106i0 entered promiscuous mode
kernel: fwbr106i0: port 1(fwln106i0) entered blocking state
kernel: fwbr106i0: port 1(fwln106i0) entered forwarding state
kernel: fwbr106i0: port 2(tap106i0) entered blocking state
kernel: fwbr106i0: port 2(tap106i0) entered disabled state
kernel: fwbr106i0: port 2(tap106i0) entered blocking state
kernel: fwbr106i0: port 2(tap106i0) entered forwarding state
kernel: vfio-pci 0000:02:00.0: vfio_ecap_init: hiding ecap 0x1e@0x258
kernel: vfio-pci 0000:02:00.0: vfio_ecap_init: hiding ecap 0x19@0x900
pvedaemon[2754]: <root@pam> end task UPID:pve-gen8:000010D0:00005919:64A73C4F:qmstart:106:root@pam: OK
kernel: usb 1-7: reset full-speed USB device number 2 using xhci_hcd
It may not like IOMMU / passthrough.
Hi! I'm just curious: does this script work with a single socket? I have a Xeon E5-2696 v3 and I'd love to try it. I read the tutorial on the Proxmox forum but I don't fully understand how it works. I want to run 2 gaming VMs and 1 NAS VM; can you provide a script for this specific setup? With my basic taskset script I always had problems with stuttering.
@flynuxbgz It should work just fine with your CPU. It doesn't care about sockets. It's even better this way, one less thing to worry about :)
@OrpheeGT Since I guess you care most about the Windows VM's latency, it's the only configuration we're going to modify. Your CPU has 8 cores / 16 threads; let's assign 4c/8t to it. Leave the other VMs to the host scheduler; it can be slightly overprovisioned, depending on other factors.
I'd suggest the following: open the VM configuration file (/etc/pve/qemu-server/<vmid>.conf) and:
- Add a hookscript entry to your VM's configuration to call the script; just point it to wherever you want to keep the script:
hookscript: local:snippets/pinning-hook-generic.sh
- You have a single socket, so there's no need to worry about the NUMA configuration mentioned in the original post:
numa: 0
- Add:
args: -cpu 'host,topoext=on' -smp '8,sockets=1,cores=4,threads=2,maxcpus=8'
- Make sure cores is set to:
cores: 16
- Add the following lines at the top:
#idleVm=no
#vmCpus=2,10,3,11,4,12,5,13
#miscCpus=0,1,8,9
#cpuGovernor=performance
What this does is assign cores 2,3,4,5, together with their second logical threads 10,11,12,13, to the VM's vCPUs, attach the VM's IO/misc threads to cores 0,1, and set the CPU governor to performance for those cores. The second most important modification is the 'args: -cpu ...' setting, which tells QEMU, and thus the guest Windows, how to treat the mapped cores (as 4 physical cores with 2 threads each). I wouldn't worry about the rest of the VMs, they will be fine on their own :)
The script also reverts the settings once you turn off the configured VM.
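For reference, here is roughly how those additions could look together at the top of /etc/pve/qemu-server/<vmid>.conf (just the pieces above collected into one sketch; the snippet path and CPU numbers are the example values from this thread, so adapt them to your own setup):
#idleVm=no
#vmCpus=2,10,3,11,4,12,5,13
#miscCpus=0,1,8,9
#cpuGovernor=performance
hookscript: local:snippets/pinning-hook-generic.sh
args: -cpu 'host,topoext=on' -smp '8,sockets=1,cores=4,threads=2,maxcpus=8'
numa: 0
cores: 16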
I just tried this script and it gives really great performance and low latency (I'm testing with DPC latency). But I think something is wrong: my gaming VM no. 2 stutters really badly, while gaming VM no. 1 performs great. By the way, I'm using 2 GPUs: gaming VM no. 1 (RX 580) and gaming VM no. 2 (Nvidia GT 740). Can you suggest a CPU pinning script for gaming VM no. 2 to fix this stuttering? Thank you very much for all your work, really appreciated.
@flynuxbgz Please share your VMs' config files and lscpu -e output; maybe there will be a clue. Also post some context: are there any other VMs running? What is your memory configuration, etc.?
Also, all the credit goes to kiler129, the original author.
Hello, nice script, but I can't find the right configuration for my case :(
lscpu -e:
CPU NODE SOCKET CORE L1d:L1i:L2:L3 ONLINE MAXMHZ MINMHZ
0 0 0 0 0:0:0:0 yes 3500.0000 1200.0000
1 1 1 1 1:1:1:1 yes 3500.0000 1200.0000
2 0 0 2 2:2:2:0 yes 3500.0000 1200.0000
3 1 1 3 3:3:3:1 yes 3500.0000 1200.0000
4 0 0 4 4:4:4:0 yes 3500.0000 1200.0000
5 1 1 5 5:5:5:1 yes 3500.0000 1200.0000
6 0 0 6 6:6:6:0 yes 3500.0000 1200.0000
7 1 1 7 7:7:7:1 yes 3500.0000 1200.0000
8 0 0 8 8:8:8:0 yes 3500.0000 1200.0000
9 1 1 9 9:9:9:1 yes 3500.0000 1200.0000
10 0 0 10 10:10:10:0 yes 3500.0000 1200.0000
11 1 1 11 11:11:11:1 yes 3500.0000 1200.0000
12 0 0 12 12:12:12:0 yes 3500.0000 1200.0000
13 1 1 13 13:13:13:1 yes 3500.0000 1200.0000
14 2 0 14 14:14:14:2 yes 3500.0000 1200.0000
15 3 1 15 15:15:15:3 yes 3500.0000 1200.0000
16 2 0 16 16:16:16:2 yes 3500.0000 1200.0000
17 3 1 17 17:17:17:3 yes 3500.0000 1200.0000
18 2 0 18 18:18:18:2 yes 3500.0000 1200.0000
19 3 1 19 19:19:19:3 yes 3500.0000 1200.0000
20 2 0 20 20:20:20:2 yes 3500.0000 1200.0000
21 3 1 21 21:21:21:3 yes 3500.0000 1200.0000
22 2 0 22 22:22:22:2 yes 3500.0000 1200.0000
23 3 1 23 23:23:23:3 yes 3500.0000 1200.0000
24 2 0 24 24:24:24:2 yes 3500.0000 1200.0000
25 3 1 25 25:25:25:3 yes 3500.0000 1200.0000
26 2 0 26 26:26:26:2 yes 3500.0000 1200.0000
27 3 1 27 27:27:27:3 yes 3500.0000 1200.0000
28 0 0 0 0:0:0:0 yes 3500.0000 1200.0000
29 1 1 1 1:1:1:1 yes 3500.0000 1200.0000
30 0 0 2 2:2:2:0 yes 3500.0000 1200.0000
31 1 1 3 3:3:3:1 yes 3500.0000 1200.0000
32 0 0 4 4:4:4:0 yes 3500.0000 1200.0000
33 1 1 5 5:5:5:1 yes 3500.0000 1200.0000
34 0 0 6 6:6:6:0 yes 3500.0000 1200.0000
35 1 1 7 7:7:7:1 yes 3500.0000 1200.0000
36 0 0 8 8:8:8:0 yes 3500.0000 1200.0000
37 1 1 9 9:9:9:1 yes 3500.0000 1200.0000
38 0 0 10 10:10:10:0 yes 3500.0000 1200.0000
39 1 1 11 11:11:11:1 yes 3500.0000 1200.0000
40 0 0 12 12:12:12:0 yes 3500.0000 1200.0000
41 1 1 13 13:13:13:1 yes 3500.0000 1200.0000
42 2 0 14 14:14:14:2 yes 3500.0000 1200.0000
43 3 1 15 15:15:15:3 yes 3500.0000 1200.0000
44 2 0 16 16:16:16:2 yes 3500.0000 1200.0000
45 3 1 17 17:17:17:3 yes 3500.0000 1200.0000
46 2 0 18 18:18:18:2 yes 3500.0000 1200.0000
47 3 1 19 19:19:19:3 yes 3500.0000 1200.0000
48 2 0 20 20:20:20:2 yes 3500.0000 1200.0000
49 3 1 21 21:21:21:3 yes 3500.0000 1200.0000
50 2 0 22 22:22:22:2 yes 3500.0000 1200.0000
51 3 1 23 23:23:23:3 yes 3500.0000 1200.0000
52 2 0 24 24:24:24:2 yes 3500.0000 1200.0000
53 3 1 25 25:25:25:3 yes 3500.0000 1200.0000
54 2 0 26 26:26:26:2 yes 3500.0000 1200.0000
55 3 1 27 27:27:27:3 yes 3500.0000 1200.0000
I have 2x E5-2690 v4 and 15 Linux VMs (with core affinity on the second processor) plus one gaming Windows VM, and I would like the Windows VM to run only on the first processor. Can you help me?
I tried something like this, but performance is very bad and only half of the processor is used:
#idleVm=no
#vmCpus=0,28,2,30,4,32,6,34,8,36,10,38,12,40,14,42,16,44,18,46,20,48,22,50,24,52,26,54
#miscCpus=1,3,5,7,9,11
#cpuGovernor=performance
args: -cpu 'host,topoext=on' -smp '28,sockets=1,cores=14,threads=2,maxcpus=28'
hookscript: local:snippets/proxmox-hook.sh
affinity: 0,28,2,30,4,32,6,34,8,36,10,38,12,40,14,42,16,44,18,46,20,48,22,50,24,52,26,54
agent: 1
balloon: 0
bios: ovmf
boot: order=scsi0
cores: 28
cpu: host,hidden=1,flags=+pcid;+pdpe1gb;+hv-tlbflush;+hv-evmcs;+aes
cpuunits: 10000
efidisk0: nvme1:vm-101-disk-2,efitype=4m,pre-enrolled-keys=1,size=4M
hostpci0: 0000:04:00,pcie=1,x-vga=1
hotplug: disk,network,usb
kvm: 1
machine: pc-q35-7.2
memory: 40960
meta: creation-qemu=8.0.2,ctime=1692726157
name: windows
numa: 1
onboot: 1
ostype: win10
scsi0: nvme1:vm-101-disk-1,cache=unsafe,discard=on,size=360G,ssd=1
scsihw: virtio-scsi-pci
sockets: 1
tablet: 0
usb0: host=046d:c328
usb1: host=30fa:1030
usb2: host=0d8c:0014
vcpus: 28
vga: none
Hello, I have a Windows VM with GPU passthrough that I want to utilise for gaming. How would I go about using this script? My goal is to have 10 threads for my VM.
My CPU is AMD Ryzen 5 1600 (6 cores 12 threads)
Output of lscpu -e:
I tried to do this myself, but every time I start or stop the Windows VM, another specific VM of mine stops working and shows "internal error", and htop/lscpu reports some of my cores offline, so I had to reboot the Proxmox node to bring the cores back online.
This is my Windows VM config:
root@proxmox:~# cat /etc/pve/qemu-server/102.conf
#idleVm=no
#vmCpus=2,2,3,3,4,4,5,5,0,6,1,7,2,8,3,9,4,10,5,11
#miscCpus=0,1,8,9
#cpuGovernor=performance
agent: 1
args: -cpu 'host,topoext=on' -smp '10,sockets=1,cores=5,threads=2,maxcpus=10' hv_vendor_id=GIGABYTE,+pdpe1gb' -smbios type=0,version=UX305UA.201 -smbios type=1,manufacturer=GIGABYTE,product=UX305UA,version=2021.1 -smbios type=2,manufacturer=AMD,version=2021.5,product='AMD Ryzen 5 1600' -smbios type=3,manufacturer=XBZJ -smbios type=17,manufacturer=KINGSTON,loc_pfx=DDR4,speed=3200,serial=114514,part=FF63 -smbios type=4,manufacturer=AMD,max-speed=4800,current-speed=3200
audio0: device=ich9-intel-hda,driver=none
balloon: 0
bios: ovmf
boot: order=scsi2;scsi1;net0
cores: 12
cpu: host,hidden=1
cpuunits: 10000
efidisk0: local:102/vm-102-disk-0.raw,efitype=4m,pre-enrolled-keys=1,size=528K
hookscript: local:snippets/proxmox-hook.sh
hostpci0: 0000:07:00,pcie=1,x-vga=1
hotplug: usb
hugepages: 1024
machine: pc-q35-8.1
memory: 24576
meta: creation-qemu=8.1.2,ctime=1704182629
name: wintest
net0: virtio=E8:2A:EA:9F:8A:1A,bridge=vmbr0,firewall=1
numa: 1
ostype: win10
scsi1: /dev/disk/by-id/ata-WDC_WD10EZEX-00BBHA0_WD-WCC6Y7FUZN3L,size=976762584K,serial=4421
scsi2: local:102/vm-102-disk-2.raw,cache=unsafe,iothread=1,size=400G,ssd=1
scsihw: virtio-scsi-single
smbios1: uuid=24c326dd-3cec-48fc-bb9f-87aa3984e2c9,manufacturer=QVNVUw==,product=VVgzMDVVQQ==,version=MjAyMS4x,serial=MTI0NjY3,sku=MTM0NDY4,family=Ng==,base64=1
sockets: 1
tablet: 0
tpmstate0: local:102/vm-102-disk-1.raw,size=4M,version=v2.0
usb0: host=2-4
vcpus: 10
vga: none
vmgenid: 12ff2d20-3979-404b-91b0-90bdb31cf66f
Thanks, I got this working nicely.
Just wondering, would it be possible to reassign the number of cores while the VM is running? E.g. by running this script through a cron job.
If so, what would be needed to make this work?
What I aim to do: we have an OPNsense VM serving as router/firewall. At night the load (throughput) is low, and this VM could do with a single core (it's still latency-sensitive, though). The other cores could then be set to the powersave CPU governor, so as to reduce the system's power usage.
@gijs007 I doubt you can reduce the number of cores without rebooting the guest. You can, however, change the governor for the cores via cron any time you want.
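If it helps, here is a minimal sketch of that idea as root crontab entries in /etc/crontab (the CPU list 2-5,10-13 and the times are hypothetical examples; it assumes those are the threads pinned to your VM and that your cpufreq driver exposes the performance and powersave governors):
# Hypothetical schedule: powersave at 23:00, back to performance at 07:00, only for the VM-pinned threads
0 23 * * * root for c in 2 3 4 5 10 11 12 13; do echo powersave > /sys/devices/system/cpu/cpu$c/cpufreq/scaling_governor; done
0 7 * * * root for c in 2 3 4 5 10 11 12 13; do echo performance > /sys/devices/system/cpu/cpu$c/cpufreq/scaling_governor; done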
Hi!
Even after reading the original topic, I'm too dumb to understand how it works...
Would it be possible to have some sort of running tutorial?
I currently have an i9-9900K.