#!/usr/bin/env python3
# Copyright 2022 Kyle Petryszak

# Example hook script for PVE guests (hookscript config option)
# You can set this via pct/qm with
# pct set <vmid> -hookscript <volume-id>
# qm set <vmid> -hookscript <volume-id>
# where <volume-id> has to be an executable file in the snippets folder
# of any storage with directories e.g.:
# qm set 100 -hookscript local:snippets/hookscript-pci.py

import sys, os, re, time, subprocess, apt, enum
'''The following actions are user defined. By default, if the VM requested to
start needs PCI devices that are attached to an already running VM, the system
will shut down that VM, freeing the PCI devices in favor of the currently
requested target.'''

# vm_actions = Enum('fail','hibernate','suspend','shutdown','stop')
class VMActions(enum.Enum):
    fail = 1
    hibernate = 2
    suspend = 3
    shutdown = 4
    stop = 5

# USER DEFINED
vm_action = VMActions.shutdown
# arguments are provided by the 'qm' application in Proxmox in the following format:
# hookscript-pci.py [vm_id] [phase]
vm_id = sys.argv[1]
phase = sys.argv[2]

# Proxmox VM config location and formatting
path = '/etc/pve/qemu-server/'
current_file = path + vm_id + '.conf'

# required dependencies
pkg_names = ['driverctl']

# regexes for GPU PCIe configuration matching
comment_pattern = re.compile('^#')
pci_pattern1 = re.compile('hostpci')
# PCI addresses are hexadecimal, so match hex digits as well
pci_id_pattern = re.compile('([0-9a-fA-F]+:[0-9a-fA-F]+:[0-9a-fA-F]+)|([0-9a-fA-F]+:[0-9a-fA-F]+)')
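# a passthrough entry in the VM config looks like, for example:
#   hostpci0: 0000:06:00,pcie=1,x-vga=1
# pci_id_pattern extracts '0000:06:00' (domain:bus:slot) or the short '06:00' form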
def main():
    '''phase logic'''
    # check that all dependencies are installed before proceeding
    check_installed_deps()
    # process phase logic
    if phase == 'pre-start':
        attached_vm = check_other_running_vms(vm_id, current_file)
        # check if any other VM is currently attached, and run the corresponding action
        if attached_vm is not None:
            run_vm_action(attached_vm, vm_action)
            # wait for drivers to be dropped from the offending VM
            time.sleep(2)
        print('binding vfio-pci drivers')
        bind_vfio_pci(extract_pci_ids(current_file))
        print('vfio-pci drivers bound, pre-flight checks passed')
        sys.exit(0)
    elif phase == 'post-start':
        # nothing to do here
        sys.exit(0)
    elif phase == 'pre-stop':
        # nothing to do here
        sys.exit(0)
    elif phase == 'post-stop':
        # unbind and rebind to the system driver
        unbind_vfio_pci(extract_pci_ids(current_file))
        print('all pci devices unbound from vfio-pci')
        sys.exit(0)
    else:
        print('Got unknown phase ' + phase)
        sys.exit(1)
def extract_pci_ids(file):
    '''scan a Proxmox VM config, pull all active PCI slot locations and return them as a list'''
    pci_ids = []
    for line in open(file):
        # check config file for PCIe candidates
        if re.search(pci_pattern1, line):
            # ignore comment lines
            if comment_pattern.match(line):
                continue
            # get IDs of all PCI devices being passed through
            for match in pci_id_pattern.findall(line):
                # separate matches, i.e. [('', '07:00')] --> '07:00' or [('0000:06:00', '')] --> '0000:06:00'
                if match[0] != '':
                    pci_ids.append(match[0])
                elif match[1] != '':
                    pci_ids.append(match[1])
    return _get_sub_pci_ids(pci_ids)
def _get_sub_pci_ids(pci_ids):
    '''find all sub-ids of a given PCI card,
    i.e. 0000:06:00 -> [0000:06:00, 0000:06:00.0, 0000:06:00.1]'''
    out = subprocess.run(['/usr/sbin/driverctl', 'list-devices'], capture_output=True).stdout.decode()
    all_pci_ids = []
    for i in range(len(pci_ids)):
        # add base id to list
        all_pci_ids.append([pci_ids[i]])
        # add sub ids after base id (escape the id and match a literal '.function' suffix)
        for match in re.findall(re.escape(pci_ids[i]) + r'\.[0-9]+', out):
            all_pci_ids[i].append(match)
    return all_pci_ids
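# note: driverctl list-devices prints one '<pci-id> <driver>' pair per line, e.g.:
#   0000:06:00.0 nvidia
#   0000:06:00.1 snd_hda_intel
# so _get_sub_pci_ids(['0000:06:00']) yields [['0000:06:00', '0000:06:00.0', '0000:06:00.1']]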
def bind_vfio_pci(pci_ids):
    '''set the default driver to vfio-pci before the VM starts'''
    for pci_sub_ids in pci_ids:
        # make sure sub ids exist, i.e. 0000:06:00.0
        if len(pci_sub_ids) > 1:
            for pci_sub_id in pci_sub_ids[1:]:
                # --nosave keeps the override from persisting across reboots
                out = subprocess.run(['/usr/sbin/driverctl', '--nosave', 'set-override', pci_sub_id, 'vfio-pci'])
                if out.returncode != 0:
                    print('error binding vfio-pci to ' + pci_sub_id)
                    sys.exit(1)

def unbind_vfio_pci(pci_ids):
    '''set the default kernel drivers back to normal after the VM exits'''
    for pci_sub_ids in pci_ids:
        # make sure sub ids exist, i.e. 0000:06:00.0
        if len(pci_sub_ids) > 1:
            for pci_sub_id in pci_sub_ids[1:]:
                out = subprocess.run(['/usr/sbin/driverctl', '--nosave', 'unset-override', pci_sub_id])
                if out.returncode != 0:
                    print('error unbinding vfio-pci from ' + pci_sub_id)
                    sys.exit(1)
def check_other_running_vms(vm_id, current_file):
    '''return the running VM that shares a PCI slot ID with the target VM config, if any'''
    # get all PCI IDs from current VM config
    current_file_pci_ids = extract_pci_ids(current_file)
    # get list of all currently running VMs
    running_vms = get_all_running_vms()
    # get all other VM config files on host
    # vm_configs = [os.path.join(path, f) for f in os.listdir(path) if os.path.isfile(os.path.join(path, f))]
    # fail pre-flight if target VM already running
    if vm_id in running_vms:
        print('Target VM ' + vm_id + ' already running')
        sys.exit(1)
    # check if any currently running VM is attached to the requested target PCI devices
    for vm in running_vms:
        vm_pci_ids = extract_pci_ids(path + vm + '.conf')
        if len(vm_pci_ids) != 0:
            for pci_id in current_file_pci_ids:
                if pci_id in vm_pci_ids:
                    return vm
    return None
def get_all_running_vms():
    '''request a list of all of the currently running VMs on the host'''
    out = subprocess.run(['/usr/sbin/qm', 'list'], capture_output=True)
    # split output into rows
    rows = out.stdout.decode().split('\n')
    # drop the trailing empty string left by the final newline
    rows.pop()
    # split output into a 2D array
    # the first row contains the following header
    # ['VMID', 'NAME', 'STATUS', 'MEM(MB)', 'BOOTDISK(GB)', 'PID']
    vm_info = [row.split() for row in rows]
    # filter out only running VM IDs and return them as a list
    return [vm[0] for vm in vm_info[1:] if vm[2] == 'running']
def run_vm_action(vm_id, action):
    '''run the user defined action when a conflict arises'''
    if action == VMActions.fail:
        print('Aborting start due to fail condition.\nTo change behavior edit vm_action.')
        sys.exit(1)
    elif action == VMActions.hibernate:
        print('Hibernating ' + vm_id)
        out = subprocess.run(['/usr/sbin/qm', 'suspend', vm_id, '--to-disk', '1'], capture_output=True)
        if out.returncode != 0:
            print('Error while attempting hibernation:\n' + out.stderr.decode())
            sys.exit(1)
        wait_vm_stop(vm_id)
    elif action == VMActions.suspend:
        print('Suspending ' + vm_id)
        out = subprocess.run(['/usr/sbin/qm', 'suspend', vm_id], capture_output=True)
        if out.returncode != 0:
            print('Error while attempting suspension:\n' + out.stderr.decode())
            sys.exit(1)
        wait_vm_stop(vm_id)
    elif action == VMActions.shutdown:
        print('Shutting down ' + vm_id)
        out = subprocess.run(['/usr/sbin/qm', 'shutdown', vm_id], capture_output=True)
        if out.returncode != 0:
            print('Error while attempting shutdown:\n' + out.stderr.decode())
            sys.exit(1)
        wait_vm_stop(vm_id)
    elif action == VMActions.stop:
        print('Stopping ' + vm_id)
        out = subprocess.run(['/usr/sbin/qm', 'stop', vm_id], capture_output=True)
        if out.returncode != 0:
            print('Error while stopping:\n' + out.stderr.decode())
            sys.exit(1)
        wait_vm_stop(vm_id)
def wait_vm_stop(vm_id):
    '''block further execution until the target VM is fully stopped'''
    print('Waiting for VM to stop...')
    out = subprocess.run(['/usr/sbin/qm', 'wait', vm_id], capture_output=True)
    if out.returncode != 0:
        print(out.stderr.decode())
        sys.exit(1)
def check_installed_deps():
    '''check that the required binaries are installed, and install them if not'''
    cache = apt.cache.Cache()
    cache.open()
    for pkg_name in pkg_names:
        pkg = cache[pkg_name]
        if not pkg.is_installed:
            cache.update()
            pkg.mark_install()
            try:
                cache.commit()
            except Exception as arg:
                print('Sorry, package installation failed [{err}]'.format(err=str(arg)), file=sys.stderr)
                sys.exit(1)
if __name__ == '__main__':
    main()
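For reference, a minimal install sketch, assuming the default 'local' directory storage (the snippets path and VMID are illustrative; adjust for your setup):

# copy the script into the snippets folder and make it executable
cp hookscript-pci.py /var/lib/vz/snippets/
chmod +x /var/lib/vz/snippets/hookscript-pci.py
# attach it to a VM
qm set 100 -hookscript local:snippets/hookscript-pci.py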
The script is entirely dependent on the underlying Proxmox qm system. Proxmox should detect that the VM is no longer running and run the "post-stop" phase. I will have to test that out.
@luby2999 Have you tried installing the qemu-guest-agent? I wonder if that will help communicate the internal shutdown to the main system.
I have it installed in my Windows and Linux guests, and the script properly resets the drivers accordingly.
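For anyone following along, a rough sketch of the usual agent setup on a Debian/Ubuntu guest (assumes an apt-based guest; other distros differ):

# inside the guest
apt install qemu-guest-agent
systemctl enable --now qemu-guest-agent
# on the Proxmox host, enable the agent option for the VM
qm set <vmid> --agent enabled=1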
Hello Kyle,
Thank you for your reply.
The Mac system I am using doesn't have the qemu-guest-agent installed. Is a qemu-guest-agent available for macOS?
I do not believe that macOS is officially supported by the Proxmox platform, and as far as I am aware, it does not have a qemu-guest-agent. I would head over to the Proxmox forums for macOS support.
Referencing this article about the qemu-guest-agent,
Pushed some updates that fixed some bugs and removed code redundancy.
Previously, when scanning for other VMs running with the same PCI slot, the script would compare the target VM against itself, which always returned true, shutting down the first VM in the list of running VMs whenever a VM with a PCI card was started.
Hello Kyle Petryszak,
Your code is great, thanks.
Stopping the virtual machine via the web UI button works without problems. But when the virtual machine is shut down from inside the VM, the code does not run; it seems the terminating process cannot call 'driverctl'. Is there a way to solve this? Thanks again.