Created
May 27, 2022 14:16
-
-
Save gibizer/ddac439b0d85d73515f9e04af0180c2f to your computer and use it in GitHub Desktop.
Passing a PCI device from the host to an L2 VM with libvirt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## host(with a real PCI dev) ---libvirt/qemu---> L1 VM (running devstack) ---nova/libvirt/qemu---> L2 VM | |
## host | |
# Enable iommu in the kernel | |
$ grep iommu /etc/default/grub | |
GRUB_CMDLINE_LINUX_DEFAULT="intel_iommu=on" | |
# the PCI device needs to be in a separate IOMMU group | |
$ lspci -v | grep 14.3 -A 2 | |
00:14.3 Network controller: Intel Corporation Comet Lake PCH-LP CNVi WiFi | |
Subsystem: Intel Corporation Wi-Fi 6 AX201 160MHz | |
Flags: fast devsel, IRQ 16, IOMMU group 6 | |
# detach the device from the host | |
virsh nodedev-detach --device pci_0000_00_14_3 | |
# create an L1 domain | |
# important pieces: | |
# * q35 machine type | |
# * UEFI | |
# * ioapic: <ioapic driver='qemu'/> | |
# * iommu: <iommu model='intel'> | |
# <driver intremap='on' caching_mode='on'/> | |
# </iommu> | |
# * and the hostdev passing through the PCI device (00:14.3): | |
# <hostdev mode='subsystem' type='pci' managed='no'> | |
# <driver name='vfio'/> | |
# <source> | |
# <address domain='0x0000' bus='0x00' slot='0x14' function='0x3'/> | |
# </source> | |
# <alias name='hostdev0'/> | |
# <address type='pci' domain='0x0000' bus='0x06' slot='0x00' function='0x0'/> | |
# </hostdev> | |
$ virsh dumpxml pci-devstack-aio | |
<domain type='kvm' id='19'> | |
<name>pci-devstack-aio</name> | |
<uuid>e24c7a3a-c4ec-4b96-9f27-a1052d40a8f7</uuid> | |
<memory unit='KiB'>8388608</memory> | |
<currentMemory unit='KiB'>8388608</currentMemory> | |
<vcpu placement='static'>4</vcpu> | |
<resource> | |
<partition>/machine</partition> | |
</resource> | |
<os> | |
<type arch='x86_64' machine='pc-q35-4.2'>hvm</type> | |
<loader readonly='yes' secure='no' type='pflash'>/usr/share/OVMF/OVMF_CODE.fd</loader> | |
<nvram template='/usr/share/OVMF/OVMF_VARS.fd'>/var/lib/libvirt/qemu/nvram/pci-devstack-aio_VARS.fd</nvram> | |
<boot dev='hd'/> | |
</os> | |
<features> | |
<acpi/> | |
<apic/> | |
<ioapic driver='qemu'/> | |
<vmcoreinfo state='on'/> | |
</features> | |
<cpu mode='host-passthrough' check='none' migratable='on'/> | |
<clock offset='utc'> | |
<timer name='pit' tickpolicy='delay'/> | |
<timer name='rtc' tickpolicy='catchup'/> | |
<timer name='hpet' present='no'/> | |
</clock> | |
<on_poweroff>destroy</on_poweroff> | |
<on_reboot>restart</on_reboot> | |
<on_crash>destroy</on_crash> | |
<devices> | |
<emulator>/usr/bin/qemu-system-x86_64</emulator> | |
<disk type='file' device='disk'> | |
<driver name='qemu' type='qcow2' cache='none'/> | |
<source file='/home/gibi/upstream/pci-tracking/pci-devstack.img' index='1'/> | |
<backingStore/> | |
<target dev='vda' bus='virtio'/> | |
<alias name='virtio-disk0'/> | |
<address type='pci' domain='0x0000' bus='0x02' slot='0x00' function='0x0'/> | |
</disk> | |
<controller type='usb' index='0' model='none'> | |
<alias name='usb'/> | |
</controller> | |
<controller type='sata' index='0'> | |
<alias name='ide'/> | |
<address type='pci' domain='0x0000' bus='0x00' slot='0x1f' function='0x2'/> | |
</controller> | |
<controller type='pci' index='0' model='pcie-root'> | |
<alias name='pcie.0'/> | |
</controller> | |
<controller type='pci' index='1' model='pcie-root-port'> | |
<model name='pcie-root-port'/> | |
<target chassis='1' port='0x10'/> | |
<alias name='pci.1'/> | |
<address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0' multifunction='on'/> | |
</controller> | |
<controller type='pci' index='2' model='pcie-root-port'> | |
<model name='pcie-root-port'/> | |
<target chassis='2' port='0x11'/> | |
<alias name='pci.2'/> | |
<address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x1'/> | |
</controller> | |
<controller type='pci' index='3' model='pcie-root-port'> | |
<model name='pcie-root-port'/> | |
<target chassis='3' port='0x12'/> | |
<alias name='pci.3'/> | |
<address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x2'/> | |
</controller> | |
<controller type='pci' index='4' model='pcie-root-port'> | |
<model name='pcie-root-port'/> | |
<target chassis='4' port='0x13'/> | |
<alias name='pci.4'/> | |
<address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x3'/> | |
</controller> | |
<controller type='pci' index='5' model='pcie-root-port'> | |
<model name='pcie-root-port'/> | |
<target chassis='5' port='0x14'/> | |
<alias name='pci.5'/> | |
<address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x4'/> | |
</controller> | |
<controller type='pci' index='6' model='pcie-root-port'> | |
<model name='pcie-root-port'/> | |
<target chassis='6' port='0x15'/> | |
<alias name='pci.6'/> | |
<address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x5'/> | |
</controller> | |
<controller type='pci' index='7' model='pcie-to-pci-bridge'> | |
<model name='pcie-pci-bridge'/> | |
<alias name='pci.7'/> | |
<address type='pci' domain='0x0000' bus='0x05' slot='0x00' function='0x0'/> | |
</controller> | |
<interface type='network'> | |
<mac address='fa:16:3e:12:13:13'/> | |
<source network='vagrant-libvirt' portid='5c74ea14-b140-4874-b332-945d8e43af45' bridge='virbr1'/> | |
<target dev='vnet14'/> | |
<model type='virtio'/> | |
<alias name='net0'/> | |
<address type='pci' domain='0x0000' bus='0x01' slot='0x00' function='0x0'/> | |
</interface> | |
<serial type='pty'> | |
<source path='/dev/pts/20'/> | |
<log file='/home/gibi/upstream/pci-tracking/pci-devstack-aio-console.log' append='off'/> | |
<target type='isa-serial' port='0'> | |
<model name='isa-serial'/> | |
</target> | |
<alias name='serial0'/> | |
</serial> | |
<console type='pty' tty='/dev/pts/20'> | |
<source path='/dev/pts/20'/> | |
<log file='/home/gibi/upstream/pci-tracking/pci-devstack-aio-console.log' append='off'/> | |
<target type='serial' port='0'/> | |
<alias name='serial0'/> | |
</console> | |
<input type='mouse' bus='ps2'> | |
<alias name='input0'/> | |
</input> | |
<input type='keyboard' bus='ps2'> | |
<alias name='input1'/> | |
</input> | |
<audio id='1' type='none'/> | |
<video> | |
<model type='virtio' heads='1' primary='yes'/> | |
<alias name='video0'/> | |
<address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x0'/> | |
</video> | |
<hostdev mode='subsystem' type='pci' managed='no'> | |
<driver name='vfio'/> | |
<source> | |
<address domain='0x0000' bus='0x00' slot='0x14' function='0x3'/> | |
</source> | |
<alias name='hostdev0'/> | |
<address type='pci' domain='0x0000' bus='0x06' slot='0x00' function='0x0'/> | |
</hostdev> | |
<memballoon model='virtio'> | |
<stats period='10'/> | |
<alias name='balloon0'/> | |
<address type='pci' domain='0x0000' bus='0x03' slot='0x00' function='0x0'/> | |
</memballoon> | |
<rng model='virtio'> | |
<backend model='random'>/dev/urandom</backend> | |
<alias name='rng0'/> | |
<address type='pci' domain='0x0000' bus='0x04' slot='0x00' function='0x0'/> | |
</rng> | |
<iommu model='intel'> | |
<driver intremap='on' caching_mode='on'/> | |
</iommu> | |
</devices> | |
<seclabel type='dynamic' model='dac' relabel='yes'> | |
<label>+64055:+64055</label> | |
<imagelabel>+64055:+64055</imagelabel> | |
</seclabel> | |
</domain> | |
## L1 VM | |
# enable iommu | |
stack@pci-stack:~$ grep iommu /etc/default/grub | |
GRUB_CMDLINE_LINUX_DEFAULT="intel_iommu=on" | |
GRUB_CMDLINE_LINUX="intel_iommu=on" | |
# check in dmesg that there is real IOMMU support | |
stack@pci-stack:~$ dmesg | grep DMAR | |
[ 0.011109] ACPI: DMAR 0x000000007FB77000 0000A0 (v01 BOCHS BXPC 00000001 BXPC 00000001) | |
[ 0.011119] ACPI: Reserving DMAR table memory at [mem 0x7fb77000-0x7fb7709f] | |
[ 0.033456] DMAR: IOMMU enabled | |
[ 0.133930] DMAR: Host address width 39 | |
[ 0.134131] DMAR: DRHD base: 0x000000fed90000 flags: 0x0 | |
[ 0.134459] DMAR: dmar0: reg_base_addr fed90000 ver 1:0 cap d2008c22260286 ecap f00f5a | |
[ 0.134946] DMAR-IR: IOAPIC id 0 under DRHD base 0xfed90000 IOMMU 0 | |
[ 0.135317] DMAR-IR: Queued invalidation will be enabled to support x2apic and Intr-remapping. | |
[ 0.144703] DMAR-IR: Enabled IRQ remapping in x2apic mode | |
[ 0.572621] DMAR: No RMRR found | |
[ 0.572790] DMAR: No ATSR found | |
[ 0.572981] DMAR: dmar0: Using Queued invalidation | |
[ 0.573268] DMAR: Disable batched IOTLB flush due to virtualization | |
[ 0.580035] DMAR: Intel(R) Virtualization Technology for Directed I/O | |
# detach the PCI device (visible as 07:00.0 inside L1) from the L1 VM's kernel driver | |
virsh nodedev-detach --device pci_0000_07_00_0 | |
# start devstack on L1 | |
# see: https://docs.openstack.org/devstack/latest/ | |
# configure nova to offer 07:00 to L2 guests: | |
stack@pci-stack:~$ grep pci /etc/nova/nova-cpu.conf -A 3 | |
[pci] | |
passthrough_whitelist = {"vendor_id": "8086","product_id": "02f0"} | |
stack@pci-stack:~$ grep pci /etc/nova/nova.conf -A 3 | |
[pci] | |
alias = {"name": "intel-wifi", "product_id": "02f0", "vendor_id": "8086","device_type": "type-PCI"} | |
# configure the nova flavor to request the PCI device | |
stack@pci-stack:~$ openstack flavor show m1.tiny | |
/usr/lib/python3/dist-packages/secretstorage/dhcrypto.py:15: CryptographyDeprecationWarning: int_from_bytes is deprecated, use int.from_bytes instead | |
from cryptography.utils import int_from_bytes | |
/usr/lib/python3/dist-packages/secretstorage/util.py:19: CryptographyDeprecationWarning: int_from_bytes is deprecated, use int.from_bytes instead | |
from cryptography.utils import int_from_bytes | |
+----------------------------+-------------------------------------------------------------+ | |
| Field | Value | | |
+----------------------------+-------------------------------------------------------------+ | |
| OS-FLV-DISABLED:disabled | False | | |
| OS-FLV-EXT-DATA:ephemeral | 0 | | |
| access_project_ids | None | | |
| description | None | | |
| disk | 1 | | |
| id | 1 | | |
| name | m1.tiny | | |
| os-flavor-access:is_public | True | | |
| properties | hw_rng:allowed='True', pci_passthrough:alias='intel-wifi:1' | | |
| ram | 512 | | |
| rxtx_factor | 1.0 | | |
| swap | | | |
| vcpus | 1 | | |
+----------------------------+-------------------------------------------------------------+ | |
# boot the L2 VM with nova | |
openstack server create --flavor m1.tiny --image cirros-0.5.2-x86_64-disk --nic net-id=public --use-config-drive vm2-pci --wait | |
## L2 VM | |
stack@pci-stack:~$ virsh list --all | |
Id Name State | |
----------------------------------- | |
1 instance-00000004 running | |
stack@pci-stack:~$ virsh console 1 | |
Connected to domain instance-00000004 | |
Escape character is ^] | |
login as 'cirros' user. default password: 'gocubsgo'. use 'sudo' for root. | |
vm2-pci login: cirros | |
Password: | |
$ lspci | |
00:01.0 Class 0601: 8086:7000 | |
00:04.0 Class 0100: 1af4:1001 | |
00:07.0 Class 00ff: 1af4:1005 | |
00:00.0 Class 0600: 8086:1237 | |
00:01.3 Class 0680: 8086:7113 | |
00:03.0 Class 0200: 1af4:1000 | |
00:01.1 Class 0101: 8086:7010 | |
00:06.0 Class 00ff: 1af4:1002 | |
00:02.0 Class 0300: 1af4:1050 | |
00:05.0 Class 0280: 8086:02f0 <--- this is the PCI device from the host | |
$ | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment