Skip to content

Instantly share code, notes, and snippets.

@anfernee
Last active September 14, 2024 23:52
Show Gist options
  • Save anfernee/e8c321e35efc81d83d8b7d2ddaa89518 to your computer and use it in GitHub Desktop.
Save anfernee/e8c321e35efc81d83d8b7d2ddaa89518 to your computer and use it in GitHub Desktop.

iperf result when vhost is enabled:

root@anfernee-XPS-8700:/home/anfernee/VM/images# iperf3 -c 192.168.122.41
Connecting to host 192.168.122.41, port 5201
[  5] local 192.168.122.1 port 50716 connected to 192.168.122.41 port 5201
[ ID] Interval           Transfer     Bitrate         Retr  Cwnd
[  5]   0.00-1.00   sec  4.72 GBytes  40.6 Gbits/sec    0   1.95 MBytes       
[  5]   1.00-2.00   sec  4.97 GBytes  42.7 Gbits/sec    0   2.64 MBytes       
[  5]   2.00-3.00   sec  4.86 GBytes  41.7 Gbits/sec    0   2.77 MBytes       
[  5]   3.00-4.00   sec  4.54 GBytes  39.0 Gbits/sec    0   4.00 MBytes       
[  5]   4.00-5.00   sec  4.78 GBytes  41.0 Gbits/sec    0   4.00 MBytes       
[  5]   5.00-6.00   sec  4.85 GBytes  41.6 Gbits/sec    0   4.00 MBytes       
[  5]   6.00-7.00   sec  4.92 GBytes  42.3 Gbits/sec    0   4.00 MBytes       
[  5]   7.00-8.00   sec  4.91 GBytes  42.2 Gbits/sec    0   4.00 MBytes       
[  5]   8.00-9.00   sec  4.61 GBytes  39.6 Gbits/sec    0   4.00 MBytes       
[  5]   9.00-10.00  sec  4.76 GBytes  40.9 Gbits/sec    0   4.00 MBytes       
- - - - - - - - - - - - - - - - - - - - - - - - -
[ ID] Interval           Transfer     Bitrate         Retr
[  5]   0.00-10.00  sec  47.9 GBytes  41.2 Gbits/sec    0             sender
[  5]   0.00-10.00  sec  47.9 GBytes  41.2 Gbits/sec                  receiver

iperf Done.

Disable vhost by adding `<driver name="qemu"/>` to the interface definition in the libvirt domain XML:

      <model type='virtio'/>
             <driver name="qemu"/>
      <alias name='net0'/>  

iperf result:

root@anfernee-XPS-8700:/home/anfernee/VM/images# iperf3 -c 192.168.122.41
Connecting to host 192.168.122.41, port 5201
[  5] local 192.168.122.1 port 53958 connected to 192.168.122.41 port 5201
[ ID] Interval           Transfer     Bitrate         Retr  Cwnd
[  5]   0.00-1.00   sec  3.44 GBytes  29.5 Gbits/sec    0   3.79 MBytes       
[  5]   1.00-2.00   sec  3.45 GBytes  29.6 Gbits/sec    0   3.98 MBytes       
[  5]   2.00-3.00   sec  3.42 GBytes  29.4 Gbits/sec    0   3.98 MBytes       
[  5]   3.00-4.00   sec  3.52 GBytes  30.2 Gbits/sec    0   3.98 MBytes       
[  5]   4.00-5.00   sec  3.49 GBytes  29.9 Gbits/sec    0   3.98 MBytes       
[  5]   5.00-6.00   sec  3.51 GBytes  30.2 Gbits/sec    0   3.98 MBytes       
[  5]   6.00-7.00   sec  3.30 GBytes  28.3 Gbits/sec    0   3.98 MBytes       
[  5]   7.00-8.00   sec  3.35 GBytes  28.8 Gbits/sec    0   3.98 MBytes       
[  5]   8.00-9.00   sec  3.60 GBytes  30.9 Gbits/sec    0   3.98 MBytes       
[  5]   9.00-10.00  sec  3.14 GBytes  26.9 Gbits/sec    0   3.98 MBytes       
- - - - - - - - - - - - - - - - - - - - - - - - -
[ ID] Interval           Transfer     Bitrate         Retr
[  5]   0.00-10.00  sec  34.2 GBytes  29.4 Gbits/sec    0             sender
[  5]   0.00-10.00  sec  34.2 GBytes  29.4 Gbits/sec                  receiver

Reference

https://www.redhat.com/en/blog/hands-vhost-net-do-or-do-not-there-no-try

In QEMU:

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
        file.fd = net->backend;
        for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
            if (!virtio_queue_enabled(dev, net->dev.vq_index +
                                      file.index)) {
                /* Queue might not be ready for start */
                continue;
            }
            r = vhost_net_set_backend(&net->dev, &file);
            if (r < 0) {
                r = -errno;
                goto fail;
            }
        }
    }

For each virtqueue of the device, QEMU sets the TAP fd as the vhost backend via vhost_net_set_backend() (the VHOST_NET_SET_BACKEND ioctl), skipping queues that are not yet enabled.

https://github.com/qemu/qemu/blob/28ae3179fc52d2e4d870b635c4a412aab99759e7/hw/net/vhost_net.c#L413-L428

In kernel, it is implemented by:

static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
			    unsigned long arg)
{
	struct vhost_net *n = f->private_data;
	void __user *argp = (void __user *)arg;
	u64 __user *featurep = argp;
	struct vhost_vring_file backend;
	u64 features;
	int r;

	switch (ioctl) {
	case VHOST_NET_SET_BACKEND:
		if (copy_from_user(&backend, argp, sizeof backend))
			return -EFAULT;
		return vhost_net_set_backend(n, backend.index, backend.fd);

https://elixir.bootlin.com/linux/v6.10/source/drivers/vhost/net.c

Clearly, backend index is a virtqueue index.

		vhost_vq_set_backend(vq, sock);
		vhost_net_buf_unproduce(nvq);
		r = vhost_vq_init_access(vq);
		if (r)
			goto err_used;
		r = vhost_net_enable_vq(n, vq);
		if (r)
			goto err_used;
		if (index == VHOST_NET_VQ_RX) {
			if (sock)
				nvq->rx_ring = get_tap_ptr_ring(sock->file);
			else
				nvq->rx_ring = NULL;
		}
// vhost_vq_set_backend() saves the TAP socket pointer in the vq's private_data field:
/* Attach a backend to a virtqueue by stashing it in the vq's private_data
 * pointer (for vhost-net this is the TAP socket, per the caller above).
 * Excerpt from drivers/vhost/vhost.h, Linux v6.10. */
static inline void vhost_vq_set_backend(struct vhost_virtqueue *vq,
					void *private_data)
{
	vq->private_data = private_data;
}
/* Start polling the virtqueue's backend socket, if one is attached.
 * Returns 0 on success (including when no backend is set yet).
 * Excerpt from drivers/vhost/net.c, Linux v6.10. */
static int vhost_net_enable_vq(struct vhost_net *n,
				struct vhost_virtqueue *vq)
{
	struct vhost_net_virtqueue *nvq =
		container_of(vq, struct vhost_net_virtqueue, vq);
	/* n->poll and n->vqs appear to be parallel arrays: the vq's array
	 * index (nvq - n->vqs) selects its matching poll structure. */
	struct vhost_poll *poll = n->poll + (nvq - n->vqs);
	struct socket *sock;

	/* No backend attached yet: nothing to poll, report success. */
	sock = vhost_vq_get_backend(vq);
	if (!sock)
		return 0;

	return vhost_poll_start(poll, sock->file);
}
/* Start polling a file. We add ourselves to file's wait queue. The caller must
 * keep a reference to a file until after vhost_poll_stop is called.
 * (Excerpt from drivers/vhost/vhost.c, Linux v6.10.) */
int vhost_poll_start(struct vhost_poll *poll, struct file *file)
{
	__poll_t mask;

	/* Already registered on a wait-queue head: nothing to do. */
	if (poll->wqh)
		return 0;

	/* vfs_poll() both queries current readiness and registers the
	 * callback in poll->table on the file's wait queue. */
	mask = vfs_poll(file, &poll->table);
	/* Events already pending: fire the wakeup immediately rather than
	 * waiting for the next notification. */
	if (mask)
		vhost_poll_wakeup(&poll->wait, 0, 0, poll_to_key(mask));
	/* On error, undo the registration and fail the start. */
	if (mask & EPOLLERR) {
		vhost_poll_stop(poll);
		return -EINVAL;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(vhost_poll_start);

https://elixir.bootlin.com/linux/v6.10/source/drivers/vhost/vhost.c#L204-L223

Datapath

There are 2 functions: handle_rx and handle_tx.

Normally there is a socket involved in tx/rx. It's normally the backend TAP device's socket descriptor.

/* Expects to be always run from workqueue - which acts as
 * read-size critical section for our kind of RCU.
 * TX datapath entry point (abridged excerpt from drivers/vhost/net.c,
 * Linux v6.10): drains the TX virtqueue into the backend socket. */
static void handle_tx(struct vhost_net *net)
{
	/* TX virtqueue and its vhost_net wrapper. */
	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
	struct vhost_virtqueue *vq = &nvq->vq;
	struct socket *sock;

	mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX);
	/* Backend TAP socket installed by VHOST_NET_SET_BACKEND; if none
	 * is attached yet there is nothing to transmit to. */
	sock = vhost_vq_get_backend(vq);
	if (!sock)
		goto out;

	if (!vq_meta_prefetch(vq))
		goto out;

	/* Presumably suppresses guest notifications and backend polling
	 * while the ring is being drained — see the disable/enable pair in
	 * the full kernel source. */
	vhost_disable_notify(&net->dev, vq);
	vhost_net_disable_vq(net, vq);

	/* Zero-copy-capable sockets take the zerocopy path; otherwise the
	 * payload is copied. */
	if (vhost_sock_zcopy(sock))
		handle_tx_zerocopy(net, sock);
	else
		handle_tx_copy(net, sock);

out:
	mutex_unlock(&vq->mutex);
}

https://elixir.bootlin.com/linux/v6.10/source/drivers/vhost/net.c#L938-L964

virtio device's status:

# ethtool -S eth0
NIC statistics:
     rx_queue_0_packets: 713447
     rx_queue_0_bytes: 36764496608
     rx_queue_0_drops: 0
     rx_queue_0_xdp_packets: 0
     rx_queue_0_xdp_tx: 0
     rx_queue_0_xdp_redirects: 0
     rx_queue_0_xdp_drops: 0
     rx_queue_0_kicks: 160
     tx_queue_0_packets: 493899
     tx_queue_0_bytes: 32604769
     tx_queue_0_xdp_tx: 0
     tx_queue_0_xdp_tx_drops: 0
     tx_queue_0_kicks: 14938
     tx_queue_0_tx_timeouts: 0

Defined here: https://elixir.bootlin.com/linux/v6.10/source/drivers/net/virtio_net.c#L131-L158

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment