This adds support for vhost-net virtio kernel backend.
This patch is not intended to be merged yet.
I'm posting it for the benefit of people testing
the backend.
Usage instructions:
vhost currently requires MSI-X support in guest virtio.
This means the guest kernel version should be >= 2.6.31.
To enable vhost, simply add the ",vhost" flag to the nic options.
Example with tap backend:
qemu-system-x86_64 -m 1G disk-c.qcow2 \
-net tap,ifname=msttap0,script=/home/mst/ifup,downscript=no \
-net nic,model=virtio,vhost
Example with raw socket backend:
ifconfig eth3 promisc
qemu-system-x86_64 -m 1G disk-c.qcow2 \
-net raw,ifname=eth3 \
-net nic,model=virtio,vhost
This patchset is RFC, but works without issues for me.
TODO:
* migration support
* level triggered interrupts
* fix driver unloading/hotplug
* general cleanup and upstreaming
It still needs to be split up, tested and benchmarked properly,
but posting it here in case people want to test drive
the kernel bits I posted.
Some further info, performance etc:
http://www.linux-kvm.org/page/VhostNet
Signed-off-by: Michael S. Tsirkin <mst at redhat.com>
---
Makefile.target | 3 +-
hw/vhost_net.c | 251 +++++++++++++++++++++++++++++++++++++++++++++
hw/vhost_net.h | 38 +++++++
hw/virtio-net.c | 67 ++++++++++--
hw/virtio-pci.c | 40 +++++++
hw/virtio.c | 19 ----
hw/virtio.h | 28 +++++-
kvm/include/linux/vhost.h | 126 +++++++++++++++++++++++
net.c | 7 ++
net.h | 1 +
qemu-kvm.c | 8 --
qemu-kvm.h | 9 ++
12 files changed, 555 insertions(+), 42 deletions(-)
create mode 100644 hw/vhost_net.c
create mode 100644 hw/vhost_net.h
create mode 100644 kvm/include/linux/vhost.h
diff --git a/Makefile.target b/Makefile.target
index acee285..0d8e688 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -160,7 +160,8 @@ obj-y = vl.o monitor.o pci.o isa_mmio.o machine.o \
gdbstub.o gdbstub-xml.o
# virtio has to be here due to weird dependency between PCI and virtio-net.
# need to fix this properly
-obj-y += virtio-blk.o virtio-balloon.o virtio-net.o virtio-console.o
virtio-pci.o
+obj-y += virtio-blk.o virtio-balloon.o virtio-net.o virtio-console.o
virtio-pci.o \
+ vhost_net.o
obj-$(CONFIG_KVM) += kvm.o kvm-all.o
# MSI-X depends on kvm for interrupt injection,
# so moved it from Makefile.hw to Makefile.target for now
diff --git a/hw/vhost_net.c b/hw/vhost_net.c
new file mode 100644
index 0000000..bc179ab
--- /dev/null
+++ b/hw/vhost_net.c
@@ -0,0 +1,251 @@
+#include <sys/eventfd.h>
+#include <sys/socket.h>
+#include <linux/kvm.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <linux/vhost.h>
+#include <linux/virtio_ring.h>
+#include <netpacket/packet.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+
+#include <stdio.h>
+
+#include "net.h"
+#include "qemu-kvm.h"
+
+#include "vhost_net.h"
+
+/*
+ * Configure one vhost virtqueue.
+ *
+ * Programs the ring size, creates the kick/call eventfds and hands them to
+ * vhost, maps the three guest ring areas and passes their host addresses to
+ * vhost, and finally asks the transport binding to route the guest's
+ * interrupt (irqfd) and queue notification (queuefd) through the eventfds.
+ *
+ * dev:  vhost device whose control fd receives the ioctls
+ * vdev: virtio device providing the transport binding callbacks
+ * vq:   per-queue vhost state filled in here (eventfds, mapped rings)
+ * q:    virtio queue describing the guest ring layout
+ * idx:  queue index used in every vhost request
+ *
+ * Returns 0 on success or a negative errno-style code on failure.
+ * NOTE: on failure nothing set up so far is unwound (eventfds stay open,
+ * mappings stay in place).
+ */
+static int vhost_virtqueue_init(struct vhost_dev *dev,
+                                struct VirtIODevice *vdev,
+                                struct vhost_virtqueue *vq,
+                                struct VirtQueue *q,
+                                unsigned idx)
+{
+    target_phys_addr_t s, l;
+    int r;
+    struct vhost_vring_addr addr = {
+        .index = idx,
+    };
+    struct vhost_vring_file file = {
+        .index = idx,
+    };
+    struct vhost_vring_state size = {
+        .index = idx,
+    };
+
+    size.num = q->vring.num;
+    r = ioctl(dev->control, VHOST_SET_VRING_NUM, &size);
+    if (r < 0)
+        return -errno;
+
+    /* eventfd() can fail (e.g. fd exhaustion); do not pass -1 to vhost,
+     * where it means "unbind". */
+    file.fd = vq->kick = eventfd(0, 0);
+    if (vq->kick < 0)
+        return -errno;
+    r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file);
+    if (r < 0)
+        return -errno;
+
+    file.fd = vq->call = eventfd(0, 0);
+    if (vq->call < 0)
+        return -errno;
+    r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
+    if (r < 0)
+        return -errno;
+
+    /* Descriptor table: read-only mapping; a short mapping is rejected. */
+    s = l = sizeof(struct vring_desc) * q->vring.num;
+    vq->desc = cpu_physical_memory_map(q->vring.desc, &l, 0);
+    if (!vq->desc || l != s)
+        return -ENOMEM;
+    addr.user_addr = (u_int64_t)(unsigned long)vq->desc;
+    r = ioctl(dev->control, VHOST_SET_VRING_DESC, &addr);
+    if (r < 0)
+        return -errno;
+
+    /* Available ring: entries are 16-bit descriptor indices, so size the
+     * mapping with u_int16_t rather than u_int64_t. */
+    s = l = offsetof(struct vring_avail, ring) +
+        sizeof(u_int16_t) * q->vring.num;
+    vq->avail = cpu_physical_memory_map(q->vring.avail, &l, 0);
+    if (!vq->avail || l != s)
+        return -ENOMEM;
+    addr.user_addr = (u_int64_t)(unsigned long)vq->avail;
+    r = ioctl(dev->control, VHOST_SET_VRING_AVAIL, &addr);
+    if (r < 0)
+        return -errno;
+
+    /* Used ring: mapped writable (is_write = 1). */
+    s = l = offsetof(struct vring_used, ring) +
+        sizeof(struct vring_used_elem) * q->vring.num;
+    vq->used = cpu_physical_memory_map(q->vring.used, &l, 1);
+    if (!vq->used || l != s)
+        return -ENOMEM;
+    addr.user_addr = (u_int64_t)(unsigned long)vq->used;
+    r = ioctl(dev->control, VHOST_SET_VRING_USED, &addr);
+    if (r < 0)
+        return -errno;
+
+    /* The binding callbacks report failure through their return value
+     * (e.g. -EINVAL), not through errno, so propagate it directly. */
+    r = vdev->binding->irqfd(vdev->binding_opaque, q->vector, vq->call);
+    if (r < 0)
+        return r;
+
+    r = vdev->binding->queuefd(vdev->binding_opaque, idx, vq->kick);
+    if (r < 0)
+        return r;
+
+    return 0;
+}
+
+/*
+ * Open the vhost-net control device, claim ownership of it for this
+ * process and read the feature bits the kernel backend offers into
+ * hdev->features.
+ *
+ * Returns 0 on success or -errno on failure. On failure the control
+ * descriptor is closed again and hdev->control is set to -1, so the caller
+ * has nothing to clean up.
+ */
+static int vhost_dev_init(struct vhost_dev *hdev)
+{
+    uint64_t features;
+    int r;
+
+    hdev->control = open("/dev/vhost-net", O_RDWR);
+    if (hdev->control < 0)
+        return -errno;
+
+    r = ioctl(hdev->control, VHOST_SET_OWNER, NULL);
+    if (r < 0)
+        goto fail;
+
+    r = ioctl(hdev->control, VHOST_GET_FEATURES, &features);
+    if (r < 0)
+        goto fail;
+
+    hdev->features = features;
+    return 0;
+
+fail:
+    /* Capture errno first: close() may overwrite it. */
+    r = -errno;
+    close(hdev->control);
+    hdev->control = -1;
+    return r;
+}
+
+/*
+ * Release the vhost control descriptor.
+ *
+ * The descriptor is invalidated after closing so that a repeated cleanup
+ * cannot close an unrelated descriptor that happened to reuse the number.
+ */
+static void vhost_dev_cleanup(struct vhost_dev *hdev)
+{
+    if (hdev->control >= 0) {
+        close(hdev->control);
+        hdev->control = -1;
+    }
+}
+
+/*
+ * Start a vhost device: acknowledge the negotiated features, describe the
+ * guest memory layout to vhost and initialise every virtqueue.
+ *
+ * The memory table is built from the global KVM slot array; empty slots
+ * and slots with dirty-page logging enabled are left out.
+ *
+ * Returns 0 on success or a negative errno-style code on failure.
+ */
+static int vhost_dev_start(struct vhost_dev *hdev,
+                           VirtIODevice *vdev)
+{
+    int i, r, n = 0;
+    struct vhost_memory *mem;
+
+    r = ioctl(hdev->control, VHOST_ACK_FEATURES, &hdev->acked_features);
+    if (r < 0)
+        return -errno;
+
+    /* First pass: count the regions so the table can be sized. */
+    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
+        if (!slots[i].len || (slots[i].flags & KVM_MEM_LOG_DIRTY_PAGES)) {
+            continue;
+        }
+        ++n;
+    }
+
+    mem = qemu_mallocz(offsetof(struct vhost_memory, regions) +
+                       n * sizeof(struct vhost_memory_region));
+    if (!mem)
+        return -ENOMEM;
+    mem->nregions = n;
+
+    /* Second pass: fill in the regions using the same filter. */
+    n = 0;
+    for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) {
+        if (!slots[i].len || (slots[i].flags & KVM_MEM_LOG_DIRTY_PAGES)) {
+            continue;
+        }
+        mem->regions[n].guest_phys_addr = slots[i].phys_addr;
+        mem->regions[n].memory_size = slots[i].len;
+        mem->regions[n].userspace_addr = slots[i].userspace_addr;
+        ++n;
+    }
+
+    r = ioctl(hdev->control, VHOST_SET_MEM_TABLE, mem);
+    if (r < 0)
+        r = -errno;     /* capture before qemu_free() can disturb errno */
+    /* The table is only needed for the duration of the ioctl
+     * (NOTE(review): relies on the kernel copying it - confirm). */
+    qemu_free(mem);
+    if (r < 0)
+        return r;
+
+    for (i = 0; i < hdev->nvqs; ++i) {
+        r = vhost_virtqueue_init(hdev,
+                                 vdev,
+                                 hdev->vqs + i,
+                                 vdev->vq + i,
+                                 i);
+        if (r < 0)
+            return r;
+    }
+
+    return 0;
+}
+
+/*
+ * Return the virtio feature bits (as a bit mask) that the vhost backend
+ * offers and that may therefore be advertised to the guest.
+ *
+ * Only NOTIFY_ON_EMPTY and INDIRECT_DESC are passed through. The
+ * VIRTIO_* constants are bit numbers, so they must be shifted into a mask
+ * before being ORed into the result.
+ */
+unsigned vhost_net_get_features(struct vhost_net *net)
+{
+    unsigned features = 0;
+    if (net->dev.features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY))
+        features |= (1 << VIRTIO_F_NOTIFY_ON_EMPTY);
+    if (net->dev.features & (1 << VIRTIO_RING_F_INDIRECT_DESC))
+        features |= (1 << VIRTIO_RING_F_INDIRECT_DESC);
+    return features;
+}
+
+/*
+ * Record the features to acknowledge to vhost: the features the backend
+ * itself requires plus whichever of NOTIFY_ON_EMPTY / INDIRECT_DESC the
+ * guest accepted in 'features' (a bit mask).
+ *
+ * The VIRTIO_* constants are bit numbers, so they must be shifted into a
+ * mask before being ORed into acked_features.
+ */
+void vhost_net_ack_features(struct vhost_net *net, unsigned features)
+{
+    net->dev.acked_features = net->dev.backend_features;
+    if (features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY))
+        net->dev.acked_features |= (1 << VIRTIO_F_NOTIFY_ON_EMPTY);
+    if (features & (1 << VIRTIO_RING_F_INDIRECT_DESC))
+        net->dev.acked_features |= (1 << VIRTIO_RING_F_INDIRECT_DESC);
+}
+
+/*
+ * Look up the host file descriptor behind 'backend'.
+ *
+ * A raw socket backend is tried first and requires the
+ * VHOST_NET_F_VIRTIO_NET_HDR feature; a tap backend is tried next and
+ * requires no extra features. *backend_features is written only when a
+ * descriptor is found.
+ *
+ * Returns the descriptor, or -EBADFD when the backend is neither kind.
+ */
+static int vhost_net_get_fd(VLANClientState *backend,
+                            unsigned long long *backend_features)
+{
+    int fd = raw_get_fd(backend);
+
+    if (fd < 0) {
+        /* Not a raw socket: fall back to tap. */
+        fd = tap_get_fd(backend);
+        if (fd < 0) {
+            fprintf(stderr, "vhost requires raw socket or tap backend\n");
+            return -EBADFD;
+        }
+        *backend_features = 0;
+        return fd;
+    }
+
+    *backend_features = (1 << VHOST_NET_F_VIRTIO_NET_HDR);
+    return fd;
+}
+
+/*
+ * Prepare a vhost-net instance for 'backend'.
+ *
+ * Resolves the backend descriptor, opens the vhost device and verifies
+ * that vhost offers every feature the backend needs. Returns 0 on success
+ * or a negative errno-style code.
+ */
+int vhost_net_init(struct vhost_net *net, VLANClientState *backend)
+{
+    unsigned long long missing;
+    int fd;
+    int r;
+
+    if (!backend) {
+        fprintf(stderr, "vhost requires backend to be setup\n");
+        return -EINVAL;
+    }
+
+    fd = vhost_net_get_fd(backend, &net->dev.backend_features);
+    if (fd < 0)
+        return fd;
+    net->backend = fd;
+
+    r = vhost_dev_init(&net->dev);
+    if (r < 0)
+        return r;
+
+    /* Bits the backend requires but vhost does not offer. */
+    missing = ~net->dev.features & net->dev.backend_features;
+    if (missing) {
+        fprintf(stderr, "vhost lacks feature mask %llu for backend\n",
+                missing);
+        vhost_dev_cleanup(&net->dev);
+        return -EINVAL;
+    }
+
+    /* Set sane init value. Override when guest acks. */
+    vhost_net_ack_features(net, 0);
+    return 0;
+}
+
+/*
+ * Hand the data path over to vhost: start the vhost device on the two
+ * queues embedded in 'net', stop qemu from polling the backend
+ * descriptor, and attach that descriptor to each vhost ring.
+ *
+ * Returns 0 on success or a negative errno-style code.
+ */
+int vhost_net_start(struct vhost_net *net,
+                    VirtIODevice *dev)
+{
+    struct vhost_vring_file file = { };
+    int r;
+
+    net->dev.vqs = net->vqs;
+    net->dev.nvqs = 2;
+
+    r = vhost_dev_start(&net->dev, dev);
+    if (r < 0)
+        return r;
+
+    /* Stop polling backend from qemu. */
+    qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
+
+    file.fd = net->backend;
+    file.index = 0;
+    while (file.index < net->dev.nvqs) {
+        r = ioctl(net->dev.control, VHOST_NET_SET_BACKEND, &file);
+        if (r < 0) {
+            /* TODO: cleanup on error. */
+            return -errno;
+        }
+        ++file.index;
+    }
+
+    return 0;
+}
diff --git a/hw/vhost_net.h b/hw/vhost_net.h
new file mode 100644
index 0000000..65720e1
--- /dev/null
+++ b/hw/vhost_net.h
@@ -0,0 +1,38 @@
+#ifndef VHOST_NET_H
+#define VHOST_NET_H
+
+#include "hw/virtio.h"
+
+/* Per-queue state kept by qemu for a ring handed over to vhost. */
+struct vhost_virtqueue {
+ /* eventfd given to vhost via VHOST_SET_VRING_KICK and to the binding's
+  * queuefd callback. */
+ int kick;
+ /* eventfd given to vhost via VHOST_SET_VRING_CALL and to the binding's
+  * irqfd callback. */
+ int call;
+ /* Host mappings of the guest descriptor table, available ring and used
+  * ring, as returned by cpu_physical_memory_map(). */
+ void *desc;
+ void *avail;
+ void *used;
+};
+
+/* State of one opened vhost control device. */
+struct vhost_dev {
+ /* Descriptor from open("/dev/vhost-net"); target of all vhost ioctls. */
+ int control;
+ /* Array of nvqs per-queue states; set up by vhost_net_start(). */
+ struct vhost_virtqueue *vqs;
+ int nvqs;
+ /* Feature bits reported by VHOST_GET_FEATURES. */
+ unsigned long long features;
+ /* Feature bits passed to VHOST_ACK_FEATURES when the device starts. */
+ unsigned long long acked_features;
+ /* Feature bits the selected backend requires from vhost. */
+ unsigned long long backend_features;
+};
+
+/* vhost-net instance embedded in a virtio-net device. */
+struct vhost_net {
+ struct vhost_dev dev;
+ /* Storage for the two queues that dev.vqs is pointed at on start. */
+ struct vhost_virtqueue vqs[2];
+ /* Backend descriptor (raw socket or tap) attached to the vhost rings. */
+ int backend;
+};
+
+int vhost_net_init(struct vhost_net *net,
+ VLANClientState *backend);
+
+int vhost_net_start(struct vhost_net *net,
+ VirtIODevice *dev);
+
+unsigned vhost_net_get_features(struct vhost_net *net);
+void vhost_net_ack_features(struct vhost_net *net, unsigned features);
+
+#endif
diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 2e51a6a..3b0b947 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -19,6 +19,8 @@
#include "qemu-kvm.h"
#endif
+#include "vhost_net.h"
+
#define TAP_VNET_HDR
#define VIRTIO_NET_VM_VERSION 10
@@ -56,6 +58,8 @@ typedef struct VirtIONet
uint8_t *macs;
} mac_table;
uint32_t *vlans;
+ int vhost_device;
+ struct vhost_net vhost;
} VirtIONet;
/* TODO
@@ -127,16 +131,10 @@ static void virtio_net_reset(VirtIODevice *vdev)
static uint32_t virtio_net_get_features(VirtIODevice *vdev)
{
- uint32_t features = (1 << VIRTIO_NET_F_MAC) |
- (1 << VIRTIO_NET_F_MRG_RXBUF) |
- (1 << VIRTIO_NET_F_STATUS) |
- (1 << VIRTIO_NET_F_CTRL_VQ) |
- (1 << VIRTIO_NET_F_CTRL_RX) |
- (1 << VIRTIO_NET_F_CTRL_VLAN) |
- (1 << VIRTIO_NET_F_CTRL_RX_EXTRA);
+ uint32_t features = 0;
+ VirtIONet *n = to_virtio_net(vdev);
#ifdef TAP_VNET_HDR
- VirtIONet *n = to_virtio_net(vdev);
VLANClientState *host = n->vc->vlan->first_client;
if (tap_has_vnet_hdr(host)) {
@@ -149,12 +147,23 @@ static uint32_t virtio_net_get_features(VirtIODevice
*vdev)
features |= (1 << VIRTIO_NET_F_HOST_TSO4);
features |= (1 << VIRTIO_NET_F_HOST_TSO6);
features |= (1 << VIRTIO_NET_F_HOST_ECN);
- features |= (1 << VIRTIO_NET_F_MRG_RXBUF);
/* Kernel can't actually handle UFO in software currently. */
}
#endif
- return features | virtio_common_features();
+ if (n->vhost_device)
+ features |= (1 << VIRTIO_NET_F_MAC) |
vhost_net_get_features(&n->vhost);
+ else
+ features |= virtio_common_features() |
+ (1 << VIRTIO_NET_F_MAC) |
+ (1 << VIRTIO_NET_F_MRG_RXBUF) |
+ (1 << VIRTIO_NET_F_STATUS) |
+ (1 << VIRTIO_NET_F_CTRL_VQ) |
+ (1 << VIRTIO_NET_F_CTRL_RX) |
+ (1 << VIRTIO_NET_F_CTRL_VLAN) |
+ (1 << VIRTIO_NET_F_CTRL_RX_EXTRA);
+
+ return features;
}
static uint32_t virtio_net_bad_features(VirtIODevice *vdev)
@@ -175,11 +184,15 @@ static uint32_t virtio_net_bad_features(VirtIODevice
*vdev)
static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
{
VirtIONet *n = to_virtio_net(vdev);
+ /* vhost net supports no features */
#ifdef TAP_VNET_HDR
VLANClientState *host = n->vc->vlan->first_client;
#endif
n->mergeable_rx_bufs = !!(features & (1 <<
VIRTIO_NET_F_MRG_RXBUF));
+ if (n->vhost_device) {
+ vhost_net_ack_features(&n->vhost, features);
+ }
#ifdef TAP_VNET_HDR
if (!tap_has_vnet_hdr(host) || !host->set_offload)
@@ -351,6 +364,9 @@ static void virtio_net_handle_rx(VirtIODevice *vdev,
VirtQueue *vq)
static int do_virtio_net_can_receive(VirtIONet *n, int bufsize)
{
+ if (n->vhost_device)
+ return 0;
+
if (!virtio_queue_ready(n->rx_vq) ||
!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
return 0;
@@ -411,6 +427,7 @@ static int iov_fill(struct iovec *iov, int iovcnt, const
void *buf, int count)
while (offset < count && i < iovcnt) {
int len = MIN(iov[i].iov_len, count - offset);
memcpy(iov[i].iov_base, buf + offset, len);
+
offset += len;
i++;
}
@@ -611,6 +628,8 @@ static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
#else
int has_vnet_hdr = 0;
#endif
+ if (n->vhost_device)
+ return;
if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
return;
@@ -810,6 +829,8 @@ static void virtio_net_cleanup(VLANClientState *vc)
{
VirtIONet *n = vc->opaque;
+ /* TODO: vhost device cleanup */
+
qemu_purge_queued_packets(vc);
unregister_savevm("virtio-net", n);
@@ -823,6 +844,21 @@ static void virtio_net_cleanup(VLANClientState *vc)
virtio_cleanup(&n->vdev);
}
+/*
+ * driver_ok hook for virtio-net: when the device was configured to use
+ * vhost, start the vhost backend. Does nothing for non-vhost devices.
+ *
+ * A failure to start vhost is fatal: the error is reported and qemu exits.
+ */
+static void virtio_net_driver_ok(VirtIODevice *vdev)
+{
+    VirtIONet *n = to_virtio_net(vdev);
+    int r;
+
+    if (!n->vhost_device)
+        return;
+
+    r = vhost_net_start(&n->vhost, vdev);
+    if (r) {
+        /* Name the call that actually failed (was "vhost_net_init"). */
+        fprintf(stderr, "\nvhost_net_start returned %d\n", r);
+        exit(-r);
+    }
+}
+
VirtIODevice *virtio_net_init(DeviceState *dev)
{
VirtIONet *n;
@@ -831,6 +867,15 @@ VirtIODevice *virtio_net_init(DeviceState *dev)
n = (VirtIONet *)virtio_common_init("virtio-net", VIRTIO_ID_NET,
sizeof(struct virtio_net_config),
sizeof(VirtIONet));
+ n->vhost_device = dev->nd->vhost_device;
+ if (n->vhost_device) {
+ int r = vhost_net_init(&n->vhost,
dev->nd->vlan->first_client);
+ if (r) {
+ fprintf(stderr, "Unable to initialize vhost device:
%d\n", r);
+ virtio_cleanup(&n->vdev);
+ return NULL;
+ }
+ }
n->vdev.get_config = virtio_net_get_config;
n->vdev.set_config = virtio_net_set_config;
@@ -838,6 +883,7 @@ VirtIODevice *virtio_net_init(DeviceState *dev)
n->vdev.set_features = virtio_net_set_features;
n->vdev.bad_features = virtio_net_bad_features;
n->vdev.reset = virtio_net_reset;
+ n->vdev.driver_ok = virtio_net_driver_ok;
n->rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx);
n->ctrl_vq = virtio_add_queue(&n->vdev, 64,
virtio_net_handle_ctrl);
@@ -864,7 +910,6 @@ VirtIODevice *virtio_net_init(DeviceState *dev)
n->vdev.nvectors = 3;
else
n->vdev.nvectors = dev->nd->nvectors;
-
register_savevm("virtio-net", virtio_net_id++,
VIRTIO_NET_VM_VERSION,
virtio_net_save, virtio_net_load, n);
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 0716f6f..b7f073b 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -15,11 +15,13 @@
#include <inttypes.h>
+#include <linux/kvm.h>
#include "virtio.h"
#include "pci.h"
#include "sysemu.h"
#include "msix.h"
#include "net.h"
+#include "qemu-kvm.h"
/* from Linux's linux/virtio_pci.h */
@@ -199,6 +201,8 @@ static void virtio_ioport_write(void *opaque, uint32_t addr,
uint32_t val)
vdev->status = val & 0xFF;
if (vdev->status == 0)
virtio_pci_reset(&proxy->pci_dev.qdev);
+ if ((val & VIRTIO_CONFIG_S_DRIVER_OK) && vdev->driver_ok)
+ vdev->driver_ok(vdev);
break;
case VIRTIO_MSI_CONFIG_VECTOR:
msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
@@ -373,12 +377,48 @@ static void virtio_write_config(PCIDevice *pci_dev,
uint32_t address,
msix_write_config(pci_dev, address, val, len);
}
+/*
+ * Binding callback: ask KVM to inject the interrupt behind MSI-X 'vector'
+ * whenever eventfd 'fd' is signalled.
+ *
+ * Returns 0 on success, -EINVAL for a vector outside the MSI-X table,
+ * -ENOENT for a vector that is not in use, or the negative result of the
+ * KVM_IRQFD request.
+ */
+static int virtio_pci_irqfd(void * opaque, uint16_t vector, int fd)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    struct kvm_irqfd req = { .fd = fd };
+    int rc;
+
+    if (vector >= proxy->pci_dev.msix_entries_nr)
+        return -EINVAL;
+    if (!proxy->pci_dev.msix_entry_used[vector])
+        return -ENOENT;
+
+    req.gsi = proxy->pci_dev.msix_irq_entries[vector].gsi;
+    rc = kvm_vm_ioctl(kvm_state, KVM_IRQFD, &req);
+
+    /* Only failures are passed on; any non-negative result is success. */
+    return rc < 0 ? rc : 0;
+}
+
+/*
+ * Binding callback: ask KVM to signal eventfd 'fd' when the guest does a
+ * 2-byte port write of queue number 'n' to the device's QUEUE_NOTIFY
+ * register.
+ *
+ * Returns 0 on success or the negative result of the KVM_IOEVENTFD
+ * request.
+ */
+static int virtio_pci_queuefd(void * opaque, int n, int fd)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    struct kvm_ioeventfd req = { .fd = fd };
+    int rc;
+
+    req.addr = proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY;
+    req.len = 2;
+    req.datamatch = n;
+    req.flags = KVM_IOEVENTFD_FLAG_DATAMATCH | KVM_IOEVENTFD_FLAG_PIO;
+
+    rc = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &req);
+
+    /* Only failures are passed on; any non-negative result is success. */
+    return rc < 0 ? rc : 0;
+}
+
static const VirtIOBindings virtio_pci_bindings = {
.notify = virtio_pci_notify,
.save_config = virtio_pci_save_config,
.load_config = virtio_pci_load_config,
.save_queue = virtio_pci_save_queue,
.load_queue = virtio_pci_load_queue,
+ .irqfd = virtio_pci_irqfd,
+ .queuefd = virtio_pci_queuefd,
};
static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
diff --git a/hw/virtio.c b/hw/virtio.c
index 337ff27..cc5c205 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -54,24 +54,6 @@ typedef struct VRingUsed
VRingUsedElem ring[0];
} VRingUsed;
-typedef struct VRing
-{
- unsigned int num;
- target_phys_addr_t desc;
- target_phys_addr_t avail;
- target_phys_addr_t used;
-} VRing;
-
-struct VirtQueue
-{
- VRing vring;
- target_phys_addr_t pa;
- uint16_t last_avail_idx;
- int inuse;
- uint16_t vector;
- void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
-};
-
#define VIRTIO_PCI_QUEUE_MAX 16
/* virt queue functions */
@@ -401,7 +383,6 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
sg->iov_base = cpu_physical_memory_map(vring_desc_addr(desc_pa, i),
&len, is_write);
-
if (sg->iov_base == NULL || len != sg->iov_len) {
fprintf(stderr, "virtio: trying to map MMIO memory\n");
exit(1);
diff --git a/hw/virtio.h b/hw/virtio.h
index 799e608..12792da 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -54,15 +54,34 @@
struct VirtQueue;
+/* Location and size of one virtio ring in guest physical memory. */
+typedef struct VRing
+{
+ /* Number of entries in the ring. */
+ unsigned int num;
+ /* Guest physical addresses of the descriptor table, available ring and
+  * used ring. */
+ target_phys_addr_t desc;
+ target_phys_addr_t avail;
+ target_phys_addr_t used;
+} VRing;
+
+typedef struct VirtQueue VirtQueue;
+struct VirtIODevice;
+typedef struct VirtIODevice VirtIODevice;
+
+/* One virtqueue of a virtio device. Defined in the header so that code
+ * outside virtio.c (e.g. vhost) can read the ring layout and vector. */
+struct VirtQueue
+{
+ VRing vring;
+ /* NOTE(review): presumably the guest physical base address of the ring
+  * - confirm against virtio.c. */
+ target_phys_addr_t pa;
+ /* NOTE(review): presumably the next available-ring index to consume
+  * - confirm against virtio.c. */
+ uint16_t last_avail_idx;
+ /* NOTE(review): presumably the count of popped, not yet returned
+  * elements - confirm against virtio.c. */
+ int inuse;
+ /* Interrupt vector for this queue; passed to the binding's irqfd. */
+ uint16_t vector;
+ /* Per-queue handler callback. */
+ void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
+};
+
static inline target_phys_addr_t vring_align(target_phys_addr_t addr,
unsigned long align)
{
return (addr + align - 1) & ~(align - 1);
}
-typedef struct VirtQueue VirtQueue;
-typedef struct VirtIODevice VirtIODevice;
-
#define VIRTQUEUE_MAX_SIZE 1024
typedef struct VirtQueueElement
@@ -81,6 +100,8 @@ typedef struct {
void (*save_queue)(void * opaque, int n, QEMUFile *f);
int (*load_config)(void * opaque, QEMUFile *f);
int (*load_queue)(void * opaque, int n, QEMUFile *f);
+ int (*irqfd)(void * opaque, uint16_t vector, int fd);
+ int (*queuefd)(void * opaque, int n, int fd);
} VirtIOBindings;
#define VIRTIO_PCI_QUEUE_MAX 16
@@ -104,6 +125,7 @@ struct VirtIODevice
void (*get_config)(VirtIODevice *vdev, uint8_t *config);
void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
void (*reset)(VirtIODevice *vdev);
+ void (*driver_ok)(VirtIODevice *vdev);
VirtQueue *vq;
const VirtIOBindings *binding;
void *binding_opaque;
diff --git a/kvm/include/linux/vhost.h b/kvm/include/linux/vhost.h
new file mode 100644
index 0000000..aa4ff24
--- /dev/null
+++ b/kvm/include/linux/vhost.h
@@ -0,0 +1,126 @@
+#ifndef _LINUX_VHOST_H
+#define _LINUX_VHOST_H
+/* Userspace interface for in-kernel virtio accelerators. */
+
+/* vhost is used to reduce the number of system calls involved in virtio.
+ *
+ * Existing virtio net code is used in the guest without modification.
+ *
+ * This header includes interface used by userspace hypervisor for
+ * device configuration.
+ */
+
+#include <linux/types.h>
+
+#include <linux/ioctl.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ring.h>
+
+/* Argument for ring ioctls that carry a single numeric value
+ * (VHOST_SET_VRING_NUM, VHOST_SET/GET_VRING_BASE). */
+struct vhost_vring_state {
+ /* Index of the ring the request applies to. */
+ unsigned int index;
+ /* The value being set or returned (e.g. number of descriptors). */
+ unsigned int num;
+};
+
+/* Argument for ring ioctls that bind a file descriptor to a ring
+ * (kick/call/err eventfds, VHOST_NET_SET_BACKEND). */
+struct vhost_vring_file {
+ /* Index of the ring the request applies to. */
+ unsigned int index;
+ int fd; /* Pass -1 to unbind from file. */
+
+};
+
+/* Argument for ring ioctls that set the address of a ring area
+ * (VHOST_SET_VRING_DESC/AVAIL/USED, VHOST_SET_VRING_LOG). */
+struct vhost_vring_addr {
+ /* Index of the ring the request applies to. */
+ unsigned int index;
+ unsigned int padding;
+ /* Address of the area in the calling process' address space. */
+ __u64 user_addr;
+};
+
+/* One contiguous piece of guest memory: maps a guest physical range onto
+ * an address range of the calling process. */
+struct vhost_memory_region {
+ __u64 guest_phys_addr;
+ __u64 memory_size; /* bytes */
+ __u64 userspace_addr;
+ __u64 flags_padding; /* No flags are currently specified. */
+};
+
+/* All region addresses and sizes must be 4K aligned. */
+#define VHOST_PAGE_SIZE 0x1000
+
+/* Guest memory table passed to VHOST_SET_MEM_TABLE: a header followed
+ * directly by nregions region entries. */
+struct vhost_memory {
+ /* Number of entries in regions[]. */
+ __u32 nregions;
+ __u32 padding;
+ /* Variable-length trailing array; allocate header + nregions entries. */
+ struct vhost_memory_region regions[0];
+};
+
+/* ioctls */
+
+#define VHOST_VIRTIO 0xAF
+
+/* Features bitmask for forward compatibility. Transport bits are used for
+ * vhost specific features. */
+#define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64)
+#define VHOST_ACK_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64)
+
+/* Set current process as the (exclusive) owner of this file descriptor. This
+ * must be called before any other vhost command. Further calls to
+ * VHOST_SET_OWNER fail until VHOST_RESET_OWNER is called. */
+#define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01)
+/* Give up ownership, and reset the device to default values.
+ * Allows subsequent call to VHOST_SET_OWNER to succeed. */
+#define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02)
+
+/* Set up/modify memory layout */
+#define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory)
+
+/* Write logging setup. */
+/* Memory writes can optionally be logged by setting bit at an offset
+ * (calculated from the physical address) from specified log base.
+ * The bit is set using an atomic 32 bit operation. */
+/* Set base address for logging. */
+#define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64)
+/* Specify an eventfd file descriptor to signal on log write. */
+#define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int)
+
+/* Ring setup. These parameters can not be modified while ring is running
+ * (bound to a device). */
+/* Set number of descriptors in ring */
+#define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state)
+/* Start of array of descriptors (virtually contiguous) */
+#define VHOST_SET_VRING_DESC _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr)
+/* Used structure address. Must be 32 bit aligned */
+#define VHOST_SET_VRING_USED _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_addr)
+/* Available structure address. Must be 16 bit aligned */
+#define VHOST_SET_VRING_AVAIL _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_addr)
+/* Base value where queue looks for available descriptors */
+#define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state)
+/* Get accessor: reads index, writes value in num.
+ * Shares request number 0x14 with VHOST_SET_VRING_BASE; the two differ
+ * only in transfer direction (_IOWR vs _IOW). */
+#define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x14, struct vhost_vring_state)
+
+/* Logging support. Can be modified while ring is running. */
+/* Log writes to used structure, at offset calculated from specified address.
+ * Address must be 32 bit aligned. Pass 0x1 to disable logging. */
+#define VHOST_SET_VRING_LOG _IOW(VHOST_VIRTIO, 0x18, struct vhost_vring_addr)
+#define VHOST_VRING_LOG_DISABLE (0x1)
+
+/* The following ioctls use eventfd file descriptors to signal and poll
+ * for events. */
+
+/* Set eventfd to poll for added buffers */
+#define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file)
+/* Set eventfd to signal when buffers have been used */
+#define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file)
+/* Set eventfd to signal an error */
+#define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file)
+
+/* VHOST_NET specific defines */
+
+/* Attach virtio net ring to a raw socket, or tap device.
+ * The socket must be already bound to an ethernet device, this device will be
+ * used for transmit. Pass fd -1 to unbind from the socket and the transmit
+ * device. This can be used to stop the ring (e.g. for migration). */
+#define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file)
+
+/* Feature bits */
+/* Log all write descriptors. Can be changed while device is active. */
+#define VHOST_F_LOG_ALL 26
+/* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */
+#define VHOST_NET_F_VIRTIO_NET_HDR 27
+
+#endif
diff --git a/net.c b/net.c
index 9168460..5d98e90 100644
--- a/net.c
+++ b/net.c
@@ -2767,6 +2767,9 @@ static int net_init_nic(QemuOpts *opts, Monitor *mon)
if (qemu_opt_get(opts, "addr")) {
nd->devaddr = qemu_strdup(qemu_opt_get(opts, "addr"));
}
+ if (qemu_opt_get(opts, "vhost")) {
+ nd->vhost_device = qemu_opt_get_bool(opts, "vhost", 0);
+ }
nd->macaddr[0] = 0x52;
nd->macaddr[1] = 0x54;
@@ -3182,6 +3185,10 @@ static struct {
.name = "vectors",
.type = QEMU_OPT_NUMBER,
.help = "number of MSI-x vectors, 0 to disable
MSI-X",
+ }, {
+ .name = "vhost",
+ .type = QEMU_OPT_BOOL,
+ .help = "enable vhost backend",
},
{ /* end of list */ }
},
diff --git a/net.h b/net.h
index 932b50d..adcd5c6 100644
--- a/net.h
+++ b/net.h
@@ -115,6 +115,7 @@ struct NICInfo {
int used;
int bootable;
int nvectors;
+ int vhost_device;
};
extern int nb_nics;
diff --git a/qemu-kvm.c b/qemu-kvm.c
index 62ca050..a547975 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -150,14 +150,6 @@ static inline void clear_gsi(kvm_context_t kvm, unsigned
int gsi)
DPRINTF("Invalid GSI %d\n");
}
-struct slot_info {
- unsigned long phys_addr;
- unsigned long len;
- unsigned long userspace_addr;
- unsigned flags;
- int logging_count;
-};
-
struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS];
static void init_slots(void)
diff --git a/qemu-kvm.h b/qemu-kvm.h
index d6748c7..2ab6c33 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -1240,6 +1240,15 @@ int kvm_ioctl(KVMState *s, int type, ...);
int kvm_vm_ioctl(KVMState *s, int type, ...);
int kvm_check_extension(KVMState *s, unsigned int ext);
+/* Bookkeeping for one KVM memory slot. Declared in the header so that
+ * code outside qemu-kvm.c can walk the global slots[] array. */
+struct slot_info {
+ /* Guest physical start address of the slot. */
+ unsigned long phys_addr;
+ /* Size of the slot; 0 is treated as "slot unused" by readers. */
+ unsigned long len;
+ /* Address of the slot's memory in the qemu process. */
+ unsigned long userspace_addr;
+ /* Slot flags, e.g. KVM_MEM_LOG_DIRTY_PAGES. */
+ unsigned flags;
+ /* NOTE(review): presumably a count of active dirty-logging users
+  * - confirm against qemu-kvm.c. */
+ int logging_count;
+};
+
+extern struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS];
#endif
#endif
--
1.6.5.2.143.g8cc62