This patch enables the IOTLB API support for vhost-vsock devices, allowing the userspace to emulate an IOMMU for the guest. These changes were made following vhost-net, in details this patch: - exposes VIRTIO_F_ACCESS_PLATFORM feature and inits the iotlb device if the feature is acked - implements VHOST_GET_BACKEND_FEATURES and VHOST_SET_BACKEND_FEATURES ioctls - calls vq_meta_prefetch() before vq processing to prefetch vq metadata address in IOTLB - provides .read_iter, .write_iter, and .poll callbacks for the chardev; they are used by the userspace to exchange IOTLB messages This patch was tested specifying "intel_iommu=strict" in the guest kernel command line. I used QEMU with a patch applied [1] to fix a simple issue (that patch was merged in QEMU v5.2.0): $ qemu -M q35,accel=kvm,kernel-irqchip=split \ -drive file=fedora.qcow2,format=qcow2,if=virtio \ -device intel-iommu,intremap=on,device-iotlb=on \ -device vhost-vsock-pci,guest-cid=3,iommu_platform=on,ats=on [1] https://lists.gnu.org/archive/html/qemu-devel/2020-10/msg09077.html Reviewed-by: Stefan Hajnoczi <stefanha at redhat.com> Signed-off-by: Stefano Garzarella <sgarzare at redhat.com> --- The patch is the same of v1, but I re-tested it with: - QEMU v5.2.0-551-ga05f8ecd88 - Linux 5.9.15 (host) - Linux 5.9.15 and 5.10.0 (guest) Now, enabling 'ats' it works well, there are just a few simple changes. v1: https://www.spinics.net/lists/kernel/msg3716022.html v2: - updated commit message about QEMU version and string used to test - rebased on mst/vhost branch Thanks, Stefano --- drivers/vhost/vsock.c | 68 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index a483cec31d5c..5e78fb719602 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -30,7 +30,12 @@ #define VHOST_VSOCK_PKT_WEIGHT 256 enum { - VHOST_VSOCK_FEATURES = VHOST_FEATURES, + VHOST_VSOCK_FEATURES = VHOST_FEATURES | + (1ULL << VIRTIO_F_ACCESS_PLATFORM) +}; + +enum { + VHOST_VSOCK_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) }; /* Used to track all the vhost_vsock instances on the system. */ @@ -94,6 +99,9 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, if (!vhost_vq_get_backend(vq)) goto out; + if (!vq_meta_prefetch(vq)) + goto out; + /* Avoid further vmexits, we're already processing the virtqueue */ vhost_disable_notify(&vsock->dev, vq); @@ -449,6 +457,9 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) if (!vhost_vq_get_backend(vq)) goto out; + if (!vq_meta_prefetch(vq)) + goto out; + vhost_disable_notify(&vsock->dev, vq); do { u32 len; @@ -766,8 +777,12 @@ static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features) mutex_lock(&vsock->dev.mutex); if ((features & (1 << VHOST_F_LOG_ALL)) && !vhost_log_access_ok(&vsock->dev)) { - mutex_unlock(&vsock->dev.mutex); - return -EFAULT; + goto err; + } + + if ((features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) { + if (vhost_init_device_iotlb(&vsock->dev, true)) + goto err; } for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { @@ -778,6 +793,10 @@ static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features) } mutex_unlock(&vsock->dev.mutex); return 0; + +err: + mutex_unlock(&vsock->dev.mutex); + return -EFAULT; } static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, @@ -811,6 +830,18 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, if (copy_from_user(&features, argp, sizeof(features))) return -EFAULT; return vhost_vsock_set_features(vsock, features); + case VHOST_GET_BACKEND_FEATURES: + features = VHOST_VSOCK_BACKEND_FEATURES; + if (copy_to_user(argp, &features, sizeof(features))) + return -EFAULT; + return 0; + case VHOST_SET_BACKEND_FEATURES: + if (copy_from_user(&features, argp, sizeof(features))) + return -EFAULT; + if (features & ~VHOST_VSOCK_BACKEND_FEATURES) + return -EOPNOTSUPP; + vhost_set_backend_features(&vsock->dev, features); + return 0; default: mutex_lock(&vsock->dev.mutex); r = vhost_dev_ioctl(&vsock->dev, ioctl, argp); @@ -823,6 +854,34 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, } } +static ssize_t vhost_vsock_chr_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ + struct file *file = iocb->ki_filp; + struct vhost_vsock *vsock = file->private_data; + struct vhost_dev *dev = &vsock->dev; + int noblock = file->f_flags & O_NONBLOCK; + + return vhost_chr_read_iter(dev, to, noblock); +} + +static ssize_t vhost_vsock_chr_write_iter(struct kiocb *iocb, + struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct vhost_vsock *vsock = file->private_data; + struct vhost_dev *dev = &vsock->dev; + + return vhost_chr_write_iter(dev, from); +} + +static __poll_t vhost_vsock_chr_poll(struct file *file, poll_table *wait) +{ + struct vhost_vsock *vsock = file->private_data; + struct vhost_dev *dev = &vsock->dev; + + return vhost_chr_poll(file, dev, wait); +} + static const struct file_operations vhost_vsock_fops = { .owner = THIS_MODULE, .open = vhost_vsock_dev_open, @@ -830,6 +889,9 @@ static const struct file_operations vhost_vsock_fops = { .llseek = noop_llseek, .unlocked_ioctl = vhost_vsock_dev_ioctl, .compat_ioctl = compat_ptr_ioctl, + .read_iter = vhost_vsock_chr_read_iter, + .write_iter = vhost_vsock_chr_write_iter, + .poll = vhost_vsock_chr_poll, }; static struct miscdevice vhost_vsock_misc = { -- 2.26.2
On 2020/12/23 ??10:36, Stefano Garzarella wrote:> This patch enables the IOTLB API support for vhost-vsock devices, > allowing the userspace to emulate an IOMMU for the guest. > > These changes were made following vhost-net, in details this patch: > - exposes VIRTIO_F_ACCESS_PLATFORM feature and inits the iotlb > device if the feature is acked > - implements VHOST_GET_BACKEND_FEATURES and > VHOST_SET_BACKEND_FEATURES ioctls > - calls vq_meta_prefetch() before vq processing to prefetch vq > metadata address in IOTLB > - provides .read_iter, .write_iter, and .poll callbacks for the > chardev; they are used by the userspace to exchange IOTLB messages > > This patch was tested specifying "intel_iommu=strict" in the guest > kernel command line. I used QEMU with a patch applied [1] to fix a > simple issue (that patch was merged in QEMU v5.2.0): > $ qemu -M q35,accel=kvm,kernel-irqchip=split \ > -drive file=fedora.qcow2,format=qcow2,if=virtio \ > -device intel-iommu,intremap=on,device-iotlb=on \ > -device vhost-vsock-pci,guest-cid=3,iommu_platform=on,ats=on > > [1] https://lists.gnu.org/archive/html/qemu-devel/2020-10/msg09077.html > > Reviewed-by: Stefan Hajnoczi <stefanha at redhat.com> > Signed-off-by: Stefano Garzarella <sgarzare at redhat.com>Acked-by: Jason Wang <jasowang at redhat.com>> --- > > The patch is the same of v1, but I re-tested it with: > - QEMU v5.2.0-551-ga05f8ecd88 > - Linux 5.9.15 (host) > - Linux 5.9.15 and 5.10.0 (guest) > Now, enabling 'ats' it works well, there are just a few simple changes. > > v1: https://www.spinics.net/lists/kernel/msg3716022.html > v2: > - updated commit message about QEMU version and string used to test > - rebased on mst/vhost branch > > Thanks, > Stefano > --- > drivers/vhost/vsock.c | 68 +++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 65 insertions(+), 3 deletions(-) > > diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c > index a483cec31d5c..5e78fb719602 100644 > --- a/drivers/vhost/vsock.c > +++ b/drivers/vhost/vsock.c > @@ -30,7 +30,12 @@ > #define VHOST_VSOCK_PKT_WEIGHT 256 > > enum { > - VHOST_VSOCK_FEATURES = VHOST_FEATURES, > + VHOST_VSOCK_FEATURES = VHOST_FEATURES | > + (1ULL << VIRTIO_F_ACCESS_PLATFORM) > +}; > + > +enum { > + VHOST_VSOCK_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) > }; > > /* Used to track all the vhost_vsock instances on the system. */ > @@ -94,6 +99,9 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, > if (!vhost_vq_get_backend(vq)) > goto out; > > + if (!vq_meta_prefetch(vq)) > + goto out; > + > /* Avoid further vmexits, we're already processing the virtqueue */ > vhost_disable_notify(&vsock->dev, vq); > > @@ -449,6 +457,9 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) > if (!vhost_vq_get_backend(vq)) > goto out; > > + if (!vq_meta_prefetch(vq)) > + goto out; > + > vhost_disable_notify(&vsock->dev, vq); > do { > u32 len; > @@ -766,8 +777,12 @@ static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features) > mutex_lock(&vsock->dev.mutex); > if ((features & (1 << VHOST_F_LOG_ALL)) && > !vhost_log_access_ok(&vsock->dev)) { > - mutex_unlock(&vsock->dev.mutex); > - return -EFAULT; > + goto err; > + } > + > + if ((features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) { > + if (vhost_init_device_iotlb(&vsock->dev, true)) > + goto err; > } > > for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { > @@ -778,6 +793,10 @@ static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features) > } > mutex_unlock(&vsock->dev.mutex); > return 0; > + > +err: > + mutex_unlock(&vsock->dev.mutex); > + return -EFAULT; > } > > static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, > @@ -811,6 +830,18 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, > if (copy_from_user(&features, argp, sizeof(features))) > return -EFAULT; > return vhost_vsock_set_features(vsock, features); > + case VHOST_GET_BACKEND_FEATURES: > + features = VHOST_VSOCK_BACKEND_FEATURES; > + if (copy_to_user(argp, &features, sizeof(features))) > + return -EFAULT; > + return 0; > + case VHOST_SET_BACKEND_FEATURES: > + if (copy_from_user(&features, argp, sizeof(features))) > + return -EFAULT; > + if (features & ~VHOST_VSOCK_BACKEND_FEATURES) > + return -EOPNOTSUPP; > + vhost_set_backend_features(&vsock->dev, features); > + return 0; > default: > mutex_lock(&vsock->dev.mutex); > r = vhost_dev_ioctl(&vsock->dev, ioctl, argp); > @@ -823,6 +854,34 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, > } > } > > +static ssize_t vhost_vsock_chr_read_iter(struct kiocb *iocb, struct iov_iter *to) > +{ > + struct file *file = iocb->ki_filp; > + struct vhost_vsock *vsock = file->private_data; > + struct vhost_dev *dev = &vsock->dev; > + int noblock = file->f_flags & O_NONBLOCK; > + > + return vhost_chr_read_iter(dev, to, noblock); > +} > + > +static ssize_t vhost_vsock_chr_write_iter(struct kiocb *iocb, > + struct iov_iter *from) > +{ > + struct file *file = iocb->ki_filp; > + struct vhost_vsock *vsock = file->private_data; > + struct vhost_dev *dev = &vsock->dev; > + > + return vhost_chr_write_iter(dev, from); > +} > + > +static __poll_t vhost_vsock_chr_poll(struct file *file, poll_table *wait) > +{ > + struct vhost_vsock *vsock = file->private_data; > + struct vhost_dev *dev = &vsock->dev; > + > + return vhost_chr_poll(file, dev, wait); > +} > + > static const struct file_operations vhost_vsock_fops = { > .owner = THIS_MODULE, > .open = vhost_vsock_dev_open, > @@ -830,6 +889,9 @@ static const struct file_operations vhost_vsock_fops = { > .llseek = noop_llseek, > .unlocked_ioctl = vhost_vsock_dev_ioctl, > .compat_ioctl = compat_ptr_ioctl, > + .read_iter = vhost_vsock_chr_read_iter, > + .write_iter = vhost_vsock_chr_write_iter, > + .poll = vhost_vsock_chr_poll, > }; > > static struct miscdevice vhost_vsock_misc = {
On Wed, Dec 23, 2020 at 03:36:38PM +0100, Stefano Garzarella wrote:> This patch enables the IOTLB API support for vhost-vsock devices, > allowing the userspace to emulate an IOMMU for the guest. > > These changes were made following vhost-net, in details this patch: > - exposes VIRTIO_F_ACCESS_PLATFORM feature and inits the iotlb > device if the feature is acked > - implements VHOST_GET_BACKEND_FEATURES and > VHOST_SET_BACKEND_FEATURES ioctls > - calls vq_meta_prefetch() before vq processing to prefetch vq > metadata address in IOTLB > - provides .read_iter, .write_iter, and .poll callbacks for the > chardev; they are used by the userspace to exchange IOTLB messages > > This patch was tested specifying "intel_iommu=strict" in the guest > kernel command line. I used QEMU with a patch applied [1] to fix a > simple issue (that patch was merged in QEMU v5.2.0): > $ qemu -M q35,accel=kvm,kernel-irqchip=split \ > -drive file=fedora.qcow2,format=qcow2,if=virtio \ > -device intel-iommu,intremap=on,device-iotlb=on \ > -device vhost-vsock-pci,guest-cid=3,iommu_platform=on,ats=on > > [1] https://lists.gnu.org/archive/html/qemu-devel/2020-10/msg09077.html > > Reviewed-by: Stefan Hajnoczi <stefanha at redhat.com> > Signed-off-by: Stefano Garzarella <sgarzare at redhat.com> > --- > > The patch is the same of v1, but I re-tested it with: > - QEMU v5.2.0-551-ga05f8ecd88 > - Linux 5.9.15 (host) > - Linux 5.9.15 and 5.10.0 (guest) > Now, enabling 'ats' it works well, there are just a few simple changes. > > v1: https://www.spinics.net/lists/kernel/msg3716022.html > v2: > - updated commit message about QEMU version and string used to test > - rebased on mst/vhost branch > > Thanks, > Stefano > --- > drivers/vhost/vsock.c | 68 +++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 65 insertions(+), 3 deletions(-)Acked-by: Stefan Hajnoczi <stefanha at redhat.com> -------------- next part -------------- A non-text attachment was scrubbed... Name: signature.asc Type: application/pgp-signature Size: 488 bytes Desc: not available URL: <http://lists.linuxfoundation.org/pipermail/virtualization/attachments/20210104/afea0f01/attachment.sig>