thr3ads.net - Linux Virtualization - [PATCH] vhost: introduce mdev based hardware backend [Sep 2019]

If this information is useful, please help other people find it:
Share via:

Tiwei Bie

2019-Sep-27 04:54 UTC

[PATCH] vhost: introduce mdev based hardware backend

On Fri, Sep 27, 2019 at 11:46:06AM +0800, Jason Wang wrote:
> On 2019/9/26 下午12:54, Tiwei Bie wrote:
> > +
> > +static long vhost_mdev_start(struct vhost_mdev *m)
> > +{
> > +	struct mdev_device *mdev = m->mdev;
> > +	const struct virtio_mdev_device_ops *ops = mdev_get_dev_ops(mdev);
> > +	struct virtio_mdev_callback cb;
> > +	struct vhost_virtqueue *vq;
> > +	int idx;
> > +
> > +	ops->set_features(mdev, m->acked_features);
> > +
> > +	mdev_add_status(mdev, VIRTIO_CONFIG_S_FEATURES_OK);
> > +	if (!(mdev_get_status(mdev) & VIRTIO_CONFIG_S_FEATURES_OK))
> > +		goto reset;
> > +
> > +	for (idx = 0; idx < m->nvqs; idx++) {
> > +		vq = &m->vqs[idx];
> > +
> > +		if (!vq->desc || !vq->avail || !vq->used)
> > +			break;
> > +
> > +		if (ops->set_vq_state(mdev, idx, vq->last_avail_idx))
> > +			goto reset;
> 
> 
> If we do set_vq_state() in SET_VRING_BASE, we won't need this step here.

Yeah, I plan to do it in the next version.
> 
> 
> > +
> > +		/*
> > +		 * In vhost-mdev, userspace should pass ring addresses
> > +		 * in guest physical addresses when IOMMU is disabled or
> > +		 * IOVAs when IOMMU is enabled.
> > +		 */
> 
> 
> A question here, consider we're using noiommu mode. If guest physical
> address is passed here, how can a device use that?
> 
> I believe you meant "host physical address" here? And it also have the
> implication that the HPA should be continuous (e.g using hugetlbfs).
The comment is talking about the virtual IOMMU (i.e. iotlb in vhost).
It should be rephrased to cover the noiommu case as well. Thanks for
spotting this.

> > +
> > +	switch (cmd) {
> > +	case VHOST_MDEV_SET_STATE:
> > +		r = vhost_set_state(m, argp);
> > +		break;
> > +	case VHOST_GET_FEATURES:
> > +		r = vhost_get_features(m, argp);
> > +		break;
> > +	case VHOST_SET_FEATURES:
> > +		r = vhost_set_features(m, argp);
> > +		break;
> > +	case VHOST_GET_VRING_BASE:
> > +		r = vhost_get_vring_base(m, argp);
> > +		break;
> 
> 
> Does it mean the SET_VRING_BASE may only take effect after
> VHOST_MDEV_SET_STATE?
Yeah, in this version, SET_VRING_BASE won't set the base to the
device directly. But I plan to not delay this anymore in the next
version to support the SET_STATUS.
> 
> 
> > +	default:
> > +		r = vhost_dev_ioctl(&m->dev, cmd, argp);
> > +		if (r == -ENOIOCTLCMD)
> > +			r = vhost_vring_ioctl(&m->dev, cmd, argp);
> > +	}
> > +
> > +	mutex_unlock(&m->mutex);
> > +	return r;
> > +}
> > +
> > +static const struct vfio_device_ops vfio_vhost_mdev_dev_ops = {
> > +	.name		= "vfio-vhost-mdev",
> > +	.open		= vhost_mdev_open,
> > +	.release	= vhost_mdev_release,
> > +	.ioctl		= vhost_mdev_unlocked_ioctl,
> > +};
> > +
> > +static int vhost_mdev_probe(struct device *dev)
> > +{
> > +	struct mdev_device *mdev = mdev_from_dev(dev);
> > +	const struct virtio_mdev_device_ops *ops = mdev_get_dev_ops(mdev);
> > +	struct vhost_mdev *m;
> > +	int nvqs, r;
> > +
> > +	m = kzalloc(sizeof(*m), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
> > +	if (!m)
> > +		return -ENOMEM;
> > +
> > +	mutex_init(&m->mutex);
> > +
> > +	nvqs = ops->get_queue_max(mdev);
> > +	m->nvqs = nvqs;
> 
> 
> The name could be confusing, get_queue_max() is to get the maximum number of
> entries for a virtqueue supported by this device.
OK. It might be better to rename it to something like:

	get_vq_num_max()

which is more consistent with the set_vq_num().
> 
> It looks to me that we need another API to query the maximum number of
> virtqueues supported by the device.
Yeah.

Thanks,
Tiwei

> 
> Thanks
> 
> 
> > +
> > +	m->vqs = kmalloc_array(nvqs, sizeof(struct vhost_virtqueue),
> > +			       GFP_KERNEL);
> > +	if (!m->vqs) {
> > +		r = -ENOMEM;
> > +		goto err;
> > +	}
> > +
> > +	r = vfio_add_group_dev(dev, &vfio_vhost_mdev_dev_ops, m);
> > +	if (r)
> > +		goto err;
> > +
> > +	m->features = ops->get_features(mdev);
> > +	m->mdev = mdev;
> > +	return 0;
> > +
> > +err:
> > +	kfree(m->vqs);
> > +	kfree(m);
> > +	return r;
> > +}
> > +
> > +static void vhost_mdev_remove(struct device *dev)
> > +{
> > +	struct vhost_mdev *m;
> > +
> > +	m = vfio_del_group_dev(dev);
> > +	mutex_destroy(&m->mutex);
> > +	kfree(m->vqs);
> > +	kfree(m);
> > +}
> > +
> > +static struct mdev_class_id id_table[] = {
> > +	{ MDEV_ID_VHOST },
> > +	{ 0 },
> > +};
> > +
> > +static struct mdev_driver vhost_mdev_driver = {
> > +	.name	= "vhost_mdev",
> > +	.probe	= vhost_mdev_probe,
> > +	.remove	= vhost_mdev_remove,
> > +	.id_table = id_table,
> > +};
> > +
> > +static int __init vhost_mdev_init(void)
> > +{
> > +	return mdev_register_driver(&vhost_mdev_driver, THIS_MODULE);
> > +}
> > +module_init(vhost_mdev_init);
> > +
> > +static void __exit vhost_mdev_exit(void)
> > +{
> > +	mdev_unregister_driver(&vhost_mdev_driver);
> > +}
> > +module_exit(vhost_mdev_exit);
> > +
> > +MODULE_VERSION("0.0.1");
> > +MODULE_LICENSE("GPL v2");
> > +MODULE_DESCRIPTION("Mediated device based accelerator for virtio");
> > diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
> > index 40d028eed645..5afbc2f08fa3 100644
> > --- a/include/uapi/linux/vhost.h
> > +++ b/include/uapi/linux/vhost.h
> > @@ -116,4 +116,12 @@
> >   #define VHOST_VSOCK_SET_GUEST_CID	_IOW(VHOST_VIRTIO, 0x60, __u64)
> >   #define VHOST_VSOCK_SET_RUNNING		_IOW(VHOST_VIRTIO, 0x61, int)
> > +/* VHOST_MDEV specific defines */
> > +
> > +#define VHOST_MDEV_SET_STATE	_IOW(VHOST_VIRTIO, 0x70, __u64)
> > +
> > +#define VHOST_MDEV_S_STOPPED	0
> > +#define VHOST_MDEV_S_RUNNING	1
> > +#define VHOST_MDEV_S_MAX	2
> > +
> >   #endif

Jason Wang

2019-Sep-27 07:14 UTC

head link

[PATCH] vhost: introduce mdev based hardware backend

On 2019/9/27 下午12:54, Tiwei Bie wrote:
>>> +
>>> +		/*
>>> +		 * In vhost-mdev, userspace should pass ring addresses
>>> +		 * in guest physical addresses when IOMMU is disabled or
>>> +		 * IOVAs when IOMMU is enabled.
>>> +		 */
>> A question here, consider we're using noiommu mode. If guest physical
>> address is passed here, how can a device use that?
>>
>> I believe you meant "host physical address" here? And it also have the
>> implication that the HPA should be continuous (e.g using hugetlbfs).
> The comment is talking about the virtual IOMMU (i.e. iotlb in vhost).
> It should be rephrased to cover the noiommu case as well. Thanks for
> spotting this.

So the question still, if GPA is passed how can it be used by the 
virtio-mdev device?

Thanks

Jason Wang

2019-Sep-27 07:17 UTC

head link

[PATCH] vhost: introduce mdev based hardware backend

On 2019/9/27 下午12:54, Tiwei Bie wrote:
>> The name could be confusing, get_queue_max() is to get the maximum number of
>> entries for a virtqueue supported by this device.
> OK. It might be better to rename it to something like:
>
> 	get_vq_num_max()
>
> which is more consistent with the set_vq_num().
>
Yes, will do in next version.

Thanks

Tiwei Bie

2019-Sep-27 08:04 UTC

head link

[PATCH] vhost: introduce mdev based hardware backend

On Fri, Sep 27, 2019 at 03:14:42PM +0800, Jason Wang wrote:
> On 2019/9/27 下午12:54, Tiwei Bie wrote:
> > > > +
> > > > +		/*
> > > > +		 * In vhost-mdev, userspace should pass ring addresses
> > > > +		 * in guest physical addresses when IOMMU is disabled or
> > > > +		 * IOVAs when IOMMU is enabled.
> > > > +		 */
> > > A question here, consider we're using noiommu mode. If guest physical
> > > address is passed here, how can a device use that?
> > > 
> > > I believe you meant "host physical address" here? And it also have the
> > > implication that the HPA should be continuous (e.g using hugetlbfs).
> > The comment is talking about the virtual IOMMU (i.e. iotlb in vhost).
> > It should be rephrased to cover the noiommu case as well. Thanks for
> > spotting this.
> 
> 
> So the question still, if GPA is passed how can it be used by the
> virtio-mdev device?
Sorry if I didn't make it clear..
Of course, GPA can't be passed in noiommu mode.

> 
> Thanks
>

Jason Wang

2019-Sep-27 08:47 UTC

head link

[PATCH] vhost: introduce mdev based hardware backend

On 2019/9/27 下午12:54, Tiwei Bie wrote:
> On Fri, Sep 27, 2019 at 11:46:06AM +0800, Jason Wang wrote:
>> On 2019/9/26 下午12:54, Tiwei Bie wrote:
>>> +
>>> +static long vhost_mdev_start(struct vhost_mdev *m)
>>> +{
>>> +	struct mdev_device *mdev = m->mdev;
>>> +	const struct virtio_mdev_device_ops *ops = mdev_get_dev_ops(mdev);
>>> +	struct virtio_mdev_callback cb;
>>> +	struct vhost_virtqueue *vq;
>>> +	int idx;
>>> +
>>> +	ops->set_features(mdev, m->acked_features);
>>> +
>>> +	mdev_add_status(mdev, VIRTIO_CONFIG_S_FEATURES_OK);
>>> +	if (!(mdev_get_status(mdev) & VIRTIO_CONFIG_S_FEATURES_OK))
>>> +		goto reset;
>>> +
>>> +	for (idx = 0; idx < m->nvqs; idx++) {
>>> +		vq = &m->vqs[idx];
>>> +
>>> +		if (!vq->desc || !vq->avail || !vq->used)
>>> +			break;
>>> +
>>> +		if (ops->set_vq_state(mdev, idx, vq->last_avail_idx))
>>> +			goto reset;
>> If we do set_vq_state() in SET_VRING_BASE, we won't need this step here.
> Yeah, I plan to do it in the next version.
>
>>> +
>>> +		/*
>>> +		 * In vhost-mdev, userspace should pass ring addresses
>>> +		 * in guest physical addresses when IOMMU is disabled or
>>> +		 * IOVAs when IOMMU is enabled.
>>> +		 */
>> A question here, consider we're using noiommu mode. If guest physical
>> address is passed here, how can a device use that?
>>
>> I believe you meant "host physical address" here? And it also have the
>> implication that the HPA should be continuous (e.g using hugetlbfs).
> The comment is talking about the virtual IOMMU (i.e. iotlb in vhost).
> It should be rephrased to cover the noiommu case as well. Thanks for
> spotting this.
>
>
>>> +
>>> +	switch (cmd) {
>>> +	case VHOST_MDEV_SET_STATE:
>>> +		r = vhost_set_state(m, argp);
>>> +		break;
>>> +	case VHOST_GET_FEATURES:
>>> +		r = vhost_get_features(m, argp);
>>> +		break;
>>> +	case VHOST_SET_FEATURES:
>>> +		r = vhost_set_features(m, argp);
>>> +		break;
>>> +	case VHOST_GET_VRING_BASE:
>>> +		r = vhost_get_vring_base(m, argp);
>>> +		break;
>> Does it mean the SET_VRING_BASE may only take effect after
>> VHOST_MDEV_SET_STATE?
> Yeah, in this version, SET_VRING_BASE won't set the base to the
> device directly. But I plan to not delay this anymore in the next
> version to support the SET_STATUS.
>
>>> +	default:
>>> +		r = vhost_dev_ioctl(&m->dev, cmd, argp);
>>> +		if (r == -ENOIOCTLCMD)
>>> +			r = vhost_vring_ioctl(&m->dev, cmd, argp);
>>> +	}
>>> +
>>> +	mutex_unlock(&m->mutex);
>>> +	return r;
>>> +}
>>> +
>>> +static const struct vfio_device_ops vfio_vhost_mdev_dev_ops = {
>>> +	.name		= "vfio-vhost-mdev",
>>> +	.open		= vhost_mdev_open,
>>> +	.release	= vhost_mdev_release,
>>> +	.ioctl		= vhost_mdev_unlocked_ioctl,
>>> +};
>>> +
>>> +static int vhost_mdev_probe(struct device *dev)
>>> +{
>>> +	struct mdev_device *mdev = mdev_from_dev(dev);
>>> +	const struct virtio_mdev_device_ops *ops = mdev_get_dev_ops(mdev);
>>> +	struct vhost_mdev *m;
>>> +	int nvqs, r;
>>> +
>>> +	m = kzalloc(sizeof(*m), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
>>> +	if (!m)
>>> +		return -ENOMEM;
>>> +
>>> +	mutex_init(&m->mutex);
>>> +
>>> +	nvqs = ops->get_queue_max(mdev);
>>> +	m->nvqs = nvqs;
>> The name could be confusing, get_queue_max() is to get the maximum number of
>> entries for a virtqueue supported by this device.
> OK. It might be better to rename it to something like:
>
> 	get_vq_num_max()
>
> which is more consistent with the set_vq_num().
>
>> It looks to me that we need another API to query the maximum number of
>> virtqueues supported by the device.
> Yeah.
>
> Thanks,
> Tiwei

One problem here:

Consider if we want to support multiqueue, how did userspace know about 
this? Note this information could be fetched from get_config() via a 
device specific way, do we want ioctl for accessing that area?

Thanks

Michael S. Tsirkin

2019-Sep-27 09:38 UTC

head link

[PATCH] vhost: introduce mdev based hardware backend

On Fri, Sep 27, 2019 at 04:47:43PM +0800, Jason Wang wrote:
> 
> On 2019/9/27 下午12:54, Tiwei Bie wrote:
> > On Fri, Sep 27, 2019 at 11:46:06AM +0800, Jason Wang wrote:
> > > On 2019/9/26 下午12:54, Tiwei Bie wrote:
> > > > +
> > > > +static long vhost_mdev_start(struct vhost_mdev *m)
> > > > +{
> > > > +	struct mdev_device *mdev = m->mdev;
> > > > +	const struct virtio_mdev_device_ops *ops = mdev_get_dev_ops(mdev);
> > > > +	struct virtio_mdev_callback cb;
> > > > +	struct vhost_virtqueue *vq;
> > > > +	int idx;
> > > > +
> > > > +	ops->set_features(mdev, m->acked_features);
> > > > +
> > > > +	mdev_add_status(mdev, VIRTIO_CONFIG_S_FEATURES_OK);
> > > > +	if (!(mdev_get_status(mdev) & VIRTIO_CONFIG_S_FEATURES_OK))
> > > > +		goto reset;
> > > > +
> > > > +	for (idx = 0; idx < m->nvqs; idx++) {
> > > > +		vq = &m->vqs[idx];
> > > > +
> > > > +		if (!vq->desc || !vq->avail || !vq->used)
> > > > +			break;
> > > > +
> > > > +		if (ops->set_vq_state(mdev, idx, vq->last_avail_idx))
> > > > +			goto reset;
> > > If we do set_vq_state() in SET_VRING_BASE, we won't need this step here.
> > Yeah, I plan to do it in the next version.
> > 
> > > > +
> > > > +		/*
> > > > +		 * In vhost-mdev, userspace should pass ring addresses
> > > > +		 * in guest physical addresses when IOMMU is disabled or
> > > > +		 * IOVAs when IOMMU is enabled.
> > > > +		 */
> > > A question here, consider we're using noiommu mode. If guest physical
> > > address is passed here, how can a device use that?
> > > 
> > > I believe you meant "host physical address" here? And it also have the
> > > implication that the HPA should be continuous (e.g using hugetlbfs).
> > The comment is talking about the virtual IOMMU (i.e. iotlb in vhost).
> > It should be rephrased to cover the noiommu case as well. Thanks for
> > spotting this.
> > 
> > 
> > > > +
> > > > +	switch (cmd) {
> > > > +	case VHOST_MDEV_SET_STATE:
> > > > +		r = vhost_set_state(m, argp);
> > > > +		break;
> > > > +	case VHOST_GET_FEATURES:
> > > > +		r = vhost_get_features(m, argp);
> > > > +		break;
> > > > +	case VHOST_SET_FEATURES:
> > > > +		r = vhost_set_features(m, argp);
> > > > +		break;
> > > > +	case VHOST_GET_VRING_BASE:
> > > > +		r = vhost_get_vring_base(m, argp);
> > > > +		break;
> > > Does it mean the SET_VRING_BASE may only take effect after
> > > VHOST_MDEV_SET_STATE?
> > Yeah, in this version, SET_VRING_BASE won't set the base to the
> > device directly. But I plan to not delay this anymore in the next
> > version to support the SET_STATUS.
> > 
> > > > +	default:
> > > > +		r = vhost_dev_ioctl(&m->dev, cmd, argp);
> > > > +		if (r == -ENOIOCTLCMD)
> > > > +			r = vhost_vring_ioctl(&m->dev, cmd, argp);
> > > > +	}
> > > > +
> > > > +	mutex_unlock(&m->mutex);
> > > > +	return r;
> > > > +}
> > > > +
> > > > +static const struct vfio_device_ops vfio_vhost_mdev_dev_ops = {
> > > > +	.name		= "vfio-vhost-mdev",
> > > > +	.open		= vhost_mdev_open,
> > > > +	.release	= vhost_mdev_release,
> > > > +	.ioctl		= vhost_mdev_unlocked_ioctl,
> > > > +};
> > > > +
> > > > +static int vhost_mdev_probe(struct device *dev)
> > > > +{
> > > > +	struct mdev_device *mdev = mdev_from_dev(dev);
> > > > +	const struct virtio_mdev_device_ops *ops = mdev_get_dev_ops(mdev);
> > > > +	struct vhost_mdev *m;
> > > > +	int nvqs, r;
> > > > +
> > > > +	m = kzalloc(sizeof(*m), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
> > > > +	if (!m)
> > > > +		return -ENOMEM;
> > > > +
> > > > +	mutex_init(&m->mutex);
> > > > +
> > > > +	nvqs = ops->get_queue_max(mdev);
> > > > +	m->nvqs = nvqs;
> > > The name could be confusing, get_queue_max() is to get the maximum number of
> > > entries for a virtqueue supported by this device.
> > OK. It might be better to rename it to something like:
> > 
> > 	get_vq_num_max()
> > 
> > which is more consistent with the set_vq_num().
> > 
> > > It looks to me that we need another API to query the maximum number of
> > > virtqueues supported by the device.
> > Yeah.
> > 
> > Thanks,
> > Tiwei
> 
> 
> One problem here:
> 
> Consider if we want to support multiqueue, how did userspace know about
> this?
There's a feature bit for this, isn't there?
> Note this information could be fetched from get_config() via a device
> specific way, do we want ioctl for accessing that area?
> 
> Thanks

Maybe Matching Threads

Search for more maybe matching threads

Linux Virtualization - Sep 2019 - [PATCH] vhost: introduce mdev based hardware backend

[PATCH] vhost: introduce mdev based hardware backend

[PATCH] vhost: introduce mdev based hardware backend

[PATCH] vhost: introduce mdev based hardware backend

[PATCH] vhost: introduce mdev based hardware backend

[PATCH] vhost: introduce mdev based hardware backend

[PATCH] vhost: introduce mdev based hardware backend

Maybe Matching Threads