Michael S. Tsirkin
2010-May-05 20:58 UTC
[PATCH RFC] virtio: put last seen used index into ring itself
Generally, the Host end of the virtio ring doesn't need to see where Guest is up to in consuming the ring. However, to completely understand what's going on from the outside, this information must be exposed. For example, host can reduce the number of interrupts by detecting that the guest is currently handling previous buffers. Fortunately, we have room to expand: the ring is always a whole number of pages and there's hundreds of bytes of padding after the avail ring and the used ring, whatever the number of descriptors (which must be a power of 2). We add a feature bit so the guest can tell the host that it's writing out the current value there, if it wants to use that. This is based on a patch by Rusty Russell, with the main difference being that we dedicate a feature bit to guest to tell the host it is writing the used index. This way we don't need to force host to publish the last available index until we have a use for it. Signed-off-by: Rusty Russell <rusty at rustcorp.com.au> Signed-off-by: Michael S. Tsirkin <mst at redhat.com> --- Rusty, this is a simplified form of a patch you posted in the past. I have a vhost patch that, using this feature, shows external to host bandwidth grow from 5 to 7 GB/s, by avoiding an interrupt in the window after previous interrupt was sent and before interrupts were disabled for the vq. With vhost under some external to host loads I see this window being hit about 30% sometimes. I'm finalizing the host bits and plan to send the final version for inclusion when all's ready, but I'd like to hear comments meanwhile. drivers/virtio/virtio_ring.c | 28 +++++++++++++++++----------- include/linux/virtio_ring.h | 14 +++++++++++++- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 1ca8890..7729aba 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -89,9 +89,6 @@ struct vring_virtqueue /* Number we've added since last sync. */ unsigned int num_added; - /* Last used index we've seen. */ - u16 last_used_idx; - /* How to notify other side. FIXME: commonalize hcalls! */ void (*notify)(struct virtqueue *vq); @@ -285,12 +282,13 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head) static inline bool more_used(const struct vring_virtqueue *vq) { - return vq->last_used_idx != vq->vring.used->idx; + return *vq->vring.last_used_idx != vq->vring.used->idx; } void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) { struct vring_virtqueue *vq = to_vvq(_vq); + struct vring_used_elem *u; void *ret; unsigned int i; @@ -307,12 +305,13 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) return NULL; } - /* Only get used array entries after they have been exposed by host. */ - virtio_rmb(); - - i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id; - *len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len; + /* Only get used array entries after they have been exposed by host. + * Need mb(), not just rmb() because we write last_used_idx below. */ + virtio_mb(); + u = &vq->vring.used->ring[*vq->vring.last_used_idx % vq->vring.num]; + i = u->id; + *len = u->len; if (unlikely(i >= vq->vring.num)) { BAD_RING(vq, "id %u out of range\n", i); return NULL; @@ -325,7 +324,8 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) /* detach_buf clears data, so grab it now. */ ret = vq->data[i]; detach_buf(vq, i); - vq->last_used_idx++; + (*vq->vring.last_used_idx)++; + END_USE(vq); return ret; } @@ -431,7 +431,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, vq->vq.name = name; vq->notify = notify; vq->broken = false; - vq->last_used_idx = 0; + *vq->vring.last_used_idx = 0; vq->num_added = 0; list_add_tail(&vq->vq.list, &vdev->vqs); #ifdef DEBUG @@ -440,6 +440,10 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC); + /* We publish used index whether Host offers it or not: if not, it's + * junk space anyway. But calling this acknowledges the feature. */ + virtio_has_feature(vdev, VIRTIO_RING_F_PUBLISH_USED); + /* No callback? Tell other side not to bother us. */ if (!callback) vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; @@ -473,6 +477,8 @@ void vring_transport_features(struct virtio_device *vdev) switch (i) { case VIRTIO_RING_F_INDIRECT_DESC: break; + case VIRTIO_RING_F_PUBLISH_INDICES: + break; default: /* We don't understand this bit. */ clear_bit(i, vdev->features); diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index e4d144b..9d01de9 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -29,6 +29,9 @@ /* We support indirect buffer descriptors */ #define VIRTIO_RING_F_INDIRECT_DESC 28 +/* The Guest publishes last-seen used index at the end of the avail ring. */ +#define VIRTIO_RING_F_PUBLISH_USED 29 + /* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ struct vring_desc { /* Address (guest-physical). */ @@ -69,6 +72,8 @@ struct vring { struct vring_avail *avail; struct vring_used *used; + /* Last used index seen by the Guest. */ + __u16 *last_used_idx; }; /* The standard layout for the ring is a continuous chunk of memory which looks @@ -83,6 +88,7 @@ struct vring { * __u16 avail_flags; * __u16 avail_idx; * __u16 available[num]; + * __u16 last_used_idx; * * // Padding to the next align boundary. * char pad[]; @@ -101,11 +107,17 @@ static inline void vring_init(struct vring *vr, unsigned int num, void *p, vr->avail = p + num*sizeof(struct vring_desc); vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + align-1) & ~(align - 1)); + /* We publish the last-seen used index at the end of the available ring. + * It is at the end for backwards compatibility. */ + vr->last_used_idx = &(vr)->avail->ring[num]; + /* Verify that last used index does not spill over the used ring. */ + BUG_ON((void *)vr->last_used_idx + + sizeof *vr->last_used_idx > (void *)vr->used); } static inline unsigned vring_size(unsigned int num, unsigned long align) { - return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num) + return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (3 + num) + align - 1) & ~(align - 1)) + sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num; } -- 1.7.1.12.g42b7f
Dor Laor
2010-May-05 21:18 UTC
[PATCH RFC] virtio: put last seen used index into ring itself
On 05/05/2010 11:58 PM, Michael S. Tsirkin wrote:> Generally, the Host end of the virtio ring doesn't need to see where > Guest is up to in consuming the ring. However, to completely understand > what's going on from the outside, this information must be exposed. > For example, host can reduce the number of interrupts by detecting > that the guest is currently handling previous buffers. > > Fortunately, we have room to expand: the ring is always a whole number > of pages and there's hundreds of bytes of padding after the avail ring > and the used ring, whatever the number of descriptors (which must be a > power of 2). > > We add a feature bit so the guest can tell the host that it's writing > out the current value there, if it wants to use that. > > This is based on a patch by Rusty Russell, with the main difference > being that we dedicate a feature bit to guest to tell the host it is > writing the used index. This way we don't need to force host to publish > the last available index until we have a use for it. > > Signed-off-by: Rusty Russell<rusty at rustcorp.com.au> > Signed-off-by: Michael S. Tsirkin<mst at redhat.com> > --- > > Rusty, > this is a simplified form of a patch you posted in the past. > I have a vhost patch that, using this feature, shows external > to host bandwidth grow from 5 to 7 GB/s, by avoidingYou mean external to guest I guess. We have a similar issue with virtio-blk - when using very fast multi-spindle storage on the host side, there are too many irq injection events. This patch should probably reduce them allot. The principle exactly matches the Xen ring.> an interrupt in the window after previous interrupt > was sent and before interrupts were disabled for the vq. > With vhost under some external to host loads I see > this window being hit about 30% sometimes. > > I'm finalizing the host bits and plan to send > the final version for inclusion when all's ready, > but I'd like to hear comments meanwhile. > > drivers/virtio/virtio_ring.c | 28 +++++++++++++++++----------- > include/linux/virtio_ring.h | 14 +++++++++++++- > 2 files changed, 30 insertions(+), 12 deletions(-) > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c > index 1ca8890..7729aba 100644 > --- a/drivers/virtio/virtio_ring.c > +++ b/drivers/virtio/virtio_ring.c > @@ -89,9 +89,6 @@ struct vring_virtqueue > /* Number we've added since last sync. */ > unsigned int num_added; > > - /* Last used index we've seen. */ > - u16 last_used_idx; > - > /* How to notify other side. FIXME: commonalize hcalls! */ > void (*notify)(struct virtqueue *vq); > > @@ -285,12 +282,13 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head) > > static inline bool more_used(const struct vring_virtqueue *vq) > { > - return vq->last_used_idx != vq->vring.used->idx; > + return *vq->vring.last_used_idx != vq->vring.used->idx; > } > > void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) > { > struct vring_virtqueue *vq = to_vvq(_vq); > + struct vring_used_elem *u; > void *ret; > unsigned int i; > > @@ -307,12 +305,13 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) > return NULL; > } > > - /* Only get used array entries after they have been exposed by host. */ > - virtio_rmb(); > - > - i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id; > - *len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len; > + /* Only get used array entries after they have been exposed by host. > + * Need mb(), not just rmb() because we write last_used_idx below. */ > + virtio_mb(); > > + u =&vq->vring.used->ring[*vq->vring.last_used_idx % vq->vring.num]; > + i = u->id; > + *len = u->len; > if (unlikely(i>= vq->vring.num)) { > BAD_RING(vq, "id %u out of range\n", i); > return NULL; > @@ -325,7 +324,8 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) > /* detach_buf clears data, so grab it now. */ > ret = vq->data[i]; > detach_buf(vq, i); > - vq->last_used_idx++; > + (*vq->vring.last_used_idx)++; > + > END_USE(vq); > return ret; > } > @@ -431,7 +431,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, > vq->vq.name = name; > vq->notify = notify; > vq->broken = false; > - vq->last_used_idx = 0; > + *vq->vring.last_used_idx = 0; > vq->num_added = 0; > list_add_tail(&vq->vq.list,&vdev->vqs); > #ifdef DEBUG > @@ -440,6 +440,10 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, > > vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC); > > + /* We publish used index whether Host offers it or not: if not, it's > + * junk space anyway. But calling this acknowledges the feature. */ > + virtio_has_feature(vdev, VIRTIO_RING_F_PUBLISH_USED); > + > /* No callback? Tell other side not to bother us. */ > if (!callback) > vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; > @@ -473,6 +477,8 @@ void vring_transport_features(struct virtio_device *vdev) > switch (i) { > case VIRTIO_RING_F_INDIRECT_DESC: > break; > + case VIRTIO_RING_F_PUBLISH_INDICES: > + break; > default: > /* We don't understand this bit. */ > clear_bit(i, vdev->features); > diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h > index e4d144b..9d01de9 100644 > --- a/include/linux/virtio_ring.h > +++ b/include/linux/virtio_ring.h > @@ -29,6 +29,9 @@ > /* We support indirect buffer descriptors */ > #define VIRTIO_RING_F_INDIRECT_DESC 28 > > +/* The Guest publishes last-seen used index at the end of the avail ring. */ > +#define VIRTIO_RING_F_PUBLISH_USED 29 > + > /* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ > struct vring_desc { > /* Address (guest-physical). */ > @@ -69,6 +72,8 @@ struct vring { > struct vring_avail *avail; > > struct vring_used *used; > + /* Last used index seen by the Guest. */ > + __u16 *last_used_idx; > }; > > /* The standard layout for the ring is a continuous chunk of memory which looks > @@ -83,6 +88,7 @@ struct vring { > * __u16 avail_flags; > * __u16 avail_idx; > * __u16 available[num]; > + * __u16 last_used_idx; > * > * // Padding to the next align boundary. > * char pad[]; > @@ -101,11 +107,17 @@ static inline void vring_init(struct vring *vr, unsigned int num, void *p, > vr->avail = p + num*sizeof(struct vring_desc); > vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + align-1) > & ~(align - 1)); > + /* We publish the last-seen used index at the end of the available ring. > + * It is at the end for backwards compatibility. */ > + vr->last_used_idx =&(vr)->avail->ring[num]; > + /* Verify that last used index does not spill over the used ring. */ > + BUG_ON((void *)vr->last_used_idx + > + sizeof *vr->last_used_idx> (void *)vr->used); > } > > static inline unsigned vring_size(unsigned int num, unsigned long align) > { > - return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num) > + return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (3 + num) > + align - 1)& ~(align - 1)) > + sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num; > }
Rusty Russell
2010-May-06 02:31 UTC
[PATCH RFC] virtio: put last seen used index into ring itself
On Thu, 6 May 2010 06:28:14 am Michael S. Tsirkin wrote:> Rusty, > this is a simplified form of a patch you posted in the past. > I have a vhost patch that, using this feature, shows external > to host bandwidth grow from 5 to 7 GB/s, by avoiding > an interrupt in the window after previous interrupt > was sent and before interrupts were disabled for the vq. > With vhost under some external to host loads I see > this window being hit about 30% sometimes.Fascinating. So you use this to guess if the guest is still processing? I haven't thought about it hard, but is that racy? Obviously happy to apply this when you finalize it. Thanks! Rusty.
Avi Kivity
2010-May-06 10:00 UTC
[Qemu-devel] [PATCH RFC] virtio: put last seen used index into ring itself
On 05/05/2010 11:58 PM, Michael S. Tsirkin wrote:> + /* We publish the last-seen used index at the end of the available ring. > + * It is at the end for backwards compatibility. */ > + vr->last_used_idx =&(vr)->avail->ring[num]; > + /* Verify that last used index does not spill over the used ring. */ > + BUG_ON((void *)vr->last_used_idx + > + sizeof *vr->last_used_idx> (void *)vr->used); > } >Shouldn't this be on its own cache line? One way to achieve this is to allocate it at the end. -- error compiling committee.c: too many arguments to function
Ryan Harper
2010-May-11 18:46 UTC
[PATCH RFC] virtio: put last seen used index into ring itself
* Michael S. Tsirkin <mst at redhat.com> [2010-05-05 16:37]:> Generally, the Host end of the virtio ring doesn't need to see where > Guest is up to in consuming the ring. However, to completely understand > what's going on from the outside, this information must be exposed. > For example, host can reduce the number of interrupts by detecting > that the guest is currently handling previous buffers. > > Fortunately, we have room to expand: the ring is always a whole number > of pages and there's hundreds of bytes of padding after the avail ring > and the used ring, whatever the number of descriptors (which must be a > power of 2). > > We add a feature bit so the guest can tell the host that it's writing > out the current value there, if it wants to use that. > > This is based on a patch by Rusty Russell, with the main difference > being that we dedicate a feature bit to guest to tell the host it is > writing the used index. This way we don't need to force host to publish > the last available index until we have a use for it. > > Signed-off-by: Rusty Russell <rusty at rustcorp.com.au> > Signed-off-by: Michael S. Tsirkin <mst at redhat.com> > --- > > Rusty, > this is a simplified form of a patch you posted in the past. > I have a vhost patch that, using this feature, shows external > to host bandwidth grow from 5 to 7 GB/s, by avoiding > an interrupt in the window after previous interrupt > was sent and before interrupts were disabled for the vq. > With vhost under some external to host loads I see > this window being hit about 30% sometimes. > > I'm finalizing the host bits and plan to send > the final version for inclusion when all's ready, > but I'd like to hear comments meanwhile. > > drivers/virtio/virtio_ring.c | 28 +++++++++++++++++----------- > include/linux/virtio_ring.h | 14 +++++++++++++- > 2 files changed, 30 insertions(+), 12 deletions(-) > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c > index 1ca8890..7729aba 100644 > --- a/drivers/virtio/virtio_ring.c > +++ b/drivers/virtio/virtio_ring.c > @@ -89,9 +89,6 @@ struct vring_virtqueue > /* Number we've added since last sync. */ > unsigned int num_added; > > - /* Last used index we've seen. */ > - u16 last_used_idx; > - > /* How to notify other side. FIXME: commonalize hcalls! */ > void (*notify)(struct virtqueue *vq); > > @@ -285,12 +282,13 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head) > > static inline bool more_used(const struct vring_virtqueue *vq) > { > - return vq->last_used_idx != vq->vring.used->idx; > + return *vq->vring.last_used_idx != vq->vring.used->idx; > } > > void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) > { > struct vring_virtqueue *vq = to_vvq(_vq); > + struct vring_used_elem *u; > void *ret; > unsigned int i; > > @@ -307,12 +305,13 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) > return NULL; > } > > - /* Only get used array entries after they have been exposed by host. */ > - virtio_rmb(); > - > - i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id; > - *len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len; > + /* Only get used array entries after they have been exposed by host. > + * Need mb(), not just rmb() because we write last_used_idx below. */ > + virtio_mb(); > > + u = &vq->vring.used->ring[*vq->vring.last_used_idx % vq->vring.num]; > + i = u->id; > + *len = u->len; > if (unlikely(i >= vq->vring.num)) { > BAD_RING(vq, "id %u out of range\n", i); > return NULL; > @@ -325,7 +324,8 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) > /* detach_buf clears data, so grab it now. */ > ret = vq->data[i]; > detach_buf(vq, i); > - vq->last_used_idx++; > + (*vq->vring.last_used_idx)++; > + > END_USE(vq); > return ret; > } > @@ -431,7 +431,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, > vq->vq.name = name; > vq->notify = notify; > vq->broken = false; > - vq->last_used_idx = 0; > + *vq->vring.last_used_idx = 0; > vq->num_added = 0; > list_add_tail(&vq->vq.list, &vdev->vqs); > #ifdef DEBUG > @@ -440,6 +440,10 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, > > vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC); > > + /* We publish used index whether Host offers it or not: if not, it's > + * junk space anyway. But calling this acknowledges the feature. */ > + virtio_has_feature(vdev, VIRTIO_RING_F_PUBLISH_USED); > +You use VIRTIO_RING_F_PUBLISH_USED here, but VIRTIO_RING_F_PUBLISH_INDICES below...> /* No callback? Tell other side not to bother us. */ > if (!callback) > vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; > @@ -473,6 +477,8 @@ void vring_transport_features(struct virtio_device *vdev) > switch (i) { > case VIRTIO_RING_F_INDIRECT_DESC: > break; > + case VIRTIO_RING_F_PUBLISH_INDICES: > + break;Here ^^^> default: > /* We don't understand this bit. */ > clear_bit(i, vdev->features); > diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h > index e4d144b..9d01de9 100644 > --- a/include/linux/virtio_ring.h > +++ b/include/linux/virtio_ring.h > @@ -29,6 +29,9 @@ > /* We support indirect buffer descriptors */ > #define VIRTIO_RING_F_INDIRECT_DESC 28 > > +/* The Guest publishes last-seen used index at the end of the avail ring. */ > +#define VIRTIO_RING_F_PUBLISH_USED 29 > +And here; is it PUBLISHED_USED or PUBLISHED_INDICIES ? -- Ryan Harper Software Engineer; Linux Technology Center IBM Corp., Austin, Tx ryanh at us.ibm.com
Possibly Parallel Threads
- [PATCH RFC] virtio: put last seen used index into ring itself
- [PATCHv3 0/2] virtio: put last seen used index into ring itself
- [PATCHv3 0/2] virtio: put last seen used index into ring itself
- [PATCH 0/5] High-speed tun receive and xmit
- [PATCH 0/5] High-speed tun receive and xmit