Michael S. Tsirkin
2022-Feb-01 18:27 UTC
[PATCH v2 3/9] [RFC] virtio_ring: Embed a wrap counter in opaque poll index value
Looks correct, thanks. Some minor comments below:

On Tue, Feb 01, 2022 at 05:15:55PM +0000, Cristian Marussi wrote:
> Exported API virtqueue_poll() can be used to support polling mode operation > on top of virtio layer if needed; currently the parameter last_used_idx is > the opaque value that needs to be passed to the virtqueue_poll() function > to check if there are new pending used buffers in the queue: such opaque > value would have been previously obtained by a call to the API function > virtqueue_enable_cb_prepare(). > > Since such opaque value is indeed containing simply a snapshot in time of > the internal

to add: 16 bit

> last_used_index (roughly), it is possible that,

to add here: if another thread calls virtqueue_add_*() at the same time (which existing drivers don't do, but does not seem to be documented as prohibited anywhere), and

> if exactly > 2**16 buffers are marked as used between two successive calls to > virtqueue_poll(), the caller is fooled into thinking that nothing is > pending (ABA problem). > Keep a full fledged internal wraps counter

s/full fledged/a 16 bit/ since I don't see why a 16 bit counter is full but not e.g. a 32 bit one

> per virtqueue and embed it into > the upper 16bits of the returned opaque value, so that the above scenario > can be detected transparently by virtqueue_poll(): this way each single > possible last_used_idx value is really belonging to a different wrap.

Just to add here: the ABA problem can in theory still happen but now that's after 2^32 requests, which seems sufficient in practice.

> Cc: "Michael S. Tsirkin" <mst at redhat.com> > Cc: Igor Skalkin <igor.skalkin at opensynergy.com> > Cc: Peter Hilber <peter.hilber at opensynergy.com> > Cc: virtualization at lists.linux-foundation.org > Signed-off-by: Cristian Marussi <cristian.marussi at arm.com> > --- > Still no perf data on this, I was wondering what exactly to measure in > term of perf metrics to evaluate the impact of the rolling vq->wraps > counter. 
> --- > drivers/virtio/virtio_ring.c | 51 +++++++++++++++++++++++++++++++++--- > 1 file changed, 47 insertions(+), 4 deletions(-) > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c > index 00f64f2f8b72..613ec0503509 100644 > --- a/drivers/virtio/virtio_ring.c > +++ b/drivers/virtio/virtio_ring.c > @@ -12,6 +12,8 @@ > #include <linux/hrtimer.h> > #include <linux/dma-mapping.h> > #include <linux/spinlock.h> > +#include <linux/bits.h> > +#include <linux/bitfield.h> > #include <xen/xen.h> > > static bool force_used_validation = false; > @@ -69,6 +71,17 @@ module_param(force_used_validation, bool, 0444); > #define LAST_ADD_TIME_INVALID(vq) > #endif > > +#define VRING_IDX_MASK GENMASK(15, 0) > +#define VRING_GET_IDX(opaque) \ > + ((u16)FIELD_GET(VRING_IDX_MASK, (opaque))) > + > +#define VRING_WRAPS_MASK GENMASK(31, 16) > +#define VRING_GET_WRAPS(opaque) \ > + ((u16)FIELD_GET(VRING_WRAPS_MASK, (opaque))) > + > +#define VRING_BUILD_OPAQUE(idx, wraps) \ > + (FIELD_PREP(VRING_WRAPS_MASK, (wraps)) | ((idx) & VRING_IDX_MASK)) > +

Maybe prefix with VRING_POLL_ since that is the only user.

> struct vring_desc_state_split { > void *data; /* Data for callback. */ > struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ > @@ -117,6 +130,8 @@ struct vring_virtqueue { > /* Last used index we've seen. */ > u16 last_used_idx; > > + u16 wraps; > + > /* Hint for event idx: already triggered no need to disable. */ > bool event_triggered; > > @@ -806,6 +821,8 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, > ret = vq->split.desc_state[i].data; > detach_buf_split(vq, i, ctx); > vq->last_used_idx++; > + if (unlikely(!vq->last_used_idx)) > + vq->wraps++; > /* If we expect an interrupt for the next entry, tell host > * by writing event index and flush out the write before > * the read in the next get_buf call. */

So most drivers don't call virtqueue_poll.
Concerned about the overhead here: another option is a flag that will have to be set whenever a driver wants to use virtqueue_poll. Could you please do a quick perf test, e.g. using tools/virtio/, to see what's faster?

> @@ -1508,6 +1525,7 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, > if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) { > vq->last_used_idx -= vq->packed.vring.num; > vq->packed.used_wrap_counter ^= 1; > + vq->wraps++; > } > > /* > @@ -1744,6 +1762,7 @@ static struct virtqueue *vring_create_virtqueue_packed( > vq->weak_barriers = weak_barriers; > vq->broken = false; > vq->last_used_idx = 0; > + vq->wraps = 0; > vq->event_triggered = false; > vq->num_added = 0; > vq->packed_ring = true; > @@ -2092,13 +2111,17 @@ EXPORT_SYMBOL_GPL(virtqueue_disable_cb); > */ > unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) > { > + unsigned int last_used_idx; > struct vring_virtqueue *vq = to_vvq(_vq); > > if (vq->event_triggered) > vq->event_triggered = false; > > - return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) : > - virtqueue_enable_cb_prepare_split(_vq); > + last_used_idx = vq->packed_ring ? > + virtqueue_enable_cb_prepare_packed(_vq) : > + virtqueue_enable_cb_prepare_split(_vq); > + > + return VRING_BUILD_OPAQUE(last_used_idx, vq->wraps); > } > EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); > > @@ -2107,6 +2130,21 @@ EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); > * @_vq: the struct virtqueue we're talking about. > * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). 
> * > + * The provided last_used_idx, as returned by virtqueue_enable_cb_prepare(), > + * is an opaque value representing the queue state and it is built as follows: > + * > + * --------------------------------------------------------- > + * | vq->wraps | vq->last_used_idx | > + * 31------------------------------------------------------0 > + * > + * The MSB 16bits embedding the wraps counter for the underlying virtqueue > + * is stripped out here before reaching into the lower layer helpers. > + * > + * This structure of the opaque value mitigates the scenario in which, when > + * exactly 2**16 messages are marked as used between two successive calls to > + * virtqueue_poll(), the caller is fooled into thinking nothing new has arrived > + * since the pure last_used_idx is exactly the same. > + *

Do you want to move this comment to where the macros implementing it are?

> * Returns "true" if there are pending used buffers in the queue. > * > * This does not need to be serialized. > @@ -2118,9 +2156,13 @@ bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) > if (unlikely(vq->broken)) > return false; > > + if (unlikely(vq->wraps != VRING_GET_WRAPS(last_used_idx))) > + return true; > + > virtio_mb(vq->weak_barriers); > - return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) : > - virtqueue_poll_split(_vq, last_used_idx); > + return vq->packed_ring ? > + virtqueue_poll_packed(_vq, VRING_GET_IDX(last_used_idx)) : > + virtqueue_poll_split(_vq, VRING_GET_IDX(last_used_idx)); > } > EXPORT_SYMBOL_GPL(virtqueue_poll); > > @@ -2245,6 +2287,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, > vq->weak_barriers = weak_barriers; > vq->broken = false; > vq->last_used_idx = 0; > + vq->wraps = 0; > vq->event_triggered = false; > vq->num_added = 0; > vq->use_dma_api = vring_use_dma_api(vdev); > -- > 2.17.1