Hi, This patch set implements packed ring support in virtio driver. A performance test between pktgen (pktgen_sample03_burst_single_flow.sh) and DPDK vhost (testpmd/rxonly/vhost-PMD) has been done, I saw ~30% performance gain in packed ring in this case. To make this patch set work with below patch set for vhost, some hacks are needed to set the _F_NEXT flag in indirect descriptors (this should be fixed in vhost): https://lkml.org/lkml/2018/7/3/33 v2 -> v3: - Use leXX instead of virtioXX (MST); - Refactor split ring first (MST); - Add debug helpers (MST); - Put split/packed ring specific fields in sub structures (MST); - Handle normal descriptors and indirect descriptors differently (MST); - Track the DMA addr/len related info in a separate structure (MST); - Calculate AVAIL/USED flags only when wrap counter wraps (MST); - Define a struct/union to read event structure (MST); - Define a macro for wrap counter bit in uapi (MST); - Define the AVAIL/USED bits as shifts instead of values (MST); - s/_F_/_FLAG_/ in VRING_PACKED_EVENT_* as they are values (MST); - Drop the notify workaround for QEMU's tx-timer in packed ring (MST); v1 -> v2: - Use READ_ONCE() to read event off_wrap and flags together (Jason); - Add comments related to ccw (Jason); RFC v6 -> v1: - Avoid extra virtio_wmb() in virtqueue_enable_cb_delayed_packed() when event idx is off (Jason); - Fix bufs calculation in virtqueue_enable_cb_delayed_packed() (Jason); - Test the state of the desc at used_idx instead of last_used_idx in virtqueue_enable_cb_delayed_packed() (Jason); - Save wrap counter (as part of queue state) in the return value of virtqueue_enable_cb_prepare_packed(); - Refine the packed ring definitions in uapi; - Rebase on the net-next tree; RFC v5 -> RFC v6: - Avoid tracking addr/len/flags when DMA API isn't used (MST/Jason); - Define wrap counter as bool (Jason); - Use ALIGN() in vring_init_packed() (Jason); - Avoid using pointer to track `next` in detach_buf_packed() (Jason); - Add comments for barriers (Jason); - Don't enable RING_PACKED on ccw for now (noticed by Jason); - Refine the memory barrier in virtqueue_poll(); - Add a missing memory barrier in virtqueue_enable_cb_delayed_packed(); - Remove the hacks in virtqueue_enable_cb_prepare_packed(); RFC v4 -> RFC v5: - Save DMA addr, etc in desc state (Jason); - Track used wrap counter; RFC v3 -> RFC v4: - Make ID allocation support out-of-order (Jason); - Various fixes for EVENT_IDX support; RFC v2 -> RFC v3: - Split into small patches (Jason); - Add helper virtqueue_use_indirect() (Jason); - Just set id for the last descriptor of a list (Jason); - Calculate the prev in virtqueue_add_packed() (Jason); - Fix/improve desc suppression code (Jason/MST); - Refine the code layout for XXX_split/packed and wrappers (MST); - Fix the comments and API in uapi (MST); - Remove the BUG_ON() for indirect (Jason); - Some other refinements and bug fixes; RFC v1 -> RFC v2: - Add indirect descriptor support - compile test only; - Add event suppression supprt - compile test only; - Move vring_packed_init() out of uapi (Jason, MST); - Merge two loops into one in virtqueue_add_packed() (Jason); - Split vring_unmap_one() for packed ring and split ring (Jason); - Avoid using '%' operator (Jason); - Rename free_head -> next_avail_idx (Jason); - Add comments for virtio_wmb() in virtqueue_add_packed() (Jason); - Some other refinements and bug fixes; Tiwei Bie (13): virtio: add packed ring types and macros virtio_ring: add _split suffix for split ring functions virtio_ring: put split ring functions together virtio_ring: put split ring fields in a sub struct virtio_ring: introduce debug helpers virtio_ring: introduce helper for indirect feature virtio_ring: allocate desc state for split ring separately virtio_ring: extract split ring handling from ring creation virtio_ring: cache whether we will use DMA API virtio_ring: introduce packed ring support virtio_ring: leverage event idx in packed ring virtio_ring: disable packed ring on unsupported transports virtio_ring: advertize packed ring layout drivers/misc/mic/vop/vop_main.c | 13 + drivers/remoteproc/remoteproc_virtio.c | 13 + drivers/s390/virtio/virtio_ccw.c | 14 + drivers/virtio/virtio_ring.c | 1811 +++++++++++++++++++++++++------- include/uapi/linux/virtio_config.h | 3 + include/uapi/linux/virtio_ring.h | 52 + 6 files changed, 1530 insertions(+), 376 deletions(-) -- 2.14.5
Tiwei Bie
2018-Nov-21  10:03 UTC
[PATCH net-next v3 01/13] virtio: add packed ring types and macros
Add types and macros for packed ring.
Signed-off-by: Tiwei Bie <tiwei.bie at intel.com>
---
 include/uapi/linux/virtio_config.h |  3 +++
 include/uapi/linux/virtio_ring.h   | 52 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+)
diff --git a/include/uapi/linux/virtio_config.h
b/include/uapi/linux/virtio_config.h
index 449132c76b1c..1196e1c1d4f6 100644
--- a/include/uapi/linux/virtio_config.h
+++ b/include/uapi/linux/virtio_config.h
@@ -75,6 +75,9 @@
  */
 #define VIRTIO_F_IOMMU_PLATFORM		33
 
+/* This feature indicates support for the packed virtqueue layout. */
+#define VIRTIO_F_RING_PACKED		34
+
 /*
  * Does the device support Single Root I/O Virtualization?
  */
diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h
index 6d5d5faa989b..2414f8af26b3 100644
--- a/include/uapi/linux/virtio_ring.h
+++ b/include/uapi/linux/virtio_ring.h
@@ -44,6 +44,13 @@
 /* This means the buffer contains a list of buffer descriptors. */
 #define VRING_DESC_F_INDIRECT	4
 
+/*
+ * Mark a descriptor as available or used in packed ring.
+ * Notice: they are defined as shifts instead of shifted values.
+ */
+#define VRING_PACKED_DESC_F_AVAIL	7
+#define VRING_PACKED_DESC_F_USED	15
+
 /* The Host uses this in used->flags to advise the Guest: don't kick me
when
  * you add a buffer.  It's unreliable, so it's simply an optimization. 
Guest
  * will still kick if it's out of buffers. */
@@ -53,6 +60,23 @@
  * optimization.  */
 #define VRING_AVAIL_F_NO_INTERRUPT	1
 
+/* Enable events in packed ring. */
+#define VRING_PACKED_EVENT_FLAG_ENABLE	0x0
+/* Disable events in packed ring. */
+#define VRING_PACKED_EVENT_FLAG_DISABLE	0x1
+/*
+ * Enable events for a specific descriptor in packed ring.
+ * (as specified by Descriptor Ring Change Event Offset/Wrap Counter).
+ * Only valid if VIRTIO_RING_F_EVENT_IDX has been negotiated.
+ */
+#define VRING_PACKED_EVENT_FLAG_DESC	0x2
+
+/*
+ * Wrap counter bit shift in event suppression structure
+ * of packed ring.
+ */
+#define VRING_PACKED_EVENT_F_WRAP_CTR	15
+
 /* We support indirect buffer descriptors */
 #define VIRTIO_RING_F_INDIRECT_DESC	28
 
@@ -171,4 +195,32 @@ static inline int vring_need_event(__u16 event_idx, __u16
new_idx, __u16 old)
 	return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old);
 }
 
+struct vring_packed_desc_event {
+	/* Descriptor Ring Change Event Offset/Wrap Counter. */
+	__le16 off_wrap;
+	/* Descriptor Ring Change Event Flags. */
+	__le16 flags;
+};
+
+struct vring_packed_desc {
+	/* Buffer Address. */
+	__le64 addr;
+	/* Buffer Length. */
+	__le32 len;
+	/* Buffer ID. */
+	__le16 id;
+	/* The flags depending on descriptor type. */
+	__le16 flags;
+};
+
+struct vring_packed {
+	unsigned int num;
+
+	struct vring_packed_desc *desc;
+
+	struct vring_packed_desc_event *driver;
+
+	struct vring_packed_desc_event *device;
+};
+
 #endif /* _UAPI_LINUX_VIRTIO_RING_H */
-- 
2.14.5
Tiwei Bie
2018-Nov-21  10:03 UTC
[PATCH net-next v3 02/13] virtio_ring: add _split suffix for split ring functions
Add _split suffix for split ring specific functions. This
is a preparation for introducing the packed ring support.
There is no functional change.
Signed-off-by: Tiwei Bie <tiwei.bie at intel.com>
---
 drivers/virtio/virtio_ring.c | 269 ++++++++++++++++++++++++++-----------------
 1 file changed, 164 insertions(+), 105 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 814b395007b2..29fab2fb39cb 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -200,8 +200,8 @@ static dma_addr_t vring_map_single(const struct
vring_virtqueue *vq,
 			      cpu_addr, size, direction);
 }
 
-static void vring_unmap_one(const struct vring_virtqueue *vq,
-			    struct vring_desc *desc)
+static void vring_unmap_one_split(const struct vring_virtqueue *vq,
+				  struct vring_desc *desc)
 {
 	u16 flags;
 
@@ -234,8 +234,9 @@ static int vring_mapping_error(const struct vring_virtqueue
*vq,
 	return dma_mapping_error(vring_dma_dev(vq), addr);
 }
 
-static struct vring_desc *alloc_indirect(struct virtqueue *_vq,
-					 unsigned int total_sg, gfp_t gfp)
+static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
+					       unsigned int total_sg,
+					       gfp_t gfp)
 {
 	struct vring_desc *desc;
 	unsigned int i;
@@ -256,14 +257,14 @@ static struct vring_desc *alloc_indirect(struct virtqueue
*_vq,
 	return desc;
 }
 
-static inline int virtqueue_add(struct virtqueue *_vq,
-				struct scatterlist *sgs[],
-				unsigned int total_sg,
-				unsigned int out_sgs,
-				unsigned int in_sgs,
-				void *data,
-				void *ctx,
-				gfp_t gfp)
+static inline int virtqueue_add_split(struct virtqueue *_vq,
+				      struct scatterlist *sgs[],
+				      unsigned int total_sg,
+				      unsigned int out_sgs,
+				      unsigned int in_sgs,
+				      void *data,
+				      void *ctx,
+				      gfp_t gfp)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	struct scatterlist *sg;
@@ -302,7 +303,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	/* If the host supports indirect descriptor tables, and we have multiple
 	 * buffers, then go indirect. FIXME: tune this threshold */
 	if (vq->indirect && total_sg > 1 && vq->vq.num_free)
-		desc = alloc_indirect(_vq, total_sg, gfp);
+		desc = alloc_indirect_split(_vq, total_sg, gfp);
 	else {
 		desc = NULL;
 		WARN_ON_ONCE(total_sg > vq->vring.num && !vq->indirect);
@@ -423,7 +424,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	for (n = 0; n < total_sg; n++) {
 		if (i == err_idx)
 			break;
-		vring_unmap_one(vq, &desc[i]);
+		vring_unmap_one_split(vq, &desc[i]);
 		i = virtio16_to_cpu(_vq->vdev, vq->vring.desc[i].next);
 	}
 
@@ -434,6 +435,19 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	return -EIO;
 }
 
+static inline int virtqueue_add(struct virtqueue *_vq,
+				struct scatterlist *sgs[],
+				unsigned int total_sg,
+				unsigned int out_sgs,
+				unsigned int in_sgs,
+				void *data,
+				void *ctx,
+				gfp_t gfp)
+{
+	return virtqueue_add_split(_vq, sgs, total_sg,
+				   out_sgs, in_sgs, data, ctx, gfp);
+}
+
 /**
  * virtqueue_add_sgs - expose buffers to other end
  * @vq: the struct virtqueue we're talking about.
@@ -536,18 +550,7 @@ int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
 
-/**
- * virtqueue_kick_prepare - first half of split virtqueue_kick call.
- * @vq: the struct virtqueue
- *
- * Instead of virtqueue_kick(), you can do:
- *	if (virtqueue_kick_prepare(vq))
- *		virtqueue_notify(vq);
- *
- * This is sometimes useful because the virtqueue_kick_prepare() needs
- * to be serialized, but the actual virtqueue_notify() call does not.
- */
-bool virtqueue_kick_prepare(struct virtqueue *_vq)
+static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	u16 new, old;
@@ -579,6 +582,22 @@ bool virtqueue_kick_prepare(struct virtqueue *_vq)
 	END_USE(vq);
 	return needs_kick;
 }
+
+/**
+ * virtqueue_kick_prepare - first half of split virtqueue_kick call.
+ * @vq: the struct virtqueue
+ *
+ * Instead of virtqueue_kick(), you can do:
+ *	if (virtqueue_kick_prepare(vq))
+ *		virtqueue_notify(vq);
+ *
+ * This is sometimes useful because the virtqueue_kick_prepare() needs
+ * to be serialized, but the actual virtqueue_notify() call does not.
+ */
+bool virtqueue_kick_prepare(struct virtqueue *_vq)
+{
+	return virtqueue_kick_prepare_split(_vq);
+}
 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
 
 /**
@@ -625,8 +644,8 @@ bool virtqueue_kick(struct virtqueue *vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_kick);
 
-static void detach_buf(struct vring_virtqueue *vq, unsigned int head,
-		       void **ctx)
+static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
+			     void **ctx)
 {
 	unsigned int i, j;
 	__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
@@ -638,12 +657,12 @@ static void detach_buf(struct vring_virtqueue *vq,
unsigned int head,
 	i = head;
 
 	while (vq->vring.desc[i].flags & nextflag) {
-		vring_unmap_one(vq, &vq->vring.desc[i]);
+		vring_unmap_one_split(vq, &vq->vring.desc[i]);
 		i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next);
 		vq->vq.num_free++;
 	}
 
-	vring_unmap_one(vq, &vq->vring.desc[i]);
+	vring_unmap_one_split(vq, &vq->vring.desc[i]);
 	vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head);
 	vq->free_head = head;
 
@@ -665,7 +684,7 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned
int head,
 		BUG_ON(len == 0 || len % sizeof(struct vring_desc));
 
 		for (j = 0; j < len / sizeof(struct vring_desc); j++)
-			vring_unmap_one(vq, &indir_desc[j]);
+			vring_unmap_one_split(vq, &indir_desc[j]);
 
 		kfree(indir_desc);
 		vq->desc_state[head].indir_desc = NULL;
@@ -674,29 +693,14 @@ static void detach_buf(struct vring_virtqueue *vq,
unsigned int head,
 	}
 }
 
-static inline bool more_used(const struct vring_virtqueue *vq)
+static inline bool more_used_split(const struct vring_virtqueue *vq)
 {
 	return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
vq->vring.used->idx);
 }
 
-/**
- * virtqueue_get_buf - get the next used buffer
- * @vq: the struct virtqueue we're talking about.
- * @len: the length written into the buffer
- *
- * If the device wrote data into the buffer, @len will be set to the
- * amount written.  This means you don't need to clear the buffer
- * beforehand to ensure there's no data leakage in the case of short
- * writes.
- *
- * Caller must ensure we don't call this with other virtqueue
- * operations at the same time (except where noted).
- *
- * Returns NULL if there are no used buffers, or the "data" token
- * handed to virtqueue_add_*().
- */
-void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
-			    void **ctx)
+static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
+					 unsigned int *len,
+					 void **ctx)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	void *ret;
@@ -710,7 +714,7 @@ void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned
int *len,
 		return NULL;
 	}
 
-	if (!more_used(vq)) {
+	if (!more_used_split(vq)) {
 		pr_debug("No more buffers in queue\n");
 		END_USE(vq);
 		return NULL;
@@ -732,9 +736,9 @@ void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned
int *len,
 		return NULL;
 	}
 
-	/* detach_buf clears data, so grab it now. */
+	/* detach_buf_split clears data, so grab it now. */
 	ret = vq->desc_state[i].data;
-	detach_buf(vq, i, ctx);
+	detach_buf_split(vq, i, ctx);
 	vq->last_used_idx++;
 	/* If we expect an interrupt for the next entry, tell host
 	 * by writing event index and flush out the write before
@@ -751,6 +755,28 @@ void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned
int *len,
 	END_USE(vq);
 	return ret;
 }
+
+/**
+ * virtqueue_get_buf - get the next used buffer
+ * @vq: the struct virtqueue we're talking about.
+ * @len: the length written into the buffer
+ *
+ * If the device wrote data into the buffer, @len will be set to the
+ * amount written.  This means you don't need to clear the buffer
+ * beforehand to ensure there's no data leakage in the case of short
+ * writes.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ *
+ * Returns NULL if there are no used buffers, or the "data" token
+ * handed to virtqueue_add_*().
+ */
+void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
+			    void **ctx)
+{
+	return virtqueue_get_buf_ctx_split(_vq, len, ctx);
+}
 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
 
 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
@@ -758,6 +784,18 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int
*len)
 	return virtqueue_get_buf_ctx(_vq, len, NULL);
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
+
+static void virtqueue_disable_cb_split(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
+		vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
+		if (!vq->event)
+			vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev,
vq->avail_flags_shadow);
+	}
+}
+
 /**
  * virtqueue_disable_cb - disable callbacks
  * @vq: the struct virtqueue we're talking about.
@@ -769,17 +807,32 @@ EXPORT_SYMBOL_GPL(virtqueue_get_buf);
  */
 void virtqueue_disable_cb(struct virtqueue *_vq)
 {
-	struct vring_virtqueue *vq = to_vvq(_vq);
-
-	if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
-		vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
-		if (!vq->event)
-			vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev,
vq->avail_flags_shadow);
-	}
-
+	virtqueue_disable_cb_split(_vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
 
+static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	u16 last_used_idx;
+
+	START_USE(vq);
+
+	/* We optimistically turn back on interrupts, then check if there was
+	 * more to do. */
+	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
+	 * either clear the flags bit or point the event index at the next
+	 * entry. Always do both to keep code simple. */
+	if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
+		vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
+		if (!vq->event)
+			vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev,
vq->avail_flags_shadow);
+	}
+	vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev,
last_used_idx = vq->last_used_idx);
+	END_USE(vq);
+	return last_used_idx;
+}
+
 /**
  * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
  * @vq: the struct virtqueue we're talking about.
@@ -794,27 +847,18 @@ EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
  */
 unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
 {
-	struct vring_virtqueue *vq = to_vvq(_vq);
-	u16 last_used_idx;
-
-	START_USE(vq);
-
-	/* We optimistically turn back on interrupts, then check if there was
-	 * more to do. */
-	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
-	 * either clear the flags bit or point the event index at the next
-	 * entry. Always do both to keep code simple. */
-	if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
-		vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
-		if (!vq->event)
-			vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev,
vq->avail_flags_shadow);
-	}
-	vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev,
last_used_idx = vq->last_used_idx);
-	END_USE(vq);
-	return last_used_idx;
+	return virtqueue_enable_cb_prepare_split(_vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
 
+static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
+			vq->vring.used->idx);
+}
+
 /**
  * virtqueue_poll - query pending used buffers
  * @vq: the struct virtqueue we're talking about.
@@ -829,7 +873,7 @@ bool virtqueue_poll(struct virtqueue *_vq, unsigned
last_used_idx)
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
 	virtio_mb(vq->weak_barriers);
-	return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
vq->vring.used->idx);
+	return virtqueue_poll_split(_vq, last_used_idx);
 }
 EXPORT_SYMBOL_GPL(virtqueue_poll);
 
@@ -851,20 +895,7 @@ bool virtqueue_enable_cb(struct virtqueue *_vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
 
-/**
- * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
- * @vq: the struct virtqueue we're talking about.
- *
- * This re-enables callbacks but hints to the other side to delay
- * interrupts until most of the available buffers have been processed;
- * it returns "false" if there are many pending buffers in the queue,
- * to detect a possible race between the driver checking for more work,
- * and enabling callbacks.
- *
- * Caller must ensure we don't call this with other virtqueue
- * operations at the same time (except where noted).
- */
-bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
+static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	u16 bufs;
@@ -896,17 +927,27 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
 	END_USE(vq);
 	return true;
 }
+
+/**
+ * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
+ * @vq: the struct virtqueue we're talking about.
+ *
+ * This re-enables callbacks but hints to the other side to delay
+ * interrupts until most of the available buffers have been processed;
+ * it returns "false" if there are many pending buffers in the queue,
+ * to detect a possible race between the driver checking for more work,
+ * and enabling callbacks.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ */
+bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
+{
+	return virtqueue_enable_cb_delayed_split(_vq);
+}
 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
 
-/**
- * virtqueue_detach_unused_buf - detach first unused buffer
- * @vq: the struct virtqueue we're talking about.
- *
- * Returns NULL or the "data" token handed to virtqueue_add_*().
- * This is not valid on an active queue; it is useful only for device
- * shutdown.
- */
-void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
+static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	unsigned int i;
@@ -917,9 +958,9 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
 	for (i = 0; i < vq->vring.num; i++) {
 		if (!vq->desc_state[i].data)
 			continue;
-		/* detach_buf clears data, so grab it now. */
+		/* detach_buf_split clears data, so grab it now. */
 		buf = vq->desc_state[i].data;
-		detach_buf(vq, i, NULL);
+		detach_buf_split(vq, i, NULL);
 		vq->avail_idx_shadow--;
 		vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev,
vq->avail_idx_shadow);
 		END_USE(vq);
@@ -931,8 +972,26 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
 	END_USE(vq);
 	return NULL;
 }
+
+/**
+ * virtqueue_detach_unused_buf - detach first unused buffer
+ * @vq: the struct virtqueue we're talking about.
+ *
+ * Returns NULL or the "data" token handed to virtqueue_add_*().
+ * This is not valid on an active queue; it is useful only for device
+ * shutdown.
+ */
+void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
+{
+	return virtqueue_detach_unused_buf_split(_vq);
+}
 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
 
+static inline bool more_used(const struct vring_virtqueue *vq)
+{
+	return more_used_split(vq);
+}
+
 irqreturn_t vring_interrupt(int irq, void *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
-- 
2.14.5
Tiwei Bie
2018-Nov-21  10:03 UTC
[PATCH net-next v3 03/13] virtio_ring: put split ring functions together
Put the xxx_split() functions together to make the
code more readable and avoid misuse after introducing
the packed ring. There is no functional change.
Signed-off-by: Tiwei Bie <tiwei.bie at intel.com>
---
 drivers/virtio/virtio_ring.c | 587 ++++++++++++++++++++++---------------------
 1 file changed, 302 insertions(+), 285 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 29fab2fb39cb..7cd40a2a0d21 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -113,6 +113,11 @@ struct vring_virtqueue {
 	struct vring_desc_state desc_state[];
 };
 
+
+/*
+ * Helpers.
+ */
+
 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 
 /*
@@ -200,6 +205,20 @@ static dma_addr_t vring_map_single(const struct
vring_virtqueue *vq,
 			      cpu_addr, size, direction);
 }
 
+static int vring_mapping_error(const struct vring_virtqueue *vq,
+			       dma_addr_t addr)
+{
+	if (!vring_use_dma_api(vq->vq.vdev))
+		return 0;
+
+	return dma_mapping_error(vring_dma_dev(vq), addr);
+}
+
+
+/*
+ * Split ring specific functions - *_split().
+ */
+
 static void vring_unmap_one_split(const struct vring_virtqueue *vq,
 				  struct vring_desc *desc)
 {
@@ -225,15 +244,6 @@ static void vring_unmap_one_split(const struct
vring_virtqueue *vq,
 	}
 }
 
-static int vring_mapping_error(const struct vring_virtqueue *vq,
-			       dma_addr_t addr)
-{
-	if (!vring_use_dma_api(vq->vq.vdev))
-		return 0;
-
-	return dma_mapping_error(vring_dma_dev(vq), addr);
-}
-
 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
 					       unsigned int total_sg,
 					       gfp_t gfp)
@@ -435,121 +445,6 @@ static inline int virtqueue_add_split(struct virtqueue
*_vq,
 	return -EIO;
 }
 
-static inline int virtqueue_add(struct virtqueue *_vq,
-				struct scatterlist *sgs[],
-				unsigned int total_sg,
-				unsigned int out_sgs,
-				unsigned int in_sgs,
-				void *data,
-				void *ctx,
-				gfp_t gfp)
-{
-	return virtqueue_add_split(_vq, sgs, total_sg,
-				   out_sgs, in_sgs, data, ctx, gfp);
-}
-
-/**
- * virtqueue_add_sgs - expose buffers to other end
- * @vq: the struct virtqueue we're talking about.
- * @sgs: array of terminated scatterlists.
- * @out_num: the number of scatterlists readable by other side
- * @in_num: the number of scatterlists which are writable (after readable ones)
- * @data: the token identifying the buffer.
- * @gfp: how to do memory allocations (if necessary).
- *
- * Caller must ensure we don't call this with other virtqueue operations
- * at the same time (except where noted).
- *
- * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
- */
-int virtqueue_add_sgs(struct virtqueue *_vq,
-		      struct scatterlist *sgs[],
-		      unsigned int out_sgs,
-		      unsigned int in_sgs,
-		      void *data,
-		      gfp_t gfp)
-{
-	unsigned int i, total_sg = 0;
-
-	/* Count them first. */
-	for (i = 0; i < out_sgs + in_sgs; i++) {
-		struct scatterlist *sg;
-		for (sg = sgs[i]; sg; sg = sg_next(sg))
-			total_sg++;
-	}
-	return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
-			     data, NULL, gfp);
-}
-EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
-
-/**
- * virtqueue_add_outbuf - expose output buffers to other end
- * @vq: the struct virtqueue we're talking about.
- * @sg: scatterlist (must be well-formed and terminated!)
- * @num: the number of entries in @sg readable by other side
- * @data: the token identifying the buffer.
- * @gfp: how to do memory allocations (if necessary).
- *
- * Caller must ensure we don't call this with other virtqueue operations
- * at the same time (except where noted).
- *
- * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
- */
-int virtqueue_add_outbuf(struct virtqueue *vq,
-			 struct scatterlist *sg, unsigned int num,
-			 void *data,
-			 gfp_t gfp)
-{
-	return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
-}
-EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
-
-/**
- * virtqueue_add_inbuf - expose input buffers to other end
- * @vq: the struct virtqueue we're talking about.
- * @sg: scatterlist (must be well-formed and terminated!)
- * @num: the number of entries in @sg writable by other side
- * @data: the token identifying the buffer.
- * @gfp: how to do memory allocations (if necessary).
- *
- * Caller must ensure we don't call this with other virtqueue operations
- * at the same time (except where noted).
- *
- * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
- */
-int virtqueue_add_inbuf(struct virtqueue *vq,
-			struct scatterlist *sg, unsigned int num,
-			void *data,
-			gfp_t gfp)
-{
-	return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
-}
-EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
-
-/**
- * virtqueue_add_inbuf_ctx - expose input buffers to other end
- * @vq: the struct virtqueue we're talking about.
- * @sg: scatterlist (must be well-formed and terminated!)
- * @num: the number of entries in @sg writable by other side
- * @data: the token identifying the buffer.
- * @ctx: extra context for the token
- * @gfp: how to do memory allocations (if necessary).
- *
- * Caller must ensure we don't call this with other virtqueue operations
- * at the same time (except where noted).
- *
- * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
- */
-int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
-			struct scatterlist *sg, unsigned int num,
-			void *data,
-			void *ctx,
-			gfp_t gfp)
-{
-	return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
-}
-EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
-
 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -583,67 +478,6 @@ static bool virtqueue_kick_prepare_split(struct virtqueue
*_vq)
 	return needs_kick;
 }
 
-/**
- * virtqueue_kick_prepare - first half of split virtqueue_kick call.
- * @vq: the struct virtqueue
- *
- * Instead of virtqueue_kick(), you can do:
- *	if (virtqueue_kick_prepare(vq))
- *		virtqueue_notify(vq);
- *
- * This is sometimes useful because the virtqueue_kick_prepare() needs
- * to be serialized, but the actual virtqueue_notify() call does not.
- */
-bool virtqueue_kick_prepare(struct virtqueue *_vq)
-{
-	return virtqueue_kick_prepare_split(_vq);
-}
-EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
-
-/**
- * virtqueue_notify - second half of split virtqueue_kick call.
- * @vq: the struct virtqueue
- *
- * This does not need to be serialized.
- *
- * Returns false if host notify failed or queue is broken, otherwise true.
- */
-bool virtqueue_notify(struct virtqueue *_vq)
-{
-	struct vring_virtqueue *vq = to_vvq(_vq);
-
-	if (unlikely(vq->broken))
-		return false;
-
-	/* Prod other side to tell it about changes. */
-	if (!vq->notify(_vq)) {
-		vq->broken = true;
-		return false;
-	}
-	return true;
-}
-EXPORT_SYMBOL_GPL(virtqueue_notify);
-
-/**
- * virtqueue_kick - update after add_buf
- * @vq: the struct virtqueue
- *
- * After one or more virtqueue_add_* calls, invoke this to kick
- * the other side.
- *
- * Caller must ensure we don't call this with other virtqueue
- * operations at the same time (except where noted).
- *
- * Returns false if kick failed, otherwise true.
- */
-bool virtqueue_kick(struct virtqueue *vq)
-{
-	if (virtqueue_kick_prepare(vq))
-		return virtqueue_notify(vq);
-	return true;
-}
-EXPORT_SYMBOL_GPL(virtqueue_kick);
-
 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
 			     void **ctx)
 {
@@ -756,6 +590,288 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue
*_vq,
 	return ret;
 }
 
+static void virtqueue_disable_cb_split(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
+		vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
+		if (!vq->event)
+			vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev,
vq->avail_flags_shadow);
+	}
+}
+
+static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	u16 last_used_idx;
+
+	START_USE(vq);
+
+	/* We optimistically turn back on interrupts, then check if there was
+	 * more to do. */
+	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
+	 * either clear the flags bit or point the event index at the next
+	 * entry. Always do both to keep code simple. */
+	if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
+		vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
+		if (!vq->event)
+			vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev,
vq->avail_flags_shadow);
+	}
+	vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev,
last_used_idx = vq->last_used_idx);
+	END_USE(vq);
+	return last_used_idx;
+}
+
+static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
+			vq->vring.used->idx);
+}
+
+static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	u16 bufs;
+
+	START_USE(vq);
+
+	/* We optimistically turn back on interrupts, then check if there was
+	 * more to do. */
+	/* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
+	 * either clear the flags bit or point the event index at the next
+	 * entry. Always update the event index to keep code simple. */
+	if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
+		vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
+		if (!vq->event)
+			vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev,
vq->avail_flags_shadow);
+	}
+	/* TODO: tune this threshold */
+	bufs = (u16)(vq->avail_idx_shadow - vq->last_used_idx) * 3 / 4;
+
+	virtio_store_mb(vq->weak_barriers,
+			&vring_used_event(&vq->vring),
+			cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
+
+	if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->vring.used->idx) -
vq->last_used_idx) > bufs)) {
+		END_USE(vq);
+		return false;
+	}
+
+	END_USE(vq);
+	return true;
+}
+
+static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	unsigned int i;
+	void *buf;
+
+	START_USE(vq);
+
+	for (i = 0; i < vq->vring.num; i++) {
+		if (!vq->desc_state[i].data)
+			continue;
+		/* detach_buf_split clears data, so grab it now. */
+		buf = vq->desc_state[i].data;
+		detach_buf_split(vq, i, NULL);
+		vq->avail_idx_shadow--;
+		vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev,
vq->avail_idx_shadow);
+		END_USE(vq);
+		return buf;
+	}
+	/* That should have freed everything. */
+	BUG_ON(vq->vq.num_free != vq->vring.num);
+
+	END_USE(vq);
+	return NULL;
+}
+
+
+/*
+ * Generic functions and exported symbols.
+ */
+
+static inline int virtqueue_add(struct virtqueue *_vq,
+				struct scatterlist *sgs[],
+				unsigned int total_sg,
+				unsigned int out_sgs,
+				unsigned int in_sgs,
+				void *data,
+				void *ctx,
+				gfp_t gfp)
+{
+	return virtqueue_add_split(_vq, sgs, total_sg,
+				   out_sgs, in_sgs, data, ctx, gfp);
+}
+
+/**
+ * virtqueue_add_sgs - expose buffers to other end
+ * @vq: the struct virtqueue we're talking about.
+ * @sgs: array of terminated scatterlists.
+ * @out_num: the number of scatterlists readable by other side
+ * @in_num: the number of scatterlists which are writable (after readable ones)
+ * @data: the token identifying the buffer.
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
+ */
+int virtqueue_add_sgs(struct virtqueue *_vq,
+		      struct scatterlist *sgs[],
+		      unsigned int out_sgs,
+		      unsigned int in_sgs,
+		      void *data,
+		      gfp_t gfp)
+{
+	unsigned int i, total_sg = 0;
+
+	/* Count them first. */
+	for (i = 0; i < out_sgs + in_sgs; i++) {
+		struct scatterlist *sg;
+
+		for (sg = sgs[i]; sg; sg = sg_next(sg))
+			total_sg++;
+	}
+	return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
+			     data, NULL, gfp);
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
+
+/**
+ * virtqueue_add_outbuf - expose output buffers to other end
+ * @vq: the struct virtqueue we're talking about.
+ * @sg: scatterlist (must be well-formed and terminated!)
+ * @num: the number of entries in @sg readable by other side
+ * @data: the token identifying the buffer.
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
+ */
+int virtqueue_add_outbuf(struct virtqueue *vq,
+			 struct scatterlist *sg, unsigned int num,
+			 void *data,
+			 gfp_t gfp)
+{
+	return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
+
+/**
+ * virtqueue_add_inbuf - expose input buffers to other end
+ * @vq: the struct virtqueue we're talking about.
+ * @sg: scatterlist (must be well-formed and terminated!)
+ * @num: the number of entries in @sg writable by other side
+ * @data: the token identifying the buffer.
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
+ */
+int virtqueue_add_inbuf(struct virtqueue *vq,
+			struct scatterlist *sg, unsigned int num,
+			void *data,
+			gfp_t gfp)
+{
+	return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
+
+/**
+ * virtqueue_add_inbuf_ctx - expose input buffers to other end
+ * @vq: the struct virtqueue we're talking about.
+ * @sg: scatterlist (must be well-formed and terminated!)
+ * @num: the number of entries in @sg writable by other side
+ * @data: the token identifying the buffer.
+ * @ctx: extra context for the token
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
+ */
+int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
+			struct scatterlist *sg, unsigned int num,
+			void *data,
+			void *ctx,
+			gfp_t gfp)
+{
+	return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
+}
+EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
+
+/**
+ * virtqueue_kick_prepare - first half of split virtqueue_kick call.
+ * @vq: the struct virtqueue
+ *
+ * Instead of virtqueue_kick(), you can do:
+ *	if (virtqueue_kick_prepare(vq))
+ *		virtqueue_notify(vq);
+ *
+ * This is sometimes useful because the virtqueue_kick_prepare() needs
+ * to be serialized, but the actual virtqueue_notify() call does not.
+ */
+bool virtqueue_kick_prepare(struct virtqueue *_vq)
+{
+	return virtqueue_kick_prepare_split(_vq);
+}
+EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
+
+/**
+ * virtqueue_notify - second half of split virtqueue_kick call.
+ * @vq: the struct virtqueue
+ *
+ * This does not need to be serialized.
+ *
+ * Returns false if host notify failed or queue is broken, otherwise true.
+ */
+bool virtqueue_notify(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	if (unlikely(vq->broken))
+		return false;
+
+	/* Prod other side to tell it about changes. */
+	if (!vq->notify(_vq)) {
+		vq->broken = true;
+		return false;
+	}
+	return true;
+}
+EXPORT_SYMBOL_GPL(virtqueue_notify);
+
+/**
+ * virtqueue_kick - update after add_buf
+ * @vq: the struct virtqueue
+ *
+ * After one or more virtqueue_add_* calls, invoke this to kick
+ * the other side.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ *
+ * Returns false if kick failed, otherwise true.
+ */
+bool virtqueue_kick(struct virtqueue *vq)
+{
+	if (virtqueue_kick_prepare(vq))
+		return virtqueue_notify(vq);
+	return true;
+}
+EXPORT_SYMBOL_GPL(virtqueue_kick);
+
 /**
  * virtqueue_get_buf - get the next used buffer
  * @vq: the struct virtqueue we're talking about.
@@ -785,17 +901,6 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int
*len)
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
 
-static void virtqueue_disable_cb_split(struct virtqueue *_vq)
-{
-	struct vring_virtqueue *vq = to_vvq(_vq);
-
-	if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
-		vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
-		if (!vq->event)
-			vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev,
vq->avail_flags_shadow);
-	}
-}
-
 /**
  * virtqueue_disable_cb - disable callbacks
  * @vq: the struct virtqueue we're talking about.
@@ -811,28 +916,6 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
 
-static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
-{
-	struct vring_virtqueue *vq = to_vvq(_vq);
-	u16 last_used_idx;
-
-	START_USE(vq);
-
-	/* We optimistically turn back on interrupts, then check if there was
-	 * more to do. */
-	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
-	 * either clear the flags bit or point the event index at the next
-	 * entry. Always do both to keep code simple. */
-	if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
-		vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
-		if (!vq->event)
-			vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev,
vq->avail_flags_shadow);
-	}
-	vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev,
last_used_idx = vq->last_used_idx);
-	END_USE(vq);
-	return last_used_idx;
-}
-
 /**
  * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
  * @vq: the struct virtqueue we're talking about.
@@ -851,14 +934,6 @@ unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
 
-static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx)
-{
-	struct vring_virtqueue *vq = to_vvq(_vq);
-
-	return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
-			vq->vring.used->idx);
-}
-
 /**
  * virtqueue_poll - query pending used buffers
  * @vq: the struct virtqueue we're talking about.
@@ -891,43 +966,11 @@ EXPORT_SYMBOL_GPL(virtqueue_poll);
 bool virtqueue_enable_cb(struct virtqueue *_vq)
 {
 	unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq);
+
 	return !virtqueue_poll(_vq, last_used_idx);
 }
 EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
 
-static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
-{
-	struct vring_virtqueue *vq = to_vvq(_vq);
-	u16 bufs;
-
-	START_USE(vq);
-
-	/* We optimistically turn back on interrupts, then check if there was
-	 * more to do. */
-	/* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
-	 * either clear the flags bit or point the event index at the next
-	 * entry. Always update the event index to keep code simple. */
-	if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
-		vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
-		if (!vq->event)
-			vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev,
vq->avail_flags_shadow);
-	}
-	/* TODO: tune this threshold */
-	bufs = (u16)(vq->avail_idx_shadow - vq->last_used_idx) * 3 / 4;
-
-	virtio_store_mb(vq->weak_barriers,
-			&vring_used_event(&vq->vring),
-			cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
-
-	if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->vring.used->idx) -
vq->last_used_idx) > bufs)) {
-		END_USE(vq);
-		return false;
-	}
-
-	END_USE(vq);
-	return true;
-}
-
 /**
  * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
  * @vq: the struct virtqueue we're talking about.
@@ -947,32 +990,6 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
 
-static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
-{
-	struct vring_virtqueue *vq = to_vvq(_vq);
-	unsigned int i;
-	void *buf;
-
-	START_USE(vq);
-
-	for (i = 0; i < vq->vring.num; i++) {
-		if (!vq->desc_state[i].data)
-			continue;
-		/* detach_buf_split clears data, so grab it now. */
-		buf = vq->desc_state[i].data;
-		detach_buf_split(vq, i, NULL);
-		vq->avail_idx_shadow--;
-		vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev,
vq->avail_idx_shadow);
-		END_USE(vq);
-		return buf;
-	}
-	/* That should have freed everything. */
-	BUG_ON(vq->vq.num_free != vq->vring.num);
-
-	END_USE(vq);
-	return NULL;
-}
-
 /**
  * virtqueue_detach_unused_buf - detach first unused buffer
  * @vq: the struct virtqueue we're talking about.
-- 
2.14.5
Tiwei Bie
2018-Nov-21  10:03 UTC
[PATCH net-next v3 04/13] virtio_ring: put split ring fields in a sub struct
Put the split ring specific fields in a sub-struct named
as "split" to avoid misuse after introducing packed ring.
There is no functional change.
Signed-off-by: Tiwei Bie <tiwei.bie at intel.com>
---
 drivers/virtio/virtio_ring.c | 156 +++++++++++++++++++++++++------------------
 1 file changed, 91 insertions(+), 65 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 7cd40a2a0d21..0b97e5c79654 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -63,9 +63,6 @@ struct vring_desc_state {
 struct vring_virtqueue {
 	struct virtqueue vq;
 
-	/* Actual memory layout for this queue */
-	struct vring vring;
-
 	/* Can we use weak barriers? */
 	bool weak_barriers;
 
@@ -86,11 +83,16 @@ struct vring_virtqueue {
 	/* Last used index we've seen. */
 	u16 last_used_idx;
 
-	/* Last written value to avail->flags */
-	u16 avail_flags_shadow;
+	struct {
+		/* Actual memory layout for this queue */
+		struct vring vring;
 
-	/* Last written value to avail->idx in guest byte order */
-	u16 avail_idx_shadow;
+		/* Last written value to avail->flags */
+		u16 avail_flags_shadow;
+
+		/* Last written value to avail->idx in guest byte order */
+		u16 avail_idx_shadow;
+	} split;
 
 	/* How to notify other side. FIXME: commonalize hcalls! */
 	bool (*notify)(struct virtqueue *vq);
@@ -316,7 +318,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 		desc = alloc_indirect_split(_vq, total_sg, gfp);
 	else {
 		desc = NULL;
-		WARN_ON_ONCE(total_sg > vq->vring.num && !vq->indirect);
+		WARN_ON_ONCE(total_sg > vq->split.vring.num &&
!vq->indirect);
 	}
 
 	if (desc) {
@@ -327,7 +329,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 		descs_used = 1;
 	} else {
 		indirect = false;
-		desc = vq->vring.desc;
+		desc = vq->split.vring.desc;
 		i = head;
 		descs_used = total_sg;
 	}
@@ -383,10 +385,13 @@ static inline int virtqueue_add_split(struct virtqueue
*_vq,
 		if (vring_mapping_error(vq, addr))
 			goto unmap_release;
 
-		vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev,
VRING_DESC_F_INDIRECT);
-		vq->vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, addr);
+		vq->split.vring.desc[head].flags = cpu_to_virtio16(_vq->vdev,
+				VRING_DESC_F_INDIRECT);
+		vq->split.vring.desc[head].addr = cpu_to_virtio64(_vq->vdev,
+				addr);
 
-		vq->vring.desc[head].len = cpu_to_virtio32(_vq->vdev, total_sg *
sizeof(struct vring_desc));
+		vq->split.vring.desc[head].len = cpu_to_virtio32(_vq->vdev,
+				total_sg * sizeof(struct vring_desc));
 	}
 
 	/* We're using some buffers from the free list. */
@@ -394,7 +399,8 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 
 	/* Update free pointer */
 	if (indirect)
-		vq->free_head = virtio16_to_cpu(_vq->vdev,
vq->vring.desc[head].next);
+		vq->free_head = virtio16_to_cpu(_vq->vdev,
+					vq->split.vring.desc[head].next);
 	else
 		vq->free_head = i;
 
@@ -407,14 +413,15 @@ static inline int virtqueue_add_split(struct virtqueue
*_vq,
 
 	/* Put entry in available array (but don't update avail->idx until they
 	 * do sync). */
-	avail = vq->avail_idx_shadow & (vq->vring.num - 1);
-	vq->vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
+	avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
+	vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev,
head);
 
 	/* Descriptors and available array need to be set before we expose the
 	 * new available array entries. */
 	virtio_wmb(vq->weak_barriers);
-	vq->avail_idx_shadow++;
-	vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev,
vq->avail_idx_shadow);
+	vq->split.avail_idx_shadow++;
+	vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
+						vq->split.avail_idx_shadow);
 	vq->num_added++;
 
 	pr_debug("Added buffer head %i to %p\n", head, vq);
@@ -435,7 +442,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 		if (i == err_idx)
 			break;
 		vring_unmap_one_split(vq, &desc[i]);
-		i = virtio16_to_cpu(_vq->vdev, vq->vring.desc[i].next);
+		i = virtio16_to_cpu(_vq->vdev, vq->split.vring.desc[i].next);
 	}
 
 	if (indirect)
@@ -456,8 +463,8 @@ static bool virtqueue_kick_prepare_split(struct virtqueue
*_vq)
 	 * event. */
 	virtio_mb(vq->weak_barriers);
 
-	old = vq->avail_idx_shadow - vq->num_added;
-	new = vq->avail_idx_shadow;
+	old = vq->split.avail_idx_shadow - vq->num_added;
+	new = vq->split.avail_idx_shadow;
 	vq->num_added = 0;
 
 #ifdef DEBUG
@@ -469,10 +476,13 @@ static bool virtqueue_kick_prepare_split(struct virtqueue
*_vq)
 #endif
 
 	if (vq->event) {
-		needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
vring_avail_event(&vq->vring)),
+		needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
+					vring_avail_event(&vq->split.vring)),
 					      new, old);
 	} else {
-		needs_kick = !(vq->vring.used->flags &
cpu_to_virtio16(_vq->vdev, VRING_USED_F_NO_NOTIFY));
+		needs_kick = !(vq->split.vring.used->flags &
+					cpu_to_virtio16(_vq->vdev,
+						VRING_USED_F_NO_NOTIFY));
 	}
 	END_USE(vq);
 	return needs_kick;
@@ -490,14 +500,15 @@ static void detach_buf_split(struct vring_virtqueue *vq,
unsigned int head,
 	/* Put back on free list: unmap first-level descriptors and find end */
 	i = head;
 
-	while (vq->vring.desc[i].flags & nextflag) {
-		vring_unmap_one_split(vq, &vq->vring.desc[i]);
-		i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next);
+	while (vq->split.vring.desc[i].flags & nextflag) {
+		vring_unmap_one_split(vq, &vq->split.vring.desc[i]);
+		i = virtio16_to_cpu(vq->vq.vdev, vq->split.vring.desc[i].next);
 		vq->vq.num_free++;
 	}
 
-	vring_unmap_one_split(vq, &vq->vring.desc[i]);
-	vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head);
+	vring_unmap_one_split(vq, &vq->split.vring.desc[i]);
+	vq->split.vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev,
+						vq->free_head);
 	vq->free_head = head;
 
 	/* Plus final descriptor */
@@ -511,9 +522,10 @@ static void detach_buf_split(struct vring_virtqueue *vq,
unsigned int head,
 		if (!indir_desc)
 			return;
 
-		len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len);
+		len = virtio32_to_cpu(vq->vq.vdev,
+				vq->split.vring.desc[head].len);
 
-		BUG_ON(!(vq->vring.desc[head].flags &
+		BUG_ON(!(vq->split.vring.desc[head].flags &
 			 cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
 		BUG_ON(len == 0 || len % sizeof(struct vring_desc));
 
@@ -529,7 +541,8 @@ static void detach_buf_split(struct vring_virtqueue *vq,
unsigned int head,
 
 static inline bool more_used_split(const struct vring_virtqueue *vq)
 {
-	return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
vq->vring.used->idx);
+	return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
+			vq->split.vring.used->idx);
 }
 
 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
@@ -557,11 +570,13 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue
*_vq,
 	/* Only get used array entries after they have been exposed by host. */
 	virtio_rmb(vq->weak_barriers);
 
-	last_used = (vq->last_used_idx & (vq->vring.num - 1));
-	i = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].id);
-	*len = virtio32_to_cpu(_vq->vdev,
vq->vring.used->ring[last_used].len);
+	last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
+	i = virtio32_to_cpu(_vq->vdev,
+			vq->split.vring.used->ring[last_used].id);
+	*len = virtio32_to_cpu(_vq->vdev,
+			vq->split.vring.used->ring[last_used].len);
 
-	if (unlikely(i >= vq->vring.num)) {
+	if (unlikely(i >= vq->split.vring.num)) {
 		BAD_RING(vq, "id %u out of range\n", i);
 		return NULL;
 	}
@@ -577,9 +592,9 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue
*_vq,
 	/* If we expect an interrupt for the next entry, tell host
 	 * by writing event index and flush out the write before
 	 * the read in the next get_buf call. */
-	if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
+	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
 		virtio_store_mb(vq->weak_barriers,
-				&vring_used_event(&vq->vring),
+				&vring_used_event(&vq->split.vring),
 				cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
 
 #ifdef DEBUG
@@ -594,10 +609,12 @@ static void virtqueue_disable_cb_split(struct virtqueue
*_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
-	if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
-		vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
+	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
+		vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
 		if (!vq->event)
-			vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev,
vq->avail_flags_shadow);
+			vq->split.vring.avail->flags +				cpu_to_virtio16(_vq->vdev,
+						vq->split.avail_flags_shadow);
 	}
 }
 
@@ -613,12 +630,15 @@ static unsigned virtqueue_enable_cb_prepare_split(struct
virtqueue *_vq)
 	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
 	 * either clear the flags bit or point the event index at the next
 	 * entry. Always do both to keep code simple. */
-	if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
-		vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
+	if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
+		vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
 		if (!vq->event)
-			vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev,
vq->avail_flags_shadow);
+			vq->split.vring.avail->flags +				cpu_to_virtio16(_vq->vdev,
+						vq->split.avail_flags_shadow);
 	}
-	vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev,
last_used_idx = vq->last_used_idx);
+	vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
+			last_used_idx = vq->last_used_idx);
 	END_USE(vq);
 	return last_used_idx;
 }
@@ -628,7 +648,7 @@ static bool virtqueue_poll_split(struct virtqueue *_vq,
unsigned last_used_idx)
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
 	return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
-			vq->vring.used->idx);
+			vq->split.vring.used->idx);
 }
 
 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
@@ -643,19 +663,22 @@ static bool virtqueue_enable_cb_delayed_split(struct
virtqueue *_vq)
 	/* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
 	 * either clear the flags bit or point the event index at the next
 	 * entry. Always update the event index to keep code simple. */
-	if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
-		vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
+	if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
+		vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
 		if (!vq->event)
-			vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev,
vq->avail_flags_shadow);
+			vq->split.vring.avail->flags +				cpu_to_virtio16(_vq->vdev,
+						vq->split.avail_flags_shadow);
 	}
 	/* TODO: tune this threshold */
-	bufs = (u16)(vq->avail_idx_shadow - vq->last_used_idx) * 3 / 4;
+	bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
 
 	virtio_store_mb(vq->weak_barriers,
-			&vring_used_event(&vq->vring),
+			&vring_used_event(&vq->split.vring),
 			cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
 
-	if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->vring.used->idx) -
vq->last_used_idx) > bufs)) {
+	if (unlikely((u16)(virtio16_to_cpu(_vq->vdev,
vq->split.vring.used->idx)
+					- vq->last_used_idx) > bufs)) {
 		END_USE(vq);
 		return false;
 	}
@@ -672,19 +695,20 @@ static void *virtqueue_detach_unused_buf_split(struct
virtqueue *_vq)
 
 	START_USE(vq);
 
-	for (i = 0; i < vq->vring.num; i++) {
+	for (i = 0; i < vq->split.vring.num; i++) {
 		if (!vq->desc_state[i].data)
 			continue;
 		/* detach_buf_split clears data, so grab it now. */
 		buf = vq->desc_state[i].data;
 		detach_buf_split(vq, i, NULL);
-		vq->avail_idx_shadow--;
-		vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev,
vq->avail_idx_shadow);
+		vq->split.avail_idx_shadow--;
+		vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
+				vq->split.avail_idx_shadow);
 		END_USE(vq);
 		return buf;
 	}
 	/* That should have freed everything. */
-	BUG_ON(vq->vq.num_free != vq->vring.num);
+	BUG_ON(vq->vq.num_free != vq->split.vring.num);
 
 	END_USE(vq);
 	return NULL;
@@ -1046,7 +1070,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int
index,
 	if (!vq)
 		return NULL;
 
-	vq->vring = vring;
 	vq->vq.callback = callback;
 	vq->vq.vdev = vdev;
 	vq->vq.name = name;
@@ -1059,8 +1082,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int
index,
 	vq->weak_barriers = weak_barriers;
 	vq->broken = false;
 	vq->last_used_idx = 0;
-	vq->avail_flags_shadow = 0;
-	vq->avail_idx_shadow = 0;
 	vq->num_added = 0;
 	list_add_tail(&vq->vq.list, &vdev->vqs);
 #ifdef DEBUG
@@ -1072,17 +1093,22 @@ struct virtqueue *__vring_new_virtqueue(unsigned int
index,
 		!context;
 	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
 
+	vq->split.vring = vring;
+	vq->split.avail_flags_shadow = 0;
+	vq->split.avail_idx_shadow = 0;
+
 	/* No callback?  Tell other side not to bother us. */
 	if (!callback) {
-		vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
+		vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
 		if (!vq->event)
-			vq->vring.avail->flags = cpu_to_virtio16(vdev,
vq->avail_flags_shadow);
+			vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
+					vq->split.avail_flags_shadow);
 	}
 
 	/* Put everything in free lists. */
 	vq->free_head = 0;
 	for (i = 0; i < vring.num-1; i++)
-		vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
+		vq->split.vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
 	memset(vq->desc_state, 0, vring.num * sizeof(struct vring_desc_state));
 
 	return &vq->vq;
@@ -1218,7 +1244,7 @@ void vring_del_virtqueue(struct virtqueue *_vq)
 
 	if (vq->we_own_ring) {
 		vring_free_queue(vq->vq.vdev, vq->queue_size_in_bytes,
-				 vq->vring.desc, vq->queue_dma_addr);
+				 vq->split.vring.desc, vq->queue_dma_addr);
 	}
 	list_del(&_vq->list);
 	kfree(vq);
@@ -1260,7 +1286,7 @@ unsigned int virtqueue_get_vring_size(struct virtqueue
*_vq)
 
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
-	return vq->vring.num;
+	return vq->split.vring.num;
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
 
@@ -1304,7 +1330,7 @@ dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
 	BUG_ON(!vq->we_own_ring);
 
 	return vq->queue_dma_addr +
-		((char *)vq->vring.avail - (char *)vq->vring.desc);
+		((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
 
@@ -1315,13 +1341,13 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue
*_vq)
 	BUG_ON(!vq->we_own_ring);
 
 	return vq->queue_dma_addr +
-		((char *)vq->vring.used - (char *)vq->vring.desc);
+		((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
 
 const struct vring *virtqueue_get_vring(struct virtqueue *vq)
 {
-	return &to_vvq(vq)->vring;
+	return &to_vvq(vq)->split.vring;
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_vring);
 
-- 
2.14.5
Tiwei Bie
2018-Nov-21  10:03 UTC
[PATCH net-next v3 05/13] virtio_ring: introduce debug helpers
Introduce debug helpers for last_add_time update, check and
invalid. They will be used by packed ring too.
Signed-off-by: Tiwei Bie <tiwei.bie at intel.com>
---
 drivers/virtio/virtio_ring.c | 49 ++++++++++++++++++++++++--------------------
 1 file changed, 27 insertions(+), 22 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 0b97e5c79654..10d407910aa2 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -44,6 +44,26 @@
 	} while (0)
 #define END_USE(_vq) \
 	do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
+#define LAST_ADD_TIME_UPDATE(_vq)				\
+	do {							\
+		ktime_t now = ktime_get();			\
+								\
+		/* No kick or get, with .1 second between?  Warn. */ \
+		if ((_vq)->last_add_time_valid)			\
+			WARN_ON(ktime_to_ms(ktime_sub(now,	\
+				(_vq)->last_add_time)) > 100);	\
+		(_vq)->last_add_time = now;			\
+		(_vq)->last_add_time_valid = true;		\
+	} while (0)
+#define LAST_ADD_TIME_CHECK(_vq)				\
+	do {							\
+		if ((_vq)->last_add_time_valid) {		\
+			WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
+				      (_vq)->last_add_time)) > 100); \
+		}						\
+	} while (0)
+#define LAST_ADD_TIME_INVALID(_vq)				\
+	((_vq)->last_add_time_valid = false)
 #else
 #define BAD_RING(_vq, fmt, args...)				\
 	do {							\
@@ -53,6 +73,9 @@
 	} while (0)
 #define START_USE(vq)
 #define END_USE(vq)
+#define LAST_ADD_TIME_UPDATE(vq)
+#define LAST_ADD_TIME_CHECK(vq)
+#define LAST_ADD_TIME_INVALID(vq)
 #endif
 
 struct vring_desc_state {
@@ -295,18 +318,7 @@ static inline int virtqueue_add_split(struct virtqueue
*_vq,
 		return -EIO;
 	}
 
-#ifdef DEBUG
-	{
-		ktime_t now = ktime_get();
-
-		/* No kick or get, with .1 second between?  Warn. */
-		if (vq->last_add_time_valid)
-			WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time))
-					    > 100);
-		vq->last_add_time = now;
-		vq->last_add_time_valid = true;
-	}
-#endif
+	LAST_ADD_TIME_UPDATE(vq);
 
 	BUG_ON(total_sg == 0);
 
@@ -467,13 +479,8 @@ static bool virtqueue_kick_prepare_split(struct virtqueue
*_vq)
 	new = vq->split.avail_idx_shadow;
 	vq->num_added = 0;
 
-#ifdef DEBUG
-	if (vq->last_add_time_valid) {
-		WARN_ON(ktime_to_ms(ktime_sub(ktime_get(),
-					      vq->last_add_time)) > 100);
-	}
-	vq->last_add_time_valid = false;
-#endif
+	LAST_ADD_TIME_CHECK(vq);
+	LAST_ADD_TIME_INVALID(vq);
 
 	if (vq->event) {
 		needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
@@ -597,9 +604,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue
*_vq,
 				&vring_used_event(&vq->split.vring),
 				cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
 
-#ifdef DEBUG
-	vq->last_add_time_valid = false;
-#endif
+	LAST_ADD_TIME_INVALID(vq);
 
 	END_USE(vq);
 	return ret;
-- 
2.14.5
Tiwei Bie
2018-Nov-21  10:03 UTC
[PATCH net-next v3 06/13] virtio_ring: introduce helper for indirect feature
Introduce a helper to check whether we will use indirect
feature. It will be used by packed ring too.
Signed-off-by: Tiwei Bie <tiwei.bie at intel.com>
---
 drivers/virtio/virtio_ring.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 10d407910aa2..d1076f28c7e9 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -145,6 +145,18 @@ struct vring_virtqueue {
 
 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 
+static inline bool virtqueue_use_indirect(struct virtqueue *_vq,
+					  unsigned int total_sg)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	/*
+	 * If the host supports indirect descriptor tables, and we have multiple
+	 * buffers, then go indirect. FIXME: tune this threshold
+	 */
+	return (vq->indirect && total_sg > 1 &&
vq->vq.num_free);
+}
+
 /*
  * Modern virtio devices have feature bits to specify whether they need a
  * quirk and bypass the IOMMU. If not there, just use the DMA API.
@@ -324,9 +336,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 
 	head = vq->free_head;
 
-	/* If the host supports indirect descriptor tables, and we have multiple
-	 * buffers, then go indirect. FIXME: tune this threshold */
-	if (vq->indirect && total_sg > 1 && vq->vq.num_free)
+	if (virtqueue_use_indirect(_vq, total_sg))
 		desc = alloc_indirect_split(_vq, total_sg, gfp);
 	else {
 		desc = NULL;
-- 
2.14.5
Tiwei Bie
2018-Nov-21  10:03 UTC
[PATCH net-next v3 07/13] virtio_ring: allocate desc state for split ring separately
Put the split ring's desc state into the .split sub-structure,
and allocate desc state for split ring separately, this makes
the code more readable and more consistent with what we will
do for packed ring.
Signed-off-by: Tiwei Bie <tiwei.bie at intel.com>
---
 drivers/virtio/virtio_ring.c | 45 ++++++++++++++++++++++++++------------------
 1 file changed, 27 insertions(+), 18 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index d1076f28c7e9..acd851f3105c 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -78,7 +78,7 @@
 #define LAST_ADD_TIME_INVALID(vq)
 #endif
 
-struct vring_desc_state {
+struct vring_desc_state_split {
 	void *data;			/* Data for callback. */
 	struct vring_desc *indir_desc;	/* Indirect descriptor, if any. */
 };
@@ -115,6 +115,9 @@ struct vring_virtqueue {
 
 		/* Last written value to avail->idx in guest byte order */
 		u16 avail_idx_shadow;
+
+		/* Per-descriptor state. */
+		struct vring_desc_state_split *desc_state;
 	} split;
 
 	/* How to notify other side. FIXME: commonalize hcalls! */
@@ -133,9 +136,6 @@ struct vring_virtqueue {
 	bool last_add_time_valid;
 	ktime_t last_add_time;
 #endif
-
-	/* Per-descriptor state. */
-	struct vring_desc_state desc_state[];
 };
 
 
@@ -427,11 +427,11 @@ static inline int virtqueue_add_split(struct virtqueue
*_vq,
 		vq->free_head = i;
 
 	/* Store token and indirect buffer state. */
-	vq->desc_state[head].data = data;
+	vq->split.desc_state[head].data = data;
 	if (indirect)
-		vq->desc_state[head].indir_desc = desc;
+		vq->split.desc_state[head].indir_desc = desc;
 	else
-		vq->desc_state[head].indir_desc = ctx;
+		vq->split.desc_state[head].indir_desc = ctx;
 
 	/* Put entry in available array (but don't update avail->idx until they
 	 * do sync). */
@@ -512,7 +512,7 @@ static void detach_buf_split(struct vring_virtqueue *vq,
unsigned int head,
 	__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
 
 	/* Clear data ptr. */
-	vq->desc_state[head].data = NULL;
+	vq->split.desc_state[head].data = NULL;
 
 	/* Put back on free list: unmap first-level descriptors and find end */
 	i = head;
@@ -532,7 +532,8 @@ static void detach_buf_split(struct vring_virtqueue *vq,
unsigned int head,
 	vq->vq.num_free++;
 
 	if (vq->indirect) {
-		struct vring_desc *indir_desc = vq->desc_state[head].indir_desc;
+		struct vring_desc *indir_desc +				vq->split.desc_state[head].indir_desc;
 		u32 len;
 
 		/* Free the indirect table, if any, now that it's unmapped. */
@@ -550,9 +551,9 @@ static void detach_buf_split(struct vring_virtqueue *vq,
unsigned int head,
 			vring_unmap_one_split(vq, &indir_desc[j]);
 
 		kfree(indir_desc);
-		vq->desc_state[head].indir_desc = NULL;
+		vq->split.desc_state[head].indir_desc = NULL;
 	} else if (ctx) {
-		*ctx = vq->desc_state[head].indir_desc;
+		*ctx = vq->split.desc_state[head].indir_desc;
 	}
 }
 
@@ -597,13 +598,13 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue
*_vq,
 		BAD_RING(vq, "id %u out of range\n", i);
 		return NULL;
 	}
-	if (unlikely(!vq->desc_state[i].data)) {
+	if (unlikely(!vq->split.desc_state[i].data)) {
 		BAD_RING(vq, "id %u is not a head!\n", i);
 		return NULL;
 	}
 
 	/* detach_buf_split clears data, so grab it now. */
-	ret = vq->desc_state[i].data;
+	ret = vq->split.desc_state[i].data;
 	detach_buf_split(vq, i, ctx);
 	vq->last_used_idx++;
 	/* If we expect an interrupt for the next entry, tell host
@@ -711,10 +712,10 @@ static void *virtqueue_detach_unused_buf_split(struct
virtqueue *_vq)
 	START_USE(vq);
 
 	for (i = 0; i < vq->split.vring.num; i++) {
-		if (!vq->desc_state[i].data)
+		if (!vq->split.desc_state[i].data)
 			continue;
 		/* detach_buf_split clears data, so grab it now. */
-		buf = vq->desc_state[i].data;
+		buf = vq->split.desc_state[i].data;
 		detach_buf_split(vq, i, NULL);
 		vq->split.avail_idx_shadow--;
 		vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
@@ -1080,8 +1081,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int
index,
 	unsigned int i;
 	struct vring_virtqueue *vq;
 
-	vq = kmalloc(sizeof(*vq) + vring.num * sizeof(struct vring_desc_state),
-		     GFP_KERNEL);
+	vq = kmalloc(sizeof(*vq), GFP_KERNEL);
 	if (!vq)
 		return NULL;
 
@@ -1120,11 +1120,19 @@ struct virtqueue *__vring_new_virtqueue(unsigned int
index,
 					vq->split.avail_flags_shadow);
 	}
 
+	vq->split.desc_state = kmalloc_array(vring.num,
+			sizeof(struct vring_desc_state_split), GFP_KERNEL);
+	if (!vq->split.desc_state) {
+		kfree(vq);
+		return NULL;
+	}
+
 	/* Put everything in free lists. */
 	vq->free_head = 0;
 	for (i = 0; i < vring.num-1; i++)
 		vq->split.vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
-	memset(vq->desc_state, 0, vring.num * sizeof(struct vring_desc_state));
+	memset(vq->split.desc_state, 0, vring.num *
+			sizeof(struct vring_desc_state_split));
 
 	return &vq->vq;
 }
@@ -1260,6 +1268,7 @@ void vring_del_virtqueue(struct virtqueue *_vq)
 	if (vq->we_own_ring) {
 		vring_free_queue(vq->vq.vdev, vq->queue_size_in_bytes,
 				 vq->split.vring.desc, vq->queue_dma_addr);
+		kfree(vq->split.desc_state);
 	}
 	list_del(&_vq->list);
 	kfree(vq);
-- 
2.14.5
Tiwei Bie
2018-Nov-21  10:03 UTC
[PATCH net-next v3 08/13] virtio_ring: extract split ring handling from ring creation
Introduce a specific function to create the split ring.
And also move the DMA allocation and size information to
the .split sub-structure.
Signed-off-by: Tiwei Bie <tiwei.bie at intel.com>
---
 drivers/virtio/virtio_ring.c | 220 ++++++++++++++++++++++++-------------------
 1 file changed, 121 insertions(+), 99 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index acd851f3105c..d00a87909a7e 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -118,6 +118,10 @@ struct vring_virtqueue {
 
 		/* Per-descriptor state. */
 		struct vring_desc_state_split *desc_state;
+
+		/* DMA, allocation, and size information */
+		size_t queue_size_in_bytes;
+		dma_addr_t queue_dma_addr;
 	} split;
 
 	/* How to notify other side. FIXME: commonalize hcalls! */
@@ -125,8 +129,6 @@ struct vring_virtqueue {
 
 	/* DMA, allocation, and size information */
 	bool we_own_ring;
-	size_t queue_size_in_bytes;
-	dma_addr_t queue_dma_addr;
 
 #ifdef DEBUG
 	/* They're supposed to lock for us. */
@@ -203,6 +205,48 @@ static bool vring_use_dma_api(struct virtio_device *vdev)
 	return false;
 }
 
+static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
+			      dma_addr_t *dma_handle, gfp_t flag)
+{
+	if (vring_use_dma_api(vdev)) {
+		return dma_alloc_coherent(vdev->dev.parent, size,
+					  dma_handle, flag);
+	} else {
+		void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
+
+		if (queue) {
+			phys_addr_t phys_addr = virt_to_phys(queue);
+			*dma_handle = (dma_addr_t)phys_addr;
+
+			/*
+			 * Sanity check: make sure we dind't truncate
+			 * the address.  The only arches I can find that
+			 * have 64-bit phys_addr_t but 32-bit dma_addr_t
+			 * are certain non-highmem MIPS and x86
+			 * configurations, but these configurations
+			 * should never allocate physical pages above 32
+			 * bits, so this is fine.  Just in case, throw a
+			 * warning and abort if we end up with an
+			 * unrepresentable address.
+			 */
+			if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
+				free_pages_exact(queue, PAGE_ALIGN(size));
+				return NULL;
+			}
+		}
+		return queue;
+	}
+}
+
+static void vring_free_queue(struct virtio_device *vdev, size_t size,
+			     void *queue, dma_addr_t dma_handle)
+{
+	if (vring_use_dma_api(vdev))
+		dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
+	else
+		free_pages_exact(queue, PAGE_ALIGN(size));
+}
+
 /*
  * The DMA ops on various arches are rather gnarly right now, and
  * making all of the arch DMA ops work on the vring device itself
@@ -730,6 +774,68 @@ static void *virtqueue_detach_unused_buf_split(struct
virtqueue *_vq)
 	return NULL;
 }
 
+static struct virtqueue *vring_create_virtqueue_split(
+	unsigned int index,
+	unsigned int num,
+	unsigned int vring_align,
+	struct virtio_device *vdev,
+	bool weak_barriers,
+	bool may_reduce_num,
+	bool context,
+	bool (*notify)(struct virtqueue *),
+	void (*callback)(struct virtqueue *),
+	const char *name)
+{
+	struct virtqueue *vq;
+	void *queue = NULL;
+	dma_addr_t dma_addr;
+	size_t queue_size_in_bytes;
+	struct vring vring;
+
+	/* We assume num is a power of 2. */
+	if (num & (num - 1)) {
+		dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
+		return NULL;
+	}
+
+	/* TODO: allocate each queue chunk individually */
+	for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
+		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
+					  &dma_addr,
+					  GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
+		if (queue)
+			break;
+	}
+
+	if (!num)
+		return NULL;
+
+	if (!queue) {
+		/* Try to get a single page. You are my only hope! */
+		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
+					  &dma_addr, GFP_KERNEL|__GFP_ZERO);
+	}
+	if (!queue)
+		return NULL;
+
+	queue_size_in_bytes = vring_size(num, vring_align);
+	vring_init(&vring, num, queue, vring_align);
+
+	vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
+				   notify, callback, name);
+	if (!vq) {
+		vring_free_queue(vdev, queue_size_in_bytes, queue,
+				 dma_addr);
+		return NULL;
+	}
+
+	to_vvq(vq)->split.queue_dma_addr = dma_addr;
+	to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes;
+	to_vvq(vq)->we_own_ring = true;
+
+	return vq;
+}
+
 
 /*
  * Generic functions and exported symbols.
@@ -1091,8 +1197,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int
index,
 	vq->vq.num_free = vring.num;
 	vq->vq.index = index;
 	vq->we_own_ring = false;
-	vq->queue_dma_addr = 0;
-	vq->queue_size_in_bytes = 0;
 	vq->notify = notify;
 	vq->weak_barriers = weak_barriers;
 	vq->broken = false;
@@ -1108,6 +1212,9 @@ struct virtqueue *__vring_new_virtqueue(unsigned int
index,
 		!context;
 	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
 
+	vq->split.queue_dma_addr = 0;
+	vq->split.queue_size_in_bytes = 0;
+
 	vq->split.vring = vring;
 	vq->split.avail_flags_shadow = 0;
 	vq->split.avail_idx_shadow = 0;
@@ -1138,48 +1245,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int
index,
 }
 EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
 
-static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
-			      dma_addr_t *dma_handle, gfp_t flag)
-{
-	if (vring_use_dma_api(vdev)) {
-		return dma_alloc_coherent(vdev->dev.parent, size,
-					  dma_handle, flag);
-	} else {
-		void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
-		if (queue) {
-			phys_addr_t phys_addr = virt_to_phys(queue);
-			*dma_handle = (dma_addr_t)phys_addr;
-
-			/*
-			 * Sanity check: make sure we dind't truncate
-			 * the address.  The only arches I can find that
-			 * have 64-bit phys_addr_t but 32-bit dma_addr_t
-			 * are certain non-highmem MIPS and x86
-			 * configurations, but these configurations
-			 * should never allocate physical pages above 32
-			 * bits, so this is fine.  Just in case, throw a
-			 * warning and abort if we end up with an
-			 * unrepresentable address.
-			 */
-			if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
-				free_pages_exact(queue, PAGE_ALIGN(size));
-				return NULL;
-			}
-		}
-		return queue;
-	}
-}
-
-static void vring_free_queue(struct virtio_device *vdev, size_t size,
-			     void *queue, dma_addr_t dma_handle)
-{
-	if (vring_use_dma_api(vdev)) {
-		dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
-	} else {
-		free_pages_exact(queue, PAGE_ALIGN(size));
-	}
-}
-
 struct virtqueue *vring_create_virtqueue(
 	unsigned int index,
 	unsigned int num,
@@ -1192,54 +1257,9 @@ struct virtqueue *vring_create_virtqueue(
 	void (*callback)(struct virtqueue *),
 	const char *name)
 {
-	struct virtqueue *vq;
-	void *queue = NULL;
-	dma_addr_t dma_addr;
-	size_t queue_size_in_bytes;
-	struct vring vring;
-
-	/* We assume num is a power of 2. */
-	if (num & (num - 1)) {
-		dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
-		return NULL;
-	}
-
-	/* TODO: allocate each queue chunk individually */
-	for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
-		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
-					  &dma_addr,
-					  GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
-		if (queue)
-			break;
-	}
-
-	if (!num)
-		return NULL;
-
-	if (!queue) {
-		/* Try to get a single page. You are my only hope! */
-		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
-					  &dma_addr, GFP_KERNEL|__GFP_ZERO);
-	}
-	if (!queue)
-		return NULL;
-
-	queue_size_in_bytes = vring_size(num, vring_align);
-	vring_init(&vring, num, queue, vring_align);
-
-	vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
-				   notify, callback, name);
-	if (!vq) {
-		vring_free_queue(vdev, queue_size_in_bytes, queue,
-				 dma_addr);
-		return NULL;
-	}
-
-	to_vvq(vq)->queue_dma_addr = dma_addr;
-	to_vvq(vq)->queue_size_in_bytes = queue_size_in_bytes;
-	to_vvq(vq)->we_own_ring = true;
-
-	return vq;
+	return vring_create_virtqueue_split(index, num, vring_align,
+			vdev, weak_barriers, may_reduce_num,
+			context, notify, callback, name);
 }
 EXPORT_SYMBOL_GPL(vring_create_virtqueue);
 
@@ -1266,8 +1286,10 @@ void vring_del_virtqueue(struct virtqueue *_vq)
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
 	if (vq->we_own_ring) {
-		vring_free_queue(vq->vq.vdev, vq->queue_size_in_bytes,
-				 vq->split.vring.desc, vq->queue_dma_addr);
+		vring_free_queue(vq->vq.vdev,
+				 vq->split.queue_size_in_bytes,
+				 vq->split.vring.desc,
+				 vq->split.queue_dma_addr);
 		kfree(vq->split.desc_state);
 	}
 	list_del(&_vq->list);
@@ -1343,7 +1365,7 @@ dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
 
 	BUG_ON(!vq->we_own_ring);
 
-	return vq->queue_dma_addr;
+	return vq->split.queue_dma_addr;
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
 
@@ -1353,7 +1375,7 @@ dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
 
 	BUG_ON(!vq->we_own_ring);
 
-	return vq->queue_dma_addr +
+	return vq->split.queue_dma_addr +
 		((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
@@ -1364,7 +1386,7 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
 
 	BUG_ON(!vq->we_own_ring);
 
-	return vq->queue_dma_addr +
+	return vq->split.queue_dma_addr +
 		((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
-- 
2.14.5
Tiwei Bie
2018-Nov-21  10:03 UTC
[PATCH net-next v3 09/13] virtio_ring: cache whether we will use DMA API
Cache whether we will use DMA API, instead of doing the
check every time. We are going to check whether DMA API
is used more often in packed ring.
Signed-off-by: Tiwei Bie <tiwei.bie at intel.com>
---
 drivers/virtio/virtio_ring.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index d00a87909a7e..aafe1969b45e 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -86,6 +86,9 @@ struct vring_desc_state_split {
 struct vring_virtqueue {
 	struct virtqueue vq;
 
+	/* Is DMA API used? */
+	bool use_dma_api;
+
 	/* Can we use weak barriers? */
 	bool weak_barriers;
 
@@ -262,7 +265,7 @@ static dma_addr_t vring_map_one_sg(const struct
vring_virtqueue *vq,
 				   struct scatterlist *sg,
 				   enum dma_data_direction direction)
 {
-	if (!vring_use_dma_api(vq->vq.vdev))
+	if (!vq->use_dma_api)
 		return (dma_addr_t)sg_phys(sg);
 
 	/*
@@ -279,7 +282,7 @@ static dma_addr_t vring_map_single(const struct
vring_virtqueue *vq,
 				   void *cpu_addr, size_t size,
 				   enum dma_data_direction direction)
 {
-	if (!vring_use_dma_api(vq->vq.vdev))
+	if (!vq->use_dma_api)
 		return (dma_addr_t)virt_to_phys(cpu_addr);
 
 	return dma_map_single(vring_dma_dev(vq),
@@ -289,7 +292,7 @@ static dma_addr_t vring_map_single(const struct
vring_virtqueue *vq,
 static int vring_mapping_error(const struct vring_virtqueue *vq,
 			       dma_addr_t addr)
 {
-	if (!vring_use_dma_api(vq->vq.vdev))
+	if (!vq->use_dma_api)
 		return 0;
 
 	return dma_mapping_error(vring_dma_dev(vq), addr);
@@ -305,7 +308,7 @@ static void vring_unmap_one_split(const struct
vring_virtqueue *vq,
 {
 	u16 flags;
 
-	if (!vring_use_dma_api(vq->vq.vdev))
+	if (!vq->use_dma_api)
 		return;
 
 	flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
@@ -1202,6 +1205,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int
index,
 	vq->broken = false;
 	vq->last_used_idx = 0;
 	vq->num_added = 0;
+	vq->use_dma_api = vring_use_dma_api(vdev);
 	list_add_tail(&vq->vq.list, &vdev->vqs);
 #ifdef DEBUG
 	vq->in_use = false;
-- 
2.14.5
Tiwei Bie
2018-Nov-21  10:03 UTC
[PATCH net-next v3 10/13] virtio_ring: introduce packed ring support
Introduce the packed ring support. Packed ring can only be
created by vring_create_virtqueue() and each chunk of packed
ring will be allocated individually. Packed ring can not be
created on preallocated memory by vring_new_virtqueue() or
the likes currently.
Signed-off-by: Tiwei Bie <tiwei.bie at intel.com>
---
 drivers/virtio/virtio_ring.c | 900 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 870 insertions(+), 30 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index aafe1969b45e..b63eee2034e7 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -83,9 +83,26 @@ struct vring_desc_state_split {
 	struct vring_desc *indir_desc;	/* Indirect descriptor, if any. */
 };
 
+struct vring_desc_state_packed {
+	void *data;			/* Data for callback. */
+	struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
+	u16 num;			/* Descriptor list length. */
+	u16 next;			/* The next desc state in a list. */
+	u16 last;			/* The last desc state in a list. */
+};
+
+struct vring_desc_extra_packed {
+	dma_addr_t addr;		/* Buffer DMA addr. */
+	u32 len;			/* Buffer length. */
+	u16 flags;			/* Descriptor flags. */
+};
+
 struct vring_virtqueue {
 	struct virtqueue vq;
 
+	/* Is this a packed ring? */
+	bool packed_ring;
+
 	/* Is DMA API used? */
 	bool use_dma_api;
 
@@ -109,23 +126,64 @@ struct vring_virtqueue {
 	/* Last used index we've seen. */
 	u16 last_used_idx;
 
-	struct {
-		/* Actual memory layout for this queue */
-		struct vring vring;
+	union {
+		/* Available for split ring */
+		struct {
+			/* Actual memory layout for this queue. */
+			struct vring vring;
 
-		/* Last written value to avail->flags */
-		u16 avail_flags_shadow;
+			/* Last written value to avail->flags */
+			u16 avail_flags_shadow;
 
-		/* Last written value to avail->idx in guest byte order */
-		u16 avail_idx_shadow;
+			/*
+			 * Last written value to avail->idx in
+			 * guest byte order.
+			 */
+			u16 avail_idx_shadow;
 
-		/* Per-descriptor state. */
-		struct vring_desc_state_split *desc_state;
+			/* Per-descriptor state. */
+			struct vring_desc_state_split *desc_state;
 
-		/* DMA, allocation, and size information */
-		size_t queue_size_in_bytes;
-		dma_addr_t queue_dma_addr;
-	} split;
+			/* DMA address and size information */
+			dma_addr_t queue_dma_addr;
+			size_t queue_size_in_bytes;
+		} split;
+
+		/* Available for packed ring */
+		struct {
+			/* Actual memory layout for this queue. */
+			struct vring_packed vring;
+
+			/* Driver ring wrap counter. */
+			bool avail_wrap_counter;
+
+			/* Device ring wrap counter. */
+			bool used_wrap_counter;
+
+			/* Avail used flags. */
+			u16 avail_used_flags;
+
+			/* Index of the next avail descriptor. */
+			u16 next_avail_idx;
+
+			/*
+			 * Last written value to driver->flags in
+			 * guest byte order.
+			 */
+			u16 event_flags_shadow;
+
+			/* Per-descriptor state. */
+			struct vring_desc_state_packed *desc_state;
+			struct vring_desc_extra_packed *desc_extra;
+
+			/* DMA address and size information */
+			dma_addr_t ring_dma_addr;
+			dma_addr_t driver_event_dma_addr;
+			dma_addr_t device_event_dma_addr;
+			size_t ring_size_in_bytes;
+			size_t event_size_in_bytes;
+		} packed;
+	};
 
 	/* How to notify other side. FIXME: commonalize hcalls! */
 	bool (*notify)(struct virtqueue *vq);
@@ -840,6 +898,717 @@ static struct virtqueue *vring_create_virtqueue_split(
 }
 
 
+/*
+ * Packed ring specific functions - *_packed().
+ */
+
+static void vring_unmap_state_packed(const struct vring_virtqueue *vq,
+				     struct vring_desc_extra_packed *state)
+{
+	u16 flags;
+
+	if (!vq->use_dma_api)
+		return;
+
+	flags = state->flags;
+
+	if (flags & VRING_DESC_F_INDIRECT) {
+		dma_unmap_single(vring_dma_dev(vq),
+				 state->addr, state->len,
+				 (flags & VRING_DESC_F_WRITE) ?
+				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
+	} else {
+		dma_unmap_page(vring_dma_dev(vq),
+			       state->addr, state->len,
+			       (flags & VRING_DESC_F_WRITE) ?
+			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
+	}
+}
+
+static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
+				   struct vring_packed_desc *desc)
+{
+	u16 flags;
+
+	if (!vq->use_dma_api)
+		return;
+
+	flags = le16_to_cpu(desc->flags);
+
+	if (flags & VRING_DESC_F_INDIRECT) {
+		dma_unmap_single(vring_dma_dev(vq),
+				 le64_to_cpu(desc->addr),
+				 le32_to_cpu(desc->len),
+				 (flags & VRING_DESC_F_WRITE) ?
+				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
+	} else {
+		dma_unmap_page(vring_dma_dev(vq),
+			       le64_to_cpu(desc->addr),
+			       le32_to_cpu(desc->len),
+			       (flags & VRING_DESC_F_WRITE) ?
+			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
+	}
+}
+
+static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
+						       gfp_t gfp)
+{
+	struct vring_packed_desc *desc;
+
+	/*
+	 * We require lowmem mappings for the descriptors because
+	 * otherwise virt_to_phys will give us bogus addresses in the
+	 * virtqueue.
+	 */
+	gfp &= ~__GFP_HIGHMEM;
+
+	desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);
+
+	return desc;
+}
+
+static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
+				       struct scatterlist *sgs[],
+				       unsigned int total_sg,
+				       unsigned int out_sgs,
+				       unsigned int in_sgs,
+				       void *data,
+				       gfp_t gfp)
+{
+	struct vring_packed_desc *desc;
+	struct scatterlist *sg;
+	unsigned int i, n, err_idx;
+	u16 head, id;
+	dma_addr_t addr;
+
+	head = vq->packed.next_avail_idx;
+	desc = alloc_indirect_packed(total_sg, gfp);
+
+	if (unlikely(vq->vq.num_free < 1)) {
+		pr_debug("Can't add buf len 1 - avail = 0\n");
+		END_USE(vq);
+		return -ENOSPC;
+	}
+
+	i = 0;
+	id = vq->free_head;
+	BUG_ON(id == vq->packed.vring.num);
+
+	for (n = 0; n < out_sgs + in_sgs; n++) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			addr = vring_map_one_sg(vq, sg, n < out_sgs ?
+					DMA_TO_DEVICE : DMA_FROM_DEVICE);
+			if (vring_mapping_error(vq, addr))
+				goto unmap_release;
+
+			desc[i].flags = cpu_to_le16(n < out_sgs ?
+						0 : VRING_DESC_F_WRITE);
+			desc[i].addr = cpu_to_le64(addr);
+			desc[i].len = cpu_to_le32(sg->length);
+			i++;
+		}
+	}
+
+	/* Now that the indirect table is filled in, map it. */
+	addr = vring_map_single(vq, desc,
+			total_sg * sizeof(struct vring_packed_desc),
+			DMA_TO_DEVICE);
+	if (vring_mapping_error(vq, addr))
+		goto unmap_release;
+
+	vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
+	vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
+				sizeof(struct vring_packed_desc));
+	vq->packed.vring.desc[head].id = cpu_to_le16(id);
+
+	if (vq->use_dma_api) {
+		vq->packed.desc_extra[id].addr = addr;
+		vq->packed.desc_extra[id].len = total_sg *
+				sizeof(struct vring_packed_desc);
+		vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
+						  vq->packed.avail_used_flags;
+	}
+
+	/*
+	 * A driver MUST NOT make the first descriptor in the list
+	 * available before all subsequent descriptors comprising
+	 * the list are made available.
+	 */
+	virtio_wmb(vq->weak_barriers);
+	vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
+						vq->packed.avail_used_flags);
+
+	/* We're using some buffers from the free list. */
+	vq->vq.num_free -= 1;
+
+	/* Update free pointer */
+	n = head + 1;
+	if (n >= vq->packed.vring.num) {
+		n = 0;
+		vq->packed.avail_wrap_counter ^= 1;
+		vq->packed.avail_used_flags ^+				1 << VRING_PACKED_DESC_F_AVAIL |
+				1 << VRING_PACKED_DESC_F_USED;
+	}
+	vq->packed.next_avail_idx = n;
+	vq->free_head = vq->packed.desc_state[id].next;
+
+	/* Store token and indirect buffer state. */
+	vq->packed.desc_state[id].num = 1;
+	vq->packed.desc_state[id].data = data;
+	vq->packed.desc_state[id].indir_desc = desc;
+	vq->packed.desc_state[id].last = id;
+
+	vq->num_added += 1;
+
+	pr_debug("Added buffer head %i to %p\n", head, vq);
+	END_USE(vq);
+
+	return 0;
+
+unmap_release:
+	err_idx = i;
+
+	for (i = 0; i < err_idx; i++)
+		vring_unmap_desc_packed(vq, &desc[i]);
+
+	kfree(desc);
+
+	END_USE(vq);
+	return -EIO;
+}
+
+static inline int virtqueue_add_packed(struct virtqueue *_vq,
+				       struct scatterlist *sgs[],
+				       unsigned int total_sg,
+				       unsigned int out_sgs,
+				       unsigned int in_sgs,
+				       void *data,
+				       void *ctx,
+				       gfp_t gfp)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct vring_packed_desc *desc;
+	struct scatterlist *sg;
+	unsigned int i, n, c, descs_used, err_idx;
+	__le16 uninitialized_var(head_flags), flags;
+	u16 head, id, uninitialized_var(prev), curr, avail_used_flags;
+
+	START_USE(vq);
+
+	BUG_ON(data == NULL);
+	BUG_ON(ctx && vq->indirect);
+
+	if (unlikely(vq->broken)) {
+		END_USE(vq);
+		return -EIO;
+	}
+
+	LAST_ADD_TIME_UPDATE(vq);
+
+	BUG_ON(total_sg == 0);
+
+	if (virtqueue_use_indirect(_vq, total_sg))
+		return virtqueue_add_indirect_packed(vq, sgs, total_sg,
+				out_sgs, in_sgs, data, gfp);
+
+	head = vq->packed.next_avail_idx;
+	avail_used_flags = vq->packed.avail_used_flags;
+
+	WARN_ON_ONCE(total_sg > vq->packed.vring.num &&
!vq->indirect);
+
+	desc = vq->packed.vring.desc;
+	i = head;
+	descs_used = total_sg;
+
+	if (unlikely(vq->vq.num_free < descs_used)) {
+		pr_debug("Can't add buf len %i - avail = %i\n",
+			 descs_used, vq->vq.num_free);
+		END_USE(vq);
+		return -ENOSPC;
+	}
+
+	id = vq->free_head;
+	BUG_ON(id == vq->packed.vring.num);
+
+	curr = id;
+	c = 0;
+	for (n = 0; n < out_sgs + in_sgs; n++) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
+					DMA_TO_DEVICE : DMA_FROM_DEVICE);
+			if (vring_mapping_error(vq, addr))
+				goto unmap_release;
+
+			flags = cpu_to_le16(vq->packed.avail_used_flags |
+				    (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
+				    (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
+			if (i == head)
+				head_flags = flags;
+			else
+				desc[i].flags = flags;
+
+			desc[i].addr = cpu_to_le64(addr);
+			desc[i].len = cpu_to_le32(sg->length);
+			desc[i].id = cpu_to_le16(id);
+
+			if (unlikely(vq->use_dma_api)) {
+				vq->packed.desc_extra[curr].addr = addr;
+				vq->packed.desc_extra[curr].len = sg->length;
+				vq->packed.desc_extra[curr].flags +					le16_to_cpu(flags);
+			}
+			prev = curr;
+			curr = vq->packed.desc_state[curr].next;
+
+			if ((unlikely(++i >= vq->packed.vring.num))) {
+				i = 0;
+				vq->packed.avail_used_flags ^+					1 << VRING_PACKED_DESC_F_AVAIL
|
+					1 << VRING_PACKED_DESC_F_USED;
+			}
+		}
+	}
+
+	if (i < head)
+		vq->packed.avail_wrap_counter ^= 1;
+
+	/* We're using some buffers from the free list. */
+	vq->vq.num_free -= descs_used;
+
+	/* Update free pointer */
+	vq->packed.next_avail_idx = i;
+	vq->free_head = curr;
+
+	/* Store token. */
+	vq->packed.desc_state[id].num = descs_used;
+	vq->packed.desc_state[id].data = data;
+	vq->packed.desc_state[id].indir_desc = ctx;
+	vq->packed.desc_state[id].last = prev;
+
+	/*
+	 * A driver MUST NOT make the first descriptor in the list
+	 * available before all subsequent descriptors comprising
+	 * the list are made available.
+	 */
+	virtio_wmb(vq->weak_barriers);
+	vq->packed.vring.desc[head].flags = head_flags;
+	vq->num_added += descs_used;
+
+	pr_debug("Added buffer head %i to %p\n", head, vq);
+	END_USE(vq);
+
+	return 0;
+
+unmap_release:
+	err_idx = i;
+	i = head;
+
+	vq->packed.avail_used_flags = avail_used_flags;
+
+	for (n = 0; n < total_sg; n++) {
+		if (i == err_idx)
+			break;
+		vring_unmap_desc_packed(vq, &desc[i]);
+		i++;
+		if (i >= vq->packed.vring.num)
+			i = 0;
+	}
+
+	END_USE(vq);
+	return -EIO;
+}
+
+static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	u16 flags;
+	bool needs_kick;
+	union {
+		struct {
+			__le16 off_wrap;
+			__le16 flags;
+		};
+		u32 u32;
+	} snapshot;
+
+	START_USE(vq);
+
+	/*
+	 * We need to expose the new flags value before checking notification
+	 * suppressions.
+	 */
+	virtio_mb(vq->weak_barriers);
+
+	vq->num_added = 0;
+
+	snapshot.u32 = *(u32 *)vq->packed.vring.device;
+	flags = le16_to_cpu(snapshot.flags);
+
+	LAST_ADD_TIME_CHECK(vq);
+	LAST_ADD_TIME_INVALID(vq);
+
+	needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
+	END_USE(vq);
+	return needs_kick;
+}
+
+static void detach_buf_packed(struct vring_virtqueue *vq,
+			      unsigned int id, void **ctx)
+{
+	struct vring_desc_state_packed *state = NULL;
+	struct vring_packed_desc *desc;
+	unsigned int i, curr;
+
+	state = &vq->packed.desc_state[id];
+
+	/* Clear data ptr. */
+	state->data = NULL;
+
+	vq->packed.desc_state[state->last].next = vq->free_head;
+	vq->free_head = id;
+	vq->vq.num_free += state->num;
+
+	if (unlikely(vq->use_dma_api)) {
+		curr = id;
+		for (i = 0; i < state->num; i++) {
+			vring_unmap_state_packed(vq,
+				&vq->packed.desc_extra[curr]);
+			curr = vq->packed.desc_state[curr].next;
+		}
+	}
+
+	if (vq->indirect) {
+		u32 len;
+
+		/* Free the indirect table, if any, now that it's unmapped. */
+		desc = state->indir_desc;
+		if (!desc)
+			return;
+
+		if (vq->use_dma_api) {
+			len = vq->packed.desc_extra[id].len;
+			for (i = 0; i < len / sizeof(struct vring_packed_desc);
+					i++)
+				vring_unmap_desc_packed(vq, &desc[i]);
+		}
+		kfree(desc);
+		state->indir_desc = NULL;
+	} else if (ctx) {
+		*ctx = state->indir_desc;
+	}
+}
+
+static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
+				       u16 idx, bool used_wrap_counter)
+{
+	bool avail, used;
+	u16 flags;
+
+	flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
+	avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
+	used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
+
+	return avail == used && used == used_wrap_counter;
+}
+
+static inline bool more_used_packed(const struct vring_virtqueue *vq)
+{
+	return is_used_desc_packed(vq, vq->last_used_idx,
+			vq->packed.used_wrap_counter);
+}
+
+static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
+					  unsigned int *len,
+					  void **ctx)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	u16 last_used, id;
+	void *ret;
+
+	START_USE(vq);
+
+	if (unlikely(vq->broken)) {
+		END_USE(vq);
+		return NULL;
+	}
+
+	if (!more_used_packed(vq)) {
+		pr_debug("No more buffers in queue\n");
+		END_USE(vq);
+		return NULL;
+	}
+
+	/* Only get used elements after they have been exposed by host. */
+	virtio_rmb(vq->weak_barriers);
+
+	last_used = vq->last_used_idx;
+	id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
+	*len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
+
+	if (unlikely(id >= vq->packed.vring.num)) {
+		BAD_RING(vq, "id %u out of range\n", id);
+		return NULL;
+	}
+	if (unlikely(!vq->packed.desc_state[id].data)) {
+		BAD_RING(vq, "id %u is not a head!\n", id);
+		return NULL;
+	}
+
+	/* detach_buf_packed clears data, so grab it now. */
+	ret = vq->packed.desc_state[id].data;
+	detach_buf_packed(vq, id, ctx);
+
+	vq->last_used_idx += vq->packed.desc_state[id].num;
+	if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) {
+		vq->last_used_idx -= vq->packed.vring.num;
+		vq->packed.used_wrap_counter ^= 1;
+	}
+
+	LAST_ADD_TIME_INVALID(vq);
+
+	END_USE(vq);
+	return ret;
+}
+
+static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
+		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
+		vq->packed.vring.driver->flags +		
cpu_to_le16(vq->packed.event_flags_shadow);
+	}
+}
+
+static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	START_USE(vq);
+
+	/*
+	 * We optimistically turn back on interrupts, then check if there was
+	 * more to do.
+	 */
+
+	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
+		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_ENABLE;
+		vq->packed.vring.driver->flags +			
cpu_to_le16(vq->packed.event_flags_shadow);
+	}
+
+	END_USE(vq);
+	return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter <<
+			VRING_PACKED_EVENT_F_WRAP_CTR);
+}
+
+static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	bool wrap_counter;
+	u16 used_idx;
+
+	wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
+	used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
+
+	return is_used_desc_packed(vq, used_idx, wrap_counter);
+}
+
+static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	u16 used_idx, wrap_counter;
+
+	START_USE(vq);
+
+	/*
+	 * We optimistically turn back on interrupts, then check if there was
+	 * more to do.
+	 */
+
+	used_idx = vq->last_used_idx;
+	wrap_counter = vq->packed.used_wrap_counter;
+
+	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
+		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_ENABLE;
+		vq->packed.vring.driver->flags +			
cpu_to_le16(vq->packed.event_flags_shadow);
+	}
+
+	/*
+	 * We need to update event suppression structure first
+	 * before re-checking for more used buffers.
+	 */
+	virtio_mb(vq->weak_barriers);
+
+	if (is_used_desc_packed(vq, used_idx, wrap_counter)) {
+		END_USE(vq);
+		return false;
+	}
+
+	END_USE(vq);
+	return true;
+}
+
+static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	unsigned int i;
+	void *buf;
+
+	START_USE(vq);
+
+	for (i = 0; i < vq->packed.vring.num; i++) {
+		if (!vq->packed.desc_state[i].data)
+			continue;
+		/* detach_buf clears data, so grab it now. */
+		buf = vq->packed.desc_state[i].data;
+		detach_buf_packed(vq, i, NULL);
+		END_USE(vq);
+		return buf;
+	}
+	/* That should have freed everything. */
+	BUG_ON(vq->vq.num_free != vq->packed.vring.num);
+
+	END_USE(vq);
+	return NULL;
+}
+
+static struct virtqueue *vring_create_virtqueue_packed(
+	unsigned int index,
+	unsigned int num,
+	unsigned int vring_align,
+	struct virtio_device *vdev,
+	bool weak_barriers,
+	bool may_reduce_num,
+	bool context,
+	bool (*notify)(struct virtqueue *),
+	void (*callback)(struct virtqueue *),
+	const char *name)
+{
+	struct vring_virtqueue *vq;
+	struct vring_packed_desc *ring;
+	struct vring_packed_desc_event *driver, *device;
+	dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
+	size_t ring_size_in_bytes, event_size_in_bytes;
+	unsigned int i;
+
+	ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
+
+	ring = vring_alloc_queue(vdev, ring_size_in_bytes,
+				 &ring_dma_addr,
+				 GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
+	if (!ring)
+		goto err_ring;
+
+	event_size_in_bytes = sizeof(struct vring_packed_desc_event);
+
+	driver = vring_alloc_queue(vdev, event_size_in_bytes,
+				   &driver_event_dma_addr,
+				   GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
+	if (!driver)
+		goto err_driver;
+
+	device = vring_alloc_queue(vdev, event_size_in_bytes,
+				   &device_event_dma_addr,
+				   GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
+	if (!device)
+		goto err_device;
+
+	vq = kmalloc(sizeof(*vq), GFP_KERNEL);
+	if (!vq)
+		goto err_vq;
+
+	vq->vq.callback = callback;
+	vq->vq.vdev = vdev;
+	vq->vq.name = name;
+	vq->vq.num_free = num;
+	vq->vq.index = index;
+	vq->we_own_ring = true;
+	vq->notify = notify;
+	vq->weak_barriers = weak_barriers;
+	vq->broken = false;
+	vq->last_used_idx = 0;
+	vq->num_added = 0;
+	vq->packed_ring = true;
+	vq->use_dma_api = vring_use_dma_api(vdev);
+	list_add_tail(&vq->vq.list, &vdev->vqs);
+#ifdef DEBUG
+	vq->in_use = false;
+	vq->last_add_time_valid = false;
+#endif
+
+	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)
&&
+		!context;
+	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
+
+	vq->packed.ring_dma_addr = ring_dma_addr;
+	vq->packed.driver_event_dma_addr = driver_event_dma_addr;
+	vq->packed.device_event_dma_addr = device_event_dma_addr;
+
+	vq->packed.ring_size_in_bytes = ring_size_in_bytes;
+	vq->packed.event_size_in_bytes = event_size_in_bytes;
+
+	vq->packed.vring.num = num;
+	vq->packed.vring.desc = ring;
+	vq->packed.vring.driver = driver;
+	vq->packed.vring.device = device;
+
+	vq->packed.next_avail_idx = 0;
+	vq->packed.avail_wrap_counter = 1;
+	vq->packed.used_wrap_counter = 1;
+	vq->packed.event_flags_shadow = 0;
+	vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
+
+	vq->packed.desc_state = kmalloc_array(num,
+			sizeof(struct vring_desc_state_packed),
+			GFP_KERNEL);
+	if (!vq->packed.desc_state)
+		goto err_desc_state;
+
+	memset(vq->packed.desc_state, 0,
+		num * sizeof(struct vring_desc_state_packed));
+
+	/* Put everything in free lists. */
+	vq->free_head = 0;
+	for (i = 0; i < num-1; i++)
+		vq->packed.desc_state[i].next = i + 1;
+
+	vq->packed.desc_extra = kmalloc_array(num,
+			sizeof(struct vring_desc_extra_packed),
+			GFP_KERNEL);
+	if (!vq->packed.desc_extra)
+		goto err_desc_extra;
+
+	memset(vq->packed.desc_extra, 0,
+		num * sizeof(struct vring_desc_extra_packed));
+
+	/* No callback?  Tell other side not to bother us. */
+	if (!callback) {
+		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
+		vq->packed.vring.driver->flags +		
cpu_to_le16(vq->packed.event_flags_shadow);
+	}
+
+	return &vq->vq;
+
+err_desc_extra:
+	kfree(vq->packed.desc_state);
+err_desc_state:
+	kfree(vq);
+err_vq:
+	vring_free_queue(vdev, event_size_in_bytes, device, ring_dma_addr);
+err_device:
+	vring_free_queue(vdev, event_size_in_bytes, driver, ring_dma_addr);
+err_driver:
+	vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
+err_ring:
+	return NULL;
+}
+
+
 /*
  * Generic functions and exported symbols.
  */
@@ -853,8 +1622,12 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 				void *ctx,
 				gfp_t gfp)
 {
-	return virtqueue_add_split(_vq, sgs, total_sg,
-				   out_sgs, in_sgs, data, ctx, gfp);
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
+					out_sgs, in_sgs, data, ctx, gfp) :
+				 virtqueue_add_split(_vq, sgs, total_sg,
+					out_sgs, in_sgs, data, ctx, gfp);
 }
 
 /**
@@ -973,7 +1746,10 @@ EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
  */
 bool virtqueue_kick_prepare(struct virtqueue *_vq)
 {
-	return virtqueue_kick_prepare_split(_vq);
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
+				 virtqueue_kick_prepare_split(_vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
 
@@ -1040,7 +1816,10 @@ EXPORT_SYMBOL_GPL(virtqueue_kick);
 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
 			    void **ctx)
 {
-	return virtqueue_get_buf_ctx_split(_vq, len, ctx);
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
+				 virtqueue_get_buf_ctx_split(_vq, len, ctx);
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
 
@@ -1049,7 +1828,6 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned
int *len)
 	return virtqueue_get_buf_ctx(_vq, len, NULL);
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
-
 /**
  * virtqueue_disable_cb - disable callbacks
  * @vq: the struct virtqueue we're talking about.
@@ -1061,7 +1839,12 @@ EXPORT_SYMBOL_GPL(virtqueue_get_buf);
  */
 void virtqueue_disable_cb(struct virtqueue *_vq)
 {
-	virtqueue_disable_cb_split(_vq);
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	if (vq->packed_ring)
+		virtqueue_disable_cb_packed(_vq);
+	else
+		virtqueue_disable_cb_split(_vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
 
@@ -1079,7 +1862,10 @@ EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
  */
 unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
 {
-	return virtqueue_enable_cb_prepare_split(_vq);
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
+				 virtqueue_enable_cb_prepare_split(_vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
 
@@ -1097,7 +1883,8 @@ bool virtqueue_poll(struct virtqueue *_vq, unsigned
last_used_idx)
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
 	virtio_mb(vq->weak_barriers);
-	return virtqueue_poll_split(_vq, last_used_idx);
+	return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
+				 virtqueue_poll_split(_vq, last_used_idx);
 }
 EXPORT_SYMBOL_GPL(virtqueue_poll);
 
@@ -1135,7 +1922,10 @@ EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
  */
 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
 {
-	return virtqueue_enable_cb_delayed_split(_vq);
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
+				 virtqueue_enable_cb_delayed_split(_vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
 
@@ -1149,13 +1939,16 @@ EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
  */
 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
 {
-	return virtqueue_detach_unused_buf_split(_vq);
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
+				 virtqueue_detach_unused_buf_split(_vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
 
 static inline bool more_used(const struct vring_virtqueue *vq)
 {
-	return more_used_split(vq);
+	return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
 }
 
 irqreturn_t vring_interrupt(int irq, void *_vq)
@@ -1178,6 +1971,7 @@ irqreturn_t vring_interrupt(int irq, void *_vq)
 }
 EXPORT_SYMBOL_GPL(vring_interrupt);
 
+/* Only available for split ring */
 struct virtqueue *__vring_new_virtqueue(unsigned int index,
 					struct vring vring,
 					struct virtio_device *vdev,
@@ -1190,10 +1984,14 @@ struct virtqueue *__vring_new_virtqueue(unsigned int
index,
 	unsigned int i;
 	struct vring_virtqueue *vq;
 
+	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
+		return NULL;
+
 	vq = kmalloc(sizeof(*vq), GFP_KERNEL);
 	if (!vq)
 		return NULL;
 
+	vq->packed_ring = false;
 	vq->vq.callback = callback;
 	vq->vq.vdev = vdev;
 	vq->vq.name = name;
@@ -1261,12 +2059,19 @@ struct virtqueue *vring_create_virtqueue(
 	void (*callback)(struct virtqueue *),
 	const char *name)
 {
+
+	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
+		return vring_create_virtqueue_packed(index, num, vring_align,
+				vdev, weak_barriers, may_reduce_num,
+				context, notify, callback, name);
+
 	return vring_create_virtqueue_split(index, num, vring_align,
 			vdev, weak_barriers, may_reduce_num,
 			context, notify, callback, name);
 }
 EXPORT_SYMBOL_GPL(vring_create_virtqueue);
 
+/* Only available for split ring */
 struct virtqueue *vring_new_virtqueue(unsigned int index,
 				      unsigned int num,
 				      unsigned int vring_align,
@@ -1279,6 +2084,10 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 				      const char *name)
 {
 	struct vring vring;
+
+	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
+		return NULL;
+
 	vring_init(&vring, num, pages, vring_align);
 	return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
 				     notify, callback, name);
@@ -1290,11 +2099,32 @@ void vring_del_virtqueue(struct virtqueue *_vq)
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
 	if (vq->we_own_ring) {
-		vring_free_queue(vq->vq.vdev,
-				 vq->split.queue_size_in_bytes,
-				 vq->split.vring.desc,
-				 vq->split.queue_dma_addr);
-		kfree(vq->split.desc_state);
+		if (vq->packed_ring) {
+			vring_free_queue(vq->vq.vdev,
+					 vq->packed.ring_size_in_bytes,
+					 vq->packed.vring.desc,
+					 vq->packed.ring_dma_addr);
+
+			vring_free_queue(vq->vq.vdev,
+					 vq->packed.event_size_in_bytes,
+					 vq->packed.vring.driver,
+					 vq->packed.driver_event_dma_addr);
+
+			vring_free_queue(vq->vq.vdev,
+					 vq->packed.event_size_in_bytes,
+					 vq->packed.vring.device,
+					 vq->packed.device_event_dma_addr);
+
+			kfree(vq->packed.desc_state);
+			kfree(vq->packed.desc_extra);
+		} else {
+			vring_free_queue(vq->vq.vdev,
+					 vq->split.queue_size_in_bytes,
+					 vq->split.vring.desc,
+					 vq->split.queue_dma_addr);
+
+			kfree(vq->split.desc_state);
+		}
 	}
 	list_del(&_vq->list);
 	kfree(vq);
@@ -1336,7 +2166,7 @@ unsigned int virtqueue_get_vring_size(struct virtqueue
*_vq)
 
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
-	return vq->split.vring.num;
+	return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
 
@@ -1369,6 +2199,9 @@ dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
 
 	BUG_ON(!vq->we_own_ring);
 
+	if (vq->packed_ring)
+		return vq->packed.ring_dma_addr;
+
 	return vq->split.queue_dma_addr;
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
@@ -1379,6 +2212,9 @@ dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
 
 	BUG_ON(!vq->we_own_ring);
 
+	if (vq->packed_ring)
+		return vq->packed.driver_event_dma_addr;
+
 	return vq->split.queue_dma_addr +
 		((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
 }
@@ -1390,11 +2226,15 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue
*_vq)
 
 	BUG_ON(!vq->we_own_ring);
 
+	if (vq->packed_ring)
+		return vq->packed.device_event_dma_addr;
+
 	return vq->split.queue_dma_addr +
 		((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
 
+/* Only available for split ring */
 const struct vring *virtqueue_get_vring(struct virtqueue *vq)
 {
 	return &to_vvq(vq)->split.vring;
-- 
2.14.5
Tiwei Bie
2018-Nov-21  10:03 UTC
[PATCH net-next v3 11/13] virtio_ring: leverage event idx in packed ring
Leverage the EVENT_IDX feature in packed ring to suppress
events when it's available.
Signed-off-by: Tiwei Bie <tiwei.bie at intel.com>
---
 drivers/virtio/virtio_ring.c | 77 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 71 insertions(+), 6 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index b63eee2034e7..40e4d3798d16 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1222,7 +1222,7 @@ static inline int virtqueue_add_packed(struct virtqueue
*_vq,
 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
-	u16 flags;
+	u16 new, old, off_wrap, flags, wrap_counter, event_idx;
 	bool needs_kick;
 	union {
 		struct {
@@ -1240,6 +1240,8 @@ static bool virtqueue_kick_prepare_packed(struct virtqueue
*_vq)
 	 */
 	virtio_mb(vq->weak_barriers);
 
+	old = vq->packed.next_avail_idx - vq->num_added;
+	new = vq->packed.next_avail_idx;
 	vq->num_added = 0;
 
 	snapshot.u32 = *(u32 *)vq->packed.vring.device;
@@ -1248,7 +1250,20 @@ static bool virtqueue_kick_prepare_packed(struct
virtqueue *_vq)
 	LAST_ADD_TIME_CHECK(vq);
 	LAST_ADD_TIME_INVALID(vq);
 
-	needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
+	if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
+		needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
+		goto out;
+	}
+
+	off_wrap = le16_to_cpu(snapshot.off_wrap);
+
+	wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
+	event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
+	if (wrap_counter != vq->packed.avail_wrap_counter)
+		event_idx -= vq->packed.vring.num;
+
+	needs_kick = vring_need_event(event_idx, new, old);
+out:
 	END_USE(vq);
 	return needs_kick;
 }
@@ -1365,6 +1380,18 @@ static void *virtqueue_get_buf_ctx_packed(struct
virtqueue *_vq,
 		vq->packed.used_wrap_counter ^= 1;
 	}
 
+	/*
+	 * If we expect an interrupt for the next entry, tell host
+	 * by writing event index and flush out the write before
+	 * the read in the next get_buf call.
+	 */
+	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
+		virtio_store_mb(vq->weak_barriers,
+				&vq->packed.vring.driver->off_wrap,
+				cpu_to_le16(vq->last_used_idx |
+					(vq->packed.used_wrap_counter <<
+					 VRING_PACKED_EVENT_F_WRAP_CTR)));
+
 	LAST_ADD_TIME_INVALID(vq);
 
 	END_USE(vq);
@@ -1393,8 +1420,22 @@ static unsigned virtqueue_enable_cb_prepare_packed(struct
virtqueue *_vq)
 	 * more to do.
 	 */
 
+	if (vq->event) {
+		vq->packed.vring.driver->off_wrap +			cpu_to_le16(vq->last_used_idx
|
+				(vq->packed.used_wrap_counter <<
+				 VRING_PACKED_EVENT_F_WRAP_CTR));
+		/*
+		 * We need to update event offset and event wrap
+		 * counter first before updating event flags.
+		 */
+		virtio_wmb(vq->weak_barriers);
+	}
+
 	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
-		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_ENABLE;
+		vq->packed.event_flags_shadow = vq->event ?
+				VRING_PACKED_EVENT_FLAG_DESC :
+				VRING_PACKED_EVENT_FLAG_ENABLE;
 		vq->packed.vring.driver->flags  			
cpu_to_le16(vq->packed.event_flags_shadow);
 	}
@@ -1420,6 +1461,7 @@ static bool virtqueue_enable_cb_delayed_packed(struct
virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	u16 used_idx, wrap_counter;
+	u16 bufs;
 
 	START_USE(vq);
 
@@ -1428,11 +1470,34 @@ static bool virtqueue_enable_cb_delayed_packed(struct
virtqueue *_vq)
 	 * more to do.
 	 */
 
-	used_idx = vq->last_used_idx;
-	wrap_counter = vq->packed.used_wrap_counter;
+	if (vq->event) {
+		/* TODO: tune this threshold */
+		bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
+		wrap_counter = vq->packed.used_wrap_counter;
+
+		used_idx = vq->last_used_idx + bufs;
+		if (used_idx >= vq->packed.vring.num) {
+			used_idx -= vq->packed.vring.num;
+			wrap_counter ^= 1;
+		}
+
+		vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
+			(wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
+
+		/*
+		 * We need to update event offset and event wrap
+		 * counter first before updating event flags.
+		 */
+		virtio_wmb(vq->weak_barriers);
+	} else {
+		used_idx = vq->last_used_idx;
+		wrap_counter = vq->packed.used_wrap_counter;
+	}
 
 	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
-		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_ENABLE;
+		vq->packed.event_flags_shadow = vq->event ?
+				VRING_PACKED_EVENT_FLAG_DESC :
+				VRING_PACKED_EVENT_FLAG_ENABLE;
 		vq->packed.vring.driver->flags  			
cpu_to_le16(vq->packed.event_flags_shadow);
 	}
-- 
2.14.5
Tiwei Bie
2018-Nov-21  10:03 UTC
[PATCH net-next v3 12/13] virtio_ring: disable packed ring on unsupported transports
Currently, ccw, vop and remoteproc need some legacy virtio
APIs to create or access virtio rings, which are not supported
by packed ring. So disable packed ring on these transports
for now.
Signed-off-by: Tiwei Bie <tiwei.bie at intel.com>
---
 drivers/misc/mic/vop/vop_main.c        | 13 +++++++++++++
 drivers/remoteproc/remoteproc_virtio.c | 13 +++++++++++++
 drivers/s390/virtio/virtio_ccw.c       | 14 ++++++++++++++
 3 files changed, 40 insertions(+)
diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c
index 3633202e18f4..6b212c8b78e7 100644
--- a/drivers/misc/mic/vop/vop_main.c
+++ b/drivers/misc/mic/vop/vop_main.c
@@ -129,6 +129,16 @@ static u64 vop_get_features(struct virtio_device *vdev)
 	return features;
 }
 
+static void vop_transport_features(struct virtio_device *vdev)
+{
+	/*
+	 * Packed ring isn't enabled on virtio_vop for now,
+	 * because virtio_vop uses vring_new_virtqueue() which
+	 * creates virtio rings on preallocated memory.
+	 */
+	__virtio_clear_bit(vdev, VIRTIO_F_RING_PACKED);
+}
+
 static int vop_finalize_features(struct virtio_device *vdev)
 {
 	unsigned int i, bits;
@@ -141,6 +151,9 @@ static int vop_finalize_features(struct virtio_device *vdev)
 	/* Give virtio_ring a chance to accept features. */
 	vring_transport_features(vdev);
 
+	/* Give virtio_vop a chance to accept features. */
+	vop_transport_features(vdev);
+
 	memset_io(out_features, 0, feature_len);
 	bits = min_t(unsigned, feature_len,
 		     sizeof(vdev->features)) * 8;
diff --git a/drivers/remoteproc/remoteproc_virtio.c
b/drivers/remoteproc/remoteproc_virtio.c
index de21f620b882..183fc42a510a 100644
--- a/drivers/remoteproc/remoteproc_virtio.c
+++ b/drivers/remoteproc/remoteproc_virtio.c
@@ -214,6 +214,16 @@ static u64 rproc_virtio_get_features(struct virtio_device
*vdev)
 	return rsc->dfeatures;
 }
 
+static void rproc_transport_features(struct virtio_device *vdev)
+{
+	/*
+	 * Packed ring isn't enabled on remoteproc for now,
+	 * because remoteproc uses vring_new_virtqueue() which
+	 * creates virtio rings on preallocated memory.
+	 */
+	__virtio_clear_bit(vdev, VIRTIO_F_RING_PACKED);
+}
+
 static int rproc_virtio_finalize_features(struct virtio_device *vdev)
 {
 	struct rproc_vdev *rvdev = vdev_to_rvdev(vdev);
@@ -224,6 +234,9 @@ static int rproc_virtio_finalize_features(struct
virtio_device *vdev)
 	/* Give virtio_ring a chance to accept features */
 	vring_transport_features(vdev);
 
+	/* Give virtio_rproc a chance to accept features. */
+	rproc_transport_features(vdev);
+
 	/* Make sure we don't have any features > 32 bits! */
 	BUG_ON((u32)vdev->features != vdev->features);
 
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 97b6f197f007..406d1f64ad65 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -765,6 +765,17 @@ static u64 virtio_ccw_get_features(struct virtio_device
*vdev)
 	return rc;
 }
 
+static void ccw_transport_features(struct virtio_device *vdev)
+{
+	/*
+	 * Packed ring isn't enabled on virtio_ccw for now,
+	 * because virtio_ccw uses some legacy accessors,
+	 * e.g. virtqueue_get_avail() and virtqueue_get_used()
+	 * which aren't available in packed ring currently.
+	 */
+	__virtio_clear_bit(vdev, VIRTIO_F_RING_PACKED);
+}
+
 static int virtio_ccw_finalize_features(struct virtio_device *vdev)
 {
 	struct virtio_ccw_device *vcdev = to_vc_device(vdev);
@@ -791,6 +802,9 @@ static int virtio_ccw_finalize_features(struct virtio_device
*vdev)
 	/* Give virtio_ring a chance to accept features. */
 	vring_transport_features(vdev);
 
+	/* Give virtio_ccw a chance to accept features. */
+	ccw_transport_features(vdev);
+
 	features->index = 0;
 	features->features = cpu_to_le32((u32)vdev->features);
 	/* Write the first half of the feature bits to the host. */
-- 
2.14.5
Tiwei Bie
2018-Nov-21  10:03 UTC
[PATCH net-next v3 13/13] virtio_ring: advertize packed ring layout
Advertize the packed ring layout support. Signed-off-by: Tiwei Bie <tiwei.bie at intel.com> --- drivers/virtio/virtio_ring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 40e4d3798d16..cd7e755484e3 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2211,6 +2211,8 @@ void vring_transport_features(struct virtio_device *vdev) break; case VIRTIO_F_IOMMU_PLATFORM: break; + case VIRTIO_F_RING_PACKED: + break; default: /* We don't understand this bit. */ __virtio_clear_bit(vdev, i); -- 2.14.5
Michael S. Tsirkin
2018-Nov-21  12:20 UTC
[PATCH net-next v3 00/13] virtio: support packed ring
On Wed, Nov 21, 2018 at 06:03:17PM +0800, Tiwei Bie wrote:> Hi, > > This patch set implements packed ring support in virtio driver. > > A performance test between pktgen (pktgen_sample03_burst_single_flow.sh) > and DPDK vhost (testpmd/rxonly/vhost-PMD) has been done, I saw > ~30% performance gain in packed ring in this case.Thanks a lot, this is very exciting! Dave, given the holiday, attempts to wrap up the 1.1 spec and the patchset size I would very much appreciate a bit more time for review. Say until Nov 28?> To make this patch set work with below patch set for vhost, > some hacks are needed to set the _F_NEXT flag in indirect > descriptors (this should be fixed in vhost): > > https://lkml.org/lkml/2018/7/3/33Could you pls clarify - do you mean it doesn't yet work with vhost because of a vhost bug, and to test it with the linked patches you had to hack in _F_NEXT? Because I do not see _F_NEXT in indirect descriptors in this patch (which is fine). Or did I miss it?> v2 -> v3: > - Use leXX instead of virtioXX (MST); > - Refactor split ring first (MST); > - Add debug helpers (MST); > - Put split/packed ring specific fields in sub structures (MST); > - Handle normal descriptors and indirect descriptors differently (MST); > - Track the DMA addr/len related info in a separate structure (MST); > - Calculate AVAIL/USED flags only when wrap counter wraps (MST); > - Define a struct/union to read event structure (MST); > - Define a macro for wrap counter bit in uapi (MST); > - Define the AVAIL/USED bits as shifts instead of values (MST); > - s/_F_/_FLAG_/ in VRING_PACKED_EVENT_* as they are values (MST); > - Drop the notify workaround for QEMU's tx-timer in packed ring (MST); > > v1 -> v2: > - Use READ_ONCE() to read event off_wrap and flags together (Jason); > - Add comments related to ccw (Jason); > > RFC v6 -> v1: > - Avoid extra virtio_wmb() in virtqueue_enable_cb_delayed_packed() > when event idx is off (Jason); > - Fix bufs calculation in virtqueue_enable_cb_delayed_packed() (Jason); > - Test the state of the desc at used_idx instead of last_used_idx > in virtqueue_enable_cb_delayed_packed() (Jason); > - Save wrap counter (as part of queue state) in the return value > of virtqueue_enable_cb_prepare_packed(); > - Refine the packed ring definitions in uapi; > - Rebase on the net-next tree; > > RFC v5 -> RFC v6: > - Avoid tracking addr/len/flags when DMA API isn't used (MST/Jason); > - Define wrap counter as bool (Jason); > - Use ALIGN() in vring_init_packed() (Jason); > - Avoid using pointer to track `next` in detach_buf_packed() (Jason); > - Add comments for barriers (Jason); > - Don't enable RING_PACKED on ccw for now (noticed by Jason); > - Refine the memory barrier in virtqueue_poll(); > - Add a missing memory barrier in virtqueue_enable_cb_delayed_packed(); > - Remove the hacks in virtqueue_enable_cb_prepare_packed(); > > RFC v4 -> RFC v5: > - Save DMA addr, etc in desc state (Jason); > - Track used wrap counter; > > RFC v3 -> RFC v4: > - Make ID allocation support out-of-order (Jason); > - Various fixes for EVENT_IDX support; > > RFC v2 -> RFC v3: > - Split into small patches (Jason); > - Add helper virtqueue_use_indirect() (Jason); > - Just set id for the last descriptor of a list (Jason); > - Calculate the prev in virtqueue_add_packed() (Jason); > - Fix/improve desc suppression code (Jason/MST); > - Refine the code layout for XXX_split/packed and wrappers (MST); > - Fix the comments and API in uapi (MST); > - Remove the BUG_ON() for indirect (Jason); > - Some other refinements and bug fixes; > > RFC v1 -> RFC v2: > - Add indirect descriptor support - compile test only; > - Add event suppression supprt - compile test only; > - Move vring_packed_init() out of uapi (Jason, MST); > - Merge two loops into one in virtqueue_add_packed() (Jason); > - Split vring_unmap_one() for packed ring and split ring (Jason); > - Avoid using '%' operator (Jason); > - Rename free_head -> next_avail_idx (Jason); > - Add comments for virtio_wmb() in virtqueue_add_packed() (Jason); > - Some other refinements and bug fixes; > > > Tiwei Bie (13): > virtio: add packed ring types and macros > virtio_ring: add _split suffix for split ring functions > virtio_ring: put split ring functions together > virtio_ring: put split ring fields in a sub struct > virtio_ring: introduce debug helpers > virtio_ring: introduce helper for indirect feature > virtio_ring: allocate desc state for split ring separately > virtio_ring: extract split ring handling from ring creation > virtio_ring: cache whether we will use DMA API > virtio_ring: introduce packed ring support > virtio_ring: leverage event idx in packed ring > virtio_ring: disable packed ring on unsupported transports > virtio_ring: advertize packed ring layout > > drivers/misc/mic/vop/vop_main.c | 13 + > drivers/remoteproc/remoteproc_virtio.c | 13 + > drivers/s390/virtio/virtio_ccw.c | 14 + > drivers/virtio/virtio_ring.c | 1811 +++++++++++++++++++++++++------- > include/uapi/linux/virtio_config.h | 3 + > include/uapi/linux/virtio_ring.h | 52 + > 6 files changed, 1530 insertions(+), 376 deletions(-) > > -- > 2.14.5
On Wed, Nov 21, 2018 at 07:20:27AM -0500, Michael S. Tsirkin wrote:> On Wed, Nov 21, 2018 at 06:03:17PM +0800, Tiwei Bie wrote: > > Hi, > > > > This patch set implements packed ring support in virtio driver. > > > > A performance test between pktgen (pktgen_sample03_burst_single_flow.sh) > > and DPDK vhost (testpmd/rxonly/vhost-PMD) has been done, I saw > > ~30% performance gain in packed ring in this case. > > Thanks a lot, this is very exciting! > Dave, given the holiday, attempts to wrap up the 1.1 spec and the > patchset size I would very much appreciate a bit more time for > review. Say until Nov 28? > > > To make this patch set work with below patch set for vhost, > > some hacks are needed to set the _F_NEXT flag in indirect > > descriptors (this should be fixed in vhost): > > > > https://lkml.org/lkml/2018/7/3/33 > > Could you pls clarify - do you mean it doesn't yet work with vhost > because of a vhost bug, and to test it with the linked patches > you had to hack in _F_NEXT? Because I do not see _F_NEXT > in indirect descriptors in this patch (which is fine). > Or did I miss it?You didn't miss anything. :) I think it's a small bug in vhost, which Jason may fix very quickly, so I didn't post it. Below is the hack I used: diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index cd7e755484e3..42faea7d8cf8 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -980,6 +980,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, unsigned int i, n, err_idx; u16 head, id; dma_addr_t addr; + int c = 0; head = vq->packed.next_avail_idx; desc = alloc_indirect_packed(total_sg, gfp); @@ -1001,8 +1002,9 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, if (vring_mapping_error(vq, addr)) goto unmap_release; - desc[i].flags = cpu_to_le16(n < out_sgs ? - 0 : VRING_DESC_F_WRITE); + desc[i].flags = cpu_to_le16((n < out_sgs ? + 0 : VRING_DESC_F_WRITE) | + (++c == total_sg ? 0 : VRING_DESC_F_NEXT)); desc[i].addr = cpu_to_le64(addr); desc[i].len = cpu_to_le32(sg->length); i++; -- 2.14.1> > > v2 -> v3: > > - Use leXX instead of virtioXX (MST); > > - Refactor split ring first (MST); > > - Add debug helpers (MST); > > - Put split/packed ring specific fields in sub structures (MST); > > - Handle normal descriptors and indirect descriptors differently (MST); > > - Track the DMA addr/len related info in a separate structure (MST); > > - Calculate AVAIL/USED flags only when wrap counter wraps (MST); > > - Define a struct/union to read event structure (MST); > > - Define a macro for wrap counter bit in uapi (MST); > > - Define the AVAIL/USED bits as shifts instead of values (MST); > > - s/_F_/_FLAG_/ in VRING_PACKED_EVENT_* as they are values (MST); > > - Drop the notify workaround for QEMU's tx-timer in packed ring (MST); > > > > v1 -> v2: > > - Use READ_ONCE() to read event off_wrap and flags together (Jason); > > - Add comments related to ccw (Jason); > > > > RFC v6 -> v1: > > - Avoid extra virtio_wmb() in virtqueue_enable_cb_delayed_packed() > > when event idx is off (Jason); > > - Fix bufs calculation in virtqueue_enable_cb_delayed_packed() (Jason); > > - Test the state of the desc at used_idx instead of last_used_idx > > in virtqueue_enable_cb_delayed_packed() (Jason); > > - Save wrap counter (as part of queue state) in the return value > > of virtqueue_enable_cb_prepare_packed(); > > - Refine the packed ring definitions in uapi; > > - Rebase on the net-next tree; > > > > RFC v5 -> RFC v6: > > - Avoid tracking addr/len/flags when DMA API isn't used (MST/Jason); > > - Define wrap counter as bool (Jason); > > - Use ALIGN() in vring_init_packed() (Jason); > > - Avoid using pointer to track `next` in detach_buf_packed() (Jason); > > - Add comments for barriers (Jason); > > - Don't enable RING_PACKED on ccw for now (noticed by Jason); > > - Refine the memory barrier in virtqueue_poll(); > > - Add a missing memory barrier in virtqueue_enable_cb_delayed_packed(); > > - Remove the hacks in virtqueue_enable_cb_prepare_packed(); > > > > RFC v4 -> RFC v5: > > - Save DMA addr, etc in desc state (Jason); > > - Track used wrap counter; > > > > RFC v3 -> RFC v4: > > - Make ID allocation support out-of-order (Jason); > > - Various fixes for EVENT_IDX support; > > > > RFC v2 -> RFC v3: > > - Split into small patches (Jason); > > - Add helper virtqueue_use_indirect() (Jason); > > - Just set id for the last descriptor of a list (Jason); > > - Calculate the prev in virtqueue_add_packed() (Jason); > > - Fix/improve desc suppression code (Jason/MST); > > - Refine the code layout for XXX_split/packed and wrappers (MST); > > - Fix the comments and API in uapi (MST); > > - Remove the BUG_ON() for indirect (Jason); > > - Some other refinements and bug fixes; > > > > RFC v1 -> RFC v2: > > - Add indirect descriptor support - compile test only; > > - Add event suppression supprt - compile test only; > > - Move vring_packed_init() out of uapi (Jason, MST); > > - Merge two loops into one in virtqueue_add_packed() (Jason); > > - Split vring_unmap_one() for packed ring and split ring (Jason); > > - Avoid using '%' operator (Jason); > > - Rename free_head -> next_avail_idx (Jason); > > - Add comments for virtio_wmb() in virtqueue_add_packed() (Jason); > > - Some other refinements and bug fixes; > > > > > > Tiwei Bie (13): > > virtio: add packed ring types and macros > > virtio_ring: add _split suffix for split ring functions > > virtio_ring: put split ring functions together > > virtio_ring: put split ring fields in a sub struct > > virtio_ring: introduce debug helpers > > virtio_ring: introduce helper for indirect feature > > virtio_ring: allocate desc state for split ring separately > > virtio_ring: extract split ring handling from ring creation > > virtio_ring: cache whether we will use DMA API > > virtio_ring: introduce packed ring support > > virtio_ring: leverage event idx in packed ring > > virtio_ring: disable packed ring on unsupported transports > > virtio_ring: advertize packed ring layout > > > > drivers/misc/mic/vop/vop_main.c | 13 + > > drivers/remoteproc/remoteproc_virtio.c | 13 + > > drivers/s390/virtio/virtio_ccw.c | 14 + > > drivers/virtio/virtio_ring.c | 1811 +++++++++++++++++++++++++------- > > include/uapi/linux/virtio_config.h | 3 + > > include/uapi/linux/virtio_ring.h | 52 + > > 6 files changed, 1530 insertions(+), 376 deletions(-) > > > > -- > > 2.14.5
From: "Michael S. Tsirkin" <mst at redhat.com> Date: Wed, 21 Nov 2018 07:20:27 -0500> Dave, given the holiday, attempts to wrap up the 1.1 spec and the > patchset size I would very much appreciate a bit more time for > review. Say until Nov 28?Ok.
Michael S. Tsirkin
2018-Nov-27  06:08 UTC
[PATCH net-next v3 00/13] virtio: support packed ring
On Wed, Nov 21, 2018 at 06:03:17PM +0800, Tiwei Bie wrote:> Hi, > > This patch set implements packed ring support in virtio driver. > > A performance test between pktgen (pktgen_sample03_burst_single_flow.sh) > and DPDK vhost (testpmd/rxonly/vhost-PMD) has been done, I saw > ~30% performance gain in packed ring in this case. > > To make this patch set work with below patch set for vhost, > some hacks are needed to set the _F_NEXT flag in indirect > descriptors (this should be fixed in vhost): > > https://lkml.org/lkml/2018/7/3/33I went over it and I think it's correct spec-wise. I have some ideas for enhancements but let's start with getting this stuff merged first. Acked-by: Michael S. Tsirkin <mst at redhat.com>> v2 -> v3: > - Use leXX instead of virtioXX (MST); > - Refactor split ring first (MST); > - Add debug helpers (MST); > - Put split/packed ring specific fields in sub structures (MST); > - Handle normal descriptors and indirect descriptors differently (MST); > - Track the DMA addr/len related info in a separate structure (MST); > - Calculate AVAIL/USED flags only when wrap counter wraps (MST); > - Define a struct/union to read event structure (MST); > - Define a macro for wrap counter bit in uapi (MST); > - Define the AVAIL/USED bits as shifts instead of values (MST); > - s/_F_/_FLAG_/ in VRING_PACKED_EVENT_* as they are values (MST); > - Drop the notify workaround for QEMU's tx-timer in packed ring (MST); > > v1 -> v2: > - Use READ_ONCE() to read event off_wrap and flags together (Jason); > - Add comments related to ccw (Jason); > > RFC v6 -> v1: > - Avoid extra virtio_wmb() in virtqueue_enable_cb_delayed_packed() > when event idx is off (Jason); > - Fix bufs calculation in virtqueue_enable_cb_delayed_packed() (Jason); > - Test the state of the desc at used_idx instead of last_used_idx > in virtqueue_enable_cb_delayed_packed() (Jason); > - Save wrap counter (as part of queue state) in the return value > of virtqueue_enable_cb_prepare_packed(); > - Refine the packed ring definitions in uapi; > - Rebase on the net-next tree; > > RFC v5 -> RFC v6: > - Avoid tracking addr/len/flags when DMA API isn't used (MST/Jason); > - Define wrap counter as bool (Jason); > - Use ALIGN() in vring_init_packed() (Jason); > - Avoid using pointer to track `next` in detach_buf_packed() (Jason); > - Add comments for barriers (Jason); > - Don't enable RING_PACKED on ccw for now (noticed by Jason); > - Refine the memory barrier in virtqueue_poll(); > - Add a missing memory barrier in virtqueue_enable_cb_delayed_packed(); > - Remove the hacks in virtqueue_enable_cb_prepare_packed(); > > RFC v4 -> RFC v5: > - Save DMA addr, etc in desc state (Jason); > - Track used wrap counter; > > RFC v3 -> RFC v4: > - Make ID allocation support out-of-order (Jason); > - Various fixes for EVENT_IDX support; > > RFC v2 -> RFC v3: > - Split into small patches (Jason); > - Add helper virtqueue_use_indirect() (Jason); > - Just set id for the last descriptor of a list (Jason); > - Calculate the prev in virtqueue_add_packed() (Jason); > - Fix/improve desc suppression code (Jason/MST); > - Refine the code layout for XXX_split/packed and wrappers (MST); > - Fix the comments and API in uapi (MST); > - Remove the BUG_ON() for indirect (Jason); > - Some other refinements and bug fixes; > > RFC v1 -> RFC v2: > - Add indirect descriptor support - compile test only; > - Add event suppression supprt - compile test only; > - Move vring_packed_init() out of uapi (Jason, MST); > - Merge two loops into one in virtqueue_add_packed() (Jason); > - Split vring_unmap_one() for packed ring and split ring (Jason); > - Avoid using '%' operator (Jason); > - Rename free_head -> next_avail_idx (Jason); > - Add comments for virtio_wmb() in virtqueue_add_packed() (Jason); > - Some other refinements and bug fixes; > > > Tiwei Bie (13): > virtio: add packed ring types and macros > virtio_ring: add _split suffix for split ring functions > virtio_ring: put split ring functions together > virtio_ring: put split ring fields in a sub struct > virtio_ring: introduce debug helpers > virtio_ring: introduce helper for indirect feature > virtio_ring: allocate desc state for split ring separately > virtio_ring: extract split ring handling from ring creation > virtio_ring: cache whether we will use DMA API > virtio_ring: introduce packed ring support > virtio_ring: leverage event idx in packed ring > virtio_ring: disable packed ring on unsupported transports > virtio_ring: advertize packed ring layout > > drivers/misc/mic/vop/vop_main.c | 13 + > drivers/remoteproc/remoteproc_virtio.c | 13 + > drivers/s390/virtio/virtio_ccw.c | 14 + > drivers/virtio/virtio_ring.c | 1811 +++++++++++++++++++++++++------- > include/uapi/linux/virtio_config.h | 3 + > include/uapi/linux/virtio_ring.h | 52 + > 6 files changed, 1530 insertions(+), 376 deletions(-) > > -- > 2.14.5
From: "Michael S. Tsirkin" <mst at redhat.com> Date: Tue, 27 Nov 2018 01:08:08 -0500> On Wed, Nov 21, 2018 at 06:03:17PM +0800, Tiwei Bie wrote: >> Hi, >> >> This patch set implements packed ring support in virtio driver. >> >> A performance test between pktgen (pktgen_sample03_burst_single_flow.sh) >> and DPDK vhost (testpmd/rxonly/vhost-PMD) has been done, I saw >> ~30% performance gain in packed ring in this case. >> >> To make this patch set work with below patch set for vhost, >> some hacks are needed to set the _F_NEXT flag in indirect >> descriptors (this should be fixed in vhost): >> >> https://lkml.org/lkml/2018/7/3/33 > > I went over it and I think it's correct spec-wise. > > I have some ideas for enhancements but let's start > with getting this stuff merged first. > > Acked-by: Michael S. Tsirkin <mst at redhat.com>Series applied.
Jason Wang
2018-Nov-30  08:10 UTC
[PATCH net-next v3 01/13] virtio: add packed ring types and macros
On 2018/11/21 ??6:03, Tiwei Bie wrote:> Add types and macros for packed ring. > > Signed-off-by: Tiwei Bie <tiwei.bie at intel.com> > --- > include/uapi/linux/virtio_config.h | 3 +++ > include/uapi/linux/virtio_ring.h | 52 ++++++++++++++++++++++++++++++++++++++ > 2 files changed, 55 insertions(+) > > diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h > index 449132c76b1c..1196e1c1d4f6 100644 > --- a/include/uapi/linux/virtio_config.h > +++ b/include/uapi/linux/virtio_config.h > @@ -75,6 +75,9 @@ > */ > #define VIRTIO_F_IOMMU_PLATFORM 33 > > +/* This feature indicates support for the packed virtqueue layout. */ > +#define VIRTIO_F_RING_PACKED 34 > + > /* > * Does the device support Single Root I/O Virtualization? > */ > diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h > index 6d5d5faa989b..2414f8af26b3 100644 > --- a/include/uapi/linux/virtio_ring.h > +++ b/include/uapi/linux/virtio_ring.h > @@ -44,6 +44,13 @@ > /* This means the buffer contains a list of buffer descriptors. */ > #define VRING_DESC_F_INDIRECT 4 > > +/* > + * Mark a descriptor as available or used in packed ring. > + * Notice: they are defined as shifts instead of shifted values.This looks inconsistent to previous flags, any reason for using shifts?> + */ > +#define VRING_PACKED_DESC_F_AVAIL 7 > +#define VRING_PACKED_DESC_F_USED 15 > + > /* The Host uses this in used->flags to advise the Guest: don't kick me when > * you add a buffer. It's unreliable, so it's simply an optimization. Guest > * will still kick if it's out of buffers. */ > @@ -53,6 +60,23 @@ > * optimization. */ > #define VRING_AVAIL_F_NO_INTERRUPT 1 > > +/* Enable events in packed ring. */ > +#define VRING_PACKED_EVENT_FLAG_ENABLE 0x0 > +/* Disable events in packed ring. */ > +#define VRING_PACKED_EVENT_FLAG_DISABLE 0x1 > +/* > + * Enable events for a specific descriptor in packed ring. > + * (as specified by Descriptor Ring Change Event Offset/Wrap Counter). > + * Only valid if VIRTIO_RING_F_EVENT_IDX has been negotiated. > + */ > +#define VRING_PACKED_EVENT_FLAG_DESC 0x2Any reason for using _FLAG_ instead of _F_? Thanks> + > +/* > + * Wrap counter bit shift in event suppression structure > + * of packed ring. > + */ > +#define VRING_PACKED_EVENT_F_WRAP_CTR 15 > + > /* We support indirect buffer descriptors */ > #define VIRTIO_RING_F_INDIRECT_DESC 28 > > @@ -171,4 +195,32 @@ static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old) > return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old); > } > > +struct vring_packed_desc_event { > + /* Descriptor Ring Change Event Offset/Wrap Counter. */ > + __le16 off_wrap; > + /* Descriptor Ring Change Event Flags. */ > + __le16 flags; > +}; > + > +struct vring_packed_desc { > + /* Buffer Address. */ > + __le64 addr; > + /* Buffer Length. */ > + __le32 len; > + /* Buffer ID. */ > + __le16 id; > + /* The flags depending on descriptor type. */ > + __le16 flags; > +}; > + > +struct vring_packed { > + unsigned int num; > + > + struct vring_packed_desc *desc; > + > + struct vring_packed_desc_event *driver; > + > + struct vring_packed_desc_event *device; > +}; > + > #endif /* _UAPI_LINUX_VIRTIO_RING_H */