Jason Wang
2014-Jul-16  06:21 UTC
[PATCH net-next V2 0/3] rx busy polling support for virtio-net
Hi all: This series introduces the support for rx busy polling support. This was useful for reduing the latency for a kvm guest. Patch 1-2 introduces helpers which is used for rx busy polling. Patch 3 implement the main function. Test was done between a kvm guest and an external host. Two hosts were connected through 40gb mlx4 cards. With both busy_poll and busy_read are set to 50 in guest, 1 byte netperf tcp_rr shows 116% improvement: transaction rate was increased from 9151.94 to 19787.37. Changes from V1: - split the patch info smaller ones - add more details about test setup/configuration Please review. Jason Wang (3): virtio-net: introduce helpers to enable and disable all NAPIs virtio-net: introduce virtnet_receive() virtio-net: rx busy polling support drivers/net/virtio_net.c | 234 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 221 insertions(+), 13 deletions(-) -- 1.8.3.1
Jason Wang
2014-Jul-16  06:21 UTC
[PATCH net-next V2 1/3] virtio-net: introduce helpers to enable and disable all NAPIs
This patch introduces helpers to disable and enable NAPI for all rx
queues. This will be used by rx busy polling support.
Cc: Rusty Russell <rusty at rustcorp.com.au>
Cc: Michael S. Tsirkin <mst at redhat.com>
Cc: Vlad Yasevich <vyasevic at redhat.com>
Cc: Eric Dumazet <eric.dumazet at gmail.com>
Signed-off-by: Jason Wang <jasowang at redhat.com>
---
 drivers/net/virtio_net.c | 34 ++++++++++++++++++++++++----------
 1 file changed, 24 insertions(+), 10 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 7d9f84a..e1e1691 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -760,6 +760,22 @@ again:
 	return received;
 }
 
+static void virtnet_napi_enable_all(struct virtnet_info *vi)
+{
+	int i;
+
+	for (i = 0; i < vi->max_queue_pairs; i++)
+		virtnet_napi_enable(&vi->rq[i]);
+}
+
+static void virtnet_napi_disable_all(struct virtnet_info *vi)
+{
+	int i;
+
+	for (i = 0; i < vi->max_queue_pairs; i++)
+		napi_disable(&vi->rq[i].napi);
+}
+
 static int virtnet_open(struct net_device *dev)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
@@ -770,9 +786,10 @@ static int virtnet_open(struct net_device *dev)
 			/* Make sure we have some buffers: if oom use wq. */
 			if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
 				schedule_delayed_work(&vi->refill, 0);
-		virtnet_napi_enable(&vi->rq[i]);
 	}
 
+	virtnet_napi_enable_all(vi);
+
 	return 0;
 }
 
@@ -1076,13 +1093,11 @@ static int virtnet_set_queues(struct virtnet_info *vi,
u16 queue_pairs)
 static int virtnet_close(struct net_device *dev)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
-	int i;
 
 	/* Make sure refill_work doesn't re-enable napi! */
 	cancel_delayed_work_sync(&vi->refill);
 
-	for (i = 0; i < vi->max_queue_pairs; i++)
-		napi_disable(&vi->rq[i].napi);
+	virtnet_napi_disable_all(vi);
 
 	return 0;
 }
@@ -1853,11 +1868,11 @@ static int virtnet_freeze(struct virtio_device *vdev)
 	netif_device_detach(vi->dev);
 	cancel_delayed_work_sync(&vi->refill);
 
-	if (netif_running(vi->dev))
-		for (i = 0; i < vi->max_queue_pairs; i++) {
-			napi_disable(&vi->rq[i].napi);
+	if (netif_running(vi->dev)) {
+		virtnet_napi_disable_all(vi);
+		for (i = 0; i < vi->max_queue_pairs; i++)
 			netif_napi_del(&vi->rq[i].napi);
-		}
+	}
 
 	remove_vq_common(vi);
 
@@ -1880,8 +1895,7 @@ static int virtnet_restore(struct virtio_device *vdev)
 			if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
 				schedule_delayed_work(&vi->refill, 0);
 
-		for (i = 0; i < vi->max_queue_pairs; i++)
-			virtnet_napi_enable(&vi->rq[i]);
+		virtnet_napi_enable_all(vi);
 	}
 
 	netif_device_attach(vi->dev);
-- 
1.8.3.1
Jason Wang
2014-Jul-16  06:21 UTC
[PATCH net-next V2 2/3] virtio-net: introduce virtnet_receive()
Move common receive logic to a new helper virtnet_receive(). It will
also be used by rx busy polling method.
Cc: Rusty Russell <rusty at rustcorp.com.au>
Cc: Michael S. Tsirkin <mst at redhat.com>
Cc: Vlad Yasevich <vyasevic at redhat.com>
Cc: Eric Dumazet <eric.dumazet at gmail.com>
Signed-off-by: Jason Wang <jasowang at redhat.com>
---
 drivers/net/virtio_net.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index e1e1691..e417d93 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -725,15 +725,12 @@ static void refill_work(struct work_struct *work)
 	}
 }
 
-static int virtnet_poll(struct napi_struct *napi, int budget)
+static int virtnet_receive(struct receive_queue *rq, int budget)
 {
-	struct receive_queue *rq -		container_of(napi, struct receive_queue, napi);
 	struct virtnet_info *vi = rq->vq->vdev->priv;
+	unsigned int len, received = 0;
 	void *buf;
-	unsigned int r, len, received = 0;
 
-again:
 	while (received < budget &&
 	       (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
 		receive_buf(rq, buf, len);
@@ -745,6 +742,18 @@ again:
 			schedule_delayed_work(&vi->refill, 0);
 	}
 
+	return received;
+}
+
+static int virtnet_poll(struct napi_struct *napi, int budget)
+{
+	struct receive_queue *rq +		container_of(napi, struct receive_queue, napi);
+	unsigned int r, received = 0;
+
+again:
+	received += virtnet_receive(rq, budget);
+
 	/* Out of packets? */
 	if (received < budget) {
 		r = virtqueue_enable_cb_prepare(rq->vq);
@@ -753,6 +762,7 @@ again:
 		    napi_schedule_prep(napi)) {
 			virtqueue_disable_cb(rq->vq);
 			__napi_schedule(napi);
+			budget -= received;
 			goto again;
 		}
 	}
-- 
1.8.3.1
Jason Wang
2014-Jul-16  06:21 UTC
[PATCH net-next V2 3/3] virtio-net: rx busy polling support
Add basic support for rx busy polling.
Test was done between a kvm guest and an external host. Two hosts were
connected through 40gb mlx4 cards. With both busy_poll and busy_read
are set to 50 in guest, 1 byte netperf tcp_rr shows 116% improvement:
transaction rate was increased from 9151.94 to 19787.37.
Cc: Rusty Russell <rusty at rustcorp.com.au>
Cc: Michael S. Tsirkin <mst at redhat.com>
Cc: Vlad Yasevich <vyasevic at redhat.com>
Cc: Eric Dumazet <eric.dumazet at gmail.com>
Signed-off-by: Jason Wang <jasowang at redhat.com>
---
 drivers/net/virtio_net.c | 190 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 187 insertions(+), 3 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index e417d93..4830713 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -27,6 +27,7 @@
 #include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/average.h>
+#include <net/busy_poll.h>
 
 static int napi_weight = NAPI_POLL_WEIGHT;
 module_param(napi_weight, int, 0444);
@@ -94,8 +95,143 @@ struct receive_queue {
 
 	/* Name of this receive queue: input.$index */
 	char name[40];
+
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	unsigned int state;
+#define VIRTNET_RQ_STATE_IDLE        0
+#define VIRTNET_RQ_STATE_NAPI	     1    /* NAPI or refill owns this RQ */
+#define VIRTNET_RQ_STATE_POLL	     2    /* poll owns this RQ */
+#define VIRTNET_RQ_STATE_DISABLED    4    /* RQ is disabled */
+#define VIRTNET_RQ_OWNED (VIRTNET_RQ_STATE_NAPI | VIRTNET_RQ_STATE_POLL)
+#define VIRTNET_RQ_LOCKED (VIRTNET_RQ_OWNED | VIRTNET_RQ_STATE_DISABLED)
+#define VIRTNET_RQ_STATE_NAPI_YIELD  8    /* NAPI or refill yielded this RQ */
+#define VIRTNET_RQ_STATE_POLL_YIELD  16   /* poll yielded this RQ */
+	spinlock_t lock;
+#endif  /* CONFIG_NET_RX_BUSY_POLL */
 };
 
+#ifdef CONFIG_NET_RX_BUSY_POLL
+static inline void virtnet_rq_init_lock(struct receive_queue *rq)
+{
+
+	spin_lock_init(&rq->lock);
+	rq->state = VIRTNET_RQ_STATE_IDLE;
+}
+
+/* called from the device poll routine or refill routine to get ownership of a
+ * receive queue.
+ */
+static inline bool virtnet_rq_lock_napi_refill(struct receive_queue *rq)
+{
+	int rc = true;
+
+	spin_lock(&rq->lock);
+	if (rq->state & VIRTNET_RQ_LOCKED) {
+		WARN_ON(rq->state & VIRTNET_RQ_STATE_NAPI);
+		rq->state |= VIRTNET_RQ_STATE_NAPI_YIELD;
+		rc = false;
+	} else
+		/* we don't care if someone yielded */
+		rq->state = VIRTNET_RQ_STATE_NAPI;
+	spin_unlock(&rq->lock);
+	return rc;
+}
+
+/* returns true is someone tried to get the rq while napi or refill had it */
+static inline bool virtnet_rq_unlock_napi_refill(struct receive_queue *rq)
+{
+	int rc = false;
+
+	spin_lock(&rq->lock);
+	WARN_ON(rq->state & (VIRTNET_RQ_STATE_POLL |
+			     VIRTNET_RQ_STATE_NAPI_YIELD));
+
+	if (rq->state & VIRTNET_RQ_STATE_POLL_YIELD)
+		rc = true;
+	/* will reset state to idle, unless RQ is disabled */
+	rq->state &= VIRTNET_RQ_STATE_DISABLED;
+	spin_unlock(&rq->lock);
+	return rc;
+}
+
+/* called from virtnet_low_latency_recv() */
+static inline bool virtnet_rq_lock_poll(struct receive_queue *rq)
+{
+	int rc = true;
+
+	spin_lock_bh(&rq->lock);
+	if ((rq->state & VIRTNET_RQ_LOCKED)) {
+		rq->state |= VIRTNET_RQ_STATE_POLL_YIELD;
+		rc = false;
+	} else
+		/* preserve yield marks */
+		rq->state |= VIRTNET_RQ_STATE_POLL;
+	spin_unlock_bh(&rq->lock);
+	return rc;
+}
+
+/* returns true if someone tried to get the receive queue while it was locked
*/
+static inline bool virtnet_rq_unlock_poll(struct receive_queue *rq)
+{
+	int rc = false;
+
+	spin_lock_bh(&rq->lock);
+	WARN_ON(rq->state & (VIRTNET_RQ_STATE_NAPI));
+
+	if (rq->state & VIRTNET_RQ_STATE_POLL_YIELD)
+		rc = true;
+	/* will reset state to idle, unless RQ is disabled */
+	rq->state &= VIRTNET_RQ_STATE_DISABLED;
+	spin_unlock_bh(&rq->lock);
+	return rc;
+}
+
+/* return false if RQ is currently owned */
+static inline bool virtnet_rq_disable(struct receive_queue *rq)
+{
+	int rc = true;
+
+	spin_lock_bh(&rq->lock);
+	if (rq->state & VIRTNET_RQ_OWNED)
+		rc = false;
+	rq->state |= VIRTNET_RQ_STATE_DISABLED;
+	spin_unlock_bh(&rq->lock);
+
+	return rc;
+}
+
+#else /* CONFIG_NET_RX_BUSY_POLL */
+static inline void virtnet_rq_init_lock(struct receive_queue *rq)
+{
+}
+
+static inline bool virtnet_rq_lock_napi_refill(struct receive_queue *rq)
+{
+	return true;
+}
+
+static inline bool virtnet_rq_unlock_napi_refill(struct receive_queue *rq)
+{
+	return false;
+}
+
+static inline bool virtnet_rq_lock_poll(struct receive_queue *rq)
+{
+	return false;
+}
+
+static inline bool virtnet_rq_unlock_poll(struct receive_queue *rq)
+{
+	return false;
+}
+
+static inline bool virtnet_rq_disable(struct receive_queue *rq)
+{
+	return true;
+}
+
+#endif /* CONFIG_NET_RX_BUSY_POLL */
+
 struct virtnet_info {
 	struct virtio_device *vdev;
 	struct virtqueue *cvq;
@@ -521,6 +657,8 @@ static void receive_buf(struct receive_queue *rq, void *buf,
unsigned int len)
 		skb_shinfo(skb)->gso_segs = 0;
 	}
 
+	skb_mark_napi_id(skb, &rq->napi);
+
 	netif_receive_skb(skb);
 	return;
 
@@ -714,7 +852,12 @@ static void refill_work(struct work_struct *work)
 		struct receive_queue *rq = &vi->rq[i];
 
 		napi_disable(&rq->napi);
+		if (!virtnet_rq_lock_napi_refill(rq)) {
+			virtnet_napi_enable(rq);
+			continue;
+		}
 		still_empty = !try_fill_recv(rq, GFP_KERNEL);
+		virtnet_rq_unlock_napi_refill(rq);
 		virtnet_napi_enable(rq);
 
 		/* In theory, this can happen: if we don't get any buffers in
@@ -752,8 +895,13 @@ static int virtnet_poll(struct napi_struct *napi, int
budget)
 	unsigned int r, received = 0;
 
 again:
+	if (!virtnet_rq_lock_napi_refill(rq))
+		return budget;
+
 	received += virtnet_receive(rq, budget);
 
+	virtnet_rq_unlock_napi_refill(rq);
+
 	/* Out of packets? */
 	if (received < budget) {
 		r = virtqueue_enable_cb_prepare(rq->vq);
@@ -770,20 +918,50 @@ again:
 	return received;
 }
 
+#ifdef CONFIG_NET_RX_BUSY_POLL
+/* must be called with local_bh_disable()d */
+static int virtnet_low_latency_recv(struct napi_struct *napi)
+{
+	struct receive_queue *rq +		container_of(napi, struct receive_queue, napi);
+	struct virtnet_info *vi = rq->vq->vdev->priv;
+	int received;
+
+	if (!(vi->status & VIRTIO_NET_S_LINK_UP))
+		return LL_FLUSH_FAILED;
+
+	if (!virtnet_rq_lock_poll(rq))
+		return LL_FLUSH_BUSY;
+
+	received = virtnet_receive(rq, 4);
+
+	virtnet_rq_unlock_poll(rq);
+
+	return received;
+}
+#endif	/* CONFIG_NET_RX_BUSY_POLL */
+
 static void virtnet_napi_enable_all(struct virtnet_info *vi)
 {
 	int i;
 
-	for (i = 0; i < vi->max_queue_pairs; i++)
+	for (i = 0; i < vi->max_queue_pairs; i++) {
+		virtnet_rq_init_lock(&vi->rq[i]);
 		virtnet_napi_enable(&vi->rq[i]);
+	}
 }
 
 static void virtnet_napi_disable_all(struct virtnet_info *vi)
 {
 	int i;
 
-	for (i = 0; i < vi->max_queue_pairs; i++)
+	for (i = 0; i < vi->max_queue_pairs; i++) {
 		napi_disable(&vi->rq[i].napi);
+		while (!virtnet_rq_disable(&vi->rq[i])) {
+			pr_info("RQ %d locked\n", i);
+			usleep_range(1000, 20000);
+		}
+	}
 }
 
 static int virtnet_open(struct net_device *dev)
@@ -1372,6 +1550,9 @@ static const struct net_device_ops virtnet_netdev = {
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller = virtnet_netpoll,
 #endif
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	.ndo_busy_poll		= virtnet_low_latency_recv,
+#endif
 };
 
 static void virtnet_config_changed_work(struct work_struct *work)
@@ -1577,6 +1758,7 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
 		vi->rq[i].pages = NULL;
 		netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll,
 			       napi_weight);
+		napi_hash_add(&vi->rq[i].napi);
 
 		sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
 		ewma_init(&vi->rq[i].mrg_avg_pkt_len, 1, RECEIVE_AVG_WEIGHT);
@@ -1880,8 +2062,10 @@ static int virtnet_freeze(struct virtio_device *vdev)
 
 	if (netif_running(vi->dev)) {
 		virtnet_napi_disable_all(vi);
-		for (i = 0; i < vi->max_queue_pairs; i++)
+		for (i = 0; i < vi->max_queue_pairs; i++) {
+			napi_hash_del(&vi->rq[i].napi);
 			netif_napi_del(&vi->rq[i].napi);
+		}
 	}
 
 	remove_vq_common(vi);
-- 
1.8.3.1
Varka Bhadram
2014-Jul-16  08:38 UTC
[PATCH net-next V2 3/3] virtio-net: rx busy polling support
On 07/16/2014 11:51 AM, Jason Wang wrote:> Add basic support for rx busy polling. > > Test was done between a kvm guest and an external host. Two hosts were > connected through 40gb mlx4 cards. With both busy_poll and busy_read > are set to 50 in guest, 1 byte netperf tcp_rr shows 116% improvement: > transaction rate was increased from 9151.94 to 19787.37. > > Cc: Rusty Russell <rusty at rustcorp.com.au> > Cc: Michael S. Tsirkin <mst at redhat.com> > Cc: Vlad Yasevich <vyasevic at redhat.com> > Cc: Eric Dumazet <eric.dumazet at gmail.com> > Signed-off-by: Jason Wang <jasowang at redhat.com> > --- > drivers/net/virtio_net.c | 190 ++++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 187 insertions(+), 3 deletions(-) > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > index e417d93..4830713 100644 > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -27,6 +27,7 @@ > #include <linux/slab.h> > #include <linux/cpu.h> > #include <linux/average.h> > +#include <net/busy_poll.h> > > static int napi_weight = NAPI_POLL_WEIGHT; > module_param(napi_weight, int, 0444); > @@ -94,8 +95,143 @@ struct receive_queue { > > /* Name of this receive queue: input.$index */ > char name[40]; > + > +#ifdef CONFIG_NET_RX_BUSY_POLL > + unsigned int state; > +#define VIRTNET_RQ_STATE_IDLE 0 > +#define VIRTNET_RQ_STATE_NAPI 1 /* NAPI or refill owns this RQ */ > +#define VIRTNET_RQ_STATE_POLL 2 /* poll owns this RQ */ > +#define VIRTNET_RQ_STATE_DISABLED 4 /* RQ is disabled */ > +#define VIRTNET_RQ_OWNED (VIRTNET_RQ_STATE_NAPI | VIRTNET_RQ_STATE_POLL) > +#define VIRTNET_RQ_LOCKED (VIRTNET_RQ_OWNED | VIRTNET_RQ_STATE_DISABLED) > +#define VIRTNET_RQ_STATE_NAPI_YIELD 8 /* NAPI or refill yielded this RQ */ > +#define VIRTNET_RQ_STATE_POLL_YIELD 16 /* poll yielded this RQ */ > + spinlock_t lock; > +#endif /* CONFIG_NET_RX_BUSY_POLL */ > }; > > +#ifdef CONFIG_NET_RX_BUSY_POLL > +static inline void virtnet_rq_init_lock(struct receive_queue *rq) > +{ > + > + spin_lock_init(&rq->lock); > + rq->state = VIRTNET_RQ_STATE_IDLE; > +} > + > +/* called from the device poll routine or refill routine to get ownership of a > + * receive queue. > + */ > +static inline bool virtnet_rq_lock_napi_refill(struct receive_queue *rq) > +{ > + int rc = true; > +bool instead of int...?> + spin_lock(&rq->lock); > + if (rq->state & VIRTNET_RQ_LOCKED) { > + WARN_ON(rq->state & VIRTNET_RQ_STATE_NAPI); > + rq->state |= VIRTNET_RQ_STATE_NAPI_YIELD; > + rc = false; > + } else > + /* we don't care if someone yielded */ > + rq->state = VIRTNET_RQ_STATE_NAPI; > + spin_unlock(&rq->lock);Lock for rq->state ...? If yes: spin_lock(&rq->lock); if (rq->state & VIRTNET_RQ_LOCKED) { rq->state |= VIRTNET_RQ_STATE_NAPI_YIELD; spin_unlock(&rq->lock); WARN_ON(rq->state & VIRTNET_RQ_STATE_NAPI); rc = false; } else { /* we don't care if someone yielded */ rq->state = VIRTNET_RQ_STATE_NAPI; spin_unlock(&rq->lock); }> + return rc; > +} > + > +/* returns true is someone tried to get the rq while napi or refill had it */ > +static inline bool virtnet_rq_unlock_napi_refill(struct receive_queue *rq) > +{ > + int rc = false; > + > + spin_lock(&rq->lock); > + WARN_ON(rq->state & (VIRTNET_RQ_STATE_POLL | > + VIRTNET_RQ_STATE_NAPI_YIELD)); > + > + if (rq->state & VIRTNET_RQ_STATE_POLL_YIELD) > + rc = true; > + /* will reset state to idle, unless RQ is disabled */ > + rq->state &= VIRTNET_RQ_STATE_DISABLED; > + spin_unlock(&rq->lock); > + return rc; > +} > + > +/* called from virtnet_low_latency_recv() */ > +static inline bool virtnet_rq_lock_poll(struct receive_queue *rq) > +{ > + int rc = true; > + > + spin_lock_bh(&rq->lock); > + if ((rq->state & VIRTNET_RQ_LOCKED)) { > + rq->state |= VIRTNET_RQ_STATE_POLL_YIELD; > + rc = false; > + } else > + /* preserve yield marks */ > + rq->state |= VIRTNET_RQ_STATE_POLL; > + spin_unlock_bh(&rq->lock); > + return rc; > +} > + > +/* returns true if someone tried to get the receive queue while it was locked */ > +static inline bool virtnet_rq_unlock_poll(struct receive_queue *rq) > +{ > + int rc = false; > + > + spin_lock_bh(&rq->lock); > + WARN_ON(rq->state & (VIRTNET_RQ_STATE_NAPI)); > + > + if (rq->state & VIRTNET_RQ_STATE_POLL_YIELD) > + rc = true; > + /* will reset state to idle, unless RQ is disabled */ > + rq->state &= VIRTNET_RQ_STATE_DISABLED; > + spin_unlock_bh(&rq->lock); > + return rc; > +} > + > +/* return false if RQ is currently owned */ > +static inline bool virtnet_rq_disable(struct receive_queue *rq) > +{ > + int rc = true; > + > + spin_lock_bh(&rq->lock); > + if (rq->state & VIRTNET_RQ_OWNED) > + rc = false; > + rq->state |= VIRTNET_RQ_STATE_DISABLED; > + spin_unlock_bh(&rq->lock); > + > + return rc; > +} > + > +#else /* CONFIG_NET_RX_BUSY_POLL */ > +static inline void virtnet_rq_init_lock(struct receive_queue *rq) > +{ > +} > + > +static inline bool virtnet_rq_lock_napi_refill(struct receive_queue *rq) > +{ > + return true; > +} > + > +static inline bool virtnet_rq_unlock_napi_refill(struct receive_queue *rq) > +{ > + return false; > +} > + > +static inline bool virtnet_rq_lock_poll(struct receive_queue *rq) > +{ > + return false; > +} > + > +static inline bool virtnet_rq_unlock_poll(struct receive_queue *rq) > +{ > + return false; > +} > + > +static inline bool virtnet_rq_disable(struct receive_queue *rq) > +{ > + return true; > +} > + > +#endif /* CONFIG_NET_RX_BUSY_POLL */ > + > struct virtnet_info { > struct virtio_device *vdev; > struct virtqueue *cvq; > @@ -521,6 +657,8 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) > skb_shinfo(skb)->gso_segs = 0; > } > > + skb_mark_napi_id(skb, &rq->napi); > + > netif_receive_skb(skb); > return; > > @@ -714,7 +852,12 @@ static void refill_work(struct work_struct *work) > struct receive_queue *rq = &vi->rq[i]; > > napi_disable(&rq->napi); > + if (!virtnet_rq_lock_napi_refill(rq)) { > + virtnet_napi_enable(rq); > + continue; > + } > still_empty = !try_fill_recv(rq, GFP_KERNEL); > + virtnet_rq_unlock_napi_refill(rq); > virtnet_napi_enable(rq); > > /* In theory, this can happen: if we don't get any buffers in > @@ -752,8 +895,13 @@ static int virtnet_poll(struct napi_struct *napi, int budget) > unsigned int r, received = 0; > > again: > + if (!virtnet_rq_lock_napi_refill(rq)) > + return budget; > + > received += virtnet_receive(rq, budget); > > + virtnet_rq_unlock_napi_refill(rq); > + > /* Out of packets? */ > if (received < budget) { > r = virtqueue_enable_cb_prepare(rq->vq); > @@ -770,20 +918,50 @@ again: > return received; > } > > +#ifdef CONFIG_NET_RX_BUSY_POLL > +/* must be called with local_bh_disable()d */ > +static int virtnet_low_latency_recv(struct napi_struct *napi) > +{ > + struct receive_queue *rq > + container_of(napi, struct receive_queue, napi); > + struct virtnet_info *vi = rq->vq->vdev->priv; > + int received; > + > + if (!(vi->status & VIRTIO_NET_S_LINK_UP)) > + return LL_FLUSH_FAILED; > + > + if (!virtnet_rq_lock_poll(rq)) > + return LL_FLUSH_BUSY; > + > + received = virtnet_receive(rq, 4); > + > + virtnet_rq_unlock_poll(rq); > + > + return received; > +} > +#endif /* CONFIG_NET_RX_BUSY_POLL */ > + > static void virtnet_napi_enable_all(struct virtnet_info *vi) > { > int i; > > - for (i = 0; i < vi->max_queue_pairs; i++) > + for (i = 0; i < vi->max_queue_pairs; i++) { > + virtnet_rq_init_lock(&vi->rq[i]); > virtnet_napi_enable(&vi->rq[i]); > + } > } > > static void virtnet_napi_disable_all(struct virtnet_info *vi) > { > int i; > > - for (i = 0; i < vi->max_queue_pairs; i++) > + for (i = 0; i < vi->max_queue_pairs; i++) { > napi_disable(&vi->rq[i].napi); > + while (!virtnet_rq_disable(&vi->rq[i])) { > + pr_info("RQ %d locked\n", i); > + usleep_range(1000, 20000); > + } > + } > } > > static int virtnet_open(struct net_device *dev) > @@ -1372,6 +1550,9 @@ static const struct net_device_ops virtnet_netdev = { > #ifdef CONFIG_NET_POLL_CONTROLLER > .ndo_poll_controller = virtnet_netpoll, > #endif > +#ifdef CONFIG_NET_RX_BUSY_POLL > + .ndo_busy_poll = virtnet_low_latency_recv, > +#endif > }; > > static void virtnet_config_changed_work(struct work_struct *work) > @@ -1577,6 +1758,7 @@ static int virtnet_alloc_queues(struct virtnet_info *vi) > vi->rq[i].pages = NULL; > netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll, > napi_weight); > + napi_hash_add(&vi->rq[i].napi); > > sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); > ewma_init(&vi->rq[i].mrg_avg_pkt_len, 1, RECEIVE_AVG_WEIGHT); > @@ -1880,8 +2062,10 @@ static int virtnet_freeze(struct virtio_device *vdev) > > if (netif_running(vi->dev)) { > virtnet_napi_disable_all(vi); > - for (i = 0; i < vi->max_queue_pairs; i++) > + for (i = 0; i < vi->max_queue_pairs; i++) { > + napi_hash_del(&vi->rq[i].napi); > netif_napi_del(&vi->rq[i].napi); > + } > } > > remove_vq_common(vi);-- Regards, Varka Bhadram.
David Miller
2014-Jul-17  06:21 UTC
[PATCH net-next V2 0/3] rx busy polling support for virtio-net
From: Jason Wang <jasowang at redhat.com> Date: Wed, 16 Jul 2014 14:21:44 +0800> Hi all: > > This series introduces the support for rx busy polling support. This > was useful for reduing the latency for a kvm guest. Patch 1-2 > introduces helpers which is used for rx busy polling. Patch 3 > implement the main function. > > Test was done between a kvm guest and an external host. Two hosts were > connected through 40gb mlx4 cards. With both busy_poll and busy_read are > set to 50 in guest, 1 byte netperf tcp_rr shows 116% improvement: > transaction rate was increased from 9151.94 to 19787.37. > > Changes from V1: > - split the patch info smaller ones > - add more details about test setup/configuration > > Please review.Looks like you can make some minor adjustments based upon review, and anyways I'd like to wait for Michael to get back and review a change of this nature. Thanks.
Jason Wang
2014-Jul-17  06:59 UTC
[PATCH net-next V2 0/3] rx busy polling support for virtio-net
On 07/17/2014 02:21 PM, David Miller wrote:> From: Jason Wang <jasowang at redhat.com> > Date: Wed, 16 Jul 2014 14:21:44 +0800 > >> Hi all: >> >> This series introduces the support for rx busy polling support. This >> was useful for reduing the latency for a kvm guest. Patch 1-2 >> introduces helpers which is used for rx busy polling. Patch 3 >> implement the main function. >> >> Test was done between a kvm guest and an external host. Two hosts were >> connected through 40gb mlx4 cards. With both busy_poll and busy_read are >> set to 50 in guest, 1 byte netperf tcp_rr shows 116% improvement: >> transaction rate was increased from 9151.94 to 19787.37. >> >> Changes from V1: >> - split the patch info smaller ones >> - add more details about test setup/configuration >> >> Please review. > Looks like you can make some minor adjustments based upon review, and > anyways I'd like to wait for Michael to get back and review a change > of this nature. > > Thanks.Ok, I will wait for Michael's comments and then post V3. Thanks
Michael S. Tsirkin
2014-Jul-20  20:31 UTC
[PATCH net-next V2 3/3] virtio-net: rx busy polling support
On Wed, Jul 16, 2014 at 02:21:47PM +0800, Jason Wang wrote:> Add basic support for rx busy polling. > > Test was done between a kvm guest and an external host. Two hosts were > connected through 40gb mlx4 cards. With both busy_poll and busy_read > are set to 50 in guest, 1 byte netperf tcp_rr shows 116% improvement: > transaction rate was increased from 9151.94 to 19787.37.Pls include data about non polling tests: any effect on cpu utilization there? There could be as we are adding locking.> > Cc: Rusty Russell <rusty at rustcorp.com.au> > Cc: Michael S. Tsirkin <mst at redhat.com> > Cc: Vlad Yasevich <vyasevic at redhat.com> > Cc: Eric Dumazet <eric.dumazet at gmail.com> > Signed-off-by: Jason Wang <jasowang at redhat.com> > --- > drivers/net/virtio_net.c | 190 ++++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 187 insertions(+), 3 deletions(-) > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > index e417d93..4830713 100644 > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -27,6 +27,7 @@ > #include <linux/slab.h> > #include <linux/cpu.h> > #include <linux/average.h> > +#include <net/busy_poll.h> > > static int napi_weight = NAPI_POLL_WEIGHT; > module_param(napi_weight, int, 0444); > @@ -94,8 +95,143 @@ struct receive_queue { > > /* Name of this receive queue: input.$index */ > char name[40]; > + > +#ifdef CONFIG_NET_RX_BUSY_POLL > + unsigned int state; > +#define VIRTNET_RQ_STATE_IDLE 0 > +#define VIRTNET_RQ_STATE_NAPI 1 /* NAPI or refill owns this RQ */ > +#define VIRTNET_RQ_STATE_POLL 2 /* poll owns this RQ */ > +#define VIRTNET_RQ_STATE_DISABLED 4 /* RQ is disabled */ > +#define VIRTNET_RQ_OWNED (VIRTNET_RQ_STATE_NAPI | VIRTNET_RQ_STATE_POLL) > +#define VIRTNET_RQ_LOCKED (VIRTNET_RQ_OWNED | VIRTNET_RQ_STATE_DISABLED) > +#define VIRTNET_RQ_STATE_NAPI_YIELD 8 /* NAPI or refill yielded this RQ */ > +#define VIRTNET_RQ_STATE_POLL_YIELD 16 /* poll yielded this RQ */ > + spinlock_t lock; > +#endif /* CONFIG_NET_RX_BUSY_POLL */do we have to have a new state? no way to reuse the napi state for this? two lock/unlock operations for a poll seems excessive.> }; > > +#ifdef CONFIG_NET_RX_BUSY_POLL > +static inline void virtnet_rq_init_lock(struct receive_queue *rq) > +{ > + > + spin_lock_init(&rq->lock); > + rq->state = VIRTNET_RQ_STATE_IDLE; > +} > + > +/* called from the device poll routine or refill routine to get ownership of a > + * receive queue. > + */ > +static inline bool virtnet_rq_lock_napi_refill(struct receive_queue *rq) > +{ > + int rc = true; > + > + spin_lock(&rq->lock); > + if (rq->state & VIRTNET_RQ_LOCKED) { > + WARN_ON(rq->state & VIRTNET_RQ_STATE_NAPI); > + rq->state |= VIRTNET_RQ_STATE_NAPI_YIELD; > + rc = false; > + } else > + /* we don't care if someone yielded */ > + rq->state = VIRTNET_RQ_STATE_NAPI; > + spin_unlock(&rq->lock); > + return rc; > +} > + > +/* returns true is someone tried to get the rq while napi or refill had it */s/is/if/> +static inline bool virtnet_rq_unlock_napi_refill(struct receive_queue *rq) > +{ > + int rc = false; > + > + spin_lock(&rq->lock); > + WARN_ON(rq->state & (VIRTNET_RQ_STATE_POLL | > + VIRTNET_RQ_STATE_NAPI_YIELD)); > + > + if (rq->state & VIRTNET_RQ_STATE_POLL_YIELD) > + rc = true; > + /* will reset state to idle, unless RQ is disabled */ > + rq->state &= VIRTNET_RQ_STATE_DISABLED; > + spin_unlock(&rq->lock); > + return rc; > +} > + > +/* called from virtnet_low_latency_recv() */ > +static inline bool virtnet_rq_lock_poll(struct receive_queue *rq) > +{ > + int rc = true; > + > + spin_lock_bh(&rq->lock); > + if ((rq->state & VIRTNET_RQ_LOCKED)) { > + rq->state |= VIRTNET_RQ_STATE_POLL_YIELD; > + rc = false; > + } else > + /* preserve yield marks */ > + rq->state |= VIRTNET_RQ_STATE_POLL; > + spin_unlock_bh(&rq->lock); > + return rc; > +} > + > +/* returns true if someone tried to get the receive queue while it was locked */ > +static inline bool virtnet_rq_unlock_poll(struct receive_queue *rq) > +{ > + int rc = false; > + > + spin_lock_bh(&rq->lock); > + WARN_ON(rq->state & (VIRTNET_RQ_STATE_NAPI)); > + > + if (rq->state & VIRTNET_RQ_STATE_POLL_YIELD) > + rc = true; > + /* will reset state to idle, unless RQ is disabled */ > + rq->state &= VIRTNET_RQ_STATE_DISABLED; > + spin_unlock_bh(&rq->lock); > + return rc; > +} > + > +/* return false if RQ is currently owned */ > +static inline bool virtnet_rq_disable(struct receive_queue *rq) > +{ > + int rc = true; > + > + spin_lock_bh(&rq->lock); > + if (rq->state & VIRTNET_RQ_OWNED) > + rc = false; > + rq->state |= VIRTNET_RQ_STATE_DISABLED; > + spin_unlock_bh(&rq->lock); > + > + return rc; > +} > + > +#else /* CONFIG_NET_RX_BUSY_POLL */ > +static inline void virtnet_rq_init_lock(struct receive_queue *rq) > +{ > +} > + > +static inline bool virtnet_rq_lock_napi_refill(struct receive_queue *rq) > +{ > + return true; > +} > + > +static inline bool virtnet_rq_unlock_napi_refill(struct receive_queue *rq) > +{ > + return false; > +} > + > +static inline bool virtnet_rq_lock_poll(struct receive_queue *rq) > +{ > + return false; > +} > + > +static inline bool virtnet_rq_unlock_poll(struct receive_queue *rq) > +{ > + return false; > +} > + > +static inline bool virtnet_rq_disable(struct receive_queue *rq) > +{ > + return true; > +} > + > +#endif /* CONFIG_NET_RX_BUSY_POLL */ > + > struct virtnet_info { > struct virtio_device *vdev; > struct virtqueue *cvq; > @@ -521,6 +657,8 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) > skb_shinfo(skb)->gso_segs = 0; > } > > + skb_mark_napi_id(skb, &rq->napi); > + > netif_receive_skb(skb); > return; > > @@ -714,7 +852,12 @@ static void refill_work(struct work_struct *work) > struct receive_queue *rq = &vi->rq[i]; > > napi_disable(&rq->napi); > + if (!virtnet_rq_lock_napi_refill(rq)) { > + virtnet_napi_enable(rq); > + continue; > + } > still_empty = !try_fill_recv(rq, GFP_KERNEL); > + virtnet_rq_unlock_napi_refill(rq); > virtnet_napi_enable(rq); > > /* In theory, this can happen: if we don't get any buffers in > @@ -752,8 +895,13 @@ static int virtnet_poll(struct napi_struct *napi, int budget) > unsigned int r, received = 0; > > again: > + if (!virtnet_rq_lock_napi_refill(rq)) > + return budget; > + > received += virtnet_receive(rq, budget); > > + virtnet_rq_unlock_napi_refill(rq); > + > /* Out of packets? */ > if (received < budget) { > r = virtqueue_enable_cb_prepare(rq->vq); > @@ -770,20 +918,50 @@ again: > return received; > } > > +#ifdef CONFIG_NET_RX_BUSY_POLL > +/* must be called with local_bh_disable()d */ > +static int virtnet_low_latency_recv(struct napi_struct *napi)let's call it busy poll :)> +{ > + struct receive_queue *rq > + container_of(napi, struct receive_queue, napi); > + struct virtnet_info *vi = rq->vq->vdev->priv; > + int received; > + > + if (!(vi->status & VIRTIO_NET_S_LINK_UP)) > + return LL_FLUSH_FAILED; > + > + if (!virtnet_rq_lock_poll(rq)) > + return LL_FLUSH_BUSY; > + > + received = virtnet_receive(rq, 4);Hmm why 4 exactly?> + > + virtnet_rq_unlock_poll(rq); > + > + return received; > +} > +#endif /* CONFIG_NET_RX_BUSY_POLL */ > + > static void virtnet_napi_enable_all(struct virtnet_info *vi) > { > int i; > > - for (i = 0; i < vi->max_queue_pairs; i++) > + for (i = 0; i < vi->max_queue_pairs; i++) { > + virtnet_rq_init_lock(&vi->rq[i]); > virtnet_napi_enable(&vi->rq[i]); > + } > } > > static void virtnet_napi_disable_all(struct virtnet_info *vi) > { > int i; > > - for (i = 0; i < vi->max_queue_pairs; i++) > + for (i = 0; i < vi->max_queue_pairs; i++) { > napi_disable(&vi->rq[i].napi); > + while (!virtnet_rq_disable(&vi->rq[i])) { > + pr_info("RQ %d locked\n", i); > + usleep_range(1000, 20000);What's going on here, exactly?> + } > + } > } > > static int virtnet_open(struct net_device *dev) > @@ -1372,6 +1550,9 @@ static const struct net_device_ops virtnet_netdev = { > #ifdef CONFIG_NET_POLL_CONTROLLER > .ndo_poll_controller = virtnet_netpoll, > #endif > +#ifdef CONFIG_NET_RX_BUSY_POLL > + .ndo_busy_poll = virtnet_low_latency_recv, > +#endif > }; > > static void virtnet_config_changed_work(struct work_struct *work) > @@ -1577,6 +1758,7 @@ static int virtnet_alloc_queues(struct virtnet_info *vi) > vi->rq[i].pages = NULL; > netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll, > napi_weight); > + napi_hash_add(&vi->rq[i].napi); > > sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); > ewma_init(&vi->rq[i].mrg_avg_pkt_len, 1, RECEIVE_AVG_WEIGHT); > @@ -1880,8 +2062,10 @@ static int virtnet_freeze(struct virtio_device *vdev) > > if (netif_running(vi->dev)) { > virtnet_napi_disable_all(vi); > - for (i = 0; i < vi->max_queue_pairs; i++) > + for (i = 0; i < vi->max_queue_pairs; i++) { > + napi_hash_del(&vi->rq[i].napi); > netif_napi_del(&vi->rq[i].napi); > + } > } > > remove_vq_common(vi); > -- > 1.8.3.1
Michael S. Tsirkin
2014-Jul-20  20:34 UTC
[PATCH net-next V2 0/3] rx busy polling support for virtio-net
On Wed, Jul 16, 2014 at 02:21:44PM +0800, Jason Wang wrote:> Hi all: > > This series introduces the support for rx busy polling support. This > was useful for reduing the latency for a kvm guest. Patch 1-2 > introduces helpers which is used for rx busy polling. Patch 3 > implement the main function. > > Test was done between a kvm guest and an external host. Two hosts were > connected through 40gb mlx4 cards. With both busy_poll and busy_read are > set to 50 in guest, 1 byte netperf tcp_rr shows 116% improvement: > transaction rate was increased from 9151.94 to 19787.37. > > Changes from V1: > - split the patch info smaller ones > - add more details about test setup/configuration > > Please review.Generally I think we should let host know we are polling. For example, kick the rq or something. Or maybe add another io address. Something like this would need a new feature flag though, so I'm fine with just polling in guest until that is available.> Jason Wang (3): > virtio-net: introduce helpers to enable and disable all NAPIs > virtio-net: introduce virtnet_receive() > virtio-net: rx busy polling support > > drivers/net/virtio_net.c | 234 ++++++++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 221 insertions(+), 13 deletions(-) > > -- > 1.8.3.1
Jason Wang
2014-Jul-21  03:15 UTC
[PATCH net-next V2 0/3] rx busy polling support for virtio-net
On 07/21/2014 04:34 AM, Michael S. Tsirkin wrote:> On Wed, Jul 16, 2014 at 02:21:44PM +0800, Jason Wang wrote: >> Hi all: >> >> This series introduces the support for rx busy polling support. This >> was useful for reduing the latency for a kvm guest. Patch 1-2 >> introduces helpers which is used for rx busy polling. Patch 3 >> implement the main function. >> >> Test was done between a kvm guest and an external host. Two hosts were >> connected through 40gb mlx4 cards. With both busy_poll and busy_read are >> set to 50 in guest, 1 byte netperf tcp_rr shows 116% improvement: >> transaction rate was increased from 9151.94 to 19787.37. >> >> Changes from V1: >> - split the patch info smaller ones >> - add more details about test setup/configuration >> >> Please review. > Generally I think we should let host know we are polling. > For example, kick the rq or something. Or maybe add another > io address.Yes, I'm also working on busy polling for tun and vhost which may also help here.> Something like this would need a new feature flag though, so I'm fine > with just polling in guest until that is available.Yes.> >> Jason Wang (3): >> virtio-net: introduce helpers to enable and disable all NAPIs >> virtio-net: introduce virtnet_receive() >> virtio-net: rx busy polling support >> >> drivers/net/virtio_net.c | 234 ++++++++++++++++++++++++++++++++++++++++++++--- >> 1 file changed, 221 insertions(+), 13 deletions(-) >> >> -- >> 1.8.3.1 > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo at vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/
Reasonably Related Threads
- [PATCH net-next V2 3/3] virtio-net: rx busy polling support
- [PATCH net-next V2 3/3] virtio-net: rx busy polling support
- [PATCH net-next V2 3/3] virtio-net: rx busy polling support
- [PATCH net-next V2 3/3] virtio-net: rx busy polling support
- [PATCH net-next] virtio-net: rx busy polling support