Jason Wang
2023-Apr-13  06:40 UTC
[PATCH net-next V2 0/2] virtio-net: don't busy poll for cvq command
Hi all: The code used to busy poll for cvq command which turns out to have several side effects: 1) infinite poll for buggy devices 2) bad interaction with scheduler So this series tries to use sleep instead of busy polling. In this version, I take a step back: the hardening part is not implemented and leave for future investigation. We use to aggree to use interruptible sleep but it doesn't work for a general workqueue. Please review. Thanks Changes since V1: - use RTNL to synchronize rx mode worker - use completion for simplicity - don't try to harden CVQ command Changes since RFC: - switch to use BAD_RING in virtio_break_device() - check virtqueue_is_broken() after being woken up - use more_used() instead of virtqueue_get_buf() to allow caller to get buffers afterwards - break the virtio-net device when timeout - get buffer manually since the virtio core check more_used() instead Jason Wang (2): virtio-net: convert rx mode setting to use workqueue virtio-net: sleep instead of busy waiting for cvq command drivers/net/virtio_net.c | 76 ++++++++++++++++++++++++++++++++++------ 1 file changed, 66 insertions(+), 10 deletions(-) -- 2.25.1
Jason Wang
2023-Apr-13  06:40 UTC
[PATCH net-next V2 1/2] virtio-net: convert rx mode setting to use workqueue
This patch convert rx mode setting to be done in a workqueue, this is
a must for allow to sleep when waiting for the cvq command to
response since current code is executed under addr spin lock.
Signed-off-by: Jason Wang <jasowang at redhat.com>
---
Changes since V1:
- use RTNL to synchronize rx mode worker
---
 drivers/net/virtio_net.c | 55 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 52 insertions(+), 3 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index e2560b6f7980..2e56bbf86894 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -265,6 +265,12 @@ struct virtnet_info {
 	/* Work struct for config space updates */
 	struct work_struct config_work;
 
+	/* Work struct for config rx mode */
+	struct work_struct rx_mode_work;
+
+	/* Is rx mode work enabled? */
+	bool rx_mode_work_enabled;
+
 	/* Does the affinity hint is set for virtqueues? */
 	bool affinity_hint_set;
 
@@ -388,6 +394,20 @@ static void disable_delayed_refill(struct virtnet_info *vi)
 	spin_unlock_bh(&vi->refill_lock);
 }
 
+static void enable_rx_mode_work(struct virtnet_info *vi)
+{
+	rtnl_lock();
+	vi->rx_mode_work_enabled = true;
+	rtnl_unlock();
+}
+
+static void disable_rx_mode_work(struct virtnet_info *vi)
+{
+	rtnl_lock();
+	vi->rx_mode_work_enabled = false;
+	rtnl_unlock();
+}
+
 static void virtqueue_napi_schedule(struct napi_struct *napi,
 				    struct virtqueue *vq)
 {
@@ -2310,9 +2330,11 @@ static int virtnet_close(struct net_device *dev)
 	return 0;
 }
 
-static void virtnet_set_rx_mode(struct net_device *dev)
+static void virtnet_rx_mode_work(struct work_struct *work)
 {
-	struct virtnet_info *vi = netdev_priv(dev);
+	struct virtnet_info *vi +		container_of(work, struct virtnet_info,
rx_mode_work);
+	struct net_device *dev = vi->dev;
 	struct scatterlist sg[2];
 	struct virtio_net_ctrl_mac *mac_data;
 	struct netdev_hw_addr *ha;
@@ -2325,6 +2347,8 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
 		return;
 
+	rtnl_lock();
+
 	vi->ctrl->promisc = ((dev->flags & IFF_PROMISC) != 0);
 	vi->ctrl->allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
 
@@ -2342,14 +2366,19 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 		dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
 			 vi->ctrl->allmulti ? "en" : "dis");
 
+	netif_addr_lock_bh(dev);
+
 	uc_count = netdev_uc_count(dev);
 	mc_count = netdev_mc_count(dev);
 	/* MAC filter - use one buffer for both lists */
 	buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
 		      (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
 	mac_data = buf;
-	if (!buf)
+	if (!buf) {
+		netif_addr_unlock_bh(dev);
+		rtnl_unlock();
 		return;
+	}
 
 	sg_init_table(sg, 2);
 
@@ -2370,6 +2399,8 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	netdev_for_each_mc_addr(ha, dev)
 		memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
 
+	netif_addr_unlock_bh(dev);
+
 	sg_set_buf(&sg[1], mac_data,
 		   sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
 
@@ -2377,9 +2408,19 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 				  VIRTIO_NET_CTRL_MAC_TABLE_SET, sg))
 		dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
 
+	rtnl_unlock();
+
 	kfree(buf);
 }
 
+static void virtnet_set_rx_mode(struct net_device *dev)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+
+	if (vi->rx_mode_work_enabled)
+		schedule_work(&vi->rx_mode_work);
+}
+
 static int virtnet_vlan_rx_add_vid(struct net_device *dev,
 				   __be16 proto, u16 vid)
 {
@@ -3150,6 +3191,8 @@ static void virtnet_freeze_down(struct virtio_device
*vdev)
 
 	/* Make sure no work handler is accessing the device */
 	flush_work(&vi->config_work);
+	disable_rx_mode_work(vi);
+	flush_work(&vi->rx_mode_work);
 
 	netif_tx_lock_bh(vi->dev);
 	netif_device_detach(vi->dev);
@@ -3172,6 +3215,7 @@ static int virtnet_restore_up(struct virtio_device *vdev)
 	virtio_device_ready(vdev);
 
 	enable_delayed_refill(vi);
+	enable_rx_mode_work(vi);
 
 	if (netif_running(vi->dev)) {
 		err = virtnet_open(vi->dev);
@@ -3969,6 +4013,7 @@ static int virtnet_probe(struct virtio_device *vdev)
 	vdev->priv = vi;
 
 	INIT_WORK(&vi->config_work, virtnet_config_changed_work);
+	INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work);
 	spin_lock_init(&vi->refill_lock);
 
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) {
@@ -4077,6 +4122,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 	if (vi->has_rss || vi->has_rss_hash_report)
 		virtnet_init_default_rss(vi);
 
+	enable_rx_mode_work(vi);
+
 	/* serialize netdev register + virtio_device_ready() with ndo_open() */
 	rtnl_lock();
 
@@ -4174,6 +4221,8 @@ static void virtnet_remove(struct virtio_device *vdev)
 
 	/* Make sure no work handler is accessing the device. */
 	flush_work(&vi->config_work);
+	disable_rx_mode_work(vi);
+	flush_work(&vi->rx_mode_work);
 
 	unregister_netdev(vi->dev);
 
-- 
2.25.1
Jason Wang
2023-Apr-13  06:40 UTC
[PATCH net-next V2 2/2] virtio-net: sleep instead of busy waiting for cvq command
We used to busy waiting on the cvq command this tends to be
problematic since there no way for to schedule another process which
may serve for the control virtqueue. This might be the case when the
control virtqueue is emulated by software. This patch switches to use
completion to allow the CPU to sleep instead of busy waiting for the
cvq command.
Signed-off-by: Jason Wang <jasowang at redhat.com>
---
Changes since V1:
- use completion for simplicity
- don't try to harden the CVQ command which requires more thought
Changes since RFC:
- break the device when timeout
- get buffer manually since the virtio core check more_used() instead
---
 drivers/net/virtio_net.c | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 2e56bbf86894..d3eb8fd6c9dc 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -19,6 +19,7 @@
 #include <linux/average.h>
 #include <linux/filter.h>
 #include <linux/kernel.h>
+#include <linux/completion.h>
 #include <net/route.h>
 #include <net/xdp.h>
 #include <net/net_failover.h>
@@ -295,6 +296,8 @@ struct virtnet_info {
 
 	/* failover when STANDBY feature enabled */
 	struct failover *failover;
+
+	struct completion completion;
 };
 
 struct padded_vnet_hdr {
@@ -1709,6 +1712,13 @@ static bool try_fill_recv(struct virtnet_info *vi, struct
receive_queue *rq,
 	return !oom;
 }
 
+static void virtnet_cvq_done(struct virtqueue *cvq)
+{
+	struct virtnet_info *vi = cvq->vdev->priv;
+
+	complete(&vi->completion);
+}
+
 static void skb_recv_done(struct virtqueue *rvq)
 {
 	struct virtnet_info *vi = rvq->vdev->priv;
@@ -2169,12 +2179,8 @@ static bool virtnet_send_command(struct virtnet_info *vi,
u8 class, u8 cmd,
 	if (unlikely(!virtqueue_kick(vi->cvq)))
 		return vi->ctrl->status == VIRTIO_NET_OK;
 
-	/* Spin for a response, the kick causes an ioport write, trapping
-	 * into the hypervisor, so the request should be handled immediately.
-	 */
-	while (!virtqueue_get_buf(vi->cvq, &tmp) &&
-	       !virtqueue_is_broken(vi->cvq))
-		cpu_relax();
+	wait_for_completion(&vi->completion);
+	virtqueue_get_buf(vi->cvq, &tmp);
 
 	return vi->ctrl->status == VIRTIO_NET_OK;
 }
@@ -3672,7 +3678,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
 
 	/* Parameters for control virtqueue, if any */
 	if (vi->has_cvq) {
-		callbacks[total_vqs - 1] = NULL;
+		callbacks[total_vqs - 1] = virtnet_cvq_done;
 		names[total_vqs - 1] = "control";
 	}
 
@@ -4122,6 +4128,7 @@ static int virtnet_probe(struct virtio_device *vdev)
 	if (vi->has_rss || vi->has_rss_hash_report)
 		virtnet_init_default_rss(vi);
 
+	init_completion(&vi->completion);
 	enable_rx_mode_work(vi);
 
 	/* serialize netdev register + virtio_device_ready() with ndo_open() */
-- 
2.25.1
Maxime Coquelin
2023-Apr-13  13:02 UTC
[PATCH net-next V2 0/2] virtio-net: don't busy poll for cvq command
Hi Jason, On 4/13/23 08:40, Jason Wang wrote:> Hi all: > > The code used to busy poll for cvq command which turns out to have > several side effects: > > 1) infinite poll for buggy devices > 2) bad interaction with scheduler > > So this series tries to use sleep instead of busy polling. In this > version, I take a step back: the hardening part is not implemented and > leave for future investigation. We use to aggree to use interruptible > sleep but it doesn't work for a general workqueue. > > Please review.Thanks for working on this. My DPDK VDUSE RFC missed to set the interrupt, as Xuan Zhou highlighted it makes the vdpa dev add/del commands to freeze: [<0>] device_del+0x37/0x3d0 [<0>] device_unregister+0x13/0x60 [<0>] unregister_virtio_device+0x11/0x20 [<0>] device_release_driver_internal+0x193/0x200 [<0>] bus_remove_device+0xbf/0x130 [<0>] device_del+0x174/0x3d0 [<0>] device_unregister+0x13/0x60 [<0>] vdpa_nl_cmd_dev_del_set_doit+0x66/0xe0 [vdpa] [<0>] genl_family_rcv_msg_doit.isra.0+0xb8/0x100 [<0>] genl_rcv_msg+0x151/0x290 [<0>] netlink_rcv_skb+0x54/0x100 [<0>] genl_rcv+0x24/0x40 [<0>] netlink_unicast+0x217/0x340 [<0>] netlink_sendmsg+0x23e/0x4a0 [<0>] sock_sendmsg+0x8f/0xa0 [<0>] __sys_sendto+0xfc/0x170 [<0>] __x64_sys_sendto+0x20/0x30 [<0>] do_syscall_64+0x59/0x90 [<0>] entry_SYSCALL_64_after_hwframe+0x72/0xdc Once fixed on DPDK side (you can use my vduse_v1 branch [0] for testing), it works fine: Tested-by: Maxime Coquelin <maxime.coquelin at redhat.com> For the potential missing interrupt with non-compliant devices, I guess it could be handled with the hardening work as same thing could happen if the VDUSE application crashed for example. Regards, Maxime [0]:> Thanks > > Changes since V1: > - use RTNL to synchronize rx mode worker > - use completion for simplicity > - don't try to harden CVQ command > > Changes since RFC: > > - switch to use BAD_RING in virtio_break_device() > - check virtqueue_is_broken() after being woken up > - use more_used() instead of virtqueue_get_buf() to allow caller to > get buffers afterwards > - break the virtio-net device when timeout > - get buffer manually since the virtio core check more_used() instead > > Jason Wang (2): > virtio-net: convert rx mode setting to use workqueue > virtio-net: sleep instead of busy waiting for cvq command > > drivers/net/virtio_net.c | 76 ++++++++++++++++++++++++++++++++++------ > 1 file changed, 66 insertions(+), 10 deletions(-) >