Hi all: The code used to busy poll for cvq command which turns out to have several side effects: 1) infinite poll for buggy devices 2) bad interaction with scheduler So this series tries to use sleep + timeout instead of busy polling. Please review. Thanks Changes since RFC: - switch to use BAD_RING in virtio_break_device() - check virtqueue_is_broken() after being woken up - use more_used() instead of virtqueue_get_buf() to allow caller to get buffers afterwards - break the virtio-net device when timeout - get buffer manually since the virtio core check more_used() instead Jason Wang (4): virtio-net: convert rx mode setting to use workqueue virtio_ring: switch to use BAD_RING() virtio_ring: introduce a per virtqueue waitqueue virtio-net: sleep instead of busy waiting for cvq command drivers/net/virtio_net.c | 90 +++++++++++++++++++++++++++++++----- drivers/virtio/virtio_ring.c | 37 +++++++++++++-- include/linux/virtio.h | 3 ++ 3 files changed, 115 insertions(+), 15 deletions(-) -- 2.25.1
Jason Wang
2022-Dec-26 07:49 UTC
[PATCH 1/4] virtio-net: convert rx mode setting to use workqueue
This patch convert rx mode setting to be done in a workqueue, this is
a must for allow to sleep when waiting for the cvq command to
response since current code is executed under addr spin lock.
Signed-off-by: Jason Wang <jasowang at redhat.com>
---
drivers/net/virtio_net.c | 66 ++++++++++++++++++++++++++++++++++++++--
1 file changed, 63 insertions(+), 3 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 86e52454b5b5..efd9dd55828b 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -260,6 +260,15 @@ struct virtnet_info {
/* Work struct for config space updates */
struct work_struct config_work;
+ /* Work struct for config rx mode */
+ struct work_struct rx_mode_work;
+
+ /* Is rx_mode_work enabled? */
+ bool rx_mode_work_enabled;
+
+ /* The lock to synchronize the access to refill_enabled */
+ spinlock_t rx_mode_lock;
+
/* Does the affinity hint is set for virtqueues? */
bool affinity_hint_set;
@@ -383,6 +392,22 @@ static void disable_delayed_refill(struct virtnet_info *vi)
spin_unlock_bh(&vi->refill_lock);
}
+static void enable_rx_mode_work(struct virtnet_info *vi)
+{
+ spin_lock_bh(&vi->rx_mode_lock);
+ vi->rx_mode_work_enabled = true;
+ spin_unlock_bh(&vi->rx_mode_lock);
+}
+
+static void disable_rx_mode_work(struct virtnet_info *vi)
+{
+ spin_lock_bh(&vi->rx_mode_lock);
+ vi->rx_mode_work_enabled = false;
+ spin_unlock_bh(&vi->rx_mode_lock);
+
+ flush_work(&vi->rx_mode_work);
+}
+
static void virtqueue_napi_schedule(struct napi_struct *napi,
struct virtqueue *vq)
{
@@ -1974,6 +1999,8 @@ static bool virtnet_send_command(struct virtnet_info *vi,
u8 class, u8 cmd,
/* Caller should know better */
BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
+ ASSERT_RTNL();
+
vi->ctrl->status = ~0;
vi->ctrl->hdr.class = class;
vi->ctrl->hdr.cmd = cmd;
@@ -2160,9 +2187,11 @@ static int virtnet_close(struct net_device *dev)
return 0;
}
-static void virtnet_set_rx_mode(struct net_device *dev)
+static void virtnet_rx_mode_work(struct work_struct *work)
{
- struct virtnet_info *vi = netdev_priv(dev);
+ struct virtnet_info *vi + container_of(work, struct virtnet_info,
rx_mode_work);
+ struct net_device *dev = vi->dev;
struct scatterlist sg[2];
struct virtio_net_ctrl_mac *mac_data;
struct netdev_hw_addr *ha;
@@ -2175,8 +2204,12 @@ static void virtnet_set_rx_mode(struct net_device *dev)
if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
return;
+ rtnl_lock();
+
+ netif_addr_lock_bh(dev);
vi->ctrl->promisc = ((dev->flags & IFF_PROMISC) != 0);
vi->ctrl->allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
+ netif_addr_unlock_bh(dev);
sg_init_one(sg, &vi->ctrl->promisc,
sizeof(vi->ctrl->promisc));
@@ -2192,14 +2225,19 @@ static void virtnet_set_rx_mode(struct net_device *dev)
dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
vi->ctrl->allmulti ? "en" : "dis");
+ netif_addr_lock_bh(dev);
+
uc_count = netdev_uc_count(dev);
mc_count = netdev_mc_count(dev);
/* MAC filter - use one buffer for both lists */
buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
(2 * sizeof(mac_data->entries)), GFP_ATOMIC);
mac_data = buf;
- if (!buf)
+ if (!buf) {
+ netif_addr_unlock_bh(dev);
+ rtnl_unlock();
return;
+ }
sg_init_table(sg, 2);
@@ -2220,6 +2258,8 @@ static void virtnet_set_rx_mode(struct net_device *dev)
netdev_for_each_mc_addr(ha, dev)
memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
+ netif_addr_unlock_bh(dev);
+
sg_set_buf(&sg[1], mac_data,
sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
@@ -2227,9 +2267,21 @@ static void virtnet_set_rx_mode(struct net_device *dev)
VIRTIO_NET_CTRL_MAC_TABLE_SET, sg))
dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
+ rtnl_unlock();
+
kfree(buf);
}
+static void virtnet_set_rx_mode(struct net_device *dev)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+
+ spin_lock(&vi->rx_mode_lock);
+ if (vi->rx_mode_work_enabled)
+ schedule_work(&vi->rx_mode_work);
+ spin_unlock(&vi->rx_mode_lock);
+}
+
static int virtnet_vlan_rx_add_vid(struct net_device *dev,
__be16 proto, u16 vid)
{
@@ -3000,6 +3052,7 @@ static void virtnet_freeze_down(struct virtio_device
*vdev)
/* Make sure no work handler is accessing the device */
flush_work(&vi->config_work);
+ disable_rx_mode_work(vi);
netif_tx_lock_bh(vi->dev);
netif_device_detach(vi->dev);
@@ -3022,6 +3075,8 @@ static int virtnet_restore_up(struct virtio_device *vdev)
virtio_device_ready(vdev);
enable_delayed_refill(vi);
+ enable_rx_mode_work(vi);
+ virtnet_set_rx_mode(vi->dev);
if (netif_running(vi->dev)) {
err = virtnet_open(vi->dev);
@@ -3799,7 +3854,9 @@ static int virtnet_probe(struct virtio_device *vdev)
vdev->priv = vi;
INIT_WORK(&vi->config_work, virtnet_config_changed_work);
+ INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work);
spin_lock_init(&vi->refill_lock);
+ spin_lock_init(&vi->rx_mode_lock);
if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
vi->mergeable_rx_bufs = true;
@@ -3905,6 +3962,8 @@ static int virtnet_probe(struct virtio_device *vdev)
if (vi->has_rss || vi->has_rss_hash_report)
virtnet_init_default_rss(vi);
+ enable_rx_mode_work(vi);
+
/* serialize netdev register + virtio_device_ready() with ndo_open() */
rtnl_lock();
@@ -3984,6 +4043,7 @@ static void virtnet_remove(struct virtio_device *vdev)
/* Make sure no work handler is accessing the device. */
flush_work(&vi->config_work);
+ disable_rx_mode_work(vi);
unregister_netdev(vi->dev);
--
2.25.1
Switch to reuse BAD_RING() to allow common logic to be implemented in
BAD_RING().
Signed-off-by: Jason Wang <jasowang at redhat.com>
---
Changes since V1:
- switch to use BAD_RING in virtio_break_device()
---
drivers/virtio/virtio_ring.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 2e7689bb933b..5cfb2fa8abee 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -58,7 +58,8 @@
do { \
dev_err(&_vq->vq.vdev->dev, \
"%s:"fmt, (_vq)->vq.name, ##args); \
- (_vq)->broken = true; \
+ /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ \
+ WRITE_ONCE((_vq)->broken, true); \
} while (0)
#define START_USE(vq)
#define END_USE(vq)
@@ -2237,7 +2238,7 @@ bool virtqueue_notify(struct virtqueue *_vq)
/* Prod other side to tell it about changes. */
if (!vq->notify(_vq)) {
- vq->broken = true;
+ BAD_RING(vq, "vq %d is broken\n", vq->vq.index);
return false;
}
return true;
@@ -2786,8 +2787,7 @@ void virtio_break_device(struct virtio_device *dev)
list_for_each_entry(_vq, &dev->vqs, list) {
struct vring_virtqueue *vq = to_vvq(_vq);
- /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
- WRITE_ONCE(vq->broken, true);
+ BAD_RING(vq, "Device break vq %d", _vq->index);
}
spin_unlock(&dev->vqs_list_lock);
}
--
2.25.1
Jason Wang
2022-Dec-26 07:49 UTC
[PATCH 3/4] virtio_ring: introduce a per virtqueue waitqueue
This patch introduces a per virtqueue waitqueue to allow driver to
sleep and wait for more used. Two new helpers are introduced to allow
driver to sleep and wake up.
Signed-off-by: Jason Wang <jasowang at redhat.com>
---
Changes since V1:
- check virtqueue_is_broken() as well
- use more_used() instead of virtqueue_get_buf() to allow caller to
get buffers afterwards
---
drivers/virtio/virtio_ring.c | 29 +++++++++++++++++++++++++++++
include/linux/virtio.h | 3 +++
2 files changed, 32 insertions(+)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 5cfb2fa8abee..9c83eb945493 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -13,6 +13,7 @@
#include <linux/dma-mapping.h>
#include <linux/kmsan.h>
#include <linux/spinlock.h>
+#include <linux/wait.h>
#include <xen/xen.h>
#ifdef DEBUG
@@ -60,6 +61,7 @@
"%s:"fmt, (_vq)->vq.name, ##args); \
/* Pairs with READ_ONCE() in virtqueue_is_broken(). */ \
WRITE_ONCE((_vq)->broken, true); \
+ wake_up_interruptible(&(_vq)->wq); \
} while (0)
#define START_USE(vq)
#define END_USE(vq)
@@ -203,6 +205,9 @@ struct vring_virtqueue {
/* DMA, allocation, and size information */
bool we_own_ring;
+ /* Wait for buffer to be used */
+ wait_queue_head_t wq;
+
#ifdef DEBUG
/* They're supposed to lock for us. */
unsigned int in_use;
@@ -2024,6 +2029,8 @@ static struct virtqueue *vring_create_virtqueue_packed(
if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
vq->weak_barriers = false;
+ init_waitqueue_head(&vq->wq);
+
err = vring_alloc_state_extra_packed(&vring_packed);
if (err)
goto err_state_extra;
@@ -2517,6 +2524,8 @@ static struct virtqueue *__vring_new_virtqueue(unsigned
int index,
if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
vq->weak_barriers = false;
+ init_waitqueue_head(&vq->wq);
+
err = vring_alloc_state_extra_split(vring_split);
if (err) {
kfree(vq);
@@ -2654,6 +2663,8 @@ static void vring_free(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
+ wake_up_interruptible(&vq->wq);
+
if (vq->we_own_ring) {
if (vq->packed_ring) {
vring_free_queue(vq->vq.vdev,
@@ -2863,4 +2874,22 @@ const struct vring *virtqueue_get_vring(struct virtqueue
*vq)
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring);
+int virtqueue_wait_for_used(struct virtqueue *_vq)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+
+ /* TODO: Tweak the timeout. */
+ return wait_event_interruptible_timeout(vq->wq,
+ virtqueue_is_broken(_vq) || more_used(vq), HZ);
+}
+EXPORT_SYMBOL_GPL(virtqueue_wait_for_used);
+
+void virtqueue_wake_up(struct virtqueue *_vq)
+{
+ struct vring_virtqueue *vq = to_vvq(_vq);
+
+ wake_up_interruptible(&vq->wq);
+}
+EXPORT_SYMBOL_GPL(virtqueue_wake_up);
+
MODULE_LICENSE("GPL");
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index dcab9c7e8784..2eb62c774895 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -72,6 +72,9 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int
*len);
void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len,
void **ctx);
+int virtqueue_wait_for_used(struct virtqueue *vq);
+void virtqueue_wake_up(struct virtqueue *vq);
+
void virtqueue_disable_cb(struct virtqueue *vq);
bool virtqueue_enable_cb(struct virtqueue *vq);
--
2.25.1
Jason Wang
2022-Dec-26 07:49 UTC
[PATCH 4/4] virtio-net: sleep instead of busy waiting for cvq command
We used to busy waiting on the cvq command this tends to be
problematic since:
1) CPU could wait for ever on a buggy/malicous device
2) There's no wait to terminate the process that triggers the cvq
command
So this patch switch to use virtqueue_wait_for_used() to sleep with a
timeout (1s) instead of busy polling for the cvq command forever. This
gives the scheduler a breath and can let the process can respond to
asignal. If the device doesn't respond in the timeout, break the
device.
Signed-off-by: Jason Wang <jasowang at redhat.com>
---
Changes since V1:
- break the device when timeout
- get buffer manually since the virtio core check more_used() instead
---
drivers/net/virtio_net.c | 24 ++++++++++++++++--------
1 file changed, 16 insertions(+), 8 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index efd9dd55828b..6a2ea64cfcb5 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -405,6 +405,7 @@ static void disable_rx_mode_work(struct virtnet_info *vi)
vi->rx_mode_work_enabled = false;
spin_unlock_bh(&vi->rx_mode_lock);
+ virtqueue_wake_up(vi->cvq);
flush_work(&vi->rx_mode_work);
}
@@ -1497,6 +1498,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct
receive_queue *rq,
return !oom;
}
+static void virtnet_cvq_done(struct virtqueue *cvq)
+{
+ virtqueue_wake_up(cvq);
+}
+
static void skb_recv_done(struct virtqueue *rvq)
{
struct virtnet_info *vi = rvq->vdev->priv;
@@ -1984,6 +1990,8 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
return err;
}
+static int virtnet_close(struct net_device *dev);
+
/*
* Send command via the control virtqueue and check status. Commands
* supported by the hypervisor, as indicated by feature bits, should
@@ -2026,14 +2034,14 @@ static bool virtnet_send_command(struct virtnet_info
*vi, u8 class, u8 cmd,
if (unlikely(!virtqueue_kick(vi->cvq)))
return vi->ctrl->status == VIRTIO_NET_OK;
- /* Spin for a response, the kick causes an ioport write, trapping
- * into the hypervisor, so the request should be handled immediately.
- */
- while (!virtqueue_get_buf(vi->cvq, &tmp) &&
- !virtqueue_is_broken(vi->cvq))
- cpu_relax();
+ if (virtqueue_wait_for_used(vi->cvq)) {
+ virtqueue_get_buf(vi->cvq, &tmp);
+ return vi->ctrl->status == VIRTIO_NET_OK;
+ }
- return vi->ctrl->status == VIRTIO_NET_OK;
+ netdev_err(vi->dev, "CVQ command timeout, break the virtio
device.");
+ virtio_break_device(vi->vdev);
+ return VIRTIO_NET_ERR;
}
static int virtnet_set_mac_address(struct net_device *dev, void *p)
@@ -3526,7 +3534,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
/* Parameters for control virtqueue, if any */
if (vi->has_cvq) {
- callbacks[total_vqs - 1] = NULL;
+ callbacks[total_vqs - 1] = virtnet_cvq_done;
names[total_vqs - 1] = "control";
}
--
2.25.1
Seemingly Similar Threads
- [PATCH V3 net-next 0/2] virtio-net: don't busy poll for cvq command
- [PATCH net-next v4 0/2] virtio-net: don't busy poll for cvq command
- [PATCH net-next V2 0/2] virtio-net: don't busy poll for cvq command
- [PATCH V3 net-next 1/2] virtio-net: convert rx mode setting to use workqueue
- [PATCH net-next V2 1/2] virtio-net: convert rx mode setting to use workqueue