Jason Wang
2021-May-27 04:22 UTC
[PATCH net-next] virtio_net: set link state down when virtqueue is broken
On 2021/5/26 7:39 PM, wangyunjian wrote:
> From: Yunjian Wang <wangyunjian at huawei.com>
>
> The NIC can't receive/send packets if a rx/tx virtqueue is broken.
> However, the link state of the NIC is still normal. As a result,
> the user cannot detect the NIC exception.

Don't we have:

        /* This should not happen! */
        if (unlikely(err)) {
                dev->stats.tx_fifo_errors++;
                if (net_ratelimit())
                        dev_warn(&dev->dev,
                                 "Unexpected TXQ (%d) queue failure: %d\n",
                                 qnum, err);
                dev->stats.tx_dropped++;
                dev_kfree_skb_any(skb);
                return NETDEV_TX_OK;
        }

Which should be sufficient?

> The driver can set the link state down when the virtqueue is broken.
> If the state is down, the user can switch over to another NIC.

Note that, we probably need the watchdog for virtio-net in order to be
a complete solution.

Thanks

> Signed-off-by: Yunjian Wang <wangyunjian at huawei.com>
> ---
>  drivers/net/virtio_net.c | 36 +++++++++++++++++++++++++++++++++++-
>  1 file changed, 35 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 073fec4c0df1..05a3cd1c589b 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -237,6 +237,10 @@ struct virtnet_info {
>
>  	/* failover when STANDBY feature enabled */
>  	struct failover *failover;
> +
> +	/* Work struct for checking vq status, stop NIC if vq is broken.
> +	 */
> +	struct delayed_work vq_check_work;
> +	bool broken;
>  };
>
>  struct padded_vnet_hdr {
> @@ -1407,6 +1411,27 @@ static void refill_work(struct work_struct *work)
>  	}
>  }
>
> +static void virnet_vq_check_work(struct work_struct *work)
> +{
> +	struct virtnet_info *vi =
> +		container_of(work, struct virtnet_info, vq_check_work.work);
> +	struct net_device *netdev = vi->dev;
> +	int i;
> +
> +	if (vi->broken)
> +		return;
> +
> +	/* If virtqueue is broken, set link down and stop all queues */
> +	for (i = 0; i < vi->max_queue_pairs; i++) {
> +		if (virtqueue_is_broken(vi->rq[i].vq) || virtqueue_is_broken(vi->sq[i].vq)) {
> +			netif_carrier_off(netdev);
> +			netif_tx_stop_all_queues(netdev);
> +			vi->broken = true;
> +			break;
> +		}
> +	}
> +}
> +
>  static int virtnet_receive(struct receive_queue *rq, int budget,
>  			   unsigned int *xdp_xmit)
>  {
> @@ -1432,6 +1457,9 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
>  		}
>  	}
>
> +	if (unlikely(!virtqueue_is_broken(rq->vq)))
> +		schedule_delayed_work(&vi->vq_check_work, HZ);
> +
>  	if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
>  		if (!try_fill_recv(vi, rq, GFP_ATOMIC))
>  			schedule_delayed_work(&vi->refill, 0);
> @@ -1681,6 +1709,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
>  				 qnum, err);
>  		dev->stats.tx_dropped++;
>  		dev_kfree_skb_any(skb);
> +		schedule_delayed_work(&vi->vq_check_work, HZ);
>  		return NETDEV_TX_OK;
>  	}
>
> @@ -1905,6 +1934,7 @@ static int virtnet_close(struct net_device *dev)
>
>  	/* Make sure refill_work doesn't re-enable napi!
>  	 */
>  	cancel_delayed_work_sync(&vi->refill);
> +	cancel_delayed_work_sync(&vi->vq_check_work);
>
>  	for (i = 0; i < vi->max_queue_pairs; i++) {
>  		xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
> @@ -2381,6 +2411,7 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
>  	netif_device_detach(vi->dev);
>  	netif_tx_unlock_bh(vi->dev);
>  	cancel_delayed_work_sync(&vi->refill);
> +	cancel_delayed_work_sync(&vi->vq_check_work);
>
>  	if (netif_running(vi->dev)) {
>  		for (i = 0; i < vi->max_queue_pairs; i++) {
> @@ -2662,7 +2693,7 @@ static void virtnet_config_changed_work(struct work_struct *work)
>
>  	vi->status = v;
>
> -	if (vi->status & VIRTIO_NET_S_LINK_UP) {
> +	if ((vi->status & VIRTIO_NET_S_LINK_UP) && !vi->broken) {
>  		virtnet_update_settings(vi);
>  		netif_carrier_on(vi->dev);
>  		netif_tx_wake_all_queues(vi->dev);
> @@ -2889,6 +2920,8 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
>  		goto err_rq;
>
>  	INIT_DELAYED_WORK(&vi->refill, refill_work);
> +	INIT_DELAYED_WORK(&vi->vq_check_work, virnet_vq_check_work);
> +
>  	for (i = 0; i < vi->max_queue_pairs; i++) {
>  		vi->rq[i].pages = NULL;
>  		netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll,
> @@ -3240,6 +3273,7 @@ static int virtnet_probe(struct virtio_device *vdev)
>  	net_failover_destroy(vi->failover);
>  free_vqs:
>  	cancel_delayed_work_sync(&vi->refill);
> +	cancel_delayed_work_sync(&vi->vq_check_work);
>  	free_receive_page_frags(vi);
>  	virtnet_del_vqs(vi);
>  free: