On 2021/4/13 6:31 PM, Michael S. Tsirkin wrote:
> On Mon, Apr 12, 2021 at 06:03:50PM -0400, Michael S. Tsirkin wrote:
>> I was working on the spurious interrupt problem and
>> I noticed something weird.
>>
>> static int virtnet_poll_tx(struct napi_struct *napi, int budget)
>> {
>> 	struct send_queue *sq = container_of(napi, struct send_queue, napi);
>> 	struct virtnet_info *vi = sq->vq->vdev->priv;
>> 	unsigned int index = vq2txq(sq->vq);
>> 	struct netdev_queue *txq;
>>
>> 	if (unlikely(is_xdp_raw_buffer_queue(vi, index))) {
>> 		/* We don't need to enable cb for XDP */
>> 		napi_complete_done(napi, 0);
>> 		return 0;
>> 	}
>>
>> 	txq = netdev_get_tx_queue(vi->dev, index);
>> 	__netif_tx_lock(txq, raw_smp_processor_id());
>> 	free_old_xmit_skbs(sq, true);
>> 	__netif_tx_unlock(txq);
>>
>> 	virtqueue_napi_complete(napi, sq->vq, 0);
>>
>> 	if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
>> 		netif_tx_wake_queue(txq);
>>
>> 	return 0;
>> }
>>
>> So virtqueue_napi_complete is called while txq is not locked,
>> so conceivably start_xmit can run in parallel, right?
Yes, I think so.
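
E.g. something like this interleaving (just my own sketch to illustrate
the window, not taken from a trace):

CPU0 (virtnet_poll_tx)                  CPU1 (start_xmit, txq lock held)
----------------------                  --------------------------------
__netif_tx_lock(txq, ...);
free_old_xmit_skbs(sq, true);
__netif_tx_unlock(txq);
                                        __netif_tx_lock(txq, ...);
                                        free_old_xmit_skbs(sq, false);
                                          /* virtqueue_get_buf() ... */
virtqueue_napi_complete(napi, sq->vq, 0);
  /* virtqueue_enable_cb_prepare() ... */
                                        xmit_skb(sq, skb);
                                          /* virtqueue_add_outbuf() ... */

So both CPUs end up operating on sq->vq at the same time with no lock in
common between them.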
>>
>> Now virtqueue_napi_complete
>>
>> static void virtqueue_napi_complete(struct napi_struct *napi,
>> 				    struct virtqueue *vq, int processed)
>> {
>> 	int opaque;
>>
>> 	opaque = virtqueue_enable_cb_prepare(vq);
>> 	if (napi_complete_done(napi, processed)) {
>> 		if (unlikely(virtqueue_poll(vq, opaque)))
>> 			virtqueue_napi_schedule(napi, vq);
>> 	} else {
>> 		virtqueue_disable_cb(vq);
>> 	}
>> }
>>
>>
>> So it is calling virtqueue_enable_cb_prepare while the tx queue
>> could still be running and processing things in parallel ...
>> What gives? I suspect this corrupts the ring, and explains
>> why we are seeing weird hangs with vhost packed ring ...
>>
>> Jason?
It might cause the interrupt to be disabled unexpectedly, I think.
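
One way I can see it going wrong (again just a sketch, and hand-wavy
about the ring details):

CPU0 (virtnet_poll_tx, txq unlocked)    CPU1 (start_xmit, txq lock held)
------------------------------------    --------------------------------
opaque = virtqueue_enable_cb_prepare(sq->vq);
  /* publishes event/flags based on
     the last_used_idx CPU0 has seen */
                                        free_old_xmit_skbs(sq, false);
                                          /* virtqueue_get_buf() advances
                                             last_used_idx concurrently */
napi_complete_done(napi, 0);
virtqueue_poll(sq->vq, opaque);
  /* opaque is stale now */

The event state CPU0 publishes and the opaque value it polls with are
both derived from an index CPU1 is changing underneath it. For the
packed ring the opaque also encodes the wrap counter, so a stale value
can make virtqueue_poll() give the wrong answer: either a spurious
interrupt, or napi completed with an event index the device never
crosses, so no interrupt ever reschedules it.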
>>
>>
>> --
>> MST
> and wouldn't the following untested patch make sense then?
>
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 82e520d2cb12..c23341b18eb5 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -1504,6 +1505,8 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
>  	struct virtnet_info *vi = sq->vq->vdev->priv;
>  	unsigned int index = vq2txq(sq->vq);
>  	struct netdev_queue *txq;
> +	int opaque;
> +	bool done;
>
>  	if (unlikely(is_xdp_raw_buffer_queue(vi, index))) {
>  		/* We don't need to enable cb for XDP */
> @@ -1514,9 +1517,26 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
>  	txq = netdev_get_tx_queue(vi->dev, index);
>  	__netif_tx_lock(txq, raw_smp_processor_id());
>  	free_old_xmit_skbs(sq, true);
> +
> +	opaque = virtqueue_enable_cb_prepare(sq->vq);
> +
> +	done = napi_complete_done(napi, 0);
> +
> +	if (!done)
> +		virtqueue_disable_cb(sq->vq);
> +
>  	__netif_tx_unlock(txq);
>
> -	virtqueue_napi_complete(napi, sq->vq, 0);
> +	if (done) {
> +		if (unlikely(virtqueue_poll(sq->vq, opaque))) {
> +			if (napi_schedule_prep(napi)) {
> +				__netif_tx_lock(txq, raw_smp_processor_id());
> +				virtqueue_disable_cb(sq->vq);
> +				__netif_tx_unlock(txq);
> +				__napi_schedule(napi);
> +			}
> +		}
> +	}
>
>  	if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
>  		netif_tx_wake_queue(txq);
I wonder why we don't simply protect the whole of virtnet_poll_tx with
the tx lock in this case? Something like the sketch below (untested,
virtqueue_napi_complete itself stays unchanged, the locked region just
grows):
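
static int virtnet_poll_tx(struct napi_struct *napi, int budget)
{
	struct send_queue *sq = container_of(napi, struct send_queue, napi);
	struct virtnet_info *vi = sq->vq->vdev->priv;
	unsigned int index = vq2txq(sq->vq);
	struct netdev_queue *txq;

	if (unlikely(is_xdp_raw_buffer_queue(vi, index))) {
		/* We don't need to enable cb for XDP */
		napi_complete_done(napi, 0);
		return 0;
	}

	txq = netdev_get_tx_queue(vi->dev, index);
	__netif_tx_lock(txq, raw_smp_processor_id());
	free_old_xmit_skbs(sq, true);

	/* Held across the cb enable/disable so it can't race with the
	 * virtqueue operations start_xmit does under the same lock.
	 */
	virtqueue_napi_complete(napi, sq->vq, 0);

	if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
		netif_tx_wake_queue(txq);

	__netif_tx_unlock(txq);

	return 0;
}

Thanks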