On 2018/12/6 6:54, Michael S. Tsirkin wrote:
> When use_napi is set, let's enable BQLs. Note: some of the issues are
> similar to wifi. It's worth considering whether something similar to
> commit 36148c2bbfbe ("mac80211: Adjust TSQ pacing shift") might be
> beneficial.


I've played with a similar patch several days ago. The tricky part is
the mode switching between napi and no napi. We should make sure that when
a packet is sent and tracked by BQL, it is consumed by BQL as well. I did it
by tracking it through skb->cb, and dealt with the freeze by resetting the
BQL status. Patch attached.

But when testing with vhost-net, I don't get a very stable performance; it
was probably because we batch the used ring updates, so the tx interrupt may
come randomly. We probably need to implement a time-bounded coalescing
mechanism which could be configured from userspace.

Btw, maybe it's time to just enable napi TX by default. I get a ~10% TCP_RR
regression on a machine without APICv (haven't found time to test an APICv
machine). But considering it is for correctness, I think it's acceptable?
Then we can do optimization on top?


Thanks


> Signed-off-by: Michael S. Tsirkin <mst at redhat.com>
> ---
>  drivers/net/virtio_net.c | 27 +++++++++++++++++++--------
>  1 file changed, 19 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index cecfd77c9f3c..b657bde6b94b 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -1325,7 +1325,8 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
>          return stats.packets;
>  }
>
> -static void free_old_xmit_skbs(struct send_queue *sq)
> +static void free_old_xmit_skbs(struct send_queue *sq, struct netdev_queue *txq,
> +                               bool use_napi)
>  {
>          struct sk_buff *skb;
>          unsigned int len;
> @@ -1347,6 +1348,9 @@ static void free_old_xmit_skbs(struct send_queue *sq)
>          if (!packets)
>                  return;
>
> +        if (use_napi)
> +                netdev_tx_completed_queue(txq, packets, bytes);
> +
>          u64_stats_update_begin(&sq->stats.syncp);
>          sq->stats.bytes += bytes;
>          sq->stats.packets += packets;
> @@ -1364,7 +1368,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
>                  return;
>
>          if (__netif_tx_trylock(txq)) {
> -                free_old_xmit_skbs(sq);
> +                free_old_xmit_skbs(sq, txq, true);
>                  __netif_tx_unlock(txq);
>          }
>
> @@ -1440,7 +1444,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
>          struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, vq2txq(sq->vq));
>
>          __netif_tx_lock(txq, raw_smp_processor_id());
> -        free_old_xmit_skbs(sq);
> +        free_old_xmit_skbs(sq, txq, true);
>          __netif_tx_unlock(txq);
>
>          virtqueue_napi_complete(napi, sq->vq, 0);
> @@ -1505,13 +1509,15 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
>          struct send_queue *sq = &vi->sq[qnum];
>          int err;
>          struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
> -        bool kick = !skb->xmit_more;
> +        bool more = skb->xmit_more;
>          bool use_napi = sq->napi.weight;
> +        unsigned int bytes = skb->len;
> +        bool kick;
>
>          /* Free up any pending old buffers before queueing new ones. */
> -        free_old_xmit_skbs(sq);
> +        free_old_xmit_skbs(sq, txq, use_napi);
>
> -        if (use_napi && kick)
> +        if (use_napi && !more)
>                  virtqueue_enable_cb_delayed(sq->vq);
>
>          /* timestamp packet in software */
> @@ -1552,7 +1558,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
>          if (!use_napi &&
>              unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
>                  /* More just got used, free them then recheck. */
> -                free_old_xmit_skbs(sq);
> +                free_old_xmit_skbs(sq, txq, false);
>                  if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
>                          netif_start_subqueue(dev, qnum);
>                          virtqueue_disable_cb(sq->vq);
> @@ -1560,7 +1566,12 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
>                  }
>          }
>
> -        if (kick || netif_xmit_stopped(txq)) {
> +        if (use_napi)
> +                kick = __netdev_tx_sent_queue(txq, bytes, more);
> +        else
> +                kick = !more || netif_xmit_stopped(txq);
> +
> +        if (kick) {
>                  if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
>                          u64_stats_update_begin(&sq->stats.syncp);
>                          sq->stats.kicks++;
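To make the BQL contract used by the patch above concrete: bytes charged with
__netdev_tx_sent_queue() (or netdev_tx_sent_queue()) on the transmit path must
later be credited back with netdev_tx_completed_queue() once the device has
consumed the buffers. A minimal driver-side sketch of that pairing follows;
example_xmit()/example_tx_reclaim() are placeholder names and the device
hand-off is elided, so this is not virtio-net code:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Sketch of the BQL sent/completed pairing; not virtio-net code. */
static netdev_tx_t example_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct netdev_queue *txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
        unsigned int bytes = skb->len;
        bool more = skb->xmit_more;     /* pre-5.2 field, as used in this thread */

        /* ... hand the skb to the device/virtqueue here ... */

        /*
         * Charge the bytes to BQL.  The return value folds xmit_more in and
         * tells the driver whether it must kick/notify the device now.
         */
        if (__netdev_tx_sent_queue(txq, bytes, more))
                ; /* ... kick the device ... */

        return NETDEV_TX_OK;
}

static void example_tx_reclaim(struct net_device *dev, int qnum,
                               unsigned int pkts, unsigned int bytes)
{
        struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);

        /*
         * Credit back exactly what was charged; BQL uses the two totals to
         * bound the queue and to decide when a stopped txq can be woken.
         */
        netdev_tx_completed_queue(txq, pkts, bytes);
}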
On 2018/12/6 4:17, Jason Wang wrote:
>
> On 2018/12/6 6:54, Michael S. Tsirkin wrote:
>> When use_napi is set, let's enable BQLs. Note: some of the issues are
>> similar to wifi. It's worth considering whether something similar to
>> commit 36148c2bbfbe ("mac80211: Adjust TSQ pacing shift") might be
>> beneficial.
>
>
> I've played with a similar patch several days ago. The tricky part is
> the mode switching between napi and no napi. We should make sure that when
> a packet is sent and tracked by BQL, it is consumed by BQL as well. I did it
> by tracking it through skb->cb, and dealt with the freeze by resetting the
> BQL status. Patch attached.


Add the patch.

Thanks
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-virtio-net-byte-queue-limit-support.patch
Type: text/x-patch
Size: 4778 bytes
URL: <http://lists.linuxfoundation.org/pipermail/virtualization/attachments/20181206/4eec3e0d/attachment.bin>
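The attachment itself is not reproduced in this archive. The idea described
above (marking each skb at transmit time so that only BQL-tracked packets are
later subtracted, and resetting BQL across a mode switch) could look roughly
like the sketch below; the flag and helper names are invented for
illustration and this is not the code from the attached patch:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Illustrative sketch only, not the attached patch: mark skbs that were
 * charged to BQL so a later completion in a different mode cannot
 * unbalance the dql counters.
 */
struct example_skb_cb {
        bool bql_tracked;
};
#define EXAMPLE_SKB_CB(skb) ((struct example_skb_cb *)((skb)->cb))

static void example_mark_sent(struct sk_buff *skb, struct netdev_queue *txq,
                              bool use_napi)
{
        EXAMPLE_SKB_CB(skb)->bql_tracked = use_napi;
        if (use_napi)
                netdev_tx_sent_queue(txq, skb->len);
}

static void example_count_completed(struct sk_buff *skb,
                                    unsigned int *pkts, unsigned int *bytes)
{
        /* Only packets that were charged to BQL get credited back. */
        if (EXAMPLE_SKB_CB(skb)->bql_tracked) {
                (*pkts)++;
                *bytes += skb->len;
        }
}

/* On a napi <-> no-napi switch, wipe any in-flight BQL accounting. */
static void example_reset_bql(struct netdev_queue *txq)
{
        netdev_tx_reset_queue(txq);
}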
On Thu, Dec 06, 2018 at 04:17:36PM +0800, Jason Wang wrote:
>
> On 2018/12/6 6:54, Michael S. Tsirkin wrote:
> > When use_napi is set, let's enable BQLs. Note: some of the issues are
> > similar to wifi. It's worth considering whether something similar to
> > commit 36148c2bbfbe ("mac80211: Adjust TSQ pacing shift") might be
> > beneficial.
>
>
> I've played with a similar patch several days ago. The tricky part is the mode
> switching between napi and no napi. We should make sure that when a packet is
> sent and tracked by BQL, it is consumed by BQL as well. I did it by
> tracking it through skb->cb, and dealt with the freeze by resetting the BQL
> status. Patch attached.
>
> But when testing with vhost-net, I don't get a very stable performance; it was
> probably because we batch the used ring updates, so the tx interrupt may come
> randomly. We probably need to implement a time-bounded coalescing mechanism
> which could be configured from userspace.
>
> Btw, maybe it's time to just enable napi TX by default. I get a ~10% TCP_RR
> regression on a machine without APICv (haven't found time to test an APICv
> machine). But considering it is for correctness, I think it's acceptable? Then
> we can do optimization on top?
>
>
> Thanks

I don't see how it's for correctness, to be frank.

What if we just do the bulk free? Does that fix the regression?
On Thu, Dec 06, 2018 at 04:17:36PM +0800, Jason Wang wrote:
>
> On 2018/12/6 6:54, Michael S. Tsirkin wrote:
> > When use_napi is set, let's enable BQLs. Note: some of the issues are
> > similar to wifi. It's worth considering whether something similar to
> > commit 36148c2bbfbe ("mac80211: Adjust TSQ pacing shift") might be
> > beneficial.
>
>
> I've played with a similar patch several days ago. The tricky part is the mode
> switching between napi and no napi. We should make sure that when a packet is
> sent and tracked by BQL, it is consumed by BQL as well. I did it by
> tracking it through skb->cb, and dealt with the freeze by resetting the BQL
> status. Patch attached.
>
> But when testing with vhost-net, I don't get a very stable performance,

So how about increasing the TSQ pacing shift then?

> it was
> probably because we batch the used ring updates, so the tx interrupt may come
> randomly. We probably need to implement a time-bounded coalescing mechanism
> which could be configured from userspace.

I don't think it's reasonable to expect userspace to be that smart ...
Why does it need to be time bounded? The used ring is always updated when the
ring becomes empty.
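For reference, the mac80211 change mentioned above works by lowering the
per-socket TSQ pacing shift, which raises the amount of data TCP Small Queues
keeps queued below the socket (roughly sk_pacing_rate >> sk_pacing_shift
bytes). A driver-side sketch using the existing helper; the value 8 is only
an example, not a recommendation from this thread:

#include <net/sock.h>
#include <linux/skbuff.h>

/* Illustrative sketch: lower the TSQ pacing shift for sockets that
 * transmit through this device, letting TCP keep more data in flight
 * below the socket before throttling.
 */
static void example_adjust_tsq(struct sk_buff *skb)
{
        if (skb->sk)
                sk_pacing_shift_update(skb->sk, 8);     /* example value */
}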
On Thu, Dec 06, 2018 at 04:17:36PM +0800, Jason Wang wrote:
>
> On 2018/12/6 6:54, Michael S. Tsirkin wrote:
> > When use_napi is set, let's enable BQLs. Note: some of the issues are
> > similar to wifi. It's worth considering whether something similar to
> > commit 36148c2bbfbe ("mac80211: Adjust TSQ pacing shift") might be
> > beneficial.
>
>
> I've played with a similar patch several days ago. The tricky part is the mode
> switching between napi and no napi. We should make sure that when a packet is
> sent and tracked by BQL, it is consumed by BQL as well.

I just went over the patch again and I don't understand this comment.
This patch only enables BQL with tx napi.

Thus there's no mode switching.

What did I miss?
On 2018/12/26 11:15, Michael S. Tsirkin wrote:
> On Thu, Dec 06, 2018 at 04:17:36PM +0800, Jason Wang wrote:
>> On 2018/12/6 6:54, Michael S. Tsirkin wrote:
>>> When use_napi is set, let's enable BQLs. Note: some of the issues are
>>> similar to wifi. It's worth considering whether something similar to
>>> commit 36148c2bbfbe ("mac80211: Adjust TSQ pacing shift") might be
>>> beneficial.
>>
>> I've played with a similar patch several days ago. The tricky part is the mode
>> switching between napi and no napi. We should make sure that when a packet is
>> sent and tracked by BQL, it is consumed by BQL as well. I did it by
>> tracking it through skb->cb, and dealt with the freeze by resetting the BQL
>> status. Patch attached.
>>
>> But when testing with vhost-net, I don't get a very stable performance; it was
>> probably because we batch the used ring updates, so the tx interrupt may come
>> randomly. We probably need to implement a time-bounded coalescing mechanism
>> which could be configured from userspace.
>>
>> Btw, maybe it's time to just enable napi TX by default. I get a ~10% TCP_RR
>> regression on a machine without APICv (haven't found time to test an APICv
>> machine). But considering it is for correctness, I think it's acceptable? Then
>> we can do optimization on top?
>>
>>
>> Thanks
> I don't see how it's for correctness, to be frank.

Socket accounting is wrong in this case. This should be a bug in fact.

> What if we just do the bulk free? Does that fix the regression?

I can test it.
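For what "bulk free" could mean in this path, one option is to free completed
tx skbs with napi_consume_skb() from the tx napi poll, which batches frees
onto a per-cpu cache when a non-zero budget is passed. A sketch of the idea
only (not a patch from this thread; error and xdp cases are ignored):

#include <linux/skbuff.h>
#include <linux/virtio.h>

/* Illustrative sketch: reclaim completed tx buffers and free the skbs in
 * bulk.  A non-zero napi_budget means we are in napi context and
 * napi_consume_skb() may defer the free to a per-cpu batch.
 */
static void example_free_old_xmit_skbs(struct virtqueue *vq, int napi_budget)
{
        unsigned int len;
        void *buf;

        while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
                napi_consume_skb((struct sk_buff *)buf, napi_budget);
}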
On 2018/12/26 11:19, Michael S. Tsirkin wrote:
> On Thu, Dec 06, 2018 at 04:17:36PM +0800, Jason Wang wrote:
>> On 2018/12/6 6:54, Michael S. Tsirkin wrote:
>>> When use_napi is set, let's enable BQLs. Note: some of the issues are
>>> similar to wifi. It's worth considering whether something similar to
>>> commit 36148c2bbfbe ("mac80211: Adjust TSQ pacing shift") might be
>>> beneficial.
>>
>> I've played with a similar patch several days ago. The tricky part is the mode
>> switching between napi and no napi. We should make sure that when a packet is
>> sent and tracked by BQL, it is consumed by BQL as well. I did it by
>> tracking it through skb->cb, and dealt with the freeze by resetting the BQL
>> status. Patch attached.
>>
>> But when testing with vhost-net, I don't get a very stable performance,
> So how about increasing the TSQ pacing shift then?

I can test this. But changing the default TCP value is much more than a
virtio-net specific thing.

>
>> it was
>> probably because we batch the used ring updates, so the tx interrupt may come
>> randomly. We probably need to implement a time-bounded coalescing mechanism
>> which could be configured from userspace.
> I don't think it's reasonable to expect userspace to be that smart ...
> Why does it need to be time bounded? The used ring is always updated when the
> ring becomes empty.

We don't add used buffers immediately, which means BQL may not see the
consumed packets in time. And the delay varies based on the workload, since
we count packets, not bytes or time, before doing the batched update.

Thanks
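The time-bounded coalescing idea above can be sketched generically:
completions are batched, but the batch is flushed (and the tx interrupt
raised) once either a packet-count threshold or an elapsed-time bound is
exceeded, so BQL sees consumed packets promptly even when packets trickle in.
The structure and names below are hypothetical, not vhost code:

#include <linux/ktime.h>

/* Hypothetical sketch of a time-bounded completion batch. */
struct example_tx_batch {
        unsigned int pending;           /* completions not yet signalled */
        unsigned int max_pending;       /* packet-count bound, e.g. 64 */
        ktime_t first;                  /* when the oldest completion started waiting */
        s64 max_wait_ns;                /* time bound, e.g. 50 * NSEC_PER_USEC */
};

static void example_batch_add(struct example_tx_batch *b)
{
        if (!b->pending++)
                b->first = ktime_get();
}

static bool example_batch_should_flush(struct example_tx_batch *b)
{
        if (!b->pending)
                return false;
        if (b->pending >= b->max_pending)
                return true;
        return ktime_to_ns(ktime_sub(ktime_get(), b->first)) >= b->max_wait_ns;
}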
On 2018/12/26 11:22, Michael S. Tsirkin wrote:
> On Thu, Dec 06, 2018 at 04:17:36PM +0800, Jason Wang wrote:
>> On 2018/12/6 6:54, Michael S. Tsirkin wrote:
>>> When use_napi is set, let's enable BQLs. Note: some of the issues are
>>> similar to wifi. It's worth considering whether something similar to
>>> commit 36148c2bbfbe ("mac80211: Adjust TSQ pacing shift") might be
>>> beneficial.
>>
>> I've played with a similar patch several days ago. The tricky part is the mode
>> switching between napi and no napi. We should make sure that when a packet is
>> sent and tracked by BQL, it is consumed by BQL as well.
>
> I just went over the patch again and I don't understand this comment.
> This patch only enables BQL with tx napi.
>
> Thus there's no mode switching.
>
> What did I miss?

Consider this case:

TX NAPI is disabled:

    send N packets

turn TX NAPI on:

    get a tx interrupt

    BQL tries to consume those packets, which leads to a WARN from dql.

Thanks
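The failure mode described above (packets sent while tx napi was off, then
completed after it is turned on, underflowing the dql counters and triggering
the warning) is what the reset mentioned earlier in the thread addresses.
A minimal sketch, assuming a hypothetical toggle path; the example_* name and
the surrounding locking context are placeholders, not virtio-net code:

#include <linux/netdevice.h>

/* Hypothetical sketch: clear BQL/dql state when tx napi is toggled so
 * completions of packets sent in the old mode cannot unbalance dql.
 * Packets already in flight are then simply not accounted; see the
 * skb->cb marking sketch earlier for an alternative.
 */
static void example_toggle_tx_napi(struct net_device *dev, int qnum, bool enable)
{
        struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);

        __netif_tx_lock_bh(txq);
        netdev_tx_reset_queue(txq);     /* resets this queue's dql counters */
        /* ... actually enable or disable the tx napi instance here ... */
        __netif_tx_unlock_bh(txq);
}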