Michael S. Tsirkin
2014-Jan-16 11:53 UTC
[PATCH net-next v3 5/5] virtio-net: initial rx sysfs support, export mergeable rx buffer size
On Thu, Jan 16, 2014 at 01:38:46AM -0800, Michael Dalton wrote:
> Add initial support for per-rx queue sysfs attributes to virtio-net. If
> mergeable packet buffers are enabled, adds a read-only mergeable packet
> buffer size sysfs attribute for each RX queue.
>
> Signed-off-by: Michael Dalton <mwdalton at google.com>
> ---
>  drivers/net/virtio_net.c | 66 +++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 62 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 3e82311..f315cbb 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -27,6 +27,7 @@
>  #include <linux/slab.h>
>  #include <linux/cpu.h>
>  #include <linux/average.h>
> +#include <linux/seqlock.h>
>
>  static int napi_weight = NAPI_POLL_WEIGHT;
>  module_param(napi_weight, int, 0444);
> @@ -89,6 +90,12 @@ struct receive_queue {
>  	/* Average packet length for mergeable receive buffers. */
>  	struct ewma mrg_avg_pkt_len;
>
> +	/* Sequence counter to allow sysfs readers to safely access stats.
> +	 * Assumes a single virtio-net writer, which is enforced by virtio-net
> +	 * and NAPI.
> +	 */
> +	seqcount_t sysfs_seq;
> +
>  	/* Page frag for packet buffer allocation. */
>  	struct page_frag alloc_frag;
>
> @@ -416,7 +423,9 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>  		}
>  	}
>
> +	write_seqcount_begin(&rq->sysfs_seq);
>  	ewma_add(&rq->mrg_avg_pkt_len, head_skb->len);
> +	write_seqcount_end(&rq->sysfs_seq);
>  	return head_skb;
>
>  err_skb:

Hmm this adds overhead just to prevent sysfs from getting wrong value.
Can't sysfs simply disable softirq while it's reading the value?

> @@ -604,18 +613,29 @@ static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp)
>  	return err;
>  }
>
> -static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
> +static unsigned int get_mergeable_buf_len(struct ewma *avg_pkt_len)
>  {
>  	const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
> +	unsigned int len;
> +
> +	len = hdr_len + clamp_t(unsigned int, ewma_read(avg_pkt_len),
> +			GOOD_PACKET_LEN, PAGE_SIZE - hdr_len);
> +	return ALIGN(len, MERGEABLE_BUFFER_ALIGN);
> +}
> +
> +static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
> +{
>  	struct page_frag *alloc_frag = &rq->alloc_frag;
>  	char *buf;
>  	unsigned long ctx;
>  	int err;
>  	unsigned int len, hole;
>
> -	len = hdr_len + clamp_t(unsigned int, ewma_read(&rq->mrg_avg_pkt_len),
> -			GOOD_PACKET_LEN, PAGE_SIZE - hdr_len);
> -	len = ALIGN(len, MERGEABLE_BUFFER_ALIGN);
> +	/* avg_pkt_len is written only in NAPI rx softirq context. We may
> +	 * read avg_pkt_len without using the sysfs_seq seqcount, as this code
> +	 * is called only in NAPI rx softirq context or when NAPI is disabled.
> +	 */
> +	len = get_mergeable_buf_len(&rq->mrg_avg_pkt_len);
>  	if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp)))
>  		return -ENOMEM;
>
> @@ -1557,6 +1577,7 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
>  			       napi_weight);
>
>  		sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
> +		seqcount_init(&vi->rq[i].sysfs_seq);
>  		ewma_init(&vi->rq[i].mrg_avg_pkt_len, 1, RECEIVE_AVG_WEIGHT);
>  		sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
>  	}
> @@ -1594,6 +1615,39 @@ err:
>  	return ret;
>  }
>
> +#ifdef CONFIG_SYSFS
> +static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
> +		struct rx_queue_attribute *attribute, char *buf)
> +{
> +	struct virtnet_info *vi = netdev_priv(queue->dev);
> +	unsigned int queue_index = get_netdev_rx_queue_index(queue);
> +	struct receive_queue *rq;
> +	struct ewma avg;
> +	unsigned int start;
> +
> +	BUG_ON(queue_index >= vi->max_queue_pairs);
> +	rq = &vi->rq[queue_index];
> +	do {
> +		start = read_seqcount_begin(&rq->sysfs_seq);
> +		avg = rq->mrg_avg_pkt_len;
> +	} while (read_seqcount_retry(&rq->sysfs_seq, start));
> +	return sprintf(buf, "%u\n", get_mergeable_buf_len(&avg));
> +}
> +
> +static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
> +	__ATTR_RO(mergeable_rx_buffer_size);
> +
> +static struct attribute *virtio_net_mrg_rx_attrs[] = {
> +	&mergeable_rx_buffer_size_attribute.attr,
> +	NULL
> +};
> +
> +static const struct attribute_group virtio_net_mrg_rx_group = {
> +	.name = "virtio_net",
> +	.attrs = virtio_net_mrg_rx_attrs
> +};
> +#endif
> +
>  static int virtnet_probe(struct virtio_device *vdev)
>  {
>  	int i, err;
> @@ -1708,6 +1762,10 @@ static int virtnet_probe(struct virtio_device *vdev)
>  	if (err)
>  		goto free_stats;
>
> +#ifdef CONFIG_SYSFS
> +	if (vi->mergeable_rx_bufs)
> +		dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group;
> +#endif
>  	netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
>  	netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
>
> --
> 1.8.5.2
Michael Dalton
2014-Jan-16 16:33 UTC
[PATCH net-next v3 5/5] virtio-net: initial rx sysfs support, export mergeable rx buffer size
Hi Michael,

On Thu, Jan 16, 2014 at 3:53 AM, Michael S. Tsirkin <mst at redhat.com> wrote:
> Hmm this adds overhead just to prevent sysfs from getting wrong value.
> Can't sysfs simply disable softirq while it's reading the value?

Yes, I think this would work: we could call napi_disable(), read the average packet length from the receive_queue, and then call virtnet_napi_enable(). That would eliminate the need for the seqcount.

Best,
Mike
Michael Dalton
2014-Jan-16 17:27 UTC
[PATCH net-next v3 5/5] virtio-net: initial rx sysfs support, export mergeable rx buffer size
Sorry, just realized - I think disabling NAPI is necessary but not sufficient. There is also the issue that refill_work() could be scheduled. If refill_work() executes, it will re-enable NAPI. We'd need to cancel the vi->refill delayed work to prevent this AFAICT, and also ensure that no other function re-schedules vi->refill or re-enables NAPI (virtnet_open/close, virtnet_set_queues, and virtnet_freeze/restore).

How about the following sequence of operations:

    rtnl_lock();
    cancel_delayed_work_sync(&vi->refill);
    napi_disable(&rq->napi);
    read rq->mrg_avg_pkt_len
    virtnet_napi_enable();
    rtnl_unlock();

Additionally, if we disable NAPI when reading this file, perhaps the permissions should be changed to 400 so that an unprivileged user cannot temporarily disable network RX processing by reading these sysfs files. Does that sound reasonable?

Best,
Mike
Seemingly Similar Threads
- [PATCH net-next v3 5/5] virtio-net: initial rx sysfs support, export mergeable rx buffer size
- [PATCH net-next v3 5/5] virtio-net: initial rx sysfs support, export mergeable rx buffer size
- [PATCH net-next v3 5/5] virtio-net: initial rx sysfs support, export mergeable rx buffer size
- [PATCH net-next v3 5/5] virtio-net: initial rx sysfs support, export mergeable rx buffer size
- [PATCH net-next v5 0/6] virtio-net: mergeable rx buffer size auto-tuning