Joby Poriyath
2013-Oct-29 15:27 UTC
[PATCH net-next] xen-netback: allocate xenvif arrays using vzalloc.
This will reduce memory pressure when allocating struct xenvif. The size of xenvif struct has increased from 168 to 36632 bytes (on x86-32). See commit b3f980bd827e6e81a050c518d60ed7811a83061d. This resulted in occasional netdev allocation failure in dom0 with 752MiB RAM, due to fragmented memory. Signed-off-by: Joby Poriyath <joby.poriyath@citrix.com> Signed-off-by: Andrew J. Bennieston <andrew.bennieston@citrix.com> --- drivers/net/xen-netback/common.h | 10 +++--- drivers/net/xen-netback/interface.c | 61 +++++++++++++++++++++++++++++++++++ drivers/net/xen-netback/netback.c | 6 ++-- 3 files changed, 69 insertions(+), 8 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 55b8dec..82515a3 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -114,17 +114,17 @@ struct xenvif { char tx_irq_name[IFNAMSIZ+4]; /* DEVNAME-tx */ struct xen_netif_tx_back_ring tx; struct sk_buff_head tx_queue; - struct page *mmap_pages[MAX_PENDING_REQS]; + struct page **mmap_pages; /* [MAX_PENDING_REQS]; */ pending_ring_idx_t pending_prod; pending_ring_idx_t pending_cons; u16 pending_ring[MAX_PENDING_REQS]; - struct pending_tx_info pending_tx_info[MAX_PENDING_REQS]; + struct pending_tx_info *pending_tx_info; /* [MAX_PENDING_REQS]; */ /* Coalescing tx requests before copying makes number of grant * copy ops greater or equal to number of slots required. In * worst case a tx request consumes 2 gnttab_copy. */ - struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS]; + struct gnttab_copy *tx_copy_ops; /* [2*MAX_PENDING_REQS]; */ /* Use kthread for guest RX */ @@ -147,8 +147,8 @@ struct xenvif { * head/fragment page uses 2 copy operations because it * straddles two buffers in the frontend. */ - struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE]; - struct xenvif_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE]; + struct gnttab_copy *grant_copy_op; /* [2*XEN_NETIF_RX_RING_SIZE]; */ + struct xenvif_rx_meta *meta; /* [2*XEN_NETIF_RX_RING_SIZE]; */ u8 fe_dev_addr[6]; diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index e4aa267..d4a9807 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -288,6 +288,60 @@ static const struct net_device_ops xenvif_netdev_ops = { .ndo_validate_addr = eth_validate_addr, }; +static void deallocate_xenvif_arrays(struct xenvif *vif) +{ + vfree(vif->mmap_pages); + vif->mmap_pages = NULL; + + vfree(vif->pending_tx_info); + vif->pending_tx_info = NULL; + + vfree(vif->tx_copy_ops); + vif->tx_copy_ops = NULL; + + vfree(vif->grant_copy_op); + vif->grant_copy_op = NULL; + + vfree(vif->meta); + vif->meta = NULL; +} + +static int allocate_xenvif_arrays(struct xenvif *vif) +{ + vif->mmap_pages = vif->pending_tx_info = NULL; + vif->tx_copy_ops = vif->grant_copy_op = vif->meta = NULL; + + vif->mmap_pages = vzalloc(MAX_PENDING_REQS * sizeof(struct page *)); + if (! vif->mmap_pages) + goto fail; + + vif->pending_tx_info = vzalloc(MAX_PENDING_REQS * + sizeof(struct pending_tx_info)); + if (! vif->pending_tx_info) + goto fail; + + vif->tx_copy_ops = vzalloc(2 * MAX_PENDING_REQS * + sizeof(struct gnttab_copy)); + if (! vif->tx_copy_ops) + goto fail; + + vif->grant_copy_op = vzalloc(2 * XEN_NETIF_RX_RING_SIZE * + sizeof(struct gnttab_copy)); + if (! vif->grant_copy_op) + goto fail; + + vif->meta = vzalloc(2 * XEN_NETIF_RX_RING_SIZE * + sizeof(struct xenvif_rx_meta)); + if (! vif->meta) + goto fail; + + return 0; + +fail: + deallocate_xenvif_arrays(vif); + return 1; +} + struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, unsigned int handle) { @@ -313,6 +367,12 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, vif->ip_csum = 1; vif->dev = dev; + if (allocate_xenvif_arrays(vif)) { + netdev_warn(dev, "Could not create device: out of memory\n"); + free_netdev(dev); + return ERR_PTR(-ENOMEM); + } + vif->credit_bytes = vif->remaining_credit = ~0UL; vif->credit_usec = 0UL; init_timer(&vif->credit_timeout); @@ -484,6 +544,7 @@ void xenvif_free(struct xenvif *vif) unregister_netdev(vif->dev); + deallocate_xenvif_arrays(vif); free_netdev(vif->dev); module_put(THIS_MODULE); diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 828fdab..34c0c05 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -602,12 +602,12 @@ void xenvif_rx_action(struct xenvif *vif) break; } - BUG_ON(npo.meta_prod > ARRAY_SIZE(vif->meta)); + BUG_ON(npo.meta_prod > 2*XEN_NETIF_RX_RING_SIZE); if (!npo.copy_prod) return; - BUG_ON(npo.copy_prod > ARRAY_SIZE(vif->grant_copy_op)); + BUG_ON(npo.copy_prod > 2*XEN_NETIF_RX_RING_SIZE); gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod); while ((skb = __skb_dequeue(&rxq)) != NULL) { @@ -1571,7 +1571,7 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif) vif->tx.req_cons = idx; - if ((gop-vif->tx_copy_ops) >= ARRAY_SIZE(vif->tx_copy_ops)) + if ((gop-vif->tx_copy_ops) >= 2*MAX_PENDING_REQS) break; } -- 1.7.10.4
Eric Dumazet
2013-Oct-29 15:43 UTC
Re: [PATCH net-next] xen-netback: allocate xenvif arrays using vzalloc.
On Tue, 2013-10-29 at 15:27 +0000, Joby Poriyath wrote:> This will reduce memory pressure when allocating struct xenvif. > > The size of xenvif struct has increased from 168 to 36632 bytes (on x86-32). > See commit b3f980bd827e6e81a050c518d60ed7811a83061d. This resulted in > occasional netdev allocation failure in dom0 with 752MiB RAM, due to > fragmented memory.This looks overkill. Replacing a single allocation of ~36 KB into 5 vmalloc() looks like you did not really tried other things... This should be done generically in alloc_netdev_mqs() Take a look at commit 60877a32bce00041 ("net: allow large number of tx queues")
Wei Liu
2013-Oct-29 15:50 UTC
Re: [PATCH net-next] xen-netback: allocate xenvif arrays using vzalloc.
On Tue, Oct 29, 2013 at 03:27:13PM +0000, Joby Poriyath wrote: [...]> + > +static int allocate_xenvif_arrays(struct xenvif *vif) > +{ > + vif->mmap_pages = vif->pending_tx_info = NULL; > + vif->tx_copy_ops = vif->grant_copy_op = vif->meta = NULL; > + > + vif->mmap_pages = vzalloc(MAX_PENDING_REQS * sizeof(struct page *)); > + if (! vif->mmap_pages)No space after "!".> + goto fail; > + > + vif->pending_tx_info = vzalloc(MAX_PENDING_REQS * > + sizeof(struct pending_tx_info)); > + if (! vif->pending_tx_info) > + goto fail; > + > + vif->tx_copy_ops = vzalloc(2 * MAX_PENDING_REQS * > + sizeof(struct gnttab_copy)); > + if (! vif->tx_copy_ops) > + goto fail; > + > + vif->grant_copy_op = vzalloc(2 * XEN_NETIF_RX_RING_SIZE * > + sizeof(struct gnttab_copy)); > + if (! vif->grant_copy_op) > + goto fail; > + > + vif->meta = vzalloc(2 * XEN_NETIF_RX_RING_SIZE * > + sizeof(struct xenvif_rx_meta));Indentation.> + if (! vif->meta) > + goto fail; > + > + return 0; > + > +fail: > + deallocate_xenvif_arrays(vif); > + return 1;return -ENOMEM;> +} > + > struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, > unsigned int handle) > { > @@ -313,6 +367,12 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, > vif->ip_csum = 1; > vif->dev = dev; > > + if (allocate_xenvif_arrays(vif)) { > + netdev_warn(dev, "Could not create device: out of memory\n"); > + free_netdev(dev); > + return ERR_PTR(-ENOMEM); > + } > + > vif->credit_bytes = vif->remaining_credit = ~0UL; > vif->credit_usec = 0UL; > init_timer(&vif->credit_timeout); > @@ -484,6 +544,7 @@ void xenvif_free(struct xenvif *vif) > > unregister_netdev(vif->dev); > > + deallocate_xenvif_arrays(vif); > free_netdev(vif->dev); > > module_put(THIS_MODULE); > diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c > index 828fdab..34c0c05 100644 > --- a/drivers/net/xen-netback/netback.c > +++ b/drivers/net/xen-netback/netback.c > @@ -602,12 +602,12 @@ void xenvif_rx_action(struct xenvif *vif) > break; > } > > - BUG_ON(npo.meta_prod > ARRAY_SIZE(vif->meta)); > + BUG_ON(npo.meta_prod > 2*XEN_NETIF_RX_RING_SIZE); > > if (!npo.copy_prod) > return; > > - BUG_ON(npo.copy_prod > ARRAY_SIZE(vif->grant_copy_op)); > + BUG_ON(npo.copy_prod > 2*XEN_NETIF_RX_RING_SIZE);It''s better to #define XEN_NETBK_RX_ARRAY_SIZE (2*XEN_NETIF_RX_RING_SIZE) And use it consistently in code (allocation / comparison). Otherwise it''s easy to change one site and forget about others.> gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod); > > while ((skb = __skb_dequeue(&rxq)) != NULL) { > @@ -1571,7 +1571,7 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif) > > vif->tx.req_cons = idx; > > - if ((gop-vif->tx_copy_ops) >= ARRAY_SIZE(vif->tx_copy_ops)) > + if ((gop-vif->tx_copy_ops) >= 2*MAX_PENDING_REQS)Same here. #define XEN_NETBK_TX_ARRAY_SIZE Wei.
Joby Poriyath
2013-Oct-29 18:46 UTC
Re: [PATCH net-next] xen-netback: allocate xenvif arrays using vzalloc.
On Tue, Oct 29, 2013 at 08:43:50AM -0700, Eric Dumazet wrote:> On Tue, 2013-10-29 at 15:27 +0000, Joby Poriyath wrote: > > This will reduce memory pressure when allocating struct xenvif. > > > > The size of xenvif struct has increased from 168 to 36632 bytes (on x86-32). > > See commit b3f980bd827e6e81a050c518d60ed7811a83061d. This resulted in > > occasional netdev allocation failure in dom0 with 752MiB RAM, due to > > fragmented memory. > > This looks overkill. > > Replacing a single allocation of ~36 KB into 5 vmalloc() looks like you > did not really tried other things... > > This should be done generically in alloc_netdev_mqs()Sorry Eric, I didn''t quite understand how this can be generically done. The netback interfaces are tied to the Xen guests (VMs) and these are created when guests are started and deleted when guest are halted.> > Take a look at commit 60877a32bce00041 > ("net: allow large number of tx queues") >I could try allocating using kmalloc and if that fails, then fall back to vmalloc. Thanks, Joby
Eric Dumazet
2013-Oct-29 23:24 UTC
Re: [PATCH net-next] xen-netback: allocate xenvif arrays using vzalloc.
On Tue, 2013-10-29 at 18:46 +0000, Joby Poriyath wrote:> On Tue, Oct 29, 2013 at 08:43:50AM -0700, Eric Dumazet wrote: > > On Tue, 2013-10-29 at 15:27 +0000, Joby Poriyath wrote: > > > This will reduce memory pressure when allocating struct xenvif. > > > > > > The size of xenvif struct has increased from 168 to 36632 bytes (on x86-32). > > > See commit b3f980bd827e6e81a050c518d60ed7811a83061d. This resulted in > > > occasional netdev allocation failure in dom0 with 752MiB RAM, due to > > > fragmented memory. > > > > This looks overkill. > > > > Replacing a single allocation of ~36 KB into 5 vmalloc() looks like you > > did not really tried other things... > > > > This should be done generically in alloc_netdev_mqs() > > Sorry Eric, I didn''t quite understand how this can be generically done. > > The netback interfaces are tied to the Xen guests (VMs) and these are created > when guests are started and deleted when guest are halted.They are created by alloc_netdev_mqs()> > > > > Take a look at commit 60877a32bce00041 > > ("net: allow large number of tx queues") > > > > I could try allocating using kmalloc and if that fails, then fall back > to vmalloc.My point is the patch should be done in net/core, not in your driver.
Eric Dumazet
2013-Oct-29 23:32 UTC
Re: [PATCH net-next] xen-netback: allocate xenvif arrays using vzalloc.
On Tue, 2013-10-29 at 16:24 -0700, Eric Dumazet wrote:> On Tue, 2013-10-29 at 18:46 +0000, Joby Poriyath wrote: > > On Tue, Oct 29, 2013 at 08:43:50AM -0700, Eric Dumazet wrote: > > > On Tue, 2013-10-29 at 15:27 +0000, Joby Poriyath wrote: > > > > This will reduce memory pressure when allocating struct xenvif. > > > > > > > > The size of xenvif struct has increased from 168 to 36632 bytes (on x86-32). > > > > See commit b3f980bd827e6e81a050c518d60ed7811a83061d. This resulted in > > > > occasional netdev allocation failure in dom0 with 752MiB RAM, due to > > > > fragmented memory. > > > > > > This looks overkill. > > > > > > Replacing a single allocation of ~36 KB into 5 vmalloc() looks like you > > > did not really tried other things... > > > > > > This should be done generically in alloc_netdev_mqs() > > > > Sorry Eric, I didn''t quite understand how this can be generically done. > > > > The netback interfaces are tied to the Xen guests (VMs) and these are created > > when guests are started and deleted when guest are halted. > > They are created by alloc_netdev_mqs()Something like the following should be fine. diff --git a/net/core/dev.c b/net/core/dev.c index 0054c8c..874a57a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6239,7 +6239,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, /* ensure 32-byte alignment of whole construct */ alloc_size += NETDEV_ALIGN - 1; - p = kzalloc(alloc_size, GFP_KERNEL); + p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + if (!p) + p = vzalloc(alloc_size); if (!p) return NULL; @@ -6302,7 +6304,10 @@ free_pcpu: #endif free_p: - kfree(p); + if (is_vmalloc_addr(p)) + vfree(p); + else + kfree(p); return NULL; } EXPORT_SYMBOL(alloc_netdev_mqs); @@ -6339,7 +6344,12 @@ void free_netdev(struct net_device *dev) /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { - kfree((char *)dev - dev->padded); + char *addr = (char *)dev - dev->padded; + + if (is_vmalloc_addr(addr)) + vfree(addr); + else + kfree(addr); return; } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index d954b56..406c54b 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1259,11 +1259,16 @@ exit: static void netdev_release(struct device *d) { struct net_device *dev = to_net_dev(d); + char *addr; BUG_ON(dev->reg_state != NETREG_RELEASED); kfree(dev->ifalias); - kfree((char *)dev - dev->padded); + addr = (char *)dev - dev->padded; + if (is_vmalloc_addr(addr)) + vfree(addr); + else + kfree(addr); } static const void *net_namespace(struct device *d)
Joby Poriyath
2013-Oct-30 10:39 UTC
Re: [PATCH net-next] xen-netback: allocate xenvif arrays using vzalloc.
On Tue, Oct 29, 2013 at 04:32:12PM -0700, Eric Dumazet wrote:> On Tue, 2013-10-29 at 16:24 -0700, Eric Dumazet wrote: > > On Tue, 2013-10-29 at 18:46 +0000, Joby Poriyath wrote: > > > On Tue, Oct 29, 2013 at 08:43:50AM -0700, Eric Dumazet wrote: > > > > On Tue, 2013-10-29 at 15:27 +0000, Joby Poriyath wrote: > > > > > This will reduce memory pressure when allocating struct xenvif. > > > > > > > > > > The size of xenvif struct has increased from 168 to 36632 bytes (on x86-32). > > > > > See commit b3f980bd827e6e81a050c518d60ed7811a83061d. This resulted in > > > > > occasional netdev allocation failure in dom0 with 752MiB RAM, due to > > > > > fragmented memory. > > > > > > > > This looks overkill. > > > > > > > > Replacing a single allocation of ~36 KB into 5 vmalloc() looks like you > > > > did not really tried other things... > > > > > > > > This should be done generically in alloc_netdev_mqs() > > > > > > Sorry Eric, I didn''t quite understand how this can be generically done. > > > > > > The netback interfaces are tied to the Xen guests (VMs) and these are created > > > when guests are started and deleted when guest are halted. > > > > They are created by alloc_netdev_mqs() > > Something like the following should be fine. > > >Thanks for the patch.> diff --git a/net/core/dev.c b/net/core/dev.c > index 0054c8c..874a57a 100644 > --- a/net/core/dev.c > +++ b/net/core/dev.c > @@ -6239,7 +6239,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, > /* ensure 32-byte alignment of whole construct */ > alloc_size += NETDEV_ALIGN - 1; > > - p = kzalloc(alloc_size, GFP_KERNEL); > + p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); > + if (!p) > + p = vzalloc(alloc_size); > if (!p) > return NULL; >The net_device allocation rule {linux/Documentation/networking/netdevices.txt} states that net_device struct must be allocated using kmalloc. Is this safe to do?> @@ -6302,7 +6304,10 @@ free_pcpu: > #endif > > free_p: > - kfree(p); > + if (is_vmalloc_addr(p)) > + vfree(p); > + else > + kfree(p); > return NULL; > } > EXPORT_SYMBOL(alloc_netdev_mqs); > @@ -6339,7 +6344,12 @@ void free_netdev(struct net_device *dev) > > /* Compatibility with error handling in drivers */ > if (dev->reg_state == NETREG_UNINITIALIZED) { > - kfree((char *)dev - dev->padded); > + char *addr = (char *)dev - dev->padded; > + > + if (is_vmalloc_addr(addr)) > + vfree(addr); > + else > + kfree(addr); > return; > } > > diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c > index d954b56..406c54b 100644 > --- a/net/core/net-sysfs.c > +++ b/net/core/net-sysfs.c > @@ -1259,11 +1259,16 @@ exit: > static void netdev_release(struct device *d) > { > struct net_device *dev = to_net_dev(d); > + char *addr; > > BUG_ON(dev->reg_state != NETREG_RELEASED); > > kfree(dev->ifalias); > - kfree((char *)dev - dev->padded); > + addr = (char *)dev - dev->padded; > + if (is_vmalloc_addr(addr)) > + vfree(addr); > + else > + kfree(addr); > } > > static const void *net_namespace(struct device *d) > >Thanks, Joby
Eric Dumazet
2013-Oct-30 13:54 UTC
Re: [PATCH net-next] xen-netback: allocate xenvif arrays using vzalloc.
On Wed, 2013-10-30 at 10:39 +0000, Joby Poriyath wrote:> The net_device allocation rule {linux/Documentation/networking/netdevices.txt} states > that net_device struct must be allocated using kmalloc. > > Is this safe to do?As long as the freeing path is aware of the possibility the memory is either allocated with vmalloc() or kmalloc(), we are safe.
Eric Dumazet
2013-Oct-30 15:01 UTC
[PATCH net-next] net: extend net_device allocation to vmalloc()
From: Eric Dumazet <edumazet@google.com> Joby Poriyath provided a xen-netback patch to reduce the size of xenvif structure as some netdev allocation could fail under memory pressure/fragmentation. This patch is handling the problem at the core level, allowing any netdev structures to use vmalloc() if kmalloc() failed. As vmalloc() adds overhead on a critical network path, add __GFP_REPEAT to kzalloc() flags to do this fallback only when really needed. Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: Joby Poriyath <joby.poriyath@citrix.com> --- Documentation/networking/netdevices.txt | 7 ++++--- include/linux/netdevice.h | 1 + net/core/dev.c | 22 +++++++++++++++++----- net/core/net-sysfs.c | 2 +- 4 files changed, 23 insertions(+), 9 deletions(-) diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt index c7ecc70..df7cd19 100644 --- a/Documentation/networking/netdevices.txt +++ b/Documentation/networking/netdevices.txt @@ -10,9 +10,10 @@ network devices. struct net_device allocation rules ================================= Network device structures need to persist even after module is unloaded and -must be allocated with kmalloc. If device has registered successfully, -it will be freed on last use by free_netdev. This is required to handle the -pathologic case cleanly (example: rmmod mydriver </sys/class/net/myeth/mtu ) +must be allocated with kmalloc() or vmalloc(). If device has registered +successfully, it will be freed on last use by free_netdev. +This is required to handle the pathologic case cleanly +(example: rmmod mydriver </sys/class/net/myeth/mtu ) There are routines in net_init.c to handle the common cases of alloc_etherdev, alloc_netdev. These reserve extra space for driver diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cb1d918..e6353ca 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1800,6 +1800,7 @@ static inline void unregister_netdevice(struct net_device *dev) int netdev_refcnt_read(const struct net_device *dev); void free_netdev(struct net_device *dev); +void netdev_freemem(struct net_device *dev); void synchronize_net(void); int init_dummy_netdev(struct net_device *dev); diff --git a/net/core/dev.c b/net/core/dev.c index 0054c8c..0e61365 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6196,6 +6196,16 @@ void netdev_set_default_ethtool_ops(struct net_device *dev, } EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops); +void netdev_freemem(struct net_device *dev) +{ + char *addr = (char *)dev - dev->padded; + + if (is_vmalloc_addr(addr)) + vfree(addr); + else + kfree(addr); +} + /** * alloc_netdev_mqs - allocate network device * @sizeof_priv: size of private data to allocate space for @@ -6239,7 +6249,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, /* ensure 32-byte alignment of whole construct */ alloc_size += NETDEV_ALIGN - 1; - p = kzalloc(alloc_size, GFP_KERNEL); + p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + if (!p) + p = vzalloc(alloc_size); if (!p) return NULL; @@ -6248,7 +6260,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->pcpu_refcnt = alloc_percpu(int); if (!dev->pcpu_refcnt) - goto free_p; + goto free_dev; if (dev_addr_init(dev)) goto free_pcpu; @@ -6301,8 +6313,8 @@ free_pcpu: kfree(dev->_rx); #endif -free_p: - kfree(p); +free_dev: + netdev_freemem(dev); return NULL; } EXPORT_SYMBOL(alloc_netdev_mqs); @@ -6339,7 +6351,7 @@ void free_netdev(struct net_device *dev) /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { - kfree((char *)dev - dev->padded); + netdev_freemem(dev); return; } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index d954b56..d03f2c9 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1263,7 +1263,7 @@ static void netdev_release(struct device *d) BUG_ON(dev->reg_state != NETREG_RELEASED); kfree(dev->ifalias); - kfree((char *)dev - dev->padded); + netdev_freemem(dev); } static const void *net_namespace(struct device *d)
Ben Hutchings
2013-Oct-30 19:16 UTC
Re: [PATCH net-next] net: extend net_device allocation to vmalloc()
On Wed, 2013-10-30 at 08:01 -0700, Eric Dumazet wrote: [...]> --- a/Documentation/networking/netdevices.txt > +++ b/Documentation/networking/netdevices.txt > @@ -10,9 +10,10 @@ network devices. > struct net_device allocation rules > =================================> Network device structures need to persist even after module is unloaded and > -must be allocated with kmalloc. If device has registered successfully, > -it will be freed on last use by free_netdev. This is required to handle the > -pathologic case cleanly (example: rmmod mydriver </sys/class/net/myeth/mtu ) > +must be allocated with kmalloc() or vmalloc(). If device has registered[...] Well, drivers must never try to allocate and initialise struct net_device from scratch. They always call alloc_netdev_mqs() or one of its many wrappers. So ''must be allocated with kmalloc'' was already wrong and this sentence should be removed completely. Ben. -- Ben Hutchings, Staff Engineer, Solarflare Not speaking for my employer; that''s the marketing department''s job. They asked us to note that Solarflare product names are trademarked.
Eric Dumazet
2013-Oct-30 20:09 UTC
Re: [PATCH net-next] net: extend net_device allocation to vmalloc()
On Wed, 2013-10-30 at 19:16 +0000, Ben Hutchings wrote:> , drivers must never try to allocate and initialise struct > net_device from scratch. They always call alloc_netdev_mqs() or one of > its many wrappers. So ''must be allocated with kmalloc'' was already > wrong and this sentence should be removed completely.Right, I''ll send a v2.
Eric Dumazet
2013-Oct-30 20:10 UTC
[PATCH v2 net-next] net: extend net_device allocation to vmalloc()
From: Eric Dumazet <edumazet@google.com> Joby Poriyath provided a xen-netback patch to reduce the size of xenvif structure as some netdev allocation could fail under memory pressure/fragmentation. This patch is handling the problem at the core level, allowing any netdev structures to use vmalloc() if kmalloc() failed. As vmalloc() adds overhead on a critical network path, add __GFP_REPEAT to kzalloc() flags to do this fallback only when really needed. Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: Joby Poriyath <joby.poriyath@citrix.com> Cc: Ben Hutchings <bhutchings@solarflare.com> --- v2: change the Documentation to point to alloc_netdev_mqs() instead of kmalloc()/vzalloc(), from Ben Documentation/networking/netdevices.txt | 10 +++++----- include/linux/netdevice.h | 1 + net/core/dev.c | 22 +++++++++++++++++----- net/core/net-sysfs.c | 2 +- 4 files changed, 24 insertions(+), 11 deletions(-) diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt index c7ecc70..7d8d79f 100644 --- a/Documentation/networking/netdevices.txt +++ b/Documentation/networking/netdevices.txt @@ -10,12 +10,12 @@ network devices. struct net_device allocation rules ================================= Network device structures need to persist even after module is unloaded and -must be allocated with kmalloc. If device has registered successfully, -it will be freed on last use by free_netdev. This is required to handle the -pathologic case cleanly (example: rmmod mydriver </sys/class/net/myeth/mtu ) +must be allocated with alloc_netdev_mqs() and friends. +If device has registered successfully, it will be freed on last use +by free_netdev(). This is required to handle the pathologic case cleanly +(example: rmmod mydriver </sys/class/net/myeth/mtu ) -There are routines in net_init.c to handle the common cases of -alloc_etherdev, alloc_netdev. These reserve extra space for driver +alloc_netdev_mqs()/alloc_netdev() reserve extra space for driver private data which gets freed when the network device is freed. If separately allocated data is attached to the network device (netdev_priv(dev)) then it is up to the module exit handler to free that. diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cb1d918..e6353ca 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1800,6 +1800,7 @@ static inline void unregister_netdevice(struct net_device *dev) int netdev_refcnt_read(const struct net_device *dev); void free_netdev(struct net_device *dev); +void netdev_freemem(struct net_device *dev); void synchronize_net(void); int init_dummy_netdev(struct net_device *dev); diff --git a/net/core/dev.c b/net/core/dev.c index 0054c8c..0e61365 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6196,6 +6196,16 @@ void netdev_set_default_ethtool_ops(struct net_device *dev, } EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops); +void netdev_freemem(struct net_device *dev) +{ + char *addr = (char *)dev - dev->padded; + + if (is_vmalloc_addr(addr)) + vfree(addr); + else + kfree(addr); +} + /** * alloc_netdev_mqs - allocate network device * @sizeof_priv: size of private data to allocate space for @@ -6239,7 +6249,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, /* ensure 32-byte alignment of whole construct */ alloc_size += NETDEV_ALIGN - 1; - p = kzalloc(alloc_size, GFP_KERNEL); + p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + if (!p) + p = vzalloc(alloc_size); if (!p) return NULL; @@ -6248,7 +6260,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->pcpu_refcnt = alloc_percpu(int); if (!dev->pcpu_refcnt) - goto free_p; + goto free_dev; if (dev_addr_init(dev)) goto free_pcpu; @@ -6301,8 +6313,8 @@ free_pcpu: kfree(dev->_rx); #endif -free_p: - kfree(p); +free_dev: + netdev_freemem(dev); return NULL; } EXPORT_SYMBOL(alloc_netdev_mqs); @@ -6339,7 +6351,7 @@ void free_netdev(struct net_device *dev) /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { - kfree((char *)dev - dev->padded); + netdev_freemem(dev); return; } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index d954b56..d03f2c9 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1263,7 +1263,7 @@ static void netdev_release(struct device *d) BUG_ON(dev->reg_state != NETREG_RELEASED); kfree(dev->ifalias); - kfree((char *)dev - dev->padded); + netdev_freemem(dev); } static const void *net_namespace(struct device *d)
David Miller
2013-Nov-04 04:19 UTC
Re: [PATCH v2 net-next] net: extend net_device allocation to vmalloc()
From: Eric Dumazet <eric.dumazet@gmail.com> Date: Wed, 30 Oct 2013 13:10:44 -0700> From: Eric Dumazet <edumazet@google.com> > > Joby Poriyath provided a xen-netback patch to reduce the size of > xenvif structure as some netdev allocation could fail under > memory pressure/fragmentation. > > This patch is handling the problem at the core level, allowing > any netdev structures to use vmalloc() if kmalloc() failed. > > As vmalloc() adds overhead on a critical network path, add __GFP_REPEAT > to kzalloc() flags to do this fallback only when really needed. > > Signed-off-by: Eric Dumazet <edumazet@google.com> > Reported-by: Joby Poriyath <joby.poriyath@citrix.com> > Cc: Ben Hutchings <bhutchings@solarflare.com> > --- > v2: change the Documentation to point to alloc_netdev_mqs() > instead of kmalloc()/vzalloc(), from BenLooks good, applied, thanks Eric.