Kieran Mansley
2007-Jun-15 10:46 UTC
[Xen-devel] [PATCH 4/4] [Net] Support accelerated network plugin modules
Frontend net driver acceleration

diff -r cd3ade350f3f drivers/xen/netfront/netfront.c
--- a/drivers/xen/netfront/netfront.c   Thu Jun 14 15:04:32 2007 +0100
+++ b/drivers/xen/netfront/netfront.c   Fri Jun 15 09:34:41 2007 +0100
@@ -3,6 +3,7 @@
  *
  * Copyright (c) 2002-2005, K A Fraser
  * Copyright (c) 2005, XenSource Ltd
+ * Copyright (C) 2007 Solarflare Communications, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License version 2
@@ -47,6 +48,7 @@
 #include <linux/if_ether.h>
 #include <linux/io.h>
 #include <linux/moduleparam.h>
+#include <linux/list.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 #include <net/arp.h>
@@ -73,6 +75,8 @@ struct netfront_cb {
 };

 #define NETFRONT_SKB_CB(skb)    ((struct netfront_cb *)((skb)->cb))
+
+#include "netfront.h"

 /*
  * Mutually-exclusive module options to select receive data path:
@@ -144,57 +148,6 @@ static inline int netif_needs_gso(struct

 #define GRANT_INVALID_REF       0

-#define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE)
-#define NET_RX_RING_SIZE __RING_SIZE((struct netif_rx_sring *)0, PAGE_SIZE)
-
-struct netfront_info {
-        struct list_head list;
-        struct net_device *netdev;
-
-        struct net_device_stats stats;
-
-        struct netif_tx_front_ring tx;
-        struct netif_rx_front_ring rx;
-
-        spinlock_t tx_lock;
-        spinlock_t rx_lock;
-
-        unsigned int irq;
-        unsigned int copying_receiver;
-        unsigned int carrier;
-
-        /* Receive-ring batched refills. */
-#define RX_MIN_TARGET 8
-#define RX_DFL_MIN_TARGET 64
-#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
-        unsigned rx_min_target, rx_max_target, rx_target;
-        struct sk_buff_head rx_batch;
-
-        struct timer_list rx_refill_timer;
-
-        /*
-         * {tx,rx}_skbs store outstanding skbuffs. The first entry in tx_skbs
-         * is an index into a chain of free entries.
-         */
-        struct sk_buff *tx_skbs[NET_TX_RING_SIZE+1];
-        struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
-
-#define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
-        grant_ref_t gref_tx_head;
-        grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1];
-        grant_ref_t gref_rx_head;
-        grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
-
-        struct xenbus_device *xbdev;
-        int tx_ring_ref;
-        int rx_ring_ref;
-        u8 mac[ETH_ALEN];
-
-        unsigned long rx_pfn_array[NET_RX_RING_SIZE];
-        struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
-        struct mmu_update rx_mmu[NET_RX_RING_SIZE];
-};
-
 struct netfront_rx_info {
         struct netif_rx_response rx;
         struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
@@ -278,6 +231,369 @@ static void xennet_sysfs_delif(struct ne
 #define xennet_sysfs_delif(dev) do { } while(0)
 #endif

+/*
+ * List of all netfront accelerator plugin modules available. Each
+ * list entry is of type struct netfront_accelerator.
+ */
+static struct list_head accelerators_list;
+/*
+ * Lock to protect access to accelerators_list, and also used to
+ * protect the hooks_usecount field in struct netfront_accelerator
+ * against concurrent access
+ */
+static spinlock_t accelerators_lock;
+
+/*
+ * Safely remove the accelerator function hooks from a netfront state.
+ * Must only be called when there are no current users of the hooks.
+ */
+static void accelerator_remove_hooks(struct netfront_accelerator *accelerator)
+{
+        struct netfront_accel_vif_state *vif_state;
+
+        list_for_each_entry( vif_state,
+                             &accelerator->vif_states,
+                             link ) {
+                /* Make sure there are no data path operations going on */
+                netif_poll_disable(vif_state->np->netdev);
+                netif_tx_lock_bh(vif_state->np->netdev);
+
+                /*
+                 * Remove the hooks, but leave the vif_state on the
+                 * accelerator's list as that signifies this vif is
+                 * interested in using that accelerator if it becomes
+                 * available again
+                 */
+                vif_state->hooks = NULL;
+
+                netif_tx_unlock_bh(vif_state->np->netdev);
+                netif_poll_enable(vif_state->np->netdev);
+        }
+
+        accelerator->hooks = NULL;
+
+        /* Signal that all users of hooks are done */
+        up(&accelerator->exit_semaphore);
+}
+
+
+/*
+ * Compare a frontend description string against an accelerator to see
+ * if they match. Would ultimately be nice to replace the string with
+ * a unique numeric identifier for each accelerator.
+ */
+static int match_accelerator(const char *frontend,
+                             struct netfront_accelerator *accelerator)
+{
+        return strcmp(frontend, accelerator->frontend) == 0;
+}
+
+
+/*
+ * Add a frontend vif to the list of vifs that is using a netfront
+ * accelerator plugin module.
+ */
+static void add_accelerator_vif(struct netfront_accelerator *accelerator,
+                                struct netfront_info *np,
+                                struct xenbus_device *dev)
+{
+        np->accelerator = accelerator;
+        np->accel_vif_state.np = np;
+        np->accel_vif_state.dev = dev;
+
+        list_add(&np->accel_vif_state.link, &accelerator->vif_states);
+}
+
+/*
+ * Initialise the netfront state of an accelerator plugin module.
+ */
+static int init_accelerator(const char *frontend,
+                            struct netfront_accelerator **result)
+{
+        struct netfront_accelerator *accelerator =
+                kmalloc(sizeof(struct netfront_accelerator), GFP_KERNEL);
+        int frontend_len;
+
+        if ( !accelerator ) {
+                DPRINTK("%s: no memory for accelerator", __FUNCTION__);
+                return -ENOMEM;
+        }
+
+        frontend_len = strlen(frontend) + 1;
+        accelerator->frontend = kmalloc(frontend_len, GFP_KERNEL);
+        if ( !accelerator->frontend ) {
+                DPRINTK("%s: no memory for accelerator", __FUNCTION__);
+                kfree(accelerator);
+                return -ENOMEM;
+        }
+        strlcpy(accelerator->frontend, frontend, frontend_len);
+
+        INIT_LIST_HEAD(&accelerator->vif_states);
+
+        accelerator->hooks = NULL;
+        accelerator->hooks_usecount = 0;
+
+        list_add(&accelerator->link, &accelerators_list);
+
+        *result = accelerator;
+
+        return 0;
+}
+
+/*
+ * Modify the hooks stored in the per-vif state to match that in the
+ * netfront accelerator's state.
+ */
+static void
+accelerator_set_vif_state_hooks(struct netfront_accel_vif_state *vif_state)
+{
+        /* Make sure there are no data path operations going on */
+        netif_poll_disable(vif_state->np->netdev);
+        netif_tx_lock_bh(vif_state->np->netdev);
+
+        vif_state->hooks = vif_state->np->accelerator->hooks;
+
+        netif_tx_unlock_bh(vif_state->np->netdev);
+        netif_poll_enable(vif_state->np->netdev);
+}
+
+
+/*
+ * Request that a particular netfront accelerator plugin is loaded.
+ * Usually called as a result of the vif configuration specifying
+ * which one to use.
+ */
+static int netfront_load_accelerator(struct netfront_info *np,
+                                     struct xenbus_device *dev,
+                                     const char *frontend)
+{
+        struct netfront_accelerator *accelerator;
+        int rc;
+        unsigned flags;
+
+        spin_lock_irqsave(&accelerators_lock, flags);
+
+        /*
+         * Look at list of loaded accelerators to see if the requested
+         * one is already there
+         */
+        list_for_each_entry( accelerator, &accelerators_list, link ) {
+                if ( match_accelerator(frontend, accelerator) ) {
+                        /*
+                         * Include this frontend device on the
+                         * accelerator's list
+                         */
+                        add_accelerator_vif(accelerator, np, dev);
+
+                        ++accelerator->hooks_usecount;
+
+                        if ( accelerator->hooks == NULL )
+                                DPRINTK("%s: no hooks set", __FUNCTION__);
+                        else {
+                                spin_unlock_irqrestore(&accelerators_lock, flags);
+                                accelerator->hooks->new_device(np->netdev, dev);
+                                spin_lock_irqsave(&accelerators_lock, flags);
+                        }
+
+                        if ( (--accelerator->hooks_usecount) == 0 )
+                                accelerator_remove_hooks(accelerator);
+                        spin_unlock_irqrestore(&accelerators_lock, flags);
+
+                        /*
+                         * Hooks will get linked into vif_state by a
+                         * future call by the accelerator to
+                         * netfront_accelerator_ready()
+                         */
+
+                        return 0;
+                }
+        }
+
+        /* Couldn't find it, so create a new one and load the module */
+        if ( ( rc = init_accelerator(frontend, &accelerator) ) < 0 ) {
+                spin_unlock_irqrestore(&accelerators_lock, flags);
+                return rc;
+        }
+
+        /* Include this frontend device on the accelerator's list */
+        add_accelerator_vif(accelerator, np, dev);
+
+        spin_unlock_irqrestore(&accelerators_lock, flags);
+
+        DPRINTK("%s: loading module %s\n", __FUNCTION__, frontend);
+
+        /* load module */
+        request_module("%s", frontend);
+
+        /*
+         * Module should now call netfront_accelerator_loaded() once
+         * it's up and running, and we can continue from there
+         */
+
+        return 0;
+}
+
+/*
+ * Go through all the netfront vifs and see if they have requested
+ * this accelerator. Notify the accelerator plugin of the relevant
+ * device if so. Called when an accelerator plugin module is first
+ * loaded and connects to netfront.
+ */
+static void
+accelerator_probe_vifs_on_load(struct netfront_accelerator *accelerator)
+{
+        struct netfront_accel_vif_state *accel_vif_state;
+
+        DPRINTK("%s: %p\n", __FUNCTION__, accelerator);
+
+        list_for_each_entry( accel_vif_state,
+                             &accelerator->vif_states, link ) {
+                struct netfront_info *np = accel_vif_state->np;
+
+                accelerator->hooks->new_device(np->netdev,
+                                               accel_vif_state->dev);
+
+                /*
+                 * Hooks will get linked into vif_state by a call to
+                 * netfront_accelerator_ready() once accelerator
+                 * plugin is ready for action
+                 */
+        }
+}
+
+
+/*
+ * Called by the netfront accelerator plugin module when it has loaded
+ */
+int netfront_accelerator_loaded(const char *frontend,
+                                struct netfront_accel_hooks *hooks)
+{
+        struct netfront_accelerator *accelerator;
+        unsigned flags;
+
+        spin_lock_irqsave(&accelerators_lock, flags);
+
+        /* Look through list of accelerators to see if it has already
+           been requested */
+        list_for_each_entry( accelerator, &accelerators_list, link ) {
+                if ( match_accelerator(frontend, accelerator) ) {
+                        /*
+                         * Deliberate double inc of usecount here -
+                         * one to initialise it to 1 now hooks is
+                         * being set (which persists until unloaded),
+                         * and one for the use of hooks in this
+                         * function (we don't want an unload to
+                         * succeed in clearing hooks in the middle)
+                         */
+                        BUG_ON(accelerator->hooks != NULL ||
+                               accelerator->hooks_usecount != 0);
+                        accelerator->hooks_usecount = 2;
+
+                        accelerator->hooks = hooks;
+
+                        spin_unlock_irqrestore(&accelerators_lock, flags);
+
+                        accelerator_probe_vifs_on_load(accelerator);
+
+                        spin_lock_irqsave(&accelerators_lock, flags);
+                        if ( (--accelerator->hooks_usecount) == 0 )
+                                accelerator_remove_hooks(accelerator);
+                        spin_unlock_irqrestore(&accelerators_lock, flags);
+
+                        return 0;
+                }
+        }
+
+        /* If it wasn't in the list, add it now so that when it is
+           requested the caller will find it */
+        DPRINTK("%s: Couldn't find matching accelerator (%s)\n",
+                __FUNCTION__, frontend);
+
+        init_accelerator(frontend, &accelerator);
+
+        spin_unlock_irqrestore(&accelerators_lock, flags);
+
+        return 0;
+}
+EXPORT_SYMBOL_GPL(netfront_accelerator_loaded);
+
+
+/*
+ * Called by the accelerator module after it has been probed with a
+ * network device to say that it is ready to start accelerating
+ * traffic on that device
+ */
+void netfront_accelerator_ready(const char *frontend,
+                                struct xenbus_device *dev)
+{
+        struct netfront_accelerator *accelerator;
+        struct netfront_accel_vif_state *accel_vif_state;
+        unsigned flags;
+
+        spin_lock_irqsave(&accelerators_lock, flags);
+
+        list_for_each_entry( accelerator, &accelerators_list, link ) {
+                if ( match_accelerator(frontend, accelerator) ) {
+                        ++accelerator->hooks_usecount;
+                        spin_unlock_irqrestore(&accelerators_lock, flags);
+
+                        list_for_each_entry( accel_vif_state,
+                                             &accelerator->vif_states, link ) {
+                                if ( accel_vif_state->dev == dev )
+                                        accelerator_set_vif_state_hooks
+                                                (accel_vif_state);
+                        }
+
+                        spin_lock_irqsave(&accelerators_lock, flags);
+                        if ( (--accelerator->hooks_usecount) == 0 )
+                                accelerator_remove_hooks(accelerator);
+                        spin_unlock_irqrestore(&accelerators_lock, flags);
+
+                        return;
+                }
+        }
+
+        spin_unlock_irqrestore(&accelerators_lock, flags);
+}
+EXPORT_SYMBOL_GPL(netfront_accelerator_ready);
+
+
+/*
+ * Called by a netfront accelerator when it is unloaded. This safely
+ * removes the hooks into the plugin and blocks until all devices have
+ * finished using it, so on return it is safe to unload.
+ */
+void netfront_accelerator_unloaded(const char *frontend)
+{
+        struct netfront_accelerator *accelerator;
+        unsigned flags;
+
+        spin_lock_irqsave(&accelerators_lock, flags);
+
+        list_for_each_entry( accelerator, &accelerators_list, link ) {
+                if ( match_accelerator(frontend, accelerator) ) {
+                        /*
+                         * Use semaphore to ensure we know when all
+                         * uses of hooks are complete
+                         */
+                        sema_init(&accelerator->exit_semaphore, 0);
+
+                        if ( (--accelerator->hooks_usecount) == 0 )
+                                accelerator_remove_hooks(accelerator);
+
+                        spin_unlock_irqrestore(&accelerators_lock, flags);
+
+                        /* Wait for hooks to be unused, then return */
+                        down(&accelerator->exit_semaphore);
+
+                        return;
+                }
+        }
+        spin_unlock_irqrestore(&accelerators_lock, flags);
+}
+EXPORT_SYMBOL_GPL(netfront_accelerator_unloaded);
+
+
 static inline int xennet_can_sg(struct net_device *dev)
 {
         return dev->features & NETIF_F_SG;
@@ -331,8 +647,29 @@ static int __devexit netfront_remove(str
 static int __devexit netfront_remove(struct xenbus_device *dev)
 {
         struct netfront_info *info = dev->dev.driver_data;
+        unsigned flags;

         DPRINTK("%s\n", dev->nodename);
+
+        /*
+         * Call the remove accelerator hook. The use count for the
+         * accelerator's hooks is incremented for the duration of the
+         * call to prevent the accelerator being able to modify the
+         * hooks in the middle (by, for example, unloading)
+         */
+        if ( info->accel_vif_state.hooks ) {
+                spin_lock_irqsave(&accelerators_lock, flags);
+                ++info->accelerator->hooks_usecount;
+                spin_unlock_irqrestore(&accelerators_lock, flags);
+
+                if ( info->accel_vif_state.hooks )
+                        info->accel_vif_state.hooks->remove(dev);
+
+                spin_lock_irqsave(&accelerators_lock, flags);
+                if ( (--info->accelerator->hooks_usecount) == 0 )
+                        accelerator_remove_hooks(info->accelerator);
+                spin_unlock_irqrestore(&accelerators_lock, flags);
+        }

         netif_disconnect_backend(info);

@@ -356,8 +693,29 @@ static int netfront_resume(struct xenbus
 static int netfront_resume(struct xenbus_device *dev)
 {
         struct netfront_info *info = dev->dev.driver_data;
+        unsigned flags;

         DPRINTK("%s\n", dev->nodename);
+
+        /*
+         * Call the resume accelerator hook. The use count for the
+         * accelerator's hooks is incremented for the duration of
+         * the call to prevent the accelerator being able to modify
+         * the hooks in the middle (by, for example, unloading)
+         */
+        if ( info->accel_vif_state.hooks ) {
+                spin_lock_irqsave(&accelerators_lock, flags);
+                ++info->accelerator->hooks_usecount;
+                spin_unlock_irqrestore(&accelerators_lock, flags);
+
+                if ( info->accel_vif_state.hooks )
+                        info->accel_vif_state.hooks->resume(dev);
+
+                spin_lock_irqsave(&accelerators_lock, flags);
+                if ( (--info->accelerator->hooks_usecount) == 0 )
+                        accelerator_remove_hooks(info->accelerator);
+                spin_unlock_irqrestore(&accelerators_lock, flags);
+        }

         netif_disconnect_backend(info);
         return 0;
@@ -553,6 +911,7 @@ static void backend_changed(struct xenbu
 {
         struct netfront_info *np = dev->dev.driver_data;
         struct net_device *netdev = np->netdev;
+        unsigned flags;

         DPRINTK("%s\n", xenbus_strstate(backend_state));

@@ -577,6 +936,27 @@ static void backend_changed(struct xenbu
                 xenbus_frontend_closed(dev);
                 break;
         }
+
+        /*
+         * Call the backend_changed accelerator hook.
+         * The use count for the accelerator's hooks is incremented
+         * for the duration of the call to prevent the accelerator
+         * being able to modify the hooks in the middle (by, for
+         * example, unloading)
+         */
+        if ( np->accel_vif_state.hooks ) {
+                spin_lock_irqsave(&accelerators_lock, flags);
+                ++np->accelerator->hooks_usecount;
+                spin_unlock_irqrestore(&accelerators_lock, flags);
+
+                if ( np->accel_vif_state.hooks )
+                        np->accel_vif_state.hooks->backend_changed
+                                (dev, backend_state);
+
+                spin_lock_irqsave(&accelerators_lock, flags);
+                if ( (--np->accelerator->hooks_usecount) == 0 )
+                        accelerator_remove_hooks(np->accelerator);
+                spin_unlock_irqrestore(&accelerators_lock, flags);
+        }
 }

 /** Send a packet on a net device to encourage switches to learn the
@@ -613,19 +993,65 @@ static inline int netfront_tx_slot_avail
                  (TX_MAX_TARGET - MAX_SKB_FRAGS - 2));
 }

+
+static inline
+int netfront_check_accelerator_queue_busy(struct net_device *dev,
+                                          struct netfront_info *np)
+{
+        int rc = 1;
+        unsigned flags;
+
+        /*
+         * Call the check busy accelerator hook. The use count for the
+         * accelerator's hooks is incremented for the duration of the
+         * call to prevent the accelerator being able to modify the
+         * hooks in the middle (by, for example, unloading)
+         */
+        if ( np->accel_vif_state.hooks ) {
+                spin_lock_irqsave(&accelerators_lock, flags);
+                ++(np->accelerator->hooks_usecount);
+                spin_unlock_irqrestore(&accelerators_lock, flags);
+
+                if ( np->accel_vif_state.hooks )
+                        rc = np->accel_vif_state.hooks->check_busy(dev);
+
+                spin_lock_irqsave(&accelerators_lock, flags);
+                if ( (--(np->accelerator->hooks_usecount) == 0 ) )
+                        accelerator_remove_hooks(np->accelerator);
+                spin_unlock_irqrestore(&accelerators_lock, flags);
+        }
+
+        return rc;
+}
+
+
 static inline void network_maybe_wake_tx(struct net_device *dev)
 {
         struct netfront_info *np = netdev_priv(dev);

         if (unlikely(netif_queue_stopped(dev)) &&
             netfront_tx_slot_available(np) &&
-            likely(netif_running(dev)))
+            likely(netif_running(dev)) &&
+            netfront_check_accelerator_queue_busy(dev, np))
                 netif_wake_queue(dev);
 }

+
+int netfront_check_queue_busy(struct net_device *dev)
+{
+        struct netfront_info *np = netdev_priv(dev);
+
+        return unlikely(netif_queue_stopped(dev)) &&
+                netfront_tx_slot_available(np) &&
+                likely(netif_running(dev));
+}
+EXPORT_SYMBOL(netfront_check_queue_busy);
+
+
 static int network_open(struct net_device *dev)
 {
         struct netfront_info *np = netdev_priv(dev);
+        unsigned flags;

         memset(&np->stats, 0, sizeof(np->stats));

@@ -633,8 +1059,34 @@ static int network_open(struct net_devic
         if (netfront_carrier_ok(np)) {
                 network_alloc_rx_buffers(dev);
                 np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
-                if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
+                if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)){
+                        /*
+                         * Call the stop_napi_interrupts accelerator
+                         * hook.
+                         * The use count for the accelerator's hooks
+                         * is incremented for the duration of the
+                         * call to prevent the accelerator being able
+                         * to modify the hooks in the middle (by, for
+                         * example, unloading)
+                         */
+                        if ( np->accel_vif_state.hooks ) {
+                                spin_lock_irqsave(&accelerators_lock, flags);
+                                ++np->accelerator->hooks_usecount;
+                                spin_unlock_irqrestore(&accelerators_lock,
+                                                       flags);
+
+                                if ( np->accel_vif_state.hooks )
+                                        np->accel_vif_state.hooks->stop_napi_interrupts(dev);
+
+                                spin_lock_irqsave(&accelerators_lock, flags);
+                                if ( (--np->accelerator->hooks_usecount)==0 )
+                                        accelerator_remove_hooks
+                                                (np->accelerator);
+                                spin_unlock_irqrestore(&accelerators_lock,
+                                                       flags);
+                        }
+
                         netif_rx_schedule(dev);
+                }
         }
         spin_unlock_bh(&np->rx_lock);

@@ -702,6 +1154,30 @@ static void rx_refill_timeout(unsigned l
 static void rx_refill_timeout(unsigned long data)
 {
         struct net_device *dev = (struct net_device *)data;
+        struct netfront_info *np = netdev_priv(dev);
+        unsigned flags;
+
+        /*
+         * Call the stop_napi_interrupts accelerator hook. The use
+         * count for the accelerator's hooks is incremented for the
+         * duration of the call to prevent the accelerator being able
+         * to modify the hooks in the middle (by, for example,
+         * unloading)
+         */
+        if ( np->accel_vif_state.hooks ) {
+                spin_lock_irqsave(&accelerators_lock, flags);
+                ++np->accelerator->hooks_usecount;
+                spin_unlock_irqrestore(&accelerators_lock, flags);
+
+                if ( np->accel_vif_state.hooks )
+                        np->accel_vif_state.hooks->stop_napi_interrupts(dev);
+
+                spin_lock_irqsave(&accelerators_lock, flags);
+                if ( (--np->accelerator->hooks_usecount) == 0 )
+                        accelerator_remove_hooks(np->accelerator);
+                spin_unlock_irqrestore(&accelerators_lock, flags);
+        }
+
         netif_rx_schedule(dev);
 }

@@ -941,6 +1417,12 @@ static int network_start_xmit(struct sk_
         unsigned int offset = offset_in_page(data);
         unsigned int len = skb_headlen(skb);

+        /* Check the fast path, if hooks are available */
+        if ( np->accel_vif_state.hooks &&
+             np->accel_vif_state.hooks->start_xmit(skb, dev) ) {
+                return 0;
+        }
+
         frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE;
         if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
                 printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n",
@@ -1037,15 +1519,41 @@ static irqreturn_t netif_int(int irq, vo
 {
         struct net_device *dev = dev_id;
         struct netfront_info *np = netdev_priv(dev);
-        unsigned long flags;
+        unsigned long flags, flags1;

         spin_lock_irqsave(&np->tx_lock, flags);

         if (likely(netfront_carrier_ok(np))) {
                 network_tx_buf_gc(dev);
                 /* Under tx_lock: protects access to rx shared-ring indexes. */
-                if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
+                if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) {
+                        /*
+                         * Call the stop_napi_interrupts accelerator
+                         * hook.
+                         * The use count for the accelerator's hooks
+                         * is incremented for the duration of the
+                         * call to prevent the accelerator being able
+                         * to modify the hooks in the middle (by, for
+                         * example, unloading)
+                         */
+                        if ( np->accel_vif_state.hooks ) {
+                                spin_lock_irqsave(&accelerators_lock, flags1);
+                                ++np->accelerator->hooks_usecount;
+                                spin_unlock_irqrestore(&accelerators_lock,
+                                                       flags1);
+
+                                if ( np->accel_vif_state.hooks )
+                                        np->accel_vif_state.hooks->stop_napi_interrupts(dev);
+
+                                spin_lock_irqsave(&accelerators_lock, flags1);
+                                if ( (--np->accelerator->hooks_usecount)==0 )
+                                        accelerator_remove_hooks
+                                                (np->accelerator);
+                                spin_unlock_irqrestore(&accelerators_lock,
+                                                       flags1);
+                        }
+
                         netif_rx_schedule(dev);
+                }
         }

         spin_unlock_irqrestore(&np->tx_lock, flags);
@@ -1305,7 +1813,7 @@ static int netif_poll(struct net_device
         struct netif_extra_info *extras = rinfo.extras;
         RING_IDX i, rp;
         struct multicall_entry *mcl;
-        int work_done, budget, more_to_do = 1;
+        int work_done, budget, more_to_do = 1, accel_more_to_do = 1;
         struct sk_buff_head rxq;
         struct sk_buff_head errq;
         struct sk_buff_head tmpq;
@@ -1472,6 +1980,20 @@ err:

         network_alloc_rx_buffers(dev);

+        if (work_done < budget) {
+                /* there's some spare capacity, try the accelerated path */
+                int accel_budget = budget - work_done;
+                int accel_budget_start = accel_budget;
+
+                if ( np->accel_vif_state.hooks ) {
+                        accel_more_to_do =
+                                np->accel_vif_state.hooks->netdev_poll
+                                        (dev, &accel_budget);
+                        work_done += (accel_budget_start - accel_budget);
+                } else
+                        accel_more_to_do = 0;
+        }
+
         *pbudget -= work_done;
         dev->quota -= work_done;

@@ -1479,15 +2001,28 @@ err:
                 local_irq_save(flags);

                 RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do);
-                if (!more_to_do)
+
+                if (!more_to_do && !accel_more_to_do) {
+                        /*
+                         * Slow path has nothing more to do, see if
+                         * fast path is likewise
+                         */
+                        if ( np->accel_vif_state.hooks ) {
+                                accel_more_to_do =
+                                        np->accel_vif_state.hooks->start_napi_interrupts(dev);
+                        }
+                }
+
+                if (!more_to_do && !accel_more_to_do) {
                         __netif_rx_complete(dev);
+                }

                 local_irq_restore(flags);
         }

         spin_unlock(&np->rx_lock);
-
-        return more_to_do;
+
+        return more_to_do | accel_more_to_do;
 }

 static void netif_release_tx_bufs(struct netfront_info *np)
@@ -1687,7 +2222,9 @@ static int network_connect(struct net_de
         struct sk_buff *skb;
         grant_ref_t ref;
         netif_rx_request_t *req;
-        unsigned int feature_rx_copy, feature_rx_flip;
+        unsigned int feature_rx_copy, feature_rx_flip, feature_accel;
+        char *accel_frontend;
+        int accel_len;

         err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
                            "feature-rx-copy", "%u", &feature_rx_copy);
@@ -1698,6 +2235,13 @@ static int network_connect(struct net_de
         if (err != 1)
                 feature_rx_flip = 1;

+        feature_accel = 1;
+        accel_frontend = xenbus_read(XBT_NIL, np->xbdev->otherend,
+                                     "accel", &accel_len);
+        if ( IS_ERR(accel_frontend) ) {
+                feature_accel = 0;
+        }
+
         /*
          * Copy packets on receive path if:
          *  (a) This was requested by user, and the backend supports it; or
@@ -1709,6 +2253,11 @@ static int network_connect(struct net_de
         err = talk_to_backend(np->xbdev, np);
         if (err)
                 return err;
+
+        if ( feature_accel ) {
+                netfront_load_accelerator(np, np->xbdev, accel_frontend);
+                kfree(accel_frontend);
+        }

         xennet_set_features(dev);

@@ -1955,6 +2504,7 @@ static struct net_device * __devinit cre

         spin_lock_init(&np->tx_lock);
         spin_lock_init(&np->rx_lock);
+        np->accel_vif_state.hooks = NULL;

         skb_queue_head_init(&np->rx_batch);
         np->rx_target = RX_DFL_MIN_TARGET;
@@ -2110,6 +2660,9 @@ static int __init netif_init(void)
         if (is_initial_xendomain())
                 return 0;

+        INIT_LIST_HEAD(&accelerators_list);
+        spin_lock_init(&accelerators_lock);
+
         IPRINTK("Initialising virtual ethernet driver.\n");

         (void)register_inetaddr_notifier(&notifier_inetdev);
diff -r cd3ade350f3f drivers/xen/netfront/netfront.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/netfront/netfront.h   Thu Jun 14 14:57:34 2007 +0100
@@ -0,0 +1,198 @@
+/******************************************************************************
+ * Virtual network driver for conversing with remote driver backends.
+ *
+ * Copyright (c) 2002-2005, K A Fraser
+ * Copyright (c) 2005, XenSource Ltd
+ * Copyright (C) 2007 Solarflare Communications, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef NETFRONT_H
+#define NETFRONT_H
+
+#include <xen/interface/io/netif.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/list.h>
+
+#define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE)
+#define NET_RX_RING_SIZE __RING_SIZE((struct netif_rx_sring *)0, PAGE_SIZE)
+
+#include <xen/xenbus.h>
+/* Function pointer table for hooks into a network acceleration
+   plugin. These are called at appropriate points from the netfront
+   driver */
+struct netfront_accel_hooks {
+        /* new_device: The plugin is asked to support a new network interface */
+        int (*new_device)(struct net_device *net_dev, struct xenbus_device *dev);
+        /* suspend, resume, remove: Equivalent to the normal xenbus_* callbacks */
+        int (*suspend)(struct xenbus_device *dev);
+        int (*resume)(struct xenbus_device *dev);
+        int (*remove)(struct xenbus_device *dev);
+        /* backend_changed: Callback from watch based on backend's
+           xenbus state changing */
+        void (*backend_changed)(struct xenbus_device *dev,
+                                enum xenbus_state backend_state);
+        /* The net_device is being polled, check the accelerated
+           hardware for any pending packets */
+        int (*netdev_poll)(struct net_device *dev, int *pbudget);
+        /* start_xmit: Used to give the accelerated plugin the option
+           of sending a packet.
+           Returns non-zero if it has done so, or
+           zero to decline and force the packet onto normal send path */
+        int (*start_xmit)(struct sk_buff *skb, struct net_device *dev);
+        /* start/stop_napi_interrupts: Used by netfront to indicate
+           when napi interrupts should be enabled or disabled */
+        int (*start_napi_interrupts)(struct net_device *dev);
+        void (*stop_napi_interrupts)(struct net_device *dev);
+        /* Called before re-enabling the TX queue to check the fast
+           path has slots too */
+        int (*check_busy)(struct net_device *dev);
+};
+
+/* Per-netfront device state for the accelerator. This is used to
+   allow efficient per-netfront device access to the accelerator hooks */
+struct netfront_accel_vif_state {
+        struct list_head link;
+
+        struct xenbus_device *dev;
+        struct netfront_info *np;
+        struct netfront_accel_hooks *hooks;
+};
+
+/* Per-accelerator state stored in netfront. These form a list that
+   is used to track which devices are accelerated by which plugins,
+   and what plugins are available/have been requested */
+struct netfront_accelerator {
+        /* Used to make a list */
+        struct list_head link;
+        /* ID of the accelerator */
+        int id;
+        /* String describing the accelerator. Currently this is the
+           name of the accelerator module. This is provided by the
+           backend accelerator through xenstore */
+        char *frontend;
+        /* The hooks into the accelerator plugin module */
+        struct netfront_accel_hooks *hooks;
+        /* Protect against removal of hooks while in use, must hold
+           accelerators_lock to change */
+        unsigned hooks_usecount;
+        /* List of per-netfront device state (struct netfront_accel_vif_state)
+           for each netfront device that is using this accelerator */
+        struct list_head vif_states;
+        /* Semaphore to signal that all users of this accelerator have
+           finished using it before module is unloaded */
+        struct semaphore exit_semaphore;
+};
+
+
+struct netfront_info {
+        struct list_head list;
+        struct net_device *netdev;
+
+        struct net_device_stats stats;
+
+        struct netif_tx_front_ring tx;
+        struct netif_rx_front_ring rx;
+
+        spinlock_t tx_lock;
+        spinlock_t rx_lock;
+
+        unsigned int irq;
+        unsigned int copying_receiver;
+        unsigned int carrier;
+
+        /* Receive-ring batched refills. */
+#define RX_MIN_TARGET 8
+#define RX_DFL_MIN_TARGET 64
+#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
+        unsigned rx_min_target, rx_max_target, rx_target;
+        struct sk_buff_head rx_batch;
+
+        struct timer_list rx_refill_timer;
+
+        /*
+         * {tx,rx}_skbs store outstanding skbuffs. The first entry in tx_skbs
+         * is an index into a chain of free entries.
+         */
+        struct sk_buff *tx_skbs[NET_TX_RING_SIZE+1];
+        struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
+
+#define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
+        grant_ref_t gref_tx_head;
+        grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1];
+        grant_ref_t gref_rx_head;
+        grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
+
+        struct xenbus_device *xbdev;
+        int tx_ring_ref;
+        int rx_ring_ref;
+        u8 mac[ETH_ALEN];
+
+        unsigned long rx_pfn_array[NET_RX_RING_SIZE];
+        struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
+        struct mmu_update rx_mmu[NET_RX_RING_SIZE];
+
+        /* Private pointer to state internal to accelerator module */
+        void *accel_priv;
+        /* The (list of) accelerator(s) used by this netfront device */
+        struct netfront_accelerator *accelerator;
+        /* The accelerator state for this netfront device */
+        struct netfront_accel_vif_state accel_vif_state;
+};
+
+
+/* Called by an accelerator plugin module when it has loaded.
+ *
+ * frontend: the string describing the accelerator, currently the module name
+ * hooks: the hooks for netfront to use to call into the accelerator
+ */
+extern int netfront_accelerator_loaded(const char *frontend,
+                                       struct netfront_accel_hooks *hooks);
+
+/* Called when an accelerator plugin is ready to accelerate a device
+ * that has been passed to it from netfront using the "new_device"
+ * hook.
+ *
+ * frontend: the string describing the accelerator. Must match the
+ * one passed to netfront_accelerator_loaded()
+ * dev: the xenbus device the plugin was asked to accelerate
+ */
+extern void netfront_accelerator_ready(const char *frontend,
+                                       struct xenbus_device *dev);
+
+/* Called by an accelerator plugin module when it is about to unload.
+ *
+ * frontend: the string describing the accelerator. Must match the
+ * one passed to netfront_accelerator_loaded()
+ */
+extern void netfront_accelerator_unloaded(const char *frontend);
+
+/* Called by an accelerator before waking the net device's TX queue to
+ * ensure the slow path has available slots. Returns true if OK to
+ * wake, false if still busy
+ */
+extern int netfront_check_queue_busy(struct net_device *net_dev);
+
+#endif /* NETFRONT_H */
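For illustration, a minimal plugin built against the exported API above might look like the sketch below. Only netfront_accelerator_loaded(), netfront_accelerator_ready() and netfront_accelerator_unloaded() come from this patch; the module name "example_accel" and the hook bodies are hypothetical, and a real plugin would fill in the remaining hooks and only call netfront_accelerator_ready() once its hardware is actually usable.

/* Hypothetical accelerator plugin module -- a sketch, not part of this patch */
#include <linux/kernel.h>
#include <linux/module.h>
#include "netfront.h"

/* Must match the string the backend writes to the "accel" xenstore key,
   since that is what netfront passes to request_module() and uses to
   match plugins against vifs */
#define EXAMPLE_FRONTEND "example_accel"

static int example_new_device(struct net_device *net_dev,
                              struct xenbus_device *dev)
{
        /* Probe the accelerated hardware for this vif here; once it is
           usable, tell netfront so the per-vif hooks get installed */
        netfront_accelerator_ready(EXAMPLE_FRONTEND, dev);
        return 0;
}

static struct netfront_accel_hooks example_hooks = {
        .new_device = example_new_device,
        /* .suspend, .resume, .remove, .backend_changed, .netdev_poll,
           .start_xmit, .start_napi_interrupts, .stop_napi_interrupts
           and .check_busy would be filled in similarly */
};

static int __init example_init(void)
{
        /* Hand netfront our hook table; it will call new_device() for
           every vif that has requested this accelerator */
        return netfront_accelerator_loaded(EXAMPLE_FRONTEND, &example_hooks);
}

static void __exit example_exit(void)
{
        /* Blocks until no code path is executing our hooks, so the
           module text can be safely discarded on return */
        netfront_accelerator_unloaded(EXAMPLE_FRONTEND);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");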
Zhu Han
2007-Jun-15 15:59 UTC
Re: [Xen-devel] [PATCH 4/4] [Net] Support accelerated network plugin modules
Hi, Kieran,

I'm just wondering why you acquire the lock and increment hooks_usecount every time you use a hook routine. Is there a more generic way to synchronize the code paths that use the hook routines against netfront_accelerator_unloaded(), considering that you can synchronize the tx/rx data path easily?

On 6/15/07, Kieran Mansley <kmansley@solarflare.com> wrote:
> Frontend net driver acceleration
*/ > +#define RX_MIN_TARGET 8 > +#define RX_DFL_MIN_TARGET 64 > +#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) > + unsigned rx_min_target, rx_max_target, rx_target; > + struct sk_buff_head rx_batch; > + > + struct timer_list rx_refill_timer; > + > + /* > + * {tx,rx}_skbs store outstanding skbuffs. The first entry in tx_skbs > + * is an index into a chain of free entries. > + */ > + struct sk_buff *tx_skbs[NET_TX_RING_SIZE+1]; > + struct sk_buff *rx_skbs[NET_RX_RING_SIZE]; > + > +#define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) > + grant_ref_t gref_tx_head; > + grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1]; > + grant_ref_t gref_rx_head; > + grant_ref_t grant_rx_ref[NET_RX_RING_SIZE]; > + > + struct xenbus_device *xbdev; > + int tx_ring_ref; > + int rx_ring_ref; > + u8 mac[ETH_ALEN]; > + > + unsigned long rx_pfn_array[NET_RX_RING_SIZE]; > + struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1]; > + struct mmu_update rx_mmu[NET_RX_RING_SIZE]; > + > + /* Private pointer to state internal to accelerator module */ > + void *accel_priv; > + /* The (list of) accelerator(s) used by this netfront device */ > + struct netfront_accelerator *accelerator; > + /* The accelerator state for this netfront device */ > + struct netfront_accel_vif_state accel_vif_state; > +}; > + > + > +/* Called by an accelerator plugin module when it has loaded. > + * > + * frontend: the string describing the accelerator, currently the module name > + * hooks: the hooks for netfront to use to call into the accelerator > + */ > +extern int netfront_accelerator_loaded(const char *frontend, > + struct netfront_accel_hooks *hooks); > + > +/* Called when an accelerator plugin is ready to accelerate a device * > + * that has been passed to it from netfront using the "new_device" > + * hook. > + * > + * frontend: the string describing the accelerator. Must match the > + * one passed to netfront_accelerator_loaded() > + * dev: the xenbus device the plugin was asked to accelerate > + */ > +extern void netfront_accelerator_ready(const char *frontend, > + struct xenbus_device *dev); > + > +/* Called by an accelerator plugin module when it is about to unload. > + * > + * frontend: the string describing the accelerator. Must match the > + * one passed to netfront_accelerator_loaded() > + */ > +extern void netfront_accelerator_unloaded(const char *frontend); > + > +/* Called by an accelerator before waking the net device''s TX queue to > + * ensure the slow path has available slots. Returns true if OK to > + * wake, false if still busy > + */ > +extern int netfront_check_queue_busy(struct net_device *net_dev); > + > +#endif /* NETFRONT_H */ > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xensource.com > http://lists.xensource.com/xen-devel > > >-- best regards, hanzhu _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Stephen Hemminger
2007-Jun-15 16:06 UTC
Re: [Xen-devel] [PATCH 4/4] [Net] Support accelerated network plugin modules
On Fri, 15 Jun 2007 11:59:43 -0400
"Zhu Han" <schumi.han@gmail.com> wrote:

> Hi, Kieran,
>
> I'm just wondering why you try to acquire the lock and increase the
> hooks_usecount each time you use the hook routines. Is there any
> generic way to synchronize the code path using hook routines and
> netfront_accelerator_unloaded, considering you can synchronize the

Learn to use RCU for this. It would reduce the lock overhead.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
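In minimal sketch form, the pattern being suggested is: publish the hooks pointer with rcu_assign_pointer(), dereference it only inside a read-side critical section, and retire it with synchronize_rcu(). The helper names are illustrative rather than from the posted patch, and the sketch assumes the hook called under the read lock never blocks:

#include <linux/rcupdate.h>

/* Read side: no lock and no refcount, just an RCU critical section. */
static void accel_stop_napi_irq_rcu(struct netfront_info *np,
				    struct net_device *dev)
{
	struct netfront_accel_hooks *hooks;

	rcu_read_lock();
	hooks = rcu_dereference(np->accel_vif_state.hooks);
	if (hooks)
		hooks->stop_napi_interrupts(dev);
	rcu_read_unlock();
}

/* Unload side: clear the pointer, then wait for all readers to drain
 * before the plugin module may go away. */
static void accel_clear_hooks_rcu(struct netfront_accel_vif_state *vs)
{
	rcu_assign_pointer(vs->hooks, NULL);
	synchronize_rcu();
}

Whether the hooks can block turns out to be the crux: rcu_read_lock() sections must not sleep, which is exactly the objection raised later in the thread.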
Kieran Mansley
2007-Jun-15 16:22 UTC
Re: [Xen-devel] [PATCH 4/4] [Net] Support accelerated network plugin modules
On Fri, 2007-06-15 at 11:59 -0400, Zhu Han wrote:
> Hi, Kieran,
>
> I'm just wondering why you try to acquire the lock and increase the
> hooks_usecount each time you use the hook routines. Is there any
> generic way to synchronize the code path using hook routines and
> netfront_accelerator_unloaded, considering you can synchronize the
> tx/rx data path easily.

The lock protects the use_count variable. The use_count variable
prevents the plugin module unloading while it is being used. I couldn't
just use the lock to prevent the module unloading, as the hook function
(i) might block (and holding a spin_lock would be rather antisocial),
and (ii) might call back into netfront and try to take the lock again,
which would deadlock.

The data path hooks do not block, and are already protected by locks,
so those locks are also taken when trying to unload the plugin module.
For this reason it's not necessary to use the hooks_usecount on the
data path.

I think that RCU would only work in this situation if the hook
functions didn't block, and it wouldn't affect the data path locking
overhead as it wouldn't be necessary there.

Kieran

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Keir Fraser
2007-Jun-15 16:31 UTC
Re: [Xen-devel] [PATCH 4/4] [Net] Support accelerated network plugin modules
On 15/6/07 17:22, "Kieran Mansley" <kmansley@solarflare.com> wrote:

> The lock protects the use_count variable.

Yes, that's one thing I noticed -- can you use atomic_t for reference
counts and hence reduce the number of times you need to lock/unlock? At
least the open-coded lock-decrement-test-maybe-free-unlock sequences
could be abstracted into a put_foo() function.

 -- Keir

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
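Concretely, the sequence Keir is pointing at appears several times in the patch and could be wrapped as below. This is only a sketch against the v1 fields; with an atomic_t count, atomic_dec_and_lock() could additionally avoid taking the lock on the common-case put:

static void put_accelerator(struct netfront_accelerator *accelerator)
{
	unsigned long flags;

	/* Abstracts the open-coded lock-decrement-test-maybe-free-unlock
	 * sequence: drop one reference and, on the last one, tear the
	 * hooks down. */
	spin_lock_irqsave(&accelerators_lock, flags);
	if (--accelerator->hooks_usecount == 0)
		accelerator_remove_hooks(accelerator);
	spin_unlock_irqrestore(&accelerators_lock, flags);
}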
Zhu Han
2007-Jun-15 18:03 UTC
Re: [Xen-devel] [PATCH 4/4] [Net] Support accelerated network plugin modules
On 6/15/07, Kieran Mansley <kmansley@solarflare.com> wrote:
>
> The lock protects the use_count variable. The use_count variable
> prevents the plugin module unloading while it is being used. I couldn't
> just use the lock to prevent the module unloading as the hook function
> (i) might block (and holding a spin_lock would be rather antisocial)
> (ii) might call back into netfront and try to take the lock again, which
> would deadlock.
>

If the hook routine blocks on a code path other than the tx/rx path,
why not use a simple atomic reference count? When the reference count
reaches zero, free it. Since you can synchronize on the tx/rx path, the
free will not happen on the critical code path, so the uninitialise
work can be done inside the free routine even if it blocks.

> I think that RCU would only work in this situation if the hook
> functions didn't block.

I agree.

> Kieran

--
best regards,
hanzhu

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
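The refcount-with-blocking-release pattern described here maps naturally onto the kernel's kref. A minimal sketch, borrowing the accel_kref and exit_semaphore field names that appear in the revised patch later in the thread:

#include <linux/kref.h>
#include <linux/kernel.h>
#include <asm/semaphore.h>

/* The release function runs exactly once, on the final kref_put(),
 * and outside any spinlock, so it is allowed to block or wake up
 * a sleeping unload path. */
static void accel_release(struct kref *ref)
{
	struct netfront_accelerator *accelerator =
		container_of(ref, struct netfront_accelerator, accel_kref);

	/* Let the unload path know no users of the hooks remain. */
	up(&accelerator->exit_semaphore);
}

/* Caller side, around any use of the hooks:
 *
 *	kref_get(&accelerator->accel_kref);
 *	... call into the plugin, possibly blocking ...
 *	kref_put(&accelerator->accel_kref, accel_release);
 */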
Kieran Mansley
2007-Jun-18 15:31 UTC
Re: [Xen-devel] [PATCH 4/4] [Net] Support accelerated network plugin modules
On Fri, 2007-06-15 at 14:03 -0400, Zhu Han wrote:
> On 6/15/07, Kieran Mansley <kmansley@solarflare.com> wrote:
> >
> > The lock protects the use_count variable. The use_count variable
> > prevents the plugin module unloading while it is being used. I couldn't
> > just use the lock to prevent the module unloading as the hook function
> > (i) might block (and holding a spin_lock would be rather antisocial)
> > (ii) might call back into netfront and try to take the lock again, which
> > would deadlock.
> >
>
> If the hook routine blocks on a code path other than the tx/rx path,
> why not use a simple atomic reference count? When the reference count
> reaches zero, free it. Since you can synchronize on the tx/rx path,
> the free will not happen on the critical code path, so the
> uninitialise work can be done inside the free routine even if it
> blocks.

Switching to atomics could be of benefit. This would make the
hooks_usecount a kref, and due to the third rule of krefs (from the
kref docs) we'd still need synchronisation around most of the kref_get
calls, but as in some of those cases we hold the lock for the list
access already, I'm guessing that would be OK. I can prepare another
version of the patches with this change, as I'm currently making a
number of other changes suggested by Keir.

I suspect that some would still prefer a full switch to using RCU,
however. I hope you don't mind me following up to all the
locking-related questions in this one email. The case for RCU seems to
centre on whether or not the hook functions can (or indeed should)
block, and whether it will result in a useful increase in performance.
I've taken another look at RCU today to see if I could make it work.

The reason for hooks blocking is not well defined, as there is only one
implementation of an accelerator so far, and I'm not sure what other
accelerator modules might do. However, the one we've written makes use
of xenbus during a number of the callbacks, and I suspect this is
likely to be pretty common. Many xenbus calls can block. For example,
during the probe hook call, the plugin accesses xenstore to gather
information from the backend. During a suspend or resume hook call, it
may need to do things such as unregister or register a xenbus watch.
These are just examples rather than a definitive list.

If RCU is the way to go, these calls would all have to be made
non-blocking, for example by using a work queue to perform the blocking
work later. This would be OK, but would need an additional few
functions on the interface between netfront and the accelerator, and
would complicate netfront a little more. For example, the accelerator's
suspend hook returning would no longer signify that the plugin had
completed its suspend-related work, so netfront would have to wait for
a "suspend_done" call from the plugin before it could itself return. In
these cases the total locking overhead is likely to be similar to the
current case, while the code would have become more complex. None of
this would affect the data path locking overhead (which is already zero
by design).

One thing I note from looking into RCU is that the call_rcu callback
function may be invoked from bottom-half context. To give us the
zero-additional-locking-overhead-on-the-data-path property that the
current approach has, we call the following when trying to disable the
hooks:

	netif_poll_disable(vif_state->np->netdev);
	netif_tx_lock_bh(vif_state->np->netdev);

As both of these can block, and so are not suitable for bottom-half
execution, we'd either have to find an alternative (e.g. use RCU on the
data path, which would clearly increase the locking overhead) or defer
this work to another context, which would again complicate matters.

In all I feel that RCU is not the right solution here: I can see it
resulting in more code, harder-to-write plugins, and little benefit, as
the data path performance would (at best) not be affected.

Perhaps a good way forward is for me to produce another iteration of
the patches using atomics/kref in place of the hooks_usecount and fewer
spin_locks as described at the start, and then see if that is
acceptable.

Thanks for taking the time to look at the patches,

Kieran

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
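For reference, the deferral Kieran describes would look roughly like this: the suspend hook only queues work, and the blocking xenbus calls run later in process context. The suspend_work field, the get_vif_state() lookup, and the "suspend_done" callback are all hypothetical, and the three-argument INIT_WORK() matches the 2.6.18-era kernels this patch series targets:

#include <linux/workqueue.h>

static void accel_do_suspend(void *arg)
{
	struct netfront_accel_vif_state *vif_state = arg;

	/* Process context: blocking xenbus calls are safe here,
	 * e.g. unregistering watches. */
	/* ... */
	/* ... then signal netfront with the notional suspend_done() */
}

static int accel_suspend_hook(struct xenbus_device *dev)
{
	/* get_vif_state(): hypothetical lookup from xenbus device to
	 * per-vif accelerator state */
	struct netfront_accel_vif_state *vif_state = get_vif_state(dev);

	INIT_WORK(&vif_state->suspend_work, accel_do_suspend, vif_state);
	schedule_work(&vif_state->suspend_work);
	return 0;	/* returning does not mean the work is complete */
}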
Kieran Mansley
2007-Jun-21 13:17 UTC
[Xen-devel] [PATCH 4/4] [Net] Support accelerated network plugin modules
Frontend net driver acceleration Signed-off-by: Kieran Mansley <kmansley@solarflare.com> diff -r 82196b117a5b drivers/xen/netfront/Makefile --- a/drivers/xen/netfront/Makefile Wed Jun 20 17:01:02 2007 +0100 +++ b/drivers/xen/netfront/Makefile Wed Jun 20 17:01:07 2007 +0100 @@ -1,4 +1,4 @@ obj-$(CONFIG_XEN_NETDEV_FRONTEND) := xennet.o -xennet-objs := netfront.o +xennet-objs := netfront.o accel.o diff -r 82196b117a5b drivers/xen/netfront/accel.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/drivers/xen/netfront/accel.c Thu Jun 21 13:47:33 2007 +0100 @@ -0,0 +1,773 @@ +/****************************************************************************** + * Virtual network driver for conversing with remote driver backends. + * + * Copyright (C) 2007 Solarflare Communications, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/list.h> +#include <linux/kref.h> + +#include <xen/xenbus.h> + +#include "netfront.h" + +#define DPRINTK(fmt, args...) \ + pr_debug("netfront/accel (%s:%d) " fmt, \ + __FUNCTION__, __LINE__, ##args) +#define IPRINTK(fmt, args...) \ + printk(KERN_INFO "netfront/accel: " fmt, ##args) +#define WPRINTK(fmt, args...) \ + printk(KERN_WARNING "netfront/accel: " fmt, ##args) + +/* + * List of all netfront accelerator plugin modules available. Each + * list entry is of type struct netfront_accelerator. 
+ */ +static struct list_head accelerators_list; + +/* + * Lock to protect access to accelerators_list + */ +static spinlock_t accelerators_lock; + +/* Forward declaration of kref cleanup functions */ +static void accel_kref_release(struct kref *ref); +static void vif_kref_release(struct kref *ref); + + +void netif_init_accel(void) +{ + INIT_LIST_HEAD(&accelerators_list); + spin_lock_init(&accelerators_lock); +} + + +/* + * Initialise the accel_vif_state field in the netfront state + */ +void init_accelerator_vif(struct netfront_info *np, + struct xenbus_device *dev) +{ + np->accelerator = NULL; + + /* It''s assumed that these things don''t change */ + np->accel_vif_state.np = np; + np->accel_vif_state.dev = dev; + + np->accel_vif_state.ready_for_probe = 1; + np->accel_vif_state.need_probe = NULL; +} + + +/* + * Compare a frontend description string against an accelerator to see + * if they match. Would ultimately be nice to replace the string with + * a unique numeric identifier for each accelerator. + */ +static int match_accelerator(const char *frontend, + struct netfront_accelerator *accelerator) +{ + return strcmp(frontend, accelerator->frontend) == 0; +} + + +/* + * Add a frontend vif to the list of vifs that is using a netfront + * accelerator plugin module. + */ +static void add_accelerator_vif(struct netfront_accelerator *accelerator, + struct netfront_info *np) +{ + np->accelerator = accelerator; + + list_add(&np->accel_vif_state.link, &accelerator->vif_states); +} + + +/* + * Initialise the state to track an accelerator plugin module. + */ +static int init_accelerator(const char *frontend, + struct netfront_accelerator **result) +{ + struct netfront_accelerator *accelerator = + kmalloc(sizeof(struct netfront_accelerator), GFP_KERNEL); + int frontend_len; + + if (!accelerator) { + DPRINTK("%s: no memory for accelerator", __FUNCTION__); + return -ENOMEM; + } + + frontend_len = strlen(frontend) + 1; + accelerator->frontend = kmalloc(frontend_len, GFP_KERNEL); + if (!accelerator->frontend) { + DPRINTK("%s: no memory for accelerator", __FUNCTION__); + kfree(accelerator); + return -ENOMEM; + } + strlcpy(accelerator->frontend, frontend, frontend_len); + + INIT_LIST_HEAD(&accelerator->vif_states); + spin_lock_init(&accelerator->vif_states_lock); + + accelerator->hooks = NULL; + + accelerator->ready_for_probe = 1; + accelerator->need_probe = NULL; + + list_add(&accelerator->link, &accelerators_list); + + *result = accelerator; + + return 0; +} + + +/* + * Modify the hooks stored in the per-vif state to match that in the + * netfront accelerator''s state. + */ +static void +accelerator_set_vif_state_hooks(struct netfront_accel_vif_state *vif_state) +{ + /* This function must be called with the vif_state_lock held */ + + /* + * Take references to stop hooks disappearing. + * This persists until vif_kref gets to zero. 
+ */ + kref_get(&vif_state->np->accelerator->accel_kref); + /* This persists until vif_state->hooks are cleared */ + kref_init(&vif_state->vif_kref); + + /* Make sure there are no data path operations going on */ + netif_poll_disable(vif_state->np->netdev); + netif_tx_lock_bh(vif_state->np->netdev); + + vif_state->hooks = vif_state->np->accelerator->hooks; + + netif_tx_unlock_bh(vif_state->np->netdev); + netif_poll_enable(vif_state->np->netdev); +} + + +static void accelerator_probe_new_vif(struct netfront_info *np, + struct xenbus_device *dev, + struct netfront_accelerator *accelerator) +{ + struct netfront_accel_hooks *hooks; + unsigned flags; + + DPRINTK("%s\n", __FUNCTION__); + + spin_lock_irqsave(&accelerator->vif_states_lock, flags); + + /* + * Include this frontend device on the accelerator''s list + */ + add_accelerator_vif(accelerator, np); + + hooks = accelerator->hooks; + + if (hooks) { + if (np->accel_vif_state.ready_for_probe) { + np->accel_vif_state.ready_for_probe = 0; + + kref_get(&accelerator->accel_kref); + + spin_unlock_irqrestore(&accelerator->vif_states_lock, + flags); + + hooks->new_device(np->netdev, dev); + + kref_put(&accelerator->accel_kref, + accel_kref_release); + /* + * Hooks will get linked into vif_state by a + * future call by the accelerator to + * netfront_accelerator_ready() + */ + return; + } else { + if (np->accel_vif_state.need_probe != NULL) + DPRINTK("Probe request on vif awaiting probe\n"); + np->accel_vif_state.need_probe = hooks; + } + } + + spin_unlock_irqrestore(&accelerator->vif_states_lock, + flags); + return; +} + +/* + * Request that a particular netfront accelerator plugin is loaded. + * Usually called as a result of the vif configuration specifying + * which one to use. + */ +int netfront_load_accelerator(struct netfront_info *np, + struct xenbus_device *dev, + const char *frontend) +{ + struct netfront_accelerator *accelerator; + int rc; + unsigned flags; + + DPRINTK("%s: %s\n", __FUNCTION__, frontend); + + spin_lock_irqsave(&accelerators_lock, flags); + + /* + * Look at list of loaded accelerators to see if the requested + * one is already there + */ + list_for_each_entry(accelerator, &accelerators_list, link) { + if (match_accelerator(frontend, accelerator)) { + spin_unlock_irqrestore(&accelerators_lock, flags); + + accelerator_probe_new_vif(np, dev, accelerator); + + return 0; + } + } + + /* Couldn''t find it, so create a new one and load the module */ + if ((rc = init_accelerator(frontend, &accelerator)) < 0) { + spin_unlock_irqrestore(&accelerators_lock, flags); + return rc; + } + + spin_unlock_irqrestore(&accelerators_lock, flags); + + /* Include this frontend device on the accelerator''s list */ + spin_lock_irqsave(&accelerator->vif_states_lock, flags); + add_accelerator_vif(accelerator, np); + spin_unlock_irqrestore(&accelerator->vif_states_lock, flags); + + DPRINTK("%s: loading module %s\n", __FUNCTION__, frontend); + + /* load module */ + request_module("%s", frontend); + + /* + * Module should now call netfront_accelerator_loaded() once + * it''s up and running, and we can continue from there + */ + + return 0; +} + + +/* + * Go through all the netfront vifs and see if they have requested + * this accelerator. Notify the accelerator plugin of the relevant + * device if so. Called when an accelerator plugin module is first + * loaded and connects to netfront. 
+ */ +static void +accelerator_probe_vifs(struct netfront_accelerator *accelerator, + struct netfront_accel_hooks *hooks, + unsigned lock_flags) +{ + struct netfront_accel_vif_state *vif_state, *tmp; + + /* Calling function must have taken the vif_states_lock */ + + DPRINTK("%s: %p\n", __FUNCTION__, accelerator); + + /* + * kref_init() takes a single reference to the hooks that will + * persist until the accelerator hooks are removed (e.g. by + * accelerator module unload) + */ + kref_init(&accelerator->accel_kref); + + /* + * Store the hooks for future calls to probe a new device, and + * to wire into the vif_state once the accelerator plugin is + * ready to accelerate each vif + */ + BUG_ON(hooks == NULL); + accelerator->hooks = hooks; + + list_for_each_entry_safe(vif_state, tmp, &accelerator->vif_states, + link) { + struct netfront_info *np = vif_state->np; + + if (vif_state->ready_for_probe) { + vif_state->ready_for_probe = 0; + kref_get(&accelerator->accel_kref); + + /* + * drop lock before calling hook. hooks are + * protected by the kref + */ + spin_unlock_irqrestore(&accelerator->vif_states_lock, + lock_flags); + + hooks->new_device(np->netdev, vif_state->dev); + + kref_put(&accelerator->accel_kref, accel_kref_release); + + /* Retake lock for next go round the loop */ + spin_lock_irqsave(&accelerator->vif_states_lock, lock_flags); + + /* + * Hooks will get linked into vif_state by a call to + * netfront_accelerator_ready() once accelerator + * plugin is ready for action + */ + } else { + if (vif_state->need_probe != NULL) + DPRINTK("Probe request on vif awaiting probe\n"); + vif_state->need_probe = hooks; + } + } + + /* Return with vif_states_lock held, as on entry */ +} + + +/* + * Wrapper for accelerator_probe_vifs that checks now is a good time + * to do the probe, and postpones till previous state cleared up if + * necessary + */ +static void +accelerator_probe_vifs_on_load(struct netfront_accelerator *accelerator, + struct netfront_accel_hooks *hooks) +{ + unsigned flags; + + DPRINTK("%s\n", __FUNCTION__); + + spin_lock_irqsave(&accelerator->vif_states_lock, flags); + + if (accelerator->ready_for_probe) { + accelerator->ready_for_probe = 0; + accelerator_probe_vifs(accelerator, hooks, flags); + } else { + if (accelerator->need_probe) + DPRINTK("Probe request on accelerator awaiting probe\n"); + accelerator->need_probe = hooks; + } + + spin_unlock_irqrestore(&accelerator->vif_states_lock, + flags); +} + + +/* + * Called by the netfront accelerator plugin module when it has loaded + */ +int netfront_accelerator_loaded(const char *frontend, + struct netfront_accel_hooks *hooks) +{ + struct netfront_accelerator *accelerator; + unsigned flags; + + spin_lock_irqsave(&accelerators_lock, flags); + + /* + * Look through list of accelerators to see if it has already + * been requested + */ + list_for_each_entry(accelerator, &accelerators_list, link) { + if (match_accelerator(frontend, accelerator)) { + spin_unlock_irqrestore(&accelerators_lock, flags); + + accelerator_probe_vifs_on_load(accelerator, hooks); + + return 0; + } + } + + /* + * If it wasn''t in the list, add it now so that when it is + * requested the caller will find it + */ + DPRINTK("%s: Couldn''t find matching accelerator (%s)\n", + __FUNCTION__, frontend); + + init_accelerator(frontend, &accelerator); + + spin_unlock_irqrestore(&accelerators_lock, flags); + + return 0; +} +EXPORT_SYMBOL_GPL(netfront_accelerator_loaded); + + +/* + * Called by the accelerator module after it has been probed with a + * network device to 
say that it is ready to start accelerating + * traffic on that device + */ +void netfront_accelerator_ready(const char *frontend, + struct xenbus_device *dev) +{ + struct netfront_accelerator *accelerator; + struct netfront_accel_vif_state *accel_vif_state; + unsigned flags, flags1; + + DPRINTK("%s: %s %p\n", __FUNCTION__, frontend, dev); + + spin_lock_irqsave(&accelerators_lock, flags); + + list_for_each_entry(accelerator, &accelerators_list, link) { + if (match_accelerator(frontend, accelerator)) { + spin_lock_irqsave + (&accelerator->vif_states_lock, flags1); + + list_for_each_entry(accel_vif_state, + &accelerator->vif_states, link) { + if (accel_vif_state->dev == dev) + accelerator_set_vif_state_hooks + (accel_vif_state); + } + + spin_unlock_irqrestore + (&accelerator->vif_states_lock, flags1); + goto done; + } + } + + done: + spin_unlock_irqrestore(&accelerators_lock, flags); +} +EXPORT_SYMBOL_GPL(netfront_accelerator_ready); + + +/* + * Safely remove the accelerator function hooks from a netfront state. + */ +static void accelerator_remove_hooks(struct netfront_accelerator *accelerator) +{ + struct netfront_accel_vif_state *vif_state, *tmp; + unsigned flags; + + spin_lock_irqsave(&accelerator->vif_states_lock, flags); + + list_for_each_entry_safe(vif_state, tmp, + &accelerator->vif_states, + link) { + /* Make sure there are no data path operations going on */ + netif_poll_disable(vif_state->np->netdev); + netif_tx_lock_bh(vif_state->np->netdev); + + /* + * Remove the hooks, but leave the vif_state on the + * accelerator''s list as that signifies this vif is + * interested in using that accelerator if it becomes + * available again + */ + vif_state->hooks = NULL; + + netif_tx_unlock_bh(vif_state->np->netdev); + netif_poll_enable(vif_state->np->netdev); + + /* + * Remove the reference taken when the vif_state hooks + * were set, must be called without lock held + */ + spin_unlock_irqrestore(&accelerator->vif_states_lock, flags); + kref_put(&vif_state->vif_kref, vif_kref_release); + spin_lock_irqsave(&accelerator->vif_states_lock, flags); + } + + accelerator->hooks = NULL; + + spin_unlock_irqrestore(&accelerator->vif_states_lock, flags); + + /* Remove the reference taken when module loaded */ + kref_put(&accelerator->accel_kref, accel_kref_release); +} + + +/* + * Called by a netfront accelerator when it is unloaded. This safely + * removes the hooks into the plugin and blocks until all devices have + * finished using it, so on return it is safe to unload. + */ +void netfront_accelerator_unloaded(const char *frontend) +{ + struct netfront_accelerator *accelerator; + unsigned flags; + + spin_lock_irqsave(&accelerators_lock, flags); + + list_for_each_entry(accelerator, &accelerators_list, link) { + if (match_accelerator(frontend, accelerator)) { + spin_unlock_irqrestore(&accelerators_lock, flags); + + /* + * Use semaphore to ensure we know when all + * uses of hooks are complete + */ + sema_init(&accelerator->exit_semaphore, 0); + + accelerator_remove_hooks(accelerator); + + /* Wait for hooks to be unused, then return */ + down(&accelerator->exit_semaphore); + + return; + } + } + spin_unlock_irqrestore(&accelerators_lock, flags); +} +EXPORT_SYMBOL_GPL(netfront_accelerator_unloaded); + + +int netfront_check_accelerator_queue_busy(struct net_device *dev, + struct netfront_info *np) +{ + struct netfront_accel_hooks *hooks; + int rc = 1; + unsigned flags; + + /* + * Call the check busy accelerator hook. 
The use count for the + * accelerator''s hooks is incremented for the duration of the + * call to prevent the accelerator being able to modify the + * hooks in the middle (by, for example, unloading) + */ + if (np->accel_vif_state.hooks) { + spin_lock_irqsave(&np->accelerator->vif_states_lock, flags); + hooks = np->accel_vif_state.hooks; + if (hooks) { + kref_get(&np->accel_vif_state.vif_kref); + spin_unlock_irqrestore + (&np->accelerator->vif_states_lock, flags); + + rc = np->accel_vif_state.hooks->check_busy(dev); + + kref_put(&np->accel_vif_state.vif_kref, + vif_kref_release); + } else { + spin_unlock_irqrestore + (&np->accelerator->vif_states_lock, flags); + } + } + + return rc; +} + + +int netfront_accelerator_call_remove(struct netfront_info *np, + struct xenbus_device *dev) +{ + struct netfront_accel_hooks *hooks; + unsigned flags; + int rc = 0; + + /* + * Call the remove accelerator hook. The use count for the + * accelerator''s hooks is incremented for the duration of the + * call to prevent the accelerator being able to modify the + * hooks in the middle (by, for example, unloading) + */ + if (np->accel_vif_state.hooks) { + spin_lock_irqsave(&np->accelerator->vif_states_lock, flags); + hooks = np->accel_vif_state.hooks; + if (hooks) { + kref_get(&np->accel_vif_state.vif_kref); + spin_unlock_irqrestore + (&np->accelerator->vif_states_lock, flags); + + rc = np->accel_vif_state.hooks->remove(dev); + + kref_put(&np->accel_vif_state.vif_kref, + vif_kref_release); + } else { + spin_unlock_irqrestore + (&np->accelerator->vif_states_lock, flags); + } + } + return rc; +} + + +int netfront_accelerator_call_resume(struct netfront_info *np, + struct xenbus_device *dev) +{ + struct netfront_accel_hooks *hooks; + unsigned flags; + int rc = 0; + + /* + * Call the resume accelerator hook. The use count for the + * accelerator''s hooks is incremented for the duration of + * the call to prevent the accelerator being able to modify + * the hooks in the middle (by, for example, unloading) + */ + if (np->accel_vif_state.hooks) { + spin_lock_irqsave(&np->accelerator->vif_states_lock, flags); + hooks = np->accel_vif_state.hooks; + if (hooks) { + kref_get(&np->accel_vif_state.vif_kref); + spin_unlock_irqrestore + (&np->accelerator->vif_states_lock, flags); + + rc = np->accel_vif_state.hooks->resume(dev); + + kref_put(&np->accel_vif_state.vif_kref, + vif_kref_release); + } else { + spin_unlock_irqrestore + (&np->accelerator->vif_states_lock, flags); + } + } + return rc; +} + + +void netfront_accelerator_call_backend_changed(struct netfront_info *np, + struct xenbus_device *dev, + enum xenbus_state backend_state) +{ + struct netfront_accel_hooks *hooks; + unsigned flags; + + /* + * Call the backend_changed accelerator hook. 
The use count + * for the accelerator''s hooks is incremented for the duration + * of the call to prevent the accelerator being able to modify + * the hooks in the middle (by, for example, unloading) + */ + if (np->accel_vif_state.hooks) { + spin_lock_irqsave(&np->accelerator->vif_states_lock, flags); + hooks = np->accel_vif_state.hooks; + if (hooks) { + kref_get(&np->accel_vif_state.vif_kref); + spin_unlock_irqrestore + (&np->accelerator->vif_states_lock, flags); + + np->accel_vif_state.hooks->backend_changed + (dev, backend_state); + + kref_put(&np->accel_vif_state.vif_kref, + vif_kref_release); + } else { + spin_unlock_irqrestore + (&np->accelerator->vif_states_lock, flags); + } + } +} + + +void netfront_accelerator_call_stop_napi_irq(struct netfront_info *np, + struct net_device *dev) +{ + struct netfront_accel_hooks *hooks; + unsigned flags; + + /* + * Call the stop_napi_interrupts accelerator hook. The use + * count for the accelerator''s hooks is incremented for the + * duration of the call to prevent the accelerator being able + * to modify the hooks in the middle (by, for example, + * unloading) + */ + + if (np->accel_vif_state.hooks) { + spin_lock_irqsave(&np->accelerator->vif_states_lock, flags); + hooks = np->accel_vif_state.hooks; + if (hooks) { + kref_get(&np->accel_vif_state.vif_kref); + spin_unlock_irqrestore + (&np->accelerator->vif_states_lock, flags); + + np->accel_vif_state.hooks->stop_napi_irq(dev); + + kref_put(&np->accel_vif_state.vif_kref, + vif_kref_release); + } else { + spin_unlock_irqrestore + (&np->accelerator->vif_states_lock, flags); + } + } +} + + +/* + * Once all users of hooks have kref_put()''d we can signal that it''s + * safe to unload + */ +static void accel_kref_release(struct kref *ref) +{ + struct netfront_accelerator *accelerator + container_of(ref, struct netfront_accelerator, accel_kref); + struct netfront_accel_hooks *hooks; + unsigned flags; + + /* Signal that all users of hooks are done */ + up(&accelerator->exit_semaphore); + + spin_lock_irqsave(&accelerator->vif_states_lock, flags); + if (accelerator->need_probe) { + hooks = accelerator->need_probe; + accelerator->need_probe = NULL; + accelerator_probe_vifs(accelerator, hooks, flags); + } + else + accelerator->ready_for_probe = 1; + + spin_unlock_irqrestore(&accelerator->vif_states_lock, flags); +} + + +static void vif_kref_release(struct kref *ref) +{ + struct netfront_accel_vif_state *vif_state = + container_of(ref, struct netfront_accel_vif_state, vif_kref); + struct netfront_accel_hooks *hooks; + unsigned flags; + + /* + * Now that this vif has finished using the hooks, it can + * decrement the accelerator''s global copy ref count + */ + kref_put(&vif_state->np->accelerator->accel_kref, accel_kref_release); + + spin_lock_irqsave(&vif_state->np->accelerator->vif_states_lock, flags); + if (vif_state->need_probe) { + hooks = vif_state->need_probe; + vif_state->need_probe = NULL; + spin_unlock_irqrestore + (&vif_state->np->accelerator->vif_states_lock, flags); + hooks->new_device(vif_state->np->netdev, vif_state->dev); + } else { + vif_state->ready_for_probe = 1; + spin_unlock_irqrestore + (&vif_state->np->accelerator->vif_states_lock, flags); + } +} + diff -r 82196b117a5b drivers/xen/netfront/netfront.c --- a/drivers/xen/netfront/netfront.c Wed Jun 20 17:01:02 2007 +0100 +++ b/drivers/xen/netfront/netfront.c Wed Jun 20 17:01:07 2007 +0100 @@ -3,6 +3,7 @@ * * Copyright (c) 2002-2005, K A Fraser * Copyright (c) 2005, XenSource Ltd + * Copyright (C) 2007 Solarflare Communications, Inc. 
* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version 2 @@ -74,6 +75,8 @@ struct netfront_cb { #define NETFRONT_SKB_CB(skb) ((struct netfront_cb *)((skb)->cb)) +#include "netfront.h" + /* * Mutually-exclusive module options to select receive data path: * rx_copy : Packets are copied by network backend into local memory @@ -144,57 +147,6 @@ static inline int netif_needs_gso(struct #define GRANT_INVALID_REF 0 -#define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE) -#define NET_RX_RING_SIZE __RING_SIZE((struct netif_rx_sring *)0, PAGE_SIZE) - -struct netfront_info { - struct list_head list; - struct net_device *netdev; - - struct net_device_stats stats; - - struct netif_tx_front_ring tx; - struct netif_rx_front_ring rx; - - spinlock_t tx_lock; - spinlock_t rx_lock; - - unsigned int irq; - unsigned int copying_receiver; - unsigned int carrier; - - /* Receive-ring batched refills. */ -#define RX_MIN_TARGET 8 -#define RX_DFL_MIN_TARGET 64 -#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) - unsigned rx_min_target, rx_max_target, rx_target; - struct sk_buff_head rx_batch; - - struct timer_list rx_refill_timer; - - /* - * {tx,rx}_skbs store outstanding skbuffs. The first entry in tx_skbs - * is an index into a chain of free entries. - */ - struct sk_buff *tx_skbs[NET_TX_RING_SIZE+1]; - struct sk_buff *rx_skbs[NET_RX_RING_SIZE]; - -#define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) - grant_ref_t gref_tx_head; - grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1]; - grant_ref_t gref_rx_head; - grant_ref_t grant_rx_ref[NET_RX_RING_SIZE]; - - struct xenbus_device *xbdev; - int tx_ring_ref; - int rx_ring_ref; - u8 mac[ETH_ALEN]; - - unsigned long rx_pfn_array[NET_RX_RING_SIZE]; - struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1]; - struct mmu_update rx_mmu[NET_RX_RING_SIZE]; -}; - struct netfront_rx_info { struct netif_rx_response rx; struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; @@ -334,6 +286,8 @@ static int __devexit netfront_remove(str DPRINTK("%s\n", dev->nodename); + netfront_accelerator_call_remove(info, dev); + netif_disconnect_backend(info); del_timer_sync(&info->rx_refill_timer); @@ -358,6 +312,8 @@ static int netfront_resume(struct xenbus struct netfront_info *info = dev->dev.driver_data; DPRINTK("%s\n", dev->nodename); + + netfront_accelerator_call_resume(info, dev); netif_disconnect_backend(info); return 0; @@ -577,6 +533,8 @@ static void backend_changed(struct xenbu xenbus_frontend_closed(dev); break; } + + netfront_accelerator_call_backend_changed(np, dev, backend_state); } /** Send a packet on a net device to encourage switches to learn the @@ -613,15 +571,29 @@ static inline int netfront_tx_slot_avail (TX_MAX_TARGET - MAX_SKB_FRAGS - 2)); } + static inline void network_maybe_wake_tx(struct net_device *dev) { struct netfront_info *np = netdev_priv(dev); if (unlikely(netif_queue_stopped(dev)) && netfront_tx_slot_available(np) && - likely(netif_running(dev))) + likely(netif_running(dev)) && + netfront_check_accelerator_queue_busy(dev, np)) netif_wake_queue(dev); } + + +int netfront_check_queue_busy(struct net_device *dev) +{ + struct netfront_info *np = netdev_priv(dev); + + return unlikely(netif_queue_stopped(dev)) && + netfront_tx_slot_available(np) && + likely(netif_running(dev)); +} +EXPORT_SYMBOL(netfront_check_queue_busy); + static int network_open(struct net_device *dev) { @@ -633,8 +605,11 @@ static int network_open(struct net_devic if 
(netfront_carrier_ok(np)) { network_alloc_rx_buffers(dev); np->rx.sring->rsp_event = np->rx.rsp_cons + 1; - if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) + if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)){ + netfront_accelerator_call_stop_napi_irq(np, dev); + netif_rx_schedule(dev); + } } spin_unlock_bh(&np->rx_lock); @@ -702,6 +677,10 @@ static void rx_refill_timeout(unsigned l static void rx_refill_timeout(unsigned long data) { struct net_device *dev = (struct net_device *)data; + struct netfront_info *np = netdev_priv(dev); + + netfront_accelerator_call_stop_napi_irq(np, dev); + netif_rx_schedule(dev); } @@ -941,6 +920,13 @@ static int network_start_xmit(struct sk_ unsigned int offset = offset_in_page(data); unsigned int len = skb_headlen(skb); + /* Check the fast path, if hooks are available */ + if (np->accel_vif_state.hooks && + np->accel_vif_state.hooks->start_xmit(skb, dev)) { + /* Fast path has sent this packet */ + return 0; + } + frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE; if (unlikely(frags > MAX_SKB_FRAGS + 1)) { printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n", @@ -1044,8 +1030,11 @@ static irqreturn_t netif_int(int irq, vo if (likely(netfront_carrier_ok(np))) { network_tx_buf_gc(dev); /* Under tx_lock: protects access to rx shared-ring indexes. */ - if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) + if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) { + netfront_accelerator_call_stop_napi_irq(np, dev); + netif_rx_schedule(dev); + } } spin_unlock_irqrestore(&np->tx_lock, flags); @@ -1305,7 +1294,7 @@ static int netif_poll(struct net_device struct netif_extra_info *extras = rinfo.extras; RING_IDX i, rp; struct multicall_entry *mcl; - int work_done, budget, more_to_do = 1; + int work_done, budget, more_to_do = 1, accel_more_to_do = 1; struct sk_buff_head rxq; struct sk_buff_head errq; struct sk_buff_head tmpq; @@ -1472,6 +1461,20 @@ err: network_alloc_rx_buffers(dev); + if (work_done < budget) { + /* there''s some spare capacity, try the accelerated path */ + int accel_budget = budget - work_done; + int accel_budget_start = accel_budget; + + if (np->accel_vif_state.hooks) { + accel_more_to_do = + np->accel_vif_state.hooks->netdev_poll + (dev, &accel_budget); + work_done += (accel_budget_start - accel_budget); + } else + accel_more_to_do = 0; + } + *pbudget -= work_done; dev->quota -= work_done; @@ -1479,15 +1482,26 @@ err: local_irq_save(flags); RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do); - if (!more_to_do) + + if (!more_to_do && !accel_more_to_do && + np->accel_vif_state.hooks) { + /* + * Slow path has nothing more to do, see if + * fast path is likewise + */ + accel_more_to_do = + np->accel_vif_state.hooks->start_napi_irq(dev); + } + + if (!more_to_do && !accel_more_to_do) __netif_rx_complete(dev); local_irq_restore(flags); } spin_unlock(&np->rx_lock); - - return more_to_do; + + return more_to_do | accel_more_to_do; } static void netif_release_tx_bufs(struct netfront_info *np) @@ -1687,7 +1701,9 @@ static int network_connect(struct net_de struct sk_buff *skb; grant_ref_t ref; netif_rx_request_t *req; - unsigned int feature_rx_copy, feature_rx_flip; + unsigned int feature_rx_copy, feature_rx_flip, feature_accel; + char *accel_frontend; + int accel_len; err = xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-rx-copy", "%u", &feature_rx_copy); @@ -1698,6 +1714,12 @@ static int network_connect(struct net_de if (err != 1) feature_rx_flip = 1; + feature_accel = 1; + accel_frontend = xenbus_read(XBT_NIL, np->xbdev->otherend, + "accel", &accel_len); + if 
(IS_ERR(accel_frontend)) + feature_accel = 0; + /* * Copy packets on receive path if: * (a) This was requested by user, and the backend supports it; or @@ -1709,6 +1731,11 @@ static int network_connect(struct net_de err = talk_to_backend(np->xbdev, np); if (err) return err; + + if (feature_accel) { + netfront_load_accelerator(np, np->xbdev, accel_frontend); + kfree(accel_frontend); + } xennet_set_features(dev); @@ -1956,6 +1983,8 @@ static struct net_device * __devinit cre spin_lock_init(&np->tx_lock); spin_lock_init(&np->rx_lock); + init_accelerator_vif(np, dev); + skb_queue_head_init(&np->rx_batch); np->rx_target = RX_DFL_MIN_TARGET; np->rx_min_target = RX_DFL_MIN_TARGET; @@ -2110,6 +2139,8 @@ static int __init netif_init(void) if (is_initial_xendomain()) return 0; + netif_init_accel(); + IPRINTK("Initialising virtual ethernet driver.\n"); (void)register_inetaddr_notifier(¬ifier_inetdev); diff -r 82196b117a5b drivers/xen/netfront/netfront.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/drivers/xen/netfront/netfront.h Wed Jun 20 17:01:07 2007 +0100 @@ -0,0 +1,287 @@ +/****************************************************************************** + * Virtual network driver for conversing with remote driver backends. + * + * Copyright (c) 2002-2005, K A Fraser + * Copyright (c) 2005, XenSource Ltd + * Copyright (C) 2007 Solarflare Communications, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef NETFRONT_H +#define NETFRONT_H + +#include <xen/interface/io/netif.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/list.h> +#include <linux/kref.h> + +#define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE) +#define NET_RX_RING_SIZE __RING_SIZE((struct netif_rx_sring *)0, PAGE_SIZE) + +#include <xen/xenbus.h> + +/* + * Function pointer table for hooks into a network acceleration + * plugin. 
These are called at appropriate points from the netfront + * driver + */ +struct netfront_accel_hooks { + /* + * new_device: Accelerator hook to ask the plugin to support a + * new network interface + */ + int (*new_device)(struct net_device *net_dev, struct xenbus_device *dev); + /* + * suspend, resume, remove: Equivalent to the normal xenbus_* + * callbacks + */ + int (*suspend)(struct xenbus_device *dev); + int (*resume)(struct xenbus_device *dev); + int (*remove)(struct xenbus_device *dev); + /* + * backend_changed: Callback from watch based on backend''s + * xenbus state changing + */ + void (*backend_changed)(struct xenbus_device *dev, + enum xenbus_state backend_state); + /* + * The net_device is being polled, check the accelerated + * hardware for any pending packets + */ + int (*netdev_poll)(struct net_device *dev, int *pbudget); + /* + * start_xmit: Used to give the accelerated plugin the option + * of sending a packet. Returns non-zero if has done so, or + * zero to decline and force the packet onto normal send + * path + */ + int (*start_xmit)(struct sk_buff *skb, struct net_device *dev); + /* + * start/stop_napi_interrupts Used by netfront to indicate + * when napi interrupts should be enabled or disabled + */ + int (*start_napi_irq)(struct net_device *dev); + void (*stop_napi_irq)(struct net_device *dev); + /* + * Called before re-enabling the TX queue to check the fast + * path has slots too + */ + int (*check_busy)(struct net_device *dev); +}; + +/* + * Per-netfront device state for the accelerator. This is used to + * allow efficient per-netfront device access to the accelerator + * hooks + */ +struct netfront_accel_vif_state { + struct list_head link; + + struct xenbus_device *dev; + struct netfront_info *np; + struct netfront_accel_hooks *hooks; + + /* + * Protect against removal of hooks while in use. + */ + struct kref vif_kref; + + unsigned ready_for_probe; + struct netfront_accel_hooks *need_probe; +}; + +/* + * Per-accelerator state stored in netfront. These form a list that + * is used to track which devices are accelerated by which plugins, + * and what plugins are available/have been requested + */ +struct netfront_accelerator { + /* Used to make a list */ + struct list_head link; + /* ID of the accelerator */ + int id; + /* + * String describing the accelerator. Currently this is the + * name of the accelerator module. This is provided by the + * backend accelerator through xenstore + */ + char *frontend; + /* The hooks into the accelerator plugin module */ + struct netfront_accel_hooks *hooks; + /* + * Protect against removal of hooks while in use. + */ + struct kref accel_kref; + /* + * List of per-netfront device state (struct + * netfront_accel_vif_state) for each netfront device that is + * using this accelerator + */ + struct list_head vif_states; + spinlock_t vif_states_lock; + /* + * Semaphore to signal that all users of this accelerator have + * finished using it before module is unloaded + */ + struct semaphore exit_semaphore; + + unsigned ready_for_probe; + struct netfront_accel_hooks *need_probe; +}; + +struct netfront_info { + struct list_head list; + struct net_device *netdev; + + struct net_device_stats stats; + + struct netif_tx_front_ring tx; + struct netif_rx_front_ring rx; + + spinlock_t tx_lock; + spinlock_t rx_lock; + + unsigned int irq; + unsigned int copying_receiver; + unsigned int carrier; + + /* Receive-ring batched refills. 
*/ +#define RX_MIN_TARGET 8 +#define RX_DFL_MIN_TARGET 64 +#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) + unsigned rx_min_target, rx_max_target, rx_target; + struct sk_buff_head rx_batch; + + struct timer_list rx_refill_timer; + + /* + * {tx,rx}_skbs store outstanding skbuffs. The first entry in tx_skbs + * is an index into a chain of free entries. + */ + struct sk_buff *tx_skbs[NET_TX_RING_SIZE+1]; + struct sk_buff *rx_skbs[NET_RX_RING_SIZE]; + +#define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) + grant_ref_t gref_tx_head; + grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1]; + grant_ref_t gref_rx_head; + grant_ref_t grant_rx_ref[NET_RX_RING_SIZE]; + + struct xenbus_device *xbdev; + int tx_ring_ref; + int rx_ring_ref; + u8 mac[ETH_ALEN]; + + unsigned long rx_pfn_array[NET_RX_RING_SIZE]; + struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1]; + struct mmu_update rx_mmu[NET_RX_RING_SIZE]; + + /* Private pointer to state internal to accelerator module */ + void *accel_priv; + /* The accelerator used by this netfront device */ + struct netfront_accelerator *accelerator; + /* The accelerator state for this netfront device */ + struct netfront_accel_vif_state accel_vif_state; +}; + + +/* Exported Functions */ + +/* + * Called by an accelerator plugin module when it has loaded. + * + * frontend: the string describing the accelerator, currently the module name + * hooks: the hooks for netfront to use to call into the accelerator + */ +extern int netfront_accelerator_loaded(const char *frontend, + struct netfront_accel_hooks *hooks); + +/* + * Called when an accelerator plugin is ready to accelerate a device * + * that has been passed to it from netfront using the "new_device" + * hook. + * + * frontend: the string describing the accelerator. Must match the + * one passed to netfront_accelerator_loaded() + * dev: the xenbus device the plugin was asked to accelerate + */ +extern void netfront_accelerator_ready(const char *frontend, + struct xenbus_device *dev); + +/* + * Called by an accelerator plugin module when it is about to unload. + * + * frontend: the string describing the accelerator. Must match the + * one passed to netfront_accelerator_loaded() + */ +extern void netfront_accelerator_unloaded(const char *frontend); + +/* + * Called by an accelerator before waking the net device''s TX queue to + * ensure the slow path has available slots. Returns true if OK to + * wake, false if still busy + */ +extern int netfront_check_queue_busy(struct net_device *net_dev); + + + +/* Internal-to-netfront Functions */ + +/* + * Call into accelerator and check to see if it has tx space before we + * wake the net device''s TX queue. 
Returns true if OK to wake, false + * if still busy + */ +extern +int netfront_check_accelerator_queue_busy(struct net_device *dev, + struct netfront_info *np); +extern +int netfront_accelerator_call_remove(struct netfront_info *np, + struct xenbus_device *dev); +extern +int netfront_accelerator_call_resume(struct netfront_info *np, + struct xenbus_device *dev); +extern +void netfront_accelerator_call_backend_changed(struct netfront_info *np, + struct xenbus_device *dev, + enum xenbus_state backend_state); +extern +void netfront_accelerator_call_stop_napi_irq(struct netfront_info *np, + struct net_device *dev); + +extern +int netfront_load_accelerator(struct netfront_info *np, + struct xenbus_device *dev, + const char *frontend); + +extern +void netif_init_accel(void); + +extern +void init_accelerator_vif(struct netfront_info *np, + struct xenbus_device *dev); +#endif /* NETFRONT_H */ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Kieran Mansley
2007-Jul-09 12:09 UTC
[Xen-devel] [PATCH 4/4] [Net] Support accelerated network plugin modules
Frontend net driver acceleration Signed-off-by: Kieran Mansley <kmansley@solarflare.com> diff -r 2b3852b24aa4 drivers/xen/netfront/Makefile --- a/drivers/xen/netfront/Makefile Mon Jul 09 12:54:55 2007 +0100 +++ b/drivers/xen/netfront/Makefile Mon Jul 09 12:55:00 2007 +0100 @@ -1,4 +1,4 @@ obj-$(CONFIG_XEN_NETDEV_FRONTEND) := xennet.o -xennet-objs := netfront.o +xennet-objs := netfront.o accel.o diff -r 2b3852b24aa4 drivers/xen/netfront/accel.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/drivers/xen/netfront/accel.c Mon Jul 09 12:55:38 2007 +0100 @@ -0,0 +1,866 @@ +/****************************************************************************** + * Virtual network driver for conversing with remote driver backends. + * + * Copyright (C) 2007 Solarflare Communications, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/list.h> +#include <linux/kref.h> + +#include <xen/xenbus.h> + +#include "netfront.h" + +#define DPRINTK(fmt, args...) \ + pr_debug("netfront/accel (%s:%d) " fmt, \ + __FUNCTION__, __LINE__, ##args) +#define IPRINTK(fmt, args...) \ + printk(KERN_INFO "netfront/accel: " fmt, ##args) +#define WPRINTK(fmt, args...) \ + printk(KERN_WARNING "netfront/accel: " fmt, ##args) + +/* + * List of all netfront accelerator plugin modules available. Each + * list entry is of type struct netfront_accelerator. 
+ */ +static struct list_head accelerators_list; + +/* + * Lock to protect access to accelerators_list + */ +static spinlock_t accelerators_lock; + +/* Forward declaration of kref cleanup functions */ +static void accel_kref_release(struct kref *ref); +static void vif_kref_release(struct kref *ref); + + +void netif_init_accel(void) +{ + INIT_LIST_HEAD(&accelerators_list); + spin_lock_init(&accelerators_lock); +} + + +/* + * Initialise the accel_vif_state field in the netfront state + */ +void init_accelerator_vif(struct netfront_info *np, + struct xenbus_device *dev) +{ + np->accelerator = NULL; + + /* It's assumed that these things don't change */ + np->accel_vif_state.np = np; + np->accel_vif_state.dev = dev; + + np->accel_vif_state.ready_for_probe = 1; + np->accel_vif_state.need_probe = NULL; +} + + +/* + * Compare a frontend description string against an accelerator to see + * if they match. Would ultimately be nice to replace the string with + * a unique numeric identifier for each accelerator. + */ +static int match_accelerator(const char *frontend, + struct netfront_accelerator *accelerator) +{ + return strcmp(frontend, accelerator->frontend) == 0; +} + + +/* + * Add a frontend vif to the list of vifs that are using a netfront + * accelerator plugin module. + */ +static void add_accelerator_vif(struct netfront_accelerator *accelerator, + struct netfront_info *np) +{ + if (np->accelerator == NULL) { + np->accelerator = accelerator; + + list_add(&np->accel_vif_state.link, &accelerator->vif_states); + } else { + /* + * May get here legitimately if reconnecting to the + * same accelerator, e.g. after resume, so check that + * is the case + */ + BUG_ON(np->accelerator != accelerator); + } +} + + +/* + * Initialise the state to track an accelerator plugin module. + */ +static int init_accelerator(const char *frontend, + struct netfront_accelerator **result) +{ + struct netfront_accelerator *accelerator = + kmalloc(sizeof(struct netfront_accelerator), GFP_KERNEL); + int frontend_len; + + if (!accelerator) { + DPRINTK("no memory for accelerator\n"); + return -ENOMEM; + } + + frontend_len = strlen(frontend) + 1; + accelerator->frontend = kmalloc(frontend_len, GFP_KERNEL); + if (!accelerator->frontend) { + DPRINTK("no memory for accelerator\n"); + kfree(accelerator); + return -ENOMEM; + } + strlcpy(accelerator->frontend, frontend, frontend_len); + + INIT_LIST_HEAD(&accelerator->vif_states); + spin_lock_init(&accelerator->vif_states_lock); + + accelerator->hooks = NULL; + + accelerator->ready_for_probe = 1; + accelerator->need_probe = NULL; + + list_add(&accelerator->link, &accelerators_list); + + *result = accelerator; + + return 0; +} + + +/* + * Modify the hooks stored in the per-vif state to match that in the + * netfront accelerator's state. + */ +static void +accelerator_set_vif_state_hooks(struct netfront_accel_vif_state *vif_state) +{ + /* This function must be called with the vif_states_lock held */ + + DPRINTK("%p\n", vif_state); + + /* + * Take references to stop hooks disappearing. + * This persists until vif_kref gets to zero.
+ */ + kref_get(&vif_state->np->accelerator->accel_kref); + /* This persists until vif_state->hooks are cleared */ + kref_init(&vif_state->vif_kref); + + /* Make sure there are no data path operations going on */ + netif_poll_disable(vif_state->np->netdev); + netif_tx_lock_bh(vif_state->np->netdev); + + vif_state->hooks = vif_state->np->accelerator->hooks; + + netif_tx_unlock_bh(vif_state->np->netdev); + netif_poll_enable(vif_state->np->netdev); +} + + +static void accelerator_probe_new_vif(struct netfront_info *np, + struct xenbus_device *dev, + struct netfront_accelerator *accelerator) +{ + struct netfront_accel_hooks *hooks; + unsigned long flags; + + DPRINTK("\n"); + + spin_lock_irqsave(&accelerator->vif_states_lock, flags); + + /* + * Include this frontend device on the accelerator's list + */ + add_accelerator_vif(accelerator, np); + + hooks = accelerator->hooks; + + if (hooks) { + if (np->accel_vif_state.ready_for_probe) { + np->accel_vif_state.ready_for_probe = 0; + + kref_get(&accelerator->accel_kref); + + spin_unlock_irqrestore(&accelerator->vif_states_lock, + flags); + + hooks->new_device(np->netdev, dev); + + kref_put(&accelerator->accel_kref, + accel_kref_release); + /* + * Hooks will get linked into vif_state by a + * future call by the accelerator to + * netfront_accelerator_ready() + */ + return; + } else { + if (np->accel_vif_state.need_probe != NULL) + DPRINTK("Probe request on vif awaiting probe\n"); + np->accel_vif_state.need_probe = hooks; + } + } + + spin_unlock_irqrestore(&accelerator->vif_states_lock, + flags); + return; +} + +/* + * Request that a particular netfront accelerator plugin is loaded. + * Usually called as a result of the vif configuration specifying + * which one to use. + */ +int netfront_load_accelerator(struct netfront_info *np, + struct xenbus_device *dev, + const char *frontend) +{ + struct netfront_accelerator *accelerator; + int rc; + unsigned long flags; + + DPRINTK("%s\n", frontend); + + spin_lock_irqsave(&accelerators_lock, flags); + + /* + * Look at list of loaded accelerators to see if the requested + * one is already there + */ + list_for_each_entry(accelerator, &accelerators_list, link) { + if (match_accelerator(frontend, accelerator)) { + spin_unlock_irqrestore(&accelerators_lock, flags); + + accelerator_probe_new_vif(np, dev, accelerator); + + return 0; + } + } + + /* Couldn't find it, so create a new one and load the module */ + if ((rc = init_accelerator(frontend, &accelerator)) < 0) { + spin_unlock_irqrestore(&accelerators_lock, flags); + return rc; + } + + spin_unlock_irqrestore(&accelerators_lock, flags); + + /* Include this frontend device on the accelerator's list */ + spin_lock_irqsave(&accelerator->vif_states_lock, flags); + add_accelerator_vif(accelerator, np); + spin_unlock_irqrestore(&accelerator->vif_states_lock, flags); + + DPRINTK("requesting module %s\n", frontend); + + /* load module */ + request_module("%s", frontend); + + /* + * Module should now call netfront_accelerator_loaded() once + * it's up and running, and we can continue from there + */ + + return 0; +} + + +/* + * Go through all the netfront vifs and see if they have requested + * this accelerator. Notify the accelerator plugin of the relevant + * device if so. Called when an accelerator plugin module is first + * loaded and connects to netfront.
+ */ +static void +accelerator_probe_vifs(struct netfront_accelerator *accelerator, + struct netfront_accel_hooks *hooks, + unsigned long lock_flags) +{ + struct netfront_accel_vif_state *vif_state, *tmp; + + /* Calling function must have taken the vif_states_lock */ + + DPRINTK("%p\n", accelerator); + + /* + * kref_init() takes a single reference to the hooks that will + * persist until the accelerator hooks are removed (e.g. by + * accelerator module unload) + */ + kref_init(&accelerator->accel_kref); + + /* + * Store the hooks for future calls to probe a new device, and + * to wire into the vif_state once the accelerator plugin is + * ready to accelerate each vif + */ + BUG_ON(hooks == NULL); + accelerator->hooks = hooks; + + list_for_each_entry_safe(vif_state, tmp, &accelerator->vif_states, + link) { + struct netfront_info *np = vif_state->np; + + if (vif_state->ready_for_probe) { + vif_state->ready_for_probe = 0; + kref_get(&accelerator->accel_kref); + + /* + * Drop lock before calling hook; hooks are + * protected by the kref + */ + spin_unlock_irqrestore(&accelerator->vif_states_lock, + lock_flags); + + hooks->new_device(np->netdev, vif_state->dev); + + kref_put(&accelerator->accel_kref, accel_kref_release); + + /* Retake lock for next go round the loop */ + spin_lock_irqsave(&accelerator->vif_states_lock, lock_flags); + + /* + * Hooks will get linked into vif_state by a call to + * netfront_accelerator_ready() once accelerator + * plugin is ready for action + */ + } else { + if (vif_state->need_probe != NULL) + DPRINTK("Probe request on vif awaiting probe\n"); + vif_state->need_probe = hooks; + } + } + + /* Return with vif_states_lock held, as on entry */ +} + + +/* + * Wrapper for accelerator_probe_vifs that checks that now is a good + * time to do the probe, and postpones it until the previous state has + * been cleared up if necessary + */ +static void +accelerator_probe_vifs_on_load(struct netfront_accelerator *accelerator, + struct netfront_accel_hooks *hooks) +{ + unsigned long flags; + + DPRINTK("\n"); + + spin_lock_irqsave(&accelerator->vif_states_lock, flags); + + if (accelerator->ready_for_probe) { + accelerator->ready_for_probe = 0; + accelerator_probe_vifs(accelerator, hooks, flags); + } else { + if (accelerator->need_probe) + DPRINTK("Probe request on accelerator awaiting probe\n"); + accelerator->need_probe = hooks; + } + + spin_unlock_irqrestore(&accelerator->vif_states_lock, + flags); +} + + +/* + * Called by the netfront accelerator plugin module when it has loaded + */ +int netfront_accelerator_loaded(const char *frontend, + struct netfront_accel_hooks *hooks) +{ + struct netfront_accelerator *accelerator; + unsigned long flags; + + spin_lock_irqsave(&accelerators_lock, flags); + + /* + * Look through list of accelerators to see if it has already + * been requested + */ + list_for_each_entry(accelerator, &accelerators_list, link) { + if (match_accelerator(frontend, accelerator)) { + spin_unlock_irqrestore(&accelerators_lock, flags); + + accelerator_probe_vifs_on_load(accelerator, hooks); + + return 0; + } + } + + /* + * If it wasn't in the list, add it now so that when it is + * requested the caller will find it + */ + DPRINTK("Couldn't find matching accelerator (%s)\n", + frontend); + + init_accelerator(frontend, &accelerator); + + spin_unlock_irqrestore(&accelerators_lock, flags); + + return 0; +} +EXPORT_SYMBOL_GPL(netfront_accelerator_loaded); + + +/* + * Called by the accelerator module after it has been probed with a + * network device to say that it is ready to start accelerating + * traffic
on that device + */ +void netfront_accelerator_ready(const char *frontend, + struct xenbus_device *dev) +{ + struct netfront_accelerator *accelerator; + struct netfront_accel_vif_state *accel_vif_state; + unsigned long flags, flags1; + + DPRINTK("%s %p\n", frontend, dev); + + spin_lock_irqsave(&accelerators_lock, flags); + + list_for_each_entry(accelerator, &accelerators_list, link) { + if (match_accelerator(frontend, accelerator)) { + spin_lock_irqsave + (&accelerator->vif_states_lock, flags1); + + list_for_each_entry(accel_vif_state, + &accelerator->vif_states, link) { + if (accel_vif_state->dev == dev) + accelerator_set_vif_state_hooks + (accel_vif_state); + } + + spin_unlock_irqrestore + (&accelerator->vif_states_lock, flags1); + goto done; + } + } + + done: + spin_unlock_irqrestore(&accelerators_lock, flags); +} +EXPORT_SYMBOL_GPL(netfront_accelerator_ready); + + +/* + * Safely remove the accelerator function hooks from a netfront state. + */ +static void accelerator_remove_hooks(struct netfront_accelerator *accelerator, + int remove_master) +{ + struct netfront_accel_vif_state *vif_state, *tmp; + unsigned long flags; + + spin_lock_irqsave(&accelerator->vif_states_lock, flags); + + list_for_each_entry_safe(vif_state, tmp, + &accelerator->vif_states, + link) { + /* Make sure there are no data path operations going on */ + netif_poll_disable(vif_state->np->netdev); + netif_tx_lock_bh(vif_state->np->netdev); + + /* + * Remove the hooks, but leave the vif_state on the + * accelerator's list as that signifies this vif is + * interested in using that accelerator if it becomes + * available again + */ + vif_state->hooks = NULL; + + netif_tx_unlock_bh(vif_state->np->netdev); + netif_poll_enable(vif_state->np->netdev); + + /* + * Remove the reference taken when the vif_state hooks + * were set, must be called without lock held + */ + spin_unlock_irqrestore(&accelerator->vif_states_lock, flags); + kref_put(&vif_state->vif_kref, vif_kref_release); + spin_lock_irqsave(&accelerator->vif_states_lock, flags); + } + + if (remove_master) + accelerator->hooks = NULL; + + spin_unlock_irqrestore(&accelerator->vif_states_lock, flags); + + if (remove_master) + /* Remove the reference taken when module loaded */ + kref_put(&accelerator->accel_kref, accel_kref_release); +} + + +/* + * Called by a netfront accelerator when it is unloaded. This safely + * removes the hooks into the plugin and blocks until all devices have + * finished using it, so on return it is safe to unload. + */ +void netfront_accelerator_stop(const char *frontend, int unloading) +{ + struct netfront_accelerator *accelerator; + unsigned long flags; + + spin_lock_irqsave(&accelerators_lock, flags); + + list_for_each_entry(accelerator, &accelerators_list, link) { + if (match_accelerator(frontend, accelerator)) { + spin_unlock_irqrestore(&accelerators_lock, flags); + + /* + * Use semaphore to ensure we know when all + * uses of hooks are complete + */ + sema_init(&accelerator->exit_semaphore, 0); + + accelerator_remove_hooks(accelerator, unloading); + + if (unloading) + /* Wait for hooks to be unused, then return */ + down(&accelerator->exit_semaphore); + + return; + } + } + spin_unlock_irqrestore(&accelerators_lock, flags); +} +EXPORT_SYMBOL_GPL(netfront_accelerator_stop); + + + +int netfront_check_accelerator_queue_busy(struct net_device *dev, + struct netfront_info *np) +{ + struct netfront_accel_hooks *hooks; + int rc = 1; + unsigned long flags; + + /* + * Call the check busy accelerator hook.
The use count for the + * accelerator's hooks is incremented for the duration of the + * call to prevent the accelerator being able to modify the + * hooks in the middle (by, for example, unloading) + */ + if (np->accel_vif_state.hooks) { + spin_lock_irqsave(&np->accelerator->vif_states_lock, flags); + hooks = np->accel_vif_state.hooks; + if (hooks) { + kref_get(&np->accel_vif_state.vif_kref); + spin_unlock_irqrestore + (&np->accelerator->vif_states_lock, flags); + + rc = np->accel_vif_state.hooks->check_busy(dev); + + kref_put(&np->accel_vif_state.vif_kref, + vif_kref_release); + } else { + spin_unlock_irqrestore + (&np->accelerator->vif_states_lock, flags); + } + } + + return rc; +} + + +int netfront_accelerator_call_remove(struct netfront_info *np, + struct xenbus_device *dev) +{ + struct netfront_accel_hooks *hooks; + unsigned long flags; + int rc = 0; + + /* + * Call the remove accelerator hook. The use count for the + * accelerator's hooks is incremented for the duration of the + * call to prevent the accelerator being able to modify the + * hooks in the middle (by, for example, unloading) + */ + if (np->accel_vif_state.hooks) { + spin_lock_irqsave(&np->accelerator->vif_states_lock, flags); + hooks = np->accel_vif_state.hooks; + if (hooks) { + kref_get(&np->accel_vif_state.vif_kref); + spin_unlock_irqrestore + (&np->accelerator->vif_states_lock, flags); + + rc = np->accel_vif_state.hooks->remove(dev); + + kref_put(&np->accel_vif_state.vif_kref, + vif_kref_release); + } else { + spin_unlock_irqrestore + (&np->accelerator->vif_states_lock, flags); + } + } + return rc; +} + + +int netfront_accelerator_call_suspend(struct netfront_info *np, + struct xenbus_device *dev) +{ + struct netfront_accel_hooks *hooks; + unsigned long flags; + int rc = 0; + + IPRINTK("netfront_accelerator_call_suspend\n"); + + /* + * Call the suspend accelerator hook. The use count for the + * accelerator's hooks is incremented for the duration of + * the call to prevent the accelerator being able to modify + * the hooks in the middle (by, for example, unloading) + */ + if (np->accel_vif_state.hooks) { + spin_lock_irqsave(&np->accelerator->vif_states_lock, flags); + hooks = np->accel_vif_state.hooks; + if (hooks) { + kref_get(&np->accel_vif_state.vif_kref); + spin_unlock_irqrestore + (&np->accelerator->vif_states_lock, flags); + + rc = np->accel_vif_state.hooks->suspend(dev); + + kref_put(&np->accel_vif_state.vif_kref, + vif_kref_release); + } else { + spin_unlock_irqrestore + (&np->accelerator->vif_states_lock, flags); + } + } + return rc; +} + + +int netfront_accelerator_call_suspend_cancel(struct netfront_info *np, + struct xenbus_device *dev) +{ + struct netfront_accel_hooks *hooks; + unsigned long flags; + int rc = 0; + + IPRINTK("netfront_accelerator_call_suspend_cancel\n"); + + /* + * Call the suspend_cancel accelerator hook.
The use count + * for the accelerator's hooks is incremented for the + * duration of the call to prevent the accelerator being able + * to modify the hooks in the middle (by, for example, + * unloading) + */ + if (np->accel_vif_state.hooks) { + spin_lock_irqsave(&np->accelerator->vif_states_lock, flags); + hooks = np->accel_vif_state.hooks; + if (hooks) { + kref_get(&np->accel_vif_state.vif_kref); + spin_unlock_irqrestore + (&np->accelerator->vif_states_lock, flags); + + rc = np->accel_vif_state.hooks->suspend_cancel(dev); + + kref_put(&np->accel_vif_state.vif_kref, + vif_kref_release); + } else { + spin_unlock_irqrestore + (&np->accelerator->vif_states_lock, flags); + } + } + return rc; +} + + +int netfront_accelerator_call_resume(struct netfront_info *np, + struct xenbus_device *dev) +{ + struct netfront_accel_hooks *hooks; + unsigned long flags; + int rc = 0; + + /* + * Call the resume accelerator hook. The use count for the + * accelerator's hooks is incremented for the duration of + * the call to prevent the accelerator being able to modify + * the hooks in the middle (by, for example, unloading) + */ + if (np->accel_vif_state.hooks) { + spin_lock_irqsave(&np->accelerator->vif_states_lock, flags); + hooks = np->accel_vif_state.hooks; + if (hooks) { + kref_get(&np->accel_vif_state.vif_kref); + spin_unlock_irqrestore + (&np->accelerator->vif_states_lock, flags); + + rc = np->accel_vif_state.hooks->resume(dev); + + kref_put(&np->accel_vif_state.vif_kref, + vif_kref_release); + } else { + spin_unlock_irqrestore + (&np->accelerator->vif_states_lock, flags); + } + } + return rc; +} + + +void netfront_accelerator_call_backend_changed(struct netfront_info *np, + struct xenbus_device *dev, + enum xenbus_state backend_state) +{ + struct netfront_accel_hooks *hooks; + unsigned long flags; + + /* + * Call the backend_changed accelerator hook. The use count + * for the accelerator's hooks is incremented for the duration + * of the call to prevent the accelerator being able to modify + * the hooks in the middle (by, for example, unloading) + */ + if (np->accel_vif_state.hooks) { + spin_lock_irqsave(&np->accelerator->vif_states_lock, flags); + hooks = np->accel_vif_state.hooks; + if (hooks) { + kref_get(&np->accel_vif_state.vif_kref); + spin_unlock_irqrestore + (&np->accelerator->vif_states_lock, flags); + + np->accel_vif_state.hooks->backend_changed + (dev, backend_state); + + kref_put(&np->accel_vif_state.vif_kref, + vif_kref_release); + } else { + spin_unlock_irqrestore + (&np->accelerator->vif_states_lock, flags); + } + } +} + + +void netfront_accelerator_call_stop_napi_irq(struct netfront_info *np, + struct net_device *dev) +{ + struct netfront_accel_hooks *hooks; + unsigned long flags; + + /* + * Call the stop_napi_interrupts accelerator hook.
The use + * count for the accelerator's hooks is incremented for the + * duration of the call to prevent the accelerator being able + * to modify the hooks in the middle (by, for example, + * unloading) + */ + + if (np->accel_vif_state.hooks) { + spin_lock_irqsave(&np->accelerator->vif_states_lock, flags); + hooks = np->accel_vif_state.hooks; + if (hooks) { + kref_get(&np->accel_vif_state.vif_kref); + spin_unlock_irqrestore + (&np->accelerator->vif_states_lock, flags); + + np->accel_vif_state.hooks->stop_napi_irq(dev); + + kref_put(&np->accel_vif_state.vif_kref, + vif_kref_release); + } else { + spin_unlock_irqrestore + (&np->accelerator->vif_states_lock, flags); + } + } +} + + +/* + * Once all users of hooks have kref_put()'d we can signal that it's + * safe to unload + */ +static void accel_kref_release(struct kref *ref) +{ + struct netfront_accelerator *accelerator = + container_of(ref, struct netfront_accelerator, accel_kref); + struct netfront_accel_hooks *hooks; + unsigned long flags; + + DPRINTK("%p\n", accelerator); + + /* Signal that all users of hooks are done */ + up(&accelerator->exit_semaphore); + + spin_lock_irqsave(&accelerator->vif_states_lock, flags); + if (accelerator->need_probe) { + hooks = accelerator->need_probe; + accelerator->need_probe = NULL; + accelerator_probe_vifs(accelerator, hooks, flags); + } + else + accelerator->ready_for_probe = 1; + + spin_unlock_irqrestore(&accelerator->vif_states_lock, flags); +} + + +static void vif_kref_release(struct kref *ref) +{ + struct netfront_accel_vif_state *vif_state = + container_of(ref, struct netfront_accel_vif_state, vif_kref); + struct netfront_accel_hooks *hooks; + unsigned long flags; + + DPRINTK("%p\n", vif_state); + + /* + * Now that this vif has finished using the hooks, it can + * decrement the accelerator's global copy ref count + */ + kref_put(&vif_state->np->accelerator->accel_kref, accel_kref_release); + + spin_lock_irqsave(&vif_state->np->accelerator->vif_states_lock, flags); + if (vif_state->need_probe) { + hooks = vif_state->need_probe; + vif_state->need_probe = NULL; + spin_unlock_irqrestore + (&vif_state->np->accelerator->vif_states_lock, flags); + hooks->new_device(vif_state->np->netdev, vif_state->dev); + } else { + vif_state->ready_for_probe = 1; + spin_unlock_irqrestore + (&vif_state->np->accelerator->vif_states_lock, flags); + } +} + diff -r 2b3852b24aa4 drivers/xen/netfront/netfront.c --- a/drivers/xen/netfront/netfront.c Mon Jul 09 12:54:55 2007 +0100 +++ b/drivers/xen/netfront/netfront.c Mon Jul 09 12:55:00 2007 +0100 @@ -3,6 +3,7 @@ * * Copyright (c) 2002-2005, K A Fraser * Copyright (c) 2005, XenSource Ltd + * Copyright (C) 2007 Solarflare Communications, Inc.
* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version 2 @@ -74,6 +75,8 @@ struct netfront_cb { #define NETFRONT_SKB_CB(skb) ((struct netfront_cb *)((skb)->cb)) +#include "netfront.h" + /* * Mutually-exclusive module options to select receive data path: * rx_copy : Packets are copied by network backend into local memory @@ -144,57 +147,6 @@ static inline int netif_needs_gso(struct #define GRANT_INVALID_REF 0 -#define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE) -#define NET_RX_RING_SIZE __RING_SIZE((struct netif_rx_sring *)0, PAGE_SIZE) - -struct netfront_info { - struct list_head list; - struct net_device *netdev; - - struct net_device_stats stats; - - struct netif_tx_front_ring tx; - struct netif_rx_front_ring rx; - - spinlock_t tx_lock; - spinlock_t rx_lock; - - unsigned int irq; - unsigned int copying_receiver; - unsigned int carrier; - - /* Receive-ring batched refills. */ -#define RX_MIN_TARGET 8 -#define RX_DFL_MIN_TARGET 64 -#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) - unsigned rx_min_target, rx_max_target, rx_target; - struct sk_buff_head rx_batch; - - struct timer_list rx_refill_timer; - - /* - * {tx,rx}_skbs store outstanding skbuffs. The first entry in tx_skbs - * is an index into a chain of free entries. - */ - struct sk_buff *tx_skbs[NET_TX_RING_SIZE+1]; - struct sk_buff *rx_skbs[NET_RX_RING_SIZE]; - -#define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) - grant_ref_t gref_tx_head; - grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1]; - grant_ref_t gref_rx_head; - grant_ref_t grant_rx_ref[NET_RX_RING_SIZE]; - - struct xenbus_device *xbdev; - int tx_ring_ref; - int rx_ring_ref; - u8 mac[ETH_ALEN]; - - unsigned long rx_pfn_array[NET_RX_RING_SIZE]; - struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1]; - struct mmu_update rx_mmu[NET_RX_RING_SIZE]; -}; - struct netfront_rx_info { struct netif_rx_response rx; struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; @@ -334,6 +286,8 @@ static int __devexit netfront_remove(str DPRINTK("%s\n", dev->nodename); + netfront_accelerator_call_remove(info, dev); + netif_disconnect_backend(info); del_timer_sync(&info->rx_refill_timer); @@ -346,6 +300,21 @@ static int __devexit netfront_remove(str return 0; } + + +static int netfront_suspend(struct xenbus_device *dev) +{ + struct netfront_info *info = dev->dev.driver_data; + return netfront_accelerator_call_suspend(info, dev); +} + + +static int netfront_suspend_cancel(struct xenbus_device *dev) +{ + struct netfront_info *info = dev->dev.driver_data; + return netfront_accelerator_call_suspend_cancel(info, dev); +} + /** * We are reconnecting to the backend, due to a suspend/resume, or a backend @@ -358,6 +327,8 @@ static int netfront_resume(struct xenbus struct netfront_info *info = dev->dev.driver_data; DPRINTK("%s\n", dev->nodename); + + netfront_accelerator_call_resume(info, dev); netif_disconnect_backend(info); return 0; @@ -577,6 +548,8 @@ static void backend_changed(struct xenbu xenbus_frontend_closed(dev); break; } + + netfront_accelerator_call_backend_changed(np, dev, backend_state); } /** Send a packet on a net device to encourage switches to learn the @@ -613,15 +586,29 @@ static inline int netfront_tx_slot_avail (TX_MAX_TARGET - MAX_SKB_FRAGS - 2)); } + static inline void network_maybe_wake_tx(struct net_device *dev) { struct netfront_info *np = netdev_priv(dev); if (unlikely(netif_queue_stopped(dev)) && netfront_tx_slot_available(np) && - 
likely(netif_running(dev))) + likely(netif_running(dev)) && + netfront_check_accelerator_queue_busy(dev, np)) netif_wake_queue(dev); } + + +int netfront_check_queue_busy(struct net_device *dev) +{ + struct netfront_info *np = netdev_priv(dev); + + return unlikely(netif_queue_stopped(dev)) && + netfront_tx_slot_available(np) && + likely(netif_running(dev)); +} +EXPORT_SYMBOL(netfront_check_queue_busy); + static int network_open(struct net_device *dev) { @@ -633,8 +620,11 @@ static int network_open(struct net_devic if (netfront_carrier_ok(np)) { network_alloc_rx_buffers(dev); np->rx.sring->rsp_event = np->rx.rsp_cons + 1; - if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) + if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) { + netfront_accelerator_call_stop_napi_irq(np, dev); + + netif_rx_schedule(dev); + } } spin_unlock_bh(&np->rx_lock); @@ -702,6 +692,10 @@ static void rx_refill_timeout(unsigned l static void rx_refill_timeout(unsigned long data) { struct net_device *dev = (struct net_device *)data; + struct netfront_info *np = netdev_priv(dev); + + netfront_accelerator_call_stop_napi_irq(np, dev); + netif_rx_schedule(dev); } @@ -941,6 +935,13 @@ static int network_start_xmit(struct sk_ unsigned int offset = offset_in_page(data); unsigned int len = skb_headlen(skb); + /* Check the fast path, if hooks are available */ + if (np->accel_vif_state.hooks && + np->accel_vif_state.hooks->start_xmit(skb, dev)) { + /* Fast path has sent this packet */ + return 0; + } + frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE; + if (unlikely(frags > MAX_SKB_FRAGS + 1)) { printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n", @@ -1044,8 +1045,11 @@ static irqreturn_t netif_int(int irq, vo if (likely(netfront_carrier_ok(np))) { network_tx_buf_gc(dev); /* Under tx_lock: protects access to rx shared-ring indexes.
*/ - if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) + if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) { + netfront_accelerator_call_stop_napi_irq(np, dev); + + netif_rx_schedule(dev); + } } spin_unlock_irqrestore(&np->tx_lock, flags); @@ -1305,7 +1309,7 @@ static int netif_poll(struct net_device struct netif_extra_info *extras = rinfo.extras; RING_IDX i, rp; struct multicall_entry *mcl; - int work_done, budget, more_to_do = 1; + int work_done, budget, more_to_do = 1, accel_more_to_do = 1; struct sk_buff_head rxq; struct sk_buff_head errq; struct sk_buff_head tmpq; @@ -1472,6 +1476,20 @@ err: network_alloc_rx_buffers(dev); + if (work_done < budget) { + /* there's some spare capacity, try the accelerated path */ + int accel_budget = budget - work_done; + int accel_budget_start = accel_budget; + + if (np->accel_vif_state.hooks) { + accel_more_to_do = + np->accel_vif_state.hooks->netdev_poll + (dev, &accel_budget); + work_done += (accel_budget_start - accel_budget); + } else + accel_more_to_do = 0; + } + *pbudget -= work_done; dev->quota -= work_done; @@ -1479,15 +1497,26 @@ err: local_irq_save(flags); RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do); - if (!more_to_do) + + if (!more_to_do && !accel_more_to_do && + np->accel_vif_state.hooks) { + /* + * Slow path has nothing more to do, see if + * fast path is likewise + */ + accel_more_to_do = + np->accel_vif_state.hooks->start_napi_irq(dev); + } + + if (!more_to_do && !accel_more_to_do) + __netif_rx_complete(dev); local_irq_restore(flags); } spin_unlock(&np->rx_lock); - - return more_to_do; + + return more_to_do | accel_more_to_do; } static void netif_release_tx_bufs(struct netfront_info *np) @@ -1687,7 +1716,9 @@ static int network_connect(struct net_de struct sk_buff *skb; grant_ref_t ref; netif_rx_request_t *req; - unsigned int feature_rx_copy, feature_rx_flip; + unsigned int feature_rx_copy, feature_rx_flip, feature_accel; + char *accel_frontend; + int accel_len; err = xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-rx-copy", "%u", &feature_rx_copy); @@ -1698,6 +1729,12 @@ static int network_connect(struct net_de if (err != 1) feature_rx_flip = 1; + feature_accel = 1; + accel_frontend = xenbus_read(XBT_NIL, np->xbdev->otherend, + "accel", &accel_len); + if (IS_ERR(accel_frontend)) + feature_accel = 0; + /* * Copy packets on receive path if: * (a) This was requested by user, and the backend supports it; or @@ -1709,6 +1746,11 @@ static int network_connect(struct net_de err = talk_to_backend(np->xbdev, np); if (err) return err; + + if (feature_accel) { + netfront_load_accelerator(np, np->xbdev, accel_frontend); + kfree(accel_frontend); + } xennet_set_features(dev); @@ -1956,6 +1998,8 @@ static struct net_device * __devinit cre spin_lock_init(&np->tx_lock); spin_lock_init(&np->rx_lock); + init_accelerator_vif(np, dev); + skb_queue_head_init(&np->rx_batch); np->rx_target = RX_DFL_MIN_TARGET; np->rx_min_target = RX_DFL_MIN_TARGET; @@ -2081,6 +2125,8 @@ static struct xenbus_driver netfront = { .ids = netfront_ids, .probe = netfront_probe, .remove = __devexit_p(netfront_remove), + .suspend = netfront_suspend, + .suspend_cancel = netfront_suspend_cancel, .resume = netfront_resume, .otherend_changed = backend_changed, }; @@ -2110,6 +2156,8 @@ static int __init netif_init(void) if (is_initial_xendomain()) return 0; + netif_init_accel(); + IPRINTK("Initialising virtual ethernet driver.\n"); (void)register_inetaddr_notifier(&notifier_inetdev); diff -r 2b3852b24aa4 drivers/xen/netfront/netfront.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++
b/drivers/xen/netfront/netfront.h Mon Jul 09 12:55:00 2007 +0100 @@ -0,0 +1,297 @@ +/****************************************************************************** + * Virtual network driver for conversing with remote driver backends. + * + * Copyright (c) 2002-2005, K A Fraser + * Copyright (c) 2005, XenSource Ltd + * Copyright (C) 2007 Solarflare Communications, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef NETFRONT_H +#define NETFRONT_H + +#include <xen/interface/io/netif.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/list.h> +#include <linux/kref.h> + +#define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE) +#define NET_RX_RING_SIZE __RING_SIZE((struct netif_rx_sring *)0, PAGE_SIZE) + +#include <xen/xenbus.h> + +/* + * Function pointer table for hooks into a network acceleration + * plugin. These are called at appropriate points from the netfront + * driver + */ +struct netfront_accel_hooks { + /* + * new_device: Accelerator hook to ask the plugin to support a + * new network interface + */ + int (*new_device)(struct net_device *net_dev, struct xenbus_device *dev); + /* + * suspend, suspend_cancel, resume, remove: Equivalent to the + * normal xenbus_* callbacks + */ + int (*suspend)(struct xenbus_device *dev); + int (*suspend_cancel)(struct xenbus_device *dev); + int (*resume)(struct xenbus_device *dev); + int (*remove)(struct xenbus_device *dev); + /* + * backend_changed: Callback from watch based on backend's + * xenbus state changing + */ + void (*backend_changed)(struct xenbus_device *dev, + enum xenbus_state backend_state); + /* + * The net_device is being polled, check the accelerated + * hardware for any pending packets + */ + int (*netdev_poll)(struct net_device *dev, int *pbudget); + /* + * start_xmit: Used to give the accelerator plugin the option + * of sending a packet.
Returns non-zero if it has done so, or + * zero to decline and force the packet onto the normal send + * path + */ + int (*start_xmit)(struct sk_buff *skb, struct net_device *dev); + /* + * start/stop_napi_irq: Used by netfront to indicate + * when napi interrupts should be enabled or disabled + */ + int (*start_napi_irq)(struct net_device *dev); + void (*stop_napi_irq)(struct net_device *dev); + /* + * Called before re-enabling the TX queue to check that the + * fast path has slots too + */ + int (*check_busy)(struct net_device *dev); +}; + +/* + * Per-netfront device state for the accelerator. This is used to + * allow efficient per-netfront device access to the accelerator + * hooks + */ +struct netfront_accel_vif_state { + struct list_head link; + + struct xenbus_device *dev; + struct netfront_info *np; + struct netfront_accel_hooks *hooks; + + /* + * Protect against removal of hooks while in use. + */ + struct kref vif_kref; + + unsigned ready_for_probe; + struct netfront_accel_hooks *need_probe; +}; + +/* + * Per-accelerator state stored in netfront. These form a list that + * is used to track which devices are accelerated by which plugins, + * and what plugins are available/have been requested + */ +struct netfront_accelerator { + /* Used to make a list */ + struct list_head link; + /* ID of the accelerator */ + int id; + /* + * String describing the accelerator. Currently this is the + * name of the accelerator module. This is provided by the + * backend accelerator through xenstore + */ + char *frontend; + /* The hooks into the accelerator plugin module */ + struct netfront_accel_hooks *hooks; + /* + * Protect against removal of hooks while in use. + */ + struct kref accel_kref; + /* + * List of per-netfront device state (struct + * netfront_accel_vif_state) for each netfront device that is + * using this accelerator + */ + struct list_head vif_states; + spinlock_t vif_states_lock; + /* + * Semaphore to signal that all users of this accelerator have + * finished using it before module is unloaded + */ + struct semaphore exit_semaphore; + + unsigned ready_for_probe; + struct netfront_accel_hooks *need_probe; +}; + +struct netfront_info { + struct list_head list; + struct net_device *netdev; + + struct net_device_stats stats; + + struct netif_tx_front_ring tx; + struct netif_rx_front_ring rx; + + spinlock_t tx_lock; + spinlock_t rx_lock; + + unsigned int irq; + unsigned int copying_receiver; + unsigned int carrier; + + /* Receive-ring batched refills. */ +#define RX_MIN_TARGET 8 +#define RX_DFL_MIN_TARGET 64 +#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) + unsigned rx_min_target, rx_max_target, rx_target; + struct sk_buff_head rx_batch; + + struct timer_list rx_refill_timer; + + /* + * {tx,rx}_skbs store outstanding skbuffs. The first entry in tx_skbs + * is an index into a chain of free entries.
+ */ + struct sk_buff *tx_skbs[NET_TX_RING_SIZE+1]; + struct sk_buff *rx_skbs[NET_RX_RING_SIZE]; + +#define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) + grant_ref_t gref_tx_head; + grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1]; + grant_ref_t gref_rx_head; + grant_ref_t grant_rx_ref[NET_RX_RING_SIZE]; + + struct xenbus_device *xbdev; + int tx_ring_ref; + int rx_ring_ref; + u8 mac[ETH_ALEN]; + + unsigned long rx_pfn_array[NET_RX_RING_SIZE]; + struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1]; + struct mmu_update rx_mmu[NET_RX_RING_SIZE]; + + /* Private pointer to state internal to accelerator module */ + void *accel_priv; + /* The accelerator used by this netfront device */ + struct netfront_accelerator *accelerator; + /* The accelerator state for this netfront device */ + struct netfront_accel_vif_state accel_vif_state; +}; + + +/* Exported Functions */ + +/* + * Called by an accelerator plugin module when it has loaded. + * + * frontend: the string describing the accelerator, currently the module name + * hooks: the hooks for netfront to use to call into the accelerator + */ +extern int netfront_accelerator_loaded(const char *frontend, + struct netfront_accel_hooks *hooks); + +/* + * Called when an accelerator plugin is ready to accelerate a device + * that has been passed to it from netfront using the "new_device" + * hook. + * + * frontend: the string describing the accelerator. Must match the + * one passed to netfront_accelerator_loaded() + * dev: the xenbus device the plugin was asked to accelerate + */ +extern void netfront_accelerator_ready(const char *frontend, + struct xenbus_device *dev); + +/* + * Called by an accelerator plugin module when it is about to unload. + * + * frontend: the string describing the accelerator. Must match the + * one passed to netfront_accelerator_loaded() + * + * wait: 1 => wait for all users of module to complete before + * returning, thus making it safe to unload on return + */ +extern void netfront_accelerator_stop(const char *frontend, int wait); + +/* + * Called by an accelerator before waking the net device's TX queue to + * ensure the slow path has available slots. Returns true if OK to + * wake, false if still busy + */ +extern int netfront_check_queue_busy(struct net_device *net_dev); + + + +/* Internal-to-netfront Functions */ + +/* + * Call into accelerator and check to see if it has tx space before we + * wake the net device's TX queue.
Returns true if OK to wake, false + * if still busy + */ +extern +int netfront_check_accelerator_queue_busy(struct net_device *dev, + struct netfront_info *np); +extern +int netfront_accelerator_call_remove(struct netfront_info *np, + struct xenbus_device *dev); +extern +int netfront_accelerator_call_suspend(struct netfront_info *np, + struct xenbus_device *dev); +extern +int netfront_accelerator_call_suspend_cancel(struct netfront_info *np, + struct xenbus_device *dev); +extern +int netfront_accelerator_call_resume(struct netfront_info *np, + struct xenbus_device *dev); +extern +void netfront_accelerator_call_backend_changed(struct netfront_info *np, + struct xenbus_device *dev, + enum xenbus_state backend_state); +extern +void netfront_accelerator_call_stop_napi_irq(struct netfront_info *np, + struct net_device *dev); + +extern +int netfront_load_accelerator(struct netfront_info *np, + struct xenbus_device *dev, + const char *frontend); + +extern +void netif_init_accel(void); + +extern +void init_accelerator_vif(struct netfront_info *np, + struct xenbus_device *dev); +#endif /* NETFRONT_H */
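This revision replaces the earlier netfront_accelerator_unloaded() with netfront_accelerator_stop(frontend, unloading): when unloading is non-zero the call blocks on exit_semaphore until every in-flight hook invocation has kref_put()'d its reference, so the plugin is safe to unload on return. A sketch of how a plugin might use this and the TX-wake helper, with the hypothetical names continuing the "accel_example" sketch above and not taken from the patch:

/* In the plugin's TX completion path: the fast path has freed some
 * slots, but the shared queue may only be woken if the netfront slow
 * path also has room. Despite its name, netfront_check_queue_busy()
 * returns true when it is OK to wake. */
static void example_tx_room_available(struct net_device *net_dev)
{
	if (netfront_check_queue_busy(net_dev))
		netif_wake_queue(net_dev);
}

static void __exit example_exit(void)
{
	/* Blocks until all users of the hooks are done, after which the
	 * hooks are unreachable and module unload is safe */
	netfront_accelerator_stop("accel_example", 1);
}
module_exit(example_exit);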