The purpose of this patch is to allow a driver to pass its own dma_ops
to xsk. This is needed for virtio-net: when virtio has not negotiated
VIRTIO_F_ACCESS_PLATFORM, virtio cannot use the DMA API, so XSK cannot
call the DMA API directly to obtain DMA addresses. To support this
case, XSK must let the driver supply its own dma_ops.

On the other hand, XSK is high-level code and should leave the
underlying DMA operations to the driver layer; the driver determines
the final DMA implementation, and XSK should not make assumptions
about it. Everything becomes simpler if DMA is done at the driver
level.

More is here: https://lore.kernel.org/virtualization/1681265026.6082013-1-xuanzhuo at linux.alibaba.com/

Signed-off-by: Xuan Zhuo <xuanzhuo at linux.alibaba.com>
---
 include/net/xdp_sock_drv.h  | 20 +++++++++++++++-
 include/net/xsk_buff_pool.h | 19 +++++++++++++++
 net/xdp/xsk_buff_pool.c     | 47 +++++++++++++++++++++++--------------
 3 files changed, 68 insertions(+), 18 deletions(-)

diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 9c0d860609ba..181583ff6a26 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -67,7 +67,17 @@ static inline int xsk_pool_dma_map(struct xsk_buff_pool *pool,
 {
        struct xdp_umem *umem = pool->umem;
 
-       return xp_dma_map(pool, dev, attrs, umem->pgs, umem->npgs);
+       return xp_dma_map(pool, dev, NULL, attrs, umem->pgs, umem->npgs);
+}
+
+static inline int xsk_pool_dma_map_with_ops(struct xsk_buff_pool *pool,
+                                           struct device *dev,
+                                           struct xsk_dma_ops *dma_ops,
+                                           unsigned long attrs)
+{
+       struct xdp_umem *umem = pool->umem;
+
+       return xp_dma_map(pool, dev, dma_ops, attrs, umem->pgs, umem->npgs);
 }
 
 static inline dma_addr_t xsk_buff_xdp_get_dma(struct xdp_buff *xdp)
@@ -226,6 +236,14 @@ static inline int xsk_pool_dma_map(struct xsk_buff_pool *pool,
        return 0;
 }
 
+static inline int xsk_pool_dma_map_with_ops(struct xsk_buff_pool *pool,
+                                           struct device *dev,
+                                           struct xsk_dma_ops *dma_ops,
+                                           unsigned long attrs)
+{
+       return 0;
+}
+
 static inline dma_addr_t xsk_buff_xdp_get_dma(struct xdp_buff *xdp)
 {
        return 0;
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 3e952e569418..1299b9d12484 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -43,6 +43,23 @@ struct xsk_dma_map {
        bool dma_need_sync;
 };
 
+struct xsk_dma_ops {
+       dma_addr_t (*map_page)(struct device *dev, struct page *page,
+                              unsigned long offset, size_t size,
+                              enum dma_data_direction dir, unsigned long attrs);
+       void (*unmap_page)(struct device *dev, dma_addr_t dma_handle,
+                          size_t size, enum dma_data_direction dir,
+                          unsigned long attrs);
+       int (*mapping_error)(struct device *dev, dma_addr_t dma_addr);
+       bool (*need_sync)(struct device *dev, dma_addr_t dma);
+       void (*sync_single_range_for_cpu)(struct device *dev, dma_addr_t addr,
+                                         unsigned long offset, size_t size,
+                                         enum dma_data_direction dir);
+       void (*sync_single_range_for_device)(struct device *dev, dma_addr_t addr,
+                                            unsigned long offset, size_t size,
+                                            enum dma_data_direction dir);
+};
+
 struct xsk_buff_pool {
        /* Members only used in the control path first. */
        struct device *dev;
@@ -85,6 +102,7 @@ struct xsk_buff_pool {
         * sockets share a single cq when the same netdev and queue id is shared.
         */
        spinlock_t cq_lock;
+       struct xsk_dma_ops dma_ops;
        struct xdp_buff_xsk *free_heads[];
 };
 
@@ -131,6 +149,7 @@ static inline void xp_init_xskb_dma(struct xdp_buff_xsk *xskb, struct xsk_buff_p
 /* AF_XDP ZC drivers, via xdp_sock_buff.h */
 void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq);
 int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
+              struct xsk_dma_ops *dma_ops,
               unsigned long attrs, struct page **pages, u32 nr_pages);
 void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs);
 struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool);
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index b2df1e0f8153..646090cae8ec 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -328,7 +328,8 @@ static void xp_destroy_dma_map(struct xsk_dma_map *dma_map)
        kfree(dma_map);
 }
 
-static void __xp_dma_unmap(struct xsk_dma_map *dma_map, unsigned long attrs)
+static void __xp_dma_unmap(struct xsk_dma_map *dma_map,
+                          struct xsk_dma_ops *dma_ops, unsigned long attrs)
 {
        dma_addr_t *dma;
        u32 i;
@@ -337,8 +338,8 @@ static void __xp_dma_unmap(struct xsk_dma_map *dma_map, unsigned long attrs)
                dma = &dma_map->dma_pages[i];
                if (*dma) {
                        *dma &= ~XSK_NEXT_PG_CONTIG_MASK;
-                       dma_unmap_page_attrs(dma_map->dev, *dma, PAGE_SIZE,
-                                            DMA_BIDIRECTIONAL, attrs);
+                       dma_ops->unmap_page(dma_map->dev, *dma, PAGE_SIZE,
+                                           DMA_BIDIRECTIONAL, attrs);
                        *dma = 0;
                }
        }
@@ -362,7 +363,7 @@ void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs)
        if (!refcount_dec_and_test(&dma_map->users))
                return;
 
-       __xp_dma_unmap(dma_map, attrs);
+       __xp_dma_unmap(dma_map, &pool->dma_ops, attrs);
        kvfree(pool->dma_pages);
        pool->dma_pages_cnt = 0;
        pool->dev = NULL;
@@ -407,6 +408,7 @@ static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_
 }
 
 int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
+              struct xsk_dma_ops *dma_ops,
               unsigned long attrs, struct page **pages, u32 nr_pages)
 {
        struct xsk_dma_map *dma_map;
@@ -424,18 +426,29 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
                return 0;
        }
 
+       if (!dma_ops) {
+               pool->dma_ops.map_page = dma_map_page_attrs;
+               pool->dma_ops.mapping_error = dma_mapping_error;
+               pool->dma_ops.need_sync = dma_need_sync;
+               pool->dma_ops.sync_single_range_for_device = dma_sync_single_range_for_device;
+               pool->dma_ops.sync_single_range_for_cpu = dma_sync_single_range_for_cpu;
+               dma_ops = &pool->dma_ops;
+       } else {
+               pool->dma_ops = *dma_ops;
+       }
+
        dma_map = xp_create_dma_map(dev, pool->netdev, nr_pages, pool->umem);
        if (!dma_map)
                return -ENOMEM;
 
        for (i = 0; i < dma_map->dma_pages_cnt; i++) {
-               dma = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
-                                        DMA_BIDIRECTIONAL, attrs);
-               if (dma_mapping_error(dev, dma)) {
-                       __xp_dma_unmap(dma_map, attrs);
+               dma = dma_ops->map_page(dev, pages[i], 0, PAGE_SIZE,
+                                       DMA_BIDIRECTIONAL, attrs);
+               if (dma_ops->mapping_error(dev, dma)) {
+                       __xp_dma_unmap(dma_map, dma_ops, attrs);
                        return -ENOMEM;
                }
-               if (dma_need_sync(dev, dma))
+               if (dma_ops->need_sync(dev, dma))
                        dma_map->dma_need_sync = true;
                dma_map->dma_pages[i] = dma;
        }
@@ -445,7 +458,7 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
 
        err = xp_init_dma_info(pool, dma_map);
        if (err) {
-               __xp_dma_unmap(dma_map, attrs);
+               __xp_dma_unmap(dma_map, dma_ops, attrs);
                return err;
        }
 
@@ -532,9 +545,9 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
        xskb->xdp.data_meta = xskb->xdp.data;
 
        if (pool->dma_need_sync) {
-               dma_sync_single_range_for_device(pool->dev, xskb->dma, 0,
-                                                pool->frame_len,
-                                                DMA_BIDIRECTIONAL);
+               pool->dma_ops.sync_single_range_for_device(pool->dev, xskb->dma, 0,
+                                                          pool->frame_len,
+                                                          DMA_BIDIRECTIONAL);
        }
        return &xskb->xdp;
 }
@@ -670,15 +683,15 @@ EXPORT_SYMBOL(xp_raw_get_dma);
 
 void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb)
 {
-       dma_sync_single_range_for_cpu(xskb->pool->dev, xskb->dma, 0,
-                                     xskb->pool->frame_len, DMA_BIDIRECTIONAL);
+       xskb->pool->dma_ops.sync_single_range_for_cpu(xskb->pool->dev, xskb->dma, 0,
+                                                     xskb->pool->frame_len, DMA_BIDIRECTIONAL);
 }
 EXPORT_SYMBOL(xp_dma_sync_for_cpu_slow);
 
 void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma,
                                 size_t size)
 {
-       dma_sync_single_range_for_device(pool->dev, dma, 0,
-                                        size, DMA_BIDIRECTIONAL);
+       pool->dma_ops.sync_single_range_for_device(pool->dev, dma, 0,
+                                                  size, DMA_BIDIRECTIONAL);
 }
 EXPORT_SYMBOL(xp_dma_sync_for_device_slow);
-- 
2.32.0.3.g01195cf9f
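
For illustration, a driver would use the new entry point roughly as
follows. This is a hypothetical sketch: the my_virtio_* callbacks are
placeholders for whatever DMA handling the driver implements, not
functions defined by this series.

        /* Driver-side usage sketch of xsk_pool_dma_map_with_ops().
         * All my_virtio_* callbacks are hypothetical placeholders.
         */
        static struct xsk_dma_ops my_xsk_dma_ops = {
                .map_page                     = my_virtio_map_page,
                .unmap_page                   = my_virtio_unmap_page,
                .mapping_error                = my_virtio_mapping_error,
                .need_sync                    = my_virtio_need_sync,
                .sync_single_range_for_cpu    = my_virtio_sync_for_cpu,
                .sync_single_range_for_device = my_virtio_sync_for_device,
        };

        /* At AF_XDP setup time, instead of xsk_pool_dma_map(): */
        err = xsk_pool_dma_map_with_ops(pool, dev, &my_xsk_dma_ops, 0);
        if (err)
                return err;

A driver that can use the DMA API directly keeps calling
xsk_pool_dma_map(), which passes NULL ops so that xp_dma_map() falls
back to the generic dma_*() helpers.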
On Mon, Apr 17, 2023 at 11:27:50AM +0800, Xuan Zhuo wrote:
> The purpose of this patch is to allow a driver to pass its own dma_ops
> to xsk.

Drivers have no business passing around dma_ops, or even knowing about them.
Hi Xuan,

kernel test robot noticed the following build errors:

[auto build test ERROR on net-next/main]

url:    https://github.com/intel-lab-lkp/linux/commits/Xuan-Zhuo/xsk-introduce-xsk_dma_ops/20230417-112903
patch link:    https://lore.kernel.org/r/20230417032750.7086-1-xuanzhuo%40linux.alibaba.com
patch subject: [PATCH net-next] xsk: introduce xsk_dma_ops
config: i386-randconfig-a011-20230417 (https://download.01.org/0day-ci/archive/20230417/202304171427.Uaryn9jl-lkp at intel.com/config)
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/28e766603a33761d7bd1fdd3e107595408319f7d
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Xuan-Zhuo/xsk-introduce-xsk_dma_ops/20230417-112903
        git checkout 28e766603a33761d7bd1fdd3e107595408319f7d
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 SHELL=/bin/bash net/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp at intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202304171427.Uaryn9jl-lkp at intel.com/

All errors (new ones prefixed by >>):

>> net/xdp/xsk_buff_pool.c:430:26: error: incompatible function pointer types assigning to 'dma_addr_t (*)(struct device *, struct page *, unsigned long, size_t, enum dma_data_direction, unsigned long)' (aka 'unsigned int (*)(struct device *, struct page *, unsigned long, unsigned int, enum dma_data_direction, unsigned long)') from 'dma_addr_t (struct device *, struct page *, size_t, size_t, enum dma_data_direction, unsigned long)' (aka 'unsigned int (struct device *, struct page *, unsigned int, unsigned int, enum dma_data_direction, unsigned long)') [-Werror,-Wincompatible-function-pointer-types]
           pool->dma_ops.map_page = dma_map_page_attrs;
                                    ^ ~~~~~~~~~~~~~~~~~~
   1 error generated.
vim +430 net/xdp/xsk_buff_pool.c

   409	
   410	int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
   411		       struct xsk_dma_ops *dma_ops,
   412		       unsigned long attrs, struct page **pages, u32 nr_pages)
   413	{
   414		struct xsk_dma_map *dma_map;
   415		dma_addr_t dma;
   416		int err;
   417		u32 i;
   418	
   419		dma_map = xp_find_dma_map(pool);
   420		if (dma_map) {
   421			err = xp_init_dma_info(pool, dma_map);
   422			if (err)
   423				return err;
   424	
   425			refcount_inc(&dma_map->users);
   426			return 0;
   427		}
   428	
   429		if (!dma_ops) {
 > 430			pool->dma_ops.map_page = dma_map_page_attrs;
   431			pool->dma_ops.mapping_error = dma_mapping_error;
   432			pool->dma_ops.need_sync = dma_need_sync;
   433			pool->dma_ops.sync_single_range_for_device = dma_sync_single_range_for_device;
   434			pool->dma_ops.sync_single_range_for_cpu = dma_sync_single_range_for_cpu;
   435			dma_ops = &pool->dma_ops;
   436		} else {
   437			pool->dma_ops = *dma_ops;
   438		}
   439	
   440		dma_map = xp_create_dma_map(dev, pool->netdev, nr_pages, pool->umem);
   441		if (!dma_map)
   442			return -ENOMEM;
   443	
   444		for (i = 0; i < dma_map->dma_pages_cnt; i++) {
   445			dma = dma_ops->map_page(dev, pages[i], 0, PAGE_SIZE,
   446						DMA_BIDIRECTIONAL, attrs);
   447			if (dma_ops->mapping_error(dev, dma)) {
   448				__xp_dma_unmap(dma_map, dma_ops, attrs);
   449				return -ENOMEM;
   450			}
   451			if (dma_ops->need_sync(dev, dma))
   452				dma_map->dma_need_sync = true;
   453			dma_map->dma_pages[i] = dma;
   454		}
   455	
   456		if (pool->unaligned)
   457			xp_check_dma_contiguity(dma_map);
   458	
   459		err = xp_init_dma_info(pool, dma_map);
   460		if (err) {
   461			__xp_dma_unmap(dma_map, dma_ops, attrs);
   462			return err;
   463		}
   464	
   465		return 0;
   466	}
   467	EXPORT_SYMBOL(xp_dma_map);
   468	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests
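
The error is a prototype mismatch: dma_map_page_attrs() declares its
page offset as size_t, while struct xsk_dma_ops declares map_page's
offset as unsigned long, which clang rejects as an incompatible
function pointer assignment on 32-bit targets. A minimal, untested
sketch of one way to fix it is to mirror the DMA API prototype exactly:

        struct xsk_dma_ops {
                /* Match dma_map_page_attrs(), which takes the offset
                 * as size_t, so the generic helper can be assigned
                 * directly without a cast.
                 */
                dma_addr_t (*map_page)(struct device *dev, struct page *page,
                                       size_t offset, size_t size,
                                       enum dma_data_direction dir,
                                       unsigned long attrs);
                /* ... remaining callbacks unchanged ... */
        };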
On Mon, Apr 17, 2023 at 11:27:50AM +0800, Xuan Zhuo wrote:
> @@ -532,9 +545,9 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
>  	xskb->xdp.data_meta = xskb->xdp.data;
>  
>  	if (pool->dma_need_sync) {
> -		dma_sync_single_range_for_device(pool->dev, xskb->dma, 0,
> -						 pool->frame_len,
> -						 DMA_BIDIRECTIONAL);
> +		pool->dma_ops.sync_single_range_for_device(pool->dev, xskb->dma, 0,
> +							   pool->frame_len,
> +							   DMA_BIDIRECTIONAL);
>  	}
>  	return &xskb->xdp;
>  }
> @@ -670,15 +683,15 @@ EXPORT_SYMBOL(xp_raw_get_dma);
>  
>  void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb)
>  {
> -	dma_sync_single_range_for_cpu(xskb->pool->dev, xskb->dma, 0,
> -				      xskb->pool->frame_len, DMA_BIDIRECTIONAL);
> +	xskb->pool->dma_ops.sync_single_range_for_cpu(xskb->pool->dev, xskb->dma, 0,
> +						      xskb->pool->frame_len, DMA_BIDIRECTIONAL);
> }
> EXPORT_SYMBOL(xp_dma_sync_for_cpu_slow);
> 
> void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma,
> 				 size_t size)
> {
> -	dma_sync_single_range_for_device(pool->dev, dma, 0,
> -					 size, DMA_BIDIRECTIONAL);
> +	pool->dma_ops.sync_single_range_for_device(pool->dev, dma, 0,
> +						   size, DMA_BIDIRECTIONAL);
> }
> EXPORT_SYMBOL(xp_dma_sync_for_device_slow);

So you add an indirect function call on data path? Won't this be costly?

> -- 
> 2.32.0.3.g01195cf9f
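
One common mitigation for this concern (a hypothetical sketch, not
part of this series; the has_custom_dma_ops flag shown below is not
defined by the patch) is to keep the direct DMA API call on the fast
path and only take the indirect call when a driver has actually
installed its own ops:

        static inline void xp_sync_for_device(struct xsk_buff_pool *pool,
                                              dma_addr_t dma, size_t size)
        {
                /* The branch is well-predicted, so the indirect call
                 * (and any retpoline cost) is only paid by drivers
                 * that install custom ops.
                 */
                if (likely(!pool->has_custom_dma_ops))
                        dma_sync_single_range_for_device(pool->dev, dma, 0,
                                                         size, DMA_BIDIRECTIONAL);
                else
                        pool->dma_ops.sync_single_range_for_device(pool->dev, dma, 0,
                                                                   size, DMA_BIDIRECTIONAL);
        }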
Hi Xuan,

kernel test robot noticed the following build errors:

[auto build test ERROR on net-next/main]

url:    https://github.com/intel-lab-lkp/linux/commits/Xuan-Zhuo/xsk-introduce-xsk_dma_ops/20230417-112903
patch link:    https://lore.kernel.org/r/20230417032750.7086-1-xuanzhuo%40linux.alibaba.com
patch subject: [PATCH net-next] xsk: introduce xsk_dma_ops
config: mips-randconfig-r021-20230416 (https://download.01.org/0day-ci/archive/20230417/202304171441.eZRwCNsh-lkp at intel.com/config)
compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project 9638da200e00bd069e6dd63604e14cbafede9324)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install mips cross compiling tool for clang build
        # apt-get install binutils-mipsel-linux-gnu
        # https://github.com/intel-lab-lkp/linux/commit/28e766603a33761d7bd1fdd3e107595408319f7d
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Xuan-Zhuo/xsk-introduce-xsk_dma_ops/20230417-112903
        git checkout 28e766603a33761d7bd1fdd3e107595408319f7d
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=mips olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=mips SHELL=/bin/bash net/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp at intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202304171441.eZRwCNsh-lkp at intel.com/

All errors (new ones prefixed by >>):

>> net/xdp/xsk_buff_pool.c:430:26: error: incompatible function pointer types assigning to 'dma_addr_t (*)(struct device *, struct page *, unsigned long, size_t, enum dma_data_direction, unsigned long)' (aka 'unsigned int (*)(struct device *, struct page *, unsigned long, unsigned int, enum dma_data_direction, unsigned long)') from 'dma_addr_t (struct device *, struct page *, size_t, size_t, enum dma_data_direction, unsigned long)' (aka 'unsigned int (struct device *, struct page *, unsigned int, unsigned int, enum dma_data_direction, unsigned long)') [-Wincompatible-function-pointer-types]
           pool->dma_ops.map_page = dma_map_page_attrs;
                                    ^ ~~~~~~~~~~~~~~~~~~
   1 error generated.
The flagged source listing (vim +430 net/xdp/xsk_buff_pool.c) is identical to the one in the i386 report above.

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests