Hi Dan, this series cleans up and simplifies the association between DAX and block devices in preparation of allowing to mount file systems directly on DAX devices without a detour through block devices. Diffstat: drivers/dax/Kconfig | 4 drivers/dax/bus.c | 2 drivers/dax/super.c | 220 +++++-------------------------------------- drivers/md/dm-linear.c | 51 +++------ drivers/md/dm-log-writes.c | 44 +++----- drivers/md/dm-stripe.c | 65 +++--------- drivers/md/dm-table.c | 22 ++-- drivers/md/dm-writecache.c | 2 drivers/md/dm.c | 29 ----- drivers/md/dm.h | 4 drivers/nvdimm/Kconfig | 2 drivers/nvdimm/pmem.c | 9 - drivers/s390/block/Kconfig | 2 drivers/s390/block/dcssblk.c | 12 +- fs/dax.c | 13 ++ fs/erofs/super.c | 11 +- fs/ext2/super.c | 6 - fs/ext4/super.c | 9 + fs/fuse/Kconfig | 2 fs/fuse/virtio_fs.c | 2 fs/xfs/xfs_super.c | 54 +++++----- include/linux/dax.h | 30 ++--- 22 files changed, 185 insertions(+), 410 deletions(-)
Christoph Hellwig
2021-Oct-18 04:40 UTC
[PATCH 01/11] dm: make the DAX support dependend on CONFIG_FS_DAX
The device mapper DAX support is all hanging off a block device and thus can't be used with device dax. Make it depend on CONFIG_FS_DAX instead of CONFIG_DAX_DRIVER. This also means that bdev_dax_pgoff only needs to be built under CONFIG_FS_DAX now. Signed-off-by: Christoph Hellwig <hch at lst.de> --- drivers/dax/super.c | 6 ++---- drivers/md/dm-linear.c | 2 +- drivers/md/dm-log-writes.c | 2 +- drivers/md/dm-stripe.c | 2 +- drivers/md/dm-writecache.c | 2 +- drivers/md/dm.c | 2 +- 6 files changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index b882cf8106ea3..e20d0cef10a18 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -63,7 +63,7 @@ static int dax_host_hash(const char *host) return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE; } -#ifdef CONFIG_BLOCK +#if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX) #include <linux/blkdev.h> int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size, @@ -80,7 +80,6 @@ int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size, } EXPORT_SYMBOL(bdev_dax_pgoff); -#if IS_ENABLED(CONFIG_FS_DAX) /** * dax_get_by_host() - temporary lookup mechanism for filesystem-dax * @host: alternate name for the device registered by a dax driver @@ -219,8 +218,7 @@ bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev, return ret; } EXPORT_SYMBOL_GPL(dax_supported); -#endif /* CONFIG_FS_DAX */ -#endif /* CONFIG_BLOCK */ +#endif /* CONFIG_BLOCK && CONFIG_FS_DAX */ enum dax_device_flags { /* !alive + rcu grace period == no new operations / mappings */ diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 679b4c0a2eea1..32fbab11bf90c 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -163,7 +163,7 @@ static int linear_iterate_devices(struct dm_target *ti, return fn(ti, lc->dev, lc->start, ti->len, data); } -#if IS_ENABLED(CONFIG_DAX_DRIVER) +#if IS_ENABLED(CONFIG_FS_DAX) static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn) { diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index d93a4db235124..6d694526881d0 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -903,7 +903,7 @@ static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limit limits->io_min = limits->physical_block_size; } -#if IS_ENABLED(CONFIG_DAX_DRIVER) +#if IS_ENABLED(CONFIG_FS_DAX) static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes, struct iov_iter *i) { diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 6660b6b53d5bf..f084607220293 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -300,7 +300,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; } -#if IS_ENABLED(CONFIG_DAX_DRIVER) +#if IS_ENABLED(CONFIG_FS_DAX) static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn) { diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 18320444fb0a9..4c3a6e33604d3 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -38,7 +38,7 @@ #define BITMAP_GRANULARITY PAGE_SIZE #endif -#if IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && IS_ENABLED(CONFIG_DAX_DRIVER) +#if IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && IS_ENABLED(CONFIG_FS_DAX) #define DM_WRITECACHE_HAS_PMEM #endif diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 7870e6460633f..79737aee516b1 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1783,7 +1783,7 @@ static struct mapped_device *alloc_dev(int minor) md->disk->private_data = md; sprintf(md->disk->disk_name, "dm-%d", minor); - if (IS_ENABLED(CONFIG_DAX_DRIVER)) { + if (IS_ENABLED(CONFIG_FS_DAX)) { md->dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops, 0); if (IS_ERR(md->dax_dev)) -- 2.30.2
CONFIG_DAX_DRIVER only selects CONFIG_DAX now, so remove it. Signed-off-by: Christoph Hellwig <hch at lst.de> --- drivers/dax/Kconfig | 4 ---- drivers/nvdimm/Kconfig | 2 +- drivers/s390/block/Kconfig | 2 +- fs/fuse/Kconfig | 2 +- 4 files changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig index d2834c2cfa10d..954ab14ba7778 100644 --- a/drivers/dax/Kconfig +++ b/drivers/dax/Kconfig @@ -1,8 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -config DAX_DRIVER - select DAX - bool - menuconfig DAX tristate "DAX: direct access to differentiated memory" select SRCU diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig index b7d1eb38b27d4..347fe7afa5830 100644 --- a/drivers/nvdimm/Kconfig +++ b/drivers/nvdimm/Kconfig @@ -22,7 +22,7 @@ if LIBNVDIMM config BLK_DEV_PMEM tristate "PMEM: Persistent memory block device support" default LIBNVDIMM - select DAX_DRIVER + select DAX select ND_BTT if BTT select ND_PFN if NVDIMM_PFN help diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig index d0416dbd0cd81..e3710a762abae 100644 --- a/drivers/s390/block/Kconfig +++ b/drivers/s390/block/Kconfig @@ -5,7 +5,7 @@ comment "S/390 block device drivers" config DCSSBLK def_tristate m select FS_DAX_LIMITED - select DAX_DRIVER + select DAX prompt "DCSSBLK support" depends on S390 && BLOCK help diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig index 40ce9a1c12e5d..038ed0b9aaa5d 100644 --- a/fs/fuse/Kconfig +++ b/fs/fuse/Kconfig @@ -45,7 +45,7 @@ config FUSE_DAX select INTERVAL_TREE depends on VIRTIO_FS depends on FS_DAX - depends on DAX_DRIVER + depends on DAX help This allows bypassing guest page cache and allows mapping host page cache directly in guest address space. -- 2.30.2
Christoph Hellwig
2021-Oct-18 04:40 UTC
[PATCH 03/11] dax: simplify the dax_device <-> gendisk association
Replace the dax_host_hash with an xarray indexed by the pointer value of the gendisk, and require explicitl calls from the block drivers that want to associate their gendisk with a dax_device. Signed-off-by: Christoph Hellwig <hch at lst.de> --- drivers/dax/bus.c | 2 +- drivers/dax/super.c | 106 +++++++++-------------------------- drivers/md/dm.c | 6 +- drivers/nvdimm/pmem.c | 8 ++- drivers/s390/block/dcssblk.c | 11 +++- fs/fuse/virtio_fs.c | 2 +- include/linux/dax.h | 19 +++++-- 7 files changed, 60 insertions(+), 94 deletions(-) diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 6cc4da4c713d9..6d91b0186e3be 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -1326,7 +1326,7 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data) * No 'host' or dax_operations since there is no access to this * device outside of mmap of the resulting character device. */ - dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC); + dax_dev = alloc_dax(dev_dax, NULL, DAXDEV_F_SYNC); if (IS_ERR(dax_dev)) { rc = PTR_ERR(dax_dev); goto err_alloc_dax; diff --git a/drivers/dax/super.c b/drivers/dax/super.c index e20d0cef10a18..9383c11b21853 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -7,10 +7,8 @@ #include <linux/mount.h> #include <linux/pseudo_fs.h> #include <linux/magic.h> -#include <linux/genhd.h> #include <linux/pfn_t.h> #include <linux/cdev.h> -#include <linux/hash.h> #include <linux/slab.h> #include <linux/uio.h> #include <linux/dax.h> @@ -26,10 +24,8 @@ * @flags: state and boolean properties */ struct dax_device { - struct hlist_node list; struct inode inode; struct cdev cdev; - const char *host; void *private; unsigned long flags; const struct dax_operations *ops; @@ -42,10 +38,6 @@ static DEFINE_IDA(dax_minor_ida); static struct kmem_cache *dax_cache __read_mostly; static struct super_block *dax_superblock __read_mostly; -#define DAX_HASH_SIZE (PAGE_SIZE / sizeof(struct hlist_head)) -static struct hlist_head dax_host_list[DAX_HASH_SIZE]; -static DEFINE_SPINLOCK(dax_host_lock); - int dax_read_lock(void) { return srcu_read_lock(&dax_srcu); @@ -58,13 +50,22 @@ void dax_read_unlock(int id) } EXPORT_SYMBOL_GPL(dax_read_unlock); -static int dax_host_hash(const char *host) +#if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX) +#include <linux/blkdev.h> + +static DEFINE_XARRAY(dax_hosts); + +int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk) { - return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE; + return xa_insert(&dax_hosts, (unsigned long)disk, dax_dev, GFP_KERNEL); } +EXPORT_SYMBOL_GPL(dax_add_host); -#if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX) -#include <linux/blkdev.h> +void dax_remove_host(struct gendisk *disk) +{ + xa_erase(&dax_hosts, (unsigned long)disk); +} +EXPORT_SYMBOL_GPL(dax_remove_host); int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size, pgoff_t *pgoff) @@ -82,40 +83,23 @@ EXPORT_SYMBOL(bdev_dax_pgoff); /** * dax_get_by_host() - temporary lookup mechanism for filesystem-dax - * @host: alternate name for the device registered by a dax driver + * @bdev: block device to find a dax_device for */ -static struct dax_device *dax_get_by_host(const char *host) +struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev) { - struct dax_device *dax_dev, *found = NULL; - int hash, id; + struct dax_device *dax_dev; + int id; - if (!host) + if (!blk_queue_dax(bdev->bd_disk->queue)) return NULL; - hash = dax_host_hash(host); - id = dax_read_lock(); - spin_lock(&dax_host_lock); - hlist_for_each_entry(dax_dev, &dax_host_list[hash], list) { - if (!dax_alive(dax_dev) - || strcmp(host, dax_dev->host) != 0) - continue; - - if (igrab(&dax_dev->inode)) - found = dax_dev; - break; - } - spin_unlock(&dax_host_lock); + dax_dev = xa_load(&dax_hosts, (unsigned long)bdev->bd_disk); + if (!dax_dev || !dax_alive(dax_dev) || !igrab(&dax_dev->inode)) + dax_dev = NULL; dax_read_unlock(id); - return found; -} - -struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev) -{ - if (!blk_queue_dax(bdev->bd_disk->queue)) - return NULL; - return dax_get_by_host(bdev->bd_disk->disk_name); + return dax_dev; } EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev); @@ -361,12 +345,7 @@ void kill_dax(struct dax_device *dax_dev) return; clear_bit(DAXDEV_ALIVE, &dax_dev->flags); - synchronize_srcu(&dax_srcu); - - spin_lock(&dax_host_lock); - hlist_del_init(&dax_dev->list); - spin_unlock(&dax_host_lock); } EXPORT_SYMBOL_GPL(kill_dax); @@ -398,8 +377,6 @@ static struct dax_device *to_dax_dev(struct inode *inode) static void dax_free_inode(struct inode *inode) { struct dax_device *dax_dev = to_dax_dev(inode); - kfree(dax_dev->host); - dax_dev->host = NULL; if (inode->i_rdev) ida_simple_remove(&dax_minor_ida, iminor(inode)); kmem_cache_free(dax_cache, dax_dev); @@ -474,54 +451,25 @@ static struct dax_device *dax_dev_get(dev_t devt) return dax_dev; } -static void dax_add_host(struct dax_device *dax_dev, const char *host) -{ - int hash; - - /* - * Unconditionally init dax_dev since it's coming from a - * non-zeroed slab cache - */ - INIT_HLIST_NODE(&dax_dev->list); - dax_dev->host = host; - if (!host) - return; - - hash = dax_host_hash(host); - spin_lock(&dax_host_lock); - hlist_add_head(&dax_dev->list, &dax_host_list[hash]); - spin_unlock(&dax_host_lock); -} - -struct dax_device *alloc_dax(void *private, const char *__host, - const struct dax_operations *ops, unsigned long flags) +struct dax_device *alloc_dax(void *private, const struct dax_operations *ops, + unsigned long flags) { struct dax_device *dax_dev; - const char *host; dev_t devt; int minor; - if (ops && !ops->zero_page_range) { - pr_debug("%s: error: device does not provide dax" - " operation zero_page_range()\n", - __host ? __host : "Unknown"); + if (WARN_ON_ONCE(ops && !ops->zero_page_range)) return ERR_PTR(-EINVAL); - } - - host = kstrdup(__host, GFP_KERNEL); - if (__host && !host) - return ERR_PTR(-ENOMEM); minor = ida_simple_get(&dax_minor_ida, 0, MINORMASK+1, GFP_KERNEL); if (minor < 0) - goto err_minor; + return ERR_PTR(-ENOMEM); devt = MKDEV(MAJOR(dax_devt), minor); dax_dev = dax_dev_get(devt); if (!dax_dev) goto err_dev; - dax_add_host(dax_dev, host); dax_dev->ops = ops; dax_dev->private = private; if (flags & DAXDEV_F_SYNC) @@ -531,8 +479,6 @@ struct dax_device *alloc_dax(void *private, const char *__host, err_dev: ida_simple_remove(&dax_minor_ida, minor); - err_minor: - kfree(host); return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL_GPL(alloc_dax); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 79737aee516b1..a0a4703620650 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1683,6 +1683,7 @@ static void cleanup_mapped_device(struct mapped_device *md) bioset_exit(&md->io_bs); if (md->dax_dev) { + dax_remove_host(md->disk); kill_dax(md->dax_dev); put_dax(md->dax_dev); md->dax_dev = NULL; @@ -1784,10 +1785,11 @@ static struct mapped_device *alloc_dev(int minor) sprintf(md->disk->disk_name, "dm-%d", minor); if (IS_ENABLED(CONFIG_FS_DAX)) { - md->dax_dev = alloc_dax(md, md->disk->disk_name, - &dm_dax_ops, 0); + md->dax_dev = alloc_dax(md, &dm_dax_ops, 0); if (IS_ERR(md->dax_dev)) goto bad; + if (dax_add_host(md->dax_dev, md->disk)) + goto bad; } format_dev_t(md->name, MKDEV(_major, minor)); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 4f187ee86bf71..5628afb808f41 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -399,6 +399,7 @@ static void pmem_release_disk(void *__pmem) { struct pmem_device *pmem = __pmem; + dax_remove_host(pmem->disk); kill_dax(pmem->dax_dev); put_dax(pmem->dax_dev); del_gendisk(pmem->disk); @@ -524,10 +525,11 @@ static int pmem_attach_disk(struct device *dev, if (is_nvdimm_sync(nd_region)) flags = DAXDEV_F_SYNC; - dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags); - if (IS_ERR(dax_dev)) { + dax_dev = alloc_dax(pmem, &pmem_dax_ops, flags); + if (IS_ERR(dax_dev)) return PTR_ERR(dax_dev); - } + if (dax_add_host(dax_dev, disk)) + return -ENOMEM; dax_write_cache(dax_dev, nvdimm_has_cache(nd_region)); pmem->dax_dev = dax_dev; diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 27ab888b44d0a..657e492f2bc26 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -687,18 +687,21 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char if (rc) goto put_dev; - dev_info->dax_dev = alloc_dax(dev_info, dev_info->gd->disk_name, - &dcssblk_dax_ops, DAXDEV_F_SYNC); + dev_info->dax_dev = alloc_dax(dev_info, &dcssblk_dax_ops, + DAXDEV_F_SYNC); if (IS_ERR(dev_info->dax_dev)) { rc = PTR_ERR(dev_info->dax_dev); dev_info->dax_dev = NULL; goto put_dev; } + rc = dax_add_host(dev_info->dax_dev, dev_info->gd); + if (rc) + goto out_dax; get_device(&dev_info->dev); rc = device_add_disk(&dev_info->dev, dev_info->gd, NULL); if (rc) - goto out_dax; + goto out_dax_host; switch (dev_info->segment_type) { case SEG_TYPE_SR: @@ -714,6 +717,8 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char rc = count; goto out; +out_dax_host: + dax_remove_host(dev_info->gd); out_dax: put_device(&dev_info->dev); kill_dax(dev_info->dax_dev); diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 0ad89c6629d74..eda41390bb02a 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -850,7 +850,7 @@ static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs) dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n", __func__, fs->window_kaddr, cache_reg.addr, cache_reg.len); - fs->dax_dev = alloc_dax(fs, NULL, &virtio_fs_dax_ops, 0); + fs->dax_dev = alloc_dax(fs, &virtio_fs_dax_ops, 0); if (IS_ERR(fs->dax_dev)) return PTR_ERR(fs->dax_dev); diff --git a/include/linux/dax.h b/include/linux/dax.h index 8623caa673889..e2e9a67004cbd 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -11,9 +11,11 @@ typedef unsigned long dax_entry_t; +struct dax_device; +struct gendisk; struct iomap_ops; struct iomap; -struct dax_device; + struct dax_operations { /* * direct_access: translate a device-relative @@ -39,8 +41,8 @@ struct dax_operations { }; #if IS_ENABLED(CONFIG_DAX) -struct dax_device *alloc_dax(void *private, const char *host, - const struct dax_operations *ops, unsigned long flags); +struct dax_device *alloc_dax(void *private, const struct dax_operations *ops, + unsigned long flags); void put_dax(struct dax_device *dax_dev); void kill_dax(struct dax_device *dax_dev); void dax_write_cache(struct dax_device *dax_dev, bool wc); @@ -68,7 +70,7 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, return dax_synchronous(dax_dev); } #else -static inline struct dax_device *alloc_dax(void *private, const char *host, +static inline struct dax_device *alloc_dax(void *private, const struct dax_operations *ops, unsigned long flags) { /* @@ -107,6 +109,8 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, struct writeback_control; int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff); #if IS_ENABLED(CONFIG_FS_DAX) +int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk); +void dax_remove_host(struct gendisk *disk); bool generic_fsdax_supported(struct dax_device *dax_dev, struct block_device *bdev, int blocksize, sector_t start, sector_t sectors); @@ -128,6 +132,13 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t st dax_entry_t dax_lock_page(struct page *page); void dax_unlock_page(struct page *page, dax_entry_t cookie); #else +static inline int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk) +{ + return 0; +} +static inline void dax_remove_host(struct gendisk *disk) +{ +} #define generic_fsdax_supported NULL static inline bool dax_supported(struct dax_device *dax_dev, -- 2.30.2
Christoph Hellwig
2021-Oct-18 04:40 UTC
[PATCH 04/11] dax: remove the pgmap sanity checks in generic_fsdax_supported
Drivers that register a dax_dev should make sure it works, no need to double check from the file system. Signed-off-by: Christoph Hellwig <hch at lst.de> --- drivers/dax/super.c | 49 +-------------------------------------------- 1 file changed, 1 insertion(+), 48 deletions(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 9383c11b21853..04fc680542e8d 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -107,13 +107,9 @@ bool generic_fsdax_supported(struct dax_device *dax_dev, struct block_device *bdev, int blocksize, sector_t start, sector_t sectors) { - bool dax_enabled = false; pgoff_t pgoff, pgoff_end; - void *kaddr, *end_kaddr; - pfn_t pfn, end_pfn; sector_t last_page; - long len, len2; - int err, id; + int err; if (blocksize != PAGE_SIZE) { pr_info("%pg: error: unsupported blocksize for dax\n", bdev); @@ -138,49 +134,6 @@ bool generic_fsdax_supported(struct dax_device *dax_dev, return false; } - id = dax_read_lock(); - len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn); - len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn); - - if (len < 1 || len2 < 1) { - pr_info("%pg: error: dax access failed (%ld)\n", - bdev, len < 1 ? len : len2); - dax_read_unlock(id); - return false; - } - - if (IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn)) { - /* - * An arch that has enabled the pmem api should also - * have its drivers support pfn_t_devmap() - * - * This is a developer warning and should not trigger in - * production. dax_flush() will crash since it depends - * on being able to do (page_address(pfn_to_page())). - */ - WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API)); - dax_enabled = true; - } else if (pfn_t_devmap(pfn) && pfn_t_devmap(end_pfn)) { - struct dev_pagemap *pgmap, *end_pgmap; - - pgmap = get_dev_pagemap(pfn_t_to_pfn(pfn), NULL); - end_pgmap = get_dev_pagemap(pfn_t_to_pfn(end_pfn), NULL); - if (pgmap && pgmap == end_pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX - && pfn_t_to_page(pfn)->pgmap == pgmap - && pfn_t_to_page(end_pfn)->pgmap == pgmap - && pfn_t_to_pfn(pfn) == PHYS_PFN(__pa(kaddr)) - && pfn_t_to_pfn(end_pfn) == PHYS_PFN(__pa(end_kaddr))) - dax_enabled = true; - put_dev_pagemap(pgmap); - put_dev_pagemap(end_pgmap); - - } - dax_read_unlock(id); - - if (!dax_enabled) { - pr_info("%pg: error: dax support not enabled\n", bdev); - return false; - } return true; } EXPORT_SYMBOL_GPL(generic_fsdax_supported); -- 2.30.2
Christoph Hellwig
2021-Oct-18 04:40 UTC
[PATCH 05/11] dax: move the partition alignment check into fs_dax_get_by_bdev
fs_dax_get_by_bdev is the primary interface to find a dax device for a block device, so move the partition alignment check there instead of wiring it up through ->dax_supported. Signed-off-by: Christoph Hellwig <hch at lst.de> --- drivers/dax/super.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 04fc680542e8d..482fe775324a4 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -93,6 +93,12 @@ struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev) if (!blk_queue_dax(bdev->bd_disk->queue)) return NULL; + if ((get_start_sect(bdev) * SECTOR_SIZE) % PAGE_SIZE || + (bdev_nr_sectors(bdev) * SECTOR_SIZE) % PAGE_SIZE) { + pr_info("%pg: error: unaligned partition for dax\n", bdev); + return NULL; + } + id = dax_read_lock(); dax_dev = xa_load(&dax_hosts, (unsigned long)bdev->bd_disk); if (!dax_dev || !dax_alive(dax_dev) || !igrab(&dax_dev->inode)) @@ -107,10 +113,6 @@ bool generic_fsdax_supported(struct dax_device *dax_dev, struct block_device *bdev, int blocksize, sector_t start, sector_t sectors) { - pgoff_t pgoff, pgoff_end; - sector_t last_page; - int err; - if (blocksize != PAGE_SIZE) { pr_info("%pg: error: unsupported blocksize for dax\n", bdev); return false; @@ -121,19 +123,6 @@ bool generic_fsdax_supported(struct dax_device *dax_dev, return false; } - err = bdev_dax_pgoff(bdev, start, PAGE_SIZE, &pgoff); - if (err) { - pr_info("%pg: error: unaligned partition for dax\n", bdev); - return false; - } - - last_page = PFN_DOWN((start + sectors - 1) * 512) * PAGE_SIZE / 512; - err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, &pgoff_end); - if (err) { - pr_info("%pg: error: unaligned partition for dax\n", bdev); - return false; - } - return true; } EXPORT_SYMBOL_GPL(generic_fsdax_supported); -- 2.30.2
Christoph Hellwig
2021-Oct-18 04:40 UTC
[PATCH 06/11] xfs: factor out a xfs_setup_dax helper
Factor out another DAX setup helper to simplify future changes. Also move the experimental warning after the checks to not clutter the log too much if the setup failed. Signed-off-by: Christoph Hellwig <hch at lst.de> --- fs/xfs/xfs_super.c | 47 +++++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index c4e0cd1c1c8ca..d07020a8eb9e3 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -339,6 +339,32 @@ xfs_buftarg_is_dax( bdev_nr_sectors(bt->bt_bdev)); } +static int +xfs_setup_dax( + struct xfs_mount *mp) +{ + struct super_block *sb = mp->m_super; + + if (!xfs_buftarg_is_dax(sb, mp->m_ddev_targp) && + (!mp->m_rtdev_targp || !xfs_buftarg_is_dax(sb, mp->m_rtdev_targp))) { + xfs_alert(mp, + "DAX unsupported by block device. Turning off DAX."); + goto disable_dax; + } + + if (xfs_has_reflink(mp)) { + xfs_alert(mp, "DAX and reflink cannot be used together!"); + return -EINVAL; + } + + xfs_warn(mp, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); + return 0; + +disable_dax: + xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER); + return 0; +} + STATIC int xfs_blkdev_get( xfs_mount_t *mp, @@ -1592,26 +1618,9 @@ xfs_fs_fill_super( sb->s_flags |= SB_I_VERSION; if (xfs_has_dax_always(mp)) { - bool rtdev_is_dax = false, datadev_is_dax; - - xfs_warn(mp, - "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); - - datadev_is_dax = xfs_buftarg_is_dax(sb, mp->m_ddev_targp); - if (mp->m_rtdev_targp) - rtdev_is_dax = xfs_buftarg_is_dax(sb, - mp->m_rtdev_targp); - if (!rtdev_is_dax && !datadev_is_dax) { - xfs_alert(mp, - "DAX unsupported by block device. Turning off DAX."); - xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER); - } - if (xfs_has_reflink(mp)) { - xfs_alert(mp, - "DAX and reflink cannot be used together!"); - error = -EINVAL; + error = xfs_setup_dax(mp); + if (error) goto out_filestream_unmount; - } } if (xfs_has_discard(mp)) { -- 2.30.2
Just open code the block size and dax_dev == NULL checks in the callers. Signed-off-by: Christoph Hellwig <hch at lst.de> --- drivers/dax/super.c | 36 ------------------------------------ drivers/md/dm-table.c | 22 +++++++++++----------- drivers/md/dm.c | 21 --------------------- drivers/md/dm.h | 4 ---- drivers/nvdimm/pmem.c | 1 - drivers/s390/block/dcssblk.c | 1 - fs/erofs/super.c | 11 +++++++---- fs/ext2/super.c | 6 ++++-- fs/ext4/super.c | 9 ++++++--- fs/xfs/xfs_super.c | 21 ++++++++------------- include/linux/dax.h | 14 -------------- 11 files changed, 36 insertions(+), 110 deletions(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 482fe775324a4..803942586d1b6 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -108,42 +108,6 @@ struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev) return dax_dev; } EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev); - -bool generic_fsdax_supported(struct dax_device *dax_dev, - struct block_device *bdev, int blocksize, sector_t start, - sector_t sectors) -{ - if (blocksize != PAGE_SIZE) { - pr_info("%pg: error: unsupported blocksize for dax\n", bdev); - return false; - } - - if (!dax_dev) { - pr_debug("%pg: error: dax unsupported by block device\n", bdev); - return false; - } - - return true; -} -EXPORT_SYMBOL_GPL(generic_fsdax_supported); - -bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev, - int blocksize, sector_t start, sector_t len) -{ - bool ret = false; - int id; - - if (!dax_dev) - return false; - - id = dax_read_lock(); - if (dax_alive(dax_dev) && dax_dev->ops->dax_supported) - ret = dax_dev->ops->dax_supported(dax_dev, bdev, blocksize, - start, len); - dax_read_unlock(id); - return ret; -} -EXPORT_SYMBOL_GPL(dax_supported); #endif /* CONFIG_BLOCK && CONFIG_FS_DAX */ enum dax_device_flags { diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 1fa4d5582dca5..4ae671c2168ea 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -807,12 +807,14 @@ void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type) EXPORT_SYMBOL_GPL(dm_table_set_type); /* validate the dax capability of the target device span */ -int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev, +static int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { - int blocksize = *(int *) data; + if (dev->dax_dev) + return false; - return !dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len); + pr_debug("%pg: error: dax unsupported by block device\n", dev->bdev); + return true; } /* Check devices support synchronous DAX */ @@ -822,8 +824,8 @@ static int device_not_dax_synchronous_capable(struct dm_target *ti, struct dm_de return !dev->dax_dev || !dax_synchronous(dev->dax_dev); } -bool dm_table_supports_dax(struct dm_table *t, - iterate_devices_callout_fn iterate_fn, int *blocksize) +static bool dm_table_supports_dax(struct dm_table *t, + iterate_devices_callout_fn iterate_fn) { struct dm_target *ti; unsigned i; @@ -836,7 +838,7 @@ bool dm_table_supports_dax(struct dm_table *t, return false; if (!ti->type->iterate_devices || - ti->type->iterate_devices(ti, iterate_fn, blocksize)) + ti->type->iterate_devices(ti, iterate_fn, NULL)) return false; } @@ -863,7 +865,6 @@ static int dm_table_determine_type(struct dm_table *t) struct dm_target *tgt; struct list_head *devices = dm_table_get_devices(t); enum dm_queue_mode live_md_type = dm_get_md_type(t->md); - int page_size = PAGE_SIZE; if (t->type != DM_TYPE_NONE) { /* target already set the table's type */ @@ -907,7 +908,7 @@ static int dm_table_determine_type(struct dm_table *t) verify_bio_based: /* We must use this table as bio-based */ t->type = DM_TYPE_BIO_BASED; - if (dm_table_supports_dax(t, device_not_dax_capable, &page_size) || + if (dm_table_supports_dax(t, device_not_dax_capable) || (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) { t->type = DM_TYPE_DAX_BIO_BASED; } @@ -1981,7 +1982,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { bool wc = false, fua = false; - int page_size = PAGE_SIZE; int r; /* @@ -2015,9 +2015,9 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, } blk_queue_write_cache(q, wc, fua); - if (dm_table_supports_dax(t, device_not_dax_capable, &page_size)) { + if (dm_table_supports_dax(t, device_not_dax_capable)) { blk_queue_flag_set(QUEUE_FLAG_DAX, q); - if (dm_table_supports_dax(t, device_not_dax_synchronous_capable, NULL)) + if (dm_table_supports_dax(t, device_not_dax_synchronous_capable)) set_dax_synchronous(t->md->dax_dev); } else diff --git a/drivers/md/dm.c b/drivers/md/dm.c index a0a4703620650..f896ad29a67a7 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1027,26 +1027,6 @@ static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, return ret; } -static bool dm_dax_supported(struct dax_device *dax_dev, struct block_device *bdev, - int blocksize, sector_t start, sector_t len) -{ - struct mapped_device *md = dax_get_private(dax_dev); - struct dm_table *map; - bool ret = false; - int srcu_idx; - - map = dm_get_live_table(md, &srcu_idx); - if (!map) - goto out; - - ret = dm_table_supports_dax(map, device_not_dax_capable, &blocksize); - -out: - dm_put_live_table(md, srcu_idx); - - return ret; -} - static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) { @@ -3050,7 +3030,6 @@ static const struct block_device_operations dm_rq_blk_dops = { static const struct dax_operations dm_dax_ops = { .direct_access = dm_dax_direct_access, - .dax_supported = dm_dax_supported, .copy_from_iter = dm_dax_copy_from_iter, .copy_to_iter = dm_dax_copy_to_iter, .zero_page_range = dm_dax_zero_page_range, diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 742d9c80efe19..9013dc1a7b002 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -73,10 +73,6 @@ bool dm_table_bio_based(struct dm_table *t); bool dm_table_request_based(struct dm_table *t); void dm_table_free_md_mempools(struct dm_table *t); struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t); -bool dm_table_supports_dax(struct dm_table *t, iterate_devices_callout_fn fn, - int *blocksize); -int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data); void dm_lock_md_type(struct mapped_device *md); void dm_unlock_md_type(struct mapped_device *md); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 5628afb808f41..428a485800058 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -321,7 +321,6 @@ static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, static const struct dax_operations pmem_dax_ops = { .direct_access = pmem_dax_direct_access, - .dax_supported = generic_fsdax_supported, .copy_from_iter = pmem_copy_from_iter, .copy_to_iter = pmem_copy_to_iter, .zero_page_range = pmem_dax_zero_page_range, diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 657e492f2bc26..e65e83764d1ce 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -72,7 +72,6 @@ static int dcssblk_dax_zero_page_range(struct dax_device *dax_dev, static const struct dax_operations dcssblk_dax_ops = { .direct_access = dcssblk_dax_direct_access, - .dax_supported = generic_fsdax_supported, .copy_from_iter = dcssblk_dax_copy_from_iter, .copy_to_iter = dcssblk_dax_copy_to_iter, .zero_page_range = dcssblk_dax_zero_page_range, diff --git a/fs/erofs/super.c b/fs/erofs/super.c index b8f042c3e7e67..530d7b1e0f138 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -649,10 +649,13 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) if (err) return err; - if (test_opt(&sbi->opt, DAX_ALWAYS) && - !dax_supported(sbi->dax_dev, sb->s_bdev, EROFS_BLKSIZ, 0, bdev_nr_sectors(sb->s_bdev))) { - errorfc(fc, "DAX unsupported by block device. Turning off DAX."); - clear_opt(&sbi->opt, DAX_ALWAYS); + if (test_opt(&sbi->opt, DAX_ALWAYS)) { + BUILD_BUG_ON(EROFS_BLKSIZ != PAGE_SIZE); + + if (!sbi->dax_dev) { + errorfc(fc, "DAX unsupported by block device. Turning off DAX."); + clear_opt(&sbi->opt, DAX_ALWAYS); + } } sb->s_flags |= SB_RDONLY | SB_NOATIME; sb->s_maxbytes = MAX_LFS_FILESIZE; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index d8d580b609baa..a964066a80aa7 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -946,11 +946,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); if (test_opt(sb, DAX)) { - if (!dax_supported(dax_dev, sb->s_bdev, blocksize, 0, - bdev_nr_sectors(sb->s_bdev))) { + if (!dax_dev) { ext2_msg(sb, KERN_ERR, "DAX unsupported by block device. Turning off DAX."); clear_opt(sbi->s_mount_opt, DAX); + } else if (blocksize != PAGE_SIZE) { + ext2_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n"); + clear_opt(sbi->s_mount_opt, DAX); } } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6361ea1f97bc5..f571be3a6252b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4291,9 +4291,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } - if (dax_supported(dax_dev, sb->s_bdev, blocksize, 0, - bdev_nr_sectors(sb->s_bdev))) - set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags); + if (dax_dev) { + if (blocksize == PAGE_SIZE) + set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags); + else + ext4_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n"); + } if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) { if (ext4_has_feature_inline_data(sb)) { diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index d07020a8eb9e3..163ceafbd8fd2 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -330,28 +330,23 @@ xfs_set_inode_alloc( return xfs_is_inode32(mp) ? maxagi : agcount; } -static bool -xfs_buftarg_is_dax( - struct super_block *sb, - struct xfs_buftarg *bt) -{ - return dax_supported(bt->bt_daxdev, bt->bt_bdev, sb->s_blocksize, 0, - bdev_nr_sectors(bt->bt_bdev)); -} - static int xfs_setup_dax( struct xfs_mount *mp) { - struct super_block *sb = mp->m_super; - - if (!xfs_buftarg_is_dax(sb, mp->m_ddev_targp) && - (!mp->m_rtdev_targp || !xfs_buftarg_is_dax(sb, mp->m_rtdev_targp))) { + if (!mp->m_ddev_targp->bt_daxdev && + (!mp->m_rtdev_targp || !mp->m_rtdev_targp->bt_daxdev)) { xfs_alert(mp, "DAX unsupported by block device. Turning off DAX."); goto disable_dax; } + if (mp->m_super->s_blocksize != PAGE_SIZE) { + xfs_alert(mp, + "DAX not supported for blocksize. Turning off DAX.\n"); + goto disable_dax; + } + if (xfs_has_reflink(mp)) { xfs_alert(mp, "DAX and reflink cannot be used together!"); return -EINVAL; diff --git a/include/linux/dax.h b/include/linux/dax.h index e2e9a67004cbd..439c3c70e347b 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -111,12 +111,6 @@ int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff); #if IS_ENABLED(CONFIG_FS_DAX) int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk); void dax_remove_host(struct gendisk *disk); -bool generic_fsdax_supported(struct dax_device *dax_dev, - struct block_device *bdev, int blocksize, sector_t start, - sector_t sectors); - -bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev, - int blocksize, sector_t start, sector_t len); static inline void fs_put_dax(struct dax_device *dax_dev) { @@ -139,14 +133,6 @@ static inline int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk) static inline void dax_remove_host(struct gendisk *disk) { } -#define generic_fsdax_supported NULL - -static inline bool dax_supported(struct dax_device *dax_dev, - struct block_device *bdev, int blocksize, sector_t start, - sector_t len) -{ - return false; -} static inline void fs_put_dax(struct dax_device *dax_dev) { -- 2.30.2
Christoph Hellwig
2021-Oct-18 04:40 UTC
[PATCH 08/11] dm-linear: add a linear_dax_pgoff helper
Add a helper to perform the entire remapping for DAX accesses. This helper open codes bdev_dax_pgoff given that the alignment checks have already been done by the submitting file system and don't need to be repeated. Signed-off-by: Christoph Hellwig <hch at lst.de> --- drivers/md/dm-linear.c | 49 +++++++++++++----------------------------- 1 file changed, 15 insertions(+), 34 deletions(-) diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 32fbab11bf90c..bf03f73fd0f36 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -164,63 +164,44 @@ static int linear_iterate_devices(struct dm_target *ti, } #if IS_ENABLED(CONFIG_FS_DAX) +static struct dax_device *linear_dax_pgoff(struct dm_target *ti, pgoff_t *pgoff) +{ + struct linear_c *lc = ti->private; + sector_t sector = linear_map_sector(ti, *pgoff << PAGE_SECTORS_SHIFT); + + *pgoff = (get_start_sect(lc->dev->bdev) + sector) >> PAGE_SECTORS_SHIFT; + return lc->dev->dax_dev; +} + static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn) { - long ret; - struct linear_c *lc = ti->private; - struct block_device *bdev = lc->dev->bdev; - struct dax_device *dax_dev = lc->dev->dax_dev; - sector_t dev_sector, sector = pgoff * PAGE_SECTORS; - - dev_sector = linear_map_sector(ti, sector); - ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages * PAGE_SIZE, &pgoff); - if (ret) - return ret; + struct dax_device *dax_dev = linear_dax_pgoff(ti, &pgoff); + return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); } static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) { - struct linear_c *lc = ti->private; - struct block_device *bdev = lc->dev->bdev; - struct dax_device *dax_dev = lc->dev->dax_dev; - sector_t dev_sector, sector = pgoff * PAGE_SECTORS; + struct dax_device *dax_dev = linear_dax_pgoff(ti, &pgoff); - dev_sector = linear_map_sector(ti, sector); - if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff)) - return 0; return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); } static size_t linear_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) { - struct linear_c *lc = ti->private; - struct block_device *bdev = lc->dev->bdev; - struct dax_device *dax_dev = lc->dev->dax_dev; - sector_t dev_sector, sector = pgoff * PAGE_SECTORS; + struct dax_device *dax_dev = linear_dax_pgoff(ti, &pgoff); - dev_sector = linear_map_sector(ti, sector); - if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff)) - return 0; return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i); } static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff, size_t nr_pages) { - int ret; - struct linear_c *lc = ti->private; - struct block_device *bdev = lc->dev->bdev; - struct dax_device *dax_dev = lc->dev->dax_dev; - sector_t dev_sector, sector = pgoff * PAGE_SECTORS; - - dev_sector = linear_map_sector(ti, sector); - ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages << PAGE_SHIFT, &pgoff); - if (ret) - return ret; + struct dax_device *dax_dev = linear_dax_pgoff(ti, &pgoff); + return dax_zero_page_range(dax_dev, pgoff, nr_pages); } -- 2.30.2
Christoph Hellwig
2021-Oct-18 04:40 UTC
[PATCH 09/11] dm-log-writes: add a log_writes_dax_pgoff helper
Add a helper to perform the entire remapping for DAX accesses. This helper open codes bdev_dax_pgoff given that the alignment checks have already been done by the submitting file system and don't need to be repeated. Signed-off-by: Christoph Hellwig <hch at lst.de> --- drivers/md/dm-log-writes.c | 42 +++++++++++++++----------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index 6d694526881d0..5aac60c1b774c 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -949,17 +949,21 @@ static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes, return 0; } +static struct dax_device *log_writes_dax_pgoff(struct dm_target *ti, + pgoff_t *pgoff) +{ + struct log_writes_c *lc = ti->private; + + *pgoff += (get_start_sect(lc->dev->bdev) >> PAGE_SECTORS_SHIFT); + return lc->dev->dax_dev; +} + static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn) { - struct log_writes_c *lc = ti->private; - sector_t sector = pgoff * PAGE_SECTORS; - int ret; + struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff); - ret = bdev_dax_pgoff(lc->dev->bdev, sector, nr_pages * PAGE_SIZE, &pgoff); - if (ret) - return ret; - return dax_direct_access(lc->dev->dax_dev, pgoff, nr_pages, kaddr, pfn); + return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); } static size_t log_writes_dax_copy_from_iter(struct dm_target *ti, @@ -968,11 +972,9 @@ static size_t log_writes_dax_copy_from_iter(struct dm_target *ti, { struct log_writes_c *lc = ti->private; sector_t sector = pgoff * PAGE_SECTORS; + struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff); int err; - if (bdev_dax_pgoff(lc->dev->bdev, sector, ALIGN(bytes, PAGE_SIZE), &pgoff)) - return 0; - /* Don't bother doing anything if logging has been disabled */ if (!lc->logging_enabled) goto dax_copy; @@ -983,34 +985,24 @@ static size_t log_writes_dax_copy_from_iter(struct dm_target *ti, return 0; } dax_copy: - return dax_copy_from_iter(lc->dev->dax_dev, pgoff, addr, bytes, i); + return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); } static size_t log_writes_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) { - struct log_writes_c *lc = ti->private; - sector_t sector = pgoff * PAGE_SECTORS; + struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff); - if (bdev_dax_pgoff(lc->dev->bdev, sector, ALIGN(bytes, PAGE_SIZE), &pgoff)) - return 0; - return dax_copy_to_iter(lc->dev->dax_dev, pgoff, addr, bytes, i); + return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i); } static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff, size_t nr_pages) { - int ret; - struct log_writes_c *lc = ti->private; - sector_t sector = pgoff * PAGE_SECTORS; + struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff); - ret = bdev_dax_pgoff(lc->dev->bdev, sector, nr_pages << PAGE_SHIFT, - &pgoff); - if (ret) - return ret; - return dax_zero_page_range(lc->dev->dax_dev, pgoff, - nr_pages << PAGE_SHIFT); + return dax_zero_page_range(dax_dev, pgoff, nr_pages << PAGE_SHIFT); } #else -- 2.30.2
Christoph Hellwig
2021-Oct-18 04:40 UTC
[PATCH 10/11] dm-stripe: add a stripe_dax_pgoff helper
Add a helper to perform the entire remapping for DAX accesses. This helper open codes bdev_dax_pgoff given that the alignment checks have already been done by the submitting file system and don't need to be repeated. Signed-off-by: Christoph Hellwig <hch at lst.de> --- drivers/md/dm-stripe.c | 63 ++++++++++-------------------------------- 1 file changed, 15 insertions(+), 48 deletions(-) diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index f084607220293..50dba3f39274c 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -301,83 +301,50 @@ static int stripe_map(struct dm_target *ti, struct bio *bio) } #if IS_ENABLED(CONFIG_FS_DAX) -static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) +static struct dax_device *stripe_dax_pgoff(struct dm_target *ti, pgoff_t *pgoff) { - sector_t dev_sector, sector = pgoff * PAGE_SECTORS; struct stripe_c *sc = ti->private; - struct dax_device *dax_dev; struct block_device *bdev; + sector_t dev_sector; uint32_t stripe; - long ret; - stripe_map_sector(sc, sector, &stripe, &dev_sector); + stripe_map_sector(sc, *pgoff * PAGE_SECTORS, &stripe, &dev_sector); dev_sector += sc->stripe[stripe].physical_start; - dax_dev = sc->stripe[stripe].dev->dax_dev; bdev = sc->stripe[stripe].dev->bdev; - ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages * PAGE_SIZE, &pgoff); - if (ret) - return ret; + *pgoff = (get_start_sect(bdev) + dev_sector) >> PAGE_SECTORS_SHIFT; + return sc->stripe[stripe].dev->dax_dev; +} + +static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, + long nr_pages, void **kaddr, pfn_t *pfn) +{ + struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff); + return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); } static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) { - sector_t dev_sector, sector = pgoff * PAGE_SECTORS; - struct stripe_c *sc = ti->private; - struct dax_device *dax_dev; - struct block_device *bdev; - uint32_t stripe; - - stripe_map_sector(sc, sector, &stripe, &dev_sector); - dev_sector += sc->stripe[stripe].physical_start; - dax_dev = sc->stripe[stripe].dev->dax_dev; - bdev = sc->stripe[stripe].dev->bdev; + struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff); - if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff)) - return 0; return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); } static size_t stripe_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) { - sector_t dev_sector, sector = pgoff * PAGE_SECTORS; - struct stripe_c *sc = ti->private; - struct dax_device *dax_dev; - struct block_device *bdev; - uint32_t stripe; - - stripe_map_sector(sc, sector, &stripe, &dev_sector); - dev_sector += sc->stripe[stripe].physical_start; - dax_dev = sc->stripe[stripe].dev->dax_dev; - bdev = sc->stripe[stripe].dev->bdev; + struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff); - if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff)) - return 0; return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i); } static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff, size_t nr_pages) { - int ret; - sector_t dev_sector, sector = pgoff * PAGE_SECTORS; - struct stripe_c *sc = ti->private; - struct dax_device *dax_dev; - struct block_device *bdev; - uint32_t stripe; + struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff); - stripe_map_sector(sc, sector, &stripe, &dev_sector); - dev_sector += sc->stripe[stripe].physical_start; - dax_dev = sc->stripe[stripe].dev->dax_dev; - bdev = sc->stripe[stripe].dev->bdev; - - ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages << PAGE_SHIFT, &pgoff); - if (ret) - return ret; return dax_zero_page_range(dax_dev, pgoff, nr_pages); } -- 2.30.2
Christoph Hellwig
2021-Oct-18 04:40 UTC
[PATCH 11/11] dax: move bdev_dax_pgoff to fs/dax.c
No functional changet, but this will allow for a tighter integration with the iomap code, including possible passing the partition offset in the iomap in the future. For now it mostly avoids growing more callers outside of fs/dax.c. Signed-off-by: Christoph Hellwig <hch at lst.de> --- drivers/dax/super.c | 14 -------------- fs/dax.c | 13 +++++++++++++ include/linux/dax.h | 1 - 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 803942586d1b6..c0910687fbcb2 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -67,20 +67,6 @@ void dax_remove_host(struct gendisk *disk) } EXPORT_SYMBOL_GPL(dax_remove_host); -int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size, - pgoff_t *pgoff) -{ - sector_t start_sect = bdev ? get_start_sect(bdev) : 0; - phys_addr_t phys_off = (start_sect + sector) * 512; - - if (pgoff) - *pgoff = PHYS_PFN(phys_off); - if (phys_off % PAGE_SIZE || size % PAGE_SIZE) - return -EINVAL; - return 0; -} -EXPORT_SYMBOL(bdev_dax_pgoff); - /** * dax_get_by_host() - temporary lookup mechanism for filesystem-dax * @bdev: block device to find a dax_device for diff --git a/fs/dax.c b/fs/dax.c index 4e3e5a283a916..eb715363fd667 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -709,6 +709,19 @@ int dax_invalidate_mapping_entry_sync(struct address_space *mapping, return __dax_invalidate_entry(mapping, index, false); } +static int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size, + pgoff_t *pgoff) +{ + sector_t start_sect = bdev ? get_start_sect(bdev) : 0; + phys_addr_t phys_off = (start_sect + sector) * 512; + + if (pgoff) + *pgoff = PHYS_PFN(phys_off); + if (phys_off % PAGE_SIZE || size % PAGE_SIZE) + return -EINVAL; + return 0; +} + static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_dev, sector_t sector, struct page *to, unsigned long vaddr) { diff --git a/include/linux/dax.h b/include/linux/dax.h index 439c3c70e347b..324363b798ecd 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -107,7 +107,6 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, #endif struct writeback_control; -int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff); #if IS_ENABLED(CONFIG_FS_DAX) int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk); void dax_remove_host(struct gendisk *disk); -- 2.30.2
[ add sfr ] On Sun, Oct 17, 2021 at 9:41 PM Christoph Hellwig <hch at lst.de> wrote:> > Hi Dan, > > this series cleans up and simplifies the association between DAX and block > devices in preparation of allowing to mount file systems directly on DAX > devices without a detour through block devices.So I notice that this is based on linux-next while libnvdimm-for-next is based on v5.15-rc4. Since I'm not Andrew I went ahead and rebased these onto v5.15-rc4, tested that, and then merged with linux-next to resolve the conflicts and tested again. My merge resolution is here [1]. Christoph, please have a look. The rebase and the merge result are both passing my test and I'm now going to review the individual patches. However, while I do that and collect acks from DM and EROFS folks, I want to give Stephen a heads up that this is coming. Primarily I want to see if someone sees a better strategy to merge this, please let me know, but if not I plan to walk Stephen and Linus through the resolution. [1]: https://git.kernel.org/pub/scm/linux/kernel/git/djbw/nvdimm.git/commit/?id=c3894cf6eb8f> > Diffstat: > drivers/dax/Kconfig | 4 > drivers/dax/bus.c | 2 > drivers/dax/super.c | 220 +++++-------------------------------------- > drivers/md/dm-linear.c | 51 +++------ > drivers/md/dm-log-writes.c | 44 +++----- > drivers/md/dm-stripe.c | 65 +++--------- > drivers/md/dm-table.c | 22 ++-- > drivers/md/dm-writecache.c | 2 > drivers/md/dm.c | 29 ----- > drivers/md/dm.h | 4 > drivers/nvdimm/Kconfig | 2 > drivers/nvdimm/pmem.c | 9 - > drivers/s390/block/Kconfig | 2 > drivers/s390/block/dcssblk.c | 12 +- > fs/dax.c | 13 ++ > fs/erofs/super.c | 11 +- > fs/ext2/super.c | 6 - > fs/ext4/super.c | 9 + > fs/fuse/Kconfig | 2 > fs/fuse/virtio_fs.c | 2 > fs/xfs/xfs_super.c | 54 +++++----- > include/linux/dax.h | 30 ++--- > 22 files changed, 185 insertions(+), 410 deletions(-)
Mike Snitzer
2021-Nov-01 16:28 UTC
[PATCH 03/11] dax: simplify the dax_device <-> gendisk association
On Mon, Oct 18 2021 at 12:40P -0400, Christoph Hellwig <hch at lst.de> wrote:> Replace the dax_host_hash with an xarray indexed by the pointer value > of the gendisk, and require explicitl calls from the block drivers that > want to associate their gendisk with a dax_device. > > Signed-off-by: Christoph Hellwig <hch at lst.de>...> diff --git a/drivers/md/dm.c b/drivers/md/dm.c > index 79737aee516b1..a0a4703620650 100644 > --- a/drivers/md/dm.c > +++ b/drivers/md/dm.c > @@ -1683,6 +1683,7 @@ static void cleanup_mapped_device(struct mapped_device *md) > bioset_exit(&md->io_bs); > > if (md->dax_dev) { > + dax_remove_host(md->disk); > kill_dax(md->dax_dev); > put_dax(md->dax_dev); > md->dax_dev = NULL; > @@ -1784,10 +1785,11 @@ static struct mapped_device *alloc_dev(int minor) > sprintf(md->disk->disk_name, "dm-%d", minor); > > if (IS_ENABLED(CONFIG_FS_DAX)) { > - md->dax_dev = alloc_dax(md, md->disk->disk_name, > - &dm_dax_ops, 0); > + md->dax_dev = alloc_dax(md, &dm_dax_ops, 0); > if (IS_ERR(md->dax_dev)) > goto bad; > + if (dax_add_host(md->dax_dev, md->disk)) > + goto bad; > } > > format_dev_t(md->name, MKDEV(_major, minor));Acked-by: Mike Snitzer <snitzer at redhat.com>