This patchset adds support for per-file DAX for virtiofs, which is inspired by Ira Weiny's work on ext4[1] and xfs[2]. Any comment is welcome. [1] commit 9cb20f94afcd ("fs/ext4: Make DAX mount option a tri-state") [2] commit 02beb2686ff9 ("fs/xfs: Make DAX mount option a tri-state") changes since v3: - bug fix (patch 6): s/"IS_DAX(inode) != newdax"/"!!IS_DAX(inode) != newdax" - during FUSE_INIT, advertise capability for per-file DAX only when mounted as "-o dax=inode" (patch 4) changes since v2: - modify fuse_show_options() accordingly to make it compatible with new tri-state mount option (patch 2) - extract FUSE protocol changes into one separate patch (patch 3) - FUSE server/client need to negotiate if they support per-file DAX (patch 4) - extract DONT_CACHE logic into patch 6/7 v3: https://www.spinics.net/lists/linux-fsdevel/msg200852.html v2: https://www.spinics.net/lists/linux-fsdevel/msg199584.html v1: https://www.spinics.net/lists/linux-virtualization/msg51008.html Jeffle Xu (8): fuse: add fuse_should_enable_dax() helper fuse: Make DAX mount option a tri-state fuse: support per-file DAX fuse: negotiate if server/client supports per-file DAX fuse: enable per-file DAX fuse: mark inode DONT_CACHE when per-file DAX indication changes fuse: support changing per-file DAX flag inside guest fuse: show '-o dax=inode' option only when FUSE server supports fs/fuse/dax.c | 32 +++++++++++++++++++++++++++++--- fs/fuse/file.c | 4 ++-- fs/fuse/fuse_i.h | 22 ++++++++++++++++++---- fs/fuse/inode.c | 27 +++++++++++++++++++-------- fs/fuse/ioctl.c | 15 +++++++++++++-- fs/fuse/virtio_fs.c | 16 ++++++++++++++-- include/uapi/linux/fuse.h | 9 ++++++++- 7 files changed, 103 insertions(+), 22 deletions(-) -- 2.27.0
This is in prep for following per-file DAX checking. Signed-off-by: Jeffle Xu <jefflexu at linux.alibaba.com> --- fs/fuse/dax.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c index 0e5407f48e6a..c6f4e82e65f3 100644 --- a/fs/fuse/dax.c +++ b/fs/fuse/dax.c @@ -1336,11 +1336,19 @@ static const struct address_space_operations fuse_dax_file_aops = { .invalidatepage = noop_invalidatepage, }; -void fuse_dax_inode_init(struct inode *inode) +static bool fuse_should_enable_dax(struct inode *inode) { struct fuse_conn *fc = get_fuse_conn(inode); if (!fc->dax) + return false; + + return true; +} + +void fuse_dax_inode_init(struct inode *inode) +{ + if (!fuse_should_enable_dax(inode)) return; inode->i_flags |= S_DAX; -- 2.27.0
We add 'always', 'never', and 'inode' (default). '-o dax' continues to operate the same which is equivalent to 'always'. By the time this patch is applied, 'inode' mode is actually equal to 'always' mode, before the per-file DAX flag is introduced in the following patch. Signed-off-by: Jeffle Xu <jefflexu at linux.alibaba.com> --- fs/fuse/dax.c | 9 +++++++-- fs/fuse/fuse_i.h | 14 ++++++++++++-- fs/fuse/inode.c | 10 +++++++--- fs/fuse/virtio_fs.c | 16 ++++++++++++++-- 4 files changed, 40 insertions(+), 9 deletions(-) diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c index c6f4e82e65f3..fe4e9593a590 100644 --- a/fs/fuse/dax.c +++ b/fs/fuse/dax.c @@ -1288,11 +1288,14 @@ static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd) return ret; } -int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev) +int fuse_dax_conn_alloc(struct fuse_conn *fc, enum fuse_dax_mode dax_mode, + struct dax_device *dax_dev) { struct fuse_conn_dax *fcd; int err; + fc->dax_mode = dax_mode; + if (!dax_dev) return 0; @@ -1339,8 +1342,10 @@ static const struct address_space_operations fuse_dax_file_aops = { static bool fuse_should_enable_dax(struct inode *inode) { struct fuse_conn *fc = get_fuse_conn(inode); + unsigned int dax_mode = fc->dax_mode; - if (!fc->dax) + /* If 'dax=always/inode', fc->dax couldn't be NULL */ + if (dax_mode == FUSE_DAX_NEVER) return false; return true; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 07829ce78695..a23dd8d0c181 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -487,6 +487,12 @@ struct fuse_dev { struct list_head entry; }; +enum fuse_dax_mode { + FUSE_DAX_INODE, + FUSE_DAX_ALWAYS, + FUSE_DAX_NEVER, +}; + struct fuse_fs_context { int fd; unsigned int rootmode; @@ -503,7 +509,7 @@ struct fuse_fs_context { bool no_control:1; bool no_force_umount:1; bool legacy_opts_show:1; - bool dax:1; + enum fuse_dax_mode dax_mode; unsigned int max_read; unsigned int blksize; const char *subtype; @@ -801,6 +807,9 @@ struct fuse_conn { struct 
list_head devices; #ifdef CONFIG_FUSE_DAX + /* dax mode: FUSE_DAX_* (always, never or per-file) */ + enum fuse_dax_mode dax_mode; + /* Dax specific conn data, non-NULL if DAX is enabled */ struct fuse_conn_dax *dax; #endif @@ -1242,7 +1251,8 @@ ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to); ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from); int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma); int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start, u64 dmap_end); -int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev); +int fuse_dax_conn_alloc(struct fuse_conn *fc, enum fuse_dax_mode mode, + struct dax_device *dax_dev); void fuse_dax_conn_free(struct fuse_conn *fc); bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi); void fuse_dax_inode_init(struct inode *inode); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index b9beb39a4a18..0bc0d8af81e1 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -690,8 +690,12 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root) seq_printf(m, ",blksize=%lu", sb->s_blocksize); } #ifdef CONFIG_FUSE_DAX - if (fc->dax) - seq_puts(m, ",dax"); + if (fc->dax_mode == FUSE_DAX_ALWAYS) + seq_puts(m, ",dax=always"); + else if (fc->dax_mode == FUSE_DAX_NEVER) + seq_puts(m, ",dax=never"); + else if (fc->dax_mode == FUSE_DAX_INODE) + seq_puts(m, ",dax=inode"); #endif return 0; @@ -1434,7 +1438,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) sb->s_subtype = ctx->subtype; ctx->subtype = NULL; if (IS_ENABLED(CONFIG_FUSE_DAX)) { - err = fuse_dax_conn_alloc(fc, ctx->dax_dev); + err = fuse_dax_conn_alloc(fc, ctx->dax_mode, ctx->dax_dev); if (err) goto err; } diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 6a3a23320edc..7dbf5502c57e 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -88,12 +88,21 @@ struct virtio_fs_req_work { static int virtio_fs_enqueue_req(struct 
virtio_fs_vq *fsvq, struct fuse_req *req, bool in_flight); +static const struct constant_table dax_param_enums[] = { + {"inode", FUSE_DAX_INODE }, + {"always", FUSE_DAX_ALWAYS }, + {"never", FUSE_DAX_NEVER }, + {} +}; + enum { OPT_DAX, + OPT_DAX_ENUM, }; static const struct fs_parameter_spec virtio_fs_parameters[] = { fsparam_flag("dax", OPT_DAX), + fsparam_enum("dax", OPT_DAX_ENUM, dax_param_enums), {} }; @@ -110,7 +119,10 @@ static int virtio_fs_parse_param(struct fs_context *fc, switch (opt) { case OPT_DAX: - ctx->dax = 1; + ctx->dax_mode = FUSE_DAX_ALWAYS; + break; + case OPT_DAX_ENUM: + ctx->dax_mode = result.uint_32; break; default: return -EINVAL; @@ -1323,7 +1335,7 @@ static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc) /* virtiofs allocates and installs its own fuse devices */ ctx->fudptr = NULL; - if (ctx->dax) { + if (ctx->dax_mode != FUSE_DAX_NEVER) { if (!fs->dax_dev) { err = -EINVAL; pr_err("virtio-fs: dax can't be enabled as filesystem" -- 2.27.0
Expand the fuse protocol to support per-file DAX. The FUSE_PERFILE_DAX flag is added to indicate whether the fuse server/client supports per-file DAX when sending or replying to a FUSE_INIT request. In addition, the FUSE_ATTR_DAX flag is added to indicate whether DAX shall be enabled for the corresponding file when replying to a FUSE_LOOKUP request. Signed-off-by: Jeffle Xu <jefflexu at linux.alibaba.com> --- include/uapi/linux/fuse.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 36ed092227fa..15a1f5fc0797 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -184,6 +184,9 @@ * * 7.34 * - add FUSE_SYNCFS + * + * 7.35 + * - add FUSE_PERFILE_DAX, FUSE_ATTR_DAX */ #ifndef _LINUX_FUSE_H @@ -219,7 +222,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 34 +#define FUSE_KERNEL_MINOR_VERSION 35 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -336,6 +339,7 @@ struct fuse_file_lock { * write/truncate sgid is killed only if file has group * execute permission. (Same as Linux VFS behavior). * FUSE_SETXATTR_EXT: Server supports extended struct fuse_setxattr_in + * FUSE_PERFILE_DAX: kernel supports per-file DAX */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -367,6 +371,7 @@ struct fuse_file_lock { #define FUSE_SUBMOUNTS (1 << 27) #define FUSE_HANDLE_KILLPRIV_V2 (1 << 28) #define FUSE_SETXATTR_EXT (1 << 29) +#define FUSE_PERFILE_DAX (1 << 30) /** * CUSE INIT request/reply flags @@ -449,8 +454,10 @@ struct fuse_file_lock { * fuse_attr flags * * FUSE_ATTR_SUBMOUNT: Object is a submount root + * FUSE_ATTR_DAX: Enable DAX for this file in per-file DAX mode */ #define FUSE_ATTR_SUBMOUNT (1 << 0) +#define FUSE_ATTR_DAX (1 << 1) /** * Open flags -- 2.27.0
Jeffle Xu
2021-Aug-17 02:22 UTC
[PATCH v4 4/8] fuse: negotiate if server/client supports per-file DAX
During the FUSE_INIT phase, server and client shall negotiate whether per-file DAX is supported. Requirements for server: - capable of handling SETFLAGS/FSSETXATTR ioctl and storing FS_DAX_FL/FS_XFLAG_DAX persistently. - set FUSE_ATTR_DAX if the file is capable of per-file DAX when replying FUSE_LOOKUP request accordingly. Requirements for client: - capable of handling per-file DAX when receiving FUSE_ATTR_DAX. Signed-off-by: Jeffle Xu <jefflexu at linux.alibaba.com> --- fs/fuse/fuse_i.h | 3 +++ fs/fuse/inode.c | 12 ++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index a23dd8d0c181..0b21e76a379a 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -770,6 +770,9 @@ struct fuse_conn { /* Propagate syncfs() to server */ unsigned int sync_fs:1; + /* Does the filesystem support per-file DAX? */ + unsigned int perfile_dax:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 0bc0d8af81e1..9d302079281c 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1087,10 +1087,12 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, min_t(unsigned int, fc->max_pages_limit, max_t(unsigned int, arg->max_pages, 1)); } - if (IS_ENABLED(CONFIG_FUSE_DAX) && - arg->flags & FUSE_MAP_ALIGNMENT && - !fuse_dax_check_alignment(fc, arg->map_alignment)) { - ok = false; + if (IS_ENABLED(CONFIG_FUSE_DAX)) { + if (arg->flags & FUSE_MAP_ALIGNMENT && + !fuse_dax_check_alignment(fc, arg->map_alignment)) + ok = false; + if (arg->flags & FUSE_PERFILE_DAX) + fc->perfile_dax = 1; } if (arg->flags & FUSE_HANDLE_KILLPRIV_V2) { fc->handle_killpriv_v2 = 1; @@ -1145,6 +1147,8 @@ void fuse_send_init(struct fuse_mount *fm) #ifdef CONFIG_FUSE_DAX if (fm->fc->dax) ia->in.flags |= FUSE_MAP_ALIGNMENT; + if (fm->fc->dax_mode == FUSE_DAX_INODE) + ia->in.flags |= FUSE_PERFILE_DAX; #endif if (fm->fc->auto_submounts) ia->in.flags |= FUSE_SUBMOUNTS; -- 2.27.0
Enable per-file DAX if fuse server advertises that the file supports that. Currently the state whether the file enables DAX or not is initialized only when inode is instantiated. Signed-off-by: Jeffle Xu <jefflexu at linux.alibaba.com> --- fs/fuse/dax.c | 12 ++++++++---- fs/fuse/file.c | 4 ++-- fs/fuse/fuse_i.h | 4 ++-- fs/fuse/inode.c | 2 +- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c index fe4e9593a590..30833f8d37dd 100644 --- a/fs/fuse/dax.c +++ b/fs/fuse/dax.c @@ -1339,7 +1339,7 @@ static const struct address_space_operations fuse_dax_file_aops = { .invalidatepage = noop_invalidatepage, }; -static bool fuse_should_enable_dax(struct inode *inode) +static bool fuse_should_enable_dax(struct inode *inode, unsigned int flags) { struct fuse_conn *fc = get_fuse_conn(inode); unsigned int dax_mode = fc->dax_mode; @@ -1348,12 +1348,16 @@ static bool fuse_should_enable_dax(struct inode *inode) if (dax_mode == FUSE_DAX_NEVER) return false; - return true; + if (dax_mode == FUSE_DAX_ALWAYS) + return true; + + WARN_ON_ONCE(dax_mode != FUSE_DAX_INODE); + return fc->perfile_dax && (flags & FUSE_ATTR_DAX); } -void fuse_dax_inode_init(struct inode *inode) +void fuse_dax_inode_init(struct inode *inode, unsigned int flags) { - if (!fuse_should_enable_dax(inode)) + if (!fuse_should_enable_dax(inode, flags)) return; inode->i_flags |= S_DAX; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index ec48bc7ef0a5..1231128f8dd6 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -3148,7 +3148,7 @@ static const struct address_space_operations fuse_file_aops = { .write_end = fuse_write_end, }; -void fuse_init_file_inode(struct inode *inode) +void fuse_init_file_inode(struct inode *inode, unsigned int flags) { struct fuse_inode *fi = get_fuse_inode(inode); @@ -3162,5 +3162,5 @@ void fuse_init_file_inode(struct inode *inode) fi->writepages = RB_ROOT; if (IS_ENABLED(CONFIG_FUSE_DAX)) - fuse_dax_inode_init(inode); + fuse_dax_inode_init(inode, flags); } 
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 0b21e76a379a..7b7b4c208af2 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1006,7 +1006,7 @@ int fuse_notify_poll_wakeup(struct fuse_conn *fc, /** * Initialize file operations on a regular file */ -void fuse_init_file_inode(struct inode *inode); +void fuse_init_file_inode(struct inode *inode, unsigned int flags); /** * Initialize inode operations on regular files and special files @@ -1258,7 +1258,7 @@ int fuse_dax_conn_alloc(struct fuse_conn *fc, enum fuse_dax_mode mode, struct dax_device *dax_dev); void fuse_dax_conn_free(struct fuse_conn *fc); bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi); -void fuse_dax_inode_init(struct inode *inode); +void fuse_dax_inode_init(struct inode *inode, unsigned int flags); void fuse_dax_inode_cleanup(struct inode *inode); bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment); void fuse_dax_cancel_work(struct fuse_conn *fc); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 9d302079281c..8080f78befed 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -281,7 +281,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) inode->i_ctime.tv_nsec = attr->ctimensec; if (S_ISREG(inode->i_mode)) { fuse_init_common(inode); - fuse_init_file_inode(inode); + fuse_init_file_inode(inode, attr->flags); } else if (S_ISDIR(inode->i_mode)) fuse_init_dir(inode); else if (S_ISLNK(inode->i_mode)) -- 2.27.0
Jeffle Xu
2021-Aug-17 02:22 UTC
[PATCH v4 6/8] fuse: mark inode DONT_CACHE when per-file DAX indication changes
When the per-file DAX indication changes while the file is still *opened*, it is quite complicated and maybe fragile to dynamically change the DAX state. Hence mark the inode and corresponding dentries as DONT_CACHE once the per-file DAX indication changes, so that the inode instance will be evicted and freed as soon as possible once the file is closed and the last reference to the inode is put. And then when the file gets reopened next time, the inode will reflect the new DAX state. In summary, when the per-file DAX indication changes for an *opened* file, the state of the file won't be updated until this file is closed and reopened later. Signed-off-by: Jeffle Xu <jefflexu at linux.alibaba.com> --- fs/fuse/dax.c | 9 +++++++++ fs/fuse/fuse_i.h | 1 + fs/fuse/inode.c | 3 +++ 3 files changed, 13 insertions(+) diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c index 30833f8d37dd..f7ede0be4e00 100644 --- a/fs/fuse/dax.c +++ b/fs/fuse/dax.c @@ -1364,6 +1364,15 @@ void fuse_dax_inode_init(struct inode *inode, unsigned int flags) inode->i_data.a_ops = &fuse_dax_file_aops; } +void fuse_dax_dontcache(struct inode *inode, bool newdax) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + + if (fc->dax_mode == FUSE_DAX_INODE && + fc->perfile_dax && (!!IS_DAX(inode) != newdax)) + d_mark_dontcache(inode); +} + bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment) { if (fc->dax && (map_alignment > FUSE_DAX_SHIFT)) { diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 7b7b4c208af2..56fe1c4d2136 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1260,6 +1260,7 @@ void fuse_dax_conn_free(struct fuse_conn *fc); bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi); void fuse_dax_inode_init(struct inode *inode, unsigned int flags); void fuse_dax_inode_cleanup(struct inode *inode); +void fuse_dax_dontcache(struct inode *inode, bool newdax); bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment); void 
fuse_dax_cancel_work(struct fuse_conn *fc); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 8080f78befed..8c9774c6a210 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -269,6 +269,9 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, if (inval) invalidate_inode_pages2(inode->i_mapping); } + + if (IS_ENABLED(CONFIG_FUSE_DAX)) + fuse_dax_dontcache(inode, attr->flags & FUSE_ATTR_DAX); } static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) -- 2.27.0
Jeffle Xu
2021-Aug-17 02:22 UTC
[PATCH v4 7/8] fuse: support changing per-file DAX flag inside guest
Fuse client can enable or disable per-file DAX inside kernel/guest by chattr(1). Similarly the new state won't be updated until the file is closed and reopened later. Signed-off-by: Jeffle Xu <jefflexu at linux.alibaba.com> --- fs/fuse/ioctl.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/fs/fuse/ioctl.c b/fs/fuse/ioctl.c index 546ea3d58fb4..a9ed53c5dbd1 100644 --- a/fs/fuse/ioctl.c +++ b/fs/fuse/ioctl.c @@ -469,8 +469,6 @@ int fuse_fileattr_set(struct user_namespace *mnt_userns, if (fa->flags_valid) { err = fuse_priv_ioctl(inode, ff, FS_IOC_SETFLAGS, &flags, sizeof(flags)); - if (err) - goto cleanup; } else { memset(&xfa, 0, sizeof(xfa)); xfa.fsx_xflags = fa->fsx_xflags; @@ -483,6 +481,19 @@ int fuse_fileattr_set(struct user_namespace *mnt_userns, &xfa, sizeof(xfa)); } + if (err) + goto cleanup; + + if (IS_ENABLED(CONFIG_FUSE_DAX)) { + bool newdax; + + if (fa->flags_valid) + newdax = flags & FS_DAX_FL; + else + newdax = fa->fsx_xflags & FS_XFLAG_DAX; + fuse_dax_dontcache(inode, newdax); + } + cleanup: fuse_priv_ioctl_cleanup(inode, ff); -- 2.27.0
Jeffle Xu
2021-Aug-17 02:22 UTC
[PATCH v4 8/8] fuse: show '-o dax=inode' option only when FUSE server supports
Prior to this patch, the mount option will still show '-o dax=inode' when FUSE server advertises that it doesn't support per-file DAX. Signed-off-by: Jeffle Xu <jefflexu at linux.alibaba.com> --- fs/fuse/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 8c9774c6a210..7f09a964823f 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -697,7 +697,7 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root) seq_puts(m, ",dax=always"); else if (fc->dax_mode == FUSE_DAX_NEVER) seq_puts(m, ",dax=never"); - else if (fc->dax_mode == FUSE_DAX_INODE) + else if ((fc->dax_mode == FUSE_DAX_INODE) && fc->perfile_dax) seq_puts(m, ",dax=inode"); #endif -- 2.27.0
I mentioned in virtiofsd PATCH v1 that virtiofsd exits once ioctl() is called. After deeper investigation into this issue, I find that it is because ioctl() is not in the seccomp whitelist of virtiofsd. To support ioctl, the ioctl syscall shall be added into the whitelist (see patch 1). And this is the complete workable version for virtiofsd: - virtiofsd now supports FUSE_IOCTL, though currently only FS_IOC_G[S]ETFLAGS/FS_IOC_FSG[S]ETXATTR are supported. - During FUSE_INIT, virtiofsd advertises support for per-file DAX only when the backend fs is ext4/xfs. - FS_IOC_SETFLAGS/FS_IOC_FSSETXATTR FUSE_IOCTL will be directed to host, so that FS_DAX_FL could be flushed to the backend fs persistently. - During FUSE_LOOKUP, virtiofsd will decide whether DAX shall be enabled for the current file according to whether this file is marked with FS_DAX_FL in the backend fs. changes since v2/v3: Patch 4 in v2 is incomplete by mistake and it will fail to be compiled. I had sent a separate patch 4 as v3. Now I send the whole complete set in v4. Except for this, there's no other difference. Jeffle Xu (4): virtiofsd: add .ioctl() support virtiofsd: expand fuse protocol to support per-file DAX virtiofsd: support per-file DAX negotiation in FUSE_INIT virtiofsd: support per-file DAX in FUSE_LOOKUP include/standard-headers/linux/fuse.h | 2 + tools/virtiofsd/fuse_common.h | 5 ++ tools/virtiofsd/fuse_lowlevel.c | 6 ++ tools/virtiofsd/passthrough_ll.c | 125 ++++++++++++++++++++++++++ tools/virtiofsd/passthrough_seccomp.c | 1 + 5 files changed, 139 insertions(+) -- 2.27.0
Add .ioctl() support for passthrough, in prep for the following support for following per-file DAX feature. Once advertising support for per-file DAX feature, virtiofsd should support storing FS_DAX_FL flag persistently passed by FS_IOC_SETFLAGS/FS_IOC_FSSETXATTR ioctl, and set FUSE_ATTR_DAX in FUSE_LOOKUP accordingly if the file is capable of per-file DAX. When it comes to passthrough, it passes corresponding ioctls to host directly. Currently only these ioctls that are needed for per-file DAX feature, i.e., FS_IOC_GETFLAGS/FS_IOC_SETFLAGS and FS_IOC_FSGETXATTR/FS_IOC_FSSETXATTR are supported. Later we can restrict the flags/attributes allowed to be set to reinforce the security, or extend the scope of allowed ioctls if it is really needed later. Signed-off-by: Jeffle Xu <jefflexu at linux.alibaba.com> --- tools/virtiofsd/passthrough_ll.c | 53 +++++++++++++++++++++++++++ tools/virtiofsd/passthrough_seccomp.c | 1 + 2 files changed, 54 insertions(+) diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index b76d878509..e170b17adb 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -54,6 +54,7 @@ #include <sys/wait.h> #include <sys/xattr.h> #include <syslog.h> +#include <linux/fs.h> #include "qemu/cutils.h" #include "passthrough_helpers.h" @@ -2105,6 +2106,57 @@ out: fuse_reply_err(req, saverr); } +static void lo_ioctl(fuse_req_t req, fuse_ino_t ino, unsigned int cmd, void *arg, + struct fuse_file_info *fi, unsigned flags, const void *in_buf, + size_t in_bufsz, size_t out_bufsz) +{ + int fd = lo_fi_fd(req, fi); + int res; + int saverr = ENOSYS; + + fuse_log(FUSE_LOG_DEBUG, "lo_ioctl(ino=%" PRIu64 ", cmd=0x%x, flags=0x%x, " + "in_bufsz = %lu, out_bufsz = %lu)\n", + ino, cmd, flags, in_bufsz, out_bufsz); + + /* unrestricted ioctl is not supported yet */ + if (flags & FUSE_IOCTL_UNRESTRICTED) + goto out; + + /* + * Currently only those ioctls needed to support per-file DAX feature, + * i.e., 
FS_IOC_GETFLAGS/FS_IOC_SETFLAGS and + * FS_IOC_FSGETXATTR/FS_IOC_FSSETXATTR are supported. + */ + if (cmd == FS_IOC_SETFLAGS || cmd == FS_IOC_FSSETXATTR) { + res = ioctl(fd, cmd, in_buf); + if (res < 0) + goto out_err; + + fuse_reply_ioctl(req, 0, NULL, 0); + } + else if (cmd == FS_IOC_GETFLAGS || cmd == FS_IOC_FSGETXATTR) { + /* reused for 'unsigned int' for FS_IOC_GETFLAGS */ + struct fsxattr attr; + + res = ioctl(fd, cmd, &attr); + if (res < 0) + goto out_err; + + fuse_reply_ioctl(req, 0, &attr, out_bufsz); + } + else { + fuse_log(FUSE_LOG_DEBUG, "Unsupported ioctl 0x%x\n", cmd); + goto out; + } + + return; + +out_err: + saverr = errno; +out: + fuse_reply_err(req, saverr); +} + static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, struct fuse_file_info *fi) { @@ -3279,6 +3331,7 @@ static struct fuse_lowlevel_ops lo_oper = { .create = lo_create, .getlk = lo_getlk, .setlk = lo_setlk, + .ioctl = lo_ioctl, .open = lo_open, .release = lo_release, .flush = lo_flush, diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c index 62441cfcdb..2a5f7614fc 100644 --- a/tools/virtiofsd/passthrough_seccomp.c +++ b/tools/virtiofsd/passthrough_seccomp.c @@ -62,6 +62,7 @@ static const int syscall_allowlist[] = { SCMP_SYS(gettid), SCMP_SYS(gettimeofday), SCMP_SYS(getxattr), + SCMP_SYS(ioctl), SCMP_SYS(linkat), SCMP_SYS(listxattr), SCMP_SYS(lseek), -- 2.27.0
Jeffle Xu
2021-Aug-17 02:23 UTC
[virtiofsd PATCH v4 2/4] virtiofsd: expand fuse protocol to support per-file DAX
Signed-off-by: Jeffle Xu <jefflexu at linux.alibaba.com> --- include/standard-headers/linux/fuse.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h index 950d7edb7e..7bd006ffcb 100644 --- a/include/standard-headers/linux/fuse.h +++ b/include/standard-headers/linux/fuse.h @@ -356,6 +356,7 @@ struct fuse_file_lock { #define FUSE_MAP_ALIGNMENT (1 << 26) #define FUSE_SUBMOUNTS (1 << 27) #define FUSE_HANDLE_KILLPRIV_V2 (1 << 28) +#define FUSE_PERFILE_DAX (1 << 30) /** * CUSE INIT request/reply flags @@ -440,6 +441,7 @@ struct fuse_file_lock { * FUSE_ATTR_SUBMOUNT: Object is a submount root */ #define FUSE_ATTR_SUBMOUNT (1 << 0) +#define FUSE_ATTR_DAX (1 << 1) /** * Open flags -- 2.27.0