I mentioned in virtiofsd PATCH v1 that virtiofsd exits once ioctl() is called. After deeper investigation into this issue, I found that it is because ioctl() is not in the seccomp whitelist of virtiofsd. To support ioctl, the ioctl syscall must be added to the whitelist (see patch 1). And this is the complete workable version for virtiofsd: - virtiofsd now supports FUSE_IOCTL, though currently only FS_IOC_G[S]ETFLAGS/FS_IOC_FSG[S]ETXATTR are supported. - During FUSE_INIT, virtiofsd advertises support for per-file DAX only when the backend fs is ext4/xfs. - FS_IOC_SETFLAGS/FS_IOC_FSSETXATTR FUSE_IOCTL will be directed to host, so that FS_DAX_FL could be flushed to the backend fs persistently. - During FUSE_LOOKUP, virtiofsd will decide whether DAX shall be enabled for the current file according to whether this file is marked with FS_DAX_FL in the backend fs. PS. In the current implementation, the kernel always advertises FUSE_PERFILE_DAX no matter whether it's mounted with '-o dax=inode' or not. It can be fixed in the next version, and I would welcome more feedback before then. Any comment on this whole series is welcome. Jeffle Xu (4): virtiofsd: add .ioctl() support virtiofsd: expand fuse protocol to support per-file DAX virtiofsd: support per-file DAX negotiation in FUSE_INIT virtiofsd: support per-file DAX in FUSE_LOOKUP include/standard-headers/linux/fuse.h | 2 + tools/virtiofsd/fuse_common.h | 5 ++ tools/virtiofsd/fuse_lowlevel.c | 6 ++ tools/virtiofsd/passthrough_ll.c | 115 ++++++++++++++++++++++++++ tools/virtiofsd/passthrough_seccomp.c | 1 + 5 files changed, 129 insertions(+) -- 2.27.0
Add .ioctl() support for passthrough, in preparation for the per-file DAX feature that follows. Once advertising support for the per-file DAX feature, virtiofsd should support storing the FS_DAX_FL flag persistently passed by FS_IOC_SETFLAGS/FS_IOC_FSSETXATTR ioctl, and set FUSE_ATTR_DAX in FUSE_LOOKUP accordingly if the file is capable of per-file DAX. When it comes to passthrough, it passes the corresponding ioctls to the host directly. Currently only the ioctls that are needed for the per-file DAX feature, i.e., FS_IOC_GETFLAGS/FS_IOC_SETFLAGS and FS_IOC_FSGETXATTR/FS_IOC_FSSETXATTR, are supported. Later we can restrict the flags/attributes allowed to be set to reinforce security, or extend the scope of allowed ioctls if it is really needed. Signed-off-by: Jeffle Xu <jefflexu at linux.alibaba.com> --- tools/virtiofsd/passthrough_ll.c | 53 +++++++++++++++++++++++++++ tools/virtiofsd/passthrough_seccomp.c | 1 + 2 files changed, 54 insertions(+) diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index b76d878509..e170b17adb 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -54,6 +54,7 @@ #include <sys/wait.h> #include <sys/xattr.h> #include <syslog.h> +#include <linux/fs.h> #include "qemu/cutils.h" #include "passthrough_helpers.h" @@ -2105,6 +2106,57 @@ out: fuse_reply_err(req, saverr); } +static void lo_ioctl(fuse_req_t req, fuse_ino_t ino, unsigned int cmd, void *arg, + struct fuse_file_info *fi, unsigned flags, const void *in_buf, + size_t in_bufsz, size_t out_bufsz) +{ + int fd = lo_fi_fd(req, fi); + int res; + int saverr = ENOSYS; + + fuse_log(FUSE_LOG_DEBUG, "lo_ioctl(ino=%" PRIu64 ", cmd=0x%x, flags=0x%x, " + "in_bufsz = %lu, out_bufsz = %lu)\n", + ino, cmd, flags, in_bufsz, out_bufsz); + + /* unrestricted ioctl is not supported yet */ + if (flags & FUSE_IOCTL_UNRESTRICTED) + goto out; + + /* + * Currently only those ioctls needed to support per-file DAX feature, + * i.e., 
FS_IOC_GETFLAGS/FS_IOC_SETFLAGS and + * FS_IOC_FSGETXATTR/FS_IOC_FSSETXATTR are supported. + */ + if (cmd == FS_IOC_SETFLAGS || cmd == FS_IOC_FSSETXATTR) { + res = ioctl(fd, cmd, in_buf); + if (res < 0) + goto out_err; + + fuse_reply_ioctl(req, 0, NULL, 0); + } + else if (cmd == FS_IOC_GETFLAGS || cmd == FS_IOC_FSGETXATTR) { + /* reused for 'unsigned int' for FS_IOC_GETFLAGS */ + struct fsxattr attr; + + res = ioctl(fd, cmd, &attr); + if (res < 0) + goto out_err; + + fuse_reply_ioctl(req, 0, &attr, out_bufsz); + } + else { + fuse_log(FUSE_LOG_DEBUG, "Unsupported ioctl 0x%x\n", cmd); + goto out; + } + + return; + +out_err: + saverr = errno; +out: + fuse_reply_err(req, saverr); +} + static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, struct fuse_file_info *fi) { @@ -3279,6 +3331,7 @@ static struct fuse_lowlevel_ops lo_oper = { .create = lo_create, .getlk = lo_getlk, .setlk = lo_setlk, + .ioctl = lo_ioctl, .open = lo_open, .release = lo_release, .flush = lo_flush, diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c index 62441cfcdb..2a5f7614fc 100644 --- a/tools/virtiofsd/passthrough_seccomp.c +++ b/tools/virtiofsd/passthrough_seccomp.c @@ -62,6 +62,7 @@ static const int syscall_allowlist[] = { SCMP_SYS(gettid), SCMP_SYS(gettimeofday), SCMP_SYS(getxattr), + SCMP_SYS(ioctl), SCMP_SYS(linkat), SCMP_SYS(listxattr), SCMP_SYS(lseek), -- 2.27.0
Jeffle Xu
2021-Aug-11 06:56 UTC
[virtiofsd PATCH v2 2/4] virtiofsd: expand fuse protocol to support per-file DAX
Signed-off-by: Jeffle Xu <jefflexu at linux.alibaba.com> --- include/standard-headers/linux/fuse.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h index 950d7edb7e..7bd006ffcb 100644 --- a/include/standard-headers/linux/fuse.h +++ b/include/standard-headers/linux/fuse.h @@ -356,6 +356,7 @@ struct fuse_file_lock { #define FUSE_MAP_ALIGNMENT (1 << 26) #define FUSE_SUBMOUNTS (1 << 27) #define FUSE_HANDLE_KILLPRIV_V2 (1 << 28) +#define FUSE_PERFILE_DAX (1 << 30) /** * CUSE INIT request/reply flags @@ -440,6 +441,7 @@ struct fuse_file_lock { * FUSE_ATTR_SUBMOUNT: Object is a submount root */ #define FUSE_ATTR_SUBMOUNT (1 << 0) +#define FUSE_ATTR_DAX (1 << 1) /** * Open flags -- 2.27.0
Jeffle Xu
2021-Aug-11 06:56 UTC
[virtiofsd PATCH v2 3/4] virtiofsd: support per-file DAX negotiation in FUSE_INIT
In the FUSE_INIT negotiation phase, server and client should each advertise whether they support per-file DAX. Once advertising support for the per-file DAX feature, virtiofsd should support storing the FS_DAX_FL flag persistently passed by FS_IOC_SETFLAGS/FS_IOC_FSSETXATTR ioctl, and set FUSE_ATTR_DAX in FUSE_LOOKUP accordingly if the file is capable of per-file DAX. Currently only ext4/xfs since linux kernel v5.8 support storing the FS_DAX_FL flag persistently, and thus virtiofsd advertises support for the per-file DAX feature only when the backend fs type is ext4 or xfs. Signed-off-by: Jeffle Xu <jefflexu at linux.alibaba.com> --- tools/virtiofsd/fuse_common.h | 5 +++++ tools/virtiofsd/fuse_lowlevel.c | 6 ++++++ tools/virtiofsd/passthrough_ll.c | 29 +++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+) diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h index 8a75729be9..ee6fc64c23 100644 --- a/tools/virtiofsd/fuse_common.h +++ b/tools/virtiofsd/fuse_common.h @@ -372,6 +372,11 @@ struct fuse_file_info { */ #define FUSE_CAP_HANDLE_KILLPRIV_V2 (1 << 28) +/** + * Indicates support for per-file DAX. 
+ */ +#define FUSE_CAP_PERFILE_DAX (1 << 29) + /** * Ioctl flags * diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c index 50fc5c8d5a..04a4f17423 100644 --- a/tools/virtiofsd/fuse_lowlevel.c +++ b/tools/virtiofsd/fuse_lowlevel.c @@ -2065,6 +2065,9 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, if (arg->flags & FUSE_HANDLE_KILLPRIV_V2) { se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV_V2; } + if (arg->flags & FUSE_PERFILE_DAX) { + se->conn.capable |= FUSE_CAP_PERFILE_DAX; + } #ifdef HAVE_SPLICE #ifdef HAVE_VMSPLICE se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; @@ -2180,6 +2183,9 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, if (se->conn.want & FUSE_CAP_POSIX_ACL) { outarg.flags |= FUSE_POSIX_ACL; } + if (se->op.ioctl && (se->conn.want & FUSE_CAP_PERFILE_DAX)) { + outarg.flags |= FUSE_PERFILE_DAX; + } outarg.max_readahead = se->conn.max_readahead; outarg.max_write = se->conn.max_write; if (se->conn.max_background >= (1 << 16)) { diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index e170b17adb..5b6228210f 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -53,8 +53,10 @@ #include <sys/syscall.h> #include <sys/wait.h> #include <sys/xattr.h> +#include <sys/vfs.h> #include <syslog.h> #include <linux/fs.h> +#include <linux/magic.h> #include "qemu/cutils.h" #include "passthrough_helpers.h" @@ -136,6 +138,13 @@ enum { SANDBOX_CHROOT, }; +/* capability of storing DAX flag persistently */ +enum { + DAX_CAP_NONE, /* not supported */ + DAX_CAP_FLAGS, /* stored in flags (FS_IOC_GETFLAGS/FS_IOC_SETFLAGS) */ + DAX_CAP_XATTR, /* stored in xflags (FS_IOC_FSGETXATTR/FS_IOC_FSSETXATTR) */ +}; + typedef struct xattr_map_entry { char *key; char *prepend; @@ -161,6 +170,7 @@ struct lo_data { int readdirplus_clear; int allow_direct_io; int announce_submounts; + int perfile_dax_cap; /* capability of backend fs */ bool use_statx; struct lo_inode root; 
GHashTable *inodes; /* protected by lo->mutex */ @@ -703,6 +713,10 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) conn->want &= ~FUSE_CAP_HANDLE_KILLPRIV_V2; lo->killpriv_v2 = 0; } + + if (conn->capable & FUSE_CAP_PERFILE_DAX && lo->perfile_dax_cap ) { + conn->want |= FUSE_CAP_PERFILE_DAX; + } } static void lo_getattr(fuse_req_t req, fuse_ino_t ino, @@ -3800,6 +3814,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) int fd, res; struct stat stat; uint64_t mnt_id; + struct statfs statfs; fd = open("/", O_PATH); if (fd == -1) { @@ -3826,6 +3841,20 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) root->posix_locks = g_hash_table_new_full( g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy); } + + /* + * Currently only ext4/xfs since linux kernel v5.8 support storing + * FS_DAX_FL flag persistently. Ext4 accesses this flag through + * FS_IOC_G[S]ETFLAGS ioctl, while xfs accesses this flag through + * FS_IOC_FSG[S]ETXATTR ioctl. + */ + res = fstatfs(fd, &statfs); + if (!res) { + if (statfs.f_type == EXT4_SUPER_MAGIC) + lo->perfile_dax_cap = DAX_CAP_FLAGS; + else if (statfs.f_type == XFS_SUPER_MAGIC) + lo->perfile_dax_cap = DAX_CAP_XATTR; + } } static guint lo_key_hash(gconstpointer key) -- 2.27.0
Jeffle Xu
2021-Aug-11 06:56 UTC
[virtiofsd PATCH v2 4/4] virtiofsd: support per-file DAX in FUSE_LOOKUP
For passthrough, when the corresponding virtiofs in guest is mounted with '-o dax=inode', advertise that the file is capable of per-file DAX if the inode in the backend fs is marked with FS_DAX_FL flag. Signed-off-by: Jeffle Xu <jefflexu at linux.alibaba.com> --- tools/virtiofsd/passthrough_ll.c | 43 ++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index 5b6228210f..8904fa73e0 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -171,6 +171,7 @@ struct lo_data { int allow_direct_io; int announce_submounts; int perfile_dax_cap; /* capability of backend fs */ + bool perfile_dax; /* enable per-file DAX or not */ bool use_statx; struct lo_inode root; GHashTable *inodes; /* protected by lo->mutex */ @@ -716,6 +717,10 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) if (conn->capable & FUSE_CAP_PERFILE_DAX && lo->perfile_dax_cap ) { conn->want |= FUSE_CAP_PERFILE_DAX; + lo->perfile_dax = 1; + } + else { + lo->perfile_dax = 0; } } @@ -983,6 +988,41 @@ static int do_statx(struct lo_data *lo, int dirfd, const char *pathname, return 0; } +/* + * If the file is marked with FS_DAX_FL or FS_XFLAG_DAX, then DAX should be + * enabled for this file. + */ +static bool lo_should_enable_dax(struct lo_data *lo, struct lo_inode *dir, + const char *name) +{ + int res, fd; + int ret = false;; + unsigned int attr; + struct fsxattr xattr; + + if (!lo->perfile_dax) + return false; + + /* Open file without O_PATH, so that ioctl can be called. 
*/ + fd = openat(dir->fd, name, O_NOFOLLOW); + if (fd == -1) + return false; + + if (lo->perfile_dax_cap == DAX_FLAG_FLAGS) { + res = ioctl(fd, FS_IOC_GETFLAGS, &attr); + if (!res && (attr & FS_DAX_FL)) + ret = true; + } + else if (lo->perfile_dax_cap == DAX_FLAG_XATTR) { + res = ioctl(fd, FS_IOC_FSGETXATTR, &xattr); + if (!res && (xattr.fsx_xflags & FS_XFLAG_DAX)) + ret = true; + } + + close(fd); + return ret; +} + /* * Increments nlookup on the inode on success. unref_inode_lolocked() must be * called eventually to decrement nlookup again. If inodep is non-NULL, the @@ -1038,6 +1078,9 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, e->attr_flags |= FUSE_ATTR_SUBMOUNT; } + if (lo_should_enable_dax(lo, dir, name)) + e->attr_flags |= FUSE_ATTR_DAX; + inode = lo_find(lo, &e->attr, mnt_id); if (inode) { close(newfd); -- 2.27.0