Add an ioctl to the VFS that dumps information about a filesystem's
in-memory metadata. Userspace collects this information and uses it to do
metadata readahead.
A filesystem can implement super_operations.metadata_incore to report its
in-memory metadata in a filesystem-specific way. The next patch gives an
example of how to implement .metadata_incore in btrfs.
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
---
fs/compat_ioctl.c | 1
fs/ioctl.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/fs.h | 15 ++++++++++
3 files changed, 95 insertions(+)
Index: linux/fs/ioctl.c
==================================================================
--- linux.orig/fs/ioctl.c 2010-12-13 14:01:52.000000000 +0800
+++ linux/fs/ioctl.c 2010-12-13 14:01:56.000000000 +0800
@@ -530,6 +530,82 @@ static int ioctl_fsthaw(struct file *fil
}
/*
+ * Copy info about metadata in memory to userspace.
+ *
+ * @filp: any open file on the filesystem to query; only its super_block
+ *        is used.
+ * @argp: userspace pointer to a struct metadata_incore_args.
+ *
+ * Starting at args.offset, the filesystem's ->metadata_incore() hook is
+ * called repeatedly.  Ranges it reports back to back are merged, and each
+ * resulting range is stored as one struct metadata_incore_ent in the user
+ * vector described by args.vec_addr / args.vec_size.
+ *
+ * Returns:
+ * > 0, number of metadata_incore_ent entries copied to userspace
+ * = 0, no more metadata
+ * < 0, error:
+ *      -EOPNOTSUPP  the filesystem has no ->metadata_incore() hook
+ *      -EINVAL      args.offset is not page-aligned, or args.vec_size is
+ *                   not a whole number of entries
+ *      -EFAULT      args or the vector cannot be accessed
+ *      -EINTR       a signal is pending (entries already copied are not
+ *                   reported)
+ */
+static int ioctl_metadata_incore(struct file *filp, void __user *argp)
+{
+	struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
+	struct metadata_incore_ent __user *vec, *vec_end;
+	struct metadata_incore_args args;
+	struct metadata_incore_ent ent;
+	loff_t offset, last_offset = 0;
+	ssize_t size, last_size = 0;
+	int entries = 0;
+
+	if (!sb->s_op->metadata_incore)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&args, argp, sizeof(args)))
+		return -EFAULT;
+
+	/* Check the start address: needs to be page-aligned.. */
+	if (args.offset & ~PAGE_CACHE_MASK)
+		return -EINVAL;
+
+	if ((args.vec_size % sizeof(struct metadata_incore_ent)) != 0)
+		return -EINVAL;
+
+	/*
+	 * The vector address travels as a 64-bit integer so the structure
+	 * has one layout for 32-bit and 64-bit callers.  Turn it into a
+	 * real user pointer once, refusing values a pointer cannot hold.
+	 */
+	if (args.vec_addr != (unsigned long)args.vec_addr)
+		return -EFAULT;
+	vec = (struct metadata_incore_ent __user *)(unsigned long)args.vec_addr;
+
+	if (!access_ok(VERIFY_WRITE, vec, args.vec_size))
+		return -EFAULT;
+	vec_end = vec + args.vec_size / sizeof(*vec);
+
+	offset = args.offset;
+	ent.unused = 0;
+
+	/*
+	 * [last_offset, last_offset + last_size) is the range collected so
+	 * far but not yet written out; last_size == 0 means nothing is
+	 * pending.
+	 */
+	while (vec < vec_end) {
+		if (signal_pending(current))
+			return -EINTR;
+		cond_resched();
+
+		if (sb->s_op->metadata_incore(sb, &offset, &size) < 0)
+			break;
+
+		if (last_size > 0 && offset == last_offset + last_size) {
+			/* Directly follows the pending range: merge. */
+			last_size += size;
+		} else {
+			/*
+			 * Write out the pending range, if there is one.  On
+			 * the very first pass nothing is pending, so no
+			 * empty entry is emitted when the first range does
+			 * not start at offset 0.
+			 */
+			if (last_size > 0) {
+				ent.offset = last_offset;
+				/*
+				 * NOTE(review): ent.size is __u32 while
+				 * last_size is ssize_t; a merged range
+				 * beyond 32 bits would be truncated here.
+				 */
+				ent.size = last_size;
+				if (copy_to_user(vec, &ent, sizeof(ent)))
+					return -EFAULT;
+				vec++;
+				entries++;
+			}
+			last_offset = offset;
+			last_size = size;
+		}
+
+		/* Continue the search right after the range just reported. */
+		offset += size;
+	}
+
+	/* Write out the final pending range if the vector still has room. */
+	if (last_size > 0 && vec < vec_end) {
+		ent.offset = last_offset;
+		ent.size = last_size;
+		if (copy_to_user(vec, &ent, sizeof(ent)))
+			return -EFAULT;
+		entries++;
+	}
+
+	return entries;
+}
+
+/*
* When you add any new common ioctls to the switches above and below
* please update compat_sys_ioctl() too.
*
@@ -589,6 +665,9 @@ int do_vfs_ioctl(struct file *filp, unsi
return put_user(inode->i_sb->s_blocksize, p);
}
+ case FIMETADATA_INCORE:
+ return ioctl_metadata_incore(filp, argp);
+
default:
if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
error = file_ioctl(filp, cmd, arg);
Index: linux/include/linux/fs.h
==================================================================
--- linux.orig/include/linux/fs.h 2010-12-13 14:01:52.000000000 +0800
+++ linux/include/linux/fs.h 2010-12-13 14:01:56.000000000 +0800
@@ -52,6 +52,18 @@ struct inodes_stat_t {
int dummy[5]; /* padding for sysctl ABI compatibility */
};
+/*
+ * One range of in-memory metadata, as written to the user vector by
+ * FIMETADATA_INCORE.
+ */
+struct metadata_incore_ent {
+	__u64 offset;	/* start of the range in meta address */
+	__u32 size;	/* length of the range */
+	__u32 unused;	/* set to 0 by the kernel */
+};
+
+/*
+ * Argument of FIMETADATA_INCORE.
+ *
+ * vec_addr is a plain 64-bit integer holding a userspace address rather
+ * than a pointer, so it carries no __user annotation and the structure has
+ * the same layout for 32-bit and 64-bit callers.
+ */
+struct metadata_incore_args {
+	__u64 offset;	/* offset in meta address, must be page-aligned */
+	__u64 vec_addr;	/* vector's address (array of metadata_incore_ent) */
+	__u32 vec_size;	/* vector's size in bytes, a multiple of the entry size */
+	__u32 unused;
+};
#define NR_FILE 8192 /* this can well be larger on a larger system */
@@ -325,6 +337,7 @@ struct inodes_stat_t {
#define FIFREEZE		_IOWR('X', 119, int)	/* Freeze */
#define FITHAW		_IOWR('X', 120, int)	/* Thaw */
#define FITRIM		_IOWR('X', 121, struct fstrim_range)	/* Trim */
+/* Report which metadata of the filesystem is in memory */
+#define FIMETADATA_INCORE	_IOWR('X', 122, struct metadata_incore_args)
#define FS_IOC_GETFLAGS _IOR(''f'', 1, long)
#define FS_IOC_SETFLAGS _IOW(''f'', 2, long)
@@ -1612,6 +1625,8 @@ struct super_operations {
ssize_t (*quota_write)(struct super_block *, int, const char *, size_t,
loff_t);
#endif
int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
+ int (*metadata_incore)(struct super_block*, loff_t *offset,
+ ssize_t *size);
};
/*
Index: linux/fs/compat_ioctl.c
==================================================================
--- linux.orig/fs/compat_ioctl.c 2010-12-13 14:02:08.000000000 +0800
+++ linux/fs/compat_ioctl.c 2010-12-13 14:03:27.000000000 +0800
@@ -882,6 +882,7 @@ COMPATIBLE_IOCTL(FIGETBSZ)
/* 'X' - originally XFS but some now in the VFS */
COMPATIBLE_IOCTL(FIFREEZE)
COMPATIBLE_IOCTL(FITHAW)
+COMPATIBLE_IOCTL(FIMETADATA_INCORE)
COMPATIBLE_IOCTL(KDGETKEYCODE)
COMPATIBLE_IOCTL(KDSETKEYCODE)
COMPATIBLE_IOCTL(KDGKBTYPE)
--
To unsubscribe from this list: send the line "unsubscribe
linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html