Hugo Mills
2011-Jan-16 13:06 UTC
[PATCH RFC] Add ioctl for balancing a subset of the full filesystem.
This is a patch purely for comment. There are several things wrong with it that I need to fix (at minimum, it has too much debugging output, the __balance_chunk_filters function takes the wrong set of parameters to make it properly extensible, and the progress counter is broken). I'm planning on adding at least two more filters, once this basic infrastructure is reasonably stable: one to filter on a range of (virtual) addresses, and one to work on device IDs (i.e. "was any part of this block group stored on device $n?"). With the additional filters written, you'll be able to specify any conjunctive set of filters, i.e. "This block group is RAID1, *and* was stored on devid 4". Disjunctions ("or") aren't supported, and probably won't be with this API. The filter data for additional filters will go at the end of struct btrfs_ioctl_balance_start, ensuring extensibility and backwards-compatibility (or at least, proper error reporting of unsupported features). Questions for the panel: * Is the ioctl API reasonably sane, extensible, future-proof? * What other block group filters could be useful for this API? Hugo. There are situations, such as restarting an interrupted balance, where it is not necessary or desired to balance all of the block groups in the filesystem. This patch adds the basic infrastructure for filtering block groups during a balance. It also adds a single filter method, allowing the caller to select block groups with specific usage and replication strategies. 
--- fs/btrfs/ioctl.c | 44 +++++++++++++++++++++++++++++- fs/btrfs/ioctl.h | 15 ++++++++++ fs/btrfs/volumes.c | 76 +++++++++++++++++++++++++++++++++++++++++++-------- fs/btrfs/volumes.h | 3 +- 4 files changed, 124 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 6d50d24..a2dd60c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2243,6 +2243,46 @@ static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp) return btrfs_wait_for_commit(root, transid); } +/* Balance the filesystem unconditionally */ +long btrfs_ioctl_balance(struct btrfs_fs_info *fs_info) +{ + return btrfs_balance(fs_info->dev_root, NULL); +} + +/* Balance particular chunks in the filesystem */ +long btrfs_ioctl_balance_filtered( + struct btrfs_fs_info *fs_info, + struct btrfs_ioctl_balance_start __user *user_filters) +{ + int ret = 0; + struct btrfs_ioctl_balance_start *dest; + + dest = kmalloc(sizeof(struct btrfs_ioctl_balance_start), GFP_KERNEL); + if (!dest) + return -ENOMEM; + + if (copy_from_user(dest, user_filters, sizeof(struct btrfs_ioctl_balance_start))) { + ret = -EFAULT; + goto error; + } + + printk("Starting balance with filter: %llx %llx %llx\n", + dest->flags, dest->chunk_type, dest->chunk_type_mask); + + /* Basic sanity checking */ + if (dest->flags & ~BTRFS_BALANCE_FILTER_MASK) { + ret = -ENOTSUPP; + goto error; + } + + /* Do the balance */ + ret = btrfs_balance(fs_info->dev_root, dest); + +error: + kfree(dest); + return ret; +} + /* * Return the current status of any balance operation */ @@ -2335,11 +2375,13 @@ long btrfs_ioctl(struct file *file, unsigned int case BTRFS_IOC_RM_DEV: return btrfs_ioctl_rm_dev(root, argp); case BTRFS_IOC_BALANCE: - return btrfs_balance(root->fs_info->dev_root); + return btrfs_ioctl_balance(root->fs_info); case BTRFS_IOC_BALANCE_PROGRESS: return btrfs_ioctl_balance_progress(root->fs_info, argp); case BTRFS_IOC_BALANCE_CANCEL: return btrfs_ioctl_balance_cancel(root->fs_info); + case 
BTRFS_IOC_BALANCE_FILTERED: + return btrfs_ioctl_balance_filtered(root->fs_info, argp); case BTRFS_IOC_CLONE: return btrfs_ioctl_clone(file, arg, 0, 0, 0); case BTRFS_IOC_CLONE_RANGE: diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 4f73d11..7c0c69c 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -154,6 +154,19 @@ struct btrfs_ioctl_balance_progress { __u64 completed; }; +/* Types of balance filter */ +#define BTRFS_BALANCE_FILTER_CHUNK_TYPE 0x1 +#define BTRFS_BALANCE_FILTER_MASK 0x1 + +/* All the possible options for a filter */ +struct btrfs_ioctl_balance_start { + __u64 flags; /* Bit field indicating which fields of this struct are filled */ + + /* For FILTER_CHUNK_TYPE */ + __u64 chunk_type; /* Flag bits required */ + __u64 chunk_type_mask; /* Mask of bits to examine */ +}; + #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ struct btrfs_ioctl_vol_args) #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ @@ -201,4 +214,6 @@ struct btrfs_ioctl_balance_progress { #define BTRFS_IOC_BALANCE_PROGRESS _IOR(BTRFS_IOCTL_MAGIC, 25, \ struct btrfs_ioctl_balance_progress) #define BTRFS_IOC_BALANCE_CANCEL _IO(BTRFS_IOCTL_MAGIC, 26) +#define BTRFS_IOC_BALANCE_FILTERED _IOW(BTRFS_IOCTL_MAGIC, 27, \ + struct btrfs_ioctl_balance_start) #endif diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f81535e..b689219 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1899,7 +1899,38 @@ static u64 div_factor(u64 num, int factor) return num; } -int btrfs_balance(struct btrfs_root *dev_root) +int __balance_chunk_filters( + struct btrfs_ioctl_balance_start *filter, + struct extent_buffer *eb, + struct btrfs_chunk *chunk, + struct btrfs_key *key) +{ + if (filter == NULL) { + printk("Filter was NULL: pass all chunks\n"); + return 1; + } + + if (filter->flags == 0) { + printk("Filter was empty: pass all chunks\n"); + return 1; + } + + if (filter->flags & BTRFS_BALANCE_FILTER_CHUNK_TYPE) { + printk(KERN_INFO "btrfs: balance: Filtering chunk at 
%llu\n", key->offset); + printk(KERN_INFO "btrfs: balance: flags=%llx\n", btrfs_chunk_type(eb, chunk)); + printk(KERN_INFO "btrfs: balance: to match type %llx\n", filter->chunk_type); + printk(KERN_INFO "btrfs: balance: mask %llx\n", filter->chunk_type_mask); + printk(KERN_INFO "btrfs: balance: not-mask %llx\n", ~filter->chunk_type_mask); + printk(KERN_INFO "btrfs: balance: masked flags %llx\n", (btrfs_chunk_type(eb, chunk) & filter->chunk_type_mask)); + return (btrfs_chunk_type(eb, chunk) & filter->chunk_type_mask) + == filter->chunk_type; + } + + return 0; +} + +int btrfs_balance(struct btrfs_root *dev_root, + struct btrfs_ioctl_balance_start *filter) { int ret; struct list_head *devices = &dev_root->fs_info->fs_devices->devices; @@ -1912,6 +1943,9 @@ int btrfs_balance(struct btrfs_root *dev_root) struct btrfs_trans_handle *trans; struct btrfs_key found_key; struct btrfs_balance_info *bal_info; + struct btrfs_chunk *chunk; + + printk("Balance: filter pointer is %p\n", filter); if (dev_root->fs_info->sb->s_flags & MS_RDONLY) return -EROFS; @@ -1980,6 +2014,15 @@ int btrfs_balance(struct btrfs_root *dev_root) if (ret) break; + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + path->slots[0]); + chunk = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_chunk); + if (!__balance_chunk_filters(filter, path->nodes[0], chunk, &found_key)) { + printk(KERN_INFO "btrfs: balance (count): Filtering out chunk at %llu\n", found_key.offset); + continue; + } + spin_lock(&dev_root->fs_info->balance_info_lock); bal_info->expected++; spin_unlock(&dev_root->fs_info->balance_info_lock); @@ -2023,18 +2066,27 @@ int btrfs_balance(struct btrfs_root *dev_root) if (found_key.offset == 0) break; - btrfs_release_path(chunk_root, path); - ret = btrfs_relocate_chunk(chunk_root, - chunk_root->root_key.objectid, - found_key.objectid, - found_key.offset); - BUG_ON(ret && ret != -ENOSPC); + chunk = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_chunk); + if 
(__balance_chunk_filters(filter, path->nodes[0], chunk, &found_key)) { + btrfs_release_path(chunk_root, path); + ret = btrfs_relocate_chunk(chunk_root, + chunk_root->root_key.objectid, + found_key.objectid, + found_key.offset); + BUG_ON(ret && ret != -ENOSPC); + + spin_lock(&dev_root->fs_info->balance_info_lock); + bal_info->completed++; + spin_unlock(&dev_root->fs_info->balance_info_lock); + printk(KERN_INFO "btrfs: balance: %llu/%llu block groups completed\n", + bal_info->completed, bal_info->expected); + } else { + btrfs_release_path(chunk_root, path); + printk(KERN_INFO "btrfs: balance: Filtering out chunk at %llu\n", found_key.offset); + } + key.offset = found_key.offset - 1; - spin_lock(&dev_root->fs_info->balance_info_lock); - bal_info->completed++; - spin_unlock(&dev_root->fs_info->balance_info_lock); - printk(KERN_INFO "btrfs: balance: %llu/%llu block groups completed\n", - bal_info->completed, bal_info->expected); } ret = 0; if(bal_info->cancel_pending) { diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index a668c01..cdbafe6 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -21,6 +21,7 @@ #include <linux/bio.h> #include "async-thread.h" +#include "ioctl.h" struct buffer_head; struct btrfs_pending_bios { @@ -179,7 +180,7 @@ struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, u8 *uuid, u8 *fsid); int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); int btrfs_init_new_device(struct btrfs_root *root, char *path); -int btrfs_balance(struct btrfs_root *dev_root); +int btrfs_balance(struct btrfs_root *dev_root, struct btrfs_ioctl_balance_start *filters); void btrfs_unlock_volumes(void); void btrfs_lock_volumes(void); int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); -- 1.7.2.3 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hugo Mills
2011-Jan-18 21:31 UTC
[PATCH RFC] Initial implementation of userspace interface for filtered balancing.
This is the userspace side of the filtered balance patch, again purely for comment at this stage. The command-line invocation will look something like this: $ sudo btrfs fi bal --filter type=meta,~raid1 /mnt This will balance all metadata block groups that are not replicated with RAID1. Once I've implemented additional filter types, they can be specified with extra --filter options, with the semantics of "and" between each --filter option. (Yes, Goffredo, I know I need to update the man pages for this patch... :) ) This patch, and the preceding kernel one, both apply on top of my previous balance progress/cancel patches. Hugo. It is useful to be able to balance a subset of the full filesystem. This patch implements the infrastructure for filtering block groups on different criteria when balancing the filesystem. Signed-off-by: Hugo Mills <hugo@carfax.org.uk> --- btrfs.c | 4 +- btrfs_cmds.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- ioctl.h | 15 +++++++ 3 files changed, 145 insertions(+), 6 deletions(-) diff --git a/btrfs.c b/btrfs.c index 7b42658..19b0e56 100644 --- a/btrfs.c +++ b/btrfs.c @@ -92,8 +92,8 @@ static struct Command commands[] = { "Show space usage information for a mount point\n." }, { do_balance, -1, - "filesystem balance", "[-w|--wait] <path>\n" - "Balance the chunks across the device." + "filesystem balance", "[-w|--wait] [-f|--filter=<filter>:...] <path>\n" + "Balance chunks across the devices. 
--filter=help for help on filters.\n" }, { do_balance, -1, "balance start", "[-w|--wait] <path>\n" diff --git a/btrfs_cmds.c b/btrfs_cmds.c index fadcb4f..f7bd835 100644 --- a/btrfs_cmds.c +++ b/btrfs_cmds.c @@ -756,26 +756,74 @@ int do_add_volume(int nargs, char **args) const struct option balance_options[] = { { "wait", 0, NULL, ''w'' }, + { "filter", 1, NULL, ''f'' }, { NULL, 0, NULL, 0 } }; +struct filter_class_desc { + char *keyword; + char *description; + int flag; +}; + +const struct filter_class_desc filter_class[] = { + { "type", + "type=[~]<flagname>[,...]\n" + "\tWhere <flagname> is one of:\n" + "\t\tmeta, sys, data, raid0, raid1, raid10, dup\n" + "\tPrefix a <flagname> with ~ to negate the match.\n", + BTRFS_BALANCE_FILTER_CHUNK_TYPE }, + { NULL, NULL, 0 } +}; + +struct type_filter_desc { + char *keyword; + __u64 mask; + __u64 set; + __u64 unset; +}; + +#define BTRFS_BLOCK_GROUP_SINGLE \ + BTRFS_BLOCK_GROUP_RAID0 | \ + BTRFS_BLOCK_GROUP_RAID1 | \ + BTRFS_BLOCK_GROUP_RAID10 | \ + BTRFS_BLOCK_GROUP_DUP + +const struct type_filter_desc type_filters[] = { + { "data", BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_DATA, 0 }, + { "sys", BTRFS_BLOCK_GROUP_SYSTEM, BTRFS_BLOCK_GROUP_SYSTEM, 0 }, + { "meta", BTRFS_BLOCK_GROUP_METADATA, BTRFS_BLOCK_GROUP_METADATA, 0 }, + { "raid0", BTRFS_BLOCK_GROUP_RAID0, BTRFS_BLOCK_GROUP_RAID0, 0 }, + { "raid1", BTRFS_BLOCK_GROUP_RAID1, BTRFS_BLOCK_GROUP_RAID1, 0 }, + { "raid10", BTRFS_BLOCK_GROUP_RAID10, BTRFS_BLOCK_GROUP_RAID10, 0 }, + { "dup", BTRFS_BLOCK_GROUP_DUP, BTRFS_BLOCK_GROUP_DUP, 0 }, + { "single", BTRFS_BLOCK_GROUP_SINGLE, 0, BTRFS_BLOCK_GROUP_SINGLE }, + { NULL, 0, 0, 0 } +}; + int do_balance(int argc, char **argv) { int fdmnt, ret=0; int background = 1; - struct btrfs_ioctl_vol_args args; + struct btrfs_ioctl_balance_start *args; char *path; + char *filters_string = NULL; + char *this_filter_string; + char *saveptr; int ttyfd; optind = 1; while(1) { - int c = getopt_long(argc, argv, "w", balance_options, NULL); + int 
c = getopt_long(argc, argv, "wf:", balance_options, NULL); if (c < 0) break; switch(c) { case ''w'': background = 0; break; + case ''f'': + filters_string = optarg; + break; default: fprintf(stderr, "Invalid arguments for balance\n"); free(argv); @@ -796,6 +844,82 @@ int do_balance(int argc, char **argv) return 12; } + args = malloc(4096); + if (!args) { + fprintf(stderr, "ERROR: Not enough memory\n"); + return 13; + } + + /* Parse the filters string, if there is one */ + this_filter_string = strtok_r(filters_string, ":", &saveptr); + while(this_filter_string) { + char *subsave; + char *part; + char *type = strtok_r(this_filter_string, "=,", &subsave); + int class_id = -1; + + /* Work out what filter type we''re looking at */ + if(strcmp(type, "help") == 0) { + while(filter_class[++class_id].keyword) { + printf("%s", filter_class[class_id].description); + } + return 0; + } + + while(filter_class[++class_id].keyword) { + if(strcmp(type, filter_class[class_id].keyword) == 0) + break; + } + if(filter_class[class_id].keyword == NULL) { + fprintf(stderr, "ERROR: Unknown filter type ''%s''\n", type); + free(args); + return 14; + } + + /* Mark this filter class as being in use */ + args->flags |= filter_class[class_id].flag; + + /* Parse the arguments for this filter */ + part = strtok_r(NULL, "=,", &subsave); + + switch(filter_class[class_id].flag) { + case BTRFS_BALANCE_FILTER_CHUNK_TYPE: + args->chunk_type = 0; + args->chunk_type_mask = 0; + + while(part) { + int negated = 0; + int i = 0; + if(part[0] == ''~'') { + negated = 1; + part += 1; + } + while(type_filters[i].keyword) { + if(strcmp(part, type_filters[i].keyword) == 0) + break; + i += 1; + } + if(type_filters[i].keyword == NULL) { + fprintf(stderr, "ERROR: Unknown chunk type ''%s''\n", part); + free(args); + return 15; + } + + args->chunk_type_mask |= type_filters[i].mask; + args->chunk_type &= ~type_filters[i].mask; + if (negated) + args->chunk_type |= type_filters[i].unset; + else + args->chunk_type |= 
type_filters[i].set; + + part = strtok_r(NULL, "=,", &subsave); + } + break; + } + + this_filter_string = strtok_r(NULL, ":", &saveptr); + } + if (background) { int pid = fork(); if (pid == 0) { @@ -815,8 +939,8 @@ int do_balance(int argc, char **argv) } } - memset(&args, 0, sizeof(args)); - ret = ioctl(fdmnt, BTRFS_IOC_BALANCE, &args); + ret = ioctl(fdmnt, BTRFS_IOC_BALANCE_FILTERED, args); + free(args); close(fdmnt); if(ret<0){ fprintf(stderr, "ERROR: balancing ''%s''\n", path); diff --git a/ioctl.h b/ioctl.h index 1fc665b..bdcaf13 100644 --- a/ioctl.h +++ b/ioctl.h @@ -137,6 +137,19 @@ struct btrfs_ioctl_balance_progress { __u64 completed; }; +/* Types of balance filter */ +#define BTRFS_BALANCE_FILTER_CHUNK_TYPE 0x1 +#define BTRFS_BALANCE_FILTER_MASK 0x1 + +/* All the possible options for a filter */ +struct btrfs_ioctl_balance_start { + __u64 flags; /* Bit field indicating which fields of this struct are filled */ + + /* For FILTER_CHUNK_TYPE */ + __u64 chunk_type; /* Flag bits required */ + __u64 chunk_type_mask; /* Mask of bits to examine */ +}; + #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ struct btrfs_ioctl_vol_args) #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ @@ -177,4 +190,6 @@ struct btrfs_ioctl_balance_progress { #define BTRFS_IOC_BALANCE_PROGRESS _IOR(BTRFS_IOCTL_MAGIC, 25, \ struct btrfs_ioctl_balance_progress) #define BTRFS_IOC_BALANCE_CANCEL _IO(BTRFS_IOCTL_MAGIC, 26) +#define BTRFS_IOC_BALANCE_FILTERED _IOW(BTRFS_IOCTL_MAGIC, 27, \ + struct btrfs_ioctl_balance_start) #endif -- 1.7.2.3 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html