Josef Bacik
2007-Aug-17 14:46 UTC
[Btrfs-devel] [PATCH 2/2] block accounting+sysfs support for btrfs
Hello, This patch adds block accounting per root_item in btrfs, and adds a sysfs infrastructure to report it (as well as future things). The sysfs hierarchy is /sys/fs/btrfs/<sb->s_id>/<root_item name>/ We now keep track of the root name in memory, and the /sys entries are added as soon as we find a new root. I checked the block stuff and it seems to work well. Also I fixed a small bug in open_ctree where if we get an error back from btrfs_find_dead_roots we didn't unlock the fs_mutex (not sure how much that matters). Any comments are welcome, I apologize in advance for the sysfs ugliness :). Thank you, Josef diff -r 6125224d77d0 Makefile --- a/Makefile Fri Aug 10 16:22:09 2007 -0400 +++ b/Makefile Thu Aug 16 13:56:47 2007 -0400 @@ -4,7 +4,7 @@ obj-m := btrfs.o obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ - transaction.o bit-radix.o inode.o file.o tree-defrag.o + transaction.o bit-radix.o inode.o file.o tree-defrag.o sysfs.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ diff -r 6125224d77d0 ctree.h --- a/ctree.h Fri Aug 10 16:22:09 2007 -0400 +++ b/ctree.h Fri Aug 17 17:18:29 2007 -0400 @@ -313,6 +313,7 @@ struct btrfs_fs_info { struct list_head trans_list; struct list_head dead_roots; struct delayed_work trans_work; + struct kobject super_kobj; int do_barriers; int closing; }; @@ -328,6 +329,7 @@ struct btrfs_root { struct btrfs_key root_key; struct btrfs_fs_info *fs_info; struct inode *inode; + struct kobject root_kobj; u64 objectid; u64 last_trans; u32 blocksize; @@ -338,6 +340,7 @@ struct btrfs_root { struct btrfs_key defrag_progress; int defrag_running; int defrag_level; + char *name; }; /* the lower bits in the key flags defines the item type */ @@ -814,6 +817,28 @@ static inline void btrfs_set_root_flags( item->flags = cpu_to_le32(val); } +static inline void 
btrfs_set_root_blocks_used(struct btrfs_root_item *item, + u64 val) +{ + item->blocks_used = cpu_to_le64(val); +} + +static inline u64 btrfs_root_blocks_used(struct btrfs_root_item *item) +{ + return le64_to_cpu(item->blocks_used); +} + +static inline void btrfs_set_root_block_limit(struct btrfs_root_item *item, + u64 val) +{ + item->block_limit = cpu_to_le64(val); +} + +static inline u64 btrfs_root_block_limit(struct btrfs_root_item *item) +{ + return le64_to_cpu(item->block_limit); +} + static inline u64 btrfs_super_blocknr(struct btrfs_super_block *s) { return le64_to_cpu(s->blocknr); @@ -1012,6 +1037,23 @@ static inline void btrfs_memmove(struct { btrfs_check_bounds(dst, nr, dst_block, root->fs_info->sb->s_blocksize); memmove(dst, src, nr); +} + +static inline int btrfs_set_root_name(struct btrfs_root *root, + const char *name, int len) +{ + /* if we already have a name just free it */ + if (root->name) + kfree(root->name); + + root->name = kmalloc(len+1, GFP_KERNEL); + if (!root->name) + return -ENOMEM; + + memcpy(root->name, name, len); + root->name[len] ='\0'; + + return 0; } /* helper function to cast into the data area of the leaf. 
*/ @@ -1184,4 +1226,13 @@ int btrfs_drop_extents(struct btrfs_tran /* tree-defrag.c */ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, struct btrfs_root *root, int cache_only); + +/* sysfs.c */ +int btrfs_init_sysfs(void); +void btrfs_exit_sysfs(void); +int btrfs_sysfs_add_super(struct btrfs_fs_info *fs); +int btrfs_sysfs_add_root(struct btrfs_root *root); +void btrfs_sysfs_del_root(struct btrfs_root *root); +void btrfs_sysfs_del_super(struct btrfs_fs_info *root); + #endif diff -r 6125224d77d0 disk-io.c --- a/disk-io.c Fri Aug 10 16:22:09 2007 -0400 +++ b/disk-io.c Fri Aug 17 17:47:45 2007 -0400 @@ -294,9 +294,11 @@ static int __setup_root(int blocksize, root->last_trans = 0; root->highest_inode = 0; root->last_inode_alloc = 0; + root->name = NULL; memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); + memset(&root->root_kobj, 0, sizeof(root->root_kobj)); root->defrag_running = 0; root->defrag_level = 0; root->root_key.objectid = objectid; @@ -384,7 +386,8 @@ insert: } struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, - struct btrfs_key *location) + struct btrfs_key *location, + const char *name, int namelen) { struct btrfs_root *root; int ret; @@ -405,6 +408,22 @@ struct btrfs_root *btrfs_read_fs_root(st kfree(root); return ERR_PTR(ret); } + + ret = btrfs_set_root_name(root, name, namelen); + if (ret) { + brelse(root->node); + kfree(root); + return ERR_PTR(ret); + } + + ret = btrfs_sysfs_add_root(root); + if (ret) { + brelse(root->node); + kfree(root->name); + kfree(root); + return ERR_PTR(ret); + } + return root; } @@ -433,6 +452,7 @@ struct btrfs_root *open_ctree(struct sup INIT_RADIX_TREE(&fs_info->block_group_data_radix, GFP_KERNEL); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); + memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj)); sb_set_blocksize(sb, 4096); 
fs_info->running_transaction = NULL; fs_info->last_trans_committed = 0; @@ -500,8 +520,10 @@ struct btrfs_root *open_ctree(struct sup fs_info->generation = btrfs_super_generation(disk_super) + 1; ret = btrfs_find_dead_roots(tree_root); - if (ret) + if (ret) { + mutex_unlock(&fs_info->fs_mutex); goto fail_tree_root; + } mutex_unlock(&fs_info->fs_mutex); return tree_root; @@ -553,12 +575,15 @@ int btrfs_free_fs_root(struct btrfs_fs_i { radix_tree_delete(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid); + btrfs_sysfs_del_root(root); if (root->inode) iput(root->inode); if (root->node) brelse(root->node); if (root->commit_root) brelse(root->commit_root); + if (root->name) + kfree(root->name); kfree(root); return 0; } diff -r 6125224d77d0 disk-io.h --- a/disk-io.h Fri Aug 10 16:22:09 2007 -0400 +++ b/disk-io.h Fri Aug 17 16:30:31 2007 -0400 @@ -66,7 +66,8 @@ int btrfs_csum_data(struct btrfs_root * int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, char *result); struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, - struct btrfs_key *location); + struct btrfs_key *location, + const char *name, int namelen); struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, struct btrfs_key *location); u64 bh_blocknr(struct buffer_head *bh); diff -r 6125224d77d0 extent-tree.c --- a/extent-tree.c Fri Aug 10 16:22:09 2007 -0400 +++ b/extent-tree.c Fri Aug 10 17:01:29 2007 -0400 @@ -860,16 +860,23 @@ static int __free_extent(struct btrfs_tr btrfs_set_extent_refs(ei, refs); btrfs_mark_buffer_dirty(path->nodes[0]); if (refs == 0) { - u64 super_blocks_used; + u64 super_blocks_used, root_blocks_used; if (pin) { ret = pin_down_block(root, blocknr, 0); BUG_ON(ret); } + /* block accounting for super block */ super_blocks_used = btrfs_super_blocks_used(&info->super_copy); btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used - num_blocks); + + /* block accounting for root item */ + root_blocks_used = 
btrfs_root_blocks_used(&root->root_item); + btrfs_set_root_blocks_used(&root->root_item, + root_blocks_used - num_blocks); + ret = btrfs_del_item(trans, extent_root, path); if (ret) { return ret; @@ -1184,7 +1191,7 @@ int btrfs_alloc_extent(struct btrfs_tran { int ret; int pending_ret; - u64 super_blocks_used; + u64 super_blocks_used, root_blocks_used; u64 search_start = 0; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; @@ -1202,9 +1209,15 @@ int btrfs_alloc_extent(struct btrfs_tran if (ret) return ret; + /* block accounting for super block */ super_blocks_used = btrfs_super_blocks_used(&info->super_copy); btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used + num_blocks); + + /* block accounting for root item */ + root_blocks_used = btrfs_root_blocks_used(&root->root_item); + btrfs_set_root_blocks_used(&root->root_item, root_blocks_used + + num_blocks); if (root == extent_root) { BUG_ON(num_blocks != 1); diff -r 6125224d77d0 inode.c --- a/inode.c Fri Aug 10 16:22:09 2007 -0400 +++ b/inode.c Fri Aug 17 16:17:22 2007 -0400 @@ -692,7 +692,8 @@ out: */ static int fixup_tree_root_location(struct btrfs_root *root, struct btrfs_key *location, - struct btrfs_root **sub_root) + struct btrfs_root **sub_root, + struct dentry *dentry) { struct btrfs_path *path; struct btrfs_root_item *ri; @@ -706,7 +707,9 @@ static int fixup_tree_root_location(stru BUG_ON(!path); mutex_lock(&root->fs_info->fs_mutex); - *sub_root = btrfs_read_fs_root(root->fs_info, location); + *sub_root = btrfs_read_fs_root(root->fs_info, location, + dentry->d_name.name, + dentry->d_name.len); if (IS_ERR(*sub_root)) return PTR_ERR(*sub_root); @@ -769,7 +772,8 @@ static struct dentry *btrfs_lookup(struc return ERR_PTR(ret); inode = NULL; if (location.objectid) { - ret = fixup_tree_root_location(root, &location, &sub_root); + ret = fixup_tree_root_location(root, &location, &sub_root, + dentry); if (ret < 0) return ERR_PTR(ret); if (ret > 0) @@ -2022,6 
+2026,7 @@ static int create_subvol(struct btrfs_ro btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol)); btrfs_set_root_refs(&root_item, 1); + btrfs_set_root_blocks_used(&root_item, 0); memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); root_item.drop_level = 0; brelse(subvol); @@ -2058,7 +2063,7 @@ static int create_subvol(struct btrfs_ro if (ret) goto fail_commit; - new_root = btrfs_read_fs_root(root->fs_info, &key); + new_root = btrfs_read_fs_root(root->fs_info, &key, name, namelen); BUG_ON(!new_root); trans = btrfs_start_transaction(new_root, 1); diff -r 6125224d77d0 root-tree.c --- a/root-tree.c Fri Aug 10 16:22:09 2007 -0400 +++ b/root-tree.c Fri Aug 17 16:59:44 2007 -0400 @@ -40,7 +40,7 @@ int btrfs_find_last_root(struct btrfs_ro if (ret < 0) goto out; BUG_ON(ret == 0); - l = btrfs_buffer_leaf(path->nodes[0]); + l = btrfs_buffer_leaf(path->nodes[0]); BUG_ON(path->slots[0] == 0); slot = path->slots[0] - 1; if (btrfs_disk_key_objectid(&l->items[slot].key) != objectid) { @@ -50,6 +50,7 @@ int btrfs_find_last_root(struct btrfs_ro memcpy(item, btrfs_item_ptr(l, slot, struct btrfs_root_item), sizeof(*item)); btrfs_disk_key_to_cpu(key, &l->items[slot].key); + ret = 0; out: btrfs_release_path(root, path); diff -r 6125224d77d0 super.c --- a/super.c Fri Aug 10 16:22:09 2007 -0400 +++ b/super.c Thu Aug 16 13:48:01 2007 -0400 @@ -45,12 +45,14 @@ static void btrfs_put_super (struct supe static void btrfs_put_super (struct super_block * sb) { struct btrfs_root *root = btrfs_sb(sb); + struct btrfs_fs_info *fs = root->fs_info; int ret; ret = close_ctree(root); if (ret) { printk("close ctree returns %d\n", ret); } + btrfs_sysfs_del_super(fs); sb->s_fs_info = NULL; } @@ -100,6 +102,12 @@ static int btrfs_fill_super(struct super err = -ENOMEM; goto fail_close; } + + /* this does the super kobj at the same time */ + err = btrfs_sysfs_add_super(tree_root->fs_info); + if (err) + goto fail_close; + sb->s_root = root_dentry; 
btrfs_transaction_queue_work(tree_root, HZ * 30); return 0; @@ -181,6 +189,11 @@ static int __init init_btrfs_fs(void) static int __init init_btrfs_fs(void) { int err; + + err = btrfs_init_sysfs(); + if (err) + return err; + btrfs_init_transaction_sys(); err = btrfs_init_cachep(); if (err) @@ -193,6 +206,7 @@ static void __exit exit_btrfs_fs(void) btrfs_exit_transaction_sys(); btrfs_destroy_cachep(); unregister_filesystem(&btrfs_fs_type); + btrfs_exit_sysfs(); } module_init(init_btrfs_fs) diff -r 6125224d77d0 sysfs.c --- a/sysfs.c Fri Aug 10 16:22:09 2007 -0400 +++ b/sysfs.c Fri Aug 17 17:16:06 2007 -0400 @@ -16,6 +16,224 @@ * Boston, MA 021110-1307, USA. */ +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/completion.h> +#include <linux/buffer_head.h> +#include <linux/module.h> +#include <linux/kobject.h> + #include "ctree.h" #include "disk-io.h" #include "transaction.h" + +static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)btrfs_root_blocks_used(&root->root_item)); +} + +static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)btrfs_root_block_limit(&root->root_item)); +} + +static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)btrfs_super_blocks_used(fs->disk_super)); +} + +static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)btrfs_super_total_blocks(fs->disk_super)); +} + +static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)btrfs_super_blocksize(fs->disk_super)); +} + +/* this is for root attrs (subvols/snapshots) */ +struct btrfs_root_attr { + struct attribute attr; + 
ssize_t (*show)(struct btrfs_root *, char *); + ssize_t (*store)(struct btrfs_root *, const char *, size_t); +}; + +#define ROOT_ATTR(name, mode, show, store) \ +static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, show, store) + +ROOT_ATTR(blocks_used, 0444, root_blocks_used_show, NULL); +ROOT_ATTR(block_limit, 0644, root_block_limit_show, NULL); + +static struct attribute *btrfs_root_attrs[] = { + &btrfs_root_attr_blocks_used.attr, + &btrfs_root_attr_block_limit.attr, + NULL, +}; + +/* this is for super attrs (actual full fs) */ +struct btrfs_super_attr { + struct attribute attr; + ssize_t (*show)(struct btrfs_fs_info *, char *); + ssize_t (*store)(struct btrfs_fs_info *, const char *, size_t); +}; + +#define SUPER_ATTR(name, mode, show, store) \ +static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, show, store) + +SUPER_ATTR(blocks_used, 0444, super_blocks_used_show, NULL); +SUPER_ATTR(total_blocks, 0444, super_total_blocks_show, NULL); +SUPER_ATTR(blocksize, 0444, super_blocksize_show, NULL); + +static struct attribute *btrfs_super_attrs[] = { + &btrfs_super_attr_blocks_used.attr, + &btrfs_super_attr_total_blocks.attr, + &btrfs_super_attr_blocksize.attr, + NULL, +}; + +static ssize_t btrfs_super_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info, + super_kobj); + struct btrfs_super_attr *a = container_of(attr, + struct btrfs_super_attr, + attr); + + return a->show ? a->show(fs, buf) : 0; +} + +static ssize_t btrfs_super_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t len) +{ + struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info, + super_kobj); + struct btrfs_super_attr *a = container_of(attr, + struct btrfs_super_attr, + attr); + + return a->store ? 
a->store(fs, buf, len) : 0; +} + +static ssize_t btrfs_root_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct btrfs_root *root = container_of(kobj, struct btrfs_root, + root_kobj); + struct btrfs_root_attr *a = container_of(attr, + struct btrfs_root_attr, + attr); + + return a->show ? a->show(root, buf) : 0; +} + +static ssize_t btrfs_root_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t len) +{ + struct btrfs_root *root = container_of(kobj, struct btrfs_root, + root_kobj); + struct btrfs_root_attr *a = container_of(attr, + struct btrfs_root_attr, + attr); + return a->store ? a->store(root, buf, len) : 0; +} + +static struct sysfs_ops btrfs_super_attr_ops = { + .show = btrfs_super_attr_show, + .store = btrfs_super_attr_store, +}; + +static struct sysfs_ops btrfs_root_attr_ops = { + .show = btrfs_root_attr_show, + .store = btrfs_root_attr_store, +}; + +static struct kobj_type btrfs_root_ktype = { + .default_attrs = btrfs_root_attrs, + .sysfs_ops = &btrfs_root_attr_ops, +}; + +static struct kobj_type btrfs_super_ktype = { + .default_attrs = btrfs_super_attrs, + .sysfs_ops = &btrfs_super_attr_ops, +}; + +static struct kset btrfs_kset = { + .kobj = {.name = "btrfs"}, +}; + +int btrfs_sysfs_add_super(struct btrfs_fs_info *fs) +{ + int error; + + fs->super_kobj.kset = &btrfs_kset; + fs->super_kobj.ktype = &btrfs_super_ktype; + + error = kobject_set_name(&fs->super_kobj, "%s", + fs->sb->s_id); + if (error) + goto fail; + + error = kobject_register(&fs->super_kobj); + if (error) + goto fail; + + return 0; + +fail: + printk(KERN_ERR "btrfs: sysfs creation for super failed\n"); + return error; +} + +int btrfs_sysfs_add_root(struct btrfs_root *root) +{ + int error; + + root->root_kobj.ktype = &btrfs_root_ktype; + root->root_kobj.parent = &root->fs_info->super_kobj; + + error = kobject_set_name(&root->root_kobj, "%s", root->name); + if (error) { + goto fail; + } + + error = kobject_register(&root->root_kobj); + 
if (error) + goto fail; + + return 0; + +fail: + printk(KERN_ERR "btrfs: sysfs creation for root failed\n"); + return error; +} + +void btrfs_sysfs_del_root(struct btrfs_root *root) +{ + kobject_unregister(&root->root_kobj); +} + +void btrfs_sysfs_del_super(struct btrfs_fs_info *fs) +{ + kobject_unregister(&fs->super_kobj); +} + +int btrfs_init_sysfs() +{ + kobj_set_kset_s(&btrfs_kset, fs_subsys); + return kset_register(&btrfs_kset); +} + +void btrfs_exit_sysfs() +{ + kset_unregister(&btrfs_kset); +}
Chris Mason
2007-Aug-20 13:53 UTC
[Btrfs-devel] [PATCH 2/2] block accounting+sysfs support for btrfs
On Fri, 17 Aug 2007 17:49:30 -0400 Josef Bacik <jbacik@redhat.com> wrote: > Hello, > > This patch adds block accounting per root_item in btrfs, and adds a > sysfs infrastructure to report it (as well as future things). The > sysfs hierarchy is > > /sys/fs/btrfs/<sb->s_id>/<root_item name>/ Ok, this is almost perfect ;) I see one small problem, quoting from the kobject docs (Documentation/kobject.txt): ------- It is _imperative_ that you supply a destructor for dynamically allocated kobjects to free them if you are using kobject reference counts. The reference count controls the lifetime of the object. If it goes to 0, then it is assumed that the object will be freed and cannot be used. ------- This means that we have to free anything referenced by the kobject in the destructor for the kobject. Looks like most of your sysfs bits touch things from kmalloc (struct btrfs_root and struct btrfs_fs_info), with the exception of the 3 fields that reference struct btrfs_fs_info->disk_super. What I would do in this case is just get rid of those sysfs methods. You can get them via df, and we can deal with other problems later ;) -chris
Chris Mason
2007-Aug-28 08:08 UTC
[Btrfs-devel] [PATCH 2/2] block accounting+sysfs support for btrfs
Hello, I've got this merged locally (the completion handler version) and the sysfs bits seem to be working fine. The only problem is the accounting doesn't record blocks that are deleted in the root items. This is because of the somewhat strange path things take to being freed. So, let's pretend we've got a 1GB file named foo and we rm it. 1) start a transaction 2) remove 'foo' from dir (this cows dir blocks) 3) remove extent pointers from foo (this cows all blocks w/extent pointers) 4) remove foo's inode 5) (eventually) commit At this point, we have two tree roots. There's the most recent root, where all traces of foo are gone, and there's the old root which has all the tree blocks from before the cow. The extents from foo are not free on disk yet, because they are still referenced by the old root. btrfs_drop_snapshot comes in and starts dropping reference counts on blocks in the old root. It will find the extent pointers for foo and drop references on those too, and finally those extents will really be freed on disk. So, this is a very long way of saying that the last place actually calling btrfs_free_extent is btrfs_drop_snapshot, and it does this with a copy of the root item. So, when we make changes to root->root_item, they never actually get sent down to the disk. What we want to do is update the block count in the most recent root with the number of blocks that were freed by btrfs_drop_snapshot. So, before calling btrfs_drop_snapshot, we need to record the number of blocks allocated, and then compare it with the number still there after calling drop_snapshot. Then update the most recent root to reflect the changes on disk. Does that make any sense? -chris