Hello, This patch series does two things: - adds allocation helpers for struct btrfs_device and struct btrfs_fs_devices so that all the list_heads, spinlocks, etc are properly initialized and the code for that is in one place; - fixes a bug in the umount sequence, which, under certain conditions, can lead to mount failures. This is on top of 3.11-rc3, but applies cleanly to btrfs-next/master as of now. You can pull from: git://github.com/idryomov/btrfs-unstable.git dev-fixes Thanks, Ilya Ilya Dryomov (4): Btrfs: find_next_devid: root -> fs_info Btrfs: add btrfs_alloc_device and switch to it Btrfs: add alloc_fs_devices and switch to it Btrfs: rollback btrfs_device fields on umount fs/btrfs/volumes.c | 250 +++++++++++++++++++++++++++++++++------------------- fs/btrfs/volumes.h | 3 + 2 files changed, 162 insertions(+), 91 deletions(-) -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
find_next_devid() knows which root to search, so it should take an fs_info instead of an arbitrary root. Signed-off-by: Ilya Dryomov <idryomov@gmail.com> --- fs/btrfs/volumes.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 78b8717..ae1bcb0 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1307,15 +1307,14 @@ static u64 find_next_chunk(struct btrfs_fs_info *fs_info) return ret; } -static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid) +static noinline int find_next_devid(struct btrfs_fs_info *fs_info, + u64 *devid_ret) { int ret; struct btrfs_key key; struct btrfs_key found_key; struct btrfs_path *path; - root = root->fs_info->chunk_root; - path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -1324,20 +1323,21 @@ static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid) key.type = BTRFS_DEV_ITEM_KEY; key.offset = (u64)-1; - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0); if (ret < 0) goto error; BUG_ON(ret == 0); /* Corruption */ - ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID, + ret = btrfs_previous_item(fs_info->chunk_root, path, + BTRFS_DEV_ITEMS_OBJECTID, BTRFS_DEV_ITEM_KEY); if (ret) { - *objectid = 1; + *devid_ret = 1; } else { btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); - *objectid = found_key.offset + 1; + *devid_ret = found_key.offset + 1; } ret = 0; error: @@ -1971,7 +1971,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) } rcu_assign_pointer(device->name, name); - ret = find_next_devid(root, &device->devid); + ret = find_next_devid(root->fs_info, &device->devid); if (ret) { rcu_string_free(device->name); kfree(device); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Ilya Dryomov
2013-Aug-12 11:33 UTC
[PATCH 2/4] Btrfs: add btrfs_alloc_device and switch to it
Currently btrfs_device is allocated ad-hoc in a few different places, and as a result not all fields are initialized properly. In particular, readahead state is only initialized in device_list_add (at scan time), and not in btrfs_init_new_device (when the new device is added with ''btrfs dev add''). Fix this by adding an allocation helper and switch everybody but __btrfs_close_devices to it. (__btrfs_close_devices is dealt with in a later commit.) Signed-off-by: Ilya Dryomov <idryomov@gmail.com> --- fs/btrfs/volumes.c | 150 ++++++++++++++++++++++++++++++++-------------------- fs/btrfs/volumes.h | 3 ++ 2 files changed, 96 insertions(+), 57 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index ae1bcb0..fe52704 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -101,6 +101,27 @@ void btrfs_cleanup_fs_uuids(void) } } +static struct btrfs_device *__alloc_device(void) +{ + struct btrfs_device *dev; + + dev = kzalloc(sizeof(*dev), GFP_NOFS); + if (!dev) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&dev->dev_list); + INIT_LIST_HEAD(&dev->dev_alloc_list); + + spin_lock_init(&dev->io_lock); + + spin_lock_init(&dev->reada_lock); + atomic_set(&dev->reada_in_flight, 0); + INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT); + INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT); + + return dev; +} + static noinline struct btrfs_device *__find_device(struct list_head *head, u64 devid, u8 *uuid) { @@ -414,17 +435,12 @@ static noinline int device_list_add(const char *path, if (fs_devices->opened) return -EBUSY; - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) { + device = btrfs_alloc_device(NULL, &devid, + disk_super->dev_item.uuid); + if (IS_ERR(device)) { /* we can safely leave the fs_devices entry around */ - return -ENOMEM; + return PTR_ERR(device); } - device->devid = devid; - device->dev_stats_valid = 0; - device->work.func = pending_bios_fn; - memcpy(device->uuid, disk_super->dev_item.uuid, - BTRFS_UUID_SIZE); - spin_lock_init(&device->io_lock); name = rcu_string_strdup(path, GFP_NOFS); if (!name) { @@ -432,15 +448,6 @@ static noinline int device_list_add(const char *path, return -ENOMEM; } rcu_assign_pointer(device->name, name); - INIT_LIST_HEAD(&device->dev_alloc_list); - - /* init readahead state */ - spin_lock_init(&device->reada_lock); - device->reada_curr_zone = NULL; - atomic_set(&device->reada_in_flight, 0); - device->reada_next = 0; - INIT_RADIX_TREE(&device->reada_zones, GFP_NOFS & ~__GFP_WAIT); - INIT_RADIX_TREE(&device->reada_extents, GFP_NOFS & ~__GFP_WAIT); mutex_lock(&fs_devices->device_list_mutex); list_add_rcu(&device->dev_list, &fs_devices->devices); @@ -491,8 +498,9 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) list_for_each_entry(orig_dev, &orig->devices, dev_list) { struct rcu_string *name; - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) + device = btrfs_alloc_device(NULL, &orig_dev->devid, + orig_dev->uuid); + if (IS_ERR(device)) goto error; /* @@ -506,13 +514,6 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) } rcu_assign_pointer(device->name, name); - device->devid = orig_dev->devid; - device->work.func = pending_bios_fn; - memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid)); - spin_lock_init(&device->io_lock); - INIT_LIST_HEAD(&device->dev_list); - INIT_LIST_HEAD(&device->dev_alloc_list); - list_add(&device->dev_list, &fs_devices->devices); device->fs_devices = fs_devices; fs_devices->num_devices++; @@ -1956,10 +1957,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) } mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) { + device = btrfs_alloc_device(root->fs_info, NULL, NULL); + if (IS_ERR(device)) { /* we can safely leave the fs_devices entry around */ - ret = -ENOMEM; + ret = PTR_ERR(device); goto error; } @@ -1971,13 +1972,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) } rcu_assign_pointer(device->name, name); - ret = find_next_devid(root->fs_info, &device->devid); - if (ret) { - rcu_string_free(device->name); - kfree(device); - goto error; - } - trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { rcu_string_free(device->name); @@ -1992,9 +1986,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) if (blk_queue_discard(q)) device->can_discard = 1; device->writeable = 1; - device->work.func = pending_bios_fn; - generate_random_uuid(device->uuid); - spin_lock_init(&device->io_lock); device->generation = trans->transid; device->io_width = root->sectorsize; device->io_align = root->sectorsize; @@ -2142,9 +2133,9 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, } } - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) { - ret = -ENOMEM; + device = btrfs_alloc_device(NULL, BTRFS_DEV_REPLACE_DEVID, NULL); + if (IS_ERR(device)) { + ret = PTR_ERR(device); goto error; } @@ -2161,10 +2152,6 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, device->can_discard = 1; mutex_lock(&root->fs_info->fs_devices->device_list_mutex); device->writeable = 1; - device->work.func = pending_bios_fn; - generate_random_uuid(device->uuid); - device->devid = BTRFS_DEV_REPLACE_DEVID; - spin_lock_init(&device->io_lock); device->generation = 0; device->io_width = root->sectorsize; device->io_align = root->sectorsize; @@ -5314,23 +5301,72 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root, struct btrfs_device *device; struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) + device = btrfs_alloc_device(NULL, &devid, dev_uuid); + if (IS_ERR(device)) return NULL; - list_add(&device->dev_list, - &fs_devices->devices); - device->devid = devid; - device->work.func = pending_bios_fn; + + list_add(&device->dev_list, &fs_devices->devices); device->fs_devices = fs_devices; - device->missing = 1; fs_devices->num_devices++; + + device->missing = 1; fs_devices->missing_devices++; - spin_lock_init(&device->io_lock); - INIT_LIST_HEAD(&device->dev_alloc_list); - memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE); + return device; } +/** + * btrfs_alloc_device - allocate struct btrfs_device + * @fs_info: used only for generating a new devid, can be NULL if + * devid is provided (i.e. @devid != NULL). + * @devid: a pointer to devid for this device. If NULL a new devid + * is generated. + * @uuid: a pointer to UUID for this device. If NULL a new UUID + * is generated. + * + * Return: a pointer to a new &struct btrfs_device on success; ERR_PTR() + * on error. Returned struct is not linked onto any lists and can be + * destroyed with kfree() right away. + */ +struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, + const u64 *devid, + const u8 *uuid) +{ + struct btrfs_device *dev; + u64 tmp; + + if (!devid && !fs_info) { + WARN_ON(1); + return ERR_PTR(-EINVAL); + } + + dev = __alloc_device(); + if (IS_ERR(dev)) + return dev; + + if (devid) + tmp = *devid; + else { + int ret; + + ret = find_next_devid(fs_info, &tmp); + if (ret) { + kfree(dev); + return ERR_PTR(ret); + } + } + dev->devid = tmp; + + if (uuid) + memcpy(dev->uuid, uuid, BTRFS_UUID_SIZE); + else + generate_random_uuid(dev->uuid); + + dev->work.func = pending_bios_fn; + + return dev; +} + static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, struct extent_buffer *leaf, struct btrfs_chunk *chunk) diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 8670558..5706cd8 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -298,6 +298,9 @@ void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info, int btrfs_find_device_missing_or_by_path(struct btrfs_root *root, char *device_path, struct btrfs_device **device); +struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, + const u64 *devid, + const u8 *uuid); int btrfs_rm_device(struct btrfs_root *root, char *device_path); void btrfs_cleanup_fs_uuids(void); int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len); -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Ilya Dryomov
2013-Aug-12 11:33 UTC
[PATCH 3/4] Btrfs: add alloc_fs_devices and switch to it
In the spirit of btrfs_alloc_device, add a helper for allocating and doing some common initialization of btrfs_fs_devices struct. Signed-off-by: Ilya Dryomov <idryomov@gmail.com> --- fs/btrfs/volumes.c | 71 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 53 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index fe52704..af17b17 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -62,6 +62,48 @@ static void unlock_chunks(struct btrfs_root *root) mutex_unlock(&root->fs_info->chunk_mutex); } +static struct btrfs_fs_devices *__alloc_fs_devices(void) +{ + struct btrfs_fs_devices *fs_devs; + + fs_devs = kzalloc(sizeof(*fs_devs), GFP_NOFS); + if (!fs_devs) + return ERR_PTR(-ENOMEM); + + mutex_init(&fs_devs->device_list_mutex); + + INIT_LIST_HEAD(&fs_devs->devices); + INIT_LIST_HEAD(&fs_devs->alloc_list); + INIT_LIST_HEAD(&fs_devs->list); + + return fs_devs; +} + +/** + * alloc_fs_devices - allocate struct btrfs_fs_devices + * @fsid: a pointer to UUID for this FS. If NULL a new UUID is + * generated. + * + * Return: a pointer to a new &struct btrfs_fs_devices on success; + * ERR_PTR() on error. Returned struct is not linked onto any lists and + * can be destroyed with kfree() right away. + */ +static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid) +{ + struct btrfs_fs_devices *fs_devs; + + fs_devs = __alloc_fs_devices(); + if (IS_ERR(fs_devs)) + return fs_devs; + + if (fsid) + memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE); + else + generate_random_uuid(fs_devs->fsid); + + return fs_devs; +} + static void free_fs_devices(struct btrfs_fs_devices *fs_devices) { struct btrfs_device *device; @@ -416,16 +458,14 @@ static noinline int device_list_add(const char *path, fs_devices = find_fsid(disk_super->fsid); if (!fs_devices) { - fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); - if (!fs_devices) - return -ENOMEM; - INIT_LIST_HEAD(&fs_devices->devices); - INIT_LIST_HEAD(&fs_devices->alloc_list); + fs_devices = alloc_fs_devices(disk_super->fsid); + if (IS_ERR(fs_devices)) + return PTR_ERR(fs_devices); + list_add(&fs_devices->list, &fs_uuids); - memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); fs_devices->latest_devid = devid; fs_devices->latest_trans = found_transid; - mutex_init(&fs_devices->device_list_mutex); + device = NULL; } else { device = __find_device(&fs_devices->devices, devid, @@ -481,18 +521,13 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) struct btrfs_device *device; struct btrfs_device *orig_dev; - fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); - if (!fs_devices) - return ERR_PTR(-ENOMEM); + fs_devices = alloc_fs_devices(orig->fsid); + if (IS_ERR(fs_devices)) + return fs_devices; - INIT_LIST_HEAD(&fs_devices->devices); - INIT_LIST_HEAD(&fs_devices->alloc_list); - INIT_LIST_HEAD(&fs_devices->list); - mutex_init(&fs_devices->device_list_mutex); fs_devices->latest_devid = orig->latest_devid; fs_devices->latest_trans = orig->latest_trans; fs_devices->total_devices = orig->total_devices; - memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid)); /* We have held the volume lock, it is safe to get the devices. */ list_for_each_entry(orig_dev, &orig->devices, dev_list) { @@ -1794,9 +1829,9 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) if (!fs_devices->seeding) return -EINVAL; - seed_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); - if (!seed_devices) - return -ENOMEM; + seed_devices = __alloc_fs_devices(); + if (IS_ERR(seed_devices)) + return PTR_ERR(seed_devices); old_devices = clone_fs_devices(fs_devices); if (IS_ERR(old_devices)) { -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Ilya Dryomov
2013-Aug-12 11:33 UTC
[PATCH 4/4] Btrfs: rollback btrfs_device fields on umount
It turns out we don''t properly rollback in-core btrfs_device state on umount. We zero out ->bdev, ->in_fs_metadata and that''s about it. In particular, we don''t zero out ->generation, and this can lead to us refusing a mount -- a non-NULL fs_devices->latest_bdev is essential, but btrfs_close_extra_devices will happily assign NULL to ->latest_bdev if the first device on the dev_list happens to be missing and consequently has no bdev attached. This happens because since commit a6b0d5c8 btrfs_close_extra_devices adjusts ->latest_bdev, and in doing that, relies on the ->generation. Fix this, and possibly other problems, by zeroing out everything except for what device_list_add sets, so that a mount right after insmod and ''btrfs dev scan'' is no different from any later mount in this respect. Signed-off-by: Ilya Dryomov <idryomov@gmail.com> --- fs/btrfs/volumes.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index af17b17..2f6bc12 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -673,22 +673,19 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) if (device->can_discard) fs_devices->num_can_discard--; - new_device = kmalloc(sizeof(*new_device), GFP_NOFS); - BUG_ON(!new_device); /* -ENOMEM */ - memcpy(new_device, device, sizeof(*new_device)); + new_device = btrfs_alloc_device(NULL, &device->devid, + device->uuid); + BUG_ON(IS_ERR(new_device)); /* -ENOMEM */ /* Safe because we are under uuid_mutex */ if (device->name) { name = rcu_string_strdup(device->name->str, GFP_NOFS); - BUG_ON(device->name && !name); /* -ENOMEM */ + BUG_ON(!name); /* -ENOMEM */ rcu_assign_pointer(new_device->name, name); } - new_device->bdev = NULL; - new_device->writeable = 0; - new_device->in_fs_metadata = 0; - new_device->can_discard = 0; - spin_lock_init(&new_device->io_lock); + list_replace_rcu(&device->dev_list, &new_device->dev_list); + new_device->fs_devices = device->fs_devices; call_rcu(&device->rcu, free_device); } -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Stefan Behrens
2013-Aug-23 07:35 UTC
Re: [PATCH 2/4] Btrfs: add btrfs_alloc_device and switch to it
On Mon, 12 Aug 2013 14:33:02 +0300, Ilya Dryomov wrote:> Currently btrfs_device is allocated ad-hoc in a few different places, > and as a result not all fields are initialized properly. In particular, > readahead state is only initialized in device_list_add (at scan time), > and not in btrfs_init_new_device (when the new device is added with > ''btrfs dev add''). Fix this by adding an allocation helper and switch > everybody but __btrfs_close_devices to it. (__btrfs_close_devices is > dealt with in a later commit.) > > Signed-off-by: Ilya Dryomov <idryomov@gmail.com> > --- > fs/btrfs/volumes.c | 150 ++++++++++++++++++++++++++++++++-------------------- > fs/btrfs/volumes.h | 3 ++ > 2 files changed, 96 insertions(+), 57 deletions(-) > > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c > index ae1bcb0..fe52704 100644 > --- a/fs/btrfs/volumes.c > +++ b/fs/btrfs/volumes.c[...]> @@ -2142,9 +2133,9 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, > } > } > > - device = kzalloc(sizeof(*device), GFP_NOFS); > - if (!device) { > - ret = -ENOMEM; > + device = btrfs_alloc_device(NULL, BTRFS_DEV_REPLACE_DEVID, NULL); > + if (IS_ERR(device)) { > + ret = PTR_ERR(device);BTRFS_DEV_REPLACE_DEVID is not a "const u64 *devid".> +struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, > + const u64 *devid, > + const u8 *uuid) > +{ > + struct btrfs_device *dev; > + u64 tmp; > + > + if (!devid && !fs_info) { > + WARN_ON(1); > + return ERR_PTR(-EINVAL); > + }This WARN_ON(1) is triggered with the device replace procedure because BTRFS_DEV_REPLACE_DEVID is zero. -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Ilya Dryomov
2013-Aug-23 08:57 UTC
Re: [PATCH 2/4] Btrfs: add btrfs_alloc_device and switch to it
On Fri, Aug 23, 2013 at 10:35 AM, Stefan Behrens <sbehrens@giantdisaster.de> wrote:> This WARN_ON(1) is triggered with the device replace procedure because > BTRFS_DEV_REPLACE_DEVID is zero.Ah, a dreaded 0 in C. What a screw up. This came from my rebuild branch where btrfs_init_dev_replace_tgtdev is a bit different, and I just auto-merged it. I''ll fix it up. Thanks, Ilya -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html