ping?
On 07/04/2011 04:07 PM, WuBo wrote:
> I've been digging into the idea of chunk tree backups. Here is the
> design: before chunk allocation finishes, some information is written
> into the first block in this chunk. This information will be useful
> for rebuilding the chunk tree after a crash. The first block will also
> be moved into fs_info->freed_extents[2], just like the super block.
> After a crash, we can search all the stripe headers to get the whole view
> of the chunk tree and rebuild it.
>
> I have also considered balance, because relocation removes the block
> group. In that case, I clear the stripe header to guard against mistakes.
>
> To keep backward compatibility, BTRFS_FEATURE_INCOMPAT_CHUNK_TREE_BACKUP
> will be added as an incompat flag. This means the chunk tree can be
> recovered after a crash only with the new kernel.
>
> What we need to do is make some changes in these functions:
> btrfs_make_block_group
> btrfs_read_block_groups
> btrfs_remove_block_group
>
> I have tested this patch by rebuilding the chunk tree while repeatedly
> writing and deleting data (using fstress.sh), and also with operations
> such as defragment, balance and "add/delete device". Still, I would
> appreciate it if anybody could do extra testing.
>
> Signed-off-by: Wu Bo <wu.bo@cn.fujitsu.com>
> ---
> fs/btrfs/ctree.h | 14 +++--
> fs/btrfs/extent-tree.c | 95 ++++++++++++++++++++++++++++++-
> fs/btrfs/volumes.c | 144 +++++++++++++++++++++++++++++++++++++++++++++++-
> fs/btrfs/volumes.h | 25 ++++++++
> 4 files changed, 267 insertions(+), 11 deletions(-)
>
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 2e61fe1..29e9f30 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -43,6 +43,7 @@ extern struct kmem_cache *btrfs_bit_radix_cachep;
> extern struct kmem_cache *btrfs_path_cachep;
> extern struct kmem_cache *btrfs_free_space_cachep;
> struct btrfs_ordered_sum;
> +struct map_lookup;
>
> #define BTRFS_MAGIC "_BHRfS_M"
>
> @@ -410,6 +411,7 @@ struct btrfs_super_block {
> #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
> #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)
> #define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3)
> +#define BTRFS_FEATURE_INCOMPAT_CHUNK_TREE_BACKUP (1ULL << 4)
>
> #define BTRFS_FEATURE_COMPAT_SUPP 0ULL
> #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
> @@ -417,7 +419,8 @@ struct btrfs_super_block {
> (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
> BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
> BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \
> - BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO)
> + BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \
> + BTRFS_FEATURE_INCOMPAT_CHUNK_TREE_BACKUP)
>
> /*
> * A leaf is full of items. offset and size tell us where to find
> @@ -2179,11 +2182,12 @@ int btrfs_free_block_groups(struct btrfs_fs_info
*info);
> int btrfs_read_block_groups(struct btrfs_root *root);
> int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr);
> int btrfs_make_block_group(struct btrfs_trans_handle *trans,
> - struct btrfs_root *root, u64 bytes_used,
> - u64 type, u64 chunk_objectid, u64 chunk_offset,
> - u64 size);
> + struct btrfs_root *root, struct map_lookup *map,
> + u64 bytes_used, u64 type, u64 chunk_objectid,
> + u64 chunk_offset, u64 size);
> int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
> - struct btrfs_root *root, u64 group_start);
> + struct btrfs_root *root, struct map_lookup *map,
> + u64 group_start);
> u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
> u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
> void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode
*ionde);
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index 9ee6bd5..81141ad 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -23,6 +23,7 @@
> #include <linux/rcupdate.h>
> #include <linux/kthread.h>
> #include <linux/slab.h>
> +#include <linux/buffer_head.h>
> #include "compat.h"
> #include "hash.h"
> #include "ctree.h"
> @@ -248,6 +249,60 @@ static int exclude_super_stripes(struct btrfs_root
*root,
> return 0;
> }
>
> +static int exclude_chunk_stripes_header(struct btrfs_root *root,
> + struct btrfs_block_group_cache *cache,
> + struct map_lookup *map)
> +{
> + int ret = 0;
> + int i;
> + u64 chunk_offset;
> + struct extent_map *em = NULL;
> + struct btrfs_mapping_tree *map_tree;
> +
> + map_tree = &root->fs_info->mapping_tree;
> + chunk_offset = cache->key.objectid;
> +
> + /* before read_block_groups, the mapping tree should be ready */
> + if (!map) {
> + read_lock(&map_tree->map_tree.lock);
> + em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
> + read_unlock(&map_tree->map_tree.lock);
> +
> + /* here i made a strict check */
> + if (em && em->start == chunk_offset &&
> + em->len == cache->key.offset)
> + map = (struct map_lookup *)em->bdev;
> + else
> + BUG_ON(1);
> + }
> +
> + for (i = 0; i < map->num_stripes; i++) {
> + int nr;
> + int stripe_len;
> + u64 devid;
> + u64 physical;
> + u64 *logical;
> +
> + devid = map->stripes[i].dev->devid;
> + physical = map->stripes[i].physical;
> + ret = btrfs_rmap_block(map_tree, chunk_offset, physical,
> + devid, &logical, &nr, &stripe_len);
> + if (ret)
> + goto error;
> +
> + while (nr--) {
> + add_excluded_extent(root, logical[nr],
> + root->sectorsize);
> + cache->bytes_super += root->sectorsize;
> + }
> + kfree(logical);
> + }
> +
> +error:
> + free_extent_map(em);
> + return ret;
> +}
> +
> static struct btrfs_caching_control *
> get_caching_control(struct btrfs_block_group_cache *cache)
> {
> @@ -8524,6 +8579,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
> struct extent_buffer *leaf;
> int need_clear = 0;
> u64 cache_gen;
> + u64 feature;
>
> root = info->extent_root;
> key.objectid = 0;
> @@ -8592,6 +8648,18 @@ int btrfs_read_block_groups(struct btrfs_root *root)
> exclude_super_stripes(root, cache);
>
> /*
> + * it''s should consider the backward compatibility.
> + * in mkfs.btrfs, some chunks has already been created and
> + * the incompat flag will be set.
> + */
> + feature =
btrfs_super_incompat_flags(&root->fs_info->super_copy);
> + if (feature & BTRFS_FEATURE_INCOMPAT_CHUNK_TREE_BACKUP) {
> + ret = exclude_chunk_stripes_header(root, cache, NULL);
> + if (ret)
> + goto error;
> + }
> +
> + /*
> * check for two cases, either we are full, and therefore
> * don''t need to bother with the caching work since we
won''t
> * find any space, or we are empty, and we can just add all
> @@ -8655,13 +8723,14 @@ error:
> }
>
> int btrfs_make_block_group(struct btrfs_trans_handle *trans,
> - struct btrfs_root *root, u64 bytes_used,
> - u64 type, u64 chunk_objectid, u64 chunk_offset,
> - u64 size)
> + struct btrfs_root *root, struct map_lookup *map,
> + u64 bytes_used, u64 type, u64 chunk_objectid,
> + u64 chunk_offset, u64 size)
> {
> int ret;
> struct btrfs_root *extent_root;
> struct btrfs_block_group_cache *cache;
> + u64 feature;
>
> extent_root = root->fs_info->extent_root;
>
> @@ -8699,6 +8768,13 @@ int btrfs_make_block_group(struct btrfs_trans_handle
*trans,
> cache->cached = BTRFS_CACHE_FINISHED;
> exclude_super_stripes(root, cache);
>
> + feature =
btrfs_super_incompat_flags(&root->fs_info->super_copy);
> + if (feature & BTRFS_FEATURE_INCOMPAT_CHUNK_TREE_BACKUP) {
> + ret = exclude_chunk_stripes_header(root, cache, map);
> + if (ret)
> + return ret;
> + }
> +
> add_new_free_space(cache, root->fs_info, chunk_offset,
> chunk_offset + size);
>
> @@ -8727,7 +8803,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle
*trans,
> }
>
> int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
> - struct btrfs_root *root, u64 group_start)
> + struct btrfs_root *root,
> + struct map_lookup *map,
> + u64 group_start)
> {
> struct btrfs_path *path;
> struct btrfs_block_group_cache *block_group;
> @@ -8737,6 +8815,7 @@ int btrfs_remove_block_group(struct
btrfs_trans_handle *trans,
> struct inode *inode;
> int ret;
> int factor;
> + u64 feature;
>
> root = root->fs_info->extent_root;
>
> @@ -8848,6 +8927,14 @@ int btrfs_remove_block_group(struct
btrfs_trans_handle *trans,
> goto out;
>
> ret = btrfs_del_item(trans, root, path);
> + if (ret)
> + goto out;
> +
> + /* erase the first block which record this chunk info */
> + feature =
btrfs_super_incompat_flags(&root->fs_info->super_copy);
> + if (feature & BTRFS_FEATURE_INCOMPAT_CHUNK_TREE_BACKUP)
> + ret = erase_chunk_stripes_header(root, map);
> +
> out:
> btrfs_free_path(path);
> return ret;
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 8b9fb8c..a69255f 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -23,6 +23,7 @@
> #include <linux/random.h>
> #include <linux/iocontext.h>
> #include <linux/capability.h>
> +#include <linux/crc32c.h>
> #include <asm/div64.h>
> #include "compat.h"
> #include "ctree.h"
> @@ -1919,7 +1920,7 @@ static int btrfs_relocate_chunk(struct btrfs_root
*root,
> BUG_ON(ret);
> }
>
> - ret = btrfs_remove_block_group(trans, extent_root, chunk_offset);
> + ret = btrfs_remove_block_group(trans, extent_root, map, chunk_offset);
> BUG_ON(ret);
>
> write_lock(&em_tree->lock);
> @@ -2661,7 +2662,7 @@ static int __btrfs_alloc_chunk(struct
btrfs_trans_handle *trans,
> BUG_ON(ret);
> free_extent_map(em);
>
> - ret = btrfs_make_block_group(trans, extent_root, 0, type,
> + ret = btrfs_make_block_group(trans, extent_root, map, 0, type,
> BTRFS_FIRST_CHUNK_TREE_OBJECTID,
> start, *num_bytes);
> BUG_ON(ret);
> @@ -2756,6 +2757,121 @@ static int __finish_chunk_alloc(struct
btrfs_trans_handle *trans,
> return 0;
> }
>
> +static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int
uptodate)
> +{
> + if (uptodate)
> + set_buffer_uptodate(bh);
> + else
> + clear_buffer_uptodate(bh);
> +
> + unlock_buffer(bh);
> + put_bh(bh);
> +}
> +
> +static int mark_chunk_stripes_header(struct btrfs_root *extent_root,
> + struct map_lookup *map,
> + u64 chunk_offset,
> + u64 chunk_size)
> +{
> + struct buffer_head *bh;
> + struct btrfs_device *device = NULL;
> + struct btrfs_stripe_header *header;
> + u64 bytenr;
> + u64 blocknr;
> + u32 sectorsize;
> + u32 crc;
> + int index;
> + int ret;
> +
> + ret = 0;
> + header = kzalloc(sizeof(*header), GFP_NOFS);
> + if (!header)
> + return -ENOMEM;
> +
> + for (index = 0; index < map->num_stripes; index++) {
> + device = map->stripes[index].dev;
> + bytenr = map->stripes[index].physical;
> + blocknr = bytenr;
> + sectorsize = extent_root->sectorsize;
> + do_div(blocknr, sectorsize);
> + bh = __getblk(device->bdev, blocknr, sectorsize);
> + if (!bh) {
> + kfree(header);
> + return -EFAULT;
> + }
> +
> + memset(header, 0, sizeof(*header));
> + header->tag = cpu_to_le64(BTRFS_STRIPE_HEADER_TAG);
> + header->owner = cpu_to_le64(extent_root->root_key.objectid);
> + header->devid = cpu_to_le64(device->devid);
> + header->dev_offset = cpu_to_le64(bytenr);
> + header->chunk_offset = cpu_to_le64(chunk_offset);
> + header->chunk_size = cpu_to_le64(chunk_size);
> + header->type = cpu_to_le64(map->type);
> + header->stripe_len = cpu_to_le64(map->stripe_len);
> + header->stripe_index = cpu_to_le32(index);
> + header->io_align = cpu_to_le32(map->io_align);
> + header->io_width = cpu_to_le32(map->io_width);
> + header->sector_size = cpu_to_le32(map->sector_size);
> + header->num_stripes = cpu_to_le16(map->num_stripes);
> + header->sub_stripes = cpu_to_le16(map->sub_stripes);
> + memcpy(header->uuid, device->uuid, BTRFS_UUID_SIZE);
> + memcpy(header->fsid, extent_root->fs_info->fsid,
BTRFS_FSID_SIZE);
> + crc = crc32c(0, (unsigned char *)header, sizeof(*header));
> + header->crc = cpu_to_le32(crc);
> +
> + memset(bh->b_data, 0, sectorsize);
> + memcpy(bh->b_data, header, sizeof(*header));
> +
> + get_bh(bh);
> + set_buffer_uptodate(bh);
> + lock_buffer(bh);
> + bh->b_end_io = btrfs_end_buffer_write_sync;
> +
> + submit_bh(WRITE_SYNC, bh);
> + wait_on_buffer(bh);
> + brelse(bh);
> + }
> +
> + kfree(header);
> + return ret;
> +}
> +
> +int erase_chunk_stripes_header(struct btrfs_root *root,
> + struct map_lookup *map)
> +{
> + int index;
> + u64 blocknr;
> + u32 sectorsize;
> + struct btrfs_device *device;
> + struct buffer_head *bh;
> +
> + if (!map)
> + return -EIO;
> +
> + for (index = 0; index < map->num_stripes; index++) {
> + device = map->stripes[index].dev;
> + blocknr = map->stripes[index].physical;
> + sectorsize = root->sectorsize;
> + do_div(blocknr, sectorsize);
> + bh = __getblk(device->bdev, blocknr, sectorsize);
> + if (!bh)
> + return -EFAULT;
> +
> + memset(bh->b_data, 0, sectorsize);
> + get_bh(bh);
> + set_buffer_uptodate(bh);
> + lock_buffer(bh);
> + bh->b_end_io = btrfs_end_buffer_write_sync;
> +
> + submit_bh(WRITE_SYNC, bh);
> + wait_on_buffer(bh);
> + brelse(bh);
> + }
> +
> + return 0;
> +}
> +
> /*
> * Chunk allocation falls into two parts. The first part does works
> * that make the new allocated chunk useable, but not do any operation
> @@ -2772,6 +2888,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle
*trans,
> struct map_lookup *map;
> struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
> int ret;
> + u64 feature;
>
> ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID,
> &chunk_offset);
> @@ -2786,6 +2903,15 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle
*trans,
> ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset,
> chunk_size, stripe_size);
> BUG_ON(ret);
> +
> + feature =
btrfs_super_incompat_flags(&extent_root->fs_info->super_copy);
> + if (feature & BTRFS_FEATURE_INCOMPAT_CHUNK_TREE_BACKUP) {
> + ret = mark_chunk_stripes_header(extent_root, map,
> + chunk_offset, chunk_size);
> + if (ret)
> + return ret;
> + }
> +
> return 0;
> }
>
> @@ -2805,6 +2931,7 @@ static noinline int init_first_rw_device(struct
btrfs_trans_handle *trans,
> struct btrfs_fs_info *fs_info = root->fs_info;
> struct btrfs_root *extent_root = fs_info->extent_root;
> int ret;
> + u64 feature;
>
> ret = find_next_chunk(fs_info->chunk_root,
> BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset);
> @@ -2848,6 +2975,19 @@ static noinline int init_first_rw_device(struct
btrfs_trans_handle *trans,
> sys_chunk_offset, sys_chunk_size,
> sys_stripe_size);
> BUG_ON(ret);
> +
> + feature =
btrfs_super_incompat_flags(&root->fs_info->super_copy);
> + if (feature & BTRFS_FEATURE_INCOMPAT_CHUNK_TREE_BACKUP) {
> + ret = mark_chunk_stripes_header(root, map,
> + chunk_offset,
> + chunk_size);
> + BUG_ON(ret);
> +
> + ret = mark_chunk_stripes_header(root, sys_map,
> + sys_chunk_offset,
> + sys_chunk_size);
> + BUG_ON(ret);
> + }
> return 0;
> }
>
> diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
> index cc2eada..101acf2 100644
> --- a/fs/btrfs/volumes.h
> +++ b/fs/btrfs/volumes.h
> @@ -157,6 +157,31 @@ struct map_lookup {
> struct btrfs_bio_stripe stripes[];
> };
>
> +#define BTRFS_STRIPE_HEADER_TAG 19860505
> +
> +struct btrfs_stripe_header {
> + u8 fsid[BTRFS_FSID_SIZE];
> + u8 uuid[BTRFS_UUID_SIZE];
> + __le64 tag;
> + __le64 owner;
> + __le64 devid;
> + __le64 dev_offset;
> + __le64 chunk_offset;
> + __le64 chunk_size;
> + __le64 type;
> + __le64 stripe_len;
> + __le32 stripe_index;
> + __le32 io_align;
> + __le32 io_width;
> + __le32 sector_size;
> + __le16 num_stripes;
> + __le16 sub_stripes;
> + __le32 crc;
> +} __attribute__ ((__packed__));
> +
> +int erase_chunk_stripes_header(struct btrfs_root *root,
> + struct map_lookup *map);
> +
> /* Used to sort the devices by max_avail(descending sort) */
> int btrfs_cmp_device_free_bytes(const void *dev_info1, const void
*dev_info2);
>
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html