This is a step in the direction of better -ENOSPC handling. Instead of checking the global bytes counter we check the space_info bytes counters to make sure we have enough space. If we don't we go ahead and try to allocate a new chunk, and then if that fails we return -ENOSPC. This patch adds two counters to btrfs_space_info, bytes_delalloc and bytes_may_use. bytes_delalloc accounts for extents we've actually set up for delalloc and will be allocated at some point down the line. bytes_may_use is to keep track of how many bytes we may use for delalloc at some point. When we actually set the extent_bit for the delalloc bytes we subtract the reserved bytes from the bytes_may_use counter. This keeps us from not actually being able to allocate space for any delalloc bytes. This has been tested with fs_mark and dbench. Signed-off-by: Josef Bacik <jbacik@redhat.com> --- fs/btrfs/btrfs_inode.h | 8 ++ fs/btrfs/ctree.h | 40 +++++++-- fs/btrfs/extent-tree.c | 214 ++++++++++++++++++++++++++++++++++++++++++++---- fs/btrfs/file.c | 16 +++- fs/btrfs/inode.c | 62 ++++---------- fs/btrfs/ioctl.c | 6 +- 6 files changed, 270 insertions(+), 76 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index a8c9693..72677ce 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -66,6 +66,9 @@ struct btrfs_inode { */ struct list_head delalloc_inodes; + /* the space_info for where this inode's data allocations are done */ + struct btrfs_space_info *space_info; + /* full 64 bit generation number, struct vfs_inode doesn't have a big * enough field for this. */ @@ -94,6 +97,11 @@ struct btrfs_inode { */ u64 delalloc_bytes; + /* total number of bytes that may be used for this inode for + * delalloc + */ + u64 reserved_bytes; + /* * the size of the file stored in the metadata on disk. 
data=ordered * means the in-memory i_size might be larger than the size on disk diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 766b31a..82491ba 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -596,13 +596,27 @@ struct btrfs_block_group_item { struct btrfs_space_info { u64 flags; - u64 total_bytes; - u64 bytes_used; - u64 bytes_pinned; - u64 bytes_reserved; - u64 bytes_readonly; - int full; - int force_alloc; + + u64 total_bytes; /* total bytes in the space */ + u64 bytes_used; /* total bytes used on disk */ + u64 bytes_pinned; /* total bytes pinned, will be freed when the + transaction finishes */ + u64 bytes_reserved; /* total bytes the allocator has reserved for + current allocations */ + u64 bytes_readonly; /* total bytes that are read only */ + + /* delalloc accounting */ + u64 bytes_delalloc; /* number of bytes reserved for allocation, + this space is not necessarily reserved yet + by the allocator */ + u64 bytes_may_use; /* number of bytes that may be used for + delalloc */ + + int full; /* indicates that we cannot allocate any more + chunks for this space */ + int force_alloc; /* set if we need to force a chunk alloc for + this space */ + struct list_head list; /* for block groups in our same type */ @@ -1782,6 +1796,16 @@ int btrfs_add_dead_reloc_root(struct btrfs_root *root); int btrfs_cleanup_reloc_trees(struct btrfs_root *root); int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); +void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); +int btrfs_check_metadata_free_space(struct btrfs_root *root); +int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, + u64 bytes); +void btrfs_free_reserved_data_space(struct btrfs_root *root, + struct inode *inode, u64 bytes); +void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, + u64 bytes); +void btrfs_delalloc_free_space(struct btrfs_root 
*root, struct inode *inode, + u64 bytes); /* ctree.c */ int btrfs_previous_item(struct btrfs_root *root, struct btrfs_path *path, u64 min_objectid, @@ -2027,8 +2051,6 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, unsigned long btrfs_force_ra(struct address_space *mapping, struct file_ra_state *ra, struct file *file, pgoff_t offset, pgoff_t last_index); -int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, - int for_del); int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page); int btrfs_readpage(struct file *file, struct page *page); void btrfs_delete_inode(struct inode *inode); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0a5d796..af12e9d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -59,6 +59,9 @@ static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc, int mark_free); +static int do_chunk_alloc(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, u64 alloc_bytes, + u64 flags, int force); static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) { @@ -1909,6 +1912,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found->bytes_pinned = 0; found->bytes_reserved = 0; found->bytes_readonly = 0; + found->bytes_delalloc = 0; found->full = 0; found->force_alloc = 0; *space_info = found; @@ -1972,6 +1976,196 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) return flags; } +static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data) +{ + struct btrfs_fs_info *info = root->fs_info; + u64 alloc_profile; + + if (data) { + alloc_profile = info->avail_data_alloc_bits & + info->data_alloc_profile; + data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; + } else if (root == root->fs_info->chunk_root) { + alloc_profile = info->avail_system_alloc_bits & + info->system_alloc_profile; + data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; + } 
else { + alloc_profile = info->avail_metadata_alloc_bits & + info->metadata_alloc_profile; + data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; + } + + return btrfs_reduce_alloc_profile(root, data); +} + +void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) +{ + u64 alloc_target; + + alloc_target = btrfs_get_alloc_profile(root, 1); + BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, + alloc_target); +} + +/* + * for now this just makes sure we have at least 5% of our metadata space free + * for use. + */ +int btrfs_check_metadata_free_space(struct btrfs_root *root) +{ + struct btrfs_fs_info *info = root->fs_info; + struct btrfs_space_info *meta_sinfo; + u64 alloc_target, thresh; + + /* get the space info for where the metadata will live */ + alloc_target = btrfs_get_alloc_profile(root, 0); + meta_sinfo = __find_space_info(info, alloc_target); + + /* + * if the metadata area isn''t maxed out then there is no sense in + * checking how much is used, since we can always allocate a new chunk + */ + if (!meta_sinfo->full) + return 0; + + spin_lock(&meta_sinfo->lock); + thresh = meta_sinfo->total_bytes * 95; + + do_div(thresh, 100); + + if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + + meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) { + spin_unlock(&meta_sinfo->lock); + return -ENOSPC; + } + spin_unlock(&meta_sinfo->lock); + + return 0; +} + +/* + * This will check the space that the inode allocates from to make sure we have + * enough space for bytes. 
+ */ +int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, + u64 bytes) +{ + struct btrfs_space_info *data_sinfo; + int ret = 0; + + /* make sure bytes are sectorsize aligned */ + bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); + + data_sinfo = BTRFS_I(inode)->space_info; +again: + /* make sure we have enough space to handle the data first */ + spin_lock(&data_sinfo->lock); + if (data_sinfo->total_bytes - data_sinfo->bytes_used - + data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - + data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - + data_sinfo->bytes_may_use < bytes) { + /* + * if we don''t have enough free bytes in this space then we need + * to alloc a new chunk. + */ + if (!data_sinfo->full) { + u64 alloc_target; + struct btrfs_trans_handle *trans; + + data_sinfo->force_alloc = 1; + spin_unlock(&data_sinfo->lock); + + alloc_target = btrfs_get_alloc_profile(root, 1); + trans = btrfs_start_transaction(root, 1); + if (!trans) + return -ENOMEM; + + ret = do_chunk_alloc(trans, root->fs_info->extent_root, + bytes + 2 * 1024 * 1024, + alloc_target, 0); + btrfs_end_transaction(trans, root); + if (ret) + return ret; + goto again; + } + spin_unlock(&data_sinfo->lock); + printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" + ", %llu bytes_used, %llu bytes_reserved, " + "%llu bytes_pinned, %llu bytes_readonly, %llu may use" + "%llu total\n", bytes, data_sinfo->bytes_delalloc, + data_sinfo->bytes_used, data_sinfo->bytes_reserved, + data_sinfo->bytes_pinned, data_sinfo->bytes_readonly, + data_sinfo->bytes_may_use, data_sinfo->total_bytes); + return -ENOSPC; + } + data_sinfo->bytes_may_use += bytes; + BTRFS_I(inode)->reserved_bytes += bytes; + spin_unlock(&data_sinfo->lock); + + return btrfs_check_metadata_free_space(root); +} + +/* + * if there was an error for whatever reason after calling + * btrfs_check_data_free_space, call this so we can cleanup the counters. 
+ */ +void btrfs_free_reserved_data_space(struct btrfs_root *root, + struct inode *inode, u64 bytes) +{ + struct btrfs_space_info *data_sinfo; + + /* make sure bytes are sectorsize aligned */ + bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); + + data_sinfo = BTRFS_I(inode)->space_info; + spin_lock(&data_sinfo->lock); + data_sinfo->bytes_may_use -= bytes; + BTRFS_I(inode)->reserved_bytes -= bytes; + spin_unlock(&data_sinfo->lock); +} + +/* called when we are adding a delalloc extent to the inode''s io_tree */ +void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, + u64 bytes) +{ + struct btrfs_space_info *data_sinfo; + + /* get the space info for where this inode will be storing its data */ + data_sinfo = BTRFS_I(inode)->space_info; + + /* make sure we have enough space to handle the data first */ + spin_lock(&data_sinfo->lock); + data_sinfo->bytes_delalloc += bytes; + + /* + * we are adding a delalloc extent without calling + * btrfs_check_data_free_space first. 
This happens on a weird + * writepage condition, but shouldn''t hurt our accounting + */ + if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) { + data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes; + BTRFS_I(inode)->reserved_bytes = 0; + } else { + data_sinfo->bytes_may_use -= bytes; + BTRFS_I(inode)->reserved_bytes -= bytes; + } + + spin_unlock(&data_sinfo->lock); +} + +/* called when we are clearing an delalloc extent from the inode''s io_tree */ +void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, + u64 bytes) +{ + struct btrfs_space_info *info; + + info = BTRFS_I(inode)->space_info; + + spin_lock(&info->lock); + info->bytes_delalloc -= bytes; + spin_unlock(&info->lock); +} + static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 alloc_bytes, u64 flags, int force) @@ -3105,6 +3299,10 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes) (unsigned long long)(info->total_bytes - info->bytes_used - info->bytes_pinned - info->bytes_reserved), (info->full) ? 
"" : "not "); + printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," + " may_use=%llu, used=%llu\n", info->total_bytes, + info->bytes_pinned, info->bytes_delalloc, info->bytes_may_use, + info->bytes_used); down_read(&info->groups_sem); list_for_each_entry(cache, &info->block_groups, list) { @@ -3131,24 +3329,10 @@ static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, { int ret; u64 search_start = 0; - u64 alloc_profile; struct btrfs_fs_info *info = root->fs_info; - if (data) { - alloc_profile = info->avail_data_alloc_bits & - info->data_alloc_profile; - data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; - } else if (root == root->fs_info->chunk_root) { - alloc_profile = info->avail_system_alloc_bits & - info->system_alloc_profile; - data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; - } else { - alloc_profile = info->avail_metadata_alloc_bits & - info->metadata_alloc_profile; - data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; - } + data = btrfs_get_alloc_profile(root, data); again: - data = btrfs_reduce_alloc_profile(root, data); /* * the only place that sets empty_size is btrfs_realloc_node, which * is not called recursively on allocations diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 3e8023e..1c7d106 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1091,19 +1091,24 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, WARN_ON(num_pages > nrptrs); memset(pages, 0, sizeof(struct page *) * nrptrs); - ret = btrfs_check_free_space(root, write_bytes, 0); + ret = btrfs_check_data_free_space(root, inode, write_bytes); if (ret) goto out; ret = prepare_pages(root, file, pages, num_pages, pos, first_index, last_index, write_bytes); - if (ret) + if (ret) { + btrfs_free_reserved_data_space(root, inode, + write_bytes); goto out; + } ret = btrfs_copy_from_user(pos, num_pages, write_bytes, pages, buf); if (ret) { + btrfs_free_reserved_data_space(root, inode, + write_bytes); btrfs_drop_pages(pages, num_pages); goto 
out; } @@ -1111,8 +1116,11 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, ret = dirty_and_release_pages(NULL, root, file, pages, num_pages, pos, write_bytes); btrfs_drop_pages(pages, num_pages); - if (ret) + if (ret) { + btrfs_free_reserved_data_space(root, inode, + write_bytes); goto out; + } if (will_write) { btrfs_fdatawrite_range(inode->i_mapping, pos, @@ -1136,6 +1144,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } out: mutex_unlock(&inode->i_mutex); + if (ret) + err = ret; out_nolock: kfree(pages); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3cee77a..7d4f948 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -102,34 +102,6 @@ static int btrfs_init_inode_security(struct inode *inode, struct inode *dir) } /* - * a very lame attempt at stopping writes when the FS is 85% full. There - * are countless ways this is incorrect, but it is better than nothing. - */ -int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, - int for_del) -{ - u64 total; - u64 used; - u64 thresh; - int ret = 0; - - spin_lock(&root->fs_info->delalloc_lock); - total = btrfs_super_total_bytes(&root->fs_info->super_copy); - used = btrfs_super_bytes_used(&root->fs_info->super_copy); - if (for_del) - thresh = total * 90; - else - thresh = total * 85; - - do_div(thresh, 100); - - if (used + root->fs_info->delalloc_bytes + num_required > thresh) - ret = -ENOSPC; - spin_unlock(&root->fs_info->delalloc_lock); - return ret; -} - -/* * this does all the hard work for inserting an inline extent into * the btree. 
The caller should have done a btrfs_drop_extents so that * no overlapping inline items exist in the btree @@ -1190,6 +1162,7 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, */ if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; + btrfs_delalloc_reserve_space(root, inode, end - start + 1); spin_lock(&root->fs_info->delalloc_lock); BTRFS_I(inode)->delalloc_bytes += end - start + 1; root->fs_info->delalloc_bytes += end - start + 1; @@ -1223,9 +1196,12 @@ static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, (unsigned long long)end - start + 1, (unsigned long long) root->fs_info->delalloc_bytes); + btrfs_delalloc_free_space(root, inode, (u64)-1); root->fs_info->delalloc_bytes = 0; BTRFS_I(inode)->delalloc_bytes = 0; } else { + btrfs_delalloc_free_space(root, inode, + end - start + 1); root->fs_info->delalloc_bytes -= end - start + 1; BTRFS_I(inode)->delalloc_bytes -= end - start + 1; } @@ -2245,10 +2221,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) root = BTRFS_I(dir)->root; - ret = btrfs_check_free_space(root, 1, 1); - if (ret) - goto fail; - trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); @@ -2261,7 +2233,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); -fail: btrfs_btree_balance_dirty(root, nr); return ret; } @@ -2284,10 +2255,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) return -ENOTEMPTY; } - ret = btrfs_check_free_space(root, 1, 1); - if (ret) - goto fail; - trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); @@ -2304,7 +2271,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) fail_trans: nr = trans->blocks_used; ret = btrfs_end_transaction_throttle(trans, root); -fail: btrfs_btree_balance_dirty(root, nr); if (ret && !err) @@ -2818,7 +2784,7 
@@ int btrfs_cont_expand(struct inode *inode, loff_t size) if (size <= hole_start) return 0; - err = btrfs_check_free_space(root, 1, 0); + err = btrfs_check_metadata_free_space(root); if (err) return err; @@ -3014,6 +2980,7 @@ static noinline void init_btrfs_i(struct inode *inode) bi->last_trans = 0; bi->logged_trans = 0; bi->delalloc_bytes = 0; + bi->reserved_bytes = 0; bi->disk_i_size = 0; bi->flags = 0; bi->index_cnt = (u64)-1; @@ -3035,6 +3002,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) inode->i_ino = args->ino; init_btrfs_i(inode); BTRFS_I(inode)->root = args->root; + btrfs_set_inode_space_info(args->root, inode); return 0; } @@ -3455,6 +3423,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, BTRFS_I(inode)->index_cnt = 2; BTRFS_I(inode)->root = root; BTRFS_I(inode)->generation = trans->transid; + btrfs_set_inode_space_info(root, inode); if (mode & S_IFDIR) owner = 0; @@ -3602,7 +3571,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, if (!new_valid_dev(rdev)) return -EINVAL; - err = btrfs_check_free_space(root, 1, 0); + err = btrfs_check_metadata_free_space(root); if (err) goto fail; @@ -3665,7 +3634,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, u64 objectid; u64 index = 0; - err = btrfs_check_free_space(root, 1, 0); + err = btrfs_check_metadata_free_space(root); if (err) goto fail; trans = btrfs_start_transaction(root, 1); @@ -3733,7 +3702,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, return -ENOENT; btrfs_inc_nlink(inode); - err = btrfs_check_free_space(root, 1, 0); + err = btrfs_check_metadata_free_space(root); if (err) goto fail; err = btrfs_set_inode_index(dir, &index); @@ -3779,7 +3748,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) u64 index = 0; unsigned long nr = 1; - err = btrfs_check_free_space(root, 1, 0); + err = btrfs_check_metadata_free_space(root); if (err) goto out_unlock; @@ -4336,7 +4305,7 
@@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) u64 page_start; u64 page_end; - ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0); + ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); if (ret) goto out; @@ -4349,6 +4318,7 @@ again: if ((page->mapping != inode->i_mapping) || (page_start >= size)) { + btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); /* page got truncated out from underneath us */ goto out_unlock; } @@ -4631,7 +4601,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) return -EXDEV; - ret = btrfs_check_free_space(root, 1, 0); + ret = btrfs_check_metadata_free_space(root); if (ret) goto out_unlock; @@ -4749,7 +4719,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) return -ENAMETOOLONG; - err = btrfs_check_free_space(root, 1, 0); + err = btrfs_check_metadata_free_space(root); if (err) goto out_fail; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 988fdc8..bca729f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -70,7 +70,7 @@ static noinline int create_subvol(struct btrfs_root *root, u64 index = 0; unsigned long nr = 1; - ret = btrfs_check_free_space(root, 1, 0); + ret = btrfs_check_metadata_free_space(root); if (ret) goto fail_commit; @@ -203,7 +203,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, if (!root->ref_cows) return -EINVAL; - ret = btrfs_check_free_space(root, 1, 0); + ret = btrfs_check_metadata_free_space(root); if (ret) goto fail_unlock; @@ -374,7 +374,7 @@ static int btrfs_defrag_file(struct file *file) unsigned long i; int ret; - ret = btrfs_check_free_space(root, inode->i_size, 0); + ret = btrfs_check_data_free_space(root, inode, inode->i_size); if (ret) return -ENOSPC; -- 1.5.4.3 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to 
majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Yoshihiro Takahashi
2009-Feb-17 09:21 UTC
Re: [PATCH] btrfs: add better -ENOSPC handling VERSION 3
Hi Josef. I created files until ENOSPC was returned. I created 18874027 files of size zero on a 16 Gbyte btrfs filesystem in 1609 seconds. I tried to trigger the ENOSPC problem, but it did not occur. The ENOSPC problem is fixed by linux-2.6.29-rc5 plus this patch. This is a good patch. Next, I created 2758939 files of size 4096 on a 16 Gbyte btrfs filesystem in 431 seconds, and I received the following messages when I unlinked the files. btrfs searching for 4096 bytes, num_bytes 4096, loop 2, allowed_alloc 0 btrfs allocation failed flags 36, wanted 4096 space_info has 65536 free, is full space_info total=1932853248, pinned=164216832, delalloc=0, may_use=0, used=1768570880 block group 29360128 has 1073741824 bytes, 942583808 used 131092480 pinned 0 reserved 0 blocks of free space at or bigger than bytes is block group 8619294720 has 859111424 bytes, 825987072 used 33124352 pinned 0 reserved 0 blocks of free space at or bigger than bytes is ------------[ cut here ]------------ kernel BUG at fs/btrfs/extent-tree.c:3360! invalid opcode: 0000 [#1] SMP last sysfs file: /sys/devices/pci0000:80/0000:80:0e.0/0000:84:00.0/local_cpus CPU 3 Modules linked in: btrfs zlib_deflate nls_utf8 hfsplus autofs4 hidp rfcomm l2cap bluetooth sunrpc ib_iser libiscsi scsi_transport_iscsi ib_srp scsi_transport_srp ib_ipoib inet_lro rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_addr ib_sa ib_mad ib_core dm_mirror dm_region_hash dm_log dm_multipath dm_mod sbs sbshc battery acpi_memhotplug ac ipv6 lp floppy snd_intel8x0 snd_ac97_codec ac97_bus snd_seq_dummy snd_seq_oss snd_seq_midi_event snd_seq snd_seq_device snd_pcm_oss sg rtc_cmos snd_mixer_oss snd_pcm snd_timer rtc_core parport_pc tg3 libphy ide_cd_mod snd button parport cdrom serio_raw rtc_lib i2c_amd8111 i2c_amd756 k8temp soundcore hwmon snd_page_alloc shpchp amd_rng i2c_core pcspkr sata_sil libata aic79xx scsi_transport_spi sd_mod scsi_mod ext3 jbd uhci_hcd ohci_hcd ehci_hcd Pid: 20535, comm: rm Tainted: G M 2.6.29-rc5 #1 -[62176J7]- RIP: 
0010:[<ffffffffa041e529>] [<ffffffffa041e529>] __btrfs_reserve_extent+0x25b/0x270 [btrfs] RSP: 0018:ffff8800acd6d3d8 EFLAGS: 00010246 RAX: ffff88011996ca38 RBX: ffff88011996c960 RCX: 0000000000008610 RDX: 000000000000e1e1 RSI: 0000000000000246 RDI: ffff88011996ca34 RBP: ffff8800acd6d448 R08: 0000000000000000 R09: 00000000ffffff00 R10: 000000000000000a R11: 0000000000000000 R12: ffff88011996c9c0 R13: 0000000000001000 R14: ffff88011996ca30 R15: 0000000000001000 FS: 00007fea3f7486e0(0000) GS:ffff88012ecffcc0(0000) knlGS:00000000f7fa38d0 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007f651b793000 CR3: 00000000c64be000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process rm (pid: 20535, threadinfo ffff8800acd6c000, task ffff88012ed8c970) Stack: 0000000000000000 ffff8800acd6d4f8 0000000000000000 0000000000000000 ffffe20000000024 ffff880000000000 0000000000000000 0000000000000000 ffff88010c0c4000 0000000000001000 ffff8800acd6d4f8 ffff88012a516000 Call Trace: [<ffffffffa0421005>] btrfs_alloc_extent+0x48/0xa7 [btrfs] [<ffffffffa04210c0>] btrfs_alloc_free_block+0x5c/0x87 [btrfs] [<ffffffffa04155e1>] __btrfs_cow_block+0x1e7/0x82e [btrfs] [<ffffffffa04162d9>] btrfs_cow_block+0x1ed/0x1fc [btrfs] [<ffffffffa0419cb2>] btrfs_search_slot+0x31a/0x90f [btrfs] [<ffffffffa041add4>] btrfs_insert_some_items+0xb2/0x570 [btrfs] [<ffffffff80279fa3>] ? unlock_page+0x22/0x27 [<ffffffffa04482df>] ? set_extent_buffer_dirty+0x102/0x131 [btrfs] [<ffffffffa041f50b>] insert_extents+0x162/0x36f [btrfs] [<ffffffffa0420105>] finish_current_insert+0x4de/0x553 [btrfs] [<ffffffffa04228dc>] __btrfs_inc_extent_ref+0x1b5/0x1e4 [btrfs] [<ffffffff8034a2fe>] ? generic_swap+0x0/0x1c [<ffffffffa04256fb>] btrfs_inc_ref+0x42d/0x4ce [btrfs] [<ffffffffa0422727>] ? __btrfs_inc_extent_ref+0x0/0x1e4 [btrfs] [<ffffffffa0445c95>] ? 
copy_extent_buffer+0x115/0x149 [btrfs] [<ffffffffa0415834>] __btrfs_cow_block+0x43a/0x82e [btrfs] [<ffffffffa04162d9>] btrfs_cow_block+0x1ed/0x1fc [btrfs] [<ffffffffa0419cb2>] btrfs_search_slot+0x31a/0x90f [btrfs] [<ffffffff8034d5e8>] ? crc32c+0x4c/0x60 [<ffffffffa0427d8d>] btrfs_lookup_dir_item+0x71/0xe7 [btrfs] [<ffffffffa04337e4>] btrfs_unlink_inode+0x60/0x26f [btrfs] [<ffffffffa042f0e5>] ? start_transaction+0xf9/0x105 [btrfs] [<ffffffffa0433dbc>] btrfs_unlink+0x55/0x97 [btrfs] [<ffffffff802af53b>] vfs_unlink+0xb9/0x12c [<ffffffff802b197e>] do_unlinkat+0xc1/0x173 [<ffffffff802b4063>] ? vfs_readdir+0x9a/0xaf [<ffffffff8026855b>] ? audit_syscall_entry+0x16b/0x19e [<ffffffff802b1a41>] sys_unlink+0x11/0x13 [<ffffffff8020c05b>] system_call_fastpath+0x16/0x1b Code: 00 48 81 eb b8 00 00 00 48 8b 83 b8 00 00 00 48 8d 93 b8 00 00 00 0f 18 08 49 8d 44 24 58 48 39 c2 75 9f 4c 89 f7 e8 29 1e e3 df <0f> 0b eb fe 48 83 c4 48 31 c0 5b 41 5c 41 5d 41 5e 41 5f c9 c3 RIP [<ffffffffa041e529>] __btrfs_reserve_extent+0x25b/0x270 [btrfs] RSP <ffff8800acd6d3d8> ---[ end trace b5c34207a1ecc394 ]--- And, I received the following messages when I utime(2) files. btrfs searching for 4096 bytes, num_bytes 4096, loop 2, allowed_alloc 1 btrfs allocation failed flags 36, wanted 4096 space_info has 65536 free, is full space_info total=1932853248, pinned=54943744, delalloc=0, may_use=0, used=1877843968 block group 29360128 has 1073741824 bytes, 1041494016 used 32182272 pinned 0 reserved 0 blocks of free space at or bigger than bytes is block group 8619294720 has 859111424 bytes, 836349952 used 22761472 pinned 0 reserved 0 blocks of free space at or bigger than bytes is ------------[ cut here ]------------ kernel BUG at fs/btrfs/extent-tree.c:3360! 
invalid opcode: 0000 [#1] SMP last sysfs file: /sys/devices/pci0000:80/0000:80:0e.0/0000:84:00.0/local_cpus CPU 3 Modules linked in: btrfs zlib_deflate autofs4 hidp rfcomm l2cap bluetooth sunrpc ib_iser libiscsi scsi_transport_iscsi ib_srp scsi_transport_srp ib_ipoib inet_lro rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_addr ib_sa ib_mad ib_core dm_mirror dm_region_hash dm_log dm_multipath dm_mod sbs sbshc battery acpi_memhotplug ac ipv6 lp floppy snd_intel8x0 snd_ac97_codec ac97_bus snd_seq_dummy snd_seq_oss snd_seq_midi_event snd_seq snd_seq_device snd_pcm_oss snd_mixer_oss k8temp button sg snd_pcm i2c_amd8111 snd_timer snd serio_raw parport_pc rtc_cmos tg3 i2c_amd756 soundcore rtc_core parport hwmon libphy i2c_core ide_cd_mod cdrom rtc_lib amd_rng pcspkr snd_page_alloc shpchp sata_sil libata aic79xx scsi_transport_spi sd_mod scsi_mod ext3 jbd uhci_hcd ohci_hcd ehci_hcd Pid: 14421, comm: mkfbmt Tainted: G M 2.6.29-rc5 #1 -[62176J7]- RIP: 0010:[<ffffffffa0407529>] [<ffffffffa0407529>] __btrfs_reserve_extent+0x25b/0x270 [btrfs] RSP: 0018:ffff88010010d968 EFLAGS: 00010246 RAX: ffff880026bed8f8 RBX: ffff880026bed820 RCX: 0000000000008689 RDX: 0000000000005959 RSI: 0000000000000246 RDI: ffff880026bed8f4 RBP: ffff88010010d9d8 R08: 0000000000000000 R09: 00000000ffffff00 R10: 000000000000000a R11: 0000000000000000 R12: ffff880026bed880 R13: 0000000000001000 R14: ffff880026bed8f0 R15: 0000000000001000 FS: 00007fb97df446e0(0000) GS:ffff88012ecffcc0(0000) knlGS:00000000f7f4f8d0 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 000000342e614ce5 CR3: 000000011557b000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process mkfbmt (pid: 14421, threadinfo ffff88010010c000, task ffff88011f9738d0) Stack: 0000000000000000 ffff88010010da88 0000000000000000 0000000000000000 ffff880100000024 ffff88010010dad4 0000000000000000 0000000000000000 ffff8801145c0000 
0000000000001000 ffff88010010da88 ffff8801149cd000 Call Trace: [<ffffffffa040a005>] btrfs_alloc_extent+0x48/0xa7 [btrfs] [<ffffffffa040a0c0>] btrfs_alloc_free_block+0x5c/0x87 [btrfs] [<ffffffffa03fe5e1>] __btrfs_cow_block+0x1e7/0x82e [btrfs] [<ffffffffa03ff2d9>] btrfs_cow_block+0x1ed/0x1fc [btrfs] [<ffffffffa0402cb2>] btrfs_search_slot+0x31a/0x90f [btrfs] [<ffffffffa0419b89>] ? btrfs_lookup_dentry+0x159/0x16f [btrfs] [<ffffffff802cd6a1>] ? inotify_d_instantiate+0x1a/0x43 [<ffffffff802b6984>] ? d_splice_alias+0xc4/0xd0 [<ffffffffa0412a8c>] btrfs_lookup_inode+0x2c/0x90 [btrfs] [<ffffffffa041bb13>] btrfs_update_inode+0x46/0xbd [btrfs] [<ffffffffa041c770>] btrfs_dirty_inode+0x3e/0x52 [btrfs] [<ffffffff802c0f3c>] __mark_inode_dirty+0x33/0x178 [<ffffffff802b998c>] inode_setattr+0x11b/0x125 [<ffffffffa041ace0>] btrfs_setattr+0x56/0x6e [btrfs] [<ffffffff802b9b2a>] notify_change+0x194/0x2d6 [<ffffffff802c3969>] utimes_common+0x121/0x163 [<ffffffff802c3a64>] do_utimes+0xb9/0xda [<ffffffff802c3c0f>] sys_utime+0x66/0x7b [<ffffffff8020c05b>] system_call_fastpath+0x16/0x1b Code: 00 48 81 eb b8 00 00 00 48 8b 83 b8 00 00 00 48 8d 93 b8 00 00 00 0f 18 08 49 8d 44 24 58 48 39 c2 75 9f 4c 89 f7 e8 29 8e e4 df <0f> 0b eb fe 48 83 c4 48 31 c0 5b 41 5c 41 5d 41 5e 41 5f c9 c3 RIP [<ffffffffa0407529>] __btrfs_reserve_extent+0x25b/0x270 [btrfs] RSP <ffff88010010d968> ---[ end trace 16075f4743156c16 ]--- Thanks, regards, On Fri, 13 Feb 2009 15:40:04 -0500 Josef Bacik <jbacik@redhat.com> wrote:> This is a step in the direction of better -ENOSPC handling. Instead of checking > the global bytes counter we check the space_info bytes counters to make sure we > have enough space. If we don''t we go ahead and try to allocate a new chunk, and > then if that fails we return -ENOSPC. This patch adds two counters to > btrfs_space_info, bytes_delalloc and bytes_may_use. bytes_delalloc account for > extents we''ve actually setup for delalloc and will be allocated at some point > down the line. 
bytes_may_use is to keep track of how many bytes we may use for > delalloc at some point. When we actually set the extent_bit for the delalloc > bytes we subtract the reserved bytes from the bytes_may_use counter. This keeps > us from not actually being able to allocate space for any delalloc bytes. This > has been tested with fs_mark and dbench. > > Signed-off-by: Josef Bacik <jbacik@redhat.com> -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Josef Bacik
2009-Feb-17 13:42 UTC
Re: [PATCH] btrfs: add better -ENOSPC handling VERSION 3
On Tue, Feb 17, 2009 at 06:21:10PM +0900, Yoshihiro Takahashi wrote:> Hi Josef. > > I created file till return of ENOSPC. > I created 18874027 files of size zero on btrfs of 16Gbyte in 1609 seconds. > I test to cause ENOSPC-problem but there was not the problem. > ENOSPC-problem were fixed in linux-2.6.29-rc5 and this patch. > This is a good patch. > > Next. > I created 2758939 files of size 4096 on btrfs of 16Gbyte in 431 seconds. > And I received the following messages when I unlink files. > > btrfs searching for 4096 bytes, num_bytes 4096, loop 2, allowed_alloc 0 > btrfs allocation failed flags 36, wanted 4096 > space_info has 65536 free, is full > space_info total=1932853248, pinned=164216832, delalloc=0, may_use=0, used=1768570880 > block group 29360128 has 1073741824 bytes, 942583808 used 131092480 pinned 0 reserved > 0 blocks of free space at or bigger than bytes is > block group 8619294720 has 859111424 bytes, 825987072 used 33124352 pinned 0 reserved > 0 blocks of free space at or bigger than bytes is > ------------[ cut here ]------------ > kernel BUG at fs/btrfs/extent-tree.c:3360! 
> invalid opcode: 0000 [#1] SMP > last sysfs file: /sys/devices/pci0000:80/0000:80:0e.0/0000:84:00.0/local_cpus > CPU 3 > Modules linked in: btrfs zlib_deflate nls_utf8 hfsplus autofs4 hidp rfcomm l2cap bluetooth sunrpc ib_iser libiscsi scsi_transport_iscsi ib_srp scsi_transport_srp ib_ipoib inet_lro rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_addr ib_sa ib_mad ib_core dm_mirror dm_region_hash dm_log dm_multipath dm_mod sbs sbshc battery acpi_memhotplug ac ipv6 lp floppy snd_intel8x0 snd_ac97_codec ac97_bus snd_seq_dummy snd_seq_oss snd_seq_midi_event snd_seq snd_seq_device snd_pcm_oss sg rtc_cmos snd_mixer_oss snd_pcm snd_timer rtc_core parport_pc tg3 libphy ide_cd_mod snd button parport cdrom serio_raw rtc_lib i2c_amd8111 i2c_amd756 k8temp soundcore hwmon snd_page_alloc shpchp amd_rng i2c_core pcspkr sata_sil libata aic79xx scsi_transport_spi sd_mod scsi_mod ext3 jbd uhci_hcd ohci_hcd ehci_hcd> Pid: 20535, comm: rm Tainted: G M 2.6.29-rc5 #1 -[62176J7]- > RIP: 0010:[<ffffffffa041e529>] [<ffffffffa041e529>] __btrfs_reserve_extent+0x25b/0x270 [btrfs] > RSP: 0018:ffff8800acd6d3d8 EFLAGS: 00010246 > RAX: ffff88011996ca38 RBX: ffff88011996c960 RCX: 0000000000008610 > RDX: 000000000000e1e1 RSI: 0000000000000246 RDI: ffff88011996ca34 > RBP: ffff8800acd6d448 R08: 0000000000000000 R09: 00000000ffffff00 > R10: 000000000000000a R11: 0000000000000000 R12: ffff88011996c9c0 > R13: 0000000000001000 R14: ffff88011996ca30 R15: 0000000000001000 > FS: 00007fea3f7486e0(0000) GS:ffff88012ecffcc0(0000) knlGS:00000000f7fa38d0 > CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b > CR2: 00007f651b793000 CR3: 00000000c64be000 CR4: 00000000000006e0 > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 > Process rm (pid: 20535, threadinfo ffff8800acd6c000, task ffff88012ed8c970) > Stack: > 0000000000000000 ffff8800acd6d4f8 0000000000000000 0000000000000000 > ffffe20000000024 ffff880000000000 
0000000000000000 0000000000000000 > ffff88010c0c4000 0000000000001000 ffff8800acd6d4f8 ffff88012a516000 > Call Trace: > [<ffffffffa0421005>] btrfs_alloc_extent+0x48/0xa7 [btrfs] > [<ffffffffa04210c0>] btrfs_alloc_free_block+0x5c/0x87 [btrfs] > [<ffffffffa04155e1>] __btrfs_cow_block+0x1e7/0x82e [btrfs] > [<ffffffffa04162d9>] btrfs_cow_block+0x1ed/0x1fc [btrfs] > [<ffffffffa0419cb2>] btrfs_search_slot+0x31a/0x90f [btrfs] > [<ffffffffa041add4>] btrfs_insert_some_items+0xb2/0x570 [btrfs] > [<ffffffff80279fa3>] ? unlock_page+0x22/0x27 > [<ffffffffa04482df>] ? set_extent_buffer_dirty+0x102/0x131 [btrfs] > [<ffffffffa041f50b>] insert_extents+0x162/0x36f [btrfs] > [<ffffffffa0420105>] finish_current_insert+0x4de/0x553 [btrfs] > [<ffffffffa04228dc>] __btrfs_inc_extent_ref+0x1b5/0x1e4 [btrfs] > [<ffffffff8034a2fe>] ? generic_swap+0x0/0x1c > [<ffffffffa04256fb>] btrfs_inc_ref+0x42d/0x4ce [btrfs] > [<ffffffffa0422727>] ? __btrfs_inc_extent_ref+0x0/0x1e4 [btrfs] > [<ffffffffa0445c95>] ? copy_extent_buffer+0x115/0x149 [btrfs] > [<ffffffffa0415834>] __btrfs_cow_block+0x43a/0x82e [btrfs] > [<ffffffffa04162d9>] btrfs_cow_block+0x1ed/0x1fc [btrfs] > [<ffffffffa0419cb2>] btrfs_search_slot+0x31a/0x90f [btrfs] > [<ffffffff8034d5e8>] ? crc32c+0x4c/0x60 > [<ffffffffa0427d8d>] btrfs_lookup_dir_item+0x71/0xe7 [btrfs] > [<ffffffffa04337e4>] btrfs_unlink_inode+0x60/0x26f [btrfs] > [<ffffffffa042f0e5>] ? start_transaction+0xf9/0x105 [btrfs] > [<ffffffffa0433dbc>] btrfs_unlink+0x55/0x97 [btrfs] > [<ffffffff802af53b>] vfs_unlink+0xb9/0x12c > [<ffffffff802b197e>] do_unlinkat+0xc1/0x173 > [<ffffffff802b4063>] ? vfs_readdir+0x9a/0xaf > [<ffffffff8026855b>] ? 
audit_syscall_entry+0x16b/0x19e > [<ffffffff802b1a41>] sys_unlink+0x11/0x13 > [<ffffffff8020c05b>] system_call_fastpath+0x16/0x1b > Code: 00 48 81 eb b8 00 00 00 48 8b 83 b8 00 00 00 48 8d 93 b8 00 00 00 0f 18 08 49 8d 44 24 58 48 39 c2 75 9f 4c 89 f7 e8 29 1e e3 df <0f> 0b eb fe 48 83 c4 48 31 c0 5b 41 5c 41 5d 41 5e 41 5f c9 c3 > RIP [<ffffffffa041e529>] __btrfs_reserve_extent+0x25b/0x270 [btrfs] > RSP <ffff8800acd6d3d8> > ---[ end trace b5c34207a1ecc394 ]--- > > And, I received the following messages when I utime(2) files. > > btrfs searching for 4096 bytes, num_bytes 4096, loop 2, allowed_alloc 1 > btrfs allocation failed flags 36, wanted 4096 > space_info has 65536 free, is full > space_info total=1932853248, pinned=54943744, delalloc=0, may_use=0, used=1877843968 > block group 29360128 has 1073741824 bytes, 1041494016 used 32182272 pinned 0 reserved > 0 blocks of free space at or bigger than bytes is > block group 8619294720 has 859111424 bytes, 836349952 used 22761472 pinned 0 reserved > 0 blocks of free space at or bigger than bytes is > ------------[ cut here ]------------ > kernel BUG at fs/btrfs/extent-tree.c:3360! 
> invalid opcode: 0000 [#1] SMP > last sysfs file: /sys/devices/pci0000:80/0000:80:0e.0/0000:84:00.0/local_cpus > CPU 3 > Modules linked in: btrfs zlib_deflate autofs4 hidp rfcomm l2cap bluetooth sunrpc ib_iser libiscsi scsi_transport_iscsi ib_srp scsi_transport_srp ib_ipoib inet_lro rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_addr ib_sa ib_mad ib_core dm_mirror dm_region_hash dm_log dm_multipath dm_mod sbs sbshc battery acpi_memhotplug ac ipv6 lp floppy snd_intel8x0 snd_ac97_codec ac97_bus snd_seq_dummy snd_seq_oss snd_seq_midi_event snd_seq snd_seq_device snd_pcm_oss snd_mixer_oss k8temp button sg snd_pcm i2c_amd8111 snd_timer snd serio_raw parport_pc rtc_cmos tg3 i2c_amd756 soundcore rtc_core parport hwmon libphy i2c_core ide_cd_mod cdrom rtc_lib amd_rng pcspkr snd_page_alloc shpchp sata_sil libata aic79xx scsi_transport_spi sd_mod scsi_mod ext3 jbd uhci_hcd ohci_hcd ehci_hcd > Pid: 14421, comm: mkfbmt Tainted: G M 2.6.29-rc5 #1 -[62176J7]- > RIP: 0010:[<ffffffffa0407529>] [<ffffffffa0407529>] __btrfs_reserve_extent+0x25b/0x270 [btrfs] > RSP: 0018:ffff88010010d968 EFLAGS: 00010246 > RAX: ffff880026bed8f8 RBX: ffff880026bed820 RCX: 0000000000008689 > RDX: 0000000000005959 RSI: 0000000000000246 RDI: ffff880026bed8f4 > RBP: ffff88010010d9d8 R08: 0000000000000000 R09: 00000000ffffff00 > R10: 000000000000000a R11: 0000000000000000 R12: ffff880026bed880 > R13: 0000000000001000 R14: ffff880026bed8f0 R15: 0000000000001000 > FS: 00007fb97df446e0(0000) GS:ffff88012ecffcc0(0000) knlGS:00000000f7f4f8d0 > CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b > CR2: 000000342e614ce5 CR3: 000000011557b000 CR4: 00000000000006e0 > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 > Process mkfbmt (pid: 14421, threadinfo ffff88010010c000, task ffff88011f9738d0) > Stack: > 0000000000000000 ffff88010010da88 0000000000000000 0000000000000000 > ffff880100000024 ffff88010010dad4 
0000000000000000 0000000000000000 > ffff8801145c0000 0000000000001000 ffff88010010da88 ffff8801149cd000 > Call Trace: > [<ffffffffa040a005>] btrfs_alloc_extent+0x48/0xa7 [btrfs] > [<ffffffffa040a0c0>] btrfs_alloc_free_block+0x5c/0x87 [btrfs] > [<ffffffffa03fe5e1>] __btrfs_cow_block+0x1e7/0x82e [btrfs] > [<ffffffffa03ff2d9>] btrfs_cow_block+0x1ed/0x1fc [btrfs] > [<ffffffffa0402cb2>] btrfs_search_slot+0x31a/0x90f [btrfs] > [<ffffffffa0419b89>] ? btrfs_lookup_dentry+0x159/0x16f [btrfs] > [<ffffffff802cd6a1>] ? inotify_d_instantiate+0x1a/0x43 > [<ffffffff802b6984>] ? d_splice_alias+0xc4/0xd0 > [<ffffffffa0412a8c>] btrfs_lookup_inode+0x2c/0x90 [btrfs] > [<ffffffffa041bb13>] btrfs_update_inode+0x46/0xbd [btrfs] > [<ffffffffa041c770>] btrfs_dirty_inode+0x3e/0x52 [btrfs] > [<ffffffff802c0f3c>] __mark_inode_dirty+0x33/0x178 > [<ffffffff802b998c>] inode_setattr+0x11b/0x125 > [<ffffffffa041ace0>] btrfs_setattr+0x56/0x6e [btrfs] > [<ffffffff802b9b2a>] notify_change+0x194/0x2d6 > [<ffffffff802c3969>] utimes_common+0x121/0x163 > [<ffffffff802c3a64>] do_utimes+0xb9/0xda > [<ffffffff802c3c0f>] sys_utime+0x66/0x7b > [<ffffffff8020c05b>] system_call_fastpath+0x16/0x1b > Code: 00 48 81 eb b8 00 00 00 48 8b 83 b8 00 00 00 48 8d 93 b8 00 00 00 0f 18 08 49 8d 44 24 58 48 39 c2 75 9f 4c 89 f7 e8 29 8e e4 df <0f> 0b eb fe 48 83 c4 48 31 c0 5b 41 5c 41 5d 41 5e 41 5f c9 c3 > RIP [<ffffffffa0407529>] __btrfs_reserve_extent+0x25b/0x270 [btrfs] > RSP <ffff88010010d968> > ---[ end trace 16075f4743156c16 ]--- > > Thanks, regards, >Thank you, what tool are you using to create these files and such? Josef -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Yoshihiro Takahashi
2009-Feb-19 08:45 UTC
Re: [PATCH] btrfs: add better -ENOSPC handling VERSION 3
Hi Josef.
> Thank you, what tool are you using to create these files and such?
I use an original tool. I do not make the document. ex. ./mkfbmt -d /btrfs -f 10000000 -x 10000 This makes a file under 10,000 directories with 10,000,000 files in /btrfs. make 10,000 files under 100 directories each. 100dir * 10000files = 10000000 /btrfs/0/0 /btrfs/0/1 ... /btrfs/0/9998 /btrfs/0/9999 /btrfs/1/0 ... /btrfs/99/9999 defaults file size is zero byte. When make a file of 4,096 bytes, please appoint -w 4096. defaults option is all test. -m mkdir -c open(O_CREAT | O_EXCL | O_WRONLY) test -o open test -p utime test -s stat test -u unlink test -r rmdir # ./mkfbmt -d /btrfs/ -f 1000000 -x 10000 ## __linuxfs_init(91) [0x401fc8] mkfbmt 100 directry 1000000 files , one directry 10000 files make files for /btrfs mkdir : 0.004923 sec , 1 file mkdir for 0.000000 sec creat : 179.155566 sec , 1 file creat for 0.000179 sec open : 26.696062 sec , 1 file open for 0.000027 sec utime : 55.143818 sec , 1 file utime for 0.000055 sec stat : 4.692097 sec , 1 file stat for 0.000005 sec unlink: 210.624820 sec , 1 file unlink for 0.000211 sec rmdir : 0.806472 sec , 1 file rmdir for 0.000001 sec file creat open utime stat unlink 50000 4.08 1.18 2.17 0.13 14.18 100000 10.70 1.86 4.78 0.25 22.57 150000 55.87 2.42 7.57 0.37 30.99 200000 63.74 3.23 9.81 0.49 42.35 250000 69.72 4.93 12.16 0.60 55.71 300000 76.37 7.16 14.56 0.72 64.03 350000 83.60 10.34 17.01 0.83 91.18 400000 90.23 15.36 19.60 0.94 103.36 450000 97.47 20.07 22.17 1.06 111.07 500000 103.99 22.14 24.56 1.38 118.84 550000 110.85 22.36 26.93 1.88 126.66 600000 118.49 22.58 29.70 2.30 136.47 650000 125.26 23.08 32.05 2.50 148.96 700000 132.34 23.65 34.28 3.01 156.62 750000 138.89 24.22 36.80 3.54 164.49 800000 146.38 24.78 44.05 4.05 172.23 850000 155.14 25.34 46.81 4.28 180.48 900000 163.21 26.25 49.85 4.41 195.99 950000 171.81 26.47 52.22 4.55 203.47 1000000 179.16 26.70 55.14 4.69 210.62 Best regards, On Tue, 17 Feb 2009 08:42:19 -0500 Josef
Bacik <josef@redhat.com> wrote:> On Tue, Feb 17, 2009 at 06:21:10PM +0900, Yoshihiro Takahashi wrote: > > Hi Josef. > > > > I created file till return of ENOSPC. > > I created 18874027 files of size zero on btrfs of 16Gbyte in 1609 seconds. > > I test to cause ENOSPC-problem but there was not the problem. > > ENOSPC-problem were fixed in linux-2.6.29-rc5 and this patch. > > This is a good patch. > > > > Next. > > I created 2758939 files of size 4096 on btrfs of 16Gbyte in 431 seconds. > > And I received the following messages when I unlink files. > > > > btrfs searching for 4096 bytes, num_bytes 4096, loop 2, allowed_alloc 0 > > btrfs allocation failed flags 36, wanted 4096 > > space_info has 65536 free, is full > > space_info total=1932853248, pinned=164216832, delalloc=0, may_use=0, used=1768570880 > > block group 29360128 has 1073741824 bytes, 942583808 used 131092480 pinned 0 reserved > > 0 blocks of free space at or bigger than bytes is > > block group 8619294720 has 859111424 bytes, 825987072 used 33124352 pinned 0 reserved > > 0 blocks of free space at or bigger than bytes is > > ------------[ cut here ]------------ > > kernel BUG at fs/btrfs/extent-tree.c:3360! 
> > invalid opcode: 0000 [#1] SMP > > last sysfs file: /sys/devices/pci0000:80/0000:80:0e.0/0000:84:00.0/local_cpus > > CPU 3 > > Modules linked in: btrfs zlib_deflate nls_utf8 hfsplus autofs4 hidp rfcomm l2cap bluetooth sunrpc ib_iser libiscsi scsi_transport_iscsi ib_srp scsi_transport_srp ib_ipoib inet_lro rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_addr ib_sa ib_mad ib_core dm_mirror dm_region_hash dm_log dm_multipath dm_mod sbs sbshc battery acpi_memhotplug ac ipv6 lp floppy snd_intel8x0 snd_ac97_codec ac97_bus snd_seq_dummy snd_seq_oss snd_seq_midi_event snd_seq snd_seq_device snd_pcm_oss sg rtc_cmos snd_mixer_oss snd_pcm snd_timer rtc_core parport_pc tg3 libphy ide_cd_mod snd button parport cdrom serio_raw rtc_lib i2c_amd8111 i2c_amd756 k8temp soundcore hwmon snd_page_alloc shpchp amd_rng i2c_core pcspkr sata_sil libata aic79xx scsi_transport_spi sd_mod scsi_mod ext3 jbd uhci_hcd ohci_hcd ehci_hcd> > Pid: 20535, comm: rm Tainted: G M 2.6.29-rc5 #1 -[62176J7]- > > RIP: 0010:[<ffffffffa041e529>] [<ffffffffa041e529>] __btrfs_reserve_extent+0x25b/0x270 [btrfs] > > RSP: 0018:ffff8800acd6d3d8 EFLAGS: 00010246 > > RAX: ffff88011996ca38 RBX: ffff88011996c960 RCX: 0000000000008610 > > RDX: 000000000000e1e1 RSI: 0000000000000246 RDI: ffff88011996ca34 > > RBP: ffff8800acd6d448 R08: 0000000000000000 R09: 00000000ffffff00 > > R10: 000000000000000a R11: 0000000000000000 R12: ffff88011996c9c0 > > R13: 0000000000001000 R14: ffff88011996ca30 R15: 0000000000001000 > > FS: 00007fea3f7486e0(0000) GS:ffff88012ecffcc0(0000) knlGS:00000000f7fa38d0 > > CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b > > CR2: 00007f651b793000 CR3: 00000000c64be000 CR4: 00000000000006e0 > > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > > DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 > > Process rm (pid: 20535, threadinfo ffff8800acd6c000, task ffff88012ed8c970) > > Stack: > > 0000000000000000 ffff8800acd6d4f8 0000000000000000 0000000000000000 > > 
ffffe20000000024 ffff880000000000 0000000000000000 0000000000000000 > > ffff88010c0c4000 0000000000001000 ffff8800acd6d4f8 ffff88012a516000 > > Call Trace: > > [<ffffffffa0421005>] btrfs_alloc_extent+0x48/0xa7 [btrfs] > > [<ffffffffa04210c0>] btrfs_alloc_free_block+0x5c/0x87 [btrfs] > > [<ffffffffa04155e1>] __btrfs_cow_block+0x1e7/0x82e [btrfs] > > [<ffffffffa04162d9>] btrfs_cow_block+0x1ed/0x1fc [btrfs] > > [<ffffffffa0419cb2>] btrfs_search_slot+0x31a/0x90f [btrfs] > > [<ffffffffa041add4>] btrfs_insert_some_items+0xb2/0x570 [btrfs] > > [<ffffffff80279fa3>] ? unlock_page+0x22/0x27 > > [<ffffffffa04482df>] ? set_extent_buffer_dirty+0x102/0x131 [btrfs] > > [<ffffffffa041f50b>] insert_extents+0x162/0x36f [btrfs] > > [<ffffffffa0420105>] finish_current_insert+0x4de/0x553 [btrfs] > > [<ffffffffa04228dc>] __btrfs_inc_extent_ref+0x1b5/0x1e4 [btrfs] > > [<ffffffff8034a2fe>] ? generic_swap+0x0/0x1c > > [<ffffffffa04256fb>] btrfs_inc_ref+0x42d/0x4ce [btrfs] > > [<ffffffffa0422727>] ? __btrfs_inc_extent_ref+0x0/0x1e4 [btrfs] > > [<ffffffffa0445c95>] ? copy_extent_buffer+0x115/0x149 [btrfs] > > [<ffffffffa0415834>] __btrfs_cow_block+0x43a/0x82e [btrfs] > > [<ffffffffa04162d9>] btrfs_cow_block+0x1ed/0x1fc [btrfs] > > [<ffffffffa0419cb2>] btrfs_search_slot+0x31a/0x90f [btrfs] > > [<ffffffff8034d5e8>] ? crc32c+0x4c/0x60 > > [<ffffffffa0427d8d>] btrfs_lookup_dir_item+0x71/0xe7 [btrfs] > > [<ffffffffa04337e4>] btrfs_unlink_inode+0x60/0x26f [btrfs] > > [<ffffffffa042f0e5>] ? start_transaction+0xf9/0x105 [btrfs] > > [<ffffffffa0433dbc>] btrfs_unlink+0x55/0x97 [btrfs] > > [<ffffffff802af53b>] vfs_unlink+0xb9/0x12c > > [<ffffffff802b197e>] do_unlinkat+0xc1/0x173 > > [<ffffffff802b4063>] ? vfs_readdir+0x9a/0xaf > > [<ffffffff8026855b>] ? 
audit_syscall_entry+0x16b/0x19e > > [<ffffffff802b1a41>] sys_unlink+0x11/0x13 > > [<ffffffff8020c05b>] system_call_fastpath+0x16/0x1b > > Code: 00 48 81 eb b8 00 00 00 48 8b 83 b8 00 00 00 48 8d 93 b8 00 00 00 0f 18 08 49 8d 44 24 58 48 39 c2 75 9f 4c 89 f7 e8 29 1e e3 df <0f> 0b eb fe 48 83 c4 48 31 c0 5b 41 5c 41 5d 41 5e 41 5f c9 c3 > > RIP [<ffffffffa041e529>] __btrfs_reserve_extent+0x25b/0x270 [btrfs] > > RSP <ffff8800acd6d3d8> > > ---[ end trace b5c34207a1ecc394 ]--- > > > > And, I received the following messages when I utime(2) files. > > > > btrfs searching for 4096 bytes, num_bytes 4096, loop 2, allowed_alloc 1 > > btrfs allocation failed flags 36, wanted 4096 > > space_info has 65536 free, is full > > space_info total=1932853248, pinned=54943744, delalloc=0, may_use=0, used=1877843968 > > block group 29360128 has 1073741824 bytes, 1041494016 used 32182272 pinned 0 reserved > > 0 blocks of free space at or bigger than bytes is > > block group 8619294720 has 859111424 bytes, 836349952 used 22761472 pinned 0 reserved > > 0 blocks of free space at or bigger than bytes is > > ------------[ cut here ]------------ > > kernel BUG at fs/btrfs/extent-tree.c:3360! 
> > invalid opcode: 0000 [#1] SMP > > last sysfs file: /sys/devices/pci0000:80/0000:80:0e.0/0000:84:00.0/local_cpus > > CPU 3 > > Modules linked in: btrfs zlib_deflate autofs4 hidp rfcomm l2cap bluetooth sunrpc ib_iser libiscsi scsi_transport_iscsi ib_srp scsi_transport_srp ib_ipoib inet_lro rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_addr ib_sa ib_mad ib_core dm_mirror dm_region_hash dm_log dm_multipath dm_mod sbs sbshc battery acpi_memhotplug ac ipv6 lp floppy snd_intel8x0 snd_ac97_codec ac97_bus snd_seq_dummy snd_seq_oss snd_seq_midi_event snd_seq snd_seq_device snd_pcm_oss snd_mixer_oss k8temp button sg snd_pcm i2c_amd8111 snd_timer snd serio_raw parport_pc rtc_cmos tg3 i2c_amd756 soundcore rtc_core parport hwmon libphy i2c_core ide_cd_mod cdrom rtc_lib amd_rng pcspkr snd_page_alloc shpchp sata_sil libata aic79xx scsi_transport_spi sd_mod scsi_mod ext3 jbd uhci_hcd ohci_hcd ehci_hcd > > Pid: 14421, comm: mkfbmt Tainted: G M 2.6.29-rc5 #1 -[62176J7]- > > RIP: 0010:[<ffffffffa0407529>] [<ffffffffa0407529>] __btrfs_reserve_extent+0x25b/0x270 [btrfs] > > RSP: 0018:ffff88010010d968 EFLAGS: 00010246 > > RAX: ffff880026bed8f8 RBX: ffff880026bed820 RCX: 0000000000008689 > > RDX: 0000000000005959 RSI: 0000000000000246 RDI: ffff880026bed8f4 > > RBP: ffff88010010d9d8 R08: 0000000000000000 R09: 00000000ffffff00 > > R10: 000000000000000a R11: 0000000000000000 R12: ffff880026bed880 > > R13: 0000000000001000 R14: ffff880026bed8f0 R15: 0000000000001000 > > FS: 00007fb97df446e0(0000) GS:ffff88012ecffcc0(0000) knlGS:00000000f7f4f8d0 > > CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b > > CR2: 000000342e614ce5 CR3: 000000011557b000 CR4: 00000000000006e0 > > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > > DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 > > Process mkfbmt (pid: 14421, threadinfo ffff88010010c000, task ffff88011f9738d0) > > Stack: > > 0000000000000000 ffff88010010da88 0000000000000000 0000000000000000 > > 
ffff880100000024 ffff88010010dad4 0000000000000000 0000000000000000 > > ffff8801145c0000 0000000000001000 ffff88010010da88 ffff8801149cd000 > > Call Trace: > > [<ffffffffa040a005>] btrfs_alloc_extent+0x48/0xa7 [btrfs] > > [<ffffffffa040a0c0>] btrfs_alloc_free_block+0x5c/0x87 [btrfs] > > [<ffffffffa03fe5e1>] __btrfs_cow_block+0x1e7/0x82e [btrfs] > > [<ffffffffa03ff2d9>] btrfs_cow_block+0x1ed/0x1fc [btrfs] > > [<ffffffffa0402cb2>] btrfs_search_slot+0x31a/0x90f [btrfs] > > [<ffffffffa0419b89>] ? btrfs_lookup_dentry+0x159/0x16f [btrfs] > > [<ffffffff802cd6a1>] ? inotify_d_instantiate+0x1a/0x43 > > [<ffffffff802b6984>] ? d_splice_alias+0xc4/0xd0 > > [<ffffffffa0412a8c>] btrfs_lookup_inode+0x2c/0x90 [btrfs] > > [<ffffffffa041bb13>] btrfs_update_inode+0x46/0xbd [btrfs] > > [<ffffffffa041c770>] btrfs_dirty_inode+0x3e/0x52 [btrfs] > > [<ffffffff802c0f3c>] __mark_inode_dirty+0x33/0x178 > > [<ffffffff802b998c>] inode_setattr+0x11b/0x125 > > [<ffffffffa041ace0>] btrfs_setattr+0x56/0x6e [btrfs] > > [<ffffffff802b9b2a>] notify_change+0x194/0x2d6 > > [<ffffffff802c3969>] utimes_common+0x121/0x163 > > [<ffffffff802c3a64>] do_utimes+0xb9/0xda > > [<ffffffff802c3c0f>] sys_utime+0x66/0x7b > > [<ffffffff8020c05b>] system_call_fastpath+0x16/0x1b > > Code: 00 48 81 eb b8 00 00 00 48 8b 83 b8 00 00 00 48 8d 93 b8 00 00 00 0f 18 08 49 8d 44 24 58 48 39 c2 75 9f 4c 89 f7 e8 29 8e e4 df <0f> 0b eb fe 48 83 c4 48 31 c0 5b 41 5c 41 5d 41 5e 41 5f c9 c3 > > RIP [<ffffffffa0407529>] __btrfs_reserve_extent+0x25b/0x270 [btrfs] > > RSP <ffff88010010d968> > > ---[ end trace 16075f4743156c16 ]--- > > > > Thanks, regards, > > > > Thank you, what tool are you using to create these files and such? > > Josef
Josef Bacik
2009-Feb-19 13:30 UTC
Re: [PATCH] btrfs: add better -ENOSPC handling VERSION 3
On Thu, Feb 19, 2009 at 05:45:54PM +0900, Yoshihiro Takahashi wrote:> Hi Josef. > > > Thank you, what tool are you using to create these files and such? > > I use an original tool. > I do not make the document. > > ex. > ./mkfbmt -d /btrfs -f 10000000 -x 10000 > This makes a file under 10,000 directories with 10,000,000 files in /btrfs. > > make 10,000 files under 100 directories each. > 100dir * 10000files = 10000000 > > /btrfs/0/0 > /btrfs/0/1 > ... > /btrfs/0/9998 > /btrfs/0/9999 > /btrfs/1/0 > ... > /btrfs/99/9999 > > defaults file size is zero byte. > When make a file of 4,096 bytes, please appoint -w 4096. > defaults option is all test. > -m mkdir > -c open(O_CREAT | O_EXCL | O_WRONLY) test > -o open test > -p utime test > -s stat test > -u unlink test > -r rmdir > > # ./mkfbmt -d /btrfs/ -f 1000000 -x 10000 > ## __linuxfs_init(91) [0x401fc8] > mkfbmt 100 directry 1000000 files , one directry 10000 files > make files for /btrfs > > mkdir : 0.004923 sec , 1 file mkdir for 0.000000 sec > creat : 179.155566 sec , 1 file creat for 0.000179 sec > open : 26.696062 sec , 1 file open for 0.000027 sec > utime : 55.143818 sec , 1 file utime for 0.000055 sec > stat : 4.692097 sec , 1 file stat for 0.000005 sec > unlink: 210.624820 sec , 1 file unlink for 0.000211 sec > rmdir : 0.806472 sec , 1 file rmdir for 0.000001 sec > file creat open utime stat unlink > 50000 4.08 1.18 2.17 0.13 14.18 > 100000 10.70 1.86 4.78 0.25 22.57 > 150000 55.87 2.42 7.57 0.37 30.99 > 200000 63.74 3.23 9.81 0.49 42.35 > 250000 69.72 4.93 12.16 0.60 55.71 > 300000 76.37 7.16 14.56 0.72 64.03 > 350000 83.60 10.34 17.01 0.83 91.18 > 400000 90.23 15.36 19.60 0.94 103.36 > 450000 97.47 20.07 22.17 1.06 111.07 > 500000 103.99 22.14 24.56 1.38 118.84 > 550000 110.85 22.36 26.93 1.88 126.66 > 600000 118.49 22.58 29.70 2.30 136.47 > 650000 125.26 23.08 32.05 2.50 148.96 > 700000 132.34 23.65 34.28 3.01 156.62 > 750000 138.89 24.22 36.80 3.54 164.49 > 800000 146.38 24.78 44.05 4.05 172.23 > 850000 
155.14 25.34 46.81 4.28 180.48 > 900000 163.21 26.25 49.85 4.41 195.99 > 950000 171.81 26.47 52.22 4.55 203.47 > 1000000 179.16 26.70 55.14 4.69 210.62 > > Best regards, >
That is perfect, thank you. I will add it to my enospc tests. I'm working on a patch which should fix your problems; I will send it out later today after I run your tests on it. Thanks, Josef
-- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html