The old extent merging code down underneath "ocfs2_mark_extent_written()" can't merge extents between leaf blocks. This patch resolve this. So that a large unwritten extent which has been split up with a bunch of writes can be merged together once all unwritten regions have been written to.
Tao Ma
2008-Jan-04 00:45 UTC
[Ocfs2-devel] [PATCH 1/2] Add support for cross extent block merge.V1
In ocfs2_merge_rec_left, when we find the merge extent is "CONTIG_RIGHT" with the first extent record of the next extent block, we will merge it to the next extent block and change all the related extent blocks accordingly. In ocfs2_merge_rec_right, when we find the merge extent is "CONTIG_LEFT" with the last extent record of the previous extent block, we will merge it to the prevoius extent block and change all the related extent blocks accordingly. As for CONTIG_LEFTRIGHT, we will handle CONTIG_RIGHT first when the split_index is zero since it is more efficient and easier. Signed-off-by: Tao Ma <tao.ma@oracle.com> --- fs/ocfs2/alloc.c | 367 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 files changed, 332 insertions(+), 35 deletions(-) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 23c8cda..c256750 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -1450,6 +1450,8 @@ static void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el, * - When our insert into the right path leaf is at the leftmost edge * and requires an update of the path immediately to it's left. This * can occur at the end of some types of rotation and appending inserts. + * - When we've adjusted the last extent record in the left path leaf and the + * 1st extent record in the right path leaf during cross extent block merge. */ static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle, struct ocfs2_path *left_path, @@ -2712,24 +2714,119 @@ static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el, } } +static int ocfs2_get_right_path(struct inode *inode, + struct ocfs2_path *left_path, + struct ocfs2_path **ret_right_path) +{ + int ret; + u32 right_cpos; + struct ocfs2_path *right_path = NULL; + struct ocfs2_extent_list *left_el; + + *ret_right_path = NULL; + + /* This function shouldn't be called for non-trees. */ + BUG_ON(left_path->p_tree_depth == 0); + + left_el = path_leaf_el(left_path); + BUG_ON(left_el->l_next_free_rec != left_el->l_count); + + ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path, + &right_cpos); + if (ret) { + mlog_errno(ret); + goto out; + } + + /* This function shouldn't be called for the rightmost leaf. */ + BUG_ON(right_cpos == 0); + + right_path = ocfs2_new_path(path_root_bh(left_path), + path_root_el(left_path)); + if (!right_path) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + ret = ocfs2_find_path(inode, right_path, right_cpos); + if (ret) { + mlog_errno(ret); + goto out; + } + + *ret_right_path = right_path; +out: + if (ret) + ocfs2_free_path(right_path); + return ret; +} + /* * Remove split_rec clusters from the record at index and merge them * onto the beginning of the record at index + 1. */ -static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh, - handle_t *handle, - struct ocfs2_extent_rec *split_rec, - struct ocfs2_extent_list *el, int index) +static int ocfs2_merge_rec_right(struct inode *inode, + struct ocfs2_path *left_path, + handle_t *handle, + struct ocfs2_extent_rec *split_rec, + struct ocfs2_extent_list *el, int index) { - int ret; + int ret, next_free, i; unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); struct ocfs2_extent_rec *left_rec; struct ocfs2_extent_rec *right_rec; + struct ocfs2_extent_list *right_el; + struct ocfs2_path *right_path = NULL; + int subtree_index = 0; + struct buffer_head *bh = path_leaf_bh(left_path); + struct buffer_head *root_bh = NULL; BUG_ON(index >= le16_to_cpu(el->l_next_free_rec)); - left_rec = &el->l_recs[index]; - right_rec = &el->l_recs[index + 1]; + + if (index == le16_to_cpu(el->l_next_free_rec - 1) && + le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count)) { + /* we meet with a cross extent block merge. */ + ret = ocfs2_get_right_path(inode, left_path, &right_path); + if (ret) { + mlog_errno(ret); + goto out; + } + + right_el = path_leaf_el(right_path); + next_free = le16_to_cpu(right_el->l_next_free_rec); + BUG_ON(next_free <= 0); + right_rec = &right_el->l_recs[0]; + if (ocfs2_is_empty_extent(right_rec)) { + BUG_ON(le16_to_cpu(next_free) <= 1); + right_rec = &right_el->l_recs[1]; + } + + BUG_ON(le32_to_cpu(left_rec->e_cpos) + + le16_to_cpu(left_rec->e_leaf_clusters) !+ le32_to_cpu(right_rec->e_cpos)); + + subtree_index = ocfs2_find_subtree_root(inode, + left_path, right_path); + + ret = ocfs2_extend_rotate_transaction(handle, subtree_index, + handle->h_buffer_credits, + right_path); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_journal_access(handle, inode, + path_leaf_bh(right_path), + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + } else + right_rec = &el->l_recs[index + 1]; ret = ocfs2_journal_access(handle, inode, bh, OCFS2_JOURNAL_ACCESS_WRITE); @@ -2751,7 +2848,90 @@ static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh, if (ret) mlog_errno(ret); + if (right_path) { + ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path)); + if (ret) + mlog_errno(ret); + + root_bh = left_path->p_node[subtree_index].bh; + BUG_ON(root_bh != right_path->p_node[subtree_index].bh); + + ret = ocfs2_journal_access(handle, inode, root_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + + for (i = subtree_index + 1; + i < path_num_items(right_path); i++) { + ret = ocfs2_journal_access(handle, inode, + right_path->p_node[i].bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_journal_access(handle, inode, + left_path->p_node[i].bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + } + + ocfs2_complete_edge_insert(inode, handle, left_path, + right_path, subtree_index); + } out: + if (right_path) + ocfs2_free_path(right_path); + return ret; +} + +static int ocfs2_get_left_path(struct inode *inode, + struct ocfs2_path *right_path, + struct ocfs2_path **ret_left_path) +{ + int ret; + u32 left_cpos; + struct ocfs2_path *left_path = NULL; + + *ret_left_path = NULL; + + /* This function shouldn't be called for non-trees. */ + BUG_ON(right_path->p_tree_depth == 0); + + ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, + right_path, &left_cpos); + if (ret) { + mlog_errno(ret); + goto out; + } + + /* This function shouldn't be called for the leftmost leave. */ + BUG_ON(left_cpos == 0); + + left_path = ocfs2_new_path(path_root_bh(right_path), + path_root_el(right_path)); + if (!left_path) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + ret = ocfs2_find_path(inode, left_path, left_cpos); + if (ret) { + mlog_errno(ret); + goto out; + } + + *ret_left_path = left_path; +out: + if (ret) + ocfs2_free_path(left_path); return ret; } @@ -2759,22 +2939,64 @@ out: * Remove split_rec clusters from the record at index and merge them * onto the tail of the record at index - 1. */ -static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh, +static int ocfs2_merge_rec_left(struct inode *inode, + struct ocfs2_path *right_path, handle_t *handle, struct ocfs2_extent_rec *split_rec, struct ocfs2_extent_list *el, int index) { - int ret, has_empty_extent = 0; + int ret, i, subtree_index = 0, has_empty_extent = 0; unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); struct ocfs2_extent_rec *left_rec; struct ocfs2_extent_rec *right_rec; + struct buffer_head *bh = path_leaf_bh(right_path); + struct buffer_head *root_bh = NULL; + struct ocfs2_path *left_path = NULL; + struct ocfs2_extent_list *left_el; - BUG_ON(index <= 0); + BUG_ON(index < 0); - left_rec = &el->l_recs[index - 1]; right_rec = &el->l_recs[index]; - if (ocfs2_is_empty_extent(&el->l_recs[0])) - has_empty_extent = 1; + if (index == 0) { + /* we meet with a cross extent block merge. */ + ret = ocfs2_get_left_path(inode, right_path, &left_path); + if (ret) { + mlog_errno(ret); + goto out; + } + + left_el = path_leaf_el(left_path); + BUG_ON(le16_to_cpu(left_el->l_next_free_rec) !+ le16_to_cpu(left_el->l_count)); + + left_rec = &left_el->l_recs[le16_to_cpu(left_el->l_count) - 1]; + BUG_ON(le32_to_cpu(left_rec->e_cpos) + + le16_to_cpu(left_rec->e_leaf_clusters) !+ le32_to_cpu(split_rec->e_cpos)); + + subtree_index = ocfs2_find_subtree_root(inode, + left_path, right_path); + + ret = ocfs2_extend_rotate_transaction(handle, subtree_index, + handle->h_buffer_credits, + left_path); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_journal_access(handle, inode, + path_leaf_bh(left_path), + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + } else { + left_rec = &el->l_recs[index - 1]; + if (ocfs2_is_empty_extent(&el->l_recs[0])) + has_empty_extent = 1; + } ret = ocfs2_journal_access(handle, inode, bh, OCFS2_JOURNAL_ACCESS_WRITE); @@ -2802,24 +3024,77 @@ static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh, ocfs2_cleanup_merge(el, index); ret = ocfs2_journal_dirty(handle, bh); - if (ret) + if (ret) { mlog_errno(ret); + goto out; + } + if (left_path) { + ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); + if (ret) { + mlog_errno(ret); + goto out; + } + + /* + * In the situation that the right_rec is empty and the extent + * block is empty also, ocfs2_complete_edge_insert can't handle + * it, while ocfs2_rotate_tree_left can be called and the extent + * block will be deleted since c_split_covers_rec is set. + */ + if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 && + le16_to_cpu(el->l_next_free_rec) == 1) + goto out; + + root_bh = left_path->p_node[subtree_index].bh; + BUG_ON(root_bh != right_path->p_node[subtree_index].bh); + + ret = ocfs2_journal_access(handle, inode, root_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + + for (i = subtree_index + 1; + i < path_num_items(right_path); i++) { + ret = ocfs2_journal_access(handle, inode, + right_path->p_node[i].bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_journal_access(handle, inode, + left_path->p_node[i].bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + } + + ocfs2_complete_edge_insert(inode, handle, left_path, + right_path, subtree_index); + } out: + if (left_path) + ocfs2_free_path(left_path); return ret; } static int ocfs2_try_to_merge_extent(struct inode *inode, handle_t *handle, - struct ocfs2_path *left_path, + struct ocfs2_path *path, int split_index, struct ocfs2_extent_rec *split_rec, struct ocfs2_cached_dealloc_ctxt *dealloc, struct ocfs2_merge_ctxt *ctxt) { - int ret = 0; - struct ocfs2_extent_list *el = path_leaf_el(left_path); + int ret = 0, left_first = 1; + struct ocfs2_extent_list *el = path_leaf_el(path); struct ocfs2_extent_rec *rec = &el->l_recs[split_index]; BUG_ON(ctxt->c_contig_type == CONTIG_NONE); @@ -2832,7 +3107,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, * extents - having more than one in a leaf is * illegal. */ - ret = ocfs2_rotate_tree_left(inode, handle, left_path, + ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); if (ret) { mlog_errno(ret); @@ -2847,7 +3122,6 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, * Left-right contig implies this. */ BUG_ON(!ctxt->c_split_covers_rec); - BUG_ON(split_index == 0); /* * Since the leftright insert always covers the entire @@ -2858,9 +3132,21 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, * Since the adding of an empty extent shifts * everything back to the right, there's no need to * update split_index here. - */ - ret = ocfs2_merge_rec_left(inode, path_leaf_bh(left_path), - handle, split_rec, el, split_index); + * + * When the split_index is zero, we need to merge it to the + * prevoius extent block. It is more efficient and easier + * if we do merge_right first and merge_left later. + */ + if (!split_index) + left_first = 0; + if (left_first) + ret = ocfs2_merge_rec_left(inode, path, + handle, split_rec, + el, split_index); + else + ret = ocfs2_merge_rec_right(inode, path, + handle, split_rec, + el, split_index); if (ret) { mlog_errno(ret); goto out; @@ -2871,24 +3157,35 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, */ BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); - /* - * The left merge left us with an empty extent, remove - * it. - */ - ret = ocfs2_rotate_tree_left(inode, handle, left_path, dealloc); + /* The merge left us with an empty extent, remove it. */ + ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); if (ret) { mlog_errno(ret); goto out; } - split_index--; + + /* + * For split_index = 0, we have done merge_right first and + * want to merge it to the previous extent block now. + * So no need to decrease it. + */ + if (left_first) + split_index--; rec = &el->l_recs[split_index]; /* * Note that we don't pass split_rec here on purpose - - * we've merged it into the left side. + * we've merged it into the rec already. */ - ret = ocfs2_merge_rec_right(inode, path_leaf_bh(left_path), - handle, rec, el, split_index); + if (left_first) + ret = ocfs2_merge_rec_right(inode, path, + handle, rec, + el, split_index); + else + ret = ocfs2_merge_rec_left(inode, path, + handle, rec, + el, split_index); + if (ret) { mlog_errno(ret); goto out; @@ -2896,7 +3193,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); - ret = ocfs2_rotate_tree_left(inode, handle, left_path, + ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); /* * Error from this last rotate is not critical, so @@ -2915,7 +3212,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, */ if (ctxt->c_contig_type == CONTIG_RIGHT) { ret = ocfs2_merge_rec_left(inode, - path_leaf_bh(left_path), + path, handle, split_rec, el, split_index); if (ret) { @@ -2924,7 +3221,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, } } else { ret = ocfs2_merge_rec_right(inode, - path_leaf_bh(left_path), + path, handle, split_rec, el, split_index); if (ret) { @@ -2938,7 +3235,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, * The merge may have left an empty extent in * our leaf. Try to rotate it away. */ - ret = ocfs2_rotate_tree_left(inode, handle, left_path, + ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); if (ret) mlog_errno(ret); -- 1.5.3.GIT
In ocfs2_figure_merge_contig_type, we judge whether there exists a cross extent block merge and enable it by setting CONTIG_LEFT and CONTIG_RIGHT accordingly. Signed-off-by: Tao Ma <tao.ma@oracle.com> --- fs/ocfs2/alloc.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 files changed, 68 insertions(+), 8 deletions(-) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index c256750..e3b17ac 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -3795,20 +3795,48 @@ out: } static enum ocfs2_contig_type -ocfs2_figure_merge_contig_type(struct inode *inode, +ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path, struct ocfs2_extent_list *el, int index, struct ocfs2_extent_rec *split_rec) { - struct ocfs2_extent_rec *rec; + int status; + struct ocfs2_extent_rec *rec = NULL; enum ocfs2_contig_type ret = CONTIG_NONE; + u32 left_cpos, right_cpos; + struct ocfs2_extent_list *new_el; + struct ocfs2_path *left_path = NULL, *right_path = NULL; + + if (index > 0) { + rec = &el->l_recs[index - 1]; + } else if (path->p_tree_depth > 0) { + status = ocfs2_find_cpos_for_left_leaf(inode->i_sb, + path, &left_cpos); + if (status) + goto out; + + if (left_cpos != 0) { + left_path = ocfs2_new_path(path_root_bh(path), + path_root_el(path)); + if (!left_path) + goto out; + + status = ocfs2_find_path(inode, left_path, left_cpos); + if (status) + goto out; + + new_el = path_leaf_el(left_path); + + BUG_ON(le16_to_cpu(new_el->l_next_free_rec) !+ le16_to_cpu(new_el->l_count)); + rec = &new_el->l_recs[le16_to_cpu(new_el->l_count) - 1]; + } + } /* * We're careful to check for an empty extent record here - * the merge code will know what to do if it sees one. */ - - if (index > 0) { - rec = &el->l_recs[index - 1]; + if (rec) { if (index == 1 && ocfs2_is_empty_extent(rec)) { if (split_rec->e_cpos == el->l_recs[index].e_cpos) ret = CONTIG_RIGHT; @@ -3817,10 +3845,36 @@ ocfs2_figure_merge_contig_type(struct inode *inode, } } - if (index < (le16_to_cpu(el->l_next_free_rec) - 1)) { + rec = NULL; + if (index < (le16_to_cpu(el->l_next_free_rec) - 1)) + rec = &el->l_recs[index + 1]; + else if (le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count) && + path->p_tree_depth > 0) { + status = ocfs2_find_cpos_for_right_leaf(inode->i_sb, + path, &right_cpos); + if (status) + goto out; + + if (right_cpos != 0) { + right_path = ocfs2_new_path(path_root_bh(path), + path_root_el(path)); + if (!right_path) + goto out; + + status = ocfs2_find_path(inode, right_path, right_cpos); + if (status) + goto out; + + new_el = path_leaf_el(right_path); + rec = &new_el->l_recs[0]; + if (ocfs2_is_empty_extent(rec)) + goto out; + } + } + + if (rec) { enum ocfs2_contig_type contig_type; - rec = &el->l_recs[index + 1]; contig_type = ocfs2_extent_contig(inode, rec, split_rec); if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT) @@ -3829,6 +3883,12 @@ ocfs2_figure_merge_contig_type(struct inode *inode, ret = contig_type; } +out: + if (left_path) + ocfs2_free_path(left_path); + if (right_path) + ocfs2_free_path(right_path); + return ret; } @@ -4291,7 +4351,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode, goto out; } - ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, el, + ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, path, el, split_index, split_rec); -- 1.5.3.GIT
Mark Fasheh
2008-Jan-06 13:44 UTC
[Ocfs2-devel] [PATCH 0/2] Unwritten extent merge update,V1
On Fri, Jan 04, 2008 at 04:32:14PM +0800, tao.ma wrote:> The old extent merging code down underneath "ocfs2_mark_extent_written()" > can't merge extents between leaf blocks. This patch resolve this. > So that a large unwritten extent which has been split up with a bunch of > writes can be merged together once all unwritten regions have been written to.So, just to get things started I'll attach a patch which I think you might find useful. It adds a function 'ocfs2_check_tree()' which you can call after marking an extent unwritten. It'll walk the entire btree and look for certain inconsistencies. If one is found, the journal is synced and then the box is paniced. The syncing gives us the chance to inspect the btree after rebooting the box. Last time I was hacking on the tree code, I modified ocfs2-test/programs/fill_verify_holes/old_burn-in.sh to call fill_verify_holes with '-u' (for unwritten extents) and ran the script against a 512 blocksize/4k clustersize file system. The kernel I was running had the tree check patch applied. This setup helped me catch the vast majority of bugs which I had introduced by my changes. These days, I'd also disable the extent map cache (which wasn't around then) as it would also hide some problems. By the way - the page cache can also hide problems by allowing the "verify" program to read cached data which it wouldn't actually be able to find if the btree was actually searched. I think we can might be able to minimize this by inserting: echo 1 > /proc/sys/vm/drop_caches right before the verify step in old_burn-in.sh. Ultimately of course, you can do whatever you want for testing, but I'd definitely like to get a detailed description of what you did or what you're planning to do in that area. I'm paranoid about btree changes, even though I'm really happy that you took this task on ;) --Mark -- Mark Fasheh Principal Software Developer, Oracle mark.fasheh@oracle.com diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index cf30761..0bebced 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -49,6 +49,8 @@ #include "uptodate.h" #include "buffer_head_io.h" +static void ocfs2_check_tree(struct inode *inode); + static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc); static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, struct ocfs2_extent_block *eb); @@ -3782,6 +3784,9 @@ int ocfs2_insert_extent(struct ocfs2_sup else ocfs2_extent_map_insert_rec(inode, &rec); + if (status == 0) + ocfs2_check_tree(inode); + bail: if (bh) brelse(bh); @@ -4134,6 +4139,9 @@ int ocfs2_mark_extent_written(struct ino if (ret) mlog_errno(ret); + if (ret == 0) + ocfs2_check_tree(inode); + out: ocfs2_free_path(left_path); return ret; @@ -4501,6 +4509,9 @@ int ocfs2_remove_extent(struct inode *in } } + if (ret == 0) + ocfs2_check_tree(inode); + out: ocfs2_free_path(path); return ret; @@ -6122,3 +6133,177 @@ static void ocfs2_free_truncate_context( kfree(tc); } + +#define INDEX_DEPTH (OCFS2_MAX_PATH_DEPTH - 1) + +static void ocfs2_path_error(void) +{ + handle_t *handle = journal_current_handle(); + + handle->h_sync = 1; + journal_stop(handle); + + BUG(); +} + +static void ocfs2_check_path(struct inode *inode, + struct ocfs2_path *path, + int *check_index) +{ + int i, next_free, node; + u32 parent_range, child_range; + struct ocfs2_extent_list *el; + struct ocfs2_extent_rec *rec; + struct ocfs2_extent_rec *rec1; + struct ocfs2_extent_rec *parent_rec; + + for(node = 0; node < path_num_items(path); node++) { + el = path->p_node[node].el; + + next_free = le16_to_cpu(el->l_next_free_rec); + + /* Check for duplicate records */ + /* Check for records in order */ + for(i = 0; i < (next_free - 1); i++) { + rec = &el->l_recs[i]; + rec1 = &el->l_recs[i+1]; + + if (!ocfs2_is_empty_extent(rec) && + le32_to_cpu(rec1->e_cpos) <+ le32_to_cpu(rec->e_cpos)) + goto bad_records; + } + } + + /* Check ranges of records */ + el = path_root_el(path); + parent_rec = &el->l_recs[check_index[0]]; + i = 1; + while (i <= path->p_tree_depth) { + struct ocfs2_extent_rec *child_rec; + + el = path->p_node[i].el; + + parent_range = le32_to_cpu(parent_rec->e_cpos) + + le32_to_cpu(parent_rec->e_int_clusters); + child_range = ocfs2_sum_rightmost_rec(el); + if (child_range > parent_range) + goto bad_range; + + child_rec = &el->l_recs[0]; + + /* + * The leftmost branch is allowed to have a starting + * cpos of zero but children who actually start after + * that. + */ + if (parent_rec->e_cpos && + (le32_to_cpu(parent_rec->e_cpos) < + le32_to_cpu(child_rec->e_cpos))) + goto bad_range; + + parent_rec = &el->l_recs[check_index[i]]; + + i++; + } + + return; + +bad_records: + mlog(ML_ERROR, "Inode %lu: Bad Records\n", + (unsigned long)inode->i_ino); + ocfs2_path_error(); + return; + +bad_range: + mlog(ML_ERROR, "Inode %lu: Bad Range\n", + (unsigned long)inode->i_ino); + mlog(ML_ERROR, "Child block: %llu, Parent rec: (%u, %u, %llu)\n", + (unsigned long long)path->p_node[i].bh->b_blocknr, + le32_to_cpu(parent_rec->e_cpos), + le32_to_cpu(parent_rec->e_int_clusters), + (unsigned long long)le64_to_cpu(parent_rec->e_blkno)); + ocfs2_path_error(); +} + +static void ocfs2_check_tree(struct inode *inode) +{ + int ret, i; + int check_index[INDEX_DEPTH] = {0, }; + u64 blkno; + struct ocfs2_dinode *di; + struct buffer_head *di_bh = NULL; + struct buffer_head *bh; + struct buffer_head *first_tail_bh = NULL; + struct ocfs2_path *path = NULL; + struct ocfs2_extent_block *eb; + struct ocfs2_extent_list *el; + + ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno, + &di_bh, OCFS2_BH_CACHED, inode); + if (ret) { + mlog_errno(ret); + goto out; + } + + di = (struct ocfs2_dinode *)di_bh->b_data; + + path = ocfs2_new_inode_path(di_bh); + if (!path) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + +restart: + i = 0; + while (i < path->p_tree_depth) { + el = path->p_node[i].el; + + BUG_ON(check_index[i] >= le16_to_cpu(el->l_next_free_rec)); + + blkno = le64_to_cpu(el->l_recs[check_index[i]].e_blkno); + bh = NULL; + ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno, + &bh, OCFS2_BH_CACHED, inode); + if (ret) { + mlog_errno(ret); + goto out; + } + + ocfs2_path_insert_eb(path, i + 1, bh); + + eb = (struct ocfs2_extent_block *) bh->b_data; + if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) + BUG(); + + i++; + } + + ocfs2_check_path(inode, path, check_index); + + if (le64_to_cpu(di->i_last_eb_blk) != path_leaf_bh(path)->b_blocknr + && path->p_tree_depth) { + int max; + + for(i = (path->p_tree_depth - 1); i >= 0; i--) { + el = path->p_node[i].el; + + max = le16_to_cpu(el->l_next_free_rec); + + check_index[i]++; + if (check_index[i] >= max) + check_index[i] = 0; + else + break; + } + + ocfs2_reinit_path(path, 1); + goto restart; + } + +out: + ocfs2_free_path(path); + brelse(di_bh); + brelse(first_tail_bh); +}