Joel Becker
2008-Oct-16 09:38 UTC
[Ocfs2-devel] [PATCH 0/8] Dedicated metadata read functions
The following patch series builds on top of the read_block cleanups to provide dedicated functions to read the various types of ocfs2 metadata. Rather than calling ocfs2_read_block[s]() directly, functions wanting an inode now call ocfs2_read_inode_block(). The same is true for extent blocks, group descriptors, and dirblocks. I will get to xattr blocks later. The older the metadata type, the more inconsistent the block validation was. Sometimes we didn't even check the signature, other times we checked everything. As part of the changes, we make sure all validation happens inside the dedicated function. The rest of the code no longer has to check for valid blocks. The last patch is the best - the validation calls are lifted into ocfs2_read_blocks() via a callback. If validation is requested, it is only performed when the block is actually read from disk. If the block was in cache, no validation is performed. Thus, we can do full validation when a block is read yet not lose performance on subsequent cached reads. Joel
Joel Becker
2008-Oct-16 09:38 UTC
[Ocfs2-devel] [PATCH 1/8] ocfs2: Wrap inode block reads in a dedicated function.
The ocfs2 code currently reads inodes off disk with a simple ocfs2_read_block() call. Each place that does this has a different set of sanity checks it performs. Some check only the signature. A couple validate the block number (the block read vs di->i_blkno). A couple others check for VALID_FL. Only one place validates i_fs_generation. A couple check nothing. Even when an error is found, they don't all do the same thing. We wrap inode reading into ocfs2_read_inode_block(). This will validate all the above fields, going readonly if they are invalid (they never should be). ocfs2_read_inode_block_full() is provided for the places that want to pass read_block flags. Every caller is passing a struct inode with a valid ip_blkno, so we don't need a separate blkno argument either. We will remove the validation checks from the rest of the code in a later commit, as they are no longer necessary. Signed-off-by: Joel Becker <joel.becker at oracle.com> --- fs/ocfs2/alloc.c | 2 +- fs/ocfs2/aops.c | 11 +--- fs/ocfs2/dir.c | 6 +- fs/ocfs2/dlmglue.c | 12 ++--- fs/ocfs2/extent_map.c | 2 +- fs/ocfs2/file.c | 21 ++------ fs/ocfs2/inode.c | 136 +++++++++++++++++++++++++++++++++++-------------- fs/ocfs2/inode.h | 16 +++++- fs/ocfs2/journal.c | 3 +- fs/ocfs2/localalloc.c | 8 ++-- fs/ocfs2/namei.c | 14 +---- fs/ocfs2/symlink.c | 2 +- 12 files changed, 136 insertions(+), 97 deletions(-) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 0cc2deb..6d222d5 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -5586,7 +5586,7 @@ static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb, goto bail; } - status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh); + status = ocfs2_read_inode_block(inode, &bh); if (status < 0) { iput(inode); mlog_errno(status); diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index c22543b..e219f8b 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -68,20 +68,13 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, goto bail; } - status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh); + status = ocfs2_read_inode_block(inode, &bh); if (status < 0) { mlog_errno(status); goto bail; } fe = (struct ocfs2_dinode *) bh->b_data; - if (!OCFS2_IS_VALID_DINODE(fe)) { - mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n", - (unsigned long long)le64_to_cpu(fe->i_blkno), 7, - fe->i_signature); - goto bail; - } - if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb, le32_to_cpu(fe->i_clusters))) { mlog(ML_ERROR, "block offset is outside the allocated size: " @@ -262,7 +255,7 @@ static int ocfs2_readpage_inline(struct inode *inode, struct page *page) BUG_ON(!PageLocked(page)); BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)); - ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh); + ret = ocfs2_read_inode_block(inode, &di_bh); if (ret) { mlog_errno(ret); goto out; diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 026e6eb..5777045 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -231,7 +231,7 @@ static struct buffer_head *ocfs2_find_entry_id(const char *name, struct ocfs2_dinode *di; struct ocfs2_inline_data *data; - ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh); + ret = ocfs2_read_inode_block(dir, &di_bh); if (ret) { mlog_errno(ret); goto out; @@ -458,7 +458,7 @@ static inline int ocfs2_delete_entry_id(handle_t *handle, struct ocfs2_dinode *di; struct ocfs2_inline_data *data; - ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh); + ret = ocfs2_read_inode_block(dir, &di_bh); if (ret) { mlog_errno(ret); goto out; @@ -636,7 +636,7 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode, struct ocfs2_inline_data *data; struct ocfs2_dir_entry *de; - ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh); + ret = ocfs2_read_inode_block(inode, &di_bh); if (ret) { mlog(ML_ERROR, "Unable to read inode block for dir %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index ec68442..c9f537c 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2024,7 +2024,7 @@ static int ocfs2_inode_lock_update(struct inode *inode, } else { /* Boo, we have to go to disk. */ /* read bh, cast, ocfs2_refresh_inode */ - status = ocfs2_read_block(inode, oi->ip_blkno, bh); + status = ocfs2_read_inode_block(inode, bh); if (status < 0) { mlog_errno(status); goto bail_refresh; @@ -2032,18 +2032,14 @@ static int ocfs2_inode_lock_update(struct inode *inode, fe = (struct ocfs2_dinode *) (*bh)->b_data; /* This is a good chance to make sure we're not - * locking an invalid object. + * locking an invalid object. ocfs2_read_inode_block() + * already checked that the inode block is sane. * * We bug on a stale inode here because we checked * above whether it was wiped from disk. The wiping * node provides a guarantee that we receive that * message and can mark the inode before dropping any * locks associated with it. */ - if (!OCFS2_IS_VALID_DINODE(fe)) { - OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); - status = -EIO; - goto bail_refresh; - } mlog_bug_on_msg(inode->i_generation ! le32_to_cpu(fe->i_generation), "Invalid dinode %llu disk generation: %u " @@ -2085,7 +2081,7 @@ static int ocfs2_assign_bh(struct inode *inode, return 0; } - status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, ret_bh); + status = ocfs2_read_inode_block(inode, ret_bh); if (status < 0) mlog_errno(status); diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 2baedac..b686b31 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -630,7 +630,7 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, if (ret == 0) goto out; - ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh); + ret = ocfs2_read_inode_block(inode, &di_bh); if (ret) { mlog_errno(ret); goto out; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8d3225a..353fef4 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -401,12 +401,9 @@ static int ocfs2_truncate_file(struct inode *inode, (unsigned long long)OCFS2_I(inode)->ip_blkno, (unsigned long long)new_i_size); + /* We trust di_bh because it comes from ocfs2_inode_lock(), which + * already validated it */ fe = (struct ocfs2_dinode *) di_bh->b_data; - if (!OCFS2_IS_VALID_DINODE(fe)) { - OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); - status = -EIO; - goto bail; - } mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode), "Inode %llu, inode i_size = %lld != di " @@ -545,18 +542,12 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, */ BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb)); - status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh); + status = ocfs2_read_inode_block(inode, &bh); if (status < 0) { mlog_errno(status); goto leave; } - fe = (struct ocfs2_dinode *) bh->b_data; - if (!OCFS2_IS_VALID_DINODE(fe)) { - OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); - status = -EIO; - goto leave; - } restart_all: BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); @@ -1129,9 +1120,8 @@ static int ocfs2_write_remove_suid(struct inode *inode) { int ret; struct buffer_head *bh = NULL; - struct ocfs2_inode_info *oi = OCFS2_I(inode); - ret = ocfs2_read_block(inode, oi->ip_blkno, &bh); + ret = ocfs2_read_inode_block(inode, &bh); if (ret < 0) { mlog_errno(ret); goto out; @@ -1157,8 +1147,7 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode, struct buffer_head *di_bh = NULL; if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { - ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, - &di_bh); + ret = ocfs2_read_inode_block(inode, &di_bh); if (ret) { mlog_errno(ret); goto out; diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 4903688..fa24e54 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -214,12 +214,11 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque) return 0; } -int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, - int create_ino) +void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, + int create_ino) { struct super_block *sb; struct ocfs2_super *osb; - int status = -EINVAL; int use_plocks = 1; mlog_entry("(0x%p, size:%llu)\n", inode, @@ -232,25 +231,17 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, ocfs2_mount_local(osb) || !ocfs2_stack_supports_plocks()) use_plocks = 0; - /* this means that read_inode cannot create a superblock inode - * today. change if needed. */ - if (!OCFS2_IS_VALID_DINODE(fe) || - !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL))) { - mlog(0, "Invalid dinode: i_ino=%lu, i_blkno=%llu, " - "signature = %.*s, flags = 0x%x\n", - inode->i_ino, - (unsigned long long)le64_to_cpu(fe->i_blkno), 7, - fe->i_signature, le32_to_cpu(fe->i_flags)); - goto bail; - } + /* + * These have all been checked by ocfs2_read_inode_block() or set + * by ocfs2_mknod_locked(), so a failure is a code bug. + */ + BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); /* This means that read_inode + cannot create a superblock + inode today. change if + that is needed. */ + BUG_ON(!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL))); + BUG_ON(le32_to_cpu(fe->i_fs_generation) != osb->fs_generation); - if (le32_to_cpu(fe->i_fs_generation) != osb->fs_generation) { - mlog(ML_ERROR, "file entry generation does not match " - "superblock! osb->fs_generation=%x, " - "fe->i_fs_generation=%x\n", - osb->fs_generation, le32_to_cpu(fe->i_fs_generation)); - goto bail; - } OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); @@ -354,10 +345,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, ocfs2_set_inode_flags(inode); - status = 0; -bail: - mlog_exit(status); - return status; + mlog_exit_void(); } static int ocfs2_read_locked_inode(struct inode *inode, @@ -460,11 +448,14 @@ static int ocfs2_read_locked_inode(struct inode *inode, } } - if (can_lock) - status = ocfs2_read_blocks(inode, args->fi_blkno, 1, &bh, - OCFS2_BH_IGNORE_CACHE); - else + if (can_lock) { + status = ocfs2_read_inode_block_full(inode, &bh, + OCFS2_BH_IGNORE_CACHE); + } else { status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh); + if (!status) + status = ocfs2_validate_inode_block(osb->sb, bh); + } if (status < 0) { mlog_errno(status); goto bail; @@ -472,12 +463,6 @@ static int ocfs2_read_locked_inode(struct inode *inode, status = -EINVAL; fe = (struct ocfs2_dinode *) bh->b_data; - if (!OCFS2_IS_VALID_DINODE(fe)) { - mlog(0, "Invalid dinode #%llu: signature = %.*s\n", - (unsigned long long)args->fi_blkno, 7, - fe->i_signature); - goto bail; - } /* * This is a code bug. Right now the caller needs to @@ -491,10 +476,9 @@ static int ocfs2_read_locked_inode(struct inode *inode, if (S_ISCHR(le16_to_cpu(fe->i_mode)) || S_ISBLK(le16_to_cpu(fe->i_mode))) - inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); + inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); - if (ocfs2_populate_inode(inode, fe, 0) < 0) - goto bail; + ocfs2_populate_inode(inode, fe, 0); BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno)); @@ -1258,3 +1242,79 @@ void ocfs2_refresh_inode(struct inode *inode, spin_unlock(&OCFS2_I(inode)->ip_lock); } + +int ocfs2_validate_inode_block(struct super_block *sb, + struct buffer_head *bh) +{ + int rc = -EINVAL; + struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; + + BUG_ON(!buffer_uptodate(bh)); + + if (!OCFS2_IS_VALID_DINODE(di)) { + ocfs2_error(sb, "Invalid dinode #%llu: signature = %.*s\n", + (unsigned long long)bh->b_blocknr, 7, + di->i_signature); + goto bail; + } + + if (le64_to_cpu(di->i_blkno) != bh->b_blocknr) { + ocfs2_error(sb, "Invalid dinode #%llu: i_blkno is %llu\n", + (unsigned long long)bh->b_blocknr, + (unsigned long long)le64_to_cpu(di->i_blkno)); + goto bail; + } + + if (!(di->i_flags & cpu_to_le32(OCFS2_VALID_FL))) { + ocfs2_error(sb, + "Invalid dinode #%llu: OCFS2_VALID_FL not set\n", + (unsigned long long)bh->b_blocknr); + goto bail; + } + + if (le32_to_cpu(di->i_fs_generation) !+ OCFS2_SB(sb)->fs_generation) { + ocfs2_error(sb, + "Invalid dinode #%llu: fs_generation is %u\n", + (unsigned long long)bh->b_blocknr, + le32_to_cpu(di->i_fs_generation)); + goto bail; + } + + rc = 0; + +bail: + return rc; +} + +int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh, + int flags) +{ + int rc; + struct buffer_head *tmp = *bh; + + rc = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, &tmp, + flags); + if (rc) + goto out; + + if (!(flags & OCFS2_BH_READAHEAD)) { + rc = ocfs2_validate_inode_block(inode->i_sb, tmp); + if (rc) { + brelse(tmp); + goto out; + } + } + + /* If ocfs2_read_blocks() got us a new bh, pass it up. */ + if (!*bh) + *bh = tmp; + +out: + return rc; +} + +int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh) +{ + return ocfs2_read_inode_block_full(inode, bh, 0); +} diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 2f37af9..b79c371 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h @@ -128,8 +128,8 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, unsigned flags, int sysfile_type); int ocfs2_inode_init_private(struct inode *inode); int ocfs2_inode_revalidate(struct dentry *dentry); -int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, - int create_ino); +void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, + int create_ino); void ocfs2_read_inode(struct inode *inode); void ocfs2_read_inode2(struct inode *inode, void *opaque); ssize_t ocfs2_rw_direct(int rw, struct file *filp, char *buf, @@ -153,4 +153,16 @@ static inline blkcnt_t ocfs2_inode_sector_count(struct inode *inode) return (blkcnt_t)(OCFS2_I(inode)->ip_clusters << c_to_s_bits); } +/* Validate that a bh contains a valid inode */ +int ocfs2_validate_inode_block(struct super_block *sb, + struct buffer_head *bh); +/* + * Read an inode block into *bh. If *bh is NULL, a bh will be allocated. + * This is a cached read. The inode will be validated with + * ocfs2_validate_inode_block(). + */ +int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh); +/* The same, but can be passed OCFS2_BH_* flags */ +int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh, + int flags); #endif /* OCFS2_INODE_H */ diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 81e4067..54eb9ca 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1134,8 +1134,7 @@ static int ocfs2_read_journal_inode(struct ocfs2_super *osb, } SET_INODE_JOURNAL(inode); - status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, bh, - OCFS2_BH_IGNORE_CACHE); + status = ocfs2_read_inode_block_full(inode, bh, OCFS2_BH_IGNORE_CACHE); if (status < 0) { mlog_errno(status); goto bail; diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 687b287..19cfb1b 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -248,8 +248,8 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) goto bail; } - status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, - &alloc_bh, OCFS2_BH_IGNORE_CACHE); + status = ocfs2_read_inode_block_full(inode, &alloc_bh, + OCFS2_BH_IGNORE_CACHE); if (status < 0) { mlog_errno(status); goto bail; @@ -459,8 +459,8 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, mutex_lock(&inode->i_mutex); - status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, - &alloc_bh, OCFS2_BH_IGNORE_CACHE); + status = ocfs2_read_inode_block_full(inode, &alloc_bh, + OCFS2_BH_IGNORE_CACHE); if (status < 0) { mlog_errno(status); goto bail; diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 485a6aa..dd9d7a9 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -465,15 +465,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, goto leave; } - if (ocfs2_populate_inode(inode, fe, 1) < 0) { - mlog(ML_ERROR, "populate inode failed! bh->b_blocknr=%llu, " - "i_blkno=%llu, i_ino=%lu\n", - (unsigned long long)(*new_fe_bh)->b_blocknr, - (unsigned long long)le64_to_cpu(fe->i_blkno), - inode->i_ino); - BUG(); - } - + ocfs2_populate_inode(inode, fe, 1); ocfs2_inode_set_new(osb, inode); if (!ocfs2_mount_local(osb)) { status = ocfs2_create_new_inode_locks(inode); @@ -1752,9 +1744,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); - status = ocfs2_read_block(orphan_dir_inode, - OCFS2_I(orphan_dir_inode)->ip_blkno, - &orphan_dir_bh); + status = ocfs2_read_inode_block(orphan_dir_inode, &orphan_dir_bh); if (status < 0) { mlog_errno(status); goto leave; diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index cbd03df..ed0a0cf 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c @@ -84,7 +84,7 @@ static char *ocfs2_fast_symlink_getlink(struct inode *inode, mlog_entry_void(); - status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, bh); + status = ocfs2_read_inode_block(inode, bh); if (status < 0) { mlog_errno(status); link = ERR_PTR(status); -- 1.5.6.5
Joel Becker
2008-Oct-16 09:38 UTC
[Ocfs2-devel] [PATCH 2/8] ocfs2: Morph the haphazard OCFS2_IS_VALID_DINODE() checks.
Random places in the code would check a dinode bh to see if it was valid. Not only did they do different levels of validation, they handled errors in different ways. The previous commit unified inode block reads, validating all block reads in the same place. Thus, these haphazard checks are no longer necessary. Rather than eliminate them, however, we change them to BUG_ON() checks. This ensures the assumptions remain true. All of the code paths to these checks have been audited to ensure they come from a validated inode read. Signed-off-by: Joel Becker <joel.becker at oracle.com> --- fs/ocfs2/alloc.c | 50 +++++++++++++++++++++----------------------------- fs/ocfs2/journal.c | 17 +++++------------ fs/ocfs2/ocfs2.h | 8 -------- fs/ocfs2/resize.c | 10 ++++------ fs/ocfs2/suballoc.c | 36 ++++++++++++++++-------------------- 5 files changed, 46 insertions(+), 75 deletions(-) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 6d222d5..7a7f9da 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -187,20 +187,12 @@ static int ocfs2_dinode_insert_check(struct inode *inode, static int ocfs2_dinode_sanity_check(struct inode *inode, struct ocfs2_extent_tree *et) { - int ret = 0; - struct ocfs2_dinode *di; + struct ocfs2_dinode *di = et->et_object; BUG_ON(et->et_ops != &ocfs2_dinode_et_ops); + BUG_ON(!OCFS2_IS_VALID_DINODE(di)); - di = et->et_object; - if (!OCFS2_IS_VALID_DINODE(di)) { - ret = -EIO; - ocfs2_error(inode->i_sb, - "Inode %llu has invalid path root", - (unsigned long long)OCFS2_I(inode)->ip_blkno); - } - - return ret; + return 0; } static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et) @@ -5308,13 +5300,13 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb, start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk); di = (struct ocfs2_dinode *) tl_bh->b_data; - tl = &di->id2.i_dealloc; - if (!OCFS2_IS_VALID_DINODE(di)) { - OCFS2_RO_ON_INVALID_DINODE(osb->sb, di); - status = -EIO; - goto bail; - } + /* tl_bh is loaded from ocfs2_truncate_log_init(). It's validated + * by the underlying call to ocfs2_read_inode_block(), so any + * corruption is a code bug */ + BUG_ON(!OCFS2_IS_VALID_DINODE(di)); + + tl = &di->id2.i_dealloc; tl_count = le16_to_cpu(tl->tl_count); mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) || tl_count == 0, @@ -5464,13 +5456,13 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) BUG_ON(mutex_trylock(&tl_inode->i_mutex)); di = (struct ocfs2_dinode *) tl_bh->b_data; - tl = &di->id2.i_dealloc; - if (!OCFS2_IS_VALID_DINODE(di)) { - OCFS2_RO_ON_INVALID_DINODE(osb->sb, di); - status = -EIO; - goto out; - } + /* tl_bh is loaded from ocfs2_truncate_log_init(). It's validated + * by the underlying call to ocfs2_read_inode_block(), so any + * corruption is a code bug */ + BUG_ON(!OCFS2_IS_VALID_DINODE(di)); + + tl = &di->id2.i_dealloc; num_to_flush = le16_to_cpu(tl->tl_used); mlog(0, "Flush %u records from truncate log #%llu\n", num_to_flush, (unsigned long long)OCFS2_I(tl_inode)->ip_blkno); @@ -5625,13 +5617,13 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb, } di = (struct ocfs2_dinode *) tl_bh->b_data; - tl = &di->id2.i_dealloc; - if (!OCFS2_IS_VALID_DINODE(di)) { - OCFS2_RO_ON_INVALID_DINODE(tl_inode->i_sb, di); - status = -EIO; - goto bail; - } + /* tl_bh is loaded from ocfs2_get_truncate_log_info(). It's + * validated by the underlying call to ocfs2_read_inode_block(), + * so any corruption is a code bug */ + BUG_ON(!OCFS2_IS_VALID_DINODE(di)); + + tl = &di->id2.i_dealloc; if (le16_to_cpu(tl->tl_used)) { mlog(0, "We'll have %u logs to recover\n", le16_to_cpu(tl->tl_used)); diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 54eb9ca..5c4c739 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -587,17 +587,11 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, mlog_entry_void(); fe = (struct ocfs2_dinode *)bh->b_data; - if (!OCFS2_IS_VALID_DINODE(fe)) { - /* This is called from startup/shutdown which will - * handle the errors in a specific manner, so no need - * to call ocfs2_error() here. */ - mlog(ML_ERROR, "Journal dinode %llu has invalid " - "signature: %.*s", - (unsigned long long)le64_to_cpu(fe->i_blkno), 7, - fe->i_signature); - status = -EIO; - goto out; - } + + /* The journal bh on the osb always comes from ocfs2_journal_init() + * and was validated there inside ocfs2_inode_lock_full(). It's a + * code bug if we mess it up. */ + BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); flags = le32_to_cpu(fe->id1.journal1.ij_flags); if (dirty) @@ -613,7 +607,6 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, if (status < 0) mlog_errno(status); -out: mlog_exit(status); return status; } diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index a21a465..7ea38ac 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -443,14 +443,6 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) #define OCFS2_IS_VALID_DINODE(ptr) \ (!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE)) -#define OCFS2_RO_ON_INVALID_DINODE(__sb, __di) do { \ - typeof(__di) ____di = (__di); \ - ocfs2_error((__sb), \ - "Dinode # %llu has bad signature %.*s", \ - (unsigned long long)le64_to_cpu((____di)->i_blkno), 7, \ - (____di)->i_signature); \ -} while (0) - #define OCFS2_IS_VALID_EXTENT_BLOCK(ptr) \ (!strcmp((ptr)->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE)) diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c index ffd48db..739d452 100644 --- a/fs/ocfs2/resize.c +++ b/fs/ocfs2/resize.c @@ -314,6 +314,10 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters) fe = (struct ocfs2_dinode *)main_bm_bh->b_data; + /* main_bm_bh is validated by inode read inside ocfs2_inode_lock(), + * so any corruption is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); + if (le16_to_cpu(fe->id2.i_chain.cl_cpg) ! ocfs2_group_bitmap_size(osb->sb) * 8) { mlog(ML_ERROR, "The disk is too old and small. " @@ -322,12 +326,6 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters) goto out_unlock; } - if (!OCFS2_IS_VALID_DINODE(fe)) { - OCFS2_RO_ON_INVALID_DINODE(main_bm_inode->i_sb, fe); - ret = -EIO; - goto out_unlock; - } - first_new_cluster = le32_to_cpu(fe->i_clusters); lgd_blkno = ocfs2_which_cluster_group(main_bm_inode, first_new_cluster - 1); diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index c5ff18b..95d432b 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -441,11 +441,11 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, ac->ac_alloc_slot = slot; fe = (struct ocfs2_dinode *) bh->b_data; - if (!OCFS2_IS_VALID_DINODE(fe)) { - OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); - status = -EIO; - goto bail; - } + + /* The bh was validated by the inode read inside + * ocfs2_inode_lock(). Any corruption is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); + if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) { ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu", (unsigned long long)le64_to_cpu(fe->i_blkno)); @@ -931,11 +931,6 @@ static int ocfs2_relink_block_group(handle_t *handle, struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data; - if (!OCFS2_IS_VALID_DINODE(fe)) { - OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); - status = -EIO; - goto out; - } if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); status = -EIO; @@ -1392,11 +1387,11 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, BUG_ON(!ac->ac_bh); fe = (struct ocfs2_dinode *) ac->ac_bh->b_data; - if (!OCFS2_IS_VALID_DINODE(fe)) { - OCFS2_RO_ON_INVALID_DINODE(osb->sb, fe); - status = -EIO; - goto bail; - } + + /* The bh was validated by the inode read during + * ocfs2_reserve_suballoc_bits(). Any corruption is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); + if (le32_to_cpu(fe->id1.bitmap1.i_used) > le32_to_cpu(fe->id1.bitmap1.i_total)) { ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used " @@ -1782,11 +1777,12 @@ int ocfs2_free_suballoc_bits(handle_t *handle, mlog_entry_void(); - if (!OCFS2_IS_VALID_DINODE(fe)) { - OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); - status = -EIO; - goto bail; - } + /* The alloc_bh comes from ocfs2_free_dinode() or + * ocfs2_free_clusters(). The callers have all locked the + * allocator and gotten alloc_bh from the lock call. This + * validates the dinode buffer. Any corruption that has happended + * is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl)); mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n", -- 1.5.6.5
Joel Becker
2008-Oct-16 09:38 UTC
[Ocfs2-devel] [PATCH 3/8] ocfs2: Consolidate validation of group descriptors.
Currently the validation of group descriptors is directly duplicated so that one version can error the filesystem and the other (resize) can just report the problem. Consolidate to one function that takes a boolean. Wrap that function with the old call for the old users. This is in preparation for lifting the read+validate step into a single function. Signed-off-by: Joel Becker <joel.becker at oracle.com> --- fs/ocfs2/resize.c | 40 +++++---------------------- fs/ocfs2/suballoc.c | 74 +++++++++++++++++++++++++++++--------------------- fs/ocfs2/suballoc.h | 20 +++++++++++-- 3 files changed, 68 insertions(+), 66 deletions(-) diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c index 739d452..a2de32a 100644 --- a/fs/ocfs2/resize.c +++ b/fs/ocfs2/resize.c @@ -396,41 +396,16 @@ static int ocfs2_check_new_group(struct inode *inode, struct buffer_head *group_bh) { int ret; - struct ocfs2_group_desc *gd; + struct ocfs2_group_desc *gd + (struct ocfs2_group_desc *)group_bh->b_data; u16 cl_bpc = le16_to_cpu(di->id2.i_chain.cl_bpc); - unsigned int max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * - le16_to_cpu(di->id2.i_chain.cl_bpc); + ret = ocfs2_validate_group_descriptor(inode->i_sb, di, gd, 1); + if (ret) + goto out; - gd = (struct ocfs2_group_desc *)group_bh->b_data; - - ret = -EIO; - if (!OCFS2_IS_VALID_GROUP_DESC(gd)) - mlog(ML_ERROR, "Group descriptor # %llu isn't valid.\n", - (unsigned long long)le64_to_cpu(gd->bg_blkno)); - else if (di->i_blkno != gd->bg_parent_dinode) - mlog(ML_ERROR, "Group descriptor # %llu has bad parent " - "pointer (%llu, expected %llu)\n", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - (unsigned long long)le64_to_cpu(gd->bg_parent_dinode), - (unsigned long long)le64_to_cpu(di->i_blkno)); - else if (le16_to_cpu(gd->bg_bits) > max_bits) - mlog(ML_ERROR, "Group descriptor # %llu has bit count of %u\n", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_bits)); - else if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) - mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but " - "claims that %u are free\n", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_bits), - le16_to_cpu(gd->bg_free_bits_count)); - else if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) - mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but " - "max bitmap bits of %u\n", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_bits), - 8 * le16_to_cpu(gd->bg_size)); - else if (le16_to_cpu(gd->bg_chain) != input->chain) + ret = -EINVAL; + if (le16_to_cpu(gd->bg_chain) != input->chain) mlog(ML_ERROR, "Group descriptor # %llu has bad chain %u " "while input has %u set.\n", (unsigned long long)le64_to_cpu(gd->bg_blkno), @@ -449,6 +424,7 @@ static int ocfs2_check_new_group(struct inode *inode, else ret = 0; +out: return ret; } diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 95d432b..ddba97d 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -146,59 +146,71 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) } /* somewhat more expensive than our other checks, so use sparingly. */ -int ocfs2_check_group_descriptor(struct super_block *sb, - struct ocfs2_dinode *di, - struct ocfs2_group_desc *gd) +int ocfs2_validate_group_descriptor(struct super_block *sb, + struct ocfs2_dinode *di, + struct ocfs2_group_desc *gd, + int clean_error) { unsigned int max_bits; +#define do_error(fmt, ...) \ + do{ \ + if (clean_error) \ + mlog(ML_ERROR, fmt "\n", ##__VA_ARGS__); \ + else \ + ocfs2_error(sb, fmt, ##__VA_ARGS__); \ + } while (0) + if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(sb, gd); - return -EIO; + do_error("Group Descriptor #%llu has bad signature %.*s", + (unsigned long long)le64_to_cpu(gd->bg_blkno), 7, + gd->bg_signature); + return -EINVAL; } if (di->i_blkno != gd->bg_parent_dinode) { - ocfs2_error(sb, "Group descriptor # %llu has bad parent " - "pointer (%llu, expected %llu)", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - (unsigned long long)le64_to_cpu(gd->bg_parent_dinode), - (unsigned long long)le64_to_cpu(di->i_blkno)); - return -EIO; + do_error("Group descriptor # %llu has bad parent " + "pointer (%llu, expected %llu)", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + (unsigned long long)le64_to_cpu(gd->bg_parent_dinode), + (unsigned long long)le64_to_cpu(di->i_blkno)); + return -EINVAL; } max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc); if (le16_to_cpu(gd->bg_bits) > max_bits) { - ocfs2_error(sb, "Group descriptor # %llu has bit count of %u", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_bits)); - return -EIO; + do_error("Group descriptor # %llu has bit count of %u", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_bits)); + return -EINVAL; } if (le16_to_cpu(gd->bg_chain) > le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) { - ocfs2_error(sb, "Group descriptor # %llu has bad chain %u", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_chain)); - return -EIO; + do_error("Group descriptor # %llu has bad chain %u", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_chain)); + return -EINVAL; } if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) { - ocfs2_error(sb, "Group descriptor # %llu has bit count %u but " - "claims that %u are free", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_bits), - le16_to_cpu(gd->bg_free_bits_count)); - return -EIO; + do_error("Group descriptor # %llu has bit count %u but " + "claims that %u are free", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_bits), + le16_to_cpu(gd->bg_free_bits_count)); + return -EINVAL; } if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) { - ocfs2_error(sb, "Group descriptor # %llu has bit count %u but " - "max bitmap bits of %u", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_bits), - 8 * le16_to_cpu(gd->bg_size)); - return -EIO; + do_error("Group descriptor # %llu has bit count %u but " + "max bitmap bits of %u", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_bits), + 8 * le16_to_cpu(gd->bg_size)); + return -EINVAL; } +#undef do_error return 0; } diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index 4df159d..7adfcc4 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -165,9 +165,23 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac); u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster); /* somewhat more expensive than our other checks, so use sparingly. */ -int ocfs2_check_group_descriptor(struct super_block *sb, - struct ocfs2_dinode *di, - struct ocfs2_group_desc *gd); +/* + * By default, ocfs2_validate_group_descriptor() calls ocfs2_error() when it + * finds a problem. A caller that wants to check a group descriptor + * without going readonly passes a nonzero clean_error. This is only + * resize, really. + */ +int ocfs2_validate_group_descriptor(struct super_block *sb, + struct ocfs2_dinode *di, + struct ocfs2_group_desc *gd, + int clean_error); +static inline int ocfs2_check_group_descriptor(struct super_block *sb, + struct ocfs2_dinode *di, + struct ocfs2_group_desc *gd) +{ + return ocfs2_validate_group_descriptor(sb, di, gd, 0); +} + int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et, u32 clusters_to_add, u32 extents_to_split, struct ocfs2_alloc_context **data_ac, -- 1.5.6.5
Joel Becker
2008-Oct-16 09:38 UTC
[Ocfs2-devel] [PATCH 4/8] ocfs2: Wrap group descriptor reads in a dedicated function.
We have a clean call for validating group descriptors, but every place that wants the always does a read_block()+validate() call pair. Create a toplevel ocfs2_read_group_descriptor() that does the right thing. This allows us to leverage the single call point later for fancier handling. We also add validation of gd->bg_generation against the superblock and gd->bg_blkno against the block we thought we read. Signed-off-by: Joel Becker <joel.becker at oracle.com> --- fs/ocfs2/resize.c | 12 +---- fs/ocfs2/suballoc.c | 108 ++++++++++++++++++++++++++++++-------------------- fs/ocfs2/suballoc.h | 19 +++++---- 3 files changed, 78 insertions(+), 61 deletions(-) diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c index a2de32a..252baff 100644 --- a/fs/ocfs2/resize.c +++ b/fs/ocfs2/resize.c @@ -330,20 +330,14 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters) lgd_blkno = ocfs2_which_cluster_group(main_bm_inode, first_new_cluster - 1); - ret = ocfs2_read_block(main_bm_inode, lgd_blkno, &group_bh); + ret = ocfs2_read_group_descriptor(main_bm_inode, fe, lgd_blkno, + &group_bh); if (ret < 0) { mlog_errno(ret); goto out_unlock; } - group = (struct ocfs2_group_desc *)group_bh->b_data; - ret = ocfs2_check_group_descriptor(inode->i_sb, fe, group); - if (ret) { - mlog_errno(ret); - goto out_unlock; - } - cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc); if (le16_to_cpu(group->bg_bits) / cl_bpc + new_clusters > le16_to_cpu(fe->id2.i_chain.cl_cpg)) { @@ -400,7 +394,7 @@ static int ocfs2_check_new_group(struct inode *inode, (struct ocfs2_group_desc *)group_bh->b_data; u16 cl_bpc = le16_to_cpu(di->id2.i_chain.cl_bpc); - ret = ocfs2_validate_group_descriptor(inode->i_sb, di, gd, 1); + ret = ocfs2_validate_group_descriptor(inode->i_sb, di, group_bh, 1); if (ret) goto out; diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index ddba97d..797f509 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -145,13 +145,13 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc); } -/* somewhat more expensive than our other checks, so use sparingly. */ int ocfs2_validate_group_descriptor(struct super_block *sb, struct ocfs2_dinode *di, - struct ocfs2_group_desc *gd, + struct buffer_head *bh, int clean_error) { unsigned int max_bits; + struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; #define do_error(fmt, ...) \ do{ \ @@ -162,16 +162,32 @@ int ocfs2_validate_group_descriptor(struct super_block *sb, } while (0) if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { - do_error("Group Descriptor #%llu has bad signature %.*s", - (unsigned long long)le64_to_cpu(gd->bg_blkno), 7, + do_error("Group descriptor #%llu has bad signature %.*s", + (unsigned long long)bh->b_blocknr, 7, gd->bg_signature); return -EINVAL; } + if (le64_to_cpu(gd->bg_blkno) != bh->b_blocknr) { + do_error("Group descriptor #%llu has an invalid bg_blkno " + "of %llu", + (unsigned long long)bh->b_blocknr, + (unsigned long long)le64_to_cpu(gd->bg_blkno)); + return -EINVAL; + } + + if (le32_to_cpu(gd->bg_generation) != OCFS2_SB(sb)->fs_generation) { + do_error("Group descriptor #%llu has an invalid " + "fs_generation of #%u", + (unsigned long long)bh->b_blocknr, + le32_to_cpu(gd->bg_generation)); + return -EINVAL; + } + if (di->i_blkno != gd->bg_parent_dinode) { - do_error("Group descriptor # %llu has bad parent " + do_error("Group descriptor #%llu has bad parent " "pointer (%llu, expected %llu)", - (unsigned long long)le64_to_cpu(gd->bg_blkno), + (unsigned long long)bh->b_blocknr, (unsigned long long)le64_to_cpu(gd->bg_parent_dinode), (unsigned long long)le64_to_cpu(di->i_blkno)); return -EINVAL; @@ -179,33 +195,33 @@ int ocfs2_validate_group_descriptor(struct super_block *sb, max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc); if (le16_to_cpu(gd->bg_bits) > max_bits) { - do_error("Group descriptor # %llu has bit count of %u", - (unsigned long long)le64_to_cpu(gd->bg_blkno), + do_error("Group descriptor #%llu has bit count of %u", + (unsigned long long)bh->b_blocknr, le16_to_cpu(gd->bg_bits)); return -EINVAL; } if (le16_to_cpu(gd->bg_chain) > le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) { - do_error("Group descriptor # %llu has bad chain %u", - (unsigned long long)le64_to_cpu(gd->bg_blkno), + do_error("Group descriptor #%llu has bad chain %u", + (unsigned long long)bh->b_blocknr, le16_to_cpu(gd->bg_chain)); return -EINVAL; } if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) { - do_error("Group descriptor # %llu has bit count %u but " + do_error("Group descriptor #%llu has bit count %u but " "claims that %u are free", - (unsigned long long)le64_to_cpu(gd->bg_blkno), + (unsigned long long)bh->b_blocknr, le16_to_cpu(gd->bg_bits), le16_to_cpu(gd->bg_free_bits_count)); return -EINVAL; } if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) { - do_error("Group descriptor # %llu has bit count %u but " + do_error("Group descriptor #%llu has bit count %u but " "max bitmap bits of %u", - (unsigned long long)le64_to_cpu(gd->bg_blkno), + (unsigned long long)bh->b_blocknr, le16_to_cpu(gd->bg_bits), 8 * le16_to_cpu(gd->bg_size)); return -EINVAL; @@ -215,6 +231,30 @@ int ocfs2_validate_group_descriptor(struct super_block *sb, return 0; } +int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di, + u64 gd_blkno, struct buffer_head **bh) +{ + int rc; + struct buffer_head *tmp = *bh; + + rc = ocfs2_read_block(inode, gd_blkno, &tmp); + if (rc) + goto out; + + rc = ocfs2_validate_group_descriptor(inode->i_sb, di, tmp, 0); + if (rc) { + brelse(tmp); + goto out; + } + + /* If ocfs2_read_block() got us a new bh, pass it up. */ + if (!*bh) + *bh = tmp; + +out: + return rc; +} + static int ocfs2_block_group_fill(handle_t *handle, struct inode *alloc_inode, struct buffer_head *bg_bh, @@ -1177,21 +1217,17 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, u16 found; struct buffer_head *group_bh = NULL; struct ocfs2_group_desc *gd; + struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data; struct inode *alloc_inode = ac->ac_inode; - ret = ocfs2_read_block(alloc_inode, gd_blkno, &group_bh); + ret = ocfs2_read_group_descriptor(alloc_inode, di, gd_blkno, + &group_bh); if (ret < 0) { mlog_errno(ret); return ret; } gd = (struct ocfs2_group_desc *) group_bh->b_data; - if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, gd); - ret = -EIO; - goto out; - } - ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, ac->ac_max_block, bit_off, &found); if (ret < 0) { @@ -1248,19 +1284,14 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, bits_wanted, chain, (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno); - status = ocfs2_read_block(alloc_inode, - le64_to_cpu(cl->cl_recs[chain].c_blkno), - &group_bh); + status = ocfs2_read_group_descriptor(alloc_inode, fe, + le64_to_cpu(cl->cl_recs[chain].c_blkno), + &group_bh); if (status < 0) { mlog_errno(status); goto bail; } bg = (struct ocfs2_group_desc *) group_bh->b_data; - status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg); - if (status) { - mlog_errno(status); - goto bail; - } status = -ENOSPC; /* for now, the chain search is a bit simplistic. We just use @@ -1278,18 +1309,13 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, next_group = le64_to_cpu(bg->bg_next_group); prev_group_bh = group_bh; group_bh = NULL; - status = ocfs2_read_block(alloc_inode, - next_group, &group_bh); + status = ocfs2_read_group_descriptor(alloc_inode, fe, + next_group, &group_bh); if (status < 0) { mlog_errno(status); goto bail; } bg = (struct ocfs2_group_desc *) group_bh->b_data; - status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg); - if (status) { - mlog_errno(status); - goto bail; - } } if (status < 0) { if (status != -ENOSPC) @@ -1801,18 +1827,14 @@ int ocfs2_free_suballoc_bits(handle_t *handle, (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count, (unsigned long long)bg_blkno, start_bit); - status = ocfs2_read_block(alloc_inode, bg_blkno, &group_bh); + status = ocfs2_read_group_descriptor(alloc_inode, fe, bg_blkno, + &group_bh); if (status < 0) { mlog_errno(status); goto bail; } - group = (struct ocfs2_group_desc *) group_bh->b_data; - status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group); - if (status) { - mlog_errno(status); - goto bail; - } + BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits)); status = ocfs2_block_group_clear_bits(handle, alloc_inode, diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index 7adfcc4..43de4fd 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -164,23 +164,24 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac); * and return that block offset. */ u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster); -/* somewhat more expensive than our other checks, so use sparingly. */ /* * By default, ocfs2_validate_group_descriptor() calls ocfs2_error() when it * finds a problem. A caller that wants to check a group descriptor * without going readonly passes a nonzero clean_error. This is only - * resize, really. + * resize, really. Everyone else should be using + * ocfs2_read_group_descriptor(). */ int ocfs2_validate_group_descriptor(struct super_block *sb, struct ocfs2_dinode *di, - struct ocfs2_group_desc *gd, + struct buffer_head *bh, int clean_error); -static inline int ocfs2_check_group_descriptor(struct super_block *sb, - struct ocfs2_dinode *di, - struct ocfs2_group_desc *gd) -{ - return ocfs2_validate_group_descriptor(sb, di, gd, 0); -} +/* + * Read a group descriptor block into *bh. If *bh is NULL, a bh will be + * allocated. This is a cached read. The descriptor will be validated with + * ocfs2_validate_group_descriptor(). + */ +int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di, + u64 gd_blkno, struct buffer_head **bh); int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et, u32 clusters_to_add, u32 extents_to_split, -- 1.5.6.5
Joel Becker
2008-Oct-16 09:38 UTC
[Ocfs2-devel] [PATCH 5/8] ocfs2: Morph the haphazard OCFS2_IS_VALID_GROUP_DESC() checks.
Random places in the code would check a group descriptor bh to see if it was valid. The previous commit unified descriptor block reads, validating all block reads in the same place. Thus, these checks are no longer necessary. Rather than eliminate them, however, we change them to BUG_ON() checks. This ensures the assumptions remain true. All of the code paths to these checks have been audited to ensure they come from a validated descriptor read. Signed-off-by: Joel Becker <joel.becker at oracle.com> --- fs/ocfs2/ocfs2.h | 7 ------- fs/ocfs2/suballoc.c | 39 ++++++++++++++------------------------- 2 files changed, 14 insertions(+), 32 deletions(-) diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 7ea38ac..8fc78d8 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -457,13 +457,6 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) #define OCFS2_IS_VALID_GROUP_DESC(ptr) \ (!strcmp((ptr)->bg_signature, OCFS2_GROUP_DESC_SIGNATURE)) -#define OCFS2_RO_ON_INVALID_GROUP_DESC(__sb, __gd) do { \ - typeof(__gd) ____gd = (__gd); \ - ocfs2_error((__sb), \ - "Group Descriptor # %llu has bad signature %.*s", \ - (unsigned long long)le64_to_cpu((____gd)->bg_blkno), 7, \ - (____gd)->bg_signature); \ -} while (0) static inline unsigned long ino_from_blkno(struct super_block *sb, u64 blkno) diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 797f509..766a00b 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -842,10 +842,9 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, int offset, start, found, status = 0; struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; - if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(osb->sb, bg); - return -EIO; - } + /* Callers got this descriptor from + * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); found = start = best_offset = best_size = 0; bitmap = bg->bg_bitmap; @@ -910,11 +909,9 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle, mlog_entry_void(); - if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); - status = -EIO; - goto bail; - } + /* All callers get the descriptor via + * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits); mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off, @@ -983,16 +980,10 @@ static int ocfs2_relink_block_group(handle_t *handle, struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data; - if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); - status = -EIO; - goto out; - } - if (!OCFS2_IS_VALID_GROUP_DESC(prev_bg)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, prev_bg); - status = -EIO; - goto out; - } + /* The caller got these descriptors from + * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); + BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(prev_bg)); mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n", (unsigned long long)le64_to_cpu(fe->i_blkno), chain, @@ -1055,7 +1046,7 @@ out_rollback: bg->bg_next_group = cpu_to_le64(bg_ptr); prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr); } -out: + mlog_exit(status); return status; } @@ -1758,11 +1749,9 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle, mlog_entry_void(); - if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); - status = -EIO; - goto bail; - } + /* The caller got this descriptor from + * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); mlog(0, "off = %u, num = %u\n", bit_off, num_bits); -- 1.5.6.5
Joel Becker
2008-Oct-16 09:38 UTC
[Ocfs2-devel] [PATCH 6/8] ocfs2: Wrap extent block reads in a dedicated function.
We weren't consistently checking extent blocks after we read them. Most places checked the signature, but none checked h_blkno or h_fs_signature. Create a toplevel ocfs2_read_extent_block() that does the read and the validation. Signed-off-by: Joel Becker <joel.becker at oracle.com> --- fs/ocfs2/alloc.c | 151 ++++++++++++++++++++++++++++++++----------------- fs/ocfs2/alloc.h | 8 +++ fs/ocfs2/extent_map.c | 23 ++------ fs/ocfs2/ocfs2.h | 8 --- 4 files changed, 111 insertions(+), 79 deletions(-) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 7a7f9da..e6ddb8a 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -678,6 +678,66 @@ struct ocfs2_merge_ctxt { int c_split_covers_rec; }; +static int ocfs2_validate_extent_block(struct super_block *sb, + struct buffer_head *bh) +{ + struct ocfs2_extent_block *eb + (struct ocfs2_extent_block *)bh->b_data; + + if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { + ocfs2_error(sb, + "Extent block #%llu has bad signature %.*s", + (unsigned long long)bh->b_blocknr, 7, + eb->h_signature); + return -EINVAL; + } + + if (le64_to_cpu(eb->h_blkno) != bh->b_blocknr) { + ocfs2_error(sb, + "Extent block #%llu has an invalid h_blkno " + "of %llu", + (unsigned long long)bh->b_blocknr, + (unsigned long long)le64_to_cpu(eb->h_blkno)); + return -EINVAL; + } + + if (le32_to_cpu(eb->h_fs_generation) != OCFS2_SB(sb)->fs_generation) { + ocfs2_error(sb, + "Extent block #%llu has an invalid " + "h_fs_generation of #%u", + (unsigned long long)bh->b_blocknr, + le32_to_cpu(eb->h_fs_generation)); + return -EINVAL; + } + + return 0; +} + +int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno, + struct buffer_head **bh) +{ + int rc; + struct buffer_head *tmp = *bh; + + rc = ocfs2_read_block(inode, eb_blkno, &tmp); + if (rc) + goto out; + + rc = ocfs2_validate_extent_block(inode->i_sb, tmp); + if (rc) { + brelse(tmp); + goto out; + } + + /* If ocfs2_read_block() got us a new bh, pass it up. */ + if (!*bh) + *bh = tmp; + +out: + return rc; +} + + /* * How many free extents have we got before we need more meta data? */ @@ -697,8 +757,7 @@ int ocfs2_num_free_extents(struct ocfs2_super *osb, last_eb_blk = ocfs2_et_get_last_eb_blk(et); if (last_eb_blk) { - retval = ocfs2_read_block(inode, last_eb_blk, - &eb_bh); + retval = ocfs2_read_extent_block(inode, last_eb_blk, &eb_bh); if (retval < 0) { mlog_errno(retval); goto bail; @@ -900,11 +959,8 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, for(i = 0; i < new_blocks; i++) { bh = new_eb_bhs[i]; eb = (struct ocfs2_extent_block *) bh->b_data; - if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); - status = -EIO; - goto bail; - } + /* ocfs2_create_new_meta_bhs() should create it right! */ + BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb)); eb_el = &eb->h_list; status = ocfs2_journal_access(handle, inode, bh, @@ -1044,11 +1100,8 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, } eb = (struct ocfs2_extent_block *) new_eb_bh->b_data; - if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); - status = -EIO; - goto bail; - } + /* ocfs2_create_new_meta_bhs() should create it right! */ + BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb)); eb_el = &eb->h_list; root_el = et->et_root_el; @@ -1168,18 +1221,13 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb, brelse(bh); bh = NULL; - status = ocfs2_read_block(inode, blkno, &bh); + status = ocfs2_read_extent_block(inode, blkno, &bh); if (status < 0) { mlog_errno(status); goto bail; } eb = (struct ocfs2_extent_block *) bh->b_data; - if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); - status = -EIO; - goto bail; - } el = &eb->h_list; if (le16_to_cpu(el->l_next_free_rec) < @@ -1532,7 +1580,7 @@ static int __ocfs2_find_path(struct inode *inode, brelse(bh); bh = NULL; - ret = ocfs2_read_block(inode, blkno, &bh); + ret = ocfs2_read_extent_block(inode, blkno, &bh); if (ret) { mlog_errno(ret); goto out; @@ -1540,11 +1588,6 @@ static int __ocfs2_find_path(struct inode *inode, eb = (struct ocfs2_extent_block *) bh->b_data; el = &eb->h_list; - if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); - ret = -EIO; - goto out; - } if (le16_to_cpu(el->l_next_free_rec) > le16_to_cpu(el->l_count)) { @@ -4089,8 +4132,15 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path, le16_to_cpu(new_el->l_count)) { bh = path_leaf_bh(left_path); eb = (struct ocfs2_extent_block *)bh->b_data; - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, - eb); + ocfs2_error(inode->i_sb, + "Extent block #%llu has an " + "invalid l_next_free_rec of " + "%d. It should have " + "matched the l_count of %d", + (unsigned long long)le64_to_cpu(eb->h_blkno), + le16_to_cpu(new_el->l_next_free_rec), + le16_to_cpu(new_el->l_count)); + status = -EINVAL; goto out; } rec = &new_el->l_recs[ @@ -4139,8 +4189,12 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path, if (le16_to_cpu(new_el->l_next_free_rec) <= 1) { bh = path_leaf_bh(right_path); eb = (struct ocfs2_extent_block *)bh->b_data; - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, - eb); + ocfs2_error(inode->i_sb, + "Extent block #%llu has an " + "invalid l_next_free_rec of %d", + (unsigned long long)le64_to_cpu(eb->h_blkno), + le16_to_cpu(new_el->l_next_free_rec)); + status = -EINVAL; goto out; } rec = &new_el->l_recs[1]; @@ -4286,7 +4340,9 @@ static int ocfs2_figure_insert_type(struct inode *inode, * ocfs2_figure_insert_type() and ocfs2_add_branch() * may want it later. */ - ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), &bh); + ret = ocfs2_read_extent_block(inode, + ocfs2_et_get_last_eb_blk(et), + &bh); if (ret) { mlog_exit(ret); goto out; @@ -4752,20 +4808,15 @@ static int __ocfs2_mark_extent_written(struct inode *inode, if (path->p_tree_depth) { struct ocfs2_extent_block *eb; - ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), - &last_eb_bh); + ret = ocfs2_read_extent_block(inode, + ocfs2_et_get_last_eb_blk(et), + &last_eb_bh); if (ret) { mlog_exit(ret); goto out; } eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; - if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); - ret = -EROFS; - goto out; - } - rightmost_el = &eb->h_list; } else rightmost_el = path_root_el(path); @@ -4910,8 +4961,9 @@ static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et, depth = path->p_tree_depth; if (depth > 0) { - ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), - &last_eb_bh); + ret = ocfs2_read_extent_block(inode, + ocfs2_et_get_last_eb_blk(et), + &last_eb_bh); if (ret < 0) { mlog_errno(ret); goto out; @@ -6067,11 +6119,10 @@ static int ocfs2_find_new_last_ext_blk(struct inode *inode, eb = (struct ocfs2_extent_block *) bh->b_data; el = &eb->h_list; - if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); - ret = -EROFS; - goto out; - } + + /* ocfs2_find_leaf() gets the eb from ocfs2_read_extent_block(). + * Any corruption is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb)); *new_last_eb = bh; get_bh(*new_last_eb); @@ -6976,20 +7027,14 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb, ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc); if (fe->id2.i_list.l_tree_depth) { - status = ocfs2_read_block(inode, le64_to_cpu(fe->i_last_eb_blk), - &last_eb_bh); + status = ocfs2_read_extent_block(inode, + le64_to_cpu(fe->i_last_eb_blk), + &last_eb_bh); if (status < 0) { mlog_errno(status); goto bail; } eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; - if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); - - brelse(last_eb_bh); - status = -EIO; - goto bail; - } } (*tc)->tc_last_eb_bh = last_eb_bh; diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index 70257c8..44bcb50 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h @@ -73,6 +73,14 @@ void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, struct buffer_head *bh, struct ocfs2_xattr_value_root *xv); +/* + * Read an extent block into *bh. If *bh is NULL, a bh will be + * allocated. This is a cached read. The extent block will be validated + * with ocfs2_validate_extent_block(). + */ +int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno, + struct buffer_head **bh); + struct ocfs2_alloc_context; int ocfs2_insert_extent(struct ocfs2_super *osb, handle_t *handle, diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index b686b31..0bd9d96 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -293,7 +293,7 @@ static int ocfs2_last_eb_is_empty(struct inode *inode, struct ocfs2_extent_block *eb; struct ocfs2_extent_list *el; - ret = ocfs2_read_block(inode, last_eb_blk, &eb_bh); + ret = ocfs2_read_extent_block(inode, last_eb_blk, &eb_bh); if (ret) { mlog_errno(ret); goto out; @@ -302,12 +302,6 @@ static int ocfs2_last_eb_is_empty(struct inode *inode, eb = (struct ocfs2_extent_block *) eb_bh->b_data; el = &eb->h_list; - if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { - ret = -EROFS; - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); - goto out; - } - if (el->l_tree_depth) { ocfs2_error(inode->i_sb, "Inode %lu has non zero tree depth in " @@ -381,23 +375,16 @@ static int ocfs2_figure_hole_clusters(struct inode *inode, if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL) goto no_more_extents; - ret = ocfs2_read_block(inode, - le64_to_cpu(eb->h_next_leaf_blk), - &next_eb_bh); + ret = ocfs2_read_extent_block(inode, + le64_to_cpu(eb->h_next_leaf_blk), + &next_eb_bh); if (ret) { mlog_errno(ret); goto out; } - next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data; - - if (!OCFS2_IS_VALID_EXTENT_BLOCK(next_eb)) { - ret = -EROFS; - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, next_eb); - goto out; - } + next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data; el = &next_eb->h_list; - i = ocfs2_search_for_hole_index(el, v_cluster); } diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 8fc78d8..bb85f76 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -446,14 +446,6 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) #define OCFS2_IS_VALID_EXTENT_BLOCK(ptr) \ (!strcmp((ptr)->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE)) -#define OCFS2_RO_ON_INVALID_EXTENT_BLOCK(__sb, __eb) do { \ - typeof(__eb) ____eb = (__eb); \ - ocfs2_error((__sb), \ - "Extent Block # %llu has bad signature %.*s", \ - (unsigned long long)le64_to_cpu((____eb)->h_blkno), 7, \ - (____eb)->h_signature); \ -} while (0) - #define OCFS2_IS_VALID_GROUP_DESC(ptr) \ (!strcmp((ptr)->bg_signature, OCFS2_GROUP_DESC_SIGNATURE)) -- 1.5.6.5
Joel Becker
2008-Oct-16 09:38 UTC
[Ocfs2-devel] [PATCH 7/8] ocfs2: Wrap dirblock reads in a dedicated function.
We have ocfs2_bread() as a vestige of the original ext-based dir code. It's only used by directories, though. Turn it into ocfs2_read_dir_block(), with a prototype matching the other metadata read functions. It's set up to validate dirblocks when the time comes. Signed-off-by: Joel Becker <joel.becker at oracle.com> --- fs/ocfs2/dir.c | 150 +++++++++++++++++++++++++++++++++----------------------- 1 files changed, 88 insertions(+), 62 deletions(-) diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 5777045..c2f3fd9 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -82,49 +82,6 @@ static int ocfs2_do_extend_dir(struct super_block *sb, struct ocfs2_alloc_context *meta_ac, struct buffer_head **new_bh); -static struct buffer_head *ocfs2_bread(struct inode *inode, - int block, int *err, int reada) -{ - struct buffer_head *bh = NULL; - int tmperr; - u64 p_blkno; - int readflags = 0; - - if (reada) - readflags |= OCFS2_BH_READAHEAD; - - if (((u64)block << inode->i_sb->s_blocksize_bits) >- i_size_read(inode)) { - BUG_ON(!reada); - return NULL; - } - - down_read(&OCFS2_I(inode)->ip_alloc_sem); - tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL, - NULL); - up_read(&OCFS2_I(inode)->ip_alloc_sem); - if (tmperr < 0) { - mlog_errno(tmperr); - goto fail; - } - - tmperr = ocfs2_read_blocks(inode, p_blkno, 1, &bh, readflags); - if (tmperr < 0) - goto fail; - - tmperr = 0; - - *err = 0; - return bh; - -fail: - brelse(bh); - bh = NULL; - - *err = -EIO; - return NULL; -} - /* * bh passed here can be an inode block or a dir data block, depending * on the inode inline data flag. @@ -250,6 +207,76 @@ out: return NULL; } +static int ocfs2_validate_dir_block(struct super_block *sb, + struct buffer_head *bh) +{ + /* + * Nothing yet. We don't validate dirents here, that's handled + * in-place when the code walks them. + */ + + return 0; +} + +/* + * This function forces all errors to -EIO for consistency with its + * predecessor, ocfs2_bread(). We haven't audited what returning the + * real error codes would do to callers. We log the real codes with + * mlog_errno() before we squash them. + */ +static int ocfs2_read_dir_block(struct inode *inode, u64 v_block, + struct buffer_head **bh, int flags) +{ + int rc = 0; + struct buffer_head *tmp = *bh; + u64 p_blkno; + + if (((u64)v_block << inode->i_sb->s_blocksize_bits) >+ i_size_read(inode)) { + BUG_ON(!(flags & OCFS2_BH_READAHEAD)); + goto out; + } + + down_read(&OCFS2_I(inode)->ip_alloc_sem); + rc = ocfs2_extent_map_get_blocks(inode, v_block, &p_blkno, NULL, + NULL); + up_read(&OCFS2_I(inode)->ip_alloc_sem); + if (rc) { + mlog_errno(rc); + goto out; + } + + if (!p_blkno) { + rc = -EIO; + mlog(ML_ERROR, + "Directory #%llu contains a hole at offset %llu\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, + (unsigned long long)v_block << inode->i_sb->s_blocksize_bits); + goto out; + } + + rc = ocfs2_read_blocks(inode, p_blkno, 1, &tmp, flags); + if (rc) { + mlog_errno(rc); + goto out; + } + + if (!(flags & OCFS2_BH_READAHEAD)) { + rc = ocfs2_validate_dir_block(inode->i_sb, tmp); + if (rc) { + brelse(tmp); + goto out; + } + } + + /* If ocfs2_read_blocks() got us a new bh, pass it up. */ + if (!*bh) + *bh = tmp; + +out: + return rc ? -EIO : 0; +} + static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen, struct inode *dir, struct ocfs2_dir_entry **res_dir) @@ -296,15 +323,17 @@ restart: } num++; - bh = ocfs2_bread(dir, b++, &err, 1); + bh = NULL; + err = ocfs2_read_dir_block(dir, b++, &bh, + OCFS2_BH_READAHEAD); bh_use[ra_max] = bh; } } if ((bh = bh_use[ra_ptr++]) == NULL) goto next; - if (ocfs2_read_block(dir, block, &bh)) { + if (ocfs2_read_dir_block(dir, block, &bh, 0)) { /* read error, skip block & hope for the best. - * ocfs2_read_block() has released the bh. */ + * ocfs2_read_dir_block() has released the bh. */ ocfs2_error(dir->i_sb, "reading directory %llu, " "offset %lu\n", (unsigned long long)OCFS2_I(dir)->ip_blkno, @@ -724,7 +753,6 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, int i, stored; struct buffer_head * bh, * tmp; struct ocfs2_dir_entry * de; - int err; struct super_block * sb = inode->i_sb; unsigned int ra_sectors = 16; @@ -735,12 +763,8 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, while (!error && !stored && *f_pos < i_size_read(inode)) { blk = (*f_pos) >> sb->s_blocksize_bits; - bh = ocfs2_bread(inode, blk, &err, 0); - if (!bh) { - mlog(ML_ERROR, - "directory #%llu contains a hole at offset %lld\n", - (unsigned long long)OCFS2_I(inode)->ip_blkno, - *f_pos); + if (ocfs2_read_dir_block(inode, blk, &bh, 0)) { + /* Skip the corrupt dirblock and keep trying */ *f_pos += sb->s_blocksize - offset; continue; } @@ -754,8 +778,10 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) { for (i = ra_sectors >> (sb->s_blocksize_bits - 9); i > 0; i--) { - tmp = ocfs2_bread(inode, ++blk, &err, 1); - brelse(tmp); + tmp = NULL; + if (!ocfs2_read_dir_block(inode, ++blk, &tmp, + OCFS2_BH_READAHEAD)) + brelse(tmp); } last_ra_blk = blk; ra_sectors = 8; @@ -828,6 +854,7 @@ revalidate: } offset = 0; brelse(bh); + bh = NULL; } stored = 0; @@ -1680,8 +1707,8 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name, struct super_block *sb = dir->i_sb; int status; - bh = ocfs2_bread(dir, 0, &status, 0); - if (!bh) { + status = ocfs2_read_dir_block(dir, 0, &bh, 0); + if (status) { mlog_errno(status); goto bail; } @@ -1702,11 +1729,10 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name, status = -ENOSPC; goto bail; } - bh = ocfs2_bread(dir, - offset >> sb->s_blocksize_bits, - &status, - 0); - if (!bh) { + status = ocfs2_read_dir_block(dir, + offset >> sb->s_blocksize_bits, + &bh, 0); + if (status) { mlog_errno(status); goto bail; } -- 1.5.6.5
Joel Becker
2008-Oct-16 09:38 UTC
[Ocfs2-devel] [PATCH 8/8] ocfs2: Validate metadata only when it's read from disk.
Add an optional validation hook to ocfs2_read_blocks(). Now the validation function is only called when a block was actually read off of disk. It is not called when the buffer was in cache. We add a buffer state bit BH_NeedsValidate to flag these buffers. It must always be one higher than the last JBD2 buffer state bit. The dinode, dirblock, and extent_block validators are lifted to this scheme directly. The group_descriptor validator needs to be split into two pieces. The first part only needs the gd buffer and is passed to ocfs2_read_block(). The second part requires the dinode as well, and is called every time. It's only 3 compares, so it's tiny. This also allows us to clean up the non-fatal gd check used by resize.c. It now has no magic argument. Signed-off-by: Joel Becker <joel.becker at oracle.com> --- fs/ocfs2/alloc.c | 17 +++----- fs/ocfs2/buffer_head_io.c | 33 ++++++++++++++++- fs/ocfs2/buffer_head_io.h | 27 ++++++++----- fs/ocfs2/dir.c | 13 ++---- fs/ocfs2/inode.c | 18 ++------ fs/ocfs2/resize.c | 2 +- fs/ocfs2/slot_map.c | 4 +- fs/ocfs2/suballoc.c | 91 +++++++++++++++++++++++++++++++------------- fs/ocfs2/suballoc.h | 15 +++---- fs/ocfs2/xattr.c | 46 ++++++++++++---------- 10 files changed, 162 insertions(+), 104 deletions(-) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index e6ddb8a..a29b26e 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -684,6 +684,9 @@ static int ocfs2_validate_extent_block(struct super_block *sb, struct ocfs2_extent_block *eb (struct ocfs2_extent_block *)bh->b_data; + mlog(0, "Validating extent block %llu\n", + (unsigned long long)bh->b_blocknr); + if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { ocfs2_error(sb, "Extent block #%llu has bad signature %.*s", @@ -719,21 +722,13 @@ int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno, int rc; struct buffer_head *tmp = *bh; - rc = ocfs2_read_block(inode, eb_blkno, &tmp); - if (rc) - goto out; - - rc = ocfs2_validate_extent_block(inode->i_sb, tmp); - if (rc) { - brelse(tmp); - goto out; - } + rc = ocfs2_read_block(inode, eb_blkno, &tmp, + ocfs2_validate_extent_block); /* If ocfs2_read_block() got us a new bh, pass it up. */ - if (!*bh) + if (!rc && !*bh) *bh = tmp; -out: return rc; } diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 7e947c6..eeb6b70 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -39,6 +39,19 @@ #include "buffer_head_io.h" +/* + * Bits on bh->b_state used by ocfs2. + * + * These MUST be after the JBD2 bits. Currently BH_Unshadow is the last + * JBD2 bit. + */ +enum ocfs2_state_bits { + BH_NeedsValidate = BH_Unshadow + 1, +}; + +/* Expand the magic b_state functions */ +BUFFER_FNS(NeedsValidate, needs_validate); + int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, struct inode *inode) { @@ -171,7 +184,9 @@ bail: } int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, - struct buffer_head *bhs[], int flags) + struct buffer_head *bhs[], int flags, + int (*validate)(struct super_block *sb, + struct buffer_head *bh)) { int status = 0; int i, ignore_cache = 0; @@ -305,6 +320,8 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, clear_buffer_uptodate(bh); get_bh(bh); /* for end_buffer_read_sync() */ + if (validate) + set_buffer_needs_validate(bh); bh->b_end_io = end_buffer_read_sync; submit_bh(READ, bh); continue; @@ -335,6 +352,20 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, bhs[i] = NULL; continue; } + + if (buffer_needs_validate(bh)) { + /* We never set NeedsValidate if the + * buffer was held by the journal, so + * that better not have changed */ + BUG_ON(buffer_jbd(bh)); + clear_buffer_needs_validate(bh); + status = validate(inode->i_sb, bh); + if (status) { + put_bh(bh); + bhs[i] = NULL; + continue; + } + } } /* Always set the buffer in the cache, even if it was diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h index 75e1dcb..c75d682 100644 --- a/fs/ocfs2/buffer_head_io.h +++ b/fs/ocfs2/buffer_head_io.h @@ -31,21 +31,24 @@ void ocfs2_end_buffer_io_sync(struct buffer_head *bh, int uptodate); -static inline int ocfs2_read_block(struct inode *inode, - u64 off, - struct buffer_head **bh); - int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, struct inode *inode); -int ocfs2_read_blocks(struct inode *inode, - u64 block, - int nr, - struct buffer_head *bhs[], - int flags); int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, unsigned int nr, struct buffer_head *bhs[]); +/* + * If not NULL, validate() will be called on a buffer that is freshly + * read from disk. It will not be called if the buffer was in cache. + * Note that if validate() is being used for this buffer, it needs to + * be set even for a READAHEAD call, as it marks the buffer for later + * validation. + */ +int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, + struct buffer_head *bhs[], int flags, + int (*validate)(struct super_block *sb, + struct buffer_head *bh)); + int ocfs2_write_super_or_backup(struct ocfs2_super *osb, struct buffer_head *bh); @@ -53,7 +56,9 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb, #define OCFS2_BH_READAHEAD 8 static inline int ocfs2_read_block(struct inode *inode, u64 off, - struct buffer_head **bh) + struct buffer_head **bh, + int (*validate)(struct super_block *sb, + struct buffer_head *bh)) { int status = 0; @@ -63,7 +68,7 @@ static inline int ocfs2_read_block(struct inode *inode, u64 off, goto bail; } - status = ocfs2_read_blocks(inode, off, 1, bh, 0); + status = ocfs2_read_blocks(inode, off, 1, bh, 0, validate); bail: return status; diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index c2f3fd9..7e863d4 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -214,6 +214,8 @@ static int ocfs2_validate_dir_block(struct super_block *sb, * Nothing yet. We don't validate dirents here, that's handled * in-place when the code walks them. */ + mlog(0, "Validating dirblock %llu\n", + (unsigned long long)bh->b_blocknr); return 0; } @@ -255,20 +257,13 @@ static int ocfs2_read_dir_block(struct inode *inode, u64 v_block, goto out; } - rc = ocfs2_read_blocks(inode, p_blkno, 1, &tmp, flags); + rc = ocfs2_read_blocks(inode, p_blkno, 1, &tmp, flags, + ocfs2_validate_dir_block); if (rc) { mlog_errno(rc); goto out; } - if (!(flags & OCFS2_BH_READAHEAD)) { - rc = ocfs2_validate_dir_block(inode->i_sb, tmp); - if (rc) { - brelse(tmp); - goto out; - } - } - /* If ocfs2_read_blocks() got us a new bh, pass it up. */ if (!*bh) *bh = tmp; diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index fa24e54..1bc9f3c 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -1249,6 +1249,9 @@ int ocfs2_validate_inode_block(struct super_block *sb, int rc = -EINVAL; struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; + mlog(0, "Validating dinode %llu\n", + (unsigned long long)bh->b_blocknr); + BUG_ON(!buffer_uptodate(bh)); if (!OCFS2_IS_VALID_DINODE(di)) { @@ -1294,23 +1297,12 @@ int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh, struct buffer_head *tmp = *bh; rc = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, &tmp, - flags); - if (rc) - goto out; - - if (!(flags & OCFS2_BH_READAHEAD)) { - rc = ocfs2_validate_inode_block(inode->i_sb, tmp); - if (rc) { - brelse(tmp); - goto out; - } - } + flags, ocfs2_validate_inode_block); /* If ocfs2_read_blocks() got us a new bh, pass it up. */ - if (!*bh) + if (!rc && !*bh) *bh = tmp; -out: return rc; } diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c index 252baff..867de3e 100644 --- a/fs/ocfs2/resize.c +++ b/fs/ocfs2/resize.c @@ -394,7 +394,7 @@ static int ocfs2_check_new_group(struct inode *inode, (struct ocfs2_group_desc *)group_bh->b_data; u16 cl_bpc = le16_to_cpu(di->id2.i_chain.cl_bpc); - ret = ocfs2_validate_group_descriptor(inode->i_sb, di, group_bh, 1); + ret = ocfs2_check_group_descriptor(inode->i_sb, di, group_bh); if (ret) goto out; diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index bdda2d8..40661e7 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c @@ -151,7 +151,7 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb) * this is not true, the read of -1 (UINT64_MAX) will fail. */ ret = ocfs2_read_blocks(si->si_inode, -1, si->si_blocks, si->si_bh, - OCFS2_BH_IGNORE_CACHE); + OCFS2_BH_IGNORE_CACHE, NULL); if (ret == 0) { spin_lock(&osb->osb_lock); ocfs2_update_slot_info(si); @@ -405,7 +405,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb, bh = NULL; /* Acquire a fresh bh */ status = ocfs2_read_blocks(si->si_inode, blkno, 1, &bh, - OCFS2_BH_IGNORE_CACHE); + OCFS2_BH_IGNORE_CACHE, NULL); if (status < 0) { mlog_errno(status); goto bail; diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 766a00b..226fe21 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -145,14 +145,6 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc); } -int ocfs2_validate_group_descriptor(struct super_block *sb, - struct ocfs2_dinode *di, - struct buffer_head *bh, - int clean_error) -{ - unsigned int max_bits; - struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; - #define do_error(fmt, ...) \ do{ \ if (clean_error) \ @@ -161,6 +153,12 @@ int ocfs2_validate_group_descriptor(struct super_block *sb, ocfs2_error(sb, fmt, ##__VA_ARGS__); \ } while (0) +static int ocfs2_validate_gd_self(struct super_block *sb, + struct buffer_head *bh, + int clean_error) +{ + struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; + if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { do_error("Group descriptor #%llu has bad signature %.*s", (unsigned long long)bh->b_blocknr, 7, @@ -184,6 +182,35 @@ int ocfs2_validate_group_descriptor(struct super_block *sb, return -EINVAL; } + if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) { + do_error("Group descriptor #%llu has bit count %u but " + "claims that %u are free", + (unsigned long long)bh->b_blocknr, + le16_to_cpu(gd->bg_bits), + le16_to_cpu(gd->bg_free_bits_count)); + return -EINVAL; + } + + if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) { + do_error("Group descriptor #%llu has bit count %u but " + "max bitmap bits of %u", + (unsigned long long)bh->b_blocknr, + le16_to_cpu(gd->bg_bits), + 8 * le16_to_cpu(gd->bg_size)); + return -EINVAL; + } + + return 0; +} + +static int ocfs2_validate_gd_parent(struct super_block *sb, + struct ocfs2_dinode *di, + struct buffer_head *bh, + int clean_error) +{ + unsigned int max_bits; + struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; + if (di->i_blkno != gd->bg_parent_dinode) { do_error("Group descriptor #%llu has bad parent " "pointer (%llu, expected %llu)", @@ -209,26 +236,35 @@ int ocfs2_validate_group_descriptor(struct super_block *sb, return -EINVAL; } - if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) { - do_error("Group descriptor #%llu has bit count %u but " - "claims that %u are free", - (unsigned long long)bh->b_blocknr, - le16_to_cpu(gd->bg_bits), - le16_to_cpu(gd->bg_free_bits_count)); - return -EINVAL; - } + return 0; +} - if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) { - do_error("Group descriptor #%llu has bit count %u but " - "max bitmap bits of %u", - (unsigned long long)bh->b_blocknr, - le16_to_cpu(gd->bg_bits), - 8 * le16_to_cpu(gd->bg_size)); - return -EINVAL; - } #undef do_error - return 0; +/* + * This version only prints errors. It does not fail the filesystem, and + * exists only for resize. + */ +int ocfs2_check_group_descriptor(struct super_block *sb, + struct ocfs2_dinode *di, + struct buffer_head *bh) +{ + int rc; + + rc = ocfs2_validate_gd_self(sb, bh, 1); + if (!rc) + rc = ocfs2_validate_gd_parent(sb, di, bh, 1); + + return rc; +} + +static int ocfs2_validate_group_descriptor(struct super_block *sb, + struct buffer_head *bh) +{ + mlog(0, "Validating group descriptor %llu\n", + (unsigned long long)bh->b_blocknr); + + return ocfs2_validate_gd_self(sb, bh, 0); } int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di, @@ -237,11 +273,12 @@ int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di, int rc; struct buffer_head *tmp = *bh; - rc = ocfs2_read_block(inode, gd_blkno, &tmp); + rc = ocfs2_read_block(inode, gd_blkno, &tmp, + ocfs2_validate_group_descriptor); if (rc) goto out; - rc = ocfs2_validate_group_descriptor(inode->i_sb, di, tmp, 0); + rc = ocfs2_validate_gd_parent(inode->i_sb, di, tmp, 0); if (rc) { brelse(tmp); goto out; diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index 43de4fd..e3c13c7 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -165,16 +165,15 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac); u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster); /* - * By default, ocfs2_validate_group_descriptor() calls ocfs2_error() when it + * By default, ocfs2_read_group_descriptor() calls ocfs2_error() when it * finds a problem. A caller that wants to check a group descriptor - * without going readonly passes a nonzero clean_error. This is only - * resize, really. Everyone else should be using - * ocfs2_read_group_descriptor(). + * without going readonly should read the block with ocfs2_read_block[s]() + * and then checking it with this function. This is only resize, really. + * Everyone else should be using ocfs2_read_group_descriptor(). */ -int ocfs2_validate_group_descriptor(struct super_block *sb, - struct ocfs2_dinode *di, - struct buffer_head *bh, - int clean_error); +int ocfs2_check_group_descriptor(struct super_block *sb, + struct ocfs2_dinode *di, + struct buffer_head *bh); /* * Read a group descriptor block into *bh. If *bh is NULL, a bh will be * allocated. This is a cached read. The descriptor will be validated with diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 802c414..48e00ba 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -537,7 +537,8 @@ static int ocfs2_xattr_block_list(struct inode *inode, if (!di->i_xattr_loc) return ret; - ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh); + ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), + &blk_bh, NULL); if (ret < 0) { mlog_errno(ret); return ret; @@ -670,7 +671,7 @@ static int ocfs2_xattr_get_value_outside(struct inode *inode, blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); /* Copy ocfs2_xattr_value */ for (i = 0; i < num_clusters * bpc; i++, blkno++) { - ret = ocfs2_read_block(inode, blkno, &bh); + ret = ocfs2_read_block(inode, blkno, &bh, NULL); if (ret) { mlog_errno(ret); goto out; @@ -761,7 +762,8 @@ static int ocfs2_xattr_block_get(struct inode *inode, memset(&xs->bucket, 0, sizeof(xs->bucket)); - ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh); + ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), + &blk_bh, NULL); if (ret < 0) { mlog_errno(ret); return ret; @@ -917,7 +919,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode, blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); for (i = 0; i < num_clusters * bpc; i++, blkno++) { - ret = ocfs2_read_block(inode, blkno, &bh); + ret = ocfs2_read_block(inode, blkno, &bh, NULL); if (ret) { mlog_errno(ret); goto out_commit; @@ -1508,7 +1510,7 @@ static int ocfs2_xattr_free_block(struct inode *inode, u64 blk, bg_blkno; u16 bit; - ret = ocfs2_read_block(inode, block, &blk_bh); + ret = ocfs2_read_block(inode, block, &blk_bh, NULL); if (ret < 0) { mlog_errno(ret); goto out; @@ -1766,7 +1768,8 @@ static int ocfs2_xattr_block_find(struct inode *inode, if (!di->i_xattr_loc) return ret; - ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh); + ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), + &blk_bh, NULL); if (ret < 0) { mlog_errno(ret); return ret; @@ -2207,8 +2210,9 @@ static int ocfs2_find_xe_in_bucket(struct inode *inode, break; } - ret = ocfs2_read_block(inode, header_bh->b_blocknr + block_off, - &name_bh); + ret = ocfs2_read_block(inode, + header_bh->b_blocknr + block_off, + &name_bh, NULL); if (ret) { mlog_errno(ret); break; @@ -2259,7 +2263,7 @@ static int ocfs2_xattr_bucket_find(struct inode *inode, u32 last_hash; u64 blkno; - ret = ocfs2_read_block(inode, p_blkno, &bh); + ret = ocfs2_read_block(inode, p_blkno, &bh, NULL); if (ret) { mlog_errno(ret); goto out; @@ -2275,7 +2279,7 @@ static int ocfs2_xattr_bucket_find(struct inode *inode, blkno = p_blkno + bucket * blk_per_bucket; - ret = ocfs2_read_block(inode, blkno, &bh); + ret = ocfs2_read_block(inode, blkno, &bh, NULL); if (ret) { mlog_errno(ret); goto out; @@ -2349,7 +2353,7 @@ static int ocfs2_xattr_bucket_find(struct inode *inode, */ ret = ocfs2_read_blocks(inode, xs->bucket.bhs[0]->b_blocknr + 1, blk_per_bucket - 1, &xs->bucket.bhs[1], - 0); + 0, NULL); if (ret) { mlog_errno(ret); goto out; @@ -2426,7 +2430,7 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode, for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) { ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, - bucket.bhs, 0); + bucket.bhs, 0, NULL); if (ret) { mlog_errno(ret); goto out; @@ -2694,7 +2698,7 @@ static int ocfs2_xattr_update_xattr_search(struct inode *inode, ret = ocfs2_read_blocks(inode, xs->bucket.bhs[0]->b_blocknr + 1, blk_per_bucket - 1, &xs->bucket.bhs[1], - 0); + 0, NULL); if (ret) { mlog_errno(ret); return ret; @@ -2898,7 +2902,7 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode, if (!bhs) return -ENOMEM; - ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, bhs, 0); + ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, bhs, 0, NULL); if (ret) goto out; @@ -3098,7 +3102,7 @@ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, goto out; } - ret = ocfs2_read_block(inode, prev_blkno, &old_bh); + ret = ocfs2_read_block(inode, prev_blkno, &old_bh, NULL); if (ret < 0) { mlog_errno(ret); brelse(new_bh); @@ -3151,8 +3155,8 @@ static int ocfs2_read_xattr_bucket(struct inode *inode, u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); if (!new) - return ocfs2_read_blocks(inode, blkno, - blk_per_bucket, bhs, 0); + return ocfs2_read_blocks(inode, blkno, blk_per_bucket, + bhs, 0, NULL); for (i = 0; i < blk_per_bucket; i++) { bhs[i] = sb_getblk(inode->i_sb, blkno + i); @@ -3467,7 +3471,7 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode, ocfs2_journal_dirty(handle, first_bh); /* update the new bucket header. */ - ret = ocfs2_read_block(inode, to_blk_start, &bh); + ret = ocfs2_read_block(inode, to_blk_start, &bh, NULL); if (ret < 0) { mlog_errno(ret); goto out; @@ -3854,7 +3858,7 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode, goto out; } - ret = ocfs2_read_block(inode, p_blkno, &first_bh); + ret = ocfs2_read_block(inode, p_blkno, &first_bh, NULL); if (ret) { mlog_errno(ret); goto out; @@ -4099,7 +4103,7 @@ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode, ret = ocfs2_read_blocks(inode, xs->bucket.bhs[0]->b_blocknr + 1, blk_per_bucket - 1, &xs->bucket.bhs[1], - 0); + 0, NULL); if (ret) { mlog_errno(ret); goto out; @@ -4205,7 +4209,7 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize); value_blk += header_bh->b_blocknr; - ret = ocfs2_read_block(inode, value_blk, &value_bh); + ret = ocfs2_read_block(inode, value_blk, &value_bh, NULL); if (ret) { mlog_errno(ret); goto out; -- 1.5.6.5