Ashish Samant
2017-Oct-12 19:12 UTC
[Ocfs2-devel] [PATCH] ocfs2: fstrim: Fix start offset of first cluster group during fstrim
The first cluster group descriptor is not stored at the start of the group but at an offset from the start. We need to take this into account while doing fstrim on the first cluster group. Otherwise we will wrongly start fstrim a few blocks after the desired start block and the range can cross over into the next cluster group and zero out the group descriptor there. This can cause filesytem corruption that cannot be fixed by fsck. Signed-off-by: Ashish Samant <ashish.samant at oracle.com> Cc: stable at vger.kernel.org --- fs/ocfs2/alloc.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index a177eae..addd7c5 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -7304,13 +7304,24 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, static int ocfs2_trim_extent(struct super_block *sb, struct ocfs2_group_desc *gd, - u32 start, u32 count) + u64 group, u32 start, u32 count) { u64 discard, bcount; + struct ocfs2_super *osb = OCFS2_SB(sb); bcount = ocfs2_clusters_to_blocks(sb, count); - discard = le64_to_cpu(gd->bg_blkno) + - ocfs2_clusters_to_blocks(sb, start); + discard = ocfs2_clusters_to_blocks(sb, start); + + /* + * For the first cluster group, the gd->bg_blkno is not at the start + * of the group, but at an offset from the start. If we add it while + * calculating discard for first group, we will wrongly start fstrim a + * few blocks after the desried start block and the range can cross + * over into the next cluster group. So, add it only if this is not + * the first cluster group. + */ + if (group != osb->first_cluster_group_blkno) + discard += le64_to_cpu(gd->bg_blkno); trace_ocfs2_trim_extent(sb, (unsigned long long)discard, bcount); @@ -7318,7 +7329,7 @@ static int ocfs2_trim_extent(struct super_block *sb, } static int ocfs2_trim_group(struct super_block *sb, - struct ocfs2_group_desc *gd, + struct ocfs2_group_desc *gd, u64 group, u32 start, u32 max, u32 minbits) { int ret = 0, count = 0, next; @@ -7337,7 +7348,7 @@ static int ocfs2_trim_group(struct super_block *sb, next = ocfs2_find_next_bit(bitmap, max, start); if ((next - start) >= minbits) { - ret = ocfs2_trim_extent(sb, gd, + ret = ocfs2_trim_extent(sb, gd, group, start, next - start); if (ret < 0) { mlog_errno(ret); @@ -7435,7 +7446,8 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range) } gd = (struct ocfs2_group_desc *)gd_bh->b_data; - cnt = ocfs2_trim_group(sb, gd, first_bit, last_bit, minlen); + cnt = ocfs2_trim_group(sb, gd, group, + first_bit, last_bit, minlen); brelse(gd_bh); gd_bh = NULL; if (cnt < 0) { -- 1.9.1
Junxiao Bi
2017-Oct-13 00:45 UTC
[Ocfs2-devel] [PATCH] ocfs2: fstrim: Fix start offset of first cluster group during fstrim
On 10/13/2017 03:12 AM, Ashish Samant wrote:> The first cluster group descriptor is not stored at the start of the group > but at an offset from the start. We need to take this into account while > doing fstrim on the first cluster group. Otherwise we will wrongly start > fstrim a few blocks after the desired start block and the range can cross > over into the next cluster group and zero out the group descriptor there. > This can cause filesytem corruption that cannot be fixed by fsck. > > Signed-off-by: Ashish Samant <ashish.samant at oracle.com> > Cc: stable at vger.kernel.orgLooks good. Reviewed-by: Junxiao Bi <junxiao.bi at oracle.com>> --- > fs/ocfs2/alloc.c | 24 ++++++++++++++++++------ > 1 file changed, 18 insertions(+), 6 deletions(-) > > diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c > index a177eae..addd7c5 100644 > --- a/fs/ocfs2/alloc.c > +++ b/fs/ocfs2/alloc.c > @@ -7304,13 +7304,24 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, > > static int ocfs2_trim_extent(struct super_block *sb, > struct ocfs2_group_desc *gd, > - u32 start, u32 count) > + u64 group, u32 start, u32 count) > { > u64 discard, bcount; > + struct ocfs2_super *osb = OCFS2_SB(sb); > > bcount = ocfs2_clusters_to_blocks(sb, count); > - discard = le64_to_cpu(gd->bg_blkno) + > - ocfs2_clusters_to_blocks(sb, start); > + discard = ocfs2_clusters_to_blocks(sb, start); > + > + /* > + * For the first cluster group, the gd->bg_blkno is not at the start > + * of the group, but at an offset from the start. If we add it while > + * calculating discard for first group, we will wrongly start fstrim a > + * few blocks after the desried start block and the range can cross > + * over into the next cluster group. So, add it only if this is not > + * the first cluster group. > + */ > + if (group != osb->first_cluster_group_blkno) > + discard += le64_to_cpu(gd->bg_blkno); > > trace_ocfs2_trim_extent(sb, (unsigned long long)discard, bcount); > > @@ -7318,7 +7329,7 @@ static int ocfs2_trim_extent(struct super_block *sb, > } > > static int ocfs2_trim_group(struct super_block *sb, > - struct ocfs2_group_desc *gd, > + struct ocfs2_group_desc *gd, u64 group, > u32 start, u32 max, u32 minbits) > { > int ret = 0, count = 0, next; > @@ -7337,7 +7348,7 @@ static int ocfs2_trim_group(struct super_block *sb, > next = ocfs2_find_next_bit(bitmap, max, start); > > if ((next - start) >= minbits) { > - ret = ocfs2_trim_extent(sb, gd, > + ret = ocfs2_trim_extent(sb, gd, group, > start, next - start); > if (ret < 0) { > mlog_errno(ret); > @@ -7435,7 +7446,8 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range) > } > > gd = (struct ocfs2_group_desc *)gd_bh->b_data; > - cnt = ocfs2_trim_group(sb, gd, first_bit, last_bit, minlen); > + cnt = ocfs2_trim_group(sb, gd, group, > + first_bit, last_bit, minlen); > brelse(gd_bh); > gd_bh = NULL; > if (cnt < 0) { >