Hi all, This patch set adds batched discard support to ocfs2. Please check. Thanks. Regards, Tao
Tao Ma
2011-Mar-07 10:05 UTC
[Ocfs2-devel] [PATCH 1/3] ocfs2: Add ocfs2_trim_fs for SSD trim support.
From: Tao Ma <boyu.mt at taobao.com> Add ocfs2_trim_fs to support trimming freed clusters in the volume. A range will be given and all the freed clusters greater than minlen will be discarded to the block layer. Signed-off-by: Tao Ma <boyu.mt at taobao.com> --- fs/ocfs2/alloc.c | 154 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/alloc.h | 1 + 2 files changed, 155 insertions(+), 0 deletions(-) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index b27a0d8..6e1b3b5 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -29,6 +29,7 @@ #include <linux/highmem.h> #include <linux/swap.h> #include <linux/quotaops.h> +#include <linux/blkdev.h> #include <cluster/masklog.h> @@ -7184,3 +7185,156 @@ out_commit: out: return ret; } + +static int ocfs2_trim_extent(struct super_block *sb, + struct ocfs2_group_desc *gd, + int start, int count) +{ + u64 discard; + + count = ocfs2_clusters_to_blocks(sb, count); + discard = le64_to_cpu(gd->bg_blkno) + + ocfs2_clusters_to_blocks(sb, start); + + return sb_issue_discard(sb, discard, count, GFP_NOFS, 0); +} + +static int ocfs2_trim_group(struct super_block *sb, + struct ocfs2_group_desc *gd, + int start, int max, int minbits) +{ + int ret = 0, count = 0, next; + void *bitmap = gd->bg_bitmap; + + while (start < max) { + start = ocfs2_find_next_zero_bit(bitmap, max, start); + if (start >= max) + break; + next = ocfs2_find_next_bit(bitmap, max, start); + + if ((next - start) >= minbits) { + ret = ocfs2_trim_extent(sb, gd, + start, next - start); + if (ret < 0) { + mlog_errno(ret); + break; + } + count += next - start; + } + start = next + 1; + + if (fatal_signal_pending(current)) { + count = -ERESTARTSYS; + break; + } + + if ((le16_to_cpu(gd->bg_free_bits_count) - count) < minbits) + break; + } + + if (ret < 0) + count = ret; + + return count; +} + +int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range) +{ + struct ocfs2_super *osb = OCFS2_SB(sb); + u64 start, len, minlen, trimmed, first_group, last_group, group; + int ret, cnt, first_bit, last_bit; + struct buffer_head *main_bm_bh = NULL; + struct inode *main_bm_inode = NULL; + struct buffer_head *gd_bh = NULL; + struct ocfs2_dinode *main_bm; + struct ocfs2_group_desc *gd = NULL; + + if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) + return -EROFS; + + start = range->start >> osb->s_clustersize_bits; + len = range->len >> osb->s_clustersize_bits; + minlen = range->minlen >> osb->s_clustersize_bits; + trimmed = 0; + + if (!len || !minlen || minlen >= osb->bitmap_cpg) + return -EINVAL; + + main_bm_inode = ocfs2_get_system_file_inode(osb, + GLOBAL_BITMAP_SYSTEM_INODE, + OCFS2_INVALID_SLOT); + if (!main_bm_inode) { + ret = -EIO; + mlog_errno(ret); + goto out; + } + + mutex_lock(&main_bm_inode->i_mutex); + + ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 0); + if (ret < 0) { + mlog_errno(ret); + goto out_mutex; + } + main_bm = (struct ocfs2_dinode *)main_bm_bh->b_data; + + if (start >= le32_to_cpu(main_bm->i_clusters)) { + ret = -EINVAL; + mlog_errno(ret); + goto out_unlock; + } + + if (start + len > le32_to_cpu(main_bm->i_clusters)) + len = le32_to_cpu(main_bm->i_clusters) - start; + + /* Determine first and last group to examine based on start and len */ + first_group = ocfs2_which_cluster_group(main_bm_inode, start); + if (first_group == osb->first_cluster_group_blkno) + first_bit = start; + else + first_bit = start - ocfs2_blocks_to_clusters(sb, first_group); + last_group = ocfs2_which_cluster_group(main_bm_inode, start + len - 1); + last_bit = osb->bitmap_cpg; + + for (group = first_group; group <= last_group;) { + if (first_bit + len >= osb->bitmap_cpg) + last_bit = osb->bitmap_cpg - first_bit; + else + last_bit = start + len; + + ret = ocfs2_read_group_descriptor(main_bm_inode, + main_bm, group, + &gd_bh); + if (ret < 0) { + mlog_errno(ret); + break; + } + + gd = (struct ocfs2_group_desc *)gd_bh->b_data; + cnt = ocfs2_trim_group(sb, gd, first_bit, last_bit, minlen); + brelse(gd_bh); + gd_bh = NULL; + if (cnt < 0) { + ret = cnt; + mlog_errno(ret); + break; + } + + trimmed += cnt; + len -= osb->bitmap_cpg - first_bit; + first_bit = 0; + if (group == osb->first_cluster_group_blkno) + group = ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg); + else + group += ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg); + } + range->len = trimmed * sb->s_blocksize; +out_unlock: + ocfs2_inode_unlock(main_bm_inode, 0); + brelse(main_bm_bh); +out_mutex: + mutex_unlock(&main_bm_inode->i_mutex); + iput(main_bm_inode); +out: + return ret; +} diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index 3bd08a0..ca381c5 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h @@ -239,6 +239,7 @@ int ocfs2_find_leaf(struct ocfs2_caching_info *ci, struct buffer_head **leaf_bh); int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster); +int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range); /* * Helper function to look at the # of clusters in an extent record. */ -- 1.6.3.GIT
From: Tao Ma <boyu.mt at taobao.com> Add the corresponding ioctl function for FITRIM. Signed-off-by: Tao Ma <boyu.mt at taobao.com> --- fs/ocfs2/ioctl.c | 24 ++++++++++++++++++++++++ 1 files changed, 24 insertions(+), 0 deletions(-) diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index d9bfa90..ed13656 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -520,6 +520,29 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return -EFAULT; return ocfs2_info_handle(inode, &info, 0); + case FITRIM: + { + struct super_block *sb = inode->i_sb; + struct fstrim_range range; + int ret = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&range, (struct fstrim_range *)arg, + sizeof(range))) + return -EFAULT; + + ret = ocfs2_trim_fs(sb, &range); + if (ret < 0) + return ret; + + if (copy_to_user((struct fstrim_range *)arg, &range, + sizeof(range))) + return -EFAULT; + + return 0; + } default: return -ENOTTY; } @@ -547,6 +570,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) case OCFS2_IOC_GROUP_EXTEND: case OCFS2_IOC_GROUP_ADD: case OCFS2_IOC_GROUP_ADD64: + case FITRIM: break; case OCFS2_IOC_REFLINK: if (copy_from_user(&args, (struct reflink_arguments *)arg, -- 1.6.3.GIT
From: Tao Ma <boyu.mt at taobao.com> Add the corresponding trace event for trim. Signed-off-by: Tao Ma <boyu.mt at taobao.com> --- fs/ocfs2/alloc.c | 7 +++++++ fs/ocfs2/ocfs2_trace.h | 25 +++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 0 deletions(-) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 6e1b3b5..da05b14 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -7196,6 +7196,8 @@ static int ocfs2_trim_extent(struct super_block *sb, discard = le64_to_cpu(gd->bg_blkno) + ocfs2_clusters_to_blocks(sb, start); + trace_ocfs2_trim_extent(sb, (unsigned long long)discard, count); + return sb_issue_discard(sb, discard, count, GFP_NOFS, 0); } @@ -7206,6 +7208,9 @@ static int ocfs2_trim_group(struct super_block *sb, int ret = 0, count = 0, next; void *bitmap = gd->bg_bitmap; + trace_ocfs2_trim_group((unsigned long long)le64_to_cpu(gd->bg_blkno), + start, max, minbits); + while (start < max) { start = ocfs2_find_next_zero_bit(bitmap, max, start); if (start >= max) @@ -7287,6 +7292,8 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range) if (start + len > le32_to_cpu(main_bm->i_clusters)) len = le32_to_cpu(main_bm->i_clusters) - start; + trace_ocfs2_trim_fs(start, len, minlen); + /* Determine first and last group to examine based on start and len */ first_group = ocfs2_which_cluster_group(main_bm_inode, start); if (first_group == osb->first_cluster_group_blkno) diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h index a1dae5b..9ab22a1 100644 --- a/fs/ocfs2/ocfs2_trace.h +++ b/fs/ocfs2/ocfs2_trace.h @@ -688,6 +688,31 @@ TRACE_EVENT(ocfs2_cache_block_dealloc, __entry->blkno, __entry->bit) ); +TRACE_EVENT(ocfs2_trim_extent, + TP_PROTO(struct super_block *sb, unsigned long long blk, + unsigned long long count), + TP_ARGS(sb, blk, count), + TP_STRUCT__entry( + __field(int, dev_major) + __field(int, dev_minor) + __field(unsigned long long, blk) + __field(__u64, count) + ), + TP_fast_assign( + __entry->dev_major = MAJOR(sb->s_dev); + __entry->dev_minor = MINOR(sb->s_dev); + __entry->blk = blk; + __entry->count = count; + ), + TP_printk("%d %d %llu %llu", + __entry->dev_major, __entry->dev_minor, + __entry->blk, __entry->count) +); + +DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(ocfs2_trim_group); + +DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_trim_fs); + /* End of trace events for fs/ocfs2/alloc.c. */ /* Trace events for fs/ocfs2/localalloc.c. */ -- 1.6.3.GIT
Tao Ma
2011-Mar-08 15:26 UTC
[Ocfs2-devel] [PATCH 1/3 v2] ocfs2: Add ocfs2_trim_fs for SSD trim support.
Changelog from v1 to v2: 1. remove the check for hard ro and soft ro. 2. fix bug found by tristan. 3. if range->len = 0 return 0 instead of -EINVAL. 4. allow minlen = 0 to go ahead instead of returning -EINVAL. Regards, Tao>From 9074413a619f1af32c03c5959d66ff465643496c Mon Sep 17 00:00:00 2001From: Tao Ma <boyu.mt at taobao.com> Date: Wed, 9 Mar 2011 07:12:10 +0800 Subject: [PATCH 1/3 v2] ocfs2: Add ocfs2_trim_fs for SSD trim support. Add ocfs2_trim_fs to support trimming freed clusters in the volume. A range will be given and all the freed clusters greater than minlen will be discarded to the block layer. Signed-off-by: Tao Ma <boyu.mt at taobao.com> --- fs/ocfs2/alloc.c | 156 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/alloc.h | 1 + 2 files changed, 157 insertions(+), 0 deletions(-) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index b27a0d8..0ff46d9 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -29,6 +29,7 @@ #include <linux/highmem.h> #include <linux/swap.h> #include <linux/quotaops.h> +#include <linux/blkdev.h> #include <cluster/masklog.h> @@ -7184,3 +7185,158 @@ out_commit: out: return ret; } + +static int ocfs2_trim_extent(struct super_block *sb, + struct ocfs2_group_desc *gd, + int start, int count) +{ + u64 discard; + + count = ocfs2_clusters_to_blocks(sb, count); + discard = le64_to_cpu(gd->bg_blkno) + + ocfs2_clusters_to_blocks(sb, start); + + return sb_issue_discard(sb, discard, count, GFP_NOFS, 0); +} + +static int ocfs2_trim_group(struct super_block *sb, + struct ocfs2_group_desc *gd, + int start, int max, int minbits) +{ + int ret = 0, count = 0, next; + void *bitmap = gd->bg_bitmap; + + while (start < max) { + start = ocfs2_find_next_zero_bit(bitmap, max, start); + if (start >= max) + break; + next = ocfs2_find_next_bit(bitmap, max, start); + + if ((next - start) >= minbits) { + ret = ocfs2_trim_extent(sb, gd, + start, next - start); + if (ret < 0) { + mlog_errno(ret); + break; + } + count += next - start; + } + start = next + 1; + + if (fatal_signal_pending(current)) { + count = -ERESTARTSYS; + break; + } + + if ((le16_to_cpu(gd->bg_free_bits_count) - count) < minbits) + break; + } + + if (ret < 0) + count = ret; + + return count; +} + +int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range) +{ + struct ocfs2_super *osb = OCFS2_SB(sb); + u64 start, len, minlen, trimmed, first_group, last_group, group; + int ret, cnt, first_bit, last_bit; + struct buffer_head *main_bm_bh = NULL; + struct inode *main_bm_inode = NULL; + struct buffer_head *gd_bh = NULL; + struct ocfs2_dinode *main_bm; + struct ocfs2_group_desc *gd = NULL; + + start = range->start >> osb->s_clustersize_bits; + len = range->len >> osb->s_clustersize_bits; + minlen = range->minlen >> osb->s_clustersize_bits; + trimmed = 0; + + if (!len) { + range->len = 0; + return 0; + } + + if (minlen >= osb->bitmap_cpg) + return -EINVAL; + + main_bm_inode = ocfs2_get_system_file_inode(osb, + GLOBAL_BITMAP_SYSTEM_INODE, + OCFS2_INVALID_SLOT); + if (!main_bm_inode) { + ret = -EIO; + mlog_errno(ret); + goto out; + } + + mutex_lock(&main_bm_inode->i_mutex); + + ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 0); + if (ret < 0) { + mlog_errno(ret); + goto out_mutex; + } + main_bm = (struct ocfs2_dinode *)main_bm_bh->b_data; + + if (start >= le32_to_cpu(main_bm->i_clusters)) { + ret = -EINVAL; + mlog_errno(ret); + goto out_unlock; + } + + if (start + len > le32_to_cpu(main_bm->i_clusters)) + len = le32_to_cpu(main_bm->i_clusters) - start; + + /* Determine first and last group to examine based on start and len */ + first_group = ocfs2_which_cluster_group(main_bm_inode, start); + if (first_group == osb->first_cluster_group_blkno) + first_bit = start; + else + first_bit = start - ocfs2_blocks_to_clusters(sb, first_group); + last_group = ocfs2_which_cluster_group(main_bm_inode, start + len - 1); + last_bit = osb->bitmap_cpg; + + for (group = first_group; group <= last_group;) { + if (first_bit + len >= osb->bitmap_cpg) + last_bit = osb->bitmap_cpg; + else + last_bit = first_bit + len; + + ret = ocfs2_read_group_descriptor(main_bm_inode, + main_bm, group, + &gd_bh); + if (ret < 0) { + mlog_errno(ret); + break; + } + + gd = (struct ocfs2_group_desc *)gd_bh->b_data; + cnt = ocfs2_trim_group(sb, gd, first_bit, last_bit, minlen); + brelse(gd_bh); + gd_bh = NULL; + if (cnt < 0) { + ret = cnt; + mlog_errno(ret); + break; + } + + trimmed += cnt; + len -= osb->bitmap_cpg - first_bit; + first_bit = 0; + if (group == osb->first_cluster_group_blkno) + group = ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg); + else + group += ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg); + } + range->len = trimmed * sb->s_blocksize; +out_unlock: + ocfs2_inode_unlock(main_bm_inode, 0); + brelse(main_bm_bh); +out_mutex: + mutex_unlock(&main_bm_inode->i_mutex); + iput(main_bm_inode); +out: + return ret; +} diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index 3bd08a0..ca381c5 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h @@ -239,6 +239,7 @@ int ocfs2_find_leaf(struct ocfs2_caching_info *ci, struct buffer_head **leaf_bh); int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster); +int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range); /* * Helper function to look at the # of clusters in an extent record. */ -- 1.6.3.GIT
Possibly Parallel Threads
- Please assist -- Unable to remove '-' character from char vector--
- Please assist -- Unable to remove '-' character from char vector--
- Please assist -- Unable to remove '-' character from char vector--
- [PATCH v3 13/22] ocfs2: use little-endian bitops
- [PATCH] ocfs2: remove some pointless conditionals before kfree()