Tristan Ye
2010-Nov-03 11:02 UTC
[Ocfs2-devel] [PATCH 1/2] Ocfs2: Add a new code 'OCFS2_INFO_FREEINODE' for o2info ioctl.
The new code is dedicated to calculate free inodes number of all inode_allocs, then return the info to userpace in terms of an array. Specially, flag 'OCFS2_INFO_FL_NON_COHERENT', manipulated by '--cluster-coherent' from userspace, is now going to be involved. setting the flag on means no cluster coherency considered, usually, userspace tools choose none-coherency strategy by default for the sake of performace. Signed-off-by: Tristan Ye <tristan.ye at oracle.com> --- fs/ocfs2/ioctl.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/ocfs2_ioctl.h | 11 ++++++ 2 files changed, 106 insertions(+), 0 deletions(-) diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 7a48681..a60eaec 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -23,6 +23,7 @@ #include "ioctl.h" #include "resize.h" #include "refcounttree.h" +#include "sysfile.h" #include <linux/ext2_fs.h> @@ -299,6 +300,96 @@ bail: return status; } +int ocfs2_info_scan_inode_alloc(struct inode *inode_alloc, + struct ocfs2_info_freeinode *fi, __u32 slot) +{ + int status = 0, unlock = 0; + + struct buffer_head *bh = NULL; + struct ocfs2_dinode *dinode_alloc = NULL; + + mutex_lock(&inode_alloc->i_mutex); + + if (!(fi->ifi_req.ir_flags & OCFS2_INFO_FL_NON_COHERENT)) { + status = ocfs2_inode_lock(inode_alloc, &bh, 0); + if (status < 0) { + mlog_errno(status); + goto bail; + } + unlock = 1; + + } else { + status = ocfs2_read_inode_block(inode_alloc, &bh); + if (status < 0) { + mlog_errno(status); + goto bail; + } + } + + dinode_alloc = (struct ocfs2_dinode *)bh->b_data; + + fi->ifi_stat[slot].lfi_total + le32_to_cpu(dinode_alloc->id1.bitmap1.i_total); + fi->ifi_stat[slot].lfi_free + le32_to_cpu(dinode_alloc->id1.bitmap1.i_total) - + le32_to_cpu(dinode_alloc->id1.bitmap1.i_used); +bail: + if (unlock) + ocfs2_inode_unlock(inode_alloc, 0); + + mutex_unlock(&inode_alloc->i_mutex); + + iput(inode_alloc); + brelse(bh); + + mlog_exit(status); + return status; +} + +int ocfs2_info_handle_freeinode(struct inode *inode, + struct ocfs2_info_request __user *req) +{ + u32 i; + int status = -EFAULT; + struct ocfs2_info_freeinode oifi; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct inode *inode_alloc = NULL; + + if (o2info_from_user(oifi, req)) + goto bail; + + oifi.ifi_slotnum = osb->max_slots; + + for (i = 0; i < oifi.ifi_slotnum; i++) { + inode_alloc + ocfs2_get_system_file_inode(osb, + INODE_ALLOC_SYSTEM_INODE, + i); + if (!inode_alloc) { + mlog(ML_ERROR, "unable to get alloc inode in " + "slot %u\n", i); + status = -EIO; + goto bail; + } + + status = ocfs2_info_scan_inode_alloc(inode_alloc, &oifi, i); + if (status < 0) + goto bail; + } + + oifi.ifi_req.ir_flags |= OCFS2_INFO_FL_FILLED; + + if (o2info_to_user(oifi, req)) + goto bail; + + status = 0; +bail: + if (status) + o2info_set_request_error(oifi, req); + + return status; +} + int ocfs2_info_handle_unknown(struct inode *inode, struct ocfs2_info_request __user *req) { @@ -370,6 +461,10 @@ int ocfs2_info_handle_request(struct inode *inode, if (oir.ir_size == sizeof(struct ocfs2_info_journal_size)) status = ocfs2_info_handle_journal_size(inode, req); break; + case OCFS2_INFO_FREEINODE: + if (oir.ir_size == sizeof(struct ocfs2_info_freeinode)) + status = ocfs2_info_handle_freeinode(inode, req); + break; default: status = ocfs2_info_handle_unknown(inode, req); break; diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h index b46f39b..6b4b39a 100644 --- a/fs/ocfs2/ocfs2_ioctl.h +++ b/fs/ocfs2/ocfs2_ioctl.h @@ -142,6 +142,16 @@ struct ocfs2_info_journal_size { __u64 ij_journal_size; }; +struct ocfs2_info_freeinode { + struct ocfs2_info_request ifi_req; + struct ocfs2_info_local_freeinode { + __u64 lfi_total; + __u64 lfi_free; + } ifi_stat[OCFS2_MAX_SLOTS]; + __u32 ifi_slotnum; /* out */ + __u32 ifi_pad; +}; + /* Codes for ocfs2_info_request */ enum ocfs2_info_type { OCFS2_INFO_CLUSTERSIZE = 1, @@ -151,6 +161,7 @@ enum ocfs2_info_type { OCFS2_INFO_UUID, OCFS2_INFO_FS_FEATURES, OCFS2_INFO_JOURNAL_SIZE, + OCFS2_INFO_FREEINODE, OCFS2_INFO_NUM_TYPES }; -- 1.5.5
Tristan Ye
2010-Nov-03 11:02 UTC
[Ocfs2-devel] [PATCH 2/2] Ocfs2: Add a new code 'OCFS2_INFO_FREEFRAG' for o2info ioctl.
This new code is a bit more complicated than former ones, the goal is to show user all statistics required to take a deep insight into filesystem on how the disk is being fragmentaed. The goal is achieved by scaning global bitmap from (cluster)group to group to figure out following factors in the filesystem: - How many free chunks in a fixed size as user requested. - How many real free chunks in all size. - Min/Max/Avg size(in) clusters of free chunks. - How do free chunks distribute(in size) in terms of a histogram, just like following: --------------------------------------------------------- Extent Size Range : Free extents Free Clusters Percent 32K... 64K- : 1 1 0.00% 1M... 2M- : 9 288 0.03% 8M... 16M- : 2 831 0.09% 32M... 64M- : 1 2047 0.23% 128M... 256M- : 1 8191 0.92% 256M... 512M- : 2 21706 2.43% 512M... 1024M- : 27 858623 96.29% --------------------------------------------------------- Userspace ioctl() call eventually gets the above info returned by passing a 'struct ocfs2_info_freefrag' with the chunk_size being specified first. Signed-off-by: Tristan Ye <tristan.ye at oracle.com> --- fs/ocfs2/ioctl.c | 249 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/ocfs2_ioctl.h | 23 +++++ 2 files changed, 272 insertions(+), 0 deletions(-) diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index a60eaec..51ccf24 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -24,6 +24,8 @@ #include "resize.h" #include "refcounttree.h" #include "sysfile.h" +#include "buffer_head_io.h" +#include "suballoc.h" #include <linux/ext2_fs.h> @@ -390,6 +392,249 @@ bail: return status; } +void ocfs2_info_update_ffg(struct ocfs2_info_freefrag *ffg, __u32 chunksize) +{ + int index; + + index = __ilog2_u32(chunksize); + if (index >= OCFS2_INFO_MAX_HIST) + index = OCFS2_INFO_MAX_HIST - 1; + + ffg->iff_ffs.ffs_fc_hist.fc_chunks[index]++; + ffg->iff_ffs.ffs_fc_hist.fc_clusters[index] += chunksize; + + if (chunksize > ffg->iff_ffs.ffs_max) + ffg->iff_ffs.ffs_max = chunksize; + + if (chunksize < ffg->iff_ffs.ffs_min) + ffg->iff_ffs.ffs_min = chunksize; + + ffg->iff_ffs.ffs_avg += chunksize; + ffg->iff_ffs.ffs_free_chunks_real++; +} + +int ocfs2_info_freefrag_scan_chain(struct inode *gb_inode, + struct ocfs2_dinode *gb_dinode, + struct ocfs2_chain_rec *rec, + struct ocfs2_info_freefrag *ffg, + __u32 chunks_in_group) +{ + int status = 0, used; + u64 blkno; + + struct buffer_head *bh = NULL; + struct ocfs2_group_desc *bg = NULL; + + unsigned int max_bits, num_clusters; + unsigned int offset = 0, cluster, chunk; + unsigned int chunk_free, last_chunksize = 0; + + if (!le32_to_cpu(rec->c_free)) + goto bail; + + do { + if (!bg) + blkno = le64_to_cpu(rec->c_blkno); + else + blkno = le64_to_cpu(bg->bg_next_group); + + if (bh) { + brelse(bh); + bh = NULL; + } + + status = ocfs2_read_group_descriptor(gb_inode, gb_dinode, + blkno, &bh); + if (status < 0) { + mlog(ML_ERROR, "Can't read the group descriptor # " + "%llu from device.", (unsigned long long)blkno); + status = -EIO; + goto bail; + } + + bg = (struct ocfs2_group_desc *)bh->b_data; + + if (!le16_to_cpu(bg->bg_free_bits_count)) + continue; + + max_bits = le16_to_cpu(bg->bg_bits); + offset = 0; + + for (chunk = 0; chunk < chunks_in_group; chunk++) { + /* + * last chunk may be not an entire one. + */ + if ((offset + ffg->iff_chunksize) > max_bits) + num_clusters = max_bits - offset; + else + num_clusters = ffg->iff_chunksize; + + chunk_free = 0; + for (cluster = 0; cluster < num_clusters; cluster++) { + used = ocfs2_test_bit(offset, + (unsigned long *)bg->bg_bitmap); + /* + * - chunk_free counts free clusters in #N chunk. + * - last_chunksize records the size(in) clusters + * for the last real free chunk being counted. + */ + if (!used) { + last_chunksize++; + chunk_free++; + } + + if (used && last_chunksize) { + ocfs2_info_update_ffg(ffg, + last_chunksize); + last_chunksize = 0; + } + + offset++; + } + + if (chunk_free == ffg->iff_chunksize) + ffg->iff_ffs.ffs_free_chunks++; + } + + /* + * need to update the info for last free chunk. + */ + if (last_chunksize) + ocfs2_info_update_ffg(ffg, last_chunksize); + + } while (le64_to_cpu(bg->bg_next_group)); + +bail: + brelse(bh); + + mlog_exit(status); + return status; +} + +int ocfs2_info_freefrag_scan_bitmap(struct inode *gb_inode, + struct ocfs2_info_freefrag *ffg) +{ + __u32 chunks_in_group; + int status = 0, unlock = 0, i; + + struct buffer_head *bh = NULL; + struct ocfs2_chain_list *cl = NULL; + struct ocfs2_chain_rec *rec = NULL; + struct ocfs2_dinode *gb_dinode = NULL; + + mutex_lock(&gb_inode->i_mutex); + + if (!(ffg->iff_req.ir_flags & OCFS2_INFO_FL_NON_COHERENT)) { + status = ocfs2_inode_lock(gb_inode, &bh, 0); + if (status < 0) { + mlog_errno(status); + goto bail; + } + unlock = 1; + } else { + status = ocfs2_read_inode_block(gb_inode, &bh); + if (status < 0) { + mlog_errno(status); + goto bail; + } + } + + gb_dinode = (struct ocfs2_dinode *)bh->b_data; + cl = &(gb_dinode->id2.i_chain); + + /* + * Chunksize(in) clusters from userspace should be + * less than clusters in a group. + */ + if (ffg->iff_chunksize > le16_to_cpu(cl->cl_cpg)) { + status = -EINVAL; + goto bail; + } + + memset(&ffg->iff_ffs, 0, sizeof(struct ocfs2_info_freefrag_stats)); + + ffg->iff_ffs.ffs_min = ~0U; + ffg->iff_ffs.ffs_clusters + le32_to_cpu(gb_dinode->id1.bitmap1.i_total); + ffg->iff_ffs.ffs_free_clusters = ffg->iff_ffs.ffs_clusters - + le32_to_cpu(gb_dinode->id1.bitmap1.i_used); + + chunks_in_group = le16_to_cpu(cl->cl_cpg) / ffg->iff_chunksize + 1; + + for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i++) { + rec = &(cl->cl_recs[i]); + status = ocfs2_info_freefrag_scan_chain(gb_inode, gb_dinode, + rec, ffg, + chunks_in_group); + if (status) + goto bail; + } + + if (ffg->iff_ffs.ffs_free_chunks_real) + ffg->iff_ffs.ffs_avg = (ffg->iff_ffs.ffs_avg / + ffg->iff_ffs.ffs_free_chunks_real); +bail: + if (unlock) + ocfs2_inode_unlock(gb_inode, 0); + + mutex_unlock(&gb_inode->i_mutex); + + iput(gb_inode); + brelse(bh); + + mlog_exit(status); + return status; +} + +int ocfs2_info_handle_freefrag(struct inode *inode, + struct ocfs2_info_request __user *req) +{ + int status = -EFAULT; + + struct ocfs2_info_freefrag oiff; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct inode *gb_inode = NULL; + + if (o2info_from_user(oiff, req)) + goto bail; + /* + * chunksize from userspace should be power of 2. + */ + if ((oiff.iff_chunksize & (oiff.iff_chunksize - 1)) || + (!oiff.iff_chunksize)) { + status = -EINVAL; + goto bail; + } + + memset(&oiff.iff_ffs, 0, sizeof(struct ocfs2_info_freefrag_stats)); + oiff.iff_ffs.ffs_min = ~0U; + + gb_inode = ocfs2_get_system_file_inode(osb, + GLOBAL_BITMAP_SYSTEM_INODE, + OCFS2_INVALID_SLOT); + if (!gb_inode) { + mlog(ML_ERROR, "unable to get global_bitmap inode\n"); + status = -EIO; + goto bail; + } + + status = ocfs2_info_freefrag_scan_bitmap(gb_inode, &oiff); + if (status < 0) + goto bail; + + oiff.iff_req.ir_flags |= OCFS2_INFO_FL_FILLED; + + if (o2info_to_user(oiff, req)) + goto bail; + + status = 0; +bail: + if (status) + o2info_set_request_error(oiff, req); + + return status; +} + int ocfs2_info_handle_unknown(struct inode *inode, struct ocfs2_info_request __user *req) { @@ -465,6 +710,10 @@ int ocfs2_info_handle_request(struct inode *inode, if (oir.ir_size == sizeof(struct ocfs2_info_freeinode)) status = ocfs2_info_handle_freeinode(inode, req); break; + case OCFS2_INFO_FREEFRAG: + if (oir.ir_size == sizeof(struct ocfs2_info_freefrag)) + status = ocfs2_info_handle_freefrag(inode, req); + break; default: status = ocfs2_info_handle_unknown(inode, req); break; diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h index 6b4b39a..18b6770 100644 --- a/fs/ocfs2/ocfs2_ioctl.h +++ b/fs/ocfs2/ocfs2_ioctl.h @@ -152,6 +152,28 @@ struct ocfs2_info_freeinode { __u32 ifi_pad; }; +#define OCFS2_INFO_MAX_HIST (32) + +struct ocfs2_info_freefrag { + struct ocfs2_info_request iff_req; + struct ocfs2_info_freefrag_stats { /* (out) */ + struct ocfs2_info_free_chunk_list { + __u32 fc_chunks[OCFS2_INFO_MAX_HIST]; + __u32 fc_clusters[OCFS2_INFO_MAX_HIST]; + } ffs_fc_hist; + __u32 ffs_clusters; + __u32 ffs_free_clusters; + __u32 ffs_free_chunks; + __u32 ffs_free_chunks_real; + __u32 ffs_min; /* Minimum free chunksize in clusters */ + __u32 ffs_max; + __u32 ffs_avg; + __u32 ffs_pad; + } iff_ffs; + __u32 iff_chunksize; /* chunksize in clusters(in) */ + __u32 iff_pad; +}; + /* Codes for ocfs2_info_request */ enum ocfs2_info_type { OCFS2_INFO_CLUSTERSIZE = 1, @@ -162,6 +184,7 @@ enum ocfs2_info_type { OCFS2_INFO_FS_FEATURES, OCFS2_INFO_JOURNAL_SIZE, OCFS2_INFO_FREEINODE, + OCFS2_INFO_FREEFRAG, OCFS2_INFO_NUM_TYPES }; -- 1.5.5
Joel Becker
2010-Nov-04 01:18 UTC
[Ocfs2-devel] [PATCH 1/2] Ocfs2: Add a new code 'OCFS2_INFO_FREEINODE' for o2info ioctl.
On Wed, Nov 03, 2010 at 07:02:05PM +0800, Tristan Ye wrote:> The new code is dedicated to calculate free inodes number of all inode_allocs, > then return the info to userpace in terms of an array. > > Specially, flag 'OCFS2_INFO_FL_NON_COHERENT', manipulated by '--cluster-coherent' > from userspace, is now going to be involved. setting the flag on means no cluster > coherency considered, usually, userspace tools choose none-coherency strategy by > default for the sake of performace.This looks pretty straightforward. Note that any non-cached allocator is going to lock, regardless of the coherency flag. Do we want to use ocfs2_ilookup() instead?> +int ocfs2_info_scan_inode_alloc(struct inode *inode_alloc, > + struct ocfs2_info_freeinode *fi, __u32 slot) > +{ > + int status = 0, unlock = 0; > + > + struct buffer_head *bh = NULL; > + struct ocfs2_dinode *dinode_alloc = NULL; > + > + mutex_lock(&inode_alloc->i_mutex); > + > + if (!(fi->ifi_req.ir_flags & OCFS2_INFO_FL_NON_COHERENT)) {Also, I'm thinking that this would look much better as: if (!ocfs2_info_coherent(&fi->ifi_req)) { That implies we probably also want ocfs2_info_set_filled(request), etc. Good, bad? Joel -- The Graham Corollary: The longer a socially-moderated news website exists, the probability of an old Paul Graham link appearing at the top approaches certainty. Joel Becker Senior Development Manager Oracle E-mail: joel.becker at oracle.com Phone: (650) 506-8127