Tao Ma
2007-Nov-16 00:42 UTC
[Ocfs2-devel] [PATCH 0/6] Add online resize for ocfs2-tools, take 1
Add online resize to tunefs.ocfs2 so that users can grow a volume while it is mounted.
Tao Ma
2007-Nov-16 00:50 UTC
[Ocfs2-devel] [PATCH 1/6] Move resize function out of tunefs.c, take 1
We will add online resize in tunefs.ocfs2, and it will be complicated. so add a new file resize.c in it and move all resize function out of tunefs.c. Signed-off-by: Tao Ma <tao.ma@oracle.com> --- tunefs.ocfs2/Makefile | 2 tunefs.ocfs2/resize.c | 320 +++++++++++++++++++++++++++++++++++++++++++++++++ tunefs.ocfs2/tunefs.c | 295 --------------------------------------------- tunefs.ocfs2/tunefs.h | 6 + 4 files changed, 328 insertions(+), 295 deletions(-) create mode 100644 tunefs.ocfs2/resize.c 755feb64b82a2c84291e713ccde2be2a5ea31f18 diff --git a/tunefs.ocfs2/Makefile b/tunefs.ocfs2/Makefile index e6b4427..be68fc8 100644 --- a/tunefs.ocfs2/Makefile +++ b/tunefs.ocfs2/Makefile @@ -31,7 +31,7 @@ DEFINES = -DOCFS2_FLAT_INCLUDES -DVERSIO MANS = tunefs.ocfs2.8 -CFILES = tunefs.c query.c remove_slot.c sparse_file.c features.c +CFILES = tunefs.c query.c remove_slot.c sparse_file.c features.c resize.c HFILES = tunefs.h OBJS = $(subst .c,.o,$(CFILES)) diff --git a/tunefs.ocfs2/resize.c b/tunefs.ocfs2/resize.c new file mode 100644 index 0000000..be5ea6b --- /dev/null +++ b/tunefs.ocfs2/resize.c @@ -0,0 +1,320 @@ +/* + * resize.c + * + * tunefs utility for online and offline resize. + * + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ * + */ + +#include <tunefs.h> + +extern ocfs2_tune_opts opts; + +void get_vol_size(ocfs2_filesys *fs) +{ + errcode_t ret = 0; + uint64_t num_blocks; + + ret = ocfs2_get_device_size(opts.device, fs->fs_blocksize, + &num_blocks); + if (ret) { + com_err(opts.progname, ret, "while getting size of device %s", + opts.device); + exit(1); + } + + if (!opts.num_blocks) + opts.num_blocks = num_blocks; + + if (opts.num_blocks > num_blocks) { + com_err(opts.progname, 0, "The containing partition (or device) " + "is only %"PRIu64" blocks", num_blocks); + exit(1); + } + + return ; +} + +int validate_vol_size(ocfs2_filesys *fs) +{ + uint64_t num_blocks; + + if (opts.num_blocks == fs->fs_blocks) { + com_err(opts.progname, 0, "The filesystem is already " + "%"PRIu64" blocks", fs->fs_blocks); + return -1; + } + + if (opts.num_blocks < fs->fs_blocks) { + com_err(opts.progname, 0, "Cannot shrink volume size from " + "%"PRIu64" blocks to %"PRIu64" blocks", + fs->fs_blocks, opts.num_blocks); + return -1; + } + + num_blocks = ocfs2_clusters_to_blocks(fs, 1); + if (num_blocks > (opts.num_blocks - fs->fs_blocks)) { + com_err(opts.progname, 0, "Cannot grow volume size less than " + "%d blocks", num_blocks); + return -1; + } + + if (opts.num_blocks > UINT32_MAX) { + com_err(opts.progname, 0, "As JBD can only store block numbers " + "in 32 bits, %s cannot be grown to more than %"PRIu64" " + "blocks.", opts.device, UINT32_MAX); + return -1; + } + + return 0; +} + +errcode_t update_volume_size(ocfs2_filesys *fs, int *changed) +{ + errcode_t ret = 0; + struct ocfs2_dinode *di; + uint64_t bm_blkno = 0; + uint64_t gd_blkno = 0; + uint64_t lgd_blkno = 0; + char *in_buf = NULL; + char *gd_buf = NULL; + char *lgd_buf = NULL; + struct ocfs2_chain_list *cl; + struct ocfs2_chain_rec *cr; + struct ocfs2_group_desc *gd; + uint32_t cluster_chunk; + uint32_t num_new_clusters, save_new_clusters; + uint32_t first_new_cluster; + uint16_t chain; + uint32_t used_bits; + uint32_t total_bits; + uint32_t 
num_bits; + int flush_lgd = 0; + char *zero_buf = NULL; + + ret = ocfs2_malloc_block(fs->fs_io, &in_buf); + if (ret) { + com_err(opts.progname, ret, "while allocating a block during " + "volume resize"); + goto bail; + } + + ret = ocfs2_malloc_block(fs->fs_io, &gd_buf); + if (ret) { + com_err(opts.progname, ret, "while allocating a block during " + "volume resize"); + goto bail; + } + + ret = ocfs2_malloc_block(fs->fs_io, &lgd_buf); + if (ret) { + com_err(opts.progname, ret, "while allocating a block during " + "volume resize"); + goto bail; + } + + ret = ocfs2_malloc_blocks(fs->fs_io, ocfs2_clusters_to_blocks(fs, 1), + &zero_buf); + if (ret) { + com_err(opts.progname, ret, "while allocating a cluster during " + "volume resize"); + goto bail; + } + + memset(zero_buf, 0, fs->fs_clustersize); + + /* read global bitmap */ + ret = ocfs2_lookup_system_inode(fs, GLOBAL_BITMAP_SYSTEM_INODE, 0, + &bm_blkno); + if (ret) { + com_err(opts.progname, ret, "while looking up global bitmap " + "inode during volume resize"); + goto bail; + } + + ret = ocfs2_read_inode(fs, bm_blkno, in_buf); + if (ret) { + com_err(opts.progname, ret, "while reading inode at block " + "%"PRIu64" during volume resize", bm_blkno); + goto bail; + } + + di = (struct ocfs2_dinode *)in_buf; + cl = &(di->id2.i_chain); + + total_bits = di->id1.bitmap1.i_total; + used_bits = di->id1.bitmap1.i_used; + + first_new_cluster = di->i_clusters; + save_new_clusters = num_new_clusters + ocfs2_blocks_to_clusters(fs, opts.num_blocks) - di->i_clusters; + + /* Find the blknum of the last cluster group */ + lgd_blkno = ocfs2_which_cluster_group(fs, cl->cl_cpg, first_new_cluster - 1); + + ret = ocfs2_read_group_desc(fs, lgd_blkno, lgd_buf); + if (ret) { + com_err(opts.progname, ret, "while reading group descriptor " + "at block %"PRIu64" during volume resize", lgd_blkno); + goto bail; + } + + gd = (struct ocfs2_group_desc *)lgd_buf; + + /* If only one cluster group then see if we need to adjust up cl_cpg */ + if 
(cl->cl_next_free_rec == 1) { + if (cl->cl_cpg < 8 * gd->bg_size) + cl->cl_cpg = 8 * gd->bg_size; + } + + chain = gd->bg_chain; + + /* If possible round off the last group to cpg */ + cluster_chunk = MIN(num_new_clusters, + (cl->cl_cpg - (gd->bg_bits/cl->cl_bpc))); + if (cluster_chunk) { + num_new_clusters -= cluster_chunk; + first_new_cluster += cluster_chunk; + + num_bits = cluster_chunk * cl->cl_bpc; + + gd->bg_bits += num_bits; + gd->bg_free_bits_count += num_bits; + + cr = &(cl->cl_recs[chain]); + cr->c_total += num_bits; + cr->c_free += num_bits; + + total_bits += num_bits; + + fs->fs_clusters += cluster_chunk; + fs->fs_blocks += ocfs2_clusters_to_blocks(fs, cluster_chunk); + + /* This cluster group block is written after the new */ + /* cluster groups are written to disk */ + flush_lgd = 1; + } + + /* Init the new groups and write to disk */ + /* Add these groups one by one starting from the first chain after */ + /* the one containing the last group */ + + gd = (struct ocfs2_group_desc *)gd_buf; + + while(num_new_clusters) { + gd_blkno = ocfs2_which_cluster_group(fs, cl->cl_cpg, + first_new_cluster); + cluster_chunk = MIN(num_new_clusters, cl->cl_cpg); + num_new_clusters -= cluster_chunk; + first_new_cluster += cluster_chunk; + + if (++chain >= cl->cl_count) + chain = 0; + + ocfs2_init_group_desc(fs, gd, gd_blkno, + fs->fs_super->i_fs_generation, di->i_blkno, + (cluster_chunk *cl->cl_bpc), chain); + + /* Add group to chain */ + cr = &(cl->cl_recs[chain]); + if (chain >= cl->cl_next_free_rec) { + cl->cl_next_free_rec++; + cr->c_free = 0; + cr->c_total = 0; + cr->c_blkno = 0; + } + + gd->bg_next_group = cr->c_blkno; + cr->c_blkno = gd_blkno; + cr->c_free += gd->bg_free_bits_count; + cr->c_total += gd->bg_bits; + + used_bits += (gd->bg_bits - gd->bg_free_bits_count); + total_bits += gd->bg_bits; + + fs->fs_clusters += cluster_chunk; + fs->fs_blocks += ocfs2_clusters_to_blocks(fs, cluster_chunk); + + /* Initialize the first cluster in the group */ + ret = 
io_write_block(fs->fs_io, gd_blkno, + ocfs2_clusters_to_blocks(fs, 1), zero_buf); + if (ret) { + com_err(opts.progname, ret, "while initializing the " + "cluster starting at block %"PRIu64" during " + "volume resize", gd_blkno); + goto bail; + } + + /* write a new group descriptor */ + ret = ocfs2_write_group_desc(fs, gd_blkno, gd_buf); + if (ret) { + com_err(opts.progname, ret, "while writing group " + "descriptor at block %"PRIu64" during " + "volume resize", gd_blkno); + goto bail; + } + } + + di->id1.bitmap1.i_total = total_bits; + di->id1.bitmap1.i_used = used_bits; + + di->i_clusters += save_new_clusters; + di->i_size = (uint64_t) di->i_clusters * fs->fs_clustersize; + + fs->fs_super->i_clusters = di->i_clusters; + + block_signals(SIG_BLOCK); + /* Flush that last group descriptor we updated before the new ones */ + if (flush_lgd) { + ret = ocfs2_write_group_desc(fs, lgd_blkno, lgd_buf); + if (ret) { + block_signals(SIG_UNBLOCK); + com_err(opts.progname, ret, "while flushing group " + "descriptor at block %"PRIu64" during " + "volume resize", lgd_blkno); + goto bail; + } + } + + /* write the global bitmap inode */ + ret = ocfs2_write_inode(fs, bm_blkno, in_buf); + if (ret) { + block_signals(SIG_UNBLOCK); + com_err(opts.progname, ret, "while writing global bitmap " + "inode at block %"PRIu64" during volume resize", + bm_blkno); + goto bail; + } + + block_signals(SIG_UNBLOCK); + + *changed = 1; + +bail: + if (zero_buf) + ocfs2_free(&zero_buf); + if (in_buf) + ocfs2_free(&in_buf); + if (gd_buf) + ocfs2_free(&gd_buf); + if (lgd_buf) + ocfs2_free(&lgd_buf); + + return ret; +} diff --git a/tunefs.ocfs2/tunefs.c b/tunefs.ocfs2/tunefs.c index fbcee4a..207431c 100644 --- a/tunefs.ocfs2/tunefs.c +++ b/tunefs.ocfs2/tunefs.c @@ -66,7 +66,7 @@ static void handle_signal(int sig) } /* Call this with SIG_BLOCK to block and SIG_UNBLOCK to unblock */ -static void block_signals(int how) +void block_signals(int how) { sigset_t sigs; @@ -408,31 +408,6 @@ static void 
get_options(int argc, char * return ; } -static void get_vol_size(ocfs2_filesys *fs) -{ - errcode_t ret = 0; - uint64_t num_blocks; - - ret = ocfs2_get_device_size(opts.device, fs->fs_blocksize, - &num_blocks); - if (ret) { - com_err(opts.progname, ret, "while getting size of device %s", - opts.device); - exit(1); - } - - if (!opts.num_blocks) - opts.num_blocks = num_blocks; - - if (opts.num_blocks > num_blocks) { - com_err(opts.progname, 0, "The containing partition (or device) " - "is only %"PRIu64" blocks", num_blocks); - exit(1); - } - - return ; -} - static int validate_mount_change(ocfs2_filesys *fs) { if (opts.mount == MOUNT_LOCAL) { @@ -446,40 +421,6 @@ static int validate_mount_change(ocfs2_f return -1; } -static int validate_vol_size(ocfs2_filesys *fs) -{ - uint64_t num_blocks; - - if (opts.num_blocks == fs->fs_blocks) { - com_err(opts.progname, 0, "The filesystem is already " - "%"PRIu64" blocks", fs->fs_blocks); - return -1; - } - - if (opts.num_blocks < fs->fs_blocks) { - com_err(opts.progname, 0, "Cannot shrink volume size from " - "%"PRIu64" blocks to %"PRIu64" blocks", - fs->fs_blocks, opts.num_blocks); - return -1; - } - - num_blocks = ocfs2_clusters_to_blocks(fs, 1); - if (num_blocks > (opts.num_blocks - fs->fs_blocks)) { - com_err(opts.progname, 0, "Cannot grow volume size less than " - "%d blocks", num_blocks); - return -1; - } - - if (opts.num_blocks > UINT32_MAX) { - com_err(opts.progname, 0, "As JBD can only store block numbers " - "in 32 bits, %s cannot be grown to more than %"PRIu64" " - "blocks.", opts.device, UINT32_MAX); - return -1; - } - - return 0; -} - static errcode_t add_slots(ocfs2_filesys *fs) { errcode_t ret = 0; @@ -994,240 +935,6 @@ bail: return ret; } -static errcode_t update_volume_size(ocfs2_filesys *fs, int *changed) -{ - errcode_t ret = 0; - struct ocfs2_dinode *di; - uint64_t bm_blkno = 0; - uint64_t gd_blkno = 0; - uint64_t lgd_blkno = 0; - char *in_buf = NULL; - char *gd_buf = NULL; - char *lgd_buf = NULL; - struct 
ocfs2_chain_list *cl; - struct ocfs2_chain_rec *cr; - struct ocfs2_group_desc *gd; - uint32_t cluster_chunk; - uint32_t num_new_clusters, save_new_clusters; - uint32_t first_new_cluster; - uint16_t chain; - uint32_t used_bits; - uint32_t total_bits; - uint32_t num_bits; - int flush_lgd = 0; - char *zero_buf = NULL; - - ret = ocfs2_malloc_block(fs->fs_io, &in_buf); - if (ret) { - com_err(opts.progname, ret, "while allocating a block during " - "volume resize"); - goto bail; - } - - ret = ocfs2_malloc_block(fs->fs_io, &gd_buf); - if (ret) { - com_err(opts.progname, ret, "while allocating a block during " - "volume resize"); - goto bail; - } - - ret = ocfs2_malloc_block(fs->fs_io, &lgd_buf); - if (ret) { - com_err(opts.progname, ret, "while allocating a block during " - "volume resize"); - goto bail; - } - - ret = ocfs2_malloc_blocks(fs->fs_io, ocfs2_clusters_to_blocks(fs, 1), - &zero_buf); - if (ret) { - com_err(opts.progname, ret, "while allocating a cluster during " - "volume resize"); - goto bail; - } - - memset(zero_buf, 0, fs->fs_clustersize); - - /* read global bitmap */ - ret = ocfs2_lookup_system_inode(fs, GLOBAL_BITMAP_SYSTEM_INODE, 0, - &bm_blkno); - if (ret) { - com_err(opts.progname, ret, "while looking up global bitmap " - "inode during volume resize"); - goto bail; - } - - ret = ocfs2_read_inode(fs, bm_blkno, in_buf); - if (ret) { - com_err(opts.progname, ret, "while reading inode at block " - "%"PRIu64" during volume resize", bm_blkno); - goto bail; - } - - di = (struct ocfs2_dinode *)in_buf; - cl = &(di->id2.i_chain); - - total_bits = di->id1.bitmap1.i_total; - used_bits = di->id1.bitmap1.i_used; - - first_new_cluster = di->i_clusters; - save_new_clusters = num_new_clusters - ocfs2_blocks_to_clusters(fs, opts.num_blocks) - di->i_clusters; - - /* Find the blknum of the last cluster group */ - lgd_blkno = ocfs2_which_cluster_group(fs, cl->cl_cpg, first_new_cluster - 1); - - ret = ocfs2_read_group_desc(fs, lgd_blkno, lgd_buf); - if (ret) { - 
com_err(opts.progname, ret, "while reading group descriptor " - "at block %"PRIu64" during volume resize", lgd_blkno); - goto bail; - } - - gd = (struct ocfs2_group_desc *)lgd_buf; - - /* If only one cluster group then see if we need to adjust up cl_cpg */ - if (cl->cl_next_free_rec == 1) { - if (cl->cl_cpg < 8 * gd->bg_size) - cl->cl_cpg = 8 * gd->bg_size; - } - - chain = gd->bg_chain; - - /* If possible round off the last group to cpg */ - cluster_chunk = MIN(num_new_clusters, - (cl->cl_cpg - (gd->bg_bits/cl->cl_bpc))); - if (cluster_chunk) { - num_new_clusters -= cluster_chunk; - first_new_cluster += cluster_chunk; - - num_bits = cluster_chunk * cl->cl_bpc; - - gd->bg_bits += num_bits; - gd->bg_free_bits_count += num_bits; - - cr = &(cl->cl_recs[chain]); - cr->c_total += num_bits; - cr->c_free += num_bits; - - total_bits += num_bits; - - fs->fs_clusters += cluster_chunk; - fs->fs_blocks += ocfs2_clusters_to_blocks(fs, cluster_chunk); - - /* This cluster group block is written after the new */ - /* cluster groups are written to disk */ - flush_lgd = 1; - } - - /* Init the new groups and write to disk */ - /* Add these groups one by one starting from the first chain after */ - /* the one containing the last group */ - - gd = (struct ocfs2_group_desc *)gd_buf; - - while(num_new_clusters) { - gd_blkno = ocfs2_which_cluster_group(fs, cl->cl_cpg, - first_new_cluster); - cluster_chunk = MIN(num_new_clusters, cl->cl_cpg); - num_new_clusters -= cluster_chunk; - first_new_cluster += cluster_chunk; - - if (++chain >= cl->cl_count) - chain = 0; - - ocfs2_init_group_desc(fs, gd, gd_blkno, - fs->fs_super->i_fs_generation, di->i_blkno, - (cluster_chunk *cl->cl_bpc), chain); - - /* Add group to chain */ - cr = &(cl->cl_recs[chain]); - if (chain >= cl->cl_next_free_rec) { - cl->cl_next_free_rec++; - cr->c_free = 0; - cr->c_total = 0; - cr->c_blkno = 0; - } - - gd->bg_next_group = cr->c_blkno; - cr->c_blkno = gd_blkno; - cr->c_free += gd->bg_free_bits_count; - cr->c_total += 
gd->bg_bits; - - used_bits += (gd->bg_bits - gd->bg_free_bits_count); - total_bits += gd->bg_bits; - - fs->fs_clusters += cluster_chunk; - fs->fs_blocks += ocfs2_clusters_to_blocks(fs, cluster_chunk); - - /* Initialize the first cluster in the group */ - ret = io_write_block(fs->fs_io, gd_blkno, - ocfs2_clusters_to_blocks(fs, 1), zero_buf); - if (ret) { - com_err(opts.progname, ret, "while initializing the " - "cluster starting at block %"PRIu64" during " - "volume resize", gd_blkno); - goto bail; - } - - /* write a new group descriptor */ - ret = ocfs2_write_group_desc(fs, gd_blkno, gd_buf); - if (ret) { - com_err(opts.progname, ret, "while writing group " - "descriptor at block %"PRIu64" during " - "volume resize", gd_blkno); - goto bail; - } - } - - di->id1.bitmap1.i_total = total_bits; - di->id1.bitmap1.i_used = used_bits; - - di->i_clusters += save_new_clusters; - di->i_size = (uint64_t) di->i_clusters * fs->fs_clustersize; - - fs->fs_super->i_clusters = di->i_clusters; - - block_signals(SIG_BLOCK); - /* Flush that last group descriptor we updated before the new ones */ - if (flush_lgd) { - ret = ocfs2_write_group_desc(fs, lgd_blkno, lgd_buf); - if (ret) { - block_signals(SIG_UNBLOCK); - com_err(opts.progname, ret, "while flushing group " - "descriptor at block %"PRIu64" during " - "volume resize", lgd_blkno); - goto bail; - } - } - - /* write the global bitmap inode */ - ret = ocfs2_write_inode(fs, bm_blkno, in_buf); - if (ret) { - block_signals(SIG_UNBLOCK); - com_err(opts.progname, ret, "while writing global bitmap " - "inode at block %"PRIu64" during volume resize", - bm_blkno); - goto bail; - } - - block_signals(SIG_UNBLOCK); - - *changed = 1; - -bail: - if (zero_buf) - ocfs2_free(&zero_buf); - if (in_buf) - ocfs2_free(&in_buf); - if (gd_buf) - ocfs2_free(&gd_buf); - if (lgd_buf) - ocfs2_free(&lgd_buf); - - return ret; -} - static errcode_t refresh_backup_super(ocfs2_filesys *fs) { errcode_t ret; diff --git a/tunefs.ocfs2/tunefs.h b/tunefs.ocfs2/tunefs.h 
index 566d746..3863331 100644 --- a/tunefs.ocfs2/tunefs.h +++ b/tunefs.ocfs2/tunefs.h @@ -98,6 +98,8 @@ typedef struct _ocfs2_tune_opts { time_t tune_time; } ocfs2_tune_opts; +void block_signals(int how); + void print_query(char *queryfmt); errcode_t remove_slots(ocfs2_filesys *fs); @@ -113,4 +115,8 @@ void free_clear_ctxt(void); errcode_t feature_check(ocfs2_filesys *fs); errcode_t update_feature(ocfs2_filesys *fs); + +void get_vol_size(ocfs2_filesys *fs); +errcode_t update_volume_size(ocfs2_filesys *fs, int *changed); +int validate_vol_size(ocfs2_filesys *fs); #endif /* _TUNEFS_H */ -- 1.3.3
Tao Ma
2007-Nov-16 00:50 UTC
[Ocfs2-devel] [PATCH 3/6] Abstract checking and validating process in tunefs.ocfs2, take 1
In the main function of tunefs.ocfs2, there are a large number of checking and validating process. They are seperated and can be moved out of "main" so that future online resize can skip these check easily. So add 2 new function, volume_check and validate_parameter to organize the complicated process. There are also 2 minor changes in this patch: 1. the print of "Adding backup superblock for the volume" are moved to validate process as others. 2. remove_slot_check is moved into function volume_check as it does the check work while it was in validate process. Signed-off-by: Tao Ma <tao.ma@oracle.com> --- tunefs.ocfs2/tunefs.c | 267 +++++++++++++++++++++++++++---------------------- 1 files changed, 147 insertions(+), 120 deletions(-) 6e284ab75718e8e55ffb03a262e8a32a5a807a6b diff --git a/tunefs.ocfs2/tunefs.c b/tunefs.ocfs2/tunefs.c index 207431c..cea9eb1 100644 --- a/tunefs.ocfs2/tunefs.c +++ b/tunefs.ocfs2/tunefs.c @@ -30,6 +30,9 @@ ocfs2_tune_opts opts; ocfs2_filesys *fs_gbl = NULL; static int cluster_locked = 0; static int resize = 0; +static uint64_t def_jrnl_size = 0; +static char old_uuid[OCFS2_VOL_UUID_LEN * 2 + 1]; +static char new_uuid[OCFS2_VOL_UUID_LEN * 2 + 1]; static void usage(const char *progname) { @@ -997,6 +1000,145 @@ static void free_opts(void) free(opts.device); } +static errcode_t volume_check(ocfs2_filesys *fs) +{ + errcode_t ret; + int dirty = 0; + uint16_t max_slots = OCFS2_RAW_SB(fs->fs_super)->s_max_slots; + + ret = journal_check(fs, &dirty, &def_jrnl_size); + if (ret || dirty) + goto bail; + + ret = 1; + if (opts.list_sparse) { + if (!ocfs2_sparse_alloc(OCFS2_RAW_SB(fs->fs_super))) { + com_err(opts.progname, 0, + "sparse_file flag check failed. "); + goto bail; + } + printf("List all the sparse files in the volume\n"); + } + + if (opts.feature_string) { + if (feature_check(fs)) { + com_err(opts.progname, 0, + "feature check failed. 
"); + goto bail; + } + printf("Modify feature \"%s\" for the volume\n", + opts.feature_string); + } + + /* If operation requires touching the global bitmap, ensure it is good */ + /* This is to handle failed resize */ + if (opts.num_blocks || opts.num_slots || opts.jrnl_size || + opts.backup_super) { + if (global_bitmap_check(fs)) { + com_err(opts.progname, 0, "Global bitmap check failed. " + "Run fsck.ocfs2 -f <device>."); + goto bail; + } + } + + /* check whether the block for backup superblock are used. */ + if (opts.backup_super) { + if (backup_super_check(fs)) + goto bail; + } + + /* remove slot check. */ + if (opts.num_slots && opts.num_slots < max_slots) { + ret = remove_slot_check(fs); + if (ret) { + com_err(opts.progname, 0, + "remove slot check failed. "); + goto bail; + } + } + + ret = 0; +bail: + return ret; +} + +static void validate_parameter(ocfs2_filesys *fs) +{ + uint16_t max_slots = OCFS2_RAW_SB(fs->fs_super)->s_max_slots; + uint64_t num_clusters; + char *tmpstr; + + /* valid backup super. 
*/ + if (opts.backup_super) + printf("Adding backup superblock for the volume\n"); + + /* validate volume label */ + if (opts.vol_label) { + printf("Changing volume label from %s to %s\n", + OCFS2_RAW_SB(fs->fs_super)->s_label, opts.vol_label); + } + + /* validate volume uuid */ + if (opts.vol_uuid) { + uuid_unparse(OCFS2_RAW_SB(fs->fs_super)->s_uuid, old_uuid); + uuid_unparse(opts.vol_uuid, new_uuid); + printf("Changing volume uuid from %s to %s\n", old_uuid, new_uuid); + } + + /* validate mount type */ + if (opts.mount) { + if (!validate_mount_change(fs)) { + if (opts.mount == MOUNT_LOCAL) + tmpstr = MOUNT_LOCAL_STR; + else + tmpstr = MOUNT_CLUSTER_STR; + printf("Changing mount type to %s\n", tmpstr); + } else + opts.mount = 0; + } + + /* validate num slots */ + max_slots = OCFS2_RAW_SB(fs->fs_super)->s_max_slots; + if (opts.num_slots) { + if (opts.num_slots > max_slots) { + if (!opts.jrnl_size) + opts.jrnl_size = def_jrnl_size; + + } else if (opts.num_slots == max_slots) { + printf("Giving the same number of nodes. 
" + "Ignore the change of slots."); + opts.num_slots = 0; + } + + if (opts.num_slots) + printf("Changing number of node slots from %d to %d\n", + max_slots, opts.num_slots); + } + + /* validate journal size */ + if (opts.jrnl_size) { + num_clusters = (opts.jrnl_size + fs->fs_clustersize - 1) >> + OCFS2_RAW_SB(fs->fs_super)->s_clustersize_bits; + + opts.jrnl_size = num_clusters << + OCFS2_RAW_SB(fs->fs_super)->s_clustersize_bits; + + if (opts.jrnl_size != def_jrnl_size) + printf("Changing journal size %"PRIu64" to %"PRIu64"\n", + def_jrnl_size, opts.jrnl_size); + } + + /* validate volume size */ + if (opts.num_blocks) { + if (validate_vol_size(fs)) + opts.num_blocks = 0; + else + printf("Changing volume size from %"PRIu64" blocks to " + "%"PRIu64" blocks\n", fs->fs_blocks, + opts.num_blocks); + } +} + int main(int argc, char **argv) { errcode_t ret = 0; @@ -1011,14 +1153,8 @@ int main(int argc, char **argv) int upd_incompat = 0; int upd_backup_super = 0; int upd_feature = 0; - char *tmpstr; uint16_t max_slots; - uint64_t def_jrnl_size = 0; - uint64_t num_clusters; uint64_t old_blocks = 0; - int dirty = 0; - char old_uuid[OCFS2_VOL_UUID_LEN * 2 + 1]; - char new_uuid[OCFS2_VOL_UUID_LEN * 2 + 1]; initialize_ocfs_error_table(); initialize_o2dl_error_table(); @@ -1116,121 +1252,11 @@ int main(int argc, char **argv) */ io_init_cache(fs->fs_io, ocfs2_extent_recs_per_eb(fs->fs_blocksize)); - ret = journal_check(fs, &dirty, &def_jrnl_size); - if (ret || dirty) - goto unlock; - - if (opts.list_sparse) { - if (!ocfs2_sparse_alloc(OCFS2_RAW_SB(fs->fs_super))) { - com_err(opts.progname, 0, - "sparse_file flag check failed. "); - goto unlock; - } - printf("List all the sparse files in the volume\n"); - } - - if (opts.feature_string) { - if (feature_check(fs)) { - com_err(opts.progname, 0, - "feature check failed. 
"); - goto unlock; - } - printf("Modify feature \"%s\" for the volume\n", - opts.feature_string); - } - - /* If operation requires touching the global bitmap, ensure it is good */ - /* This is to handle failed resize */ - if (opts.num_blocks || opts.num_slots || opts.jrnl_size || - opts.backup_super) { - if (global_bitmap_check(fs)) { - com_err(opts.progname, 0, "Global bitmap check failed. " - "Run fsck.ocfs2 -f <device>."); - goto unlock; - } - } - - /* check whether the block for backup superblock are used. */ - if (opts.backup_super) { - if (backup_super_check(fs)) - goto unlock; - else - printf("Adding backup superblock for the volume\n"); - } - - /* validate volume label */ - if (opts.vol_label) { - printf("Changing volume label from %s to %s\n", - OCFS2_RAW_SB(fs->fs_super)->s_label, opts.vol_label); - } - - /* validate volume uuid */ - if (opts.vol_uuid) { - uuid_unparse(OCFS2_RAW_SB(fs->fs_super)->s_uuid, old_uuid); - uuid_unparse(opts.vol_uuid, new_uuid); - printf("Changing volume uuid from %s to %s\n", old_uuid, new_uuid); - } - - /* validate mount type */ - if (opts.mount) { - if (!validate_mount_change(fs)) { - if (opts.mount == MOUNT_LOCAL) - tmpstr = MOUNT_LOCAL_STR; - else - tmpstr = MOUNT_CLUSTER_STR; - printf("Changing mount type to %s\n", tmpstr); - } else - opts.mount = 0; - } - - /* validate num slots */ - max_slots = OCFS2_RAW_SB(fs->fs_super)->s_max_slots; - if (opts.num_slots) { - if (opts.num_slots < max_slots) { - ret = remove_slot_check(fs); - if (ret) { - com_err(opts.progname, 0, - "remove slot check failed. "); - goto unlock; - } - } - else if (opts.num_slots > max_slots) { - if (!opts.jrnl_size) - opts.jrnl_size = def_jrnl_size; - - } else { - printf("Giving the same number of nodes. 
" - "Ignore the change of slots."); - opts.num_slots = 0; - } - - if (opts.num_slots) - printf("Changing number of node slots from %d to %d\n", - max_slots, opts.num_slots); - } - - /* validate journal size */ - if (opts.jrnl_size) { - num_clusters = (opts.jrnl_size + fs->fs_clustersize - 1) >> - OCFS2_RAW_SB(fs->fs_super)->s_clustersize_bits; - - opts.jrnl_size = num_clusters << - OCFS2_RAW_SB(fs->fs_super)->s_clustersize_bits; - - if (opts.jrnl_size != def_jrnl_size) - printf("Changing journal size %"PRIu64" to %"PRIu64"\n", - def_jrnl_size, opts.jrnl_size); - } + ret = volume_check(fs); + if (ret) + goto unlock; - /* validate volume size */ - if (opts.num_blocks) { - if (validate_vol_size(fs)) - opts.num_blocks = 0; - else - printf("Changing volume size from %"PRIu64" blocks to " - "%"PRIu64" blocks\n", fs->fs_blocks, - opts.num_blocks); - } + validate_parameter(fs); if (!opts.vol_label && !opts.vol_uuid && !opts.num_slots && !opts.jrnl_size && !opts.num_blocks && !opts.mount && @@ -1249,6 +1275,7 @@ int main(int argc, char **argv) } /* Set resize incompat flag on superblock */ + max_slots = OCFS2_RAW_SB(fs->fs_super)->s_max_slots; if (opts.num_blocks || (opts.num_slots && opts.num_slots < max_slots)) { if (opts.num_blocks) -- 1.3.3
Tao Ma
2007-Nov-16 00:50 UTC
[Ocfs2-devel] [PATCH 2/6] Abstract some specific process in resize to some individual function, take 1
Some process may be different for online and offline resize, so move them to different functions for future change. Signed-off-by: Tao Ma <tao.ma@oracle.com> --- tunefs.ocfs2/resize.c | 248 +++++++++++++++++++++++++++++-------------------- 1 files changed, 145 insertions(+), 103 deletions(-) 7d3f1927821cde69fa649ce6a001bba12a24251b diff --git a/tunefs.ocfs2/resize.c b/tunefs.ocfs2/resize.c index be5ea6b..0466d60 100644 --- a/tunefs.ocfs2/resize.c +++ b/tunefs.ocfs2/resize.c @@ -85,15 +85,152 @@ int validate_vol_size(ocfs2_filesys *fs) return 0; } +/* + * Initalize the group descriptors in the new added cluster range. + * + * di: global_bitmap's inode info. + * first_new_cluster: the start cluster offset. + * num_new_cluster: cluster range length. + * chain: the chain position of the last group descriptor. the new + * group will be added to the chain after this one. + * total_bits and used_bits will be added according to the new groups. + */ +static errcode_t init_new_gd(ocfs2_filesys *fs, + struct ocfs2_dinode *di, + uint32_t first_new_cluster, + uint32_t num_new_clusters, + uint16_t chain, + uint32_t *total_bits, + uint32_t *used_bits) +{ + errcode_t ret = 0; + uint32_t cluster_chunk; + uint64_t gd_blkno = 0; + struct ocfs2_chain_list *cl = &di->id2.i_chain; + struct ocfs2_chain_rec *cr = NULL; + struct ocfs2_group_desc *gd = NULL; + char *zero_buf = NULL; + char *gd_buf = NULL; + + ret = ocfs2_malloc_block(fs->fs_io, &gd_buf); + if (ret) { + com_err(opts.progname, ret, "while allocating a block during " + "volume resize"); + goto bail; + } + + ret = ocfs2_malloc_blocks(fs->fs_io, ocfs2_clusters_to_blocks(fs, 1), + &zero_buf); + if (ret) { + com_err(opts.progname, ret, "while allocating a cluster during " + "volume resize"); + goto bail; + } + + memset(zero_buf, 0, fs->fs_clustersize); + gd = (struct ocfs2_group_desc *)gd_buf; + + while(num_new_clusters) { + gd_blkno = ocfs2_which_cluster_group(fs, cl->cl_cpg, + first_new_cluster); + cluster_chunk = 
MIN(num_new_clusters, cl->cl_cpg); + num_new_clusters -= cluster_chunk; + first_new_cluster += cluster_chunk; + + if (++chain >= cl->cl_count) + chain = 0; + + ocfs2_init_group_desc(fs, gd, gd_blkno, + fs->fs_super->i_fs_generation, di->i_blkno, + (cluster_chunk *cl->cl_bpc), chain); + + /* Add group to chain */ + cr = &(cl->cl_recs[chain]); + if (chain >= cl->cl_next_free_rec) { + cl->cl_next_free_rec++; + cr->c_free = 0; + cr->c_total = 0; + cr->c_blkno = 0; + } + + gd->bg_next_group = cr->c_blkno; + cr->c_blkno = gd_blkno; + cr->c_free += gd->bg_free_bits_count; + cr->c_total += gd->bg_bits; + + *used_bits += (gd->bg_bits - gd->bg_free_bits_count); + *total_bits += gd->bg_bits; + + fs->fs_clusters += cluster_chunk; + fs->fs_blocks += ocfs2_clusters_to_blocks(fs, cluster_chunk); + + /* Initialize the first cluster in the group */ + ret = io_write_block(fs->fs_io, gd_blkno, + ocfs2_clusters_to_blocks(fs, 1), zero_buf); + if (ret) { + com_err(opts.progname, ret, "while initializing the " + "cluster starting at block %"PRIu64" during " + "volume resize", gd_blkno); + goto bail; + } + + /* write a new group descriptor */ + ret = ocfs2_write_group_desc(fs, gd_blkno, gd_buf); + if (ret) { + com_err(opts.progname, ret, "while writing group " + "descriptor at block %"PRIu64" during " + "volume resize", gd_blkno); + goto bail; + } + } + +bail: + if (zero_buf) + ocfs2_free(&zero_buf); + if (gd_buf) + ocfs2_free(&gd_buf); + return ret; +} + +static errcode_t update_global_bitmap(ocfs2_filesys *fs, + struct ocfs2_dinode *di, + struct ocfs2_group_desc *lgd, + int flush_lgd) +{ + errcode_t ret = 0; + + block_signals(SIG_BLOCK); + /* Flush that last group descriptor we updated before the new ones */ + if (flush_lgd) { + ret = ocfs2_write_group_desc(fs, lgd->bg_blkno, (char *)lgd); + if (ret) { + com_err(opts.progname, ret, "while flushing group " + "descriptor at block %"PRIu64" during " + "volume resize", lgd->bg_blkno); + goto bail; + } + } + + /* write the global bitmap 
inode */ + ret = ocfs2_write_inode(fs, di->i_blkno, (char *)di); + if (ret) { + com_err(opts.progname, ret, "while writing global bitmap " + "inode at block %"PRIu64" during volume resize", + di->i_blkno); + } + +bail: + block_signals(SIG_UNBLOCK); + return ret; +} + errcode_t update_volume_size(ocfs2_filesys *fs, int *changed) { errcode_t ret = 0; struct ocfs2_dinode *di; uint64_t bm_blkno = 0; - uint64_t gd_blkno = 0; uint64_t lgd_blkno = 0; char *in_buf = NULL; - char *gd_buf = NULL; char *lgd_buf = NULL; struct ocfs2_chain_list *cl; struct ocfs2_chain_rec *cr; @@ -106,7 +243,6 @@ errcode_t update_volume_size(ocfs2_files uint32_t total_bits; uint32_t num_bits; int flush_lgd = 0; - char *zero_buf = NULL; ret = ocfs2_malloc_block(fs->fs_io, &in_buf); if (ret) { @@ -115,13 +251,6 @@ errcode_t update_volume_size(ocfs2_files goto bail; } - ret = ocfs2_malloc_block(fs->fs_io, &gd_buf); - if (ret) { - com_err(opts.progname, ret, "while allocating a block during " - "volume resize"); - goto bail; - } - ret = ocfs2_malloc_block(fs->fs_io, &lgd_buf); if (ret) { com_err(opts.progname, ret, "while allocating a block during " @@ -129,16 +258,6 @@ errcode_t update_volume_size(ocfs2_files goto bail; } - ret = ocfs2_malloc_blocks(fs->fs_io, ocfs2_clusters_to_blocks(fs, 1), - &zero_buf); - if (ret) { - com_err(opts.progname, ret, "while allocating a cluster during " - "volume resize"); - goto bail; - } - - memset(zero_buf, 0, fs->fs_clustersize); - /* read global bitmap */ ret = ocfs2_lookup_system_inode(fs, GLOBAL_BITMAP_SYSTEM_INODE, 0, &bm_blkno); @@ -214,62 +333,10 @@ errcode_t update_volume_size(ocfs2_files /* Init the new groups and write to disk */ /* Add these groups one by one starting from the first chain after */ /* the one containing the last group */ - - gd = (struct ocfs2_group_desc *)gd_buf; - - while(num_new_clusters) { - gd_blkno = ocfs2_which_cluster_group(fs, cl->cl_cpg, - first_new_cluster); - cluster_chunk = MIN(num_new_clusters, cl->cl_cpg); - 
num_new_clusters -= cluster_chunk; - first_new_cluster += cluster_chunk; - - if (++chain >= cl->cl_count) - chain = 0; - - ocfs2_init_group_desc(fs, gd, gd_blkno, - fs->fs_super->i_fs_generation, di->i_blkno, - (cluster_chunk *cl->cl_bpc), chain); - - /* Add group to chain */ - cr = &(cl->cl_recs[chain]); - if (chain >= cl->cl_next_free_rec) { - cl->cl_next_free_rec++; - cr->c_free = 0; - cr->c_total = 0; - cr->c_blkno = 0; - } - - gd->bg_next_group = cr->c_blkno; - cr->c_blkno = gd_blkno; - cr->c_free += gd->bg_free_bits_count; - cr->c_total += gd->bg_bits; - - used_bits += (gd->bg_bits - gd->bg_free_bits_count); - total_bits += gd->bg_bits; - - fs->fs_clusters += cluster_chunk; - fs->fs_blocks += ocfs2_clusters_to_blocks(fs, cluster_chunk); - - /* Initialize the first cluster in the group */ - ret = io_write_block(fs->fs_io, gd_blkno, - ocfs2_clusters_to_blocks(fs, 1), zero_buf); - if (ret) { - com_err(opts.progname, ret, "while initializing the " - "cluster starting at block %"PRIu64" during " - "volume resize", gd_blkno); - goto bail; - } - - /* write a new group descriptor */ - ret = ocfs2_write_group_desc(fs, gd_blkno, gd_buf); - if (ret) { - com_err(opts.progname, ret, "while writing group " - "descriptor at block %"PRIu64" during " - "volume resize", gd_blkno); - goto bail; - } - } + ret = init_new_gd(fs, di, first_new_cluster, + num_new_clusters, chain, &total_bits, &used_bits); + if (ret) + goto bail; di->id1.bitmap1.i_total = total_bits; di->id1.bitmap1.i_used = used_bits; @@ -279,40 +346,15 @@ errcode_t update_volume_size(ocfs2_files fs->fs_super->i_clusters = di->i_clusters; - block_signals(SIG_BLOCK); - /* Flush that last group descriptor we updated before the new ones */ - if (flush_lgd) { - ret = ocfs2_write_group_desc(fs, lgd_blkno, lgd_buf); - if (ret) { - block_signals(SIG_UNBLOCK); - com_err(opts.progname, ret, "while flushing group " - "descriptor at block %"PRIu64" during " - "volume resize", lgd_blkno); - goto bail; - } - } - - /* write the 
global bitmap inode */ - ret = ocfs2_write_inode(fs, bm_blkno, in_buf); - if (ret) { - block_signals(SIG_UNBLOCK); - com_err(opts.progname, ret, "while writing global bitmap " - "inode at block %"PRIu64" during volume resize", - bm_blkno); + ret = update_global_bitmap(fs, di, gd, flush_lgd); + if (ret) goto bail; - } - - block_signals(SIG_UNBLOCK); *changed = 1; bail: - if (zero_buf) - ocfs2_free(&zero_buf); if (in_buf) ocfs2_free(&in_buf); - if (gd_buf) - ocfs2_free(&gd_buf); if (lgd_buf) ocfs2_free(&lgd_buf); -- 1.3.3
Tao Ma
2007-Nov-16 00:50 UTC
[Ocfs2-devel] [PATCH 4/6] Modify cl_cpg in global_bitmap to be maximum value during mkfs,take 1
cl_cpg is used in the ocfs2 kernel to calculate the group number for a given cluster. Change it to the maximum value so that there is no need to update it during online resize. Signed-off-by: Tao Ma <tao.ma@oracle.com> --- fsck.ocfs2/pass0.c | 2 +- mkfs.ocfs2/mkfs.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) fe4f6a9996e36994e36196422efac3665dca90f8 diff --git a/fsck.ocfs2/pass0.c b/fsck.ocfs2/pass0.c index d53c76d..1e4c243 100644 --- a/fsck.ocfs2/pass0.c +++ b/fsck.ocfs2/pass0.c @@ -393,7 +393,7 @@ static void mark_group_used(o2fsck_state } /* - * Due to a glitch in mkfs, cl->cl_cpg for the GLOBAL BITMAP could be + * Due to a glitch in old mkfs, cl->cl_cpg for the GLOBAL BITMAP could be * less than the max possible for volumes having just one cluster * group. Fix. */ diff --git a/mkfs.ocfs2/mkfs.c b/mkfs.ocfs2/mkfs.c index 0ade0ab..57784a6 100644 --- a/mkfs.ocfs2/mkfs.c +++ b/mkfs.ocfs2/mkfs.c @@ -1910,7 +1910,8 @@ format_file(State *s, SystemFileDiskReco if (rec->cluster_bitmap) { di->id2.i_chain.cl_count = ocfs2_chain_recs_per_inode(s->blocksize); - di->id2.i_chain.cl_cpg = s->global_cpg; + di->id2.i_chain.cl_cpg = + ocfs2_group_bitmap_size(s->blocksize) * 8; di->id2.i_chain.cl_bpc = 1; if (s->nr_cluster_groups > ocfs2_chain_recs_per_inode(s->blocksize)) { -- 1.3.3
Tao Ma
2007-Nov-16 00:52 UTC
[Ocfs2-devel] [PATCH 5/6] Modify fsck to trust global bitmap than super block,take 1
In resize, we update the global_bitmap first and then the super block. So if there is any corruption between these 2 steps, there will be an inconsistency. In the kernel we use the information in global_bitmap, so fsck.ocfs2 should also trust it during the check. Signed-off-by: Tao Ma <tao.ma@oracle.com> --- fsck.ocfs2/pass0.c | 25 ++++++++++++++++++------- 1 files changed, 18 insertions(+), 7 deletions(-) aa2bfd78e72a1a60302a06775b09f9c014cd487f diff --git a/fsck.ocfs2/pass0.c b/fsck.ocfs2/pass0.c index 1e4c243..c88ab31 100644 --- a/fsck.ocfs2/pass0.c +++ b/fsck.ocfs2/pass0.c @@ -1075,19 +1075,30 @@ errcode_t o2fsck_pass0(o2fsck_state *ost if (ret) goto out; + /* + * during resize, we may update the global bitmap but fails to + * to update i_clusters in superblock, so ask the user which one + * to use before checking. + */ + if (fs->fs_super->i_clusters != di->i_clusters) { + if (prompt(ost, PY, PR_SUPERBLOCK_CLUSTERS, + "Superblock has clusters set to %u instead of %u " + "recorded in global_bitmap, it may be caused by an " + "unsuccessful resize. Trust global_bitmap?", + fs->fs_super->i_clusters, di->i_clusters)) { + ost->ost_num_clusters = di->i_clusters; + fs->fs_clusters = di->i_clusters; + fs->fs_blocks = ocfs2_clusters_to_blocks(fs, + fs->fs_clusters); + } + } + ret = verify_bitmap_descs(ost, di, blocks + ost->ost_fs->fs_blocksize, blocks + (ost->ost_fs->fs_blocksize * 2)); if (ret) goto out; - if (fs->fs_super->i_clusters != di->i_clusters) { - if (prompt(ost, PY, PR_SUPERBLOCK_CLUSTERS, - "Superblock has clusters set to %u instead of %u. Fix?", - fs->fs_super->i_clusters, di->i_clusters)) - ost->ost_num_clusters = di->i_clusters; - } - printf("Pass 0b: Checking inode allocation chains\n"); /* first the global inode alloc and then each of the node's -- 1.3.3
Tao Ma
2007-Nov-16 00:52 UTC
[Ocfs2-devel] [PATCH 6/6] Add online resize in tunefs.ocfs2,take 1
During online resize, we prepare all the new group descriptors in user space. For the update of global_bitmap, super block and all the backups, they are handled in the kernel. Signed-off-by: Tao Ma <tao.ma@oracle.com> --- libocfs2/include/ocfs2_fs.h | 4 + tunefs.ocfs2/resize.c | 255 ++++++++++++++++++++++++++++++++++++++++--- tunefs.ocfs2/tunefs.c | 53 ++++++++- tunefs.ocfs2/tunefs.h | 6 + 4 files changed, 295 insertions(+), 23 deletions(-) 60516ce8535f9cd6c9f36ace7056e5a24fab5e1b diff --git a/libocfs2/include/ocfs2_fs.h b/libocfs2/include/ocfs2_fs.h index cfaf28f..5694114 100644 --- a/libocfs2/include/ocfs2_fs.h +++ b/libocfs2/include/ocfs2_fs.h @@ -27,7 +27,7 @@ #define _OCFS2_FS_H /* Version */ #define OCFS2_MAJOR_REV_LEVEL 0 -#define OCFS2_MINOR_REV_LEVEL 90 +#define OCFS2_MINOR_REV_LEVEL 91 /* * An OCFS2 volume starts this way: @@ -230,6 +230,8 @@ #define OCFS2_IOC_FREESP64 _IOW ('X', 37 #define OCFS2_IOC_RESVSP64 _IOW ('X', 42, struct ocfs2_space_resv) #define OCFS2_IOC_UNRESVSP64 _IOW ('X', 43, struct ocfs2_space_resv) +#define OCFS2_IOC_FSGROWFSDATA _IOW ('X', 110, struct ocfs2_dinode) + /* * Journal Flags (ocfs2_dinode.id1.journal1.i_flags) */ diff --git a/tunefs.ocfs2/resize.c b/tunefs.ocfs2/resize.c index 0466d60..dfee58f 100644 --- a/tunefs.ocfs2/resize.c +++ b/tunefs.ocfs2/resize.c @@ -22,10 +22,74 @@ * */ +#include <limits.h> /* for PATH_MAX */ +#ifndef PATH_MAX +#define PATH_MAX 8192 +#endif + +#include <sys/ioctl.h> +#include <errno.h> #include <tunefs.h> extern ocfs2_tune_opts opts; +/* + * This lock name is specific and only used in online resize; + */ +static char lock_name[OCFS2_LOCK_ID_MAX_LEN] = "tunefs-online-resize-lock"; +static char mnt_dir[PATH_MAX]; + +errcode_t online_resize_lock(ocfs2_filesys *fs) +{ + return o2dlm_lock(fs->fs_dlm_ctxt, lock_name, + O2DLM_LEVEL_EXMODE, O2DLM_TRYLOCK); +} + +errcode_t online_resize_unlock(ocfs2_filesys *fs) +{ + return o2dlm_unlock(fs->fs_dlm_ctxt, lock_name); +} + +static errcode_t 
find_mount_point(char *device) +{ + int mount_flags = 0; + errcode_t ret; + + memset(mnt_dir, 0, sizeof(mnt_dir)); + + ret = ocfs2_check_mount_point(device, &mount_flags, + mnt_dir, sizeof(mnt_dir)); + if (ret) + goto out; + + if ((!mount_flags & OCFS2_MF_MOUNTED) || + (mount_flags & OCFS2_MF_READONLY) || + (mount_flags & OCFS2_MF_SWAP)) { + ret = OCFS2_ET_BAD_DEVICE_NAME; + goto out; + } + + ret = 0; +out: + return ret; +} + +errcode_t online_resize_check(ocfs2_filesys *fs) +{ + /* + * we don't allow online resize to be coexist with other tunefs + * options to keep things simple. + */ + if (opts.backup_super || opts.vol_label || opts.num_slots || + opts.mount || opts.jrnl_size) { + com_err(opts.progname, 0, "Cannot do online-resize" + " along with other tasks"); + exit(1); + } + + return find_mount_point(opts.device); +} + void get_vol_size(ocfs2_filesys *fs) { errcode_t ret = 0; @@ -101,7 +165,8 @@ static errcode_t init_new_gd(ocfs2_files uint32_t num_new_clusters, uint16_t chain, uint32_t *total_bits, - uint32_t *used_bits) + uint32_t *used_bits, + int online) { errcode_t ret = 0; uint32_t cluster_chunk; @@ -224,7 +289,131 @@ bail: return ret; } -errcode_t update_volume_size(ocfs2_filesys *fs, int *changed) +/* + * Reserve the block in the specified group and modify the + * group and chain information accordingly. 
+ */ +static errcode_t reserve_cluster(ocfs2_filesys *fs, + char *progname, + struct ocfs2_dinode *di, + uint64_t gd_blkno, + uint32_t cluster) +{ + errcode_t ret; + uint16_t chain, cl_cpg = di->id2.i_chain.cl_cpg; + char *gd_buf = NULL; + struct ocfs2_group_desc *gd = NULL; + void *bitmap = NULL; + struct ocfs2_chain_rec *cr = NULL; + int retval; + + ret = ocfs2_malloc_block(fs->fs_io, &gd_buf); + if (ret) { + com_err(opts.progname, ret, "while allocating a block during " + "reserve backup super blocks"); + goto out; + } + + ret = ocfs2_read_group_desc(fs, gd_blkno, gd_buf); + if (ret) { + com_err(opts.progname, ret, "while reading group descriptor " + "at block %"PRIu64" during volume resize", gd_blkno); + goto out; + } + + gd = (struct ocfs2_group_desc *)gd_buf; + bitmap = gd->bg_bitmap; + retval = ocfs2_set_bit(cluster % cl_cpg, bitmap); + if (retval != 0) { + com_err(opts.progname, 0, "while allocating backup superblock" + "in cluster %u during volume resize", cluster); + goto out; + } + + gd->bg_free_bits_count--; + + ret = ocfs2_write_group_desc(fs, gd_blkno, gd_buf); + if (ret) { + com_err(opts.progname, ret, "while writing group descriptor " + "at block %"PRIu64" during volume resize", gd_blkno); + goto out; + } + + /* update the dinode accordingly.*/ + chain = gd->bg_chain; + cr = &(di->id2.i_chain.cl_recs[chain]); + cr->c_free--; + + di->id1.bitmap1.i_used++; +out: + if (gd_buf) + ocfs2_free(&gd_buf); + return ret; +} + +/* + * Reserve the backup superblocks which exist in the new added groups. + * + * For those which are in the "old last" group but don't be within the + * "old" volume size, the kernel will reserve it. 
+ */ +static errcode_t reserve_backup_supers(ocfs2_filesys *fs, + char *progname, + struct ocfs2_dinode *di, + uint64_t lgd_blkno) +{ + errcode_t ret = 0; + int numsb, i; + uint64_t gd_blkno; + uint64_t blocks[OCFS2_MAX_BACKUP_SUPERBLOCKS]; + uint16_t cl_cpg = di->id2.i_chain.cl_cpg; + uint32_t cluster; + + if (!OCFS2_HAS_COMPAT_FEATURE(OCFS2_RAW_SB(fs->fs_super), + OCFS2_FEATURE_COMPAT_BACKUP_SB)) + goto out; + + numsb = ocfs2_get_backup_super_offset(fs, blocks, ARRAY_SIZE(blocks)); + if (numsb <= 0) + goto out; + + for (i = 0; i < numsb; i++) { + cluster = ocfs2_blocks_to_clusters(fs, blocks[i]); + gd_blkno = ocfs2_which_cluster_group(fs, cl_cpg, cluster); + if (lgd_blkno >= gd_blkno) + continue; + + ret = reserve_cluster(fs, progname, di, gd_blkno, cluster); + if (ret) + goto out; + } + +out: + return ret; +} + +static errcode_t update_global_bitmap_online(char *progname, + struct ocfs2_dinode *di) +{ + int fd; + errcode_t ret; + + fd = open(mnt_dir, O_RDONLY); + if (fd < 0) { + com_err(progname, errno, + "while opening mounted dir %s.\n", mnt_dir); + return errno; + } + + ret = ioctl(fd, OCFS2_IOC_FSGROWFSDATA, di); + if (ret < 0) + com_err(progname, errno, "while ioctl on dir %s.\n", mnt_dir); + + close(fd); + return ret; +} + +errcode_t update_volume_size(ocfs2_filesys *fs, int *changed, int online) { errcode_t ret = 0; struct ocfs2_dinode *di; @@ -242,7 +431,7 @@ errcode_t update_volume_size(ocfs2_files uint32_t used_bits; uint32_t total_bits; uint32_t num_bits; - int flush_lgd = 0; + int flush_lgd = 0, i = 0; ret = ocfs2_malloc_block(fs->fs_io, &in_buf); if (ret) { @@ -277,13 +466,31 @@ errcode_t update_volume_size(ocfs2_files di = (struct ocfs2_dinode *)in_buf; cl = &(di->id2.i_chain); - total_bits = di->id1.bitmap1.i_total; - used_bits = di->id1.bitmap1.i_used; - first_new_cluster = di->i_clusters; save_new_clusters = num_new_clusters ocfs2_blocks_to_clusters(fs, opts.num_blocks) - di->i_clusters; + /* + * For online resize, empty total_bits, used_bits, 
di->i_clusters + * and all the information within the chain except c_blkno. + * So after init_new_gd, used_bits, total_bits, and chain_rec will + * record the real information of the new added group descriptors. + * + * We don't emtpy c_blkno here so that the original group header + * can be linked to the tail of the new added group list. + */ + if (online) { + total_bits = 0; + used_bits = 0; + di->i_clusters = 0; + for (i = 0; i < cl->cl_count; i++) { + cl->cl_recs[i].c_free = 0; + cl->cl_recs[i].c_total = 0; + } + } else { + total_bits = di->id1.bitmap1.i_total; + used_bits = di->id1.bitmap1.i_used; + } /* Find the blknum of the last cluster group */ lgd_blkno = ocfs2_which_cluster_group(fs, cl->cl_cpg, first_new_cluster - 1); @@ -304,7 +511,12 @@ errcode_t update_volume_size(ocfs2_files chain = gd->bg_chain; - /* If possible round off the last group to cpg */ + /* + * If possible round off the last group to cpg. + * + * For online resize, it is proceeded as offline resize, + * but the update of the group will be done by kernel. + */ cluster_chunk = MIN(num_new_clusters, (cl->cl_cpg - (gd->bg_bits/cl->cl_bpc))); if (cluster_chunk) { @@ -330,11 +542,14 @@ errcode_t update_volume_size(ocfs2_files flush_lgd = 1; } - /* Init the new groups and write to disk */ - /* Add these groups one by one starting from the first chain after */ - /* the one containing the last group */ + /* + * Init the new groups and write to disk + * Add these groups one by one starting from the first chain after + * the one containing the last group. 
+ */ ret = init_new_gd(fs, di, first_new_cluster, - num_new_clusters, chain, &total_bits, &used_bits); + num_new_clusters, chain, &total_bits, &used_bits, + online); if (ret) goto bail; @@ -344,11 +559,21 @@ errcode_t update_volume_size(ocfs2_files di->i_clusters += save_new_clusters; di->i_size = (uint64_t) di->i_clusters * fs->fs_clustersize; - fs->fs_super->i_clusters = di->i_clusters; + fs->fs_super->i_clusters = fs->fs_clusters; - ret = update_global_bitmap(fs, di, gd, flush_lgd); - if (ret) - goto bail; + if (online) { + ret = reserve_backup_supers(fs, opts.progname, di, lgd_blkno); + if (ret) + goto bail; + + ret = update_global_bitmap_online(opts.progname, di); + if (ret) + goto bail; + } else { + ret = update_global_bitmap(fs, di, gd, flush_lgd); + if (ret) + goto bail; + } *changed = 1; diff --git a/tunefs.ocfs2/tunefs.c b/tunefs.ocfs2/tunefs.c index cea9eb1..59aa56d 100644 --- a/tunefs.ocfs2/tunefs.c +++ b/tunefs.ocfs2/tunefs.c @@ -30,6 +30,7 @@ ocfs2_tune_opts opts; ocfs2_filesys *fs_gbl = NULL; static int cluster_locked = 0; static int resize = 0; +static int online_resize = 0; static uint64_t def_jrnl_size = 0; static char old_uuid[OCFS2_VOL_UUID_LEN * 2 + 1]; static char new_uuid[OCFS2_VOL_UUID_LEN * 2 + 1]; @@ -1006,6 +1007,17 @@ static errcode_t volume_check(ocfs2_file int dirty = 0; uint16_t max_slots = OCFS2_RAW_SB(fs->fs_super)->s_max_slots; + /* + * online_resize can't coexist with other tasks, and it does't + * need other checks, so we just do the check and return. 
+ */ + if (online_resize) { + ret = online_resize_check(fs); + if (ret) + com_err(opts.progname, 0, "online resize check failed."); + goto bail; + } + ret = journal_check(fs, &dirty, &def_jrnl_size); if (ret || dirty) goto bail; @@ -1235,13 +1247,19 @@ int main(int argc, char **argv) block_signals(SIG_BLOCK); ret = ocfs2_lock_down_cluster(fs); - if (ret) { - block_signals(SIG_UNBLOCK); + block_signals(SIG_UNBLOCK); + if (!ret) + cluster_locked = 1; + else if (ret == O2DLM_ET_TRYLOCK_FAILED && resize) { + /* + * We just set the flag here and more check and + * lock will be done later. + */ + online_resize = 1; + } else { com_err(opts.progname, ret, "while locking down the cluster"); goto close; } - cluster_locked = 1; - block_signals(SIG_UNBLOCK); } /* @@ -1274,6 +1292,27 @@ int main(int argc, char **argv) } } + /* + * We handle online resize seperately here, since it is + * not like tunefs operations. + */ + if (online_resize) { + ret = online_resize_lock(fs); + if (ret) + goto close; + + ret = update_volume_size(fs, &upd_blocks, online_resize); + if (ret) { + com_err(opts.progname, ret, + "while updating volume size"); + goto online_resize_unlock; + } + if (upd_blocks) + printf("Resized volume\n"); + + goto online_resize_unlock; + } + /* Set resize incompat flag on superblock */ max_slots = OCFS2_RAW_SB(fs->fs_super)->s_max_slots; if (opts.num_blocks || @@ -1352,7 +1391,7 @@ int main(int argc, char **argv) /* update volume size */ if (opts.num_blocks) { old_blocks = fs->fs_blocks; - ret = update_volume_size(fs, &upd_blocks); + ret = update_volume_size(fs, &upd_blocks, 0); if (ret) { com_err(opts.progname, ret, "while updating volume size"); @@ -1450,7 +1489,9 @@ int main(int argc, char **argv) } } } - +online_resize_unlock: + if (online_resize) + online_resize_unlock(fs); unlock: block_signals(SIG_BLOCK); if (cluster_locked && fs->fs_dlm_ctxt) diff --git a/tunefs.ocfs2/tunefs.h b/tunefs.ocfs2/tunefs.h index 3863331..b1ad470 100644 --- a/tunefs.ocfs2/tunefs.h +++ 
b/tunefs.ocfs2/tunefs.h @@ -117,6 +117,10 @@ errcode_t feature_check(ocfs2_filesys *f errcode_t update_feature(ocfs2_filesys *fs); void get_vol_size(ocfs2_filesys *fs); -errcode_t update_volume_size(ocfs2_filesys *fs, int *changed); +errcode_t update_volume_size(ocfs2_filesys *fs, int *changed, int online); int validate_vol_size(ocfs2_filesys *fs); + +errcode_t online_resize_check(ocfs2_filesys *fs); +errcode_t online_resize_lock(ocfs2_filesys *fs); +errcode_t online_resize_unlock(ocfs2_filesys *fs); #endif /* _TUNEFS_H */ -- 1.3.3
Roel Kluin
2007-Nov-26 15:06 UTC
[Ocfs2-devel] [PATCH 6/6] Add online resize in tunefs.ocfs2, take 1
Tao Ma wrote:
> +static errcode_t find_mount_point(char *device)
> +{
> + int mount_flags = 0;
> + errcode_t ret;
> +
> + memset(mnt_dir, 0, sizeof(mnt_dir));
> +
> + ret = ocfs2_check_mount_point(device, &mount_flags,
> + mnt_dir, sizeof(mnt_dir));
> + if (ret)
> + goto out;
> +
> + if ((!mount_flags & OCFS2_MF_MOUNTED) ||

if(!(mount_flags & OCFS2_MF_MOUNTED) ||

> + (mount_flags & OCFS2_MF_READONLY) ||
> + (mount_flags & OCFS2_MF_SWAP)) {
> + ret = OCFS2_ET_BAD_DEVICE_NAME;
> + goto out;
> + }
> +
> + ret = 0;
> +out:
> + return ret;
> +}
tao.ma
2007-Nov-26 16:59 UTC
[Ocfs2-devel] [PATCH 6/6] Add online resize in tunefs.ocfs2, take 1
Roel Kluin wrote:
> Tao Ma wrote:
>
>> +static errcode_t find_mount_point(char *device)
>> +{
>> + int mount_flags = 0;
>> + errcode_t ret;
>> +
>> + memset(mnt_dir, 0, sizeof(mnt_dir));
>> +
>> + ret = ocfs2_check_mount_point(device, &mount_flags,
>> + mnt_dir, sizeof(mnt_dir));
>> + if (ret)
>> + goto out;
>> +
>> + if ((!mount_flags & OCFS2_MF_MOUNTED) ||
>>
>
> if(!(mount_flags & OCFS2_MF_MOUNTED) ||
>

Thanks for pointing it out.

>
>> + (mount_flags & OCFS2_MF_READONLY) ||
>> + (mount_flags & OCFS2_MF_SWAP)) {
>> + ret = OCFS2_ET_BAD_DEVICE_NAME;
>> + goto out;
>> + }
>> +
>> + ret = 0;
>> +out:
>> + return ret;
>> +}
>>
Reasonably Related Threads
- [PATCH 0/9] Quota support for ocfs2-tools (version 3)
- [PATCH 0/9] Quota support for ocfs2-tools (version 2)
- [PATCH 0/8] Quota support for ocfs2-tools
- [PATCH 9-10/10] Quota support for disabling sparse feature
- [PATCH 0/6] Ocfs2-tools: Add a new tool 'o2info'.