Wengang Wang
2010-Mar-30 04:29 UTC
[Ocfs2-devel] [PATCH 3/3] ocfs2:freeze-thaw: make it work -v4
This patch adds freeze_fs()/unfreeze_fs() for ocfs2 so that it supports freeze/thaw. Signed-off-by: Wengang Wang <wen.gang.wang at oracle.com> --- fs/ocfs2/dlmglue.c | 116 ++++++++++++++++++++++++++++++++++++++++++++++- fs/ocfs2/dlmglue.h | 2 + fs/ocfs2/journal.c | 1 + fs/ocfs2/ocfs2.h | 12 +++++ fs/ocfs2/super.c | 130 +++++++++++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 259 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index d7a9330..455b16f 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -3761,10 +3761,124 @@ static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, ocfs2_dentry_lock_put(osb, dl); } +/* + * This is only ever run on behalf of another node. + */ +void ocfs2_freeze_worker(struct work_struct *work) +{ + struct super_block *sb; + int ret, do_unlock = 0; + struct ocfs2_super *osb = container_of(work, struct ocfs2_super, + osb_freeze_work); + + mlog(0, "flags=0x%lx, frozen=%d\n", osb->osb_flags, osb->sb->s_frozen); + + /* If umount is in progress, wait it to complete. */ + ret = ocfs2_test_osb_flag(osb, OCFS2_OSB_UMOUNT_INPROG); + if (ret) { + mlog(0, "Unmount in progress, make the freeze request pending" + "\n"); + /* Leave FREEZE_INPROG there so not queue the worker again */ + return; + } + + sb = freeze_bdev(osb->sb->s_bdev); + if (IS_ERR(sb)) { + /* ocfs2_freeze_fs() shouldn't return any error in the remote + * box. If it does it's a bug. But we deal with it gracefully. + */ + ret = PTR_ERR(sb); + mlog_errno(ret); + ocfs2_clear_osb_flag(osb, OCFS2_OSB_FREEZE_INPROG); + return; + } + + spin_lock(&osb->osb_lock); + osb->osb_flags &= ~OCFS2_OSB_FREEZE_INPROG; + osb->osb_flags |= OCFS2_OSB_FROZEN_BY_REMOTE; + spin_unlock(&osb->osb_lock); + + ocfs2_wake_downconvert_thread(osb); + + /* Waits for thaw */ +wait_thaw: + /* thaws the fs if unmount is in progress. 
*/ + ret = ocfs2_test_osb_flag(osb, OCFS2_OSB_UMOUNT_INPROG); + if (ret) { + /* Leave FREEZE_INPROG there so not queue the worker again */ + goto thaw_dev; + + } + + ret = ocfs2_freeze_lock(osb, 0); + if (ret == -EBUSY) { + /* We suppose when it returns -EBUSY when timeout is hit. + * Change me if it's not. + */ + goto wait_thaw; + } else if (ret) { + mlog(ML_ERROR, "Getting PR on freeze_lock failed," + "but going to thaw block device %s\n", osb->dev_str); + } else { + do_unlock = 1; + } + +thaw_dev: + ret = thaw_bdev(osb->sb->s_bdev, osb->sb); + if (ret) { + /* this shouldn't happen */ + mlog_errno(ret); + printk(KERN_WARNING "ocfs2: Thawing %s failed\n", osb->dev_str); + } + + ocfs2_clear_osb_flag(osb, OCFS2_OSB_FROZEN_BY_REMOTE); + + if (do_unlock) + ocfs2_freeze_unlock(osb, 0); +} + +static void ocfs2_queue_freeze_worker(struct ocfs2_super *osb) +{ + int queue_it = 0; + + spin_lock(&osb->osb_lock); + if (!(osb->osb_flags & OCFS2_OSB_FREEZE_INPROG)) { + osb->osb_flags |= OCFS2_OSB_FREEZE_INPROG; + queue_it = 1; + } + spin_unlock(&osb->osb_lock); + + if (queue_it) + queue_work(ocfs2_wq, &osb->osb_freeze_work); +} + static int ocfs2_check_freeze_downconvert(struct ocfs2_lock_res *lockres, int new_level) { - return 1; /* change me */ + struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); + struct super_block *sb = osb->sb; + int frozen_by_remote; + + mlog(0, "flags=0x%lx, frozen=%d, level=%d, newlevel=%d\n", + osb->osb_flags, sb->s_frozen, lockres->l_level, new_level); + + if (new_level == LKM_PRMODE) { + /* other node is during mount or is waiting for thaw. */ + if (sb->s_frozen) + return 0; + else + return 1; + } + + /* now new_level is NL. other node wants to freeze cluster. */ + frozen_by_remote = ocfs2_test_osb_flag(osb, OCFS2_OSB_FROZEN_BY_REMOTE); + + /* ok, this node is frozen for the request. 
*/ + if (frozen_by_remote) + return 1; + + ocfs2_queue_freeze_worker(osb); + return 0; } /* diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index 297b3a9..c6da138 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h @@ -167,6 +167,8 @@ void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb); struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void); void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug); +void ocfs2_freeze_worker(struct work_struct *work); + /* To set the locking protocol on module initialization */ void ocfs2_set_locking_protocol(void); #endif /* DLMGLUE_H */ diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 9336c60..8f82525 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -355,6 +355,7 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) if (ocfs2_is_hard_readonly(osb)) return ERR_PTR(-EROFS); + vfs_check_frozen(osb->sb, SB_FREEZE_TRANS); BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE); BUG_ON(max_buffs <= 0); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 7892738..ec751b9 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -261,6 +261,9 @@ enum ocfs2_mount_options #define OCFS2_OSB_HARD_RO 0x0002 #define OCFS2_OSB_ERROR_FS 0x0004 #define OCFS2_OSB_DROP_DENTRY_LOCK_IMMED 0x0008 +#define OCFS2_OSB_FREEZE_INPROG 0x0010 +#define OCFS2_OSB_UMOUNT_INPROG 0x0020 +#define OCFS2_OSB_FROZEN_BY_REMOTE 0x0040 /* frozen by remote */ #define OCFS2_DEFAULT_ATIME_QUANTUM 60 @@ -403,6 +406,8 @@ struct ocfs2_super unsigned int *osb_orphan_wipes; wait_queue_head_t osb_wipe_event; + /* osb_freeze_work is protected by osb->s_bdev->bd_fsfreeze_mutex */ + struct work_struct osb_freeze_work; struct ocfs2_orphan_scan osb_orphan_scan; /* used to protect metaecc calculation check of xattr. 
*/ @@ -540,6 +545,13 @@ static inline void ocfs2_set_osb_flag(struct ocfs2_super *osb, spin_unlock(&osb->osb_lock); } +static inline void ocfs2_clear_osb_flag(struct ocfs2_super *osb, + unsigned long flag) +{ + spin_lock(&osb->osb_lock); + osb->osb_flags &= ~flag; + spin_unlock(&osb->osb_lock); +} static inline unsigned long ocfs2_test_osb_flag(struct ocfs2_super *osb, unsigned long flag) diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 9464080..cc37127 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -138,6 +138,10 @@ static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend); static int ocfs2_enable_quotas(struct ocfs2_super *osb); static void ocfs2_disable_quotas(struct ocfs2_super *osb); static int ocfs2_freeze_lock_supported(struct ocfs2_super *osb); +static int is_kernel_thread(void); +static int ocfs2_freeze_fs(struct super_block *sb); +static int is_freeze_master(struct ocfs2_super *osb); +static int ocfs2_unfreeze_fs(struct super_block *sb); static const struct super_operations ocfs2_sops = { .statfs = ocfs2_statfs, @@ -152,6 +156,8 @@ static const struct super_operations ocfs2_sops = { .show_options = ocfs2_show_options, .quota_read = ocfs2_quota_read, .quota_write = ocfs2_quota_write, + .freeze_fs = ocfs2_freeze_fs, + .unfreeze_fs = ocfs2_unfreeze_fs, }; enum { @@ -389,7 +395,7 @@ static const struct file_operations ocfs2_osb_debug_fops = { static int ocfs2_sync_fs(struct super_block *sb, int wait) { - int status; + int status, flush_journal = 0; tid_t target; struct ocfs2_super *osb = OCFS2_SB(sb); @@ -410,6 +416,17 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait) jbd2_log_wait_commit(OCFS2_SB(sb)->journal->j_journal, target); } + + flush_journal = ocfs2_test_osb_flag(osb, OCFS2_OSB_FREEZE_INPROG); + + /* flushes journal when it's during freeze_bdev(). so that we need not + * replay journal if this node crashes before thawed. 
+ */ + if (unlikely(flush_journal)) { + status = jbd2_journal_flush(OCFS2_SB(sb)->journal->j_journal); + if (status) + mlog(ML_ERROR, "flushing journal failed %d\n", status); + } return 0; } @@ -1219,6 +1236,9 @@ static void ocfs2_kill_sb(struct super_block *sb) if (!osb || atomic_read(&osb->vol_state) == VOLUME_DISABLED) goto out; + up_write(&sb->s_umount); + ocfs2_set_osb_flag(osb, OCFS2_OSB_UMOUNT_INPROG); + down_write(&sb->s_umount); /* Prevent further queueing of inode drop events */ spin_lock(&dentry_list_lock); ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED); @@ -2176,6 +2196,8 @@ static int ocfs2_initialize_super(struct super_block *sb, INIT_WORK(&osb->dentry_lock_work, ocfs2_drop_dl_inodes); osb->dentry_lock_list = NULL; + INIT_WORK(&osb->osb_freeze_work, ocfs2_freeze_worker); + /* get some pseudo constants for clustersize bits */ osb->s_clustersize_bits le32_to_cpu(di->id2.i_super.s_clustersize_bits); @@ -2534,5 +2556,111 @@ void __ocfs2_abort(struct super_block* sb, ocfs2_handle_error(sb); } +static inline int is_kernel_thread() +{ + return current->flags & PF_KTHREAD; +} + +/* ocfs2_freeze_fs()/ocfs2_unfreeze_fs() are always called by freeze_bdev()/ + * thaw_bdev(). bdev->bd_fsfreeze_mutex is used for synchronization. an extra + * ocfs2 mutex is not needed. + */ +static int ocfs2_freeze_fs(struct super_block *sb) +{ + int ret = 0; + struct ocfs2_super *osb = OCFS2_SB(sb); + + mlog(0, "flags=0x%lx, frozen=%d\n", osb->osb_flags, osb->sb->s_frozen); + + /* cluster lock is issued only when this is the IOCTL process.(other + * case ocfs2_freeze_fs() is called in ocfs2_wq thread) + */ + + if (is_kernel_thread()) { + BUG_ON(!ocfs2_freeze_lock_supported(osb)); + + /* this is ocfs2_wq kernel thread. we do freeze on behalf of + * the requesting node, don't issue cluster lock again. 
+ */ + printk(KERN_INFO "ocfs2: Block device (%s) frozen by remote\n", + osb->dev_str); + return 0; + } + + /* this is ioctl thread, issues cluster lock */ + + if (!ocfs2_freeze_lock_supported(osb)) + return -ENOTSUPP; + + ret = ocfs2_freeze_lock(osb, 1); + if (ret) { + mlog_errno(ret); + } else { + printk(KERN_INFO "ocfs2: Block device (%s) frozen by local\n", + osb->dev_str); + } + + return ret; +} + +static int is_freeze_master(struct ocfs2_super *osb) +{ + BUG_ON(osb->osb_freeze_lockres.l_ex_holders > 1); + return osb->osb_freeze_lockres.l_ex_holders; +} + +static int ocfs2_unfreeze_fs(struct super_block *sb) +{ + struct ocfs2_super *osb = OCFS2_SB(sb); + + mlog(0, "flags=0x%lx, frozen=%d\n", osb->osb_flags, osb->sb->s_frozen); + + if (is_kernel_thread()) { + /* this is ocfs2_wq kernel thread. nothing to do. */ + BUG_ON(!ocfs2_freeze_lock_supported(osb)); + printk(KERN_INFO "ocfs2: Block device (%s) thawed by remote\n", + osb->dev_str); + return 0; + } + + /* this is the ioctl user thread. */ + + if (!ocfs2_freeze_lock_supported(osb)) + return -ENOTSUPP; + + if (!is_freeze_master(osb)) { + /* THAW ioctl on a node other than the one on with cluster is + * frozen. don't thaw in the case. returns -EINVAL so that + * osb->sb->s_bdev->bd_fsfreeze_count can be decreased. + */ + + if (!ocfs2_test_osb_flag(osb, OCFS2_OSB_FROZEN_BY_REMOTE)) { + /* this is from a nested cross cluster thaw + * case: + * frozen from another node(node A) + * frozen from this node(not suppored though) + * thawed from node A + * thawed from this node(coming here) + * + * thaw this node only. + */ + printk(KERN_INFO "ocfs2: Block device (%s) thawed by " + "local\n", osb->dev_str); + return 0; + } + + /* now the cluster still frozen by another node, fails this + * request. + */ + return -EINVAL; + } + + ocfs2_freeze_unlock(osb, 1); + + printk(KERN_INFO "ocfs2: Block device (%s) thawed by local\n", + osb->dev_str); + return 0; +} + module_init(ocfs2_init); module_exit(ocfs2_exit); -- 1.6.6.1
Sunil Mushran
2010-Mar-30 21:58 UTC
[Ocfs2-devel] [PATCH 3/3] ocfs2:freeze-thaw: make it work -v4
http://lkml.org/lkml/2010/3/23/195 http://git.kernel.org/?p=linux/kernel/git/viro/vfs-2.6.git;a=commitdiff;h=9cf4cacda31338a764e2cbe65cd51bb7f18f3a20 The interface is changing... for the better. We should be able to get away without the cluster lock timeout business. Wengang Wang wrote:> This patch adds freeze_fs()/unfreeze_fs() for ocfs2 so that it supports freeze/thaw. > > Signed-off-by: Wengang Wang <wen.gang.wang at oracle.com> > --- > fs/ocfs2/dlmglue.c | 116 ++++++++++++++++++++++++++++++++++++++++++++++- > fs/ocfs2/dlmglue.h | 2 + > fs/ocfs2/journal.c | 1 + > fs/ocfs2/ocfs2.h | 12 +++++ > fs/ocfs2/super.c | 130 +++++++++++++++++++++++++++++++++++++++++++++++++++- > 5 files changed, 259 insertions(+), 2 deletions(-) > > diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c > index d7a9330..455b16f 100644 > --- a/fs/ocfs2/dlmglue.c > +++ b/fs/ocfs2/dlmglue.c > @@ -3761,10 +3761,124 @@ static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, > ocfs2_dentry_lock_put(osb, dl); > } > > +/* > + * This is only ever run on behalf of another node. > + */ > +void ocfs2_freeze_worker(struct work_struct *work) > +{ > + struct super_block *sb; > + int ret, do_unlock = 0; > + struct ocfs2_super *osb = container_of(work, struct ocfs2_super, > + osb_freeze_work); > + > + mlog(0, "flags=0x%lx, frozen=%d\n", osb->osb_flags, osb->sb->s_frozen); > + > + /* If umount is in progress, wait it to complete. */ > + ret = ocfs2_test_osb_flag(osb, OCFS2_OSB_UMOUNT_INPROG); > + if (ret) { > + mlog(0, "Unmount in progress, make the freeze request pending" > + "\n"); > + /* Leave FREEZE_INPROG there so not queue the worker again */ > + return; > + } > + > + sb = freeze_bdev(osb->sb->s_bdev); > + if (IS_ERR(sb)) { > + /* ocfs2_freeze_fs() shouldn't return any error in the remote > + * box. If it does it's a bug. But we deal with it gracefully. 
> + */ > + ret = PTR_ERR(sb); > + mlog_errno(ret); > + ocfs2_clear_osb_flag(osb, OCFS2_OSB_FREEZE_INPROG); > + return; > + } > + > + spin_lock(&osb->osb_lock); > + osb->osb_flags &= ~OCFS2_OSB_FREEZE_INPROG; > + osb->osb_flags |= OCFS2_OSB_FROZEN_BY_REMOTE; > + spin_unlock(&osb->osb_lock); > + > + ocfs2_wake_downconvert_thread(osb); > + > + /* Waits for thaw */ > +wait_thaw: > + /* thaws the fs if unmount is in progress. */ > + ret = ocfs2_test_osb_flag(osb, OCFS2_OSB_UMOUNT_INPROG); > + if (ret) { > + /* Leave FREEZE_INPROG there so not queue the worker again */ > + goto thaw_dev; > + > + } > + > + ret = ocfs2_freeze_lock(osb, 0); > + if (ret == -EBUSY) { > + /* We suppose when it returns -EBUSY when timeout is hit. > + * Change me if it's not. > + */ > + goto wait_thaw; > + } else if (ret) { > + mlog(ML_ERROR, "Getting PR on freeze_lock failed," > + "but going to thaw block device %s\n", osb->dev_str); > + } else { > + do_unlock = 1; > + } > + > +thaw_dev: > + ret = thaw_bdev(osb->sb->s_bdev, osb->sb); > + if (ret) { > + /* this shouldn't happen */ > + mlog_errno(ret); > + printk(KERN_WARNING "ocfs2: Thawing %s failed\n", osb->dev_str); > + } > + > + ocfs2_clear_osb_flag(osb, OCFS2_OSB_FROZEN_BY_REMOTE); > + > + if (do_unlock) > + ocfs2_freeze_unlock(osb, 0); > +} > + > +static void ocfs2_queue_freeze_worker(struct ocfs2_super *osb) > +{ > + int queue_it = 0; > + > + spin_lock(&osb->osb_lock); > + if (!(osb->osb_flags & OCFS2_OSB_FREEZE_INPROG)) { > + osb->osb_flags |= OCFS2_OSB_FREEZE_INPROG; > + queue_it = 1; > + } > + spin_unlock(&osb->osb_lock); > + > + if (queue_it) > + queue_work(ocfs2_wq, &osb->osb_freeze_work); > +} > + > static int ocfs2_check_freeze_downconvert(struct ocfs2_lock_res *lockres, > int new_level) > { > - return 1; /* change me */ > + struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); > + struct super_block *sb = osb->sb; > + int frozen_by_remote; > + > + mlog(0, "flags=0x%lx, frozen=%d, level=%d, newlevel=%d\n", > + osb->osb_flags, 
sb->s_frozen, lockres->l_level, new_level); > + > + if (new_level == LKM_PRMODE) { > + /* other node is during mount or is waiting for thaw. */ > + if (sb->s_frozen) > + return 0; > + else > + return 1; > + } > + > + /* now new_level is NL. other node wants to freeze cluster. */ > + frozen_by_remote = ocfs2_test_osb_flag(osb, OCFS2_OSB_FROZEN_BY_REMOTE); > + > + /* ok, this node is frozen for the request. */ > + if (frozen_by_remote) > + return 1; > + > + ocfs2_queue_freeze_worker(osb); > + return 0; > } > > /* > diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h > index 297b3a9..c6da138 100644 > --- a/fs/ocfs2/dlmglue.h > +++ b/fs/ocfs2/dlmglue.h > @@ -167,6 +167,8 @@ void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb); > struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void); > void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug); > > +void ocfs2_freeze_worker(struct work_struct *work); > + > /* To set the locking protocol on module initialization */ > void ocfs2_set_locking_protocol(void); > #endif /* DLMGLUE_H */ > diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c > index 9336c60..8f82525 100644 > --- a/fs/ocfs2/journal.c > +++ b/fs/ocfs2/journal.c > @@ -355,6 +355,7 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) > if (ocfs2_is_hard_readonly(osb)) > return ERR_PTR(-EROFS); > > + vfs_check_frozen(osb->sb, SB_FREEZE_TRANS); > BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE); > BUG_ON(max_buffs <= 0); > > diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h > index 7892738..ec751b9 100644 > --- a/fs/ocfs2/ocfs2.h > +++ b/fs/ocfs2/ocfs2.h > @@ -261,6 +261,9 @@ enum ocfs2_mount_options > #define OCFS2_OSB_HARD_RO 0x0002 > #define OCFS2_OSB_ERROR_FS 0x0004 > #define OCFS2_OSB_DROP_DENTRY_LOCK_IMMED 0x0008 > +#define OCFS2_OSB_FREEZE_INPROG 0x0010 > +#define OCFS2_OSB_UMOUNT_INPROG 0x0020 > +#define OCFS2_OSB_FROZEN_BY_REMOTE 0x0040 /* frozen by remote */ > > #define OCFS2_DEFAULT_ATIME_QUANTUM 60 > > @@ -403,6 +406,8 @@ struct 
ocfs2_super > unsigned int *osb_orphan_wipes; > wait_queue_head_t osb_wipe_event; > > + /* osb_freeze_work is protected by osb->s_bdev->bd_fsfreeze_mutex */ > + struct work_struct osb_freeze_work; > struct ocfs2_orphan_scan osb_orphan_scan; > > /* used to protect metaecc calculation check of xattr. */ > @@ -540,6 +545,13 @@ static inline void ocfs2_set_osb_flag(struct ocfs2_super *osb, > spin_unlock(&osb->osb_lock); > } > > +static inline void ocfs2_clear_osb_flag(struct ocfs2_super *osb, > + unsigned long flag) > +{ > + spin_lock(&osb->osb_lock); > + osb->osb_flags &= ~flag; > + spin_unlock(&osb->osb_lock); > +} > > static inline unsigned long ocfs2_test_osb_flag(struct ocfs2_super *osb, > unsigned long flag) > diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c > index 9464080..cc37127 100644 > --- a/fs/ocfs2/super.c > +++ b/fs/ocfs2/super.c > @@ -138,6 +138,10 @@ static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend); > static int ocfs2_enable_quotas(struct ocfs2_super *osb); > static void ocfs2_disable_quotas(struct ocfs2_super *osb); > static int ocfs2_freeze_lock_supported(struct ocfs2_super *osb); > +static int is_kernel_thread(void); > +static int ocfs2_freeze_fs(struct super_block *sb); > +static int is_freeze_master(struct ocfs2_super *osb); > +static int ocfs2_unfreeze_fs(struct super_block *sb); > > static const struct super_operations ocfs2_sops = { > .statfs = ocfs2_statfs, > @@ -152,6 +156,8 @@ static const struct super_operations ocfs2_sops = { > .show_options = ocfs2_show_options, > .quota_read = ocfs2_quota_read, > .quota_write = ocfs2_quota_write, > + .freeze_fs = ocfs2_freeze_fs, > + .unfreeze_fs = ocfs2_unfreeze_fs, > }; > > enum { > @@ -389,7 +395,7 @@ static const struct file_operations ocfs2_osb_debug_fops = { > > static int ocfs2_sync_fs(struct super_block *sb, int wait) > { > - int status; > + int status, flush_journal = 0; > tid_t target; > struct ocfs2_super *osb = OCFS2_SB(sb); > > @@ -410,6 +416,17 @@ static int 
ocfs2_sync_fs(struct super_block *sb, int wait) > jbd2_log_wait_commit(OCFS2_SB(sb)->journal->j_journal, > target); > } > + > + flush_journal = ocfs2_test_osb_flag(osb, OCFS2_OSB_FREEZE_INPROG); > + > + /* flushes journal when it's during freeze_bdev(). so that we need not > + * replay journal if this node crashes before thawed. > + */ > + if (unlikely(flush_journal)) { > + status = jbd2_journal_flush(OCFS2_SB(sb)->journal->j_journal); > + if (status) > + mlog(ML_ERROR, "flushing journal failed %d\n", status); > + } > return 0; > } > > @@ -1219,6 +1236,9 @@ static void ocfs2_kill_sb(struct super_block *sb) > if (!osb || atomic_read(&osb->vol_state) == VOLUME_DISABLED) > goto out; > > + up_write(&sb->s_umount); > + ocfs2_set_osb_flag(osb, OCFS2_OSB_UMOUNT_INPROG); > + down_write(&sb->s_umount); > /* Prevent further queueing of inode drop events */ > spin_lock(&dentry_list_lock); > ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED); > @@ -2176,6 +2196,8 @@ static int ocfs2_initialize_super(struct super_block *sb, > INIT_WORK(&osb->dentry_lock_work, ocfs2_drop_dl_inodes); > osb->dentry_lock_list = NULL; > > + INIT_WORK(&osb->osb_freeze_work, ocfs2_freeze_worker); > + > /* get some pseudo constants for clustersize bits */ > osb->s_clustersize_bits > le32_to_cpu(di->id2.i_super.s_clustersize_bits); > @@ -2534,5 +2556,111 @@ void __ocfs2_abort(struct super_block* sb, > ocfs2_handle_error(sb); > } > > +static inline int is_kernel_thread() > +{ > + return current->flags & PF_KTHREAD; > +} > + > +/* ocfs2_freeze_fs()/ocfs2_unfreeze_fs() are always called by freeze_bdev()/ > + * thaw_bdev(). bdev->bd_fsfreeze_mutex is used for synchronization. an extra > + * ocfs2 mutex is not needed. 
> + */ > +static int ocfs2_freeze_fs(struct super_block *sb) > +{ > + int ret = 0; > + struct ocfs2_super *osb = OCFS2_SB(sb); > + > + mlog(0, "flags=0x%lx, frozen=%d\n", osb->osb_flags, osb->sb->s_frozen); > + > + /* cluster lock is issued only when this is the IOCTL process.(other > + * case ocfs2_freeze_fs() is called in ocfs2_wq thread) > + */ > + > + if (is_kernel_thread()) { > + BUG_ON(!ocfs2_freeze_lock_supported(osb)); > + > + /* this is ocfs2_wq kernel thread. we do freeze on behalf of > + * the requesting node, don't issue cluster lock again. > + */ > + printk(KERN_INFO "ocfs2: Block device (%s) frozen by remote\n", > + osb->dev_str); > + return 0; > + } > + > + /* this is ioctl thread, issues cluster lock */ > + > + if (!ocfs2_freeze_lock_supported(osb)) > + return -ENOTSUPP; > + > + ret = ocfs2_freeze_lock(osb, 1); > + if (ret) { > + mlog_errno(ret); > + } else { > + printk(KERN_INFO "ocfs2: Block device (%s) frozen by local\n", > + osb->dev_str); > + } > + > + return ret; > +} > + > +static int is_freeze_master(struct ocfs2_super *osb) > +{ > + BUG_ON(osb->osb_freeze_lockres.l_ex_holders > 1); > + return osb->osb_freeze_lockres.l_ex_holders; > +} > + > +static int ocfs2_unfreeze_fs(struct super_block *sb) > +{ > + struct ocfs2_super *osb = OCFS2_SB(sb); > + > + mlog(0, "flags=0x%lx, frozen=%d\n", osb->osb_flags, osb->sb->s_frozen); > + > + if (is_kernel_thread()) { > + /* this is ocfs2_wq kernel thread. nothing to do. */ > + BUG_ON(!ocfs2_freeze_lock_supported(osb)); > + printk(KERN_INFO "ocfs2: Block device (%s) thawed by remote\n", > + osb->dev_str); > + return 0; > + } > + > + /* this is the ioctl user thread. */ > + > + if (!ocfs2_freeze_lock_supported(osb)) > + return -ENOTSUPP; > + > + if (!is_freeze_master(osb)) { > + /* THAW ioctl on a node other than the one on with cluster is > + * frozen. don't thaw in the case. returns -EINVAL so that > + * osb->sb->s_bdev->bd_fsfreeze_count can be decreased. 
> + */ > + > + if (!ocfs2_test_osb_flag(osb, OCFS2_OSB_FROZEN_BY_REMOTE)) { > + /* this is from a nested cross cluster thaw > + * case: > + * frozen from another node(node A) > + * frozen from this node(not suppored though) > + * thawed from node A > + * thawed from this node(coming here) > + * > + * thaw this node only. > + */ > + printk(KERN_INFO "ocfs2: Block device (%s) thawed by " > + "local\n", osb->dev_str); > + return 0; > + } > + > + /* now the cluster still frozen by another node, fails this > + * request. > + */ > + return -EINVAL; > + } > + > + ocfs2_freeze_unlock(osb, 1); > + > + printk(KERN_INFO "ocfs2: Block device (%s) thawed by local\n", > + osb->dev_str); > + return 0; > +} > + > module_init(ocfs2_init); > module_exit(ocfs2_exit); >
Wengang Wang
2010-Apr-07 16:03 UTC
[Ocfs2-devel] [PATCH 3/3] ocfs2:freeze-thaw: make it work -v4
On 10-03-30 14:58, Sunil Mushran wrote: > http://lkml.org/lkml/2010/3/23/195 > http://git.kernel.org/?p=linux/kernel/git/viro/vfs-2.6.git;a=commitdiff;h=9cf4cacda31338a764e2cbe65cd51bb7f18f3a20 > > The interface is changing... for the better. We should be able to get > away without the cluster lock timeout business. I don't see how that patch lets us avoid using a cluster lock with a timeout. The patch doesn't touch the unmount path. If it doesn't prevent an unmount, we still have the outstanding lock problem. Regards, wengang.