Changwei Ge
2018-Apr-02 02:59 UTC
[Ocfs2-devel] [PATCH] ocfs2/dlm: wait for dlm recovery done when migrating all lock resources
Hi Jun,

It seems that you have posted this patch before, if I remember correctly.

My concern is still that if you disable dlm recovery via ::migrate_done,
then the flag DLM_LOCK_RES_RECOVERING can't be cleared. So we can't purge
the problematic lock resource, since __dlm_lockres_unused() needs to check
that flag.

Finally, umount will run the while loop in dlm_migrate_all_locks()
infinitely. Or am I missing something?

Thanks,
Changwei

On 2018/3/15 21:00, piaojun wrote:
> Wait for dlm recovery done when migrating all lock resources in case that
> new lock resource left after leaving dlm domain. And the left lock
> resource will cause other nodes BUG.
>
>       NodeA                               NodeB                NodeC
>
> umount:
>   dlm_unregister_domain()
>     dlm_migrate_all_locks()
>
>                                           NodeB down
>
> do recovery for NodeB
> and collect a new lockres
> from other live nodes:
>
>   dlm_do_recovery
>     dlm_remaster_locks
>       dlm_request_all_locks:
>
>   dlm_mig_lockres_handler
>     dlm_new_lockres
>       __dlm_insert_lockres
>
> at last NodeA become the
> master of the new lockres
> and leave domain:
>   dlm_leave_domain()
>
>                                                                mount:
>                                                                  dlm_join_domain()
>
>                                                                touch file and request
>                                                                for the owner of the new
>                                                                lockres, but all the
>                                                                other nodes said 'NO',
>                                                                so NodeC decide to be
>                                                                the owner, and send do
>                                                                assert msg to other
>                                                                nodes:
>                                                                dlmlock()
>                                                                  dlm_get_lock_resource()
>                                                                    dlm_do_assert_master()
>
>                                                                other nodes receive the msg
>                                                                and found two masters exist.
>                                                                at last cause BUG in
>                                                                dlm_assert_master_handler()
>                                                                -->BUG();
>
> Fixes: bc9838c4d44a ("dlm: allow dlm do recovery during shutdown")
>
> Signed-off-by: Jun Piao <piaojun at huawei.com>
> Reviewed-by: Alex Chen <alex.chen at huawei.com>
> Reviewed-by: Yiwen Jiang <jiangyiwen at huawei.com>
> ---
>  fs/ocfs2/dlm/dlmcommon.h   |  1 +
>  fs/ocfs2/dlm/dlmdomain.c   | 15 +++++++++++++++
>  fs/ocfs2/dlm/dlmrecovery.c | 13 ++++++++++---
>  3 files changed, 26 insertions(+), 3 deletions(-)
>
> diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
> index 953c200..d06e27e 100644
> --- a/fs/ocfs2/dlm/dlmcommon.h
> +++ b/fs/ocfs2/dlm/dlmcommon.h
> @@ -140,6 +140,7 @@ struct dlm_ctxt
>  	u8 node_num;
>  	u32 key;
>  	u8 joining_node;
> +	u8 migrate_done; /* set to 1 means node has migrated all lock resources */
>  	wait_queue_head_t dlm_join_events;
>  	unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
>  	unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
> diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
> index 25b76f0..425081b 100644
> --- a/fs/ocfs2/dlm/dlmdomain.c
> +++ b/fs/ocfs2/dlm/dlmdomain.c
> @@ -461,6 +461,19 @@ static int dlm_migrate_all_locks(struct dlm_ctxt *dlm)
>  		cond_resched_lock(&dlm->spinlock);
>  		num += n;
>  	}
> +
> +	if (!num) {
> +		if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) {
> +			mlog(0, "%s: perhaps there are more lock resources "
> +				"need to be migrated after dlm recovery\n", dlm->name);
> +			ret = -EAGAIN;
> +		} else {
> +			mlog(0, "%s: we won't do dlm recovery after migrating "
> +				"all lock resources\n", dlm->name);
> +			dlm->migrate_done = 1;
> +		}
> +	}
> +
>  	spin_unlock(&dlm->spinlock);
>  	wake_up(&dlm->dlm_thread_wq);
>
> @@ -2038,6 +2051,8 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
>  	dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN;
>  	init_waitqueue_head(&dlm->dlm_join_events);
>
> +	dlm->migrate_done = 0;
> +
>  	dlm->reco.new_master = O2NM_INVALID_NODE_NUM;
>  	dlm->reco.dead_node = O2NM_INVALID_NODE_NUM;
>
> diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
> index 505ab42..d4336a5 100644
> --- a/fs/ocfs2/dlm/dlmrecovery.c
> +++ b/fs/ocfs2/dlm/dlmrecovery.c
> @@ -423,12 +423,11 @@ void dlm_wait_for_recovery(struct dlm_ctxt *dlm)
>
>  static void dlm_begin_recovery(struct dlm_ctxt *dlm)
>  {
> -	spin_lock(&dlm->spinlock);
> +	assert_spin_locked(&dlm->spinlock);
>  	BUG_ON(dlm->reco.state & DLM_RECO_STATE_ACTIVE);
>  	printk(KERN_NOTICE "o2dlm: Begin recovery on domain %s for node %u\n",
>  	       dlm->name, dlm->reco.dead_node);
>  	dlm->reco.state |= DLM_RECO_STATE_ACTIVE;
> -	spin_unlock(&dlm->spinlock);
>  }
>
>  static void dlm_end_recovery(struct dlm_ctxt *dlm)
> @@ -456,6 +455,13 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
>
>  	spin_lock(&dlm->spinlock);
>
> +	if (dlm->migrate_done) {
> +		mlog(0, "%s: no need do recovery after migrating all "
> +			"lock resources\n", dlm->name);
> +		spin_unlock(&dlm->spinlock);
> +		return 0;
> +	}
> +
>  	/* check to see if the new master has died */
>  	if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM &&
>  	    test_bit(dlm->reco.new_master, dlm->recovery_map)) {
> @@ -490,12 +496,13 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
>  	mlog(0, "%s(%d):recovery thread found node %u in the recovery map!\n",
>  	     dlm->name, task_pid_nr(dlm->dlm_reco_thread_task),
>  	     dlm->reco.dead_node);
> -	spin_unlock(&dlm->spinlock);
>
>  	/* take write barrier */
>  	/* (stops the list reshuffling thread, proxy ast handling) */
>  	dlm_begin_recovery(dlm);
>
> +	spin_unlock(&dlm->spinlock);
> +
>  	if (dlm->reco.new_master == dlm->node_num)
>  		goto master_here;
>
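For readers following the concern above, here is a minimal, userspace-compilable
sketch of the dependency Changwei describes. It is NOT the fs/ocfs2/dlm code:
struct lockres, lockres_unused() and migrate_all_locks() are simplified stand-ins
for dlm_lock_resource, __dlm_lockres_unused() and dlm_migrate_all_locks(), reduced
to the one rule at issue: a lockres still flagged DLM_LOCK_RES_RECOVERING never
counts as unused, so it can never be purged, and the migrate loop keeps spinning.

/*
 * Simplified sketch -- not the real fs/ocfs2/dlm code.
 */
#include <stdio.h>

#define DLM_LOCK_RES_RECOVERING 0x01

struct lockres {
	unsigned int state;	/* stand-in for dlm_lock_resource->state */
	int in_use;		/* stand-in for "still has owners/locks here" */
};

/* Mirrors the check in __dlm_lockres_unused(): RECOVERING blocks purging. */
static int lockres_unused(const struct lockres *res)
{
	if (res->state & DLM_LOCK_RES_RECOVERING)
		return 0;
	return !res->in_use;
}

/* Mirrors the shape of dlm_migrate_all_locks(): loop until nothing is left.
 * If recovery is disabled, nothing ever clears RECOVERING and the loop
 * cannot terminate (a pass counter keeps this demo finite). */
static void migrate_all_locks(struct lockres *res, int recovery_enabled)
{
	int pass = 0;

	while (!lockres_unused(res)) {
		if (recovery_enabled)
			res->state &= ~DLM_LOCK_RES_RECOVERING;
		res->in_use = 0;	/* the lockres itself gets migrated away */
		if (++pass > 10) {
			printf("stuck after %d passes: lockres still RECOVERING\n", pass);
			return;
		}
	}
	printf("all lock resources migrated after %d pass(es)\n", pass);
}

int main(void)
{
	struct lockres res = { DLM_LOCK_RES_RECOVERING, 1 };

	migrate_all_locks(&res, 1);	/* recovery allowed: terminates */

	res.state = DLM_LOCK_RES_RECOVERING;
	res.in_use = 1;
	migrate_all_locks(&res, 0);	/* recovery disabled: would spin forever */
	return 0;
}

Whether this deadlock can actually occur is exactly what the reply below addresses.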
piaojun
2018-Apr-02 04:31 UTC
[Ocfs2-devel] [PATCH] ocfs2/dlm: wait for dlm recovery done when migrating all lock resources
Hi Changwei,

On 2018/4/2 10:59, Changwei Ge wrote:
> Hi Jun,
>
> It seems that you have posted this patch before, if I remember correctly.
> My concern is still that if you disable dlm recovery via ::migrate_done,
> then the flag DLM_LOCK_RES_RECOVERING can't be cleared.

By the time migrate_done is set, all lockres have already been migrated,
so there won't be any lockres left with DLM_LOCK_RES_RECOVERING. And I
have been testing this patch for a few months.

thanks,
Jun

>
> So we can't purge the problematic lock resource, since __dlm_lockres_unused()
> needs to check that flag.
>
> Finally, umount will run the while loop in dlm_migrate_all_locks()
> infinitely. Or am I missing something?
>
> Thanks,
> Changwei
>
> On 2018/3/15 21:00, piaojun wrote:
>> Wait for dlm recovery done when migrating all lock resources in case that
>> new lock resource left after leaving dlm domain. And the left lock
>> resource will cause other nodes BUG.
>>
>>       NodeA                               NodeB                NodeC
>>
>> umount:
>>   dlm_unregister_domain()
>>     dlm_migrate_all_locks()
>>
>>                                           NodeB down
>>
>> do recovery for NodeB
>> and collect a new lockres
>> from other live nodes:
>>
>>   dlm_do_recovery
>>     dlm_remaster_locks
>>       dlm_request_all_locks:
>>
>>   dlm_mig_lockres_handler
>>     dlm_new_lockres
>>       __dlm_insert_lockres
>>
>> at last NodeA become the
>> master of the new lockres
>> and leave domain:
>>   dlm_leave_domain()
>>
>>                                                                mount:
>>                                                                  dlm_join_domain()
>>
>>                                                                touch file and request
>>                                                                for the owner of the new
>>                                                                lockres, but all the
>>                                                                other nodes said 'NO',
>>                                                                so NodeC decide to be
>>                                                                the owner, and send do
>>                                                                assert msg to other
>>                                                                nodes:
>>                                                                dlmlock()
>>                                                                  dlm_get_lock_resource()
>>                                                                    dlm_do_assert_master()
>>
>>                                                                other nodes receive the msg
>>                                                                and found two masters exist.
>>                                                                at last cause BUG in
>>                                                                dlm_assert_master_handler()
>>                                                                -->BUG();
>>
>> Fixes: bc9838c4d44a ("dlm: allow dlm do recovery during shutdown")
>>
>> Signed-off-by: Jun Piao <piaojun at huawei.com>
>> Reviewed-by: Alex Chen <alex.chen at huawei.com>
>> Reviewed-by: Yiwen Jiang <jiangyiwen at huawei.com>
>> ---
>>  fs/ocfs2/dlm/dlmcommon.h   |  1 +
>>  fs/ocfs2/dlm/dlmdomain.c   | 15 +++++++++++++++
>>  fs/ocfs2/dlm/dlmrecovery.c | 13 ++++++++++---
>>  3 files changed, 26 insertions(+), 3 deletions(-)
>>
>> diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
>> index 953c200..d06e27e 100644
>> --- a/fs/ocfs2/dlm/dlmcommon.h
>> +++ b/fs/ocfs2/dlm/dlmcommon.h
>> @@ -140,6 +140,7 @@ struct dlm_ctxt
>>  	u8 node_num;
>>  	u32 key;
>>  	u8 joining_node;
>> +	u8 migrate_done; /* set to 1 means node has migrated all lock resources */
>>  	wait_queue_head_t dlm_join_events;
>>  	unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
>>  	unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
>> diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
>> index 25b76f0..425081b 100644
>> --- a/fs/ocfs2/dlm/dlmdomain.c
>> +++ b/fs/ocfs2/dlm/dlmdomain.c
>> @@ -461,6 +461,19 @@ static int dlm_migrate_all_locks(struct dlm_ctxt *dlm)
>>  		cond_resched_lock(&dlm->spinlock);
>>  		num += n;
>>  	}
>> +
>> +	if (!num) {
>> +		if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) {
>> +			mlog(0, "%s: perhaps there are more lock resources "
>> +				"need to be migrated after dlm recovery\n", dlm->name);
>> +			ret = -EAGAIN;
>> +		} else {
>> +			mlog(0, "%s: we won't do dlm recovery after migrating "
>> +				"all lock resources\n", dlm->name);
>> +			dlm->migrate_done = 1;
>> +		}
>> +	}
>> +
>>  	spin_unlock(&dlm->spinlock);
>>  	wake_up(&dlm->dlm_thread_wq);
>>
>> @@ -2038,6 +2051,8 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
>>  	dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN;
>>  	init_waitqueue_head(&dlm->dlm_join_events);
>>
>> +	dlm->migrate_done = 0;
>> +
>>  	dlm->reco.new_master = O2NM_INVALID_NODE_NUM;
>>  	dlm->reco.dead_node = O2NM_INVALID_NODE_NUM;
>>
>> diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
>> index 505ab42..d4336a5 100644
>> --- a/fs/ocfs2/dlm/dlmrecovery.c
>> +++ b/fs/ocfs2/dlm/dlmrecovery.c
>> @@ -423,12 +423,11 @@ void dlm_wait_for_recovery(struct dlm_ctxt *dlm)
>>
>>  static void dlm_begin_recovery(struct dlm_ctxt *dlm)
>>  {
>> -	spin_lock(&dlm->spinlock);
>> +	assert_spin_locked(&dlm->spinlock);
>>  	BUG_ON(dlm->reco.state & DLM_RECO_STATE_ACTIVE);
>>  	printk(KERN_NOTICE "o2dlm: Begin recovery on domain %s for node %u\n",
>>  	       dlm->name, dlm->reco.dead_node);
>>  	dlm->reco.state |= DLM_RECO_STATE_ACTIVE;
>> -	spin_unlock(&dlm->spinlock);
>>  }
>>
>>  static void dlm_end_recovery(struct dlm_ctxt *dlm)
>> @@ -456,6 +455,13 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
>>
>>  	spin_lock(&dlm->spinlock);
>>
>> +	if (dlm->migrate_done) {
>> +		mlog(0, "%s: no need do recovery after migrating all "
>> +			"lock resources\n", dlm->name);
>> +		spin_unlock(&dlm->spinlock);
>> +		return 0;
>> +	}
>> +
>>  	/* check to see if the new master has died */
>>  	if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM &&
>>  	    test_bit(dlm->reco.new_master, dlm->recovery_map)) {
>> @@ -490,12 +496,13 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
>>  	mlog(0, "%s(%d):recovery thread found node %u in the recovery map!\n",
>>  	     dlm->name, task_pid_nr(dlm->dlm_reco_thread_task),
>>  	     dlm->reco.dead_node);
>> -	spin_unlock(&dlm->spinlock);
>>
>>  	/* take write barrier */
>>  	/* (stops the list reshuffling thread, proxy ast handling) */
>>  	dlm_begin_recovery(dlm);
>>
>> +	spin_unlock(&dlm->spinlock);
>> +
>>  	if (dlm->reco.new_master == dlm->node_num)
>>  		goto master_here;
>>
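To make the ordering argument above concrete, here is a small userspace sketch,
not the patched kernel code: a pthread mutex stands in for dlm->spinlock, and
finish_migration() / do_recovery() are condensed stand-ins for the patched tail
of dlm_migrate_all_locks() and the patched head of dlm_do_recovery() plus
dlm_begin_recovery(). It shows the invariant the patch relies on: migrate_done
is only set when no lockres remains and DLM_RECO_STATE_ACTIVE is clear, and
because dlm_begin_recovery() now runs under the same spinlock, recovery cannot
start between that check and the flag being set.

/*
 * Userspace sketch of the serialization the patch relies on -- not the
 * real fs/ocfs2/dlm code.
 */
#include <pthread.h>
#include <stdio.h>

#define DLM_RECO_STATE_ACTIVE 0x01

static pthread_mutex_t dlm_spinlock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int reco_state;
static int migrate_done;

/* Only mark migrate_done when recovery is not active; otherwise ask the
 * caller to retry the migration pass (the patch returns -EAGAIN here). */
static int finish_migration(void)
{
	int ret = 0;

	pthread_mutex_lock(&dlm_spinlock);
	if (reco_state & DLM_RECO_STATE_ACTIVE)
		ret = -1;
	else
		migrate_done = 1;
	pthread_mutex_unlock(&dlm_spinlock);
	return ret;
}

/* Bail out if migration already finished; otherwise begin recovery while
 * still holding the lock, so neither decision can interleave with the
 * check in finish_migration(). */
static int do_recovery(void)
{
	pthread_mutex_lock(&dlm_spinlock);
	if (migrate_done) {
		pthread_mutex_unlock(&dlm_spinlock);
		return 0;
	}
	reco_state |= DLM_RECO_STATE_ACTIVE;	/* dlm_begin_recovery() */
	pthread_mutex_unlock(&dlm_spinlock);
	/* ... remaster locks; the real code clears ACTIVE when finished ... */
	return 1;
}

int main(void)
{
	if (do_recovery())			/* recovery gets in first */
		printf("recovery started\n");
	if (finish_migration() != 0)		/* so migration must retry */
		printf("migration retries until recovery completes\n");
	return 0;
}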