Sunil Mushran
2008-Mar-01 14:04 UTC
[Ocfs2-devel] [PATCH 6/6] ocfs2/dlm: dlm_thread should not sleep while holding the dlm_spinlock
This patch addresses the bug in which the dlm_thread could go to sleep
while holding the dlm_spinlock.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
---
 fs/ocfs2/dlm/dlmthread.c |    6 ++++--
 1 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index cebd089..4060bb3 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -176,12 +176,14 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
 	     res->lockname.name, master);
 
 	if (!master) {
+		/* drop spinlock... retake below */
+		spin_unlock(&dlm->spinlock);
+
 		spin_lock(&res->spinlock);
 		/* This ensures that clear refmap is sent after the set */
 		__dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG);
 		spin_unlock(&res->spinlock);
-		/* drop spinlock to do messaging, retake below */
-		spin_unlock(&dlm->spinlock);
+
 		/* clear our bit from the master's refmap, ignore errors */
 		ret = dlm_drop_lockres_ref(dlm, res);
 		if (ret < 0) {
-- 
1.5.3.6
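For readers following the fix: the rule being enforced is the general spinlock one -- nothing that can sleep may run while the lock is held, so the lock is dropped before the wait and retaken afterwards. The kernel-style sketch below only illustrates that shape under hypothetical names (demo_lock, demo_wq, demo_done); it is not ocfs2/dlm code.

#include <linux/spinlock.h>
#include <linux/wait.h>

/* Illustrative sketch: drop a spinlock before anything that may sleep
 * (wait_event, messaging, GFP_KERNEL allocations), then retake it. */
static DEFINE_SPINLOCK(demo_lock);		/* hypothetical lock */
static DECLARE_WAIT_QUEUE_HEAD(demo_wq);	/* hypothetical wait queue */
static int demo_done;				/* hypothetical condition */

static void demo_wait_then_continue(void)
{
	spin_lock(&demo_lock);
	/* ... non-sleeping work under the lock ... */
	spin_unlock(&demo_lock);	/* drop before sleeping */

	wait_event(demo_wq, demo_done);	/* may sleep */

	spin_lock(&demo_lock);		/* retake to carry on */
	/* ... more non-sleeping work ... */
	spin_unlock(&demo_lock);
}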
Sunil Mushran
2008-Mar-01 14:04 UTC
[Ocfs2-devel] [PATCH 4/6] ocfs2/dlm: Move struct dlm_master_list_entry to dlmcommon.h
This patch moves some mle related definitions from dlmmaster.c to
dlmcommon.h.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
---
 fs/ocfs2/dlm/dlmcommon.h |   35 +++++++++++++++++++++++++++++++++++
 fs/ocfs2/dlm/dlmmaster.c |   37 -------------------------------------
 2 files changed, 35 insertions(+), 37 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 5b3607c..c52dec6 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -49,6 +49,41 @@
 /* Intended to make it easier for us to switch out hash functions */
 #define dlm_lockid_hash(_n, _l) full_name_hash(_n, _l)
 
+enum dlm_mle_type {
+	DLM_MLE_BLOCK,
+	DLM_MLE_MASTER,
+	DLM_MLE_MIGRATION
+};
+
+struct dlm_lock_name {
+	u8 len;
+	u8 name[DLM_LOCKID_NAME_MAX];
+};
+
+struct dlm_master_list_entry {
+	struct list_head list;
+	struct list_head hb_events;
+	struct dlm_ctxt *dlm;
+	spinlock_t spinlock;
+	wait_queue_head_t wq;
+	atomic_t woken;
+	struct kref mle_refs;
+	int inuse;
+	unsigned long maybe_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
+	unsigned long vote_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
+	unsigned long response_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
+	unsigned long node_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
+	u8 master;
+	u8 new_master;
+	enum dlm_mle_type type;
+	struct o2hb_callback_func mle_hb_up;
+	struct o2hb_callback_func mle_hb_down;
+	union {
+		struct dlm_lock_resource *res;
+		struct dlm_lock_name name;
+	} u;
+};
+
 enum dlm_ast_type {
 	DLM_AST = 0,
 	DLM_BAST,
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index b6629bb..3011183 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -52,43 +52,6 @@
 #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_MASTER)
 #include "cluster/masklog.h"
 
-enum dlm_mle_type {
-	DLM_MLE_BLOCK,
-	DLM_MLE_MASTER,
-	DLM_MLE_MIGRATION
-};
-
-struct dlm_lock_name
-{
-	u8 len;
-	u8 name[DLM_LOCKID_NAME_MAX];
-};
-
-struct dlm_master_list_entry
-{
-	struct list_head list;
-	struct list_head hb_events;
-	struct dlm_ctxt *dlm;
-	spinlock_t spinlock;
-	wait_queue_head_t wq;
-	atomic_t woken;
-	struct kref mle_refs;
-	int inuse;
-	unsigned long maybe_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
-	unsigned long vote_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
-	unsigned long response_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
-	unsigned long node_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
-	u8 master;
-	u8 new_master;
-	enum dlm_mle_type type;
-	struct o2hb_callback_func mle_hb_up;
-	struct o2hb_callback_func mle_hb_down;
-	union {
-		struct dlm_lock_resource *res;
-		struct dlm_lock_name name;
-	} u;
-};
-
 static void dlm_mle_node_down(struct dlm_ctxt *dlm,
 			      struct dlm_master_list_entry *mle,
 			      struct o2nm_node *node,
-- 
1.5.3.6
Sunil Mushran
2008-Mar-01 14:04 UTC
[Ocfs2-devel] [PATCH 1/6] ocfs2/dlm: Add missing dlm_lock_put()s
Normally locks for remote nodes are freed when that node sends an UNLOCK
message to the master. The master node tags a DLM_UNLOCK_FREE_LOCK action
to do an extra put on the lock at the end. However, there are times when
the master node has to free the locks for the remote nodes forcibly. Two
cases when this happens are:
1. When the master has migrated the lockres plus all locks to another node.
2. When the master is clearing all the locks of a dead node.
It was in the above two conditions that the dlm was missing the extra put.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
---
 fs/ocfs2/dlm/dlmmaster.c   |    3 +++
 fs/ocfs2/dlm/dlmrecovery.c |    9 +++++++++
 2 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index a54d33d..eadf9bf 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2933,6 +2933,9 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
 				dlm_lockres_clear_refmap_bit(lock->ml.node, res);
 				list_del_init(&lock->list);
 				dlm_lock_put(lock);
+				/* In a normal unlock, we would have added a
+				 * DLM_UNLOCK_FREE_LOCK action. Force it. */
+				dlm_lock_put(lock);
 			}
 		}
 		queue++;
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 91f747b..8ef57c2 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2130,11 +2130,16 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
 	assert_spin_locked(&dlm->spinlock);
 	assert_spin_locked(&res->spinlock);
 
+	/* We do two dlm_lock_put(). One for removing from list and the other is
+	 * to force the DLM_UNLOCK_FREE_LOCK action so as to free the locks */
+
 	/* TODO: check pending_asts, pending_basts here */
 	list_for_each_entry_safe(lock, next, &res->granted, list) {
 		if (lock->ml.node == dead_node) {
 			list_del_init(&lock->list);
 			dlm_lock_put(lock);
+			/* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */
+			dlm_lock_put(lock);
 			freed++;
 		}
 	}
@@ -2142,6 +2147,8 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
 		if (lock->ml.node == dead_node) {
 			list_del_init(&lock->list);
 			dlm_lock_put(lock);
+			/* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */
+			dlm_lock_put(lock);
 			freed++;
 		}
 	}
@@ -2149,6 +2156,8 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
 		if (lock->ml.node == dead_node) {
 			list_del_init(&lock->list);
 			dlm_lock_put(lock);
+			/* Can't schedule DLM_UNLOCK_FREE_LOCK - do manually */
+			dlm_lock_put(lock);
 			freed++;
 		}
 	}
-- 
1.5.3.6
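The double put follows straight from the reference counting: removing the lock from its queue drops the reference the list held, and the second put stands in for the one the DLM_UNLOCK_FREE_LOCK action would normally do during an unlock. Below is a minimal kref-style sketch of that rule, under hypothetical names (struct demo_lock, demo_lock_put, demo_force_free) rather than the real dlm structures.

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/slab.h>

/* Hypothetical stand-in for a lock object whose lifetime is kref-managed. */
struct demo_lock {
	struct kref refs;
	struct list_head list;
};

static void demo_lock_release(struct kref *kref)
{
	kfree(container_of(kref, struct demo_lock, refs));
}

static void demo_lock_put(struct demo_lock *lock)
{
	kref_put(&lock->refs, demo_lock_release);
}

/* Forcibly free a remote node's lock: one put for taking it off the list,
 * one for the put the normal unlock path would otherwise have done. */
static void demo_force_free(struct demo_lock *lock)
{
	list_del_init(&lock->list);
	demo_lock_put(lock);
	demo_lock_put(lock);
}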
These patches are against the mainline kernel and have been tested with the
head (2.6.25-rc3+). Once pushed into mainline, they will need to be pulled
into both the 1.4 and 1.2 trees.

Except for the last one, all the patches have been reviewed by Joel. Thanks
Joel. The patches have been updated with the comments provided. The last
patch in the series fixes bugzilla #919.

The remaining patches that add features will be mailed again for review once
these bugfixes have been pushed into mainline.

Sunil
Sunil Mushran
2008-Mar-01 14:04 UTC
[Ocfs2-devel] [PATCH 5/6] ocfs2/dlm: Print message showing the recovery master
Knowing the dlm recovery master helps in debugging recovery issues. This
patch prints a message on the recovery master node.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlm/dlmrecovery.c |    6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 23e7b49..283ac0f 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -519,9 +519,9 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
 	return 0;
 
 master_here:
-	mlog(0, "(%d) mastering recovery of %s:%u here(this=%u)!\n",
-	     task_pid_nr(dlm->dlm_reco_thread_task),
-	     dlm->name, dlm->reco.dead_node, dlm->node_num);
+	mlog(ML_NOTICE, "(%d) Node %u is the Recovery Master for the Dead Node "
+	     "%u for Domain %s\n", task_pid_nr(dlm->dlm_reco_thread_task),
+	     dlm->node_num, dlm->reco.dead_node, dlm->name);
 
 	status = dlm_remaster_locks(dlm, dlm->reco.dead_node);
 	if (status < 0) {
-- 
1.5.3.6
Sunil Mushran
2008-Mar-01 14:04 UTC
[Ocfs2-devel] [PATCH 2/6] ocfs2/dlm: Add missing dlm_lockres_put()s in migration path
During migration, the recovery master node may be asked to master a lockres
it may not know about. In that case, it would not only have to create a
lockres and add it to the hash, but also remember to do the _put_
corresponding to the kref_init in dlm_init_lockres(), as soon as the
migration is completed. Yes, we don't wait for dlm_purge_lockres() to do
that matching put. Note the ref added for it being in the hash protects the
lockres from being freed prematurely.

This patch adds that missing put, as described above, to plug a memory leak.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
---
 fs/ocfs2/dlm/dlmcommon.h   |    1 +
 fs/ocfs2/dlm/dlmrecovery.c |   40 ++++++++++++++++++++++++++++++++++------
 2 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 9843ee1..5b3607c 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -176,6 +176,7 @@ struct dlm_mig_lockres_priv
 {
 	struct dlm_lock_resource *lockres;
 	u8 real_master;
+	u8 extra_ref;
 };
 
 struct dlm_assert_master_priv
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 8ef57c2..23e7b49 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1327,6 +1327,7 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
 			(struct dlm_migratable_lockres *)msg->buf;
 	int ret = 0;
 	u8 real_master;
+	u8 extra_refs = 0;
 	char *buf = NULL;
 	struct dlm_work_item *item = NULL;
 	struct dlm_lock_resource *res = NULL;
@@ -1404,16 +1405,28 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
 		__dlm_insert_lockres(dlm, res);
 		spin_unlock(&dlm->spinlock);
 
+		/* Add an extra ref for this lock-less lockres lest the
+		 * dlm_thread purges it before we get the chance to add
+		 * locks to it */
+		dlm_lockres_get(res);
+
+		/* There are three refs that need to be put.
+		 * 1. Taken above.
+		 * 2. kref_init in dlm_new_lockres()->dlm_init_lockres().
+		 * 3. dlm_lookup_lockres()
+		 * The first one is handled at the end of this function. The
+		 * other two are handled in the worker thread after locks have
+		 * been attached. Yes, we don't wait for purge time to match
+		 * kref_init. The lockres will still have atleast one ref
+		 * added because it is in the hash __dlm_insert_lockres() */
+		extra_refs++;
+
 		/* now that the new lockres is inserted,
 		 * make it usable by other processes */
 		spin_lock(&res->spinlock);
 		res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
 		spin_unlock(&res->spinlock);
 		wake_up(&res->wq);
-
-		/* add an extra ref for just-allocated lockres
-		 * otherwise the lockres will be purged immediately */
-		dlm_lockres_get(res);
 	}
 
 	/* at this point we have allocated everything we need,
@@ -1443,12 +1456,17 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
 	dlm_init_work_item(dlm, item, dlm_mig_lockres_worker, buf);
 	item->u.ml.lockres = res; /* already have a ref */
 	item->u.ml.real_master = real_master;
+	item->u.ml.extra_ref = extra_refs;
 	spin_lock(&dlm->work_lock);
 	list_add_tail(&item->list, &dlm->work_list);
 	spin_unlock(&dlm->work_lock);
 	queue_work(dlm->dlm_worker, &dlm->dispatched_work);
 
 leave:
+	/* One extra ref taken needs to be put here */
+	if (extra_refs)
+		dlm_lockres_put(res);
+
 	dlm_put(dlm);
 	if (ret < 0) {
 		if (buf)
@@ -1464,17 +1482,19 @@ leave:
 
 static void dlm_mig_lockres_worker(struct dlm_work_item *item, void *data)
 {
-	struct dlm_ctxt *dlm = data;
+	struct dlm_ctxt *dlm;
 	struct dlm_migratable_lockres *mres;
 	int ret = 0;
 	struct dlm_lock_resource *res;
 	u8 real_master;
+	u8 extra_ref;
 
 	dlm = item->dlm;
 	mres = (struct dlm_migratable_lockres *)data;
 
 	res = item->u.ml.lockres;
 	real_master = item->u.ml.real_master;
+	extra_ref = item->u.ml.extra_ref;
 
 	if (real_master == DLM_LOCK_RES_OWNER_UNKNOWN) {
 		/* this case is super-rare. only occurs if
@@ -1517,6 +1537,12 @@ again:
 	}
 
 leave:
+	/* See comment in dlm_mig_lockres_handler() */
+	if (res) {
+		if (extra_ref)
+			dlm_lockres_put(res);
+		dlm_lockres_put(res);
+	}
 	kfree(data);
 	mlog_exit(ret);
 }
@@ -1644,7 +1670,8 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
 				/* retry!? */
 				BUG();
 			}
-		}
+		} else /* put.. incase we are not the master */
+			dlm_lockres_put(res);
 		spin_unlock(&res->spinlock);
 	}
 	spin_unlock(&dlm->spinlock);
@@ -1921,6 +1948,7 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm,
 		     "Recovering res %s:%.*s, is already on recovery list!\n",
 		     dlm->name, res->lockname.len, res->lockname.name);
 		list_del_init(&res->recovering);
+		dlm_lockres_put(res);
 	}
 	/* We need to hold a reference while on the recovery list */
 	dlm_lockres_get(res);
-- 
1.5.3.6
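The extra_ref handling above is the usual pattern for pinning an object that is handed off to deferred work: whoever queues the work takes a reference, and the worker drops it once it is done, so the object cannot be freed underneath the queued work. The sketch below shows only that pattern under hypothetical names (demo_res, demo_queue, demo_worker); the dlm itself carries the reference through struct dlm_work_item instead.

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

/* Hypothetical object handed off to a worker thread. */
struct demo_res {
	struct kref refs;
	struct work_struct work;
};

static void demo_res_release(struct kref *kref)
{
	kfree(container_of(kref, struct demo_res, refs));
}

static void demo_worker(struct work_struct *work)
{
	struct demo_res *res = container_of(work, struct demo_res, work);

	/* ... operate on res, e.g. attach locks ... */
	kref_put(&res->refs, demo_res_release);	/* drop the ref taken at queue time */
}

static void demo_queue(struct demo_res *res)
{
	kref_get(&res->refs);			/* keep res alive for the worker */
	INIT_WORK(&res->work, demo_worker);
	schedule_work(&res->work);
}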
Sunil Mushran
2008-Mar-01 14:04 UTC
[Ocfs2-devel] [PATCH 3/6] ocfs2/dlm: Add missing dlm_lockres_put()s
dlm_master_request_handler() forgot to put a lockres when
dlm_assert_master_worker() failed or was skipped.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
---
 fs/ocfs2/dlm/dlmmaster.c |   11 ++++++++++-
 1 files changed, 10 insertions(+), 1 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index eadf9bf..b6629bb 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -1663,7 +1663,12 @@ way_up_top:
 		dlm_put_mle(tmpmle);
 	}
 send_response:
-
+	/*
+	 * __dlm_lookup_lockres() grabbed a reference to this lockres.
+	 * The reference is released by dlm_assert_master_worker() under
+	 * the call to dlm_dispatch_assert_master(). If
+	 * dlm_assert_master_worker() isn't called, we drop it here.
+	 */
 	if (dispatch_assert) {
 		if (response != DLM_MASTER_RESP_YES)
 			mlog(ML_ERROR, "invalid response %d\n", response);
@@ -1678,7 +1683,11 @@ send_response:
 		if (ret < 0) {
 			mlog(ML_ERROR, "failed to dispatch assert master work\n");
 			response = DLM_MASTER_RESP_ERROR;
+			dlm_lockres_put(res);
 		}
+	} else {
+		if (res)
+			dlm_lockres_put(res);
 	}
 
 	dlm_put(dlm);
-- 
1.5.3.6
Mark Fasheh
2008-Mar-04 13:55 UTC
[Ocfs2-devel] [PATCH 4/6] ocfs2/dlm: Move struct dlm_master_list_entry to dlmcommon.h
On Mon, Mar 03, 2008 at 10:36:14AM -0800, Sunil Mushran wrote:
> Was trying to balance out the number of patches in both. :)
> I mean I wanted this cleanup irrespective of the debugfs series.

I applied these patches sans #4 and things seemed ok. Should the end result
be acceptable? I think everything except this patch looks like 2.6.25
material...
	--Mark

--
Mark Fasheh
Principal Software Developer, Oracle
mark.fasheh@oracle.com