Srinivas Eeda
2010-Jun-16 04:43 UTC
[Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist
There are two problems in dlm_run_purgelist:

1. If a lockres is found to be in use, dlm_run_purgelist keeps trying to purge
the same lockres instead of trying the next lockres.

2. When a lockres is found unused, dlm_run_purgelist releases the lockres
spinlock before DLM_LOCK_RES_DROPPING_REF is set and then calls
dlm_purge_lockres. The spinlock is reacquired, but in this window the lockres
can get reused. This leads to a BUG.

This patch modifies dlm_run_purgelist to skip a lockres if it is in use and
purge the next lockres instead. It also sets DLM_LOCK_RES_DROPPING_REF before
releasing the lockres spinlock, protecting it from getting reused.

Signed-off-by: Srinivas Eeda <srinivas.eeda at oracle.com>
---
 fs/ocfs2/dlm/dlmthread.c |  125 +++++++++++++++++++++++-----------------------
 1 files changed, 63 insertions(+), 62 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 11a6d1f..fb0be6c 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -158,39 +158,17 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
         int master;
         int ret = 0;
 
-        spin_lock(&res->spinlock);
-        if (!__dlm_lockres_unused(res)) {
-                mlog(0, "%s:%.*s: tried to purge but not unused\n",
-                     dlm->name, res->lockname.len, res->lockname.name);
-                __dlm_print_one_lock_resource(res);
-                spin_unlock(&res->spinlock);
-                BUG();
-        }
-
-        if (res->state & DLM_LOCK_RES_MIGRATING) {
-                mlog(0, "%s:%.*s: Delay dropref as this lockres is "
-                     "being remastered\n", dlm->name, res->lockname.len,
-                     res->lockname.name);
-                /* Re-add the lockres to the end of the purge list */
-                if (!list_empty(&res->purge)) {
-                        list_del_init(&res->purge);
-                        list_add_tail(&res->purge, &dlm->purge_list);
-                }
-                spin_unlock(&res->spinlock);
-                return 0;
-        }
-
         master = (res->owner == dlm->node_num);
 
         if (!master)
                 res->state |= DLM_LOCK_RES_DROPPING_REF;
-        spin_unlock(&res->spinlock);
 
         mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len,
              res->lockname.name, master);
 
         if (!master) {
                 /* drop spinlock...  retake below */
+                spin_unlock(&res->spinlock);
                 spin_unlock(&dlm->spinlock);
 
                 spin_lock(&res->spinlock);
@@ -208,48 +186,37 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
                 mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n",
                      dlm->name, res->lockname.len, res->lockname.name, ret);
                 spin_lock(&dlm->spinlock);
+                spin_lock(&res->spinlock);
         }
 
-        spin_lock(&res->spinlock);
-        if (!list_empty(&res->purge)) {
-                mlog(0, "removing lockres %.*s:%p from purgelist, "
-                     "master = %d\n", res->lockname.len, res->lockname.name,
-                     res, master);
-                list_del_init(&res->purge);
-                spin_unlock(&res->spinlock);
-                dlm_lockres_put(res);
-                dlm->purge_count--;
-        } else
-                spin_unlock(&res->spinlock);
-
-        __dlm_unhash_lockres(res);
-
         /* lockres is not in the hash now.  drop the flag and wake up
          * any processes waiting in dlm_get_lock_resource. */
-        if (!master) {
-                spin_lock(&res->spinlock);
+        if (!master)
                 res->state &= ~DLM_LOCK_RES_DROPPING_REF;
-                spin_unlock(&res->spinlock);
-                wake_up(&res->wq);
-        }
         return 0;
 }
 
 static void dlm_run_purge_list(struct dlm_ctxt *dlm,
                                int purge_now)
 {
-        unsigned int run_max, unused;
+        unsigned int run_max;
         unsigned long purge_jiffies;
         struct dlm_lock_resource *lockres;
+        struct dlm_lock_resource *nextres;
 
         spin_lock(&dlm->spinlock);
         run_max = dlm->purge_count;
 
-        while(run_max && !list_empty(&dlm->purge_list)) {
-                run_max--;
+        if (list_empty(&dlm->purge_list)) {
+                spin_unlock(&dlm->spinlock);
+                return;
+        }
+
+        lockres = list_entry(dlm->purge_list.next,
+                             struct dlm_lock_resource, purge);
 
-                lockres = list_entry(dlm->purge_list.next,
-                                     struct dlm_lock_resource, purge);
+        while(run_max && lockres && !list_empty(&dlm->purge_list)) {
+                run_max--;
 
                 /* Status of the lockres *might* change so double
                  * check. If the lockres is unused, holding the dlm
@@ -257,15 +224,12 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
                  * refs on it -- there's no need to keep the lockres
                  * spinlock. */
                 spin_lock(&lockres->spinlock);
-                unused = __dlm_lockres_unused(lockres);
-                spin_unlock(&lockres->spinlock);
-
-                if (!unused)
-                        continue;
 
                 purge_jiffies = lockres->last_used +
                         msecs_to_jiffies(DLM_PURGE_INTERVAL_MS);
 
+                mlog(0, "purging lockres %.*s\n", lockres->lockname.len,
+                     lockres->lockname.name);
                 /* Make sure that we want to be processing this guy at
                  * this time. */
                 if (!purge_now && time_after(purge_jiffies, jiffies)) {
@@ -273,20 +237,57 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
                          * in tail order, we can stop at the first
                          * unpurgable resource -- anyone added after
                          * him will have a greater last_used value */
+                        spin_unlock(&lockres->spinlock);
                         break;
                 }
 
-                dlm_lockres_get(lockres);
-
+                /* If lockres is being used, or migrating purge next lockres */
+                if (!__dlm_lockres_unused(lockres) ||
+                    (lockres->state & DLM_LOCK_RES_MIGRATING)) {
+                        if (!list_is_last(&lockres->purge, &dlm->purge_list))
+                                nextres = list_entry(lockres->purge.next,
+                                             struct dlm_lock_resource, purge);
+                        else
+                                nextres = NULL;
+                        spin_unlock(&lockres->spinlock);
+                        lockres = nextres;
+                        continue;
+                }
+
                 /* This may drop and reacquire the dlm spinlock if it
                  * has to do migration. */
-                if (dlm_purge_lockres(dlm, lockres))
-                        BUG();
-
-                dlm_lockres_put(lockres);
-
-                /* Avoid adding any scheduling latencies */
-                cond_resched_lock(&dlm->spinlock);
+                dlm_purge_lockres(dlm, lockres);
+
+                /* before we free the lockres we get the next lockres */
+                if (list_empty(&lockres->purge))
+                        /* Shouldn't be in this state. Start from beginning */
+                        nextres = list_entry(dlm->purge_list.next,
+                                     struct dlm_lock_resource, purge);
+                else if (!list_is_last(&lockres->purge, &dlm->purge_list))
+                        nextres = list_entry(lockres->purge.next,
+                                     struct dlm_lock_resource, purge);
+                else
+                        nextres = NULL;
+
+                if (__dlm_lockres_unused(lockres)) {
+                        if (!list_empty(&lockres->purge)) {
+                                list_del_init(&lockres->purge);
+                                dlm->purge_count--;
+                        }
+                        __dlm_unhash_lockres(lockres);
+                        spin_unlock(&lockres->spinlock);
+                        wake_up(&lockres->wq);
+                        dlm_lockres_put(lockres);
+                } else
+                        spin_unlock(&lockres->spinlock);
+                lockres = nextres;
+
+                /* Avoid adding any scheduling latencies. If dlm spinlock is
+                 * dropped, retry again from the beginning as purgelist could
+                 * have been modified */
+                if (cond_resched_lock(&dlm->spinlock))
+                        lockres = list_entry(dlm->purge_list.next,
+                                     struct dlm_lock_resource, purge);
         }
 
         spin_unlock(&dlm->spinlock);
@@ -733,7 +734,7 @@ in_progress:
                 /* unlikely, but we may need to give time to
                  * other tasks */
                 if (!--n) {
-                        mlog(0, "throttling dlm_thread\n");
+                        mlog(0, "throttling dlm_thread n=%d\n", n);
                         break;
                 }
         }
-- 
1.5.6.5
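The heart of the change is the lock ordering in dlm_purge_lockres(): the
DROPPING_REF flag now goes up while the lockres spinlock is still held, and
only then are the locks dropped to message the master. Boiled down, the
patched path behaves roughly like this (a simplified sketch against the o2dlm
internals, with a hypothetical function name, not the literal patched code):

        /* Sketch: entered with dlm->spinlock and res->spinlock held; the flag
         * is set before either lock is released, so the lockres cannot be
         * picked up again while the DEREF message is in flight. */
        static void purge_one_lockres_sketch(struct dlm_ctxt *dlm,
                                             struct dlm_lock_resource *res)
        {
                int master = (res->owner == dlm->node_num);

                assert_spin_locked(&dlm->spinlock);
                assert_spin_locked(&res->spinlock);

                if (!master) {
                        /* mark it first ... */
                        res->state |= DLM_LOCK_RES_DROPPING_REF;
                        /* ... only now is it safe to drop the locks and send
                         * the DEREF to the master */
                        spin_unlock(&res->spinlock);
                        spin_unlock(&dlm->spinlock);
                        dlm_drop_lockres_ref(dlm, res);
                        spin_lock(&dlm->spinlock);
                        spin_lock(&res->spinlock);
                }
        }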
Srinivas Eeda
2010-Jun-16 04:43 UTC
[Ocfs2-devel] [PATCH 2/2] ocfs2: o2dlm fix race in purge lockres and newlock (orabug 9094491)
This patch fixes the following hole.

dlmlock tries to create a new lock on a lockres that is on the purge list. It
calls dlm_get_lock_resource and later adds a lock to the blocked list. But in
this window, dlm_thread can purge the lockres and unhash it. This will cause
a BUG, as when the AST comes back from the master the lockres is not found.

This patch marks the lockres with a new state DLM_LOCK_RES_IN_USE which
protects the lockres from dlm_thread purging it.

Signed-off-by: Srinivas Eeda <srinivas.eeda at oracle.com>
---
 fs/ocfs2/dlm/dlmcommon.h |    1 +
 fs/ocfs2/dlm/dlmlock.c   |    4 ++++
 fs/ocfs2/dlm/dlmmaster.c |    5 ++++-
 fs/ocfs2/dlm/dlmthread.c |    1 +
 4 files changed, 10 insertions(+), 1 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 0102be3..6cf3c08 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -279,6 +279,7 @@ static inline void __dlm_set_joining_node(struct dlm_ctxt *dlm,
 #define DLM_LOCK_RES_DIRTY                0x00000008
 #define DLM_LOCK_RES_IN_PROGRESS          0x00000010
 #define DLM_LOCK_RES_MIGRATING            0x00000020
+#define DLM_LOCK_RES_IN_USE               0x00000100
 #define DLM_LOCK_RES_DROPPING_REF         0x00000040
 #define DLM_LOCK_RES_BLOCK_DIRTY          0x00001000
 #define DLM_LOCK_RES_SETREF_INPROG        0x00002000
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 7333377..501ac40 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -134,6 +134,8 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm,
         if (status != DLM_NORMAL &&
             lock->ml.node != dlm->node_num) {
                 /* erf.  state changed after lock was dropped. */
+                /* DLM_LOCK_RES_IN_USE is set in dlm_get_lock_resource */
+                res->state &= ~DLM_LOCK_RES_IN_USE;
                 spin_unlock(&res->spinlock);
                 dlm_error(status);
                 return status;
@@ -180,6 +182,7 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm,
                         kick_thread = 1;
                 }
         }
+        res->state &= ~DLM_LOCK_RES_IN_USE;
         /* reduce the inflight count, this may result in the lockres
          * being purged below during calc_usage */
         if (lock->ml.node == dlm->node_num)
@@ -246,6 +249,7 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
 
         spin_lock(&res->spinlock);
         res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
+        res->state &= ~DLM_LOCK_RES_IN_USE;
         lock->lock_pending = 0;
         if (status != DLM_NORMAL) {
                 if (status == DLM_RECOVERING &&
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 9289b43..f0f2d97 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -719,6 +719,7 @@ lookup:
         if (tmpres) {
                 int dropping_ref = 0;
 
+                tmpres->state |= DLM_LOCK_RES_IN_USE;
                 spin_unlock(&dlm->spinlock);
 
                 spin_lock(&tmpres->spinlock);
@@ -731,8 +732,10 @@ lookup:
                 if (tmpres->owner == dlm->node_num) {
                         BUG_ON(tmpres->state & DLM_LOCK_RES_DROPPING_REF);
                         dlm_lockres_grab_inflight_ref(dlm, tmpres);
-                } else if (tmpres->state & DLM_LOCK_RES_DROPPING_REF)
+                } else if (tmpres->state & DLM_LOCK_RES_DROPPING_REF) {
+                        tmpres->state &= ~DLM_LOCK_RES_IN_USE;
                         dropping_ref = 1;
+                }
                 spin_unlock(&tmpres->spinlock);
 
                 /* wait until done messaging the master, drop our ref to allow
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index fb0be6c..2b82e0b 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -93,6 +93,7 @@ int __dlm_lockres_has_locks(struct dlm_lock_resource *res)
 int __dlm_lockres_unused(struct dlm_lock_resource *res)
 {
         if (!__dlm_lockres_has_locks(res) &&
+            !(res->state & DLM_LOCK_RES_IN_USE) &&
             (list_empty(&res->dirty) && !(res->state & DLM_LOCK_RES_DIRTY))) {
                 /* try not to scan the bitmap unless the first two
                  * conditions are already true */
-- 
1.5.6.5
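In outline, the new flag bridges the window between the lookup in
dlm_get_lock_resource() and the moment the new lock is actually queued. A
condensed sketch of that flow (illustrative only; attach_new_lock() is a
hypothetical stand-in for the real dlmlock_master()/dlmlock_remote() paths):

        static enum dlm_status dlmlock_window_sketch(struct dlm_ctxt *dlm,
                                                     struct dlm_lock_resource *res,
                                                     struct dlm_lock *lock)
        {
                enum dlm_status status;

                /* dlm_get_lock_resource(): set the flag under dlm->spinlock,
                 * before it is dropped, so dlm_thread cannot purge and unhash
                 * the lockres behind our back. */
                spin_lock(&dlm->spinlock);
                res->state |= DLM_LOCK_RES_IN_USE;
                spin_unlock(&dlm->spinlock);

                /* This is the window that used to be fatal; with the flag set,
                 * __dlm_lockres_unused() returns 0 and the purge skips us. */
                status = attach_new_lock(dlm, res, lock);       /* hypothetical */

                /* dlmlock_master()/dlmlock_remote(): done, allow purging again. */
                spin_lock(&res->spinlock);
                res->state &= ~DLM_LOCK_RES_IN_USE;
                spin_unlock(&res->spinlock);

                return status;
        }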
Wengang Wang
2010-Jun-16 06:06 UTC
[Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist
Still the question: if you have sent a DEREF request to the master, and the
lockres then became in-use again, the lockres remains in the hash table and
also on the purge list. So:

1) If this node held the last ref, there is a possibility that the master
purged the lockres after receiving the DEREF request from this node. In that
case, when this node does dlmlock_remote(), the lockres won't be found on the
master. How do we deal with that?

2) The lockres on this node is going to be purged again, which means it will
send secondary DEREFs to the master. That is not good, I think.

A thought is setting lockres->owner to DLM_LOCK_RES_OWNER_UNKNOWN after
sending a DEREF request against this lockres, and redoing the master request
before locking on it again.

Regards,
wengang.

On 10-06-15 21:43, Srinivas Eeda wrote:
> There are two problems in dlm_run_purgelist:
>
> 1. If a lockres is found to be in use, dlm_run_purgelist keeps trying to purge
> the same lockres instead of trying the next lockres.
>
> 2. When a lockres is found unused, dlm_run_purgelist releases the lockres
> spinlock before DLM_LOCK_RES_DROPPING_REF is set and then calls
> dlm_purge_lockres. The spinlock is reacquired, but in this window the lockres
> can get reused. This leads to a BUG.
>
> This patch modifies dlm_run_purgelist to skip a lockres if it is in use and
> purge the next lockres instead. It also sets DLM_LOCK_RES_DROPPING_REF before
> releasing the lockres spinlock, protecting it from getting reused.
>
> Signed-off-by: Srinivas Eeda <srinivas.eeda at oracle.com>
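To make Wengang's DLM_LOCK_RES_OWNER_UNKNOWN suggestion concrete, it could
look roughly like the following, placed after the DEREF has been sent (an
untested sketch of the idea, not a proposed patch):

        /* forget the cached owner so that a later request on this lockres
         * redoes the master request instead of trusting a master that may
         * already have purged it */
        spin_lock(&dlm->spinlock);
        spin_lock(&res->spinlock);
        if (res->state & DLM_LOCK_RES_DROPPING_REF)
                dlm_change_lockres_owner(dlm, res, DLM_LOCK_RES_OWNER_UNKNOWN);
        spin_unlock(&res->spinlock);
        spin_unlock(&dlm->spinlock);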
Joel Becker
2010-Jun-17 01:39 UTC
[Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist
On Tue, Jun 15, 2010 at 09:43:02PM -0700, Srinivas Eeda wrote:
> There are two problems in dlm_run_purgelist:
>
> 1. If a lockres is found to be in use, dlm_run_purgelist keeps trying to purge
> the same lockres instead of trying the next lockres.
>
> 2. When a lockres is found unused, dlm_run_purgelist releases the lockres
> spinlock before DLM_LOCK_RES_DROPPING_REF is set and then calls
> dlm_purge_lockres. The spinlock is reacquired, but in this window the lockres
> can get reused. This leads to a BUG.
>
> This patch modifies dlm_run_purgelist to skip a lockres if it is in use and
> purge the next lockres instead. It also sets DLM_LOCK_RES_DROPPING_REF before
> releasing the lockres spinlock, protecting it from getting reused.
>
> Signed-off-by: Srinivas Eeda <srinivas.eeda at oracle.com>

I don't really like the way you did this. You're absolutely right that we
need to hold the spinlock while setting DROPPING_REF, but there's no need to
lift the lockres check logic into run_purge_list().

> @@ -257,15 +224,12 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
>                  * refs on it -- there's no need to keep the lockres
>                  * spinlock. */
>                 spin_lock(&lockres->spinlock);
> -               unused = __dlm_lockres_unused(lockres);
> -               spin_unlock(&lockres->spinlock);
> -
> -               if (!unused)
> -                       continue;
>
>                 purge_jiffies = lockres->last_used +
>                         msecs_to_jiffies(DLM_PURGE_INTERVAL_MS);
>
> +               mlog(0, "purging lockres %.*s\n", lockres->lockname.len,
> +                    lockres->lockname.name);
>                 /* Make sure that we want to be processing this guy at
>                  * this time. */
>                 if (!purge_now && time_after(purge_jiffies, jiffies)) {

In fact, I'd move the __dlm_lockres_unused() and purge_now||time_after()
checks into dlm_purge_lockres(). It can return -EBUSY if the lockres is in
use, and -ETIME if purge_now==0 and time_after hits. Then inside
run_purge_list() you just do:

        spin_lock(&lockres->spinlock);
        ret = dlm_purge_lockres(dlm, res, purge_now);
        spin_unlock(&lockres->spinlock);
        if (ret == -ETIME) {
                break;
        } else if (ret == -EBUSY) {
                lockres = list_entry(lockres->purge.next,
                                     struct dlm_lock_resource, purge);
                continue;
        } else if (ret)
                BUG();

What about the dlm_lockres_get()? That's only held while we drop the dlm
spinlock in dlm_purge_lockres(), so you can move it there. You take the kref
only after the _unused() and time_after() checks.

This actually would make run_purge_list() more readable, not less.

Joel

-- 
"There are only two ways to live your life. One is as though nothing is a
 miracle. The other is as though everything is a miracle."
        - Albert Einstein

Joel Becker
Principal Software Developer
Oracle
E-mail: joel.becker at oracle.com
Phone: (650) 506-8127
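For completeness, the dlm_purge_lockres() side of that suggestion might start
out something like this (an illustrative sketch only, assuming the function is
entered with both spinlocks held and grows a purge_now argument):

        static int dlm_purge_lockres(struct dlm_ctxt *dlm,
                                     struct dlm_lock_resource *res, int purge_now)
        {
                unsigned long purge_jiffies;

                /* busy or migrating: caller moves on to the next lockres */
                if (!__dlm_lockres_unused(res) ||
                    (res->state & DLM_LOCK_RES_MIGRATING))
                        return -EBUSY;

                /* not old enough yet: the purge list is kept in tail order,
                 * so the caller can stop scanning altogether */
                purge_jiffies = res->last_used +
                                msecs_to_jiffies(DLM_PURGE_INTERVAL_MS);
                if (!purge_now && time_after(purge_jiffies, jiffies))
                        return -ETIME;

                /* take the ref, set DLM_LOCK_RES_DROPPING_REF, send the DEREF,
                 * unhash and so on, as in the patch above */
                return 0;
        }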
Sunil Mushran
2010-Jun-17 01:44 UTC
[Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist
One way to skip a lockres in the purgelist is to list_del_init() and
list_add_tail() it. That simplifies the patch a lot. I have attached a quick
& dirty patch. See if that satisfies all the requirements.

Sunil

On 06/15/2010 09:43 PM, Srinivas Eeda wrote:
> There are two problems in dlm_run_purgelist:
>
> 1. If a lockres is found to be in use, dlm_run_purgelist keeps trying to purge
> the same lockres instead of trying the next lockres.
>
> 2. When a lockres is found unused, dlm_run_purgelist releases the lockres
> spinlock before DLM_LOCK_RES_DROPPING_REF is set and then calls
> dlm_purge_lockres. The spinlock is reacquired, but in this window the lockres
> can get reused. This leads to a BUG.
>
> This patch modifies dlm_run_purgelist to skip a lockres if it is in use and
> purge the next lockres instead. It also sets DLM_LOCK_RES_DROPPING_REF before
> releasing the lockres spinlock, protecting it from getting reused.
>
> Signed-off-by: Srinivas Eeda <srinivas.eeda at oracle.com>

-------------- next part --------------
A non-text attachment was scrubbed...
Name: srini.patch
Type: text/x-patch
Size: 1315 bytes
Desc: not available
Url : http://oss.oracle.com/pipermail/ocfs2-devel/attachments/20100616/6601081c/attachment.bin
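The attachment itself was scrubbed by the archive; the approach described
presumably amounts to something like this inside the dlm_run_purge_list()
loop (a guess at the shape, not the actual attached patch):

        /* if the head of the purge list is busy or migrating, rotate it to
         * the tail and keep going, so the loop always works on the current
         * head of dlm->purge_list */
        spin_lock(&lockres->spinlock);
        if (!__dlm_lockres_unused(lockres) ||
            (lockres->state & DLM_LOCK_RES_MIGRATING)) {
                list_del_init(&lockres->purge);
                list_add_tail(&lockres->purge, &dlm->purge_list);
                spin_unlock(&lockres->spinlock);
                continue;       /* the next iteration picks up the new head */
        }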
Wengang Wang
2010-Jun-17 06:05 UTC
[Ocfs2-devel] [PATCH 1/2] ocfs2 fix o2dlm dlm run purgelist
On 10-06-16 18:44, Sunil Mushran wrote:
> One way to skip a lockres in the purgelist is to list_del_init() and
> list_add_tail() it. That simplifies the patch a lot.

For my knowledge, why not just list_move_tail()?

regards,
wengang.
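For reference, list_move_tail() is the one-call form of exactly that pair; in
include/linux/list.h of this era it is essentially:

        static inline void list_move_tail(struct list_head *list,
                                          struct list_head *head)
        {
                __list_del(list->prev, list->next);
                list_add_tail(list, head);
        }

so the two spellings end up doing the same thing, and the choice between them
is purely one of readability.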