Changwei Ge
2017-Dec-08 09:09 UTC
[Ocfs2-devel] [PATCH] ocfs2: fix a potential 'ABBA' deadlock caused by 'l_lock' and 'dentry_attach_lock'
On 2017/12/7 20:37, piaojun wrote:> Hi Changwei, > > On 2017/12/7 19:59, Changwei Ge wrote: >> Hi Jun, >> >> On 2017/12/7 19:30, piaojun wrote: >>> CPUA CPUB >>> >>> ocfs2_dentry_convert_worker >>> get 'l_lock' >> >> This lock belongs to ocfs2_dentry_lock::ocfs2_lock_res::l_lock >> >>> >>> get 'dentry_attach_lock' >>> >>> interruptted by dio_end_io: >>> dio_end_io >>> dio_bio_end_aio >>> dio_complete >>> dio->end_io >>> ocfs2_dio_end_io >>> ocfs2_rw_unlock >>> ... >>> try to get 'l_lock' >> >> This lock belongs to ocfs2_lock_res::l_lock though. >> >> So I think they are totally two different locks. >> So making spin_lock to interruption safe is pointless. >> >> Thanks, >> Changwei >> > > For the same lock, we need use spin_lock_irqsave to ensure irq-safe as > you said. But the scenario described above is another kind of deadlock > caused by two different locks which stuck each other. To avoid this > 'ABBA' deadlock we need follow the rule that 'spin_lock_irqsave' should > be called under 'spin_lock_irqsave'.Hi Jun, I'm not sure if you understood my concern? I agree with that we should avoid ABBA dead lock scenario. But the dead lock scenario your sequence diagram is demonstrating doesn't even exist. Because CPU A is holding lock _X1_ and waiting for _dentry_attach_lock_. meanwhile CPU B is holding lock _dentry_attach_lock_ and waiting for _X2_. No ABBA deadlock condition is met, CPU B will acquire lock _X2_ without being affected by CPU A. In a nutshell, there are three locks(lock memory space) involved in your diagram rather than two. Thanks, Changwei> > thanks, > Jun > >>> but CPUA has got it. >>> >>> try to get 'dentry_attach_lock', >>> but CPUB has got 'dentry_attach_lock', >>> and would not release it. >>> >>> so we need use spin_lock_irqsave for 'dentry_attach_lock' to prevent >>> interruptted by softirq. >>> >>> Signed-off-by: Jun Piao <piaojun at huawei.com> >>> Reviewed-by: Alex Chen <alex.chen at huawei.com> >>> --- >>> fs/ocfs2/dcache.c | 14 ++++++++------ >>> fs/ocfs2/dlmglue.c | 14 +++++++------- >>> fs/ocfs2/namei.c | 5 +++-- >>> 3 files changed, 18 insertions(+), 15 deletions(-) >>> >>> diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c >>> index 2903730..6555fbf 100644 >>> --- a/fs/ocfs2/dcache.c >>> +++ b/fs/ocfs2/dcache.c >>> @@ -230,6 +230,7 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, >>> int ret; >>> struct dentry *alias; >>> struct ocfs2_dentry_lock *dl = dentry->d_fsdata; >>> + unsigned long flags; >>> >>> trace_ocfs2_dentry_attach_lock(dentry->d_name.len, dentry->d_name.name, >>> (unsigned long long)parent_blkno, dl); >>> @@ -309,10 +310,10 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, >>> ocfs2_dentry_lock_res_init(dl, parent_blkno, inode); >>> >>> out_attach: >>> - spin_lock(&dentry_attach_lock); >>> + spin_lock_irqsave(&dentry_attach_lock, flags); >>> dentry->d_fsdata = dl; >>> dl->dl_count++; >>> - spin_unlock(&dentry_attach_lock); >>> + spin_unlock_irqrestore(&dentry_attach_lock, flags); >>> >>> /* >>> * This actually gets us our PRMODE level lock. From now on, >>> @@ -333,9 +334,9 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, >>> if (ret < 0 && !alias) { >>> ocfs2_lock_res_free(&dl->dl_lockres); >>> BUG_ON(dl->dl_count != 1); >>> - spin_lock(&dentry_attach_lock); >>> + spin_lock_irqsave(&dentry_attach_lock, flags); >>> dentry->d_fsdata = NULL; >>> - spin_unlock(&dentry_attach_lock); >>> + spin_unlock_irqrestore(&dentry_attach_lock, flags); >>> kfree(dl); >>> iput(inode); >>> } >>> @@ -379,13 +380,14 @@ void ocfs2_dentry_lock_put(struct ocfs2_super *osb, >>> struct ocfs2_dentry_lock *dl) >>> { >>> int unlock = 0; >>> + unsigned long flags; >>> >>> BUG_ON(dl->dl_count == 0); >>> >>> - spin_lock(&dentry_attach_lock); >>> + spin_lock_irqsave(&dentry_attach_lock, flags); >>> dl->dl_count--; >>> unlock = !dl->dl_count; >>> - spin_unlock(&dentry_attach_lock); >>> + spin_unlock_irqrestore(&dentry_attach_lock, flags); >>> >>> if (unlock) >>> ocfs2_drop_dentry_lock(osb, dl); >>> diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c >>> index 4689940..9bff3d2 100644 >>> --- a/fs/ocfs2/dlmglue.c >>> +++ b/fs/ocfs2/dlmglue.c >>> @@ -3801,7 +3801,7 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, >>> struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); >>> struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode); >>> struct dentry *dentry; >>> - unsigned long flags; >>> + unsigned long flags, d_flags; >>> int extra_ref = 0; >>> >>> /* >>> @@ -3831,13 +3831,13 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, >>> * flag. >>> */ >>> spin_lock_irqsave(&lockres->l_lock, flags); >>> - spin_lock(&dentry_attach_lock); >>> + spin_lock_irqsave(&dentry_attach_lock, d_flags); >>> if (!(lockres->l_flags & OCFS2_LOCK_FREEING) >>> && dl->dl_count) { >>> dl->dl_count++; >>> extra_ref = 1; >>> } >>> - spin_unlock(&dentry_attach_lock); >>> + spin_unlock_irqrestore(&dentry_attach_lock, d_flags); >>> spin_unlock_irqrestore(&lockres->l_lock, flags); >>> >>> mlog(0, "extra_ref = %d\n", extra_ref); >>> @@ -3850,13 +3850,13 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, >>> if (!extra_ref) >>> return UNBLOCK_CONTINUE; >>> >>> - spin_lock(&dentry_attach_lock); >>> + spin_lock_irqsave(&dentry_attach_lock, d_flags); >>> while (1) { >>> dentry = ocfs2_find_local_alias(dl->dl_inode, >>> dl->dl_parent_blkno, 1); >>> if (!dentry) >>> break; >>> - spin_unlock(&dentry_attach_lock); >>> + spin_unlock_irqrestore(&dentry_attach_lock, d_flags); >>> >>> if (S_ISDIR(dl->dl_inode->i_mode)) >>> shrink_dcache_parent(dentry); >>> @@ -3874,9 +3874,9 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, >>> d_delete(dentry); >>> dput(dentry); >>> >>> - spin_lock(&dentry_attach_lock); >>> + spin_lock_irqsave(&dentry_attach_lock, d_flags); >>> } >>> - spin_unlock(&dentry_attach_lock); >>> + spin_unlock_irqrestore(&dentry_attach_lock, d_flags); >>> >>> /* >>> * If we are the last holder of this dentry lock, there is no >>> diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c >>> index 3b0a10d..a17454e 100644 >>> --- a/fs/ocfs2/namei.c >>> +++ b/fs/ocfs2/namei.c >>> @@ -223,13 +223,14 @@ static void ocfs2_cleanup_add_entry_failure(struct ocfs2_super *osb, >>> struct dentry *dentry, struct inode *inode) >>> { >>> struct ocfs2_dentry_lock *dl = dentry->d_fsdata; >>> + unsigned long flags; >>> >>> ocfs2_simple_drop_lockres(osb, &dl->dl_lockres); >>> ocfs2_lock_res_free(&dl->dl_lockres); >>> BUG_ON(dl->dl_count != 1); >>> - spin_lock(&dentry_attach_lock); >>> + spin_lock_irqsave(&dentry_attach_lock, flags); >>> dentry->d_fsdata = NULL; >>> - spin_unlock(&dentry_attach_lock); >>> + spin_unlock_irqrestore(&dentry_attach_lock, flags); >>> kfree(dl); >>> iput(inode); >>> } >>> >> >> . >> >
piaojun
2017-Dec-08 10:16 UTC
[Ocfs2-devel] [PATCH] ocfs2: fix a potential 'ABBA' deadlock caused by 'l_lock' and 'dentry_attach_lock'
Hi Changwei, On 2017/12/8 17:09, Changwei Ge wrote:> On 2017/12/7 20:37, piaojun wrote: >> Hi Changwei, >> >> On 2017/12/7 19:59, Changwei Ge wrote: >>> Hi Jun, >>> >>> On 2017/12/7 19:30, piaojun wrote: >>>> CPUA CPUB >>>> >>>> ocfs2_dentry_convert_worker >>>> get 'l_lock' >>> >>> This lock belongs to ocfs2_dentry_lock::ocfs2_lock_res::l_lock >>> >>>> >>>> get 'dentry_attach_lock' >>>> >>>> interruptted by dio_end_io: >>>> dio_end_io >>>> dio_bio_end_aio >>>> dio_complete >>>> dio->end_io >>>> ocfs2_dio_end_io >>>> ocfs2_rw_unlock >>>> ... >>>> try to get 'l_lock' >>> >>> This lock belongs to ocfs2_lock_res::l_lock though. >>> >>> So I think they are totally two different locks. >>> So making spin_lock to interruption safe is pointless. >>> >>> Thanks, >>> Changwei >>> >> >> For the same lock, we need use spin_lock_irqsave to ensure irq-safe as >> you said. But the scenario described above is another kind of deadlock >> caused by two different locks which stuck each other. To avoid this >> 'ABBA' deadlock we need follow the rule that 'spin_lock_irqsave' should >> be called under 'spin_lock_irqsave'. > > Hi Jun, > I'm not sure if you understood my concern? > I agree with that we should avoid ABBA dead lock scenario. > But the dead lock scenario your sequence diagram is demonstrating > doesn't even exist. > > Because CPU A is holding lock _X1_ and waiting for _dentry_attach_lock_. > meanwhile CPU B is holding lock _dentry_attach_lock_ and waiting for _X2_. > No ABBA deadlock condition is met, CPU B will acquire lock _X2_ without > being affected by CPU A. > > In a nutshell, there are three locks(lock memory space) involved in your > diagram rather than two. > > Thanks, > Changwei >Sorry for misunderstanding your point, and I realize no deadlock would happen as these two 'l_lock' belong to different lockres. But I still suggest merging this patch, because this would reduce the risk of introducing new deadlock probably by programming. thanks, Jun>> >> thanks, >> Jun >> >>>> but CPUA has got it. >>>> >>>> try to get 'dentry_attach_lock', >>>> but CPUB has got 'dentry_attach_lock', >>>> and would not release it. >>>> >>>> so we need use spin_lock_irqsave for 'dentry_attach_lock' to prevent >>>> interruptted by softirq. >>>> >>>> Signed-off-by: Jun Piao <piaojun at huawei.com> >>>> Reviewed-by: Alex Chen <alex.chen at huawei.com> >>>> --- >>>> fs/ocfs2/dcache.c | 14 ++++++++------ >>>> fs/ocfs2/dlmglue.c | 14 +++++++------- >>>> fs/ocfs2/namei.c | 5 +++-- >>>> 3 files changed, 18 insertions(+), 15 deletions(-) >>>> >>>> diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c >>>> index 2903730..6555fbf 100644 >>>> --- a/fs/ocfs2/dcache.c >>>> +++ b/fs/ocfs2/dcache.c >>>> @@ -230,6 +230,7 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, >>>> int ret; >>>> struct dentry *alias; >>>> struct ocfs2_dentry_lock *dl = dentry->d_fsdata; >>>> + unsigned long flags; >>>> >>>> trace_ocfs2_dentry_attach_lock(dentry->d_name.len, dentry->d_name.name, >>>> (unsigned long long)parent_blkno, dl); >>>> @@ -309,10 +310,10 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, >>>> ocfs2_dentry_lock_res_init(dl, parent_blkno, inode); >>>> >>>> out_attach: >>>> - spin_lock(&dentry_attach_lock); >>>> + spin_lock_irqsave(&dentry_attach_lock, flags); >>>> dentry->d_fsdata = dl; >>>> dl->dl_count++; >>>> - spin_unlock(&dentry_attach_lock); >>>> + spin_unlock_irqrestore(&dentry_attach_lock, flags); >>>> >>>> /* >>>> * This actually gets us our PRMODE level lock. From now on, >>>> @@ -333,9 +334,9 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, >>>> if (ret < 0 && !alias) { >>>> ocfs2_lock_res_free(&dl->dl_lockres); >>>> BUG_ON(dl->dl_count != 1); >>>> - spin_lock(&dentry_attach_lock); >>>> + spin_lock_irqsave(&dentry_attach_lock, flags); >>>> dentry->d_fsdata = NULL; >>>> - spin_unlock(&dentry_attach_lock); >>>> + spin_unlock_irqrestore(&dentry_attach_lock, flags); >>>> kfree(dl); >>>> iput(inode); >>>> } >>>> @@ -379,13 +380,14 @@ void ocfs2_dentry_lock_put(struct ocfs2_super *osb, >>>> struct ocfs2_dentry_lock *dl) >>>> { >>>> int unlock = 0; >>>> + unsigned long flags; >>>> >>>> BUG_ON(dl->dl_count == 0); >>>> >>>> - spin_lock(&dentry_attach_lock); >>>> + spin_lock_irqsave(&dentry_attach_lock, flags); >>>> dl->dl_count--; >>>> unlock = !dl->dl_count; >>>> - spin_unlock(&dentry_attach_lock); >>>> + spin_unlock_irqrestore(&dentry_attach_lock, flags); >>>> >>>> if (unlock) >>>> ocfs2_drop_dentry_lock(osb, dl); >>>> diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c >>>> index 4689940..9bff3d2 100644 >>>> --- a/fs/ocfs2/dlmglue.c >>>> +++ b/fs/ocfs2/dlmglue.c >>>> @@ -3801,7 +3801,7 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, >>>> struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres); >>>> struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode); >>>> struct dentry *dentry; >>>> - unsigned long flags; >>>> + unsigned long flags, d_flags; >>>> int extra_ref = 0; >>>> >>>> /* >>>> @@ -3831,13 +3831,13 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, >>>> * flag. >>>> */ >>>> spin_lock_irqsave(&lockres->l_lock, flags); >>>> - spin_lock(&dentry_attach_lock); >>>> + spin_lock_irqsave(&dentry_attach_lock, d_flags); >>>> if (!(lockres->l_flags & OCFS2_LOCK_FREEING) >>>> && dl->dl_count) { >>>> dl->dl_count++; >>>> extra_ref = 1; >>>> } >>>> - spin_unlock(&dentry_attach_lock); >>>> + spin_unlock_irqrestore(&dentry_attach_lock, d_flags); >>>> spin_unlock_irqrestore(&lockres->l_lock, flags); >>>> >>>> mlog(0, "extra_ref = %d\n", extra_ref); >>>> @@ -3850,13 +3850,13 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, >>>> if (!extra_ref) >>>> return UNBLOCK_CONTINUE; >>>> >>>> - spin_lock(&dentry_attach_lock); >>>> + spin_lock_irqsave(&dentry_attach_lock, d_flags); >>>> while (1) { >>>> dentry = ocfs2_find_local_alias(dl->dl_inode, >>>> dl->dl_parent_blkno, 1); >>>> if (!dentry) >>>> break; >>>> - spin_unlock(&dentry_attach_lock); >>>> + spin_unlock_irqrestore(&dentry_attach_lock, d_flags); >>>> >>>> if (S_ISDIR(dl->dl_inode->i_mode)) >>>> shrink_dcache_parent(dentry); >>>> @@ -3874,9 +3874,9 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, >>>> d_delete(dentry); >>>> dput(dentry); >>>> >>>> - spin_lock(&dentry_attach_lock); >>>> + spin_lock_irqsave(&dentry_attach_lock, d_flags); >>>> } >>>> - spin_unlock(&dentry_attach_lock); >>>> + spin_unlock_irqrestore(&dentry_attach_lock, d_flags); >>>> >>>> /* >>>> * If we are the last holder of this dentry lock, there is no >>>> diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c >>>> index 3b0a10d..a17454e 100644 >>>> --- a/fs/ocfs2/namei.c >>>> +++ b/fs/ocfs2/namei.c >>>> @@ -223,13 +223,14 @@ static void ocfs2_cleanup_add_entry_failure(struct ocfs2_super *osb, >>>> struct dentry *dentry, struct inode *inode) >>>> { >>>> struct ocfs2_dentry_lock *dl = dentry->d_fsdata; >>>> + unsigned long flags; >>>> >>>> ocfs2_simple_drop_lockres(osb, &dl->dl_lockres); >>>> ocfs2_lock_res_free(&dl->dl_lockres); >>>> BUG_ON(dl->dl_count != 1); >>>> - spin_lock(&dentry_attach_lock); >>>> + spin_lock_irqsave(&dentry_attach_lock, flags); >>>> dentry->d_fsdata = NULL; >>>> - spin_unlock(&dentry_attach_lock); >>>> + spin_unlock_irqrestore(&dentry_attach_lock, flags); >>>> kfree(dl); >>>> iput(inode); >>>> } >>>> >>> >>> . >>> >> > > . >