Changwei Ge
2019-May-05 07:15 UTC
[Ocfs2-devel] [PATCH] fs/ocfs2: fix race in ocfs2_dentry_attach_lock
Hi Wengang,

We actually met some strange problems around dentry cluster lock
attachment before, so I suppose solidifying the logic around it makes
sense.

Can you make your change log richer with a little more analysis?

If you can paste a dlm lock resource and an ocfs2 lock resource snapshot
from the coredump, that would be great, and I can help figure out what
went wrong.

Thanks,
Changwei

On 2019/5/4 1:01, Wengang Wang wrote:
> ocfs2_dentry_attach_lock() can be executed in parallel threads against the
> same dentry. Make that race safe.
>
> Signed-off-by: Wengang Wang <wen.gang.wang at oracle.com>
> ---
>   fs/ocfs2/dcache.c | 27 +++++++++++++++++++++------
>   1 file changed, 21 insertions(+), 6 deletions(-)
>
> diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
> index 290373024d9d..980e571c163c 100644
> --- a/fs/ocfs2/dcache.c
> +++ b/fs/ocfs2/dcache.c
> @@ -227,12 +227,13 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
>   			       struct inode *inode,
>   			       u64 parent_blkno)
>   {
> -	int ret;
> +	int ret, new_dl = 0;
>   	struct dentry *alias;
> -	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
> +	struct ocfs2_dentry_lock *dl, *dl_free_on_race = NULL;
>
>   	trace_ocfs2_dentry_attach_lock(dentry->d_name.len, dentry->d_name.name,
> -				       (unsigned long long)parent_blkno, dl);
> +				       (unsigned long long)parent_blkno,
> +				       dentry->d_fsdata);
>
>   	/*
>   	 * Negative dentry. We ignore these for now.
> @@ -243,11 +244,15 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
>   	if (!inode)
>   		return 0;
>
> +	spin_lock(&dentry_attach_lock);
>   	if (d_really_is_negative(dentry) && dentry->d_fsdata) {
>   		/* Converting a negative dentry to positive
>   		   Clear dentry->d_fsdata */
>   		dentry->d_fsdata = dl = NULL;
> +	} else {
> +		dl = dentry->d_fsdata;
>   	}
> +	spin_unlock(&dentry_attach_lock);
>
>   	if (dl) {
>   		mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
> @@ -310,10 +315,20 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
>
>   out_attach:
>   	spin_lock(&dentry_attach_lock);
> -	dentry->d_fsdata = dl;
> -	dl->dl_count++;
> +	/* d_fsdata could be set by parallel thread */
> +	if (unlikely(dentry->d_fsdata)) {
> +		pr_err("race found in ocfs2_dentry_attach_lock dentry=%p\n", dentry);
> +		if (!alias)
> +			dl_free_on_race = dl;
> +	} else {
> +		dentry->d_fsdata = dl;
> +		dl->dl_count++;
> +		if (!alias)
> +			new_dl = 1;
> +	}
>   	spin_unlock(&dentry_attach_lock);
>
> +	kfree(dl_free_on_race);
>   	/*
>   	 * This actually gets us our PRMODE level lock. From now on,
>   	 * we'll have a notification if one of these names is
> @@ -330,7 +345,7 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
>   	 * We need to do this because error here means no d_instantiate(),
>   	 * which means iput() will not be called during dput(dentry).
>   	 */
> -	if (ret < 0 && !alias) {
> +	if (ret < 0 && new_dl) {
>   		ocfs2_lock_res_free(&dl->dl_lockres);
>   		BUG_ON(dl->dl_count != 1);
>   		spin_lock(&dentry_attach_lock);
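The essence of the change above is that dentry->d_fsdata is re-checked
under dentry_attach_lock before being published, and the losing thread's
allocation is freed only after the spinlock is dropped. As a standalone
illustration, here is a minimal userspace sketch of that
publish-once-under-lock pattern; the names (attach, published, struct
dlock) and the pthread mutex are hypothetical stand-ins, not the kernel
code:

#include <pthread.h>
#include <stdlib.h>

struct dlock { int count; };

static pthread_mutex_t attach_lock = PTHREAD_MUTEX_INITIALIZER;
static struct dlock *published;   /* stands in for dentry->d_fsdata */

static struct dlock *attach(void)
{
	struct dlock *dl = calloc(1, sizeof(*dl));
	struct dlock *free_on_race = NULL;

	pthread_mutex_lock(&attach_lock);
	if (published) {
		/* a parallel thread attached first: remember our copy... */
		free_on_race = dl;
		dl = published;
	} else {
		dl->count++;
		published = dl;
	}
	pthread_mutex_unlock(&attach_lock);

	/* ...and free it outside the critical section, mirroring the
	 * patch's kfree(dl_free_on_race) after spin_unlock() */
	free(free_on_race);
	return dl;
}

int main(void) { return attach() == published ? 0 : 1; }

Keeping the free outside the locked region keeps the critical section
minimal; only the check and the publish need the lock.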
Wengang Wang
2019-May-06 17:55 UTC
[Ocfs2-devel] [PATCH] fs/ocfs2: fix race in ocfs2_dentry_attach_lock
Hi Changwei,

Actually I'd like to post the details after I have confirmed the patch
fixes the issue (I don't have that confirmation yet). But since you are
interested, I am putting it here.

The in-core dlm_lock_resource and the dlm_locks on its granted list look
like this:

<struct dlm_lock_resource 0xffff9c7e9ad57cc0>
    lockname              N000000000064e69f009a7bb5  hex: 4e303030303030303030303634653639660000009a7bb500000
    owner                 2
    dlm                   34F9A5BE6F2A481B8DE25ABAE8BBC68D
    state                 NOSTATE
    *dlm_locks in granted list:
        <dlm_lock 0xffff9c7e0429d700>
          ml.node              1
          lockres              0xffff9c7e9ad57cc0
          refs                 2
          lksb                 ffff9c7e05093720 / containing ocfs2_lock_res ffff9c7e050936a0
          convert_pending      0
          ml.list              0
          ml.flags             0
          type                 3    convert_type    -1
          ml.highest_blocked   -1
        <dlm_lock 0xffff9c7e02f3ee80>
          ml.node              0
          lockres              0xffff9c7e9ad57cc0
          refs                 2
          lksb                 ffff9c7e30658a20 / containing ocfs2_lock_res ffff9c7e306589a0
          convert_pending      0
          ml.list              0
          ml.flags             0
          type                 3    convert_type    -1
          ml.highest_blocked   -1
        <dlm_lock 0xffff9c7d3ef3fd80>
          ml.node              2
          lockres              0xffff9c7e9ad57cc0
          refs                 2
          lksb                 ffff9c7dbffe2898 / containing ocfs2_lock_res ffff9c7dbffe2818
          convert_pending      0
          ml.list              0
          ml.flags             0
          type                 3    convert_type    -1
          ml.highest_blocked   -1
        <dlm_lock 0xffff9c7e41c97880>
          ml.node              2
          lockres              0xffff9c7e9ad57cc0
          refs                 2
          lksb                 ffff9c7dbffad298 / containing ocfs2_lock_res ffff9c7dbffad218
          convert_pending      0
          ml.list              0
          ml.flags             0
          type                 3    convert_type    -1
          ml.highest_blocked   -1
        4 dlm_locks in granted list

You can see there are two granted dlm_locks for node 2
(0xffff9c7d3ef3fd80 and 0xffff9c7e41c97880). Normally the ocfs2 FS layer
wouldn't request a duplicate new lock against the same lock resource;
after the first new lock request it would request converts instead.
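To see why two granted locks from one node is anomalous, a toy model of
the invariant may help: a well-behaved node holds at most one dlm_lock
per resource and issues converts for later requests. Everything below is
illustrative (toy_request, toy_check, the level encoding), not the real
o2dlm API:

#include <assert.h>
#include <stdio.h>

struct toy_lock { int node; int level; };

struct toy_resource {
	struct toy_lock granted[8];
	int ngranted;
};

static void toy_request(struct toy_resource *res, int node, int level)
{
	for (int i = 0; i < res->ngranted; i++) {
		if (res->granted[i].node == node) {
			res->granted[i].level = level;  /* convert path */
			return;
		}
	}
	/* first request from this node: a genuinely new lock */
	res->granted[res->ngranted++] = (struct toy_lock){ node, level };
}

static void toy_check(const struct toy_resource *res)
{
	for (int i = 0; i < res->ngranted; i++)
		for (int j = i + 1; j < res->ngranted; j++)
			/* two granted locks for one node, as in the dump
			 * above, would mean the FS layer raced and
			 * created a duplicate */
			assert(res->granted[i].node != res->granted[j].node);
}

int main(void)
{
	struct toy_resource res = { .ngranted = 0 };
	toy_request(&res, 1, 3);   /* "type 3" as in the dump */
	toy_request(&res, 0, 3);
	toy_request(&res, 2, 3);
	toy_request(&res, 2, 3);   /* arrives as a convert, not a 2nd lock */
	toy_check(&res);
	printf("%d granted locks, at most one per node\n", res.ngranted);
	return 0;
}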
So the existence of two dlm_locks for the same lock resource from one
node is a hint that the ocfs2 FS layer raced. From the lksb fields we
know the containing ocfs2_lock_res objects (ffff9c7dbffe2818 and
ffff9c7dbffad218):

crash> ocfs2 struct ocfs2_lock_res ffff9c7dbffe2818 | egrep "holder|flag|name"
    l_name          N000000000064e69f009a7bb5  hex: 4e303030303030303030303634653639660000009a7bb500000
    l_flags         |OCFS2_LOCK_ATTACHED|OCFS2_LOCK_INITIALIZED
    l_ro_holders    1
    l_ex_holders    0
crash> ocfs2 struct ocfs2_lock_res ffff9c7dbffad218 | egrep "holder|flag|name"
    l_name          N000000000064e69f009a7bb5  hex: 4e303030303030303030303634653639660000009a7bb500000
    l_flags         |OCFS2_LOCK_ATTACHED|OCFS2_LOCK_INITIALIZED
    l_ro_holders    0
    l_ex_holders    0

The one at ffff9c7dbffad218 is the victim, with l_ro_holders being 0.

I think the race goes like this:

        thread A                                thread B

(A1) enter ocfs2_dentry_attach_lock(),
     see dentry->d_fsdata is NULL and no
     alias found by ocfs2_find_local_alias(),
     kmalloc a new ocfs2_dentry_lock
     structure into local variable "dl": dl1
                                        (B1) enter ocfs2_dentry_attach_lock(),
                                             see dentry->d_fsdata is NULL and
                                             no alias found by
                                             ocfs2_find_local_alias(), so
                                             kmalloc a new ocfs2_dentry_lock
                                             structure into local variable
                                             "dl": dl2
(A2) set dentry->d_fsdata to dl1
(A3) ocfs2_dentry_lock() succeeds;
     dl1->dl_lockres.l_ro_holders
     increases to 1
                                        (B2) set dentry->d_fsdata to dl2
                                        (B3) ocfs2_dentry_lock() succeeds;
                                             dl2->dl_lockres.l_ro_holders
                                             increases to 1
(A4) ocfs2_dentry_unlock() is called;
     dl2->dl_lockres.l_ro_holders
     decreases to 0
                                        (B4) ocfs2_dentry_unlock() is called;
                                             it tries to decrease
                                             dl2->dl_lockres.l_ro_holders,
                                             sees it is already zero, and
                                             panics

The point is that for thread A, ro_holders was increased on dl1 at step
A3 but decreased on dl2 at step A4. This leaves dl1 with ro_holders
stuck at 1, and it causes an extra, unexpected decrease on dl2, bringing
its ro_holders to 0. Step B2 also causes a memory leak of dl1 (not the
topic here). Step B4 hits the panic on seeing ro_holders on dl2 already
at zero.

Going back to the vmcore, let's find dl1 and dl2:

crash> kmem ffff9c7dbffe2818
CACHE             OBJSIZE  ALLOCATED     TOTAL  SLABS  SSIZE  NAME
ffff9c7e97c03080      512      36007     36128   1129    16k  kmalloc-512
  SLAB              MEMORY            NODE  TOTAL  ALLOCATED  FREE
  ffffeeaec2fff800  ffff9c7dbffe0000     0     32         25     7
  FREE / [ALLOCATED]
  [ffff9c7dbffe2800]
      PAGE        PHYSICAL      MAPPING       INDEX CNT FLAGS
ffffeeaec2fff880  bffe2000 dead000000000400        0   0 fffffc0000000

crash> kmem ffff9c7dbffad218
CACHE             OBJSIZE  ALLOCATED     TOTAL  SLABS  SSIZE  NAME
ffff9c7e97c03080      512      36007     36128   1129    16k  kmalloc-512
  SLAB              MEMORY            NODE  TOTAL  ALLOCATED  FREE
  ffffeeaec2ffeb00  ffff9c7dbffac000     0     32         23     9
  FREE / [ALLOCATED]
  [ffff9c7dbffad200]
      PAGE        PHYSICAL      MAPPING       INDEX CNT FLAGS
ffffeeaec2ffeb40  bffad000 dead0000ffffffff        0   0 fffffc0000000

crash> struct ocfs2_dentry_lock -x ffff9c7dbffe2800 | egrep "parent|inode"
  dl_parent_blkno = 0x64e69f,
  dl_inode = 0xffff9c7d13c624c0,
crash> struct ocfs2_dentry_lock -x ffff9c7dbffad200 | egrep "parent|inode"
  dl_parent_blkno = 0x64e69f,
  dl_inode = 0xffff9c7d13c624c0,

ffff9c7dbffe2800 is dl1 and ffff9c7dbffad200 is dl2; they match well in
the dl_parent_blkno and dl_inode fields.

Verifying the dentry:

crash> struct dentry ffff9c7d8b659780 | grep 24c0
  d_inode = 0xffff9c7d13c624c0,
crash> struct dentry ffff9c7d8b659780 | grep fsdata
  d_fsdata = 0xffff9c7dbffad200,

d_fsdata matches dl2 well.

Above is the analysis (hoping the format is still good after the email
is sent). The fix is self-explaining.

Thanks,
Wengang

On 2019/5/5 0:15, Changwei Ge wrote:
> Hi Wengang,
>
> We actually met some strange problems around dentry cluster lock
> attachment before, so I suppose solidifying the logic around it makes
> sense.
>
> Can you make your change log richer with a little more analysis?
>
> If you can paste a dlm lock resource and an ocfs2 lock resource snapshot
> from the coredump, that would be great, and I can help figure out what
> went wrong.
>
> Thanks,
>
> Changwei
>
> On 2019/5/4 1:01, Wengang Wang wrote:
>> ocfs2_dentry_attach_lock() can be executed in parallel threads against the
>> same dentry. Make that race safe.
>>
>> Signed-off-by: Wengang Wang <wen.gang.wang at oracle.com>
>> ---
>>   fs/ocfs2/dcache.c | 27 +++++++++++++++++++++------
>>   1 file changed, 21 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
>> index 290373024d9d..980e571c163c 100644
>> --- a/fs/ocfs2/dcache.c
>> +++ b/fs/ocfs2/dcache.c
>> @@ -227,12 +227,13 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
>>   			       struct inode *inode,
>>   			       u64 parent_blkno)
>>   {
>> -	int ret;
>> +	int ret, new_dl = 0;
>>   	struct dentry *alias;
>> -	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
>> +	struct ocfs2_dentry_lock *dl, *dl_free_on_race = NULL;
>>
>>   	trace_ocfs2_dentry_attach_lock(dentry->d_name.len, dentry->d_name.name,
>> -				       (unsigned long long)parent_blkno, dl);
>> +				       (unsigned long long)parent_blkno,
>> +				       dentry->d_fsdata);
>>
>>   	/*
>>   	 * Negative dentry. We ignore these for now.
>> @@ -243,11 +244,15 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
>>   	if (!inode)
>>   		return 0;
>>
>> +	spin_lock(&dentry_attach_lock);
>>   	if (d_really_is_negative(dentry) && dentry->d_fsdata) {
>>   		/* Converting a negative dentry to positive
>>   		   Clear dentry->d_fsdata */
>>   		dentry->d_fsdata = dl = NULL;
>> +	} else {
>> +		dl = dentry->d_fsdata;
>>   	}
>> +	spin_unlock(&dentry_attach_lock);
>>
>>   	if (dl) {
>>   		mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
>> @@ -310,10 +315,20 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
>>
>>   out_attach:
>>   	spin_lock(&dentry_attach_lock);
>> -	dentry->d_fsdata = dl;
>> -	dl->dl_count++;
>> +	/* d_fsdata could be set by parallel thread */
>> +	if (unlikely(dentry->d_fsdata)) {
>> +		pr_err("race found in ocfs2_dentry_attach_lock dentry=%p\n", dentry);
>> +		if (!alias)
>> +			dl_free_on_race = dl;
>> +	} else {
>> +		dentry->d_fsdata = dl;
>> +		dl->dl_count++;
>> +		if (!alias)
>> +			new_dl = 1;
>> +	}
>>   	spin_unlock(&dentry_attach_lock);
>>
>> +	kfree(dl_free_on_race);
>>   	/*
>>   	 * This actually gets us our PRMODE level lock. From now on,
>>   	 * we'll have a notification if one of these names is
>> @@ -330,7 +345,7 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
>>   	 * We need to do this because error here means no d_instantiate(),
>>   	 * which means iput() will not be called during dput(dentry).
>>   	 */
>> -	if (ret < 0 && !alias) {
>> +	if (ret < 0 && new_dl) {
>>   		ocfs2_lock_res_free(&dl->dl_lockres);
>>   		BUG_ON(dl->dl_count != 1);
>>   		spin_lock(&dentry_attach_lock);
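To make the interleaving in Wengang's analysis concrete, here is a
compressed, deliberately racy userspace model of the pre-patch behavior:
each thread increments ro_holders on the dl it allocated (steps A3/B3)
but decrements through d_fsdata (steps A4/B4), so one dl leaks a holder
and the other underflows to the panic condition. All names are stand-ins
for the kernel structures, not the actual fs/ocfs2 code:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct dl { int ro_holders; };

static struct dl *d_fsdata;        /* written with no lock, as pre-patch */
static pthread_mutex_t dec_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_barrier_t step;

static void *worker(void *arg)
{
	(void)arg;
	/* A1/B1: both threads saw d_fsdata == NULL, each allocates a dl */
	struct dl *mine = calloc(1, sizeof(*mine));

	pthread_barrier_wait(&step);
	d_fsdata = mine;           /* A2/B2: unsynchronized, last writer wins */
	mine->ro_holders++;        /* A3/B3: increment on the dl *we* made    */

	pthread_barrier_wait(&step);   /* both lock paths have finished */

	/* A4/B4: decrement through d_fsdata, possibly the *other* dl */
	pthread_mutex_lock(&dec_lock);
	if (d_fsdata->ro_holders == 0)
		printf("ro_holders already 0: this is where ocfs2 panics\n");
	else
		d_fsdata->ro_holders--;
	pthread_mutex_unlock(&dec_lock);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_barrier_init(&step, NULL, 2);
	pthread_create(&a, NULL, worker, NULL);
	pthread_create(&b, NULL, worker, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	pthread_barrier_destroy(&step);
	return 0;
}

Whichever thread's dl loses the d_fsdata race keeps ro_holders at 1
forever (the leaked holder), and the published dl takes both decrements,
so the second unlocker always finds it already at zero.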