Eric Ren
2017-Jan-06 09:13 UTC
[Ocfs2-devel] [PATCH 2/2] ocfs2: fix deadlocks when taking inode lock at vfs entry points
Hi, >>>>>> >>>>>> Fixes them by adding the tracking logic (in the previous patch) for >>>>>> these funcs above, ocfs2_permission(), ocfs2_iop_[set|get]_acl(), >>>>>> ocfs2_setattr(). >>>>> As described cases above, shall we just add the tracking logic only for set/get_acl()? >>>> >>>> The idea is to detect recursive locking on the running task stack. Take case 1) for >>>> example if ocfs2_permission() >>>> is not changed: >>>> >>>> ocfs2_permission() <=== take PR, ocfs2_holder is not added >>>> ocfs2_iop_get_acl <=== still take PR, because there is no lock holder on the >>>> tracking list >>> I mean we have no need to check if locked by me, just do inode lock and add holder. >>> This will make code more clean, IMO. >> Oh, sorry, I get your point this time. I think we need to check it if there are more than >> one processes that hold >> PR lock on the same resource. If I don't understand you correctly, please tell me why >> you think it's not necessary >> to check before getting lock? > The code logic can only check if it is locked by myself. In the case Why only...? > described above, ocfs2_permission is the first entry to take inode lock. > And even if check succeeds, it is a bug without unlock, but not the case > of recursive lock. By checking succeeds, you mean it's locked by me, right? If so, this flag "arg_flags = OCFS2_META_LOCK_GETBH" will be passed down to ocfs2_inode_lock_full(), which gets back buffer head of the disk inode for us if necessary, but doesn't take cluster locking again. So, there is no need to unlock in such case. 
Thanks, Eric> > Thanks, > Joseph >> >> Thanks, >> Eric >>> >>> Thanks, >>> Joseph >>>> >>>> Thanks for your review;-) >>>> Eric >>>> >>>>> >>>>> Thanks, >>>>> Joseph >>>>>> >>>>>> Signed-off-by: Eric Ren <zren at suse.com> >>>>>> --- >>>>>> fs/ocfs2/acl.c | 39 ++++++++++++++++++++++++++++++++++----- >>>>>> fs/ocfs2/file.c | 44 ++++++++++++++++++++++++++++++++++---------- >>>>>> 2 files changed, 68 insertions(+), 15 deletions(-) >>>>>> >>>>>> diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c >>>>>> index bed1fcb..c539890 100644 >>>>>> --- a/fs/ocfs2/acl.c >>>>>> +++ b/fs/ocfs2/acl.c >>>>>> @@ -284,16 +284,31 @@ int ocfs2_iop_set_acl(struct inode *inode, struct posix_acl >>>>>> *acl, int type) >>>>>> { >>>>>> struct buffer_head *bh = NULL; >>>>>> int status = 0; >>>>>> - >>>>>> - status = ocfs2_inode_lock(inode, &bh, 1); >>>>>> + int arg_flags = 0, has_locked; >>>>>> + struct ocfs2_holder oh; >>>>>> + struct ocfs2_lock_res *lockres; >>>>>> + >>>>>> + lockres = &OCFS2_I(inode)->ip_inode_lockres; >>>>>> + has_locked = (ocfs2_is_locked_by_me(lockres) != NULL); >>>>>> + if (has_locked) >>>>>> + arg_flags = OCFS2_META_LOCK_GETBH; >>>>>> + status = ocfs2_inode_lock_full(inode, &bh, 1, arg_flags); >>>>>> if (status < 0) { >>>>>> if (status != -ENOENT) >>>>>> mlog_errno(status); >>>>>> return status; >>>>>> } >>>>>> + if (!has_locked) >>>>>> + ocfs2_add_holder(lockres, &oh); >>>>>> + >>>>>> status = ocfs2_set_acl(NULL, inode, bh, type, acl, NULL, NULL); >>>>>> - ocfs2_inode_unlock(inode, 1); >>>>>> + >>>>>> + if (!has_locked) { >>>>>> + ocfs2_remove_holder(lockres, &oh); >>>>>> + ocfs2_inode_unlock(inode, 1); >>>>>> + } >>>>>> brelse(bh); >>>>>> + >>>>>> return status; >>>>>> } >>>>>> @@ -303,21 +318,35 @@ struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int >>>>>> type) >>>>>> struct buffer_head *di_bh = NULL; >>>>>> struct posix_acl *acl; >>>>>> int ret; >>>>>> + int arg_flags = 0, has_locked; >>>>>> + struct ocfs2_holder oh; >>>>>> + struct ocfs2_lock_res *lockres; 
>>>>>> osb = OCFS2_SB(inode->i_sb); >>>>>> if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) >>>>>> return NULL; >>>>>> - ret = ocfs2_inode_lock(inode, &di_bh, 0); >>>>>> + >>>>>> + lockres = &OCFS2_I(inode)->ip_inode_lockres; >>>>>> + has_locked = (ocfs2_is_locked_by_me(lockres) != NULL); >>>>>> + if (has_locked) >>>>>> + arg_flags = OCFS2_META_LOCK_GETBH; >>>>>> + ret = ocfs2_inode_lock_full(inode, &di_bh, 0, arg_flags); >>>>>> if (ret < 0) { >>>>>> if (ret != -ENOENT) >>>>>> mlog_errno(ret); >>>>>> return ERR_PTR(ret); >>>>>> } >>>>>> + if (!has_locked) >>>>>> + ocfs2_add_holder(lockres, &oh); >>>>>> acl = ocfs2_get_acl_nolock(inode, type, di_bh); >>>>>> - ocfs2_inode_unlock(inode, 0); >>>>>> + if (!has_locked) { >>>>>> + ocfs2_remove_holder(lockres, &oh); >>>>>> + ocfs2_inode_unlock(inode, 0); >>>>>> + } >>>>>> brelse(di_bh); >>>>>> + >>>>>> return acl; >>>>>> } >>>>>> diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c >>>>>> index c488965..62be75d 100644 >>>>>> --- a/fs/ocfs2/file.c >>>>>> +++ b/fs/ocfs2/file.c >>>>>> @@ -1138,6 +1138,9 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) >>>>>> handle_t *handle = NULL; >>>>>> struct dquot *transfer_to[MAXQUOTAS] = { }; >>>>>> int qtype; >>>>>> + int arg_flags = 0, had_lock; >>>>>> + struct ocfs2_holder oh; >>>>>> + struct ocfs2_lock_res *lockres; >>>>>> trace_ocfs2_setattr(inode, dentry, >>>>>> (unsigned long long)OCFS2_I(inode)->ip_blkno, >>>>>> @@ -1173,13 +1176,20 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) >>>>>> } >>>>>> } >>>>>> - status = ocfs2_inode_lock(inode, &bh, 1); >>>>>> + lockres = &OCFS2_I(inode)->ip_inode_lockres; >>>>>> + had_lock = (ocfs2_is_locked_by_me(lockres) != NULL); >>>>>> + if (had_lock) >>>>>> + arg_flags = OCFS2_META_LOCK_GETBH; >>>>>> + status = ocfs2_inode_lock_full(inode, &bh, 1, arg_flags); >>>>>> if (status < 0) { >>>>>> if (status != -ENOENT) >>>>>> mlog_errno(status); >>>>>> goto bail_unlock_rw; >>>>>> } >>>>>> - inode_locked = 1; >>>>>> + if 
(!had_lock) { >>>>>> + ocfs2_add_holder(lockres, &oh); >>>>>> + inode_locked = 1; >>>>>> + } >>>>>> if (size_change) { >>>>>> status = inode_newsize_ok(inode, attr->ia_size); >>>>>> @@ -1260,7 +1270,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) >>>>>> bail_commit: >>>>>> ocfs2_commit_trans(osb, handle); >>>>>> bail_unlock: >>>>>> - if (status) { >>>>>> + if (status && inode_locked) { >>>>>> + ocfs2_remove_holder(lockres, &oh); >>>>>> ocfs2_inode_unlock(inode, 1); >>>>>> inode_locked = 0; >>>>>> } >>>>>> @@ -1278,8 +1289,10 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) >>>>>> if (status < 0) >>>>>> mlog_errno(status); >>>>>> } >>>>>> - if (inode_locked) >>>>>> + if (inode_locked) { >>>>>> + ocfs2_remove_holder(lockres, &oh); >>>>>> ocfs2_inode_unlock(inode, 1); >>>>>> + } >>>>>> brelse(bh); >>>>>> return status; >>>>>> @@ -1321,20 +1334,31 @@ int ocfs2_getattr(struct vfsmount *mnt, >>>>>> int ocfs2_permission(struct inode *inode, int mask) >>>>>> { >>>>>> int ret; >>>>>> + int has_locked; >>>>>> + struct ocfs2_holder oh; >>>>>> + struct ocfs2_lock_res *lockres; >>>>>> if (mask & MAY_NOT_BLOCK) >>>>>> return -ECHILD; >>>>>> - ret = ocfs2_inode_lock(inode, NULL, 0); >>>>>> - if (ret) { >>>>>> - if (ret != -ENOENT) >>>>>> - mlog_errno(ret); >>>>>> - goto out; >>>>>> + lockres = &OCFS2_I(inode)->ip_inode_lockres; >>>>>> + has_locked = (ocfs2_is_locked_by_me(lockres) != NULL); >>>>>> + if (!has_locked) { >>>>>> + ret = ocfs2_inode_lock(inode, NULL, 0); >>>>>> + if (ret) { >>>>>> + if (ret != -ENOENT) >>>>>> + mlog_errno(ret); >>>>>> + goto out; >>>>>> + } >>>>>> + ocfs2_add_holder(lockres, &oh); >>>>>> } >>>>>> ret = generic_permission(inode, mask); >>>>>> - ocfs2_inode_unlock(inode, 0); >>>>>> + if (!has_locked) { >>>>>> + ocfs2_remove_holder(lockres, &oh); >>>>>> + ocfs2_inode_unlock(inode, 0); >>>>>> + } >>>>>> out: >>>>>> return ret; >>>>>> } >>>>> >>>>> >>>> >>> >>> >> > >
Joseph Qi
2017-Jan-06 09:55 UTC
[Ocfs2-devel] [PATCH 2/2] ocfs2: fix deadlocks when taking inode lock at vfs entry points
On 17/1/6 17:13, Eric Ren wrote: > Hi, > >>>>>>> >>>>>>> Fixes them by adding the tracking logic (in the previous patch) for >>>>>>> these funcs above, ocfs2_permission(), ocfs2_iop_[set|get]_acl(), >>>>>>> ocfs2_setattr(). >>>>>> As described cases above, shall we just add the tracking logic >>>>>> only for set/get_acl()? >>>>> >>>>> The idea is to detect recursive locking on the running task stack. >>>>> Take case 1) for example if ocfs2_permission() >>>>> is not changed: >>>>> >>>>> ocfs2_permission() <=== take PR, ocfs2_holder is not added >>>>> ocfs2_iop_get_acl <=== still take PR, because there is no lock >>>>> holder on the tracking list >>>> I mean we have no need to check if locked by me, just do inode lock >>>> and add holder. >>>> This will make code more clean, IMO. >>> Oh, sorry, I get your point this time. I think we need to check it >>> if there are more than one processes that hold >>> PR lock on the same resource. If I don't understand you correctly, >>> please tell me why you think it's not necessary >>> to check before getting lock? >> The code logic can only check if it is locked by myself. In the case > Why only...? >> described above, ocfs2_permission is the first entry to take inode lock. >> And even if check succeeds, it is a bug without unlock, but not the case >> of recursive lock. > > By checking succeeds, you mean it's locked by me, right? If so, this flag > "arg_flags = OCFS2_META_LOCK_GETBH" > will be passed down to ocfs2_inode_lock_full(), which gets back buffer > head of > the disk inode for us if necessary, but doesn't take cluster locking > again. So, there is > no need to unlock in such case. I am trying to state my point more clearly... The issue case you are trying to fix is: Process A takes inode lock (phase1) ... <<< race window (phase2, Process B) ... take inode lock again (phase3) Deadlock happens because Process B in phase2 and Process A in phase3 are waiting for each other. 
So you are trying to fix it by making phase3 finish without really doing __ocfs2_cluster_lock, then Process B can continue as well. Let us bear in mind that phase1 and phase3 are in the same context and executed in order. That's why I think there is no need to check if locked by myself in phase1. If phase1 finds it is already locked by myself, that means the holder is left by last operation without dec holder. That's why I think it is a bug instead of a recursive lock case. Thanks, Joseph > > Thanks, > Eric > >> >> Thanks, >> Joseph >>> >>> Thanks, >>> Eric >>>> >>>> Thanks, >>>> Joseph >>>>> >>>>> Thanks for your review;-) >>>>> Eric >>>>> >>>>>> >>>>>> Thanks, >>>>>> Joseph >>>>>>> >>>>>>> Signed-off-by: Eric Ren <zren at suse.com> >>>>>>> --- >>>>>>> fs/ocfs2/acl.c | 39 ++++++++++++++++++++++++++++++++++----- >>>>>>> fs/ocfs2/file.c | 44 ++++++++++++++++++++++++++++++++++---------- >>>>>>> 2 files changed, 68 insertions(+), 15 deletions(-) >>>>>>> >>>>>>> diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c >>>>>>> index bed1fcb..c539890 100644 >>>>>>> --- a/fs/ocfs2/acl.c >>>>>>> +++ b/fs/ocfs2/acl.c >>>>>>> @@ -284,16 +284,31 @@ int ocfs2_iop_set_acl(struct inode *inode, >>>>>>> struct posix_acl *acl, int type) >>>>>>> { >>>>>>> struct buffer_head *bh = NULL; >>>>>>> int status = 0; >>>>>>> - >>>>>>> - status = ocfs2_inode_lock(inode, &bh, 1); >>>>>>> + int arg_flags = 0, has_locked; >>>>>>> + struct ocfs2_holder oh; >>>>>>> + struct ocfs2_lock_res *lockres; >>>>>>> + >>>>>>> + lockres = &OCFS2_I(inode)->ip_inode_lockres; >>>>>>> + has_locked = (ocfs2_is_locked_by_me(lockres) != NULL); >>>>>>> + if (has_locked) >>>>>>> + arg_flags = OCFS2_META_LOCK_GETBH; >>>>>>> + status = ocfs2_inode_lock_full(inode, &bh, 1, arg_flags); >>>>>>> if (status < 0) { >>>>>>> if (status != -ENOENT) >>>>>>> mlog_errno(status); >>>>>>> return status; >>>>>>> } >>>>>>> + if (!has_locked) >>>>>>> + ocfs2_add_holder(lockres, &oh); >>>>>>> + >>>>>>> status = ocfs2_set_acl(NULL, inode, bh, type, 
acl, NULL, >>>>>>> NULL); >>>>>>> - ocfs2_inode_unlock(inode, 1); >>>>>>> + >>>>>>> + if (!has_locked) { >>>>>>> + ocfs2_remove_holder(lockres, &oh); >>>>>>> + ocfs2_inode_unlock(inode, 1); >>>>>>> + } >>>>>>> brelse(bh); >>>>>>> + >>>>>>> return status; >>>>>>> } >>>>>>> @@ -303,21 +318,35 @@ struct posix_acl >>>>>>> *ocfs2_iop_get_acl(struct inode *inode, int type) >>>>>>> struct buffer_head *di_bh = NULL; >>>>>>> struct posix_acl *acl; >>>>>>> int ret; >>>>>>> + int arg_flags = 0, has_locked; >>>>>>> + struct ocfs2_holder oh; >>>>>>> + struct ocfs2_lock_res *lockres; >>>>>>> osb = OCFS2_SB(inode->i_sb); >>>>>>> if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) >>>>>>> return NULL; >>>>>>> - ret = ocfs2_inode_lock(inode, &di_bh, 0); >>>>>>> + >>>>>>> + lockres = &OCFS2_I(inode)->ip_inode_lockres; >>>>>>> + has_locked = (ocfs2_is_locked_by_me(lockres) != NULL); >>>>>>> + if (has_locked) >>>>>>> + arg_flags = OCFS2_META_LOCK_GETBH; >>>>>>> + ret = ocfs2_inode_lock_full(inode, &di_bh, 0, arg_flags); >>>>>>> if (ret < 0) { >>>>>>> if (ret != -ENOENT) >>>>>>> mlog_errno(ret); >>>>>>> return ERR_PTR(ret); >>>>>>> } >>>>>>> + if (!has_locked) >>>>>>> + ocfs2_add_holder(lockres, &oh); >>>>>>> acl = ocfs2_get_acl_nolock(inode, type, di_bh); >>>>>>> - ocfs2_inode_unlock(inode, 0); >>>>>>> + if (!has_locked) { >>>>>>> + ocfs2_remove_holder(lockres, &oh); >>>>>>> + ocfs2_inode_unlock(inode, 0); >>>>>>> + } >>>>>>> brelse(di_bh); >>>>>>> + >>>>>>> return acl; >>>>>>> } >>>>>>> diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c >>>>>>> index c488965..62be75d 100644 >>>>>>> --- a/fs/ocfs2/file.c >>>>>>> +++ b/fs/ocfs2/file.c >>>>>>> @@ -1138,6 +1138,9 @@ int ocfs2_setattr(struct dentry *dentry, >>>>>>> struct iattr *attr) >>>>>>> handle_t *handle = NULL; >>>>>>> struct dquot *transfer_to[MAXQUOTAS] = { }; >>>>>>> int qtype; >>>>>>> + int arg_flags = 0, had_lock; >>>>>>> + struct ocfs2_holder oh; >>>>>>> + struct ocfs2_lock_res *lockres; >>>>>>> trace_ocfs2_setattr(inode, dentry, 
>>>>>>> (unsigned long long)OCFS2_I(inode)->ip_blkno, >>>>>>> @@ -1173,13 +1176,20 @@ int ocfs2_setattr(struct dentry *dentry, >>>>>>> struct iattr *attr) >>>>>>> } >>>>>>> } >>>>>>> - status = ocfs2_inode_lock(inode, &bh, 1); >>>>>>> + lockres = &OCFS2_I(inode)->ip_inode_lockres; >>>>>>> + had_lock = (ocfs2_is_locked_by_me(lockres) != NULL); >>>>>>> + if (had_lock) >>>>>>> + arg_flags = OCFS2_META_LOCK_GETBH; >>>>>>> + status = ocfs2_inode_lock_full(inode, &bh, 1, arg_flags); >>>>>>> if (status < 0) { >>>>>>> if (status != -ENOENT) >>>>>>> mlog_errno(status); >>>>>>> goto bail_unlock_rw; >>>>>>> } >>>>>>> - inode_locked = 1; >>>>>>> + if (!had_lock) { >>>>>>> + ocfs2_add_holder(lockres, &oh); >>>>>>> + inode_locked = 1; >>>>>>> + } >>>>>>> if (size_change) { >>>>>>> status = inode_newsize_ok(inode, attr->ia_size); >>>>>>> @@ -1260,7 +1270,8 @@ int ocfs2_setattr(struct dentry *dentry, >>>>>>> struct iattr *attr) >>>>>>> bail_commit: >>>>>>> ocfs2_commit_trans(osb, handle); >>>>>>> bail_unlock: >>>>>>> - if (status) { >>>>>>> + if (status && inode_locked) { >>>>>>> + ocfs2_remove_holder(lockres, &oh); >>>>>>> ocfs2_inode_unlock(inode, 1); >>>>>>> inode_locked = 0; >>>>>>> } >>>>>>> @@ -1278,8 +1289,10 @@ int ocfs2_setattr(struct dentry *dentry, >>>>>>> struct iattr *attr) >>>>>>> if (status < 0) >>>>>>> mlog_errno(status); >>>>>>> } >>>>>>> - if (inode_locked) >>>>>>> + if (inode_locked) { >>>>>>> + ocfs2_remove_holder(lockres, &oh); >>>>>>> ocfs2_inode_unlock(inode, 1); >>>>>>> + } >>>>>>> brelse(bh); >>>>>>> return status; >>>>>>> @@ -1321,20 +1334,31 @@ int ocfs2_getattr(struct vfsmount *mnt, >>>>>>> int ocfs2_permission(struct inode *inode, int mask) >>>>>>> { >>>>>>> int ret; >>>>>>> + int has_locked; >>>>>>> + struct ocfs2_holder oh; >>>>>>> + struct ocfs2_lock_res *lockres; >>>>>>> if (mask & MAY_NOT_BLOCK) >>>>>>> return -ECHILD; >>>>>>> - ret = ocfs2_inode_lock(inode, NULL, 0); >>>>>>> - if (ret) { >>>>>>> - if (ret != -ENOENT) >>>>>>> - mlog_errno(ret); 
>>>>>>> - goto out; >>>>>>> + lockres = &OCFS2_I(inode)->ip_inode_lockres; >>>>>>> + has_locked = (ocfs2_is_locked_by_me(lockres) != NULL); >>>>>>> + if (!has_locked) { >>>>>>> + ret = ocfs2_inode_lock(inode, NULL, 0); >>>>>>> + if (ret) { >>>>>>> + if (ret != -ENOENT) >>>>>>> + mlog_errno(ret); >>>>>>> + goto out; >>>>>>> + } >>>>>>> + ocfs2_add_holder(lockres, &oh); >>>>>>> } >>>>>>> ret = generic_permission(inode, mask); >>>>>>> - ocfs2_inode_unlock(inode, 0); >>>>>>> + if (!has_locked) { >>>>>>> + ocfs2_remove_holder(lockres, &oh); >>>>>>> + ocfs2_inode_unlock(inode, 0); >>>>>>> + } >>>>>>> out: >>>>>>> return ret; >>>>>>> } >>>>>> >>>>>> >>>>> >>>> >>>> >>> >> >> >