Tristan Ye
2010-Nov-19  08:38 UTC
[Ocfs2-devel] [PATCH 1/1] Ocfs2: Teach 'coherency=full' O_DIRECT writes to correctly up_read i_alloc_sem.
Former logic of ocfs2_file_aio_write() was a bit stricky to unlock the rw_lock
and i_alloc_sem, by using some private bits in struct 'iocb' to
communite with
ocfs2_dio_end_io(), it did work before we introduce the patch of supporting
'coherency=full,buffered' option, since rw_lock and i_alloc_sem were
never
acquired both at the same time, no mattar we doing buffered or direct IO or not.
This patch tries to teach ocfs2_dio_end_io fully understand the bahavior of
all writes, including
buffered/concurrency-allowed-odirect/none-concurrency-odirect
writes, to have all lock/sem primitives getting correctly released.
Signed-off-by: Tristan Ye <tristan.ye at oracle.com>
---
 fs/ocfs2/aops.c |    9 +++++++--
 fs/ocfs2/aops.h |    6 ++++++
 fs/ocfs2/file.c |   16 ++++++++++++++++
 3 files changed, 29 insertions(+), 2 deletions(-)
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index f1e962c..fd0713c 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -568,7 +568,7 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
 			     bool is_async)
 {
 	struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
-	int level;
+	int level, coherency;
 
 	/* this io's submitter should not have unlocked this before we could */
 	BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
@@ -576,7 +576,12 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
 	ocfs2_iocb_clear_rw_locked(iocb);
 
 	level = ocfs2_iocb_rw_locked_level(iocb);
-	if (!level)
+	/*
+	 * 'coherency=full' O_DIRECT writes needs this extra bit
+	 * to correctly up_read the i_alloc_sem.
+	 */
+	coherency = ocfs2_iocb_coherency(iocb);
+	if ((!level) || coherency)
 		up_read(&inode->i_alloc_sem);
 	ocfs2_rw_unlock(inode, level);
 
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 76bfdfd..213cec6 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -72,4 +72,10 @@ static inline void ocfs2_iocb_set_rw_locked(struct kiocb
*iocb, int level)
 	clear_bit(0, (unsigned long *)&iocb->private)
 #define ocfs2_iocb_rw_locked_level(iocb) \
 	test_bit(1, (unsigned long *)&iocb->private)
+#define ocfs2_iocb_set_coherency(iocb) \
+	set_bit(2, (unsigned long *)&iocb->private)
+#define ocfs2_iocb_clear_coherency(iocb) \
+	clear_bit(2, (unsigned long *)&iocb->private)
+#define ocfs2_iocb_coherency(iocb) \
+	test_bit(2, (unsigned long *)&iocb->private)
 #endif /* OCFS2_FILE_H */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 77b4c04..df070a3 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2277,8 +2277,24 @@ relock:
 		}
 
 		ocfs2_inode_unlock(inode, 1);
+
+		/*
+		 * Due to the fault of 'full_coherency' O_DIRECT
+		 * write needs to acqure both i_alloc_sem and rw_lock.
+		 * We do another trick here to have coherency bit
+		 * stored in iocb to communicate with ocfs2_dio_end_io
+		 * for properly unlocking i_alloc_sem.
+		 */
+		ocfs2_iocb_set_coherency(iocb);
 	}
 
+	/*
+	 * Concurrent-allowed odirect writes was able to up_read i_alloc_sem
+	 * correctly, we therefore don't need this extra and tricky bit.
+	 */
+	if (direct_io && !full_coherency)
+		ocfs2_iocb_clear_coherency(iocb);
+
 	can_do_direct = direct_io;
 	ret = ocfs2_prepare_inode_for_write(file, ppos,
 					    iocb->ki_left, appending,
-- 
1.5.5
Tao Ma
2010-Nov-19  14:34 UTC
[Ocfs2-devel] [PATCH 1/1] Ocfs2: Teach 'coherency=full' O_DIRECT writes to correctly up_read i_alloc_sem.
Hi Tristan, Just add joel to the cc in case he has a different option. On 11/19/2010 04:38 PM, Tristan Ye wrote:> Former logic of ocfs2_file_aio_write() was a bit stricky to unlock the rw_lock > and i_alloc_sem, by using some private bits in struct 'iocb' to communite with > ocfs2_dio_end_io(), it did work before we introduce the patch of supporting > 'coherency=full,buffered' option, since rw_lock and i_alloc_sem were never > acquired both at the same time, no mattar we doing buffered or direct IO or not.These 2 locks can be acquired at the same time. So if we go with direct_io, we do have i_alloc_sem and rw_lock locked simultaneously. why do you get this? I have gone through your patch and the bug. It sees to me that the real cause for the bug is that you have EX rw_lock because of full_coherency while locking i_alloc_sem. So finally in ocfs2_dio_end_io, only rw_lock is freed and i_alloc_sem is left, right? If yes, please update the above commit log for it. I don't like your solution either. full_coherency is only used in direct write and ocfs2_dio_end_io is used for both direct read/write. So why add the complexity of coherency to ocfs2_dio_end_io? Also you long comment in ocfs2_file_aio_write does indicate that it is really hard for the code reader to learn why we need to set this flag. My suggestion is: why not use another flag to indicate the state of i_alloc_sem instead of full_coherency? So in place we down_read the i_alloc_sem, set the flag accordingly, and in ocfs2_dio_end_io, just check this flag instead of !rw_locked_level to up_read it. It should be more straightforward. Agree? Joel, any comments? Regards, Tao> > This patch tries to teach ocfs2_dio_end_io fully understand the bahavior of > all writes, including buffered/concurrency-allowed-odirect/none-concurrency-odirect > writes, to have all lock/sem primitives getting correctly released. > > Signed-off-by: Tristan Ye<tristan.ye at oracle.com> > --- > fs/ocfs2/aops.c | 9 +++++++-- > fs/ocfs2/aops.h | 6 ++++++ > fs/ocfs2/file.c | 16 ++++++++++++++++ > 3 files changed, 29 insertions(+), 2 deletions(-) > > diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c > index f1e962c..fd0713c 100644 > --- a/fs/ocfs2/aops.c > +++ b/fs/ocfs2/aops.c > @@ -568,7 +568,7 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, > bool is_async) > { > struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; > - int level; > + int level, coherency; > > /* this io's submitter should not have unlocked this before we could */ > BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); > @@ -576,7 +576,12 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, > ocfs2_iocb_clear_rw_locked(iocb); > > level = ocfs2_iocb_rw_locked_level(iocb); > - if (!level) > + /* > + * 'coherency=full' O_DIRECT writes needs this extra bit > + * to correctly up_read the i_alloc_sem. > + */ > + coherency = ocfs2_iocb_coherency(iocb); > + if ((!level) || coherency) > up_read(&inode->i_alloc_sem); > ocfs2_rw_unlock(inode, level); > > diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h > index 76bfdfd..213cec6 100644 > --- a/fs/ocfs2/aops.h > +++ b/fs/ocfs2/aops.h > @@ -72,4 +72,10 @@ static inline void ocfs2_iocb_set_rw_locked(struct kiocb *iocb, int level) > clear_bit(0, (unsigned long *)&iocb->private) > #define ocfs2_iocb_rw_locked_level(iocb) \ > test_bit(1, (unsigned long *)&iocb->private) > +#define ocfs2_iocb_set_coherency(iocb) \ > + set_bit(2, (unsigned long *)&iocb->private) > +#define ocfs2_iocb_clear_coherency(iocb) \ > + clear_bit(2, (unsigned long *)&iocb->private) > +#define ocfs2_iocb_coherency(iocb) \ > + test_bit(2, (unsigned long *)&iocb->private) > #endif /* OCFS2_FILE_H */ > diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c > index 77b4c04..df070a3 100644 > --- a/fs/ocfs2/file.c > +++ b/fs/ocfs2/file.c > @@ -2277,8 +2277,24 @@ relock: > } > > ocfs2_inode_unlock(inode, 1); > + > + /* > + * Due to the fault of 'full_coherency' O_DIRECT > + * write needs to acqure both i_alloc_sem and rw_lock. > + * We do another trick here to have coherency bit > + * stored in iocb to communicate with ocfs2_dio_end_io > + * for properly unlocking i_alloc_sem. > + */ > + ocfs2_iocb_set_coherency(iocb); > } > > + /* > + * Concurrent-allowed odirect writes was able to up_read i_alloc_sem > + * correctly, we therefore don't need this extra and tricky bit. > + */ > + if (direct_io&& !full_coherency) > + ocfs2_iocb_clear_coherency(iocb); > + > can_do_direct = direct_io; > ret = ocfs2_prepare_inode_for_write(file, ppos, > iocb->ki_left, appending,
Tristan Ye
2010-Nov-29  07:54 UTC
[Ocfs2-devel] [PATCH 1/1] Ocfs2: Teach 'coherency=full' O_DIRECT writes to correctly up_read i_alloc_sem.
Due to newly-introduced 'coherency=full' O_DIRECT writes also takes the
EX
rw_lock like buffered writes did(rw_level == 1), it turns out messing the
usage of 'level' in ocfs2_dio_end_io() up, which caused i_alloc_sem
being
failed to get up_read'd correctly.
This patch tries to teach ocfs2_dio_end_io to understand well on all locking
stuffs by explicitly introducing a new bit for i_alloc_sem in iocb's private
data, just like what we did for rw_lock.
Signed-off-by: Tristan Ye <tristan.ye at oracle.com>
---
 fs/ocfs2/aops.c |    6 ++++--
 fs/ocfs2/aops.h |    6 ++++++
 fs/ocfs2/file.c |    5 ++++-
 3 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index f1e962c..857e013 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -568,7 +568,7 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
 			     bool is_async)
 {
 	struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
-	int level;
+	int level, sem_locked;
 
 	/* this io's submitter should not have unlocked this before we could */
 	BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
@@ -576,7 +576,9 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
 	ocfs2_iocb_clear_rw_locked(iocb);
 
 	level = ocfs2_iocb_rw_locked_level(iocb);
-	if (!level)
+	sem_locked = ocfs2_iocb_is_sem_locked(iocb);
+	ocfs2_iocb_clear_sem_locked(iocb);
+	if (sem_locked)
 		up_read(&inode->i_alloc_sem);
 	ocfs2_rw_unlock(inode, level);
 
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 76bfdfd..c7a3e5f 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -72,4 +72,10 @@ static inline void ocfs2_iocb_set_rw_locked(struct kiocb
*iocb, int level)
 	clear_bit(0, (unsigned long *)&iocb->private)
 #define ocfs2_iocb_rw_locked_level(iocb) \
 	test_bit(1, (unsigned long *)&iocb->private)
+#define ocfs2_iocb_set_sem_locked(iocb) \
+	set_bit(2, (unsigned long *)&iocb->private)
+#define ocfs2_iocb_clear_sem_locked(iocb) \
+	clear_bit(2, (unsigned long *)&iocb->private)
+#define ocfs2_iocb_is_sem_locked(iocb) \
+	test_bit(2, (unsigned long *)&iocb->private)
 #endif /* OCFS2_FILE_H */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 77b4c04..4e66990 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2246,7 +2246,10 @@ relock:
 	if (direct_io) {
 		down_read(&inode->i_alloc_sem);
 		have_alloc_sem = 1;
-	}
+		/* communicate with ocfs2_dio_end_io */
+		ocfs2_iocb_set_sem_locked(iocb);
+	} else
+		ocfs2_iocb_clear_sem_locked(iocb);
 
 	/*
 	 * Concurrent O_DIRECT writes are allowed with
-- 
1.5.5
Tristan Ye
2010-Nov-29  09:21 UTC
[Ocfs2-devel] [PATCH 1/1] Ocfs2: Teach 'coherency=full' O_DIRECT writes to correctly up_read i_alloc_sem.
Due to newly-introduced 'coherency=full' O_DIRECT writes also takes the
EX
rw_lock like buffered writes did(rw_level == 1), it turns out messing the
usage of 'level' in ocfs2_dio_end_io() up, which caused i_alloc_sem
being
failed to get up_read'd correctly.
This patch tries to teach ocfs2_dio_end_io to understand well on all locking
stuffs by explicitly introducing a new bit for i_alloc_sem in iocb's private
data, just like what we did for rw_lock.
Signed-off-by: Tristan Ye <tristan.ye at oracle.com>
---
 fs/ocfs2/aops.c |    6 ++++--
 fs/ocfs2/aops.h |    6 ++++++
 fs/ocfs2/file.c |    9 +++++++--
 3 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index f1e962c..857e013 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -568,7 +568,7 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
 			     bool is_async)
 {
 	struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
-	int level;
+	int level, sem_locked;
 
 	/* this io's submitter should not have unlocked this before we could */
 	BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
@@ -576,7 +576,9 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
 	ocfs2_iocb_clear_rw_locked(iocb);
 
 	level = ocfs2_iocb_rw_locked_level(iocb);
-	if (!level)
+	sem_locked = ocfs2_iocb_is_sem_locked(iocb);
+	ocfs2_iocb_clear_sem_locked(iocb);
+	if (sem_locked)
 		up_read(&inode->i_alloc_sem);
 	ocfs2_rw_unlock(inode, level);
 
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 76bfdfd..c7a3e5f 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -72,4 +72,10 @@ static inline void ocfs2_iocb_set_rw_locked(struct kiocb
*iocb, int level)
 	clear_bit(0, (unsigned long *)&iocb->private)
 #define ocfs2_iocb_rw_locked_level(iocb) \
 	test_bit(1, (unsigned long *)&iocb->private)
+#define ocfs2_iocb_set_sem_locked(iocb) \
+	set_bit(2, (unsigned long *)&iocb->private)
+#define ocfs2_iocb_clear_sem_locked(iocb) \
+	clear_bit(2, (unsigned long *)&iocb->private)
+#define ocfs2_iocb_is_sem_locked(iocb) \
+	test_bit(2, (unsigned long *)&iocb->private)
 #endif /* OCFS2_FILE_H */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 77b4c04..0e9d729 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2246,7 +2246,10 @@ relock:
 	if (direct_io) {
 		down_read(&inode->i_alloc_sem);
 		have_alloc_sem = 1;
-	}
+		/* communicate with ocfs2_dio_end_io */
+		ocfs2_iocb_set_sem_locked(iocb);
+	} else
+		ocfs2_iocb_clear_sem_locked(iocb);
 
 	/*
 	 * Concurrent O_DIRECT writes are allowed with
@@ -2534,6 +2537,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
 	if (filp->f_flags & O_DIRECT) {
 		down_read(&inode->i_alloc_sem);
 		have_alloc_sem = 1;
+		ocfs2_iocb_set_sem_locked(iocb);
 
 		ret = ocfs2_rw_lock(inode, 0);
 		if (ret < 0) {
@@ -2543,7 +2547,8 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
 		rw_level = 0;
 		/* communicate with ocfs2_dio_end_io */
 		ocfs2_iocb_set_rw_locked(iocb, rw_level);
-	}
+	} else
+		ocfs2_iocb_clear_sem_locked(iocb);
 
 	/*
 	 * We're fine letting folks race truncates and extending
-- 
1.5.5
Joel Becker
2010-Dec-06  23:24 UTC
[Ocfs2-devel] [PATCH 1/1] Ocfs2: Teach 'coherency=full' O_DIRECT writes to correctly up_read i_alloc_sem.
On Mon, Nov 29, 2010 at 05:21:46PM +0800, Tristan Ye wrote:> diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c > index f1e962c..857e013 100644 > --- a/fs/ocfs2/aops.c > +++ b/fs/ocfs2/aops.c > @@ -568,7 +568,7 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, > bool is_async) > { > struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; > - int level; > + int level, sem_locked; > > /* this io's submitter should not have unlocked this before we could */ > BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); > @@ -576,7 +576,9 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, > ocfs2_iocb_clear_rw_locked(iocb); > > level = ocfs2_iocb_rw_locked_level(iocb); > - if (!level) > + sem_locked = ocfs2_iocb_is_sem_locked(iocb); > + ocfs2_iocb_clear_sem_locked(iocb); > + if (sem_locked) > up_read(&inode->i_alloc_sem); > ocfs2_rw_unlock(inode, level);I agree with Tao, we don't need sem_locked. Just do: if (ocfs2_iocb_is_sem_locked(iocb)) { up_read(&inode->i_alloc_sem); ocfs2_iocb_clear_sem_locked(iocb); } I think it reads better.> diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h > index 76bfdfd..c7a3e5f 100644 > --- a/fs/ocfs2/aops.h > +++ b/fs/ocfs2/aops.h > @@ -72,4 +72,10 @@ static inline void ocfs2_iocb_set_rw_locked(struct kiocb *iocb, int level) > clear_bit(0, (unsigned long *)&iocb->private) > #define ocfs2_iocb_rw_locked_level(iocb) \ > test_bit(1, (unsigned long *)&iocb->private) > +#define ocfs2_iocb_set_sem_locked(iocb) \ > + set_bit(2, (unsigned long *)&iocb->private) > +#define ocfs2_iocb_clear_sem_locked(iocb) \ > + clear_bit(2, (unsigned long *)&iocb->private) > +#define ocfs2_iocb_is_sem_locked(iocb) \ > + test_bit(2, (unsigned long *)&iocb->private)Give these bits an enum. Anonymous numbers aren't good. And make the comment better.> #endif /* OCFS2_FILE_H */ > diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c > index 77b4c04..0e9d729 100644 > --- a/fs/ocfs2/file.c > +++ b/fs/ocfs2/file.c > @@ -2246,7 +2246,10 @@ relock: > if (direct_io) { > down_read(&inode->i_alloc_sem); > have_alloc_sem = 1; > - } > + /* communicate with ocfs2_dio_end_io */ > + ocfs2_iocb_set_sem_locked(iocb); > + } else > + ocfs2_iocb_clear_sem_locked(iocb);I understand you're trying to avoid bad bits here, but you should explicitly clear it at the top of the function. You know for a fact that, at the top (right before relock:), there is no sem held. Then set it as you do, and only re-clear it when you up_write(). Joel -- "Drake! We're LEAVING!" Joel Becker Senior Development Manager Oracle E-mail: joel.becker at oracle.com Phone: (650) 506-8127
Tristan Ye
2010-Dec-07  06:35 UTC
[Ocfs2-devel] [PATCH 1/1] Ocfs2: Teach 'coherency=full' O_DIRECT writes to correctly up_read i_alloc_sem.
Due to newly-introduced 'coherency=full' O_DIRECT writes also takes the
EX
rw_lock like buffered writes did(rw_level == 1), it turns out messing the
usage of 'level' in ocfs2_dio_end_io() up, which caused i_alloc_sem
being
failed to get up_read'd correctly.
This patch tries to teach ocfs2_dio_end_io to understand well on all locking
stuffs by explicitly introducing a new bit for i_alloc_sem in iocb's private
data, just like what we did for rw_lock.
Signed-off-by: Tristan Ye <tristan.ye at oracle.com>
---
 fs/ocfs2/aops.c |    7 +++++--
 fs/ocfs2/aops.h |   23 +++++++++++++++++++++--
 fs/ocfs2/file.c |   15 +++++++++++++--
 3 files changed, 39 insertions(+), 6 deletions(-)
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index f1e962c..0d7c554 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -573,11 +573,14 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
 	/* this io's submitter should not have unlocked this before we could */
 	BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
 
+	if (ocfs2_iocb_is_sem_locked(iocb)) {
+		up_read(&inode->i_alloc_sem);
+		ocfs2_iocb_clear_sem_locked(iocb);
+	}
+
 	ocfs2_iocb_clear_rw_locked(iocb);
 
 	level = ocfs2_iocb_rw_locked_level(iocb);
-	if (!level)
-		up_read(&inode->i_alloc_sem);
 	ocfs2_rw_unlock(inode, level);
 
 	if (is_async)
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 76bfdfd..d6db4ee 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -68,8 +68,27 @@ static inline void ocfs2_iocb_set_rw_locked(struct kiocb
*iocb, int level)
 	else
 		clear_bit(1, (unsigned long *)&iocb->private);
 }
+
+/*
+ * Using a named enum representing lock types in terms of #N bit stored in
+ * iocb->private, which is going to be used for communication bewteen 
+ * ocfs2_dio_end_io() and ocfs2_file_aio_write/read().
+ */
+enum ocfs2_iocb_lock_bits {
+	OCFS2_IOCB_RW_LOCK = 0,
+	OCFS2_IOCB_RW_LOCK_LEVEL,
+	OCFS2_IOCB_SEM,
+	OCFS2_IOCB_NUM_LOCKS
+};
+
 #define ocfs2_iocb_clear_rw_locked(iocb) \
-	clear_bit(0, (unsigned long *)&iocb->private)
+	clear_bit(OCFS2_IOCB_RW_LOCK, (unsigned long *)&iocb->private)
 #define ocfs2_iocb_rw_locked_level(iocb) \
-	test_bit(1, (unsigned long *)&iocb->private)
+	test_bit(OCFS2_IOCB_RW_LOCK_LEVEL, (unsigned long *)&iocb->private)
+#define ocfs2_iocb_set_sem_locked(iocb) \
+	set_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
+#define ocfs2_iocb_clear_sem_locked(iocb) \
+	clear_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
+#define ocfs2_iocb_is_sem_locked(iocb) \
+	test_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
 #endif /* OCFS2_FILE_H */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 77b4c04..f6cba56 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2241,11 +2241,15 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 
 	mutex_lock(&inode->i_mutex);
 
+	ocfs2_iocb_clear_sem_locked(iocb);
+
 relock:
 	/* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
 	if (direct_io) {
 		down_read(&inode->i_alloc_sem);
 		have_alloc_sem = 1;
+		/* communicate with ocfs2_dio_end_io */
+		ocfs2_iocb_set_sem_locked(iocb);
 	}
 
 	/*
@@ -2382,8 +2386,10 @@ out:
 		ocfs2_rw_unlock(inode, rw_level);
 
 out_sems:
-	if (have_alloc_sem)
+	if (have_alloc_sem) {
 		up_read(&inode->i_alloc_sem);
+		ocfs2_iocb_clear_sem_locked(iocb);
+	}
 
 	mutex_unlock(&inode->i_mutex);
 
@@ -2527,6 +2533,8 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
 		goto bail;
 	}
 
+	ocfs2_iocb_clear_sem_locked(iocb);
+
 	/*
 	 * buffered reads protect themselves in ->readpage().  O_DIRECT reads
 	 * need locks to protect pending reads from racing with truncate.
@@ -2534,6 +2542,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
 	if (filp->f_flags & O_DIRECT) {
 		down_read(&inode->i_alloc_sem);
 		have_alloc_sem = 1;
+		ocfs2_iocb_set_sem_locked(iocb);
 
 		ret = ocfs2_rw_lock(inode, 0);
 		if (ret < 0) {
@@ -2575,8 +2584,10 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
 	}
 
 bail:
-	if (have_alloc_sem)
+	if (have_alloc_sem) {
 		up_read(&inode->i_alloc_sem);
+		ocfs2_iocb_clear_sem_locked(iocb);
+	}
 	if (rw_level != -1)
 		ocfs2_rw_unlock(inode, rw_level);
 	mlog_exit(ret);
-- 
1.5.5