wangyan
2020-Jan-08  09:23 UTC
[Ocfs2-devel] [PATCH] ocfs2: fix a NULL pointer dereference when call ocfs2_update_inode_fsync_trans()
I found a NULL pointer dereference in ocfs2_update_inode_fsync_trans(),
handle->h_transaction may be NULL in this situation:
ocfs2_file_write_iter
   ->__generic_file_write_iter
       ->generic_perform_write
         ->ocfs2_write_begin
           ->ocfs2_write_begin_nolock
             ->ocfs2_write_cluster_by_desc
               ->ocfs2_write_cluster
                 ->ocfs2_mark_extent_written
                   ->ocfs2_change_extent_flag
                     ->ocfs2_split_extent
                       ->ocfs2_try_to_merge_extent
                         ->ocfs2_extend_rotate_transaction
                           ->ocfs2_extend_trans
                             ->jbd2_journal_restart
                               ->jbd2__journal_restart
                                 // handle->h_transaction is NULL here
                                 ->handle->h_transaction = NULL;
                                 ->start_this_handle
                                   /* journal aborted due to storage
                                      network disconnection, return error */
                                   ->return -EROFS;
                          /* line 3806 in ocfs2_try_to_merge_extent (),
                             it will ignore ret error. */
                         ->ret = 0;
         ->...
         ->ocfs2_write_end
           ->ocfs2_write_end_nolock
             ->ocfs2_update_inode_fsync_trans
               // NULL pointer dereference
               ->oi->i_sync_tid = handle->h_transaction->t_tid;
The information of NULL pointer dereference as follows:
     JBD2: Detected IO errors while flushing file data on dm-11-45
     Aborting journal on device dm-11-45.
     JBD2: Error -5 detected when updating journal superblock for dm-11-45.
     (dd,22081,3):ocfs2_extend_trans:474 ERROR: status = -30
     (dd,22081,3):ocfs2_try_to_merge_extent:3877 ERROR: status = -30
     Unable to handle kernel NULL pointer dereference at
     virtual address 0000000000000008
     Mem abort info:
       ESR = 0x96000004
       Exception class = DABT (current EL), IL = 32 bits
       SET = 0, FnV = 0
       EA = 0, S1PTW = 0
     Data abort info:
       ISV = 0, ISS = 0x00000004
       CM = 0, WnR = 0
     user pgtable: 4k pages, 48-bit VAs, pgdp = 00000000e74e1338
     [0000000000000008] pgd=0000000000000000
     Internal error: Oops: 96000004 [#1] SMP
     Process dd (pid: 22081, stack limit = 0x00000000584f35a9)
     CPU: 3 PID: 22081 Comm: dd Kdump: loaded
     Hardware name: Huawei TaiShan 2280 V2/BC82AMDD, BIOS 0.98 08/25/2019
     pstate: 60400009 (nZCv daif +PAN -UAO)
     pc : ocfs2_write_end_nolock+0x2b8/0x550 [ocfs2]
     lr : ocfs2_write_end_nolock+0x2a0/0x550 [ocfs2]
     sp : ffff0000459fba70
     x29: ffff0000459fba70 x28: 0000000000000000
     x27: ffff807ccf7f1000 x26: 0000000000000001
     x25: ffff807bdff57970 x24: ffff807caf1d4000
     x23: ffff807cc79e9000 x22: 0000000000001000
     x21: 000000006c6cd000 x20: ffff0000091d9000
     x19: ffff807ccb239db0 x18: ffffffffffffffff
     x17: 000000000000000e x16: 0000000000000007
     x15: ffff807c5e15bd78 x14: 0000000000000000
     x13: 0000000000000000 x12: 0000000000000000
     x11: 0000000000000000 x10: 0000000000000001
     x9 : 0000000000000228 x8 : 000000000000000c
     x7 : 0000000000000fff x6 : ffff807a308ed6b0
     x5 : ffff7e01f10967c0 x4 : 0000000000000018
     x3 : d0bc661572445600 x2 : 0000000000000000
     x1 : 000000001b2e0200 x0 : 0000000000000000
     Call trace:
      ocfs2_write_end_nolock+0x2b8/0x550 [ocfs2]
      ocfs2_write_end+0x4c/0x80 [ocfs2]
      generic_perform_write+0x108/0x1a8
      __generic_file_write_iter+0x158/0x1c8
      ocfs2_file_write_iter+0x668/0x950 [ocfs2]
      __vfs_write+0x11c/0x190
      vfs_write+0xac/0x1c0
      ksys_write+0x6c/0xd8
      __arm64_sys_write+0x24/0x30
      el0_svc_common+0x78/0x130
      el0_svc_handler+0x38/0x78
      el0_svc+0x8/0xc
To prevent NULL pointer dereference in this situation, we use
is_handle_aborted() before using handle->h_transaction->t_tid.
Signed-off-by: Yan Wang <wangyan122 at huawei.com>
Reviewed-by: Jun Piao <piaojun at huawei.com>
---
  fs/ocfs2/journal.h | 8 +++++---
  fs/ocfs2/namei.c   | 3 +--
  2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 3103ba7f97a2..bfe611ed1b1d 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -597,9 +597,11 @@ static inline void 
ocfs2_update_inode_fsync_trans(handle_t *handle,
  {
  	struct ocfs2_inode_info *oi = OCFS2_I(inode);
-	oi->i_sync_tid = handle->h_transaction->t_tid;
-	if (datasync)
-		oi->i_datasync_tid = handle->h_transaction->t_tid;
+	if (!is_handle_aborted(handle)) {
+		oi->i_sync_tid = handle->h_transaction->t_tid;
+		if (datasync)
+			oi->i_datasync_tid = handle->h_transaction->t_tid;
+	}
  }
  #endif /* OCFS2_JOURNAL_H */
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 8ea51cf27b97..da65251ef815 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -586,8 +586,7 @@ static int __ocfs2_mknod_locked(struct inode *dir,
  			mlog_errno(status);
  	}
-	oi->i_sync_tid = handle->h_transaction->t_tid;
-	oi->i_datasync_tid = handle->h_transaction->t_tid;
+	ocfs2_update_inode_fsync_trans(handle, inode, 1);
  leave:
  	if (status < 0) {
-- 
2.19.1
Joseph Qi
2020-Jan-08  11:31 UTC
[Ocfs2-devel] [PATCH] ocfs2: fix a NULL pointer dereference when call ocfs2_update_inode_fsync_trans()
On 20/1/8 17:23, wangyan wrote:> I found a NULL pointer dereference in ocfs2_update_inode_fsync_trans(), > handle->h_transaction may be NULL in this situation: > ocfs2_file_write_iter > ? ->__generic_file_write_iter > ????? ->generic_perform_write > ??????? ->ocfs2_write_begin > ????????? ->ocfs2_write_begin_nolock > ??????????? ->ocfs2_write_cluster_by_desc > ????????????? ->ocfs2_write_cluster > ??????????????? ->ocfs2_mark_extent_written > ????????????????? ->ocfs2_change_extent_flag > ??????????????????? ->ocfs2_split_extent > ????????????????????? ->ocfs2_try_to_merge_extent > ??????????????????????? ->ocfs2_extend_rotate_transaction > ????????????????????????? ->ocfs2_extend_trans > ??????????????????????????? ->jbd2_journal_restart > ????????????????????????????? ->jbd2__journal_restart > ??????????????????????????????? // handle->h_transaction is NULL here > ??????????????????????????????? ->handle->h_transaction = NULL; > ??????????????????????????????? ->start_this_handle > ????????????????????????????????? /* journal aborted due to storage > ???????????????????????????????????? network disconnection, return error */ > ????????????????????????????????? ->return -EROFS; > ???????????????????????? /* line 3806 in ocfs2_try_to_merge_extent (), > ??????????????????????????? it will ignore ret error. */ > ??????????????????????? ->ret = 0; > ??????? ->... > ??????? ->ocfs2_write_end > ????????? ->ocfs2_write_end_nolock > ??????????? ->ocfs2_update_inode_fsync_trans > ????????????? // NULL pointer dereference > ????????????? ->oi->i_sync_tid = handle->h_transaction->t_tid; > > The information of NULL pointer dereference as follows: > ??? JBD2: Detected IO errors while flushing file data on dm-11-45 > ??? Aborting journal on device dm-11-45. > ??? JBD2: Error -5 detected when updating journal superblock for dm-11-45. > ??? (dd,22081,3):ocfs2_extend_trans:474 ERROR: status = -30 > ??? (dd,22081,3):ocfs2_try_to_merge_extent:3877 ERROR: status = -30 > ??? Unable to handle kernel NULL pointer dereference at > ??? virtual address 0000000000000008 > ??? Mem abort info: > ????? ESR = 0x96000004 > ????? Exception class = DABT (current EL), IL = 32 bits > ????? SET = 0, FnV = 0 > ????? EA = 0, S1PTW = 0 > ??? Data abort info: > ????? ISV = 0, ISS = 0x00000004 > ????? CM = 0, WnR = 0 > ??? user pgtable: 4k pages, 48-bit VAs, pgdp = 00000000e74e1338 > ??? [0000000000000008] pgd=0000000000000000 > ??? Internal error: Oops: 96000004 [#1] SMP > ??? Process dd (pid: 22081, stack limit = 0x00000000584f35a9) > ??? CPU: 3 PID: 22081 Comm: dd Kdump: loaded > ??? Hardware name: Huawei TaiShan 2280 V2/BC82AMDD, BIOS 0.98 08/25/2019 > ??? pstate: 60400009 (nZCv daif +PAN -UAO) > ??? pc : ocfs2_write_end_nolock+0x2b8/0x550 [ocfs2] > ??? lr : ocfs2_write_end_nolock+0x2a0/0x550 [ocfs2] > ??? sp : ffff0000459fba70 > ??? x29: ffff0000459fba70 x28: 0000000000000000 > ??? x27: ffff807ccf7f1000 x26: 0000000000000001 > ??? x25: ffff807bdff57970 x24: ffff807caf1d4000 > ??? x23: ffff807cc79e9000 x22: 0000000000001000 > ??? x21: 000000006c6cd000 x20: ffff0000091d9000 > ??? x19: ffff807ccb239db0 x18: ffffffffffffffff > ??? x17: 000000000000000e x16: 0000000000000007 > ??? x15: ffff807c5e15bd78 x14: 0000000000000000 > ??? x13: 0000000000000000 x12: 0000000000000000 > ??? x11: 0000000000000000 x10: 0000000000000001 > ??? x9 : 0000000000000228 x8 : 000000000000000c > ??? x7 : 0000000000000fff x6 : ffff807a308ed6b0 > ??? x5 : ffff7e01f10967c0 x4 : 0000000000000018 > ??? x3 : d0bc661572445600 x2 : 0000000000000000 > ??? x1 : 000000001b2e0200 x0 : 0000000000000000 > ??? Call trace: > ???? ocfs2_write_end_nolock+0x2b8/0x550 [ocfs2] > ???? ocfs2_write_end+0x4c/0x80 [ocfs2] > ???? generic_perform_write+0x108/0x1a8 > ???? __generic_file_write_iter+0x158/0x1c8 > ???? ocfs2_file_write_iter+0x668/0x950 [ocfs2] > ???? __vfs_write+0x11c/0x190 > ???? vfs_write+0xac/0x1c0 > ???? ksys_write+0x6c/0xd8 > ???? __arm64_sys_write+0x24/0x30 > ???? el0_svc_common+0x78/0x130 > ???? el0_svc_handler+0x38/0x78 > ???? el0_svc+0x8/0xc > > To prevent NULL pointer dereference in this situation, we use > is_handle_aborted() before using handle->h_transaction->t_tid. > > Signed-off-by: Yan Wang <wangyan122 at huawei.com> > Reviewed-by: Jun Piao <piaojun at huawei.com> > --- > ?fs/ocfs2/journal.h | 8 +++++--- > ?fs/ocfs2/namei.c?? | 3 +-- > ?2 files changed, 6 insertions(+), 5 deletions(-) > > diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h > index 3103ba7f97a2..bfe611ed1b1d 100644 > --- a/fs/ocfs2/journal.h > +++ b/fs/ocfs2/journal.h > @@ -597,9 +597,11 @@ static inline void ocfs2_update_inode_fsync_trans(handle_t *handle, > ?{ > ???? struct ocfs2_inode_info *oi = OCFS2_I(inode); > > -??? oi->i_sync_tid = handle->h_transaction->t_tid; > -??? if (datasync) > -??????? oi->i_datasync_tid = handle->h_transaction->t_tid; > +??? if (!is_handle_aborted(handle)) { > +??????? oi->i_sync_tid = handle->h_transaction->t_tid; > +??????? if (datasync) > +??????????? oi->i_datasync_tid = handle->h_transaction->t_tid;Use tab instead of space, please.> +??? } > ?} > > ?#endif /* OCFS2_JOURNAL_H */ > diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c > index 8ea51cf27b97..da65251ef815 100644 > --- a/fs/ocfs2/namei.c > +++ b/fs/ocfs2/namei.c > @@ -586,8 +586,7 @@ static int __ocfs2_mknod_locked(struct inode *dir, > ???????????? mlog_errno(status); > ???? } > > -??? oi->i_sync_tid = handle->h_transaction->t_tid; > -??? oi->i_datasync_tid = handle->h_transaction->t_tid; > +??? ocfs2_update_inode_fsync_trans(handle, inode, 1); >I don't see any reason why we have to check handle here. Thanks, Joseph> ?leave: > ???? if (status < 0) {
Changwei Ge
2020-Jan-09  01:57 UTC
[Ocfs2-devel] [PATCH] ocfs2: fix a NULL pointer dereference when call ocfs2_update_inode_fsync_trans()
On 1/8/20 5:23 PM, wangyan wrote:> I found a NULL pointer dereference in ocfs2_update_inode_fsync_trans(), > handle->h_transaction may be NULL in this situation: > ocfs2_file_write_iter > ->__generic_file_write_iter > ->generic_perform_write > ->ocfs2_write_begin > ->ocfs2_write_begin_nolock > ->ocfs2_write_cluster_by_desc > ->ocfs2_write_cluster > ->ocfs2_mark_extent_written > ->ocfs2_change_extent_flag > ->ocfs2_split_extent > ->ocfs2_try_to_merge_extent > ->ocfs2_extend_rotate_transaction > ->ocfs2_extend_trans > ->jbd2_journal_restart > ->jbd2__journal_restart > // handle->h_transaction is NULL here > ->handle->h_transaction = NULL; > ->start_this_handle > /* journal aborted due to storage > network disconnection, return error */ > ->return -EROFS; > /* line 3806 in ocfs2_try_to_merge_extent (), > it will ignore ret error. */ > ->ret = 0; > ->... > ->ocfs2_write_end > ->ocfs2_write_end_nolock > ->ocfs2_update_inode_fsync_trans > // NULL pointer dereference > ->oi->i_sync_tid = handle->h_transaction->t_tid; > > The information of NULL pointer dereference as follows: > JBD2: Detected IO errors while flushing file data on dm-11-45 > Aborting journal on device dm-11-45. > JBD2: Error -5 detected when updating journal superblock for dm-11-45. > (dd,22081,3):ocfs2_extend_trans:474 ERROR: status = -30 > (dd,22081,3):ocfs2_try_to_merge_extent:3877 ERROR: status = -30 > Unable to handle kernel NULL pointer dereference at > virtual address 0000000000000008 > Mem abort info: > ESR = 0x96000004 > Exception class = DABT (current EL), IL = 32 bits > SET = 0, FnV = 0 > EA = 0, S1PTW = 0 > Data abort info: > ISV = 0, ISS = 0x00000004 > CM = 0, WnR = 0 > user pgtable: 4k pages, 48-bit VAs, pgdp = 00000000e74e1338 > [0000000000000008] pgd=0000000000000000 > Internal error: Oops: 96000004 [#1] SMP > Process dd (pid: 22081, stack limit = 0x00000000584f35a9) > CPU: 3 PID: 22081 Comm: dd Kdump: loaded > Hardware name: Huawei TaiShan 2280 V2/BC82AMDD, BIOS 0.98 08/25/2019 > pstate: 60400009 (nZCv daif +PAN -UAO) > pc : ocfs2_write_end_nolock+0x2b8/0x550 [ocfs2] > lr : ocfs2_write_end_nolock+0x2a0/0x550 [ocfs2] > sp : ffff0000459fba70 > x29: ffff0000459fba70 x28: 0000000000000000 > x27: ffff807ccf7f1000 x26: 0000000000000001 > x25: ffff807bdff57970 x24: ffff807caf1d4000 > x23: ffff807cc79e9000 x22: 0000000000001000 > x21: 000000006c6cd000 x20: ffff0000091d9000 > x19: ffff807ccb239db0 x18: ffffffffffffffff > x17: 000000000000000e x16: 0000000000000007 > x15: ffff807c5e15bd78 x14: 0000000000000000 > x13: 0000000000000000 x12: 0000000000000000 > x11: 0000000000000000 x10: 0000000000000001 > x9 : 0000000000000228 x8 : 000000000000000c > x7 : 0000000000000fff x6 : ffff807a308ed6b0 > x5 : ffff7e01f10967c0 x4 : 0000000000000018 > x3 : d0bc661572445600 x2 : 0000000000000000 > x1 : 000000001b2e0200 x0 : 0000000000000000 > Call trace: > ocfs2_write_end_nolock+0x2b8/0x550 [ocfs2] > ocfs2_write_end+0x4c/0x80 [ocfs2] > generic_perform_write+0x108/0x1a8 > __generic_file_write_iter+0x158/0x1c8 > ocfs2_file_write_iter+0x668/0x950 [ocfs2] > __vfs_write+0x11c/0x190 > vfs_write+0xac/0x1c0 > ksys_write+0x6c/0xd8 > __arm64_sys_write+0x24/0x30 > el0_svc_common+0x78/0x130 > el0_svc_handler+0x38/0x78 > el0_svc+0x8/0xc > > To prevent NULL pointer dereference in this situation, we use > is_handle_aborted() before using handle->h_transaction->t_tid. > > Signed-off-by: Yan Wang <wangyan122 at huawei.com> > Reviewed-by: Jun Piao <piaojun at huawei.com> > --- > fs/ocfs2/journal.h | 8 +++++--- > fs/ocfs2/namei.c | 3 +-- > 2 files changed, 6 insertions(+), 5 deletions(-) > > diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h > index 3103ba7f97a2..bfe611ed1b1d 100644 > --- a/fs/ocfs2/journal.h > +++ b/fs/ocfs2/journal.h > @@ -597,9 +597,11 @@ static inline void > ocfs2_update_inode_fsync_trans(handle_t *handle, > { > struct ocfs2_inode_info *oi = OCFS2_I(inode); > > - oi->i_sync_tid = handle->h_transaction->t_tid; > - if (datasync) > - oi->i_datasync_tid = handle->h_transaction->t_tid; > + if (!is_handle_aborted(handle)) { > + oi->i_sync_tid = handle->h_transaction->t_tid; > + if (datasync) > + oi->i_datasync_tid = handle->h_transaction->t_tid; > + }I don't think your way can fix the issue you reported completely. Even you check if the journal is ABORTED or not, you still face a race causing accessing NULL h_transaction. Otherwise, you need synchronization mechanism help. Besides, if journal is aborted, ocfs2 won't fence the machine by resetting? Thanks, Changwei> } > > #endif /* OCFS2_JOURNAL_H */ > diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c > index 8ea51cf27b97..da65251ef815 100644 > --- a/fs/ocfs2/namei.c > +++ b/fs/ocfs2/namei.c > @@ -586,8 +586,7 @@ static int __ocfs2_mknod_locked(struct inode *dir, > mlog_errno(status); > } > > - oi->i_sync_tid = handle->h_transaction->t_tid; > - oi->i_datasync_tid = handle->h_transaction->t_tid; > + ocfs2_update_inode_fsync_trans(handle, inode, 1); > > leave: > if (status < 0) {