Heming Zhao
2022-Apr-13 08:29 UTC
[Ocfs2-devel] [PATCH v2 1/5] ocfs2: fix mounting crash if journal is not alloced
After commit da5e7c87827e8 ("ocfs2: cleanup journal init and
shutdown"),
journal init later than before, it makes NULL pointer access in free
routine.
Crash flow:
ocfs2_fill_super
+ ocfs2_mount_volume
| + ocfs2_dlm_init //fail & return, osb->journal is NULL.
| + ...
| + ocfs2_check_volume //no chance to init osb->journal
|
+ ...
+ ocfs2_dismount_volume
ocfs2_release_system_inodes
...
evict
...
ocfs2_clear_inode
ocfs2_checkpoint_inode
ocfs2_ci_fully_checkpointed
time_after(journal->j_trans_id, ci->ci_last_trans)
+ journal is empty, crash!
For fixing, there are three solutions:
1> Partly revert commit da5e7c87827e8
For avoiding kernel crash, this make sense for us. We only concerned
whether there has any non-system inode access before dlm init. The
answer is NO. And all journal replay/recovery handling happen after
dlm & journal init done. So this method is not graceful but workable.
2> Add osb->journal check in free inode routine (eg ocfs2_clear_inode)
The fix code is special for mounting phase, but it will continue
working after mounting stage. In another word, this method adds useless
code in normal inode free flow.
3> Do directly free inode in mounting phase
This method is brutal/complex and may introduce unsafe code, currently
maintainer didn't like.
At last, we chose method <1> and did partly reverted job.
We reverted journal init codes, and kept cleanup codes flow.
Fixes: da5e7c87827e8 ("ocfs2: cleanup journal init and shutdown")
Signed-off-by: Heming Zhao <heming.zhao at suse.com>
---
fs/ocfs2/inode.c | 4 ++--
fs/ocfs2/journal.c | 32 ++++++++++++++++++++++----------
fs/ocfs2/super.c | 16 ++++++++++++++++
3 files changed, 40 insertions(+), 12 deletions(-)
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 5739dc301569..bb116c39b581 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -125,6 +125,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno,
unsigned flags,
struct inode *inode = NULL;
struct super_block *sb = osb->sb;
struct ocfs2_find_inode_args args;
+ journal_t *journal = osb->journal->j_journal;
trace_ocfs2_iget_begin((unsigned long long)blkno, flags,
sysfile_type);
@@ -171,11 +172,10 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64
blkno, unsigned flags,
* part of the transaction - the inode could have been reclaimed and
* now it is reread from disk.
*/
- if (osb->journal) {
+ if (journal) {
transaction_t *transaction;
tid_t tid;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
- journal_t *journal = osb->journal->j_journal;
read_lock(&journal->j_state_lock);
if (journal->j_running_transaction)
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 1887a2708709..49255fddce45 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -810,22 +810,20 @@ void ocfs2_set_journal_params(struct ocfs2_super *osb)
write_unlock(&journal->j_state_lock);
}
-int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty)
+/*
+ * alloc & initialize skeleton for journal structure.
+ * ocfs2_journal_init() will make fs have journal ability.
+ */
+int ocfs2_journal_alloc(struct ocfs2_super *osb)
{
- int status = -1;
- struct inode *inode = NULL; /* the journal inode */
- journal_t *j_journal = NULL;
- struct ocfs2_journal *journal = NULL;
- struct ocfs2_dinode *di = NULL;
- struct buffer_head *bh = NULL;
- int inode_lock = 0;
+ int status = 0;
+ struct ocfs2_journal *journal;
- /* initialize our journal structure */
journal = kzalloc(sizeof(struct ocfs2_journal), GFP_KERNEL);
if (!journal) {
mlog(ML_ERROR, "unable to alloc journal\n");
status = -ENOMEM;
- goto done;
+ goto bail;
}
osb->journal = journal;
journal->j_osb = osb;
@@ -839,6 +837,20 @@ int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty)
INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery);
journal->j_state = OCFS2_JOURNAL_FREE;
+bail:
+ return status;
+}
+
+int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty)
+{
+ int status = -1;
+ struct inode *inode = NULL; /* the journal inode */
+ journal_t *j_journal = NULL;
+ struct ocfs2_journal *journal = osb->journal;
+ struct ocfs2_dinode *di = NULL;
+ struct buffer_head *bh = NULL;
+ int inode_lock = 0;
+
/* already have the inode for our journal */
inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
osb->slot_num);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 477cdf94122e..babec2c9d638 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -2015,6 +2015,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
int i, cbits, bbits;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
struct inode *inode = NULL;
+ struct ocfs2_journal *journal;
struct ocfs2_super *osb;
u64 total_blocks;
@@ -2195,6 +2196,15 @@ static int ocfs2_initialize_super(struct super_block *sb,
get_random_bytes(&osb->s_next_generation, sizeof(u32));
+ /*
+ * FIXME
+ * This should be done in ocfs2_journal_init(), but any inode
+ * writes back operation will cause the filesystem to crash.
+ */
+ status = ocfs2_journal_alloc(osb);
+ if (status)
+ goto bail;
+
INIT_WORK(&osb->dquot_drop_work, ocfs2_drop_dquot_refs);
init_llist_head(&osb->dquot_drop_list);
@@ -2483,6 +2493,12 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
kfree(osb->osb_orphan_wipes);
kfree(osb->slot_recovery_generations);
+ /* FIXME
+ * This belongs in journal shutdown, but because we have to
+ * allocate osb->journal at the middle of ocfs2_initialize_super(),
+ * we free it here.
+ */
+ kfree(osb->journal);
kfree(osb->local_alloc_copy);
kfree(osb->uuid_str);
kfree(osb->vol_label);
--
2.35.1
Joseph Qi
2022-Apr-13 10:54 UTC
[Ocfs2-devel] [PATCH v2 1/5] ocfs2: fix mounting crash if journal is not alloced
On 4/13/22 4:29 PM, Heming Zhao wrote:> After commit da5e7c87827e8 ("ocfs2: cleanup journal init and shutdown"), > journal init later than before, it makes NULL pointer access in free > routine. > > Crash flow: > > ocfs2_fill_super > + ocfs2_mount_volume > | + ocfs2_dlm_init //fail & return, osb->journal is NULL. > | + ... > | + ocfs2_check_volume //no chance to init osb->journal > | > + ... > + ocfs2_dismount_volume > ocfs2_release_system_inodes > ... > evict > ... > ocfs2_clear_inode > ocfs2_checkpoint_inode > ocfs2_ci_fully_checkpointed > time_after(journal->j_trans_id, ci->ci_last_trans) > + journal is empty, crash! > > For fixing, there are three solutions: > > 1> Partly revert commit da5e7c87827e8 > > For avoiding kernel crash, this make sense for us. We only concerned > whether there has any non-system inode access before dlm init. The > answer is NO. And all journal replay/recovery handling happen after > dlm & journal init done. So this method is not graceful but workable. > > 2> Add osb->journal check in free inode routine (eg ocfs2_clear_inode) > > The fix code is special for mounting phase, but it will continue > working after mounting stage. In another word, this method adds useless > code in normal inode free flow. > > 3> Do directly free inode in mounting phase > > This method is brutal/complex and may introduce unsafe code, currently > maintainer didn't like. > > At last, we chose method <1> and did partly reverted job. > We reverted journal init codes, and kept cleanup codes flow. > > Fixes: da5e7c87827e8 ("ocfs2: cleanup journal init and shutdown") > Signed-off-by: Heming Zhao <heming.zhao at suse.com> > --- > fs/ocfs2/inode.c | 4 ++-- > fs/ocfs2/journal.c | 32 ++++++++++++++++++++++---------- > fs/ocfs2/super.c | 16 ++++++++++++++++ > 3 files changed, 40 insertions(+), 12 deletions(-) > > diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c > index 5739dc301569..bb116c39b581 100644 > --- a/fs/ocfs2/inode.c > +++ b/fs/ocfs2/inode.c > @@ -125,6 +125,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags, > struct inode *inode = NULL; > struct super_block *sb = osb->sb; > struct ocfs2_find_inode_args args; > + journal_t *journal = osb->journal->j_journal; > > trace_ocfs2_iget_begin((unsigned long long)blkno, flags, > sysfile_type); > @@ -171,11 +172,10 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags, > * part of the transaction - the inode could have been reclaimed and > * now it is reread from disk. > */ > - if (osb->journal) { > + if (journal) { > transaction_t *transaction; > tid_t tid; > struct ocfs2_inode_info *oi = OCFS2_I(inode); > - journal_t *journal = osb->journal->j_journal; > > read_lock(&journal->j_state_lock); > if (journal->j_running_transaction) > diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c > index 1887a2708709..49255fddce45 100644 > --- a/fs/ocfs2/journal.c > +++ b/fs/ocfs2/journal.c > @@ -810,22 +810,20 @@ void ocfs2_set_journal_params(struct ocfs2_super *osb) > write_unlock(&journal->j_state_lock); > } > > -int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty) > +/* > + * alloc & initialize skeleton for journal structure. > + * ocfs2_journal_init() will make fs have journal ability. > + */ > +int ocfs2_journal_alloc(struct ocfs2_super *osb) > { > - int status = -1; > - struct inode *inode = NULL; /* the journal inode */ > - journal_t *j_journal = NULL; > - struct ocfs2_journal *journal = NULL; > - struct ocfs2_dinode *di = NULL; > - struct buffer_head *bh = NULL; > - int inode_lock = 0; > + int status = 0; > + struct ocfs2_journal *journal; > > - /* initialize our journal structure */ > journal = kzalloc(sizeof(struct ocfs2_journal), GFP_KERNEL); > if (!journal) { > mlog(ML_ERROR, "unable to alloc journal\n"); > status = -ENOMEM; > - goto done; > + goto bail; > } > osb->journal = journal; > journal->j_osb = osb; > @@ -839,6 +837,20 @@ int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty) > INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery); > journal->j_state = OCFS2_JOURNAL_FREE; > > +bail: > + return status; > +} > + > +int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty) > +{ > + int status = -1; > + struct inode *inode = NULL; /* the journal inode */ > + journal_t *j_journal = NULL; > + struct ocfs2_journal *journal = osb->journal; > + struct ocfs2_dinode *di = NULL; > + struct buffer_head *bh = NULL; > + int inode_lock = 0; > +Better to leave a BUG_ON(!journal) here like before.> /* already have the inode for our journal */ > inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, > osb->slot_num); > diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c > index 477cdf94122e..babec2c9d638 100644 > --- a/fs/ocfs2/super.c > +++ b/fs/ocfs2/super.c > @@ -2015,6 +2015,7 @@ static int ocfs2_initialize_super(struct super_block *sb, > int i, cbits, bbits; > struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; > struct inode *inode = NULL; > + struct ocfs2_journal *journal; > struct ocfs2_super *osb; > u64 total_blocks; > > @@ -2195,6 +2196,15 @@ static int ocfs2_initialize_super(struct super_block *sb, > > get_random_bytes(&osb->s_next_generation, sizeof(u32)); > > + /* > + * FIXME > + * This should be done in ocfs2_journal_init(), but any inode > + * writes back operation will cause the filesystem to crash. > + */ > + status = ocfs2_journal_alloc(osb); > + if (status)Explicitly mark 'status < 0'? Thanks, Joseph> + goto bail; > + > INIT_WORK(&osb->dquot_drop_work, ocfs2_drop_dquot_refs); > init_llist_head(&osb->dquot_drop_list); > > @@ -2483,6 +2493,12 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb) > > kfree(osb->osb_orphan_wipes); > kfree(osb->slot_recovery_generations); > + /* FIXME > + * This belongs in journal shutdown, but because we have to > + * allocate osb->journal at the middle of ocfs2_initialize_super(), > + * we free it here. > + */ > + kfree(osb->journal); > kfree(osb->local_alloc_copy); > kfree(osb->uuid_str); > kfree(osb->vol_label);