Miao Xie
2013-Jan-29 09:59 UTC
[PATCH V2 01/10] Btrfs: use atomic for btrfs_fs_info->generation
fs_info->generation is a 64-bit variable, and it can be accessed by
multiple tasks concurrently. If there is no lock or other method to
protect it, we might get a wrong value, especially on a 32-bit machine.

For example, assuming ->generation is 0x0000 0000 ffff ffff at the
beginning, we increase it by 1, so ->generation becomes
0x0000 0001 0000 0000 in the registers, and then we store it back into
memory. If some task accesses it at that moment, like this:

	Task0				Task1
	set low 32 bits
					load low 32 bits
					load high 32 bits
	set high 32 bits

Task1 will get 0, which is a wrong number.

We fix this problem by using an atomic operation.

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
---
Changelog v1 -> v2:
- modify the changelog and make it clearer and more rigorous.
---
 fs/btrfs/ctree.c             |  7 ++++---
 fs/btrfs/ctree.h             |  2 +-
 fs/btrfs/disk-io.c           |  8 ++++----
 fs/btrfs/file.c              |  6 ++++--
 fs/btrfs/inode.c             |  5 +++--
 fs/btrfs/qgroup.c            |  2 +-
 fs/btrfs/transaction.c       |  4 ++--
 include/trace/events/btrfs.h |  3 ++-
 8 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index eea5da7..4a36c03 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1365,10 +1365,11 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
 		       (unsigned long long)
 		       root->fs_info->running_transaction->transid);
 
-	if (trans->transid != root->fs_info->generation)
+	if (trans->transid != atomic64_read(&root->fs_info->generation))
 		WARN(1, KERN_CRIT "trans %llu running %llu\n",
 		     (unsigned long long)trans->transid,
-		     (unsigned long long)root->fs_info->generation);
+		     (unsigned long long)atomic64_read(
+						&root->fs_info->generation));
 
 	if (!should_cow_block(trans, root, buf)) {
 		*cow_ret = buf;
@@ -1465,7 +1466,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 		return 0;
 
 	WARN_ON(trans->transaction != root->fs_info->running_transaction);
-	WARN_ON(trans->transid != root->fs_info->generation);
+	WARN_ON(trans->transid != atomic64_read(&root->fs_info->generation));
 
 	parent_nritems = btrfs_header_nritems(parent);
 	blocksize = btrfs_level_size(root, parent_level - 1);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 547b7b0..c3edb22 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1278,7 +1278,7 @@ struct btrfs_fs_info {
 
 	struct btrfs_block_rsv empty_block_rsv;
 
-	u64 generation;
+	atomic64_t generation;
 	u64 last_trans_committed;
 
 	/*
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 65f0367..f03aebc 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1200,7 +1200,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
 	memset(&root->root_item, 0, sizeof(root->root_item));
 	memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
 	memset(&root->root_kobj, 0, sizeof(root->root_kobj));
-	root->defrag_trans_start = fs_info->generation;
+	root->defrag_trans_start = atomic64_read(&fs_info->generation);
 	init_completion(&root->kobj_unregister);
 	root->defrag_running = 0;
 	root->root_key.objectid = objectid;
@@ -2501,7 +2501,7 @@ retry_root_backup:
 		fs_info->pending_quota_state = 1;
 	}
 
-	fs_info->generation = generation;
+	atomic64_set(&fs_info->generation, generation);
 	fs_info->last_trans_committed = generation;
 
 	ret = btrfs_recover_balance(fs_info);
@@ -3436,12 +3436,12 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
 	int was_dirty;
 
 	btrfs_assert_tree_locked(buf);
-	if (transid != root->fs_info->generation)
+	if (transid != atomic64_read(&root->fs_info->generation))
 		WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, "
 		       "found %llu running %llu\n",
 			(unsigned long long)buf->start,
 			(unsigned long long)transid,
-			(unsigned long long)root->fs_info->generation);
+			(u64)atomic64_read(&root->fs_info->generation));
 	was_dirty = set_extent_buffer_dirty(buf);
 	if (!was_dirty) {
 		spin_lock(&root->fs_info->delalloc_lock);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 841cfe3..02409b6 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1588,7 +1588,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 	 * otherwise subsequent syncs to a file that's been synced in this
 	 * transaction will appear to have already occured.
 	 */
-	BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
+	BTRFS_I(inode)->last_trans = atomic64_read(&root->fs_info->generation) +
+				     1;
 	BTRFS_I(inode)->last_sub_trans = root->log_transid;
 	if (num_written > 0 || num_written == -EIOCBQUEUED) {
 		err = generic_write_sync(file, pos, num_written);
@@ -1679,7 +1680,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	 * syncing
 	 */
 	smp_mb();
-	if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
+	if (btrfs_inode_in_log(inode,
+			       atomic64_read(&root->fs_info->generation)) ||
 	    BTRFS_I(inode)->last_trans <=
 		root->fs_info->last_trans_committed) {
 		BTRFS_I(inode)->last_trans = 0;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ca7ace7..35c4dda 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2644,7 +2644,8 @@ static void btrfs_read_locked_inode(struct inode *inode)
 	 * idea about which extents were modified before we were evicted from
 	 * cache.
 	 */
-	if (BTRFS_I(inode)->last_trans == root->fs_info->generation)
+	if (BTRFS_I(inode)->last_trans ==
+	    atomic64_read(&root->fs_info->generation))
 		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
 			&BTRFS_I(inode)->runtime_flags);
 
@@ -6903,7 +6904,7 @@ again:
 	set_page_dirty(page);
 	SetPageUptodate(page);
 
-	BTRFS_I(inode)->last_trans = root->fs_info->generation;
+	BTRFS_I(inode)->last_trans = atomic64_read(&root->fs_info->generation);
 	BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
 	BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;
 
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index a5c8562..dd002ae 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -290,7 +290,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
 			goto out;
 		}
 		if (btrfs_qgroup_status_generation(l, ptr) !=
-		    fs_info->generation) {
+		    atomic64_read(&fs_info->generation)) {
 			flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
 			printk(KERN_ERR
 				"btrfs: qgroup generation mismatch, "
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index f154946..105d642 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -161,8 +161,8 @@ loop:
 	list_add_tail(&cur_trans->list, &fs_info->trans_list);
 	extent_io_tree_init(&cur_trans->dirty_pages,
 			     fs_info->btree_inode->i_mapping);
-	fs_info->generation++;
-	cur_trans->transid = fs_info->generation;
+	atomic64_inc(&fs_info->generation);
+	cur_trans->transid = atomic64_read(&fs_info->generation);
 	fs_info->running_transaction = cur_trans;
 	cur_trans->aborted = 0;
 	spin_unlock(&fs_info->trans_lock);
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index ea546a4..09dc72f 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -71,7 +71,8 @@ TRACE_EVENT(btrfs_transaction_commit,
 	),
 
 	TP_fast_assign(
-		__entry->generation	= root->fs_info->generation;
+		__entry->generation	=
+				atomic64_read(&root->fs_info->generation);
 		__entry->root_objectid	= root->root_key.objectid;
 	),
 
-- 
1.7.11.7
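The torn read described in this changelog is easy to demonstrate outside the kernel. Below is a minimal, hypothetical user-space analogue in C11 — stdatomic stands in for the kernel's atomic64_t API, and all names are made up for illustration — showing why an indivisible 64-bit load can never observe a half-updated value:

#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* user-space stand-in for fs_info->generation */
static atomic_uint_fast64_t generation = 0xffffffffULL;

static void *bump(void *arg)
{
	(void)arg;
	/* analogous to atomic64_inc(&fs_info->generation) */
	atomic_fetch_add(&generation, 1);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, bump, NULL);
	/*
	 * analogous to atomic64_read(): the load is indivisible, so it
	 * returns either 0xffffffff or 0x100000000, never a torn value
	 * such as 0 assembled from one old and one new 32-bit half.
	 */
	printf("generation = %#llx\n",
	       (unsigned long long)atomic_load(&generation));
	pthread_join(t, NULL);
	return 0;
}

Built with "cc -pthread", the printed value is always one of the two legal values; with a plain uint64_t on a 32-bit target, a torn result would also be possible.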
Miao Xie
2013-Jan-29 10:00 UTC
[PATCH V2 02/10] Btrfs: use atomic for fs_info->last_trans_committed
fs_info->last_trans_committed is a 64-bit variable, and it can be
accessed by multiple tasks concurrently. If there is no lock or other
method to protect it, we might get a wrong value, especially on a
32-bit machine. (Even on a 64-bit machine, it is possible that the
compiler splits a 64-bit operation into two 32-bit operations.)

For example, assuming ->last_trans_committed is 0x00000000ffffffff at
the beginning, and we want to set it to 0x0000000100000000:

	Task0				Task1
	set low 32 bits
					load low 32 bits
					load high 32 bits
	set high 32 bits

Task1 will get 0, which is a wrong number.

We fix this problem by using an atomic operation.

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
---
Changelog v1 -> v2:
- modify the changelog and make it clearer and more rigorous.
---
 fs/btrfs/ctree.h        |  2 +-
 fs/btrfs/disk-io.c      |  2 +-
 fs/btrfs/file.c         |  2 +-
 fs/btrfs/ioctl.c        |  2 +-
 fs/btrfs/ordered-data.c |  2 +-
 fs/btrfs/scrub.c        |  2 +-
 fs/btrfs/transaction.c  |  5 +++--
 fs/btrfs/tree-log.c     | 16 +++++++++-------
 8 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index c3edb22..34a60a8 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1279,7 +1279,7 @@ struct btrfs_fs_info {
 	struct btrfs_block_rsv empty_block_rsv;
 
 	atomic64_t generation;
-	u64 last_trans_committed;
+	atomic64_t last_trans_committed;
 
 	/*
 	 * this is updated to the current trans every time a full commit
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f03aebc..87ed05a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2502,7 +2502,7 @@ retry_root_backup:
 	}
 
 	atomic64_set(&fs_info->generation, generation);
-	fs_info->last_trans_committed = generation;
+	atomic64_set(&fs_info->last_trans_committed, generation);
 
 	ret = btrfs_recover_balance(fs_info);
 	if (ret) {
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 02409b6..910ea99 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1683,7 +1683,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	if (btrfs_inode_in_log(inode,
 			       atomic64_read(&root->fs_info->generation)) ||
 	    BTRFS_I(inode)->last_trans <=
-		root->fs_info->last_trans_committed) {
+		atomic64_read(&root->fs_info->last_trans_committed)) {
 		BTRFS_I(inode)->last_trans = 0;
 
 	/*
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index afbf3ac..3b6c339 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3114,7 +3114,7 @@ static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root,
 			return PTR_ERR(trans);
 
 		/* No running transaction, don't bother */
-		transid = root->fs_info->last_trans_committed;
+		transid = atomic64_read(&root->fs_info->last_trans_committed);
 		goto out;
 	}
 	transid = trans->transid;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index f107312..f376621 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -975,7 +975,7 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
 	 * if this file hasn't been changed since the last transaction
 	 * commit, we can safely return without doing anything
 	 */
-	if (last_mod < root->fs_info->last_trans_committed)
+	if (last_mod < atomic64_read(&root->fs_info->last_trans_committed))
 		return;
 
 	spin_lock(&root->fs_info->ordered_extent_lock);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index bdbb94f..af0b566 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2703,7 +2703,7 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
 	if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
 		return -EIO;
 
-	gen = root->fs_info->last_trans_committed;
+	gen = atomic64_read(&root->fs_info->last_trans_committed);
 
 	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
 		bytenr = btrfs_sb_offset(i);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 105d642..29fdf1c 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -459,7 +459,8 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
 	int ret = 0;
 
 	if (transid) {
-		if (transid <= root->fs_info->last_trans_committed)
+		if (transid <=
+		    atomic64_read(&root->fs_info->last_trans_committed))
 			goto out;
 
 		ret = -EINVAL;
@@ -1730,7 +1731,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
 	cur_trans->commit_done = 1;
 
-	root->fs_info->last_trans_committed = cur_trans->transid;
+	atomic64_set(&root->fs_info->last_trans_committed, cur_trans->transid);
 
 	wake_up(&cur_trans->commit_wait);
 
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9027bb1..7f42a53 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3397,7 +3397,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 	INIT_LIST_HEAD(&extents);
 
 	write_lock(&tree->lock);
-	test_gen = root->fs_info->last_trans_committed;
+	test_gen = atomic64_read(&root->fs_info->last_trans_committed);
 
 	list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
 		list_del_init(&em->list);
@@ -3502,7 +3502,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 
 	/* Only run delayed items if we are a dir or a new file */
 	if (S_ISDIR(inode->i_mode) ||
-	    BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) {
+	    BTRFS_I(inode)->generation >
+	    atomic64_read(&root->fs_info->last_trans_committed)) {
 		ret = btrfs_commit_inode_delayed_items(trans, inode);
 		if (ret) {
 			btrfs_free_path(path);
@@ -3744,7 +3745,8 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
 	struct super_block *sb;
 	struct dentry *old_parent = NULL;
 	int ret = 0;
-	u64 last_committed = root->fs_info->last_trans_committed;
+	u64 last_committed = atomic64_read(
+				&root->fs_info->last_trans_committed);
 
 	sb = inode->i_sb;
 
@@ -3754,7 +3756,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
 	}
 
 	if (root->fs_info->last_trans_log_full_commit >
-	    root->fs_info->last_trans_committed) {
+	    atomic64_read(&root->fs_info->last_trans_committed)) {
 		ret = 1;
 		goto end_no_trans;
 	}
@@ -3806,7 +3808,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
 			break;
 
 		if (BTRFS_I(inode)->generation >
-		    root->fs_info->last_trans_committed) {
+		    atomic64_read(&root->fs_info->last_trans_committed)) {
 			ret = btrfs_log_inode(trans, root, inode, inode_only);
 			if (ret)
 				goto end_trans;
@@ -4069,9 +4071,9 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
 	 * from hasn't been logged, we don't need to log it
 	 */
 	if (BTRFS_I(inode)->logged_trans <=
-	    root->fs_info->last_trans_committed &&
+	    atomic64_read(&root->fs_info->last_trans_committed) &&
 	    (!old_dir || BTRFS_I(old_dir)->logged_trans <=
-	    root->fs_info->last_trans_committed))
+	    atomic64_read(&root->fs_info->last_trans_committed)))
 		return 0;
 
 	return btrfs_log_inode_parent(trans, root, inode, parent, 1);
-- 
1.7.11.7
Miao Xie
2013-Jan-29 10:03 UTC
[PATCH V2 03/10] Btrfs: use atomic for fs_info->last_trans_log_full_commit
fs_info->last_trans_log_full_commit is a 64-bit variable, and it can be
accessed by multiple tasks concurrently. If there is no lock or other
method to protect it, we might get a wrong value, especially on a
32-bit machine. (Even on a 64-bit machine, it is possible that the
compiler splits a 64-bit operation into two 32-bit operations.)

For example, assuming ->last_trans_log_full_commit is
0x00000000ffffffff at the beginning, and we want to set it to
0x0000000100000000:

	Task0				Task1
	set low 32 bits
					load low 32 bits
					load high 32 bits
	set high 32 bits

Task1 will get 0, which is a wrong number.

We fix this problem by using an atomic operation.

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
---
Changelog v1 -> v2:
- modify the changelog and make it clearer and more rigorous.
---
 fs/btrfs/ctree.h       |  2 +-
 fs/btrfs/extent-tree.c |  3 ++-
 fs/btrfs/inode.c       |  3 ++-
 fs/btrfs/tree-log.c    | 32 +++++++++++++++++++-------------
 4 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 34a60a8..745e7ad 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1285,7 +1285,7 @@ struct btrfs_fs_info {
 	 * this is updated to the current trans every time a full commit
 	 * is required instead of the faster short fsync log commits
 	 */
-	u64 last_trans_log_full_commit;
+	atomic64_t last_trans_log_full_commit;
 	unsigned long mount_opt;
 	unsigned long compress_type:4;
 	u64 max_inline;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 85b8454..ef61a4a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -7868,7 +7868,8 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 
 	extent_root = root->fs_info->extent_root;
 
-	root->fs_info->last_trans_log_full_commit = trans->transid;
+	atomic64_set(&root->fs_info->last_trans_log_full_commit,
+		     trans->transid);
 
 	cache = kzalloc(sizeof(*cache), GFP_NOFS);
 	if (!cache)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 35c4dda..803be87 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7433,7 +7433,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 	if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
 		/* force full log commit if subvolume involved. */
-		root->fs_info->last_trans_log_full_commit = trans->transid;
+		atomic64_set(&root->fs_info->last_trans_log_full_commit,
+			     trans->transid);
 	} else {
 		ret = btrfs_insert_inode_ref(trans, dest,
 					     new_dentry->d_name.name,
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 7f42a53..bb7c01b 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2227,14 +2227,14 @@ static int wait_log_commit(struct btrfs_trans_handle *trans,
 				&wait, TASK_UNINTERRUPTIBLE);
 		mutex_unlock(&root->log_mutex);
 
-		if (root->fs_info->last_trans_log_full_commit !=
+		if (atomic64_read(&root->fs_info->last_trans_log_full_commit) !=
 		    trans->transid && root->log_transid < transid + 2 &&
 		    atomic_read(&root->log_commit[index]))
 			schedule();
 
 		finish_wait(&root->log_commit_wait[index], &wait);
 		mutex_lock(&root->log_mutex);
-	} while (root->fs_info->last_trans_log_full_commit !=
+	} while (atomic64_read(&root->fs_info->last_trans_log_full_commit) !=
 		 trans->transid && root->log_transid < transid + 2 &&
 		 atomic_read(&root->log_commit[index]));
 	return 0;
@@ -2244,12 +2244,12 @@ static void wait_for_writer(struct btrfs_trans_handle *trans,
 			    struct btrfs_root *root)
 {
 	DEFINE_WAIT(wait);
-	while (root->fs_info->last_trans_log_full_commit !=
+	while (atomic64_read(&root->fs_info->last_trans_log_full_commit) !=
 	       trans->transid && atomic_read(&root->log_writers)) {
 		prepare_to_wait(&root->log_writer_wait,
 				&wait, TASK_UNINTERRUPTIBLE);
 		mutex_unlock(&root->log_mutex);
-		if (root->fs_info->last_trans_log_full_commit !=
+		if (atomic64_read(&root->fs_info->last_trans_log_full_commit) !=
 		    trans->transid && atomic_read(&root->log_writers))
 			schedule();
 		mutex_lock(&root->log_mutex);
@@ -2306,7 +2306,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	}
 
 	/* bail out if we need to do a full commit */
-	if (root->fs_info->last_trans_log_full_commit == trans->transid) {
+	if (atomic64_read(&root->fs_info->last_trans_log_full_commit) ==
+	    trans->transid) {
 		ret = -EAGAIN;
 		mutex_unlock(&root->log_mutex);
 		goto out;
@@ -2361,7 +2362,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 		mutex_unlock(&log_root_tree->log_mutex);
 		goto out;
 	}
-	root->fs_info->last_trans_log_full_commit = trans->transid;
+	atomic64_set(&root->fs_info->last_trans_log_full_commit,
+		     trans->transid);
 	btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
 	mutex_unlock(&log_root_tree->log_mutex);
 	ret = -EAGAIN;
@@ -2390,7 +2392,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	 * now that we've moved on to the tree of log tree roots,
 	 * check the full commit flag again
 	 */
-	if (root->fs_info->last_trans_log_full_commit == trans->transid) {
+	if (atomic64_read(&root->fs_info->last_trans_log_full_commit) ==
+	    trans->transid) {
 		btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
 		mutex_unlock(&log_root_tree->log_mutex);
 		ret = -EAGAIN;
@@ -2614,7 +2617,8 @@ fail:
 out_unlock:
 	mutex_unlock(&BTRFS_I(dir)->log_mutex);
 	if (ret == -ENOSPC) {
-		root->fs_info->last_trans_log_full_commit = trans->transid;
+		atomic64_set(&root->fs_info->last_trans_log_full_commit,
+			     trans->transid);
 		ret = 0;
 	} else if (ret < 0)
 		btrfs_abort_transaction(trans, root, ret);
@@ -2647,7 +2651,8 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
 				  dirid, &index);
 	mutex_unlock(&BTRFS_I(inode)->log_mutex);
 	if (ret == -ENOSPC) {
-		root->fs_info->last_trans_log_full_commit = trans->transid;
+		atomic64_set(&root->fs_info->last_trans_log_full_commit,
+			     trans->transid);
 		ret = 0;
 	} else if (ret < 0 && ret != -ENOENT)
 		btrfs_abort_transaction(trans, root, ret);
@@ -3708,8 +3713,8 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
 			 * make sure any commits to the log are forced
 			 * to be full commits
 			 */
-			root->fs_info->last_trans_log_full_commit =
-				trans->transid;
+			atomic64_set(&root->fs_info->last_trans_log_full_commit,
+				     trans->transid);
 			ret = 1;
 			break;
 		}
@@ -3755,7 +3760,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
 		goto end_no_trans;
 	}
 
-	if (root->fs_info->last_trans_log_full_commit >
+	if (atomic64_read(&root->fs_info->last_trans_log_full_commit) >
 	    atomic64_read(&root->fs_info->last_trans_committed)) {
 		ret = 1;
 		goto end_no_trans;
@@ -3825,7 +3830,8 @@ end_trans:
 	dput(old_parent);
 	if (ret < 0) {
 		WARN_ON(ret != -ENOSPC);
-		root->fs_info->last_trans_log_full_commit = trans->transid;
+		atomic64_set(&root->fs_info->last_trans_log_full_commit,
+			     trans->transid);
 		ret = 1;
 	}
 	btrfs_end_log_trans(root);
-- 
1.7.11.7
Miao Xie
2013-Jan-29 10:05 UTC
[PATCH V2 04/10] Btrfs: add a comment for fs_info->max_inline
Though ->max_inline is a 64-bit variable and may be accessed by
multiple tasks, it is only a suggestive number, so we need not add
anything to protect fs_info->max_inline; we just add a comment to
explain why we don't use a lock to protect it.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
---
Changelog v1 -> v2:
- modify the changelog and make it clearer.
---
 fs/btrfs/ctree.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 745e7ad..3e672916 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1288,6 +1288,12 @@ struct btrfs_fs_info {
 	atomic64_t last_trans_log_full_commit;
 	unsigned long mount_opt;
 	unsigned long compress_type:4;
+	/*
+	 * It is a suggestive number, the read side is safe even it gets a
+	 * wrong number because we will write out the data into a regular
+	 * extent. The write side(mount/remount) is under ->s_umount lock,
+	 * so it is also safe.
+	 */
 	u64 max_inline;
 	u64 alloc_start;
 	struct btrfs_transaction *running_transaction;
-- 
1.7.11.7
Miao Xie
2013-Jan-29
[PATCH V2 05/10] Btrfs: protect fs_info->alloc_start
fs_info->alloc_start is a 64-bit variable that can be accessed by
multiple tasks, but it is not protected strictly — it can be changed
while we are accessing it. On a 32-bit machine, we may get a wrong
value because we access it with two instructions. (In fact, the same
problem is possible on a 64-bit machine, because the compiler may
split the 64-bit operation into two 32-bit operations.)

For example, assuming ->alloc_start is 0x0000 0000 0001 0000 at the
beginning, and then we remount and set ->alloc_start to
0x0000 0100 0000 0000:

	Task0				Task1
					load high 32 bits
	set high 32 bits
	set low 32 bits
					load low 32 bits

Task1 will get 0.

This patch fixes the problem by using two locks to protect it:
	fs_info->chunk_mutex
	sb->s_umount
On the read side, we only need to take one of these two locks; on the
write side, we must take both of them.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
---
Changelog v1 -> v2:
- modify the changelog and make it clearer and more rigorous.
---
 fs/btrfs/ctree.h | 10 ++++++++++
 fs/btrfs/super.c |  4 ++++
 2 files changed, 14 insertions(+)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3e672916..201be7d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1295,6 +1295,16 @@ struct btrfs_fs_info {
 	 * so it is also safe.
 	 */
 	u64 max_inline;
+	/*
+	 * Protected by ->chunk_mutex and sb->s_umount.
+	 *
+	 * The reason that we use two lock to protect it is because only
+	 * remount and mount operations can change it and these two operations
+	 * are under sb->s_umount, but the read side (chunk allocation) can not
+	 * acquire sb->s_umount or the deadlock would happen. So we use two
+	 * locks to protect it. On the write side, we must acquire two locks,
+	 * and on the read side, we just need acquire one of them.
+	 */
 	u64 alloc_start;
 	struct btrfs_transaction *running_transaction;
 	wait_queue_head_t transaction_throttle;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index d8982e9..c96f132 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -519,7 +519,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
 		case Opt_alloc_start:
 			num = match_strdup(&args[0]);
 			if (num) {
+				mutex_lock(&info->chunk_mutex);
 				info->alloc_start = memparse(num, NULL);
+				mutex_unlock(&info->chunk_mutex);
 				kfree(num);
 				printk(KERN_INFO
 					"btrfs: allocations start at %llu\n",
@@ -1289,7 +1291,9 @@ restore:
 	fs_info->mount_opt = old_opts;
 	fs_info->compress_type = old_compress_type;
 	fs_info->max_inline = old_max_inline;
+	mutex_lock(&fs_info->chunk_mutex);
 	fs_info->alloc_start = old_alloc_start;
+	mutex_unlock(&fs_info->chunk_mutex);
 	btrfs_resize_thread_pool(fs_info,
 		old_thread_pool_size, fs_info->thread_pool_size);
 	fs_info->metadata_ratio = old_metadata_ratio;
-- 
1.7.11.7
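To make the asymmetric two-lock rule concrete, here is a hedged user-space sketch of the discipline this changelog describes — pthread primitives stand in for sb->s_umount (an rwsem) and fs_info->chunk_mutex, and the helper names are hypothetical:

#include <pthread.h>
#include <stdint.h>

/* 'umount_sem' models sb->s_umount, 'chunk_mutex' models
 * fs_info->chunk_mutex */
static pthread_rwlock_t umount_sem = PTHREAD_RWLOCK_INITIALIZER;
static pthread_mutex_t chunk_mutex = PTHREAD_MUTEX_INITIALIZER;
static uint64_t alloc_start;

/* write side (mount/remount): holds umount_sem for writing already,
 * takes chunk_mutex as well, so every possible reader is excluded */
static void set_alloc_start(uint64_t start)
{
	pthread_rwlock_wrlock(&umount_sem);
	pthread_mutex_lock(&chunk_mutex);
	alloc_start = start;
	pthread_mutex_unlock(&chunk_mutex);
	pthread_rwlock_unlock(&umount_sem);
}

/* read side (chunk allocation): must not touch umount_sem (deadlock
 * risk), but holding chunk_mutex alone already excludes the writer,
 * because the writer takes both locks */
static uint64_t get_alloc_start(void)
{
	uint64_t v;

	pthread_mutex_lock(&chunk_mutex);
	v = alloc_start;
	pthread_mutex_unlock(&chunk_mutex);
	return v;
}

int main(void)
{
	set_alloc_start(1ULL << 40);
	return (int)(get_alloc_start() >> 40);	/* 1 */
}

The writer taking the union of the locks is what lets each reader get away with only the one lock it can safely acquire.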
Miao Xie
2013-Jan-29 10:09 UTC
[PATCH V2 06/10] Btrfs: use percpu counter for dirty metadata count
->dirty_metadata_bytes is accessed very frequently, so use a percpu
counter instead of the u64 variable to reduce the contention of the
lock.

This patch also fixes the problem that we accessed it without lock
protection in __btrfs_btree_balance_dirty(), which could cause us to
skip the dirty-page flush.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
---
Changelog v1 -> v2:
- modify the changelog and make it clearer and more rigorous.
---
 fs/btrfs/ctree.h     |  9 ++++----
 fs/btrfs/disk-io.c   | 64 ++++++++++++++++++++++++++++------------------------
 fs/btrfs/extent_io.c |  9 +++-----
 3 files changed, 42 insertions(+), 40 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 201be7d..1dcbbfd 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -191,6 +191,8 @@ static int btrfs_csum_sizes[] = { 4, 0 };
 /* ioprio of readahead is set to idle */
 #define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0))
 
+#define BTRFS_DIRTY_METADATA_THRESH	(32 * 1024 * 1024)
+
 /*
  * The key defines the order in the tree, and so it also defines (optimal)
  * block layout.
@@ -1439,10 +1441,9 @@ struct btrfs_fs_info {
 
 	u64 total_pinned;
 
-	/* protected by the delalloc lock, used to keep from writing
-	 * metadata until there is a nice batch
-	 */
-	u64 dirty_metadata_bytes;
+	/* used to keep from writing metadata until there is a nice batch */
+	struct percpu_counter dirty_metadata_bytes;
+	s32 dirty_metadata_batch;
 	struct list_head dirty_cowonly_roots;
 
 	struct btrfs_fs_devices *fs_devices;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 87ed05a..961ac58 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -946,18 +946,20 @@ static int btree_writepages(struct address_space *mapping,
 			    struct writeback_control *wbc)
 {
 	struct extent_io_tree *tree;
+	struct btrfs_fs_info *fs_info;
+	int ret;
+
 	tree = &BTRFS_I(mapping->host)->io_tree;
 	if (wbc->sync_mode == WB_SYNC_NONE) {
-		struct btrfs_root *root = BTRFS_I(mapping->host)->root;
-		u64 num_dirty;
-		unsigned long thresh = 32 * 1024 * 1024;
 
 		if (wbc->for_kupdate)
 			return 0;
 
+		fs_info = BTRFS_I(mapping->host)->root->fs_info;
 		/* this is a bit racy, but that's ok */
-		num_dirty = root->fs_info->dirty_metadata_bytes;
-		if (num_dirty < thresh)
+		ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes,
+					     BTRFS_DIRTY_METADATA_THRESH);
+		if (ret < 0)
 			return 0;
 	}
 	return btree_write_cache_pages(mapping, wbc);
@@ -1125,24 +1127,16 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
 void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		      struct extent_buffer *buf)
 {
+	struct btrfs_fs_info *fs_info = root->fs_info;
+
 	if (btrfs_header_generation(buf) ==
-	    root->fs_info->running_transaction->transid) {
+	    fs_info->running_transaction->transid) {
 		btrfs_assert_tree_locked(buf);
 
 		if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
-			spin_lock(&root->fs_info->delalloc_lock);
-			if (root->fs_info->dirty_metadata_bytes >= buf->len)
-				root->fs_info->dirty_metadata_bytes -= buf->len;
-			else {
-				spin_unlock(&root->fs_info->delalloc_lock);
-				btrfs_panic(root->fs_info, -EOVERFLOW,
-					  "Can't clear %lu bytes from "
-					  " dirty_mdatadata_bytes (%llu)",
-					  buf->len,
-					  root->fs_info->dirty_metadata_bytes);
-			}
-			spin_unlock(&root->fs_info->delalloc_lock);
-
+			__percpu_counter_add(&fs_info->dirty_metadata_bytes,
+					     -buf->len,
+					     fs_info->dirty_metadata_batch);
 			/* ugh, clear_extent_buffer_dirty needs to lock the page */
 			btrfs_set_lock_blocking(buf);
 			clear_extent_buffer_dirty(buf);
@@ -2004,10 +1998,18 @@ int open_ctree(struct super_block *sb,
 		goto fail_srcu;
 	}
 
+	ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0);
+	if (ret) {
+		err = ret;
+		goto fail_bdi;
+	}
+	fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE *
+					(1 + ilog2(nr_cpu_ids));
+
 	fs_info->btree_inode = new_inode(sb);
 	if (!fs_info->btree_inode) {
 		err = -ENOMEM;
-		goto fail_bdi;
+		goto fail_dirty_metadata_bytes;
 	}
 
 	mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
@@ -2261,6 +2263,7 @@ int open_ctree(struct super_block *sb,
 	leafsize = btrfs_super_leafsize(disk_super);
 	sectorsize = btrfs_super_sectorsize(disk_super);
 	stripesize = btrfs_super_stripesize(disk_super);
+	fs_info->dirty_metadata_batch = leafsize * (1 + ilog2(nr_cpu_ids));
 
 	/*
 	 * mixed block groups end up with duplicate but slightly offset
@@ -2723,6 +2726,8 @@ fail_iput:
 	invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
 
 	iput(fs_info->btree_inode);
+fail_dirty_metadata_bytes:
+	percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
 fail_bdi:
 	bdi_destroy(&fs_info->bdi);
 fail_srcu:
@@ -3401,6 +3406,7 @@ int close_ctree(struct btrfs_root *root)
 	btrfs_close_devices(fs_info->fs_devices);
 	btrfs_mapping_tree_free(&fs_info->mapping_tree);
 
+	percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
 	bdi_destroy(&fs_info->bdi);
 
 	cleanup_srcu_struct(&fs_info->subvol_srcu);
@@ -3443,11 +3449,10 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
 			(unsigned long long)transid,
 			(u64)atomic64_read(&root->fs_info->generation));
 	was_dirty = set_extent_buffer_dirty(buf);
-	if (!was_dirty) {
-		spin_lock(&root->fs_info->delalloc_lock);
-		root->fs_info->dirty_metadata_bytes += buf->len;
-		spin_unlock(&root->fs_info->delalloc_lock);
-	}
+	if (!was_dirty)
+		__percpu_counter_add(&root->fs_info->dirty_metadata_bytes,
+				     buf->len,
+				     root->fs_info->dirty_metadata_batch);
 }
 
 static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
@@ -3457,8 +3462,7 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
 	 * looks as though older kernels can get into trouble with
 	 * this code, they end up stuck in balance_dirty_pages forever
 	 */
-	u64 num_dirty;
-	unsigned long thresh = 32 * 1024 * 1024;
+	int ret;
 
 	if (current->flags & PF_MEMALLOC)
 		return;
@@ -3466,9 +3470,9 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
 	if (flush_delayed)
 		btrfs_balance_delayed_items(root);
 
-	num_dirty = root->fs_info->dirty_metadata_bytes;
-
-	if (num_dirty > thresh) {
+	ret = percpu_counter_compare(&root->fs_info->dirty_metadata_bytes,
+				     BTRFS_DIRTY_METADATA_THRESH);
+	if (ret > 0) {
 		balance_dirty_pages_ratelimited_nr(
 				   root->fs_info->btree_inode->i_mapping, 1);
 	}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 1b319df..2d4bdfd 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3124,12 +3124,9 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb,
 		set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
 		spin_unlock(&eb->refs_lock);
 		btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
-		spin_lock(&fs_info->delalloc_lock);
-		if (fs_info->dirty_metadata_bytes >= eb->len)
-			fs_info->dirty_metadata_bytes -= eb->len;
-		else
-			WARN_ON(1);
-		spin_unlock(&fs_info->delalloc_lock);
+		__percpu_counter_add(&fs_info->dirty_metadata_bytes,
+				     -eb->len,
+				     fs_info->dirty_metadata_batch);
 		ret = 1;
 	} else {
 		spin_unlock(&eb->refs_lock);
-- 
1.7.11.7
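The batch argument is what keeps the counter cheap: each CPU accumulates a local delta and only folds it into the shared count when the delta reaches the batch size. A rough user-space model of __percpu_counter_add()'s behaviour (hypothetical and simplified — the real kernel version handles preemption and takes the counter's spinlock when folding) looks like this:

#include <stdint.h>
#include <stdio.h>

struct percpu_counter_model {
	int64_t count;		/* shared count (locked in the kernel) */
	int32_t batch;
	int32_t *per_cpu;	/* one slot per CPU */
};

/* model of __percpu_counter_add(fbc, amount, batch) */
static void counter_add(struct percpu_counter_model *c, int cpu,
			int64_t amount)
{
	int64_t delta = c->per_cpu[cpu] + amount;

	if (delta >= c->batch || delta <= -c->batch) {
		/* rare path: fold the local delta into the shared count */
		c->count += delta;
		c->per_cpu[cpu] = 0;
	} else {
		/* common case: touch only this CPU's own slot */
		c->per_cpu[cpu] = (int32_t)delta;
	}
}

int main(void)
{
	int32_t slots[4] = {0};
	struct percpu_counter_model c = {
		.count = 0, .batch = 32, .per_cpu = slots,
	};

	counter_add(&c, 0, 10);	/* stays in the per-CPU slot */
	counter_add(&c, 0, 30);	/* 40 >= batch: folded into c.count */
	printf("shared=%lld cpu0=%d\n", (long long)c.count, slots[0]);
	return 0;
}

Since the shared count may lag reality by up to roughly nr_cpus * batch, percpu_counter_compare() first compares against the cheap approximate value and only performs the expensive exact sum when the approximation is too close to the threshold to decide — which is exactly what the 32MB threshold checks above rely on.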
Miao Xie
2013-Jan-29 10:10 UTC
[PATCH V2 07/10] Btrfs: use percpu counter for fs_info->delalloc_bytes
fs_info->delalloc_bytes is accessed very frequently, so use a percpu
counter instead of the u64 variable to reduce the lock contention.

This patch also fixes the problem that we accessed the variable without
lock protection. At worst, we would fail to flush the delalloc inodes
and just return an ENOSPC error even though there was still some free
space in the fs.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
---
Changelog v1 -> v2:
- modify the changelog and make it clearer and more rigorous.
---
 fs/btrfs/ctree.h       |  7 ++++---
 fs/btrfs/disk-io.c     | 18 ++++++++++++++----
 fs/btrfs/extent-tree.c |  6 ++++--
 fs/btrfs/inode.c       |  6 ++++--
 4 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 1dcbbfd..51515a3 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1383,6 +1383,7 @@ struct btrfs_fs_info {
 	 */
 	struct list_head ordered_extents;
 
+	spinlock_t delalloc_lock;
 	/*
 	 * all of the inodes that have delalloc bytes.  It is possible for
 	 * this list to be empty even when there is still dirty data=ordered
@@ -1443,7 +1444,10 @@ struct btrfs_fs_info {
 
 	/* used to keep from writing metadata until there is a nice batch */
 	struct percpu_counter dirty_metadata_bytes;
+	struct percpu_counter delalloc_bytes;
 	s32 dirty_metadata_batch;
+	s32 delalloc_batch;
+
 	struct list_head dirty_cowonly_roots;
 
 	struct btrfs_fs_devices *fs_devices;
@@ -1459,9 +1463,6 @@ struct btrfs_fs_info {
 
 	struct reloc_control *reloc_ctl;
 
-	spinlock_t delalloc_lock;
-	u64 delalloc_bytes;
-
 	/* data_alloc_cluster is only used in ssd mode */
 	struct btrfs_free_cluster data_alloc_cluster;
 
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 961ac58..29d52af 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2006,10 +2006,16 @@ int open_ctree(struct super_block *sb,
 	fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE *
 					(1 + ilog2(nr_cpu_ids));
 
+	ret = percpu_counter_init(&fs_info->delalloc_bytes, 0);
+	if (ret) {
+		err = ret;
+		goto fail_dirty_metadata_bytes;
+	}
+
 	fs_info->btree_inode = new_inode(sb);
 	if (!fs_info->btree_inode) {
 		err = -ENOMEM;
-		goto fail_dirty_metadata_bytes;
+		goto fail_delalloc_bytes;
 	}
 
 	mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
@@ -2264,6 +2270,7 @@ int open_ctree(struct super_block *sb,
 	sectorsize = btrfs_super_sectorsize(disk_super);
 	stripesize = btrfs_super_stripesize(disk_super);
 	fs_info->dirty_metadata_batch = leafsize * (1 + ilog2(nr_cpu_ids));
+	fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids));
 
 	/*
 	 * mixed block groups end up with duplicate but slightly offset
@@ -2726,6 +2733,8 @@ fail_iput:
 	invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
 
 	iput(fs_info->btree_inode);
+fail_delalloc_bytes:
+	percpu_counter_destroy(&fs_info->delalloc_bytes);
 fail_dirty_metadata_bytes:
 	percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
 fail_bdi:
@@ -3357,9 +3366,9 @@ int close_ctree(struct btrfs_root *root)
 
 	btrfs_free_qgroup_config(root->fs_info);
 
-	if (fs_info->delalloc_bytes) {
-		printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
-		       (unsigned long long)fs_info->delalloc_bytes);
+	if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
+		printk(KERN_INFO "btrfs: at unmount delalloc count %lld\n",
+		       percpu_counter_sum(&fs_info->delalloc_bytes));
 	}
 
 	free_extent_buffer(fs_info->extent_root->node);
@@ -3407,6 +3416,7 @@ int close_ctree(struct btrfs_root *root)
 	btrfs_mapping_tree_free(&fs_info->mapping_tree);
 
 	percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
+	percpu_counter_destroy(&fs_info->delalloc_bytes);
 	bdi_destroy(&fs_info->bdi);
 
 	cleanup_srcu_struct(&fs_info->subvol_srcu);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index ef61a4a..f4f0b1e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3724,7 +3724,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 	space_info = block_rsv->space_info;
 
 	smp_mb();
-	delalloc_bytes = root->fs_info->delalloc_bytes;
+	delalloc_bytes = percpu_counter_sum_positive(
+						&root->fs_info->delalloc_bytes);
 	if (delalloc_bytes == 0) {
 		if (trans)
 			return;
@@ -3766,7 +3767,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 			break;
 		}
 		smp_mb();
-		delalloc_bytes = root->fs_info->delalloc_bytes;
+		delalloc_bytes = percpu_counter_sum_positive(
+						&root->fs_info->delalloc_bytes);
 	}
 }
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 803be87..d98b508 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1510,7 +1510,8 @@ static void btrfs_set_bit_hook(struct inode *inode,
 
 		spin_lock(&root->fs_info->delalloc_lock);
 		BTRFS_I(inode)->delalloc_bytes += len;
-		root->fs_info->delalloc_bytes += len;
+		__percpu_counter_add(&root->fs_info->delalloc_bytes, len,
+				     root->fs_info->delalloc_batch);
 		if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
 			list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
 				      &root->fs_info->delalloc_inodes);
@@ -1551,7 +1552,8 @@ static void btrfs_clear_bit_hook(struct inode *inode,
 		    && do_list)
 			btrfs_free_reserved_data_space(inode, len);
 		spin_lock(&root->fs_info->delalloc_lock);
-		root->fs_info->delalloc_bytes -= len;
+		__percpu_counter_add(&root->fs_info->delalloc_bytes, -len,
+				     root->fs_info->delalloc_batch);
 		BTRFS_I(inode)->delalloc_bytes -= len;
 
 		if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
-- 
1.7.11.7
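Note that the patch reads the counter with percpu_counter_sum_positive() rather than the cheaper percpu_counter_read(). The following kernel-context sketch (a summary of the trade-off between the existing percpu_counter read helpers, not a standalone program, and the function name is hypothetical) shows why:

/* The three ways to read a percpu_counter and what they cost. */
static s64 delalloc_bytes_snapshot(struct btrfs_fs_info *fs_info)
{
	/* cheap but approximate: reads only the shared count, which
	 * may lag the truth by up to about nr_cpus * batch */
	s64 approx = percpu_counter_read(&fs_info->delalloc_bytes);

	/* exact but O(nr_cpus): folds in every CPU's pending delta */
	s64 exact = percpu_counter_sum(&fs_info->delalloc_bytes);

	pr_debug("delalloc: approx=%lld exact=%lld\n", approx, exact);

	/* exact and clamped to >= 0; shrink_delalloc() wants this form
	 * because a transiently negative sum is meaningless for "bytes
	 * of outstanding delalloc" */
	return percpu_counter_sum_positive(&fs_info->delalloc_bytes);
}

shrink_delalloc() is a slow path that must decide whether any flushing work remains, so it pays for the exact sum; the hot update paths keep using the batched add.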
Miao Xie
2013-Jan-29 10:11 UTC
[PATCH V2 08/10] Btrfs: use the inode own lock to protect its delalloc_bytes
We need not use a global lock to protect the inode's delalloc_bytes;
its own lock is enough. In this way, we reduce the lock contention,
and ->delalloc_lock now only protects the delalloc inode list.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
---
Changelog v1 -> v2:
- none.
---
 fs/btrfs/btrfs_inode.h |  1 +
 fs/btrfs/disk-io.c     |  2 ++
 fs/btrfs/inode.c       | 47 ++++++++++++++++++++++++++++++-------------
 3 files changed, 37 insertions(+), 13 deletions(-)

diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 2a8c242..c935a77 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -40,6 +40,7 @@
 #define BTRFS_INODE_HAS_ASYNC_EXTENT		6
 #define BTRFS_INODE_NEEDS_FULL_SYNC		7
 #define BTRFS_INODE_COPY_EVERYTHING		8
+#define BTRFS_INODE_IN_DELALLOC_LIST		9
 
 /* in memory btrfs inode */
 struct btrfs_inode {
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 29d52af..abf1f10 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3685,6 +3685,8 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
 					      delalloc_inodes);
 
 		list_del_init(&btrfs_inode->delalloc_inodes);
+		clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+			  &btrfs_inode->runtime_flags);
 
 		btrfs_invalidate_inodes(btrfs_inode->root);
 	}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d98b508..4f92b35 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1508,15 +1508,22 @@ static void btrfs_set_bit_hook(struct inode *inode,
 			spin_unlock(&BTRFS_I(inode)->lock);
 		}
 
-		spin_lock(&root->fs_info->delalloc_lock);
-		BTRFS_I(inode)->delalloc_bytes += len;
 		__percpu_counter_add(&root->fs_info->delalloc_bytes, len,
 				     root->fs_info->delalloc_batch);
-		if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
-			list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
-				      &root->fs_info->delalloc_inodes);
+		spin_lock(&BTRFS_I(inode)->lock);
+		BTRFS_I(inode)->delalloc_bytes += len;
+		if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+					 &BTRFS_I(inode)->runtime_flags)) {
+			spin_lock(&root->fs_info->delalloc_lock);
+			if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
+				list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
+					      &root->fs_info->delalloc_inodes);
+				set_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+					&BTRFS_I(inode)->runtime_flags);
+			}
+			spin_unlock(&root->fs_info->delalloc_lock);
 		}
-		spin_unlock(&root->fs_info->delalloc_lock);
+		spin_unlock(&BTRFS_I(inode)->lock);
 	}
 }
@@ -1551,16 +1558,22 @@
 		    && do_list)
 			btrfs_free_reserved_data_space(inode, len);
 
-		spin_lock(&root->fs_info->delalloc_lock);
 		__percpu_counter_add(&root->fs_info->delalloc_bytes, -len,
 				     root->fs_info->delalloc_batch);
+		spin_lock(&BTRFS_I(inode)->lock);
 		BTRFS_I(inode)->delalloc_bytes -= len;
-
 		if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
-		    !list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
-			list_del_init(&BTRFS_I(inode)->delalloc_inodes);
+		    test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+			     &BTRFS_I(inode)->runtime_flags)) {
+			spin_lock(&root->fs_info->delalloc_lock);
+			if (!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
+				list_del_init(&BTRFS_I(inode)->delalloc_inodes);
+				clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+					  &BTRFS_I(inode)->runtime_flags);
+			}
+			spin_unlock(&root->fs_info->delalloc_lock);
 		}
-		spin_unlock(&root->fs_info->delalloc_lock);
+		spin_unlock(&BTRFS_I(inode)->lock);
 	}
 }
@@ -7316,14 +7329,19 @@ fail:
 static int btrfs_getattr(struct vfsmount *mnt,
 			 struct dentry *dentry, struct kstat *stat)
 {
+	u64 delalloc_bytes;
 	struct inode *inode = dentry->d_inode;
 	u32 blocksize = inode->i_sb->s_blocksize;
 
 	generic_fillattr(inode, stat);
 	stat->dev = BTRFS_I(inode)->root->anon_dev;
 	stat->blksize = PAGE_CACHE_SIZE;
+
+	spin_lock(&BTRFS_I(inode)->lock);
+	delalloc_bytes = BTRFS_I(inode)->delalloc_bytes;
+	spin_unlock(&BTRFS_I(inode)->lock);
 	stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
-		ALIGN(BTRFS_I(inode)->delalloc_bytes, blocksize)) >> 9;
+			ALIGN(delalloc_bytes, blocksize)) >> 9;
 	return 0;
 }
@@ -7611,8 +7629,11 @@ again:
 		list_del_init(&binode->delalloc_inodes);
 
 		inode = igrab(&binode->vfs_inode);
-		if (!inode)
+		if (!inode) {
+			clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+				  &binode->runtime_flags);
 			continue;
+		}
 
 		list_add_tail(&binode->delalloc_inodes,
 			      &root->fs_info->delalloc_inodes);
-- 
1.7.11.7
Miao Xie
2013-Jan-29 10:13 UTC
[PATCH V2 09/10] Btrfs: use seqlock to protect fs_info->avail_{data, metadata, system}_alloc_bits
There is no lock to protect
fs_info->avail_{data, metadata, system}_alloc_bits; this may introduce
problems such as returning wrong profile information, so we add a
seqlock to protect them.

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
---
Changelog v1 -> v2:
- none.
---
 fs/btrfs/ctree.h       |  2 ++
 fs/btrfs/disk-io.c     |  1 +
 fs/btrfs/extent-tree.c | 22 ++++++++++++++------
 fs/btrfs/volumes.c     | 56 +++++++++++++++++++++++++++-----------------------
 4 files changed, 49 insertions(+), 32 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 51515a3..c95b539 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1474,6 +1474,8 @@ struct btrfs_fs_info {
 	struct rb_root defrag_inodes;
 	atomic_t defrag_running;
 
+	/* Used to protect avail_{data, metadata, system}_alloc_bits */
+	seqlock_t profiles_lock;
 	/*
 	 * these three are in extended format (availability of single
 	 * chunks is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE bit, other
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index abf1f10..a7797ed 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2036,6 +2036,7 @@ int open_ctree(struct super_block *sb,
 	spin_lock_init(&fs_info->tree_mod_seq_lock);
 	rwlock_init(&fs_info->tree_mod_log_lock);
 	mutex_init(&fs_info->reloc_mutex);
+	seqlock_init(&fs_info->profiles_lock);
 
 	init_completion(&fs_info->kobj_unregister);
 	INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f4f0b1e..bbbfa72 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3223,12 +3223,14 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
 	u64 extra_flags = chunk_to_extended(flags) &
 				BTRFS_EXTENDED_PROFILE_MASK;
 
+	write_seqlock(&fs_info->profiles_lock);
 	if (flags & BTRFS_BLOCK_GROUP_DATA)
 		fs_info->avail_data_alloc_bits |= extra_flags;
 	if (flags & BTRFS_BLOCK_GROUP_METADATA)
 		fs_info->avail_metadata_alloc_bits |= extra_flags;
 	if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
 		fs_info->avail_system_alloc_bits |= extra_flags;
+	write_sequnlock(&fs_info->profiles_lock);
 }
 
 /*
@@ -3320,12 +3322,18 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
 
 static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
 {
-	if (flags & BTRFS_BLOCK_GROUP_DATA)
-		flags |= root->fs_info->avail_data_alloc_bits;
-	else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
-		flags |= root->fs_info->avail_system_alloc_bits;
-	else if (flags & BTRFS_BLOCK_GROUP_METADATA)
-		flags |= root->fs_info->avail_metadata_alloc_bits;
+	unsigned seq;
+
+	do {
+		seq = read_seqbegin(&root->fs_info->profiles_lock);
+
+		if (flags & BTRFS_BLOCK_GROUP_DATA)
+			flags |= root->fs_info->avail_data_alloc_bits;
+		else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
+			flags |= root->fs_info->avail_system_alloc_bits;
+		else if (flags & BTRFS_BLOCK_GROUP_METADATA)
+			flags |= root->fs_info->avail_metadata_alloc_bits;
+	} while (read_seqretry(&root->fs_info->profiles_lock, seq));
 
 	return btrfs_reduce_alloc_profile(root, flags);
 }
@@ -7937,12 +7945,14 @@ static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
 	u64 extra_flags = chunk_to_extended(flags) &
 				BTRFS_EXTENDED_PROFILE_MASK;
 
+	write_seqlock(&fs_info->profiles_lock);
 	if (flags & BTRFS_BLOCK_GROUP_DATA)
 		fs_info->avail_data_alloc_bits &= ~extra_flags;
 	if (flags & BTRFS_BLOCK_GROUP_METADATA)
 		fs_info->avail_metadata_alloc_bits &= ~extra_flags;
 	if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
 		fs_info->avail_system_alloc_bits &= ~extra_flags;
+	write_sequnlock(&fs_info->profiles_lock);
 }
 
 int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 15f6efd..65f22c2 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1372,14 +1372,19 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 	u64 devid;
 	u64 num_devices;
 	u8 *dev_uuid;
+	unsigned seq;
 	int ret = 0;
 	bool clear_super = false;
 
 	mutex_lock(&uuid_mutex);
 
-	all_avail = root->fs_info->avail_data_alloc_bits |
-		    root->fs_info->avail_system_alloc_bits |
-		    root->fs_info->avail_metadata_alloc_bits;
+	do {
+		seq = read_seqbegin(&root->fs_info->profiles_lock);
+
+		all_avail = root->fs_info->avail_data_alloc_bits |
+			    root->fs_info->avail_system_alloc_bits |
+			    root->fs_info->avail_metadata_alloc_bits;
+	} while (read_seqretry(&root->fs_info->profiles_lock, seq));
 
 	num_devices = root->fs_info->fs_devices->num_devices;
 	btrfs_dev_replace_lock(&root->fs_info->dev_replace);
@@ -2984,6 +2989,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
 	int mixed = 0;
 	int ret;
 	u64 num_devices;
+	unsigned seq;
 
 	if (btrfs_fs_closing(fs_info) ||
 	    atomic_read(&fs_info->balance_pause_req) ||
@@ -3067,22 +3073,26 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
 	/* allow to reduce meta or sys integrity only if force set */
 	allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
 			BTRFS_BLOCK_GROUP_RAID10;
-	if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
-	     (fs_info->avail_system_alloc_bits & allowed) &&
-	     !(bctl->sys.target & allowed)) ||
-	    ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
-	     (fs_info->avail_metadata_alloc_bits & allowed) &&
-	     !(bctl->meta.target & allowed))) {
-		if (bctl->flags & BTRFS_BALANCE_FORCE) {
-			printk(KERN_INFO "btrfs: force reducing metadata "
-			       "integrity\n");
-		} else {
-			printk(KERN_ERR "btrfs: balance will reduce metadata "
-			       "integrity, use force if you want this\n");
-			ret = -EINVAL;
-			goto out;
+	do {
+		seq = read_seqbegin(&fs_info->profiles_lock);
+
+		if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
+		     (fs_info->avail_system_alloc_bits & allowed) &&
+		     !(bctl->sys.target & allowed)) ||
+		    ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
+		     (fs_info->avail_metadata_alloc_bits & allowed) &&
+		     !(bctl->meta.target & allowed))) {
+			if (bctl->flags & BTRFS_BALANCE_FORCE) {
+				printk(KERN_INFO "btrfs: force reducing metadata "
+				       "integrity\n");
+			} else {
+				printk(KERN_ERR "btrfs: balance will reduce metadata "
+				       "integrity, use force if you want this\n");
+				ret = -EINVAL;
+				goto out;
+			}
 		}
-	}
+	} while (read_seqretry(&fs_info->profiles_lock, seq));
 
 	if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
 		int num_tolerated_disk_barrier_failures;
@@ -3886,10 +3896,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
 	if (ret)
 		return ret;
 
-	alloc_profile = BTRFS_BLOCK_GROUP_METADATA |
-			fs_info->avail_metadata_alloc_bits;
-	alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);
-
+	alloc_profile = btrfs_get_alloc_profile(extent_root, 0);
 	ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
 				  &stripe_size, chunk_offset, alloc_profile);
 	if (ret)
@@ -3897,10 +3904,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
 
 	sys_chunk_offset = chunk_offset + chunk_size;
 
-	alloc_profile = BTRFS_BLOCK_GROUP_SYSTEM |
-			fs_info->avail_system_alloc_bits;
-	alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);
-
+	alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0);
 	ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map,
 				  &sys_chunk_size, &sys_stripe_size,
 				  sys_chunk_offset, alloc_profile);
-- 
1.7.11.7
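All of the read_seqbegin()/read_seqretry() loops added above follow the same lock-free reader pattern: take a snapshot, then retry if a writer intervened. Below is a toy, single-writer user-space model of a seqlock (hypothetical, built on C11 atomics with sequentially consistent ordering for simplicity — the kernel's seqlock_t is more careful about memory ordering and also serializes concurrent writers):

#include <stdatomic.h>
#include <stdint.h>

/* Toy single-writer seqlock: an odd sequence number means "write in
 * progress". */
static _Atomic unsigned seq;
static _Atomic uint64_t avail_bits;	/* the protected value */

static void write_bits(uint64_t extra)	/* cf. write_seqlock() side */
{
	atomic_fetch_add(&seq, 1);	/* seq becomes odd */
	atomic_store(&avail_bits, atomic_load(&avail_bits) | extra);
	atomic_fetch_add(&seq, 1);	/* seq becomes even again */
}

static uint64_t read_bits(void)	/* cf. read_seqbegin()/read_seqretry() */
{
	unsigned s;
	uint64_t v;

	do {
		/* wait for an even (stable) sequence, then snapshot */
		while ((s = atomic_load(&seq)) & 1)
			;
		v = atomic_load(&avail_bits);
		/* a writer ran while we were reading: retry */
	} while (atomic_load(&seq) != s);
	return v;
}

Readers take no lock and never block the writer; they only pay a retry when an update races with the snapshot — a good fit for these read-mostly profile bits.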
Miao Xie
2013-Jan-29
[PATCH V2 10/10] Btrfs: use bit operation for fs_info->fs_state
There is no lock to protect fs_info->fs_state; this will introduce
some problems, such as the value being overwritten by another task
when several tasks modify it concurrently. For example:

	Task0 - CPU0			Task1 - CPU1
	mov %fs_state rax
	or $0x1 rax
					mov %fs_state rax
					or $0x2 rax
	mov rax %fs_state
					mov rax %fs_state

The expected value is 3, but in fact it is 2.

Though this problem does not happen now (because there is currently
only one flag), the code is error prone: if we add other flags, the
above problem is certain to happen.

Now we use bit operations for it to fix the above problem. In this
way, we make the code more robust and easier to extend with new
flags.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
---
Changelog v1 -> v2:
- modify the changelog and make it clearer and more rigorous.
---
 fs/btrfs/ctree.h       | 4 +++-
 fs/btrfs/disk-io.c     | 5 +++--
 fs/btrfs/file.c        | 2 +-
 fs/btrfs/scrub.c       | 2 +-
 fs/btrfs/super.c       | 4 ++--
 fs/btrfs/transaction.c | 9 ++++-----
 6 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index c95b539..c34e36e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -338,7 +338,9 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
 /*
  * File system states
  */
+#define BTRFS_FS_STATE_ERROR		0
 
+/* Super block flags */
 /* Errors detected */
 #define BTRFS_SUPER_FLAG_ERROR		(1ULL << 2)
 
@@ -1540,7 +1542,7 @@ struct btrfs_fs_info {
 	u64 qgroup_seq;
 
 	/* filesystem state */
-	u64 fs_state;
+	unsigned long fs_state;
 
 	struct btrfs_delayed_root *delayed_root;
 
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a7797ed..caf329c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2196,7 +2196,8 @@ int open_ctree(struct super_block *sb,
 		goto fail_alloc;
 
 	/* check FS state, whether FS is broken. */
-	fs_info->fs_state |= btrfs_super_flags(disk_super);
+	if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR)
+		set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
 
 	ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
 	if (ret) {
@@ -3354,7 +3355,7 @@ int close_ctree(struct btrfs_root *root)
 			printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
 	}
 
-	if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
+	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
 		btrfs_error_commit_super(root);
 
 	btrfs_put_block_group_cache(fs_info);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 910ea99..796fd79 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1531,7 +1531,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 	 * although we have opened a file as writable, we have
 	 * to stop this write operation to ensure FS consistency.
 	 */
-	if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+	if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
 		mutex_unlock(&inode->i_mutex);
 		err = -EROFS;
 		goto out;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index af0b566..2e91b56 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2700,7 +2700,7 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
 	int	ret;
 	struct btrfs_root *root = sctx->dev_root;
 
-	if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
+	if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
 		return -EIO;
 
 	gen = atomic64_read(&root->fs_info->last_trans_committed);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index c96f132..6528482 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -98,7 +98,7 @@ static void __save_error_info(struct btrfs_fs_info *fs_info)
 	 * today we only save the error info into ram.  Long term we'll
 	 * also send it down to the disk
 	 */
-	fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR;
+	set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
 }
 
 static void save_error_info(struct btrfs_fs_info *fs_info)
@@ -114,7 +114,7 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
 	if (sb->s_flags & MS_RDONLY)
 		return;
 
-	if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
 		sb->s_flags |= MS_RDONLY;
 		printk(KERN_INFO "btrfs is forced readonly\n");
 		/*
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 29fdf1c..50437b4 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -62,7 +62,7 @@ static noinline int join_transaction(struct btrfs_root *root, int type)
 	spin_lock(&fs_info->trans_lock);
 loop:
 	/* The file system has been taken offline. No new transactions. */
-	if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
 		spin_unlock(&fs_info->trans_lock);
 		return -EROFS;
 	}
@@ -114,7 +114,7 @@ loop:
 		kmem_cache_free(btrfs_transaction_cachep, cur_trans);
 		cur_trans = fs_info->running_transaction;
 		goto loop;
-	} else if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+	} else if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
 		spin_unlock(&fs_info->trans_lock);
 		kmem_cache_free(btrfs_transaction_cachep, cur_trans);
 		return -EROFS;
@@ -302,7 +302,7 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type,
 	int ret;
 	u64 qgroup_reserved = 0;
 
-	if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
+	if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
 		return ERR_PTR(-EROFS);
 
 	if (current->journal_info) {
@@ -635,9 +635,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 		btrfs_run_delayed_iputs(root);
 
 	if (trans->aborted ||
-	    root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+	    test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
 		err = -EIO;
-	}
 	assert_qgroups_uptodate(trans);
 
 	memset(trans, 0, sizeof(*trans));
-- 
1.7.11.7
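The lost-update interleaving shown in the changelog is exactly what an atomic read-modify-write rules out. Here is a small, hypothetical user-space demonstration, with C11 atomic_fetch_or() standing in for the kernel's set_bit() (note set_bit() takes a bit index, while atomic_fetch_or() takes a mask) and a plain load plus mask standing in for test_bit():

#include <stdatomic.h>
#include <stdio.h>

#define FS_STATE_ERROR		0
#define FS_STATE_REMOUNTING	1	/* hypothetical second flag */

static _Atomic unsigned long fs_state;

int main(void)
{
	/* like set_bit(): each or is one indivisible read-modify-write,
	 * so there is no window in which one update can overwrite the
	 * other — even if these ran on two CPUs concurrently */
	atomic_fetch_or(&fs_state, 1UL << FS_STATE_ERROR);
	atomic_fetch_or(&fs_state, 1UL << FS_STATE_REMOUNTING);

	/* like test_bit() */
	if (atomic_load(&fs_state) & (1UL << FS_STATE_ERROR))
		printf("fs_state = %#lx\n", atomic_load(&fs_state)); /* 0x3 */
	return 0;
}

With plain loads, ors, and stores, the 0x2 result from the changelog's interleaving would be possible; with the atomic form, the final value is always 0x3.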