thr3ads.net - Btrfs devel - [PATCH v2 0/3] Btrfs: quota rescan for 3.10 [Apr 2013]

If this information is useful, please help other people find it:
Share via:

Jan Schmidt

2013-Apr-16 08:45 UTC

[PATCH v2 0/3] Btrfs: quota rescan for 3.10

The kernel side for rescan, which is needed if you want to enable qgroup
tracking on a non-empty volume. The first patch splits
btrfs_qgroup_account_ref into readable ans reusable units. The second
patch adds the rescan implementation (refer to its commit message for a
description of the algorithm). The third  patch starts an automatic
rescan when qgroups are enabled. It is only separated to potentially
help bisecting things in case of a problem.

The required user space patch was sent at 2013-04-05, subject "[PATCH]
Btrfs-progs: quota rescan".

--
Changes v1->v2:
- fix calculation of the "exclusive" field for qgroups in level != 0
- split btrfs_qgroup_account_ref
- take into account that mutex_unlock might schedule
- fix kzalloc error checking
- add some reserved ints to struct btrfs_ioctl_quota_rescan_args
- changed modification to unused #define BTRFS_QUOTA_CTL_RESCAN
- added missing (unsigned long long) casts for pr_debug
- more detailed commit messages

Jan Schmidt (3):
  Btrfs: split btrfs_qgroup_account_ref into four functions
  Btrfs: rescan for qgroups
  Btrfs: automatic rescan after "quota enable" command

 fs/btrfs/ctree.h           |   17 +-
 fs/btrfs/disk-io.c         |    6 +
 fs/btrfs/ioctl.c           |   83 ++++++--
 fs/btrfs/qgroup.c          |  517 +++++++++++++++++++++++++++++++++++---------
 include/uapi/linux/btrfs.h |   12 +-
 5 files changed, 509 insertions(+), 126 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Jan Schmidt

2013-Apr-16 08:45 UTC

head link

[PATCH v2 1/3] Btrfs: split btrfs_qgroup_account_ref into four functions

The function is separated into a preparation part and the three accounting
steps mentioned in the qgroups documentation. The goal is to make steps two
and three usable by the rescan functionality. A side effect is that the
function is restructured into readable subunits.

Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
---
 fs/btrfs/qgroup.c |  212 ++++++++++++++++++++++++++++++-----------------------
 1 files changed, 121 insertions(+), 91 deletions(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index b44124d..c38a0c5 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1075,6 +1075,122 @@ int btrfs_qgroup_record_ref(struct btrfs_trans_handle
*trans,
 	return 0;
 }
 
+static void qgroup_account_ref_step1(struct btrfs_fs_info *fs_info,
+				     struct ulist *roots, struct ulist *tmp,
+				     u64 seq)
+{
+	struct ulist_node *unode;
+	struct ulist_iterator uiter;
+	struct ulist_node *tmp_unode;
+	struct ulist_iterator tmp_uiter;
+	struct btrfs_qgroup *qg;
+
+	ULIST_ITER_INIT(&uiter);
+	while ((unode = ulist_next(roots, &uiter))) {
+		qg = find_qgroup_rb(fs_info, unode->val);
+		if (!qg)
+			continue;
+
+		ulist_reinit(tmp);
+						/* XXX id not needed */
+		ulist_add(tmp, qg->qgroupid, (u64)(uintptr_t)qg, GFP_ATOMIC);
+		ULIST_ITER_INIT(&tmp_uiter);
+		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
+			struct btrfs_qgroup_list *glist;
+
+			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
+			if (qg->refcnt < seq)
+				qg->refcnt = seq + 1;
+			else
+				++qg->refcnt;
+
+			list_for_each_entry(glist, &qg->groups, next_group) {
+				ulist_add(tmp, glist->group->qgroupid,
+					  (u64)(uintptr_t)glist->group,
+					  GFP_ATOMIC);
+			}
+		}
+	}
+}
+
+static void qgroup_account_ref_step2(struct btrfs_fs_info *fs_info,
+				     struct ulist *roots, struct ulist *tmp,
+				     u64 seq, int sgn, u64 num_bytes,
+				     struct btrfs_qgroup *qgroup)
+{
+	struct ulist_node *unode;
+	struct ulist_iterator uiter;
+	struct btrfs_qgroup *qg;
+	struct btrfs_qgroup_list *glist;
+
+	ulist_reinit(tmp);
+	ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
+
+	ULIST_ITER_INIT(&uiter);
+	while ((unode = ulist_next(tmp, &uiter))) {
+
+		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
+		if (qg->refcnt < seq) {
+			/* not visited by step 1 */
+			qg->rfer += sgn * num_bytes;
+			qg->rfer_cmpr += sgn * num_bytes;
+			if (roots->nnodes == 0) {
+				qg->excl += sgn * num_bytes;
+				qg->excl_cmpr += sgn * num_bytes;
+			}
+			qgroup_dirty(fs_info, qg);
+		}
+		WARN_ON(qg->tag >= seq);
+		qg->tag = seq;
+
+		list_for_each_entry(glist, &qg->groups, next_group) {
+			ulist_add(tmp, glist->group->qgroupid,
+				  (uintptr_t)glist->group, GFP_ATOMIC);
+		}
+	}
+}
+
+static void qgroup_account_ref_step3(struct btrfs_fs_info *fs_info,
+				     struct ulist *roots, struct ulist *tmp,
+				     u64 seq, int sgn, u64 num_bytes)
+{
+	struct ulist_node *unode;
+	struct ulist_iterator uiter;
+	struct btrfs_qgroup *qg;
+	struct ulist_node *tmp_unode;
+	struct ulist_iterator tmp_uiter;
+
+	ULIST_ITER_INIT(&uiter);
+	while ((unode = ulist_next(roots, &uiter))) {
+		qg = find_qgroup_rb(fs_info, unode->val);
+		if (!qg)
+			continue;
+
+		ulist_reinit(tmp);
+		ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
+		ULIST_ITER_INIT(&tmp_uiter);
+		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
+			struct btrfs_qgroup_list *glist;
+
+			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
+			if (qg->tag == seq)
+				continue;
+
+			if (qg->refcnt - seq == roots->nnodes) {
+				qg->excl -= sgn * num_bytes;
+				qg->excl_cmpr -= sgn * num_bytes;
+				qgroup_dirty(fs_info, qg);
+			}
+
+			list_for_each_entry(glist, &qg->groups, next_group) {
+				ulist_add(tmp, glist->group->qgroupid,
+					  (uintptr_t)glist->group,
+					  GFP_ATOMIC);
+			}
+		}
+	}
+}
+
 /*
  * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
  * from the fs. First, all roots referencing the extent are searched, and
@@ -1090,10 +1206,8 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle
*trans,
 	struct btrfs_root *quota_root;
 	u64 ref_root;
 	struct btrfs_qgroup *qgroup;
-	struct ulist_node *unode;
 	struct ulist *roots = NULL;
 	struct ulist *tmp = NULL;
-	struct ulist_iterator uiter;
 	u64 seq;
 	int ret = 0;
 	int sgn;
@@ -1175,103 +1289,19 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle
*trans,
 	seq = fs_info->qgroup_seq;
 	fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
 
-	ULIST_ITER_INIT(&uiter);
-	while ((unode = ulist_next(roots, &uiter))) {
-		struct ulist_node *tmp_unode;
-		struct ulist_iterator tmp_uiter;
-		struct btrfs_qgroup *qg;
-
-		qg = find_qgroup_rb(fs_info, unode->val);
-		if (!qg)
-			continue;
-
-		ulist_reinit(tmp);
-						/* XXX id not needed */
-		ulist_add(tmp, qg->qgroupid, (u64)(uintptr_t)qg, GFP_ATOMIC);
-		ULIST_ITER_INIT(&tmp_uiter);
-		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
-			struct btrfs_qgroup_list *glist;
-
-			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
-			if (qg->refcnt < seq)
-				qg->refcnt = seq + 1;
-			else
-				++qg->refcnt;
-
-			list_for_each_entry(glist, &qg->groups, next_group) {
-				ulist_add(tmp, glist->group->qgroupid,
-					  (u64)(uintptr_t)glist->group,
-					  GFP_ATOMIC);
-			}
-		}
-	}
+	qgroup_account_ref_step1(fs_info, roots, tmp, seq);
 
 	/*
 	 * step 2: walk from the new root
 	 */
-	ulist_reinit(tmp);
-	ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
-	ULIST_ITER_INIT(&uiter);
-	while ((unode = ulist_next(tmp, &uiter))) {
-		struct btrfs_qgroup *qg;
-		struct btrfs_qgroup_list *glist;
-
-		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
-		if (qg->refcnt < seq) {
-			/* not visited by step 1 */
-			qg->rfer += sgn * node->num_bytes;
-			qg->rfer_cmpr += sgn * node->num_bytes;
-			if (roots->nnodes == 0) {
-				qg->excl += sgn * node->num_bytes;
-				qg->excl_cmpr += sgn * node->num_bytes;
-			}
-			qgroup_dirty(fs_info, qg);
-		}
-		WARN_ON(qg->tag >= seq);
-		qg->tag = seq;
-
-		list_for_each_entry(glist, &qg->groups, next_group) {
-			ulist_add(tmp, glist->group->qgroupid,
-				  (uintptr_t)glist->group, GFP_ATOMIC);
-		}
-	}
+	qgroup_account_ref_step2(fs_info, roots, tmp, seq, sgn,
+				 node->num_bytes, qgroup);
 
 	/*
 	 * step 3: walk again from old refs
 	 */
-	ULIST_ITER_INIT(&uiter);
-	while ((unode = ulist_next(roots, &uiter))) {
-		struct btrfs_qgroup *qg;
-		struct ulist_node *tmp_unode;
-		struct ulist_iterator tmp_uiter;
-
-		qg = find_qgroup_rb(fs_info, unode->val);
-		if (!qg)
-			continue;
-
-		ulist_reinit(tmp);
-		ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
-		ULIST_ITER_INIT(&tmp_uiter);
-		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
-			struct btrfs_qgroup_list *glist;
-
-			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
-			if (qg->tag == seq)
-				continue;
-
-			if (qg->refcnt - seq == roots->nnodes) {
-				qg->excl -= sgn * node->num_bytes;
-				qg->excl_cmpr -= sgn * node->num_bytes;
-				qgroup_dirty(fs_info, qg);
-			}
-
-			list_for_each_entry(glist, &qg->groups, next_group) {
-				ulist_add(tmp, glist->group->qgroupid,
-					  (uintptr_t)glist->group,
-					  GFP_ATOMIC);
-			}
-		}
-	}
+	qgroup_account_ref_step3(fs_info, roots, tmp, seq, sgn,
+				 node->num_bytes);
 	ret = 0;
 unlock:
 	spin_unlock(&fs_info->qgroup_lock);
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Jan Schmidt

2013-Apr-16 08:45 UTC

head link

[PATCH v2 2/3] Btrfs: rescan for qgroups

If qgroup tracking is out of sync, a rescan operation can be started. It
iterates the complete extent tree and recalculates all qgroup tracking data.
This is an expensive operation and should not be used unless required.

A filesystem under rescan can still be umounted. The rescan continues on the
next mount.  Status information is provided with a separate ioctl while a
rescan operation is in progress.

Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
---
 fs/btrfs/ctree.h           |   17 ++-
 fs/btrfs/disk-io.c         |    6 +
 fs/btrfs/ioctl.c           |   83 ++++++++++--
 fs/btrfs/qgroup.c          |  295 +++++++++++++++++++++++++++++++++++++++++--
 include/uapi/linux/btrfs.h |   12 ++-
 5 files changed, 378 insertions(+), 35 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0d82922..bd4e2a7 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1019,9 +1019,9 @@ struct btrfs_block_group_item {
  */
 #define BTRFS_QGROUP_STATUS_FLAG_ON		(1ULL << 0)
 /*
- * SCANNING is set during the initialization phase
+ * RESCAN is set during the initialization phase
  */
-#define BTRFS_QGROUP_STATUS_FLAG_SCANNING	(1ULL << 1)
+#define BTRFS_QGROUP_STATUS_FLAG_RESCAN		(1ULL << 1)
 /*
  * Some qgroup entries are known to be out of date,
  * either because the configuration has changed in a way that
@@ -1050,7 +1050,7 @@ struct btrfs_qgroup_status_item {
 	 * only used during scanning to record the progress
 	 * of the scan. It contains a logical address
 	 */
-	__le64 scan;
+	__le64 rescan;
 } __attribute__ ((__packed__));
 
 struct btrfs_qgroup_info_item {
@@ -1587,6 +1587,11 @@ struct btrfs_fs_info {
 	/* used by btrfs_qgroup_record_ref for an efficient tree traversal */
 	u64 qgroup_seq;
 
+	/* qgroup rescan items */
+	struct mutex qgroup_rescan_lock; /* protects the progress item */
+	struct btrfs_key qgroup_rescan_progress;
+	struct btrfs_workers qgroup_rescan_workers;
+
 	/* filesystem state */
 	unsigned long fs_state;
 
@@ -2864,8 +2869,8 @@ BTRFS_SETGET_FUNCS(qgroup_status_version, struct
btrfs_qgroup_status_item,
 		   version, 64);
 BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item,
 		   flags, 64);
-BTRFS_SETGET_FUNCS(qgroup_status_scan, struct btrfs_qgroup_status_item,
-		   scan, 64);
+BTRFS_SETGET_FUNCS(qgroup_status_rescan, struct btrfs_qgroup_status_item,
+		   rescan, 64);
 
 /* btrfs_qgroup_info_item */
 BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item,
@@ -3784,7 +3789,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
 		       struct btrfs_fs_info *fs_info);
 int btrfs_quota_disable(struct btrfs_trans_handle *trans,
 			struct btrfs_fs_info *fs_info);
-int btrfs_quota_rescan(struct btrfs_fs_info *fs_info);
+int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
 			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6d19a0a..60d15fe 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2192,6 +2192,7 @@ int open_ctree(struct super_block *sb,
 	fs_info->qgroup_seq = 1;
 	fs_info->quota_enabled = 0;
 	fs_info->pending_quota_state = 0;
+	mutex_init(&fs_info->qgroup_rescan_lock);
 
 	btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
 	btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
@@ -2394,6 +2395,8 @@ int open_ctree(struct super_block *sb,
 	btrfs_init_workers(&fs_info->readahead_workers, "readahead",
 			   fs_info->thread_pool_size,
 			   &fs_info->generic_worker);
+	btrfs_init_workers(&fs_info->qgroup_rescan_workers,
"qgroup-rescan", 1,
+			   &fs_info->generic_worker);
 
 	/*
 	 * endios are largely parallel and should have a very
@@ -2428,6 +2431,7 @@ int open_ctree(struct super_block *sb,
 	ret |= btrfs_start_workers(&fs_info->caching_workers);
 	ret |= btrfs_start_workers(&fs_info->readahead_workers);
 	ret |= btrfs_start_workers(&fs_info->flush_workers);
+	ret |= btrfs_start_workers(&fs_info->qgroup_rescan_workers);
 	if (ret) {
 		err = -ENOMEM;
 		goto fail_sb_buffer;
@@ -2773,6 +2777,7 @@ fail_sb_buffer:
 	btrfs_stop_workers(&fs_info->delayed_workers);
 	btrfs_stop_workers(&fs_info->caching_workers);
 	btrfs_stop_workers(&fs_info->flush_workers);
+	btrfs_stop_workers(&fs_info->qgroup_rescan_workers);
 fail_alloc:
 fail_iput:
 	btrfs_mapping_tree_free(&fs_info->mapping_tree);
@@ -3463,6 +3468,7 @@ int close_ctree(struct btrfs_root *root)
 	btrfs_stop_workers(&fs_info->caching_workers);
 	btrfs_stop_workers(&fs_info->readahead_workers);
 	btrfs_stop_workers(&fs_info->flush_workers);
+	btrfs_stop_workers(&fs_info->qgroup_rescan_workers);
 
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
 	if (btrfs_test_opt(root, CHECK_INTEGRITY))
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 898c572..17b53df 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3693,12 +3693,10 @@ static long btrfs_ioctl_quota_ctl(struct file *file,
void __user *arg)
 		goto drop_write;
 	}
 
-	if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) {
-		trans = btrfs_start_transaction(root, 2);
-		if (IS_ERR(trans)) {
-			ret = PTR_ERR(trans);
-			goto out;
-		}
+	trans = btrfs_start_transaction(root, 2);
+	if (IS_ERR(trans)) {
+		ret = PTR_ERR(trans);
+		goto out;
 	}
 
 	switch (sa->cmd) {
@@ -3708,9 +3706,6 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void
__user *arg)
 	case BTRFS_QUOTA_CTL_DISABLE:
 		ret = btrfs_quota_disable(trans, root->fs_info);
 		break;
-	case BTRFS_QUOTA_CTL_RESCAN:
-		ret = btrfs_quota_rescan(root->fs_info);
-		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -3719,11 +3714,9 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void
__user *arg)
 	if (copy_to_user(arg, sa, sizeof(*sa)))
 		ret = -EFAULT;
 
-	if (trans) {
-		err = btrfs_commit_transaction(trans, root);
-		if (err && !ret)
-			ret = err;
-	}
+	err = btrfs_commit_transaction(trans, root);
+	if (err && !ret)
+		ret = err;
 out:
 	kfree(sa);
 drop_write:
@@ -3877,6 +3870,64 @@ drop_write:
 	return ret;
 }
 
+static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg)
+{
+	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+	struct btrfs_ioctl_quota_rescan_args *qsa;
+	int ret;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	ret = mnt_want_write_file(file);
+	if (ret)
+		return ret;
+
+	qsa = memdup_user(arg, sizeof(*qsa));
+	if (IS_ERR(qsa)) {
+		ret = PTR_ERR(qsa);
+		goto drop_write;
+	}
+
+	if (qsa->flags) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = btrfs_qgroup_rescan(root->fs_info);
+
+out:
+	kfree(qsa);
+drop_write:
+	mnt_drop_write_file(file);
+	return ret;
+}
+
+static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user
*arg)
+{
+	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+	struct btrfs_ioctl_quota_rescan_args *qsa;
+	int ret = 0;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	qsa = kzalloc(sizeof(*qsa), GFP_NOFS);
+	if (!qsa)
+		return -ENOMEM;
+
+	if (root->fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+		qsa->flags = 1;
+		qsa->progress = root->fs_info->qgroup_rescan_progress.objectid;
+	}
+
+	if (copy_to_user(arg, qsa, sizeof(*qsa)))
+		ret = -EFAULT;
+
+	kfree(qsa);
+	return ret;
+}
+
 static long btrfs_ioctl_set_received_subvol(struct file *file,
 					    void __user *arg)
 {
@@ -4115,6 +4166,10 @@ long btrfs_ioctl(struct file *file, unsigned int
 		return btrfs_ioctl_qgroup_create(file, argp);
 	case BTRFS_IOC_QGROUP_LIMIT:
 		return btrfs_ioctl_qgroup_limit(file, argp);
+	case BTRFS_IOC_QUOTA_RESCAN:
+		return btrfs_ioctl_quota_rescan(file, argp);
+	case BTRFS_IOC_QUOTA_RESCAN_STATUS:
+		return btrfs_ioctl_quota_rescan_status(file, argp);
 	case BTRFS_IOC_DEV_REPLACE:
 		return btrfs_ioctl_dev_replace(root, argp);
 	case BTRFS_IOC_GET_FSLABEL:
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index c38a0c5..bb081b5 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -31,13 +31,13 @@
 #include "locking.h"
 #include "ulist.h"
 #include "backref.h"
+#include "extent_io.h"
 
 /* TODO XXX FIXME
  *  - subvol delete -> delete when ref goes to 0? delete limits also?
  *  - reorganize keys
  *  - compressed
  *  - sync
- *  - rescan
  *  - copy also limits on subvol creation
  *  - limit
  *  - caches fuer ulists
@@ -98,6 +98,14 @@ struct btrfs_qgroup_list {
 	struct btrfs_qgroup *member;
 };
 
+struct qgroup_rescan {
+	struct btrfs_work	work;
+	struct btrfs_fs_info	*fs_info;
+};
+
+static void qgroup_rescan_start(struct btrfs_fs_info *fs_info,
+				struct qgroup_rescan *qscan);
+
 /* must be called with qgroup_lock held */
 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
 					   u64 qgroupid)
@@ -298,7 +306,20 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
 			}
 			fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
 									  ptr);
-			/* FIXME read scan element */
+			fs_info->qgroup_rescan_progress.objectid +				
btrfs_qgroup_status_rescan(l, ptr);
+			if (fs_info->qgroup_flags &
+			    BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+				struct qgroup_rescan *qscan +					kmalloc(sizeof(*qscan), GFP_NOFS);
+				if (!qscan) {
+					ret = -ENOMEM;
+					goto out;
+				}
+				fs_info->qgroup_rescan_progress.type = 0;
+				fs_info->qgroup_rescan_progress.offset = 0;
+				qgroup_rescan_start(fs_info, qscan);
+			}
 			goto next1;
 		}
 
@@ -719,9 +740,12 @@ static int update_qgroup_status_item(struct
btrfs_trans_handle *trans,
 	l = path->nodes[0];
 	slot = path->slots[0];
 	ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
+	spin_lock(&fs_info->qgroup_lock);
 	btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
 	btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
-	/* XXX scan */
+	btrfs_set_qgroup_status_rescan(l, ptr,
+				fs_info->qgroup_rescan_progress.objectid);
+	spin_unlock(&fs_info->qgroup_lock);
 
 	btrfs_mark_buffer_dirty(l);
 
@@ -830,7 +854,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
 	fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
 				BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
 	btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
-	btrfs_set_qgroup_status_scan(leaf, ptr, 0);
+	btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
 
 	btrfs_mark_buffer_dirty(leaf);
 
@@ -894,10 +918,11 @@ out:
 	return ret;
 }
 
-int btrfs_quota_rescan(struct btrfs_fs_info *fs_info)
+static void qgroup_dirty(struct btrfs_fs_info *fs_info,
+			 struct btrfs_qgroup *qgroup)
 {
-	/* FIXME */
-	return 0;
+	if (list_empty(&qgroup->dirty))
+		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
 }
 
 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
@@ -1045,13 +1070,6 @@ unlock:
 	return ret;
 }
 
-static void qgroup_dirty(struct btrfs_fs_info *fs_info,
-			 struct btrfs_qgroup *qgroup)
-{
-	if (list_empty(&qgroup->dirty))
-		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
-}
-
 /*
  * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts
  * the modification into a list that''s later used by
btrfs_end_transaction to
@@ -1256,6 +1274,15 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle
*trans,
 		BUG();
 	}
 
+	mutex_lock(&fs_info->qgroup_rescan_lock);
+	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+		if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
+			mutex_unlock(&fs_info->qgroup_rescan_lock);
+			return 0;
+		}
+	}
+	mutex_unlock(&fs_info->qgroup_rescan_lock);
+
 	/*
 	 * the delayed ref sequence number we pass depends on the direction of
 	 * the operation. for add operations, we pass (node->seq - 1) to skip
@@ -1269,7 +1296,17 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle
*trans,
 	if (ret < 0)
 		return ret;
 
+	mutex_lock(&fs_info->qgroup_rescan_lock);
 	spin_lock(&fs_info->qgroup_lock);
+	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+		if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
+			ret = 0;
+			mutex_unlock(&fs_info->qgroup_rescan_lock);
+			goto unlock;
+		}
+	}
+	mutex_unlock(&fs_info->qgroup_rescan_lock);
+
 	quota_root = fs_info->quota_root;
 	if (!quota_root)
 		goto unlock;
@@ -1652,3 +1689,233 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle
*trans)
 		trans->delayed_ref_elem.seq);
 	BUG();
 }
+
+/*
+ * returns < 0 on error, 0 when more leafs are to be scanned.
+ * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
+ */
+static int
+qgroup_rescan_leaf(struct qgroup_rescan *qscan, struct btrfs_path *path,
+		   struct btrfs_trans_handle *trans, struct ulist *tmp,
+		   struct extent_buffer *scratch_leaf)
+{
+	struct btrfs_key found;
+	struct btrfs_fs_info *fs_info = qscan->fs_info;
+	struct ulist *roots = NULL;
+	struct ulist_node *unode;
+	struct ulist_iterator uiter;
+	struct seq_list tree_mod_seq_elem = {};
+	u64 seq;
+	int slot;
+	int ret;
+
+	path->leave_spinning = 1;
+	mutex_lock(&fs_info->qgroup_rescan_lock);
+	ret = btrfs_search_slot_for_read(fs_info->extent_root,
+					 &fs_info->qgroup_rescan_progress,
+					 path, 1, 0);
+
+	pr_debug("current progress key (%llu %u %llu), search_slot ret
%d\n",
+		 (unsigned long long)fs_info->qgroup_rescan_progress.objectid,
+		 fs_info->qgroup_rescan_progress.type,
+		 (unsigned long long)fs_info->qgroup_rescan_progress.offset,
+		 ret);
+
+	if (ret) {
+		/*
+		 * The rescan is about to end, we will not be scanning any
+		 * further blocks. We cannot unset the RESCAN flag here, because
+		 * we want to commit the transaction if everything went well.
+		 * To make the live accounting work in this phase, we set our
+		 * scan progress pointer such that every real extent objectid
+		 * will be smaller.
+		 */
+		fs_info->qgroup_rescan_progress.objectid = (u64)-1;
+		btrfs_release_path(path);
+		mutex_unlock(&fs_info->qgroup_rescan_lock);
+		return ret;
+	}
+
+	btrfs_item_key_to_cpu(path->nodes[0], &found,
+			      btrfs_header_nritems(path->nodes[0]) - 1);
+	fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
+
+	btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+	memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
+	slot = path->slots[0];
+	btrfs_release_path(path);
+	mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+	for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
+		btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
+		if (found.type != BTRFS_EXTENT_ITEM_KEY)
+			continue;
+		ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
+					   tree_mod_seq_elem.seq, &roots);
+		if (ret < 0)
+			break;
+		spin_lock(&fs_info->qgroup_lock);
+		seq = fs_info->qgroup_seq;
+		fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
+
+		ulist_reinit(tmp);
+		ULIST_ITER_INIT(&uiter);
+		while ((unode = ulist_next(roots, &uiter))) {
+			struct btrfs_qgroup *qg;
+
+			qg = find_qgroup_rb(fs_info, unode->val);
+			if (!qg)
+				continue;
+
+			ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
+		}
+
+		/* this is similar to step 2 of btrfs_qgroup_account_ref */
+		ULIST_ITER_INIT(&uiter);
+		while ((unode = ulist_next(tmp, &uiter))) {
+			struct btrfs_qgroup *qg;
+			struct btrfs_qgroup_list *glist;
+
+			qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
+			qg->rfer += found.offset;
+			qg->rfer_cmpr += found.offset;
+			WARN_ON(qg->tag >= seq);
+			WARN_ON(qg->refcnt >= seq);
+			if (qg->refcnt < seq)
+				qg->refcnt = seq + 1;
+			else
+				qg->refcnt = qg->refcnt + 1;
+			qgroup_dirty(fs_info, qg);
+
+			list_for_each_entry(glist, &qg->groups, next_group) {
+				ulist_add(tmp, glist->group->qgroupid,
+					  (uintptr_t)glist->group,
+					  GFP_ATOMIC);
+			}
+		}
+
+		qgroup_account_ref_step3(fs_info, roots, tmp, seq, -1,
+					 found.offset);
+
+		spin_unlock(&fs_info->qgroup_lock);
+		ulist_free(roots);
+	}
+
+	btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+
+	return ret;
+}
+
+static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
+{
+	struct qgroup_rescan *qscan = container_of(work, struct qgroup_rescan,
+						   work);
+	struct btrfs_path *path;
+	struct btrfs_trans_handle *trans = NULL;
+	struct btrfs_fs_info *fs_info = qscan->fs_info;
+	struct ulist *tmp = NULL;
+	struct extent_buffer *scratch_leaf = NULL;
+	int err = -ENOMEM;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		goto out;
+	tmp = ulist_alloc(GFP_NOFS);
+	if (!tmp)
+		goto out;
+	scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
+	if (!scratch_leaf)
+		goto out;
+
+	err = 0;
+	while (!err) {
+		trans = btrfs_start_transaction(fs_info->fs_root, 0);
+		if (IS_ERR(trans)) {
+			err = PTR_ERR(trans);
+			break;
+		}
+		err = qgroup_rescan_leaf(qscan, path, trans, tmp, scratch_leaf);
+		if (err > 0)
+			btrfs_commit_transaction(trans, fs_info->fs_root);
+		else
+			btrfs_end_transaction(trans, fs_info->fs_root);
+	}
+
+out:
+	kfree(scratch_leaf);
+	ulist_free(tmp);
+	btrfs_free_path(path);
+	kfree(qscan);
+
+	mutex_lock(&fs_info->qgroup_rescan_lock);
+	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+
+	if (err == 2 &&
+	    fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
+		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+	} else if (err < 0) {
+		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+	}
+	mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+	if (err >= 0) {
+		pr_info("btrfs: qgroup scan completed%s\n",
+			err == 2 ? " (inconsistency flag cleared)" : "");
+	} else {
+		pr_err("btrfs: qgroup scan failed with %d\n", err);
+	}
+}
+
+static void
+qgroup_rescan_start(struct btrfs_fs_info *fs_info, struct qgroup_rescan *qscan)
+{
+	qscan->work.func = btrfs_qgroup_rescan_worker;
+	qscan->fs_info = fs_info;
+
+	pr_info("btrfs: qgroup scan started\n");
+	btrfs_queue_worker(&fs_info->qgroup_rescan_workers,
&qscan->work);
+}
+
+int
+btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
+{
+	int ret = 0;
+	struct rb_node *n;
+	struct btrfs_qgroup *qgroup;
+	struct qgroup_rescan *qscan = kmalloc(sizeof(*qscan), GFP_NOFS);
+
+	if (!qscan)
+		return -ENOMEM;
+
+	mutex_lock(&fs_info->qgroup_rescan_lock);
+	spin_lock(&fs_info->qgroup_lock);
+	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
+		ret = -EINPROGRESS;
+	else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
+		ret = -EINVAL;
+	if (ret) {
+		spin_unlock(&fs_info->qgroup_lock);
+		mutex_unlock(&fs_info->qgroup_rescan_lock);
+		kfree(qscan);
+		return ret;
+	}
+
+	fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+	memset(&fs_info->qgroup_rescan_progress, 0,
+		sizeof(fs_info->qgroup_rescan_progress));
+
+	/* clear all current qgroup tracking information */
+	for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
+		qgroup = rb_entry(n, struct btrfs_qgroup, node);
+		qgroup->rfer = 0;
+		qgroup->rfer_cmpr = 0;
+		qgroup->excl = 0;
+		qgroup->excl_cmpr = 0;
+	}
+	spin_unlock(&fs_info->qgroup_lock);
+	mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+	qgroup_rescan_start(fs_info, qscan);
+
+	return 0;
+}
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index fa3a5f9..ca70f08 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -376,12 +376,18 @@ struct btrfs_ioctl_get_dev_stats {
 
 #define BTRFS_QUOTA_CTL_ENABLE	1
 #define BTRFS_QUOTA_CTL_DISABLE	2
-#define BTRFS_QUOTA_CTL_RESCAN	3
+#define BTRFS_QUOTA_CTL_RESCAN__NOTUSED	3
 struct btrfs_ioctl_quota_ctl_args {
 	__u64 cmd;
 	__u64 status;
 };
 
+struct btrfs_ioctl_quota_rescan_args {
+	__u64	flags;
+	__u64   progress;
+	__u64   reserved[6];
+};
+
 struct btrfs_ioctl_qgroup_assign_args {
 	__u64 assign;
 	__u64 src;
@@ -502,6 +508,10 @@ struct btrfs_ioctl_send_args {
 			       struct btrfs_ioctl_qgroup_create_args)
 #define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \
 			       struct btrfs_ioctl_qgroup_limit_args)
+#define BTRFS_IOC_QUOTA_RESCAN _IOW(BTRFS_IOCTL_MAGIC, 44, \
+			       struct btrfs_ioctl_quota_rescan_args)
+#define BTRFS_IOC_QUOTA_RESCAN_STATUS _IOR(BTRFS_IOCTL_MAGIC, 45, \
+			       struct btrfs_ioctl_quota_rescan_args)
 #define BTRFS_IOC_GET_FSLABEL _IOR(BTRFS_IOCTL_MAGIC, 49, \
 				   char[BTRFS_LABEL_SIZE])
 #define BTRFS_IOC_SET_FSLABEL _IOW(BTRFS_IOCTL_MAGIC, 50, \
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Jan Schmidt

2013-Apr-16 08:45 UTC

head link

[PATCH v2 3/3] Btrfs: automatic rescan after "quota enable" command

When qgroup tracking is enabled, we do an automatic cycle of the new rescan
mechanism.

Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
---
 fs/btrfs/qgroup.c |   10 ++++++++++
 1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index bb081b5..0ea2c3e 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1356,10 +1356,14 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
 {
 	struct btrfs_root *quota_root = fs_info->quota_root;
 	int ret = 0;
+	int start_rescan_worker = 0;
 
 	if (!quota_root)
 		goto out;
 
+	if (!fs_info->quota_enabled && fs_info->pending_quota_state)
+		start_rescan_worker = 1;
+
 	fs_info->quota_enabled = fs_info->pending_quota_state;
 
 	spin_lock(&fs_info->qgroup_lock);
@@ -1385,6 +1389,12 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
 	if (ret)
 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
 
+	if (start_rescan_worker) {
+		ret = btrfs_qgroup_rescan(fs_info);
+		if (ret)
+			pr_err("btrfs: start rescan quota failed: %d\n", ret);
+	}
+
 out:
 
 	return ret;
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Wang Shilong

2013-Apr-16 09:20 UTC

head link

Re: [PATCH v2 1/3] Btrfs: split btrfs_qgroup_account_ref into four functions

Hello Jan,
> The function is separated into a preparation part and the three accounting
> steps mentioned in the qgroups documentation. The goal is to make steps two
> and three usable by the rescan functionality. A side effect is that the
> function is restructured into readable subunits.

How about renaming the three functions like:

1> qgroup_walk_old_roots()
2> qgroup_walk_new_root()
3> qgroup_rewalk_old_root()

I''d like this function to be meaningful, but not just step1,2,3.
Maybe you can think out better function name.

Thanks,
Wang
> 
> Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
> ---
>  fs/btrfs/qgroup.c |  212
++++++++++++++++++++++++++++++-----------------------
>  1 files changed, 121 insertions(+), 91 deletions(-)
> 
> diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
> index b44124d..c38a0c5 100644
> --- a/fs/btrfs/qgroup.c
> +++ b/fs/btrfs/qgroup.c
> @@ -1075,6 +1075,122 @@ int btrfs_qgroup_record_ref(struct
btrfs_trans_handle *trans,
>  	return 0;
>  }
>  
> +static void qgroup_account_ref_step1(struct btrfs_fs_info *fs_info,
> +				     struct ulist *roots, struct ulist *tmp,
> +				     u64 seq)
> +{
> +	struct ulist_node *unode;
> +	struct ulist_iterator uiter;
> +	struct ulist_node *tmp_unode;
> +	struct ulist_iterator tmp_uiter;
> +	struct btrfs_qgroup *qg;
> +
> +	ULIST_ITER_INIT(&uiter);
> +	while ((unode = ulist_next(roots, &uiter))) {
> +		qg = find_qgroup_rb(fs_info, unode->val);
> +		if (!qg)
> +			continue;
> +
> +		ulist_reinit(tmp);
> +						/* XXX id not needed */
> +		ulist_add(tmp, qg->qgroupid, (u64)(uintptr_t)qg, GFP_ATOMIC);
> +		ULIST_ITER_INIT(&tmp_uiter);
> +		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
> +			struct btrfs_qgroup_list *glist;
> +
> +			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
> +			if (qg->refcnt < seq)
> +				qg->refcnt = seq + 1;
> +			else
> +				++qg->refcnt;
> +
> +			list_for_each_entry(glist, &qg->groups, next_group) {
> +				ulist_add(tmp, glist->group->qgroupid,
> +					  (u64)(uintptr_t)glist->group,
> +					  GFP_ATOMIC);
> +			}
> +		}
> +	}
> +}
> +
> +static void qgroup_account_ref_step2(struct btrfs_fs_info *fs_info,
> +				     struct ulist *roots, struct ulist *tmp,
> +				     u64 seq, int sgn, u64 num_bytes,
> +				     struct btrfs_qgroup *qgroup)
> +{
> +	struct ulist_node *unode;
> +	struct ulist_iterator uiter;
> +	struct btrfs_qgroup *qg;
> +	struct btrfs_qgroup_list *glist;
> +
> +	ulist_reinit(tmp);
> +	ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
> +
> +	ULIST_ITER_INIT(&uiter);
> +	while ((unode = ulist_next(tmp, &uiter))) {
> +
> +		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
> +		if (qg->refcnt < seq) {
> +			/* not visited by step 1 */
> +			qg->rfer += sgn * num_bytes;
> +			qg->rfer_cmpr += sgn * num_bytes;
> +			if (roots->nnodes == 0) {
> +				qg->excl += sgn * num_bytes;
> +				qg->excl_cmpr += sgn * num_bytes;
> +			}
> +			qgroup_dirty(fs_info, qg);
> +		}
> +		WARN_ON(qg->tag >= seq);
> +		qg->tag = seq;
> +
> +		list_for_each_entry(glist, &qg->groups, next_group) {
> +			ulist_add(tmp, glist->group->qgroupid,
> +				  (uintptr_t)glist->group, GFP_ATOMIC);
> +		}
> +	}
> +}
> +
> +static void qgroup_account_ref_step3(struct btrfs_fs_info *fs_info,
> +				     struct ulist *roots, struct ulist *tmp,
> +				     u64 seq, int sgn, u64 num_bytes)
> +{
> +	struct ulist_node *unode;
> +	struct ulist_iterator uiter;
> +	struct btrfs_qgroup *qg;
> +	struct ulist_node *tmp_unode;
> +	struct ulist_iterator tmp_uiter;
> +
> +	ULIST_ITER_INIT(&uiter);
> +	while ((unode = ulist_next(roots, &uiter))) {
> +		qg = find_qgroup_rb(fs_info, unode->val);
> +		if (!qg)
> +			continue;
> +
> +		ulist_reinit(tmp);
> +		ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
> +		ULIST_ITER_INIT(&tmp_uiter);
> +		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
> +			struct btrfs_qgroup_list *glist;
> +
> +			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
> +			if (qg->tag == seq)
> +				continue;
> +
> +			if (qg->refcnt - seq == roots->nnodes) {
> +				qg->excl -= sgn * num_bytes;
> +				qg->excl_cmpr -= sgn * num_bytes;
> +				qgroup_dirty(fs_info, qg);
> +			}
> +
> +			list_for_each_entry(glist, &qg->groups, next_group) {
> +				ulist_add(tmp, glist->group->qgroupid,
> +					  (uintptr_t)glist->group,
> +					  GFP_ATOMIC);
> +			}
> +		}
> +	}
> +}
> +
>  /*
>   * btrfs_qgroup_account_ref is called for every ref that is added to or
deleted
>   * from the fs. First, all roots referencing the extent are searched, and
> @@ -1090,10 +1206,8 @@ int btrfs_qgroup_account_ref(struct
btrfs_trans_handle *trans,
>  	struct btrfs_root *quota_root;
>  	u64 ref_root;
>  	struct btrfs_qgroup *qgroup;
> -	struct ulist_node *unode;
>  	struct ulist *roots = NULL;
>  	struct ulist *tmp = NULL;
> -	struct ulist_iterator uiter;
>  	u64 seq;
>  	int ret = 0;
>  	int sgn;
> @@ -1175,103 +1289,19 @@ int btrfs_qgroup_account_ref(struct
btrfs_trans_handle *trans,
>  	seq = fs_info->qgroup_seq;
>  	fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
>  
> -	ULIST_ITER_INIT(&uiter);
> -	while ((unode = ulist_next(roots, &uiter))) {
> -		struct ulist_node *tmp_unode;
> -		struct ulist_iterator tmp_uiter;
> -		struct btrfs_qgroup *qg;
> -
> -		qg = find_qgroup_rb(fs_info, unode->val);
> -		if (!qg)
> -			continue;
> -
> -		ulist_reinit(tmp);
> -						/* XXX id not needed */
> -		ulist_add(tmp, qg->qgroupid, (u64)(uintptr_t)qg, GFP_ATOMIC);
> -		ULIST_ITER_INIT(&tmp_uiter);
> -		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
> -			struct btrfs_qgroup_list *glist;
> -
> -			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
> -			if (qg->refcnt < seq)
> -				qg->refcnt = seq + 1;
> -			else
> -				++qg->refcnt;
> -
> -			list_for_each_entry(glist, &qg->groups, next_group) {
> -				ulist_add(tmp, glist->group->qgroupid,
> -					  (u64)(uintptr_t)glist->group,
> -					  GFP_ATOMIC);
> -			}
> -		}
> -	}
> +	qgroup_account_ref_step1(fs_info, roots, tmp, seq);
>  
>  	/*
>  	 * step 2: walk from the new root
>  	 */
> -	ulist_reinit(tmp);
> -	ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
> -	ULIST_ITER_INIT(&uiter);
> -	while ((unode = ulist_next(tmp, &uiter))) {
> -		struct btrfs_qgroup *qg;
> -		struct btrfs_qgroup_list *glist;
> -
> -		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
> -		if (qg->refcnt < seq) {
> -			/* not visited by step 1 */
> -			qg->rfer += sgn * node->num_bytes;
> -			qg->rfer_cmpr += sgn * node->num_bytes;
> -			if (roots->nnodes == 0) {
> -				qg->excl += sgn * node->num_bytes;
> -				qg->excl_cmpr += sgn * node->num_bytes;
> -			}
> -			qgroup_dirty(fs_info, qg);
> -		}
> -		WARN_ON(qg->tag >= seq);
> -		qg->tag = seq;
> -
> -		list_for_each_entry(glist, &qg->groups, next_group) {
> -			ulist_add(tmp, glist->group->qgroupid,
> -				  (uintptr_t)glist->group, GFP_ATOMIC);
> -		}
> -	}
> +	qgroup_account_ref_step2(fs_info, roots, tmp, seq, sgn,
> +				 node->num_bytes, qgroup);
>  
>  	/*
>  	 * step 3: walk again from old refs
>  	 */
> -	ULIST_ITER_INIT(&uiter);
> -	while ((unode = ulist_next(roots, &uiter))) {
> -		struct btrfs_qgroup *qg;
> -		struct ulist_node *tmp_unode;
> -		struct ulist_iterator tmp_uiter;
> -
> -		qg = find_qgroup_rb(fs_info, unode->val);
> -		if (!qg)
> -			continue;
> -
> -		ulist_reinit(tmp);
> -		ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
> -		ULIST_ITER_INIT(&tmp_uiter);
> -		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
> -			struct btrfs_qgroup_list *glist;
> -
> -			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
> -			if (qg->tag == seq)
> -				continue;
> -
> -			if (qg->refcnt - seq == roots->nnodes) {
> -				qg->excl -= sgn * node->num_bytes;
> -				qg->excl_cmpr -= sgn * node->num_bytes;
> -				qgroup_dirty(fs_info, qg);
> -			}
> -
> -			list_for_each_entry(glist, &qg->groups, next_group) {
> -				ulist_add(tmp, glist->group->qgroupid,
> -					  (uintptr_t)glist->group,
> -					  GFP_ATOMIC);
> -			}
> -		}
> -	}
> +	qgroup_account_ref_step3(fs_info, roots, tmp, seq, sgn,
> +				 node->num_bytes);
>  	ret = 0;
>  unlock:
>  	spin_unlock(&fs_info->qgroup_lock);


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Wang Shilong

2013-Apr-16 09:26 UTC

head link

Re: [PATCH v2 2/3] Btrfs: rescan for qgroups

Hello, Jan
> If qgroup tracking is out of sync, a rescan operation can be started. It
> iterates the complete extent tree and recalculates all qgroup tracking
data.
> This is an expensive operation and should not be used unless required.
> 
> A filesystem under rescan can still be umounted. The rescan continues on
the
> next mount.  Status information is provided with a separate ioctl while a
> rescan operation is in progress.
> 
> Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
> ---
>  fs/btrfs/ctree.h           |   17 ++-
>  fs/btrfs/disk-io.c         |    6 +
>  fs/btrfs/ioctl.c           |   83 ++++++++++--
>  fs/btrfs/qgroup.c          |  295
+++++++++++++++++++++++++++++++++++++++++--
>  include/uapi/linux/btrfs.h |   12 ++-
>  5 files changed, 378 insertions(+), 35 deletions(-)
> 
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 0d82922..bd4e2a7 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -1019,9 +1019,9 @@ struct btrfs_block_group_item {
>   */
>  #define BTRFS_QGROUP_STATUS_FLAG_ON		(1ULL << 0)
>  /*
> - * SCANNING is set during the initialization phase
> + * RESCAN is set during the initialization phase
>   */
> -#define BTRFS_QGROUP_STATUS_FLAG_SCANNING	(1ULL << 1)
> +#define BTRFS_QGROUP_STATUS_FLAG_RESCAN		(1ULL << 1)
>  /*
>   * Some qgroup entries are known to be out of date,
>   * either because the configuration has changed in a way that
> @@ -1050,7 +1050,7 @@ struct btrfs_qgroup_status_item {
>  	 * only used during scanning to record the progress
>  	 * of the scan. It contains a logical address
>  	 */
> -	__le64 scan;
> +	__le64 rescan;
>  } __attribute__ ((__packed__));
>  
>  struct btrfs_qgroup_info_item {
> @@ -1587,6 +1587,11 @@ struct btrfs_fs_info {
>  	/* used by btrfs_qgroup_record_ref for an efficient tree traversal */
>  	u64 qgroup_seq;
>  
> +	/* qgroup rescan items */
> +	struct mutex qgroup_rescan_lock; /* protects the progress item */
> +	struct btrfs_key qgroup_rescan_progress;
> +	struct btrfs_workers qgroup_rescan_workers;
> +
>  	/* filesystem state */
>  	unsigned long fs_state;
>  
> @@ -2864,8 +2869,8 @@ BTRFS_SETGET_FUNCS(qgroup_status_version, struct
btrfs_qgroup_status_item,
>  		   version, 64);
>  BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item,
>  		   flags, 64);
> -BTRFS_SETGET_FUNCS(qgroup_status_scan, struct btrfs_qgroup_status_item,
> -		   scan, 64);
> +BTRFS_SETGET_FUNCS(qgroup_status_rescan, struct btrfs_qgroup_status_item,
> +		   rescan, 64);
>  
>  /* btrfs_qgroup_info_item */
>  BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item,
> @@ -3784,7 +3789,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle
*trans,
>  		       struct btrfs_fs_info *fs_info);
>  int btrfs_quota_disable(struct btrfs_trans_handle *trans,
>  			struct btrfs_fs_info *fs_info);
> -int btrfs_quota_rescan(struct btrfs_fs_info *fs_info);
> +int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
>  int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
>  			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
>  int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 6d19a0a..60d15fe 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -2192,6 +2192,7 @@ int open_ctree(struct super_block *sb,
>  	fs_info->qgroup_seq = 1;
>  	fs_info->quota_enabled = 0;
>  	fs_info->pending_quota_state = 0;
> +	mutex_init(&fs_info->qgroup_rescan_lock);
>  
>  	btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
>  	btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
> @@ -2394,6 +2395,8 @@ int open_ctree(struct super_block *sb,
>  	btrfs_init_workers(&fs_info->readahead_workers,
"readahead",
>  			   fs_info->thread_pool_size,
>  			   &fs_info->generic_worker);
> +	btrfs_init_workers(&fs_info->qgroup_rescan_workers,
"qgroup-rescan", 1,
> +			   &fs_info->generic_worker);
>  
>  	/*
>  	 * endios are largely parallel and should have a very
> @@ -2428,6 +2431,7 @@ int open_ctree(struct super_block *sb,
>  	ret |= btrfs_start_workers(&fs_info->caching_workers);
>  	ret |= btrfs_start_workers(&fs_info->readahead_workers);
>  	ret |= btrfs_start_workers(&fs_info->flush_workers);
> +	ret |= btrfs_start_workers(&fs_info->qgroup_rescan_workers);
>  	if (ret) {
>  		err = -ENOMEM;
>  		goto fail_sb_buffer;
> @@ -2773,6 +2777,7 @@ fail_sb_buffer:
>  	btrfs_stop_workers(&fs_info->delayed_workers);
>  	btrfs_stop_workers(&fs_info->caching_workers);
>  	btrfs_stop_workers(&fs_info->flush_workers);
> +	btrfs_stop_workers(&fs_info->qgroup_rescan_workers);
>  fail_alloc:
>  fail_iput:
>  	btrfs_mapping_tree_free(&fs_info->mapping_tree);
> @@ -3463,6 +3468,7 @@ int close_ctree(struct btrfs_root *root)
>  	btrfs_stop_workers(&fs_info->caching_workers);
>  	btrfs_stop_workers(&fs_info->readahead_workers);
>  	btrfs_stop_workers(&fs_info->flush_workers);
> +	btrfs_stop_workers(&fs_info->qgroup_rescan_workers);
>  
>  #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
>  	if (btrfs_test_opt(root, CHECK_INTEGRITY))
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 898c572..17b53df 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -3693,12 +3693,10 @@ static long btrfs_ioctl_quota_ctl(struct file
*file, void __user *arg)
>  		goto drop_write;
>  	}
>  
> -	if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) {
> -		trans = btrfs_start_transaction(root, 2);
> -		if (IS_ERR(trans)) {
> -			ret = PTR_ERR(trans);
> -			goto out;
> -		}
> +	trans = btrfs_start_transaction(root, 2);
> +	if (IS_ERR(trans)) {
> +		ret = PTR_ERR(trans);
> +		goto out;
>  	}
>  
>  	switch (sa->cmd) {
> @@ -3708,9 +3706,6 @@ static long btrfs_ioctl_quota_ctl(struct file *file,
void __user *arg)
>  	case BTRFS_QUOTA_CTL_DISABLE:
>  		ret = btrfs_quota_disable(trans, root->fs_info);
>  		break;
> -	case BTRFS_QUOTA_CTL_RESCAN:
> -		ret = btrfs_quota_rescan(root->fs_info);
> -		break;
>  	default:
>  		ret = -EINVAL;
>  		break;
> @@ -3719,11 +3714,9 @@ static long btrfs_ioctl_quota_ctl(struct file *file,
void __user *arg)
>  	if (copy_to_user(arg, sa, sizeof(*sa)))
>  		ret = -EFAULT;
>  
> -	if (trans) {
> -		err = btrfs_commit_transaction(trans, root);
> -		if (err && !ret)
> -			ret = err;
> -	}
> +	err = btrfs_commit_transaction(trans, root);
> +	if (err && !ret)
> +		ret = err;
>  out:
>  	kfree(sa);
>  drop_write:
> @@ -3877,6 +3870,64 @@ drop_write:
>  	return ret;
>  }
>  
> +static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg)
> +{
> +	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
> +	struct btrfs_ioctl_quota_rescan_args *qsa;
> +	int ret;
> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	ret = mnt_want_write_file(file);
> +	if (ret)
> +		return ret;
> +
> +	qsa = memdup_user(arg, sizeof(*qsa));
> +	if (IS_ERR(qsa)) {
> +		ret = PTR_ERR(qsa);
> +		goto drop_write;
> +	}
> +
> +	if (qsa->flags) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	ret = btrfs_qgroup_rescan(root->fs_info);
> +
> +out:
> +	kfree(qsa);
> +drop_write:
> +	mnt_drop_write_file(file);
> +	return ret;
> +}
> +
> +static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user
*arg)
> +{
> +	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
> +	struct btrfs_ioctl_quota_rescan_args *qsa;
> +	int ret = 0;
> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	qsa = kzalloc(sizeof(*qsa), GFP_NOFS);
> +	if (!qsa)
> +		return -ENOMEM;
> +
> +	if (root->fs_info->qgroup_flags &
BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
> +		qsa->flags = 1;
> +		qsa->progress = root->fs_info->qgroup_rescan_progress.objectid;
> +	}
> +
> +	if (copy_to_user(arg, qsa, sizeof(*qsa)))
> +		ret = -EFAULT;
> +
> +	kfree(qsa);
> +	return ret;
> +}
> +
>  static long btrfs_ioctl_set_received_subvol(struct file *file,
>  					    void __user *arg)
>  {
> @@ -4115,6 +4166,10 @@ long btrfs_ioctl(struct file *file, unsigned int
>  		return btrfs_ioctl_qgroup_create(file, argp);
>  	case BTRFS_IOC_QGROUP_LIMIT:
>  		return btrfs_ioctl_qgroup_limit(file, argp);
> +	case BTRFS_IOC_QUOTA_RESCAN:
> +		return btrfs_ioctl_quota_rescan(file, argp);
> +	case BTRFS_IOC_QUOTA_RESCAN_STATUS:
> +		return btrfs_ioctl_quota_rescan_status(file, argp);
>  	case BTRFS_IOC_DEV_REPLACE:
>  		return btrfs_ioctl_dev_replace(root, argp);
>  	case BTRFS_IOC_GET_FSLABEL:
> diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
> index c38a0c5..bb081b5 100644
> --- a/fs/btrfs/qgroup.c
> +++ b/fs/btrfs/qgroup.c
> @@ -31,13 +31,13 @@
>  #include "locking.h"
>  #include "ulist.h"
>  #include "backref.h"
> +#include "extent_io.h"
>  
>  /* TODO XXX FIXME
>   *  - subvol delete -> delete when ref goes to 0? delete limits also?
>   *  - reorganize keys
>   *  - compressed
>   *  - sync
> - *  - rescan
>   *  - copy also limits on subvol creation
>   *  - limit
>   *  - caches fuer ulists
> @@ -98,6 +98,14 @@ struct btrfs_qgroup_list {
>  	struct btrfs_qgroup *member;
>  };
>  
> +struct qgroup_rescan {
> +	struct btrfs_work	work;
> +	struct btrfs_fs_info	*fs_info;
> +};
> +
> +static void qgroup_rescan_start(struct btrfs_fs_info *fs_info,
> +				struct qgroup_rescan *qscan);
> +
>  /* must be called with qgroup_lock held */
>  static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
>  					   u64 qgroupid)
> @@ -298,7 +306,20 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info
*fs_info)
>  			}
>  			fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
>  									  ptr);
> -			/* FIXME read scan element */
> +			fs_info->qgroup_rescan_progress.objectid > +				
btrfs_qgroup_status_rescan(l, ptr);
> +			if (fs_info->qgroup_flags &
> +			    BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
> +				struct qgroup_rescan *qscan > +					kmalloc(sizeof(*qscan),
GFP_NOFS);
> +				if (!qscan) {
> +					ret = -ENOMEM;
> +					goto out;
> +				}
> +				fs_info->qgroup_rescan_progress.type = 0;
> +				fs_info->qgroup_rescan_progress.offset = 0;
> +				qgroup_rescan_start(fs_info, qscan);
> +			}
>  			goto next1;
>  		}
>  
> @@ -719,9 +740,12 @@ static int update_qgroup_status_item(struct
btrfs_trans_handle *trans,
>  	l = path->nodes[0];
>  	slot = path->slots[0];
>  	ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
> +	spin_lock(&fs_info->qgroup_lock);
>  	btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
>  	btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
> -	/* XXX scan */
> +	btrfs_set_qgroup_status_rescan(l, ptr,
> +				fs_info->qgroup_rescan_progress.objectid);
> +	spin_unlock(&fs_info->qgroup_lock);
>  
>  	btrfs_mark_buffer_dirty(l);
>  
> @@ -830,7 +854,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle
*trans,
>  	fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
>  				BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
>  	btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
> -	btrfs_set_qgroup_status_scan(leaf, ptr, 0);
> +	btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
>  
>  	btrfs_mark_buffer_dirty(leaf);
>  
> @@ -894,10 +918,11 @@ out:
>  	return ret;
>  }
>  
> -int btrfs_quota_rescan(struct btrfs_fs_info *fs_info)
> +static void qgroup_dirty(struct btrfs_fs_info *fs_info,
> +			 struct btrfs_qgroup *qgroup)
>  {
> -	/* FIXME */
> -	return 0;
> +	if (list_empty(&qgroup->dirty))
> +		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
>  }
>  
>  int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
> @@ -1045,13 +1070,6 @@ unlock:
>  	return ret;
>  }
>  
> -static void qgroup_dirty(struct btrfs_fs_info *fs_info,
> -			 struct btrfs_qgroup *qgroup)
> -{
> -	if (list_empty(&qgroup->dirty))
> -		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
> -}
> -
>  /*
>   * btrfs_qgroup_record_ref is called when the ref is added or deleted. it
puts
>   * the modification into a list that''s later used by
btrfs_end_transaction to
> @@ -1256,6 +1274,15 @@ int btrfs_qgroup_account_ref(struct
btrfs_trans_handle *trans,
>  		BUG();
>  	}
>  
> +	mutex_lock(&fs_info->qgroup_rescan_lock);
> +	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
> +		if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
> +			mutex_unlock(&fs_info->qgroup_rescan_lock);
> +			return 0;
> +		}
> +	}
> +	mutex_unlock(&fs_info->qgroup_rescan_lock);
> +
>  	/*
>  	 * the delayed ref sequence number we pass depends on the direction of
>  	 * the operation. for add operations, we pass (node->seq - 1) to skip
> @@ -1269,7 +1296,17 @@ int btrfs_qgroup_account_ref(struct
btrfs_trans_handle *trans,
>  	if (ret < 0)
>  		return ret;
>  
> +	mutex_lock(&fs_info->qgroup_rescan_lock);
>  	spin_lock(&fs_info->qgroup_lock);
> +	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
> +		if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
> +			ret = 0;
> +			mutex_unlock(&fs_info->qgroup_rescan_lock);
> +			goto unlock;
> +		}
> +	}
> +	mutex_unlock(&fs_info->qgroup_rescan_lock);
> +
>  	quota_root = fs_info->quota_root;
>  	if (!quota_root)
>  		goto unlock;
> @@ -1652,3 +1689,233 @@ void assert_qgroups_uptodate(struct
btrfs_trans_handle *trans)
>  		trans->delayed_ref_elem.seq);
>  	BUG();
>  }
> +
> +/*
> + * returns < 0 on error, 0 when more leafs are to be scanned.
> + * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
> + */
> +static int
> +qgroup_rescan_leaf(struct qgroup_rescan *qscan, struct btrfs_path *path,
> +		   struct btrfs_trans_handle *trans, struct ulist *tmp,
> +		   struct extent_buffer *scratch_leaf)
> +{
> +	struct btrfs_key found;
> +	struct btrfs_fs_info *fs_info = qscan->fs_info;
> +	struct ulist *roots = NULL;
> +	struct ulist_node *unode;
> +	struct ulist_iterator uiter;
> +	struct seq_list tree_mod_seq_elem = {};
> +	u64 seq;
> +	int slot;
> +	int ret;
> +
> +	path->leave_spinning = 1;
> +	mutex_lock(&fs_info->qgroup_rescan_lock);
> +	ret = btrfs_search_slot_for_read(fs_info->extent_root,
> +					 &fs_info->qgroup_rescan_progress,
> +					 path, 1, 0);
> +
> +	pr_debug("current progress key (%llu %u %llu), search_slot ret
%d\n",
> +		 (unsigned long long)fs_info->qgroup_rescan_progress.objectid,
> +		 fs_info->qgroup_rescan_progress.type,
> +		 (unsigned long long)fs_info->qgroup_rescan_progress.offset,
> +		 ret);
> +
> +	if (ret) {
> +		/*
> +		 * The rescan is about to end, we will not be scanning any
> +		 * further blocks. We cannot unset the RESCAN flag here, because
> +		 * we want to commit the transaction if everything went well.
> +		 * To make the live accounting work in this phase, we set our
> +		 * scan progress pointer such that every real extent objectid
> +		 * will be smaller.
> +		 */
> +		fs_info->qgroup_rescan_progress.objectid = (u64)-1;
> +		btrfs_release_path(path);
> +		mutex_unlock(&fs_info->qgroup_rescan_lock);
> +		return ret;
> +	}
> +
> +	btrfs_item_key_to_cpu(path->nodes[0], &found,
> +			      btrfs_header_nritems(path->nodes[0]) - 1);
> +	fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
> +
> +	btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
> +	memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
> +	slot = path->slots[0];
> +	btrfs_release_path(path);
> +	mutex_unlock(&fs_info->qgroup_rescan_lock);
> +
> +	for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
> +		btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
> +		if (found.type != BTRFS_EXTENT_ITEM_KEY)
> +			continue;
> +		ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
> +					   tree_mod_seq_elem.seq, &roots);
> +		if (ret < 0)
> +			break;

	I have sent the patch to issue the double free that use the function
	btrfs_find_all_roots(). So here if btrfs_find_all_roots() fails, please
	do not ulist_free(roots) more.
> +		spin_lock(&fs_info->qgroup_lock);
> +		seq = fs_info->qgroup_seq;
> +		fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
> +
> +		ulist_reinit(tmp);
> +		ULIST_ITER_INIT(&uiter);
> +		while ((unode = ulist_next(roots, &uiter))) {
> +			struct btrfs_qgroup *qg;
> +
> +			qg = find_qgroup_rb(fs_info, unode->val);
> +			if (!qg)
> +				continue;
> +
> +			ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);

For this patch, you forget to add the check about ulist_add(), ulist_add() may
return -ENOMEM. In fact, i have sent the patch to fix this problem in qgroup.c
before.
So you don''t need to change patch1, but you dose need to add the check
in the patch2.
> +		}
> +
> +		/* this is similar to step 2 of btrfs_qgroup_account_ref */
> +		ULIST_ITER_INIT(&uiter);
> +		while ((unode = ulist_next(tmp, &uiter))) {
> +			struct btrfs_qgroup *qg;
> +			struct btrfs_qgroup_list *glist;
> +
> +			qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
> +			qg->rfer += found.offset;
> +			qg->rfer_cmpr += found.offset;
> +			WARN_ON(qg->tag >= seq);
> +			WARN_ON(qg->refcnt >= seq);
> +			if (qg->refcnt < seq)
> +				qg->refcnt = seq + 1;
> +			else
> +				qg->refcnt = qg->refcnt + 1;
> +			qgroup_dirty(fs_info, qg);
> +
> +			list_for_each_entry(glist, &qg->groups, next_group) {
> +				ulist_add(tmp, glist->group->qgroupid,
> +					  (uintptr_t)glist->group,
> +					  GFP_ATOMIC);
> +			}
> +		}
> +
> +		qgroup_account_ref_step3(fs_info, roots, tmp, seq, -1,
> +					 found.offset);
> +
> +		spin_unlock(&fs_info->qgroup_lock);
> +		ulist_free(roots);
> +	}
> +
> +	btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
> +
> +	return ret;
> +}
> +
> +static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
> +{
> +	struct qgroup_rescan *qscan = container_of(work, struct qgroup_rescan,
> +						   work);
> +	struct btrfs_path *path;
> +	struct btrfs_trans_handle *trans = NULL;
> +	struct btrfs_fs_info *fs_info = qscan->fs_info;
> +	struct ulist *tmp = NULL;
> +	struct extent_buffer *scratch_leaf = NULL;
> +	int err = -ENOMEM;
> +
> +	path = btrfs_alloc_path();
> +	if (!path)
> +		goto out;
> +	tmp = ulist_alloc(GFP_NOFS);
> +	if (!tmp)
> +		goto out;
> +	scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
> +	if (!scratch_leaf)
> +		goto out;
> +
> +	err = 0;
> +	while (!err) {
> +		trans = btrfs_start_transaction(fs_info->fs_root, 0);
> +		if (IS_ERR(trans)) {
> +			err = PTR_ERR(trans);
> +			break;
> +		}
> +		err = qgroup_rescan_leaf(qscan, path, trans, tmp, scratch_leaf);
> +		if (err > 0)
> +			btrfs_commit_transaction(trans, fs_info->fs_root);
> +		else
> +			btrfs_end_transaction(trans, fs_info->fs_root);
> +	}
> +
> +out:
> +	kfree(scratch_leaf);
> +	ulist_free(tmp);
> +	btrfs_free_path(path);
> +	kfree(qscan);
> +
> +	mutex_lock(&fs_info->qgroup_rescan_lock);
> +	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
> +
> +	if (err == 2 &&
> +	    fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT)
{
> +		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
> +	} else if (err < 0) {
> +		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
> +	}
> +	mutex_unlock(&fs_info->qgroup_rescan_lock);
> +
> +	if (err >= 0) {
> +		pr_info("btrfs: qgroup scan completed%s\n",
> +			err == 2 ? " (inconsistency flag cleared)" : "");
> +	} else {
> +		pr_err("btrfs: qgroup scan failed with %d\n", err);
> +	}
> +}
> +
> +static void
> +qgroup_rescan_start(struct btrfs_fs_info *fs_info, struct qgroup_rescan
*qscan)
> +{
> +	qscan->work.func = btrfs_qgroup_rescan_worker;
> +	qscan->fs_info = fs_info;
> +
> +	pr_info("btrfs: qgroup scan started\n");
> +	btrfs_queue_worker(&fs_info->qgroup_rescan_workers,
&qscan->work);
> +}
> +
> +int
> +btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
> +{
> +	int ret = 0;
> +	struct rb_node *n;
> +	struct btrfs_qgroup *qgroup;
> +	struct qgroup_rescan *qscan = kmalloc(sizeof(*qscan), GFP_NOFS);
> +
> +	if (!qscan)
> +		return -ENOMEM;
> +
> +	mutex_lock(&fs_info->qgroup_rescan_lock);
> +	spin_lock(&fs_info->qgroup_lock);
> +	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
> +		ret = -EINPROGRESS;
> +	else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
> +		ret = -EINVAL;
> +	if (ret) {
> +		spin_unlock(&fs_info->qgroup_lock);
> +		mutex_unlock(&fs_info->qgroup_rescan_lock);
> +		kfree(qscan);
> +		return ret;
> +	}
> +
> +	fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
> +	memset(&fs_info->qgroup_rescan_progress, 0,
> +		sizeof(fs_info->qgroup_rescan_progress));
> +
> +	/* clear all current qgroup tracking information */
> +	for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
> +		qgroup = rb_entry(n, struct btrfs_qgroup, node);
> +		qgroup->rfer = 0;
> +		qgroup->rfer_cmpr = 0;
> +		qgroup->excl = 0;
> +		qgroup->excl_cmpr = 0;
> +	}
> +	spin_unlock(&fs_info->qgroup_lock);
> +	mutex_unlock(&fs_info->qgroup_rescan_lock);
> +
> +	qgroup_rescan_start(fs_info, qscan);
> +
> +	return 0;
> +}
> diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
> index fa3a5f9..ca70f08 100644
> --- a/include/uapi/linux/btrfs.h
> +++ b/include/uapi/linux/btrfs.h
> @@ -376,12 +376,18 @@ struct btrfs_ioctl_get_dev_stats {
>  
>  #define BTRFS_QUOTA_CTL_ENABLE	1
>  #define BTRFS_QUOTA_CTL_DISABLE	2
> -#define BTRFS_QUOTA_CTL_RESCAN	3
> +#define BTRFS_QUOTA_CTL_RESCAN__NOTUSED	3
>  struct btrfs_ioctl_quota_ctl_args {
>  	__u64 cmd;
>  	__u64 status;
>  };
>  
> +struct btrfs_ioctl_quota_rescan_args {
> +	__u64	flags;
> +	__u64   progress;
> +	__u64   reserved[6];
> +};
> +
>  struct btrfs_ioctl_qgroup_assign_args {
>  	__u64 assign;
>  	__u64 src;
> @@ -502,6 +508,10 @@ struct btrfs_ioctl_send_args {
>  			       struct btrfs_ioctl_qgroup_create_args)
>  #define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \
>  			       struct btrfs_ioctl_qgroup_limit_args)
> +#define BTRFS_IOC_QUOTA_RESCAN _IOW(BTRFS_IOCTL_MAGIC, 44, \
> +			       struct btrfs_ioctl_quota_rescan_args)
> +#define BTRFS_IOC_QUOTA_RESCAN_STATUS _IOR(BTRFS_IOCTL_MAGIC, 45, \
> +			       struct btrfs_ioctl_quota_rescan_args)
>  #define BTRFS_IOC_GET_FSLABEL _IOR(BTRFS_IOCTL_MAGIC, 49, \
>  				   char[BTRFS_LABEL_SIZE])
>  #define BTRFS_IOC_SET_FSLABEL _IOW(BTRFS_IOCTL_MAGIC, 50, \


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Jan Schmidt

2013-Apr-16 09:38 UTC

head link

Re: [PATCH v2 1/3] Btrfs: split btrfs_qgroup_account_ref into four functions

On Tue, April 16, 2013 at 11:20 (+0200), Wang Shilong
wrote:> Hello Jan,
> 
>> The function is separated into a preparation part and the three
accounting
>> steps mentioned in the qgroups documentation. The goal is to make steps
two
>> and three usable by the rescan functionality. A side effect is that the
>> function is restructured into readable subunits.
> 
> 
> How about renaming the three functions like:
> 
> 1> qgroup_walk_old_roots()
> 2> qgroup_walk_new_root()
> 3> qgroup_rewalk_old_root()
> 
> I''d like this function to be meaningful, but not just step1,2,3.
> Maybe you can think out better function name.
I''d like to keep it like 1, 2, 3, because that matches the
documentation in the
qgroup pdf and the code has always been documented in those three steps.

Thanks,
-Jan
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Jan Schmidt

2013-Apr-16 09:39 UTC

head link

Re: [PATCH v2 2/3] Btrfs: rescan for qgroups

On Tue, April 16, 2013 at 11:26 (+0200), Wang Shilong
wrote:> Hello, Jan
> 
>> If qgroup tracking is out of sync, a rescan operation can be started.
It
>> iterates the complete extent tree and recalculates all qgroup tracking
data.
>> This is an expensive operation and should not be used unless required.
>>
>> A filesystem under rescan can still be umounted. The rescan continues
on the
>> next mount.  Status information is provided with a separate ioctl while
a
>> rescan operation is in progress.
>>
>> Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
>> ---
>>  fs/btrfs/ctree.h           |   17 ++-
>>  fs/btrfs/disk-io.c         |    6 +
>>  fs/btrfs/ioctl.c           |   83 ++++++++++--
>>  fs/btrfs/qgroup.c          |  295
+++++++++++++++++++++++++++++++++++++++++--
>>  include/uapi/linux/btrfs.h |   12 ++-
>>  5 files changed, 378 insertions(+), 35 deletions(-)
>>
>> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
>> index 0d82922..bd4e2a7 100644
>> --- a/fs/btrfs/ctree.h
>> +++ b/fs/btrfs/ctree.h
>> @@ -1019,9 +1019,9 @@ struct btrfs_block_group_item {
>>   */
>>  #define BTRFS_QGROUP_STATUS_FLAG_ON		(1ULL << 0)
>>  /*
>> - * SCANNING is set during the initialization phase
>> + * RESCAN is set during the initialization phase
>>   */
>> -#define BTRFS_QGROUP_STATUS_FLAG_SCANNING	(1ULL << 1)
>> +#define BTRFS_QGROUP_STATUS_FLAG_RESCAN		(1ULL << 1)
>>  /*
>>   * Some qgroup entries are known to be out of date,
>>   * either because the configuration has changed in a way that
>> @@ -1050,7 +1050,7 @@ struct btrfs_qgroup_status_item {
>>  	 * only used during scanning to record the progress
>>  	 * of the scan. It contains a logical address
>>  	 */
>> -	__le64 scan;
>> +	__le64 rescan;
>>  } __attribute__ ((__packed__));
>>  
>>  struct btrfs_qgroup_info_item {
>> @@ -1587,6 +1587,11 @@ struct btrfs_fs_info {
>>  	/* used by btrfs_qgroup_record_ref for an efficient tree traversal */
>>  	u64 qgroup_seq;
>>  
>> +	/* qgroup rescan items */
>> +	struct mutex qgroup_rescan_lock; /* protects the progress item */
>> +	struct btrfs_key qgroup_rescan_progress;
>> +	struct btrfs_workers qgroup_rescan_workers;
>> +
>>  	/* filesystem state */
>>  	unsigned long fs_state;
>>  
>> @@ -2864,8 +2869,8 @@ BTRFS_SETGET_FUNCS(qgroup_status_version, struct
btrfs_qgroup_status_item,
>>  		   version, 64);
>>  BTRFS_SETGET_FUNCS(qgroup_status_flags, struct
btrfs_qgroup_status_item,
>>  		   flags, 64);
>> -BTRFS_SETGET_FUNCS(qgroup_status_scan, struct
btrfs_qgroup_status_item,
>> -		   scan, 64);
>> +BTRFS_SETGET_FUNCS(qgroup_status_rescan, struct
btrfs_qgroup_status_item,
>> +		   rescan, 64);
>>  
>>  /* btrfs_qgroup_info_item */
>>  BTRFS_SETGET_FUNCS(qgroup_info_generation, struct
btrfs_qgroup_info_item,
>> @@ -3784,7 +3789,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle
*trans,
>>  		       struct btrfs_fs_info *fs_info);
>>  int btrfs_quota_disable(struct btrfs_trans_handle *trans,
>>  			struct btrfs_fs_info *fs_info);
>> -int btrfs_quota_rescan(struct btrfs_fs_info *fs_info);
>> +int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
>>  int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
>>  			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
>>  int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
>> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
>> index 6d19a0a..60d15fe 100644
>> --- a/fs/btrfs/disk-io.c
>> +++ b/fs/btrfs/disk-io.c
>> @@ -2192,6 +2192,7 @@ int open_ctree(struct super_block *sb,
>>  	fs_info->qgroup_seq = 1;
>>  	fs_info->quota_enabled = 0;
>>  	fs_info->pending_quota_state = 0;
>> +	mutex_init(&fs_info->qgroup_rescan_lock);
>>  
>>  	btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
>>  	btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
>> @@ -2394,6 +2395,8 @@ int open_ctree(struct super_block *sb,
>>  	btrfs_init_workers(&fs_info->readahead_workers,
"readahead",
>>  			   fs_info->thread_pool_size,
>>  			   &fs_info->generic_worker);
>> +	btrfs_init_workers(&fs_info->qgroup_rescan_workers,
"qgroup-rescan", 1,
>> +			   &fs_info->generic_worker);
>>  
>>  	/*
>>  	 * endios are largely parallel and should have a very
>> @@ -2428,6 +2431,7 @@ int open_ctree(struct super_block *sb,
>>  	ret |= btrfs_start_workers(&fs_info->caching_workers);
>>  	ret |= btrfs_start_workers(&fs_info->readahead_workers);
>>  	ret |= btrfs_start_workers(&fs_info->flush_workers);
>> +	ret |= btrfs_start_workers(&fs_info->qgroup_rescan_workers);
>>  	if (ret) {
>>  		err = -ENOMEM;
>>  		goto fail_sb_buffer;
>> @@ -2773,6 +2777,7 @@ fail_sb_buffer:
>>  	btrfs_stop_workers(&fs_info->delayed_workers);
>>  	btrfs_stop_workers(&fs_info->caching_workers);
>>  	btrfs_stop_workers(&fs_info->flush_workers);
>> +	btrfs_stop_workers(&fs_info->qgroup_rescan_workers);
>>  fail_alloc:
>>  fail_iput:
>>  	btrfs_mapping_tree_free(&fs_info->mapping_tree);
>> @@ -3463,6 +3468,7 @@ int close_ctree(struct btrfs_root *root)
>>  	btrfs_stop_workers(&fs_info->caching_workers);
>>  	btrfs_stop_workers(&fs_info->readahead_workers);
>>  	btrfs_stop_workers(&fs_info->flush_workers);
>> +	btrfs_stop_workers(&fs_info->qgroup_rescan_workers);
>>  
>>  #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
>>  	if (btrfs_test_opt(root, CHECK_INTEGRITY))
>> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
>> index 898c572..17b53df 100644
>> --- a/fs/btrfs/ioctl.c
>> +++ b/fs/btrfs/ioctl.c
>> @@ -3693,12 +3693,10 @@ static long btrfs_ioctl_quota_ctl(struct file
*file, void __user *arg)
>>  		goto drop_write;
>>  	}
>>  
>> -	if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) {
>> -		trans = btrfs_start_transaction(root, 2);
>> -		if (IS_ERR(trans)) {
>> -			ret = PTR_ERR(trans);
>> -			goto out;
>> -		}
>> +	trans = btrfs_start_transaction(root, 2);
>> +	if (IS_ERR(trans)) {
>> +		ret = PTR_ERR(trans);
>> +		goto out;
>>  	}
>>  
>>  	switch (sa->cmd) {
>> @@ -3708,9 +3706,6 @@ static long btrfs_ioctl_quota_ctl(struct file
*file, void __user *arg)
>>  	case BTRFS_QUOTA_CTL_DISABLE:
>>  		ret = btrfs_quota_disable(trans, root->fs_info);
>>  		break;
>> -	case BTRFS_QUOTA_CTL_RESCAN:
>> -		ret = btrfs_quota_rescan(root->fs_info);
>> -		break;
>>  	default:
>>  		ret = -EINVAL;
>>  		break;
>> @@ -3719,11 +3714,9 @@ static long btrfs_ioctl_quota_ctl(struct file
*file, void __user *arg)
>>  	if (copy_to_user(arg, sa, sizeof(*sa)))
>>  		ret = -EFAULT;
>>  
>> -	if (trans) {
>> -		err = btrfs_commit_transaction(trans, root);
>> -		if (err && !ret)
>> -			ret = err;
>> -	}
>> +	err = btrfs_commit_transaction(trans, root);
>> +	if (err && !ret)
>> +		ret = err;
>>  out:
>>  	kfree(sa);
>>  drop_write:
>> @@ -3877,6 +3870,64 @@ drop_write:
>>  	return ret;
>>  }
>>  
>> +static long btrfs_ioctl_quota_rescan(struct file *file, void __user
*arg)
>> +{
>> +	struct btrfs_root *root =
BTRFS_I(fdentry(file)->d_inode)->root;
>> +	struct btrfs_ioctl_quota_rescan_args *qsa;
>> +	int ret;
>> +
>> +	if (!capable(CAP_SYS_ADMIN))
>> +		return -EPERM;
>> +
>> +	ret = mnt_want_write_file(file);
>> +	if (ret)
>> +		return ret;
>> +
>> +	qsa = memdup_user(arg, sizeof(*qsa));
>> +	if (IS_ERR(qsa)) {
>> +		ret = PTR_ERR(qsa);
>> +		goto drop_write;
>> +	}
>> +
>> +	if (qsa->flags) {
>> +		ret = -EINVAL;
>> +		goto out;
>> +	}
>> +
>> +	ret = btrfs_qgroup_rescan(root->fs_info);
>> +
>> +out:
>> +	kfree(qsa);
>> +drop_write:
>> +	mnt_drop_write_file(file);
>> +	return ret;
>> +}
>> +
>> +static long btrfs_ioctl_quota_rescan_status(struct file *file, void
__user *arg)
>> +{
>> +	struct btrfs_root *root =
BTRFS_I(fdentry(file)->d_inode)->root;
>> +	struct btrfs_ioctl_quota_rescan_args *qsa;
>> +	int ret = 0;
>> +
>> +	if (!capable(CAP_SYS_ADMIN))
>> +		return -EPERM;
>> +
>> +	qsa = kzalloc(sizeof(*qsa), GFP_NOFS);
>> +	if (!qsa)
>> +		return -ENOMEM;
>> +
>> +	if (root->fs_info->qgroup_flags &
BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
>> +		qsa->flags = 1;
>> +		qsa->progress =
root->fs_info->qgroup_rescan_progress.objectid;
>> +	}
>> +
>> +	if (copy_to_user(arg, qsa, sizeof(*qsa)))
>> +		ret = -EFAULT;
>> +
>> +	kfree(qsa);
>> +	return ret;
>> +}
>> +
>>  static long btrfs_ioctl_set_received_subvol(struct file *file,
>>  					    void __user *arg)
>>  {
>> @@ -4115,6 +4166,10 @@ long btrfs_ioctl(struct file *file, unsigned int
>>  		return btrfs_ioctl_qgroup_create(file, argp);
>>  	case BTRFS_IOC_QGROUP_LIMIT:
>>  		return btrfs_ioctl_qgroup_limit(file, argp);
>> +	case BTRFS_IOC_QUOTA_RESCAN:
>> +		return btrfs_ioctl_quota_rescan(file, argp);
>> +	case BTRFS_IOC_QUOTA_RESCAN_STATUS:
>> +		return btrfs_ioctl_quota_rescan_status(file, argp);
>>  	case BTRFS_IOC_DEV_REPLACE:
>>  		return btrfs_ioctl_dev_replace(root, argp);
>>  	case BTRFS_IOC_GET_FSLABEL:
>> diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
>> index c38a0c5..bb081b5 100644
>> --- a/fs/btrfs/qgroup.c
>> +++ b/fs/btrfs/qgroup.c
>> @@ -31,13 +31,13 @@
>>  #include "locking.h"
>>  #include "ulist.h"
>>  #include "backref.h"
>> +#include "extent_io.h"
>>  
>>  /* TODO XXX FIXME
>>   *  - subvol delete -> delete when ref goes to 0? delete limits
also?
>>   *  - reorganize keys
>>   *  - compressed
>>   *  - sync
>> - *  - rescan
>>   *  - copy also limits on subvol creation
>>   *  - limit
>>   *  - caches fuer ulists
>> @@ -98,6 +98,14 @@ struct btrfs_qgroup_list {
>>  	struct btrfs_qgroup *member;
>>  };
>>  
>> +struct qgroup_rescan {
>> +	struct btrfs_work	work;
>> +	struct btrfs_fs_info	*fs_info;
>> +};
>> +
>> +static void qgroup_rescan_start(struct btrfs_fs_info *fs_info,
>> +				struct qgroup_rescan *qscan);
>> +
>>  /* must be called with qgroup_lock held */
>>  static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info
*fs_info,
>>  					   u64 qgroupid)
>> @@ -298,7 +306,20 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info
*fs_info)
>>  			}
>>  			fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
>>  									  ptr);
>> -			/* FIXME read scan element */
>> +			fs_info->qgroup_rescan_progress.objectid >> +				
btrfs_qgroup_status_rescan(l, ptr);
>> +			if (fs_info->qgroup_flags &
>> +			    BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
>> +				struct qgroup_rescan *qscan >> +					kmalloc(sizeof(*qscan),
GFP_NOFS);
>> +				if (!qscan) {
>> +					ret = -ENOMEM;
>> +					goto out;
>> +				}
>> +				fs_info->qgroup_rescan_progress.type = 0;
>> +				fs_info->qgroup_rescan_progress.offset = 0;
>> +				qgroup_rescan_start(fs_info, qscan);
>> +			}
>>  			goto next1;
>>  		}
>>  
>> @@ -719,9 +740,12 @@ static int update_qgroup_status_item(struct
btrfs_trans_handle *trans,
>>  	l = path->nodes[0];
>>  	slot = path->slots[0];
>>  	ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
>> +	spin_lock(&fs_info->qgroup_lock);
>>  	btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
>>  	btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
>> -	/* XXX scan */
>> +	btrfs_set_qgroup_status_rescan(l, ptr,
>> +				fs_info->qgroup_rescan_progress.objectid);
>> +	spin_unlock(&fs_info->qgroup_lock);
>>  
>>  	btrfs_mark_buffer_dirty(l);
>>  
>> @@ -830,7 +854,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle
*trans,
>>  	fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
>>  				BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
>>  	btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
>> -	btrfs_set_qgroup_status_scan(leaf, ptr, 0);
>> +	btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
>>  
>>  	btrfs_mark_buffer_dirty(leaf);
>>  
>> @@ -894,10 +918,11 @@ out:
>>  	return ret;
>>  }
>>  
>> -int btrfs_quota_rescan(struct btrfs_fs_info *fs_info)
>> +static void qgroup_dirty(struct btrfs_fs_info *fs_info,
>> +			 struct btrfs_qgroup *qgroup)
>>  {
>> -	/* FIXME */
>> -	return 0;
>> +	if (list_empty(&qgroup->dirty))
>> +		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
>>  }
>>  
>>  int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
>> @@ -1045,13 +1070,6 @@ unlock:
>>  	return ret;
>>  }
>>  
>> -static void qgroup_dirty(struct btrfs_fs_info *fs_info,
>> -			 struct btrfs_qgroup *qgroup)
>> -{
>> -	if (list_empty(&qgroup->dirty))
>> -		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
>> -}
>> -
>>  /*
>>   * btrfs_qgroup_record_ref is called when the ref is added or deleted.
it puts
>>   * the modification into a list that''s later used by
btrfs_end_transaction to
>> @@ -1256,6 +1274,15 @@ int btrfs_qgroup_account_ref(struct
btrfs_trans_handle *trans,
>>  		BUG();
>>  	}
>>  
>> +	mutex_lock(&fs_info->qgroup_rescan_lock);
>> +	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
>> +		if (fs_info->qgroup_rescan_progress.objectid <=
node->bytenr) {
>> +			mutex_unlock(&fs_info->qgroup_rescan_lock);
>> +			return 0;
>> +		}
>> +	}
>> +	mutex_unlock(&fs_info->qgroup_rescan_lock);
>> +
>>  	/*
>>  	 * the delayed ref sequence number we pass depends on the direction
of
>>  	 * the operation. for add operations, we pass (node->seq - 1) to
skip
>> @@ -1269,7 +1296,17 @@ int btrfs_qgroup_account_ref(struct
btrfs_trans_handle *trans,
>>  	if (ret < 0)
>>  		return ret;
>>  
>> +	mutex_lock(&fs_info->qgroup_rescan_lock);
>>  	spin_lock(&fs_info->qgroup_lock);
>> +	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
>> +		if (fs_info->qgroup_rescan_progress.objectid <=
node->bytenr) {
>> +			ret = 0;
>> +			mutex_unlock(&fs_info->qgroup_rescan_lock);
>> +			goto unlock;
>> +		}
>> +	}
>> +	mutex_unlock(&fs_info->qgroup_rescan_lock);
>> +
>>  	quota_root = fs_info->quota_root;
>>  	if (!quota_root)
>>  		goto unlock;
>> @@ -1652,3 +1689,233 @@ void assert_qgroups_uptodate(struct
btrfs_trans_handle *trans)
>>  		trans->delayed_ref_elem.seq);
>>  	BUG();
>>  }
>> +
>> +/*
>> + * returns < 0 on error, 0 when more leafs are to be scanned.
>> + * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
>> + */
>> +static int
>> +qgroup_rescan_leaf(struct qgroup_rescan *qscan, struct btrfs_path
*path,
>> +		   struct btrfs_trans_handle *trans, struct ulist *tmp,
>> +		   struct extent_buffer *scratch_leaf)
>> +{
>> +	struct btrfs_key found;
>> +	struct btrfs_fs_info *fs_info = qscan->fs_info;
>> +	struct ulist *roots = NULL;
>> +	struct ulist_node *unode;
>> +	struct ulist_iterator uiter;
>> +	struct seq_list tree_mod_seq_elem = {};
>> +	u64 seq;
>> +	int slot;
>> +	int ret;
>> +
>> +	path->leave_spinning = 1;
>> +	mutex_lock(&fs_info->qgroup_rescan_lock);
>> +	ret = btrfs_search_slot_for_read(fs_info->extent_root,
>> +					 &fs_info->qgroup_rescan_progress,
>> +					 path, 1, 0);
>> +
>> +	pr_debug("current progress key (%llu %u %llu), search_slot ret
%d\n",
>> +		 (unsigned long long)fs_info->qgroup_rescan_progress.objectid,
>> +		 fs_info->qgroup_rescan_progress.type,
>> +		 (unsigned long long)fs_info->qgroup_rescan_progress.offset,
>> +		 ret);
>> +
>> +	if (ret) {
>> +		/*
>> +		 * The rescan is about to end, we will not be scanning any
>> +		 * further blocks. We cannot unset the RESCAN flag here, because
>> +		 * we want to commit the transaction if everything went well.
>> +		 * To make the live accounting work in this phase, we set our
>> +		 * scan progress pointer such that every real extent objectid
>> +		 * will be smaller.
>> +		 */
>> +		fs_info->qgroup_rescan_progress.objectid = (u64)-1;
>> +		btrfs_release_path(path);
>> +		mutex_unlock(&fs_info->qgroup_rescan_lock);
>> +		return ret;
>> +	}
>> +
>> +	btrfs_item_key_to_cpu(path->nodes[0], &found,
>> +			      btrfs_header_nritems(path->nodes[0]) - 1);
>> +	fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
>> +
>> +	btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
>> +	memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
>> +	slot = path->slots[0];
>> +	btrfs_release_path(path);
>> +	mutex_unlock(&fs_info->qgroup_rescan_lock);
>> +
>> +	for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
>> +		btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
>> +		if (found.type != BTRFS_EXTENT_ITEM_KEY)
>> +			continue;
>> +		ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
>> +					   tree_mod_seq_elem.seq, &roots);
>> +		if (ret < 0)
>> +			break;
> 
> 
> 	I have sent the patch to issue the double free that use the function
> 	btrfs_find_all_roots(). So here if btrfs_find_all_roots() fails, please
> 	do not ulist_free(roots) more.
I don''t see where I''m calling ulist_free(roots) in the error
path. One of us is
missing something :-)
>> +		spin_lock(&fs_info->qgroup_lock);
>> +		seq = fs_info->qgroup_seq;
>> +		fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
>> +
>> +		ulist_reinit(tmp);
>> +		ULIST_ITER_INIT(&uiter);
>> +		while ((unode = ulist_next(roots, &uiter))) {
>> +			struct btrfs_qgroup *qg;
>> +
>> +			qg = find_qgroup_rb(fs_info, unode->val);
>> +			if (!qg)
>> +				continue;
>> +
>> +			ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
> 
> 
> For this patch, you forget to add the check about ulist_add(), ulist_add()
may
> return -ENOMEM. In fact, i have sent the patch to fix this problem in
qgroup.c before.
> So you don''t need to change patch1, but you dose need to add the
check in the patch2.
Thanks for noticing, I''ll send a fix in a few days to leave room for
more comments.

-Jan
>> +		}
>> +
>> +		/* this is similar to step 2 of btrfs_qgroup_account_ref */
>> +		ULIST_ITER_INIT(&uiter);
>> +		while ((unode = ulist_next(tmp, &uiter))) {
>> +			struct btrfs_qgroup *qg;
>> +			struct btrfs_qgroup_list *glist;
>> +
>> +			qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
>> +			qg->rfer += found.offset;
>> +			qg->rfer_cmpr += found.offset;
>> +			WARN_ON(qg->tag >= seq);
>> +			WARN_ON(qg->refcnt >= seq);
>> +			if (qg->refcnt < seq)
>> +				qg->refcnt = seq + 1;
>> +			else
>> +				qg->refcnt = qg->refcnt + 1;
>> +			qgroup_dirty(fs_info, qg);
>> +
>> +			list_for_each_entry(glist, &qg->groups, next_group) {
>> +				ulist_add(tmp, glist->group->qgroupid,
>> +					  (uintptr_t)glist->group,
>> +					  GFP_ATOMIC);
>> +			}
>> +		}
>> +
>> +		qgroup_account_ref_step3(fs_info, roots, tmp, seq, -1,
>> +					 found.offset);
>> +
>> +		spin_unlock(&fs_info->qgroup_lock);
>> +		ulist_free(roots);
>> +	}
>> +
>> +	btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
>> +
>> +	return ret;
>> +}
>> +
>> +static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
>> +{
>> +	struct qgroup_rescan *qscan = container_of(work, struct
qgroup_rescan,
>> +						   work);
>> +	struct btrfs_path *path;
>> +	struct btrfs_trans_handle *trans = NULL;
>> +	struct btrfs_fs_info *fs_info = qscan->fs_info;
>> +	struct ulist *tmp = NULL;
>> +	struct extent_buffer *scratch_leaf = NULL;
>> +	int err = -ENOMEM;
>> +
>> +	path = btrfs_alloc_path();
>> +	if (!path)
>> +		goto out;
>> +	tmp = ulist_alloc(GFP_NOFS);
>> +	if (!tmp)
>> +		goto out;
>> +	scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
>> +	if (!scratch_leaf)
>> +		goto out;
>> +
>> +	err = 0;
>> +	while (!err) {
>> +		trans = btrfs_start_transaction(fs_info->fs_root, 0);
>> +		if (IS_ERR(trans)) {
>> +			err = PTR_ERR(trans);
>> +			break;
>> +		}
>> +		err = qgroup_rescan_leaf(qscan, path, trans, tmp, scratch_leaf);
>> +		if (err > 0)
>> +			btrfs_commit_transaction(trans, fs_info->fs_root);
>> +		else
>> +			btrfs_end_transaction(trans, fs_info->fs_root);
>> +	}
>> +
>> +out:
>> +	kfree(scratch_leaf);
>> +	ulist_free(tmp);
>> +	btrfs_free_path(path);
>> +	kfree(qscan);
>> +
>> +	mutex_lock(&fs_info->qgroup_rescan_lock);
>> +	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
>> +
>> +	if (err == 2 &&
>> +	    fs_info->qgroup_flags &
BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
>> +		fs_info->qgroup_flags &=
~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
>> +	} else if (err < 0) {
>> +		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
>> +	}
>> +	mutex_unlock(&fs_info->qgroup_rescan_lock);
>> +
>> +	if (err >= 0) {
>> +		pr_info("btrfs: qgroup scan completed%s\n",
>> +			err == 2 ? " (inconsistency flag cleared)" :
"");
>> +	} else {
>> +		pr_err("btrfs: qgroup scan failed with %d\n", err);
>> +	}
>> +}
>> +
>> +static void
>> +qgroup_rescan_start(struct btrfs_fs_info *fs_info, struct
qgroup_rescan *qscan)
>> +{
>> +	qscan->work.func = btrfs_qgroup_rescan_worker;
>> +	qscan->fs_info = fs_info;
>> +
>> +	pr_info("btrfs: qgroup scan started\n");
>> +	btrfs_queue_worker(&fs_info->qgroup_rescan_workers,
&qscan->work);
>> +}
>> +
>> +int
>> +btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
>> +{
>> +	int ret = 0;
>> +	struct rb_node *n;
>> +	struct btrfs_qgroup *qgroup;
>> +	struct qgroup_rescan *qscan = kmalloc(sizeof(*qscan), GFP_NOFS);
>> +
>> +	if (!qscan)
>> +		return -ENOMEM;
>> +
>> +	mutex_lock(&fs_info->qgroup_rescan_lock);
>> +	spin_lock(&fs_info->qgroup_lock);
>> +	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
>> +		ret = -EINPROGRESS;
>> +	else if (!(fs_info->qgroup_flags &
BTRFS_QGROUP_STATUS_FLAG_ON))
>> +		ret = -EINVAL;
>> +	if (ret) {
>> +		spin_unlock(&fs_info->qgroup_lock);
>> +		mutex_unlock(&fs_info->qgroup_rescan_lock);
>> +		kfree(qscan);
>> +		return ret;
>> +	}
>> +
>> +	fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
>> +	memset(&fs_info->qgroup_rescan_progress, 0,
>> +		sizeof(fs_info->qgroup_rescan_progress));
>> +
>> +	/* clear all current qgroup tracking information */
>> +	for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
>> +		qgroup = rb_entry(n, struct btrfs_qgroup, node);
>> +		qgroup->rfer = 0;
>> +		qgroup->rfer_cmpr = 0;
>> +		qgroup->excl = 0;
>> +		qgroup->excl_cmpr = 0;
>> +	}
>> +	spin_unlock(&fs_info->qgroup_lock);
>> +	mutex_unlock(&fs_info->qgroup_rescan_lock);
>> +
>> +	qgroup_rescan_start(fs_info, qscan);
>> +
>> +	return 0;
>> +}
>> diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
>> index fa3a5f9..ca70f08 100644
>> --- a/include/uapi/linux/btrfs.h
>> +++ b/include/uapi/linux/btrfs.h
>> @@ -376,12 +376,18 @@ struct btrfs_ioctl_get_dev_stats {
>>  
>>  #define BTRFS_QUOTA_CTL_ENABLE	1
>>  #define BTRFS_QUOTA_CTL_DISABLE	2
>> -#define BTRFS_QUOTA_CTL_RESCAN	3
>> +#define BTRFS_QUOTA_CTL_RESCAN__NOTUSED	3
>>  struct btrfs_ioctl_quota_ctl_args {
>>  	__u64 cmd;
>>  	__u64 status;
>>  };
>>  
>> +struct btrfs_ioctl_quota_rescan_args {
>> +	__u64	flags;
>> +	__u64   progress;
>> +	__u64   reserved[6];
>> +};
>> +
>>  struct btrfs_ioctl_qgroup_assign_args {
>>  	__u64 assign;
>>  	__u64 src;
>> @@ -502,6 +508,10 @@ struct btrfs_ioctl_send_args {
>>  			       struct btrfs_ioctl_qgroup_create_args)
>>  #define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \
>>  			       struct btrfs_ioctl_qgroup_limit_args)
>> +#define BTRFS_IOC_QUOTA_RESCAN _IOW(BTRFS_IOCTL_MAGIC, 44, \
>> +			       struct btrfs_ioctl_quota_rescan_args)
>> +#define BTRFS_IOC_QUOTA_RESCAN_STATUS _IOR(BTRFS_IOCTL_MAGIC, 45, \
>> +			       struct btrfs_ioctl_quota_rescan_args)
>>  #define BTRFS_IOC_GET_FSLABEL _IOR(BTRFS_IOCTL_MAGIC, 49, \
>>  				   char[BTRFS_LABEL_SIZE])
>>  #define BTRFS_IOC_SET_FSLABEL _IOW(BTRFS_IOCTL_MAGIC, 50, \
> 
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe
linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Wang Shilong

2013-Apr-16 09:48 UTC

head link

Re: [PATCH v2 2/3] Btrfs: rescan for qgroups

Jan Schmidt 写道:

..[snip]..> 
> I don''t see where I''m calling ulist_free(roots) in the
error path. One of us is
> missing something :-)

Yeah, you are right. i read a more ''}''.. ^_^

Thanks,
Wang
> 
>>> +		spin_lock(&fs_info->qgroup_lock);
>>> +		seq = fs_info->qgroup_seq;
>>> +		fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
>>> +
>>> +		ulist_reinit(tmp);
>>> +		ULIST_ITER_INIT(&uiter);
>>> +		while ((unode = ulist_next(roots, &uiter))) {
>>> +			struct btrfs_qgroup *qg;
>>> +
>>> +			qg = find_qgroup_rb(fs_info, unode->val);
>>> +			if (!qg)
>>> +				continue;
>>> +
>>> +			ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
>>
>> For this patch, you forget to add the check about ulist_add(),
ulist_add() may
>> return -ENOMEM. In fact, i have sent the patch to fix this problem in
qgroup.c before.
>> So you don''t need to change patch1, but you dose need to add
the check in the patch2.
> 
> Thanks for noticing, I''ll send a fix in a few days to leave room
for more comments.
> 
> -Jan
> 
>>> +		}
[snip]

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Wang Shilong

2013-Apr-16 09:56 UTC

head link

Re: [PATCH v2 1/3] Btrfs: split btrfs_qgroup_account_ref into four functions

Hello Jan,
> On Tue, April 16, 2013 at 11:20 (+0200), Wang Shilong wrote:
>> Hello Jan,
>>
>>> The function is separated into a preparation part and the three
accounting
>>> steps mentioned in the qgroups documentation. The goal is to make
steps two
>>> and three usable by the rescan functionality. A side effect is that
the
>>> function is restructured into readable subunits.
>>
>> How about renaming the three functions like:
>>
>> 1> qgroup_walk_old_roots()
>> 2> qgroup_walk_new_root()
>> 3> qgroup_rewalk_old_root()
>>
>> I''d like this function to be meaningful, but not just
step1,2,3.
>> Maybe you can think out better function name.
> 
> I''d like to keep it like 1, 2, 3, because that matches the
documentation in the
> qgroup pdf and the code has always been documented in those three steps.

Oh, Yes, i have read the pdf carefully. I think the pdf document it three steps
just to make it clear that we need 3 steps. But static checker may want to know
what is 3 steps
just by the function name but not to read the pdf.

In fact the tree steps are just do:
1>walk old roots
2>walk new root
3>rewalk old root

So i think rename the function like these will make things better. ^_^

Thanks,
Wang
> 
> Thanks,
> -Jan
> --
> To unsubscribe from this list: send the line "unsubscribe
linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Wang Shilong

2013-Apr-16 10:08 UTC

head link

Re: [PATCH v2 2/3] Btrfs: rescan for qgroups

Hello Jan,
 
>  	slot = path->slots[0];
>  	ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
> +	spin_lock(&fs_info->qgroup_lock);

Why we need hold qgroup_lock here? would you please explain...

Thanks,
Wang
>  	btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
>  	btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
> -	/* XXX scan */
> +	btrfs_set_qgroup_status_rescan(l, ptr,
> +				fs_info->qgroup_rescan_progress.objectid);
> +	spin_unlock(&fs_info->qgroup_lock);
>  
>  	btrfs_mark_buffer_dirty(l);
>  
> @@ -830,7 +854,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle
*trans,
>  	fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
>  				BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
>  	btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
> -	btrfs_set_qgroup_status_scan(leaf, ptr, 0);
> +	btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
>  
>  	btrfs_mark_buffer_dirty(leaf);
>  
> @@ -894,10 +918,11 @@ out:
>  	return ret;
>  }
>  
> -int btrfs_quota_rescan(struct btrfs_fs_info *fs_info)
> +static void qgroup_dirty(struct btrfs_fs_info *fs_info,
> +			 struct btrfs_qgroup *qgroup)
>  {
> -	/* FIXME */
> -	return 0;
> +	if (list_empty(&qgroup->dirty))
> +		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
>  }
>  
>  int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
> @@ -1045,13 +1070,6 @@ unlock:
>  	return ret;
>  }
>  
> -static void qgroup_dirty(struct btrfs_fs_info *fs_info,
> -			 struct btrfs_qgroup *qgroup)
> -{
> -	if (list_empty(&qgroup->dirty))
> -		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
> -}
> -
>  /*
>   * btrfs_qgroup_record_ref is called when the ref is added or deleted. it
puts
>   * the modification into a list that''s later used by
btrfs_end_transaction to
> @@ -1256,6 +1274,15 @@ int btrfs_qgroup_account_ref(struct
btrfs_trans_handle *trans,
>  		BUG();
>  	}
>  
> +	mutex_lock(&fs_info->qgroup_rescan_lock);
> +	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
> +		if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
> +			mutex_unlock(&fs_info->qgroup_rescan_lock);
> +			return 0;
> +		}
> +	}
> +	mutex_unlock(&fs_info->qgroup_rescan_lock);
> +
>  	/*
>  	 * the delayed ref sequence number we pass depends on the direction of
>  	 * the operation. for add operations, we pass (node->seq - 1) to skip
> @@ -1269,7 +1296,17 @@ int btrfs_qgroup_account_ref(struct
btrfs_trans_handle *trans,
>  	if (ret < 0)
>  		return ret;
>  
> +	mutex_lock(&fs_info->qgroup_rescan_lock);
>  	spin_lock(&fs_info->qgroup_lock);
> +	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
> +		if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
> +			ret = 0;
> +			mutex_unlock(&fs_info->qgroup_rescan_lock);
> +			goto unlock;
> +		}
> +	}
> +	mutex_unlock(&fs_info->qgroup_rescan_lock);
> +
>  	quota_root = fs_info->quota_root;
>  	if (!quota_root)
>  		goto unlock;
> @@ -1652,3 +1689,233 @@ void assert_qgroups_uptodate(struct
btrfs_trans_handle *trans)
>  		trans->delayed_ref_elem.seq);
>  	BUG();
>  }
> +
> +/*
> + * returns < 0 on error, 0 when more leafs are to be scanned.
> + * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
> + */
> +static int
> +qgroup_rescan_leaf(struct qgroup_rescan *qscan, struct btrfs_path *path,
> +		   struct btrfs_trans_handle *trans, struct ulist *tmp,
> +		   struct extent_buffer *scratch_leaf)
> +{
> +	struct btrfs_key found;
> +	struct btrfs_fs_info *fs_info = qscan->fs_info;
> +	struct ulist *roots = NULL;
> +	struct ulist_node *unode;
> +	struct ulist_iterator uiter;
> +	struct seq_list tree_mod_seq_elem = {};
> +	u64 seq;
> +	int slot;
> +	int ret;
> +
> +	path->leave_spinning = 1;
> +	mutex_lock(&fs_info->qgroup_rescan_lock);
> +	ret = btrfs_search_slot_for_read(fs_info->extent_root,
> +					 &fs_info->qgroup_rescan_progress,
> +					 path, 1, 0);
> +
> +	pr_debug("current progress key (%llu %u %llu), search_slot ret
%d\n",
> +		 (unsigned long long)fs_info->qgroup_rescan_progress.objectid,
> +		 fs_info->qgroup_rescan_progress.type,
> +		 (unsigned long long)fs_info->qgroup_rescan_progress.offset,
> +		 ret);
> +
> +	if (ret) {
> +		/*
> +		 * The rescan is about to end, we will not be scanning any
> +		 * further blocks. We cannot unset the RESCAN flag here, because
> +		 * we want to commit the transaction if everything went well.
> +		 * To make the live accounting work in this phase, we set our
> +		 * scan progress pointer such that every real extent objectid
> +		 * will be smaller.
> +		 */
> +		fs_info->qgroup_rescan_progress.objectid = (u64)-1;
> +		btrfs_release_path(path);
> +		mutex_unlock(&fs_info->qgroup_rescan_lock);
> +		return ret;
> +	}
> +
> +	btrfs_item_key_to_cpu(path->nodes[0], &found,
> +			      btrfs_header_nritems(path->nodes[0]) - 1);
> +	fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
> +
> +	btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
> +	memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
> +	slot = path->slots[0];
> +	btrfs_release_path(path);
> +	mutex_unlock(&fs_info->qgroup_rescan_lock);
> +
> +	for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
> +		btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
> +		if (found.type != BTRFS_EXTENT_ITEM_KEY)
> +			continue;
> +		ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
> +					   tree_mod_seq_elem.seq, &roots);
> +		if (ret < 0)
> +			break;
> +		spin_lock(&fs_info->qgroup_lock);
> +		seq = fs_info->qgroup_seq;
> +		fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
> +
> +		ulist_reinit(tmp);
> +		ULIST_ITER_INIT(&uiter);
> +		while ((unode = ulist_next(roots, &uiter))) {
> +			struct btrfs_qgroup *qg;
> +
> +			qg = find_qgroup_rb(fs_info, unode->val);
> +			if (!qg)
> +				continue;
> +
> +			ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
> +		}
> +
> +		/* this is similar to step 2 of btrfs_qgroup_account_ref */
> +		ULIST_ITER_INIT(&uiter);
> +		while ((unode = ulist_next(tmp, &uiter))) {
> +			struct btrfs_qgroup *qg;
> +			struct btrfs_qgroup_list *glist;
> +
> +			qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
> +			qg->rfer += found.offset;
> +			qg->rfer_cmpr += found.offset;
> +			WARN_ON(qg->tag >= seq);
> +			WARN_ON(qg->refcnt >= seq);
> +			if (qg->refcnt < seq)
> +				qg->refcnt = seq + 1;
> +			else
> +				qg->refcnt = qg->refcnt + 1;
> +			qgroup_dirty(fs_info, qg);
> +
> +			list_for_each_entry(glist, &qg->groups, next_group) {
> +				ulist_add(tmp, glist->group->qgroupid,
> +					  (uintptr_t)glist->group,
> +					  GFP_ATOMIC);
> +			}
> +		}
> +
> +		qgroup_account_ref_step3(fs_info, roots, tmp, seq, -1,
> +					 found.offset);
> +
> +		spin_unlock(&fs_info->qgroup_lock);
> +		ulist_free(roots);
> +	}
> +
> +	btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
> +
> +	return ret;
> +}
> +
> +static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
> +{
> +	struct qgroup_rescan *qscan = container_of(work, struct qgroup_rescan,
> +						   work);
> +	struct btrfs_path *path;
> +	struct btrfs_trans_handle *trans = NULL;
> +	struct btrfs_fs_info *fs_info = qscan->fs_info;
> +	struct ulist *tmp = NULL;
> +	struct extent_buffer *scratch_leaf = NULL;
> +	int err = -ENOMEM;
> +
> +	path = btrfs_alloc_path();
> +	if (!path)
> +		goto out;
> +	tmp = ulist_alloc(GFP_NOFS);
> +	if (!tmp)
> +		goto out;
> +	scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
> +	if (!scratch_leaf)
> +		goto out;
> +
> +	err = 0;
> +	while (!err) {
> +		trans = btrfs_start_transaction(fs_info->fs_root, 0);
> +		if (IS_ERR(trans)) {
> +			err = PTR_ERR(trans);
> +			break;
> +		}
> +		err = qgroup_rescan_leaf(qscan, path, trans, tmp, scratch_leaf);
> +		if (err > 0)
> +			btrfs_commit_transaction(trans, fs_info->fs_root);
> +		else
> +			btrfs_end_transaction(trans, fs_info->fs_root);
> +	}
> +
> +out:
> +	kfree(scratch_leaf);
> +	ulist_free(tmp);
> +	btrfs_free_path(path);
> +	kfree(qscan);
> +
> +	mutex_lock(&fs_info->qgroup_rescan_lock);
> +	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
> +
> +	if (err == 2 &&
> +	    fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT)
{
> +		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
> +	} else if (err < 0) {
> +		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
> +	}
> +	mutex_unlock(&fs_info->qgroup_rescan_lock);
> +
> +	if (err >= 0) {
> +		pr_info("btrfs: qgroup scan completed%s\n",
> +			err == 2 ? " (inconsistency flag cleared)" : "");
> +	} else {
> +		pr_err("btrfs: qgroup scan failed with %d\n", err);
> +	}
> +}
> +
> +static void
> +qgroup_rescan_start(struct btrfs_fs_info *fs_info, struct qgroup_rescan
*qscan)
> +{
> +	qscan->work.func = btrfs_qgroup_rescan_worker;
> +	qscan->fs_info = fs_info;
> +
> +	pr_info("btrfs: qgroup scan started\n");
> +	btrfs_queue_worker(&fs_info->qgroup_rescan_workers,
&qscan->work);
> +}
> +
> +int
> +btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
> +{
> +	int ret = 0;
> +	struct rb_node *n;
> +	struct btrfs_qgroup *qgroup;
> +	struct qgroup_rescan *qscan = kmalloc(sizeof(*qscan), GFP_NOFS);
> +
> +	if (!qscan)
> +		return -ENOMEM;
> +
> +	mutex_lock(&fs_info->qgroup_rescan_lock);
> +	spin_lock(&fs_info->qgroup_lock);
> +	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
> +		ret = -EINPROGRESS;
> +	else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
> +		ret = -EINVAL;
> +	if (ret) {
> +		spin_unlock(&fs_info->qgroup_lock);
> +		mutex_unlock(&fs_info->qgroup_rescan_lock);
> +		kfree(qscan);
> +		return ret;
> +	}
> +
> +	fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
> +	memset(&fs_info->qgroup_rescan_progress, 0,
> +		sizeof(fs_info->qgroup_rescan_progress));
> +
> +	/* clear all current qgroup tracking information */
> +	for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
> +		qgroup = rb_entry(n, struct btrfs_qgroup, node);
> +		qgroup->rfer = 0;
> +		qgroup->rfer_cmpr = 0;
> +		qgroup->excl = 0;
> +		qgroup->excl_cmpr = 0;
> +	}
> +	spin_unlock(&fs_info->qgroup_lock);
> +	mutex_unlock(&fs_info->qgroup_rescan_lock);
> +
> +	qgroup_rescan_start(fs_info, qscan);
> +
> +	return 0;
> +}
> diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
> index fa3a5f9..ca70f08 100644
> --- a/include/uapi/linux/btrfs.h
> +++ b/include/uapi/linux/btrfs.h
> @@ -376,12 +376,18 @@ struct btrfs_ioctl_get_dev_stats {
>  
>  #define BTRFS_QUOTA_CTL_ENABLE	1
>  #define BTRFS_QUOTA_CTL_DISABLE	2
> -#define BTRFS_QUOTA_CTL_RESCAN	3
> +#define BTRFS_QUOTA_CTL_RESCAN__NOTUSED	3
>  struct btrfs_ioctl_quota_ctl_args {
>  	__u64 cmd;
>  	__u64 status;
>  };
>  
> +struct btrfs_ioctl_quota_rescan_args {
> +	__u64	flags;
> +	__u64   progress;
> +	__u64   reserved[6];
> +};
> +
>  struct btrfs_ioctl_qgroup_assign_args {
>  	__u64 assign;
>  	__u64 src;
> @@ -502,6 +508,10 @@ struct btrfs_ioctl_send_args {
>  			       struct btrfs_ioctl_qgroup_create_args)
>  #define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \
>  			       struct btrfs_ioctl_qgroup_limit_args)
> +#define BTRFS_IOC_QUOTA_RESCAN _IOW(BTRFS_IOCTL_MAGIC, 44, \
> +			       struct btrfs_ioctl_quota_rescan_args)
> +#define BTRFS_IOC_QUOTA_RESCAN_STATUS _IOR(BTRFS_IOCTL_MAGIC, 45, \
> +			       struct btrfs_ioctl_quota_rescan_args)
>  #define BTRFS_IOC_GET_FSLABEL _IOR(BTRFS_IOCTL_MAGIC, 49, \
>  				   char[BTRFS_LABEL_SIZE])
>  #define BTRFS_IOC_SET_FSLABEL _IOW(BTRFS_IOCTL_MAGIC, 50, \


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

David Sterba

2013-Apr-16 10:32 UTC

head link

Re: [PATCH v2 1/3] Btrfs: split btrfs_qgroup_account_ref into four functions

On Tue, Apr 16, 2013 at 05:56:05PM +0800, Wang Shilong
wrote:> But static checker may want to know what is 3 steps
> just by the function name but not to read the pdf.
I''m curious what static checker you mean and how a function name helps
there.

thanks,
david
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Jan Schmidt

2013-Apr-16 10:34 UTC

head link

Re: [PATCH v2 2/3] Btrfs: rescan for qgroups

On Tue, April 16, 2013 at 12:08 (+0200), Wang Shilong
wrote:> Hello Jan,
>  
> 
>>  	slot = path->slots[0];
>>  	ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
>> +	spin_lock(&fs_info->qgroup_lock);
> 
> 
> Why we need hold qgroup_lock here? would you please explain...
It would have been easier for me if you had left the relevant context in there,
but I finally found it.

Thinking again about it, as update_qgroup_status_item is only called from
transaction commit context, we can do without a spinlock here. I meant to
protect fs_info->qgroup_flags and fs_info->qgroup_rescan_progress, but it
seems
not required.

Thanks,
-Jan
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Wang Shilong

2013-Apr-16 10:47 UTC

head link

Re: [PATCH v2 1/3] Btrfs: split btrfs_qgroup_account_ref into four functions

Hi David,
> On Tue, Apr 16, 2013 at 05:56:05PM +0800, Wang Shilong wrote:
>> But static checker may want to know what is 3 steps
>> just by the function name but not to read the pdf.
> 
> I''m curious what static checker you mean and how a function name
helps
> there.

I mean that other developers will get more information by the function name.
In fact, the tree steps they do more things.So maybe
the function name of mine is not good enough.
But i really hope that a meaningful function name gives more information. I
don''t
insist on this anyway ^_^

Thanks,
Wang
> 
> thanks,
> david
> --
> To unsubscribe from this list: send the line "unsubscribe
linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

David Sterba

2013-Apr-16 10:52 UTC

head link

Re: [PATCH v2 2/3] Btrfs: rescan for qgroups

On Tue, Apr 16, 2013 at 10:45:19AM +0200, Jan Schmidt
wrote:> +static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user
*arg)
> +{
> +	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
> +	struct btrfs_ioctl_quota_rescan_args *qsa;
> +	int ret = 0;
> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	qsa = kzalloc(sizeof(*qsa), GFP_NOFS);
The reserved field increased the size of qsa to 64 bytes, thinking about
it again, an early ENOMEM is a good indicator that the system is unable
to get memory so starting a bigger operation does not make much sense
anyway. Keep it as it is.
> +	if (!qsa)
> +		return -ENOMEM;
> +
> +	if (root->fs_info->qgroup_flags &
BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
> +		qsa->flags = 1;
> +		qsa->progress = root->fs_info->qgroup_rescan_progress.objectid;
> +	}
> +
> +	if (copy_to_user(arg, qsa, sizeof(*qsa)))
> +		ret = -EFAULT;
> +
> +	kfree(qsa);
> +	return ret;
> +}
> +
> --- a/include/uapi/linux/btrfs.h
> +++ b/include/uapi/linux/btrfs.h
> @@ -376,12 +376,18 @@ struct btrfs_ioctl_get_dev_stats {
>  
>  #define BTRFS_QUOTA_CTL_ENABLE	1
>  #define BTRFS_QUOTA_CTL_DISABLE	2
> -#define BTRFS_QUOTA_CTL_RESCAN	3
> +#define BTRFS_QUOTA_CTL_RESCAN__NOTUSED	3
Looks ok.

david
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

David Sterba

2013-Apr-16 10:59 UTC

head link

Re: [PATCH v2 1/3] Btrfs: split btrfs_qgroup_account_ref into four functions

On Tue, Apr 16, 2013 at 06:47:15PM +0800, Wang Shilong
wrote:> I mean that other developers will get more information by the function
name.
I don''t dare to suggest putting a comment before the functions

david
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Wang Shilong

2013-Apr-16 12:22 UTC

head link

Re: [PATCH v2 2/3] Btrfs: rescan for qgroups

Hello Jan, more comments below..

[...snip..]
>  
> +
> +static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user
*arg)
> +{
> +	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
> +	struct btrfs_ioctl_quota_rescan_args *qsa;
> +	int ret = 0;
> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	qsa = kzalloc(sizeof(*qsa), GFP_NOFS);
> +	if (!qsa)
> +		return -ENOMEM;
> +
	Here, i think we should hold qgroup_rescan_lock and group_lock:

	1> qgroup_rescan protect BTRFS_QGROUP_STATUS_RESCAN  
	2>quota disabling may happen this time..so group_lock should also be held
here.

> +	if (root->fs_info->qgroup_flags &
BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
> +		qsa->flags = 1;
> +		qsa->progress = root->fs_info->qgroup_rescan_progress.objectid;
> +	}
> +
> +	if (copy_to_user(arg, qsa, sizeof(*qsa)))
> +		ret = -EFAULT;
> +
> +	kfree(qsa);
> +	return ret;
> +}
> +
>  
[….snip...]> 
> +
> +/*
> + * returns < 0 on error, 0 when more leafs are to be scanned.
> + * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
> + */
> +static int
> +qgroup_rescan_leaf(struct qgroup_rescan *qscan, struct btrfs_path *path,
> +		   struct btrfs_trans_handle *trans, struct ulist *tmp,
> +		   struct extent_buffer *scratch_leaf)
> +{
> +	struct btrfs_key found;
> +	struct btrfs_fs_info *fs_info = qscan->fs_info;
> +	struct ulist *roots = NULL;
> +	struct ulist_node *unode;
> +	struct ulist_iterator uiter;
> +	struct seq_list tree_mod_seq_elem = {};
> +	u64 seq;
> +	int slot;
> +	int ret;
> +
> +	path->leave_spinning = 1;
> +	mutex_lock(&fs_info->qgroup_rescan_lock);
Here in qgroup_rescan_leaf(), we don''t need hold group_rescan_lock.
Because qgroup_rescan_lock is used to protect qgroup_flag, in
group_rescan_leaf().
we don''t change qgroup_flag.. So we don''t need hold the
group_rescan_lock.

Maybe we can just remove the lock qgroup_rescan_lock,  and i think what
qgroup_rscan_lock
does that qgroup_lock can replace.

> +	ret = btrfs_search_slot_for_read(fs_info->extent_root,
> +					 &fs_info->qgroup_rescan_progress,
> +					 path, 1, 0);
> +
> +	pr_debug("current progress key (%llu %u %llu), search_slot ret
%d\n",
> +		 (unsigned long long)fs_info->qgroup_rescan_progress.objectid,
> +		 fs_info->qgroup_rescan_progress.type,
> +		 (unsigned long long)fs_info->qgroup_rescan_progress.offset,
> +		 ret);
> +
> +	if (ret) {
> +		/*
> +		 * The rescan is about to end, we will not be scanning any
> +		 * further blocks. We cannot unset the RESCAN flag here, because
> +		 * we want to commit the transaction if everything went well.
> +		 * To make the live accounting work in this phase, we set our
> +		 * scan progress pointer such that every real extent objectid
> +		 * will be smaller.
> +		 */
> +		fs_info->qgroup_rescan_progress.objectid = (u64)-1;
> +		btrfs_release_path(path);
> +		mutex_unlock(&fs_info->qgroup_rescan_lock);
> +		return ret;
> +	}
> +
> +	btrfs_item_key_to_cpu(path->nodes[0], &found,
> +			      btrfs_header_nritems(path->nodes[0]) - 1);
> +	fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
> +
> +	btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
> +	memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
> +	slot = path->slots[0];
> +	btrfs_release_path(path);
> +	mutex_unlock(&fs_info->qgroup_rescan_lock);
> +
> +	for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
> +		btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
> +		if (found.type != BTRFS_EXTENT_ITEM_KEY)
> +			continue;
> +		ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
> +					   tree_mod_seq_elem.seq, &roots);
> +		if (ret < 0)
> +			break;
> +		spin_lock(&fs_info->qgroup_lock);
Quota may has been disabled now, so please adds the check, otherwise
we may get a NULL pointer panic here.


Thanks,
Wang> +		seq = fs_info->qgroup_seq;
> +		fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
> +
> +		ulist_reinit(tmp);
> +		ULIST_ITER_INIT(&uiter);
> +		while ((unode = ulist_next(roots, &uiter))) {
> +			struct btrfs_qgroup *qg;
> +
> +			qg = find_qgroup_rb(fs_info, unode->val);
> +			if (!qg)
> +				continue;
> +
> +			ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
> +		}
> +
> +		/* this is similar to step 2 of btrfs_qgroup_account_ref */
> +		ULIST_ITER_INIT(&uiter);
> +		while ((unode = ulist_next(tmp, &uiter))) {
> +			struct btrfs_qgroup *qg;
> +			struct btrfs_qgroup_list *glist;
> +
> +			qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
> +			qg->rfer += found.offset;
> +			qg->rfer_cmpr += found.offset;
> +			WARN_ON(qg->tag >= seq);
> +			WARN_ON(qg->refcnt >= seq);
> +			if (qg->refcnt < seq)
> +				qg->refcnt = seq + 1;
> +			else
> +				qg->refcnt = qg->refcnt + 1;
> +			qgroup_dirty(fs_info, qg);
> +
> +			list_for_each_entry(glist, &qg->groups, next_group) {
> +				ulist_add(tmp, glist->group->qgroupid,
> +					  (uintptr_t)glist->group,
> +					  GFP_ATOMIC);
> +			}
> +		}
> +
> +		qgroup_account_ref_step3(fs_info, roots, tmp, seq, -1,
> +					 found.offset);
> +
> +		spin_unlock(&fs_info->qgroup_lock);
> +		ulist_free(roots);
> +	}
> +
> +	btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
> +
> +	return ret;
> +}
> +
> +static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
> +{
> +	struct qgroup_rescan *qscan = container_of(work, struct qgroup_rescan,
> +						   work);
> +	struct btrfs_path *path;
> +	struct btrfs_trans_handle *trans = NULL;
> +	struct btrfs_fs_info *fs_info = qscan->fs_info;
> +	struct ulist *tmp = NULL;
> +	struct extent_buffer *scratch_leaf = NULL;
> +	int err = -ENOMEM;
> +
> +	path = btrfs_alloc_path();
> +	if (!path)
> +		goto out;
> +	tmp = ulist_alloc(GFP_NOFS);
> +	if (!tmp)
> +		goto out;
> +	scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
> +	if (!scratch_leaf)
> +		goto out;
> +
> +	err = 0;
> +	while (!err) {
> +		trans = btrfs_start_transaction(fs_info->fs_root, 0);
> +		if (IS_ERR(trans)) {
> +			err = PTR_ERR(trans);
> +			break;
> +		}
> +		err = qgroup_rescan_leaf(qscan, path, trans, tmp, scratch_leaf);
> +		if (err > 0)
> +			btrfs_commit_transaction(trans, fs_info->fs_root);
> +		else
> +			btrfs_end_transaction(trans, fs_info->fs_root);
> +	}
> +
> +out:
> +	kfree(scratch_leaf);
> +	ulist_free(tmp);
> +	btrfs_free_path(path);
> +	kfree(qscan);
> +
> +	mutex_lock(&fs_info->qgroup_rescan_lock);
> +	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
> +
> +	if (err == 2 &&
> +	    fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT)
{
> +		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
> +	} else if (err < 0) {
> +		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
> +	}
> +	mutex_unlock(&fs_info->qgroup_rescan_lock);
> +
> +	if (err >= 0) {
> +		pr_info("btrfs: qgroup scan completed%s\n",
> +			err == 2 ? " (inconsistency flag cleared)" : "");
> +	} else {
> +		pr_err("btrfs: qgroup scan failed with %d\n", err);
> +	}
> +}
> +
> +static void
> +qgroup_rescan_start(struct btrfs_fs_info *fs_info, struct qgroup_rescan
*qscan)
> +{
> +	qscan->work.func = btrfs_qgroup_rescan_worker;
> +	qscan->fs_info = fs_info;
> +
> +	pr_info("btrfs: qgroup scan started\n");
> +	btrfs_queue_worker(&fs_info->qgroup_rescan_workers,
&qscan->work);
> +}
> +
> +int
> +btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
> +{
> +	int ret = 0;
> +	struct rb_node *n;
> +	struct btrfs_qgroup *qgroup;
> +	struct qgroup_rescan *qscan = kmalloc(sizeof(*qscan), GFP_NOFS);
> +
> +	if (!qscan)
> +		return -ENOMEM;
> +
> +	mutex_lock(&fs_info->qgroup_rescan_lock);
> +	spin_lock(&fs_info->qgroup_lock);
> +	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
> +		ret = -EINPROGRESS;
> +	else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
> +		ret = -EINVAL;
> +	if (ret) {
> +		spin_unlock(&fs_info->qgroup_lock);
> +		mutex_unlock(&fs_info->qgroup_rescan_lock);
> +		kfree(qscan);
> +		return ret;
> +	}
> +
> +	fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
> +	memset(&fs_info->qgroup_rescan_progress, 0,
> +		sizeof(fs_info->qgroup_rescan_progress));
> +
> +	/* clear all current qgroup tracking information */
> +	for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
> +		qgroup = rb_entry(n, struct btrfs_qgroup, node);
> +		qgroup->rfer = 0;
> +		qgroup->rfer_cmpr = 0;
> +		qgroup->excl = 0;
> +		qgroup->excl_cmpr = 0;
> +	}
> +	spin_unlock(&fs_info->qgroup_lock);
> +	mutex_unlock(&fs_info->qgroup_rescan_lock);
> +
> +	qgroup_rescan_start(fs_info, qscan);
> +
> +	return 0;
> +}
> diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
> index fa3a5f9..ca70f08 100644
> --- a/include/uapi/linux/btrfs.h
> +++ b/include/uapi/linux/btrfs.h
> @@ -376,12 +376,18 @@ struct btrfs_ioctl_get_dev_stats {
> 
> #define BTRFS_QUOTA_CTL_ENABLE	1
> #define BTRFS_QUOTA_CTL_DISABLE	2
> -#define BTRFS_QUOTA_CTL_RESCAN	3
> +#define BTRFS_QUOTA_CTL_RESCAN__NOTUSED	3
> struct btrfs_ioctl_quota_ctl_args {
> 	__u64 cmd;
> 	__u64 status;
> };
> 
> +struct btrfs_ioctl_quota_rescan_args {
> +	__u64	flags;
> +	__u64   progress;
> +	__u64   reserved[6];
> +};
> +
> struct btrfs_ioctl_qgroup_assign_args {
> 	__u64 assign;
> 	__u64 src;
> @@ -502,6 +508,10 @@ struct btrfs_ioctl_send_args {
> 			       struct btrfs_ioctl_qgroup_create_args)
> #define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \
> 			       struct btrfs_ioctl_qgroup_limit_args)
> +#define BTRFS_IOC_QUOTA_RESCAN _IOW(BTRFS_IOCTL_MAGIC, 44, \
> +			       struct btrfs_ioctl_quota_rescan_args)
> +#define BTRFS_IOC_QUOTA_RESCAN_STATUS _IOR(BTRFS_IOCTL_MAGIC, 45, \
> +			       struct btrfs_ioctl_quota_rescan_args)
> #define BTRFS_IOC_GET_FSLABEL _IOR(BTRFS_IOCTL_MAGIC, 49, \
> 				   char[BTRFS_LABEL_SIZE])
> #define BTRFS_IOC_SET_FSLABEL _IOW(BTRFS_IOCTL_MAGIC, 50, \
> -- 
> 1.7.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe
linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Wang Shilong

2013-Apr-16 12:55 UTC

head link

Re: [PATCH v2 3/3] Btrfs: automatic rescan after "quota enable" command

Hi Chris,

	I'' ve put my efforts in btrfs quota for a period of time. and i send a
bunch of patches
about btrfs quota.
	If you pull Jan''s qgroup rescan patches, i''d like that you
pull my this patch firstly:
	https://patchwork.kernel.org/patch/2402871/
	Jan''s group rescan patch relies on this patch.

Besides, i make a patch-set  to fix a btrfs quota''s race condition:
	https://patchwork.kernel.org/patch/2402901/
	https://patchwork.kernel.org/patch/2402911/
	https://patchwork.kernel.org/patch/2402881/
	https://patchwork.kernel.org/patch/2402891/
	https://patchwork.kernel.org/patch/2402921/

Some bug fixes about btrfs quota:
	https://patchwork.kernel.org/patch/2356111/	
	https://patchwork.kernel.org/patch/2402941/
	https://patchwork.kernel.org/patch/2407571/
	https://patchwork.kernel.org/patch/2420731/	
	https://patchwork.kernel.org/patch/2426721/
	https://patchwork.kernel.org/patch/2445051/
	https://patchwork.kernel.org/patch/2368341/
	https://patchwork.kernel.org/patch/2368291/
	

This patch improve performances of  ulist that btrfs quota ,send
relies on this:
	https://patchwork.kernel.org/patch/2435001/

some minor cleanups:
	https://patchwork.kernel.org/patch/2441951/ 
	https://patchwork.kernel.org/patch/2444521/
	https://patchwork.kernel.org/patch/2448741/


These patches are mainly related to btrfs quota. and i have sent them
to btrfs list  reviewed by the people for a period of time. Thanks to Miao
Xie''s help
to my efforts in btrfs. And Arne Jasen, Jan schdmit , David really help review
my patch, many thanks.

Every efforts i have made is to make btrfs quota works well. After you pull
my patches and Jan''s qgroup rescan patch, i will have a deep look at
codes, and
test Jan''s patch.

I am really newbie, and usually  makes a lot of mistakes.. forgive me if i do
something wrong….


Thanks,
Wang
> 
> Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
> ---
> fs/btrfs/qgroup.c |   10 ++++++++++
> 1 files changed, 10 insertions(+), 0 deletions(-)
> 
> diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
> index bb081b5..0ea2c3e 100644
> --- a/fs/btrfs/qgroup.c
> +++ b/fs/btrfs/qgroup.c
> @@ -1356,10 +1356,14 @@ int btrfs_run_qgroups(struct btrfs_trans_handle
*trans,
> {
> 	struct btrfs_root *quota_root = fs_info->quota_root;
> 	int ret = 0;
> +	int start_rescan_worker = 0;
> 
> 	if (!quota_root)
> 		goto out;
> 
> +	if (!fs_info->quota_enabled &&
fs_info->pending_quota_state)
> +		start_rescan_worker = 1;
> +
> 	fs_info->quota_enabled = fs_info->pending_quota_state;
> 
> 	spin_lock(&fs_info->qgroup_lock);
> @@ -1385,6 +1389,12 @@ int btrfs_run_qgroups(struct btrfs_trans_handle
*trans,
> 	if (ret)
> 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
> 
> +	if (start_rescan_worker) {
> +		ret = btrfs_qgroup_rescan(fs_info);
> +		if (ret)
> +			pr_err("btrfs: start rescan quota failed: %d\n", ret);
> +	}
> +
> out:
> 
> 	return ret;
> -- 
> 1.7.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe
linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Jan Schmidt

2013-Apr-17 15:20 UTC

head link

Re: [PATCH v2 2/3] Btrfs: rescan for qgroups

On Tue, April 16, 2013 at 14:22 (+0200), Wang Shilong
wrote:> 
> Hello Jan, more comments below..
> 
> [...snip..]
> 
>>  
>> +
>> +static long btrfs_ioctl_quota_rescan_status(struct file *file, void
__user *arg)
>> +{
>> +	struct btrfs_root *root =
BTRFS_I(fdentry(file)->d_inode)->root;
>> +	struct btrfs_ioctl_quota_rescan_args *qsa;
>> +	int ret = 0;
>> +
>> +	if (!capable(CAP_SYS_ADMIN))
>> +		return -EPERM;
>> +
>> +	qsa = kzalloc(sizeof(*qsa), GFP_NOFS);
>> +	if (!qsa)
>> +		return -ENOMEM;
>> +
> 
> 	Here, i think we should hold qgroup_rescan_lock and group_lock:
> 
> 	1> qgroup_rescan protect BTRFS_QGROUP_STATUS_RESCAN  
> 	2>quota disabling may happen this time..so group_lock should also be
held here.
It''s just a status call for user space, I don''t really care
about exact
synchronization here. *If* we wanted to do that, I would have moved the code
into qgroup.c, because all the code that requires any qgroup locks is there. But
I''d really want to keep it simple. You cannot get completely garbage
information
that way, you only could race with someone just starting off or finishing a
rescan operation. I don''t think that really matters in the end.
> 
> 
>> +	if (root->fs_info->qgroup_flags &
BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
>> +		qsa->flags = 1;
>> +		qsa->progress =
root->fs_info->qgroup_rescan_progress.objectid;
>> +	}
>> +
>> +	if (copy_to_user(arg, qsa, sizeof(*qsa)))
>> +		ret = -EFAULT;
>> +
>> +	kfree(qsa);
>> +	return ret;
>> +}
>> +
>>  
> [….snip...]
>>
>> +
>> +/*
>> + * returns < 0 on error, 0 when more leafs are to be scanned.
>> + * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
>> + */
>> +static int
>> +qgroup_rescan_leaf(struct qgroup_rescan *qscan, struct btrfs_path
*path,
>> +		   struct btrfs_trans_handle *trans, struct ulist *tmp,
>> +		   struct extent_buffer *scratch_leaf)
>> +{
>> +	struct btrfs_key found;
>> +	struct btrfs_fs_info *fs_info = qscan->fs_info;
>> +	struct ulist *roots = NULL;
>> +	struct ulist_node *unode;
>> +	struct ulist_iterator uiter;
>> +	struct seq_list tree_mod_seq_elem = {};
>> +	u64 seq;
>> +	int slot;
>> +	int ret;
>> +
>> +	path->leave_spinning = 1;
>> +	mutex_lock(&fs_info->qgroup_rescan_lock);
> 
> Here in qgroup_rescan_leaf(), we don''t need hold
group_rescan_lock.
> Because qgroup_rescan_lock is used to protect qgroup_flag, in
group_rescan_leaf().
> we don''t change qgroup_flag.. So we don''t need hold the
group_rescan_lock.
> 
> Maybe we can just remove the lock qgroup_rescan_lock,  and i think what
qgroup_rscan_lock
> does that qgroup_lock can replace.
No, we cannot. We need the mutex for the following tree search and tie it to the
following update of the qgroup_rescan_progress. In fact, that''s the
only reason
I introduced it, but I don''t want to hold a spin lock for a whole tree
search.
If we do not make sure the search operation and the progress update happen under
the same lock, we can end up with a tree block being found by thread A, then
thread B checks the rescan_progress, then thread A updates the rescan_progress
according to the found block and doing the rescan. That would result in wrong
tracking information.
> 
> 
>> +	ret = btrfs_search_slot_for_read(fs_info->extent_root,
>> +					 &fs_info->qgroup_rescan_progress,
>> +					 path, 1, 0);
>> +
>> +	pr_debug("current progress key (%llu %u %llu), search_slot ret
%d\n",
>> +		 (unsigned long long)fs_info->qgroup_rescan_progress.objectid,
>> +		 fs_info->qgroup_rescan_progress.type,
>> +		 (unsigned long long)fs_info->qgroup_rescan_progress.offset,
>> +		 ret);
>> +
>> +	if (ret) {
>> +		/*
>> +		 * The rescan is about to end, we will not be scanning any
>> +		 * further blocks. We cannot unset the RESCAN flag here, because
>> +		 * we want to commit the transaction if everything went well.
>> +		 * To make the live accounting work in this phase, we set our
>> +		 * scan progress pointer such that every real extent objectid
>> +		 * will be smaller.
>> +		 */
>> +		fs_info->qgroup_rescan_progress.objectid = (u64)-1;
>> +		btrfs_release_path(path);
>> +		mutex_unlock(&fs_info->qgroup_rescan_lock);
>> +		return ret;
>> +	}
>> +
>> +	btrfs_item_key_to_cpu(path->nodes[0], &found,
>> +			      btrfs_header_nritems(path->nodes[0]) - 1);
>> +	fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
>> +
>> +	btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
>> +	memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
>> +	slot = path->slots[0];
>> +	btrfs_release_path(path);
>> +	mutex_unlock(&fs_info->qgroup_rescan_lock);
>> +
>> +	for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
>> +		btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
>> +		if (found.type != BTRFS_EXTENT_ITEM_KEY)
>> +			continue;
>> +		ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
>> +					   tree_mod_seq_elem.seq, &roots);
>> +		if (ret < 0)
>> +			break;
>> +		spin_lock(&fs_info->qgroup_lock);
> 
> Quota may has been disabled now, so please adds the check, otherwise
> we may get a NULL pointer panic here.
Quota cannot be disabled without a transaction commit, and we''re
holding a
transaction open here.

What is in fact missing is stopping the rescan worker thread when quota is
disabled, though. I''ll put that into v3.

Thanks,
-Jan
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Btrfs devel - Apr 2013 - [PATCH v2 0/3] Btrfs: quota rescan for 3.10

[PATCH v2 0/3] Btrfs: quota rescan for 3.10

[PATCH v2 1/3] Btrfs: split btrfs_qgroup_account_ref into four functions

[PATCH v2 2/3] Btrfs: rescan for qgroups

[PATCH v2 3/3] Btrfs: automatic rescan after "quota enable" command

Re: [PATCH v2 1/3] Btrfs: split btrfs_qgroup_account_ref into four functions

Re: [PATCH v2 2/3] Btrfs: rescan for qgroups

Re: [PATCH v2 1/3] Btrfs: split btrfs_qgroup_account_ref into four functions

Re: [PATCH v2 2/3] Btrfs: rescan for qgroups

Re: [PATCH v2 2/3] Btrfs: rescan for qgroups

Re: [PATCH v2 1/3] Btrfs: split btrfs_qgroup_account_ref into four functions

Re: [PATCH v2 2/3] Btrfs: rescan for qgroups

Re: [PATCH v2 1/3] Btrfs: split btrfs_qgroup_account_ref into four functions

Re: [PATCH v2 2/3] Btrfs: rescan for qgroups

Re: [PATCH v2 1/3] Btrfs: split btrfs_qgroup_account_ref into four functions

Re: [PATCH v2 2/3] Btrfs: rescan for qgroups

Re: [PATCH v2 1/3] Btrfs: split btrfs_qgroup_account_ref into four functions

Re: [PATCH v2 2/3] Btrfs: rescan for qgroups

Re: [PATCH v2 3/3] Btrfs: automatic rescan after "quota enable" command

Re: [PATCH v2 2/3] Btrfs: rescan for qgroups