Pull out the actual free space check, so setting an inode''s initial
space_info is done by the caller instead of a wonky goto.
Signed-off-by: Sage Weil <sage@newdream.net>
---
fs/btrfs/extent-tree.c | 81 +++++++++++++++++++++++++++++------------------
1 files changed, 50 insertions(+), 31 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 63d86ae..cb50944 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2830,25 +2830,33 @@ alloc:
return 0;
}
-/*
- * This will check the space that the inode allocates from to make sure we have
- * enough space for bytes.
- */
-int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
- u64 bytes)
+static int __alloc_chunk(struct btrfs_root *root, u64 bytes)
{
- struct btrfs_space_info *data_sinfo;
- int ret = 0, committed = 0;
+ int ret;
+ u64 alloc_target;
+ struct btrfs_trans_handle *trans;
- /* make sure bytes are sectorsize aligned */
- bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize -
1);
+ alloc_target = btrfs_get_alloc_profile(root, 1);
+ trans = btrfs_start_transaction(root, 1);
+ if (!trans)
+ return -ENOMEM;
- data_sinfo = BTRFS_I(inode)->space_info;
- if (!data_sinfo)
- goto alloc;
+ ret = do_chunk_alloc(trans, root->fs_info->extent_root,
+ bytes + 2 * 1024 * 1024,
+ alloc_target, 0);
+ btrfs_end_transaction(trans, root);
+ return ret;
+}
+
+int btrfs_check_data_free_space_info(struct btrfs_root *root,
+ struct btrfs_space_info *data_sinfo,
+ u64 bytes,
+ struct inode *inode)
+{
+ int ret = 0, committed = 0;
-again:
/* make sure we have enough space to handle the data first */
+again:
spin_lock(&data_sinfo->lock);
if (data_sinfo->total_bytes - data_sinfo->bytes_used -
data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved -
@@ -2861,27 +2869,11 @@ again:
* to alloc a new chunk.
*/
if (!data_sinfo->full) {
- u64 alloc_target;
-
data_sinfo->force_alloc = 1;
spin_unlock(&data_sinfo->lock);
-alloc:
- alloc_target = btrfs_get_alloc_profile(root, 1);
- trans = btrfs_start_transaction(root, 1);
- if (!trans)
- return -ENOMEM;
-
- ret = do_chunk_alloc(trans, root->fs_info->extent_root,
- bytes + 2 * 1024 * 1024,
- alloc_target, 0);
- btrfs_end_transaction(trans, root);
+ ret = __alloc_chunk(root, bytes);
if (ret)
return ret;
-
- if (!data_sinfo) {
- btrfs_set_inode_space_info(root, inode);
- data_sinfo = BTRFS_I(inode)->space_info;
- }
goto again;
}
spin_unlock(&data_sinfo->lock);
@@ -2914,7 +2906,34 @@ alloc:
data_sinfo->bytes_may_use += bytes;
BTRFS_I(inode)->reserved_bytes += bytes;
spin_unlock(&data_sinfo->lock);
+ return 0;
+}
+/*
+ * This will check the space that the inode allocates from to make sure we have
+ * enough space for bytes.
+ */
+int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
+ u64 bytes)
+{
+ struct btrfs_space_info *data_sinfo;
+ int ret;
+
+ /* make sure bytes are sectorsize aligned */
+ bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize -
1);
+
+ data_sinfo = BTRFS_I(inode)->space_info;
+ if (!data_sinfo) {
+ ret = __alloc_chunk(root, bytes);
+ if (ret)
+ return ret;
+ btrfs_set_inode_space_info(root, inode);
+ data_sinfo = BTRFS_I(inode)->space_info;
+ }
+
+ ret = btrfs_check_data_free_space_info(root, data_sinfo, bytes, inode);
+ if (ret)
+ return ret;
return btrfs_check_metadata_free_space(root);
}
--
1.5.6.5
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Sage Weil
2009-Sep-25 21:10 UTC
[PATCH 2/2] Btrfs: add TRANS_RESV_START ioctl to check/reserve free space on transaction start
The current TRANS_START ioctl is problematic when the volume fills up
because a process can get ENOSPC on any operation that''s supoosed to be
"contained" within its transaction without any prior warning. This
defines a new ioctl, TRANS_RESV_START, that checks and sets aside space
for the entire transaction, so that the process will get ENOSPC right
away.
The bytes_ioctl_trans_reserved is only checked and adjusted by the
TRANS_RESV_START ioctl; any write()s that follow will be allowed to use
up that reserved space. This is clearly imperfect (a mixture of an
ioctl transaction workload and a regular workload will violate the
reservations), but unavoidable with the current user transaction approach
because we don''t know whether any given operation is contained by a
user transaction or not.
The ''ops'' field isn''t used yet. Its intended to set
a bound on the
number of transaction operations and thus btree modifications, for when
the metadata space reservation gets smarter.
Signed-off-by: Sage Weil <sage@newdream.net>
---
fs/btrfs/ctree.h | 7 ++++
fs/btrfs/extent-tree.c | 57 ++++++++++++++++++++++++++++--
fs/btrfs/ioctl.c | 92 +++++++++++++++++++++++++++++++++++++++++++----
fs/btrfs/ioctl.h | 6 +++
4 files changed, 151 insertions(+), 11 deletions(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 80599b4..62e00df 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -683,6 +683,9 @@ struct btrfs_space_info {
u64 bytes_may_use; /* number of bytes that may be used for
delalloc */
+ u64 bytes_ioctl_trans_reserved; /* number of bytes reserved by ioctl
+ transactions */
+
int full; /* indicates that we cannot allocate any more
chunks for this space */
int force_alloc; /* set if we need to force a chunk alloc for
@@ -2025,8 +2028,12 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info
*info);
int btrfs_check_metadata_free_space(struct btrfs_root *root);
int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
+int btrfs_check_ioctl_trans_free_space(struct btrfs_root *root,
+ u64 bytes, u64 ops);
void btrfs_free_reserved_data_space(struct btrfs_root *root,
struct inode *inode, u64 bytes);
+void btrfs_free_reserved_ioctl_trans_space(struct btrfs_root *root,
+ u64 bytes, u64 ops);
void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index cb50944..b626ee2 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2668,6 +2668,7 @@ static int update_space_info(struct btrfs_fs_info *info,
u64 flags,
found->bytes_reserved = 0;
found->bytes_readonly = 0;
found->bytes_delalloc = 0;
+ found->bytes_ioctl_trans_reserved = 0;
found->full = 0;
found->force_alloc = 0;
*space_info = found;
@@ -2848,6 +2849,14 @@ static int __alloc_chunk(struct btrfs_root *root, u64
bytes)
return ret;
}
+/*
+ * if @inode is defined, reserve bytes on that inode. otherwise, we
+ * are reserving the space for an ioctl transaction, and need to
+ * subtract off other ioctl reserved space too. we _only_ check ioctl
+ * reserved space here: the write() that follows needs to be able to
+ * use it, and we don''t know which writes "belong" to which
ioctl
+ * transactions.
+ */
int btrfs_check_data_free_space_info(struct btrfs_root *root,
struct btrfs_space_info *data_sinfo,
u64 bytes,
@@ -2861,6 +2870,7 @@ again:
if (data_sinfo->total_bytes - data_sinfo->bytes_used -
data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved -
data_sinfo->bytes_pinned - data_sinfo->bytes_readonly -
+ (inode ? 0 : data_sinfo->bytes_ioctl_trans_reserved) -
data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) {
struct btrfs_trans_handle *trans;
@@ -2879,7 +2889,8 @@ again:
spin_unlock(&data_sinfo->lock);
/* commit the current transaction and try again */
- if (!committed && !root->fs_info->open_ioctl_trans) {
+ if (!committed &&
+ (!inode || !root->fs_info->open_ioctl_trans)) {
committed = 1;
trans = btrfs_join_transaction(root, 1);
if (!trans)
@@ -2903,8 +2914,12 @@ again:
(unsigned long long)data_sinfo->total_bytes);
return -ENOSPC;
}
- data_sinfo->bytes_may_use += bytes;
- BTRFS_I(inode)->reserved_bytes += bytes;
+ if (inode) {
+ data_sinfo->bytes_may_use += bytes;
+ BTRFS_I(inode)->reserved_bytes += bytes;
+ } else {
+ data_sinfo->bytes_ioctl_trans_reserved += bytes;
+ }
spin_unlock(&data_sinfo->lock);
return 0;
}
@@ -2937,6 +2952,42 @@ int btrfs_check_data_free_space(struct btrfs_root *root,
struct inode *inode,
return btrfs_check_metadata_free_space(root);
}
+int btrfs_check_ioctl_trans_free_space(struct btrfs_root *root, u64 bytes,
+ u64 ops)
+{
+ struct btrfs_space_info *data_sinfo;
+ u64 alloc_target;
+ int ret;
+
+ /* make sure bytes are sectorsize aligned */
+ bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize -
1);
+
+ alloc_target = btrfs_get_alloc_profile(root, 1);
+ data_sinfo = __find_space_info(root->fs_info, alloc_target);
+ ret = btrfs_check_data_free_space_info(root, data_sinfo, bytes, NULL);
+ if (ret)
+ return ret;
+ return btrfs_check_metadata_free_space(root);
+}
+
+void btrfs_free_reserved_ioctl_trans_space(struct btrfs_root *root, u64 bytes,
+ u64 ops)
+{
+ struct btrfs_space_info *data_sinfo;
+ u64 alloc_target;
+
+ /* make sure bytes are sectorsize aligned */
+ bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize -
1);
+
+ if (bytes || ops) {
+ alloc_target = btrfs_get_alloc_profile(root, 1);
+ data_sinfo = __find_space_info(root->fs_info, alloc_target);
+ spin_lock(&data_sinfo->lock);
+ data_sinfo->bytes_ioctl_trans_reserved -= bytes;
+ spin_unlock(&data_sinfo->lock);
+ }
+}
+
/*
* if there was an error for whatever reason after calling
* btrfs_check_data_free_space, call this so we can cleanup the counters.
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 647838b..3765730 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1211,11 +1211,76 @@ static long btrfs_ioctl_clone_range(struct file *file,
void __user *argp)
* basically own the machine, and have a very in depth understanding
* of all the possible deadlocks and enospc problems.
*/
+struct btrfs_ioctl_trans {
+ struct btrfs_trans_handle *trans;
+ u64 reserved_bytes, reserved_ops;
+};
+
+static long btrfs_ioctl_trans_resv_start(struct file *file, void __user *argp)
+{
+ struct inode *inode = fdentry(file)->d_inode;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_ioctl_trans *ioctl_trans;
+ struct btrfs_ioctl_trans_resv_start resv;
+ int ret;
+
+ ret = -EPERM;
+ if (!capable(CAP_SYS_ADMIN))
+ goto out;
+
+ ret = -EINPROGRESS;
+ if (file->private_data)
+ goto out;
+
+ ret = -EFAULT;
+ if (copy_from_user(&resv, argp, sizeof(resv)))
+ goto out;
+
+ ret = mnt_want_write(file->f_path.mnt);
+ if (ret)
+ goto out;
+
+ mutex_lock(&root->fs_info->trans_mutex);
+ root->fs_info->open_ioctl_trans++;
+ mutex_unlock(&root->fs_info->trans_mutex);
+
+ ret = btrfs_check_ioctl_trans_free_space(root, resv.bytes, resv.ops);
+ if (ret)
+ goto out_drop;
+
+ ret = -ENOMEM;
+ ioctl_trans = kzalloc(sizeof(*ioctl_trans), GFP_KERNEL);
+ if (!ioctl_trans)
+ goto out_free_bytes;
+
+ ioctl_trans->trans = btrfs_start_ioctl_transaction(root, 0);
+ if (!ioctl_trans->trans)
+ goto out_free_ioctl_trans;
+
+ ioctl_trans->reserved_bytes = resv.bytes;
+ ioctl_trans->reserved_ops = resv.ops;
+ file->private_data = ioctl_trans;
+ return 0;
+
+out_free_ioctl_trans:
+ kfree(ioctl_trans);
+out_free_bytes:
+ btrfs_free_reserved_ioctl_trans_space(root, resv.bytes, resv.ops);
+out_drop:
+ mutex_lock(&root->fs_info->trans_mutex);
+ root->fs_info->open_ioctl_trans--;
+ mutex_unlock(&root->fs_info->trans_mutex);
+ mnt_drop_write(file->f_path.mnt);
+out:
+ return ret;
+}
+
+
static long btrfs_ioctl_trans_start(struct file *file)
{
struct inode *inode = fdentry(file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
+ struct btrfs_ioctl_trans *ioctl_trans;
int ret;
ret = -EPERM;
@@ -1235,13 +1300,19 @@ static long btrfs_ioctl_trans_start(struct file *file)
mutex_unlock(&root->fs_info->trans_mutex);
ret = -ENOMEM;
- trans = btrfs_start_ioctl_transaction(root, 0);
- if (!trans)
+ ioctl_trans = kzalloc(sizeof(*ioctl_trans), GFP_KERNEL);
+ if (!ioctl_trans)
goto out_drop;
- file->private_data = trans;
+ ioctl_trans->trans = btrfs_start_ioctl_transaction(root, 0);
+ if (ioctl_trans->trans)
+ goto out_free_ioctl_trans;
+
+ file->private_data = ioctl_trans;
return 0;
+out_free_ioctl_trans:
+ kfree(ioctl_trans);
out_drop:
mutex_lock(&root->fs_info->trans_mutex);
root->fs_info->open_ioctl_trans--;
@@ -1261,14 +1332,17 @@ long btrfs_ioctl_trans_end(struct file *file)
{
struct inode *inode = fdentry(file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
+ struct btrfs_ioctl_trans *ioctl_trans;
- trans = file->private_data;
- if (!trans)
+ ioctl_trans = file->private_data;
+ if (!ioctl_trans)
return -EINVAL;
file->private_data = NULL;
- btrfs_end_transaction(trans, root);
+ btrfs_free_reserved_ioctl_trans_space(root, ioctl_trans->reserved_bytes,
+ ioctl_trans->reserved_ops);
+ btrfs_end_transaction(ioctl_trans->trans, root);
+ kfree(ioctl_trans);
mutex_lock(&root->fs_info->trans_mutex);
root->fs_info->open_ioctl_trans--;
@@ -1311,6 +1385,8 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_clone(file, arg, 0, 0, 0);
case BTRFS_IOC_CLONE_RANGE:
return btrfs_ioctl_clone_range(file, argp);
+ case BTRFS_IOC_TRANS_RESV_START:
+ return btrfs_ioctl_trans_resv_start(file, argp);
case BTRFS_IOC_TRANS_START:
return btrfs_ioctl_trans_start(file);
case BTRFS_IOC_TRANS_END:
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index bc49914..9810980 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -48,6 +48,12 @@ struct btrfs_ioctl_clone_range_args {
* use by applications that know how to avoid the
* resulting deadlocks
*/
+/* check/reserve disk space for the transaction up-front */
+struct btrfs_ioctl_trans_resv_start {
+ __u64 bytes, ops;
+};
+#define BTRFS_IOC_TRANS_RESV_START _IOW(BTRFS_IOCTL_MAGIC, 5, \
+ struct btrfs_ioctl_trans_resv_start)
#define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6)
#define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7)
#define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8)
--
1.5.6.5
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Chris Mason
2009-Sep-29 17:12 UTC
Re: [PATCH 1/2] Btrfs: refactor btrfs_check_data_free_space
On Fri, Sep 25, 2009 at 02:10:55PM -0700, Sage Weil wrote:> Pull out the actual free space check, so setting an inode''s initial > space_info is done by the caller instead of a wonky goto. >Hi Sage, I''ve just pushed out an updated enospc branch. Could you please verify your patches against it? If you and Josef are happy with them, I''ll pull them in. -chris -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Sage Weil
2009-Sep-29 18:49 UTC
Re: [PATCH 1/2] Btrfs: refactor btrfs_check_data_free_space
On Tue, 29 Sep 2009, Chris Mason wrote:> On Fri, Sep 25, 2009 at 02:10:55PM -0700, Sage Weil wrote: > > Pull out the actual free space check, so setting an inode''s initial > > space_info is done by the caller instead of a wonky goto. > > > > Hi Sage, > > I''ve just pushed out an updated enospc branch. Could you please verify > your patches against it? If you and Josef are happy with them, I''ll > pull them in.I''m resending rebased patches now (things changed a bit). The first cleanup patch is unrelated to ENOSPC, but needed for the last one to apply, and I didn''t see it pushed out yet. Josef, does the ioctl trans reservation (4/4) look reasonable? It can fall over if you mix ioctl transactions and regular work, but that''s not really avoidable without a more fundamental change to the API (see my email from last week). thanks- sage -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html