Pull out the actual free space check, so setting an inode''s initial space_info is done by the caller instead of a wonky goto. Signed-off-by: Sage Weil <sage@newdream.net> --- fs/btrfs/extent-tree.c | 81 +++++++++++++++++++++++++++++------------------ 1 files changed, 50 insertions(+), 31 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 63d86ae..cb50944 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2830,25 +2830,33 @@ alloc: return 0; } -/* - * This will check the space that the inode allocates from to make sure we have - * enough space for bytes. - */ -int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, - u64 bytes) +static int __alloc_chunk(struct btrfs_root *root, u64 bytes) { - struct btrfs_space_info *data_sinfo; - int ret = 0, committed = 0; + int ret; + u64 alloc_target; + struct btrfs_trans_handle *trans; - /* make sure bytes are sectorsize aligned */ - bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); + alloc_target = btrfs_get_alloc_profile(root, 1); + trans = btrfs_start_transaction(root, 1); + if (!trans) + return -ENOMEM; - data_sinfo = BTRFS_I(inode)->space_info; - if (!data_sinfo) - goto alloc; + ret = do_chunk_alloc(trans, root->fs_info->extent_root, + bytes + 2 * 1024 * 1024, + alloc_target, 0); + btrfs_end_transaction(trans, root); + return ret; +} + +int btrfs_check_data_free_space_info(struct btrfs_root *root, + struct btrfs_space_info *data_sinfo, + u64 bytes, + struct inode *inode) +{ + int ret = 0, committed = 0; -again: /* make sure we have enough space to handle the data first */ +again: spin_lock(&data_sinfo->lock); if (data_sinfo->total_bytes - data_sinfo->bytes_used - data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - @@ -2861,27 +2869,11 @@ again: * to alloc a new chunk. */ if (!data_sinfo->full) { - u64 alloc_target; - data_sinfo->force_alloc = 1; spin_unlock(&data_sinfo->lock); -alloc: - alloc_target = btrfs_get_alloc_profile(root, 1); - trans = btrfs_start_transaction(root, 1); - if (!trans) - return -ENOMEM; - - ret = do_chunk_alloc(trans, root->fs_info->extent_root, - bytes + 2 * 1024 * 1024, - alloc_target, 0); - btrfs_end_transaction(trans, root); + ret = __alloc_chunk(root, bytes); if (ret) return ret; - - if (!data_sinfo) { - btrfs_set_inode_space_info(root, inode); - data_sinfo = BTRFS_I(inode)->space_info; - } goto again; } spin_unlock(&data_sinfo->lock); @@ -2914,7 +2906,34 @@ alloc: data_sinfo->bytes_may_use += bytes; BTRFS_I(inode)->reserved_bytes += bytes; spin_unlock(&data_sinfo->lock); + return 0; +} +/* + * This will check the space that the inode allocates from to make sure we have + * enough space for bytes. + */ +int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, + u64 bytes) +{ + struct btrfs_space_info *data_sinfo; + int ret; + + /* make sure bytes are sectorsize aligned */ + bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); + + data_sinfo = BTRFS_I(inode)->space_info; + if (!data_sinfo) { + ret = __alloc_chunk(root, bytes); + if (ret) + return ret; + btrfs_set_inode_space_info(root, inode); + data_sinfo = BTRFS_I(inode)->space_info; + } + + ret = btrfs_check_data_free_space_info(root, data_sinfo, bytes, inode); + if (ret) + return ret; return btrfs_check_metadata_free_space(root); } -- 1.5.6.5 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Sage Weil
2009-Sep-25 21:10 UTC
[PATCH 2/2] Btrfs: add TRANS_RESV_START ioctl to check/reserve free space on transaction start
The current TRANS_START ioctl is problematic when the volume fills up because a process can get ENOSPC on any operation that''s supoosed to be "contained" within its transaction without any prior warning. This defines a new ioctl, TRANS_RESV_START, that checks and sets aside space for the entire transaction, so that the process will get ENOSPC right away. The bytes_ioctl_trans_reserved is only checked and adjusted by the TRANS_RESV_START ioctl; any write()s that follow will be allowed to use up that reserved space. This is clearly imperfect (a mixture of an ioctl transaction workload and a regular workload will violate the reservations), but unavoidable with the current user transaction approach because we don''t know whether any given operation is contained by a user transaction or not. The ''ops'' field isn''t used yet. Its intended to set a bound on the number of transaction operations and thus btree modifications, for when the metadata space reservation gets smarter. Signed-off-by: Sage Weil <sage@newdream.net> --- fs/btrfs/ctree.h | 7 ++++ fs/btrfs/extent-tree.c | 57 ++++++++++++++++++++++++++++-- fs/btrfs/ioctl.c | 92 +++++++++++++++++++++++++++++++++++++++++++---- fs/btrfs/ioctl.h | 6 +++ 4 files changed, 151 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 80599b4..62e00df 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -683,6 +683,9 @@ struct btrfs_space_info { u64 bytes_may_use; /* number of bytes that may be used for delalloc */ + u64 bytes_ioctl_trans_reserved; /* number of bytes reserved by ioctl + transactions */ + int full; /* indicates that we cannot allocate any more chunks for this space */ int force_alloc; /* set if we need to force a chunk alloc for @@ -2025,8 +2028,12 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info); int btrfs_check_metadata_free_space(struct btrfs_root *root); int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, u64 bytes); +int btrfs_check_ioctl_trans_free_space(struct btrfs_root *root, + u64 bytes, u64 ops); void btrfs_free_reserved_data_space(struct btrfs_root *root, struct inode *inode, u64 bytes); +void btrfs_free_reserved_ioctl_trans_space(struct btrfs_root *root, + u64 bytes, u64 ops); void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, u64 bytes); void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cb50944..b626ee2 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2668,6 +2668,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found->bytes_reserved = 0; found->bytes_readonly = 0; found->bytes_delalloc = 0; + found->bytes_ioctl_trans_reserved = 0; found->full = 0; found->force_alloc = 0; *space_info = found; @@ -2848,6 +2849,14 @@ static int __alloc_chunk(struct btrfs_root *root, u64 bytes) return ret; } +/* + * if @inode is defined, reserve bytes on that inode. otherwise, we + * are reserving the space for an ioctl transaction, and need to + * subtract off other ioctl reserved space too. we _only_ check ioctl + * reserved space here: the write() that follows needs to be able to + * use it, and we don''t know which writes "belong" to which ioctl + * transactions. + */ int btrfs_check_data_free_space_info(struct btrfs_root *root, struct btrfs_space_info *data_sinfo, u64 bytes, @@ -2861,6 +2870,7 @@ again: if (data_sinfo->total_bytes - data_sinfo->bytes_used - data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - + (inode ? 0 : data_sinfo->bytes_ioctl_trans_reserved) - data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) { struct btrfs_trans_handle *trans; @@ -2879,7 +2889,8 @@ again: spin_unlock(&data_sinfo->lock); /* commit the current transaction and try again */ - if (!committed && !root->fs_info->open_ioctl_trans) { + if (!committed && + (!inode || !root->fs_info->open_ioctl_trans)) { committed = 1; trans = btrfs_join_transaction(root, 1); if (!trans) @@ -2903,8 +2914,12 @@ again: (unsigned long long)data_sinfo->total_bytes); return -ENOSPC; } - data_sinfo->bytes_may_use += bytes; - BTRFS_I(inode)->reserved_bytes += bytes; + if (inode) { + data_sinfo->bytes_may_use += bytes; + BTRFS_I(inode)->reserved_bytes += bytes; + } else { + data_sinfo->bytes_ioctl_trans_reserved += bytes; + } spin_unlock(&data_sinfo->lock); return 0; } @@ -2937,6 +2952,42 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, return btrfs_check_metadata_free_space(root); } +int btrfs_check_ioctl_trans_free_space(struct btrfs_root *root, u64 bytes, + u64 ops) +{ + struct btrfs_space_info *data_sinfo; + u64 alloc_target; + int ret; + + /* make sure bytes are sectorsize aligned */ + bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); + + alloc_target = btrfs_get_alloc_profile(root, 1); + data_sinfo = __find_space_info(root->fs_info, alloc_target); + ret = btrfs_check_data_free_space_info(root, data_sinfo, bytes, NULL); + if (ret) + return ret; + return btrfs_check_metadata_free_space(root); +} + +void btrfs_free_reserved_ioctl_trans_space(struct btrfs_root *root, u64 bytes, + u64 ops) +{ + struct btrfs_space_info *data_sinfo; + u64 alloc_target; + + /* make sure bytes are sectorsize aligned */ + bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); + + if (bytes || ops) { + alloc_target = btrfs_get_alloc_profile(root, 1); + data_sinfo = __find_space_info(root->fs_info, alloc_target); + spin_lock(&data_sinfo->lock); + data_sinfo->bytes_ioctl_trans_reserved -= bytes; + spin_unlock(&data_sinfo->lock); + } +} + /* * if there was an error for whatever reason after calling * btrfs_check_data_free_space, call this so we can cleanup the counters. diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 647838b..3765730 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1211,11 +1211,76 @@ static long btrfs_ioctl_clone_range(struct file *file, void __user *argp) * basically own the machine, and have a very in depth understanding * of all the possible deadlocks and enospc problems. */ +struct btrfs_ioctl_trans { + struct btrfs_trans_handle *trans; + u64 reserved_bytes, reserved_ops; +}; + +static long btrfs_ioctl_trans_resv_start(struct file *file, void __user *argp) +{ + struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_ioctl_trans *ioctl_trans; + struct btrfs_ioctl_trans_resv_start resv; + int ret; + + ret = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto out; + + ret = -EINPROGRESS; + if (file->private_data) + goto out; + + ret = -EFAULT; + if (copy_from_user(&resv, argp, sizeof(resv))) + goto out; + + ret = mnt_want_write(file->f_path.mnt); + if (ret) + goto out; + + mutex_lock(&root->fs_info->trans_mutex); + root->fs_info->open_ioctl_trans++; + mutex_unlock(&root->fs_info->trans_mutex); + + ret = btrfs_check_ioctl_trans_free_space(root, resv.bytes, resv.ops); + if (ret) + goto out_drop; + + ret = -ENOMEM; + ioctl_trans = kzalloc(sizeof(*ioctl_trans), GFP_KERNEL); + if (!ioctl_trans) + goto out_free_bytes; + + ioctl_trans->trans = btrfs_start_ioctl_transaction(root, 0); + if (!ioctl_trans->trans) + goto out_free_ioctl_trans; + + ioctl_trans->reserved_bytes = resv.bytes; + ioctl_trans->reserved_ops = resv.ops; + file->private_data = ioctl_trans; + return 0; + +out_free_ioctl_trans: + kfree(ioctl_trans); +out_free_bytes: + btrfs_free_reserved_ioctl_trans_space(root, resv.bytes, resv.ops); +out_drop: + mutex_lock(&root->fs_info->trans_mutex); + root->fs_info->open_ioctl_trans--; + mutex_unlock(&root->fs_info->trans_mutex); + mnt_drop_write(file->f_path.mnt); +out: + return ret; +} + + static long btrfs_ioctl_trans_start(struct file *file) { struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; + struct btrfs_ioctl_trans *ioctl_trans; int ret; ret = -EPERM; @@ -1235,13 +1300,19 @@ static long btrfs_ioctl_trans_start(struct file *file) mutex_unlock(&root->fs_info->trans_mutex); ret = -ENOMEM; - trans = btrfs_start_ioctl_transaction(root, 0); - if (!trans) + ioctl_trans = kzalloc(sizeof(*ioctl_trans), GFP_KERNEL); + if (!ioctl_trans) goto out_drop; - file->private_data = trans; + ioctl_trans->trans = btrfs_start_ioctl_transaction(root, 0); + if (ioctl_trans->trans) + goto out_free_ioctl_trans; + + file->private_data = ioctl_trans; return 0; +out_free_ioctl_trans: + kfree(ioctl_trans); out_drop: mutex_lock(&root->fs_info->trans_mutex); root->fs_info->open_ioctl_trans--; @@ -1261,14 +1332,17 @@ long btrfs_ioctl_trans_end(struct file *file) { struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; + struct btrfs_ioctl_trans *ioctl_trans; - trans = file->private_data; - if (!trans) + ioctl_trans = file->private_data; + if (!ioctl_trans) return -EINVAL; file->private_data = NULL; - btrfs_end_transaction(trans, root); + btrfs_free_reserved_ioctl_trans_space(root, ioctl_trans->reserved_bytes, + ioctl_trans->reserved_ops); + btrfs_end_transaction(ioctl_trans->trans, root); + kfree(ioctl_trans); mutex_lock(&root->fs_info->trans_mutex); root->fs_info->open_ioctl_trans--; @@ -1311,6 +1385,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_clone(file, arg, 0, 0, 0); case BTRFS_IOC_CLONE_RANGE: return btrfs_ioctl_clone_range(file, argp); + case BTRFS_IOC_TRANS_RESV_START: + return btrfs_ioctl_trans_resv_start(file, argp); case BTRFS_IOC_TRANS_START: return btrfs_ioctl_trans_start(file); case BTRFS_IOC_TRANS_END: diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index bc49914..9810980 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -48,6 +48,12 @@ struct btrfs_ioctl_clone_range_args { * use by applications that know how to avoid the * resulting deadlocks */ +/* check/reserve disk space for the transaction up-front */ +struct btrfs_ioctl_trans_resv_start { + __u64 bytes, ops; +}; +#define BTRFS_IOC_TRANS_RESV_START _IOW(BTRFS_IOCTL_MAGIC, 5, \ + struct btrfs_ioctl_trans_resv_start) #define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6) #define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7) #define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8) -- 1.5.6.5 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Chris Mason
2009-Sep-29 17:12 UTC
Re: [PATCH 1/2] Btrfs: refactor btrfs_check_data_free_space
On Fri, Sep 25, 2009 at 02:10:55PM -0700, Sage Weil wrote:> Pull out the actual free space check, so setting an inode''s initial > space_info is done by the caller instead of a wonky goto. >Hi Sage, I''ve just pushed out an updated enospc branch. Could you please verify your patches against it? If you and Josef are happy with them, I''ll pull them in. -chris -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Sage Weil
2009-Sep-29 18:49 UTC
Re: [PATCH 1/2] Btrfs: refactor btrfs_check_data_free_space
On Tue, 29 Sep 2009, Chris Mason wrote:> On Fri, Sep 25, 2009 at 02:10:55PM -0700, Sage Weil wrote: > > Pull out the actual free space check, so setting an inode''s initial > > space_info is done by the caller instead of a wonky goto. > > > > Hi Sage, > > I''ve just pushed out an updated enospc branch. Could you please verify > your patches against it? If you and Josef are happy with them, I''ll > pull them in.I''m resending rebased patches now (things changed a bit). The first cleanup patch is unrelated to ENOSPC, but needed for the last one to apply, and I didn''t see it pushed out yet. Josef, does the ioctl trans reservation (4/4) look reasonable? It can fall over if you mix ioctl transactions and regular work, but that''s not really avoidable without a more fundamental change to the API (see my email from last week). thanks- sage -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html