Chris Mason wrote:
> On Tue, 2008-08-05 at 22:15 +0800, Yan Zheng wrote:
>> Hello,
>>
>> This patch adapts nodatacow code for the new data ordered code. Ordered
>> extents are used in all cases. It avoids writepage_start_hook kicking off
>> nodatacow IO contiguously. This patch also makes btrfs wait for ordered
>> extents before creating snapshots. It's important for nodatacow IO since
>> creating snapshots invalidates the results of reference checking.
>>
>
> Thanks Yan! Can you please change this to make it only wait for
> nodatacow ordered extents?
>
OK, Here is the new patch.
Regards
YZ
---
diff -r b1c27a6f049b ctree.h
--- a/ctree.h Mon Aug 04 23:23:47 2008 -0400
+++ b/ctree.h Tue Aug 05 22:12:08 2008 +0800
@@ -1403,7 +1403,8 @@ static inline struct dentry *fdentry(str
}
/* extent-tree.c */
-int btrfs_cross_ref_exists(struct btrfs_root *root,
+int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
struct btrfs_key *key, u64 bytenr);
int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
diff -r b1c27a6f049b extent-tree.c
--- a/extent-tree.c Mon Aug 04 23:23:47 2008 -0400
+++ b/extent-tree.c Wed Aug 06 00:07:51 2008 +0800
@@ -893,10 +893,10 @@ out:
return ret;
}
-int btrfs_cross_ref_exists(struct btrfs_root *root,
+int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
struct btrfs_key *key, u64 bytenr)
{
- struct btrfs_trans_handle *trans;
struct btrfs_root *old_root;
struct btrfs_path *path = NULL;
struct extent_buffer *eb;
@@ -908,6 +908,7 @@ int btrfs_cross_ref_exists(struct btrfs_
int level;
int ret;
+ BUG_ON(trans == NULL);
BUG_ON(key->type != BTRFS_EXTENT_DATA_KEY);
ret = get_reference_status(root, bytenr, 0, key->objectid,
&min_generation, &ref_count);
@@ -917,7 +918,6 @@ int btrfs_cross_ref_exists(struct btrfs_
if (ref_count != 1)
return 1;
- trans = btrfs_start_transaction(root, 0);
old_root = root->dirty_root->root;
ref_generation = old_root->root_key.offset;
@@ -973,7 +973,6 @@ out:
out:
if (path)
btrfs_free_path(path);
- btrfs_end_transaction(trans, root);
return ret;
}
@@ -3320,7 +3319,7 @@ again:
mutex_unlock(&root->fs_info->alloc_mutex);
btrfs_start_delalloc_inodes(root);
- btrfs_wait_ordered_extents(tree_root);
+ btrfs_wait_ordered_extents(tree_root, 0);
mutex_lock(&root->fs_info->alloc_mutex);
@@ -3407,7 +3406,7 @@ next:
btrfs_clean_old_snapshots(tree_root);
btrfs_start_delalloc_inodes(root);
- btrfs_wait_ordered_extents(tree_root);
+ btrfs_wait_ordered_extents(tree_root, 0);
trans = btrfs_start_transaction(tree_root, 1);
btrfs_commit_transaction(trans, tree_root);
diff -r b1c27a6f049b inode.c
--- a/inode.c Mon Aug 04 23:23:47 2008 -0400
+++ b/inode.c Tue Aug 05 22:12:08 2008 +0800
@@ -166,7 +166,7 @@ static int cow_file_range(struct inode *
cur_alloc_size = ins.offset;
ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
- ins.offset);
+ ins.offset, 0);
BUG_ON(ret);
if (num_bytes < cur_alloc_size) {
printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes,
@@ -187,31 +187,32 @@ static int run_delalloc_nocow(struct ino
u64 extent_start;
u64 extent_end;
u64 bytenr;
- u64 cow_end;
u64 loops = 0;
u64 total_fs_bytes;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_block_group_cache *block_group;
+ struct btrfs_trans_handle *trans;
struct extent_buffer *leaf;
int found_type;
struct btrfs_path *path;
struct btrfs_file_extent_item *item;
int ret;
- int err;
+ int err = 0;
struct btrfs_key found_key;
total_fs_bytes =
btrfs_super_total_bytes(&root->fs_info->super_copy);
path = btrfs_alloc_path();
BUG_ON(!path);
+ trans = btrfs_join_transaction(root, 1);
+ BUG_ON(!trans);
again:
ret = btrfs_lookup_file_extent(NULL, root, path,
inode->i_ino, start, 0);
if (ret < 0) {
- btrfs_free_path(path);
- return ret;
- }
-
- cow_end = end;
+ err = ret;
+ goto out;
+ }
+
if (ret != 0) {
if (path->slots[0] == 0)
goto not_found;
@@ -244,12 +245,11 @@ again:
if (start < extent_start || start >= extent_end)
goto not_found;
- cow_end = min(end, extent_end - 1);
bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
if (bytenr == 0)
goto not_found;
- if (btrfs_cross_ref_exists(root, &found_key, bytenr))
+ if (btrfs_cross_ref_exists(trans, root, &found_key, bytenr))
goto not_found;
/*
* we may be called by the resizer, make sure we're inside
@@ -260,24 +260,32 @@ again:
if (!block_group || block_group->ro)
goto not_found;
+ bytenr += btrfs_file_extent_offset(leaf, item);
+ extent_num_bytes = min(end + 1, extent_end) - start;
+ ret = btrfs_add_ordered_extent(inode, start, bytenr,
+ extent_num_bytes, 1);
+ if (ret) {
+ err = ret;
+ goto out;
+ }
+
+ btrfs_release_path(root, path);
start = extent_end;
+ if (start <= end) {
+ loops++;
+ goto again;
+ }
} else {
- goto not_found;
- }
-loop:
- if (start > end) {
+not_found:
+ btrfs_end_transaction(trans, root);
btrfs_free_path(path);
- return 0;
- }
- btrfs_release_path(root, path);
- loops++;
- goto again;
-
-not_found:
- btrfs_release_path(root, path);
- cow_file_range(inode, start, end);
- start = end + 1;
- goto loop;
+ return cow_file_range(inode, start, end);
+ }
+out:
+ WARN_ON(err);
+ btrfs_end_transaction(trans, root);
+ btrfs_free_path(path);
+ return err;
}
static int run_delalloc_range(struct inode *inode, u64 start, u64 end)
@@ -382,6 +390,11 @@ int btrfs_submit_bio_hook(struct inode *
BUG_ON(ret);
if (!(rw & (1 << BIO_RW))) {
+ goto mapit;
+ }
+
+ if (btrfs_test_opt(root, NODATASUM) ||
+ btrfs_test_flag(inode, NODATASUM)) {
goto mapit;
}
@@ -527,6 +540,8 @@ static int btrfs_finish_ordered_io(struc
ordered_extent = btrfs_lookup_ordered_extent(inode, start);
BUG_ON(!ordered_extent);
+ if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags))
+ goto nocow;
lock_extent(io_tree, ordered_extent->file_offset,
ordered_extent->file_offset + ordered_extent->len - 1,
@@ -567,6 +582,7 @@ static int btrfs_finish_ordered_io(struc
unlock_extent(io_tree, ordered_extent->file_offset,
ordered_extent->file_offset + ordered_extent->len - 1,
GFP_NOFS);
+nocow:
add_pending_csums(trans, inode, ordered_extent->file_offset,
&ordered_extent->list);
diff -r b1c27a6f049b ioctl.c
--- a/ioctl.c Mon Aug 04 23:23:47 2008 -0400
+++ b/ioctl.c Tue Aug 05 22:12:08 2008 +0800
@@ -36,6 +36,7 @@
#include <linux/bit_spinlock.h>
#include <linux/version.h>
#include <linux/xattr.h>
+#include <linux/vmalloc.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
diff -r b1c27a6f049b ordered-data.c
--- a/ordered-data.c Mon Aug 04 23:23:47 2008 -0400
+++ b/ordered-data.c Wed Aug 06 00:41:00 2008 +0800
@@ -152,7 +152,7 @@ static inline struct rb_node *tree_searc
* inserted.
*/
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
- u64 start, u64 len)
+ u64 start, u64 len, int nocow)
{
struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
@@ -168,6 +168,8 @@ int btrfs_add_ordered_extent(struct inod
entry->start = start;
entry->len = len;
entry->inode = inode;
+ if (nocow)
+ set_bit(BTRFS_ORDERED_NOCOW, &entry->flags);
/* one ref for the tree */
atomic_set(&entry->refs, 1);
@@ -303,10 +305,11 @@ int btrfs_remove_ordered_extent(struct i
return 0;
}
-int btrfs_wait_ordered_extents(struct btrfs_root *root)
+int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
{
struct list_head splice;
struct list_head *cur;
+ struct list_head *tmp;
struct btrfs_ordered_extent *ordered;
struct inode *inode;
@@ -314,10 +317,16 @@ int btrfs_wait_ordered_extents(struct bt
spin_lock(&root->fs_info->ordered_extent_lock);
list_splice_init(&root->fs_info->ordered_extents, &splice);
- while(!list_empty(&splice)) {
+ list_for_each_safe(cur, tmp, &splice) {
cur = splice.next;
ordered = list_entry(cur, struct btrfs_ordered_extent,
root_extent_list);
+ if (nocow_only &&
+ !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
+ cond_resched_lock(&root->fs_info->ordered_extent_lock);
+ continue;
+ }
+
list_del_init(&ordered->root_extent_list);
atomic_inc(&ordered->refs);
inode = ordered->inode;
@@ -338,6 +347,7 @@ int btrfs_wait_ordered_extents(struct bt
spin_lock(&root->fs_info->ordered_extent_lock);
}
+ list_splice_init(&splice, &root->fs_info->ordered_extents);
spin_unlock(&root->fs_info->ordered_extent_lock);
return 0;
}
diff -r b1c27a6f049b ordered-data.h
--- a/ordered-data.h Mon Aug 04 23:23:47 2008 -0400
+++ b/ordered-data.h Wed Aug 06 00:07:08 2008 +0800
@@ -64,6 +64,8 @@ struct btrfs_ordered_sum {
#define BTRFS_ORDERED_COMPLETE 1 /* set when removed from the tree */
+#define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */
+
struct btrfs_ordered_extent {
/* logical offset in the file */
u64 file_offset;
@@ -125,7 +127,7 @@ int btrfs_dec_test_ordered_pending(struc
int btrfs_dec_test_ordered_pending(struct inode *inode,
u64 file_offset, u64 io_size);
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
- u64 start, u64 len);
+ u64 start, u64 len, int nocow);
int btrfs_add_ordered_sum(struct inode *inode,
struct btrfs_ordered_extent *entry,
struct btrfs_ordered_sum *sum);
@@ -143,5 +145,5 @@ int btrfs_wait_on_page_writeback_range(s
pgoff_t start, pgoff_t end);
int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start,
loff_t end, int sync_mode);
-int btrfs_wait_ordered_extents(struct btrfs_root *root);
+int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only);
#endif
diff -r b1c27a6f049b transaction.c
--- a/transaction.c Mon Aug 04 23:23:47 2008 -0400
+++ b/transaction.c Wed Aug 06 00:08:20 2008 +0800
@@ -438,6 +438,7 @@ static noinline int add_dirty_roots(stru
free_extent_buffer(root->commit_root);
root->commit_root = NULL;
+ root->dirty_root = NULL;
spin_lock(&root->list_lock);
list_del_init(&dirty->root->dead_list);
@@ -461,6 +462,7 @@ static noinline int add_dirty_roots(stru
sizeof(struct btrfs_disk_key));
root->root_item.drop_level = 0;
root->commit_root = NULL;
+ root->dirty_root = NULL;
root->root_key.offset = root->fs_info->generation;
btrfs_set_root_bytenr(&root->root_item,
root->node->start);
@@ -762,7 +764,11 @@ int btrfs_commit_transaction(struct btrf
}
do {
+ int snap_pending = 0;
joined = cur_trans->num_joined;
+ if (!list_empty(&trans->transaction->pending_snapshots))
+ snap_pending = 1;
+
WARN_ON(cur_trans != trans->transaction);
prepare_to_wait(&cur_trans->writer_wait, &wait,
TASK_UNINTERRUPTIBLE);
@@ -773,6 +779,11 @@ int btrfs_commit_transaction(struct btrf
timeout = 1;
mutex_unlock(&root->fs_info->trans_mutex);
+
+ if (snap_pending) {
+ ret = btrfs_wait_ordered_extents(root, 1);
+ BUG_ON(ret);
+ }
schedule_timeout(timeout);
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html