Hello,
Ok here are all my fsync changes, including the fix for the put_transaction
stuff that I posted earlier. I figure it'll be easier to just commit it as one
thing than as individual pieces. The additional thing that I did was add a
radix tree that tracks the current outstanding transactions. I also added a
field to the in memory btrfs inode that keeps track of the last transaction that
modified the inode. So when we go to do an fsync on the inode, if the commit was
already done or the transaction was removed (meaning it had been committed and
freed), we just exit out. So here are the numbers. This is running fs_mark with
the following command:
fs_mark -d /mnt/btrfs-test/default/ -s 10240 -n 1000
run 10 times. The first set of results is without any of the patches, with the
exception of the put_transaction/trans_mutex fix.
FSUse% Count Size Files/sec App Overhead
0 1000 10240 72.4 6366
0 1000 10240 54.4 6340
0 1000 10240 62.3 6451
0 1000 10240 54.9 6427
0 1000 10240 65.7 6385
0 1000 10240 56.0 6440
0 1000 10240 59.9 6541
0 1000 10240 60.7 6365
0 1000 10240 66.7 6407
0 1000 10240 56.9 6402
AVG: 61 files/sec
And these are the numbers with the patch that's attached to this email:
FSUse% Count Size Files/sec App Overhead
0 1000 10240 73.8 6315
0 1000 10240 146.9 6396
0 1000 10240 110.9 6269
0 1000 10240 78.1 6279
0 1000 10240 91.5 6330
0 1000 10240 82.9 6219
0 1000 10240 84.4 6221
0 1000 10240 108.6 6345
0 1000 10240 86.5 6293
0 1000 10240 79.8 6321
AVG: 94.34 files/sec
Now I'm kind of uncertain about how I track the last_trans for the inode. I
think it's right, but I'm not entirely sure, so if I need to be doing this
elsewhere, please let me know. Thanks much,
Josef
diff -r f6da57af2473 btrfs_inode.h
--- a/btrfs_inode.h Wed Aug 08 20:17:12 2007 -0400
+++ b/btrfs_inode.h Thu Aug 09 16:04:17 2007 -0400
@@ -25,6 +25,11 @@ struct btrfs_inode {
struct btrfs_block_group_cache *block_group;
struct btrfs_key location;
struct inode vfs_inode;
+
+ /*
+ * transid of the trans_handle that last modified this inode
+ */
+ u64 last_trans;
};
static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
{
diff -r f6da57af2473 ctree.h
--- a/ctree.h Wed Aug 08 20:17:12 2007 -0400
+++ b/ctree.h Thu Aug 09 16:18:12 2007 -0400
@@ -300,6 +300,7 @@ struct btrfs_fs_info {
struct radix_tree_root block_group_data_radix;
struct radix_tree_root extent_map_radix;
struct radix_tree_root extent_ins_radix;
+ struct radix_tree_root transaction_map_radix;
u64 generation;
struct btrfs_transaction *running_transaction;
struct btrfs_super_block *disk_super;
diff -r f6da57af2473 disk-io.c
--- a/disk-io.c Wed Aug 08 20:17:12 2007 -0400
+++ b/disk-io.c Thu Aug 09 16:52:26 2007 -0400
@@ -431,6 +431,7 @@ struct btrfs_root *open_ctree(struct sup
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL);
INIT_RADIX_TREE(&fs_info->block_group_data_radix, GFP_KERNEL);
+ INIT_RADIX_TREE(&fs_info->transaction_map_radix, GFP_KERNEL);
INIT_LIST_HEAD(&fs_info->trans_list);
INIT_LIST_HEAD(&fs_info->dead_roots);
sb_set_blocksize(sb, 4096);
diff -r f6da57af2473 file.c
--- a/file.c Wed Aug 08 20:17:12 2007 -0400
+++ b/file.c Thu Aug 09 17:24:26 2007 -0400
@@ -694,22 +694,41 @@ static int btrfs_sync_file(struct file *
{
struct inode *inode = dentry->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
- int ret;
+ int ret = 0;
struct btrfs_trans_handle *trans;
+ struct btrfs_transaction *last_trans;
/*
- * FIXME, use inode generation number to check if we can skip the
- * commit
+ * check the transaction that last modified this inode
+ * and see if it's already been committed
*/
mutex_lock(&root->fs_info->fs_mutex);
+ if (!BTRFS_I(inode)->last_trans)
+ goto out;
+ mutex_lock(&root->fs_info->trans_mutex);
+ last_trans = radix_tree_lookup(&root->fs_info->transaction_map_radix,
+ BTRFS_I(inode)->last_trans);
+ if (!last_trans) {
+ BTRFS_I(inode)->last_trans = 0;
+ mutex_unlock(&root->fs_info->trans_mutex);
+ goto out;
+ } else if (last_trans->commit_done) {
+ mutex_unlock(&root->fs_info->trans_mutex);
+ goto out;
+ }
+ mutex_unlock(&root->fs_info->trans_mutex);
+
+ /*
+ * ok we haven't committed the transaction yet, let's do a commit
+ */
trans = btrfs_start_transaction(root, 1);
if (!trans) {
ret = -ENOMEM;
goto out;
}
ret = btrfs_commit_transaction(trans, root);
+out:
mutex_unlock(&root->fs_info->fs_mutex);
-out:
return ret > 0 ? EIO : ret;
}
diff -r f6da57af2473 inode.c
--- a/inode.c Wed Aug 08 20:17:12 2007 -0400
+++ b/inode.c Thu Aug 09 17:23:57 2007 -0400
@@ -193,6 +193,7 @@ static int btrfs_update_inode(struct btr
fill_inode_item(inode_item, inode);
btrfs_mark_buffer_dirty(path->nodes[0]);
+ btrfs_set_inode_last_trans(trans, inode);
ret = 0;
failed:
btrfs_release_path(root, path);
@@ -2253,6 +2254,7 @@ struct inode *btrfs_alloc_inode(struct s
ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
if (!ei)
return NULL;
+ ei->last_trans = 0;
return &ei->vfs_inode;
}
diff -r f6da57af2473 transaction.c
--- a/transaction.c Wed Aug 08 20:17:12 2007 -0400
+++ b/transaction.c Thu Aug 09 17:33:22 2007 -0400
@@ -31,7 +31,8 @@ static struct workqueue_struct *trans_wq
#define BTRFS_ROOT_TRANS_TAG 0
#define BTRFS_ROOT_DEFRAG_TAG 1
-static void put_transaction(struct btrfs_transaction *transaction)
+static void put_transaction(struct btrfs_transaction *transaction,
+ struct btrfs_root *root)
{
WARN_ON(transaction->use_count == 0);
transaction->use_count--;
@@ -39,6 +40,8 @@ static void put_transaction(struct btrfs
WARN_ON(total_trans == 0);
total_trans--;
list_del_init(&transaction->list);
+ radix_tree_delete(&root->fs_info->transaction_map_radix,
+ (unsigned long)transaction->transid);
memset(transaction, 0, sizeof(*transaction));
kmem_cache_free(btrfs_transaction_cachep, transaction);
}
@@ -49,13 +52,15 @@ static int join_transaction(struct btrfs
struct btrfs_transaction *cur_trans;
cur_trans = root->fs_info->running_transaction;
if (!cur_trans) {
+ int ret;
cur_trans = kmem_cache_alloc(btrfs_transaction_cachep,
GFP_NOFS);
total_trans++;
BUG_ON(!cur_trans);
root->fs_info->generation++;
root->fs_info->running_transaction = cur_trans;
- cur_trans->num_writers = 0;
+ cur_trans->num_writers = 1;
+ cur_trans->num_joined = 0;
cur_trans->transid = root->fs_info->generation;
init_waitqueue_head(&cur_trans->writer_wait);
init_waitqueue_head(&cur_trans->commit_wait);
@@ -65,8 +70,15 @@ static int join_transaction(struct btrfs
cur_trans->start_time = get_seconds();
list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
init_bit_radix(&cur_trans->dirty_pages);
- }
- cur_trans->num_writers++;
+ ret = radix_tree_insert(&root->fs_info->transaction_map_radix,
+ (unsigned long)cur_trans->transid,
+ cur_trans);
+ BUG_ON(ret);
+ } else {
+ cur_trans->num_writers++;
+ cur_trans->num_joined++;
+ }
+
return 0;
}
@@ -128,7 +140,7 @@ int btrfs_end_transaction(struct btrfs_t
cur_trans->num_writers--;
if (waitqueue_active(&cur_trans->writer_wait))
wake_up(&cur_trans->writer_wait);
- put_transaction(cur_trans);
+ put_transaction(cur_trans, root);
mutex_unlock(&root->fs_info->trans_mutex);
memset(trans, 0, sizeof(*trans));
kmem_cache_free(btrfs_trans_handle_cachep, trans);
@@ -426,7 +438,8 @@ int btrfs_commit_transaction(struct btrf
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
- int ret = 0;
+ int ret = 0, joined = 0;
+ unsigned long timeout = 1;
struct btrfs_transaction *cur_trans;
struct btrfs_transaction *prev_trans = NULL;
struct list_head dirty_fs_roots;
@@ -446,7 +459,11 @@ int btrfs_commit_transaction(struct btrf
mutex_unlock(&root->fs_info->fs_mutex);
ret = wait_for_commit(root, cur_trans);
BUG_ON(ret);
- put_transaction(cur_trans);
+
+ mutex_lock(&root->fs_info->trans_mutex);
+ put_transaction(cur_trans, root);
+ mutex_unlock(&root->fs_info->trans_mutex);
+
mutex_lock(&root->fs_info->fs_mutex);
return 0;
}
@@ -461,25 +478,35 @@ int btrfs_commit_transaction(struct btrf
mutex_unlock(&root->fs_info->trans_mutex);
wait_for_commit(root, prev_trans);
- put_transaction(prev_trans);
mutex_lock(&root->fs_info->fs_mutex);
mutex_lock(&root->fs_info->trans_mutex);
+ put_transaction(prev_trans, root);
}
}
- while (trans->transaction->num_writers > 1) {
+
+ do {
+ joined = cur_trans->num_joined;
WARN_ON(cur_trans != trans->transaction);
- prepare_to_wait(&trans->transaction->writer_wait, &wait,
+ prepare_to_wait(&cur_trans->writer_wait, &wait,
TASK_UNINTERRUPTIBLE);
- if (trans->transaction->num_writers <= 1)
- break;
+ if (cur_trans->num_writers <= 1 &&
+ (cur_trans->num_joined != joined))
+ break;
+
+ if (cur_trans->num_writers > 1)
+ timeout = MAX_SCHEDULE_TIMEOUT;
+ else
+ timeout = 1;
mutex_unlock(&root->fs_info->fs_mutex);
mutex_unlock(&root->fs_info->trans_mutex);
- schedule();
+ schedule_timeout(timeout);
mutex_lock(&root->fs_info->fs_mutex);
mutex_lock(&root->fs_info->trans_mutex);
- finish_wait(&trans->transaction->writer_wait, &wait);
- }
+ finish_wait(&cur_trans->writer_wait, &wait);
+ } while (cur_trans->num_writers > 1 ||
+ (cur_trans->num_joined != joined));
+
finish_wait(&trans->transaction->writer_wait, &wait);
WARN_ON(cur_trans != trans->transaction);
ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix,
@@ -510,8 +537,8 @@ int btrfs_commit_transaction(struct btrf
mutex_lock(&root->fs_info->trans_mutex);
cur_trans->commit_done = 1;
wake_up(&cur_trans->commit_wait);
- put_transaction(cur_trans);
- put_transaction(cur_trans);
+ put_transaction(cur_trans, root);
+ put_transaction(cur_trans, root);
if (root->fs_info->closing)
list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots);
else
diff -r f6da57af2473 transaction.h
--- a/transaction.h Wed Aug 08 20:17:12 2007 -0400
+++ b/transaction.h Thu Aug 09 16:32:04 2007 -0400
@@ -23,6 +23,7 @@ struct btrfs_transaction {
struct btrfs_transaction {
u64 transid;
unsigned long num_writers;
+ unsigned long num_joined;
int in_commit;
int use_count;
int commit_done;
@@ -57,6 +58,12 @@ static inline void btrfs_update_inode_bl
BTRFS_I(inode)->block_group = trans->block_group;
}
+static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
+ struct inode *inode)
+{
+ BTRFS_I(inode)->last_trans = trans->transaction->transid;
+}
+
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,