Hello,
Here''s a quick implementation of NFS support for btrfs. It works
mostly. But
its still a bit buggy. I suspect it has to do something with inode locking.
When I unmount and mount a couple of times, it stops working i.e, when I do an
''ls'', it keeps waiting indefinitely. This is seen when
accessing the filesystem
from a local mount point as well.
Can anybody please review and tell me what stupid mistake I''m
committing ?
Thanks and Regards,
Balaji Rao
National Institute of Technology, Karnataka
---
#include <linux/fs.h>
#include <linux/types.h>
#include "ctree.h"
#include "disk-io.h"
#include "btrfs_inode.h"
#include "print-tree.h"
#include "export.h"
static int btrfs_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
int connectable)
{ struct inode *inode = dentry->d_inode;
int len = *max_len;
u64 objectid, root_objectid;
u32 generation;
__le32 *fh = (__force __le32 *) fh_in;
if ((len < 5 )|| (connectable && len < 8)) {
return 255;
}
objectid = BTRFS_I(inode)->location.objectid;
root_objectid = BTRFS_I(inode)->root->objectid;
generation = inode->i_generation;
len = 5;
fh[0] = cpu_to_le32((u32)(objectid >> 32));
fh[1] = cpu_to_le32((u32)(objectid & 0xffffffff));
fh[2] = cpu_to_le32((u32)(root_objectid >> 32));
fh[3] = cpu_to_le32((u32)(root_objectid & 0xffffffff));
fh[4] = cpu_to_le32(generation);
if (connectable && !S_ISDIR(inode->i_mode)) {
struct inode *parent;
spin_lock(&dentry->d_lock);
parent = dentry->d_parent->d_inode;
objectid = BTRFS_I(parent)->location.objectid;
generation = parent->i_generation;
fh[5] = cpu_to_le32((u32)(objectid >> 32));
fh[6] = cpu_to_le32((u32)(objectid & 0xffffffff));
fh[7] = cpu_to_le32(generation);
spin_unlock(&dentry->d_lock);
len += 3;
}
*max_len = len;
return len;
}
static struct dentry * btrfs_get_dentry(struct super_block *sb,
u64 objectid, u64 root_objectid, u32 generation)
{
struct inode *inode;
struct dentry *result;
inode = btrfs_ilookup(sb, objectid, root_objectid);
if (IS_ERR(inode))
return (void *)inode;
if(generation != inode->i_generation) {
iput(inode);
return ERR_PTR(-ESTALE);
}
result = d_alloc_anon(inode);
if(!result) {
iput(inode);
return ERR_PTR(-ENOMEM);
}
return result;
}
static struct dentry *btrfs_fh_to_parent(struct super_block *sb,
struct fid *fid, int fh_len, int fh_type)
{
u64 objectid, root_objectid;
u32 generation;
if (fh_len < 8 || fh_type != 8) {
return NULL;
}
root_objectid = (u64)le32_to_cpu(fid->raw[2]) << 32;
root_objectid |= (u64)le32_to_cpu(fid->raw[3]);
objectid = (u64)le32_to_cpu(fid->raw[5]) << 32;
objectid |= (u64)le32_to_cpu(fid->raw[6]);
generation = le32_to_cpu(fid->raw[7]);
return btrfs_get_dentry(sb, objectid, root_objectid, generation);
}
static struct dentry *btrfs_fh_to_dentry(struct super_block *sb,
struct fid *fid, int fh_len, int fh_type)
{
u64 objectid, root_objectid;
u32 generation;
if (fh_len < 5 || fh_type > 5) {
return NULL;
}
objectid = (u64)le32_to_cpu(fid->raw[0]) << 32;
objectid |= (u64)le32_to_cpu(fid->raw[1]);
root_objectid = (u64)le32_to_cpu(fid->raw[2]) << 32;
root_objectid |= (u64)le32_to_cpu(fid->raw[3]);
generation = le32_to_cpu(fid->raw[4]);
return btrfs_get_dentry(sb, objectid, root_objectid, generation);
}
static struct dentry *btrfs_get_parent(struct dentry *child)
{
struct inode *dir = child->d_inode;
struct inode *inode;
struct dentry *parent;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_key key;
struct btrfs_path *path;
struct extent_buffer *leaf;
u32 nritems;
int slot;
u64 objectid;
int ret;
path = btrfs_alloc_path();
key.objectid = dir->i_ino;
btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
key.offset = 0;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
BUG_ON(ret == 0);
ret = 0;
leaf = path->nodes[0];
slot = path->slots[0];
nritems = btrfs_header_nritems(leaf);
if (slot >= nritems)
return ERR_PTR(-EINVAL);
btrfs_item_key_to_cpu(leaf, &key, slot);
if (key.objectid != dir->i_ino ||
key.type != BTRFS_INODE_REF_KEY) {
return ERR_PTR(-EINVAL);
}
btrfs_free_path(path);
objectid = key.offset;
inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
parent = d_alloc_anon(inode);
if (!parent) {
iput(inode);
parent = ERR_PTR(-ENOMEM);
}
return parent;
}
const struct export_operations btrfs_export_ops = {
.encode_fh = btrfs_encode_fh,
.fh_to_dentry = btrfs_fh_to_dentry,
.fh_to_parent = btrfs_fh_to_parent,
.get_parent = btrfs_get_parent,
};
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sun, Jun 29, 2008 at 05:05:31AM +0530, Balaji Rao wrote:> Hello, > > Here''s a quick implementation of NFS support for btrfs. It works mostly. But > its still a bit buggy. I suspect it has to do something with inode locking. > When I unmount and mount a couple of times, it stops working i.e, when I do an > ''ls'', it keeps waiting indefinitely. This is seen when accessing the filesystem > from a local mount point as well. > > Can anybody please review and tell me what stupid mistake I''m committing ? >You may want to send things in patch form. I''d recommend using the hg quilt plugin, its nice for adding patches easily.> > #include <linux/fs.h> > #include <linux/types.h> > #include "ctree.h" > #include "disk-io.h" > #include "btrfs_inode.h" > #include "print-tree.h" > #include "export.h" > > static int btrfs_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len, > int connectable) > { struct inode *inode = dentry->d_inode; > int len = *max_len; > u64 objectid, root_objectid; > u32 generation; > __le32 *fh = (__force __le32 *) fh_in; > > if ((len < 5 )|| (connectable && len < 8)) { > return 255; > }Magic numbers are a no-no.> > objectid = BTRFS_I(inode)->location.objectid; > root_objectid = BTRFS_I(inode)->root->objectid; > generation = inode->i_generation; > > len = 5; > fh[0] = cpu_to_le32((u32)(objectid >> 32)); > fh[1] = cpu_to_le32((u32)(objectid & 0xffffffff)); > fh[2] = cpu_to_le32((u32)(root_objectid >> 32)); > fh[3] = cpu_to_le32((u32)(root_objectid & 0xffffffff)); > fh[4] = cpu_to_le32(generation); > > if (connectable && !S_ISDIR(inode->i_mode)) { > struct inode *parent; > > spin_lock(&dentry->d_lock); > > parent = dentry->d_parent->d_inode; > objectid = BTRFS_I(parent)->location.objectid; > generation = parent->i_generation; > > fh[5] = cpu_to_le32((u32)(objectid >> 32)); > fh[6] = cpu_to_le32((u32)(objectid & 0xffffffff)); > fh[7] = cpu_to_le32(generation); > > spin_unlock(&dentry->d_lock); > > len += 3;there is no need to take the d_lock here.> } > > *max_len = len; > return len; > > } > > static struct dentry * btrfs_get_dentry(struct super_block *sb, > u64 objectid, u64 root_objectid, u32 generation) > { > struct inode *inode; > struct dentry *result; > > inode = btrfs_ilookup(sb, objectid, root_objectid); > if (IS_ERR(inode)) > return (void *)inode; > > if(generation != inode->i_generation) { > iput(inode); > return ERR_PTR(-ESTALE); > } > > result = d_alloc_anon(inode); > if(!result) { > iput(inode); > return ERR_PTR(-ENOMEM); > } > > return result; > } > > static struct dentry *btrfs_fh_to_parent(struct super_block *sb, > struct fid *fid, int fh_len, int fh_type) > { > u64 objectid, root_objectid; > u32 generation; > > if (fh_len < 8 || fh_type != 8) { > return NULL; > } > > > root_objectid = (u64)le32_to_cpu(fid->raw[2]) << 32; > root_objectid |= (u64)le32_to_cpu(fid->raw[3]); > objectid = (u64)le32_to_cpu(fid->raw[5]) << 32; > objectid |= (u64)le32_to_cpu(fid->raw[6]); > generation = le32_to_cpu(fid->raw[7]); > > return btrfs_get_dentry(sb, objectid, root_objectid, generation); > } > > static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, > struct fid *fid, int fh_len, int fh_type) > { > u64 objectid, root_objectid; > u32 generation; > > if (fh_len < 5 || fh_type > 5) { > return NULL; > } > > objectid = (u64)le32_to_cpu(fid->raw[0]) << 32; > objectid |= (u64)le32_to_cpu(fid->raw[1]); > root_objectid = (u64)le32_to_cpu(fid->raw[2]) << 32; > root_objectid |= (u64)le32_to_cpu(fid->raw[3]); > generation = le32_to_cpu(fid->raw[4]); > > return btrfs_get_dentry(sb, objectid, root_objectid, generation); > } > > static struct dentry *btrfs_get_parent(struct dentry *child) > { > struct inode *dir = child->d_inode; > struct inode *inode; > struct dentry *parent; > struct btrfs_root *root = BTRFS_I(dir)->root; > struct btrfs_key key; > struct btrfs_path *path; > struct extent_buffer *leaf; > u32 nritems; > int slot; > u64 objectid; > int ret; > > path = btrfs_alloc_path(); > > key.objectid = dir->i_ino; > btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); > key.offset = 0; > ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); > BUG_ON(ret == 0); > ret = 0; > > leaf = path->nodes[0]; > slot = path->slots[0]; > nritems = btrfs_header_nritems(leaf); > if (slot >= nritems) > return ERR_PTR(-EINVAL); > > btrfs_item_key_to_cpu(leaf, &key, slot); > if (key.objectid != dir->i_ino || > key.type != BTRFS_INODE_REF_KEY) { > return ERR_PTR(-EINVAL); > } >in both of the above error cases you don''t free the path, you need to do that.> btrfs_free_path(path); > objectid = key.offset; > inode = btrfs_iget_locked(root->fs_info->sb, objectid, root); > > parent = d_alloc_anon(inode); > if (!parent) { > iput(inode); > parent = ERR_PTR(-ENOMEM); > } > > return parent; > } > > const struct export_operations btrfs_export_ops = { > .encode_fh = btrfs_encode_fh, > .fh_to_dentry = btrfs_fh_to_dentry, > .fh_to_parent = btrfs_fh_to_parent, > .get_parent = btrfs_get_parent, > };I don''t know much about NFS so I can''t really comment on much of anything else other than the obvious problems, but that should point you in a general direction. Josef -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Jun 30, 2008 at 08:50:46PM +0530, Balaji Rao R wrote:> On Mon, Jun 30, 2008 at 8:20 PM, Josef Bacik <jbacik@redhat.com> wrote: > > On Sun, Jun 29, 2008 at 05:05:31AM +0530, Balaji Rao wrote: > >> Hello, > >> > >> Here''s a quick implementation of NFS support for btrfs. It works mostly. But > >> its still a bit buggy. I suspect it has to do something with inode locking. > >> When I unmount and mount a couple of times, it stops working i.e, when I do an > >> ''ls'', it keeps waiting indefinitely. This is seen when accessing the filesystem > >> from a local mount point as well. > >> > >> Can anybody please review and tell me what stupid mistake I''m committing ? > >> > > > > You may want to send things in patch form. I''d recommend using the hg quilt > > plugin, its nice for adding patches easily. > Yes, since this was just an RFC, i thought it would be better to send > just the file I created. > > > >> > >> #include <linux/fs.h> > >> #include <linux/types.h> > >> #include "ctree.h" > >> #include "disk-io.h" > >> #include "btrfs_inode.h" > >> #include "print-tree.h" > >> #include "export.h" > >> > >> static int btrfs_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len, > >> int connectable) > >> { struct inode *inode = dentry->d_inode; > >> int len = *max_len; > >> u64 objectid, root_objectid; > >> u32 generation; > >> __le32 *fh = (__force __le32 *) fh_in; > >> > >> if ((len < 5 )|| (connectable && len < 8)) { > >> return 255; > >> } > > > > Magic numbers are a no-no. > OK :) Will change > > > >> > >> objectid = BTRFS_I(inode)->location.objectid; > >> root_objectid = BTRFS_I(inode)->root->objectid; > >> generation = inode->i_generation; > >> > >> len = 5; > >> fh[0] = cpu_to_le32((u32)(objectid >> 32)); > >> fh[1] = cpu_to_le32((u32)(objectid & 0xffffffff)); > >> fh[2] = cpu_to_le32((u32)(root_objectid >> 32)); > >> fh[3] = cpu_to_le32((u32)(root_objectid & 0xffffffff)); > >> fh[4] = cpu_to_le32(generation); > >> > >> if (connectable && !S_ISDIR(inode->i_mode)) { > >> struct inode *parent; > >> > >> spin_lock(&dentry->d_lock); > >> > >> parent = dentry->d_parent->d_inode; > >> objectid = BTRFS_I(parent)->location.objectid; > >> generation = parent->i_generation; > >> > >> fh[5] = cpu_to_le32((u32)(objectid >> 32)); > >> fh[6] = cpu_to_le32((u32)(objectid & 0xffffffff)); > >> fh[7] = cpu_to_le32(generation); > >> > >> spin_unlock(&dentry->d_lock); > >> > >> len += 3; > > > > there is no need to take the d_lock here. > OK. Will remove it. It was done at fs/ocfs2/export.c. So probably i > thought it was necessary. > > <snip> >Ahh see this is where me not being familiar with NFS gets me in trouble, you do need the d_lock in that case. Sorry about that. Josef -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Jun 30, 2008 at 8:20 PM, Josef Bacik <jbacik@redhat.com> wrote:> On Sun, Jun 29, 2008 at 05:05:31AM +0530, Balaji Rao wrote: >> Hello, >> >> Here''s a quick implementation of NFS support for btrfs. It works mostly. But >> its still a bit buggy. I suspect it has to do something with inode locking. >> When I unmount and mount a couple of times, it stops working i.e, when I do an >> ''ls'', it keeps waiting indefinitely. This is seen when accessing the filesystem >> from a local mount point as well. >> >> Can anybody please review and tell me what stupid mistake I''m committing ? >> > > You may want to send things in patch form. I''d recommend using the hg quilt > plugin, its nice for adding patches easily.Yes, since this was just an RFC, i thought it would be better to send just the file I created.> >> >> #include <linux/fs.h> >> #include <linux/types.h> >> #include "ctree.h" >> #include "disk-io.h" >> #include "btrfs_inode.h" >> #include "print-tree.h" >> #include "export.h" >> >> static int btrfs_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len, >> int connectable) >> { struct inode *inode = dentry->d_inode; >> int len = *max_len; >> u64 objectid, root_objectid; >> u32 generation; >> __le32 *fh = (__force __le32 *) fh_in; >> >> if ((len < 5 )|| (connectable && len < 8)) { >> return 255; >> } > > Magic numbers are a no-no.OK :) Will change> >> >> objectid = BTRFS_I(inode)->location.objectid; >> root_objectid = BTRFS_I(inode)->root->objectid; >> generation = inode->i_generation; >> >> len = 5; >> fh[0] = cpu_to_le32((u32)(objectid >> 32)); >> fh[1] = cpu_to_le32((u32)(objectid & 0xffffffff)); >> fh[2] = cpu_to_le32((u32)(root_objectid >> 32)); >> fh[3] = cpu_to_le32((u32)(root_objectid & 0xffffffff)); >> fh[4] = cpu_to_le32(generation); >> >> if (connectable && !S_ISDIR(inode->i_mode)) { >> struct inode *parent; >> >> spin_lock(&dentry->d_lock); >> >> parent = dentry->d_parent->d_inode; >> objectid = BTRFS_I(parent)->location.objectid; >> generation = parent->i_generation; >> >> fh[5] = cpu_to_le32((u32)(objectid >> 32)); >> fh[6] = cpu_to_le32((u32)(objectid & 0xffffffff)); >> fh[7] = cpu_to_le32(generation); >> >> spin_unlock(&dentry->d_lock); >> >> len += 3; > > there is no need to take the d_lock here.OK. Will remove it. It was done at fs/ocfs2/export.c. So probably i thought it was necessary. <snip>>> static struct dentry *btrfs_get_parent(struct dentry *child) >> { >> struct inode *dir = child->d_inode; >> struct inode *inode; >> struct dentry *parent; >> struct btrfs_root *root = BTRFS_I(dir)->root; >> struct btrfs_key key; >> struct btrfs_path *path; >> struct extent_buffer *leaf; >> u32 nritems; >> int slot; >> u64 objectid; >> int ret; >> >> path = btrfs_alloc_path(); >> >> key.objectid = dir->i_ino; >> btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); >> key.offset = 0; >> ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); >> BUG_ON(ret == 0); >> ret = 0; >> >> leaf = path->nodes[0]; >> slot = path->slots[0]; >> nritems = btrfs_header_nritems(leaf); >> if (slot >= nritems) >> return ERR_PTR(-EINVAL); >> >> btrfs_item_key_to_cpu(leaf, &key, slot); >> if (key.objectid != dir->i_ino || >> key.type != BTRFS_INODE_REF_KEY) { >> return ERR_PTR(-EINVAL); >> } >> > > in both of the above error cases you don''t free the path, you need to do that.Yes. Will do. Thanks for the review! -- warm regards Balaji Rao -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sun, Jun 29, 2008 at 05:05:31AM +0530, Balaji Rao wrote:> len = 5; > fh[0] = cpu_to_le32((u32)(objectid >> 32)); > fh[1] = cpu_to_le32((u32)(objectid & 0xffffffff)); > fh[2] = cpu_to_le32((u32)(root_objectid >> 32)); > fh[3] = cpu_to_le32((u32)(root_objectid & 0xffffffff)); > fh[4] = cpu_to_le32(generation);Please use C struct types for the file handles. Take a look at fs/xfs/linux-2.6/xfs_export.[ch] for an example.> static struct dentry * btrfs_get_dentry(struct super_block *sb, > u64 objectid, u64 root_objectid, u32 generation) > { > struct inode *inode; > struct dentry *result; > > inode = btrfs_ilookup(sb, objectid, root_objectid); > if (IS_ERR(inode)) > return (void *)inode;This one will only find inodes when they already are in memory. You need to something similar to btrfs_lookup minus the btrfs_inode_by_name call here. In fact it would make a lot of sense to factor that part of btrfs_lookup out into a btrfs_iget helper that takes a struct btrfs_key pointer and returns the inode for you. -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Jun 30, 2008 at 11:08:11AM -0400, Josef Bacik wrote:> > >> > > >> spin_unlock(&dentry->d_lock); > > >> > > >> len += 3; > > > > > > there is no need to take the d_lock here. > > OK. Will remove it. It was done at fs/ocfs2/export.c. So probably i > > thought it was necessary. > > > > <snip> > > > > Ahh see this is where me not being familiar with NFS gets me in trouble, you do > need the d_lock in that case. Sorry about that.It''s not really about nfs, dereferencing ->d_parent needs d_lock. -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
From: Balaji Rao <balajirrao@gmail.com>
This patch introduces a btrfs_iget helper to be used in NFS support.
Signed-off-by: Balaji Rao <balajirrao@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
ctree.h | 2 ++
inode.c | 55 +++++++++++++++++++++++++++++++++++++------------------
2 files changed, 39 insertions(+), 18 deletions(-)
diff --git a/ctree.h b/ctree.h
index c88f1e1..411d576 100644
--- a/ctree.h
+++ b/ctree.h
@@ -1698,6 +1698,8 @@ struct inode *btrfs_iget_locked(struct super_block *s, u64
objectid,
struct btrfs_root *root);
struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
u64 root_objectid);
+struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
+ struct btrfs_root *root, int *is_new);
int btrfs_commit_write(struct file *file, struct page *page,
unsigned from, unsigned to);
struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
diff --git a/inode.c b/inode.c
index a26d365..5344526 100644
--- a/inode.c
+++ b/inode.c
@@ -1881,6 +1881,33 @@ struct inode *btrfs_iget_locked(struct super_block *s,
u64 objectid,
return inode;
}
+/* Get an inode object given its location and corresponding root.
+ * Returns in *is_new if the inode was read from disk
+ */
+struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
+ struct btrfs_root *root, int *is_new)
+{
+ struct inode *inode;
+
+ inode = btrfs_iget_locked(s, location->objectid, root);
+ if (!inode)
+ return ERR_PTR(-EACCES);
+
+ if (inode->i_state & I_NEW) {
+ BTRFS_I(inode)->root = root;
+ memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
+ btrfs_read_locked_inode(inode);
+ unlock_new_inode(inode);
+ if (is_new)
+ *is_new = 1;
+ } else {
+ if (is_new)
+ *is_new = 0;
+ }
+
+ return inode;
+}
+
static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
struct nameidata *nd)
{
@@ -1889,7 +1916,7 @@ static struct dentry *btrfs_lookup(struct inode *dir,
struct dentry *dentry,
struct btrfs_root *root = bi->root;
struct btrfs_root *sub_root = root;
struct btrfs_key location;
- int ret, do_orphan = 0;
+ int ret, new, do_orphan = 0;
if (dentry->d_name.len > BTRFS_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
@@ -1907,23 +1934,15 @@ static struct dentry *btrfs_lookup(struct inode *dir,
struct dentry *dentry,
return ERR_PTR(ret);
if (ret > 0)
return ERR_PTR(-ENOENT);
-
- inode = btrfs_iget_locked(dir->i_sb, location.objectid,
- sub_root);
- if (!inode)
- return ERR_PTR(-EACCES);
- if (inode->i_state & I_NEW) {
- /* the inode and parent dir are two different roots */
- if (sub_root != root) {
- igrab(inode);
- sub_root->inode = inode;
- do_orphan = 1;
- }
- BTRFS_I(inode)->root = sub_root;
- memcpy(&BTRFS_I(inode)->location, &location,
- sizeof(location));
- btrfs_read_locked_inode(inode);
- unlock_new_inode(inode);
+ inode = btrfs_iget(dir->i_sb, &location, sub_root, &new);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
+
+ /* the inode and parent dir are two different roots */
+ if (new && root != sub_root) {
+ igrab(inode);
+ sub_root->inode = inode;
+ do_orphan = 1;
}
}
--
1.5.5.1
--
David Woodhouse Open Source Technology Centre
David.Woodhouse@intel.com Intel Corporation
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
From: Balaji Rao <balajirrao@gmail.com>
Here''s an implementation of NFS support for btrfs. It relies on the
fixes which are going in to 2.6.28 for the NFS readdir/lookup deadlock.
This uses the btrfs_iget helper introduced previously.
[dwmw2: Tidy up a little, switch to d_obtain_alias() w/compat.]
Signed-off-by: Balaji Rao <balajirrao@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
Makefile | 2 +-
compat.h | 10 ++++
export.c | 174 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
export.h | 17 ++++++
super.c | 2 +
5 files changed, 204 insertions(+), 1 deletions(-)
create mode 100644 export.c
create mode 100644 export.h
diff --git a/Makefile b/Makefile
index a4b3817..75f8818 100644
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o
root-tree.o dir-item.o \
transaction.o bit-radix.o inode.o file.o tree-defrag.o \
extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
- ref-cache.o
+ ref-cache.o export.o
btrfs-$(CONFIG_FS_POSIX_ACL) += acl.o
else
diff --git a/compat.h b/compat.h
index b3349a6..1040f07 100644
--- a/compat.h
+++ b/compat.h
@@ -5,6 +5,16 @@
#define trylock_page(page) (!TestSetPageLocked(page))
#endif
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,27)
+static inline struct dentry *d_obtain_alias(struct inode *inode)
+{
+ struct dentry *d = d_alloc_anon(inode);
+ if (!d)
+ iput(inode);
+ return d;
+}
+#endif
+
/*
* Even if AppArmor isn''t enabled, it still has different prototypes.
* Add more distro/version pairs here to declare which has AppArmor applied.
diff --git a/export.c b/export.c
new file mode 100644
index 0000000..253080a
--- /dev/null
+++ b/export.c
@@ -0,0 +1,174 @@
+#include <linux/fs.h>
+#include <linux/types.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "btrfs_inode.h"
+#include "print-tree.h"
+#include "export.h"
+#include "compat.h"
+
+/* The size of encoded fh is the type number of the fh itself */
+#define BTRFS_FID_NON_CONNECTABLE 5
+#define BTRFS_FID_CONNECTABLE 8
+
+static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
+ int connectable)
+{
+ struct btrfs_fid *fid = (struct btrfs_fid *)fh;
+ struct inode *inode = dentry->d_inode;
+ int len = *max_len;
+
+ if ((len < BTRFS_FID_NON_CONNECTABLE) ||
+ (connectable && len < BTRFS_FID_CONNECTABLE))
+ return 255;
+
+ len = BTRFS_FID_NON_CONNECTABLE;
+
+ fid->objectid = BTRFS_I(inode)->location.objectid;
+ fid->root_objectid = BTRFS_I(inode)->root->objectid;
+ fid->gen = inode->i_generation;
+
+ if (connectable && !S_ISDIR(inode->i_mode)) {
+ struct inode *parent;
+
+ spin_lock(&dentry->d_lock);
+
+ parent = dentry->d_parent->d_inode;
+ fid->parent_objectid = BTRFS_I(parent)->location.objectid;
+ fid->parent_gen = parent->i_generation;
+
+ spin_unlock(&dentry->d_lock);
+ len = BTRFS_FID_CONNECTABLE;
+ }
+ *max_len = len;
+
+ /* We return length itself for the type */
+ return len;
+
+}
+
+static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
+ u64 root_objectid, u32 generation)
+{
+ struct btrfs_root *root;
+ struct inode *inode;
+ struct dentry *result;
+ struct btrfs_key key;
+
+ key.objectid = objectid;
+ btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
+ key.offset = 0;
+
+ root = btrfs_lookup_fs_root(btrfs_sb(sb)->fs_info, root_objectid);
+ inode = btrfs_iget(sb, &key, root, NULL);
+ if (IS_ERR(inode))
+ return (void *)inode;
+
+ if (generation != inode->i_generation) {
+ iput(inode);
+ return ERR_PTR(-ESTALE);
+ }
+
+ result = d_alloc_anon(inode);
+ if (!result) {
+ iput(inode);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return result;
+}
+
+static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid
*fh,
+ int fh_len, int fh_type)
+{
+ struct btrfs_fid *fid = (struct btrfs_fid *) fh;
+ u64 objectid, root_objectid;
+ u32 generation;
+
+ if (fh_type != BTRFS_FID_CONNECTABLE)
+ return NULL;
+
+ root_objectid = fid->root_objectid;
+ objectid = fid->parent_objectid;
+ generation = fid->parent_gen;
+
+ return btrfs_get_dentry(sb, objectid, root_objectid, generation);
+}
+
+static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid
*fh,
+ int fh_len, int fh_type)
+{
+ struct btrfs_fid *fid = (struct btrfs_fid *) fh;
+ u64 objectid, root_objectid;
+ u32 generation;
+
+ if (fh_type > BTRFS_FID_CONNECTABLE)
+ return NULL;
+
+ objectid = fid->objectid;
+ root_objectid = fid->root_objectid;
+ generation = fid->gen;
+
+ return btrfs_get_dentry(sb, objectid, root_objectid, generation);
+}
+
+static struct dentry *btrfs_get_parent(struct dentry *child)
+{
+ struct inode *dir = child->d_inode;
+ struct inode *inode;
+ struct dentry *parent;
+ struct btrfs_root *root = BTRFS_I(dir)->root;
+ struct btrfs_key key;
+ struct btrfs_path *path;
+ struct extent_buffer *leaf;
+ u32 nritems;
+ int slot;
+ u64 objectid;
+ int ret;
+
+ path = btrfs_alloc_path();
+
+ key.objectid = dir->i_ino;
+ btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
+ key.offset = 0;
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ BUG_ON(ret == 0);
+ ret = 0;
+
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+ nritems = btrfs_header_nritems(leaf);
+ if (slot >= nritems)
+ goto out;
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY)
+ goto out;
+
+ btrfs_free_path(path);
+ objectid = key.offset;
+
+ /* Build a new key for the inode item */
+ key.objectid = objectid;
+ btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
+ key.offset = 0;
+
+ inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
+
+ parent = d_obtain_alias(inode);
+ if (!parent)
+ parent = ERR_PTR(-ENOMEM);
+
+ return parent;
+
+out:
+ btrfs_free_path(path);
+ return ERR_PTR(-EINVAL);
+}
+
+const struct export_operations btrfs_export_ops = {
+ .encode_fh = btrfs_encode_fh,
+ .fh_to_dentry = btrfs_fh_to_dentry,
+ .fh_to_parent = btrfs_fh_to_parent,
+ .get_parent = btrfs_get_parent,
+};
diff --git a/export.h b/export.h
new file mode 100644
index 0000000..019ca3a
--- /dev/null
+++ b/export.h
@@ -0,0 +1,17 @@
+#ifndef BTRFS_EXPORT_H
+#define BTRFS_EXPORT_H
+
+#include <linux/exportfs.h>
+
+extern const struct export_operations btrfs_export_ops;
+
+struct btrfs_fid {
+ u64 objectid;
+ u64 root_objectid;
+ u32 gen;
+
+ u64 parent_objectid;
+ u32 parent_gen;
+} __attribute__ ((packed));
+
+#endif
diff --git a/super.c b/super.c
index eb4b357..e830e0e 100644
--- a/super.c
+++ b/super.c
@@ -46,6 +46,7 @@
#include "xattr.h"
#include "volumes.h"
#include "version.h"
+#include "export.h"
#define BTRFS_SUPER_MAGIC 0x9123683E
@@ -303,6 +304,7 @@ static int btrfs_fill_super(struct super_block * sb,
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_magic = BTRFS_SUPER_MAGIC;
sb->s_op = &btrfs_super_ops;
+ sb->s_export_op = &btrfs_export_ops;
sb->s_xattr = btrfs_xattr_handlers;
sb->s_time_gran = 1;
sb->s_flags |= MS_POSIXACL;
--
1.5.5.1
--
David Woodhouse Open Source Technology Centre
David.Woodhouse@intel.com Intel Corporation
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
David Woodhouse
2008-Aug-12 13:46 UTC
[PATCH 3/3] Implement our own copy of the nfsd readdir hack, for older kernels
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
inode.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 102 insertions(+), 2 deletions(-)
diff --git a/inode.c b/inode.c
index 5344526..faa5543 100644
--- a/inode.c
+++ b/inode.c
@@ -1956,7 +1956,8 @@ static unsigned char btrfs_filetype_table[] = {
DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
};
-static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
+static int btrfs_real_readdir(struct file *filp, void *dirent,
+ filldir_t filldir)
{
struct inode *inode = filp->f_dentry->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -2105,6 +2106,101 @@ err:
return ret;
}
+/* Kernels earlier than 2.6.28 still have the NFS deadlock where nfsd
+ will call the file system''s ->lookup() method from within its
+ filldir callback, which in turn was called from the file system''s
+ ->readdir() method. And will deadlock for many file systems. */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
+
+struct nfshack_dirent {
+ u64 ino;
+ loff_t offset;
+ int namlen;
+ unsigned int d_type;
+ char name[];
+};
+
+struct nfshack_readdir {
+ char *dirent;
+ size_t used;
+};
+
+
+
+static int btrfs_nfshack_filldir(void *__buf, const char *name, int namlen,
+ loff_t offset, u64 ino, unsigned int d_type)
+{
+ struct nfshack_readdir *buf = __buf;
+ struct nfshack_dirent *de = (void *)(buf->dirent + buf->used);
+ unsigned int reclen;
+
+ reclen = ALIGN(sizeof(struct nfshack_dirent) + namlen, sizeof(u64));
+ if (buf->used + reclen > PAGE_SIZE)
+ return -EINVAL;
+
+ de->namlen = namlen;
+ de->offset = offset;
+ de->ino = ino;
+ de->d_type = d_type;
+ memcpy(de->name, name, namlen);
+ buf->used += reclen;
+
+ return 0;
+}
+
+static int btrfs_nfshack_readdir(struct file *file, void *dirent,
+ filldir_t filldir)
+{
+ struct nfshack_readdir buf;
+ struct nfshack_dirent *de;
+ int err;
+ int size;
+ loff_t offset;
+
+ buf.dirent = (void *)__get_free_page(GFP_KERNEL);
+ if (!buf.dirent)
+ return -ENOMEM;
+
+ offset = file->f_pos;
+
+ while (1) {
+ unsigned int reclen;
+
+ buf.used = 0;
+
+ err = btrfs_real_readdir(file, &buf, btrfs_nfshack_filldir);
+ if (err)
+ break;
+
+ size = buf.used;
+
+ if (!size)
+ break;
+
+ de = (struct nfshack_dirent *)buf.dirent;
+ while (size > 0) {
+ offset = de->offset;
+
+ if (filldir(dirent, de->name, de->namlen, de->offset,
+ de->ino, de->d_type))
+ goto done;
+ offset = file->f_pos;
+
+ reclen = ALIGN(sizeof(*de) + de->namlen,
+ sizeof(u64));
+ size -= reclen;
+ de = (struct nfshack_dirent *)((char *)de + reclen);
+ }
+ }
+
+ done:
+ free_page((unsigned long)buf.dirent);
+ file->f_pos = offset;
+
+ return err;
+}
+#endif
+
int btrfs_write_inode(struct inode *inode, int wait)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -3661,7 +3757,11 @@ static struct inode_operations
btrfs_dir_ro_inode_operations = {
static struct file_operations btrfs_dir_file_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
- .readdir = btrfs_readdir,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
+ .readdir = btrfs_nfshack_readdir,
+#else /* NFSd readdir/lookup deadlock is fixed */
+ .readdir = btrfs_real_readdir,
+#endif
.unlocked_ioctl = btrfs_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = btrfs_ioctl,
--
1.5.5.1
--
David Woodhouse Open Source Technology Centre
David.Woodhouse@intel.com Intel Corporation
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
> +struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, > + struct btrfs_root *root, int *is_new) > +{ > + struct inode *inode; > + > + inode = btrfs_iget_locked(s, location->objectid, root); > + if (!inode) > + return ERR_PTR(-EACCES); > + > + if (inode->i_state & I_NEW) { > + BTRFS_I(inode)->root = root; > + memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); > + btrfs_read_locked_inode(inode); > + unlock_new_inode(inode); > + if (is_new) > + *is_new = 1; > + } else { > + if (is_new) > + *is_new = 0; > + } > + > + return inode;Please leave the unlock_new_inode to the caller and kill the is_new parameter. Gives much nicer to read code, and generally making sure the inode is 100% ready and correct before unlocking it is a good idea, too so that e.g. no one can actually access an inode marked bad due to a generation number mismatch before it''s marked bad.> +} > + > static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, > struct nameidata *nd) > { > @@ -1889,7 +1916,7 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, > struct btrfs_root *root = bi->root; > struct btrfs_root *sub_root = root; > struct btrfs_key location; > - int ret, do_orphan = 0; > + int ret, new, do_orphan = 0; > > if (dentry->d_name.len > BTRFS_NAME_LEN) > return ERR_PTR(-ENAMETOOLONG); > @@ -1907,23 +1934,15 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, > return ERR_PTR(ret); > if (ret > 0) > return ERR_PTR(-ENOENT); > - > - inode = btrfs_iget_locked(dir->i_sb, location.objectid, > - sub_root); > - if (!inode) > - return ERR_PTR(-EACCES); > - if (inode->i_state & I_NEW) { > - /* the inode and parent dir are two different roots */ > - if (sub_root != root) { > - igrab(inode); > - sub_root->inode = inode; > - do_orphan = 1; > - } > - BTRFS_I(inode)->root = sub_root; > - memcpy(&BTRFS_I(inode)->location, &location, > - sizeof(location)); > - btrfs_read_locked_inode(inode); > - unlock_new_inode(inode); > + inode = btrfs_iget(dir->i_sb, &location, sub_root, &new); > + if (IS_ERR(inode)) > + return ERR_CAST(inode); > + > + /* the inode and parent dir are two different roots */ > + if (new && root != sub_root) { > + igrab(inode); > + sub_root->inode = inode; > + do_orphan = 1; > } > } > > -- > 1.5.5.1 > > > -- > David Woodhouse Open Source Technology Centre > David.Woodhouse@intel.com Intel Corporation > > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html---end quoted text--- -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Aug 12, 2008 at 02:46:46PM +0100, David Woodhouse wrote:> +static inline struct dentry *d_obtain_alias(struct inode *inode) > +{ > + struct dentry *d = d_alloc_anon(inode); > + if (!d) > + iput(inode); > + return d; > +} > +#endifI''m not sure when al wants to merge with Linus, but the for-next naming makes it sound like the tree is the .28 queue. Also please take the full implementation of d_obtain_alias from http://git.kernel.org/?p=linux/kernel/git/viro/vfs-2.6.git;a=commitdiff;h=10cdb734be3c4175b977ba18eafbaba8e5716291 please. Your light implementation will crash and burn when btrfs_iget returns an error.> +/* The size of encoded fh is the type number of the fh itself */ > +#define BTRFS_FID_NON_CONNECTABLE 5 > +#define BTRFS_FID_CONNECTABLE 8That was an assumption in the very old code, but I think it''s a very bad idea. Just give your filehandles a uniqueue type number, e.g. in the 0x4? range so that people looking at nfs traffic using a packet analyzer know what kind of fhs they are actually dealing with.> + if (IS_ERR(inode)) > + return (void *)inode; > + > + if (generation != inode->i_generation) { > + iput(inode); > + return ERR_PTR(-ESTALE); > + } > + > + result = d_alloc_anon(inode); > + if (!result) { > + iput(inode); > + return ERR_PTR(-ENOMEM); > + }Didn''t you intend to use d_obtain_alias? -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, 2008-08-12 at 14:51 -0400, Christoph Hellwig wrote:> On Tue, Aug 12, 2008 at 02:46:46PM +0100, David Woodhouse wrote: > > +static inline struct dentry *d_obtain_alias(struct inode *inode) > > +{ > > + struct dentry *d = d_alloc_anon(inode); > > + if (!d) > > + iput(inode); > > + return d; > > +} > > +#endif > > I''m not sure when al wants to merge with Linus, but the for-next naming > makes it sound like the tree is the .28 queue. Also please take the > full implementation of d_obtain_alias from > > http://git.kernel.org/?p=linux/kernel/git/viro/vfs-2.6.git;a=commitdiff;h=10cdb734be3c4175b977ba18eafbaba8e5716291 > > please. Your light implementation will crash and burn when btrfs_iget > returns an error.Well spotted; thanks.> > +/* The size of encoded fh is the type number of the fh itself */ > > +#define BTRFS_FID_NON_CONNECTABLE 5 > > +#define BTRFS_FID_CONNECTABLE 8 > > That was an assumption in the very old code, but I think it''s a very > bad idea. Just give your filehandles a uniqueue type number, e.g. in > the 0x4? range so that people looking at nfs traffic using a packet > analyzer know what kind of fhs they are actually dealing with.Ok, I''ll submit the patch to include/linux/exportfs.h separately -- the numbers might as well be reserved immediately. Btw, I''m not convinced nfsd does the right thing when we return 255 from ->encode_fh(). It seems to leak a refcount somewhere in nfs code, so I can''t stop the server and can''t unmount the file system.> > + if (IS_ERR(inode)) > > + return (void *)inode; > > + > > + if (generation != inode->i_generation) { > > + iput(inode); > > + return ERR_PTR(-ESTALE); > > + } > > + > > + result = d_alloc_anon(inode); > > + if (!result) { > > + iput(inode); > > + return ERR_PTR(-ENOMEM); > > + } > > Didn''t you intend to use d_obtain_alias?Oops, yes. I did use it once; I missed the second one though. I''ll merge this into the patch in question... diff --git a/compat.h b/compat.h index 1040f07..d45fb37 100644 --- a/compat.h +++ b/compat.h @@ -8,7 +8,14 @@ #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,27) static inline struct dentry *d_obtain_alias(struct inode *inode) { - struct dentry *d = d_alloc_anon(inode); + struct dentry *d; + + if (!inode) + return NULL; + if (IS_ERR(inode)) + return ERR_CAST(inode); + + d = d_alloc_anon(inode); if (!d) iput(inode); return d; diff --git a/export.c b/export.c index 253080a..34e4631 100644 --- a/export.c +++ b/export.c @@ -7,9 +7,13 @@ #include "export.h" #include "compat.h" -/* The size of encoded fh is the type number of the fh itself */ -#define BTRFS_FID_NON_CONNECTABLE 5 -#define BTRFS_FID_CONNECTABLE 8 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) +#define FILEID_BTRFS_WITHOUT_PARENT 0x4e +#define FILEID_BTRFS_WITH_PARENT 0x4f +#endif + +#define BTRFS_FID_SIZE_NON_CONNECTABLE (offsetof(struct btrfs_fid, parent_objectid)/4) +#define BTRFS_FID_SIZE_CONNECTABLE (sizeof(struct btrfs_fid)/4) static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, int connectable) @@ -17,12 +21,14 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, struct btrfs_fid *fid = (struct btrfs_fid *)fh; struct inode *inode = dentry->d_inode; int len = *max_len; + int type; - if ((len < BTRFS_FID_NON_CONNECTABLE) || - (connectable && len < BTRFS_FID_CONNECTABLE)) + if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) || + (connectable && len < BTRFS_FID_SIZE_CONNECTABLE)) return 255; - len = BTRFS_FID_NON_CONNECTABLE; + len = BTRFS_FID_SIZE_NON_CONNECTABLE; + type = FILEID_BTRFS_WITHOUT_PARENT; fid->objectid = BTRFS_I(inode)->location.objectid; fid->root_objectid = BTRFS_I(inode)->root->objectid; @@ -38,13 +44,12 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, fid->parent_gen = parent->i_generation; spin_unlock(&dentry->d_lock); - len = BTRFS_FID_CONNECTABLE; + len = BTRFS_FID_SIZE_CONNECTABLE; + type = FILEID_BTRFS_WITH_PARENT; } - *max_len = len; - - /* We return length itself for the type */ - return len; + *max_len = len; + return type; } static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, @@ -69,11 +74,9 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, return ERR_PTR(-ESTALE); } - result = d_alloc_anon(inode); - if (!result) { - iput(inode); + result = d_obtain_alias(inode); + if (!result) return ERR_PTR(-ENOMEM); - } return result; } @@ -85,7 +88,8 @@ static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh, u64 objectid, root_objectid; u32 generation; - if (fh_type != BTRFS_FID_CONNECTABLE) + if (fh_type != FILEID_BTRFS_WITH_PARENT || + fh_len != BTRFS_FID_SIZE_CONNECTABLE) return NULL; root_objectid = fid->root_objectid; @@ -102,7 +106,10 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, u64 objectid, root_objectid; u32 generation; - if (fh_type > BTRFS_FID_CONNECTABLE) + if ((fh_type != FILEID_BTRFS_WITH_PARENT || + fh_len != BTRFS_FID_SIZE_CONNECTABLE) && + (fh_type != FILEID_BTRFS_WITHOUT_PARENT || + fh_len != BTRFS_FID_SIZE_NON_CONNECTABLE)) return NULL; objectid = fid->objectid; -- David Woodhouse Open Source Technology Centre David.Woodhouse@intel.com Intel Corporation -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, 2008-08-12 at 14:46 -0400, Christoph Hellwig wrote:> Please leave the unlock_new_inode to the caller and kill the is_new > parameter. Gives much nicer to read code, and generally making sure the > inode is 100% ready and correct before unlocking it is a good idea, too > so that e.g. no one can actually access an inode marked bad due to a > generation number mismatch before it''s marked bad.Unconvinced -- I don''t like the idea that we''d sometimes return a locked inode, sometimes not, and force the callers to handle that. And the inode _is_ ready and correct. We don''t mark inodes bad because of a generation number mismatch -- we just return -ESTALE and drop our refcount on the inode. The inode itself is fine and there''s no issue with it being unlocked. The code in btrfs_lookup() which goes... /* the inode and parent dir are two different roots */ if (new && root != sub_root) { igrab(inode); sub_root->inode = inode; do_orphan = 1; } ... should also be fine when the inode is unlocked, too. -- David Woodhouse Open Source Technology Centre David.Woodhouse@intel.com Intel Corporation -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, 2008-08-13 at 10:07 +0100, David Woodhouse wrote:> /* the inode and parent dir are two different roots */ > if (new && root != sub_root) { > igrab(inode); > sub_root->inode = inode; > do_orphan = 1; > }Hmmmm.... looking at that, don''t we need to store the _parent_ root object ID in the file handle too, if it can be different from the child? Something like this, perhaps. Given that I think we''ve already given up on exporting by NFSv2, do we care about another 8 bytes in the fh size? diff --git a/export.c b/export.c index 34e4631..1b8875c 100644 --- a/export.c +++ b/export.c @@ -8,12 +8,14 @@ #include "compat.h" #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) -#define FILEID_BTRFS_WITHOUT_PARENT 0x4e -#define FILEID_BTRFS_WITH_PARENT 0x4f +#define FILEID_BTRFS_WITHOUT_PARENT 0x4d +#define FILEID_BTRFS_WITH_PARENT 0x4e +#define FILEID_BTRFS_WITH_PARENT_ROOT 0x4f #endif #define BTRFS_FID_SIZE_NON_CONNECTABLE (offsetof(struct btrfs_fid, parent_objectid)/4) -#define BTRFS_FID_SIZE_CONNECTABLE (sizeof(struct btrfs_fid)/4) +#define BTRFS_FID_SIZE_CONNECTABLE (offsetof(struct btrfs_fid, parent_root_objectid)/4) +#define BTRFS_FID_SIZE_CONNECTABLE_ROOT (sizeof(struct btrfs_fid)/4) static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, int connectable) @@ -36,16 +38,25 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, if (connectable && !S_ISDIR(inode->i_mode)) { struct inode *parent; + u64 parent_root_id; spin_lock(&dentry->d_lock); parent = dentry->d_parent->d_inode; fid->parent_objectid = BTRFS_I(parent)->location.objectid; fid->parent_gen = parent->i_generation; + parent_root_id = BTRFS_I(parent)->root->objectid; spin_unlock(&dentry->d_lock); - len = BTRFS_FID_SIZE_CONNECTABLE; - type = FILEID_BTRFS_WITH_PARENT; + + if (parent_root_id != fid->root_objectid) { + fid->parent_root_objectid = parent_root_id; + len = BTRFS_FID_SIZE_CONNECTABLE_ROOT; + type = FILEID_BTRFS_WITH_PARENT_ROOT; + } else { + len = BTRFS_FID_SIZE_CONNECTABLE; + type = FILEID_BTRFS_WITH_PARENT; + } } *max_len = len; @@ -88,11 +99,17 @@ static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh, u64 objectid, root_objectid; u32 generation; - if (fh_type != FILEID_BTRFS_WITH_PARENT || - fh_len != BTRFS_FID_SIZE_CONNECTABLE) + if (fh_type == FILEID_BTRFS_WITH_PARENT) { + if (fh_len != BTRFS_FID_SIZE_CONNECTABLE) + return NULL; + root_objectid = fid->root_objectid; + } else if (fh_type == FILEID_BTRFS_WITH_PARENT_ROOT) { + if (fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT) + return NULL; + root_objectid = fid->parent_root_objectid; + } else return NULL; - root_objectid = fid->root_objectid; objectid = fid->parent_objectid; generation = fid->parent_gen; @@ -108,6 +125,8 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, if ((fh_type != FILEID_BTRFS_WITH_PARENT || fh_len != BTRFS_FID_SIZE_CONNECTABLE) && + (fh_type != FILEID_BTRFS_WITH_PARENT_ROOT || + fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT) && (fh_type != FILEID_BTRFS_WITHOUT_PARENT || fh_len != BTRFS_FID_SIZE_NON_CONNECTABLE)) return NULL; diff --git a/export.h b/export.h index 019ca3a..074348a 100644 --- a/export.h +++ b/export.h @@ -12,6 +12,8 @@ struct btrfs_fid { u64 parent_objectid; u32 parent_gen; + + u64 parent_root_objectid; } __attribute__ ((packed)); #endif -- David Woodhouse Open Source Technology Centre David.Woodhouse@intel.com Intel Corporation -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
From: Balaji Rao <balajirrao@gmail.com>
Here''s an implementation of NFS support for btrfs. It relies on the
fixes which are going in to 2.6.28 for the NFS readdir/lookup deadlock.
This uses the btrfs_iget helper introduced previously.
[dwmw2: Tidy up a little, switch to d_obtain_alias() w/compat routine,
change fh_type, store parent''s root object ID where needed]
Signed-off-by: Balaji Rao <balajirrao@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
This replaces patch 2 of the sequence I sent out yesterday; the other
two patches remain the same.
Makefile | 2 +-
compat.h | 17 +++++
export.c | 200 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
export.h | 19 ++++++
super.c | 2 +
5 files changed, 239 insertions(+), 1 deletions(-)
create mode 100644 export.c
create mode 100644 export.h
diff --git a/Makefile b/Makefile
index a4b3817..75f8818 100644
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o
root-tree.o dir-item.o \
transaction.o bit-radix.o inode.o file.o tree-defrag.o \
extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
- ref-cache.o
+ ref-cache.o export.o
btrfs-$(CONFIG_FS_POSIX_ACL) += acl.o
else
diff --git a/compat.h b/compat.h
index b3349a6..d45fb37 100644
--- a/compat.h
+++ b/compat.h
@@ -5,6 +5,23 @@
#define trylock_page(page) (!TestSetPageLocked(page))
#endif
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,27)
+static inline struct dentry *d_obtain_alias(struct inode *inode)
+{
+ struct dentry *d;
+
+ if (!inode)
+ return NULL;
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
+
+ d = d_alloc_anon(inode);
+ if (!d)
+ iput(inode);
+ return d;
+}
+#endif
+
/*
* Even if AppArmor isn''t enabled, it still has different prototypes.
* Add more distro/version pairs here to declare which has AppArmor applied.
diff --git a/export.c b/export.c
new file mode 100644
index 0000000..1b8875c
--- /dev/null
+++ b/export.c
@@ -0,0 +1,200 @@
+#include <linux/fs.h>
+#include <linux/types.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "btrfs_inode.h"
+#include "print-tree.h"
+#include "export.h"
+#include "compat.h"
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
+#define FILEID_BTRFS_WITHOUT_PARENT 0x4d
+#define FILEID_BTRFS_WITH_PARENT 0x4e
+#define FILEID_BTRFS_WITH_PARENT_ROOT 0x4f
+#endif
+
+#define BTRFS_FID_SIZE_NON_CONNECTABLE (offsetof(struct btrfs_fid,
parent_objectid)/4)
+#define BTRFS_FID_SIZE_CONNECTABLE (offsetof(struct btrfs_fid,
parent_root_objectid)/4)
+#define BTRFS_FID_SIZE_CONNECTABLE_ROOT (sizeof(struct btrfs_fid)/4)
+
+static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
+ int connectable)
+{
+ struct btrfs_fid *fid = (struct btrfs_fid *)fh;
+ struct inode *inode = dentry->d_inode;
+ int len = *max_len;
+ int type;
+
+ if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) ||
+ (connectable && len < BTRFS_FID_SIZE_CONNECTABLE))
+ return 255;
+
+ len = BTRFS_FID_SIZE_NON_CONNECTABLE;
+ type = FILEID_BTRFS_WITHOUT_PARENT;
+
+ fid->objectid = BTRFS_I(inode)->location.objectid;
+ fid->root_objectid = BTRFS_I(inode)->root->objectid;
+ fid->gen = inode->i_generation;
+
+ if (connectable && !S_ISDIR(inode->i_mode)) {
+ struct inode *parent;
+ u64 parent_root_id;
+
+ spin_lock(&dentry->d_lock);
+
+ parent = dentry->d_parent->d_inode;
+ fid->parent_objectid = BTRFS_I(parent)->location.objectid;
+ fid->parent_gen = parent->i_generation;
+ parent_root_id = BTRFS_I(parent)->root->objectid;
+
+ spin_unlock(&dentry->d_lock);
+
+ if (parent_root_id != fid->root_objectid) {
+ fid->parent_root_objectid = parent_root_id;
+ len = BTRFS_FID_SIZE_CONNECTABLE_ROOT;
+ type = FILEID_BTRFS_WITH_PARENT_ROOT;
+ } else {
+ len = BTRFS_FID_SIZE_CONNECTABLE;
+ type = FILEID_BTRFS_WITH_PARENT;
+ }
+ }
+
+ *max_len = len;
+ return type;
+}
+
+static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
+ u64 root_objectid, u32 generation)
+{
+ struct btrfs_root *root;
+ struct inode *inode;
+ struct dentry *result;
+ struct btrfs_key key;
+
+ key.objectid = objectid;
+ btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
+ key.offset = 0;
+
+ root = btrfs_lookup_fs_root(btrfs_sb(sb)->fs_info, root_objectid);
+ inode = btrfs_iget(sb, &key, root, NULL);
+ if (IS_ERR(inode))
+ return (void *)inode;
+
+ if (generation != inode->i_generation) {
+ iput(inode);
+ return ERR_PTR(-ESTALE);
+ }
+
+ result = d_obtain_alias(inode);
+ if (!result)
+ return ERR_PTR(-ENOMEM);
+
+ return result;
+}
+
+static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid
*fh,
+ int fh_len, int fh_type)
+{
+ struct btrfs_fid *fid = (struct btrfs_fid *) fh;
+ u64 objectid, root_objectid;
+ u32 generation;
+
+ if (fh_type == FILEID_BTRFS_WITH_PARENT) {
+ if (fh_len != BTRFS_FID_SIZE_CONNECTABLE)
+ return NULL;
+ root_objectid = fid->root_objectid;
+ } else if (fh_type == FILEID_BTRFS_WITH_PARENT_ROOT) {
+ if (fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT)
+ return NULL;
+ root_objectid = fid->parent_root_objectid;
+ } else
+ return NULL;
+
+ objectid = fid->parent_objectid;
+ generation = fid->parent_gen;
+
+ return btrfs_get_dentry(sb, objectid, root_objectid, generation);
+}
+
+static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid
*fh,
+ int fh_len, int fh_type)
+{
+ struct btrfs_fid *fid = (struct btrfs_fid *) fh;
+ u64 objectid, root_objectid;
+ u32 generation;
+
+ if ((fh_type != FILEID_BTRFS_WITH_PARENT ||
+ fh_len != BTRFS_FID_SIZE_CONNECTABLE) &&
+ (fh_type != FILEID_BTRFS_WITH_PARENT_ROOT ||
+ fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT) &&
+ (fh_type != FILEID_BTRFS_WITHOUT_PARENT ||
+ fh_len != BTRFS_FID_SIZE_NON_CONNECTABLE))
+ return NULL;
+
+ objectid = fid->objectid;
+ root_objectid = fid->root_objectid;
+ generation = fid->gen;
+
+ return btrfs_get_dentry(sb, objectid, root_objectid, generation);
+}
+
+static struct dentry *btrfs_get_parent(struct dentry *child)
+{
+ struct inode *dir = child->d_inode;
+ struct inode *inode;
+ struct dentry *parent;
+ struct btrfs_root *root = BTRFS_I(dir)->root;
+ struct btrfs_key key;
+ struct btrfs_path *path;
+ struct extent_buffer *leaf;
+ u32 nritems;
+ int slot;
+ u64 objectid;
+ int ret;
+
+ path = btrfs_alloc_path();
+
+ key.objectid = dir->i_ino;
+ btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
+ key.offset = 0;
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ BUG_ON(ret == 0);
+ ret = 0;
+
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+ nritems = btrfs_header_nritems(leaf);
+ if (slot >= nritems)
+ goto out;
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY)
+ goto out;
+
+ btrfs_free_path(path);
+ objectid = key.offset;
+
+ /* Build a new key for the inode item */
+ key.objectid = objectid;
+ btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
+ key.offset = 0;
+
+ inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
+
+ parent = d_obtain_alias(inode);
+ if (!parent)
+ parent = ERR_PTR(-ENOMEM);
+
+ return parent;
+
+out:
+ btrfs_free_path(path);
+ return ERR_PTR(-EINVAL);
+}
+
+const struct export_operations btrfs_export_ops = {
+ .encode_fh = btrfs_encode_fh,
+ .fh_to_dentry = btrfs_fh_to_dentry,
+ .fh_to_parent = btrfs_fh_to_parent,
+ .get_parent = btrfs_get_parent,
+};
diff --git a/export.h b/export.h
new file mode 100644
index 0000000..074348a
--- /dev/null
+++ b/export.h
@@ -0,0 +1,19 @@
+#ifndef BTRFS_EXPORT_H
+#define BTRFS_EXPORT_H
+
+#include <linux/exportfs.h>
+
+extern const struct export_operations btrfs_export_ops;
+
+struct btrfs_fid {
+ u64 objectid;
+ u64 root_objectid;
+ u32 gen;
+
+ u64 parent_objectid;
+ u32 parent_gen;
+
+ u64 parent_root_objectid;
+} __attribute__ ((packed));
+
+#endif
diff --git a/super.c b/super.c
index eb4b357..e830e0e 100644
--- a/super.c
+++ b/super.c
@@ -46,6 +46,7 @@
#include "xattr.h"
#include "volumes.h"
#include "version.h"
+#include "export.h"
#define BTRFS_SUPER_MAGIC 0x9123683E
@@ -303,6 +304,7 @@ static int btrfs_fill_super(struct super_block * sb,
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_magic = BTRFS_SUPER_MAGIC;
sb->s_op = &btrfs_super_ops;
+ sb->s_export_op = &btrfs_export_ops;
sb->s_xattr = btrfs_xattr_handlers;
sb->s_time_gran = 1;
sb->s_flags |= MS_POSIXACL;
--
1.5.5.1
--
David Woodhouse Open Source Technology Centre
David.Woodhouse@intel.com Intel Corporation
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs"
in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html