Josef Bacik
2011-May-19 17:58 UTC
[PATCH 1/2] fs: add a DCACHE_NEED_LOOKUP flag for d_flags
Btrfs (and I''d venture most other fs''s) stores its indexes in nice disk order for readdir, but unfortunately in the case of anything that stats the files in order that readdir spits back (like oh say ls) that means we still have to do the normal lookup of the file, which means looking up our other index and then looking up the inode. What I want is a way to create dummy dentries when we find them in readdir so that when ls or anything else subsequently does a stat(), we already have the location information in the dentry and can go straight to the inode itself. The lookup stuff just assumes that if it finds a dentry it is done, it doesn''t perform a lookup. So add a DCACHE_NEED_LOOKUP flag so that the lookup code knows it still needs to run i_op->lookup() on the parent to get the inode for the dentry. I have tested this with btrfs and I went from something that looks like this http://people.redhat.com/jwhiter/ls-noreada.png To this http://people.redhat.com/jwhiter/ls-good.png Thats a savings of 1300 seconds, or 22 minutes. That is a significant savings. Thanks, Signed-off-by: Josef Bacik <josef@redhat.com> --- fs/namei.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/dcache.h | 1 + 2 files changed, 49 insertions(+), 0 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index e3c4f11..a1bff4f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1198,6 +1198,29 @@ static struct dentry *d_alloc_and_lookup(struct dentry *parent, } /* + * We already have a dentry, but require a lookup to be performed on the parent + * directory to fill in d_inode. Returns the new dentry, or ERR_PTR on error. + * parent->d_inode->i_mutex must be held. d_lookup must have verified that no + * child exists while under i_mutex. + */ +static struct dentry *d_inode_lookup(struct dentry *parent, struct dentry *dentry, + struct nameidata *nd) +{ + struct inode *inode = parent->d_inode; + struct dentry *old; + + /* Don''t create child dentry for a dead directory. */ + if (unlikely(IS_DEADDIR(inode))) + return ERR_PTR(-ENOENT); + + old = inode->i_op->lookup(inode, dentry, nd); + if (unlikely(old)) { + dput(dentry); + dentry = old; + } + return dentry; +} +/* * It''s more convoluted than I''d like it to be, but... it''s still fairly * small and for now I''d prefer to have fast path as straight as possible. * It _is_ time-critical. @@ -1236,6 +1259,13 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, goto unlazy; } } + if (unlikely(dentry->d_flags & DCACHE_NEED_LOOKUP)) { + if (nameidata_dentry_drop_rcu(nd, dentry)) + return -ECHILD; + dput(dentry); + dentry = NULL; + goto retry; + } path->mnt = mnt; path->dentry = dentry; if (likely(__follow_mount_rcu(nd, path, inode, false))) @@ -1250,6 +1280,12 @@ unlazy: } } else { dentry = __d_lookup(parent, name); + if (unlikely(!dentry)) + goto retry; + if (unlikely(dentry->d_flags & DCACHE_NEED_LOOKUP)) { + dput(dentry); + dentry = NULL; + } } retry: @@ -1268,6 +1304,18 @@ retry: /* known good */ need_reval = 0; status = 1; + } else if (unlikely(dentry->d_flags & DCACHE_NEED_LOOKUP)) { + struct dentry *old; + + dentry->d_flags &= ~DCACHE_NEED_LOOKUP; + dentry = d_inode_lookup(parent, dentry, nd); + if (IS_ERR(dentry)) { + mutex_unlock(&dir->i_mutex); + return PTR_ERR(dentry); + } + /* known good */ + need_reval = 0; + status = 1; } mutex_unlock(&dir->i_mutex); } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 19d90a5..a8b2457 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -216,6 +216,7 @@ struct dentry_operations { #define DCACHE_MOUNTED 0x10000 /* is a mountpoint */ #define DCACHE_NEED_AUTOMOUNT 0x20000 /* handle automount on this dir */ #define DCACHE_MANAGE_TRANSIT 0x40000 /* manage transit from this dirent */ +#define DCACHE_NEED_LOOKUP 0x80000 /* dentry requires i_op->lookup */ #define DCACHE_MANAGED_DENTRY \ (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT) -- 1.7.2.3 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Josef Bacik
2011-May-19 17:58 UTC
[PATCH 2/2] Btrfs: load the key from the dir item in readdir into a fake dentry
In btrfs we have 2 indexes for inodes. One is for readdir, it''s in this nice sequential order and works out brilliantly for readdir. However if you use ls, it usually stat''s each file it gets from readdir. This is where the second index comes in, which is based on a hash of the name of the file. So then the lookup has to lookup this index, and then lookup the inode. The index lookup is going to be in random order (since its based on the name hash), which gives us less than stellar performance. Since we know the inode location from the readdir index, I create a dummy dentry and copy the location key into dentry->d_fsdata. Then on lookup if we have d_fsdata we use that location to lookup the inode, avoiding looking up the other directory index. Thanks, Signed-off-by: Josef Bacik <josef@redhat.com> --- fs/btrfs/inode.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 files changed, 49 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6228a30..3cd246c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4120,12 +4120,23 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) struct btrfs_root *sub_root = root; struct btrfs_key location; int index; - int ret; + int ret = 0; if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - ret = btrfs_inode_by_name(dir, dentry, &location); + if (dentry->d_fsdata) { + memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key)); + kfree(dentry->d_fsdata); + dentry->d_fsdata = NULL; + /* + * We need to unhash this dentry so we can rehash it when we + * find the inode. + */ + d_drop(dentry); + } else { + ret = btrfs_inode_by_name(dir, dentry, &location); + } if (ret < 0) return ERR_PTR(ret); @@ -4180,6 +4191,12 @@ static int btrfs_dentry_delete(const struct dentry *dentry) return 0; } +static void btrfs_dentry_release(struct dentry *dentry) +{ + if (dentry->d_fsdata) + kfree(dentry->d_fsdata); +} + static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { @@ -4206,6 +4223,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, struct btrfs_key key; struct btrfs_key found_key; struct btrfs_path *path; + struct qstr q; int ret; struct extent_buffer *leaf; int slot; @@ -4284,6 +4302,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, while (di_cur < di_total) { struct btrfs_key location; + struct dentry *tmp; if (verify_dir_item(root, leaf, di)) break; @@ -4304,6 +4323,33 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; btrfs_dir_item_key_to_cpu(leaf, di, &location); + q.name = name_ptr; + q.len = name_len; + q.hash = full_name_hash(q.name, q.len); + tmp = d_lookup(filp->f_dentry, &q); + if (!tmp) { + struct btrfs_key *newkey; + + newkey = kzalloc(sizeof(struct btrfs_key), + GFP_NOFS); + if (!newkey) + goto no_dentry; + tmp = d_alloc(filp->f_dentry, &q); + if (!tmp) { + kfree(newkey); + dput(tmp); + goto no_dentry; + } + memcpy(newkey, &location, + sizeof(struct btrfs_key)); + tmp->d_fsdata = newkey; + tmp->d_flags |= DCACHE_NEED_LOOKUP; + d_rehash(tmp); + dput(tmp); + } else { + dput(tmp); + } +no_dentry: /* is this a reference to our own snapshot? If so * skip it */ @@ -7566,4 +7612,5 @@ static const struct inode_operations btrfs_symlink_inode_operations = { const struct dentry_operations btrfs_dentry_operations = { .d_delete = btrfs_dentry_delete, + .d_release = btrfs_dentry_release, }; -- 1.7.2.3 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Andreas Dilger
2011-May-19 19:03 UTC
Re: [PATCH 1/2] fs: add a DCACHE_NEED_LOOKUP flag for d_flags
On May 19, 2011, at 11:58, Josef Bacik wrote:> Btrfs (and I''d venture most other fs''s) stores its indexes in nice disk order > for readdir, but unfortunately in the case of anything that stats the files in > order that readdir spits back (like oh say ls) that means we still have to do > the normal lookup of the file, which means looking up our other index and then > looking up the inode. What I want is a way to create dummy dentries when we > find them in readdir so that when ls or anything else subsequently does a > stat(), we already have the location information in the dentry and can go > straight to the inode itself. The lookup stuff just assumes that if it finds a > dentry it is done, it doesn''t perform a lookup. So add a DCACHE_NEED_LOOKUP > flag so that the lookup code knows it still needs to run i_op->lookup() on the > parent to get the inode for the dentry. I have tested this with btrfs and I > went from something that looks like this > > http://people.redhat.com/jwhiter/ls-noreada.png > > To this > > http://people.redhat.com/jwhiter/ls-good.png > > Thats a savings of 1300 seconds, or 22 minutes. That is a significant savings. > Thanks,This comment should probably mention the number of files being tested, in order to make a 1300s savings meaningful. Similarly, it would be better to provide the absolute times of tests in case these URLs disappear in the future. "That reduces the time to do "ls -l" on a 1M file directory from 2181s to 855s."> Signed-off-by: Josef Bacik <josef@redhat.com> > --- > fs/namei.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ > include/linux/dcache.h | 1 + > 2 files changed, 49 insertions(+), 0 deletions(-) > > diff --git a/fs/namei.c b/fs/namei.c > index e3c4f11..a1bff4f 100644 > --- a/fs/namei.c > +++ b/fs/namei.c > @@ -1198,6 +1198,29 @@ static struct dentry *d_alloc_and_lookup(struct dentry *parent, > } > > /* > + * We already have a dentry, but require a lookup to be performed on the parent > + * directory to fill in d_inode. Returns the new dentry, or ERR_PTR on error. > + * parent->d_inode->i_mutex must be held. d_lookup must have verified that no > + * child exists while under i_mutex. > + */ > +static struct dentry *d_inode_lookup(struct dentry *parent, struct dentry *dentry, > + struct nameidata *nd) > +{ > + struct inode *inode = parent->d_inode; > + struct dentry *old; > + > + /* Don''t create child dentry for a dead directory. */ > + if (unlikely(IS_DEADDIR(inode))) > + return ERR_PTR(-ENOENT); > + > + old = inode->i_op->lookup(inode, dentry, nd); > + if (unlikely(old)) { > + dput(dentry); > + dentry = old; > + } > + return dentry; > +} > +/* > * It''s more convoluted than I''d like it to be, but... it''s still fairly > * small and for now I''d prefer to have fast path as straight as possible. > * It _is_ time-critical. > @@ -1236,6 +1259,13 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, > goto unlazy; > } > } > + if (unlikely(dentry->d_flags & DCACHE_NEED_LOOKUP)) { > + if (nameidata_dentry_drop_rcu(nd, dentry)) > + return -ECHILD; > + dput(dentry); > + dentry = NULL; > + goto retry; > + } > path->mnt = mnt; > path->dentry = dentry; > if (likely(__follow_mount_rcu(nd, path, inode, false))) > @@ -1250,6 +1280,12 @@ unlazy: > } > } else { > dentry = __d_lookup(parent, name); > + if (unlikely(!dentry)) > + goto retry; > + if (unlikely(dentry->d_flags & DCACHE_NEED_LOOKUP)) { > + dput(dentry); > + dentry = NULL; > + } > } > > retry: > @@ -1268,6 +1304,18 @@ retry: > /* known good */ > need_reval = 0; > status = 1; > + } else if (unlikely(dentry->d_flags & DCACHE_NEED_LOOKUP)) { > + struct dentry *old; > + > + dentry->d_flags &= ~DCACHE_NEED_LOOKUP; > + dentry = d_inode_lookup(parent, dentry, nd);Would it make sense to keep DCACHE_NEED_LOOKUP set in d_flags until _after_ the call to d_inode_lookup()? That way the filesystem can positively know it is doing the inode lookup from d_fsdata, instead of just inferring it from the presence of d_fsdata? It is already the filesystem that is setting DCACHE_NEED_LOOKUP, so it should really be the one clearing this flag also. I''m concerned that there may be filesystems that need d_fsdata for something already, so the presence/absence of d_fsdata is not a clear indication to the underlying filesystem of whether to do an inode lookup based on d_fsdata, which might mean that it needs to keep yet another flag for this purpose.> + if (IS_ERR(dentry)) { > + mutex_unlock(&dir->i_mutex); > + return PTR_ERR(dentry); > + } > + /* known good */ > + need_reval = 0; > + status = 1; > } > mutex_unlock(&dir->i_mutex); > } > diff --git a/include/linux/dcache.h b/include/linux/dcache.h > index 19d90a5..a8b2457 100644 > --- a/include/linux/dcache.h > +++ b/include/linux/dcache.h > @@ -216,6 +216,7 @@ struct dentry_operations { > #define DCACHE_MOUNTED 0x10000 /* is a mountpoint */ > #define DCACHE_NEED_AUTOMOUNT 0x20000 /* handle automount on this dir */ > #define DCACHE_MANAGE_TRANSIT 0x40000 /* manage transit from this dirent */ > +#define DCACHE_NEED_LOOKUP 0x80000 /* dentry requires i_op->lookup */ > #define DCACHE_MANAGED_DENTRY \ > (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT) > > -- > 1.7.2.3 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.htmlCheers, Andreas -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Josef Bacik
2011-May-19 19:43 UTC
Re: [PATCH 1/2] fs: add a DCACHE_NEED_LOOKUP flag for d_flags
On Thu, May 19, 2011 at 01:03:18PM -0600, Andreas Dilger wrote:> On May 19, 2011, at 11:58, Josef Bacik wrote: > > Btrfs (and I''d venture most other fs''s) stores its indexes in nice disk order > > for readdir, but unfortunately in the case of anything that stats the files in > > order that readdir spits back (like oh say ls) that means we still have to do > > the normal lookup of the file, which means looking up our other index and then > > looking up the inode. What I want is a way to create dummy dentries when we > > find them in readdir so that when ls or anything else subsequently does a > > stat(), we already have the location information in the dentry and can go > > straight to the inode itself. The lookup stuff just assumes that if it finds a > > dentry it is done, it doesn''t perform a lookup. So add a DCACHE_NEED_LOOKUP > > flag so that the lookup code knows it still needs to run i_op->lookup() on the > > parent to get the inode for the dentry. I have tested this with btrfs and I > > went from something that looks like this > > > > http://people.redhat.com/jwhiter/ls-noreada.png > > > > To this > > > > http://people.redhat.com/jwhiter/ls-good.png > > > > Thats a savings of 1300 seconds, or 22 minutes. That is a significant savings. > > Thanks, > > This comment should probably mention the number of files being tested, in order > to make a 1300s savings meaningful. Similarly, it would be better to provide > the absolute times of tests in case these URLs disappear in the future. > > "That reduces the time to do "ls -l" on a 1M file directory from 2181s to 855s." >Good point, I will include that in my next posting.> > Signed-off-by: Josef Bacik <josef@redhat.com> > > --- > > fs/namei.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ > > include/linux/dcache.h | 1 + > > 2 files changed, 49 insertions(+), 0 deletions(-) > > > > diff --git a/fs/namei.c b/fs/namei.c > > index e3c4f11..a1bff4f 100644 > > --- a/fs/namei.c > > +++ b/fs/namei.c > > @@ -1198,6 +1198,29 @@ static struct dentry *d_alloc_and_lookup(struct dentry *parent, > > } > > > > /* > > + * We already have a dentry, but require a lookup to be performed on the parent > > + * directory to fill in d_inode. Returns the new dentry, or ERR_PTR on error. > > + * parent->d_inode->i_mutex must be held. d_lookup must have verified that no > > + * child exists while under i_mutex. > > + */ > > +static struct dentry *d_inode_lookup(struct dentry *parent, struct dentry *dentry, > > + struct nameidata *nd) > > +{ > > + struct inode *inode = parent->d_inode; > > + struct dentry *old; > > + > > + /* Don''t create child dentry for a dead directory. */ > > + if (unlikely(IS_DEADDIR(inode))) > > + return ERR_PTR(-ENOENT); > > + > > + old = inode->i_op->lookup(inode, dentry, nd); > > + if (unlikely(old)) { > > + dput(dentry); > > + dentry = old; > > + } > > + return dentry; > > +} > > +/* > > * It''s more convoluted than I''d like it to be, but... it''s still fairly > > * small and for now I''d prefer to have fast path as straight as possible. > > * It _is_ time-critical. > > @@ -1236,6 +1259,13 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, > > goto unlazy; > > } > > } > > + if (unlikely(dentry->d_flags & DCACHE_NEED_LOOKUP)) { > > + if (nameidata_dentry_drop_rcu(nd, dentry)) > > + return -ECHILD; > > + dput(dentry); > > + dentry = NULL; > > + goto retry; > > + } > > path->mnt = mnt; > > path->dentry = dentry; > > if (likely(__follow_mount_rcu(nd, path, inode, false))) > > @@ -1250,6 +1280,12 @@ unlazy: > > } > > } else { > > dentry = __d_lookup(parent, name); > > + if (unlikely(!dentry)) > > + goto retry; > > + if (unlikely(dentry->d_flags & DCACHE_NEED_LOOKUP)) { > > + dput(dentry); > > + dentry = NULL; > > + } > > } > > > > retry: > > @@ -1268,6 +1304,18 @@ retry: > > /* known good */ > > need_reval = 0; > > status = 1; > > + } else if (unlikely(dentry->d_flags & DCACHE_NEED_LOOKUP)) { > > + struct dentry *old; > > + > > + dentry->d_flags &= ~DCACHE_NEED_LOOKUP; > > + dentry = d_inode_lookup(parent, dentry, nd); > > Would it make sense to keep DCACHE_NEED_LOOKUP set in d_flags until _after_ > the call to d_inode_lookup()? That way the filesystem can positively know > it is doing the inode lookup from d_fsdata, instead of just inferring it > from the presence of d_fsdata? It is already the filesystem that is setting > DCACHE_NEED_LOOKUP, so it should really be the one clearing this flag also. > > I''m concerned that there may be filesystems that need d_fsdata for something > already, so the presence/absence of d_fsdata is not a clear indication to > the underlying filesystem of whether to do an inode lookup based on d_fsdata, > which might mean that it needs to keep yet another flag for this purpose. >Another good point, I had toyed with the thought of just having helpers do all the dirty work so all fs''s could be using the same interface for using this trick, I''ll push this work into those helpers to make sure its done consistently. Thanks, Josef -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Andi Kleen
2011-May-20 20:07 UTC
Re: [PATCH 1/2] fs: add a DCACHE_NEED_LOOKUP flag for d_flags
Josef Bacik <josef@redhat.com> writes:> Btrfs (and I''d venture most other fs''s) stores its indexes in nice disk order > for readdir, but unfortunately in the case of anything that stats the files in > order that readdir spits back (like oh say ls) that means we still have to do > the normal lookup of the file, which means looking up our other index and then > looking up the inode. What I want is a way to create dummy dentries when we > find them in readdir so that when ls or anything else subsequently does a > stat(), we already have the location information in the dentry and can goFunny I remember discussing this optimization a long time ago with Andrea. But the problem is still that it has the potential to pollute the dcache a lot when someone is reading a large directory. Consider the find / case. You don''t want that to turn over all of your dcache. So if you do that you need some way to put the dummy dentries on a special LRU list that gets cleaned quickly and is kept small. -Andi -- ak@linux.intel.com -- Speaking for myself only -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Andreas Dilger
2011-May-20 20:51 UTC
Re: [PATCH 1/2] fs: add a DCACHE_NEED_LOOKUP flag for d_flags
On 2011-05-20, at 2:07 PM, Andi Kleen <andi@firstfloor.org> wrote:> Josef Bacik <josef@redhat.com> writes: > >> Btrfs (and I''d venture most other fs''s) stores its indexes in nice disk order >> for readdir, but unfortunately in the case of anything that stats the files in >> order that readdir spits back (like oh say ls) that means we still have to do >> the normal lookup of the file, which means looking up our other index and then >> looking up the inode. What I want is a way to create dummy dentries when we >> find them in readdir so that when ls or anything else subsequently does a >> stat(), we already have the location information in the dentry and can go > > Funny I remember discussing this optimization a long time ago with > Andrea. But the problem is still that it has the potential to pollute > the dcache a lot when someone is reading a large directory. > > Consider the find / case. You don''t want that to turn over all > of your dcache. > > So if you do that you need some way to put the dummy dentries on a > special LRU list that gets cleaned quickly and is kept small.Actually, I recall years ago looking into something similar. It should be possible to drop such dentries even under GFP_NOFS because they can''t be dirty and don''t need any inode operations to free. Putting them at the end of the cache LRU instead of the head would allow them to be dropped quickly under memory pressure. Cheers, Andreas-- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Andi Kleen
2011-May-20 21:31 UTC
Re: [PATCH 1/2] fs: add a DCACHE_NEED_LOOKUP flag for d_flags
> Putting them at the end of the cache LRU instead of the head would allow them to be dropped quickly under memory pressure.This still would fill up your memory for find /, potentially pushing out other stuff. -Andi -- ak@linux.intel.com -- Speaking for myself only. -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Josef Bacik
2011-May-21 00:30 UTC
Re: [PATCH 1/2] fs: add a DCACHE_NEED_LOOKUP flag for d_flags
On 05/20/2011 05:31 PM, Andi Kleen wrote:>> Putting them at the end of the cache LRU instead of the head would allow them to be dropped quickly under memory pressure. > > This still would fill up your memory for find /, potentially pushing > out other stuff. > > -AndiSo these things are just hashed on dput, so they don''t have any references to them and they are automatically put on the LRU list, so if we get under memory pressure they will be easily discarded, especially if nobody is actually stating them. Thanks, Josef -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Andi Kleen
2011-May-21 03:00 UTC
Re: [PATCH 1/2] fs: add a DCACHE_NEED_LOOKUP flag for d_flags
On Fri, May 20, 2011 at 08:30:19PM -0400, Josef Bacik wrote:> On 05/20/2011 05:31 PM, Andi Kleen wrote: > >> Putting them at the end of the cache LRU instead of the head would allow them to be dropped quickly under memory pressure. > > > > This still would fill up your memory for find /, potentially pushing > > out other stuff. > > > > -Andi > > So these things are just hashed on dput, so they don''t have any > references to them and they are automatically put on the LRU list, so if > we get under memory pressure they will be easily discarded, especially > if nobody is actually stating them. Thanks,They are allocated. The allocation will push out other things too. There''s no mechanism to only push dentries when allocating other dentries, or limit the total consumption from the dcache. -Andi -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Dave Chinner
2011-May-21 04:11 UTC
Re: [PATCH 1/2] fs: add a DCACHE_NEED_LOOKUP flag for d_flags
On Sat, May 21, 2011 at 05:00:50AM +0200, Andi Kleen wrote:> On Fri, May 20, 2011 at 08:30:19PM -0400, Josef Bacik wrote: > > On 05/20/2011 05:31 PM, Andi Kleen wrote: > > >> Putting them at the end of the cache LRU instead of the head would allow them to be dropped quickly under memory pressure. > > > > > > This still would fill up your memory for find /, potentially pushing > > > out other stuff. > > > > > > -Andi > > > > So these things are just hashed on dput, so they don''t have any > > references to them and they are automatically put on the LRU list, so if > > we get under memory pressure they will be easily discarded, especially > > if nobody is actually stating them. Thanks, > > They are allocated. The allocation will push out other things too. > There''s no mechanism to only push dentries when allocating other dentries, > or limit the total consumption from the dcache.FWIW, I''m in the process of resurrecting my per-superblock VFS cache shrinker patch which would make doing such limiting easier. That is, the fake dentries could be accounted and tracked on their own per-sb LRU and when over a threshold (global and/or per-sb) the per-sb shrinker could be called directly to free a number of fake dentries. That way the sb generating them all would self-limit without greatly affecting the working set of dentries on other filesystems... Cheers, Dave. -- Dave Chinner david@fromorbit.com -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Josef Bacik
2011-May-26 14:48 UTC
[PATCH 1/2] fs: add a DCACHE_NEED_LOOKUP flag for d_flags
Btrfs (and I''d venture most other fs''s) stores its indexes in nice disk order for readdir, but unfortunately in the case of anything that stats the files in order that readdir spits back (like oh say ls) that means we still have to do the normal lookup of the file, which means looking up our other index and then looking up the inode. What I want is a way to create dummy dentries when we find them in readdir so that when ls or anything else subsequently does a stat(), we already have the location information in the dentry and can go straight to the inode itself. The lookup stuff just assumes that if it finds a dentry it is done, it doesn''t perform a lookup. So add a DCACHE_NEED_LOOKUP flag so that the lookup code knows it still needs to run i_op->lookup() on the parent to get the inode for the dentry. I have tested this with btrfs and I went from something that looks like this http://people.redhat.com/jwhiter/ls-noreada.png To this http://people.redhat.com/jwhiter/ls-good.png Thats a savings of 1300 seconds, or 22 minutes. That is a significant savings. Thanks, Signed-off-by: Josef Bacik <josef@redhat.com> --- fs/dcache.c | 34 ++++++++++++++++++++++++++++++- fs/namei.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/dcache.h | 7 ++++++ 3 files changed, 90 insertions(+), 2 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 22a0ef4..7fc0e30 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -343,6 +343,24 @@ void d_drop(struct dentry *dentry) EXPORT_SYMBOL(d_drop); /* + * d_clear_need_lookup - drop a dentry from cache and clear the need lookup flag + * @dentry: dentry to drop + * + * This is called when we do a lookup on a placeholder dentry that needed to be + * looked up. The dentry should have been hashed in order for it to be found by + * the lookup code, but now needs to be unhashed while we do the actual lookup + * and clear the DCACHE_NEED_LOOKUP flag. + */ +void d_clear_need_lookup(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + __d_drop(dentry); + dentry->d_flags &= ~DCACHE_NEED_LOOKUP; + spin_unlock(&dentry->d_lock); +} +EXPORT_SYMBOL(d_clear_need_lookup); + +/* * Finish off a dentry we''ve decided to kill. * dentry->d_lock must be held, returns with it unlocked. * If ref is non-zero, then decrement the refcount too. @@ -431,8 +449,13 @@ repeat: if (d_unhashed(dentry)) goto kill_it; - /* Otherwise leave it cached and ensure it''s on the LRU */ - dentry->d_flags |= DCACHE_REFERENCED; + /* + * If this dentry needs lookup, don''t set the referenced flag so that it + * is more likely to be cleaned up by the dcache shrinker in case of + * memory pressure. + */ + if (!d_need_lookup(dentry)) + dentry->d_flags |= DCACHE_REFERENCED; dentry_lru_add(dentry); dentry->d_count--; @@ -1703,6 +1726,13 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, } /* + * We are going to instantiate this dentry, unhash it and clear the + * lookup flag so we can do that. + */ + if (unlikely(d_need_lookup(found))) + d_clear_need_lookup(found); + + /* * Negative dentry: instantiate it unless the inode is a directory and * already has a dentry. */ diff --git a/fs/namei.c b/fs/namei.c index e3c4f11..fc8bc60 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1198,6 +1198,30 @@ static struct dentry *d_alloc_and_lookup(struct dentry *parent, } /* + * We already have a dentry, but require a lookup to be performed on the parent + * directory to fill in d_inode. Returns the new dentry, or ERR_PTR on error. + * parent->d_inode->i_mutex must be held. d_lookup must have verified that no + * child exists while under i_mutex. + */ +static struct dentry *d_inode_lookup(struct dentry *parent, struct dentry *dentry, + struct nameidata *nd) +{ + struct inode *inode = parent->d_inode; + struct dentry *old; + + /* Don''t create child dentry for a dead directory. */ + if (unlikely(IS_DEADDIR(inode))) + return ERR_PTR(-ENOENT); + + old = inode->i_op->lookup(inode, dentry, nd); + if (unlikely(old)) { + dput(dentry); + dentry = old; + } + return dentry; +} + +/* * It''s more convoluted than I''d like it to be, but... it''s still fairly * small and for now I''d prefer to have fast path as straight as possible. * It _is_ time-critical. @@ -1236,6 +1260,8 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, goto unlazy; } } + if (unlikely(d_need_lookup(dentry))) + goto unlazy; path->mnt = mnt; path->dentry = dentry; if (likely(__follow_mount_rcu(nd, path, inode, false))) @@ -1252,6 +1278,10 @@ unlazy: dentry = __d_lookup(parent, name); } + if (dentry && unlikely(d_need_lookup(dentry))) { + dput(dentry); + dentry = NULL; + } retry: if (unlikely(!dentry)) { struct inode *dir = parent->d_inode; @@ -1268,6 +1298,17 @@ retry: /* known good */ need_reval = 0; status = 1; + } else if (unlikely(d_need_lookup(dentry))) { + struct dentry *old; + + dentry = d_inode_lookup(parent, dentry, nd); + if (IS_ERR(dentry)) { + mutex_unlock(&dir->i_mutex); + return PTR_ERR(dentry); + } + /* known good */ + need_reval = 0; + status = 1; } mutex_unlock(&dir->i_mutex); } @@ -1755,6 +1796,16 @@ static struct dentry *__lookup_hash(struct qstr *name, */ dentry = d_lookup(base, name); + if (dentry && d_need_lookup(dentry)) { + /* + * __lookup_hash is called with the parent dir''s i_mutex already + * held, so we are good to go here. + */ + dentry = d_inode_lookup(base, dentry, nd); + if (IS_ERR(dentry)) + return dentry; + } + if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE)) dentry = do_revalidate(dentry, nd); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 19d90a5..5fa5bd3 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -216,6 +216,7 @@ struct dentry_operations { #define DCACHE_MOUNTED 0x10000 /* is a mountpoint */ #define DCACHE_NEED_AUTOMOUNT 0x20000 /* handle automount on this dir */ #define DCACHE_MANAGE_TRANSIT 0x40000 /* manage transit from this dirent */ +#define DCACHE_NEED_LOOKUP 0x80000 /* dentry requires i_op->lookup */ #define DCACHE_MANAGED_DENTRY \ (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT) @@ -416,6 +417,12 @@ static inline bool d_mountpoint(struct dentry *dentry) return dentry->d_flags & DCACHE_MOUNTED; } +static inline bool d_need_lookup(struct dentry *dentry) +{ + return dentry->d_flags & DCACHE_NEED_LOOKUP; +} + +extern void d_clear_need_lookup(struct dentry *dentry); extern struct dentry *lookup_create(struct nameidata *nd, int is_dir); extern int sysctl_vfs_cache_pressure; -- 1.7.2.3 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html