ext2 & ext3 lists, Attatched is a patch that allows ext2 and ext3 to link correctly when the kernel is configured with a large NR_CPUS. We do have an immediate need for this patch. Any opinions on this? The per-cpu lists are causing the kmalloc to fail due to allocating more than the max. thanks mh -- Martin Hicks Wild Open Source Inc. mort@wildopensource.com 613-266-2296 # This is a BitKeeper generated patch for the following project: # Project Name: Linux kernel tree # This patch format is intended for GNU patch command version 2.5 or higher. # This patch includes the following deltas: # ChangeSet 1.1356 -> 1.1357 # fs/ext2/super.c 1.55 -> 1.56 # fs/ext3/super.c 1.79 -> 1.80 # # The following is the BitKeeper ChangeSet Log # -------------------------------------------- # 03/11/04 mort@green.bork.org 1.1357 # Fix over-sized kmalloc() calls in ext[23]_get_super() when there are # large numbers of processors. This is due to the per-processor # variables in the ext[23] superblock structure. Replace kmalloc() with # vmalloc() # -------------------------------------------- # diff -Nru a/fs/ext2/super.c b/fs/ext2/super.c --- a/fs/ext2/super.c Tue Nov 4 07:19:37 2003 +++ b/fs/ext2/super.c Tue Nov 4 07:19:37 2003 @@ -20,6 +20,7 @@ #include <linux/module.h> #include <linux/string.h> #include <linux/slab.h> +#include <linux/vmalloc.h> #include <linux/init.h> #include <linux/blkdev.h> #include <linux/parser.h> @@ -145,7 +146,7 @@ kfree(sbi->s_debts); brelse (sbi->s_sbh); sb->s_fs_info = NULL; - kfree(sbi); + vfree(sbi); return; } @@ -571,7 +572,7 @@ int db_count; int i, j; - sbi = kmalloc(sizeof(*sbi), GFP_KERNEL); + sbi = vmalloc(sizeof(*sbi)); if (!sbi) return -ENOMEM; sb->s_fs_info = sbi; diff -Nru a/fs/ext3/super.c b/fs/ext3/super.c --- a/fs/ext3/super.c Tue Nov 4 07:19:37 2003 +++ b/fs/ext3/super.c Tue Nov 4 07:19:37 2003 @@ -25,6 +25,7 @@ #include <linux/ext3_fs.h> #include <linux/ext3_jbd.h> #include <linux/slab.h> +#include <linux/vmalloc.h> #include <linux/init.h> #include <linux/blkdev.h> #include <linux/parser.h> @@ -421,7 +422,7 @@ ext3_blkdev_remove(sbi); } sb->s_fs_info = NULL; - kfree(sbi); + vfree(sbi); return; } @@ -1044,7 +1045,7 @@ int i; int needs_recovery; - sbi = kmalloc(sizeof(*sbi), GFP_KERNEL); + sbi = vmalloc(sizeof(*sbi)); if (!sbi) return -ENOMEM; sb->s_fs_info = sbi;
On Wed, 2003-11-05 at 10:18, Martin Hicks wrote:> ext2 & ext3 lists, > > Attatched is a patch that allows ext2 and ext3 to link correctly when > the kernel is configured with a large NR_CPUS. We do have an immediate > need for this patch. > > Any opinions on this? The per-cpu lists are causing the kmalloc to fail > due to allocating more than the max.The last patch worked alright on a machine with an ext[23] root, but I missed a kfree()->vfree() conversion on the error path. This patch seems to make it better. mh -- Martin Hicks Wild Open Source Inc. mort@wildopensource.com 613-266-2296 # This is a BitKeeper generated patch for the following project: # Project Name: Linux kernel tree # This patch format is intended for GNU patch command version 2.5 or higher. # This patch includes the following deltas: # ChangeSet 1.1356 -> 1.1358 # fs/ext2/super.c 1.55 -> 1.57 # fs/ext3/super.c 1.79 -> 1.81 # # The following is the BitKeeper ChangeSet Log # -------------------------------------------- # 03/11/04 mort@green.bork.org 1.1357 # Fix over-sized kmalloc() calls in ext[23]_get_super() when there are # large numbers of processors. This is due to the per-processor # variables in the ext[23] superblock structure. Replace kmalloc() with # vmalloc() # -------------------------------------------- # 03/11/05 mort@green.i.bork.org 1.1358 # Missed a kfree->vfree change. # -------------------------------------------- # diff -Nru a/fs/ext2/super.c b/fs/ext2/super.c --- a/fs/ext2/super.c Wed Nov 5 08:48:29 2003 +++ b/fs/ext2/super.c Wed Nov 5 08:48:29 2003 @@ -20,6 +20,7 @@ #include <linux/module.h> #include <linux/string.h> #include <linux/slab.h> +#include <linux/vmalloc.h> #include <linux/init.h> #include <linux/blkdev.h> #include <linux/parser.h> @@ -145,7 +146,7 @@ kfree(sbi->s_debts); brelse (sbi->s_sbh); sb->s_fs_info = NULL; - kfree(sbi); + vfree(sbi); return; } @@ -571,7 +572,7 @@ int db_count; int i, j; - sbi = kmalloc(sizeof(*sbi), GFP_KERNEL); + sbi = vmalloc(sizeof(*sbi)); if (!sbi) return -ENOMEM; sb->s_fs_info = sbi; @@ -847,7 +848,7 @@ brelse(bh); failed_sbi: sb->s_fs_info = NULL; - kfree(sbi); + vfree(sbi); return -EINVAL; } diff -Nru a/fs/ext3/super.c b/fs/ext3/super.c --- a/fs/ext3/super.c Wed Nov 5 08:48:29 2003 +++ b/fs/ext3/super.c Wed Nov 5 08:48:29 2003 @@ -25,6 +25,7 @@ #include <linux/ext3_fs.h> #include <linux/ext3_jbd.h> #include <linux/slab.h> +#include <linux/vmalloc.h> #include <linux/init.h> #include <linux/blkdev.h> #include <linux/parser.h> @@ -421,7 +422,7 @@ ext3_blkdev_remove(sbi); } sb->s_fs_info = NULL; - kfree(sbi); + vfree(sbi); return; } @@ -1044,7 +1045,7 @@ int i; int needs_recovery; - sbi = kmalloc(sizeof(*sbi), GFP_KERNEL); + sbi = vmalloc(sizeof(*sbi)); if (!sbi) return -ENOMEM; sb->s_fs_info = sbi; @@ -1404,7 +1405,7 @@ brelse(bh); out_fail: sb->s_fs_info = NULL; - kfree(sbi); + vfree(sbi); return -EINVAL; }
On Nov 05, 2003 10:18 -0500, Martin Hicks wrote:> Attatched is a patch that allows ext2 and ext3 to link correctly when > the kernel is configured with a large NR_CPUS. We do have an immediate > need for this patch. > > Any opinions on this? The per-cpu lists are causing the kmalloc to fail > due to allocating more than the max.Given that the kmalloc limit is at least 128kB it seems that we would start eating a fair chunk of our vmalloc space, which is a terrible thing to waste since we don't really need contiguous address space for this. Another option is to make s_dirs_counter, s_free*_counter, s_blockgroup_lock all be pointers, and decide at compile time whether they will point to a struct that is part of ext3_sb_info, or externally kmalloc'd. Something like: struct ext3_sb_info { : : struct percpu_counter *s_freeblocks_counter; struct percpu_counter *s_freeinodes_counter; struct percpu_counter *s_dirs_counter; struct blockgroup_lock *s_blockgroup_lock; #if (NR_CPUS < 8) /* or whatever */ struct percpu_counter s_freeblocks_counter_array; struct percpu_counter s_freeinodes_counter_array; struct percpu_counter s_dirs_counter_array; struct blockgroup_lock s_blockgroup_lock_array; #endif } int ext3_fill_super() { : : sbi = kmalloc(sizeof(*sbi), GFP_KERNEL); #if (NR_CPUS < 8) /* see ext3_sb_info */ sbi->s_freeblocks_counter = &sbi->s_freeblocks_counter_array; sbi->s_freeinodes_counter = &sbi->s_freeinodes_counter_array; sbi->s_dirs_counter = &sbi->s_dirs_counter_array; sbi->s_blockgroup_lock = &sbi->s_blockgroup_lock_array; #else sbi->s_freenodes_counter = kmalloc(sizeof(*sbi->s_freenodes_counter); /* check if alloc OK, handle */ percpu_counter_init(sbi->s_freenodes_counter); : #endif } and then you need to change references to (&sbi->s_freeblocks_counter) to be just (sbi->s_freeblocks_counter). Cheers, Andreas -- Andreas Dilger http://sourceforge.net/projects/ext2resize/ http://www-mddsp.enel.ucalgary.ca/People/adilger/
On Wed, Nov 05, 2003 at 10:18:38AM -0500, Martin Hicks wrote:> > ext2 & ext3 lists, > > Attatched is a patch that allows ext2 and ext3 to link correctly when > the kernel is configured with a large NR_CPUS. We do have an immediate > need for this patch.That's run correctly, I assume....> Any opinions on this? The per-cpu lists are causing the kmalloc to fail > due to allocating more than the max.vmalloc()'s are not free. They burn kmap address space, TLB entries, etc. How many CPU's are necessary before we run into problems, anyway? I'd be much happier if there was a kvmalloc() call which resolved to either kmalloc() or vmalloc() depending on how insane the number of CPU's were, or if kvmalloc() programmatically made the determination at runtime, and kvfree() checked the address to determine whether it was a kmalloc()-returned address or a vmalloc()-returned address, and called kfree() or vfree() as appropriate. - Ted
Christoph Hellwig
2003-Nov-06 08:38 UTC
Re: [Ext2-devel] [PATCH] Link breaks for large NR_CPUS
On Wed, Nov 05, 2003 at 10:18:38AM -0500, Martin Hicks wrote:> > ext2 & ext3 lists, > > Attatched is a patch that allows ext2 and ext3 to link correctly when > the kernel is configured with a large NR_CPUS. We do have an immediate > need for this patch. > > Any opinions on this? The per-cpu lists are causing the kmalloc to fail > due to allocating more than the max.Shouldn't you use alloc_percpu instead? -- Christoph Hellwig <hch@lst.de> - Freelance Hacker Contact me for driver hacking and kernel development consulting
Martin Hicks
2003-Nov-09 23:15 UTC
Re: [Ext2-devel] Re: [PATCH] Link breaks for large NR_CPUS
On Sun, 2003-11-09 at 16:08, Andrew Morton wrote:> Martin Hicks <mort@wildopensource.com> wrote: > > > > patches attached are to change percpu_counters to use alloc_percpu() and > > the patch to ext3 to make it have pointers to percpu_counters. The > > latter is probably not even required anymore, since there is no array of > > size NR_CPUS in the struct percpu_counters anymore. > > Could you please confirm that the percpu_counter patch alone is sufficient? > > Remember that both ext2 and ext3 use percpu_counters.Confirmed. Both compile with NR_CPUS=512 and ext3 seems to work fine on my ia64 machine. mh -- Martin Hicks Wild Open Source Inc. mort@wildopensource.com 613-266-2296
Andrew Morton
2003-Nov-09 23:29 UTC
Re: [Ext2-devel] Re: [PATCH] Link breaks for large NR_CPUS
Martin Hicks <mort@wildopensource.com> wrote:> > On Sun, 2003-11-09 at 16:08, Andrew Morton wrote: > > Martin Hicks <mort@wildopensource.com> wrote: > > > > > > patches attached are to change percpu_counters to use alloc_percpu() and > > > the patch to ext3 to make it have pointers to percpu_counters. The > > > latter is probably not even required anymore, since there is no array of > > > size NR_CPUS in the struct percpu_counters anymore. > > > > Could you please confirm that the percpu_counter patch alone is sufficient? > > > > Remember that both ext2 and ext3 use percpu_counters. > > Confirmed. Both compile with NR_CPUS=512 and ext3 seems to work fine on > my ia64 machine.OK, I shall queue that change up for 2.6.1, thanks.