Tom Judge
2007-Apr-06 10:04 UTC
Repeatable crash with mkdir causing a divide by zero error
Hi, I have seen some problems with a new file system that I created yesterday in that I could repeatedly get the system to crash with a mkdir. Here is the disk information: mfid1: <MFI Logical Disk> on mfi1 mfid1: 5716992MB (11708399616 sectors) RAID volume 'Images' is optimal I created a new file system tuned for 64k blocks, an average file size of 1MB, and 2500 files per directory. newfs -b 65535 -g 1048576 -h 2500 /dev/mfid1p1 mount /dev/mfid1p1 /compere mkdir /compere/images mkdir /compere/images/1999 (Also tested with mkdir test; mkdir test/1998) The system is an amd64 system running 6.2-RELEASE and the pmap.c patch. I have 3 cores caused by 3 different apps (rsync, gmkdir, mkdir) and can provide any more information if required. I have attached a backtrace; unfortunately I cannot do any testing as the system is now in testing (newfs -b 65535 -g 1048576 /dev/mfid1p1 was used and seems not to cause the bug). kgdb /usr/obj/usr/src/sys/PE2950/kernel.debug /var/crash/vmcore.2 [GDB will not be able to debug user-mode threads: /usr/lib/libthread_db.so: Undefined symbol "ps_pglobal_lookup"] GNU gdb 6.1.1 [FreeBSD] Copyright 2004 Free Software Foundation, Inc. GDB is free software, covered by the GNU General Public License, and you are welcome to change it and/or distribute copies of it under certain conditions. Type "show copying" to see the conditions. There is absolutely no warranty for GDB. Type "show warranty" for details. This GDB was configured as "amd64-marcel-freebsd". 
Unread portion of the kernel message buffer: Fatal trap 18: integer divide fault while in kernel mode cpuid = 0; apic id = 00 instruction pointer = 0x8:0xffffffff80391347 stack pointer = 0x10:0xffffffffa78736f0 frame pointer = 0x10:0xffffff0001d7a600 code segment = base 0x0, limit 0xfffff, type 0x1b = DPL 0, pres 1, long 1, def32 0, gran 1 processor eflags = interrupt enabled, resume, IOPL = 0 current process = 1206 (mkdir) trap number = 18 panic: integer divide fault cpuid = 0 Uptime: 4m29s Dumping 1023 MB (2 chunks) chunk 0: 1MB (156 pages) ... ok chunk 1: 1023MB (261800 pages) 1007 991 975 959 943 927 911 895 879 863 847 831 815 799 783 767 751 735 719 703 687 671 655 639 623 607 591 575 559 543 527 511 495 479 463 447 431 415 399 383 367 351 335 319 303 287 271 255 239 223 207 191 175 159 143 127 111 95 79 63 47 31 15 #0 doadump () at pcpu.h:172 172 pcpu.h: No such file or directory. in pcpu.h (kgdb) bt #0 doadump () at pcpu.h:172 #1 0x0000000000000004 in ?? () #2 0xffffffff8029a557 in boot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:409 #3 0xffffffff8029abf1 in panic (fmt=0xffffff0029753000 "X?/") at /usr/src/sys/kern/kern_shutdown.c:565 #4 0xffffffff803f62ff in trap_fatal (frame=0xffffff0029753000, eva=18446742974994109272) at /usr/src/sys/amd64/amd64/trap.c:660 #5 0xffffffff803f67a2 in trap (frame {tf_rdi = 0, tf_rsi = 0, tf_rdx = 0, tf_rcx = 1951858688, tf_r8 = 2500, tf_r9 = 2975, tf_rax = 1951858688, tf_rbx = -2050457600, tf_rbp = -1099480717824, tf_r10 = 246016, tf_r11 = 184512, tf_r12 = -1098707543808, tf_r13 = 246015, tf_r14 = -2050457600, tf_r15 = 255, tf_trapno = 18, tf_addr = 0, tf_flags = 2147483648012, tf_err = 0, tf_rip = -2143743161, tf_cs = 8, tf_rflags = 66182, tf_rsp = -1484310784, tf_ss = 16}) at /usr/src/sys/amd64/amd64/trap.c:469 #6 0xffffffff803e1a6b in calltrap () at /usr/src/sys/amd64/amd64/exception.S:168 #7 0xffffffff80391347 in ffs_valloc (pvp=0xffffff002f24d7c0, mode=16877, cred=0x0, vpp=0xffffffffa7873798) at libkern.h:56 #8 
0xffffffff803b8a5e in ufs_mkdir (ap=0xffffffffa78739a0) at /usr/src/sys/ufs/ufs/ufs_vnops.c:1386 #9 0xffffffff8043b355 in VOP_MKDIR_APV (vop=0x74570000, a=0xffffffffa78739a0) at vnode_if.c:1251 #10 0xffffffff80310e19 in kern_mkdir (td=0xffffff002f24d7c0, path=0xffffff003dabe400 "", segflg=4, mode=511) at vnode_if.h:653 #11 0xffffffff803f7151 in syscall (frame {tf_rdi = 140737488348678, tf_rsi = 511, tf_rdx = 4294967295, tf_rcx = 1, tf_r8 = 0, tf_r9 = 140737488347272, tf_rax = 136, tf_rbx = 2, tf_rbp = 140737488348024, tf_r10 = 4294967295, tf_r11 = 582, tf_r12 = 140737488348678, tf_r13 = 140737488348008, tf_r14 = 0, tf_r15 = 0, tf_trapno = 12, tf_addr = 34367037072, tf_flags = 0, tf_err = 2, tf_rip = 34367037084, tf_cs = 43, tf_rflags = 518, tf_rsp = 140737488347720, tf_ss = 35}) at /usr/src/sys/amd64/amd64/trap.c:792 #12 0xffffffff803e1c08 in Xfast_syscall () at /usr/src/sys/amd64/amd64/exception.S:270 #13 0x00000008006f5e9c in ?? () Previous frame inner to this frame (corrupt stack?) (kgdb) frame 7 #7 0xffffffff80391347 in ffs_valloc (pvp=0xffffff002f24d7c0, mode=16877, cred=0x0, vpp=0xffffffffa7873798) at libkern.h:56 56 static __inline u_int min(u_int a, u_int b) { return (a < b ? a : b); } (kgdb) list 51 static __inline int imax(int a, int b) { return (a > b ? a : b); } 52 static __inline int imin(int a, int b) { return (a < b ? a : b); } 53 static __inline long lmax(long a, long b) { return (a > b ? a : b); } 54 static __inline long lmin(long a, long b) { return (a < b ? a : b); } 55 static __inline u_int max(u_int a, u_int b) { return (a > b ? a : b); } 56 static __inline u_int min(u_int a, u_int b) { return (a < b ? a : b); } 57 static __inline quad_t qmax(quad_t a, quad_t b) { return (a > b ? a : b); } 58 static __inline quad_t qmin(quad_t a, quad_t b) { return (a < b ? a : b); } 59 static __inline u_long ulmax(u_long a, u_long b) { return (a > b ? a : b); } 60 static __inline u_long ulmin(u_long a, u_long b) { return (a < b ? 
a : b); } (kgdb) frame 8 #8 0xffffffff803b8a5e in ufs_mkdir (ap=0xffffffffa78739a0) at /usr/src/sys/ufs/ufs/ufs_vnops.c:1386 1386 error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp); (kgdb) list 1381 /* 1382 * Must simulate part of ufs_makeinode here to acquire the inode, 1383 * but not have it entered in the parent directory. The entry is 1384 * made later after writing "." and ".." entries. 1385 */ 1386 error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp); 1387 if (error) 1388 goto out; 1389 ip = VTOI(tvp); 1390 ip->i_gid = dp->i_gid; (kgdb)
Kris Kennaway
2007-Apr-06 18:42 UTC
Repeatable crash with mkdir causing a divide by zero error
On Fri, Apr 06, 2007 at 11:05:22AM +0100, Tom Judge wrote: > Hi, > > I have seen some problems with a new file system that I created > yesterday in that I could repeatedly get the system to crash in with a > mkdir. > > Here is the disk information > mfid1: <MFI Logical Disk> on mfi1 > mfid1: 5716992MB (11708399616 sectors) RAID volume 'Images' is optimal > > I created a new file system tuned for 64k blocks, an average file size > of 1Mb, and 2500 files per directory. > > newfs -b 65535 -g 1048576 -h 2500 /dev/mfid1p1 > mount /dev/mfid1p1 /compere > mkdir /compere/images > mkdir /compere/images/1999 > > (Also tested with mkdir test; mkdir test/1998) > > The system is and amd64 system running 6.2-RELEASE and the pmap.c patch. > I have 3 cores cause by 3 different apps (rsync, gmkdir, mkdir) and > can provide any more information if required. I have attached a back > trace, unfortunatly I cannot do any testing as the system is now in > testing (newfs -b 65535 -g 1048576 /dev/mfid1p1 was used and seems not > to cause the bug). This might be simple to fix, but please file a PR if it does not get picked up by someone on this list. Kris > > > kgdb /usr/obj/usr/src/sys/PE2950/kernel.debug /var/crash/vmcore.2 > [GDB will not be able to debug user-mode threads: > /usr/lib/libthread_db.so: Undefined symbol "ps_pglobal_lookup"] > GNU gdb 6.1.1 [FreeBSD] > Copyright 2004 Free Software Foundation, Inc. > GDB is free software, covered by the GNU General Public License, and you are > welcome to change it and/or distribute copies of it under certain > conditions. > Type "show copying" to see the conditions. > There is absolutely no warranty for GDB. Type "show warranty" for details. > This GDB was configured as "amd64-marcel-freebsd". 
> > Unread portion of the kernel message buffer: > > > Fatal trap 18: integer divide fault while in kernel mode > cpuid = 0; apic id = 00 > instruction pointer = 0x8:0xffffffff80391347 > stack pointer = 0x10:0xffffffffa78736f0 > frame pointer = 0x10:0xffffff0001d7a600 > code segment = base 0x0, limit 0xfffff, type 0x1b > = DPL 0, pres 1, long 1, def32 0, gran 1 > processor eflags = interrupt enabled, resume, IOPL = 0 > current process = 1206 (mkdir) > trap number = 18 > panic: integer divide fault > cpuid = 0 > Uptime: 4m29s > Dumping 1023 MB (2 chunks) > chunk 0: 1MB (156 pages) ... ok > chunk 1: 1023MB (261800 pages) 1007 991 975 959 943 927 911 895 879 > 863 847 831 815 799 783 767 751 735 719 703 687 671 655 639 623 607 591 > 575 559 543 527 511 495 479 463 447 431 415 399 383 367 351 335 319 303 > 287 271 255 239 223 207 191 175 159 143 127 111 95 79 63 47 31 15 > > #0 doadump () at pcpu.h:172 > 172 pcpu.h: No such file or directory. > in pcpu.h > (kgdb) bt > #0 doadump () at pcpu.h:172 > #1 0x0000000000000004 in ?? 
() > #2 0xffffffff8029a557 in boot (howto=260) at > /usr/src/sys/kern/kern_shutdown.c:409 > #3 0xffffffff8029abf1 in panic (fmt=0xffffff0029753000 "X?/") at > /usr/src/sys/kern/kern_shutdown.c:565 > #4 0xffffffff803f62ff in trap_fatal (frame=0xffffff0029753000, > eva=18446742974994109272) at /usr/src/sys/amd64/amd64/trap.c:660 > #5 0xffffffff803f67a2 in trap (frame> {tf_rdi = 0, tf_rsi = 0, tf_rdx = 0, tf_rcx = 1951858688, tf_r8 = > 2500, tf_r9 = 2975, tf_rax = 1951858688, tf_rbx = -2050457600, tf_rbp = > -1099480717824, tf_r10 = 246016, tf_r11 = 184512, tf_r12 = > -1098707543808, tf_r13 = 246015, tf_r14 = -2050457600, tf_r15 = 255, > tf_trapno = 18, tf_addr = 0, tf_flags = 2147483648012, tf_err = 0, > tf_rip = -2143743161, tf_cs = 8, tf_rflags = 66182, tf_rsp = > -1484310784, tf_ss = 16}) at /usr/src/sys/amd64/amd64/trap.c:469 > #6 0xffffffff803e1a6b in calltrap () at > /usr/src/sys/amd64/amd64/exception.S:168 > #7 0xffffffff80391347 in ffs_valloc (pvp=0xffffff002f24d7c0, > mode=16877, cred=0x0, vpp=0xffffffffa7873798) at libkern.h:56 > #8 0xffffffff803b8a5e in ufs_mkdir (ap=0xffffffffa78739a0) at > /usr/src/sys/ufs/ufs/ufs_vnops.c:1386 > #9 0xffffffff8043b355 in VOP_MKDIR_APV (vop=0x74570000, > a=0xffffffffa78739a0) at vnode_if.c:1251 > #10 0xffffffff80310e19 in kern_mkdir (td=0xffffff002f24d7c0, > path=0xffffff003dabe400 "", segflg=4, mode=511) at vnode_if.h:653 > #11 0xffffffff803f7151 in syscall (frame> {tf_rdi = 140737488348678, tf_rsi = 511, tf_rdx = 4294967295, > tf_rcx = 1, tf_r8 = 0, tf_r9 = 140737488347272, tf_rax = 136, tf_rbx = > 2, tf_rbp = 140737488348024, tf_r10 = 4294967295, tf_r11 = 582, tf_r12 = > 140737488348678, tf_r13 = 140737488348008, tf_r14 = 0, tf_r15 = 0, > tf_trapno = 12, tf_addr = 34367037072, tf_flags = 0, tf_err = 2, tf_rip > = 34367037084, tf_cs = 43, tf_rflags = 518, tf_rsp = 140737488347720, > tf_ss = 35}) > at /usr/src/sys/amd64/amd64/trap.c:792 > #12 0xffffffff803e1c08 in Xfast_syscall () at > 
/usr/src/sys/amd64/amd64/exception.S:270 > #13 0x00000008006f5e9c in ?? () > Previous frame inner to this frame (corrupt stack?) > (kgdb) frame 7 > #7 0xffffffff80391347 in ffs_valloc (pvp=0xffffff002f24d7c0, > mode=16877, cred=0x0, vpp=0xffffffffa7873798) at libkern.h:56 > 56 static __inline u_int min(u_int a, u_int b) { return (a < b ? a > : b); } > (kgdb) list > 51 static __inline int imax(int a, int b) { return (a > b ? a : b); } > 52 static __inline int imin(int a, int b) { return (a < b ? a : b); } > 53 static __inline long lmax(long a, long b) { return (a > b ? a : > b); } > 54 static __inline long lmin(long a, long b) { return (a < b ? a : > b); } > 55 static __inline u_int max(u_int a, u_int b) { return (a > b ? a > : b); } > 56 static __inline u_int min(u_int a, u_int b) { return (a < b ? a > : b); } > 57 static __inline quad_t qmax(quad_t a, quad_t b) { return (a > b > ? a : b); } > 58 static __inline quad_t qmin(quad_t a, quad_t b) { return (a < b > ? a : b); } > 59 static __inline u_long ulmax(u_long a, u_long b) { return (a > b > ? a : b); } > 60 static __inline u_long ulmin(u_long a, u_long b) { return (a < b > ? a : b); } > (kgdb) frame 8 > #8 0xffffffff803b8a5e in ufs_mkdir (ap=0xffffffffa78739a0) at > /usr/src/sys/ufs/ufs/ufs_vnops.c:1386 > 1386 error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp); > (kgdb) list > 1381 /* > 1382 * Must simulate part of ufs_makeinode here to acquire > the inode, > 1383 * but not have it entered in the parent directory. The > entry is > 1384 * made later after writing "." and ".." entries. 
> 1385 */ > 1386 error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp); > 1387 if (error) > 1388 goto out; > 1389 ip = VTOI(tvp); > 1390 ip->i_gid = dp->i_gid; > (kgdb) > _______________________________________________ > freebsd-stable@freebsd.org mailing list > http://lists.freebsd.org/mailman/listinfo/freebsd-stable > To unsubscribe, send any mail to "freebsd-stable-unsubscribe@freebsd.org" >-------------- next part -------------- A non-text attachment was scrubbed... Name: not available Type: application/pgp-signature Size: 187 bytes Desc: not available Url : http://lists.freebsd.org/pipermail/freebsd-stable/attachments/20070406/d5e26a3c/attachment.pgp