I've been having stability problems in a new dual-core opteron machine. When it first crashed we were running a number of multicast applications that were listening to, recording, and rebroadcasting data. I have been able to re-simulate the data play up to the crash and can recreate the panic 1 out of every 5 or 6 replays. The hardware is: - 2 275 (2.2Ghz) dual-core processors - tyan 2881 k8sr motherboard (bios 2.05v) - 2 Gb DDR 400Mhz PC3200 Registered ECC memory - SEAGATE ST3146807LC scsi drive - intel dual-port gigabit ethernet card I have experienced the problem in both 5.4-RELEASE and STABLE (stable as of today). I am running a custom SMP kernel. The most recent panic message was: Fatal trap 12: page fault while in kernel mode cpuid = 0; apic id = 00 fault virtual address = 0x88 fault code = supervisor read, page not present instruction pointer = 0x8:0xffffffff802af0da stack pointer = 0x10:0xffffffffb1c16a60 frame pointer = 0x10:0xffffff005b8cd000 code segment = base 0x0, limit 0xfffff, type 0x1b = DPL 0, pres 1, long 1, def32 0, gran 1 processor eflags = interrupt enabled, resume, IOPL = 0 current process = 63 (pagedaemon) trap number = 12 panic: page fault cpuid = 0 boot() called on cpu#0 Uptime: 1h3m10s Dumping 2047 MB 16 32 48 64 80 96 112 128 144 160 176 192 208 224 240 256 272 288 304 320 336 352 368 384 400 416 432 448 464 480 496 512 528 544 560 576 592 608 624 640 656 672 688 704 720 736 752 768 784 800 816 832 848 864 880 896 912 928 944 960 976 992 1008 1024 1040 1056 1072 1088 1104 1120 1136 1152 1168 1184 1200 1216 1232 1248 1264 1280 1296 1312 1328 1344 1360 1376 1392 1408 1424 1440 1456 1472 1488 1504 1520 1536 1552 1568 1584 1600 1616 1632 1648 1664 1680 1696 1712 1728 1744 1760 1776 1792 1808 1824 1840 1856 1872 1888 1904 1920 1936 1952 1968 1984 2000 2016 2032 Every time it crashes the current process is pagedaemon, and the instruction pointer points to 'thread_fini'. 
kgdb shows: host1# kgdb /usr/obj/usr/src/sys/LOCAL/kernel.debug /usr/tmp/crash/vmcore.6 [GDB will not be able to debug user-mode threads: /usr/lib/libthread_db.so: Undefined symbol "ps_pglobal_lookup"] GNU gdb 6.1.1 [FreeBSD] This GDB was configured as "amd64-marcel-freebsd". #0 doadump () at pcpu.h:167 167 pcpu.h: No such file or directory. in pcpu.h (kgdb) where #0 doadump () at pcpu.h:167 #1 0x0000000000000000 in ?? () #2 0xffffffff802a2bd7 in boot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:410 #3 0xffffffff802a340f in panic (fmt=0xffffff007b78c500 "\uffff\022y{") at /usr/src/sys/kern/kern_shutdown.c:566 #4 0xffffffff80412f8a in trap_fatal (frame=0xffffff007b78c500, eva=18446742976269456104) at /usr/src/sys/amd64/amd64/trap.c:639 #5 0xffffffff804132af in trap_pfault (frame=0xffffffffb1c169b0, usermode=0) at /usr/src/sys/amd64/amd64/trap.c:562 #6 0xffffffff80413553 in trap (frame={tf_rdi = -1097427386128, tf_rsi = -1097440115456, tf_rdx = 99343, tf_rcx = 0, tf_r8 = 0, tf_r9 = 0, tf_rax = 99343, tf_rbx = 0, tf_rbp = -1097975672832, tf_r10 = 4503599627366400, tf_r11 = 3424, tf_r12 = 4, tf_r13 = 4, tf_r14 = -1098264600680, tf_r15 = -1097364252144, tf_trapno = 12, tf_addr = 136, tf_flags = -1098264600680, tf_err = 0, tf_rip = -2144669478, tf_cs = 8, tf_rflags = 66050, tf_rsp = -1312724368, tf_ss = 16}) at /usr/src/sys/amd64/amd64/trap.c:341 #7 0xffffffff80400c0b in calltrap () at /usr/src/sys/amd64/amd64/exception.S:171 #8 0xffffff007c3b00f0 in ?? () #9 0xffffff007b78c500 in ?? () #10 0x000000000001840f in ?? () #11 0x0000000000000000 in ?? () #12 0x0000000000000000 in ?? () #13 0x0000000000000000 in ?? () #14 0x000000000001840f in ?? () #15 0x0000000000000000 in ?? () #16 0xffffff005b8cd000 in ?? () #17 0x000ffffffffff000 in ?? () #18 0x0000000000000d60 in ?? () #19 0x0000000000000004 in ?? () #20 0x0000000000000004 in ?? () #21 0xffffff004a541f98 in ?? () #22 0xffffff007ffe5a10 in ?? () #23 0x000000000000000c in ?? () #24 0x0000000000000088 in ?? 
() #25 0xffffff004a541f98 in ?? () #26 0x0000000000000000 in ?? () #27 0xffffffff802af0da in thread_fini (mem=0x0, size=0) at /usr/src/sys/kern/kern_thread.c:271 #28 0x0000000000000010 in ?? () #29 0x0000000000000001 in ?? () #30 0xffffff007ffe5a00 in ?? () #31 0xffffff005b8cdf98 in ?? () #32 0xffffffff803f67ff in zone_drain (zone=0x8) at /usr/src/sys/vm/uma_core.c:749 #33 0xffffffff803f43b6 in zone_foreach (zfunc=0xffffffff803f6630 <zone_drain>) at /usr/src/sys/vm/uma_core.c:1494 #34 0xffffffff803f7fc9 in uma_reclaim () at /usr/src/sys/vm/uma_core.c:2623 #35 0xffffffff803f1dac in vm_pageout () at /usr/src/sys/vm/vm_pageout.c:674 #36 0xffffffff802898cc in fork_exit (callout=0xffffffff803f17b0 <vm_pageout>, arg=0x0, frame=0xffffffffb1c16c50) at /usr/src/sys/kern/kern_fork.c:791 #37 0xffffffff80400e0e in fork_trampoline () at /usr/src/sys/amd64/amd64/exception.S:296 #38 0x0000000000000000 in ?? () #39 0x0000000000000000 in ?? () #40 0x0000000000000001 in ?? () #41 0x0000000000000000 in ?? () #42 0x0000000000000000 in ?? () #43 0x0000000000000000 in ?? () #44 0x0000000000000000 in ?? () #45 0x0000000000000000 in ?? () #46 0x0000000000000000 in ?? () #47 0x0000000000000000 in ?? () #48 0x0000000000000000 in ?? () #49 0x0000000000000000 in ?? () #50 0x0000000000000000 in ?? () #51 0x0000000000000000 in ?? () #52 0x0000000000000000 in ?? () #53 0x0000000000000000 in ?? () #54 0x0000000000000000 in ?? () #55 0x0000000000000000 in ?? () #56 0x0000000000000000 in ?? () #57 0x0000000000000000 in ?? () #58 0x0000000000000000 in ?? () #59 0x0000000000000000 in ?? () #60 0x0000000000000000 in ?? () #61 0x0000000000000000 in ?? () #62 0x0000000000000000 in ?? () #63 0x0000000000000000 in ?? () #64 0x0000000000000000 in ?? () #65 0x0000000000000000 in ?? () #66 0x0000000000000000 in ?? () #67 0x0000000000000000 in ?? () #68 0x0000000000000000 in ?? () #69 0x0000000000000000 in ?? () #70 0x00000000007ff000 in ?? 
() #71 0xffffffff8062fad4 in vm_page_max_wired () #72 0xffffffff8062f1c0 in vm_page_queue_free_mtx () #73 0x0000000000000001 in ?? () #74 0xffffff007b7912e8 in ?? () #75 0xffffff007b7d5000 in ?? () #76 0xffffffffb1c16a08 in ?? () #77 0xffffff007b78c500 in ?? () #78 0xffffffff802b777c in sched_switch (td=0x0, newtd=0x0, flags=1) at /usr/src/sys/kern/sched_4bsd.c:881 #79 0x0000000000000000 in ?? () #80 0x0000000000000000 in ?? () #81 0x0000000000000000 in ?? () #82 0x0000000000000000 in ?? () #83 0x0000000000000000 in ?? () #84 0x0000000000000000 in ?? () #85 0x0000000000000000 in ?? () #86 0x0000000000000000 in ?? () #87 0x0000000000000000 in ?? () #88 0x0000000000000000 in ?? () #89 0x0000000000000000 in ?? () #90 0x0000000000000000 in ?? () #91 0x0000000000000000 in ?? () #92 0x0000000000000000 in ?? () #93 0x0000000000000000 in ?? () #94 0x0000000000000000 in ?? () #95 0x0000000000000000 in ?? () #96 0x0000000000000000 in ?? () #97 0x0000000000000000 in ?? () #98 0x0000000000000000 in ?? () #99 0x0000000000000000 in ?? () #100 0x0000000000000000 in ?? () #101 0x0000000000000000 in ?? () #102 0x0000000000000000 in ?? () #103 0x0000000000000000 in ?? () #104 0x0000000000000000 in ?? () #105 0x0000000000000000 in ?? () #106 0x0000000000000000 in ?? () #107 0x0000000000000000 in ?? () #108 0x0000000000000000 in ?? () #109 0x0000000000000000 in ?? () #110 0x0000000000000000 in ?? () #111 0x0000000000000000 in ?? () #112 0x0000000000000000 in ?? () #113 0x0000000000000000 in ?? () #114 0x0000000000000000 in ?? () #115 0x0000000000000000 in ?? () #116 0x0000000000000000 in ?? () #117 0x0000000000000000 in ?? () #118 0x0000000000000000 in ?? () #119 0x0000000000000000 in ?? () #120 0x0000000000000000 in ?? () #121 0x0000000000000000 in ?? () #122 0x0000000000000000 in ?? () #123 0x0000000000000000 in ?? () #124 0x0000000000000000 in ?? () #125 0x0000000000000000 in ?? () #126 0x0000000000000000 in ?? () #127 0x0000000000000000 in ?? () #128 0x0000000000000000 in ?? 
() #129 0x0000000000000000 in ?? () #130 0x0000000000000000 in ?? () #131 0x0000000000000000 in ?? () #132 0x0000000000000000 in ?? () #133 0x0000000000000000 in ?? () #134 0x0000000000000000 in ?? () #135 0x0000000000000000 in ?? () #136 0x0000000000000000 in ?? () #137 0x0000000000000000 in ?? () #138 0x0000000000000000 in ?? () #139 0x0000000000000000 in ?? () #140 0x0000000000000000 in ?? () #141 0x0000000000000000 in ?? () #142 0x0000000000000000 in ?? () #143 0x0000000000000000 in ?? () #144 0x0000000000000000 in ?? () #145 0x0000000000000000 in ?? () #146 0x0000000000000000 in ?? () #147 0x0000000000000000 in ?? () #148 0x0000000000000000 in ?? () #149 0x0000000000000000 in ?? () #150 0x0000000000000000 in ?? () Cannot access memory at address 0xffffffffb1c17000 kernel config: machine amd64 cpu HAMMER ident CUSTOM makeoptions DEBUG=-g options SCHED_4BSD # 4BSD scheduler options INET # InterNETworking options INET6 # IPv6 communications protocols options FFS # Berkeley Fast Filesystem options SOFTUPDATES # Enable FFS soft updates support options UFS_ACL # Support for access control lists options UFS_DIRHASH # Improve performance on big directories options MD_ROOT # MD is a potential root device options NFSCLIENT # Network Filesystem Client options NFSSERVER # Network Filesystem Server options NFS_ROOT # NFS usable as /, requires NFSCLIENT options NTFS # NT File System options MSDOSFS # MSDOS Filesystem options CD9660 # ISO 9660 Filesystem options PROCFS # Process filesystem (requires PSEUDOFS) options PSEUDOFS # Pseudo-filesystem framework options GEOM_GPT # GUID Partition Tables. 
options COMPAT_43 # Needed by COMPAT_LINUX32 options COMPAT_IA32 # Compatible with i386 binaries options COMPAT_FREEBSD4 # Compatible with FreeBSD4 options COMPAT_LINUX32 # Compatible with i386 linux binaries options SCSI_DELAY=15000 # Delay (in ms) before probing SCSI options KTRACE # ktrace(1) support options SYSVSHM # SYSV-style shared memory options SYSVMSG # SYSV-style message queues options SYSVSEM # SYSV-style semaphores options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions options KBD_INSTALL_CDEV # install a CDEV entry in /dev options AHC_REG_PRETTY_PRINT # Print register bitfields in debug # output. Adds ~128k to driver. options AHD_REG_PRETTY_PRINT # Print register bitfields in debug options ADAPTIVE_GIANT # Giant mutex is adaptive. options SMP device atpic # 8259A compatability options LINPROCFS # Cannot be a module yet. device acpi device isa device pci device fdc device ata device atadisk # ATA disk drives device ataraid # ATA RAID drives device atapicd # ATAPI CDROM drives device atapifd # ATAPI floppy drives device atapist # ATAPI tape drives options ATA_STATIC_ID # Static device numbering device ahc # AHA2940 and onboard AIC7xxx devices device ahd # AHA39320/29320 and onboard AIC79xx devices device aic # Adaptec 15[012]x SCSI adapters, AIC-6[23]60. 
device scbus # SCSI bus (required for SCSI) device ch # SCSI media changers device da # Direct Access (disks) device sa # Sequential Access (tape etc) device cd # CD device pass # Passthrough device (direct SCSI access) device ses # SCSI Environmental Services (and SAF-TE) device atkbdc # AT keyboard controller device atkbd # AT keyboard device psm # PS/2 mouse device vga # VGA video card driver device splash # Splash screen and screen saver support device sc device sio # 8250, 16[45]50 based serial ports device ppc device ppbus # Parallel port bus (required) device lpt # Printer device ppi # Parallel port interface device device em # Intel PRO/1000 adapter Gigabit Ethernet Card device miibus # MII bus support device bge # Broadcom BCM570xx Gigabit Ethernet device loop # Network loopback device mem # Memory and kernel memory devices device io # I/O device device random # Entropy device device ether # Ethernet support device sl # Kernel SLIP device ppp # Kernel PPP device tun # Packet tunnel. device pty # Pseudo-ttys (telnet etc) device md # Memory "disks" device gif # IPv6 and IPv4 tunneling device faith # IPv6-to-IPv4 relaying (translation) device bpf # Berkeley packet filter device uhci # UHCI PCI->USB interface device ohci # OHCI PCI->USB interface device usb # USB Bus (required) device ugen # Generic device uhid # "Human Interface Devices" device ukbd # Keyboard device ulpt # Printer device umass # Disks/Mass storage - Requires scbus and da device ums # Mouse any thoughts? thanks. - Rob Watt
Kris Kennaway
2005-Jun-30 20:41 UTC
fatal trap 12 in pagedaemon on dual-core opteron machine
On Thu, Jun 30, 2005 at 04:00:47PM -0400, Rob Watt wrote: > #7 0xffffffff80400c0b in calltrap () at > /usr/src/sys/amd64/amd64/exception.S:171 > #8 0xffffff007c3b00f0 in ?? () > #9 0xffffff007b78c500 in ?? () > #10 0x000000000001840f in ?? () > #11 0x0000000000000000 in ?? () > #12 0x0000000000000000 in ?? () [..] All these bogus stack frames can be caused by having compiled the kernel with -O2 instead of -O. Is this the case? Kris -------------- next part -------------- A non-text attachment was scrubbed... Name: not available Type: application/pgp-signature Size: 187 bytes Desc: not available Url : http://lists.freebsd.org/pipermail/freebsd-stable/attachments/20050630/77fe585f/attachment.bin
On Thu, 30 Jun 2005, Kris Kennaway wrote: > On Thu, Jun 30, 2005 at 04:00:47PM -0400, Rob Watt wrote: > > > #7 0xffffffff80400c0b in calltrap () at > > /usr/src/sys/amd64/amd64/exception.S:171 > > #8 0xffffff007c3b00f0 in ?? () > > #9 0xffffff007b78c500 in ?? () > > #10 0x000000000001840f in ?? () > > #11 0x0000000000000000 in ?? () > > #12 0x0000000000000000 in ?? () > > [..] > > All these bogus stack frames can be caused by having compiled the > kernel with -O2 instead of -O. Is this the case? It seems the default for amd64 is to compile with: COPTFLAGS="-O2 -frename-registers -pipe" I changed the -O2 to -O, and there are still a large number of bogus stack frames (although there are more readable frames than before): #0 doadump () at pcpu.h:167 #1 0x0000000000000000 in ?? () #2 0xffffffff802aca23 in boot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:410 #3 0xffffffff802ace8b in panic (fmt=0xffffff007b78c500 "\uffff\022y{") at /usr/src/sys/kern/kern_shutdown.c:566 #4 0xffffffff804275bc in trap_fatal (frame=0xffffff007b78c500, eva=18446742976269456104) at /usr/src/sys/amd64/amd64/trap.c:639 #5 0xffffffff80427220 in trap_pfault (frame=0xffffffffb1c129c0, usermode=0) at /usr/src/sys/amd64/amd64/trap.c:562 #6 0xffffffff80426e99 in trap (frame={tf_rdi = -1097427386128, tf_rsi = -1097440115456, tf_rdx = 100956, tf_rcx = 0, tf_r8 = 0, tf_r9 = 0, tf_rax = 100956, tf_rbx = 0, tf_rbp = -1098510893056, tf_r10 = 30, tf_r11 = 29, tf_r12 = -1097364252160, tf_r13 = -2143265920, tf_r14 = 0, tf_r15 = -2141262160, tf_trapno = 12, tf_addr = 136, tf_flags = 0, tf_err = 0, tf_rip = -2144628916, tf_cs = 8, tf_rflags = 66050, tf_rsp = -1312740736, tf_ss = 16}) at /usr/src/sys/amd64/amd64/trap.c:341 #7 0xffffffff80413c5b in calltrap () at /usr/src/sys/amd64/amd64/exception.S:171 #8 0xffffff007c3b00f0 in ?? () #9 0xffffff007b78c500 in ?? () #10 0x0000000000018a5c in ?? () #11 0x0000000000000000 in ?? () #12 0x0000000000000000 in ?? () #13 0x0000000000000000 in ?? 
() #14 0x0000000000018a5c in ?? () #15 0x0000000000000000 in ?? () #16 0xffffff003ba60000 in ?? () #17 0x000000000000001e in ?? () #18 0x000000000000001d in ?? () #19 0xffffff007ffe5a00 in ?? () #20 0xffffffff80405b80 in vm_pageout_page_stats () at /usr/src/sys/vm/vm_pageout.c:1350 #21 0x0000000000000000 in ?? () #22 0xffffffff805eeeb0 in sysctl___kern_sched_runq_fuzz () #23 0x000000000000000c in ?? () #24 0x0000000000000088 in ?? () #25 0x0000000000000000 in ?? () #26 0x0000000000000000 in ?? () #27 0xffffffff802b8f4c in thread_fini (mem=0x0, size=0) at /usr/src/sys/kern/kern_thread.c:271 #28 0x0000000000000010 in ?? () #29 0xffffff007ffe4620 in ?? () #30 0x0000000000000000 in ?? () #31 0xffffff003ba60f98 in ?? () #32 0xffffffff80407a41 in zone_drain (zone=0x10202) at /usr/src/sys/vm/uma_core.c:749 #33 0xffffffff80408ed6 in zone_foreach (zfunc=0xffffffff80407810 <zone_drain>) at /usr/src/sys/vm/uma_core.c:1494 #34 0xffffffff8040acb5 in uma_reclaim () at /usr/src/sys/vm/uma_core.c:2623 #35 0xffffffff80404836 in vm_pageout_scan (pass=0) at /usr/src/sys/vm/vm_pageout.c:674 #36 0xffffffff80405f1e in vm_pageout () at /usr/src/sys/vm/vm_pageout.c:1476 #37 0xffffffff80292e4b in fork_exit (callout=0xffffffff80405b80 <vm_pageout>, arg=0x0, frame=0xffffffffb1c12c50) at /usr/src/sys/kern/kern_fork.c:791 #38 0xffffffff80413e5e in fork_trampoline () at /usr/src/sys/amd64/amd64/exception.S:296 #39 0x0000000000000000 in ?? () #40 0x0000000000000000 in ?? () #41 0x0000000000000001 in ?? () #42 0x0000000000000000 in ?? () #43 0x0000000000000000 in ?? () #44 0x0000000000000000 in ?? () #45 0x0000000000000000 in ?? () #46 0x0000000000000000 in ?? () #47 0x0000000000000000 in ?? () #48 0x0000000000000000 in ?? () #49 0x0000000000000000 in ?? () #50 0x0000000000000000 in ?? () #51 0x0000000000000000 in ?? () #52 0x0000000000000000 in ?? () #53 0x0000000000000000 in ?? () #54 0x0000000000000000 in ?? () #55 0x0000000000000000 in ?? () #56 0x0000000000000000 in ?? 
() #57 0x0000000000000000 in ?? () #58 0x0000000000000000 in ?? () #59 0x0000000000000000 in ?? () #60 0x0000000000000000 in ?? () #61 0x0000000000000000 in ?? () #62 0x0000000000000000 in ?? () #63 0x0000000000000000 in ?? () #64 0x0000000000000000 in ?? () #65 0x0000000000000000 in ?? () #66 0x0000000000000000 in ?? () #67 0x0000000000000000 in ?? () #68 0x0000000000000000 in ?? () #69 0x0000000000000000 in ?? () #70 0x0000000000000000 in ?? () #71 0x000000000081e000 in ?? () #72 0xffffffff806457f4 in vm_page_max_wired () #73 0x0000000000000000 in ?? () #74 0x0000000000000001 in ?? () #75 0xffffff007b7912e8 in ?? () #76 0xffffff007b7f5000 in ?? () #77 0xffffffffb1c12ae8 in ?? () #78 0xffffff007b78c500 in ?? () #79 0xffffffff802c0c84 in sched_switch (td=0x0, newtd=0x0, flags=1) at /usr/src/sys/kern/sched_4bsd.c:881 ... - Rob Watt