Herve Boulouis
2008-Nov-06 01:30 UTC
Multiple panics with 7.1-PRERELEASE amd64 and varnish
Hi, I just put 3 boxes with varnish into production last night and I've got a few different panics in 8 hours. Traffic is very low (each box generates 10 Mbit/s). All boxes have the same 7.1-PRERELEASE (from around end of September) and 4GB of RAM and varnish is launched with the following command line : /usr/local/sbin/varnishd -f /usr/local/etc/varnish/default.vcl \ -u varnish \ -n /opt/varnish \ -s file,/opt/varnish/storage.bin,4G \ -P /var/run/varnishd.pid \ -t 7464960000s \ -w 32,4096,120 \ -p listen_depth=4096 \ -p thread_pool_min=32 \ -p thread_pool_max=4096 \ -p lru_interval=3600 \ -T 0.0.0.0:3000 \ -h classic,500009 Sysctl tunings : www2:~# cat /etc/sysctl.conf net.inet.tcp.tso=0 debug.minidump=1 net.inet.tcp.rfc1323=1 kern.ipc.maxsockbuf=1024000 net.inet.tcp.sendspace=512000 net.inet.tcp.recvspace=512000 www2:~# cat /boot/loader.conf kern.ipc.nmbclusters=65536 kern.maxfiles=2097152 kern.maxfilesperproc=104856 kern.ipc.somaxconn=16384 net.inet.ip.portrange.last=65535 kern.threads.max_threads_per_proc=4096 kern.ipc.maxpipekva=104857600 What is strange is that those 3 boxes were hammered with siege and we had no problems with them generating 900 Mbit/s of traffic. Are these panics known ? Can they be related to the sysctl ? Do I need to CC freebsd-amd64 ? Panic #1 (Box 2): #2 0xffffffff803c6b8d in boot (howto=260) at ../../../kern/kern_shutdown.c:418 #3 0xffffffff803c6e48 in panic (fmt=Variable "fmt" is not available. ) at ../../../kern/kern_shutdown.c:572 #4 0xffffffff806260b6 in trap_fatal (frame=0xffffffffcd816890, eva=138) at ../../../amd64/amd64/trap.c:764 #5 0xffffffff80626311 in trap_pfault (frame=0xffffffffcd816890, usermode=0) at ../../../amd64/amd64/trap.c:680 #6 0xffffffff80626c53 in trap (frame=0xffffffffcd816890) at ../../../amd64/amd64/trap.c:449 #7 0xffffffff8060d5be in calltrap () at ../../../amd64/amd64/exception.S:209 #8 0xffffffff80599dda in vm_object_clear_flag (object=0x0, bits=Variable "bits" is not available. 
) at ../../../vm/vm_object.c:269 #9 0xffffffff804302a6 in cluster_wbuild (vp=0xffffff000eabfbd0, size=16384, start_lbn=58905, len=2) at ../../../kern/vfs_cluster.c:925 #10 0xffffffff804275de in vfs_bio_awrite (bp=0xffffffffa4d0b660) at ../../../kern/vfs_bio.c:1668 #11 0xffffffff8057800e in ffs_syncvnode (vp=0xffffff000eabfbd0, waitfor=Variable "waitfor" is not available. ) at ../../../ufs/ffs/ffs_vnops.c:283 #12 0xffffffff80573c91 in ffs_sync (mp=0xffffff000e5cc6f0, waitfor=3, td=0xffffff000e568a50) at ../../../ufs/ffs/ffs_vfsops.c:1234 #13 0xffffffff8043f821 in sync_fsync (ap=Variable "ap" is not available. ) at ../../../kern/vfs_subr.c:3217 #14 0xffffffff8065f552 in VOP_FSYNC_APV (vop=Variable "vop" is not available. ) at vnode_if.c:1007 #15 0xffffffff8043ff71 in sched_sync () at vnode_if.h:538 #16 0xffffffff803a6a01 in fork_exit (callout=0xffffffff8043f8f0 <sched_sync>, arg=0x0, frame=0xffffffffcd816c80) at ../../../kern/kern_fork.c:804 #17 0xffffffff8060d98e in fork_trampoline () at ../../../amd64/amd64/exception.S:455 Panic #2 (Box 2): #2 0xffffffff803c6b8d in boot (howto=260) at ../../../kern/kern_shutdown.c:418 #3 0xffffffff803c6e48 in panic (fmt=Variable "fmt" is not available. ) at ../../../kern/kern_shutdown.c:572 #4 0xffffffff806260b6 in trap_fatal (frame=0xffffffffcd816890, eva=138) at ../../../amd64/amd64/trap.c:764 #5 0xffffffff80626311 in trap_pfault (frame=0xffffffffcd816890, usermode=0) at ../../../amd64/amd64/trap.c:680 #6 0xffffffff80626c53 in trap (frame=0xffffffffcd816890) at ../../../amd64/amd64/trap.c:449 #7 0xffffffff8060d5be in calltrap () at ../../../amd64/amd64/exception.S:209 #8 0xffffffff80599dda in vm_object_clear_flag (object=0x0, bits=Variable "bits" is not available. 
) at ../../../vm/vm_object.c:269 #9 0xffffffff804302a6 in cluster_wbuild (vp=0xffffff000ea4adc8, size=16384, start_lbn=20817, len=5) at ../../../kern/vfs_cluster.c:925 #10 0xffffffff804275de in vfs_bio_awrite (bp=0xffffffffa4d83660) at ../../../kern/vfs_bio.c:1668 #11 0xffffffff8057800e in ffs_syncvnode (vp=0xffffff000ea4adc8, waitfor=Variable "waitfor" is not available. ) at ../../../ufs/ffs/ffs_vnops.c:283 #12 0xffffffff80573c91 in ffs_sync (mp=0xffffff000e6c96f0, waitfor=3, td=0xffffff000e568a50) at ../../../ufs/ffs/ffs_vfsops.c:1234 #13 0xffffffff8043f821 in sync_fsync (ap=Variable "ap" is not available. ) at ../../../kern/vfs_subr.c:3217 #14 0xffffffff8065f552 in VOP_FSYNC_APV (vop=Variable "vop" is not available. ) at vnode_if.c:1007 #15 0xffffffff8043ff71 in sched_sync () at vnode_if.h:538 #16 0xffffffff803a6a01 in fork_exit (callout=0xffffffff8043f8f0 <sched_sync>, arg=0x0, frame=0xffffffffcd816c80) at ../../../kern/kern_fork.c:804 #17 0xffffffff8060d98e in fork_trampoline () at ../../../amd64/amd64/exception.S:455 Panic #3 (Box 1): #3 0xffffffff803c6e48 in panic (fmt=Variable "fmt" is not available. ) at ../../../kern/kern_shutdown.c:572 #4 0xffffffff8059d76b in vm_page_unwire (m=0xffffff00d9e3ff70, activate=0) at ../../../vm/vm_page.c:1410 #5 0xffffffff80429cc8 in vfs_vmio_release (bp=0xffffffffa4ca5860) at ../../../kern/vfs_bio.c:1539 #6 0xffffffff8042b72b in getnewbuf (slpflag=0, slptimeo=0, size=Variable "size" is not available. ) at ../../../kern/vfs_bio.c:1847 #7 0xffffffff8042ccbe in getblk (vp=0xffffff001c6493f0, blkno=0, size=2048, slpflag=Variable "slpflag" is not available. ) at ../../../kern/vfs_bio.c:2602 #8 0xffffffff8042d65c in breadn (vp=0xffffff001c6493f0, blkno=Variable "blkno" is not available. ) at ../../../kern/vfs_bio.c:786 #9 0xffffffff8042d759 in bread (vp=Variable "vp" is not available. ) at ../../../kern/vfs_bio.c:734 #10 0xffffffff80578c92 in ffs_read (ap=Variable "ap" is not available. 
) at ../../../ufs/ffs/ffs_vnops.c:502 #11 0xffffffff8065f222 in VOP_READ_APV (vop=Variable "vop" is not available. ) at vnode_if.c:637 #12 0xffffffff805838b0 in ufs_readdir (ap=0xffffffffcfca4a80) at vnode_if.h:344 #13 0xffffffff8065f5d6 in VOP_READDIR_APV (vop=Variable "vop" is not available. ) at vnode_if.c:1407 #14 0xffffffff80448d6f in getdirentries (td=0xffffff000ef42000, uap=dwarf2_read_address: Corrupted DWARF expression. ) at vnode_if.h:747 #15 0xffffffff8062668d in syscall (frame=0xffffffffcfca4c80) at ../../../amd64/amd64/trap.c:907 #16 0xffffffff8060d7cb in Xfast_syscall () at ../../../amd64/amd64/exception.S:330 Panic #4 (Box 0): #1 0x0000000000000000 in ?? () #2 0xffffffff803c6b8d in boot (howto=260) at ../../../kern/kern_shutdown.c:418 #3 0xffffffff803c6e48 in panic (fmt=Variable "fmt" is not available. ) at ../../../kern/kern_shutdown.c:572 #4 0xffffffff806260b6 in trap_fatal (frame=0xffffffffcd8162a0, eva=138) at ../../../amd64/amd64/trap.c:764 #5 0xffffffff80626311 in trap_pfault (frame=0xffffffffcd8162a0, usermode=0) at ../../../amd64/amd64/trap.c:680 #6 0xffffffff80626c53 in trap (frame=0xffffffffcd8162a0) at ../../../amd64/amd64/trap.c:449 #7 0xffffffff8060d5be in calltrap () at ../../../amd64/amd64/exception.S:209 #8 0xffffffff80599dda in vm_object_clear_flag (object=0x0, bits=Variable "bits" is not available. 
) at ../../../vm/vm_object.c:269 #9 0xffffffff804302a6 in cluster_wbuild (vp=0xffffff000ea991f8, size=16384, start_lbn=57442, len=2) at ../../../kern/vfs_cluster.c:925 #10 0xffffffff804305ff in cluster_write (vp=0xffffff000ea991f8, bp=0xffffffffa5394360, filesize=4294967296, seqcount=127) at ../../../kern/vfs_cluster.c:570 #11 0xffffffff8057897a in ffs_write (ap=0xffffffffcd8166a0) at ../../../ufs/ffs/ffs_vnops.c:771 #12 0xffffffff80660834 in VOP_WRITE_APV (vop=0xffffffff80869280, a=0xffffffffcd8166a0) at vnode_if.c:691 #13 0xffffffff805a4ee5 in vnode_pager_generic_putpages (vp=0xffffff000ea991f8, m=0xffffffffcd816860, bytecount=Variable "bytecount" is not available. ) at vnode_if.h:373 #14 0xffffffff80430c31 in vop_stdputpages (ap=Variable "ap" is not available. ) at ../../../kern/vfs_default.c:550 #15 0xffffffff8065fc36 in VOP_PUTPAGES_APV (vop=Variable "vop" is not available. ) at vnode_if.c:2189 #16 0xffffffff805a5085 in vnode_pager_putpages (object=0xffffff000e6b6d00, m=0xffffffffcd816860, count=2, sync=8, rtvals=0xffffffffcd8167e0) at vnode_if.h:1164 #17 0xffffffff8059f79e in vm_pageout_flush (mc=0xffffffffcd816860, count=2, flags=8) at vm_pager.h:147 #18 0xffffffff8059baab in vm_object_page_collect_flush (object=0xffffff000e6b6d00, p=0xffffff00db42f178, curgeneration=Variable "curgeneration" is not available. ) at ../../../vm/vm_object.c:973 #19 0xffffffff8059bed6 in vm_object_page_clean (object=0xffffff000e6b6d00, start=0, end=Variable "end" is not available. ) at ../../../vm/vm_object.c:865 #20 0xffffffff8043f4ee in vfs_msync (mp=0xffffff000e69e378, flags=2) at ../../../kern/vfs_subr.c:2995 #21 0xffffffff8043f80a in sync_fsync (ap=Variable "ap" is not available. ) at ../../../kern/vfs_subr.c:3216 #22 0xffffffff8065f552 in VOP_FSYNC_APV (vop=Variable "vop" is not available. 
) at vnode_if.c:1007 #23 0xffffffff8043ff71 in sched_sync () at vnode_if.h:538 #24 0xffffffff803a6a01 in fork_exit (callout=0xffffffff8043f8f0 <sched_sync>, arg=0x0, frame=0xffffffffcd816c80) at ../../../kern/kern_fork.c:804 #25 0xffffffff8060d98e in fork_trampoline () at ../../../amd64/amd64/exception.S:455 -- Herve Boulouis
Herve Boulouis
2008-Nov-06 03:45 UTC
Multiple panics with 7.1-PRERELEASE amd64 and varnish
Le 06/11/2008 11:29, Herve Boulouis a écrit: > > All boxes have the same 7.1-PRERELEASE (from around end of september) and 4GB of ram and varnish is launched with the following command line : I forgot to add that the kernel config is pretty much GENERIC (without KDTRACE_FRAME and KDTRACE_HOOKS) -- Herve Boulouis
Herve Boulouis
2008-Nov-06 06:07 UTC
Multiple panics with 7.1-PRERELEASE amd64 and varnish
Le 06/11/2008 11:29, Herve Boulouis a écrit: I just tried to reboot one of the boxes without kern.ipc.maxpipekva=104857600 to check for KVA problems but crashes persist, though the stack is completely different now. This time I included all the corrupt parts of the stack that I had stripped in my original email but they are similar (from frame 18 to end). Any ideas ? Unread portion of the kernel message buffer: vm_page_free: pindex(188034), busy(1), VPO_BUSY(0), hold(0) panic: vm_page_free: freeing busy page cpuid = 2 Uptime: 1h1m2s Physical memory: 4085 MB Dumping 289 MB: 274 258 242 226 210 194 178 162 146 130 114 98 82 66 50 34 18 2 #0 doadump () at pcpu.h:195 195 pcpu.h: No such file or directory. in pcpu.h (kgdb) bt #0 doadump () at pcpu.h:195 #1 0x0000000000000000 in ?? () #2 0xffffffff803c6b8d in boot (howto=260) at ../../../kern/kern_shutdown.c:418 #3 0xffffffff803c6e48 in panic (fmt=Variable "fmt" is not available. ) at ../../../kern/kern_shutdown.c:572 #4 0xffffffff8059d816 in vm_page_free_toq (m=0x0) at ../../../vm/vm_page.c:1281 #5 0xffffffff8059d9c1 in vm_page_free (m=Variable "m" is not available. ) at ../../../vm/vm_page.c:498 #6 0xffffffff80411795 in socow_iodone (addr=Variable "addr" is not available. ) at ../../../kern/uipc_cow.c:92 #7 0xffffffff804129ca in mb_free_ext (m=0xffffff000ba64c00) at ../../../kern/uipc_mbuf.c:257 #8 0xffffffff80416781 in sbdrop_internal (sb=0xffffff000b4b2cc8, len=15469) at mbuf.h:515 #9 0xffffffff804168c0 in sbdrop_locked (sb=Variable "sb" is not available. ) at ../../../kern/uipc_sockbuf.c:898 #10 0xffffffff80418e05 in soisdisconnected (so=0xffffff000b4b2b40) at ../../../kern/uipc_socket.c:3158 #11 0xffffffff804e15b3 in tcp_close (tp=0xffffff000b6a8888) at ../../../netinet/tcp_subr.c:782 #12 0xffffffff804e16fa in tcp_drop (tp=0xffffff000b6a8888, errno=60) at ../../../netinet/tcp_subr.c:662 #13 0xffffffff804e65c2 in tcp_timer_rexmt (xtp=Variable "xtp" is not available. 
) at ../../../netinet/tcp_timer.c:455 #14 0xffffffff803d81a3 in softclock (dummy=Variable "dummy" is not available. ) at ../../../kern/kern_timeout.c:274 #15 0xffffffff803a9a91 in ithread_loop (arg=Variable "arg" is not available. ) at ../../../kern/kern_intr.c:1088 #16 0xffffffff803a6a01 in fork_exit (callout=0xffffffff803a98d8 <ithread_loop>, arg=0xffffff00010fcb80, frame=0xffffffffad853c80) at ../../../kern/kern_fork.c:804 #17 0xffffffff8060d98e in fork_trampoline () at ../../../amd64/amd64/exception.S:455 #18 0x0000000000000000 in ?? () #19 0x0000000000000000 in ?? () #20 0x0000000000000001 in ?? () #21 0x0000000000000000 in ?? () #22 0x0000000000000000 in ?? () #23 0x0000000000000000 in ?? () #24 0x0000000000000000 in ?? () #25 0x0000000000000000 in ?? () #26 0x0000000000000000 in ?? () #27 0x0000000000000000 in ?? () #28 0x0000000000000000 in ?? () #29 0x0000000000000000 in ?? () #30 0x0000000000000000 in ?? () #31 0x0000000000000000 in ?? () #32 0x0000000000000000 in ?? () #33 0x0000000000000000 in ?? () #34 0x0000000000000000 in ?? () #35 0x0000000000000000 in ?? () #36 0x0000000000000000 in ?? () #37 0x0000000000000000 in ?? () #49 0xffffff000110e370 in ?? () #50 0xffffffff803e5d68 in sched_switch (td=0xffffffff803a98d8, newtd=Variable "newtd" is not available. ) at ../../../kern/sched_ule.c:1938 #51 0x0000000000000000 in ?? () #52 0x0000000000000000 in ?? () #53 0x0000000000000000 in ?? () #54 0x0000000000000000 in ?? () #55 0x0000000000000000 in ?? () #56 0x0000000000000000 in ?? () #57 0x0000000000000000 in ?? () #58 0x0000000000000000 in ?? () #59 0x0000000000000000 in ?? () #60 0x0000000000000000 in ?? () #61 0x0000000000000000 in ?? () #62 0x0000000000000000 in ?? () #63 0x0000000000000000 in ?? () #64 0x0000000000000000 in ?? () #65 0x0000000000000000 in ?? () #66 0x0000000000000000 in ?? () #67 0x0000000000000000 in ?? () #68 0x0000000000000000 in ?? () #69 0x0000000000000000 in ?? () #70 0x0000000000000000 in ?? 
() ---Type <return> to continue, or q <return> to quit--- #71 0x0000000000000000 in ?? () #72 0x0000000000000000 in ?? () #73 0x0000000000000000 in ?? () #74 0x0000000000000000 in ?? () #75 0x0000000000000000 in ?? () #76 0x0000000000000000 in ?? () #77 0x0000000000000000 in ?? () #78 0x0000000000000000 in ?? () #79 0x0000000000000000 in ?? () #80 0x0000000000000000 in ?? () #81 0x0000000000000000 in ?? () #82 0x0000000000000000 in ?? () #83 0x0000000000000000 in ?? () #84 0x0000000000000000 in ?? () #85 0x0000000000000000 in ?? () #86 0x0000000000000000 in ?? () #87 0x0000000000000000 in ?? () #88 0x0000000000000000 in ?? () #89 0x0000000000000000 in ?? () #90 0x0000000000000000 in ?? () #91 0x0000000000000000 in ?? () #92 0x0000000000000000 in ?? () #93 0x0000000000000000 in ?? () #94 0x0000000000000000 in ?? () #95 0x0000000000000000 in ?? () #96 0x0000000000000000 in ?? () #97 0x0000000000000000 in ?? () #98 0x0000000000000000 in ?? () #99 0x0000000000000000 in ?? () #100 0x0000000000000000 in ?? () #101 0x0000000000000000 in ?? () #102 0x0000000000000000 in ?? () #103 0x0000000000000000 in ?? () #104 0x0000000000000000 in ?? () #105 0x0000000000000000 in ?? () #106 0x0000000000000000 in ?? () #107 0x0000000000000000 in ?? () #108 0x0000000000000000 in ?? () #109 0x0000000000000000 in ?? () #110 0x0000000000000000 in ?? () #111 0x0000000000000000 in ?? () #112 0x0000000000000000 in ?? () #113 0x0000000000000000 in ?? () #114 0x0000000000000000 in ?? () #115 0x0000000000000000 in ?? () #116 0x0000000000000000 in ?? () #117 0x0000000000000000 in ?? () #118 0x0000000000000000 in ?? () Cannot access memory at address 0xffffffffad854000 (kgdb) -- Herve Boulouis
Herve Boulouis
2008-Nov-14 07:14 UTC
Multiple panics with 7.1-PRERELEASE amd64/i386 and varnish
Le 06/11/2008 16:05, Herve Boulouis a écrit: > Le 06/11/2008 11:29, Herve Boulouis a écrit: > > I just tried to reboot one of the boxes without kern.ipc.maxpipekva=104857600 to check for kva problems > but crashes persists, though the stack is completely different now. This time I included all the corrupt > parts of the stack that I had stripped in my original email but they are similar (from frame 18 to end). > > Any ideas ? We just found the same kind of crash with a 7.0-STABLE i386 from August so there is a serious bug in the kernel making varnish with the file backend totally unusable on FreeBSD 7. Backtrace : Fatal trap 12: page fault while in kernel mode cpuid = 0; apic id = 00 fault virtual address = 0x52 fault code = supervisor read, page not present instruction pointer = 0x20:0xc093b90a stack pointer = 0x28:0xe4475ad0 frame pointer = 0x28:0xe4475ad0 code segment = base 0x0, limit 0xfffff, type 0x1b = DPL 0, pres 1, def32 1, gran 1 processor eflags = interrupt enabled, resume, IOPL = 0 current process = 34 (syncer) trap number = 12 panic: page fault cpuid = 0 Uptime: 28m52s Physical memory: 1011 MB Dumping 148 MB: 133 117 101 85 69 53 37 21 5 Reading symbols from /boot/kernel/acpi.ko...Reading symbols from /boot/kernel/acpi.ko.symbols...done. done. Loaded symbols for /boot/kernel/acpi.ko #0 doadump () at pcpu.h:195 195 pcpu.h: No such file or directory. in pcpu.h (kgdb) bt #0 doadump () at pcpu.h:195 #1 0xc071b3a6 in boot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:418 #2 0xc071b67e in panic (fmt=Variable "fmt" is not available. 
) at /usr/src/sys/kern/kern_shutdown.c:572 #3 0xc09ec2dc in trap_fatal (frame=0xe4475a90, eva=82) at /usr/src/sys/i386/i386/trap.c:899 #4 0xc09ec54b in trap_pfault (frame=0xe4475a90, usermode=0, eva=82) at /usr/src/sys/i386/i386/trap.c:812 #5 0xc09ecf32 in trap (frame=0xe4475a90) at /usr/src/sys/i386/i386/trap.c:490 #6 0xc09d31cb in calltrap () at /usr/src/sys/i386/i386/exception.s:139 #7 0xc093b90a in vm_object_pip_add (object=0x0, i=1) at /usr/src/sys/vm/vm_object.c:273 #8 0xc078c207 in cluster_wbuild (vp=0xc4616564, size=16384, start_lbn=3, len=3) at /usr/src/sys/kern/vfs_cluster.c:925 #9 0xc07829a6 in vfs_bio_awrite (bp=0xd7fde3bc) at /usr/src/sys/kern/vfs_bio.c:1668 #10 0xc091593e in ffs_syncvnode (vp=0xc4616564, waitfor=3) at /usr/src/sys/ufs/ffs/ffs_vnops.c:283 #11 0xc0910e8d in ffs_sync (mp=0xc4209b30, waitfor=3, td=0xc4044660) at /usr/src/sys/ufs/ffs/ffs_vfsops.c:1234 #12 0xc079d4ef in sync_fsync (ap=0xe4475cd4) at /usr/src/sys/kern/vfs_subr.c:3217 #13 0xc0a01392 in VOP_FSYNC_APV (vop=0xc0affa60, a=0xe4475cd4) at vnode_if.c:1007 #14 0xc079dcd5 in sched_sync () at vnode_if.h:538 #15 0xc06f77f4 in fork_exit (callout=0xc079d5d0 <sched_sync>, arg=0x0, frame=0xe4475d38) at /usr/src/sys/kern/kern_fork.c:781 #16 0xc09d3240 in fork_trampoline () at /usr/src/sys/i386/i386/exception.s:205 (kgdb) I kept the vmcores (i386 and amd64) if someone needs them. Regards, -- Herve Boulouis