I have a RELENG_7 box running in a netboot environment that is crashing every few days. I don't have a disk on it yet, so I have nowhere to send the coredump. But I hooked up a serial cable and made it drop to the debugger. We are going to try a USB-connected disk and configure it as swap so that we can then try and drop the coredump to it. Any other suggestions on how to track this down? db> where Tracing pid 24 tid 100023 td 0xc4cc4cc0 kdb_enter(c08587d4,e5301000,1,e5273a9c,e5273a8c,...) at kdb_enter+0x33 vm_fault(c1071000,e5301000,1,0,14990740,...) at vm_fault+0x178 trap_pfault(0,0,c1072d80,c1072d38,c4d50558,...) at trap_pfault+0x20e trap(e5273bdc) at trap+0x3fa calltrap() at calltrap+0x6 --- trap 0xc, eip = 0xc07f5cb6, esp = 0xe5273c1c, ebp = 0xe5273c54 --- generic_bcopy(e5300836,7ac5,0,c4dacc00,0,...) at generic_bcopy+0x1a rl_rxeof(c05c3466,c4cc4cc0,0,1,1273cbc,...) at rl_rxeof+0x139 rl_intr(c4d27000,0,c0841c05,46b,0,...) at rl_intr+0xba ithread_loop(c4d809d0,e5273d38,0,0,0,...) at ithread_loop+0x1ab fork_exit(c059da60,c4d809d0,e5273d38) at fork_exit+0x99 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe5273d70, ebp = 0 --- db> panic panic: from debugger cpuid = 1 KDB: stack backtrace: db_trace_self_wrapper(c0845ff1,e52736cc,c05c3367,c4d97340,e52736c8,...) at db_trace_self_wrapper+0x26 kdb_backtrace(c4d97340,e52736c8,c05e1a92,c0830310,c4d97374,...) at kdb_backtrace+0x29 mi_switch(1,0,1,104,0,...) at mi_switch+0x47 sched_bind(c4cc4cc0,0,c0843f3c,10e,e527370c,...) at sched_bind+0x60 boot(c0844067,1,0,0,1,...) at boot+0x47 panic(c0829200,e5273810,c0467c95,c05e3cc3,0,...) at panic+0x13b db_panic(c05e3cc3,0,ffffffff,e527377c,c0469c00,...) at db_panic+0x17 db_command_loop(c05e3cc3,0,86,1,0,...) at db_command_loop+0x2f5 db_trap(a,0,1,a,e5273924,...) at db_trap+0xc5 kdb_trap(a,0,e5273924,0,c4d50558,...) 
at kdb_trap+0x96 trap(e5273924) at trap+0x57b calltrap() at calltrap+0x6 --- trap 0xa, eip = 0xc05e3cc3, esp = 0xe5273964, ebp = 0xe527398c --- kdb_enter(c08587d4,e5301000,1,e5273a9c,e5273a8c,...) at kdb_enter+0x33 vm_fault(c1071000,e5301000,1,0,14990740,...) at vm_fault+0x178 trap_pfault(0,0,c1072d80,c1072d38,c4d50558,...) at trap_pfault+0x20e trap(e5273bdc) at trap+0x3fa calltrap() at calltrap+0x6 --- trap 0xc, eip = 0xc07f5cb6, esp = 0xe5273c1c, ebp = 0xe5273c54 --- generic_bcopy(e5300836,7ac5,0,c4dacc00,0,...) at generic_bcopy+0x1a rl_rxeof(c05c3466,c4cc4cc0,0,1,1273cbc,...) at rl_rxeof+0x139 rl_intr(c4d27000,0,c0841c05,46b,0,...) at rl_intr+0xba ithread_loop(c4d809d0,e5273d38,0,0,0,...) at ithread_loop+0x1ab fork_exit(c059da60,c4d809d0,e5273d38) at fork_exit+0x99 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe5273d70, ebp = 0 --- db> -------------------------------------------------------------------- Mike Tancsa, tel +1 519 651 3400 Sentex Communications, mike@sentex.net Providing Internet since 1994 www.sentex.net Cambridge, Ontario Canada www.sentex.net/mike
On Tue, Mar 25, 2008 at 02:44:58PM -0400, Mike Tancsa wrote: > I have a RELENG_7 box running in a netboot environment that is > crashing every few days. I dont have a disk on it yet, so I have no > where to send the coredump. But I hooked up a serial cable and made > it drop to debugger. We are going to try a USB connected disk and > configure it as swap so that we can then try and drop the coredump to > it. Any other suggestions on how to track this down ? > I guess the rl(4) hardware received a too long/short frame, such that subsequent code in the driver tried to copy the received frame with an invalid length. I don't have the data sheet for rl(4) hardware, so I'm not sure how this can happen. Anyway, try the attached patch. > > db> where > Tracing pid 24 tid 100023 td 0xc4cc4cc0 > kdb_enter(c08587d4,e5301000,1,e5273a9c,e5273a8c,...) at kdb_enter+0x33 > vm_fault(c1071000,e5301000,1,0,14990740,...) at vm_fault+0x178 > trap_pfault(0,0,c1072d80,c1072d38,c4d50558,...) at trap_pfault+0x20e > trap(e5273bdc) at trap+0x3fa > calltrap() at calltrap+0x6 > --- trap 0xc, eip = 0xc07f5cb6, esp = 0xe5273c1c, ebp = 0xe5273c54 --- > generic_bcopy(e5300836,7ac5,0,c4dacc00,0,...) at generic_bcopy+0x1a > rl_rxeof(c05c3466,c4cc4cc0,0,1,1273cbc,...) at rl_rxeof+0x139 > rl_intr(c4d27000,0,c0841c05,46b,0,...) at rl_intr+0xba > ithread_loop(c4d809d0,e5273d38,0,0,0,...) at ithread_loop+0x1ab > fork_exit(c059da60,c4d809d0,e5273d38) at fork_exit+0x99 > fork_trampoline() at fork_trampoline+0x8 > --- trap 0, eip = 0, esp = 0xe5273d70, ebp = 0 --- > db> panic > panic: from debugger > cpuid = 1 > KDB: stack backtrace: > db_trace_self_wrapper(c0845ff1,e52736cc,c05c3367,c4d97340,e52736c8,...) > at db_trace_self_wrapper+0x26 > kdb_backtrace(c4d97340,e52736c8,c05e1a92,c0830310,c4d97374,...) at > kdb_backtrace+0x29 > mi_switch(1,0,1,104,0,...) at mi_switch+0x47 > sched_bind(c4cc4cc0,0,c0843f3c,10e,e527370c,...) at sched_bind+0x60 > boot(c0844067,1,0,0,1,...) at boot+0x47 > panic(c0829200,e5273810,c0467c95,c05e3cc3,0,...) 
at panic+0x13b > db_panic(c05e3cc3,0,ffffffff,e527377c,c0469c00,...) at db_panic+0x17 > db_command_loop(c05e3cc3,0,86,1,0,...) at db_command_loop+0x2f5 > db_trap(a,0,1,a,e5273924,...) at db_trap+0xc5 > kdb_trap(a,0,e5273924,0,c4d50558,...) at kdb_trap+0x96 > trap(e5273924) at trap+0x57b > calltrap() at calltrap+0x6 > --- trap 0xa, eip = 0xc05e3cc3, esp = 0xe5273964, ebp = 0xe527398c --- > kdb_enter(c08587d4,e5301000,1,e5273a9c,e5273a8c,...) at kdb_enter+0x33 > vm_fault(c1071000,e5301000,1,0,14990740,...) at vm_fault+0x178 > trap_pfault(0,0,c1072d80,c1072d38,c4d50558,...) at trap_pfault+0x20e > trap(e5273bdc) at trap+0x3fa > calltrap() at calltrap+0x6 > --- trap 0xc, eip = 0xc07f5cb6, esp = 0xe5273c1c, ebp = 0xe5273c54 --- > generic_bcopy(e5300836,7ac5,0,c4dacc00,0,...) at generic_bcopy+0x1a > rl_rxeof(c05c3466,c4cc4cc0,0,1,1273cbc,...) at rl_rxeof+0x139 > rl_intr(c4d27000,0,c0841c05,46b,0,...) at rl_intr+0xba > ithread_loop(c4d809d0,e5273d38,0,0,0,...) at ithread_loop+0x1ab > fork_exit(c059da60,c4d809d0,e5273d38) at fork_exit+0x99 > fork_trampoline() at fork_trampoline+0x8 > --- trap 0, eip = 0, esp = 0xe5273d70, ebp = 0 --- > db> > -- Regards, Pyun YongHyeon -------------- next part -------------- --- sys/pci/if_rl.c.orig 2008-03-04 13:07:34.000000000 +0900 +++ sys/pci/if_rl.c 2008-03-26 09:35:49.000000000 +0900 @@ -1117,17 +1117,19 @@ * datasheet makes absolutely no mention of this and * RealTek should be shot for this. */ - if ((uint16_t)(rxstat >> 16) == RL_RXSTAT_UNFINISHED) + total_len = rxstat >> 16; + if (total_len == RL_RXSTAT_UNFINISHED) break; - if (!(rxstat & RL_RXSTAT_RXOK)) { + if (!(rxstat & RL_RXSTAT_RXOK) || + total_len < ETHER_MIN_LEN || + total_len > ETHER_MAX_LEN + ETHER_VLAN_ENCAP_LEN) { ifp->if_ierrors++; rl_init_locked(sc); return; } /* No errors; receive the packet. */ - total_len = rxstat >> 16; rx_bytes += total_len + 4; /*
At 08:51 PM 3/25/2008, Pyun YongHyeon wrote: >On Tue, Mar 25, 2008 at 02:44:58PM -0400, Mike Tancsa wrote: > > I have a RELENG_7 box running in a netboot environment that is > > crashing every few days. I dont have a disk on it yet, so I have no > > where to send the coredump. But I hooked up a serial cable and made > > it drop to debugger. We are going to try a USB connected disk and > > configure it as swap so that we can then try and drop the coredump to > > it. Any other suggestions on how to track this down ? > > > >I guess rl(4) hardware received too long/short frame such that >subsequent code in driver tried to copy recevied frame with invalid >length. I don't have data sheet for rl(4) hardwares so I'm not sure >how this can happen. >Anyway, try attached patch. Hi, So far so good. The box has been running quite some time with no panic. By now, it would have crashed a few times. Thanks for fixing this! Hopefully, it can be committed to the tree and eventually MFC'd. ---Mike > > > > db> where > > Tracing pid 24 tid 100023 td 0xc4cc4cc0 > > kdb_enter(c08587d4,e5301000,1,e5273a9c,e5273a8c,...) at kdb_enter+0x33 > > vm_fault(c1071000,e5301000,1,0,14990740,...) at vm_fault+0x178 > > trap_pfault(0,0,c1072d80,c1072d38,c4d50558,...) at trap_pfault+0x20e > > trap(e5273bdc) at trap+0x3fa > > calltrap() at calltrap+0x6 > > --- trap 0xc, eip = 0xc07f5cb6, esp = 0xe5273c1c, ebp = 0xe5273c54 --- > > generic_bcopy(e5300836,7ac5,0,c4dacc00,0,...) at generic_bcopy+0x1a > > rl_rxeof(c05c3466,c4cc4cc0,0,1,1273cbc,...) at rl_rxeof+0x139 > > rl_intr(c4d27000,0,c0841c05,46b,0,...) at rl_intr+0xba > > ithread_loop(c4d809d0,e5273d38,0,0,0,...) at ithread_loop+0x1ab > > fork_exit(c059da60,c4d809d0,e5273d38) at fork_exit+0x99 > > fork_trampoline() at fork_trampoline+0x8 > > --- trap 0, eip = 0, esp = 0xe5273d70, ebp = 0 --- > > db> panic > > panic: from debugger > > cpuid = 1 > > KDB: stack backtrace: > > db_trace_self_wrapper(c0845ff1,e52736cc,c05c3367,c4d97340,e52736c8,...) 
> > at db_trace_self_wrapper+0x26 > > kdb_backtrace(c4d97340,e52736c8,c05e1a92,c0830310,c4d97374,...) at > > kdb_backtrace+0x29 > > mi_switch(1,0,1,104,0,...) at mi_switch+0x47 > > sched_bind(c4cc4cc0,0,c0843f3c,10e,e527370c,...) at sched_bind+0x60 > > boot(c0844067,1,0,0,1,...) at boot+0x47 > > panic(c0829200,e5273810,c0467c95,c05e3cc3,0,...) at panic+0x13b > > db_panic(c05e3cc3,0,ffffffff,e527377c,c0469c00,...) at db_panic+0x17 > > db_command_loop(c05e3cc3,0,86,1,0,...) at db_command_loop+0x2f5 > > db_trap(a,0,1,a,e5273924,...) at db_trap+0xc5 > > kdb_trap(a,0,e5273924,0,c4d50558,...) at kdb_trap+0x96 > > trap(e5273924) at trap+0x57b > > calltrap() at calltrap+0x6 > > --- trap 0xa, eip = 0xc05e3cc3, esp = 0xe5273964, ebp = 0xe527398c --- > > kdb_enter(c08587d4,e5301000,1,e5273a9c,e5273a8c,...) at kdb_enter+0x33 > > vm_fault(c1071000,e5301000,1,0,14990740,...) at vm_fault+0x178 > > trap_pfault(0,0,c1072d80,c1072d38,c4d50558,...) at trap_pfault+0x20e > > trap(e5273bdc) at trap+0x3fa > > calltrap() at calltrap+0x6 > > --- trap 0xc, eip = 0xc07f5cb6, esp = 0xe5273c1c, ebp = 0xe5273c54 --- > > generic_bcopy(e5300836,7ac5,0,c4dacc00,0,...) at generic_bcopy+0x1a > > rl_rxeof(c05c3466,c4cc4cc0,0,1,1273cbc,...) at rl_rxeof+0x139 > > rl_intr(c4d27000,0,c0841c05,46b,0,...) at rl_intr+0xba > > ithread_loop(c4d809d0,e5273d38,0,0,0,...) at ithread_loop+0x1ab > > fork_exit(c059da60,c4d809d0,e5273d38) at fork_exit+0x99 > > fork_trampoline() at fork_trampoline+0x8 > > --- trap 0, eip = 0, esp = 0xe5273d70, ebp = 0 --- > > db> > > > >-- >Regards, >Pyun YongHyeon >