Hi folks.
Today I got a new locking issue.
This is the first time I got it, and it's merely reproduced.
The box has lost both remote connection and local access.
No SIGINFO output on the local console even.
Jumping in ddb> shows the next:
1) first, this is a 8-way web server. No processes on runqueue except one httpd
(i.e. ps shows R in its state):
db> bt 9114
Tracing pid 9114 tid 100854 td 0xcabfe190
sched_switch(3401572752,3354648080,6) at sched_switch+323
mi_switch(6,3354648080,3354648424,3231755728,4013194124,...) at mi_switch+442
maybe_preempt(3354648080) at maybe_preempt+196
sched_add(3354648080,4,3401589408,3401589408,3401573096,...) at sched_add+600
setrunqueue(3231755728,0) at setrunqueue+99
kseq_load_add(3231755728,3401572752,3404215344,4013194200,2097222,...) at kseq_l
oad_add+95
sched_clock(3227874104,3231974076,0,3231974076,0,...) at sched_clock+341
_end(0,0,23872,22116064,2058572708,...) at 4013194220
2) although a simple bt shows the following:
db> bt
Tracing pid 16 tid 100000 td 0xc7cfe000
kdb_enter(c094016e) at kdb_enter+0x2b
siointr1(c7f93000) at siointr1+0xce
siointr(c7f93000) at siointr+0x5e
intr_execute_handlers(c7cf24c8,e687ac94,4,e687acd8,c0899013,...) at intr_execute
_handlers+0xe1
lapic_handle_intr(37) at lapic_handle_intr+0x2e
Xapic_isr1() at Xapic_isr1+0x33
--- interrupt, eip = 0xc0b94165, esp = 0xe687acd8, ebp = 0xe687acd8 ---
acpi_cpu_c1(ea4621f1,22d3014d,c7cfe000,c7cfe000,2,...) at acpi_cpu_c1+0x5
acpi_cpu_idle(e687ad10,c066c3f1,c7cfc000,c066c35c,e687ad24,...) at acpi_cpu_idle
+0x152
cpu_idle(c7cfc000,c066c35c,e687ad24,c066c0dd,0,...) at cpu_idle+0x28
idle_proc(0,e687ad38) at idle_proc+0x95
fork_exit(c066c35c,0,e687ad38) at fork_exit+0x71
fork_trampoline() at fork_trampoline+0x8
--- trap 0x1, eip = 0, esp = 0xe687ad6c, ebp = 0 ---
3) most httpd, sshd, and possibly some other processes stuck in Giant:
db> ps
pid ppid pgrp uid state wmesg wchan cmd
11568 98516 98436 0 L *Giant 0xc8bf3880 exim-4.66-0
11456 7001 7001 14544 LL *Giant 0xc8bf3880 httpd
11400 46597 46597 36386 L *Giant 0xc8bf3880 httpd
11302 6998 6998 27116 L *Giant 0xc8bf3880 httpd
11251 6711 6711 7638 L *Giant 0xc8bf3880 httpd
11197 6711 6711 7638 L *Giant 0xc8bf3880 httpd
11196 6711 6711 7638 L *Giant 0xc8bf3880 httpd
11146 6711 6711 7638 L *Giant 0xc8bf3880 httpd
11138 90916 90916 37117 L *Giant 0xc8bf3880 httpd
11029 6711 6711 7638 L *Giant 0xc8bf3880 httpd
11026 6711 6711 7638 L *Giant 0xc8bf3880 httpd
10981 6711 6711 7638 L *Giant 0xc8bf3880 httpd
10930 7969 7969 33194 S accept 0xcd056302 httpd
10832 6711 6711 7638 L *Giant 0xc8bf3880 httpd
10783 7001 7001 14544 L *Giant 0xc8bf3880 httpd
10782 6998 6998 27116 S lockf 0xd1e392c0 httpd
10688 6711 6711 7638 L *Giant 0xc8bf3880 httpd
10681 6711 6711 7638 L *Giant 0xc8bf3880 httpd
10675 7001 7001 14544 S sbwait 0xd1312a64 httpd
10576 58722 58722 4868 L *Giant 0xc8bf3880 httpd
10541 59002 59002 19140 L *Giant 0xc8bf3880 httpd
10540 58722 58722 4868 L *Giant 0xc8bf3880 httpd
10500 7001 7001 14544 S sbwait 0xce1aae90 httpd
10403 8143 8143 38211 L *Giant 0xc8bf3880 httpd
10357 7001 7001 14544 L *Giant 0xc8bf3880 httpd
10050 7001 7001 14544 L *Giant 0xc8bf3880 httpd
9704 90238 90238 22123 S select 0xc0a12944 httpd
9515 86902 86902 0 L *Giant 0xc8bf3880 proftpd
9491 41120 41120 37989 L *Giant 0xc8bf3880 httpd
9399 41120 41120 37989 L *Giant 0xc8bf3880 httpd
9186 7062 7062 20966 S accept 0xcb9b5892 httpd
9185 7062 7062 20966 L *Giant 0xc8bf3880 httpd
9148 8550 8550 13034 L *Giant 0xc8bf3880 httpd
9147 8261 8261 26789 L *Giant 0xc8bf3880 httpd
9146 8261 8261 26789 L *Giant 0xc8bf3880 httpd
9114 8550 8550 13034 R CPU 5 httpd
9111 6842 6842 36260 L *Giant 0xc8bf3880 httpd
9081 6842 6842 36260 S lockf 0xc8534780 httpd
9018 8042 8042 36016 S select 0xc0a12944 httpd
8988 3412 3412 2583 L *Giant 0xc8bf3880 httpd
8915 7583 7583 36227 L *Giant 0xc8bf3880 httpd
8342 41404 41404 37685 L *Giant 0xc8bf3880 httpd
8031 6711 6711 7638 S sbwait 0xd141454c httpd
7862 8505 8505 36816 L *Giant 0xc8bf3880 httpd
7634 7344 7344 2488 L *Giant 0xc8bf3880 httpd
7556 7730 7730 21562 S lockf 0xc836ca00 httpd
7418 8543 8543 13369 S accept 0xcd28303a httpd
7257 8558 8558 36731 L *Giant 0xc8bf3880 httpd
7144 8058 8058 36611 L *Giant 0xc8bf3880 httpd
7089 7730 7730 21562 L *Giant 0xc8bf3880 httpd
6143 7684 7684 36144 S accept 0xcc05c03a httpd
5548 7349 7349 36645 L *Giant 0xc8bf3880 httpd
5383 7001 7001 14544 S sbwait 0xd0a20900 httpd
4851 6753 6753 1742 L *Giant 0xc8bf3880 httpd
4793 6753 6753 1742 L *Giant 0xc8bf3880 httpd
4571 7304 7304 28968 L *Giant 0xc8bf3880 httpd
4347 3412 3412 2583 L *Giant 0xc8bf3880 httpd
4186 8404 8404 36328 S accept 0xcccaa466 httpd
3736 7001 7001 14544 S sbwait 0xd123c20c httpd
3634 86902 86902 0 L *Giant 0xc8bf3880 proftpd
1999 11633 11633 13556 L *Giant 0xc8bf3880 httpd
879 90916 90916 37117 L *Giant 0xc8bf3880 httpd
713 6845 6845 36251 L *Giant 0xc8bf3880 httpd
322 6307 6307 4538 S accept 0xc86b95ca httpd
98806 8404 8404 36328 S accept 0xcccaa466 httpd
98516 98451 98436 0 S wait 0xcc0d8430 exim-4.66-0
98502 7603 7603 36802 L *Giant 0xc8bf3880 httpd
98485 7603 7603 36802 L *Giant 0xc8bf3880 httpd
98471 7896 7896 36493 L *Giant 0xc8bf3880 httpd
98451 98447 98436 0 S wait 0xd0e12860 sh
98447 98436 98436 0 S wait 0xcaf3cc90 lockf
98436 98423 98436 0 Ss wait 0xc858a000 sh
98423 6726 6726 0 S piperd 0xc9c0eb28 cron
98399 7896 7896 36493 L *Giant 0xc8bf3880 httpd
98342 62229 62229 36247 L *Giant 0xc8bf3880 httpd
98340 62229 62229 36247 S accept 0xc9e839f6 httpd
98334 7304 7304 28968 S lockf 0xc8200c40 httpd
98297 7061 7061 36233 S accept 0xcbf7dcbe httpd
98289 7543 7543 36353 L *Giant 0xc8bf3880 httpd
98277 7031 7031 36202 L *Giant 0xc8bf3880 httpd
98276 7031 7031 36202 L *Giant 0xc8bf3880 httpd
98248 97266 98248 36737 S+ select 0xc0a12944 mc
98223 7061 7061 36233 S accept 0xcbf7dcbe httpd
98219 7977 7977 36183 L *Giant 0xc8bf3880 httpd
98153 8045 8045 2394 S accept 0xccf4f03a httpd
98040 8251 8251 36181 L *Giant 0xc8bf3880 httpd
98012 8251 8251 36181 L *Giant 0xc8bf3880 httpd
97983 8372 8372 36505 S accept 0xcd3b8cbe httpd
97981 7618 7618 36616 S accept 0xcc56f892 httpd
97958 15380 15380 36755 L *Giant 0xc8bf3880 httpd
97956 15380 15380 36755 S accept 0xd093c466 httpd
97921 7660 7660 36763 S accept 0xcc3319f6 httpd
97898 7584 7584 7134 S accept 0xcc05c5ca httpd
97886 7677 7677 36136 L *Giant 0xc8bf3880 httpd
97877 7677 7677 36136 S accept 0xcbc75e22 httpd
97823 7001 7001 14544 S sbwait 0xd177bf08 httpd
97807 8344 8344 36737 L *Giant 0xc8bf3880 httpd
97727 7543 7543 36353 L *Giant 0xc8bf3880 httpd
97699 7651 7651 36240 L *Giant 0xc8bf3880 httpd
97692 8484 8484 36632 S accept 0xccca99f6 httpd
97682 80307 80307 36453 LL *Giant 0xc8bf3880 httpd
97668 7823 7823 36122 S accept 0xcca7f19e httpd
97647 7488 7488 36130 L *Giant 0xc8bf3880 httpd
97521 7488 7488 36130 L *Giant 0xc8bf3880 httpd
97492 8674 8674 27415 L *Giant 0xc8bf3880 httpd
97489 6621 6621 36062 S accept 0xcb1b59f6 httpd
97404 7524 7524 25829 S accept 0xccf8072e httpd
97403 6842 6842 36260 S sbwait 0xd1140c40 httpd
97396 8674 8674 27415 L *Giant 0xc8bf3880 httpd
97349 80307 80307 36453 L *Giant 0xc8bf3880 httpd
97342 7706 7706 14417 S accept 0xccfdeb5a httpd
97324 7977 7977 36183 L *Giant 0xc8bf3880 httpd
97319 8667 8667 36523 S accept 0xcd6a672e httpd
97311 7120 7120 36257 L *Giant 0xc8bf3880 httpd
97305 7120 7120 36257 L *Giant 0xc8bf3880 httpd
97293 8595 8595 36266 S accept 0xccf4d9f6 httpd
97266 97265 97266 36737 Ss+ wait 0xc8e3b648 bash
97265 97237 97237 36737 L *Giant 0xc8bf3880 sshd
97237 1107 97237 0 Ss sbwait 0xcd6a520c sshd
4) tracing random processes waiting in Giant:
db> bt 11568
Tracing pid 11568 tid 102623 td 0xce272000
sched_switch(3458670592,0,1) at sched_switch+323
mi_switch(1,0) at mi_switch+442
turnstile_wait(3231738976,3401572752) at turnstile_wait+759
_mtx_lock_sleep(3231738976,3458670592,0,0,0) at _mtx_lock_sleep+252
msleep(3688690704,3231788928,76,3230830241,0) at msleep+716
bwait(3688690704,76,3230830241) at bwait+95
bufwait(3688690704,1,0,0,4053851052,...) at bufwait+26
breadn(3487414672,0,0,2048,0,...) at breadn+486
bread(3487414672,0,0,2048,0,...) at bread+32
ffs_read(4053851124) at ffs_read+539
VOP_READ_APV(3231536928,4053851124) at VOP_READ_APV+56
vn_read(3418793016,4053851324,3522853248,0,3458670592) at vn_read+561
dofileread(3458670592,6,3418793016,4053851324,4294967295,...) at dofileread+133
kern_readv(3458670592,6,4053851324,135356416,4096,...) at kern_readv+54
read(3458670592,4053851396) at read+69
syscall(3216965691,59,3216965691,0,675511712,...) at syscall+703
Xint0x80_syscall() at Xint0x80_syscall+31
--- syscall (3, FreeBSD ELF32, read), eip = 675361615, esp = 3217023628, ebp = 3
217023656 ---
db> bt 11456
Tracing pid 11456 tid 102521 td 0xd17c7190
sched_switch(3514593680,0,1) at sched_switch+323
mi_switch(1,0) at mi_switch+442
turnstile_wait(3231738976,3401572752) at turnstile_wait+759
_mtx_lock_sleep(3231738976,3514593680,0,0,0) at _mtx_lock_sleep+252
vm_fault(3403956520,672894976,2,8) at vm_fault+421
trap_pfault(4046785848,1,672898616,672898616,0,...) at trap_pfault+291
trap(671744059,3216965691,3216965691,0,138252288,...) at trap+491
calltrap() at calltrap+5
--- trap 12, eip = 672436781, esp = 3217025340, ebp = 3217025416 ---
db> bt 11400
Tracing pid 11400 tid 101283 td 0xcbb4d640
sched_switch(3417626176,0,1) at sched_switch+323
mi_switch(1,0) at mi_switch+442
turnstile_wait(3231738976,3401572752) at turnstile_wait+759
_mtx_lock_sleep(3231738976,3417626176,0,0,0) at _mtx_lock_sleep+252
namei(4021980160) at namei+492
kern_stat(3417626176,137444900,0,4021980276) at kern_stat+53
stat(3417626176,4021980420) at stat+27
syscall(3216965691,672923707,3216965691,671783368,3217026412,...) at syscall+703
Xint0x80_syscall() at Xint0x80_syscall+31
--- syscall (188, FreeBSD ELF32, stat), eip = 672785491, esp = 3217025548, ebp
db> bt 11302
Tracing pid 11302 tid 102454 td 0xd17c3960
sched_switch(3514579296,0,1) at sched_switch+323
mi_switch(1,0) at mi_switch+442
turnstile_wait(3231738976,3401572752) at turnstile_wait+759
_mtx_lock_sleep(3231738976,3514579296,0,0,0) at _mtx_lock_sleep+252
flock(3514579296,4046601476) at flock+115
syscall(3230203963,59,59,671795656,3217026436,...) at syscall+703
Xint0x80_syscall() at Xint0x80_syscall+31
--- syscall (131, FreeBSD ELF32, flock), eip = 672454479, esp = 3217025820, ebp
= 3217025848 ---
db> bt 97265
Tracing pid 97265 tid 102463 td 0xc83b67d0
sched_switch(3359336400,0,1) at sched_switch+323
mi_switch(1,0) at mi_switch+442
turnstile_wait(3231738976,3401572752) at turnstile_wait+759
_mtx_lock_sleep(3231738976,3359336400,0,0,0) at _mtx_lock_sleep+252
giant_poll(3431429632,64,3359336400,3431429632,3231320352,...) at giant_poll+54
devfs_poll_f(3362780288,64,3520869760,3359336400) at devfs_poll_f+47
selscan(3359336400,4033203092,4033203076,9) at selscan+497
kern_select(3359336400,9,134724512,134724528,0,...) at kern_select+977
select(3359336400,4033203460) at select+68
syscall(3216965691,3216965691,3216965691,0,3217023260,...) at syscall+703
Xint0x80_syscall() at Xint0x80_syscall+31
--- syscall (93, FreeBSD ELF32, select), eip = 674546383, esp = 3217023148, ebp
= 3217023224 ---
db> bt 97305
Tracing pid 97305 tid 102153 td 0xd1b61190
sched_switch(3518370192,0,1) at sched_switch+323
mi_switch(1,0) at mi_switch+442
turnstile_wait(3231738976,3401572752) at turnstile_wait+759
_mtx_lock_sleep(3231738976,3518370192,0,0,0) at _mtx_lock_sleep+252
namei(4043836416) at namei+492
kern_stat(3518370192,135532244,0,4043836532) at kern_stat+53
stat(3518370192,4043836676) at stat+27
syscall(135397435,59,3216965691,671783368,3217026408,...) at syscall+703
Xint0x80_syscall() at Xint0x80_syscall+31
--- syscall (188, FreeBSD ELF32, stat), eip = 672785491, esp = 3217025548, ebp
3217025608 ---
db> bt 97492
Tracing pid 97492 tid 102248 td 0xc82e44b0
sched_switch(3358475440,0,1) at sched_switch+323
mi_switch(1,0) at mi_switch+442
turnstile_wait(3231738976,3401572752) at turnstile_wait+759
_mtx_lock_sleep(3231738976,3358475440,0,0,0) at _mtx_lock_sleep+252
namei(4039949312) at namei+492
kern_stat(3358475440,140045796,0,4039949428) at kern_stat+53
stat(3358475440,4039949572) at stat+27
syscall(671809595,3216965691,3216965691,671828424,3217026408,...) at syscall+703
Xint0x80_syscall() at Xint0x80_syscall+31
--- syscall (188, FreeBSD ELF32, stat), eip = 672830547, esp = 3217025548, ebp
3217025608 ---
db> bt 97807
Tracing pid 97807 tid 102519 td 0xce8497d0
sched_switch(3464796112,0,1) at sched_switch+323
mi_switch(1,0) at mi_switch+442
turnstile_wait(3231738976,3401572752) at turnstile_wait+759
_mtx_lock_sleep(3231738976,3464796112,0,0,0) at _mtx_lock_sleep+252
namei(4049681408) at namei+492
kern_stat(3464796112,136572492,0,4049681524) at kern_stat+53
stat(3464796112,4049681668) at stat+27
syscall(671875131,59,3216965691,671783368,3217026436,...) at syscall+703
Xint0x80_syscall() at Xint0x80_syscall+31
--- syscall (188, FreeBSD ELF32, stat), eip = 672785491, esp = 3217025580, ebp
3217025640 ---
5) kernel is built with kdb/ddb and ctrl-alt-esc enabled, so this can
be a non-complete output.
db> show lockedvnods
Locked vnodes
0xcfddb990: tag ufs, type VREG
usecount 1, writecount 0, refcount 3 mountedhere 0
flags ()
v_object 0xcf9f8d68 ref 0 pages 1
lock type ufs: SHARED (count 1)
ino 242117869, on dev aacdu0s1g
--
wbr,
pluknet