nmbd has been dying on me occasionally. I'm running Mandrake 9.2 with
samba3-server-3.0.0-2mdk. We've got our users in LDAP, but I'd seen this
symptom previously when I was using an earlier version of Samba that didn't
support LDAP. There is nothing useful in the logs, and ps shows an nmbd
process still running.
This last time around, I had strace attached to both nmbd processes and found
that the parent process was the one still running, but the child process
had died. Here's the end of the strace output for the child:
poll([{fd=9, events=POLLIN, revents=POLLIN}], 1, 5000) = 1
ioctl(9, FIONREAD, [81]) = 0
recvfrom(9, "\t\1\201\200\0\1\0\2\0\0\0\0\vBORDER-ACCT\tbordere"...,
1024, 0, {sa_family=AF_INET, sin_port=htons(53),
sin_addr=inet_addr("127.0.0.1")}, [16]) = 81
close(9) = 0
write(5, "BORDER-ACCT\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
88) = 88
read(6, "ACCTMAX\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
88) = 88
open("/etc/hosts", O_RDONLY) = 9
fcntl64(9, F_GETFD) = 0
fcntl64(9, F_SETFD, FD_CLOEXEC) = 0
fstat64(9, {st_mode=S_IFREG|0644, st_size=154, ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) =
0x40018000
read(9, "# Generated automatically from h"..., 4096) = 154
read(9, "", 4096) = 0
close(9) = 0
munmap(0x40018000, 4096) = 0
socket(PF_INET, SOCK_DGRAM, IPPROTO_IP) = 9
connect(9, {sa_family=AF_INET, sin_port=htons(53),
sin_addr=inet_addr("127.0.0.1")}, 28) = 0
send(9, "\t\2\1\0\0\1\0\0\0\0\0\0\7ACCTMAX\tborderent\3c"..., 39, 0) =
39
gettimeofday({1080304120, 623860}, NULL) = 0
poll([{fd=9, events=POLLIN, revents=POLLIN}], 1, 5000) = 1
ioctl(9, FIONREAD, [55]) = 0
recvfrom(9, "\t\2\201\200\0\1\0\1\0\0\0\0\7ACCTMAX\tborderent\3c"...,
1024, 0, {sa_family=AF_INET, sin_port=htons(53),
sin_addr=inet_addr("127.0.0.1")}, [16]) = 55
close(9) = 0
write(5, "ACCTMAX\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
88) = 88
read(6, "BARBARA3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
88) = 88
open("/etc/hosts", O_RDONLY) = 9
fcntl64(9, F_GETFD) = 0
fcntl64(9, F_SETFD, FD_CLOEXEC) = 0
fstat64(9, {st_mode=S_IFREG|0644, st_size=154, ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) =
0x40018000
read(9, "# Generated automatically from h"..., 4096) = 154
read(9, "", 4096) = 0
close(9) = 0
munmap(0x40018000, 4096) = 0
socket(PF_INET, SOCK_DGRAM, IPPROTO_IP) = 9
connect(9, {sa_family=AF_INET, sin_port=htons(53),
sin_addr=inet_addr("127.0.0.1")}, 28) = 0
send(9, "\t\3\1\0\0\1\0\0\0\0\0\0\10BARBARA3\tborderent\3"..., 40, 0)
= 40
gettimeofday({1080325192, 628139}, NULL) = 0
poll([{fd=9, events=POLLIN, revents=POLLIN}], 1, 5000) = 1
ioctl(9, FIONREAD, [40]) = 0
recvfrom(9, "\t\3\205\203\0\1\0\0\0\0\0\0\10BARBARA3\tborderent\3"...,
1024, 0, {sa_family=AF_INET, sin_port=htons(53),
sin_addr=inet_addr("127.0.0.1")}, [16]) = 40
close(9) = 0
socket(PF_INET, SOCK_DGRAM, IPPROTO_IP) = 9
connect(9, {sa_family=AF_INET, sin_port=htons(53),
sin_addr=inet_addr("127.0.0.1")}, 28) = 0
send(9, "\t\4\1\0\0\1\0\0\0\0\0\0\10BARBARA3\10trinsics\3c"..., 39, 0)
= 39
gettimeofday({1080325192, 680132}, NULL) = 0
poll([{fd=9, events=POLLIN, revents=POLLIN}], 1, 5000) = 1
ioctl(9, FIONREAD, [39]) = 0
recvfrom(9,
"\t\4\205\203\0\1\0\0\0\0\0\0\10BARBARA3\10trinsics\3c"..., 1024, 0,
{sa_family=AF_INET, sin_port=htons(53),
sin_addr=inet_addr("127.0.0.1")}, [16]) = 39
close(9) = 0
socket(PF_INET, SOCK_DGRAM, IPPROTO_IP) = 9
connect(9, {sa_family=AF_INET, sin_port=htons(53),
sin_addr=inet_addr("127.0.0.1")}, 28) = 0
send(9, "\t\5\1\0\0\1\0\0\0\0\0\0\10BARBARA3\0\0\1\0\1", 26, 0) = 26
gettimeofday({1080325192, 698896}, NULL) = 0
poll([{fd=9, events=POLLIN, revents=POLLIN}], 1, 5000) = 1
ioctl(9, FIONREAD, [26]) = 0
recvfrom(9, "\t\5\205\203\0\1\0\0\0\0\0\0\10BARBARA3\0\0\1\0\1", 1024,
0, {sa_family=AF_INET, sin_port=htons(53),
sin_addr=inet_addr("127.0.0.1")}, [16]) = 26
close(9) = 0
rt_sigprocmask(SIG_BLOCK, [PIPE], [FPE USR2], 8) = 0
getpid() = 15056
geteuid32() = 0
getpid() = 15056
geteuid32() = 0
time(NULL) = 1080325192
write(8, "0o\2\1\4cj\4\34ou=Hosts,dc=borderent,d"..., 113) = 113
select(1024, [8], [], NULL, NULL) = 1 (in [8])
read(8, "", 8) = 0
write(8, "0\5\2\1\5B\0", 7) = -1 EPIPE (Broken pipe)
shutdown(8, 2 /* send and receive */) = -1 ENOTCONN (Transport endpoint is not
connected)
close(8) = 0
time([1080325192]) = 1080325192
getpid() = 15056
rt_sigaction(SIGPIPE, {0x402192e0, [], SA_RESTORER, 0x4016aca8}, {SIG_DFL}, 8) =
0
socket(PF_UNIX, SOCK_DGRAM, 0) = 8
fcntl64(8, F_SETFD, FD_CLOEXEC) = 0
connect(8, {sa_family=AF_UNIX, path="/dev/log"}, 16) = -1 ECONNREFUSED
(Connection refused)
close(8) = 0
rt_sigaction(SIGPIPE, {SIG_DFL}, NULL, 8) = 0
getpid() = 15056
geteuid32() = 0
socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 8
setsockopt(8, SOL_TCP, TCP_NODELAY, [1], 4) = 0
fcntl64(8, F_GETFL) = 0x2 (flags O_RDWR)
fcntl64(8, F_SETFL, O_RDWR|O_NONBLOCK) = 0
connect(8, {sa_family=AF_INET, sin_port=htons(389),
sin_addr=inet_addr("127.0.0.1")}, 16) = -1 EINPROGRESS (Operation now
in progress)
select(1024, NULL, [8], NULL, {30, 0}) = 1 (out [8], left {30, 0})
getpeername(8, {sa_family=AF_INET, sin_port=htons(389),
sin_addr=inet_addr("127.0.0.1")}, [16]) = 0
fcntl64(8, F_GETFL) = 0x802 (flags O_RDWR|O_NONBLOCK)
fcntl64(8, F_SETFL, O_RDWR) = 0
getpeername(8, {sa_family=AF_INET, sin_port=htons(389),
sin_addr=inet_addr("127.0.0.1")}, [16]) = 0
uname({sys="Linux", node="Border.BorderEnt.Com", ...}) = 0
time(NULL) = 1080325192
write(8, "0\f\2\1\1`\7\2\1\3\4\0\200\0", 14) = 14
time(NULL) = 1080325192
select(1024, [8], [], NULL, {30, 0}) = 1 (in [8], left {30, 0})
read(8, "0\f\2\1\1a\7\n", 8) = 8
read(8, "\1\0\4\0\4\0", 6) = 6
time(NULL) = 1080325192
setsockopt(8, SOL_SOCKET, SO_KEEPALIVE, [0], 4) = 0
fcntl64(8, F_SETFD, FD_CLOEXEC) = 0
getsockname(8, {sa_family=AF_INET, sin_port=htons(4256),
sin_addr=inet_addr("127.0.0.1")}, [16]) = 0
getpeername(8, {sa_family=AF_INET, sin_port=htons(389),
sin_addr=inet_addr("127.0.0.1")}, [16]) = 0
time([1080325192]) = 1080325192
time(NULL) = 1080325192
write(8, "0o\2\1\2cj\4\34ou=Hosts,dc=borderent,d"..., 113) = 113
select(1024, [8], [], NULL, NULL) = 1 (in [8])
read(8, "0\37\2\1\2e\32\n", 8) = 8
read(8, "\1 \4\23dc=borderent,dc=com\4\0", 25) = 25
time(NULL) = 1080325192
rt_sigprocmask(SIG_SETMASK, [FPE USR2], NULL, 8) = 0
--- SIGPIPE (Broken pipe) @ 0 (0) ---
+++ killed by SIGPIPE +++
I'm in over my head on this one. Any idea where I should look next?
Chris
--
Chris Garrigues http://www.DeepEddy.Com/~cwg/
Trinsic Solutions http://www.trinsics.com
1611-B West 6th Street
Austin, TX 78703-5074 512-322-0180
If you don't apply what you've learned, you haven't
learned anything.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 235 bytes
Desc: not available
Url :
http://lists.samba.org/archive/samba/attachments/20040326/66ec91a3/attachment.bin
I have upgraded to 3.0.2a-2mdk in order to solve another issue, but this still happens about once a day (but not always the same time). Can anybody suggest a useful debugging strategy?> From: Chris Garrigues <cwg-dated-1080758824.5e01a5@deepeddy.com> > Date: Fri, 26 Mar 2004 12:47:02 -0600 > > nmbd has been dying on me occasionally. I'm running mandrake 9.2 with > samba3-server-3.0.0-2mdk. We've got our users in ldap but I'd seen this > symptom previously when I was using an earlier version of samba that didn't > support ldap. There is nothing useful in the logs and a PS shows a nmb > process still running. > > > This last time around, I had a ptrace running on both nmbd processes and found > that the parent process was the one still running, but the child process > had died. Here's the end of the ptrace of the child: > > poll([{fd=9, events=POLLIN, revents=POLLIN}], 1, 5000) = 1 > ioctl(9, FIONREAD, [81]) = 0 > recvfrom(9, "\t\1\201\200\0\1\0\2\0\0\0\0\vBORDER-ACCT\tbordere"..., 1024, > 0, {sa_family=AF_INET, sin_port=htons(53), sin_addr=inet_addr("127.0.0.1")} > , [16]) = 81 > close(9) = 0 > write(5, "BORDER-ACCT\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 88) = > 88 > read(6, "ACCTMAX\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 88) > 88 > open("/etc/hosts", O_RDONLY) = 9 > fcntl64(9, F_GETFD) = 0 > fcntl64(9, F_SETFD, FD_CLOEXEC) = 0 > fstat64(9, {st_mode=S_IFREG|0644, st_size=154, ...}) = 0 > mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) > 0x40018000 > read(9, "# Generated automatically from h"..., 4096) = 154 > read(9, "", 4096) = 0 > close(9) = 0 > munmap(0x40018000, 4096) = 0 > socket(PF_INET, SOCK_DGRAM, IPPROTO_IP) = 9 > connect(9, {sa_family=AF_INET, sin_port=htons(53), sin_addr=inet_addr("127. 
> 0.0.1")}, 28) = 0 > send(9, "\t\2\1\0\0\1\0\0\0\0\0\0\7ACCTMAX\tborderent\3c"..., 39, 0) = 39 > gettimeofday({1080304120, 623860}, NULL) = 0 > poll([{fd=9, events=POLLIN, revents=POLLIN}], 1, 5000) = 1 > ioctl(9, FIONREAD, [55]) = 0 > recvfrom(9, "\t\2\201\200\0\1\0\1\0\0\0\0\7ACCTMAX\tborderent\3c"..., 1024, > 0, {sa_family=AF_INET, sin_port=htons(53), sin_addr=inet_addr("127.0.0.1") > }, [16]) = 55 > close(9) = 0 > write(5, "ACCTMAX\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 88) > = 88 > read(6, "BARBARA3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 88) = > 88 > open("/etc/hosts", O_RDONLY) = 9 > fcntl64(9, F_GETFD) = 0 > fcntl64(9, F_SETFD, FD_CLOEXEC) = 0 > fstat64(9, {st_mode=S_IFREG|0644, st_size=154, ...}) = 0 > mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) > 0x40018000 > read(9, "# Generated automatically from h"..., 4096) = 154 > read(9, "", 4096) = 0 > close(9) = 0 > munmap(0x40018000, 4096) = 0 > socket(PF_INET, SOCK_DGRAM, IPPROTO_IP) = 9 > connect(9, {sa_family=AF_INET, sin_port=htons(53), sin_addr=inet_addr("127. > 0.0.1")}, 28) = 0 > send(9, "\t\3\1\0\0\1\0\0\0\0\0\0\10BARBARA3\tborderent\3"..., 40, 0) = 40 > gettimeofday({1080325192, 628139}, NULL) = 0 > poll([{fd=9, events=POLLIN, revents=POLLIN}], 1, 5000) = 1 > ioctl(9, FIONREAD, [40]) = 0 > recvfrom(9, "\t\3\205\203\0\1\0\0\0\0\0\0\10BARBARA3\tborderent\3"..., 1024 > , 0, {sa_family=AF_INET, sin_port=htons(53), sin_addr=inet_addr("127.0.0.1" > )}, [16]) = 40 > close(9) = 0 > socket(PF_INET, SOCK_DGRAM, IPPROTO_IP) = 9 > connect(9, {sa_family=AF_INET, sin_port=htons(53), sin_addr=inet_addr("127. 
> 0.0.1")}, 28) = 0 > send(9, "\t\4\1\0\0\1\0\0\0\0\0\0\10BARBARA3\10trinsics\3c"..., 39, 0) = 39 > gettimeofday({1080325192, 680132}, NULL) = 0 > poll([{fd=9, events=POLLIN, revents=POLLIN}], 1, 5000) = 1 > ioctl(9, FIONREAD, [39]) = 0 > recvfrom(9, "\t\4\205\203\0\1\0\0\0\0\0\0\10BARBARA3\10trinsics\3c"..., 102 > 4, 0, {sa_family=AF_INET, sin_port=htons(53), sin_addr=inet_addr("127.0.0.1 > ")}, [16]) = 39 > close(9) = 0 > socket(PF_INET, SOCK_DGRAM, IPPROTO_IP) = 9 > connect(9, {sa_family=AF_INET, sin_port=htons(53), sin_addr=inet_addr("127. > 0.0.1")}, 28) = 0 > send(9, "\t\5\1\0\0\1\0\0\0\0\0\0\10BARBARA3\0\0\1\0\1", 26, 0) = 26 > gettimeofday({1080325192, 698896}, NULL) = 0 > poll([{fd=9, events=POLLIN, revents=POLLIN}], 1, 5000) = 1 > ioctl(9, FIONREAD, [26]) = 0 > recvfrom(9, "\t\5\205\203\0\1\0\0\0\0\0\0\10BARBARA3\0\0\1\0\1", 1024, 0, { > sa_family=AF_INET, sin_port=htons(53), sin_addr=inet_addr("127.0.0.1")}, [1 > 6]) = 26 > close(9) = 0 > rt_sigprocmask(SIG_BLOCK, [PIPE], [FPE USR2], 8) = 0 > getpid() = 15056 > geteuid32() = 0 > getpid() = 15056 > geteuid32() = 0 > time(NULL) = 1080325192 > write(8, "0o\2\1\4cj\4\34ou=Hosts,dc=borderent,d"..., 113) = 113 > select(1024, [8], [], NULL, NULL) = 1 (in [8]) > read(8, "", 8) = 0 > write(8, "0\5\2\1\5B\0", 7) = -1 EPIPE (Broken pipe) > shutdown(8, 2 /* send and receive */) = -1 ENOTCONN (Transport endpoint i > s not connected) > close(8) = 0 > time([1080325192]) = 1080325192 > getpid() = 15056 > rt_sigaction(SIGPIPE, {0x402192e0, [], SA_RESTORER, 0x4016aca8}, {SIG_DFL}, > 8) = 0 > socket(PF_UNIX, SOCK_DGRAM, 0) = 8 > fcntl64(8, F_SETFD, FD_CLOEXEC) = 0 > connect(8, {sa_family=AF_UNIX, path="/dev/log"}, 16) = -1 ECONNREFUSED (Con > nection refused) > close(8) = 0 > rt_sigaction(SIGPIPE, {SIG_DFL}, NULL, 8) = 0 > getpid() = 15056 > geteuid32() = 0 > socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 8 > setsockopt(8, SOL_TCP, TCP_NODELAY, [1], 4) = 0 > fcntl64(8, F_GETFL) = 0x2 (flags O_RDWR) > fcntl64(8, F_SETFL, 
O_RDWR|O_NONBLOCK) = 0 > connect(8, {sa_family=AF_INET, sin_port=htons(389), sin_addr=inet_addr("127 > .0.0.1")}, 16) = -1 EINPROGRESS (Operation now in progress) > select(1024, NULL, [8], NULL, {30, 0}) = 1 (out [8], left {30, 0}) > getpeername(8, {sa_family=AF_INET, sin_port=htons(389), sin_addr=inet_addr( > "127.0.0.1")}, [16]) = 0 > fcntl64(8, F_GETFL) = 0x802 (flags O_RDWR|O_NONBLOCK) > fcntl64(8, F_SETFL, O_RDWR) = 0 > getpeername(8, {sa_family=AF_INET, sin_port=htons(389), sin_addr=inet_addr( > "127.0.0.1")}, [16]) = 0 > uname({sys="Linux", node="Border.BorderEnt.Com", ...}) = 0 > time(NULL) = 1080325192 > write(8, "0\f\2\1\1`\7\2\1\3\4\0\200\0", 14) = 14 > time(NULL) = 1080325192 > select(1024, [8], [], NULL, {30, 0}) = 1 (in [8], left {30, 0}) > read(8, "0\f\2\1\1a\7\n", 8) = 8 > read(8, "\1\0\4\0\4\0", 6) = 6 > time(NULL) = 1080325192 > setsockopt(8, SOL_SOCKET, SO_KEEPALIVE, [0], 4) = 0 > fcntl64(8, F_SETFD, FD_CLOEXEC) = 0 > getsockname(8, {sa_family=AF_INET, sin_port=htons(4256), sin_addr=inet_addr > ("127.0.0.1")}, [16]) = 0 > getpeername(8, {sa_family=AF_INET, sin_port=htons(389), sin_addr=inet_addr( > "127.0.0.1")}, [16]) = 0 > time([1080325192]) = 1080325192 > time(NULL) = 1080325192 > write(8, "0o\2\1\2cj\4\34ou=Hosts,dc=borderent,d"..., 113) = 113 > select(1024, [8], [], NULL, NULL) = 1 (in [8]) > read(8, "0\37\2\1\2e\32\n", 8) = 8 > read(8, "\1 \4\23dc=borderent,dc=com\4\0", 25) = 25 > time(NULL) = 1080325192 > rt_sigprocmask(SIG_SETMASK, [FPE USR2], NULL, 8) = 0 > --- SIGPIPE (Broken pipe) @ 0 (0) --- > +++ killed by SIGPIPE +++ > > I'm in over my head on this one. Any idea where I should look next?-- Chris Garrigues http://www.DeepEddy.Com/~cwg/ Trinsic Solutions http://www.trinsics.com 1611-B West 6th Street Austin, TX 78703-5074 512-322-0180 If you don't apply what you've learned, you haven't learned anything. -------------- next part -------------- A non-text attachment was scrubbed... 
Name: not available Type: application/pgp-signature Size: 235 bytes Desc: not available Url : http://lists.samba.org/archive/samba/attachments/20040407/200018ec/attachment.bin