Konstantin Belousov
2015-Aug-27 20:16 UTC
Latest stable (r287104) bash leaves zombies on exit
On Thu, Aug 27, 2015 at 08:53:09PM +0200, Michiel Boland wrote:
> The xterm program has a SIGCHLD signal handler that calls wait(). > If the handler is invoked while xterm is exiting, a deadlock occurs in rtld. > > Cheers > Michiel > > #0 _umtx_op_err () at /usr/src/lib/libthr/arch/amd64/amd64/_umtx_op_err.S:37 > #1 0x000000080305a2b0 in __thr_rwlock_rdlock (rwlock=0x803272980, > flags=<value optimized out>, tsp=<value optimized out>) > at /usr/src/lib/libthr/thread/thr_umtx.c:277 > #2 0x000000080306179c in _thr_rtld_rlock_acquire (lock=0x803272980) > at thr_umtx.h:196
Please, from frame 2, do "p *curthread".
> #3 0x00000008006a72c2 in rlock_acquire (lock=0x8008ba860, > lockstate=0x7fffffffd5b8) at /usr/src/libexec/rtld-elf/rtld_lock.c:201 > #4 0x00000008006a0c8d in _rtld_bind (obj=0x8006bc000, reloff=6840) > at /usr/src/libexec/rtld-elf/rtld.c:701 > #5 0x000000080069e46d in _rtld_bind_start () > at /usr/src/libexec/rtld-elf/amd64/rtld_start.S:121 > #6 0x0000000000445d34 in reapchild (n=20) at main.c:5177 > #7 <signal handler called>
It is more or less obvious that reapchild(), which is the signal handler, was called directly, without the libthr thr_sighandler() wrapper around it. This should be the cause of the problem. I just verified that the signal handler is correctly wrapped for me, on the latest stable/10. Both with the pre-linked libthr.so and with the library loaded dynamically at runtime. I used the test program at the end of the message, put breakpoint on the sigusr2_handler, and looked at the backtrace, which must include thr_sighandler(). It did in my case, for binary built with and without -lpthread. 
Can you verify the presence of thr_sighandler() in the backtrace for this test program, on your system ?
> #8 objlist_call_fini () at /usr/src/libexec/rtld-elf/rtld.c:769 > #9 0x00000008006a0c2b in rtld_exit () at /usr/src/libexec/rtld-elf/rtld.c:2710 > #10 0x00000008024e5406 in __cxa_finalize (dso=0x0) > at /usr/src/lib/libc/stdlib/atexit.c:200 > #11 0x000000080248692c in exit (status=0) at /usr/src/lib/libc/stdlib/exit.c:67 > #12 0x0000000000445f35 in Exit (n=0) at main.c:5078 > #13 0x0000000000456020 in Cleanup (code=0) at misc.c:5238 > #14 0x000000000044da49 in NormalExit () at misc.c:5222 > #15 0x000000000045a616 in readPtyData (xw=0x804cdc000, select_mask=0x6add80, > data=0x804d64000) at ptydata.c:221 > #16 0x0000000000421c48 in in_put (xw=0x804cdc000) at charproc.c:4700 > #17 0x0000000000421b6a in doinput () at charproc.c:4856 > #18 0x000000000041d992 in VTparse (xw=0x804cdc000) at charproc.c:4382 > #19 0x000000000041d87a in VTRun (xw=0x804cdc000) at charproc.c:6997 > #20 0x0000000000442c01 in main (argc=3, argv=0x7fffffffe6d0) at main.c:2607 > > #6 0x0000000000445d34 in reapchild (n=20) at main.c:5177 > 5177 pid = wait(NULL); > Current language: auto; currently minimal > (gdb) l > 5172 int olderrno = errno; > 5173 int pid; > 5174 > 5175 DEBUG_MSG("handle:reapchild\n"); > 5176 > 5177 pid = wait(NULL); > 5178
/* $Id: rtld_sigresolv.c,v 1.6 2015/08/27 19:40:42 kostik Exp kostik $ */ /* /usr/local/opt/gcc-5.2.0/bin/gcc -Wall -Wextra -gdwarf-2 -O -o rtld_sigresolv rtld_sigresolv.c */ #include <sys/types.h> #include <sys/wait.h> #include <dlfcn.h> #include <err.h> #include <pthread.h> #include <signal.h> #include <stdlib.h> #include <string.h> static void sigusr1_handler(int signo __unused, siginfo_t *si __unused, void *u __unused) { } static void sigusr2_handler(int signo __unused, siginfo_t *si __unused, void *u __unused) { /* Do something which triggers symbol resolution. 
*/ wait(NULL); } static void atexit_code(void) { raise(SIGUSR2); } static void * dummy_thread(void *arg __unused) { return (NULL); } int main(void) { struct sigaction sa; void *thr_handle; int (*pthr_create)(pthread_t *, const pthread_attr_t *, void *(*)(void *), void *); pthread_t pthread; int error; memset(&sa, 0, sizeof(sa)); sa.sa_flags = SA_SIGINFO; sa.sa_sigaction = sigusr1_handler; error = sigaction(SIGUSR1, &sa, NULL); if (error != 0) err(1, "sigaction SIGUSR1"); sa.sa_sigaction = sigusr2_handler; error = sigaction(SIGUSR2, &sa, NULL); if (error != 0) err(1, "sigaction SIGUSR2"); raise(SIGUSR1); /* pre-resolve the raise symbol */ error = atexit(atexit_code); if (error != 0) err(1, "atexit"); thr_handle = dlopen("libpthread.so", RTLD_LAZY | RTLD_GLOBAL); if (thr_handle == NULL) errx(1, "dlopen(\"libpthread.so\"): %s", dlerror()); pthr_create = dlsym(thr_handle, "pthread_create"); if (pthr_create == NULL) errx(1, "dlsym(\"pthread_create\"): %s", dlerror()); error = pthr_create(&pthread, NULL, dummy_thread, NULL); if (error != 0) errc(1, error, "pthread_create"); return (0); }
On 08/27/2015 22:16, Konstantin Belousov wrote: [...]
> I just verified that the signal handler is correctly wrapped for me, on > the latest stable/10. Both with the pre-linked libthr.so and with the > library loaded dynamically at runtime. I used the test program at the > end of the message, put breakpoint on the sigusr2_handler, and looked > at the backtrace, which must include thr_sighandler(). It did in my > case, for binary built with and without -lpthread. > > Can you verify the presence of thr_sighandler() in the backtrace for > this test program, on your system ?
Verified, see below. Cheers Michiel
Breakpoint 1, sigusr2_handler (signo=31, si=0x7fffffffe430, u=0x7fffffffe0c0) at rtld_sigresolv.c:24 24 wait(NULL); Current language: auto; currently minimal (gdb) bt #0 sigusr2_handler (signo=31, si=0x7fffffffe430, u=0x7fffffffe0c0) at rtld_sigresolv.c:24 #1 0x000000080100d947 in handle_signal (actp=<value optimized out>, sig=31, info=0x7fffffffe430, ucp=0x7fffffffe0c0) at /usr/src/lib/libthr/thread/thr_sig.c:243 #2 0x000000080100d158 in thr_sighandler (sig=<value optimized out>, info=<value optimized out>, _ucp=<value optimized out>) at /usr/src/lib/libthr/thread/thr_sig.c:188 #3 <signal handler called> #4 thr_kill () at thr_kill.S:3 #5 0x0000000800965066 in __raise (s=<value optimized out>) at /usr/src/lib/libc/gen/raise.c:51 #6 0x0000000000400c72 in atexit_code () at rtld_sigresolv.c:31 #7 0x000000080093d406 in __cxa_finalize (dso=0x0) at /usr/src/lib/libc/stdlib/atexit.c:200 #8 0x00000008008de92c in exit (status=0) at /usr/src/lib/libc/stdlib/exit.c:67 #9 0x0000000000400946 in _start (ap=<value optimized out>, cleanup=<value optimized out>) at /usr/src/lib/csu/amd64/crt1.c:78 #10 0x0000000800621000 in ?? () #11 0x0000000000000000 in ?? ()