I've had a long-standing problem syncing my home directory from one Linux system (RH Linux 7.3) to another (RH Linux 9). I am now running rsync version 2.6.2 on both ends. I use ssh as the data transport. The two machines are separated by a VPN over the Internet. What happens is that the sending rsync just appears to hang. The receiving rsync processes are no longer running when I go to look, typically the next morning. I finally managed to capture an strace of the receiving rsync processes, which I've attached below. The child receiver process gets an error return on a write(), informs its parent by sending it two error messages, and calls exit(11). The parent receiver process reads the first of the two error messages, but not the second. It handles the SIGCHLD signal after read()ing the first error message. Thereafter, the parent's select() no longer includes the file descriptor on which (presumably) the second error message is waiting. And it never informs the sending rsync process of the error. I'm hoping that there's enough information here to allow an rsync developer (is Wayne the only one left?) to come up with a fix. -- cut here -- [pid 9977] memcpy(0x0807ac00, "rity: MIME headers sanitized on xxxxxx\n\tSee http://www.impsec.or", 64) = 0x0807ac00 [pid 9977] memcpy(0x0807ac00, "g/email-tools/sanitizer-intro.html\n\tfor details. $Revision: 1.13", 64) = 0x0807ac00 [pid 9977] memcpy(0x0807ac00, "3 $Date: 2002-01-05 17:09:21-08 \nX-S", 36) = 0x0807ac00 [pid 9977] memcpy(0x40275eb8, "> ABC Data Solutions Phone: 800-555-3900\n> 123 Main St. Fax: 800-555-8908\n> Xxxxxxx, "..., 336) = 0x40275eb8 [pid 9977] write(3, "se a serial interface.\n\n\nThe one thing I'm not sure about is whether or not they will support a\nspecialied printer like a Dymo l"..., 262144 <unfinished ...> [pid 9977] SYS_write(3, "se a serial interface.\n\n\nThe one thing I'm not sure about is whether or not they will support a\nspecialied printer like a Dymo l"..., 262144) = -5 [pid 9977] <... 
write resumed> ) = -1 [pid 9977] __errno_location() = 0x40030b00 [pid 9977] __errno_location() = 0x40030b00 [pid 9977] strerror(5) = "Input/output error" [pid 9977] asprintf(0x08077584, 0x0806e507, 0x08082a20, 0x0806ede4, 0xbfffd570) = 30 [pid 9977] vsnprintf("write failed on "/v/home/tom/./Mail/synergy-l": Input/output error\n", 1024, "write failed on %s: %s\n", 0xbfffa458) = 67 [pid 9977] malloc(12) = 0x08088e40 [pid 9977] malloc(71) = 0x08091dd8 [pid 9977] memcpy(0x08091ddc, "write failed on "/v/home/tom/./Mail/synergy-l": Input/output error\n", 67) = 0x08091ddc [pid 9977] write(4, "C", 71 <unfinished ...> [pid 9977] SYS_write(4, "C", 71 <unfinished ...> [pid 9944] <... SYS__newselect resumed> ) = 1 [pid 9977] <... SYS_write resumed> ) = 71 [pid 9944] <... select resumed> ) = 1 [pid 9977] <... write resumed> ) = 71 [pid 9944] __errno_location( <unfinished ...> [pid 9977] free(0x08091dd8 <unfinished ...> [pid 9944] <... __errno_location resumed> ) = 0x40030b00 [pid 9977] <... free resumed> ) = <void> [pid 9944] select(4, 0xbfff9c40, 0, 0, 0xbfff9bb8 <unfinished ...> [pid 9977] free(0x08088e40 <unfinished ...> [pid 9944] SYS__newselect(4, 0xbfff9c40, 0, 0, 0xbfff9bb8 <unfinished ...> [pid 9977] <... free resumed> ) = <void> [pid 9944] <... SYS__newselect resumed> ) = 1 [pid 9977] signal(10, 0x00000001 <unfinished ...> [pid 9944] <... select resumed> ) = 1 [pid 9944] read(3, <unfinished ...> [pid 9977] SYS_rt_sigaction(10, 0xbfffa1e0, 0xbfffa150, 8, 10) = 0 [pid 9944] SYS_read(3, <unfinished ...> [pid 9977] <... signal resumed> ) = 0x08051d50 [pid 9944] <... SYS_read resumed> "C", 4) = 4 [pid 9977] signal(12, 0x00000001 <unfinished ...> [pid 9944] <... read resumed> "C", 4) = 4 [pid 9944] __errno_location( <unfinished ...> [pid 9977] SYS_rt_sigaction(12, 0xbfffa1e0, 0xbfffa150, 8, 12) = 0 [pid 9944] <... __errno_location resumed> ) = 0x40030b00 [pid 9977] <... 
signal resumed> ) = 0x08051d80 [pid 9944] select(4, 0xbfff9c40, 0, 0, 0xbfff9bb8 <unfinished ...> [pid 9977] unlink("Mail/.synergy-l.p8Qrzc" <unfinished ...> [pid 9944] SYS__newselect(4, 0xbfff9c40, 0, 0, 0xbfff9bb8 <unfinished ...> [pid 9977] SYS_unlink(0xbfffb570, 0, 11, 255, 0x0806dca3 <unfinished ...> [pid 9944] <... SYS__newselect resumed> ) = 1 [pid 9944] <... select resumed> ) = 1 [pid 9944] read(3, <unfinished ...> [pid 9944] SYS_read(3, "write failed on "/v/home/tom/./Mail/synergy-l": Input/output error\n", 67) = 67 [pid 9944] <... read resumed> "write failed on "/v/home/tom/./Mail/synergy-l": Input/output error\n", 67) = 67 [pid 9944] memcpy(0xbfff8c94, "write failed on "/v/home/tom/./Mail/synergy-l": Input/output error\n", 67) = 0xbfff8c94 [pid 9944] __errno_location() = 0x40030b00 [pid 9944] select(2, 0, 0xbfff8be0, 0, 0xbfff8b58 <unfinished ...> [pid 9944] SYS__newselect(2, 0, 0xbfff8be0, 0, 0xbfff8b58 <unfinished ...> [pid 9977] <... SYS_unlink resumed> ) = 0 [pid 9977] <... unlink resumed> ) = 0 [pid 9977] vsnprintf("rsync error: error in file IO (code 11) at receiver.c(255)\n", 1024, "rsync error: %s (code %d) at %s(%d)\n", 0xbfffa3d8) = 59 [pid 9977] malloc(12) = 0x08089920 [pid 9977] malloc(63) = 0x08091dd8 [pid 9977] memcpy(0x08091ddc, "rsync error: error in file IO (code 11) at receiver.c(255)\n", 59) = 0x08091ddc [pid 9977] write(4, ";", 63 <unfinished ...> [pid 9977] SYS_write(4, ";", 63) = 63 [pid 9977] <... write resumed> ) = 63 [pid 9977] free(0x08091dd8) = <void> [pid 9977] free(0x08089920) = <void> [pid 9977] exit(11 <unfinished ...> [pid 9977] SYS_exit_group(11 <unfinished ...> [pid 9977] +++ exited (status 11) +++ <... SYS__newselect resumed> ) = -514 --- SIGCHLD (Child exited) --- waitpid(-1, 0xbfff8834, 1 <unfinished ...> SYS_wait4(-1, 0xbfff8834, 1, 0, -1) = 9977 <... waitpid resumed> ) = 9977 waitpid(-1, 0xbfff8834, 1 <unfinished ...> SYS_wait4(-1, 0xbfff8834, 1, 0, -1) = -10 <... 
waitpid resumed> ) = -1 SYS_sigreturn(2, 0xbfff8834, 1, 0, 0xbfff8b58) = -4 <... select resumed> ) = -1 __errno_location() = 0x40030b00 select(2, 0, 0xbfff8be0, 0, 0xbfff8b58 <unfinished ...> SYS__newselect(2, 0, 0xbfff8be0, 0, 0xbfff8b58) = 0 <... select resumed> ) = 0 __errno_location() = 0x40030b00 select(2, 0, 0xbfff8be0, 0, 0xbfff8b58 <unfinished ...> SYS__newselect(2, 0, 0xbfff8be0, 0, 0xbfff8b58) = 0 <... select resumed> ) = 0 __errno_location() = 0x40030b00 select(2, 0, 0xbfff8be0, 0, 0xbfff8b58 <unfinished ...> SYS__newselect(2, 0, 0xbfff8be0, 0, 0xbfff8b58) = 0 <... select resumed> ) = 0 __errno_location() = 0x40030b00 ... (repeats indefinitely) -- end -- -- Thomas J. Pinkl Health Business Systems, Inc.