Hi
I rsync from 4 source machines to a single destination machine, and I am
certain the filenames they sync do not overlap. From time to time, however,
the rsync from one of the source machines gets stuck (not always the same one).
I followed https://rsync.samba.org/issues.html and gathered strace, lsof and
netstat output from both ends while a transfer was frozen. I would appreciate
any ideas, or suggestions for further debug info to collect!
General info
===========
Both destination and source run the same rsync version:
$ rsync --version
rsync version 3.1.0 protocol version 31
Copyright (C) 1996-2013 by Andrew Tridgell, Wayne Davison, and others.
Web site: http://rsync.samba.org/
Capabilities:
64-bit files, 64-bit inums, 64-bit timestamps, 64-bit long ints,
socketpairs, hardlinks, symlinks, IPv6, batchfiles, inplace,
append, ACLs, xattrs, iconv, symtimes, prealloc
rsync comes with ABSOLUTELY NO WARRANTY. This is free software, and you
are welcome to redistribute it under certain conditions. See the GNU
General Public Licence for details.
Both destination and source run Ubuntu 14.04.
SOURCE MACHINE
=============
$ sudo ps -fe | grep rsync
abc 3794 3793 0 01:12 ? 00:00:00 /bin/sh -c flock -xn
/tmp/LOCKFILE -c "/usr/bin/rsync --compress --compress-level=9
--bwlimit=512k --recursive --delay-updates --quiet --update
--exclude=\"/.*\" /SRC_PATH/ DEST_____IP:/DEST_PATH/"
abc 3795 3794 0 01:12 ? 00:00:00 flock -xn /tmp/LOCKFILE -c
/usr/bin/rsync --compress --compress-level=9 --bwlimit=512k --recursive
--delay-updates --quiet --update --exclude="/.*" /SRC_PATH/
DEST_____IP:/DEST_PATH/
abc 3796 3795 0 01:12 ? 00:00:00 /bin/sh -c /usr/bin/rsync
--compress --compress-level=9 --bwlimit=512k --recursive --delay-updates --quiet
--update --exclude="/.*" /SRC_PATH/ DEST_____IP:/DEST_PATH/
abc 3797 3796 0 01:12 ? 00:03:14 /usr/bin/rsync --compress
--compress-level=9 --bwlimit=512k --recursive --delay-updates --quiet --update
--exclude=/.* /SRC_PATH/ DEST_____IP:/DEST_PATH/
abc 3798 3797 0 01:12 ? 00:00:13 ssh DEST_____IP rsync --server
-urze.iLs --compress-level=9 --bwlimit=512 --delay-updates . /DEST_PATH/
vangelis 8431 8371 0 18:04 pts/7 00:00:00 grep --color=auto rsync
$ sudo strace -p 3794
Process 3794 attached
wait4(-1, ^CProcess 3794 detached
<detached ...>
$ sudo strace -p 3795
Process 3795 attached
wait4(3796, ^CProcess 3795 detached
<detached ...>
$ sudo strace -yy -p 3796
Process 3796 attached
wait4(-1, ^CProcess 3796 detached
<detached ...>
$ sudo strace -yy -p 3797
Process 3797 attached
select(6, [], [5<socket:[1465572750]>], [], {19, 795683}^CProcess 3797
detached
<detached ...>
$ sudo strace -yy -p 3798
Process 3798 attached
select(7, [3<socket:[1465572769]>], [5<socket:[1465572752]>], NULL,
NULL^CProcess 3798 detached
<detached ...>
$ sudo lsof -p 3797
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
rsync 3797 abc cwd DIR 182,861281 4096 530855 /SRC_PATH
rsync 3797 abc rtd DIR 182,861281 4096 2 /
rsync 3797 abc txt REG 182,861281 390704 140858
/usr/bin/rsync
rsync 3797 abc mem REG 182,861281 18624 93
/lib/x86_64-linux-gnu/libattr.so.1.1.0
rsync 3797 abc mem REG 182,861281 1857312 542
/lib/x86_64-linux-gnu/libc-2.19.so
rsync 3797 abc mem REG 182,861281 48112 328
/lib/x86_64-linux-gnu/libpopt.so.0.0.0
rsync 3797 abc mem REG 182,861281 100728 30
/lib/x86_64-linux-gnu/libz.so.1.2.8
rsync 3797 abc mem REG 182,861281 31168 91
/lib/x86_64-linux-gnu/libacl.so.1.1.0
rsync 3797 abc mem REG 182,861281 149120 560
/lib/x86_64-linux-gnu/ld-2.19.so
rsync 3797 abc mem REG 182,861281 26258 154026
/usr/lib/x86_64-linux-gnu/gconv/gconv-modules.cache
rsync 3797 abc 0r FIFO 0,8 0t0 1465572739 pipe
rsync 3797 abc 1u REG 182,861281 0 394802
(deleted)/tmp/tmpfWc3cI4
rsync 3797 abc 2u REG 182,861281 0 394802
(deleted)/tmp/tmpfWc3cI4
rsync 3797 abc 3u REG 182,861281 0 397245
/tmp/LOCKFILE
rsync 3797 abc 4r REG 182,861281 1085878503 786474
(deleted)/SRC_PATH/SUBDIR/file.gz
rsync 3797 abc 5u unix 0xffff881bcb29e100 0t0 1465572750 socket
rsync 3797 abc 6u unix 0xffff880a31641100 0t0 1465572751 socket
$ sudo lsof -p 3798
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
ssh 3798 abc cwd DIR 182,861281 4096 1312999 /home/abc
ssh 3798 abc rtd DIR 182,861281 4096 2 /
ssh 3798 abc txt REG 182,861281 641664 140798
/usr/bin/ssh
ssh 3798 abc mem REG 182,861281 43616 536
/lib/x86_64-linux-gnu/libnss_files-2.19.so
ssh 3798 abc mem REG 182,861281 47760 571
/lib/x86_64-linux-gnu/libnss_nis-2.19.so
ssh 3798 abc mem REG 182,861281 97296 523
/lib/x86_64-linux-gnu/libnsl-2.19.so
ssh 3798 abc mem REG 182,861281 39824 566
/lib/x86_64-linux-gnu/libnss_compat-2.19.so
ssh 3798 abc mem REG 182,861281 141574 544
/lib/x86_64-linux-gnu/libpthread-2.19.so
ssh 3798 abc mem REG 182,861281 14256 338
/lib/x86_64-linux-gnu/libkeyutils.so.1.4
ssh 3798 abc mem REG 182,861281 43672 136697
/usr/lib/x86_64-linux-gnu/libkrb5support.so.0.1
ssh 3798 abc mem REG 182,861281 14592 275
/lib/x86_64-linux-gnu/libcom_err.so.2.1
ssh 3798 abc mem REG 182,861281 186824 136690
/usr/lib/x86_64-linux-gnu/libk5crypto.so.3.1
ssh 3798 abc mem REG 182,861281 831616 136694
/usr/lib/x86_64-linux-gnu/libkrb5.so.3.3
ssh 3798 abc mem REG 182,861281 252032 280
/lib/x86_64-linux-gnu/libpcre.so.3.13.1
ssh 3798 abc mem REG 182,861281 1857312 542
/lib/x86_64-linux-gnu/libc-2.19.so
ssh 3798 abc mem REG 182,861281 285848 136692
/usr/lib/x86_64-linux-gnu/libgssapi_krb5.so.2.2
ssh 3798 abc mem REG 182,861281 101240 518
/lib/x86_64-linux-gnu/libresolv-2.19.so
ssh 3798 abc mem REG 182,861281 100728 30
/lib/x86_64-linux-gnu/libz.so.1.2.8
ssh 3798 abc mem REG 182,861281 14664 550
/lib/x86_64-linux-gnu/libdl-2.19.so
ssh 3798 abc mem REG 182,861281 1926432 33
/lib/x86_64-linux-gnu/libcrypto.so.1.0.0
ssh 3798 abc mem REG 182,861281 134296 36
/lib/x86_64-linux-gnu/libselinux.so.1
ssh 3798 abc mem REG 182,861281 149120 560
/lib/x86_64-linux-gnu/ld-2.19.so
ssh 3798 abc 0u unix 0xffff880a37b1b1c0 0t0 1465572749 socket
ssh 3798 abc 1u unix 0xffff880a3d5e1780 0t0 1465572752 socket
ssh 3798 abc 2u REG 182,861281 0 394802
(deleted)/tmp/tmpfWc3cI4
ssh 3798 abc 3u IPv4 1465572769 0t0 TCP
feeds3.abc.co.uk:55340->feeds-web1.abc.co.uk:ssh (ESTABLISHED)
ssh 3798 abc 5u unix 0xffff880a3d5e1780 0t0 1465572752 socket
ssh 3798 abc 6u REG 182,861281 0 394802
(deleted)/tmp/tmpfWc3cI4
$ sudo netstat -anp | egrep "(3794|3795|3796|3797|3798)"
tcp 0 0 IP_________37:55340 DEST_____IP:22 ESTABLISHED
3798/ssh
tcp 0 0 IP_________37:5553 IP________132:53796
ESTABLISHED 8282/perl
unix 3 [ ] STREAM CONNECTED 1465572752 3798/ssh
unix 3 [ ] STREAM CONNECTED 1465572751 3797/rsync
unix 3 [ ] STREAM CONNECTED 1465572750 3797/rsync
unix 3 [ ] STREAM CONNECTED 1465572749 3798/ssh
DESTINATION MACHINE
==================
$ sudo netstat -atlp | grep "IP_________14"
tcp 0 0 IP_________14:ssh IP_________40:55340 ESTABLISHED
32097/sshd: abc [pr
$ sudo netstat -antp | egrep "(32097|32212)"
tcp 0 0 IP_________14:22 IP_________40:55340 ESTABLISHED
32097/sshd: abc [pr
$ sudo pstree --show-pids 32097
sshd(32097)───sshd(32212)
$ sudo strace -yy -p 32097
Process 32097 attached
restart_syscall(<... resuming interrupted call ...>^CProcess 32097
detached
<detached ...>
$ sudo strace -yy -p 32212
Process 32212 attached
select(14, [3<socket:[6021256]> 6<pipe:[6021267]>], [], NULL,
NULL^CProcess 32212 detached
<detached ...>
$ sudo lsof -p 32212
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
sshd 32212 abc cwd DIR 252,2 4096 2 /
sshd 32212 abc rtd DIR 252,2 4096 2 /
sshd 32212 abc txt REG 252,2 770944 1052832
/usr/sbin/sshd
sshd 32212 abc DEL REG 0,5 6017648 /dev/zero
sshd 32212 abc mem REG 252,2 14464 12060640
/lib/x86_64-linux-gnu/security/pam_env.so
sshd 32212 abc mem REG 252,2 22896 12060651
/lib/x86_64-linux-gnu/security/pam_limits.so
sshd 32212 abc mem REG 252,2 10320 12060631
/lib/x86_64-linux-gnu/security/pam_mail.so
sshd 32212 abc mem REG 252,2 10344 12060643
/lib/x86_64-linux-gnu/security/pam_motd.so
sshd 32212 abc mem REG 252,2 14592 11931111
/lib/x86_64-linux-gnu/libpam_misc.so.0.82.0
sshd 32212 abc mem REG 252,2 38920 11930990
/lib/x86_64-linux-gnu/libnih-dbus.so.1.0.0
sshd 32212 abc mem REG 252,2 96280 11931065
/lib/x86_64-linux-gnu/libnih.so.1.0.0
sshd 32212 abc mem REG 252,2 108480 11931142
/lib/x86_64-linux-gnu/libcgmanager.so.0.0.0
sshd 32212 abc mem REG 252,2 42864 12061049
/lib/x86_64-linux-gnu/security/pam_systemd.so
sshd 32212 abc mem REG 252,2 10376 12060633
/lib/x86_64-linux-gnu/security/pam_umask.so
sshd 32212 abc mem REG 252,2 10288 12060636
/lib/x86_64-linux-gnu/security/pam_keyinit.so
sshd 32212 abc mem REG 252,2 10344 12060673
/lib/x86_64-linux-gnu/security/pam_loginuid.so
sshd 32212 abc mem REG 252,2 18752 12060650
/lib/x86_64-linux-gnu/security/pam_selinux.so
sshd 32212 abc mem REG 252,2 10272 12060638
/lib/x86_64-linux-gnu/security/pam_nologin.so
sshd 32212 abc mem REG 252,2 18952 11931054
/lib/x86_64-linux-gnu/libcap.so.2.24
sshd 32212 abc mem REG 252,2 10376 12060669
/lib/x86_64-linux-gnu/security/pam_cap.so
sshd 32212 abc mem REG 252,2 6112 12060634
/lib/x86_64-linux-gnu/security/pam_permit.so
sshd 32212 abc mem REG 252,2 6024 12060632
/lib/x86_64-linux-gnu/security/pam_deny.so
sshd 32212 abc mem REG 252,2 60288 12060663
/lib/x86_64-linux-gnu/security/pam_unix.so
sshd 32212 abc mem REG 252,2 22952 11931136
/lib/x86_64-linux-gnu/libnss_dns-2.19.so
sshd 32212 abc mem REG 252,2 43616 11931097
/lib/x86_64-linux-gnu/libnss_files-2.19.so
sshd 32212 abc mem REG 252,2 47760 11932248
/lib/x86_64-linux-gnu/libnss_nis-2.19.so
sshd 32212 abc mem REG 252,2 39824 11931504
/lib/x86_64-linux-gnu/libnss_compat-2.19.so
sshd 32212 abc mem REG 252,2 101240 11931020
/lib/x86_64-linux-gnu/libresolv-2.19.so
sshd 32212 abc mem REG 252,2 14256 11931023
/lib/x86_64-linux-gnu/libkeyutils.so.1.4
sshd 32212 abc mem REG 252,2 43672 1442199
/usr/lib/x86_64-linux-gnu/libkrb5support.so.0.1
sshd 32212 abc mem REG 252,2 186824 1442370
/usr/lib/x86_64-linux-gnu/libk5crypto.so.3.1
sshd 32212 abc mem REG 252,2 31792 11931148
/lib/x86_64-linux-gnu/librt-2.19.so
sshd 32212 abc mem REG 252,2 141574 11931125
/lib/x86_64-linux-gnu/libpthread-2.19.so
sshd 32212 abc mem REG 252,2 252032 11931089
/lib/x86_64-linux-gnu/libpcre.so.3.13.1
sshd 32212 abc mem REG 252,2 14664 11931137
/lib/x86_64-linux-gnu/libdl-2.19.so
sshd 32212 abc mem REG 252,2 97296 11931061
/lib/x86_64-linux-gnu/libnsl-2.19.so
sshd 32212 abc mem REG 252,2 1857312 11931119
/lib/x86_64-linux-gnu/libc-2.19.so
sshd 32212 abc mem REG 252,2 14592 11931002
/lib/x86_64-linux-gnu/libcom_err.so.2.1
sshd 32212 abc mem REG 252,2 831616 1442299
/usr/lib/x86_64-linux-gnu/libkrb5.so.3.3
sshd 32212 abc mem REG 252,2 290520 1442171
/usr/lib/x86_64-linux-gnu/libgssapi_krb5.so.2.2
sshd 32212 abc mem REG 252,2 43368 11931150
/lib/x86_64-linux-gnu/libcrypt-2.19.so
sshd 32212 abc mem REG 252,2 100728 11931120
/lib/x86_64-linux-gnu/libz.so.1.2.8
sshd 32212 abc mem REG 252,2 10680 11931014
/lib/x86_64-linux-gnu/libutil-2.19.so
sshd 32212 abc mem REG 252,2 1934624 11930991
/lib/x86_64-linux-gnu/libcrypto.so.1.0.0
sshd 32212 abc mem REG 252,2 281552 11930996
/lib/x86_64-linux-gnu/libdbus-1.so.3.7.6
sshd 32212 abc mem REG 252,2 14536 1442179
/usr/lib/x86_64-linux-gnu/libck-connector.so.0.0.0
sshd 32212 abc mem REG 252,2 134296 11931101
/lib/x86_64-linux-gnu/libselinux.so.1
sshd 32212 abc mem REG 252,2 55856 11931058
/lib/x86_64-linux-gnu/libpam.so.0.83.1
sshd 32212 abc mem REG 252,2 104936 11930995
/lib/x86_64-linux-gnu/libaudit.so.1.0.0
sshd 32212 abc mem REG 252,2 36632 11931139
/lib/x86_64-linux-gnu/libwrap.so.0.7.6
sshd 32212 abc mem REG 252,2 149120 11931276
/lib/x86_64-linux-gnu/ld-2.19.so
sshd 32212 abc DEL REG 0,5 6020490 /dev/zero
sshd 32212 abc 0u CHR 1,3 0t0 6 /dev/null
sshd 32212 abc 1u CHR 1,3 0t0 6 /dev/null
sshd 32212 abc 2u CHR 1,3 0t0 6 /dev/null
sshd 32212 abc 3u IPv4 6021256 0t0 TCP
IP_________14:ssh->IP_________40:55340 (ESTABLISHED)
sshd 32212 abc 4u unix 0xffff88032fb46000 0t0 6017653 socket
sshd 32212 abc 5u unix 0xffff880100cd6c00 0t0 6015704 socket
sshd 32212 abc 6r FIFO 0,10 0t0 6021267 pipe
sshd 32212 abc 7w FIFO 0,18 0t0 604
/run/systemd/sessions/1622.ref
sshd 32212 abc 8w FIFO 0,10 0t0 6021267 pipe
Regards
Vangelis