Hiya,
I can see it's a regular subject on this list.
I, like others wanted to use rsync to synchronise two block
devices (as it happens one lvm volume and one nbd device served
by qemu-img on a remote host from a qcow2 disk image so that I
can keep the old versions)
As I couldn't find any report of it being done successfully,
I'm just sharing my findings as it might benefit others.
There's a copy-devices patch to rsync available, but it only
allows to rsync a block device into a regular file. Someone
suggested that the --keep-device option be added to rsync for
that, but I don't think anyone did implement it.
Then I started to look at alternatives including: xdelta,
xdelta3, bsdiff, bdiff, bdelta, open-vcdiff. Unfortunately,
among those supporting block devices, none were able to "patch"
a device without rewriting it fully (which defeats my purpose as
I only want the modifications to be written so the qcow2 image
grows only as much).
Then I decided the best option was to enhance the copy-devices
patch so that it can rsync devices inplace. Which I did. See the
patch below, a quick and dirty proof-of-concept one, which only
works if passed the proper combination of options.
To be used as:
rsync -B1024 --copy-devices --no-whole-file \ 
  --stats --progress --inplace dev1 dev2
However I realised that rsync didn't minimise the writes on dev2
at all. In my testing on 100MB ext3 file systems, differing by
only about 3 MB (3 MB worth of differing 1024B blocks), rsync
would write 30MB worth of data to dev2 (growing the qcow2 file
by as much) (and before you ask, the same thing happens with
regular files with an unmodified rsync).
I wonder if it's not a bug. I started investigating a bit, but
the rsync code that handles matching blocks is hard to follow.
What I gathered was that for a lot of blocks (blocks full of
zeros btw), rsync sender would tell the receiver to copy the
blocks from the last block of the source file instead of telling
it the block had not changed. My suspicion is that it does that
because from some point in processing the file, is stops
splitting the source and destination devices at the same
offsets.
I stopped investigating there when I realised that what I was
trying to do was very straightforward and that I shouldn't need
to be looking at code of that complexity to achieve it.
Then I realised that it could be done as a perl one-liner
(almost):
perl -'MDigest::MD5 md5' -ne 'BEGIN{$/=\1024};print md5($_)'
dev2 |
  perl -'MDigest::MD5 md5' -ne 'BEGIN{$/=\1024};$b=md5($_);
    read STDIN,$a,16;if ($a eq $b) {print "s"} else {print
"c" . $_}' dev1 |
   perl -ne 'BEGIN{$/=\1} if ($_ eq"s") {$s++} else {if ($s) {
    seek STDOUT,$s*1024,1; $s=0}; read ARGV,$buf,1024; print $buf}'
1<> dev2
And if dev2 is on a remote host, run the 1st and last perl over
ssh (add some lzop or gzip compression to save bandwith if need
be):
ssh remote "
  perl -'MDigest::MD5 md5' -ne 'BEGIN{\$/=\1024};print md5(\$_)'
dev2 | lzop -c" |
  lzop -dc | perl -'MDigest::MD5 md5' -ne
'BEGIN{$/=\1024};$b=md5($_);
    read STDIN,$a,16;if ($a eq $b) {print "s"} else {print
"c" . $_}' dev1 | lzop -c |
  ssh remote "lzop -dc |
   perl -ne 'BEGIN{\$/=\1} if (\$_ eq\"s\") {\$s++} else {if (\$s)
{
    seek STDOUT,\$s*1024,1; \$s=0}; read ARGV,\$buf,1024; print \$buf}'
1<> dev2"
That was 30 times faster than rsync, used about as much
bandwidth and only wrote 3MB worth of data to dev2 instead of
30.
Contrary to rsync, that perl solution is not going to be
bandwidth efficient when data has been moved/copied around on
the block device, but I'm not too concerned about bandwith, more
about as much is written to devices and it should be as
efficient as rsync (and again, as it happens it seems to be a
lot more efficient, which again could be a bug in rsync).
Cheers,
Stephane
Here is the patch. Again, not ready to be included in the patch
distribution, it's just a proof of concept.
diff -pur rsync-3.0.7/generator.c rsync-3.0.7.new/generator.c
--- rsync-3.0.7/generator.c	2009-12-23 19:36:27.000000000 +0000
+++ rsync-3.0.7.new/generator.c	2010-06-15 11:08:41.919557425 +0100
@@ -39,6 +39,7 @@ extern int preserve_acls;
 extern int preserve_xattrs;
 extern int preserve_links;
 extern int preserve_devices;
+extern int copy_devices;
 extern int preserve_specials;
 extern int preserve_hard_links;
 extern int preserve_executability;
@@ -980,7 +981,7 @@ static int try_dests_reg(struct file_str
 
 	do {
 		pathjoin(cmpbuf, MAXPATHLEN, basis_dir[j], fname);
-		if (link_stat(cmpbuf, &sxp->st, 0) < 0 ||
!S_ISREG(sxp->st.st_mode))
+		if (link_stat(cmpbuf, &sxp->st, 0) < 0 ||
!(S_ISREG(sxp->st.st_mode) || (copy_devices &&
IS_DEVICE(sxp->st.st_mode))))
 			continue;
 		switch (match_level) {
 		case 0:
@@ -1695,7 +1696,7 @@ static void recv_generator(char *fname, 
 		goto cleanup;
 	}
 
-	if (!S_ISREG(file->mode)) {
+	if (!(S_ISREG(file->mode) || (copy_devices &&
IS_DEVICE(file->mode)))) {
 		if (solo_file)
 			fname = f_name(file, NULL);
 		rprintf(FINFO, "skipping non-regular file \"%s\"\n",
fname);
@@ -1733,7 +1734,7 @@ static void recv_generator(char *fname, 
 	fnamecmp = fname;
 	fnamecmp_type = FNAMECMP_FNAME;
 
-	if (statret == 0 && !S_ISREG(sx.st.st_mode)) {
+	if (statret == 0 && !(S_ISREG(sx.st.st_mode) || (copy_devices
&& IS_DEVICE(sx.st.st_mode)))) {
 		if (delete_item(fname, sx.st.st_mode, del_opts | DEL_FOR_FILE) != 0)
 			goto cleanup;
 		statret = -1;
@@ -1820,6 +1821,26 @@ static void recv_generator(char *fname, 
 		goto cleanup;
 	}
 
+	/* On Linux systems (at least), st_size is typically 0 for devices.
+	 * If so, try to determine the actual device size. */
+	if (IS_DEVICE(sx.st.st_mode) && sx.st.st_size == 0) {
+	        int fd;
+
+		fd = open(fname, O_RDONLY);
+
+		if (fd < 0)
+			rsyserr(FERROR, errno, "failed to open %s to determine size",
fname);
+		else {
+			OFF_T off = lseek(fd, 0, SEEK_END);
+			if (off == (OFF_T) -1)
+				rsyserr(FERROR, errno, "failed to seek to end of %s to determine
size", fname);
+			else {
+				sx.st.st_size = off;
+			}
+			close(fd);
+		}
+	}
+
 	if (append_mode > 0 && sx.st.st_size >= F_LENGTH(file)) {
 #ifdef SUPPORT_HARD_LINKS
 		if (F_IS_HLINKED(file))
diff -pur rsync-3.0.7/options.c rsync-3.0.7.new/options.c
--- rsync-3.0.7/options.c	2009-12-21 22:40:41.000000000 +0000
+++ rsync-3.0.7.new/options.c	2010-06-14 10:24:49.329958121 +0100
@@ -48,6 +48,7 @@ int append_mode = 0;
 int keep_dirlinks = 0;
 int copy_dirlinks = 0;
 int copy_links = 0;
+int copy_devices = 0;
 int preserve_links = 0;
 int preserve_hard_links = 0;
 int preserve_acls = 0;
@@ -349,6 +350,7 @@ void usage(enum logcode F)
   rprintf(F," -o, --owner                 preserve owner (super-user
only)\n");
   rprintf(F," -g, --group                 preserve group\n");
   rprintf(F,"     --devices               preserve device files
(super-user only)\n");
+  rprintf(F,"     --copy-devices          copy device contents as regular
file\n");
   rprintf(F,"     --specials              preserve special files\n");
   rprintf(F," -D                          same as --devices
--specials\n");
   rprintf(F," -t, --times                 preserve modification
times\n");
@@ -507,6 +509,7 @@ static struct poptOption long_options[] 
   {"no-D",             0,  POPT_ARG_NONE,   0, OPT_NO_D, 0, 0 },
   {"devices",          0,  POPT_ARG_VAL,    &preserve_devices, 1,
0, 0 },
   {"no-devices",       0,  POPT_ARG_VAL,    &preserve_devices, 0,
0, 0 },
+  {"copy-devices",     0,  POPT_ARG_NONE,   ©_devices, 0, 0,
0 },
   {"specials",         0,  POPT_ARG_VAL,    &preserve_specials,
1, 0, 0 },
   {"no-specials",      0,  POPT_ARG_VAL,    &preserve_specials,
0, 0, 0 },
   {"links",           'l', POPT_ARG_VAL,   
&preserve_links, 1, 0, 0 },
@@ -2069,6 +2072,9 @@ void server_options(char **args, int *ar
 	else if (remove_source_files)
 		args[ac++] = "--remove-sent-files";
 
+	if (copy_devices)
+		args[ac++] = "--copy-devices";
+
 	if (ac > MAX_SERVER_ARGS) { /* Not possible... */
 		rprintf(FERROR, "argc overflow in server_options().\n");
 		exit_cleanup(RERR_MALLOC);
diff -pur rsync-3.0.7/receiver.c rsync-3.0.7.new/receiver.c
--- rsync-3.0.7/receiver.c	2009-04-12 20:48:59.000000000 +0100
+++ rsync-3.0.7.new/receiver.c	2010-06-15 15:13:13.577550534 +0100
@@ -49,6 +49,7 @@ extern int keep_partial;
 extern int checksum_seed;
 extern int inplace;
 extern int delay_updates;
+extern int copy_devices;
 extern mode_t orig_umask;
 extern struct stats stats;
 extern char *tmpdir;
@@ -286,7 +287,7 @@ static int receive_data(int f_in, char *
 
 #ifdef HAVE_FTRUNCATE
 	if (inplace && fd != -1
-	 && ftruncate(fd, offset) < 0) {
+	 && offset < size_r && ftruncate(fd, offset) < 0) {
 		rsyserr(FERROR_XFER, errno, "ftruncate failed on %s",
 			full_fname(fname));
 	}
@@ -668,11 +669,25 @@ int recv_files(int f_in, char *local_nam
 			continue;
 		}
 
-		if (fd1 != -1 && !S_ISREG(st.st_mode)) {
+		if (fd1 != -1 && !(S_ISREG(st.st_mode) || (copy_devices &&
IS_DEVICE(st.st_mode)))) {
 			close(fd1);
 			fd1 = -1;
 		}
 
+		/* On Linux systems (at least), st_size is typically 0 for devices.
+		 * If so, try to determine the actual device size. */
+		if (fd1 != -1 && IS_DEVICE(st.st_mode) && st.st_size == 0) {
+			OFF_T off = lseek(fd1, 0, SEEK_END);
+			if (off == (OFF_T) -1)
+				rsyserr(FERROR, errno, "failed to seek to end of %s to determine
size", fname);
+			else {
+				st.st_size = off;
+				off = lseek(fd1, 0, SEEK_SET);
+				if (off != 0)
+					rsyserr(FERROR, errno, "failed to seek back to beginning of %s to
read it", fname);
+			}
+		}
+
 		/* If we're not preserving permissions, change the file-list's
 		 * mode based on the local permissions and some heuristics. */
 		if (!preserve_perms) {
diff -pur rsync-3.0.7/rsync.c rsync-3.0.7.new/rsync.c
--- rsync-3.0.7/rsync.c	2009-12-19 21:39:49.000000000 +0000
+++ rsync-3.0.7.new/rsync.c	2010-06-14 10:24:49.339960221 +0100
@@ -34,6 +34,7 @@ extern int preserve_xattrs;
 extern int preserve_perms;
 extern int preserve_executability;
 extern int preserve_times;
+extern int copy_devices;
 extern int am_root;
 extern int am_server;
 extern int am_sender;
@@ -328,7 +329,8 @@ int read_ndx_and_attrs(int f_in, int *if
 
 	if (iflags & ITEM_TRANSFER) {
 		int i = ndx - cur_flist->ndx_start;
-		if (i < 0 || !S_ISREG(cur_flist->files[i]->mode)) {
+		struct file_struct *file = cur_flist->files[i];
+		if (i < 0 || !(S_ISREG(file->mode) || (copy_devices &&
IS_DEVICE(file->mode)))) {
 			rprintf(FERROR,
 				"received request to transfer non-regular file: %d [%s]\n",
 				ndx, who_am_i());
diff -pur rsync-3.0.7/sender.c rsync-3.0.7.new/sender.c
--- rsync-3.0.7/sender.c	2009-12-13 01:23:03.000000000 +0000
+++ rsync-3.0.7.new/sender.c	2010-06-15 15:09:50.657553229 +0100
@@ -309,6 +309,20 @@ void send_files(int f_in, int f_out)
 			exit_cleanup(RERR_PROTOCOL);
 		}
 
+		/* On Linux systems (at least), st_size is typically 0 for devices.
+		 * If so, try to determine the actual device size. */
+		if (IS_DEVICE(st.st_mode) && st.st_size == 0) {
+			OFF_T off = lseek(fd, 0, SEEK_END);
+			if (off == (OFF_T) -1)
+				rsyserr(FERROR, errno, "failed to seek to end of %s to determine
size", fname);
+			else {
+				st.st_size = off;
+				off = lseek(fd, 0, SEEK_SET);
+				if (off != 0)
+					rsyserr(FERROR, errno, "failed to seek back to beginning of %s to
read it", fname);
+			}
+		}
+
 		if (st.st_size) {
 			int32 read_size = MAX(s->blength * 3, MAX_MAP_SIZE);
 			mbuf = map_file(fd, st.st_size, read_size, s->blength);
So this is an interesting problem to think about. It seems to take this form: - Data comes in fixed-size blocks - Blocks may be copied elsewhere - Blocks may be modified in place - Blocks may be copied with modification BUT - There are no insertions/deletions - The overall file does not change in size (or does so in terms of whole blocks) This suggests restarting the "match with preceding data in the same file" algorithm at each block boundary. Also, it could match either data at lower address or higher addresses, but assuming we progress from lower to higher, the "lower address" data obviously has to be from the new version of the file and "higher address" data from the old version. A simple form of the algorithm would attempt to produce, in order, a directive for each block that would (a) leave the block as is, (b) copy the block from another block on the destination, or (c) provide new data for the block. Case (c) could be further refined to analyze the block using the traditional rsync algorithm to reduce the number of bytes transferred. While rsync obviously works for arbitrary data, it is organized to detect similarities at any offset, and thus to handle insertion/deletion of bytes well. I further observe that *in place* update, if performed in a predetermined order, such as low to high addresses, deals less efficiently with some shufflings of data than with others. In particular, if an application copies from data from low addresses to high ones, and the algorithm also processes in that same order, then the copied data will have to be resynthesized/transferred, since the old copy will have been written over at the destination before we get to place for the new copy. It would be possible in principle to do in-place update *and* to use a scratch temp file at the destination to hold data that you were about to overwrite and will want later in the process. This avoid network bandwidth at the cost of I/O bandwidth / space at the destination. I think the maximum space required is half the size of the old version of the file at the destination, though the simplest use of the temp file might lead to an upper bound of the size of the target file (imagine a cyclic shuffling of blocks, where all but the last move up one "slot" and the last moves to the first slot). Maybe rsync already does this? I don't know enough about the internals of rsync to know what all this might suggest ... Regards -- Eliot Moss
Hi, On Tue, Jun 15, 2010 at 04:12:18PM +0100, Stephane Chazelas wrote:> Hiya, > > I can see it's a regular subject on this list. > > I, like others wanted to use rsync to synchronise two block > devices (as it happens one lvm volume and one nbd device served > by qemu-img on a remote host from a qcow2 disk image so that I > can keep the old versions)I already wrote a patch, but without response there. http://lists.samba.org/archive/rsync/2010-January/024538.html Oh yeah, the patch is not in the archive! 8-) Ok, including it here now: <<<<<<<<<<<<<<<<<<<< snip >>>>>>>>>>>>>>>>>>>> diff --git a/flist.c b/flist.c index 6d450bf..68f1ea0 100644 --- a/flist.c +++ b/flist.c @@ -63,6 +63,7 @@ extern int non_perishable_cnt; extern int prune_empty_dirs; extern int copy_links; extern int copy_unsafe_links; +extern int rw_devices; extern int protocol_version; extern int sanitize_paths; extern int munge_symlinks; @@ -228,7 +229,7 @@ static int readlink_stat(const char *path, STRUCT_STAT *stp, char *linkbuf) #endif } -int link_stat(const char *path, STRUCT_STAT *stp, int follow_dirlinks) +static int link_stat2(const char *path, STRUCT_STAT *stp, int follow_dirlinks) { #ifdef SUPPORT_LINKS if (copy_links) @@ -246,6 +247,28 @@ int link_stat(const char *path, STRUCT_STAT *stp, int follow_dirlinks) #endif } +int link_stat(const char *path, STRUCT_STAT *stp, int follow_dirlinks) +{ + if (link_stat2(path, stp, follow_dirlinks) != 0) + return -1; + if (rw_devices && S_ISBLK(stp->st_mode) && stp->st_size == 0) { + /* On Linux systems (at least), st_size is typically 0 for devices. + * If so, try to determine the actual device size. */ + int fdx = do_open(path, O_RDONLY, 0); + if (fdx == -1) + rsyserr(FERROR, errno, "failed to open device %s to determine size", path); + else { + OFF_T off = lseek(fdx, 0, SEEK_END); + if (off == (OFF_T)-1) + rsyserr(FERROR, errno, "failed to seek to end of %s to determine size", path); + else + stp->st_size = off; + close(fdx); + } + } + return 0; +} + static inline int is_daemon_excluded(const char *fname, int is_dir) { if (daemon_filter_list.head @@ -671,7 +694,7 @@ static void send_file_entry(int f, const char *fname, struct file_struct *file, #endif strlcpy(lastname, fname, MAXPATHLEN); - if (S_ISREG(mode) || S_ISLNK(mode)) + if (S_ISREG(mode) || S_ISLNK(mode) || (rw_devices && S_ISBLK(mode))) stats.total_size += F_LENGTH(file); } @@ -1351,7 +1374,8 @@ struct file_struct *make_file(const char *fname, struct file_list *flist, #ifdef HAVE_STRUCT_STAT_ST_RDEV if (IS_DEVICE(st.st_mode)) { tmp_rdev = st.st_rdev; - st.st_size = 0; + if (!rw_devices || !S_ISBLK(st.st_mode)) + st.st_size = 0; } else if (IS_SPECIAL(st.st_mode)) st.st_size = 0; #endif diff --git a/generator.c b/generator.c index e7c1ef7..78169ef 100644 --- a/generator.c +++ b/generator.c @@ -39,6 +39,7 @@ extern int preserve_acls; extern int preserve_xattrs; extern int preserve_links; extern int preserve_devices; +extern int rw_devices; extern int preserve_specials; extern int preserve_hard_links; extern int preserve_executability; @@ -1181,6 +1182,23 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx, statret = link_stat(fname, &sx.st, keep_dirlinks && is_dir); stat_errno = errno; + + if (statret == 0 && IS_DEVICE(sx.st.st_mode) && sx.st.st_size == 0) { + /* On Linux systems (at least), st_size is typically 0 for devices. + * If so, try to determine the actual device size. */ + int fdx = do_open(fname, O_RDONLY, 0); + if ( fdx == -1 ) { + rsyserr(FERROR, errno, "failed to open device %s to determine size", fname); + } + else { + OFF_T off = lseek(fdx, 0, SEEK_END); + if (off == (OFF_T)-1) + rsyserr(FERROR, errno, "failed to seek to end of %s to determine size", fname); + else + sx.st.st_size = off; + close(fdx); + } + } } if (missing_args == 2 && file->mode == 0) { @@ -1494,7 +1512,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx, goto cleanup; } - if (!S_ISREG(file->mode)) { + if (!(S_ISREG(file->mode) || (rw_devices && IS_DEVICE(file->mode)))) { if (solo_file) fname = f_name(file, NULL); rprintf(FINFO, "skipping non-regular file \"%s\"\n", fname); @@ -1532,7 +1550,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx, fnamecmp = fname; fnamecmp_type = FNAMECMP_FNAME; - if (statret == 0 && !S_ISREG(sx.st.st_mode)) { + if (statret == 0 && !(S_ISREG(sx.st.st_mode) || (rw_devices && IS_DEVICE(sx.st.st_mode)))) { if (delete_item(fname, sx.st.st_mode, del_opts | DEL_FOR_FILE) != 0) goto cleanup; statret = -1; diff --git a/options.c b/options.c index 6ee3e01..0d60293 100644 --- a/options.c +++ b/options.c @@ -48,6 +48,7 @@ int append_mode = 0; int keep_dirlinks = 0; int copy_dirlinks = 0; int copy_links = 0; +int rw_devices = 0; int preserve_links = 0; int preserve_hard_links = 0; int preserve_acls = 0; @@ -695,6 +696,7 @@ void usage(enum logcode F) rprintf(F," -o, --owner preserve owner (super-user only)\n"); rprintf(F," -g, --group preserve group\n"); rprintf(F," --devices preserve device files (super-user only)\n"); + rprintf(F," --rw-devices read/write device contents as regular file (implies --inplace)\n"); rprintf(F," --specials preserve special files\n"); rprintf(F," -D same as --devices --specials\n"); rprintf(F," -t, --times preserve modification times\n"); @@ -863,6 +865,7 @@ static struct poptOption long_options[] = { {"no-D", 0, POPT_ARG_NONE, 0, OPT_NO_D, 0, 0 }, {"devices", 0, POPT_ARG_VAL, &preserve_devices, 1, 0, 0 }, {"no-devices", 0, POPT_ARG_VAL, &preserve_devices, 0, 0, 0 }, + {"rw-devices", 0, POPT_ARG_NONE, &rw_devices, 0, 0, 0 }, {"specials", 0, POPT_ARG_VAL, &preserve_specials, 1, 0, 0 }, {"no-specials", 0, POPT_ARG_VAL, &preserve_specials, 0, 0, 0 }, {"links", 'l', POPT_ARG_VAL, &preserve_links, 1, 0, 0 }, @@ -1801,6 +1804,11 @@ int parse_arguments(int *argc_p, const char ***argv_p) set_output_verbosity(verbose, DEFAULT_PRIORITY); + if (rw_devices) { + inplace = 1; + ignore_times = 1; + } + if (do_stats) { parse_output_words(info_words, info_levels, verbose > 1 ? "stats3" : "stats2", DEFAULT_PRIORITY); @@ -2661,6 +2669,9 @@ void server_options(char **args, int *argc_p) else if (remove_source_files) args[ac++] = "--remove-sent-files"; + if (rw_devices) + args[ac++] = "--rw-devices"; + if (ac > MAX_SERVER_ARGS) { /* Not possible... */ rprintf(FERROR, "argc overflow in server_options().\n"); exit_cleanup(RERR_MALLOC); diff --git a/receiver.c b/receiver.c index 6688dda..ec11802 100644 --- a/receiver.c +++ b/receiver.c @@ -37,6 +37,7 @@ extern int protocol_version; extern int relative_paths; extern int preserve_hard_links; extern int preserve_perms; +extern int rw_devices; extern int preserve_xattrs; extern int basis_dir_cnt; extern int make_backups; @@ -199,6 +200,7 @@ int open_tmpfile(char *fnametmp, const char *fname, struct file_struct *file) static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r, const char *fname, int fd, OFF_T total_size) { + STRUCT_STAT st; static char file_sum1[MAX_DIGEST_LEN]; struct map_struct *mapbuf; struct sum_struct sum; @@ -321,10 +323,14 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r, goto report_write_error; #ifdef HAVE_FTRUNCATE - if (inplace && fd != -1 - && ftruncate(fd, offset) < 0) { - rsyserr(FERROR_XFER, errno, "ftruncate failed on %s", - full_fname(fname)); + (void)do_fstat(fd,&st); + /* Makes no sense to attempt to ftruncate() a block device: */ + if (!(IS_DEVICE(st.st_mode))) { + if (inplace && fd != -1 + && ftruncate(fd, offset) < 0) { + rsyserr(FERROR_XFER, errno, "ftruncate failed on %s", + full_fname(fname)); + } } #endif @@ -732,11 +738,25 @@ int recv_files(int f_in, int f_out, char *local_name) continue; } - if (fd1 != -1 && !S_ISREG(st.st_mode)) { + if (fd1 != -1 && !(S_ISREG(st.st_mode) || (rw_devices && IS_DEVICE(st.st_mode)))) { close(fd1); fd1 = -1; } + /* On Linux systems (at least), st_size is typically 0 for devices. + * If so, try to determine the actual device size. */ + if (fd1 != -1 && IS_DEVICE(st.st_mode) && st.st_size == 0) { + OFF_T off = lseek(fd1, 0, SEEK_END); + if (off == (OFF_T) -1) + rsyserr(FERROR, errno, "failed to seek to end of %s to determine size", fname); + else { + st.st_size = off; + off = lseek(fd1, 0, SEEK_SET); + if (off != 0) + rsyserr(FERROR, errno, "failed to seek back to beginning of %s to read it", fname); + } + } + /* If we're not preserving permissions, change the file-list's * mode based on the local permissions and some heuristics. */ if (!preserve_perms) { diff --git a/rsync.c b/rsync.c index cefbe5f..6b6ffea 100644 --- a/rsync.c +++ b/rsync.c @@ -33,6 +33,7 @@ extern int preserve_xattrs; extern int preserve_perms; extern int preserve_executability; extern int preserve_times; +extern int rw_devices; extern int am_root; extern int am_server; extern int am_sender; @@ -397,7 +398,8 @@ int read_ndx_and_attrs(int f_in, int f_out, int *iflag_ptr, uchar *type_ptr, if (iflags & ITEM_TRANSFER) { int i = ndx - cur_flist->ndx_start; - if (i < 0 || !S_ISREG(cur_flist->files[i]->mode)) { + struct file_struct *file = cur_flist->files[i]; + if (i < 0 || !(S_ISREG(file->mode) || (rw_devices && IS_DEVICE(file->mode)))) { rprintf(FERROR, "received request to transfer non-regular file: %d [%s]\n", ndx, who_am_i()); diff --git a/syscall.c b/syscall.c index aba0009..7096110 100644 --- a/syscall.c +++ b/syscall.c @@ -36,6 +36,7 @@ extern int read_only; extern int list_only; extern int preserve_perms; extern int preserve_executability; +extern int rw_devices; #define RETURN_ERROR_IF(x,e) \ do { \ @@ -286,20 +287,56 @@ int do_mkstemp(char *template, mode_t perms) int do_stat(const char *fname, STRUCT_STAT *st) { #ifdef USE_STAT64_FUNCS - return stat64(fname, st); + if (stat64(fname, st) != 0) + return -1; #else - return stat(fname, st); + if (stat(fname, st) != 0) + return -1; #endif + if (rw_devices && S_ISBLK(st->st_mode) && st->st_size == 0) { + /* On Linux systems (at least), st_size is typically 0 for devices. + * If so, try to determine the actual device size. */ + int fdx = do_open(fname, O_RDONLY, 0); + if (fdx == -1) + rsyserr(FERROR, errno, "failed to open device %s to determine size", fname); + else { + OFF_T off = lseek(fdx, 0, SEEK_END); + if (off == (OFF_T)-1) + rsyserr(FERROR, errno, "failed to seek to end of %s to determine size", fname); + else + st->st_size = off; + close(fdx); + } + } + return 0; } int do_lstat(const char *fname, STRUCT_STAT *st) { #ifdef SUPPORT_LINKS # ifdef USE_STAT64_FUNCS - return lstat64(fname, st); + if (lstat64(fname, st) != 0) + return -1; # else - return lstat(fname, st); + if (lstat(fname, st) != 0) + return -1; # endif + if (rw_devices && S_ISBLK(st->st_mode) && st->st_size == 0) { + /* On Linux systems (at least), st_size is typically 0 for devices. + * If so, try to determine the actual device size. */ + int fdx = do_open(fname, O_RDONLY, 0); + if (fdx == -1) + rsyserr(FERROR, errno, "failed to open device %s to determine size", fname); + else { + OFF_T off = lseek(fdx, 0, SEEK_END); + if (off == (OFF_T)-1) + rsyserr(FERROR, errno, "failed to seek to end of %s to determine size", fname); + else + st->st_size = off; + close(fdx); + } + } + return 0; #else return do_stat(fname, st); #endif @@ -308,10 +345,30 @@ int do_lstat(const char *fname, STRUCT_STAT *st) int do_fstat(int fd, STRUCT_STAT *st) { #ifdef USE_STAT64_FUNCS - return fstat64(fd, st); + if (fstat64(fd, st) != 0) + return -1; #else - return fstat(fd, st); + if (fstat(fd, st) != 0) + return -1; #endif + if (rw_devices && S_ISBLK(st->st_mode) && st->st_size == 0) { + /* On Linux systems (at least), st_size is typically 0 for devices. + * If so, try to determine the actual device size. */ + OFF_T off_save = lseek(fd, 0, SEEK_CUR); + if (off_save == (OFF_T)-1) + rsyserr(FERROR, errno, "failed to seek on device inode %lld to read current position", (long long int)(st->st_ino)); + else { + OFF_T off = lseek(fd, 0, SEEK_END); + if (off == (OFF_T)-1) + rsyserr(FERROR, errno, "failed to seek to end on device inode %lld to determine size", (long long int)(st->st_ino)); + else + st->st_size = off; + off = lseek(fd, off_save, SEEK_SET); + if (off == (OFF_T)-1) + rsyserr(FERROR, errno, "failed to seek to origin position on device inode %lld", (long long int)(st->st_ino)); + } + } + return 0; } OFF_T do_lseek(int fd, OFF_T offset, int whence) <<<<<<<<<<<<<<<<<<<< snip >>>>>>>>>>>>>>>>>>>> Can someone review a patch and include it somewhere? Thanks -- Zito
Hi, On Tue, Jun 15, 2010 at 04:12:18PM +0100, Stephane Chazelas wrote:> Hiya, > > I can see it's a regular subject on this list. > > I, like others wanted to use rsync to synchronise two block > devices (as it happens one lvm volume and one nbd device served > by qemu-img on a remote host from a qcow2 disk image so that I > can keep the old versions)I already wrote a patch, but without response there. http://lists.samba.org/archive/rsync/2010-January/024538.html Oh yeah, the patch is not in the archive! 8-) Ok, including it here now: <<<<<<<<<<<<<<<<<<<< snip >>>>>>>>>>>>>>>>>>>> diff --git a/flist.c b/flist.c index 6d450bf..68f1ea0 100644 --- a/flist.c +++ b/flist.c @@ -63,6 +63,7 @@ extern int non_perishable_cnt; extern int prune_empty_dirs; extern int copy_links; extern int copy_unsafe_links; +extern int rw_devices; extern int protocol_version; extern int sanitize_paths; extern int munge_symlinks; @@ -228,7 +229,7 @@ static int readlink_stat(const char *path, STRUCT_STAT *stp, char *linkbuf) #endif } -int link_stat(const char *path, STRUCT_STAT *stp, int follow_dirlinks) +static int link_stat2(const char *path, STRUCT_STAT *stp, int follow_dirlinks) { #ifdef SUPPORT_LINKS if (copy_links) @@ -246,6 +247,28 @@ int link_stat(const char *path, STRUCT_STAT *stp, int follow_dirlinks) #endif } +int link_stat(const char *path, STRUCT_STAT *stp, int follow_dirlinks) +{ + if (link_stat2(path, stp, follow_dirlinks) != 0) + return -1; + if (rw_devices && S_ISBLK(stp->st_mode) && stp->st_size == 0) { + /* On Linux systems (at least), st_size is typically 0 for devices. + * If so, try to determine the actual device size. */ + int fdx = do_open(path, O_RDONLY, 0); + if (fdx == -1) + rsyserr(FERROR, errno, "failed to open device %s to determine size", path); + else { + OFF_T off = lseek(fdx, 0, SEEK_END); + if (off == (OFF_T)-1) + rsyserr(FERROR, errno, "failed to seek to end of %s to determine size", path); + else + stp->st_size = off; + close(fdx); + } + } + return 0; +} + static inline int is_daemon_excluded(const char *fname, int is_dir) { if (daemon_filter_list.head @@ -671,7 +694,7 @@ static void send_file_entry(int f, const char *fname, struct file_struct *file, #endif strlcpy(lastname, fname, MAXPATHLEN); - if (S_ISREG(mode) || S_ISLNK(mode)) + if (S_ISREG(mode) || S_ISLNK(mode) || (rw_devices && S_ISBLK(mode))) stats.total_size += F_LENGTH(file); } @@ -1351,7 +1374,8 @@ struct file_struct *make_file(const char *fname, struct file_list *flist, #ifdef HAVE_STRUCT_STAT_ST_RDEV if (IS_DEVICE(st.st_mode)) { tmp_rdev = st.st_rdev; - st.st_size = 0; + if (!rw_devices || !S_ISBLK(st.st_mode)) + st.st_size = 0; } else if (IS_SPECIAL(st.st_mode)) st.st_size = 0; #endif diff --git a/generator.c b/generator.c index e7c1ef7..78169ef 100644 --- a/generator.c +++ b/generator.c @@ -39,6 +39,7 @@ extern int preserve_acls; extern int preserve_xattrs; extern int preserve_links; extern int preserve_devices; +extern int rw_devices; extern int preserve_specials; extern int preserve_hard_links; extern int preserve_executability; @@ -1181,6 +1182,23 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx, statret = link_stat(fname, &sx.st, keep_dirlinks && is_dir); stat_errno = errno; + + if (statret == 0 && IS_DEVICE(sx.st.st_mode) && sx.st.st_size == 0) { + /* On Linux systems (at least), st_size is typically 0 for devices. + * If so, try to determine the actual device size. */ + int fdx = do_open(fname, O_RDONLY, 0); + if ( fdx == -1 ) { + rsyserr(FERROR, errno, "failed to open device %s to determine size", fname); + } + else { + OFF_T off = lseek(fdx, 0, SEEK_END); + if (off == (OFF_T)-1) + rsyserr(FERROR, errno, "failed to seek to end of %s to determine size", fname); + else + sx.st.st_size = off; + close(fdx); + } + } } if (missing_args == 2 && file->mode == 0) { @@ -1494,7 +1512,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx, goto cleanup; } - if (!S_ISREG(file->mode)) { + if (!(S_ISREG(file->mode) || (rw_devices && IS_DEVICE(file->mode)))) { if (solo_file) fname = f_name(file, NULL); rprintf(FINFO, "skipping non-regular file \"%s\"\n", fname); @@ -1532,7 +1550,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx, fnamecmp = fname; fnamecmp_type = FNAMECMP_FNAME; - if (statret == 0 && !S_ISREG(sx.st.st_mode)) { + if (statret == 0 && !(S_ISREG(sx.st.st_mode) || (rw_devices && IS_DEVICE(sx.st.st_mode)))) { if (delete_item(fname, sx.st.st_mode, del_opts | DEL_FOR_FILE) != 0) goto cleanup; statret = -1; diff --git a/options.c b/options.c index 6ee3e01..0d60293 100644 --- a/options.c +++ b/options.c @@ -48,6 +48,7 @@ int append_mode = 0; int keep_dirlinks = 0; int copy_dirlinks = 0; int copy_links = 0; +int rw_devices = 0; int preserve_links = 0; int preserve_hard_links = 0; int preserve_acls = 0; @@ -695,6 +696,7 @@ void usage(enum logcode F) rprintf(F," -o, --owner preserve owner (super-user only)\n"); rprintf(F," -g, --group preserve group\n"); rprintf(F," --devices preserve device files (super-user only)\n"); + rprintf(F," --rw-devices read/write device contents as regular file (implies --inplace)\n"); rprintf(F," --specials preserve special files\n"); rprintf(F," -D same as --devices --specials\n"); rprintf(F," -t, --times preserve modification times\n"); @@ -863,6 +865,7 @@ static struct poptOption long_options[] = { {"no-D", 0, POPT_ARG_NONE, 0, OPT_NO_D, 0, 0 }, {"devices", 0, POPT_ARG_VAL, &preserve_devices, 1, 0, 0 }, {"no-devices", 0, POPT_ARG_VAL, &preserve_devices, 0, 0, 0 }, + {"rw-devices", 0, POPT_ARG_NONE, &rw_devices, 0, 0, 0 }, {"specials", 0, POPT_ARG_VAL, &preserve_specials, 1, 0, 0 }, {"no-specials", 0, POPT_ARG_VAL, &preserve_specials, 0, 0, 0 }, {"links", 'l', POPT_ARG_VAL, &preserve_links, 1, 0, 0 }, @@ -1801,6 +1804,11 @@ int parse_arguments(int *argc_p, const char ***argv_p) set_output_verbosity(verbose, DEFAULT_PRIORITY); + if (rw_devices) { + inplace = 1; + ignore_times = 1; + } + if (do_stats) { parse_output_words(info_words, info_levels, verbose > 1 ? "stats3" : "stats2", DEFAULT_PRIORITY); @@ -2661,6 +2669,9 @@ void server_options(char **args, int *argc_p) else if (remove_source_files) args[ac++] = "--remove-sent-files"; + if (rw_devices) + args[ac++] = "--rw-devices"; + if (ac > MAX_SERVER_ARGS) { /* Not possible... */ rprintf(FERROR, "argc overflow in server_options().\n"); exit_cleanup(RERR_MALLOC); diff --git a/receiver.c b/receiver.c index 6688dda..ec11802 100644 --- a/receiver.c +++ b/receiver.c @@ -37,6 +37,7 @@ extern int protocol_version; extern int relative_paths; extern int preserve_hard_links; extern int preserve_perms; +extern int rw_devices; extern int preserve_xattrs; extern int basis_dir_cnt; extern int make_backups; @@ -199,6 +200,7 @@ int open_tmpfile(char *fnametmp, const char *fname, struct file_struct *file) static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r, const char *fname, int fd, OFF_T total_size) { + STRUCT_STAT st; static char file_sum1[MAX_DIGEST_LEN]; struct map_struct *mapbuf; struct sum_struct sum; @@ -321,10 +323,14 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r, goto report_write_error; #ifdef HAVE_FTRUNCATE - if (inplace && fd != -1 - && ftruncate(fd, offset) < 0) { - rsyserr(FERROR_XFER, errno, "ftruncate failed on %s", - full_fname(fname)); + (void)do_fstat(fd,&st); + /* Makes no sense to attempt to ftruncate() a block device: */ + if (!(IS_DEVICE(st.st_mode))) { + if (inplace && fd != -1 + && ftruncate(fd, offset) < 0) { + rsyserr(FERROR_XFER, errno, "ftruncate failed on %s", + full_fname(fname)); + } } #endif @@ -732,11 +738,25 @@ int recv_files(int f_in, int f_out, char *local_name) continue; } - if (fd1 != -1 && !S_ISREG(st.st_mode)) { + if (fd1 != -1 && !(S_ISREG(st.st_mode) || (rw_devices && IS_DEVICE(st.st_mode)))) { close(fd1); fd1 = -1; } + /* On Linux systems (at least), st_size is typically 0 for devices. + * If so, try to determine the actual device size. */ + if (fd1 != -1 && IS_DEVICE(st.st_mode) && st.st_size == 0) { + OFF_T off = lseek(fd1, 0, SEEK_END); + if (off == (OFF_T) -1) + rsyserr(FERROR, errno, "failed to seek to end of %s to determine size", fname); + else { + st.st_size = off; + off = lseek(fd1, 0, SEEK_SET); + if (off != 0) + rsyserr(FERROR, errno, "failed to seek back to beginning of %s to read it", fname); + } + } + /* If we're not preserving permissions, change the file-list's * mode based on the local permissions and some heuristics. */ if (!preserve_perms) { diff --git a/rsync.c b/rsync.c index cefbe5f..6b6ffea 100644 --- a/rsync.c +++ b/rsync.c @@ -33,6 +33,7 @@ extern int preserve_xattrs; extern int preserve_perms; extern int preserve_executability; extern int preserve_times; +extern int rw_devices; extern int am_root; extern int am_server; extern int am_sender; @@ -397,7 +398,8 @@ int read_ndx_and_attrs(int f_in, int f_out, int *iflag_ptr, uchar *type_ptr, if (iflags & ITEM_TRANSFER) { int i = ndx - cur_flist->ndx_start; - if (i < 0 || !S_ISREG(cur_flist->files[i]->mode)) { + struct file_struct *file = cur_flist->files[i]; + if (i < 0 || !(S_ISREG(file->mode) || (rw_devices && IS_DEVICE(file->mode)))) { rprintf(FERROR, "received request to transfer non-regular file: %d [%s]\n", ndx, who_am_i()); diff --git a/syscall.c b/syscall.c index aba0009..7096110 100644 --- a/syscall.c +++ b/syscall.c @@ -36,6 +36,7 @@ extern int read_only; extern int list_only; extern int preserve_perms; extern int preserve_executability; +extern int rw_devices; #define RETURN_ERROR_IF(x,e) \ do { \ @@ -286,20 +287,56 @@ int do_mkstemp(char *template, mode_t perms) int do_stat(const char *fname, STRUCT_STAT *st) { #ifdef USE_STAT64_FUNCS - return stat64(fname, st); + if (stat64(fname, st) != 0) + return -1; #else - return stat(fname, st); + if (stat(fname, st) != 0) + return -1; #endif + if (rw_devices && S_ISBLK(st->st_mode) && st->st_size == 0) { + /* On Linux systems (at least), st_size is typically 0 for devices. + * If so, try to determine the actual device size. */ + int fdx = do_open(fname, O_RDONLY, 0); + if (fdx == -1) + rsyserr(FERROR, errno, "failed to open device %s to determine size", fname); + else { + OFF_T off = lseek(fdx, 0, SEEK_END); + if (off == (OFF_T)-1) + rsyserr(FERROR, errno, "failed to seek to end of %s to determine size", fname); + else + st->st_size = off; + close(fdx); + } + } + return 0; } int do_lstat(const char *fname, STRUCT_STAT *st) { #ifdef SUPPORT_LINKS # ifdef USE_STAT64_FUNCS - return lstat64(fname, st); + if (lstat64(fname, st) != 0) + return -1; # else - return lstat(fname, st); + if (lstat(fname, st) != 0) + return -1; # endif + if (rw_devices && S_ISBLK(st->st_mode) && st->st_size == 0) { + /* On Linux systems (at least), st_size is typically 0 for devices. + * If so, try to determine the actual device size. */ + int fdx = do_open(fname, O_RDONLY, 0); + if (fdx == -1) + rsyserr(FERROR, errno, "failed to open device %s to determine size", fname); + else { + OFF_T off = lseek(fdx, 0, SEEK_END); + if (off == (OFF_T)-1) + rsyserr(FERROR, errno, "failed to seek to end of %s to determine size", fname); + else + st->st_size = off; + close(fdx); + } + } + return 0; #else return do_stat(fname, st); #endif @@ -308,10 +345,30 @@ int do_lstat(const char *fname, STRUCT_STAT *st) int do_fstat(int fd, STRUCT_STAT *st) { #ifdef USE_STAT64_FUNCS - return fstat64(fd, st); + if (fstat64(fd, st) != 0) + return -1; #else - return fstat(fd, st); + if (fstat(fd, st) != 0) + return -1; #endif + if (rw_devices && S_ISBLK(st->st_mode) && st->st_size == 0) { + /* On Linux systems (at least), st_size is typically 0 for devices. + * If so, try to determine the actual device size. */ + OFF_T off_save = lseek(fd, 0, SEEK_CUR); + if (off_save == (OFF_T)-1) + rsyserr(FERROR, errno, "failed to seek on device inode %lld to read current position", (long long int)(st->st_ino)); + else { + OFF_T off = lseek(fd, 0, SEEK_END); + if (off == (OFF_T)-1) + rsyserr(FERROR, errno, "failed to seek to end on device inode %lld to determine size", (long long int)(st->st_ino)); + else + st->st_size = off; + off = lseek(fd, off_save, SEEK_SET); + if (off == (OFF_T)-1) + rsyserr(FERROR, errno, "failed to seek to origin position on device inode %lld", (long long int)(st->st_ino)); + } + } + return 0; } OFF_T do_lseek(int fd, OFF_T offset, int whence) <<<<<<<<<<<<<<<<<<<< snip >>>>>>>>>>>>>>>>>>>> Can someone review a patch and include it somewhere? Thanks -- Zito