Various needs for handling multiple versions were discussed recently, and I put forward a plan of my own. The proposal for a --compare-file=<FILE> switch subsequently gained support, so I have implemented it. I have also implemented an experimental --compare-auto switch, which chooses the file to compare against automatically by applying a filename-matching rule. Instructions for applying the patch: 1. Install the rsync-2.5.6 source. 2. Run "patch -p1 < rsync-2.5.6-arh1.patch" (the patch below). 3. Edit configure to add "arh1" to the RSYNC_VERSION string and run ./configure; or, if you have already run ./configure, edit config.h to add "arh1" to the RSYNC_VERSION string. 4. Run "make proto" to update the proto.h file. 5. Run "make". Here's rsync-2.5.6-arh1.patch: -----cut here----- diff -aur rsync-2.5.6/generator.c rsync-arh/generator.c --- rsync-2.5.6/generator.c Thu Aug 29 14:44:55 2002 +++ rsync-arh/generator.c Fri Oct 17 15:48:56 2003 @@ -5,6 +5,7 @@ Copyright (C) 1996-2000 by Andrew Tridgell Copyright (C) Paul Mackerras 1996 Copyright (C) 2002 by Martin Pool <mbp@samba.org> + Copyright (C) 2003, Andy Henson, Zexia Access Ltd This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -41,6 +42,8 @@ extern int always_checksum; extern int modify_window; extern char *compare_dest; +extern char *compare_file; +extern int compare_auto; extern int link_dest; @@ -357,29 +360,36 @@ fnamecmp = fname; - if ((statret == -1) && (compare_dest != NULL)) { - /* try the file at compare_dest instead */ + if ((statret == -1) && compare_auto) { + compare_file = findcomparename(fname,fnamecmpbuf); + } else if ((statret == -1) && (compare_dest != NULL)) { + snprintf(fnamecmpbuf,MAXPATHLEN,"%s/%s", + compare_dest,fname); + compare_file = fnamecmpbuf; + } + + if ((statret == -1) && (compare_file != NULL)) { + /*try this file instead (--compare-dest, --compare-file, --compare-auto)*/ int saveerrno = errno; - snprintf(fnamecmpbuf,MAXPATHLEN,"%s/%s",compare_dest,fname); - statret = 
link_stat(fnamecmpbuf,&st); + statret = link_stat(compare_file,&st); if (!S_ISREG(st.st_mode)) statret = -1; if (statret == -1) errno = saveerrno; #if HAVE_LINK else if (link_dest && !dry_run) { - if (do_link(fnamecmpbuf, fname) != 0) { + if (do_link(compare_file, fname) != 0) { if (verbose > 0) rprintf(FINFO,"link %s => %s : %s\n", - fnamecmpbuf, + compare_file, fname, strerror(errno)); } - fnamecmp = fnamecmpbuf; + fnamecmp = compare_file; } #endif else - fnamecmp = fnamecmpbuf; + fnamecmp = compare_file; } if (statret == -1) { @@ -534,3 +544,86 @@ write_int(f,-1); } } + + + +char * findcomparename(const char* fname, char* buf) + /* returns compare name, a valid file with name similar to @param fname. + * Implements the --compare-auto name function. + * May use @param buf as buffer for the name (size is MAXPATHLEN). */ + + /* The algorithm: scans the directory for filenames where the names + match once version information is stripped out. Version information + is assumed to start with a digit after one of - . ; and to continue until + either a . not followed by a digit, or a - not followed by a digit, t, p or r. This rather + odd rule permits 2.4-test2, 2.4-rc4, 2.4-pre3 to be ignored as versions. + Finally it selects the most recent of these which has a size no smaller + than 90% of the biggest of any of them. 
+ I acknowledge these are pretty arbitrary rules - arh 17 October 2003 */ +{ + char newname[MAXPATHLEN]; + char tmpname[MAXPATHLEN]; + time_t newtime=0; + size_t newsize=0; + struct dirent *di; + DIR *d; + char* dirname; + char *name; + + strncpy(buf,fname,MAXPATHLEN); + dirname = buf; + name = strrchr(buf,'/'); + if (name) + *name++ = 0; //terminate name at end of directory part + else { + name = (char*)fname; + dirname = "."; + } + if (compare_dest) + dirname = compare_dest; + if (verbose > 1) + rprintf(FINFO,"findcomparename: dir %s name %s\n",dirname,name); + d = opendir(dirname); + if (d) { + for (di = readdir(d); di; di = readdir(d)) { + char *dname = d_name(di); + char *p = name; + char *q = dname; + STRUCT_STAT st; + /* are files version-stripped names identical? */ + while (*p && *q && *p==*q) { + if ((*p=='-'||*p=='.'||*p==';') && isdigit(p[1])) /* skip version part */ + do ++p; while (*p && (*p!='-' || isdigit(p[1])||p[1]=='t'||p[1]=='p'||p[1]=='r') && (*p!='.' || isdigit(p[1]))); + if ((*q=='-'||*q=='.'||*q==';') && isdigit(q[1])) /* skip version part */ + do ++q; while (*q && (*q!='-' || isdigit(q[1])||q[1]=='t'||q[1]=='p'||q[1]=='r') && (*q!='.' || isdigit(q[1]))); + ++p; + ++q; + } + if (*p != *q) + continue; /* not identical */ + /* identical: take best fit */ + if (verbose > 1) + rprintf(FINFO,"findcomparename: candidate %s\n",dname); + strcpy(tmpname,dirname); + strcat(tmpname,"/"); + strncat(tmpname,dname,MAXPATHLEN-strlen(tmpname)); + tmpname[MAXPATHLEN-1]=0; + (void) link_stat(tmpname,&st); + if ((st.st_size >= newsize*9/10 && st.st_ctime >= newtime) || st.st_size >= newsize*10/9) + { + newsize=st.st_size; + newtime=st.st_ctime; + strcpy(newname,tmpname); + } + } + closedir(d); + if (newsize > 0) /* ie, if we found one... 
*/ + { + strcpy(buf,newname); + if (verbose > 1) + rprintf(FINFO,"findcomparename: chose %s\n",buf); + return buf; + } + } + return 0; +} diff -aur rsync-2.5.6/options.c rsync-arh/options.c --- rsync-2.5.6/options.c Tue Jan 28 03:11:57 2003 +++ rsync-arh/options.c Fri Oct 17 15:28:18 2003 @@ -2,6 +2,7 @@ * * Copyright (C) 1998-2001 by Andrew Tridgell <tridge@samba.org> * Copyright (C) 2000, 2001, 2002 by Martin Pool <mbp@samba.org> + * Copyright (C) 2003, Andy Henson, Zexia Access Ltd <andy.31016@zexia.co.uk> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -83,6 +84,7 @@ int ignore_errors=0; int modify_window=0; int blocking_io=-1; +int compare_auto=0; /** Network address family. **/ @@ -104,6 +106,7 @@ char *backup_suffix = BACKUP_SUFFIX; char *tmpdir = NULL; char *compare_dest = NULL; +char *compare_file = NULL; char *config_file = NULL; char *shell_cmd = NULL; char *log_format = NULL; @@ -155,7 +158,7 @@ rprintf(f, "%s version %s protocol version %d\n", RSYNC_NAME, RSYNC_VERSION, PROTOCOL_VERSION); rprintf(f, - "Copyright (C) 1996-2002 by Andrew Tridgell and others\n"); + "Copyright (C) 1996-2003 by Andrew Tridgell and others\n"); rprintf(f, "<http://rsync.samba.org/>\n"); rprintf(f, "Capabilities: %d-bit files, %ssocketpairs, " "%shard links, %ssymlinks, batchfiles, \n", @@ -247,8 +250,10 @@ rprintf(F," -I, --ignore-times don't exclude files that match length and time\n"); rprintf(F," --size-only only use file size when determining if a file should be transferred\n"); rprintf(F," --modify-window=NUM Timestamp window (seconds) for file match (default=%d)\n",modify_window); - rprintf(F," -T --temp-dir=DIR create temporary files in directory DIR\n"); + rprintf(F," -T, --temp-dir=DIR create temporary files in directory DIR\n"); rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n"); + rprintf(F," --compare-file=FILE also compare to FILE\n"); 
+ rprintf(F," -A, --compare-auto also compare to automatically selected file\n"); rprintf(F," -P equivalent to --partial --progress\n"); rprintf(F," -z, --compress compress file data\n"); rprintf(F," --exclude=PATTERN exclude files matching PATTERN\n"); @@ -345,6 +350,8 @@ {"timeout", 0, POPT_ARG_INT, &io_timeout , 0, 0, 0 }, {"temp-dir", 'T', POPT_ARG_STRING, &tmpdir , 0, 0, 0 }, {"compare-dest", 0, POPT_ARG_STRING, &compare_dest , 0, 0, 0 }, + {"compare-file", 0, POPT_ARG_STRING, &compare_file , 0, 0, 0 }, + {"compare-auto", 'A', POPT_ARG_NONE, 0 , 'A', 0, 0 }, {"link-dest", 0, POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 }, /* TODO: Should this take an optional int giving the compression level? */ {"compress", 'z', POPT_ARG_NONE, &do_compression , 0, 0, 0 }, @@ -567,6 +574,10 @@ preserve_devices=1; break; + case 'A': + compare_auto=1; + break; + case OPT_SENDER: if (!am_server) { usage(FERROR); @@ -615,6 +626,13 @@ } } + if ((compare_dest||compare_auto) && compare_file) { + snprintf(err_buf,sizeof(err_buf), + "--compare-file cannot be used with another --compare-xxx option\n"); + rprintf(FERROR,"ERROR: --compare-file cannot be used with another --compare-xxx option\n"); + return 0; + } + if (write_batch && read_batch) { snprintf(err_buf,sizeof(err_buf), "write-batch and read-batch can not be used together\n"); @@ -841,6 +859,13 @@ args[ac++] = link_dest ? 
"--link-dest" : "--compare-dest"; args[ac++] = compare_dest; } + if (compare_file && am_sender) { + args[ac++] = "--compare-file"; + args[ac++] = compare_file; + } + if (compare_auto && am_sender) { + args[ac++] = "--compare-auto"; + } *argc = ac; } diff -aur rsync-2.5.6/proto.h rsync-arh/proto.h --- rsync-2.5.6/proto.h Mon Jan 27 03:35:09 2003 +++ rsync-arh/proto.h Fri Oct 17 08:26:52 2003 @@ -31,7 +31,7 @@ void sum_init(void); void sum_update(char *p, int len); void sum_end(char *sum); -void close_all(void); +void close_all(); void _exit_cleanup(int code, const char *file, int line); void cleanup_disable(void); void cleanup_set(char *fnametmp, char *fname, struct file_struct *file, @@ -93,6 +93,7 @@ char *f_name(struct file_struct *f); void recv_generator(char *fname, struct file_list *flist, int i, int f_out); void generate_files(int f,struct file_list *flist,char *local_name,int f_recv); +char * findcomparename(const char* fname, char* buf); int main(int argc, char *argv[]); void init_hard_links(struct file_list *flist); int check_hard_link(struct file_struct *file); @@ -224,7 +225,6 @@ OFF_T do_lseek(int fd, OFF_T offset, int whence); void *do_mmap(void *start, int len, int prot, int flags, int fd, OFF_T offset); char *d_name(struct dirent *di); -int main(int argc, char **argv); int main (int argc, char *argv[]); void set_compression(char *fname); void send_token(int f,int token,struct map_struct *buf,OFF_T offset, @@ -232,6 +232,7 @@ int recv_token(int f,char **data); void see_token(char *data, int toklen); int main(int argc, char **argv); +int main(int argc, char **argv); void add_uid(uid_t uid); void add_gid(gid_t gid); void send_uid_list(int f); diff -aur rsync-2.5.6/receiver.c rsync-arh/receiver.c --- rsync-2.5.6/receiver.c Mon Jan 20 23:32:17 2003 +++ rsync-arh/receiver.c Fri Oct 17 09:57:48 2003 @@ -2,6 +2,7 @@ Copyright (C) 1996-2000 by Andrew Tridgell Copyright (C) Paul Mackerras 1996 + Copyright (C) 2003, Andy Henson, Zexia Access Ltd This program is 
free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -34,6 +35,8 @@ extern int io_error; extern char *tmpdir; extern char *compare_dest; +extern char *compare_file; +extern int compare_auto; extern int make_backups; extern char *backup_suffix; @@ -373,11 +376,17 @@ /* open the file */ fd1 = do_open(fnamecmp, O_RDONLY, 0); - if ((fd1 == -1) && (compare_dest != NULL)) { - /* try the file at compare_dest instead */ + if ((fd1 == -1) && compare_auto) { + compare_file = findcomparename(fname,fnamecmpbuf); + } else if ((fd1 == -1) && (compare_dest != NULL)) { snprintf(fnamecmpbuf,MAXPATHLEN,"%s/%s", compare_dest,fname); - fnamecmp = fnamecmpbuf; + compare_file = fnamecmpbuf; + } + + if ((fd1 == -1) && (compare_file != NULL)) { + /*try this file instead (--compare-dest, --compare-file, --compare-auto)*/ + fnamecmp = compare_file; fd1 = do_open(fnamecmp, O_RDONLY, 0); } @@ -446,6 +455,8 @@ if (!am_server) { log_transfer(file, fname); + if (compare_auto && (verbose > 1)) + rprintf(FINFO,"(compare-file %s)\n",fnamecmp); } /* recv file data */ diff -aur rsync-2.5.6/rsync.1 rsync-arh/rsync.1 --- rsync-2.5.6/rsync.1 Tue Jan 28 03:11:57 2003 +++ rsync-arh/rsync.1 Fri Oct 17 10:42:54 2003 @@ -364,6 +364,8 @@ --modify-window=NUM Timestamp window (seconds) for file match (default=0) -T --temp-dir=DIR create temporary files in directory DIR --compare-dest=DIR also compare destination files relative to DIR + --compare-file=FILE also compare to FILE + -A --compare-auto also compare to automatically selected file --link-dest=DIR create hardlinks to DIR for unchanged files -P equivalent to --partial --progress -z, --compress compress file data @@ -781,8 +783,8 @@ .IP "\fB--compare-dest=DIR\fP" This option instructs rsync to use DIR on the destination machine as an additional directory to compare destination -files against when doing transfers if the files are missing in the -destination directory\&. 
This is useful for doing transfers to a new +files against when doing transfers {if the files are missing in the +destination directory}\&. This is useful for doing transfers to a new destination while leaving existing files intact, and then doing a flash-cutover when all files have been successfully transferred (for example by moving directories around and removing the old directory, @@ -792,6 +794,18 @@ have a chance to be completed\&. If DIR is a relative path, it is relative to the destination directory\&. .IP +.IP "\fB--compare-file=FILE\fP" +This option instructs rsync to use FILE on the destination machine as +an additional file to compare against when doing transfers {if the +files are missing in the destination directory}\&. +.IP +.IP "\fB--compare-auto\fP" +This option instructs rsync to automatically select a file on the destination +machine to compare against when doing transfers {if the +files are missing in the destination directory}\&. The file will be selected +from the files in the same directory unless --compare-dest is used to select +another directory\&. +.IP .IP "\fB--link-dest=DIR\fP" This option behaves like \fB--compare-dest\fP but also will create hard links from \fIDIR\fP to the destination directory for -----cut here----- Andy Henson Zexia Access Ltd