Miroslav Rudisin
2003-Nov-17 03:55 UTC
[PATCH] --source-filter && --dest-filter for rsync 2.5.6
Hi, I needed to filter the content of files (encrypt them) before they are sent over the network to a backup server. The easiest way to do this was modifying Kyle Jones's "--dest-filter" patch. Somebody was asking for this feature here in the past, so I'm sending this patch to the list. Implementation details: -filtering disables the rsync algorithm -source filter makes temporary files in /tmp (there should be enough disk space) -if the source filter fails, data is sent unfiltered -failure of the destination filter causes data loss!!! -if the filter changes the size of a file, you should use the --times-only option to prevent repeated transfers of unchanged files miEro diff -ur rsync-2.5.6/generator.c rsync-2.5.6-filtered/generator.c --- rsync-2.5.6/generator.c 2002-08-29 16:44:55.000000000 +0200 +++ rsync-2.5.6-filtered/generator.c 2003-11-16 13:57:54.000000000 +0100 @@ -35,6 +35,7 @@ extern int block_size; extern int csum_length; extern int ignore_times; +extern int times_only; extern int size_only; extern int io_timeout; extern int remote_version; @@ -48,8 +49,10 @@ static int skip_file(char *fname, struct file_struct *file, STRUCT_STAT *st) { - if (st->st_size != file->length) { - return 0; + if (! 
times_only) { + if (st->st_size != file->length) { + return 0; + } } if (link_dest) { if((st->st_mode & ~_S_IFMT) != (file->mode & ~_S_IFMT)) { @@ -59,6 +62,9 @@ return 0; } } + if (times_only) { + return (cmp_modtime(st->st_mtime,file->modtime) == 0); + } /* if always checksum is set then we use the checksum instead diff -ur rsync-2.5.6/options.c rsync-2.5.6-filtered/options.c --- rsync-2.5.6/options.c 2003-01-28 04:11:57.000000000 +0100 +++ rsync-2.5.6-filtered/options.c 2003-11-16 14:06:29.000000000 +0100 @@ -48,6 +48,9 @@ int dry_run=0; int local_server=0; int ignore_times=0; +char *source_filter = NULL; +char *dest_filter = NULL; +int times_only=0; int delete_mode=0; int delete_excluded=0; int one_file_system=0; @@ -246,6 +249,7 @@ rprintf(F," --timeout=TIME set IO timeout in seconds\n"); rprintf(F," -I, --ignore-times don't exclude files that match length and time\n"); rprintf(F," --size-only only use file size when determining if a file should be transferred\n"); + rprintf(F," --times-only only use file modification time when determining if a file should be transferred\n"); rprintf(F," --modify-window=NUM Timestamp window (seconds) for file match (default=%d)\n",modify_window); rprintf(F," -T --temp-dir=DIR create temporary files in directory DIR\n"); rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n"); @@ -270,6 +274,8 @@ rprintf(F," --bwlimit=KBPS limit I/O bandwidth, KBytes per second\n"); rprintf(F," --write-batch=PREFIX write batch fileset starting with PREFIX\n"); rprintf(F," --read-batch=PREFIX read batch fileset starting with PREFIX\n"); + rprintf(F," --source-filter=COMMAND filter file through COMMAND at source\n"); + rprintf(F," --dest-filter=COMMAND filter file through COMMAND at destination\n"); rprintf(F," -h, --help show this help screen\n"); #ifdef INET6 rprintf(F," -4 prefer IPv4\n"); @@ -283,6 +289,7 @@ } enum {OPT_VERSION = 1000, OPT_SUFFIX, OPT_SENDER, OPT_SERVER, OPT_EXCLUDE, + OPT_SOURCE_FILTER, 
OPT_DEST_FILTER, OPT_EXCLUDE_FROM, OPT_DELETE, OPT_DELETE_EXCLUDED, OPT_NUMERIC_IDS, OPT_RSYNC_PATH, OPT_FORCE, OPT_TIMEOUT, OPT_DAEMON, OPT_CONFIG, OPT_PORT, OPT_INCLUDE, OPT_INCLUDE_FROM, OPT_STATS, OPT_PARTIAL, OPT_PROGRESS, @@ -300,6 +307,9 @@ {"rsync-path", 0, POPT_ARG_STRING, &rsync_path, 0, 0, 0 }, {"password-file", 0, POPT_ARG_STRING, &password_file, 0, 0, 0 }, {"ignore-times", 'I', POPT_ARG_NONE, &ignore_times , 0, 0, 0 }, + {"times-only", 0, POPT_ARG_NONE, &times_only , 0, 0, 0 }, + {"source-filter", 0, POPT_ARG_STRING, &source_filter , OPT_SOURCE_FILTER, 0, 0 }, + {"dest-filter", 0, POPT_ARG_STRING, &dest_filter , OPT_DEST_FILTER, 0, 0 }, {"size-only", 0, POPT_ARG_NONE, &size_only , 0, 0, 0 }, {"modify-window", 0, POPT_ARG_INT, &modify_window, OPT_MODIFY_WINDOW, 0, 0 }, {"one-file-system", 'x', POPT_ARG_NONE, &one_file_system , 0, 0, 0 }, @@ -471,6 +481,16 @@ print_rsync_version(FINFO); exit_cleanup(0); + case OPT_SOURCE_FILTER: + /* source_filter already set by popt */ + whole_file = 1; + break; + + case OPT_DEST_FILTER: + /* dest_filter already set by popt */ + whole_file = 1; + break; + case OPT_SUFFIX: /* The value has already been set by popt, but * we need to remember that a suffix was specified @@ -631,6 +651,13 @@ return 0; } + if ((source_filter || dest_filter) && no_whole_file) { + snprintf(err_buf,sizeof(err_buf), + "no-whole-file can not be used with source-filter or dest-filter\n"); + rprintf(FERROR,"ERROR: no-whole-file can not be used with source-filter or dest-filter\n"); + return 0; + } + *argv = poptGetArgs(pc); if (*argv) *argc = count_args(*argv); @@ -784,6 +811,17 @@ if (delete_excluded) args[ac++] = "--delete-excluded"; + if (times_only) + args[ac++] = "--times-only"; + + if (dest_filter) { + static char buf[1000]; + /* have to single quote the arg to keep the + remote shell from splitting it */ + snprintf(buf, sizeof(buf), "--dest-filter='%s'", dest_filter); + args[ac++] = buf; + } + if (size_only) args[ac++] = "--size-only"; diff -ur 
rsync-2.5.6/pipe.c rsync-2.5.6-filtered/pipe.c --- rsync-2.5.6/pipe.c 2002-04-08 09:39:56.000000000 +0200 +++ rsync-2.5.6-filtered/pipe.c 2003-11-16 13:20:34.000000000 +0100 @@ -146,3 +146,90 @@ } +pid_t run_filter(char *command[], int out, int *pipe_to_filter) +{ + pid_t pid; + int pipefds[2]; + extern int blocking_io; + + if (verbose >= 2) { + print_child_argv(command); + } + + if (pipe(pipefds) < 0) { + rprintf(FERROR, "pipe: %s\n", strerror(errno)); + exit_cleanup(RERR_IPC); + } + + pid = fork(); + if (pid == -1) { + rprintf(FERROR, "fork: %s\n", strerror(errno)); + exit_cleanup(RERR_IPC); + } + + if (pid == 0) { + extern int orig_umask; + if (dup2(pipefds[0], STDIN_FILENO) < 0) { + rprintf(FERROR, "Failed dup2 to child stdin : %s\n", + strerror(errno)); + exit_cleanup(RERR_IPC); + } + if (dup2(out, STDOUT_FILENO) < 0) { + rprintf(FERROR, "Failed dup2 to child stdout : %s\n", + strerror(errno)); + exit_cleanup(RERR_IPC); + } + close(pipefds[1]); + umask(orig_umask); + set_blocking(STDIN_FILENO); + if (blocking_io) { + set_blocking(STDOUT_FILENO); + } + execvp(command[0], command); + rprintf(FERROR, "Failed to exec %s : %s\n", + command[0], strerror(errno)); + exit_cleanup(RERR_IPC); + } + + *pipe_to_filter = pipefds[1]; + + return pid; +} + +pid_t run_filter_on_file(char *command[], int out, int in) +{ + pid_t pid; + extern int blocking_io; + + if (verbose >= 2) { + print_child_argv(command); + } + + pid = fork(); + if (pid == -1) { + rprintf(FERROR, "fork: %s\n", strerror(errno)); + exit_cleanup(RERR_IPC); + } + + if (pid == 0) { + if (dup2(in, STDIN_FILENO) < 0) { + rprintf(FERROR, "Failed dup2 to child stdin : %s\n", + strerror(errno)); + exit_cleanup(RERR_IPC); + } + if (dup2(out, STDOUT_FILENO) < 0) { + rprintf(FERROR, "Failed dup2 to child stdout : %s\n", + strerror(errno)); + exit_cleanup(RERR_IPC); + } + if (blocking_io) { + set_blocking(STDOUT_FILENO); + } + execvp(command[0], command); + rprintf(FERROR, "Failed to exec %s : %s\n", + command[0], 
strerror(errno)); + exit_cleanup(RERR_IPC); + } + + return pid; +} diff -ur rsync-2.5.6/proto.h rsync-2.5.6-filtered/proto.h --- rsync-2.5.6/proto.h 2003-01-27 04:35:09.000000000 +0100 +++ rsync-2.5.6-filtered/proto.h 2003-11-16 13:20:48.000000000 +0100 @@ -181,6 +181,8 @@ pid_t piped_child(char **command, int *f_in, int *f_out); pid_t local_child(int argc, char **argv,int *f_in,int *f_out, int (*child_main)(int, char*[])); +pid_t run_filter(char *command[], int in, int *pipe_to_filter); +pid_t run_filter_on_file(char *command[], int out, int in); void end_progress(OFF_T size); void show_progress(OFF_T ofs, OFF_T size); void delete_files(struct file_list *flist); diff -ur rsync-2.5.6/receiver.c rsync-2.5.6-filtered/receiver.c --- rsync-2.5.6/receiver.c 2003-01-21 00:32:17.000000000 +0100 +++ rsync-2.5.6-filtered/receiver.c 2003-11-16 11:44:14.000000000 +0100 @@ -320,11 +320,32 @@ extern int delete_after; extern int orig_umask; struct stats initial_stats; + pid_t pid = 0; /* assignment to get rid of compiler warning */ + int status; + extern char *dest_filter; +#define MAX_FILTER_ARGS 100 + char *filter_argv[MAX_FILTER_ARGS + 1]; if (verbose > 2) { rprintf(FINFO,"recv_files(%d) starting\n",flist->count); } + if (dest_filter) { + char *p; + char *sep = " \t"; + int i; + for (p = strtok(dest_filter, sep), i = 0; + p && i < MAX_FILTER_ARGS; + p = strtok(0, sep)) { + filter_argv[i++] = p; + } + filter_argv[i] = 0; + if (p) { + rprintf(FERROR,"Too many arguments to dest-filter (> %d)\n", i); + exit_cleanup(RERR_SYNTAX); + } + } + while (1) { cleanup_disable(); @@ -448,16 +469,34 @@ log_transfer(file, fname); } + if (dest_filter) { + pid = run_filter(filter_argv, fd2, &fd2); + } + /* recv file data */ recv_ok = receive_data(f_in,buf,fd2,fname,file->length); + if (dest_filter) { + close(fd2); + wait_process(pid, &status); + if (status != 0) { + rprintf(FERROR,"filter %s exited code: %d\n", + dest_filter, status); + if (buf) unmap_file(buf); + if (fd1 != -1) close(fd1); + 
continue; + } + } + log_recv(file, &initial_stats); if (buf) unmap_file(buf); if (fd1 != -1) { close(fd1); } - close(fd2); + if (! dest_filter) { + close(fd2); + } if (verbose > 2) rprintf(FINFO,"renaming %s to %s\n",fnametmp,fname); diff -ur rsync-2.5.6/rsync.1 rsync-2.5.6-filtered/rsync.1 --- rsync-2.5.6/rsync.1 2003-01-28 04:11:57.000000000 +0100 +++ rsync-2.5.6-filtered/rsync.1 2003-11-16 14:07:36.000000000 +0100 @@ -361,6 +361,7 @@ --timeout=TIME set IO timeout in seconds -I, --ignore-times don\'t exclude files that match length and time --size-only only use file size when determining if a file should be transferred + --times-only only use file modification time when determining if a file should be transferred --modify-window=NUM Timestamp window (seconds) for file match (default=0) -T --temp-dir=DIR create temporary files in directory DIR --compare-dest=DIR also compare destination files relative to DIR @@ -386,6 +387,8 @@ --bwlimit=KBPS limit I/O bandwidth, KBytes per second --read-batch=PREFIX read batch fileset starting with PREFIX --write-batch=PREFIX write batch fileset starting with PREFIX + --source-filter=COMMAND filter file through COMMAND at source + --dest-filter=COMMAND filter file through COMMAND at destination -h, --help show this help screen @@ -437,6 +440,12 @@ after using another mirroring system which may not preserve timestamps exactly\&. .IP +.IP "\fB--times-only\fP" +With this option, rsync will ignore size and file content +differences when deciding whether to transfer a file\&. Only +a difference in file modification time will cause a file to be +transferred. +.IP .IP "\fB--modify-window\fP" When comparing two timestamps rsync treats the timestamps as being equal if they are within the value of @@ -938,6 +947,32 @@ using the fileset whose filenames start with PREFIX\&. See the "BATCH MODE" section for details\&. 
.IP +.IP "\fB --source-filter=COMMAND\fP" +This option allows you to specify a filter program that will be +applied to the contents of all transferred regular files before +the data is sent to destination. COMMAND will receive the data on its +standard input and it should write the filtered data to standard +output. COMMAND should exit non-zero if it cannot process the +data or if it encounters an error when writing the data to stdout. +Example: --source-filter="gzip -9" will cause remote files to be compressed. +Use of --source-filter automatically enables --whole-file. +If your filter does not output the same number of bytes that it +received on input, you should use --times-only to disable size and +content checks on subsequent rsync runs. +.IP +.IP "\fB --dest-filter=COMMAND\fP" +This option allows you to specify a filter program that will be +applied to the contents of all transferred regular files before +the data is written to disk. COMMAND will receive the data on its +standard input and it should write the filtered data to standard +output. COMMAND should exit non-zero if it cannot process the +data or if it encounters an error when writing the data to stdout. +Example: --dest-filter="gzip -9" will cause remote files to be compressed. +Use of --dest-filter automatically enables --whole-file. +If your filter does not output the same number of bytes that it +received on input, you should use --times-only to disable size and +content checks on subsequent rsync runs. 
+.IP .PP .SH "EXCLUDE PATTERNS" .PP diff -ur rsync-2.5.6/sender.c rsync-2.5.6-filtered/sender.c --- rsync-2.5.6/sender.c 2002-04-09 08:03:50.000000000 +0200 +++ rsync-2.5.6-filtered/sender.c 2003-11-16 14:33:17.000000000 +0100 @@ -109,6 +109,27 @@ char buff[CHUNK_SIZE]; /* dw */ int j; /* dw */ int done; /* dw */ + extern char *source_filter; +#define MAX_FILTER_ARGS 100 + char *filter_argv[MAX_FILTER_ARGS + 1]; + char *tmp = 0, *tmpl = "/tmp/rsync-filtered_sourceXXXXXX"; + int unlink_tmp = 0; + + if (source_filter) { + char *p; + char *sep = " \t"; + int i; + for (p = strtok(source_filter, sep), i = 0; + p && i < MAX_FILTER_ARGS; + p = strtok(0, sep)) { + filter_argv[i++] = p; + } + filter_argv[i] = 0; + if (p) { + rprintf(FERROR,"Too many arguments to source-filter (> %d)\n", i); + exit_cleanup(RERR_SYNTAX); + } + } if (verbose > 2) rprintf(FINFO,"send_files starting\n"); @@ -178,7 +199,34 @@ write_batch_csum_info(&i,flist->count,s); if (!read_batch) { + unlink_tmp=0; + fd = do_open(fname, O_RDONLY, 0); + + if (fd != -1 && source_filter) { + int fd2, status; + pid_t pid = 0; /* assignment to get rid of compiler warning */ + + tmp = strdup(tmpl); + fd2 = mkstemp(tmp); + if (fd2 == -1) + rprintf(FERROR,"mkstemp %s failed: %s\n",tmp,strerror(errno)); + else { + pid = run_filter_on_file(filter_argv, fd2, fd); + close(fd); + close(fd2); + wait_process(pid, &status); + if (status != 0) { + rprintf(FERROR,"bypassing source filter %s; exited with code: %d\n",source_filter,status); + fd = do_open(fname, O_RDONLY, 0); + } + else { + fd = do_open(tmp, O_RDONLY, 0); + unlink_tmp = 1; + } + } + } + if (fd == -1) { io_error = 1; rprintf(FERROR,"send_files failed to open %s: %s\n", @@ -275,6 +323,7 @@ if (!read_batch) { /* dw */ if (buf) unmap_file(buf); close(fd); + if (unlink_tmp) unlink(tmp); } free_sums(s);
Apparently Analogous Threads
- DO NOT REPLY [Bug 4995] New: source_filter dest_filter patch fails in 2.6.9
- File descriptors in pipe.c
- PATCH: fast copy of files in local server mode
- Problems reloading plugin extending AR model
- Pipe creation problem: From Java app using rsync + ssh on windows with cygwin