On Sun, Aug 01, 2004 at 06:16:05PM -0400, Chris Shoemaker
wrote:> Attached is a patch that makes window strides constant when files are
> walked with a constant block size. In these cases, it completely
> avoids all memmoves.
Seems like a good start to me. Here's a patch I created that also makes
these changes:
- The map_file() function now takes the window-size directly
rather than the block-size. This lets the the caller choose
the value.
- Figure out an appropriate window-size for the receiver,
sender, generator, and the file_checksum() function to send to
map_file().
- Also removed the (offset > 2*CHUNK_SIZE) check in map_ptr().
(Did you leave this in for a reason?)
- The sender now calls map_ptr() with a range of memory that
encompasses both the rolling-checksum data and the data at
last_match that we may need to reread.
- Defined MAX_BLOCK_SIZE as a separate value from MAX_MAP_SIZE.
- Increased the size of MAX_MAP_SIZE.
I think this should improve several things. Comments?
..wayne..
-------------- next part --------------
--- checksum.c 2 Aug 2004 16:49:20 -0000 1.33
+++ checksum.c 2 Aug 2004 17:48:02 -0000
@@ -102,7 +102,7 @@ void file_checksum(char *fname,char *sum
if (fd == -1)
return;
- buf = map_file(fd, size, CSUM_CHUNK);
+ buf = map_file(fd, size, CSUM_CHUNK * 1024);
mdfour_begin(&m);
--- fileio.c 20 Jul 2004 21:35:52 -0000 1.15
+++ fileio.c 2 Aug 2004 17:48:05 -0000
@@ -143,7 +143,7 @@ int write_file(int f,char *buf,size_t le
read(). It gives sliding window access to a file. mmap() is not
used because of the possibility of another program (such as a
mailer) truncating the file thus giving us a SIGBUS */
-struct map_struct *map_file(int fd, OFF_T len, size_t block_size)
+struct map_struct *map_file(int fd, OFF_T len, size_t window_size)
{
struct map_struct *map;
@@ -153,7 +153,7 @@ struct map_struct *map_file(int fd, OFF_
memset(map, 0, sizeof map[0]);
map->fd = fd;
map->file_size = len;
- map->def_window_size = MAX(MAX_MAP_SIZE, block_size * 32);
+ map->def_window_size = window_size;
return map;
}
@@ -181,12 +181,7 @@ char *map_ptr(struct map_struct *map,OFF
}
/* nope, we are going to have to do a read. Work out our desired window */
- if (offset > 2*CHUNK_SIZE) {
- window_start = offset - 2*CHUNK_SIZE;
- window_start &= ~((OFF_T)(CHUNK_SIZE-1)); /* assumes power of 2 */
- } else {
- window_start = 0;
- }
+ window_start = offset;
window_size = map->def_window_size;
if (window_start + window_size > map->file_size) {
window_size = map->file_size - window_start;
--- generator.c 29 Jul 2004 16:45:48 -0000 1.107
+++ generator.c 2 Aug 2004 17:48:05 -0000
@@ -154,7 +154,7 @@ static void sum_sizes_sqroot(struct sum_
c >>= 1;
} while (c >= 8); /* round to multiple of 8 */
blength = MAX(blength, BLOCK_SIZE);
- blength = MIN(blength, MAX_MAP_SIZE);
+ blength = MIN(blength, MAX_BLOCK_SIZE);
}
if (protocol_version < 27) {
@@ -208,9 +208,13 @@ static void generate_and_send_sums(int f
sum_sizes_sqroot(&sum, len);
- if (len > 0)
- mapbuf = map_file(fd, len, sum.blength);
- else
+ if (len > 0) {
+ OFF_T map_size = MAX_MAP_SIZE;
+ if (map_size % sum.blength)
+ map_size += sum.blength - (map_size % sum.blength);
+
+ mapbuf = map_file(fd, len, map_size);
+ } else
mapbuf = NULL;
write_sum_head(f_out, &sum);
--- match.c 16 Jul 2004 20:06:24 -0000 1.67
+++ match.c 2 Aug 2004 17:48:05 -0000
@@ -141,11 +141,12 @@ static void matched(int f,struct sum_str
static void hash_search(int f,struct sum_struct *s,
struct map_struct *buf, OFF_T len)
{
- OFF_T offset, end;
+ OFF_T offset, end, backup;
unsigned int k;
size_t want_i;
char sum2[SUM_LENGTH];
uint32 s1, s2, sum;
+ int more;
schar *map;
/* want_i is used to encourage adjacent matches, allowing the RLL
@@ -271,14 +272,20 @@ static void hash_search(int f,struct sum
} while (++j < s->count && targets[j].t == t);
null_tag:
+ backup = offset - last_match;
+ /* We sometimes read 1 byte prior to last_match... */
+ if (backup < 0)
+ backup = 0;
+
/* Trim off the first byte from the checksum */
- map = (schar *)map_ptr(buf, offset, k+1);
+ more = offset + k < len;
+ map = (schar *)map_ptr(buf, offset - backup, k + more) + backup;
s1 -= map[0] + CHAR_OFFSET;
s2 -= k * (map[0]+CHAR_OFFSET);
/* Add on the next byte (if there is one) to the checksum */
- if (k < (len-offset)) {
- s1 += (map[k]+CHAR_OFFSET);
+ if (more) {
+ s1 += map[k] + CHAR_OFFSET;
s2 += s1;
} else
--k;
@@ -289,9 +296,8 @@ static void hash_search(int f,struct sum
match. The 3 reads are caused by the
running match, the checksum update and the
literal send. */
- if (offset > last_match
- && offset-last_match >= CHUNK_SIZE+s->blength
- && end-offset > CHUNK_SIZE) {
+ if (backup >= CHUNK_SIZE + s->blength
+ && end - offset > CHUNK_SIZE) {
matched(f,s,buf,offset - s->blength, -2);
}
} while (++offset < end);
--- options.c 2 Aug 2004 07:40:34 -0000 1.170
+++ options.c 2 Aug 2004 17:48:05 -0000
@@ -660,10 +660,10 @@ int parse_arguments(int *argc, const cha
}
#endif
- if (block_size > MAX_MAP_SIZE) {
+ if (block_size > MAX_BLOCK_SIZE) {
rprintf(FINFO, "limiting block-size to %d bytes\n",
- MAX_MAP_SIZE);
- block_size = MAX_MAP_SIZE;
+ MAX_BLOCK_SIZE);
+ block_size = MAX_BLOCK_SIZE;
}
if (write_batch && read_batch) {
--- receiver.c 2 Aug 2004 04:50:33 -0000 1.103
+++ receiver.c 2 Aug 2004 17:48:06 -0000
@@ -222,7 +222,13 @@ static int receive_data(int f_in, char *
read_sum_head(f_in, &sum);
if (fd_r >= 0 && size_r > 0) {
- mapbuf = map_file(fd_r, size_r, sum.blength);
+ OFF_T map_size = MAX(sum.blength * 2, 16*1024);
+ map_size = MIN(map_size, MAX_MAP_SIZE);
+ if (sum.blength && (map_size % sum.blength))
+ map_size += sum.blength - (map_size % sum.blength);
+
+ mapbuf = map_file(fd_r, size_r, map_size);
+
if (verbose > 2) {
rprintf(FINFO, "recv mapped %s of size %.0f\n",
safe_fname(fname_r), (double)size_r);
--- rsync.h 29 Jul 2004 16:06:38 -0000 1.209
+++ rsync.h 2 Aug 2004 17:48:06 -0000
@@ -90,7 +90,8 @@
#define SPARSE_WRITE_SIZE (1024)
#define WRITE_SIZE (32*1024)
#define CHUNK_SIZE (32*1024)
-#define MAX_MAP_SIZE (256*1024)
+#define MAX_MAP_SIZE (512*1024)
+#define MAX_BLOCK_SIZE (256*1024)
#define IO_BUFFER_SIZE (4092)
#define IOERR_GENERAL (1<<0) /* For backward compatibility, this must ==
1 */
--- sender.c 26 Jul 2004 16:36:59 -0000 1.45
+++ sender.c 2 Aug 2004 17:48:06 -0000
@@ -208,7 +208,14 @@ void send_files(struct file_list *flist,
return;
}
- mbuf = st.st_size ? map_file(fd, st.st_size, s->blength) : NULL;
+ if (st.st_size) {
+ OFF_T map_size = MAX(s->blength * 3, MAX_MAP_SIZE);
+ if (s->blength && (map_size % s->blength))
+ map_size += s->blength - (map_size % s->blength);
+
+ mbuf = map_file(fd, st.st_size, map_size);
+ } else
+ mbuf = NULL;
if (verbose > 2) {
rprintf(FINFO, "send_files mapped %s of size %.0f\n",