Justin Yackoski
2002-Feb-08 16:17 UTC
[Samba] FIX to detect infinite file systems when using smbclient
I've had a problem with smbclient when using the recurse command in that it will happily follow directories until it gets to the hard limit of 40 levels deep. I know you can disable symlinks for smbd but that doesn't help when *someone else* recursively links their file system. I created a patch against 2.2.3a which adds a "saferecurse" command which acts exactly in the same way (and uses the same code) as the recurse command except that it checks for recursive file systems. I implemented it so that you can still use recurse since I can see times when recurse may have the desired effect. Enabling "saferecurse" automatically enables "recurse" and disabling "recurse" disables "saferecurse" since that seemed the most logical way to handle having the two similar options. It works similarly to wget in that it hashes the contents of each new directory it encounters and compares it against a list of hashes of the directories smbclient has already seen. Since smbclient doesn't buffer its output but writes it one file at a time (at least that's what I believe it does...), the easiest way was to do the checking after a directory's contents have been displayed, mget'd, etc. The difference is that if a duplicate directory is found, it de-queues all of that directory's sub-directories, stopping the recursion. It would perhaps be a better solution to not display the contents at all or something, but that would require buffering the directory contents until it was known that it wasn't a duplicate. This fix checks the server's file system, not the local file system, so maybe mput with recurse enabled is still vulnerable to this problem, although admittedly it is more your own responsibility if your local file system is recursive. I tested it and it works as far as I can tell. I tried it on a large directory structure (15000+ files) and it was not much slower than the regular recurse option, which was one of the goals I had in writing it. 
I used the hash_table and mdfour algorithms which were already included in samba's source code for most of the dirty work, so I don't believe it adds any requirements or anything like that. I would *greatly* appreciate it if this option was included in samba, even if it doesn't make it into the 2.2.3 release since I have been working on Netster which (similar to FemFind) indexes file shares of the local network to allow searching later and couldn't find a better way to resolve the problem of the network crawling program getting stuck in these recursive file systems. If this is included in samba's code, I would also greatly appreciate receiving credit someplace, although I don't know what samba's standard practice for that sort of thing is. Below is the patch, I'm sure it could be improved on but I have limited knowledge of both C and samba's internals. A fair portion of the code is completely copied from places inside client.c and modified to fit these purposes. Justin Yackoski -- SkiingYAC.com Web Design & Programming skiingyac.com --- client/clientold.c Sat Feb 2 19:46:38 2002 +++ client/client.c Fri Feb 8 18:28:06 2002 @@ -79,6 +79,10 @@ int printmode = 1; static BOOL recurse = False; +static BOOL saferecurse = False; +static hash_table saferecurse_hash_table; +static int dirs_added = 0; +char dummy_value[] = "a"; BOOL lowercase = False; struct in_addr dest_ip; @@ -346,14 +350,97 @@ } static BOOL do_list_recurse; +static BOOL do_list_saferecurse; static BOOL do_list_dirs; static char *do_list_queue = 0; static long do_list_queue_size = 0; static long do_list_queue_start = 0; static long do_list_queue_end = 0; +static char *saferecurse_current_dir = 0; +static long saferecurse_current_dir_size = 0; +static long saferecurse_current_dir_end = 0; +static long saferecurse_current_dir_start = 0; static void (*do_list_fn)(file_info *); /**************************************************************************** +functions for saferecurse_current_dir + 
****************************************************************************/ + +/* + * saferecurse_current_dir is a string with the name, size, and mtime of + * all files and directories in the current directory, which will later + * be hashed if saferecurse is being used + */ +static void reset_saferecurse_hash_table( void ) +{ + static BOOL initialised; + if (initialised) { + hash_clear(&saferecurse_hash_table); + } + + initialised = hash_table_init( &saferecurse_hash_table, 512, + (compare_function)(strcmp)); +} + +static void reset_saferecurse_current_dir(void) +{ + SAFE_FREE(saferecurse_current_dir); + saferecurse_current_dir = 0; + saferecurse_current_dir_size = 0; + saferecurse_current_dir_start = 0; + saferecurse_current_dir_end = 0; +} + +static void init_saferecurse_current_dir(void) +{ + reset_saferecurse_current_dir(); + saferecurse_current_dir_size = 1024; + saferecurse_current_dir = malloc(saferecurse_current_dir_size); + if (saferecurse_current_dir == 0) { + DEBUG(0,("malloc fail for size %d\n", + (int)saferecurse_current_dir_size)); + reset_saferecurse_current_dir(); + } else { + memset(saferecurse_current_dir, 0, + saferecurse_current_dir_size); + } +} + +static void add_saferecurse_current_dir(const char* entry) +{ + char *dlq; + + long new_end = saferecurse_current_dir_end + ((long)strlen(entry)) + 1; + while (new_end > saferecurse_current_dir_size) + { + saferecurse_current_dir_size *= 2; + DEBUG(4,("enlarging saferecurse_current_dir to %d\n", + (int)saferecurse_current_dir_size)); + dlq = Realloc(saferecurse_current_dir, + saferecurse_current_dir_size); + if (!dlq) { + DEBUG(0,("failure enlarging saferecurse_current_dir to %d bytes\n", + (int)saferecurse_current_dir_size)); + reset_saferecurse_current_dir(); + } else { + saferecurse_current_dir = dlq; + memset(saferecurse_current_dir + + saferecurse_current_dir_size / 2, + 0, saferecurse_current_dir_size / 2); + } + } + if (saferecurse_current_dir) + { + pstrcpy(saferecurse_current_dir + 
saferecurse_current_dir_end, + entry); + saferecurse_current_dir_end = new_end; + DEBUG(4,("added %s to saferecurse_current_dir (start=%d, end=%d)\n", + entry, (int)saferecurse_current_dir_start, + (int)saferecurse_current_dir_end)); + } +} + +/**************************************************************************** functions for do_list_queue ****************************************************************************/ @@ -471,6 +558,7 @@ ****************************************************************************/ static void do_list_helper(file_info *f, const char *mask, void *state) { + char *c; if (f->mode & aDIR) { if (do_list_dirs && do_this_one(f)) { do_list_fn(f); @@ -488,6 +576,10 @@ pstrcat(mask2, f->name); pstrcat(mask2,"\\*"); add_to_do_list_queue(mask2); + if (saferecurse) + { + dirs_added++; + } } return; } @@ -495,6 +587,12 @@ if (do_this_one(f)) { do_list_fn(f); } + asprintf(&c, "%s%d%d", f->name, f->size, f->mtime); + if (saferecurse) + { + add_saferecurse_current_dir(c); + } + SAFE_FREE(c); } @@ -505,6 +603,11 @@ { static int in_do_list = 0; + if (saferecurse) + { + reset_saferecurse_hash_table(); + } + if (in_do_list && rec) { fprintf(stderr, "INTERNAL ERROR: do_list called recursively when the recursive flag is true\n"); @@ -533,7 +636,39 @@ */ pstring head; pstrcpy(head, do_list_queue_head()); + if (saferecurse) + { + init_saferecurse_current_dir(); + dirs_added = 0; + } cli_list(cli, head, attribute, do_list_helper, NULL); + if (saferecurse) + { + char hashkey[16]; + mdfour(hashkey, saferecurse_current_dir, + saferecurse_current_dir_end); + if (hash_lookup(&saferecurse_hash_table, hashkey) != NULL) + { + /* must remove last dirs_added dirs we queued + * which are all the subdirs of the current dir + * since a match in the hash table means we've + * seen this dir before. Still list its contents + * but don't go into its subdirs. 
Maybe not the + * optimal way but to not list contents would mean + * buffering output until we know the dir hasn't + * been seen */ + DEBUG(3,("unsafe dir found, de-queueing subdirs")); + while (dirs_added > 0) + { + remove_do_list_queue_head(); + dirs_added--; + } + } + else + { + hash_insert(&saferecurse_hash_table, dummy_value, hashkey); + } + } remove_do_list_queue_head(); if ((! do_list_queue_empty()) && (fn == display_finfo)) { @@ -1732,10 +1867,24 @@ static void cmd_recurse(void) { recurse = !recurse; + if (!recurse) //turning off recursion also turns off saferecurse flag + { + saferecurse = False; + } DEBUG(2,("directory recursion is now %s\n",recurse?"on":"off")); } /**************************************************************************** +toggle the saferecurse flag +****************************************************************************/ +static void cmd_saferecurse(void) +{ + saferecurse = !saferecurse; + recurse = saferecurse; //also turn on/off recurse flag + DEBUG(2,("safe directory recursion is now %s\n",saferecurse?"on":"off")); +} + +/**************************************************************************** toggle the translate flag ****************************************************************************/ static void cmd_translate(void) @@ -1921,6 +2070,7 @@ {"quit",cmd_quit,"logoff the server",{COMPL_NONE,COMPL_NONE}}, {"rd",cmd_rmdir,"<directory> remove a directory",{COMPL_NONE,COMPL_NONE}}, {"recurse",cmd_recurse,"toggle directory recursion for mget and mput",{COMPL_NONE,COMPL_NONE}}, + {"saferecurse",cmd_saferecurse,"toggle safe directory recursion for mget and mput",{COMPL_NONE,COMPL_NONE}}, {"rename",cmd_rename,"<src> <dest> rename some files",{COMPL_REMOTE,COMPL_REMOTE}}, {"rm",cmd_del,"<mask> delete all matching files",{COMPL_REMOTE,COMPL_NONE}}, {"rmdir",cmd_rmdir,"<directory> remove a directory",{COMPL_NONE,COMPL_NONE}}, @@ -2400,7 +2550,9 @@ if (!cli) return 1; - recurse=True; +// recurse=True; +// use saferecursion by 
default for tar, or sometimes get BIG tarballs + saferecurse=True; if (*base_directory) do_cd(base_directory);