I apologize, this patch should have been sent awhile ago. Between a lot of things that needed to be completed at work as a sysadmin for the Research Computing group at Purdue University and retiring from work after 38 years at the end of May (2012), I basically ignored it. This patch is from a clone of my workstation that I just activated at home. The patch was primarily written to fix the problem(s) that was(were) causing `ssh-keyscan' to abort its runs prematurely, mainly connection errors with an attendant immediate abort when sensed. It also addresses what I consider to be some "buggy" code, some error message clarification and adds a command line option so that a STDERR message will be printed for ALL hosts submitted for processing. The patch has been running with no apparent problems on several machines most specifically on Ubuntu, Debian and Solaris. I think I started using some form of it for openssh-5.6p1 and it has not basically changed for at least the last 4 openssh*p1 releases. See Bugzilla #1213 for some discussion. The Debian system and hardware was probably changed to a Redhat system with new hardware after I left. `ssh-keyscan' was running there once an hour. Ummm - I think I followed your coding specs except for one "#include" that was not in alphabetical order. Actually, that list was already out of order. Hey, is there any web page that can be used to submit patches? -- Paul Townsend (former sysadmin at Purdue University) diff -u openssh-6.1p1/kex.c.orig openssh-6.1p1/kex.c --- openssh-6.1p1/kex.c.orig 2010-09-24 08:11:14.000000000 -0400 +++ openssh-6.1p1/kex.c 2012-05-08 20:47:32.666853000 -0400 @@ -49,6 +49,7 @@ #include "dispatch.h" #include "monitor.h" #include "roaming.h" +#include "canohost.h" #if OPENSSL_VERSION_NUMBER >= 0x00907000L # if defined(HAVE_EVP_SHA256) @@ -366,11 +367,19 @@ choose_hostkeyalg(Kex *k, char *client, char *server) { char *hostkeyalg = match_list(client, server, NULL); - if (hostkeyalg == NULL) - fatal("no hostkey alg"); + if (hostkeyalg == NULL) { + if (k->server) + fatal("bad '%.100s' hostkey alg request from %.200s", client, get_remote_ipaddr()); + else + fatal("no '%.100s' hostkey alg(s) for %.200s", client, get_remote_ipaddr()); + } +/* + * Note that if KEY_UNSPEC is returned, BOTH the client and the server + * have the same bad key string. + */ k->hostkey_type = key_type_from_name(hostkeyalg); if (k->hostkey_type == KEY_UNSPEC) - fatal("bad hostkey alg '%s'", hostkeyalg); + fatal("unknown hostkey alg '%s'", hostkeyalg); xfree(hostkeyalg); } diff -u openssh-6.1p1/packet.c.orig openssh-6.1p1/packet.c --- openssh-6.1p1/packet.c.orig 2012-03-08 18:28:07.000000000 -0500 +++ openssh-6.1p1/packet.c 2012-11-23 18:18:00.233636000 -0500 @@ -1018,6 +1018,17 @@ } /* + * The following two global variables exist to pass connection error + * conditions detected by code in packet_read_seqnr() to ssh-keyscan. + */ + +int connclosed = 0; /* = 1 if connection closed by remote server */ + /* prior to necessary data being read */ +int conntimedout = 0; /* = 1 if connection timed out locally while */ + /* waiting for data from remote server */ + /* both currently used in ssh-keyscan.c */ + +/* * Waits until a packet has been received, and returns its type. Note that * no other data is processed until this returns, so this function should not * be used during the interactive session. @@ -1033,6 +1044,9 @@ DBG(debug("packet_read()")); + connclosed = 0; + conntimedout = 0; + setp = (fd_set *)xcalloc(howmany(active_state->connection_in + 1, NFDBITS), sizeof(fd_mask)); @@ -1087,6 +1101,7 @@ } } if (ret == 0) { + conntimedout = 1; logit("Connection to %.200s timed out while " "waiting to read", get_remote_ipaddr()); cleanup_exit(255); @@ -1098,11 +1113,12 @@ sizeof(buf), &cont); } while (len == 0 && cont); if (len == 0) { + connclosed = 1; /* if anybody wants to know */ logit("Connection closed by %.200s", get_remote_ipaddr()); cleanup_exit(255); } if (len < 0) - fatal("Read from socket failed: %.100s", strerror(errno)); + fatal("Read from %.200s failed: %.100s", get_remote_ipaddr(), strerror(errno)); /* Append it to the buffer. */ packet_process_incoming(buf, len); } diff -u openssh-6.1p1/ssh-keyscan.1.orig openssh-6.1p1/ssh-keyscan.1 --- openssh-6.1p1/ssh-keyscan.1.orig 2010-08-31 08:41:14.000000000 -0400 +++ openssh-6.1p1/ssh-keyscan.1 2012-05-08 20:47:32.776848000 -0400 @@ -15,7 +15,7 @@ .Sh SYNOPSIS .Nm ssh-keyscan .Bk -words -.Op Fl 46Hv +.Op Fl 46HLv .Op Fl f Ar file .Op Fl p Ar port .Op Fl T Ar timeout @@ -73,6 +73,8 @@ .Nm sshd , but they do not reveal identifying information should the file's contents be disclosed. +.It Fl L +If specified, all hosts for which no key is acquired will be logged. .It Fl p Ar port Port to connect to on the remote host. .It Fl T Ar timeout diff -u openssh-6.1p1/ssh-keyscan.c.orig openssh-6.1p1/ssh-keyscan.c --- openssh-6.1p1/ssh-keyscan.c.orig 2011-05-05 00:05:12.000000000 -0400 +++ openssh-6.1p1/ssh-keyscan.c 2012-05-08 20:47:32.826852000 -0400 @@ -45,6 +45,7 @@ #include "atomicio.h" #include "misc.h" #include "hostfile.h" +#include "canohost.h" /* Flag indicating whether IPv4 or IPv6. This can be set on the command line. Default value is AF_UNSPEC means both IPv4 and IPv6. */ @@ -61,15 +62,20 @@ int hash_hosts = 0; /* Hash hostname on output */ +int log_verbose = 0; /* list all hosts checked */ + #define MAXMAXFD 256 /* The number of seconds after which to give up on a TCP connection */ +/* and the maximum time to wait for kex data from the remote server.*/ int timeout = 5; int maxfd; #define MAXCON (maxfd - 10) extern char *__progname; +extern int connclosed; +extern int conntimedout; fd_set *read_wait; size_t read_wait_nfdset; int ncon; @@ -243,7 +249,23 @@ { int j; +/* + * New fd and socket. Clear the possibly cached IP-address of the + * remote host (kex.c:canonical_host_ip) of the previous socket. Also + * clear the packet_read_seqnr() "Connection closed ..." and "Connection + * to ... timed out ..." flags (called by dispatch_run()). + */ + clear_cached_addr(); + connclosed = 0; + conntimedout = 0; + packet_set_connection(c->c_fd, c->c_fd); +/* + * Use our "timeout" value to set the maximum allowed wait time for data + * to become available in the `packet.c:packet_read_seqnr()' function. + */ + packet_set_timeout(timeout, 1); + enable_compat20(); myproposal[PROPOSAL_SERVER_HOST_KEY_ALGS] = c->c_keytype == KT_DSA? "ssh-dss" : (c->c_keytype == KT_RSA ? "ssh-rsa" : @@ -296,8 +318,11 @@ memset(&hints, 0, sizeof(hints)); hints.ai_family = IPv4or6; hints.ai_socktype = SOCK_STREAM; - if ((gaierr = getaddrinfo(host, strport, &hints, &aitop)) != 0) - fatal("getaddrinfo %s: %s", host, ssh_gai_strerror(gaierr)); + if ((gaierr = getaddrinfo(host, strport, &hints, &aitop)) != 0) { + error("getaddrinfo %s: %s", host, ssh_gai_strerror(gaierr)); + s = -1; + return s; + } for (ai = aitop; ai; ai = ai->ai_next) { s = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); if (s < 0) { @@ -388,8 +413,20 @@ { con *c = &fdcon[s]; int ret; + char *name; - ret = conalloc(c->c_namelist, c->c_output_name, c->c_keytype); +/* + * If "connclosed" isn't set, do the next host from c->c_namelist. Else, + * restore the original string in c->c_namebase and redo the current + * host. + */ + name = c->c_namelist; /* do next in list ?? */ + if (connclosed != 0) { /* nope so */ + if (name && *name != '\0') /* restore separator, if any */ + *(name - 1) = ','; /* and */ + name = c->c_namebase; /* redo current */ + } + ret = conalloc(name, c->c_output_name, c->c_keytype); confree(s); return (ret); } @@ -419,10 +456,12 @@ if (n == 0) { switch (errno) { case EPIPE: - error("%s: Connection closed by remote host", c->c_name); + error("read (%s): Connection closed by remote host", c->c_name); break; case ECONNREFUSED: - break; + if (! log_verbose) + break; + /* fall thru */ default: error("read (%s): %s", c->c_name, strerror(errno)); break; @@ -443,14 +482,20 @@ datafellows = 0; if (c->c_keytype != KT_RSA1) { if (!ssh2_capable(remote_major, remote_minor)) { + if (log_verbose) + logit("%s doesn't support ssh2", c->c_name); + else debug("%s doesn't support ssh2", c->c_name); - confree(s); - return; + confree(s); + return; } } else if (remote_major != 1) { + if (log_verbose) + logit("%s doesn't support ssh1", c->c_name); + else debug("%s doesn't support ssh1", c->c_name); - confree(s); - return; + confree(s); + return; } fprintf(stderr, "# %s %s\n", c->c_name, chop(buf)); n = snprintf(buf, sizeof buf, "SSH-%d.%d-OpenSSH-keyscan\r\n", @@ -466,13 +511,17 @@ confree(s); return; } +/* Read and print one of the ssh2 keys for this host. */ if (c->c_keytype != KT_RSA1) { keyprint(c, keygrab_ssh2(c)); confree(s); return; } +/* Continue the process of getting the ssh1 key. */ c->c_status = CS_SIZE; contouch(s); + return; + } static void @@ -520,7 +569,7 @@ struct timeval seltime, now; fd_set *r, *e; con *c; - int i; + int i, s; gettimeofday(&now, NULL); c = TAILQ_FIRST(&tq); @@ -550,20 +599,100 @@ if (FD_ISSET(i, e)) { error("%s: exception!", fdcon[i].c_name); confree(i); - } else if (FD_ISSET(i, r)) + } else if (FD_ISSET(i, r)) { conread(i); +/* + * Break if the read attempt in the `packet.c:packet_read_seqnr()' + * function failed because our "local" timeout was exceeded or because + * the remote host closed the connection before the packet data read + * was complete. The remote closure probably occurred because the + * LoginGraceTime was exceeded on the remote `sshd' server. + */ + if (conntimedout || connclosed) + break; + } } xfree(r); xfree(e); +/* + * If we have the "conntimedout" condition, the read attempt failed + * because the "local" timeout (set by the `packet_set_timeout()' + * function call) was exceeded. Give all hosts that currently have a + * "fdcon[s]" entry a fresh timeout. + */ + i = -1; c = TAILQ_FIRST(&tq); - while (c && (c->c_tv.tv_sec < now.tv_sec || - (c->c_tv.tv_sec == now.tv_sec && c->c_tv.tv_usec < now.tv_usec))) { - int s = c->c_fd; + + if (conntimedout) { + while (c) { + s = c->c_fd; +/* + * If i >= 0, fdcon[i] should be the first entry "touch"ed by + * the call to contouch() below. + */ + if (s == i) + break; +/* + * Save fd of first "touch"ed entry. If we encounter it again, we'll + * know that we've cycled through all of the original queue. + */ + contouch(s); /* a fresh timeout for fdcon[s] */ + if (i < 0) + i = s; + + c = TAILQ_NEXT(c, c_link); + } + + conntimedout = 0; + + return; + } + +/* + * If we have the "connclosed" condition, the read failed because the + * remote server closed the connection before sending the key. All hosts + * that currently have a viable "fdcon[s]" entry will be recycled below + * to negate the time used waiting for the server to respond. This is + * a very kludgy way to do this and should be necessary only if the + * "local" timeout value exceeds the remote servers LoginGraceTime or + * if there are a lot of very slow servers out there. + * + * Loop through the remaining open TAILQ entries. The loop covers two + * conditions: all entries for "connclosed" (described above) and the + * per entry timeout that occurs while waiting for the remote server to + * send its return greeting. + */ + while (c && (connclosed || + (c->c_tv.tv_sec < now.tv_sec || + (c->c_tv.tv_sec == now.tv_sec && + c->c_tv.tv_usec < now.tv_usec)))) { + s = c->c_fd; +/* + * If i >= 0, fdcon[i] should be the first of any new allocations that + * were made as a result of the call(s) to conrecycle() below. + */ + if (s == i) + break; + +/* + * If requested and if not recycling because of "connclosed", list this + * host as a connection time out. + */ + if (log_verbose && connclosed == 0) + logit("%s: Connection timed out.", c->c_name); + +/* + * Save fd of first new allocation. If we encounter it again, we'll + * know that we've cycled through all of the original queue. + */ + s = conrecycle(s); + if (i < 0) + i = s; c = TAILQ_NEXT(c, c_link); - conrecycle(s); } + connclosed = 0; } static void @@ -583,6 +712,19 @@ } } +/* + * Convert general remote aborts to continues while the `dispatch_run()' + * function is being executed. + */ +void +cleanup_exit(int i) +{ + if (nonfatal_fatal) + longjmp(kexjmp, -1); + else + exit(i); +} + void fatal(const char *fmt,...) { @@ -601,7 +743,7 @@ usage(void) { fprintf(stderr, - "usage: %s [-46Hv] [-f file] [-p port] [-T timeout] [-t type]\n" + "usage: %s [-46HLv] [-f file] [-p port] [-T timeout] [-t type]\n" "\t\t [host | addrlist namelist] ...\n", __progname); exit(1); @@ -629,11 +771,14 @@ if (argc <= 1) usage(); - while ((opt = getopt(argc, argv, "Hv46p:T:t:f:")) != -1) { + while ((opt = getopt(argc, argv, "HLv46p:T:t:f:")) != -1) { switch (opt) { case 'H': hash_hosts = 1; break; + case 'L': + log_verbose = 1; + break; case 'p': ssh_port = a2port(optarg); if (ssh_port <= 0) { @@ -714,7 +859,7 @@ fdlim_set(maxfd); fdcon = xcalloc(maxfd, sizeof(con)); - read_wait_nfdset = howmany(maxfd, NFDBITS); + read_wait_nfdset = howmany(maxfd + 1, NFDBITS); read_wait = xcalloc(read_wait_nfdset, sizeof(fd_mask)); for (j = 0; j < fopt_count; j++) {