Wei Liu
2013-Jan-09 09:50 UTC
[PATCH V6] Switch from select() to poll() in xenconsoled''s IO loop
In Linux select() typically supports up to 1024 file descriptors. This can be a problem when user tries to boot up many guests. Switching to poll() has minimum impact on existing code and has better scalibility. pollfd array is dynamically allocated / reallocated. If the array fails to expand, we just ignore the incoming fd. Change from V2: * remove unnecessary malloc in initialize_pollfd_arrays * use ROUND_UP to get new size of arrays Change from V3: * remove initialize and destroy function for array * embedded tracking structure in struct domain, eliminate fd_to_pollfd Change from V4: * make xs_pollfd local to io.c * add back the 5 ms fuzz * handle POLLERR and POLLHUP * treat POLLPRI as error if set in revents * abort if xenconsoled''s own fds get screwed * handle broken tty if tty''s fds get screwed Change from V5: * make some handles local to handle_io * remove redundant xc interface Signed-off-by: Wei Liu <wei.liu2@citrix.com> --- tools/console/daemon/io.c | 209 +++++++++++++++++++++++++++++---------------- 1 file changed, 136 insertions(+), 73 deletions(-) diff --git a/tools/console/daemon/io.c b/tools/console/daemon/io.c index 48fe151..6762cba 100644 --- a/tools/console/daemon/io.c +++ b/tools/console/daemon/io.c @@ -28,7 +28,7 @@ #include <stdlib.h> #include <errno.h> #include <string.h> -#include <sys/select.h> +#include <poll.h> #include <fcntl.h> #include <unistd.h> #include <termios.h> @@ -66,9 +66,12 @@ extern int discard_overflowed_data; static int log_time_hv_needts = 1; static int log_time_guest_needts = 1; static int log_hv_fd = -1; -static evtchn_port_or_error_t log_hv_evtchn = -1; -static xc_interface *xch; /* why does xenconsoled have two xc handles ? */ -static xc_evtchn *xce_handle = NULL; + +static struct pollfd *fds; +static unsigned int current_array_size; +static unsigned int nr_fds; + +#define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1)) struct buffer { char *data; @@ -81,7 +84,9 @@ struct buffer { struct domain { int domid; int master_fd; + struct pollfd *master_pollfd; int slave_fd; + struct pollfd *slave_pollfd; int log_fd; bool is_dead; unsigned last_seen; @@ -92,6 +97,7 @@ struct domain { evtchn_port_or_error_t local_port; evtchn_port_or_error_t remote_port; xc_evtchn *xce_handle; + struct pollfd *xce_pollfd; struct xencons_interface *interface; int event_count; long long next_period; @@ -769,6 +775,17 @@ static int ring_free_bytes(struct domain *dom) return (sizeof(intf->in) - space); } +static void domain_handle_broken_tty(struct domain *dom, int recreate) +{ + domain_close_tty(dom); + + if (recreate) { + domain_create_tty(dom); + } else { + shutdown_domain(dom); + } +} + static void handle_tty_read(struct domain *dom) { ssize_t len = 0; @@ -794,13 +811,7 @@ static void handle_tty_read(struct domain *dom) * keep the slave open for the duration. */ if (len < 0) { - domain_close_tty(dom); - - if (domain_is_valid(dom->domid)) { - domain_create_tty(dom); - } else { - shutdown_domain(dom); - } + domain_handle_broken_tty(dom, domain_is_valid(dom->domid)); } else if (domain_is_valid(dom->domid)) { prod = intf->in_prod; for (i = 0; i < len; i++) { @@ -828,14 +839,7 @@ static void handle_tty_write(struct domain *dom) if (len < 1) { dolog(LOG_DEBUG, "Write failed on domain %d: %zd, %d\n", dom->domid, len, errno); - - domain_close_tty(dom); - - if (domain_is_valid(dom->domid)) { - domain_create_tty(dom); - } else { - shutdown_domain(dom); - } + domain_handle_broken_tty(dom, domain_is_valid(dom->domid)); } else { buffer_advance(&dom->buffer, len); } @@ -883,7 +887,7 @@ static void handle_xs(void) free(vec); } -static void handle_hv_logs(void) +static void handle_hv_logs(xc_evtchn *xce_handle) { char buffer[1024*16]; char *bufptr = buffer; @@ -894,7 +898,7 @@ static void handle_hv_logs(void) if ((port = xc_evtchn_pending(xce_handle)) == -1) return; - if (xc_readconsolering(xch, bufptr, &size, 0, 1, &index) == 0 && size > 0) { + if (xc_readconsolering(xc, bufptr, &size, 0, 1, &index) == 0 && size > 0) { int logret; if (log_time_hv) logret = write_with_timestamp(log_hv_fd, buffer, size, @@ -928,18 +932,54 @@ static void handle_log_reload(void) } } +static struct pollfd *set_fds(int fd, short events) +{ + struct pollfd *ret; + if (current_array_size < nr_fds + 1) { + struct pollfd *new_fds = NULL; + unsigned long newsize; + + /* Round up to 2^8 boundary, in practice this just + * make newsize larger than current_array_size. + */ + newsize = ROUNDUP(nr_fds + 1, 8); + + new_fds = realloc(fds, sizeof(struct pollfd)*newsize); + if (!new_fds) + goto fail; + fds = new_fds; + + memset(&fds[0] + current_array_size, 0, + sizeof(struct pollfd) * (newsize-current_array_size)); + current_array_size = newsize; + } + + fds[nr_fds].fd = fd; + fds[nr_fds].events = events; + ret = &fds[nr_fds]; + nr_fds++; + + return ret; +fail: + dolog(LOG_ERR, "realloc failed, ignoring fd %d\n", fd); + return NULL; +} + +static void reset_fds(void) +{ + nr_fds = 0; + memset(fds, 0, sizeof(struct pollfd) * current_array_size); +} + void handle_io(void) { - fd_set readfds, writefds; int ret; + evtchn_port_or_error_t log_hv_evtchn = -1; + struct pollfd *xce_pollfd = NULL; + struct pollfd *xs_pollfd = NULL; + xc_evtchn *xce_handle = NULL; if (log_hv) { - xch = xc_interface_open(0,0,0); - if (!xch) { - dolog(LOG_ERR, "Failed to open xc handle: %d (%s)", - errno, strerror(errno)); - goto out; - } xce_handle = xc_evtchn_open(NULL, 0); if (xce_handle == NULL) { dolog(LOG_ERR, "Failed to open xce handle: %d (%s)", @@ -959,21 +999,17 @@ void handle_io(void) for (;;) { struct domain *d, *n; - int max_fd = -1; - struct timeval timeout; + int poll_timeout; /* timeout in milliseconds */ struct timespec ts; long long now, next_timeout = 0; - FD_ZERO(&readfds); - FD_ZERO(&writefds); + reset_fds(); - FD_SET(xs_fileno(xs), &readfds); - max_fd = MAX(xs_fileno(xs), max_fd); + xs_pollfd = set_fds(xs_fileno(xs), POLLIN|POLLPRI); - if (log_hv) { - FD_SET(xc_evtchn_fd(xce_handle), &readfds); - max_fd = MAX(xc_evtchn_fd(xce_handle), max_fd); - } + if (log_hv) + xce_pollfd = set_fds(xc_evtchn_fd(xce_handle), + POLLIN|POLLPRI); if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0) return; @@ -982,10 +1018,12 @@ void handle_io(void) /* Re-calculate any event counter allowances & unblock domains with new allowance */ for (d = dom_head; d; d = d->next) { - /* Add 5ms of fuzz since select() often returns - a couple of ms sooner than requested. Without - the fuzz we typically do an extra spin in select() - with a 1/2 ms timeout every other iteration */ + /* CS 16257:955ee4fa1345 introduces a 5ms fuzz + * for select(), it is not clear poll() has + * similar behavior (returning a couple of ms + * sooner than requested) as well. Just leave + * the fuzz here. Remove it with a separate + * patch if necessary */ if ((now+5) > d->next_period) { d->next_period = now + RATE_LIMIT_PERIOD; if (d->event_count >= RATE_LIMIT_ALLOWANCE) { @@ -1006,75 +1044,101 @@ void handle_io(void) !d->buffer.max_capacity || d->buffer.size < d->buffer.max_capacity) { int evtchn_fd = xc_evtchn_fd(d->xce_handle); - FD_SET(evtchn_fd, &readfds); - max_fd = MAX(evtchn_fd, max_fd); + d->xce_pollfd = set_fds(evtchn_fd, + POLLIN|POLLPRI); } } if (d->master_fd != -1) { + short events = 0; if (!d->is_dead && ring_free_bytes(d)) - FD_SET(d->master_fd, &readfds); + events |= POLLIN; if (!buffer_empty(&d->buffer)) - FD_SET(d->master_fd, &writefds); - max_fd = MAX(d->master_fd, max_fd); + events |= POLLOUT; + + if (events) + d->master_pollfd + set_fds(d->master_fd, + events|POLLPRI); } } /* If any domain has been rate limited, we need to work - out what timeout to supply to select */ + out what timeout to supply to poll */ if (next_timeout) { long long duration = (next_timeout - now); if (duration <= 0) /* sanity check */ duration = 1; - timeout.tv_sec = duration / 1000; - timeout.tv_usec = ((duration - (timeout.tv_sec * 1000)) - * 1000); + poll_timeout = (int)duration; } - ret = select(max_fd + 1, &readfds, &writefds, 0, - next_timeout ? &timeout : NULL); + ret = poll(fds, nr_fds, next_timeout ? poll_timeout : -1); if (log_reload) { handle_log_reload(); log_reload = 0; } - /* Abort if select failed, except for EINTR cases + /* Abort if poll failed, except for EINTR cases which indicate a possible log reload */ if (ret == -1) { if (errno == EINTR) continue; - dolog(LOG_ERR, "Failure in select: %d (%s)", + dolog(LOG_ERR, "Failure in poll: %d (%s)", errno, strerror(errno)); break; } - if (log_hv && FD_ISSET(xc_evtchn_fd(xce_handle), &readfds)) - handle_hv_logs(); + if (log_hv && xce_pollfd) { + if (xce_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) { + dolog(LOG_ERR, + "Failure in poll xce_handle: %d (%s)", + errno, strerror(errno)); + break; + } else if (xce_pollfd->revents & POLLIN) + handle_hv_logs(xce_handle); + } if (ret <= 0) continue; - if (FD_ISSET(xs_fileno(xs), &readfds)) - handle_xs(); + if (xs_pollfd) { + if (xs_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) { + dolog(LOG_ERR, + "Failure in poll xs_handle: %d (%s)", + errno, strerror(errno)); + break; + } else if (xs_pollfd->revents & POLLIN) + handle_xs(); + } for (d = dom_head; d; d = n) { n = d->next; if (d->event_count < RATE_LIMIT_ALLOWANCE) { if (d->xce_handle != NULL && - FD_ISSET(xc_evtchn_fd(d->xce_handle), - &readfds)) - handle_ring_read(d); + d->xce_pollfd && + !(d->xce_pollfd->revents & + ~(POLLIN|POLLOUT|POLLPRI)) && + (d->xce_pollfd->revents & + POLLIN)) + handle_ring_read(d); } - if (d->master_fd != -1 && FD_ISSET(d->master_fd, - &readfds)) - handle_tty_read(d); - - if (d->master_fd != -1 && FD_ISSET(d->master_fd, - &writefds)) - handle_tty_write(d); + if (d->master_fd != -1 && d->master_pollfd) { + if (d->master_pollfd->revents & + ~(POLLIN|POLLOUT|POLLPRI)) + domain_handle_broken_tty(d, + domain_is_valid(d->domid)); + else { + if (d->master_pollfd->revents & + POLLIN) + handle_tty_read(d); + if (d->master_pollfd->revents & + POLLOUT) + handle_tty_write(d); + } + } if (d->last_seen != enum_pass) shutdown_domain(d); @@ -1084,15 +1148,14 @@ void handle_io(void) } } + free(fds); + current_array_size = 0; + out: if (log_hv_fd != -1) { close(log_hv_fd); log_hv_fd = -1; } - if (xch) { - xc_interface_close(xch); - xch = 0; - } if (xce_handle != NULL) { xc_evtchn_close(xce_handle); xce_handle = NULL; -- 1.7.10.4
Ian Campbell
2013-Jan-09 11:02 UTC
Re: [PATCH V6] Switch from select() to poll() in xenconsoled''s IO loop
On Wed, 2013-01-09 at 09:50 +0000, Wei Liu wrote:> In Linux select() typically supports up to 1024 file descriptors. This can be > a problem when user tries to boot up many guests. Switching to poll() has > minimum impact on existing code and has better scalibility. > > pollfd array is dynamically allocated / reallocated. If the array fails to > expand, we just ignore the incoming fd. > > Change from V2: > * remove unnecessary malloc in initialize_pollfd_arrays > * use ROUND_UP to get new size of arrays > > Change from V3: > * remove initialize and destroy function for array > * embedded tracking structure in struct domain, eliminate fd_to_pollfd > > Change from V4: > * make xs_pollfd local to io.c > * add back the 5 ms fuzz > * handle POLLERR and POLLHUP > * treat POLLPRI as error if set in revents > * abort if xenconsoled''s own fds get screwed > * handle broken tty if tty''s fds get screwed > > Change from V5: > * make some handles local to handle_ioThe code now resets some of these to -1/NULL on exit from handle_io, which isn''t really necessary but I''m not to bothered about that.> * remove redundant xc interface > > Signed-off-by: Wei Liu <wei.liu2@citrix.com>Acked-by: Ian Campbell <ian.campbell@citrix.com>
Ian Campbell
2013-Jan-11 12:25 UTC
Re: [PATCH V6] Switch from select() to poll() in xenconsoled''s IO loop
On Wed, 2013-01-09 at 11:02 +0000, Ian Campbell wrote:> On Wed, 2013-01-09 at 09:50 +0000, Wei Liu wrote: > > In Linux select() typically supports up to 1024 file descriptors. This can be > > a problem when user tries to boot up many guests. Switching to poll() has > > minimum impact on existing code and has better scalibility. > > > > pollfd array is dynamically allocated / reallocated. If the array fails to > > expand, we just ignore the incoming fd.> Acked-by: Ian Campbell <ian.campbell@citrix.com>And now applied. Ian.
Ian Campbell
2013-Jan-13 10:24 UTC
Re: [PATCH V6] Switch from select() to poll() in xenconsoled''s IO loop
On Fri, 2013-01-11 at 12:25 +0000, Ian Campbell wrote:> On Wed, 2013-01-09 at 11:02 +0000, Ian Campbell wrote: > > On Wed, 2013-01-09 at 09:50 +0000, Wei Liu wrote: > > > In Linux select() typically supports up to 1024 file descriptors. This can be > > > a problem when user tries to boot up many guests. Switching to poll() has > > > minimum impact on existing code and has better scalibility. > > > > > > pollfd array is dynamically allocated / reallocated. If the array fails to > > > expand, we just ignore the incoming fd. > > > Acked-by: Ian Campbell <ian.campbell@citrix.com> > > And now applied.I''m afraid I''ve now reverted this, the bisector <E1Tu7of-0004FF-Mf@woking.cam.xci-test.com> fingered it as responsible for the failures in test flight 14869 onwards. Ian.