Ian Jackson
2012-May-16 15:25 UTC
[PATCH 0/3] libxl: event handling fixes related to console/pygrub Roger Pau Monne <roger.pau@citrix.com>
Thanks to Roger Pau Monne and Ian Campbell for the reports. With these changes, xl create -c works for me again with pygrub. Apparently I had not tested that recently enough; sorry :-/. 1/3 libxl: events: debugging output relating to ao''s 2/3 libxl: Do not use-after-free on ao progress reporting 3/3 libxl, xl: fix bootloader immediate console attach
Ian Jackson
2012-May-16 15:25 UTC
[PATCH 1/3] libxl: events: debugging output relating to ao''s
Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com> --- tools/libxl/libxl_event.c | 36 ++++++++++++++++++++++++++++++++++-- tools/libxl/libxl_internal.h | 12 ++++++++---- 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/tools/libxl/libxl_event.c b/tools/libxl/libxl_event.c index bdbbdd4..7e71a88 100644 --- a/tools/libxl/libxl_event.c +++ b/tools/libxl/libxl_event.c @@ -1006,11 +1006,14 @@ static void egc_run_callbacks(libxl__egc *egc) LIBXL_TAILQ_FOREACH_SAFE(ev, &egc->occurred_for_callback, link, ev_tmp) { LIBXL_TAILQ_REMOVE(&egc->occurred_for_callback, ev, link); + LOG(DEBUG,"event %p callback type=%s", + ev, libxl_event_type_to_string(ev->type)); CTX->event_hooks->event_occurs(CTX->event_hooks_user, ev); } LIBXL_TAILQ_FOREACH_SAFE(aop, &egc->aops_for_callback, entry, aop_tmp) { LIBXL_TAILQ_REMOVE(&egc->aops_for_callback, aop, entry); + LOG(DEBUG,"ao %p: progress report: callback aop=%p", aop->ao, aop); aop->how->callback(CTX, aop->ev, aop->how->for_callback); CTX_LOCK; @@ -1023,6 +1026,7 @@ static void egc_run_callbacks(libxl__egc *egc) LIBXL_TAILQ_FOREACH_SAFE(ao, &egc->aos_for_callback, entry_for_callback, ao_tmp) { LIBXL_TAILQ_REMOVE(&egc->aos_for_callback, ao, entry_for_callback); + LOG(DEBUG,"ao %p: completion callback", ao); ao->how.callback(CTX, ao->rc, ao->how.u.for_callback); CTX_LOCK; ao->notified = 1; @@ -1382,7 +1386,9 @@ int libxl_event_wait(libxl_ctx *ctx, libxl_event **event_r, void libxl__ao__destroy(libxl_ctx *ctx, libxl__ao *ao) { + AO_GC; if (!ao) return; + LOG(DEBUG,"ao %p: destroy",ao); if (ao->poller) libxl__poller_put(ctx, ao->poller); ao->magic = LIBXL__AO_MAGIC_DESTROYED; libxl__free_all(&ao->gc); @@ -1392,6 +1398,7 @@ void libxl__ao__destroy(libxl_ctx *ctx, libxl__ao *ao) void libxl__ao_abort(libxl__ao *ao) { AO_GC; + LOG(DEBUG,"ao %p: abort",ao); assert(ao->magic == LIBXL__AO_MAGIC); assert(ao->in_initiator); assert(!ao->complete); @@ -1408,6 +1415,8 @@ libxl__gc *libxl__ao_inprogress_gc(libxl__ao *ao) void libxl__ao_complete(libxl__egc *egc, libxl__ao *ao, int rc) { + AO_GC; + LOG(DEBUG,"ao %p: complete, rc=%d",ao,rc); assert(ao->magic == LIBXL__AO_MAGIC); assert(!ao->complete); ao->complete = 1; @@ -1437,6 +1446,8 @@ void libxl__ao_complete_check_progress_reports(libxl__egc *egc, libxl__ao *ao) /* don''t bother with this if we''re not in the event loop */ libxl__poller_wakeup(egc, ao->poller); } else if (ao->how.callback) { + AO_GC; + LOG(DEBUG,"ao %p: complete for callback",ao); LIBXL_TAILQ_INSERT_TAIL(&egc->aos_for_callback, ao, entry_for_callback); } else { libxl_event *ev; @@ -1453,7 +1464,8 @@ void libxl__ao_complete_check_progress_reports(libxl__egc *egc, libxl__ao *ao) } libxl__ao *libxl__ao_create(libxl_ctx *ctx, uint32_t domid, - const libxl_asyncop_how *how) + const libxl_asyncop_how *how, + const char *file, int line, const char *func) { libxl__ao *ao; @@ -1473,6 +1485,10 @@ libxl__ao *libxl__ao_create(libxl_ctx *ctx, uint32_t domid, ao->poller = libxl__poller_get(ctx); if (!ao->poller) goto out; } + libxl__log(ctx,XTL_DEBUG,-1,file,line,func, + "ao %p: create: how=%p callback=%p poller=%p", + ao, how, ao->how.callback, ao->poller); + return ao; out: @@ -1481,7 +1497,8 @@ libxl__ao *libxl__ao_create(libxl_ctx *ctx, uint32_t domid, } -int libxl__ao_inprogress(libxl__ao *ao) +int libxl__ao_inprogress(libxl__ao *ao, + const char *file, int line, const char *func) { AO_GC; int rc; @@ -1491,6 +1508,14 @@ int libxl__ao_inprogress(libxl__ao *ao) assert(ao->in_initiator); ao->constructing = 0; + libxl__log(CTX,XTL_DEBUG,-1,file,line,func, + "ao %p: inprogress: poller=%p, flags=%s%s%s%s", + ao, ao->poller, + ao->constructing ? "o" : "", + ao->in_initiator ? "i" : "", + ao->complete ? "c" : "", + ao->notified ? "n" : ""); + if (ao->poller) { /* Caller wants it done synchronously. */ /* We use a fresh gc, so that we can free things @@ -1507,6 +1532,8 @@ int libxl__ao_inprogress(libxl__ao *ao) break; } + LOG(DEBUG,"ao %p: not ready, waiting",ao); + rc = eventloop_iteration(&egc,ao->poller); if (rc) { /* Oh dear, this is quite unfortunate. */ @@ -1557,8 +1584,10 @@ void libxl__ao_progress_gethow(libxl_asyncprogress_how *in_state, void libxl__ao_progress_report(libxl__egc *egc, libxl__ao *ao, const libxl_asyncprogress_how *how, libxl_event *ev) { + AO_GC; ev->for_user = how->for_event; if (how->callback == dummy_asyncprogress_callback_ignore) { + LOG(DEBUG,"ao %p: progress report: ignored",ao); /* ignore */ } else if (how->callback) { libxl__aop_occurred *aop = libxl__zalloc(&egc->gc, sizeof(*aop)); @@ -1567,7 +1596,10 @@ void libxl__ao_progress_report(libxl__egc *egc, libxl__ao *ao, aop->ev = ev; aop->how = how; LIBXL_TAILQ_INSERT_TAIL(&egc->aops_for_callback, aop, entry); + LOG(DEBUG,"ao %p: progress report: callback queued aop=%p",ao,aop); } else { + LOG(DEBUG,"ao %p: progress report: event queued ev=%p type=%s", + ao, ev, libxl_event_type_to_string(ev->type)); libxl__event_occurred(egc, ev); } } diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index 73b9915..a3a87d8 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -1548,14 +1548,16 @@ _hidden void libxl__egc_cleanup(libxl__egc *egc); #define AO_CREATE(ctx, domid, ao_how) \ libxl__ctx_lock(ctx); \ - libxl__ao *ao = libxl__ao_create(ctx, domid, ao_how); \ + libxl__ao *ao = libxl__ao_create(ctx, domid, ao_how, \ + __FILE__, __LINE__, __func__); \ if (!ao) { libxl__ctx_unlock(ctx); return ERROR_NOMEM; } \ libxl__egc egc[1]; LIBXL_INIT_EGC(egc[0],ctx); \ AO_GC; #define AO_INPROGRESS ({ \ libxl_ctx *ao__ctx = libxl__gc_owner(&ao->gc); \ - int ao__rc = libxl__ao_inprogress(ao); \ + int ao__rc = libxl__ao_inprogress(ao, \ + __FILE__, __LINE__, __func__); \ libxl__ctx_unlock(ao__ctx); /* gc is now invalid */ \ EGC_FREE; \ (ao__rc); \ @@ -1581,8 +1583,10 @@ _hidden void libxl__egc_cleanup(libxl__egc *egc); /* All of these MUST be called with the ctx locked. * libxl__ao_inprogress MUST be called with the ctx locked exactly once. */ _hidden libxl__ao *libxl__ao_create(libxl_ctx*, uint32_t domid, - const libxl_asyncop_how*); -_hidden int libxl__ao_inprogress(libxl__ao *ao); /* temporarily unlocks */ + const libxl_asyncop_how*, + const char *file, int line, const char *func); +_hidden int libxl__ao_inprogress(libxl__ao *ao, + const char *file, int line, const char *func); /* temporarily unlocks */ _hidden void libxl__ao_abort(libxl__ao *ao); _hidden void libxl__ao_complete(libxl__egc *egc, libxl__ao *ao, int rc); _hidden libxl__gc *libxl__ao_inprogress_gc(libxl__ao *ao); -- 1.7.2.5
Ian Jackson
2012-May-16 15:25 UTC
[PATCH 2/3] libxl: Do not use-after-free on ao progress reporting
We need to call libxl__free_all after egc_run_callbacks since some of the callbacks might be ao progress reports allocated from the egc''s gc. Fixes a segfault in egc_run_callbacks. Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com> --- tools/libxl/libxl_event.c | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/libxl/libxl_event.c b/tools/libxl/libxl_event.c index 7e71a88..d3bb4da 100644 --- a/tools/libxl/libxl_event.c +++ b/tools/libxl/libxl_event.c @@ -1039,9 +1039,9 @@ static void egc_run_callbacks(libxl__egc *egc) void libxl__egc_cleanup(libxl__egc *egc) { EGC_GC; - libxl__free_all(gc); - egc_run_callbacks(egc); + + libxl__free_all(gc); } /* -- 1.7.2.5
Ian Jackson
2012-May-16 15:25 UTC
[PATCH 3/3] libxl, xl: fix bootloader immediate console attach
Fix bugs related to console handling: * In libxl_primary_console_exec, if libxl__domain_type fails, do not abort, but instead log and return an error. This can occur if the domid is invalid. * In xl''s autoconnect_console, rename the ctx formal parameter so that when postfork creates a new ctx and puts it in the global ctx, we don''t end up using the old one (via the formal parameter which has shadowed the global). * In xl''s autoconnect_console, pass the domid from the event to libxl_primary_console_exec, rather than using the global domid (which has not yet been set, since it is only set at completion of the ao). This causes xl create -c to once more work with pygrub. Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com> --- tools/libxl/libxl.c | 4 ++++ tools/libxl/xl_cmdimpl.c | 6 ++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c index 4d01cf8..1e0105e 100644 --- a/tools/libxl/libxl.c +++ b/tools/libxl/libxl.c @@ -1188,6 +1188,10 @@ int libxl_primary_console_exec(libxl_ctx *ctx, uint32_t domid_vm) case LIBXL_DOMAIN_TYPE_PV: rc = libxl_console_exec(ctx, domid_vm, 0, LIBXL_CONSOLE_TYPE_PV); break; + case -1: + LOG(ERROR,"unable to get domain type for domid=%"PRIu32,domid_vm); + rc = ERROR_FAIL; + break; default: abort(); } diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c index 5fc5cde..9f182c2 100644 --- a/tools/libxl/xl_cmdimpl.c +++ b/tools/libxl/xl_cmdimpl.c @@ -1457,9 +1457,11 @@ static int freemem(libxl_domain_build_info *b_info) return ERROR_NOMEM; } -static void autoconnect_console(libxl_ctx *ctx, libxl_event *ev, void *priv) +static void autoconnect_console(libxl_ctx *ctx_ignored, + libxl_event *ev, void *priv) { pid_t *pid = priv; + uint32_t bldomid = ev->domid; libxl_event_free(ctx, ev); @@ -1473,7 +1475,7 @@ static void autoconnect_console(libxl_ctx *ctx, libxl_event *ev, void *priv) postfork(); sleep(1); - libxl_primary_console_exec(ctx, domid); + libxl_primary_console_exec(ctx, bldomid); /* Do not return. xl continued in child process */ fprintf(stderr, "Unable to attach console\n"); _exit(1); -- 1.7.2.5
Ian Campbell
2012-May-17 10:46 UTC
Re: [PATCH 1/3] libxl: events: debugging output relating to ao''s
On Wed, 2012-05-16 at 16:25 +0100, Ian Jackson wrote:> Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com> > --- > tools/libxl/libxl_event.c | 36 ++++++++++++++++++++++++++++++++++-- > tools/libxl/libxl_internal.h | 12 ++++++++---- > 2 files changed, 42 insertions(+), 6 deletions(-) > > diff --git a/tools/libxl/libxl_event.c b/tools/libxl/libxl_event.c > index bdbbdd4..7e71a88 100644 > --- a/tools/libxl/libxl_event.c > +++ b/tools/libxl/libxl_event.c > @@ -1006,11 +1006,14 @@ static void egc_run_callbacks(libxl__egc *egc) > > LIBXL_TAILQ_FOREACH_SAFE(ev, &egc->occurred_for_callback, link, ev_tmp) { > LIBXL_TAILQ_REMOVE(&egc->occurred_for_callback, ev, link); > + LOG(DEBUG,"event %p callback type=%s", > + ev, libxl_event_type_to_string(ev->type)); > CTX->event_hooks->event_occurs(CTX->event_hooks_user, ev); > } > > LIBXL_TAILQ_FOREACH_SAFE(aop, &egc->aops_for_callback, entry, aop_tmp) { > LIBXL_TAILQ_REMOVE(&egc->aops_for_callback, aop, entry); > + LOG(DEBUG,"ao %p: progress report: callback aop=%p", aop->ao, aop); > aop->how->callback(CTX, aop->ev, aop->how->for_callback); > > CTX_LOCK; > @@ -1023,6 +1026,7 @@ static void egc_run_callbacks(libxl__egc *egc) > LIBXL_TAILQ_FOREACH_SAFE(ao, &egc->aos_for_callback, > entry_for_callback, ao_tmp) { > LIBXL_TAILQ_REMOVE(&egc->aos_for_callback, ao, entry_for_callback); > + LOG(DEBUG,"ao %p: completion callback", ao); > ao->how.callback(CTX, ao->rc, ao->how.u.for_callback); > CTX_LOCK; > ao->notified = 1; > @@ -1382,7 +1386,9 @@ int libxl_event_wait(libxl_ctx *ctx, libxl_event **event_r, > > void libxl__ao__destroy(libxl_ctx *ctx, libxl__ao *ao) > { > + AO_GC; > if (!ao) return; > + LOG(DEBUG,"ao %p: destroy",ao); > if (ao->poller) libxl__poller_put(ctx, ao->poller); > ao->magic = LIBXL__AO_MAGIC_DESTROYED; > libxl__free_all(&ao->gc); > @@ -1392,6 +1398,7 @@ void libxl__ao__destroy(libxl_ctx *ctx, libxl__ao *ao) > void libxl__ao_abort(libxl__ao *ao) > { > AO_GC; > + LOG(DEBUG,"ao %p: abort",ao); > assert(ao->magic == LIBXL__AO_MAGIC); > assert(ao->in_initiator); > assert(!ao->complete); > @@ -1408,6 +1415,8 @@ libxl__gc *libxl__ao_inprogress_gc(libxl__ao *ao) > > void libxl__ao_complete(libxl__egc *egc, libxl__ao *ao, int rc) > { > + AO_GC; > + LOG(DEBUG,"ao %p: complete, rc=%d",ao,rc); > assert(ao->magic == LIBXL__AO_MAGIC); > assert(!ao->complete); > ao->complete = 1; > @@ -1437,6 +1446,8 @@ void libxl__ao_complete_check_progress_reports(libxl__egc *egc, libxl__ao *ao) > /* don''t bother with this if we''re not in the event loop */ > libxl__poller_wakeup(egc, ao->poller); > } else if (ao->how.callback) { > + AO_GC;I''d prefer that we kept these sorts of magic infrastructure macro things at the tops of functions.> @@ -1507,6 +1532,8 @@ int libxl__ao_inprogress(libxl__ao *ao) > break; > } > > + LOG(DEBUG,"ao %p: not ready, waiting",ao);This one is too verbose, with xl -vvv cr -c it spams the pygrub console every second making it unusable. Even before that point there is a good dozen of them printed out. I noticed that very little of this stuff ends up in /var/log/xen/xl-<NAME>.log. I''m not sure why though since it would seem the obvious place for it to go. I guess it happens in the foreground not the daemon? Ian.
Ian Campbell
2012-May-17 10:49 UTC
Re: [PATCH 2/3] libxl: Do not use-after-free on ao progress reporting
On Wed, 2012-05-16 at 16:25 +0100, Ian Jackson wrote:> We need to call libxl__free_all after egc_run_callbacks since some of > the callbacks might be ao progress reports allocated from the egc''s > gc. > > Fixes a segfault in egc_run_callbacks. > > Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>Acked-by: Ian Campbell <ian.campbell@citrix.com>> --- > tools/libxl/libxl_event.c | 4 ++-- > 1 files changed, 2 insertions(+), 2 deletions(-) > > diff --git a/tools/libxl/libxl_event.c b/tools/libxl/libxl_event.c > index 7e71a88..d3bb4da 100644 > --- a/tools/libxl/libxl_event.c > +++ b/tools/libxl/libxl_event.c > @@ -1039,9 +1039,9 @@ static void egc_run_callbacks(libxl__egc *egc) > void libxl__egc_cleanup(libxl__egc *egc) > { > EGC_GC; > - libxl__free_all(gc); > - > egc_run_callbacks(egc); > + > + libxl__free_all(gc); > } > > /*
Ian Campbell
2012-May-17 10:51 UTC
Re: [PATCH 3/3] libxl, xl: fix bootloader immediate console attach
On Wed, 2012-05-16 at 16:25 +0100, Ian Jackson wrote:> Fix bugs related to console handling: > > * In libxl_primary_console_exec, if libxl__domain_type fails, do > not abort, but instead log and return an error. This can occur > if the domid is invalid. > > * In xl''s autoconnect_console, rename the ctx formal parameter so > that when postfork creates a new ctx and puts it in the global ctx, > we don''t end up using the old one (via the formal parameter which > has shadowed the global).I''ll refresh my "xl: remove all local "ctx" variables" after this goes in and resend the remainder (if there is any).> > * In xl''s autoconnect_console, pass the domid from the event > to libxl_primary_console_exec, rather than using the global domid > (which has not yet been set, since it is only set at completion > of the ao). > > This causes xl create -c to once more work with pygrub. > > Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>Acked-by: Ian Campbell <ian.campbell@citrix.com>> --- > tools/libxl/libxl.c | 4 ++++ > tools/libxl/xl_cmdimpl.c | 6 ++++-- > 2 files changed, 8 insertions(+), 2 deletions(-) > > diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c > index 4d01cf8..1e0105e 100644 > --- a/tools/libxl/libxl.c > +++ b/tools/libxl/libxl.c > @@ -1188,6 +1188,10 @@ int libxl_primary_console_exec(libxl_ctx *ctx, uint32_t domid_vm) > case LIBXL_DOMAIN_TYPE_PV: > rc = libxl_console_exec(ctx, domid_vm, 0, LIBXL_CONSOLE_TYPE_PV); > break; > + case -1: > + LOG(ERROR,"unable to get domain type for domid=%"PRIu32,domid_vm); > + rc = ERROR_FAIL; > + break; > default: > abort(); > } > diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c > index 5fc5cde..9f182c2 100644 > --- a/tools/libxl/xl_cmdimpl.c > +++ b/tools/libxl/xl_cmdimpl.c > @@ -1457,9 +1457,11 @@ static int freemem(libxl_domain_build_info *b_info) > return ERROR_NOMEM; > } > > -static void autoconnect_console(libxl_ctx *ctx, libxl_event *ev, void *priv) > +static void autoconnect_console(libxl_ctx *ctx_ignored, > + libxl_event *ev, void *priv) > { > pid_t *pid = priv; > + uint32_t bldomid = ev->domid; > > libxl_event_free(ctx, ev); > > @@ -1473,7 +1475,7 @@ static void autoconnect_console(libxl_ctx *ctx, libxl_event *ev, void *priv) > postfork(); > > sleep(1); > - libxl_primary_console_exec(ctx, domid); > + libxl_primary_console_exec(ctx, bldomid); > /* Do not return. xl continued in child process */ > fprintf(stderr, "Unable to attach console\n"); > _exit(1);
Ian Campbell
2012-May-17 13:46 UTC
Re: [PATCH 0/3] libxl: event handling fixes related to console/pygrub Roger Pau Monne <roger.pau@citrix.com>
On Wed, 2012-05-16 at 16:25 +0100, Ian Jackson wrote:> Thanks to Roger Pau Monne and Ian Campbell for the reports. > With these changes, xl create -c works for me again with pygrub. > Apparently I had not tested that recently enough; sorry :-/. > > 1/3 libxl: events: debugging output relating to ao''sI didn''t apply this, it causes too much see my reply to the patch and to <1337090999-15608-1-git-send-email-ian.jackson@eu.citrix.com>> 2/3 libxl: Do not use-after-free on ao progress reporting > 3/3 libxl, xl: fix bootloader immediate console attachI have applied these two, thanks. Ian.
Ian Jackson
2012-May-18 11:11 UTC
Re: [PATCH 1/3] libxl: events: debugging output relating to ao''s
Ian Campbell writes ("Re: [Xen-devel] [PATCH 1/3] libxl: events: debugging output relating to ao''s"):> On Wed, 2012-05-16 at 16:25 +0100, Ian Jackson wrote:...> > @@ -1437,6 +1446,8 @@ void libxl__ao_complete_check_progress_reports(libxl__egc *egc, libxl__ao *ao) > > /* don''t bother with this if we''re not in the event loop */ > > libxl__poller_wakeup(egc, ao->poller); > > } else if (ao->how.callback) { > > + AO_GC; > > I''d prefer that we kept these sorts of magic infrastructure macro things > at the tops of functions.Right. The reason I did that was because I didn''t want to have to reason about the possible validity of the gc throughout this function, which after all is capable of destroying the ao. But actually I don''t need the gc (which is just as well), but only the ctx. The function already has a call to libxl__gc_owner in it. So I have instead moved that call to the top and now there is a ctx which is valid throughout the function.> > @@ -1507,6 +1532,8 @@ int libxl__ao_inprogress(libxl__ao *ao) > > break; > > } > > > > + LOG(DEBUG,"ao %p: not ready, waiting",ao); > > This one is too verbose, with xl -vvv cr -c it spams the pygrub console > every second making it unusable. Even before that point there is a good > dozen of them printed out.I will redo the patches so the debugging is off by default and needs to be enabled at compile-time.> I noticed that very little of this stuff ends up > in /var/log/xen/xl-<NAME>.log. I''m not sure why though since it would > seem the obvious place for it to go. I guess it happens in the > foreground not the daemon?Yes. Arguably -c should redirect the log to the logfile, or perhaps xl should always copy all the log output to the logfile. (But note that xl itself has various output functions which do not use xtl.) Ian.