Rob Hoes
2013-Nov-26 23:15 UTC
[PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl
Ocaml has a heap lock which must be held whenever ocaml code is running. Ocaml usually drops this lock when it enters a potentially blocking low-level function, such as writing to a file. Libxl has its own lock, which it may acquire when being called. Things get interesting when libxl calls back into ocaml code. There is a risk of ending up in a deadlock when a thread holds both locks at the same time, then temporarily drop the ocaml lock, while another thread calls another libxl function. To avoid deadlocks, we drop the ocaml heap lock before entering libxl, and reacquire it in callbacks to ocaml. This way, the ocaml heap lock is never held together with the libxl lock, except in osevent registration callbacks, and xentoollog callbacks. If we guarantee to not call any libxl functions inside those callbacks, we can avoid deadlocks. This patch handle the dropping and reacquiring of the ocaml heap lock by the caml_enter_blocking_section and caml_leave_blocking_section functions, and related macros. We are also careful to not call any functions that access the ocaml heap while the ocaml heap lock is dropped. This often involves copying ocaml values to C before dropping the ocaml lock. Signed-off-by: Rob Hoes <rob.hoes@citrix.com> CC: Ian Campbell <ian.campbell@citrix.com> CC: Ian Jackson <ian.jackson@eu.citrix.com> CC: Dave Scott <dave.scott@eu.citrix.com> --- tools/ocaml/libs/xentoollog/Makefile | 3 + tools/ocaml/libs/xentoollog/xentoollog_stubs.c | 13 +- tools/ocaml/libs/xl/Makefile | 5 +- tools/ocaml/libs/xl/xenlight_stubs.c | 265 +++++++++++++++++++----- 4 files changed, 229 insertions(+), 57 deletions(-) diff --git a/tools/ocaml/libs/xentoollog/Makefile b/tools/ocaml/libs/xentoollog/Makefile index e535ba5..471b428 100644 --- a/tools/ocaml/libs/xentoollog/Makefile +++ b/tools/ocaml/libs/xentoollog/Makefile @@ -2,6 +2,9 @@ TOPLEVEL=$(CURDIR)/../.. XEN_ROOT=$(TOPLEVEL)/../.. include $(TOPLEVEL)/common.make +# allow mixed declarations and code +CFLAGS += -Wno-declaration-after-statement + CFLAGS += $(CFLAGS_libxenctrl) $(CFLAGS_libxenguest) OCAMLINCLUDE + diff --git a/tools/ocaml/libs/xentoollog/xentoollog_stubs.c b/tools/ocaml/libs/xentoollog/xentoollog_stubs.c index 3b2f91b..daf48fe 100644 --- a/tools/ocaml/libs/xentoollog/xentoollog_stubs.c +++ b/tools/ocaml/libs/xentoollog/xentoollog_stubs.c @@ -31,6 +31,11 @@ #include "caml_xentoollog.h" +/* The following is equal to the CAMLreturn macro, but without the return */ +#define CAMLdone do{ \ +caml_local_roots = caml__frame; \ +}while (0) + #define XTL ((xentoollog_logger *) Xtl_val(handle)) static char * dup_String_val(value s) @@ -81,6 +86,7 @@ static void stub_xtl_ocaml_vmessage(struct xentoollog_logger *logger, const char *format, va_list al) { + caml_leave_blocking_section(); CAMLparam0(); CAMLlocalN(args, 4); struct caml_xtl *xtl = (struct caml_xtl*)logger; @@ -103,7 +109,8 @@ static void stub_xtl_ocaml_vmessage(struct xentoollog_logger *logger, free(msg); caml_callbackN(*func, 4, args); - CAMLreturn0; + CAMLdone; + caml_enter_blocking_section(); } static void stub_xtl_ocaml_progress(struct xentoollog_logger *logger, @@ -111,6 +118,7 @@ static void stub_xtl_ocaml_progress(struct xentoollog_logger *logger, const char *doing_what /* no \r,\n */, int percent, unsigned long done, unsigned long total) { + caml_leave_blocking_section(); CAMLparam0(); CAMLlocalN(args, 5); struct caml_xtl *xtl = (struct caml_xtl*)logger; @@ -129,7 +137,8 @@ static void stub_xtl_ocaml_progress(struct xentoollog_logger *logger, args[4] = caml_copy_int64(total); caml_callbackN(*func, 5, args); - CAMLreturn0; + CAMLdone; + caml_enter_blocking_section(); } static void xtl_destroy(struct xentoollog_logger *logger) diff --git a/tools/ocaml/libs/xl/Makefile b/tools/ocaml/libs/xl/Makefile index 0408cc2..61eb44c 100644 --- a/tools/ocaml/libs/xl/Makefile +++ b/tools/ocaml/libs/xl/Makefile @@ -2,8 +2,9 @@ TOPLEVEL=$(CURDIR)/../.. XEN_ROOT=$(TOPLEVEL)/../.. include $(TOPLEVEL)/common.make -# ignore unused generated functions -CFLAGS += -Wno-unused +# ignore unused generated functions and allow mixed declarations and code +CFLAGS += -Wno-unused -Wno-declaration-after-statement + CFLAGS += $(CFLAGS_libxenlight) CFLAGS += -I ../xentoollog diff --git a/tools/ocaml/libs/xl/xenlight_stubs.c b/tools/ocaml/libs/xl/xenlight_stubs.c index 88cca20..50b0325 100644 --- a/tools/ocaml/libs/xl/xenlight_stubs.c +++ b/tools/ocaml/libs/xl/xenlight_stubs.c @@ -34,6 +34,11 @@ #include "caml_xentoollog.h" +/* The following is equal to the CAMLreturn macro, but without the return */ +#define CAMLdone do{ \ +caml_local_roots = caml__frame; \ +}while (0) + #define Ctx_val(x)(*((libxl_ctx **) Data_custom_val(x))) #define CTX ((libxl_ctx *) Ctx_val(ctx)) @@ -374,6 +379,7 @@ static char *String_option_val(value v) void async_callback(libxl_ctx *ctx, int rc, void *for_callback) { + caml_leave_blocking_section(); CAMLparam0(); CAMLlocal1(error); int *task = (int *) for_callback; @@ -390,19 +396,22 @@ void async_callback(libxl_ctx *ctx, int rc, void *for_callback) error = Val_some(Val_error(rc)); caml_callback2(*func, error, (value) for_callback); + CAMLdone; + caml_enter_blocking_section(); } -static libxl_asyncop_how *aohow_val(value async, libxl_asyncop_how *ao_how) +static libxl_asyncop_how *aohow_val(value async) { CAMLparam1(async); + libxl_asyncop_how *ao_how = NULL; if (async != Val_none) { + libxl_asyncop_how *ao_how = malloc(sizeof(*ao_how)); ao_how->callback = async_callback; ao_how->u.for_callback = (void *) Some_val(async); - CAMLreturnT(libxl_asyncop_how *, ao_how); } - else - CAMLreturnT(libxl_asyncop_how *, NULL); + + CAMLreturnT(libxl_asyncop_how *, ao_how); } value stub_libxl_domain_create_new(value ctx, value domain_config, value async, value unit) @@ -411,7 +420,7 @@ value stub_libxl_domain_create_new(value ctx, value domain_config, value async, int ret; libxl_domain_config c_dconfig; uint32_t c_domid; - libxl_asyncop_how ao_how; + libxl_asyncop_how *ao_how; libxl_domain_config_init(&c_dconfig); ret = domain_config_val(CTX, &c_dconfig, domain_config); @@ -420,9 +429,14 @@ value stub_libxl_domain_create_new(value ctx, value domain_config, value async, failwith_xl(ret, "domain_create_new"); } - ret = libxl_domain_create_new(CTX, &c_dconfig, &c_domid, - aohow_val(async, &ao_how), NULL); + ao_how = aohow_val(async); + caml_enter_blocking_section(); + ret = libxl_domain_create_new(CTX, &c_dconfig, &c_domid, ao_how, NULL); + caml_leave_blocking_section(); + + if (ao_how) + free(ao_how); libxl_domain_config_dispose(&c_dconfig); if (ret != 0) @@ -439,7 +453,8 @@ value stub_libxl_domain_create_restore(value ctx, value domain_config, value par libxl_domain_config c_dconfig; libxl_domain_restore_params c_params; uint32_t c_domid; - libxl_asyncop_how ao_how; + libxl_asyncop_how *ao_how; + int restore_fd; libxl_domain_config_init(&c_dconfig); ret = domain_config_val(CTX, &c_dconfig, domain_config); @@ -455,9 +470,16 @@ value stub_libxl_domain_create_restore(value ctx, value domain_config, value par failwith_xl(ret, "domain_create_restore"); } - ret = libxl_domain_create_restore(CTX, &c_dconfig, &c_domid, Int_val(Field(params, 0)), - &c_params, aohow_val(async, &ao_how), NULL); + ao_how = aohow_val(async); + restore_fd = Int_val(Field(params, 0)); + + caml_enter_blocking_section(); + ret = libxl_domain_create_restore(CTX, &c_dconfig, &c_domid, restore_fd, + &c_params, ao_how, NULL); + caml_leave_blocking_section(); + if (ao_how) + free(ao_how); libxl_domain_config_dispose(&c_dconfig); libxl_domain_restore_params_dispose(&c_params); @@ -471,8 +493,12 @@ value stub_libxl_domain_shutdown(value ctx, value domid) { CAMLparam2(ctx, domid); int ret; + uint32_t c_domid = Int_val(domid); + + caml_enter_blocking_section(); + ret = libxl_domain_shutdown(CTX, c_domid); + caml_leave_blocking_section(); - ret = libxl_domain_shutdown(CTX, Int_val(domid)); if (ret != 0) failwith_xl(ret, "domain_shutdown"); @@ -483,8 +509,12 @@ value stub_libxl_domain_reboot(value ctx, value domid) { CAMLparam2(ctx, domid); int ret; + uint32_t c_domid = Int_val(domid); + + caml_enter_blocking_section(); + ret = libxl_domain_reboot(CTX, c_domid); + caml_leave_blocking_section(); - ret = libxl_domain_reboot(CTX, Int_val(domid)); if (ret != 0) failwith_xl(ret, "domain_reboot"); @@ -495,9 +525,16 @@ value stub_libxl_domain_destroy(value ctx, value domid, value async, value unit) { CAMLparam4(ctx, domid, async, unit); int ret; - libxl_asyncop_how ao_how; + uint32_t c_domid = Int_val(domid); + libxl_asyncop_how *ao_how = aohow_val(async); + + caml_enter_blocking_section(); + ret = libxl_domain_destroy(CTX, c_domid, ao_how); + caml_leave_blocking_section(); + + if (ao_how) + free(ao_how); - ret = libxl_domain_destroy(CTX, Int_val(domid), aohow_val(async, &ao_how)); if (ret != 0) failwith_xl(ret, "domain_destroy"); @@ -508,10 +545,17 @@ value stub_libxl_domain_suspend(value ctx, value domid, value fd, value async, v { CAMLparam5(ctx, domid, fd, async, unit); int ret; - libxl_asyncop_how ao_how; + uint32_t c_domid = Int_val(domid); + int c_fd = Int_val(fd); + libxl_asyncop_how *ao_how = aohow_val(async); + + caml_enter_blocking_section(); + ret = libxl_domain_suspend(CTX, c_domid, c_fd, 0, ao_how); + caml_leave_blocking_section(); + + if (ao_how) + free(ao_how); - ret = libxl_domain_suspend(CTX, Int_val(domid), Int_val(fd), 0, - aohow_val(async, &ao_how)); if (ret != 0) failwith_xl(ret, "domain_suspend"); @@ -522,8 +566,12 @@ value stub_libxl_domain_pause(value ctx, value domid) { CAMLparam2(ctx, domid); int ret; + uint32_t c_domid = Int_val(domid); + + caml_enter_blocking_section(); + ret = libxl_domain_pause(CTX, c_domid); + caml_leave_blocking_section(); - ret = libxl_domain_pause(CTX, Int_val(domid)); if (ret != 0) failwith_xl(ret, "domain_pause"); @@ -534,8 +582,12 @@ value stub_libxl_domain_unpause(value ctx, value domid) { CAMLparam2(ctx, domid); int ret; + uint32_t c_domid = Int_val(domid); + + caml_enter_blocking_section(); + ret = libxl_domain_unpause(CTX, c_domid); + caml_leave_blocking_section(); - ret = libxl_domain_unpause(CTX, Int_val(domid)); if (ret != 0) failwith_xl(ret, "domain_unpause"); @@ -552,13 +604,17 @@ value stub_xl_device_##type##_##op(value ctx, value info, value domid, \ CAMLparam5(ctx, info, domid, async, unit); \ libxl_device_##type c_info; \ int ret, marker_var; \ - libxl_asyncop_how ao_how; \ + uint32_t c_domid = Int_val(domid); \ + libxl_asyncop_how *ao_how = aohow_val(async); \ \ device_##type##_val(CTX, &c_info, info); \ \ - ret = libxl_##fn##_##op(CTX, Int_val(domid), &c_info, \ - aohow_val(async, &ao_how)); \ + caml_enter_blocking_section(); \ + ret = libxl_##fn##_##op(CTX, c_domid, &c_info, ao_how); \ + caml_leave_blocking_section(); \ \ + if (ao_how) \ + free(ao_how); \ libxl_device_##type##_dispose(&c_info); \ \ if (ret != 0) \ @@ -584,9 +640,16 @@ value stub_xl_device_nic_of_devid(value ctx, value domid, value devid) CAMLparam3(ctx, domid, devid); CAMLlocal1(nic); libxl_device_nic c_nic; - libxl_devid_to_device_nic(CTX, Int_val(domid), Int_val(devid), &c_nic); + uint32_t c_domid = Int_val(domid); + int c_devid = Int_val(devid); + + caml_enter_blocking_section(); + libxl_devid_to_device_nic(CTX, c_domid, c_devid, &c_nic); + caml_leave_blocking_section(); + nic = Val_device_nic(&c_nic); libxl_device_nic_dispose(&c_nic); + CAMLreturn(nic); } @@ -596,11 +659,12 @@ value stub_xl_device_nic_list(value ctx, value domid) CAMLlocal2(list, temp); libxl_device_nic *c_list; int i, nb; - uint32_t c_domid; - - c_domid = Int_val(domid); + uint32_t c_domid = Int_val(domid); + caml_enter_blocking_section(); c_list = libxl_device_nic_list(CTX, c_domid, &nb); + caml_leave_blocking_section(); + if (!c_list) failwith_xl(ERROR_FAIL, "nic_list"); @@ -624,11 +688,12 @@ value stub_xl_device_disk_list(value ctx, value domid) CAMLlocal2(list, temp); libxl_device_disk *c_list; int i, nb; - uint32_t c_domid; - - c_domid = Int_val(domid); + uint32_t c_domid = Int_val(domid); + caml_enter_blocking_section(); c_list = libxl_device_disk_list(CTX, c_domid, &nb); + caml_leave_blocking_section(); + if (!c_list) failwith_xl(ERROR_FAIL, "disk_list"); @@ -651,9 +716,19 @@ value stub_xl_device_disk_of_vdev(value ctx, value domid, value vdev) CAMLparam3(ctx, domid, vdev); CAMLlocal1(disk); libxl_device_disk c_disk; - libxl_vdev_to_device_disk(CTX, Int_val(domid), String_val(vdev), &c_disk); + char *c_vdev; + uint32_t c_domid = Int_val(domid); + + c_vdev = strdup(String_val(vdev)); + + caml_enter_blocking_section(); + libxl_vdev_to_device_disk(CTX, c_domid, c_vdev, &c_disk); + caml_leave_blocking_section(); + disk = Val_device_disk(&c_disk); libxl_device_disk_dispose(&c_disk); + free(c_vdev); + CAMLreturn(disk); } @@ -663,11 +738,12 @@ value stub_xl_device_pci_list(value ctx, value domid) CAMLlocal2(list, temp); libxl_device_pci *c_list; int i, nb; - uint32_t c_domid; - - c_domid = Int_val(domid); + uint32_t c_domid = Int_val(domid); + caml_enter_blocking_section(); c_list = libxl_device_pci_list(CTX, c_domid, &nb); + caml_leave_blocking_section(); + if (!c_list) failwith_xl(ERROR_FAIL, "pci_list"); @@ -690,10 +766,13 @@ value stub_xl_device_pci_assignable_add(value ctx, value info, value rebind) CAMLparam3(ctx, info, rebind); libxl_device_pci c_info; int ret, marker_var; + int c_rebind = (int) Bool_val(rebind); device_pci_val(CTX, &c_info, info); - ret = libxl_device_pci_assignable_add(CTX, &c_info, (int) Bool_val(rebind)); + caml_enter_blocking_section(); + ret = libxl_device_pci_assignable_add(CTX, &c_info, c_rebind); + caml_leave_blocking_section(); libxl_device_pci_dispose(&c_info); @@ -708,10 +787,13 @@ value stub_xl_device_pci_assignable_remove(value ctx, value info, value rebind) CAMLparam3(ctx, info, rebind); libxl_device_pci c_info; int ret, marker_var; + int c_rebind = (int) Bool_val(rebind); device_pci_val(CTX, &c_info, info); - ret = libxl_device_pci_assignable_remove(CTX, &c_info, (int) Bool_val(rebind)); + caml_enter_blocking_section(); + ret = libxl_device_pci_assignable_remove(CTX, &c_info, c_rebind); + caml_leave_blocking_section(); libxl_device_pci_dispose(&c_info); @@ -729,7 +811,10 @@ value stub_xl_device_pci_assignable_list(value ctx) int i, nb; uint32_t c_domid; + caml_enter_blocking_section(); c_list = libxl_device_pci_assignable_list(CTX, &nb); + caml_leave_blocking_section(); + if (!c_list) failwith_xl(ERROR_FAIL, "pci_assignable_list"); @@ -754,7 +839,9 @@ value stub_xl_physinfo_get(value ctx) libxl_physinfo c_physinfo; int ret; + caml_enter_blocking_section(); ret = libxl_get_physinfo(CTX, &c_physinfo); + caml_leave_blocking_section(); if (ret != 0) failwith_xl(ret, "get_physinfo"); @@ -773,7 +860,9 @@ value stub_xl_cputopology_get(value ctx) libxl_cputopology *c_topology; int i, nr; + caml_enter_blocking_section(); c_topology = libxl_get_cpu_topology(CTX, &nr); + caml_leave_blocking_section(); if (!c_topology) failwith_xl(ERROR_FAIL, "get_cpu_topologyinfo"); @@ -801,7 +890,10 @@ value stub_xl_dominfo_list(value ctx) libxl_dominfo *c_domlist; int i, nb; + caml_enter_blocking_section(); c_domlist = libxl_list_domain(CTX, &nb); + caml_leave_blocking_section(); + if (!c_domlist) failwith_xl(ERROR_FAIL, "dominfo_list"); @@ -826,8 +918,12 @@ value stub_xl_dominfo_get(value ctx, value domid) CAMLlocal1(dominfo); libxl_dominfo c_dominfo; int ret; + uint32_t c_domid = Int_val(domid); + + caml_enter_blocking_section(); + ret = libxl_domain_info(CTX, &c_dominfo, c_domid); + caml_leave_blocking_section(); - ret = libxl_domain_info(CTX, &c_dominfo, Int_val(domid)); if (ret != 0) failwith_xl(ERROR_FAIL, "domain_info"); dominfo = Val_dominfo(&c_dominfo); @@ -841,8 +937,12 @@ value stub_xl_domain_sched_params_get(value ctx, value domid) CAMLlocal1(scinfo); libxl_domain_sched_params c_scinfo; int ret; + uint32_t c_domid = Int_val(domid); + + caml_enter_blocking_section(); + ret = libxl_domain_sched_params_get(CTX, c_domid, &c_scinfo); + caml_leave_blocking_section(); - ret = libxl_domain_sched_params_get(CTX, Int_val(domid), &c_scinfo); if (ret != 0) failwith_xl(ret, "domain_sched_params_get"); @@ -858,10 +958,13 @@ value stub_xl_domain_sched_params_set(value ctx, value domid, value scinfo) CAMLparam3(ctx, domid, scinfo); libxl_domain_sched_params c_scinfo; int ret; + uint32_t c_domid = Int_val(domid); domain_sched_params_val(CTX, &c_scinfo, scinfo); - ret = libxl_domain_sched_params_set(CTX, Int_val(domid), &c_scinfo); + caml_enter_blocking_section(); + ret = libxl_domain_sched_params_set(CTX, c_domid, &c_scinfo); + caml_leave_blocking_section(); libxl_domain_sched_params_dispose(&c_scinfo); @@ -875,12 +978,15 @@ value stub_xl_send_trigger(value ctx, value domid, value trigger, value vcpuid) { CAMLparam4(ctx, domid, trigger, vcpuid); int ret; + uint32_t c_domid = Int_val(domid); libxl_trigger c_trigger = LIBXL_TRIGGER_UNKNOWN; + int c_vcpuid = Int_val(vcpuid); trigger_val(CTX, &c_trigger, trigger); - ret = libxl_send_trigger(CTX, Int_val(domid), - c_trigger, Int_val(vcpuid)); + caml_enter_blocking_section(); + ret = libxl_send_trigger(CTX, c_domid, c_trigger, c_vcpuid); + caml_leave_blocking_section(); if (ret != 0) failwith_xl(ret, "send_trigger"); @@ -892,8 +998,12 @@ value stub_xl_send_sysrq(value ctx, value domid, value sysrq) { CAMLparam3(ctx, domid, sysrq); int ret; + uint32_t c_domid = Int_val(domid); + int c_sysrq = Int_val(sysrq); - ret = libxl_send_sysrq(CTX, Int_val(domid), Int_val(sysrq)); + caml_enter_blocking_section(); + ret = libxl_send_sysrq(CTX, c_domid, c_sysrq); + caml_leave_blocking_section(); if (ret != 0) failwith_xl(ret, "send_sysrq"); @@ -909,7 +1019,10 @@ value stub_xl_send_debug_keys(value ctx, value keys) c_keys = dup_String_val(keys); + caml_enter_blocking_section(); ret = libxl_send_debug_keys(CTX, c_keys); + caml_leave_blocking_section(); + free(c_keys); if (ret != 0) @@ -933,9 +1046,12 @@ value stub_libxl_xen_console_read_start(value ctx, value clear) { CAMLparam2(ctx, clear); CAMLlocal1(handle); + int c_clear = Int_val(clear); libxl_xen_console_reader *cr; - cr = libxl_xen_console_read_start(CTX, Int_val(clear)); + caml_enter_blocking_section(); + cr = libxl_xen_console_read_start(CTX, c_clear); + caml_leave_blocking_section(); handle = caml_alloc_custom(&libxl_console_reader_custom_operations, sizeof(cr), 0, 1); Console_reader_val(handle) = cr; @@ -965,7 +1081,9 @@ value stub_libxl_xen_console_read_line(value ctx, value reader) char *c_line; libxl_xen_console_reader *cr = (libxl_xen_console_reader *) Console_reader_val(reader); + caml_enter_blocking_section(); ret = libxl_xen_console_read_line(CTX, cr, &c_line); + caml_leave_blocking_section(); if (ret < 0) failwith_xl(ret, "xen_console_read_line"); @@ -982,7 +1100,9 @@ value stub_libxl_xen_console_read_finish(value ctx, value reader) CAMLparam2(ctx, reader); libxl_xen_console_reader *cr = (libxl_xen_console_reader *) Console_reader_val(reader); + caml_enter_blocking_section(); libxl_xen_console_read_finish(CTX, cr); + caml_leave_blocking_section(); CAMLreturn(Val_unit); } @@ -1074,6 +1194,7 @@ value Val_poll_events(short events) int fd_register(void *user, int fd, void **for_app_registration_out, short events, void *for_libxl) { + caml_leave_blocking_section(); CAMLparam0(); CAMLlocalN(args, 4); static value *func = NULL; @@ -1089,12 +1210,15 @@ int fd_register(void *user, int fd, void **for_app_registration_out, args[3] = (value) for_libxl; caml_callbackN(*func, 4, args); - CAMLreturn(0); + CAMLdone; + caml_enter_blocking_section(); + return 0; } int fd_modify(void *user, int fd, void **for_app_registration_update, short events) { + caml_leave_blocking_section(); CAMLparam0(); CAMLlocalN(args, 3); static value *func = NULL; @@ -1109,11 +1233,14 @@ int fd_modify(void *user, int fd, void **for_app_registration_update, args[2] = Val_poll_events(events); caml_callbackN(*func, 3, args); - CAMLreturn(0); + CAMLdone; + caml_enter_blocking_section(); + return 0; } void fd_deregister(void *user, int fd, void *for_app_registration) { + caml_leave_blocking_section(); CAMLparam0(); CAMLlocalN(args, 2); static value *func = NULL; @@ -1127,12 +1254,14 @@ void fd_deregister(void *user, int fd, void *for_app_registration) args[1] = Val_int(fd); caml_callbackN(*func, 2, args); - CAMLreturn0; + CAMLdone; + caml_enter_blocking_section(); } int timeout_register(void *user, void **for_app_registration_out, struct timeval abs, void *for_libxl) { + caml_leave_blocking_section(); CAMLparam0(); CAMLlocalN(args, 4); static value *func = NULL; @@ -1148,12 +1277,15 @@ int timeout_register(void *user, void **for_app_registration_out, args[3] = (value) for_libxl; caml_callbackN(*func, 4, args); - CAMLreturn(0); + CAMLdone; + caml_enter_blocking_section(); + return 0; } int timeout_modify(void *user, void **for_app_registration_update, struct timeval abs) { + caml_leave_blocking_section(); CAMLparam0(); static value *func = NULL; @@ -1163,13 +1295,16 @@ int timeout_modify(void *user, void **for_app_registration_update, } caml_callback(*func, (value) user); - CAMLreturn(0); + CAMLdone; + caml_enter_blocking_section(); + return 0; } void timeout_deregister(void *user, void *for_app_registration) { + caml_leave_blocking_section(); failwith_xl(ERROR_FAIL, "timeout_deregister not yet implemented"); - return; + caml_enter_blocking_section(); } value stub_libxl_osevent_register_hooks(value ctx, value user) @@ -1186,7 +1321,10 @@ value stub_libxl_osevent_register_hooks(value ctx, value user) hooks->timeout_modify = timeout_modify; hooks->timeout_deregister = timeout_deregister; + caml_enter_blocking_section(); libxl_osevent_register_hooks(CTX, hooks, (void *) user); + caml_leave_blocking_section(); + result = caml_alloc(1, Abstract_tag); *((libxl_osevent_hooks **) result) = hooks; @@ -1197,15 +1335,25 @@ value stub_libxl_osevent_occurred_fd(value ctx, value for_libxl, value fd, value events, value revents) { CAMLparam5(ctx, for_libxl, fd, events, revents); - libxl_osevent_occurred_fd(CTX, (void *) for_libxl, Int_val(fd), - Poll_events_val(events), Poll_events_val(revents)); + int c_fd = Int_val(fd); + short c_events = Poll_events_val(events); + short c_revents = Poll_events_val(revents); + + caml_enter_blocking_section(); + libxl_osevent_occurred_fd(CTX, (void *) for_libxl, c_fd, c_events, c_revents); + caml_leave_blocking_section(); + CAMLreturn(Val_unit); } value stub_libxl_osevent_occurred_timeout(value ctx, value for_libxl) { CAMLparam2(ctx, for_libxl); + + caml_enter_blocking_section(); libxl_osevent_occurred_timeout(CTX, (void *) for_libxl); + caml_leave_blocking_section(); + CAMLreturn(Val_unit); } @@ -1216,6 +1364,7 @@ struct user_with_ctx { void event_occurs(void *user, libxl_event *event) { + caml_leave_blocking_section(); CAMLparam0(); CAMLlocalN(args, 2); struct user_with_ctx *c_user = (struct user_with_ctx *) user; @@ -1231,12 +1380,14 @@ void event_occurs(void *user, libxl_event *event) libxl_event_free(c_user->ctx, event); caml_callbackN(*func, 2, args); - CAMLreturn0; + CAMLdone; + caml_enter_blocking_section(); } void disaster(void *user, libxl_event_type type, const char *msg, int errnoval) { + caml_leave_blocking_section(); CAMLparam0(); CAMLlocalN(args, 4); struct user_with_ctx *c_user = (struct user_with_ctx *) user; @@ -1253,7 +1404,8 @@ void disaster(void *user, libxl_event_type type, args[3] = Val_int(errnoval); caml_callbackN(*func, 4, args); - CAMLreturn0; + CAMLdone; + caml_enter_blocking_section(); } value stub_libxl_event_register_callbacks(value ctx, value user) @@ -1272,7 +1424,10 @@ value stub_libxl_event_register_callbacks(value ctx, value user) hooks->event_occurs = event_occurs; hooks->disaster = disaster; + caml_enter_blocking_section(); libxl_event_register_callbacks(CTX, hooks, (void *) c_user); + caml_leave_blocking_section(); + result = caml_alloc(1, Abstract_tag); *((libxl_event_hooks **) result) = hooks; @@ -1282,9 +1437,13 @@ value stub_libxl_event_register_callbacks(value ctx, value user) value stub_libxl_evenable_domain_death(value ctx, value domid, value user) { CAMLparam3(ctx, domid, user); + uint32_t c_domid = Int_val(domid); + int c_user = Int_val(user); libxl_evgen_domain_death *evgen_out; - libxl_evenable_domain_death(CTX, Int_val(domid), Int_val(user), &evgen_out); + caml_enter_blocking_section(); + libxl_evenable_domain_death(CTX, c_domid, c_user, &evgen_out); + caml_leave_blocking_section(); CAMLreturn(Val_unit); } -- 1.7.10.4
Dave Scott
2013-Nov-26 23:28 UTC
Re: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl
Thanks, the whole series looks good to me... Acked-by: David Scott <dave.scott@eu.citrix.com>> -----Original Message----- > From: Rob Hoes [mailto:rob.hoes@citrix.com] > Sent: 26 November 2013 11:15 PM > To: xen-devel@lists.xen.org > Cc: Ian Campbell; Ian Jackson; Dave Scott; Rob Hoes; Ian Jackson > Subject: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before > calling into libxl > > Ocaml has a heap lock which must be held whenever ocaml code is running. > Ocaml usually drops this lock when it enters a potentially blocking low-level > function, such as writing to a file. Libxl has its own lock, which it may acquire > when being called. > > Things get interesting when libxl calls back into ocaml code. There is a risk > of ending up in a deadlock when a thread holds both locks at the same time, > then temporarily drop the ocaml lock, while another thread calls another > libxl function. > > To avoid deadlocks, we drop the ocaml heap lock before entering libxl, and > reacquire it in callbacks to ocaml. This way, the ocaml heap lock is never > held together with the libxl lock, except in osevent registration callbacks, > and xentoollog callbacks. If we guarantee to not call any libxl functions > inside those callbacks, we can avoid deadlocks. > > This patch handle the dropping and reacquiring of the ocaml heap lock by > the caml_enter_blocking_section and caml_leave_blocking_section > functions, and related macros. We are also careful to not call any functions > that access the ocaml heap while the ocaml heap lock is dropped. This often > involves copying ocaml values to C before dropping the ocaml lock. > > Signed-off-by: Rob Hoes <rob.hoes@citrix.com> > CC: Ian Campbell <ian.campbell@citrix.com> > CC: Ian Jackson <ian.jackson@eu.citrix.com> > CC: Dave Scott <dave.scott@eu.citrix.com> > --- > tools/ocaml/libs/xentoollog/Makefile | 3 + > tools/ocaml/libs/xentoollog/xentoollog_stubs.c | 13 +- > tools/ocaml/libs/xl/Makefile | 5 +- > tools/ocaml/libs/xl/xenlight_stubs.c | 265 +++++++++++++++++++----- > 4 files changed, 229 insertions(+), 57 deletions(-) > > diff --git a/tools/ocaml/libs/xentoollog/Makefile > b/tools/ocaml/libs/xentoollog/Makefile > index e535ba5..471b428 100644 > --- a/tools/ocaml/libs/xentoollog/Makefile > +++ b/tools/ocaml/libs/xentoollog/Makefile > @@ -2,6 +2,9 @@ TOPLEVEL=$(CURDIR)/../.. > XEN_ROOT=$(TOPLEVEL)/../.. > include $(TOPLEVEL)/common.make > > +# allow mixed declarations and code > +CFLAGS += -Wno-declaration-after-statement > + > CFLAGS += $(CFLAGS_libxenctrl) $(CFLAGS_libxenguest) OCAMLINCLUDE +> > diff --git a/tools/ocaml/libs/xentoollog/xentoollog_stubs.c > b/tools/ocaml/libs/xentoollog/xentoollog_stubs.c > index 3b2f91b..daf48fe 100644 > --- a/tools/ocaml/libs/xentoollog/xentoollog_stubs.c > +++ b/tools/ocaml/libs/xentoollog/xentoollog_stubs.c > @@ -31,6 +31,11 @@ > > #include "caml_xentoollog.h" > > +/* The following is equal to the CAMLreturn macro, but without the > +return */ #define CAMLdone do{ \ caml_local_roots = caml__frame; \ > +}while (0) > + > #define XTL ((xentoollog_logger *) Xtl_val(handle)) > > static char * dup_String_val(value s) > @@ -81,6 +86,7 @@ static void stub_xtl_ocaml_vmessage(struct > xentoollog_logger *logger, > const char *format, > va_list al) > { > + caml_leave_blocking_section(); > CAMLparam0(); > CAMLlocalN(args, 4); > struct caml_xtl *xtl = (struct caml_xtl*)logger; @@ -103,7 +109,8 @@ > static void stub_xtl_ocaml_vmessage(struct xentoollog_logger *logger, > free(msg); > > caml_callbackN(*func, 4, args); > - CAMLreturn0; > + CAMLdone; > + caml_enter_blocking_section(); > } > > static void stub_xtl_ocaml_progress(struct xentoollog_logger *logger, @@ - > 111,6 +118,7 @@ static void stub_xtl_ocaml_progress(struct > xentoollog_logger *logger, > const char *doing_what /* no \r,\n */, > int percent, unsigned long done, unsigned long total) { > + caml_leave_blocking_section(); > CAMLparam0(); > CAMLlocalN(args, 5); > struct caml_xtl *xtl = (struct caml_xtl*)logger; @@ -129,7 +137,8 @@ > static void stub_xtl_ocaml_progress(struct xentoollog_logger *logger, > args[4] = caml_copy_int64(total); > > caml_callbackN(*func, 5, args); > - CAMLreturn0; > + CAMLdone; > + caml_enter_blocking_section(); > } > > static void xtl_destroy(struct xentoollog_logger *logger) diff --git > a/tools/ocaml/libs/xl/Makefile b/tools/ocaml/libs/xl/Makefile index > 0408cc2..61eb44c 100644 > --- a/tools/ocaml/libs/xl/Makefile > +++ b/tools/ocaml/libs/xl/Makefile > @@ -2,8 +2,9 @@ TOPLEVEL=$(CURDIR)/../.. > XEN_ROOT=$(TOPLEVEL)/../.. > include $(TOPLEVEL)/common.make > > -# ignore unused generated functions > -CFLAGS += -Wno-unused > +# ignore unused generated functions and allow mixed declarations and > +code CFLAGS += -Wno-unused -Wno-declaration-after-statement > + > CFLAGS += $(CFLAGS_libxenlight) > CFLAGS += -I ../xentoollog > > diff --git a/tools/ocaml/libs/xl/xenlight_stubs.c > b/tools/ocaml/libs/xl/xenlight_stubs.c > index 88cca20..50b0325 100644 > --- a/tools/ocaml/libs/xl/xenlight_stubs.c > +++ b/tools/ocaml/libs/xl/xenlight_stubs.c > @@ -34,6 +34,11 @@ > > #include "caml_xentoollog.h" > > +/* The following is equal to the CAMLreturn macro, but without the > +return */ #define CAMLdone do{ \ caml_local_roots = caml__frame; \ > +}while (0) > + > #define Ctx_val(x)(*((libxl_ctx **) Data_custom_val(x))) #define CTX > ((libxl_ctx *) Ctx_val(ctx)) > > @@ -374,6 +379,7 @@ static char *String_option_val(value v) > > void async_callback(libxl_ctx *ctx, int rc, void *for_callback) { > + caml_leave_blocking_section(); > CAMLparam0(); > CAMLlocal1(error); > int *task = (int *) for_callback; > @@ -390,19 +396,22 @@ void async_callback(libxl_ctx *ctx, int rc, void > *for_callback) > error = Val_some(Val_error(rc)); > > caml_callback2(*func, error, (value) for_callback); > + CAMLdone; > + caml_enter_blocking_section(); > } > > -static libxl_asyncop_how *aohow_val(value async, libxl_asyncop_how > *ao_how) > +static libxl_asyncop_how *aohow_val(value async) > { > CAMLparam1(async); > + libxl_asyncop_how *ao_how = NULL; > > if (async != Val_none) { > + libxl_asyncop_how *ao_how = malloc(sizeof(*ao_how)); > ao_how->callback = async_callback; > ao_how->u.for_callback = (void *) Some_val(async); > - CAMLreturnT(libxl_asyncop_how *, ao_how); > } > - else > - CAMLreturnT(libxl_asyncop_how *, NULL); > + > + CAMLreturnT(libxl_asyncop_how *, ao_how); > } > > value stub_libxl_domain_create_new(value ctx, value domain_config, > value async, value unit) @@ -411,7 +420,7 @@ value > stub_libxl_domain_create_new(value ctx, value domain_config, value async, > int ret; > libxl_domain_config c_dconfig; > uint32_t c_domid; > - libxl_asyncop_how ao_how; > + libxl_asyncop_how *ao_how; > > libxl_domain_config_init(&c_dconfig); > ret = domain_config_val(CTX, &c_dconfig, domain_config); @@ - > 420,9 +429,14 @@ value stub_libxl_domain_create_new(value ctx, value > domain_config, value async, > failwith_xl(ret, "domain_create_new"); > } > > - ret = libxl_domain_create_new(CTX, &c_dconfig, &c_domid, > - aohow_val(async, &ao_how), NULL); > + ao_how = aohow_val(async); > > + caml_enter_blocking_section(); > + ret = libxl_domain_create_new(CTX, &c_dconfig, &c_domid, ao_how, > NULL); > + caml_leave_blocking_section(); > + > + if (ao_how) > + free(ao_how); > libxl_domain_config_dispose(&c_dconfig); > > if (ret != 0) > @@ -439,7 +453,8 @@ value stub_libxl_domain_create_restore(value ctx, > value domain_config, value par > libxl_domain_config c_dconfig; > libxl_domain_restore_params c_params; > uint32_t c_domid; > - libxl_asyncop_how ao_how; > + libxl_asyncop_how *ao_how; > + int restore_fd; > > libxl_domain_config_init(&c_dconfig); > ret = domain_config_val(CTX, &c_dconfig, domain_config); @@ - > 455,9 +470,16 @@ value stub_libxl_domain_create_restore(value ctx, value > domain_config, value par > failwith_xl(ret, "domain_create_restore"); > } > > - ret = libxl_domain_create_restore(CTX, &c_dconfig, &c_domid, > Int_val(Field(params, 0)), > - &c_params, aohow_val(async, &ao_how), NULL); > + ao_how = aohow_val(async); > + restore_fd = Int_val(Field(params, 0)); > + > + caml_enter_blocking_section(); > + ret = libxl_domain_create_restore(CTX, &c_dconfig, &c_domid, > restore_fd, > + &c_params, ao_how, NULL); > + caml_leave_blocking_section(); > > + if (ao_how) > + free(ao_how); > libxl_domain_config_dispose(&c_dconfig); > libxl_domain_restore_params_dispose(&c_params); > > @@ -471,8 +493,12 @@ value stub_libxl_domain_shutdown(value ctx, value > domid) { > CAMLparam2(ctx, domid); > int ret; > + uint32_t c_domid = Int_val(domid); > + > + caml_enter_blocking_section(); > + ret = libxl_domain_shutdown(CTX, c_domid); > + caml_leave_blocking_section(); > > - ret = libxl_domain_shutdown(CTX, Int_val(domid)); > if (ret != 0) > failwith_xl(ret, "domain_shutdown"); > > @@ -483,8 +509,12 @@ value stub_libxl_domain_reboot(value ctx, value > domid) { > CAMLparam2(ctx, domid); > int ret; > + uint32_t c_domid = Int_val(domid); > + > + caml_enter_blocking_section(); > + ret = libxl_domain_reboot(CTX, c_domid); > + caml_leave_blocking_section(); > > - ret = libxl_domain_reboot(CTX, Int_val(domid)); > if (ret != 0) > failwith_xl(ret, "domain_reboot"); > > @@ -495,9 +525,16 @@ value stub_libxl_domain_destroy(value ctx, value > domid, value async, value unit) { > CAMLparam4(ctx, domid, async, unit); > int ret; > - libxl_asyncop_how ao_how; > + uint32_t c_domid = Int_val(domid); > + libxl_asyncop_how *ao_how = aohow_val(async); > + > + caml_enter_blocking_section(); > + ret = libxl_domain_destroy(CTX, c_domid, ao_how); > + caml_leave_blocking_section(); > + > + if (ao_how) > + free(ao_how); > > - ret = libxl_domain_destroy(CTX, Int_val(domid), aohow_val(async, > &ao_how)); > if (ret != 0) > failwith_xl(ret, "domain_destroy"); > > @@ -508,10 +545,17 @@ value stub_libxl_domain_suspend(value ctx, value > domid, value fd, value async, v { > CAMLparam5(ctx, domid, fd, async, unit); > int ret; > - libxl_asyncop_how ao_how; > + uint32_t c_domid = Int_val(domid); > + int c_fd = Int_val(fd); > + libxl_asyncop_how *ao_how = aohow_val(async); > + > + caml_enter_blocking_section(); > + ret = libxl_domain_suspend(CTX, c_domid, c_fd, 0, ao_how); > + caml_leave_blocking_section(); > + > + if (ao_how) > + free(ao_how); > > - ret = libxl_domain_suspend(CTX, Int_val(domid), Int_val(fd), 0, > - aohow_val(async, &ao_how)); > if (ret != 0) > failwith_xl(ret, "domain_suspend"); > > @@ -522,8 +566,12 @@ value stub_libxl_domain_pause(value ctx, value > domid) { > CAMLparam2(ctx, domid); > int ret; > + uint32_t c_domid = Int_val(domid); > + > + caml_enter_blocking_section(); > + ret = libxl_domain_pause(CTX, c_domid); > + caml_leave_blocking_section(); > > - ret = libxl_domain_pause(CTX, Int_val(domid)); > if (ret != 0) > failwith_xl(ret, "domain_pause"); > > @@ -534,8 +582,12 @@ value stub_libxl_domain_unpause(value ctx, value > domid) { > CAMLparam2(ctx, domid); > int ret; > + uint32_t c_domid = Int_val(domid); > + > + caml_enter_blocking_section(); > + ret = libxl_domain_unpause(CTX, c_domid); > + caml_leave_blocking_section(); > > - ret = libxl_domain_unpause(CTX, Int_val(domid)); > if (ret != 0) > failwith_xl(ret, "domain_unpause"); > > @@ -552,13 +604,17 @@ value stub_xl_device_##type##_##op(value ctx, > value info, value domid, \ > CAMLparam5(ctx, info, domid, async, unit); \ > libxl_device_##type c_info; \ > int ret, marker_var; \ > - libxl_asyncop_how ao_how; \ > + uint32_t c_domid = Int_val(domid); > \ > + libxl_asyncop_how *ao_how = aohow_val(async); > \ > \ > device_##type##_val(CTX, &c_info, info); \ > \ > - ret = libxl_##fn##_##op(CTX, Int_val(domid), &c_info, \ > - aohow_val(async, &ao_how)); \ > + caml_enter_blocking_section(); > \ > + ret = libxl_##fn##_##op(CTX, c_domid, &c_info, ao_how); > \ > + caml_leave_blocking_section(); > \ > \ > + if (ao_how) \ > + free(ao_how); \ > libxl_device_##type##_dispose(&c_info); > \ > \ > if (ret != 0) \ > @@ -584,9 +640,16 @@ value stub_xl_device_nic_of_devid(value ctx, value > domid, value devid) > CAMLparam3(ctx, domid, devid); > CAMLlocal1(nic); > libxl_device_nic c_nic; > - libxl_devid_to_device_nic(CTX, Int_val(domid), Int_val(devid), > &c_nic); > + uint32_t c_domid = Int_val(domid); > + int c_devid = Int_val(devid); > + > + caml_enter_blocking_section(); > + libxl_devid_to_device_nic(CTX, c_domid, c_devid, &c_nic); > + caml_leave_blocking_section(); > + > nic = Val_device_nic(&c_nic); > libxl_device_nic_dispose(&c_nic); > + > CAMLreturn(nic); > } > > @@ -596,11 +659,12 @@ value stub_xl_device_nic_list(value ctx, value > domid) > CAMLlocal2(list, temp); > libxl_device_nic *c_list; > int i, nb; > - uint32_t c_domid; > - > - c_domid = Int_val(domid); > + uint32_t c_domid = Int_val(domid); > > + caml_enter_blocking_section(); > c_list = libxl_device_nic_list(CTX, c_domid, &nb); > + caml_leave_blocking_section(); > + > if (!c_list) > failwith_xl(ERROR_FAIL, "nic_list"); > > @@ -624,11 +688,12 @@ value stub_xl_device_disk_list(value ctx, value > domid) > CAMLlocal2(list, temp); > libxl_device_disk *c_list; > int i, nb; > - uint32_t c_domid; > - > - c_domid = Int_val(domid); > + uint32_t c_domid = Int_val(domid); > > + caml_enter_blocking_section(); > c_list = libxl_device_disk_list(CTX, c_domid, &nb); > + caml_leave_blocking_section(); > + > if (!c_list) > failwith_xl(ERROR_FAIL, "disk_list"); > > @@ -651,9 +716,19 @@ value stub_xl_device_disk_of_vdev(value ctx, value > domid, value vdev) > CAMLparam3(ctx, domid, vdev); > CAMLlocal1(disk); > libxl_device_disk c_disk; > - libxl_vdev_to_device_disk(CTX, Int_val(domid), String_val(vdev), > &c_disk); > + char *c_vdev; > + uint32_t c_domid = Int_val(domid); > + > + c_vdev = strdup(String_val(vdev)); > + > + caml_enter_blocking_section(); > + libxl_vdev_to_device_disk(CTX, c_domid, c_vdev, &c_disk); > + caml_leave_blocking_section(); > + > disk = Val_device_disk(&c_disk); > libxl_device_disk_dispose(&c_disk); > + free(c_vdev); > + > CAMLreturn(disk); > } > > @@ -663,11 +738,12 @@ value stub_xl_device_pci_list(value ctx, value > domid) > CAMLlocal2(list, temp); > libxl_device_pci *c_list; > int i, nb; > - uint32_t c_domid; > - > - c_domid = Int_val(domid); > + uint32_t c_domid = Int_val(domid); > > + caml_enter_blocking_section(); > c_list = libxl_device_pci_list(CTX, c_domid, &nb); > + caml_leave_blocking_section(); > + > if (!c_list) > failwith_xl(ERROR_FAIL, "pci_list"); > > @@ -690,10 +766,13 @@ value stub_xl_device_pci_assignable_add(value > ctx, value info, value rebind) > CAMLparam3(ctx, info, rebind); > libxl_device_pci c_info; > int ret, marker_var; > + int c_rebind = (int) Bool_val(rebind); > > device_pci_val(CTX, &c_info, info); > > - ret = libxl_device_pci_assignable_add(CTX, &c_info, (int) > Bool_val(rebind)); > + caml_enter_blocking_section(); > + ret = libxl_device_pci_assignable_add(CTX, &c_info, c_rebind); > + caml_leave_blocking_section(); > > libxl_device_pci_dispose(&c_info); > > @@ -708,10 +787,13 @@ value > stub_xl_device_pci_assignable_remove(value ctx, value info, value rebind) > CAMLparam3(ctx, info, rebind); > libxl_device_pci c_info; > int ret, marker_var; > + int c_rebind = (int) Bool_val(rebind); > > device_pci_val(CTX, &c_info, info); > > - ret = libxl_device_pci_assignable_remove(CTX, &c_info, (int) > Bool_val(rebind)); > + caml_enter_blocking_section(); > + ret = libxl_device_pci_assignable_remove(CTX, &c_info, c_rebind); > + caml_leave_blocking_section(); > > libxl_device_pci_dispose(&c_info); > > @@ -729,7 +811,10 @@ value stub_xl_device_pci_assignable_list(value ctx) > int i, nb; > uint32_t c_domid; > > + caml_enter_blocking_section(); > c_list = libxl_device_pci_assignable_list(CTX, &nb); > + caml_leave_blocking_section(); > + > if (!c_list) > failwith_xl(ERROR_FAIL, "pci_assignable_list"); > > @@ -754,7 +839,9 @@ value stub_xl_physinfo_get(value ctx) > libxl_physinfo c_physinfo; > int ret; > > + caml_enter_blocking_section(); > ret = libxl_get_physinfo(CTX, &c_physinfo); > + caml_leave_blocking_section(); > > if (ret != 0) > failwith_xl(ret, "get_physinfo"); > @@ -773,7 +860,9 @@ value stub_xl_cputopology_get(value ctx) > libxl_cputopology *c_topology; > int i, nr; > > + caml_enter_blocking_section(); > c_topology = libxl_get_cpu_topology(CTX, &nr); > + caml_leave_blocking_section(); > > if (!c_topology) > failwith_xl(ERROR_FAIL, "get_cpu_topologyinfo"); @@ - > 801,7 +890,10 @@ value stub_xl_dominfo_list(value ctx) > libxl_dominfo *c_domlist; > int i, nb; > > + caml_enter_blocking_section(); > c_domlist = libxl_list_domain(CTX, &nb); > + caml_leave_blocking_section(); > + > if (!c_domlist) > failwith_xl(ERROR_FAIL, "dominfo_list"); > > @@ -826,8 +918,12 @@ value stub_xl_dominfo_get(value ctx, value domid) > CAMLlocal1(dominfo); > libxl_dominfo c_dominfo; > int ret; > + uint32_t c_domid = Int_val(domid); > + > + caml_enter_blocking_section(); > + ret = libxl_domain_info(CTX, &c_dominfo, c_domid); > + caml_leave_blocking_section(); > > - ret = libxl_domain_info(CTX, &c_dominfo, Int_val(domid)); > if (ret != 0) > failwith_xl(ERROR_FAIL, "domain_info"); > dominfo = Val_dominfo(&c_dominfo); > @@ -841,8 +937,12 @@ value stub_xl_domain_sched_params_get(value ctx, > value domid) > CAMLlocal1(scinfo); > libxl_domain_sched_params c_scinfo; > int ret; > + uint32_t c_domid = Int_val(domid); > + > + caml_enter_blocking_section(); > + ret = libxl_domain_sched_params_get(CTX, c_domid, &c_scinfo); > + caml_leave_blocking_section(); > > - ret = libxl_domain_sched_params_get(CTX, Int_val(domid), > &c_scinfo); > if (ret != 0) > failwith_xl(ret, "domain_sched_params_get"); > > @@ -858,10 +958,13 @@ value stub_xl_domain_sched_params_set(value > ctx, value domid, value scinfo) > CAMLparam3(ctx, domid, scinfo); > libxl_domain_sched_params c_scinfo; > int ret; > + uint32_t c_domid = Int_val(domid); > > domain_sched_params_val(CTX, &c_scinfo, scinfo); > > - ret = libxl_domain_sched_params_set(CTX, Int_val(domid), > &c_scinfo); > + caml_enter_blocking_section(); > + ret = libxl_domain_sched_params_set(CTX, c_domid, &c_scinfo); > + caml_leave_blocking_section(); > > libxl_domain_sched_params_dispose(&c_scinfo); > > @@ -875,12 +978,15 @@ value stub_xl_send_trigger(value ctx, value domid, > value trigger, value vcpuid) { > CAMLparam4(ctx, domid, trigger, vcpuid); > int ret; > + uint32_t c_domid = Int_val(domid); > libxl_trigger c_trigger = LIBXL_TRIGGER_UNKNOWN; > + int c_vcpuid = Int_val(vcpuid); > > trigger_val(CTX, &c_trigger, trigger); > > - ret = libxl_send_trigger(CTX, Int_val(domid), > - c_trigger, Int_val(vcpuid)); > + caml_enter_blocking_section(); > + ret = libxl_send_trigger(CTX, c_domid, c_trigger, c_vcpuid); > + caml_leave_blocking_section(); > > if (ret != 0) > failwith_xl(ret, "send_trigger"); > @@ -892,8 +998,12 @@ value stub_xl_send_sysrq(value ctx, value domid, > value sysrq) { > CAMLparam3(ctx, domid, sysrq); > int ret; > + uint32_t c_domid = Int_val(domid); > + int c_sysrq = Int_val(sysrq); > > - ret = libxl_send_sysrq(CTX, Int_val(domid), Int_val(sysrq)); > + caml_enter_blocking_section(); > + ret = libxl_send_sysrq(CTX, c_domid, c_sysrq); > + caml_leave_blocking_section(); > > if (ret != 0) > failwith_xl(ret, "send_sysrq"); > @@ -909,7 +1019,10 @@ value stub_xl_send_debug_keys(value ctx, value > keys) > > c_keys = dup_String_val(keys); > > + caml_enter_blocking_section(); > ret = libxl_send_debug_keys(CTX, c_keys); > + caml_leave_blocking_section(); > + > free(c_keys); > > if (ret != 0) > @@ -933,9 +1046,12 @@ value stub_libxl_xen_console_read_start(value ctx, > value clear) { > CAMLparam2(ctx, clear); > CAMLlocal1(handle); > + int c_clear = Int_val(clear); > libxl_xen_console_reader *cr; > > - cr = libxl_xen_console_read_start(CTX, Int_val(clear)); > + caml_enter_blocking_section(); > + cr = libxl_xen_console_read_start(CTX, c_clear); > + caml_leave_blocking_section(); > > handle > caml_alloc_custom(&libxl_console_reader_custom_operations, sizeof(cr), 0, > 1); > Console_reader_val(handle) = cr; > @@ -965,7 +1081,9 @@ value stub_libxl_xen_console_read_line(value ctx, > value reader) > char *c_line; > libxl_xen_console_reader *cr = (libxl_xen_console_reader *) > Console_reader_val(reader); > > + caml_enter_blocking_section(); > ret = libxl_xen_console_read_line(CTX, cr, &c_line); > + caml_leave_blocking_section(); > > if (ret < 0) > failwith_xl(ret, "xen_console_read_line"); @@ -982,7 > +1100,9 @@ value stub_libxl_xen_console_read_finish(value ctx, value > reader) > CAMLparam2(ctx, reader); > libxl_xen_console_reader *cr = (libxl_xen_console_reader *) > Console_reader_val(reader); > > + caml_enter_blocking_section(); > libxl_xen_console_read_finish(CTX, cr); > + caml_leave_blocking_section(); > > CAMLreturn(Val_unit); > } > @@ -1074,6 +1194,7 @@ value Val_poll_events(short events) int > fd_register(void *user, int fd, void **for_app_registration_out, > short events, void *for_libxl) { > + caml_leave_blocking_section(); > CAMLparam0(); > CAMLlocalN(args, 4); > static value *func = NULL; > @@ -1089,12 +1210,15 @@ int fd_register(void *user, int fd, void > **for_app_registration_out, > args[3] = (value) for_libxl; > > caml_callbackN(*func, 4, args); > - CAMLreturn(0); > + CAMLdone; > + caml_enter_blocking_section(); > + return 0; > } > > int fd_modify(void *user, int fd, void **for_app_registration_update, > short events) > { > + caml_leave_blocking_section(); > CAMLparam0(); > CAMLlocalN(args, 3); > static value *func = NULL; > @@ -1109,11 +1233,14 @@ int fd_modify(void *user, int fd, void > **for_app_registration_update, > args[2] = Val_poll_events(events); > > caml_callbackN(*func, 3, args); > - CAMLreturn(0); > + CAMLdone; > + caml_enter_blocking_section(); > + return 0; > } > > void fd_deregister(void *user, int fd, void *for_app_registration) { > + caml_leave_blocking_section(); > CAMLparam0(); > CAMLlocalN(args, 2); > static value *func = NULL; > @@ -1127,12 +1254,14 @@ void fd_deregister(void *user, int fd, void > *for_app_registration) > args[1] = Val_int(fd); > > caml_callbackN(*func, 2, args); > - CAMLreturn0; > + CAMLdone; > + caml_enter_blocking_section(); > } > > int timeout_register(void *user, void **for_app_registration_out, > struct timeval abs, void *for_libxl) { > + caml_leave_blocking_section(); > CAMLparam0(); > CAMLlocalN(args, 4); > static value *func = NULL; > @@ -1148,12 +1277,15 @@ int timeout_register(void *user, void > **for_app_registration_out, > args[3] = (value) for_libxl; > > caml_callbackN(*func, 4, args); > - CAMLreturn(0); > + CAMLdone; > + caml_enter_blocking_section(); > + return 0; > } > > int timeout_modify(void *user, void **for_app_registration_update, > struct timeval abs) { > + caml_leave_blocking_section(); > CAMLparam0(); > static value *func = NULL; > > @@ -1163,13 +1295,16 @@ int timeout_modify(void *user, void > **for_app_registration_update, > } > > caml_callback(*func, (value) user); > - CAMLreturn(0); > + CAMLdone; > + caml_enter_blocking_section(); > + return 0; > } > > void timeout_deregister(void *user, void *for_app_registration) { > + caml_leave_blocking_section(); > failwith_xl(ERROR_FAIL, "timeout_deregister not yet > implemented"); > - return; > + caml_enter_blocking_section(); > } > > value stub_libxl_osevent_register_hooks(value ctx, value user) @@ -1186,7 > +1321,10 @@ value stub_libxl_osevent_register_hooks(value ctx, value user) > hooks->timeout_modify = timeout_modify; > hooks->timeout_deregister = timeout_deregister; > > + caml_enter_blocking_section(); > libxl_osevent_register_hooks(CTX, hooks, (void *) user); > + caml_leave_blocking_section(); > + > result = caml_alloc(1, Abstract_tag); > *((libxl_osevent_hooks **) result) = hooks; > > @@ -1197,15 +1335,25 @@ value stub_libxl_osevent_occurred_fd(value ctx, > value for_libxl, value fd, > value events, value revents) > { > CAMLparam5(ctx, for_libxl, fd, events, revents); > - libxl_osevent_occurred_fd(CTX, (void *) for_libxl, Int_val(fd), > - Poll_events_val(events), Poll_events_val(revents)); > + int c_fd = Int_val(fd); > + short c_events = Poll_events_val(events); > + short c_revents = Poll_events_val(revents); > + > + caml_enter_blocking_section(); > + libxl_osevent_occurred_fd(CTX, (void *) for_libxl, c_fd, c_events, > c_revents); > + caml_leave_blocking_section(); > + > CAMLreturn(Val_unit); > } > > value stub_libxl_osevent_occurred_timeout(value ctx, value for_libxl) { > CAMLparam2(ctx, for_libxl); > + > + caml_enter_blocking_section(); > libxl_osevent_occurred_timeout(CTX, (void *) for_libxl); > + caml_leave_blocking_section(); > + > CAMLreturn(Val_unit); > } > > @@ -1216,6 +1364,7 @@ struct user_with_ctx { > > void event_occurs(void *user, libxl_event *event) { > + caml_leave_blocking_section(); > CAMLparam0(); > CAMLlocalN(args, 2); > struct user_with_ctx *c_user = (struct user_with_ctx *) user; @@ - > 1231,12 +1380,14 @@ void event_occurs(void *user, libxl_event *event) > libxl_event_free(c_user->ctx, event); > > caml_callbackN(*func, 2, args); > - CAMLreturn0; > + CAMLdone; > + caml_enter_blocking_section(); > } > > void disaster(void *user, libxl_event_type type, > const char *msg, int errnoval) { > + caml_leave_blocking_section(); > CAMLparam0(); > CAMLlocalN(args, 4); > struct user_with_ctx *c_user = (struct user_with_ctx *) user; @@ - > 1253,7 +1404,8 @@ void disaster(void *user, libxl_event_type type, > args[3] = Val_int(errnoval); > > caml_callbackN(*func, 4, args); > - CAMLreturn0; > + CAMLdone; > + caml_enter_blocking_section(); > } > > value stub_libxl_event_register_callbacks(value ctx, value user) @@ - > 1272,7 +1424,10 @@ value stub_libxl_event_register_callbacks(value ctx, > value user) > hooks->event_occurs = event_occurs; > hooks->disaster = disaster; > > + caml_enter_blocking_section(); > libxl_event_register_callbacks(CTX, hooks, (void *) c_user); > + caml_leave_blocking_section(); > + > result = caml_alloc(1, Abstract_tag); > *((libxl_event_hooks **) result) = hooks; > > @@ -1282,9 +1437,13 @@ value stub_libxl_event_register_callbacks(value > ctx, value user) value stub_libxl_evenable_domain_death(value ctx, value > domid, value user) { > CAMLparam3(ctx, domid, user); > + uint32_t c_domid = Int_val(domid); > + int c_user = Int_val(user); > libxl_evgen_domain_death *evgen_out; > > - libxl_evenable_domain_death(CTX, Int_val(domid), Int_val(user), > &evgen_out); > + caml_enter_blocking_section(); > + libxl_evenable_domain_death(CTX, c_domid, c_user, &evgen_out); > + caml_leave_blocking_section(); > > CAMLreturn(Val_unit); > } > -- > 1.7.10.4
Ian Campbell
2013-Nov-27 11:43 UTC
Re: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl
If you resent a single patch can you use "git send-email --in-reply-to" to chain it into the original thread so I don''t loose it when I come to apply. Thanks. On Tue, 2013-11-26 at 23:15 +0000, Rob Hoes wrote:> Ocaml has a heap lock which must be held whenever ocaml code is running. Ocaml > usually drops this lock when it enters a potentially blocking low-level > function, such as writing to a file. Libxl has its own lock, which it may > acquire when being called. > > Things get interesting when libxl calls back into ocaml code. There is a risk > of ending up in a deadlock when a thread holds both locks at the same time, > then temporarily drop the ocaml lock, while another thread calls another libxl > function. > > To avoid deadlocks, we drop the ocaml heap lock before entering libxl, and > reacquire it in callbacks to ocaml. This way, the ocaml heap lock is never held > together with the libxl lock, except in osevent registration callbacks, and > xentoollog callbacks. If we guarantee to not call any libxl functions inside > those callbacks, we can avoid deadlocks. > > This patch handle the dropping and reacquiring of the ocaml heap lock by the > caml_enter_blocking_section and caml_leave_blocking_section functions, and > related macros. We are also careful to not call any functions that access the > ocaml heap while the ocaml heap lock is dropped. This often involves copying > ocaml values to C before dropping the ocaml lock. > > Signed-off-by: Rob Hoes <rob.hoes@citrix.com> > CC: Ian Campbell <ian.campbell@citrix.com> > CC: Ian Jackson <ian.jackson@eu.citrix.com> > CC: Dave Scott <dave.scott@eu.citrix.com> > --- > tools/ocaml/libs/xentoollog/Makefile | 3 + > tools/ocaml/libs/xentoollog/xentoollog_stubs.c | 13 +- > tools/ocaml/libs/xl/Makefile | 5 +- > tools/ocaml/libs/xl/xenlight_stubs.c | 265 +++++++++++++++++++----- > 4 files changed, 229 insertions(+), 57 deletions(-) > > diff --git a/tools/ocaml/libs/xentoollog/Makefile b/tools/ocaml/libs/xentoollog/Makefile > index e535ba5..471b428 100644 > --- a/tools/ocaml/libs/xentoollog/Makefile > +++ b/tools/ocaml/libs/xentoollog/Makefile > @@ -2,6 +2,9 @@ TOPLEVEL=$(CURDIR)/../.. > XEN_ROOT=$(TOPLEVEL)/../.. > include $(TOPLEVEL)/common.make > > +# allow mixed declarations and code > +CFLAGS += -Wno-declaration-after-statement > + > CFLAGS += $(CFLAGS_libxenctrl) $(CFLAGS_libxenguest) > OCAMLINCLUDE +> > diff --git a/tools/ocaml/libs/xentoollog/xentoollog_stubs.c b/tools/ocaml/libs/xentoollog/xentoollog_stubs.c > index 3b2f91b..daf48fe 100644 > --- a/tools/ocaml/libs/xentoollog/xentoollog_stubs.c > +++ b/tools/ocaml/libs/xentoollog/xentoollog_stubs.c > @@ -31,6 +31,11 @@ > > #include "caml_xentoollog.h" > > +/* The following is equal to the CAMLreturn macro, but without the return */ > +#define CAMLdone do{ \ > +caml_local_roots = caml__frame; \ > +}while (0) > + > #define XTL ((xentoollog_logger *) Xtl_val(handle)) > > static char * dup_String_val(value s) > @@ -81,6 +86,7 @@ static void stub_xtl_ocaml_vmessage(struct xentoollog_logger *logger, > const char *format, > va_list al) > { > + caml_leave_blocking_section(); > CAMLparam0(); > CAMLlocalN(args, 4); > struct caml_xtl *xtl = (struct caml_xtl*)logger; > @@ -103,7 +109,8 @@ static void stub_xtl_ocaml_vmessage(struct xentoollog_logger *logger, > free(msg); > > caml_callbackN(*func, 4, args); > - CAMLreturn0; > + CAMLdone; > + caml_enter_blocking_section(); > }So the concern here is that CAMLreturn0/CAMLparam0 might call into ocaml heap code, and trigger the gc and therefore need the lock? is that right? I might have been tempted to wrap each callback in a macro/function which ensured the locking was appropriate rather than enabling -Wno-declaration-after-statement but this works too. We use no-declaration-after-statement in libxl itself so I have no objection to it. The dropping/taking of the lock is a bit repetitive as well. As a future cleanup you might consider #define LIBXL(call) ({ \ int libxl_retval;\ caml_leave...();\ libxl_retval = call;\ caml_enter...();\ libxl_retval; }) foo = LIBXL( libxl_do_something(CTX) ); I dunno, maybe that''s not cleaner. Or s/LIBXL/CAML_UNLOCKED/ perhaps is more expressive. Some of those ";" might need to be ",".> -static libxl_asyncop_how *aohow_val(value async, libxl_asyncop_how *ao_how) > +static libxl_asyncop_how *aohow_val(value async) > { > CAMLparam1(async); > + libxl_asyncop_how *ao_how = NULL; > > if (async != Val_none) { > + libxl_asyncop_how *ao_how = malloc(sizeof(*ao_how)); > ao_how->callback = async_callback; > ao_how->u.for_callback = (void *) Some_val(async); > - CAMLreturnT(libxl_asyncop_how *, ao_how); > } > - else > - CAMLreturnT(libxl_asyncop_how *, NULL); > + > + CAMLreturnT(libxl_asyncop_how *, ao_how); > } > > value stub_libxl_domain_create_new(value ctx, value domain_config, value async, value unit) > @@ -411,7 +420,7 @@ value stub_libxl_domain_create_new(value ctx, value domain_config, value async, > int ret; > libxl_domain_config c_dconfig; > uint32_t c_domid; > - libxl_asyncop_how ao_how; > + libxl_asyncop_how *ao_how;These changes seem to have something (subtle) to do with the lifecycle of the ao_how rather than any of the locking stuff which is the meat of this change? Does the ao_how need to be dynamically allocated rather than stack based for some reason? If this relates the locking stuff can we get a description in the commit message please.
Rob Hoes
2013-Nov-27 13:49 UTC
Re: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl
Ian Campbell wrote:> If you resent a single patch can you use "git send-email --in-reply-to" > to chain it into the original thread so I don''t loose it when I come to > apply. Thanks.Ok, will do. [...]> > diff --git a/tools/ocaml/libs/xentoollog/Makefile > > b/tools/ocaml/libs/xentoollog/Makefile > > index e535ba5..471b428 100644 > > --- a/tools/ocaml/libs/xentoollog/Makefile > > +++ b/tools/ocaml/libs/xentoollog/Makefile > > @@ -2,6 +2,9 @@ TOPLEVEL=$(CURDIR)/../.. > > XEN_ROOT=$(TOPLEVEL)/../.. > > include $(TOPLEVEL)/common.make > > > > +# allow mixed declarations and code > > +CFLAGS += -Wno-declaration-after-statement > > + > > CFLAGS += $(CFLAGS_libxenctrl) $(CFLAGS_libxenguest) OCAMLINCLUDE +> > > > diff --git a/tools/ocaml/libs/xentoollog/xentoollog_stubs.c > > b/tools/ocaml/libs/xentoollog/xentoollog_stubs.c > > index 3b2f91b..daf48fe 100644 > > --- a/tools/ocaml/libs/xentoollog/xentoollog_stubs.c > > +++ b/tools/ocaml/libs/xentoollog/xentoollog_stubs.c > > @@ -31,6 +31,11 @@ > > > > #include "caml_xentoollog.h" > > > > +/* The following is equal to the CAMLreturn macro, but without the > > +return */ #define CAMLdone do{ \ caml_local_roots = caml__frame; \ > > +}while (0) > > + > > #define XTL ((xentoollog_logger *) Xtl_val(handle)) > > > > static char * dup_String_val(value s) @@ -81,6 +86,7 @@ static void > > stub_xtl_ocaml_vmessage(struct xentoollog_logger *logger, > > const char *format, > > va_list al) > > { > > + caml_leave_blocking_section(); > > CAMLparam0(); > > CAMLlocalN(args, 4); > > struct caml_xtl *xtl = (struct caml_xtl*)logger; @@ -103,7 +109,8 @@ > > static void stub_xtl_ocaml_vmessage(struct xentoollog_logger *logger, > > free(msg); > > > > caml_callbackN(*func, 4, args); > > - CAMLreturn0; > > + CAMLdone; > > + caml_enter_blocking_section(); > > } > > So the concern here is that CAMLreturn0/CAMLparam0 might call into ocaml > heap code, and trigger the gc and therefore need the lock? is that right?Exactly.> I might have been tempted to wrap each callback in a macro/function which > ensured the locking was appropriate rather than enabling -Wno-declaration- > after-statement but this works too.That was another option indeed. It''s just that the return types vary a bit, which makes this slightly more complicated.> We use no-declaration-after-statement in libxl itself so I have no > objection to it.Yes, I copied it from libxl :)> The dropping/taking of the lock is a bit repetitive as well. As a future > cleanup you might consider > #define LIBXL(call) ({ \ > int libxl_retval;\ > caml_leave...();\ > libxl_retval = call;\ > caml_enter...();\ > libxl_retval; > }) > > foo = LIBXL( libxl_do_something(CTX) ); > > I dunno, maybe that''s not cleaner. Or s/LIBXL/CAML_UNLOCKED/ perhaps is > more expressive. Some of those ";" might need to be ",".Yes, I think that makes sense. I''ll keep it in mind.> > -static libxl_asyncop_how *aohow_val(value async, libxl_asyncop_how > > *ao_how) > > +static libxl_asyncop_how *aohow_val(value async) > > { > > CAMLparam1(async); > > + libxl_asyncop_how *ao_how = NULL; > > > > if (async != Val_none) { > > + libxl_asyncop_how *ao_how = malloc(sizeof(*ao_how)); > > ao_how->callback = async_callback; > > ao_how->u.for_callback = (void *) Some_val(async); > > - CAMLreturnT(libxl_asyncop_how *, ao_how); > > } > > - else > > - CAMLreturnT(libxl_asyncop_how *, NULL); > > + > > + CAMLreturnT(libxl_asyncop_how *, ao_how); > > } > > > > value stub_libxl_domain_create_new(value ctx, value domain_config, > > value async, value unit) @@ -411,7 +420,7 @@ value > stub_libxl_domain_create_new(value ctx, value domain_config, value async, > > int ret; > > libxl_domain_config c_dconfig; > > uint32_t c_domid; > > - libxl_asyncop_how ao_how; > > + libxl_asyncop_how *ao_how; > > These changes seem to have something (subtle) to do with the lifecycle of > the ao_how rather than any of the locking stuff which is the meat of this > change? Does the ao_how need to be dynamically allocated rather than stack > based for some reason? > > If this relates the locking stuff can we get a description in the commit > message please.This fall into the "copying ocaml values to C before dropping the ocaml lock" category: it avoids calling the aohow_val function inside the argument list of the libxl functions, which are called without the ocaml lock. I reorganised the aohow_val function a little, just to make it a little nicer to use. The dynamic allocation is not for the locking stuff. Cheers, Rob
Ian Campbell
2013-Nov-27 13:56 UTC
Re: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl
On Wed, 2013-11-27 at 13:49 +0000, Rob Hoes wrote:> > > -static libxl_asyncop_how *aohow_val(value async, libxl_asyncop_how > > > *ao_how) > > > +static libxl_asyncop_how *aohow_val(value async) > > > { > > > CAMLparam1(async); > > > + libxl_asyncop_how *ao_how = NULL; > > > > > > if (async != Val_none) { > > > + libxl_asyncop_how *ao_how = malloc(sizeof(*ao_how)); > > > ao_how->callback = async_callback; > > > ao_how->u.for_callback = (void *) Some_val(async); > > > - CAMLreturnT(libxl_asyncop_how *, ao_how); > > > } > > > - else > > > - CAMLreturnT(libxl_asyncop_how *, NULL); > > > + > > > + CAMLreturnT(libxl_asyncop_how *, ao_how); > > > } > > > > > > value stub_libxl_domain_create_new(value ctx, value domain_config, > > > value async, value unit) @@ -411,7 +420,7 @@ value > > stub_libxl_domain_create_new(value ctx, value domain_config, value async, > > > int ret; > > > libxl_domain_config c_dconfig; > > > uint32_t c_domid; > > > - libxl_asyncop_how ao_how; > > > + libxl_asyncop_how *ao_how; > > > > These changes seem to have something (subtle) to do with the lifecycle of > > the ao_how rather than any of the locking stuff which is the meat of this > > change? Does the ao_how need to be dynamically allocated rather than stack > > based for some reason? > > > > If this relates the locking stuff can we get a description in the commit > > message please. > > This fall into the "copying ocaml values to C before dropping the > ocaml lock" category: it avoids calling the aohow_val function inside > the argument list of the libxl functions, which are called without the > ocaml lock.I see, I''d have done it with stack_ao_how and *ao_how as the return value rather than dynamic allocation but OK. Acked-by: Ian Campbell <ian.campbell@citrix.com>
Ian Jackson
2013-Nov-28 16:37 UTC
Re: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl
Rob Hoes writes ("[PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl"):> To avoid deadlocks, we drop the ocaml heap lock before entering > libxl, and reacquire it in callbacks to ocaml. This way, the ocaml > heap lock is never held together with the libxl lock, except in > osevent registration callbacks, and xentoollog callbacks. If we > guarantee to not call any libxl functions inside those callbacks, we > can avoid deadlocks.I think this is right.> This patch handle the dropping and reacquiring of the ocaml heap lock by the > caml_enter_blocking_section and caml_leave_blocking_section functions, and > related macros. We are also careful to not call any functions that access the > ocaml heap while the ocaml heap lock is dropped. This often involves copying > ocaml values to C before dropping the ocaml lock.Right.> void async_callback(libxl_ctx *ctx, int rc, void *for_callback) > { > + caml_leave_blocking_section(); > CAMLparam0(); > CAMLlocal1(error); > int *task = (int *) for_callback; > @@ -390,19 +396,22 @@ void async_callback(libxl_ctx *ctx, int rc, void *for_callback) > error = Val_some(Val_error(rc)); > > caml_callback2(*func, error, (value) for_callback); > + CAMLdone; > + caml_enter_blocking_section(); > } > > -static libxl_asyncop_how *aohow_val(value async, libxl_asyncop_how *ao_how) > +static libxl_asyncop_how *aohow_val(value async) > { > CAMLparam1(async); > + libxl_asyncop_how *ao_how = NULL; > > if (async != Val_none) { > + libxl_asyncop_how *ao_how = malloc(sizeof(*ao_how)); > ao_how->callback = async_callback; > ao_how->u.for_callback = (void *) Some_val(async); > - CAMLreturnT(libxl_asyncop_how *, ao_how); > } > - else > - CAMLreturnT(libxl_asyncop_how *, NULL); > + > + CAMLreturnT(libxl_asyncop_how *, ao_how); > }I don''t see here how "async" is protected from being gc''d (or, perhaps, moved by the ocaml gc) during the execution of the long-running libxl operation. The pointer to it is hidden inside the (now malloc''d) ao_how. AIUI the "CAMLparam1...CAMLreturnT" pair protect it during the aohow_val function ? I think this problem probably existed beforehand too. Also, is it really safe to bury these calls to CAMLparam1 etc. on async inside aohow_val ? AIUI these CAML gc protection things need to occur as the very first thing inside the ocaml stub function, before calling anything which might trigger the ocaml gc. I haven''t gone through all the callers of aohow_val looking for calls into ocaml before aohow_val but it seems a fragile pattern. So in summary I think aohow_val ought to: assume that a ref to async has been taken by its caller for the duration of this entry (so not use CAMLparam1 on it) but that that will be dropped while the function completes (so it needs to make a new ref by calling caml_register_global_root and release it again in caml_remove_global_root after the callback is done).> @@ -508,10 +545,17 @@ value stub_libxl_domain_suspend(value ctx, value domid, value fd, value async, v > { > CAMLparam5(ctx, domid, fd, async, unit); > int ret; > - libxl_asyncop_how ao_how; > + uint32_t c_domid = Int_val(domid); > + int c_fd = Int_val(fd); > + libxl_asyncop_how *ao_how = aohow_val(async); > + > + caml_enter_blocking_section(); > + ret = libxl_domain_suspend(CTX, c_domid, c_fd, 0, ao_how); > + caml_leave_blocking_section(); > + > + if (ao_how) > + free(ao_how); > > - ret = libxl_domain_suspend(CTX, Int_val(domid), Int_val(fd), 0, > - aohow_val(async, &ao_how)); > if (ret != 0) > failwith_xl(ret, "domain_suspend");These functions are getting more and more boilerplatey :-/. Ian.
Ian Jackson
2013-Nov-28 16:40 UTC
Re: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl
Ian Campbell writes ("Re: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl"):> On Wed, 2013-11-27 at 13:49 +0000, Rob Hoes wrote: > > This fall into the "copying ocaml values to C before dropping the > > ocaml lock" category: it avoids calling the aohow_val function inside > > the argument list of the libxl functions, which are called without the > > ocaml lock. > > I see, I''d have done it with stack_ao_how and *ao_how as the return > value rather than dynamic allocation but OK.I don''t think this change is actually logically interconnected with the locking changes. See my other mail just sent. Ian.
Rob Hoes
2013-Nov-28 17:50 UTC
Re: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl
Ian Jackson wrote:> > -static libxl_asyncop_how *aohow_val(value async, libxl_asyncop_how > > *ao_how) > > +static libxl_asyncop_how *aohow_val(value async) > > { > > CAMLparam1(async); > > + libxl_asyncop_how *ao_how = NULL; > > > > if (async != Val_none) { > > + libxl_asyncop_how *ao_how = malloc(sizeof(*ao_how)); > > ao_how->callback = async_callback; > > ao_how->u.for_callback = (void *) Some_val(async); > > - CAMLreturnT(libxl_asyncop_how *, ao_how); > > } > > - else > > - CAMLreturnT(libxl_asyncop_how *, NULL); > > + > > + CAMLreturnT(libxl_asyncop_how *, ao_how); > > } > > I don''t see here how "async" is protected from being gc''d (or, perhaps, > moved by the ocaml gc) during the execution of the long-running libxl > operation. The pointer to it is hidden inside the (now malloc''d) ao_how. > AIUI the "CAMLparam1...CAMLreturnT" pair protect it during the aohow_val > function ?To keep the async user values "alive", we keep them in a set in OCaml for the duration of the async call. See the code that deals with "async_users" in xenlight.ml.in. The GC will definitely not destroy these values. I''m not sure if the GC may _move_ the value (I assumed it wouldn''t)... Dave, do you know?> I think this problem probably existed beforehand too. > > Also, is it really safe to bury these calls to CAMLparam1 etc. on async > inside aohow_val ? AIUI these CAML gc protection things need to occur as > the very first thing inside the ocaml stub function, before calling > anything which might trigger the ocaml gc.The CAMLparam macros indeed need to be called as soon as you enter a stub, but that is what we do. A function like stub_libxl_domain_create_new calls CAMLparam on async in the first line. CAMLparam is then called a second time on async, inside aohow_val, even though it may not be strictly needed. This is fine, however, as long as CAMLparam and CAMLreturn occur in pairs and are properly nested. CAMLreturn does nothing more than restore the list of roots to what it was before the preceeding CAMLparam. I think it is good practise to just use CAMLparam + CAMLreturn in every function that deals with ocaml values, even if they might be "optimised away", just to avoid possible mistakes. Rob> I haven''t gone through all the callers of aohow_val looking for calls into > ocaml before aohow_val but it seems a fragile pattern. > > So in summary I think aohow_val ought to: assume that a ref to async has > been taken by its caller for the duration of this entry (so not use > CAMLparam1 on it) but that that will be dropped while the function > completes (so it needs to make a new ref by calling > caml_register_global_root and release it again in caml_remove_global_root > after the callback is done). > > > > @@ -508,10 +545,17 @@ value stub_libxl_domain_suspend(value ctx, value > > domid, value fd, value async, v { > > CAMLparam5(ctx, domid, fd, async, unit); > > int ret; > > - libxl_asyncop_how ao_how; > > + uint32_t c_domid = Int_val(domid); > > + int c_fd = Int_val(fd); > > + libxl_asyncop_how *ao_how = aohow_val(async); > > + > > + caml_enter_blocking_section(); > > + ret = libxl_domain_suspend(CTX, c_domid, c_fd, 0, ao_how); > > + caml_leave_blocking_section(); > > + > > + if (ao_how) > > + free(ao_how); > > > > - ret = libxl_domain_suspend(CTX, Int_val(domid), Int_val(fd), 0, > > - aohow_val(async, &ao_how)); > > if (ret != 0) > > failwith_xl(ret, "domain_suspend"); > > These functions are getting more and more boilerplatey :-/. > > Ian.
Rob Hoes
2013-Nov-28 17:52 UTC
Re: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl
Ian Jackson wrote:> Ian Campbell writes ("Re: [PATCH v6 08/12] libxl: ocaml: drop the ocaml > heap lock before calling into libxl"): > > On Wed, 2013-11-27 at 13:49 +0000, Rob Hoes wrote: > > > This fall into the "copying ocaml values to C before dropping the > > > ocaml lock" category: it avoids calling the aohow_val function > > > inside the argument list of the libxl functions, which are called > > > without the ocaml lock. > > > > I see, I''d have done it with stack_ao_how and *ao_how as the return > > value rather than dynamic allocation but OK. > > I don''t think this change is actually logically interconnected with the > locking changes. See my other mail just sent.As I see it, the locking changes did require this to be changed. I hope my reply to the other mail makes it clear. Rob
Ian Jackson
2013-Nov-28 18:11 UTC
Re: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl
Rob Hoes writes ("RE: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl"):> Ian Jackson wrote: > > I don''t see here how "async" is protected from being gc''d (or, perhaps, > > moved by the ocaml gc) during the execution of the long-running libxl > > operation. The pointer to it is hidden inside the (now malloc''d) ao_how. > > AIUI the "CAMLparam1...CAMLreturnT" pair protect it during the aohow_val > > function ? > > To keep the async user values "alive", we keep them in a set in > OCaml for the duration of the async call. See the code that deals > with "async_users" in xenlight.ml.in. The GC will definitely not > destroy these values.Surely that can''t be right. You''re relying on the ocaml code to do the memory management right. Effectively you''ve made an unsafe interface!> I''m not sure if the GC may _move_ the value (I assumed it wouldn''t)...I thought GCs in these kind of languages were entitled to move things to which they thought they had all the references.> > Also, is it really safe to bury these calls to CAMLparam1 etc. on async > > inside aohow_val ? AIUI these CAML gc protection things need to occur as > > the very first thing inside the ocaml stub function, before calling > > anything which might trigger the ocaml gc. > > The CAMLparam macros indeed need to be called as soon as you enter a > stub, but that is what we do. A function like > stub_libxl_domain_create_new calls CAMLparam on async in the first > line.Right...> I think it is good practise to just use CAMLparam + CAMLreturn in > every function that deals with ocaml values, even if they might be > "optimised away", just to avoid possible mistakes.If you think so, OK. But in this case it results in you making an overly cautious assumption about the calling context which prevents you from calling aohow_value inside your libxl function arguments. Ian.
Rob Hoes
2013-Nov-28 19:43 UTC
Re: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl
On 28 Nov 2013, at 18:11, Ian Jackson <Ian.Jackson@eu.citrix.com> wrote:> Rob Hoes writes ("RE: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl"): >> Ian Jackson wrote: >>> I don''t see here how "async" is protected from being gc''d (or, perhaps, >>> moved by the ocaml gc) during the execution of the long-running libxl >>> operation. The pointer to it is hidden inside the (now malloc''d) ao_how. >>> AIUI the "CAMLparam1...CAMLreturnT" pair protect it during the aohow_val >>> function ? >> >> To keep the async user values "alive", we keep them in a set in >> OCaml for the duration of the async call. See the code that deals >> with "async_users" in xenlight.ml.in. The GC will definitely not >> destroy these values. > > Surely that can''t be right. You''re relying on the ocaml code to do > the memory management right. Effectively you''ve made an unsafe > interface!Well, this ocaml code (xenlight.ml/xenlight.mli) is the thing that exposes the interface. And you need to use it in a certain way for it to work well. For example, you also need to first call the functions that register callbacks before you can do anything event-related.>> I''m not sure if the GC may _move_ the value (I assumed it wouldn''t)... > > I thought GCs in these kind of languages were entitled to move things > to which they thought they had all the references. > >>> Also, is it really safe to bury these calls to CAMLparam1 etc. on async >>> inside aohow_val ? AIUI these CAML gc protection things need to occur as >>> the very first thing inside the ocaml stub function, before calling >>> anything which might trigger the ocaml gc. >> >> The CAMLparam macros indeed need to be called as soon as you enter a >> stub, but that is what we do. A function like >> stub_libxl_domain_create_new calls CAMLparam on async in the first >> line. > > Right... > >> I think it is good practise to just use CAMLparam + CAMLreturn in >> every function that deals with ocaml values, even if they might be >> "optimised away", just to avoid possible mistakes. > > If you think so, OK. > > But in this case it results in you making an overly cautious > assumption about the calling context which prevents you from calling > aohow_value inside your libxl function arguments.We don''t call aohow_value inside the libxl function arguments, because it accesses ocaml values while the ocaml heap lock is not held. This isn’t about the CAMLparam/return macros. Rob
Ian Campbell
2013-Nov-29 08:39 UTC
Re: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl
On Thu, 2013-11-28 at 18:11 +0000, Ian Jackson wrote:> Rob Hoes writes ("RE: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl"): > > Ian Jackson wrote: > > > I don''t see here how "async" is protected from being gc''d (or, perhaps, > > > moved by the ocaml gc) during the execution of the long-running libxl > > > operation. The pointer to it is hidden inside the (now malloc''d) ao_how. > > > AIUI the "CAMLparam1...CAMLreturnT" pair protect it during the aohow_val > > > function ? > > > > To keep the async user values "alive", we keep them in a set in > > OCaml for the duration of the async call. See the code that deals > > with "async_users" in xenlight.ml.in. The GC will definitely not > > destroy these values. > > Surely that can''t be right. You''re relying on the ocaml code to do > the memory management right. Effectively you''ve made an unsafe > interface! > > > I''m not sure if the GC may _move_ the value (I assumed it wouldn''t)... > > I thought GCs in these kind of languages were entitled to move things > to which they thought they had all the references.Me too, and the ocaml books explicitly say that it will, e.g. http://caml.inria.fr/pub/docs/oreilly-book/html/book-ora116.html Any allocation in the Objective CAML heap can trigger a garbage collection, which will deallocate unused memory blocks and may move live blocks. Does the gc have some sort of horror mode. where it would move stuff around at every allocation etc? Perhaps a debug option here to always manually invoke the gc when you acquire/drop the lock and in other key places? I wonder if you need to use these functions:> /* [caml_register_global_root] registers a global C variable as a memory root > for the duration of the program, or until [caml_remove_global_root] is > called. */ > > CAMLextern void caml_register_global_root (value *); > > /* [caml_remove_global_root] removes a memory root registered on a global C > variable with [caml_register_global_root]. */ > > CAMLextern void caml_remove_global_root (value *);To keep the for_callback value live? Ian.
Rob Hoes
2013-Nov-29 09:03 UTC
Re: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl
On 29 Nov 2013, at 08:39, Ian Campbell <Ian.Campbell@citrix.com> wrote:> On Thu, 2013-11-28 at 18:11 +0000, Ian Jackson wrote: >> Rob Hoes writes ("RE: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl"): >>> Ian Jackson wrote: >>>> I don''t see here how "async" is protected from being gc''d (or, perhaps, >>>> moved by the ocaml gc) during the execution of the long-running libxl >>>> operation. The pointer to it is hidden inside the (now malloc''d) ao_how. >>>> AIUI the "CAMLparam1...CAMLreturnT" pair protect it during the aohow_val >>>> function ? >>> >>> To keep the async user values "alive", we keep them in a set in >>> OCaml for the duration of the async call. See the code that deals >>> with "async_users" in xenlight.ml.in. The GC will definitely not >>> destroy these values. >> >> Surely that can''t be right. You''re relying on the ocaml code to do >> the memory management right. Effectively you''ve made an unsafe >> interface! >> >>> I''m not sure if the GC may _move_ the value (I assumed it wouldn''t)... >> >> I thought GCs in these kind of languages were entitled to move things >> to which they thought they had all the references. > > Me too, and the ocaml books explicitly say that it will, e.g. > http://caml.inria.fr/pub/docs/oreilly-book/html/book-ora116.html > Any allocation in the Objective CAML heap can trigger a garbage > collection, which will deallocate unused memory blocks and may > move live blocks. > > Does the gc have some sort of horror mode. where it would move stuff > around at every allocation etc? Perhaps a debug option here to always > manually invoke the gc when you acquire/drop the lock and in other key > places? > > I wonder if you need to use these functions: >> /* [caml_register_global_root] registers a global C variable as a memory root >> for the duration of the program, or until [caml_remove_global_root] is >> called. */ >> >> CAMLextern void caml_register_global_root (value *); >> >> /* [caml_remove_global_root] removes a memory root registered on a global C >> variable with [caml_register_global_root]. */ >> >> CAMLextern void caml_remove_global_root (value *); > > To keep the for_callback value live?Perhaps, yes, if a global root is something that is not only never deallocated, but also never moved. I remember we discussed this topic when revising v1 of the series, and I ended up with the current implementation that keeps a reference to the value at the ocaml level. I wonder how ocaml tracks live values when they may move around. I’ll see what I can find out. Rob
Ian Campbell
2013-Nov-29 10:48 UTC
Re: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl
On Fri, 2013-11-29 at 09:03 +0000, Rob Hoes wrote:> On 29 Nov 2013, at 08:39, Ian Campbell <Ian.Campbell@citrix.com> wrote: > > I wonder if you need to use these functions: > >> /* [caml_register_global_root] registers a global C variable as a memory root > >> for the duration of the program, or until [caml_remove_global_root] is > >> called. */ > >> > >> CAMLextern void caml_register_global_root (value *); > >> > >> /* [caml_remove_global_root] removes a memory root registered on a global C > >> variable with [caml_register_global_root]. */ > >> > >> CAMLextern void caml_remove_global_root (value *); > > > > To keep the for_callback value live? > > Perhaps, yes, if a global root is something that is not only never > deallocated, but also never moved. I remember we discussed this topic > when revising v1 of the series, and I ended up with the current > implementation that keeps a reference to the value at the ocaml level. > > I wonder how ocaml tracks live values when they may move around. I’ll > see what I can find out.My google-fu wasn't sufficient to answer this, perhaps you have a hotline to someone who knows though ;-) Ian. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Rob Hoes
2013-Nov-29 18:33 UTC
Re: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl
On 29 Nov 2013, at 10:48, Ian Campbell <Ian.Campbell@citrix.com> wrote:> On Fri, 2013-11-29 at 09:03 +0000, Rob Hoes wrote: >> On 29 Nov 2013, at 08:39, Ian Campbell <Ian.Campbell@citrix.com> wrote: >>> I wonder if you need to use these functions: >>>> /* [caml_register_global_root] registers a global C variable as a memory root >>>> for the duration of the program, or until [caml_remove_global_root] is >>>> called. */ >>>> >>>> CAMLextern void caml_register_global_root (value *); >>>> >>>> /* [caml_remove_global_root] removes a memory root registered on a global C >>>> variable with [caml_register_global_root]. */ >>>> >>>> CAMLextern void caml_remove_global_root (value *); >>> >>> To keep the for_callback value live? >> >> Perhaps, yes, if a global root is something that is not only never >> deallocated, but also never moved. I remember we discussed this topic >> when revising v1 of the series, and I ended up with the current >> implementation that keeps a reference to the value at the ocaml level. >> >> I wonder how ocaml tracks live values when they may move around. I’ll >> see what I can find out. > > My google-fu wasn''t sufficient to answer this, perhaps you have a > hotline to someone who knows though ;-)Right, I found out some more about this. Ocaml’s GC works on two generations. There is a small minor heap for the newest values, and a major heap for the older ones. When the minor heap gets full, the GC moves all values that can be reached from the set of GC roots to the major heap using a "Stop&Copy" algorithm, and then considers the whole minor heap as free space. Space from the major heap is reclaimed at a slower pace using a “Mark&Sweep” algorithm, which basically frees up unused values in place. Occasionally, the major heap is compacted (defragmented). See http://caml.inria.fr/pub/docs/oreilly-book/html/book-ora087.html (and the page before it) for details. All this means that it is quite likely indeed that a value is moved around at least once if it stays around for a bit. This of course has implications for C bindings. It turns out that the CAMLparam/CAMLlocal/CAMLreturn macros don’t just protect values from being destroyed by the GC. They also allow the GC to update the pointers when values move around (the “value” type in C is a pointer to something on the ocaml heap). (There is an interesting thread that talks about this, here: http://groups.yahoo.com/neo/groups/ocaml_beginners/conversations/topics/11278). This first of all explains why those macros need to be used in all functions that use ocaml values, even if they are not primitives that are directly called by ocaml, such as aohow_val. Only if the function does absolutely no allocations, and the heap lock is not dropped, it may be safe to omit them. Secondly, storing a value (ocaml heap pointer) in C for a while, and expecting it to still be up-to-date after a while, is wrong. In the case of the for_callback pointer in libxl, we currently give it the our ocaml value directly, and therefore may get out-of-date. I think that one way to fix this, is to store the value in a global C variable which is registered with the GC using caml_register_global_root, and use a pointer to _that_ as for_callback. Or we could use some other C-allocated variables to pass around, and maintain a mapping to the ocaml values. Actually, there is one case for which the current code does actually seem alright: when using integers. In this case, the “value” is not a pointer, but the integer itself. Cheers, Rob
Ian Campbell
2013-Dec-02 09:55 UTC
Re: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl
On Fri, 2013-11-29 at 18:33 +0000, Rob Hoes wrote:> On 29 Nov 2013, at 10:48, Ian Campbell <Ian.Campbell@citrix.com> wrote: > > On Fri, 2013-11-29 at 09:03 +0000, Rob Hoes wrote: > >> On 29 Nov 2013, at 08:39, Ian Campbell <Ian.Campbell@citrix.com> wrote: > >>> I wonder if you need to use these functions: > >>>> /* [caml_register_global_root] registers a global C variable as a memory root > >>>> for the duration of the program, or until [caml_remove_global_root] is > >>>> called. */ > >>>> > >>>> CAMLextern void caml_register_global_root (value *); > >>>> > >>>> /* [caml_remove_global_root] removes a memory root registered on a global C > >>>> variable with [caml_register_global_root]. */ > >>>> > >>>> CAMLextern void caml_remove_global_root (value *); > >>> > >>> To keep the for_callback value live? > >> > >> Perhaps, yes, if a global root is something that is not only never > >> deallocated, but also never moved. I remember we discussed this topic > >> when revising v1 of the series, and I ended up with the current > >> implementation that keeps a reference to the value at the ocaml level. > >> > >> I wonder how ocaml tracks live values when they may move around. I’ll > >> see what I can find out. > > > > My google-fu wasn't sufficient to answer this, perhaps you have a > > hotline to someone who knows though ;-) > > Right, I found out some more about this.TL;DR: There will need to be a refresh of this patch? [...]> Secondly, storing a value (ocaml heap pointer) in C for a while, and > expecting it to still be up-to-date after a while, is wrong. In the > case of the for_callback pointer in libxl, we currently give it the > our ocaml value directly, and therefore may get out-of-date. I think > that one way to fix this, is to store the value in a global C variable > which is registered with the GC using caml_register_global_root, and > use a pointer to _that_ as for_callback. Or we could use some other > C-allocated variables to pass around, and maintain a mapping to the > ocaml values.Can't you just use caml_register_global_root on the address of your value for_callback? I presume you can call this register/unregister pair many times (for different events etc)? Otherwise you need to build some sort of C-side data structure to hold all the various values, which is doable but a bit tedious I imagine. The ocaml source has a few examples of using this function, e.g. for signals or as part of caml_register_named_value.> Actually, there is one case for which the current code does actually > seem alright: when using integers. In this case, the “value” is not a > pointer, but the integer itself.Is your for_callback an integer now? That might be a reasonable short term (i.e. for 4.4) way to solve this issue? Ian. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Rob Hoes
2013-Dec-02 11:48 UTC
Re: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl
> > Right, I found out some more about this. > > TL;DR: There will need to be a refresh of this patch?I think we can keep it simple; see below.> [...] > > Secondly, storing a value (ocaml heap pointer) in C for a while, and > > expecting it to still be up-to-date after a while, is wrong. In the > > case of the for_callback pointer in libxl, we currently give it the > > our ocaml value directly, and therefore may get out-of-date. I think > > that one way to fix this, is to store the value in a global C variable > > which is registered with the GC using caml_register_global_root, and > > use a pointer to _that_ as for_callback. Or we could use some other > > C-allocated variables to pass around, and maintain a mapping to the > > ocaml values. > > Can't you just use caml_register_global_root on the address of your value > for_callback? I presume you can call this register/unregister pair many > times (for different events etc)?I think you need to register a value, not a pointer to a value.> Otherwise you need to build some sort of C-side data structure to hold all > the various values, which is doable but a bit tedious I imagine.I think we can malloc a new "value" (pointer to ocaml heap), make it equal to the for_callback value, register this with the GC (so it can update this heap pointer when the actual value moves), and give a pointer to it to libxl. And then of course deregister and free before we give it back to ocaml. I'll try this now.> The ocaml source has a few examples of using this function, e.g. for > signals or as part of caml_register_named_value. > > > Actually, there is one case for which the current code does actually > > seem alright: when using integers. In this case, the “value” is not a > > pointer, but the integer itself. > > Is your for_callback an integer now? That might be a reasonable short term > (i.e. for 4.4) way to solve this issue?I was thinking about that as a fallback indeed, but I'll try out the above first. Cheers, Rob _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Ian Campbell
2013-Dec-02 11:52 UTC
Re: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl
On Mon, 2013-12-02 at 11:48 +0000, Rob Hoes wrote:> > > Right, I found out some more about this. > > > > TL;DR: There will need to be a refresh of this patch? > > I think we can keep it simple; see below. > > > [...] > > > Secondly, storing a value (ocaml heap pointer) in C for a while, and > > > expecting it to still be up-to-date after a while, is wrong. In the > > > case of the for_callback pointer in libxl, we currently give it the > > > our ocaml value directly, and therefore may get out-of-date. I think > > > that one way to fix this, is to store the value in a global C variable > > > which is registered with the GC using caml_register_global_root, and > > > use a pointer to _that_ as for_callback. Or we could use some other > > > C-allocated variables to pass around, and maintain a mapping to the > > > ocaml values. > > > > Can''t you just use caml_register_global_root on the address of your value > > for_callback? I presume you can call this register/unregister pair many > > times (for different events etc)? > > I think you need to register a value, not a pointer to a value.CAMLextern void caml_register_global_root (value *); It takes a pointer to the value to register. Is there any reason why you can''t pass &for_callback given "value for_callback" in the local context?> > > Otherwise you need to build some sort of C-side data structure to hold all > > the various values, which is doable but a bit tedious I imagine. > > I think we can malloc a new "value" (pointer to ocaml heap), make it > equal to the for_callback value, register this with the GC (so it can > update this heap pointer when the actual value moves), and give a > pointer to it to libxl. And then of course deregister and free before > we give it back to ocaml.I think this (moving the heap pointer from the GC) would require the prototype to be "value **" not "value *". Ian.
Rob Hoes
2013-Dec-02 11:58 UTC
Re: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl
> > > Can''t you just use caml_register_global_root on the address of your > > > value for_callback? I presume you can call this register/unregister > > > pair many times (for different events etc)? > > > > I think you need to register a value, not a pointer to a value. > > CAMLextern void caml_register_global_root (value *); > > It takes a pointer to the value to register. Is there any reason why you > can''t pass &for_callback given "value for_callback" in the local context?I think we''d then have to rely on libxl to never copy to pointer internally, and throw away the original one. I am worried that this may happen, because the ao_how value is temporary.> > > > > Otherwise you need to build some sort of C-side data structure to > > > hold all the various values, which is doable but a bit tedious I > imagine. > > > > I think we can malloc a new "value" (pointer to ocaml heap), make it > > equal to the for_callback value, register this with the GC (so it can > > update this heap pointer when the actual value moves), and give a > > pointer to it to libxl. And then of course deregister and free before > > we give it back to ocaml. > > I think this (moving the heap pointer from the GC) would require the > prototype to be "value **" not "value *". > > Ian.
Ian Campbell
2013-Dec-02 12:05 UTC
Re: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl
On Mon, 2013-12-02 at 11:58 +0000, Rob Hoes wrote:> > > > Can''t you just use caml_register_global_root on the address of your > > > > value for_callback? I presume you can call this register/unregister > > > > pair many times (for different events etc)? > > > > > > I think you need to register a value, not a pointer to a value. > > > > CAMLextern void caml_register_global_root (value *); > > > > It takes a pointer to the value to register. Is there any reason why you > > can''t pass &for_callback given "value for_callback" in the local context? > > I think we''d then have to rely on libxl to never copy to pointer > internally, and throw away the original one. I am worried that this > may happen, because the ao_how value is temporary.libxl will for certain take a copy of the pointer passed into the ao_how, because it stashes it for later use. Either I''m very confused about what you are suggesting or pointers in C don''t work like you seem to think. AFAICT caml_register_global_root takes a pointer to the value and locks down the value itself, preventing it from moving. There is no way for it to do anything else given a pointer and not a pointer to a pointer. Ian.> > > > > > > > Otherwise you need to build some sort of C-side data structure to > > > > hold all the various values, which is doable but a bit tedious I > > imagine. > > > > > > I think we can malloc a new "value" (pointer to ocaml heap), make it > > > equal to the for_callback value, register this with the GC (so it can > > > update this heap pointer when the actual value moves), and give a > > > pointer to it to libxl. And then of course deregister and free before > > > we give it back to ocaml. > > > > I think this (moving the heap pointer from the GC) would require the > > prototype to be "value **" not "value *". > > > > Ian. >
Rob Hoes
2013-Dec-02 12:11 UTC
Re: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl
> On Mon, 2013-12-02 at 11:58 +0000, Rob Hoes wrote: > > > > > Can''t you just use caml_register_global_root on the address of > > > > > your value for_callback? I presume you can call this > > > > > register/unregister pair many times (for different events etc)? > > > > > > > > I think you need to register a value, not a pointer to a value. > > > > > > CAMLextern void caml_register_global_root (value *); > > > > > > It takes a pointer to the value to register. Is there any reason why > > > you can''t pass &for_callback given "value for_callback" in the local > context? > > > > I think we''d then have to rely on libxl to never copy to pointer > > internally, and throw away the original one. I am worried that this > > may happen, because the ao_how value is temporary. > > libxl will for certain take a copy of the pointer passed into the ao_how, > because it stashes it for later use. > > Either I''m very confused about what you are suggesting or pointers in C > don''t work like you seem to think. > > AFAICT caml_register_global_root takes a pointer to the value and locks > down the value itself, preventing it from moving. There is no way for it > to do anything else given a pointer and not a pointer to a pointer.That''s the point, I don''t think that is how it works. I think the value on the heap can still move around, but registering a pointer to it as a root means that: 1. the value does not get dropped by the GC; 2. the root/pointer will be updated by the GC when the value itself moves, so that you can still find it back. Rob> Ian. > > > > > > > > > > > > Otherwise you need to build some sort of C-side data structure > > > > > to hold all the various values, which is doable but a bit > > > > > tedious I > > > imagine. > > > > > > > > I think we can malloc a new "value" (pointer to ocaml heap), make > > > > it equal to the for_callback value, register this with the GC (so > > > > it can update this heap pointer when the actual value moves), and > > > > give a pointer to it to libxl. And then of course deregister and > > > > free before we give it back to ocaml. > > > > > > I think this (moving the heap pointer from the GC) would require the > > > prototype to be "value **" not "value *". > > > > > > Ian. > > >
Ian Campbell
2013-Dec-02 12:30 UTC
Re: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl
On Mon, 2013-12-02 at 12:11 +0000, Rob Hoes wrote:> > On Mon, 2013-12-02 at 11:58 +0000, Rob Hoes wrote: > > > > > > Can''t you just use caml_register_global_root on the address of > > > > > > your value for_callback? I presume you can call this > > > > > > register/unregister pair many times (for different events etc)? > > > > > > > > > > I think you need to register a value, not a pointer to a value. > > > > > > > > CAMLextern void caml_register_global_root (value *); > > > > > > > > It takes a pointer to the value to register. Is there any reason why > > > > you can''t pass &for_callback given "value for_callback" in the local > > context? > > > > > > I think we''d then have to rely on libxl to never copy to pointer > > > internally, and throw away the original one. I am worried that this > > > may happen, because the ao_how value is temporary. > > > > libxl will for certain take a copy of the pointer passed into the ao_how, > > because it stashes it for later use. > > > > Either I''m very confused about what you are suggesting or pointers in C > > don''t work like you seem to think. > > > > AFAICT caml_register_global_root takes a pointer to the value and locks > > down the value itself, preventing it from moving. There is no way for it > > to do anything else given a pointer and not a pointer to a pointer. > > That''s the point, I don''t think that is how it works. > > I think the value on the heap can still move around, but registering a pointer to it as a root means that: > 1. the value does not get dropped by the GC; > 2. the root/pointer will be updated by the GC when the value itself moves,There is no way this #2 can happen, the C interface takes a "value *" and therefore is only able to update the value and not the pointer to it. Do you mean: foo(value for_callback) { value *p = malloc(SOME) *p = for_callback value pval = Val_ptr(p); register_global(p) return (something containing p) } ? This would make p safe to pass to libxl, where it can copy it around to its hearts content. In this case the ocaml runtime can never change "p" from the point of view of the C code in order to move it around (since p is passed by value not reference to the registration fn). It could change *p but that doesn''t equate to moving the value around. Maybe we''ve just been talking at cross purposes? BTW I see now why foo(value for_callback) { register_global(&for_callback) } won''t work -- &for_callback is a stack address. Ian.
Rob Hoes
2013-Dec-02 14:06 UTC
Re: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl
> > I think the value on the heap can still move around, but registering a > pointer to it as a root means that: > > 1. the value does not get dropped by the GC; 2. the root/pointer will > > be updated by the GC when the value itself moves, > > There is no way this #2 can happen, the C interface takes a "value *" > and therefore is only able to update the value and not the pointer to it.I think the confusion here is due to the word "value". The C type "value" is a pointer to something (an ocaml value) on the ocaml heap, or an integer, or a pointer to something outside the ocaml heap. See http://caml.inria.fr/pub/docs/manual-ocaml-4.00/manual033.html#toc143. So when the GC updates something of the "value" type, it means that a pointer to something on the heap is updated.> Do you mean: > foo(value for_callback) > { > value *p = malloc(SOME) > *p = for_callback > value pval = Val_ptr(p); > register_global(p) > return (something containing p) > } > ? > > This would make p safe to pass to libxl, where it can copy it around to > its hearts content.This is indeed what I mean. Here, p is a pointer to something of type "value", which can be updated by the GC. The thing of type "value" is itself a pointer to something on the ocaml heap (or an integer, or a pointer to a C variable). The thing on the heap can move around, which is now fine, because the pointer to it (the "value") can be updated by the GC.> In this case the ocaml runtime can never change "p" from the point of view > of the C code in order to move it around (since p is passed by value not > reference to the registration fn). It could change *p but that doesn''t > equate to moving the value around. > > Maybe we''ve just been talking at cross purposes?I think we basically agree :) Rob> BTW I see now why > foo(value for_callback) > { > register_global(&for_callback) > } > won''t work -- &for_callback is a stack address. > > Ian.
Ian Campbell
2013-Dec-02 14:10 UTC
Re: [PATCH v6 08/12] libxl: ocaml: drop the ocaml heap lock before calling into libxl
On Mon, 2013-12-02 at 14:06 +0000, Rob Hoes wrote:> > > I think the value on the heap can still move around, but registering a > > pointer to it as a root means that: > > > 1. the value does not get dropped by the GC; 2. the root/pointer will > > > be updated by the GC when the value itself moves, > > > > There is no way this #2 can happen, the C interface takes a "value *" > > and therefore is only able to update the value and not the pointer to it. > > I think the confusion here is due to the word "value".Indeed. [...]> I think we basically agree :)Yes, sorry for the noise. Ian.