Konrad Rzeszutek Wilk
2013-May-13 16:56 UTC
[PATCH] Fix QEMU HVM hotplug race in QEMU traditional (Xen 4.1, Xen 4.2, and Xen 4.3) (v1).
Hey Ian and Stefano, Please see this thread: http://lists.xen.org/archives/html/xen-devel/2013-05/msg01053.html for the debug patches and some of the discussion. These three patches fix an race that has been in QEMU traditional for a long time. The guts of the bug is that if you have a guest with these options: vcpus=1 maxvcpus=32 and do ''xl vcpu-set <guest> 32'' the guest OS only seems to hotplug 13 or 14 of the vCPUs. To hotplug the rest I have to play games by offlining/onlining arbitrary number of them. Or I can do ''xl vcpu-set <guest> 8'', then ''16'', ''24'', etc. In groups of eight. These patches fix this and should be considered for backport. hw/piix4acpi.c | 18 ++++++++------- monitor.c | 4 ++-- sysemu.h | 4 +++- xenstore.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++------ 4 files changed, 81 insertions(+), 18 deletions(-) Konrad Rzeszutek Wilk (3): piix4acpi, xen, vcpu hotplug: Split the notification from the changes. piix4acpi, xen: Clarify that the qemu_set_irq calls just do an IRQ pulse. piix4acpi, xen, hotplug: Fix race with ACPI AML code and hotplug.
Konrad Rzeszutek Wilk
2013-May-13 16:56 UTC
[PATCH 1/3] piix4acpi, xen, vcpu hotplug: Split the notification from the changes.
This is a prepatory patch that splits the notification of an vCPU change from the actual changes to the vCPU array. Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> --- hw/piix4acpi.c | 12 ++++++++---- monitor.c | 4 ++-- sysemu.h | 4 +++- xenstore.c | 7 +++++-- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/hw/piix4acpi.c b/hw/piix4acpi.c index fb1e5c3..54d566b 100644 --- a/hw/piix4acpi.c +++ b/hw/piix4acpi.c @@ -814,22 +814,26 @@ static int disable_processor(GPEState *g, int cpu) return 1; } -void qemu_cpu_add_remove(int cpu, int state) +int qemu_cpu_add_remove(int cpu, int state) { if ((cpu <0) || (cpu >= vcpus)) { fprintf(stderr, "vcpu out of range, should be [0~%d]\n", vcpus - 1); - return; + return -EINVAL; } if (state) { if (!enable_processor(&gpe_state, cpu)) - return; + return 0; } else { if (!disable_processor(&gpe_state, cpu)) - return; + return 0; } fprintf(logfile, "%s vcpu %d\n", state ? "Add" : "Remove", cpu); + return 1; +} +void qemu_cpu_notify(void) +{ if (gpe_state.gpe0_en[0] & 4) { qemu_set_irq(sci_irq, 1); qemu_set_irq(sci_irq, 0); diff --git a/monitor.c b/monitor.c index 8915a6e..fa89c88 100644 --- a/monitor.c +++ b/monitor.c @@ -1485,8 +1485,8 @@ static void do_cpu_set_nr(int value, const char *status) term_printf("invalid status: %s\n", status); return; } - - qemu_cpu_add_remove(value, state); + if (qemu_cpu_add_remove(value, state)) + qemu_cpu_notify(); } /* Please update qemu-doc.texi when adding or changing commands */ diff --git a/sysemu.h b/sysemu.h index 66b8ab2..968258a 100644 --- a/sysemu.h +++ b/sysemu.h @@ -173,9 +173,11 @@ extern int drive_add(const char *file, const char *fmt, ...); extern int drive_init(struct drive_opt *arg, int snapshot, void *machine); /* acpi */ -void qemu_cpu_add_remove(int cpu, int state); +/* Returns 0 if nothing changed, 1 if added or removed and < 0 for errors. */ +int qemu_cpu_add_remove(int cpu, int state); void qemu_system_hot_add_init(void); void qemu_system_device_hot_add(int pcibus, int slot, int state); +void qemu_cpu_notify(void); /* device-hotplug */ diff --git a/xenstore.c b/xenstore.c index d3a4588..c861d36 100644 --- a/xenstore.c +++ b/xenstore.c @@ -1005,6 +1005,7 @@ static void xenstore_process_vcpu_set_event(char **vec) char *act = NULL; char *vcpustr, *node = vec[XS_WATCH_PATH]; unsigned int vcpu, len; + int changed = -EINVAL; vcpustr = strstr(node, "cpu/"); if (!vcpustr) { @@ -1020,13 +1021,15 @@ static void xenstore_process_vcpu_set_event(char **vec) } if (!strncmp(act, "online", len)) - qemu_cpu_add_remove(vcpu, 1); + changed = qemu_cpu_add_remove(vcpu, 1); else if (!strncmp(act, "offline", len)) - qemu_cpu_add_remove(vcpu, 0); + changed = qemu_cpu_add_remove(vcpu, 0); else fprintf(stderr, "vcpu-set: command error.\n"); free(act); + if (changed > 0) + qemu_cpu_notify(); return; } -- 1.8.1.4
Konrad Rzeszutek Wilk
2013-May-13 16:56 UTC
[PATCH 2/3] piix4acpi, xen: Clarify that the qemu_set_irq calls just do an IRQ pulse.
The "qemu_cpu_notify" raises and lowers the ACPI SCI line when the vCPU state has changed. Instead of doing the two functions, just use one function that describes exactly what it does. Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> --- hw/piix4acpi.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/hw/piix4acpi.c b/hw/piix4acpi.c index 54d566b..bf916d9 100644 --- a/hw/piix4acpi.c +++ b/hw/piix4acpi.c @@ -834,8 +834,6 @@ int qemu_cpu_add_remove(int cpu, int state) } void qemu_cpu_notify(void) { - if (gpe_state.gpe0_en[0] & 4) { - qemu_set_irq(sci_irq, 1); - qemu_set_irq(sci_irq, 0); - } + if (gpe_state.gpe0_en[0] & 4) + qemu_irq_pulse(sci_irq); } -- 1.8.1.4
Konrad Rzeszutek Wilk
2013-May-13 16:56 UTC
[PATCH 3/3] piix4acpi, xen, hotplug: Fix race with ACPI AML code and hotplug.
This is a race so the amount varies but on a 4PCPU box I seem to get only ~14 out of 16 vCPUs I want to online. The issue at hand is that QEMU xenstore.c hotplug code changes the vCPU array and triggers an ACPI SCI for each vCPU online/offline change. That means we modify the array of vCPUs as the guests ACPI AML code is reading it - resulting in the guest reading the data only once and not changing the CPU states appropiately. The fix is to seperate the vCPU array changes from the ACPI SCI notification. The code now will enumerate all of the vCPUs and change the vCPU array if there is a need for a change. If a change did occur then only _one_ ACPI SCI pulse is sent to the guest. The vCPU array at that point has the online/offline modified to what the user wanted to have. Specifically, if a user provided this command: xl vcpu-set latest 16 (guest config has vcpus=1, maxvcpus=32) QEMU and the guest (in this case Linux) would do: QEMU: Guest OS: -xenstore_process_vcpu_set_event -> Gets an XenBus notification for CPU1 -> Updates the gpe_state.cpus_state bitfield. -> Pulses the ACPI SCI - ACPI SCI kicks in -> Gets an XenBus notification for CPU2 -> Updates the gpe_state.cpus_state bitfield. -> Pulses the ACPI SCI -> Gets an XenBus notification for CPU3 -> Updates the gpe_state.cpus_state bitfield. -> Pulses the ACPI SCI ... - Method(PRST) invoked -> Gets an XenBus notification for CPU12 -> Updates the gpe_state.cpus_state bitfield. -> Pulses the ACPI SCI - reads AF00 for CPU state [gets 0xff] - reads AF02 [gets 0x7f] -> Gets an XenBus notification for CPU13 -> Updates the gpe_state.cpus_state bitfield. -> Pulses the ACPI SCI .. until VCPU 16 - Method PRST updates PR01 through 13 FLG entry. - PR01->PR13 _MAD invoked. - Brings up 13 CPUs. While QEMU updates the rest of the cpus_state bitfields the ACPI AML only does the CPU hotplug on those it had read. Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> --- xenstore.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 62 insertions(+), 6 deletions(-) diff --git a/xenstore.c b/xenstore.c index c861d36..31dbbe5 100644 --- a/xenstore.c +++ b/xenstore.c @@ -999,25 +999,24 @@ void xenstore_record_dm_state(const char *state) { xenstore_record_dm("state", state); } - -static void xenstore_process_vcpu_set_event(char **vec) +static int xenstore_process_one_vcpu_set_event(char *node) { char *act = NULL; - char *vcpustr, *node = vec[XS_WATCH_PATH]; + char *vcpustr; unsigned int vcpu, len; int changed = -EINVAL; vcpustr = strstr(node, "cpu/"); if (!vcpustr) { fprintf(stderr, "vcpu-set: watch node error.\n"); - return; + return changed; } sscanf(vcpustr, "cpu/%u", &vcpu); act = xs_read(xsh, XBT_NULL, node, &len); if (!act) { fprintf(stderr, "vcpu-set: no command yet.\n"); - return; + return changed; } if (!strncmp(act, "online", len)) @@ -1028,8 +1027,65 @@ static void xenstore_process_vcpu_set_event(char **vec) fprintf(stderr, "vcpu-set: command error.\n"); free(act); - if (changed > 0) + return changed; +} +static void xenstore_process_vcpu_set_event(char **vec) +{ + int changed = 0, rc, i, num = 0; + char *vcpu, **dir; + char *path = vec[XS_WATCH_PATH]; + + /* + * Process the event right away in case the loop below fails + * to enumerate the correct vCPU. + */ + rc = xenstore_process_one_vcpu_set_event(path); + if (rc > 0) + changed = 1; + /* + * We get: /local/domain/<domid>/cpu/<vcpu>/availability or + * (at init) /local/domain/<domid>/cpu [ignore it] and need to + * iterate over /local/domain/<domid>/cpu/ directory. + */ + vcpu = strstr(path, "cpu/"); + if (!vcpu) { + fprintf(stderr,"[%s]: %s has no CPU!\n", __func__, path); + return; + } + /* Eliminate ''/availability'' */ + vcpu[3] = ''\0''; + dir = xs_directory(xsh, XBT_NULL, path, &num); + + if (!dir) { + fprintf(stderr, "[%s]: directory %s has no dirs!\n", __func__, path); + return; + } + if (num != vcpus) + fprintf(stderr, "[%s]: %d (number of vcpu entries) != %d (maxvcpus)! "\ + "Continuing on..\n", __func__, num, vcpus); + + for (i = 0; i < num; i++) { + char *attr; + ssize_t len = strlen(path) + strlen(dir[i]) + strlen("availability") + + 3 /* account for two ''/'' and ''\0''*/; + attr = malloc(len); + + /* Construct "/local/domain/<domid>/cpu" (path) with <vcpu> (attr), + * and "availability" with ''/'' sprinkled around. */ + snprintf(attr, len, "%s/%s/%s", path, dir[i], "availability"); + rc = xenstore_process_one_vcpu_set_event(attr); + + free(attr); + if (rc > 0) + changed = 1; + if (rc < 0) + break; + } + free (dir); + if (changed > 0) { + fprintf(stderr, "Notifying OS about CPU hotplug changes.\n"); qemu_cpu_notify(); + } return; } -- 1.8.1.4
Stefano Stabellini
2013-May-13 17:01 UTC
Re: [PATCH 1/3] piix4acpi, xen, vcpu hotplug: Split the notification from the changes.
On Mon, 13 May 2013, Konrad Rzeszutek Wilk wrote:> This is a prepatory patch that splits the notification > of an vCPU change from the actual changes to the vCPU array. > > Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>> hw/piix4acpi.c | 12 ++++++++---- > monitor.c | 4 ++-- > sysemu.h | 4 +++- > xenstore.c | 7 +++++-- > 4 files changed, 18 insertions(+), 9 deletions(-) > > diff --git a/hw/piix4acpi.c b/hw/piix4acpi.c > index fb1e5c3..54d566b 100644 > --- a/hw/piix4acpi.c > +++ b/hw/piix4acpi.c > @@ -814,22 +814,26 @@ static int disable_processor(GPEState *g, int cpu) > return 1; > } > > -void qemu_cpu_add_remove(int cpu, int state) > +int qemu_cpu_add_remove(int cpu, int state) > { > if ((cpu <0) || (cpu >= vcpus)) { > fprintf(stderr, "vcpu out of range, should be [0~%d]\n", vcpus - 1); > - return; > + return -EINVAL; > } > > if (state) { > if (!enable_processor(&gpe_state, cpu)) > - return; > + return 0; > } else { > if (!disable_processor(&gpe_state, cpu)) > - return; > + return 0; > } > fprintf(logfile, "%s vcpu %d\n", state ? "Add" : "Remove", cpu); > > + return 1; > +} > +void qemu_cpu_notify(void) > +{ > if (gpe_state.gpe0_en[0] & 4) { > qemu_set_irq(sci_irq, 1); > qemu_set_irq(sci_irq, 0); > diff --git a/monitor.c b/monitor.c > index 8915a6e..fa89c88 100644 > --- a/monitor.c > +++ b/monitor.c > @@ -1485,8 +1485,8 @@ static void do_cpu_set_nr(int value, const char *status) > term_printf("invalid status: %s\n", status); > return; > } > - > - qemu_cpu_add_remove(value, state); > + if (qemu_cpu_add_remove(value, state)) > + qemu_cpu_notify(); > } > > /* Please update qemu-doc.texi when adding or changing commands */ > diff --git a/sysemu.h b/sysemu.h > index 66b8ab2..968258a 100644 > --- a/sysemu.h > +++ b/sysemu.h > @@ -173,9 +173,11 @@ extern int drive_add(const char *file, const char *fmt, ...); > extern int drive_init(struct drive_opt *arg, int snapshot, void *machine); > > /* acpi */ > -void qemu_cpu_add_remove(int cpu, int state); > +/* Returns 0 if nothing changed, 1 if added or removed and < 0 for errors. */ > +int qemu_cpu_add_remove(int cpu, int state); > void qemu_system_hot_add_init(void); > void qemu_system_device_hot_add(int pcibus, int slot, int state); > +void qemu_cpu_notify(void); > > /* device-hotplug */ > > diff --git a/xenstore.c b/xenstore.c > index d3a4588..c861d36 100644 > --- a/xenstore.c > +++ b/xenstore.c > @@ -1005,6 +1005,7 @@ static void xenstore_process_vcpu_set_event(char **vec) > char *act = NULL; > char *vcpustr, *node = vec[XS_WATCH_PATH]; > unsigned int vcpu, len; > + int changed = -EINVAL; > > vcpustr = strstr(node, "cpu/"); > if (!vcpustr) { > @@ -1020,13 +1021,15 @@ static void xenstore_process_vcpu_set_event(char **vec) > } > > if (!strncmp(act, "online", len)) > - qemu_cpu_add_remove(vcpu, 1); > + changed = qemu_cpu_add_remove(vcpu, 1); > else if (!strncmp(act, "offline", len)) > - qemu_cpu_add_remove(vcpu, 0); > + changed = qemu_cpu_add_remove(vcpu, 0); > else > fprintf(stderr, "vcpu-set: command error.\n"); > > free(act); > + if (changed > 0) > + qemu_cpu_notify(); > return; > } > > -- > 1.8.1.4 >
Stefano Stabellini
2013-May-13 17:01 UTC
Re: [PATCH 2/3] piix4acpi, xen: Clarify that the qemu_set_irq calls just do an IRQ pulse.
On Mon, 13 May 2013, Konrad Rzeszutek Wilk wrote:> The "qemu_cpu_notify" raises and lowers the ACPI SCI line when the > vCPU state has changed. > > Instead of doing the two functions, just use one function that > describes exactly what it does. > > Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>> hw/piix4acpi.c | 6 ++---- > 1 file changed, 2 insertions(+), 4 deletions(-) > > diff --git a/hw/piix4acpi.c b/hw/piix4acpi.c > index 54d566b..bf916d9 100644 > --- a/hw/piix4acpi.c > +++ b/hw/piix4acpi.c > @@ -834,8 +834,6 @@ int qemu_cpu_add_remove(int cpu, int state) > } > void qemu_cpu_notify(void) > { > - if (gpe_state.gpe0_en[0] & 4) { > - qemu_set_irq(sci_irq, 1); > - qemu_set_irq(sci_irq, 0); > - } > + if (gpe_state.gpe0_en[0] & 4) > + qemu_irq_pulse(sci_irq); > } > -- > 1.8.1.4 >
Stefano Stabellini
2013-May-13 17:17 UTC
Re: [PATCH 3/3] piix4acpi, xen, hotplug: Fix race with ACPI AML code and hotplug.
On Mon, 13 May 2013, Konrad Rzeszutek Wilk wrote:> This is a race so the amount varies but on a 4PCPU box > I seem to get only ~14 out of 16 vCPUs I want to online. > > The issue at hand is that QEMU xenstore.c hotplug code changes > the vCPU array and triggers an ACPI SCI for each vCPU > online/offline change. That means we modify the array of vCPUs > as the guests ACPI AML code is reading it - resulting in > the guest reading the data only once and not changing the > CPU states appropiately. > > The fix is to seperate the vCPU array changes from the ACPI SCI > notification. The code now will enumerate all of the vCPUs > and change the vCPU array if there is a need for a change. > If a change did occur then only _one_ ACPI SCI pulse is sent > to the guest. The vCPU array at that point has the online/offline > modified to what the user wanted to have. > > Specifically, if a user provided this command: > xl vcpu-set latest 16 > > (guest config has vcpus=1, maxvcpus=32) QEMU and the guest > (in this case Linux) would do: > > QEMU: Guest OS: > -xenstore_process_vcpu_set_event > -> Gets an XenBus notification for CPU1 > -> Updates the gpe_state.cpus_state bitfield. > -> Pulses the ACPI SCI > - ACPI SCI kicks in > > -> Gets an XenBus notification for CPU2 > -> Updates the gpe_state.cpus_state bitfield. > -> Pulses the ACPI SCI > > -> Gets an XenBus notification for CPU3 > -> Updates the gpe_state.cpus_state bitfield. > -> Pulses the ACPI SCI > ... > - Method(PRST) invoked > > -> Gets an XenBus notification for CPU12 > -> Updates the gpe_state.cpus_state bitfield. > -> Pulses the ACPI SCI > - reads AF00 for CPU state > [gets 0xff] > - reads AF02 [gets 0x7f] > > -> Gets an XenBus notification for CPU13 > -> Updates the gpe_state.cpus_state bitfield. > -> Pulses the ACPI SCI > > .. until VCPU 16 > - Method PRST updates > PR01 through 13 FLG > entry. > - PR01->PR13 _MAD > invoked. > > - Brings up 13 CPUs. > > While QEMU updates the rest of the cpus_state bitfields the ACPI AML > only does the CPU hotplug on those it had read. > > Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > --- > xenstore.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------ > 1 file changed, 62 insertions(+), 6 deletions(-) > > diff --git a/xenstore.c b/xenstore.c > index c861d36..31dbbe5 100644 > --- a/xenstore.c > +++ b/xenstore.c > @@ -999,25 +999,24 @@ void xenstore_record_dm_state(const char *state) > { > xenstore_record_dm("state", state); > } > - > -static void xenstore_process_vcpu_set_event(char **vec) > +static int xenstore_process_one_vcpu_set_event(char *node) > { > char *act = NULL; > - char *vcpustr, *node = vec[XS_WATCH_PATH]; > + char *vcpustr; > unsigned int vcpu, len; > int changed = -EINVAL; > > vcpustr = strstr(node, "cpu/"); > if (!vcpustr) { > fprintf(stderr, "vcpu-set: watch node error.\n"); > - return; > + return changed; > } > sscanf(vcpustr, "cpu/%u", &vcpu); > > act = xs_read(xsh, XBT_NULL, node, &len); > if (!act) { > fprintf(stderr, "vcpu-set: no command yet.\n"); > - return; > + return changed; > } > > if (!strncmp(act, "online", len)) > @@ -1028,8 +1027,65 @@ static void xenstore_process_vcpu_set_event(char **vec) > fprintf(stderr, "vcpu-set: command error.\n"); > > free(act); > - if (changed > 0) > + return changed; > +} > +static void xenstore_process_vcpu_set_event(char **vec) > +{ > + int changed = 0, rc, i, num = 0; > + char *vcpu, **dir; > + char *path = vec[XS_WATCH_PATH]; > + > + /* > + * Process the event right away in case the loop below fails > + * to enumerate the correct vCPU. > + */ > + rc = xenstore_process_one_vcpu_set_event(path); > + if (rc > 0) > + changed = 1;I am not sure I follow you here: why the loop below would fail?> + /* > + * We get: /local/domain/<domid>/cpu/<vcpu>/availability or > + * (at init) /local/domain/<domid>/cpu [ignore it] and need to > + * iterate over /local/domain/<domid>/cpu/ directory. > + */ > + vcpu = strstr(path, "cpu/"); > + if (!vcpu) { > + fprintf(stderr,"[%s]: %s has no CPU!\n", __func__, path); > + return; > + } > + /* Eliminate ''/availability'' */ > + vcpu[3] = ''\0''; > + dir = xs_directory(xsh, XBT_NULL, path, &num); > + > + if (!dir) { > + fprintf(stderr, "[%s]: directory %s has no dirs!\n", __func__, path); > + return; > + } > + if (num != vcpus) > + fprintf(stderr, "[%s]: %d (number of vcpu entries) != %d (maxvcpus)! "\ > + "Continuing on..\n", __func__, num, vcpus); > + > + for (i = 0; i < num; i++) { > + char *attr; > + ssize_t len = strlen(path) + strlen(dir[i]) + strlen("availability") + > + 3 /* account for two ''/'' and ''\0''*/; > + attr = malloc(len);just use XEN_BUFSIZE and put in on the stack> + > + /* Construct "/local/domain/<domid>/cpu" (path) with <vcpu> (attr), > + * and "availability" with ''/'' sprinkled around. */ > + snprintf(attr, len, "%s/%s/%s", path, dir[i], "availability"); > + rc = xenstore_process_one_vcpu_set_event(attr); > + > + free(attr); > + if (rc > 0) > + changed = 1; > + if (rc < 0) > + break;Given that the user could provide a mask of vcpus to enable/disable, shouldn''t we just process them all anyway?> + } > + free (dir); > + if (changed > 0) { > + fprintf(stderr, "Notifying OS about CPU hotplug changes.\n"); > qemu_cpu_notify(); > + } > return; > } > > -- > 1.8.1.4 >
Konrad Rzeszutek Wilk
2013-May-13 18:20 UTC
Re: [PATCH 3/3] piix4acpi, xen, hotplug: Fix race with ACPI AML code and hotplug.
On Mon, May 13, 2013 at 06:17:50PM +0100, Stefano Stabellini wrote:> On Mon, 13 May 2013, Konrad Rzeszutek Wilk wrote: > > This is a race so the amount varies but on a 4PCPU box > > I seem to get only ~14 out of 16 vCPUs I want to online. > > > > The issue at hand is that QEMU xenstore.c hotplug code changes > > the vCPU array and triggers an ACPI SCI for each vCPU > > online/offline change. That means we modify the array of vCPUs > > as the guests ACPI AML code is reading it - resulting in > > the guest reading the data only once and not changing the > > CPU states appropiately. > > > > The fix is to seperate the vCPU array changes from the ACPI SCI > > notification. The code now will enumerate all of the vCPUs > > and change the vCPU array if there is a need for a change. > > If a change did occur then only _one_ ACPI SCI pulse is sent > > to the guest. The vCPU array at that point has the online/offline > > modified to what the user wanted to have. > > > > Specifically, if a user provided this command: > > xl vcpu-set latest 16 > > > > (guest config has vcpus=1, maxvcpus=32) QEMU and the guest > > (in this case Linux) would do: > > > > QEMU: Guest OS: > > -xenstore_process_vcpu_set_event > > -> Gets an XenBus notification for CPU1 > > -> Updates the gpe_state.cpus_state bitfield. > > -> Pulses the ACPI SCI > > - ACPI SCI kicks in > > > > -> Gets an XenBus notification for CPU2 > > -> Updates the gpe_state.cpus_state bitfield. > > -> Pulses the ACPI SCI > > > > -> Gets an XenBus notification for CPU3 > > -> Updates the gpe_state.cpus_state bitfield. > > -> Pulses the ACPI SCI > > ... > > - Method(PRST) invoked > > > > -> Gets an XenBus notification for CPU12 > > -> Updates the gpe_state.cpus_state bitfield. > > -> Pulses the ACPI SCI > > - reads AF00 for CPU state > > [gets 0xff] > > - reads AF02 [gets 0x7f] > > > > -> Gets an XenBus notification for CPU13 > > -> Updates the gpe_state.cpus_state bitfield. > > -> Pulses the ACPI SCI > > > > .. until VCPU 16 > > - Method PRST updates > > PR01 through 13 FLG > > entry. > > - PR01->PR13 _MAD > > invoked. > > > > - Brings up 13 CPUs. > > > > While QEMU updates the rest of the cpus_state bitfields the ACPI AML > > only does the CPU hotplug on those it had read. > > > > Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > > --- > > xenstore.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------ > > 1 file changed, 62 insertions(+), 6 deletions(-) > > > > diff --git a/xenstore.c b/xenstore.c > > index c861d36..31dbbe5 100644 > > --- a/xenstore.c > > +++ b/xenstore.c > > @@ -999,25 +999,24 @@ void xenstore_record_dm_state(const char *state) > > { > > xenstore_record_dm("state", state); > > } > > - > > -static void xenstore_process_vcpu_set_event(char **vec) > > +static int xenstore_process_one_vcpu_set_event(char *node) > > { > > char *act = NULL; > > - char *vcpustr, *node = vec[XS_WATCH_PATH]; > > + char *vcpustr; > > unsigned int vcpu, len; > > int changed = -EINVAL; > > > > vcpustr = strstr(node, "cpu/"); > > if (!vcpustr) { > > fprintf(stderr, "vcpu-set: watch node error.\n"); > > - return; > > + return changed; > > } > > sscanf(vcpustr, "cpu/%u", &vcpu); > > > > act = xs_read(xsh, XBT_NULL, node, &len); > > if (!act) { > > fprintf(stderr, "vcpu-set: no command yet.\n"); > > - return; > > + return changed; > > } > > > > if (!strncmp(act, "online", len)) > > @@ -1028,8 +1027,65 @@ static void xenstore_process_vcpu_set_event(char **vec) > > fprintf(stderr, "vcpu-set: command error.\n"); > > > > free(act); > > - if (changed > 0) > > + return changed; > > +} > > +static void xenstore_process_vcpu_set_event(char **vec) > > +{ > > + int changed = 0, rc, i, num = 0; > > + char *vcpu, **dir; > > + char *path = vec[XS_WATCH_PATH]; > > + > > + /* > > + * Process the event right away in case the loop below fails > > + * to enumerate the correct vCPU. > > + */ > > + rc = xenstore_process_one_vcpu_set_event(path); > > + if (rc > 0) > > + changed = 1; > > I am not sure I follow you here: why the loop below would fail?If there is an error (for example the xs_read fails), then the loop below would stop as rc == -EINVAL.> > > + /* > > + * We get: /local/domain/<domid>/cpu/<vcpu>/availability or > > + * (at init) /local/domain/<domid>/cpu [ignore it] and need to > > + * iterate over /local/domain/<domid>/cpu/ directory. > > + */ > > + vcpu = strstr(path, "cpu/"); > > + if (!vcpu) { > > + fprintf(stderr,"[%s]: %s has no CPU!\n", __func__, path); > > + return; > > + } > > + /* Eliminate ''/availability'' */ > > + vcpu[3] = ''\0''; > > + dir = xs_directory(xsh, XBT_NULL, path, &num); > > + > > + if (!dir) { > > + fprintf(stderr, "[%s]: directory %s has no dirs!\n", __func__, path); > > + return; > > + } > > + if (num != vcpus) > > + fprintf(stderr, "[%s]: %d (number of vcpu entries) != %d (maxvcpus)! "\ > > + "Continuing on..\n", __func__, num, vcpus); > > + > > + for (i = 0; i < num; i++) { > > + char *attr; > > + ssize_t len = strlen(path) + strlen(dir[i]) + strlen("availability") + > > + 3 /* account for two ''/'' and ''\0''*/; > > + attr = malloc(len); > > just use XEN_BUFSIZE and put in on the stackOK.> > > > + > > + /* Construct "/local/domain/<domid>/cpu" (path) with <vcpu> (attr), > > + * and "availability" with ''/'' sprinkled around. */ > > + snprintf(attr, len, "%s/%s/%s", path, dir[i], "availability"); > > + rc = xenstore_process_one_vcpu_set_event(attr); > > + > > + free(attr); > > + if (rc > 0) > > + changed = 1; > > + if (rc < 0) > > + break; > > Given that the user could provide a mask of vcpus to enable/disable, > shouldn''t we just process them all anyway?rc < 0 is if an error occurs. 0 is if there are no changes. And xenstore_process_one_vcpu_set_event could fail (say xs_read). The top of the function would process the vCPU for which the event occured. Please keep in mind that this function is called for _each_ vCPU that has changed.> > + free (dir); > > + if (changed > 0) { > > + fprintf(stderr, "Notifying OS about CPU hotplug changes.\n"); > > qemu_cpu_notify(); > > + } > > return; > > } > > > > -- > > 1.8.1.4 > >
Konrad Rzeszutek Wilk
2013-May-13 19:28 UTC
Re: [PATCH 3/3] piix4acpi, xen, hotplug: Fix race with ACPI AML code and hotplug.
On Mon, May 13, 2013 at 02:20:00PM -0400, Konrad Rzeszutek Wilk wrote:> On Mon, May 13, 2013 at 06:17:50PM +0100, Stefano Stabellini wrote: > > On Mon, 13 May 2013, Konrad Rzeszutek Wilk wrote: > > > This is a race so the amount varies but on a 4PCPU box > > > I seem to get only ~14 out of 16 vCPUs I want to online. > > > > > > The issue at hand is that QEMU xenstore.c hotplug code changes > > > the vCPU array and triggers an ACPI SCI for each vCPU > > > online/offline change. That means we modify the array of vCPUs > > > as the guests ACPI AML code is reading it - resulting in > > > the guest reading the data only once and not changing the > > > CPU states appropiately. > > > > > > The fix is to seperate the vCPU array changes from the ACPI SCI > > > notification. The code now will enumerate all of the vCPUs > > > and change the vCPU array if there is a need for a change. > > > If a change did occur then only _one_ ACPI SCI pulse is sent > > > to the guest. The vCPU array at that point has the online/offline > > > modified to what the user wanted to have. > > > > > > Specifically, if a user provided this command: > > > xl vcpu-set latest 16 > > > > > > (guest config has vcpus=1, maxvcpus=32) QEMU and the guest > > > (in this case Linux) would do: > > > > > > QEMU: Guest OS: > > > -xenstore_process_vcpu_set_event > > > -> Gets an XenBus notification for CPU1 > > > -> Updates the gpe_state.cpus_state bitfield. > > > -> Pulses the ACPI SCI > > > - ACPI SCI kicks in > > > > > > -> Gets an XenBus notification for CPU2 > > > -> Updates the gpe_state.cpus_state bitfield. > > > -> Pulses the ACPI SCI > > > > > > -> Gets an XenBus notification for CPU3 > > > -> Updates the gpe_state.cpus_state bitfield. > > > -> Pulses the ACPI SCI > > > ... > > > - Method(PRST) invoked > > > > > > -> Gets an XenBus notification for CPU12 > > > -> Updates the gpe_state.cpus_state bitfield. > > > -> Pulses the ACPI SCI > > > - reads AF00 for CPU state > > > [gets 0xff] > > > - reads AF02 [gets 0x7f] > > > > > > -> Gets an XenBus notification for CPU13 > > > -> Updates the gpe_state.cpus_state bitfield. > > > -> Pulses the ACPI SCI > > > > > > .. until VCPU 16 > > > - Method PRST updates > > > PR01 through 13 FLG > > > entry. > > > - PR01->PR13 _MAD > > > invoked. > > > > > > - Brings up 13 CPUs. > > > > > > While QEMU updates the rest of the cpus_state bitfields the ACPI AML > > > only does the CPU hotplug on those it had read. > > > > > > Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > > > --- > > > xenstore.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------ > > > 1 file changed, 62 insertions(+), 6 deletions(-) > > > > > > diff --git a/xenstore.c b/xenstore.c > > > index c861d36..31dbbe5 100644 > > > --- a/xenstore.c > > > +++ b/xenstore.c > > > @@ -999,25 +999,24 @@ void xenstore_record_dm_state(const char *state) > > > { > > > xenstore_record_dm("state", state); > > > } > > > - > > > -static void xenstore_process_vcpu_set_event(char **vec) > > > +static int xenstore_process_one_vcpu_set_event(char *node) > > > { > > > char *act = NULL; > > > - char *vcpustr, *node = vec[XS_WATCH_PATH]; > > > + char *vcpustr; > > > unsigned int vcpu, len; > > > int changed = -EINVAL; > > > > > > vcpustr = strstr(node, "cpu/"); > > > if (!vcpustr) { > > > fprintf(stderr, "vcpu-set: watch node error.\n"); > > > - return; > > > + return changed; > > > } > > > sscanf(vcpustr, "cpu/%u", &vcpu); > > > > > > act = xs_read(xsh, XBT_NULL, node, &len); > > > if (!act) { > > > fprintf(stderr, "vcpu-set: no command yet.\n"); > > > - return; > > > + return changed; > > > } > > > > > > if (!strncmp(act, "online", len)) > > > @@ -1028,8 +1027,65 @@ static void xenstore_process_vcpu_set_event(char **vec) > > > fprintf(stderr, "vcpu-set: command error.\n"); > > > > > > free(act); > > > - if (changed > 0) > > > + return changed; > > > +} > > > +static void xenstore_process_vcpu_set_event(char **vec) > > > +{ > > > + int changed = 0, rc, i, num = 0; > > > + char *vcpu, **dir; > > > + char *path = vec[XS_WATCH_PATH]; > > > + > > > + /* > > > + * Process the event right away in case the loop below fails > > > + * to enumerate the correct vCPU. > > > + */ > > > + rc = xenstore_process_one_vcpu_set_event(path); > > > + if (rc > 0) > > > + changed = 1; > > > > I am not sure I follow you here: why the loop below would fail? > > If there is an error (for example the xs_read fails), then the loop > below would stop as rc == -EINVAL. > > > > > > + /* > > > + * We get: /local/domain/<domid>/cpu/<vcpu>/availability or > > > + * (at init) /local/domain/<domid>/cpu [ignore it] and need to > > > + * iterate over /local/domain/<domid>/cpu/ directory. > > > + */ > > > + vcpu = strstr(path, "cpu/"); > > > + if (!vcpu) { > > > + fprintf(stderr,"[%s]: %s has no CPU!\n", __func__, path); > > > + return; > > > + } > > > + /* Eliminate ''/availability'' */ > > > + vcpu[3] = ''\0''; > > > + dir = xs_directory(xsh, XBT_NULL, path, &num); > > > + > > > + if (!dir) { > > > + fprintf(stderr, "[%s]: directory %s has no dirs!\n", __func__, path); > > > + return; > > > + } > > > + if (num != vcpus) > > > + fprintf(stderr, "[%s]: %d (number of vcpu entries) != %d (maxvcpus)! "\ > > > + "Continuing on..\n", __func__, num, vcpus); > > > + > > > + for (i = 0; i < num; i++) { > > > + char *attr; > > > + ssize_t len = strlen(path) + strlen(dir[i]) + strlen("availability") + > > > + 3 /* account for two ''/'' and ''\0''*/; > > > + attr = malloc(len); > > > > just use XEN_BUFSIZE and put in on the stack > > OK. > > > > > > > + > > > + /* Construct "/local/domain/<domid>/cpu" (path) with <vcpu> (attr), > > > + * and "availability" with ''/'' sprinkled around. */ > > > + snprintf(attr, len, "%s/%s/%s", path, dir[i], "availability"); > > > + rc = xenstore_process_one_vcpu_set_event(attr); > > > + > > > + free(attr); > > > + if (rc > 0) > > > + changed = 1; > > > + if (rc < 0) > > > + break; > > > > Given that the user could provide a mask of vcpus to enable/disable, > > shouldn''t we just process them all anyway? > > rc < 0 is if an error occurs. 0 is if there are no changes. > > And xenstore_process_one_vcpu_set_event could fail (say xs_read). The top > of the function would process the vCPU for which the event occured. Please > keep in mind that this function is called for _each_ vCPU that has changed. > > > > + free (dir); > > > + if (changed > 0) { > > > + fprintf(stderr, "Notifying OS about CPU hotplug changes.\n"); > > > qemu_cpu_notify(); > > > + } > > > return; > > > } > > > > > > --New patch [I am still unsure whether you want me to just continue with the loop and ignore errors - and still at the start of the function process the event or restructure it so that the errors are ignored and the loop iterates over all of the vCPUs?] From 950424a5e6a481dd1b2b4fb2645e7a852c199b7f Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Date: Mon, 13 May 2013 11:55:42 -0400 Subject: [PATCH] piix4acpi, xen, hotplug: Fix race with ACPI AML code and hotplug. This is a race so the amount varies but on a 4PCPU box I seem to get only ~14 out of 16 vCPUs I want to online. The issue at hand is that QEMU xenstore.c hotplug code changes the vCPU array and triggers an ACPI SCI for each vCPU online/offline change. That means we modify the array of vCPUs as the guests ACPI AML code is reading it - resulting in the guest reading the data only once and not changing the CPU states appropiately. The fix is to seperate the vCPU array changes from the ACPI SCI notification. The code now will enumerate all of the vCPUs and change the vCPU array if there is a need for a change. If a change did occur then only _one_ ACPI SCI pulse is sent to the guest. The vCPU array at that point has the online/offline modified to what the user wanted to have. Specifically, if a user provided this command: xl vcpu-set latest 16 (guest config has vcpus=1, maxvcpus=32) QEMU and the guest (in this case Linux) would do: QEMU: Guest OS: -xenstore_process_vcpu_set_event -> Gets an XenBus notification for CPU1 -> Updates the gpe_state.cpus_state bitfield. -> Pulses the ACPI SCI - ACPI SCI kicks in -> Gets an XenBus notification for CPU2 -> Updates the gpe_state.cpus_state bitfield. -> Pulses the ACPI SCI -> Gets an XenBus notification for CPU3 -> Updates the gpe_state.cpus_state bitfield. -> Pulses the ACPI SCI ... - Method(PRST) invoked -> Gets an XenBus notification for CPU12 -> Updates the gpe_state.cpus_state bitfield. -> Pulses the ACPI SCI - reads AF00 for CPU state [gets 0xff] - reads AF02 [gets 0x7f] -> Gets an XenBus notification for CPU13 -> Updates the gpe_state.cpus_state bitfield. -> Pulses the ACPI SCI .. until VCPU 16 - Method PRST updates PR01 through 13 FLG entry. - PR01->PR13 _MAD invoked. - Brings up 13 CPUs. While QEMU updates the rest of the cpus_state bitfields the ACPI AML only does the CPU hotplug on those it had read. [v1: Use stack for the ''attr'' instead of malloc/free] Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> --- xenstore.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 files changed, 59 insertions(+), 6 deletions(-) diff --git a/xenstore.c b/xenstore.c index c861d36..b0d6f77 100644 --- a/xenstore.c +++ b/xenstore.c @@ -22,6 +22,7 @@ #include "pci.h" #include "qemu-timer.h" #include "qemu-xen.h" +#include "xen_backend.h" struct xs_handle *xsh = NULL; static char *media_filename[MAX_DRIVES+1]; @@ -999,25 +1000,24 @@ void xenstore_record_dm_state(const char *state) { xenstore_record_dm("state", state); } - -static void xenstore_process_vcpu_set_event(char **vec) +static int xenstore_process_one_vcpu_set_event(char *node) { char *act = NULL; - char *vcpustr, *node = vec[XS_WATCH_PATH]; + char *vcpustr; unsigned int vcpu, len; int changed = -EINVAL; vcpustr = strstr(node, "cpu/"); if (!vcpustr) { fprintf(stderr, "vcpu-set: watch node error.\n"); - return; + return changed; } sscanf(vcpustr, "cpu/%u", &vcpu); act = xs_read(xsh, XBT_NULL, node, &len); if (!act) { fprintf(stderr, "vcpu-set: no command yet.\n"); - return; + return changed; } if (!strncmp(act, "online", len)) @@ -1028,8 +1028,61 @@ static void xenstore_process_vcpu_set_event(char **vec) fprintf(stderr, "vcpu-set: command error.\n"); free(act); - if (changed > 0) + return changed; +} +static void xenstore_process_vcpu_set_event(char **vec) +{ + int changed = 0, rc, i, num = 0; + char *vcpu, **dir; + char *path = vec[XS_WATCH_PATH]; + + /* + * Process the event right away in case the loop below fails + * to get to vCPU that is in the event. + */ + rc = xenstore_process_one_vcpu_set_event(path); + if (rc > 0) + changed = 1; + /* + * We get: /local/domain/<domid>/cpu/<vcpu>/availability or + * (at init) /local/domain/<domid>/cpu [ignore it] and need to + * iterate over /local/domain/<domid>/cpu/ directory. + */ + vcpu = strstr(path, "cpu/"); + if (!vcpu) { + fprintf(stderr,"[%s]: %s has no CPU!\n", __func__, path); + return; + } + /* Eliminate ''/availability'' */ + vcpu[3] = ''\0''; + dir = xs_directory(xsh, XBT_NULL, path, &num); + + if (!dir) { + fprintf(stderr, "[%s]: directory %s has no dirs!\n", __func__, path); + return; + } + if (num != vcpus) + fprintf(stderr, "[%s]: %d (number of vcpu entries) != %d (maxvcpus)! "\ + "Continuing on..\n", __func__, num, vcpus); + + for (i = 0; i < num; i++) { + char attr[XEN_BUFSIZE]; + + /* Construct "/local/domain/<domid>/cpu" (path) with <vcpu> (attr), + * and "availability" with ''/'' sprinkled around. */ + snprintf(attr, XEN_BUFSIZE, "%s/%s/%s", path, dir[i], "availability"); + rc = xenstore_process_one_vcpu_set_event(attr); + + if (rc > 0) + changed = 1; + if (rc < 0) /* say xs_read failed */ + break; + } + free (dir); + if (changed > 0) { + fprintf(stderr, "Notifying OS about CPU hotplug changes.\n"); qemu_cpu_notify(); + } return; } -- 1.7.7.6
George Dunlap
2013-May-14 09:14 UTC
Re: [PATCH 1/3] piix4acpi, xen, vcpu hotplug: Split the notification from the changes.
On Mon, May 13, 2013 at 6:01 PM, Stefano Stabellini <stefano.stabellini@eu.citrix.com> wrote:> On Mon, 13 May 2013, Konrad Rzeszutek Wilk wrote: >> This is a prepatory patch that splits the notification >> of an vCPU change from the actual changes to the vCPU array. >> >> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > > Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>It looks like these only touch cpu hotplug code, so there should be little risk of affecting anything else, right? So from a release perspective, all the patches: Acked-by: George Dunlap <george.dunlap@eu.citrix.com>
Konrad Rzeszutek Wilk
2013-May-14 13:22 UTC
Re: [PATCH 1/3] piix4acpi, xen, vcpu hotplug: Split the notification from the changes.
On Tue, May 14, 2013 at 10:14:49AM +0100, George Dunlap wrote:> On Mon, May 13, 2013 at 6:01 PM, Stefano Stabellini > <stefano.stabellini@eu.citrix.com> wrote: > > On Mon, 13 May 2013, Konrad Rzeszutek Wilk wrote: > >> This is a prepatory patch that splits the notification > >> of an vCPU change from the actual changes to the vCPU array. > >> > >> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > > > > Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> > > It looks like these only touch cpu hotplug code, so there should be > little risk of affecting anything else, right?Right.> > So from a release perspective, all the patches: > Acked-by: George Dunlap <george.dunlap@eu.citrix.com>
Stefano Stabellini
2013-May-14 13:55 UTC
Re: [PATCH 3/3] piix4acpi, xen, hotplug: Fix race with ACPI AML code and hotplug.
On Mon, 13 May 2013, Konrad Rzeszutek Wilk wrote:> >From 950424a5e6a481dd1b2b4fb2645e7a852c199b7f Mon Sep 17 00:00:00 2001 > From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > Date: Mon, 13 May 2013 11:55:42 -0400 > Subject: [PATCH] piix4acpi, xen, hotplug: Fix race with ACPI AML code and > hotplug. > > This is a race so the amount varies but on a 4PCPU box > I seem to get only ~14 out of 16 vCPUs I want to online. > > The issue at hand is that QEMU xenstore.c hotplug code changes > the vCPU array and triggers an ACPI SCI for each vCPU > online/offline change. That means we modify the array of vCPUs > as the guests ACPI AML code is reading it - resulting in > the guest reading the data only once and not changing the > CPU states appropiately. > > The fix is to seperate the vCPU array changes from the ACPI SCI > notification. The code now will enumerate all of the vCPUs > and change the vCPU array if there is a need for a change. > If a change did occur then only _one_ ACPI SCI pulse is sent > to the guest. The vCPU array at that point has the online/offline > modified to what the user wanted to have. > > Specifically, if a user provided this command: > xl vcpu-set latest 16 > > (guest config has vcpus=1, maxvcpus=32) QEMU and the guest > (in this case Linux) would do: > > QEMU: Guest OS: > -xenstore_process_vcpu_set_event > -> Gets an XenBus notification for CPU1 > -> Updates the gpe_state.cpus_state bitfield. > -> Pulses the ACPI SCI > - ACPI SCI kicks in > > -> Gets an XenBus notification for CPU2 > -> Updates the gpe_state.cpus_state bitfield. > -> Pulses the ACPI SCI > > -> Gets an XenBus notification for CPU3 > -> Updates the gpe_state.cpus_state bitfield. > -> Pulses the ACPI SCI > ... > - Method(PRST) invoked > > -> Gets an XenBus notification for CPU12 > -> Updates the gpe_state.cpus_state bitfield. > -> Pulses the ACPI SCI > - reads AF00 for CPU state > [gets 0xff] > - reads AF02 [gets 0x7f] > > -> Gets an XenBus notification for CPU13 > -> Updates the gpe_state.cpus_state bitfield. > -> Pulses the ACPI SCI > > .. until VCPU 16 > - Method PRST updates > PR01 through 13 FLG > entry. > - PR01->PR13 _MAD > invoked. > > - Brings up 13 CPUs. > > While QEMU updates the rest of the cpus_state bitfields the ACPI AML > only does the CPU hotplug on those it had read. > > [v1: Use stack for the ''attr'' instead of malloc/free] > Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>> xenstore.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++----- > 1 files changed, 59 insertions(+), 6 deletions(-) > > diff --git a/xenstore.c b/xenstore.c > index c861d36..b0d6f77 100644 > --- a/xenstore.c > +++ b/xenstore.c > @@ -22,6 +22,7 @@ > #include "pci.h" > #include "qemu-timer.h" > #include "qemu-xen.h" > +#include "xen_backend.h" > > struct xs_handle *xsh = NULL; > static char *media_filename[MAX_DRIVES+1]; > @@ -999,25 +1000,24 @@ void xenstore_record_dm_state(const char *state) > { > xenstore_record_dm("state", state); > } > - > -static void xenstore_process_vcpu_set_event(char **vec) > +static int xenstore_process_one_vcpu_set_event(char *node) > { > char *act = NULL; > - char *vcpustr, *node = vec[XS_WATCH_PATH]; > + char *vcpustr; > unsigned int vcpu, len; > int changed = -EINVAL; > > vcpustr = strstr(node, "cpu/"); > if (!vcpustr) { > fprintf(stderr, "vcpu-set: watch node error.\n"); > - return; > + return changed; > } > sscanf(vcpustr, "cpu/%u", &vcpu); > > act = xs_read(xsh, XBT_NULL, node, &len); > if (!act) { > fprintf(stderr, "vcpu-set: no command yet.\n"); > - return; > + return changed; > } > > if (!strncmp(act, "online", len)) > @@ -1028,8 +1028,61 @@ static void xenstore_process_vcpu_set_event(char **vec) > fprintf(stderr, "vcpu-set: command error.\n"); > > free(act); > - if (changed > 0) > + return changed; > +} > +static void xenstore_process_vcpu_set_event(char **vec) > +{ > + int changed = 0, rc, i, num = 0; > + char *vcpu, **dir; > + char *path = vec[XS_WATCH_PATH]; > + > + /* > + * Process the event right away in case the loop below fails > + * to get to vCPU that is in the event. > + */ > + rc = xenstore_process_one_vcpu_set_event(path); > + if (rc > 0) > + changed = 1; > + /* > + * We get: /local/domain/<domid>/cpu/<vcpu>/availability or > + * (at init) /local/domain/<domid>/cpu [ignore it] and need to > + * iterate over /local/domain/<domid>/cpu/ directory. > + */ > + vcpu = strstr(path, "cpu/"); > + if (!vcpu) { > + fprintf(stderr,"[%s]: %s has no CPU!\n", __func__, path); > + return; > + } > + /* Eliminate ''/availability'' */ > + vcpu[3] = ''\0''; > + dir = xs_directory(xsh, XBT_NULL, path, &num); > + > + if (!dir) { > + fprintf(stderr, "[%s]: directory %s has no dirs!\n", __func__, path); > + return; > + } > + if (num != vcpus) > + fprintf(stderr, "[%s]: %d (number of vcpu entries) != %d (maxvcpus)! "\ > + "Continuing on..\n", __func__, num, vcpus); > + > + for (i = 0; i < num; i++) { > + char attr[XEN_BUFSIZE]; > + > + /* Construct "/local/domain/<domid>/cpu" (path) with <vcpu> (attr), > + * and "availability" with ''/'' sprinkled around. */ > + snprintf(attr, XEN_BUFSIZE, "%s/%s/%s", path, dir[i], "availability"); > + rc = xenstore_process_one_vcpu_set_event(attr); > + > + if (rc > 0) > + changed = 1; > + if (rc < 0) /* say xs_read failed */ > + break; > + } > + free (dir); > + if (changed > 0) { > + fprintf(stderr, "Notifying OS about CPU hotplug changes.\n"); > qemu_cpu_notify(); > + } > return; > }