Roger Pau Monne
2013-Nov-15 15:50 UTC
[PATCH RFC] pvh: clearly specify used parameters in vcpu_guest_context
The aim of this patch is to define a stable way in which PVH is going to do AP bringup. Since we are running inside of a HVM container, PVH should only need to set flags, cr3 and user_regs in order to bring up a vCPU, the rest can be set once the vCPU is started using the bare metal methods. Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> Cc: George Dunlap <george.dunlap@eu.citrix.com> Cc: Mukesh Rathor <mukesh.rathor@oracle.com> Cc: Jan Beulich <JBeulich@suse.com> Cc: Tim Deegan <tim@xen.org> Cc: Keir Fraser <keir@xen.org> --- xen/arch/x86/domain.c | 11 +++-------- xen/arch/x86/hvm/vmx/vmx.c | 4 ---- xen/include/public/arch-x86/xen.h | 6 ++---- 3 files changed, 5 insertions(+), 16 deletions(-) diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index a3868f9..b643eaf 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -704,9 +704,11 @@ int arch_set_info_guest( /* PVH 32bitfixme */ ASSERT(!compat); - if ( c(ctrlreg[1]) || c(ldt_base) || c(ldt_ents) || + if ( c(ctrlreg[0]) || c(ctrlreg[1]) || c(ctrlreg[2]) || + c(ctrlreg[4]) || c(ldt_base) || c(ldt_ents) || c(user_regs.cs) || c(user_regs.ss) || c(user_regs.es) || c(user_regs.ds) || c(user_regs.fs) || c(user_regs.gs) || + c(kernel_ss) || c(kernel_sp) || c.nat->gs_base_kernel || c.nat->gdt_ents || c.nat->fs_base || c.nat->gs_base_user ) return -EINVAL; } @@ -745,13 +747,6 @@ int arch_set_info_guest( if ( has_hvm_container_vcpu(v) ) { - /* - * NB: TF_kernel_mode is set unconditionally for HVM guests, - * so we always use the gs_base_kernel here. If we change this - * function to imitate the PV functionality, we'll need to - * make it pay attention to the kernel bit. - */ - hvm_set_info_guest(v, compat ? 0 : c.nat->gs_base_kernel); if ( is_hvm_vcpu(v) || v->is_initialised ) goto out; diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c index f0132a4..90999ca 100644 --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -1492,10 +1492,6 @@ static void vmx_set_info_guest(struct vcpu *v, uint64_t gs_base_kernel) __vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow); } - /* PVH 32bitfixme */ - if ( is_pvh_vcpu(v) ) - __vmwrite(GUEST_GS_BASE, gs_base_kernel); - vmx_vmcs_exit(v); } diff --git a/xen/include/public/arch-x86/xen.h b/xen/include/public/arch-x86/xen.h index 5d220ce..46736ae 100644 --- a/xen/include/public/arch-x86/xen.h +++ b/xen/include/public/arch-x86/xen.h @@ -161,10 +161,8 @@ typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ * - For HVM guests, the structures read include: fpu_ctxt (if * VGCT_I387_VALID is set), flags, user_regs, debugreg[*] * - * - PVH guests are the same as HVM guests, but additionally set cr3, - * and for 64-bit guests, gs_base_kernel. Additionally, the following - * entries must be 0: ctrlreg[1], ldt_base, ldt_ents, user_regs.{cs, - * ss, es, ds, fs, gs), gdt_ents, fs_base, and gs_base_user. + * - PVH guests are the same as HVM guests, but additionally use ctrlreg[3] to + * set cr3. All other unused fields must be set to 0. */ struct vcpu_guest_context { /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */ -- 1.7.7.5 (Apple Git-26) _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Jan Beulich
2013-Nov-15 16:32 UTC
Re: [PATCH RFC] pvh: clearly specify used parameters in vcpu_guest_context
>>> On 15.11.13 at 16:50, Roger Pau Monne <roger.pau@citrix.com> wrote: > --- a/xen/arch/x86/domain.c > +++ b/xen/arch/x86/domain.c > @@ -704,9 +704,11 @@ int arch_set_info_guest( > /* PVH 32bitfixme */ > ASSERT(!compat); > > - if ( c(ctrlreg[1]) || c(ldt_base) || c(ldt_ents) || > + if ( c(ctrlreg[0]) || c(ctrlreg[1]) || c(ctrlreg[2]) || > + c(ctrlreg[4]) || c(ldt_base) || c(ldt_ents) ||I think it should actually be a bug for the guest to request an all blank CR0 or CR4. Minimally CR0.PE, CR0.PG, and CR4.PAE would seem to be a valid requirement to be set. Apart from that ctrlreg[] is an 8-element array... And I don''t see debugreg[] being verified at all.> c(user_regs.cs) || c(user_regs.ss) || c(user_regs.es) || > c(user_regs.ds) || c(user_regs.fs) || c(user_regs.gs) || > + c(kernel_ss) || c(kernel_sp) || c.nat->gs_base_kernel ||So George and/or Mukesh found it necessary to set gs_base_kernel, and you rip it out? I''m curious as to what they''re going to say...> --- a/xen/arch/x86/hvm/vmx/vmx.c > +++ b/xen/arch/x86/hvm/vmx/vmx.c > @@ -1492,10 +1492,6 @@ static void vmx_set_info_guest(struct vcpu *v, uint64_t gs_base_kernel) > __vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow); > } > > - /* PVH 32bitfixme */ > - if ( is_pvh_vcpu(v) ) > - __vmwrite(GUEST_GS_BASE, gs_base_kernel);And if you rip it out, then you should remove the now unused function parameter here again. Jan
George Dunlap
2013-Nov-15 16:40 UTC
Re: [PATCH RFC] pvh: clearly specify used parameters in vcpu_guest_context
On 15/11/13 16:32, Jan Beulich wrote:>>>> On 15.11.13 at 16:50, Roger Pau Monne <roger.pau@citrix.com> wrote: >> --- a/xen/arch/x86/domain.c >> +++ b/xen/arch/x86/domain.c >> @@ -704,9 +704,11 @@ int arch_set_info_guest( >> /* PVH 32bitfixme */ >> ASSERT(!compat); >> >> - if ( c(ctrlreg[1]) || c(ldt_base) || c(ldt_ents) || >> + if ( c(ctrlreg[0]) || c(ctrlreg[1]) || c(ctrlreg[2]) || >> + c(ctrlreg[4]) || c(ldt_base) || c(ldt_ents) || > I think it should actually be a bug for the guest to request an > all blank CR0 or CR4. Minimally CR0.PE, CR0.PG, and CR4.PAE > would seem to be a valid requirement to be set. > > Apart from that ctrlreg[] is an 8-element array... And I don''t > see debugreg[] being verified at all. > >> c(user_regs.cs) || c(user_regs.ss) || c(user_regs.es) || >> c(user_regs.ds) || c(user_regs.fs) || c(user_regs.gs) || >> + c(kernel_ss) || c(kernel_sp) || c.nat->gs_base_kernel || > So George and/or Mukesh found it necessary to set > gs_base_kernel, and you rip it out? I''m curious as to what > they''re going to say...I didn''t find it necessary; I was mostly focused on merging the PVH and HVM codepaths without causing any regressions. It''s not obvious to me what''s special about gs_base_kernel, and I haven''t yet gone back to try to find out why Mukesh did it that way. -George
Jan Beulich
2013-Nov-15 16:45 UTC
Re: [PATCH RFC] pvh: clearly specify used parameters in vcpu_guest_context
>>> On 15.11.13 at 17:40, George Dunlap <george.dunlap@eu.citrix.com> wrote: > On 15/11/13 16:32, Jan Beulich wrote: >>>>> On 15.11.13 at 16:50, Roger Pau Monne <roger.pau@citrix.com> wrote: >>> --- a/xen/arch/x86/domain.c >>> +++ b/xen/arch/x86/domain.c >>> @@ -704,9 +704,11 @@ int arch_set_info_guest( >>> /* PVH 32bitfixme */ >>> ASSERT(!compat); >>> >>> - if ( c(ctrlreg[1]) || c(ldt_base) || c(ldt_ents) || >>> + if ( c(ctrlreg[0]) || c(ctrlreg[1]) || c(ctrlreg[2]) || >>> + c(ctrlreg[4]) || c(ldt_base) || c(ldt_ents) || >> I think it should actually be a bug for the guest to request an >> all blank CR0 or CR4. Minimally CR0.PE, CR0.PG, and CR4.PAE >> would seem to be a valid requirement to be set. >> >> Apart from that ctrlreg[] is an 8-element array... And I don''t >> see debugreg[] being verified at all. >> >>> c(user_regs.cs) || c(user_regs.ss) || c(user_regs.es) || >>> c(user_regs.ds) || c(user_regs.fs) || c(user_regs.gs) || >>> + c(kernel_ss) || c(kernel_sp) || c.nat->gs_base_kernel || >> So George and/or Mukesh found it necessary to set >> gs_base_kernel, and you rip it out? I''m curious as to what >> they''re going to say... > > I didn''t find it necessary; I was mostly focused on merging the PVH and > HVM codepaths without causing any regressions. It''s not obvious to me > what''s special about gs_base_kernel, and I haven''t yet gone back to try > to find out why Mukesh did it that way.Mukesh? Jan
Tim Deegan
2013-Nov-15 16:55 UTC
Re: [PATCH RFC] pvh: clearly specify used parameters in vcpu_guest_context
At 16:32 +0000 on 15 Nov (1384529574), Jan Beulich wrote:> >>> On 15.11.13 at 16:50, Roger Pau Monne <roger.pau@citrix.com> wrote: > > --- a/xen/arch/x86/domain.c > > +++ b/xen/arch/x86/domain.c > > @@ -704,9 +704,11 @@ int arch_set_info_guest( > > /* PVH 32bitfixme */ > > ASSERT(!compat); > > > > - if ( c(ctrlreg[1]) || c(ldt_base) || c(ldt_ents) || > > + if ( c(ctrlreg[0]) || c(ctrlreg[1]) || c(ctrlreg[2]) || > > + c(ctrlreg[4]) || c(ldt_base) || c(ldt_ents) || > > I think it should actually be a bug for the guest to request an > all blank CR0 or CR4. Minimally CR0.PE, CR0.PG, and CR4.PAE > would seem to be a valid requirement to be set.I think zero is better. Guest CRx are explicitly _not_ loaded from these fields so making them look like valid CRx values is just confusing. But even better would be to allow all these fields to be set to any valid values, and to load the state into the guest vCPU. I think that''s going to be more sensible once the current restriction that PVH vCPUs are always in long more goes away. Tim.
Roger Pau Monné
2013-Nov-15 16:59 UTC
Re: [PATCH RFC] pvh: clearly specify used parameters in vcpu_guest_context
On 15/11/13 17:32, Jan Beulich wrote:>>>> On 15.11.13 at 16:50, Roger Pau Monne <roger.pau@citrix.com> wrote: >> --- a/xen/arch/x86/domain.c >> +++ b/xen/arch/x86/domain.c >> @@ -704,9 +704,11 @@ int arch_set_info_guest( >> /* PVH 32bitfixme */ >> ASSERT(!compat); >> >> - if ( c(ctrlreg[1]) || c(ldt_base) || c(ldt_ents) || >> + if ( c(ctrlreg[0]) || c(ctrlreg[1]) || c(ctrlreg[2]) || >> + c(ctrlreg[4]) || c(ldt_base) || c(ldt_ents) || > > I think it should actually be a bug for the guest to request an > all blank CR0 or CR4. Minimally CR0.PE, CR0.PG, and CR4.PAE > would seem to be a valid requirement to be set.Without this patch you can set ctrlreg[4] (and ctrlreg[0]), but it''s going to be completely ignored, which is confusing IMHO.> Apart from that ctrlreg[] is an 8-element array... And I don''t > see debugreg[] being verified at all.Ack.> >> c(user_regs.cs) || c(user_regs.ss) || c(user_regs.es) || >> c(user_regs.ds) || c(user_regs.fs) || c(user_regs.gs) || >> + c(kernel_ss) || c(kernel_sp) || c.nat->gs_base_kernel || > > So George and/or Mukesh found it necessary to set > gs_base_kernel, and you rip it out? I''m curious as to what > they''re going to say... > >> --- a/xen/arch/x86/hvm/vmx/vmx.c >> +++ b/xen/arch/x86/hvm/vmx/vmx.c >> @@ -1492,10 +1492,6 @@ static void vmx_set_info_guest(struct vcpu *v, uint64_t gs_base_kernel) >> __vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow); >> } >> >> - /* PVH 32bitfixme */ >> - if ( is_pvh_vcpu(v) ) >> - __vmwrite(GUEST_GS_BASE, gs_base_kernel); > > And if you rip it out, then you should remove the now unused > function parameter here again.Yes, will wait for other comments and resend.
Mukesh Rathor
2013-Nov-15 21:56 UTC
Re: [PATCH RFC] pvh: clearly specify used parameters in vcpu_guest_context
On Fri, 15 Nov 2013 16:45:47 +0000 "Jan Beulich" <JBeulich@suse.com> wrote:> >>> On 15.11.13 at 17:40, George Dunlap <george.dunlap@eu.citrix.com> > >>> wrote: > > On 15/11/13 16:32, Jan Beulich wrote: > >>>>> On 15.11.13 at 16:50, Roger Pau Monne <roger.pau@citrix.com> > >>>>> wrote: > >>> --- a/xen/arch/x86/domain.c > >>> +++ b/xen/arch/x86/domain.c > >>> @@ -704,9 +704,11 @@ int arch_set_info_guest( > >>> /* PVH 32bitfixme */ > >>> ASSERT(!compat); > >>> > >>> - if ( c(ctrlreg[1]) || c(ldt_base) || c(ldt_ents) || > >>> + if ( c(ctrlreg[0]) || c(ctrlreg[1]) || c(ctrlreg[2]) || > >>> + c(ctrlreg[4]) || c(ldt_base) || c(ldt_ents) || > >> I think it should actually be a bug for the guest to request an > >> all blank CR0 or CR4. Minimally CR0.PE, CR0.PG, and CR4.PAE > >> would seem to be a valid requirement to be set. > >> > >> Apart from that ctrlreg[] is an 8-element array... And I don''t > >> see debugreg[] being verified at all. > >> > >>> c(user_regs.cs) || c(user_regs.ss) || > >>> c(user_regs.es) || c(user_regs.ds) || c(user_regs.fs) || > >>> c(user_regs.gs) || > >>> + c(kernel_ss) || c(kernel_sp) || > >>> c.nat->gs_base_kernel || > >> So George and/or Mukesh found it necessary to set > >> gs_base_kernel, and you rip it out? I''m curious as to what > >> they''re going to say... > > > > I didn''t find it necessary; I was mostly focused on merging the PVH > > and HVM codepaths without causing any regressions. It''s not > > obvious to me what''s special about gs_base_kernel, and I haven''t > > yet gone back to try to find out why Mukesh did it that way.Hi, We had talked about this while ago, but upon boot, the first thing a vcpu needs is access to kernel data structure. (A secondary vcpu is bootstrapped way up into the kernel). It would be possible to get rid of gs_base_kernel, but will take some work on the linux side. I can try and test it out, and let you guys know. thanks mukesh
Mukesh Rathor
2013-Nov-15 23:35 UTC
Re: [PATCH RFC] pvh: clearly specify used parameters in vcpu_guest_context
On Fri, 15 Nov 2013 17:55:09 +0100 Tim Deegan <tim@xen.org> wrote:> At 16:32 +0000 on 15 Nov (1384529574), Jan Beulich wrote: > > >>> On 15.11.13 at 16:50, Roger Pau Monne <roger.pau@citrix.com> > > >>> wrote: > > > --- a/xen/arch/x86/domain.c > > > +++ b/xen/arch/x86/domain.c > > > @@ -704,9 +704,11 @@ int arch_set_info_guest( > > > /* PVH 32bitfixme */ > > > ASSERT(!compat); > > > > > > - if ( c(ctrlreg[1]) || c(ldt_base) || c(ldt_ents) || > > > + if ( c(ctrlreg[0]) || c(ctrlreg[1]) || c(ctrlreg[2]) || > > > + c(ctrlreg[4]) || c(ldt_base) || c(ldt_ents) || > > > > I think it should actually be a bug for the guest to request an > > all blank CR0 or CR4. Minimally CR0.PE, CR0.PG, and CR4.PAE > > would seem to be a valid requirement to be set. > > I think zero is better. Guest CRx are explicitly _not_ loaded from > these fields so making them look like valid CRx values is just > confusing.+1.> But even better would be to allow all these fields to be set to any > valid values, and to load the state into the guest vCPU. I think > that''s going to be more sensible once the current restriction that PVH > vCPUs are always in long more goes away.Agree, it can be designed much better when the 32bit guest support is added. thanks, mukesh
Mukesh Rathor
2013-Nov-15 23:56 UTC
Re: [PATCH RFC] pvh: clearly specify used parameters in vcpu_guest_context
On Fri, 15 Nov 2013 13:56:54 -0800 Mukesh Rathor <mukesh.rathor@oracle.com> wrote:> On Fri, 15 Nov 2013 16:45:47 +0000 > "Jan Beulich" <JBeulich@suse.com> wrote: > > > >>> On 15.11.13 at 17:40, George Dunlap > > >>> <george.dunlap@eu.citrix.com> wrote: > > > On 15/11/13 16:32, Jan Beulich wrote: > > >>>>> On 15.11.13 at 16:50, Roger Pau Monne <roger.pau@citrix.com> > > >>>>> wrote: > > >>> --- a/xen/arch/x86/domain.c > > >>> +++ b/xen/arch/x86/domain.c > > >>> @@ -704,9 +704,11 @@ int arch_set_info_guest( > > >>> /* PVH 32bitfixme */ > > >>> ASSERT(!compat); > > >>> > > >>> - if ( c(ctrlreg[1]) || c(ldt_base) || c(ldt_ents) || > > >>> + if ( c(ctrlreg[0]) || c(ctrlreg[1]) || c(ctrlreg[2]) || > > >>> + c(ctrlreg[4]) || c(ldt_base) || c(ldt_ents) || > > >> I think it should actually be a bug for the guest to request an > > >> all blank CR0 or CR4. Minimally CR0.PE, CR0.PG, and CR4.PAE > > >> would seem to be a valid requirement to be set. > > >> > > >> Apart from that ctrlreg[] is an 8-element array... And I don''t > > >> see debugreg[] being verified at all. > > >> > > >>> c(user_regs.cs) || c(user_regs.ss) || > > >>> c(user_regs.es) || c(user_regs.ds) || c(user_regs.fs) || > > >>> c(user_regs.gs) || > > >>> + c(kernel_ss) || c(kernel_sp) || > > >>> c.nat->gs_base_kernel || > > >> So George and/or Mukesh found it necessary to set > > >> gs_base_kernel, and you rip it out? I''m curious as to what > > >> they''re going to say... > > > > > > I didn''t find it necessary; I was mostly focused on merging the > > > PVH and HVM codepaths without causing any regressions. It''s not > > > obvious to me what''s special about gs_base_kernel, and I haven''t > > > yet gone back to try to find out why Mukesh did it that way. > > Hi, > > We had talked about this while ago, but upon boot, the first thing > a vcpu needs is access to kernel data structure. (A secondary vcpu > is bootstrapped way up into the kernel). It would be possible to > get rid of gs_base_kernel, but will take some work on the linux side. > I can try and test it out, and let you guys know.Ok, looking at this more, I can hack cpu_bringup_and_idle() in linux to include a static variable for cpuid, which is the least a vcpu needs to know first thing. But, I think that would not work when vcpu hotplug support is added. Another option would be to pass cpuid in one of the registers, say rdi. Thus, rdi == cpuid will be passed to VCPUOP_initialise. In bringup function, the booting vcpu can then load it''s own gs based on the cpuid. If linux folks, konrad (CCd), is OK with this, we can remove gs_base_kernel. Otherwise, it''s such a small thing, hopefually it can stay. thanks, Mukesh
Mukesh Rathor
2013-Nov-16 01:12 UTC
Re: [PATCH RFC] pvh: clearly specify used parameters in vcpu_guest_context
On Fri, 15 Nov 2013 15:56:52 -0800 Mukesh Rathor <mukesh.rathor@oracle.com> wrote:> On Fri, 15 Nov 2013 13:56:54 -0800 > Mukesh Rathor <mukesh.rathor@oracle.com> wrote: > > > On Fri, 15 Nov 2013 16:45:47 +0000 > > "Jan Beulich" <JBeulich@suse.com> wrote: > >........> > > > We had talked about this while ago, but upon boot, the first thing > > a vcpu needs is access to kernel data structure. (A secondary vcpu > > is bootstrapped way up into the kernel). It would be possible to > > get rid of gs_base_kernel, but will take some work on the linux > > side. I can try and test it out, and let you guys know. > > Ok, looking at this more, I can hack cpu_bringup_and_idle() in > linux to include a static variable for cpuid, which is the least a > vcpu needs to know first thing. But, I think that would not work when > vcpu hotplug support is added. Another option would be to pass cpuid > in one of the registers, say rdi. Thus, rdi == cpuid will be passed > to VCPUOP_initialise. In bringup function, the booting vcpu can then > load it''s own gs based on the cpuid. If linux folks, konrad (CCd), is > OK with this, we can remove gs_base_kernel. Otherwise, it''s such a > small thing, hopefually it can stay.Konrad, Here''s the changes needed on linux side to remove gs_base_kernel. Since, I imagine vcpu hotplug would go thru the same path, it would work there also. If this looks OK to everybody, we can remove gs_base_kernel from the VCPUOP_initialise call. thanks, Mukesh diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index aa89bbf..54d1022 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -99,14 +99,8 @@ static void cpu_bringup(void) wmb(); /* make sure everything is out */ } -/* - * The vcpu is coming up with only gs_base_kernel set so we can do - * smp_processor_id() here. We need to set the rest of the context first thing. - */ -static void pvh_set_bringup_context(void) +static void pvh_set_bringup_context(int cpu) { - int cpu = smp_processor_id(); - load_percpu_segment(cpu); switch_to_new_gdt(smp_processor_id()); @@ -121,11 +115,12 @@ static void pvh_set_bringup_context(void) : : "r" (__KERNEL_DS), "a" (__KERNEL_CS) : "memory"); } -static void cpu_bringup_and_idle(void) +/* Note: cpu parameter is only relevant for PVH */ +static void cpu_bringup_and_idle(int cpu) { if (xen_feature(XENFEAT_auto_translated_physmap) && xen_feature(XENFEAT_supervisor_mode_kernel)) - pvh_set_bringup_context(); + pvh_set_bringup_context(cpu); cpu_bringup(); cpu_startup_entry(CPUHP_ONLINE); @@ -393,8 +388,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) /* Note: PVH is not yet supported on x86_32. */ ctxt->user_regs.fs = __KERNEL_PERCPU; ctxt->user_regs.gs = __KERNEL_STACK_CANARY; -#else - ctxt->gs_base_kernel = per_cpu_offset(cpu); #endif ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; @@ -426,6 +419,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) #ifdef CONFIG_X86_32 ctxt->event_callback_cs = __KERNEL_CS; ctxt->failsafe_callback_cs = __KERNEL_CS; +#else + ctxt->gs_base_kernel = per_cpu_offset(cpu); #endif ctxt->event_callback_eip (unsigned long)xen_hypervisor_callback; @@ -433,7 +428,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) (unsigned long)xen_failsafe_callback; ctxt->user_regs.cs = __KERNEL_CS; per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); - } + } else + ctxt->user_regs.rdi = cpu; ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
Mukesh Rathor
2013-Nov-16 01:56 UTC
Re: [PATCH RFC] pvh: clearly specify used parameters in vcpu_guest_context
On Fri, 15 Nov 2013 17:12:27 -0800 Mukesh Rathor <mukesh.rathor@oracle.com> wrote:> On Fri, 15 Nov 2013 15:56:52 -0800 > Mukesh Rathor <mukesh.rathor@oracle.com> wrote: > > > On Fri, 15 Nov 2013 13:56:54 -0800 > > Mukesh Rathor <mukesh.rathor@oracle.com> wrote: > > > > > On Fri, 15 Nov 2013 16:45:47 +0000 > > > "Jan Beulich" <JBeulich@suse.com> wrote: > > > > ........ > > > > > > We had talked about this while ago, but upon boot, the first thing > > > a vcpu needs is access to kernel data structure. (A secondary vcpu > > > is bootstrapped way up into the kernel). It would be possible to > > > get rid of gs_base_kernel, but will take some work on the linux > > > side. I can try and test it out, and let you guys know. > > > > Ok, looking at this more, I can hack cpu_bringup_and_idle() in > > linux to include a static variable for cpuid, which is the least a > > vcpu needs to know first thing. But, I think that would not work > > when vcpu hotplug support is added. Another option would be to pass > > cpuid in one of the registers, say rdi. Thus, rdi == cpuid will be > > passed to VCPUOP_initialise. In bringup function, the booting vcpu > > can then load it''s own gs based on the cpuid. If linux folks, > > konrad (CCd), is OK with this, we can remove gs_base_kernel. > > Otherwise, it''s such a small thing, hopefually it can stay. > > Konrad, > > Here''s the changes needed on linux side to remove gs_base_kernel. > Since, I imagine vcpu hotplug would go thru the same path, it would > work there also. If this looks OK to everybody, we can remove > gs_base_kernel from the VCPUOP_initialise call. > > thanks, > Mukesh > > > diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c > index aa89bbf..54d1022 100644 > --- a/arch/x86/xen/smp.c > +++ b/arch/x86/xen/smp.c > @@ -99,14 +99,8 @@ static void cpu_bringup(void) > wmb(); /* make sure everything is outRats, just realized this is not going to make sense because you don''t have the earlier patch: "PVH: simplify cpu_initialize_context. add pvh_set_bringup_context()" So, just look for it at: https://oss.oracle.com/git/mrathor/linux.git/ in branch tmp2. In a nutshell, for PVH we only need to pass eip, rsp, cr3, and gs_base_kernel. If we pass cpuid in rdi for context setting, we can get rid of gs_base_kernel. Since, PVH is now merged, Konrad I''ll officially send you the patches you see in my above tree after the merge to v3.12. I''ll do that next week, lmk what tree the patches should be based on for submission. thanks Mukesh
Roger Pau Monné
2013-Nov-16 08:02 UTC
Re: [PATCH RFC] pvh: clearly specify used parameters in vcpu_guest_context
On 16/11/13 02:56, Mukesh Rathor wrote:> On Fri, 15 Nov 2013 17:12:27 -0800 > Mukesh Rathor <mukesh.rathor@oracle.com> wrote: > >> On Fri, 15 Nov 2013 15:56:52 -0800 >> Mukesh Rathor <mukesh.rathor@oracle.com> wrote: >> >>> On Fri, 15 Nov 2013 13:56:54 -0800 >>> Mukesh Rathor <mukesh.rathor@oracle.com> wrote: >>> >>>> On Fri, 15 Nov 2013 16:45:47 +0000 >>>> "Jan Beulich" <JBeulich@suse.com> wrote: >>>> >> ........ >>>> >>>> We had talked about this while ago, but upon boot, the first thing >>>> a vcpu needs is access to kernel data structure. (A secondary vcpu >>>> is bootstrapped way up into the kernel). It would be possible to >>>> get rid of gs_base_kernel, but will take some work on the linux >>>> side. I can try and test it out, and let you guys know. >>> >>> Ok, looking at this more, I can hack cpu_bringup_and_idle() in >>> linux to include a static variable for cpuid, which is the least a >>> vcpu needs to know first thing. But, I think that would not work >>> when vcpu hotplug support is added. Another option would be to pass >>> cpuid in one of the registers, say rdi. Thus, rdi == cpuid will be >>> passed to VCPUOP_initialise. In bringup function, the booting vcpu >>> can then load it''s own gs based on the cpuid. If linux folks, >>> konrad (CCd), is OK with this, we can remove gs_base_kernel. >>> Otherwise, it''s such a small thing, hopefually it can stay. >> >> Konrad, >> >> Here''s the changes needed on linux side to remove gs_base_kernel. >> Since, I imagine vcpu hotplug would go thru the same path, it would >> work there also. If this looks OK to everybody, we can remove >> gs_base_kernel from the VCPUOP_initialise call. >> >> thanks, >> Mukesh >> >> >> diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c >> index aa89bbf..54d1022 100644 >> --- a/arch/x86/xen/smp.c >> +++ b/arch/x86/xen/smp.c >> @@ -99,14 +99,8 @@ static void cpu_bringup(void) >> wmb(); /* make sure everything is out > > Rats, just realized this is not going to make sense because you > don''t have the earlier patch: > "PVH: simplify cpu_initialize_context. add pvh_set_bringup_context()" > > So, just look for it at: > > https://oss.oracle.com/git/mrathor/linux.git/ in branch tmp2. > > In a nutshell, for PVH we only need to pass eip, rsp, cr3, and > gs_base_kernel. If we pass cpuid in rdi for context setting, we can get > rid of gs_base_kernel. > > Since, PVH is now merged, Konrad I''ll officially send you the > patches you see in my above tree after the merge to v3.12. I''ll do > that next week, lmk what tree the patches should be based on for > submission.I think we should wait to have a stable AP bringup interface before committing anything to Linux. I''m going to rework this patch in order to also check and load cr0/cr4 (as suggested by Tim).
Roger Pau Monné
2013-Nov-16 08:04 UTC
Re: [PATCH RFC] pvh: clearly specify used parameters in vcpu_guest_context
On 15/11/13 17:55, Tim Deegan wrote:> At 16:32 +0000 on 15 Nov (1384529574), Jan Beulich wrote: >>>>> On 15.11.13 at 16:50, Roger Pau Monne <roger.pau@citrix.com> wrote: >>> --- a/xen/arch/x86/domain.c >>> +++ b/xen/arch/x86/domain.c >>> @@ -704,9 +704,11 @@ int arch_set_info_guest( >>> /* PVH 32bitfixme */ >>> ASSERT(!compat); >>> >>> - if ( c(ctrlreg[1]) || c(ldt_base) || c(ldt_ents) || >>> + if ( c(ctrlreg[0]) || c(ctrlreg[1]) || c(ctrlreg[2]) || >>> + c(ctrlreg[4]) || c(ldt_base) || c(ldt_ents) || >> >> I think it should actually be a bug for the guest to request an >> all blank CR0 or CR4. Minimally CR0.PE, CR0.PG, and CR4.PAE >> would seem to be a valid requirement to be set. > > I think zero is better. Guest CRx are explicitly _not_ loaded from > these fields so making them look like valid CRx values is just confusing. > > But even better would be to allow all these fields to be set to any > valid values, and to load the state into the guest vCPU. I think > that''s going to be more sensible once the current restriction that PVH > vCPUs are always in long more goes away.Ack, I''m going to rework the patch in order to check and load the other control registers.
George Dunlap
2013-Nov-18 11:50 UTC
Re: [PATCH RFC] pvh: clearly specify used parameters in vcpu_guest_context
On 15/11/13 23:56, Mukesh Rathor wrote:> On Fri, 15 Nov 2013 13:56:54 -0800 > Mukesh Rathor <mukesh.rathor@oracle.com> wrote: > >> On Fri, 15 Nov 2013 16:45:47 +0000 >> "Jan Beulich" <JBeulich@suse.com> wrote: >> >>>>>> On 15.11.13 at 17:40, George Dunlap >>>>>> <george.dunlap@eu.citrix.com> wrote: >>>> On 15/11/13 16:32, Jan Beulich wrote: >>>>>>>> On 15.11.13 at 16:50, Roger Pau Monne <roger.pau@citrix.com> >>>>>>>> wrote: >>>>>> --- a/xen/arch/x86/domain.c >>>>>> +++ b/xen/arch/x86/domain.c >>>>>> @@ -704,9 +704,11 @@ int arch_set_info_guest( >>>>>> /* PVH 32bitfixme */ >>>>>> ASSERT(!compat); >>>>>> >>>>>> - if ( c(ctrlreg[1]) || c(ldt_base) || c(ldt_ents) || >>>>>> + if ( c(ctrlreg[0]) || c(ctrlreg[1]) || c(ctrlreg[2]) || >>>>>> + c(ctrlreg[4]) || c(ldt_base) || c(ldt_ents) || >>>>> I think it should actually be a bug for the guest to request an >>>>> all blank CR0 or CR4. Minimally CR0.PE, CR0.PG, and CR4.PAE >>>>> would seem to be a valid requirement to be set. >>>>> >>>>> Apart from that ctrlreg[] is an 8-element array... And I don''t >>>>> see debugreg[] being verified at all. >>>>> >>>>>> c(user_regs.cs) || c(user_regs.ss) || >>>>>> c(user_regs.es) || c(user_regs.ds) || c(user_regs.fs) || >>>>>> c(user_regs.gs) || >>>>>> + c(kernel_ss) || c(kernel_sp) || >>>>>> c.nat->gs_base_kernel || >>>>> So George and/or Mukesh found it necessary to set >>>>> gs_base_kernel, and you rip it out? I''m curious as to what >>>>> they''re going to say... >>>> I didn''t find it necessary; I was mostly focused on merging the >>>> PVH and HVM codepaths without causing any regressions. It''s not >>>> obvious to me what''s special about gs_base_kernel, and I haven''t >>>> yet gone back to try to find out why Mukesh did it that way. >> Hi, >> >> We had talked about this while ago, but upon boot, the first thing >> a vcpu needs is access to kernel data structure. (A secondary vcpu >> is bootstrapped way up into the kernel). It would be possible to >> get rid of gs_base_kernel, but will take some work on the linux side. >> I can try and test it out, and let you guys know. > Ok, looking at this more, I can hack cpu_bringup_and_idle() in > linux to include a static variable for cpuid, which is the least a > vcpu needs to know first thing. But, I think that would not work when > vcpu hotplug support is added. Another option would be to pass cpuid > in one of the registers, say rdi. Thus, rdi == cpuid will be passed > to VCPUOP_initialise. In bringup function, the booting vcpu can then > load it''s own gs based on the cpuid. If linux folks, konrad (CCd), is OK > with this, we can remove gs_base_kernel. Otherwise, it''s such a small > thing, hopefually it can stay.So Linux has the exact same problem on native, and (AFAICT) they solve it by simply writing gs to a global variable called initial_gs. Is there any reason why we can''t just do what native Linux does here? We should, in fact, be able to use the exact same variable. -George
George Dunlap
2013-Nov-18 12:15 UTC
Re: [PATCH RFC] pvh: clearly specify used parameters in vcpu_guest_context
On 16/11/13 01:56, Mukesh Rathor wrote:> On Fri, 15 Nov 2013 17:12:27 -0800 > Mukesh Rathor <mukesh.rathor@oracle.com> wrote: > >> On Fri, 15 Nov 2013 15:56:52 -0800 >> Mukesh Rathor <mukesh.rathor@oracle.com> wrote: >> >>> On Fri, 15 Nov 2013 13:56:54 -0800 >>> Mukesh Rathor <mukesh.rathor@oracle.com> wrote: >>> >>>> On Fri, 15 Nov 2013 16:45:47 +0000 >>>> "Jan Beulich" <JBeulich@suse.com> wrote: >>>> >> ........ >>>> We had talked about this while ago, but upon boot, the first thing >>>> a vcpu needs is access to kernel data structure. (A secondary vcpu >>>> is bootstrapped way up into the kernel). It would be possible to >>>> get rid of gs_base_kernel, but will take some work on the linux >>>> side. I can try and test it out, and let you guys know. >>> Ok, looking at this more, I can hack cpu_bringup_and_idle() in >>> linux to include a static variable for cpuid, which is the least a >>> vcpu needs to know first thing. But, I think that would not work >>> when vcpu hotplug support is added. Another option would be to pass >>> cpuid in one of the registers, say rdi. Thus, rdi == cpuid will be >>> passed to VCPUOP_initialise. In bringup function, the booting vcpu >>> can then load it''s own gs based on the cpuid. If linux folks, >>> konrad (CCd), is OK with this, we can remove gs_base_kernel. >>> Otherwise, it''s such a small thing, hopefually it can stay. >> Konrad, >> >> Here''s the changes needed on linux side to remove gs_base_kernel. >> Since, I imagine vcpu hotplug would go thru the same path, it would >> work there also. If this looks OK to everybody, we can remove >> gs_base_kernel from the VCPUOP_initialise call. >> >> thanks, >> Mukesh >> >> >> diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c >> index aa89bbf..54d1022 100644 >> --- a/arch/x86/xen/smp.c >> +++ b/arch/x86/xen/smp.c >> @@ -99,14 +99,8 @@ static void cpu_bringup(void) >> wmb(); /* make sure everything is out > Rats, just realized this is not going to make sense because you > don''t have the earlier patch: > "PVH: simplify cpu_initialize_context. add pvh_set_bringup_context()" > > So, just look for it at: > > https://oss.oracle.com/git/mrathor/linux.git/ in branch tmp2. > > In a nutshell, for PVH we only need to pass eip, rsp, cr3, and > gs_base_kernel. If we pass cpuid in rdi for context setting, we can get > rid of gs_base_kernel. > > Since, PVH is now merged, Konrad I''ll officially send you the > patches you see in my above tree after the merge to v3.12. I''ll do > that next week, lmk what tree the patches should be based on for > submission.The patches were checked in on the understanding that the interface was not yet finalized. I definitely think we should not submit the patches in Linux for inclusion until we have it sorted out. Originally we were thinking of this for 4.5, but we may well be able to get the interface nailed down for 4.4. -George
Konrad Rzeszutek Wilk
2013-Nov-18 16:17 UTC
Re: [PATCH RFC] pvh: clearly specify used parameters in vcpu_guest_context
On Mon, Nov 18, 2013 at 12:15:58PM +0000, George Dunlap wrote:> On 16/11/13 01:56, Mukesh Rathor wrote: > >On Fri, 15 Nov 2013 17:12:27 -0800 > >Mukesh Rathor <mukesh.rathor@oracle.com> wrote: > > > >>On Fri, 15 Nov 2013 15:56:52 -0800 > >>Mukesh Rathor <mukesh.rathor@oracle.com> wrote: > >> > >>>On Fri, 15 Nov 2013 13:56:54 -0800 > >>>Mukesh Rathor <mukesh.rathor@oracle.com> wrote: > >>> > >>>>On Fri, 15 Nov 2013 16:45:47 +0000 > >>>>"Jan Beulich" <JBeulich@suse.com> wrote: > >>>> > >>........ > >>>>We had talked about this while ago, but upon boot, the first thing > >>>>a vcpu needs is access to kernel data structure. (A secondary vcpu > >>>>is bootstrapped way up into the kernel). It would be possible to > >>>>get rid of gs_base_kernel, but will take some work on the linux > >>>>side. I can try and test it out, and let you guys know. > >>>Ok, looking at this more, I can hack cpu_bringup_and_idle() in > >>>linux to include a static variable for cpuid, which is the least a > >>>vcpu needs to know first thing. But, I think that would not work > >>>when vcpu hotplug support is added. Another option would be to pass > >>>cpuid in one of the registers, say rdi. Thus, rdi == cpuid will be > >>>passed to VCPUOP_initialise. In bringup function, the booting vcpu > >>>can then load it''s own gs based on the cpuid. If linux folks, > >>>konrad (CCd), is OK with this, we can remove gs_base_kernel. > >>>Otherwise, it''s such a small thing, hopefually it can stay. > >>Konrad, > >> > >>Here''s the changes needed on linux side to remove gs_base_kernel. > >>Since, I imagine vcpu hotplug would go thru the same path, it would > >>work there also. If this looks OK to everybody, we can remove > >>gs_base_kernel from the VCPUOP_initialise call. > >> > >>thanks, > >>Mukesh > >> > >> > >>diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c > >>index aa89bbf..54d1022 100644 > >>--- a/arch/x86/xen/smp.c > >>+++ b/arch/x86/xen/smp.c > >>@@ -99,14 +99,8 @@ static void cpu_bringup(void) > >> wmb(); /* make sure everything is out > >Rats, just realized this is not going to make sense because you > >don''t have the earlier patch: > >"PVH: simplify cpu_initialize_context. add pvh_set_bringup_context()" > > > >So, just look for it at: > > > >https://oss.oracle.com/git/mrathor/linux.git/ in branch tmp2. > > > >In a nutshell, for PVH we only need to pass eip, rsp, cr3, and > >gs_base_kernel. If we pass cpuid in rdi for context setting, we can get > >rid of gs_base_kernel. > > > >Since, PVH is now merged, Konrad I''ll officially send you the > >patches you see in my above tree after the merge to v3.12. I''ll do > >that next week, lmk what tree the patches should be based on for > >submission.Please base it on stable/pvh.v9. Or if you have a branch where you had done: git checkout v3.12 git merge konrad/stable/pvh.v9 [reworked the merge issues] --> this is the baseline <--- git am -s < "Xen/PVH: simplifiy..." You would want to post the patches _past_ the baseline. Also, do include the prefix ''xen/pvh'' in your Linux patches.> > The patches were checked in on the understanding that the interface > was not yet finalized. I definitely think we should not submit the > patches in Linux for inclusion until we have it sorted out.Keep in mind that the merge window for Linux has openned, which means that the next merge window in which PVH would be an candidate for is in 60 days.> Originally we were thinking of this for 4.5, but we may well be able > to get the interface nailed down for 4.4. > > -George
Konrad Rzeszutek Wilk
2013-Nov-18 16:19 UTC
Re: [PATCH RFC] pvh: clearly specify used parameters in vcpu_guest_context
On Fri, Nov 15, 2013 at 05:12:27PM -0800, Mukesh Rathor wrote:> On Fri, 15 Nov 2013 15:56:52 -0800 > Mukesh Rathor <mukesh.rathor@oracle.com> wrote: > > > On Fri, 15 Nov 2013 13:56:54 -0800 > > Mukesh Rathor <mukesh.rathor@oracle.com> wrote: > > > > > On Fri, 15 Nov 2013 16:45:47 +0000 > > > "Jan Beulich" <JBeulich@suse.com> wrote: > > > > ........ > > > > > > We had talked about this while ago, but upon boot, the first thing > > > a vcpu needs is access to kernel data structure. (A secondary vcpu > > > is bootstrapped way up into the kernel). It would be possible to > > > get rid of gs_base_kernel, but will take some work on the linux > > > side. I can try and test it out, and let you guys know. > > > > Ok, looking at this more, I can hack cpu_bringup_and_idle() in > > linux to include a static variable for cpuid, which is the least a > > vcpu needs to know first thing. But, I think that would not work when > > vcpu hotplug support is added. Another option would be to pass cpuid > > in one of the registers, say rdi. Thus, rdi == cpuid will be passed > > to VCPUOP_initialise. In bringup function, the booting vcpu can then > > load it''s own gs based on the cpuid. If linux folks, konrad (CCd), is > > OK with this, we can remove gs_base_kernel. Otherwise, it''s such a > > small thing, hopefually it can stay. > > Konrad, > > Here''s the changes needed on linux side to remove gs_base_kernel. > Since, I imagine vcpu hotplug would go thru the same path, it would > work there also. If this looks OK to everybody, we can remove > gs_base_kernel from the VCPUOP_initialise call.I think it is OK.> > thanks, > Mukesh > > > diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c > index aa89bbf..54d1022 100644 > --- a/arch/x86/xen/smp.c > +++ b/arch/x86/xen/smp.c > @@ -99,14 +99,8 @@ static void cpu_bringup(void) > wmb(); /* make sure everything is out */ > } > > -/* > - * The vcpu is coming up with only gs_base_kernel set so we can do > - * smp_processor_id() here. We need to set the rest of the context first thing. > - */ > -static void pvh_set_bringup_context(void) > +static void pvh_set_bringup_context(int cpu) > { > - int cpu = smp_processor_id(); > - > load_percpu_segment(cpu); > switch_to_new_gdt(smp_processor_id()); > > @@ -121,11 +115,12 @@ static void pvh_set_bringup_context(void) > : : "r" (__KERNEL_DS), "a" (__KERNEL_CS) : "memory"); > } > > -static void cpu_bringup_and_idle(void) > +/* Note: cpu parameter is only relevant for PVH */ > +static void cpu_bringup_and_idle(int cpu) > { > if (xen_feature(XENFEAT_auto_translated_physmap) && > xen_feature(XENFEAT_supervisor_mode_kernel)) > - pvh_set_bringup_context(); > + pvh_set_bringup_context(cpu); > > cpu_bringup(); > cpu_startup_entry(CPUHP_ONLINE); > @@ -393,8 +388,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) > /* Note: PVH is not yet supported on x86_32. */ > ctxt->user_regs.fs = __KERNEL_PERCPU; > ctxt->user_regs.gs = __KERNEL_STACK_CANARY; > -#else > - ctxt->gs_base_kernel = per_cpu_offset(cpu); > #endif > ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; > > @@ -426,6 +419,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) > #ifdef CONFIG_X86_32 > ctxt->event_callback_cs = __KERNEL_CS; > ctxt->failsafe_callback_cs = __KERNEL_CS; > +#else > + ctxt->gs_base_kernel = per_cpu_offset(cpu); > #endif > ctxt->event_callback_eip > (unsigned long)xen_hypervisor_callback; > @@ -433,7 +428,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) > (unsigned long)xen_failsafe_callback; > ctxt->user_regs.cs = __KERNEL_CS; > per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); > - } > + } else > + ctxt->user_regs.rdi = cpu; > > ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); > ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); >