Intel's new CPUs support SMEP (Supervisor Mode Execution Protection). SMEP
prevents software operating with CPL < 3 (supervisor mode) from fetching
instructions from any linear address with a valid translation for which the
U/S flag (bit 2) is 1 in every paging-structure entry controlling the
translation for that linear address. This patch adds SMEP support for HVM
guests.

Signed-off-by: Yang Wei <wei.y.yang@intel.com>
Signed-off-by: Shan Haitao <haitao.shan@intel.com>
Signed-off-by: Li Xin <xin.li@intel.com>

diff -r 0c0884fd8b49 tools/libxc/xc_cpufeature.h
--- a/tools/libxc/xc_cpufeature.h       Fri Jun 03 21:39:00 2011 +0100
+++ b/tools/libxc/xc_cpufeature.h       Sun Jun 05 16:51:48 2011 +0800
@@ -124,5 +124,6 @@
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx) */
 #define X86_FEATURE_FSGSBASE     0 /* {RD,WR}{FS,GS}BASE instructions */
+#define X86_FEATURE_SMEP         7 /* Supervisor Mode Execution Protection */
 
 #endif /* __LIBXC_CPUFEATURE_H */
diff -r 0c0884fd8b49 tools/libxc/xc_cpuid_x86.c
--- a/tools/libxc/xc_cpuid_x86.c        Fri Jun 03 21:39:00 2011 +0100
+++ b/tools/libxc/xc_cpuid_x86.c        Sun Jun 05 16:51:48 2011 +0800
@@ -351,6 +351,14 @@ static void xc_cpuid_hvm_policy(
         clear_bit(X86_FEATURE_PAE, regs[3]);
         break;
 
+    case 0x00000007: /* Intel-defined CPU features */
+        if ( input[1] == 0 ) {
+            regs[1] &= bitmaskof(X86_FEATURE_SMEP);
+        } else
+            regs[1] = 0;
+        regs[0] = regs[2] = regs[3] = 0;
+        break;
+
     case 0x0000000d:
         xc_cpuid_config_xsave(xch, domid, xfeature_mask, input, regs);
         break;
diff -r 0c0884fd8b49 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Fri Jun 03 21:39:00 2011 +0100
+++ b/xen/arch/x86/hvm/hvm.c    Sun Jun 05 16:51:48 2011 +0800
@@ -1664,7 +1664,8 @@ int hvm_set_cr4(unsigned long value)
     hvm_update_guest_cr(v, 4);
 
     /* Modifying CR4.{PSE,PAE,PGE} invalidates all TLB entries, inc. Global. */
-    if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) ) {
+    if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE |
+                             X86_CR4_PAE | X86_CR4_SMEP) ) {
         if ( !nestedhvm_vmswitch_in_progress(v) && nestedhvm_vcpu_in_guestmode(v) )
             paging_update_nestedmode(v);
         else
@@ -2312,7 +2313,8 @@ enum hvm_copy_result hvm_copy_from_guest
 enum hvm_copy_result hvm_fetch_from_guest_virt(
     void *buf, unsigned long vaddr, int size, uint32_t pfec)
 {
-    if ( hvm_nx_enabled(current) )
+    if ( hvm_nx_enabled(current) ||
+         (!(pfec & PFEC_user_mode) && hvm_smep_enabled(current)) )
         pfec |= PFEC_insn_fetch;
     return __hvm_copy(buf, vaddr, size,
                       HVMCOPY_from_guest | HVMCOPY_fault | HVMCOPY_virt,
@@ -2338,7 +2340,8 @@ enum hvm_copy_result hvm_copy_from_guest
 enum hvm_copy_result hvm_fetch_from_guest_virt_nofault(
    void *buf, unsigned long vaddr, int size, uint32_t pfec)
 {
-    if ( hvm_nx_enabled(current) )
+    if ( hvm_nx_enabled(current) ||
+         (!(pfec & PFEC_user_mode) && hvm_smep_enabled(current)) )
         pfec |= PFEC_insn_fetch;
     return __hvm_copy(buf, vaddr, size,
                       HVMCOPY_from_guest | HVMCOPY_no_fault | HVMCOPY_virt,
@@ -2408,6 +2411,10 @@ void hvm_cpuid(unsigned int input, unsig
         *ecx |= (v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_OSXSAVE) ?
                     cpufeat_mask(X86_FEATURE_OSXSAVE) : 0;
         break;
+    case 0x7:
+        if ( (count == 0) && !cpu_has_smep )
+            *ebx &= ~cpufeat_mask(X86_FEATURE_SMEP);
+        break;
     case 0xb:
         /* Fix the x2APIC identifier. */
         *edx = v->vcpu_id * 2;
diff -r 0c0884fd8b49 xen/arch/x86/mm/guest_walk.c
--- a/xen/arch/x86/mm/guest_walk.c      Fri Jun 03 21:39:00 2011 +0100
+++ b/xen/arch/x86/mm/guest_walk.c      Sun Jun 05 16:51:48 2011 +0800
@@ -131,7 +131,7 @@ guest_walk_tables(struct vcpu *v, struct
     guest_l3e_t *l3p = NULL;
     guest_l4e_t *l4p;
 #endif
-    uint32_t gflags, mflags, iflags, rc = 0;
+    uint32_t gflags, mflags, iflags, user_flag = _PAGE_USER, rc = 0;
     int pse;
 
     perfc_incr(guest_walk);
@@ -153,6 +153,7 @@ guest_walk_tables(struct vcpu *v, struct
     l4p = (guest_l4e_t *) top_map;
     gw->l4e = l4p[guest_l4_table_offset(va)];
     gflags = guest_l4e_get_flags(gw->l4e) ^ iflags;
+    user_flag &= gflags;
     rc |= ((gflags & mflags) ^ mflags);
     if ( rc & _PAGE_PRESENT ) goto out;
 
@@ -167,6 +168,7 @@ guest_walk_tables(struct vcpu *v, struct
     /* Get the l3e and check its flags*/
     gw->l3e = l3p[guest_l3_table_offset(va)];
     gflags = guest_l3e_get_flags(gw->l3e) ^ iflags;
+    user_flag &= gflags;
     rc |= ((gflags & mflags) ^ mflags);
     if ( rc & _PAGE_PRESENT )
         goto out;
@@ -204,6 +206,7 @@ guest_walk_tables(struct vcpu *v, struct
 #endif /* All levels... */
 
     gflags = guest_l2e_get_flags(gw->l2e) ^ iflags;
+    user_flag &= gflags;
     rc |= ((gflags & mflags) ^ mflags);
     if ( rc & _PAGE_PRESENT )
         goto out;
@@ -268,6 +271,7 @@ guest_walk_tables(struct vcpu *v, struct
             goto out;
         gw->l1e = l1p[guest_l1_table_offset(va)];
         gflags = guest_l1e_get_flags(gw->l1e) ^ iflags;
+        user_flag &= gflags;
         rc |= ((gflags & mflags) ^ mflags);
     }
 
@@ -277,6 +281,11 @@ guest_walk_tables(struct vcpu *v, struct
      * walkers behave this way. */
     if ( rc == 0 )
     {
+        if ( guest_supports_smep(v) && user_flag &&
+             ((pfec & (PFEC_insn_fetch|PFEC_user_mode)) == PFEC_insn_fetch) ) {
+            rc = _PAGE_SMEP;
+            goto out;
+        }
 #if GUEST_PAGING_LEVELS == 4 /* 64-bit only... */
         if ( set_ad_bits(l4p + guest_l4_table_offset(va), &gw->l4e, 0) )
             paging_mark_dirty(d, mfn_x(gw->l4mfn));
diff -r 0c0884fd8b49 xen/include/asm-x86/guest_pt.h
--- a/xen/include/asm-x86/guest_pt.h    Fri Jun 03 21:39:00 2011 +0100
+++ b/xen/include/asm-x86/guest_pt.h    Sun Jun 05 16:51:48 2011 +0800
@@ -203,6 +203,13 @@ guest_supports_nx(struct vcpu *v)
     return hvm_nx_enabled(v);
 }
 
+static inline int
+guest_supports_smep(struct vcpu *v)
+{
+    if ( !is_hvm_vcpu(v) )
+        return 0;
+    return hvm_smep_enabled(v);
+}
 
 /* Some bits are invalid in any pagetable entry. */
 #if GUEST_PAGING_LEVELS == 2
diff -r 0c0884fd8b49 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h     Fri Jun 03 21:39:00 2011 +0100
+++ b/xen/include/asm-x86/hvm/hvm.h     Sun Jun 05 16:51:48 2011 +0800
@@ -212,6 +212,8 @@ int hvm_girq_dest_2_vcpu_id(struct domai
     (!!((v)->arch.hvm_vcpu.guest_cr[0] & X86_CR0_WP))
 #define hvm_pae_enabled(v) \
     (hvm_paging_enabled(v) && ((v)->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PAE))
+#define hvm_smep_enabled(v) \
+    (hvm_paging_enabled(v) && ((v)->arch.hvm_vcpu.guest_cr[4] & X86_CR4_SMEP))
 #define hvm_nx_enabled(v) \
     (!!((v)->arch.hvm_vcpu.guest_efer & EFER_NX))
 
@@ -321,6 +323,7 @@ static inline int hvm_do_pmu_interrupt(s
         X86_CR4_DE  | X86_CR4_PSE | X86_CR4_PAE |       \
         X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE |       \
         X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT |           \
+        (cpu_has_smep ? X86_CR4_SMEP : 0) |             \
         (xsave_enabled(_v) ? X86_CR4_OSXSAVE : 0))))
 
 /* These exceptions must always be intercepted. */
diff -r 0c0884fd8b49 xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h        Fri Jun 03 21:39:00 2011 +0100
+++ b/xen/include/asm-x86/page.h        Sun Jun 05 16:51:48 2011 +0800
@@ -326,6 +326,7 @@ void setup_idle_pagetable(void);
 #define _PAGE_PSE_PAT  0x1000U
 #define _PAGE_PAGED    0x2000U
 #define _PAGE_SHARED   0x4000U
+#define _PAGE_SMEP     0x8000U
 
 /*
  * Debug option: Ensure that granted mappings are not implicitly unmapped.
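For reference, the feature bit being whitelisted above is reported in CPUID
leaf 7, sub-leaf 0, EBX bit 7 (matching X86_FEATURE_SMEP in the libxc hunk).
A minimal host-side check using GCC's <cpuid.h> is sketched below; it is
illustrative only and is not part of the patch, nor how Xen or libxc query
the feature.

    #include <cpuid.h>
    #include <stdio.h>

    /* Check for the feature bit the patch whitelists for guests:
     * CPUID leaf 7, sub-leaf 0, EBX bit 7 (SMEP). */
    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if ( __get_cpuid_max(0, NULL) < 7 )
        {
            printf("CPUID leaf 7 not available\n");
            return 0;
        }

        __cpuid_count(7, 0, eax, ebx, ecx, edx);
        printf("SMEP %ssupported by this CPU\n", (ebx & (1u << 7)) ? "" : "not ");
        return 0;
    }
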
Hi,

Thanks for the patch.

At 19:19 +0800 on 05 Jun (1307301551), Li, Xin wrote:
> @@ -2312,7 +2313,8 @@ enum hvm_copy_result hvm_copy_from_guest
> enum hvm_copy_result hvm_fetch_from_guest_virt(
>     void *buf, unsigned long vaddr, int size, uint32_t pfec)
> {
> -    if ( hvm_nx_enabled(current) )
> +    if ( hvm_nx_enabled(current) ||
> +         (!(pfec & PFEC_user_mode) && hvm_smep_enabled(current)) )

Shouldn't that be
"if ( hvm_nx_enabled(current) || hvm_smep_enabled(current) )"?

>         pfec |= PFEC_insn_fetch;
>     return __hvm_copy(buf, vaddr, size,
>                       HVMCOPY_from_guest | HVMCOPY_fault | HVMCOPY_virt,
> @@ -2338,7 +2340,8 @@ enum hvm_copy_result hvm_copy_from_guest
> enum hvm_copy_result hvm_fetch_from_guest_virt_nofault(
>     void *buf, unsigned long vaddr, int size, uint32_t pfec)
> {
> -    if ( hvm_nx_enabled(current) )
> +    if ( hvm_nx_enabled(current) ||
> +         (!(pfec & PFEC_user_mode) && hvm_smep_enabled(current)) )

Likewise.

>         pfec |= PFEC_insn_fetch;
>     return __hvm_copy(buf, vaddr, size,
>                       HVMCOPY_from_guest | HVMCOPY_no_fault | HVMCOPY_virt,
> @@ -2408,6 +2411,10 @@ void hvm_cpuid(unsigned int input, unsig
>         *ecx |= (v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_OSXSAVE) ?
>                     cpufeat_mask(X86_FEATURE_OSXSAVE) : 0;
>         break;
> +    case 0x7:
> +        if ( (count == 0) && !cpu_has_smep )
> +            *ebx &= ~cpufeat_mask(X86_FEATURE_SMEP);
> +        break;
>     case 0xb:
>         /* Fix the x2APIC identifier. */
>         *edx = v->vcpu_id * 2;
> diff -r 0c0884fd8b49 xen/arch/x86/mm/guest_walk.c
> --- a/xen/arch/x86/mm/guest_walk.c      Fri Jun 03 21:39:00 2011 +0100
> +++ b/xen/arch/x86/mm/guest_walk.c      Sun Jun 05 16:51:48 2011 +0800
> @@ -131,7 +131,7 @@ guest_walk_tables(struct vcpu *v, struct
>     guest_l3e_t *l3p = NULL;
>     guest_l4e_t *l4p;
> #endif
> -    uint32_t gflags, mflags, iflags, rc = 0;
> +    uint32_t gflags, mflags, iflags, user_flag = _PAGE_USER, rc = 0;
>     int pse;
> 
>     perfc_incr(guest_walk);
> @@ -153,6 +153,7 @@ guest_walk_tables(struct vcpu *v, struct
>     l4p = (guest_l4e_t *) top_map;
>     gw->l4e = l4p[guest_l4_table_offset(va)];
>     gflags = guest_l4e_get_flags(gw->l4e) ^ iflags;
> +    user_flag &= gflags;

There's no need to add SMEP-specific code at every level. The existing
code already checks for flags that must be clear, so just arrange for
_PAGE_USER to be in both mflags and iflags whenever SMEP is enabled and
PFEC_user is clear.

>     rc |= ((gflags & mflags) ^ mflags);
>     if ( rc & _PAGE_PRESENT ) goto out;
> 
> @@ -167,6 +168,7 @@ guest_walk_tables(struct vcpu *v, struct
>     /* Get the l3e and check its flags*/
>     gw->l3e = l3p[guest_l3_table_offset(va)];
>     gflags = guest_l3e_get_flags(gw->l3e) ^ iflags;
> +    user_flag &= gflags;
>     rc |= ((gflags & mflags) ^ mflags);
>     if ( rc & _PAGE_PRESENT )
>         goto out;
> @@ -204,6 +206,7 @@ guest_walk_tables(struct vcpu *v, struct
> #endif /* All levels... */
> 
>     gflags = guest_l2e_get_flags(gw->l2e) ^ iflags;
> +    user_flag &= gflags;
>     rc |= ((gflags & mflags) ^ mflags);
>     if ( rc & _PAGE_PRESENT )
>         goto out;
> @@ -268,6 +271,7 @@ guest_walk_tables(struct vcpu *v, struct
>             goto out;
>         gw->l1e = l1p[guest_l1_table_offset(va)];
>         gflags = guest_l1e_get_flags(gw->l1e) ^ iflags;
> +        user_flag &= gflags;
>         rc |= ((gflags & mflags) ^ mflags);
>     }
> 
> @@ -277,6 +281,11 @@ guest_walk_tables(struct vcpu *v, struct
>      * walkers behave this way. */
>     if ( rc == 0 )
>     {
> +        if ( guest_supports_smep(v) && user_flag &&
> +             ((pfec & (PFEC_insn_fetch|PFEC_user_mode)) == PFEC_insn_fetch) ) {
> +            rc = _PAGE_SMEP;
> +            goto out;
> +        }

I think this hunk will probably go away entirely, but if not, please
don't put it between the code below and the comment above that describes
it.

> #if GUEST_PAGING_LEVELS == 4 /* 64-bit only... */
>         if ( set_ad_bits(l4p + guest_l4_table_offset(va), &gw->l4e, 0) )
>             paging_mark_dirty(d, mfn_x(gw->l4mfn));
> diff -r 0c0884fd8b49 xen/include/asm-x86/guest_pt.h
> --- a/xen/include/asm-x86/guest_pt.h    Fri Jun 03 21:39:00 2011 +0100
> +++ b/xen/include/asm-x86/guest_pt.h    Sun Jun 05 16:51:48 2011 +0800
> @@ -203,6 +203,13 @@ guest_supports_nx(struct vcpu *v)
>     return hvm_nx_enabled(v);
> }
> 
> +static inline int
> +guest_supports_smep(struct vcpu *v)
> +{
> +    if ( !is_hvm_vcpu(v) )
> +        return 0;
> +    return hvm_smep_enabled(v);
> +}
> 
> /* Some bits are invalid in any pagetable entry. */
> #if GUEST_PAGING_LEVELS == 2
> diff -r 0c0884fd8b49 xen/include/asm-x86/hvm/hvm.h
> --- a/xen/include/asm-x86/hvm/hvm.h     Fri Jun 03 21:39:00 2011 +0100
> +++ b/xen/include/asm-x86/hvm/hvm.h     Sun Jun 05 16:51:48 2011 +0800
> @@ -212,6 +212,8 @@ int hvm_girq_dest_2_vcpu_id(struct domai
>     (!!((v)->arch.hvm_vcpu.guest_cr[0] & X86_CR0_WP))
> #define hvm_pae_enabled(v) \
>     (hvm_paging_enabled(v) && ((v)->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PAE))
> +#define hvm_smep_enabled(v) \
> +    (hvm_paging_enabled(v) && ((v)->arch.hvm_vcpu.guest_cr[4] & X86_CR4_SMEP))
> #define hvm_nx_enabled(v) \
>     (!!((v)->arch.hvm_vcpu.guest_efer & EFER_NX))
> 
> @@ -321,6 +323,7 @@ static inline int hvm_do_pmu_interrupt(s
>         X86_CR4_DE  | X86_CR4_PSE | X86_CR4_PAE |       \
>         X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE |       \
>         X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT |           \
> +        (cpu_has_smep ? X86_CR4_SMEP : 0) |             \
>         (xsave_enabled(_v) ? X86_CR4_OSXSAVE : 0))))
> 
> /* These exceptions must always be intercepted. */
> diff -r 0c0884fd8b49 xen/include/asm-x86/page.h
> --- a/xen/include/asm-x86/page.h        Fri Jun 03 21:39:00 2011 +0100
> +++ b/xen/include/asm-x86/page.h        Sun Jun 05 16:51:48 2011 +0800
> @@ -326,6 +326,7 @@ void setup_idle_pagetable(void);
> #define _PAGE_PSE_PAT  0x1000U
> #define _PAGE_PAGED    0x2000U
> #define _PAGE_SHARED   0x4000U
> +#define _PAGE_SMEP     0x8000U

What does this new code mean? You added code that returns it but not
any that checks for it.

Cheers,

Tim.

> 
> /*
>  * Debug option: Ensure that granted mappings are not implicitly unmapped.

-- 
Tim Deegan <Tim.Deegan@citrix.com>
Principal Software Engineer, Xen Platform Team
Citrix Systems UK Ltd.  (Company #02937203, SL9 0BG)
> > -    if ( hvm_nx_enabled(current) )
> > +    if ( hvm_nx_enabled(current) ||
> > +         (!(pfec & PFEC_user_mode) && hvm_smep_enabled(current)) )
> 
> Shouldn't that be
> "if ( hvm_nx_enabled(current) || hvm_smep_enabled(current) )"?

An SMEP fault happens when
1) the current privilege level is < 3, and

> > +         (!(pfec & PFEC_user_mode) && hvm_smep_enabled(current)) )
> 
> Likewise.
> 
> >     l4p = (guest_l4e_t *) top_map;
> >     gw->l4e = l4p[guest_l4_table_offset(va)];
> >     gflags = guest_l4e_get_flags(gw->l4e) ^ iflags;
> > +    user_flag &= gflags;
> 
> There's no need to add SMEP-specific code at every level. The existing
> code already checks for flags that must be clear, so just arrange for
> _PAGE_USER to be in both mflags and iflags whenever SMEP is enabled and
> PFEC_user is clear.

2) the user (U/S) flag is set, instead of clear, in the page-table entries at
every level.

In the current code, what I read is that it assumes some permission(s) are
missing in guest_walk_tables(). NX is special, but your code wisely used
inverted logic to seamlessly cover it. I did want to reuse the existing
logic, but it seems it can't accommodate the SMEP logic easily. Please advise!

> > @@ -277,6 +281,11 @@ guest_walk_tables(struct vcpu *v, struct
> >      * walkers behave this way. */
> >     if ( rc == 0 )
> >     {
> > +        if ( guest_supports_smep(v) && user_flag &&
> > +             ((pfec & (PFEC_insn_fetch|PFEC_user_mode)) == PFEC_insn_fetch) ) {
> > +            rc = _PAGE_SMEP;
> > +            goto out;
> > +        }
> 
> I think this hunk will probably go away entirely, but if not, please
> don't put it between the code below and the comment above that describes
> it.

I didn't figure out a better way than this; I will add comments if we have to
do it this way. Or do you have any other suggestion?

> > +#define _PAGE_SMEP     0x8000U
> 
> What does this new code mean? You added code that returns it but not
> any that checks for it.

Actually the current logic doesn't check what is missing returned from
guest_walk_tables() unless it needs to change the error code, and the callers
mostly return INVALID_GFN. I introduced _PAGE_SMEP just to let the caller
know it is not 0. Probably I can reuse one of the existing _PAGE_ macros, but
the memory virtualization code is subtle, and I'm afraid I'll introduce bugs
in other parts. That is to say, I do want to hear from you :)

Thanks!
-Xin
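To make the mismatch being discussed concrete: the existing per-level test in
guest_walk_tables() has roughly the shape below. This is a simplified,
self-contained model with hypothetical names (entry_flags, check_level), not
the real Xen function.

    #include <stdint.h>
    #include <stdio.h>

    #define _PAGE_PRESENT  0x001U
    #define _PAGE_RW       0x002U
    #define _PAGE_USER     0x004U

    /* mflags = bits that must be set for the access; iflags = bits that
     * must be clear (e.g. the folded NX bit).  XOR-ing iflags into the
     * entry's flags turns "must be clear" into "must be set", so one
     * expression covers both kinds of requirement. */
    uint32_t check_level(uint32_t entry_flags, uint32_t mflags, uint32_t iflags)
    {
        uint32_t gflags = entry_flags ^ iflags;  /* flip the must-be-clear bits */
        return (gflags & mflags) ^ mflags;       /* nonzero => a required bit is missing */
    }

    int main(void)
    {
        /* A user-mode write to a page whose L1 entry lacks _PAGE_RW: */
        uint32_t mflags = _PAGE_PRESENT | _PAGE_RW | _PAGE_USER;
        uint32_t rc = 0;

        rc |= check_level(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER, mflags, 0); /* L2: ok    */
        rc |= check_level(_PAGE_PRESENT | _PAGE_USER,            mflags, 0); /* L1: no RW */

        /* rc is OR-accumulated, so it expresses "a required bit is missing
         * at some level" -- not "U/S is set at every level", which is what
         * SMEP needs.  That is the mismatch described above. */
        printf("rc = %#x\n", rc);
        return 0;
    }
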
At 18:51 +0800 on 06 Jun (1307386302), Li, Xin wrote:
> > > -    if ( hvm_nx_enabled(current) )
> > > +    if ( hvm_nx_enabled(current) ||
> > > +         (!(pfec & PFEC_user_mode) && hvm_smep_enabled(current)) )
> > 
> > Shouldn't that be
> > "if ( hvm_nx_enabled(current) || hvm_smep_enabled(current) )"?
> 
> An SMEP fault happens when
> 1) the current privilege level is < 3, and

But this code is setting the PFEC_insn_fetch bit, not triggering a page
fault. And the new PRM says (4.7):

    I/D flag (bit 4). This flag is 1 if (1) the access causing the
    page-fault exception was an instruction fetch; and (2) either
    (a) CR4.SMEP = 1; or (b) both (i) CR4.PAE = 1 (either PAE paging or
    IA-32e paging is in use); and (ii) IA32_EFER.NXE = 1. Otherwise, the
    flag is 0. This flag describes the access causing the page-fault
    exception, not the access rights specified by paging.

> > There's no need to add SMEP-specific code at every level. The existing
> > code already checks for flags that must be clear, so just arrange for
> > _PAGE_USER to be in both mflags and iflags whenever SMEP is enabled and
> > PFEC_user is clear.
> 
> 2) the user (U/S) flag is set, instead of clear, in the page-table entries
> at every level.

Oh I see - yes, this introduces a whole new kind of logic in pagetable
walks. I've attached a version of your patch that I think will do the
right thing, and that's tidied up in a few other places. Can you check
to see that it does what you need?

Cheers,

Tim.

-- 
Tim Deegan <Tim.Deegan@citrix.com>
Principal Software Engineer, Xen Platform Team
Citrix Systems UK Ltd.  (Company #02937203, SL9 0BG)
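The attached patch itself is not reproduced in this archive, so the following
is only an assumption about the approach being described: do the walk as if
_PAGE_USER were a required bit, then invert that one bit of the accumulated
result, so that "U/S set at every level" becomes the failure case for a
supervisor-mode instruction fetch. A self-contained sketch with hypothetical
names (walk_check, level_flags), not the actual attachment:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define _PAGE_PRESENT    0x001U
    #define _PAGE_USER       0x004U
    #define PFEC_user_mode   0x004U
    #define PFEC_insn_fetch  0x010U

    /* Model of a guest page-table walk that folds SMEP into the existing
     * "required bits" machinery. */
    uint32_t walk_check(const uint32_t *level_flags, int nlevels,
                        uint32_t mflags, uint32_t iflags,
                        uint32_t pfec, bool smep_enabled)
    {
        uint32_t rc = 0;
        int i;
        bool smep = smep_enabled &&
            ((pfec & (PFEC_insn_fetch | PFEC_user_mode)) == PFEC_insn_fetch);

        if ( smep )
            mflags |= _PAGE_USER;    /* walk as if user rights were required */

        for ( i = 0; i < nlevels; i++ )
        {
            uint32_t gflags = level_flags[i] ^ iflags;
            rc |= ((gflags & mflags) ^ mflags);
        }

        if ( smep )
            rc ^= _PAGE_USER;        /* rc & _PAGE_USER now flags an SMEP violation */

        return rc;                   /* 0 => access allowed */
    }

    int main(void)
    {
        /* Supervisor instruction fetch from a mapping that is user-accessible
         * at every level: refused when SMEP is on. */
        uint32_t flags[2] = { _PAGE_PRESENT | _PAGE_USER, _PAGE_PRESENT | _PAGE_USER };
        uint32_t rc = walk_check(flags, 2, _PAGE_PRESENT, 0, PFEC_insn_fetch, true);

        printf("rc = %#x (nonzero => fault)\n", rc);
        return 0;
    }
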
> Oh I see - yes, this introduces a whole new kind of logic in pagetable
> walks. I've attached a version of your patch that I think will do the
> right thing, and that's tidied up in a few other places. Can you check
> to see that it does what you need?

Simpler and better! Verified it works fine. Please help to apply :)

Thanks!
-Xin
At 23:07 +0800 on 06 Jun (1307401624), Li, Xin wrote:
> > Oh I see - yes, this introduces a whole new kind of logic in pagetable
> > walks. I've attached a version of your patch that I think will do the
> > right thing, and that's tidied up in a few other places. Can you check
> > to see that it does what you need?
> 
> Simpler and better! Verified it works fine.

Thanks. I've applied it.

Tim.

-- 
Tim Deegan <Tim.Deegan@citrix.com>
Principal Software Engineer, Xen Platform Team
Citrix Systems UK Ltd.  (Company #02937203, SL9 0BG)