These aren't part of the actual security fixes, but the first helps
prove that what needs to be preemptible now really is. The second is
really just cleanup, the potential for which was noticed while putting
together the earlier ones.

1: cleanup after making various page table manipulation operations preemptible
2: miscellaneous mm.c cleanup

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Jan Beulich
2013-May-02 14:52 UTC
[PATCH 1/2] x86: cleanup after making various page table manipulation operations preemptible
This drops the "preemptible" parameters from various functions where now they can''t (or shouldn''t, validated by assertions) be run in non- preemptible mode anymore, to prove that manipulations of at least L3 and L4 page tables and page table entries are now always preemptible, i.e. the earlier patches actually fulfill their purpose of fixing the resulting security issue. Signed-off-by: Jan Beulich <jbeulich@suse.com> Acked-by: Tim Deegan <tim@xen.org> --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -1859,7 +1859,7 @@ static int relinquish_memory( } if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) ) - ret = put_page_and_type_preemptible(page, 1); + ret = put_page_and_type_preemptible(page); switch ( ret ) { case 0: --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -943,7 +943,7 @@ get_page_from_l2e( define_get_linear_pagetable(l3); static int get_page_from_l3e( - l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int partial, int preemptible) + l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int partial) { int rc; @@ -957,7 +957,7 @@ get_page_from_l3e( } rc = get_page_and_type_from_pagenr( - l3e_get_pfn(l3e), PGT_l2_page_table, d, partial, preemptible); + l3e_get_pfn(l3e), PGT_l2_page_table, d, partial, 1); if ( unlikely(rc == -EINVAL) && get_l3_linear_pagetable(l3e, pfn, d) ) rc = 0; @@ -967,7 +967,7 @@ get_page_from_l3e( define_get_linear_pagetable(l4); static int get_page_from_l4e( - l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int partial, int preemptible) + l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int partial) { int rc; @@ -981,7 +981,7 @@ get_page_from_l4e( } rc = get_page_and_type_from_pagenr( - l4e_get_pfn(l4e), PGT_l3_page_table, d, partial, preemptible); + l4e_get_pfn(l4e), PGT_l3_page_table, d, partial, 1); if ( unlikely(rc == -EINVAL) && get_l4_linear_pagetable(l4e, pfn, d) ) rc = 0; @@ -1120,8 +1120,10 @@ static int put_page_from_l2e(l2_pgentry_ static int __put_page_type(struct page_info *, int preemptible); static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, - int partial, int preemptible) + int partial, bool_t defer) { + struct page_info *pg; + if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || (l3e_get_pfn(l3e) == pfn) ) return 1; @@ -1138,40 +1140,44 @@ static int put_page_from_l3e(l3_pgentry_ return 0; } + pg = l3e_get_page(l3e); + if ( unlikely(partial > 0) ) { - ASSERT(preemptible >= 0); - return __put_page_type(l3e_get_page(l3e), preemptible); + ASSERT(!defer); + return __put_page_type(pg, 1); } - if ( preemptible < 0 ) + if ( defer ) { - current->arch.old_guest_table = l3e_get_page(l3e); + current->arch.old_guest_table = pg; return 0; } - return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible); + return put_page_and_type_preemptible(pg); } static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn, - int partial, int preemptible) + int partial, bool_t defer) { if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) && (l4e_get_pfn(l4e) != pfn) ) { + struct page_info *pg = l4e_get_page(l4e); + if ( unlikely(partial > 0) ) { - ASSERT(preemptible >= 0); - return __put_page_type(l4e_get_page(l4e), preemptible); + ASSERT(!defer); + return __put_page_type(pg, 1); } - if ( preemptible < 0 ) + if ( defer ) { - current->arch.old_guest_table = l4e_get_page(l4e); + current->arch.old_guest_table = pg; return 0; } - return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible); + return put_page_and_type_preemptible(pg); } return 1; } @@ -1305,7 +1311,7 @@ static int alloc_l2_table(struct page_in 
return rc > 0 ? 0 : rc; } -static int alloc_l3_table(struct page_info *page, int preemptible) +static int alloc_l3_table(struct page_info *page) { struct domain *d = page_get_owner(page); unsigned long pfn = page_to_mfn(page); @@ -1337,11 +1343,10 @@ static int alloc_l3_table(struct page_in rc = get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]), PGT_l2_page_table | PGT_pae_xen_l2, - d, partial, preemptible); + d, partial, 1); } else if ( !is_guest_l3_slot(i) || - (rc = get_page_from_l3e(pl3e[i], pfn, d, - partial, preemptible)) > 0 ) + (rc = get_page_from_l3e(pl3e[i], pfn, d, partial)) > 0 ) continue; if ( rc == -EAGAIN ) @@ -1401,7 +1406,7 @@ void init_guest_l4_table(l4_pgentry_t l4 l4e_from_page(d->arch.perdomain_l3_pg, __PAGE_HYPERVISOR); } -static int alloc_l4_table(struct page_info *page, int preemptible) +static int alloc_l4_table(struct page_info *page) { struct domain *d = page_get_owner(page); unsigned long pfn = page_to_mfn(page); @@ -1413,8 +1418,7 @@ static int alloc_l4_table(struct page_in i++, partial = 0 ) { if ( !is_guest_l4_slot(d, i) || - (rc = get_page_from_l4e(pl4e[i], pfn, d, - partial, preemptible)) > 0 ) + (rc = get_page_from_l4e(pl4e[i], pfn, d, partial)) > 0 ) continue; if ( rc == -EAGAIN ) @@ -1501,7 +1505,7 @@ static int free_l2_table(struct page_inf return err; } -static int free_l3_table(struct page_info *page, int preemptible) +static int free_l3_table(struct page_info *page) { struct domain *d = page_get_owner(page); unsigned long pfn = page_to_mfn(page); @@ -1514,7 +1518,7 @@ static int free_l3_table(struct page_inf do { if ( is_guest_l3_slot(i) ) { - rc = put_page_from_l3e(pl3e[i], pfn, partial, preemptible); + rc = put_page_from_l3e(pl3e[i], pfn, partial, 0); if ( rc < 0 ) break; partial = 0; @@ -1540,7 +1544,7 @@ static int free_l3_table(struct page_inf return rc > 0 ? 0 : rc; } -static int free_l4_table(struct page_info *page, int preemptible) +static int free_l4_table(struct page_info *page) { struct domain *d = page_get_owner(page); unsigned long pfn = page_to_mfn(page); @@ -1550,7 +1554,7 @@ static int free_l4_table(struct page_inf do { if ( is_guest_l4_slot(d, i) ) - rc = put_page_from_l4e(pl4e[i], pfn, partial, preemptible); + rc = put_page_from_l4e(pl4e[i], pfn, partial, 0); if ( rc < 0 ) break; partial = 0; @@ -1809,7 +1813,6 @@ static int mod_l3_entry(l3_pgentry_t *pl l3_pgentry_t nl3e, unsigned long pfn, int preserve_ad, - int preemptible, struct vcpu *vcpu) { l3_pgentry_t ol3e; @@ -1849,7 +1852,7 @@ static int mod_l3_entry(l3_pgentry_t *pl return rc ? 0 : -EFAULT; } - rc = get_page_from_l3e(nl3e, pfn, d, 0, preemptible); + rc = get_page_from_l3e(nl3e, pfn, d, 0); if ( unlikely(rc < 0) ) return rc; rc = 0; @@ -1872,7 +1875,7 @@ static int mod_l3_entry(l3_pgentry_t *pl if ( !create_pae_xen_mappings(d, pl3e) ) BUG(); - put_page_from_l3e(ol3e, pfn, 0, -preemptible); + put_page_from_l3e(ol3e, pfn, 0, 1); return rc; } @@ -1881,7 +1884,6 @@ static int mod_l4_entry(l4_pgentry_t *pl l4_pgentry_t nl4e, unsigned long pfn, int preserve_ad, - int preemptible, struct vcpu *vcpu) { struct domain *d = vcpu->domain; @@ -1914,7 +1916,7 @@ static int mod_l4_entry(l4_pgentry_t *pl return rc ? 
0 : -EFAULT; } - rc = get_page_from_l4e(nl4e, pfn, d, 0, preemptible); + rc = get_page_from_l4e(nl4e, pfn, d, 0); if ( unlikely(rc < 0) ) return rc; rc = 0; @@ -1933,7 +1935,7 @@ static int mod_l4_entry(l4_pgentry_t *pl return -EFAULT; } - put_page_from_l4e(ol4e, pfn, 0, -preemptible); + put_page_from_l4e(ol4e, pfn, 0, 1); return rc; } @@ -2053,10 +2055,12 @@ static int alloc_page_type(struct page_i rc = alloc_l2_table(page, type, preemptible); break; case PGT_l3_page_table: - rc = alloc_l3_table(page, preemptible); + ASSERT(preemptible); + rc = alloc_l3_table(page); break; case PGT_l4_page_table: - rc = alloc_l4_table(page, preemptible); + ASSERT(preemptible); + rc = alloc_l4_table(page); break; case PGT_seg_desc_page: rc = alloc_segdesc_page(page); @@ -2146,10 +2150,12 @@ int free_page_type(struct page_info *pag rc = free_l2_table(page, preemptible); break; case PGT_l3_page_table: - rc = free_l3_table(page, preemptible); + ASSERT(preemptible); + rc = free_l3_table(page); break; case PGT_l4_page_table: - rc = free_l4_table(page, preemptible); + ASSERT(preemptible); + rc = free_l4_table(page); break; default: MEM_LOG("type %lx pfn %lx\n", type, page_to_mfn(page)); @@ -2620,7 +2626,7 @@ static int put_old_guest_table(struct vc if ( !v->arch.old_guest_table ) return 0; - switch ( rc = put_page_and_type_preemptible(v->arch.old_guest_table, 1) ) + switch ( rc = put_page_and_type_preemptible(v->arch.old_guest_table) ) { case -EINTR: case -EAGAIN: @@ -2654,7 +2660,7 @@ int vcpu_destroy_pagetables(struct vcpu if ( paging_mode_refcounts(v->domain) ) put_page(page); else - rc = put_page_and_type_preemptible(page, 1); + rc = put_page_and_type_preemptible(page); } if ( l4tab ) @@ -2675,7 +2681,7 @@ int vcpu_destroy_pagetables(struct vcpu if ( paging_mode_refcounts(v->domain) ) put_page(page); else - rc = put_page_and_type_preemptible(page, 1); + rc = put_page_and_type_preemptible(page); } if ( !rc ) v->arch.guest_table_user = pagetable_null(); @@ -2705,7 +2711,7 @@ int new_guest_cr3(unsigned long mfn) l4e_from_pfn( mfn, (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)), - gt_mfn, 0, 1, curr); + gt_mfn, 0, curr); unmap_domain_page(pl4e); switch ( rc ) { @@ -2769,7 +2775,7 @@ int new_guest_cr3(unsigned long mfn) if ( paging_mode_refcounts(d) ) put_page(page); else - switch ( rc = put_page_and_type_preemptible(page, 1) ) + switch ( rc = put_page_and_type_preemptible(page) ) { case -EINTR: rc = -EAGAIN; @@ -3053,7 +3059,7 @@ long do_mmuext_op( break; } - switch ( rc = put_page_and_type_preemptible(page, 1) ) + switch ( rc = put_page_and_type_preemptible(page) ) { case -EINTR: case -EAGAIN: @@ -3129,7 +3135,7 @@ long do_mmuext_op( if ( paging_mode_refcounts(d) ) put_page(page); else - switch ( rc = put_page_and_type_preemptible(page, 1) ) + switch ( rc = put_page_and_type_preemptible(page) ) { case -EINTR: rc = -EAGAIN; @@ -3604,11 +3610,11 @@ long do_mmu_update( break; case PGT_l3_page_table: rc = mod_l3_entry(va, l3e_from_intpte(req.val), mfn, - cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, v); + cmd == MMU_PT_UPDATE_PRESERVE_AD, v); break; case PGT_l4_page_table: rc = mod_l4_entry(va, l4e_from_intpte(req.val), mfn, - cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, v); + cmd == MMU_PT_UPDATE_PRESERVE_AD, v); break; case PGT_writable_page: perfc_incr(writable_mmu_updates); --- a/xen/include/asm-x86/mm.h +++ b/xen/include/asm-x86/mm.h @@ -364,15 +364,10 @@ static inline void put_page_and_type(str put_page(page); } -static inline int put_page_and_type_preemptible(struct page_info *page, - int preemptible) +static inline int 
put_page_and_type_preemptible(struct page_info *page) { - int rc = 0; + int rc = put_page_type_preemptible(page); - if ( preemptible ) - rc = put_page_type_preemptible(page); - else - put_page_type(page); if ( likely(rc == 0) ) put_page(page); return rc; _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
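A note on the calling convention being removed here: the old final
parameter of put_page_from_l3e()/put_page_from_l4e() was a tri-state
int (negative meant "defer the put by stashing the page in
current->arch.old_guest_table", zero meant a non-preemptible put, and
positive a preemptible one), which is why mod_l3_entry() used to pass
-preemptible. The patch narrows this to a single bool_t defer, with
preemptibility asserted at the alloc/free entry points instead. A
minimal, standalone C sketch of the two conventions; the function
names and printf bodies are simplified stand-ins, not the actual Xen
code, which operates on l3_pgentry_t and struct page_info:

#include <stdio.h>

typedef int bool_t; /* Xen's pre-C99 boolean type, as used by the patch */

/* Old convention: one tri-state argument encodes three behaviors. */
static void put_l3_old(int preemptible)
{
    if ( preemptible < 0 )
        puts("defer: stash page in current->arch.old_guest_table");
    else if ( preemptible == 0 )
        puts("non-preemptible put (no longer a reachable case for L3/L4)");
    else
        puts("preemptible put via put_page_and_type_preemptible()");
}

/* New convention: preemptibility is implied (and ASSERT()ed at the
 * alloc/free entry points); only the defer-vs-put decision remains.
 */
static void put_l3_new(bool_t defer)
{
    if ( defer )
        puts("defer: stash page in current->arch.old_guest_table");
    else
        puts("preemptible put via put_page_and_type_preemptible()");
}

int main(void)
{
    int preemptible = 1;

    put_l3_old(-preemptible); /* old mod_l3_entry() call site */
    put_l3_new(1);            /* new call site: plain defer flag */
    return 0;
}

Collapsing the tri-state is also what lets put_page_and_type_preemptible()
lose its second parameter, as the mm.h hunk above shows.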
This simply streamlines code in a few places where room for
improvement was noticed while putting together the earlier patches
here and those in the XSA-45 series. This also drops the bogus use of
the domain lock in the CR3 write emulation (which protected against
nothing).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Tim Deegan <tim@xen.org>

--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -2075,19 +2075,17 @@ static int alloc_page_type(struct page_i
     /* No need for atomic update of type_info here: noone else updates it. */
     wmb();

-    if ( rc == -EAGAIN )
-    {
-        get_page_light(page);
-        page->u.inuse.type_info |= PGT_partial;
-    }
-    else if ( rc == -EINTR )
+    switch ( rc )
     {
+    case 0:
+        page->u.inuse.type_info |= PGT_validated;
+        break;
+    case -EINTR:
         ASSERT((page->u.inuse.type_info &
                 (PGT_count_mask|PGT_validated|PGT_partial)) == 1);
         page->u.inuse.type_info &= ~PGT_count_mask;
-    }
-    else if ( rc )
-    {
+        break;
+    default:
         ASSERT(rc < 0);
         MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %"
                 PRtype_info ": caf=%08lx taf=%" PRtype_info,
@@ -2099,13 +2097,11 @@ static int alloc_page_type(struct page_i
         {
             ASSERT((page->u.inuse.type_info &
                     (PGT_count_mask | PGT_validated)) == 1);
+    case -EAGAIN:
             get_page_light(page);
             page->u.inuse.type_info |= PGT_partial;
         }
-    }
-    else
-    {
-        page->u.inuse.type_info |= PGT_validated;
+        break;
     }

     return rc;
@@ -2884,7 +2880,7 @@ long do_mmuext_op(
 {
     struct mmuext_op op;
     unsigned long type;
-    unsigned int i = 0, done = 0;
+    unsigned int i, done = 0;
     struct vcpu *curr = current;
     struct domain *d = curr->domain;
     struct domain *pg_owner;
@@ -2917,22 +2913,16 @@ long do_mmuext_op(
     perfc_incr(calls_to_mmuext_op);

     if ( unlikely(!guest_handle_okay(uops, count)) )
-    {
-        rc = -EFAULT;
-        goto out;
-    }
+        return -EFAULT;

     if ( (pg_owner = get_pg_owner(foreigndom)) == NULL )
-    {
-        rc = -ESRCH;
-        goto out;
-    }
+        return -ESRCH;

     rc = xsm_mmuext_op(XSM_TARGET, d, pg_owner);
     if ( rc )
     {
-        rcu_unlock_domain(pg_owner);
-        goto out;
+        put_pg_owner(pg_owner);
+        return rc;
     }

     for ( i = 0; i < count; i++ )
@@ -3404,7 +3394,6 @@ long do_mmuext_op(

     perfc_add(num_mmuext_ops, i);

- out:
     /* Add incremental work we have done to the @done output parameter. */
     if ( unlikely(!guest_handle_is_null(pdone)) )
     {
@@ -3460,22 +3449,17 @@ long do_mmu_update(
     perfc_incr(calls_to_mmu_update);

     if ( unlikely(!guest_handle_okay(ureqs, count)) )
-    {
-        rc = -EFAULT;
-        goto out;
-    }
+        return -EFAULT;

     if ( (pt_dom = foreigndom >> 16) != 0 )
     {
         /* Pagetables belong to a foreign domain (PFD). */
         if ( (pt_owner = rcu_lock_domain_by_id(pt_dom - 1)) == NULL )
-        {
-            rc = -EINVAL;
-            goto out;
-        }
+            return -EINVAL;
+
         if ( pt_owner == d )
             rcu_unlock_domain(pt_owner);
-        if ( (v = pt_owner->vcpu ? pt_owner->vcpu[0] : NULL) == NULL )
+        else if ( !pt_owner->vcpu || (v = pt_owner->vcpu[0]) == NULL )
         {
             rc = -EINVAL;
             goto out;
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2318,7 +2318,7 @@ static int emulate_privileged_op(struct
         case 3: {/* Write CR3 */
             unsigned long gfn;
             struct page_info *page;
-            domain_lock(v->domain);
+
             gfn = !is_pv_32on64_vcpu(v)
                 ? xen_cr3_to_pfn(*reg) : compat_cr3_to_pfn(*reg);
             page = get_page_from_gfn(v->domain, gfn, NULL, P2M_ALLOC);
@@ -2329,7 +2329,7 @@ static int emulate_privileged_op(struct
             }
             else
                 rc = -EINVAL;
-            domain_unlock(v->domain);
+
             switch ( rc )
             {
             case 0:
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -273,8 +273,6 @@ extern unsigned char boot_edid_info[128]

 #endif

-#define PGT_base_page_table PGT_l4_page_table
-
 #define __HYPERVISOR_CS64 0xe008
 #define __HYPERVISOR_CS32 0xe038
 #define __HYPERVISOR_CS __HYPERVISOR_CS64
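One detail of the alloc_page_type() rework above is easy to misread:
the new switch places the case -EAGAIN: label inside an if block that
is itself nested in the default: arm, so the -EAGAIN path jumps
directly to the shared get_page_light()/PGT_partial tail, while the
default error path reaches that same tail only when its own condition
holds. This is legal C (a case label may appear anywhere inside the
switch body). A standalone toy illustration of the idiom; the names
and the companion error code are invented for the example, and only
the control flow mirrors the patch:

#include <errno.h>
#include <stdio.h>

/* Toy model of the label-inside-a-nested-block switch used by the
 * reworked alloc_page_type() error handling.
 */
static void classify(int rc, int keep_partial)
{
    switch ( rc )
    {
    case 0:
        puts("validated");
        break;
    default:
        printf("error %d\n", rc);
        if ( keep_partial )
        {
    case -EAGAIN:          /* jumps straight into the nested block */
            puts("mark partial, take extra page reference");
        }
        break;
    }
}

int main(void)
{
    classify(0, 0);       /* validated */
    classify(-EAGAIN, 0); /* enters the if body via the case label */
    classify(-EIO, 1);    /* error path falls into the shared tail */
    classify(-EIO, 0);    /* error path skips the tail */
    return 0;
}

The payoff in the real patch is that the -EAGAIN and partial-validation
paths share a single copy of the reference and flag maintenance.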
On 02/05/2013 15:44, "Jan Beulich" <JBeulich@suse.com> wrote:

> These aren't part of the actual security fixes, but the first helps
> prove that what needs to be preemptible now really is. The second
> is really just cleanup, the potential for which was noticed while
> putting together the earlier ones.
>
> 1: cleanup after making various page table manipulation operations preemptible
> 2: miscellaneous mm.c cleanup
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Questionable for 4.3? I'm not sure.

 -- Keir
>>> On 02.05.13 at 18:29, Keir Fraser <keir.xen@gmail.com> wrote:
> On 02/05/2013 15:44, "Jan Beulich" <JBeulich@suse.com> wrote:
>
>> These aren't part of the actual security fixes, but the first helps
>> prove that what needs to be preemptible now really is. The second
>> is really just cleanup, the potential for which was noticed while
>> putting together the earlier ones.
>>
>> 1: cleanup after making various page table manipulation operations preemptible
>> 2: miscellaneous mm.c cleanup
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> Questionable for 4.3? I'm not sure.

I'm definite about the first one, and am even intending to put its
backports on the stable branches later today (as the whole batch
already passed the push gate). You may have noticed that I committed
them already (you should have raised your concern about committing
them at this stage when these were discussed on security@), so if
you're unhappy with the second one being there, we ought to revert it.

Jan
On 03/05/2013 08:02, "Jan Beulich" <JBeulich@suse.com> wrote:

>>>> On 02.05.13 at 18:29, Keir Fraser <keir.xen@gmail.com> wrote:
>>> On 02/05/2013 15:44, "Jan Beulich" <JBeulich@suse.com> wrote:
>>>
>>>> These aren't part of the actual security fixes, but the first helps
>>>> prove that what needs to be preemptible now really is. The second
>>>> is really just cleanup, the potential for which was noticed while
>>>> putting together the earlier ones.
>>>>
>>>> 1: cleanup after making various page table manipulation operations preemptible
>>>> 2: miscellaneous mm.c cleanup
>>>>
>>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>>
>>> Questionable for 4.3? I'm not sure.
>>
>> I'm definite about the first one, and am even intending to put its
>> backports on the stable branches later today (as the whole batch
>> already passed the push gate). You may have noticed that I committed
>> them already (you should have raised your concern about committing
>> them at this stage when these were discussed on security@), so if
>> you're unhappy with the second one being there, we ought to revert it.

Yeah, 1/2 looks fine for sure; it's just mechanistic cleanup really.
2/2, hmmm, well I think it's okay too -- you wouldn't backport that
one though, right?

 -- Keir

> Jan
>
>>> On 03.05.13 at 10:17, Keir Fraser <keir.xen@gmail.com> wrote:
> On 03/05/2013 08:02, "Jan Beulich" <JBeulich@suse.com> wrote:
>
>>>>> On 02.05.13 at 18:29, Keir Fraser <keir.xen@gmail.com> wrote:
>>>> On 02/05/2013 15:44, "Jan Beulich" <JBeulich@suse.com> wrote:
>>>>
>>>>> These aren't part of the actual security fixes, but the first helps
>>>>> prove that what needs to be preemptible now really is. The second
>>>>> is really just cleanup, the potential for which was noticed while
>>>>> putting together the earlier ones.
>>>>>
>>>>> 1: cleanup after making various page table manipulation operations preemptible
>>>>> 2: miscellaneous mm.c cleanup
>>>>>
>>>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>>>
>>>> Questionable for 4.3? I'm not sure.
>>>
>>> I'm definite about the first one, and am even intending to put its
>>> backports on the stable branches later today (as the whole batch
>>> already passed the push gate). You may have noticed that I committed
>>> them already (you should have raised your concern about committing
>>> them at this stage when these were discussed on security@), so if
>>> you're unhappy with the second one being there, we ought to revert it.
>
> Yeah, 1/2 looks fine for sure; it's just mechanistic cleanup really.
> 2/2, hmmm, well I think it's okay too -- you wouldn't backport that
> one though, right?

Right, only the first one.

Jan