Andres Lagar-Cavilla
2012-Aug-21 20:11 UTC
Re: [PATCH] Fix shared entry status for grant copy operation on paged out gfn
NB: this intended for 4.2 window as it solves a paging-related BUG Andres On Aug 21, 2012, at 4:13 PM, Andres Lagar-Cavilla wrote:> xen/common/grant_table.c | 33 ++++++++++++++++++++++----------- > 1 files changed, 22 insertions(+), 11 deletions(-) > > > The unwind path was not clearing the shared entry status bits. This was > BSOD-ing guests on network activity under certain configurations. > > Also: > * sed the fixup method name to signal it''s related to grant copy. > * use atomic clear flag ops during fixup. > > Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org> > > diff -r 5267f78c8a1e -r 84a23ae853a5 xen/common/grant_table.c > --- a/xen/common/grant_table.c > +++ b/xen/common/grant_table.c > @@ -1751,14 +1751,14 @@ __release_grant_for_copy( > under the domain''s grant table lock. */ > /* Only safe on transitive grants. Even then, note that we don''t > attempt to drop any pin on the referent grant. */ > -static void __fixup_status_for_pin(const struct active_grant_entry *act, > +static void __fixup_status_for_copy_pin(const struct active_grant_entry *act, > uint16_t *status) > { > if ( !(act->pin & GNTPIN_hstw_mask) ) > - *status &= ~GTF_writing; > + gnttab_clear_flag(_GTF_writing, status); > > if ( !(act->pin & GNTPIN_hstr_mask) ) > - *status &= ~GTF_reading; > + gnttab_clear_flag(_GTF_reading, status); > } > > /* Grab a frame number from a grant entry and update the flags and pin > @@ -1834,7 +1834,7 @@ __acquire_grant_for_copy( > if ( sha2 && (shah->flags & GTF_type_mask) == GTF_transitive ) > { > if ( !allow_transitive ) > - PIN_FAIL(unlock_out, GNTST_general_error, > + PIN_FAIL(unlock_out_clear, GNTST_general_error, > "transitive grant when transitivity not allowed\n"); > > trans_domid = sha2->transitive.trans_domid; > @@ -1842,7 +1842,7 @@ __acquire_grant_for_copy( > barrier(); /* Stop the compiler from re-loading > trans_domid from shared memory */ > if ( trans_domid == rd->domain_id ) > - PIN_FAIL(unlock_out, GNTST_general_error, > + PIN_FAIL(unlock_out_clear, GNTST_general_error, > "transitive grants cannot be self-referential\n"); > > /* We allow the trans_domid == ldom case, which > @@ -1855,7 +1855,7 @@ __acquire_grant_for_copy( > /* We need to leave the rrd locked during the grant copy */ > td = rcu_lock_domain_by_id(trans_domid); > if ( td == NULL ) > - PIN_FAIL(unlock_out, GNTST_general_error, > + PIN_FAIL(unlock_out_clear, GNTST_general_error, > "transitive grant referenced bad domain %d\n", > trans_domid); > spin_unlock(&rgt->lock); > @@ -1866,7 +1866,7 @@ __acquire_grant_for_copy( > > spin_lock(&rgt->lock); > if ( rc != GNTST_okay ) { > - __fixup_status_for_pin(act, status); > + __fixup_status_for_copy_pin(act, status); > rcu_unlock_domain(td); > spin_unlock(&rgt->lock); > return rc; > @@ -1878,7 +1878,7 @@ __acquire_grant_for_copy( > and try again. */ > if ( act->pin != old_pin ) > { > - __fixup_status_for_pin(act, status); > + __fixup_status_for_copy_pin(act, status); > rcu_unlock_domain(td); > spin_unlock(&rgt->lock); > put_page(*page); > @@ -1897,7 +1897,7 @@ __acquire_grant_for_copy( > { > rc = __get_paged_frame(sha1->frame, &grant_frame, page, readonly, rd); > if ( rc != GNTST_okay ) > - goto unlock_out; > + goto unlock_out_clear; > act->gfn = sha1->frame; > is_sub_page = 0; > trans_page_off = 0; > @@ -1907,7 +1907,7 @@ __acquire_grant_for_copy( > { > rc = __get_paged_frame(sha2->full_page.frame, &grant_frame, page, readonly, rd); > if ( rc != GNTST_okay ) > - goto unlock_out; > + goto unlock_out_clear; > act->gfn = sha2->full_page.frame; > is_sub_page = 0; > trans_page_off = 0; > @@ -1917,7 +1917,7 @@ __acquire_grant_for_copy( > { > rc = __get_paged_frame(sha2->sub_page.frame, &grant_frame, page, readonly, rd); > if ( rc != GNTST_okay ) > - goto unlock_out; > + goto unlock_out_clear; > act->gfn = sha2->sub_page.frame; > is_sub_page = 1; > trans_page_off = sha2->sub_page.page_off; > @@ -1948,6 +1948,17 @@ __acquire_grant_for_copy( > *length = act->length; > *frame = act->frame; > > + spin_unlock(&rgt->lock); > + return rc; > + > + unlock_out_clear: > + if ( !(readonly) && > + !(act->pin & GNTPIN_hstw_mask) ) > + gnttab_clear_flag(_GTF_writing, status); > + > + if ( !act->pin ) > + gnttab_clear_flag(_GTF_reading, status); > + > unlock_out: > spin_unlock(&rgt->lock); > return rc;
Andres Lagar-Cavilla
2012-Aug-21 20:13 UTC
[PATCH] Fix shared entry status for grant copy operation on paged out gfn
xen/common/grant_table.c | 33 ++++++++++++++++++++++----------- 1 files changed, 22 insertions(+), 11 deletions(-) The unwind path was not clearing the shared entry status bits. This was BSOD-ing guests on network activity under certain configurations. Also: * sed the fixup method name to signal it''s related to grant copy. * use atomic clear flag ops during fixup. Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org> diff -r 5267f78c8a1e -r 84a23ae853a5 xen/common/grant_table.c --- a/xen/common/grant_table.c +++ b/xen/common/grant_table.c @@ -1751,14 +1751,14 @@ __release_grant_for_copy( under the domain''s grant table lock. */ /* Only safe on transitive grants. Even then, note that we don''t attempt to drop any pin on the referent grant. */ -static void __fixup_status_for_pin(const struct active_grant_entry *act, +static void __fixup_status_for_copy_pin(const struct active_grant_entry *act, uint16_t *status) { if ( !(act->pin & GNTPIN_hstw_mask) ) - *status &= ~GTF_writing; + gnttab_clear_flag(_GTF_writing, status); if ( !(act->pin & GNTPIN_hstr_mask) ) - *status &= ~GTF_reading; + gnttab_clear_flag(_GTF_reading, status); } /* Grab a frame number from a grant entry and update the flags and pin @@ -1834,7 +1834,7 @@ __acquire_grant_for_copy( if ( sha2 && (shah->flags & GTF_type_mask) == GTF_transitive ) { if ( !allow_transitive ) - PIN_FAIL(unlock_out, GNTST_general_error, + PIN_FAIL(unlock_out_clear, GNTST_general_error, "transitive grant when transitivity not allowed\n"); trans_domid = sha2->transitive.trans_domid; @@ -1842,7 +1842,7 @@ __acquire_grant_for_copy( barrier(); /* Stop the compiler from re-loading trans_domid from shared memory */ if ( trans_domid == rd->domain_id ) - PIN_FAIL(unlock_out, GNTST_general_error, + PIN_FAIL(unlock_out_clear, GNTST_general_error, "transitive grants cannot be self-referential\n"); /* We allow the trans_domid == ldom case, which @@ -1855,7 +1855,7 @@ __acquire_grant_for_copy( /* We need to leave the rrd locked during the grant copy */ td = rcu_lock_domain_by_id(trans_domid); if ( td == NULL ) - PIN_FAIL(unlock_out, GNTST_general_error, + PIN_FAIL(unlock_out_clear, GNTST_general_error, "transitive grant referenced bad domain %d\n", trans_domid); spin_unlock(&rgt->lock); @@ -1866,7 +1866,7 @@ __acquire_grant_for_copy( spin_lock(&rgt->lock); if ( rc != GNTST_okay ) { - __fixup_status_for_pin(act, status); + __fixup_status_for_copy_pin(act, status); rcu_unlock_domain(td); spin_unlock(&rgt->lock); return rc; @@ -1878,7 +1878,7 @@ __acquire_grant_for_copy( and try again. */ if ( act->pin != old_pin ) { - __fixup_status_for_pin(act, status); + __fixup_status_for_copy_pin(act, status); rcu_unlock_domain(td); spin_unlock(&rgt->lock); put_page(*page); @@ -1897,7 +1897,7 @@ __acquire_grant_for_copy( { rc = __get_paged_frame(sha1->frame, &grant_frame, page, readonly, rd); if ( rc != GNTST_okay ) - goto unlock_out; + goto unlock_out_clear; act->gfn = sha1->frame; is_sub_page = 0; trans_page_off = 0; @@ -1907,7 +1907,7 @@ __acquire_grant_for_copy( { rc = __get_paged_frame(sha2->full_page.frame, &grant_frame, page, readonly, rd); if ( rc != GNTST_okay ) - goto unlock_out; + goto unlock_out_clear; act->gfn = sha2->full_page.frame; is_sub_page = 0; trans_page_off = 0; @@ -1917,7 +1917,7 @@ __acquire_grant_for_copy( { rc = __get_paged_frame(sha2->sub_page.frame, &grant_frame, page, readonly, rd); if ( rc != GNTST_okay ) - goto unlock_out; + goto unlock_out_clear; act->gfn = sha2->sub_page.frame; is_sub_page = 1; trans_page_off = sha2->sub_page.page_off; @@ -1948,6 +1948,17 @@ __acquire_grant_for_copy( *length = act->length; *frame = act->frame; + spin_unlock(&rgt->lock); + return rc; + + unlock_out_clear: + if ( !(readonly) && + !(act->pin & GNTPIN_hstw_mask) ) + gnttab_clear_flag(_GTF_writing, status); + + if ( !act->pin ) + gnttab_clear_flag(_GTF_reading, status); + unlock_out: spin_unlock(&rgt->lock); return rc;
Jan Beulich
2012-Aug-22 14:21 UTC
Re: [PATCH] Fix shared entry status for grant copy operation on paged out gfn
>>> On 21.08.12 at 22:13, Andres Lagar-Cavilla <andres@lagarcavilla.org> wrote: > xen/common/grant_table.c | 33 ++++++++++++++++++++++----------- > 1 files changed, 22 insertions(+), 11 deletions(-) > > > The unwind path was not clearing the shared entry status bits. This was > BSOD-ing guests on network activity under certain configurations. > > Also: > * sed the fixup method name to signal it''s related to grant copy. > * use atomic clear flag ops during fixup.Is that last thing really needed? I remember having looked at these non-atomic operations too a little while back, and came to the conclusion that probably the authors intentionally coded it that way. jan> Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org> > > diff -r 5267f78c8a1e -r 84a23ae853a5 xen/common/grant_table.c > --- a/xen/common/grant_table.c > +++ b/xen/common/grant_table.c > @@ -1751,14 +1751,14 @@ __release_grant_for_copy( > under the domain''s grant table lock. */ > /* Only safe on transitive grants. Even then, note that we don''t > attempt to drop any pin on the referent grant. */ > -static void __fixup_status_for_pin(const struct active_grant_entry *act, > +static void __fixup_status_for_copy_pin(const struct active_grant_entry > *act, > uint16_t *status) > { > if ( !(act->pin & GNTPIN_hstw_mask) ) > - *status &= ~GTF_writing; > + gnttab_clear_flag(_GTF_writing, status); > > if ( !(act->pin & GNTPIN_hstr_mask) ) > - *status &= ~GTF_reading; > + gnttab_clear_flag(_GTF_reading, status); > } > > /* Grab a frame number from a grant entry and update the flags and pin > @@ -1834,7 +1834,7 @@ __acquire_grant_for_copy( > if ( sha2 && (shah->flags & GTF_type_mask) == GTF_transitive ) > { > if ( !allow_transitive ) > - PIN_FAIL(unlock_out, GNTST_general_error, > + PIN_FAIL(unlock_out_clear, GNTST_general_error, > "transitive grant when transitivity not > allowed\n"); > > trans_domid = sha2->transitive.trans_domid; > @@ -1842,7 +1842,7 @@ __acquire_grant_for_copy( > barrier(); /* Stop the compiler from re-loading > trans_domid from shared memory */ > if ( trans_domid == rd->domain_id ) > - PIN_FAIL(unlock_out, GNTST_general_error, > + PIN_FAIL(unlock_out_clear, GNTST_general_error, > "transitive grants cannot be self-referential\n"); > > /* We allow the trans_domid == ldom case, which > @@ -1855,7 +1855,7 @@ __acquire_grant_for_copy( > /* We need to leave the rrd locked during the grant copy */ > td = rcu_lock_domain_by_id(trans_domid); > if ( td == NULL ) > - PIN_FAIL(unlock_out, GNTST_general_error, > + PIN_FAIL(unlock_out_clear, GNTST_general_error, > "transitive grant referenced bad domain %d\n", > trans_domid); > spin_unlock(&rgt->lock); > @@ -1866,7 +1866,7 @@ __acquire_grant_for_copy( > > spin_lock(&rgt->lock); > if ( rc != GNTST_okay ) { > - __fixup_status_for_pin(act, status); > + __fixup_status_for_copy_pin(act, status); > rcu_unlock_domain(td); > spin_unlock(&rgt->lock); > return rc; > @@ -1878,7 +1878,7 @@ __acquire_grant_for_copy( > and try again. */ > if ( act->pin != old_pin ) > { > - __fixup_status_for_pin(act, status); > + __fixup_status_for_copy_pin(act, status); > rcu_unlock_domain(td); > spin_unlock(&rgt->lock); > put_page(*page); > @@ -1897,7 +1897,7 @@ __acquire_grant_for_copy( > { > rc = __get_paged_frame(sha1->frame, &grant_frame, page, readonly, > rd); > if ( rc != GNTST_okay ) > - goto unlock_out; > + goto unlock_out_clear; > act->gfn = sha1->frame; > is_sub_page = 0; > trans_page_off = 0; > @@ -1907,7 +1907,7 @@ __acquire_grant_for_copy( > { > rc = __get_paged_frame(sha2->full_page.frame, &grant_frame, page, > readonly, rd); > if ( rc != GNTST_okay ) > - goto unlock_out; > + goto unlock_out_clear; > act->gfn = sha2->full_page.frame; > is_sub_page = 0; > trans_page_off = 0; > @@ -1917,7 +1917,7 @@ __acquire_grant_for_copy( > { > rc = __get_paged_frame(sha2->sub_page.frame, &grant_frame, page, > readonly, rd); > if ( rc != GNTST_okay ) > - goto unlock_out; > + goto unlock_out_clear; > act->gfn = sha2->sub_page.frame; > is_sub_page = 1; > trans_page_off = sha2->sub_page.page_off; > @@ -1948,6 +1948,17 @@ __acquire_grant_for_copy( > *length = act->length; > *frame = act->frame; > > + spin_unlock(&rgt->lock); > + return rc; > + > + unlock_out_clear: > + if ( !(readonly) && > + !(act->pin & GNTPIN_hstw_mask) ) > + gnttab_clear_flag(_GTF_writing, status); > + > + if ( !act->pin ) > + gnttab_clear_flag(_GTF_reading, status); > + > unlock_out: > spin_unlock(&rgt->lock); > return rc;
Andres Lagar-Cavilla
2012-Aug-22 14:32 UTC
Re: [PATCH] Fix shared entry status for grant copy operation on paged out gfn
On Aug 22, 2012, at 10:21 AM, Jan Beulich wrote:>>>> On 21.08.12 at 22:13, Andres Lagar-Cavilla <andres@lagarcavilla.org> wrote: >> xen/common/grant_table.c | 33 ++++++++++++++++++++++----------- >> 1 files changed, 22 insertions(+), 11 deletions(-) >> >> >> The unwind path was not clearing the shared entry status bits. This was >> BSOD-ing guests on network activity under certain configurations. >> >> Also: >> * sed the fixup method name to signal it''s related to grant copy. >> * use atomic clear flag ops during fixup. > > Is that last thing really needed? I remember having looked at > these non-atomic operations too a little while back, and came to > the conclusion that probably the authors intentionally coded it > that way.Due to some obscure property of transitive grants? All other flag clearing/setting in shared grant entries by the hypervisor is done atomically (GTF_transfer_completed being an exception). From my p.o.v. there is no downside. But I am not 100% certain and I can back it off. Thanks Andres> > jan > >> Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org> >> >> diff -r 5267f78c8a1e -r 84a23ae853a5 xen/common/grant_table.c >> --- a/xen/common/grant_table.c >> +++ b/xen/common/grant_table.c >> @@ -1751,14 +1751,14 @@ __release_grant_for_copy( >> under the domain''s grant table lock. */ >> /* Only safe on transitive grants. Even then, note that we don''t >> attempt to drop any pin on the referent grant. */ >> -static void __fixup_status_for_pin(const struct active_grant_entry *act, >> +static void __fixup_status_for_copy_pin(const struct active_grant_entry >> *act, >> uint16_t *status) >> { >> if ( !(act->pin & GNTPIN_hstw_mask) ) >> - *status &= ~GTF_writing; >> + gnttab_clear_flag(_GTF_writing, status); >> >> if ( !(act->pin & GNTPIN_hstr_mask) ) >> - *status &= ~GTF_reading; >> + gnttab_clear_flag(_GTF_reading, status); >> } >> >> /* Grab a frame number from a grant entry and update the flags and pin >> @@ -1834,7 +1834,7 @@ __acquire_grant_for_copy( >> if ( sha2 && (shah->flags & GTF_type_mask) == GTF_transitive ) >> { >> if ( !allow_transitive ) >> - PIN_FAIL(unlock_out, GNTST_general_error, >> + PIN_FAIL(unlock_out_clear, GNTST_general_error, >> "transitive grant when transitivity not >> allowed\n"); >> >> trans_domid = sha2->transitive.trans_domid; >> @@ -1842,7 +1842,7 @@ __acquire_grant_for_copy( >> barrier(); /* Stop the compiler from re-loading >> trans_domid from shared memory */ >> if ( trans_domid == rd->domain_id ) >> - PIN_FAIL(unlock_out, GNTST_general_error, >> + PIN_FAIL(unlock_out_clear, GNTST_general_error, >> "transitive grants cannot be self-referential\n"); >> >> /* We allow the trans_domid == ldom case, which >> @@ -1855,7 +1855,7 @@ __acquire_grant_for_copy( >> /* We need to leave the rrd locked during the grant copy */ >> td = rcu_lock_domain_by_id(trans_domid); >> if ( td == NULL ) >> - PIN_FAIL(unlock_out, GNTST_general_error, >> + PIN_FAIL(unlock_out_clear, GNTST_general_error, >> "transitive grant referenced bad domain %d\n", >> trans_domid); >> spin_unlock(&rgt->lock); >> @@ -1866,7 +1866,7 @@ __acquire_grant_for_copy( >> >> spin_lock(&rgt->lock); >> if ( rc != GNTST_okay ) { >> - __fixup_status_for_pin(act, status); >> + __fixup_status_for_copy_pin(act, status); >> rcu_unlock_domain(td); >> spin_unlock(&rgt->lock); >> return rc; >> @@ -1878,7 +1878,7 @@ __acquire_grant_for_copy( >> and try again. */ >> if ( act->pin != old_pin ) >> { >> - __fixup_status_for_pin(act, status); >> + __fixup_status_for_copy_pin(act, status); >> rcu_unlock_domain(td); >> spin_unlock(&rgt->lock); >> put_page(*page); >> @@ -1897,7 +1897,7 @@ __acquire_grant_for_copy( >> { >> rc = __get_paged_frame(sha1->frame, &grant_frame, page, readonly, >> rd); >> if ( rc != GNTST_okay ) >> - goto unlock_out; >> + goto unlock_out_clear; >> act->gfn = sha1->frame; >> is_sub_page = 0; >> trans_page_off = 0; >> @@ -1907,7 +1907,7 @@ __acquire_grant_for_copy( >> { >> rc = __get_paged_frame(sha2->full_page.frame, &grant_frame, page, >> readonly, rd); >> if ( rc != GNTST_okay ) >> - goto unlock_out; >> + goto unlock_out_clear; >> act->gfn = sha2->full_page.frame; >> is_sub_page = 0; >> trans_page_off = 0; >> @@ -1917,7 +1917,7 @@ __acquire_grant_for_copy( >> { >> rc = __get_paged_frame(sha2->sub_page.frame, &grant_frame, page, >> readonly, rd); >> if ( rc != GNTST_okay ) >> - goto unlock_out; >> + goto unlock_out_clear; >> act->gfn = sha2->sub_page.frame; >> is_sub_page = 1; >> trans_page_off = sha2->sub_page.page_off; >> @@ -1948,6 +1948,17 @@ __acquire_grant_for_copy( >> *length = act->length; >> *frame = act->frame; >> >> + spin_unlock(&rgt->lock); >> + return rc; >> + >> + unlock_out_clear: >> + if ( !(readonly) && >> + !(act->pin & GNTPIN_hstw_mask) ) >> + gnttab_clear_flag(_GTF_writing, status); >> + >> + if ( !act->pin ) >> + gnttab_clear_flag(_GTF_reading, status); >> + >> unlock_out: >> spin_unlock(&rgt->lock); >> return rc; > >
Jan Beulich
2012-Aug-22 14:48 UTC
Re: [PATCH] Fix shared entry status for grant copy operation on paged out gfn
>>> On 22.08.12 at 16:32, Andres Lagar-Cavilla <andreslc@gridcentric.ca> wrote:> On Aug 22, 2012, at 10:21 AM, Jan Beulich wrote: > >>>>> On 21.08.12 at 22:13, Andres Lagar-Cavilla <andres@lagarcavilla.org> wrote: >>> xen/common/grant_table.c | 33 ++++++++++++++++++++++----------- >>> 1 files changed, 22 insertions(+), 11 deletions(-) >>> >>> >>> The unwind path was not clearing the shared entry status bits. This was >>> BSOD-ing guests on network activity under certain configurations. >>> >>> Also: >>> * sed the fixup method name to signal it''s related to grant copy. >>> * use atomic clear flag ops during fixup. >> >> Is that last thing really needed? I remember having looked at >> these non-atomic operations too a little while back, and came to >> the conclusion that probably the authors intentionally coded it >> that way. > > Due to some obscure property of transitive grants? All other flag > clearing/setting in shared grant entries by the hypervisor is done atomically > (GTF_transfer_completed being an exception). > > From my p.o.v. there is no downside. But I am not 100% certain and I can > back it off.Only if they read the thread and respond with an explanation. After all, the change is certainly not wrong, just slightly slowing things down. Jan