On Thu, Aug 26, Patrick Colp wrote:
> Hi Olaf,
>
> Thanks for all this info. The basic idea for grant table stuff is that
> if it''s already been granted, then it shouldn''t be paged
out (this
> case should be covered by reference counting). In the case where a
> guest wants to grant a page that is currently paged out, then that
> page should be brought back in before the grant is made. I think the
> idea here would maybe be to pause the VCPU running the grant code,
> page in the page, then resume the VCPU. The ip wouldn''t be
advanced at
> all, so it should re-execute the instruction to grant causing it to
> trap back into Xen and grant the (now paged in) page. The grant
> interface has changed a lot since I originally worked on it, so
it''s
> not surprising to me that there''s issues here. An alternative
approach
> is to return an error to the guest which results in it retrying the
> mapping itself (without needing to pause/unpause the VCPU). I
haven''t
> looked at the PV driver code to know if there''s already a retry
> mechanism built in or not. If there is, then that could be leveraged.
> If not, then either we pause the VCPU as mentioned above or code in a
> new path. I''m generally in favour of approaches that
don''t break old
> systems unless absolutely necessary (for performance reasons, say).
> When I get some time I''ll read over the grant mapping stuff again
to
> see if I can make more sense of it.
Patrick,
the patch below appears to fix xenpaging for me, with SLES11 SP1.
One thing was that both PV drivers and qemu drivers were active, so I
got both hda and sda. I blacklisted scsi_mod for the time being in
modprobe.conf.local.
The other thing is the grant entry handling. There is some incomplete
code already in xen-unstable. But there are more places to cover. In my
testing GNTTABOP_map_grant_ref and GNTTABOP_copy are used, so thats what
my patch changes.
The result of this patch is that all PV drivers have to check the
op.status of each map entry and retry this entry again in a loop until
GNTST_eagain changes to something else.
If I read the xenpaging code correctly, the vcpu is paused so that busy
loop on the client side should be no issue. What I get in the logs with
some debug in the new __get_paged_frame function is that p2mt changes
from 10 to 11 to 12 until the mfn becomes valid.
This appears to fix my testcase, although I just notice right now that
__get_paged_frame is called in a busy loop with the same gfn right now,
with p2mt == p2m_ram_paging_in_start. There are still some bugs
somewhere either in the PV drivers or xen itself. Or its my huge debug
code all over the place, keeping the serial line busy with sync_console.
This patch is just a heads up what I have found so far.
Olaf
---
xen/common/grant_table.c | 97 ++++++++++++++++++++++++++++-------------------
1 file changed, 59 insertions(+), 38 deletions(-)
--- xen-unstable.hg-4.1.22068.orig/xen/common/grant_table.c
+++ xen-unstable.hg-4.1.22068/xen/common/grant_table.c
@@ -139,6 +139,34 @@ shared_entry_header(struct grant_table *
#define active_entry(t, e) \
((t)->active[(e)/ACGNT_PER_PAGE][(e)%ACGNT_PER_PAGE])
+/* Check if the page has been paged out */
+static int __get_paged_frame(unsigned long gfn, unsigned long *frame, int
readonly, struct domain *rd)
+{
+ struct p2m_domain *p2m;
+ p2m_type_t p2mt;
+ mfn_t mfn;
+ int rc = GNTST_okay;
+
+ p2m = p2m_get_hostp2m(rd);
+ if ( readonly )
+ mfn = gfn_to_mfn(p2m, gfn, &p2mt);
+ else
+ mfn = gfn_to_mfn_unshare(p2m, gfn, &p2mt, 1);
+
+ if ( p2m_is_valid(p2mt) ) {
+ *frame = mfn_x(mfn);
+ if ( p2m_is_paged(p2mt) )
+ p2m_mem_paging_populate(p2m, gfn);
+ if ( p2m_is_paging(p2mt) )
+ rc = GNTST_eagain;
+ } else {
+ *frame = INVALID_MFN;
+ rc = GNTST_bad_page;
+ }
+
+ return rc;
+}
+
static inline int
__get_maptrack_handle(
struct grant_table *t)
@@ -527,14 +555,16 @@ __gnttab_map_grant_ref(
if ( !act->pin )
{
+ unsigned long gfn;
+ unsigned long frame;
+
+ gfn = sha1 ? sha1->frame : sha2->full_page.frame;
+ rc = __get_paged_frame(gfn, &frame, !!(op->flags &
GNTMAP_readonly), rd);
+ if ( rc != GNTST_okay )
+ goto unlock_out;
+ act->gfn = gfn;
act->domid = ld->domain_id;
- if ( sha1 )
- act->gfn = sha1->frame;
- else
- act->gfn = sha2->full_page.frame;
- act->frame = (op->flags & GNTMAP_readonly) ?
- gmfn_to_mfn(rd, act->gfn) :
- gfn_to_mfn_private(rd, act->gfn);
+ act->frame = frame;
act->start = 0;
act->length = PAGE_SIZE;
act->is_sub_page = 0;
@@ -1697,6 +1727,7 @@ __acquire_grant_for_copy(
domid_t trans_domid;
grant_ref_t trans_gref;
struct domain *rrd;
+ unsigned long gfn;
unsigned long grant_frame;
unsigned trans_page_off;
unsigned trans_length;
@@ -1814,9 +1845,11 @@ __acquire_grant_for_copy(
}
else if ( sha1 )
{
- act->gfn = sha1->frame;
- grant_frame = readonly ? gmfn_to_mfn(rd, act->gfn) :
- gfn_to_mfn_private(rd, act->gfn);
+ gfn = sha1->frame;
+ rc = __get_paged_frame(gfn, &grant_frame, readonly, rd);
+ if ( rc != GNTST_okay )
+ goto unlock_out;
+ act->gfn = gfn;
is_sub_page = 0;
trans_page_off = 0;
trans_length = PAGE_SIZE;
@@ -1824,9 +1857,11 @@ __acquire_grant_for_copy(
}
else if ( !(sha2->hdr.flags & GTF_sub_page) )
{
- act->gfn = sha2->full_page.frame;
- grant_frame = readonly ? gmfn_to_mfn(rd, act->gfn) :
- gfn_to_mfn_private(rd, act->gfn);
+ gfn = sha2->full_page.frame;
+ rc = __get_paged_frame(gfn, &grant_frame, readonly, rd);
+ if ( rc != GNTST_okay )
+ goto unlock_out;
+ act->gfn = gfn;
is_sub_page = 0;
trans_page_off = 0;
trans_length = PAGE_SIZE;
@@ -1834,9 +1869,11 @@ __acquire_grant_for_copy(
}
else
{
- act->gfn = sha2->sub_page.frame;
- grant_frame = readonly ? gmfn_to_mfn(rd, act->gfn) :
- gfn_to_mfn_private(rd, act->gfn);
+ gfn = sha2->sub_page.frame;
+ rc = __get_paged_frame(gfn, &grant_frame, readonly, rd);
+ if ( rc != GNTST_okay )
+ goto unlock_out;
+ act->gfn = gfn;
is_sub_page = 1;
trans_page_off = sha2->sub_page.page_off;
trans_length = sha2->sub_page.length;
@@ -1932,17 +1969,9 @@ __gnttab_copy(
else
{
#ifdef CONFIG_X86
- p2m_type_t p2mt;
- struct p2m_domain *p2m = p2m_get_hostp2m(sd);
- s_frame = mfn_x(gfn_to_mfn(p2m, op->source.u.gmfn, &p2mt));
- if ( !p2m_is_valid(p2mt) )
- s_frame = INVALID_MFN;
- if ( p2m_is_paging(p2mt) )
- {
- p2m_mem_paging_populate(p2m, op->source.u.gmfn);
- rc = -ENOENT;
- goto error_out;
- }
+ rc = __get_paged_frame(op->source.u.gmfn, &s_frame, 1, sd);
+ if ( rc != GNTST_okay )
+ goto error_out;
#else
s_frame = gmfn_to_mfn(sd, op->source.u.gmfn);
#endif
@@ -1979,17 +2008,9 @@ __gnttab_copy(
else
{
#ifdef CONFIG_X86
- p2m_type_t p2mt;
- struct p2m_domain *p2m = p2m_get_hostp2m(dd);
- d_frame = mfn_x(gfn_to_mfn_unshare(p2m, op->dest.u.gmfn, &p2mt,
1));
- if ( !p2m_is_valid(p2mt) )
- d_frame = INVALID_MFN;
- if ( p2m_is_paging(p2mt) )
- {
- p2m_mem_paging_populate(p2m, op->dest.u.gmfn);
- rc = -ENOENT;
- goto error_out;
- }
+ rc = __get_paged_frame(op->dest.u.gmfn, &d_frame, 0, dd);
+ if ( rc != GNTST_okay )
+ goto error_out;
#else
d_frame = gmfn_to_mfn(dd, op->dest.u.gmfn);
#endif
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel