thr3ads.net - Xen devel - [PATCH 0 of 4 RFC] Populate-on-demand: Check pages being returned by the balloon driver [Jun 2012]

If this information is useful, please help other people find it:
Share via:

George Dunlap

2012-Jun-08 11:54 UTC

[PATCH 0 of 4 RFC] Populate-on-demand: Check pages being returned by the balloon driver

Populate-on-demand: Check pages being returned by the balloon driver

This patch series is the second result of my work last summer on
decreasing fragmentation of superpages in a guests'' p2m when using
populate-on-demand.

This patch series is against 4.1; I''m posting it to get feedback on
the viability of getting a ported version of this patch into 4.2.

As with the previous series, the patces against 4.1 have been
extensively in the XenServer testing framework and have been in use by
XenServer customers for over 9 months now.  But the p2m code has
changed extensively in that time, so one could argue that the testing
doesn''t give us the same degree of confidence in the patches against
4.2 as against 4.1.

Looking at it now, there are a number of ugly bits -- "#if 0" and
such; please overlook this for now, as it will all be cleaned up as
part of the porting process.

If this looks good, I''ll do the work of porting and testing it before
posting it again.

George Dunlap

2012-Jun-08 11:54 UTC

head link

[PATCH 1 of 4 RFC] xen, p2m: get_entry returns level of entry as well

Return the p2m level of the entry which filled this request.
Intended to be used to see if pages returned by the balloon
driver are part of a superpage, and reclaim them if so.

Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3644,6 +3644,7 @@ long do_hvm_op(unsigned long op, XEN_GUE
         p2m_type_t t;
         p2m_access_t ac;
         mfn_t mfn;
+        int l;
 
         /* Interface access to internal p2m accesses */
         hvmmem_access_t memaccess[] = {
@@ -3681,7 +3682,7 @@ long do_hvm_op(unsigned long op, XEN_GUE
                 goto param_fail6;
 
             rc = -ESRCH;
-            mfn = p2m->get_entry(p2m, a.pfn, &t, &ac, p2m_query);
+            mfn = p2m->get_entry(p2m, a.pfn, &t, &ac, &l,
p2m_query);
 
             if ( mfn_x(mfn) == INVALID_MFN )
                 goto param_fail6;
diff --git a/xen/arch/x86/mm/hap/p2m-ept.c b/xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c
+++ b/xen/arch/x86/mm/hap/p2m-ept.c
@@ -527,14 +527,14 @@ out:
 /* Read ept p2m entries */
 static mfn_t ept_get_entry(struct p2m_domain *p2m,
                            unsigned long gfn, p2m_type_t *t, p2m_access_t* a,
-                           p2m_query_t q)
+                           int *level, p2m_query_t q)
 {
     struct domain *d = p2m->domain;
     ept_entry_t *table = map_domain_page(ept_get_asr(d));
     unsigned long gfn_remainder = gfn;
     ept_entry_t *ept_entry;
     u32 index;
-    int i;
+    int i=5;
     int ret = 0;
     mfn_t mfn = _mfn(INVALID_MFN);
 
@@ -616,6 +616,7 @@ static mfn_t ept_get_entry(struct p2m_do
     }
 
 out:
+    *level=i+1;
     unmap_domain_page(table);
     return mfn;
 }
@@ -709,9 +710,10 @@ out:
 
 static mfn_t ept_get_entry_current(struct p2m_domain *p2m,
                                    unsigned long gfn, p2m_type_t *t,
p2m_access_t *a,
+                                   int *l,
                                    p2m_query_t q)
 {
-    return ept_get_entry(p2m, gfn, t, a, q);
+    return ept_get_entry(p2m, gfn, t, a, l, q);
 }
 
 /*
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -1579,7 +1579,7 @@ out:
 
 static mfn_t
 p2m_gfn_to_mfn(struct p2m_domain *p2m, unsigned long gfn, p2m_type_t *t,
p2m_access_t *a,
-               p2m_query_t q)
+               int *level, p2m_query_t q)
 {
     mfn_t mfn;
     paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
@@ -1594,7 +1594,8 @@ p2m_gfn_to_mfn(struct p2m_domain *p2m, u
      * XXX we will return p2m_invalid for unmapped gfns */
     *t = p2m_mmio_dm;
     /* Not implemented except with EPT */
-    *a = p2m_access_rwx; 
+    *a = p2m_access_rwx;
+    *level = 5;
 
     mfn = pagetable_get_mfn(p2m_get_pagetable(p2m));
 
@@ -1602,6 +1603,7 @@ p2m_gfn_to_mfn(struct p2m_domain *p2m, u
         /* This pfn is higher than the highest the p2m map currently holds */
         return _mfn(INVALID_MFN);
 
+    *level = 4;
 #if CONFIG_PAGING_LEVELS >= 4
     {
         l4_pgentry_t *l4e = map_domain_page(mfn_x(mfn));
@@ -1615,6 +1617,7 @@ p2m_gfn_to_mfn(struct p2m_domain *p2m, u
         unmap_domain_page(l4e);
     }
 #endif
+    *level=3;
     {
         l3_pgentry_t *l3e = map_domain_page(mfn_x(mfn));
 #if CONFIG_PAGING_LEVELS == 3
@@ -1662,6 +1665,7 @@ pod_retry_l3:
 
     l2e = map_domain_page(mfn_x(mfn));
     l2e += l2_table_offset(addr);
+    *level=2;
 
 pod_retry_l2:
     if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 )
@@ -1695,6 +1699,7 @@ pod_retry_l2:
 
     l1e = map_domain_page(mfn_x(mfn));
     l1e += l1_table_offset(addr);
+    *level=1;
 pod_retry_l1:
     if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 )
     {
@@ -1723,7 +1728,7 @@ pod_retry_l1:
 /* Read the current domain''s p2m table (through the linear mapping).
*/
 static mfn_t p2m_gfn_to_mfn_current(struct p2m_domain *p2m,
                                     unsigned long gfn, p2m_type_t *t,
p2m_access_t *a,
-                                    p2m_query_t q)
+                                    int *level, p2m_query_t q)
 {
     mfn_t mfn = _mfn(INVALID_MFN);
     p2m_type_t p2mt = p2m_mmio_dm;
@@ -1735,6 +1740,7 @@ static mfn_t p2m_gfn_to_mfn_current(stru
 
     /* Not currently implemented except for EPT */
     *a = p2m_access_rwx;
+    *level=5;
 
     if ( gfn <= p2m->max_mapped_pfn )
     {
@@ -1749,6 +1755,7 @@ static mfn_t p2m_gfn_to_mfn_current(stru
                / sizeof(l1_pgentry_t));
 
 #if CONFIG_PAGING_LEVELS >= 4
+    *level=3;
         /*
          * Read & process L3
          */
@@ -1802,6 +1809,7 @@ static mfn_t p2m_gfn_to_mfn_current(stru
         p2m_entry = &__linear_l1_table[l1_linear_offset(RO_MPT_VIRT_START)
                                        + l2_linear_offset(addr)];
 
+    *level=2;
     pod_retry_l2:
         ret = __copy_from_user(&l2e,
                                p2m_entry,
@@ -1855,6 +1863,7 @@ static mfn_t p2m_gfn_to_mfn_current(stru
 
         /* Need to __copy_from_user because the p2m is sparse and this
          * part might not exist */
+    *level=1;
     pod_retry_l1:
         p2m_entry = &phys_to_machine_mapping[gfn];
 
@@ -2069,6 +2078,7 @@ void p2m_teardown(struct p2m_domain *p2m
     unsigned long gfn;
     p2m_type_t t;
     p2m_access_t a;
+    int l;
     mfn_t mfn;
 #endif
 
@@ -2077,7 +2087,7 @@ void p2m_teardown(struct p2m_domain *p2m
 #ifdef __x86_64__
     for ( gfn=0; gfn < p2m->max_mapped_pfn; gfn++ )
     {
-        mfn = p2m->get_entry(p2m, gfn, &t, &a, p2m_query);
+        mfn = p2m->get_entry(p2m, gfn, &t, &a, &l, p2m_query);
         if ( mfn_valid(mfn) && (t == p2m_ram_shared) )
             BUG_ON(mem_sharing_unshare_page(p2m, gfn,
MEM_SHARING_DESTROY_GFN));
     }
@@ -2375,6 +2385,7 @@ p2m_remove_page(struct p2m_domain *p2m, 
     mfn_t mfn_return;
     p2m_type_t t;
     p2m_access_t a;
+    int l;
 
     if ( !paging_mode_translate(p2m->domain) )
     {
@@ -2390,7 +2401,7 @@ p2m_remove_page(struct p2m_domain *p2m, 
     {
         for ( i = 0; i < (1UL << page_order); i++ )
         {
-            mfn_return = p2m->get_entry(p2m, gfn + i, &t, &a,
p2m_query);
+            mfn_return = p2m->get_entry(p2m, gfn + i, &t, &a,
&l, p2m_query);
             if ( !p2m_is_grant(t) )
                 set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
             ASSERT( !p2m_is_valid(t) || mfn + i == mfn_x(mfn_return) );
@@ -3079,10 +3090,11 @@ void p2m_mem_access_check(unsigned long 
     mfn_t mfn;
     p2m_type_t p2mt;
     p2m_access_t p2ma;
+    int p2ml;
     
     /* First, handle rx2rw conversion automatically */
     p2m_lock(p2m);
-    mfn = p2m->get_entry(p2m, gfn, &p2mt, &p2ma, p2m_query);
+    mfn = p2m->get_entry(p2m, gfn, &p2mt, &p2ma, &p2ml,
p2m_query);
 
     if ( access_w && p2ma == p2m_access_rx2rw ) 
     {
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -216,11 +216,13 @@ struct p2m_domain {
                                        unsigned long gfn,
                                        p2m_type_t *p2mt,
                                        p2m_access_t *p2ma,
+                                       int *level,
                                        p2m_query_t q);
     mfn_t              (*get_entry_current)(struct p2m_domain *p2m,
                                             unsigned long gfn,
                                             p2m_type_t *p2mt,
                                             p2m_access_t *p2ma,
+                                            int *level,
                                             p2m_query_t q);
     void               (*change_entry_type_global)(struct p2m_domain *p2m,
                                                    p2m_type_t ot,
@@ -334,7 +336,8 @@ static inline mfn_t gfn_to_mfn_type_curr
                                             p2m_access_t *a,
                                             p2m_query_t q)
 {
-    return p2m->get_entry_current(p2m, gfn, t, a, q);
+    int l;
+    return p2m->get_entry_current(p2m, gfn, t, a, &l, q);
 }
 
 /* Read P2M table, mapping pages as we go.
@@ -344,7 +347,8 @@ gfn_to_mfn_type_p2m(struct p2m_domain *p
                               p2m_type_t *t, p2m_query_t q)
 {
     p2m_access_t a = 0;
-    return p2m->get_entry(p2m, gfn, t, &a, q);
+    int l;
+    return p2m->get_entry(p2m, gfn, t, &a, &l, q);
 }

George Dunlap

2012-Jun-08 11:54 UTC

head link

[PATCH 2 of 4 RFC] xen, pod: Check zero-check pages returned by the balloon driver

When a gfn is passed back by the balloon driver to Xen, check
to see if it''s in a superpage; and if so, check to see if that
page is zeroed, and if so reclaim it.

This patch significantly reduces superpage fragmentation when
running on a high number of vcpus and significant memory
ballooning.

Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>

diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -74,6 +74,7 @@ boolean_param("hap_2mb", opt_hap_2mb);
 
 #define SUPERPAGE_PAGES (1UL << 9)
 #define superpage_aligned(_x)  (((_x)&(SUPERPAGE_PAGES-1))==0)
+#define order_aligned(_x,_o)  (((_x)&((1<<(_o))-1))==0)
 
 static unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn)
 {
@@ -767,6 +768,9 @@ p2m_pod_offline_or_broken_replace(struct
     return;
 }
 
+static int p2m_pod_zero_check_superpage(struct p2m_domain *p2m, unsigned long
gfn);
+
+
 /* This function is needed for two reasons:
  * + To properly handle clearing of PoD entries
  * + To "steal back" memory being freed for the PoD cache, rather
than
@@ -774,6 +778,9 @@ p2m_pod_offline_or_broken_replace(struct
  *
  * Once both of these functions have been completed, we can return and
  * allow decrease_reservation() to handle everything else.
+ *
+ * Because the balloon driver races with the page scrubber, we also
+ * scan for zeroed superpages we can reclaim.
  */
 int
 p2m_pod_decrease_reservation(struct domain *d,
@@ -781,11 +788,16 @@ p2m_pod_decrease_reservation(struct doma
                              unsigned int order)
 {
     int ret=0;
-    int i;
+    int i, entry_size=0;
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
     int steal_for_cache = 0;
-    int pod = 0, nonpod = 0, ram = 0;
+    int nonpod = 0;
+#if 0
+    unsigned long gfn_aligned;
+
+    int nonpod = 0, ram = 0;
+#endif
     
 
     /* If we don''t have any outstanding PoD entries, let things take
their
@@ -794,7 +806,8 @@ p2m_pod_decrease_reservation(struct doma
         goto out;
 
     /* Figure out if we need to steal some freed memory for our cache */
-    steal_for_cache =  ( p2m->pod.entry_count > p2m->pod.count );
+    if ( p2m->pod.entry_count > p2m->pod.count )
+        steal_for_cache = ( p2m->pod.entry_count - p2m->pod.count );
 
     p2m_lock(p2m);
     audit_p2m(p2m, 1);
@@ -802,7 +815,7 @@ p2m_pod_decrease_reservation(struct doma
     if ( unlikely(d->is_dying) )
         goto out_unlock;
 
-    /* See what''s in here. */
+#if 0
     /* FIXME: Add contiguous; query for PSE entries? */
     for ( i=0; i<(1<<order); i++)
     {
@@ -876,12 +889,105 @@ p2m_pod_decrease_reservation(struct doma
         }
     }    
 
+#else
+/*
+ * while(more pages to look at)
+ *  - Find the next page unit
+ *    - if req_decrease >= page_size
+ *         or page is zero
+ *       - Reclaim whole page
+ *    - else
+ *       - Reclaim req_decrease         
+ *       - break out of the loop
+ */
+    /* FIXME: Add contiguous; query for PSE entries? */
+    for ( i=0; i<(1<<order); i+=entry_size)
+    {
+        mfn_t mfn;
+        p2m_type_t t;
+        p2m_access_t a;
+        int l, entry_order;
+
+        /* See what''s in here. */
+        mfn = p2m->get_entry(p2m, gpfn + i, &t, &a, &l,
p2m_query);
+
+        /* NB: entry_size is used by the loop update; so if you change the
+         * level at which you''re working (i.e., decide to work w/ 4k
+         * pages instead of a superpage), you must change entry_size
+         * so that the loop logic works right. */
+        entry_order = (l-1)*9;
+        entry_size = 1<<(entry_order);
+
+        /* If this is in a superpage and we need ram, try to nab it */
+        if ( steal_for_cache
+             && entry_order == 9
+             && p2m_is_ram(t)
+             && p2m_pod_zero_check_superpage(p2m, gpfn &
~(SUPERPAGE_PAGES-1)) )
+        {
+            /* Now we have a PoD entry to deal with; update
+             * steal_for_cache, and set entry_size to 0 so that we
+             * re-process the entry. */
+            entry_size=0;
+
+            /* Update steal_for_cache */
+            if ( p2m->pod.entry_count > p2m->pod.count )
+                steal_for_cache = ( p2m->pod.entry_count - p2m->pod.count
);
+            else
+                steal_for_cache = 0;
+            
+            continue;
+        }
+
+        /* Switch to singleton pages if we don''t want to reclaim the
+         * whole page, or if the start is not SP aligned.  Future
+         * iterations to this superpage frame will then be */
+        if ( entry_order > 0
+             && ( i + entry_size > (1<<order)
+                  || !superpage_aligned(gpfn + i) ) )
+        {
+            entry_order = 0;
+            entry_size = 1;
+        }
+
+        if ( t == p2m_populate_on_demand )
+        {
+            set_p2m_entry(p2m, gpfn+i, _mfn(INVALID_MFN), entry_order,
p2m_invalid, p2m->default_access);
+            p2m->pod.entry_count-=(1<<entry_order); /* Lock: p2m */
+            BUG_ON(p2m->pod.entry_count < 0);
+        }
+        else if( p2m_is_ram(t) )
+        {
+            if ( steal_for_cache )
+            {
+                struct page_info *page;
+                
+                ASSERT(mfn_valid(mfn));
+                
+                page = mfn_to_page(mfn);
+                
+                set_p2m_entry(p2m, gpfn + i, _mfn(INVALID_MFN), entry_order,
+                              p2m_invalid, p2m->default_access);
+                set_gpfn_from_mfn(mfn_x(mfn), INVALID_M2P_ENTRY);
+                
+                p2m_pod_cache_add(p2m, page, entry_order);
+                
+                /* Update steal_for_cache */
+                if ( p2m->pod.entry_count > p2m->pod.count )
+                    steal_for_cache = ( p2m->pod.entry_count -
p2m->pod.count );
+                else
+                    steal_for_cache = 0;
+            }
+            else
+                nonpod++;
+        }
+    }
+#endif
     /* If there are no more non-PoD entries, tell decrease_reservation() that
      * there''s nothing left to do. */
     if ( nonpod == 0 )
         ret = 1;
 
-out_entry_check:
+//out_entry_check:
     /* If we''ve reduced our "liabilities" beyond our
"assets", free some */
     if ( p2m->pod.entry_count < p2m->pod.count )
     {
@@ -1040,6 +1146,8 @@ p2m_pod_zero_check_superpage(struct p2m_
     p2m_pod_cache_add(p2m, mfn_to_page(mfn0), 9);
     p2m->pod.entry_count += SUPERPAGE_PAGES;
 
+    ret = SUPERPAGE_PAGES;
+
 out_reset:
     if ( reset )
         set_p2m_entry(p2m, gfn, mfn0, 9, type0, p2m->default_access);

George Dunlap

2012-Jun-08 11:54 UTC

head link

[PATCH 3 of 4 RFC] imported patch CA-72761-libxc-error.patch

diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c
+++ b/tools/libxc/xc_domain_restore.c
@@ -87,8 +87,8 @@ static ssize_t rdexact(xc_interface *xch
         if ( (len == -1) && ((errno == EINTR) || (errno == EAGAIN)) )
             continue;
         if ( len == 0 ) {
-            ERROR("0-length read");
-            errno = 0;
+            errno = EPIPE;
+            return -1;
         }
         if ( len <= 0 ) {
             ERROR("read_exact_timed failed (read rc: %d, errno: %d, size
%d, offset %d)",
@@ -1253,7 +1253,7 @@ int xc_domain_restore(xc_interface *xch,
         if ( !ctx->completed ) {
             pagebuf.nr_physpages = pagebuf.nr_pages = 0;
             if ( pagebuf_get_one(xch, ctx, &pagebuf, io_fd, dom) < 0 ) {
-	        DPERROR(tdom, "Error when reading batch");
+	        /* pagebuf_get_one already returned a proper error */
                 goto out;
             }
         }

George Dunlap

2012-Jun-08 11:54 UTC

head link

[PATCH 4 of 4 RFC] imported patch pygrub-invalid-disk-catch.patch

diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub
--- a/tools/pygrub/src/pygrub
+++ b/tools/pygrub/src/pygrub
@@ -814,22 +814,27 @@ if __name__ == "__main__":
 
     # get list of offsets into file which start partitions
     part_offs = get_partition_offsets(file)
+    if len(part_offs) < 1:
+        raise RuntimeError, "Disk has no partitions"
 
-    fs = fsimage.open(file, part_offs[0], bootfsoptions)
+    try:
+        fs = fsimage.open(file, part_offs[0], bootfsoptions)
 
-    # We always boot the "default" kernel if it exists, rather than 
-    # parsing the grub menu
-    initrd_path = None
-    if fs.file_exists("/xenkernel"):
-        incfg["kernel"] = "/xenkernel"
-        incfg["args"] = default_args
-        if fs.file_exists("/xeninitrd"):
-            incfg["ramdisk"] = "/xeninitrd"
-    elif fs.file_exists("/boot/xenkernel"):
-        incfg["kernel"] = "/boot/xenkernel"
-        incfg["args"] = default_args
-        if fs.file_exists("/boot/xeninitrd"):
-            incfg["ramdisk"] = "/boot/xeninitrd"
+        # We always boot the "default" kernel if it exists, rather
than
+        # parsing the grub menu
+        initrd_path = None
+        if fs.file_exists("/xenkernel"):
+            incfg["kernel"] = "/xenkernel"
+            incfg["args"] = default_args
+            if fs.file_exists("/xeninitrd"):
+                incfg["ramdisk"] = "/xeninitrd"
+        elif fs.file_exists("/boot/xenkernel"):
+            incfg["kernel"] = "/boot/xenkernel"
+            incfg["args"] = default_args
+            if fs.file_exists("/boot/xeninitrd"):
+                incfg["ramdisk"] = "/boot/xeninitrd"
+    except:
+        raise RuntimeError, "Unable to find partition containing
kernel"
 
     for offset in part_offs:
         try:

George Dunlap

2012-Jun-08 11:57 UTC

head link

Re: [PATCH 3 of 4 RFC] imported patch CA-72761-libxc-error.patch

GAH... please ignore this particular patch, it''s an accident.

On Fri, Jun 8, 2012 at 12:54 PM, George Dunlap
<george.dunlap@eu.citrix.com> wrote:> diff --git a/tools/libxc/xc_domain_restore.c
b/tools/libxc/xc_domain_restore.c
> --- a/tools/libxc/xc_domain_restore.c
> +++ b/tools/libxc/xc_domain_restore.c
> @@ -87,8 +87,8 @@ static ssize_t rdexact(xc_interface *xch
>         if ( (len == -1) && ((errno == EINTR) || (errno == EAGAIN))
)
>             continue;
>         if ( len == 0 ) {
> -            ERROR("0-length read");
> -            errno = 0;
> +            errno = EPIPE;
> +            return -1;
>         }
>         if ( len <= 0 ) {
>             ERROR("read_exact_timed failed (read rc: %d, errno: %d,
size %d, offset %d)",
> @@ -1253,7 +1253,7 @@ int xc_domain_restore(xc_interface *xch,
>         if ( !ctx->completed ) {
>             pagebuf.nr_physpages = pagebuf.nr_pages = 0;
>             if ( pagebuf_get_one(xch, ctx, &pagebuf, io_fd, dom) < 0
) {
> -               DPERROR(tdom, "Error when reading batch");
> +               /* pagebuf_get_one already returned a proper error */
>                 goto out;
>             }
>         }
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

George Dunlap

2012-Jun-08 11:58 UTC

head link

Re: [PATCH 4 of 4 RFC] imported patch pygrub-invalid-disk-catch.patch

GAH... please ignore this particular patch, it''s a mistake.

On Fri, Jun 8, 2012 at 12:54 PM, George Dunlap
<george.dunlap@eu.citrix.com> wrote:> diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub
> --- a/tools/pygrub/src/pygrub
> +++ b/tools/pygrub/src/pygrub
> @@ -814,22 +814,27 @@ if __name__ == "__main__":
>
>     # get list of offsets into file which start partitions
>     part_offs = get_partition_offsets(file)
> +    if len(part_offs) < 1:
> +        raise RuntimeError, "Disk has no partitions"
>
> -    fs = fsimage.open(file, part_offs[0], bootfsoptions)
> +    try:
> +        fs = fsimage.open(file, part_offs[0], bootfsoptions)
>
> -    # We always boot the "default" kernel if it exists, rather
than
> -    # parsing the grub menu
> -    initrd_path = None
> -    if fs.file_exists("/xenkernel"):
> -        incfg["kernel"] = "/xenkernel"
> -        incfg["args"] = default_args
> -        if fs.file_exists("/xeninitrd"):
> -            incfg["ramdisk"] = "/xeninitrd"
> -    elif fs.file_exists("/boot/xenkernel"):
> -        incfg["kernel"] = "/boot/xenkernel"
> -        incfg["args"] = default_args
> -        if fs.file_exists("/boot/xeninitrd"):
> -            incfg["ramdisk"] = "/boot/xeninitrd"
> +        # We always boot the "default" kernel if it exists,
rather than
> +        # parsing the grub menu
> +        initrd_path = None
> +        if fs.file_exists("/xenkernel"):
> +            incfg["kernel"] = "/xenkernel"
> +            incfg["args"] = default_args
> +            if fs.file_exists("/xeninitrd"):
> +                incfg["ramdisk"] = "/xeninitrd"
> +        elif fs.file_exists("/boot/xenkernel"):
> +            incfg["kernel"] = "/boot/xenkernel"
> +            incfg["args"] = default_args
> +            if fs.file_exists("/boot/xeninitrd"):
> +                incfg["ramdisk"] = "/boot/xeninitrd"
> +    except:
> +        raise RuntimeError, "Unable to find partition containing
kernel"
>
>     for offset in part_offs:
>         try:
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

George Dunlap

2012-Jun-08 11:58 UTC

head link

Re: [PATCH 0 of 4 RFC] Populate-on-demand: Check pages being returned by the balloon driver

Hmm, a mistake in my command line (and the lack of -n) sent two
extraneous patches.  Please look at just patches 1 and 2, and ignore 3
and 4.

Thanks,
 -George

On Fri, Jun 8, 2012 at 12:54 PM, George Dunlap
<george.dunlap@eu.citrix.com> wrote:> Populate-on-demand: Check pages being returned by the balloon driver
>
> This patch series is the second result of my work last summer on
> decreasing fragmentation of superpages in a guests'' p2m when using
> populate-on-demand.
>
> This patch series is against 4.1; I''m posting it to get feedback
on
> the viability of getting a ported version of this patch into 4.2.
>
> As with the previous series, the patces against 4.1 have been
> extensively in the XenServer testing framework and have been in use by
> XenServer customers for over 9 months now.  But the p2m code has
> changed extensively in that time, so one could argue that the testing
> doesn''t give us the same degree of confidence in the patches
against
> 4.2 as against 4.1.
>
> Looking at it now, there are a number of ugly bits -- "#if 0" and
> such; please overlook this for now, as it will all be cleaned up as
> part of the porting process.
>
> If this looks good, I''ll do the work of porting and testing it
before
> posting it again.
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

Tim Deegan

2012-Jun-14 10:14 UTC

head link

Re: [PATCH 0 of 4 RFC] Populate-on-demand: Check pages being returned by the balloon driver

At 12:54 +0100 on 08 Jun (1339160090), George Dunlap
wrote:> Populate-on-demand: Check pages being returned by the balloon driver
> 
> This patch series is the second result of my work last summer on
> decreasing fragmentation of superpages in a guests'' p2m when using
> populate-on-demand.
> 
> This patch series is against 4.1; I''m posting it to get feedback
on
> the viability of getting a ported version of this patch into 4.2.
> 
> As with the previous series, the patces against 4.1 have been
> extensively in the XenServer testing framework and have been in use by
> XenServer customers for over 9 months now.  But the p2m code has
> changed extensively in that time, so one could argue that the testing
> doesn''t give us the same degree of confidence in the patches
against
> 4.2 as against 4.1.
I think there''s a reasonable chance of getting this in for 4.2 -- any
errors should at least be isolated in the PoD parts. :)

Tim Deegan

2012-Jun-14 10:21 UTC

head link

Re: [PATCH 1 of 4 RFC] xen, p2m: get_entry returns level of entry as well

At 12:54 +0100 on 08 Jun (1339160091), George Dunlap
wrote:> Return the p2m level of the entry which filled this request.
> Intended to be used to see if pages returned by the balloon
> driver are part of a superpage, and reclaim them if so.
This looks broadly correct, but it''s a bit invasive.  If
there''s any way
to rework patch 2 not to need it that would be helpful.  It looks like
the main use is for detecting and removing superpage PoD entries; maybe
that could be done with a sweep like you have in the non-PoD case?

One niggle: returning level=5 on error is non-obvious, and it doesn''t
look like your code in patch 2 uses that.

Cheers,

Tim.

Tim Deegan

2012-Jun-14 10:28 UTC

head link

Re: [PATCH 2 of 4 RFC] xen, pod: Check zero-check pages returned by the balloon driver

At 12:54 +0100 on 08 Jun (1339160092), George Dunlap
wrote:> +        else if( p2m_is_ram(t) )
> +        {
> +            if ( steal_for_cache )
> +            {
> +                struct page_info *page;
> +                
> +                ASSERT(mfn_valid(mfn));
I think this assertion is broken by the paging code, which adds
paged-out pages to the RAM types.  One fix is to undo that change and
have p2m_is_ram() only be true for present pages, but I think for 4.2 it
would be less disruptive to test here for the paged-out and paging
types.

Cc''ing Andres on the larger question -- do you think we can have
p2m_is_ram() imply mfn_valid() again?  I''m not sure whether it will
make
things cleaner (or at least move the burden of handling paged-out memory
into the paging code) or add more boilerplate to paths where the paging
will be handled after a check for p2m_os_ram().  What do you think?

Cheers,

Tim.

Andres Lagar-Cavilla

2012-Jun-14 12:57 UTC

head link

Re: [PATCH 2 of 4 RFC] xen, pod: Check zero-check pages returned by the balloon driver

> At 12:54 +0100 on 08 Jun (1339160092), George Dunlap wrote:
>> +        else if( p2m_is_ram(t) )
>> +        {
>> +            if ( steal_for_cache )
>> +            {
>> +                struct page_info *page;
>> +
>> +                ASSERT(mfn_valid(mfn));
>
> I think this assertion is broken by the paging code, which adds
> paged-out pages to the RAM types.  One fix is to undo that change and
> have p2m_is_ram() only be true for present pages, but I think for 4.2 it
> would be less disruptive to test here for the paged-out and paging
> types.
>
> Cc''ing Andres on the larger question -- do you think we can have
> p2m_is_ram() imply mfn_valid() again?  I''m not sure whether it
will make
> things cleaner (or at least move the burden of handling paged-out memory
> into the paging code) or add more boilerplate to paths where the paging
> will be handled after a check for p2m_os_ram().  What do you think?
>
Imho, pages in any of the states of the paging state machine, are ram.
They are not mmio, grants or whatever. They just happen to be absent at
the moment.

This is not an immediately useful response because its corollary entails
that PoD pages are also ram.

There are a few possible ways around this. One problem is that
p2m_ram_paging_in may or may not have a backing mfn, so we can''t just
check the p2mt, and that is frankly annoying (plenty of wth conditionals
in the p2m code relate to this). We could disambiguate with two states,
e.g.:
 p2m_ram_paging_in_absent
 p2m_ram_paging_present
 p2m_is_paging_in(p2mt) returns true for both -> use in most cases where
p2m_ram_paging_in is used right now
 p2m_is_ram_present(p2mt) returns true for p2m_ram_rw, log dirty, shared,
paging_out and paging_in present -> used in cases like above
 p2m_is_ram(p2mt) includes ram_present and pod and paging_in_absent ->
used in broader cases

That looks like a fairly dangerous changeset. But the more comprehensive
solution. The path of least resistance is obviously to just fix
George''s
current patches.

btw, George, couple of #if 0''s and //''s on your "Check
zero-check pages
returned by the balloon driver" patch. Those are going out before tree
inclusion, I assume?

Thanks
Andres

) Then most checks for p2m_ram_paging, s/> Cheers,
>
> Tim.
>

Tim Deegan

2012-Jun-14 13:19 UTC

head link

Re: [PATCH 2 of 4 RFC] xen, pod: Check zero-check pages returned by the balloon driver

At 05:57 -0700 on 14 Jun (1339653458), Andres Lagar-Cavilla
wrote:> Imho, pages in any of the states of the paging state machine, are ram.
> They are not mmio, grants or whatever. They just happen to be absent at
> the moment.
> 
> This is not an immediately useful response because its corollary entails
> that PoD pages are also ram.
I''m OK with that interpretation, as long as we decide it one way or the
other and make sure that all users are correct. :)
> There are a few possible ways around this. One problem is that
> p2m_ram_paging_in may or may not have a backing mfn, so we can''t
just
> check the p2mt, and that is frankly annoying (plenty of wth conditionals
> in the p2m code relate to this). We could disambiguate with two states,
> e.g.:
>  p2m_ram_paging_in_absent
>  p2m_ram_paging_present
>  p2m_is_paging_in(p2mt) returns true for both -> use in most cases where
> p2m_ram_paging_in is used right now
That sounds like a good idea; for after 4.2 though. 
>  p2m_is_ram_present(p2mt) returns true for p2m_ram_rw, log dirty, shared,
> paging_out and paging_in present -> used in cases like above
>  p2m_is_ram(p2mt) includes ram_present and pod and paging_in_absent ->
> used in broader cases
> 
> That looks like a fairly dangerous changeset. But the more comprehensive
> solution. The path of least resistance is obviously to just fix
George''s
> current patches.
Agreed.  I was really asking how you''d like to approach this after 4.2
has
branched.

Cheers,

Tim.

Olaf Hering

2012-Jun-14 13:19 UTC

head link

Re: [PATCH 2 of 4 RFC] xen, pod: Check zero-check pages returned by the balloon driver

On Thu, Jun 14, Tim Deegan wrote:
> At 12:54 +0100 on 08 Jun (1339160092), George Dunlap wrote:
> > +        else if( p2m_is_ram(t) )
> > +        {
> > +            if ( steal_for_cache )
> > +            {
> > +                struct page_info *page;
> > +                
> > +                ASSERT(mfn_valid(mfn));
> 
> I think this assertion is broken by the paging code, which adds
> paged-out pages to the RAM types.  One fix is to undo that change and
> have p2m_is_ram() only be true for present pages, but I think for 4.2 it
> would be less disruptive to test here for the paged-out and paging
> types.
I assume the posted hunk is in PoD code, for 4.2 it should be fine
because paging cant be enabled if PoD is already enabled.

Olaf

Tim Deegan

2012-Jun-14 13:20 UTC

head link

Re: [PATCH 2 of 4 RFC] xen, pod: Check zero-check pages returned by the balloon driver

At 15:19 +0200 on 14 Jun (1339687163), Olaf Hering
wrote:> On Thu, Jun 14, Tim Deegan wrote:
> 
> > At 12:54 +0100 on 08 Jun (1339160092), George Dunlap wrote:
> > > +        else if( p2m_is_ram(t) )
> > > +        {
> > > +            if ( steal_for_cache )
> > > +            {
> > > +                struct page_info *page;
> > > +                
> > > +                ASSERT(mfn_valid(mfn));
> > 
> > I think this assertion is broken by the paging code, which adds
> > paged-out pages to the RAM types.  One fix is to undo that change and
> > have p2m_is_ram() only be true for present pages, but I think for 4.2
it
> > would be less disruptive to test here for the paged-out and paging
> > types.
> 
> I assume the posted hunk is in PoD code, for 4.2 it should be fine
> because paging cant be enabled if PoD is already enabled.
Good point.  OK, we can ignore it for now and sort it all out after 4.2

Cheers,

Tim.

George Dunlap

2012-Jun-14 15:01 UTC

head link

Re: [PATCH 1 of 4 RFC] xen, p2m: get_entry returns level of entry as well

On 14/06/12 11:21, Tim Deegan wrote:> At 12:54 +0100 on 08 Jun (1339160091), George Dunlap wrote:
>> Return the p2m level of the entry which filled this request.
>> Intended to be used to see if pages returned by the balloon
>> driver are part of a superpage, and reclaim them if so.
> This looks broadly correct, but it''s a bit invasive.  If
there''s any way
> to rework patch 2 not to need it that would be helpful.  It looks like
> the main use is for detecting and removing superpage PoD entries; maybe
> that could be done with a sweep like you have in the non-PoD case?You mean the non-balloon case?  I forget why I thought that was a bad 
idea, exactly.  Let me think about that.> One niggle: returning level=5 on error is non-obvious, and it
doesn''t
> look like your code in patch 2 uses that.Were you thinking -1 or something like that?

  -George

Tim Deegan

2012-Jun-14 15:31 UTC

head link

Re: [PATCH 1 of 4 RFC] xen, p2m: get_entry returns level of entry as well

At 16:01 +0100 on 14 Jun (1339689663), George Dunlap
wrote:> On 14/06/12 11:21, Tim Deegan wrote:
> >At 12:54 +0100 on 08 Jun (1339160091), George Dunlap wrote:
> >>Return the p2m level of the entry which filled this request.
> >>Intended to be used to see if pages returned by the balloon
> >>driver are part of a superpage, and reclaim them if so.
> >This looks broadly correct, but it''s a bit invasive.  If
there''s any way
> >to rework patch 2 not to need it that would be helpful.  It looks like
> >the main use is for detecting and removing superpage PoD entries; maybe
> >that could be done with a sweep like you have in the non-PoD case?
> You mean the non-balloon case?  I forget why I thought that was a bad 
> idea, exactly.  Let me think about that.
> >One niggle: returning level=5 on error is non-obvious, and it
doesn''t
> >look like your code in patch 2 uses that.
> Were you thinking -1 or something like that?
Actually, looking at it again, and at the way the value is used by its
caller, maybe 5 is OK (though it looks like it will return 6 in the EPT
code).  So assuming this patch doesn''t go away, I''d be OK with
just a
comment explaining it.

Cheers,

Tim.

George Dunlap

2012-Jun-14 16:39 UTC

head link

Re: [PATCH 1 of 4 RFC] xen, p2m: get_entry returns level of entry as well

On Thu, Jun 14, 2012 at 4:01 PM, George Dunlap
<george.dunlap@eu.citrix.com> wrote:> On 14/06/12 11:21, Tim Deegan wrote:
>>
>> At 12:54 +0100 on 08 Jun (1339160091), George Dunlap wrote:
>>>
>>> Return the p2m level of the entry which filled this request.
>>> Intended to be used to see if pages returned by the balloon
>>> driver are part of a superpage, and reclaim them if so.
>>
>> This looks broadly correct, but it''s a bit invasive.  If
there''s any way
>> to rework patch 2 not to need it that would be helpful.  It looks like
>> the main use is for detecting and removing superpage PoD entries; maybe
>> that could be done with a sweep like you have in the non-PoD case?
>
> You mean the non-balloon case?  I forget why I thought that was a bad idea,
> exactly.  Let me think about that.
I think I basically didn''t want to have to do the full check of each
of the 512 individual entries of the p2m in the case that it wasn''t,
in fact, a superpage; but I think in the common case it should either
be a full superpage, or the check at the beginning of
p2m_pod_zero_check_superpage() should fail out relatively early.  So
we could probably get away without it.

Why don''t I re-write patch 2 of this series to not require patch 1,
and include it in my other series.  If there''s a performance gain to
be had by avoiding the check, we can discuss that post-4.2.

 -George

Tim Deegan

2012-Jun-14 17:00 UTC

head link

Re: [PATCH 1 of 4 RFC] xen, p2m: get_entry returns level of entry as well

At 17:39 +0100 on 14 Jun (1339695541), George Dunlap
wrote:> On Thu, Jun 14, 2012 at 4:01 PM, George Dunlap
> <george.dunlap@eu.citrix.com> wrote:
> > On 14/06/12 11:21, Tim Deegan wrote:
> >> This looks broadly correct, but it''s a bit invasive.  If
there''s any way
> >> to rework patch 2 not to need it that would be helpful.  It looks
like
> >> the main use is for detecting and removing superpage PoD entries;
maybe
> >> that could be done with a sweep like you have in the non-PoD case?
> >
> > You mean the non-balloon case?  I forget why I thought that was a bad
idea,
> > exactly.  Let me think about that.
> 
> I think I basically didn''t want to have to do the full check of
each
> of the 512 individual entries of the p2m in the case that it
wasn''t,
> in fact, a superpage; but I think in the common case it should either
> be a full superpage, or the check at the beginning of
> p2m_pod_zero_check_superpage() should fail out relatively early.  So
> we could probably get away without it.
> 
> Why don''t I re-write patch 2 of this series to not require patch
1,
> and include it in my other series.  If there''s a performance gain
to
> be had by avoiding the check, we can discuss that post-4.2.
That sounds ideal.  Thanks,

Tim.

Possibly Parallel Threads

Search for more seemingly similar threads

Xen devel - Jun 2012 - [PATCH 0 of 4 RFC] Populate-on-demand: Check pages being returned by the balloon driver

[PATCH 0 of 4 RFC] Populate-on-demand: Check pages being returned by the balloon driver

[PATCH 1 of 4 RFC] xen, p2m: get_entry returns level of entry as well

[PATCH 2 of 4 RFC] xen, pod: Check zero-check pages returned by the balloon driver

[PATCH 3 of 4 RFC] imported patch CA-72761-libxc-error.patch

[PATCH 4 of 4 RFC] imported patch pygrub-invalid-disk-catch.patch

Re: [PATCH 3 of 4 RFC] imported patch CA-72761-libxc-error.patch

Re: [PATCH 4 of 4 RFC] imported patch pygrub-invalid-disk-catch.patch

Re: [PATCH 0 of 4 RFC] Populate-on-demand: Check pages being returned by the balloon driver

Re: [PATCH 0 of 4 RFC] Populate-on-demand: Check pages being returned by the balloon driver

Re: [PATCH 1 of 4 RFC] xen, p2m: get_entry returns level of entry as well

Re: [PATCH 2 of 4 RFC] xen, pod: Check zero-check pages returned by the balloon driver

Re: [PATCH 2 of 4 RFC] xen, pod: Check zero-check pages returned by the balloon driver

Re: [PATCH 2 of 4 RFC] xen, pod: Check zero-check pages returned by the balloon driver

Re: [PATCH 2 of 4 RFC] xen, pod: Check zero-check pages returned by the balloon driver

Re: [PATCH 2 of 4 RFC] xen, pod: Check zero-check pages returned by the balloon driver

Re: [PATCH 1 of 4 RFC] xen, p2m: get_entry returns level of entry as well

Re: [PATCH 1 of 4 RFC] xen, p2m: get_entry returns level of entry as well

Re: [PATCH 1 of 4 RFC] xen, p2m: get_entry returns level of entry as well

Re: [PATCH 1 of 4 RFC] xen, p2m: get_entry returns level of entry as well

Possibly Parallel Threads