Shane Wang
2009-Nov-06  11:29 UTC
[Xen-devel] [PATCH] to fix the bug for tboot on x86_32 brought by c/s 19914 and 19922
The changesets 19914 and 19922 remove the bitmap for boot allocator, which 
causes first_valid_mfn is reduced and starts from the end of xen. On the other 
hand, on x86_64, the xen heap is got considering e820 table, while on x86_32 the
xen heap is specified to be < xenheap_phys_end. This range overlaps tboot
range.
So, the tboot range is scrubbed by scrub_heap_pages() during start_xen() on 
x86_32. Then the result is that for TXT support on x86_32, shutdown/reboot/Sx 
doesn't work properly. (i.e. hang)
This patch is to fix that by reserving tboot range as "inuse" in xen
heap on
x86_32. Then xen code will not touch the range.
Signed-off-by: Shane Wang <shane.wang@intel.com>
---
diff -r e351f4cedded xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c	Fri Nov 06 10:28:59 2009 -0800
+++ b/xen/arch/x86/setup.c	Fri Nov 06 10:29:44 2009 -0800
@@ -1066,6 +1066,9 @@ void __init __start_xen(unsigned long mb
      if ( !tboot_protect_mem_regions() )
          panic("Could not protect TXT memory regions\n");
+    if ( !tboot_reserve_range() )
+        panic("Could not reserve tboot range, tboot has been overwritten\n");
+
      /* Create initial domain 0. */
      dom0 = domain_create(0, DOMCRF_s3_integrity, DOM0_SSIDREF);
      if ( (dom0 == NULL) || (alloc_dom0_vcpu0() == NULL) )
diff -r e351f4cedded xen/arch/x86/tboot.c
--- a/xen/arch/x86/tboot.c	Fri Nov 06 10:28:59 2009 -0800
+++ b/xen/arch/x86/tboot.c	Fri Nov 06 10:29:44 2009 -0800
@@ -137,6 +137,24 @@ void __init tboot_probe(void)
                        TXT_PUB_CONFIG_REGS_BASE + TXTCR_SINIT_SIZE);
  }
+/*
+ * Reserve the tboot range out of the xenheap on x86_32 so the heap
+ * allocator and scrubber never touch it.  The range runs from the MLE
+ * page tables (the 3 pages immediately below tboot_base) through the
+ * end of the tboot image.  On x86_64 the xenheap already excludes this
+ * range (it is built from the e820 map), so nothing is done there.
+ *
+ * Returns 1 on success, 0 if any page could not be reserved (in which
+ * case tboot memory may already have been overwritten).
+ */
+int __init tboot_reserve_range(void)
+{
+#if defined(CONFIG_X86_32)
+    unsigned long mfn;
+
+    /*
+     * End at PFN_UP(base + size) so the final, possibly partial, page
+     * is covered even if tboot_base is not page-aligned.  (Identical to
+     * PFN_DOWN(base) + PFN_UP(size) when base is page-aligned.)
+     */
+    for ( mfn = PFN_DOWN(g_tboot_shared->tboot_base - 3*PAGE_SIZE);
+          mfn < PFN_UP(g_tboot_shared->tboot_base
+                       + g_tboot_shared->tboot_size);
+          mfn++ )
+        if ( reserve_page_as_used(mfn) )
+            return 0;
+#endif
+    return 1;
+}
+
  /* definitions from xen/drivers/passthrough/vtd/iommu.h
   * used to walk through vtd page tables */
  #define LEVEL_STRIDE (9)
@@ -195,16 +213,16 @@ static void update_pagetable_mac(vmac_ct
          }
      }
  }
-
-static void tboot_gen_domain_integrity(const uint8_t key[TB_KEY_SIZE],
-                                       vmac_t *mac)
+
+static void tboot_mac_domain(void)
  {
      struct domain *d;
      struct page_info *page;
      uint8_t nonce[16] = {};
      vmac_ctx_t ctx;
-    vmac_set_key((uint8_t *)key, &ctx);
+    memset(&domain_mac, 0, sizeof(vmac_t));
+    vmac_set_key((uint8_t *)g_tboot_shared->s3_key, &ctx);
      for_each_domain( d )
      {
          if ( !d->arch.s3_integrity )
@@ -228,22 +246,52 @@ static void tboot_gen_domain_integrity(c
      /* MAC all shadow page tables */
      update_pagetable_mac(&ctx);
-    *mac = vmac(NULL, 0, nonce, NULL, &ctx);
+    domain_mac = vmac(NULL, 0, nonce, NULL, &ctx);
-    printk("MAC for domains is: 0x%08"PRIx64"\n", *mac);
+    printk("MAC for domains is: 0x%08"PRIx64"\n", domain_mac);
      /* wipe ctx to ensure key is not left in memory */
      memset(&ctx, 0, sizeof(ctx));
  }
-static void tboot_gen_xenheap_integrity(const uint8_t key[TB_KEY_SIZE],
-                                        vmac_t *mac)
+/*
+ * Append [start, end) to the list of memory regions that tboot will
+ * MAC across S3.  Empty or inverted ranges are silently ignored; the
+ * region table must not overflow MAX_TB_MAC_REGIONS.
+ */
+static void tboot_add_mac_region(uint64_t start, uint64_t end)
+{
+    tboot_mac_region_t *mr;
+
+    if ( end <= start )
+        return;
+
+    ASSERT(g_tboot_shared->num_mac_regions < MAX_TB_MAC_REGIONS);
+
+    mr = &g_tboot_shared->mac_regions[g_tboot_shared->num_mac_regions++];
+    mr->start = start;
+    mr->size = end - mr->start;
+}
+
+#if defined(CONFIG_X86_32)
+/*
+ * On x86_32 the tboot image lives inside the xenheap, so instead of
+ * hashing the heap from Xen, hand tboot the xenheap around the tboot
+ * range as MAC regions: [_end, tboot range start) and
+ * [tboot range end, xenheap_phys_end).
+ */
+static void tboot_mac_xenheap(void)
+{
+    uint64_t start, end;
+
+    start = g_tboot_shared->tboot_base - 3*PAGE_SIZE; /* MLE page tables */
+    end = g_tboot_shared->tboot_base + g_tboot_shared->tboot_size;
+
+    if ( start < xenheap_phys_end )
+    {
+        if ( __pa(&_end) < start )
+            tboot_add_mac_region(__pa(&_end), start);
+        if ( end < xenheap_phys_end )
+            tboot_add_mac_region(end, xenheap_phys_end);
+    }
+}
+#else /* !CONFIG_X86_32 */
+static void tboot_mac_xenheap(void)
  {
      unsigned long mfn;
      uint8_t nonce[16] = {};
      vmac_ctx_t ctx;
-    vmac_set_key((uint8_t *)key, &ctx);
+    memset(&xenheap_mac, 0, sizeof(vmac_t));
+    vmac_set_key((uint8_t *)g_tboot_shared->s3_key, &ctx);
      for ( mfn = 0; mfn < max_page; mfn++ )
      {
          struct page_info *page = __mfn_to_page(mfn);
@@ -255,25 +303,26 @@ static void tboot_gen_xenheap_integrity(
              vmac_update((uint8_t *)pg, PAGE_SIZE, &ctx);
          }
      }
-    *mac = vmac(NULL, 0, nonce, NULL, &ctx);
+    xenheap_mac = vmac(NULL, 0, nonce, NULL, &ctx);
-    printk("MAC for xenheap is: 0x%08"PRIx64"\n", *mac);
+    printk("MAC for xenheap is: 0x%08"PRIx64"\n", xenheap_mac);
      /* wipe ctx to ensure key is not left in memory */
      memset(&ctx, 0, sizeof(ctx));
  }
+#endif
-static void tboot_gen_frametable_integrity(const uint8_t key[TB_KEY_SIZE],
-                                           vmac_t *mac)
+static void tboot_mac_frametable(void)
  {
      uint8_t nonce[16] = {};
      vmac_ctx_t ctx;
-    vmac_set_key((uint8_t *)key, &ctx);
-    *mac = vmac((uint8_t *)frame_table,
+    memset(&frametable_mac, 0, sizeof(vmac_t));
+    vmac_set_key((uint8_t *)g_tboot_shared->s3_key, &ctx);
+    frametable_mac = vmac((uint8_t *)frame_table,
                  PFN_UP(max_pdx * sizeof(*frame_table)), nonce, NULL,
&ctx);
-    printk("MAC for frametable is: 0x%08"PRIx64"\n", *mac);
+    printk("MAC for frametable is: 0x%08"PRIx64"\n", frametable_mac);
      /* wipe ctx to ensure key is not left in memory */
      memset(&ctx, 0, sizeof(ctx));
@@ -311,31 +360,32 @@ void tboot_shutdown(uint32_t shutdown_ty
          /*
           * Xen regions for tboot to MAC
           */
-        g_tboot_shared->num_mac_regions = 4;
+        g_tboot_shared->num_mac_regions = 0;
          /* S3 resume code (and other real mode trampoline code) */
-        g_tboot_shared->mac_regions[0].start =
bootsym_phys(trampoline_start);
-        g_tboot_shared->mac_regions[0].size = bootsym_phys(trampoline_end) -
-                                              bootsym_phys(trampoline_start);
+        tboot_add_mac_region(bootsym_phys(trampoline_start),
+                             bootsym_phys(trampoline_end));
+
          /* hypervisor code + data */
-        g_tboot_shared->mac_regions[1].start = (uint64_t)__pa(&_stext);
-        g_tboot_shared->mac_regions[1].size = __pa(&__init_begin) -
-                                              __pa(&_stext);
+        tboot_add_mac_region((uint64_t)__pa(&_stext),
+                             (uint64_t)__pa(&__init_begin));
+
          /* per-cpu data */
-        g_tboot_shared->mac_regions[2].start = (uint64_t)__pa(&__per_cpu_start);
-        g_tboot_shared->mac_regions[2].size =
-            (((uint64_t)last_cpu(cpu_possible_map) + 1) << PERCPU_SHIFT);
+        tboot_add_mac_region((uint64_t)__pa(&__per_cpu_start),
+            (uint64_t)__pa(&__per_cpu_start)
+            + (((uint64_t)last_cpu(cpu_possible_map) + 1) <<
PERCPU_SHIFT));
+
          /* bss */
-        g_tboot_shared->mac_regions[3].start =
(uint64_t)__pa(&__bss_start);
-        g_tboot_shared->mac_regions[3].size = __pa(&_end) -
__pa(&__bss_start);
+        tboot_add_mac_region((uint64_t)__pa(&__bss_start),
+                             (uint64_t)__pa(&_end));
          /*
           * MAC domains and other Xen memory
           */
          /* Xen has no better entropy source for MAC key than tboot's */
          /* MAC domains first in case it perturbs xenheap */
-        tboot_gen_domain_integrity(g_tboot_shared->s3_key, &domain_mac);
-        tboot_gen_frametable_integrity(g_tboot_shared->s3_key,
&frametable_mac);
-        tboot_gen_xenheap_integrity(g_tboot_shared->s3_key,
&xenheap_mac);
+        tboot_mac_domain();
+        tboot_mac_frametable();
+        tboot_mac_xenheap();
      }
      write_ptbase(idle_vcpu[0]);
@@ -452,15 +502,18 @@ int tboot_s3_resume(void)
          return 0;
      /* need to do these in reverse order of shutdown */
-    tboot_gen_xenheap_integrity(g_tboot_shared->s3_key, &mac);
+    memcpy(&mac, &xenheap_mac, sizeof(vmac_t));
+    tboot_mac_xenheap();
      if ( mac != xenheap_mac )
          return -1;
-    tboot_gen_frametable_integrity(g_tboot_shared->s3_key, &mac);
+    memcpy(&mac, &frametable_mac, sizeof(vmac_t));
+    tboot_mac_frametable();
      if ( mac != frametable_mac )
          return -2;
-    tboot_gen_domain_integrity(g_tboot_shared->s3_key, &mac);
+    memcpy(&mac, &domain_mac, sizeof(vmac_t));
+    tboot_mac_domain();
      if ( mac != domain_mac )
          return -3;
diff -r e351f4cedded xen/common/page_alloc.c
--- a/xen/common/page_alloc.c	Fri Nov 06 10:28:59 2009 -0800
+++ b/xen/common/page_alloc.c	Fri Nov 06 10:29:44 2009 -0800
@@ -383,14 +383,15 @@ static struct page_info *alloc_heap_page
      return pg;
  }
-/* Remove any offlined page in the buddy pointed to by head. */
-static int reserve_offlined_page(struct page_info *head)
+/* Remove page from the free pages in the buddy pointed to by head */
+static int reserve_free_page(struct page_info *head, unsigned long pg_state)
  {
      unsigned int node = phys_to_nid(page_to_maddr(head));
      int zone = page_to_zone(head), i, head_order = PFN_ORDER(head), count = 0;
      struct page_info *cur_head;
      int cur_order;
+    ASSERT((pg_state == PGC_state_offlined) || (pg_state == PGC_state_inuse));
      ASSERT(spin_is_locked(&heap_lock));
      cur_head = head;
@@ -402,7 +403,7 @@ static int reserve_offlined_page(struct
          struct page_info *pg;
          int next_order;
-        if ( page_state_is(cur_head, offlined) )
+        if ( (cur_head->count_info & PGC_state) == pg_state )
          {
              cur_head++;
              continue;
@@ -420,7 +421,7 @@ static int reserve_offlined_page(struct
              for ( i = (1 << cur_order), pg = cur_head + (1 <<
cur_order );
                    i < (1 << next_order);
                    i++, pg++ )
-                if ( page_state_is(pg, offlined) )
+                if ( (pg->count_info & PGC_state) == pg_state )
                      break;
              if ( i == ( 1 << next_order) )
              {
@@ -441,19 +442,30 @@ static int reserve_offlined_page(struct
      for ( cur_head = head; cur_head < head + ( 1UL << head_order);
cur_head++ )
      {
-        if ( !page_state_is(cur_head, offlined) )
+        if ( !((cur_head->count_info & PGC_state) == pg_state) )
              continue;
          avail[node][zone]--;
-        page_list_add_tail(cur_head,
-                           test_bit(_PGC_broken, &cur_head->count_info)
?
-                           &page_broken_list : &page_offlined_list);
+        /* additional adding tail for offlined */
+        if ( pg_state == PGC_state_offlined )
+            page_list_add_tail(cur_head,
+                               test_bit(_PGC_broken,
&cur_head->count_info) ?
+                               &page_broken_list :
&page_offlined_list);
          count++;
      }
      return count;
+}
+
+/* Remove any offlined page in the buddy pointed to by head. */
+static int reserve_offlined_page(struct page_info *head)
+{
+    if ( head == NULL )
+        return -EINVAL;
+
+    return reserve_free_page(head, PGC_state_offlined);
  }
  /* Free 2^@order set of pages. */
@@ -577,7 +589,7 @@ static unsigned long mark_page_offline(s
      return y;
  }
-static int reserve_heap_page(struct page_info *pg)
+static struct page_info* get_heap_page_head(struct page_info *pg)
  {
      struct page_info *head = NULL;
      unsigned int i, node = phys_to_nid(page_to_maddr(pg));
@@ -594,12 +606,11 @@ static int reserve_heap_page(struct page
          {
              if ( (head <= pg) &&
                   (head + (1UL << i) > pg) )
-                return reserve_offlined_page(head);
+                return head;
          }
      }
-    return -EINVAL;
-
+    return NULL;
  }
  int offline_page(unsigned long mfn, int broken, uint32_t *status)
@@ -643,7 +654,7 @@ int offline_page(unsigned long mfn, int
      if ( page_state_is(pg, free) )
      {
          /* Free pages are reserve directly */
-        reserve_heap_page(pg);
+        reserve_offlined_page(get_heap_page_head(pg));
          *status = PG_OFFLINE_OFFLINED;
      }
      else if ( page_state_is(pg, offlined) )
@@ -742,6 +753,50 @@ unsigned int online_page(unsigned long m
      return ret;
  }
+/*
+ * Take a currently-free heap page at @mfn out of the heap and mark it
+ * as in use, so the allocator and scrubber will no longer touch it.
+ *
+ * Returns 0 on success, -EINVAL if @mfn is out of range, the page is
+ * not free, or the page cannot be located in any free buddy.
+ */
+int reserve_page_as_used(unsigned long mfn)
+{
+    struct page_info *head, *pg;
+    unsigned long nx, x, y;
+
+    /* max_page itself is already out of range: use >=, not >. */
+    if ( mfn >= max_page )
+    {
+        dprintk(XENLOG_WARNING,
+                "try to reserve page out of range %lx\n", mfn);
+        return -EINVAL;
+    }
+
+    pg = mfn_to_page(mfn);
+    if ( !page_state_is(pg, free) )
+        return -EINVAL;
+
+    spin_lock(&heap_lock);
+
+    /* Mark the page as inuse. */
+    y = pg->count_info;
+    do {
+        nx = x = y;
+
+        nx &= ~PGC_state;
+        nx |= PGC_state_inuse;
+
+        if ( x == nx )
+            break;
+    } while ( (y = cmpxchg(&pg->count_info, x, nx)) != x );
+
+    head = get_heap_page_head(pg);
+    if ( head == NULL )
+    {
+        /*
+         * Must return here: falling through would pass a NULL head to
+         * reserve_free_page() and unlock heap_lock a second time.
+         */
+        spin_unlock(&heap_lock);
+        return -EINVAL;
+    }
+
+    /* Unlink the page from its free buddy, splitting as needed. */
+    reserve_free_page(head, PGC_state_inuse);
+    spin_unlock(&heap_lock);
+
+    return 0;
+}
+
  int query_page_offline(unsigned long mfn, uint32_t *status)
  {
      struct page_info *pg;
diff -r e351f4cedded xen/include/asm-x86/tboot.h
--- a/xen/include/asm-x86/tboot.h	Fri Nov 06 10:28:59 2009 -0800
+++ b/xen/include/asm-x86/tboot.h	Fri Nov 06 10:29:44 2009 -0800
@@ -114,6 +114,7 @@ extern tboot_shared_t *g_tboot_shared;
  extern tboot_shared_t *g_tboot_shared;
  void tboot_probe(void);
+int tboot_reserve_range(void);
  void tboot_shutdown(uint32_t shutdown_type);
  int tboot_in_measured_env(void);
  int tboot_protect_mem_regions(void);
diff -r e351f4cedded xen/include/xen/mm.h
--- a/xen/include/xen/mm.h	Fri Nov 06 10:28:59 2009 -0800
+++ b/xen/include/xen/mm.h	Fri Nov 06 10:29:44 2009 -0800
@@ -62,6 +62,7 @@ unsigned int online_page(unsigned long m
  unsigned int online_page(unsigned long mfn, uint32_t *status);
  int offline_page(unsigned long mfn, int broken, uint32_t *status);
  int query_page_offline(unsigned long mfn, uint32_t *status);
+int reserve_page_as_used(unsigned long mfn);
  void scrub_heap_pages(void);
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Keir Fraser
2009-Nov-06  16:20 UTC
[Xen-devel] Re: [PATCH] to fix the bug for tboot on x86_32 brought by c/s 19914 and 19922
On 06/11/2009 11:29, "Shane Wang" <shane.wang@intel.com> wrote:> The changesets 19914 and 19922 remove the bitmap for boot allocator, which > causes first_valid_mfn is reduced and starts from the end of xen. On the other > hand, on x86_64, the xen heap is got considering e820 table, while on x86_32 > the xen heap is specified to be < xenheap_phys_end. This range overlaps tboot > range. > So, the tboot range is scrubbed by scrub_heap_pages() during start_xen() on > x86_32. Then the result is that for TXT support on x86_32, shutdown/reboot/Sx > doesn't work properly. (i.e. hang)So... The problem is that xenheap on x86_32 does not respect e820? I'll fix that if that's the issue. It will be a much smaller change, and to arch/x86/setup.c only. -- Keir> This patch is to fix that by reserving tboot range as "inuse" in xen heap on > x86_32. Then xen code will not touch the range._______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Wang, Shane
2009-Nov-09  02:55 UTC
[Xen-devel] RE: [PATCH] to fix the bug for tboot on x86_32 brought by c/s 19914 and 19922
Keir Fraser wrote:> On 06/11/2009 11:29, "Shane Wang" <shane.wang@intel.com> wrote: > >> The changesets 19914 and 19922 remove the bitmap for boot allocator, >> which causes first_valid_mfn is reduced and starts from the end of >> xen. On the other hand, on x86_64, the xen heap is got considering >> e820 table, while on x86_32 the xen heap is specified to be < >> xenheap_phys_end. This range overlaps tboot range. So, the tboot >> range is scrubbed by scrub_heap_pages() during start_xen() on >> x86_32. Then the result is that for TXT support on x86_32, >> shutdown/reboot/Sx doesn't work properly. (i.e. hang) > > So... The problem is that xenheap on x86_32 does not respect e820? > I'll fix that if that's the issue. It will be a much smaller change, > and to arch/x86/setup.c only.Yes, it is. It seems x86_32 only respects after xenheap is created.> > -- Keir > >> This patch is to fix that by reserving tboot range as "inuse" in xen >> heap on x86_32. Then xen code will not touch the range._______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2009-Nov-09  08:25 UTC
[Xen-devel] Re: [PATCH] to fix the bug for tboot on x86_32 brought by c/s 19914 and 19922
On 09/11/2009 02:55, "Wang, Shane" <shane.wang@intel.com> wrote:>> So... The problem is that xenheap on x86_32 does not respect e820? >> I'll fix that if that's the issue. It will be a much smaller change, >> and to arch/x86/setup.c only. > Yes, it is. It seems x86_32 only respects after xenheap is created.Okay, this should then be fixed by xen-unstable:20403, which I have also backported to xen-3.4-testing for Xen 3.4.2 release. -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel