# HG changeset patch # User Olaf Hering <olaf@aepfle.de> # Date 1304427973 -7200 # Node ID e7356373147a21df7280ce55e05ff1057f1a0cc9 # Parent 10f27b8b3d63959c7a8e15299a7a398b7ff7f230 The balloon driver in the guest frees guest pages and marks them as mmio. When the kernel crashes and the crash kernel attempts to read the oldmem via /proc/vmcore a read from ballooned pages will generate 100% load in dom0 because Xen asks qemu-dm for the page content. Since the reads come in as 8byte requests each ballooned page is tried 512 times. Add a new hvmop HVMOP_get_mem_type to return the hvmmem_type_t for the given pfn. Pages which are neither ram or mmio will be HVMMEM_mmio_dm. This interface enables the crash kernel to skip ballooned pages. Signed-off-by: Olaf Hering <olaf@aepfle.de> diff -r 10f27b8b3d63 -r e7356373147a xen/arch/ia64/vmx/vmx_hypercall.c --- a/xen/arch/ia64/vmx/vmx_hypercall.c Mon May 02 12:00:40 2011 +0100 +++ b/xen/arch/ia64/vmx/vmx_hypercall.c Tue May 03 15:06:13 2011 +0200 @@ -217,6 +217,7 @@ do_hvm_op(unsigned long op, XEN_GUEST_HA break; } + case HVMOP_get_mem_type: case HVMOP_set_mem_type: case HVMOP_set_mem_access: case HVMOP_get_mem_access: diff -r 10f27b8b3d63 -r e7356373147a xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Mon May 02 12:00:40 2011 +0100 +++ b/xen/arch/x86/hvm/hvm.c Tue May 03 15:06:13 2011 +0200 @@ -3676,6 +3676,37 @@ long do_hvm_op(unsigned long op, XEN_GUE break; } + case HVMOP_get_mem_type: + { + struct xen_hvm_get_mem_type a; + struct domain *d; + p2m_type_t t; + + if ( copy_from_guest(&a, arg, 1) ) + return -EFAULT; + + rc = rcu_lock_remote_target_domain_by_id(a.domid, &d); + if ( rc != 0 ) + return rc; + + rc = -EINVAL; + if ( is_hvm_domain(d) ) + { + gfn_to_mfn_unshare(p2m_get_hostp2m(d), a.pfn, &t, 0); + if ( p2m_is_mmio(t) ) + a.mem_type = HVMMEM_mmio_dm; + else if ( p2m_is_readonly(t) ) + a.mem_type = HVMMEM_ram_ro; + else if ( p2m_is_ram(t) ) + a.mem_type = HVMMEM_ram_rw; + else + a.mem_type = HVMMEM_mmio_dm; + rc = 
copy_to_guest(arg, &a, 1) ? -EFAULT : 0; + } + rcu_unlock_domain(d); + break; + } + case HVMOP_set_mem_type: { struct xen_hvm_set_mem_type a; diff -r 10f27b8b3d63 -r e7356373147a xen/include/public/hvm/hvm_op.h --- a/xen/include/public/hvm/hvm_op.h Mon May 02 12:00:40 2011 +0100 +++ b/xen/include/public/hvm/hvm_op.h Tue May 03 15:06:13 2011 +0200 @@ -76,6 +76,12 @@ DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_ /* Flushes all VCPU TLBs: @arg must be NULL. */ #define HVMOP_flush_tlbs 5 +typedef enum { + HVMMEM_ram_rw, /* Normal read/write guest RAM */ + HVMMEM_ram_ro, /* Read-only; writes are discarded */ + HVMMEM_mmio_dm, /* Reads and write go to the device model */ +} hvmmem_type_t; + /* Following tools-only interfaces may change in future. */ #if defined(__XEN__) || defined(__XEN_TOOLS__) @@ -109,11 +115,6 @@ typedef struct xen_hvm_modified_memory x DEFINE_XEN_GUEST_HANDLE(xen_hvm_modified_memory_t); #define HVMOP_set_mem_type 8 -typedef enum { - HVMMEM_ram_rw, /* Normal read/write guest RAM */ - HVMMEM_ram_ro, /* Read-only; writes are discarded */ - HVMMEM_mmio_dm, /* Reads and write go to the device model */ -} hvmmem_type_t; /* Notify that a region of memory is to be treated in a specific way. */ struct xen_hvm_set_mem_type { /* Domain to be updated. */ @@ -223,6 +224,20 @@ struct xen_hvm_inject_trap { typedef struct xen_hvm_inject_trap xen_hvm_inject_trap_t; DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_trap_t); +#define HVMOP_get_mem_type 15 +/* Return hvmmem_type_t for the specified pfn. */ +struct xen_hvm_get_mem_type { + /* Domain to be queried. */ + domid_t domid; + /* OUT variable. */ + uint8_t mem_type; + /* IN variable. 
*/ + uint64_t pfn; +}; +typedef struct xen_hvm_get_mem_type xen_hvm_get_mem_type_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_mem_type_t); + + #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Hi, At 14:11 +0100 on 03 May (1304431873), Olaf Hering wrote:> diff -r 10f27b8b3d63 -r e7356373147a xen/arch/x86/hvm/hvm.c > --- a/xen/arch/x86/hvm/hvm.c Mon May 02 12:00:40 2011 +0100 > +++ b/xen/arch/x86/hvm/hvm.c Tue May 03 15:06:13 2011 +0200 > @@ -3676,6 +3676,37 @@ long do_hvm_op(unsigned long op, XEN_GUE > break; > } > > + case HVMOP_get_mem_type: > + { > + struct xen_hvm_get_mem_type a; > + struct domain *d; > + p2m_type_t t; > + > + if ( copy_from_guest(&a, arg, 1) ) > + return -EFAULT; > + > + rc = rcu_lock_remote_target_domain_by_id(a.domid, &d);I thought this call was intended to be used from inside the guest in question. rcu_lock_remote_target_domain_by_id() explicitly refuses to let a domain operate on itself.> + if ( rc != 0 ) > + return rc; > + > + rc = -EINVAL; > + if ( is_hvm_domain(d) ) > + { > + gfn_to_mfn_unshare(p2m_get_hostp2m(d), a.pfn, &t, 0); > + if ( p2m_is_mmio(t) ) > + a.mem_type = HVMMEM_mmio_dm; > + else if ( p2m_is_readonly(t) ) > + a.mem_type = HVMMEM_ram_ro; > + else if ( p2m_is_ram(t) ) > + a.mem_type = HVMMEM_ram_rw; > + else > + a.mem_type = HVMMEM_mmio_dm; > + rc = copy_to_guest(arg, &a, 1) ? -EFAULT : 0; > + } > + rcu_unlock_domain(d); > + break; > + } > + > case HVMOP_set_mem_type: > { > struct xen_hvm_set_mem_type a; > diff -r 10f27b8b3d63 -r e7356373147a xen/include/public/hvm/hvm_op.h > --- a/xen/include/public/hvm/hvm_op.h Mon May 02 12:00:40 2011 +0100 > +++ b/xen/include/public/hvm/hvm_op.h Tue May 03 15:06:13 2011 +0200 > @@ -76,6 +76,12 @@ DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_ > /* Flushes all VCPU TLBs: @arg must be NULL. */ > #define HVMOP_flush_tlbs 5 > > +typedef enum { > + HVMMEM_ram_rw, /* Normal read/write guest RAM */ > + HVMMEM_ram_ro, /* Read-only; writes are discarded */ > + HVMMEM_mmio_dm, /* Reads and write go to the device model */ > +} hvmmem_type_t; > +This is now outside the #ifdef, when both of its users are inside it. 
If that wasn't deliberate, please put it back. > /* Following tools-only interfaces may change in future. */ > #if defined(__XEN__) || defined(__XEN_TOOLS__) > > @@ -109,11 +115,6 @@ typedef struct xen_hvm_modified_memory x > DEFINE_XEN_GUEST_HANDLE(xen_hvm_modified_memory_t); > > #define HVMOP_set_mem_type 8 > -typedef enum { > - HVMMEM_ram_rw, /* Normal read/write guest RAM */ > - HVMMEM_ram_ro, /* Read-only; writes are discarded */ > - HVMMEM_mmio_dm, /* Reads and write go to the device model */ > -} hvmmem_type_t; > /* Notify that a region of memory is to be treated in a specific way. */ > struct xen_hvm_set_mem_type { > /* Domain to be updated. */ > @@ -223,6 +224,20 @@ struct xen_hvm_inject_trap { > typedef struct xen_hvm_inject_trap xen_hvm_inject_trap_t; > DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_trap_t); > > +#define HVMOP_get_mem_type 15 > +/* Return hvmmem_type_t for the specified pfn. */ > +struct xen_hvm_get_mem_type { > + /* Domain to be queried. */ > + domid_t domid; > + /* OUT variable. */ > + uint8_t mem_type; > + /* IN variable. */ > + uint64_t pfn; This structure will be laid out differently on 32-bit and 64-bit builds. :( Also, since the _set operation uses a 16-bit variable for the type, you might as well do the same here. Cheers, Tim. -- Tim Deegan <Tim.Deegan@citrix.com> Principal Software Engineer, Xen Platform Team Citrix Systems UK Ltd. (Company #02937203, SL9 0BG) _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
On Tue, May 03, Tim Deegan wrote:> > + rc = rcu_lock_remote_target_domain_by_id(a.domid, &d); > > I thought this call was intended to be used from inside the guest in > question. rcu_lock_remote_target_domain_by_id() explicitly refuses to > let a domain operate on itself.Hmm, I have to test it with xen-unstable too. I did copy&paste from other parts of the file. 4.0 used rcu_lock_target_domain_by_id, so can I use that function?> > +typedef enum { > > + HVMMEM_ram_rw, /* Normal read/write guest RAM */ > > + HVMMEM_ram_ro, /* Read-only; writes are discarded */ > > + HVMMEM_mmio_dm, /* Reads and write go to the device model */ > > +} hvmmem_type_t; > > + > > This is now outside the #ifdef, when both of its users are inside it. > If that wasn''t deliberate, please put it back.Should HVMOP_get_mem_type use hvmmem_type_t? Its outside the ifdef, otherwise unmodified_drivers/linux-2.6/platform-pci/platform-pci.c does not get the define.> > /* Following tools-only interfaces may change in future. */ > > #if defined(__XEN__) || defined(__XEN_TOOLS__) > > > > @@ -109,11 +115,6 @@ typedef struct xen_hvm_modified_memory x > > DEFINE_XEN_GUEST_HANDLE(xen_hvm_modified_memory_t); > > > > #define HVMOP_set_mem_type 8 > > -typedef enum { > > - HVMMEM_ram_rw, /* Normal read/write guest RAM */ > > - HVMMEM_ram_ro, /* Read-only; writes are discarded */ > > - HVMMEM_mmio_dm, /* Reads and write go to the device model */ > > -} hvmmem_type_t; > > /* Notify that a region of memory is to be treated in a specific way. */ > > struct xen_hvm_set_mem_type { > > /* Domain to be updated. */ > > @@ -223,6 +224,20 @@ struct xen_hvm_inject_trap { > > typedef struct xen_hvm_inject_trap xen_hvm_inject_trap_t; > > DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_trap_t); > > > > +#define HVMOP_get_mem_type 15 > > +/* Return hvmmem_type_t for the specified pfn. */ > > +struct xen_hvm_get_mem_type { > > + /* Domain to be queried. */ > > + domid_t domid; > > + /* OUT variable. 
*/ > > + uint8_t mem_type; > > + /* IN variable. */ > > + uint64_t pfn; > > This structure will be laid out differently on 32-bit and 64-bit > builds. :( Also, since the _set operation uses a 16-bit variable for > the type, you might as well do the same here.Good point. Should I use the same padding as in xen_hvm_pagetable_dying? struct xen_hvm_get_mem_type { /* Domain to be queried. */ domid_t domid; /* OUT variable. */ uint16_t mem_type; uint16_t pad[2]; /* IN variable. */ uint64_t pfn; }; Olaf _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
At 14:59 +0100 on 03 May (1304434768), Olaf Hering wrote:> On Tue, May 03, Tim Deegan wrote: > > > > + rc = rcu_lock_remote_target_domain_by_id(a.domid, &d); > > > > I thought this call was intended to be used from inside the guest in > > question. rcu_lock_remote_target_domain_by_id() explicitly refuses to > > let a domain operate on itself. > > Hmm, I have to test it with xen-unstable too. I did copy&paste from > other parts of the file. 4.0 used rcu_lock_target_domain_by_id, so can I > use that function?Yes; I think that''s the right one for you. It''s the same except for the check that the target isn''t the caller.> > > +typedef enum { > > > + HVMMEM_ram_rw, /* Normal read/write guest RAM */ > > > + HVMMEM_ram_ro, /* Read-only; writes are discarded */ > > > + HVMMEM_mmio_dm, /* Reads and write go to the device model */ > > > +} hvmmem_type_t; > > > + > > > > This is now outside the #ifdef, when both of its users are inside it. > > If that wasn''t deliberate, please put it back. > > Should HVMOP_get_mem_type use hvmmem_type_t? Its outside the ifdef,Looks to me like it''s inside; at least, inside an equivalent one. Did you compile-test your kernel against this patch?> otherwise unmodified_drivers/linux-2.6/platform-pci/platform-pci.c does > not get the define.OK; as long as both the typedef and its user are outside the ifdef, that''s fine.> > > +#define HVMOP_get_mem_type 15 > > > +/* Return hvmmem_type_t for the specified pfn. */ > > > +struct xen_hvm_get_mem_type { > > > + /* Domain to be queried. */ > > > + domid_t domid; > > > + /* OUT variable. */ > > > + uint8_t mem_type; > > > + /* IN variable. */ > > > + uint64_t pfn; > > > > This structure will be laid out differently on 32-bit and 64-bit > > builds. :( Also, since the _set operation uses a 16-bit variable for > > the type, you might as well do the same here. > > Good point. Should I use the same padding as in xen_hvm_pagetable_dying? > > struct xen_hvm_get_mem_type { > /* Domain to be queried. 
*/ > domid_t domid; > /* OUT variable. */ > uint16_t mem_type; > uint16_t pad[2]; > /* IN variable. */ > uint64_t pfn; > }; Yes, that's fine. Tim. -- Tim Deegan <Tim.Deegan@citrix.com> Principal Software Engineer, Xen Platform Team Citrix Systems UK Ltd. (Company #02937203, SL9 0BG) _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
The balloon driver in the guest frees guest pages and marks them as mmio. When the kernel crashes and the crash kernel attempts to read the oldmem via /proc/vmcore a read from ballooned pages will generate 100% load in dom0 because Xen asks qemu-dm for the page content. Since the reads come in as 8byte requests each ballooned page is tried 512 times. Add a new hvmop HVMOP_get_mem_type to return the hvmmem_type_t for the given pfn. Pages which are neither ram or mmio will be HVMMEM_mmio_dm. This interface enables the crash kernel to skip ballooned pages. Signed-off-by: Olaf Hering <olaf@aepfle.de> diff -r 10f27b8b3d63 xen/arch/ia64/vmx/vmx_hypercall.c --- a/xen/arch/ia64/vmx/vmx_hypercall.c Mon May 02 12:00:40 2011 +0100 +++ b/xen/arch/ia64/vmx/vmx_hypercall.c Tue May 03 21:12:22 2011 +0200 @@ -217,6 +217,7 @@ do_hvm_op(unsigned long op, XEN_GUEST_HA break; } + case HVMOP_get_mem_type: case HVMOP_set_mem_type: case HVMOP_set_mem_access: case HVMOP_get_mem_access: diff -r 10f27b8b3d63 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Mon May 02 12:00:40 2011 +0100 +++ b/xen/arch/x86/hvm/hvm.c Tue May 03 21:12:22 2011 +0200 @@ -3676,6 +3676,37 @@ long do_hvm_op(unsigned long op, XEN_GUE break; } + case HVMOP_get_mem_type: + { + struct xen_hvm_get_mem_type a; + struct domain *d; + p2m_type_t t; + + if ( copy_from_guest(&a, arg, 1) ) + return -EFAULT; + + rc = rcu_lock_target_domain_by_id(a.domid, &d); + if ( rc != 0 ) + return rc; + + rc = -EINVAL; + if ( is_hvm_domain(d) ) + { + gfn_to_mfn_unshare(p2m_get_hostp2m(d), a.pfn, &t, 0); + if ( p2m_is_mmio(t) ) + a.mem_type = HVMMEM_mmio_dm; + else if ( p2m_is_readonly(t) ) + a.mem_type = HVMMEM_ram_ro; + else if ( p2m_is_ram(t) ) + a.mem_type = HVMMEM_ram_rw; + else + a.mem_type = HVMMEM_mmio_dm; + rc = copy_to_guest(arg, &a, 1) ? 
-EFAULT : 0; + } + rcu_unlock_domain(d); + break; + } + case HVMOP_set_mem_type: { struct xen_hvm_set_mem_type a; diff -r 10f27b8b3d63 xen/include/public/hvm/hvm_op.h --- a/xen/include/public/hvm/hvm_op.h Mon May 02 12:00:40 2011 +0100 +++ b/xen/include/public/hvm/hvm_op.h Tue May 03 21:12:22 2011 +0200 @@ -76,6 +76,12 @@ DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_ /* Flushes all VCPU TLBs: @arg must be NULL. */ #define HVMOP_flush_tlbs 5 +typedef enum { + HVMMEM_ram_rw, /* Normal read/write guest RAM */ + HVMMEM_ram_ro, /* Read-only; writes are discarded */ + HVMMEM_mmio_dm, /* Reads and write go to the device model */ +} hvmmem_type_t; + /* Following tools-only interfaces may change in future. */ #if defined(__XEN__) || defined(__XEN_TOOLS__) @@ -109,11 +115,6 @@ typedef struct xen_hvm_modified_memory x DEFINE_XEN_GUEST_HANDLE(xen_hvm_modified_memory_t); #define HVMOP_set_mem_type 8 -typedef enum { - HVMMEM_ram_rw, /* Normal read/write guest RAM */ - HVMMEM_ram_ro, /* Read-only; writes are discarded */ - HVMMEM_mmio_dm, /* Reads and write go to the device model */ -} hvmmem_type_t; /* Notify that a region of memory is to be treated in a specific way. */ struct xen_hvm_set_mem_type { /* Domain to be updated. */ @@ -225,4 +226,18 @@ DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_t #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ +#define HVMOP_get_mem_type 15 +/* Return hvmmem_type_t for the specified pfn. */ +struct xen_hvm_get_mem_type { + /* Domain to be queried. */ + domid_t domid; + /* OUT variable. */ + uint16_t mem_type; + uint16_t pad[2]; /* align next field on 8-byte boundary */ + /* IN variable. */ + uint64_t pfn; +}; +typedef struct xen_hvm_get_mem_type xen_hvm_get_mem_type_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_mem_type_t); + #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */ _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
At 11:34 +0100 on 04 May (1304508849), Olaf Hering wrote: > Add a new hvmop HVMOP_get_mem_type to return the hvmmem_type_t for the > given pfn. Pages which are neither ram or mmio will be HVMMEM_mmio_dm. > This interface enables the crash kernel to skip ballooned pages. Applied; thanks. Tim. -- Tim Deegan <Tim.Deegan@citrix.com> Principal Software Engineer, Xen Platform Team Citrix Systems UK Ltd. (Company #02937203, SL9 0BG) _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
On Wed, May 04, Tim Deegan wrote: > At 11:34 +0100 on 04 May (1304508849), Olaf Hering wrote: > > Add a new hvmop HVMOP_get_mem_type to return the hvmmem_type_t for the > > given pfn. Pages which are neither ram or mmio will be HVMMEM_mmio_dm. > > This interface enables the crash kernel to skip ballooned pages. > > Applied; thanks. Tim, if this new hvmop interface is now finalized, could this be applied to 4.1 as well, and the header file change applied to the linux-2.6.18.hg and also mainline? Olaf _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
At 10:00 +0100 on 17 May (1305626451), Olaf Hering wrote: > On Wed, May 04, Tim Deegan wrote: > > > At 11:34 +0100 on 04 May (1304508849), Olaf Hering wrote: > > > Add a new hvmop HVMOP_get_mem_type to return the hvmmem_type_t for the > > > given pfn. Pages which are neither ram or mmio will be HVMMEM_mmio_dm. > > > This interface enables the crash kernel to skip ballooned pages. > > > > Applied; thanks. > > Tim. > > if this new hvmop interface is now finalized, could this be applied to > 4.1 as well, and the header file change applied to the linux-2.6.18.hg > and also mainline? That's a question for Keir, who maintains the 4.1 tree. But we don't usually backport new features to the older trees. Tim. -- Tim Deegan <Tim.Deegan@citrix.com> Principal Software Engineer, Xen Platform Team Citrix Systems UK Ltd. (Company #02937203, SL9 0BG) _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
On 18/05/2011 10:23, "Tim Deegan" <Tim.Deegan@citrix.com> wrote: >> if this new hvmop interface is now finalized, could this be applied to >> 4.1 as well, and the header file change applied to the linux-2.6.18.hg >> and also mainline? > > That's a question for Keir, who maintains the 4.1 tree. But we don't > usually backport new features to the older trees. This one looks harmless enough, I'll backport it. -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel