New key handler ''I'' to dump the IOMMU p2m table for each domain. Skips dumping table for domain0. Intel and AMD specific iommu_ops handler for dumping p2m table. Signed-off-by: Santosh Jodh <santosh.jodh@citrix.com> diff -r 3d17148e465c -r 8b634f178682 xen/drivers/passthrough/amd/pci_amd_iommu.c --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Thu Aug 02 11:49:37 2012 +0200 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Tue Aug 07 07:46:14 2012 -0700 @@ -22,6 +22,7 @@ #include <xen/pci.h> #include <xen/pci_regs.h> #include <xen/paging.h> +#include <xen/softirq.h> #include <asm/hvm/iommu.h> #include <asm/amd-iommu.h> #include <asm/hvm/svm/amd-iommu-proto.h> @@ -512,6 +513,69 @@ static int amd_iommu_group_id(u16 seg, u #include <asm/io_apic.h> +static void amd_dump_p2m_table_level(struct page_info* pg, int level, u64 gpa) +{ + u64 address; + void *table_vaddr, *pde; + u64 next_table_maddr; + int index, next_level, present; + u32 *entry; + + table_vaddr = __map_domain_page(pg); + if ( table_vaddr == NULL ) + { + printk("Failed to map IOMMU domain page %-16lx\n", page_to_maddr(pg)); + return; + } + + if ( level > 1 ) + { + for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ ) + { + if ( !(index % 2) ) + process_pending_softirqs(); + + pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE); + next_table_maddr = amd_iommu_get_next_table_from_pte(pde); + entry = (u32*)pde; + + next_level = get_field_from_reg_u32(entry[0], + IOMMU_PDE_NEXT_LEVEL_MASK, + IOMMU_PDE_NEXT_LEVEL_SHIFT); + + present = get_field_from_reg_u32(entry[0], + IOMMU_PDE_PRESENT_MASK, + IOMMU_PDE_PRESENT_SHIFT); + + address = gpa + amd_offset_level_address(index, level); + if ( (next_table_maddr != 0) && (next_level != 0) + && present ) + { + amd_dump_p2m_table_level( + maddr_to_page(next_table_maddr), level - 1, address); + } + + if ( present ) + { + printk("gfn: %-16lx mfn: %-16lx\n", + address, next_table_maddr); + } + } + } + + unmap_domain_page(table_vaddr); +} + +static void amd_dump_p2m_table(struct domain *d) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); + + if ( !hd->root_table ) + return; + + amd_dump_p2m_table_level(hd->root_table, hd->paging_mode, 0); +} + const struct iommu_ops amd_iommu_ops = { .init = amd_iommu_domain_init, .dom0_init = amd_iommu_dom0_init, @@ -531,4 +595,5 @@ const struct iommu_ops amd_iommu_ops = { .resume = amd_iommu_resume, .share_p2m = amd_iommu_share_p2m, .crash_shutdown = amd_iommu_suspend, + .dump_p2m_table = amd_dump_p2m_table, }; diff -r 3d17148e465c -r 8b634f178682 xen/drivers/passthrough/iommu.c --- a/xen/drivers/passthrough/iommu.c Thu Aug 02 11:49:37 2012 +0200 +++ b/xen/drivers/passthrough/iommu.c Tue Aug 07 07:46:14 2012 -0700 @@ -18,6 +18,7 @@ #include <asm/hvm/iommu.h> #include <xen/paging.h> #include <xen/guest_access.h> +#include <xen/keyhandler.h> #include <xen/softirq.h> #include <xsm/xsm.h> @@ -54,6 +55,8 @@ bool_t __read_mostly amd_iommu_perdev_in DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb); +void setup_iommu_dump(void); + static void __init parse_iommu_param(char *s) { char *ss; @@ -119,6 +122,7 @@ void __init iommu_dom0_init(struct domai if ( !iommu_enabled ) return; + setup_iommu_dump(); d->need_iommu = !!iommu_dom0_strict; if ( need_iommu(d) ) { @@ -654,6 +658,46 @@ int iommu_do_domctl( return ret; } +static void iommu_dump_p2m_table(unsigned char key) +{ + struct domain *d; + const struct iommu_ops *ops; + + if ( !iommu_enabled ) + { + printk("IOMMU not enabled!\n"); + return; + } + + ops = iommu_get_ops(); + for_each_domain(d) + { + if ( !d->domain_id 
) + continue; + + if ( iommu_use_hap_pt(d) ) + { + printk("\ndomain%d IOMMU p2m table shared with MMU: \n", d->domain_id); + continue; + } + + printk("\ndomain%d IOMMU p2m table: \n", d->domain_id); + ops->dump_p2m_table(d); + } +} + +static struct keyhandler iommu_p2m_table = { + .diagnostic = 0, + .u.fn = iommu_dump_p2m_table, + .desc = "dump iommu p2m table" +}; + +void __init setup_iommu_dump(void) +{ + register_keyhandler(''I'', &iommu_p2m_table); +} + + /* * Local variables: * mode: C diff -r 3d17148e465c -r 8b634f178682 xen/drivers/passthrough/vtd/iommu.c --- a/xen/drivers/passthrough/vtd/iommu.c Thu Aug 02 11:49:37 2012 +0200 +++ b/xen/drivers/passthrough/vtd/iommu.c Tue Aug 07 07:46:14 2012 -0700 @@ -31,6 +31,7 @@ #include <xen/pci.h> #include <xen/pci_regs.h> #include <xen/keyhandler.h> +#include <xen/softirq.h> #include <asm/msi.h> #include <asm/irq.h> #if defined(__i386__) || defined(__x86_64__) @@ -2356,6 +2357,56 @@ static void vtd_resume(void) } } +static void vtd_dump_p2m_table_level(u64 pt_maddr, int level, u64 gpa) +{ + u64 address; + int i; + struct dma_pte *pt_vaddr, *pte; + int next_level; + + if ( pt_maddr == 0 ) + return; + + pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr); + if ( pt_vaddr == NULL ) + { + printk("Failed to map VT-D domain page %-16lx\n", pt_maddr); + return; + } + + next_level = level - 1; + for ( i = 0; i < PTE_NUM; i++ ) + { + if ( !(i % 2) ) + process_pending_softirqs(); + + pte = &pt_vaddr[i]; + if ( !dma_pte_present(*pte) ) + continue; + + address = gpa + offset_level_address(i, level); + if ( next_level >= 1 ) + vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level, address); + + if ( level == 1 ) + printk("gfn: %-16lx mfn: %-16lx superpage=%d\n", + address >> PAGE_SHIFT_4K, pte->val >> PAGE_SHIFT_4K, dma_pte_superpage(*pte)? 
1 : 0); + } + + unmap_vtd_domain_page(pt_vaddr); +} + +static void vtd_dump_p2m_table(struct domain *d) +{ + struct hvm_iommu *hd; + + if ( list_empty(&acpi_drhd_units) ) + return; + + hd = domain_hvm_iommu(d); + vtd_dump_p2m_table_level(hd->pgd_maddr, agaw_to_level(hd->agaw), 0); +} + const struct iommu_ops intel_iommu_ops = { .init = intel_iommu_domain_init, .dom0_init = intel_iommu_dom0_init, @@ -2378,6 +2429,7 @@ const struct iommu_ops intel_iommu_ops .crash_shutdown = vtd_crash_shutdown, .iotlb_flush = intel_iommu_iotlb_flush, .iotlb_flush_all = intel_iommu_iotlb_flush_all, + .dump_p2m_table = vtd_dump_p2m_table, }; /* diff -r 3d17148e465c -r 8b634f178682 xen/drivers/passthrough/vtd/iommu.h --- a/xen/drivers/passthrough/vtd/iommu.h Thu Aug 02 11:49:37 2012 +0200 +++ b/xen/drivers/passthrough/vtd/iommu.h Tue Aug 07 07:46:14 2012 -0700 @@ -248,6 +248,8 @@ struct context_entry { #define level_to_offset_bits(l) (12 + (l - 1) * LEVEL_STRIDE) #define address_level_offset(addr, level) \ ((addr >> level_to_offset_bits(level)) & LEVEL_MASK) +#define offset_level_address(offset, level) \ + ((u64)(offset) << level_to_offset_bits(level)) #define level_mask(l) (((u64)(-1)) << level_to_offset_bits(l)) #define level_size(l) (1 << level_to_offset_bits(l)) #define align_to_level(addr, l) ((addr + level_size(l) - 1) & level_mask(l)) @@ -277,6 +279,7 @@ struct dma_pte { #define dma_set_pte_addr(p, addr) do {\ (p).val |= ((addr) & PAGE_MASK_4K); } while (0) #define dma_pte_present(p) (((p).val & 3) != 0) +#define dma_pte_superpage(p) (((p).val & (1<<7)) != 0) /* interrupt remap entry */ struct iremap_entry { diff -r 3d17148e465c -r 8b634f178682 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Thu Aug 02 11:49:37 2012 +0200 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Tue Aug 07 07:46:14 2012 -0700 @@ -38,6 +38,10 @@ #define PTE_PER_TABLE_ALLOC(entries) \ PAGE_SIZE * (PTE_PER_TABLE_ALIGN(entries) >> PTE_PER_TABLE_SHIFT) +#define amd_offset_level_address(offset, level) \ + ((u64)(offset) << ((PTE_PER_TABLE_SHIFT * \ + (level - IOMMU_PAGING_MODE_LEVEL_1)))) + #define PCI_MIN_CAP_OFFSET 0x40 #define PCI_MAX_CAP_BLOCKS 48 #define PCI_CAP_PTR_MASK 0xFC diff -r 3d17148e465c -r 8b634f178682 xen/include/xen/iommu.h --- a/xen/include/xen/iommu.h Thu Aug 02 11:49:37 2012 +0200 +++ b/xen/include/xen/iommu.h Tue Aug 07 07:46:14 2012 -0700 @@ -141,6 +141,7 @@ struct iommu_ops { void (*crash_shutdown)(void); void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count); void (*iotlb_flush_all)(struct domain *d); + void (*dump_p2m_table)(struct domain *d); }; void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value);
>>> On 07.08.12 at 16:49, Santosh Jodh <santosh.jodh@citrix.com> wrote: > --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Thu Aug 02 11:49:37 2012 +0200 > +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Tue Aug 07 07:46:14 2012 -0700 > @@ -22,6 +22,7 @@ > #include <xen/pci.h> > #include <xen/pci_regs.h> > #include <xen/paging.h> > +#include <xen/softirq.h> > #include <asm/hvm/iommu.h> > #include <asm/amd-iommu.h> > #include <asm/hvm/svm/amd-iommu-proto.h> > @@ -512,6 +513,69 @@ static int amd_iommu_group_id(u16 seg, u > > #include <asm/io_apic.h> > > +static void amd_dump_p2m_table_level(struct page_info* pg, int level, u64 gpa) > +{ > + u64 address; > + void *table_vaddr, *pde; > + u64 next_table_maddr; > + int index, next_level, present; > + u32 *entry; > + > + table_vaddr = __map_domain_page(pg); > + if ( table_vaddr == NULL ) > + { > + printk("Failed to map IOMMU domain page %-16lx\n", page_to_maddr(pg)); > + return; > + } > + > + if ( level > 1 )As long as the top level call below can never pass <= 1 here and the recursive call gets gated accordingly, I don''t see why you do it differently here than for VT-d, resulting in both unnecessarily deep indentation and a pointless map/unmap pair around the conditional. Jan> + { > + for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ ) > + { > + if ( !(index % 2) ) > + process_pending_softirqs(); > + > + pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE); > + next_table_maddr = amd_iommu_get_next_table_from_pte(pde); > + entry = (u32*)pde; > + > + next_level = get_field_from_reg_u32(entry[0], > + IOMMU_PDE_NEXT_LEVEL_MASK, > + IOMMU_PDE_NEXT_LEVEL_SHIFT); > + > + present = get_field_from_reg_u32(entry[0], > + IOMMU_PDE_PRESENT_MASK, > + IOMMU_PDE_PRESENT_SHIFT); > + > + address = gpa + amd_offset_level_address(index, level); > + if ( (next_table_maddr != 0) && (next_level != 0) > + && present ) > + { > + amd_dump_p2m_table_level( > + maddr_to_page(next_table_maddr), level - 1, address); > + } > + > + if ( present ) > + { > + printk("gfn: %-16lx mfn: %-16lx\n", > + address, next_table_maddr); > + } > + } > + } > + > + unmap_domain_page(table_vaddr); > +} > + > +static void amd_dump_p2m_table(struct domain *d) > +{ > + struct hvm_iommu *hd = domain_hvm_iommu(d); > + > + if ( !hd->root_table ) > + return; > + > + amd_dump_p2m_table_level(hd->root_table, hd->paging_mode, 0); > +} > + > const struct iommu_ops amd_iommu_ops = { > .init = amd_iommu_domain_init, > .dom0_init = amd_iommu_dom0_init,
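For illustration, the restructuring Jan asks for amounts to an early return before the page is mapped, so the leaf case never maps/unmaps anything and the walk loop loses one level of indentation - a minimal sketch using the names from the patch above (the next revision of the patch, below, adopts exactly this shape):

    static void amd_dump_p2m_table_level(struct page_info *pg, int level, u64 gpa)
    {
        void *table_vaddr;
        int index;

        /* Leaf level: nothing left to walk, so no map/unmap pair is needed. */
        if ( level <= 1 )
            return;

        table_vaddr = __map_domain_page(pg);
        if ( table_vaddr == NULL )
            return;

        for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ )
        {
            /* ... decode the PDE, print, and recurse as in the patch above ... */
        }

        unmap_domain_page(table_vaddr);
    }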
Honestly, I don''t have an AMD machine to test my code - I just wrote it for completion sake. I based my code on deallocate_next_page_table() in the same file. I agree that the map/unmap can be easily avoided. Someone more familiar with AMD IOMMU might be able to comment more. Thanks, Santosh -----Original Message----- From: Jan Beulich [mailto:JBeulich@suse.com] Sent: Tuesday, August 07, 2012 8:52 AM To: Santosh Jodh Cc: wei.wang2@amd.com; xiantao.zhang@intel.com; xen-devel; Tim (Xen.org) Subject: Re: [Xen-devel] [PATCH] dump_p2m_table: For IOMMU>>> On 07.08.12 at 16:49, Santosh Jodh <santosh.jodh@citrix.com> wrote: > --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Thu Aug 02 11:49:37 2012 +0200 > +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Tue Aug 07 07:46:14 2012 -0700 > @@ -22,6 +22,7 @@ > #include <xen/pci.h> > #include <xen/pci_regs.h> > #include <xen/paging.h> > +#include <xen/softirq.h> > #include <asm/hvm/iommu.h> > #include <asm/amd-iommu.h> > #include <asm/hvm/svm/amd-iommu-proto.h> @@ -512,6 +513,69 @@ static > int amd_iommu_group_id(u16 seg, u > > #include <asm/io_apic.h> > > +static void amd_dump_p2m_table_level(struct page_info* pg, int level, > +u64 gpa) { > + u64 address; > + void *table_vaddr, *pde; > + u64 next_table_maddr; > + int index, next_level, present; > + u32 *entry; > + > + table_vaddr = __map_domain_page(pg); > + if ( table_vaddr == NULL ) > + { > + printk("Failed to map IOMMU domain page %-16lx\n", page_to_maddr(pg)); > + return; > + } > + > + if ( level > 1 )As long as the top level call below can never pass <= 1 here and the recursive call gets gated accordingly, I don''t see why you do it differently here than for VT-d, resulting in both unnecessarily deep indentation and a pointless map/unmap pair around the conditional. Jan> + { > + for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ ) > + { > + if ( !(index % 2) ) > + process_pending_softirqs(); > + > + pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE); > + next_table_maddr = amd_iommu_get_next_table_from_pte(pde); > + entry = (u32*)pde; > + > + next_level = get_field_from_reg_u32(entry[0], > + IOMMU_PDE_NEXT_LEVEL_MASK, > + > + IOMMU_PDE_NEXT_LEVEL_SHIFT); > + > + present = get_field_from_reg_u32(entry[0], > + IOMMU_PDE_PRESENT_MASK, > + > + IOMMU_PDE_PRESENT_SHIFT); > + > + address = gpa + amd_offset_level_address(index, level); > + if ( (next_table_maddr != 0) && (next_level != 0) > + && present ) > + { > + amd_dump_p2m_table_level( > + maddr_to_page(next_table_maddr), level - 1, address); > + } > + > + if ( present ) > + { > + printk("gfn: %-16lx mfn: %-16lx\n", > + address, next_table_maddr); > + } > + } > + } > + > + unmap_domain_page(table_vaddr); > +} > + > +static void amd_dump_p2m_table(struct domain *d) { > + struct hvm_iommu *hd = domain_hvm_iommu(d); > + > + if ( !hd->root_table ) > + return; > + > + amd_dump_p2m_table_level(hd->root_table, hd->paging_mode, 0); } > + > const struct iommu_ops amd_iommu_ops = { > .init = amd_iommu_domain_init, > .dom0_init = amd_iommu_dom0_init,
>>> On 07.08.12 at 16:49, Santosh Jodh <santosh.jodh@citrix.com> wrote: > +void __init setup_iommu_dump(void) > +{ > + register_keyhandler('I', &iommu_p2m_table); > +} Are you btw aware that a handler for 'I' already exists (in xen/arch/x86/hvm/irq.c)? Jan
Thanks for catching that. I did my work in the xenserver tree (which is based off 4.1) which did not have that handler. I will look for the next free letter and resend the patch. Thanks, Santosh -----Original Message----- From: Jan Beulich [mailto:JBeulich@suse.com] Sent: Wednesday, August 08, 2012 12:32 AM To: Santosh Jodh Cc: wei.wang2@amd.com; xiantao.zhang@intel.com; xen-devel; Tim (Xen.org) Subject: Re: [Xen-devel] [PATCH] dump_p2m_table: For IOMMU >>> On 07.08.12 at 16:49, Santosh Jodh <santosh.jodh@citrix.com> wrote: > +void __init setup_iommu_dump(void) > +{ > + register_keyhandler('I', &iommu_p2m_table); } Are you btw aware that a handler for 'I' already exists (in xen/arch/x86/hvm/irq.c)? Jan
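For context on why such a clash is easy to miss: key handlers live in a single table indexed by the key byte, so each key can have at most one handler and a later registration replaces an earlier one. A from-memory sketch of xen/common/keyhandler.c of this era (an assumption worth verifying - the real code may additionally assert on duplicates in debug builds):

    static struct keyhandler *key_table[256];

    void register_keyhandler(unsigned char key, struct keyhandler *handler)
    {
        key_table[key] = handler;    /* one handler per key; last writer wins */
    }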
New key handler ''o'' to dump the IOMMU p2m table for each domain. Skips dumping table for domain0. Intel and AMD specific iommu_ops handler for dumping p2m table. Signed-off-by: Santosh Jodh <santosh.jodh@citrix.com> diff -r 472fc515a463 -r 68946c7e1d67 xen/drivers/passthrough/amd/pci_amd_iommu.c --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Tue Aug 07 18:37:31 2012 +0100 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Wed Aug 08 08:55:49 2012 -0700 @@ -22,6 +22,7 @@ #include <xen/pci.h> #include <xen/pci_regs.h> #include <xen/paging.h> +#include <xen/softirq.h> #include <asm/hvm/iommu.h> #include <asm/amd-iommu.h> #include <asm/hvm/svm/amd-iommu-proto.h> @@ -512,6 +513,69 @@ static int amd_iommu_group_id(u16 seg, u #include <asm/io_apic.h> +static void amd_dump_p2m_table_level(struct page_info* pg, int level, u64 gpa) +{ + u64 address; + void *table_vaddr, *pde; + u64 next_table_maddr; + int index, next_level, present; + u32 *entry; + + if ( level <= 1 ) + return; + + table_vaddr = __map_domain_page(pg); + if ( table_vaddr == NULL ) + { + printk("Failed to map IOMMU domain page %-16lx\n", page_to_maddr(pg)); + return; + } + + for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ ) + { + if ( !(index % 2) ) + process_pending_softirqs(); + + pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE); + next_table_maddr = amd_iommu_get_next_table_from_pte(pde); + entry = (u32*)pde; + + next_level = get_field_from_reg_u32(entry[0], + IOMMU_PDE_NEXT_LEVEL_MASK, + IOMMU_PDE_NEXT_LEVEL_SHIFT); + + present = get_field_from_reg_u32(entry[0], + IOMMU_PDE_PRESENT_MASK, + IOMMU_PDE_PRESENT_SHIFT); + + address = gpa + amd_offset_level_address(index, level); + if ( (next_table_maddr != 0) && (next_level != 0) + && present ) + { + amd_dump_p2m_table_level( + maddr_to_page(next_table_maddr), level - 1, address); + } + + if ( present ) + { + printk("gfn: %-16lx mfn: %-16lx\n", + address, next_table_maddr); + } + } + + unmap_domain_page(table_vaddr); +} + +static void amd_dump_p2m_table(struct domain *d) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); + + if ( !hd->root_table ) + return; + + amd_dump_p2m_table_level(hd->root_table, hd->paging_mode, 0); +} + const struct iommu_ops amd_iommu_ops = { .init = amd_iommu_domain_init, .dom0_init = amd_iommu_dom0_init, @@ -531,4 +595,5 @@ const struct iommu_ops amd_iommu_ops = { .resume = amd_iommu_resume, .share_p2m = amd_iommu_share_p2m, .crash_shutdown = amd_iommu_suspend, + .dump_p2m_table = amd_dump_p2m_table, }; diff -r 472fc515a463 -r 68946c7e1d67 xen/drivers/passthrough/iommu.c --- a/xen/drivers/passthrough/iommu.c Tue Aug 07 18:37:31 2012 +0100 +++ b/xen/drivers/passthrough/iommu.c Wed Aug 08 08:55:49 2012 -0700 @@ -18,6 +18,7 @@ #include <asm/hvm/iommu.h> #include <xen/paging.h> #include <xen/guest_access.h> +#include <xen/keyhandler.h> #include <xen/softirq.h> #include <xsm/xsm.h> @@ -54,6 +55,8 @@ bool_t __read_mostly amd_iommu_perdev_in DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb); +void setup_iommu_dump(void); + static void __init parse_iommu_param(char *s) { char *ss; @@ -119,6 +122,7 @@ void __init iommu_dom0_init(struct domai if ( !iommu_enabled ) return; + setup_iommu_dump(); d->need_iommu = !!iommu_dom0_strict; if ( need_iommu(d) ) { @@ -654,6 +658,46 @@ int iommu_do_domctl( return ret; } +static void iommu_dump_p2m_table(unsigned char key) +{ + struct domain *d; + const struct iommu_ops *ops; + + if ( !iommu_enabled ) + { + printk("IOMMU not enabled!\n"); + return; + } + + ops = iommu_get_ops(); + for_each_domain(d) + { + if ( 
!d->domain_id ) + continue; + + if ( iommu_use_hap_pt(d) ) + { + printk("\ndomain%d IOMMU p2m table shared with MMU: \n", d->domain_id); + continue; + } + + printk("\ndomain%d IOMMU p2m table: \n", d->domain_id); + ops->dump_p2m_table(d); + } +} + +static struct keyhandler iommu_p2m_table = { + .diagnostic = 0, + .u.fn = iommu_dump_p2m_table, + .desc = "dump iommu p2m table" +}; + +void __init setup_iommu_dump(void) +{ + register_keyhandler(''o'', &iommu_p2m_table); +} + + /* * Local variables: * mode: C diff -r 472fc515a463 -r 68946c7e1d67 xen/drivers/passthrough/vtd/iommu.c --- a/xen/drivers/passthrough/vtd/iommu.c Tue Aug 07 18:37:31 2012 +0100 +++ b/xen/drivers/passthrough/vtd/iommu.c Wed Aug 08 08:55:49 2012 -0700 @@ -31,6 +31,7 @@ #include <xen/pci.h> #include <xen/pci_regs.h> #include <xen/keyhandler.h> +#include <xen/softirq.h> #include <asm/msi.h> #include <asm/irq.h> #if defined(__i386__) || defined(__x86_64__) @@ -2365,6 +2366,56 @@ static void vtd_resume(void) } } +static void vtd_dump_p2m_table_level(u64 pt_maddr, int level, u64 gpa) +{ + u64 address; + int i; + struct dma_pte *pt_vaddr, *pte; + int next_level; + + if ( pt_maddr == 0 ) + return; + + pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr); + if ( pt_vaddr == NULL ) + { + printk("Failed to map VT-D domain page %-16lx\n", pt_maddr); + return; + } + + next_level = level - 1; + for ( i = 0; i < PTE_NUM; i++ ) + { + if ( !(i % 2) ) + process_pending_softirqs(); + + pte = &pt_vaddr[i]; + if ( !dma_pte_present(*pte) ) + continue; + + address = gpa + offset_level_address(i, level); + if ( next_level >= 1 ) + vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level, address); + + if ( level == 1 ) + printk("gfn: %-16lx mfn: %-16lx superpage=%d\n", + address >> PAGE_SHIFT_4K, pte->val >> PAGE_SHIFT_4K, dma_pte_superpage(*pte)? 
1 : 0); + } + + unmap_vtd_domain_page(pt_vaddr); +} + +static void vtd_dump_p2m_table(struct domain *d) +{ + struct hvm_iommu *hd; + + if ( list_empty(&acpi_drhd_units) ) + return; + + hd = domain_hvm_iommu(d); + vtd_dump_p2m_table_level(hd->pgd_maddr, agaw_to_level(hd->agaw), 0); +} + const struct iommu_ops intel_iommu_ops = { .init = intel_iommu_domain_init, .dom0_init = intel_iommu_dom0_init, @@ -2387,6 +2438,7 @@ const struct iommu_ops intel_iommu_ops .crash_shutdown = vtd_crash_shutdown, .iotlb_flush = intel_iommu_iotlb_flush, .iotlb_flush_all = intel_iommu_iotlb_flush_all, + .dump_p2m_table = vtd_dump_p2m_table, }; /* diff -r 472fc515a463 -r 68946c7e1d67 xen/drivers/passthrough/vtd/iommu.h --- a/xen/drivers/passthrough/vtd/iommu.h Tue Aug 07 18:37:31 2012 +0100 +++ b/xen/drivers/passthrough/vtd/iommu.h Wed Aug 08 08:55:49 2012 -0700 @@ -248,6 +248,8 @@ struct context_entry { #define level_to_offset_bits(l) (12 + (l - 1) * LEVEL_STRIDE) #define address_level_offset(addr, level) \ ((addr >> level_to_offset_bits(level)) & LEVEL_MASK) +#define offset_level_address(offset, level) \ + ((u64)(offset) << level_to_offset_bits(level)) #define level_mask(l) (((u64)(-1)) << level_to_offset_bits(l)) #define level_size(l) (1 << level_to_offset_bits(l)) #define align_to_level(addr, l) ((addr + level_size(l) - 1) & level_mask(l)) @@ -277,6 +279,7 @@ struct dma_pte { #define dma_set_pte_addr(p, addr) do {\ (p).val |= ((addr) & PAGE_MASK_4K); } while (0) #define dma_pte_present(p) (((p).val & 3) != 0) +#define dma_pte_superpage(p) (((p).val & (1<<7)) != 0) /* interrupt remap entry */ struct iremap_entry { diff -r 472fc515a463 -r 68946c7e1d67 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Tue Aug 07 18:37:31 2012 +0100 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Wed Aug 08 08:55:49 2012 -0700 @@ -38,6 +38,10 @@ #define PTE_PER_TABLE_ALLOC(entries) \ PAGE_SIZE * (PTE_PER_TABLE_ALIGN(entries) >> PTE_PER_TABLE_SHIFT) +#define amd_offset_level_address(offset, level) \ + ((u64)(offset) << ((PTE_PER_TABLE_SHIFT * \ + (level - IOMMU_PAGING_MODE_LEVEL_1)))) + #define PCI_MIN_CAP_OFFSET 0x40 #define PCI_MAX_CAP_BLOCKS 48 #define PCI_CAP_PTR_MASK 0xFC diff -r 472fc515a463 -r 68946c7e1d67 xen/include/xen/iommu.h --- a/xen/include/xen/iommu.h Tue Aug 07 18:37:31 2012 +0100 +++ b/xen/include/xen/iommu.h Wed Aug 08 08:55:49 2012 -0700 @@ -141,6 +141,7 @@ struct iommu_ops { void (*crash_shutdown)(void); void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count); void (*iotlb_flush_all)(struct domain *d); + void (*dump_p2m_table)(struct domain *d); }; void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value);
>>> On 08.08.12 at 17:56, Santosh Jodh <santosh.jodh@citrix.com> wrote: > + if ( table_vaddr == NULL ) > + { > + printk("Failed to map IOMMU domain page %-16lx\n", page_to_maddr(pg));Does that build on x86-32? Similar format specifier problems appear to be present elsewhere in the patch. Jan
Thanks Jan. No - it does not compile on x86_32. Who runs x86_32 xen anyway? Just kidding... BTW, I hate printf format specifiers... Thanks, Santosh -----Original Message----- From: Jan Beulich [mailto:JBeulich@suse.com] Sent: Wednesday, August 08, 2012 9:21 AM To: Santosh Jodh Cc: wei.wang2@amd.com; xiantao.zhang@intel.com; xen-devel; Tim (Xen.org) Subject: Re: [Xen-devel] [PATCH] dump_p2m_table: For IOMMU>>> On 08.08.12 at 17:56, Santosh Jodh <santosh.jodh@citrix.com> wrote: > + if ( table_vaddr == NULL ) > + { > + printk("Failed to map IOMMU domain page %-16lx\n", > + page_to_maddr(pg));Does that build on x86-32? Similar format specifier problems appear to be present elsewhere in the patch. Jan
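For the record, the breakage being pointed at: Xen's printk() is declared with gcc's printf format attribute, and on x86-32 a u64 is unsigned long long, so a %lx conversion no longer matches a 64-bit argument and the build fails. A short sketch of the broken and the portable forms, PRIx64 being the inttypes-style macro Xen provides:

    u64 maddr = page_to_maddr(pg);

    printk("page %-16lx\n", maddr);         /* x86-32: %lx wants unsigned long,
                                               but maddr is 64 bits wide */
    printk("page %016"PRIx64"\n", maddr);   /* expands to the right length
                                               modifier on either architecture */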
New key handler ''o'' to dump the IOMMU p2m table for each domain. Skips dumping table for domain0. Intel and AMD specific iommu_ops handler for dumping p2m table. Signed-off-by: Santosh Jodh <santosh.jodh@citrix.com> diff -r 472fc515a463 -r 8deb7c7a25c4 xen/drivers/passthrough/amd/pci_amd_iommu.c --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Tue Aug 07 18:37:31 2012 +0100 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Wed Aug 08 09:56:50 2012 -0700 @@ -22,6 +22,7 @@ #include <xen/pci.h> #include <xen/pci_regs.h> #include <xen/paging.h> +#include <xen/softirq.h> #include <asm/hvm/iommu.h> #include <asm/amd-iommu.h> #include <asm/hvm/svm/amd-iommu-proto.h> @@ -512,6 +513,69 @@ static int amd_iommu_group_id(u16 seg, u #include <asm/io_apic.h> +static void amd_dump_p2m_table_level(struct page_info* pg, int level, u64 gpa) +{ + u64 address; + void *table_vaddr, *pde; + u64 next_table_maddr; + int index, next_level, present; + u32 *entry; + + if ( level <= 1 ) + return; + + table_vaddr = __map_domain_page(pg); + if ( table_vaddr == NULL ) + { + printk("Failed to map IOMMU domain page %016"PRIx64"\n", page_to_maddr(pg)); + return; + } + + for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ ) + { + if ( !(index % 2) ) + process_pending_softirqs(); + + pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE); + next_table_maddr = amd_iommu_get_next_table_from_pte(pde); + entry = (u32*)pde; + + next_level = get_field_from_reg_u32(entry[0], + IOMMU_PDE_NEXT_LEVEL_MASK, + IOMMU_PDE_NEXT_LEVEL_SHIFT); + + present = get_field_from_reg_u32(entry[0], + IOMMU_PDE_PRESENT_MASK, + IOMMU_PDE_PRESENT_SHIFT); + + address = gpa + amd_offset_level_address(index, level); + if ( (next_table_maddr != 0) && (next_level != 0) + && present ) + { + amd_dump_p2m_table_level( + maddr_to_page(next_table_maddr), level - 1, address); + } + + if ( present ) + { + printk("gfn: %016"PRIx64" mfn: %016"PRIx64"\n", + address >> PAGE_SHIFT, next_table_maddr >> PAGE_SHIFT); + } + } + + unmap_domain_page(table_vaddr); +} + +static void amd_dump_p2m_table(struct domain *d) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); + + if ( !hd->root_table ) + return; + + amd_dump_p2m_table_level(hd->root_table, hd->paging_mode, 0); +} + const struct iommu_ops amd_iommu_ops = { .init = amd_iommu_domain_init, .dom0_init = amd_iommu_dom0_init, @@ -531,4 +595,5 @@ const struct iommu_ops amd_iommu_ops = { .resume = amd_iommu_resume, .share_p2m = amd_iommu_share_p2m, .crash_shutdown = amd_iommu_suspend, + .dump_p2m_table = amd_dump_p2m_table, }; diff -r 472fc515a463 -r 8deb7c7a25c4 xen/drivers/passthrough/iommu.c --- a/xen/drivers/passthrough/iommu.c Tue Aug 07 18:37:31 2012 +0100 +++ b/xen/drivers/passthrough/iommu.c Wed Aug 08 09:56:50 2012 -0700 @@ -18,6 +18,7 @@ #include <asm/hvm/iommu.h> #include <xen/paging.h> #include <xen/guest_access.h> +#include <xen/keyhandler.h> #include <xen/softirq.h> #include <xsm/xsm.h> @@ -54,6 +55,8 @@ bool_t __read_mostly amd_iommu_perdev_in DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb); +void setup_iommu_dump(void); + static void __init parse_iommu_param(char *s) { char *ss; @@ -119,6 +122,7 @@ void __init iommu_dom0_init(struct domai if ( !iommu_enabled ) return; + setup_iommu_dump(); d->need_iommu = !!iommu_dom0_strict; if ( need_iommu(d) ) { @@ -654,6 +658,46 @@ int iommu_do_domctl( return ret; } +static void iommu_dump_p2m_table(unsigned char key) +{ + struct domain *d; + const struct iommu_ops *ops; + + if ( !iommu_enabled ) + { + printk("IOMMU not enabled!\n"); + return; + } + + ops = 
iommu_get_ops(); + for_each_domain(d) + { + if ( !d->domain_id ) + continue; + + if ( iommu_use_hap_pt(d) ) + { + printk("\ndomain%d IOMMU p2m table shared with MMU: \n", d->domain_id); + continue; + } + + printk("\ndomain%d IOMMU p2m table: \n", d->domain_id); + ops->dump_p2m_table(d); + } +} + +static struct keyhandler iommu_p2m_table = { + .diagnostic = 0, + .u.fn = iommu_dump_p2m_table, + .desc = "dump iommu p2m table" +}; + +void __init setup_iommu_dump(void) +{ + register_keyhandler(''o'', &iommu_p2m_table); +} + + /* * Local variables: * mode: C diff -r 472fc515a463 -r 8deb7c7a25c4 xen/drivers/passthrough/vtd/iommu.c --- a/xen/drivers/passthrough/vtd/iommu.c Tue Aug 07 18:37:31 2012 +0100 +++ b/xen/drivers/passthrough/vtd/iommu.c Wed Aug 08 09:56:50 2012 -0700 @@ -31,6 +31,7 @@ #include <xen/pci.h> #include <xen/pci_regs.h> #include <xen/keyhandler.h> +#include <xen/softirq.h> #include <asm/msi.h> #include <asm/irq.h> #if defined(__i386__) || defined(__x86_64__) @@ -2365,6 +2366,56 @@ static void vtd_resume(void) } } +static void vtd_dump_p2m_table_level(u64 pt_maddr, int level, u64 gpa) +{ + u64 address; + int i; + struct dma_pte *pt_vaddr, *pte; + int next_level; + + if ( pt_maddr == 0 ) + return; + + pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr); + if ( pt_vaddr == NULL ) + { + printk("Failed to map VT-D domain page %016"PRIx64"\n", pt_maddr); + return; + } + + next_level = level - 1; + for ( i = 0; i < PTE_NUM; i++ ) + { + if ( !(i % 2) ) + process_pending_softirqs(); + + pte = &pt_vaddr[i]; + if ( !dma_pte_present(*pte) ) + continue; + + address = gpa + offset_level_address(i, level); + if ( next_level >= 1 ) + vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level, address); + + if ( level == 1 ) + printk("gfn: %016"PRIx64" mfn: %016"PRIx64" superpage=%d\n", + address >> PAGE_SHIFT_4K, pte->val >> PAGE_SHIFT_4K, dma_pte_superpage(*pte)? 
1 : 0); + } + + unmap_vtd_domain_page(pt_vaddr); +} + +static void vtd_dump_p2m_table(struct domain *d) +{ + struct hvm_iommu *hd; + + if ( list_empty(&acpi_drhd_units) ) + return; + + hd = domain_hvm_iommu(d); + vtd_dump_p2m_table_level(hd->pgd_maddr, agaw_to_level(hd->agaw), 0); +} + const struct iommu_ops intel_iommu_ops = { .init = intel_iommu_domain_init, .dom0_init = intel_iommu_dom0_init, @@ -2387,6 +2438,7 @@ const struct iommu_ops intel_iommu_ops .crash_shutdown = vtd_crash_shutdown, .iotlb_flush = intel_iommu_iotlb_flush, .iotlb_flush_all = intel_iommu_iotlb_flush_all, + .dump_p2m_table = vtd_dump_p2m_table, }; /* diff -r 472fc515a463 -r 8deb7c7a25c4 xen/drivers/passthrough/vtd/iommu.h --- a/xen/drivers/passthrough/vtd/iommu.h Tue Aug 07 18:37:31 2012 +0100 +++ b/xen/drivers/passthrough/vtd/iommu.h Wed Aug 08 09:56:50 2012 -0700 @@ -248,6 +248,8 @@ struct context_entry { #define level_to_offset_bits(l) (12 + (l - 1) * LEVEL_STRIDE) #define address_level_offset(addr, level) \ ((addr >> level_to_offset_bits(level)) & LEVEL_MASK) +#define offset_level_address(offset, level) \ + ((u64)(offset) << level_to_offset_bits(level)) #define level_mask(l) (((u64)(-1)) << level_to_offset_bits(l)) #define level_size(l) (1 << level_to_offset_bits(l)) #define align_to_level(addr, l) ((addr + level_size(l) - 1) & level_mask(l)) @@ -277,6 +279,7 @@ struct dma_pte { #define dma_set_pte_addr(p, addr) do {\ (p).val |= ((addr) & PAGE_MASK_4K); } while (0) #define dma_pte_present(p) (((p).val & 3) != 0) +#define dma_pte_superpage(p) (((p).val & (1<<7)) != 0) /* interrupt remap entry */ struct iremap_entry { diff -r 472fc515a463 -r 8deb7c7a25c4 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Tue Aug 07 18:37:31 2012 +0100 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Wed Aug 08 09:56:50 2012 -0700 @@ -38,6 +38,10 @@ #define PTE_PER_TABLE_ALLOC(entries) \ PAGE_SIZE * (PTE_PER_TABLE_ALIGN(entries) >> PTE_PER_TABLE_SHIFT) +#define amd_offset_level_address(offset, level) \ + ((u64)(offset) << ((PTE_PER_TABLE_SHIFT * \ + (level - IOMMU_PAGING_MODE_LEVEL_1)))) + #define PCI_MIN_CAP_OFFSET 0x40 #define PCI_MAX_CAP_BLOCKS 48 #define PCI_CAP_PTR_MASK 0xFC diff -r 472fc515a463 -r 8deb7c7a25c4 xen/include/xen/iommu.h --- a/xen/include/xen/iommu.h Tue Aug 07 18:37:31 2012 +0100 +++ b/xen/include/xen/iommu.h Wed Aug 08 09:56:50 2012 -0700 @@ -141,6 +141,7 @@ struct iommu_ops { void (*crash_shutdown)(void); void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count); void (*iotlb_flush_all)(struct domain *d); + void (*dump_p2m_table)(struct domain *d); }; void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value);
>>> On 08.08.12 at 19:17, Santosh Jodh <santosh.jodh@citrix.com> wrote: > New key handler 'o' to dump the IOMMU p2m table for each domain. > Skips dumping table for domain0. > Intel and AMD specific iommu_ops handler for dumping p2m table. I'm sorry Santosh, but this is still not quite right. > @@ -512,6 +513,69 @@ static int amd_iommu_group_id(u16 seg, u > > #include <asm/io_apic.h> > > +static void amd_dump_p2m_table_level(struct page_info* pg, int level, u64 gpa) static void amd_dump_p2m_table_level(struct page_info *pg, int level, paddr_t gpa) > +{ > + u64 address; paddr_t > + void *table_vaddr, *pde; > + u64 next_table_maddr; This ought to also be paddr_t, but I realize that the whole AMD IOMMU code is using u64 instead. So here it's probably okay to remain u64. > + int index, next_level, present; > + u32 *entry; > + > + if ( level <= 1 ) > + return; > + > + table_vaddr = __map_domain_page(pg); > + if ( table_vaddr == NULL ) > + { > + printk("Failed to map IOMMU domain page %016"PRIx64"\n", page_to_maddr(pg)); We specifically have PRIpaddr for this purpose. > + return; > + } > + > + for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ ) > + { > + if ( !(index % 2) ) > + process_pending_softirqs(); > + > + pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE); > + next_table_maddr = amd_iommu_get_next_table_from_pte(pde); > + entry = (u32*)pde; > + > + next_level = get_field_from_reg_u32(entry[0], > + IOMMU_PDE_NEXT_LEVEL_MASK, > + IOMMU_PDE_NEXT_LEVEL_SHIFT); > + > + present = get_field_from_reg_u32(entry[0], > + IOMMU_PDE_PRESENT_MASK, > + IOMMU_PDE_PRESENT_SHIFT); > + > + address = gpa + amd_offset_level_address(index, level); > + if ( (next_table_maddr != 0) && (next_level != 0) > + && present ) > + { > + amd_dump_p2m_table_level( > + maddr_to_page(next_table_maddr), level - 1, address); I guess I see now that you started by cloning deallocate_next_page_table(), which has almost all the same issues I have been complaining about here. Wei - here I'm particularly worried about the use of "level - 1" instead of "next_level", which would similarly apply to the original function. If the way this is currently done is okay, then why is next_level being computed in the first place? (And similar to the issue Santosh has already fixed here - the original function pointlessly maps/unmaps the page when "level <= 1". Furthermore, iommu_map.c has nice helper functions iommu_next_level() and amd_iommu_is_pte_present() - why aren't they in a header instead, so they could be used here, avoiding the open coding of them?) > + } > + > + if ( present ) > + { > + printk("gfn: %016"PRIx64" mfn: %016"PRIx64"\n", > + address >> PAGE_SHIFT, next_table_maddr >> PAGE_SHIFT); I'd prefer you to use PFN_DOWN() here. Also, depth first, as requested by Tim, to me doesn't mean recursing before printing. I think you really want to print first, then recurse. Otherwise how would the output be made sense of? > + } > + } > + > + unmap_domain_page(table_vaddr); > +} > ... > --- a/xen/drivers/passthrough/iommu.c Tue Aug 07 18:37:31 2012 +0100 > +++ b/xen/drivers/passthrough/iommu.c Wed Aug 08 09:56:50 2012 -0700 > @@ -54,6 +55,8 @@ bool_t __read_mostly amd_iommu_perdev_in > > DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb); > > +void setup_iommu_dump(void); > + This is completely bogus. If the function was called from another source file, the declaration would belong into a header file. Since it's only used here, it ought to be static. > static void __init parse_iommu_param(char *s) > { > char *ss; > @@ -119,6 +122,7 @@ void __init iommu_dom0_init(struct domai > if ( !iommu_enabled ) > return; > > + setup_iommu_dump(); > d->need_iommu = !!iommu_dom0_strict; > if ( need_iommu(d) ) > { > ... > +void __init setup_iommu_dump(void) > +{ > + register_keyhandler('o', &iommu_p2m_table); > +} Furthermore, there's no real need for a separate function here anyway. Just call register_key_handler() directly. Or alternatively this ought to match other code doing the same - using an initcall. > --- a/xen/drivers/passthrough/vtd/iommu.c Tue Aug 07 18:37:31 2012 +0100 > +++ b/xen/drivers/passthrough/vtd/iommu.c Wed Aug 08 09:56:50 2012 -0700 > +static void vtd_dump_p2m_table_level(u64 pt_maddr, int level, u64 gpa) > +{ > + u64 address; Again, both gpa and address ought to be paddr_t, and the format specifiers should match. > + int i; > + struct dma_pte *pt_vaddr, *pte; > + int next_level; > + > + if ( pt_maddr == 0 ) > + return; > + > + pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr); Pointless cast. > + if ( pt_vaddr == NULL ) > + { > + printk("Failed to map VT-D domain page %016"PRIx64"\n", pt_maddr); > + return; > + } > + > + next_level = level - 1; > + for ( i = 0; i < PTE_NUM; i++ ) > + { > + if ( !(i % 2) ) > + process_pending_softirqs(); > + > + pte = &pt_vaddr[i]; > + if ( !dma_pte_present(*pte) ) > + continue; > + > + address = gpa + offset_level_address(i, level); > + if ( next_level >= 1 ) > + vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level, address); > + > + if ( level == 1 ) > + printk("gfn: %016"PRIx64" mfn: %016"PRIx64" superpage=%d\n", > + address >> PAGE_SHIFT_4K, pte->val >> PAGE_SHIFT_4K, dma_pte_superpage(*pte)? 1 : 0); Why do you print leaf (level 1) tables here only? And the last line certainly is above 80 chars, so needs breaking up. (Also, just to avoid you needing to do another iteration: Don't switch to PFN_DOWN() here.) I further wonder whether "superpage" alone is enough - don't we have both 2M and 1G pages? Of course, that would become moot if higher levels got also dumped (as then this knowledge is implicit). Which reminds me to ask that both here and in the AMD code the recursion level should probably be reflected by indenting the printed strings. Jan
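For illustration, the initcall route mentioned above removes both the forward declaration and the call from iommu_dom0_init(). A sketch modeled on other key handler registrations in the tree, assuming the usual __initcall() plumbing (initcall functions in Xen return int and run once during boot):

    static int __init setup_iommu_dump(void)
    {
        register_keyhandler('o', &iommu_p2m_table);
        return 0;
    }
    __initcall(setup_iommu_dump);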
Thanks for the detailed feedback. Please see inline: > + int index, next_level, present; > + u32 *entry; > + > + if ( level <= 1 ) > + return; > + > + table_vaddr = __map_domain_page(pg); > + if ( table_vaddr == NULL ) > + { > + printk("Failed to map IOMMU domain page %016"PRIx64"\n", > + page_to_maddr(pg)); We specifically have PRIpaddr for this purpose. [Santosh Jodh] Ah - one more format specifier :-). Will change it. > + } > + > + if ( present ) > + { > + printk("gfn: %016"PRIx64" mfn: %016"PRIx64"\n", > + address >> PAGE_SHIFT, next_table_maddr >> > + PAGE_SHIFT); I'd prefer you to use PFN_DOWN() here. [Santosh Jodh] ok. Also, depth first, as requested by Tim, to me doesn't mean recursing before printing. I think you really want to print first, then recurse. Otherwise how would the output be made sense of? [Santosh Jodh] it gives a simple gfn -> mfn map. With nested printing, you get the PD and PT addresses printed - which are not super useful. Anyway, I will change it. > static void __init parse_iommu_param(char *s) { > char *ss; > @@ -119,6 +122,7 @@ void __init iommu_dom0_init(struct domai > if ( !iommu_enabled ) > return; > > + setup_iommu_dump(); > d->need_iommu = !!iommu_dom0_strict; > if ( need_iommu(d) ) > { > ... > +void __init setup_iommu_dump(void) > +{ > + register_keyhandler('o', &iommu_p2m_table); } Furthermore, there's no real need for a separate function here anyway. Just call register_key_handler() directly. Or alternatively this ought to match other code doing the same - using an initcall. [Santosh Jodh] will just call register_key_handler. I had to rearrange code in the file to avoid forward declarations. So much for trying to contain my changes to the end of the file :-) > --- a/xen/drivers/passthrough/vtd/iommu.c Tue Aug 07 18:37:31 2012 +0100 > +++ b/xen/drivers/passthrough/vtd/iommu.c Wed Aug 08 09:56:50 2012 -0700 > +static void vtd_dump_p2m_table_level(u64 pt_maddr, int level, u64 > +gpa) { > + u64 address; Again, both gpa and address ought to be paddr_t, and the format specifiers should match. [Santosh Jodh] will do. > + int i; > + struct dma_pte *pt_vaddr, *pte; > + int next_level; > + > + if ( pt_maddr == 0 ) > + return; > + > + pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr); Pointless cast. [Santosh Jodh] yep - gone. > + if ( pt_vaddr == NULL ) > + { > + printk("Failed to map VT-D domain page %016"PRIx64"\n", pt_maddr); > + return; > + } > + > + next_level = level - 1; > + for ( i = 0; i < PTE_NUM; i++ ) > + { > + if ( !(i % 2) ) > + process_pending_softirqs(); > + > + pte = &pt_vaddr[i]; > + if ( !dma_pte_present(*pte) ) > + continue; > + > + address = gpa + offset_level_address(i, level); > + if ( next_level >= 1 ) > + vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level, > + address); > + > + if ( level == 1 ) > + printk("gfn: %016"PRIx64" mfn: %016"PRIx64" superpage=%d\n", > + address >> PAGE_SHIFT_4K, pte->val >> > + PAGE_SHIFT_4K, dma_pte_superpage(*pte)? 1 : 0); Why do you print leaf (level 1) tables here only? [Santosh Jodh] as I said above - I was dumping gfn -> mfn. I will make it indent and print all levels. And the last line certainly is above 80 chars, so needs breaking up. [Santosh Jodh] yep - done. (Also, just to avoid you needing to do another iteration: Don't switch to PFN_DOWN() here.) [Santosh Jodh] ok I further wonder whether "superpage" alone is enough - don't we have both 2M and 1G pages? Of course, that would become moot if higher levels got also dumped (as then this knowledge is implicit). 
Which reminds me to ask that both here and in the AMD code the recursion level should probably be reflected by indenting the printed strings. [Santosh Jodh] will print indented and all levels. Jan
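As an aside on the indentation approach: instead of a loop printing one space per level, the standard "%*s" field-width form can emit the whole prefix in a single call - a sketch, assuming Xen's vsnprintf supports the '*' width (it is derived from Linux's, which does), with gfn/mfn standing in for the values computed in the walkers:

    /* Pads the empty string out to `indent` columns, then prints the entry. */
    printk("%*sgfn: %"PRIx64" mfn: %"PRIx64"\n", indent, "", gfn, mfn);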
New key handler ''o'' to dump the IOMMU p2m table for each domain. Skips dumping table for domain0. Intel and AMD specific iommu_ops handler for dumping p2m table. Signed-off-by: Santosh Jodh <santosh.jodh@citrix.com> diff -r 472fc515a463 -r f687c5580262 xen/drivers/passthrough/amd/pci_amd_iommu.c --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Tue Aug 07 18:37:31 2012 +0100 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Thu Aug 09 18:31:32 2012 -0700 @@ -22,6 +22,7 @@ #include <xen/pci.h> #include <xen/pci_regs.h> #include <xen/paging.h> +#include <xen/softirq.h> #include <asm/hvm/iommu.h> #include <asm/amd-iommu.h> #include <asm/hvm/svm/amd-iommu-proto.h> @@ -512,6 +513,81 @@ static int amd_iommu_group_id(u16 seg, u #include <asm/io_apic.h> +static void amd_dump_p2m_table_level(struct page_info* pg, int level, + paddr_t gpa, int *indent) +{ + paddr_t address; + void *table_vaddr, *pde; + paddr_t next_table_maddr; + int index, next_level, present; + u32 *entry; + + if ( level <= 1 ) + return; + + table_vaddr = __map_domain_page(pg); + if ( table_vaddr == NULL ) + { + printk("Failed to map IOMMU domain page %"PRIpaddr"\n", + page_to_maddr(pg)); + return; + } + + for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ ) + { + if ( !(index % 2) ) + process_pending_softirqs(); + + pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE); + next_table_maddr = amd_iommu_get_next_table_from_pte(pde); + entry = (u32*)pde; + + next_level = get_field_from_reg_u32(entry[0], + IOMMU_PDE_NEXT_LEVEL_MASK, + IOMMU_PDE_NEXT_LEVEL_SHIFT); + + present = get_field_from_reg_u32(entry[0], + IOMMU_PDE_PRESENT_MASK, + IOMMU_PDE_PRESENT_SHIFT); + + address = gpa + amd_offset_level_address(index, level); + if ( present ) + { + int i; + + for ( i = 0; i < *indent; i++ ) + printk(" "); + + printk("gfn: %"PRIpaddr" mfn: %"PRIpaddr"\n", + PFN_DOWN(address), PFN_DOWN(next_table_maddr)); + + if ( (next_table_maddr != 0) && (next_level != 0) ) + { + *indent += 1; + amd_dump_p2m_table_level( + maddr_to_page(next_table_maddr), level - 1, + address, indent); + + *indent -= 1; + } + } + } + + unmap_domain_page(table_vaddr); +} + +static void amd_dump_p2m_table(struct domain *d) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); + int indent; + + if ( !hd->root_table ) + return; + + indent = 0; + amd_dump_p2m_table_level(hd->root_table, hd->paging_mode, 0, &indent); +} + const struct iommu_ops amd_iommu_ops = { .init = amd_iommu_domain_init, .dom0_init = amd_iommu_dom0_init, @@ -531,4 +607,5 @@ const struct iommu_ops amd_iommu_ops = { .resume = amd_iommu_resume, .share_p2m = amd_iommu_share_p2m, .crash_shutdown = amd_iommu_suspend, + .dump_p2m_table = amd_dump_p2m_table, }; diff -r 472fc515a463 -r f687c5580262 xen/drivers/passthrough/iommu.c --- a/xen/drivers/passthrough/iommu.c Tue Aug 07 18:37:31 2012 +0100 +++ b/xen/drivers/passthrough/iommu.c Thu Aug 09 18:31:32 2012 -0700 @@ -18,11 +18,13 @@ #include <asm/hvm/iommu.h> #include <xen/paging.h> #include <xen/guest_access.h> +#include <xen/keyhandler.h> #include <xen/softirq.h> #include <xsm/xsm.h> static void parse_iommu_param(char *s); static int iommu_populate_page_table(struct domain *d); +static void iommu_dump_p2m_table(unsigned char key); /* * The ''iommu'' parameter enables the IOMMU. 
Optional comma separated @@ -54,6 +56,12 @@ bool_t __read_mostly amd_iommu_perdev_in DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb); +static struct keyhandler iommu_p2m_table = { + .diagnostic = 0, + .u.fn = iommu_dump_p2m_table, + .desc = "dump iommu p2m table" +}; + static void __init parse_iommu_param(char *s) { char *ss; @@ -119,6 +127,7 @@ void __init iommu_dom0_init(struct domai if ( !iommu_enabled ) return; + register_keyhandler(''o'', &iommu_p2m_table); d->need_iommu = !!iommu_dom0_strict; if ( need_iommu(d) ) { @@ -654,6 +663,34 @@ int iommu_do_domctl( return ret; } +static void iommu_dump_p2m_table(unsigned char key) +{ + struct domain *d; + const struct iommu_ops *ops; + + if ( !iommu_enabled ) + { + printk("IOMMU not enabled!\n"); + return; + } + + ops = iommu_get_ops(); + for_each_domain(d) + { + if ( !d->domain_id ) + continue; + + if ( iommu_use_hap_pt(d) ) + { + printk("\ndomain%d IOMMU p2m table shared with MMU: \n", d->domain_id); + continue; + } + + printk("\ndomain%d IOMMU p2m table: \n", d->domain_id); + ops->dump_p2m_table(d); + } +} + /* * Local variables: * mode: C diff -r 472fc515a463 -r f687c5580262 xen/drivers/passthrough/vtd/iommu.c --- a/xen/drivers/passthrough/vtd/iommu.c Tue Aug 07 18:37:31 2012 +0100 +++ b/xen/drivers/passthrough/vtd/iommu.c Thu Aug 09 18:31:32 2012 -0700 @@ -31,6 +31,7 @@ #include <xen/pci.h> #include <xen/pci_regs.h> #include <xen/keyhandler.h> +#include <xen/softirq.h> #include <asm/msi.h> #include <asm/irq.h> #if defined(__i386__) || defined(__x86_64__) @@ -2365,6 +2366,71 @@ static void vtd_resume(void) } } +static void vtd_dump_p2m_table_level(paddr_t pt_maddr, int level, paddr_t gpa, + int *indent) +{ + paddr_t address; + int i; + struct dma_pte *pt_vaddr, *pte; + int next_level; + + if ( pt_maddr == 0 ) + return; + + pt_vaddr = map_vtd_domain_page(pt_maddr); + if ( pt_vaddr == NULL ) + { + printk("Failed to map VT-D domain page %"PRIpaddr"\n", pt_maddr); + return; + } + + next_level = level - 1; + for ( i = 0; i < PTE_NUM; i++ ) + { + if ( !(i % 2) ) + process_pending_softirqs(); + + pte = &pt_vaddr[i]; + if ( dma_pte_present(*pte) ) + { + int j; + + for ( j = 0; j < *indent; j++ ) + printk(" "); + + address = gpa + offset_level_address(i, level); + printk("gfn: %"PRIpaddr" mfn: %"PRIpaddr" super=%d rd=%d wr=%d\n", + address >> PAGE_SHIFT_4K, pte->val >> PAGE_SHIFT_4K, + dma_pte_superpage(*pte)? 1 : 0, dma_pte_read(*pte)? 1 : 0, + dma_pte_write(*pte)? 
1 : 0); + + if ( next_level >= 1 ) { + *indent += 1; + vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level, + address, indent); + + *indent -= 1; + } + } + } + + unmap_vtd_domain_page(pt_vaddr); +} + +static void vtd_dump_p2m_table(struct domain *d) +{ + struct hvm_iommu *hd; + int indent; + + if ( list_empty(&acpi_drhd_units) ) + return; + + hd = domain_hvm_iommu(d); + indent = 0; + vtd_dump_p2m_table_level(hd->pgd_maddr, agaw_to_level(hd->agaw), + 0, &indent); +} + const struct iommu_ops intel_iommu_ops = { .init = intel_iommu_domain_init, .dom0_init = intel_iommu_dom0_init, @@ -2387,6 +2453,7 @@ const struct iommu_ops intel_iommu_ops .crash_shutdown = vtd_crash_shutdown, .iotlb_flush = intel_iommu_iotlb_flush, .iotlb_flush_all = intel_iommu_iotlb_flush_all, + .dump_p2m_table = vtd_dump_p2m_table, }; /* diff -r 472fc515a463 -r f687c5580262 xen/drivers/passthrough/vtd/iommu.h --- a/xen/drivers/passthrough/vtd/iommu.h Tue Aug 07 18:37:31 2012 +0100 +++ b/xen/drivers/passthrough/vtd/iommu.h Thu Aug 09 18:31:32 2012 -0700 @@ -248,6 +248,8 @@ struct context_entry { #define level_to_offset_bits(l) (12 + (l - 1) * LEVEL_STRIDE) #define address_level_offset(addr, level) \ ((addr >> level_to_offset_bits(level)) & LEVEL_MASK) +#define offset_level_address(offset, level) \ + ((u64)(offset) << level_to_offset_bits(level)) #define level_mask(l) (((u64)(-1)) << level_to_offset_bits(l)) #define level_size(l) (1 << level_to_offset_bits(l)) #define align_to_level(addr, l) ((addr + level_size(l) - 1) & level_mask(l)) @@ -277,6 +279,9 @@ struct dma_pte { #define dma_set_pte_addr(p, addr) do {\ (p).val |= ((addr) & PAGE_MASK_4K); } while (0) #define dma_pte_present(p) (((p).val & 3) != 0) +#define dma_pte_superpage(p) (((p).val & (1<<7)) != 0) +#define dma_pte_read(p) (((p).val & DMA_PTE_READ) != 0) +#define dma_pte_write(p) (((p).val & DMA_PTE_WRITE) != 0) /* interrupt remap entry */ struct iremap_entry { diff -r 472fc515a463 -r f687c5580262 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Tue Aug 07 18:37:31 2012 +0100 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Thu Aug 09 18:31:32 2012 -0700 @@ -38,6 +38,10 @@ #define PTE_PER_TABLE_ALLOC(entries) \ PAGE_SIZE * (PTE_PER_TABLE_ALIGN(entries) >> PTE_PER_TABLE_SHIFT) +#define amd_offset_level_address(offset, level) \ + ((u64)(offset) << ((PTE_PER_TABLE_SHIFT * \ + (level - IOMMU_PAGING_MODE_LEVEL_1)))) + #define PCI_MIN_CAP_OFFSET 0x40 #define PCI_MAX_CAP_BLOCKS 48 #define PCI_CAP_PTR_MASK 0xFC diff -r 472fc515a463 -r f687c5580262 xen/include/xen/iommu.h --- a/xen/include/xen/iommu.h Tue Aug 07 18:37:31 2012 +0100 +++ b/xen/include/xen/iommu.h Thu Aug 09 18:31:32 2012 -0700 @@ -141,6 +141,7 @@ struct iommu_ops { void (*crash_shutdown)(void); void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count); void (*iotlb_flush_all)(struct domain *d); + void (*dump_p2m_table)(struct domain *d); }; void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value);
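On the earlier 2M/1G question, the macros in the hunk above pin the sizes down: level_to_offset_bits(l) is 12 + (l - 1) * LEVEL_STRIDE, so with VT-d's 9-bit stride (LEVEL_STRIDE == 9 is assumed from the rest of the header) a present entry with the superpage bit set at level l maps 2^(12 + (l - 1) * 9) bytes:

    level 1: 2^12 bytes = 4K (ordinary page)
    level 2: 2^21 bytes = 2M superpage
    level 3: 2^30 bytes = 1G superpage

Once every level is printed with its depth indented, the mapping size is therefore implicit in the level.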
>>> On 10.08.12 at 03:43, Santosh Jodh <santosh.jodh@citrix.com> wrote: > New key handler 'o' to dump the IOMMU p2m table for each domain. > Skips dumping table for domain0. > Intel and AMD specific iommu_ops handler for dumping p2m table. Looks pretty good now to me, just one minor comment below. But it will of course need the ack of both the VT-d and AMD IOMMU maintainers (who, while on Cc, didn't participate in the discussion so far). > + amd_dump_p2m_table_level(hd->root_table, hd->paging_mode, 0, &indent); Why do you pass a pointer here? Just pass 0, and in the recursive call pass either +1 or +(level - next_level). (The inconsistency on the use of next_level will need to be commented on by Wei anyway, as already requested in an earlier reply.) Jan
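For illustration, the by-value form being requested - a sketch of just the lines that change, with the rest of the function as in the patch above:

    static void amd_dump_p2m_table_level(struct page_info *pg, int level,
                                         paddr_t gpa, int indent)
    {
        ...
                /* The callee gets a deeper indent; the caller's value is
                   unchanged on return, so no explicit restore is needed. */
                amd_dump_p2m_table_level(maddr_to_page(next_table_maddr),
                                         level - 1, address, indent + 1);
        ...
    }

    static void amd_dump_p2m_table(struct domain *d)
    {
        ...
        amd_dump_p2m_table_level(hd->root_table, hd->paging_mode, 0, 0);
    }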
On 08/09/2012 09:26 AM, Jan Beulich wrote: > Wei - here I'm particularly worried about the use of "level - 1" > instead of "next_level", which would similarly apply to the > original function. If the way this is currently done is okay, then > why is next_level being computed in the first place? I think that recalculation is to guarantee that this recursive function returns. It should run at most "paging_mode" times no matter what "next_level" says. But if we could assume that the next level field in every pde is correct, then using next_level is fine with me. (And similar > to the issue Santosh has already fixed here - the original > function pointlessly maps/unmaps the page when "level <= 1". > Furthermore, iommu_map.c has nice helper functions > iommu_next_level() and amd_iommu_is_pte_present() - why > aren't they in a header instead, so they could be used here, > avoiding the open coding of them?) Maybe those helpers appeared after the original function. I could send a patch to clean up these: * do not map/unmap if level <= 1 * move amd_iommu_is_pte_present() and iommu_next_level() to a header file, and use them in deallocate_next_page_table. * use next_level instead of recalculation (if requested) Thanks, Wei >> + } >> + >> + if ( present ) >> + { >> + printk("gfn: %016"PRIx64" mfn: %016"PRIx64"\n", >> + address >> PAGE_SHIFT, next_table_maddr >> PAGE_SHIFT); > > I'd prefer you to use PFN_DOWN() here. > > Also, depth first, as requested by Tim, to me doesn't mean > recursing before printing. I think you really want to print first, > then recurse. Otherwise how would the output be made sense > of? > >> + } >> + } >> + >> + unmap_domain_page(table_vaddr); >> +} >> ... >> --- a/xen/drivers/passthrough/iommu.c Tue Aug 07 18:37:31 2012 +0100 >> +++ b/xen/drivers/passthrough/iommu.c Wed Aug 08 09:56:50 2012 -0700 >> @@ -54,6 +55,8 @@ bool_t __read_mostly amd_iommu_perdev_in >> >> DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb); >> >> +void setup_iommu_dump(void); >> + > > This is completely bogus. If the function was called from another > source file, the declaration would belong into a header file. Since > it's only used here, it ought to be static. > >> static void __init parse_iommu_param(char *s) >> { >> char *ss; >> @@ -119,6 +122,7 @@ void __init iommu_dom0_init(struct domai >> if ( !iommu_enabled ) >> return; >> >> + setup_iommu_dump(); >> d->need_iommu = !!iommu_dom0_strict; >> if ( need_iommu(d) ) >> { >> ... >> +void __init setup_iommu_dump(void) >> +{ >> + register_keyhandler('o', &iommu_p2m_table); >> +} > > Furthermore, there's no real need for a separate function here > anyway. Just call register_key_handler() directly. Or > alternatively this ought to match other code doing the same - > using an initcall. > >> --- a/xen/drivers/passthrough/vtd/iommu.c Tue Aug 07 18:37:31 2012 +0100 >> +++ b/xen/drivers/passthrough/vtd/iommu.c Wed Aug 08 09:56:50 2012 -0700 >> +static void vtd_dump_p2m_table_level(u64 pt_maddr, int level, u64 gpa) >> +{ >> + u64 address; > > Again, both gpa and address ought to be paddr_t, and the format > specifiers should match. > >> + int i; >> + struct dma_pte *pt_vaddr, *pte; >> + int next_level; >> + >> + if ( pt_maddr == 0 ) >> + return; >> + >> + pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr); > > Pointless cast. > >> + if ( pt_vaddr == NULL ) >> + { >> + printk("Failed to map VT-D domain page %016"PRIx64"\n", pt_maddr); >> + return; >> + } >> + >> + next_level = level - 1; >> + for ( i = 0; i < PTE_NUM; i++ ) >> + { >> + if ( !(i % 2) ) >> + process_pending_softirqs(); >> + >> + pte = &pt_vaddr[i]; >> + if ( !dma_pte_present(*pte) ) >> + continue; >> + >> + address = gpa + offset_level_address(i, level); >> + if ( next_level >= 1 ) >> + vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level, address); >> + >> + if ( level == 1 ) >> + printk("gfn: %016"PRIx64" mfn: %016"PRIx64" superpage=%d\n", >> + address >> PAGE_SHIFT_4K, pte->val >> PAGE_SHIFT_4K, dma_pte_superpage(*pte)? 1 : 0); > > Why do you print leaf (level 1) tables here only? > > And the last line certainly is above 80 chars, so needs breaking up. > > (Also, just to avoid you needing to do another iteration: Don't > switch to PFN_DOWN() here.) > > I further wonder whether "superpage" alone is enough - don't we > have both 2M and 1G pages? Of course, that would become moot > if higher levels got also dumped (as then this knowledge is implicit). > > Which reminds me to ask that both here and in the AMD code the > recursion level should probably be reflected by indenting the > printed strings. > > Jan >
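For illustration, the header move Wei proposes could be as simple as turning the two iommu_map.c helpers into static inlines - a sketch placed, say, in asm-x86/hvm/svm/amd-iommu-proto.h, with bodies assumed to mirror the open-coded bit extraction in the dump function above:

    static inline int amd_iommu_is_pte_present(u32 *entry)
    {
        return get_field_from_reg_u32(entry[0], IOMMU_PDE_PRESENT_MASK,
                                      IOMMU_PDE_PRESENT_SHIFT);
    }

    static inline unsigned int iommu_next_level(u32 *entry)
    {
        return get_field_from_reg_u32(entry[0], IOMMU_PDE_NEXT_LEVEL_MASK,
                                      IOMMU_PDE_NEXT_LEVEL_SHIFT);
    }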
Hi Santosh,

On 08/10/2012 03:43 AM, Santosh Jodh wrote:
> New key handler 'o' to dump the IOMMU p2m table for each domain.
> Skips dumping table for domain0.
> Intel and AMD specific iommu_ops handler for dumping p2m table.
>
> Signed-off-by: Santosh Jodh <santosh.jodh@citrix.com>
>
> diff -r 472fc515a463 -r f687c5580262 xen/drivers/passthrough/amd/pci_amd_iommu.c
> --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c	Tue Aug 07 18:37:31 2012 +0100
> +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c	Thu Aug 09 18:31:32 2012 -0700
> @@ -22,6 +22,7 @@
>  #include <xen/pci.h>
>  #include <xen/pci_regs.h>
>  #include <xen/paging.h>
> +#include <xen/softirq.h>
>  #include <asm/hvm/iommu.h>
>  #include <asm/amd-iommu.h>
>  #include <asm/hvm/svm/amd-iommu-proto.h>
> @@ -512,6 +513,81 @@ static int amd_iommu_group_id(u16 seg, u
>
>  #include <asm/io_apic.h>
>
> +static void amd_dump_p2m_table_level(struct page_info* pg, int level,
> +                                     paddr_t gpa, int *indent)
> +{
> +    paddr_t address;
> +    void *table_vaddr, *pde;
> +    paddr_t next_table_maddr;
> +    int index, next_level, present;
> +    u32 *entry;
> +
> +    if ( level <= 1 )
> +        return;

So, the l1 page table is not printed, is it by design?

> +    table_vaddr = __map_domain_page(pg);
> +    if ( table_vaddr == NULL )
> +    {
> +        printk("Failed to map IOMMU domain page %"PRIpaddr"\n",
> +               page_to_maddr(pg));
> +        return;
> +    }
> +
> +    for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ )
> +    {
> +        if ( !(index % 2) )
> +            process_pending_softirqs();
> +
> +        pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE);
> +        next_table_maddr = amd_iommu_get_next_table_from_pte(pde);
> +        entry = (u32*)pde;
> +
> +        next_level = get_field_from_reg_u32(entry[0],
> +                                            IOMMU_PDE_NEXT_LEVEL_MASK,
> +                                            IOMMU_PDE_NEXT_LEVEL_SHIFT);
> +
> +        present = get_field_from_reg_u32(entry[0],
> +                                         IOMMU_PDE_PRESENT_MASK,
> +                                         IOMMU_PDE_PRESENT_SHIFT);
> +
> +        address = gpa + amd_offset_level_address(index, level);
> +        if ( present )
> +        {
> +            int i;
> +
> +            for ( i = 0; i < *indent; i++ )
> +                printk(" ");
> +
> +            printk("gfn: %"PRIpaddr" mfn: %"PRIpaddr"\n",
> +                   PFN_DOWN(address), PFN_DOWN(next_table_maddr));
> +
> +            if ( (next_table_maddr != 0) && (next_level != 0) )
> +            {
> +                *indent += 1;
> +                amd_dump_p2m_table_level(
> +                    maddr_to_page(next_table_maddr), level - 1,
> +                    address, indent);
> +
> +                *indent -= 1;
> +            }
> +        }
> +    }
> +
> +    unmap_domain_page(table_vaddr);
> +}
> +
> +static void amd_dump_p2m_table(struct domain *d)
> +{
> +    struct hvm_iommu *hd = domain_hvm_iommu(d);
> +    int indent;
> +
> +    if ( !hd->root_table )
> +        return;
> +
> +    indent = 0;
> +    amd_dump_p2m_table_level(hd->root_table, hd->paging_mode, 0, &indent);
> +}
> +
>  const struct iommu_ops amd_iommu_ops = {
>      .init = amd_iommu_domain_init,
>      .dom0_init = amd_iommu_dom0_init,
> @@ -531,4 +607,5 @@ const struct iommu_ops amd_iommu_ops = {
>      .resume = amd_iommu_resume,
>      .share_p2m = amd_iommu_share_p2m,
>      .crash_shutdown = amd_iommu_suspend,
> +    .dump_p2m_table = amd_dump_p2m_table,
>  };

I tested the amd part with an amd iommu system and I got output like this:

(XEN) domain1 IOMMU p2m table:
(XEN) gfn: 0000000000000000 mfn: 00000000001b1fb5
(XEN) gfn: 0000000000000000 mfn: 000000000021f80b
(XEN) gfn: 0000000000000000 mfn: 000000000023f400
(XEN) gfn: 0000000000000000 mfn: 000000000010a600
(XEN) gfn: 0000000000000000 mfn: 000000000023f200
(XEN) gfn: 0000000000000000 mfn: 000000000010a400
(XEN) gfn: 0000000000000000 mfn: 000000000023f000
(XEN) gfn: 0000000000000000 mfn: 000000000010ae00
(XEN) gfn: 0000000000000000 mfn: 000000000023ee00
(XEN) gfn: 0000000000000001 mfn: 000000000010ac00
(XEN) gfn: 0000000000000001 mfn: 000000000023ec00
(XEN) gfn: 0000000000000001 mfn: 000000000010aa00
(XEN) gfn: 0000000000000001 mfn: 000000000023ea00
(XEN) gfn: 0000000000000001 mfn: 000000000010a800
(XEN) gfn: 0000000000000001 mfn: 000000000023e800
(XEN) gfn: 0000000000000001 mfn: 000000000010be00
(XEN) gfn: 0000000000000001 mfn: 000000000023e600
(XEN) gfn: 0000000000000002 mfn: 000000000010bc00
(XEN) gfn: 0000000000000002 mfn: 000000000023e400
(XEN) gfn: 0000000000000002 mfn: 000000000010ba00
(XEN) gfn: 0000000000000002 mfn: 000000000023e200
(XEN) gfn: 0000000000000002 mfn: 000000000010b800
(XEN) gfn: 0000000000000002 mfn: 000000000023e000
(XEN) gfn: 0000000000000002 mfn: 000000000010b600
(XEN) gfn: 0000000000000002 mfn: 000000000023de00
(XEN) gfn: 0000000000000003 mfn: 000000000010b400
(XEN) gfn: 0000000000000003 mfn: 000000000023dc00
(XEN) gfn: 0000000000000003 mfn: 000000000010b200
(XEN) gfn: 0000000000000003 mfn: 000000000023da00
(XEN) gfn: 0000000000000003 mfn: 000000000010b000
(XEN) gfn: 0000000000000003 mfn: 000000000023d800
(XEN) gfn: 0000000000000003 mfn: 000000000010fe00
(XEN) gfn: 0000000000000003 mfn: 000000000023d600
(XEN) gfn: 0000000000000004 mfn: 000000000010fc00
(XEN) gfn: 0000000000000004 mfn: 000000000023d400
(XEN) gfn: 0000000000000004 mfn: 000000000010fa00
(XEN) gfn: 0000000000000004 mfn: 000000000023d200
(XEN) gfn: 0000000000000004 mfn: 000000000010f800
(XEN) gfn: 0000000000000004 mfn: 000000000023d000
(XEN) gfn: 0000000000000004 mfn: 000000000010f600
(XEN) gfn: 0000000000000004 mfn: 000000000023ce00
(XEN) gfn: 0000000000000005 mfn: 000000000010f400
(XEN) gfn: 0000000000000005 mfn: 000000000023cc00
(XEN) gfn: 0000000000000005 mfn: 000000000010f200
(XEN) gfn: 0000000000000005 mfn: 000000000023ca00
(XEN) gfn: 0000000000000005 mfn: 000000000010f000
(XEN) gfn: 0000000000000005 mfn: 000000000023c800
(XEN) gfn: 0000000000000005 mfn: 000000000010ee00
(XEN) gfn: 0000000000000005 mfn: 000000000023c600
(XEN) gfn: 0000000000000006 mfn: 000000000010ec00
(XEN) gfn: 0000000000000006 mfn: 000000000023c400
(XEN) gfn: 0000000000000006 mfn: 000000000010ea00
(XEN) gfn: 0000000000000006 mfn: 000000000023c200
(XEN) gfn: 0000000000000006 mfn: 000000000010e800
(XEN) gfn: 0000000000000006 mfn: 000000000023c000

It looks that the same gfn has been mapped to multiple mfns. Do you want
to output only the gfn to mfn mapping, or do you also want to output the
address of intermediate page tables? What do "gfn" and "mfn" stand for
here?

Thanks,
Wei
>>> On 10.08.12 at 12:50, Wei Wang <wei.wang2@amd.com> wrote:
> On 08/09/2012 09:26 AM, Jan Beulich wrote:
>> Wei - here I'm particularly worried about the use of "level - 1"
>> instead of "next_level", which would similarly apply to the
>> original function. If the way this is currently done is okay, then
>> why is next_level being computed in the first place?
>
> I think that recalculation is to guarantee that this recursive function
> returns. It should run at most "paging_mode" times no matter what
> "next_level" says. But if we could assume that the next level field in
> every pde is correct, then using next level is fine to me.

Especially in the dumping function we shouldn't assume too
much. However, wasn't it that one can skip levels in your
IOMMU implementation? That can't be handled correctly if
always subtracting 1.

>> (And similar to the issue Santosh has already fixed here - the
>> original function pointlessly maps/unmaps the page when "level <= 1".
>> Furthermore, iommu_map.c has nice helper functions
>> iommu_next_level() and amd_iommu_is_pte_present() - why
>> aren't they in a header instead, so they could be used here,
>> avoiding the open coding of them?)
>
> Maybe those helpers appeared after the original function. I could send a
> patch to clean up these:
> * do not map/unmap if level <= 1
> * move amd_iommu_is_pte_present() and iommu_next_level() to a header
>   file, and use them in deallocate_next_page_table.
> * use next_level instead of recalculation (if requested)

Yes, please. As to using next_level - it depends, besides the above,
on how bad it is if this is really wrong; an ASSERT() or BUG_ON()
might be in order here.

Jan
>>> On 10.08.12 at 14:31, Wei Wang <wei.wang2@amd.com> wrote:
> On 08/10/2012 03:43 AM, Santosh Jodh wrote:
>> +    if ( level <= 1 )
>> +        return;
>
> So, the l1 page table is not printed, is it by design?

Yeah, that puzzled me too, but I wasn't sure what's right here.

> I tested the amd part with an amd iommu system and I got output like this:
>
> (XEN) domain1 IOMMU p2m table:
> (XEN) gfn: 0000000000000000 mfn: 00000000001b1fb5
> (XEN) gfn: 0000000000000000 mfn: 000000000021f80b
> (XEN) gfn: 0000000000000000 mfn: 000000000023f400
> [...]
>
> It looks that the same gfn has been mapped to multiple mfns. Do you want
> to output only the gfn to mfn mapping, or do you also want to output the
> address of intermediate page tables? What do "gfn" and "mfn" stand for
> here?

Indeed, apart from the apparent brokenness, printing the GFN with the
intermediate levels makes no sense. Nor does it make sense to print
with 16 digits when beyond the 10th a non-zero one will never be
observable. I realize that that's a downside of using PRIpaddr - I was
probably giving a bad recommendation here (or at least it wasn't
applicable to all cases), I'm sorry for that; instead, when you convert
to [GM]FN, you can safely cast to (and hence print as) unsigned long.

Jan
On 08/10/2012 02:52 PM, Jan Beulich wrote:
>>>> On 10.08.12 at 12:50, Wei Wang <wei.wang2@amd.com> wrote:
>> On 08/09/2012 09:26 AM, Jan Beulich wrote:
>>> Wei - here I'm particularly worried about the use of "level - 1"
>>> instead of "next_level", which would similarly apply to the
>>> original function. If the way this is currently done is okay, then
>>> why is next_level being computed in the first place?
>>
>> I think that recalculation is to guarantee that this recursive function
>> returns. It should run at most "paging_mode" times no matter what
>> "next_level" says. But if we could assume that the next level field in
>> every pde is correct, then using next level is fine to me.
>
> Especially in the dumping function we shouldn't assume too
> much. However, wasn't it that one can skip levels in your
> IOMMU implementation? That can't be handled correctly if
> always subtracting 1.

We have no skip levels yet. But since it checks (next_level != 0) before
calling itself, it should not deallocate pages unexpectedly. It will,
however, also waste some time in the loop if next_level == 0 but
level > 1 (e.g. we have only l4, l2 and l1 tables). So, yes, now using
next_level with an ASSERT also looks better to me.

>>> (And similar to the issue Santosh has already fixed here - the
>>> original function pointlessly maps/unmaps the page when "level <= 1".
>>> Furthermore, iommu_map.c has nice helper functions
>>> iommu_next_level() and amd_iommu_is_pte_present() - why
>>> aren't they in a header instead, so they could be used here,
>>> avoiding the open coding of them?)
>>
>> Maybe those helpers appeared after the original function. I could send a
>> patch to clean up these:
>> * do not map/unmap if level <= 1
>> * move amd_iommu_is_pte_present() and iommu_next_level() to a header
>>   file, and use them in deallocate_next_page_table.
>> * use next_level instead of recalculation (if requested)
>
> Yes, please. As to using next_level - it depends, besides the above,
> on how bad it is if this is really wrong; an ASSERT() or BUG_ON()
> might be in order here.

How about ASSERT(next_level <= (level - 1))?

> Jan
>>> On 10.08.12 at 15:41, Wei Wang <wei.wang2@amd.com> wrote:
> On 08/10/2012 02:52 PM, Jan Beulich wrote:
>>>>> On 10.08.12 at 12:50, Wei Wang <wei.wang2@amd.com> wrote:
>>> On 08/09/2012 09:26 AM, Jan Beulich wrote:
>>>> (And similar to the issue Santosh has already fixed here - the
>>>> original function pointlessly maps/unmaps the page when "level <= 1".
>>>> Furthermore, iommu_map.c has nice helper functions
>>>> iommu_next_level() and amd_iommu_is_pte_present() - why
>>>> aren't they in a header instead, so they could be used here,
>>>> avoiding the open coding of them?)
>>>
>>> Maybe those helpers appeared after the original function. I could send a
>>> patch to clean up these:
>>> * do not map/unmap if level <= 1
>>> * move amd_iommu_is_pte_present() and iommu_next_level() to a header
>>>   file, and use them in deallocate_next_page_table.
>>> * use next_level instead of recalculation (if requested)
>>
>> Yes, please. As to using next_level - it depends, besides the above,
>> on how bad it is if this is really wrong; an ASSERT() or BUG_ON()
>> might be in order here.
>
> How about ASSERT(next_level <= (level - 1))?

But that would again mean levels can be skipped. If they can, then
using next_level is the way to go, and the assertion is fine. If they
can't, the assertion should say ==.

Jan
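If levels indeed cannot be skipped (Wei says current AMD hardware has none), the assertion Jan asks for would sit in the AMD walk roughly like this - a sketch only, reusing the identifiers from Santosh's patch and the == form per Jan's last remark:

    next_level = get_field_from_reg_u32(entry[0],
                                        IOMMU_PDE_NEXT_LEVEL_MASK,
                                        IOMMU_PDE_NEXT_LEVEL_SHIFT);
    /* With no skipped levels, a present PDE that points at another
     * table must name exactly the next level down. */
    if ( present && next_table_maddr != 0 && next_level != 0 )
    {
        ASSERT(next_level == level - 1);
        amd_dump_p2m_table_level(maddr_to_page(next_table_maddr),
                                 next_level, address, indent + 1);
    }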
Thanks Wei - please see inline.

> +static void amd_dump_p2m_table_level(struct page_info* pg, int level,
> +                                     paddr_t gpa, int *indent)
> +{
> +    paddr_t address;
> +    void *table_vaddr, *pde;
> +    paddr_t next_table_maddr;
> +    int index, next_level, present;
> +    u32 *entry;
> +
> +    if ( level <= 1 )
> +        return;

So, the l1 page table is not printed, is it by design?

[Santosh Jodh] That was a typo - it should be level < 1.

I tested the amd part with an amd iommu system and I got output like this:

(XEN) domain1 IOMMU p2m table:
(XEN) gfn: 0000000000000000 mfn: 00000000001b1fb5
(XEN) gfn: 0000000000000000 mfn: 000000000021f80b
[...]

It looks that the same gfn has been mapped to multiple mfns. Do you want
to output only the gfn to mfn mapping, or do you also want to output the
address of intermediate page tables? What do "gfn" and "mfn" stand for
here?

[Santosh Jodh] guest frame number and machine frame number. I had added
the intermediate PD and PT prints after Jan Beulich suggested it - it's
possible I misunderstood him. I will resend a new patch. Could you please
try it as I don't have access to an AMD IOMMU system.
New key handler 'o' to dump the IOMMU p2m table for each domain.
Skips dumping table for domain0.
Intel and AMD specific iommu_ops handler for dumping p2m table.

Signed-off-by: Santosh Jodh <santosh.jodh@citrix.com>

diff -r 472fc515a463 -r 9c7609a4fbc1 xen/drivers/passthrough/amd/pci_amd_iommu.c
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c	Fri Aug 10 08:19:58 2012 -0700
@@ -22,6 +22,7 @@
 #include <xen/pci.h>
 #include <xen/pci_regs.h>
 #include <xen/paging.h>
+#include <xen/softirq.h>
 #include <asm/hvm/iommu.h>
 #include <asm/amd-iommu.h>
 #include <asm/hvm/svm/amd-iommu-proto.h>
@@ -512,6 +513,80 @@ static int amd_iommu_group_id(u16 seg, u
 
 #include <asm/io_apic.h>
 
+static void amd_dump_p2m_table_level(struct page_info* pg, int level,
+                                     paddr_t gpa, int indent)
+{
+    paddr_t address;
+    void *table_vaddr, *pde;
+    paddr_t next_table_maddr;
+    int index, next_level, present;
+    u32 *entry;
+
+    if ( level < 1 )
+        return;
+
+    table_vaddr = __map_domain_page(pg);
+    if ( table_vaddr == NULL )
+    {
+        printk("Failed to map IOMMU domain page %"PRIpaddr"\n",
+               page_to_maddr(pg));
+        return;
+    }
+
+    for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ )
+    {
+        if ( !(index % 2) )
+            process_pending_softirqs();
+
+        pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE);
+        next_table_maddr = amd_iommu_get_next_table_from_pte(pde);
+        entry = (u32*)pde;
+
+        present = get_field_from_reg_u32(entry[0],
+                                         IOMMU_PDE_PRESENT_MASK,
+                                         IOMMU_PDE_PRESENT_SHIFT);
+
+        if ( !present )
+            continue;
+
+        next_level = get_field_from_reg_u32(entry[0],
+                                            IOMMU_PDE_NEXT_LEVEL_MASK,
+                                            IOMMU_PDE_NEXT_LEVEL_SHIFT);
+
+        address = gpa + amd_offset_level_address(index, level);
+        if ( (next_table_maddr != 0) && (next_level != 0) )
+        {
+            amd_dump_p2m_table_level(
+                maddr_to_page(next_table_maddr), level - 1,
+                address, indent + 1);
+        }
+        else
+        {
+            int i;
+
+            for ( i = 0; i < indent; i++ )
+                printk(" ");
+
+            printk("gfn: %08lx mfn: %08lx\n",
+                   (unsigned long)PFN_DOWN(address),
+                   (unsigned long)PFN_DOWN(next_table_maddr));
+        }
+    }
+
+    unmap_domain_page(table_vaddr);
+}
+
+static void amd_dump_p2m_table(struct domain *d)
+{
+    struct hvm_iommu *hd = domain_hvm_iommu(d);
+
+    if ( !hd->root_table )
+        return;
+
+    printk("p2m table has %d levels\n", hd->paging_mode);
+    amd_dump_p2m_table_level(hd->root_table, hd->paging_mode, 0, 0);
+}
+
 const struct iommu_ops amd_iommu_ops = {
     .init = amd_iommu_domain_init,
     .dom0_init = amd_iommu_dom0_init,
@@ -531,4 +606,5 @@ const struct iommu_ops amd_iommu_ops = {
     .resume = amd_iommu_resume,
     .share_p2m = amd_iommu_share_p2m,
     .crash_shutdown = amd_iommu_suspend,
+    .dump_p2m_table = amd_dump_p2m_table,
 };
diff -r 472fc515a463 -r 9c7609a4fbc1 xen/drivers/passthrough/iommu.c
--- a/xen/drivers/passthrough/iommu.c	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/drivers/passthrough/iommu.c	Fri Aug 10 08:19:58 2012 -0700
@@ -18,11 +18,13 @@
 #include <asm/hvm/iommu.h>
 #include <xen/paging.h>
 #include <xen/guest_access.h>
+#include <xen/keyhandler.h>
 #include <xen/softirq.h>
 #include <xsm/xsm.h>
 
 static void parse_iommu_param(char *s);
 static int iommu_populate_page_table(struct domain *d);
+static void iommu_dump_p2m_table(unsigned char key);
 
 /*
  * The 'iommu' parameter enables the IOMMU.  Optional comma separated
@@ -54,6 +56,12 @@ bool_t __read_mostly amd_iommu_perdev_in
 
 DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb);
 
+static struct keyhandler iommu_p2m_table = {
+    .diagnostic = 0,
+    .u.fn = iommu_dump_p2m_table,
+    .desc = "dump iommu p2m table"
+};
+
 static void __init parse_iommu_param(char *s)
 {
     char *ss;
@@ -119,6 +127,7 @@ void __init iommu_dom0_init(struct domai
     if ( !iommu_enabled )
         return;
 
+    register_keyhandler('o', &iommu_p2m_table);
     d->need_iommu = !!iommu_dom0_strict;
     if ( need_iommu(d) )
     {
@@ -654,6 +663,34 @@ int iommu_do_domctl(
     return ret;
 }
 
+static void iommu_dump_p2m_table(unsigned char key)
+{
+    struct domain *d;
+    const struct iommu_ops *ops;
+
+    if ( !iommu_enabled )
+    {
+        printk("IOMMU not enabled!\n");
+        return;
+    }
+
+    ops = iommu_get_ops();
+    for_each_domain(d)
+    {
+        if ( !d->domain_id )
+            continue;
+
+        if ( iommu_use_hap_pt(d) )
+        {
+            printk("\ndomain%d IOMMU p2m table shared with MMU: \n", d->domain_id);
+            continue;
+        }
+
+        printk("\ndomain%d IOMMU p2m table: \n", d->domain_id);
+        ops->dump_p2m_table(d);
+    }
+}
+
 /*
  * Local variables:
  * mode: C
diff -r 472fc515a463 -r 9c7609a4fbc1 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.c	Fri Aug 10 08:19:58 2012 -0700
@@ -31,6 +31,7 @@
 #include <xen/pci.h>
 #include <xen/pci_regs.h>
 #include <xen/keyhandler.h>
+#include <xen/softirq.h>
 #include <asm/msi.h>
 #include <asm/irq.h>
 #if defined(__i386__) || defined(__x86_64__)
@@ -2365,6 +2366,71 @@ static void vtd_resume(void)
     }
 }
 
+static void vtd_dump_p2m_table_level(paddr_t pt_maddr, int level, paddr_t gpa,
+                                     int indent)
+{
+    paddr_t address;
+    int i;
+    struct dma_pte *pt_vaddr, *pte;
+    int next_level;
+
+    if ( pt_maddr == 0 )
+        return;
+
+    pt_vaddr = map_vtd_domain_page(pt_maddr);
+    if ( pt_vaddr == NULL )
+    {
+        printk("Failed to map VT-D domain page %"PRIpaddr"\n", pt_maddr);
+        return;
+    }
+
+    next_level = level - 1;
+    for ( i = 0; i < PTE_NUM; i++ )
+    {
+        if ( !(i % 2) )
+            process_pending_softirqs();
+
+        pte = &pt_vaddr[i];
+        if ( !dma_pte_present(*pte) )
+            continue;
+
+        address = gpa + offset_level_address(i, level);
+        if ( next_level >= 1 )
+        {
+            vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level,
+                                     address, indent + 1);
+        }
+        else
+        {
+            int j;
+
+            for ( j = 0; j < indent; j++ )
+                printk(" ");
+
+            printk("gfn: %08lx mfn: %08lx super=%d rd=%d wr=%d\n",
+                   (unsigned long)(address >> PAGE_SHIFT_4K),
+                   (unsigned long)(pte->val >> PAGE_SHIFT_4K),
+                   dma_pte_superpage(*pte)? 1 : 0,
+                   dma_pte_read(*pte)? 1 : 0,
+                   dma_pte_write(*pte)? 1 : 0);
+        }
+    }
+
+    unmap_vtd_domain_page(pt_vaddr);
+}
+
+static void vtd_dump_p2m_table(struct domain *d)
+{
+    struct hvm_iommu *hd;
+
+    if ( list_empty(&acpi_drhd_units) )
+        return;
+
+    hd = domain_hvm_iommu(d);
+    printk("p2m table has %d levels\n", agaw_to_level(hd->agaw));
+    vtd_dump_p2m_table_level(hd->pgd_maddr, agaw_to_level(hd->agaw), 0, 0);
+}
+
 const struct iommu_ops intel_iommu_ops = {
     .init = intel_iommu_domain_init,
     .dom0_init = intel_iommu_dom0_init,
@@ -2387,6 +2453,7 @@ const struct iommu_ops intel_iommu_ops
     .crash_shutdown = vtd_crash_shutdown,
     .iotlb_flush = intel_iommu_iotlb_flush,
     .iotlb_flush_all = intel_iommu_iotlb_flush_all,
+    .dump_p2m_table = vtd_dump_p2m_table,
 };
 
 /*
diff -r 472fc515a463 -r 9c7609a4fbc1 xen/drivers/passthrough/vtd/iommu.h
--- a/xen/drivers/passthrough/vtd/iommu.h	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.h	Fri Aug 10 08:19:58 2012 -0700
@@ -248,6 +248,8 @@ struct context_entry {
 #define level_to_offset_bits(l) (12 + (l - 1) * LEVEL_STRIDE)
 #define address_level_offset(addr, level) \
             ((addr >> level_to_offset_bits(level)) & LEVEL_MASK)
+#define offset_level_address(offset, level) \
+            ((u64)(offset) << level_to_offset_bits(level))
 #define level_mask(l) (((u64)(-1)) << level_to_offset_bits(l))
 #define level_size(l) (1 << level_to_offset_bits(l))
 #define align_to_level(addr, l) ((addr + level_size(l) - 1) & level_mask(l))
@@ -277,6 +279,9 @@ struct dma_pte {
 #define dma_set_pte_addr(p, addr) do {\
             (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
 #define dma_pte_present(p) (((p).val & 3) != 0)
+#define dma_pte_superpage(p) (((p).val & (1<<7)) != 0)
+#define dma_pte_read(p) (((p).val & DMA_PTE_READ) != 0)
+#define dma_pte_write(p) (((p).val & DMA_PTE_WRITE) != 0)
 
 /* interrupt remap entry */
 struct iremap_entry {
diff -r 472fc515a463 -r 9c7609a4fbc1 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h	Fri Aug 10 08:19:58 2012 -0700
@@ -38,6 +38,10 @@
 #define PTE_PER_TABLE_ALLOC(entries) \
 	PAGE_SIZE * (PTE_PER_TABLE_ALIGN(entries) >> PTE_PER_TABLE_SHIFT)
 
+#define amd_offset_level_address(offset, level) \
+	((u64)(offset) << ((PTE_PER_TABLE_SHIFT * \
+	                    (level - IOMMU_PAGING_MODE_LEVEL_1))))
+
 #define PCI_MIN_CAP_OFFSET	0x40
 #define PCI_MAX_CAP_BLOCKS	48
 #define PCI_CAP_PTR_MASK	0xFC
diff -r 472fc515a463 -r 9c7609a4fbc1 xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h	Tue Aug 07 18:37:31 2012 +0100
+++ b/xen/include/xen/iommu.h	Fri Aug 10 08:19:58 2012 -0700
@@ -141,6 +141,7 @@ struct iommu_ops {
     void (*crash_shutdown)(void);
     void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count);
     void (*iotlb_flush_all)(struct domain *d);
+    void (*dump_p2m_table)(struct domain *d);
 };
 
 void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value);
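For testing (not part of the patch): once registered, the new handler can be driven like any other Xen debug key, e.g. from a dom0 shell, assuming an xl toolstack with debug-keys support:

    xl debug-keys o
    xl dmesg | tail

The same key can also be sent directly over the serial console.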
>>> On 10.08.12 at 21:14, Santosh Jodh <santosh.jodh@citrix.com> wrote:
> --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c	Tue Aug 07 18:37:31 2012 +0100
> +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c	Fri Aug 10 08:19:58 2012 -0700
> @@ -512,6 +513,80 @@ static int amd_iommu_group_id(u16 seg, u
>
>  #include <asm/io_apic.h>
>
> +static void amd_dump_p2m_table_level(struct page_info* pg, int level,
> +                                     paddr_t gpa, int indent)
> +{
> [...]
> +        address = gpa + amd_offset_level_address(index, level);
> +        if ( (next_table_maddr != 0) && (next_level != 0) )

Why do you do this differently than for VT-d here? There you don't
check next_table_maddr (and I see no reason you would need to).
Oh, I see, there's a similar check in a different place there. But this
needs to be functionally similar here then. Specifically, ...

> +        {
> +            amd_dump_p2m_table_level(
> +                maddr_to_page(next_table_maddr), level - 1,
> +                address, indent + 1);
> +        }
> +        else

... you'd get into the else's body if next_table_maddr was zero, which
is wrong afaict. So I think flow like

    if ( !next_level )
        print
    else if ( next_table_maddr )
        recurse

would be the preferable way to go if you feel that these zero checks
are necessary (and if you do then, because this being the case is
really a bug, this shouldn't go through silently).

> +        {
> +            int i;
> +
> +            for ( i = 0; i < indent; i++ )
> +                printk(" ");

printk("%*s...", indent, "", ...);

> +
> +            printk("gfn: %08lx mfn: %08lx\n",
> +                   (unsigned long)PFN_DOWN(address),
> +                   (unsigned long)PFN_DOWN(next_table_maddr));
> +        }
> +    }
> +
> +    unmap_domain_page(table_vaddr);
> +}
> ...
> --- a/xen/drivers/passthrough/vtd/iommu.c	Tue Aug 07 18:37:31 2012 +0100
> +++ b/xen/drivers/passthrough/vtd/iommu.c	Fri Aug 10 08:19:58 2012 -0700
> @@ -2365,6 +2366,71 @@ static void vtd_resume(void)
> [...]
> +        address = gpa + offset_level_address(i, level);
> +        if ( next_level >= 1 )
> +        {
> +            vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level,
> +                                     address, indent + 1);
> +        }
> +        else
> +        {
> +            int j;
> +
> +            for ( j = 0; j < indent; j++ )
> +                printk(" ");

See above.

Jan

> +
> +            printk("gfn: %08lx mfn: %08lx super=%d rd=%d wr=%d\n",
> +                   (unsigned long)(address >> PAGE_SHIFT_4K),
> +                   (unsigned long)(pte->val >> PAGE_SHIFT_4K),
> +                   dma_pte_superpage(*pte)? 1 : 0,
> +                   dma_pte_read(*pte)? 1 : 0,
> +                   dma_pte_write(*pte)? 1 : 0);
> +        }
> +    }
> +
> +    unmap_vtd_domain_page(pt_vaddr);
> +}
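Putting Jan's two suggestions together, the AMD loop body he is sketching might look roughly like this (illustrative only, not a committed version; the "broken entry" wording is made up):

    if ( !next_level )
        printk("%*sgfn: %08lx mfn: %08lx\n", indent, "",
               (unsigned long)PFN_DOWN(address),
               (unsigned long)PFN_DOWN(next_table_maddr));
    else if ( next_table_maddr )
        amd_dump_p2m_table_level(maddr_to_page(next_table_maddr),
                                 level - 1, address, indent + 1);
    else
        /* Next level set but no table address - really a bug, so say so. */
        printk("%*sbroken entry at gfn %08lx\n", indent, "",
               (unsigned long)PFN_DOWN(address));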
Hi Santosh

Please see some outputs below, gfn still seems incorrect.

Thanks
Wei

(XEN) gfn: 000000f0 mfn: 001023ac
(XEN) gfn: 000000f0 mfn: 0023f83d
(XEN) gfn: 000000f0 mfn: 001023ab
(XEN) gfn: 000000f0 mfn: 0023f83c
(XEN) gfn: 000000f0 mfn: 001023aa
(XEN) gfn: 000000f0 mfn: 0023f83b
(XEN) gfn: 000000f0 mfn: 001023a9
(XEN) gfn: 000000f0 mfn: 0023f83a
(XEN) gfn: 000000f0 mfn: 001023a8
(XEN) gfn: 000000f0 mfn: 0023f839
(XEN) gfn: 000000f0 mfn: 001023a7
(XEN) gfn: 000000f0 mfn: 0023f838
(XEN) gfn: 000000f0 mfn: 001023a6
(XEN) gfn: 000000f0 mfn: 0023f837
(XEN) gfn: 000000f0 mfn: 001023a5
(XEN) gfn: 000000f0 mfn: 0023f836
(XEN) gfn: 000000f0 mfn: 001023a4
(XEN) gfn: 000000f0 mfn: 0023f835
(XEN) gfn: 000000f0 mfn: 001023a3
(XEN) gfn: 000000f0 mfn: 0023f834
(XEN) gfn: 000000f0 mfn: 001023a2
(XEN) gfn: 000000f0 mfn: 0023f833
(XEN) gfn: 000000f0 mfn: 001023a1
(XEN) gfn: 000000f0 mfn: 0023f832
(XEN) gfn: 000000f0 mfn: 001023a0
(XEN) gfn: 000000f0 mfn: 0023f831
(XEN) gfn: 000000f0 mfn: 0010239f
(XEN) gfn: 000000f0 mfn: 0023f830
(XEN) gfn: 000000f0 mfn: 0010239e
(XEN) gfn: 000000f0 mfn: 0023f82f
(XEN) gfn: 000000f0 mfn: 0010239d
(XEN) gfn: 000000f0 mfn: 0023f82e
(XEN) gfn: 000000f0 mfn: 0010239c
(XEN) gfn: 000000f0 mfn: 0023f82d
(XEN) gfn: 000000f0 mfn: 0010239b
(XEN) gfn: 000000f0 mfn: 0023f82c
(XEN) gfn: 000000f0 mfn: 0010239a
(XEN) gfn: 000000f0 mfn: 0023f82b
(XEN) gfn: 000000f0 mfn: 00102399
(XEN) gfn: 000000f0 mfn: 0023f82a
(XEN) gfn: 000000f0 mfn: 00102398
(XEN) gfn: 000000f0 mfn: 0023f829
(XEN) gfn: 000000f0 mfn: 00102397
(XEN) gfn: 000000f0 mfn: 0023f828
(XEN) gfn: 000000f0 mfn: 00102396
(XEN) gfn: 000000f0 mfn: 0023f827
(XEN) gfn: 000000f0 mfn: 00102395
(XEN) gfn: 000000f0 mfn: 0023f826
(XEN) gfn: 000000f0 mfn: 00102394
(XEN) gfn: 000000f0 mfn: 0023f825
(XEN) gfn: 000000f0 mfn: 00102393
(XEN) gfn: 000000f0 mfn: 0023f824
(XEN) gfn: 000000f0 mfn: 00102392
(XEN) gfn: 000000f0 mfn: 0023f823
(XEN) gfn: 000000f0 mfn: 00102391
(XEN) gfn: 000000f0 mfn: 0023f822
(XEN) gfn: 000000f0 mfn: 00102390
(XEN) gfn: 000000f0 mfn: 0023f821
(XEN) gfn: 000000f0 mfn: 0010238f
(XEN) gfn: 000000f0 mfn: 0023f820
(XEN) gfn: 000000f0 mfn: 0010238e
(XEN) gfn: 000000f0 mfn: 0023f81f
(XEN) gfn: 000000f0 mfn: 0010238d
(XEN) gfn: 000000f0 mfn: 0023f81e
(XEN) gfn: 000000f0 mfn: 0010238c
(XEN) gfn: 000000f0 mfn: 0023f81d
(XEN) gfn: 000000f0 mfn: 0010238b
(XEN) gfn: 000000f0 mfn: 0023f81c
(XEN) gfn: 000000f0 mfn: 0010238a
(XEN) gfn: 000000f0 mfn: 0023f81b
(XEN) gfn: 000000f0 mfn: 00102389
(XEN) gfn: 000000f0 mfn: 0023f81a
(XEN) gfn: 000000f0 mfn: 00102388
(XEN) gfn: 000000f0 mfn: 0023f819
(XEN) gfn: 000000f0 mfn: 00102387
(XEN) gfn: 000000f0 mfn: 0023f818
(XEN) gfn: 000000f0 mfn: 00102386
(XEN) gfn: 000000f0 mfn: 0023f817
(XEN) gfn: 000000f0 mfn: 00102385
(XEN) gfn: 000000f0 mfn: 0023f816
(XEN) gfn: 000000f0 mfn: 00102384
(XEN) gfn: 000000f0 mfn: 0023f815
(XEN) gfn: 000000f0 mfn: 00102383
(XEN) gfn: 000000f0 mfn: 0023f814
(XEN) gfn: 000000f0 mfn: 00102382
(XEN) gfn: 000000f0 mfn: 0023f813
(XEN) gfn: 000000f0 mfn: 00102381
(XEN) gfn: 000000f0 mfn: 0023f812
(XEN) gfn: 000000f0 mfn: 00102380
(XEN) gfn: 000000f0 mfn: 0023f811
(XEN) gfn: 000000f0 mfn: 0010217f
(XEN) gfn: 000000f0 mfn: 0023f810
(XEN) gfn: 000000f0 mfn: 0010217e
(XEN) gfn: 000000f0 mfn: 0023f80f
(XEN) gfn: 000000f0 mfn: 0010217d
(XEN) gfn: 000000f0 mfn: 0023f80e
(XEN) gfn: 000000f0 mfn: 0010217c
(XEN) gfn: 000000f0 mfn: 0023f80d
(XEN) gfn: 000000f0 mfn: 0010217b
(XEN) gfn: 000000f0 mfn: 0023f80c
(XEN) gfn: 000000f0 mfn: 0010217a
(XEN) gfn: 000000f0 mfn: 0023f80b
(XEN) gfn: 000000f0 mfn: 00102179
(XEN) gfn: 000000fc mfn: 0023f806
(XEN) gfn: 000000fc mfn: 0023f809
(XEN) gfn: 000000fc mfn: 001025f1
(XEN) gfn: 000000fc mfn: 0023f807
(XEN) gfn: 000000fc mfn: 001025f0
(XEN) gfn: 000000fc mfn: 001025ef
(XEN) gfn: 000000fc mfn: 0023f805
(XEN) gfn: 000000fc mfn: 001025ee
(XEN) gfn: 000000fc mfn: 0023f804
(XEN) gfn: 000000fc mfn: 001025ed
(XEN) gfn: 000000fc mfn: 0023f803
(XEN) gfn: 000000fc mfn: 001025ec
(XEN) gfn: 000000fc mfn: 0023f802
(XEN) gfn: 000000fc mfn: 001025eb
(XEN) gfn: 000000fc mfn: 0023f801
(XEN) gfn: 000000fc mfn: 001025ea
(XEN) gfn: 000000fc mfn: 0023f800
(XEN) gfn: 000000fe mfn: 000812b0
(XEN) gfn: 000000fe mfn: 0010a085
(XEN) gfn: 000000fe mfn: 0021f60c
(XEN) gfn: 000000fe mfn: 0010a084
(XEN) gfn: 000000fe mfn: 0021f60b
(XEN) gfn: 000000fe mfn: 0010a083
Why do you do this differently than for VT-d here? There you don't
check next_table_maddr (and I see no reason you would need to).
Oh, I see, there's a similar check in a different place there. But this
needs to be functionally similar here then. Specifically, ...

> +        {
> +            amd_dump_p2m_table_level(
> +                maddr_to_page(next_table_maddr), level - 1,
> +                address, indent + 1);
> +        }
> +        else

... you'd get into the else's body if next_table_maddr was zero, which
is wrong afaict. So I think flow like

    if ( !next_level )
        print
    else if ( next_table_maddr )
        recurse

would be the preferable way to go if you feel that these zero checks
are necessary (and if you do then, because this being the case is
really a bug, this shouldn't go through silently).

[Santosh Jodh] I was basing my code on the existing code in the individual
files. I was just being paranoid as this is debug code and I would not want
to crash the system. Anyway, I am resending a patch that structures the
code in the same way for both Intel and AMD.

> +        {
> +            int i;
> +
> +            for ( i = 0; i < indent; i++ )
> +                printk(" ");

printk("%*s...", indent, "", ...);

[Santosh Jodh] Cool - got it.
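For readers who have not seen the trick before: a '*' field width takes the width from the argument list, so padding with the empty string replaces the explicit space loop. A standalone illustration in plain C (not patch code):

    #include <stdio.h>

    int main(void)
    {
        int indent = 4;

        /* "%*s" with "" prints the empty string right-justified in a
         * field of width 4, i.e. four spaces, before the payload. */
        printf("%*sgfn: %08lx\n", indent, "", 0xf0UL);
        return 0;
    }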