New key handler ''o'' to dump the IOMMU p2m table for each domain. Skips dumping table for domain0. Intel and AMD specific iommu_ops handler for dumping p2m table. Signed-off-by: Santosh Jodh<santosh.jodh@citrix.com> diff -r dc56a9defa30 -r 5357dccf4ba3 xen/drivers/passthrough/amd/pci_amd_iommu.c --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Tue Aug 14 10:28:14 2012 +0200 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Tue Aug 14 12:54:55 2012 -0700 @@ -22,6 +22,7 @@ #include <xen/pci.h> #include <xen/pci_regs.h> #include <xen/paging.h> +#include <xen/softirq.h> #include <asm/hvm/iommu.h> #include <asm/amd-iommu.h> #include <asm/hvm/svm/amd-iommu-proto.h> @@ -512,6 +513,72 @@ static int amd_iommu_group_id(u16 seg, u #include <asm/io_apic.h> +static void amd_dump_p2m_table_level(struct page_info* pg, int level, + paddr_t gpa, int indent) +{ + paddr_t address; + void *table_vaddr, *pde; + paddr_t next_table_maddr; + int index, next_level, present; + u32 *entry; + + if ( level < 1 ) + return; + + table_vaddr = __map_domain_page(pg); + if ( table_vaddr == NULL ) + { + printk("Failed to map IOMMU domain page %"PRIpaddr"\n", + page_to_maddr(pg)); + return; + } + + for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ ) + { + if ( !(index % 2) ) + process_pending_softirqs(); + + pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE); + next_table_maddr = amd_iommu_get_next_table_from_pte(pde); + entry = (u32*)pde; + + present = get_field_from_reg_u32(entry[0], + IOMMU_PDE_PRESENT_MASK, + IOMMU_PDE_PRESENT_SHIFT); + + if ( !present ) + continue; + + next_level = get_field_from_reg_u32(entry[0], + IOMMU_PDE_NEXT_LEVEL_MASK, + IOMMU_PDE_NEXT_LEVEL_SHIFT); + + address = gpa + amd_offset_level_address(index, level); + if ( next_level >= 1 ) + amd_dump_p2m_table_level( + maddr_to_page(next_table_maddr), level - 1, + address, indent + 1); + else + printk("%*s" "gfn: %08lx mfn: %08lx\n", + indent, " ", + (unsigned long)PFN_DOWN(address), + (unsigned 
long)PFN_DOWN(next_table_maddr)); + } + + unmap_domain_page(table_vaddr); +} + +static void amd_dump_p2m_table(struct domain *d) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); + + if ( !hd->root_table ) + return; + + printk("p2m table has %d levels\n", hd->paging_mode); + amd_dump_p2m_table_level(hd->root_table, hd->paging_mode, 0, 0); +} + const struct iommu_ops amd_iommu_ops = { .init = amd_iommu_domain_init, .dom0_init = amd_iommu_dom0_init, @@ -531,4 +598,5 @@ const struct iommu_ops amd_iommu_ops = { .resume = amd_iommu_resume, .share_p2m = amd_iommu_share_p2m, .crash_shutdown = amd_iommu_suspend, + .dump_p2m_table = amd_dump_p2m_table, }; diff -r dc56a9defa30 -r 5357dccf4ba3 xen/drivers/passthrough/iommu.c --- a/xen/drivers/passthrough/iommu.c Tue Aug 14 10:28:14 2012 +0200 +++ b/xen/drivers/passthrough/iommu.c Tue Aug 14 12:54:55 2012 -0700 @@ -19,10 +19,12 @@ #include <xen/paging.h> #include <xen/guest_access.h> #include <xen/softirq.h> +#include <xen/keyhandler.h> #include <xsm/xsm.h> static void parse_iommu_param(char *s); static int iommu_populate_page_table(struct domain *d); +static void iommu_dump_p2m_table(unsigned char key); /* * The ''iommu'' parameter enables the IOMMU. 
Optional comma separated @@ -54,6 +56,12 @@ bool_t __read_mostly amd_iommu_perdev_in DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb); +static struct keyhandler iommu_p2m_table = { + .diagnostic = 0, + .u.fn = iommu_dump_p2m_table, + .desc = "dump iommu p2m table" +}; + static void __init parse_iommu_param(char *s) { char *ss; @@ -119,6 +127,7 @@ void __init iommu_dom0_init(struct domai if ( !iommu_enabled ) return; + register_keyhandler(''o'', &iommu_p2m_table); d->need_iommu = !!iommu_dom0_strict; if ( need_iommu(d) ) { @@ -654,6 +663,34 @@ int iommu_do_domctl( return ret; } +static void iommu_dump_p2m_table(unsigned char key) +{ + struct domain *d; + const struct iommu_ops *ops; + + if ( !iommu_enabled ) + { + printk("IOMMU not enabled!\n"); + return; + } + + ops = iommu_get_ops(); + for_each_domain(d) + { + if ( !d->domain_id ) + continue; + + if ( iommu_use_hap_pt(d) ) + { + printk("\ndomain%d IOMMU p2m table shared with MMU: \n", d->domain_id); + continue; + } + + printk("\ndomain%d IOMMU p2m table: \n", d->domain_id); + ops->dump_p2m_table(d); + } +} + /* * Local variables: * mode: C diff -r dc56a9defa30 -r 5357dccf4ba3 xen/drivers/passthrough/vtd/iommu.c --- a/xen/drivers/passthrough/vtd/iommu.c Tue Aug 14 10:28:14 2012 +0200 +++ b/xen/drivers/passthrough/vtd/iommu.c Tue Aug 14 12:54:55 2012 -0700 @@ -31,6 +31,7 @@ #include <xen/pci.h> #include <xen/pci_regs.h> #include <xen/keyhandler.h> +#include <xen/softirq.h> #include <asm/msi.h> #include <asm/irq.h> #if defined(__i386__) || defined(__x86_64__) @@ -2365,6 +2366,63 @@ static void vtd_resume(void) } } +static void vtd_dump_p2m_table_level(paddr_t pt_maddr, int level, paddr_t gpa, + int indent) +{ + paddr_t address; + int i; + struct dma_pte *pt_vaddr, *pte; + int next_level; + + if ( level < 1 ) + return; + + pt_vaddr = map_vtd_domain_page(pt_maddr); + if ( pt_vaddr == NULL ) + { + printk("Failed to map VT-D domain page %"PRIpaddr"\n", pt_maddr); + return; + } + + next_level = level - 1; + for ( i = 0; i < 
PTE_NUM; i++ ) + { + if ( !(i % 2) ) + process_pending_softirqs(); + + pte = &pt_vaddr[i]; + if ( !dma_pte_present(*pte) ) + continue; + + address = gpa + offset_level_address(i, level); + if ( next_level >= 1 ) + vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level, + address, indent + 1); + else + printk("%*s" "gfn: %08lx mfn: %08lx super=%d rd=%d wr=%d\n", + indent, " ", + (unsigned long)(address >> PAGE_SHIFT_4K), + (unsigned long)(pte->val >> PAGE_SHIFT_4K), + dma_pte_superpage(*pte)? 1 : 0, + dma_pte_read(*pte)? 1 : 0, + dma_pte_write(*pte)? 1 : 0); + } + + unmap_vtd_domain_page(pt_vaddr); +} + +static void vtd_dump_p2m_table(struct domain *d) +{ + struct hvm_iommu *hd; + + if ( list_empty(&acpi_drhd_units) ) + return; + + hd = domain_hvm_iommu(d); + printk("p2m table has %d levels\n", agaw_to_level(hd->agaw)); + vtd_dump_p2m_table_level(hd->pgd_maddr, agaw_to_level(hd->agaw), 0, 0); +} + const struct iommu_ops intel_iommu_ops = { .init = intel_iommu_domain_init, .dom0_init = intel_iommu_dom0_init, @@ -2387,6 +2445,7 @@ const struct iommu_ops intel_iommu_ops .crash_shutdown = vtd_crash_shutdown, .iotlb_flush = intel_iommu_iotlb_flush, .iotlb_flush_all = intel_iommu_iotlb_flush_all, + .dump_p2m_table = vtd_dump_p2m_table, }; /* diff -r dc56a9defa30 -r 5357dccf4ba3 xen/drivers/passthrough/vtd/iommu.h --- a/xen/drivers/passthrough/vtd/iommu.h Tue Aug 14 10:28:14 2012 +0200 +++ b/xen/drivers/passthrough/vtd/iommu.h Tue Aug 14 12:54:55 2012 -0700 @@ -248,6 +248,8 @@ struct context_entry { #define level_to_offset_bits(l) (12 + (l - 1) * LEVEL_STRIDE) #define address_level_offset(addr, level) \ ((addr >> level_to_offset_bits(level)) & LEVEL_MASK) +#define offset_level_address(offset, level) \ + ((u64)(offset) << level_to_offset_bits(level)) #define level_mask(l) (((u64)(-1)) << level_to_offset_bits(l)) #define level_size(l) (1 << level_to_offset_bits(l)) #define align_to_level(addr, l) ((addr + level_size(l) - 1) & level_mask(l)) @@ -277,6 +279,9 @@ struct 
dma_pte { #define dma_set_pte_addr(p, addr) do {\ (p).val |= ((addr) & PAGE_MASK_4K); } while (0) #define dma_pte_present(p) (((p).val & 3) != 0) +#define dma_pte_superpage(p) (((p).val & (1<<7)) != 0) +#define dma_pte_read(p) (((p).val & DMA_PTE_READ) != 0) +#define dma_pte_write(p) (((p).val & DMA_PTE_WRITE) != 0) /* interrupt remap entry */ struct iremap_entry { diff -r dc56a9defa30 -r 5357dccf4ba3 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Tue Aug 14 10:28:14 2012 +0200 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Tue Aug 14 12:54:55 2012 -0700 @@ -38,6 +38,10 @@ #define PTE_PER_TABLE_ALLOC(entries) \ PAGE_SIZE * (PTE_PER_TABLE_ALIGN(entries) >> PTE_PER_TABLE_SHIFT) +#define amd_offset_level_address(offset, level) \ + ((u64)(offset) << (12 + (PTE_PER_TABLE_SHIFT * \ + (level - IOMMU_PAGING_MODE_LEVEL_1)))) + #define PCI_MIN_CAP_OFFSET 0x40 #define PCI_MAX_CAP_BLOCKS 48 #define PCI_CAP_PTR_MASK 0xFC diff -r dc56a9defa30 -r 5357dccf4ba3 xen/include/xen/iommu.h --- a/xen/include/xen/iommu.h Tue Aug 14 10:28:14 2012 +0200 +++ b/xen/include/xen/iommu.h Tue Aug 14 12:54:55 2012 -0700 @@ -141,6 +141,7 @@ struct iommu_ops { void (*crash_shutdown)(void); void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count); void (*iotlb_flush_all)(struct domain *d); + void (*dump_p2m_table)(struct domain *d); }; void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value);
>>> On 14.08.12 at 21:55, Santosh Jodh <santosh.jodh@citrix.com> wrote:Sorry to be picky; after this many rounds I would have expected that no further comments would be needed.> +static void amd_dump_p2m_table_level(struct page_info* pg, int level, > + paddr_t gpa, int indent) > +{ > + paddr_t address; > + void *table_vaddr, *pde; > + paddr_t next_table_maddr; > + int index, next_level, present; > + u32 *entry; > + > + if ( level < 1 ) > + return; > + > + table_vaddr = __map_domain_page(pg); > + if ( table_vaddr == NULL ) > + { > + printk("Failed to map IOMMU domain page %"PRIpaddr"\n", > + page_to_maddr(pg)); > + return; > + } > + > + for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ ) > + { > + if ( !(index % 2) ) > + process_pending_softirqs(); > + > + pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE); > + next_table_maddr = amd_iommu_get_next_table_from_pte(pde); > + entry = (u32*)pde; > + > + present = get_field_from_reg_u32(entry[0], > + IOMMU_PDE_PRESENT_MASK, > + IOMMU_PDE_PRESENT_SHIFT); > + > + if ( !present ) > + continue; > + > + next_level = get_field_from_reg_u32(entry[0], > + IOMMU_PDE_NEXT_LEVEL_MASK, > + IOMMU_PDE_NEXT_LEVEL_SHIFT); > + > + address = gpa + amd_offset_level_address(index, level); > + if ( next_level >= 1 ) > + amd_dump_p2m_table_level( > + maddr_to_page(next_table_maddr), level - 1,Did you see Wei''s cleanup patches to the code you cloned from? You should follow that route (replacing the ASSERT() with printing of the inconsistency and _not_ recursing or doing the normal printing), and using either "level" or "next_level" consistently here.> + address, indent + 1); > + else > + printk("%*s" "gfn: %08lx mfn: %08lx\n", > + indent, " ",printk("%*sgfn: %08lx mfn: %08lx\n", indent, "", I can vaguely see the point in splitting the two strings in the first argument, but the extra space in the third argument is definitely wrong - it''ll make level 1 and level 2 indistinguishable. 
I also don''t see how you addressed Wei''s reporting of this still not printing correctly. I may be overlooking something, but without you making clear in the description what you changed over the previous version that''s also relatively easy to happen.> +static void vtd_dump_p2m_table_level(paddr_t pt_maddr, int level, paddr_t gpa, > + int indent) > +{ > + paddr_t address; > + int i; > + struct dma_pte *pt_vaddr, *pte; > + int next_level; > + > + if ( level < 1 ) > + return; > + > + pt_vaddr = map_vtd_domain_page(pt_maddr); > + if ( pt_vaddr == NULL ) > + { > + printk("Failed to map VT-D domain page %"PRIpaddr"\n", pt_maddr); > + return; > + } > + > + next_level = level - 1; > + for ( i = 0; i < PTE_NUM; i++ ) > + { > + if ( !(i % 2) ) > + process_pending_softirqs(); > + > + pte = &pt_vaddr[i]; > + if ( !dma_pte_present(*pte) ) > + continue; > + > + address = gpa + offset_level_address(i, level); > + if ( next_level >= 1 ) > + vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level, > + address, indent + 1); > + else > + printk("%*s" "gfn: %08lx mfn: %08lx super=%d rd=%d wr=%d\n", > + indent, " ",Same comment as above.> + (unsigned long)(address >> PAGE_SHIFT_4K), > + (unsigned long)(pte->val >> PAGE_SHIFT_4K), > + dma_pte_superpage(*pte)? 1 : 0, > + dma_pte_read(*pte)? 1 : 0, > + dma_pte_write(*pte)? 1 : 0);Missing spaces. Even worse - given your definitions of these macros there''s no point in using the conditional operators here at all. And, despite your claim in another response, this still isn''t similar to AMD''s variant (which still doesn''t print any of these three attributes). The printing of the superpage status is pretty pointless anyway, given that there''s no single use of dma_set_pte_superpage() throughout the tree - validly so since superpages can be in use currently only when the tables are shared with EPT, in which case you don''t print anything. 
Plus you''d need to detect the flag _above_ level 1 (at leaf level the bit is ignored and hence just confusing if printed) and print the entry instead of recursing. And if you decide to indeed properly implement this (rather than just dropping superpage support here), _I_ would expect you to properly implement level skipping in the corresponding AMD code too (which similarly isn''t being used currently). Jan
On 08/15/2012 10:54 AM, Jan Beulich wrote:>>>> On 14.08.12 at 21:55, Santosh Jodh<santosh.jodh@citrix.com> wrote: > > Sorry to be picky; after this many rounds I would have > expected that no further comments would be needed. > >> +static void amd_dump_p2m_table_level(struct page_info* pg, int level, >> + paddr_t gpa, int indent) >> +{ >> + paddr_t address; >> + void *table_vaddr, *pde; >> + paddr_t next_table_maddr; >> + int index, next_level, present; >> + u32 *entry; >> + >> + if ( level< 1 ) >> + return; >> + >> + table_vaddr = __map_domain_page(pg); >> + if ( table_vaddr == NULL ) >> + { >> + printk("Failed to map IOMMU domain page %"PRIpaddr"\n", >> + page_to_maddr(pg)); >> + return; >> + } >> + >> + for ( index = 0; index< PTE_PER_TABLE_SIZE; index++ ) >> + { >> + if ( !(index % 2) ) >> + process_pending_softirqs(); >> + >> + pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE); >> + next_table_maddr = amd_iommu_get_next_table_from_pte(pde); >> + entry = (u32*)pde; >> + >> + present = get_field_from_reg_u32(entry[0], >> + IOMMU_PDE_PRESENT_MASK, >> + IOMMU_PDE_PRESENT_SHIFT); >> + >> + if ( !present ) >> + continue; >> + >> + next_level = get_field_from_reg_u32(entry[0], >> + IOMMU_PDE_NEXT_LEVEL_MASK, >> + IOMMU_PDE_NEXT_LEVEL_SHIFT); >> + >> + address = gpa + amd_offset_level_address(index, level); >> + if ( next_level>= 1 ) >> + amd_dump_p2m_table_level( >> + maddr_to_page(next_table_maddr), level - 1, > > Did you see Wei''s cleanup patches to the code you cloned from? > You should follow that route (replacing the ASSERT() with > printing of the inconsistency and _not_ recursing or doing the > normal printing), and using either "level" or "next_level" > consistently here.Hi, I tested the patch and the output looks much better now,please see attachment. 
One thing I notice: there is a 1GB mapping in the guest, but the format of it looks like other 2MB mappings: (XEN) gfn: 0003fa00 mfn: 0023b000 (XEN) gfn: 0003fc00 mfn: 00136200 (XEN) gfn: 0003fe00 mfn: 0023ae00 (XEN) gfn: 00040000 mfn: 00040000 << 1GB here (XEN) gfn: 00080000 mfn: 0023ac00 (XEN) gfn: 00080200 mfn: 00136000 (XEN) gfn: 00080400 mfn: 0023aa00 (XEN) gfn: 00080600 mfn: 00135e00 (XEN) gfn: 00080800 mfn: 0023a800 (XEN) gfn: 00080a00 mfn: 00135c00 (XEN) gfn: 00080c00 mfn: 0023a600 (XEN) gfn: 00080e00 mfn: 00135a00 (XEN) gfn: 00081000 mfn: 0023a400 (XEN) gfn: 00081200 mfn: 00135800 (XEN) gfn: 00081400 mfn: 0023a200 (XEN) gfn: 00081600 mfn: 00135600 Thanks, Wei>> + address, indent + 1); >> + else >> + printk("%*s" "gfn: %08lx mfn: %08lx\n", >> + indent, " ", > > printk("%*sgfn: %08lx mfn: %08lx\n", > indent, "", > > I can vaguely see the point in splitting the two strings in the > first argument, but the extra space in the third argument is > definitely wrong - it''ll make level 1 and level 2 indistinguishable. > > I also don''t see how you addressed Wei''s reporting of this still > not printing correctly. I may be overlooking something, but > without you making clear in the description what you changed > over the previous version that''s also relatively easy to happen. 
> >> +static void vtd_dump_p2m_table_level(paddr_t pt_maddr, int level, paddr_t gpa, >> + int indent) >> +{ >> + paddr_t address; >> + int i; >> + struct dma_pte *pt_vaddr, *pte; >> + int next_level; >> + >> + if ( level< 1 ) >> + return; >> + >> + pt_vaddr = map_vtd_domain_page(pt_maddr); >> + if ( pt_vaddr == NULL ) >> + { >> + printk("Failed to map VT-D domain page %"PRIpaddr"\n", pt_maddr); >> + return; >> + } >> + >> + next_level = level - 1; >> + for ( i = 0; i< PTE_NUM; i++ ) >> + { >> + if ( !(i % 2) ) >> + process_pending_softirqs(); >> + >> + pte =&pt_vaddr[i]; >> + if ( !dma_pte_present(*pte) ) >> + continue; >> + >> + address = gpa + offset_level_address(i, level); >> + if ( next_level>= 1 ) >> + vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level, >> + address, indent + 1); >> + else >> + printk("%*s" "gfn: %08lx mfn: %08lx super=%d rd=%d wr=%d\n", >> + indent, " ", > > Same comment as above. > >> + (unsigned long)(address>> PAGE_SHIFT_4K), >> + (unsigned long)(pte->val>> PAGE_SHIFT_4K), >> + dma_pte_superpage(*pte)? 1 : 0, >> + dma_pte_read(*pte)? 1 : 0, >> + dma_pte_write(*pte)? 1 : 0); > > Missing spaces. Even worse - given your definitions of these > macros there''s no point in using the conditional operators here > at all. > > And, despite your claim in another response, this still isn''t similar > to AMD''s variant (which still doesn''t print any of these three > attributes). > > The printing of the superpage status is pretty pointless anyway, > given that there''s no single use of dma_set_pte_superpage() > throughout the tree - validly so since superpages can be in use > currently only when the tables are shared with EPT, in which > case you don''t print anything. Plus you''d need to detect the flag > _above_ level 1 (at leaf level the bit is ignored and hence just > confusing if printed) and print the entry instead of recursing. 
And > if you decide to indeed properly implement this (rather than just > dropping superpage support here), _I_ would expect you to > properly implement level skipping in the corresponding AMD code > too (which similarly isn't being used currently). > > Jan >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
> -----Original Message----- > From: Jan Beulich [mailto:JBeulich@suse.com] > Sent: Wednesday, August 15, 2012 1:54 AM > To: Santosh Jodh > Cc: wei.wang2@amd.com; xiantao.zhang@intel.com; xen-devel; Tim > (Xen.org) > Subject: Re: [Xen-devel] [PATCH] Dump IOMMU p2m table > > >>> On 14.08.12 at 21:55, Santosh Jodh <santosh.jodh@citrix.com> wrote: > > Sorry to be picky; after this many rounds I would have expected that no > further comments would be needed.I started off trying to code to existing structure and style in individual files I was modifying. I also created IOMMU p2m dump - similar to MMU p2m dump. Over the last few days, this has evolved into cleaning up existing AMD code, indenting for more clarity etc. All good points btw.> > > +static void amd_dump_p2m_table_level(struct page_info* pg, int > level, > > + paddr_t gpa, int indent) { > > + paddr_t address; > > + void *table_vaddr, *pde; > > + paddr_t next_table_maddr; > > + int index, next_level, present; > > + u32 *entry; > > + > > + if ( level < 1 ) > > + return; > > + > > + table_vaddr = __map_domain_page(pg); > > + if ( table_vaddr == NULL ) > > + { > > + printk("Failed to map IOMMU domain page %"PRIpaddr"\n", > > + page_to_maddr(pg)); > > + return; > > + } > > + > > + for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ ) > > + { > > + if ( !(index % 2) ) > > + process_pending_softirqs(); > > + > > + pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE); > > + next_table_maddr = amd_iommu_get_next_table_from_pte(pde); > > + entry = (u32*)pde; > > + > > + present = get_field_from_reg_u32(entry[0], > > + IOMMU_PDE_PRESENT_MASK, > > + IOMMU_PDE_PRESENT_SHIFT); > > + > > + if ( !present ) > > + continue; > > + > > + next_level = get_field_from_reg_u32(entry[0], > > + > IOMMU_PDE_NEXT_LEVEL_MASK, > > + > > + IOMMU_PDE_NEXT_LEVEL_SHIFT); > > + > > + address = gpa + amd_offset_level_address(index, level); > > + if ( next_level >= 1 ) > > + amd_dump_p2m_table_level( > > + maddr_to_page(next_table_maddr), 
level - 1, > > Did you see Wei''s cleanup patches to the code you cloned from? > You should follow that route (replacing the ASSERT() with printing of > the inconsistency and _not_ recursing or doing the normal printing), > and using either "level" or "next_level" > consistently here.Ok - will do.> > > + address, indent + 1); > > + else > > + printk("%*s" "gfn: %08lx mfn: %08lx\n", > > + indent, " ", > > printk("%*sgfn: %08lx mfn: %08lx\n", > indent, "", > > I can vaguely see the point in splitting the two strings in the first > argument, but the extra space in the third argument is definitely wrong > - it''ll make level 1 and level 2 indistinguishable.I misunderstood how "%*s" works. That probably explains the output Wei saw.> > I also don''t see how you addressed Wei''s reporting of this still not > printing correctly. I may be overlooking something, but without you > making clear in the description what you changed over the previous > version that''s also relatively easy to happen.Will add more history.> > > +static void vtd_dump_p2m_table_level(paddr_t pt_maddr, int level, > paddr_t gpa, > > + int indent) { > > + paddr_t address; > > + int i; > > + struct dma_pte *pt_vaddr, *pte; > > + int next_level; > > + > > + if ( level < 1 ) > > + return; > > + > > + pt_vaddr = map_vtd_domain_page(pt_maddr); > > + if ( pt_vaddr == NULL ) > > + { > > + printk("Failed to map VT-D domain page %"PRIpaddr"\n", > pt_maddr); > > + return; > > + } > > + > > + next_level = level - 1; > > + for ( i = 0; i < PTE_NUM; i++ ) > > + { > > + if ( !(i % 2) ) > > + process_pending_softirqs(); > > + > > + pte = &pt_vaddr[i]; > > + if ( !dma_pte_present(*pte) ) > > + continue; > > + > > + address = gpa + offset_level_address(i, level); > > + if ( next_level >= 1 ) > > + vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level, > > + address, indent + 1); > > + else > > + printk("%*s" "gfn: %08lx mfn: %08lx super=%d rd=%d > wr=%d\n", > > + indent, " ", > > Same comment as above.Yep - got it.> > 
> + (unsigned long)(address >> PAGE_SHIFT_4K), > > + (unsigned long)(pte->val >> PAGE_SHIFT_4K), > > + dma_pte_superpage(*pte)? 1 : 0, > > + dma_pte_read(*pte)? 1 : 0, > > + dma_pte_write(*pte)? 1 : 0); > > Missing spaces. Even worse - given your definitions of these macros > there''s no point in using the conditional operators here at all. > > And, despite your claim in another response, this still isn''t similar > to AMD''s variant (which still doesn''t print any of these three > attributes).I meant structure in terms of recursion logic, level checks etc. I will just remove the extra prints to make it more similar. My original goal was to print it similar to the existing MMU p2m table.
New key handler ''o'' to dump the IOMMU p2m table for each domain. Skips dumping table for domain0. Intel and AMD specific iommu_ops handler for dumping p2m table. Incorporated feedback from Jan Beulich and Wei Wang. Fixed indent printing with %*s. Removed superflous superpage and other attribute prints. Make next_level use consistent for AMD IOMMU dumps. Warn if found inconsistent. Signed-off-by: Santosh Jodh<santosh.jodh@citrix.com> diff -r 6d56e31fe1e1 -r 575a53faf4e1 xen/drivers/passthrough/amd/pci_amd_iommu.c --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Wed Aug 15 09:41:21 2012 +0100 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Thu Aug 16 09:28:24 2012 -0700 @@ -22,6 +22,7 @@ #include <xen/pci.h> #include <xen/pci_regs.h> #include <xen/paging.h> +#include <xen/softirq.h> #include <asm/hvm/iommu.h> #include <asm/amd-iommu.h> #include <asm/hvm/svm/amd-iommu-proto.h> @@ -512,6 +513,80 @@ static int amd_iommu_group_id(u16 seg, u #include <asm/io_apic.h> +static void amd_dump_p2m_table_level(struct page_info* pg, int level, + paddr_t gpa, int indent) +{ + paddr_t address; + void *table_vaddr, *pde; + paddr_t next_table_maddr; + int index, next_level, present; + u32 *entry; + + if ( level < 1 ) + return; + + table_vaddr = __map_domain_page(pg); + if ( table_vaddr == NULL ) + { + printk("Failed to map IOMMU domain page %"PRIpaddr"\n", + page_to_maddr(pg)); + return; + } + + for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ ) + { + if ( !(index % 2) ) + process_pending_softirqs(); + + pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE); + next_table_maddr = amd_iommu_get_next_table_from_pte(pde); + entry = (u32*)pde; + + present = get_field_from_reg_u32(entry[0], + IOMMU_PDE_PRESENT_MASK, + IOMMU_PDE_PRESENT_SHIFT); + + if ( !present ) + continue; + + next_level = get_field_from_reg_u32(entry[0], + IOMMU_PDE_NEXT_LEVEL_MASK, + IOMMU_PDE_NEXT_LEVEL_SHIFT); + + if ( next_level != (level - 1) ) + { + printk("IOMMU p2m table error. 
next_level = %d, expected %d\n", + next_level, level - 1); + + continue; + } + + address = gpa + amd_offset_level_address(index, level); + if ( next_level >= 1 ) + amd_dump_p2m_table_level( + maddr_to_page(next_table_maddr), next_level, + address, indent + 1); + else + printk("%*sgfn: %08lx mfn: %08lx\n", + indent, "", + (unsigned long)PFN_DOWN(address), + (unsigned long)PFN_DOWN(next_table_maddr)); + } + + unmap_domain_page(table_vaddr); +} + +static void amd_dump_p2m_table(struct domain *d) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); + + if ( !hd->root_table ) + return; + + printk("p2m table has %d levels\n", hd->paging_mode); + amd_dump_p2m_table_level(hd->root_table, hd->paging_mode, 0, 0); +} + const struct iommu_ops amd_iommu_ops = { .init = amd_iommu_domain_init, .dom0_init = amd_iommu_dom0_init, @@ -531,4 +606,5 @@ const struct iommu_ops amd_iommu_ops = { .resume = amd_iommu_resume, .share_p2m = amd_iommu_share_p2m, .crash_shutdown = amd_iommu_suspend, + .dump_p2m_table = amd_dump_p2m_table, }; diff -r 6d56e31fe1e1 -r 575a53faf4e1 xen/drivers/passthrough/iommu.c --- a/xen/drivers/passthrough/iommu.c Wed Aug 15 09:41:21 2012 +0100 +++ b/xen/drivers/passthrough/iommu.c Thu Aug 16 09:28:24 2012 -0700 @@ -19,10 +19,12 @@ #include <xen/paging.h> #include <xen/guest_access.h> #include <xen/softirq.h> +#include <xen/keyhandler.h> #include <xsm/xsm.h> static void parse_iommu_param(char *s); static int iommu_populate_page_table(struct domain *d); +static void iommu_dump_p2m_table(unsigned char key); /* * The ''iommu'' parameter enables the IOMMU. 
Optional comma separated @@ -54,6 +56,12 @@ bool_t __read_mostly amd_iommu_perdev_in DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb); +static struct keyhandler iommu_p2m_table = { + .diagnostic = 0, + .u.fn = iommu_dump_p2m_table, + .desc = "dump iommu p2m table" +}; + static void __init parse_iommu_param(char *s) { char *ss; @@ -119,6 +127,7 @@ void __init iommu_dom0_init(struct domai if ( !iommu_enabled ) return; + register_keyhandler(''o'', &iommu_p2m_table); d->need_iommu = !!iommu_dom0_strict; if ( need_iommu(d) ) { @@ -654,6 +663,34 @@ int iommu_do_domctl( return ret; } +static void iommu_dump_p2m_table(unsigned char key) +{ + struct domain *d; + const struct iommu_ops *ops; + + if ( !iommu_enabled ) + { + printk("IOMMU not enabled!\n"); + return; + } + + ops = iommu_get_ops(); + for_each_domain(d) + { + if ( !d->domain_id ) + continue; + + if ( iommu_use_hap_pt(d) ) + { + printk("\ndomain%d IOMMU p2m table shared with MMU: \n", d->domain_id); + continue; + } + + printk("\ndomain%d IOMMU p2m table: \n", d->domain_id); + ops->dump_p2m_table(d); + } +} + /* * Local variables: * mode: C diff -r 6d56e31fe1e1 -r 575a53faf4e1 xen/drivers/passthrough/vtd/iommu.c --- a/xen/drivers/passthrough/vtd/iommu.c Wed Aug 15 09:41:21 2012 +0100 +++ b/xen/drivers/passthrough/vtd/iommu.c Thu Aug 16 09:28:24 2012 -0700 @@ -31,6 +31,7 @@ #include <xen/pci.h> #include <xen/pci_regs.h> #include <xen/keyhandler.h> +#include <xen/softirq.h> #include <asm/msi.h> #include <asm/irq.h> #if defined(__i386__) || defined(__x86_64__) @@ -2365,6 +2366,60 @@ static void vtd_resume(void) } } +static void vtd_dump_p2m_table_level(paddr_t pt_maddr, int level, paddr_t gpa, + int indent) +{ + paddr_t address; + int i; + struct dma_pte *pt_vaddr, *pte; + int next_level; + + if ( level < 1 ) + return; + + pt_vaddr = map_vtd_domain_page(pt_maddr); + if ( pt_vaddr == NULL ) + { + printk("Failed to map VT-D domain page %"PRIpaddr"\n", pt_maddr); + return; + } + + next_level = level - 1; + for ( i = 0; i < 
PTE_NUM; i++ ) + { + if ( !(i % 2) ) + process_pending_softirqs(); + + pte = &pt_vaddr[i]; + if ( !dma_pte_present(*pte) ) + continue; + + address = gpa + offset_level_address(i, level); + if ( next_level >= 1 ) + vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level, + address, indent + 1); + else + printk("%*sgfn: %08lx mfn: %08lx\n", + indent, "", + (unsigned long)(address >> PAGE_SHIFT_4K), + (unsigned long)(pte->val >> PAGE_SHIFT_4K)); + } + + unmap_vtd_domain_page(pt_vaddr); +} + +static void vtd_dump_p2m_table(struct domain *d) +{ + struct hvm_iommu *hd; + + if ( list_empty(&acpi_drhd_units) ) + return; + + hd = domain_hvm_iommu(d); + printk("p2m table has %d levels\n", agaw_to_level(hd->agaw)); + vtd_dump_p2m_table_level(hd->pgd_maddr, agaw_to_level(hd->agaw), 0, 0); +} + const struct iommu_ops intel_iommu_ops = { .init = intel_iommu_domain_init, .dom0_init = intel_iommu_dom0_init, @@ -2387,6 +2442,7 @@ const struct iommu_ops intel_iommu_ops .crash_shutdown = vtd_crash_shutdown, .iotlb_flush = intel_iommu_iotlb_flush, .iotlb_flush_all = intel_iommu_iotlb_flush_all, + .dump_p2m_table = vtd_dump_p2m_table, }; /* diff -r 6d56e31fe1e1 -r 575a53faf4e1 xen/drivers/passthrough/vtd/iommu.h --- a/xen/drivers/passthrough/vtd/iommu.h Wed Aug 15 09:41:21 2012 +0100 +++ b/xen/drivers/passthrough/vtd/iommu.h Thu Aug 16 09:28:24 2012 -0700 @@ -248,6 +248,8 @@ struct context_entry { #define level_to_offset_bits(l) (12 + (l - 1) * LEVEL_STRIDE) #define address_level_offset(addr, level) \ ((addr >> level_to_offset_bits(level)) & LEVEL_MASK) +#define offset_level_address(offset, level) \ + ((u64)(offset) << level_to_offset_bits(level)) #define level_mask(l) (((u64)(-1)) << level_to_offset_bits(l)) #define level_size(l) (1 << level_to_offset_bits(l)) #define align_to_level(addr, l) ((addr + level_size(l) - 1) & level_mask(l)) diff -r 6d56e31fe1e1 -r 575a53faf4e1 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Wed Aug 15 
09:41:21 2012 +0100 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Thu Aug 16 09:28:24 2012 -0700 @@ -38,6 +38,10 @@ #define PTE_PER_TABLE_ALLOC(entries) \ PAGE_SIZE * (PTE_PER_TABLE_ALIGN(entries) >> PTE_PER_TABLE_SHIFT) +#define amd_offset_level_address(offset, level) \ + ((u64)(offset) << (12 + (PTE_PER_TABLE_SHIFT * \ + (level - IOMMU_PAGING_MODE_LEVEL_1)))) + #define PCI_MIN_CAP_OFFSET 0x40 #define PCI_MAX_CAP_BLOCKS 48 #define PCI_CAP_PTR_MASK 0xFC diff -r 6d56e31fe1e1 -r 575a53faf4e1 xen/include/xen/iommu.h --- a/xen/include/xen/iommu.h Wed Aug 15 09:41:21 2012 +0100 +++ b/xen/include/xen/iommu.h Thu Aug 16 09:28:24 2012 -0700 @@ -141,6 +141,7 @@ struct iommu_ops { void (*crash_shutdown)(void); void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count); void (*iotlb_flush_all)(struct domain *d); + void (*dump_p2m_table)(struct domain *d); }; void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value);
>>> On 16.08.12 at 18:27, Santosh Jodh <Santosh.Jodh@citrix.com> wrote: >> From: Jan Beulich [mailto:JBeulich@suse.com] >> >>> On 14.08.12 at 21:55, Santosh Jodh <santosh.jodh@citrix.com> wrote: >> > + (unsigned long)(address >> PAGE_SHIFT_4K), >> > + (unsigned long)(pte->val >> PAGE_SHIFT_4K), >> > + dma_pte_superpage(*pte)? 1 : 0, >> > + dma_pte_read(*pte)? 1 : 0, >> > + dma_pte_write(*pte)? 1 : 0); >> >> Missing spaces. Even worse - given your definitions of these macros >> there's no point in using the conditional operators here at all. >> >> And, despite your claim in another response, this still isn't similar >> to AMD's variant (which still doesn't print any of these three >> attributes). > > I meant structure in terms of recursion logic, level checks etc. I will just > remove the extra prints to make it more similar. My original goal was to > print it similar to the existing MMU p2m table.Actually, retaining the read/write information here (and adding it to AMD's) would be quite useful imo. The superpage part, as said earlier, needs to be done differently anyway. Jan
>>> On 16.08.12 at 18:36, Santosh Jodh <santosh.jodh@citrix.com> wrote: > New key handler 'o' to dump the IOMMU p2m table for each domain. > Skips dumping table for domain0. > Intel and AMD specific iommu_ops handler for dumping p2m table. > > Incorporated feedback from Jan Beulich and Wei Wang. > Fixed indent printing with %*s. > Removed superfluous superpage and other attribute prints. > Make next_level use consistent for AMD IOMMU dumps. Warn if found > inconsistent. > > Signed-off-by: Santosh Jodh<santosh.jodh@citrix.com> Looks okay to me now. Wei, Zhang - can we get an ack (or more) from both of you? Thanks, Jan
On 08/16/2012 06:36 PM, Santosh Jodh wrote:> New key handler ''o'' to dump the IOMMU p2m table for each domain. > Skips dumping table for domain0. > Intel and AMD specific iommu_ops handler for dumping p2m table. > > Incorporated feedback from Jan Beulich and Wei Wang. > Fixed indent printing with %*s. > Removed superflous superpage and other attribute prints. > Make next_level use consistent for AMD IOMMU dumps. Warn if found inconsistent. > > Signed-off-by: Santosh Jodh<santosh.jodh@citrix.com> > > diff -r 6d56e31fe1e1 -r 575a53faf4e1 xen/drivers/passthrough/amd/pci_amd_iommu.c > --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Wed Aug 15 09:41:21 2012 +0100 > +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Thu Aug 16 09:28:24 2012 -0700 > @@ -22,6 +22,7 @@ > #include<xen/pci.h> > #include<xen/pci_regs.h> > #include<xen/paging.h> > +#include<xen/softirq.h> > #include<asm/hvm/iommu.h> > #include<asm/amd-iommu.h> > #include<asm/hvm/svm/amd-iommu-proto.h> > @@ -512,6 +513,80 @@ static int amd_iommu_group_id(u16 seg, u > > #include<asm/io_apic.h> > > +static void amd_dump_p2m_table_level(struct page_info* pg, int level, > + paddr_t gpa, int indent) > +{ > + paddr_t address; > + void *table_vaddr, *pde; > + paddr_t next_table_maddr; > + int index, next_level, present; > + u32 *entry; > + > + if ( level< 1 ) > + return; > + > + table_vaddr = __map_domain_page(pg); > + if ( table_vaddr == NULL ) > + { > + printk("Failed to map IOMMU domain page %"PRIpaddr"\n", > + page_to_maddr(pg)); > + return; > + } > + > + for ( index = 0; index< PTE_PER_TABLE_SIZE; index++ ) > + { > + if ( !(index % 2) ) > + process_pending_softirqs(); > + > + pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE); > + next_table_maddr = amd_iommu_get_next_table_from_pte(pde); > + entry = (u32*)pde; > + > + present = get_field_from_reg_u32(entry[0], > + IOMMU_PDE_PRESENT_MASK, > + IOMMU_PDE_PRESENT_SHIFT); > + > + if ( !present ) > + continue; > + > + next_level = 
get_field_from_reg_u32(entry[0], > + IOMMU_PDE_NEXT_LEVEL_MASK, > + IOMMU_PDE_NEXT_LEVEL_SHIFT); > + > + if ( next_level != (level - 1) ) > + { > + printk("IOMMU p2m table error. next_level = %d, expected %d\n", > + next_level, level - 1); > + > + continue; > + } Hi, This check is not proper for 2MB and 1GB pages. For example, if a guest uses 4-level page tables, for a 2MB entry, the next_level field will be 3 ->(l4)->2(l3)->0(l2), because l2 entries become PTEs and PTE entries have next_level = 0. I saw the following output for those pages: (XEN) IOMMU p2m table error. next_level = 0, expected 1 (XEN) IOMMU p2m table error. next_level = 0, expected 1 (XEN) IOMMU p2m table error. next_level = 0, expected 1 (XEN) IOMMU p2m table error. next_level = 0, expected 1 (XEN) IOMMU p2m table error. next_level = 0, expected 1 (XEN) IOMMU p2m table error. next_level = 0, expected 1 (XEN) IOMMU p2m table error. next_level = 0, expected 1 (XEN) IOMMU p2m table error. next_level = 0, expected 1 (XEN) IOMMU p2m table error. next_level = 0, expected 1 (XEN) IOMMU p2m table error. 
next_level = 0, expected 1 Thanks, Wei> + > + address = gpa + amd_offset_level_address(index, level); > + if ( next_level>= 1 ) > + amd_dump_p2m_table_level( > + maddr_to_page(next_table_maddr), next_level, > + address, indent + 1); > + else > + printk("%*sgfn: %08lx mfn: %08lx\n", > + indent, "", > + (unsigned long)PFN_DOWN(address), > + (unsigned long)PFN_DOWN(next_table_maddr)); > + } > + > + unmap_domain_page(table_vaddr); > +} > + > +static void amd_dump_p2m_table(struct domain *d) > +{ > + struct hvm_iommu *hd = domain_hvm_iommu(d); > + > + if ( !hd->root_table ) > + return; > + > + printk("p2m table has %d levels\n", hd->paging_mode); > + amd_dump_p2m_table_level(hd->root_table, hd->paging_mode, 0, 0); > +} > + > const struct iommu_ops amd_iommu_ops = { > .init = amd_iommu_domain_init, > .dom0_init = amd_iommu_dom0_init, > @@ -531,4 +606,5 @@ const struct iommu_ops amd_iommu_ops = { > .resume = amd_iommu_resume, > .share_p2m = amd_iommu_share_p2m, > .crash_shutdown = amd_iommu_suspend, > + .dump_p2m_table = amd_dump_p2m_table, > }; > diff -r 6d56e31fe1e1 -r 575a53faf4e1 xen/drivers/passthrough/iommu.c > --- a/xen/drivers/passthrough/iommu.c Wed Aug 15 09:41:21 2012 +0100 > +++ b/xen/drivers/passthrough/iommu.c Thu Aug 16 09:28:24 2012 -0700 > @@ -19,10 +19,12 @@ > #include<xen/paging.h> > #include<xen/guest_access.h> > #include<xen/softirq.h> > +#include<xen/keyhandler.h> > #include<xsm/xsm.h> > > static void parse_iommu_param(char *s); > static int iommu_populate_page_table(struct domain *d); > +static void iommu_dump_p2m_table(unsigned char key); > > /* > * The ''iommu'' parameter enables the IOMMU. 
Optional comma separated > @@ -54,6 +56,12 @@ bool_t __read_mostly amd_iommu_perdev_in > > DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb); > > +static struct keyhandler iommu_p2m_table = { > + .diagnostic = 0, > + .u.fn = iommu_dump_p2m_table, > + .desc = "dump iommu p2m table" > +}; > + > static void __init parse_iommu_param(char *s) > { > char *ss; > @@ -119,6 +127,7 @@ void __init iommu_dom0_init(struct domai > if ( !iommu_enabled ) > return; > > + register_keyhandler(''o'',&iommu_p2m_table); > d->need_iommu = !!iommu_dom0_strict; > if ( need_iommu(d) ) > { > @@ -654,6 +663,34 @@ int iommu_do_domctl( > return ret; > } > > +static void iommu_dump_p2m_table(unsigned char key) > +{ > + struct domain *d; > + const struct iommu_ops *ops; > + > + if ( !iommu_enabled ) > + { > + printk("IOMMU not enabled!\n"); > + return; > + } > + > + ops = iommu_get_ops(); > + for_each_domain(d) > + { > + if ( !d->domain_id ) > + continue; > + > + if ( iommu_use_hap_pt(d) ) > + { > + printk("\ndomain%d IOMMU p2m table shared with MMU: \n", d->domain_id); > + continue; > + } > + > + printk("\ndomain%d IOMMU p2m table: \n", d->domain_id); > + ops->dump_p2m_table(d); > + } > +} > + > /* > * Local variables: > * mode: C > diff -r 6d56e31fe1e1 -r 575a53faf4e1 xen/drivers/passthrough/vtd/iommu.c > --- a/xen/drivers/passthrough/vtd/iommu.c Wed Aug 15 09:41:21 2012 +0100 > +++ b/xen/drivers/passthrough/vtd/iommu.c Thu Aug 16 09:28:24 2012 -0700 > @@ -31,6 +31,7 @@ > #include<xen/pci.h> > #include<xen/pci_regs.h> > #include<xen/keyhandler.h> > +#include<xen/softirq.h> > #include<asm/msi.h> > #include<asm/irq.h> > #if defined(__i386__) || defined(__x86_64__) > @@ -2365,6 +2366,60 @@ static void vtd_resume(void) > } > } > > +static void vtd_dump_p2m_table_level(paddr_t pt_maddr, int level, paddr_t gpa, > + int indent) > +{ > + paddr_t address; > + int i; > + struct dma_pte *pt_vaddr, *pte; > + int next_level; > + > + if ( level< 1 ) > + return; > + > + pt_vaddr = map_vtd_domain_page(pt_maddr); 
> + if ( pt_vaddr == NULL ) > + { > + printk("Failed to map VT-D domain page %"PRIpaddr"\n", pt_maddr); > + return; > + } > + > + next_level = level - 1; > + for ( i = 0; i< PTE_NUM; i++ ) > + { > + if ( !(i % 2) ) > + process_pending_softirqs(); > + > + pte =&pt_vaddr[i]; > + if ( !dma_pte_present(*pte) ) > + continue; > + > + address = gpa + offset_level_address(i, level); > + if ( next_level>= 1 ) > + vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level, > + address, indent + 1); > + else > + printk("%*sgfn: %08lx mfn: %08lx\n", > + indent, "", > + (unsigned long)(address>> PAGE_SHIFT_4K), > + (unsigned long)(pte->val>> PAGE_SHIFT_4K)); > + } > + > + unmap_vtd_domain_page(pt_vaddr); > +} > + > +static void vtd_dump_p2m_table(struct domain *d) > +{ > + struct hvm_iommu *hd; > + > + if ( list_empty(&acpi_drhd_units) ) > + return; > + > + hd = domain_hvm_iommu(d); > + printk("p2m table has %d levels\n", agaw_to_level(hd->agaw)); > + vtd_dump_p2m_table_level(hd->pgd_maddr, agaw_to_level(hd->agaw), 0, 0); > +} > + > const struct iommu_ops intel_iommu_ops = { > .init = intel_iommu_domain_init, > .dom0_init = intel_iommu_dom0_init, > @@ -2387,6 +2442,7 @@ const struct iommu_ops intel_iommu_ops > .crash_shutdown = vtd_crash_shutdown, > .iotlb_flush = intel_iommu_iotlb_flush, > .iotlb_flush_all = intel_iommu_iotlb_flush_all, > + .dump_p2m_table = vtd_dump_p2m_table, > }; > > /* > diff -r 6d56e31fe1e1 -r 575a53faf4e1 xen/drivers/passthrough/vtd/iommu.h > --- a/xen/drivers/passthrough/vtd/iommu.h Wed Aug 15 09:41:21 2012 +0100 > +++ b/xen/drivers/passthrough/vtd/iommu.h Thu Aug 16 09:28:24 2012 -0700 > @@ -248,6 +248,8 @@ struct context_entry { > #define level_to_offset_bits(l) (12 + (l - 1) * LEVEL_STRIDE) > #define address_level_offset(addr, level) \ > ((addr>> level_to_offset_bits(level))& LEVEL_MASK) > +#define offset_level_address(offset, level) \ > + ((u64)(offset)<< level_to_offset_bits(level)) > #define level_mask(l) (((u64)(-1))<< level_to_offset_bits(l)) > 
#define level_size(l) (1<< level_to_offset_bits(l)) > #define align_to_level(addr, l) ((addr + level_size(l) - 1)& level_mask(l)) > diff -r 6d56e31fe1e1 -r 575a53faf4e1 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h > --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Wed Aug 15 09:41:21 2012 +0100 > +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Thu Aug 16 09:28:24 2012 -0700 > @@ -38,6 +38,10 @@ > #define PTE_PER_TABLE_ALLOC(entries) \ > PAGE_SIZE * (PTE_PER_TABLE_ALIGN(entries)>> PTE_PER_TABLE_SHIFT) > > +#define amd_offset_level_address(offset, level) \ > + ((u64)(offset)<< (12 + (PTE_PER_TABLE_SHIFT * \ > + (level - IOMMU_PAGING_MODE_LEVEL_1)))) > + > #define PCI_MIN_CAP_OFFSET 0x40 > #define PCI_MAX_CAP_BLOCKS 48 > #define PCI_CAP_PTR_MASK 0xFC > diff -r 6d56e31fe1e1 -r 575a53faf4e1 xen/include/xen/iommu.h > --- a/xen/include/xen/iommu.h Wed Aug 15 09:41:21 2012 +0100 > +++ b/xen/include/xen/iommu.h Thu Aug 16 09:28:24 2012 -0700 > @@ -141,6 +141,7 @@ struct iommu_ops { > void (*crash_shutdown)(void); > void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count); > void (*iotlb_flush_all)(struct domain *d); > + void (*dump_p2m_table)(struct domain *d); > }; > > void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value); >
> -----Original Message----- > From: Wei Wang [mailto:wei.wang2@amd.com] > Sent: Friday, August 17, 2012 4:15 AM > To: Santosh Jodh > Cc: xen-devel@lists.xensource.com; xiantao.zhang@intel.com; Tim > (Xen.org); JBeulich@suse.com > Subject: Re: [PATCH] Dump IOMMU p2m table > > On 08/16/2012 06:36 PM, Santosh Jodh wrote: > > New key handler ''o'' to dump the IOMMU p2m table for each domain. > > Skips dumping table for domain0. > > Intel and AMD specific iommu_ops handler for dumping p2m table. > > > > Incorporated feedback from Jan Beulich and Wei Wang. > > Fixed indent printing with %*s. > > Removed superflous superpage and other attribute prints. > > Make next_level use consistent for AMD IOMMU dumps. Warn if found > inconsistent. > > > > Signed-off-by: Santosh Jodh<santosh.jodh@citrix.com> > > > > diff -r 6d56e31fe1e1 -r 575a53faf4e1 > xen/drivers/passthrough/amd/pci_amd_iommu.c > > --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Wed Aug 15 > 09:41:21 2012 +0100 > > +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Thu Aug 16 > 09:28:24 2012 -0700 > > @@ -22,6 +22,7 @@ > > #include<xen/pci.h> > > #include<xen/pci_regs.h> > > #include<xen/paging.h> > > +#include<xen/softirq.h> > > #include<asm/hvm/iommu.h> > > #include<asm/amd-iommu.h> > > #include<asm/hvm/svm/amd-iommu-proto.h> > > @@ -512,6 +513,80 @@ static int amd_iommu_group_id(u16 seg, u > > > > #include<asm/io_apic.h> > > > > +static void amd_dump_p2m_table_level(struct page_info* pg, int > level, > > + paddr_t gpa, int indent) { > > + paddr_t address; > > + void *table_vaddr, *pde; > > + paddr_t next_table_maddr; > > + int index, next_level, present; > > + u32 *entry; > > + > > + if ( level< 1 ) > > + return; > > + > > + table_vaddr = __map_domain_page(pg); > > + if ( table_vaddr == NULL ) > > + { > > + printk("Failed to map IOMMU domain page %"PRIpaddr"\n", > > + page_to_maddr(pg)); > > + return; > > + } > > + > > + for ( index = 0; index< PTE_PER_TABLE_SIZE; index++ ) > > + { > > + if ( !(index % 2) ) > 
> + process_pending_softirqs(); > > + > > + pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE); > > + next_table_maddr = amd_iommu_get_next_table_from_pte(pde); > > + entry = (u32*)pde; > > + > > + present = get_field_from_reg_u32(entry[0], > > + IOMMU_PDE_PRESENT_MASK, > > + IOMMU_PDE_PRESENT_SHIFT); > > + > > + if ( !present ) > > + continue; > > + > > + next_level = get_field_from_reg_u32(entry[0], > > + > IOMMU_PDE_NEXT_LEVEL_MASK, > > + > > + IOMMU_PDE_NEXT_LEVEL_SHIFT); > > + > > + if ( next_level != (level - 1) ) > > + { > > + printk("IOMMU p2m table error. next_level = %d, expected > %d\n", > > + next_level, level - 1); > > + > > + continue; > > + } > > HI, > > This check is not proper for 2MB and 1GB pages. For example, if a guest > 4 level page tables, for a 2MB entry, the next_level field will be 3 > ->(l4)->2(l3)->0(l2), because l2 entries becomes PTEs and PTE entries > have next_level = 0. I saw following output for those pages: > > (XEN) IOMMU p2m table error. next_level = 0, expected 1 > (XEN) IOMMU p2m table error. next_level = 0, expected 1 > (XEN) IOMMU p2m table error. next_level = 0, expected 1 > (XEN) IOMMU p2m table error. next_level = 0, expected 1 > (XEN) IOMMU p2m table error. next_level = 0, expected 1 > (XEN) IOMMU p2m table error. next_level = 0, expected 1 > (XEN) IOMMU p2m table error. next_level = 0, expected 1 > (XEN) IOMMU p2m table error. next_level = 0, expected 1 > (XEN) IOMMU p2m table error. next_level = 0, expected 1 > (XEN) IOMMU p2m table error. next_level = 0, expected 1 > > Thanks, > WeiHow about changing the check to: if ( next_level && (next_level != (level - 1)) ) Thanks, Santosh
On 08/17/2012 04:31 PM, Santosh Jodh wrote:> > >> -----Original Message----- >> From: Wei Wang [mailto:wei.wang2@amd.com] >> Sent: Friday, August 17, 2012 4:15 AM >> To: Santosh Jodh >> Cc: xen-devel@lists.xensource.com; xiantao.zhang@intel.com; Tim >> (Xen.org); JBeulich@suse.com >> Subject: Re: [PATCH] Dump IOMMU p2m table >> >> On 08/16/2012 06:36 PM, Santosh Jodh wrote: >>> New key handler ''o'' to dump the IOMMU p2m table for each domain. >>> Skips dumping table for domain0. >>> Intel and AMD specific iommu_ops handler for dumping p2m table. >>> >>> Incorporated feedback from Jan Beulich and Wei Wang. >>> Fixed indent printing with %*s. >>> Removed superflous superpage and other attribute prints. >>> Make next_level use consistent for AMD IOMMU dumps. Warn if found >> inconsistent. >>> >>> Signed-off-by: Santosh Jodh<santosh.jodh@citrix.com> >>> >>> diff -r 6d56e31fe1e1 -r 575a53faf4e1 >> xen/drivers/passthrough/amd/pci_amd_iommu.c >>> --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Wed Aug 15 >> 09:41:21 2012 +0100 >>> +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Thu Aug 16 >> 09:28:24 2012 -0700 >>> @@ -22,6 +22,7 @@ >>> #include<xen/pci.h> >>> #include<xen/pci_regs.h> >>> #include<xen/paging.h> >>> +#include<xen/softirq.h> >>> #include<asm/hvm/iommu.h> >>> #include<asm/amd-iommu.h> >>> #include<asm/hvm/svm/amd-iommu-proto.h> >>> @@ -512,6 +513,80 @@ static int amd_iommu_group_id(u16 seg, u >>> >>> #include<asm/io_apic.h> >>> >>> +static void amd_dump_p2m_table_level(struct page_info* pg, int >> level, >>> + paddr_t gpa, int indent) { >>> + paddr_t address; >>> + void *table_vaddr, *pde; >>> + paddr_t next_table_maddr; >>> + int index, next_level, present; >>> + u32 *entry; >>> + >>> + if ( level< 1 ) >>> + return; >>> + >>> + table_vaddr = __map_domain_page(pg); >>> + if ( table_vaddr == NULL ) >>> + { >>> + printk("Failed to map IOMMU domain page %"PRIpaddr"\n", >>> + page_to_maddr(pg)); >>> + return; >>> + } >>> + >>> + for ( index = 0; index< 
PTE_PER_TABLE_SIZE; index++ ) >>> + { >>> + if ( !(index % 2) ) >>> + process_pending_softirqs(); >>> + >>> + pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE); >>> + next_table_maddr = amd_iommu_get_next_table_from_pte(pde); >>> + entry = (u32*)pde; >>> + >>> + present = get_field_from_reg_u32(entry[0], >>> + IOMMU_PDE_PRESENT_MASK, >>> + IOMMU_PDE_PRESENT_SHIFT); >>> + >>> + if ( !present ) >>> + continue; >>> + >>> + next_level = get_field_from_reg_u32(entry[0], >>> + >> IOMMU_PDE_NEXT_LEVEL_MASK, >>> + >>> + IOMMU_PDE_NEXT_LEVEL_SHIFT); >>> + >>> + if ( next_level != (level - 1) ) >>> + { >>> + printk("IOMMU p2m table error. next_level = %d, expected >> %d\n", >>> + next_level, level - 1); >>> + >>> + continue; >>> + } >> >> HI, >> >> This check is not proper for 2MB and 1GB pages. For example, if a guest >> 4 level page tables, for a 2MB entry, the next_level field will be 3 >> ->(l4)->2(l3)->0(l2), because l2 entries becomes PTEs and PTE entries >> have next_level = 0. I saw following output for those pages: >> >> (XEN) IOMMU p2m table error. next_level = 0, expected 1 >> (XEN) IOMMU p2m table error. next_level = 0, expected 1 >> (XEN) IOMMU p2m table error. next_level = 0, expected 1 >> (XEN) IOMMU p2m table error. next_level = 0, expected 1 >> (XEN) IOMMU p2m table error. next_level = 0, expected 1 >> (XEN) IOMMU p2m table error. next_level = 0, expected 1 >> (XEN) IOMMU p2m table error. next_level = 0, expected 1 >> (XEN) IOMMU p2m table error. next_level = 0, expected 1 >> (XEN) IOMMU p2m table error. next_level = 0, expected 1 >> (XEN) IOMMU p2m table error. next_level = 0, expected 1 >> >> Thanks, >> Wei > > How about changing the check to: > if ( next_level&& (next_level != (level - 1)) )That should be good, since we don''t have skip levels. Thanks, Wei> > Thanks, > Santosh >
New key handler ''o'' to dump the IOMMU p2m table for each domain. Skips dumping table for domain0. Intel and AMD specific iommu_ops handler for dumping p2m table. Incorporated feedback from Jan Beulich and Wei Wang. Fixed indent printing with %*s. Removed superflous superpage and other attribute prints. Make next_level use consistent for AMD IOMMU dumps. Warn if found inconsistent. AMD IOMMU does skip levels. Handle 2mb and 1gb IOMMU page size for AMD. Signed-off-by: Santosh Jodh<santosh.jodh@citrix.com> diff -r 6d56e31fe1e1 -r e26e2d1e5642 xen/drivers/passthrough/amd/pci_amd_iommu.c --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Wed Aug 15 09:41:21 2012 +0100 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Fri Aug 17 07:53:38 2012 -0700 @@ -22,6 +22,7 @@ #include <xen/pci.h> #include <xen/pci_regs.h> #include <xen/paging.h> +#include <xen/softirq.h> #include <asm/hvm/iommu.h> #include <asm/amd-iommu.h> #include <asm/hvm/svm/amd-iommu-proto.h> @@ -512,6 +513,80 @@ static int amd_iommu_group_id(u16 seg, u #include <asm/io_apic.h> +static void amd_dump_p2m_table_level(struct page_info* pg, int level, + paddr_t gpa, int indent) +{ + paddr_t address; + void *table_vaddr, *pde; + paddr_t next_table_maddr; + int index, next_level, present; + u32 *entry; + + if ( level < 1 ) + return; + + table_vaddr = __map_domain_page(pg); + if ( table_vaddr == NULL ) + { + printk("Failed to map IOMMU domain page %"PRIpaddr"\n", + page_to_maddr(pg)); + return; + } + + for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ ) + { + if ( !(index % 2) ) + process_pending_softirqs(); + + pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE); + next_table_maddr = amd_iommu_get_next_table_from_pte(pde); + entry = (u32*)pde; + + present = get_field_from_reg_u32(entry[0], + IOMMU_PDE_PRESENT_MASK, + IOMMU_PDE_PRESENT_SHIFT); + + if ( !present ) + continue; + + next_level = get_field_from_reg_u32(entry[0], + IOMMU_PDE_NEXT_LEVEL_MASK, + IOMMU_PDE_NEXT_LEVEL_SHIFT); + + if ( next_level && 
(next_level != (level - 1)) ) + { + printk("IOMMU p2m table error. next_level = %d, expected %d\n", + next_level, level - 1); + + continue; + } + + address = gpa + amd_offset_level_address(index, level); + if ( next_level >= 1 ) + amd_dump_p2m_table_level( + maddr_to_page(next_table_maddr), next_level, + address, indent + 1); + else + printk("%*sgfn: %08lx mfn: %08lx\n", + indent, "", + (unsigned long)PFN_DOWN(address), + (unsigned long)PFN_DOWN(next_table_maddr)); + } + + unmap_domain_page(table_vaddr); +} + +static void amd_dump_p2m_table(struct domain *d) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); + + if ( !hd->root_table ) + return; + + printk("p2m table has %d levels\n", hd->paging_mode); + amd_dump_p2m_table_level(hd->root_table, hd->paging_mode, 0, 0); +} + const struct iommu_ops amd_iommu_ops = { .init = amd_iommu_domain_init, .dom0_init = amd_iommu_dom0_init, @@ -531,4 +606,5 @@ const struct iommu_ops amd_iommu_ops = { .resume = amd_iommu_resume, .share_p2m = amd_iommu_share_p2m, .crash_shutdown = amd_iommu_suspend, + .dump_p2m_table = amd_dump_p2m_table, }; diff -r 6d56e31fe1e1 -r e26e2d1e5642 xen/drivers/passthrough/iommu.c --- a/xen/drivers/passthrough/iommu.c Wed Aug 15 09:41:21 2012 +0100 +++ b/xen/drivers/passthrough/iommu.c Fri Aug 17 07:53:38 2012 -0700 @@ -19,10 +19,12 @@ #include <xen/paging.h> #include <xen/guest_access.h> #include <xen/softirq.h> +#include <xen/keyhandler.h> #include <xsm/xsm.h> static void parse_iommu_param(char *s); static int iommu_populate_page_table(struct domain *d); +static void iommu_dump_p2m_table(unsigned char key); /* * The ''iommu'' parameter enables the IOMMU. 
Optional comma separated @@ -54,6 +56,12 @@ bool_t __read_mostly amd_iommu_perdev_in DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb); +static struct keyhandler iommu_p2m_table = { + .diagnostic = 0, + .u.fn = iommu_dump_p2m_table, + .desc = "dump iommu p2m table" +}; + static void __init parse_iommu_param(char *s) { char *ss; @@ -119,6 +127,7 @@ void __init iommu_dom0_init(struct domai if ( !iommu_enabled ) return; + register_keyhandler(''o'', &iommu_p2m_table); d->need_iommu = !!iommu_dom0_strict; if ( need_iommu(d) ) { @@ -654,6 +663,34 @@ int iommu_do_domctl( return ret; } +static void iommu_dump_p2m_table(unsigned char key) +{ + struct domain *d; + const struct iommu_ops *ops; + + if ( !iommu_enabled ) + { + printk("IOMMU not enabled!\n"); + return; + } + + ops = iommu_get_ops(); + for_each_domain(d) + { + if ( !d->domain_id ) + continue; + + if ( iommu_use_hap_pt(d) ) + { + printk("\ndomain%d IOMMU p2m table shared with MMU: \n", d->domain_id); + continue; + } + + printk("\ndomain%d IOMMU p2m table: \n", d->domain_id); + ops->dump_p2m_table(d); + } +} + /* * Local variables: * mode: C diff -r 6d56e31fe1e1 -r e26e2d1e5642 xen/drivers/passthrough/vtd/iommu.c --- a/xen/drivers/passthrough/vtd/iommu.c Wed Aug 15 09:41:21 2012 +0100 +++ b/xen/drivers/passthrough/vtd/iommu.c Fri Aug 17 07:53:38 2012 -0700 @@ -31,6 +31,7 @@ #include <xen/pci.h> #include <xen/pci_regs.h> #include <xen/keyhandler.h> +#include <xen/softirq.h> #include <asm/msi.h> #include <asm/irq.h> #if defined(__i386__) || defined(__x86_64__) @@ -2365,6 +2366,60 @@ static void vtd_resume(void) } } +static void vtd_dump_p2m_table_level(paddr_t pt_maddr, int level, paddr_t gpa, + int indent) +{ + paddr_t address; + int i; + struct dma_pte *pt_vaddr, *pte; + int next_level; + + if ( level < 1 ) + return; + + pt_vaddr = map_vtd_domain_page(pt_maddr); + if ( pt_vaddr == NULL ) + { + printk("Failed to map VT-D domain page %"PRIpaddr"\n", pt_maddr); + return; + } + + next_level = level - 1; + for ( i = 0; i < 
PTE_NUM; i++ ) + { + if ( !(i % 2) ) + process_pending_softirqs(); + + pte = &pt_vaddr[i]; + if ( !dma_pte_present(*pte) ) + continue; + + address = gpa + offset_level_address(i, level); + if ( next_level >= 1 ) + vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level, + address, indent + 1); + else + printk("%*sgfn: %08lx mfn: %08lx\n", + indent, "", + (unsigned long)(address >> PAGE_SHIFT_4K), + (unsigned long)(pte->val >> PAGE_SHIFT_4K)); + } + + unmap_vtd_domain_page(pt_vaddr); +} + +static void vtd_dump_p2m_table(struct domain *d) +{ + struct hvm_iommu *hd; + + if ( list_empty(&acpi_drhd_units) ) + return; + + hd = domain_hvm_iommu(d); + printk("p2m table has %d levels\n", agaw_to_level(hd->agaw)); + vtd_dump_p2m_table_level(hd->pgd_maddr, agaw_to_level(hd->agaw), 0, 0); +} + const struct iommu_ops intel_iommu_ops = { .init = intel_iommu_domain_init, .dom0_init = intel_iommu_dom0_init, @@ -2387,6 +2442,7 @@ const struct iommu_ops intel_iommu_ops .crash_shutdown = vtd_crash_shutdown, .iotlb_flush = intel_iommu_iotlb_flush, .iotlb_flush_all = intel_iommu_iotlb_flush_all, + .dump_p2m_table = vtd_dump_p2m_table, }; /* diff -r 6d56e31fe1e1 -r e26e2d1e5642 xen/drivers/passthrough/vtd/iommu.h --- a/xen/drivers/passthrough/vtd/iommu.h Wed Aug 15 09:41:21 2012 +0100 +++ b/xen/drivers/passthrough/vtd/iommu.h Fri Aug 17 07:53:38 2012 -0700 @@ -248,6 +248,8 @@ struct context_entry { #define level_to_offset_bits(l) (12 + (l - 1) * LEVEL_STRIDE) #define address_level_offset(addr, level) \ ((addr >> level_to_offset_bits(level)) & LEVEL_MASK) +#define offset_level_address(offset, level) \ + ((u64)(offset) << level_to_offset_bits(level)) #define level_mask(l) (((u64)(-1)) << level_to_offset_bits(l)) #define level_size(l) (1 << level_to_offset_bits(l)) #define align_to_level(addr, l) ((addr + level_size(l) - 1) & level_mask(l)) diff -r 6d56e31fe1e1 -r e26e2d1e5642 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Wed Aug 15 
09:41:21 2012 +0100 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Fri Aug 17 07:53:38 2012 -0700 @@ -38,6 +38,10 @@ #define PTE_PER_TABLE_ALLOC(entries) \ PAGE_SIZE * (PTE_PER_TABLE_ALIGN(entries) >> PTE_PER_TABLE_SHIFT) +#define amd_offset_level_address(offset, level) \ + ((u64)(offset) << (12 + (PTE_PER_TABLE_SHIFT * \ + (level - IOMMU_PAGING_MODE_LEVEL_1)))) + #define PCI_MIN_CAP_OFFSET 0x40 #define PCI_MAX_CAP_BLOCKS 48 #define PCI_CAP_PTR_MASK 0xFC diff -r 6d56e31fe1e1 -r e26e2d1e5642 xen/include/xen/iommu.h --- a/xen/include/xen/iommu.h Wed Aug 15 09:41:21 2012 +0100 +++ b/xen/include/xen/iommu.h Fri Aug 17 07:53:38 2012 -0700 @@ -141,6 +141,7 @@ struct iommu_ops { void (*crash_shutdown)(void); void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count); void (*iotlb_flush_all)(struct domain *d); + void (*dump_p2m_table)(struct domain *d); }; void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value);
New key handler ''o'' to dump the IOMMU p2m table for each domain. Skips dumping table for domain0. Intel and AMD specific iommu_ops handler for dumping p2m table. Incorporated feedback from Jan Beulich and Wei Wang. Fixed indent printing with %*s. Removed superflous superpage and other attribute prints. Make next_level use consistent for AMD IOMMU dumps. Warn if found inconsistent. AMD IOMMU does not skip levels. Handle 2mb and 1gb IOMMU page size for AMD. Signed-off-by: Santosh Jodh<santosh.jodh@citrix.com> diff -r 6d56e31fe1e1 -r 995803806158 xen/drivers/passthrough/amd/pci_amd_iommu.c --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Wed Aug 15 09:41:21 2012 +0100 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Fri Aug 17 07:56:55 2012 -0700 @@ -22,6 +22,7 @@ #include <xen/pci.h> #include <xen/pci_regs.h> #include <xen/paging.h> +#include <xen/softirq.h> #include <asm/hvm/iommu.h> #include <asm/amd-iommu.h> #include <asm/hvm/svm/amd-iommu-proto.h> @@ -512,6 +513,80 @@ static int amd_iommu_group_id(u16 seg, u #include <asm/io_apic.h> +static void amd_dump_p2m_table_level(struct page_info* pg, int level, + paddr_t gpa, int indent) +{ + paddr_t address; + void *table_vaddr, *pde; + paddr_t next_table_maddr; + int index, next_level, present; + u32 *entry; + + if ( level < 1 ) + return; + + table_vaddr = __map_domain_page(pg); + if ( table_vaddr == NULL ) + { + printk("Failed to map IOMMU domain page %"PRIpaddr"\n", + page_to_maddr(pg)); + return; + } + + for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ ) + { + if ( !(index % 2) ) + process_pending_softirqs(); + + pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE); + next_table_maddr = amd_iommu_get_next_table_from_pte(pde); + entry = (u32*)pde; + + present = get_field_from_reg_u32(entry[0], + IOMMU_PDE_PRESENT_MASK, + IOMMU_PDE_PRESENT_SHIFT); + + if ( !present ) + continue; + + next_level = get_field_from_reg_u32(entry[0], + IOMMU_PDE_NEXT_LEVEL_MASK, + IOMMU_PDE_NEXT_LEVEL_SHIFT); + + if ( next_level 
&& (next_level != (level - 1)) ) + { + printk("IOMMU p2m table error. next_level = %d, expected %d\n", + next_level, level - 1); + + continue; + } + + address = gpa + amd_offset_level_address(index, level); + if ( next_level >= 1 ) + amd_dump_p2m_table_level( + maddr_to_page(next_table_maddr), next_level, + address, indent + 1); + else + printk("%*sgfn: %08lx mfn: %08lx\n", + indent, "", + (unsigned long)PFN_DOWN(address), + (unsigned long)PFN_DOWN(next_table_maddr)); + } + + unmap_domain_page(table_vaddr); +} + +static void amd_dump_p2m_table(struct domain *d) +{ + struct hvm_iommu *hd = domain_hvm_iommu(d); + + if ( !hd->root_table ) + return; + + printk("p2m table has %d levels\n", hd->paging_mode); + amd_dump_p2m_table_level(hd->root_table, hd->paging_mode, 0, 0); +} + const struct iommu_ops amd_iommu_ops = { .init = amd_iommu_domain_init, .dom0_init = amd_iommu_dom0_init, @@ -531,4 +606,5 @@ const struct iommu_ops amd_iommu_ops = { .resume = amd_iommu_resume, .share_p2m = amd_iommu_share_p2m, .crash_shutdown = amd_iommu_suspend, + .dump_p2m_table = amd_dump_p2m_table, }; diff -r 6d56e31fe1e1 -r 995803806158 xen/drivers/passthrough/iommu.c --- a/xen/drivers/passthrough/iommu.c Wed Aug 15 09:41:21 2012 +0100 +++ b/xen/drivers/passthrough/iommu.c Fri Aug 17 07:56:55 2012 -0700 @@ -19,10 +19,12 @@ #include <xen/paging.h> #include <xen/guest_access.h> #include <xen/softirq.h> +#include <xen/keyhandler.h> #include <xsm/xsm.h> static void parse_iommu_param(char *s); static int iommu_populate_page_table(struct domain *d); +static void iommu_dump_p2m_table(unsigned char key); /* * The ''iommu'' parameter enables the IOMMU. 
Optional comma separated @@ -54,6 +56,12 @@ bool_t __read_mostly amd_iommu_perdev_in DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb); +static struct keyhandler iommu_p2m_table = { + .diagnostic = 0, + .u.fn = iommu_dump_p2m_table, + .desc = "dump iommu p2m table" +}; + static void __init parse_iommu_param(char *s) { char *ss; @@ -119,6 +127,7 @@ void __init iommu_dom0_init(struct domai if ( !iommu_enabled ) return; + register_keyhandler(''o'', &iommu_p2m_table); d->need_iommu = !!iommu_dom0_strict; if ( need_iommu(d) ) { @@ -654,6 +663,34 @@ int iommu_do_domctl( return ret; } +static void iommu_dump_p2m_table(unsigned char key) +{ + struct domain *d; + const struct iommu_ops *ops; + + if ( !iommu_enabled ) + { + printk("IOMMU not enabled!\n"); + return; + } + + ops = iommu_get_ops(); + for_each_domain(d) + { + if ( !d->domain_id ) + continue; + + if ( iommu_use_hap_pt(d) ) + { + printk("\ndomain%d IOMMU p2m table shared with MMU: \n", d->domain_id); + continue; + } + + printk("\ndomain%d IOMMU p2m table: \n", d->domain_id); + ops->dump_p2m_table(d); + } +} + /* * Local variables: * mode: C diff -r 6d56e31fe1e1 -r 995803806158 xen/drivers/passthrough/vtd/iommu.c --- a/xen/drivers/passthrough/vtd/iommu.c Wed Aug 15 09:41:21 2012 +0100 +++ b/xen/drivers/passthrough/vtd/iommu.c Fri Aug 17 07:56:55 2012 -0700 @@ -31,6 +31,7 @@ #include <xen/pci.h> #include <xen/pci_regs.h> #include <xen/keyhandler.h> +#include <xen/softirq.h> #include <asm/msi.h> #include <asm/irq.h> #if defined(__i386__) || defined(__x86_64__) @@ -2365,6 +2366,60 @@ static void vtd_resume(void) } } +static void vtd_dump_p2m_table_level(paddr_t pt_maddr, int level, paddr_t gpa, + int indent) +{ + paddr_t address; + int i; + struct dma_pte *pt_vaddr, *pte; + int next_level; + + if ( level < 1 ) + return; + + pt_vaddr = map_vtd_domain_page(pt_maddr); + if ( pt_vaddr == NULL ) + { + printk("Failed to map VT-D domain page %"PRIpaddr"\n", pt_maddr); + return; + } + + next_level = level - 1; + for ( i = 0; i < 
PTE_NUM; i++ ) + { + if ( !(i % 2) ) + process_pending_softirqs(); + + pte = &pt_vaddr[i]; + if ( !dma_pte_present(*pte) ) + continue; + + address = gpa + offset_level_address(i, level); + if ( next_level >= 1 ) + vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level, + address, indent + 1); + else + printk("%*sgfn: %08lx mfn: %08lx\n", + indent, "", + (unsigned long)(address >> PAGE_SHIFT_4K), + (unsigned long)(pte->val >> PAGE_SHIFT_4K)); + } + + unmap_vtd_domain_page(pt_vaddr); +} + +static void vtd_dump_p2m_table(struct domain *d) +{ + struct hvm_iommu *hd; + + if ( list_empty(&acpi_drhd_units) ) + return; + + hd = domain_hvm_iommu(d); + printk("p2m table has %d levels\n", agaw_to_level(hd->agaw)); + vtd_dump_p2m_table_level(hd->pgd_maddr, agaw_to_level(hd->agaw), 0, 0); +} + const struct iommu_ops intel_iommu_ops = { .init = intel_iommu_domain_init, .dom0_init = intel_iommu_dom0_init, @@ -2387,6 +2442,7 @@ const struct iommu_ops intel_iommu_ops .crash_shutdown = vtd_crash_shutdown, .iotlb_flush = intel_iommu_iotlb_flush, .iotlb_flush_all = intel_iommu_iotlb_flush_all, + .dump_p2m_table = vtd_dump_p2m_table, }; /* diff -r 6d56e31fe1e1 -r 995803806158 xen/drivers/passthrough/vtd/iommu.h --- a/xen/drivers/passthrough/vtd/iommu.h Wed Aug 15 09:41:21 2012 +0100 +++ b/xen/drivers/passthrough/vtd/iommu.h Fri Aug 17 07:56:55 2012 -0700 @@ -248,6 +248,8 @@ struct context_entry { #define level_to_offset_bits(l) (12 + (l - 1) * LEVEL_STRIDE) #define address_level_offset(addr, level) \ ((addr >> level_to_offset_bits(level)) & LEVEL_MASK) +#define offset_level_address(offset, level) \ + ((u64)(offset) << level_to_offset_bits(level)) #define level_mask(l) (((u64)(-1)) << level_to_offset_bits(l)) #define level_size(l) (1 << level_to_offset_bits(l)) #define align_to_level(addr, l) ((addr + level_size(l) - 1) & level_mask(l)) diff -r 6d56e31fe1e1 -r 995803806158 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Wed Aug 15 
09:41:21 2012 +0100 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Fri Aug 17 07:56:55 2012 -0700 @@ -38,6 +38,10 @@ #define PTE_PER_TABLE_ALLOC(entries) \ PAGE_SIZE * (PTE_PER_TABLE_ALIGN(entries) >> PTE_PER_TABLE_SHIFT) +#define amd_offset_level_address(offset, level) \ + ((u64)(offset) << (12 + (PTE_PER_TABLE_SHIFT * \ + (level - IOMMU_PAGING_MODE_LEVEL_1)))) + #define PCI_MIN_CAP_OFFSET 0x40 #define PCI_MAX_CAP_BLOCKS 48 #define PCI_CAP_PTR_MASK 0xFC diff -r 6d56e31fe1e1 -r 995803806158 xen/include/xen/iommu.h --- a/xen/include/xen/iommu.h Wed Aug 15 09:41:21 2012 +0100 +++ b/xen/include/xen/iommu.h Fri Aug 17 07:56:55 2012 -0700 @@ -141,6 +141,7 @@ struct iommu_ops { void (*crash_shutdown)(void); void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count); void (*iotlb_flush_all)(struct domain *d); + void (*dump_p2m_table)(struct domain *d); }; void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value);
Tested and Acked. Thanks, Wei On 08/17/2012 04:57 PM, Santosh Jodh wrote:> New key handler ''o'' to dump the IOMMU p2m table for each domain. > Skips dumping table for domain0. > Intel and AMD specific iommu_ops handler for dumping p2m table. > > Incorporated feedback from Jan Beulich and Wei Wang. > Fixed indent printing with %*s. > Removed superflous superpage and other attribute prints. > Make next_level use consistent for AMD IOMMU dumps. Warn if found inconsistent. > AMD IOMMU does not skip levels. Handle 2mb and 1gb IOMMU page size for AMD. > > Signed-off-by: Santosh Jodh<santosh.jodh@citrix.com> > > diff -r 6d56e31fe1e1 -r 995803806158 xen/drivers/passthrough/amd/pci_amd_iommu.c > --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Wed Aug 15 09:41:21 2012 +0100 > +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Fri Aug 17 07:56:55 2012 -0700 > @@ -22,6 +22,7 @@ > #include<xen/pci.h> > #include<xen/pci_regs.h> > #include<xen/paging.h> > +#include<xen/softirq.h> > #include<asm/hvm/iommu.h> > #include<asm/amd-iommu.h> > #include<asm/hvm/svm/amd-iommu-proto.h> > @@ -512,6 +513,80 @@ static int amd_iommu_group_id(u16 seg, u > > #include<asm/io_apic.h> > > +static void amd_dump_p2m_table_level(struct page_info* pg, int level, > + paddr_t gpa, int indent) > +{ > + paddr_t address; > + void *table_vaddr, *pde; > + paddr_t next_table_maddr; > + int index, next_level, present; > + u32 *entry; > + > + if ( level< 1 ) > + return; > + > + table_vaddr = __map_domain_page(pg); > + if ( table_vaddr == NULL ) > + { > + printk("Failed to map IOMMU domain page %"PRIpaddr"\n", > + page_to_maddr(pg)); > + return; > + } > + > + for ( index = 0; index< PTE_PER_TABLE_SIZE; index++ ) > + { > + if ( !(index % 2) ) > + process_pending_softirqs(); > + > + pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE); > + next_table_maddr = amd_iommu_get_next_table_from_pte(pde); > + entry = (u32*)pde; > + > + present = get_field_from_reg_u32(entry[0], > + IOMMU_PDE_PRESENT_MASK, > + 
IOMMU_PDE_PRESENT_SHIFT); > + > + if ( !present ) > + continue; > + > + next_level = get_field_from_reg_u32(entry[0], > + IOMMU_PDE_NEXT_LEVEL_MASK, > + IOMMU_PDE_NEXT_LEVEL_SHIFT); > + > + if ( next_level&& (next_level != (level - 1)) ) > + { > + printk("IOMMU p2m table error. next_level = %d, expected %d\n", > + next_level, level - 1); > + > + continue; > + } > + > + address = gpa + amd_offset_level_address(index, level); > + if ( next_level>= 1 ) > + amd_dump_p2m_table_level( > + maddr_to_page(next_table_maddr), next_level, > + address, indent + 1); > + else > + printk("%*sgfn: %08lx mfn: %08lx\n", > + indent, "", > + (unsigned long)PFN_DOWN(address), > + (unsigned long)PFN_DOWN(next_table_maddr)); > + } > + > + unmap_domain_page(table_vaddr); > +} > + > +static void amd_dump_p2m_table(struct domain *d) > +{ > + struct hvm_iommu *hd = domain_hvm_iommu(d); > + > + if ( !hd->root_table ) > + return; > + > + printk("p2m table has %d levels\n", hd->paging_mode); > + amd_dump_p2m_table_level(hd->root_table, hd->paging_mode, 0, 0); > +} > + > const struct iommu_ops amd_iommu_ops = { > .init = amd_iommu_domain_init, > .dom0_init = amd_iommu_dom0_init, > @@ -531,4 +606,5 @@ const struct iommu_ops amd_iommu_ops = { > .resume = amd_iommu_resume, > .share_p2m = amd_iommu_share_p2m, > .crash_shutdown = amd_iommu_suspend, > + .dump_p2m_table = amd_dump_p2m_table, > }; > diff -r 6d56e31fe1e1 -r 995803806158 xen/drivers/passthrough/iommu.c > --- a/xen/drivers/passthrough/iommu.c Wed Aug 15 09:41:21 2012 +0100 > +++ b/xen/drivers/passthrough/iommu.c Fri Aug 17 07:56:55 2012 -0700 > @@ -19,10 +19,12 @@ > #include<xen/paging.h> > #include<xen/guest_access.h> > #include<xen/softirq.h> > +#include<xen/keyhandler.h> > #include<xsm/xsm.h> > > static void parse_iommu_param(char *s); > static int iommu_populate_page_table(struct domain *d); > +static void iommu_dump_p2m_table(unsigned char key); > > /* > * The ''iommu'' parameter enables the IOMMU. 
Optional comma separated > @@ -54,6 +56,12 @@ bool_t __read_mostly amd_iommu_perdev_in > > DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb); > > +static struct keyhandler iommu_p2m_table = { > + .diagnostic = 0, > + .u.fn = iommu_dump_p2m_table, > + .desc = "dump iommu p2m table" > +}; > + > static void __init parse_iommu_param(char *s) > { > char *ss; > @@ -119,6 +127,7 @@ void __init iommu_dom0_init(struct domai > if ( !iommu_enabled ) > return; > > + register_keyhandler(''o'',&iommu_p2m_table); > d->need_iommu = !!iommu_dom0_strict; > if ( need_iommu(d) ) > { > @@ -654,6 +663,34 @@ int iommu_do_domctl( > return ret; > } > > +static void iommu_dump_p2m_table(unsigned char key) > +{ > + struct domain *d; > + const struct iommu_ops *ops; > + > + if ( !iommu_enabled ) > + { > + printk("IOMMU not enabled!\n"); > + return; > + } > + > + ops = iommu_get_ops(); > + for_each_domain(d) > + { > + if ( !d->domain_id ) > + continue; > + > + if ( iommu_use_hap_pt(d) ) > + { > + printk("\ndomain%d IOMMU p2m table shared with MMU: \n", d->domain_id); > + continue; > + } > + > + printk("\ndomain%d IOMMU p2m table: \n", d->domain_id); > + ops->dump_p2m_table(d); > + } > +} > + > /* > * Local variables: > * mode: C > diff -r 6d56e31fe1e1 -r 995803806158 xen/drivers/passthrough/vtd/iommu.c > --- a/xen/drivers/passthrough/vtd/iommu.c Wed Aug 15 09:41:21 2012 +0100 > +++ b/xen/drivers/passthrough/vtd/iommu.c Fri Aug 17 07:56:55 2012 -0700 > @@ -31,6 +31,7 @@ > #include<xen/pci.h> > #include<xen/pci_regs.h> > #include<xen/keyhandler.h> > +#include<xen/softirq.h> > #include<asm/msi.h> > #include<asm/irq.h> > #if defined(__i386__) || defined(__x86_64__) > @@ -2365,6 +2366,60 @@ static void vtd_resume(void) > } > } > > +static void vtd_dump_p2m_table_level(paddr_t pt_maddr, int level, paddr_t gpa, > + int indent) > +{ > + paddr_t address; > + int i; > + struct dma_pte *pt_vaddr, *pte; > + int next_level; > + > + if ( level< 1 ) > + return; > + > + pt_vaddr = map_vtd_domain_page(pt_maddr); 
> + if ( pt_vaddr == NULL ) > + { > + printk("Failed to map VT-D domain page %"PRIpaddr"\n", pt_maddr); > + return; > + } > + > + next_level = level - 1; > + for ( i = 0; i< PTE_NUM; i++ ) > + { > + if ( !(i % 2) ) > + process_pending_softirqs(); > + > + pte =&pt_vaddr[i]; > + if ( !dma_pte_present(*pte) ) > + continue; > + > + address = gpa + offset_level_address(i, level); > + if ( next_level>= 1 ) > + vtd_dump_p2m_table_level(dma_pte_addr(*pte), next_level, > + address, indent + 1); > + else > + printk("%*sgfn: %08lx mfn: %08lx\n", > + indent, "", > + (unsigned long)(address>> PAGE_SHIFT_4K), > + (unsigned long)(pte->val>> PAGE_SHIFT_4K)); > + } > + > + unmap_vtd_domain_page(pt_vaddr); > +} > + > +static void vtd_dump_p2m_table(struct domain *d) > +{ > + struct hvm_iommu *hd; > + > + if ( list_empty(&acpi_drhd_units) ) > + return; > + > + hd = domain_hvm_iommu(d); > + printk("p2m table has %d levels\n", agaw_to_level(hd->agaw)); > + vtd_dump_p2m_table_level(hd->pgd_maddr, agaw_to_level(hd->agaw), 0, 0); > +} > + > const struct iommu_ops intel_iommu_ops = { > .init = intel_iommu_domain_init, > .dom0_init = intel_iommu_dom0_init, > @@ -2387,6 +2442,7 @@ const struct iommu_ops intel_iommu_ops > .crash_shutdown = vtd_crash_shutdown, > .iotlb_flush = intel_iommu_iotlb_flush, > .iotlb_flush_all = intel_iommu_iotlb_flush_all, > + .dump_p2m_table = vtd_dump_p2m_table, > }; > > /* > diff -r 6d56e31fe1e1 -r 995803806158 xen/drivers/passthrough/vtd/iommu.h > --- a/xen/drivers/passthrough/vtd/iommu.h Wed Aug 15 09:41:21 2012 +0100 > +++ b/xen/drivers/passthrough/vtd/iommu.h Fri Aug 17 07:56:55 2012 -0700 > @@ -248,6 +248,8 @@ struct context_entry { > #define level_to_offset_bits(l) (12 + (l - 1) * LEVEL_STRIDE) > #define address_level_offset(addr, level) \ > ((addr>> level_to_offset_bits(level))& LEVEL_MASK) > +#define offset_level_address(offset, level) \ > + ((u64)(offset)<< level_to_offset_bits(level)) > #define level_mask(l) (((u64)(-1))<< level_to_offset_bits(l)) > 
#define level_size(l) (1<< level_to_offset_bits(l)) > #define align_to_level(addr, l) ((addr + level_size(l) - 1)& level_mask(l)) > diff -r 6d56e31fe1e1 -r 995803806158 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h > --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Wed Aug 15 09:41:21 2012 +0100 > +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Fri Aug 17 07:56:55 2012 -0700 > @@ -38,6 +38,10 @@ > #define PTE_PER_TABLE_ALLOC(entries) \ > PAGE_SIZE * (PTE_PER_TABLE_ALIGN(entries)>> PTE_PER_TABLE_SHIFT) > > +#define amd_offset_level_address(offset, level) \ > + ((u64)(offset)<< (12 + (PTE_PER_TABLE_SHIFT * \ > + (level - IOMMU_PAGING_MODE_LEVEL_1)))) > + > #define PCI_MIN_CAP_OFFSET 0x40 > #define PCI_MAX_CAP_BLOCKS 48 > #define PCI_CAP_PTR_MASK 0xFC > diff -r 6d56e31fe1e1 -r 995803806158 xen/include/xen/iommu.h > --- a/xen/include/xen/iommu.h Wed Aug 15 09:41:21 2012 +0100 > +++ b/xen/include/xen/iommu.h Fri Aug 17 07:56:55 2012 -0700 > @@ -141,6 +141,7 @@ struct iommu_ops { > void (*crash_shutdown)(void); > void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count); > void (*iotlb_flush_all)(struct domain *d); > + void (*dump_p2m_table)(struct domain *d); > }; > > void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value); >