Jan Beulich
2012-Jun-22 08:18 UTC
[PATCH] AMD IOMMU: add mechanism to protect their PCI devices' config spaces
Recent Dom0 kernels want to disable PCI MSI on all devices, yet doing
so on AMD IOMMUs (which get represented by a PCI device) disables part
of the functionality set up by the hypervisor.
Add a mechanism to mark certain PCI devices as having write protected
config spaces (both through port based [method 1] accesses and, for
x86-64, mmconfig), and use that for AMD's IOMMUs.
Note that due to ptwr_do_page_fault() being run first, there'll be a
MEM_LOG() issued for each such mmconfig based write attempt. If that's
undesirable, the order of the calls in fixup_page_fault() would need
to be swapped.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Wei Wang <wei.wang2@amd.com>
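
[For context, the "port based [method 1]" accesses mentioned above are the legacy CF8h/CFCh mechanism that pci_cfg_ok() (patched below) arbitrates. A minimal standalone sketch of the address encoding — illustration only, with a made-up device, not Xen's actual I/O path:]

#include <stdint.h>
#include <stdio.h>

/* Build the CF8h address word for a "method 1" config access:
 * enable bit 31, bus 23:16, device 15:11, function 10:8, register 7:2. */
static uint32_t conf1_address(uint8_t bus, uint8_t dev, uint8_t func,
                              uint8_t reg)
{
    return 0x80000000u
         | ((uint32_t)bus  << 16)
         | ((uint32_t)dev  << 11)
         | ((uint32_t)func <<  8)
         | (reg & 0xfcu);
}

int main(void)
{
    /* Writing this value to port CF8h selects 00:02.0 register 0x04;
     * the data itself then flows through port CFCh. */
    printf("CF8 = %#x\n", conf1_address(0, 2, 0, 0x04)); /* 0x80001004 */
    return 0;
}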
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -5209,6 +5209,97 @@ int ptwr_do_page_fault(struct vcpu *v, u
return 0;
}
+#ifdef __x86_64__
+/*************************
+ * fault handling for read-only MMIO pages
+ */
+
+struct mmio_ro_emulate_ctxt {
+ struct x86_emulate_ctxt ctxt;
+ unsigned long cr2;
+};
+
+static int mmio_ro_emulated_read(
+ enum x86_segment seg,
+ unsigned long offset,
+ void *p_data,
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt)
+{
+ return X86EMUL_UNHANDLEABLE;
+}
+
+static int mmio_ro_emulated_write(
+ enum x86_segment seg,
+ unsigned long offset,
+ void *p_data,
+ unsigned int bytes,
+ struct x86_emulate_ctxt *ctxt)
+{
+ struct mmio_ro_emulate_ctxt *mmio_ro_ctxt =
+ container_of(ctxt, struct mmio_ro_emulate_ctxt, ctxt);
+
+ /* Only allow naturally-aligned stores at the original %cr2 address. */
+ if ( ((bytes | offset) & (bytes - 1)) || offset != mmio_ro_ctxt->cr2 )
+ {
+ MEM_LOG("mmio_ro_emulate: bad access (cr2=%lx, addr=%lx,
bytes=%u)",
+ mmio_ro_ctxt->cr2, offset, bytes);
+ return X86EMUL_UNHANDLEABLE;
+ }
+
+ return X86EMUL_OKAY;
+}
+
+static const struct x86_emulate_ops mmio_ro_emulate_ops = {
+ .read = mmio_ro_emulated_read,
+ .insn_fetch = ptwr_emulated_read,
+ .write = mmio_ro_emulated_write,
+};
+
+/* Check if guest is trying to modify a r/o MMIO page. */
+int mmio_ro_do_page_fault(struct vcpu *v, unsigned long addr,
+ struct cpu_user_regs *regs)
+{
+ l1_pgentry_t pte;
+ unsigned long mfn;
+ unsigned int addr_size = is_pv_32on64_domain(v->domain) ?
+ 32 : BITS_PER_LONG;
+ struct mmio_ro_emulate_ctxt mmio_ro_ctxt = {
+ .ctxt.regs = regs,
+ .ctxt.addr_size = addr_size,
+ .ctxt.sp_size = addr_size,
+ .cr2 = addr
+ };
+ int rc;
+
+ /* Attempt to read the PTE that maps the VA being accessed. */
+ guest_get_eff_l1e(v, addr, &pte);
+
+ /* We are looking only for read-only mappings of MMIO pages. */
+ if ( ((l1e_get_flags(pte) & (_PAGE_PRESENT|_PAGE_RW)) != _PAGE_PRESENT) )
+ return 0;
+
+ mfn = l1e_get_pfn(pte);
+ if ( mfn_valid(mfn) )
+ {
+ struct page_info *page = mfn_to_page(mfn);
+ struct domain *owner = page_get_owner_and_reference(page);
+
+ if ( owner )
+ put_page(page);
+ if ( owner != dom_io )
+ return 0;
+ }
+
+ if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn) )
+ return 0;
+
+ rc = x86_emulate(&mmio_ro_ctxt.ctxt, &mmio_ro_emulate_ops);
+
+ return rc != X86EMUL_UNHANDLEABLE ? EXCRET_fault_fixed : 0;
+}
+#endif /* __x86_64__ */
+
void free_xen_pagetable(void *v)
{
if ( system_state == SYS_STATE_early_boot )
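
[The mmio_ro_emulate_ctxt wrapper above uses the usual container_of idiom to get from the embedded x86_emulate_ctxt the emulator hands back to the enclosing structure carrying %cr2. A self-contained sketch of that pattern, with a local container_of definition since this is illustration rather than Xen's header:]

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct inner { int regs; };

struct outer {
    struct inner ctxt;   /* embedded, like x86_emulate_ctxt above */
    unsigned long cr2;
};

int main(void)
{
    struct outer o = { .cr2 = 0xdeadbeef };
    struct inner *ip = &o.ctxt;           /* what the callback receives */
    struct outer *op = container_of(ip, struct outer, ctxt);

    printf("cr2 = %#lx\n", op->cr2);      /* recovers 0xdeadbeef */
    return 0;
}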
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1349,20 +1349,23 @@ static int fixup_page_fault(unsigned lon
return 0;
}
- if ( VM_ASSIST(d, VMASST_TYPE_writable_pagetables) &&
- guest_kernel_mode(v, regs) )
- {
- unsigned int mbs = PFEC_write_access;
- unsigned int mbz = PFEC_reserved_bit | PFEC_insn_fetch;
-
- /* Do not check if access-protection fault since the page may
- legitimately be not present in shadow page tables */
- if ( !paging_mode_enabled(d) )
- mbs |= PFEC_page_present;
-
- if ( ((regs->error_code & (mbs | mbz)) == mbs) &&
+ if ( guest_kernel_mode(v, regs) &&
+ !(regs->error_code & (PFEC_reserved_bit | PFEC_insn_fetch))
&&
+ (regs->error_code & PFEC_write_access) )
+ {
+ if ( VM_ASSIST(d, VMASST_TYPE_writable_pagetables) &&
+ /* Do not check if access-protection fault since the page may
+ legitimately be not present in shadow page tables */
+ (paging_mode_enabled(d) ||
+ (regs->error_code & PFEC_page_present)) &&
ptwr_do_page_fault(v, addr, regs) )
return EXCRET_fault_fixed;
+
+#ifdef __x86_64__
+ if ( IS_PRIV(d) && (regs->error_code & PFEC_page_present) &&
+ mmio_ro_do_page_fault(v, addr, regs) )
+ return EXCRET_fault_fixed;
+#endif
}
/* For non-external shadowed guests, we fix up both their own
@@ -1690,6 +1693,13 @@ static int pci_cfg_ok(struct domain *d,
return 0;
machine_bdf = (d->arch.pci_cf8 >> 8) & 0xFFFF;
+ if ( write )
+ {
+ const unsigned long *ro_map = pci_get_ro_map(0);
+
+ if ( ro_map && test_bit(machine_bdf, ro_map) )
+ return 0;
+ }
start = d->arch.pci_cf8 & 0xFF;
end = start + size - 1;
if (xsm_pci_config_permission(d, machine_bdf, start, end, write))
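
[The rewritten condition in fixup_page_fault() above hoists the checks common to both handlers: the fault must be a plain data write from guest kernel mode. A standalone restatement of that predicate, using the architectural x86 page-fault error-code bits (Xen's names shown):]

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* x86 page-fault error-code bits. */
#define PFEC_page_present (1u << 0)
#define PFEC_write_access (1u << 1)
#define PFEC_reserved_bit (1u << 3)
#define PFEC_insn_fetch   (1u << 4)

/* A write fault with no reserved-bit or instruction-fetch component is
 * the only kind either ptwr_do_page_fault() or mmio_ro_do_page_fault()
 * can possibly fix up; everything else falls through. */
static bool fixable_write(uint32_t ec)
{
    return !(ec & (PFEC_reserved_bit | PFEC_insn_fetch)) &&
           (ec & PFEC_write_access);
}

int main(void)
{
    printf("%d\n", fixable_write(PFEC_page_present | PFEC_write_access)); /* 1 */
    printf("%d\n", fixable_write(PFEC_insn_fetch));                       /* 0 */
    return 0;
}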
--- a/xen/arch/x86/x86_32/pci.c
+++ b/xen/arch/x86/x86_32/pci.c
@@ -6,6 +6,7 @@
#include <xen/spinlock.h>
#include <xen/pci.h>
+#include <xen/init.h>
#include <asm/io.h>
#define PCI_CONF_ADDRESS(bus, dev, func, reg) \
@@ -70,3 +71,7 @@ void pci_conf_write32(
BUG_ON((bus > 255) || (dev > 31) || (func > 7) || (reg > 255));
pci_conf_write(PCI_CONF_ADDRESS(bus, dev, func, reg), 0, 4, data);
}
+
+void __init arch_pci_ro_device(int seg, int bdf)
+{
+}
--- a/xen/arch/x86/x86_64/mmconfig_64.c
+++ b/xen/arch/x86/x86_64/mmconfig_64.c
@@ -14,6 +14,8 @@
#include <xen/xmalloc.h>
#include <xen/pci.h>
#include <xen/pci_regs.h>
+#include <xen/iommu.h>
+#include <xen/rangeset.h>
#include "mmconfig.h"
@@ -132,9 +134,30 @@ static void __iomem *mcfg_ioremap(const
return (void __iomem *) virt;
}
+void arch_pci_ro_device(int seg, int bdf)
+{
+ unsigned int idx, bus = PCI_BUS(bdf);
+
+ for (idx = 0; idx < pci_mmcfg_config_num; ++idx) {
+ const struct acpi_mcfg_allocation *cfg = pci_mmcfg_virt[idx].cfg;
+ unsigned long mfn = (cfg->address >> PAGE_SHIFT) + bdf;
+
+ if (!pci_mmcfg_virt[idx].virt || cfg->pci_segment != seg ||
+ cfg->start_bus_number > bus || cfg->end_bus_number <
bus)
+ continue;
+
+ if (rangeset_add_singleton(mmio_ro_ranges, mfn))
+ printk(XENLOG_ERR
+ "%04x:%02x:%02x.%u: could not mark MCFG (mfn %#lx)
read-only\n",
+ cfg->pci_segment, bus, PCI_SLOT(bdf), PCI_FUNC(bdf),
+ mfn);
+ }
+}
+
int pci_mmcfg_arch_enable(unsigned int idx)
{
const typeof(pci_mmcfg_config[0]) *cfg = pci_mmcfg_virt[idx].cfg;
+ const unsigned long *ro_map = pci_get_ro_map(cfg->pci_segment);
if (pci_mmcfg_virt[idx].virt)
return 0;
@@ -146,6 +169,16 @@ int pci_mmcfg_arch_enable(unsigned int i
}
printk(KERN_INFO "PCI: Using MCFG for segment %04x bus
%02x-%02x\n",
cfg->pci_segment, cfg->start_bus_number,
cfg->end_bus_number);
+ if (ro_map) {
+ unsigned int bdf = PCI_BDF(cfg->start_bus_number, 0, 0);
+ unsigned int end = PCI_BDF(cfg->end_bus_number, -1, -1);
+
+ while ((bdf = find_next_bit(ro_map, end + 1, bdf)) <= end) {
+ arch_pci_ro_device(cfg->pci_segment, bdf);
+ if (bdf++ == end)
+ break;
+ }
+ }
return 0;
}
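
[The mfn computation in arch_pci_ro_device() above exploits MMCONFIG's layout: each function's 4KiB config space occupies exactly one page, at an offset of bdf pages from the segment's base address. A worked example under an assumed (made-up) MCFG base address:]

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

/* Mirrors Xen's PCI_BDF(): bus in bits 15:8, device 7:3, function 2:0. */
#define BDF(bus, dev, fn) (((bus) << 8) | ((dev) << 3) | (fn))

int main(void)
{
    uint64_t mcfg_base = 0xe0000000ull;       /* hypothetical MCFG base */
    unsigned int bdf = BDF(0x00, 0x02, 0);    /* e.g. an IOMMU at 00:02.0 */

    /* One 4KiB page per function, so the page to write-protect is simply
     * base-page + bdf, exactly as arch_pci_ro_device() computes it. */
    unsigned long mfn = (mcfg_base >> PAGE_SHIFT) + bdf;

    printf("mfn = %#lx\n", mfn);              /* 0xe0000 + 0x10 = 0xe0010 */
    return 0;
}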
--- a/xen/drivers/passthrough/amd/iommu_detect.c
+++ b/xen/drivers/passthrough/amd/iommu_detect.c
@@ -153,6 +153,12 @@ int __init amd_iommu_detect_one_acpi(
if ( rt )
return -ENODEV;
+ rt = pci_ro_device(iommu->seg, bus, PCI_DEVFN(dev, func));
+ if ( rt )
+ printk(XENLOG_ERR
+ "Could not mark config space of %04x:%02x:%02x.%u read-only
(%d)\n",
+ iommu->seg, bus, dev, func, rt);
+
list_add_tail(&iommu->list, &amd_iommu_head);
return 0;
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -593,11 +593,3 @@ void hvm_dpci_eoi(struct domain *d, unsi
unlock:
spin_unlock(&d->event_lock);
}
-
-static int __init setup_mmio_ro_ranges(void)
-{
- mmio_ro_ranges = rangeset_new(NULL, "r/o mmio ranges",
- RANGESETF_prettyprint_hex);
- return 0;
-}
-__initcall(setup_mmio_ro_ranges);
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -36,6 +36,7 @@
struct pci_seg {
struct list_head alldevs_list;
u16 nr;
+ unsigned long *ro_map;
/* bus2bridge_lock protects bus2bridge array */
spinlock_t bus2bridge_lock;
#define MAX_BUSES 256
@@ -106,6 +107,8 @@ void __init pt_pci_init(void)
radix_tree_init(&pci_segments);
if ( !alloc_pseg(0) )
panic("Could not initialize PCI segment 0\n");
+ mmio_ro_ranges = rangeset_new(NULL, "r/o mmio ranges",
+ RANGESETF_prettyprint_hex);
}
int __init pci_add_segment(u16 seg)
@@ -113,6 +116,13 @@ int __init pci_add_segment(u16 seg)
return alloc_pseg(seg) ? 0 : -ENOMEM;
}
+const unsigned long *pci_get_ro_map(u16 seg)
+{
+ struct pci_seg *pseg = get_pseg(seg);
+
+ return pseg ? pseg->ro_map : NULL;
+}
+
static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn)
{
struct pci_dev *pdev;
@@ -198,6 +208,33 @@ static void free_pdev(struct pci_seg *ps
xfree(pdev);
}
+int __init pci_ro_device(int seg, int bus, int devfn)
+{
+ struct pci_seg *pseg = alloc_pseg(seg);
+ struct pci_dev *pdev;
+
+ if ( !pseg )
+ return -ENOMEM;
+ pdev = alloc_pdev(pseg, bus, devfn);
+ if ( !pdev )
+ return -ENOMEM;
+
+ if ( !pseg->ro_map )
+ {
+ size_t sz = BITS_TO_LONGS(PCI_BDF(-1, -1, -1) + 1) * sizeof(long);
+
+ pseg->ro_map = alloc_xenheap_pages(get_order_from_bytes(sz), 0);
+ if ( !pseg->ro_map )
+ return -ENOMEM;
+ memset(pseg->ro_map, 0, sz);
+ }
+
+ __set_bit(PCI_BDF2(bus, devfn), pseg->ro_map);
+ arch_pci_ro_device(seg, PCI_BDF2(bus, devfn));
+
+ return 0;
+}
+
struct pci_dev *pci_get_pdev(int seg, int bus, int devfn)
{
struct pci_seg *pseg = get_pseg(seg);
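
[pci_ro_device() above sizes the per-segment bitmap to cover every possible BDF: PCI_BDF(-1,-1,-1) is 0xffff, so the map holds 65536 bits. A quick standalone check of that arithmetic, with the relevant macros restated locally for illustration:]

#include <stdio.h>

#define BITS_PER_LONG    (8 * sizeof(long))
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

/* PCI_BDF(-1, -1, -1) masks each field, yielding the maximum BDF 0xffff. */
#define PCI_BDF(b, d, f) ((((b) & 0xff) << 8) | (((d) & 0x1f) << 3) | ((f) & 7))

int main(void)
{
    size_t sz = BITS_TO_LONGS(PCI_BDF(-1, -1, -1) + 1) * sizeof(long);

    /* 0x10000 bits -> 1024 longs on LP64 -> 8192 bytes, i.e. two 4KiB
     * pages, matching the get_order_from_bytes() allocation above. */
    printf("ro_map size = %zu bytes\n", sz);
    return 0;
}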
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -555,6 +555,8 @@ void memguard_unguard_stack(void *p);
int ptwr_do_page_fault(struct vcpu *, unsigned long,
struct cpu_user_regs *);
+int mmio_ro_do_page_fault(struct vcpu *, unsigned long,
+ struct cpu_user_regs *);
int audit_adjust_pgtables(struct domain *d, int dir, int noisy);
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -98,8 +98,11 @@ struct pci_dev *pci_lock_domain_pdev(
void setup_dom0_pci_devices(struct domain *, void (*)(struct pci_dev *));
void pci_release_devices(struct domain *d);
int pci_add_segment(u16 seg);
+const unsigned long *pci_get_ro_map(u16 seg);
int pci_add_device(u16 seg, u8 bus, u8 devfn, const struct pci_dev_info *);
int pci_remove_device(u16 seg, u8 bus, u8 devfn);
+int pci_ro_device(int seg, int bus, int devfn);
+void arch_pci_ro_device(int seg, int bdf);
struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
struct pci_dev *pci_get_pdev_by_domain(
struct domain *, int seg, int bus, int devfn);
Keir Fraser
2012-Jun-22 08:53 UTC
Re: [PATCH] AMD IOMMU: add mechanism to protect their PCI devices' config spaces
On 22/06/2012 09:18, "Jan Beulich" <JBeulich@suse.com> wrote:

> Recent Dom0 kernels want to disable PCI MSI on all devices, yet doing
> so on AMD IOMMUs (which get represented by a PCI device) disables part
> of the functionality set up by the hypervisor.
>
> Add a mechanism to mark certain PCI devices as having write protected
> config spaces (both through port based [method 1] accesses and, for
> x86-64, mmconfig), and use that for AMD's IOMMUs.
> [...]
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> Tested-by: Wei Wang <wei.wang2@amd.com>

Acked-by: Keir Fraser <keir@xen.org>