1: IOMMU: allow MSI message to IRTE propagation to fail
2: x86/MSI: cleanup to prepare for multi-vector MSI
3: AMD IOMMU: allocate IRTE entries instead of using a static mapping
4: AMD IOMMU: untie remap and vector maps

Note that especially patch 3 is more RFC-like, as I continue to have no
way to test this here.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Jan Beulich
2013-Mar-26 09:03 UTC
[PATCH 1/4] IOMMU: allow MSI message to IRTE propagation to fail
With the need to allocate multiple contiguous IRTEs for multi-vector
MSI, the chance of failure here increases. While on the AMD side
there's no allocation of IRTEs at present at all (and hence no way for
this allocation to fail, which is going to change with a later patch in
this series), VT-d already ignores an eventual error here, which this
patch fixes.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/hpet.c
+++ b/xen/arch/x86/hpet.c
@@ -254,13 +254,22 @@ static void hpet_msi_mask(struct irq_des
     ch->msi.msi_attrib.masked = 1;
 }
 
-static void hpet_msi_write(struct hpet_event_channel *ch, struct msi_msg *msg)
+static int hpet_msi_write(struct hpet_event_channel *ch, struct msi_msg *msg)
 {
     ch->msi.msg = *msg;
+
     if ( iommu_intremap )
-        iommu_update_ire_from_msi(&ch->msi, msg);
+    {
+        int rc = iommu_update_ire_from_msi(&ch->msi, msg);
+
+        if ( rc )
+            return rc;
+    }
+
     hpet_write32(msg->data, HPET_Tn_ROUTE(ch->idx));
     hpet_write32(msg->address_lo, HPET_Tn_ROUTE(ch->idx) + 4);
+
+    return 0;
 }
 
 static void __maybe_unused
@@ -318,12 +327,12 @@ static hw_irq_controller hpet_msi_type
     .set_affinity = hpet_msi_set_affinity,
 };
 
-static void __hpet_setup_msi_irq(struct irq_desc *desc)
+static int __hpet_setup_msi_irq(struct irq_desc *desc)
 {
     struct msi_msg msg;
 
     msi_compose_msg(desc, &msg);
-    hpet_msi_write(desc->action->dev_id, &msg);
+    return hpet_msi_write(desc->action->dev_id, &msg);
 }
 
 static int __init hpet_setup_msi_irq(struct hpet_event_channel *ch)
@@ -347,6 +356,8 @@ static int __init hpet_setup_msi_irq(str
     desc->handler = &hpet_msi_type;
     ret = request_irq(ch->msi.irq, hpet_interrupt_handler, 0, "HPET", ch);
+    if ( ret >= 0 )
+        ret = __hpet_setup_msi_irq(desc);
     if ( ret < 0 )
     {
         if ( iommu_intremap )
@@ -354,7 +365,6 @@ static int __init hpet_setup_msi_irq(str
         return ret;
     }
 
-    __hpet_setup_msi_irq(desc);
     desc->msi_desc = &ch->msi;
 
     return 0;
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -1932,7 +1932,14 @@ int map_domain_pirq(
         if ( desc->handler != &no_irq_type )
             dprintk(XENLOG_G_ERR, "dom%d: irq %d in use\n",
                     d->domain_id, irq);
-        setup_msi_handler(desc, msi_desc);
+
+        ret = setup_msi_irq(desc, msi_desc);
+        if ( ret )
+        {
+            spin_unlock_irqrestore(&desc->lock, flags);
+            pci_disable_msi(msi_desc);
+            goto done;
+        }
 
         if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV
              && !desc->arch.used_vectors )
@@ -1948,7 +1955,6 @@ int map_domain_pirq(
         }
 
         set_domain_irq_pirq(d, irq, info);
-        setup_msi_irq(desc);
         spin_unlock_irqrestore(&desc->lock, flags);
     }
     else
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -214,14 +214,18 @@ static void read_msi_msg(struct msi_desc
         iommu_read_msi_from_ire(entry, msg);
 }
 
-static void write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
+static int write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
 {
     entry->msg = *msg;
 
     if ( iommu_intremap )
     {
+        int rc;
+
         ASSERT(msg != &entry->msg);
-        iommu_update_ire_from_msi(entry, msg);
+        rc = iommu_update_ire_from_msi(entry, msg);
+        if ( rc )
+            return rc;
     }
 
     switch ( entry->msi_attrib.type )
@@ -264,6 +268,8 @@ static void write_msi_msg(struct msi_des
     default:
         BUG();
     }
+
+    return 0;
 }
 
 void set_msi_affinity(struct irq_desc *desc, const cpumask_t *mask)
@@ -466,19 +472,15 @@ static struct msi_desc* alloc_msi_entry(
     return entry;
 }
 
-void setup_msi_handler(struct irq_desc *desc, struct msi_desc *msidesc)
+int setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc)
 {
+    struct msi_msg msg;
+
     desc->msi_desc = msidesc;
     desc->handler = msi_maskable_irq(msidesc) ? &pci_msi_maskable
                                               : &pci_msi_nonmaskable;
-}
-
-void setup_msi_irq(struct irq_desc *desc)
-{
-    struct msi_msg msg;
-
     msi_compose_msg(desc, &msg);
-    write_msi_msg(desc->msi_desc, &msg);
+    return write_msi_msg(msidesc, &msg);
 }
 
 int msi_free_irq(struct msi_desc *entry)
--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -359,16 +359,13 @@ done:
     }
 }
 
-void amd_iommu_msi_msg_update_ire(
+int amd_iommu_msi_msg_update_ire(
     struct msi_desc *msi_desc, struct msi_msg *msg)
 {
     struct pci_dev *pdev = msi_desc->dev;
     int bdf, seg;
     struct amd_iommu *iommu;
 
-    if ( !iommu_intremap )
-        return;
-
     bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf;
     seg = pdev ? pdev->seg : hpet_sbdf.seg;
 
@@ -376,7 +373,7 @@ void amd_iommu_msi_msg_update_ire(
     if ( !iommu )
     {
         AMD_IOMMU_DEBUG("Fail to find iommu for MSI device id = %#x\n", bdf);
-        return;
+        return -EINVAL;
    }
 
     if ( msi_desc->remap_index >= 0 )
@@ -395,7 +392,7 @@ void amd_iommu_msi_msg_update_ire(
     }
 
     if ( !msg )
-        return;
+        return 0;
 
     do {
         update_intremap_entry_from_msi_msg(iommu, bdf, &msi_desc->remap_index,
                                            msg);
         if ( !pdev || !pdev->phantom_stride )
             break;
         bdf += pdev->phantom_stride;
     } while ( PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
+
+    return 0;
 }
 
 void amd_iommu_read_msi_from_ire(
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -548,18 +548,20 @@ void iommu_update_ire_from_apic(
     const struct iommu_ops *ops = iommu_get_ops();
     ops->update_ire_from_apic(apic, reg, value);
 }
-void iommu_update_ire_from_msi(
+
+int iommu_update_ire_from_msi(
     struct msi_desc *msi_desc, struct msi_msg *msg)
 {
     const struct iommu_ops *ops = iommu_get_ops();
-    ops->update_ire_from_msi(msi_desc, msg);
+    return iommu_intremap ? ops->update_ire_from_msi(msi_desc, msg) : 0;
 }
 
 void iommu_read_msi_from_ire(
     struct msi_desc *msi_desc, struct msi_msg *msg)
 {
     const struct iommu_ops *ops = iommu_get_ops();
-    ops->read_msi_from_ire(msi_desc, msg);
+    if ( iommu_intremap )
+        ops->read_msi_from_ire(msi_desc, msg);
 }
 
 unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg)
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -90,7 +90,7 @@ void io_apic_write_remap_rte(unsigned in
 struct msi_desc;
 struct msi_msg;
 void msi_msg_read_remap_rte(struct msi_desc *, struct msi_msg *);
-void msi_msg_write_remap_rte(struct msi_desc *, struct msi_msg *);
+int msi_msg_write_remap_rte(struct msi_desc *, struct msi_msg *);
 
 int intel_setup_hpet_msi(struct msi_desc *);
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -653,7 +653,7 @@ void msi_msg_read_remap_rte(
         remap_entry_to_msi_msg(drhd->iommu, msg);
 }
 
-void msi_msg_write_remap_rte(
+int msi_msg_write_remap_rte(
     struct msi_desc *msi_desc, struct msi_msg *msg)
 {
     struct pci_dev *pdev = msi_desc->dev;
@@ -661,8 +661,8 @@ void msi_msg_write_remap_rte(
     drhd = pdev ? acpi_find_matched_drhd_unit(pdev)
                 : hpet_to_drhd(msi_desc->hpet_id);
-    if ( drhd )
-        msi_msg_to_remap_entry(drhd->iommu, pdev, msi_desc, msg);
+    return drhd ? msi_msg_to_remap_entry(drhd->iommu, pdev, msi_desc, msg)
+                : -EINVAL;
 }
 
 int __init intel_setup_hpet_msi(struct msi_desc *msi_desc)
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -93,7 +93,7 @@ void *amd_iommu_alloc_intremap_table(voi
 int amd_iommu_free_intremap_table(u16 seg, struct ivrs_mappings *);
 void amd_iommu_ioapic_update_ire(
     unsigned int apic, unsigned int reg, unsigned int value);
-void amd_iommu_msi_msg_update_ire(
+int amd_iommu_msi_msg_update_ire(
     struct msi_desc *msi_desc, struct msi_msg *msg);
 void amd_iommu_read_msi_from_ire(
     struct msi_desc *msi_desc, struct msi_msg *msg);
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -78,8 +78,7 @@ extern int pci_enable_msi(struct msi_inf
 extern void pci_disable_msi(struct msi_desc *desc);
 extern int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool_t off);
 extern void pci_cleanup_msi(struct pci_dev *pdev);
-extern void setup_msi_handler(struct irq_desc *, struct msi_desc *);
-extern void setup_msi_irq(struct irq_desc *);
+extern int setup_msi_irq(struct irq_desc *, struct msi_desc *);
 extern void teardown_msi_irq(int irq);
 extern int msi_free_vector(struct msi_desc *entry);
 extern int pci_restore_msi_state(struct pci_dev *pdev);
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -106,7 +106,7 @@ struct iommu_ops {
                                  u8 devfn, struct pci_dev *);
     int (*get_device_group_id)(u16 seg, u8 bus, u8 devfn);
     void (*update_ire_from_apic)(unsigned int apic, unsigned int reg, unsigned int value);
-    void (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg *msg);
+    int (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg *msg);
     void (*read_msi_from_ire)(struct msi_desc *msi_desc, struct msi_msg *msg);
     unsigned int (*read_apic_from_ire)(unsigned int apic, unsigned int reg);
     int (*setup_hpet_msi)(struct msi_desc *);
@@ -120,7 +120,7 @@ struct iommu_ops {
 };
 
 void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value);
-void iommu_update_ire_from_msi(struct msi_desc *msi_desc, struct msi_msg *msg);
+int iommu_update_ire_from_msi(struct msi_desc *msi_desc, struct msi_msg *msg);
 void iommu_read_msi_from_ire(struct msi_desc *msi_desc, struct msi_msg *msg);
 unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg);
 int iommu_setup_hpet_msi(struct msi_desc *);
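As an aside, the control-flow change the patch makes everywhere can be seen in
isolation in the sketch below. Everything in it is an illustrative stand-in
(the names, the table size, the error value), not the actual Xen interfaces;
the point is only that once the IRTE update can fail, every MSI message write
needs an error exit that its caller honours:

    #include <stdio.h>

    /* Illustrative stand-ins, not the real Xen interfaces. */
    static int intremap_enabled = 1;

    /* IRTE programming can now fail, e.g. once the table is exhausted. */
    static int update_ire_from_msi(unsigned int *irte_slot)
    {
        static unsigned int next_free = 254;  /* pretend the table is nearly full */

        if ( next_free >= 256 )
            return -1;                        /* -ENOSPC / -EINVAL in the series */
        *irte_slot = next_free++;
        return 0;
    }

    /* Write an MSI message only after the IRTE update succeeded. */
    static int msi_write(unsigned int *irte_slot, unsigned int msg_data)
    {
        if ( intremap_enabled )
        {
            int rc = update_ire_from_msi(irte_slot);

            if ( rc )
                return rc;    /* caller unwinds; the device stays masked */
        }
        printf("programming MSI data %#x via IRTE %u\n", msg_data, *irte_slot);
        return 0;
    }

    int main(void)
    {
        unsigned int slot, i;

        for ( i = 0; i < 4; i++ )
            if ( msi_write(&slot, 0x4040 + i) )
                printf("write %u failed -> tear down, as the patched map_domain_pirq() does\n", i);
        return 0;
    }

In the series itself this pattern shows up as hpet_msi_write(), write_msi_msg()
and setup_msi_irq() turning into int-returning functions, with map_domain_pirq()
disabling MSI again on failure.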
Jan Beulich
2013-Mar-26 09:03 UTC
[PATCH 2/4] x86/MSI: cleanup to prepare for multi-vector MSI
The major aspect is the removal of the overload of the MSI entry's
mask_base field for MSI purposes - a proper union is installed instead,
tracking both the config space position needed and the number of
vectors used (which is going to be 1 until the actual multi-vector MSI
patches arrive).

It also corrects misleading information from debug key 'M': when
msi_get_mask_bit() returns a negative value, there's no mask bit, and
hence the output shouldn't give the impression there is.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -354,18 +354,16 @@ static void msi_set_mask_bit(struct irq_
     switch (entry->msi_attrib.type) {
     case PCI_CAP_ID_MSI:
         if (entry->msi_attrib.maskbit) {
-            int pos;
             u32 mask_bits;
             u16 seg = entry->dev->seg;
             u8 bus = entry->dev->bus;
             u8 slot = PCI_SLOT(entry->dev->devfn);
             u8 func = PCI_FUNC(entry->dev->devfn);
 
-            pos = (long)entry->mask_base;
-            mask_bits = pci_conf_read32(seg, bus, slot, func, pos);
+            mask_bits = pci_conf_read32(seg, bus, slot, func, entry->msi.mpos);
             mask_bits &= ~(1);
             mask_bits |= flag;
-            pci_conf_write32(seg, bus, slot, func, pos, mask_bits);
+            pci_conf_write32(seg, bus, slot, func, entry->msi.mpos, mask_bits);
         }
         break;
     case PCI_CAP_ID_MSIX:
@@ -391,7 +389,7 @@ static int msi_get_mask_bit(const struct
         return pci_conf_read32(entry->dev->seg, entry->dev->bus,
                                PCI_SLOT(entry->dev->devfn),
                                PCI_FUNC(entry->dev->devfn),
-                               (unsigned long)entry->mask_base) & 1;
+                               entry->msi.mpos) & 1;
     case PCI_CAP_ID_MSIX:
         return readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) & 1;
     }
@@ -532,6 +530,7 @@ static int msi_capability_init(struct pc
 {
     struct msi_desc *entry;
     int pos;
+    unsigned int maxvec, mpos;
     u16 control, seg = dev->seg;
     u8 bus = dev->bus;
     u8 slot = PCI_SLOT(dev->devfn);
@@ -540,6 +539,9 @@ static int msi_capability_init(struct pc
     ASSERT(spin_is_locked(&pcidevs_lock));
     pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
     control = pci_conf_read16(seg, bus, slot, func, msi_control_reg(pos));
+    maxvec = multi_msi_capable(control);
+    control &= ~PCI_MSI_FLAGS_QSIZE;
+
     /* MSI Entry Initialization */
     msi_set_enable(dev, 0); /* Ensure msi is disabled as I set it up */
 
@@ -553,23 +555,20 @@ static int msi_capability_init(struct pc
     entry->msi_attrib.maskbit = is_mask_bit_support(control);
     entry->msi_attrib.masked = 1;
     entry->msi_attrib.pos = pos;
+    mpos = msi_mask_bits_reg(pos, is_64bit_address(control));
+    entry->msi.nvec = 1;
     entry->irq = irq;
     if ( is_mask_bit_support(control) )
-        entry->mask_base = (void __iomem *)(long)msi_mask_bits_reg(pos,
-                is_64bit_address(control));
+        entry->msi.mpos = mpos;
     entry->dev = dev;
     if ( entry->msi_attrib.maskbit )
     {
-        unsigned int maskbits, temp;
+        u32 maskbits;
+
         /* All MSIs are unmasked by default, Mask them all */
-        maskbits = pci_conf_read32(seg, bus, slot, func,
-                       msi_mask_bits_reg(pos, is_64bit_address(control)));
-        temp = (1 << multi_msi_capable(control));
-        temp = ((temp - 1) & ~temp);
-        maskbits |= temp;
-        pci_conf_write32(seg, bus, slot, func,
-                         msi_mask_bits_reg(pos, is_64bit_address(control)),
-                         maskbits);
+        maskbits = pci_conf_read32(seg, bus, slot, func, mpos);
+        maskbits |= ~(u32)0 >> (32 - maxvec);
+        pci_conf_write32(seg, bus, slot, func, mpos, maskbits);
     }
     list_add_tail(&entry->list, &dev->msi_list);
@@ -1206,7 +1205,7 @@ static void dump_msi(unsigned char key)
         struct irq_desc *desc = irq_to_desc(irq);
         const struct msi_desc *entry;
         u32 addr, data, dest32;
-        char mask;
+        signed char mask;
         struct msi_attrib attr;
         unsigned long flags;
         const char *type = "???";
@@ -1241,12 +1240,16 @@ static void dump_msi(unsigned char key)
         dest32 = entry->msg.dest32;
         attr = entry->msi_attrib;
         if ( entry->msi_attrib.type )
-            mask = msi_get_mask_bit(entry) ? '1' : '0';
+            mask = msi_get_mask_bit(entry);
         else
-            mask = '?';
+            mask = -1;
         spin_unlock_irqrestore(&desc->lock, flags);
 
+        if ( mask >= 0 )
+            mask += '0';
+        else
+            mask = '?';
+
         printk(" %-6s%4u vec=%02x%7s%6s%3sassert%5s%7s"
                " dest=%08x mask=%d/%d/%c\n",
                type, irq,
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -99,6 +99,10 @@ struct msi_desc {
 
     union {
         void __iomem *mask_base;/* va for the entry in mask table */
+        struct {
+            unsigned int nvec;/* number of vectors            */
+            unsigned int mpos;/* location of mask register    */
+        } msi;
         unsigned int hpet_id;   /* HPET (dev is NULL)           */
     };
     struct pci_dev *dev;
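One detail of this patch worth a second look is the rewritten mask arithmetic:
maskbits |= ~(u32)0 >> (32 - maxvec) sets the low maxvec bits, just as the
replaced temp-based computation does, but it stays well defined in C even for
maxvec == 32 (a fully multi-message-capable device), where the old 1 << maxvec
would shift by the full width of the type. A small standalone check - plain C,
nothing Xen-specific assumed beyond the vector counts MSI permits:

    #include <stdint.h>
    #include <stdio.h>

    /* Mask with the low maxvec bits set, as the patched code computes it. */
    static uint32_t mask_new(unsigned int maxvec)
    {
        return ~(uint32_t)0 >> (32 - maxvec);
    }

    /* The replaced computation; only well defined while maxvec < 32. */
    static uint32_t mask_old(unsigned int maxvec)
    {
        uint32_t temp = (uint32_t)1 << maxvec;

        return (temp - 1) & ~temp;
    }

    int main(void)
    {
        /* The per-function vector counts MSI permits. */
        static const unsigned int maxvecs[] = { 1, 2, 4, 8, 16, 32 };
        unsigned int i;

        for ( i = 0; i < sizeof(maxvecs) / sizeof(maxvecs[0]); i++ )
        {
            unsigned int v = maxvecs[i];

            printf("maxvec=%2u -> mask %08x\n", v, (unsigned int)mask_new(v));
            if ( v < 32 && mask_new(v) != mask_old(v) )
                printf("  mismatch with the old formula!\n");
        }
        return 0;
    }

Both formulas agree for 1 through 16 vectors; only the 32-vector case needs
the new form.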
Jan Beulich
2013-Mar-26 09:04 UTC
[PATCH 3/4] AMD IOMMU: allocate IRTE entries instead of using a static mapping
For multi-vector MSI, where we surely don't want to be forced to
allocate contiguous vectors yet want to be able to set the affinities
of the individual vectors separately, we need to drop the use of the
tuple of vector and delivery mode to determine the IRTE to use, and
instead allocate IRTEs (which imo should have been done from the
beginning).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
One thing I surely need confirmation on is whether this

    BUG_ON(get_ivrs_mappings(iommu->seg)[req_id].intremap_table !=
           get_ivrs_mappings(iommu->seg)[alias_id].intremap_table);

in update_intremap_entry_from_msi_msg() is valid. If it isn't, it's not
clear to me how to properly set up things for affected devices, as we
would need an identical index allocated for two different remap table
instances (which can hardly be expected to work out well).

--- a/xen/drivers/passthrough/amd/iommu_acpi.c
+++ b/xen/drivers/passthrough/amd/iommu_acpi.c
@@ -72,12 +72,15 @@ static void __init add_ivrs_mapping_entr
         /* allocate per-device interrupt remapping table */
         if ( amd_iommu_perdev_intremap )
             ivrs_mappings[alias_id].intremap_table =
-                amd_iommu_alloc_intremap_table();
+                amd_iommu_alloc_intremap_table(
+                    &ivrs_mappings[alias_id].intremap_inuse);
         else
         {
             if ( shared_intremap_table == NULL )
-                shared_intremap_table = amd_iommu_alloc_intremap_table();
+                shared_intremap_table = amd_iommu_alloc_intremap_table(
+                    &shared_intremap_inuse);
             ivrs_mappings[alias_id].intremap_table = shared_intremap_table;
+            ivrs_mappings[alias_id].intremap_inuse = shared_intremap_inuse;
         }
     }
     /* assgin iommu hardware */
@@ -671,7 +674,7 @@ static u16 __init parse_ivhd_device_spec
         if ( IO_APIC_ID(apic) != special->handle )
             continue;
 
-        if ( ioapic_sbdf[special->handle].pin_setup )
+        if ( ioapic_sbdf[special->handle].pin_2_idx )
         {
             if ( ioapic_sbdf[special->handle].bdf == bdf &&
                  ioapic_sbdf[special->handle].seg == seg )
@@ -691,14 +694,16 @@ static u16 __init parse_ivhd_device_spec
             ioapic_sbdf[special->handle].bdf = bdf;
             ioapic_sbdf[special->handle].seg = seg;
 
-            ioapic_sbdf[special->handle].pin_setup = xzalloc_array(
-                unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
+            ioapic_sbdf[special->handle].pin_2_idx = xmalloc_array(
+                u16, nr_ioapic_entries[apic]);
             if ( nr_ioapic_entries[apic] &&
-                 !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+                 !ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
             {
                 printk(XENLOG_ERR "IVHD Error: Out of memory\n");
                 return 0;
             }
+            memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1,
+                   nr_ioapic_entries[apic]);
         }
         break;
     }
@@ -926,7 +931,7 @@ static int __init parse_ivrs_table(struc
     for ( apic = 0; !error && iommu_intremap && apic < nr_ioapics; ++apic )
     {
         if ( !nr_ioapic_entries[apic] ||
-             ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+             ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
             continue;
 
         printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n",
@@ -935,13 +940,15 @@ static int __init parse_ivrs_table(struc
             error = -ENXIO;
         else
         {
-            ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array(
-                unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
-            if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+            ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx = xmalloc_array(
+                u16, nr_ioapic_entries[apic]);
+            if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
             {
                 printk(XENLOG_ERR "IVHD Error: Out of memory\n");
                 error = -ENOMEM;
             }
+            memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1,
+                   nr_ioapic_entries[apic]);
         }
     }
--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -30,6 +30,7 @@
 struct ioapic_sbdf ioapic_sbdf[MAX_IO_APICS];
 struct hpet_sbdf hpet_sbdf;
 void *shared_intremap_table;
+unsigned long *shared_intremap_inuse;
 static DEFINE_SPINLOCK(shared_intremap_lock);
 
 static spinlock_t* get_intremap_lock(int seg, int req_id)
@@ -45,13 +46,14 @@ static int get_intremap_requestor_id(int
     return get_ivrs_mappings(seg)[bdf].dte_requestor_id;
 }
 
-static int get_intremap_offset(u8 vector, u8 dm)
+static unsigned int alloc_intremap_entry(int seg, int bdf)
 {
-    int offset = 0;
-    offset = (dm << INT_REMAP_INDEX_DM_SHIFT) & INT_REMAP_INDEX_DM_MASK;
-    offset |= (vector << INT_REMAP_INDEX_VECTOR_SHIFT ) &
-        INT_REMAP_INDEX_VECTOR_MASK;
-    return offset;
+    unsigned long *inuse = get_ivrs_mappings(seg)[bdf].intremap_inuse;
+    unsigned int slot = find_first_zero_bit(inuse, INTREMAP_ENTRIES);
+
+    if ( slot < INTREMAP_ENTRIES )
+        __set_bit(slot, inuse);
+    return slot;
 }
 
 static u8 *get_intremap_entry(int seg, int bdf, int offset)
@@ -66,9 +68,10 @@ static u8 *get_intremap_entry(int seg, i
 
 static void free_intremap_entry(int seg, int bdf, int offset)
 {
-    u32* entry;
+    u32 *entry;
     entry = (u32*)get_intremap_entry(seg, bdf, offset);
     memset(entry, 0, sizeof(u32));
+    __clear_bit(offset, get_ivrs_mappings(seg)[bdf].intremap_inuse);
 }
 
 static void update_intremap_entry(u32* entry, u8 vector, u8 int_type,
@@ -97,18 +100,24 @@ static void update_intremap_entry(u32* e
                             INT_REMAP_ENTRY_VECTOR_SHIFT, entry);
 }
 
-static void update_intremap_entry_from_ioapic(
+static void set_rte_index(struct IO_APIC_route_entry *rte, int offset)
+{
+    rte->vector = (u8)offset;
+    rte->delivery_mode = offset >> 8;
+}
+
+static int update_intremap_entry_from_ioapic(
     int bdf,
     struct amd_iommu *iommu,
-    const struct IO_APIC_route_entry *rte,
-    const struct IO_APIC_route_entry *old_rte)
+    struct IO_APIC_route_entry *rte,
+    u16 *index)
 {
     unsigned long flags;
     u32* entry;
     u8 delivery_mode, dest, vector, dest_mode;
     int req_id;
     spinlock_t *lock;
-    int offset;
+    unsigned int offset;
 
     req_id = get_intremap_requestor_id(iommu->seg, bdf);
     lock = get_intremap_lock(iommu->seg, req_id);
@@ -120,15 +129,19 @@ static void update_intremap_entry_from_i
 
     spin_lock_irqsave(lock, flags);
 
-    offset = get_intremap_offset(vector, delivery_mode);
-    if ( old_rte )
+    offset = *index;
+    if ( offset >= INTREMAP_ENTRIES )
     {
-        int old_offset = get_intremap_offset(old_rte->vector,
-                                             old_rte->delivery_mode);
-
-        if ( offset != old_offset )
-            free_intremap_entry(iommu->seg, bdf, old_offset);
+        offset = alloc_intremap_entry(iommu->seg, req_id);
+        if ( offset >= INTREMAP_ENTRIES )
+        {
+            spin_unlock_irqrestore(lock, flags);
+            rte->mask = 1;
+            return -ENOSPC;
+        }
+        *index = offset;
     }
+
     entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
     update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
 
@@ -140,6 +153,10 @@ static void update_intremap_entry_from_i
         amd_iommu_flush_intremap(iommu, req_id);
         spin_unlock_irqrestore(&iommu->lock, flags);
     }
+
+    set_rte_index(rte, offset);
+
+    return 0;
 }
 
 int __init amd_iommu_setup_ioapic_remapping(void)
@@ -152,7 +169,7 @@ int __init amd_iommu_setup_ioapic_remapp
     u16 seg, bdf, req_id;
     struct amd_iommu *iommu;
     spinlock_t *lock;
-    int offset;
+    unsigned int offset;
 
     /* Read ioapic entries and update interrupt remapping table accordingly */
     for ( apic = 0; apic < nr_ioapics; apic++ )
@@ -183,19 +200,24 @@ int __init amd_iommu_setup_ioapic_remapp
             dest = rte.dest.logical.logical_dest;
 
             spin_lock_irqsave(lock, flags);
-            offset = get_intremap_offset(vector, delivery_mode);
+            offset = alloc_intremap_entry(seg, req_id);
+            BUG_ON(offset >= INTREMAP_ENTRIES);
+            ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] = offset;
             entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
             update_intremap_entry(entry, vector,
                                   delivery_mode, dest_mode, dest);
             spin_unlock_irqrestore(lock, flags);
 
+            set_rte_index(&rte, offset);
+            ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] = offset;
+            __ioapic_write_entry(apic, pin, 1, rte);
+
             if ( iommu->enabled )
             {
                 spin_lock_irqsave(&iommu->lock, flags);
                 amd_iommu_flush_intremap(iommu, req_id);
                 spin_unlock_irqrestore(&iommu->lock, flags);
             }
-            set_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup);
         }
     }
     return 0;
@@ -208,7 +230,7 @@ void amd_iommu_ioapic_update_ire(
     struct IO_APIC_route_entry new_rte = { 0 };
     unsigned int rte_lo = (reg & 1) ? reg - 1 : reg;
     unsigned int pin = (reg - 0x10) / 2;
-    int saved_mask, seg, bdf;
+    int saved_mask, seg, bdf, rc;
     struct amd_iommu *iommu;
 
     if ( !iommu_intremap )
@@ -246,7 +268,7 @@ void amd_iommu_ioapic_update_ire(
     }
 
     if ( new_rte.mask &&
-         !test_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) )
+         ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] >= INTREMAP_ENTRIES )
     {
         ASSERT(saved_mask);
         __io_apic_write(apic, reg, value);
@@ -261,14 +283,19 @@ void amd_iommu_ioapic_update_ire(
     }
 
     /* Update interrupt remapping entry */
-    update_intremap_entry_from_ioapic(
-        bdf, iommu, &new_rte,
-        test_and_set_bit(pin,
-                         ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) ? &old_rte
-                                                                  : NULL);
+    rc = update_intremap_entry_from_ioapic(
+             bdf, iommu, &new_rte,
+             &ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin]);
 
-    /* Forward write access to IO-APIC RTE */
-    __io_apic_write(apic, reg, value);
+    __io_apic_write(apic, reg, ((u32 *)&new_rte)[reg != rte_lo]);
+
+    if ( rc )
+    {
+        /* Keep the entry masked. */
+        printk(XENLOG_ERR "Remapping IO-APIC %#x pin %u failed (%d)\n",
+               IO_APIC_ID(apic), pin, rc);
+        return;
+    }
 
     /* For lower bits access, return directly to avoid double writes */
     if ( reg == rte_lo )
@@ -282,16 +309,41 @@ void amd_iommu_ioapic_update_ire(
     }
 }
 
-static void update_intremap_entry_from_msi_msg(
+unsigned int amd_iommu_read_ioapic_from_ire(
+    unsigned int apic, unsigned int reg)
+{
+    unsigned int val = __io_apic_read(apic, reg);
+
+    if ( !(reg & 1) )
+    {
+        unsigned int offset = val & (INTREMAP_ENTRIES - 1);
+        u16 bdf = ioapic_sbdf[IO_APIC_ID(apic)].bdf;
+        u16 seg = ioapic_sbdf[IO_APIC_ID(apic)].seg;
+        u16 req_id = get_intremap_requestor_id(seg, bdf);
+        const u32 *entry = (void *)get_intremap_entry(seg, req_id, offset);
+
+        val &= ~(INTREMAP_ENTRIES - 1);
+        val |= get_field_from_reg_u32(*entry,
+                                      INT_REMAP_ENTRY_INTTYPE_MASK,
+                                      INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8;
+        val |= get_field_from_reg_u32(*entry,
+                                      INT_REMAP_ENTRY_VECTOR_MASK,
+                                      INT_REMAP_ENTRY_VECTOR_SHIFT);
+    }
+
+    return val;
+}
+
+static int update_intremap_entry_from_msi_msg(
     struct amd_iommu *iommu, u16 bdf,
-    int *remap_index, const struct msi_msg *msg)
+    int *remap_index, const struct msi_msg *msg, u32 *data)
 {
     unsigned long flags;
     u32* entry;
     u16 req_id, alias_id;
     u8 delivery_mode, dest, vector, dest_mode;
     spinlock_t *lock;
-    int offset;
+    unsigned int offset;
 
     req_id = get_dma_requestor_id(iommu->seg, bdf);
     alias_id = get_intremap_requestor_id(iommu->seg, bdf);
@@ -302,15 +354,6 @@ static void update_intremap_entry_from_m
         spin_lock_irqsave(lock, flags);
         free_intremap_entry(iommu->seg, req_id, *remap_index);
         spin_unlock_irqrestore(lock, flags);
-
-        if ( ( req_id != alias_id ) &&
-             get_ivrs_mappings(iommu->seg)[alias_id].intremap_table != NULL )
-        {
-            lock = get_intremap_lock(iommu->seg, alias_id);
-            spin_lock_irqsave(lock, flags);
-            free_intremap_entry(iommu->seg, alias_id, *remap_index);
-            spin_unlock_irqrestore(lock, flags);
-        }
         goto done;
     }
 
@@ -321,16 +364,24 @@ static void update_intremap_entry_from_m
     delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
     vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
     dest = (msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff;
-    offset = get_intremap_offset(vector, delivery_mode);
-    if ( *remap_index < 0)
+    offset = *remap_index;
+    if ( offset >= INTREMAP_ENTRIES )
+    {
+        offset = alloc_intremap_entry(iommu->seg, bdf);
+        if ( offset >= INTREMAP_ENTRIES )
+        {
+            spin_unlock_irqrestore(lock, flags);
+            return -ENOSPC;
+        }
         *remap_index = offset;
-    else
-        BUG_ON(*remap_index != offset);
+    }
 
     entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
     update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
     spin_unlock_irqrestore(lock, flags);
 
+    *data = (msg->data & ~(INTREMAP_ENTRIES - 1)) | offset;
+
     /*
      * In some special cases, a pci-e device(e.g SATA controller in IDE mode)
      * will use alias id to index interrupt remapping table.
@@ -342,10 +393,8 @@ static void update_intremap_entry_from_m
     if ( ( req_id != alias_id ) &&
         get_ivrs_mappings(iommu->seg)[alias_id].intremap_table != NULL )
     {
-        spin_lock_irqsave(lock, flags);
-        entry = (u32*)get_intremap_entry(iommu->seg, alias_id, offset);
-        update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
-        spin_unlock_irqrestore(lock, flags);
+        BUG_ON(get_ivrs_mappings(iommu->seg)[req_id].intremap_table !=
+               get_ivrs_mappings(iommu->seg)[alias_id].intremap_table);
     }
 
 done:
@@ -357,14 +406,17 @@ done:
         amd_iommu_flush_intremap(iommu, alias_id);
         spin_unlock_irqrestore(&iommu->lock, flags);
     }
+
+    return 0;
 }
 
 int amd_iommu_msi_msg_update_ire(
     struct msi_desc *msi_desc, struct msi_msg *msg)
 {
     struct pci_dev *pdev = msi_desc->dev;
-    int bdf, seg;
+    int bdf, seg, rc;
     struct amd_iommu *iommu;
+    u32 data;
 
     bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf;
     seg = pdev ? pdev->seg : hpet_sbdf.seg;
@@ -376,11 +428,12 @@ int amd_iommu_msi_msg_update_ire(
         return -EINVAL;
     }
 
-    if ( msi_desc->remap_index >= 0 )
+    if ( msi_desc->remap_index >= 0 && !msg )
     {
         do {
             update_intremap_entry_from_msi_msg(iommu, bdf,
-                                               &msi_desc->remap_index, NULL);
+                                               &msi_desc->remap_index,
+                                               NULL, NULL);
             if ( !pdev || !pdev->phantom_stride )
                 break;
             bdf += pdev->phantom_stride;
@@ -395,19 +448,35 @@ int amd_iommu_msi_msg_update_ire(
         return 0;
 
     do {
-        update_intremap_entry_from_msi_msg(iommu, bdf, &msi_desc->remap_index,
-                                           msg);
-        if ( !pdev || !pdev->phantom_stride )
+        rc = update_intremap_entry_from_msi_msg(iommu, bdf,
+                                                &msi_desc->remap_index,
+                                                msg, &data);
+        if ( rc || !pdev || !pdev->phantom_stride )
            break;
        bdf += pdev->phantom_stride;
     } while ( PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
 
-    return 0;
+    msg->data = data;
+    return rc;
 }
 
 void amd_iommu_read_msi_from_ire(
     struct msi_desc *msi_desc, struct msi_msg *msg)
 {
+    unsigned int offset = msg->data & (INTREMAP_ENTRIES - 1);
+    const struct pci_dev *pdev = msi_desc->dev;
+    u16 bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf;
+    u16 seg = pdev ? pdev->seg : hpet_sbdf.seg;
+    u16 req_id = get_dma_requestor_id(seg, bdf);
+    const u32 *entry = (void *)get_intremap_entry(seg, req_id, offset);
+
+    msg->data &= ~(INTREMAP_ENTRIES - 1);
+    msg->data |= get_field_from_reg_u32(*entry,
+                                        INT_REMAP_ENTRY_INTTYPE_MASK,
+                                        INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8;
+    msg->data |= get_field_from_reg_u32(*entry,
+                                        INT_REMAP_ENTRY_VECTOR_MASK,
+                                        INT_REMAP_ENTRY_VECTOR_SHIFT);
 }
 
 int __init amd_iommu_free_intremap_table(
@@ -424,12 +493,14 @@ int __init amd_iommu_free_intremap_table
     return 0;
 }
 
-void* __init amd_iommu_alloc_intremap_table(void)
+void* __init amd_iommu_alloc_intremap_table(unsigned long **inuse_map)
 {
     void *tb;
     tb = __alloc_amd_iommu_tables(INTREMAP_TABLE_ORDER);
     BUG_ON(tb == NULL);
     memset(tb, 0, PAGE_SIZE * (1UL << INTREMAP_TABLE_ORDER));
+    *inuse_map = xzalloc_array(unsigned long, BITS_TO_LONGS(INTREMAP_ENTRIES));
+    BUG_ON(*inuse_map == NULL);
     return tb;
 }
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -622,7 +622,7 @@ const struct iommu_ops amd_iommu_ops = {
     .get_device_group_id = amd_iommu_group_id,
     .update_ire_from_apic = amd_iommu_ioapic_update_ire,
     .update_ire_from_msi = amd_iommu_msi_msg_update_ire,
-    .read_apic_from_ire = __io_apic_read,
+    .read_apic_from_ire = amd_iommu_read_ioapic_from_ire,
     .read_msi_from_ire = amd_iommu_read_msi_from_ire,
     .setup_hpet_msi = amd_setup_hpet_msi,
     .suspend = amd_iommu_suspend,
--- a/xen/include/asm-x86/amd-iommu.h
+++ b/xen/include/asm-x86/amd-iommu.h
@@ -119,6 +119,7 @@ struct ivrs_mappings {
 
     /* per device interrupt remapping table */
     void *intremap_table;
+    unsigned long *intremap_inuse;
     spinlock_t intremap_lock;
 
     /* ivhd device data settings */
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
@@ -458,10 +458,6 @@
 #define MAX_AMD_IOMMUS                  32
 
 /* interrupt remapping table */
-#define INT_REMAP_INDEX_DM_MASK         0x1C00
-#define INT_REMAP_INDEX_DM_SHIFT        10
-#define INT_REMAP_INDEX_VECTOR_MASK     0x3FC
-#define INT_REMAP_INDEX_VECTOR_SHIFT    2
 #define INT_REMAP_ENTRY_REMAPEN_MASK    0x00000001
 #define INT_REMAP_ENTRY_REMAPEN_SHIFT   0
 #define INT_REMAP_ENTRY_SUPIOPF_MASK    0x00000002
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -89,10 +89,12 @@ struct amd_iommu *find_iommu_for_device(
 
 /* interrupt remapping */
 int amd_iommu_setup_ioapic_remapping(void);
-void *amd_iommu_alloc_intremap_table(void);
+void *amd_iommu_alloc_intremap_table(unsigned long **);
 int amd_iommu_free_intremap_table(u16 seg, struct ivrs_mappings *);
 void amd_iommu_ioapic_update_ire(
     unsigned int apic, unsigned int reg, unsigned int value);
+unsigned int amd_iommu_read_ioapic_from_ire(
+    unsigned int apic, unsigned int reg);
 int amd_iommu_msi_msg_update_ire(
     struct msi_desc *msi_desc, struct msi_msg *msg);
 void amd_iommu_read_msi_from_ire(
@@ -101,15 +103,17 @@ int amd_setup_hpet_msi(struct msi_desc *
 
 extern struct ioapic_sbdf {
     u16 bdf, seg;
-    unsigned long *pin_setup;
+    u16 *pin_2_idx;
 } ioapic_sbdf[MAX_IO_APICS];
-extern void *shared_intremap_table;
 
 extern struct hpet_sbdf {
     u16 bdf, seg, id;
     struct amd_iommu *iommu;
 } hpet_sbdf;
 
+extern void *shared_intremap_table;
+extern unsigned long *shared_intremap_inuse;
+
 /* power management support */
 void amd_iommu_resume(void);
 void amd_iommu_suspend(void);
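The heart of this patch is replacing get_intremap_offset(), which derived the
IRTE index from the delivery mode and vector, with a small bitmap allocator
per remap table. The sketch below shows the scheme in self-contained form;
the table size, the open-coded bit scan and the missing locking are
simplifications of what the patch does with find_first_zero_bit(), __set_bit()
and __clear_bit() on the intremap_inuse map under the per-table lock:

    #include <stdio.h>

    #define INTREMAP_ENTRIES 512   /* assumed table size, for the sketch only */
    #define BITS_PER_LONG    (8 * sizeof(unsigned long))
    #define BITMAP_LONGS     ((INTREMAP_ENTRIES + BITS_PER_LONG - 1) / BITS_PER_LONG)

    static unsigned long inuse[BITMAP_LONGS];   /* stands in for intremap_inuse */

    /* Simplified find_first_zero_bit(): a plain linear scan. */
    static unsigned int find_first_zero(const unsigned long *map, unsigned int size)
    {
        unsigned int bit;

        for ( bit = 0; bit < size; bit++ )
            if ( !(map[bit / BITS_PER_LONG] & (1UL << (bit % BITS_PER_LONG))) )
                return bit;
        return size;               /* a full map is signalled as "size" */
    }

    static unsigned int alloc_intremap_entry(void)
    {
        unsigned int slot = find_first_zero(inuse, INTREMAP_ENTRIES);

        if ( slot < INTREMAP_ENTRIES )
            inuse[slot / BITS_PER_LONG] |= 1UL << (slot % BITS_PER_LONG);
        return slot;
    }

    static void free_intremap_entry(unsigned int slot)
    {
        inuse[slot / BITS_PER_LONG] &= ~(1UL << (slot % BITS_PER_LONG));
    }

    int main(void)
    {
        unsigned int a = alloc_intremap_entry();   /* 0 */
        unsigned int b = alloc_intremap_entry();   /* 1 */

        printf("allocated slots %u and %u\n", a, b);
        free_intremap_entry(a);
        printf("slot %u is handed out again: %u\n", a, alloc_intremap_entry());
        return 0;
    }

A full table surfaces as a returned slot equal to the table size, which the
patch's callers translate into -ENOSPC with the RTE or MSI entry left masked.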
Jan Beulich
[PATCH 4/4] AMD IOMMU: untie remap and vector maps

With the specific IRTEs used for an interrupt no longer depending on
the vector, there's no need to tie the remap sharing model to the
vector sharing one.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -207,35 +207,6 @@ int __init amd_iov_detect(void)
 
     init_done = 1;
 
-    /*
-     * AMD IOMMUs don't distinguish between vectors destined for
-     * different cpus when doing interrupt remapping. This means
-     * that interrupts going through the same intremap table
-     * can't share the same vector.
-     *
-     * If irq_vector_map isn't specified, choose a sensible default:
-     *  - If we're using per-device interemap tables, per-device
-     *    vector non-sharing maps
-     *  - If we're using a global interemap table, global vector
-     *    non-sharing map
-     */
-    if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_DEFAULT )
-    {
-        if ( amd_iommu_perdev_intremap )
-        {
-            printk("AMD-Vi: Enabling per-device vector maps\n");
-            opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_PERDEV;
-        }
-        else
-        {
-            printk("AMD-Vi: Enabling global vector map\n");
-            opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_GLOBAL;
-        }
-    }
-    else
-    {
-        printk("AMD-Vi: Not overriding irq_vector_map setting\n");
-    }
-
     if ( !amd_iommu_perdev_intremap )
         printk(XENLOG_WARNING "AMD-Vi: Using global interrupt remap table is not recommended (see XSA-36)!\n");
     return scan_pci_devices();
George Dunlap
2013-Mar-27 11:45 UTC
Re: [PATCH 1/4] IOMMU: allow MSI message to IRTE propagation to fail
On Tue, Mar 26, 2013 at 9:03 AM, Jan Beulich <JBeulich@suse.com> wrote:
> With the need to allocate multiple contiguous IRTEs for multi-vector
> MSI, the chance of failure here increases. While on the AMD side
> there's no allocation of IRTEs at present at all (and hence no way for
> this allocation to fail, which is going to change with a later patch in
> this series), VT-d already ignores an eventual error here, which this
> patch fixes.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Jan,

The inline patches you send are wordwrap-damaged. I realize that you're
also attaching the patches, but it's a lot more hassle to download each
one and manually import it.

Could you please try out "git send-email"? There's a basic tutorial here:

http://wiki.xen.org/wiki/Submitting_Xen_Patches

For patches sent with git send-email (or any other method that has the
patches inline and not whitespace-damaged) I have a really efficient way
to get them from the mailing list into a special git topic branch. It
would be nice not to have to special-case yours.

 -George
Jan Beulich
2013-Mar-27 12:16 UTC
Re: [PATCH 1/4] IOMMU: allow MSI message to IRTE propagation to fail
>>> On 27.03.13 at 12:45, George Dunlap <George.Dunlap@eu.citrix.com> wrote:
> The inline patches you send are wordwrap-damaged. I realize that
> you're also attaching the patches, but it's a lot more hassle to
> download each one and manually import it.

I just went out and looked at the copy of the patch you responded to
that I got from xen-devel, and I'm unable to spot any wrapping issues.
On lists.xen.org, I can indeed see an issue in xen/include/xen/iommu.h,
but with this being inconsistent with the mail I got from xen-devel I
can't be sure our mail system is at fault. During the past couple of
years I also haven't had complaints from others about patches from me
coming through line wrapped, and I know specifically the Linux folks
are very picky about this.

> Could you please try out "git send-email"? There's a basic tutorial here:
>
> http://wiki.xen.org/wiki/Submitting_Xen_Patches
>
> For patches sent with git send-email (or any other method that has the
> patches inline and not whitespace-damaged) I have a really efficient
> way to get them from the mailing list into a special git topic branch.
> It would be nice not to have to special-case yours.

I see your point, but I don't think using git for everything can be a
requirement. I'm used to working with quilt, and have no plans of
changing this, as I simply don't see any benefit in using git in my day
to day coding work. I restrict uses of version control systems in
general to the areas where they're needed - when working on shared
repositories.

Furthermore I don't see why mails sent by means other than the mail
client we use over here wouldn't suffer from similar issues, as they
would still pass the same gateways. Hence all I can offer is working
with our IS&T folks, should we be able to identify an incompatibility
(presumably in mail headers).

Jan
Boris Ostrovsky
2013-Mar-27 14:45 UTC
Re: [PATCH 1/4] IOMMU: allow MSI message to IRTE propagation to fail
On 03/27/2013 08:16 AM, Jan Beulich wrote:
> I see your point, but I don't think using git for everything can be a
> requirement. I'm used to working with quilt, and have no plans of
> changing this, as I simply don't see any benefit in using git in my day
> to day coding work. I restrict uses of version control systems in
> general to the areas where they're needed - when working on
> shared repositories.

Not to make you change your mind about which revision control package
to use, but I find guilt (http://linux.die.net/man/7/guilt) to be a
good quilt wrapper on top of git. For simple quilt operations you only
need to change one letter.

-boris
George Dunlap
2013-Mar-27 14:55 UTC
Re: [PATCH 1/4] IOMMU: allow MSI message to IRTE propagation to fail
On 27/03/13 12:16, Jan Beulich wrote:
>>>> On 27.03.13 at 12:45, George Dunlap <George.Dunlap@eu.citrix.com> wrote:
>> The inline patches you send are wordwrap-damaged. I realize that
>> you're also attaching the patches, but it's a lot more hassle to
>> download each one and manually import it.
> I just went out and looked at the copy of the patch you responded
> to that I got from xen-devel, and I'm unable to spot any wrapping
> issues. On lists.xen.org, I can indeed see an issue in
> xen/include/xen/iommu.h, but with this being inconsistent with
> the mail I got from xen-devel I can't be sure our mail system is at
> fault. During the past couple of years I also haven't had complaints
> from others about patches from me coming through line wrapped,
> and I know specifically the Linux folks are very picky about this.

Did you look at the mail in your mailreader, or in the raw mail format?
If you're using your mail reader, it's probably interpreting the
wordwrap stuff properly. The "raw" mail looks like this:

http://marc.info/?l=xen-devel&m=136428861403115&q=raw

The above is what GMail sees if I click "show original", and also what
the Citrix mail system gives me if I save the mail as a file. This
mangling is apparently called "quoted-printable":

http://en.wikipedia.org/wiki/Quoted-printable

The problem is that "patch" (and thus "git apply", "git am", "hg
import", &c &c), not being a mail-reader, doesn't know how to de-mangle
stuff.

(I think I was making a mistake by calling it "whitespace mangling";
"wordwrap damage" is I think what I was looking for, even if
"quoted-printable" is the technical term.)

>> Could you please try out "git send-email"? There's a basic tutorial here:
>>
>> http://wiki.xen.org/wiki/Submitting_Xen_Patches
>>
>> For patches sent with git send-email (or any other method that has the
>> patches inline and not whitespace-damaged) I have a really efficient
>> way to get them from the mailing list into a special git topic branch.
>> It would be nice not to have to special-case yours.
> I see your point, but I don't think using git for everything can be a
> requirement. I'm used to working with quilt, and have no plans of
> changing this, as I simply don't see any benefit in using git in my day
> to day coding work. I restrict uses of version control systems in
> general to the areas where they're needed - when working on
> shared repositories.

I have no problem with you using whatever tool you find most helpful. I
only suggested "git send-email" as one standard, relatively simple and
well-documented solution that will generate mail in text/plain rather
than quoted-printable.

What mail client do you use to send your e-mails?

> Furthermore I don't see why mails sent by means other than the mail
> client we use over here wouldn't suffer from similar issues, as they
> would still pass the same gateways.

Because typically it's the mail client that encodes stuff (and decodes
stuff) from plain text into quoted-printable, not mailer daemons that
translate it. What e-mail client do you use? If there was an option to
have it send stuff as "text/plain" rather than quoted-printable, I
think that would do it.

 -George
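For readers who haven't met the encoding being discussed: in quoted-printable,
a literal '=' travels as "=3D", and a line ending in a bare '=' is a soft line
break that disappears on decode. That is why an undecoded body no longer
matches what "patch" expects, and why decoding applied at the wrong layer can
make literal '=' characters vanish. A minimal decoder sketch, illustrative
only - a real MIME parser must also handle "=\r\n" soft breaks, lowercase hex
and malformed escapes:

    #include <ctype.h>
    #include <stdio.h>

    /*
     * Minimal quoted-printable decoder (RFC 2045 subset): "=XY" becomes
     * the byte 0xXY, and "=" followed by a newline is a soft line break
     * that vanishes entirely. Error handling is elided.
     */
    static void qp_decode(const char *in, char *out)
    {
        while ( *in )
        {
            if ( in[0] == '=' && in[1] == '\n' )
                in += 2;                      /* soft break: the lines join up */
            else if ( in[0] == '=' &&
                      isxdigit((unsigned char)in[1]) &&
                      isxdigit((unsigned char)in[2]) )
            {
                unsigned int byte;

                sscanf(in + 1, "%2x", &byte); /* "=XY" -> raw byte */
                *out++ = (char)byte;
                in += 3;
            }
            else
                *out++ = *in++;
        }
        *out = '\0';
    }

    int main(void)
    {
        /* A diff fragment as it might look inside a quoted-printable body. */
        const char *wire = "    if ( rc !=3D 0 )\n        return =\nrc;\n";
        char text[128];

        qp_decode(wire, text);
        fputs(text, stdout);   /* prints the fragment with '=' and lines intact */
        return 0;
    }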
George Dunlap
2013-Mar-27 17:26 UTC
Re: [PATCH 1/4] IOMMU: allow MSI message to IRTE propagation to fail
On Tue, Mar 26, 2013 at 9:03 AM, Jan Beulich <JBeulich@suse.com> wrote:
> --- a/xen/drivers/passthrough/amd/iommu_intr.c
> +++ b/xen/drivers/passthrough/amd/iommu_intr.c
> @@ -359,16 +359,13 @@ done:
>      }
>  }
>
> -void amd_iommu_msi_msg_update_ire(
> +int amd_iommu_msi_msg_update_ire(
>      struct msi_desc *msi_desc, struct msi_msg *msg)
>  {
>      struct pci_dev *pdev = msi_desc->dev;
>      int bdf, seg;
>      struct amd_iommu *iommu;
>
> -    if ( !iommu_intremap )
> -        return;
> -

It's not clear to me from the description -- why are we removing this?

 -George
Jan Beulich
2013-Mar-28 08:25 UTC
Re: [PATCH 1/4] IOMMU: allow MSI message to IRTE propagation to fail
>>> On 27.03.13 at 15:55, George Dunlap <george.dunlap@eu.citrix.com> wrote:
> Did you look at the mail in your mailreader, or in the raw mail format?

In the mail reader of course (after all, I expect you to use a mail
client too). And as said, I saw some damage when looking at the copy on
lists.xen.org.

> If you're using your mail reader, it's probably interpreting the
> wordwrap stuff properly. The "raw" mail looks like this:
>
> http://marc.info/?l=xen-devel&m=136428861403115&q=raw
>
> The above is what GMail sees if I click "show original", and also what
> the Citrix mail system gives me if I save the mail as a file. This
> mangling is apparently called "quoted-printable":
> http://en.wikipedia.org/wiki/Quoted-printable
>
> The problem is that "patch" (and thus "git apply", "git am", "hg
> import", &c &c), not being a mail-reader, doesn't know how to de-mangle
> stuff.

And rightly so. But your mail client saving the mail should deal with
this properly. (And besides, if you already save the mail, I don't see
why you can't instead save the attachment.)

> What mail client do you use to send your e-mails?

GroupWise.

>> Furthermore I don't see why mails sent by means other than the mail
>> client we use over here wouldn't suffer from similar issues, as they
>> would still pass the same gateways.
>
> Because typically it's the mail client that encodes stuff (and decodes
> stuff) from plain text into quoted-printable, not mailer daemons that
> translate it. What e-mail client do you use? If there was an option to
> have it send stuff as "text/plain" rather than quoted-printable, I think
> that would do it.

There isn't, unfortunately. I had quite a bit of a fight to at least
have them get attachments through without mangling them.

Jan
Jan Beulich
2013-Mar-28 08:27 UTC
Re: [PATCH 1/4] IOMMU: allow MSI message to IRTE propagation to fail
>>> On 27.03.13 at 18:26, George Dunlap <George.Dunlap@eu.citrix.com> wrote:
> On Tue, Mar 26, 2013 at 9:03 AM, Jan Beulich <JBeulich@suse.com> wrote:
>
>> --- a/xen/drivers/passthrough/amd/iommu_intr.c
>> +++ b/xen/drivers/passthrough/amd/iommu_intr.c
>> @@ -359,16 +359,13 @@ done:
>>      }
>>  }
>>
>> -void amd_iommu_msi_msg_update_ire(
>> +int amd_iommu_msi_msg_update_ire(
>>      struct msi_desc *msi_desc, struct msi_msg *msg)
>>  {
>>      struct pci_dev *pdev = msi_desc->dev;
>>      int bdf, seg;
>>      struct amd_iommu *iommu;
>>
>> -    if ( !iommu_intremap )
>> -        return;
>> -
>
> It's not clear to me from the description -- why are we removing this?

It's not really being removed; it merely became redundant with the one
being added to iommu_update_ire_from_msi().

Jan
Tim Deegan
2013-Mar-28 09:46 UTC
Re: [PATCH 1/4] IOMMU: allow MSI message to IRTE propagation to fail
At 08:25 +0000 on 28 Mar (1364459127), Jan Beulich wrote:
> > The problem is that "patch" (and thus "git apply", "git am", "hg
> > import", &c &c), not being a mail-reader, doesn't know how to de-mangle
> > stuff.
>
> And rightly so. But your mail client saving the mail should deal with
> this properly. (And besides, if you already save the mail, I don't
> see why you can't instead save the attachment.)

This seems like a failing in 'git am' rather than anywhere else.
Quoted-printable is a reasonable way to send files by email, even if a
bit OTT for 7-bit clean ASCII, and to store them in an mbox.

I prefer using git send-email (or hg email) to post patch series, as it
automates a tedious and fiddly task, but I don't think we can or should
require it.

Tim.
George Dunlap
2013-Mar-28 09:49 UTC
Re: [PATCH 1/4] IOMMU: allow MSI message to IRTE propagation to fail
On 28/03/13 09:46, Tim Deegan wrote:
> At 08:25 +0000 on 28 Mar (1364459127), Jan Beulich wrote:
>>> The problem is that "patch" (and thus "git apply", "git am", "hg
>>> import", &c &c), not being a mail-reader, doesn't know how to de-mangle
>>> stuff.
>> And rightly so. But your mail client saving the mail should deal with
>> this properly. (And besides, if you already save the mail, I don't
>> see why you can't instead save the attachment.)
> This seems like a failing in 'git am' rather than anywhere else.
> Quoted-printable is a reasonable way to send files by email, even if a
> bit OTT for 7-bit clean ASCII, and to store them in an mbox.

Yes, I suppose if you're going to insist on reading mail folders, you
should actually know how to read mail folders.

 -George
George Dunlap
2013-Mar-28 10:33 UTC
Re: [PATCH 1/4] IOMMU: allow MSI message to IRTE propagation to fail
On 28/03/13 08:25, Jan Beulich wrote:
>>>> On 27.03.13 at 15:55, George Dunlap <george.dunlap@eu.citrix.com> wrote:
>> Did you look at the mail in your mailreader, or in the raw mail format?
> In the mail reader of course (after all, I expect you to use a mail
> client too). And as said, I saw some damage when looking at the
> copy on lists.xen.org.
>
>> If you're using your mail reader, it's probably interpreting the
>> wordwrap stuff properly. The "raw" mail looks like this:
>>
>> http://marc.info/?l=xen-devel&m=136428861403115&q=raw
>>
>> The above is what GMail sees if I click "show original", and also what
>> the Citrix mail system gives me if I save the mail as a file. This
>> mangling is apparently called "quoted-printable":
>> http://en.wikipedia.org/wiki/Quoted-printable
>>
>> The problem is that "patch" (and thus "git apply", "git am", "hg
>> import", &c &c), not being a mail-reader, doesn't know how to de-mangle
>> stuff.
> And rightly so. But your mail client saving the mail should deal with
> this properly. (And besides, if you already save the mail, I don't
> see why you can't instead save the attachment.)

This is already a longer discussion than I really wanted to have, but
just so you have an idea what I'm on about, I'll explain the difference.

The key thing is that "git am" will take an mbox file with a series of
patches and automatically make a commit for each one. So my algorithm
for reviewing patch series sent in text/plain is as follows:

1. In Gmail, mark each patch in the series and save it to a special folder.
2. Open up mutt on my local box. It will connect to gmail and open that
folder.
3. Mark each patch in the series and save it to a local folder in ~/mail/.
4. Use git am to import the whole series as a sequence of commits.
5. View the changeset "in situ" using various git commands ('git meld'
is my favorite ATM).

Marking each one might take 10 seconds, and it's almost entirely
brainless; the main "cognitive load" is just remembering the name that
I've given the local mail folder. A series of 40 patches takes
basically no more cognitive load to download and import into my git
tree than a single patch.

To view yours at the moment, I have to do the following:
1. For each patch in the series, click to download the attachment and
save it to a separate file.
2. Edit each file to include "From: Jan Beulich <JBeulich@suse.com>" at
the top, so it looks sufficiently like an mbox that "git am" won't
complain.
3. For each patch in the series, run "git am" on it individually.

So #1 is slightly more annoying, as saving is more like 2 seconds per
mail and marking a message is like 0.5 seconds per mail. But the big
source of cognitive load is having to deal with the different name of
each patch. It's just that extra little bit when having to open the
file to add the header, and particularly then having to figure out what
order the patches go in. It doesn't really take that much extra time,
but it takes attention to remember the filename, and this adds up for
each patch in the series; so the longer the series, the more cognitive
load it generates.

They've done studies that show that even a minimal amount of cognitive
load has an effect on people's endurance for other cognitive
activities. This is why most successful people instinctively find a way
to make the unimportant decisions in their lives really simple --
spending time thinking about what to wear or what to eat eats away at
precious energy they would rather spend on running the country or
whatever it is they're doing.

Given the limited amount of head-space I have for arbitrary strings of
things, I'd prefer to spend it on actually understanding the patch,
rather than on patch filenames, if I can avoid it; that's why I brought
it up.

It seems like having an automated way to send off an entire patch
queue, rather than cutting and pasting and attaching each mail
individually, would reduce the cognitive load for you as well (not to
mention probably save you several minutes of your day). git and
mercurial both have really good integrated mechanisms to do that; both
also have extensions that allow you to interact with the repository
just like you do with quilt. I'm not familiar with the git ones, but
the mercurial one uses almost exactly the same commands as quilt, but
with "hg" instead of "quilt" at the front (if I remember quilt
correctly -- it's been a long time since I used it).

If you're not willing to find a way to send it text/plain, could you
maybe at least name the attachments "01-[whatever].patch",
"02-[whatever].patch"? I think that would help reduce the cognitive
load quite a bit.

Thanks,
 -George
Jan Beulich
2013-Mar-28 11:14 UTC
Re: [PATCH 1/4] IOMMU: allow MSI message to IRTE propagation to fail
>>> On 28.03.13 at 11:33, George Dunlap <george.dunlap@eu.citrix.com> wrote:
> On 28/03/13 08:25, Jan Beulich wrote:
>>>>> On 27.03.13 at 15:55, George Dunlap <george.dunlap@eu.citrix.com> wrote:
>>>> Did you look at the mail in your mailreader, or in the raw mail format?
>>> In the mail reader of course (after all, I expect you to use a mail
>>> client too). And as said, I saw some damage when looking at the
>>> copy on lists.xen.org.
>>>
>>>> If you're using your mail reader, it's probably interpreting the
>>>> wordwrap stuff properly. The "raw" mail looks like this:
>>>>
>>>> http://marc.info/?l=xen-devel&m=136428861403115&q=raw
>>>>
>>>> The above is what GMail sees if I click "show original", and also what
>>>> the Citrix mail system gives me if I save the mail as a file. This
>>>> mangling is apparently called "quoted-printable":
>>>> http://en.wikipedia.org/wiki/Quoted-printable
>>>>
>>>> The problem is that "patch" (and thus "git apply", "git am", "hg
>>>> import", &c &c), not being a mail-reader, doesn't know how to de-mangle
>>>> stuff.
>>> And rightly so. But your mail client saving the mail should deal with
>>> this properly. (And besides, if you already save the mail, I don't
>>> see why you can't instead save the attachment.)
>
> This is already a longer discussion than I really wanted to have, but
> just so you have an idea what I'm on about, I'll explain the difference.
>
> The key thing is that "git am" will take an mbox file with a series of
> patches and automatically make a commit for each one. So my algorithm
> for reviewing patch series sent in text/plain is as follows:
>
> 1. In Gmail, mark each patch in the series and save it to a special folder.

And in that operation, the quoted-printable encoding should be undone.
Which makes all of the rest of your mail more or less moot.

> 2. Open up mutt on my local box. It will connect to gmail and open that
> folder.
> 3. Mark each patch in the series and save it to a local folder in ~/mail/.
> 4. Use git am to import the whole series as a sequence of commits.
> 5. View the changeset "in situ" using various git commands ('git meld'
> is my favorite ATM).
>
> Marking each one might take 10 seconds, and it's almost entirely
> brainless; the main "cognitive load" is just remembering the name that
> I've given the local mail folder. A series of 40 patches takes
> basically no more cognitive load to download and import into my git tree
> than a single patch.
>
> To view yours at the moment, I have to do the following:
> 1. For each patch in the series, click to download the attachment and
> save it to a separate file.
> 2. Edit each file to include "From: Jan Beulich <JBeulich@suse.com>" at
> the top, so it looks sufficiently like an mbox that "git am" won't
> complain.
> 3. For each patch in the series, run "git am" on it individually.
>
> So #1 is slightly more annoying, as saving is more like 2 seconds per
> mail and marking a message is like 0.5 seconds per mail. But the big
> source of cognitive load is having to deal with the different name of
> each patch. It's just that extra little bit when having to open the
> file to add the header, and particularly then having to figure out what
> order the patches go in. It doesn't really take that much extra time,
> but it takes attention to remember the filename, and this adds up for
> each patch in the series; so the longer the series, the more cognitive
> load it generates.
>
> They've done studies that show that even a minimal amount of cognitive
> load has an effect on people's endurance for other cognitive
> activities. This is why most successful people instinctively find a way
> to make the unimportant decisions in their lives really simple --
> spending time thinking about what to wear or what to eat eats away at
> precious energy they would rather spend on running the country or
> whatever it is they're doing.
>
> Given the limited amount of head-space I have for arbitrary strings of
> things, I'd prefer to spend it on actually understanding the patch,
> rather than on patch filenames, if I can avoid it; that's why I brought
> it up.
>
> It seems like having an automated way to send off an entire patch queue,
> rather than cutting and pasting and attaching each mail individually,
> would reduce the cognitive load for you as well (not to mention probably
> save you several minutes of your day). git and mercurial both have
> really good integrated mechanisms to do that; both also have extensions
> that allow you to interact with the repository just like you do with
> quilt. I'm not familiar with the git ones, but the mercurial one uses
> almost exactly the same commands as quilt, but with "hg" instead of
> "quilt" at the front (if I remember quilt correctly -- it's been a long
> time since I used it).

Indeed this might save me some time when sending the patches. But would
it not require more time when fiddling with the patches while putting
them together? As an example, I don't normally write patch descriptions
right away, but do so only when getting ready to submit the patches.
With git wanting to create a commit out of everything, I have to then
run extra git commands to get the description added to each patch.
Whereas with quilt this is a simple editing of the patch file, easily
cut-n-pasted between different instances of the patch on different
trees (which I particularly find myself in need of when producing
security fixes and their backports).

Similarly, fixing minor issues (including rejects because of changes to
earlier files in the series) by editing a patch file is impossible with
git afaict. And no, using git's merge functionality is of no help to me
at all, not least because of the close to unusable (to me at least) UIs
of any Linux based editors I've come across so far. Yet if anything, a
merge tool ought to be interactive. Merges that just leave awful marks
in the merge output are pretty pointless imo.

> If you're not willing to find a way to send it text/plain, could you
> maybe at least name the attachments "01-[whatever].patch",
> "02-[whatever].patch"? I think that would help reduce the cognitive
> load quite a bit.

That would be possible, but it would make the patch mail composition
more tedious for me. And while I'm all in favor of making others' work
easier when looking at stuff I'm interested in getting reviewed, I have
some difficulty justifying a further increase of my own effort to do
so. If there was a way to make your and my life easier in this
regard...

Jan
Tim Deegan
2013-Mar-28 11:25 UTC
Re: [PATCH 1/4] IOMMU: allow MSI message to IRTE propagation to fail
At 11:14 +0000 on 28 Mar (1364469245), Jan Beulich wrote:
> > It seems like having an automated way to send off an entire patch queue,
> > rather than cutting and pasting and attaching each mail individually,
> > would reduce the cognitive load for you as well (not to mention probably
> > save you several minutes of your day). git and mercurial both have
> > really good integrated mechanisms to do that; both also have extensions
> > that allow you to interact with the repository just like you do with
> > quilt. I'm not familiar with the git ones, but the mercurial one uses
> > almost exactly the same commands as quilt, but with "hg" instead of
> > "quilt" at the front (if I remember quilt correctly -- it's been a long
> > time since I used it).
>
> Indeed this might save me some time when sending the patches.
> But would it not require more time when fiddling with the patches
> while putting them together? As an example, I don't normally
> write patch descriptions right away, but do so only when getting
> ready to submit the patches. With git wanting to create a commit
> out of everything, I have to then run extra git commands to get
> the description added to each patch. Whereas with quilt this is a
> simple editing of the patch file, easily cut-n-pasted between
> different instances of the patch on different trees (which I
> particularly find myself in need of when producing security fixes
> and their backports).

I'm fond of hg's patchqueues for the same reasons -- they behave
basically like quilt does, but you can then use hg email to
automatically send a whole series. On git, I'm experimenting with
guilt to do the same, though of course it's all a bit less
user-friendly.

Tim.
George Dunlap
2013-Mar-28 11:39 UTC
Re: [PATCH 1/4] IOMMU: allow MSI message to IRTE propagation to fail
On 28/03/13 11:14, Jan Beulich wrote:
>>>> On 28.03.13 at 11:33, George Dunlap <george.dunlap@eu.citrix.com> wrote:
>> [...]
>> The key thing is that "git am" will take an mbox file with a series of
>> patches and automatically make a commit for each one. So my algorithm
>> for reviewing patch series sent in text/plain is as follows:
>>
>> 1. In Gmail, mark each patch in the series and save it to a special folder.
> And in that operation, the quoted-printable encoding should be
> undone. Which makes all of the rest of your mail more or less
> moot.

I'm not sure why you think so. First of all, when I say "special
folder" I mean "special gmail folder", which mutt accesses via the
gmail IMAP interface.

Secondly, gmail's default is to treat all e-mail like just plain
English text, in which whitespace and line breaks don't really matter
that much. So if I do copy+paste, or a normal "save", it mucks up all
the whitespace, and the patch won't apply. The alternative which gmail
gives you is "show original", which shows the entire e-mail exactly as
it received it -- including quoted-printable; in which case again the
patch doesn't apply. It seems perfectly reasonable to me for gmail to
behave in this manner.

FWIW Thunderbird behaves exactly the same way -- if you click "save
e-mail" then it saves the mail exactly as it received it; if you do a
copy-and-paste, it mucks up the whitespace.

> Indeed this might save me some time when sending the patches.
> But would it not require more time when fiddling with the patches
> while putting them together? As an example, I don't normally
> write patch descriptions right away, but do so only when getting
> ready to submit the patches. With git wanting to create a commit
> out of everything, I have to then run extra git commands to get
> the description added to each patch. Whereas with quilt this is a
> simple editing of the patch file, easily cut-n-pasted between
> different instances of the patch on different trees (which I
> particularly find myself in need of when producing security fixes
> and their backports).
>
> Similarly, fixing minor issues (including rejects because of changes
> to earlier files in the series) by editing a patch file is impossible
> with git afaict. And no, using git's merge functionality is of no
> help to me at all, not least because of the close to unusable
> (to me at least) UIs of any Linux based editors I've come across
> so far. Yet if anything, a merge tool ought to be interactive.
> Merges that just leave awful marks in the merge output are
> pretty pointless imo.

Actually I completely agree with your assessment of git -- I'm sort of
adapting to the "history rewriting" way of doing things, but I much
prefer mercurial queues.

Mercurial queues, as I said, are almost exactly like quilt. When you
want to make a new patch, you type "hg qnew [filename]". It will
create the file in .hg/patches/ (instead of in patches/, as quilt does
IIRC). The patches in the series are found in .hg/patches/series
(instead of in patches/series). You can directly edit this file and
re-order or comment out patches which are not currently applied. It
will not complain if there is no description (just like quilt);
however, if there *is* a description at the top of the patch, it will
import that into the changeset when you do "hg qpush". (Alternately,
you can use "hg qrefresh -e" to edit the message at the top of the
commit. I find this more convenient.)

So basically, if you use hg patchqueues, you can use your existing
workflow almost exactly:
 * hg qnew / hg qrefresh / hg qpush / hg qpop, just like quilt
 * Edit patches directly, re-order patches by editing series, and add
   patch comments directly, just like in quilt

But additionally, after you get your patch series the way you want it,
you can just type "hg email -o", and it will e-mail the whole lot to
the list.

The main thing to keep in mind is that if you edit the patches
directly, you need to qpop and qpush them to get the changes into the
mercurial changesets -- and that goes for the commit messages as well.
"hg email" sends the *commits*, not the *patches*. So if you edit the
patches directly, you just need to pop and push all the patches first,
which is as easy as "hg qpop -a ; hg qpush -a ; hg email -o".

There are similar options for git (guilt and stgit are some I've heard
of), but I don't really know much about them.

> > If you're not willing to find a way to send it text/plain, could you
> > maybe at least name the attachments "01-[whatever].patch"
> > "02-[whatever].patch"? I think that would help reduce the cognitive
> > load quite a bit.
> That would be possible, but it would make the patch mail composition
> more tedious for me. And while I'm all in favor of making others'
> work easier looking at stuff I'm interested in getting reviewed, I
> have some difficulty justifying my own effort needing to increase
> further to do so. If there was a way to make your and my life
> easier in this regard...

Well, the justification should be that it's more efficient for you to
do it once per patch than for every other reviewer to have to do it
once per patch. :-)

But that may not actually be necessary; I've been mucking about with
git and found some vestigial components that seem not to be in use
anymore that actually understand quoted-printable. They should be
fairly simple to compose into a script that would do what I need.

 -George
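For reference, the de-mangling such a script needs is small. Below is a
minimal, self-contained sketch (illustrative only -- not git's
vestigial code, nor any mail client's): it decodes "=XX" hex escapes
and splices lines at "=" soft line breaks, assuming Unix line endings.

    #include <stdio.h>
    #include <ctype.h>

    static int unhex(int c)
    {
        return isdigit(c) ? c - '0' : toupper(c) - 'A' + 10;
    }

    /* Copy "in" to "out", undoing quoted-printable encoding. */
    static void qp_decode(FILE *in, FILE *out)
    {
        int c;

        while ( (c = fgetc(in)) != EOF )
        {
            if ( c != '=' )
            {
                fputc(c, out);
                continue;
            }
            c = fgetc(in);
            if ( c == '\n' )
                continue;                    /* soft break: join lines */
            if ( isxdigit(c) )
            {
                int d = fgetc(in);

                if ( isxdigit(d) )
                {
                    fputc(unhex(c) * 16 + unhex(d), out);
                    continue;
                }
                ungetc(d, in);               /* malformed: pass through */
            }
            fputc('=', out);                 /* emit the '=' literally */
            if ( c != EOF )
                fputc(c, out);
        }
    }

Run over a raw saved mail, this should yield whitespace-exact patch
text that "patch" or "git am" can consume.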
George Dunlap
2013-Mar-28 12:37 UTC
Re: [PATCH 4/4] AMD IOMMU: untie remap and vector maps
On Tue, Mar 26, 2013 at 9:05 AM, Jan Beulich <JBeulich@suse.com> wrote:
> With the specific IRTEs used for an interrupt no longer depending on
> the vector, there's no need to tie the remap sharing model to the
> vector sharing one.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Unfortunately I'm having a bit of trouble "paging in" the technical
details of why this change was necessary. I thought that the
limitation had to do with the remapping table itself. Are you saying
that if interrupt X maps to pcpu 0 vector 5, and interrupt Y maps to
pcpu 2 vector 5, then even if X and Y are in the same table, they will
still go to the right place? Could you give me a brief explanation of
why that is? (I don't know what IRTE is, and Google is not helpful.)

 -George
>>> On 28.03.13 at 13:37, George Dunlap <George.Dunlap@eu.citrix.com> wrote:
> On Tue, Mar 26, 2013 at 9:05 AM, Jan Beulich <JBeulich@suse.com> wrote:
>> With the specific IRTEs used for an interrupt no longer depending on
>> the vector, there's no need to tie the remap sharing model to the
>> vector sharing one.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> Unfortunately I'm having a bit of trouble "paging in" the technical
> details of why this change was necessary. I thought that the
> limitation had to do with the remapping table itself. Are you saying
> that if interrupt X maps to pcpu 0 vector 5, and interrupt Y maps to
> pcpu 2 vector 5, then even if X and Y are in the same table, they will
> still go to the right place? Could you give me a brief explanation of
> why that is? (I don't know what IRTE is, and Google is not helpful.)

Previously the remapping table was indexed by vector (combined with
delivery mode, for whatever reason). That way, a particular vector, no
matter which CPU it was meant to go to, caused the same IRTE
(Interrupt Remapping Table Entry) to be selected, and with it the same
destination ID.

By not using the vector for indexing anymore, a vector allocated in
CPU A's space connects with IRTE x, with destination ID set to (or
including, when in cluster or flat mode) CPU A, whereas CPU B's
identical vector connects to a different IRTE (even when using a
single global remapping table), properly set to the destination ID
for (or including) that CPU.

Jan
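To make the indexing change concrete, here is a small self-contained
sketch (simplified, not the Xen code itself): the old masks and shifts
are taken from the INT_REMAP_INDEX_* constants that patch 3 removes,
while INTREMAP_ENTRIES below is an illustrative size only, and the
bitmap scan assumes 64-bit unsigned long.

    #include <stdint.h>

    /* Old scheme: the IRTE (byte) offset is a pure function of vector
     * and delivery mode, so the same vector on two different CPUs
     * selects the same entry - and hence the same destination ID. */
    static unsigned int irte_offset_old(uint8_t vector, uint8_t dm)
    {
        return ((dm << 10) & 0x1c00) | ((vector << 2) & 0x3fc);
    }

    /* New scheme: entries are handed out from a per-table "inuse"
     * bitmap, so each interrupt owns its own IRTE regardless of which
     * vector (or CPU) it uses, and each IRTE can carry its own
     * destination ID. */
    #define INTREMAP_ENTRIES 512  /* illustrative size only */

    static unsigned int alloc_irte(unsigned long *inuse)
    {
        unsigned int slot;

        for ( slot = 0; slot < INTREMAP_ENTRIES; ++slot )
            if ( !(inuse[slot / 64] & (1UL << (slot % 64))) )
            {
                inuse[slot / 64] |= 1UL << (slot % 64);
                return slot;
            }
        return INTREMAP_ENTRIES;  /* table full */
    }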
George Dunlap
2013-Mar-28 13:40 UTC
Re: [PATCH 4/4] AMD IOMMU: untie remap and vector maps
On 28/03/13 13:09, Jan Beulich wrote:
>>>> On 28.03.13 at 13:37, George Dunlap <George.Dunlap@eu.citrix.com> wrote:
>> [...]
>> Could you give me a brief explanation of
>> why that is? (I don't know what IRTE is, and Google is not helpful.)
> Previously the remapping table was indexed by vector (combined with
> delivery mode, for whatever reason). That way, a particular vector, no
> matter which CPU it was meant to go to, caused the same IRTE
> (Interrupt Remapping Table Entry) to be selected, and with it the same
> destination ID.
>
> By not using the vector for indexing anymore, a vector allocated in
> CPU A's space connects with IRTE x, with destination ID set to (or
> including, when in cluster or flat mode) CPU A, whereas CPU B's
> identical vector connects to a different IRTE (even when using a
> single global remapping table), properly set to the destination ID
> for (or including) that CPU.

OK -- well, assuming that to be the case:

Acked-by: George Dunlap <george.dunlap@eu.citrix.com>
Stefano Stabellini
2013-Mar-28 13:47 UTC
Re: [PATCH 1/4] IOMMU: allow MSI message to IRTE propagation to fail
On Thu, 28 Mar 2013, Jan Beulich wrote:
> Indeed this might save me some time when sending the patches.
> But would it not require more time when fiddling with the patches
> while putting them together? As an example, I don't normally
> write patch descriptions right away, but do so only when getting
> ready to submit the patches. With git wanting to create a commit
> out of everything, I have to then run extra git commands to get
> the description added to each patch. Whereas with quilt this is a
> simple editing of the patch file, easily cut-n-pasted between
> different instances of the patch on different trees (which I
> particularly find myself in need of when producing security fixes
> and their backports).
>
> Similarly, fixing minor issues (including rejects because of changes
> to earlier files in the series) by editing a patch file is impossible
> with git afaict. And no, using git's merge functionality is of no
> help to me at all, not least because of the close to unusable
> (to me at least) UIs of any Linux based editors I've come across
> so far. Yet if anything, a merge tool ought to be interactive.
> Merges that just leave awful marks in the merge output are
> pretty pointless imo.

[...]

> That would be possible, but it would make the patch mail composition
> more tedious for me. And while I'm all in favor of making others'
> work easier looking at stuff I'm interested in getting reviewed, I
> have some difficulty justifying my own effort needing to increase
> further to do so. If there was a way to make your and my life
> easier in this regard...

I agree with you on all points; in fact I don't use git rebase either.
But at the same time I need to be able to produce git branches and
issue pull requests on the LKML, so I use guilt: it's just like quilt
plus a minimum level of integration with git, so that every time you
guilt push or guilt pop you create or remove a git commit from the
working branch. Of course all the patches are maintained in separate
text files in a subdirectory (.git/patches/<branchname>). You can
still use all your cut-and-paste tricks between the patch files. If
you are used to quilt, you should be at home with guilt.
Suravee Suthikulpanit
2013-Mar-29 05:18 UTC
Re: [PATCH 0/4] x86/IOMMU: multi-vector MSI prerequisites
Jan,

I found an issue when booting the hypervisor after the patch: an
assertion at line 64 of iommu_intr.c (in function get_intremap_entry).

Stack dump:
...........
get_intremap_entry+0x39/0x58
amd_iommu_read_msi_from_ire+0x74/0xac
iommu_read_msi_from_ire+0x3c/0x3f
set_msi_affinity+0x161/0x1ae
enable_iommu+0x681/0x7d2
amd_iommu_init+0x50b/0x6b1
amd_iov_detect+0x69/0xad
iommu_setup+0x67/0x171
__start_xen+0x278c/0x2b9d

************************************
Panic on CPU 0:
Assertion '(table != NULL) && (offset < INTREMAP_ENTRIES)' failed at
iommu_intr.c:64

*************************************

Investigation showing table is not null and offset = 0.

Suravee

On 3/26/2013 3:51 AM, Jan Beulich wrote:
> 1: IOMMU: allow MSI message to IRTE propagation to fail
> 2: x86/MSI: cleanup to prepare for multi-vector MSI
> 3: AMD IOMMU: allocate IRTE entries instead of using a static mapping
> 4: AMD IOMMU: untie remap and vector maps
>
> Note that especially patch 3 is more RFC-like, as I continue to have
> no way to test this here.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Suravee Suthikulpanit
2013-Mar-29 05:45 UTC
Re: [PATCH 0/4] x86/IOMMU: multi-vector MSI prerequisites
On 3/29/2013 12:18 AM, Suravee Suthikulpanit wrote:
> Jan,
>
> I found an issue when booting the hypervisor after the patch: an
> assertion at line 64 of iommu_intr.c (in function
> get_intremap_entry).
>
> Stack dump:
> [...]
>
> ************************************
> Panic on CPU 0:
> Assertion '(table != NULL) && (offset < INTREMAP_ENTRIES)' failed at
> iommu_intr.c:64
>
> *************************************
>
> Investigation showing table is not null and offset = 0.
>
Sorry for the typo. The investigation shows that the ASSERT inside
get_intremap_entry failed when bdf = 0x2 (which is the IOMMU); the
table is NULL.

Suravee

> Suravee
>
> On 3/26/2013 3:51 AM, Jan Beulich wrote:
>> 1: IOMMU: allow MSI message to IRTE propagation to fail
>> [...]
Jan Beulich
2013-Apr-02 08:38 UTC
[PATCH v2 3/4] AMD IOMMU: allocate IRTE entries instead of using a static mapping
AMD IOMMU: allocate IRTE entries instead of using a static mapping For multi-vector MSI, where we surely don''t want to allocate contiguous vectors and be able to set affinities of the individual vectors separately, we need to drop the use of the tuple of vector and delivery mode to determine the IRTE to use, and instead allocate IRTEs (which imo should have been done from the beginning). Signed-off-by: Jan Beulich <jbeulich@suse.com> --- v2: {get,free}_intremap_entry()''s offset parameter is now an entry offset rather than a byte one. With the return type change of get_intremap_entry() also drop all the bogus casts its callers used. --- One thing I surely need confirmation on is whether this BUG_ON(get_ivrs_mappings(iommu->seg)[req_id].intremap_table ! get_ivrs_mappings(iommu->seg)[alias_id].intremap_table); in update_intremap_entry_from_msi_msg() is valid. If it isn''t, it''s not clear to me how to properly set up things for affected devices, as we would need an identical index allocated for two different remap table instances (which can hardly be expected to work out well). --- a/xen/drivers/passthrough/amd/iommu_acpi.c +++ b/xen/drivers/passthrough/amd/iommu_acpi.c @@ -72,12 +72,15 @@ static void __init add_ivrs_mapping_entr /* allocate per-device interrupt remapping table */ if ( amd_iommu_perdev_intremap ) ivrs_mappings[alias_id].intremap_table - amd_iommu_alloc_intremap_table(); + amd_iommu_alloc_intremap_table( + &ivrs_mappings[alias_id].intremap_inuse); else { if ( shared_intremap_table == NULL ) - shared_intremap_table = amd_iommu_alloc_intremap_table(); + shared_intremap_table = amd_iommu_alloc_intremap_table( + &shared_intremap_inuse); ivrs_mappings[alias_id].intremap_table = shared_intremap_table; + ivrs_mappings[alias_id].intremap_inuse = shared_intremap_inuse; } } /* assgin iommu hardware */ @@ -671,7 +674,7 @@ static u16 __init parse_ivhd_device_spec if ( IO_APIC_ID(apic) != special->handle ) continue; - if ( ioapic_sbdf[special->handle].pin_setup ) + if ( ioapic_sbdf[special->handle].pin_2_idx ) { if ( ioapic_sbdf[special->handle].bdf == bdf && ioapic_sbdf[special->handle].seg == seg ) @@ -691,14 +694,16 @@ static u16 __init parse_ivhd_device_spec ioapic_sbdf[special->handle].bdf = bdf; ioapic_sbdf[special->handle].seg = seg; - ioapic_sbdf[special->handle].pin_setup = xzalloc_array( - unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic])); + ioapic_sbdf[special->handle].pin_2_idx = xmalloc_array( + u16, nr_ioapic_entries[apic]); if ( nr_ioapic_entries[apic] && - !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup ) + !ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx ) { printk(XENLOG_ERR "IVHD Error: Out of memory\n"); return 0; } + memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1, + nr_ioapic_entries[apic]); } break; } @@ -926,7 +931,7 @@ static int __init parse_ivrs_table(struc for ( apic = 0; !error && iommu_intremap && apic < nr_ioapics; ++apic ) { if ( !nr_ioapic_entries[apic] || - ioapic_sbdf[IO_APIC_ID(apic)].pin_setup ) + ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx ) continue; printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n", @@ -935,13 +940,15 @@ static int __init parse_ivrs_table(struc error = -ENXIO; else { - ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array( - unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic])); - if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup ) + ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx = xmalloc_array( + u16, nr_ioapic_entries[apic]); + if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx ) { printk(XENLOG_ERR "IVHD Error: Out of memory\n"); error 
= -ENOMEM; } + memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1, + nr_ioapic_entries[apic]); } } --- a/xen/drivers/passthrough/amd/iommu_intr.c +++ b/xen/drivers/passthrough/amd/iommu_intr.c @@ -30,6 +30,7 @@ struct ioapic_sbdf ioapic_sbdf[MAX_IO_APICS]; struct hpet_sbdf hpet_sbdf; void *shared_intremap_table; +unsigned long *shared_intremap_inuse; static DEFINE_SPINLOCK(shared_intremap_lock); static spinlock_t* get_intremap_lock(int seg, int req_id) @@ -45,30 +46,31 @@ static int get_intremap_requestor_id(int return get_ivrs_mappings(seg)[bdf].dte_requestor_id; } -static int get_intremap_offset(u8 vector, u8 dm) +static unsigned int alloc_intremap_entry(int seg, int bdf) { - int offset = 0; - offset = (dm << INT_REMAP_INDEX_DM_SHIFT) & INT_REMAP_INDEX_DM_MASK; - offset |= (vector << INT_REMAP_INDEX_VECTOR_SHIFT ) & - INT_REMAP_INDEX_VECTOR_MASK; - return offset; + unsigned long *inuse = get_ivrs_mappings(seg)[bdf].intremap_inuse; + unsigned int slot = find_first_zero_bit(inuse, INTREMAP_ENTRIES); + + if ( slot < INTREMAP_ENTRIES ) + __set_bit(slot, inuse); + return slot; } -static u8 *get_intremap_entry(int seg, int bdf, int offset) +static u32 *get_intremap_entry(int seg, int bdf, int offset) { - u8 *table; + u32 *table = get_ivrs_mappings(seg)[bdf].intremap_table; - table = (u8*)get_ivrs_mappings(seg)[bdf].intremap_table; ASSERT( (table != NULL) && (offset < INTREMAP_ENTRIES) ); - return (u8*) (table + offset); + return table + offset; } static void free_intremap_entry(int seg, int bdf, int offset) { - u32* entry; - entry = (u32*)get_intremap_entry(seg, bdf, offset); + u32 *entry = get_intremap_entry(seg, bdf, offset); + memset(entry, 0, sizeof(u32)); + __clear_bit(offset, get_ivrs_mappings(seg)[bdf].intremap_inuse); } static void update_intremap_entry(u32* entry, u8 vector, u8 int_type, @@ -97,18 +99,24 @@ static void update_intremap_entry(u32* e INT_REMAP_ENTRY_VECTOR_SHIFT, entry); } -static void update_intremap_entry_from_ioapic( +static void set_rte_index(struct IO_APIC_route_entry *rte, int offset) +{ + rte->vector = (u8)offset; + rte->delivery_mode = offset >> 8; +} + +static int update_intremap_entry_from_ioapic( int bdf, struct amd_iommu *iommu, - const struct IO_APIC_route_entry *rte, - const struct IO_APIC_route_entry *old_rte) + struct IO_APIC_route_entry *rte, + u16 *index) { unsigned long flags; u32* entry; u8 delivery_mode, dest, vector, dest_mode; int req_id; spinlock_t *lock; - int offset; + unsigned int offset; req_id = get_intremap_requestor_id(iommu->seg, bdf); lock = get_intremap_lock(iommu->seg, req_id); @@ -120,16 +128,20 @@ static void update_intremap_entry_from_i spin_lock_irqsave(lock, flags); - offset = get_intremap_offset(vector, delivery_mode); - if ( old_rte ) + offset = *index; + if ( offset >= INTREMAP_ENTRIES ) { - int old_offset = get_intremap_offset(old_rte->vector, - old_rte->delivery_mode); - - if ( offset != old_offset ) - free_intremap_entry(iommu->seg, bdf, old_offset); + offset = alloc_intremap_entry(iommu->seg, req_id); + if ( offset >= INTREMAP_ENTRIES ) + { + spin_unlock_irqrestore(lock, flags); + rte->mask = 1; + return -ENOSPC; + } + *index = offset; } - entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset); + + entry = get_intremap_entry(iommu->seg, req_id, offset); update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest); spin_unlock_irqrestore(lock, flags); @@ -140,6 +152,10 @@ static void update_intremap_entry_from_i amd_iommu_flush_intremap(iommu, req_id); spin_unlock_irqrestore(&iommu->lock, flags); } + + 
set_rte_index(rte, offset); + + return 0; } int __init amd_iommu_setup_ioapic_remapping(void) @@ -152,7 +168,7 @@ int __init amd_iommu_setup_ioapic_remapp u16 seg, bdf, req_id; struct amd_iommu *iommu; spinlock_t *lock; - int offset; + unsigned int offset; /* Read ioapic entries and update interrupt remapping table accordingly */ for ( apic = 0; apic < nr_ioapics; apic++ ) @@ -183,19 +199,24 @@ int __init amd_iommu_setup_ioapic_remapp dest = rte.dest.logical.logical_dest; spin_lock_irqsave(lock, flags); - offset = get_intremap_offset(vector, delivery_mode); - entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset); + offset = alloc_intremap_entry(seg, req_id); + BUG_ON(offset >= INTREMAP_ENTRIES); + ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] = offset; + entry = get_intremap_entry(iommu->seg, req_id, offset); update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest); spin_unlock_irqrestore(lock, flags); + set_rte_index(&rte, offset); + ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] = offset; + __ioapic_write_entry(apic, pin, 1, rte); + if ( iommu->enabled ) { spin_lock_irqsave(&iommu->lock, flags); amd_iommu_flush_intremap(iommu, req_id); spin_unlock_irqrestore(&iommu->lock, flags); } - set_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup); } } return 0; @@ -208,7 +229,7 @@ void amd_iommu_ioapic_update_ire( struct IO_APIC_route_entry new_rte = { 0 }; unsigned int rte_lo = (reg & 1) ? reg - 1 : reg; unsigned int pin = (reg - 0x10) / 2; - int saved_mask, seg, bdf; + int saved_mask, seg, bdf, rc; struct amd_iommu *iommu; if ( !iommu_intremap ) @@ -246,7 +267,7 @@ void amd_iommu_ioapic_update_ire( } if ( new_rte.mask && - !test_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) ) + ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] >= INTREMAP_ENTRIES ) { ASSERT(saved_mask); __io_apic_write(apic, reg, value); @@ -261,14 +282,19 @@ void amd_iommu_ioapic_update_ire( } /* Update interrupt remapping entry */ - update_intremap_entry_from_ioapic( - bdf, iommu, &new_rte, - test_and_set_bit(pin, - ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) ? &old_rte - : NULL); + rc = update_intremap_entry_from_ioapic( + bdf, iommu, &new_rte, + &ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin]); - /* Forward write access to IO-APIC RTE */ - __io_apic_write(apic, reg, value); + __io_apic_write(apic, reg, ((u32 *)&new_rte)[reg != rte_lo]); + + if ( rc ) + { + /* Keep the entry masked. 
*/ + printk(XENLOG_ERR "Remapping IO-APIC %#x pin %u failed (%d)\n", + IO_APIC_ID(apic), pin, rc); + return; + } /* For lower bits access, return directly to avoid double writes */ if ( reg == rte_lo ) @@ -282,16 +308,41 @@ void amd_iommu_ioapic_update_ire( } } -static void update_intremap_entry_from_msi_msg( +unsigned int amd_iommu_read_ioapic_from_ire( + unsigned int apic, unsigned int reg) +{ + unsigned int val = __io_apic_read(apic, reg); + + if ( !(reg & 1) ) + { + unsigned int offset = val & (INTREMAP_ENTRIES - 1); + u16 bdf = ioapic_sbdf[IO_APIC_ID(apic)].bdf; + u16 seg = ioapic_sbdf[IO_APIC_ID(apic)].seg; + u16 req_id = get_intremap_requestor_id(seg, bdf); + const u32 *entry = get_intremap_entry(seg, req_id, offset); + + val &= ~(INTREMAP_ENTRIES - 1); + val |= get_field_from_reg_u32(*entry, + INT_REMAP_ENTRY_INTTYPE_MASK, + INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8; + val |= get_field_from_reg_u32(*entry, + INT_REMAP_ENTRY_VECTOR_MASK, + INT_REMAP_ENTRY_VECTOR_SHIFT); + } + + return val; +} + +static int update_intremap_entry_from_msi_msg( struct amd_iommu *iommu, u16 bdf, - int *remap_index, const struct msi_msg *msg) + int *remap_index, const struct msi_msg *msg, u32 *data) { unsigned long flags; u32* entry; u16 req_id, alias_id; u8 delivery_mode, dest, vector, dest_mode; spinlock_t *lock; - int offset; + unsigned int offset; req_id = get_dma_requestor_id(iommu->seg, bdf); alias_id = get_intremap_requestor_id(iommu->seg, bdf); @@ -302,15 +353,6 @@ static void update_intremap_entry_from_m spin_lock_irqsave(lock, flags); free_intremap_entry(iommu->seg, req_id, *remap_index); spin_unlock_irqrestore(lock, flags); - - if ( ( req_id != alias_id ) && - get_ivrs_mappings(iommu->seg)[alias_id].intremap_table != NULL ) - { - lock = get_intremap_lock(iommu->seg, alias_id); - spin_lock_irqsave(lock, flags); - free_intremap_entry(iommu->seg, alias_id, *remap_index); - spin_unlock_irqrestore(lock, flags); - } goto done; } @@ -321,16 +363,24 @@ static void update_intremap_entry_from_m delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1; vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK; dest = (msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff; - offset = get_intremap_offset(vector, delivery_mode); - if ( *remap_index < 0) + offset = *remap_index; + if ( offset >= INTREMAP_ENTRIES ) + { + offset = alloc_intremap_entry(iommu->seg, bdf); + if ( offset >= INTREMAP_ENTRIES ) + { + spin_unlock_irqrestore(lock, flags); + return -ENOSPC; + } *remap_index = offset; - else - BUG_ON(*remap_index != offset); + } - entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset); + entry = get_intremap_entry(iommu->seg, req_id, offset); update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest); spin_unlock_irqrestore(lock, flags); + *data = (msg->data & ~(INTREMAP_ENTRIES - 1)) | offset; + /* * In some special cases, a pci-e device(e.g SATA controller in IDE mode) * will use alias id to index interrupt remapping table. 
@@ -342,10 +392,8 @@ static void update_intremap_entry_from_m if ( ( req_id != alias_id ) && get_ivrs_mappings(iommu->seg)[alias_id].intremap_table != NULL ) { - spin_lock_irqsave(lock, flags); - entry = (u32*)get_intremap_entry(iommu->seg, alias_id, offset); - update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest); - spin_unlock_irqrestore(lock, flags); + BUG_ON(get_ivrs_mappings(iommu->seg)[req_id].intremap_table !+ get_ivrs_mappings(iommu->seg)[alias_id].intremap_table); } done: @@ -357,14 +405,17 @@ done: amd_iommu_flush_intremap(iommu, alias_id); spin_unlock_irqrestore(&iommu->lock, flags); } + + return 0; } int amd_iommu_msi_msg_update_ire( struct msi_desc *msi_desc, struct msi_msg *msg) { struct pci_dev *pdev = msi_desc->dev; - int bdf, seg; + int bdf, seg, rc; struct amd_iommu *iommu; + u32 data; bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf; seg = pdev ? pdev->seg : hpet_sbdf.seg; @@ -376,11 +427,12 @@ int amd_iommu_msi_msg_update_ire( return -EINVAL; } - if ( msi_desc->remap_index >= 0 ) + if ( msi_desc->remap_index >= 0 && !msg ) { do { update_intremap_entry_from_msi_msg(iommu, bdf, - &msi_desc->remap_index, NULL); + &msi_desc->remap_index, + NULL, NULL); if ( !pdev || !pdev->phantom_stride ) break; bdf += pdev->phantom_stride; @@ -395,19 +447,35 @@ int amd_iommu_msi_msg_update_ire( return 0; do { - update_intremap_entry_from_msi_msg(iommu, bdf, &msi_desc->remap_index, - msg); - if ( !pdev || !pdev->phantom_stride ) + rc = update_intremap_entry_from_msi_msg(iommu, bdf, + &msi_desc->remap_index, + msg, &data); + if ( rc || !pdev || !pdev->phantom_stride ) break; bdf += pdev->phantom_stride; } while ( PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) ); - return 0; + msg->data = data; + return rc; } void amd_iommu_read_msi_from_ire( struct msi_desc *msi_desc, struct msi_msg *msg) { + unsigned int offset = msg->data & (INTREMAP_ENTRIES - 1); + const struct pci_dev *pdev = msi_desc->dev; + u16 bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf; + u16 seg = pdev ? 
pdev->seg : hpet_sbdf.seg; + u16 req_id = get_dma_requestor_id(seg, bdf); + const u32 *entry = get_intremap_entry(seg, req_id, offset); + + msg->data &= ~(INTREMAP_ENTRIES - 1); + msg->data |= get_field_from_reg_u32(*entry, + INT_REMAP_ENTRY_INTTYPE_MASK, + INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8; + msg->data |= get_field_from_reg_u32(*entry, + INT_REMAP_ENTRY_VECTOR_MASK, + INT_REMAP_ENTRY_VECTOR_SHIFT); } int __init amd_iommu_free_intremap_table( @@ -424,12 +492,14 @@ int __init amd_iommu_free_intremap_table return 0; } -void* __init amd_iommu_alloc_intremap_table(void) +void* __init amd_iommu_alloc_intremap_table(unsigned long **inuse_map) { void *tb; tb = __alloc_amd_iommu_tables(INTREMAP_TABLE_ORDER); BUG_ON(tb == NULL); memset(tb, 0, PAGE_SIZE * (1UL << INTREMAP_TABLE_ORDER)); + *inuse_map = xzalloc_array(unsigned long, BITS_TO_LONGS(INTREMAP_ENTRIES)); + BUG_ON(*inuse_map == NULL); return tb; } --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c @@ -622,7 +622,7 @@ const struct iommu_ops amd_iommu_ops = { .get_device_group_id = amd_iommu_group_id, .update_ire_from_apic = amd_iommu_ioapic_update_ire, .update_ire_from_msi = amd_iommu_msi_msg_update_ire, - .read_apic_from_ire = __io_apic_read, + .read_apic_from_ire = amd_iommu_read_ioapic_from_ire, .read_msi_from_ire = amd_iommu_read_msi_from_ire, .setup_hpet_msi = amd_setup_hpet_msi, .suspend = amd_iommu_suspend, --- a/xen/include/asm-x86/amd-iommu.h +++ b/xen/include/asm-x86/amd-iommu.h @@ -119,6 +119,7 @@ struct ivrs_mappings { /* per device interrupt remapping table */ void *intremap_table; + unsigned long *intremap_inuse; spinlock_t intremap_lock; /* ivhd device data settings */ --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h @@ -458,10 +458,6 @@ #define MAX_AMD_IOMMUS 32 /* interrupt remapping table */ -#define INT_REMAP_INDEX_DM_MASK 0x1C00 -#define INT_REMAP_INDEX_DM_SHIFT 10 -#define INT_REMAP_INDEX_VECTOR_MASK 0x3FC -#define INT_REMAP_INDEX_VECTOR_SHIFT 2 #define INT_REMAP_ENTRY_REMAPEN_MASK 0x00000001 #define INT_REMAP_ENTRY_REMAPEN_SHIFT 0 #define INT_REMAP_ENTRY_SUPIOPF_MASK 0x00000002 --- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h @@ -89,10 +89,12 @@ struct amd_iommu *find_iommu_for_device( /* interrupt remapping */ int amd_iommu_setup_ioapic_remapping(void); -void *amd_iommu_alloc_intremap_table(void); +void *amd_iommu_alloc_intremap_table(unsigned long **); int amd_iommu_free_intremap_table(u16 seg, struct ivrs_mappings *); void amd_iommu_ioapic_update_ire( unsigned int apic, unsigned int reg, unsigned int value); +unsigned int amd_iommu_read_ioapic_from_ire( + unsigned int apic, unsigned int reg); int amd_iommu_msi_msg_update_ire( struct msi_desc *msi_desc, struct msi_msg *msg); void amd_iommu_read_msi_from_ire( @@ -101,15 +103,17 @@ int amd_setup_hpet_msi(struct msi_desc * extern struct ioapic_sbdf { u16 bdf, seg; - unsigned long *pin_setup; + u16 *pin_2_idx; } ioapic_sbdf[MAX_IO_APICS]; -extern void *shared_intremap_table; extern struct hpet_sbdf { u16 bdf, seg, id; struct amd_iommu *iommu; } hpet_sbdf; +extern void *shared_intremap_table; +extern unsigned long *shared_intremap_inuse; + /* power management support */ void amd_iommu_resume(void); void amd_iommu_suspend(void); _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Jan Beulich
2013-Apr-02 08:39 UTC
Re: [PATCH 0/4] x86/IOMMU: multi-vector MSI prerequisites
>>> On 29.03.13 at 06:45, Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> wrote:
> On 3/29/2013 12:18 AM, Suravee Suthikulpanit wrote:
>> [...]
>> Investigation showing table is not null and offset = 0.
>>
> Sorry for the typo. The investigation shows that the ASSERT inside
> get_intremap_entry failed when bdf = 0x2 (which is the IOMMU); the
> table is NULL.

That's odd - even before the patch, exactly the same is being done in
update_intremap_entry_from_msi_msg(), yet the assertion didn't
trigger. Could you provide a full log with "iommu=debug" in place?
Perhaps with v2 of patch 3 in this series, which I just sent as a
reply to that other mail, as I spotted another bug while looking into
this: the "offset" parameter of {get,free}_intremap_entry() changed
its meaning from being a byte offset to being an entry one.

Jan
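For illustration, the byte-vs-entry offset distinction referred to
above (a simplified sketch, not the actual patch hunk): with a u32 *
table, C pointer arithmetic already scales by the entry size, so
passing the old byte offset would index four times too far.

    #include <stdint.h>

    /* v1 habit: "offset" was a byte offset into a u8 * table. */
    static uint32_t *entry_by_byte_offset(uint8_t *table, unsigned int off)
    {
        return (uint32_t *)(table + off);
    }

    /* v2: "offset" is an entry index into a u32 * table; handing the
     * old byte offset to this function would overshoot fourfold. */
    static uint32_t *entry_by_index(uint32_t *table, unsigned int idx)
    {
        return table + idx;
    }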
Jan Beulich
2013-Apr-10 13:55 UTC
Re: [PATCH 0/4] x86/IOMMU: multi-vector MSI prerequisites
>>> On 02.04.13 at 10:39, "Jan Beulich" <JBeulich@suse.com> wrote:
>>>> On 29.03.13 at 06:45, Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> wrote:
>> [...]
>> Sorry for the typo. The investigation shows that the ASSERT inside
>> get_intremap_entry failed when bdf = 0x2 (which is the IOMMU); the
>> table is NULL.
>
> That's odd - even before the patch, exactly the same is being done in
> update_intremap_entry_from_msi_msg(), yet the assertion didn't
> trigger. Could you provide a full log with "iommu=debug" in place?
> Perhaps with v2 of patch 3 in this series, which I just sent as a
> reply to that other mail, as I spotted another bug while looking into
> this: the "offset" parameter of {get,free}_intremap_entry() changed
> its meaning from being a byte offset to being an entry one.

Suravee,

did you have a chance to retry with the v2 patch indicated above, or
to gain understanding on the points mentioned above? I'm afraid I
won't be able to make any progress (or propose any fixes) without your
help here, and we'd really like to have multi-vector MSI support ready
for 4.3...

Thanks, Jan
Suravee Suthikulanit
2013-Apr-10 14:38 UTC
Re: [PATCH 0/4] x86/IOMMU: multi-vector MSI prerequisites
On 4/10/2013 8:55 AM, Jan Beulich wrote:
> Suravee,
>
> did you have a chance to retry with the v2 patch indicated above, or
> to gain understanding on the points mentioned above? I'm afraid I
> won't be able to make any progress (or propose any fixes) without your
> help here, and we'd really like to have multi-vector MSI support ready
> for 4.3...
>
> Thanks, Jan

Sorry, I didn't see the patch. Let me try that and get back to you today.

Suravee
Jan Beulich
2013-Apr-10 14:46 UTC
Re: [PATCH 0/4] x86/IOMMU: multi-vector MSI prerequisites
>>> On 10.04.13 at 16:38, Suravee Suthikulanit <suravee.suthikulpanit@amd.com> wrote:
> On 4/10/2013 8:55 AM, Jan Beulich wrote:
>> [...]
> Sorry, I didn't see the patch. Let me try that and get back to you today.

And irrespective of the patch (which fixes another issue, and I
suppose that's not connected to the problem you saw), did you find an
explanation why the IOMMU lookup in the MSI message read path would
fail, when the same lookup in the MSI message write path obviously
succeeds?

Jan
Suravee Suthikulpanit
2013-Apr-11 00:34 UTC
Re: [PATCH v2 3/4] AMD IOMMU: allocate IRTE entries instead of using a static mapping
Jan,

This patch is still having the same issue. The table is NULL. I'm
still trying to root cause it.

Suravee

On 4/2/2013 3:38 AM, Jan Beulich wrote:
> AMD IOMMU: allocate IRTE entries instead of using a static mapping
>
> For multi-vector MSI, where we surely don't want to allocate
> contiguous vectors and be able to set affinities of the individual
> vectors separately, we need to drop the use of the tuple of vector and
> delivery mode to determine the IRTE to use, and instead allocate IRTEs
> (which imo should have been done from the beginning).
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> [...]
Suravee Suthikulpanit
2013-Apr-11 01:51 UTC
Re: [PATCH 0/4] x86/IOMMU: multi-vector MSI prerequisites
On 4/10/2013 9:46 AM, Jan Beulich wrote:
>>>> On 10.04.13 at 16:38, Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> wrote:
>> On 4/10/2013 8:55 AM, Jan Beulich wrote:
>>> Suravee,
>>>
>>> did you have a chance to retry with the v2 patch indicated above,
>>> or to gain understanding on the points mentioned above? I'm
>>> afraid I won't be able to make any progress (or propose any
>>> fixes) without your help here, and we'd really like to have multi-
>>> vector MSI support ready for 4.3...
>>>
>> Sorry, I didn't see the patch. Let me try that and get back to you today.
> And irrespective of the patch (which fixes another issue, and
> I suppose that's not connected to the problem you saw), did
> you find an explanation why the IOMMU lookup in the MSI
> message read path would fail, when the same lookup in the MSI
> message write path obviously succeeds?
>
> Jan

Jan,

- From drivers/passthrough/amd/iommu_init.c: enable_iommu(), the
enabling code tries to set the MSI affinity for the IOMMU.
- From xen/arch/x86/msi.c: set_msi_affinity(), the code calls
read_msi_msg(), which eventually calls amd_iommu_read_msi_from_ire().
- Please note that amd_iommu_read_msi_from_ire() was empty prior to
the patch.
- With the patch, the new code calls get_intremap_entry(), which is
the function that asserts because the table is NULL.

Here, the MSI read message was sent to read IOMMU registers. Since the
message targets the IOMMU itself (bdf 0x2 in this case), it should not
be remapped. That is why I believe the interrupt remapping table is
empty.

Let me know what you think.

Suravee
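[The failing lookup described above is easy to model. Below is a
self-contained toy sketch of get_intremap_entry(): the names mirror the
Xen code, but the bodies and the table layout are simplified
assumptions, not the hypervisor source.]

    /* Toy model of the asserting lookup (simplified; not Xen source). */
    #include <assert.h>
    #include <stdint.h>

    #define INTREMAP_ENTRIES 512

    struct ivrs_mappings {
        uint32_t *intremap_table; /* NULL if no IVHD entry covered this bdf */
    };

    static struct ivrs_mappings ivrs_mappings[0x10000]; /* one slot per bdf */

    static uint32_t *get_intremap_entry(unsigned int bdf, unsigned int offset)
    {
        uint32_t *table = ivrs_mappings[bdf].intremap_table;

        /* This is the assertion that fires for bdf 0x2 (the IOMMU itself)
         * when the ACPI IVRS tables provide no mapping for that device. */
        assert(table != NULL && offset < INTREMAP_ENTRIES);
        return table + offset;
    }

    int main(void)
    {
        return get_intremap_entry(0x2, 0) != NULL; /* aborts: table is NULL */
    }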
Jan Beulich
2013-Apr-11 07:13 UTC
Re: [PATCH 0/4] x86/IOMMU: multi-vector MSI prerequisites
>>> On 11.04.13 at 03:51, Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> wrote:
> - From drivers/passthrough/amd/iommu_init.c: enable_iommu(), the
> enabling code tries to set the MSI affinity for the IOMMU.
> - From xen/arch/x86/msi.c: set_msi_affinity(), the code calls
> read_msi_msg(), which eventually calls amd_iommu_read_msi_from_ire().
> - Please note that amd_iommu_read_msi_from_ire() was empty prior to
> the patch.
> - With the patch, the new code calls get_intremap_entry(), which is
> the function that asserts because the table is NULL.

But even before the patch, update_intremap_entry_from_msi_msg()
also necessarily called get_intremap_entry() for the IOMMU device
itself. And the respective write_msi_msg() call follows right after the
read_msi_msg() one, i.e. there's no room for other setup to be done
by that point. Plus, the assertion was there already.

> Here, the MSI read message was sent to read IOMMU registers. Since the
> message targets the IOMMU itself (bdf 0x2 in this case), it should not
> be remapped. That is why I believe the interrupt remapping table is
> empty.

Whether or not the MSI for the IOMMU device itself needs to be
remapped is an independent question, but judging from the current
code, as well as from the specification not saying otherwise, it
needs to be. Confirmation of this would surely be appreciated
(because otherwise (a) I'd need to understand how this special case
works with the code before the patch, and (b) new special casing may
need to be added).

In any case - I think I said this before already - seeing a full log up
to the crash with "iommu=debug" in place might help. After all, the
ACPI tables ought to mention the IOMMU PCI device in some way,
and hence there ought to be an IVRS mapping as well as an
associated intremap table. Or else the only way this can work
currently would be by way of find_iommu_for_device() returning
NULL for the IOMMU PCI device (which would mean a similar check
would need to be added to amd_iommu_read_msi_from_ire(); but it
would then be bogus for the failure there to cause a log message
to be issued if iommu_debug - that should be trivial to verify on
an unpatched or older [say 4.2.x based] hypervisor passing
"iommu=debug").

Jan
suravee suthikulpanit
2013-Apr-11 15:40 UTC
Re: [PATCH 0/4] x86/IOMMU: multi-vector MSI prerequisites
On 04/11/2013 02:13 AM, Jan Beulich wrote:
>>>> On 11.04.13 at 03:51, Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> wrote:
>> - From drivers/passthrough/amd/iommu_init.c: enable_iommu(), the
>> enabling code tries to set the MSI affinity for the IOMMU.
>> - From xen/arch/x86/msi.c: set_msi_affinity(), the code calls
>> read_msi_msg(), which eventually calls amd_iommu_read_msi_from_ire().
>> - Please note that amd_iommu_read_msi_from_ire() was empty prior to
>> the patch.
>> - With the patch, the new code calls get_intremap_entry(), which is
>> the function that asserts because the table is NULL.
> But even before the patch, update_intremap_entry_from_msi_msg()
> also necessarily called get_intremap_entry() for the IOMMU device
> itself. And the respective write_msi_msg() call follows right after the
> read_msi_msg() one, i.e. there's no room for other setup to be done
> by that point. Plus, the assertion was there already.
>
>> Here, the MSI read message was sent to read IOMMU registers. Since the
>> message targets the IOMMU itself (bdf 0x2 in this case), it should not
>> be remapped. That is why I believe the interrupt remapping table is
>> empty.
> Whether or not the MSI for the IOMMU device itself needs to be
> remapped is an independent question, but judging from the current
> code, as well as from the specification not saying otherwise, it
> needs to be. Confirmation of this would surely be appreciated
> (because otherwise (a) I'd need to understand how this special case
> works with the code before the patch, and (b) new special casing may
> need to be added).
>
> In any case - I think I said this before already - seeing a full log up
> to the crash with "iommu=debug" in place might help. After all, the
> ACPI tables ought to mention the IOMMU PCI device in some way,
> and hence there ought to be an IVRS mapping as well as an
> associated intremap table. Or else the only way this can work
> currently would be by way of find_iommu_for_device() returning
> NULL for the IOMMU PCI device (which would mean a similar check
> would need to be added to amd_iommu_read_msi_from_ire(); but it
> would then be bogus for the failure there to cause a log message
> to be issued if iommu_debug - that should be trivial to verify on
> an unpatched or older [say 4.2.x based] hypervisor passing
> "iommu=debug").
>
> Jan

I have attached the xl dmesg output from the machine running the
unpatched Xen (with iommu=debug). It shows that there is no IVHD entry
set up for bdf 0x2 (the IOMMU). Also, there is a line "AMD-Vi: Fail to
find iommu for MSI device id = 0x2", which can be traced to
xen/drivers/passthrough/amd/iommu_intr.c:
amd_iommu_msi_msg_update_ire(), which is called from
xen/arch/x86/msi.c: write_msi_msg().

When comparing with other AMD systems in the past (i.e. with the SR56XX
chipset), the IVHD contains entries such as the following:

(XEN) AMD-Vi: IVHD Device Entry: type 0x3 id 0 flags 0
(XEN) AMD-Vi:  Dev_Id Range: 0 -> 0x2

which will generate the mapping entries for bdf 0x0, 0x1 and 0x2.

Suravee

PS: I could not get serial console output from the troublesome target
system (Trinity based), so I could not capture the crash message. But
from the earlier email in which I first reported the issue, you can see
that it failed during enable_iommu().
Jan Beulich
2013-Apr-11 16:11 UTC
Re: [PATCH 0/4] x86/IOMMU: multi-vector MSI prerequisites
>>> On 11.04.13 at 17:40, suravee suthikulpanit <suravee.suthikulpanit@amd.com> wrote:
> I have attached the xl dmesg output from the machine running the
> unpatched Xen (with iommu=debug). It shows that there is no IVHD entry
> set up for bdf 0x2 (the IOMMU). Also, there is a line "AMD-Vi: Fail to
> find iommu for MSI device id = 0x2", which can be traced to
> xen/drivers/passthrough/amd/iommu_intr.c:
> amd_iommu_msi_msg_update_ire(), which is called from
> xen/arch/x86/msi.c: write_msi_msg().

Which confirms that adding a check as suggested in an earlier reply
should be all that's missing.

> When comparing with other AMD systems in the past (i.e. with the SR56XX
> chipset), the IVHD contains entries such as the following:
> (XEN) AMD-Vi: IVHD Device Entry: type 0x3 id 0 flags 0
> (XEN) AMD-Vi:  Dev_Id Range: 0 -> 0x2
>
> which will generate the mapping entries for bdf 0x0, 0x1 and 0x2.

Now that is confusing me: I understand this to mean that on such a
system the IOMMU's MSI would get remapped, whereas on the system you
saw the crash on it would remain untouched. Can that really be correct?

> PS: I could not get serial console output from the troublesome target
> system (Trinity based), so I could not capture the crash message. But
> from the earlier email in which I first reported the issue, you can see
> that it failed during enable_iommu().

Yeah, sure, that's understood and fine. Thanks for all the help so far -
I'll get an updated patch set out soon.

Jan
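[The shape of the check agreed on above can be inferred from the
thread, though the exact committed fix is not shown here. A minimal
sketch, assuming Xen's internal helpers (find_iommu_for_device(),
PCI_BDF2(), hpet_sbdf) and the function body from the quoted patch:]

    /* Hypothetical sketch: bail out of the MSI read path when no IOMMU
     * is associated with the requesting device, mirroring the check
     * amd_iommu_msi_msg_update_ire() already performs on the write path. */
    void amd_iommu_read_msi_from_ire(
        struct msi_desc *msi_desc, struct msi_msg *msg)
    {
        const struct pci_dev *pdev = msi_desc->dev;
        u16 bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf;
        u16 seg = pdev ? pdev->seg : hpet_sbdf.seg;

        if ( !find_iommu_for_device(seg, bdf) )
            return; /* leave the message untouched for unmapped devices */

        /* ... existing IRTE lookup and msg->data reconstruction ... */
    }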
Konrad Rzeszutek Wilk
2013-May-06 20:25 UTC
Is: git send-email, patch sending, etc Was: Re: [PATCH 1/4] IOMMU: allow MSI message to IRTE propagation to fail
On Thu, Mar 28, 2013 at 11:14:05AM +0000, Jan Beulich wrote:
> >>> On 28.03.13 at 11:33, George Dunlap <george.dunlap@eu.citrix.com> wrote:
> > On 28/03/13 08:25, Jan Beulich wrote:
> >>>>> On 27.03.13 at 15:55, George Dunlap <george.dunlap@eu.citrix.com> wrote:
> >>> Did you look at the mail in your mailreader, or in the raw mail format?
> >> In the mail reader of course (after all I expect you to use a mail
> >> client too). And as said, I saw some damage when looking at the
> >> copy on lists.xen.org.
> >>
> >>> If you're using your mail reader, it's probably interpreting the
> >>> wordwrap stuff properly. The "raw" mail looks like this:
> >>>
> >>> http://marc.info/?l=xen-devel&m=136428861403115&q=raw
> >>>
> >>> The above is what GMail sees if I click "show original", and also what
> >>> the Citrix mail system gives me if I save the mail as a file. This
> >>> mangling is apparently called "quoted-printable":
> >>> http://en.wikipedia.org/wiki/Quoted-printable
> >>>
> >>> The problem is that "patch" (and thus "git apply", "git am", "hg
> >>> import", &c &c), not being a mail-reader, doesn't know how to de-mangle
> >>> stuff.
> >> And rightly so. But your mail client saving the mail should deal with
> >> this properly. (And besides, if you already save the mail, I don't
> >> see why you can't instead save the attachment.)
> >
> > This is already a longer discussion than I really wanted to have, but
> > just so you have an idea what I'm on about, I'll explain the difference.
> >
> > The key thing is that "git am" will take an mbox file with a series of
> > patches and automatically make a commit for each one. So my algorithm
> > for reviewing a patch series sent in text/plain is as follows:
> >
> > 1. In Gmail, mark each patch in the series and save it to a special folder.
>
> And in that operation, the quoted-printable encoding should be
> undone. Which makes all of the rest of your mail more or less
> moot.
>
> > 2. Open up mutt on my local box. It will connect to gmail and open that
> > folder.
> > 3. Mark each patch in the series and save it to a local folder in ~/mail/.
> > 4. Use git am to import the whole series as a sequence of commits.
> > 5. View the changeset "in situ" using various git commands ('git meld'
> > is my favorite ATM).
> >
> > Marking each one might take 10 seconds, and it's almost entirely
> > brainless; the main "cognitive load" is just remembering the name that
> > I've given the local mail folder. A series of 40 patches takes
> > basically no more cognitive load to download and import into my git tree
> > than a single patch.
> >
> > To view yours at the moment, I have to do the following:
> > 1. For each patch in the series, click to download the attachment and
> > save it to a separate file.
> > 2. Edit each file to include "From: Jan Beulich <JBeulich@suse.com>" at
> > the top, so it looks sufficiently like an mbox that "git am" won't complain.
> > 3. For each patch in the series, run "git am" on it individually.
> >
> > So #1 is slightly more annoying, as saving is more like 2 seconds per
> > mail and marking a message is like 0.5 seconds per mail. But the big
> > source of cognitive load is having to deal with the different name of
> > each patch. It's just that extra little bit when having to open the
> > file to add the header, and particularly then having to figure out what
> > order the patches go in. It doesn't really take that much extra time,
> > but it takes attention to remember the filename, and this adds up
> > for each patch in the series; so the longer the series, the more
> > cognitive load it generates.
> >
> > They've done studies that show that even a minimal amount of cognitive
> > load has an effect on people's endurance for other cognitive
> > activities. This is why most successful people instinctively find a way
> > to make the unimportant decisions in their lives really simple --
> > spending time thinking about what to wear or what to eat eats away at
> > precious energy they would rather spend on running the country or
> > whatever it is they're doing.
> >
> > Given the limited amount of head-space I have for arbitrary strings of
> > things, I'd prefer to spend it on actually understanding the patch,
> > rather than on patch filenames, if I can avoid it; that's why I brought
> > it up.
> >
> > It seems like having an automated way to send off an entire patch queue,
> > rather than cutting and pasting and attaching each mail individually,
> > would reduce the cognitive load for you as well (not to mention probably
> > save you several minutes of your day). git and mercurial both have
> > really good integrated mechanisms to do that; both also have extensions
> > that allow you to interact with the repository just like you do with
> > quilt. I'm not familiar with the git ones, but the mercurial one uses
> > almost exactly the same commands as quilt, but with "hg" instead of
> > "quilt" at the front (if I remember quilt correctly -- it's been a long
> > time since I used it).
>
> Indeed this might save me some time when sending the patches.
> But would it not require more time when fiddling with the patches
> while putting them together? As an example, I don't normally
> write patch descriptions right away, but do so only when getting
> ready to submit the patches. With git wanting to create a commit
> out of everything, I have to then run extra git commands to get
> the description added to each patch. Whereas with quilt this is a
> simple editing of the patch file, easily cut-n-pasted between
> different instances of the patch on different trees (which I
> particularly find myself in need of when producing security fixes
> and their backports).

I had hit this issue in the past. The way I solved it was by using a
healthy mix of 'git commit -s' (from vim, using :!git add % and then
:!git commit -s), typing in a short intro, usually with
'*KONRAD* ADD MORE *', and then following it up with other patches.

If at some point I realized I had screwed up or was unhappy with
the patches, I would do '!git rebase -i HEAD^^^^^^' (the ^ is the
number of patches back I want to go) and either redo the commits
or alter some of the patches. Sometimes that also meant splitting
a patch in two, which requires dropping to the shell right before
the offending commit (when you do 'git rebase -i <something>' you
can choose to get a shell before a patch). In the shell I would then
do 'git show <the offending git commit> > /tmp/a', 'cp /tmp/a /tmp/orig',
edit /tmp/a down to the non-offending bits, then 'patch -p1 -R < /tmp/a',
'git add <files>', 'git commit', and continue with the original patch,
which will now have a conflict (as the rebase tries to apply it).
It is similar to doing it in mercurial with qpop, qpush, qrefresh
and qdiff.

At the end of the day, if I am happy, I can cherry-pick the patches to
other trees (git cherry-pick -x) and call it a day. Or git send-email.

> Similarly, fixing minor issues (including rejects because of changes
> to earlier files in the series) by editing a patch file is impossible
> with git afaict. And no, using git's merge functionality is of no
> help to me at all, not the least because of the close to unusable
> (to me at least) UIs of any Linux based editors I've come across
> so far. Yet if anything, a merge tool ought to be interactive.
> Merges that just leave awful marks in the merge output are
> pretty pointless imo.

I used kdiff3 for that. It does have a pretty good mechanism for
helping you find the base, local and remote versions and figuring out
what goes where.
Ian Campbell
2013-May-07 08:53 UTC
Re: Is: git send-email, patch sending, etc Was: Re: [PATCH 1/4] IOMMU: allow MSI message to IRTE propagation to fail
On Mon, 2013-05-06 at 21:25 +0100, Konrad Rzeszutek Wilk wrote:
> If at some point I realized I had screwed up or was unhappy with
> the patches, I would do '!git rebase -i HEAD^^^^^^' (the ^ is the
> number of patches back I want to go)

Top-tip: ~N is equivalent to N ^'s, so HEAD^^^^^^ == HEAD~6 (assuming
I counted right!)

Ian.
Wei Liu
2013-May-07 09:26 UTC
Re: Is: git send-email, patch sending, etc Was: Re: [PATCH 1/4] IOMMU: allow MSI message to IRTE propagation to fail
On Mon, May 06, 2013 at 09:25:04PM +0100, Konrad Rzeszutek Wilk wrote:
[...]
>
> If at some point I realized I had screwed up or was unhappy with
> the patches, I would do '!git rebase -i HEAD^^^^^^' (the ^ is the
> number of patches back I want to go) and either redo the commits
> or alter some of the patches. Sometimes that also meant splitting
> a patch in two, which requires dropping to the shell right before
> the offending commit (when you do 'git rebase -i <something>' you
> can choose to get a shell before a patch). In the shell I would then
> do 'git show <the offending git commit> > /tmp/a', 'cp /tmp/a /tmp/orig',
> edit /tmp/a down to the non-offending bits, then 'patch -p1 -R < /tmp/a',
> 'git add <files>', 'git commit', and continue with the original patch,
> which will now have a conflict (as the rebase tries to apply it).
>

Re splitting commits: you can do 'git rebase', stopping at the targeted
commit as HEAD, then do 'git reset HEAD^' (no '--hard' option), which
will turn your commit into local changes. Then 'git add <FILE> -p'
should do the job most of the time.

This method seems to require fewer keystrokes and avoids having to
deal with conflicts. ;-)

Wei.
Konrad Rzeszutek Wilk
2013-May-07 14:03 UTC
Re: Is: git send-email, patch sending, etc Was: Re: [PATCH 1/4] IOMMU: allow MSI message to IRTE propagation to fail
On Tue, May 07, 2013 at 10:26:50AM +0100, Wei Liu wrote:
> On Mon, May 06, 2013 at 09:25:04PM +0100, Konrad Rzeszutek Wilk wrote:
> [...]
> >
> > If at some point I realized I had screwed up or was unhappy with
> > the patches, I would do '!git rebase -i HEAD^^^^^^' (the ^ is the
> > number of patches back I want to go) and either redo the commits
> > or alter some of the patches. Sometimes that also meant splitting
> > a patch in two, which requires dropping to the shell right before
> > the offending commit (when you do 'git rebase -i <something>' you
> > can choose to get a shell before a patch). In the shell I would then
> > do 'git show <the offending git commit> > /tmp/a', 'cp /tmp/a /tmp/orig',
> > edit /tmp/a down to the non-offending bits, then 'patch -p1 -R < /tmp/a',
> > 'git add <files>', 'git commit', and continue with the original patch,
> > which will now have a conflict (as the rebase tries to apply it).
> >
>
> Re splitting commits: you can do 'git rebase', stopping at the targeted
> commit as HEAD, then do 'git reset HEAD^' (no '--hard' option), which
> will turn your commit into local changes. Then 'git add <FILE> -p'
> should do the job most of the time.
>
> This method seems to require fewer keystrokes and avoids having to
> deal with conflicts. ;-)

That is much easier. Thanks!

> Wei.