Jiang, Yunhong
2008-Dec-17  12:11 UTC
[Xen-devel] [PATCH][RFC] Support S3 for MSI interrupt in latest kernel dom0
In latest kernel, the pci_save_state will not try to save msi/x_state anymore,
instead, it will try to restore msi state when resume using kernel''s
msi data structure. This cause trouble for us, since thoese MSI data structure
is meaningless in Xen environment.
Several option to resolve this issue:
a) Change the latest kernel (as dom0) to still to save/restore the msi content
b) Add a new hypercall, so when dom0 try to restore dom0, it will instruct Xen
HV to restore the content based on Xen''s MSI data structure
c) In Dom0''s pci_restore_msi/x_state, try to call map_domain_pirq
again. Xen HV will found there is a vector assigned already, then it will try to
re_startup the interrupt.
We try to cook a patch for option c) as following (it is still not fully tested
because my environment is broken and I send to mailing list to get some feedback
in advance), but I suspect this option is not so good because it mix the
function of map_domain_pirq and pirq_guest_bind more (it is very un-clear of the
boundary between these two function now). And I''m not sure if the
re-startup will cause potential issue for non-S3 situation.
Any suggestion?
Thanks
Yunhong Jiang
diff -r 045f70d1acdb xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c	Sat Dec 13 17:44:20 2008 +0000
+++ b/xen/arch/x86/irq.c	Wed Dec 17 12:49:18 2008 +0800
@@ -896,12 +896,23 @@ int map_domain_pirq(
         spin_lock_irqsave(&desc->lock, flags);
 
         if ( desc->handler != &no_irq_type )
-            dprintk(XENLOG_G_ERR, "dom%d: vector %d in use\n",
+            dprintk(XENLOG_G_INFO, "dom%d: vector %d in use\n",
               d->domain_id, vector);
         desc->handler = &pci_msi_type;
         d->arch.pirq_vector[pirq] = vector;
         d->arch.vector_pirq[vector] = pirq;
         setup_msi_irq(pdev, msi_desc);
+
+        /* If the vector has been bound, re-startup it again for S3 situation
*/
+        if (desc->status & IRQ_GUEST)
+        {
+            irq_guest_action_t *action;
+
+            action = (irq_guest_action_t *)desc->action;
+
+            if ((action->nr_guests == 1) && (action->guest[0] ==
d))
+                desc->handler->startup(vector);
+        }
         spin_unlock_irqrestore(&desc->lock, flags);
     } else
     {
diff -r 045f70d1acdb xen/arch/x86/msi.c
--- a/xen/arch/x86/msi.c	Sat Dec 13 17:44:20 2008 +0000
+++ b/xen/arch/x86/msi.c	Wed Dec 17 14:52:59 2008 +0800
@@ -147,6 +147,9 @@ static int set_vector_msi(struct msi_des
         return -EINVAL;
     }
 
+    BUG_ON( (irq_desc[entry->vector].msi_desc)
+             &&(irq_desc[entry->vector].msi_desc != entry) );
+
     irq_desc[entry->vector].msi_desc = entry;
     return 0;
 }
@@ -573,17 +576,19 @@ static int __pci_enable_msi(struct msi_i
 {
     int status;
     struct pci_dev *pdev;
+    struct msi_desc *msi_desc = NULL;
 
     ASSERT(rw_is_locked(&pcidevs_lock));
     pdev = pci_get_pdev(msi->bus, msi->devfn);
     if ( !pdev )
         return -ENODEV;
 
-    if ( find_msi_entry(pdev, msi->vector, PCI_CAP_ID_MSI) )
+    if ( (msi_desc = find_msi_entry(pdev, msi->vector, PCI_CAP_ID_MSI) ))
     {
         dprintk(XENLOG_WARNING, "vector %d has already mapped to MSI on
"
                 "device %02x:%02x.%01x.\n", msi->vector,
msi->bus,
                 PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
+        *desc = msi_desc;
         return 0;
     }
 
@@ -633,6 +638,7 @@ static int __pci_enable_msix(struct msi_
     u16 control;
     u8 slot = PCI_SLOT(msi->devfn);
     u8 func = PCI_FUNC(msi->devfn);
+    struct msi_desc *msi_desc;
 
     ASSERT(rw_is_locked(&pcidevs_lock));
     pdev = pci_get_pdev(msi->bus, msi->devfn);
@@ -645,11 +651,12 @@ static int __pci_enable_msix(struct msi_
     if (msi->entry_nr >= nr_entries)
         return -EINVAL;
 
-    if ( find_msi_entry(pdev, msi->vector, PCI_CAP_ID_MSIX) )
+    if ( (msi_desc = find_msi_entry(pdev, msi->vector, PCI_CAP_ID_MSIX)))
     {
         dprintk(XENLOG_WARNING, "vector %d has already mapped to MSIX on
"
                 "device %02x:%02x.%01x.\n", msi->vector,
msi->bus,
                 PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
+        *desc = msi_desc;
         return 0;
     }
 
diff -r 045f70d1acdb xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c	Sat Dec 13 17:44:20 2008 +0000
+++ b/xen/arch/x86/physdev.c	Wed Dec 17 10:10:36 2008 +0800
@@ -51,6 +51,9 @@ static int physdev_map_pirq(struct physd
         goto free_domain;
     }
 
+    read_lock(&pcidevs_lock);
+    spin_lock(&d->event_lock);
+
     /* Verify or get vector. */
     switch ( map->type )
     {
@@ -60,7 +63,7 @@ static int physdev_map_pirq(struct physd
                 dprintk(XENLOG_G_ERR, "dom%d: map invalid irq %d\n",
                         d->domain_id, map->index);
                 ret = -EINVAL;
-                goto free_domain;
+                goto vector_fail;
             }
             vector = IO_APIC_VECTOR(map->index);
             if ( !vector )
@@ -68,21 +71,27 @@ static int physdev_map_pirq(struct physd
                 dprintk(XENLOG_G_ERR, "dom%d: map irq with no vector
%d\n",
                         d->domain_id, vector);
                 ret = -EINVAL;
-                goto free_domain;
+                goto vector_fail;
             }
             break;
 
         case MAP_PIRQ_TYPE_MSI:
             vector = map->index;
+
             if ( vector == -1 )
-                vector = assign_irq_vector(AUTO_ASSIGN);
+            {
+                if (map->pirq >= 0)
+                    vector = d->arch.pirq_vector[map->pirq];
+                else
+                    vector = assign_irq_vector(AUTO_ASSIGN);
+            }
 
             if ( vector < 0 || vector >= NR_VECTORS )
             {
                 dprintk(XENLOG_G_ERR, "dom%d: map irq with wrong vector
%d\n",
                         d->domain_id, vector);
                 ret = -EINVAL;
-                goto free_domain;
+                goto vector_fail;
             }
 
             _msi.bus = map->bus;
@@ -97,12 +106,10 @@ static int physdev_map_pirq(struct physd
             dprintk(XENLOG_G_ERR, "dom%d: wrong map_pirq type %x\n",
                     d->domain_id, map->type);
             ret = -EINVAL;
-            goto free_domain;
-    }
-
-    read_lock(&pcidevs_lock);
+            goto vector_fail;
+    }
+
     /* Verify or get pirq. */
-    spin_lock(&d->event_lock);
     if ( map->pirq < 0 )
     {
         if ( d->arch.vector_pirq[vector] )
@@ -147,11 +154,12 @@ static int physdev_map_pirq(struct physd
         map->pirq = pirq;
 
 done:
+    if ( (ret != 0) && (map->type == MAP_PIRQ_TYPE_MSI) &&
(map->index == -1) )
+        free_irq_vector(vector);
+vector_fail:
     spin_unlock(&d->event_lock);
     read_unlock(&pcidevs_lock);
-    if ( (ret != 0) && (map->type == MAP_PIRQ_TYPE_MSI) &&
(map->index == -1) )
-        free_irq_vector(vector);
-free_domain:
+free_domain:    
     rcu_unlock_domain(d);
     return ret;
 }
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Keir Fraser
2008-Dec-17  12:27 UTC
[Xen-devel] Re: [PATCH][RFC] Support S3 for MSI interrupt in latest kernel dom0
On 17/12/2008 12:11, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote:> In latest kernel, the pci_save_state will not try to save msi/x_state anymore, > instead, it will try to restore msi state when resume using kernel''s msi data > structure. This cause trouble for us, since thoese MSI data structure is > meaningless in Xen environment. > > Several option to resolve this issue: > a) Change the latest kernel (as dom0) to still to save/restore the msi content > b) Add a new hypercall, so when dom0 try to restore dom0, it will instruct Xen > HV to restore the content based on Xen''s MSI data structureCould Xen remember the MSI state automatically, as it does for IO-APIC presumably already? It knows what vectors are routed where at least, even if dom0 has to reprogram the PCI device itself. I''m not sure about option C. I didn''t really understand the patch, but it smelt like a hack. -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jan Beulich
2008-Dec-17  14:47 UTC
[Xen-devel] Re: [PATCH][RFC] Support S3 for MSI interrupt in latest kernel dom0
>>> Keir Fraser <keir.fraser@eu.citrix.com> 17.12.08 13:27 >>> >On 17/12/2008 12:11, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote: > >> In latest kernel, the pci_save_state will not try to save msi/x_state anymore, >> instead, it will try to restore msi state when resume using kernel''s msi data >> structure. This cause trouble for us, since thoese MSI data structure is >> meaningless in Xen environment. >> >> Several option to resolve this issue: >> a) Change the latest kernel (as dom0) to still to save/restore the msi content >> b) Add a new hypercall, so when dom0 try to restore dom0, it will instruct Xen >> HV to restore the content based on Xen''s MSI data structure > >Could Xen remember the MSI state automatically, as it does for IO-APIC >presumably already? It knows what vectors are routed where at least, even if >dom0 has to reprogram the PCI device itself.That is what the map_guest_pirq() re-invocation is intended for - use the already known (stored) MSI state to re-setup the device accordingly (after all, msi_compose_msg() only depends on the vector information to be able to reconstruct address and data fields).>I''m not sure about option C. I didn''t really understand the patch, but it >smelt like a hack.It indeed does to a certain degree, and I had recommended to send it to the list early in case someone (you?) has a better idea to solve the problem *without* requiring modern Dom0 to deviate more from native than necessary (in particular, without requiring any teardown during suspend), which surely is desirable not only for our forward ported kernel, but also for pv-ops once it gets MSI enabled, and which is also pretty logical given the statement above on how easily the message is re-computed, making storing of the message fields across suspend superfluous. Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2008-Dec-17  15:06 UTC
[Xen-devel] Re: [PATCH][RFC] Support S3 for MSI interrupt in latest kernel dom0
On 17/12/2008 14:47, "Jan Beulich" <jbeulich@novell.com> wrote:>> Could Xen remember the MSI state automatically, as it does for IO-APIC >> presumably already? It knows what vectors are routed where at least, even if >> dom0 has to reprogram the PCI device itself. > > That is what the map_guest_pirq() re-invocation is intended for - use the > already known (stored) MSI state to re-setup the device accordingly > (after all, msi_compose_msg() only depends on the vector information > to be able to reconstruct address and data fields).Why wait for map_guest_pirq() to be invoked to do this? -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jan Beulich
2008-Dec-17  15:19 UTC
[Xen-devel] Re: [PATCH][RFC] Support S3 for MSI interrupt in latest kernel dom0
>>> Keir Fraser <keir.fraser@eu.citrix.com> 17.12.08 16:06 >>> >On 17/12/2008 14:47, "Jan Beulich" <jbeulich@novell.com> wrote: > >>> Could Xen remember the MSI state automatically, as it does for IO-APIC >>> presumably already? It knows what vectors are routed where at least, even if >>> dom0 has to reprogram the PCI device itself. >> >> That is what the map_guest_pirq() re-invocation is intended for - use the >> already known (stored) MSI state to re-setup the device accordingly >> (after all, msi_compose_msg() only depends on the vector information >> to be able to reconstruct address and data fields). > >Why wait for map_guest_pirq() to be invoked to do this?Otherwise we need a new hypercall here, since Xen cannot do this completely on its own (it has to wait for Dom0 to bring the device out of any suspend state it may itself be in). And it would allow restoring the old mechanism in your (2.6.18) Dom0, just without the unmap-pirq portion during suspend - instead of needing another full re- implementation cycle. Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2008-Dec-17  15:22 UTC
[Xen-devel] Re: [PATCH][RFC] Support S3 for MSI interrupt in latest kernel dom0
On 17/12/2008 15:19, "Jan Beulich" <jbeulich@novell.com> wrote:>> Why wait for map_guest_pirq() to be invoked to do this? > > Otherwise we need a new hypercall here, since Xen cannot do this > completely on its own (it has to wait for Dom0 to bring the device out of > any suspend state it may itself be in). And it would allow restoring the > old mechanism in your (2.6.18) Dom0, just without the unmap-pirq > portion during suspend - instead of needing another full re- > implementation cycle.Well, maybe it''s okay then. I don''t think Yunhong''s patch was a good argument for it -- looking again it appears to have plenty of not really related chunks contained in it. -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jan Beulich
2008-Dec-17  15:31 UTC
[Xen-devel] Re: [PATCH][RFC] Support S3 for MSI interrupt in latest kernel dom0
>>> Keir Fraser <keir.fraser@eu.citrix.com> 17.12.08 16:22 >>> >Well, maybe it''s okay then. I don''t think Yunhong''s patch was a good >argument for it -- looking again it appears to have plenty of not really >related chunks contained in it.I don''t think it''s really okay as-is: As he indicated, there may be side effects of the changes during other than resume from S3 (in particular the IRQ_GUEST check around the newly added call to ->startup()), and I was hoping you might have a suggestion on how to better distinguish that particular case. In the worst case, Xen has to set another flag in each MSI irq_desc when it resumes from S3, and do the startup as well as the clearing of the flag when map_domain_pirq runs first for that particular vector. Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2008-Dec-17  15:53 UTC
[Xen-devel] Re: [PATCH][RFC] Support S3 for MSI interrupt in latest kernel dom0
On 17/12/2008 15:31, "Jan Beulich" <jbeulich@novell.com> wrote:>>>> Keir Fraser <keir.fraser@eu.citrix.com> 17.12.08 16:22 >>> >> Well, maybe it''s okay then. I don''t think Yunhong''s patch was a good >> argument for it -- looking again it appears to have plenty of not really >> related chunks contained in it. > > I don''t think it''s really okay as-is: As he indicated, there may be side > effects of the changes during other than resume from S3 (in particular > the IRQ_GUEST check around the newly added call to ->startup()), and > I was hoping you might have a suggestion on how to better distinguish > that particular case. In the worst case, Xen has to set another flag in > each MSI irq_desc when it resumes from S3, and do the startup as well > as the clearing of the flag when map_domain_pirq runs first for that > particular vector.Then do it as a new hypercall? How much complexity does that add on the guest side? -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jiang, Yunhong
2008-Dec-18  02:24 UTC
[Xen-devel] RE: [PATCH][RFC] Support S3 for MSI interrupt in latest kernel dom0
In fact, I think this patch is something like hack the PHYSDEVOP_map_pirq hypercall for the restore usage. Currently the PHYSDEVOP_map_pirq will allocate the irq/vector , setup the irq_desc, programe the physical device etc. With this patch added, when used for resume, it will used mainly for programe the physical device, since the irq/vector/irq_desc is ready, and not like what the name implied (map_pirq) anymore. As for guest side, I assume it will be not complex (Jan may have more estimate), but we are not sure if the new hypercall is acceptable for Suse. Thanks Yunhong Jiang Keir Fraser <mailto:keir.fraser@eu.citrix.com> wrote:> On 17/12/2008 15:31, "Jan Beulich" <jbeulich@novell.com> wrote: > >>>>> Keir Fraser <keir.fraser@eu.citrix.com> 17.12.08 16:22 >>> >>> Well, maybe it''s okay then. I don''t think Yunhong''s patch was a good >>> argument for it -- looking again it appears to have plenty of not really >>> related chunks contained in it. >> >> I don''t think it''s really okay as-is: As he indicated, there may be side >> effects of the changes during other than resume from S3 (in particular >> the IRQ_GUEST check around the newly added call to ->startup()), and >> I was hoping you might have a suggestion on how to better distinguish >> that particular case. In the worst case, Xen has to set another flag in >> each MSI irq_desc when it resumes from S3, and do the startup as well >> as the clearing of the flag when map_domain_pirq runs first for that >> particular vector. > > Then do it as a new hypercall? How much complexity does that add on the > guest side? > > -- Keir_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jan Beulich
2008-Dec-18  07:33 UTC
[Xen-devel] RE: [PATCH][RFC] Support S3 for MSI interrupt in latest kernel dom0
>>> "Jiang, Yunhong" <yunhong.jiang@intel.com> 18.12.08 03:24 >>> >As for guest side, I assume it will be not complex (Jan may have more estimate),No, it wouldn''t be difficult (mostly replacing the map-pirq hypercall with whatever the new name would be.>but we are not sure if the new hypercall is acceptable for Suse.I don''t see a problem with this. Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jiang, Yunhong
2008-Dec-19  10:10 UTC
[Xen-devel] RE: [PATCH][RFC] Support S3 for MSI interrupt in latest kernel dom0
Attached is the patch with a new hypercall added. Jan/Keir, can you please have
a look on it? I didn''t change the dom0 since it has already implemented
save/restore logic with dom0''s internal data structure. But latest
kernel will need this.
Because this patch is based in current xen, the pcidevs_lock is still used as
rw_lock, the below small patch will change that to spin_lock if needed.
Thanks
Yunhong Jiang
diff -r a020395f3ea3 xen/arch/x86/msi.c
--- a/xen/arch/x86/msi.c	Fri Dec 19 18:01:38 2008 +0800
+++ b/xen/arch/x86/msi.c	Fri Dec 19 18:02:31 2008 +0800
@@ -748,7 +748,7 @@ int pci_restore_msi_state(struct pci_dev
     struct msi_desc *entry, *tmp;
     irq_desc_t *desc;
 
-    ASSERT(rw_is_locked(&pcidevs_lock));
+    ASSERT(spin_is_locked(&pcidevs_lock));
 
     if (!pdev)
         return -EINVAL;
diff -r a020395f3ea3 xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c	Fri Dec 19 18:01:38 2008 +0800
+++ b/xen/arch/x86/physdev.c	Fri Dec 19 18:02:57 2008 +0800
@@ -429,17 +429,17 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
 
         ret = -ENODEV;
 
-        read_lock(&pcidevs_lock);
+        spin_lock(&pcidevs_lock);
         pdev = pci_get_pdev(restore_msi.bus, restore_msi.devfn);
         if (!pdev)
         {
-            read_unlock(&pcidevs_lock);
+            spin_unlock(&pcidevs_lock);
             break;
         }
 
         ret = pci_restore_msi_state(pdev);
 
-        read_unlock(&pcidevs_lock);
+        spin_unlock(&pcidevs_lock);
         break;
     }
     default:
Thanks
Yunhong Jiang
Jan Beulich <mailto:jbeulich@novell.com> wrote:>>>> "Jiang, Yunhong" <yunhong.jiang@intel.com>
18.12.08 03:24 >>>
>> As for guest side, I assume it will be not complex (Jan may have more
>> estimate), 
> 
> No, it wouldn''t be difficult (mostly replacing the map-pirq
hypercall with
> whatever the new name would be.
> 
>> but we are not sure if the new hypercall is acceptable for Suse.
> 
> I don''t see a problem with this.
> 
> Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Jan Beulich
2008-Dec-19  10:29 UTC
[Xen-devel] RE: [PATCH][RFC] Support S3 for MSI interrupt in latest kernel dom0
>>> "Jiang, Yunhong" <yunhong.jiang@intel.com> 19.12.08 11:10 >>> >Attached is the patch with a new hypercall added. Jan/Keir, can you >please have a look on it? I didn''t change the dom0 since it has already >implemented save/restore logic with dom0''s internal data structure. >But latest kernel will need this.Looks fine to me - did you try it with a patched .27 kernel? Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jiang, Yunhong
2008-Dec-19  10:39 UTC
[Xen-devel] RE: [PATCH][RFC] Support S3 for MSI interrupt in latest kernel dom0
Yes, I tried it in Sles11 RC1, but some changes needed both for this patch and
for the kernel, including:
a) In this patch, it call pci_get_pdev and protected by pcidevs_lock, while in
SLES11, it will be pci_lock_pdev() and get protected by pci_dev''s lock.
b) In .27 kernel,  current pci_restore_msi_state() will call
__pci_restore_msix_state/__pci_restore_msi_state one by one, since we now passed
the bus/devfn to Xen, so only a hypercall needed in pci_restore_msi_state() and
no distinguish for msi/msix anymore. It is something like following:
static int msi_restore_msi(struct pci_dev *dev)
{
        struct physdev_restore_msi restore_msi;
        int rc;
        restore_msi.bus = dev->bus->number;
        restore_msi.devfn = dev->devfn;
        if ((rc = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi,
&restore_msi)))
                printk(KERN_WARNING "restore msi failed\n");
        return rc;
}
void pci_restore_msi_state(struct pci_dev *dev)
{
        msi_restore_msi(dev);
}
At least it works for my AHCI mode disk and my e1000 NIC.
Thanks
Yunhong Jiang
Jan Beulich <mailto:jbeulich@novell.com> wrote:>>>> "Jiang, Yunhong" <yunhong.jiang@intel.com>
19.12.08 11:10 >>>
>> Attached is the patch with a new hypercall added. Jan/Keir, can you
>> please have a look on it? I didn''t change the dom0 since it
has already
>> implemented save/restore logic with dom0''s internal data
structure.
>> But latest kernel will need this.
> 
> Looks fine to me - did you try it with a patched .27 kernel?
> 
> Jan
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Jan Beulich
2008-Dec-19  10:50 UTC
[Xen-devel] RE: [PATCH][RFC] Support S3 for MSI interrupt in latest kernel dom0
>>> "Jiang, Yunhong" <yunhong.jiang@intel.com> 19.12.08 11:39 >>> >Yes, I tried it in Sles11 RC1, but some changes needed both for this patch and for the kernel, including:I understand that changes are needed. Just wanted to clarify that we know the Xen-side patch works. Thanks, Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel