thr3ads.net - Xen devel - [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0 [Nov 2012]

If this information is useful, please help other people find it:
Share via:

Jan Beulich

2012-Nov-05 16:53 UTC

[PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

Under the assumption that in these cases recurring faults aren''t a
security issue and it can be expected that the drivers there are going
to try to take care of the problem.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -625,6 +625,18 @@ static void parse_event_log_entry(struct
         for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
             if ( get_dma_requestor_id(iommu->seg, bdf) == device_id )
             {
+                const struct pci_dev *pdev;
+
+                spin_lock(&pcidevs_lock);
+                pdev = pci_get_pdev(iommu->seg, PCI_BUS(bdf),
PCI_DEVFN2(bdf));
+                if ( pdev && pdev->domain != dom_xen &&
+                     (!pdev->domain || !IS_PRIV(pdev->domain)) )
+                    pdev = NULL;
+                spin_unlock(&pcidevs_lock);
+
+                if ( pdev )
+                    continue;
+
                 cword = pci_conf_read16(iommu->seg, PCI_BUS(bdf),
                                         PCI_SLOT(bdf), PCI_FUNC(bdf),
                                         PCI_COMMAND);
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -916,7 +916,8 @@ static void __do_iommu_page_fault(struct
     reg = cap_fault_reg_offset(iommu->cap);
     while (1)
     {
-        u8 fault_reason;
+        const struct pci_dev *pdev;
+        u8 fault_reason, bus;
         u16 source_id, cword;
         u32 data;
         u64 guest_addr;
@@ -950,14 +951,27 @@ static void __do_iommu_page_fault(struct
         iommu_page_fault_do_one(iommu, type, fault_reason,
                                 source_id, guest_addr);
 
-        /* Tell the device to stop DMAing; we can''t rely on the guest
to
-         * control it for us. */
-        cword = pci_conf_read16(iommu->intel->drhd->segment,
-                                PCI_BUS(source_id), PCI_SLOT(source_id),
-                                PCI_FUNC(source_id), PCI_COMMAND);
-        pci_conf_write16(iommu->intel->drhd->segment,
PCI_BUS(source_id),
-                         PCI_SLOT(source_id), PCI_FUNC(source_id),
-                         PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
+        bus = PCI_BUS(source_id);
+
+        spin_lock(&pcidevs_lock);
+        pdev = pci_get_pdev(iommu->intel->drhd->segment, bus,
+                            PCI_DEVFN2(source_id));
+        if ( pdev && pdev->domain != dom_xen &&
+             (!pdev->domain || !IS_PRIV(pdev->domain)) )
+            pdev = NULL;
+        spin_unlock(&pcidevs_lock);
+
+        if ( !pdev )
+        {
+            /* Tell the device to stop DMAing; we can''t rely on the
guest to
+             * control it for us. */
+            cword = pci_conf_read16(iommu->intel->drhd->segment, bus,
+                                    PCI_SLOT(source_id), PCI_FUNC(source_id),
+                                    PCI_COMMAND);
+            pci_conf_write16(iommu->intel->drhd->segment, bus,
+                             PCI_SLOT(source_id), PCI_FUNC(source_id),
+                             PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
+        }
 
         fault_index++;
         if ( fault_index > cap_num_fault_regs(iommu->cap) )





_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Keir Fraser

2012-Nov-05 17:15 UTC

head link

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

On 05/11/2012 16:53, "Jan Beulich" <JBeulich@suse.com> wrote:
> Under the assumption that in these cases recurring faults aren''t a
> security issue and it can be expected that the drivers there are going
> to try to take care of the problem.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Solving an observed problem?

 -- Keir
> --- a/xen/drivers/passthrough/amd/iommu_init.c
> +++ b/xen/drivers/passthrough/amd/iommu_init.c
> @@ -625,6 +625,18 @@ static void parse_event_log_entry(struct
>          for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
>              if ( get_dma_requestor_id(iommu->seg, bdf) == device_id )
>              {
> +                const struct pci_dev *pdev;
> +
> +                spin_lock(&pcidevs_lock);
> +                pdev = pci_get_pdev(iommu->seg, PCI_BUS(bdf),
> PCI_DEVFN2(bdf));
> +                if ( pdev && pdev->domain != dom_xen &&
> +                     (!pdev->domain || !IS_PRIV(pdev->domain)) )
> +                    pdev = NULL;
> +                spin_unlock(&pcidevs_lock);
> +
> +                if ( pdev )
> +                    continue;
> +
>                  cword = pci_conf_read16(iommu->seg, PCI_BUS(bdf),
>                                          PCI_SLOT(bdf), PCI_FUNC(bdf),
>                                          PCI_COMMAND);
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -916,7 +916,8 @@ static void __do_iommu_page_fault(struct
>      reg = cap_fault_reg_offset(iommu->cap);
>      while (1)
>      {
> -        u8 fault_reason;
> +        const struct pci_dev *pdev;
> +        u8 fault_reason, bus;
>          u16 source_id, cword;
>          u32 data;
>          u64 guest_addr;
> @@ -950,14 +951,27 @@ static void __do_iommu_page_fault(struct
>          iommu_page_fault_do_one(iommu, type, fault_reason,
>                                  source_id, guest_addr);
>  
> -        /* Tell the device to stop DMAing; we can''t rely on the
guest to
> -         * control it for us. */
> -        cword = pci_conf_read16(iommu->intel->drhd->segment,
> -                                PCI_BUS(source_id), PCI_SLOT(source_id),
> -                                PCI_FUNC(source_id), PCI_COMMAND);
> -        pci_conf_write16(iommu->intel->drhd->segment,
PCI_BUS(source_id),
> -                         PCI_SLOT(source_id), PCI_FUNC(source_id),
> -                         PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
> +        bus = PCI_BUS(source_id);
> +
> +        spin_lock(&pcidevs_lock);
> +        pdev = pci_get_pdev(iommu->intel->drhd->segment, bus,
> +                            PCI_DEVFN2(source_id));
> +        if ( pdev && pdev->domain != dom_xen &&
> +             (!pdev->domain || !IS_PRIV(pdev->domain)) )
> +            pdev = NULL;
> +        spin_unlock(&pcidevs_lock);
> +
> +        if ( !pdev )
> +        {
> +            /* Tell the device to stop DMAing; we can''t rely on
the guest to
> +             * control it for us. */
> +            cword = pci_conf_read16(iommu->intel->drhd->segment,
bus,
> +                                    PCI_SLOT(source_id),
PCI_FUNC(source_id),
> +                                    PCI_COMMAND);
> +            pci_conf_write16(iommu->intel->drhd->segment, bus,
> +                             PCI_SLOT(source_id), PCI_FUNC(source_id),
> +                             PCI_COMMAND, cword &
~PCI_COMMAND_MASTER);
> +        }
>  
>          fault_index++;
>          if ( fault_index > cap_num_fault_regs(iommu->cap) )
> 
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

Jan Beulich

2012-Nov-06 09:08 UTC

head link

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

>>> On 05.11.12 at 18:15, Keir Fraser <keir@xen.org> wrote:
> On 05/11/2012 16:53, "Jan Beulich" <JBeulich@suse.com>
wrote:
> 
>> Under the assumption that in these cases recurring faults
aren''t a
>> security issue and it can be expected that the drivers there are going
>> to try to take care of the problem.
>> 
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> Solving an observed problem?
In the context of analyzing the situation described in
"iommu=dom0-passthrough behavior"
(http://lists.xen.org/archives/html/xen-devel/2012-11/msg00140.html)
I suppressed the IOMMU setup for some device in Dom0, and
was quite puzzled to find that only a single fault would occur.

The BM disabling is definitely a potential problem for devices Xen
itself is making use of, and I view it as wrong for Dom0 too (it
ought to be given a chance to recover - the failure mode of the
driver for the disabled device differs quite significantly before
and after this patch).

With the change at hand, a next step (depending on the outcome
of above inquiry to the IOMMU folks) might then be to insert an
artificial device to overcome the IOMMU faults. This obviously
should only be done for Xen or Dom0 used devices, i.e. could
build on top of the special casing here.

Jan

Tim Deegan

2012-Nov-06 09:44 UTC

head link

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

At 09:08 +0000 on 06 Nov (1352192909), Jan Beulich
wrote:> >>> On 05.11.12 at 18:15, Keir Fraser <keir@xen.org> wrote:
> > On 05/11/2012 16:53, "Jan Beulich" <JBeulich@suse.com>
wrote:
> > 
> >> Under the assumption that in these cases recurring faults
aren''t a
> >> security issue and it can be expected that the drivers there are
going
> >> to try to take care of the problem.
> >> 
> >> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> > 
> > Solving an observed problem?
> 
> In the context of analyzing the situation described in
> "iommu=dom0-passthrough behavior"
> (http://lists.xen.org/archives/html/xen-devel/2012-11/msg00140.html)
> I suppressed the IOMMU setup for some device in Dom0, and
> was quite puzzled to find that only a single fault would occur.
I think it would be better to allow some small number of faults per
device before disabling it rather than give dom0 carte blanche.

This check is really there to stop a mad device from hosing the system
rather than to contain a malicious OS, and a properly out-of-control
device needs to be stopped or it will livelock Xen with iommu faults.
In a uniprocessor system, dom0 might never get the chance to fix it.

Tim.

Jan Beulich

2012-Nov-06 10:19 UTC

head link

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

>>> On 06.11.12 at 10:44, Tim Deegan <tim@xen.org> wrote:
> At 09:08 +0000 on 06 Nov (1352192909), Jan Beulich wrote:
>> >>> On 05.11.12 at 18:15, Keir Fraser <keir@xen.org>
wrote:
>> > On 05/11/2012 16:53, "Jan Beulich"
<JBeulich@suse.com> wrote:
>> > 
>> >> Under the assumption that in these cases recurring faults
aren''t a
>> >> security issue and it can be expected that the drivers there
are going
>> >> to try to take care of the problem.
>> >> 
>> >> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> > 
>> > Solving an observed problem?
>> 
>> In the context of analyzing the situation described in
>> "iommu=dom0-passthrough behavior"
>> (http://lists.xen.org/archives/html/xen-devel/2012-11/msg00140.html)
>> I suppressed the IOMMU setup for some device in Dom0, and
>> was quite puzzled to find that only a single fault would occur.
> 
> I think it would be better to allow some small number of faults per
> device before disabling it rather than give dom0 carte blanche.
> 
> This check is really there to stop a mad device from hosing the system
> rather than to contain a malicious OS, and a properly out-of-control
> device needs to be stopped or it will livelock Xen with iommu faults.
> In a uniprocessor system, dom0 might never get the chance to fix it.
If that''s the main goal, then on the AMD side the code may not do
what you want it to: PPR log entries, causing interrupts too, don''t
get limited/suppressed in any way, yet are obviously to some
extent under guest control.

But yes, for the purpose here limiting the fault rate in some way
(with a slightly higher limit for Dom0 than DomU-s) would indeed
be the better route.

Jan

Dario Faggioli

2012-Nov-06 12:06 UTC

head link

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

On Tue, 2012-11-06 at 09:44 +0000, Tim Deegan wrote:> > In the context of analyzing the situation described in
> > "iommu=dom0-passthrough behavior"
> > (http://lists.xen.org/archives/html/xen-devel/2012-11/msg00140.html)
> > I suppressed the IOMMU setup for some device in Dom0, and
> > was quite puzzled to find that only a single fault would occur.
> 
> I think it would be better to allow some small number of faults per
> device before disabling it rather than give dom0 carte blanche.
> 
> This check is really there to stop a mad device from hosing the system
> rather than to contain a malicious OS, and a properly out-of-control
> device needs to be stopped or it will livelock Xen with iommu faults.
> In a uniprocessor system, dom0 might never get the chance to fix it.
> Right. But moving the fault handling code to softirq should have already
helped solving/mitigating that, hasn''t it?

When implementing and testing that, I wasn''t able to reproduce any
livelock situation (although I can''t exclude that to be at least partly
due to my inexperience, especially at the time, with I/O
virtualization)... Jan, have you (after killing the ''disable
bus-mastering part'' of course)?

Regards,
Dario

-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://retis.sssup.it/people/faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Dario Faggioli

2012-Nov-06 12:08 UTC

head link

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

On Tue, 2012-11-06 at 10:19 +0000, Jan Beulich wrote:> > This check is really there to stop a mad device from hosing the system
> > rather than to contain a malicious OS, and a properly out-of-control
> > device needs to be stopped or it will livelock Xen with iommu faults.
> > In a uniprocessor system, dom0 might never get the chance to fix it.
> 
> If that''s the main goal, then on the AMD side the code may not do
> what you want it to: PPR log entries, causing interrupts too,
don''t
> get limited/suppressed in any way, yet are obviously to some
> extent under guest control.
> IIRC, PPR are dealt with in softirq context too, aren''t they?
> But yes, for the purpose here limiting the fault rate in some way
> (with a slightly higher limit for Dom0 than DomU-s) would indeed
> be the better route.
> BTW, yes, independently from the above and for what it counts, I agree
with this.

Regards,
Dario

-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://retis.sssup.it/people/faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Jan Beulich

2012-Nov-06 12:38 UTC

head link

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

>>> On 06.11.12 at 13:08, Dario Faggioli
<dario.faggioli@citrix.com> wrote:
> On Tue, 2012-11-06 at 10:19 +0000, Jan Beulich wrote:
>> > This check is really there to stop a mad device from hosing the
system
>> > rather than to contain a malicious OS, and a properly
out-of-control
>> > device needs to be stopped or it will livelock Xen with iommu
faults.
>> > In a uniprocessor system, dom0 might never get the chance to fix
it.
>> 
>> If that''s the main goal, then on the AMD side the code may not
do
>> what you want it to: PPR log entries, causing interrupts too,
don''t
>> get limited/suppressed in any way, yet are obviously to some
>> extent under guest control.
>> 
> IIRC, PPR are dealt with in softirq context too, aren''t they?
They are, but that only means Xen stays alive. Dom0 might
still not get a chance to run if there''s a flood of them.

Jan

Jan Beulich

2012-Nov-06 12:51 UTC

head link

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

>>> On 06.11.12 at 13:06, Dario Faggioli
<dario.faggioli@citrix.com> wrote:
> On Tue, 2012-11-06 at 09:44 +0000, Tim Deegan wrote:
>> > In the context of analyzing the situation described in
>> > "iommu=dom0-passthrough behavior"
>> >
(http://lists.xen.org/archives/html/xen-devel/2012-11/msg00140.html)
>> > I suppressed the IOMMU setup for some device in Dom0, and
>> > was quite puzzled to find that only a single fault would occur.
>> 
>> I think it would be better to allow some small number of faults per
>> device before disabling it rather than give dom0 carte blanche.
>> 
>> This check is really there to stop a mad device from hosing the system
>> rather than to contain a malicious OS, and a properly out-of-control
>> device needs to be stopped or it will livelock Xen with iommu faults.
>> In a uniprocessor system, dom0 might never get the chance to fix it.
>> 
> Right. But moving the fault handling code to softirq should have already
> helped solving/mitigating that, hasn''t it?
It helps keeping Xen alive, but doesn''t for any specific domain
(including Dom0).
> When implementing and testing that, I wasn''t able to reproduce any
> livelock situation (although I can''t exclude that to be at least
partly
> due to my inexperience, especially at the time, with I/O
> virtualization)... Jan, have you (after killing the ''disable
> bus-mastering part'' of course)?
No, I haven''t - you''d have to have a device that
doesn''t stop I/O
after a finite amount was done (or program one that way, e.g. by
handing it a cyclic list of SG descriptors or alike). Wasn''t it at your
(Citrix) end that the problem was actually observed/reported?

Jan

Tim Deegan

2012-Nov-06 13:58 UTC

head link

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

At 12:51 +0000 on 06 Nov (1352206269), Jan Beulich
wrote:> >>> On 06.11.12 at 13:06, Dario Faggioli
<dario.faggioli@citrix.com> wrote:
> > On Tue, 2012-11-06 at 09:44 +0000, Tim Deegan wrote:
> >> > In the context of analyzing the situation described in
> >> > "iommu=dom0-passthrough behavior"
> >> >
(http://lists.xen.org/archives/html/xen-devel/2012-11/msg00140.html)
> >> > I suppressed the IOMMU setup for some device in Dom0, and
> >> > was quite puzzled to find that only a single fault would
occur.
> >> 
> >> I think it would be better to allow some small number of faults
per
> >> device before disabling it rather than give dom0 carte blanche.
> >> 
> >> This check is really there to stop a mad device from hosing the
system
> >> rather than to contain a malicious OS, and a properly
out-of-control
> >> device needs to be stopped or it will livelock Xen with iommu
faults.
> >> In a uniprocessor system, dom0 might never get the chance to fix
it.
> >> 
> > Right. But moving the fault handling code to softirq should have
already
> > helped solving/mitigating that, hasn''t it?
> 
> It helps keeping Xen alive, but doesn''t for any specific domain
> (including Dom0).
Indeed.  (Intel) IOMMU interrupts are suppressed until the softirq
handler acknowledges the error, but if the softirq handler doesn''t
disable the device, it will take another IOMMU interrupt immediately.
I thought the AMD side behaved eth same but clearly not -- I''ll try to
take a look at that later in the week.
> > When implementing and testing that, I wasn''t able to
reproduce any
> > livelock situation (although I can''t exclude that to be at
least partly
> > due to my inexperience, especially at the time, with I/O
> > virtualization)... Jan, have you (after killing the ''disable
> > bus-mastering part'' of course)?
> 
> No, I haven''t - you''d have to have a device that
doesn''t stop I/O
> after a finite amount was done (or program one that way, e.g. by
> handing it a cyclic list of SG descriptors or alike). Wasn''t it at
your
> (Citrix) end that the problem was actually observed/reported?
IIRC it was someone at Intel who reported it.  I never saw it myself.

Tim.

Dario Faggioli

2012-Nov-06 14:16 UTC

head link

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

On Tue, 2012-11-06 at 13:58 +0000, Tim Deegan wrote:> > It helps keeping Xen alive, but doesn''t for any specific
domain
> > (including Dom0).
> 
> Indeed.  (Intel) IOMMU interrupts are suppressed until the softirq
> handler acknowledges the error, but if the softirq handler doesn''t
> disable the device, it will take another IOMMU interrupt immediately.
>Right. I wasn''t considering the possibility of this
''back-to-back''
variant of the thing.
> I thought the AMD side behaved eth same but clearly not -- I''ll
try to
> take a look at that later in the week.
> I think it does, or at least, I''m quite sure when I moved the fault
handling code in softiq context, I did it so that behavior was
consistent wrt both Intel and AMD (by explicitly silencing interrupts
until within the tasklet), at least for faults.

PPR came later and I moved them within the softirq as well after asking
AMD people, without having the possibility to test that myself.

So, to recap, feel free to check. Faults should behave as you''re
describing, PPR I''m not sure. :-)

Regards,
Dario

-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://retis.sssup.it/people/faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Jan Beulich

2012-Nov-06 14:17 UTC

head link

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

>>> On 06.11.12 at 14:58, Tim Deegan <tim@xen.org> wrote:
> At 12:51 +0000 on 06 Nov (1352206269), Jan Beulich wrote:
>> >>> On 06.11.12 at 13:06, Dario Faggioli
<dario.faggioli@citrix.com> wrote:
>> > On Tue, 2012-11-06 at 09:44 +0000, Tim Deegan wrote:
>> >> > In the context of analyzing the situation described in
>> >> > "iommu=dom0-passthrough behavior"
>> >> >
(http://lists.xen.org/archives/html/xen-devel/2012-11/msg00140.html)
>> >> > I suppressed the IOMMU setup for some device in Dom0, and
>> >> > was quite puzzled to find that only a single fault would
occur.
>> >> 
>> >> I think it would be better to allow some small number of
faults per
>> >> device before disabling it rather than give dom0 carte
blanche.
>> >> 
>> >> This check is really there to stop a mad device from hosing
the system
>> >> rather than to contain a malicious OS, and a properly
out-of-control
>> >> device needs to be stopped or it will livelock Xen with iommu
faults.
>> >> In a uniprocessor system, dom0 might never get the chance to
fix it.
>> >> 
>> > Right. But moving the fault handling code to softirq should have
already
>> > helped solving/mitigating that, hasn''t it?
>> 
>> It helps keeping Xen alive, but doesn''t for any specific
domain
>> (including Dom0).
> 
> Indeed.  (Intel) IOMMU interrupts are suppressed until the softirq
> handler acknowledges the error, but if the softirq handler doesn''t
> disable the device, it will take another IOMMU interrupt immediately.
> I thought the AMD side behaved eth same but clearly not -- I''ll
try to
> take a look at that later in the week.
I don''t think AMD''s behaves much different, it''s just
that for the
PPR case nothing is being done in that regard (and it''s unclear
whether or under what circumstances a high rate of these could
occur).

Jan

Dario Faggioli

2012-Nov-06 14:24 UTC

head link

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

On Tue, 2012-11-06 at 12:51 +0000, Jan Beulich wrote:> > Right. But moving the fault handling code to softirq should have
already
> > helped solving/mitigating that, hasn''t it?
> 
> It helps keeping Xen alive, but doesn''t for any specific domain
> (including Dom0).
> Ok. Yes, now I remember... That was the main purpose of that work.
Thanks and sorry. :-P
> > When implementing and testing that, I wasn''t able to
reproduce any
> > livelock situation (although I can''t exclude that to be at
least partly
> > due to my inexperience, especially at the time, with I/O
> > virtualization)... Jan, have you (after killing the ''disable
> > bus-mastering part'' of course)?
> 
> No, I haven''t - you''d have to have a device that
doesn''t stop I/O
> after a finite amount was done (or program one that way, e.g. by
> handing it a cyclic list of SG descriptors or alike). Wasn''t it at
your
> (Citrix) end that the problem was actually observed/reported?
> Yes but, as Tim said, I was never able to get in touch with the original
faulting behavior/piece of hardware. Trying to simulate that as you''re
suggesting is basically what I did, but, at that time, I was using a
passed-to-a-guest device, and wasn''t bypassing (for anyone) the
~BUS_MASTERING thing... That''s why I asked whether you observed a
different behaviour.

Anyway, I see that we''re talking about different issues and understand
(now) what you''re trying to solve and why that is needed, so, again,
sorry for dragging the discussion a bit out of scope. :-)

Thanks and Regards,
Dario

-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://retis.sssup.it/people/faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Dario Faggioli

2012-Nov-06 14:29 UTC

head link

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

On Tue, 2012-11-06 at 14:17 +0000, Jan Beulich wrote:> > Indeed.  (Intel) IOMMU interrupts are suppressed until the softirq
> > handler acknowledges the error, but if the softirq handler
doesn''t
> > disable the device, it will take another IOMMU interrupt immediately.
> > I thought the AMD side behaved eth same but clearly not --
I''ll try to
> > take a look at that later in the week.
> 
> I don''t think AMD''s behaves much different, it''s
just that for the
> PPR case nothing is being done in that regard (and it''s unclear
> whether or under what circumstances a high rate of these could
> occur).
> Indeed. I just double checked the code and yes, both faults and PPRs are
handled that way, and new interrupts are disabled until the tasklet
handles the request and re-enables them.

The difference, as Jan is saying, is there is no PPR equivalent of
disabling bus mastering that can ensure livelock safeness when it comes
to them.

Dario

-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://retis.sssup.it/people/faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Jan Beulich

2012-Nov-07 16:05 UTC

head link

[PATCH, v2] IOMMU: don''t immediately disable bus mastering on faults

Instead, give the owning domain at least a small opportunity of fixing
things up, and allow for rare faults to not bring down the device at
all. The amount of faults tolerated within a given time period (all
numbers are made up with no specific rationale) is higher for Dom0 than
for DomU-s.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -566,7 +566,7 @@ static hw_irq_controller iommu_maskable_
 
 static void parse_event_log_entry(struct amd_iommu *iommu, u32 entry[])
 {
-    u16 domain_id, device_id, bdf, cword, flags;
+    u16 domain_id, device_id, bdf, flags;
     u32 code;
     u64 *addr;
     int count = 0;
@@ -620,18 +620,10 @@ static void parse_event_log_entry(struct
                "fault address = %#"PRIx64", flags = %#x\n",
                event_str[code-1], domain_id, device_id, *addr, flags);
 
-        /* Tell the device to stop DMAing; we can''t rely on the guest
to
-         * control it for us. */
         for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
             if ( get_dma_requestor_id(iommu->seg, bdf) == device_id )
-            {
-                cword = pci_conf_read16(iommu->seg, PCI_BUS(bdf),
-                                        PCI_SLOT(bdf), PCI_FUNC(bdf),
-                                        PCI_COMMAND);
-                pci_conf_write16(iommu->seg, PCI_BUS(bdf), PCI_SLOT(bdf),
-                                 PCI_FUNC(bdf), PCI_COMMAND, 
-                                 cword & ~PCI_COMMAND_MASTER);
-            }
+                pci_check_disable_device(iommu->seg, PCI_BUS(bdf),
+                                         PCI_DEVFN2(bdf));
     }
     else
     {
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -215,6 +215,7 @@ static int device_assigned(u16 seg, u8 b
 static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
 {
     struct hvm_iommu *hd = domain_hvm_iommu(d);
+    struct pci_dev *pdev;
     int rc = 0;
 
     if ( !iommu_enabled || !hd->platform_ops )
@@ -228,6 +229,13 @@ static int assign_device(struct domain *
         return -EXDEV;
 
     spin_lock(&pcidevs_lock);
+    pdev = pci_get_pdev(seg, bus, devfn);
+    if ( pdev )
+    {
+        pdev->fault.threshold = PT_FAULT_THRESHOLD_UNPRIV;
+        pdev->fault.count     = 0;
+    }
+
     if ( (rc = hd->platform_ops->assign_device(d, seg, bus, devfn)) )
         goto done;
 
@@ -239,6 +247,8 @@ static int assign_device(struct domain *
         goto done;
     }
 done:
+    if ( pdev && pdev->domain != d )
+        pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
     spin_unlock(&pcidevs_lock);
     return rc;
 }
@@ -379,6 +389,9 @@ int deassign_device(struct domain *d, u1
         return ret;
     }
 
+    pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
+    pdev->fault.count     = 0;
+
     if ( !has_arch_pdevs(d) && need_iommu(d) )
     {
         d->need_iommu = 0;
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -137,6 +137,7 @@ static struct pci_dev *alloc_pdev(struct
     *((u8*) &pdev->bus) = bus;
     *((u8*) &pdev->devfn) = devfn;
     pdev->domain = NULL;
+    pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
     INIT_LIST_HEAD(&pdev->msi_list);
     list_add(&pdev->alldevs_list, &pseg->alldevs_list);
     spin_lock_init(&pdev->msix_table_lock);
@@ -672,6 +673,36 @@ int __init pci_device_detect(u16 seg, u8
     return 1;
 }
 
+void pci_check_disable_device(u16 seg, u8 bus, u8 devfn)
+{
+    struct pci_dev *pdev;
+    s_time_t now = NOW();
+    u16 cword;
+
+    spin_lock(&pcidevs_lock);
+    pdev = pci_get_pdev(seg, bus, devfn);
+    if ( pdev )
+    {
+        if ( now < pdev->fault.time ||
+             now - pdev->fault.time > MILLISECS(10) )
+            pdev->fault.count >>= 1;
+        pdev->fault.time = now;
+        if ( ++pdev->fault.count < pdev->fault.threshold )
+            pdev = NULL;
+    }
+    spin_unlock(&pcidevs_lock);
+
+    if ( !pdev )
+        return;
+
+    /* Tell the device to stop DMAing; we can''t rely on the guest to
+     * control it for us. */
+    cword = pci_conf_read16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+                            PCI_COMMAND);
+    pci_conf_write16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+                     PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
+}
+
 /*
  * scan pci devices to add all existed PCI devices to alldevs_list,
  * and setup pci hierarchy in array bus2bridge.
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -917,7 +917,7 @@ static void __do_iommu_page_fault(struct
     while (1)
     {
         u8 fault_reason;
-        u16 source_id, cword;
+        u16 source_id;
         u32 data;
         u64 guest_addr;
         int type;
@@ -950,14 +950,8 @@ static void __do_iommu_page_fault(struct
         iommu_page_fault_do_one(iommu, type, fault_reason,
                                 source_id, guest_addr);
 
-        /* Tell the device to stop DMAing; we can''t rely on the guest
to
-         * control it for us. */
-        cword = pci_conf_read16(iommu->intel->drhd->segment,
-                                PCI_BUS(source_id), PCI_SLOT(source_id),
-                                PCI_FUNC(source_id), PCI_COMMAND);
-        pci_conf_write16(iommu->intel->drhd->segment,
PCI_BUS(source_id),
-                         PCI_SLOT(source_id), PCI_FUNC(source_id),
-                         PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
+        pci_check_disable_device(iommu->intel->drhd->segment,
+                                 PCI_BUS(source_id), PCI_DEVFN2(source_id));
 
         fault_index++;
         if ( fault_index > cap_num_fault_regs(iommu->cap) )
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -64,6 +64,13 @@ struct pci_dev {
     const u8 devfn;
     struct pci_dev_info info;
     struct arch_pci_dev arch;
+    struct {
+        s_time_t time;
+        unsigned int threshold;
+#define PT_FAULT_THRESHOLD_PRIV  10
+#define PT_FAULT_THRESHOLD_UNPRIV 3
+        unsigned int count;
+    } fault;
     u64 vf_rlen[6];
 };
 
@@ -107,6 +114,7 @@ int pci_hide_device(int bus, int devfn);
 struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
 struct pci_dev *pci_get_pdev_by_domain(
     struct domain *, int seg, int bus, int devfn);
+void pci_check_disable_device(u16 seg, u8 bus, u8 devfn);
 
 uint8_t pci_conf_read8(
     unsigned int seg, unsigned int bus, unsigned int dev, unsigned int func,




_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Tim Deegan

2012-Nov-08 12:42 UTC

head link

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

At 15:29 +0100 on 06 Nov (1352215760), Dario Faggioli
wrote:> On Tue, 2012-11-06 at 14:17 +0000, Jan Beulich wrote:
> > > Indeed.  (Intel) IOMMU interrupts are suppressed until the
softirq
> > > handler acknowledges the error, but if the softirq handler
doesn''t
> > > disable the device, it will take another IOMMU interrupt
immediately.
> > > I thought the AMD side behaved eth same but clearly not --
I''ll try to
> > > take a look at that later in the week.
> > 
> > I don''t think AMD''s behaves much different,
it''s just that for the
> > PPR case nothing is being done in that regard (and it''s
unclear
> > whether or under what circumstances a high rate of these could
> > occur).
> > 
> Indeed. I just double checked the code and yes, both faults and PPRs are
> handled that way, and new interrupts are disabled until the tasklet
> handles the request and re-enables them.
> 
> The difference, as Jan is saying, is there is no PPR equivalent of
> disabling bus mastering that can ensure livelock safeness when it comes
> to them.
Right, I see.  We could always disable the card entirely. 

Tim.

Tim Deegan

2012-Nov-08 12:46 UTC

head link

Re: [PATCH, v2] IOMMU: don''t immediately disable bus mastering on faults

At 16:05 +0000 on 07 Nov (1352304352), Jan Beulich
wrote:> Instead, give the owning domain at least a small opportunity of fixing
> things up, and allow for rare faults to not bring down the device at
> all. The amount of faults tolerated within a given time period (all
> numbers are made up with no specific rationale) is higher for Dom0 than
> for DomU-s.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Looks good to me, though I don''t think dom0 should have any special
treatment here.  I''d be inclined to set the threshold to 10 for
everyone. 

Tim.
> --- a/xen/drivers/passthrough/amd/iommu_init.c
> +++ b/xen/drivers/passthrough/amd/iommu_init.c
> @@ -566,7 +566,7 @@ static hw_irq_controller iommu_maskable_
>  
>  static void parse_event_log_entry(struct amd_iommu *iommu, u32 entry[])
>  {
> -    u16 domain_id, device_id, bdf, cword, flags;
> +    u16 domain_id, device_id, bdf, flags;
>      u32 code;
>      u64 *addr;
>      int count = 0;
> @@ -620,18 +620,10 @@ static void parse_event_log_entry(struct
>                 "fault address = %#"PRIx64", flags =
%#x\n",
>                 event_str[code-1], domain_id, device_id, *addr, flags);
>  
> -        /* Tell the device to stop DMAing; we can''t rely on the
guest to
> -         * control it for us. */
>          for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
>              if ( get_dma_requestor_id(iommu->seg, bdf) == device_id )
> -            {
> -                cword = pci_conf_read16(iommu->seg, PCI_BUS(bdf),
> -                                        PCI_SLOT(bdf), PCI_FUNC(bdf),
> -                                        PCI_COMMAND);
> -                pci_conf_write16(iommu->seg, PCI_BUS(bdf),
PCI_SLOT(bdf),
> -                                 PCI_FUNC(bdf), PCI_COMMAND, 
> -                                 cword & ~PCI_COMMAND_MASTER);
> -            }
> +                pci_check_disable_device(iommu->seg, PCI_BUS(bdf),
> +                                         PCI_DEVFN2(bdf));
>      }
>      else
>      {
> --- a/xen/drivers/passthrough/iommu.c
> +++ b/xen/drivers/passthrough/iommu.c
> @@ -215,6 +215,7 @@ static int device_assigned(u16 seg, u8 b
>  static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
>  {
>      struct hvm_iommu *hd = domain_hvm_iommu(d);
> +    struct pci_dev *pdev;
>      int rc = 0;
>  
>      if ( !iommu_enabled || !hd->platform_ops )
> @@ -228,6 +229,13 @@ static int assign_device(struct domain *
>          return -EXDEV;
>  
>      spin_lock(&pcidevs_lock);
> +    pdev = pci_get_pdev(seg, bus, devfn);
> +    if ( pdev )
> +    {
> +        pdev->fault.threshold = PT_FAULT_THRESHOLD_UNPRIV;
> +        pdev->fault.count     = 0;
> +    }
> +
>      if ( (rc = hd->platform_ops->assign_device(d, seg, bus, devfn))
)
>          goto done;
>  
> @@ -239,6 +247,8 @@ static int assign_device(struct domain *
>          goto done;
>      }
>  done:
> +    if ( pdev && pdev->domain != d )
> +        pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
>      spin_unlock(&pcidevs_lock);
>      return rc;
>  }
> @@ -379,6 +389,9 @@ int deassign_device(struct domain *d, u1
>          return ret;
>      }
>  
> +    pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
> +    pdev->fault.count     = 0;
> +
>      if ( !has_arch_pdevs(d) && need_iommu(d) )
>      {
>          d->need_iommu = 0;
> --- a/xen/drivers/passthrough/pci.c
> +++ b/xen/drivers/passthrough/pci.c
> @@ -137,6 +137,7 @@ static struct pci_dev *alloc_pdev(struct
>      *((u8*) &pdev->bus) = bus;
>      *((u8*) &pdev->devfn) = devfn;
>      pdev->domain = NULL;
> +    pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
>      INIT_LIST_HEAD(&pdev->msi_list);
>      list_add(&pdev->alldevs_list, &pseg->alldevs_list);
>      spin_lock_init(&pdev->msix_table_lock);
> @@ -672,6 +673,36 @@ int __init pci_device_detect(u16 seg, u8
>      return 1;
>  }
>  
> +void pci_check_disable_device(u16 seg, u8 bus, u8 devfn)
> +{
> +    struct pci_dev *pdev;
> +    s_time_t now = NOW();
> +    u16 cword;
> +
> +    spin_lock(&pcidevs_lock);
> +    pdev = pci_get_pdev(seg, bus, devfn);
> +    if ( pdev )
> +    {
> +        if ( now < pdev->fault.time ||
> +             now - pdev->fault.time > MILLISECS(10) )
> +            pdev->fault.count >>= 1;
> +        pdev->fault.time = now;
> +        if ( ++pdev->fault.count < pdev->fault.threshold )
> +            pdev = NULL;
> +    }
> +    spin_unlock(&pcidevs_lock);
> +
> +    if ( !pdev )
> +        return;
> +
> +    /* Tell the device to stop DMAing; we can''t rely on the guest
to
> +     * control it for us. */
> +    cword = pci_conf_read16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
> +                            PCI_COMMAND);
> +    pci_conf_write16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
> +                     PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
> +}
> +
>  /*
>   * scan pci devices to add all existed PCI devices to alldevs_list,
>   * and setup pci hierarchy in array bus2bridge.
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -917,7 +917,7 @@ static void __do_iommu_page_fault(struct
>      while (1)
>      {
>          u8 fault_reason;
> -        u16 source_id, cword;
> +        u16 source_id;
>          u32 data;
>          u64 guest_addr;
>          int type;
> @@ -950,14 +950,8 @@ static void __do_iommu_page_fault(struct
>          iommu_page_fault_do_one(iommu, type, fault_reason,
>                                  source_id, guest_addr);
>  
> -        /* Tell the device to stop DMAing; we can''t rely on the
guest to
> -         * control it for us. */
> -        cword = pci_conf_read16(iommu->intel->drhd->segment,
> -                                PCI_BUS(source_id), PCI_SLOT(source_id),
> -                                PCI_FUNC(source_id), PCI_COMMAND);
> -        pci_conf_write16(iommu->intel->drhd->segment,
PCI_BUS(source_id),
> -                         PCI_SLOT(source_id), PCI_FUNC(source_id),
> -                         PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
> +        pci_check_disable_device(iommu->intel->drhd->segment,
> +                                 PCI_BUS(source_id),
PCI_DEVFN2(source_id));
>  
>          fault_index++;
>          if ( fault_index > cap_num_fault_regs(iommu->cap) )
> --- a/xen/include/xen/pci.h
> +++ b/xen/include/xen/pci.h
> @@ -64,6 +64,13 @@ struct pci_dev {
>      const u8 devfn;
>      struct pci_dev_info info;
>      struct arch_pci_dev arch;
> +    struct {
> +        s_time_t time;
> +        unsigned int threshold;
> +#define PT_FAULT_THRESHOLD_PRIV  10
> +#define PT_FAULT_THRESHOLD_UNPRIV 3
> +        unsigned int count;
> +    } fault;
>      u64 vf_rlen[6];
>  };
>  
> @@ -107,6 +114,7 @@ int pci_hide_device(int bus, int devfn);
>  struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
>  struct pci_dev *pci_get_pdev_by_domain(
>      struct domain *, int seg, int bus, int devfn);
> +void pci_check_disable_device(u16 seg, u8 bus, u8 devfn);
>  
>  uint8_t pci_conf_read8(
>      unsigned int seg, unsigned int bus, unsigned int dev, unsigned int
func,
> 
> 
> IOMMU: don''t immediately disable bus mastering on faults
> 
> Instead, give the owning domain at least a small opportunity of fixing
> things up, and allow for rare faults to not bring down the device at
> all. The amount of faults tolerated within a given time period (all
> numbers are made up with no specific rationale) is higher for Dom0 than
> for DomU-s.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> --- a/xen/drivers/passthrough/amd/iommu_init.c
> +++ b/xen/drivers/passthrough/amd/iommu_init.c
> @@ -566,7 +566,7 @@ static hw_irq_controller iommu_maskable_
>  
>  static void parse_event_log_entry(struct amd_iommu *iommu, u32 entry[])
>  {
> -    u16 domain_id, device_id, bdf, cword, flags;
> +    u16 domain_id, device_id, bdf, flags;
>      u32 code;
>      u64 *addr;
>      int count = 0;
> @@ -620,18 +620,10 @@ static void parse_event_log_entry(struct
>                 "fault address = %#"PRIx64", flags =
%#x\n",
>                 event_str[code-1], domain_id, device_id, *addr, flags);
>  
> -        /* Tell the device to stop DMAing; we can''t rely on the
guest to
> -         * control it for us. */
>          for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
>              if ( get_dma_requestor_id(iommu->seg, bdf) == device_id )
> -            {
> -                cword = pci_conf_read16(iommu->seg, PCI_BUS(bdf),
> -                                        PCI_SLOT(bdf), PCI_FUNC(bdf),
> -                                        PCI_COMMAND);
> -                pci_conf_write16(iommu->seg, PCI_BUS(bdf),
PCI_SLOT(bdf),
> -                                 PCI_FUNC(bdf), PCI_COMMAND, 
> -                                 cword & ~PCI_COMMAND_MASTER);
> -            }
> +                pci_check_disable_device(iommu->seg, PCI_BUS(bdf),
> +                                         PCI_DEVFN2(bdf));
>      }
>      else
>      {
> --- a/xen/drivers/passthrough/iommu.c
> +++ b/xen/drivers/passthrough/iommu.c
> @@ -215,6 +215,7 @@ static int device_assigned(u16 seg, u8 b
>  static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
>  {
>      struct hvm_iommu *hd = domain_hvm_iommu(d);
> +    struct pci_dev *pdev;
>      int rc = 0;
>  
>      if ( !iommu_enabled || !hd->platform_ops )
> @@ -228,6 +229,13 @@ static int assign_device(struct domain *
>          return -EXDEV;
>  
>      spin_lock(&pcidevs_lock);
> +    pdev = pci_get_pdev(seg, bus, devfn);
> +    if ( pdev )
> +    {
> +        pdev->fault.threshold = PT_FAULT_THRESHOLD_UNPRIV;
> +        pdev->fault.count     = 0;
> +    }
> +
>      if ( (rc = hd->platform_ops->assign_device(d, seg, bus, devfn))
)
>          goto done;
>  
> @@ -239,6 +247,8 @@ static int assign_device(struct domain *
>          goto done;
>      }
>  done:
> +    if ( pdev && pdev->domain != d )
> +        pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
>      spin_unlock(&pcidevs_lock);
>      return rc;
>  }
> @@ -379,6 +389,9 @@ int deassign_device(struct domain *d, u1
>          return ret;
>      }
>  
> +    pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
> +    pdev->fault.count     = 0;
> +
>      if ( !has_arch_pdevs(d) && need_iommu(d) )
>      {
>          d->need_iommu = 0;
> --- a/xen/drivers/passthrough/pci.c
> +++ b/xen/drivers/passthrough/pci.c
> @@ -137,6 +137,7 @@ static struct pci_dev *alloc_pdev(struct
>      *((u8*) &pdev->bus) = bus;
>      *((u8*) &pdev->devfn) = devfn;
>      pdev->domain = NULL;
> +    pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
>      INIT_LIST_HEAD(&pdev->msi_list);
>      list_add(&pdev->alldevs_list, &pseg->alldevs_list);
>      spin_lock_init(&pdev->msix_table_lock);
> @@ -672,6 +673,36 @@ int __init pci_device_detect(u16 seg, u8
>      return 1;
>  }
>  
> +void pci_check_disable_device(u16 seg, u8 bus, u8 devfn)
> +{
> +    struct pci_dev *pdev;
> +    s_time_t now = NOW();
> +    u16 cword;
> +
> +    spin_lock(&pcidevs_lock);
> +    pdev = pci_get_pdev(seg, bus, devfn);
> +    if ( pdev )
> +    {
> +        if ( now < pdev->fault.time ||
> +             now - pdev->fault.time > MILLISECS(10) )
> +            pdev->fault.count >>= 1;
> +        pdev->fault.time = now;
> +        if ( ++pdev->fault.count < pdev->fault.threshold )
> +            pdev = NULL;
> +    }
> +    spin_unlock(&pcidevs_lock);
> +
> +    if ( !pdev )
> +        return;
> +
> +    /* Tell the device to stop DMAing; we can''t rely on the guest
to
> +     * control it for us. */
> +    cword = pci_conf_read16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
> +                            PCI_COMMAND);
> +    pci_conf_write16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
> +                     PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
> +}
> +
>  /*
>   * scan pci devices to add all existed PCI devices to alldevs_list,
>   * and setup pci hierarchy in array bus2bridge.
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -917,7 +917,7 @@ static void __do_iommu_page_fault(struct
>      while (1)
>      {
>          u8 fault_reason;
> -        u16 source_id, cword;
> +        u16 source_id;
>          u32 data;
>          u64 guest_addr;
>          int type;
> @@ -950,14 +950,8 @@ static void __do_iommu_page_fault(struct
>          iommu_page_fault_do_one(iommu, type, fault_reason,
>                                  source_id, guest_addr);
>  
> -        /* Tell the device to stop DMAing; we can''t rely on the
guest to
> -         * control it for us. */
> -        cword = pci_conf_read16(iommu->intel->drhd->segment,
> -                                PCI_BUS(source_id), PCI_SLOT(source_id),
> -                                PCI_FUNC(source_id), PCI_COMMAND);
> -        pci_conf_write16(iommu->intel->drhd->segment,
PCI_BUS(source_id),
> -                         PCI_SLOT(source_id), PCI_FUNC(source_id),
> -                         PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
> +        pci_check_disable_device(iommu->intel->drhd->segment,
> +                                 PCI_BUS(source_id),
PCI_DEVFN2(source_id));
>  
>          fault_index++;
>          if ( fault_index > cap_num_fault_regs(iommu->cap) )
> --- a/xen/include/xen/pci.h
> +++ b/xen/include/xen/pci.h
> @@ -64,6 +64,13 @@ struct pci_dev {
>      const u8 devfn;
>      struct pci_dev_info info;
>      struct arch_pci_dev arch;
> +    struct {
> +        s_time_t time;
> +        unsigned int threshold;
> +#define PT_FAULT_THRESHOLD_PRIV  10
> +#define PT_FAULT_THRESHOLD_UNPRIV 3
> +        unsigned int count;
> +    } fault;
>      u64 vf_rlen[6];
>  };
>  
> @@ -107,6 +114,7 @@ int pci_hide_device(int bus, int devfn);
>  struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
>  struct pci_dev *pci_get_pdev_by_domain(
>      struct domain *, int seg, int bus, int devfn);
> +void pci_check_disable_device(u16 seg, u8 bus, u8 devfn);
>  
>  uint8_t pci_conf_read8(
>      unsigned int seg, unsigned int bus, unsigned int dev, unsigned int
func,

Jan Beulich

2012-Nov-08 13:46 UTC

head link

Re: [PATCH, v2] IOMMU: don''t immediately disable bus mastering on faults

>>> On 08.11.12 at 13:46, Tim Deegan <tim@xen.org> wrote:
> At 16:05 +0000 on 07 Nov (1352304352), Jan Beulich wrote:
>> Instead, give the owning domain at least a small opportunity of fixing
>> things up, and allow for rare faults to not bring down the device at
>> all. The amount of faults tolerated within a given time period (all
>> numbers are made up with no specific rationale) is higher for Dom0 than
>> for DomU-s.
>> 
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> Looks good to me, though I don''t think dom0 should have any
special
> treatment here.  I''d be inclined to set the threshold to 10 for
> everyone. 
No problem. I''ll wait for the IOMMU folks to give some feedback,
and either re-submit (if further changes are requested), or commit
with their acks (and the change as you suggest, if they''re in
agreement).

Of course, if they disagree we''ll have to get to a common position
anyway first.

If no other changes are requested, do I take the above as an
"ack-with-that-change"?

Jan
>> --- a/xen/drivers/passthrough/amd/iommu_init.c
>> +++ b/xen/drivers/passthrough/amd/iommu_init.c
>> @@ -566,7 +566,7 @@ static hw_irq_controller iommu_maskable_
>>  
>>  static void parse_event_log_entry(struct amd_iommu *iommu, u32
entry[])
>>  {
>> -    u16 domain_id, device_id, bdf, cword, flags;
>> +    u16 domain_id, device_id, bdf, flags;
>>      u32 code;
>>      u64 *addr;
>>      int count = 0;
>> @@ -620,18 +620,10 @@ static void parse_event_log_entry(struct
>>                 "fault address = %#"PRIx64", flags =
%#x\n",
>>                 event_str[code-1], domain_id, device_id, *addr, flags);
>>  
>> -        /* Tell the device to stop DMAing; we can''t rely on
the guest to
>> -         * control it for us. */
>>          for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
>>              if ( get_dma_requestor_id(iommu->seg, bdf) == device_id
)
>> -            {
>> -                cword = pci_conf_read16(iommu->seg, PCI_BUS(bdf),
>> -                                        PCI_SLOT(bdf), PCI_FUNC(bdf),
>> -                                        PCI_COMMAND);
>> -                pci_conf_write16(iommu->seg, PCI_BUS(bdf),
PCI_SLOT(bdf),
>> -                                 PCI_FUNC(bdf), PCI_COMMAND, 
>> -                                 cword & ~PCI_COMMAND_MASTER);
>> -            }
>> +                pci_check_disable_device(iommu->seg, PCI_BUS(bdf),
>> +                                         PCI_DEVFN2(bdf));
>>      }
>>      else
>>      {
>> --- a/xen/drivers/passthrough/iommu.c
>> +++ b/xen/drivers/passthrough/iommu.c
>> @@ -215,6 +215,7 @@ static int device_assigned(u16 seg, u8 b
>>  static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
>>  {
>>      struct hvm_iommu *hd = domain_hvm_iommu(d);
>> +    struct pci_dev *pdev;
>>      int rc = 0;
>>  
>>      if ( !iommu_enabled || !hd->platform_ops )
>> @@ -228,6 +229,13 @@ static int assign_device(struct domain *
>>          return -EXDEV;
>>  
>>      spin_lock(&pcidevs_lock);
>> +    pdev = pci_get_pdev(seg, bus, devfn);
>> +    if ( pdev )
>> +    {
>> +        pdev->fault.threshold = PT_FAULT_THRESHOLD_UNPRIV;
>> +        pdev->fault.count     = 0;
>> +    }
>> +
>>      if ( (rc = hd->platform_ops->assign_device(d, seg, bus,
devfn)) )
>>          goto done;
>>  
>> @@ -239,6 +247,8 @@ static int assign_device(struct domain *
>>          goto done;
>>      }
>>  done:
>> +    if ( pdev && pdev->domain != d )
>> +        pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
>>      spin_unlock(&pcidevs_lock);
>>      return rc;
>>  }
>> @@ -379,6 +389,9 @@ int deassign_device(struct domain *d, u1
>>          return ret;
>>      }
>>  
>> +    pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
>> +    pdev->fault.count     = 0;
>> +
>>      if ( !has_arch_pdevs(d) && need_iommu(d) )
>>      {
>>          d->need_iommu = 0;
>> --- a/xen/drivers/passthrough/pci.c
>> +++ b/xen/drivers/passthrough/pci.c
>> @@ -137,6 +137,7 @@ static struct pci_dev *alloc_pdev(struct
>>      *((u8*) &pdev->bus) = bus;
>>      *((u8*) &pdev->devfn) = devfn;
>>      pdev->domain = NULL;
>> +    pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
>>      INIT_LIST_HEAD(&pdev->msi_list);
>>      list_add(&pdev->alldevs_list, &pseg->alldevs_list);
>>      spin_lock_init(&pdev->msix_table_lock);
>> @@ -672,6 +673,36 @@ int __init pci_device_detect(u16 seg, u8
>>      return 1;
>>  }
>>  
>> +void pci_check_disable_device(u16 seg, u8 bus, u8 devfn)
>> +{
>> +    struct pci_dev *pdev;
>> +    s_time_t now = NOW();
>> +    u16 cword;
>> +
>> +    spin_lock(&pcidevs_lock);
>> +    pdev = pci_get_pdev(seg, bus, devfn);
>> +    if ( pdev )
>> +    {
>> +        if ( now < pdev->fault.time ||
>> +             now - pdev->fault.time > MILLISECS(10) )
>> +            pdev->fault.count >>= 1;
>> +        pdev->fault.time = now;
>> +        if ( ++pdev->fault.count < pdev->fault.threshold )
>> +            pdev = NULL;
>> +    }
>> +    spin_unlock(&pcidevs_lock);
>> +
>> +    if ( !pdev )
>> +        return;
>> +
>> +    /* Tell the device to stop DMAing; we can''t rely on the
guest to
>> +     * control it for us. */
>> +    cword = pci_conf_read16(seg, bus, PCI_SLOT(devfn),
PCI_FUNC(devfn),
>> +                            PCI_COMMAND);
>> +    pci_conf_write16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
>> +                     PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
>> +}
>> +
>>  /*
>>   * scan pci devices to add all existed PCI devices to alldevs_list,
>>   * and setup pci hierarchy in array bus2bridge.
>> --- a/xen/drivers/passthrough/vtd/iommu.c
>> +++ b/xen/drivers/passthrough/vtd/iommu.c
>> @@ -917,7 +917,7 @@ static void __do_iommu_page_fault(struct
>>      while (1)
>>      {
>>          u8 fault_reason;
>> -        u16 source_id, cword;
>> +        u16 source_id;
>>          u32 data;
>>          u64 guest_addr;
>>          int type;
>> @@ -950,14 +950,8 @@ static void __do_iommu_page_fault(struct
>>          iommu_page_fault_do_one(iommu, type, fault_reason,
>>                                  source_id, guest_addr);
>>  
>> -        /* Tell the device to stop DMAing; we can''t rely on
the guest to
>> -         * control it for us. */
>> -        cword = pci_conf_read16(iommu->intel->drhd->segment,
>> -                                PCI_BUS(source_id),
PCI_SLOT(source_id),
>> -                                PCI_FUNC(source_id), PCI_COMMAND);
>> -        pci_conf_write16(iommu->intel->drhd->segment,
PCI_BUS(source_id),
>> -                         PCI_SLOT(source_id), PCI_FUNC(source_id),
>> -                         PCI_COMMAND, cword &
~PCI_COMMAND_MASTER);
>> +        pci_check_disable_device(iommu->intel->drhd->segment,
>> +                                 PCI_BUS(source_id), 
> PCI_DEVFN2(source_id));
>>  
>>          fault_index++;
>>          if ( fault_index > cap_num_fault_regs(iommu->cap) )
>> --- a/xen/include/xen/pci.h
>> +++ b/xen/include/xen/pci.h
>> @@ -64,6 +64,13 @@ struct pci_dev {
>>      const u8 devfn;
>>      struct pci_dev_info info;
>>      struct arch_pci_dev arch;
>> +    struct {
>> +        s_time_t time;
>> +        unsigned int threshold;
>> +#define PT_FAULT_THRESHOLD_PRIV  10
>> +#define PT_FAULT_THRESHOLD_UNPRIV 3
>> +        unsigned int count;
>> +    } fault;
>>      u64 vf_rlen[6];
>>  };
>>  
>> @@ -107,6 +114,7 @@ int pci_hide_device(int bus, int devfn);
>>  struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
>>  struct pci_dev *pci_get_pdev_by_domain(
>>      struct domain *, int seg, int bus, int devfn);
>> +void pci_check_disable_device(u16 seg, u8 bus, u8 devfn);
>>  
>>  uint8_t pci_conf_read8(
>>      unsigned int seg, unsigned int bus, unsigned int dev, unsigned int
> func,
>> 
>> 
> 
>> IOMMU: don''t immediately disable bus mastering on faults
>> 
>> Instead, give the owning domain at least a small opportunity of fixing
>> things up, and allow for rare faults to not bring down the device at
>> all. The amount of faults tolerated within a given time period (all
>> numbers are made up with no specific rationale) is higher for Dom0 than
>> for DomU-s.
>> 
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> 
>> --- a/xen/drivers/passthrough/amd/iommu_init.c
>> +++ b/xen/drivers/passthrough/amd/iommu_init.c
>> @@ -566,7 +566,7 @@ static hw_irq_controller iommu_maskable_
>>  
>>  static void parse_event_log_entry(struct amd_iommu *iommu, u32
entry[])
>>  {
>> -    u16 domain_id, device_id, bdf, cword, flags;
>> +    u16 domain_id, device_id, bdf, flags;
>>      u32 code;
>>      u64 *addr;
>>      int count = 0;
>> @@ -620,18 +620,10 @@ static void parse_event_log_entry(struct
>>                 "fault address = %#"PRIx64", flags =
%#x\n",
>>                 event_str[code-1], domain_id, device_id, *addr, flags);
>>  
>> -        /* Tell the device to stop DMAing; we can''t rely on
the guest to
>> -         * control it for us. */
>>          for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
>>              if ( get_dma_requestor_id(iommu->seg, bdf) == device_id
)
>> -            {
>> -                cword = pci_conf_read16(iommu->seg, PCI_BUS(bdf),
>> -                                        PCI_SLOT(bdf), PCI_FUNC(bdf),
>> -                                        PCI_COMMAND);
>> -                pci_conf_write16(iommu->seg, PCI_BUS(bdf),
PCI_SLOT(bdf),
>> -                                 PCI_FUNC(bdf), PCI_COMMAND, 
>> -                                 cword & ~PCI_COMMAND_MASTER);
>> -            }
>> +                pci_check_disable_device(iommu->seg, PCI_BUS(bdf),
>> +                                         PCI_DEVFN2(bdf));
>>      }
>>      else
>>      {
>> --- a/xen/drivers/passthrough/iommu.c
>> +++ b/xen/drivers/passthrough/iommu.c
>> @@ -215,6 +215,7 @@ static int device_assigned(u16 seg, u8 b
>>  static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
>>  {
>>      struct hvm_iommu *hd = domain_hvm_iommu(d);
>> +    struct pci_dev *pdev;
>>      int rc = 0;
>>  
>>      if ( !iommu_enabled || !hd->platform_ops )
>> @@ -228,6 +229,13 @@ static int assign_device(struct domain *
>>          return -EXDEV;
>>  
>>      spin_lock(&pcidevs_lock);
>> +    pdev = pci_get_pdev(seg, bus, devfn);
>> +    if ( pdev )
>> +    {
>> +        pdev->fault.threshold = PT_FAULT_THRESHOLD_UNPRIV;
>> +        pdev->fault.count     = 0;
>> +    }
>> +
>>      if ( (rc = hd->platform_ops->assign_device(d, seg, bus,
devfn)) )
>>          goto done;
>>  
>> @@ -239,6 +247,8 @@ static int assign_device(struct domain *
>>          goto done;
>>      }
>>  done:
>> +    if ( pdev && pdev->domain != d )
>> +        pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
>>      spin_unlock(&pcidevs_lock);
>>      return rc;
>>  }
>> @@ -379,6 +389,9 @@ int deassign_device(struct domain *d, u1
>>          return ret;
>>      }
>>  
>> +    pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
>> +    pdev->fault.count     = 0;
>> +
>>      if ( !has_arch_pdevs(d) && need_iommu(d) )
>>      {
>>          d->need_iommu = 0;
>> --- a/xen/drivers/passthrough/pci.c
>> +++ b/xen/drivers/passthrough/pci.c
>> @@ -137,6 +137,7 @@ static struct pci_dev *alloc_pdev(struct
>>      *((u8*) &pdev->bus) = bus;
>>      *((u8*) &pdev->devfn) = devfn;
>>      pdev->domain = NULL;
>> +    pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
>>      INIT_LIST_HEAD(&pdev->msi_list);
>>      list_add(&pdev->alldevs_list, &pseg->alldevs_list);
>>      spin_lock_init(&pdev->msix_table_lock);
>> @@ -672,6 +673,36 @@ int __init pci_device_detect(u16 seg, u8
>>      return 1;
>>  }
>>  
>> +void pci_check_disable_device(u16 seg, u8 bus, u8 devfn)
>> +{
>> +    struct pci_dev *pdev;
>> +    s_time_t now = NOW();
>> +    u16 cword;
>> +
>> +    spin_lock(&pcidevs_lock);
>> +    pdev = pci_get_pdev(seg, bus, devfn);
>> +    if ( pdev )
>> +    {
>> +        if ( now < pdev->fault.time ||
>> +             now - pdev->fault.time > MILLISECS(10) )
>> +            pdev->fault.count >>= 1;
>> +        pdev->fault.time = now;
>> +        if ( ++pdev->fault.count < pdev->fault.threshold )
>> +            pdev = NULL;
>> +    }
>> +    spin_unlock(&pcidevs_lock);
>> +
>> +    if ( !pdev )
>> +        return;
>> +
>> +    /* Tell the device to stop DMAing; we can''t rely on the
guest to
>> +     * control it for us. */
>> +    cword = pci_conf_read16(seg, bus, PCI_SLOT(devfn),
PCI_FUNC(devfn),
>> +                            PCI_COMMAND);
>> +    pci_conf_write16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
>> +                     PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
>> +}
>> +
>>  /*
>>   * scan pci devices to add all existed PCI devices to alldevs_list,
>>   * and setup pci hierarchy in array bus2bridge.
>> --- a/xen/drivers/passthrough/vtd/iommu.c
>> +++ b/xen/drivers/passthrough/vtd/iommu.c
>> @@ -917,7 +917,7 @@ static void __do_iommu_page_fault(struct
>>      while (1)
>>      {
>>          u8 fault_reason;
>> -        u16 source_id, cword;
>> +        u16 source_id;
>>          u32 data;
>>          u64 guest_addr;
>>          int type;
>> @@ -950,14 +950,8 @@ static void __do_iommu_page_fault(struct
>>          iommu_page_fault_do_one(iommu, type, fault_reason,
>>                                  source_id, guest_addr);
>>  
>> -        /* Tell the device to stop DMAing; we can''t rely on
the guest to
>> -         * control it for us. */
>> -        cword = pci_conf_read16(iommu->intel->drhd->segment,
>> -                                PCI_BUS(source_id),
PCI_SLOT(source_id),
>> -                                PCI_FUNC(source_id), PCI_COMMAND);
>> -        pci_conf_write16(iommu->intel->drhd->segment,
PCI_BUS(source_id),
>> -                         PCI_SLOT(source_id), PCI_FUNC(source_id),
>> -                         PCI_COMMAND, cword &
~PCI_COMMAND_MASTER);
>> +        pci_check_disable_device(iommu->intel->drhd->segment,
>> +                                 PCI_BUS(source_id), 
> PCI_DEVFN2(source_id));
>>  
>>          fault_index++;
>>          if ( fault_index > cap_num_fault_regs(iommu->cap) )
>> --- a/xen/include/xen/pci.h
>> +++ b/xen/include/xen/pci.h
>> @@ -64,6 +64,13 @@ struct pci_dev {
>>      const u8 devfn;
>>      struct pci_dev_info info;
>>      struct arch_pci_dev arch;
>> +    struct {
>> +        s_time_t time;
>> +        unsigned int threshold;
>> +#define PT_FAULT_THRESHOLD_PRIV  10
>> +#define PT_FAULT_THRESHOLD_UNPRIV 3
>> +        unsigned int count;
>> +    } fault;
>>      u64 vf_rlen[6];
>>  };
>>  
>> @@ -107,6 +114,7 @@ int pci_hide_device(int bus, int devfn);
>>  struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
>>  struct pci_dev *pci_get_pdev_by_domain(
>>      struct domain *, int seg, int bus, int devfn);
>> +void pci_check_disable_device(u16 seg, u8 bus, u8 devfn);
>>  
>>  uint8_t pci_conf_read8(
>>      unsigned int seg, unsigned int bus, unsigned int dev, unsigned int
> func,

Tim Deegan

2012-Nov-08 14:23 UTC

head link

Re: [PATCH, v2] IOMMU: don''t immediately disable bus mastering on faults

At 13:46 +0000 on 08 Nov (1352382372), Jan Beulich
wrote:> >>> On 08.11.12 at 13:46, Tim Deegan <tim@xen.org> wrote:
> > At 16:05 +0000 on 07 Nov (1352304352), Jan Beulich wrote:
> >> Instead, give the owning domain at least a small opportunity of
fixing
> >> things up, and allow for rare faults to not bring down the device
at
> >> all. The amount of faults tolerated within a given time period
(all
> >> numbers are made up with no specific rationale) is higher for Dom0
than
> >> for DomU-s.
> >> 
> >> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> > 
> > Looks good to me, though I don''t think dom0 should have any
special
> > treatment here.  I''d be inclined to set the threshold to 10
for
> > everyone. 
> 
> No problem. I''ll wait for the IOMMU folks to give some feedback,
> and either re-submit (if further changes are requested), or commit
> with their acks (and the change as you suggest, if they''re in
> agreement).
> 
> Of course, if they disagree we''ll have to get to a common position
> anyway first.
> 
> If no other changes are requested, do I take the above as an
> "ack-with-that-change"?
Yes, please do.

Tim.

Dario Faggioli

2012-Nov-08 18:07 UTC

head link

Re: [PATCH, v2] IOMMU: don''t immediately disable bus mastering on faults

On Wed, 2012-11-07 at 16:05 +0000, Jan Beulich wrote: > Instead, give the owning domain at least a small opportunity of fixing
> things up, and allow for rare faults to not bring down the device at
> all. The amount of faults tolerated within a given time period (all
> numbers are made up with no specific rationale) is higher for Dom0 than
> for DomU-s.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> (For what it''s worth)

Acked-by: Dario Faggioli <dario.faggioli@citrix.com>

I don''t really have a strong opinion about Dom0 and Xen needing special
treatment or not, so the above applies to whatever solution you decide
to go for. :-)

Thanks and Regards,
Dario

-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://retis.sssup.it/people/faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)




_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Keir Fraser

2012-Nov-30 09:42 UTC

head link

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

On 05/11/2012 16:53, "Jan Beulich" <JBeulich@suse.com> wrote:
> Under the assumption that in these cases recurring faults aren''t a
> security issue and it can be expected that the drivers there are going
> to try to take care of the problem.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
This one''s sat a while with no comments...

 -- Keir
> --- a/xen/drivers/passthrough/amd/iommu_init.c
> +++ b/xen/drivers/passthrough/amd/iommu_init.c
> @@ -625,6 +625,18 @@ static void parse_event_log_entry(struct
>          for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
>              if ( get_dma_requestor_id(iommu->seg, bdf) == device_id )
>              {
> +                const struct pci_dev *pdev;
> +
> +                spin_lock(&pcidevs_lock);
> +                pdev = pci_get_pdev(iommu->seg, PCI_BUS(bdf),
> PCI_DEVFN2(bdf));
> +                if ( pdev && pdev->domain != dom_xen &&
> +                     (!pdev->domain || !IS_PRIV(pdev->domain)) )
> +                    pdev = NULL;
> +                spin_unlock(&pcidevs_lock);
> +
> +                if ( pdev )
> +                    continue;
> +
>                  cword = pci_conf_read16(iommu->seg, PCI_BUS(bdf),
>                                          PCI_SLOT(bdf), PCI_FUNC(bdf),
>                                          PCI_COMMAND);
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -916,7 +916,8 @@ static void __do_iommu_page_fault(struct
>      reg = cap_fault_reg_offset(iommu->cap);
>      while (1)
>      {
> -        u8 fault_reason;
> +        const struct pci_dev *pdev;
> +        u8 fault_reason, bus;
>          u16 source_id, cword;
>          u32 data;
>          u64 guest_addr;
> @@ -950,14 +951,27 @@ static void __do_iommu_page_fault(struct
>          iommu_page_fault_do_one(iommu, type, fault_reason,
>                                  source_id, guest_addr);
>  
> -        /* Tell the device to stop DMAing; we can''t rely on the
guest to
> -         * control it for us. */
> -        cword = pci_conf_read16(iommu->intel->drhd->segment,
> -                                PCI_BUS(source_id), PCI_SLOT(source_id),
> -                                PCI_FUNC(source_id), PCI_COMMAND);
> -        pci_conf_write16(iommu->intel->drhd->segment,
PCI_BUS(source_id),
> -                         PCI_SLOT(source_id), PCI_FUNC(source_id),
> -                         PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
> +        bus = PCI_BUS(source_id);
> +
> +        spin_lock(&pcidevs_lock);
> +        pdev = pci_get_pdev(iommu->intel->drhd->segment, bus,
> +                            PCI_DEVFN2(source_id));
> +        if ( pdev && pdev->domain != dom_xen &&
> +             (!pdev->domain || !IS_PRIV(pdev->domain)) )
> +            pdev = NULL;
> +        spin_unlock(&pcidevs_lock);
> +
> +        if ( !pdev )
> +        {
> +            /* Tell the device to stop DMAing; we can''t rely on
the guest to
> +             * control it for us. */
> +            cword = pci_conf_read16(iommu->intel->drhd->segment,
bus,
> +                                    PCI_SLOT(source_id),
PCI_FUNC(source_id),
> +                                    PCI_COMMAND);
> +            pci_conf_write16(iommu->intel->drhd->segment, bus,
> +                             PCI_SLOT(source_id), PCI_FUNC(source_id),
> +                             PCI_COMMAND, cword &
~PCI_COMMAND_MASTER);
> +        }
>  
>          fault_index++;
>          if ( fault_index > cap_num_fault_regs(iommu->cap) )
> 
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

Jan Beulich

2012-Nov-30 09:50 UTC

head link

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

>>> On 30.11.12 at 10:42, Keir Fraser <keir@xen.org> wrote:
> On 05/11/2012 16:53, "Jan Beulich" <JBeulich@suse.com>
wrote:
> 
>> Under the assumption that in these cases recurring faults
aren''t a
>> security issue and it can be expected that the drivers there are going
>> to try to take care of the problem.
>> 
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> This one''s sat a while with no comments...
It''s in already (26133:fdb69dd527cd), with Tim''s and
Dario''s ack
(who were the ones involved in creating the original change this
modifies).

But yes, we''re having a general response problem for IOMMU
related stuff - I already asked for this to be a topic on the next
community call.

Jan

Zhang, Xiantao

2012-Dec-03 06:08 UTC

head link

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

Hi, Jan 
If the phantom device support for IOMMU is in upstream,  is this patch still
needed ?  Basically,  I can''t figure out why several faults should be
allowed before disabling bus mastering.   Did you meet some real issues ?  
Thanks!
Xiantao> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: Friday, November 30, 2012 5:51 PM
> To: wei.huang2@amd.com; weiwang.dd@gmail.com; Zhang, Xiantao; Keir
> Fraser
> Cc: Dario Faggioli; xen-devel; Tim Deegan
> Subject: Re: [Xen-devel] [PATCH] IOMMU: don''t disable bus
mastering on
> faults for devices used by Xen or Dom0
> 
> >>> On 30.11.12 at 10:42, Keir Fraser <keir@xen.org> wrote:
> > On 05/11/2012 16:53, "Jan Beulich" <JBeulich@suse.com>
wrote:
> >
> >> Under the assumption that in these cases recurring faults
aren''t a
> >> security issue and it can be expected that the drivers there are
> >> going to try to take care of the problem.
> >>
> >> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> >
> > This one''s sat a while with no comments...
> 
> It''s in already (26133:fdb69dd527cd), with Tim''s and
Dario''s ack (who were
> the ones involved in creating the original change this modifies).
> 
> But yes, we''re having a general response problem for IOMMU related
stuff -
> I already asked for this to be a topic on the next community call.
> 
> Jan

Jan Beulich

2012-Dec-03 07:36 UTC

head link

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

>>> On 03.12.12 at 07:08, "Zhang, Xiantao"
<xiantao.zhang@intel.com> wrote:
> If the phantom device support for IOMMU is in upstream,  is this patch
still
> needed ?
Phantom function is unrelated to the behavioral adjustment here.
>  Basically,  I can''t figure out why several faults should be
allowed
> before disabling bus mastering.   Did you meet some real issues ?   Thanks!
I observed quite a different driver failure pattern with and without
this adjustment, but in a contrived environment only. From the
customer data for the problem that prompted the phantom function
work, I could also conclude the same (comparing the driver failure
under native Linux with IOMMU turned on and the one under Xen).

But in any case, I am of the opinion that an occasional fault
shouldn''t give reason to disable the device altogether - what
we''re
aiming at is solely to keep Xen and other domains functional (which
doesn''t require as drastic an action as was carried out prior to this
adjustment). Also, afaict native Linux doesn''t have any such
disabling behavior at all.

Jan
>> -----Original Message-----
>> From: Jan Beulich [mailto:JBeulich@suse.com]
>> Sent: Friday, November 30, 2012 5:51 PM
>> To: wei.huang2@amd.com; weiwang.dd@gmail.com; Zhang, Xiantao; Keir
>> Fraser
>> Cc: Dario Faggioli; xen-devel; Tim Deegan
>> Subject: Re: [Xen-devel] [PATCH] IOMMU: don''t disable bus
mastering on
>> faults for devices used by Xen or Dom0
>> 
>> >>> On 30.11.12 at 10:42, Keir Fraser <keir@xen.org>
wrote:
>> > On 05/11/2012 16:53, "Jan Beulich"
<JBeulich@suse.com> wrote:
>> >
>> >> Under the assumption that in these cases recurring faults
aren''t a
>> >> security issue and it can be expected that the drivers there
are
>> >> going to try to take care of the problem.
>> >>
>> >> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> >
>> > This one''s sat a while with no comments...
>> 
>> It''s in already (26133:fdb69dd527cd), with Tim''s and
Dario''s ack (who were
>> the ones involved in creating the original change this modifies).
>> 
>> But yes, we''re having a general response problem for IOMMU
related stuff -
>> I already asked for this to be a topic on the next community call.
>> 
>> Jan

Zhang, Xiantao

2012-Dec-04 00:55 UTC

head link

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

> 
> >>> On 03.12.12 at 07:08, "Zhang, Xiantao"
<xiantao.zhang@intel.com>
> wrote:
> > If the phantom device support for IOMMU is in upstream,  is this patch
> > still needed ?
> 
> Phantom function is unrelated to the behavioral adjustment here.
> 
> >  Basically,  I can''t figure out why several faults should be
allowed
> > before disabling bus mastering.   Did you meet some real issues ?  
Thanks!
> 
> I observed quite a different driver failure pattern with and without this
> adjustment, but in a contrived environment only. From the customer data
> for the problem that prompted the phantom function work, I could also
> conclude the same (comparing the driver failure under native Linux with
> IOMMU turned on and the one under Xen).
> 
> But in any case, I am of the opinion that an occasional fault
shouldn''t give
> reason to disable the device altogether - what we''re aiming at is
solely to
> keep Xen and other domains functional (which doesn''t require as
drastic an
> action as was carried out prior to this adjustment). Also, afaict native
Linux
> doesn''t have any such disabling behavior at all.
Okay, maybe we need to align this with native linux side, and just keep the
fault reporting instead of disabling it in device level. And if the number of
faults reaches to  a limit, and hypervisor can choose to suppress its output.
Xiantao
> >> -----Original Message-----
> >> From: Jan Beulich [mailto:JBeulich@suse.com]
> >> Sent: Friday, November 30, 2012 5:51 PM
> >> To: wei.huang2@amd.com; weiwang.dd@gmail.com; Zhang, Xiantao; Keir
> >> Fraser
> >> Cc: Dario Faggioli; xen-devel; Tim Deegan
> >> Subject: Re: [Xen-devel] [PATCH] IOMMU: don''t disable bus
mastering
> >> on faults for devices used by Xen or Dom0
> >>
> >> >>> On 30.11.12 at 10:42, Keir Fraser
<keir@xen.org> wrote:
> >> > On 05/11/2012 16:53, "Jan Beulich"
<JBeulich@suse.com> wrote:
> >> >
> >> >> Under the assumption that in these cases recurring faults
aren''t a
> >> >> security issue and it can be expected that the drivers
there are
> >> >> going to try to take care of the problem.
> >> >>
> >> >> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> >> >
> >> > This one''s sat a while with no comments...
> >>
> >> It''s in already (26133:fdb69dd527cd), with Tim''s
and Dario''s ack (who
> >> were the ones involved in creating the original change this
modifies).
> >>
> >> But yes, we''re having a general response problem for
IOMMU related
> >> stuff - I already asked for this to be a topic on the next
community call.
> >>
> >> Jan
> 
>

Jan Beulich

2012-Dec-04 08:19 UTC

head link

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

>>> On 04.12.12 at 01:55, "Zhang, Xiantao"
<xiantao.zhang@intel.com> wrote:
>>  
>> >>> On 03.12.12 at 07:08, "Zhang, Xiantao"
<xiantao.zhang@intel.com>
>> wrote:
>> > If the phantom device support for IOMMU is in upstream,  is this
patch
>> > still needed ?
>> 
>> Phantom function is unrelated to the behavioral adjustment here.
>> 
>> >  Basically,  I can''t figure out why several faults should
be allowed
>> > before disabling bus mastering.   Did you meet some real issues ? 
Thanks!
>> 
>> I observed quite a different driver failure pattern with and without
this
>> adjustment, but in a contrived environment only. From the customer data
>> for the problem that prompted the phantom function work, I could also
>> conclude the same (comparing the driver failure under native Linux with
>> IOMMU turned on and the one under Xen).
>> 
>> But in any case, I am of the opinion that an occasional fault
shouldn''t give
>> reason to disable the device altogether - what we''re aiming at
is solely to
>> keep Xen and other domains functional (which doesn''t require
as drastic an
>> action as was carried out prior to this adjustment). Also, afaict
native
> Linux
>> doesn''t have any such disabling behavior at all.
> 
> Okay, maybe we need to align this with native linux side, and just keep the
> fault reporting instead of disabling it in device level. And if the number
of
> faults reaches to  a limit, and hypervisor can choose to suppress its
output.
Just suppressing the output is not enough (and I''m sure you''re
aware that, other than native Linux and for whatever obscure
reason, at least the VT-d code doesn''t produce _any_ indication
of a fault in the hypervisor log) - the problem that was addressed
with the original change was that with high enough a fault rate,
a CPU could be made busy with just handling these faults.

Having moved the real handling of the fault into a tasklet only
reduced the impact, so I continue to think that shutting off
at least bus mastering on the device is the right thing to do.
But - this shutting off is based on the source ID of the request,
so if the source ID doesn''t refer to the device at fault (as e.g.
would currently be the case for the Marvell controllers that
we''re adding the phantom function support for), we would
continue to be in trouble. But buggy hardware can certainly be
expected not to be passed through to guests in the first place.

Jan

Seemingly Similar Threads

Search for more maybe matching threads

Xen devel - Nov 2012 - [PATCH] IOMMU: don't disable bus mastering on faults for devices used by Xen or Dom0

[PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

[PATCH, v2] IOMMU: don''t immediately disable bus mastering on faults

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

Re: [PATCH, v2] IOMMU: don''t immediately disable bus mastering on faults

Re: [PATCH, v2] IOMMU: don''t immediately disable bus mastering on faults

Re: [PATCH, v2] IOMMU: don''t immediately disable bus mastering on faults

Re: [PATCH, v2] IOMMU: don''t immediately disable bus mastering on faults

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

Re: [PATCH] IOMMU: don''t disable bus mastering on faults for devices used by Xen or Dom0

Seemingly Similar Threads