<suravee.suthikulpanit@amd.com>
2013-Jun-10 05:05 UTC
[PATCH 1/2 V2] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> The IOMMU interrupt bits in the IOMMU status registers are "read-only, and write-1-to-clear (RW1C). Therefore, the existing logic which reads the register, set the bit, and then writing back the values could accidentally clear certain bits if it has been set. The correct logic would just be writing only the value which only set the interrupt bits, and leave the rest to zeros. This patch also, clean up #define masks as Jan has suggested. Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> --- V2 changes: - Additional fixes as pointed out by Jan. - Clean up unnecessary #define mask as suggested by Jan. xen/drivers/passthrough/amd/iommu_cmd.c | 18 +++-- xen/drivers/passthrough/amd/iommu_init.c | 31 ++++----- xen/include/asm-x86/hvm/svm/amd-iommu-defs.h | 95 +++++++++++--------------- 3 files changed, 63 insertions(+), 81 deletions(-) diff --git a/xen/drivers/passthrough/amd/iommu_cmd.c b/xen/drivers/passthrough/amd/iommu_cmd.c index 4c60149..f0283d4 100644 --- a/xen/drivers/passthrough/amd/iommu_cmd.c +++ b/xen/drivers/passthrough/amd/iommu_cmd.c @@ -75,11 +75,9 @@ static void flush_command_buffer(struct amd_iommu *iommu) u32 cmd[4], status; int loop_count, comp_wait; - /* clear ''ComWaitInt'' in status register (WIC) */ - set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, 0, - IOMMU_STATUS_COMP_WAIT_INT_MASK, - IOMMU_STATUS_COMP_WAIT_INT_SHIFT, &status); - writel(status, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + /* RW1C ''ComWaitInt'' in status register */ + writel((1 << IOMMU_STATUS_COMP_WAIT_INT_SHIFT), + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); /* send an empty COMPLETION_WAIT command to flush command buffer */ cmd[3] = cmd[2] = 0; @@ -96,16 +94,16 @@ static void flush_command_buffer(struct amd_iommu *iommu) do { status = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); comp_wait = get_field_from_reg_u32(status, - IOMMU_STATUS_COMP_WAIT_INT_MASK, - IOMMU_STATUS_COMP_WAIT_INT_SHIFT); + (1 << IOMMU_STATUS_COMP_WAIT_INT_SHIFT), + IOMMU_STATUS_COMP_WAIT_INT_SHIFT); --loop_count; } while ( !comp_wait && loop_count ); if ( comp_wait ) { - /* clear ''ComWaitInt'' in status register (WIC) */ - status &= IOMMU_STATUS_COMP_WAIT_INT_MASK; - writel(status, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + /* RW1C ''ComWaitInt'' in status register */ + writel((1 << IOMMU_STATUS_COMP_WAIT_INT_SHIFT), + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); return; } AMD_IOMMU_DEBUG("Warning: ComWaitInt bit did not assert!\n"); diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c index a939c73..a85c63f 100644 --- a/xen/drivers/passthrough/amd/iommu_init.c +++ b/xen/drivers/passthrough/amd/iommu_init.c @@ -439,8 +439,8 @@ static void iommu_reset_log(struct amd_iommu *iommu, ctrl_func(iommu, IOMMU_CONTROL_DISABLED); - /*clear overflow bit */ - iommu_clear_bit(&entry, of_bit); + /* RW1C overflow bit */ + iommu_set_bit(&entry, of_bit); writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); /*reset event log base address */ @@ -622,11 +622,9 @@ static void iommu_check_event_log(struct amd_iommu *iommu) if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) ) iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control); - /* reset interrupt status bit */ - entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); - iommu_set_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT); - - writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + /* RW1C interrupt status bit */ + writel((1 << IOMMU_STATUS_EVENT_LOG_INT_SHIFT), + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); spin_unlock_irqrestore(&iommu->lock, flags); } @@ -692,11 +690,9 @@ static void iommu_check_ppr_log(struct amd_iommu *iommu) if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) ) iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control); - /* reset interrupt status bit */ - entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); - iommu_set_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT); - - writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + /* RW1C interrupt status bit */ + writel((1 << IOMMU_STATUS_PPR_LOG_INT_SHIFT), + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); spin_unlock_irqrestore(&iommu->lock, flags); } @@ -733,10 +729,13 @@ static void iommu_interrupt_handler(int irq, void *dev_id, spin_lock_irqsave(&iommu->lock, flags); - /* Silence interrupts from both event and PPR logging */ - entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); - iommu_clear_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT); - iommu_clear_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT); + /* Silence interrupts from both event and PPR + * by clearing the enable logging bits in the + * control register */ + entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); + iommu_clear_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT); + iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT); + /* RW1C status bit */ writel(entry, iommu->mmio_base+IOMMU_STATUS_MMIO_OFFSET); spin_unlock_irqrestore(&iommu->lock, flags); diff --git a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h index d2176d0..2f2d740 100644 --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h @@ -313,31 +313,21 @@ #define IOMMU_LOG_ENTRY_TIMEOUT 1000 /* Control Register */ -#define IOMMU_CONTROL_MMIO_OFFSET 0x18 -#define IOMMU_CONTROL_TRANSLATION_ENABLE_MASK 0x00000001 -#define IOMMU_CONTROL_TRANSLATION_ENABLE_SHIFT 0 -#define IOMMU_CONTROL_HT_TUNNEL_TRANSLATION_MASK 0x00000002 -#define IOMMU_CONTROL_HT_TUNNEL_TRANSLATION_SHIFT 1 -#define IOMMU_CONTROL_EVENT_LOG_ENABLE_MASK 0x00000004 -#define IOMMU_CONTROL_EVENT_LOG_ENABLE_SHIFT 2 -#define IOMMU_CONTROL_EVENT_LOG_INT_MASK 0x00000008 -#define IOMMU_CONTROL_EVENT_LOG_INT_SHIFT 3 -#define IOMMU_CONTROL_COMP_WAIT_INT_MASK 0x00000010 -#define IOMMU_CONTROL_COMP_WAIT_INT_SHIFT 4 -#define IOMMU_CONTROL_INVALIDATION_TIMEOUT_MASK 0x000000E0 -#define IOMMU_CONTROL_INVALIDATION_TIMEOUT_SHIFT 5 -#define IOMMU_CONTROL_PASS_POSTED_WRITE_MASK 0x00000100 -#define IOMMU_CONTROL_PASS_POSTED_WRITE_SHIFT 8 -#define IOMMU_CONTROL_RESP_PASS_POSTED_WRITE_MASK 0x00000200 -#define IOMMU_CONTROL_RESP_PASS_POSTED_WRITE_SHIFT 9 -#define IOMMU_CONTROL_COHERENT_MASK 0x00000400 -#define IOMMU_CONTROL_COHERENT_SHIFT 10 -#define IOMMU_CONTROL_ISOCHRONOUS_MASK 0x00000800 -#define IOMMU_CONTROL_ISOCHRONOUS_SHIFT 11 -#define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_MASK 0x00001000 -#define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT 12 -#define IOMMU_CONTROL_RESTART_MASK 0x80000000 -#define IOMMU_CONTROL_RESTART_SHIFT 31 +#define IOMMU_CONTROL_MMIO_OFFSET 0x18 +#define IOMMU_CONTROL_TRANSLATION_ENABLE_SHIFT 0 +#define IOMMU_CONTROL_HT_TUNNEL_TRANSLATION_SHIFT 1 +#define IOMMU_CONTROL_EVENT_LOG_ENABLE_SHIFT 2 +#define IOMMU_CONTROL_EVENT_LOG_INT_SHIFT 3 +#define IOMMU_CONTROL_COMP_WAIT_INT_SHIFT 4 +#define IOMMU_CONTROL_INVALIDATION_TIMEOUT_SHIFT 5 +#define IOMMU_CONTROL_PASS_POSTED_WRITE_SHIFT 8 +#define IOMMU_CONTROL_RESP_PASS_POSTED_WRITE_SHIFT 9 +#define IOMMU_CONTROL_COHERENT_SHIFT 10 +#define IOMMU_CONTROL_ISOCHRONOUS_SHIFT 11 +#define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT 12 +#define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT 13 +#define IOMMU_CONTROL_PPR_LOG_INT_SHIFT 14 +#define IOMMU_CONTROL_RESTART_SHIFT 31 #define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT 13 #define IOMMU_CONTROL_PPR_INT_SHIFT 14 @@ -363,38 +353,33 @@ #define IOMMU_EXCLUSION_LIMIT_HIGH_SHIFT 0 /* Extended Feature Register*/ -#define IOMMU_EXT_FEATURE_MMIO_OFFSET 0x30 -#define IOMMU_EXT_FEATURE_PREFSUP_SHIFT 0x0 -#define IOMMU_EXT_FEATURE_PPRSUP_SHIFT 0x1 -#define IOMMU_EXT_FEATURE_XTSUP_SHIFT 0x2 -#define IOMMU_EXT_FEATURE_NXSUP_SHIFT 0x3 -#define IOMMU_EXT_FEATURE_GTSUP_SHIFT 0x4 -#define IOMMU_EXT_FEATURE_IASUP_SHIFT 0x6 -#define IOMMU_EXT_FEATURE_GASUP_SHIFT 0x7 -#define IOMMU_EXT_FEATURE_HESUP_SHIFT 0x8 -#define IOMMU_EXT_FEATURE_PCSUP_SHIFT 0x9 -#define IOMMU_EXT_FEATURE_HATS_SHIFT 0x10 -#define IOMMU_EXT_FEATURE_HATS_MASK 0x00000C00 -#define IOMMU_EXT_FEATURE_GATS_SHIFT 0x12 -#define IOMMU_EXT_FEATURE_GATS_MASK 0x00003000 -#define IOMMU_EXT_FEATURE_GLXSUP_SHIFT 0x14 -#define IOMMU_EXT_FEATURE_GLXSUP_MASK 0x0000C000 - -#define IOMMU_EXT_FEATURE_PASMAX_SHIFT 0x0 -#define IOMMU_EXT_FEATURE_PASMAX_MASK 0x0000001F +#define IOMMU_EXT_FEATURE_MMIO_OFFSET 0x30 +#define IOMMU_EXT_FEATURE_PREFSUP_SHIFT 0x0 +#define IOMMU_EXT_FEATURE_PPRSUP_SHIFT 0x1 +#define IOMMU_EXT_FEATURE_XTSUP_SHIFT 0x2 +#define IOMMU_EXT_FEATURE_NXSUP_SHIFT 0x3 +#define IOMMU_EXT_FEATURE_GTSUP_SHIFT 0x4 +#define IOMMU_EXT_FEATURE_IASUP_SHIFT 0x6 +#define IOMMU_EXT_FEATURE_GASUP_SHIFT 0x7 +#define IOMMU_EXT_FEATURE_HESUP_SHIFT 0x8 +#define IOMMU_EXT_FEATURE_PCSUP_SHIFT 0x9 +#define IOMMU_EXT_FEATURE_HATS_SHIFT 0x10 +#define IOMMU_EXT_FEATURE_HATS_MASK 0x00000C00 +#define IOMMU_EXT_FEATURE_GATS_SHIFT 0x12 +#define IOMMU_EXT_FEATURE_GATS_MASK 0x00003000 +#define IOMMU_EXT_FEATURE_GLXSUP_SHIFT 0x14 +#define IOMMU_EXT_FEATURE_GLXSUP_MASK 0x0000C000 + +#define IOMMU_EXT_FEATURE_PASMAX_SHIFT 0x0 +#define IOMMU_EXT_FEATURE_PASMAX_MASK 0x0000001F /* Status Register*/ -#define IOMMU_STATUS_MMIO_OFFSET 0x2020 -#define IOMMU_STATUS_EVENT_OVERFLOW_MASK 0x00000001 -#define IOMMU_STATUS_EVENT_OVERFLOW_SHIFT 0 -#define IOMMU_STATUS_EVENT_LOG_INT_MASK 0x00000002 -#define IOMMU_STATUS_EVENT_LOG_INT_SHIFT 1 -#define IOMMU_STATUS_COMP_WAIT_INT_MASK 0x00000004 -#define IOMMU_STATUS_COMP_WAIT_INT_SHIFT 2 -#define IOMMU_STATUS_EVENT_LOG_RUN_MASK 0x00000008 -#define IOMMU_STATUS_EVENT_LOG_RUN_SHIFT 3 -#define IOMMU_STATUS_CMD_BUFFER_RUN_MASK 0x00000010 -#define IOMMU_STATUS_CMD_BUFFER_RUN_SHIFT 4 +#define IOMMU_STATUS_MMIO_OFFSET 0x2020 +#define IOMMU_STATUS_EVENT_OVERFLOW_SHIFT 0 +#define IOMMU_STATUS_EVENT_LOG_INT_SHIFT 1 +#define IOMMU_STATUS_COMP_WAIT_INT_SHIFT 2 +#define IOMMU_STATUS_EVENT_LOG_RUN_SHIFT 3 +#define IOMMU_STATUS_CMD_BUFFER_RUN_SHIFT 4 #define IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT 5 #define IOMMU_STATUS_PPR_LOG_INT_SHIFT 6 #define IOMMU_STATUS_PPR_LOG_RUN_SHIFT 7 -- 1.7.10.4
<suravee.suthikulpanit@amd.com>
2013-Jun-10 05:05 UTC
[PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> The IOMMU interrupt handling in bottom half must clear the PPR log interrupt and event log interrupt bits to re-enable the interrupt. This is done by writing 1 to the memory mapped register to clear the bit. Due to hardware bug, if the driver tries to clear this bit while the IOMMU hardware also setting this bit, the conflict will result with the bit being set. If the interrupt handling code does not make sure to clear this bit, subsequent changes in the event/PPR logs will no longer generating interrupts, and would result if buffer overflow. After clearing the bits, the driver must read back the register to verify. Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> --- V2 changes: - Coding style fixes xen/drivers/passthrough/amd/iommu_init.c | 36 ++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c index a85c63f..048a2e6 100644 --- a/xen/drivers/passthrough/amd/iommu_init.c +++ b/xen/drivers/passthrough/amd/iommu_init.c @@ -616,15 +616,21 @@ static void iommu_check_event_log(struct amd_iommu *iommu) spin_lock_irqsave(&iommu->lock, flags); - /*check event overflow */ entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + while (entry & (1 << IOMMU_STATUS_EVENT_LOG_INT_SHIFT)) + { + /* Check event overflow */ + if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) ) + iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control); - if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) ) - iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control); + /* RW1C interrupt status bit */ + writel((1 << IOMMU_STATUS_EVENT_LOG_INT_SHIFT), + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); - /* RW1C interrupt status bit */ - writel((1 << IOMMU_STATUS_EVENT_LOG_INT_SHIFT), - iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + /* Workaround for erratum787: + * Re-check to make sure the bit has been cleared */ + entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + } spin_unlock_irqrestore(&iommu->lock, flags); } @@ -684,15 +690,21 @@ static void iommu_check_ppr_log(struct amd_iommu *iommu) spin_lock_irqsave(&iommu->lock, flags); - /*check event overflow */ entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + while ( entry & (1 << IOMMU_STATUS_PPR_LOG_INT_SHIFT ) ) + { + /* Check event overflow */ + if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) ) + iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control); - if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) ) - iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control); + /* RW1C interrupt status bit */ + writel((1 << IOMMU_STATUS_PPR_LOG_INT_SHIFT), + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); - /* RW1C interrupt status bit */ - writel((1 << IOMMU_STATUS_PPR_LOG_INT_SHIFT), - iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + /* Workaround for erratum787: + * Re-check to make sure the bit has been cleared */ + entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + } spin_unlock_irqrestore(&iommu->lock, flags); } -- 1.7.10.4
At 00:05 -0500 on 10 Jun (1370822751), suravee.suthikulpanit@amd.com wrote:> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> > > The IOMMU interrupt handling in bottom half must clear the PPR log interrupt > and event log interrupt bits to re-enable the interrupt. This is done by > writing 1 to the memory mapped register to clear the bit. Due to hardware bug, > if the driver tries to clear this bit while the IOMMU hardware also setting > this bit, the conflict will result with the bit being set. If the interrupt > handling code does not make sure to clear this bit, subsequent changes in the > event/PPR logs will no longer generating interrupts, and would result if > buffer overflow. After clearing the bits, the driver must read back > the register to verify.Is there a risk of livelock here? That is, if some device is causing a lot of IOMMU faults, a CPU could get stuck in this loop re-enabling interrupts as fast as they are raised. The solution suggested in the erratum seems better: if the bit is set after clearing, process the interrupts again (i.e. run/schedule the top-half handler). That way the bottom-half handler will definitely terminate and the system can make some progress. Cheers, Tim.> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> > --- > V2 changes: > - Coding style fixes > > xen/drivers/passthrough/amd/iommu_init.c | 36 ++++++++++++++++++++---------- > 1 file changed, 24 insertions(+), 12 deletions(-) > > diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c > index a85c63f..048a2e6 100644 > --- a/xen/drivers/passthrough/amd/iommu_init.c > +++ b/xen/drivers/passthrough/amd/iommu_init.c > @@ -616,15 +616,21 @@ static void iommu_check_event_log(struct amd_iommu *iommu) > > spin_lock_irqsave(&iommu->lock, flags); > > - /*check event overflow */ > entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > + while (entry & (1 << IOMMU_STATUS_EVENT_LOG_INT_SHIFT)) > + { > + /* Check event overflow */ > + if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) ) > + iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control); > > - if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) ) > - iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control); > + /* RW1C interrupt status bit */ > + writel((1 << IOMMU_STATUS_EVENT_LOG_INT_SHIFT), > + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > > - /* RW1C interrupt status bit */ > - writel((1 << IOMMU_STATUS_EVENT_LOG_INT_SHIFT), > - iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > + /* Workaround for erratum787: > + * Re-check to make sure the bit has been cleared */ > + entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > + } > > spin_unlock_irqrestore(&iommu->lock, flags); > } > @@ -684,15 +690,21 @@ static void iommu_check_ppr_log(struct amd_iommu *iommu) > > spin_lock_irqsave(&iommu->lock, flags); > > - /*check event overflow */ > entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > + while ( entry & (1 << IOMMU_STATUS_PPR_LOG_INT_SHIFT ) ) > + { > + /* Check event overflow */ > + if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) ) > + iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control); > > - if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) ) > - iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control); > + /* RW1C interrupt status bit */ > + writel((1 << IOMMU_STATUS_PPR_LOG_INT_SHIFT), > + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > > - /* RW1C interrupt status bit */ > - writel((1 << IOMMU_STATUS_PPR_LOG_INT_SHIFT), > - iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > + /* Workaround for erratum787: > + * Re-check to make sure the bit has been cleared */ > + entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > + } > > spin_unlock_irqrestore(&iommu->lock, flags); > } > -- > 1.7.10.4 > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel
Jan Beulich
2013-Jun-10 09:47 UTC
Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
>>> On 10.06.13 at 11:35, Tim Deegan <tim@xen.org> wrote: > At 00:05 -0500 on 10 Jun (1370822751), suravee.suthikulpanit@amd.com wrote: >> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> >> >> The IOMMU interrupt handling in bottom half must clear the PPR log interrupt >> and event log interrupt bits to re-enable the interrupt. This is done by >> writing 1 to the memory mapped register to clear the bit. Due to hardware > bug, >> if the driver tries to clear this bit while the IOMMU hardware also setting >> this bit, the conflict will result with the bit being set. If the interrupt >> handling code does not make sure to clear this bit, subsequent changes in > the >> event/PPR logs will no longer generating interrupts, and would result if >> buffer overflow. After clearing the bits, the driver must read back >> the register to verify. > > Is there a risk of livelock here? That is, if some device is causing a > lot of IOMMU faults, a CPU could get stuck in this loop re-enabling > interrupts as fast as they are raised. > > The solution suggested in the erratum seems better: if the bit is set > after clearing, process the interrupts again (i.e. run/schedule the > top-half handler). That way the bottom-half handler will definitely > terminate and the system can make some progress.That''s what''s being done really: The actual interrupt handler disables the interrupt sources, and the tasklet re-enables them (or at least is supposed to do so - patch 1 isn''t really correct in the respect). The only thing that I think is wrong (but again already in patch 1) is that the status bit should get cleared before an interrupt source gets re-enabled. I started cleaning up patch 1 anyway, so I''ll post a v3 once done. Jan
Jan Beulich
2013-Jun-10 10:05 UTC
Re: [PATCH 1/2 V2] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
>>> On 10.06.13 at 07:05, <suravee.suthikulpanit@amd.com> wrote: > From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> > > The IOMMU interrupt bits in the IOMMU status registers are > "read-only, and write-1-to-clear (RW1C). Therefore, the existing > logic which reads the register, set the bit, and then writing back > the values could accidentally clear certain bits if it has been set. > > The correct logic would just be writing only the value which only > set the interrupt bits, and leave the rest to zeros. > > This patch also, clean up #define masks as Jan has suggested.This went to far, and imo in the wrong direction - the mask values are what ultimately should stay, since the shift values can be computed from them. And this cleanup should really be done only when [gs]et_field_in_reg_u32() get the redundant shift parameter eliminated (i.e. in a separate, post-4.3 patch). But as said already in the response to Tim''s reply on patch 2 - this patch also had a few other issues, so I''m going to reply with a v3 once I''m done with the fixing/cleanup. Jan> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> > --- > V2 changes: > - Additional fixes as pointed out by Jan. > - Clean up unnecessary #define mask as suggested by Jan. > > xen/drivers/passthrough/amd/iommu_cmd.c | 18 +++-- > xen/drivers/passthrough/amd/iommu_init.c | 31 ++++----- > xen/include/asm-x86/hvm/svm/amd-iommu-defs.h | 95 +++++++++++--------------- > 3 files changed, 63 insertions(+), 81 deletions(-) > > diff --git a/xen/drivers/passthrough/amd/iommu_cmd.c > b/xen/drivers/passthrough/amd/iommu_cmd.c > index 4c60149..f0283d4 100644 > --- a/xen/drivers/passthrough/amd/iommu_cmd.c > +++ b/xen/drivers/passthrough/amd/iommu_cmd.c > @@ -75,11 +75,9 @@ static void flush_command_buffer(struct amd_iommu *iommu) > u32 cmd[4], status; > int loop_count, comp_wait; > > - /* clear ''ComWaitInt'' in status register (WIC) */ > - set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, 0, > - IOMMU_STATUS_COMP_WAIT_INT_MASK, > - IOMMU_STATUS_COMP_WAIT_INT_SHIFT, &status); > - writel(status, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > + /* RW1C ''ComWaitInt'' in status register */ > + writel((1 << IOMMU_STATUS_COMP_WAIT_INT_SHIFT), > + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > > /* send an empty COMPLETION_WAIT command to flush command buffer */ > cmd[3] = cmd[2] = 0; > @@ -96,16 +94,16 @@ static void flush_command_buffer(struct amd_iommu *iommu) > do { > status = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > comp_wait = get_field_from_reg_u32(status, > - IOMMU_STATUS_COMP_WAIT_INT_MASK, > - > IOMMU_STATUS_COMP_WAIT_INT_SHIFT); > + (1 << IOMMU_STATUS_COMP_WAIT_INT_SHIFT), > + IOMMU_STATUS_COMP_WAIT_INT_SHIFT); > --loop_count; > } while ( !comp_wait && loop_count ); > > if ( comp_wait ) > { > - /* clear ''ComWaitInt'' in status register (WIC) */ > - status &= IOMMU_STATUS_COMP_WAIT_INT_MASK; > - writel(status, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > + /* RW1C ''ComWaitInt'' in status register */ > + writel((1 << IOMMU_STATUS_COMP_WAIT_INT_SHIFT), > + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > return; > } > AMD_IOMMU_DEBUG("Warning: ComWaitInt bit did not assert!\n"); > diff --git a/xen/drivers/passthrough/amd/iommu_init.c > b/xen/drivers/passthrough/amd/iommu_init.c > index a939c73..a85c63f 100644 > --- a/xen/drivers/passthrough/amd/iommu_init.c > +++ b/xen/drivers/passthrough/amd/iommu_init.c > @@ -439,8 +439,8 @@ static void iommu_reset_log(struct amd_iommu *iommu, > > ctrl_func(iommu, IOMMU_CONTROL_DISABLED); > > - /*clear overflow bit */ > - iommu_clear_bit(&entry, of_bit); > + /* RW1C overflow bit */ > + iommu_set_bit(&entry, of_bit); > writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > > /*reset event log base address */ > @@ -622,11 +622,9 @@ static void iommu_check_event_log(struct amd_iommu > *iommu) > if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) ) > iommu_reset_log(iommu, &iommu->event_log, > set_iommu_event_log_control); > > - /* reset interrupt status bit */ > - entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > - iommu_set_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT); > - > - writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > + /* RW1C interrupt status bit */ > + writel((1 << IOMMU_STATUS_EVENT_LOG_INT_SHIFT), > + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > > spin_unlock_irqrestore(&iommu->lock, flags); > } > @@ -692,11 +690,9 @@ static void iommu_check_ppr_log(struct amd_iommu *iommu) > if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) ) > iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control); > > - /* reset interrupt status bit */ > - entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > - iommu_set_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT); > - > - writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > + /* RW1C interrupt status bit */ > + writel((1 << IOMMU_STATUS_PPR_LOG_INT_SHIFT), > + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > > spin_unlock_irqrestore(&iommu->lock, flags); > } > @@ -733,10 +729,13 @@ static void iommu_interrupt_handler(int irq, void > *dev_id, > > spin_lock_irqsave(&iommu->lock, flags); > > - /* Silence interrupts from both event and PPR logging */ > - entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > - iommu_clear_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT); > - iommu_clear_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT); > + /* Silence interrupts from both event and PPR > + * by clearing the enable logging bits in the > + * control register */ > + entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); > + iommu_clear_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT); > + iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT); > + /* RW1C status bit */ > writel(entry, iommu->mmio_base+IOMMU_STATUS_MMIO_OFFSET); > > spin_unlock_irqrestore(&iommu->lock, flags); > diff --git a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h > b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h > index d2176d0..2f2d740 100644 > --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h > +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h > @@ -313,31 +313,21 @@ > #define IOMMU_LOG_ENTRY_TIMEOUT 1000 > > /* Control Register */ > -#define IOMMU_CONTROL_MMIO_OFFSET 0x18 > -#define IOMMU_CONTROL_TRANSLATION_ENABLE_MASK 0x00000001 > -#define IOMMU_CONTROL_TRANSLATION_ENABLE_SHIFT 0 > -#define IOMMU_CONTROL_HT_TUNNEL_TRANSLATION_MASK 0x00000002 > -#define IOMMU_CONTROL_HT_TUNNEL_TRANSLATION_SHIFT 1 > -#define IOMMU_CONTROL_EVENT_LOG_ENABLE_MASK 0x00000004 > -#define IOMMU_CONTROL_EVENT_LOG_ENABLE_SHIFT 2 > -#define IOMMU_CONTROL_EVENT_LOG_INT_MASK 0x00000008 > -#define IOMMU_CONTROL_EVENT_LOG_INT_SHIFT 3 > -#define IOMMU_CONTROL_COMP_WAIT_INT_MASK 0x00000010 > -#define IOMMU_CONTROL_COMP_WAIT_INT_SHIFT 4 > -#define IOMMU_CONTROL_INVALIDATION_TIMEOUT_MASK 0x000000E0 > -#define IOMMU_CONTROL_INVALIDATION_TIMEOUT_SHIFT 5 > -#define IOMMU_CONTROL_PASS_POSTED_WRITE_MASK 0x00000100 > -#define IOMMU_CONTROL_PASS_POSTED_WRITE_SHIFT 8 > -#define IOMMU_CONTROL_RESP_PASS_POSTED_WRITE_MASK 0x00000200 > -#define IOMMU_CONTROL_RESP_PASS_POSTED_WRITE_SHIFT 9 > -#define IOMMU_CONTROL_COHERENT_MASK 0x00000400 > -#define IOMMU_CONTROL_COHERENT_SHIFT 10 > -#define IOMMU_CONTROL_ISOCHRONOUS_MASK 0x00000800 > -#define IOMMU_CONTROL_ISOCHRONOUS_SHIFT 11 > -#define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_MASK 0x00001000 > -#define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT 12 > -#define IOMMU_CONTROL_RESTART_MASK 0x80000000 > -#define IOMMU_CONTROL_RESTART_SHIFT 31 > +#define IOMMU_CONTROL_MMIO_OFFSET 0x18 > +#define IOMMU_CONTROL_TRANSLATION_ENABLE_SHIFT 0 > +#define IOMMU_CONTROL_HT_TUNNEL_TRANSLATION_SHIFT 1 > +#define IOMMU_CONTROL_EVENT_LOG_ENABLE_SHIFT 2 > +#define IOMMU_CONTROL_EVENT_LOG_INT_SHIFT 3 > +#define IOMMU_CONTROL_COMP_WAIT_INT_SHIFT 4 > +#define IOMMU_CONTROL_INVALIDATION_TIMEOUT_SHIFT 5 > +#define IOMMU_CONTROL_PASS_POSTED_WRITE_SHIFT 8 > +#define IOMMU_CONTROL_RESP_PASS_POSTED_WRITE_SHIFT 9 > +#define IOMMU_CONTROL_COHERENT_SHIFT 10 > +#define IOMMU_CONTROL_ISOCHRONOUS_SHIFT 11 > +#define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT 12 > +#define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT 13 > +#define IOMMU_CONTROL_PPR_LOG_INT_SHIFT 14 > +#define IOMMU_CONTROL_RESTART_SHIFT 31 > > #define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT 13 > #define IOMMU_CONTROL_PPR_INT_SHIFT 14 > @@ -363,38 +353,33 @@ > #define IOMMU_EXCLUSION_LIMIT_HIGH_SHIFT 0 > > /* Extended Feature Register*/ > -#define IOMMU_EXT_FEATURE_MMIO_OFFSET 0x30 > -#define IOMMU_EXT_FEATURE_PREFSUP_SHIFT 0x0 > -#define IOMMU_EXT_FEATURE_PPRSUP_SHIFT 0x1 > -#define IOMMU_EXT_FEATURE_XTSUP_SHIFT 0x2 > -#define IOMMU_EXT_FEATURE_NXSUP_SHIFT 0x3 > -#define IOMMU_EXT_FEATURE_GTSUP_SHIFT 0x4 > -#define IOMMU_EXT_FEATURE_IASUP_SHIFT 0x6 > -#define IOMMU_EXT_FEATURE_GASUP_SHIFT 0x7 > -#define IOMMU_EXT_FEATURE_HESUP_SHIFT 0x8 > -#define IOMMU_EXT_FEATURE_PCSUP_SHIFT 0x9 > -#define IOMMU_EXT_FEATURE_HATS_SHIFT 0x10 > -#define IOMMU_EXT_FEATURE_HATS_MASK 0x00000C00 > -#define IOMMU_EXT_FEATURE_GATS_SHIFT 0x12 > -#define IOMMU_EXT_FEATURE_GATS_MASK 0x00003000 > -#define IOMMU_EXT_FEATURE_GLXSUP_SHIFT 0x14 > -#define IOMMU_EXT_FEATURE_GLXSUP_MASK 0x0000C000 > - > -#define IOMMU_EXT_FEATURE_PASMAX_SHIFT 0x0 > -#define IOMMU_EXT_FEATURE_PASMAX_MASK 0x0000001F > +#define IOMMU_EXT_FEATURE_MMIO_OFFSET 0x30 > +#define IOMMU_EXT_FEATURE_PREFSUP_SHIFT 0x0 > +#define IOMMU_EXT_FEATURE_PPRSUP_SHIFT 0x1 > +#define IOMMU_EXT_FEATURE_XTSUP_SHIFT 0x2 > +#define IOMMU_EXT_FEATURE_NXSUP_SHIFT 0x3 > +#define IOMMU_EXT_FEATURE_GTSUP_SHIFT 0x4 > +#define IOMMU_EXT_FEATURE_IASUP_SHIFT 0x6 > +#define IOMMU_EXT_FEATURE_GASUP_SHIFT 0x7 > +#define IOMMU_EXT_FEATURE_HESUP_SHIFT 0x8 > +#define IOMMU_EXT_FEATURE_PCSUP_SHIFT 0x9 > +#define IOMMU_EXT_FEATURE_HATS_SHIFT 0x10 > +#define IOMMU_EXT_FEATURE_HATS_MASK 0x00000C00 > +#define IOMMU_EXT_FEATURE_GATS_SHIFT 0x12 > +#define IOMMU_EXT_FEATURE_GATS_MASK 0x00003000 > +#define IOMMU_EXT_FEATURE_GLXSUP_SHIFT 0x14 > +#define IOMMU_EXT_FEATURE_GLXSUP_MASK 0x0000C000 > + > +#define IOMMU_EXT_FEATURE_PASMAX_SHIFT 0x0 > +#define IOMMU_EXT_FEATURE_PASMAX_MASK 0x0000001F > > /* Status Register*/ > -#define IOMMU_STATUS_MMIO_OFFSET 0x2020 > -#define IOMMU_STATUS_EVENT_OVERFLOW_MASK 0x00000001 > -#define IOMMU_STATUS_EVENT_OVERFLOW_SHIFT 0 > -#define IOMMU_STATUS_EVENT_LOG_INT_MASK 0x00000002 > -#define IOMMU_STATUS_EVENT_LOG_INT_SHIFT 1 > -#define IOMMU_STATUS_COMP_WAIT_INT_MASK 0x00000004 > -#define IOMMU_STATUS_COMP_WAIT_INT_SHIFT 2 > -#define IOMMU_STATUS_EVENT_LOG_RUN_MASK 0x00000008 > -#define IOMMU_STATUS_EVENT_LOG_RUN_SHIFT 3 > -#define IOMMU_STATUS_CMD_BUFFER_RUN_MASK 0x00000010 > -#define IOMMU_STATUS_CMD_BUFFER_RUN_SHIFT 4 > +#define IOMMU_STATUS_MMIO_OFFSET 0x2020 > +#define IOMMU_STATUS_EVENT_OVERFLOW_SHIFT 0 > +#define IOMMU_STATUS_EVENT_LOG_INT_SHIFT 1 > +#define IOMMU_STATUS_COMP_WAIT_INT_SHIFT 2 > +#define IOMMU_STATUS_EVENT_LOG_RUN_SHIFT 3 > +#define IOMMU_STATUS_CMD_BUFFER_RUN_SHIFT 4 > #define IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT 5 > #define IOMMU_STATUS_PPR_LOG_INT_SHIFT 6 > #define IOMMU_STATUS_PPR_LOG_RUN_SHIFT 7 > -- > 1.7.10.4
At 10:47 +0100 on 10 Jun (1370861263), Jan Beulich wrote:> >>> On 10.06.13 at 11:35, Tim Deegan <tim@xen.org> wrote: > > At 00:05 -0500 on 10 Jun (1370822751), suravee.suthikulpanit@amd.com wrote: > >> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> > >> > >> The IOMMU interrupt handling in bottom half must clear the PPR log interrupt > >> and event log interrupt bits to re-enable the interrupt. This is done by > >> writing 1 to the memory mapped register to clear the bit. Due to hardware > > bug, > >> if the driver tries to clear this bit while the IOMMU hardware also setting > >> this bit, the conflict will result with the bit being set. If the interrupt > >> handling code does not make sure to clear this bit, subsequent changes in > > the > >> event/PPR logs will no longer generating interrupts, and would result if > >> buffer overflow. After clearing the bits, the driver must read back > >> the register to verify. > > > > Is there a risk of livelock here? That is, if some device is causing a > > lot of IOMMU faults, a CPU could get stuck in this loop re-enabling > > interrupts as fast as they are raised. > > > > The solution suggested in the erratum seems better: if the bit is set > > after clearing, process the interrupts again (i.e. run/schedule the > > top-half handler). That way the bottom-half handler will definitely > > terminate and the system can make some progress. > > That''s what''s being done really: The actual interrupt handler disables > the interrupt sources, and the tasklet re-enables them (or at least is > supposed to do so - patch 1 isn''t really correct in the respect).Oh I see, the idea is that we use the control register to mask interrupts instead of relying on the status register? That seems better. But doesn''t this IOMMU already mask further interrupts when the pending bit in the status register is set? I can''t see any wording about that in the IOMMU doc but the erratum implies it. Suravee, do you know if this is the case? If that _is_ the case then the correct handling logic is much simpler: don''t touch any IOMMU registers at all in the interrupt handler; clear the interrupt-pending bits in the tasklet. In either case, the while () loop worries me; I think it would be better to schedule the tasklet again if we see the bit is set; a ''while (pending is set) { clear pending bit; }'' loop might never exit the tasklet at all. Cheers, Tim.
Jan Beulich
2013-Jun-10 10:53 UTC
Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
>>> On 10.06.13 at 12:40, Tim Deegan <tim@xen.org> wrote: > At 10:47 +0100 on 10 Jun (1370861263), Jan Beulich wrote: >> >>> On 10.06.13 at 11:35, Tim Deegan <tim@xen.org> wrote: >> > At 00:05 -0500 on 10 Jun (1370822751), suravee.suthikulpanit@amd.com wrote: >> >> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> >> >> >> >> The IOMMU interrupt handling in bottom half must clear the PPR log > interrupt >> >> and event log interrupt bits to re-enable the interrupt. This is done by >> >> writing 1 to the memory mapped register to clear the bit. Due to hardware >> > bug, >> >> if the driver tries to clear this bit while the IOMMU hardware also setting >> >> this bit, the conflict will result with the bit being set. If the interrupt >> >> handling code does not make sure to clear this bit, subsequent changes in >> > the >> >> event/PPR logs will no longer generating interrupts, and would result if >> >> buffer overflow. After clearing the bits, the driver must read back >> >> the register to verify. >> > >> > Is there a risk of livelock here? That is, if some device is causing a >> > lot of IOMMU faults, a CPU could get stuck in this loop re-enabling >> > interrupts as fast as they are raised. >> > >> > The solution suggested in the erratum seems better: if the bit is set >> > after clearing, process the interrupts again (i.e. run/schedule the >> > top-half handler). That way the bottom-half handler will definitely >> > terminate and the system can make some progress. >> >> That''s what''s being done really: The actual interrupt handler disables >> the interrupt sources, and the tasklet re-enables them (or at least is >> supposed to do so - patch 1 isn''t really correct in the respect). > > Oh I see, the idea is that we use the control register to mask > interrupts instead of relying on the status register? That seems > better. But doesn''t this IOMMU already mask further interrupts when the > pending bit in the status register is set? I can''t see any wording > about that in the IOMMU doc but the erratum implies it. Suravee, do you > know if this is the case?Actually, the documentation has a subtle but perhaps important difference int the wording here: For EventLogInt and ComWaitInt is read "An interrupt is generated when <status bit> = 1b and MMIO Offset 0018h[<control bit>] = 1b", where as for PPRInt and GAInt it says "An interrupt is generated when <status bit> changes from 0b to 1b and MMIO Offset 0018h[<control bit>] = 1b". So I''d like to be one the safe side and assume further interrupts can be generated in all cases - see also the emulation behavior in iommu_guest.c which - afaict - always raises an interrupt, not just on a 0 -> 1 transition.> If that _is_ the case then the correct handling logic is much simpler: > don''t touch any IOMMU registers at all in the interrupt handler; clear > the interrupt-pending bits in the tasklet. > > In either case, the while () loop worries me; I think it would be better > to schedule the tasklet again if we see the bit is set; a ''while > (pending is set) { clear pending bit; }'' loop might never exit the > tasklet at all.That could only be due to a hardware bug worse than the one we''re working around here, and I don''t think is worth dealing with. Actually, re-scheduling the tasklet would likely make analyzing the issue (if it ever really happened in practice) more difficult than just having the NMI watchdog point cleanly at the spinning code here. Jan
Jan Beulich
2013-Jun-10 10:56 UTC
[PATCH 1/2 v3] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
The IOMMU interrupt bits in the IOMMU status registers are "read-only, and write-1-to-clear (RW1C). Therefore, the existing logic which reads the register, set the bit, and then writing back the values could accidentally clear certain bits if it has been set. The correct logic would just be writing only the value which only set the interrupt bits, and leave the rest to zeros. This patch also, clean up #define masks as Jan has suggested. Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> With iommu_interrupt_handler() properly having got switched its readl() from status to control register, the subsequent writel() needed to be switched too (and the RW1C comment there was bogus). Further, with iommu_interrupt_handler() now actually disabling the interrupt sources, they also need to get re-enabled by the tasklet once it finished processing the respective log. Some of the cleanup went too far - undone. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- 2013-05-13.orig/xen/drivers/passthrough/amd/iommu_cmd.c 2013-06-10 11:28:23.000000000 +0200 +++ 2013-05-13/xen/drivers/passthrough/amd/iommu_cmd.c 2013-06-10 12:16:15.000000000 +0200 @@ -75,11 +75,9 @@ static void flush_command_buffer(struct u32 cmd[4], status; int loop_count, comp_wait; - /* clear ''ComWaitInt'' in status register (WIC) */ - set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, 0, - IOMMU_STATUS_COMP_WAIT_INT_MASK, - IOMMU_STATUS_COMP_WAIT_INT_SHIFT, &status); - writel(status, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + /* RW1C ''ComWaitInt'' in status register */ + writel(IOMMU_STATUS_COMP_WAIT_INT_MASK, + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); /* send an empty COMPLETION_WAIT command to flush command buffer */ cmd[3] = cmd[2] = 0; @@ -103,9 +101,9 @@ static void flush_command_buffer(struct if ( comp_wait ) { - /* clear ''ComWaitInt'' in status register (WIC) */ - status &= IOMMU_STATUS_COMP_WAIT_INT_MASK; - writel(status, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + /* RW1C ''ComWaitInt'' in status register */ + writel(IOMMU_STATUS_COMP_WAIT_INT_MASK, + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); return; } AMD_IOMMU_DEBUG("Warning: ComWaitInt bit did not assert!\n"); --- 2013-05-13.orig/xen/drivers/passthrough/amd/iommu_guest.c 2012-02-10 11:25:54.000000000 +0100 +++ 2013-05-13/xen/drivers/passthrough/amd/iommu_guest.c 2013-06-10 12:32:00.000000000 +0200 @@ -754,7 +754,14 @@ static void guest_iommu_mmio_write64(str u64_to_reg(&iommu->ppr_log.reg_tail, val); break; case IOMMU_STATUS_MMIO_OFFSET: - u64_to_reg(&iommu->reg_status, val); + val &= IOMMU_STATUS_EVENT_OVERFLOW_MASK | + IOMMU_STATUS_EVENT_LOG_INT_MASK | + IOMMU_STATUS_COMP_WAIT_INT_MASK | + IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK | + IOMMU_STATUS_PPR_LOG_INT_MASK | + IOMMU_STATUS_GAPIC_LOG_OVERFLOW_MASK | + IOMMU_STATUS_GAPIC_LOG_INT_MASK; + u64_to_reg(&iommu->reg_status, reg_to_u64(iommu->reg_status) ^ val); break; default: --- 2013-05-13.orig/xen/drivers/passthrough/amd/iommu_init.c 2013-06-10 11:28:23.000000000 +0200 +++ 2013-05-13/xen/drivers/passthrough/amd/iommu_init.c 2013-06-10 12:17:00.000000000 +0200 @@ -344,13 +344,13 @@ static void set_iommu_ppr_log_control(st writeq(0, iommu->mmio_base + IOMMU_PPR_LOG_TAIL_OFFSET); iommu_set_bit(&entry, IOMMU_CONTROL_PPR_ENABLE_SHIFT); - iommu_set_bit(&entry, IOMMU_CONTROL_PPR_INT_SHIFT); + iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT); iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT); } else { iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_ENABLE_SHIFT); - iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_INT_SHIFT); + iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT); iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT); } @@ -439,8 +439,8 @@ static void iommu_reset_log(struct amd_i ctrl_func(iommu, IOMMU_CONTROL_DISABLED); - /*clear overflow bit */ - iommu_clear_bit(&entry, of_bit); + /* RW1C overflow bit */ + iommu_set_bit(&entry, of_bit); writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); /*reset event log base address */ @@ -619,14 +619,18 @@ static void iommu_check_event_log(struct /*check event overflow */ entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + /* RW1C interrupt status bit */ + writel(IOMMU_STATUS_EVENT_LOG_INT_MASK, + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) ) iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control); - - /* reset interrupt status bit */ - entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); - iommu_set_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT); - - writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + else + { + entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); + iommu_set_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT); + writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); + } spin_unlock_irqrestore(&iommu->lock, flags); } @@ -689,14 +693,18 @@ static void iommu_check_ppr_log(struct a /*check event overflow */ entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + /* RW1C interrupt status bit */ + writel(IOMMU_STATUS_PPR_LOG_INT_MASK, + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) ) iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control); - - /* reset interrupt status bit */ - entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); - iommu_set_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT); - - writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + else + { + entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); + iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT); + writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); + } spin_unlock_irqrestore(&iommu->lock, flags); } @@ -733,11 +741,14 @@ static void iommu_interrupt_handler(int spin_lock_irqsave(&iommu->lock, flags); - /* Silence interrupts from both event and PPR logging */ - entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); - iommu_clear_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT); - iommu_clear_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT); - writel(entry, iommu->mmio_base+IOMMU_STATUS_MMIO_OFFSET); + /* + * Silence interrupts from both event and PPR by clearing the + * enable logging bits in the control register + */ + entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); + iommu_clear_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT); + iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT); + writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); spin_unlock_irqrestore(&iommu->lock, flags); --- 2013-05-13.orig/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h 2013-03-11 14:57:19.000000000 +0100 +++ 2013-05-13/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h 2013-06-10 12:35:55.000000000 +0200 @@ -336,14 +336,13 @@ #define IOMMU_CONTROL_ISOCHRONOUS_SHIFT 11 #define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_MASK 0x00001000 #define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT 12 +#define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT 13 +#define IOMMU_CONTROL_PPR_LOG_INT_SHIFT 14 +#define IOMMU_CONTROL_PPR_ENABLE_SHIFT 15 +#define IOMMU_CONTROL_GT_ENABLE_SHIFT 16 #define IOMMU_CONTROL_RESTART_MASK 0x80000000 #define IOMMU_CONTROL_RESTART_SHIFT 31 -#define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT 13 -#define IOMMU_CONTROL_PPR_INT_SHIFT 14 -#define IOMMU_CONTROL_PPR_ENABLE_SHIFT 15 -#define IOMMU_CONTROL_GT_ENABLE_SHIFT 16 - /* Exclusion Register */ #define IOMMU_EXCLUSION_BASE_LOW_OFFSET 0x20 #define IOMMU_EXCLUSION_BASE_HIGH_OFFSET 0x24 @@ -395,9 +394,18 @@ #define IOMMU_STATUS_EVENT_LOG_RUN_SHIFT 3 #define IOMMU_STATUS_CMD_BUFFER_RUN_MASK 0x00000010 #define IOMMU_STATUS_CMD_BUFFER_RUN_SHIFT 4 +#define IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK 0x00000020 #define IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT 5 +#define IOMMU_STATUS_PPR_LOG_INT_MASK 0x00000040 #define IOMMU_STATUS_PPR_LOG_INT_SHIFT 6 +#define IOMMU_STATUS_PPR_LOG_RUN_MASK 0x00000080 #define IOMMU_STATUS_PPR_LOG_RUN_SHIFT 7 +#define IOMMU_STATUS_GAPIC_LOG_OVERFLOW_MASK 0x00000100 +#define IOMMU_STATUS_GAPIC_LOG_OVERFLOW_SHIFT 8 +#define IOMMU_STATUS_GAPIC_LOG_INT_MASK 0x00000200 +#define IOMMU_STATUS_GAPIC_LOG_INT_SHIFT 9 +#define IOMMU_STATUS_GAPIC_LOG_RUN_MASK 0x00000400 +#define IOMMU_STATUS_GAPIC_LOG_RUN_SHIFT 10 /* I/O Page Table */ #define IOMMU_PAGE_TABLE_ENTRY_SIZE 8 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
The IOMMU interrupt handling in bottom half must clear the PPR log interrupt and event log interrupt bits to re-enable the interrupt. This is done by writing 1 to the memory mapped register to clear the bit. Due to hardware bug, if the driver tries to clear this bit while the IOMMU hardware also setting this bit, the conflict will result with the bit being set. If the interrupt handling code does not make sure to clear this bit, subsequent changes in the event/PPR logs will no longer generating interrupts, and would result if buffer overflow. After clearing the bits, the driver must read back the register to verify. Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> Adjust to apply on top of heavily modified patch 1. Adjust flow to get away with a single readl() in each instance of the status register checks. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/drivers/passthrough/amd/iommu_init.c +++ b/xen/drivers/passthrough/amd/iommu_init.c @@ -616,13 +616,18 @@ static void iommu_check_event_log(struct spin_lock_irqsave(&iommu->lock, flags); - /*check event overflow */ - entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); - - /* RW1C interrupt status bit */ - writel(IOMMU_STATUS_EVENT_LOG_INT_MASK, - iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + do { + /* RW1C interrupt status bit */ + writel(IOMMU_STATUS_EVENT_LOG_INT_MASK, + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + /* + * Workaround for erratum787: + * Re-check to make sure the bit has been cleared. + */ + entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + } while ( entry & IOMMU_STATUS_EVENT_LOG_INT_MASK ); + /* Check event overflow. */ if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) ) iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control); else @@ -690,13 +695,18 @@ static void iommu_check_ppr_log(struct a spin_lock_irqsave(&iommu->lock, flags); - /*check event overflow */ - entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); - - /* RW1C interrupt status bit */ - writel(IOMMU_STATUS_PPR_LOG_INT_MASK, - iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + do { + /* RW1C interrupt status bit */ + writel(IOMMU_STATUS_PPR_LOG_INT_MASK, + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + /* + * Workaround for erratum787: + * Re-check to make sure the bit has been cleared. + */ + entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + } while ( entry & IOMMU_STATUS_PPR_LOG_INT_MASK ); + /* Check event overflow. */ if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) ) iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control); else _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Jan Beulich
2013-Jun-10 11:02 UTC
Re: [PATCH 1/2 v3] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
>>> On 10.06.13 at 12:56, "Jan Beulich" <JBeulich@suse.com> wrote: > The IOMMU interrupt bits in the IOMMU status registers are > "read-only, and write-1-to-clear (RW1C). Therefore, the existing > logic which reads the register, set the bit, and then writing back > the values could accidentally clear certain bits if it has been set. > > The correct logic would just be writing only the value which only > set the interrupt bits, and leave the rest to zeros. > > This patch also, clean up #define masks as Jan has suggested. > > Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> > > With iommu_interrupt_handler() properly having got switched its readl() > from status to control register, the subsequent writel() needed to be > switched too (and the RW1C comment there was bogus). > > Further, with iommu_interrupt_handler() now actually disabling the > interrupt sources, they also need to get re-enabled by the tasklet once > it finished processing the respective log.Finally, guest write emulation to the status register needs to be done with the RW1C (and RO for all other bits) semantics in mind too.> Signed-off-by: Jan Beulich <jbeulich@suse.com>Jan
Tim Deegan
2013-Jun-10 12:18 UTC
Re: [PATCH 1/2 v3] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
At 11:56 +0100 on 10 Jun (1370865376), Jan Beulich wrote:> --- 2013-05-13.orig/xen/drivers/passthrough/amd/iommu_guest.c 2012-02-10 11:25:54.000000000 +0100 > +++ 2013-05-13/xen/drivers/passthrough/amd/iommu_guest.c 2013-06-10 12:32:00.000000000 +0200 > @@ -754,7 +754,14 @@ static void guest_iommu_mmio_write64(str > u64_to_reg(&iommu->ppr_log.reg_tail, val); > break; > case IOMMU_STATUS_MMIO_OFFSET: > - u64_to_reg(&iommu->reg_status, val); > + val &= IOMMU_STATUS_EVENT_OVERFLOW_MASK | > + IOMMU_STATUS_EVENT_LOG_INT_MASK | > + IOMMU_STATUS_COMP_WAIT_INT_MASK | > + IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK | > + IOMMU_STATUS_PPR_LOG_INT_MASK | > + IOMMU_STATUS_GAPIC_LOG_OVERFLOW_MASK | > + IOMMU_STATUS_GAPIC_LOG_INT_MASK; > + u64_to_reg(&iommu->reg_status, reg_to_u64(iommu->reg_status) ^ val);''^ val''? Surely ''& ~val'' or it will set unset bits too.> @@ -439,8 +439,8 @@ static void iommu_reset_log(struct amd_i > > ctrl_func(iommu, IOMMU_CONTROL_DISABLED); > > - /*clear overflow bit */ > - iommu_clear_bit(&entry, of_bit); > + /* RW1C overflow bit */ > + iommu_set_bit(&entry, of_bit); > writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);Won''t this clear all other status bits as well? Tim.
Jan Beulich
2013-Jun-10 12:31 UTC
Re: [PATCH 1/2 v3] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
>>> On 10.06.13 at 14:18, Tim Deegan <tim@xen.org> wrote: > At 11:56 +0100 on 10 Jun (1370865376), Jan Beulich wrote: >> --- 2013-05-13.orig/xen/drivers/passthrough/amd/iommu_guest.c 2012-02-10 > 11:25:54.000000000 +0100 >> +++ 2013-05-13/xen/drivers/passthrough/amd/iommu_guest.c 2013-06-10 > 12:32:00.000000000 +0200 >> @@ -754,7 +754,14 @@ static void guest_iommu_mmio_write64(str >> u64_to_reg(&iommu->ppr_log.reg_tail, val); >> break; >> case IOMMU_STATUS_MMIO_OFFSET: >> - u64_to_reg(&iommu->reg_status, val); >> + val &= IOMMU_STATUS_EVENT_OVERFLOW_MASK | >> + IOMMU_STATUS_EVENT_LOG_INT_MASK | >> + IOMMU_STATUS_COMP_WAIT_INT_MASK | >> + IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK | >> + IOMMU_STATUS_PPR_LOG_INT_MASK | >> + IOMMU_STATUS_GAPIC_LOG_OVERFLOW_MASK | >> + IOMMU_STATUS_GAPIC_LOG_INT_MASK; >> + u64_to_reg(&iommu->reg_status, reg_to_u64(iommu->reg_status) ^ val); > > ''^ val''? Surely ''& ~val'' or it will set unset bits too.Oh, yes, of course.>> @@ -439,8 +439,8 @@ static void iommu_reset_log(struct amd_i >> >> ctrl_func(iommu, IOMMU_CONTROL_DISABLED); >> >> - /*clear overflow bit */ >> - iommu_clear_bit(&entry, of_bit); >> + /* RW1C overflow bit */ >> + iommu_set_bit(&entry, of_bit); >> writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > > Won''t this clear all other status bits as well?Yes it will, albeit not because of this patch. But I guess we should fix this as we go. Expect v4 soon... Jan
Jan Beulich
2013-Jun-10 12:41 UTC
[PATCH 1/2 v4] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
The IOMMU interrupt bits in the IOMMU status registers are "read-only, and write-1-to-clear (RW1C). Therefore, the existing logic which reads the register, set the bit, and then writing back the values could accidentally clear certain bits if it has been set. The correct logic would just be writing only the value which only set the interrupt bits, and leave the rest to zeros. This patch also, clean up #define masks as Jan has suggested. Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> With iommu_interrupt_handler() properly having got switched its readl() from status to control register, the subsequent writel() needed to be switched too (and the RW1C comment there was bogus). Some of the cleanup went too far - undone. Further, with iommu_interrupt_handler() now actually disabling the interrupt sources, they also need to get re-enabled by the tasklet once it finished processing the respective log. Finally, guest write emulation to the status register needs to be done with the RW1C (and RO for all other bits) semantics in mind too. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- v4: Clear _only_ the respective overflow bit in iommu_reset_log(). Fix logic error in adjustment to guest_iommu_mmio_write64(). (Both pointed out by Tim - Thanks!) --- a/xen/drivers/passthrough/amd/iommu_cmd.c +++ b/xen/drivers/passthrough/amd/iommu_cmd.c @@ -75,11 +75,9 @@ static void flush_command_buffer(struct u32 cmd[4], status; int loop_count, comp_wait; - /* clear ''ComWaitInt'' in status register (WIC) */ - set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, 0, - IOMMU_STATUS_COMP_WAIT_INT_MASK, - IOMMU_STATUS_COMP_WAIT_INT_SHIFT, &status); - writel(status, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + /* RW1C ''ComWaitInt'' in status register */ + writel(IOMMU_STATUS_COMP_WAIT_INT_MASK, + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); /* send an empty COMPLETION_WAIT command to flush command buffer */ cmd[3] = cmd[2] = 0; @@ -103,9 +101,9 @@ static void flush_command_buffer(struct if ( comp_wait ) { - /* clear ''ComWaitInt'' in status register (WIC) */ - status &= IOMMU_STATUS_COMP_WAIT_INT_MASK; - writel(status, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + /* RW1C ''ComWaitInt'' in status register */ + writel(IOMMU_STATUS_COMP_WAIT_INT_MASK, + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); return; } AMD_IOMMU_DEBUG("Warning: ComWaitInt bit did not assert!\n"); --- a/xen/drivers/passthrough/amd/iommu_guest.c +++ b/xen/drivers/passthrough/amd/iommu_guest.c @@ -754,7 +754,14 @@ static void guest_iommu_mmio_write64(str u64_to_reg(&iommu->ppr_log.reg_tail, val); break; case IOMMU_STATUS_MMIO_OFFSET: - u64_to_reg(&iommu->reg_status, val); + val &= IOMMU_STATUS_EVENT_OVERFLOW_MASK | + IOMMU_STATUS_EVENT_LOG_INT_MASK | + IOMMU_STATUS_COMP_WAIT_INT_MASK | + IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK | + IOMMU_STATUS_PPR_LOG_INT_MASK | + IOMMU_STATUS_GAPIC_LOG_OVERFLOW_MASK | + IOMMU_STATUS_GAPIC_LOG_INT_MASK; + u64_to_reg(&iommu->reg_status, reg_to_u64(iommu->reg_status) & ~val); break; default: --- a/xen/drivers/passthrough/amd/iommu_init.c +++ b/xen/drivers/passthrough/amd/iommu_init.c @@ -344,13 +344,13 @@ static void set_iommu_ppr_log_control(st writeq(0, iommu->mmio_base + IOMMU_PPR_LOG_TAIL_OFFSET); iommu_set_bit(&entry, IOMMU_CONTROL_PPR_ENABLE_SHIFT); - iommu_set_bit(&entry, IOMMU_CONTROL_PPR_INT_SHIFT); + iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT); iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT); } else { iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_ENABLE_SHIFT); - iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_INT_SHIFT); + iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT); iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT); } @@ -410,7 +410,7 @@ static void iommu_reset_log(struct amd_i void (*ctrl_func)(struct amd_iommu *iommu, int)) { u32 entry; - int log_run, run_bit, of_bit; + int log_run, run_bit; int loop_count = 1000; BUG_ON(!iommu || ((log != &iommu->event_log) && (log != &iommu->ppr_log))); @@ -419,10 +419,6 @@ static void iommu_reset_log(struct amd_i IOMMU_STATUS_EVENT_LOG_RUN_SHIFT : IOMMU_STATUS_PPR_LOG_RUN_SHIFT; - of_bit = ( log == &iommu->event_log ) ? - IOMMU_STATUS_EVENT_OVERFLOW_SHIFT : - IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT; - /* wait until EventLogRun bit = 0 */ do { entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); @@ -439,9 +435,10 @@ static void iommu_reset_log(struct amd_i ctrl_func(iommu, IOMMU_CONTROL_DISABLED); - /*clear overflow bit */ - iommu_clear_bit(&entry, of_bit); - writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + /* RW1C overflow bit */ + writel(log == &iommu->event_log ? IOMMU_STATUS_EVENT_OVERFLOW_MASK + : IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK, + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); /*reset event log base address */ log->head = 0; @@ -619,14 +616,18 @@ static void iommu_check_event_log(struct /*check event overflow */ entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + /* RW1C interrupt status bit */ + writel(IOMMU_STATUS_EVENT_LOG_INT_MASK, + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) ) iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control); - - /* reset interrupt status bit */ - entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); - iommu_set_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT); - - writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + else + { + entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); + iommu_set_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT); + writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); + } spin_unlock_irqrestore(&iommu->lock, flags); } @@ -689,14 +690,18 @@ static void iommu_check_ppr_log(struct a /*check event overflow */ entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + /* RW1C interrupt status bit */ + writel(IOMMU_STATUS_PPR_LOG_INT_MASK, + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) ) iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control); - - /* reset interrupt status bit */ - entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); - iommu_set_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT); - - writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + else + { + entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); + iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT); + writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); + } spin_unlock_irqrestore(&iommu->lock, flags); } @@ -733,11 +738,14 @@ static void iommu_interrupt_handler(int spin_lock_irqsave(&iommu->lock, flags); - /* Silence interrupts from both event and PPR logging */ - entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); - iommu_clear_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT); - iommu_clear_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT); - writel(entry, iommu->mmio_base+IOMMU_STATUS_MMIO_OFFSET); + /* + * Silence interrupts from both event and PPR by clearing the + * enable logging bits in the control register + */ + entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); + iommu_clear_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT); + iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT); + writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); spin_unlock_irqrestore(&iommu->lock, flags); --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h @@ -336,14 +336,13 @@ #define IOMMU_CONTROL_ISOCHRONOUS_SHIFT 11 #define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_MASK 0x00001000 #define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT 12 +#define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT 13 +#define IOMMU_CONTROL_PPR_LOG_INT_SHIFT 14 +#define IOMMU_CONTROL_PPR_ENABLE_SHIFT 15 +#define IOMMU_CONTROL_GT_ENABLE_SHIFT 16 #define IOMMU_CONTROL_RESTART_MASK 0x80000000 #define IOMMU_CONTROL_RESTART_SHIFT 31 -#define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT 13 -#define IOMMU_CONTROL_PPR_INT_SHIFT 14 -#define IOMMU_CONTROL_PPR_ENABLE_SHIFT 15 -#define IOMMU_CONTROL_GT_ENABLE_SHIFT 16 - /* Exclusion Register */ #define IOMMU_EXCLUSION_BASE_LOW_OFFSET 0x20 #define IOMMU_EXCLUSION_BASE_HIGH_OFFSET 0x24 @@ -395,9 +394,18 @@ #define IOMMU_STATUS_EVENT_LOG_RUN_SHIFT 3 #define IOMMU_STATUS_CMD_BUFFER_RUN_MASK 0x00000010 #define IOMMU_STATUS_CMD_BUFFER_RUN_SHIFT 4 +#define IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK 0x00000020 #define IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT 5 +#define IOMMU_STATUS_PPR_LOG_INT_MASK 0x00000040 #define IOMMU_STATUS_PPR_LOG_INT_SHIFT 6 +#define IOMMU_STATUS_PPR_LOG_RUN_MASK 0x00000080 #define IOMMU_STATUS_PPR_LOG_RUN_SHIFT 7 +#define IOMMU_STATUS_GAPIC_LOG_OVERFLOW_MASK 0x00000100 +#define IOMMU_STATUS_GAPIC_LOG_OVERFLOW_SHIFT 8 +#define IOMMU_STATUS_GAPIC_LOG_INT_MASK 0x00000200 +#define IOMMU_STATUS_GAPIC_LOG_INT_SHIFT 9 +#define IOMMU_STATUS_GAPIC_LOG_RUN_MASK 0x00000400 +#define IOMMU_STATUS_GAPIC_LOG_RUN_SHIFT 10 /* I/O Page Table */ #define IOMMU_PAGE_TABLE_ENTRY_SIZE 8 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
At 11:53 +0100 on 10 Jun (1370865209), Jan Beulich wrote:> > Oh I see, the idea is that we use the control register to mask > > interrupts instead of relying on the status register? That seems > > better. But doesn''t this IOMMU already mask further interrupts when the > > pending bit in the status register is set? I can''t see any wording > > about that in the IOMMU doc but the erratum implies it. Suravee, do you > > know if this is the case? > > Actually, the documentation has a subtle but perhaps important > difference int the wording here: For EventLogInt and ComWaitInt > is read "An interrupt is generated when <status bit> = 1b and MMIO > Offset 0018h[<control bit>] = 1b", where as for PPRInt and GAInt > it says "An interrupt is generated when <status bit> changes from 0b > to 1b and MMIO Offset 0018h[<control bit>] = 1b". > > So I''d like to be one the safe side and assume further interrupts can > be generated in all cases.Fair enough. - see also the emulation behavior in> iommu_guest.c which - afaict - always raises an interrupt, not just on > a 0 -> 1 transition.Well, if it were a documented beahviour, we ought to change that. :)> > In either case, the while () loop worries me; I think it would be better > > to schedule the tasklet again if we see the bit is set; a ''while > > (pending is set) { clear pending bit; }'' loop might never exit the > > tasklet at all. > > That could only be due to a hardware bug worse than the one we''re > working around here, and I don''t think is worth dealing with.Well, there''s runaway guest hardware. If we reschedule the tasklet then pci_check_disable_device() will eventually catch and suppress the misbehaviour; if we spin here it won''t get a chance. I guess the argument is that it will eventually overflow the log buffer and stop setting the log-interrupt bit -- that needs at least a comment. I was also a bit worried about the erratum-787 event getting delayed (since there''s no interrupt to cause us to actually process it), but I just realised that''s a more general problem: we ought to be resetting the ''pending'' bits _before_ scanning the log, or any new entries that arrive between the log scan and the RW1C write won''t be seen until the _next_ log entry causes an interrupt. How about: write-to-clear status.pending process the log if (status.pending) reschedule the tasklet else unmask the interrupt. Since we have to do the extra read anyway for erratum 787, we might as well save ourselves the bother of taking an interrupt in the other case. Tim.
Tim Deegan
2013-Jun-10 12:46 UTC
Re: [PATCH 1/2 v4] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
At 13:41 +0100 on 10 Jun (1370871672), Jan Beulich wrote:> The IOMMU interrupt bits in the IOMMU status registers are > "read-only, and write-1-to-clear (RW1C). Therefore, the existing > logic which reads the register, set the bit, and then writing back > the values could accidentally clear certain bits if it has been set. > > The correct logic would just be writing only the value which only > set the interrupt bits, and leave the rest to zeros. > > This patch also, clean up #define masks as Jan has suggested. > > Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> > > With iommu_interrupt_handler() properly having got switched its readl() > from status to control register, the subsequent writel() needed to be > switched too (and the RW1C comment there was bogus). > > Some of the cleanup went too far - undone. > > Further, with iommu_interrupt_handler() now actually disabling the > interrupt sources, they also need to get re-enabled by the tasklet once > it finished processing the respective log. > > Finally, guest write emulation to the status register needs to be done > with the RW1C (and RO for all other bits) semantics in mind too. > > Signed-off-by: Jan Beulich <jbeulich@suse.com>Reviewed-by: Tim Deegan <tim@xen.org>
Jan Beulich
2013-Jun-10 13:23 UTC
Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
>>> On 10.06.13 at 14:43, Tim Deegan <tim@xen.org> wrote: > At 11:53 +0100 on 10 Jun (1370865209), Jan Beulich wrote: >> > Oh I see, the idea is that we use the control register to mask >> > interrupts instead of relying on the status register? That seems >> > better. But doesn''t this IOMMU already mask further interrupts when the >> > pending bit in the status register is set? I can''t see any wording >> > about that in the IOMMU doc but the erratum implies it. Suravee, do you >> > know if this is the case? >> >> Actually, the documentation has a subtle but perhaps important >> difference int the wording here: For EventLogInt and ComWaitInt >> is read "An interrupt is generated when <status bit> = 1b and MMIO >> Offset 0018h[<control bit>] = 1b", where as for PPRInt and GAInt >> it says "An interrupt is generated when <status bit> changes from 0b >> to 1b and MMIO Offset 0018h[<control bit>] = 1b". >> >> So I''d like to be one the safe side and assume further interrupts can >> be generated in all cases. > > Fair enough. > > - see also the emulation behavior in >> iommu_guest.c which - afaict - always raises an interrupt, not just on >> a 0 -> 1 transition. > > Well, if it were a documented beahviour, we ought to change that. :)I''ll leave that to Suravee though, pending clarification on whether the difference in wording is actually meaningful.>> > In either case, the while () loop worries me; I think it would be better >> > to schedule the tasklet again if we see the bit is set; a ''while >> > (pending is set) { clear pending bit; }'' loop might never exit the >> > tasklet at all. >> >> That could only be due to a hardware bug worse than the one we''re >> working around here, and I don''t think is worth dealing with. > > Well, there''s runaway guest hardware. If we reschedule the tasklet then > pci_check_disable_device() will eventually catch and suppress the > misbehaviour; if we spin here it won''t get a chance. I guess the > argument is that it will eventually overflow the log buffer and stop > setting the log-interrupt bit -- that needs at least a comment. > > I was also a bit worried about the erratum-787 event getting delayed > (since there''s no interrupt to cause us to actually process it), but I > just realised that''s a more general problem: we ought to be resetting > the ''pending'' bits _before_ scanning the log, or any new entries that > arrive between the log scan and the RW1C write won''t be seen until the > _next_ log entry causes an interrupt. > > How about: > write-to-clear status.pending > process the log > if (status.pending) > reschedule the tasklet > else > unmask the interrupt. > > Since we have to do the extra read anyway for erratum 787, we might as > well save ourselves the bother of taking an interrupt in the other case.Yes, that''s a very reasonable approach. Will be a v4 then here soon too, except that the locking there looks bogus too (and hence may need fixing along the way)... Jan
Jan Beulich
2013-Jun-10 13:41 UTC
Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
>>> On 10.06.13 at 14:43, Tim Deegan <tim@xen.org> wrote: > How about: > write-to-clear status.pending > process the log > if (status.pending) > reschedule the tasklet > else > unmask the interrupt.Actually I think this is leaving a window for improperly handled log entries too: There''s also no guarantee that re-enabling the interrupt would cause an interrupt to be raised when the buffer became non-empty between the status.pending check and the re-enable. Therefore I think we need write-to-clear status.pending process the log unmask the interrupt. if (status.pending) reschedule the tasklet Jan
George Dunlap
2013-Jun-10 13:49 UTC
Re: [PATCH 1/2 v4] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
On Mon, Jun 10, 2013 at 1:41 PM, Jan Beulich <JBeulich@suse.com> wrote:> The IOMMU interrupt bits in the IOMMU status registers are > "read-only, and write-1-to-clear (RW1C). Therefore, the existing > logic which reads the register, set the bit, and then writing back > the values could accidentally clear certain bits if it has been set. > > The correct logic would just be writing only the value which only > set the interrupt bits, and leave the rest to zeros. > > This patch also, clean up #define masks as Jan has suggested. > > Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> > > With iommu_interrupt_handler() properly having got switched its readl() > from status to control register, the subsequent writel() needed to be > switched too (and the RW1C comment there was bogus). > > Some of the cleanup went too far - undone. > > Further, with iommu_interrupt_handler() now actually disabling the > interrupt sources, they also need to get re-enabled by the tasklet once > it finished processing the respective log. > > Finally, guest write emulation to the status register needs to be done > with the RW1C (and RO for all other bits) semantics in mind too. > > Signed-off-by: Jan Beulich <jbeulich@suse.com>What''s the impact of this as a bug? This looks like it has a fairly high risk of introducing regressions in existing working system. So unless it has a pretty wide impact, I think we should wait and include this in 4.3.1. -George
Suravee Suthikulanit
2013-Jun-10 13:53 UTC
Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
On 6/10/2013 5:53 AM, Jan Beulich wrote:>>>> On 10.06.13 at 12:40, Tim Deegan <tim@xen.org> wrote: >> At 10:47 +0100 on 10 Jun (1370861263), Jan Beulich wrote: >>>>>> On 10.06.13 at 11:35, Tim Deegan <tim@xen.org> wrote: >>>> At 00:05 -0500 on 10 Jun (1370822751), suravee.suthikulpanit@amd.com wrote: >>>>> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> >>>>> >>>>> The IOMMU interrupt handling in bottom half must clear the PPR log >> interrupt >>>>> and event log interrupt bits to re-enable the interrupt. This is done by >>>>> writing 1 to the memory mapped register to clear the bit. Due to hardware >>>> bug, >>>>> if the driver tries to clear this bit while the IOMMU hardware also setting >>>>> this bit, the conflict will result with the bit being set. If the interrupt >>>>> handling code does not make sure to clear this bit, subsequent changes in >>>> the >>>>> event/PPR logs will no longer generating interrupts, and would result if >>>>> buffer overflow. After clearing the bits, the driver must read back >>>>> the register to verify. >>>> Is there a risk of livelock here? That is, if some device is causing a >>>> lot of IOMMU faults, a CPU could get stuck in this loop re-enabling >>>> interrupts as fast as they are raised. >>>> >>>> The solution suggested in the erratum seems better: if the bit is set >>>> after clearing, process the interrupts again (i.e. run/schedule the >>>> top-half handler). That way the bottom-half handler will definitely >>>> terminate and the system can make some progress. >>> That''s what''s being done really: The actual interrupt handler disables >>> the interrupt sources, and the tasklet re-enables them (or at least is >>> supposed to do so - patch 1 isn''t really correct in the respect). >> Oh I see, the idea is that we use the control register to mask >> interrupts instead of relying on the status register? That seems >> better. But doesn''t this IOMMU already mask further interrupts when the >> pending bit in the status register is set? I can''t see any wording >> about that in the IOMMU doc but the erratum implies it. Suravee, do you >> know if this is the case? > Actually, the documentation has a subtle but perhaps important > difference int the wording here: For EventLogInt and ComWaitInt > is read "An interrupt is generated when <status bit> = 1b and MMIO > Offset 0018h[<control bit>] = 1b", where as for PPRInt and GAInt > it says "An interrupt is generated when <status bit> changes from 0b > to 1b and MMIO Offset 0018h[<control bit>] = 1b". > > So I''d like to be one the safe side and assume further interrupts can > be generated in all cases - see also the emulation behavior in > iommu_guest.c which - afaict - always raises an interrupt, not just on > a 0 -> 1 transition.The behavior should be that the interrupt will be generated if the "xxxInt" bit is 0. Once generated, it will be set to "1" by the hardware. If this bit is 1, events will be added to the log but interrupt will not be generated. Suravee
Jan Beulich
2013-Jun-10 13:55 UTC
Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
>>> On 10.06.13 at 14:43, Tim Deegan <tim@xen.org> wrote: > How about: > write-to-clear status.pending > process the log > if (status.pending) > reschedule the tasklet > else > unmask the interrupt.Actually, I don''t think that''s right: Clearing the pending bit with the respective interrupt source disabled doesn''t allow the pending bit to become set again upon arrival of a new log entry, and hence we might still be leaving entries unhandled for an indefinite period of time. So I now think we need to do write-to-clear status.pending process the log unmask the interrupt process the log again if (status.pending) reschedule the tasklet Of course we could skip the unmask and parse-again if the interrupt wasn''t masked in the first place, which is possible since it gets masked only for any IOMMU that had its interrupt handler executed before the tasklet gets busy on it. Jan
At 14:41 +0100 on 10 Jun (1370875314), Jan Beulich wrote:> >>> On 10.06.13 at 14:43, Tim Deegan <tim@xen.org> wrote: > > How about: > > write-to-clear status.pending > > process the log > > if (status.pending) > > reschedule the tasklet > > else > > unmask the interrupt. > > Actually I think this is leaving a window for improperly handled > log entries too: There''s also no guarantee that re-enabling the > interrupt would cause an interrupt to be raised when the buffer > became non-empty between the status.pending check and the > re-enable.Oh. Yes, I suppose that depends on whether the interrupt is triggered on (edge(pending) && enabled) or edge(pending && enabled), or something else.> Therefore I think we need > > write-to-clear status.pending > process the log > unmask the interrupt. > if (status.pending) > reschedule the taskletYes, that looks better. Tim.
Suravee Suthikulanit
2013-Jun-10 13:58 UTC
Re: [PATCH 1/2 v3] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
On 6/10/2013 7:31 AM, Jan Beulich wrote:>>> @@ -439,8 +439,8 @@ static void iommu_reset_log(struct amd_i >>> >> >>> >> ctrl_func(iommu, IOMMU_CONTROL_DISABLED); >>> >> >>> >>- /*clear overflow bit */ >>> >>- iommu_clear_bit(&entry, of_bit); >>> >>+ /* RW1C overflow bit */ >>> >>+ iommu_set_bit(&entry, of_bit); >>> >> writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); >> > >> >Won''t this clear all other status bits as well? > Yes it will, albeit not because of this patch. But I guess we should > fix this as we go. Expect v4 soon...Sorry, I missed one case. Suravee
Jan Beulich
2013-Jun-10 13:59 UTC
Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
>>> On 10.06.13 at 15:53, Suravee Suthikulanit <suravee.suthikulpanit@amd.com>wrote:> On 6/10/2013 5:53 AM, Jan Beulich wrote: >>>>> On 10.06.13 at 12:40, Tim Deegan <tim@xen.org> wrote: >>> At 10:47 +0100 on 10 Jun (1370861263), Jan Beulich wrote: >>>>>>> On 10.06.13 at 11:35, Tim Deegan <tim@xen.org> wrote: >>>>> At 00:05 -0500 on 10 Jun (1370822751), suravee.suthikulpanit@amd.com wrote: >>>>>> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> >>>>>> >>>>>> The IOMMU interrupt handling in bottom half must clear the PPR log >>> interrupt >>>>>> and event log interrupt bits to re-enable the interrupt. This is done by >>>>>> writing 1 to the memory mapped register to clear the bit. Due to hardware >>>>> bug, >>>>>> if the driver tries to clear this bit while the IOMMU hardware also setting >>>>>> this bit, the conflict will result with the bit being set. If the interrupt >>>>>> handling code does not make sure to clear this bit, subsequent changes in >>>>> the >>>>>> event/PPR logs will no longer generating interrupts, and would result if >>>>>> buffer overflow. After clearing the bits, the driver must read back >>>>>> the register to verify. >>>>> Is there a risk of livelock here? That is, if some device is causing a >>>>> lot of IOMMU faults, a CPU could get stuck in this loop re-enabling >>>>> interrupts as fast as they are raised. >>>>> >>>>> The solution suggested in the erratum seems better: if the bit is set >>>>> after clearing, process the interrupts again (i.e. run/schedule the >>>>> top-half handler). That way the bottom-half handler will definitely >>>>> terminate and the system can make some progress. >>>> That''s what''s being done really: The actual interrupt handler disables >>>> the interrupt sources, and the tasklet re-enables them (or at least is >>>> supposed to do so - patch 1 isn''t really correct in the respect). >>> Oh I see, the idea is that we use the control register to mask >>> interrupts instead of relying on the status register? That seems >>> better. But doesn''t this IOMMU already mask further interrupts when the >>> pending bit in the status register is set? I can''t see any wording >>> about that in the IOMMU doc but the erratum implies it. Suravee, do you >>> know if this is the case? >> Actually, the documentation has a subtle but perhaps important >> difference int the wording here: For EventLogInt and ComWaitInt >> is read "An interrupt is generated when <status bit> = 1b and MMIO >> Offset 0018h[<control bit>] = 1b", where as for PPRInt and GAInt >> it says "An interrupt is generated when <status bit> changes from 0b >> to 1b and MMIO Offset 0018h[<control bit>] = 1b". >> >> So I''d like to be one the safe side and assume further interrupts can >> be generated in all cases - see also the emulation behavior in >> iommu_guest.c which - afaict - always raises an interrupt, not just on >> a 0 -> 1 transition. > > The behavior should be that the interrupt will be generated if the "xxxInt" > bit is 0. Once generated, it will be set to "1" by the hardware. If this > bit is 1, events will be added to the log but interrupt will not be > generated."Should be" isn''t enough here, even more so given the mentioned wording differences in your documentation. We need to know how actual (past, current, and future) hardware behaves. Jan
Jan Beulich
2013-Jun-10 14:08 UTC
Re: [PATCH 1/2 v4] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
>>> On 10.06.13 at 15:49, George Dunlap <George.Dunlap@eu.citrix.com> wrote: > On Mon, Jun 10, 2013 at 1:41 PM, Jan Beulich <JBeulich@suse.com> wrote: >> The IOMMU interrupt bits in the IOMMU status registers are >> "read-only, and write-1-to-clear (RW1C). Therefore, the existing >> logic which reads the register, set the bit, and then writing back >> the values could accidentally clear certain bits if it has been set. >> >> The correct logic would just be writing only the value which only >> set the interrupt bits, and leave the rest to zeros. >> >> This patch also, clean up #define masks as Jan has suggested. >> >> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> >> >> With iommu_interrupt_handler() properly having got switched its readl() >> from status to control register, the subsequent writel() needed to be >> switched too (and the RW1C comment there was bogus). >> >> Some of the cleanup went too far - undone. >> >> Further, with iommu_interrupt_handler() now actually disabling the >> interrupt sources, they also need to get re-enabled by the tasklet once >> it finished processing the respective log. >> >> Finally, guest write emulation to the status register needs to be done >> with the RW1C (and RO for all other bits) semantics in mind too. >> >> Signed-off-by: Jan Beulich <jbeulich@suse.com> > > What''s the impact of this as a bug?The primary issue really comes very close to a security one: The workaround to the live lock (observed on VT-d iirc) consisting of disabling the interrupt and deferring the actual handling to a tasklet might have been completely broken. Whether that''s actually the case very much depends on how close documentation documents actual hardware: If the wording there is precise, we''d be susceptible to this on the event log side, but be safe from this for the PPR log.> This looks like it has a fairly high risk of introducing regressions > in existing working system. So unless it has a pretty wide impact, I > think we should wait and include this in 4.3.1.If it turns out the documentation is imprecise in the way that we would hope for, I''m fine with postponing this, as then the only real problem would be that we might be seeing too few interrupts (i.e. there might be a silently non-functional device somewhere in the system because of this, but there wouldn''t be any risk to the system as a whole). So when to apply this depends heavily on AMD clarifying actual hardware behavior vs. documentation. But of course you also need to view this in the context of patch 2, which is something I think we want to have in 4.3 (but which will [minimally] need reworking if this patch it to be deferred). Jan
Jan Beulich
2013-Jun-10 15:03 UTC
Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
>>> On 10.06.13 at 15:55, "Jan Beulich" <JBeulich@suse.com> wrote: >>>> On 10.06.13 at 14:43, Tim Deegan <tim@xen.org> wrote: >> How about: >> write-to-clear status.pending >> process the log >> if (status.pending) >> reschedule the tasklet >> else >> unmask the interrupt. > > Actually, I don''t think that''s right: Clearing the pending bit with > the respective interrupt source disabled doesn''t allow the > pending bit to become set again upon arrival of a new log entry, > and hence we might still be leaving entries unhandled for an > indefinite period of time. So I now think we need to do > > write-to-clear status.pending > process the log > unmask the interrupt > process the log again > if (status.pending) > reschedule the tasklet > > Of course we could skip the unmask and parse-again if the > interrupt wasn''t masked in the first place, which is possible since > it gets masked only for any IOMMU that had its interrupt handler > executed before the tasklet gets busy on it.So with this done I now realized that all of these transformations don''t really belong in this erratum workaround patch. They either should be a prereq patch (or folded into patch 1), or a follow-up one, with the one here then nevertheless going back to the original simple loop approach. Do you view this any different? Here is what one of the two functions now looks like, just for reference: static void iommu_check_event_log(struct amd_iommu *iommu) { u32 entry; unsigned long flags; for ( ; ; ) { /* RW1C interrupt status bit */ writel(IOMMU_STATUS_EVENT_LOG_INT_MASK, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); iommu_read_log(iommu, &iommu->event_log, sizeof(event_entry_t), parse_event_log_entry); spin_lock_irqsave(&iommu->lock, flags); /* Check event overflow. */ entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); if ( entry & IOMMU_STATUS_EVENT_OVERFLOW_MASK ) iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control); else { entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); if ( !(entry & IOMMU_CONTROL_EVENT_LOG_INT_MASK) ) { entry |= IOMMU_CONTROL_EVENT_LOG_INT_MASK; writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); spin_unlock_irqrestore(&iommu->lock, flags); continue; } } break; } /* * Workaround for erratum787: * Re-check to make sure the bit has been cleared. */ entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); if ( entry & IOMMU_STATUS_EVENT_LOG_INT_MASK ) tasklet_schedule(&amd_iommu_irq_tasklet); spin_unlock_irqrestore(&iommu->lock, flags); } You''ll note that even here we''re having a loop again, which you presumably won''t like much. The only alternative I see is to run iommu_read_log() with iommu->lock held, which has the caveat of the function itself taking a lock (albeit - without having done any proving yet - I think that lock is taken completely pointlessly). In any case - the erratum workaround is really just the comment and three following lines. Everything else belongs elsewhere imo. Jan
Suravee Suthikulanit
2013-Jun-10 15:11 UTC
Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
On 6/10/2013 8:59 AM, Jan Beulich wrote:>>>> On 10.06.13 at 15:53, Suravee Suthikulanit <suravee.suthikulpanit@amd.com> > wrote: >> On 6/10/2013 5:53 AM, Jan Beulich wrote: >>>>>> On 10.06.13 at 12:40, Tim Deegan <tim@xen.org> wrote: >>>> At 10:47 +0100 on 10 Jun (1370861263), Jan Beulich wrote: >>>>>>>> On 10.06.13 at 11:35, Tim Deegan <tim@xen.org> wrote: >>>>>> At 00:05 -0500 on 10 Jun (1370822751), suravee.suthikulpanit@amd.com wrote: >>>>>>> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> >>>>>>> >>>>>>> The IOMMU interrupt handling in bottom half must clear the PPR log >>>> interrupt >>>>>>> and event log interrupt bits to re-enable the interrupt. This is done by >>>>>>> writing 1 to the memory mapped register to clear the bit. Due to hardware >>>>>> bug, >>>>>>> if the driver tries to clear this bit while the IOMMU hardware also setting >>>>>>> this bit, the conflict will result with the bit being set. If the interrupt >>>>>>> handling code does not make sure to clear this bit, subsequent changes in >>>>>> the >>>>>>> event/PPR logs will no longer generating interrupts, and would result if >>>>>>> buffer overflow. After clearing the bits, the driver must read back >>>>>>> the register to verify. >>>>>> Is there a risk of livelock here? That is, if some device is causing a >>>>>> lot of IOMMU faults, a CPU could get stuck in this loop re-enabling >>>>>> interrupts as fast as they are raised. >>>>>> >>>>>> The solution suggested in the erratum seems better: if the bit is set >>>>>> after clearing, process the interrupts again (i.e. run/schedule the >>>>>> top-half handler). That way the bottom-half handler will definitely >>>>>> terminate and the system can make some progress. >>>>> That''s what''s being done really: The actual interrupt handler disables >>>>> the interrupt sources, and the tasklet re-enables them (or at least is >>>>> supposed to do so - patch 1 isn''t really correct in the respect). >>>> Oh I see, the idea is that we use the control register to mask >>>> interrupts instead of relying on the status register? That seems >>>> better. But doesn''t this IOMMU already mask further interrupts when the >>>> pending bit in the status register is set? I can''t see any wording >>>> about that in the IOMMU doc but the erratum implies it. Suravee, do you >>>> know if this is the case? >>> Actually, the documentation has a subtle but perhaps important >>> difference int the wording here: For EventLogInt and ComWaitInt >>> is read "An interrupt is generated when <status bit> = 1b and MMIO >>> Offset 0018h[<control bit>] = 1b", where as for PPRInt and GAInt >>> it says "An interrupt is generated when <status bit> changes from 0b >>> to 1b and MMIO Offset 0018h[<control bit>] = 1b". >>> >>> So I''d like to be one the safe side and assume further interrupts can >>> be generated in all cases - see also the emulation behavior in >>> iommu_guest.c which - afaict - always raises an interrupt, not just on >>> a 0 -> 1 transition. >> The behavior should be that the interrupt will be generated if the "xxxInt" >> bit is 0. Once generated, it will be set to "1" by the hardware. If this >> bit is 1, events will be added to the log but interrupt will not be >> generated. > "Should be" isn''t enough here, even more so given the mentioned > wording differences in your documentation. We need to know how > actual (past, current, and future) hardware behaves. > > Jan >I am sure this is what the behavior of the hardware. Besides, only the hardware can set this bit. I have also tested by not clearing the bit, and basically I did not see additional interrupts. Suravee
Jan Beulich
2013-Jun-10 15:21 UTC
Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
>>> On 10.06.13 at 17:11, Suravee Suthikulanit <suravee.suthikulpanit@amd.com>wrote:> On 6/10/2013 8:59 AM, Jan Beulich wrote: >>>>> On 10.06.13 at 15:53, Suravee Suthikulanit <suravee.suthikulpanit@amd.com> >> wrote: >>> On 6/10/2013 5:53 AM, Jan Beulich wrote: >>>>>>> On 10.06.13 at 12:40, Tim Deegan <tim@xen.org> wrote: >>>>> At 10:47 +0100 on 10 Jun (1370861263), Jan Beulich wrote: >>>>>>>>> On 10.06.13 at 11:35, Tim Deegan <tim@xen.org> wrote: >>>>>>> At 00:05 -0500 on 10 Jun (1370822751), suravee.suthikulpanit@amd.com wrote: >>>>>>>> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> >>>>>>>> >>>>>>>> The IOMMU interrupt handling in bottom half must clear the PPR log >>>>> interrupt >>>>>>>> and event log interrupt bits to re-enable the interrupt. This is done by >>>>>>>> writing 1 to the memory mapped register to clear the bit. Due to hardware >>>>>>> bug, >>>>>>>> if the driver tries to clear this bit while the IOMMU hardware also setting >>>>>>>> this bit, the conflict will result with the bit being set. If the interrupt >>>>>>>> handling code does not make sure to clear this bit, subsequent changes in >>>>>>> the >>>>>>>> event/PPR logs will no longer generating interrupts, and would result if >>>>>>>> buffer overflow. After clearing the bits, the driver must read back >>>>>>>> the register to verify. >>>>>>> Is there a risk of livelock here? That is, if some device is causing a >>>>>>> lot of IOMMU faults, a CPU could get stuck in this loop re-enabling >>>>>>> interrupts as fast as they are raised. >>>>>>> >>>>>>> The solution suggested in the erratum seems better: if the bit is set >>>>>>> after clearing, process the interrupts again (i.e. run/schedule the >>>>>>> top-half handler). That way the bottom-half handler will definitely >>>>>>> terminate and the system can make some progress. >>>>>> That''s what''s being done really: The actual interrupt handler disables >>>>>> the interrupt sources, and the tasklet re-enables them (or at least is >>>>>> supposed to do so - patch 1 isn''t really correct in the respect). >>>>> Oh I see, the idea is that we use the control register to mask >>>>> interrupts instead of relying on the status register? That seems >>>>> better. But doesn''t this IOMMU already mask further interrupts when the >>>>> pending bit in the status register is set? I can''t see any wording >>>>> about that in the IOMMU doc but the erratum implies it. Suravee, do you >>>>> know if this is the case? >>>> Actually, the documentation has a subtle but perhaps important >>>> difference int the wording here: For EventLogInt and ComWaitInt >>>> is read "An interrupt is generated when <status bit> = 1b and MMIO >>>> Offset 0018h[<control bit>] = 1b", where as for PPRInt and GAInt >>>> it says "An interrupt is generated when <status bit> changes from 0b >>>> to 1b and MMIO Offset 0018h[<control bit>] = 1b". >>>> >>>> So I''d like to be one the safe side and assume further interrupts can >>>> be generated in all cases - see also the emulation behavior in >>>> iommu_guest.c which - afaict - always raises an interrupt, not just on >>>> a 0 -> 1 transition. >>> The behavior should be that the interrupt will be generated if the "xxxInt" >>> bit is 0. Once generated, it will be set to "1" by the hardware. If this >>> bit is 1, events will be added to the log but interrupt will not be >>> generated. >> "Should be" isn''t enough here, even more so given the mentioned >> wording differences in your documentation. We need to know how >> actual (past, current, and future) hardware behaves. >> > I am sure this is what the behavior of the hardware. Besides, only the > hardware can set this bit. > I have also tested by not clearing the bit, and basically I did not see > additional interrupts.Which would allow us to simplify patch 1 quite a bit - there''s then no need to clear the two interrupt-enable bits in the interrupt handler, and iommu_check_*_log() then also wouldn''t need to set them again. We would just need to find the right point in time when to clear the corresponding status flag. And - as written in a response to Tim already - probably some parts of what we discussed for patch 2 really would need to move to patch 1. Jan
At 16:03 +0100 on 10 Jun (1370880211), Jan Beulich wrote:> >>> On 10.06.13 at 15:55, "Jan Beulich" <JBeulich@suse.com> wrote: > >>>> On 10.06.13 at 14:43, Tim Deegan <tim@xen.org> wrote: > >> How about: > >> write-to-clear status.pending > >> process the log > >> if (status.pending) > >> reschedule the tasklet > >> else > >> unmask the interrupt. > > > > Actually, I don''t think that''s right: Clearing the pending bit with > > the respective interrupt source disabled doesn''t allow the > > pending bit to become set again upon arrival of a new log entry,From my reading of the IOMMU spec the pending bit gets set whether the enable bit is set or not: PPRInt: peripheral page service request interrupt. Revision 1: RO. Reset 0b. Reserved. Revision 2: RW1C. Reset 0b. 1=PPR request entry written to the PPR log by the IOMMU. 0=No PPR entry written to the PPR log by the IOMMU. An interrupt is generated when PPRInt=1b and MMIO Offset 0018h[PPRIntEn]=1b. and EventLogInt: event log interrupt. RW1C. Reset 0b. 1=Event entry written to the event log by the IOMMU. 0=No event entry written to the event log by the IOMMU. An interrupt is generated when EventLogInt=1b and MMIO Offset 0018h[EventIntEn]=1b. so we should be OK without a second pass if the pending bit is still clear after unmasking the interrupt.> So with this done I now realized that all of these transformations > don''t really belong in this erratum workaround patch.Agreed. I think this reshuffle to avoid lost entries should be its own patch, and the erratum 787 one should follow it -- unless the logic we end up with handles erratum 787 as a side-effect. :)> static void iommu_check_event_log(struct amd_iommu *iommu) > { > u32 entry; > unsigned long flags; > > for ( ; ; ) > { > /* RW1C interrupt status bit */ > writel(IOMMU_STATUS_EVENT_LOG_INT_MASK, > iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > > iommu_read_log(iommu, &iommu->event_log, > sizeof(event_entry_t), parse_event_log_entry); > > spin_lock_irqsave(&iommu->lock, flags); > > /* Check event overflow. */ > entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > if ( entry & IOMMU_STATUS_EVENT_OVERFLOW_MASK ) > iommu_reset_log(iommu, &iommu->event_log, > set_iommu_event_log_control); > else > { > entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); > if ( !(entry & IOMMU_CONTROL_EVENT_LOG_INT_MASK) ) > { > entry |= IOMMU_CONTROL_EVENT_LOG_INT_MASK; > writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); > spin_unlock_irqrestore(&iommu->lock, flags); > continue; > } > } > > break; > } > > /* > * Workaround for erratum787: > * Re-check to make sure the bit has been cleared. > */ > entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > if ( entry & IOMMU_STATUS_EVENT_LOG_INT_MASK ) > tasklet_schedule(&amd_iommu_irq_tasklet); > > spin_unlock_irqrestore(&iommu->lock, flags); > } > > You''ll note that even here we''re having a loop again, which you > presumably won''t like much.Well, this time the event handling is inside the loop, so it ought to catch and disable bad passthrough devices. I''d still prefer having the tasklet schedule itself and terminate, but I''m happy to defer to your taste.> The only alternative I see is to run > iommu_read_log() with iommu->lock held, which has the caveat of > the function itself taking a lock (albeit - without having done any > proving yet - I think that lock is taken completely pointlessly). > > In any case - the erratum workaround is really just the comment > and three following lines. Everything else belongs elsewhere imo.Yep. Tim.
Suravee Suthikulanit
2013-Jun-10 23:13 UTC
Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
Hi All, We should also check if the EventLogInt and PPRInt bits are set before actually going into the log processing code. Also, I agree with Jan that we should not need to disable the Event log and the PPR log in the IOMMU control register. This could be handled simply through the status register. Also, I think we can further simplify the logic for the workaround by having only one loop instead of two. Here is the newly proposed changes for the patch. However, I am still not sure if we should reschedule the tasklet instead of just using the while loop here. Thank you, Suravee diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c index b5a39a9..bd9913f 100644 --- a/xen/drivers/passthrough/amd/iommu_init.c +++ b/xen/drivers/passthrough/amd/iommu_init.c @@ -615,19 +615,8 @@ static void iommu_check_event_log(struct amd_iommu *iommu) /*check event overflow */ entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); - - /* RW1C interrupt status bit */ - writel(IOMMU_STATUS_EVENT_LOG_INT_MASK, - iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); - if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) ) iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control); - else - { - entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); - iommu_set_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT); - writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); - } spin_unlock_irqrestore(&iommu->lock, flags); } @@ -689,26 +678,20 @@ static void iommu_check_ppr_log(struct amd_iommu *iommu) /*check event overflow */ entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); - - /* RW1C interrupt status bit */ - writel(IOMMU_STATUS_PPR_LOG_INT_MASK, - iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); - if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) ) iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control); - else - { - entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); - iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT); - writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); - } spin_unlock_irqrestore(&iommu->lock, flags); } +#define IOMMU_INT_PENDING(x) ( (x & IOMMU_STATUS_EVENT_LOG_INT_MASK) || \ + (x & IOMMU_STATUS_PPR_LOG_INT_MASK) ) + static void do_amd_iommu_irq(unsigned long data) { struct amd_iommu *iommu; + u32 status, entry; + unsigned long flags; if ( !iommu_found() ) { @@ -722,33 +705,47 @@ static void do_amd_iommu_irq(unsigned long data) * tasklet (instead of one per each IOMMUs). */ for_each_amd_iommu ( iommu ) { - iommu_check_event_log(iommu); + /* Get the IOMMU status register */ + spin_lock_irqsave(&iommu->lock, flags); + status = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + spin_unlock_irqrestore(&iommu->lock, flags); + + while ( IOMMU_INT_PENDING(status) ) + { + entry = 0; + + if ( status & IOMMU_STATUS_EVENT_LOG_INT_MASK ) + { + iommu_check_event_log(iommu); + iommu_set_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT); + } - if ( iommu->ppr_log.buffer != NULL ) - iommu_check_ppr_log(iommu); + if ( (iommu->ppr_log.buffer != NULL) + && (status & IOMMU_STATUS_PPR_LOG_INT_MASK) ) + { + iommu_check_ppr_log(iommu); + iommu_set_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT); + } + + spin_lock_irqsave(&iommu->lock, flags); + + /* RW1C interrupt status bit */ + writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + + /* + * Workaround for erratum787: + * Re-check to make sure the bit has been cleared. + */ + status = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + + spin_unlock_irqrestore(&iommu->lock, flags); + } } } static void iommu_interrupt_handler(int irq, void *dev_id, struct cpu_user_regs *regs) { - u32 entry; - unsigned long flags; - struct amd_iommu *iommu = dev_id; - - spin_lock_irqsave(&iommu->lock, flags); - - /* - * Silence interrupts from both event and PPR by clearing the - * enable logging bits in the control register - */ - entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); - iommu_clear_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT); - iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT); - writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); - - spin_unlock_irqrestore(&iommu->lock, flags); - /* It is the tasklet that will clear the logs and re-enable interrupts */ tasklet_schedule(&amd_iommu_irq_tasklet); }
Jan Beulich
2013-Jun-11 06:40 UTC
Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
>>> On 10.06.13 at 18:31, Tim Deegan <tim@xen.org> wrote: > At 16:03 +0100 on 10 Jun (1370880211), Jan Beulich wrote: > From my reading of the IOMMU spec the pending bit gets set whether the > enable bit is set or not: > > PPRInt: peripheral page service request interrupt. Revision 1: > RO. Reset 0b. Reserved. Revision 2: RW1C. Reset 0b. 1=PPR request > entry written to the PPR log by the IOMMU. 0=No PPR entry written to > the PPR log by the IOMMU. An interrupt is generated when PPRInt=1b and > MMIO Offset 0018h[PPRIntEn]=1b. > > and > > EventLogInt: event log interrupt. RW1C. Reset 0b. 1=Event entry > written to the event log by the IOMMU. 0=No event entry written to the > event log by the IOMMU. An interrupt is generated when EventLogInt=1b > and MMIO Offset 0018h[EventIntEn]=1b. > > so we should be OK without a second pass if the pending bit is still > clear after unmasking the interrupt.Do we want to rely on this? I don''t think so, as the names of the bits suggest otherwise. I''ll send v5 in a minute, and I think this will work for both possible cases.>> So with this done I now realized that all of these transformations >> don''t really belong in this erratum workaround patch. > > Agreed. I think this reshuffle to avoid lost entries should be its own > patch, and the erratum 787 one should follow it -- unless the logic we > end up with handles erratum 787 as a side-effect. :)Actually merging it into patch 1 seemed the more natural thing in the end, as some of the effects of the patch before this re-shuffle really require these further adjustments to be done at once.>> You''ll note that even here we''re having a loop again, which you >> presumably won''t like much. > > Well, this time the event handling is inside the loop, so it ought to > catch and disable bad passthrough devices. I''d still prefer having the > tasklet schedule itself and terminate, but I''m happy to defer to your > taste.I came to the same conclusion - it''ll re-schedule the tasklet now both for the interrupt re-enabling case as well as (in patch 2) for the interrupt-bit-still-or-again-set one. Jan
Jan Beulich
2013-Jun-11 06:45 UTC
Re: [PATCH 2/2 V2] iommu/amd: Workaround for erratum 787
>>> On 11.06.13 at 01:13, Suravee Suthikulanit <suravee.suthikulpanit@amd.com> wrote: > We should also check if the EventLogInt and PPRInt bits are set before > actually going into the log processing code.No, I''d prefer to keep that aspect as is, again to be one the safe side.> Also, I agree with Jan that we should not need > to disable the Event log and the PPR log in the IOMMU control register. > This could be handled simply through the status register.I also didn''t switch that aspect, keeping in mind that your documentation says otherwise (and, however minor, that the virtual IOMMU emulation code is implemented with the opposite behavior). Safest will be to not depend on questionable aspects.> Also, I think we can further simplify the logic for the workaround by having > only > one loop instead of two. Here is the newly proposed changes for the patch. > However, > I am still not sure if we should reschedule the tasklet instead of just > using the while loop here.It''s re-scheduling the tasklet for all recovery purposes now. Please take a look at v5 (to be sent right after this mail). Jan
Jan Beulich
2013-Jun-11 06:47 UTC
[PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
The IOMMU interrupt bits in the IOMMU status registers are "read-only, and write-1-to-clear (RW1C). Therefore, the existing logic which reads the register, set the bit, and then writing back the values could accidentally clear certain bits if it has been set. The correct logic would just be writing only the value which only set the interrupt bits, and leave the rest to zeros. This patch also, clean up #define masks as Jan has suggested. Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> With iommu_interrupt_handler() properly having got switched its readl() from status to control register, the subsequent writel() needed to be switched too (and the RW1C comment there was bogus). Some of the cleanup went too far - undone. Further, with iommu_interrupt_handler() now actually disabling the interrupt sources, they also need to get re-enabled by the tasklet once it finished processing the respective log. This also implies re-running the tasklet so that log entries added between reading the log and re- enabling the interrupt will get handled in a timely manner. Finally, guest write emulation to the status register needs to be done with the RW1C (and RO for all other bits) semantics in mind too. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- v5: Move most of what accumulated in patch 2 here. Re-schedule the tasklet when re-enabling a previously disabled interrupt source. --- a/xen/drivers/passthrough/amd/iommu_cmd.c +++ b/xen/drivers/passthrough/amd/iommu_cmd.c @@ -75,11 +75,9 @@ static void flush_command_buffer(struct u32 cmd[4], status; int loop_count, comp_wait; - /* clear ''ComWaitInt'' in status register (WIC) */ - set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, 0, - IOMMU_STATUS_COMP_WAIT_INT_MASK, - IOMMU_STATUS_COMP_WAIT_INT_SHIFT, &status); - writel(status, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + /* RW1C ''ComWaitInt'' in status register */ + writel(IOMMU_STATUS_COMP_WAIT_INT_MASK, + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); /* send an empty COMPLETION_WAIT command to flush command buffer */ cmd[3] = cmd[2] = 0; @@ -103,9 +101,9 @@ static void flush_command_buffer(struct if ( comp_wait ) { - /* clear ''ComWaitInt'' in status register (WIC) */ - status &= IOMMU_STATUS_COMP_WAIT_INT_MASK; - writel(status, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + /* RW1C ''ComWaitInt'' in status register */ + writel(IOMMU_STATUS_COMP_WAIT_INT_MASK, + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); return; } AMD_IOMMU_DEBUG("Warning: ComWaitInt bit did not assert!\n"); --- a/xen/drivers/passthrough/amd/iommu_guest.c +++ b/xen/drivers/passthrough/amd/iommu_guest.c @@ -754,7 +754,14 @@ static void guest_iommu_mmio_write64(str u64_to_reg(&iommu->ppr_log.reg_tail, val); break; case IOMMU_STATUS_MMIO_OFFSET: - u64_to_reg(&iommu->reg_status, val); + val &= IOMMU_STATUS_EVENT_OVERFLOW_MASK | + IOMMU_STATUS_EVENT_LOG_INT_MASK | + IOMMU_STATUS_COMP_WAIT_INT_MASK | + IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK | + IOMMU_STATUS_PPR_LOG_INT_MASK | + IOMMU_STATUS_GAPIC_LOG_OVERFLOW_MASK | + IOMMU_STATUS_GAPIC_LOG_INT_MASK; + u64_to_reg(&iommu->reg_status, reg_to_u64(iommu->reg_status) & ~val); break; default: --- a/xen/drivers/passthrough/amd/iommu_init.c +++ b/xen/drivers/passthrough/amd/iommu_init.c @@ -344,13 +344,13 @@ static void set_iommu_ppr_log_control(st writeq(0, iommu->mmio_base + IOMMU_PPR_LOG_TAIL_OFFSET); iommu_set_bit(&entry, IOMMU_CONTROL_PPR_ENABLE_SHIFT); - iommu_set_bit(&entry, IOMMU_CONTROL_PPR_INT_SHIFT); + iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT); iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT); } else { iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_ENABLE_SHIFT); - iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_INT_SHIFT); + iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT); iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT); } @@ -410,7 +410,7 @@ static void iommu_reset_log(struct amd_i void (*ctrl_func)(struct amd_iommu *iommu, int)) { u32 entry; - int log_run, run_bit, of_bit; + int log_run, run_bit; int loop_count = 1000; BUG_ON(!iommu || ((log != &iommu->event_log) && (log != &iommu->ppr_log))); @@ -419,10 +419,6 @@ static void iommu_reset_log(struct amd_i IOMMU_STATUS_EVENT_LOG_RUN_SHIFT : IOMMU_STATUS_PPR_LOG_RUN_SHIFT; - of_bit = ( log == &iommu->event_log ) ? - IOMMU_STATUS_EVENT_OVERFLOW_SHIFT : - IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT; - /* wait until EventLogRun bit = 0 */ do { entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); @@ -439,9 +435,10 @@ static void iommu_reset_log(struct amd_i ctrl_func(iommu, IOMMU_CONTROL_DISABLED); - /*clear overflow bit */ - iommu_clear_bit(&entry, of_bit); - writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + /* RW1C overflow bit */ + writel(log == &iommu->event_log ? IOMMU_STATUS_EVENT_OVERFLOW_MASK + : IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK, + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); /*reset event log base address */ log->head = 0; @@ -611,22 +608,33 @@ static void iommu_check_event_log(struct u32 entry; unsigned long flags; + /* RW1C interrupt status bit */ + writel(IOMMU_STATUS_EVENT_LOG_INT_MASK, + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + iommu_read_log(iommu, &iommu->event_log, sizeof(event_entry_t), parse_event_log_entry); spin_lock_irqsave(&iommu->lock, flags); - /*check event overflow */ + /* Check event overflow. */ entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); - if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) ) iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control); - - /* reset interrupt status bit */ - entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); - iommu_set_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT); - - writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + else + { + entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); + if ( !(entry & IOMMU_CONTROL_EVENT_LOG_INT_MASK) ) + { + entry |= IOMMU_CONTROL_EVENT_LOG_INT_MASK; + writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); + /* + * Re-schedule the tasklet to handle eventual log entries added + * between reading the log above and re-enabling the interrupt. + */ + tasklet_schedule(&amd_iommu_irq_tasklet); + } + } spin_unlock_irqrestore(&iommu->lock, flags); } @@ -681,22 +689,33 @@ static void iommu_check_ppr_log(struct a u32 entry; unsigned long flags; + /* RW1C interrupt status bit */ + writel(IOMMU_STATUS_PPR_LOG_INT_MASK, + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + iommu_read_log(iommu, &iommu->ppr_log, sizeof(ppr_entry_t), parse_ppr_log_entry); spin_lock_irqsave(&iommu->lock, flags); - /*check event overflow */ + /* Check event overflow. */ entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); - if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) ) iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control); - - /* reset interrupt status bit */ - entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); - iommu_set_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT); - - writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + else + { + entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); + if ( !(entry & IOMMU_CONTROL_PPR_LOG_INT_MASK) ) + { + entry |= IOMMU_CONTROL_PPR_LOG_INT_MASK; + writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); + /* + * Re-schedule the tasklet to handle eventual log entries added + * between reading the log above and re-enabling the interrupt. + */ + tasklet_schedule(&amd_iommu_irq_tasklet); + } + } spin_unlock_irqrestore(&iommu->lock, flags); } @@ -733,11 +752,14 @@ static void iommu_interrupt_handler(int spin_lock_irqsave(&iommu->lock, flags); - /* Silence interrupts from both event and PPR logging */ - entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); - iommu_clear_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT); - iommu_clear_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT); - writel(entry, iommu->mmio_base+IOMMU_STATUS_MMIO_OFFSET); + /* + * Silence interrupts from both event and PPR by clearing the + * enable logging bits in the control register + */ + entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); + iommu_clear_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT); + iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT); + writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); spin_unlock_irqrestore(&iommu->lock, flags); --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h @@ -336,14 +336,17 @@ #define IOMMU_CONTROL_ISOCHRONOUS_SHIFT 11 #define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_MASK 0x00001000 #define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT 12 +#define IOMMU_CONTROL_PPR_LOG_ENABLE_MASK 0x00002000 +#define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT 13 +#define IOMMU_CONTROL_PPR_LOG_INT_MASK 0x00004000 +#define IOMMU_CONTROL_PPR_LOG_INT_SHIFT 14 +#define IOMMU_CONTROL_PPR_ENABLE_MASK 0x00008000 +#define IOMMU_CONTROL_PPR_ENABLE_SHIFT 15 +#define IOMMU_CONTROL_GT_ENABLE_MASK 0x00010000 +#define IOMMU_CONTROL_GT_ENABLE_SHIFT 16 #define IOMMU_CONTROL_RESTART_MASK 0x80000000 #define IOMMU_CONTROL_RESTART_SHIFT 31 -#define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT 13 -#define IOMMU_CONTROL_PPR_INT_SHIFT 14 -#define IOMMU_CONTROL_PPR_ENABLE_SHIFT 15 -#define IOMMU_CONTROL_GT_ENABLE_SHIFT 16 - /* Exclusion Register */ #define IOMMU_EXCLUSION_BASE_LOW_OFFSET 0x20 #define IOMMU_EXCLUSION_BASE_HIGH_OFFSET 0x24 @@ -395,9 +398,18 @@ #define IOMMU_STATUS_EVENT_LOG_RUN_SHIFT 3 #define IOMMU_STATUS_CMD_BUFFER_RUN_MASK 0x00000010 #define IOMMU_STATUS_CMD_BUFFER_RUN_SHIFT 4 +#define IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK 0x00000020 #define IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT 5 +#define IOMMU_STATUS_PPR_LOG_INT_MASK 0x00000040 #define IOMMU_STATUS_PPR_LOG_INT_SHIFT 6 +#define IOMMU_STATUS_PPR_LOG_RUN_MASK 0x00000080 #define IOMMU_STATUS_PPR_LOG_RUN_SHIFT 7 +#define IOMMU_STATUS_GAPIC_LOG_OVERFLOW_MASK 0x00000100 +#define IOMMU_STATUS_GAPIC_LOG_OVERFLOW_SHIFT 8 +#define IOMMU_STATUS_GAPIC_LOG_INT_MASK 0x00000200 +#define IOMMU_STATUS_GAPIC_LOG_INT_SHIFT 9 +#define IOMMU_STATUS_GAPIC_LOG_RUN_MASK 0x00000400 +#define IOMMU_STATUS_GAPIC_LOG_RUN_SHIFT 10 /* I/O Page Table */ #define IOMMU_PAGE_TABLE_ENTRY_SIZE 8 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
The IOMMU interrupt handling in bottom half must clear the PPR log interrupt and event log interrupt bits to re-enable the interrupt. This is done by writing 1 to the memory mapped register to clear the bit. Due to hardware bug, if the driver tries to clear this bit while the IOMMU hardware also setting this bit, the conflict will result with the bit being set. If the interrupt handling code does not make sure to clear this bit, subsequent changes in the event/PPR logs will no longer generating interrupts, and would result if buffer overflow. After clearing the bits, the driver must read back the register to verify. Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> Adjust to apply on top of heavily modified patch 1. Adjust flow to get away with a single readl() in each instance of the status register checks. Signed-off-by: Jan Beulich <jbeulich@suse.com> v5: Moved most of what accumulated here into patch 1. Rather than looping, re-schedule the tasklet to work around the erratum. (skipped v4 to remain in sync with patch 1) --- a/xen/drivers/passthrough/amd/iommu_init.c +++ b/xen/drivers/passthrough/amd/iommu_init.c @@ -636,6 +636,14 @@ static void iommu_check_event_log(struct } } + /* + * Workaround for erratum787: + * Re-check to make sure the bit has been cleared. + */ + entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + if ( entry & IOMMU_STATUS_EVENT_LOG_INT_MASK ) + tasklet_schedule(&amd_iommu_irq_tasklet); + spin_unlock_irqrestore(&iommu->lock, flags); } @@ -717,6 +725,14 @@ static void iommu_check_ppr_log(struct a } } + /* + * Workaround for erratum787: + * Re-check to make sure the bit has been cleared. + */ + entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + if ( entry & IOMMU_STATUS_PPR_LOG_INT_MASK ) + tasklet_schedule(&amd_iommu_irq_tasklet); + spin_unlock_irqrestore(&iommu->lock, flags); } _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
At 07:40 +0100 on 11 Jun (1370936413), Jan Beulich wrote:> >>> On 10.06.13 at 18:31, Tim Deegan <tim@xen.org> wrote: > > At 16:03 +0100 on 10 Jun (1370880211), Jan Beulich wrote: > > From my reading of the IOMMU spec the pending bit gets set whether the > > enable bit is set or not: > > > > PPRInt: peripheral page service request interrupt. Revision 1: > > RO. Reset 0b. Reserved. Revision 2: RW1C. Reset 0b. 1=PPR request > > entry written to the PPR log by the IOMMU. 0=No PPR entry written to > > the PPR log by the IOMMU. An interrupt is generated when PPRInt=1b and > > MMIO Offset 0018h[PPRIntEn]=1b. > > > > and > > > > EventLogInt: event log interrupt. RW1C. Reset 0b. 1=Event entry > > written to the event log by the IOMMU. 0=No event entry written to the > > event log by the IOMMU. An interrupt is generated when EventLogInt=1b > > and MMIO Offset 0018h[EventIntEn]=1b. > > > > so we should be OK without a second pass if the pending bit is still > > clear after unmasking the interrupt. > > Do we want to rely on this? I don''t think so, as the names of the > bits suggest otherwise. I''ll send v5 in a minute, and I think this will > work for both possible cases.I think it''s pretty clear from the text that the bit gets set when an entry is written, even if an interrupt doesn''t get raised. To me, that means we can replace the mandatory re-run on interrupt enable with a check of the status register (i.e. the check we''ll do for erratum 787 anyway). But if you''re not comfortable with that, the v5 you just posted looks correct to me, and (at least until we have restartable IOMMU faults) that path won''t be hot enough to worry about the efficiency. So both v5 patches are Reviewed-by: Tim Deegan <tim@xen.org>. Cheers, Tim.
Suravee Suthikulanit
2013-Jun-11 23:03 UTC
Re: [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
On 6/11/2013 1:47 AM, Jan Beulich wrote:> @@ -611,22 +608,33 @@ static void iommu_check_event_log(struct > u32 entry; > unsigned long flags; > > + /* RW1C interrupt status bit */ > + writel(IOMMU_STATUS_EVENT_LOG_INT_MASK, > + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > + > iommu_read_log(iommu, &iommu->event_log, > sizeof(event_entry_t), parse_event_log_entry); > > spin_lock_irqsave(&iommu->lock, flags); > > - /*check event overflow */ > + /* Check event overflow. */ > entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > - > if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) ) > iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control); > - > - /* reset interrupt status bit */ > - entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > - iommu_set_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT); > - > - writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > + else > + { > + entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); > + if ( !(entry & IOMMU_CONTROL_EVENT_LOG_INT_MASK) ) > + { > + entry |= IOMMU_CONTROL_EVENT_LOG_INT_MASK; > + writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); > + /* > + * Re-schedule the tasklet to handle eventual log entries added > + * between reading the log above and re-enabling the interrupt. > + */ > + tasklet_schedule(&amd_iommu_irq_tasklet); > + } > + }If more entries are added to the event log during the time that event log interrupt is disabled (in the control register), the IOMMU hardware will generate interrupt once the the interrupt enable bit in the control register changes from 0 to 1 and set the status register. Since the "iommu_interrupt_handler" code is already calling "schedule_tasklet", we should not need to "re-schedule" tasklet here. I have confirmed the hardware behavior described with the hardware designer. This is also the same on the PPR log. Suravee
Jan Beulich
2013-Jun-12 06:24 UTC
Re: [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
>>> On 12.06.13 at 01:03, Suravee Suthikulanit <suravee.suthikulpanit@amd.com>wrote:> On 6/11/2013 1:47 AM, Jan Beulich wrote: >> @@ -611,22 +608,33 @@ static void iommu_check_event_log(struct >> u32 entry; >> unsigned long flags; >> >> + /* RW1C interrupt status bit */ >> + writel(IOMMU_STATUS_EVENT_LOG_INT_MASK, >> + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); >> + >> iommu_read_log(iommu, &iommu->event_log, >> sizeof(event_entry_t), parse_event_log_entry); >> >> spin_lock_irqsave(&iommu->lock, flags); >> >> - /*check event overflow */ >> + /* Check event overflow. */ >> entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); >> - >> if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) ) >> iommu_reset_log(iommu, &iommu->event_log, > set_iommu_event_log_control); >> - >> - /* reset interrupt status bit */ >> - entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); >> - iommu_set_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT); >> - >> - writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); >> + else >> + { >> + entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); >> + if ( !(entry & IOMMU_CONTROL_EVENT_LOG_INT_MASK) ) >> + { >> + entry |= IOMMU_CONTROL_EVENT_LOG_INT_MASK; >> + writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); >> + /* >> + * Re-schedule the tasklet to handle eventual log entries added >> + * between reading the log above and re-enabling the interrupt. >> + */ >> + tasklet_schedule(&amd_iommu_irq_tasklet); >> + } >> + } > If more entries are added to the event log during the time that event > log interrupt is disabled (in the control register), > the IOMMU hardware will generate interrupt once the the interrupt enable > bit in the control register changes from 0 to 1 and set the status > register. Since the "iommu_interrupt_handler" code is already calling > "schedule_tasklet", we should not need to "re-schedule" tasklet here. > I have confirmed the hardware behavior described with the hardware > designer. This is also the same on the PPR log.And also the same between v1 and v2 hardware? Again, I''d like to be on the safe side, and rather do a reschedule too much than one too little. And in any case, having your documentation made more precise in these respects would be much appreciated. Jan
Suravee Suthikulpanit
2013-Jun-12 22:37 UTC
Re: [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
On 6/12/2013 1:24 AM, Jan Beulich wrote:>> If more entries are added to the event log during the time that event >> log interrupt is disabled (in the control register), >> the IOMMU hardware will generate interrupt once the the interrupt enable >> bit in the control register changes from 0 to 1 and set the status >> register. Since the "iommu_interrupt_handler" code is already calling >> "schedule_tasklet", we should not need to "re-schedule" tasklet here. >> I have confirmed the hardware behavior described with the hardware >> designer. This is also the same on the PPR log. > And also the same between v1 and v2 hardware? Again, I''d like to > be on the safe side, and rather do a reschedule too much than one > too little. And in any case, having your documentation made more > precise in these respects would be much appreciated. > > Jan > >Understand. I apologize if the AMD IOMMU specification does not describe the behavior quite clearly. Let me know if I could help clarifing any issues with the hardware designers. Since we are modifying the IOMMU interrupt enabling/disabling, I have been doing some more testing on the IOMMU interrupt handling. I found that IOMMU MSI interrupt is currently broken, but I think this is because of some older changes. I am still tracking down the issue, and will update my findings. Thank you, Suravee
Suravee Suthikulpanit
2013-Jun-13 01:44 UTC
Re: [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
On 6/12/2013 5:37 PM, Suravee Suthikulpanit wrote:> On 6/12/2013 1:24 AM, Jan Beulich wrote: >>> If more entries are added to the event log during the time that event >>> log interrupt is disabled (in the control register), >>> the IOMMU hardware will generate interrupt once the the interrupt >>> enable >>> bit in the control register changes from 0 to 1 and set the status >>> register. Since the "iommu_interrupt_handler" code is already calling >>> "schedule_tasklet", we should not need to "re-schedule" tasklet here. >>> I have confirmed the hardware behavior described with the hardware >>> designer. This is also the same on the PPR log. >> And also the same between v1 and v2 hardware? Again, I''d like to >> be on the safe side, and rather do a reschedule too much than one >> too little. And in any case, having your documentation made more >> precise in these respects would be much appreciated. >> >> Jan >> >> > Understand. I apologize if the AMD IOMMU specification does not > describe the behavior quite clearly. Let me know if I could help > clarifing any issues with the hardware designers. > > Since we are modifying the IOMMU interrupt enabling/disabling, I have > been doing some more testing on the IOMMU interrupt handling. I found > that IOMMU MSI interrupt is currently broken, but I think this is > because of some older changes. I am still tracking down the issue, > and will update my findings. > > Thank you, > > SuraveeThe following commit broke the IOMMU MSI interrupt: 2012-11-28 899110e3f6d2a191638e8b50a981c551eeec49e6 AMD IOMMU: include IOMMU interrupt information in ''M'' debug key output (http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=899110e3f6d2a191638e8b50a981c551eeec49e6) This patch also need the following patch to resolve kernel panic: c759fee45bf44f2947a3480d54c03ff7e028c39e AMD IOMMU: add locking missing from c/s 26198:ba90ecb0231f (http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=c759fee45bf44f2947a3480d54c03ff7e028c39e) I''ll update once I root cause the issue. Suravee> > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel >
Jan Beulich
2013-Jun-13 07:54 UTC
Re: [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
>>> On 13.06.13 at 03:44, Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> wrote: > On 6/12/2013 5:37 PM, Suravee Suthikulpanit wrote: >> Since we are modifying the IOMMU interrupt enabling/disabling, I have >> been doing some more testing on the IOMMU interrupt handling. I found >> that IOMMU MSI interrupt is currently broken, but I think this is >> because of some older changes. I am still tracking down the issue, >> and will update my findings. > > The following commit broke the IOMMU MSI interrupt: > > 2012-11-28 899110e3f6d2a191638e8b50a981c551eeec49e6 AMD IOMMU: > include IOMMU interrupt information in ''M'' debug key output > (http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=899110e3f6d2a191638e8b5 > 0a981c551eeec49e6) > > This patch also need the following patch to resolve kernel panic: > > c759fee45bf44f2947a3480d54c03ff7e028c39e AMD IOMMU: add locking missing > from c/s 26198:ba90ecb0231f > (http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=c759fee45bf44f2947a3480 > d54c03ff7e028c39e) > > I''ll update once I root cause the issue.I''ll also take a look, but it would help to know in what way it is broken: Some sort of crash, no interrupt delivered, ... This surely needs to be resolved before 4.3 can go out - George, can you put this on your bug list, please? Jan
Suravee Suthikulpanit
2013-Jun-13 13:48 UTC
Re: [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
On 6/13/2013 2:54 AM, Jan Beulich wrote:>>>> On 13.06.13 at 03:44, Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> wrote: >> On 6/12/2013 5:37 PM, Suravee Suthikulpanit wrote: >>> Since we are modifying the IOMMU interrupt enabling/disabling, I have >>> been doing some more testing on the IOMMU interrupt handling. I found >>> that IOMMU MSI interrupt is currently broken, but I think this is >>> because of some older changes. I am still tracking down the issue, >>> and will update my findings. >> The following commit broke the IOMMU MSI interrupt: >> >> 2012-11-28 899110e3f6d2a191638e8b50a981c551eeec49e6 AMD IOMMU: >> include IOMMU interrupt information in ''M'' debug key output >> (http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=899110e3f6d2a191638e8b5 >> 0a981c551eeec49e6) >> >> This patch also need the following patch to resolve kernel panic: >> >> c759fee45bf44f2947a3480d54c03ff7e028c39e AMD IOMMU: add locking missing >> from c/s 26198:ba90ecb0231f >> (http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=c759fee45bf44f2947a3480 >> d54c03ff7e028c39e) >> >> I''ll update once I root cause the issue. > I''ll also take a look, but it would help to know in what way it is > broken: Some sort of crash, no interrupt delivered, ... This surely > needs to be resolved before 4.3 can go out - George, can you put > this on your bug list, please? > > Jan >BasicallyI amnot seeing any interrupts coming in from the IOMMU. I was testing it by setting the ComWaitIntEn bit of the IOMMU control register to let the IOMMU hardware generate MSI interrupts for the COMPLETION_WAIT command. I am not seeing any interrupts (i.e. The interrupt handler and tasklet handler function is never executed). I have double checked this in the latest Xen-4.2.x, and they are working fine. The same MSI interrupt is also used for PPR and Event Log. Suravee _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
George Dunlap
2013-Jun-13 14:20 UTC
Re: [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
create ^ title MSI interrupts broken with IOMMU after c/s 899110e thanks On Thu, Jun 13, 2013 at 2:44 AM, Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> wrote:> On 6/12/2013 5:37 PM, Suravee Suthikulpanit wrote: >> >> On 6/12/2013 1:24 AM, Jan Beulich wrote: >>>> >>>> If more entries are added to the event log during the time that event >>>> log interrupt is disabled (in the control register), >>>> the IOMMU hardware will generate interrupt once the the interrupt enable >>>> bit in the control register changes from 0 to 1 and set the status >>>> register. Since the "iommu_interrupt_handler" code is already calling >>>> "schedule_tasklet", we should not need to "re-schedule" tasklet here. >>>> I have confirmed the hardware behavior described with the hardware >>>> designer. This is also the same on the PPR log. >>> >>> And also the same between v1 and v2 hardware? Again, I''d like to >>> be on the safe side, and rather do a reschedule too much than one >>> too little. And in any case, having your documentation made more >>> precise in these respects would be much appreciated. >>> >>> Jan >>> >>> >> Understand. I apologize if the AMD IOMMU specification does not describe >> the behavior quite clearly. Let me know if I could help clarifing any >> issues with the hardware designers. >> >> Since we are modifying the IOMMU interrupt enabling/disabling, I have been >> doing some more testing on the IOMMU interrupt handling. I found that IOMMU >> MSI interrupt is currently broken, but I think this is because of some older >> changes. I am still tracking down the issue, and will update my findings. >> >> Thank you, >> >> Suravee > > > The following commit broke the IOMMU MSI interrupt: > > 2012-11-28 899110e3f6d2a191638e8b50a981c551eeec49e6 AMD IOMMU: include > IOMMU interrupt information in ''M'' debug key output > (http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=899110e3f6d2a191638e8b50a981c551eeec49e6) > > This patch also need the following patch to resolve kernel panic: > > c759fee45bf44f2947a3480d54c03ff7e028c39e AMD IOMMU: add locking missing from > c/s 26198:ba90ecb0231f > (http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=c759fee45bf44f2947a3480d54c03ff7e028c39e) > > I''ll update once I root cause the issue. > > > Suravee > >> >> _______________________________________________ >> Xen-devel mailing list >> Xen-devel@lists.xen.org >> http://lists.xen.org/xen-devel >> > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel
xen@bugs.xenproject.org
2013-Jun-13 14:30 UTC
Processed: Re: [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
Processing commands for xen@bugs.xenproject.org:> create ^Command failed: Addresses: dunlapg@umich.edu are not permitted to use `create'' at /srv/xen-devel-bugs/lib/emesinae/control.pl line 171, <M> line 48. Stop processing here. --- Xen Hypervisor Bug Tracker See http://wiki.xen.org/wiki/Reporting_Bugs_against_Xen for information on reporting bugs Contact xen-bugs-owner@bugs.xenproject.org with any infrastructure issues
Jan Beulich
2013-Jun-13 15:58 UTC
Re: [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
>>> On 13.06.13 at 03:44, Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> wrote: > The following commit broke the IOMMU MSI interrupt: > > 2012-11-28 899110e3f6d2a191638e8b50a981c551eeec49e6 AMD IOMMU: > include IOMMU interrupt information in ''M'' debug key output > (http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=899110e3f6d2a191638e8b5 > 0a981c551eeec49e6)Having gone over the changes again, this still looks pretty innocent/ mechanical to me - I can''t see what may have got broken. Considering that this is the change adding respective information to ''M'' output - what does ''M'' show for the IOMMU entry/entries? Jan
Suravee Suthikulanit
2013-Jun-13 16:34 UTC
Re: [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
On 6/13/2013 10:58 AM, Jan Beulich wrote:>>>> On 13.06.13 at 03:44, Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> wrote: >> The following commit broke the IOMMU MSI interrupt: >> >> 2012-11-28 899110e3f6d2a191638e8b50a981c551eeec49e6 AMD IOMMU: >> include IOMMU interrupt information in ''M'' debug key output >> (http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=899110e3f6d2a191638e8b5 >> 0a981c551eeec49e6) > Having gone over the changes again, this still looks pretty innocent/ > mechanical to me - I can''t see what may have got broken. > Considering that this is the change adding respective information to > ''M'' output - what does ''M'' show for the IOMMU entry/entries? > > Jan > >Basically, the only different is this line that only appears in the "Bad" version. (XEN) MSI 56 vec=28 fixed edge deassert phys cpu dest=00000001 mask=0/0/1 "xl debug-key i" also show the following information (XEN) IRQ: 56 affinity:1 vec:28 type=AMD-IOMMU-MSI status=00000000 mapped, unbound Not sure what "status=0" means. Before: (Good) (XEN) MSI information: (XEN) MSI 57 vec=c0 lowest edge assert log lowest dest=00000001 mask=0/1/1 (XEN) MSI 58 vec=c8 lowest edge assert log lowest dest=00000001 mask=0/1/1 (XEN) MSI 59 vec=d0 lowest edge assert log lowest dest=00000001 mask=0/1/1 (XEN) MSI 60 vec=d8 lowest edge assert log lowest dest=00000001 mask=0/1/1 (XEN) MSI 61 vec=29 lowest edge assert log lowest dest=00000001 mask=0/1/1 (XEN) MSI-X 62 vec=31 lowest edge assert log lowest dest=00000001 mask=1/0/0 (XEN) MSI-X 63 vec=39 lowest edge assert log lowest dest=00000001 mask=1/0/0 (XEN) MSI-X 64 vec=41 lowest edge assert log lowest dest=00000001 mask=1/0/0 (XEN) MSI-X 65 vec=49 lowest edge assert log lowest dest=00000001 mask=1/0/0 (XEN) MSI-X 66 vec=51 lowest edge assert log lowest dest=00000001 mask=1/0/0 (XEN) MSI-X 67 vec=59 lowest edge assert log lowest dest=00000001 mask=1/0/0 (XEN) MSI 68 vec=69 lowest edge assert log lowest dest=00000001 mask=0/1/1 (XEN) MSI-X 69 vec=79 lowest edge assert log lowest dest=00000003 mask=1/1/1 (XEN) MSI-X 70 vec=81 lowest edge assert log lowest dest=00000003 mask=1/1/1 (XEN) MSI-X 71 vec=89 lowest edge assert log lowest dest=00000003 mask=1/1/1 (XEN) MSI-X 72 vec=99 lowest edge assert log lowest dest=00000001 mask=1/0/0 (XEN) MSI-X 73 vec=a1 lowest edge assert log lowest dest=00000002 mask=1/0/0 (XEN) MSI-X 74 vec=a9 lowest edge assert log lowest dest=00000001 mask=1/0/0 (XEN) MSI 75 vec=b9 lowest edge assert log lowest dest=00000001 mask=0/1/1 (XEN) MSI 76 vec=c1 lowest edge assert log lowest dest=00000001 mask=0/1/1 After: (Bad) (XEN) MSI information: (XEN) MSI 56 vec=28 fixed edge deassert phys cpu dest=00000001 mask=0/0/1 (XEN) MSI 57 vec=c0 lowest edge assert log lowest dest=00000001 mask=0/1/1 (XEN) MSI 58 vec=c8 lowest edge assert log lowest dest=00000001 mask=0/1/1 (XEN) MSI 59 vec=d0 lowest edge assert log lowest dest=00000001 mask=0/1/1 (XEN) MSI 60 vec=d8 lowest edge assert log lowest dest=00000001 mask=0/1/1 (XEN) MSI 61 vec=29 lowest edge assert log lowest dest=00000001 mask=0/1/1 (XEN) MSI-X 62 vec=31 lowest edge assert log lowest dest=00000001 mask=1/0/0 (XEN) MSI-X 63 vec=39 lowest edge assert log lowest dest=00000001 mask=1/0/0 (XEN) MSI-X 64 vec=41 lowest edge assert log lowest dest=00000001 mask=1/0/0 (XEN) MSI-X 65 vec=49 lowest edge assert log lowest dest=00000001 mask=1/0/0 (XEN) MSI-X 66 vec=51 lowest edge assert log lowest dest=00000001 mask=1/0/0 (XEN) MSI-X 67 vec=59 lowest edge assert log lowest dest=00000001 mask=1/0/0 (XEN) MSI 68 vec=71 lowest edge assert log lowest dest=00000001 mask=0/1/1 (XEN) MSI-X 69 vec=79 lowest edge assert log lowest dest=00000003 mask=1/1/1 (XEN) MSI-X 70 vec=81 lowest edge assert log lowest dest=00000003 mask=1/1/1 (XEN) MSI-X 71 vec=89 lowest edge assert log lowest dest=00000003 mask=1/1/1 (XEN) MSI-X 72 vec=99 lowest edge assert log lowest dest=00000001 mask=1/0/0 (XEN) MSI-X 73 vec=a1 lowest edge assert log lowest dest=00000002 mask=1/0/0 (XEN) MSI-X 74 vec=a9 lowest edge assert log lowest dest=00000001 mask=1/0/0 (XEN) MSI 75 vec=b9 lowest edge assert log lowest dest=00000001 mask=0/1/1 (XEN) MSI 76 vec=c1 lowest edge assert log lowest dest=00000001 mask=0/1/1 Suravee
Jan Beulich
2013-Jun-14 06:27 UTC
Re: [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
>>> On 13.06.13 at 18:34, Suravee Suthikulanit <suravee.suthikulpanit@amd.com> wrote: > Basically, the only different is this line that only appears in the "Bad" > version. > > (XEN) MSI 56 vec=28 fixed edge deassert phys cpu dest=00000001 mask=0/0/1 > > "xl debug-key i" also show the following information > > (XEN) IRQ: 56 affinity:1 vec:28 type=AMD-IOMMU-MSI status=00000000 mapped, unboundThere are several questionable things here: - the interrupt being of "deassert" kind - destination mode being physical (while all others are logical) - the interrupt being unbound (i.e. there''s no action associated with it, and hence no handler would ever get called)> Not sure what "status=0" means.This says that none of the IRQ_* flags are set in desc->status, which isn''t in itself problematic for non-guest interrupts. Jan
Jan Beulich
2013-Jun-14 06:40 UTC
Re: [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
>>> On 14.06.13 at 08:27, "Jan Beulich" <JBeulich@suse.com> wrote: >>>> On 13.06.13 at 18:34, Suravee Suthikulanit <suravee.suthikulpanit@amd.com> > wrote: >> Basically, the only different is this line that only appears in the "Bad" >> version. >> >> (XEN) MSI 56 vec=28 fixed edge deassert phys cpu > dest=00000001 mask=0/0/1 >> >> "xl debug-key i" also show the following information >> >> (XEN) IRQ: 56 affinity:1 vec:28 type=AMD-IOMMU-MSI status=00000000 > mapped, unbound > > There are several questionable things here: > - the interrupt being of "deassert" kind > - destination mode being physical (while all others are logical) > - the interrupt being unbound (i.e. there''s no action associated > with it, and hence no handler would ever get called)And I think I see now at least part of what''s missing: Neither msi_compose_msg() nor write_msi_msg() get (indirectly) called from set_iommu_interrupt_handler(). Which was that (wrong) way already before said c/s, but got masked by iommu_msi_set_affinity() doing a full setup rather than incremental modification as set_msi_affinity() does. In order to fix this reasonably cleanly I''d like to do a little bit of code re-structuring, so it''ll be more than a couple of minutes until I could send you a patch. Plus that - to me - doesn''t explain the NULL action pointer yet. But wait - was that log perhaps taken with the tree rewound to said broken commit? I.e. missing commit d739470b? That would explain it. Jan
>>> On 14.06.13 at 08:40, "Jan Beulich" <JBeulich@suse.com> wrote: >>>> On 14.06.13 at 08:27, "Jan Beulich" <JBeulich@suse.com> wrote: >>>>> On 13.06.13 at 18:34, Suravee Suthikulanit <suravee.suthikulpanit@amd.com> wrote: >>> Basically, the only different is this line that only appears in the "Bad" >>> version. >>> >>> (XEN) MSI 56 vec=28 fixed edge deassert phys cpu >> dest=00000001 mask=0/0/1 >>> >>> "xl debug-key i" also show the following information >>> >>> (XEN) IRQ: 56 affinity:1 vec:28 type=AMD-IOMMU-MSI status=00000000 >> mapped, unbound >> >> There are several questionable things here: >> - the interrupt being of "deassert" kind >> - destination mode being physical (while all others are logical) >> - the interrupt being unbound (i.e. there''s no action associated >> with it, and hence no handler would ever get called) > > And I think I see now at least part of what''s missing: Neither > msi_compose_msg() nor write_msi_msg() get (indirectly) called > from set_iommu_interrupt_handler(). Which was that (wrong) > way already before said c/s, but got masked by > iommu_msi_set_affinity() doing a full setup rather than > incremental modification as set_msi_affinity() does. In order to > fix this reasonably cleanly I''d like to do a little bit of code > re-structuring, so it''ll be more than a couple of minutes until I > could send you a patch.So here''s a first try. Jan Commit 899110e3 ("AMD IOMMU: include IOMMU interrupt information in ''M'' debug key output") made the AMD IOMMU MSI setup code use more of the generic MSI setup code (as other than for VT-d this is an ordinary MSI- capable PCI device), but failed to notice that till now interrupt setup there _required_ the subsequent affinity setup to be done, as that was the only point where the MSI message would get written. The generic MSI affinity setting routine, however, does only an incremental change, i.e. relies on this setup to have been done before. In order to not make the code even more clumsy, introduce a new low level helper routine __setup_msi_irq(), thus eliminating the need for the AMD IOMMU code to directly fiddle with the IRQ descriptor. Reported-by: Suravee Suthikulanit <suravee.suthikulpanit@amd.com> Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/msi.c +++ b/xen/arch/x86/msi.c @@ -472,11 +472,18 @@ static struct msi_desc* alloc_msi_entry( int setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc) { + return __setup_msi_irq(desc, msidesc, + msi_maskable_irq(msidesc) ? &pci_msi_maskable + : &pci_msi_nonmaskable); +} + +int __setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc, + hw_irq_controller *handler) +{ struct msi_msg msg; desc->msi_desc = msidesc; - desc->handler = msi_maskable_irq(msidesc) ? &pci_msi_maskable - : &pci_msi_nonmaskable; + desc->handler = handler; msi_compose_msg(desc, &msg); return write_msi_msg(msidesc, &msg); } --- a/xen/drivers/passthrough/amd/iommu_init.c +++ b/xen/drivers/passthrough/amd/iommu_init.c @@ -748,7 +748,7 @@ static void iommu_interrupt_handler(int static bool_t __init set_iommu_interrupt_handler(struct amd_iommu *iommu) { int irq, ret; - struct irq_desc *desc; + hw_irq_controller *handler; unsigned long flags; u16 control; @@ -759,7 +759,6 @@ static bool_t __init set_iommu_interrupt return 0; } - desc = irq_to_desc(irq); spin_lock_irqsave(&pcidevs_lock, flags); iommu->msi.dev = pci_get_pdev(iommu->seg, PCI_BUS(iommu->bdf), PCI_DEVFN2(iommu->bdf)); @@ -771,7 +770,6 @@ static bool_t __init set_iommu_interrupt PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf)); return 0; } - desc->msi_desc = &iommu->msi; control = pci_conf_read16(iommu->seg, PCI_BUS(iommu->bdf), PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf), iommu->msi.msi_attrib.pos + PCI_MSI_FLAGS); @@ -781,14 +779,15 @@ static bool_t __init set_iommu_interrupt iommu->msi.msi_attrib.maskbit = 1; iommu->msi.msi.mpos = msi_mask_bits_reg(iommu->msi.msi_attrib.pos, is_64bit_address(control)); - desc->handler = &iommu_maskable_msi_type; + handler = &iommu_maskable_msi_type; } else - desc->handler = &iommu_msi_type; - ret = request_irq(irq, iommu_interrupt_handler, 0, "amd_iommu", iommu); + handler = &iommu_msi_type; + ret = __setup_msi_irq(irq_to_desc(irq), &iommu->msi, handler); + if ( !ret ) + ret = request_irq(irq, iommu_interrupt_handler, 0, "amd_iommu", iommu); if ( ret ) { - desc->handler = &no_irq_type; destroy_irq(irq); AMD_IOMMU_DEBUG("can''t request irq\n"); return 0; --- a/xen/include/asm-x86/msi.h +++ b/xen/include/asm-x86/msi.h @@ -72,6 +72,7 @@ struct msi_msg { }; struct irq_desc; +struct hw_interrupt_type; struct msi_desc; /* Helper functions */ extern int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc); @@ -79,6 +80,8 @@ extern void pci_disable_msi(struct msi_d extern int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool_t off); extern void pci_cleanup_msi(struct pci_dev *pdev); extern int setup_msi_irq(struct irq_desc *, struct msi_desc *); +extern int __setup_msi_irq(struct irq_desc *, struct msi_desc *, + const struct hw_interrupt_type *); extern void teardown_msi_irq(int irq); extern int msi_free_vector(struct msi_desc *entry); extern int pci_restore_msi_state(struct pci_dev *pdev); _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Suravee Suthikulanit
2013-Jun-14 16:10 UTC
Re: [PATCH] AMD IOMMU: make interrupt work again
On 6/14/2013 2:14 AM, Jan Beulich wrote:>>>> On 14.06.13 at 08:40, "Jan Beulich" <JBeulich@suse.com> wrote: >>>>> On 14.06.13 at 08:27, "Jan Beulich" <JBeulich@suse.com> wrote: >>>>>> On 13.06.13 at 18:34, Suravee Suthikulanit <suravee.suthikulpanit@amd.com> wrote: >>>> Basically, the only different is this line that only appears in the "Bad" >>>> version. >>>> >>>> (XEN) MSI 56 vec=28 fixed edge deassert phys cpu >>> dest=00000001 mask=0/0/1 >>>> "xl debug-key i" also show the following information >>>> >>>> (XEN) IRQ: 56 affinity:1 vec:28 type=AMD-IOMMU-MSI status=00000000 >>> mapped, unbound >>> >>> There are several questionable things here: >>> - the interrupt being of "deassert" kind >>> - destination mode being physical (while all others are logical) >>> - the interrupt being unbound (i.e. there''s no action associated >>> with it, and hence no handler would ever get called) >> And I think I see now at least part of what''s missing: Neither >> msi_compose_msg() nor write_msi_msg() get (indirectly) called >> from set_iommu_interrupt_handler(). Which was that (wrong) >> way already before said c/s, but got masked by >> iommu_msi_set_affinity() doing a full setup rather than >> incremental modification as set_msi_affinity() does. In order to >> fix this reasonably cleanly I''d like to do a little bit of code >> re-structuring, so it''ll be more than a couple of minutes until I >> could send you a patch. > So here''s a first try. > > JanThis fixes the issue. Here is the output from the xl debug-key i and M. (XEN) MSI 56 vec=28 lowest edge assert log lowest dest=00000001 mask=0/0/? XEN) IRQ: 56 affinity:1 vec:28 type=AMD-IOMMU-MSI status=00000000 iommu_interrupt_handler+0/0x66 Acked: - Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> Thank you Jan. Suravee> > Commit 899110e3 ("AMD IOMMU: include IOMMU interrupt information in ''M'' > debug key output") made the AMD IOMMU MSI setup code use more of the > generic MSI setup code (as other than for VT-d this is an ordinary MSI- > capable PCI device), but failed to notice that till now interrupt setup > there _required_ the subsequent affinity setup to be done, as that was > the only point where the MSI message would get written. The generic MSI > affinity setting routine, however, does only an incremental change, > i.e. relies on this setup to have been done before. > > In order to not make the code even more clumsy, introduce a new low > level helper routine __setup_msi_irq(), thus eliminating the need for > the AMD IOMMU code to directly fiddle with the IRQ descriptor. > > Reported-by: Suravee Suthikulanit <suravee.suthikulpanit@amd.com> > Signed-off-by: Jan Beulich <jbeulich@suse.com> > > --- a/xen/arch/x86/msi.c > +++ b/xen/arch/x86/msi.c > @@ -472,11 +472,18 @@ static struct msi_desc* alloc_msi_entry( > > int setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc) > { > + return __setup_msi_irq(desc, msidesc, > + msi_maskable_irq(msidesc) ? &pci_msi_maskable > + : &pci_msi_nonmaskable); > +} > + > +int __setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc, > + hw_irq_controller *handler) > +{ > struct msi_msg msg; > > desc->msi_desc = msidesc; > - desc->handler = msi_maskable_irq(msidesc) ? &pci_msi_maskable > - : &pci_msi_nonmaskable; > + desc->handler = handler; > msi_compose_msg(desc, &msg); > return write_msi_msg(msidesc, &msg); > } > --- a/xen/drivers/passthrough/amd/iommu_init.c > +++ b/xen/drivers/passthrough/amd/iommu_init.c > @@ -748,7 +748,7 @@ static void iommu_interrupt_handler(int > static bool_t __init set_iommu_interrupt_handler(struct amd_iommu *iommu) > { > int irq, ret; > - struct irq_desc *desc; > + hw_irq_controller *handler; > unsigned long flags; > u16 control; > > @@ -759,7 +759,6 @@ static bool_t __init set_iommu_interrupt > return 0; > } > > - desc = irq_to_desc(irq); > spin_lock_irqsave(&pcidevs_lock, flags); > iommu->msi.dev = pci_get_pdev(iommu->seg, PCI_BUS(iommu->bdf), > PCI_DEVFN2(iommu->bdf)); > @@ -771,7 +770,6 @@ static bool_t __init set_iommu_interrupt > PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf)); > return 0; > } > - desc->msi_desc = &iommu->msi; > control = pci_conf_read16(iommu->seg, PCI_BUS(iommu->bdf), > PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf), > iommu->msi.msi_attrib.pos + PCI_MSI_FLAGS); > @@ -781,14 +779,15 @@ static bool_t __init set_iommu_interrupt > iommu->msi.msi_attrib.maskbit = 1; > iommu->msi.msi.mpos = msi_mask_bits_reg(iommu->msi.msi_attrib.pos, > is_64bit_address(control)); > - desc->handler = &iommu_maskable_msi_type; > + handler = &iommu_maskable_msi_type; > } > else > - desc->handler = &iommu_msi_type; > - ret = request_irq(irq, iommu_interrupt_handler, 0, "amd_iommu", iommu); > + handler = &iommu_msi_type; > + ret = __setup_msi_irq(irq_to_desc(irq), &iommu->msi, handler); > + if ( !ret ) > + ret = request_irq(irq, iommu_interrupt_handler, 0, "amd_iommu", iommu); > if ( ret ) > { > - desc->handler = &no_irq_type; > destroy_irq(irq); > AMD_IOMMU_DEBUG("can''t request irq\n"); > return 0; > --- a/xen/include/asm-x86/msi.h > +++ b/xen/include/asm-x86/msi.h > @@ -72,6 +72,7 @@ struct msi_msg { > }; > > struct irq_desc; > +struct hw_interrupt_type; > struct msi_desc; > /* Helper functions */ > extern int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc); > @@ -79,6 +80,8 @@ extern void pci_disable_msi(struct msi_d > extern int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool_t off); > extern void pci_cleanup_msi(struct pci_dev *pdev); > extern int setup_msi_irq(struct irq_desc *, struct msi_desc *); > +extern int __setup_msi_irq(struct irq_desc *, struct msi_desc *, > + const struct hw_interrupt_type *); > extern void teardown_msi_irq(int irq); > extern int msi_free_vector(struct msi_desc *entry); > extern int pci_restore_msi_state(struct pci_dev *pdev); > >
Suravee Suthikulanit
2013-Jun-17 18:57 UTC
Re: [PATCH 2/2 v5] iommu/amd: Workaround for erratum 787
On 6/11/2013 1:47 AM, Jan Beulich wrote:> The IOMMU interrupt handling in bottom half must clear the PPR log interrupt > and event log interrupt bits to re-enable the interrupt. This is done by > writing 1 to the memory mapped register to clear the bit. Due to hardware bug, > if the driver tries to clear this bit while the IOMMU hardware also setting > this bit, the conflict will result with the bit being set. If the interrupt > handling code does not make sure to clear this bit, subsequent changes in the > event/PPR logs will no longer generating interrupts, and would result if > buffer overflow. After clearing the bits, the driver must read back > the register to verify. > > Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> > > Adjust to apply on top of heavily modified patch 1. Adjust flow to get away > with a single readl() in each instance of the status register checks. > > Signed-off-by: Jan Beulich <jbeulich@suse.com> > v5: Moved most of what accumulated here into patch 1. Rather than looping, > re-schedule the tasklet to work around the erratum. > (skipped v4 to remain in sync with patch 1) > > --- a/xen/drivers/passthrough/amd/iommu_init.c > +++ b/xen/drivers/passthrough/amd/iommu_init.c > @@ -636,6 +636,14 @@ static void iommu_check_event_log(struct > } > } > > + /* > + * Workaround for erratum787: > + * Re-check to make sure the bit has been cleared. > + */ > + entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > + if ( entry & IOMMU_STATUS_EVENT_LOG_INT_MASK ) > + tasklet_schedule(&amd_iommu_irq_tasklet); > + > spin_unlock_irqrestore(&iommu->lock, flags); > } > > @@ -717,6 +725,14 @@ static void iommu_check_ppr_log(struct a > } > } > > + /* > + * Workaround for erratum787: > + * Re-check to make sure the bit has been cleared. > + */ > + entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); > + if ( entry & IOMMU_STATUS_PPR_LOG_INT_MASK ) > + tasklet_schedule(&amd_iommu_irq_tasklet); > + > spin_unlock_irqrestore(&iommu->lock, flags); > } > > > >Acked: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Suravee Suthikulanit
2013-Jun-17 18:59 UTC
Re: [PATCH 1/2 v5] iommu/amd: Fix logic for clearing the IOMMU interrupt bits
On 6/12/2013 1:24 AM, Jan Beulich wrote:>>>> On 12.06.13 at 01:03, Suravee Suthikulanit <suravee.suthikulpanit@amd.com> > wrote: >> On 6/11/2013 1:47 AM, Jan Beulich wrote: >>> @@ -611,22 +608,33 @@ static void iommu_check_event_log(struct >>> u32 entry; >>> unsigned long flags; >>> >>> + /* RW1C interrupt status bit */ >>> + writel(IOMMU_STATUS_EVENT_LOG_INT_MASK, >>> + iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); >>> + >>> iommu_read_log(iommu, &iommu->event_log, >>> sizeof(event_entry_t), parse_event_log_entry); >>> >>> spin_lock_irqsave(&iommu->lock, flags); >>> >>> - /*check event overflow */ >>> + /* Check event overflow. */ >>> entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); >>> - >>> if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) ) >>> iommu_reset_log(iommu, &iommu->event_log, >> set_iommu_event_log_control); >>> - >>> - /* reset interrupt status bit */ >>> - entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); >>> - iommu_set_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT); >>> - >>> - writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); >>> + else >>> + { >>> + entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); >>> + if ( !(entry & IOMMU_CONTROL_EVENT_LOG_INT_MASK) ) >>> + { >>> + entry |= IOMMU_CONTROL_EVENT_LOG_INT_MASK; >>> + writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); >>> + /* >>> + * Re-schedule the tasklet to handle eventual log entries added >>> + * between reading the log above and re-enabling the interrupt. >>> + */ >>> + tasklet_schedule(&amd_iommu_irq_tasklet); >>> + } >>> + } >> If more entries are added to the event log during the time that event >> log interrupt is disabled (in the control register), >> the IOMMU hardware will generate interrupt once the the interrupt enable >> bit in the control register changes from 0 to 1 and set the status >> register. Since the "iommu_interrupt_handler" code is already calling >> "schedule_tasklet", we should not need to "re-schedule" tasklet here. >> I have confirmed the hardware behavior described with the hardware >> designer. This is also the same on the PPR log. > And also the same between v1 and v2 hardware? Again, I''d like to > be on the safe side, and rather do a reschedule too much than one > too little. And in any case, having your documentation made more > precise in these respects would be much appreciated. > > Jan > >Acked: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>