Wei Wang
2012-Jan-10 17:07 UTC
[PATCH 00 of 14 V3] amd iommu: support ATS device passthru on IOMMUv2 systems
Hi all, this is patch set v3. ATS devices with PRI and PASID capabilities can communicate with IOMMUv2 to perform two-level (nested) address translation and demand paging for DMA. To pass such devices through, the iommu driver has to be enabled in the guest OS. This patch set adds initial iommu emulation for HVM guests to support ATS device passthru.

Changes in v3:
* Use xenstore to receive the guest iommu configuration instead of adding a new field to hvm_info_table.
* Support PCI segment in the vbdf-to-mbdf bind.
* Make hypercalls visible for non-x86 platforms.
* A few code cleanups according to comments from Jan and Ian.

Changes in v2:
* Do not use a linked list to access guest iommu tables.
* Do not parse the iommu parameter in libxl_device_model_info again.
* Fix an incorrect logical calculation in patch 11.
* Fix the hypercall definition for non-x86 systems.

Thanks,
Wei
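[Editor's note: the cover letter assumes the assigned device exposes the ATS, PRI and PASID PCIe extended capabilities. As a minimal standalone sketch (not part of this series), the snippet below walks the extended capability list in a config-space snapshot to check for them. The capability IDs are the standard PCIe ones; the cfg snapshot and read32 helper are illustrative assumptions, not Xen code.]

#include <stdint.h>
#include <stdio.h>

/* PCIe extended capability IDs (from the PCIe spec). */
#define PCI_EXT_CAP_ID_ATS    0x0F
#define PCI_EXT_CAP_ID_PRI    0x13
#define PCI_EXT_CAP_ID_PASID  0x1B

/* Illustrative helper: read a 32-bit dword from a 4KiB config-space snapshot. */
static uint32_t read32(const uint8_t *cfg, unsigned int off)
{
    return cfg[off] | (cfg[off + 1] << 8) |
           (cfg[off + 2] << 16) | ((uint32_t)cfg[off + 3] << 24);
}

/* Return 1 if extended capability 'id' is present (assumes a well-formed list). */
static int has_ext_cap(const uint8_t *cfg, uint16_t id)
{
    unsigned int pos = 0x100;            /* extended caps start at offset 0x100 */

    while ( pos && pos < 0x1000 )
    {
        uint32_t hdr = read32(cfg, pos); /* [15:0] id, [19:16] version, [31:20] next */

        if ( (hdr & 0xffff) == id )
            return 1;
        pos = hdr >> 20;
    }
    return 0;
}

int main(void)
{
    uint8_t cfg[0x1000] = { 0 };   /* placeholder snapshot; a real one would come from config space */

    printf("PASID present: %d\n", has_ext_cap(cfg, PCI_EXT_CAP_ID_PASID));
    return 0;
}

A device would need ATS plus both PRI and PASID for the demand-paging scenario the series targets.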
Wei Wang
2012-Jan-10 17:07 UTC
[PATCH 01 of 14 V3] amd iommu: Refactoring iommu ring buffer definition
# HG changeset patch # User Wei Wang <wei.wang2@amd.com> # Date 1326213576 -3600 # Node ID 9c9ddf2dd700119fdaf8a420fb051c22279853cc # Parent 5b2676ac13218951698c49fa0350f2ac48220f3d amd iommu: Refactoring iommu ring buffer definition. Introduce struct ring_buffer to represent iommu cmd buffer, event log and ppr log Signed-off-by: Wei Wang <wei.wang2@amd.com> diff -r 5b2676ac1321 -r 9c9ddf2dd700 xen/drivers/passthrough/amd/iommu_cmd.c --- a/xen/drivers/passthrough/amd/iommu_cmd.c Mon Jan 09 16:01:44 2012 +0100 +++ b/xen/drivers/passthrough/amd/iommu_cmd.c Tue Jan 10 17:39:36 2012 +0100 @@ -29,7 +29,7 @@ static int queue_iommu_command(struct am u32 tail, head, *cmd_buffer; int i; - tail = iommu->cmd_buffer_tail; + tail = iommu->cmd_buffer.tail; if ( ++tail == iommu->cmd_buffer.entries ) tail = 0; @@ -40,13 +40,13 @@ static int queue_iommu_command(struct am if ( head != tail ) { cmd_buffer = (u32 *)(iommu->cmd_buffer.buffer + - (iommu->cmd_buffer_tail * + (iommu->cmd_buffer.tail * IOMMU_CMD_BUFFER_ENTRY_SIZE)); for ( i = 0; i < IOMMU_CMD_BUFFER_U32_PER_ENTRY; i++ ) cmd_buffer[i] = cmd[i]; - iommu->cmd_buffer_tail = tail; + iommu->cmd_buffer.tail = tail; return 1; } @@ -57,7 +57,7 @@ static void commit_iommu_command_buffer( { u32 tail; - set_field_in_reg_u32(iommu->cmd_buffer_tail, 0, + set_field_in_reg_u32(iommu->cmd_buffer.tail, 0, IOMMU_CMD_BUFFER_TAIL_MASK, IOMMU_CMD_BUFFER_TAIL_SHIFT, &tail); writel(tail, iommu->mmio_base+IOMMU_CMD_BUFFER_TAIL_OFFSET); diff -r 5b2676ac1321 -r 9c9ddf2dd700 xen/drivers/passthrough/amd/iommu_init.c --- a/xen/drivers/passthrough/amd/iommu_init.c Mon Jan 09 16:01:44 2012 +0100 +++ b/xen/drivers/passthrough/amd/iommu_init.c Tue Jan 10 17:39:36 2012 +0100 @@ -294,20 +294,20 @@ static int amd_iommu_read_event_log(stru IOMMU_EVENT_LOG_TAIL_MASK, IOMMU_EVENT_LOG_TAIL_SHIFT); - while ( tail != iommu->event_log_head ) + while ( tail != iommu->event_log.head ) { /* read event log entry */ event_log = (u32 *)(iommu->event_log.buffer + - (iommu->event_log_head * + (iommu->event_log.head * IOMMU_EVENT_LOG_ENTRY_SIZE)); parse_event_log_entry(iommu, event_log); - if ( ++iommu->event_log_head == iommu->event_log.entries ) - iommu->event_log_head = 0; + if ( ++iommu->event_log.head == iommu->event_log.entries ) + iommu->event_log.head = 0; /* update head pointer */ - set_field_in_reg_u32(iommu->event_log_head, 0, + set_field_in_reg_u32(iommu->event_log.head, 0, IOMMU_EVENT_LOG_HEAD_MASK, IOMMU_EVENT_LOG_HEAD_SHIFT, &head); writel(head, iommu->mmio_base + IOMMU_EVENT_LOG_HEAD_OFFSET); @@ -346,7 +346,7 @@ static void amd_iommu_reset_event_log(st writel(entry, iommu->mmio_base+IOMMU_STATUS_MMIO_OFFSET); /*reset event log base address */ - iommu->event_log_head = 0; + iommu->event_log.head = 0; set_iommu_event_log_control(iommu, IOMMU_CONTROL_ENABLED); } @@ -605,71 +605,82 @@ static void enable_iommu(struct amd_iomm } -static void __init deallocate_iommu_table_struct( - struct table_struct *table) +static void __init deallocate_buffer(void *buf, uint32_t sz) { int order = 0; - if ( table->buffer ) + if ( buf ) { - order = get_order_from_bytes(table->alloc_size); - __free_amd_iommu_tables(table->buffer, order); - table->buffer = NULL; + order = get_order_from_bytes(sz); + __free_amd_iommu_tables(buf, order); } } -static int __init allocate_iommu_table_struct(struct table_struct *table, - const char *name) +static void __init deallocate_device_table(struct table_struct *table) { - int order = 0; - if ( table->buffer == NULL ) - { - order = get_order_from_bytes(table->alloc_size); 
- table->buffer = __alloc_amd_iommu_tables(order); - - if ( table->buffer == NULL ) - { - AMD_IOMMU_DEBUG("Error allocating %s\n", name); - return -ENOMEM; - } - memset(table->buffer, 0, PAGE_SIZE * (1UL << order)); - } - return 0; + deallocate_buffer(table->buffer, table->alloc_size); + table->buffer = NULL; } -static int __init allocate_cmd_buffer(struct amd_iommu *iommu) +static void __init deallocate_ring_buffer(struct ring_buffer *ring_buf) +{ + deallocate_buffer(ring_buf->buffer, ring_buf->alloc_size); + ring_buf->buffer = NULL; + ring_buf->head = 0; + ring_buf->tail = 0; +} + +static void * __init allocate_buffer(uint32_t alloc_size, const char *name) +{ + void * buffer; + int order = get_order_from_bytes(alloc_size); + + buffer = __alloc_amd_iommu_tables(order); + + if ( buffer == NULL ) + { + AMD_IOMMU_DEBUG("Error allocating %s\n", name); + return NULL; + } + + memset(buffer, 0, PAGE_SIZE * (1UL << order)); + return buffer; +} + +static void * __init allocate_ring_buffer(struct ring_buffer *ring_buf, + uint32_t entry_size, + uint64_t entries, const char *name) +{ + ring_buf->head = 0; + ring_buf->tail = 0; + + ring_buf->alloc_size = PAGE_SIZE << get_order_from_bytes(entries * + entry_size); + ring_buf->entries = ring_buf->alloc_size / entry_size; + ring_buf->buffer = allocate_buffer(ring_buf->alloc_size, name); + return ring_buf->buffer; +} + +static void * __init allocate_cmd_buffer(struct amd_iommu *iommu) { /* allocate ''command buffer'' in power of 2 increments of 4K */ - iommu->cmd_buffer_tail = 0; - iommu->cmd_buffer.alloc_size = PAGE_SIZE << - get_order_from_bytes( - PAGE_ALIGN(IOMMU_CMD_BUFFER_DEFAULT_ENTRIES - * IOMMU_CMD_BUFFER_ENTRY_SIZE)); - iommu->cmd_buffer.entries = iommu->cmd_buffer.alloc_size / - IOMMU_CMD_BUFFER_ENTRY_SIZE; - - return (allocate_iommu_table_struct(&iommu->cmd_buffer, "Command Buffer")); + return allocate_ring_buffer(&iommu->cmd_buffer, sizeof(cmd_entry_t), + IOMMU_CMD_BUFFER_DEFAULT_ENTRIES, + "Command Buffer"); } -static int __init allocate_event_log(struct amd_iommu *iommu) +static void * __init allocate_event_log(struct amd_iommu *iommu) { - /* allocate ''event log'' in power of 2 increments of 4K */ - iommu->event_log_head = 0; - iommu->event_log.alloc_size = PAGE_SIZE << - get_order_from_bytes( - PAGE_ALIGN(IOMMU_EVENT_LOG_DEFAULT_ENTRIES * - IOMMU_EVENT_LOG_ENTRY_SIZE)); - iommu->event_log.entries = iommu->event_log.alloc_size / - IOMMU_EVENT_LOG_ENTRY_SIZE; - - return (allocate_iommu_table_struct(&iommu->event_log, "Event Log")); + /* allocate ''event log'' in power of 2 increments of 4K */ + return allocate_ring_buffer(&iommu->event_log, sizeof(event_entry_t), + IOMMU_EVENT_LOG_DEFAULT_ENTRIES, "Event Log"); } static int __init amd_iommu_init_one(struct amd_iommu *iommu) { - if ( allocate_cmd_buffer(iommu) != 0 ) + if ( allocate_cmd_buffer(iommu) == NULL ) goto error_out; - if ( allocate_event_log(iommu) != 0 ) + if ( allocate_event_log(iommu) == NULL ) goto error_out; if ( map_iommu_mmio_region(iommu) != 0 ) @@ -708,8 +719,8 @@ static void __init amd_iommu_init_cleanu list_del(&iommu->list); if ( iommu->enabled ) { - deallocate_iommu_table_struct(&iommu->cmd_buffer); - deallocate_iommu_table_struct(&iommu->event_log); + deallocate_ring_buffer(&iommu->cmd_buffer); + deallocate_ring_buffer(&iommu->event_log); unmap_iommu_mmio_region(iommu); } xfree(iommu); @@ -719,7 +730,7 @@ static void __init amd_iommu_init_cleanu iterate_ivrs_entries(amd_iommu_free_intremap_table); /* free device table */ - deallocate_iommu_table_struct(&device_table); 
+ deallocate_device_table(&device_table); /* free ivrs_mappings[] */ radix_tree_destroy(&ivrs_maps, xfree); @@ -830,8 +841,10 @@ static int __init amd_iommu_setup_device device_table.entries = device_table.alloc_size / IOMMU_DEV_TABLE_ENTRY_SIZE; - if ( allocate_iommu_table_struct(&device_table, "Device Table") != 0 ) - return -ENOMEM; + device_table.buffer = allocate_buffer(device_table.alloc_size, + "Device Table"); + if ( device_table.buffer == NULL ) + return -ENOMEM; /* Add device table entries */ for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ ) diff -r 5b2676ac1321 -r 9c9ddf2dd700 xen/include/asm-x86/amd-iommu.h --- a/xen/include/asm-x86/amd-iommu.h Mon Jan 09 16:01:44 2012 +0100 +++ b/xen/include/asm-x86/amd-iommu.h Tue Jan 10 17:39:36 2012 +0100 @@ -30,12 +30,42 @@ extern struct list_head amd_iommu_head; +#pragma pack(1) +typedef struct event_entry +{ + uint32_t data[4]; +} event_entry_t; + +typedef struct ppr_entry +{ + uint32_t data[4]; +} ppr_entry_t; + +typedef struct cmd_entry +{ + uint32_t data[4]; +} cmd_entry_t; + +typedef struct dev_entry +{ + uint32_t data[8]; +} dev_entry_t; +#pragma pack() + struct table_struct { void *buffer; unsigned long entries; unsigned long alloc_size; }; +struct ring_buffer { + void *buffer; + unsigned long entries; + unsigned long alloc_size; + uint32_t tail; + uint32_t head; +}; + typedef struct iommu_cap { uint32_t header; /* offset 00h */ uint32_t base_low; /* offset 04h */ @@ -60,10 +90,8 @@ struct amd_iommu { unsigned long mmio_base_phys; struct table_struct dev_table; - struct table_struct cmd_buffer; - u32 cmd_buffer_tail; - struct table_struct event_log; - u32 event_log_head; + struct ring_buffer cmd_buffer; + struct ring_buffer event_log; int exclusion_enable; int exclusion_allow_all;
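[Editor's note: patch 01 folds the separately tracked cmd_buffer_tail/event_log_head indices into struct ring_buffer. The wraparound logic that queue_iommu_command() implements is a classic single-producer ring (software produces, hardware consumes). A simplified, self-contained sketch of that producer side, using a stripped-down ring type and a plain array in place of the DMA-able buffer, is:]

#include <stdint.h>
#include <string.h>

#define ENTRY_U32S 4                       /* one command entry = 16 bytes */

struct ring {
    uint32_t buffer[64][ENTRY_U32S];       /* stand-in for the allocated buffer */
    uint32_t entries;                      /* number of slots */
    uint32_t head;                         /* consumer index (advanced by hardware) */
    uint32_t tail;                         /* producer index (advanced by software) */
};

/* Queue one entry; returns 1 on success, 0 if the ring is full. */
static int ring_queue(struct ring *r, const uint32_t cmd[ENTRY_U32S])
{
    uint32_t tail = r->tail;

    if ( ++tail == r->entries )            /* advance with wraparound */
        tail = 0;

    if ( tail == r->head )                 /* full: producer would catch consumer */
        return 0;

    memcpy(r->buffer[r->tail], cmd, sizeof(r->buffer[0]));
    r->tail = tail;                        /* publish the new tail */
    return 1;
}

int main(void)
{
    struct ring r = { .entries = 64 };
    uint32_t cmd[ENTRY_U32S] = { 0x1, 0x2, 0x3, 0x4 };

    return ring_queue(&r, cmd) ? 0 : 1;
}

In the real driver, commit_iommu_command_buffer() then writes the updated tail into the IOMMU_CMD_BUFFER_TAIL MMIO register so the hardware starts fetching commands.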
Wei Wang
2012-Jan-10 17:07 UTC
[PATCH 02 of 14 V3] amd iommu: Introduces new helper functions to simplify bitwise operations
# HG changeset patch # User Wei Wang <wei.wang2@amd.com> # Date 1326213581 -3600 # Node ID dfdc0df7d68fa4551271b29671a2d333b185a48c # Parent 9c9ddf2dd700119fdaf8a420fb051c22279853cc amd iommu: Introduces new helper functions to simplify bitwise operations Signed-off-by: Wei Wang <wei.wang2@amd.com> diff -r 9c9ddf2dd700 -r dfdc0df7d68f xen/drivers/passthrough/amd/iommu_cmd.c --- a/xen/drivers/passthrough/amd/iommu_cmd.c Tue Jan 10 17:39:36 2012 +0100 +++ b/xen/drivers/passthrough/amd/iommu_cmd.c Tue Jan 10 17:39:41 2012 +0100 @@ -33,10 +33,8 @@ static int queue_iommu_command(struct am if ( ++tail == iommu->cmd_buffer.entries ) tail = 0; - head = get_field_from_reg_u32(readl(iommu->mmio_base + - IOMMU_CMD_BUFFER_HEAD_OFFSET), - IOMMU_CMD_BUFFER_HEAD_MASK, - IOMMU_CMD_BUFFER_HEAD_SHIFT); + head = iommu_get_rb_pointer(readl(iommu->mmio_base + + IOMMU_CMD_BUFFER_HEAD_OFFSET)); if ( head != tail ) { cmd_buffer = (u32 *)(iommu->cmd_buffer.buffer + @@ -55,11 +53,9 @@ static int queue_iommu_command(struct am static void commit_iommu_command_buffer(struct amd_iommu *iommu) { - u32 tail; + u32 tail = 0; - set_field_in_reg_u32(iommu->cmd_buffer.tail, 0, - IOMMU_CMD_BUFFER_TAIL_MASK, - IOMMU_CMD_BUFFER_TAIL_SHIFT, &tail); + iommu_set_rb_pointer(&tail, iommu->cmd_buffer.tail); writel(tail, iommu->mmio_base+IOMMU_CMD_BUFFER_TAIL_OFFSET); } diff -r 9c9ddf2dd700 -r dfdc0df7d68f xen/drivers/passthrough/amd/iommu_init.c --- a/xen/drivers/passthrough/amd/iommu_init.c Tue Jan 10 17:39:36 2012 +0100 +++ b/xen/drivers/passthrough/amd/iommu_init.c Tue Jan 10 17:39:41 2012 +0100 @@ -106,21 +106,21 @@ static void register_iommu_dev_table_in_ u64 addr_64, addr_lo, addr_hi; u32 entry; + ASSERT( iommu->dev_table.buffer ); + addr_64 = (u64)virt_to_maddr(iommu->dev_table.buffer); addr_lo = addr_64 & DMA_32BIT_MASK; addr_hi = addr_64 >> 32; - set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0, - IOMMU_DEV_TABLE_BASE_LOW_MASK, - IOMMU_DEV_TABLE_BASE_LOW_SHIFT, &entry); + entry = 0; + iommu_set_addr_lo_to_reg(&entry, addr_lo >> PAGE_SHIFT); set_field_in_reg_u32((iommu->dev_table.alloc_size / PAGE_SIZE) - 1, entry, IOMMU_DEV_TABLE_SIZE_MASK, IOMMU_DEV_TABLE_SIZE_SHIFT, &entry); writel(entry, iommu->mmio_base + IOMMU_DEV_TABLE_BASE_LOW_OFFSET); - set_field_in_reg_u32((u32)addr_hi, 0, - IOMMU_DEV_TABLE_BASE_HIGH_MASK, - IOMMU_DEV_TABLE_BASE_HIGH_SHIFT, &entry); + entry = 0; + iommu_set_addr_hi_to_reg(&entry, addr_hi); writel(entry, iommu->mmio_base + IOMMU_DEV_TABLE_BASE_HIGH_OFFSET); } @@ -130,21 +130,21 @@ static void register_iommu_cmd_buffer_in u32 power_of2_entries; u32 entry; + ASSERT( iommu->cmd_buffer.buffer ); + addr_64 = (u64)virt_to_maddr(iommu->cmd_buffer.buffer); addr_lo = addr_64 & DMA_32BIT_MASK; addr_hi = addr_64 >> 32; - set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0, - IOMMU_CMD_BUFFER_BASE_LOW_MASK, - IOMMU_CMD_BUFFER_BASE_LOW_SHIFT, &entry); + entry = 0; + iommu_set_addr_lo_to_reg(&entry, addr_lo >> PAGE_SHIFT); writel(entry, iommu->mmio_base + IOMMU_CMD_BUFFER_BASE_LOW_OFFSET); power_of2_entries = get_order_from_bytes(iommu->cmd_buffer.alloc_size) + IOMMU_CMD_BUFFER_POWER_OF2_ENTRIES_PER_PAGE; - set_field_in_reg_u32((u32)addr_hi, 0, - IOMMU_CMD_BUFFER_BASE_HIGH_MASK, - IOMMU_CMD_BUFFER_BASE_HIGH_SHIFT, &entry); + entry = 0; + iommu_set_addr_hi_to_reg(&entry, addr_hi); set_field_in_reg_u32(power_of2_entries, entry, IOMMU_CMD_BUFFER_LENGTH_MASK, IOMMU_CMD_BUFFER_LENGTH_SHIFT, &entry); @@ -157,21 +157,21 @@ static void register_iommu_event_log_in_ u32 power_of2_entries; u32 entry; + ASSERT( 
iommu->event_log.buffer ); + addr_64 = (u64)virt_to_maddr(iommu->event_log.buffer); addr_lo = addr_64 & DMA_32BIT_MASK; addr_hi = addr_64 >> 32; - set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0, - IOMMU_EVENT_LOG_BASE_LOW_MASK, - IOMMU_EVENT_LOG_BASE_LOW_SHIFT, &entry); + entry = 0; + iommu_set_addr_lo_to_reg(&entry, addr_lo >> PAGE_SHIFT); writel(entry, iommu->mmio_base + IOMMU_EVENT_LOG_BASE_LOW_OFFSET); power_of2_entries = get_order_from_bytes(iommu->event_log.alloc_size) + IOMMU_EVENT_LOG_POWER_OF2_ENTRIES_PER_PAGE; - set_field_in_reg_u32((u32)addr_hi, 0, - IOMMU_EVENT_LOG_BASE_HIGH_MASK, - IOMMU_EVENT_LOG_BASE_HIGH_SHIFT, &entry); + entry = 0; + iommu_set_addr_hi_to_reg(&entry, addr_hi); set_field_in_reg_u32(power_of2_entries, entry, IOMMU_EVENT_LOG_LENGTH_MASK, IOMMU_EVENT_LOG_LENGTH_SHIFT, &entry); @@ -234,14 +234,12 @@ static void register_iommu_exclusion_ran addr_lo = iommu->exclusion_base & DMA_32BIT_MASK; addr_hi = iommu->exclusion_base >> 32; - set_field_in_reg_u32((u32)addr_hi, 0, - IOMMU_EXCLUSION_BASE_HIGH_MASK, - IOMMU_EXCLUSION_BASE_HIGH_SHIFT, &entry); + entry = 0; + iommu_set_addr_hi_to_reg(&entry, addr_hi); writel(entry, iommu->mmio_base+IOMMU_EXCLUSION_BASE_HIGH_OFFSET); - set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0, - IOMMU_EXCLUSION_BASE_LOW_MASK, - IOMMU_EXCLUSION_BASE_LOW_SHIFT, &entry); + entry = 0; + iommu_set_addr_lo_to_reg(&entry, addr_lo >> PAGE_SHIFT); set_field_in_reg_u32(iommu->exclusion_allow_all, entry, IOMMU_EXCLUSION_ALLOW_ALL_MASK, @@ -490,9 +488,7 @@ static void parse_event_log_entry(struct if ( code == IOMMU_EVENT_IO_PAGE_FAULT ) { - device_id = get_field_from_reg_u32(entry[0], - IOMMU_EVENT_DEVICE_ID_MASK, - IOMMU_EVENT_DEVICE_ID_SHIFT); + device_id = iommu_get_devid_from_event(entry[0]); domain_id = get_field_from_reg_u32(entry[1], IOMMU_EVENT_DOMAIN_ID_MASK, IOMMU_EVENT_DOMAIN_ID_SHIFT); diff -r 9c9ddf2dd700 -r dfdc0df7d68f xen/include/asm-x86/hvm/svm/amd-iommu-defs.h --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Tue Jan 10 17:39:36 2012 +0100 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Tue Jan 10 17:39:41 2012 +0100 @@ -82,10 +82,6 @@ /* Device Table */ #define IOMMU_DEV_TABLE_BASE_LOW_OFFSET 0x00 #define IOMMU_DEV_TABLE_BASE_HIGH_OFFSET 0x04 -#define IOMMU_DEV_TABLE_BASE_LOW_MASK 0xFFFFF000 -#define IOMMU_DEV_TABLE_BASE_LOW_SHIFT 12 -#define IOMMU_DEV_TABLE_BASE_HIGH_MASK 0x000FFFFF -#define IOMMU_DEV_TABLE_BASE_HIGH_SHIFT 0 #define IOMMU_DEV_TABLE_SIZE_MASK 0x000001FF #define IOMMU_DEV_TABLE_SIZE_SHIFT 0 @@ -164,22 +160,13 @@ #define IOMMU_DEV_TABLE_INT_CONTROL_MASK 0x30000000 #define IOMMU_DEV_TABLE_INT_CONTROL_SHIFT 28 - /* Command Buffer */ #define IOMMU_CMD_BUFFER_BASE_LOW_OFFSET 0x08 #define IOMMU_CMD_BUFFER_BASE_HIGH_OFFSET 0x0C #define IOMMU_CMD_BUFFER_HEAD_OFFSET 0x2000 #define IOMMU_CMD_BUFFER_TAIL_OFFSET 0x2008 -#define IOMMU_CMD_BUFFER_BASE_LOW_MASK 0xFFFFF000 -#define IOMMU_CMD_BUFFER_BASE_LOW_SHIFT 12 -#define IOMMU_CMD_BUFFER_BASE_HIGH_MASK 0x000FFFFF -#define IOMMU_CMD_BUFFER_BASE_HIGH_SHIFT 0 #define IOMMU_CMD_BUFFER_LENGTH_MASK 0x0F000000 #define IOMMU_CMD_BUFFER_LENGTH_SHIFT 24 -#define IOMMU_CMD_BUFFER_HEAD_MASK 0x0007FFF0 -#define IOMMU_CMD_BUFFER_HEAD_SHIFT 4 -#define IOMMU_CMD_BUFFER_TAIL_MASK 0x0007FFF0 -#define IOMMU_CMD_BUFFER_TAIL_SHIFT 4 #define IOMMU_CMD_BUFFER_ENTRY_SIZE 16 #define IOMMU_CMD_BUFFER_POWER_OF2_ENTRIES_PER_PAGE 8 @@ -251,10 +238,6 @@ #define IOMMU_EVENT_LOG_BASE_HIGH_OFFSET 0x14 #define IOMMU_EVENT_LOG_HEAD_OFFSET 0x2010 #define IOMMU_EVENT_LOG_TAIL_OFFSET 0x2018 -#define 
IOMMU_EVENT_LOG_BASE_LOW_MASK 0xFFFFF000 -#define IOMMU_EVENT_LOG_BASE_LOW_SHIFT 12 -#define IOMMU_EVENT_LOG_BASE_HIGH_MASK 0x000FFFFF -#define IOMMU_EVENT_LOG_BASE_HIGH_SHIFT 0 #define IOMMU_EVENT_LOG_LENGTH_MASK 0x0F000000 #define IOMMU_EVENT_LOG_LENGTH_SHIFT 24 #define IOMMU_EVENT_LOG_HEAD_MASK 0x0007FFF0 @@ -440,4 +423,20 @@ #define INV_IOMMU_ALL_PAGES_ADDRESS ((1ULL << 63) - 1) +#define IOMMU_RING_BUFFER_PTR_MASK 0x0007FFF0 +#define IOMMU_RING_BUFFER_PTR_SHIFT 4 + +#define IOMMU_CMD_DEVICE_ID_MASK 0x0000FFFF +#define IOMMU_CMD_DEVICE_ID_SHIFT 0 + +#define IOMMU_CMD_ADDR_LOW_MASK 0xFFFFF000 +#define IOMMU_CMD_ADDR_LOW_SHIFT 12 +#define IOMMU_CMD_ADDR_HIGH_MASK 0xFFFFFFFF +#define IOMMU_CMD_ADDR_HIGH_SHIFT 0 + +#define IOMMU_REG_BASE_ADDR_LOW_MASK 0xFFFFF000 +#define IOMMU_REG_BASE_ADDR_LOW_SHIFT 12 +#define IOMMU_REG_BASE_ADDR_HIGH_MASK 0x000FFFFF +#define IOMMU_REG_BASE_ADDR_HIGH_SHIFT 0 + #endif /* _ASM_X86_64_AMD_IOMMU_DEFS_H */ diff -r 9c9ddf2dd700 -r dfdc0df7d68f xen/include/asm-x86/hvm/svm/amd-iommu-proto.h --- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h Tue Jan 10 17:39:36 2012 +0100 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h Tue Jan 10 17:39:41 2012 +0100 @@ -192,4 +192,71 @@ static inline int iommu_has_feature(stru return !!(iommu->features & (1U << bit)); } +/* access tail or head pointer of ring buffer */ +static inline uint32_t iommu_get_rb_pointer(uint32_t reg) +{ + return get_field_from_reg_u32(reg, IOMMU_RING_BUFFER_PTR_MASK, + IOMMU_RING_BUFFER_PTR_SHIFT); +} + +static inline void iommu_set_rb_pointer(uint32_t *reg, uint32_t val) +{ + set_field_in_reg_u32(val, *reg, IOMMU_RING_BUFFER_PTR_MASK, + IOMMU_RING_BUFFER_PTR_SHIFT, reg); +} + +/* access device id field from iommu cmd */ +static inline uint16_t iommu_get_devid_from_cmd(uint32_t cmd) +{ + return get_field_from_reg_u32(cmd, IOMMU_CMD_DEVICE_ID_MASK, + IOMMU_CMD_DEVICE_ID_SHIFT); +} + +static inline void iommu_set_devid_to_cmd(uint32_t *cmd, uint16_t id) +{ + set_field_in_reg_u32(id, *cmd, IOMMU_CMD_DEVICE_ID_MASK, + IOMMU_CMD_DEVICE_ID_SHIFT, cmd); +} + +/* access address field from iommu cmd */ +static inline uint32_t iommu_get_addr_lo_from_cmd(uint32_t cmd) +{ + return get_field_from_reg_u32(cmd, IOMMU_CMD_ADDR_LOW_MASK, + IOMMU_CMD_ADDR_LOW_SHIFT); +} + +static inline uint32_t iommu_get_addr_hi_from_cmd(uint32_t cmd) +{ + return get_field_from_reg_u32(cmd, IOMMU_CMD_ADDR_LOW_MASK, + IOMMU_CMD_ADDR_HIGH_SHIFT); +} + +/* access address field from event log entry */ +#define iommu_get_devid_from_event iommu_get_devid_from_cmd + +/* access iommu base addresses field from mmio regs */ +static inline void iommu_set_addr_lo_to_reg(uint32_t *reg, uint32_t addr) +{ + set_field_in_reg_u32(addr, *reg, IOMMU_REG_BASE_ADDR_LOW_MASK, + IOMMU_REG_BASE_ADDR_LOW_SHIFT, reg); +} + +static inline void iommu_set_addr_hi_to_reg(uint32_t *reg, uint32_t addr) +{ + set_field_in_reg_u32(addr, *reg, IOMMU_REG_BASE_ADDR_HIGH_MASK, + IOMMU_REG_BASE_ADDR_HIGH_SHIFT, reg); +} + +static inline uint32_t iommu_get_addr_lo_from_reg(uint32_t reg) +{ + return get_field_from_reg_u32(reg, IOMMU_REG_BASE_ADDR_LOW_MASK, + IOMMU_REG_BASE_ADDR_LOW_SHIFT); +} + +static inline uint32_t iommu_get_addr_hi_from_reg(uint32_t reg) +{ + return get_field_from_reg_u32(reg, IOMMU_REG_BASE_ADDR_HIGH_MASK, + IOMMU_REG_BASE_ADDR_HIGH_SHIFT); +} + #endif /* _ASM_X86_64_AMD_IOMMU_PROTO_H */
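[Editor's note: patch 02 replaces open-coded mask/shift pairs with named accessors built on get_field_from_reg_u32()/set_field_in_reg_u32(). The sketch below is a self-contained re-implementation of the field semantics those accessors assume (mask-then-shift on read, read-modify-write on update), shown for the IOMMU_RING_BUFFER_PTR field the patch introduces; it is not the Xen definition of the helpers.]

#include <stdint.h>
#include <assert.h>

#define IOMMU_RING_BUFFER_PTR_MASK   0x0007FFF0
#define IOMMU_RING_BUFFER_PTR_SHIFT  4

/* Extract a bit field: mask first, then shift down. */
static uint32_t get_field_u32(uint32_t reg, uint32_t mask, uint32_t shift)
{
    return (reg & mask) >> shift;
}

/* Insert a bit field: clear the field, then OR in the shifted value. */
static uint32_t set_field_u32(uint32_t val, uint32_t reg,
                              uint32_t mask, uint32_t shift)
{
    return (reg & ~mask) | ((val << shift) & mask);
}

int main(void)
{
    uint32_t reg = 0;

    /* Roughly what iommu_set_rb_pointer(&reg, 5) does: pointer lives in bits 18:4. */
    reg = set_field_u32(5, reg, IOMMU_RING_BUFFER_PTR_MASK,
                        IOMMU_RING_BUFFER_PTR_SHIFT);
    assert(reg == 0x50);

    /* Roughly what iommu_get_rb_pointer(reg) does. */
    assert(get_field_u32(reg, IOMMU_RING_BUFFER_PTR_MASK,
                         IOMMU_RING_BUFFER_PTR_SHIFT) == 5);
    return 0;
}

Since the command buffer, event log and PPR log all use the same pointer layout, one mask/shift pair replaces the three per-buffer HEAD/TAIL mask definitions removed by the patch.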
Wei Wang
2012-Jan-10 17:07 UTC
[PATCH 03 of 14 V3] amd iommu: Add iommu emulation for hvm guest
# HG changeset patch # User Wei Wang <wei.wang2@amd.com> # Date 1326213585 -3600 # Node ID 6789e0d335e67e700f97942dd094c548fbbd80f3 # Parent dfdc0df7d68fa4551271b29671a2d333b185a48c amd iommu: Add iommu emulation for hvm guest ATS device driver that support PASID [1] and PRI [2] capabilites needs to work with iommu driver in guest OS. We have to expose iommu functionality to HVM guest, if we want assign ATS device to it. A new hypervisor mmio handler is added to intercept iommu mmio accesses from guest. Signed-off-by: Wei Wang <wei.wang2@amd.com> [1] http://www.pcisig.com/specifications/pciexpress/specifications/ECN-PASID-ATS-2011-03-31.pdf [2] http://www.pcisig.com/members/downloads/specifications/iov/ats_r1.1_26Jan09.pdf diff -r dfdc0df7d68f -r 6789e0d335e6 xen/arch/x86/hvm/intercept.c --- a/xen/arch/x86/hvm/intercept.c Tue Jan 10 17:39:41 2012 +0100 +++ b/xen/arch/x86/hvm/intercept.c Tue Jan 10 17:39:45 2012 +0100 @@ -38,7 +38,8 @@ hvm_mmio_handlers[HVM_MMIO_HANDLER_NR] &hpet_mmio_handler, &vlapic_mmio_handler, &vioapic_mmio_handler, - &msixtbl_mmio_handler + &msixtbl_mmio_handler, + &iommu_mmio_handler }; static int hvm_mmio_access(struct vcpu *v, diff -r dfdc0df7d68f -r 6789e0d335e6 xen/drivers/passthrough/amd/Makefile --- a/xen/drivers/passthrough/amd/Makefile Tue Jan 10 17:39:41 2012 +0100 +++ b/xen/drivers/passthrough/amd/Makefile Tue Jan 10 17:39:45 2012 +0100 @@ -5,3 +5,4 @@ obj-y += pci_amd_iommu.o obj-bin-y += iommu_acpi.init.o obj-y += iommu_intr.o obj-y += iommu_cmd.o +obj-y += iommu_guest.o diff -r dfdc0df7d68f -r 6789e0d335e6 xen/drivers/passthrough/amd/iommu_cmd.c --- a/xen/drivers/passthrough/amd/iommu_cmd.c Tue Jan 10 17:39:41 2012 +0100 +++ b/xen/drivers/passthrough/amd/iommu_cmd.c Tue Jan 10 17:39:45 2012 +0100 @@ -398,3 +398,15 @@ void amd_iommu_flush_all_caches(struct a invalidate_iommu_all(iommu); flush_command_buffer(iommu); } + +void amd_iommu_send_guest_cmd(struct amd_iommu *iommu, u32 cmd[]) +{ + unsigned long flags; + + spin_lock_irqsave(&iommu->lock, flags); + + send_iommu_command(iommu, cmd); + flush_command_buffer(iommu); + + spin_unlock_irqrestore(&iommu->lock, flags); +} diff -r dfdc0df7d68f -r 6789e0d335e6 xen/drivers/passthrough/amd/iommu_guest.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/drivers/passthrough/amd/iommu_guest.c Tue Jan 10 17:39:45 2012 +0100 @@ -0,0 +1,915 @@ +/* + * Copyright (C) 2011 Advanced Micro Devices, Inc. + * Author: Wei Wang <wei.wang2@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <xen/sched.h> +#include <asm/p2m.h> +#include <asm/hvm/iommu.h> +#include <asm/amd-iommu.h> +#include <asm/hvm/svm/amd-iommu-proto.h> + + +#define IOMMU_MMIO_SIZE 0x8000 +#define IOMMU_MMIO_PAGE_NR 0x8 +#define RING_BF_LENGTH_MASK 0x0F000000 +#define RING_BF_LENGTH_SHIFT 24 + +#define PASMAX_9_bit 0x8 +#define GUEST_CR3_1_LEVEL 0x0 +#define GUEST_ADDRESS_SIZE_6_LEVEL 0x2 +#define HOST_ADDRESS_SIZE_6_LEVEL 0x2 + +#define guest_iommu_set_status(iommu, bit) \ + iommu_set_bit(&((iommu)->reg_status.lo), bit) + +#define guest_iommu_clear_status(iommu, bit) \ + iommu_clear_bit(&((iommu)->reg_status.lo), bit) + +#define reg_to_u64(reg) (((uint64_t)reg.hi << 32) | reg.lo ) +#define u64_to_reg(reg, val) \ + do \ + { \ + (reg)->lo = val & 0xFFFFFFFF; \ + (reg)->hi = (val >> 32) & 0xFFFFFFFF; \ + } while(0) + +static unsigned int machine_bdf(struct domain *d, uint16_t guest_bdf) +{ + return guest_bdf; +} + +static uint16_t guest_bdf(struct domain *d, uint16_t machine_bdf) +{ + return machine_bdf; +} + +static inline struct guest_iommu *domain_iommu(struct domain *d) +{ + return domain_hvm_iommu(d)->g_iommu; +} + +static inline struct guest_iommu *vcpu_iommu(struct vcpu *v) +{ + return domain_hvm_iommu(v->domain)->g_iommu; +} + +static void guest_iommu_enable(struct guest_iommu *iommu) +{ + iommu->enabled = 1; +} + +static void guest_iommu_disable(struct guest_iommu *iommu) +{ + iommu->enabled = 0; +} + +static uint64_t get_guest_cr3_from_dte(dev_entry_t *dte) +{ + uint64_t gcr3_1, gcr3_2, gcr3_3; + + gcr3_1 = get_field_from_reg_u32(dte->data[1], + IOMMU_DEV_TABLE_GCR3_1_MASK, + IOMMU_DEV_TABLE_GCR3_1_SHIFT); + gcr3_2 = get_field_from_reg_u32(dte->data[2], + IOMMU_DEV_TABLE_GCR3_2_MASK, + IOMMU_DEV_TABLE_GCR3_2_SHIFT); + gcr3_3 = get_field_from_reg_u32(dte->data[3], + IOMMU_DEV_TABLE_GCR3_3_MASK, + IOMMU_DEV_TABLE_GCR3_3_SHIFT); + + return ((gcr3_3 << 31) | (gcr3_2 << 15 ) | (gcr3_1 << 12)) >> PAGE_SHIFT; +} + +static uint16_t get_domid_from_dte(dev_entry_t *dte) +{ + return get_field_from_reg_u32(dte->data[2], IOMMU_DEV_TABLE_DOMAIN_ID_MASK, + IOMMU_DEV_TABLE_DOMAIN_ID_SHIFT); +} + +static uint16_t get_glx_from_dte(dev_entry_t *dte) +{ + return get_field_from_reg_u32(dte->data[1], IOMMU_DEV_TABLE_GLX_MASK, + IOMMU_DEV_TABLE_GLX_SHIFT); +} + +static uint16_t get_gv_from_dte(dev_entry_t *dte) +{ + return get_field_from_reg_u32(dte->data[1],IOMMU_DEV_TABLE_GV_MASK, + IOMMU_DEV_TABLE_GV_SHIFT); +} + +static unsigned int host_domid(struct domain *d, uint64_t g_domid) +{ + /* Only support one PPR device in guest for now */ + return d->domain_id; +} + +static unsigned long get_gfn_from_base_reg(uint64_t base_raw) +{ + uint64_t addr_lo, addr_hi, addr64; + + addr_lo = iommu_get_addr_lo_from_reg(base_raw & DMA_32BIT_MASK); + addr_hi = iommu_get_addr_hi_from_reg(base_raw >> 32); + addr64 = (addr_hi << 32) | (addr_lo << PAGE_SHIFT); + + ASSERT ( addr64 != 0 ); + + return addr64 >> PAGE_SHIFT; +} + +static void guest_iommu_deliver_msi(struct domain *d) +{ + uint8_t vector, dest, dest_mode, delivery_mode, trig_mode; + struct guest_iommu *iommu = domain_iommu(d); + + vector = iommu->msi.vector; + dest = iommu->msi.dest; + dest_mode = iommu->msi.dest_mode; + delivery_mode = iommu->msi.delivery_mode; + trig_mode = iommu->msi.trig_mode; + + vmsi_deliver(d, vector, dest, dest_mode, 
delivery_mode, trig_mode); +} + +static unsigned long guest_iommu_get_table_mfn(struct domain *d, + uint64_t base_raw, + unsigned int entry_size, + unsigned int pos) +{ + unsigned long idx, gfn, mfn; + p2m_type_t p2mt; + + gfn = get_gfn_from_base_reg(base_raw); + idx = (pos * entry_size) >> PAGE_SHIFT; + + mfn = mfn_x(get_gfn(d, gfn + idx, &p2mt)); + put_gfn(d, gfn); + + return mfn; +} + +static void guest_iommu_enable_dev_table(struct guest_iommu *iommu) +{ + uint32_t length_raw = get_field_from_reg_u32(iommu->dev_table.reg_base.lo, + IOMMU_DEV_TABLE_SIZE_MASK, + IOMMU_DEV_TABLE_SIZE_SHIFT); + iommu->dev_table.size = (length_raw + 1) * PAGE_SIZE; +} + +static void guest_iommu_enable_ring_buffer(struct guest_iommu *iommu, + struct guest_buffer *buffer, + uint32_t entry_size) +{ + uint32_t length_raw = get_field_from_reg_u32(buffer->reg_base.hi, + RING_BF_LENGTH_MASK, + RING_BF_LENGTH_SHIFT); + buffer->entries = 1 << length_raw; +} + +void guest_iommu_add_ppr_log(struct domain *d, u32 entry[]) +{ + uint16_t gdev_id; + unsigned long mfn, tail, head; + ppr_entry_t *log, *log_base; + struct guest_iommu *iommu; + + iommu = domain_iommu(d); + tail = iommu_get_rb_pointer(iommu->ppr_log.reg_tail.lo); + head = iommu_get_rb_pointer(iommu->ppr_log.reg_head.lo); + + if ( tail >= iommu->ppr_log.entries || head >= iommu->ppr_log.entries ) + { + AMD_IOMMU_DEBUG("Error: guest iommu ppr log overflows\n"); + guest_iommu_disable(iommu); + return; + } + + mfn = guest_iommu_get_table_mfn(d, reg_to_u64(iommu->ppr_log.reg_base), + sizeof(ppr_entry_t), tail); + ASSERT(mfn_valid(mfn)); + + log_base = map_domain_page(mfn); + log = log_base + tail % (PAGE_SIZE / sizeof(ppr_entry_t)); + + /* Convert physical device id back into virtual device id */ + gdev_id = guest_bdf(d, iommu_get_devid_from_cmd(entry[0])); + iommu_set_devid_to_cmd(&entry[0], gdev_id); + + memcpy(log, entry, sizeof(ppr_entry_t)); + + /* Now shift ppr log tail pointer */ + if ( (++tail) >= iommu->ppr_log.entries ) + { + tail = 0; + guest_iommu_set_status(iommu, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT); + } + iommu_set_rb_pointer(&iommu->ppr_log.reg_tail.lo, tail); + unmap_domain_page(log_base); + + guest_iommu_deliver_msi(d); +} + +void guest_iommu_add_event_log(struct domain *d, u32 entry[]) +{ + uint16_t dev_id; + unsigned long mfn, tail, head; + event_entry_t *log, *log_base; + struct guest_iommu *iommu; + + iommu = domain_iommu(d); + tail = iommu_get_rb_pointer(iommu->event_log.reg_tail.lo); + head = iommu_get_rb_pointer(iommu->event_log.reg_head.lo); + + if ( tail >= iommu->event_log.entries || head >= iommu->event_log.entries ) + { + AMD_IOMMU_DEBUG("Error: guest iommu event overflows\n"); + guest_iommu_disable(iommu); + return; + } + + mfn = guest_iommu_get_table_mfn(d, reg_to_u64(iommu->event_log.reg_base), + sizeof(event_entry_t), tail); + ASSERT(mfn_valid(mfn)); + + log_base = map_domain_page(mfn); + log = log_base + tail % (PAGE_SIZE / sizeof(event_entry_t)); + + /* re-write physical device id into virtual device id */ + dev_id = guest_bdf(d, iommu_get_devid_from_cmd(entry[0])); + iommu_set_devid_to_cmd(&entry[0], dev_id); + memcpy(log, entry, sizeof(event_entry_t)); + + /* Now shift event log tail pointer */ + if ( (++tail) >= iommu->event_log.entries ) + { + tail = 0; + guest_iommu_set_status(iommu, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT); + } + + iommu_set_rb_pointer(&iommu->event_log.reg_tail.lo, tail); + unmap_domain_page(log_base); + + guest_iommu_deliver_msi(d); +} + +static int do_complete_ppr_request(struct domain *d, cmd_entry_t *cmd) +{ 
+ uint16_t dev_id; + struct amd_iommu *iommu; + + dev_id = machine_bdf(d, iommu_get_devid_from_cmd(cmd->data[0])); + iommu = find_iommu_for_device(0, dev_id); + + if ( !iommu ) + { + AMD_IOMMU_DEBUG("%s Fail to find iommu for bdf %x\n", + __func__, dev_id); + return -ENODEV; + } + + /* replace virtual device id into physical */ + iommu_set_devid_to_cmd(&cmd->data[0], dev_id); + amd_iommu_send_guest_cmd(iommu, cmd->data); + + return 0; +} + +static int do_invalidate_pages(struct domain *d, cmd_entry_t *cmd) +{ + uint16_t gdom_id, hdom_id; + struct amd_iommu *iommu = NULL; + + gdom_id = get_field_from_reg_u32(cmd->data[1], + IOMMU_INV_IOMMU_PAGES_DOMAIN_ID_MASK, + IOMMU_INV_IOMMU_PAGES_DOMAIN_ID_SHIFT); + + hdom_id = host_domid(d, gdom_id); + set_field_in_reg_u32(hdom_id, cmd->data[1], + IOMMU_INV_IOMMU_PAGES_DOMAIN_ID_MASK, + IOMMU_INV_IOMMU_PAGES_DOMAIN_ID_SHIFT, &cmd->data[1]); + + for_each_amd_iommu ( iommu ) + amd_iommu_send_guest_cmd(iommu, cmd->data); + + return 0; +} + +static int do_invalidate_all(struct domain *d, cmd_entry_t *cmd) +{ + struct amd_iommu *iommu = NULL; + + for_each_amd_iommu ( iommu ) + amd_iommu_flush_all_pages(d); + + return 0; +} + +static int do_invalidate_iotlb_pages(struct domain *d, cmd_entry_t *cmd) +{ + struct amd_iommu *iommu; + uint16_t dev_id; + + dev_id = machine_bdf(d, iommu_get_devid_from_cmd(cmd->data[0])); + + iommu = find_iommu_for_device(0, dev_id); + if ( !iommu ) + { + AMD_IOMMU_DEBUG("%s Fail to find iommu for bdf %x\n", + __func__, dev_id); + return -ENODEV; + } + + iommu_set_devid_to_cmd(&cmd->data[0], dev_id); + amd_iommu_send_guest_cmd(iommu, cmd->data); + + return 0; +} + +static int do_completion_wait(struct domain *d, cmd_entry_t *cmd) +{ + bool_t com_wait_int_en, com_wait_int, i, s; + struct guest_iommu *iommu; + unsigned long gfn; + p2m_type_t p2mt; + + iommu = domain_iommu(d); + + i = iommu_get_bit(cmd->data[0], IOMMU_COMP_WAIT_I_FLAG_SHIFT); + s = iommu_get_bit(cmd->data[0], IOMMU_COMP_WAIT_S_FLAG_SHIFT); + + if ( i ) + guest_iommu_set_status(iommu, IOMMU_STATUS_COMP_WAIT_INT_SHIFT); + + if ( s ) + { + uint64_t gaddr_lo, gaddr_hi, gaddr_64, data; + void *vaddr; + + data = (uint64_t) cmd->data[3] << 32 | cmd->data[2]; + gaddr_lo = get_field_from_reg_u32(cmd->data[0], + IOMMU_COMP_WAIT_ADDR_LOW_MASK, + IOMMU_COMP_WAIT_ADDR_LOW_SHIFT); + gaddr_hi = get_field_from_reg_u32(cmd->data[1], + IOMMU_COMP_WAIT_ADDR_HIGH_MASK, + IOMMU_COMP_WAIT_ADDR_HIGH_SHIFT); + + gaddr_64 = (gaddr_hi << 32) | (gaddr_lo << 3); + + gfn = gaddr_64 >> PAGE_SHIFT; + vaddr = map_domain_page(mfn_x(get_gfn(d, gfn ,&p2mt))); + put_gfn(d, gfn); + + write_u64_atomic((uint64_t*)(vaddr + (gaddr_64 & (PAGE_SIZE-1))), data); + unmap_domain_page(vaddr); + } + + com_wait_int_en = iommu_get_bit(iommu->reg_ctrl.lo, + IOMMU_CONTROL_COMP_WAIT_INT_SHIFT); + com_wait_int = iommu_get_bit(iommu->reg_status.lo, + IOMMU_STATUS_COMP_WAIT_INT_SHIFT); + + if ( com_wait_int_en && com_wait_int ) + guest_iommu_deliver_msi(d); + + return 0; +} + +static int do_invalidate_dte(struct domain *d, cmd_entry_t *cmd) +{ + uint16_t gbdf, mbdf, req_id, gdom_id, hdom_id; + dev_entry_t *gdte, *mdte, *dte_base; + struct amd_iommu *iommu = NULL; + struct guest_iommu *g_iommu; + uint64_t gcr3_gfn, gcr3_mfn; + uint8_t glx, gv; + unsigned long dte_mfn, flags; + p2m_type_t p2mt; + + g_iommu = domain_iommu(d); + gbdf = iommu_get_devid_from_cmd(cmd->data[0]); + mbdf = machine_bdf(d, gbdf); + + /* Guest can only update DTEs for its passthru devices */ + if ( mbdf == 0 || gbdf == 0 ) + return 0; + + /* Sometimes 
guest invalidates devices from non-exists dtes */ + if ( (gbdf * sizeof(dev_entry_t)) > g_iommu->dev_table.size ) + return 0; + + dte_mfn = guest_iommu_get_table_mfn(d, + reg_to_u64(g_iommu->dev_table.reg_base), + sizeof(dev_entry_t), gbdf); + ASSERT(mfn_valid(dte_mfn)); + + dte_base = map_domain_page(dte_mfn); + + gdte = dte_base + gbdf % (PAGE_SIZE / sizeof(dev_entry_t)); + + gdom_id = get_domid_from_dte(gdte); + gcr3_gfn = get_guest_cr3_from_dte(gdte); + + /* Do not update host dte before gcr3 has been set */ + if ( gcr3_gfn == 0 ) + return 0; + + gcr3_mfn = mfn_x(get_gfn(d, gcr3_gfn, &p2mt)); + put_gfn(d, gcr3_gfn); + + ASSERT(mfn_valid(gcr3_mfn)); + + /* Read guest dte information */ + iommu = find_iommu_for_device(0, mbdf); + if ( !iommu ) + { + AMD_IOMMU_DEBUG("%s Fail to find iommu!\n",__func__); + return -ENODEV; + } + + glx = get_glx_from_dte(gdte); + gv = get_gv_from_dte(gdte); + + unmap_domain_page(dte_base); + + /* Setup host device entry */ + hdom_id = host_domid(d, gdom_id); + req_id = get_dma_requestor_id(iommu->seg, mbdf); + mdte = iommu->dev_table.buffer + (req_id * sizeof(dev_entry_t)); + + spin_lock_irqsave(&iommu->lock, flags); + iommu_dte_set_guest_cr3((u32*)mdte, hdom_id, + gcr3_mfn << PAGE_SHIFT, gv, glx); + + amd_iommu_flush_device(iommu, req_id); + spin_unlock_irqrestore(&iommu->lock, flags); + + return 0; +} + +static void guest_iommu_process_command(unsigned long _d) +{ + unsigned long opcode, tail, head, entries_per_page, cmd_mfn; + cmd_entry_t *cmd, *cmd_base; + struct domain *d; + struct guest_iommu *iommu; + + d = (struct domain*) _d; + iommu = domain_iommu(d); + + if ( !iommu->enabled ) + return; + + head = iommu_get_rb_pointer(iommu->cmd_buffer.reg_head.lo); + tail = iommu_get_rb_pointer(iommu->cmd_buffer.reg_tail.lo); + + /* Tail pointer is rolled over by guest driver, value outside + * cmd_buffer_entries cause iommu disabled + */ + + if ( tail >= iommu->cmd_buffer.entries || + head >= iommu->cmd_buffer.entries ) + { + AMD_IOMMU_DEBUG("Error: guest iommu cmd buffer overflows\n"); + guest_iommu_disable(iommu); + return; + } + + entries_per_page = PAGE_SIZE / sizeof(cmd_entry_t); + + while ( head != tail ) + { + int ret = 0; + + cmd_mfn = guest_iommu_get_table_mfn(d, + reg_to_u64(iommu->cmd_buffer.reg_base), + sizeof(cmd_entry_t), head); + ASSERT(mfn_valid(cmd_mfn)); + + cmd_base = map_domain_page(cmd_mfn); + cmd = cmd_base + head % entries_per_page; + + opcode = get_field_from_reg_u32(cmd->data[1], + IOMMU_CMD_OPCODE_MASK, + IOMMU_CMD_OPCODE_SHIFT); + switch ( opcode ) + { + case IOMMU_CMD_COMPLETION_WAIT: + ret = do_completion_wait(d, cmd); + break; + case IOMMU_CMD_INVALIDATE_DEVTAB_ENTRY: + ret = do_invalidate_dte(d, cmd); + break; + case IOMMU_CMD_INVALIDATE_IOMMU_PAGES: + ret = do_invalidate_pages(d, cmd); + break; + case IOMMU_CMD_INVALIDATE_IOTLB_PAGES: + ret = do_invalidate_iotlb_pages(d, cmd); + break; + case IOMMU_CMD_INVALIDATE_INT_TABLE: + break; + case IOMMU_CMD_COMPLETE_PPR_REQUEST: + ret = do_complete_ppr_request(d, cmd); + break; + case IOMMU_CMD_INVALIDATE_IOMMU_ALL: + ret = do_invalidate_all(d, cmd); + break; + default: + AMD_IOMMU_DEBUG("CMD: Unknown command cmd_type = %lx " + "head = %ld\n", opcode, head); + break; + } + + unmap_domain_page(cmd_base); + if ( (++head) >= iommu->cmd_buffer.entries ) + head = 0; + if ( ret ) + guest_iommu_disable(iommu); + } + + /* Now shift cmd buffer head pointer */ + iommu_set_rb_pointer(&iommu->cmd_buffer.reg_head.lo, head); + return; +} + +static int guest_iommu_write_ctrl(struct guest_iommu *iommu, 
uint64_t newctrl) +{ + bool_t cmd_en, event_en, iommu_en, ppr_en, ppr_log_en; + bool_t cmd_en_old, event_en_old, iommu_en_old; + bool_t cmd_run; + + iommu_en = iommu_get_bit(newctrl, + IOMMU_CONTROL_TRANSLATION_ENABLE_SHIFT); + iommu_en_old = iommu_get_bit(iommu->reg_ctrl.lo, + IOMMU_CONTROL_TRANSLATION_ENABLE_SHIFT); + + cmd_en = iommu_get_bit(newctrl, + IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT); + cmd_en_old = iommu_get_bit(iommu->reg_ctrl.lo, + IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT); + cmd_run = iommu_get_bit(iommu->reg_status.lo, + + IOMMU_STATUS_CMD_BUFFER_RUN_SHIFT); + event_en = iommu_get_bit(newctrl, + IOMMU_CONTROL_EVENT_LOG_ENABLE_SHIFT); + event_en_old = iommu_get_bit(iommu->reg_ctrl.lo, + IOMMU_CONTROL_EVENT_LOG_ENABLE_SHIFT); + + ppr_en = iommu_get_bit(newctrl, + IOMMU_CONTROL_PPR_ENABLE_SHIFT); + ppr_log_en = iommu_get_bit(newctrl, + IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT); + + if ( iommu_en ) + { + guest_iommu_enable(iommu); + guest_iommu_enable_dev_table(iommu); + } + + if ( iommu_en && cmd_en ) + { + guest_iommu_enable_ring_buffer(iommu, &iommu->cmd_buffer, + sizeof(cmd_entry_t)); + /* Enable iommu command processing */ + tasklet_schedule(&iommu->cmd_buffer_tasklet); + } + + if ( iommu_en && event_en ) + { + guest_iommu_enable_ring_buffer(iommu, &iommu->event_log, + sizeof(event_entry_t)); + guest_iommu_set_status(iommu, IOMMU_STATUS_EVENT_LOG_RUN_SHIFT); + guest_iommu_clear_status(iommu, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT); + } + + if ( iommu_en && ppr_en && ppr_log_en ) + { + guest_iommu_enable_ring_buffer(iommu, &iommu->ppr_log, + sizeof(ppr_entry_t)); + guest_iommu_set_status(iommu, IOMMU_STATUS_PPR_LOG_RUN_SHIFT); + guest_iommu_clear_status(iommu, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT); + } + + if ( iommu_en && cmd_en_old && !cmd_en ) + { + /* Disable iommu command processing */ + tasklet_kill(&iommu->cmd_buffer_tasklet); + } + + if ( event_en_old && !event_en ) + { + guest_iommu_clear_status(iommu, IOMMU_STATUS_EVENT_LOG_RUN_SHIFT); + } + + if ( !iommu_en && iommu_en_old ) + { + guest_iommu_disable(iommu); + } + + u64_to_reg(&iommu->reg_ctrl, newctrl); + return 0; +} + +static uint64_t iommu_mmio_read64(struct guest_iommu *iommu, + unsigned long offset) +{ + uint64_t val; + + switch ( offset ) + { + case IOMMU_DEV_TABLE_BASE_LOW_OFFSET: + val = reg_to_u64(iommu->dev_table.reg_base); + break; + case IOMMU_CMD_BUFFER_BASE_LOW_OFFSET: + val = reg_to_u64(iommu->cmd_buffer.reg_base); + break; + case IOMMU_EVENT_LOG_BASE_LOW_OFFSET: + val = reg_to_u64(iommu->event_log.reg_base); + break; + case IOMMU_PPR_LOG_BASE_LOW_OFFSET: + val = reg_to_u64(iommu->ppr_log.reg_base); + break; + case IOMMU_CMD_BUFFER_HEAD_OFFSET: + val = reg_to_u64(iommu->cmd_buffer.reg_head); + break; + case IOMMU_CMD_BUFFER_TAIL_OFFSET: + val = reg_to_u64(iommu->cmd_buffer.reg_tail); + break; + case IOMMU_EVENT_LOG_HEAD_OFFSET:; + val = reg_to_u64(iommu->event_log.reg_head); + break; + case IOMMU_EVENT_LOG_TAIL_OFFSET: + val = reg_to_u64(iommu->event_log.reg_tail); + break; + case IOMMU_PPR_LOG_HEAD_OFFSET: + val = reg_to_u64(iommu->ppr_log.reg_head); + break; + case IOMMU_PPR_LOG_TAIL_OFFSET: + val = reg_to_u64(iommu->ppr_log.reg_tail); + break; + case IOMMU_CONTROL_MMIO_OFFSET: + val = reg_to_u64(iommu->reg_ctrl); + break; + case IOMMU_STATUS_MMIO_OFFSET: + val = reg_to_u64(iommu->reg_status); + break; + case IOMMU_EXT_FEATURE_MMIO_OFFSET: + val = reg_to_u64(iommu->reg_ext_feature); + break; + + default: + AMD_IOMMU_DEBUG("Guest reads unknown mmio offset = %lx\n", + offset); + val = 0; + break; + } + + 
return val; +} + +static int guest_iommu_mmio_read(struct vcpu *v, unsigned long addr, + unsigned long len, unsigned long *pval) +{ + struct guest_iommu *iommu = vcpu_iommu(v); + unsigned long offset; + uint64_t val; + uint32_t mmio, shift; + uint64_t mask = 0; + + offset = addr - iommu->mmio_base; + + if ( unlikely((offset & (len - 1 )) || (len > 8)) ) + { + AMD_IOMMU_DEBUG("iommu mmio write access is not aligned." + "offset = %lx, len = %lx \n", offset, len); + return X86EMUL_UNHANDLEABLE; + } + + mask = (len == 8) ? (~0ULL) : (1ULL << (len * 8)) - 1; + shift = (offset & 7u) * 8; + + /* mmio access is always aligned on 8-byte boundary */ + mmio = offset & (~7u); + + spin_lock(&iommu->lock); + val = iommu_mmio_read64(iommu, mmio); + spin_unlock(&iommu->lock); + + *pval = (val >> shift ) & mask; + + return X86EMUL_OKAY; +} + +static void guest_iommu_mmio_write64(struct guest_iommu *iommu, + unsigned long offset, uint64_t val) +{ + switch ( offset ) + { + case IOMMU_DEV_TABLE_BASE_LOW_OFFSET: + u64_to_reg(&iommu->dev_table.reg_base, val); + break; + case IOMMU_CMD_BUFFER_BASE_LOW_OFFSET: + u64_to_reg(&iommu->cmd_buffer.reg_base, val); + break; + case IOMMU_EVENT_LOG_BASE_LOW_OFFSET: + u64_to_reg(&iommu->event_log.reg_base, val); + case IOMMU_PPR_LOG_BASE_LOW_OFFSET: + u64_to_reg(&iommu->ppr_log.reg_base, val); + break; + case IOMMU_CONTROL_MMIO_OFFSET: + guest_iommu_write_ctrl(iommu, val); + break; + case IOMMU_CMD_BUFFER_HEAD_OFFSET: + u64_to_reg(&iommu->cmd_buffer.reg_head, val); + break; + case IOMMU_CMD_BUFFER_TAIL_OFFSET: + u64_to_reg(&iommu->cmd_buffer.reg_tail, val); + tasklet_schedule(&iommu->cmd_buffer_tasklet); + break; + case IOMMU_EVENT_LOG_HEAD_OFFSET: + u64_to_reg(&iommu->event_log.reg_head, val); + break; + case IOMMU_EVENT_LOG_TAIL_OFFSET: + u64_to_reg(&iommu->event_log.reg_tail, val); + break; + case IOMMU_PPR_LOG_HEAD_OFFSET: + u64_to_reg(&iommu->ppr_log.reg_head, val); + break; + case IOMMU_PPR_LOG_TAIL_OFFSET: + u64_to_reg(&iommu->ppr_log.reg_tail, val); + break; + case IOMMU_STATUS_MMIO_OFFSET: + u64_to_reg(&iommu->reg_status, val); + break; + + default: + AMD_IOMMU_DEBUG("guest writes unknown mmio offset = %lx, " + "val = %lx\n", offset, val); + break; + } +} + +static int guest_iommu_mmio_write(struct vcpu *v, unsigned long addr, + unsigned long len, unsigned long val) +{ + struct guest_iommu *iommu = vcpu_iommu(v); + unsigned long offset; + uint64_t reg_old, mmio; + uint32_t shift; + uint64_t mask = 0; + + offset = addr - iommu->mmio_base; + + if ( unlikely((offset & (len - 1 )) || (len > 8)) ) + { + AMD_IOMMU_DEBUG("iommu mmio write access is not aligned." + "offset = %lx, len = %lx \n", offset, len); + return X86EMUL_UNHANDLEABLE; + } + + mask = (len == 8) ? 
(~0ULL): (1ULL << (len * 8)) - 1; + shift = (offset & 7u) * 8; + + /* mmio access is always aligned on 8-byte boundary */ + mmio = offset & (~7u); + + spin_lock(&iommu->lock); + + reg_old = iommu_mmio_read64(iommu, mmio); + reg_old &= ~( mask << shift ); + val = reg_old | ((val & mask) << shift ); + guest_iommu_mmio_write64(iommu, mmio, val); + + spin_unlock(&iommu->lock); + + return X86EMUL_OKAY; +} + +int guest_iommu_set_base(struct domain *d, uint64_t base) +{ + p2m_type_t t; + struct guest_iommu *iommu = domain_iommu(d); + + iommu->mmio_base = base; + base >>= PAGE_SHIFT; + + for ( int i = 0; i < IOMMU_MMIO_PAGE_NR; i++ ) + { + unsigned long gfn = base + i; + + get_gfn_query(d, gfn, &t); + p2m_change_type(d, gfn, t, p2m_mmio_dm); + put_gfn(d, gfn); + } + + return 0; +} + +/* Initialize mmio read only bits */ +static void guest_iommu_reg_init(struct guest_iommu *iommu) +{ + uint32_t lower, upper; + + lower = upper = 0; + /* Support prefetch */ + iommu_set_bit(&lower,IOMMU_EXT_FEATURE_PREFSUP_SHIFT); + /* Support PPR log */ + iommu_set_bit(&lower,IOMMU_EXT_FEATURE_PPRSUP_SHIFT); + /* Support guest translation */ + iommu_set_bit(&lower,IOMMU_EXT_FEATURE_GTSUP_SHIFT); + /* Support invalidate all command */ + iommu_set_bit(&lower,IOMMU_EXT_FEATURE_IASUP_SHIFT); + + /* Host translation size has 6 levels */ + set_field_in_reg_u32(HOST_ADDRESS_SIZE_6_LEVEL, lower, + IOMMU_EXT_FEATURE_HATS_MASK, + IOMMU_EXT_FEATURE_HATS_SHIFT, + &lower); + /* Guest translation size has 6 levels */ + set_field_in_reg_u32(GUEST_ADDRESS_SIZE_6_LEVEL, lower, + IOMMU_EXT_FEATURE_GATS_MASK, + IOMMU_EXT_FEATURE_GATS_SHIFT, + &lower); + /* Single level gCR3 */ + set_field_in_reg_u32(GUEST_CR3_1_LEVEL, lower, + IOMMU_EXT_FEATURE_GLXSUP_MASK, + IOMMU_EXT_FEATURE_GLXSUP_SHIFT, &lower); + /* 9 bit PASID */ + set_field_in_reg_u32(PASMAX_9_bit, upper, + IOMMU_EXT_FEATURE_PASMAX_MASK, + IOMMU_EXT_FEATURE_PASMAX_SHIFT, &upper); + + iommu->reg_ext_feature.lo = lower; + iommu->reg_ext_feature.hi = upper; +} + +/* Domain specific initialization */ +int guest_iommu_init(struct domain* d) +{ + struct guest_iommu *iommu; + struct hvm_iommu *hd = domain_hvm_iommu(d); + + if ( !is_hvm_domain(d) ) + return 0; + + iommu = xzalloc(struct guest_iommu); + if ( !iommu ) + { + AMD_IOMMU_DEBUG("Error allocating guest iommu structure.\n"); + return 1; + } + + guest_iommu_reg_init(iommu); + iommu->domain = d; + hd->g_iommu = iommu; + + tasklet_init(&iommu->cmd_buffer_tasklet, + guest_iommu_process_command, (unsigned long)d); + + spin_lock_init(&iommu->lock); + + return 0; +} + +void guest_iommu_destroy(struct domain *d) +{ + struct guest_iommu *iommu; + + if ( !is_hvm_domain(d) ) + return; + + iommu = domain_iommu(d); + + tasklet_kill(&iommu->cmd_buffer_tasklet); + xfree(iommu); + + domain_hvm_iommu(d)->g_iommu = NULL; +} + +static int guest_iommu_mmio_range(struct vcpu *v, unsigned long addr) +{ + struct guest_iommu *iommu = vcpu_iommu(v); + + return ( addr >= iommu->mmio_base && + addr < (iommu->mmio_base + IOMMU_MMIO_SIZE) ); +} + +const struct hvm_mmio_handler iommu_mmio_handler = { + .check_handler = guest_iommu_mmio_range, + .read_handler = guest_iommu_mmio_read, + .write_handler = guest_iommu_mmio_write +}; diff -r dfdc0df7d68f -r 6789e0d335e6 xen/drivers/passthrough/amd/iommu_map.c --- a/xen/drivers/passthrough/amd/iommu_map.c Tue Jan 10 17:39:41 2012 +0100 +++ b/xen/drivers/passthrough/amd/iommu_map.c Tue Jan 10 17:39:45 2012 +0100 @@ -234,6 +234,53 @@ void __init iommu_dte_add_device_entry(u dte[3] = entry; } +void 
iommu_dte_set_guest_cr3(u32 *dte, u16 dom_id, u64 gcr3, + int gv, unsigned int glx) +{ + u32 entry, gcr3_1, gcr3_2, gcr3_3; + + gcr3_3 = gcr3 >> 31; + gcr3_2 = (gcr3 >> 15) & 0xFFFF; + gcr3_1 = (gcr3 >> PAGE_SHIFT) & 0x7; + + /* I bit must be set when gcr3 is enabled */ + entry = dte[3]; + set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry, + IOMMU_DEV_TABLE_IOTLB_SUPPORT_MASK, + IOMMU_DEV_TABLE_IOTLB_SUPPORT_SHIFT, &entry); + /* update gcr3 */ + set_field_in_reg_u32(gcr3_3, entry, + IOMMU_DEV_TABLE_GCR3_3_MASK, + IOMMU_DEV_TABLE_GCR3_3_SHIFT, &entry); + dte[3] = entry; + + set_field_in_reg_u32(dom_id, entry, + IOMMU_DEV_TABLE_DOMAIN_ID_MASK, + IOMMU_DEV_TABLE_DOMAIN_ID_SHIFT, &entry); + /* update gcr3 */ + entry = dte[2]; + set_field_in_reg_u32(gcr3_2, entry, + IOMMU_DEV_TABLE_GCR3_2_MASK, + IOMMU_DEV_TABLE_GCR3_2_SHIFT, &entry); + dte[2] = entry; + + entry = dte[1]; + /* Enable GV bit */ + set_field_in_reg_u32(!!gv, entry, + IOMMU_DEV_TABLE_GV_MASK, + IOMMU_DEV_TABLE_GV_SHIFT, &entry); + + /* 1 level guest cr3 table */ + set_field_in_reg_u32(glx, entry, + IOMMU_DEV_TABLE_GLX_MASK, + IOMMU_DEV_TABLE_GLX_SHIFT, &entry); + /* update gcr3 */ + set_field_in_reg_u32(gcr3_1, entry, + IOMMU_DEV_TABLE_GCR3_1_MASK, + IOMMU_DEV_TABLE_GCR3_1_SHIFT, &entry); + dte[1] = entry; +} + u64 amd_iommu_get_next_table_from_pte(u32 *entry) { u64 addr_lo, addr_hi, ptr; diff -r dfdc0df7d68f -r 6789e0d335e6 xen/drivers/passthrough/amd/pci_amd_iommu.c --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Tue Jan 10 17:39:41 2012 +0100 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Tue Jan 10 17:39:45 2012 +0100 @@ -260,6 +260,8 @@ static int amd_iommu_domain_init(struct hd->domain_id = d->domain_id; + guest_iommu_init(d); + return 0; } @@ -443,6 +445,7 @@ static void deallocate_iommu_page_tables static void amd_iommu_domain_destroy(struct domain *d) { + guest_iommu_destroy(d); deallocate_iommu_page_tables(d); amd_iommu_flush_all_pages(d); } diff -r dfdc0df7d68f -r 6789e0d335e6 xen/include/asm-x86/amd-iommu.h --- a/xen/include/asm-x86/amd-iommu.h Tue Jan 10 17:39:41 2012 +0100 +++ b/xen/include/asm-x86/amd-iommu.h Tue Jan 10 17:39:45 2012 +0100 @@ -24,6 +24,7 @@ #include <xen/types.h> #include <xen/list.h> #include <xen/spinlock.h> +#include <xen/tasklet.h> #include <asm/hvm/svm/amd-iommu-defs.h> #define iommu_found() (!list_empty(&amd_iommu_head)) @@ -129,4 +130,55 @@ struct ivrs_mappings *get_ivrs_mappings( int iterate_ivrs_mappings(int (*)(u16 seg, struct ivrs_mappings *)); int iterate_ivrs_entries(int (*)(u16 seg, struct ivrs_mappings *)); +/* iommu tables in guest space */ +struct mmio_reg { + uint32_t lo; + uint32_t hi; +}; + +struct guest_dev_table { + struct mmio_reg reg_base; + uint32_t size; +}; + +struct guest_buffer { + struct mmio_reg reg_base; + struct mmio_reg reg_tail; + struct mmio_reg reg_head; + uint32_t entries; +}; + +struct guest_iommu_msi { + uint8_t vector; + uint8_t dest; + uint8_t dest_mode; + uint8_t delivery_mode; + uint8_t trig_mode; +}; + +/* virtual IOMMU structure */ +struct guest_iommu { + + struct domain *domain; + spinlock_t lock; + bool_t enabled; + + struct guest_dev_table dev_table; + struct guest_buffer cmd_buffer; + struct guest_buffer event_log; + struct guest_buffer ppr_log; + + struct tasklet cmd_buffer_tasklet; + + uint64_t mmio_base; /* MMIO base address */ + + /* MMIO regs */ + struct mmio_reg reg_ctrl; /* MMIO offset 0018h */ + struct mmio_reg reg_status; /* MMIO offset 2020h */ + struct mmio_reg reg_ext_feature; /* MMIO offset 0030h */ + + /* guest interrupt settings */ 
+ struct guest_iommu_msi msi; +}; + #endif /* _ASM_X86_64_AMD_IOMMU_H */ diff -r dfdc0df7d68f -r 6789e0d335e6 xen/include/asm-x86/hvm/io.h --- a/xen/include/asm-x86/hvm/io.h Tue Jan 10 17:39:41 2012 +0100 +++ b/xen/include/asm-x86/hvm/io.h Tue Jan 10 17:39:45 2012 +0100 @@ -69,8 +69,9 @@ extern const struct hvm_mmio_handler hpe extern const struct hvm_mmio_handler vlapic_mmio_handler; extern const struct hvm_mmio_handler vioapic_mmio_handler; extern const struct hvm_mmio_handler msixtbl_mmio_handler; +extern const struct hvm_mmio_handler iommu_mmio_handler; -#define HVM_MMIO_HANDLER_NR 4 +#define HVM_MMIO_HANDLER_NR 5 int hvm_io_intercept(ioreq_t *p, int type); void register_io_handler( diff -r dfdc0df7d68f -r 6789e0d335e6 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Tue Jan 10 17:39:41 2012 +0100 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Tue Jan 10 17:39:45 2012 +0100 @@ -113,6 +113,13 @@ #define IOMMU_DEV_TABLE_PAGE_TABLE_PTR_LOW_SHIFT 12 /* DeviceTable Entry[63:32] */ +#define IOMMU_DEV_TABLE_GV_SHIFT 23 +#define IOMMU_DEV_TABLE_GV_MASK 0x800000 +#define IOMMU_DEV_TABLE_GLX_SHIFT 24 +#define IOMMU_DEV_TABLE_GLX_MASK 0x3000000 +#define IOMMU_DEV_TABLE_GCR3_1_SHIFT 26 +#define IOMMU_DEV_TABLE_GCR3_1_MASK 0x1c000000 + #define IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_MASK 0x000FFFFF #define IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_SHIFT 0 #define IOMMU_DEV_TABLE_IO_READ_PERMISSION_MASK 0x20000000 @@ -123,6 +130,8 @@ /* DeviceTable Entry[95:64] */ #define IOMMU_DEV_TABLE_DOMAIN_ID_MASK 0x0000FFFF #define IOMMU_DEV_TABLE_DOMAIN_ID_SHIFT 0 +#define IOMMU_DEV_TABLE_GCR3_2_SHIFT 16 +#define IOMMU_DEV_TABLE_GCR3_2_MASK 0xFFFF0000 /* DeviceTable Entry[127:96] */ #define IOMMU_DEV_TABLE_IOTLB_SUPPORT_MASK 0x00000001 @@ -151,6 +160,8 @@ #define IOMMU_DEV_TABLE_INT_TABLE_IGN_UNMAPPED_SHIFT 5 #define IOMMU_DEV_TABLE_INT_TABLE_PTR_LOW_MASK 0xFFFFFFC0 #define IOMMU_DEV_TABLE_INT_TABLE_PTR_LOW_SHIFT 6 +#define IOMMU_DEV_TABLE_GCR3_3_SHIFT 11 +#define IOMMU_DEV_TABLE_GCR3_3_MASK 0xfffff800 /* DeviceTable Entry[191:160] */ #define IOMMU_DEV_TABLE_INT_TABLE_PTR_HIGH_MASK 0x000FFFFF @@ -179,6 +190,7 @@ #define IOMMU_CMD_INVALIDATE_IOMMU_PAGES 0x3 #define IOMMU_CMD_INVALIDATE_IOTLB_PAGES 0x4 #define IOMMU_CMD_INVALIDATE_INT_TABLE 0x5 +#define IOMMU_CMD_COMPLETE_PPR_REQUEST 0x7 #define IOMMU_CMD_INVALIDATE_IOMMU_ALL 0x8 /* COMPLETION_WAIT command */ @@ -265,6 +277,28 @@ #define IOMMU_EVENT_DEVICE_ID_MASK 0x0000FFFF #define IOMMU_EVENT_DEVICE_ID_SHIFT 0 +/* PPR Log */ +#define IOMMU_PPR_LOG_ENTRY_SIZE 16 +#define IOMMU_PPR_LOG_POWER_OF2_ENTRIES_PER_PAGE 8 +#define IOMMU_PPR_LOG_U32_PER_ENTRY (IOMMU_PPR_LOG_ENTRY_SIZE / 4) + +#define IOMMU_PPR_LOG_BASE_LOW_OFFSET 0x0038 +#define IOMMU_PPR_LOG_BASE_HIGH_OFFSET 0x003C +#define IOMMU_PPR_LOG_BASE_LOW_MASK 0xFFFFF000 +#define IOMMU_PPR_LOG_BASE_LOW_SHIFT 12 +#define IOMMU_PPR_LOG_BASE_HIGH_MASK 0x000FFFFF +#define IOMMU_PPR_LOG_BASE_HIGH_SHIFT 0 +#define IOMMU_PPR_LOG_LENGTH_MASK 0x0F000000 +#define IOMMU_PPR_LOG_LENGTH_SHIFT 24 +#define IOMMU_PPR_LOG_HEAD_MASK 0x0007FFF0 +#define IOMMU_PPR_LOG_HEAD_SHIFT 4 +#define IOMMU_PPR_LOG_TAIL_MASK 0x0007FFF0 +#define IOMMU_PPR_LOG_TAIL_SHIFT 4 +#define IOMMU_PPR_LOG_HEAD_OFFSET 0x2030 +#define IOMMU_PPR_LOG_TAIL_OFFSET 0x2038 +#define IOMMU_PPR_LOG_DEVICE_ID_MASK 0x0000FFFF +#define IOMMU_PPR_LOG_DEVICE_ID_SHIFT 0 + /* Control Register */ #define IOMMU_CONTROL_MMIO_OFFSET 0x18 #define IOMMU_CONTROL_TRANSLATION_ENABLE_MASK 0x00000001 @@ -292,6 +326,11 @@ #define 
IOMMU_CONTROL_RESTART_MASK 0x80000000 #define IOMMU_CONTROL_RESTART_SHIFT 31 +#define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT 13 +#define IOMMU_CONTROL_PPR_INT_SHIFT 14 +#define IOMMU_CONTROL_PPR_ENABLE_SHIFT 15 +#define IOMMU_CONTROL_GT_ENABLE_SHIFT 16 + /* Exclusion Register */ #define IOMMU_EXCLUSION_BASE_LOW_OFFSET 0x20 #define IOMMU_EXCLUSION_BASE_HIGH_OFFSET 0x24 @@ -325,7 +364,8 @@ #define IOMMU_EXT_FEATURE_HATS_MASK 0x00000C00 #define IOMMU_EXT_FEATURE_GATS_SHIFT 0x12 #define IOMMU_EXT_FEATURE_GATS_MASK 0x00003000 -#define IOMMU_EXT_FEATURE_GLXSUP 0x14 +#define IOMMU_EXT_FEATURE_GLXSUP_SHIFT 0x14 +#define IOMMU_EXT_FEATURE_GLXSUP_MASK 0x0000C000 #define IOMMU_EXT_FEATURE_PASMAX_SHIFT 0x0 #define IOMMU_EXT_FEATURE_PASMAX_MASK 0x0000001F @@ -342,6 +382,9 @@ #define IOMMU_STATUS_EVENT_LOG_RUN_SHIFT 3 #define IOMMU_STATUS_CMD_BUFFER_RUN_MASK 0x00000010 #define IOMMU_STATUS_CMD_BUFFER_RUN_SHIFT 4 +#define IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT 5 +#define IOMMU_STATUS_PPR_LOG_INT_SHIFT 6 +#define IOMMU_STATUS_PPR_LOG_RUN_SHIFT 7 /* I/O Page Table */ #define IOMMU_PAGE_TABLE_ENTRY_SIZE 8 diff -r dfdc0df7d68f -r 6789e0d335e6 xen/include/asm-x86/hvm/svm/amd-iommu-proto.h --- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h Tue Jan 10 17:39:41 2012 +0100 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h Tue Jan 10 17:39:45 2012 +0100 @@ -71,6 +71,8 @@ void amd_iommu_set_root_page_table( u32 *dte, u64 root_ptr, u16 domain_id, u8 paging_mode, u8 valid); void iommu_dte_set_iotlb(u32 *dte, u8 i); void iommu_dte_add_device_entry(u32 *dte, struct ivrs_mappings *ivrs_dev); +void iommu_dte_set_guest_cr3(u32 *dte, u16 dom_id, u64 gcr3, + int gv, unsigned int glx); /* send cmd to iommu */ void amd_iommu_flush_all_pages(struct domain *d); @@ -106,6 +108,14 @@ void amd_iommu_resume(void); void amd_iommu_suspend(void); void amd_iommu_crash_shutdown(void); +/* guest iommu support */ +void amd_iommu_send_guest_cmd(struct amd_iommu *iommu, u32 cmd[]); +void guest_iommu_add_ppr_log(struct domain *d, u32 entry[]); +void guest_iommu_add_event_log(struct domain *d, u32 entry[]); +int guest_iommu_init(struct domain* d); +void guest_iommu_destroy(struct domain *d); +int guest_iommu_set_base(struct domain *d, uint64_t base); + static inline u32 get_field_from_reg_u32(u32 reg_value, u32 mask, u32 shift) { u32 field; diff -r dfdc0df7d68f -r 6789e0d335e6 xen/include/xen/hvm/iommu.h --- a/xen/include/xen/hvm/iommu.h Tue Jan 10 17:39:41 2012 +0100 +++ b/xen/include/xen/hvm/iommu.h Tue Jan 10 17:39:45 2012 +0100 @@ -47,6 +47,7 @@ struct hvm_iommu { int domain_id; int paging_mode; struct page_info *root_table; + struct guest_iommu *g_iommu; /* iommu_ops */ const struct iommu_ops *platform_ops;
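[Editor's note: patch 03's guest_iommu_mmio_read()/guest_iommu_mmio_write() emulate the IOMMU registers as naturally aligned 64-bit values and widen narrower guest accesses with a mask-and-shift. A standalone sketch of that widening for the write path, operating on a local uint64_t instead of the emulated register file, is:]

#include <stdint.h>
#include <assert.h>

/*
 * Merge a guest write of 'len' bytes (1, 2, 4 or 8) at byte offset
 * 'offset' into the 64-bit register backing the aligned 8-byte slot,
 * mirroring the read-modify-write done in guest_iommu_mmio_write().
 */
static uint64_t mmio_merge_write(uint64_t reg_old, unsigned long offset,
                                 unsigned long len, uint64_t val)
{
    uint64_t mask = (len == 8) ? ~0ULL : (1ULL << (len * 8)) - 1;
    unsigned int shift = (offset & 7u) * 8;   /* bit position inside the qword */

    return (reg_old & ~(mask << shift)) | ((val & mask) << shift);
}

int main(void)
{
    /* A 4-byte guest write of 0xdeadbeef to the high half of a register. */
    uint64_t reg = 0x1111222233334444ULL;

    reg = mmio_merge_write(reg, 4, 4, 0xdeadbeef);
    assert(reg == 0xdeadbeef33334444ULL);
    return 0;
}

The merged 64-bit value is then handed to guest_iommu_mmio_write64(), which dispatches on the aligned offset; the read path uses the same mask/shift to extract just the bytes the guest asked for.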
# HG changeset patch # User Wei Wang <wei.wang2@amd.com> # Date 1326213589 -3600 # Node ID 6cb08a39044171124b9b9176b50d2ea9196420bb # Parent 6789e0d335e67e700f97942dd094c548fbbd80f3 amd iommu: Enable ppr log. IOMMUv2 writes peripheral page service request (PPR) records into ppr log to report guest OS pending DMA page requests from ATS devices. Signed-off-by: Wei Wang <wei.wang2@amd.com> diff -r 6789e0d335e6 -r 6cb08a390441 xen/drivers/passthrough/amd/iommu_init.c --- a/xen/drivers/passthrough/amd/iommu_init.c Tue Jan 10 17:39:45 2012 +0100 +++ b/xen/drivers/passthrough/amd/iommu_init.c Tue Jan 10 17:39:49 2012 +0100 @@ -126,14 +126,15 @@ static void register_iommu_dev_table_in_ static void register_iommu_cmd_buffer_in_mmio_space(struct amd_iommu *iommu) { - u64 addr_64, addr_lo, addr_hi; + u64 addr_64; + u32 addr_lo, addr_hi; u32 power_of2_entries; u32 entry; ASSERT( iommu->cmd_buffer.buffer ); - addr_64 = (u64)virt_to_maddr(iommu->cmd_buffer.buffer); - addr_lo = addr_64 & DMA_32BIT_MASK; + addr_64 = virt_to_maddr(iommu->cmd_buffer.buffer); + addr_lo = addr_64; addr_hi = addr_64 >> 32; entry = 0; @@ -153,14 +154,15 @@ static void register_iommu_cmd_buffer_in static void register_iommu_event_log_in_mmio_space(struct amd_iommu *iommu) { - u64 addr_64, addr_lo, addr_hi; + u64 addr_64; + u32 addr_lo, addr_hi; u32 power_of2_entries; u32 entry; ASSERT( iommu->event_log.buffer ); - addr_64 = (u64)virt_to_maddr(iommu->event_log.buffer); - addr_lo = addr_64 & DMA_32BIT_MASK; + addr_64 = virt_to_maddr(iommu->event_log.buffer); + addr_lo = addr_64; addr_hi = addr_64 >> 32; entry = 0; @@ -178,6 +180,35 @@ static void register_iommu_event_log_in_ writel(entry, iommu->mmio_base+IOMMU_EVENT_LOG_BASE_HIGH_OFFSET); } +static void register_iommu_ppr_log_in_mmio_space(struct amd_iommu *iommu) +{ + u64 addr_64; + u32 addr_lo, addr_hi; + u32 power_of2_entries; + u32 entry; + + ASSERT ( iommu->ppr_log.buffer ); + + addr_64 = virt_to_maddr(iommu->ppr_log.buffer); + addr_lo = addr_64; + addr_hi = addr_64 >> 32; + + entry = 0; + iommu_set_addr_lo_to_reg(&entry, addr_lo >> PAGE_SHIFT); + writel(entry, iommu->mmio_base + IOMMU_PPR_LOG_BASE_LOW_OFFSET); + + power_of2_entries = get_order_from_bytes(iommu->ppr_log.alloc_size) + + IOMMU_PPR_LOG_POWER_OF2_ENTRIES_PER_PAGE; + + entry = 0; + iommu_set_addr_hi_to_reg(&entry, addr_hi); + set_field_in_reg_u32(power_of2_entries, entry, + IOMMU_PPR_LOG_LENGTH_MASK, + IOMMU_PPR_LOG_LENGTH_SHIFT, &entry); + writel(entry, iommu->mmio_base + IOMMU_PPR_LOG_BASE_HIGH_OFFSET); +} + + static void set_iommu_translation_control(struct amd_iommu *iommu, int enable) { @@ -215,10 +246,10 @@ static void set_iommu_command_buffer_con static void register_iommu_exclusion_range(struct amd_iommu *iommu) { - u64 addr_lo, addr_hi; + u32 addr_lo, addr_hi; u32 entry; - addr_lo = iommu->exclusion_limit & DMA_32BIT_MASK; + addr_lo = iommu->exclusion_limit; addr_hi = iommu->exclusion_limit >> 32; set_field_in_reg_u32((u32)addr_hi, 0, @@ -278,6 +309,35 @@ static void set_iommu_event_log_control( writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); } +static void set_iommu_ppr_log_control(struct amd_iommu *iommu, + int enable) +{ + u32 entry; + + entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); + + /*reset head and tail pointer manually before enablement */ + if ( enable ) + { + writel(0x0, iommu->mmio_base + IOMMU_PPR_LOG_HEAD_OFFSET); + writel(0x0, iommu->mmio_base + IOMMU_PPR_LOG_TAIL_OFFSET); + + iommu_set_bit(&entry, IOMMU_CONTROL_PPR_ENABLE_SHIFT); + iommu_set_bit(&entry, 
IOMMU_CONTROL_PPR_INT_SHIFT); + iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT); + } + else + { + iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_ENABLE_SHIFT); + iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_INT_SHIFT); + iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT); + } + + writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); + if ( enable ) + AMD_IOMMU_DEBUG("PPR Log Enabled.\n"); +} + static void parse_event_log_entry(struct amd_iommu *, u32 entry[]); static int amd_iommu_read_event_log(struct amd_iommu *iommu) @@ -585,12 +645,19 @@ static void enable_iommu(struct amd_iomm register_iommu_event_log_in_mmio_space(iommu); register_iommu_exclusion_range(iommu); + if ( iommu_has_feature(iommu, IOMMU_EXT_FEATURE_PPRSUP_SHIFT) ) + register_iommu_ppr_log_in_mmio_space(iommu); + iommu_msi_set_affinity(irq_to_desc(iommu->irq), &cpu_online_map); amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED); set_iommu_ht_flags(iommu); set_iommu_command_buffer_control(iommu, IOMMU_CONTROL_ENABLED); set_iommu_event_log_control(iommu, IOMMU_CONTROL_ENABLED); + + if ( iommu_has_feature(iommu, IOMMU_EXT_FEATURE_PPRSUP_SHIFT) ) + set_iommu_ppr_log_control(iommu, IOMMU_CONTROL_ENABLED); + set_iommu_translation_control(iommu, IOMMU_CONTROL_ENABLED); if ( iommu_has_feature(iommu, IOMMU_EXT_FEATURE_IASUP_SHIFT) ) @@ -671,16 +738,29 @@ static void * __init allocate_event_log( IOMMU_EVENT_LOG_DEFAULT_ENTRIES, "Event Log"); } +static void * __init allocate_ppr_log(struct amd_iommu *iommu) +{ + /* allocate ''ppr log'' in power of 2 increments of 4K */ + return allocate_ring_buffer(&iommu->ppr_log, sizeof(ppr_entry_t), + IOMMU_PPR_LOG_DEFAULT_ENTRIES, "PPR Log"); +} + static int __init amd_iommu_init_one(struct amd_iommu *iommu) { + if ( map_iommu_mmio_region(iommu) != 0 ) + goto error_out; + + get_iommu_features(iommu); + if ( allocate_cmd_buffer(iommu) == NULL ) goto error_out; if ( allocate_event_log(iommu) == NULL ) goto error_out; - if ( map_iommu_mmio_region(iommu) != 0 ) - goto error_out; + if ( iommu_has_feature(iommu, IOMMU_EXT_FEATURE_PPRSUP_SHIFT) ) + if ( allocate_ppr_log(iommu) == NULL ) + goto error_out; if ( set_iommu_interrupt_handler(iommu) == 0 ) goto error_out; @@ -693,8 +773,6 @@ static int __init amd_iommu_init_one(str iommu->dev_table.entries = device_table.entries; iommu->dev_table.buffer = device_table.buffer; - get_iommu_features(iommu); - enable_iommu(iommu); printk("AMD-Vi: IOMMU %d Enabled.\n", nr_amd_iommus ); nr_amd_iommus++; @@ -717,6 +795,7 @@ static void __init amd_iommu_init_cleanu { deallocate_ring_buffer(&iommu->cmd_buffer); deallocate_ring_buffer(&iommu->event_log); + deallocate_ring_buffer(&iommu->ppr_log); unmap_iommu_mmio_region(iommu); } xfree(iommu); @@ -915,6 +994,10 @@ static void disable_iommu(struct amd_iom amd_iommu_msi_enable(iommu, IOMMU_CONTROL_DISABLED); set_iommu_command_buffer_control(iommu, IOMMU_CONTROL_DISABLED); set_iommu_event_log_control(iommu, IOMMU_CONTROL_DISABLED); + + if ( iommu_has_feature(iommu, IOMMU_EXT_FEATURE_PPRSUP_SHIFT) ) + set_iommu_ppr_log_control(iommu, IOMMU_CONTROL_DISABLED); + set_iommu_translation_control(iommu, IOMMU_CONTROL_DISABLED); iommu->enabled = 0; diff -r 6789e0d335e6 -r 6cb08a390441 xen/include/asm-x86/amd-iommu.h --- a/xen/include/asm-x86/amd-iommu.h Tue Jan 10 17:39:45 2012 +0100 +++ b/xen/include/asm-x86/amd-iommu.h Tue Jan 10 17:39:49 2012 +0100 @@ -93,6 +93,7 @@ struct amd_iommu { struct table_struct dev_table; struct ring_buffer cmd_buffer; struct ring_buffer event_log; + struct ring_buffer ppr_log; int 
exclusion_enable; int exclusion_allow_all; diff -r 6789e0d335e6 -r 6cb08a390441 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Tue Jan 10 17:39:45 2012 +0100 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Tue Jan 10 17:39:49 2012 +0100 @@ -27,6 +27,9 @@ /* IOMMU Event Log entries: in power of 2 increments, minimum of 256 */ #define IOMMU_EVENT_LOG_DEFAULT_ENTRIES 512 +/* IOMMU PPR Log entries: in power of 2 increments, minimum of 256 */ +#define IOMMU_PPR_LOG_DEFAULT_ENTRIES 512 + #define PTE_PER_TABLE_SHIFT 9 #define PTE_PER_TABLE_SIZE (1 << PTE_PER_TABLE_SHIFT) #define PTE_PER_TABLE_MASK (~(PTE_PER_TABLE_SIZE - 1))
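For a sense of the numbers in register_iommu_ppr_log_in_mmio_space(): with the 512-entry default added at the end of this patch and the 16-byte entry size from the defs header, the ring buffer is 512 * 16 = 8192 bytes, i.e. two pages, so get_order_from_bytes() returns 1. Adding IOMMU_PPR_LOG_POWER_OF2_ENTRIES_PER_PAGE (8, since 4096 / 16 = 256 = 2^8) gives 9, so the length field written to the PPR log base register encodes 2^9 = 512 entries.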
Wei Wang
2012-Jan-10 17:07 UTC
[PATCH 05 of 14 V3] amd iommu: Enable guest level translation
# HG changeset patch # User Wei Wang <wei.wang2@amd.com> # Date 1326213593 -3600 # Node ID e698b7b63a9dc75c5cad4dbe02d38d90bdaf1512 # Parent 6cb08a39044171124b9b9176b50d2ea9196420bb amd iommu: Enable guest level translation. Similar to nested paging for SVM, IOMMUv2 supports two level translations for DMA. This patch enables this feature. Signed-off-by: Wei Wang <wei.wang2@amd.com> diff -r 6cb08a390441 -r e698b7b63a9d xen/drivers/passthrough/amd/iommu_init.c --- a/xen/drivers/passthrough/amd/iommu_init.c Tue Jan 10 17:39:49 2012 +0100 +++ b/xen/drivers/passthrough/amd/iommu_init.c Tue Jan 10 17:39:53 2012 +0100 @@ -223,6 +223,23 @@ static void set_iommu_translation_contro writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET); } +static void set_iommu_guest_translation_control(struct amd_iommu *iommu, + int enable) +{ + u32 entry; + + entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); + + enable ? + iommu_set_bit(&entry, IOMMU_CONTROL_GT_ENABLE_SHIFT): + iommu_clear_bit(&entry, IOMMU_CONTROL_GT_ENABLE_SHIFT); + + writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET); + + if ( enable ) + AMD_IOMMU_DEBUG("Guest Translation Enabled.\n"); +} + static void set_iommu_command_buffer_control(struct amd_iommu *iommu, int enable) { @@ -658,6 +675,9 @@ static void enable_iommu(struct amd_iomm if ( iommu_has_feature(iommu, IOMMU_EXT_FEATURE_PPRSUP_SHIFT) ) set_iommu_ppr_log_control(iommu, IOMMU_CONTROL_ENABLED); + if ( iommu_has_feature(iommu, IOMMU_EXT_FEATURE_GTSUP_SHIFT) ) + set_iommu_guest_translation_control(iommu, IOMMU_CONTROL_ENABLED); + set_iommu_translation_control(iommu, IOMMU_CONTROL_ENABLED); if ( iommu_has_feature(iommu, IOMMU_EXT_FEATURE_IASUP_SHIFT) ) @@ -998,6 +1018,9 @@ static void disable_iommu(struct amd_iom if ( iommu_has_feature(iommu, IOMMU_EXT_FEATURE_PPRSUP_SHIFT) ) set_iommu_ppr_log_control(iommu, IOMMU_CONTROL_DISABLED); + if ( iommu_has_feature(iommu, IOMMU_EXT_FEATURE_GTSUP_SHIFT) ) + set_iommu_guest_translation_control(iommu, IOMMU_CONTROL_DISABLED); + set_iommu_translation_control(iommu, IOMMU_CONTROL_DISABLED); iommu->enabled = 0;
Wei Wang
2012-Jan-10 17:07 UTC
[PATCH 06 of 14 V3] amd iommu: add ppr log processing into iommu interrupt handling
# HG changeset patch # User Wei Wang <wei.wang2@amd.com> # Date 1326213597 -3600 # Node ID 3d252e3969bae12e85e5a1f2f339dad169d0d892 # Parent e698b7b63a9dc75c5cad4dbe02d38d90bdaf1512 amd iommu: add ppr log processing into iommu interrupt handling PPR log and event log share the same interrupt source. Interrupt handler should check both of them. Signed-off-by: Wei Wang <wei.wang2@amd.com> diff -r e698b7b63a9d -r 3d252e3969ba xen/drivers/passthrough/amd/iommu_init.c --- a/xen/drivers/passthrough/amd/iommu_init.c Tue Jan 10 17:39:53 2012 +0100 +++ b/xen/drivers/passthrough/amd/iommu_init.c Tue Jan 10 17:39:57 2012 +0100 @@ -355,75 +355,92 @@ static void set_iommu_ppr_log_control(st AMD_IOMMU_DEBUG("PPR Log Enabled.\n"); } -static void parse_event_log_entry(struct amd_iommu *, u32 entry[]); +/* read event log or ppr log from iommu ring buffer */ +static int iommu_read_log(struct amd_iommu *iommu, + struct ring_buffer *log, + unsigned int entry_size, + void (*parse_func)(struct amd_iommu *, u32 *)) +{ + u32 tail, head, *entry, tail_offest, head_offset; -static int amd_iommu_read_event_log(struct amd_iommu *iommu) -{ - u32 tail, head, *event_log; - - BUG_ON( !iommu ); + BUG_ON( !iommu || ((log != &iommu->event_log) && + (log != &iommu->ppr_log)) ); /* make sure there''s an entry in the log */ - tail = readl(iommu->mmio_base + IOMMU_EVENT_LOG_TAIL_OFFSET); - tail = get_field_from_reg_u32(tail, - IOMMU_EVENT_LOG_TAIL_MASK, - IOMMU_EVENT_LOG_TAIL_SHIFT); + tail_offest = ( log == &iommu->event_log ) ? + IOMMU_EVENT_LOG_TAIL_OFFSET: + IOMMU_PPR_LOG_TAIL_OFFSET; - while ( tail != iommu->event_log.head ) + head_offset = ( log == &iommu->event_log ) ? + IOMMU_EVENT_LOG_HEAD_OFFSET: + IOMMU_PPR_LOG_HEAD_OFFSET; + + tail = readl(iommu->mmio_base + tail_offest); + tail = iommu_get_rb_pointer(tail); + + while ( tail != log->head ) { /* read event log entry */ - event_log = (u32 *)(iommu->event_log.buffer + - (iommu->event_log.head * - IOMMU_EVENT_LOG_ENTRY_SIZE)); + entry = (u32 *)(log->buffer + log->head * entry_size); - parse_event_log_entry(iommu, event_log); - - if ( ++iommu->event_log.head == iommu->event_log.entries ) - iommu->event_log.head = 0; + parse_func(iommu, entry); + if ( ++log->head == log->entries ) + log->head = 0; /* update head pointer */ - set_field_in_reg_u32(iommu->event_log.head, 0, - IOMMU_EVENT_LOG_HEAD_MASK, - IOMMU_EVENT_LOG_HEAD_SHIFT, &head); - writel(head, iommu->mmio_base + IOMMU_EVENT_LOG_HEAD_OFFSET); + head = 0; + iommu_set_rb_pointer(&head, log->head); + + writel(head, iommu->mmio_base + head_offset); } return 0; } -static void amd_iommu_reset_event_log(struct amd_iommu *iommu) +/* reset event log or ppr log when overflow */ +static void iommu_reset_log(struct amd_iommu *iommu, + struct ring_buffer *log, + void (*ctrl_func)(struct amd_iommu *iommu, int)) { u32 entry; - int log_run; + int log_run, run_bit, of_bit; int loop_count = 1000; + BUG_ON( !iommu || ((log != &iommu->event_log) && + (log != &iommu->ppr_log)) ); + + run_bit = ( log == &iommu->event_log ) ? + IOMMU_STATUS_EVENT_LOG_RUN_SHIFT: + IOMMU_STATUS_PPR_LOG_RUN_SHIFT; + + of_bit = ( log == &iommu->event_log ) ? 
+ IOMMU_STATUS_EVENT_OVERFLOW_SHIFT: + IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT; + /* wait until EventLogRun bit = 0 */ do { entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); - log_run = iommu_get_bit(entry, IOMMU_STATUS_EVENT_LOG_RUN_SHIFT); + log_run = iommu_get_bit(entry, run_bit); loop_count--; } while ( log_run && loop_count ); if ( log_run ) { - AMD_IOMMU_DEBUG("Warning: EventLogRun bit is not cleared" - "before reset!\n"); + AMD_IOMMU_DEBUG("Warning: Log Run bit %d is not cleared" + "before reset! \n", run_bit); return; } - set_iommu_event_log_control(iommu, IOMMU_CONTROL_DISABLED); + ctrl_func(iommu, IOMMU_CONTROL_DISABLED); - /* read event log for debugging */ - amd_iommu_read_event_log(iommu); /*clear overflow bit */ - iommu_clear_bit(&entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT); - - writel(entry, iommu->mmio_base+IOMMU_STATUS_MMIO_OFFSET); + iommu_clear_bit(&entry, of_bit); + writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); /*reset event log base address */ - iommu->event_log.head = 0; + log->head = 0; - set_iommu_event_log_control(iommu, IOMMU_CONTROL_ENABLED); + ctrl_func(iommu, IOMMU_CONTROL_ENABLED); } static void iommu_msi_set_affinity(struct irq_desc *desc, const cpumask_t *mask) @@ -595,30 +612,95 @@ static void parse_event_log_entry(struct } } -static void amd_iommu_page_fault(int irq, void *dev_id, - struct cpu_user_regs *regs) +static void iommu_check_event_log(struct amd_iommu *iommu) { u32 entry; unsigned long flags; - struct amd_iommu *iommu = dev_id; spin_lock_irqsave(&iommu->lock, flags); - amd_iommu_read_event_log(iommu); + + iommu_read_log(iommu, &iommu->event_log, + sizeof(event_entry_t), parse_event_log_entry); /*check event overflow */ entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) ) - amd_iommu_reset_event_log(iommu); + iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control); /* reset interrupt status bit */ entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); iommu_set_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT); - writel(entry, iommu->mmio_base+IOMMU_STATUS_MMIO_OFFSET); + writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + spin_unlock_irqrestore(&iommu->lock, flags); } +void parse_ppr_log_entry(struct amd_iommu *iommu, u32 entry[]) +{ + + u16 device_id; + u8 bus, devfn; + struct pci_dev *pdev; + struct domain *d; + + /* here device_id is physical value */ + device_id = iommu_get_devid_from_cmd(entry[0]); + bus = device_id >> 8; + devfn = device_id & 0xFF; + + local_irq_enable(); + + spin_lock(&pcidevs_lock); + pdev = pci_get_pdev(0, bus, devfn); + spin_unlock(&pcidevs_lock); + + local_irq_disable(); + + if ( pdev == NULL ) + return; + + d = pdev->domain; + + guest_iommu_add_ppr_log(d, entry); +} + +static void iommu_check_ppr_log(struct amd_iommu *iommu) +{ + u32 entry; + unsigned long flags; + + spin_lock_irqsave(&iommu->lock, flags); + + iommu_read_log(iommu, &iommu->ppr_log, + sizeof(ppr_entry_t), parse_ppr_log_entry); + + /*check event overflow */ + entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + + if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) ) + iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control); + + /* reset interrupt status bit */ + entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + iommu_set_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT); + + writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); + + spin_unlock_irqrestore(&iommu->lock, flags); +} + +static void 
iommu_interrupt_handler(int irq, void *dev_id, + struct cpu_user_regs *regs) +{ + struct amd_iommu *iommu = dev_id; + iommu_check_event_log(iommu); + + if ( iommu->ppr_log.buffer != NULL ) + iommu_check_ppr_log(iommu); +} + static int __init set_iommu_interrupt_handler(struct amd_iommu *iommu) { int irq, ret; @@ -631,8 +713,7 @@ static int __init set_iommu_interrupt_ha } irq_desc[irq].handler = &iommu_msi_type; - ret = request_irq(irq, amd_iommu_page_fault, 0, - "amd_iommu", iommu); + ret = request_irq(irq, iommu_interrupt_handler, 0, "amd_iommu", iommu); if ( ret ) { irq_desc[irq].handler = &no_irq_type;
# HG changeset patch # User Wei Wang <wei.wang2@amd.com> # Date 1326213601 -3600 # Node ID 31e61ed495ae1429e3317f8b0359ff37fdcbb6cd # Parent 3d252e3969bae12e85e5a1f2f339dad169d0d892 amd iommu: Add 2 hypercalls for libxc iommu_set_msi: used by qemu to inform hypervisor iommu vector number in guest space. Hypervisor needs this vector to inject msi into guest when PPR logging happens. iommu_bind_bdf: used by xl to bind guest bdf number to machine bdf number. IOMMU emulations codes receives commands from guest iommu driver and forwards them to host iommu. But virtual device id from guest should be converted into physical before sending to real hardware. Signed -off-by: Wei Wang <wei.wang2@amd.com> diff -r 3d252e3969ba -r 31e61ed495ae xen/drivers/passthrough/amd/iommu_guest.c --- a/xen/drivers/passthrough/amd/iommu_guest.c Tue Jan 10 17:39:57 2012 +0100 +++ b/xen/drivers/passthrough/amd/iommu_guest.c Tue Jan 10 17:40:01 2012 +0100 @@ -48,14 +48,31 @@ (reg)->hi = (val >> 32) & 0xFFFFFFFF; \ } while(0) -static unsigned int machine_bdf(struct domain *d, uint16_t guest_bdf) +static unsigned int machine_bdf(struct domain *d, uint16_t guest_seg, + uint16_t guest_bdf) { - return guest_bdf; + struct pci_dev *pdev; + uint16_t mbdf = 0; + + for_each_pdev( d, pdev ) + { + if ( (pdev->gbdf == guest_bdf) && (pdev->gseg == guest_seg) ) + { + mbdf = PCI_BDF2(pdev->bus, pdev->devfn); + break; + } + } + return mbdf; } -static uint16_t guest_bdf(struct domain *d, uint16_t machine_bdf) +static uint16_t guest_bdf(struct domain *d, uint16_t machine_seg, + uint16_t machine_bdf) { - return machine_bdf; + struct pci_dev *pdev; + + pdev = pci_get_pdev_by_domain(d, machine_seg, PCI_BUS(machine_bdf), + PCI_DEVFN2(machine_bdf)); + return pdev->gbdf; } static inline struct guest_iommu *domain_iommu(struct domain *d) @@ -207,7 +224,7 @@ void guest_iommu_add_ppr_log(struct doma log = log_base + tail % (PAGE_SIZE / sizeof(ppr_entry_t)); /* Convert physical device id back into virtual device id */ - gdev_id = guest_bdf(d, iommu_get_devid_from_cmd(entry[0])); + gdev_id = guest_bdf(d, 0, iommu_get_devid_from_cmd(entry[0])); iommu_set_devid_to_cmd(&entry[0], gdev_id); memcpy(log, entry, sizeof(ppr_entry_t)); @@ -250,7 +267,7 @@ void guest_iommu_add_event_log(struct do log = log_base + tail % (PAGE_SIZE / sizeof(event_entry_t)); /* re-write physical device id into virtual device id */ - dev_id = guest_bdf(d, iommu_get_devid_from_cmd(entry[0])); + dev_id = guest_bdf(d, 0, iommu_get_devid_from_cmd(entry[0])); iommu_set_devid_to_cmd(&entry[0], dev_id); memcpy(log, entry, sizeof(event_entry_t)); @@ -272,7 +289,7 @@ static int do_complete_ppr_request(struc uint16_t dev_id; struct amd_iommu *iommu; - dev_id = machine_bdf(d, iommu_get_devid_from_cmd(cmd->data[0])); + dev_id = machine_bdf(d, 0, iommu_get_devid_from_cmd(cmd->data[0])); iommu = find_iommu_for_device(0, dev_id); if ( !iommu ) @@ -324,7 +341,7 @@ static int do_invalidate_iotlb_pages(str struct amd_iommu *iommu; uint16_t dev_id; - dev_id = machine_bdf(d, iommu_get_devid_from_cmd(cmd->data[0])); + dev_id = machine_bdf(d, 0, iommu_get_devid_from_cmd(cmd->data[0])); iommu = find_iommu_for_device(0, dev_id); if ( !iommu ) @@ -402,7 +419,7 @@ static int do_invalidate_dte(struct doma g_iommu = domain_iommu(d); gbdf = iommu_get_devid_from_cmd(cmd->data[0]); - mbdf = machine_bdf(d, gbdf); + mbdf = machine_bdf(d, 0, gbdf); /* Guest can only update DTEs for its passthru devices */ if ( mbdf == 0 || gbdf == 0 ) @@ -913,3 +930,45 @@ const struct hvm_mmio_handler iommu_mmio .read_handler = 
guest_iommu_mmio_read, .write_handler = guest_iommu_mmio_write }; + +/* iommu hypercall handler */ +int iommu_bind_bdf(struct domain* d, uint16_t gseg, uint16_t gbdf, + uint16_t mseg, uint16_t mbdf) +{ + struct pci_dev *pdev; + int ret = -ENODEV; + + if ( !iommu_found() ) + return 0; + + spin_lock(&pcidevs_lock); + + for_each_pdev( d, pdev ) + { + if ( (pdev->seg != mseg) || (pdev->bus != PCI_BUS(mbdf) ) || + (pdev->devfn != PCI_DEVFN2(mbdf)) ) + continue; + + pdev->gseg = gseg; + pdev->gbdf = gbdf; + ret = 0; + } + + spin_unlock(&pcidevs_lock); + return ret; +} + +void iommu_set_msi(struct domain* d, uint16_t vector, uint16_t dest, + uint16_t dest_mode, uint16_t delivery_mode, + uint16_t trig_mode) +{ + struct guest_iommu *iommu = domain_iommu(d); + + if ( !iommu_found() ) + return; + + iommu->msi.vector = vector; + iommu->msi.dest = dest; + iommu->msi.dest_mode = dest_mode; + iommu->msi.trig_mode = trig_mode; +} diff -r 3d252e3969ba -r 31e61ed495ae xen/drivers/passthrough/iommu.c --- a/xen/drivers/passthrough/iommu.c Tue Jan 10 17:39:57 2012 +0100 +++ b/xen/drivers/passthrough/iommu.c Tue Jan 10 17:40:01 2012 +0100 @@ -648,6 +648,40 @@ int iommu_do_domctl( put_domain(d); break; + case XEN_DOMCTL_guest_iommu_op: + { + xen_domctl_guest_iommu_op_t * guest_op; + + if ( unlikely((d = get_domain_by_id(domctl->domain)) == NULL) ) + { + gdprintk(XENLOG_ERR, + "XEN_DOMCTL_guest_iommu_op: get_domain_by_id() failed\n"); + ret = -EINVAL; + break; + } + + guest_op = &(domctl->u.guest_iommu_op); + switch ( guest_op->op ) + { + case XEN_DOMCTL_GUEST_IOMMU_OP_SET_MSI: + iommu_set_msi(d, guest_op->u.msi.vector, + guest_op->u.msi.dest, + guest_op->u.msi.dest_mode, + guest_op->u.msi.delivery_mode, + guest_op->u.msi.trig_mode); + ret = 0; + break; + case XEN_DOMCTL_GUEST_IOMMU_OP_BIND_BDF: + ret = iommu_bind_bdf(d, guest_op->u.bdf_bind.g_seg, + guest_op->u.bdf_bind.g_bdf, + guest_op->u.bdf_bind.m_seg, + guest_op->u.bdf_bind.m_bdf); + break; + } + put_domain(d); + break; + } + default: ret = -ENOSYS; break; diff -r 3d252e3969ba -r 31e61ed495ae xen/include/public/domctl.h --- a/xen/include/public/domctl.h Tue Jan 10 17:39:57 2012 +0100 +++ b/xen/include/public/domctl.h Tue Jan 10 17:40:01 2012 +0100 @@ -848,6 +848,31 @@ struct xen_domctl_set_access_required { typedef struct xen_domctl_set_access_required xen_domctl_set_access_required_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_access_required_t); +/* Support for guest iommu emulation */ +struct xen_domctl_guest_iommu_op { + /* XEN_DOMCTL_GUEST_IOMMU_OP_* */ +#define XEN_DOMCTL_GUEST_IOMMU_OP_SET_MSI 0 +#define XEN_DOMCTL_GUEST_IOMMU_OP_BIND_BDF 1 + uint8_t op; + union { + struct iommu_msi { + uint8_t vector; + uint8_t dest; + uint8_t dest_mode; + uint8_t delivery_mode; + uint8_t trig_mode; + } msi; + struct bdf_bind { + uint16_t g_seg; + uint16_t g_bdf; + uint16_t m_seg; + uint16_t m_bdf; + } bdf_bind; + } u; +}; +typedef struct xen_domctl_guest_iommu_op xen_domctl_guest_iommu_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_guest_iommu_op_t); + struct xen_domctl { uint32_t cmd; #define XEN_DOMCTL_createdomain 1 @@ -912,6 +937,7 @@ struct xen_domctl { #define XEN_DOMCTL_getvcpuextstate 63 #define XEN_DOMCTL_set_access_required 64 #define XEN_DOMCTL_audit_p2m 65 +#define XEN_DOMCTL_guest_iommu_op 66 #define XEN_DOMCTL_gdbsx_guestmemio 1000 #define XEN_DOMCTL_gdbsx_pausevcpu 1001 #define XEN_DOMCTL_gdbsx_unpausevcpu 1002 @@ -960,6 +986,7 @@ struct xen_domctl { struct xen_domctl_debug_op debug_op; struct xen_domctl_mem_event_op mem_event_op; struct 
xen_domctl_mem_sharing_op mem_sharing_op; + struct xen_domctl_guest_iommu_op guest_iommu_op; #if defined(__i386__) || defined(__x86_64__) struct xen_domctl_cpuid cpuid; struct xen_domctl_vcpuextstate vcpuextstate; diff -r 3d252e3969ba -r 31e61ed495ae xen/include/xen/iommu.h --- a/xen/include/xen/iommu.h Tue Jan 10 17:39:57 2012 +0100 +++ b/xen/include/xen/iommu.h Tue Jan 10 17:40:01 2012 +0100 @@ -164,6 +164,12 @@ int iommu_do_domctl(struct xen_domctl *, void iommu_iotlb_flush(struct domain *d, unsigned long gfn, unsigned int page_count); void iommu_iotlb_flush_all(struct domain *d); +/* Only used by AMD IOMMU so far */ +void iommu_set_msi(struct domain* d, uint16_t vector, uint16_t dest, + uint16_t dest_mode, uint16_t delivery_mode, + uint16_t trig_mode); +int iommu_bind_bdf(struct domain* d, uint16_t gseg, uint16_t gbdf, + uint16_t mseg, uint16_t mbdf); /* * The purpose of the iommu_dont_flush_iotlb optional cpu flag is to * avoid unecessary iotlb_flush in the low level IOMMU code. diff -r 3d252e3969ba -r 31e61ed495ae xen/include/xen/pci.h --- a/xen/include/xen/pci.h Tue Jan 10 17:39:57 2012 +0100 +++ b/xen/include/xen/pci.h Tue Jan 10 17:40:01 2012 +0100 @@ -59,6 +59,11 @@ struct pci_dev { const u16 seg; const u8 bus; const u8 devfn; + + /* Used by iommu to represent virtual seg and bdf value in guest space */ + u16 gseg; + u16 gbdf; + struct pci_dev_info info; struct arch_pci_dev arch; u64 vf_rlen[6];
Wei Wang
2012-Jan-10 17:07 UTC
[PATCH 08 of 14 V3] amd iommu: Add a new flag to indication iommuv2 feature enabled or not
# HG changeset patch # User Wei Wang <wei.wang2@amd.com> # Date 1326213604 -3600 # Node ID 82f5e77e13c43f0e4e34dfefdd218aec092f9542 # Parent 31e61ed495ae1429e3317f8b0359ff37fdcbb6cd amd iommu: Add a new flag to indication iommuv2 feature enabled or not. Hypercalls should return early on non-iommuv2 systems. Signed-off-by: Wei Wang <wei.wang2@amd.com> diff -r 31e61ed495ae -r 82f5e77e13c4 xen/drivers/passthrough/amd/iommu_guest.c --- a/xen/drivers/passthrough/amd/iommu_guest.c Tue Jan 10 17:40:01 2012 +0100 +++ b/xen/drivers/passthrough/amd/iommu_guest.c Tue Jan 10 17:40:04 2012 +0100 @@ -821,6 +821,9 @@ int guest_iommu_set_base(struct domain * p2m_type_t t; struct guest_iommu *iommu = domain_iommu(d); + if ( !is_hvm_domain(d) || !iommuv2_enabled ) + return 1; + iommu->mmio_base = base; base >>= PAGE_SHIFT; @@ -880,7 +883,7 @@ int guest_iommu_init(struct domain* d) struct guest_iommu *iommu; struct hvm_iommu *hd = domain_hvm_iommu(d); - if ( !is_hvm_domain(d) ) + if ( !is_hvm_domain(d) || !iommuv2_enabled ) return 0; iommu = xzalloc(struct guest_iommu); @@ -904,13 +907,11 @@ int guest_iommu_init(struct domain* d) void guest_iommu_destroy(struct domain *d) { - struct guest_iommu *iommu; + struct guest_iommu *iommu = domain_iommu(d); - if ( !is_hvm_domain(d) ) + if ( !is_hvm_domain(d) || !iommuv2_enabled ) return; - iommu = domain_iommu(d); - tasklet_kill(&iommu->cmd_buffer_tasklet); xfree(iommu); @@ -921,6 +922,9 @@ static int guest_iommu_mmio_range(struct { struct guest_iommu *iommu = vcpu_iommu(v); + if ( !iommu_found() || !iommuv2_enabled ) + return 0; + return ( addr >= iommu->mmio_base && addr < (iommu->mmio_base + IOMMU_MMIO_SIZE) ); } @@ -938,7 +942,7 @@ int iommu_bind_bdf(struct domain* d, uin struct pci_dev *pdev; int ret = -ENODEV; - if ( !iommu_found() ) + if ( !iommu_found() || !iommuv2_enabled ) return 0; spin_lock(&pcidevs_lock); @@ -964,7 +968,7 @@ void iommu_set_msi(struct domain* d, uin { struct guest_iommu *iommu = domain_iommu(d); - if ( !iommu_found() ) + if ( !iommu_found() || !iommuv2_enabled ) return; iommu->msi.vector = vector; diff -r 31e61ed495ae -r 82f5e77e13c4 xen/drivers/passthrough/amd/iommu_init.c --- a/xen/drivers/passthrough/amd/iommu_init.c Tue Jan 10 17:40:01 2012 +0100 +++ b/xen/drivers/passthrough/amd/iommu_init.c Tue Jan 10 17:40:04 2012 +0100 @@ -36,6 +36,7 @@ unsigned short ivrs_bdf_entries; static struct radix_tree_root ivrs_maps; struct list_head amd_iommu_head; struct table_struct device_table; +bool_t iommuv2_enabled; static int iommu_has_ht_flag(struct amd_iommu *iommu, u8 mask) { @@ -765,6 +766,10 @@ static void enable_iommu(struct amd_iomm amd_iommu_flush_all_caches(iommu); iommu->enabled = 1; + + if ( iommu->features ) + iommuv2_enabled = 1; + spin_unlock_irqrestore(&iommu->lock, flags); } diff -r 31e61ed495ae -r 82f5e77e13c4 xen/include/asm-x86/amd-iommu.h --- a/xen/include/asm-x86/amd-iommu.h Tue Jan 10 17:40:01 2012 +0100 +++ b/xen/include/asm-x86/amd-iommu.h Tue Jan 10 17:40:04 2012 +0100 @@ -182,4 +182,6 @@ struct guest_iommu { struct guest_iommu_msi msi; }; +extern bool_t iommuv2_enabled; + #endif /* _ASM_X86_64_AMD_IOMMU_H */
Wei Wang
2012-Jan-10 17:07 UTC
[PATCH 09 of 14 V3] amd iommu: Add a hypercall for hvmloader
# HG changeset patch # User Wei Wang <wei.wang2@amd.com> # Date 1326213608 -3600 # Node ID 2f9c68c3b521efccebebffe76d17ace7dbae5e25 # Parent 82f5e77e13c43f0e4e34dfefdd218aec092f9542 amd iommu: Add a hypercall for hvmloader. IOMMU MMIO base address is dynamically allocated by firmware. This patch allows hvmloader to notify hypervisor where the iommu mmio pages are. Signed-off-by: Wei Wang <wei.wang2@amd.com> diff -r 82f5e77e13c4 -r 2f9c68c3b521 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Tue Jan 10 17:40:04 2012 +0100 +++ b/xen/arch/x86/hvm/hvm.c Tue Jan 10 17:40:08 2012 +0100 @@ -65,6 +65,7 @@ #include <public/memory.h> #include <asm/mem_event.h> #include <public/mem_event.h> +#include <asm/hvm/svm/amd-iommu-proto.h> bool_t __read_mostly hvm_enabled; @@ -3677,6 +3678,9 @@ long do_hvm_op(unsigned long op, XEN_GUE case HVM_PARAM_BUFIOREQ_EVTCHN: rc = -EINVAL; break; + case HVM_PARAM_IOMMU_BASE: + rc = guest_iommu_set_base(d, a.value); + break; } if ( rc == 0 ) diff -r 82f5e77e13c4 -r 2f9c68c3b521 xen/include/public/hvm/params.h --- a/xen/include/public/hvm/params.h Tue Jan 10 17:40:04 2012 +0100 +++ b/xen/include/public/hvm/params.h Tue Jan 10 17:40:08 2012 +0100 @@ -141,7 +141,8 @@ /* Boolean: Enable nestedhvm (hvm only) */ #define HVM_PARAM_NESTEDHVM 24 +#define HVM_PARAM_IOMMU_BASE 27 -#define HVM_NR_PARAMS 27 +#define HVM_NR_PARAMS 28 #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
Wei Wang
2012-Jan-10 17:07 UTC
[PATCH 10 of 14 V3] amd iommu: Enable FC bit in iommu host level PTE
# HG changeset patch
# User Wei Wang <wei.wang2@amd.com>
# Date 1326213611 -3600
# Node ID f1bf84f5fbb94f8702c8e96462e715ad5066dca2
# Parent 2f9c68c3b521efccebebffe76d17ace7dbae5e25
amd iommu: Enable FC bit in iommu host level PTE

Signed-off-by: Wei Wang <wei.wang2@amd.com>

diff -r 2f9c68c3b521 -r f1bf84f5fbb9 xen/drivers/passthrough/amd/iommu_map.c
--- a/xen/drivers/passthrough/amd/iommu_map.c Tue Jan 10 17:40:08 2012 +0100
+++ b/xen/drivers/passthrough/amd/iommu_map.c Tue Jan 10 17:40:11 2012 +0100
@@ -83,6 +83,13 @@ static bool_t set_iommu_pde_present(u32
     set_field_in_reg_u32(ir, entry,
                          IOMMU_PDE_IO_READ_PERMISSION_MASK,
                          IOMMU_PDE_IO_READ_PERMISSION_SHIFT, &entry);
+
+    /* FC bit should be enabled in PTE, this helps to solve potential
+     * issues with ATS devices
+     */
+    if ( next_level == IOMMU_PAGING_MODE_LEVEL_0 )
+        set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
+                             IOMMU_PTE_FC_MASK, IOMMU_PTE_FC_SHIFT, &entry);
     pde[1] = entry;
 
     /* mark next level as 'present' */
# HG changeset patch # User Wei Wang <wei.wang2@amd.com> # Date 1326213614 -3600 # Node ID 01d2c1d4e3b992997f170d95dccc2195b9206b04 # Parent f1bf84f5fbb94f8702c8e96462e715ad5066dca2 hvmloader: Build IVRS table. Signed-off-by: Wei Wang <wei.wang2@amd.com> diff -r f1bf84f5fbb9 -r 01d2c1d4e3b9 tools/firmware/hvmloader/acpi/acpi2_0.h --- a/tools/firmware/hvmloader/acpi/acpi2_0.h Tue Jan 10 17:40:11 2012 +0100 +++ b/tools/firmware/hvmloader/acpi/acpi2_0.h Tue Jan 10 17:40:14 2012 +0100 @@ -389,6 +389,60 @@ struct acpi_20_madt_intsrcovr { #define ACPI_2_0_WAET_REVISION 0x01 #define ACPI_1_0_FADT_REVISION 0x01 +#define IVRS_SIGNATURE ASCII32(''I'',''V'',''R'',''S'') +#define IVRS_REVISION 1 +#define IVRS_VASIZE 64 +#define IVRS_PASIZE 52 +#define IVRS_GVASIZE 64 + +#define IVHD_BLOCK_TYPE 0x10 +#define IVHD_FLAG_HTTUNEN (1 << 0) +#define IVHD_FLAG_PASSPW (1 << 1) +#define IVHD_FLAG_RESPASSPW (1 << 2) +#define IVHD_FLAG_ISOC (1 << 3) +#define IVHD_FLAG_IOTLBSUP (1 << 4) +#define IVHD_FLAG_COHERENT (1 << 5) +#define IVHD_FLAG_PREFSUP (1 << 6) +#define IVHD_FLAG_PPRSUP (1 << 7) + +#define IVHD_EFR_GTSUP (1 << 2) +#define IVHD_EFR_IASUP (1 << 5) + +#define IVHD_SELECT_4_BYTE 0x2 + +struct ivrs_ivhd_block +{ + uint8_t type; + uint8_t flags; + uint16_t length; + uint16_t devid; + uint16_t cap_offset; + uint64_t iommu_base_addr; + uint16_t pci_segment; + uint16_t iommu_info; + uint32_t reserved; +}; + +/* IVHD 4-byte device entries */ +struct ivrs_ivhd_device +{ + uint8_t type; + uint16_t dev_id; + uint8_t flags; +}; + +#define PT_DEV_MAX_NR 32 +#define IOMMU_CAP_OFFSET 0x40 +struct acpi_40_ivrs +{ + struct acpi_header header; + uint32_t iv_info; + uint32_t reserved[2]; + struct ivrs_ivhd_block ivhd_block; + struct ivrs_ivhd_device ivhd_device[PT_DEV_MAX_NR]; +}; + + #pragma pack () struct acpi_config { diff -r f1bf84f5fbb9 -r 01d2c1d4e3b9 tools/firmware/hvmloader/acpi/build.c --- a/tools/firmware/hvmloader/acpi/build.c Tue Jan 10 17:40:11 2012 +0100 +++ b/tools/firmware/hvmloader/acpi/build.c Tue Jan 10 17:40:14 2012 +0100 @@ -23,6 +23,8 @@ #include "ssdt_pm.h" #include "../config.h" #include "../util.h" +#include "../hypercall.h" +#include <xen/hvm/params.h> #define align16(sz) (((sz) + 15) & ~15) #define fixed_strcpy(d, s) strncpy((d), (s), sizeof(d)) @@ -198,6 +200,77 @@ static struct acpi_20_waet *construct_wa return waet; } +extern uint32_t ptdev_bdf[PT_DEV_MAX_NR]; +extern uint32_t ptdev_nr; +extern uint32_t iommu_bdf; +static struct acpi_40_ivrs* construct_ivrs(void) +{ + struct acpi_40_ivrs *ivrs; + uint64_t mmio; + struct ivrs_ivhd_block *ivhd; + struct ivrs_ivhd_device *dev_entry; + struct xen_hvm_param p; + + if (ptdev_nr == 0) return NULL; + + ivrs = mem_alloc(sizeof(*ivrs), 16); + if (!ivrs) return NULL; + + memset(ivrs, 0, sizeof(*ivrs)); + + /* initialize acpi header */ + ivrs->header.signature = IVRS_SIGNATURE; + ivrs->header.revision = IVRS_REVISION; + fixed_strcpy(ivrs->header.oem_id, ACPI_OEM_ID); + fixed_strcpy(ivrs->header.oem_table_id, ACPI_OEM_TABLE_ID); + + ivrs->header.oem_revision = ACPI_OEM_REVISION; + ivrs->header.creator_id = ACPI_CREATOR_ID; + ivrs->header.creator_revision = ACPI_CREATOR_REVISION; + + ivrs->header.length = sizeof(*ivrs); + + /* initialize IVHD Block */ + ivhd = &ivrs->ivhd_block; + ivrs->iv_info = (IVRS_VASIZE << 15) | (IVRS_PASIZE << 8) | + (IVRS_GVASIZE << 5); + + ivhd->type = IVHD_BLOCK_TYPE; + ivhd->flags = IVHD_FLAG_PPRSUP | IVHD_FLAG_IOTLBSUP; + ivhd->devid = iommu_bdf; + ivhd->cap_offset = IOMMU_CAP_OFFSET; + + /*reserve 32K IOMMU MMIO space */ + 
mmio = virt_to_phys(mem_alloc(0x8000, 0x1000)); + if (!mmio) return NULL; + + p.domid = DOMID_SELF; + p.index = HVM_PARAM_IOMMU_BASE; + p.value = mmio; + + /* Return non-zero if IOMMUv2 hardware is not avaliable */ + if ( hypercall_hvm_op(HVMOP_set_param, &p) ) + return NULL; + + ivhd->iommu_base_addr = mmio; + ivhd->reserved = IVHD_EFR_IASUP | IVHD_EFR_GTSUP; + + /* Build IVHD device entries */ + dev_entry = ivrs->ivhd_device; + for ( int i = 0; i < ptdev_nr; i++ ) + { + dev_entry[i].type = IVHD_SELECT_4_BYTE; + dev_entry[i].dev_id = ptdev_bdf[i]; + dev_entry[i].flags = 0; + } + + ivhd->length = sizeof(*ivhd) + sizeof(*dev_entry) * PT_DEV_MAX_NR; + set_checksum(ivrs, offsetof(struct acpi_header, checksum), + ivrs->header.length); + + return ivrs; +} + static int construct_secondary_tables(unsigned long *table_ptrs, struct acpi_info *info) { @@ -206,6 +279,7 @@ static int construct_secondary_tables(un struct acpi_20_hpet *hpet; struct acpi_20_waet *waet; struct acpi_20_tcpa *tcpa; + struct acpi_40_ivrs *ivrs; unsigned char *ssdt; static const uint16_t tis_signature[] = {0x0001, 0x0001, 0x0001}; uint16_t *tis_hdr; @@ -293,6 +367,13 @@ static int construct_secondary_tables(un } } + if ( !strncmp(xenstore_read("iommu", "1"), "1", 1) ) + { + ivrs = construct_ivrs(); + if ( ivrs != NULL ) + table_ptrs[nr_tables++] = (unsigned long)ivrs; + } + table_ptrs[nr_tables] = 0; return nr_tables; } diff -r f1bf84f5fbb9 -r 01d2c1d4e3b9 tools/firmware/hvmloader/pci.c --- a/tools/firmware/hvmloader/pci.c Tue Jan 10 17:40:11 2012 +0100 +++ b/tools/firmware/hvmloader/pci.c Tue Jan 10 17:40:14 2012 +0100 @@ -34,11 +34,17 @@ unsigned long pci_mem_end = PCI_MEM_END; enum virtual_vga virtual_vga = VGA_none; unsigned long igd_opregion_pgbase = 0; +/* support up to 32 passthrough devices */ +#define PT_DEV_MAX_NR 32 +uint32_t ptdev_bdf[PT_DEV_MAX_NR]; +uint32_t ptdev_nr; +uint32_t iommu_bdf; + void pci_setup(void) { uint32_t base, devfn, bar_reg, bar_data, bar_sz, cmd, mmio_total = 0; uint32_t vga_devfn = 256; - uint16_t class, vendor_id, device_id; + uint16_t class, vendor_id, device_id, sub_vendor_id; unsigned int bar, pin, link, isa_irq; /* Resources assignable to PCI devices via BARs. */ @@ -72,12 +78,34 @@ void pci_setup(void) class = pci_readw(devfn, PCI_CLASS_DEVICE); vendor_id = pci_readw(devfn, PCI_VENDOR_ID); device_id = pci_readw(devfn, PCI_DEVICE_ID); + sub_vendor_id = pci_readw(devfn, PCI_SUBSYSTEM_VENDOR_ID); + if ( (vendor_id == 0xffff) && (device_id == 0xffff) ) continue; ASSERT((devfn != PCI_ISA_DEVFN) || ((vendor_id == 0x8086) && (device_id == 0x7000))); + /* Found amd iommu device. */ + if ( class == 0x0806 && vendor_id == 0x1022 ) + { + iommu_bdf = devfn; + continue; + } + /* IVRS: Detecting passthrough devices. + * sub_vendor_id != citrix && sub_vendor_id != qemu */ + if ( sub_vendor_id != 0x5853 && sub_vendor_id != 0x1af4 ) + { + /* found amd iommu device */ + if ( ptdev_nr < PT_DEV_MAX_NR ) + { + ptdev_bdf[ptdev_nr] = devfn; + ptdev_nr++; + } + else + printf("Number of passthru devices > PT_DEV_MAX_NR \n"); + } + switch ( class ) { case 0x0300:
# HG changeset patch # User Wei Wang <wei.wang2@amd.com> # Date 1326213617 -3600 # Node ID 2dc60e3398dd602a34ebdf92103a3957b97c02c5 # Parent 01d2c1d4e3b992997f170d95dccc2195b9206b04 libxc: add wrappers for new hypercalls Signed-off-by: Wei Wang <wei.wang2@amd.com> diff -r 01d2c1d4e3b9 -r 2dc60e3398dd tools/libxc/xc_domain.c --- a/tools/libxc/xc_domain.c Tue Jan 10 17:40:14 2012 +0100 +++ b/tools/libxc/xc_domain.c Tue Jan 10 17:40:17 2012 +0100 @@ -1352,6 +1352,59 @@ int xc_domain_bind_pt_isa_irq( PT_IRQ_TYPE_ISA, 0, 0, 0, machine_irq)); } +int xc_domain_update_iommu_msi( + xc_interface *xch, + uint32_t domid, + uint8_t vector, + uint8_t dest, + uint8_t dest_mode, + uint8_t delivery_mode, + uint8_t trig_mode) +{ + int rc; + DECLARE_DOMCTL; + xen_domctl_guest_iommu_op_t * iommu_op; + + domctl.cmd = XEN_DOMCTL_guest_iommu_op; + domctl.domain = (domid_t)domid; + + iommu_op = &(domctl.u.guest_iommu_op); + iommu_op->op = XEN_DOMCTL_GUEST_IOMMU_OP_SET_MSI; + iommu_op->u.msi.vector = vector; + iommu_op->u.msi.dest = dest; + iommu_op->u.msi.dest_mode = dest_mode; + iommu_op->u.msi.delivery_mode = delivery_mode; + iommu_op->u.msi.trig_mode = trig_mode; + + rc = do_domctl(xch, &domctl); + return rc; +} + +int xc_domain_bind_pt_bdf(xc_interface *xch, + uint32_t domid, + uint16_t gseg, + uint16_t gbdf, + uint16_t mseg, + uint16_t mbdf) +{ + int rc; + DECLARE_DOMCTL; + xen_domctl_guest_iommu_op_t * guest_op; + + domctl.cmd = XEN_DOMCTL_guest_iommu_op; + domctl.domain = (domid_t)domid; + + guest_op = &(domctl.u.guest_iommu_op); + guest_op->op = XEN_DOMCTL_GUEST_IOMMU_OP_BIND_BDF; + guest_op->u.bdf_bind.g_seg = gseg; + guest_op->u.bdf_bind.g_bdf = gbdf; + guest_op->u.bdf_bind.m_seg = mseg; + guest_op->u.bdf_bind.m_bdf = mbdf; + + rc = do_domctl(xch, &domctl); + return rc; +} + int xc_domain_memory_mapping( xc_interface *xch, uint32_t domid, diff -r 01d2c1d4e3b9 -r 2dc60e3398dd tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Tue Jan 10 17:40:14 2012 +0100 +++ b/tools/libxc/xenctrl.h Tue Jan 10 17:40:17 2012 +0100 @@ -1697,6 +1697,21 @@ int xc_domain_bind_pt_isa_irq(xc_interfa uint32_t domid, uint8_t machine_irq); +int xc_domain_bind_pt_bdf(xc_interface *xch, + uint32_t domid, + uint16_t gseg, + uint16_t gbdf, + uint16_t mseg, + uint16_t mbdf); + +int xc_domain_update_iommu_msi(xc_interface *xch, + uint32_t domid, + uint8_t vector, + uint8_t dest, + uint8_t dest_mode, + uint8_t delivery_mode, + uint8_t trig_mode); + int xc_domain_set_machine_address_size(xc_interface *xch, uint32_t domid, unsigned int width);
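For orientation, a toolstack-side caller of the two new wrappers might look roughly like this. It is a sketch only: the domid handling, MSI vector and BDF values are invented, and in the series itself qemu issues the MSI update while xl issues the bind (see patch 13).

    #include <xenctrl.h>

    /* Hypothetical usage of the new libxc wrappers; values are examples only. */
    static int example_setup_guest_iommu(xc_interface *xch, uint32_t domid)
    {
        int rc;

        /* Bind guest 0000:00:05.0 to machine 0000:01:00.0 so the hypervisor can
         * translate device ids in guest IOMMU commands and log entries. */
        rc = xc_domain_bind_pt_bdf(xch, domid,
                                   0 /* gseg */, 0x0028 /* gbdf: 00:05.0 */,
                                   0 /* mseg */, 0x0100 /* mbdf: 01:00.0 */);
        if ( rc )
            return rc;

        /* Tell the hypervisor which guest MSI to inject when PPR log entries
         * are forwarded to the emulated IOMMU. */
        return xc_domain_update_iommu_msi(xch, domid, 0x93 /* vector */,
                                          0 /* dest */, 0 /* dest_mode */,
                                          0 /* delivery_mode */, 0 /* trig_mode */);
    }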
Wei Wang
2012-Jan-10 17:07 UTC
[PATCH 13 of 14 V3] libxl: bind virtual bdf to physical bdf after device assignment
# HG changeset patch # User Wei Wang <wei.wang2@amd.com> # Date 1326213620 -3600 # Node ID 9e89b6485b6c91a8d563c46c47a8d768eee7d1f2 # Parent 2dc60e3398dd602a34ebdf92103a3957b97c02c5 libxl: bind virtual bdf to physical bdf after device assignment Signed-off-by: Wei Wang <wei.wang2@amd.com> diff -r 2dc60e3398dd -r 9e89b6485b6c tools/libxl/libxl_pci.c --- a/tools/libxl/libxl_pci.c Tue Jan 10 17:40:17 2012 +0100 +++ b/tools/libxl/libxl_pci.c Tue Jan 10 17:40:20 2012 +0100 @@ -735,6 +735,13 @@ out: LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "xc_assign_device failed"); return ERROR_FAIL; } + if (LIBXL__DOMAIN_IS_TYPE(gc, domid, HVM)) { + rc = xc_domain_bind_pt_bdf(ctx->xch, domid, 0, pcidev->vdevfn, pcidev->domain, pcidev_encode_bdf(pcidev)); + if ( rc ) { + LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "xc_domain_bind_pt_bdf failed"); + return ERROR_FAIL; + } + } } if (!starting)
Wei Wang
2012-Jan-10 17:07 UTC
[PATCH 14 of 14 V3] libxl: Introduce a new guest config file parameter
# HG changeset patch # User Wei Wang <wei.wang2@amd.com> # Date 1326213623 -3600 # Node ID 39eb093ea89eeaa4dbff29439499f2a289291ff0 # Parent 9e89b6485b6c91a8d563c46c47a8d768eee7d1f2 libxl: Introduce a new guest config file parameter Use iommu = {1,0} to enable or disable guest iommu emulation. Default value is 0. Signed-off-by: Wei Wang <wei.wang2@amd.com> diff -r 9e89b6485b6c -r 39eb093ea89e tools/libxl/libxl_create.c --- a/tools/libxl/libxl_create.c Tue Jan 10 17:40:20 2012 +0100 +++ b/tools/libxl/libxl_create.c Tue Jan 10 17:40:23 2012 +0100 @@ -99,6 +99,7 @@ int libxl_init_build_info(libxl_ctx *ctx b_info->u.hvm.vpt_align = 1; b_info->u.hvm.timer_mode = 1; b_info->u.hvm.nested_hvm = 0; + b_info->u.hvm.iommu = 0; break; case LIBXL_DOMAIN_TYPE_PV: b_info->u.pv.slack_memkb = 8 * 1024; @@ -189,13 +190,15 @@ int libxl__domain_build(libxl__gc *gc, vments[4] = "start_time"; vments[5] = libxl__sprintf(gc, "%lu.%02d", start_time.tv_sec,(int)start_time.tv_usec/10000); - localents = libxl__calloc(gc, 7, sizeof(char *)); + localents = libxl__calloc(gc, 9, sizeof(char *)); localents[0] = "platform/acpi"; localents[1] = (info->u.hvm.acpi) ? "1" : "0"; localents[2] = "platform/acpi_s3"; localents[3] = (info->u.hvm.acpi_s3) ? "1" : "0"; localents[4] = "platform/acpi_s4"; localents[5] = (info->u.hvm.acpi_s4) ? "1" : "0"; + localents[6] = "iommu"; + localents[7] = (info->u.hvm.iommu) ? "1" : "0"; break; case LIBXL_DOMAIN_TYPE_PV: diff -r 9e89b6485b6c -r 39eb093ea89e tools/libxl/libxl_types.idl --- a/tools/libxl/libxl_types.idl Tue Jan 10 17:40:20 2012 +0100 +++ b/tools/libxl/libxl_types.idl Tue Jan 10 17:40:23 2012 +0100 @@ -184,6 +184,7 @@ libxl_domain_build_info = Struct("domain ("vpt_align", bool), ("timer_mode", integer), ("nested_hvm", bool), + ("iommu", bool), ])), ("pv", Struct(None, [("kernel", libxl_file_reference), ("slack_memkb", uint32), diff -r 9e89b6485b6c -r 39eb093ea89e tools/libxl/xl_cmdimpl.c --- a/tools/libxl/xl_cmdimpl.c Tue Jan 10 17:40:20 2012 +0100 +++ b/tools/libxl/xl_cmdimpl.c Tue Jan 10 17:40:23 2012 +0100 @@ -360,6 +360,7 @@ static void printf_info(int domid, printf("\t\t\t(vpt_align %d)\n", b_info->u.hvm.vpt_align); printf("\t\t\t(timer_mode %d)\n", b_info->u.hvm.timer_mode); printf("\t\t\t(nestedhvm %d)\n", b_info->u.hvm.nested_hvm); + printf("\t\t\t(iommu %d)\n", b_info->u.hvm.iommu); printf("\t\t\t(device_model %s)\n", dm_info->device_model ? : "default"); printf("\t\t\t(videoram %d)\n", dm_info->videoram); @@ -766,6 +767,8 @@ static void parse_config_data(const char b_info->u.hvm.timer_mode = l; if (!xlu_cfg_get_long (config, "nestedhvm", &l, 0)) b_info->u.hvm.nested_hvm = l; + if (!xlu_cfg_get_long (config, "iommu", &l, 0)) + b_info->u.hvm.iommu = l; break; case LIBXL_DOMAIN_TYPE_PV: {
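For illustration, a minimal guest configuration fragment using the new option alongside an assigned device might look like the following; the device BDF is made up, and nothing here beyond "iommu =" is introduced by this patch.

    builder = "hvm"
    # Expose the emulated AMD IOMMU so the guest's IOMMUv2 driver can drive
    # the passed-through ATS device; default is 0 (disabled).
    iommu = 1
    pci = [ '01:00.0' ]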
Ian Jackson
2012-Jan-10 17:13 UTC
Re: [PATCH 13 of 14 V3] libxl: bind virtual bdf to physical bdf after device assignment
Wei Wang writes ("[PATCH 13 of 14 V3] libxl: bind virtual bdf to physical bdf after device assignment"):
> libxl: bind virtual bdf to physical bdf after device assignment

I confess I don't understand at all why this is needed.

Ian.
Wei Wang2
2012-Jan-10 17:35 UTC
Re: [PATCH 13 of 14 V3] libxl: bind virtual bdf to physical bdf after device assignment
On Tuesday 10 January 2012 18:13:35 Ian Jackson wrote:
> Wei Wang writes ("[PATCH 13 of 14 V3] libxl: bind virtual bdf to physical bdf after device assignment"):
> > libxl: bind virtual bdf to physical bdf after device assignment
>
> I confess I don't understand at all why this is needed.
>
> Ian.

Oh, the idea of this hypercall is to allow the hypervisor to trace the
virtual bdf of a physical bdf. The IOMMU emulator reads the iommu ppr log
from the host buffer and dispatches the entries into the guest log buffer.
In a host log entry the physical bdf is written by the iommu, and it needs
to be changed to the virtual bdf when written back to the guest. It might
also be done if we could extend the interface of xc_assign_device?

Thanks,
Wei
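Condensed from the guest IOMMU emulation code earlier in the series, the forwarding path being described is roughly the following; this is an illustrative fragment rather than the exact code, and guest_log_slot is a stand-in name for the slot computed from the guest log tail.

    /* A host PPR log entry carries the machine BDF written by the IOMMU. */
    mbdf = iommu_get_devid_from_cmd(entry[0]);
    gbdf = guest_bdf(d, 0, mbdf);              /* mapping set up via iommu_bind_bdf */
    iommu_set_devid_to_cmd(&entry[0], gbdf);   /* rewrite to the guest's view */
    memcpy(guest_log_slot, entry, sizeof(ppr_entry_t));
    /* ...then inject the guest MSI configured via iommu_set_msi(). */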
Ian Campbell
2012-Jan-11 08:43 UTC
Re: [PATCH 14 of 14 V3] libxl: Introduce a new guest config file parameter
On Tue, 2012-01-10 at 17:07 +0000, Wei Wang wrote:
> # HG changeset patch
> # User Wei Wang <wei.wang2@amd.com>
> # Date 1326213623 -3600
> # Node ID 39eb093ea89eeaa4dbff29439499f2a289291ff0
> # Parent 9e89b6485b6c91a8d563c46c47a8d768eee7d1f2
> libxl: Introduce a new guest config file parameter
> Use iommu = {1,0} to enable or disable guest iommu emulation.
> Default value is 0.

Please patch docs/man/xl.cfg.pod.5 to explain this new option to the
users, when/why they would enable it etc.

A description of the hardware requirements might be useful, although
perhaps not in that document. Likewise the guest OS requirements. Is
there a passthru page on the wiki which could be amended?

> Signed-off-by: Wei Wang <wei.wang2@amd.com>
[...]
> diff -r 9e89b6485b6c -r 39eb093ea89e tools/libxl/xl_cmdimpl.c
> --- a/tools/libxl/xl_cmdimpl.c Tue Jan 10 17:40:20 2012 +0100
> +++ b/tools/libxl/xl_cmdimpl.c Tue Jan 10 17:40:23 2012 +0100
> @@ -360,6 +360,7 @@ static void printf_info(int domid,
>     printf("\t\t\t(vpt_align %d)\n", b_info->u.hvm.vpt_align);
>     printf("\t\t\t(timer_mode %d)\n", b_info->u.hvm.timer_mode);
>     printf("\t\t\t(nestedhvm %d)\n", b_info->u.hvm.nested_hvm);
> +   printf("\t\t\t(iommu %d)\n", b_info->u.hvm.iommu);

I wonder if we should stop adding new stuff to this output, it's for
legacy users anyway.

Ian.
Wei Wang2
2012-Jan-11 10:47 UTC
Re: [PATCH 14 of 14 V3] libxl: Introduce a new guest config file parameter
On Wednesday 11 January 2012 09:43:44 Ian Campbell wrote:
> On Tue, 2012-01-10 at 17:07 +0000, Wei Wang wrote:
> > # HG changeset patch
> > # User Wei Wang <wei.wang2@amd.com>
> > # Date 1326213623 -3600
> > # Node ID 39eb093ea89eeaa4dbff29439499f2a289291ff0
> > # Parent 9e89b6485b6c91a8d563c46c47a8d768eee7d1f2
> > libxl: Introduce a new guest config file parameter
> > Use iommu = {1,0} to enable or disable guest iommu emulation.
> > Default value is 0.
>
> Please patch docs/man/xl.cfg.pod.5 to explain this new option to the
> users, when/why they would enable it etc.

Sure, will do that.

> A description of the hardware requirements might be useful, although
> perhaps not in that document. Likewise the guest OS requirements. Is
> there a passthru page on the wiki which could be amended?

I could add a new section in http://wiki.xen.org/wiki/VTdHowTo describing
how to use iommu emulation for ats/gpgpu passthru and the hw/sw
requirements.

> > Signed-off-by: Wei Wang <wei.wang2@amd.com>
>
> [...]
>
> > diff -r 9e89b6485b6c -r 39eb093ea89e tools/libxl/xl_cmdimpl.c
> > --- a/tools/libxl/xl_cmdimpl.c Tue Jan 10 17:40:20 2012 +0100
> > +++ b/tools/libxl/xl_cmdimpl.c Tue Jan 10 17:40:23 2012 +0100
> > @@ -360,6 +360,7 @@ static void printf_info(int domid,
> >     printf("\t\t\t(vpt_align %d)\n", b_info->u.hvm.vpt_align);
> >     printf("\t\t\t(timer_mode %d)\n", b_info->u.hvm.timer_mode);
> >     printf("\t\t\t(nestedhvm %d)\n", b_info->u.hvm.nested_hvm);
> > +   printf("\t\t\t(iommu %d)\n", b_info->u.hvm.iommu);
>
> I wonder if we should stop adding new stuff to this output, it's for
> legacy users anyway.

Sounds like I could remove it.

Thanks,
Wei

> Ian.
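For what it is worth, one possible shape for that xl.cfg.pod.5 entry — the wording here is invented and not part of the series — would be:

    =item B<iommu=BOOLEAN>

    Enables emulation of an AMD IOMMU (IOMMUv2) inside the guest.  This is
    required when passing through ATS-capable devices that rely on PRI/PASID
    and two-level DMA translation; the guest OS must provide an
    IOMMUv2-capable driver.  Default is 0 (disabled).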
Jan Beulich
2012-Jan-11 15:04 UTC
Re: [PATCH 00 of 14 V3] amd iommu: support ATS device passthru on IOMMUv2 systems
>>> On 10.01.12 at 18:07, Wei Wang <wei.wang2@amd.com> wrote:
> Hi all, this is patch v3.
> ATS devices with PRI and PASID capabilities can communicate with iommuv2 to
> perform two level (nested) address translation and demand paging for DMA.
> To passthru such devices, iommu driver has to been enabled in guest OS.
> This patch set adds initial iommu emulation for hvm guests to support ATS
> device passthru.

I would look into committing 1-6 and 10 (if that one is independent of
7-9), if you can confirm that those on their own provide meaningful
benefit (enabling the ppr log probably is what I'm after, but I'd still
like your confirmation - patch 3 in particular doesn't look very useful
without the later ones). So ideally the ones leading up to the ppr log
enabling would all be first (or even a separate series), and the guest
iommu ones would follow (as those make only sense when the tools
maintainers are okay with the changes too).

Jan

> changes in v3:
> * Use xenstore to receive guest iommu configuration instead of adding in a
> new field in hvm_info_table.
> * Support pci segment in vbdf to mbdf bind.
> * Make hypercalls visible for non-x86 platforms.
> * A few code cleanups according to comments from Jan and Ian.
>
> Changes in v2:
> * Do not use linked list to access guest iommu tables.
> * Do not parse iommu parameter in libxl_device_model_info again.
> * Fix incorrect logical calculation in patch 11.
> * Fix hypercall definition for non-x86 systems.
>
> Thanks,
> Wei
Wei Wang
2012-Jan-11 17:36 UTC
Re: [PATCH 00 of 14 V3] amd iommu: support ATS device passthru on IOMMUv2 systems
On 11.01.2012 16:04, Jan Beulich wrote:
>>>> On 10.01.12 at 18:07, Wei Wang <wei.wang2@amd.com> wrote:
>> Hi all, this is patch v3.
>> ATS devices with PRI and PASID capabilities can communicate with iommuv2 to
>> perform two level (nested) address translation and demand paging for DMA.
>> To passthru such devices, iommu driver has to been enabled in guest OS.
>> This patch set adds initial iommu emulation for hvm guests to support ATS
>> device passthru.
> I would look into committing 1-6 and 10 (if that one is independent of
> 7-9), if you can confirm that those on their own provide meaningful
> benefit (enabling the ppr log probably is what I'm after, but I'd still
> like your confirmation - patch 3 in particular doesn't look very useful
> without the later ones). So ideally the ones leading up to the ppr log
> enabling would all be first (or even a separate series), and the guest
> iommu ones would follow (as those make only sense when the tools
> maintainers are okay with the changes too)

Hi Jan,

Thanks for doing this. It sounds great! Even without guest iommu being
enabled, patches 1-6 and 10 would still be useful for turning on the ppr
and GT features. Patch 6 will call some functions from patch 3. If you
want to leave patch 3 behind, I could send a new version to move patch 6
ahead.

Thanks,
Wei

> Jan
Jan Beulich
2012-Jan-12 11:36 UTC
Re: [PATCH 03 of 14 V3] amd iommu: Add iommu emulation for hvm guest
>>> On 10.01.12 at 18:07, Wei Wang <wei.wang2@amd.com> wrote:
> +static unsigned long get_gfn_from_base_reg(uint64_t base_raw)
> +{
> +    uint64_t addr_lo, addr_hi, addr64;
> +
> +    addr_lo = iommu_get_addr_lo_from_reg(base_raw & DMA_32BIT_MASK);
> +    addr_hi = iommu_get_addr_hi_from_reg(base_raw >> 32);
> +    addr64 = (addr_hi << 32) | (addr_lo << PAGE_SHIFT);

I suppose that this isn't really correct - addr_lo shouldn't really
need any shifting, or else base_raw would be a pretty odd entity.
I'll convert the function to use reg_to_u64() instead. While I
won't do this, I then also wonder whether the first two operations
could be converted to u64_to_reg(), and if so, what the purpose
of the whole function is (it would then merely shift the input
value to obtain a frame number).

Jan

> +
> +    ASSERT ( addr64 != 0 );
> +
> +    return addr64 >> PAGE_SHIFT;
> +}
Wei Wang
2012-Jan-16 10:29 UTC
Re: [PATCH 03 of 14 V3] amd iommu: Add iommu emulation for hvm guest
On 01/12/2012 12:36 PM, Jan Beulich wrote:
>>>> On 10.01.12 at 18:07, Wei Wang <wei.wang2@amd.com> wrote:
>> +static unsigned long get_gfn_from_base_reg(uint64_t base_raw)
>> +{
>> +    uint64_t addr_lo, addr_hi, addr64;
>> +
>> +    addr_lo = iommu_get_addr_lo_from_reg(base_raw & DMA_32BIT_MASK);
>> +    addr_hi = iommu_get_addr_hi_from_reg(base_raw >> 32);
>> +    addr64 = (addr_hi << 32) | (addr_lo << PAGE_SHIFT);
> I suppose that this isn't really correct - addr_lo shouldn't really
> need any shifting, or else base_raw would be a pretty odd entity.
> I'll convert the function to use reg_to_u64() instead. While I
> won't do this, I then also wonder whether the first two operations
> could be converted to u64_to_reg(), and if so, what the purpose
> of the whole function is (it would then merely shift the input
> value to obtain a frame number)

The names might be confusing, but actually the iommu mmio regs do not
cache the lower 12 bits of the base addresses, so addr_lo only contains
bit 12 - bit 31 of the lower 32 bit part. That is why a 12 bit left shift
is needed to form a full 64 bit address. But anyway this function seems
redundant; I attached a patch to simplify it.

Thanks,
Wei

> Jan
>
>> +
>> +    ASSERT ( addr64 != 0 );
>> +
>> +    return addr64 >> PAGE_SHIFT;
>> +}
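To spell out the point about the missing low bits (a small sketch, not code from the series): the register halves hold base[31:12] in the low word and base[51:32] in the high word (per the 20-bit high masks in the defs header), so only the low field needs shifting back up when the full address is reconstructed.

    /* addr_lo = base[31:12] from the low register,
     * addr_hi = base[51:32] from the high register. */
    uint64_t base = ((uint64_t)addr_hi << 32) | ((uint64_t)addr_lo << PAGE_SHIFT);
    unsigned long gfn = base >> PAGE_SHIFT;   /* == (addr_hi << 20) | addr_lo */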