Wei Wang
2011-Dec-14 15:29 UTC
[PATCH 00 of 16] [RFC] amd iommu: support ATS device passthru on IOMMUv2 systems
ATS devices with PRI and PASID capabilities can communicate with iommuv2 to do 2 level (nested) DMA translation and IO demand paging. To do that, both iommu driver and ats device have to been enabled in guest OS. This patch set adds initial iommu emulation for hvm guests to support ATS device passthru. Please review. Thanks, Wei
Wei Wang
2011-Dec-14 15:29 UTC
[PATCH 01 of 16] amd iommu: Refactoring iommu ring buffer definition
# HG changeset patch # User Wei Wang <wei.wang2@amd.com> # Date 1323863478 -3600 # Node ID a3d2f93d82940a391de00717ddbf842f821fcea5 # Parent 846725c81ed924e03bf4e6d65f912be089c54a06 amd iommu: Refactoring iommu ring buffer definition. Introduce struct ring_buffer to represent iommu cmd buffer, event log and ppr log Signed-off-by: Wei Wang <wei.wang2@amd.com> diff -r 846725c81ed9 -r a3d2f93d8294 xen/drivers/passthrough/amd/iommu_cmd.c --- a/xen/drivers/passthrough/amd/iommu_cmd.c Tue Dec 13 13:32:23 2011 +0100 +++ b/xen/drivers/passthrough/amd/iommu_cmd.c Wed Dec 14 12:51:18 2011 +0100 @@ -29,7 +29,7 @@ static int queue_iommu_command(struct am u32 tail, head, *cmd_buffer; int i; - tail = iommu->cmd_buffer_tail; + tail = iommu->cmd_buffer.tail; if ( ++tail == iommu->cmd_buffer.entries ) tail = 0; @@ -40,13 +40,13 @@ static int queue_iommu_command(struct am if ( head != tail ) { cmd_buffer = (u32 *)(iommu->cmd_buffer.buffer + - (iommu->cmd_buffer_tail * + (iommu->cmd_buffer.tail * IOMMU_CMD_BUFFER_ENTRY_SIZE)); for ( i = 0; i < IOMMU_CMD_BUFFER_U32_PER_ENTRY; i++ ) cmd_buffer[i] = cmd[i]; - iommu->cmd_buffer_tail = tail; + iommu->cmd_buffer.tail = tail; return 1; } @@ -57,7 +57,7 @@ static void commit_iommu_command_buffer( { u32 tail; - set_field_in_reg_u32(iommu->cmd_buffer_tail, 0, + set_field_in_reg_u32(iommu->cmd_buffer.tail, 0, IOMMU_CMD_BUFFER_TAIL_MASK, IOMMU_CMD_BUFFER_TAIL_SHIFT, &tail); writel(tail, iommu->mmio_base+IOMMU_CMD_BUFFER_TAIL_OFFSET); diff -r 846725c81ed9 -r a3d2f93d8294 xen/drivers/passthrough/amd/iommu_init.c --- a/xen/drivers/passthrough/amd/iommu_init.c Tue Dec 13 13:32:23 2011 +0100 +++ b/xen/drivers/passthrough/amd/iommu_init.c Wed Dec 14 12:51:18 2011 +0100 @@ -294,20 +294,20 @@ static int amd_iommu_read_event_log(stru IOMMU_EVENT_LOG_TAIL_MASK, IOMMU_EVENT_LOG_TAIL_SHIFT); - while ( tail != iommu->event_log_head ) + while ( tail != iommu->event_log.head ) { /* read event log entry */ event_log = (u32 *)(iommu->event_log.buffer + - (iommu->event_log_head * + (iommu->event_log.head * IOMMU_EVENT_LOG_ENTRY_SIZE)); parse_event_log_entry(iommu, event_log); - if ( ++iommu->event_log_head == iommu->event_log.entries ) - iommu->event_log_head = 0; + if ( ++iommu->event_log.head == iommu->event_log.entries ) + iommu->event_log.head = 0; /* update head pointer */ - set_field_in_reg_u32(iommu->event_log_head, 0, + set_field_in_reg_u32(iommu->event_log.head, 0, IOMMU_EVENT_LOG_HEAD_MASK, IOMMU_EVENT_LOG_HEAD_SHIFT, &head); writel(head, iommu->mmio_base + IOMMU_EVENT_LOG_HEAD_OFFSET); @@ -346,7 +346,7 @@ static void amd_iommu_reset_event_log(st writel(entry, iommu->mmio_base+IOMMU_STATUS_MMIO_OFFSET); /*reset event log base address */ - iommu->event_log_head = 0; + iommu->event_log.head = 0; set_iommu_event_log_control(iommu, IOMMU_CONTROL_ENABLED); } @@ -605,71 +605,83 @@ static void enable_iommu(struct amd_iomm } -static void __init deallocate_iommu_table_struct( - struct table_struct *table) +static void __init deallocate_buffer(void *buf, uint32_t sz) { int order = 0; - if ( table->buffer ) + if ( buf ) { - order = get_order_from_bytes(table->alloc_size); - __free_amd_iommu_tables(table->buffer, order); - table->buffer = NULL; + order = get_order_from_bytes(sz); + __free_amd_iommu_tables(buf, order); } } -static int __init allocate_iommu_table_struct(struct table_struct *table, - const char *name) +static void __init deallocate_device_table(struct table_struct *table) { - int order = 0; - if ( table->buffer == NULL ) - { - order = get_order_from_bytes(table->alloc_size); 
- table->buffer = __alloc_amd_iommu_tables(order); - - if ( table->buffer == NULL ) - { - AMD_IOMMU_DEBUG("Error allocating %s\n", name); - return -ENOMEM; - } - memset(table->buffer, 0, PAGE_SIZE * (1UL << order)); - } - return 0; + deallocate_buffer(table->buffer, table->alloc_size); + table->buffer = NULL; } -static int __init allocate_cmd_buffer(struct amd_iommu *iommu) +static void __init deallocate_ring_buffer(struct ring_buffer *ring_buf) +{ + deallocate_buffer(ring_buf->buffer, ring_buf->alloc_size); + ring_buf->buffer = NULL; + ring_buf->head = 0; + ring_buf->tail = 0; +} + +static void * __init allocate_buffer(uint32_t alloc_size, const char *name) +{ + void * buffer; + int order = get_order_from_bytes(alloc_size); + + buffer = __alloc_amd_iommu_tables(order); + + if ( buffer == NULL ) + { + AMD_IOMMU_DEBUG("Error allocating %s\n", name); + return NULL; + } + + memset(buffer, 0, PAGE_SIZE * (1UL << order)); + return buffer; +} + +static void * __init allocate_ring_buffer(struct ring_buffer *ring_buf, + uint32_t entry_size, + uint64_t entries, const char *name) +{ + ring_buf->head = 0; + ring_buf->tail = 0; + + ring_buf->entry_size = entry_size; + ring_buf->alloc_size = PAGE_SIZE << get_order_from_bytes(entries * + entry_size); + ring_buf->entries = ring_buf->alloc_size / entry_size; + ring_buf->buffer = allocate_buffer(ring_buf->alloc_size, name); + return ring_buf->buffer; +} + +static void * __init allocate_cmd_buffer(struct amd_iommu *iommu) { /* allocate ''command buffer'' in power of 2 increments of 4K */ - iommu->cmd_buffer_tail = 0; - iommu->cmd_buffer.alloc_size = PAGE_SIZE << - get_order_from_bytes( - PAGE_ALIGN(IOMMU_CMD_BUFFER_DEFAULT_ENTRIES - * IOMMU_CMD_BUFFER_ENTRY_SIZE)); - iommu->cmd_buffer.entries = iommu->cmd_buffer.alloc_size / - IOMMU_CMD_BUFFER_ENTRY_SIZE; - - return (allocate_iommu_table_struct(&iommu->cmd_buffer, "Command Buffer")); + return allocate_ring_buffer(&iommu->cmd_buffer, sizeof(cmd_entry_t), + IOMMU_CMD_BUFFER_DEFAULT_ENTRIES, + "Command Buffer"); } -static int __init allocate_event_log(struct amd_iommu *iommu) +static void * __init allocate_event_log(struct amd_iommu *iommu) { - /* allocate ''event log'' in power of 2 increments of 4K */ - iommu->event_log_head = 0; - iommu->event_log.alloc_size = PAGE_SIZE << - get_order_from_bytes( - PAGE_ALIGN(IOMMU_EVENT_LOG_DEFAULT_ENTRIES * - IOMMU_EVENT_LOG_ENTRY_SIZE)); - iommu->event_log.entries = iommu->event_log.alloc_size / - IOMMU_EVENT_LOG_ENTRY_SIZE; - - return (allocate_iommu_table_struct(&iommu->event_log, "Event Log")); + /* allocate ''event log'' in power of 2 increments of 4K */ + return allocate_ring_buffer(&iommu->event_log, sizeof(event_entry_t), + IOMMU_EVENT_LOG_DEFAULT_ENTRIES, "Event Log"); } static int __init amd_iommu_init_one(struct amd_iommu *iommu) { - if ( allocate_cmd_buffer(iommu) != 0 ) + if ( allocate_cmd_buffer(iommu) == NULL ) goto error_out; - if ( allocate_event_log(iommu) != 0 ) + if ( allocate_event_log(iommu) == NULL ) goto error_out; if ( map_iommu_mmio_region(iommu) != 0 ) @@ -708,8 +720,8 @@ static void __init amd_iommu_init_cleanu list_del(&iommu->list); if ( iommu->enabled ) { - deallocate_iommu_table_struct(&iommu->cmd_buffer); - deallocate_iommu_table_struct(&iommu->event_log); + deallocate_ring_buffer(&iommu->cmd_buffer); + deallocate_ring_buffer(&iommu->event_log); unmap_iommu_mmio_region(iommu); } xfree(iommu); @@ -719,7 +731,7 @@ static void __init amd_iommu_init_cleanu iterate_ivrs_entries(amd_iommu_free_intremap_table); /* free device table */ - 
deallocate_iommu_table_struct(&device_table); + deallocate_device_table(&device_table); /* free ivrs_mappings[] */ radix_tree_destroy(&ivrs_maps, xfree); @@ -830,8 +842,10 @@ static int __init amd_iommu_setup_device device_table.entries = device_table.alloc_size / IOMMU_DEV_TABLE_ENTRY_SIZE; - if ( allocate_iommu_table_struct(&device_table, "Device Table") != 0 ) - return -ENOMEM; + device_table.buffer = allocate_buffer(device_table.alloc_size, + "Device Table"); + if ( device_table.buffer == NULL ) + return -ENOMEM; /* Add device table entries */ for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ ) diff -r 846725c81ed9 -r a3d2f93d8294 xen/include/asm-x86/amd-iommu.h --- a/xen/include/asm-x86/amd-iommu.h Tue Dec 13 13:32:23 2011 +0100 +++ b/xen/include/asm-x86/amd-iommu.h Wed Dec 14 12:51:18 2011 +0100 @@ -30,12 +30,43 @@ extern struct list_head amd_iommu_head; +#pragma pack(1) +typedef struct event_entry +{ + uint32_t data[4]; +} event_entry_t; + +typedef struct ppr_entry +{ + uint32_t data[4]; +} ppr_entry_t; + +typedef struct cmd_entry +{ + uint32_t data[4]; +} cmd_entry_t; + +typedef struct dev_entry +{ + uint32_t data[8]; +} dev_entry_t; +#pragma pack() + struct table_struct { void *buffer; unsigned long entries; unsigned long alloc_size; }; +struct ring_buffer { + void *buffer; + unsigned long entries; + unsigned long alloc_size; + unsigned long entry_size; + uint32_t tail; + uint32_t head; +}; + typedef struct iommu_cap { uint32_t header; /* offset 00h */ uint32_t base_low; /* offset 04h */ @@ -60,10 +91,8 @@ struct amd_iommu { unsigned long mmio_base_phys; struct table_struct dev_table; - struct table_struct cmd_buffer; - u32 cmd_buffer_tail; - struct table_struct event_log; - u32 event_log_head; + struct ring_buffer cmd_buffer; + struct ring_buffer event_log; int exclusion_enable; int exclusion_allow_all;
Wei Wang
2011-Dec-14 15:29 UTC
[PATCH 02 of 16] amd iommu: Introduces new helper functions to simplify iommu bitwise operations
# HG changeset patch # User Wei Wang <wei.wang2@amd.com> # Date 1323863482 -3600 # Node ID b190e3362524a0e160e9892cd600447cee2a022e # Parent a3d2f93d82940a391de00717ddbf842f821fcea5 amd iommu: Introduces new helper functions to simplify iommu bitwise operations Signed-off-by: Wei Wang <wei.wang2@amd.com diff -r a3d2f93d8294 -r b190e3362524 xen/drivers/passthrough/amd/iommu_cmd.c --- a/xen/drivers/passthrough/amd/iommu_cmd.c Wed Dec 14 12:51:18 2011 +0100 +++ b/xen/drivers/passthrough/amd/iommu_cmd.c Wed Dec 14 12:51:22 2011 +0100 @@ -33,10 +33,8 @@ static int queue_iommu_command(struct am if ( ++tail == iommu->cmd_buffer.entries ) tail = 0; - head = get_field_from_reg_u32(readl(iommu->mmio_base + - IOMMU_CMD_BUFFER_HEAD_OFFSET), - IOMMU_CMD_BUFFER_HEAD_MASK, - IOMMU_CMD_BUFFER_HEAD_SHIFT); + head = iommu_get_rb_pointer(readl(iommu->mmio_base + + IOMMU_CMD_BUFFER_HEAD_OFFSET)); if ( head != tail ) { cmd_buffer = (u32 *)(iommu->cmd_buffer.buffer + @@ -55,11 +53,9 @@ static int queue_iommu_command(struct am static void commit_iommu_command_buffer(struct amd_iommu *iommu) { - u32 tail; + u32 tail = 0; - set_field_in_reg_u32(iommu->cmd_buffer.tail, 0, - IOMMU_CMD_BUFFER_TAIL_MASK, - IOMMU_CMD_BUFFER_TAIL_SHIFT, &tail); + iommu_set_rb_pointer(&tail, iommu->cmd_buffer.tail); writel(tail, iommu->mmio_base+IOMMU_CMD_BUFFER_TAIL_OFFSET); } diff -r a3d2f93d8294 -r b190e3362524 xen/drivers/passthrough/amd/iommu_init.c --- a/xen/drivers/passthrough/amd/iommu_init.c Wed Dec 14 12:51:18 2011 +0100 +++ b/xen/drivers/passthrough/amd/iommu_init.c Wed Dec 14 12:51:22 2011 +0100 @@ -106,21 +106,21 @@ static void register_iommu_dev_table_in_ u64 addr_64, addr_lo, addr_hi; u32 entry; + ASSERT( iommu->dev_table.buffer ); + addr_64 = (u64)virt_to_maddr(iommu->dev_table.buffer); addr_lo = addr_64 & DMA_32BIT_MASK; addr_hi = addr_64 >> 32; - set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0, - IOMMU_DEV_TABLE_BASE_LOW_MASK, - IOMMU_DEV_TABLE_BASE_LOW_SHIFT, &entry); + entry = 0; + iommu_set_addr_lo_to_reg(&entry, addr_lo >> PAGE_SHIFT); set_field_in_reg_u32((iommu->dev_table.alloc_size / PAGE_SIZE) - 1, entry, IOMMU_DEV_TABLE_SIZE_MASK, IOMMU_DEV_TABLE_SIZE_SHIFT, &entry); writel(entry, iommu->mmio_base + IOMMU_DEV_TABLE_BASE_LOW_OFFSET); - set_field_in_reg_u32((u32)addr_hi, 0, - IOMMU_DEV_TABLE_BASE_HIGH_MASK, - IOMMU_DEV_TABLE_BASE_HIGH_SHIFT, &entry); + entry = 0; + iommu_set_addr_hi_to_reg(&entry, addr_hi); writel(entry, iommu->mmio_base + IOMMU_DEV_TABLE_BASE_HIGH_OFFSET); } @@ -130,21 +130,21 @@ static void register_iommu_cmd_buffer_in u32 power_of2_entries; u32 entry; + ASSERT( iommu->dev_table.buffer ); + addr_64 = (u64)virt_to_maddr(iommu->cmd_buffer.buffer); addr_lo = addr_64 & DMA_32BIT_MASK; addr_hi = addr_64 >> 32; - set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0, - IOMMU_CMD_BUFFER_BASE_LOW_MASK, - IOMMU_CMD_BUFFER_BASE_LOW_SHIFT, &entry); + entry = 0; + iommu_set_addr_lo_to_reg(&entry, addr_lo >> PAGE_SHIFT); writel(entry, iommu->mmio_base + IOMMU_CMD_BUFFER_BASE_LOW_OFFSET); power_of2_entries = get_order_from_bytes(iommu->cmd_buffer.alloc_size) + IOMMU_CMD_BUFFER_POWER_OF2_ENTRIES_PER_PAGE; - set_field_in_reg_u32((u32)addr_hi, 0, - IOMMU_CMD_BUFFER_BASE_HIGH_MASK, - IOMMU_CMD_BUFFER_BASE_HIGH_SHIFT, &entry); + entry = 0; + iommu_set_addr_hi_to_reg(&entry, addr_hi); set_field_in_reg_u32(power_of2_entries, entry, IOMMU_CMD_BUFFER_LENGTH_MASK, IOMMU_CMD_BUFFER_LENGTH_SHIFT, &entry); @@ -157,21 +157,21 @@ static void register_iommu_event_log_in_ u32 power_of2_entries; u32 entry; + ASSERT( 
iommu->dev_table.buffer ); + addr_64 = (u64)virt_to_maddr(iommu->event_log.buffer); addr_lo = addr_64 & DMA_32BIT_MASK; addr_hi = addr_64 >> 32; - set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0, - IOMMU_EVENT_LOG_BASE_LOW_MASK, - IOMMU_EVENT_LOG_BASE_LOW_SHIFT, &entry); + entry = 0; + iommu_set_addr_lo_to_reg(&entry, addr_lo >> PAGE_SHIFT); writel(entry, iommu->mmio_base + IOMMU_EVENT_LOG_BASE_LOW_OFFSET); power_of2_entries = get_order_from_bytes(iommu->event_log.alloc_size) + IOMMU_EVENT_LOG_POWER_OF2_ENTRIES_PER_PAGE; - set_field_in_reg_u32((u32)addr_hi, 0, - IOMMU_EVENT_LOG_BASE_HIGH_MASK, - IOMMU_EVENT_LOG_BASE_HIGH_SHIFT, &entry); + entry = 0; + iommu_set_addr_hi_to_reg(&entry, addr_hi); set_field_in_reg_u32(power_of2_entries, entry, IOMMU_EVENT_LOG_LENGTH_MASK, IOMMU_EVENT_LOG_LENGTH_SHIFT, &entry); @@ -234,14 +234,12 @@ static void register_iommu_exclusion_ran addr_lo = iommu->exclusion_base & DMA_32BIT_MASK; addr_hi = iommu->exclusion_base >> 32; - set_field_in_reg_u32((u32)addr_hi, 0, - IOMMU_EXCLUSION_BASE_HIGH_MASK, - IOMMU_EXCLUSION_BASE_HIGH_SHIFT, &entry); + entry = 0; + iommu_set_addr_hi_to_reg(&entry, addr_hi); writel(entry, iommu->mmio_base+IOMMU_EXCLUSION_BASE_HIGH_OFFSET); - set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0, - IOMMU_EXCLUSION_BASE_LOW_MASK, - IOMMU_EXCLUSION_BASE_LOW_SHIFT, &entry); + entry = 0; + iommu_set_addr_lo_to_reg(&entry, addr_lo >> PAGE_SHIFT); set_field_in_reg_u32(iommu->exclusion_allow_all, entry, IOMMU_EXCLUSION_ALLOW_ALL_MASK, @@ -490,9 +488,7 @@ static void parse_event_log_entry(struct if ( code == IOMMU_EVENT_IO_PAGE_FAULT ) { - device_id = get_field_from_reg_u32(entry[0], - IOMMU_EVENT_DEVICE_ID_MASK, - IOMMU_EVENT_DEVICE_ID_SHIFT); + device_id = iommu_get_devid_from_event(entry[0]); domain_id = get_field_from_reg_u32(entry[1], IOMMU_EVENT_DOMAIN_ID_MASK, IOMMU_EVENT_DOMAIN_ID_SHIFT); diff -r a3d2f93d8294 -r b190e3362524 xen/include/asm-x86/hvm/svm/amd-iommu-proto.h --- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h Wed Dec 14 12:51:18 2011 +0100 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h Wed Dec 14 12:51:22 2011 +0100 @@ -191,5 +191,85 @@ static inline int iommu_has_feature(stru return 0; return !!(iommu->features & (1U << bit)); } +/* access tail or head pointer of ring buffer */ +#define IOMMU_RING_BUFFER_PTR_MASK 0x0007FFF0 +#define IOMMU_RING_BUFFER_PTR_SHIFT 4 +static inline uint32_t iommu_get_rb_pointer(uint32_t reg) +{ + return get_field_from_reg_u32(reg, IOMMU_RING_BUFFER_PTR_MASK, + IOMMU_RING_BUFFER_PTR_SHIFT); +} + +static inline void iommu_set_rb_pointer(uint32_t *reg, uint32_t val) +{ + set_field_in_reg_u32(val, *reg, IOMMU_RING_BUFFER_PTR_MASK, + IOMMU_RING_BUFFER_PTR_SHIFT, reg); +} + +/* access device field from iommu cmd */ +#define IOMMU_CMD_DEVICE_ID_MASK 0x0000FFFF +#define IOMMU_CMD_DEVICE_ID_SHIFT 0 + +static inline uint16_t iommu_get_devid_from_cmd(uint32_t cmd) +{ + return get_field_from_reg_u32(cmd, IOMMU_CMD_DEVICE_ID_MASK, + IOMMU_CMD_DEVICE_ID_SHIFT); +} + +static inline void iommu_set_devid_to_cmd(uint32_t *cmd, uint16_t id) +{ + set_field_in_reg_u32(id, *cmd, IOMMU_CMD_DEVICE_ID_MASK, + IOMMU_CMD_DEVICE_ID_SHIFT, cmd); +} + +/* access address field from iommu cmd */ +#define IOMMU_CMD_ADDR_LOW_MASK 0xFFFFF000 +#define IOMMU_CMD_ADDR_LOW_SHIFT 12 +#define IOMMU_CMD_ADDR_HIGH_MASK 0xFFFFFFFF +#define IOMMU_CMD_ADDR_HIGH_SHIFT 0 + +static inline uint32_t iommu_get_addr_lo_from_cmd(uint32_t cmd) +{ + return get_field_from_reg_u32(cmd, IOMMU_CMD_ADDR_LOW_MASK, + IOMMU_CMD_ADDR_LOW_SHIFT); +} + 
+static inline uint32_t iommu_get_addr_hi_from_cmd(uint32_t cmd) +{ + return get_field_from_reg_u32(cmd, IOMMU_CMD_ADDR_LOW_MASK, + IOMMU_CMD_ADDR_HIGH_SHIFT); +} + +#define iommu_get_devid_from_event iommu_get_devid_from_cmd + +/* access iommu base addresses from mmio regs */ +#define IOMMU_REG_BASE_ADDR_BASE_LOW_MASK 0xFFFFF000 +#define IOMMU_REG_BASE_ADDR_LOW_SHIFT 12 +#define IOMMU_REG_BASE_ADDR_HIGH_MASK 0x000FFFFF +#define IOMMU_REG_BASE_ADDR_HIGH_SHIFT 0 + +static inline void iommu_set_addr_lo_to_reg(uint32_t *reg, uint32_t addr) +{ + set_field_in_reg_u32(addr, *reg, IOMMU_REG_BASE_ADDR_BASE_LOW_MASK, + IOMMU_REG_BASE_ADDR_LOW_SHIFT, reg); +} + +static inline void iommu_set_addr_hi_to_reg(uint32_t *reg, uint32_t addr) +{ + set_field_in_reg_u32(addr, *reg, IOMMU_REG_BASE_ADDR_HIGH_MASK, + IOMMU_REG_BASE_ADDR_HIGH_SHIFT, reg); +} + +static inline uint32_t iommu_get_addr_lo_from_reg(uint32_t reg) +{ + return get_field_from_reg_u32(reg, IOMMU_REG_BASE_ADDR_BASE_LOW_MASK, + IOMMU_REG_BASE_ADDR_LOW_SHIFT); +} + +static inline uint32_t iommu_get_addr_hi_from_reg(uint32_t reg) +{ + return get_field_from_reg_u32(reg, IOMMU_REG_BASE_ADDR_HIGH_MASK, + IOMMU_REG_BASE_ADDR_HIGH_SHIFT); +} #endif /* _ASM_X86_64_AMD_IOMMU_PROTO_H */
Wei Wang
2011-Dec-14 15:29 UTC
[PATCH 03 of 16] amd iommu: Add iommu emulation for hvm guest
# HG changeset patch # User Wei Wang <wei.wang2@amd.com> # Date 1323863897 -3600 # Node ID ea52a2b93dffe708084fdc6ee663bd5eee8c1031 # Parent b190e3362524a0e160e9892cd600447cee2a022e amd iommu: Add iommu emulation for hvm guest ATS device driver that support PASID [1] and PRI [2] capabilites needs to work with iommu driver in OS. If we want passthru ATS device to hvm guests using unmodified OS, we have to expose iommu functionality to HVM guest. Signed-off-by: Wei Wang <wei.wang2@amd.com> [1] http://www.pcisig.com/specifications/pciexpress/specifications/ECN-PASID-ATS-2011-03-31.pdf [2] http://www.pcisig.com/members/downloads/specifications/iov/ats_r1.1_26Jan09.pdf diff -r b190e3362524 -r ea52a2b93dff xen/drivers/passthrough/amd/Makefile --- a/xen/drivers/passthrough/amd/Makefile Wed Dec 14 12:51:22 2011 +0100 +++ b/xen/drivers/passthrough/amd/Makefile Wed Dec 14 12:58:17 2011 +0100 @@ -5,3 +5,4 @@ obj-y += pci_amd_iommu.o obj-bin-y += iommu_acpi.init.o obj-y += iommu_intr.o obj-y += iommu_cmd.o +obj-y += iommu_guest.o diff -r b190e3362524 -r ea52a2b93dff xen/drivers/passthrough/amd/iommu_cmd.c --- a/xen/drivers/passthrough/amd/iommu_cmd.c Wed Dec 14 12:51:22 2011 +0100 +++ b/xen/drivers/passthrough/amd/iommu_cmd.c Wed Dec 14 12:58:17 2011 +0100 @@ -398,3 +398,15 @@ void amd_iommu_flush_all_caches(struct a invalidate_iommu_all(iommu); flush_command_buffer(iommu); } + +void amd_iommu_send_guest_cmd(struct amd_iommu *iommu, u32 cmd[]) +{ + unsigned long flags; + + spin_lock_irqsave(&iommu->lock, flags); + + send_iommu_command(iommu, cmd); + flush_command_buffer(iommu); + + spin_unlock_irqrestore(&iommu->lock, flags); +} diff -r b190e3362524 -r ea52a2b93dff xen/drivers/passthrough/amd/iommu_guest.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/drivers/passthrough/amd/iommu_guest.c Wed Dec 14 12:58:17 2011 +0100 @@ -0,0 +1,953 @@ +/* + * Copyright (C) 2011 Advanced Micro Devices, Inc. + * Author: Wei Wang <wei.wang2@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <xen/sched.h> +#include <asm/p2m.h> +#include <asm/hvm/iommu.h> +#include <asm/amd-iommu.h> +#include <asm/hvm/svm/amd-iommu-proto.h> + + +#define IOMMU_MMIO_SIZE 0x8000 +#define IOMMU_MMIO_PAGE_NR 0x8 +#define RING_BF_LENGTH_MASK 0x0F000000 +#define RING_BF_LENGTH_SHIFT 24 + +#define PASMAX_9_bit 0x8 +#define GUEST_CR3_1_LEVEL 0x0 +#define GUEST_ADDRESS_SIZE_6_LEVEL 0x2 +#define HOST_ADDRESS_SIZE_6_LEVEL 0x2 + +#define guest_iommu_set_status(iommu, bit) \ + iommu_set_bit(&((iommu)->reg_status.lo), bit) + +#define guest_iommu_clear_status(iommu, bit) \ + iommu_clear_bit(&((iommu)->reg_status.lo), bit) + +#define reg_to_u64(reg) (((uint64_t)reg.hi << 32) | reg.lo ) +#define u64_to_reg(reg, val) \ + do \ + { \ + (reg)->lo = val & 0xFFFFFFFF; \ + (reg)->hi = (val >> 32) & 0xFFFFFFFF; \ + } while(0) + +static unsigned int machine_bdf(struct domain *d, uint16_t guest_bdf) +{ + return guest_bdf; +} + +static uint16_t guest_bdf(struct domain *d, uint16_t machine_bdf) +{ + return machine_bdf; +} + +static inline struct guest_iommu *domain_iommu(struct domain *d) +{ + return domain_hvm_iommu(d)->g_iommu; +} + +static inline struct guest_iommu *vcpu_iommu(struct vcpu *v) +{ + return domain_hvm_iommu(v->domain)->g_iommu; +} + +static void guest_iommu_enable(struct guest_iommu *iommu) +{ + iommu->enabled = 1; +} + +static void guest_iommu_disable(struct guest_iommu *iommu) +{ + iommu->enabled = 0; +} + +static uint64_t get_guest_cr3_from_dte(dev_entry_t *dte) +{ + uint64_t gcr3_1, gcr3_2, gcr3_3; + + gcr3_1 = get_field_from_reg_u32(dte->data[1], + IOMMU_DEV_TABLE_GCR3_1_MASK, + IOMMU_DEV_TABLE_GCR3_1_SHIFT); + gcr3_2 = get_field_from_reg_u32(dte->data[2], + IOMMU_DEV_TABLE_GCR3_2_MASK, + IOMMU_DEV_TABLE_GCR3_2_SHIFT); + gcr3_3 = get_field_from_reg_u32(dte->data[3], + IOMMU_DEV_TABLE_GCR3_3_MASK, + IOMMU_DEV_TABLE_GCR3_3_SHIFT); + + return ((gcr3_3 << 31) | (gcr3_2 << 15 ) | (gcr3_1 << 12)) >> PAGE_SHIFT; +} + +static uint16_t get_domid_from_dte(dev_entry_t *dte) +{ + return get_field_from_reg_u32(dte->data[2], IOMMU_DEV_TABLE_DOMAIN_ID_MASK, + IOMMU_DEV_TABLE_DOMAIN_ID_SHIFT); +} + +static uint16_t get_glx_from_dte(dev_entry_t *dte) +{ + return get_field_from_reg_u32(dte->data[1], IOMMU_DEV_TABLE_GLX_MASK, + IOMMU_DEV_TABLE_GLX_SHIFT); +} + +static uint16_t get_gv_from_dte(dev_entry_t *dte) +{ + return get_field_from_reg_u32(dte->data[1],IOMMU_DEV_TABLE_GV_MASK, + IOMMU_DEV_TABLE_GV_SHIFT); +} + +static unsigned int host_domid(struct domain *d, uint64_t g_domid) +{ + /* Only support one PPR device in guest for now */ + return d->domain_id; +} + +static void guest_iommu_deliver_msi(struct domain *d) +{ + uint8_t vector, dest, dest_mode, delivery_mode, trig_mode; + struct guest_iommu *iommu = domain_iommu(d); + + vector = iommu->msi.vector; + dest = iommu->msi.dest; + dest_mode = iommu->msi.dest_mode; + delivery_mode = iommu->msi.delivery_mode; + trig_mode = iommu->msi.trig_mode; + + vmsi_deliver(d, vector, dest, dest_mode, delivery_mode, trig_mode); +} + +static struct page_info* guest_iommu_get_page(struct list_head *pglist, + unsigned int entry_size, + unsigned int pos) +{ + int idx; + struct list_head *head; + struct guest_pages *gpage = NULL; + + idx = (pos * entry_size) >> PAGE_SHIFT; + list_for_each( head, pglist ) + { + gpage = list_entry(head, struct guest_pages, 
list); + if ( (--idx) < 0 ) + break; + } + return (gpage) ? gpage->page : NULL; +} + +static void guest_iommu_map_table(uint64_t base_raw, struct list_head *head, + struct domain *d, unsigned long npages) +{ + struct guest_pages * gpage; + uint64_t addr_lo, addr_hi, addr64; + unsigned long gfn; + p2m_type_t p2mt; + + addr_lo = iommu_get_addr_lo_from_reg(base_raw & DMA_32BIT_MASK); + addr_hi = iommu_get_addr_hi_from_reg(base_raw >> 32); + addr64 = (addr_hi << 32) | (addr_lo << PAGE_SHIFT); + + ASSERT ( addr64 != 0 && head != NULL ); + + gfn = addr64 >> PAGE_SHIFT; + + /* + * map guest table pages into Xen + * Assuming guest table contiguous in guest space + */ + for ( int i = 0; i < npages; i++ ) + { + gpage = (struct guest_pages *) xzalloc(struct guest_pages); + if ( unlikely(gpage == NULL) ) + { + AMD_IOMMU_DEBUG("Cannot allocate guest_pages struct\n"); + return; + } + + gpage->page = mfn_to_page(mfn_x(get_gfn(d, gfn, &p2mt))); + list_add_tail(&gpage->list, head); + put_gfn(d,gfn); + gfn ++; + } +} + +static void guest_iommu_enable_dev_table(struct guest_iommu *iommu) +{ + unsigned int npages; + uint32_t length_raw = get_field_from_reg_u32(iommu->dev_table.reg_base.lo, + IOMMU_DEV_TABLE_SIZE_MASK, + IOMMU_DEV_TABLE_SIZE_SHIFT); + + iommu->dev_table.size = (length_raw + 1) * PAGE_SIZE; + npages = PAGE_ALIGN(iommu->dev_table.size) >> PAGE_SHIFT; + + guest_iommu_map_table(reg_to_u64(iommu->dev_table.reg_base), + &iommu->dev_table.page_list, iommu->domain, npages); +} + +static void guest_iommu_enable_ring_buffer(struct guest_iommu *iommu, + struct guest_buffer *buffer, + uint32_t entry_size) +{ + unsigned npages; + uint32_t length_raw = get_field_from_reg_u32(buffer->reg_base.hi, + RING_BF_LENGTH_MASK, + RING_BF_LENGTH_SHIFT); + buffer->entries = 1 << length_raw; + npages = PAGE_ALIGN(buffer->entries * entry_size) >> PAGE_SHIFT; + + guest_iommu_map_table(reg_to_u64(buffer->reg_base), + &buffer->page_list, iommu->domain, npages); +} + +void guest_iommu_add_ppr_log(struct domain *d, u32 entry[]) +{ + uint16_t gdev_id; + ppr_entry_t *log, *log_base; + unsigned int tail; + struct guest_iommu *iommu; + struct page_info *page = NULL; + + iommu = domain_iommu(d); + tail = iommu_get_rb_pointer(iommu->ppr_log.reg_tail.lo); + + page = guest_iommu_get_page(&iommu->ppr_log.page_list, + sizeof(ppr_entry_t), tail); + if ( unlikely(page == NULL) ) + { + AMD_IOMMU_DEBUG("Error: Cannot access guest event log pages\n"); + return; + } + + log_base = __map_domain_page(page); + log = log_base + tail % (PAGE_SIZE / sizeof(ppr_entry_t)); + + /* Convert physical device id back into virtual device id */ + gdev_id = guest_bdf(d, iommu_get_devid_from_cmd(entry[0])); + iommu_set_devid_to_cmd(&entry[0], gdev_id); + + memcpy(log, entry, sizeof(ppr_entry_t)); + + /* Now shift ppr log tail pointer */ + tail++; + iommu_set_rb_pointer(&iommu->ppr_log.reg_tail.lo, tail); + unmap_domain_page(log_base); + + guest_iommu_deliver_msi(d); +} + +void guest_iommu_add_event_log(struct domain *d, u32 entry[]) +{ + uint16_t dev_id; + struct event_entry *log, *log_base; + unsigned int tail; + struct guest_iommu *iommu; + struct page_info *page = NULL; + + iommu = domain_iommu(d); + tail = iommu_get_rb_pointer(iommu->event_log.reg_tail.lo); + + page = guest_iommu_get_page(&iommu->event_log.page_list, + sizeof(event_entry_t), tail); + if ( unlikely(page == NULL) ) + { + AMD_IOMMU_DEBUG("Error: Cannot access guest event log pages\n"); + return; + } + + log_base = __map_domain_page(page); + log = log_base + tail % (PAGE_SIZE / 
sizeof(event_entry_t)); + + /* re-write physical device id into virtual device id */ + dev_id = guest_bdf(d, iommu_get_devid_from_cmd(entry[0])); + iommu_set_devid_to_cmd(&entry[0], dev_id); + memcpy(log, entry, sizeof(event_entry_t)); + + /* Now shift event log tail pointer */ + tail++; + iommu_set_rb_pointer(&iommu->event_log.reg_tail.lo, tail); + unmap_domain_page(log_base); + + guest_iommu_deliver_msi(d); +} + +static int do_complete_ppr_request(struct domain *d, cmd_entry_t *cmd) +{ + uint16_t dev_id; + struct amd_iommu *iommu; + + dev_id = machine_bdf(d, iommu_get_devid_from_cmd(cmd->data[0])); + iommu = find_iommu_for_device(0, dev_id); + + if ( !iommu ) + { + AMD_IOMMU_DEBUG("%s Fail to find iommu for bdf %x\n", + __func__, dev_id); + return -ENODEV; + } + + /* replace virtual device id into physical */ + iommu_set_devid_to_cmd(&cmd->data[0], dev_id); + amd_iommu_send_guest_cmd(iommu, cmd->data); + + return 0; +} + +static int do_invalidate_pages(struct domain *d, cmd_entry_t *cmd) +{ + uint16_t gdom_id, hdom_id; + struct amd_iommu *iommu = NULL; + + gdom_id = get_field_from_reg_u32(cmd->data[1], + IOMMU_INV_IOMMU_PAGES_DOMAIN_ID_MASK, + IOMMU_INV_IOMMU_PAGES_DOMAIN_ID_SHIFT); + + hdom_id = host_domid(d, gdom_id); + set_field_in_reg_u32(hdom_id, cmd->data[1], + IOMMU_INV_IOMMU_PAGES_DOMAIN_ID_MASK, + IOMMU_INV_IOMMU_PAGES_DOMAIN_ID_SHIFT, &cmd->data[1]); + + for_each_amd_iommu ( iommu ) + amd_iommu_send_guest_cmd(iommu, cmd->data); + + return 0; +} + +static int do_invalidate_all(struct domain *d, cmd_entry_t *cmd) +{ + struct amd_iommu *iommu = NULL; + + for_each_amd_iommu ( iommu ) + amd_iommu_flush_all_pages(d); + + return 0; +} + +static int do_invalidate_iotlb_pages(struct domain *d, cmd_entry_t *cmd) +{ + struct amd_iommu *iommu; + uint16_t dev_id; + + dev_id = machine_bdf(d, iommu_get_devid_from_cmd(cmd->data[0])); + + iommu = find_iommu_for_device(0, dev_id); + if ( !iommu ) + { + AMD_IOMMU_DEBUG("%s Fail to find iommu for bdf %x\n", + __func__, dev_id); + return -ENODEV; + } + + iommu_set_devid_to_cmd(&cmd->data[0], dev_id); + amd_iommu_send_guest_cmd(iommu, cmd->data); + + return 0; +} + +static int do_completion_wait(struct domain *d, cmd_entry_t *cmd) +{ + bool_t com_wait_int_en, com_wait_int, i, s; + struct guest_iommu *iommu; + unsigned long gfn; + p2m_type_t p2mt; + + iommu = domain_iommu(d); + + i = iommu_get_bit(cmd->data[0], IOMMU_COMP_WAIT_I_FLAG_SHIFT); + s = iommu_get_bit(cmd->data[0], IOMMU_COMP_WAIT_S_FLAG_SHIFT); + + if ( i ) + guest_iommu_set_status(iommu, IOMMU_STATUS_COMP_WAIT_INT_SHIFT); + + if ( s ) + { + uint64_t gaddr_lo, gaddr_hi, gaddr_64, data; + void *vaddr; + + data = (uint64_t) cmd->data[3] << 32 | cmd->data[2]; + gaddr_lo = get_field_from_reg_u32(cmd->data[0], + IOMMU_COMP_WAIT_ADDR_LOW_MASK, + IOMMU_COMP_WAIT_ADDR_LOW_SHIFT); + gaddr_hi = get_field_from_reg_u32(cmd->data[1], + IOMMU_COMP_WAIT_ADDR_HIGH_MASK, + IOMMU_COMP_WAIT_ADDR_HIGH_SHIFT); + + gaddr_64 = (gaddr_hi << 32) | (gaddr_lo << 3); + + gfn = gaddr_64 >> PAGE_SHIFT; + vaddr = map_domain_page(mfn_x(get_gfn(d, gfn ,&p2mt))); + put_gfn(d, gfn); + + write_u64_atomic((uint64_t*)(vaddr + (gaddr_64 & (PAGE_SIZE-1))), data); + unmap_domain_page(vaddr); + } + + com_wait_int_en = iommu_get_bit(iommu->reg_ctrl.lo, + IOMMU_CONTROL_COMP_WAIT_INT_SHIFT); + com_wait_int = iommu_get_bit(iommu->reg_status.lo, + IOMMU_STATUS_COMP_WAIT_INT_SHIFT); + + if ( com_wait_int_en && com_wait_int ) + guest_iommu_deliver_msi(d); + + return 0; +} + +static int do_invalidate_dte(struct domain *d, cmd_entry_t 
*cmd) +{ + uint16_t gbdf, mbdf, req_id, gdom_id, hdom_id; + dev_entry_t *gdte, *mdte, *dte_base; + struct amd_iommu *iommu = NULL; + struct page_info *dte_page = NULL; + struct guest_iommu *g_iommu; + uint64_t gcr3_gfn, gcr3_mfn; + uint8_t glx, gv; + unsigned long flags; + p2m_type_t p2mt; + + g_iommu = domain_iommu(d); + gbdf = iommu_get_devid_from_cmd(cmd->data[0]); + mbdf = machine_bdf(d, gbdf); + + /* Guest can only update DTEs for its passthru devices */ + if ( mbdf == 0 || gbdf == 0 ) + return 0; + + /* Sometimes guest invalidates devices from non-exists dtes */ + if ( (gbdf * sizeof(dev_entry_t)) > g_iommu->dev_table.size ) + return 0; + + dte_page = guest_iommu_get_page(&g_iommu->dev_table.page_list, + sizeof(dev_entry_t), gbdf); + if ( unlikely(dte_page == NULL) ) + { + AMD_IOMMU_DEBUG("Error: Cannot access guest device table\n"); + return -ENOMEM ; + } + + dte_base = __map_domain_page(dte_page); + + gdte = dte_base + gbdf % (PAGE_SIZE / sizeof(dev_entry_t)); + + gdom_id = get_domid_from_dte(gdte); + gcr3_gfn = get_guest_cr3_from_dte(gdte); + + /* Do not update host dte before gcr3 has been set */ + if ( gcr3_gfn == 0 ) + return 0; + + gcr3_mfn = mfn_x(get_gfn(d, gcr3_gfn, &p2mt)); + put_gfn(d, gcr3_gfn); + + ASSERT(mfn_valid(gcr3_mfn)); + + /* Read guest dte information */ + iommu = find_iommu_for_device(0, mbdf); + if ( !iommu ) + { + AMD_IOMMU_DEBUG("%s Fail to find iommu!\n",__func__); + return -ENODEV; + } + + glx = get_glx_from_dte(gdte); + gv = get_gv_from_dte(gdte); + + unmap_domain_page(dte_base); + + /* Setup host device entry */ + hdom_id = host_domid(d, gdom_id); + req_id = get_dma_requestor_id(iommu->seg, mbdf); + mdte = iommu->dev_table.buffer + (req_id * sizeof(dev_entry_t)); + + spin_lock_irqsave(&iommu->lock, flags); + iommu_dte_set_guest_cr3((u32*)mdte, hdom_id, + gcr3_mfn << PAGE_SHIFT, gv, glx); + + amd_iommu_flush_device(iommu, req_id); + spin_unlock_irqrestore(&iommu->lock, flags); + + return 0; +} + +static void guest_iommu_process_command(unsigned long _d) +{ + unsigned int opcode, tail, head, entries_per_page; + cmd_entry_t *cmd, *cmd_base; + struct domain *d; + struct guest_iommu *iommu; + struct page_info *cmd_page = NULL; + + d = (struct domain*) _d; + iommu = domain_iommu(d); + + if ( !iommu->enabled ) + return; + + head = iommu_get_rb_pointer(iommu->cmd_buffer.reg_head.lo); + tail = iommu_get_rb_pointer(iommu->cmd_buffer.reg_tail.lo); + + /* Tail pointer is rolled over by guest driver, value outside + * cmd_buffer_entries cause iommu disabled + */ + entries_per_page = PAGE_SIZE / sizeof(cmd_entry_t); + + while ( head != tail ) + { + int ret = 0; + cmd_page = guest_iommu_get_page(&iommu->cmd_buffer.page_list, + sizeof(cmd_entry_t), head); + if ( unlikely(cmd_page == NULL) ) + { + AMD_IOMMU_DEBUG("Error: cannot access guest cmd buffer head = %d\n", + head); + return; + } + + cmd_base = __map_domain_page(cmd_page); + cmd = cmd_base + head % entries_per_page; + + opcode = get_field_from_reg_u32(cmd->data[1], + IOMMU_CMD_OPCODE_MASK, + IOMMU_CMD_OPCODE_SHIFT); + switch ( opcode ) + { + case IOMMU_CMD_COMPLETION_WAIT: + ret = do_completion_wait(d, cmd); + break; + case IOMMU_CMD_INVALIDATE_DEVTAB_ENTRY: + ret = do_invalidate_dte(d, cmd); + break; + case IOMMU_CMD_INVALIDATE_IOMMU_PAGES: + ret = do_invalidate_pages(d, cmd); + break; + case IOMMU_CMD_INVALIDATE_IOTLB_PAGES: + ret = do_invalidate_iotlb_pages(d, cmd); + break; + case IOMMU_CMD_INVALIDATE_INT_TABLE: + break; + case IOMMU_CMD_COMPLETE_PPR_REQUEST: + ret = do_complete_ppr_request(d, cmd); + break; 
+ case IOMMU_CMD_INVALIDATE_IOMMU_ALL: + ret = do_invalidate_all(d, cmd); + break; + default: + AMD_IOMMU_DEBUG("CMD: Unknown command cmd_type = %x " + "head = %d\n", opcode, head); + break; + } + + unmap_domain_page(cmd_base); + if ( (++head) >= iommu->cmd_buffer.entries ) + head = 0; + if ( ret ) + guest_iommu_disable(iommu); + } + + /* Now shift cmd buffer head pointer */ + iommu_set_rb_pointer(&iommu->cmd_buffer.reg_head.lo, head); + return; +} + +static int guest_iommu_write_ctrl(struct guest_iommu *iommu, uint64_t newctrl) +{ + bool_t cmd_en, event_en, iommu_en, ppr_en, ppr_log_en; + bool_t cmd_en_old, event_en_old, iommu_en_old; + bool_t cmd_run; + + iommu_en = iommu_get_bit(newctrl, + IOMMU_CONTROL_TRANSLATION_ENABLE_SHIFT); + iommu_en_old = iommu_get_bit(iommu->reg_ctrl.lo, + IOMMU_CONTROL_TRANSLATION_ENABLE_SHIFT); + + cmd_en = iommu_get_bit(newctrl, + IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT); + cmd_en_old = iommu_get_bit(iommu->reg_ctrl.lo, + IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT); + cmd_run = iommu_get_bit(iommu->reg_status.lo, + + IOMMU_STATUS_CMD_BUFFER_RUN_SHIFT); + event_en = iommu_get_bit(newctrl, + IOMMU_CONTROL_EVENT_LOG_ENABLE_SHIFT); + event_en_old = iommu_get_bit(iommu->reg_ctrl.lo, + IOMMU_CONTROL_EVENT_LOG_ENABLE_SHIFT); + + ppr_en = iommu_get_bit(newctrl, + IOMMU_CONTROL_PPR_ENABLE_SHIFT); + ppr_log_en = iommu_get_bit(newctrl, + IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT); + + if ( iommu_en ) + { + guest_iommu_enable(iommu); + guest_iommu_enable_dev_table(iommu); + } + + if ( iommu_en && cmd_en ) + { + guest_iommu_enable_ring_buffer(iommu, &iommu->cmd_buffer, + sizeof(cmd_entry_t)); + /* Enable iommu command processing */ + tasklet_schedule(&iommu->cmd_buffer_tasklet); + } + + if ( iommu_en && event_en ) + { + guest_iommu_enable_ring_buffer(iommu, &iommu->event_log, + sizeof(event_entry_t)); + guest_iommu_set_status(iommu, IOMMU_STATUS_EVENT_LOG_RUN_SHIFT); + guest_iommu_clear_status(iommu, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT); + } + + if ( iommu_en && ppr_en && ppr_log_en ) + { + guest_iommu_enable_ring_buffer(iommu, &iommu->ppr_log, + sizeof(ppr_entry_t)); + guest_iommu_set_status(iommu, IOMMU_STATUS_PPR_LOG_RUN_SHIFT); + guest_iommu_clear_status(iommu, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT); + } + + if ( iommu_en && cmd_en_old && !cmd_en ) + { + /* Disable iommu command processing */ + tasklet_kill(&iommu->cmd_buffer_tasklet); + } + + if ( event_en_old && !event_en ) + { + guest_iommu_clear_status(iommu, IOMMU_STATUS_EVENT_LOG_RUN_SHIFT); + } + + if ( !iommu_en && iommu_en_old ) + { + guest_iommu_disable(iommu); + } + + iommu->reg_ctrl.lo = newctrl & 0xffffffff; + iommu->reg_ctrl.hi = newctrl >> 32; + return 0; +} + +static uint64_t iommu_mmio_read64(struct guest_iommu *iommu, + unsigned long offset) +{ + uint64_t val; + + switch ( offset ) + { + case IOMMU_DEV_TABLE_BASE_LOW_OFFSET: + val = reg_to_u64(iommu->dev_table.reg_base); + break; + case IOMMU_CMD_BUFFER_BASE_LOW_OFFSET: + val = reg_to_u64(iommu->cmd_buffer.reg_base); + break; + case IOMMU_EVENT_LOG_BASE_LOW_OFFSET: + val = reg_to_u64(iommu->event_log.reg_base); + break; + case IOMMU_PPR_LOG_BASE_LOW_OFFSET: + val = reg_to_u64(iommu->ppr_log.reg_base); + break; + case IOMMU_CMD_BUFFER_HEAD_OFFSET: + val = reg_to_u64(iommu->cmd_buffer.reg_head); + break; + case IOMMU_CMD_BUFFER_TAIL_OFFSET: + val = reg_to_u64(iommu->cmd_buffer.reg_tail); + break; + case IOMMU_EVENT_LOG_HEAD_OFFSET:; + val = reg_to_u64(iommu->event_log.reg_head); + break; + case IOMMU_EVENT_LOG_TAIL_OFFSET: + val = 
reg_to_u64(iommu->event_log.reg_tail); + break; + case IOMMU_PPR_LOG_HEAD_OFFSET: + val = reg_to_u64(iommu->ppr_log.reg_head); + break; + case IOMMU_PPR_LOG_TAIL_OFFSET: + val = reg_to_u64(iommu->ppr_log.reg_tail); + break; + case IOMMU_CONTROL_MMIO_OFFSET: + val = reg_to_u64(iommu->reg_ctrl); + break; + case IOMMU_STATUS_MMIO_OFFSET: + val = reg_to_u64(iommu->reg_status); + break; + case IOMMU_EXT_FEATURE_MMIO_OFFSET: + val = reg_to_u64(iommu->reg_ext_feature); + break; + + default: + AMD_IOMMU_DEBUG("Guest reads unknown mmio offset = %lx\n", + offset); + val = 0; + break; + } + + return val; +} + +static int guest_iommu_mmio_read(struct vcpu *v, unsigned long addr, + unsigned long len, unsigned long *pval) +{ + struct guest_iommu *iommu = vcpu_iommu(v); + unsigned long offset; + uint64_t val; + uint32_t mmio, shift; + uint64_t mask = 0; + + offset = addr - iommu->mmio_base; + + if ( unlikely((offset & (len - 1 )) || (len > 8)) ) + { + AMD_IOMMU_DEBUG("iommu mmio write access is not aligned." + "offset = %lx, len = %lx \n", offset, len); + return X86EMUL_UNHANDLEABLE; + } + + mask = (len == 8) ? (~0ULL) : (1ULL << (len * 8)) - 1; + shift = (offset & 7u) * 8; + + /* mmio access is always aligned on 8-byte boundary */ + mmio = offset & (~7u); + + spin_lock(&iommu->lock); + val = iommu_mmio_read64(iommu, mmio); + spin_unlock(&iommu->lock); + + *pval = (val >> shift ) & mask; + + return X86EMUL_OKAY; +} + +static void guest_iommu_mmio_write64(struct guest_iommu *iommu, + unsigned long offset, uint64_t val) +{ + switch ( offset ) + { + case IOMMU_DEV_TABLE_BASE_LOW_OFFSET: + u64_to_reg(&iommu->dev_table.reg_base, val); + break; + case IOMMU_CMD_BUFFER_BASE_LOW_OFFSET: + u64_to_reg(&iommu->cmd_buffer.reg_base, val); + break; + case IOMMU_EVENT_LOG_BASE_LOW_OFFSET: + u64_to_reg(&iommu->event_log.reg_base, val); + case IOMMU_PPR_LOG_BASE_LOW_OFFSET: + u64_to_reg(&iommu->ppr_log.reg_base, val); + break; + case IOMMU_CONTROL_MMIO_OFFSET: + guest_iommu_write_ctrl(iommu, val); + break; + case IOMMU_CMD_BUFFER_HEAD_OFFSET: + u64_to_reg(&iommu->cmd_buffer.reg_head, val); + break; + case IOMMU_CMD_BUFFER_TAIL_OFFSET: + u64_to_reg(&iommu->cmd_buffer.reg_tail, val); + tasklet_schedule(&iommu->cmd_buffer_tasklet); + break; + case IOMMU_EVENT_LOG_HEAD_OFFSET: + u64_to_reg(&iommu->event_log.reg_head, val); + break; + case IOMMU_EVENT_LOG_TAIL_OFFSET: + u64_to_reg(&iommu->event_log.reg_tail, val); + break; + case IOMMU_PPR_LOG_HEAD_OFFSET: + u64_to_reg(&iommu->ppr_log.reg_head, val); + break; + case IOMMU_PPR_LOG_TAIL_OFFSET: + u64_to_reg(&iommu->ppr_log.reg_tail, val); + break; + case IOMMU_STATUS_MMIO_OFFSET: + u64_to_reg(&iommu->reg_status, val); + break; + + default: + AMD_IOMMU_DEBUG("guest writes unknown mmio offset = %lx, " + "val = %lx\n", offset, val); + break; + } +} + +static int guest_iommu_mmio_write(struct vcpu *v, unsigned long addr, + unsigned long len, unsigned long val) +{ + struct guest_iommu *iommu = vcpu_iommu(v); + unsigned long offset; + uint64_t reg_old, mmio; + uint32_t shift; + uint64_t mask = 0; + + offset = addr - iommu->mmio_base; + + if ( unlikely((offset & (len - 1 )) || (len > 8)) ) + { + AMD_IOMMU_DEBUG("iommu mmio write access is not aligned." + "offset = %lx, len = %lx \n", offset, len); + return X86EMUL_UNHANDLEABLE; + } + + mask = (len == 8) ? 
(~0ULL): (1ULL << (len * 8)) - 1; + shift = (offset & 7u) * 8; + + /* mmio access is always aligned on 8-byte boundary */ + mmio = offset & (~7u); + + spin_lock(&iommu->lock); + + reg_old = iommu_mmio_read64(iommu, mmio); + reg_old &= ~( mask << shift ); + val = reg_old | ((val & mask) << shift ); + guest_iommu_mmio_write64(iommu, mmio, val); + + spin_unlock(&iommu->lock); + + return X86EMUL_OKAY; +} + +int guest_iommu_set_base(struct domain *d, uint64_t base) +{ + p2m_type_t t; + struct guest_iommu *iommu = domain_iommu(d); + + iommu->mmio_base = base; + base >>= PAGE_SHIFT; + + for ( int i = 0; i < IOMMU_MMIO_PAGE_NR; i++ ) + { + unsigned long gfn = base + i; + + get_gfn_query(d, gfn, &t); + p2m_change_type(d, gfn, t, p2m_mmio_dm); + put_gfn(d, gfn); + } + + return 0; +} + +/* Initialize mmio read only bits */ +static void guest_iommu_reg_init(struct guest_iommu *iommu) +{ + uint32_t lower, upper; + + lower = upper = 0; + /* Support prefetch */ + iommu_set_bit(&lower,IOMMU_EXT_FEATURE_PREFSUP_SHIFT); + /* Support PPR log */ + iommu_set_bit(&lower,IOMMU_EXT_FEATURE_PPRSUP_SHIFT); + /* Support guest translation */ + iommu_set_bit(&lower,IOMMU_EXT_FEATURE_GTSUP_SHIFT); + /* Support invalidate all command */ + iommu_set_bit(&lower,IOMMU_EXT_FEATURE_IASUP_SHIFT); + + /* Host translation size has 6 levels */ + set_field_in_reg_u32(HOST_ADDRESS_SIZE_6_LEVEL, lower, + IOMMU_EXT_FEATURE_HATS_MASK, + IOMMU_EXT_FEATURE_HATS_SHIFT, + &lower); + /* Guest translation size has 6 levels */ + set_field_in_reg_u32(GUEST_ADDRESS_SIZE_6_LEVEL, lower, + IOMMU_EXT_FEATURE_GATS_MASK, + IOMMU_EXT_FEATURE_GATS_SHIFT, + &lower); + /* Single level gCR3 */ + set_field_in_reg_u32(GUEST_CR3_1_LEVEL, lower, + IOMMU_EXT_FEATURE_GLXSUP_MASK, + IOMMU_EXT_FEATURE_GLXSUP_SHIFT, &lower); + /* 9 bit PASID */ + set_field_in_reg_u32(PASMAX_9_bit, upper, + IOMMU_EXT_FEATURE_PASMAX_MASK, + IOMMU_EXT_FEATURE_PASMAX_SHIFT, &upper); + + iommu->reg_ext_feature.lo = lower; + iommu->reg_ext_feature.hi = upper; +} + +/* Domain specific initialization */ +int guest_iommu_init(struct domain* d) +{ + struct guest_iommu *iommu; + struct hvm_iommu *hd = domain_hvm_iommu(d); + + if ( !is_hvm_domain(d) ) + return 0; + + iommu = xzalloc(struct guest_iommu); + if ( !iommu ) + { + AMD_IOMMU_DEBUG("Error allocating guest iommu structure.\n"); + return 1; + } + + guest_iommu_reg_init(iommu); + iommu->domain = d; + hd->g_iommu = iommu; + + tasklet_init(&iommu->cmd_buffer_tasklet, + guest_iommu_process_command, (unsigned long)d); + + spin_lock_init(&iommu->lock); + + INIT_LIST_HEAD(&iommu->dev_table.page_list); + INIT_LIST_HEAD(&iommu->cmd_buffer.page_list); + INIT_LIST_HEAD(&iommu->event_log.page_list); + INIT_LIST_HEAD(&iommu->ppr_log.page_list); + + return 0; +} + +static void guest_iommu_deallocate(struct list_head *page_list) +{ + struct guest_pages *cur, *next; + + list_for_each_entry_safe(cur, next, page_list, list) + { + list_del(&cur->list); + xfree(cur); + } +} + +void guest_iommu_destroy(struct domain *d) +{ + struct guest_iommu *iommu; + + if ( !is_hvm_domain(d) ) + return; + + iommu = domain_iommu(d); + + guest_iommu_deallocate(&iommu->dev_table.page_list); + guest_iommu_deallocate(&iommu->cmd_buffer.page_list); + guest_iommu_deallocate(&iommu->event_log.page_list); + guest_iommu_deallocate(&iommu->ppr_log.page_list); + + tasklet_kill(&iommu->cmd_buffer_tasklet); +} + +static int guest_iommu_mmio_range(struct vcpu *v, unsigned long addr) +{ + struct guest_iommu *iommu = vcpu_iommu(v); + + return ( addr >= iommu->mmio_base && + addr < 
(iommu->mmio_base + IOMMU_MMIO_SIZE) ); +} + +const struct hvm_mmio_handler iommu_mmio_handler = { + .check_handler = guest_iommu_mmio_range, + .read_handler = guest_iommu_mmio_read, + .write_handler = guest_iommu_mmio_write +}; diff -r b190e3362524 -r ea52a2b93dff xen/drivers/passthrough/amd/iommu_map.c --- a/xen/drivers/passthrough/amd/iommu_map.c Wed Dec 14 12:51:22 2011 +0100 +++ b/xen/drivers/passthrough/amd/iommu_map.c Wed Dec 14 12:58:17 2011 +0100 @@ -234,6 +234,53 @@ void __init iommu_dte_add_device_entry(u dte[3] = entry; } +void iommu_dte_set_guest_cr3(u32 *dte, u16 dom_id, u64 gcr3, + int gv, unsigned int glx) +{ + u32 entry, gcr3_1, gcr3_2, gcr3_3; + + gcr3_3 = gcr3 >> 31; + gcr3_2 = (gcr3 >> 15) & 0xFFFF; + gcr3_1 = (gcr3 >> PAGE_SHIFT) & 0x7; + + /* I bit must be set when gcr3 is enabled */ + entry = dte[3]; + set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry, + IOMMU_DEV_TABLE_IOTLB_SUPPORT_MASK, + IOMMU_DEV_TABLE_IOTLB_SUPPORT_SHIFT, &entry); + /* update gcr3 */ + set_field_in_reg_u32(gcr3_3, entry, + IOMMU_DEV_TABLE_GCR3_3_MASK, + IOMMU_DEV_TABLE_GCR3_3_SHIFT, &entry); + dte[3] = entry; + + set_field_in_reg_u32(dom_id, entry, + IOMMU_DEV_TABLE_DOMAIN_ID_MASK, + IOMMU_DEV_TABLE_DOMAIN_ID_SHIFT, &entry); + /* update gcr3 */ + entry = dte[2]; + set_field_in_reg_u32(gcr3_2, entry, + IOMMU_DEV_TABLE_GCR3_2_MASK, + IOMMU_DEV_TABLE_GCR3_2_SHIFT, &entry); + dte[2] = entry; + + entry = dte[1]; + /* Enable GV bit */ + set_field_in_reg_u32(!!gv, entry, + IOMMU_DEV_TABLE_GV_MASK, + IOMMU_DEV_TABLE_GV_SHIFT, &entry); + + /* 1 level guest cr3 table */ + set_field_in_reg_u32(glx, entry, + IOMMU_DEV_TABLE_GLX_MASK, + IOMMU_DEV_TABLE_GLX_SHIFT, &entry); + /* update gcr3 */ + set_field_in_reg_u32(gcr3_1, entry, + IOMMU_DEV_TABLE_GCR3_1_MASK, + IOMMU_DEV_TABLE_GCR3_1_SHIFT, &entry); + dte[1] = entry; +} + u64 amd_iommu_get_next_table_from_pte(u32 *entry) { u64 addr_lo, addr_hi, ptr; diff -r b190e3362524 -r ea52a2b93dff xen/drivers/passthrough/amd/pci_amd_iommu.c --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Wed Dec 14 12:51:22 2011 +0100 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Wed Dec 14 12:58:17 2011 +0100 @@ -260,6 +260,8 @@ static int amd_iommu_domain_init(struct hd->domain_id = d->domain_id; + guest_iommu_init(d); + return 0; } @@ -443,6 +445,7 @@ static void deallocate_iommu_page_tables static void amd_iommu_domain_destroy(struct domain *d) { + guest_iommu_destroy(d); deallocate_iommu_page_tables(d); amd_iommu_flush_all_pages(d); } diff -r b190e3362524 -r ea52a2b93dff xen/include/asm-x86/amd-iommu.h --- a/xen/include/asm-x86/amd-iommu.h Wed Dec 14 12:51:22 2011 +0100 +++ b/xen/include/asm-x86/amd-iommu.h Wed Dec 14 12:58:17 2011 +0100 @@ -24,6 +24,7 @@ #include <xen/types.h> #include <xen/list.h> #include <xen/spinlock.h> +#include <xen/tasklet.h> #include <asm/hvm/svm/amd-iommu-defs.h> #define iommu_found() (!list_empty(&amd_iommu_head)) @@ -130,4 +131,62 @@ struct ivrs_mappings *get_ivrs_mappings( int iterate_ivrs_mappings(int (*)(u16 seg, struct ivrs_mappings *)); int iterate_ivrs_entries(int (*)(u16 seg, struct ivrs_mappings *)); +/* iommu tables in guest space */ +struct guest_pages { + struct list_head list; + struct page_info *page; +}; + +struct mmio_reg { + uint32_t lo; + uint32_t hi; +}; + +struct guest_dev_table { + struct list_head page_list; + struct mmio_reg reg_base; + uint32_t size; +}; + +struct guest_buffer { + struct list_head page_list; + struct mmio_reg reg_base; + struct mmio_reg reg_tail; + struct mmio_reg reg_head; + uint32_t entries; +}; + +struct 
guest_iommu_msi { + uint8_t vector; + uint8_t dest; + uint8_t dest_mode; + uint8_t delivery_mode; + uint8_t trig_mode; +}; + +/* virtual IOMMU structure */ +struct guest_iommu { + + struct domain *domain; + spinlock_t lock; + bool_t enabled; + + struct guest_dev_table dev_table; + struct guest_buffer cmd_buffer; + struct guest_buffer event_log; + struct guest_buffer ppr_log; + + struct tasklet cmd_buffer_tasklet; + + uint64_t mmio_base; /* MMIO base address */ + + /* MMIO regs */ + struct mmio_reg reg_ctrl; /* MMIO offset 0018h */ + struct mmio_reg reg_status; /* MMIO offset 2020h */ + struct mmio_reg reg_ext_feature; /* MMIO offset 0030h */ + + /* guest interrupt settings */ + struct guest_iommu_msi msi; +}; + #endif /* _ASM_X86_64_AMD_IOMMU_H */ diff -r b190e3362524 -r ea52a2b93dff xen/include/asm-x86/hvm/svm/amd-iommu-defs.h --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Wed Dec 14 12:51:22 2011 +0100 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Wed Dec 14 12:58:17 2011 +0100 @@ -117,6 +117,13 @@ #define IOMMU_DEV_TABLE_PAGE_TABLE_PTR_LOW_SHIFT 12 /* DeviceTable Entry[63:32] */ +#define IOMMU_DEV_TABLE_GV_SHIFT 23 +#define IOMMU_DEV_TABLE_GV_MASK 0x800000 +#define IOMMU_DEV_TABLE_GLX_SHIFT 24 +#define IOMMU_DEV_TABLE_GLX_MASK 0x3000000 +#define IOMMU_DEV_TABLE_GCR3_1_SHIFT 26 +#define IOMMU_DEV_TABLE_GCR3_1_MASK 0x1c000000 + #define IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_MASK 0x000FFFFF #define IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_SHIFT 0 #define IOMMU_DEV_TABLE_IO_READ_PERMISSION_MASK 0x20000000 @@ -127,6 +134,8 @@ /* DeviceTable Entry[95:64] */ #define IOMMU_DEV_TABLE_DOMAIN_ID_MASK 0x0000FFFF #define IOMMU_DEV_TABLE_DOMAIN_ID_SHIFT 0 +#define IOMMU_DEV_TABLE_GCR3_2_SHIFT 16 +#define IOMMU_DEV_TABLE_GCR3_2_MASK 0xFFFF0000 /* DeviceTable Entry[127:96] */ #define IOMMU_DEV_TABLE_IOTLB_SUPPORT_MASK 0x00000001 @@ -155,6 +164,8 @@ #define IOMMU_DEV_TABLE_INT_TABLE_IGN_UNMAPPED_SHIFT 5 #define IOMMU_DEV_TABLE_INT_TABLE_PTR_LOW_MASK 0xFFFFFFC0 #define IOMMU_DEV_TABLE_INT_TABLE_PTR_LOW_SHIFT 6 +#define IOMMU_DEV_TABLE_GCR3_3_SHIFT 11 +#define IOMMU_DEV_TABLE_GCR3_3_MASK 0xfffff800 /* DeviceTable Entry[191:160] */ #define IOMMU_DEV_TABLE_INT_TABLE_PTR_HIGH_MASK 0x000FFFFF @@ -164,7 +175,6 @@ #define IOMMU_DEV_TABLE_INT_CONTROL_MASK 0x30000000 #define IOMMU_DEV_TABLE_INT_CONTROL_SHIFT 28 - /* Command Buffer */ #define IOMMU_CMD_BUFFER_BASE_LOW_OFFSET 0x08 #define IOMMU_CMD_BUFFER_BASE_HIGH_OFFSET 0x0C @@ -192,6 +202,7 @@ #define IOMMU_CMD_INVALIDATE_IOMMU_PAGES 0x3 #define IOMMU_CMD_INVALIDATE_IOTLB_PAGES 0x4 #define IOMMU_CMD_INVALIDATE_INT_TABLE 0x5 +#define IOMMU_CMD_COMPLETE_PPR_REQUEST 0x7 #define IOMMU_CMD_INVALIDATE_IOMMU_ALL 0x8 /* COMPLETION_WAIT command */ @@ -282,6 +293,28 @@ #define IOMMU_EVENT_DEVICE_ID_MASK 0x0000FFFF #define IOMMU_EVENT_DEVICE_ID_SHIFT 0 +/* PPR Log */ +#define IOMMU_PPR_LOG_ENTRY_SIZE 16 +#define IOMMU_PPR_LOG_POWER_OF2_ENTRIES_PER_PAGE 8 +#define IOMMU_PPR_LOG_U32_PER_ENTRY (IOMMU_PPR_LOG_ENTRY_SIZE / 4) + +#define IOMMU_PPR_LOG_BASE_LOW_OFFSET 0x0038 +#define IOMMU_PPR_LOG_BASE_HIGH_OFFSET 0x003C +#define IOMMU_PPR_LOG_BASE_LOW_MASK 0xFFFFF000 +#define IOMMU_PPR_LOG_BASE_LOW_SHIFT 12 +#define IOMMU_PPR_LOG_BASE_HIGH_MASK 0x000FFFFF +#define IOMMU_PPR_LOG_BASE_HIGH_SHIFT 0 +#define IOMMU_PPR_LOG_LENGTH_MASK 0x0F000000 +#define IOMMU_PPR_LOG_LENGTH_SHIFT 24 +#define IOMMU_PPR_LOG_HEAD_MASK 0x0007FFF0 +#define IOMMU_PPR_LOG_HEAD_SHIFT 4 +#define IOMMU_PPR_LOG_TAIL_MASK 0x0007FFF0 +#define IOMMU_PPR_LOG_TAIL_SHIFT 4 +#define IOMMU_PPR_LOG_HEAD_OFFSET 0x2030 
+#define IOMMU_PPR_LOG_TAIL_OFFSET 0x2038 +#define IOMMU_PPR_LOG_DEVICE_ID_MASK 0x0000FFFF +#define IOMMU_PPR_LOG_DEVICE_ID_SHIFT 0 + /* Control Register */ #define IOMMU_CONTROL_MMIO_OFFSET 0x18 #define IOMMU_CONTROL_TRANSLATION_ENABLE_MASK 0x00000001 @@ -309,6 +342,11 @@ #define IOMMU_CONTROL_RESTART_MASK 0x80000000 #define IOMMU_CONTROL_RESTART_SHIFT 31 +#define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT 13 +#define IOMMU_CONTROL_PPR_INT_SHIFT 14 +#define IOMMU_CONTROL_PPR_ENABLE_SHIFT 15 +#define IOMMU_CONTROL_GT_ENABLE_SHIFT 16 + /* Exclusion Register */ #define IOMMU_EXCLUSION_BASE_LOW_OFFSET 0x20 #define IOMMU_EXCLUSION_BASE_HIGH_OFFSET 0x24 @@ -342,7 +380,8 @@ #define IOMMU_EXT_FEATURE_HATS_MASK 0x00000C00 #define IOMMU_EXT_FEATURE_GATS_SHIFT 0x12 #define IOMMU_EXT_FEATURE_GATS_MASK 0x00003000 -#define IOMMU_EXT_FEATURE_GLXSUP 0x14 +#define IOMMU_EXT_FEATURE_GLXSUP_SHIFT 0x14 +#define IOMMU_EXT_FEATURE_GLXSUP_MASK 0x0000C000 #define IOMMU_EXT_FEATURE_PASMAX_SHIFT 0x0 #define IOMMU_EXT_FEATURE_PASMAX_MASK 0x0000001F @@ -359,6 +398,9 @@ #define IOMMU_STATUS_EVENT_LOG_RUN_SHIFT 3 #define IOMMU_STATUS_CMD_BUFFER_RUN_MASK 0x00000010 #define IOMMU_STATUS_CMD_BUFFER_RUN_SHIFT 4 +#define IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT 5 +#define IOMMU_STATUS_PPR_LOG_INT_SHIFT 6 +#define IOMMU_STATUS_PPR_LOG_RUN_SHIFT 7 /* I/O Page Table */ #define IOMMU_PAGE_TABLE_ENTRY_SIZE 8 diff -r b190e3362524 -r ea52a2b93dff xen/include/asm-x86/hvm/svm/amd-iommu-proto.h --- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h Wed Dec 14 12:51:22 2011 +0100 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h Wed Dec 14 12:58:17 2011 +0100 @@ -71,6 +71,8 @@ void amd_iommu_set_root_page_table( u32 *dte, u64 root_ptr, u16 domain_id, u8 paging_mode, u8 valid); void iommu_dte_set_iotlb(u32 *dte, u8 i); void iommu_dte_add_device_entry(u32 *dte, struct ivrs_mappings *ivrs_dev); +void iommu_dte_set_guest_cr3(u32 *dte, u16 dom_id, u64 gcr3, + int gv, unsigned int glx); /* send cmd to iommu */ void amd_iommu_flush_all_pages(struct domain *d); @@ -106,6 +108,14 @@ void amd_iommu_resume(void); void amd_iommu_suspend(void); void amd_iommu_crash_shutdown(void); +/* guest iommu support */ +void amd_iommu_send_guest_cmd(struct amd_iommu *iommu, u32 cmd[]); +void guest_iommu_add_ppr_log(struct domain *d, u32 entry[]); +void guest_iommu_add_event_log(struct domain *d, u32 entry[]); +int guest_iommu_init(struct domain* d); +void guest_iommu_destroy(struct domain *d); +int guest_iommu_set_base(struct domain *d, uint64_t base); + static inline u32 get_field_from_reg_u32(u32 reg_value, u32 mask, u32 shift) { u32 field; diff -r b190e3362524 -r ea52a2b93dff xen/include/xen/hvm/iommu.h --- a/xen/include/xen/hvm/iommu.h Wed Dec 14 12:51:22 2011 +0100 +++ b/xen/include/xen/hvm/iommu.h Wed Dec 14 12:58:17 2011 +0100 @@ -47,6 +47,7 @@ struct hvm_iommu { int domain_id; int paging_mode; struct page_info *root_table; + struct guest_iommu *g_iommu; /* iommu_ops */ const struct iommu_ops *platform_ops;
# HG changeset patch # User Wei Wang <wei.wang2@amd.com> # Date 1323875319 -3600 # Node ID d3aa7f936872abacb7e059393fa8963db35c4045 # Parent ea52a2b93dffe708084fdc6ee663bd5eee8c1031 amd iommu: Enable ppr log. IOMMUv2 writes peripheral page service request (PPR) records into ppr log to report DMA page request from ATS devices to OS. Signed-off-by: Wei Wang <wei.wang2@amd.com> diff -r ea52a2b93dff -r d3aa7f936872 xen/drivers/passthrough/amd/iommu_init.c --- a/xen/drivers/passthrough/amd/iommu_init.c Wed Dec 14 12:58:17 2011 +0100 +++ b/xen/drivers/passthrough/amd/iommu_init.c Wed Dec 14 16:08:39 2011 +0100 @@ -178,6 +178,34 @@ static void register_iommu_event_log_in_ writel(entry, iommu->mmio_base+IOMMU_EVENT_LOG_BASE_HIGH_OFFSET); } +static void register_iommu_ppr_log_in_mmio_space(struct amd_iommu *iommu) +{ + u64 addr_64, addr_lo, addr_hi; + u32 power_of2_entries; + u32 entry; + + ASSERT ( iommu->ppr_log.buffer ); + + addr_64 = (u64)virt_to_maddr(iommu->ppr_log.buffer); + addr_lo = addr_64 & DMA_32BIT_MASK; + addr_hi = addr_64 >> 32; + + entry = 0; + iommu_set_addr_lo_to_reg(&entry, addr_lo >> PAGE_SHIFT); + writel(entry, iommu->mmio_base + IOMMU_PPR_LOG_BASE_LOW_OFFSET); + + power_of2_entries = get_order_from_bytes(iommu->ppr_log.alloc_size) + + IOMMU_PPR_LOG_POWER_OF2_ENTRIES_PER_PAGE; + + entry = 0; + iommu_set_addr_hi_to_reg(&entry, addr_hi); + set_field_in_reg_u32(power_of2_entries, entry, + IOMMU_PPR_LOG_LENGTH_MASK, + IOMMU_PPR_LOG_LENGTH_SHIFT, &entry); + writel(entry, iommu->mmio_base + IOMMU_PPR_LOG_BASE_HIGH_OFFSET); +} + + static void set_iommu_translation_control(struct amd_iommu *iommu, int enable) { @@ -278,6 +306,35 @@ static void set_iommu_event_log_control( writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); } +static void set_iommu_ppr_log_control(struct amd_iommu *iommu, + int enable) +{ + u32 entry; + + entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); + + /*reset head and tail pointer manually before enablement */ + if ( enable ) + { + writel(0x0, iommu->mmio_base + IOMMU_PPR_LOG_HEAD_OFFSET); + writel(0x0, iommu->mmio_base + IOMMU_PPR_LOG_TAIL_OFFSET); + + iommu_set_bit(&entry, IOMMU_CONTROL_PPR_ENABLE_SHIFT); + iommu_set_bit(&entry, IOMMU_CONTROL_PPR_INT_SHIFT); + iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT); + } + else + { + iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_ENABLE_SHIFT); + iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_INT_SHIFT); + iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT); + } + + writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); + if ( enable ) + AMD_IOMMU_DEBUG("PPR Log Enabled.\n"); +} + static void parse_event_log_entry(struct amd_iommu *, u32 entry[]); static int amd_iommu_read_event_log(struct amd_iommu *iommu) @@ -585,12 +642,19 @@ static void enable_iommu(struct amd_iomm register_iommu_event_log_in_mmio_space(iommu); register_iommu_exclusion_range(iommu); + if ( iommu_has_feature(iommu, IOMMU_EXT_FEATURE_PPRSUP_SHIFT) ) + register_iommu_ppr_log_in_mmio_space(iommu); + iommu_msi_set_affinity(irq_to_desc(iommu->irq), &cpu_online_map); amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED); set_iommu_ht_flags(iommu); set_iommu_command_buffer_control(iommu, IOMMU_CONTROL_ENABLED); set_iommu_event_log_control(iommu, IOMMU_CONTROL_ENABLED); + + if ( iommu_has_feature(iommu, IOMMU_EXT_FEATURE_PPRSUP_SHIFT) ) + set_iommu_ppr_log_control(iommu, IOMMU_CONTROL_ENABLED); + set_iommu_translation_control(iommu, IOMMU_CONTROL_ENABLED); if ( iommu_has_feature(iommu, IOMMU_EXT_FEATURE_IASUP_SHIFT) ) @@ 
-672,16 +736,29 @@ static void * __init allocate_event_log( IOMMU_EVENT_LOG_DEFAULT_ENTRIES, "Event Log"); } +static void * __init allocate_ppr_log(struct amd_iommu *iommu) +{ + /* allocate ''ppr log'' in power of 2 increments of 4K */ + return allocate_ring_buffer(&iommu->ppr_log, sizeof(ppr_entry_t), + IOMMU_PPR_LOG_DEFAULT_ENTRIES, "PPR Log"); +} + static int __init amd_iommu_init_one(struct amd_iommu *iommu) { + if ( map_iommu_mmio_region(iommu) != 0 ) + goto error_out; + + get_iommu_features(iommu); + if ( allocate_cmd_buffer(iommu) == NULL ) goto error_out; if ( allocate_event_log(iommu) == NULL ) goto error_out; - if ( map_iommu_mmio_region(iommu) != 0 ) - goto error_out; + if ( iommu_has_feature(iommu, IOMMU_EXT_FEATURE_PPRSUP_SHIFT) ) + if ( allocate_ppr_log(iommu) == NULL ) + goto error_out; if ( set_iommu_interrupt_handler(iommu) == 0 ) goto error_out; @@ -694,8 +771,6 @@ static int __init amd_iommu_init_one(str iommu->dev_table.entries = device_table.entries; iommu->dev_table.buffer = device_table.buffer; - get_iommu_features(iommu); - enable_iommu(iommu); printk("AMD-Vi: IOMMU %d Enabled.\n", nr_amd_iommus ); nr_amd_iommus++; @@ -718,6 +793,7 @@ static void __init amd_iommu_init_cleanu { deallocate_ring_buffer(&iommu->cmd_buffer); deallocate_ring_buffer(&iommu->event_log); + deallocate_ring_buffer(&iommu->ppr_log); unmap_iommu_mmio_region(iommu); } xfree(iommu); @@ -916,6 +992,10 @@ static void disable_iommu(struct amd_iom amd_iommu_msi_enable(iommu, IOMMU_CONTROL_DISABLED); set_iommu_command_buffer_control(iommu, IOMMU_CONTROL_DISABLED); set_iommu_event_log_control(iommu, IOMMU_CONTROL_DISABLED); + + if ( iommu_has_feature(iommu, IOMMU_EXT_FEATURE_PPRSUP_SHIFT) ) + set_iommu_ppr_log_control(iommu, IOMMU_CONTROL_DISABLED); + set_iommu_translation_control(iommu, IOMMU_CONTROL_DISABLED); iommu->enabled = 0; diff -r ea52a2b93dff -r d3aa7f936872 xen/include/asm-x86/amd-iommu.h --- a/xen/include/asm-x86/amd-iommu.h Wed Dec 14 12:58:17 2011 +0100 +++ b/xen/include/asm-x86/amd-iommu.h Wed Dec 14 16:08:39 2011 +0100 @@ -94,6 +94,7 @@ struct amd_iommu { struct table_struct dev_table; struct ring_buffer cmd_buffer; struct ring_buffer event_log; + struct ring_buffer ppr_log; int exclusion_enable; int exclusion_allow_all; diff -r ea52a2b93dff -r d3aa7f936872 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Wed Dec 14 12:58:17 2011 +0100 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Wed Dec 14 16:08:39 2011 +0100 @@ -27,6 +27,9 @@ /* IOMMU Event Log entries: in power of 2 increments, minimum of 256 */ #define IOMMU_EVENT_LOG_DEFAULT_ENTRIES 512 +/* IOMMU PPR Log entries: in power of 2 increments, minimum of 256 */ +#define IOMMU_PPR_LOG_DEFAULT_ENTRIES 512 + #define PTE_PER_TABLE_SHIFT 9 #define PTE_PER_TABLE_SIZE (1 << PTE_PER_TABLE_SHIFT) #define PTE_PER_TABLE_MASK (~(PTE_PER_TABLE_SIZE - 1))
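For reference, the length field written to IOMMU_PPR_LOG_BASE_HIGH encodes the log size as a power of two. The stand-alone sketch below walks through that computation; the 16-byte entry size and the per-page constant are assumptions for illustration (256 = 2^8 PPR records per 4K page), not values quoted from the patch:

    #include <stdio.h>

    #define PAGE_SHIFT 12
    /* 4096 / 16-byte PPR records = 256 = 2^8 records per page (assumed) */
    #define IOMMU_PPR_LOG_POWER_OF2_ENTRIES_PER_PAGE 8

    /* smallest order such that (1 << order) pages hold 'bytes' */
    static unsigned int get_order_from_bytes(unsigned long bytes)
    {
        unsigned long pages = (bytes + (1UL << PAGE_SHIFT) - 1) >> PAGE_SHIFT;
        unsigned int order = 0;

        while ( (1UL << order) < pages )
            order++;
        return order;
    }

    int main(void)
    {
        unsigned long alloc_size = 512 * 16;   /* 512 entries, 16 bytes each */
        unsigned int len = get_order_from_bytes(alloc_size) +
                           IOMMU_PPR_LOG_POWER_OF2_ENTRIES_PER_PAGE;

        printf("length field = %u -> 2^%u = %lu entries\n",
               len, len, 1UL << len);   /* prints 9 -> 512 entries */
        return 0;
    }

With the default 512 entries this yields a length field of 9, matching the "power of 2 increments of 4K" comment in allocate_ppr_log().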
# HG changeset patch # User Wei Wang <wei.wang2@amd.com> # Date 1323875770 -3600 # Node ID be3f15ae6a4153504bbb3376291a2383684afee5 # Parent d3aa7f936872abacb7e059393fa8963db35c4045 amd iommu: Enable guest level translation. Similar to nested paging for SVM, IOMMUv2 supports two level translations for DMA. This patch enables this feature. Signed-off-by: Wei Wang <wei.wang2@amd.com> diff -r d3aa7f936872 -r be3f15ae6a41 xen/drivers/passthrough/amd/iommu_init.c --- a/xen/drivers/passthrough/amd/iommu_init.c Wed Dec 14 16:08:39 2011 +0100 +++ b/xen/drivers/passthrough/amd/iommu_init.c Wed Dec 14 16:16:10 2011 +0100 @@ -220,6 +220,23 @@ static void set_iommu_translation_contro writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET); } +static void set_iommu_guest_translation_control(struct amd_iommu *iommu, + int enable) +{ + u32 entry; + + entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); + + enable ? + iommu_set_bit(&entry, IOMMU_CONTROL_GT_ENABLE_SHIFT): + iommu_clear_bit(&entry, IOMMU_CONTROL_GT_ENABLE_SHIFT); + + writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET); + + if ( enable ) + AMD_IOMMU_DEBUG("Guest Translation Enabled.\n"); +} + static void set_iommu_command_buffer_control(struct amd_iommu *iommu, int enable) { @@ -655,6 +672,9 @@ static void enable_iommu(struct amd_iomm if ( iommu_has_feature(iommu, IOMMU_EXT_FEATURE_PPRSUP_SHIFT) ) set_iommu_ppr_log_control(iommu, IOMMU_CONTROL_ENABLED); + if ( iommu_has_feature(iommu, IOMMU_EXT_FEATURE_GTSUP_SHIFT) ) + set_iommu_guest_translation_control(iommu, IOMMU_CONTROL_ENABLED); + set_iommu_translation_control(iommu, IOMMU_CONTROL_ENABLED); if ( iommu_has_feature(iommu, IOMMU_EXT_FEATURE_IASUP_SHIFT) ) @@ -996,6 +1016,9 @@ static void disable_iommu(struct amd_iom if ( iommu_has_feature(iommu, IOMMU_EXT_FEATURE_PPRSUP_SHIFT) ) set_iommu_ppr_log_control(iommu, IOMMU_CONTROL_DISABLED); + if ( iommu_has_feature(iommu, IOMMU_EXT_FEATURE_GTSUP_SHIFT) ) + set_iommu_guest_translation_control(iommu, IOMMU_CONTROL_DISABLED); + set_iommu_translation_control(iommu, IOMMU_CONTROL_DISABLED); iommu->enabled = 0;
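The read-modify-write above leans on small bit helpers whose definitions are not shown in this series; presumably they look something like this sketch (assumed shape, for orientation only):

    #include <stdint.h>

    static inline void iommu_set_bit(uint32_t *reg, uint32_t bit)
    {
        *reg |= (1u << bit);
    }

    static inline void iommu_clear_bit(uint32_t *reg, uint32_t bit)
    {
        *reg &= ~(1u << bit);
    }

    static inline uint32_t iommu_get_bit(uint32_t reg, uint32_t bit)
    {
        return (reg >> bit) & 1u;
    }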
Wei Wang
2011-Dec-14 15:29 UTC
[PATCH 06 of 16] amd iommu: add ppr log processing into iommu interrupt handling
# HG changeset patch
# User Wei Wang <wei.wang2@amd.com>
# Date 1323875771 -3600
# Node ID fbed4e6011fce13d3a521bbc339f4959bf32a06c
# Parent be3f15ae6a4153504bbb3376291a2383684afee5
amd iommu: add ppr log processing into iommu interrupt handling
PPR log and event log share the same interrupt source. The interrupt handler
should check both of them.

Signed-off-by: Wei Wang <wei.wang2@amd.com>

diff -r be3f15ae6a41 -r fbed4e6011fc xen/drivers/passthrough/amd/iommu_init.c
--- a/xen/drivers/passthrough/amd/iommu_init.c Wed Dec 14 16:16:10 2011 +0100
+++ b/xen/drivers/passthrough/amd/iommu_init.c Wed Dec 14 16:16:11 2011 +0100
@@ -352,75 +352,91 @@ static void set_iommu_ppr_log_control(st
         AMD_IOMMU_DEBUG("PPR Log Enabled.\n");
 }

-static void parse_event_log_entry(struct amd_iommu *, u32 entry[]);
+/* read event log or ppr log from iommu ring buffer */
+static int iommu_read_log(struct amd_iommu *iommu,
+                          struct ring_buffer *log,
+                          void (*parse_func)(struct amd_iommu *, u32 *))
+{
+    u32 tail, head, *entry, tail_offset, head_offset;

-static int amd_iommu_read_event_log(struct amd_iommu *iommu)
-{
-    u32 tail, head, *event_log;
-
-    BUG_ON( !iommu );
+    BUG_ON( !iommu || ((log != &iommu->event_log) &&
+                       (log != &iommu->ppr_log)) );

     /* make sure there's an entry in the log */
-    tail = readl(iommu->mmio_base + IOMMU_EVENT_LOG_TAIL_OFFSET);
-    tail = get_field_from_reg_u32(tail,
-                                  IOMMU_EVENT_LOG_TAIL_MASK,
-                                  IOMMU_EVENT_LOG_TAIL_SHIFT);
+    tail_offset = ( log == &iommu->event_log ) ?
+        IOMMU_EVENT_LOG_TAIL_OFFSET:
+        IOMMU_PPR_LOG_TAIL_OFFSET;

-    while ( tail != iommu->event_log.head )
+    head_offset = ( log == &iommu->event_log ) ?
+        IOMMU_EVENT_LOG_HEAD_OFFSET:
+        IOMMU_PPR_LOG_HEAD_OFFSET;
+
+    tail = readl(iommu->mmio_base + tail_offset);
+    tail = iommu_get_rb_pointer(tail);
+
+    while ( tail != log->head )
     {
         /* read event log entry */
-        event_log = (u32 *)(iommu->event_log.buffer +
-                           (iommu->event_log.head *
-                            IOMMU_EVENT_LOG_ENTRY_SIZE));
+        entry = (u32 *)(log->buffer + log->head * log->entry_size);

-        parse_event_log_entry(iommu, event_log);
-
-        if ( ++iommu->event_log.head == iommu->event_log.entries )
-            iommu->event_log.head = 0;
+        parse_func(iommu, entry);
+        if ( ++log->head == log->entries )
+            log->head = 0;

         /* update head pointer */
-        set_field_in_reg_u32(iommu->event_log.head, 0,
-                             IOMMU_EVENT_LOG_HEAD_MASK,
-                             IOMMU_EVENT_LOG_HEAD_SHIFT, &head);
-        writel(head, iommu->mmio_base + IOMMU_EVENT_LOG_HEAD_OFFSET);
+        head = 0;
+        iommu_set_rb_pointer(&head, log->head);
+
+        writel(head, iommu->mmio_base + head_offset);
     }

     return 0;
 }

-static void amd_iommu_reset_event_log(struct amd_iommu *iommu)
+/* reset event log or ppr log when overflow */
+static void iommu_reset_log(struct amd_iommu *iommu,
+                            struct ring_buffer *log,
+                            void (*ctrl_func)(struct amd_iommu *iommu, int))
 {
     u32 entry;
-    int log_run;
+    int log_run, run_bit, of_bit;
     int loop_count = 1000;

+    BUG_ON( !iommu || ((log != &iommu->event_log) &&
+                       (log != &iommu->ppr_log)) );
+
+    run_bit = ( log == &iommu->event_log ) ?
+        IOMMU_STATUS_EVENT_LOG_RUN_SHIFT:
+        IOMMU_STATUS_PPR_LOG_RUN_SHIFT;
+
+    of_bit = ( log == &iommu->event_log ) ?
+        IOMMU_STATUS_EVENT_OVERFLOW_SHIFT:
+        IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT;
+
     /* wait until EventLogRun bit = 0 */
     do {
         entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-        log_run = iommu_get_bit(entry, IOMMU_STATUS_EVENT_LOG_RUN_SHIFT);
+        log_run = iommu_get_bit(entry, run_bit);
        loop_count--;
     } while ( log_run && loop_count );

     if ( log_run )
     {
-        AMD_IOMMU_DEBUG("Warning: EventLogRun bit is not cleared"
-                        "before reset!\n");
+        AMD_IOMMU_DEBUG("Warning: Log Run bit %d is not cleared"
+                        " before reset!\n", run_bit);
         return;
     }

-    set_iommu_event_log_control(iommu, IOMMU_CONTROL_DISABLED);
+    ctrl_func(iommu, IOMMU_CONTROL_DISABLED);

-    /* read event log for debugging */
-    amd_iommu_read_event_log(iommu);
     /*clear overflow bit */
-    iommu_clear_bit(&entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT);
-
-    writel(entry, iommu->mmio_base+IOMMU_STATUS_MMIO_OFFSET);
+    iommu_clear_bit(&entry, of_bit);
+    writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);

     /*reset event log base address */
-    iommu->event_log.head = 0;
+    log->head = 0;

-    set_iommu_event_log_control(iommu, IOMMU_CONTROL_ENABLED);
+    ctrl_func(iommu, IOMMU_CONTROL_ENABLED);
 }

 static void iommu_msi_set_affinity(struct irq_desc *desc, const cpumask_t *mask)
@@ -592,30 +608,93 @@ static void parse_event_log_entry(struct
     }
 }

-static void amd_iommu_page_fault(int irq, void *dev_id,
-                                 struct cpu_user_regs *regs)
+static void iommu_check_event_log(struct amd_iommu *iommu)
 {
     u32 entry;
     unsigned long flags;
-    struct amd_iommu *iommu = dev_id;

     spin_lock_irqsave(&iommu->lock, flags);
-    amd_iommu_read_event_log(iommu);
+
+    iommu_read_log(iommu, &iommu->event_log, parse_event_log_entry);

     /*check event overflow */
     entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);

     if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) )
-        amd_iommu_reset_event_log(iommu);
+        iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control);

     /* reset interrupt status bit */
     entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
     iommu_set_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT);

-    writel(entry, iommu->mmio_base+IOMMU_STATUS_MMIO_OFFSET);
+    writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+
     spin_unlock_irqrestore(&iommu->lock, flags);
 }

+void parse_ppr_log_entry(struct amd_iommu *iommu, u32 entry[])
+{
+    u16 device_id;
+    u8 bus, devfn;
+    struct pci_dev *pdev;
+    struct domain *d;
+
+    /* here device_id is physical value */
+    device_id = iommu_get_devid_from_cmd(entry[0]);
+    bus = device_id >> 8;
+    devfn = device_id & 0xFF;
+
+    local_irq_enable();
+
+    spin_lock(&pcidevs_lock);
+    pdev = pci_get_pdev(0, bus, devfn);
+    spin_unlock(&pcidevs_lock);
+
+    local_irq_disable();
+
+    if ( pdev == NULL )
+        return;
+
+    d = pdev->domain;
+
+    guest_iommu_add_ppr_log(d, entry);
+}
+
+static void iommu_check_ppr_log(struct amd_iommu *iommu)
+{
+    u32 entry;
+    unsigned long flags;
+
+    spin_lock_irqsave(&iommu->lock, flags);
+
+    iommu_read_log(iommu, &iommu->ppr_log, parse_ppr_log_entry);
+
+    /* check ppr log overflow */
+    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+
+    if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) )
+        iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control);
+
+    /* reset interrupt status bit */
+    entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+    iommu_set_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT);
+
+    writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+
+    spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+static void iommu_interrupt_handler(int irq, void *dev_id,
+                                    struct cpu_user_regs *regs)
+{
+    struct amd_iommu *iommu = dev_id;
+    iommu_check_event_log(iommu);
+
+    if ( iommu->ppr_log.buffer != NULL )
+        iommu_check_ppr_log(iommu);
+}
+
 static int __init set_iommu_interrupt_handler(struct amd_iommu *iommu)
 {
     int irq, ret;
@@ -628,8 +707,7 @@ static int __init set_iommu_interrupt_ha
     }

     irq_desc[irq].handler = &iommu_msi_type;
-    ret = request_irq(irq, amd_iommu_page_fault, 0,
-                      "amd_iommu", iommu);
+    ret = request_irq(irq, iommu_interrupt_handler, 0, "amd_iommu", iommu);
     if ( ret )
     {
         irq_desc[irq].handler = &no_irq_type;
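Taken on its own, the head/tail protocol that iommu_read_log() generalizes can be exercised with a small stand-alone program. The sketch below uses demo names and sizes, not driver code: the device advances the tail as it writes records, software consumes from the head, wrapping at the end of the buffer, and writes the new head back so slots can be reused.

    #include <stdint.h>
    #include <stdio.h>

    #define LOG_ENTRIES 8   /* demo size; the real logs hold 512 entries */

    struct demo_log {
        uint32_t head;                  /* software consume index */
        uint32_t entry[LOG_ENTRIES];    /* one word per record for the demo */
    };

    static void consume(struct demo_log *log, uint32_t hw_tail,
                        void (*parse)(uint32_t))
    {
        while ( log->head != hw_tail )
        {
            parse(log->entry[log->head]);
            if ( ++log->head == LOG_ENTRIES )   /* wrap around */
                log->head = 0;
            /* a real driver writes log->head to the MMIO head register here */
        }
    }

    static void print_entry(uint32_t e)
    {
        printf("record %#x\n", e);
    }

    int main(void)
    {
        struct demo_log log = { .head = 6,
                                .entry = { 0, 1, 2, 3, 4, 5, 6, 7 } };

        consume(&log, 2, print_entry);   /* consumes 6, 7, 0, 1, then stops */
        return 0;
    }

The same loop services both logs; only the parse callback and the MMIO head/tail offsets differ, which is exactly the factoring the patch introduces.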
# HG changeset patch
# User Wei Wang <wei.wang2@amd.com>
# Date 1323875772 -3600
# Node ID ef5698887d044ad58293bee3549eaa20310c2b17
# Parent fbed4e6011fce13d3a521bbc339f4959bf32a06c
amd iommu: Add 2 hypercalls for libxc

iommu_set_msi: used by qemu to inform the hypervisor of the iommu vector
number in guest space. The hypervisor needs this vector to inject an msi
into the guest when PPR logging happens.

iommu_bind_bdf: used by xl to bind a guest bdf number to a machine bdf
number. The IOMMU emulation code receives commands from the guest iommu
driver and forwards them to the host iommu, but the virtual device id from
the guest should be converted into a physical one before being sent to the
real hardware.

Signed-off-by: Wei Wang <wei.wang2@amd.com>

diff -r fbed4e6011fc -r ef5698887d04 xen/drivers/passthrough/amd/iommu_guest.c
--- a/xen/drivers/passthrough/amd/iommu_guest.c Wed Dec 14 16:16:11 2011 +0100
+++ b/xen/drivers/passthrough/amd/iommu_guest.c Wed Dec 14 16:16:12 2011 +0100
@@ -50,12 +50,27 @@

 static unsigned int machine_bdf(struct domain *d, uint16_t guest_bdf)
 {
-    return guest_bdf;
+    struct pci_dev *pdev;
+    uint16_t mbdf = 0;
+
+    for_each_pdev( d, pdev )
+    {
+        if ( pdev->gbdf == guest_bdf )
+        {
+            mbdf = PCI_BDF2(pdev->bus, pdev->devfn);
+            break;
+        }
+    }
+    return mbdf;
 }

 static uint16_t guest_bdf(struct domain *d, uint16_t machine_bdf)
 {
-    return machine_bdf;
+    struct pci_dev *pdev;
+
+    pdev = pci_get_pdev_by_domain(d, 0, PCI_BUS(machine_bdf),
+                                  PCI_DEVFN2(machine_bdf));
+    return pdev->gbdf;
 }

 static inline struct guest_iommu *domain_iommu(struct domain *d)
@@ -951,3 +966,43 @@ const struct hvm_mmio_handler iommu_mmio
     .read_handler = guest_iommu_mmio_read,
     .write_handler = guest_iommu_mmio_write
 };
+
+/* iommu hypercall handler */
+int iommu_bind_bdf(struct domain* d, uint16_t gbdf, uint16_t mbdf)
+{
+    struct pci_dev *pdev;
+    int ret = -ENODEV;
+
+    if ( !iommu_found() )
+        return 0;
+
+    spin_lock(&pcidevs_lock);
+
+    for_each_pdev( d, pdev )
+    {
+        if ( (pdev->bus != PCI_BUS(mbdf) ) ||
+             (pdev->devfn != PCI_DEVFN2(mbdf)) )
+            continue;
+
+        pdev->gbdf = gbdf;
+        ret = 0;
+    }
+
+    spin_unlock(&pcidevs_lock);
+    return ret;
+}
+
+void iommu_set_msi(struct domain* d, uint16_t vector, uint16_t dest,
+                   uint16_t dest_mode, uint16_t delivery_mode,
+                   uint16_t trig_mode)
+{
+    struct guest_iommu *iommu = domain_iommu(d);
+
+    if ( !iommu_found() )
+        return;
+
+    iommu->msi.vector = vector;
+    iommu->msi.dest = dest;
+    iommu->msi.dest_mode = dest_mode;
+    iommu->msi.delivery_mode = delivery_mode;
+    iommu->msi.trig_mode = trig_mode;
+}
diff -r fbed4e6011fc -r ef5698887d04 xen/drivers/passthrough/iommu.c
--- a/xen/drivers/passthrough/iommu.c Wed Dec 14 16:16:11 2011 +0100
+++ b/xen/drivers/passthrough/iommu.c Wed Dec 14 16:16:12 2011 +0100
@@ -640,6 +640,40 @@ int iommu_do_domctl(
         put_domain(d);
         break;

+#ifndef __ia64__
+    case XEN_DOMCTL_guest_iommu_op:
+    {
+        xen_domctl_guest_iommu_op_t *guest_op;
+
+        if ( unlikely((d = get_domain_by_id(domctl->domain)) == NULL) )
+        {
+            gdprintk(XENLOG_ERR,
+                     "XEN_DOMCTL_guest_iommu_op: get_domain_by_id() failed\n");
+            ret = -EINVAL;
+            break;
+        }
+
+        guest_op = &(domctl->u.guest_iommu_op);
+        switch ( guest_op->op )
+        {
+        case XEN_DOMCTL_GUEST_IOMMU_OP_SET_MSI:
+            iommu_set_msi(d, guest_op->u.msi.vector,
+                          guest_op->u.msi.dest,
+                          guest_op->u.msi.dest_mode,
+                          guest_op->u.msi.delivery_mode,
+                          guest_op->u.msi.trig_mode);
+            ret = 0;
+            break;
+        case XEN_DOMCTL_GUEST_IOMMU_OP_BIND_BDF:
+            ret = iommu_bind_bdf(d, guest_op->u.bdf_bind.g_bdf,
+                                 guest_op->u.bdf_bind.m_bdf);
+            break;
+        }
+        put_domain(d);
+        break;
+    }
+#endif
+
     default:
         ret = -ENOSYS;
         break;
diff -r fbed4e6011fc -r ef5698887d04 xen/include/public/domctl.h
--- a/xen/include/public/domctl.h Wed Dec 14 16:16:11 2011 +0100
+++ b/xen/include/public/domctl.h Wed Dec 14 16:16:12 2011 +0100
@@ -848,6 +848,31 @@ struct xen_domctl_set_access_required {
 typedef struct xen_domctl_set_access_required xen_domctl_set_access_required_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_access_required_t);

+#if defined(__i386__) || defined(__x86_64__)
+/* Support for guest iommu emulation */
+struct xen_domctl_guest_iommu_op {
+    /* XEN_DOMCTL_GUEST_IOMMU_OP_* */
+#define XEN_DOMCTL_GUEST_IOMMU_OP_SET_MSI 0
+#define XEN_DOMCTL_GUEST_IOMMU_OP_BIND_BDF 1
+    uint8_t op;
+    union {
+        struct iommu_msi {
+            uint8_t vector;
+            uint8_t dest;
+            uint8_t dest_mode;
+            uint8_t delivery_mode;
+            uint8_t trig_mode;
+        } msi;
+        struct bdf_bind {
+            uint32_t g_bdf;
+            uint32_t m_bdf;
+        } bdf_bind;
+    } u;
+};
+typedef struct xen_domctl_guest_iommu_op xen_domctl_guest_iommu_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_guest_iommu_op_t);
+#endif
+
 struct xen_domctl {
     uint32_t cmd;
 #define XEN_DOMCTL_createdomain 1
@@ -912,6 +937,7 @@ struct xen_domctl {
 #define XEN_DOMCTL_getvcpuextstate 63
 #define XEN_DOMCTL_set_access_required 64
 #define XEN_DOMCTL_audit_p2m 65
+#define XEN_DOMCTL_guest_iommu_op 66
 #define XEN_DOMCTL_gdbsx_guestmemio 1000
 #define XEN_DOMCTL_gdbsx_pausevcpu 1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu 1002
@@ -963,6 +989,7 @@ struct xen_domctl {
 #if defined(__i386__) || defined(__x86_64__)
     struct xen_domctl_cpuid cpuid;
     struct xen_domctl_vcpuextstate vcpuextstate;
+    struct xen_domctl_guest_iommu_op guest_iommu_op;
 #endif
     struct xen_domctl_set_access_required access_required;
     struct xen_domctl_audit_p2m audit_p2m;
diff -r fbed4e6011fc -r ef5698887d04 xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h Wed Dec 14 16:16:11 2011 +0100
+++ b/xen/include/xen/iommu.h Wed Dec 14 16:16:12 2011 +0100
@@ -164,6 +164,14 @@ int iommu_do_domctl(struct xen_domctl *,
 void iommu_iotlb_flush(struct domain *d, unsigned long gfn,
                        unsigned int page_count);
 void iommu_iotlb_flush_all(struct domain *d);

+#ifndef __ia64__
+/* Only used by AMD IOMMU */
+void iommu_set_msi(struct domain* d, uint16_t vector, uint16_t dest,
+                   uint16_t dest_mode, uint16_t delivery_mode,
+                   uint16_t trig_mode);
+int iommu_bind_bdf(struct domain* d, uint16_t gbdf, uint16_t mbdf);
+#endif
+
 /*
  * The purpose of the iommu_dont_flush_iotlb optional cpu flag is to
  * avoid unecessary iotlb_flush in the low level IOMMU code.
diff -r fbed4e6011fc -r ef5698887d04 xen/include/xen/pci.h
--- a/xen/include/xen/pci.h Wed Dec 14 16:16:11 2011 +0100
+++ b/xen/include/xen/pci.h Wed Dec 14 16:16:12 2011 +0100
@@ -63,6 +63,9 @@ struct pci_dev {
     const u8 devfn;
     struct pci_dev_info info;
     u64 vf_rlen[6];
+
+    /* used by amd iommu to represent the bdf value in guest space */
+    u16 gbdf;
 };

 #define for_each_pdev(domain, pdev) \
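As an aside, the bdf translation above relies on the usual bus/devfn packing. The macros in the sketch below are modelled on Xen's PCI helpers for illustration, not quoted from this patch:

    #include <stdint.h>
    #include <stdio.h>

    #define PCI_BDF2(b, df) ((((b) & 0xff) << 8) | ((df) & 0xff))
    #define PCI_BUS(bdf)    (((bdf) >> 8) & 0xff)
    #define PCI_DEVFN2(bdf) ((bdf) & 0xff)

    int main(void)
    {
        uint16_t bdf = PCI_BDF2(0x05, 0x28);   /* bus 5, device 5, function 0 */

        printf("bdf=%#x bus=%#x devfn=%#x\n",
               bdf, PCI_BUS(bdf), PCI_DEVFN2(bdf));
        return 0;
    }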
# HG changeset patch # User Wei Wang <wei.wang2@amd.com> # Date 1323875773 -3600 # Node ID 52dbaf1fb0e0364fad40f9be330f80e157c935e4 # Parent ef5698887d044ad58293bee3549eaa20310c2b17 amd iommu: Add a hypercall for hvmloader. IOMMU MMIO base address is dynamically allocated by firmware. This patch allows hvmloader to notify hypervisor where the iommu mmio pages are. Signed-off-by: Wei Wang <wei.wang2@amd.com> diff -r ef5698887d04 -r 52dbaf1fb0e0 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Wed Dec 14 16:16:12 2011 +0100 +++ b/xen/arch/x86/hvm/hvm.c Wed Dec 14 16:16:13 2011 +0100 @@ -65,6 +65,7 @@ #include <public/memory.h> #include <asm/mem_event.h> #include <public/mem_event.h> +#include <asm/hvm/svm/amd-iommu-proto.h> bool_t __read_mostly hvm_enabled; @@ -3676,6 +3677,9 @@ long do_hvm_op(unsigned long op, XEN_GUE case HVM_PARAM_BUFIOREQ_EVTCHN: rc = -EINVAL; break; + case HVM_PARAM_IOMMU_BASE: + rc = guest_iommu_set_base(d, a.value); + break; } if ( rc == 0 ) diff -r ef5698887d04 -r 52dbaf1fb0e0 xen/include/public/hvm/params.h --- a/xen/include/public/hvm/params.h Wed Dec 14 16:16:12 2011 +0100 +++ b/xen/include/public/hvm/params.h Wed Dec 14 16:16:13 2011 +0100 @@ -142,6 +142,10 @@ /* Boolean: Enable nestedhvm (hvm only) */ #define HVM_PARAM_NESTEDHVM 24 -#define HVM_NR_PARAMS 27 +#ifndef __ia64__ +#define HVM_PARAM_IOMMU_BASE 27 +#endif + +#define HVM_NR_PARAMS 28 #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
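From the guest firmware side, the new parameter is set with a plain HVMOP_set_param. A sketch of the hvmloader-side call is below; it mirrors what the later hvmloader IVRS patch does in acpi/build.c, and the header paths and hypercall_hvm_op() helper are assumptions based on the hvmloader environment:

    #include <xen/hvm/hvm_op.h>
    #include <xen/hvm/params.h>
    #include "hypercall.h"

    /* Tell Xen where the emulated IOMMU MMIO region was placed. */
    static int notify_iommu_base(uint64_t mmio_base)
    {
        struct xen_hvm_param p;

        p.domid = DOMID_SELF;
        p.index = HVM_PARAM_IOMMU_BASE;
        p.value = mmio_base;

        /* a non-zero return means IOMMUv2 hardware is not available */
        return hypercall_hvm_op(HVMOP_set_param, &p);
    }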
# HG changeset patch # User Wei Wang <wei.wang2@amd.com> # Date 1323875774 -3600 # Node ID f33af4d61321d074a4c624d909204fce5945f61b # Parent 52dbaf1fb0e0364fad40f9be330f80e157c935e4 amd iommu: add iommu mmio handler. Signed-off-by: Wei Wang <wei.wang2@amd.com> diff -r 52dbaf1fb0e0 -r f33af4d61321 xen/arch/x86/hvm/intercept.c --- a/xen/arch/x86/hvm/intercept.c Wed Dec 14 16:16:13 2011 +0100 +++ b/xen/arch/x86/hvm/intercept.c Wed Dec 14 16:16:14 2011 +0100 @@ -38,7 +38,8 @@ hvm_mmio_handlers[HVM_MMIO_HANDLER_NR] &hpet_mmio_handler, &vlapic_mmio_handler, &vioapic_mmio_handler, - &msixtbl_mmio_handler + &msixtbl_mmio_handler, + &iommu_mmio_handler }; static int hvm_mmio_access(struct vcpu *v, diff -r 52dbaf1fb0e0 -r f33af4d61321 xen/include/asm-x86/hvm/io.h --- a/xen/include/asm-x86/hvm/io.h Wed Dec 14 16:16:13 2011 +0100 +++ b/xen/include/asm-x86/hvm/io.h Wed Dec 14 16:16:14 2011 +0100 @@ -69,8 +69,9 @@ extern const struct hvm_mmio_handler hpe extern const struct hvm_mmio_handler vlapic_mmio_handler; extern const struct hvm_mmio_handler vioapic_mmio_handler; extern const struct hvm_mmio_handler msixtbl_mmio_handler; +extern const struct hvm_mmio_handler iommu_mmio_handler; -#define HVM_MMIO_HANDLER_NR 4 +#define HVM_MMIO_HANDLER_NR 5 int hvm_io_intercept(ioreq_t *p, int type); void register_io_handler(
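For context, an hvm_mmio_handler at this point in the tree is a trio of callbacks; the sketch below reconstructs the expected shape from the initializers seen earlier in the series (the typedef names and exact signatures are assumptions):

    struct vcpu;

    typedef int (*hvm_mmio_check_t)(struct vcpu *v, unsigned long addr);
    typedef int (*hvm_mmio_read_t)(struct vcpu *v, unsigned long addr,
                                   unsigned long length, unsigned long *val);
    typedef int (*hvm_mmio_write_t)(struct vcpu *v, unsigned long addr,
                                    unsigned long length, unsigned long val);

    struct hvm_mmio_handler {
        hvm_mmio_check_t check_handler;   /* does this address belong to us? */
        hvm_mmio_read_t  read_handler;    /* emulate a guest read */
        hvm_mmio_write_t write_handler;   /* emulate a guest write */
    };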
Wei Wang
2011-Dec-14 15:29 UTC
[PATCH 10 of 16] amd iommu: Enable FC bit in iommu host level PTE
# HG changeset patch
# User Wei Wang <wei.wang2@amd.com>
# Date 1323875775 -3600
# Node ID 001681ff1a0c09c4d04fd8bd45e8d26805686246
# Parent f33af4d61321d074a4c624d909204fce5945f61b
amd iommu: Enable FC bit in iommu host level PTE

Signed-off-by: Wei Wang <wei.wang2@amd.com>

diff -r f33af4d61321 -r 001681ff1a0c xen/drivers/passthrough/amd/iommu_map.c
--- a/xen/drivers/passthrough/amd/iommu_map.c Wed Dec 14 16:16:14 2011 +0100
+++ b/xen/drivers/passthrough/amd/iommu_map.c Wed Dec 14 16:16:15 2011 +0100
@@ -83,6 +83,11 @@ static bool_t set_iommu_pde_present(u32
     set_field_in_reg_u32(ir, entry,
                          IOMMU_PDE_IO_READ_PERMISSION_MASK,
                          IOMMU_PDE_IO_READ_PERMISSION_SHIFT, &entry);
+
+    /* IOMMUv2 needs FC bit enabled */
+    if ( next_level == IOMMU_PAGING_MODE_LEVEL_0 )
+        set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
+                             IOMMU_PTE_FC_MASK, IOMMU_PTE_FC_SHIFT, &entry);
     pde[1] = entry;

     /* mark next level as 'present' */
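The FC bit is set through the same field helper used throughout this series. A sketch of its assumed semantics, for orientation: clear the masked field, then insert the shifted value.

    #include <stdint.h>

    static inline uint32_t set_field_in_reg_u32(uint32_t field, uint32_t reg_value,
                                                uint32_t mask, uint32_t shift,
                                                uint32_t *reg)
    {
        reg_value &= ~mask;                     /* clear the field */
        reg_value |= (field << shift) & mask;   /* insert the new value */
        *reg = reg_value;
        return reg_value;
    }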
Wei Wang
2011-Dec-14 15:29 UTC
[PATCH 11 of 16] amd iommu: Add a new flag to indicate whether the iommuv2 feature is enabled
# HG changeset patch
# User Wei Wang <wei.wang2@amd.com>
# Date 1323875776 -3600
# Node ID 9a93e064dd3c467ce4b87ddef8739a3573ef547c
# Parent 001681ff1a0c09c4d04fd8bd45e8d26805686246
amd iommu: Add a new flag to indicate whether the iommuv2 feature is enabled.
Hypercalls should return early on non-iommuv2 systems.

Signed-off-by: Wei Wang <wei.wang2@amd.com>

diff -r 001681ff1a0c -r 9a93e064dd3c xen/drivers/passthrough/amd/iommu_guest.c
--- a/xen/drivers/passthrough/amd/iommu_guest.c Wed Dec 14 16:16:15 2011 +0100
+++ b/xen/drivers/passthrough/amd/iommu_guest.c Wed Dec 14 16:16:16 2011 +0100
@@ -48,6 +48,8 @@
         (reg)->hi = (val >> 32) & 0xFFFFFFFF; \
     } while(0)

+extern bool_t iommuv2_enabled;
+
 static unsigned int machine_bdf(struct domain *d, uint16_t guest_bdf)
 {
     struct pci_dev *pdev;
@@ -839,6 +841,9 @@ int guest_iommu_set_base(struct domain *
     p2m_type_t t;
     struct guest_iommu *iommu = domain_iommu(d);

+    if ( !is_hvm_domain(d) && !iommuv2_enabled )
+        return 1;
+
     iommu->mmio_base = base;
     base >>= PAGE_SHIFT;

@@ -898,7 +903,7 @@ int guest_iommu_init(struct domain* d)
     struct guest_iommu *iommu;
     struct hvm_iommu *hd = domain_hvm_iommu(d);

-    if ( !is_hvm_domain(d) )
+    if ( !is_hvm_domain(d) && !iommuv2_enabled )
         return 0;

     iommu = xzalloc(struct guest_iommu);
@@ -940,7 +945,7 @@ void guest_iommu_destroy(struct domain *
 {
     struct guest_iommu *iommu;

-    if ( !is_hvm_domain(d) )
+    if ( !is_hvm_domain(d) && !iommuv2_enabled )
         return;

     iommu = domain_iommu(d);
@@ -973,7 +978,7 @@ int iommu_bind_bdf(struct domain* d, uin
     struct pci_dev *pdev;
     int ret = -ENODEV;

-    if ( !iommu_found() )
+    if ( !iommu_found() || !iommuv2_enabled )
         return 0;

     spin_lock(&pcidevs_lock);
@@ -998,7 +1003,7 @@ void iommu_set_msi(struct domain* d, uin
 {
     struct guest_iommu *iommu = domain_iommu(d);

-    if ( !iommu_found() )
+    if ( !iommu_found() || !iommuv2_enabled )
         return;

     iommu->msi.vector = vector;
diff -r 001681ff1a0c -r 9a93e064dd3c xen/drivers/passthrough/amd/iommu_init.c
--- a/xen/drivers/passthrough/amd/iommu_init.c Wed Dec 14 16:16:15 2011 +0100
+++ b/xen/drivers/passthrough/amd/iommu_init.c Wed Dec 14 16:16:16 2011 +0100
@@ -36,6 +36,7 @@ unsigned short ivrs_bdf_entries;
 static struct radix_tree_root ivrs_maps;
 struct list_head amd_iommu_head;
 struct table_struct device_table;
+bool_t iommuv2_enabled;

 static int iommu_has_ht_flag(struct amd_iommu *iommu, u8 mask)
 {
@@ -759,6 +760,10 @@ static void enable_iommu(struct amd_iomm
     amd_iommu_flush_all_caches(iommu);

     iommu->enabled = 1;
+
+    if ( iommu->features )
+        iommuv2_enabled = 1;
+
     spin_unlock_irqrestore(&iommu->lock, flags);
 }
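The feature checks used here and in the earlier patches boil down to testing a bit in the cached extended feature register. A plausible sketch, with the struct trimmed to the one field that matters (the real struct and helper live in the AMD IOMMU headers):

    #include <stdint.h>

    struct amd_iommu_demo {
        uint64_t features;   /* cached IOMMU extended feature register */
    };

    static inline int iommu_has_feature(struct amd_iommu_demo *iommu,
                                        uint32_t bit)
    {
        return !!(iommu->features & (1ULL << bit));
    }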
# HG changeset patch
# User Wei Wang <wei.wang2@amd.com>
# Date 1323876140 -3600
# Node ID 42ecb2ba593c2827b2dc4e54360f8c6a42a4dbfc
# Parent 9a93e064dd3c467ce4b87ddef8739a3573ef547c
hvmloader: Build IVRS table.

Signed-off-by: Wei Wang <wei.wang2@amd.com>

diff -r 9a93e064dd3c -r 42ecb2ba593c tools/firmware/hvmloader/acpi/acpi2_0.h
--- a/tools/firmware/hvmloader/acpi/acpi2_0.h Wed Dec 14 16:16:16 2011 +0100
+++ b/tools/firmware/hvmloader/acpi/acpi2_0.h Wed Dec 14 16:22:20 2011 +0100
@@ -389,6 +389,60 @@ struct acpi_20_madt_intsrcovr {
 #define ACPI_2_0_WAET_REVISION 0x01
 #define ACPI_1_0_FADT_REVISION 0x01

+#define IVRS_SIGNATURE ASCII32('I','V','R','S')
+#define IVRS_REVISION 1
+#define IVRS_VASIZE 64
+#define IVRS_PASIZE 52
+#define IVRS_GVASIZE 64
+
+#define IVHD_BLOCK_TYPE 0x10
+#define IVHD_FLAG_HTTUNEN (1 << 0)
+#define IVHD_FLAG_PASSPW (1 << 1)
+#define IVHD_FLAG_RESPASSPW (1 << 2)
+#define IVHD_FLAG_ISOC (1 << 3)
+#define IVHD_FLAG_IOTLBSUP (1 << 4)
+#define IVHD_FLAG_COHERENT (1 << 5)
+#define IVHD_FLAG_PREFSUP (1 << 6)
+#define IVHD_FLAG_PPRSUP (1 << 7)
+
+#define IVHD_EFR_GTSUP (1 << 2)
+#define IVHD_EFR_IASUP (1 << 5)
+
+#define IVHD_SELECT_4_BYTE 0x2
+
+struct ivrs_ivhd_block
+{
+    uint8_t type;
+    uint8_t flags;
+    uint16_t length;
+    uint16_t devid;
+    uint16_t cap_offset;
+    uint64_t iommu_base_addr;
+    uint16_t pci_segment;
+    uint16_t iommu_info;
+    uint32_t reserved;
+};
+
+/* IVHD 4-byte device entries */
+struct ivrs_ivhd_device
+{
+    uint8_t type;
+    uint16_t dev_id;
+    uint8_t flags;
+};
+
+#define PT_DEV_MAX_NR 32
+#define IOMMU_CAP_OFFSET 0x40
+struct acpi_40_ivrs
+{
+    struct acpi_header header;
+    uint32_t iv_info;
+    uint32_t reserved[2];
+    struct ivrs_ivhd_block ivhd_block;
+    struct ivrs_ivhd_device ivhd_device[PT_DEV_MAX_NR];
+};
+
 #pragma pack ()

 struct acpi_config {
diff -r 9a93e064dd3c -r 42ecb2ba593c tools/firmware/hvmloader/acpi/build.c
--- a/tools/firmware/hvmloader/acpi/build.c Wed Dec 14 16:16:16 2011 +0100
+++ b/tools/firmware/hvmloader/acpi/build.c Wed Dec 14 16:22:20 2011 +0100
@@ -23,6 +23,8 @@
 #include "ssdt_pm.h"
 #include "../config.h"
 #include "../util.h"
+#include "../hypercall.h"
+#include <xen/hvm/params.h>

 #define align16(sz) (((sz) + 15) & ~15)
 #define fixed_strcpy(d, s) strncpy((d), (s), sizeof(d))
@@ -198,6 +200,77 @@ static struct acpi_20_waet *construct_wa
     return waet;
 }

+extern uint32_t ptdev_bdf[PT_DEV_MAX_NR];
+extern uint32_t ptdev_nr;
+extern uint32_t iommu_bdf;
+static struct acpi_40_ivrs* construct_ivrs(void)
+{
+    struct acpi_40_ivrs *ivrs;
+    uint64_t mmio;
+    struct ivrs_ivhd_block *ivhd;
+    struct ivrs_ivhd_device *dev_entry;
+    struct xen_hvm_param p;
+
+    if (ptdev_nr == 0) return NULL;
+
+    ivrs = mem_alloc(sizeof(*ivrs), 16);
+    if (!ivrs) return NULL;
+
+    memset(ivrs, 0, sizeof(*ivrs));
+
+    /* initialize acpi header */
+    ivrs->header.signature = IVRS_SIGNATURE;
+    ivrs->header.revision = IVRS_REVISION;
+    fixed_strcpy(ivrs->header.oem_id, ACPI_OEM_ID);
+    fixed_strcpy(ivrs->header.oem_table_id, ACPI_OEM_TABLE_ID);
+
+    ivrs->header.oem_revision = ACPI_OEM_REVISION;
+    ivrs->header.creator_id = ACPI_CREATOR_ID;
+    ivrs->header.creator_revision = ACPI_CREATOR_REVISION;
+
+    ivrs->header.length = sizeof(*ivrs);
+
+    /* initialize IVHD Block */
+    ivhd = &ivrs->ivhd_block;
+    ivrs->iv_info = (IVRS_VASIZE << 15) | (IVRS_PASIZE << 8) |
+                    (IVRS_GVASIZE << 5);
+
+    ivhd->type = IVHD_BLOCK_TYPE;
+    ivhd->flags = IVHD_FLAG_PPRSUP | IVHD_FLAG_IOTLBSUP;
+    ivhd->devid = iommu_bdf;
+    ivhd->cap_offset = IOMMU_CAP_OFFSET;
+
+    /* reserve 32K IOMMU MMIO space */
+    mmio = virt_to_phys(mem_alloc(0x8000, 0x1000));
+    if (!mmio) return NULL;
+
+    p.domid = DOMID_SELF;
+    p.index = HVM_PARAM_IOMMU_BASE;
+    p.value = mmio;
+
+    /* Return non-zero if IOMMUv2 hardware is not available */
+    if ( hypercall_hvm_op(HVMOP_set_param, &p) )
+        return NULL;
+
+    ivhd->iommu_base_addr = mmio;
+    ivhd->reserved = IVHD_EFR_IASUP | IVHD_EFR_GTSUP;
+
+    /* Build IVHD device entries */
+    dev_entry = ivrs->ivhd_device;
+    for ( int i = 0; i < ptdev_nr; i++ )
+    {
+        dev_entry[i].type = IVHD_SELECT_4_BYTE;
+        dev_entry[i].dev_id = ptdev_bdf[i];
+        dev_entry[i].flags = 0;
+    }
+
+    ivhd->length = sizeof(*ivhd) + sizeof(*dev_entry) * PT_DEV_MAX_NR;
+    set_checksum(ivrs, offsetof(struct acpi_header, checksum),
+                 ivrs->header.length);
+
+    return ivrs;
+}
+
 static int construct_secondary_tables(unsigned long *table_ptrs,
                                       struct acpi_info *info)
 {
@@ -206,6 +279,7 @@ static int construct_secondary_tables(un
     struct acpi_20_hpet *hpet;
     struct acpi_20_waet *waet;
     struct acpi_20_tcpa *tcpa;
+    struct acpi_40_ivrs *ivrs;
     unsigned char *ssdt;
     static const uint16_t tis_signature[] = {0x0001, 0x0001, 0x0001};
     uint16_t *tis_hdr;
@@ -293,6 +367,13 @@ static int construct_secondary_tables(un
         }
     }

+    if ( hvm_info->iommu_enabled )
+    {
+        ivrs = construct_ivrs();
+        if ( ivrs != NULL )
+            table_ptrs[nr_tables++] = (unsigned long)ivrs;
+    }
+
     table_ptrs[nr_tables] = 0;
     return nr_tables;
 }
diff -r 9a93e064dd3c -r 42ecb2ba593c tools/firmware/hvmloader/pci.c
--- a/tools/firmware/hvmloader/pci.c Wed Dec 14 16:16:16 2011 +0100
+++ b/tools/firmware/hvmloader/pci.c Wed Dec 14 16:22:20 2011 +0100
@@ -34,11 +34,17 @@ unsigned long pci_mem_end = PCI_MEM_END;
 enum virtual_vga virtual_vga = VGA_none;
 unsigned long igd_opregion_pgbase = 0;

+/* support up to 32 passthrough devices */
+#define PT_DEV_MAX_NR 32
+uint32_t ptdev_bdf[PT_DEV_MAX_NR];
+uint32_t ptdev_nr;
+uint32_t iommu_bdf;
+
 void pci_setup(void)
 {
     uint32_t base, devfn, bar_reg, bar_data, bar_sz, cmd, mmio_total = 0;
     uint32_t vga_devfn = 256;
-    uint16_t class, vendor_id, device_id;
+    uint16_t class, vendor_id, device_id, sub_vendor_id;
     unsigned int bar, pin, link, isa_irq;

     /* Resources assignable to PCI devices via BARs. */
@@ -72,11 +78,34 @@ void pci_setup(void)
         class = pci_readw(devfn, PCI_CLASS_DEVICE);
         vendor_id = pci_readw(devfn, PCI_VENDOR_ID);
         device_id = pci_readw(devfn, PCI_DEVICE_ID);
+        sub_vendor_id = pci_readw(devfn, PCI_SUBSYSTEM_VENDOR_ID);
+
         if ( (vendor_id == 0xffff) && (device_id == 0xffff) )
             continue;

         ASSERT((devfn != PCI_ISA_DEVFN) ||
                ((vendor_id == 0x8086) && (device_id == 0x7000)));

+        /* Found amd iommu device. */
+        if ( class == 0x0806 && vendor_id == 0x1022 )
+        {
+            iommu_bdf = devfn;
+            printf("Found iommu devfn %x class %x\n", iommu_bdf, class);
+            continue;
+        }
+
+        /* IVRS: Detecting passthrough devices.
+         * sub_vendor_id != citrix && sub_vendor_id != qemu */
+        if ( sub_vendor_id != 0x5853 && sub_vendor_id != 0x1af4 )
+        {
+            /* found a candidate passthrough device */
+            if ( ptdev_nr < PT_DEV_MAX_NR )
+            {
+                ptdev_bdf[ptdev_nr] = devfn;
+                ptdev_nr++;
+            }
+            else
+                printf("Number of passthru devices > PT_DEV_MAX_NR\n");
+        }
+
         switch ( class )
         {
diff -r 9a93e064dd3c -r 42ecb2ba593c xen/include/public/hvm/hvm_info_table.h
--- a/xen/include/public/hvm/hvm_info_table.h Wed Dec 14 16:16:16 2011 +0100
+++ b/xen/include/public/hvm/hvm_info_table.h Wed Dec 14 16:22:20 2011 +0100
@@ -67,6 +67,9 @@ struct hvm_info_table {

     /* Bitmap of which CPUs are online at boot time. */
     uint8_t vcpu_online[(HVM_MAX_VCPUS + 7)/8];
+
+    /* guest iommu enabled */
+    uint8_t iommu_enabled;
 };

 #endif /* __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ */
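set_checksum() enforces the usual ACPI rule that all the bytes of a table sum to zero. A stand-alone sketch of that rule (not the hvmloader implementation itself):

    #include <stdint.h>
    #include <stddef.h>

    static void set_checksum(void *table, size_t checksum_off, size_t length)
    {
        uint8_t *p = table;
        uint8_t sum = 0;
        size_t i;

        p[checksum_off] = 0;          /* exclude the old checksum byte */
        for ( i = 0; i < length; i++ )
            sum += p[i];

        p[checksum_off] = -sum;       /* byte that makes the total wrap to zero */
    }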
# HG changeset patch # User Wei Wang <wei.wang2@amd.com> # Date 1323876141 -3600 # Node ID 04573463beff7fc9696f5ecdb940920dcc2ec0ca # Parent 42ecb2ba593c2827b2dc4e54360f8c6a42a4dbfc libxc: add wrappers for new hypercalls Signed-off-by: Wei Wang <wei.wang2@amd.com> diff -r 42ecb2ba593c -r 04573463beff tools/libxc/xc_domain.c --- a/tools/libxc/xc_domain.c Wed Dec 14 16:22:20 2011 +0100 +++ b/tools/libxc/xc_domain.c Wed Dec 14 16:22:21 2011 +0100 @@ -1352,6 +1352,55 @@ int xc_domain_bind_pt_isa_irq( PT_IRQ_TYPE_ISA, 0, 0, 0, machine_irq)); } +int xc_domain_update_iommu_msi( + xc_interface *xch, + uint32_t domid, + uint8_t vector, + uint8_t dest, + uint8_t dest_mode, + uint8_t delivery_mode, + uint8_t trig_mode) +{ + int rc; + DECLARE_DOMCTL; + xen_domctl_guest_iommu_op_t * iommu_op; + + domctl.cmd = XEN_DOMCTL_guest_iommu_op; + domctl.domain = (domid_t)domid; + + iommu_op = &(domctl.u.guest_iommu_op); + iommu_op->op = XEN_DOMCTL_GUEST_IOMMU_OP_SET_MSI; + iommu_op->u.msi.vector = vector; + iommu_op->u.msi.dest = dest; + iommu_op->u.msi.dest_mode = dest_mode; + iommu_op->u.msi.delivery_mode = delivery_mode; + iommu_op->u.msi.trig_mode = trig_mode; + + rc = do_domctl(xch, &domctl); + return rc; +} + +int xc_domain_bind_pt_bdf(xc_interface *xch, + uint32_t domid, + uint32_t gbdf, + uint32_t mbdf) +{ + int rc; + DECLARE_DOMCTL; + xen_domctl_guest_iommu_op_t * guest_op; + + domctl.cmd = XEN_DOMCTL_guest_iommu_op; + domctl.domain = (domid_t)domid; + + guest_op = &(domctl.u.guest_iommu_op); + guest_op->op = XEN_DOMCTL_GUEST_IOMMU_OP_BIND_BDF; + guest_op->u.bdf_bind.g_bdf = gbdf; + guest_op->u.bdf_bind.m_bdf = mbdf; + + rc = do_domctl(xch, &domctl); + return rc; +} + int xc_domain_memory_mapping( xc_interface *xch, uint32_t domid, diff -r 42ecb2ba593c -r 04573463beff tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Wed Dec 14 16:22:20 2011 +0100 +++ b/tools/libxc/xenctrl.h Wed Dec 14 16:22:21 2011 +0100 @@ -1697,6 +1697,19 @@ int xc_domain_bind_pt_isa_irq(xc_interfa uint32_t domid, uint8_t machine_irq); +int xc_domain_bind_pt_bdf(xc_interface *xch, + uint32_t domid, + uint32_t gbdf, + uint32_t mbdf); + +int xc_domain_update_iommu_msi(xc_interface *xch, + uint32_t domid, + uint8_t vector, + uint8_t dest, + uint8_t dest_mode, + uint8_t delivery_mode, + uint8_t trig_mode); + int xc_domain_set_machine_address_size(xc_interface *xch, uint32_t domid, unsigned int width);
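A hypothetical caller of the two new wrappers, for illustration only; the signatures come from the xenctrl.h hunk above, but the domid, vector and bdf values are placeholders:

    #include <xenctrl.h>

    int setup_guest_iommu(xc_interface *xch, uint32_t domid)
    {
        /* vector 0x93, physical destination 0, fixed delivery, edge trigger */
        if ( xc_domain_update_iommu_msi(xch, domid, 0x93, 0, 0, 0, 0) )
            return -1;

        /* bind guest 00:05.0 to machine 00:10.0 */
        return xc_domain_bind_pt_bdf(xch, domid, 0x28, 0x80);
    }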
Wei Wang
2011-Dec-14 15:29 UTC
[PATCH 14 of 16] libxl: bind virtual bdf to physical bdf after device assignment
# HG changeset patch # User Wei Wang <wei.wang2@amd.com> # Date 1323876142 -3600 # Node ID f9575683f10a08a86a9c73226581610fa3f7be4b # Parent 04573463beff7fc9696f5ecdb940920dcc2ec0ca libxl: bind virtual bdf to physical bdf after device assignment Signed-off-by: Wei Wang <wei.wang2@amd.com> diff -r 04573463beff -r f9575683f10a tools/libxl/libxl_pci.c --- a/tools/libxl/libxl_pci.c Wed Dec 14 16:22:21 2011 +0100 +++ b/tools/libxl/libxl_pci.c Wed Dec 14 16:22:22 2011 +0100 @@ -735,6 +735,13 @@ out: LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "xc_assign_device failed"); return ERROR_FAIL; } + if (LIBXL__DOMAIN_IS_TYPE(gc, domid, HVM)) { + rc = xc_domain_bind_pt_bdf(ctx->xch, domid, pcidev->vdevfn, pcidev_encode_bdf(pcidev)); + if ( rc ) { + LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "xc_domain_bind_pt_bdf failed"); + return ERROR_FAIL; + } + } } if (!starting)
Wei Wang
2011-Dec-14 15:29 UTC
[PATCH 15 of 16] libxl: Introduce a new guest config file parameter
# HG changeset patch # User Wei Wang <wei.wang2@amd.com> # Date 1323876143 -3600 # Node ID 93658ca85035d6a4e56e2e6602c02859974d30a4 # Parent f9575683f10a08a86a9c73226581610fa3f7be4b libxl: Introduce a new guest config file parameter Use iommu = {1,0} to enable or disable guest iommu emulation. Default value is 0. Signed-off-by: Wei Wang <wei.wang2@amd.com> diff -r f9575683f10a -r 93658ca85035 tools/libxl/libxl_create.c --- a/tools/libxl/libxl_create.c Wed Dec 14 16:22:22 2011 +0100 +++ b/tools/libxl/libxl_create.c Wed Dec 14 16:22:23 2011 +0100 @@ -99,6 +99,7 @@ int libxl_init_build_info(libxl_ctx *ctx b_info->u.hvm.vpt_align = 1; b_info->u.hvm.timer_mode = 1; b_info->u.hvm.nested_hvm = 0; + b_info->u.hvm.iommu = 0; break; case LIBXL_DOMAIN_TYPE_PV: b_info->u.pv.slack_memkb = 8 * 1024; diff -r f9575683f10a -r 93658ca85035 tools/libxl/libxl_dom.c --- a/tools/libxl/libxl_dom.c Wed Dec 14 16:22:22 2011 +0100 +++ b/tools/libxl/libxl_dom.c Wed Dec 14 16:22:23 2011 +0100 @@ -266,6 +266,10 @@ static int hvm_build_set_params(xc_inter va_hvm = (struct hvm_info_table *)(va_map + HVM_INFO_OFFSET); va_hvm->apic_mode = info->u.hvm.apic; va_hvm->nr_vcpus = info->max_vcpus; + + if ( info->u.hvm.iommu ) + va_hvm->iommu_enabled = 1; + memcpy(va_hvm->vcpu_online, &info->cur_vcpus, sizeof(info->cur_vcpus)); for (i = 0, sum = 0; i < va_hvm->length; i++) sum += ((uint8_t *) va_hvm)[i]; diff -r f9575683f10a -r 93658ca85035 tools/libxl/libxl_types.idl --- a/tools/libxl/libxl_types.idl Wed Dec 14 16:22:22 2011 +0100 +++ b/tools/libxl/libxl_types.idl Wed Dec 14 16:22:23 2011 +0100 @@ -184,6 +184,7 @@ libxl_domain_build_info = Struct("domain ("vpt_align", bool), ("timer_mode", integer), ("nested_hvm", bool), + ("iommu", bool), ])), ("pv", Struct(None, [("kernel", libxl_file_reference), ("slack_memkb", uint32), diff -r f9575683f10a -r 93658ca85035 tools/libxl/xl_cmdimpl.c --- a/tools/libxl/xl_cmdimpl.c Wed Dec 14 16:22:22 2011 +0100 +++ b/tools/libxl/xl_cmdimpl.c Wed Dec 14 16:22:23 2011 +0100 @@ -360,6 +360,7 @@ static void printf_info(int domid, printf("\t\t\t(vpt_align %d)\n", b_info->u.hvm.vpt_align); printf("\t\t\t(timer_mode %d)\n", b_info->u.hvm.timer_mode); printf("\t\t\t(nestedhvm %d)\n", b_info->u.hvm.nested_hvm); + printf("\t\t\t(iommu %d)\n", b_info->u.hvm.iommu); printf("\t\t\t(device_model %s)\n", dm_info->device_model ? : "default"); printf("\t\t\t(videoram %d)\n", dm_info->videoram); @@ -764,6 +765,8 @@ static void parse_config_data(const char b_info->u.hvm.timer_mode = l; if (!xlu_cfg_get_long (config, "nestedhvm", &l, 0)) b_info->u.hvm.nested_hvm = l; + if (!xlu_cfg_get_long (config, "iommu", &l, 0)) + b_info->u.hvm.iommu = l; break; case LIBXL_DOMAIN_TYPE_PV: {
# HG changeset patch
# User Wei Wang <wei.wang2@amd.com>
# Date 1323876144 -3600
# Node ID 24f4a0a23da71c58f457f0bf98aa8238dd45332d
# Parent 93658ca85035d6a4e56e2e6602c02859974d30a4
libxl: add iommu parameter to qemu-dm.
When iommu = 0, the virtual iommu device will be disabled.

Signed-off-by: Wei Wang <wei.wang2@amd.com>

diff -r 93658ca85035 -r 24f4a0a23da7 tools/libxl/libxl_dm.c
--- a/tools/libxl/libxl_dm.c Wed Dec 14 16:22:23 2011 +0100
+++ b/tools/libxl/libxl_dm.c Wed Dec 14 16:22:24 2011 +0100
@@ -194,6 +194,9 @@ static char ** libxl__build_device_model
         if (info->gfx_passthru) {
             flexarray_append(dm_args, "-gfx_passthru");
         }
+        if (info->iommu) {
+            flexarray_append(dm_args, "-iommu");
+        }
     }
     if (info->saved_state) {
         flexarray_vappend(dm_args, "-loadvm", info->saved_state, NULL);
@@ -404,6 +407,9 @@ static char ** libxl__build_device_model
         if (info->gfx_passthru) {
             flexarray_append(dm_args, "-gfx_passthru");
         }
+        if (info->iommu) {
+            flexarray_append(dm_args, "-iommu");
+        }
     }
     if (info->saved_state) {
         /* This file descriptor is meant to be used by QEMU */
diff -r 93658ca85035 -r 24f4a0a23da7 tools/libxl/libxl_types.idl
--- a/tools/libxl/libxl_types.idl Wed Dec 14 16:22:23 2011 +0100
+++ b/tools/libxl/libxl_types.idl Wed Dec 14 16:22:24 2011 +0100
@@ -254,6 +254,7 @@
     ("extra", libxl_string_list, False, "extra parameters pass directly to qemu, NULL terminated"),
     ("extra_pv", libxl_string_list, False, "extra parameters pass directly to qemu for PV guest, NULL terminated"),
     ("extra_hvm", libxl_string_list, False, "extra parameters pass directly to qemu for HVM guest, NULL terminated"),
+    ("iommu", bool, False, "guest iommu enabled or disabled"),
     ],
     comment """Device Model information.
diff -r 93658ca85035 -r 24f4a0a23da7 tools/libxl/xl_cmdimpl.c
--- a/tools/libxl/xl_cmdimpl.c Wed Dec 14 16:22:23 2011 +0100
+++ b/tools/libxl/xl_cmdimpl.c Wed Dec 14 16:22:24 2011 +0100
@@ -386,6 +386,7 @@ static void printf_info(int domid,
         printf("\t\t\t(spicedisable_ticketing %d)\n",
                dm_info->spicedisable_ticketing);
         printf("\t\t\t(spiceagent_mouse %d)\n", dm_info->spiceagent_mouse);
+        printf("\t\t\t(iommu %d)\n", dm_info->iommu);
         printf("\t\t)\n");
         break;
     case LIBXL_DOMAIN_TYPE_PV:
@@ -1217,6 +1218,8 @@ skip_vfb:
         xlu_cfg_replace_string (config, "soundhw", &dm_info->soundhw, 0);
         if (!xlu_cfg_get_long (config, "xen_platform_pci", &l, 0))
             dm_info->xen_platform_pci = l;
+        if (!xlu_cfg_get_long (config, "iommu", &l, 0))
+            dm_info->iommu = l;
     }

     dm_info->type = c_info->type;
Jan Beulich
2011-Dec-14 16:44 UTC
Re: [PATCH 07 of 16] amd iommu: Add 2 hypercalls for libxc
>>> On 14.12.11 at 16:29, Wei Wang <wei.wang2@amd.com> wrote:
> # HG changeset patch
> # User Wei Wang <wei.wang2@amd.com>
> # Date 1323875772 -3600
> # Node ID ef5698887d044ad58293bee3549eaa20310c2b17
> # Parent fbed4e6011fce13d3a521bbc339f4959bf32a06c
> amd iommu: Add 2 hypercalls for libxc
>
> iommu_set_msi: used by qemu to inform the hypervisor of the iommu vector
> number in guest space. The hypervisor needs this vector to inject an msi
> into the guest when PPR logging happens.

And this cannot be done with the existing MSI emulation?

> iommu_bind_bdf: used by xl to bind a guest bdf number to a machine bdf
> number. The IOMMU emulation code receives commands from the guest iommu
> driver and forwards them to the host iommu, but the virtual device id from
> the guest should be converted into a physical one before being sent to the
> real hardware.

The whole logic here needs to take the segment into account. No new
code should again ignore the segment numbers.

> --- a/xen/include/public/domctl.h Wed Dec 14 16:16:11 2011 +0100
> +++ b/xen/include/public/domctl.h Wed Dec 14 16:16:12 2011 +0100
> @@ -848,6 +848,31 @@ struct xen_domctl_set_access_required {
>  typedef struct xen_domctl_set_access_required
> xen_domctl_set_access_required_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_access_required_t);
>
> +#if defined(__i386__) || defined(__x86_64__)

What is x86-specific about these?

Jan

> +/* Support for guest iommu emulation */
> +struct xen_domctl_guest_iommu_op {
> +    /* XEN_DOMCTL_GUEST_IOMMU_OP_* */
> +#define XEN_DOMCTL_GUEST_IOMMU_OP_SET_MSI 0
> +#define XEN_DOMCTL_GUEST_IOMMU_OP_BIND_BDF 1
> +    uint8_t op;
> +    union {
> +        struct iommu_msi {
> +            uint8_t vector;
> +            uint8_t dest;
> +            uint8_t dest_mode;
> +            uint8_t delivery_mode;
> +            uint8_t trig_mode;
> +        } msi;
> +        struct bdf_bind {
> +            uint32_t g_bdf;
> +            uint32_t m_bdf;
> +        } bdf_bind;
> +    } u;
> +};
> +typedef struct xen_domctl_guest_iommu_op xen_domctl_guest_iommu_op_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_domctl_guest_iommu_op_t);
> +#endif
> +
>  struct xen_domctl {
>      uint32_t cmd;
>  #define XEN_DOMCTL_createdomain 1
Wei Wang2
2011-Dec-14 16:57 UTC
Re: [PATCH 07 of 16] amd iommu: Add 2 hypercalls for libxc
On Wednesday 14 December 2011 17:44:18 Jan Beulich wrote:
> >>> On 14.12.11 at 16:29, Wei Wang <wei.wang2@amd.com> wrote:
> >
> > # HG changeset patch
> > # User Wei Wang <wei.wang2@amd.com>
> > # Date 1323875772 -3600
> > # Node ID ef5698887d044ad58293bee3549eaa20310c2b17
> > # Parent fbed4e6011fce13d3a521bbc339f4959bf32a06c
> > amd iommu: Add 2 hypercalls for libxc
> >
> > iommu_set_msi: used by qemu to inform the hypervisor of the iommu vector
> > number in guest space. The hypervisor needs this vector to inject an msi
> > into the guest when PPR logging happens.
>
> And this cannot be done with the existing MSI emulation?

It looks like MSI emulation is used for passthru devices. I only add a
virtual amd iommu device and do not pass the amd iommu device through. So no
physical msi is required, and therefore the complicated msi emulation might
not be very necessary?

> > iommu_bind_bdf: used by xl to bind a guest bdf number to a machine bdf
> > number. The IOMMU emulation code receives commands from the guest iommu
> > driver and forwards them to the host iommu, but the virtual device id
> > from the guest should be converted into a physical one before being sent
> > to the real hardware.
>
> The whole logic here needs to take the segment into account. No new
> code should again ignore the segment numbers.

Sure, I will fix that.

> > --- a/xen/include/public/domctl.h Wed Dec 14 16:16:11 2011 +0100
> > +++ b/xen/include/public/domctl.h Wed Dec 14 16:16:12 2011 +0100
> > @@ -848,6 +848,31 @@ struct xen_domctl_set_access_required {
> >  typedef struct xen_domctl_set_access_required
> > xen_domctl_set_access_required_t;
> >  DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_access_required_t);
> >
> > +#if defined(__i386__) || defined(__x86_64__)
>
> What is x86-specific about these?

These hypercalls are only used by AMD, so ia64 should be avoided.

Thanks,
Wei

> Jan
>
> > +/* Support for guest iommu emulation */
> > +struct xen_domctl_guest_iommu_op {
> > +    /* XEN_DOMCTL_GUEST_IOMMU_OP_* */
> > +#define XEN_DOMCTL_GUEST_IOMMU_OP_SET_MSI 0
> > +#define XEN_DOMCTL_GUEST_IOMMU_OP_BIND_BDF 1
> > +    uint8_t op;
> > +    union {
> > +        struct iommu_msi {
> > +            uint8_t vector;
> > +            uint8_t dest;
> > +            uint8_t dest_mode;
> > +            uint8_t delivery_mode;
> > +            uint8_t trig_mode;
> > +        } msi;
> > +        struct bdf_bind {
> > +            uint32_t g_bdf;
> > +            uint32_t m_bdf;
> > +        } bdf_bind;
> > +    } u;
> > +};
> > +typedef struct xen_domctl_guest_iommu_op xen_domctl_guest_iommu_op_t;
> > +DEFINE_XEN_GUEST_HANDLE(xen_domctl_guest_iommu_op_t);
> > +#endif
> > +
> >  struct xen_domctl {
> >      uint32_t cmd;
> >  #define XEN_DOMCTL_createdomain 1
Jan Beulich
2011-Dec-14 17:03 UTC
Re: [PATCH 07 of 16] amd iommu: Add 2 hypercalls for libxc
>>> On 14.12.11 at 17:57, Wei Wang2 <wei.wang2@amd.com> wrote:
> On Wednesday 14 December 2011 17:44:18 Jan Beulich wrote:
>> >>> On 14.12.11 at 16:29, Wei Wang <wei.wang2@amd.com> wrote:
>> >
>> > # HG changeset patch
>> > # User Wei Wang <wei.wang2@amd.com>
>> > # Date 1323875772 -3600
>> > # Node ID ef5698887d044ad58293bee3549eaa20310c2b17
>> > # Parent fbed4e6011fce13d3a521bbc339f4959bf32a06c
>> > amd iommu: Add 2 hypercalls for libxc
>> >
>> > iommu_set_msi: used by qemu to inform the hypervisor of the iommu
>> > vector number in guest space. The hypervisor needs this vector to
>> > inject an msi into the guest when PPR logging happens.
>>
>> And this cannot be done with the existing MSI emulation?
>
> It looks like MSI emulation is used for passthru devices. I only add a
> virtual amd iommu device and do not pass the amd iommu device through. So
> no physical msi is required, and therefore the complicated msi emulation
> might not be very necessary?

Makes sense.

>> > --- a/xen/include/public/domctl.h Wed Dec 14 16:16:11 2011 +0100
>> > +++ b/xen/include/public/domctl.h Wed Dec 14 16:16:12 2011 +0100
>> > @@ -848,6 +848,31 @@ struct xen_domctl_set_access_required {
>> >  typedef struct xen_domctl_set_access_required
>> > xen_domctl_set_access_required_t;
>> >  DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_access_required_t);
>> >
>> > +#if defined(__i386__) || defined(__x86_64__)
>>
>> What is x86-specific about these?
>
> These hypercalls are only used by AMD, so ia64 should be avoided.

Currently. But is there anything in them that makes them unusable
for other IOMMUs in the future (from what I can tell only PCI and MSI
are fundamentally required, which are certainly present on other
platforms)? After all, this is just a type definition and a few manifest
constants - I'm not asking that their implementation should be done
for other than the case you care about right now.

Jan
Wei Wang2
2011-Dec-15 10:02 UTC
Re: [PATCH 07 of 16] amd iommu: Add 2 hypercalls for libxc
On Wednesday 14 December 2011 18:03:54 Jan Beulich wrote:
> >>> On 14.12.11 at 17:57, Wei Wang2 <wei.wang2@amd.com> wrote:
> >
> > On Wednesday 14 December 2011 17:44:18 Jan Beulich wrote:
> >> >>> On 14.12.11 at 16:29, Wei Wang <wei.wang2@amd.com> wrote:
> >> >
> >> > # HG changeset patch
> >> > # User Wei Wang <wei.wang2@amd.com>
> >> > # Date 1323875772 -3600
> >> > # Node ID ef5698887d044ad58293bee3549eaa20310c2b17
> >> > # Parent fbed4e6011fce13d3a521bbc339f4959bf32a06c
> >> > amd iommu: Add 2 hypercalls for libxc
> >> >
> >> > iommu_set_msi: used by qemu to inform the hypervisor of the iommu
> >> > vector number in guest space. The hypervisor needs this vector to
> >> > inject an msi into the guest when PPR logging happens.
> >>
> >> And this cannot be done with the existing MSI emulation?
> >
> > It looks like MSI emulation is used for passthru devices. I only add a
> > virtual amd iommu device and do not pass the amd iommu device through.
> > So no physical msi is required, and therefore the complicated msi
> > emulation might not be very necessary?
>
> Makes sense.
>
> >> > --- a/xen/include/public/domctl.h Wed Dec 14 16:16:11 2011 +0100
> >> > +++ b/xen/include/public/domctl.h Wed Dec 14 16:16:12 2011 +0100
> >> > @@ -848,6 +848,31 @@ struct xen_domctl_set_access_required {
> >> >  typedef struct xen_domctl_set_access_required
> >> > xen_domctl_set_access_required_t;
> >> >  DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_access_required_t);
> >> >
> >> > +#if defined(__i386__) || defined(__x86_64__)
> >>
> >> What is x86-specific about these?
> >
> > These hypercalls are only used by AMD, so ia64 should be avoided.
>
> Currently. But is there anything in them that makes them unusable
> for other IOMMUs in the future (from what I can tell only PCI and MSI
> are fundamentally required, which are certainly present on other
> platforms)? After all, this is just a type definition and a few manifest
> constants - I'm not asking that their implementation should be done
> for other than the case you care about right now.

Ok, I will change this in the next version. Indeed there is nothing special
for x86 in these two hcalls.

Thanks,
Wei

> Jan
Jan Beulich
2011-Dec-15 10:23 UTC
Re: [PATCH 00 of 16] [RFC] amd iommu: support ATS device passthru on IOMMUv2 systems
>>> On 14.12.11 at 16:29, Wei Wang <wei.wang2@amd.com> wrote:
> ATS devices with PRI and PASID capabilities can communicate with iommuv2 to
> do 2 level (nested) DMA translation and IO demand paging. To do that, both
> iommu driver and ats device have to be enabled in the guest OS. This patch
> set adds initial iommu emulation for hvm guests to support ATS device
> passthru.

I could take care of the first 6 patches in this series, as they're only
touching AMD IOMMU code and look sensible to me. I'm not sure
though whether this is a good idea without knowing the disposition
of the other 10 patches (particularly the relatively large 3rd patch
doesn't seem to make sense without it later getting hooked up).

Please let me know,
Jan
Wei Wang2
2011-Dec-15 11:18 UTC
Re: [PATCH 00 of 16] [RFC] amd iommu: support ATS device passthru on IOMMUv2 systems
On Thursday 15 December 2011 11:23:58 Jan Beulich wrote:
> >>> On 14.12.11 at 16:29, Wei Wang <wei.wang2@amd.com> wrote:
> >
> > ATS devices with PRI and PASID capabilities can communicate with iommuv2
> > to do 2 level (nested) DMA translation and IO demand paging. To do that,
> > both iommu driver and ats device have to be enabled in the guest OS.
> > This patch set adds initial iommu emulation for hvm guests to support
> > ATS device passthru.
>
> I could take care of the first 6 patches in this series, as they're only
> touching AMD IOMMU code and look sensible to me. I'm not sure
> though whether this is a good idea without knowing the disposition
> of the other 10 patches (particularly the relatively large 3rd patch
> doesn't seem to make sense without it later getting hooked up).
>
> Please let me know,
> Jan

Actually, the amd specific patches implement most of the IOMMUv2 support.
Thanks for looking at this. We had thought about how to integrate IOMMUv2
for ATS device passthru. Since the guest OS requires an iommu to be present
in this case, we could go for either a PV interface or full emulation. The
iommuv2 driver has just been submitted to the Linux mailing list, so this
might be too early for pv iommu drivers... Using an mmio handler in xen, we
can avoid any guest OS changes and can get better performance than using
qemu-dm. So this might be the only approach we intend to use at the moment.
But yes, eventually, the iommu emulation is driven by hypercalls. It would
be great if the tools maintainers could check this in the meantime. So,
Ian, could I invite you to take a look at this?

Many thanks,
Wei
On Wed, 2011-12-14 at 15:29 +0000, Wei Wang wrote:
> hvmloader: Build IVRS table.

Is there a good reference for what is in this table?

> diff -r 9a93e064dd3c -r 42ecb2ba593c
> xen/include/public/hvm/hvm_info_table.h
> --- a/xen/include/public/hvm/hvm_info_table.h Wed Dec 14 16:16:16 2011 +0100
> +++ b/xen/include/public/hvm/hvm_info_table.h Wed Dec 14 16:22:20 2011 +0100
> @@ -67,6 +67,9 @@ struct hvm_info_table {
>
>      /* Bitmap of which CPUs are online at boot time. */
>      uint8_t vcpu_online[(HVM_MAX_VCPUS + 7)/8];
> +
> +    /* guest iommu enabled */
> +    uint8_t iommu_enabled;
>  };
>
>  #endif /* __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ */

We would like to remove the hvm info table. Please could you do this via
xenstore (like Paul Durrant recently did for acpi s3 support).

Ian.
Tim Deegan
2011-Dec-15 13:35 UTC
Re: [PATCH 03 of 16] amd iommu: Add iommu emulation for hvm guest
Hi,

At 16:29 +0100 on 14 Dec (1323880164), Wei Wang wrote:
> +static struct page_info* guest_iommu_get_page(struct list_head *pglist,
> +                                              unsigned int entry_size,
> +                                              unsigned int pos)
> +{
> +    int idx;
> +    struct list_head *head;
> +    struct guest_pages *gpage = NULL;
> +
> +    idx = (pos * entry_size) >> PAGE_SHIFT;
> +    list_for_each( head, pglist )
> +    {
> +        gpage = list_entry(head, struct guest_pages, list);
> +        if ( (--idx) < 0 )
> +            break;
> +    }

Given that you allocate all these elements together, and free them all
together, why not just use an array instead of a linked list?

Cheers,

Tim.
Tim Deegan
2011-Dec-15 13:39 UTC
Re: [PATCH 11 of 16] amd iommu: Add a new flag to indicate whether the iommuv2 feature is enabled
At 16:29 +0100 on 14 Dec (1323880172), Wei Wang wrote:> # HG changeset patch > # User Wei Wang <wei.wang2@amd.com> > # Date 1323875776 -3600 > # Node ID 9a93e064dd3c467ce4b87ddef8739a3573ef547c > # Parent 001681ff1a0c09c4d04fd8bd45e8d26805686246 > amd iommu: Add a new flag to indication iommuv2 feature enabled or not. > Hypercalls should return early on non-iommuv2 systems. > > Signed-off-by: Wei Wang <wei.wang2@amd.com> > > diff -r 001681ff1a0c -r 9a93e064dd3c xen/drivers/passthrough/amd/iommu_guest.c > --- a/xen/drivers/passthrough/amd/iommu_guest.c Wed Dec 14 16:16:15 2011 +0100 > +++ b/xen/drivers/passthrough/amd/iommu_guest.c Wed Dec 14 16:16:16 2011 +0100 > @@ -48,6 +48,8 @@ > (reg)->hi = (val >> 32) & 0xFFFFFFFF; \ > } while(0) > > +extern bool_t iommuv2_enabled; > + > static unsigned int machine_bdf(struct domain *d, uint16_t guest_bdf) > { > struct pci_dev *pdev; > @@ -839,6 +841,9 @@ int guest_iommu_set_base(struct domain * > p2m_type_t t; > struct guest_iommu *iommu = domain_iommu(d); > > + if ( !is_hvm_domain(d) && !iommuv2_enabled ) > + return 1;Shouldn''t that that be || ? (And likewise below) Cheers, Tim.> + > iommu->mmio_base = base; > base >>= PAGE_SHIFT; > > @@ -898,7 +903,7 @@ int guest_iommu_init(struct domain* d) > struct guest_iommu *iommu; > struct hvm_iommu *hd = domain_hvm_iommu(d); > > - if ( !is_hvm_domain(d) ) > + if ( !is_hvm_domain(d) && !iommuv2_enabled ) > return 0; > > iommu = xzalloc(struct guest_iommu); > @@ -940,7 +945,7 @@ void guest_iommu_destroy(struct domain * > { > struct guest_iommu *iommu; > > - if ( !is_hvm_domain(d) ) > + if ( !is_hvm_domain(d) && !iommuv2_enabled ) > return; > > iommu = domain_iommu(d); > @@ -973,7 +978,7 @@ int iommu_bind_bdf(struct domain* d, uin > struct pci_dev *pdev; > int ret = -ENODEV; > > - if ( !iommu_found() ) > + if ( !iommu_found() || !iommuv2_enabled ) > return 0; > > spin_lock(&pcidevs_lock); > @@ -998,7 +1003,7 @@ void iommu_set_msi(struct domain* d, uin > { > struct guest_iommu *iommu = domain_iommu(d); > > - if ( !iommu_found() ) > + if ( !iommu_found() || !iommuv2_enabled ) > return; > > iommu->msi.vector = vector; > diff -r 001681ff1a0c -r 9a93e064dd3c xen/drivers/passthrough/amd/iommu_init.c > --- a/xen/drivers/passthrough/amd/iommu_init.c Wed Dec 14 16:16:15 2011 +0100 > +++ b/xen/drivers/passthrough/amd/iommu_init.c Wed Dec 14 16:16:16 2011 +0100 > @@ -36,6 +36,7 @@ unsigned short ivrs_bdf_entries; > static struct radix_tree_root ivrs_maps; > struct list_head amd_iommu_head; > struct table_struct device_table; > +bool_t iommuv2_enabled; > > static int iommu_has_ht_flag(struct amd_iommu *iommu, u8 mask) > { > @@ -759,6 +760,10 @@ static void enable_iommu(struct amd_iomm > amd_iommu_flush_all_caches(iommu); > > iommu->enabled = 1; > + > + if ( iommu->features ) > + iommuv2_enabled = 1; > + > spin_unlock_irqrestore(&iommu->lock, flags); > > } > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xensource.com > http://lists.xensource.com/xen-devel
Wei Wang2
2011-Dec-15 14:05 UTC
Re: [PATCH 11 of 16] amd iommu: Add a new flag to indicate whether the iommuv2 feature is enabled
On Thursday 15 December 2011 14:39:15 Tim Deegan wrote:
> At 16:29 +0100 on 14 Dec (1323880172), Wei Wang wrote:
> > # HG changeset patch
> > # User Wei Wang <wei.wang2@amd.com>
> > # Date 1323875776 -3600
> > # Node ID 9a93e064dd3c467ce4b87ddef8739a3573ef547c
> > # Parent  001681ff1a0c09c4d04fd8bd45e8d26805686246
> > amd iommu: Add a new flag to indicate whether the iommuv2 feature is enabled.
> > Hypercalls should return early on non-iommuv2 systems.
> >
> > Signed-off-by: Wei Wang <wei.wang2@amd.com>
> >
> > [...]
> >
> > @@ -839,6 +841,9 @@ int guest_iommu_set_base(struct domain *
> >      p2m_type_t t;
> >      struct guest_iommu *iommu = domain_iommu(d);
> >
> > +    if ( !is_hvm_domain(d) && !iommuv2_enabled )
> > +        return 1;
>
> Shouldn't that be || ?  (And likewise below.)

Oops... I will fix that.

Thanks,
Wei

> Cheers,
>
> Tim.
>
> > [...]
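The fix being agreed here is a De Morgan correction: the emulation path should proceed only when the domain is HVM *and* iommuv2-capable hardware is present, so the early-return guard must fire when either condition fails. The corrected test, using the same names as the patch above:

    /* Bail out unless the domain is HVM and iommuv2 hardware exists:
     * !(A && B) == !A || !B */
    if ( !is_hvm_domain(d) || !iommuv2_enabled )
        return 1;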
Wei Wang2
2011-Dec-15 14:09 UTC
Re: [PATCH 03 of 16] amd iommu: Add iommu emulation for hvm guest
On Thursday 15 December 2011 14:35:55 Tim Deegan wrote:
> Hi,
>
> At 16:29 +0100 on 14 Dec (1323880164), Wei Wang wrote:
> > +static struct page_info* guest_iommu_get_page(struct list_head *pglist,
> > +                                              unsigned int entry_size,
> > +                                              unsigned int pos)
> > +{
> > +    int idx;
> > +    struct list_head *head;
> > +    struct guest_pages *gpage = NULL;
> > +
> > +    idx = (pos * entry_size) >> PAGE_SHIFT;
> > +    list_for_each( head, pglist )
> > +    {
> > +        gpage = list_entry(head, struct guest_pages, list);
> > +        if ( (--idx) < 0 )
> > +            break;
> > +    }
>
> Given that you allocate all these elements together, and free them all
> together, why not just use an array instead of a linked list?
>
> Cheers,
>
> Tim.

The number of elements might vary. But an array should also work;
considering that iommu tables have a max. length of 2MB, the array
length is small.

Thanks,
Wei
Jan Beulich
2011-Dec-15 14:13 UTC
Re: [PATCH 03 of 16] amd iommu: Add iommu emulation for hvm guest
>>> On 15.12.11 at 15:09, Wei Wang2 <wei.wang2@amd.com> wrote:
> On Thursday 15 December 2011 14:35:55 Tim Deegan wrote:
>> Hi,
>>
>> At 16:29 +0100 on 14 Dec (1323880164), Wei Wang wrote:
>> > +static struct page_info* guest_iommu_get_page(struct list_head *pglist,
>> > +                                              unsigned int entry_size,
>> > +                                              unsigned int pos)
>> > +{
>> > +    int idx;
>> > +    struct list_head *head;
>> > +    struct guest_pages *gpage = NULL;
>> > +
>> > +    idx = (pos * entry_size) >> PAGE_SHIFT;
>> > +    list_for_each( head, pglist )
>> > +    {
>> > +        gpage = list_entry(head, struct guest_pages, list);
>> > +        if ( (--idx) < 0 )
>> > +            break;
>> > +    }
>>
>> Given that you allocate all these elements together, and free them all
>> together, why not just use an array instead of a linked list?
>>
>> Cheers,
>>
>> Tim.
>
> The number of elements might vary. But an array should also work;
> considering that iommu tables have a max. length of 2MB, the array
> length is small.

Small enough so the array would fit in a single page?

Jan
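The arithmetic behind Jan's question, assuming 4KB pages and 8-byte pointers (both true for x86-64 Xen): a 2MB table spans 2MB / 4KB = 512 pages, and an array of 512 page pointers occupies 512 * 8 = 4096 bytes, i.e. exactly one page.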
Ian Campbell
2011-Dec-15 14:26 UTC
Re: [PATCH 14 of 16] libxl: bind virtual bdf to physical bdf after device assignment
On Wed, 2011-12-14 at 15:29 +0000, Wei Wang wrote:
> # HG changeset patch
> # User Wei Wang <wei.wang2@amd.com>
> # Date 1323876142 -3600
> # Node ID f9575683f10a08a86a9c73226581610fa3f7be4b
> # Parent  04573463beff7fc9696f5ecdb940920dcc2ec0ca
> libxl: bind virtual bdf to physical bdf after device assignment
>
> Signed-off-by: Wei Wang <wei.wang2@amd.com>
>
> diff -r 04573463beff -r f9575683f10a tools/libxl/libxl_pci.c
> --- a/tools/libxl/libxl_pci.c	Wed Dec 14 16:22:21 2011 +0100
> +++ b/tools/libxl/libxl_pci.c	Wed Dec 14 16:22:22 2011 +0100
> @@ -735,6 +735,13 @@ out:
>          LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "xc_assign_device failed");
>          return ERROR_FAIL;
>      }
> +    if (LIBXL__DOMAIN_IS_TYPE(gc, domid, HVM)) {
> +        rc = xc_domain_bind_pt_bdf(ctx->xch, domid, pcidev->vdevfn, pcidev_encode_bdf(pcidev));
> +        if ( rc ) {
> +            LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "xc_domain_bind_pt_bdf failed");
> +            return ERROR_FAIL;

Indentation here is off.

> +        }
> +    }
>  }
>
>  if (!starting)
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel
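For comparison, a version of the hunk following libxl's usual style (4-space indents, no spaces inside the if parentheses, long calls wrapped) might look like:

    if (LIBXL__DOMAIN_IS_TYPE(gc, domid, HVM)) {
        rc = xc_domain_bind_pt_bdf(ctx->xch, domid, pcidev->vdevfn,
                                   pcidev_encode_bdf(pcidev));
        if (rc) {
            LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc,
                                "xc_domain_bind_pt_bdf failed");
            return ERROR_FAIL;
        }
    }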
Wei Wang2
2011-Dec-15 14:30 UTC
Re: [PATCH 03 of 16] amd iommu: Add iommu emulation for hvm guest
On Thursday 15 December 2011 15:13:57 Jan Beulich wrote:
> >>> On 15.12.11 at 15:09, Wei Wang2 <wei.wang2@amd.com> wrote:
> > On Thursday 15 December 2011 14:35:55 Tim Deegan wrote:
> >> Hi,
> >>
> >> At 16:29 +0100 on 14 Dec (1323880164), Wei Wang wrote:
> >> > +static struct page_info* guest_iommu_get_page(struct list_head *pglist,
> >> > +                                              unsigned int entry_size,
> >> > +                                              unsigned int pos)
> >> > [...]
> >>
> >> Given that you allocate all these elements together, and free them all
> >> together, why not just use an array instead of a linked list?
> >>
> >> Cheers,
> >>
> >> Tim.
> >
> > The number of elements might vary. But an array should also work;
> > considering that iommu tables have a max. length of 2MB, the array
> > length is small.
>
> Small enough so the array would fit in a single page?
>
> Jan

Well... then, how about I just save the first gfn of the table base
address and map gfn + n dynamically? All gfns for an iommu table must
be contiguous in guest space anyway.

Thanks,
Wei
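A minimal sketch of the "base gfn + n" scheme Wei floats here (names are illustrative; gfn_to_page() stands in for whatever p2m lookup, reference counting and type checking the real code would need):

    /* The guest must allocate each iommu table contiguously in
     * guest-physical space, so page n of the table lives at gfn
     * base_gfn + n; no list or array of page pointers is needed. */
    static struct page_info *guest_iommu_get_page(struct domain *d,
                                                  unsigned long base_gfn,
                                                  unsigned int entry_size,
                                                  unsigned int pos)
    {
        unsigned long gfn = base_gfn + ((pos * entry_size) >> PAGE_SHIFT);

        return gfn_to_page(d, gfn);   /* stand-in p2m lookup */
    }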
Ian Campbell
2011-Dec-15 14:30 UTC
Re: [PATCH 16 of 16] libxl: add iommu parameter to qemu-dm
On Wed, 2011-12-14 at 15:29 +0000, Wei Wang wrote:
> # HG changeset patch
> # User Wei Wang <wei.wang2@amd.com>
> # Date 1323876144 -3600
> # Node ID 24f4a0a23da71c58f457f0bf98aa8238dd45332d
> # Parent  93658ca85035d6a4e56e2e6602c02859974d30a4
> libxl: add iommu parameter to qemu-dm.
> When iommu = 0, the virtual iommu device will be disabled.
>
> Signed-off-by: Wei Wang <wei.wang2@amd.com>
>
> diff -r 93658ca85035 -r 24f4a0a23da7 tools/libxl/libxl_dm.c
> --- a/tools/libxl/libxl_dm.c	Wed Dec 14 16:22:23 2011 +0100
> +++ b/tools/libxl/libxl_dm.c	Wed Dec 14 16:22:24 2011 +0100
> @@ -194,6 +194,9 @@ static char ** libxl__build_device_model
>          if (info->gfx_passthru) {
>              flexarray_append(dm_args, "-gfx_passthru");
>          }
> +        if (info->iommu) {
> +            flexarray_append(dm_args, "-iommu");
> +        }
>      }
>      if (info->saved_state) {
>          flexarray_vappend(dm_args, "-loadvm", info->saved_state, NULL);
> @@ -404,6 +407,9 @@ static char ** libxl__build_device_model
>          if (info->gfx_passthru) {
>              flexarray_append(dm_args, "-gfx_passthru");
>          }
> +        if (info->iommu) {
> +            flexarray_append(dm_args, "-iommu");
> +        }
>      }
>      if (info->saved_state) {
>          /* This file descriptor is meant to be used by QEMU */
> diff -r 93658ca85035 -r 24f4a0a23da7 tools/libxl/libxl_types.idl
> --- a/tools/libxl/libxl_types.idl	Wed Dec 14 16:22:23 2011 +0100
> +++ b/tools/libxl/libxl_types.idl	Wed Dec 14 16:22:24 2011 +0100
> @@ -254,6 +254,7 @@ The password never expires"""),
>      ("extra",            libxl_string_list, False, "extra parameters pass directly to qemu, NULL terminated"),
>      ("extra_pv",         libxl_string_list, False, "extra parameters pass directly to qemu for PV guest, NULL terminated"),
>      ("extra_hvm",        libxl_string_list, False, "extra parameters pass directly to qemu for HVM guest, NULL terminated"),
> +    ("iommu",            bool,              False, "guest iommu enabled or disabled"),
>      ],
>      comment=
> """Device Model information.
> diff -r 93658ca85035 -r 24f4a0a23da7 tools/libxl/xl_cmdimpl.c
> --- a/tools/libxl/xl_cmdimpl.c	Wed Dec 14 16:22:23 2011 +0100
> +++ b/tools/libxl/xl_cmdimpl.c	Wed Dec 14 16:22:24 2011 +0100
> @@ -386,6 +386,7 @@ static void printf_info(int domid,
>          printf("\t\t\t(spicedisable_ticketing %d)\n",
>                 dm_info->spicedisable_ticketing);
>          printf("\t\t\t(spiceagent_mouse %d)\n", dm_info->spiceagent_mouse);
> +        printf("\t\t\t(iommu %d)\n", dm_info->iommu);
>          printf("\t\t)\n");
>          break;
>      case LIBXL_DOMAIN_TYPE_PV:
> @@ -1217,6 +1218,8 @@ skip_vfb:
>          xlu_cfg_replace_string (config, "soundhw", &dm_info->soundhw, 0);
>          if (!xlu_cfg_get_long (config, "xen_platform_pci", &l, 0))
>              dm_info->xen_platform_pci = l;
> +        if (!xlu_cfg_get_long (config, "iommu", &l, 0))
> +            dm_info->iommu = l;

Didn't you already parse this same key into the build_info? Is there
ever a possibility of the dm_info and build_info versions of this field
differing?

Assuming not, I think this setting ought to only live in one place and I
think build_info should be that place rather than the dm_info. That
might need some refactoring in libxl to pass the right struct down.

Also you have only CC'd the hypervisor maintainers on this (and the
other?) tool stack patch. Please check MAINTAINERS to see who ought to
be CC'd.

Ian.
Wei Wang2
2011-Dec-15 14:52 UTC
Re: [PATCH 16 of 16] libxl: add iommu parameter to qemu-dm
On Thursday 15 December 2011 15:30:44 Ian Campbell wrote:
> On Wed, 2011-12-14 at 15:29 +0000, Wei Wang wrote:
> > # HG changeset patch
> > # User Wei Wang <wei.wang2@amd.com>
> > # Date 1323876144 -3600
> > # Node ID 24f4a0a23da71c58f457f0bf98aa8238dd45332d
> > # Parent  93658ca85035d6a4e56e2e6602c02859974d30a4
> > libxl: add iommu parameter to qemu-dm.
> > When iommu = 0, the virtual iommu device will be disabled.
> >
> > [...]
> >
> > @@ -1217,6 +1218,8 @@ skip_vfb:
> >          xlu_cfg_replace_string (config, "soundhw", &dm_info->soundhw, 0);
> >          if (!xlu_cfg_get_long (config, "xen_platform_pci", &l, 0))
> >              dm_info->xen_platform_pci = l;
> > +        if (!xlu_cfg_get_long (config, "iommu", &l, 0))
> > +            dm_info->iommu = l;
>
> Didn't you already parse this same key into the build_info? Is there
> ever a possibility of the dm_info and build_info versions of this field
> differing?
>
> Assuming not, I think this setting ought to only live in one place and I
> think build_info should be that place rather than the dm_info. That
> might need some refactoring in libxl to pass the right struct down.

Yes, I added a new flag in build_info, which will control whether
hvmloader builds the IVRS table. And I also need a flag for qemu-dm to
decide whether the virtual iommu device should be registered or not. I
just saw that other parameters like gfx_passthru are attached to
dm_info, so I did the same thing for iommu.

Also, I cannot make a reference to libxl_domain_build_info in
libxl__build_device_model_args.

> Also you have only CC'd the hypervisor maintainers on this (and the
> other?) tool stack patch. Please check MAINTAINERS to see who ought to
> be CC'd.

Thanks, I have CC'd them to Ian Jackson.

> Ian.

Wei
Ian Jackson
2011-Dec-15 16:27 UTC
Re: [PATCH 15 of 16] libxl: Introduce a new guest config file parameter
Wei Wang writes ("[Xen-devel] [PATCH 15 of 16] libxl: Introduce a new guest config file parameter"):
> libxl: Introduce a new guest config file parameter
> Use iommu = {1,0} to enable or disable guest iommu emulation.
> Default value is 0.

This needs documenting. And that means you need to explain why a user
might want to turn this off, or on - not just say "it enables or
disables guest iommu emulation".

Ian.
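An example of the sort of documentation Ian is asking for, written in the style of the xl config file manpage (the wording below is illustrative, not text from the series):

    iommu=BOOLEAN

        Emulate an AMD IOMMUv2 device inside the guest (default: 0).
        Enable this when passing through an ATS-capable PCI device
        whose guest driver uses PRI/PASID features such as IO demand
        paging; the guest then sees an IVRS ACPI table and a virtual
        iommu it can program.  Leave it disabled otherwise, as the
        emulated device adds overhead and widens the guest-visible
        interface.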
Ian Campbell
2011-Dec-15 16:59 UTC
Re: [PATCH 16 of 16] libxl: add iommu parameter to qemu-dm
On Thu, 2011-12-15 at 14:52 +0000, Wei Wang2 wrote:
> On Thursday 15 December 2011 15:30:44 Ian Campbell wrote:
> > On Wed, 2011-12-14 at 15:29 +0000, Wei Wang wrote:
> > > [...]
> > >
> > > @@ -1217,6 +1218,8 @@ skip_vfb:
> > >          xlu_cfg_replace_string (config, "soundhw", &dm_info->soundhw, 0);
> > >          if (!xlu_cfg_get_long (config, "xen_platform_pci", &l, 0))
> > >              dm_info->xen_platform_pci = l;
> > > +        if (!xlu_cfg_get_long (config, "iommu", &l, 0))
> > > +            dm_info->iommu = l;
> >
> > Didn't you already parse this same key into the build_info? Is there
> > ever a possibility of the dm_info and build_info versions of this field
> > differing?
> >
> > Assuming not, I think this setting ought to only live in one place and I
> > think build_info should be that place rather than the dm_info. That
> > might need some refactoring in libxl to pass the right struct down.
>
> Yes, I added a new flag in build_info, which will control whether
> hvmloader builds the IVRS table. And I also need a flag for qemu-dm to
> decide whether the virtual iommu device should be registered or not. I
> just saw that other parameters like gfx_passthru are attached to
> dm_info, so I did the same thing for iommu.

gfx_passthru is only in dm_info.

> Also, I cannot make a reference to libxl_domain_build_info in
> libxl__build_device_model_args.

You just need to plumb the variable through. This is apparently just the
first time we have need of it.

> > Also you have only CC'd the hypervisor maintainers on this (and the
> > other?) tool stack patch. Please check MAINTAINERS to see who ought to
> > be CC'd.
>
> Thanks, I have CC'd them to Ian Jackson.
>
> > Ian.
>
> Wei
Wei Wang2
2011-Dec-15 17:10 UTC
Re: [PATCH 16 of 16] libxl: add iommu parameter to qemu-dm
On Thursday 15 December 2011 17:59:19 Ian Campbell wrote:
> On Thu, 2011-12-15 at 14:52 +0000, Wei Wang2 wrote:
> > On Thursday 15 December 2011 15:30:44 Ian Campbell wrote:
> > > On Wed, 2011-12-14 at 15:29 +0000, Wei Wang wrote:
> > > > [...]
> > >
> > > Didn't you already parse this same key into the build_info? Is there
> > > ever a possibility of the dm_info and build_info versions of this
> > > field differing?
> > >
> > > Assuming not, I think this setting ought to only live in one place
> > > and I think build_info should be that place rather than the dm_info.
> > > That might need some refactoring in libxl to pass the right struct
> > > down.
> >
> > Yes, I added a new flag in build_info, which will control whether
> > hvmloader builds the IVRS table. And I also need a flag for qemu-dm
> > to decide whether the virtual iommu device should be registered or
> > not. I just saw that other parameters like gfx_passthru are attached
> > to dm_info, so I did the same thing for iommu.
>
> gfx_passthru is only in dm_info.
>
> > Also, I cannot make a reference to libxl_domain_build_info in
> > libxl__build_device_model_args.
>
> You just need to plumb the variable through. This is apparently just
> the first time we have need of it.

Ok, that sounds doable. I will fix that in the next version.

Thanks,
Wei

> > > Also you have only CC'd the hypervisor maintainers on this (and the
> > > other?) tool stack patch. Please check MAINTAINERS to see who ought
> > > to be CC'd.
> >
> > Thanks, I have CC'd them to Ian Jackson.
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel
Ian Campbell
2011-Dec-16 11:44 UTC
Re: [PATCH 16 of 16] libxl: add iommu parameter to qemu-dm
On Thu, 2011-12-15 at 17:10 +0000, Wei Wang2 wrote:
> > > Also, I cannot make a reference to libxl_domain_build_info in
> > > libxl__build_device_model_args.
> >
> > You just need to plumb the variable through. This is apparently just
> > the first time we have need of it.
>
> Ok, that sounds doable. I will fix that in the next version.

Thanks. I actually don't think libxl_device_model_info should really be
exposed to users of the library. That stuff all belongs in the
build_info and should be turned into dm config as necessary internally
by the library. However I don't expect you to take on that change!

Ian.
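The longer-term direction Ian sketches would amount to something like the following inside libxl (entirely illustrative; no such helper existed at the time):

    /* Illustrative: derive device-model settings internally from
     * build_info instead of exposing libxl_device_model_info to
     * library users. */
    static void libxl__dm_info_from_build_info(
        const libxl_domain_build_info *b_info,
        libxl_device_model_info *dm_info /* internal, filled by libxl */)
    {
        dm_info->iommu = b_info->iommu;   /* single source of truth */
        /* ... other derived fields ... */
    }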