Jorgen Hansen
2022-Feb-03 13:12 UTC
[PATCH v2 0/8] VMCI: dma dg: Add support for DMA datagrams
A new version of the VMCI device will introduce two new major changes: - support MMIO access to device registers - support send/receive of datagrams using DMA transfers instead of ioread8_rep/iowrite8_rep operations This patch series updates the VMCI driver to support these new features while maintaining backwards compatibility. The DMA based datagram operations use a send and a receive buffer allocated at module load time. The buffer contains a header describing the layout of the buffer followed by either an SG list or inline data. The header also contains a flag indicating whether the buffer is currently owned by the driver or the device. Both for send and receive, the driver will initialize the buffer, transfer ownership to the device by writing the buffer address to a register, and then wait for the ownership to be transferred back. The device will generate an interrupt when this happens. v2 (fixes issues flagged by kernel test robot <lkp at intel.com>): - changed type of mmio_base to void __iomem * - made vmci_read_reg, vmci_write_reg and vmci_write_data static functions Jorgen Hansen (8): VMCI: dma dg: whitespace formatting change for vmci register defines VMCI: dma dg: add MMIO access to registers VMCI: dma dg: detect DMA datagram capability VMCI: dma dg: set OS page size VMCI: dma dg: register dummy IRQ handlers for DMA datagrams VMCI: dma dg: allocate send and receive buffers for DMA datagrams VMCI: dma dg: add support for DMA datagrams sends VMCI: dma dg: add support for DMA datagrams receive drivers/misc/vmw_vmci/vmci_guest.c | 340 ++++++++++++++++++++++++----- include/linux/vmw_vmci_defs.h | 84 ++++++- 2 files changed, 361 insertions(+), 63 deletions(-) -- 2.25.1
Jorgen Hansen
2022-Feb-03 13:12 UTC
[PATCH v2 1/8] VMCI: dma dg: whitespace formatting change for vmci register defines
Update formatting of existing register defines in preparation for adding additional register definitions for the VMCI device. Reviewed-by: Vishnu Dasa <vdasa at vmware.com> Signed-off-by: Jorgen Hansen <jhansen at vmware.com> --- include/linux/vmw_vmci_defs.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h index e36cb114c188..9911ecfc18ba 100644 --- a/include/linux/vmw_vmci_defs.h +++ b/include/linux/vmw_vmci_defs.h @@ -12,15 +12,15 @@ #include <linux/bits.h> /* Register offsets. */ -#define VMCI_STATUS_ADDR 0x00 -#define VMCI_CONTROL_ADDR 0x04 -#define VMCI_ICR_ADDR 0x08 -#define VMCI_IMR_ADDR 0x0c -#define VMCI_DATA_OUT_ADDR 0x10 -#define VMCI_DATA_IN_ADDR 0x14 -#define VMCI_CAPS_ADDR 0x18 -#define VMCI_RESULT_LOW_ADDR 0x1c -#define VMCI_RESULT_HIGH_ADDR 0x20 +#define VMCI_STATUS_ADDR 0x00 +#define VMCI_CONTROL_ADDR 0x04 +#define VMCI_ICR_ADDR 0x08 +#define VMCI_IMR_ADDR 0x0c +#define VMCI_DATA_OUT_ADDR 0x10 +#define VMCI_DATA_IN_ADDR 0x14 +#define VMCI_CAPS_ADDR 0x18 +#define VMCI_RESULT_LOW_ADDR 0x1c +#define VMCI_RESULT_HIGH_ADDR 0x20 /* Max number of devices. */ #define VMCI_MAX_DEVICES 1 -- 2.25.1
Jorgen Hansen
2022-Feb-03 13:12 UTC
[PATCH v2 2/8] VMCI: dma dg: add MMIO access to registers
Detect the support for MMIO access through examination of the length of the region requested in BAR1. If it is 256KB, the VMCI device supports MMIO access to registers. If MMIO access is supported, map the area of the region used for MMIO access (64KB size at offset 128KB). Add wrapper functions for accessing 32 bit register accesses through either MMIO or IO ports based on device configuration. Sending and receiving datagrams through iowrite8_rep/ioread8_rep is left unchanged for now, and will be addressed in a later change. Reviewed-by: Vishnu Dasa <vdasa at vmware.com> Signed-off-by: Jorgen Hansen <jhansen at vmware.com> --- drivers/misc/vmw_vmci/vmci_guest.c | 68 ++++++++++++++++++++++-------- include/linux/vmw_vmci_defs.h | 12 ++++++ 2 files changed, 62 insertions(+), 18 deletions(-) diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c index 1018dc77269d..38ee7ed32ab9 100644 --- a/drivers/misc/vmw_vmci/vmci_guest.c +++ b/drivers/misc/vmw_vmci/vmci_guest.c @@ -45,6 +45,7 @@ static u32 vm_context_id = VMCI_INVALID_ID; struct vmci_guest_device { struct device *dev; /* PCI device we are attached to */ void __iomem *iobase; + void __iomem *mmio_base; bool exclusive_vectors; @@ -89,6 +90,21 @@ u32 vmci_get_vm_context_id(void) return vm_context_id; } +static unsigned int vmci_read_reg(struct vmci_guest_device *dev, u32 reg) +{ + if (dev->mmio_base != NULL) + return readl(dev->mmio_base + reg); + return ioread32(dev->iobase + reg); +} + +static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg) +{ + if (dev->mmio_base != NULL) + writel(val, dev->mmio_base + reg); + else + iowrite32(val, dev->iobase + reg); +} + /* * VM to hypervisor call mechanism. We use the standard VMware naming * convention since shared code is calling this function as well. @@ -116,7 +132,7 @@ int vmci_send_datagram(struct vmci_datagram *dg) if (vmci_dev_g) { iowrite8_rep(vmci_dev_g->iobase + VMCI_DATA_OUT_ADDR, dg, VMCI_DG_SIZE(dg)); - result = ioread32(vmci_dev_g->iobase + VMCI_RESULT_LOW_ADDR); + result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR); } else { result = VMCI_ERROR_UNAVAILABLE; } @@ -384,7 +400,7 @@ static irqreturn_t vmci_interrupt(int irq, void *_dev) unsigned int icr; /* Acknowledge interrupt and determine what needs doing. */ - icr = ioread32(dev->iobase + VMCI_ICR_ADDR); + icr = vmci_read_reg(dev, VMCI_ICR_ADDR); if (icr == 0 || icr == ~0) return IRQ_NONE; @@ -429,7 +445,8 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, const struct pci_device_id *id) { struct vmci_guest_device *vmci_dev; - void __iomem *iobase; + void __iomem *iobase = NULL; + void __iomem *mmio_base = NULL; unsigned int capabilities; unsigned int caps_in_use; unsigned long cmd; @@ -445,16 +462,32 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, return error; } - error = pcim_iomap_regions(pdev, 1 << 0, KBUILD_MODNAME); - if (error) { - dev_err(&pdev->dev, "Failed to reserve/map IO regions\n"); - return error; + /* + * The VMCI device with mmio access to registers requests 256KB + * for BAR1. If present, driver will use new VMCI device + * functionality for register access and datagram send/recv. + */ + + if (pci_resource_len(pdev, 1) == VMCI_WITH_MMIO_ACCESS_BAR_SIZE) { + dev_info(&pdev->dev, "MMIO register access is available\n"); + mmio_base = pci_iomap_range(pdev, 1, VMCI_MMIO_ACCESS_OFFSET, + VMCI_MMIO_ACCESS_SIZE); + /* If the map fails, we fall back to IOIO access. */ + if (!mmio_base) + dev_warn(&pdev->dev, "Failed to map MMIO register access\n"); } - iobase = pcim_iomap_table(pdev)[0]; + if (!mmio_base) { + error = pcim_iomap_regions(pdev, BIT(0), KBUILD_MODNAME); + if (error) { + dev_err(&pdev->dev, "Failed to reserve/map IO regions\n"); + return error; + } + iobase = pcim_iomap_table(pdev)[0]; + } - dev_info(&pdev->dev, "Found VMCI PCI device at %#lx, irq %u\n", - (unsigned long)iobase, pdev->irq); + dev_info(&pdev->dev, "Found VMCI PCI device at %#lx, %#lx, irq %u\n", + (unsigned long)iobase, (unsigned long)mmio_base, pdev->irq); vmci_dev = devm_kzalloc(&pdev->dev, sizeof(*vmci_dev), GFP_KERNEL); if (!vmci_dev) { @@ -466,6 +499,7 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, vmci_dev->dev = &pdev->dev; vmci_dev->exclusive_vectors = false; vmci_dev->iobase = iobase; + vmci_dev->mmio_base = mmio_base; tasklet_init(&vmci_dev->datagram_tasklet, vmci_dispatch_dgs, (unsigned long)vmci_dev); @@ -490,7 +524,7 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, * * Right now, we need datagrams. There are no fallbacks. */ - capabilities = ioread32(vmci_dev->iobase + VMCI_CAPS_ADDR); + capabilities = vmci_read_reg(vmci_dev, VMCI_CAPS_ADDR); if (!(capabilities & VMCI_CAPS_DATAGRAM)) { dev_err(&pdev->dev, "Device does not support datagrams\n"); error = -ENXIO; @@ -534,7 +568,7 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, dev_info(&pdev->dev, "Using capabilities 0x%x\n", caps_in_use); /* Let the host know which capabilities we intend to use. */ - iowrite32(caps_in_use, vmci_dev->iobase + VMCI_CAPS_ADDR); + vmci_write_reg(vmci_dev, caps_in_use, VMCI_CAPS_ADDR); /* Set up global device so that we can start sending datagrams */ spin_lock_irq(&vmci_dev_spinlock); @@ -630,11 +664,10 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, cmd = VMCI_IMR_DATAGRAM; if (caps_in_use & VMCI_CAPS_NOTIFICATIONS) cmd |= VMCI_IMR_NOTIFICATION; - iowrite32(cmd, vmci_dev->iobase + VMCI_IMR_ADDR); + vmci_write_reg(vmci_dev, cmd, VMCI_IMR_ADDR); /* Enable interrupts. */ - iowrite32(VMCI_CONTROL_INT_ENABLE, - vmci_dev->iobase + VMCI_CONTROL_ADDR); + vmci_write_reg(vmci_dev, VMCI_CONTROL_INT_ENABLE, VMCI_CONTROL_ADDR); pci_set_drvdata(pdev, vmci_dev); @@ -657,8 +690,7 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, err_remove_bitmap: if (vmci_dev->notification_bitmap) { - iowrite32(VMCI_CONTROL_RESET, - vmci_dev->iobase + VMCI_CONTROL_ADDR); + vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR); dma_free_coherent(&pdev->dev, PAGE_SIZE, vmci_dev->notification_bitmap, vmci_dev->notification_base); @@ -700,7 +732,7 @@ static void vmci_guest_remove_device(struct pci_dev *pdev) spin_unlock_irq(&vmci_dev_spinlock); dev_dbg(&pdev->dev, "Resetting vmci device\n"); - iowrite32(VMCI_CONTROL_RESET, vmci_dev->iobase + VMCI_CONTROL_ADDR); + vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR); /* * Free IRQ and then disable MSI/MSI-X as appropriate. For diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h index 9911ecfc18ba..8fc00e2685cf 100644 --- a/include/linux/vmw_vmci_defs.h +++ b/include/linux/vmw_vmci_defs.h @@ -82,6 +82,18 @@ enum { */ #define VMCI_MAX_PINNED_QP_MEMORY ((size_t)(32 * 1024)) +/* + * The version of the VMCI device that supports MMIO access to registers + * requests 256KB for BAR1 whereas the version of VMCI that supports + * MSI/MSI-X only requests 8KB. The layout of the larger 256KB region is: + * - the first 128KB are used for MSI/MSI-X. + * - the following 64KB are used for MMIO register access. + * - the remaining 64KB are unused. + */ +#define VMCI_WITH_MMIO_ACCESS_BAR_SIZE ((size_t)(256 * 1024)) +#define VMCI_MMIO_ACCESS_OFFSET ((size_t)(128 * 1024)) +#define VMCI_MMIO_ACCESS_SIZE ((size_t)(64 * 1024)) + /* * We have a fixed set of resource IDs available in the VMX. * This allows us to have a very simple implementation since we statically -- 2.25.1
Jorgen Hansen
2022-Feb-03 13:12 UTC
[PATCH v2 3/8] VMCI: dma dg: detect DMA datagram capability
Detect the VMCI DMA datagram capability, and if present, ack it to the device. Reviewed-by: Vishnu Dasa <vdasa at vmware.com> Signed-off-by: Jorgen Hansen <jhansen at vmware.com> --- drivers/misc/vmw_vmci/vmci_guest.c | 11 +++++++++++ include/linux/vmw_vmci_defs.h | 1 + 2 files changed, 12 insertions(+) diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c index 38ee7ed32ab9..5a99d8e27873 100644 --- a/drivers/misc/vmw_vmci/vmci_guest.c +++ b/drivers/misc/vmw_vmci/vmci_guest.c @@ -565,6 +565,17 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, } } + if (mmio_base != NULL) { + if (capabilities & VMCI_CAPS_DMA_DATAGRAM) { + caps_in_use |= VMCI_CAPS_DMA_DATAGRAM; + } else { + dev_err(&pdev->dev, + "Missing capability: VMCI_CAPS_DMA_DATAGRAM\n"); + error = -ENXIO; + goto err_free_data_buffer; + } + } + dev_info(&pdev->dev, "Using capabilities 0x%x\n", caps_in_use); /* Let the host know which capabilities we intend to use. */ diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h index 8fc00e2685cf..1ce2cffdc3ae 100644 --- a/include/linux/vmw_vmci_defs.h +++ b/include/linux/vmw_vmci_defs.h @@ -39,6 +39,7 @@ #define VMCI_CAPS_DATAGRAM BIT(2) #define VMCI_CAPS_NOTIFICATIONS BIT(3) #define VMCI_CAPS_PPN64 BIT(4) +#define VMCI_CAPS_DMA_DATAGRAM BIT(5) /* Interrupt Cause register bits. */ #define VMCI_ICR_DATAGRAM BIT(0) -- 2.25.1
Tell the device the page size used by the OS. Reviewed-by: Vishnu Dasa <vdasa at vmware.com> Signed-off-by: Jorgen Hansen <jhansen at vmware.com> --- drivers/misc/vmw_vmci/vmci_guest.c | 9 +++++++++ include/linux/vmw_vmci_defs.h | 1 + 2 files changed, 10 insertions(+) diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c index 5a99d8e27873..808680dc0820 100644 --- a/drivers/misc/vmw_vmci/vmci_guest.c +++ b/drivers/misc/vmw_vmci/vmci_guest.c @@ -581,6 +581,15 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, /* Let the host know which capabilities we intend to use. */ vmci_write_reg(vmci_dev, caps_in_use, VMCI_CAPS_ADDR); + if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) { + uint32_t page_shift; + + /* Let the device know the size for pages passed down. */ + vmci_write_reg(vmci_dev, PAGE_SHIFT, VMCI_GUEST_PAGE_SHIFT); + page_shift = vmci_read_reg(vmci_dev, VMCI_GUEST_PAGE_SHIFT); + dev_info(&pdev->dev, "Using page shift %d\n", page_shift); + } + /* Set up global device so that we can start sending datagrams */ spin_lock_irq(&vmci_dev_spinlock); vmci_dev_g = vmci_dev; diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h index 1ce2cffdc3ae..4167779469fd 100644 --- a/include/linux/vmw_vmci_defs.h +++ b/include/linux/vmw_vmci_defs.h @@ -21,6 +21,7 @@ #define VMCI_CAPS_ADDR 0x18 #define VMCI_RESULT_LOW_ADDR 0x1c #define VMCI_RESULT_HIGH_ADDR 0x20 +#define VMCI_GUEST_PAGE_SHIFT 0x34 /* Max number of devices. */ #define VMCI_MAX_DEVICES 1 -- 2.25.1
Jorgen Hansen
2022-Feb-03 13:12 UTC
[PATCH v2 5/8] VMCI: dma dg: register dummy IRQ handlers for DMA datagrams
Register dummy interrupt handlers for DMA datagrams in preparation for DMA datagram receive operations. Reviewed-by: Vishnu Dasa <vdasa at vmware.com> Signed-off-by: Jorgen Hansen <jhansen at vmware.com> --- drivers/misc/vmw_vmci/vmci_guest.c | 42 +++++++++++++++++++++++++++--- include/linux/vmw_vmci_defs.h | 14 ++++++++-- 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c index 808680dc0820..0920fbc6b64f 100644 --- a/drivers/misc/vmw_vmci/vmci_guest.c +++ b/drivers/misc/vmw_vmci/vmci_guest.c @@ -414,6 +414,9 @@ static irqreturn_t vmci_interrupt(int irq, void *_dev) icr &= ~VMCI_ICR_NOTIFICATION; } + if (icr & VMCI_ICR_DMA_DATAGRAM) + icr &= ~VMCI_ICR_DMA_DATAGRAM; + if (icr != 0) dev_warn(dev->dev, "Ignoring unknown interrupt cause (%d)\n", @@ -438,6 +441,16 @@ static irqreturn_t vmci_interrupt_bm(int irq, void *_dev) return IRQ_HANDLED; } +/* + * Interrupt handler for MSI-X interrupt vector VMCI_INTR_DMA_DATAGRAM, + * which is for the completion of a DMA datagram send or receive operation. + * Will only get called if we are using MSI-X with exclusive vectors. + */ +static irqreturn_t vmci_interrupt_dma_datagram(int irq, void *_dev) +{ + return IRQ_HANDLED; +} + /* * Most of the initialization at module load time is done here. */ @@ -447,6 +460,7 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, struct vmci_guest_device *vmci_dev; void __iomem *iobase = NULL; void __iomem *mmio_base = NULL; + unsigned int num_irq_vectors; unsigned int capabilities; unsigned int caps_in_use; unsigned long cmd; @@ -635,8 +649,12 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, * Enable interrupts. Try MSI-X first, then MSI, and then fallback on * legacy interrupts. */ - error = pci_alloc_irq_vectors(pdev, VMCI_MAX_INTRS, VMCI_MAX_INTRS, - PCI_IRQ_MSIX); + if (vmci_dev->mmio_base != NULL) + num_irq_vectors = VMCI_MAX_INTRS; + else + num_irq_vectors = VMCI_MAX_INTRS_NOTIFICATION; + error = pci_alloc_irq_vectors(pdev, num_irq_vectors, num_irq_vectors, + PCI_IRQ_MSIX); if (error < 0) { error = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY); @@ -674,6 +692,17 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, pci_irq_vector(pdev, 1), error); goto err_free_irq; } + if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) { + error = request_irq(pci_irq_vector(pdev, 2), + vmci_interrupt_dma_datagram, + 0, KBUILD_MODNAME, vmci_dev); + if (error) { + dev_err(&pdev->dev, + "Failed to allocate irq %u: %d\n", + pci_irq_vector(pdev, 2), error); + goto err_free_bm_irq; + } + } } dev_dbg(&pdev->dev, "Registered device\n"); @@ -684,6 +713,8 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, cmd = VMCI_IMR_DATAGRAM; if (caps_in_use & VMCI_CAPS_NOTIFICATIONS) cmd |= VMCI_IMR_NOTIFICATION; + if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) + cmd |= VMCI_IMR_DMA_DATAGRAM; vmci_write_reg(vmci_dev, cmd, VMCI_IMR_ADDR); /* Enable interrupts. */ @@ -694,6 +725,8 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, vmci_call_vsock_callback(false); return 0; +err_free_bm_irq: + free_irq(pci_irq_vector(pdev, 1), vmci_dev); err_free_irq: free_irq(pci_irq_vector(pdev, 0), vmci_dev); tasklet_kill(&vmci_dev->datagram_tasklet); @@ -759,8 +792,11 @@ static void vmci_guest_remove_device(struct pci_dev *pdev) * MSI-X, we might have multiple vectors, each with their own * IRQ, which we must free too. */ - if (vmci_dev->exclusive_vectors) + if (vmci_dev->exclusive_vectors) { free_irq(pci_irq_vector(pdev, 1), vmci_dev); + if (vmci_dev->mmio_base != NULL) + free_irq(pci_irq_vector(pdev, 2), vmci_dev); + } free_irq(pci_irq_vector(pdev, 0), vmci_dev); pci_free_irq_vectors(pdev); diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h index 4167779469fd..2b70c024dacb 100644 --- a/include/linux/vmw_vmci_defs.h +++ b/include/linux/vmw_vmci_defs.h @@ -45,13 +45,22 @@ /* Interrupt Cause register bits. */ #define VMCI_ICR_DATAGRAM BIT(0) #define VMCI_ICR_NOTIFICATION BIT(1) +#define VMCI_ICR_DMA_DATAGRAM BIT(2) /* Interrupt Mask register bits. */ #define VMCI_IMR_DATAGRAM BIT(0) #define VMCI_IMR_NOTIFICATION BIT(1) +#define VMCI_IMR_DMA_DATAGRAM BIT(2) -/* Maximum MSI/MSI-X interrupt vectors in the device. */ -#define VMCI_MAX_INTRS 2 +/* + * Maximum MSI/MSI-X interrupt vectors in the device. + * If VMCI_CAPS_DMA_DATAGRAM is supported by the device, + * VMCI_MAX_INTRS_DMA_DATAGRAM vectors are available, + * otherwise only VMCI_MAX_INTRS_NOTIFICATION. + */ +#define VMCI_MAX_INTRS_NOTIFICATION 2 +#define VMCI_MAX_INTRS_DMA_DATAGRAM 3 +#define VMCI_MAX_INTRS VMCI_MAX_INTRS_DMA_DATAGRAM /* * Supported interrupt vectors. There is one for each ICR value above, @@ -60,6 +69,7 @@ enum { VMCI_INTR_DATAGRAM = 0, VMCI_INTR_NOTIFICATION = 1, + VMCI_INTR_DMA_DATAGRAM = 2, }; /* -- 2.25.1
Jorgen Hansen
2022-Feb-03 13:12 UTC
[PATCH v2 6/8] VMCI: dma dg: allocate send and receive buffers for DMA datagrams
If DMA datagrams are used, allocate send and receive buffers in coherent DMA memory. This is done in preparation for the send and receive datagram operations, where the buffers are used for the exchange of data between driver and device. Reviewed-by: Vishnu Dasa <vdasa at vmware.com> Signed-off-by: Jorgen Hansen <jhansen at vmware.com> --- drivers/misc/vmw_vmci/vmci_guest.c | 66 ++++++++++++++++++++++++++---- include/linux/vmw_vmci_defs.h | 4 ++ 2 files changed, 63 insertions(+), 7 deletions(-) diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c index 0920fbc6b64f..c207ca2ca42e 100644 --- a/drivers/misc/vmw_vmci/vmci_guest.c +++ b/drivers/misc/vmw_vmci/vmci_guest.c @@ -31,6 +31,12 @@ #define VMCI_UTIL_NUM_RESOURCES 1 +/* + * Datagram buffers for DMA send/receive must accommodate at least + * a maximum sized datagram and the header. + */ +#define VMCI_DMA_DG_BUFFER_SIZE (VMCI_MAX_DG_SIZE + PAGE_SIZE) + static bool vmci_disable_msi; module_param_named(disable_msi, vmci_disable_msi, bool, 0); MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)"); @@ -53,6 +59,9 @@ struct vmci_guest_device { struct tasklet_struct bm_tasklet; void *data_buffer; + dma_addr_t data_buffer_base; + void *tx_buffer; + dma_addr_t tx_buffer_base; void *notification_bitmap; dma_addr_t notification_base; }; @@ -451,6 +460,24 @@ static irqreturn_t vmci_interrupt_dma_datagram(int irq, void *_dev) return IRQ_HANDLED; } +static void vmci_free_dg_buffers(struct vmci_guest_device *vmci_dev) +{ + if (vmci_dev->mmio_base != NULL) { + if (vmci_dev->tx_buffer != NULL) + dma_free_coherent(vmci_dev->dev, + VMCI_DMA_DG_BUFFER_SIZE, + vmci_dev->tx_buffer, + vmci_dev->tx_buffer_base); + if (vmci_dev->data_buffer != NULL) + dma_free_coherent(vmci_dev->dev, + VMCI_DMA_DG_BUFFER_SIZE, + vmci_dev->data_buffer, + vmci_dev->data_buffer_base); + } else { + vfree(vmci_dev->data_buffer); + } +} + /* * Most of the initialization at module load time is done here. */ @@ -520,11 +547,27 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, tasklet_init(&vmci_dev->bm_tasklet, vmci_process_bitmap, (unsigned long)vmci_dev); - vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE); + if (mmio_base != NULL) { + vmci_dev->tx_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE, + &vmci_dev->tx_buffer_base, + GFP_KERNEL); + if (!vmci_dev->tx_buffer) { + dev_err(&pdev->dev, + "Can't allocate memory for datagram tx buffer\n"); + return -ENOMEM; + } + + vmci_dev->data_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE, + &vmci_dev->data_buffer_base, + GFP_KERNEL); + } else { + vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE); + } if (!vmci_dev->data_buffer) { dev_err(&pdev->dev, "Can't allocate memory for datagram buffer\n"); - return -ENOMEM; + error = -ENOMEM; + goto err_free_data_buffers; } pci_set_master(pdev); /* To enable queue_pair functionality. */ @@ -542,7 +585,7 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, if (!(capabilities & VMCI_CAPS_DATAGRAM)) { dev_err(&pdev->dev, "Device does not support datagrams\n"); error = -ENXIO; - goto err_free_data_buffer; + goto err_free_data_buffers; } caps_in_use = VMCI_CAPS_DATAGRAM; @@ -586,7 +629,7 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, dev_err(&pdev->dev, "Missing capability: VMCI_CAPS_DMA_DATAGRAM\n"); error = -ENXIO; - goto err_free_data_buffer; + goto err_free_data_buffers; } } @@ -602,6 +645,12 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, vmci_write_reg(vmci_dev, PAGE_SHIFT, VMCI_GUEST_PAGE_SHIFT); page_shift = vmci_read_reg(vmci_dev, VMCI_GUEST_PAGE_SHIFT); dev_info(&pdev->dev, "Using page shift %d\n", page_shift); + + /* Configure the high order parts of the data in/out buffers. */ + vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->data_buffer_base), + VMCI_DATA_IN_HIGH_ADDR); + vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->tx_buffer_base), + VMCI_DATA_OUT_HIGH_ADDR); } /* Set up global device so that we can start sending datagrams */ @@ -755,8 +804,8 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, vmci_dev_g = NULL; spin_unlock_irq(&vmci_dev_spinlock); -err_free_data_buffer: - vfree(vmci_dev->data_buffer); +err_free_data_buffers: + vmci_free_dg_buffers(vmci_dev); /* The rest are managed resources and will be freed by PCI core */ return error; @@ -814,7 +863,10 @@ static void vmci_guest_remove_device(struct pci_dev *pdev) vmci_dev->notification_base); } - vfree(vmci_dev->data_buffer); + vmci_free_dg_buffers(vmci_dev); + + if (vmci_dev->mmio_base != NULL) + pci_iounmap(pdev, vmci_dev->mmio_base); /* The rest are managed resources and will be freed by PCI core */ } diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h index 2b70c024dacb..8bc37d8244a8 100644 --- a/include/linux/vmw_vmci_defs.h +++ b/include/linux/vmw_vmci_defs.h @@ -21,6 +21,10 @@ #define VMCI_CAPS_ADDR 0x18 #define VMCI_RESULT_LOW_ADDR 0x1c #define VMCI_RESULT_HIGH_ADDR 0x20 +#define VMCI_DATA_OUT_LOW_ADDR 0x24 +#define VMCI_DATA_OUT_HIGH_ADDR 0x28 +#define VMCI_DATA_IN_LOW_ADDR 0x2c +#define VMCI_DATA_IN_HIGH_ADDR 0x30 #define VMCI_GUEST_PAGE_SHIFT 0x34 /* Max number of devices. */ -- 2.25.1
Jorgen Hansen
2022-Feb-03 13:12 UTC
[PATCH v2 7/8] VMCI: dma dg: add support for DMA datagrams sends
Use DMA based send operation from the transmit buffer instead of the iowrite8_rep based datagram send when DMA datagrams are supported. The outgoing datagram is sent as inline data in the VMCI transmit buffer. Once the header has been configured, the send is initiated by writing the lower 32 bit of the buffer base address to the VMCI_DATA_OUT_LOW_ADDR register. Only then will the device process the header and the datagram itself. Following that, the driver busy waits (it isn't possible to sleep on the send path) for the header busy flag to change - indicating that the send is complete. Reviewed-by: Vishnu Dasa <vdasa at vmware.com> Signed-off-by: Jorgen Hansen <jhansen at vmware.com> --- drivers/misc/vmw_vmci/vmci_guest.c | 45 ++++++++++++++++++++++++++++-- include/linux/vmw_vmci_defs.h | 34 ++++++++++++++++++++++ 2 files changed, 77 insertions(+), 2 deletions(-) diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c index c207ca2ca42e..ae2fd9c791d0 100644 --- a/drivers/misc/vmw_vmci/vmci_guest.c +++ b/drivers/misc/vmw_vmci/vmci_guest.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/mm.h> #include <linux/module.h> +#include <linux/processor.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/init.h> @@ -114,6 +115,47 @@ static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg) iowrite32(val, dev->iobase + reg); } +static int vmci_write_data(struct vmci_guest_device *dev, + struct vmci_datagram *dg) +{ + int result; + + if (dev->mmio_base != NULL) { + struct vmci_data_in_out_header *buffer_header = dev->tx_buffer; + u8 *dg_out_buffer = (u8 *)(buffer_header + 1); + + if (VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE) + return VMCI_ERROR_INVALID_ARGS; + + /* + * Initialize send buffer with outgoing datagram + * and set up header for inline data. Device will + * not access buffer asynchronously - only after + * the write to VMCI_DATA_OUT_LOW_ADDR. + */ + memcpy(dg_out_buffer, dg, VMCI_DG_SIZE(dg)); + buffer_header->opcode = 0; + buffer_header->size = VMCI_DG_SIZE(dg); + buffer_header->busy = 1; + + vmci_write_reg(dev, lower_32_bits(dev->tx_buffer_base), + VMCI_DATA_OUT_LOW_ADDR); + + /* Caller holds a spinlock, so cannot block. */ + spin_until_cond(buffer_header->busy == 0); + + result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR); + if (result == VMCI_SUCCESS) + result = (int)buffer_header->result; + } else { + iowrite8_rep(dev->iobase + VMCI_DATA_OUT_ADDR, + dg, VMCI_DG_SIZE(dg)); + result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR); + } + + return result; +} + /* * VM to hypervisor call mechanism. We use the standard VMware naming * convention since shared code is calling this function as well. @@ -139,8 +181,7 @@ int vmci_send_datagram(struct vmci_datagram *dg) spin_lock_irqsave(&vmci_dev_spinlock, flags); if (vmci_dev_g) { - iowrite8_rep(vmci_dev_g->iobase + VMCI_DATA_OUT_ADDR, - dg, VMCI_DG_SIZE(dg)); + vmci_write_data(vmci_dev_g, dg); result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR); } else { result = VMCI_ERROR_UNAVAILABLE; diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h index 8bc37d8244a8..6fb663b36f72 100644 --- a/include/linux/vmw_vmci_defs.h +++ b/include/linux/vmw_vmci_defs.h @@ -110,6 +110,40 @@ enum { #define VMCI_MMIO_ACCESS_OFFSET ((size_t)(128 * 1024)) #define VMCI_MMIO_ACCESS_SIZE ((size_t)(64 * 1024)) +/* + * For VMCI devices supporting the VMCI_CAPS_DMA_DATAGRAM capability, the + * sending and receiving of datagrams can be performed using DMA to/from + * a driver allocated buffer. + * Sending and receiving will be handled as follows: + * - when sending datagrams, the driver initializes the buffer where the + * data part will refer to the outgoing VMCI datagram, sets the busy flag + * to 1 and writes the address of the buffer to VMCI_DATA_OUT_HIGH_ADDR + * and VMCI_DATA_OUT_LOW_ADDR. Writing to VMCI_DATA_OUT_LOW_ADDR triggers + * the device processing of the buffer. When the device has processed the + * buffer, it will write the result value to the buffer and then clear the + * busy flag. + * - when receiving datagrams, the driver initializes the buffer where the + * data part will describe the receive buffer, clears the busy flag and + * writes the address of the buffer to VMCI_DATA_IN_HIGH_ADDR and + * VMCI_DATA_IN_LOW_ADDR. Writing to VMCI_DATA_IN_LOW_ADDR triggers the + * device processing of the buffer. The device will copy as many available + * datagrams into the buffer as possible, and then sets the busy flag. + * When the busy flag is set, the driver will process the datagrams in the + * buffer. + */ +struct vmci_data_in_out_header { + uint32_t busy; + uint32_t opcode; + uint32_t size; + uint32_t rsvd; + uint64_t result; +}; + +struct vmci_sg_elem { + uint64_t addr; + uint64_t size; +}; + /* * We have a fixed set of resource IDs available in the VMX. * This allows us to have a very simple implementation since we statically -- 2.25.1
Jorgen Hansen
2022-Feb-03 13:12 UTC
[PATCH v2 8/8] VMCI: dma dg: add support for DMA datagrams receive
Use the DMA based receive operation instead of the ioread8_rep based datagram receive when DMA datagrams are supported. In the receive operation, configure the header to point to the page aligned VMCI_MAX_DG_SIZE part of the receive buffer using s/g configuration for the header. This ensures that the existing dispatch routine can be used with little modification. Initiate the receive by writing the lower 32 bit of the buffer to the VMCI_DATA_IN_LOW_ADDR register, and wait for the busy flag to be changed by the device using a wait queue. The existing dispatch routine for received datagrams is reused for the DMA datagrams with a few modifications: - the receive buffer is always the maximum size for DMA datagrams (IO ports would try with a shorter buffer first to reduce overhead of the ioread8_rep operation). - for DMA datagrams, datagrams are provided contiguous in the buffer as opposed to IO port datagrams, where they can start on any page boundary Reviewed-by: Vishnu Dasa <vdasa at vmware.com> Signed-off-by: Jorgen Hansen <jhansen at vmware.com> --- drivers/misc/vmw_vmci/vmci_guest.c | 103 ++++++++++++++++++++++------- 1 file changed, 79 insertions(+), 24 deletions(-) diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c index ae2fd9c791d0..67fac1b8f262 100644 --- a/drivers/misc/vmw_vmci/vmci_guest.c +++ b/drivers/misc/vmw_vmci/vmci_guest.c @@ -58,6 +58,7 @@ struct vmci_guest_device { struct tasklet_struct datagram_tasklet; struct tasklet_struct bm_tasklet; + struct wait_queue_head inout_wq; void *data_buffer; dma_addr_t data_buffer_base; @@ -115,6 +116,36 @@ static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg) iowrite32(val, dev->iobase + reg); } +static void vmci_read_data(struct vmci_guest_device *vmci_dev, + void *dest, size_t size) +{ + if (vmci_dev->mmio_base == NULL) + ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR, + dest, size); + else { + /* + * For DMA datagrams, the data_buffer will contain the header on the + * first page, followed by the incoming datagram(s) on the following + * pages. The header uses an S/G element immediately following the + * header on the first page to point to the data area. + */ + struct vmci_data_in_out_header *buffer_header = vmci_dev->data_buffer; + struct vmci_sg_elem *sg_array = (struct vmci_sg_elem *)(buffer_header + 1); + size_t buffer_offset = dest - vmci_dev->data_buffer; + + buffer_header->opcode = 1; + buffer_header->size = 1; + buffer_header->busy = 0; + sg_array[0].addr = vmci_dev->data_buffer_base + buffer_offset; + sg_array[0].size = size; + + vmci_write_reg(vmci_dev, lower_32_bits(vmci_dev->data_buffer_base), + VMCI_DATA_IN_LOW_ADDR); + + wait_event(vmci_dev->inout_wq, buffer_header->busy == 1); + } +} + static int vmci_write_data(struct vmci_guest_device *dev, struct vmci_datagram *dg) { @@ -261,15 +292,17 @@ static int vmci_check_host_caps(struct pci_dev *pdev) } /* - * Reads datagrams from the data in port and dispatches them. We - * always start reading datagrams into only the first page of the - * datagram buffer. If the datagrams don't fit into one page, we - * use the maximum datagram buffer size for the remainder of the - * invocation. This is a simple heuristic for not penalizing - * small datagrams. + * Reads datagrams from the device and dispatches them. For IO port + * based access to the device, we always start reading datagrams into + * only the first page of the datagram buffer. If the datagrams don't + * fit into one page, we use the maximum datagram buffer size for the + * remainder of the invocation. This is a simple heuristic for not + * penalizing small datagrams. For DMA-based datagrams, we always + * use the maximum datagram buffer size, since there is no performance + * penalty for doing so. * * This function assumes that it has exclusive access to the data - * in port for the duration of the call. + * in register(s) for the duration of the call. */ static void vmci_dispatch_dgs(unsigned long data) { @@ -277,23 +310,41 @@ static void vmci_dispatch_dgs(unsigned long data) u8 *dg_in_buffer = vmci_dev->data_buffer; struct vmci_datagram *dg; size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE; - size_t current_dg_in_buffer_size = PAGE_SIZE; + size_t current_dg_in_buffer_size; size_t remaining_bytes; + bool is_io_port = vmci_dev->mmio_base == NULL; BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE); - ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR, - vmci_dev->data_buffer, current_dg_in_buffer_size); + if (!is_io_port) { + /* For mmio, the first page is used for the header. */ + dg_in_buffer += PAGE_SIZE; + + /* + * For DMA-based datagram operations, there is no performance + * penalty for reading the maximum buffer size. + */ + current_dg_in_buffer_size = VMCI_MAX_DG_SIZE; + } else { + current_dg_in_buffer_size = PAGE_SIZE; + } + vmci_read_data(vmci_dev, dg_in_buffer, current_dg_in_buffer_size); dg = (struct vmci_datagram *)dg_in_buffer; remaining_bytes = current_dg_in_buffer_size; + /* + * Read through the buffer until an invalid datagram header is + * encountered. The exit condition for datagrams read through + * VMCI_DATA_IN_ADDR is a bit more complicated, since a datagram + * can start on any page boundary in the buffer. + */ while (dg->dst.resource != VMCI_INVALID_ID || - remaining_bytes > PAGE_SIZE) { + (is_io_port && remaining_bytes > PAGE_SIZE)) { unsigned dg_in_size; /* - * When the input buffer spans multiple pages, a datagram can - * start on any page boundary in the buffer. + * If using VMCI_DATA_IN_ADDR, skip to the next page + * as a datagram can start on any page boundary. */ if (dg->dst.resource == VMCI_INVALID_ID) { dg = (struct vmci_datagram *)roundup( @@ -343,11 +394,10 @@ static void vmci_dispatch_dgs(unsigned long data) current_dg_in_buffer_size dg_in_buffer_size; - ioread8_rep(vmci_dev->iobase + - VMCI_DATA_IN_ADDR, - vmci_dev->data_buffer + + vmci_read_data(vmci_dev, + dg_in_buffer + remaining_bytes, - current_dg_in_buffer_size - + current_dg_in_buffer_size - remaining_bytes); } @@ -385,10 +435,8 @@ static void vmci_dispatch_dgs(unsigned long data) current_dg_in_buffer_size = dg_in_buffer_size; for (;;) { - ioread8_rep(vmci_dev->iobase + - VMCI_DATA_IN_ADDR, - vmci_dev->data_buffer, - current_dg_in_buffer_size); + vmci_read_data(vmci_dev, dg_in_buffer, + current_dg_in_buffer_size); if (bytes_to_skip <= current_dg_in_buffer_size) break; @@ -405,8 +453,7 @@ static void vmci_dispatch_dgs(unsigned long data) if (remaining_bytes < VMCI_DG_HEADERSIZE) { /* Get the next batch of datagrams. */ - ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR, - vmci_dev->data_buffer, + vmci_read_data(vmci_dev, dg_in_buffer, current_dg_in_buffer_size); dg = (struct vmci_datagram *)dg_in_buffer; remaining_bytes = current_dg_in_buffer_size; @@ -464,8 +511,11 @@ static irqreturn_t vmci_interrupt(int irq, void *_dev) icr &= ~VMCI_ICR_NOTIFICATION; } - if (icr & VMCI_ICR_DMA_DATAGRAM) + + if (icr & VMCI_ICR_DMA_DATAGRAM) { + wake_up_all(&dev->inout_wq); icr &= ~VMCI_ICR_DMA_DATAGRAM; + } if (icr != 0) dev_warn(dev->dev, @@ -498,6 +548,10 @@ static irqreturn_t vmci_interrupt_bm(int irq, void *_dev) */ static irqreturn_t vmci_interrupt_dma_datagram(int irq, void *_dev) { + struct vmci_guest_device *dev = _dev; + + wake_up_all(&dev->inout_wq); + return IRQ_HANDLED; } @@ -587,6 +641,7 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, vmci_dispatch_dgs, (unsigned long)vmci_dev); tasklet_init(&vmci_dev->bm_tasklet, vmci_process_bitmap, (unsigned long)vmci_dev); + init_waitqueue_head(&vmci_dev->inout_wq); if (mmio_base != NULL) { vmci_dev->tx_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE, -- 2.25.1