Here's the patch series I ended up with. I haven't coded up the QEMU side yet, so no idea if the new driver works. Questions: (1) Do we win from separating ISR, NOTIFY and COMMON? (2) I used a "u8 bar"; should I use a bir and pack it instead? BIR seems a little obscure (noone else in the kernel source seems to refer to it). Cheers, Rusty.
Rusty Russell
2011-Dec-08 10:30 UTC
[RFC 1/11] virtio: use u32, not bitmap for struct virtio_device's features
It seemed like a good idea, but it's actually a pain when we get more than 32 feature bits. Just change it to a u32 for now. --- drivers/char/virtio_console.c | 2 +- drivers/lguest/lguest_device.c | 2 +- drivers/s390/kvm/kvm_virtio.c | 2 +- drivers/virtio/virtio.c | 10 +++++----- drivers/virtio/virtio_mmio.c | 8 ++------ drivers/virtio/virtio_pci.c | 3 +-- drivers/virtio/virtio_ring.c | 2 +- include/linux/virtio.h | 3 +-- include/linux/virtio_config.h | 2 +- tools/virtio/linux/virtio.h | 18 ++---------------- tools/virtio/virtio_test.c | 5 ++--- 11 files changed, 18 insertions(+), 39 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -331,7 +331,7 @@ static inline bool use_multiport(struct */ if (!portdev->vdev) return 0; - return portdev->vdev->features[0] & (1 << VIRTIO_CONSOLE_F_MULTIPORT); + return portdev->vdev->features & (1 << VIRTIO_CONSOLE_F_MULTIPORT); } static void free_buf(struct port_buffer *buf) diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -144,7 +144,7 @@ static void lg_finalize_features(struct memset(out_features, 0, desc->feature_len); bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8; for (i = 0; i < bits; i++) { - if (test_bit(i, vdev->features)) + if (vdev->features & (1 << i)) out_features[i / 8] |= (1 << (i % 8)); } diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c @@ -105,7 +105,7 @@ static void kvm_finalize_features(struct memset(out_features, 0, desc->feature_len); bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8; for (i = 0; i < bits; i++) { - if (test_bit(i, vdev->features)) + if (vdev->features & (1 << i)) out_features[i / 8] |= (1 << (i % 8)); } } diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -41,9 +41,9 @@ static ssize_t features_show(struct devi /* We actually represent this as a bitstring, as it could be * arbitrary length in future. */ - for (i = 0; i < ARRAY_SIZE(dev->features)*BITS_PER_LONG; i++) + for (i = 0; i < sizeof(dev->features)*8; i++) len += sprintf(buf+len, "%c", - test_bit(i, dev->features) ? '1' : '0'); + dev->features & (1ULL << i) ? '1' : '0'); len += sprintf(buf+len, "\n"); return len; } @@ -122,18 +122,18 @@ static int virtio_dev_probe(struct devic device_features = dev->config->get_features(dev); /* Features supported by both device and driver into dev->features. */ - memset(dev->features, 0, sizeof(dev->features)); + dev->features = 0; for (i = 0; i < drv->feature_table_size; i++) { unsigned int f = drv->feature_table[i]; BUG_ON(f >= 32); if (device_features & (1 << f)) - set_bit(f, dev->features); + dev->features |= (1 << f); } /* Transport features always preserved to pass to finalize_features. */ for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) if (device_features & (1 << i)) - set_bit(i, dev->features); + dev->features |= (1 << i); dev->config->finalize_features(dev); diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -112,16 +112,12 @@ static u32 vm_get_features(struct virtio static void vm_finalize_features(struct virtio_device *vdev) { struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); - int i; /* Give virtio_ring a chance to accept features. */ vring_transport_features(vdev); - for (i = 0; i < ARRAY_SIZE(vdev->features); i++) { - writel(i, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES_SEL); - writel(vdev->features[i], - vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES); - } + writel(0, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES_SEL); + writel(vdev->features, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES); } static void vm_get(struct virtio_device *vdev, unsigned offset, diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -121,8 +121,7 @@ static void vp_finalize_features(struct vring_transport_features(vdev); /* We only support 32 feature bits. */ - BUILD_BUG_ON(ARRAY_SIZE(vdev->features) != 1); - iowrite32(vdev->features[0], vp_dev->ioaddr+VIRTIO_PCI_GUEST_FEATURES); + iowrite32(vdev->features, vp_dev->ioaddr+VIRTIO_PCI_GUEST_FEATURES); } /* virtio config->get() implementation */ diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -685,7 +685,7 @@ void vring_transport_features(struct vir break; default: /* We don't understand this bit. */ - clear_bit(i, vdev->features); + vdev->features &= ~(1 << i); } } } diff --git a/include/linux/virtio.h b/include/linux/virtio.h --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -66,8 +66,7 @@ struct virtio_device { struct virtio_device_id id; struct virtio_config_ops *config; struct list_head vqs; - /* Note that this is a Linux set_bit-style bitmap. */ - unsigned long features[1]; + u32 features; void *priv; }; diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -142,7 +142,7 @@ static inline bool virtio_has_feature(co if (fbit < VIRTIO_TRANSPORT_F_START) virtio_check_driver_offered_feature(vdev, fbit); - return test_bit(fbit, vdev->features); + return vdev->features & (1 << fbit); } /** diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h --- a/tools/virtio/linux/virtio.h +++ b/tools/virtio/linux/virtio.h @@ -131,29 +131,15 @@ static inline void kfree(void *p) #define BITS_PER_BYTE 8 #define BITS_PER_LONG (sizeof(long) * BITS_PER_BYTE) #define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) -/* TODO: Not atomic as it should be: - * we don't use this for anything important. */ -static inline void clear_bit(int nr, volatile unsigned long *addr) -{ - unsigned long mask = BIT_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); - - *p &= ~mask; -} - -static inline int test_bit(int nr, const volatile unsigned long *addr) -{ - return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); -} /* The only feature we care to support */ #define virtio_has_feature(dev, feature) \ - test_bit((feature), (dev)->features) + ((dev)->features & (1 << (feature))) /* end of stubs */ struct virtio_device { void *dev; - unsigned long features[1]; + u32 features; }; struct virtqueue { diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c --- a/tools/virtio/virtio_test.c +++ b/tools/virtio/virtio_test.c @@ -55,7 +55,7 @@ void vhost_vq_setup(struct vdev_info *de { struct vhost_vring_state state = { .index = info->idx }; struct vhost_vring_file file = { .index = info->idx }; - unsigned long long features = dev->vdev.features[0]; + unsigned long long features = dev->vdev.features; struct vhost_vring_addr addr = { .index = info->idx, .desc_user_addr = (uint64_t)(unsigned long)info->vring.desc, @@ -106,8 +106,7 @@ static void vdev_info_init(struct vdev_i { int r; memset(dev, 0, sizeof *dev); - dev->vdev.features[0] = features; - dev->vdev.features[1] = features >> 32; + dev->vdev.features = features; dev->buf_size = 1024; dev->buf = malloc(dev->buf_size); assert(dev->buf);
Change the u32 to a u64, and make sure to use 1ULL everywhere! --- drivers/char/virtio_console.c | 2 +- drivers/lguest/lguest_device.c | 10 +++++----- drivers/s390/kvm/kvm_virtio.c | 10 +++++----- drivers/virtio/virtio.c | 12 ++++++------ drivers/virtio/virtio_mmio.c | 14 +++++++++----- drivers/virtio/virtio_pci.c | 5 ++--- drivers/virtio/virtio_ring.c | 2 +- include/linux/virtio.h | 2 +- include/linux/virtio_config.h | 8 ++++---- tools/virtio/linux/virtio.h | 4 ++-- 10 files changed, 36 insertions(+), 33 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -331,7 +331,7 @@ static inline bool use_multiport(struct */ if (!portdev->vdev) return 0; - return portdev->vdev->features & (1 << VIRTIO_CONSOLE_F_MULTIPORT); + return portdev->vdev->features & (1ULL << VIRTIO_CONSOLE_F_MULTIPORT); } static void free_buf(struct port_buffer *buf) diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -94,17 +94,17 @@ static unsigned desc_size(const struct l } /* This gets the device's feature bits. */ -static u32 lg_get_features(struct virtio_device *vdev) +static u64 lg_get_features(struct virtio_device *vdev) { unsigned int i; - u32 features = 0; + u64 features = 0; struct lguest_device_desc *desc = to_lgdev(vdev)->desc; u8 *in_features = lg_features(desc); /* We do this the slow but generic way. */ - for (i = 0; i < min(desc->feature_len * 8, 32); i++) + for (i = 0; i < min(desc->feature_len * 8, 64); i++) if (in_features[i / 8] & (1 << (i % 8))) - features |= (1 << i); + features |= (1ULL << i); return features; } @@ -144,7 +144,7 @@ static void lg_finalize_features(struct memset(out_features, 0, desc->feature_len); bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8; for (i = 0; i < bits; i++) { - if (vdev->features & (1 << i)) + if (vdev->features & (1ULL << i)) out_features[i / 8] |= (1 << (i % 8)); } diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c @@ -79,16 +79,16 @@ static unsigned desc_size(const struct k } /* This gets the device's feature bits. */ -static u32 kvm_get_features(struct virtio_device *vdev) +static u64 kvm_get_features(struct virtio_device *vdev) { unsigned int i; - u32 features = 0; + u64 features = 0; struct kvm_device_desc *desc = to_kvmdev(vdev)->desc; u8 *in_features = kvm_vq_features(desc); - for (i = 0; i < min(desc->feature_len * 8, 32); i++) + for (i = 0; i < min(desc->feature_len * 8, 64); i++) if (in_features[i / 8] & (1 << (i % 8))) - features |= (1 << i); + features |= (1ULL << i); return features; } @@ -105,7 +105,7 @@ static void kvm_finalize_features(struct memset(out_features, 0, desc->feature_len); bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8; for (i = 0; i < bits; i++) { - if (vdev->features & (1 << i)) + if (vdev->features & (1ULL << i)) out_features[i / 8] |= (1 << (i % 8)); } } diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -113,7 +113,7 @@ static int virtio_dev_probe(struct devic struct virtio_device *dev = container_of(_d,struct virtio_device,dev); struct virtio_driver *drv = container_of(dev->dev.driver, struct virtio_driver, driver); - u32 device_features; + u64 device_features; /* We have a driver! */ add_status(dev, VIRTIO_CONFIG_S_DRIVER); @@ -125,15 +125,15 @@ static int virtio_dev_probe(struct devic dev->features = 0; for (i = 0; i < drv->feature_table_size; i++) { unsigned int f = drv->feature_table[i]; - BUG_ON(f >= 32); - if (device_features & (1 << f)) - dev->features |= (1 << f); + BUG_ON(f >= 64); + if (device_features & (1ULL << f)) + dev->features |= (1ULL << f); } /* Transport features always preserved to pass to finalize_features. */ for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) - if (device_features & (1 << i)) - dev->features |= (1 << i); + if (device_features & (1ULL << i)) + dev->features |= (1ULL << i); dev->config->finalize_features(dev); diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -99,14 +99,16 @@ struct virtio_mmio_vq_info { /* Configuration interface */ -static u32 vm_get_features(struct virtio_device *vdev) +static u64 vm_get_features(struct virtio_device *vdev) { struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); + u64 features; - /* TODO: Features > 32 bits */ writel(0, vm_dev->base + VIRTIO_MMIO_HOST_FEATURES_SEL); - - return readl(vm_dev->base + VIRTIO_MMIO_HOST_FEATURES); + features = readl(vm_dev->base + VIRTIO_MMIO_HOST_FEATURES); + writel(1, vm_dev->base + VIRTIO_MMIO_HOST_FEATURES_SEL); + features |= ((u64)readl(vm_dev->base + VIRTIO_MMIO_HOST_FEATURES) << 32); + return features; } static void vm_finalize_features(struct virtio_device *vdev) @@ -117,7 +119,9 @@ static void vm_finalize_features(struct vring_transport_features(vdev); writel(0, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES_SEL); - writel(vdev->features, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES); + writel((u32)vdev->features, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES); + writel(1, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES_SEL); + writel(vdev->features >> 32, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES); } static void vm_get(struct virtio_device *vdev, unsigned offset, diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -103,12 +103,11 @@ static struct virtio_pci_device *to_vp_d } /* virtio config->get_features() implementation */ -static u32 vp_get_features(struct virtio_device *vdev) +static u64 vp_get_features(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - /* When someone needs more than 32 feature bits, we'll need to - * steal a bit to indicate that the rest are somewhere else. */ + /* We only support 32 feature bits. */ return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES); } diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -685,7 +685,7 @@ void vring_transport_features(struct vir break; default: /* We don't understand this bit. */ - vdev->features &= ~(1 << i); + vdev->features &= ~(1ULL << i); } } } diff --git a/include/linux/virtio.h b/include/linux/virtio.h --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -66,7 +66,7 @@ struct virtio_device { struct virtio_device_id id; struct virtio_config_ops *config; struct list_head vqs; - u32 features; + u64 features; void *priv; }; diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -117,7 +117,7 @@ struct virtio_config_ops { vq_callback_t *callbacks[], const char *names[]); void (*del_vqs)(struct virtio_device *); - u32 (*get_features)(struct virtio_device *vdev); + u64 (*get_features)(struct virtio_device *vdev); void (*finalize_features)(struct virtio_device *vdev); }; @@ -135,14 +135,14 @@ static inline bool virtio_has_feature(co { /* Did you forget to fix assumptions on max features? */ if (__builtin_constant_p(fbit)) - BUILD_BUG_ON(fbit >= 32); + BUILD_BUG_ON(fbit >= 64); else - BUG_ON(fbit >= 32); + BUG_ON(fbit >= 64); if (fbit < VIRTIO_TRANSPORT_F_START) virtio_check_driver_offered_feature(vdev, fbit); - return vdev->features & (1 << fbit); + return vdev->features & (1ULL << fbit); } /** diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h --- a/tools/virtio/linux/virtio.h +++ b/tools/virtio/linux/virtio.h @@ -134,12 +134,12 @@ static inline void kfree(void *p) /* The only feature we care to support */ #define virtio_has_feature(dev, feature) \ - ((dev)->features & (1 << (feature))) + ((dev)->features & (1ULL << (feature))) /* end of stubs */ struct virtio_device { void *dev; - u32 features; + u64 features; }; struct virtqueue {
Rusty, I can't find the actual patches, could you verify that they were indeed sent? On Thu, 2011-12-08 at 20:52 +1030, Rusty Russell wrote:> Here's the patch series I ended up with. I haven't coded up the QEMU > side yet, so no idea if the new driver works. > > Questions: > (1) Do we win from separating ISR, NOTIFY and COMMON?By separating ISR, NOTIFY and COMMON we can place ISR and NOTIFY in PIO and COMMON in MMIO. This gives us the benefit of having the small data path use fast PIO, while big config path can use MMIO.> (2) I used a "u8 bar"; should I use a bir and pack it instead? BIR > seems a little obscure (noone else in the kernel source seems to > refer to it).BIR is a concept from the PCI spec, but it was only used for MSI-X. I don't expect to see it all around the kernel source. -- Sasha.
From: Michael S Tsirkin <mst at redhat.com> Virtio drivers should map the part of the range they need, not necessarily all of it. Signed-off-by: Michael S. Tsirkin <mst at redhat.com> Signed-off-by: Rusty Russell <rusty at rustcorp.com.au> --- include/asm-generic/io.h | 4 ++++ include/asm-generic/iomap.h | 11 +++++++++++ lib/iomap.c | 41 ++++++++++++++++++++++++++++++++++++----- 3 files changed, 51 insertions(+), 5 deletions(-) diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 9120887..3cf1787 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -286,6 +286,10 @@ static inline void writesb(const void __iomem *addr, const void *buf, int len) /* Create a virtual mapping cookie for a PCI BAR (memory or IO) */ struct pci_dev; extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max); +extern void __iomem *pci_iomap_range(struct pci_dev *dev, int bar, + unsigned offset, + unsigned long minlen, + unsigned long maxlen); static inline void pci_iounmap(struct pci_dev *dev, void __iomem *p) { } diff --git a/include/asm-generic/iomap.h b/include/asm-generic/iomap.h index 98dcd76..6f192d4 100644 --- a/include/asm-generic/iomap.h +++ b/include/asm-generic/iomap.h @@ -70,8 +70,19 @@ extern void ioport_unmap(void __iomem *); /* Create a virtual mapping cookie for a PCI BAR (memory or IO) */ struct pci_dev; extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max); +extern void __iomem *pci_iomap_range(struct pci_dev *dev, int bar, + unsigned offset, + unsigned long minlen, + unsigned long maxlen); extern void pci_iounmap(struct pci_dev *dev, void __iomem *); #else +static inline void __iomem *pci_iomap_range(struct pci_dev *dev, int bar, + unsigned offset, + unsigned long minlen, + unsigned long maxlen) +{ + return NULL; +} struct pci_dev; static inline void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max) { diff --git a/lib/iomap.c b/lib/iomap.c index 5dbcb4b..93ae915 100644 --- a/lib/iomap.c +++ b/lib/iomap.c @@ -243,26 +243,37 @@ EXPORT_SYMBOL(ioport_unmap); #ifdef CONFIG_PCI /** - * pci_iomap - create a virtual mapping cookie for a PCI BAR + * pci_iomap_range - create a virtual mapping cookie for a PCI BAR * @dev: PCI device that owns the BAR * @bar: BAR number - * @maxlen: length of the memory to map + * @offset: map memory at the given offset in BAR + * @minlen: min length of the memory to map + * @maxlen: max length of the memory to map * * Using this function you will get a __iomem address to your device BAR. * You can access it using ioread*() and iowrite*(). These functions hide * the details if this is a MMIO or PIO address space and will just do what * you expect from them in the correct way. * + * @minlen specifies the minimum length to map. We check that BAR is + * large enough. * @maxlen specifies the maximum length to map. If you want to get access to - * the complete BAR without checking for its length first, pass %0 here. + * the complete BAR from offset to the end, pass %0 here. * */ -void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) +void __iomem *pci_iomap_range(struct pci_dev *dev, int bar, + unsigned offset, + unsigned long minlen, + unsigned long maxlen) { resource_size_t start = pci_resource_start(dev, bar); resource_size_t len = pci_resource_len(dev, bar); unsigned long flags = pci_resource_flags(dev, bar); - if (!len || !start) + if (len <= offset || !start) + return NULL; + len -= offset; + start += offset; + if (len < minlen) return NULL; if (maxlen && len > maxlen) len = maxlen; @@ -277,10 +288,30 @@ void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) return NULL; } +/** + * pci_iomap - create a virtual mapping cookie for a PCI BAR + * @dev: PCI device that owns the BAR + * @bar: BAR number + * @maxlen: length of the memory to map + * + * Using this function you will get a __iomem address to your device BAR. + * You can access it using ioread*() and iowrite*(). These functions hide + * the details if this is a MMIO or PIO address space and will just do what + * you expect from them in the correct way. + * + * @maxlen specifies the maximum length to map. If you want to get access to + * the complete BAR without checking for its length first, pass %0 here. + * */ +void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) +{ + return pci_iomap_range(dev, bar, 0, 0, maxlen); +} + void pci_iounmap(struct pci_dev *dev, void __iomem * addr) { IO_COND(addr, /* nothing */, iounmap(addr)); } EXPORT_SYMBOL(pci_iomap); +EXPORT_SYMBOL(pci_iomap_range); EXPORT_SYMBOL(pci_iounmap); #endif /* CONFIG_PCI */
Rusty Russell
2011-Dec-08 10:34 UTC
[RFC 4/11] virtio-pci: define layout for virtio vendor-specific capabilities.
Based on patch by Michael S. Tsirkin <mst at redhat.com>, but I found it hard to follow so changed to use structures which are more self-documenting. Signed-off-by: Rusty Russell <rusty at rustcorp.com.au> --- include/linux/virtio_pci.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h --- a/include/linux/virtio_pci.h +++ b/include/linux/virtio_pci.h @@ -92,4 +92,45 @@ /* The alignment to use between consumer and producer parts of vring. * x86 pagesize again. */ #define VIRTIO_PCI_VRING_ALIGN 4096 + +/* IDs for different capabilities. Must all exist. */ +/* FIXME: Do we win from separating ISR, NOTIFY and COMMON? */ +/* Common configuration */ +#define VIRTIO_PCI_CAP_COMMON_CFG 1 +/* Notifications */ +#define VIRTIO_PCI_CAP_NOTIFY_CFG 2 +/* ISR access */ +#define VIRTIO_PCI_CAP_ISR_CFG 3 +/* Device specific confiuration */ +#define VIRTIO_PCI_CAP_DEVICE_CFG 4 + +/* This is the PCI capability header: */ +struct virtio_pci_cap { + u8 cap_vndr; /* Generic PCI field: PCI_CAP_ID_VNDR */ + u8 cap_next; /* Generic PCI field: next ptr. */ + u8 cfg_type; /* One of the VIRTIO_PCI_CAP_*_CFG. */ +/* FIXME: Should we use a bir, instead of raw bar number? */ + u8 bar; /* Where to find it. */ + __le32 offset; /* Offset within bar. */ + __le32 length; /* Length. */ +}; + +/* Fields in VIRTIO_PCI_CAP_COMMON_CFG: */ +struct virtio_pci_common_cfg { + /* About the whole device. */ + __le32 device_feature_select; /* read-write */ + __le32 device_feature; /* read-only */ + __le32 guest_feature_select; /* read-write */ + __le32 guest_feature; /* read-only */ + __le16 msix_config; /* read-write */ + __u8 device_status; /* read-write */ + __u8 unused; + + /* About a specific virtqueue. */ + __le16 queue_select; /* read-write */ + __le16 queue_align; /* read-write, power of 2. */ + __le16 queue_size; /* read-write, power of 2. */ + __le16 queue_msix_vector;/* read-write */ + __le64 queue_address; /* read-write: 0xFFFFFFFFFFFFFFFF == DNE. */ +}; #endif
Rusty Russell
2011-Dec-08 10:35 UTC
[RFC 6/11] virtio_pci: move old defines to legacy, introduce new structure.
We don't *remove* the old ones, unless VIRTIO_PCI_NO_LEGACY is defined, but they get a friendly #warning about the change. Note that config option is not promted; we always enable it for now. Signed-off-by: Rusty Russell <rusty at rustcorp.com.au> --- drivers/virtio/Kconfig | 12 +++++++ drivers/virtio/Makefile | 2 - drivers/virtio/virtio_pci_legacy.c | 16 ++++----- include/linux/virtio_pci.h | 63 ++++++++++++++++++++++++------------- 4 files changed, 62 insertions(+), 31 deletions(-) diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -25,6 +25,18 @@ config VIRTIO_PCI If unsure, say M. +config VIRTIO_PCI_LEGACY + bool + default y + depends on VIRTIO_PCI + ---help--- + The old BAR0 virtio pci layout was deprecated early 2012. + + So look out into your driveway. Do you have a flying car? If + so, you can happily disable this option and virtio will not + break. Otherwise, leave it set. Unless you're testing what + life will be like in The Future. + config VIRTIO_BALLOON tristate "Virtio balloon driver (EXPERIMENTAL)" select VIRTIO diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile --- a/drivers/virtio/Makefile +++ b/drivers/virtio/Makefile @@ -1,5 +1,5 @@ obj-$(CONFIG_VIRTIO) += virtio.o obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o -obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o +obj-$(CONFIG_VIRTIO_PCI_LEGACY) += virtio_pci_legacy.o obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci_legacy.c rename from drivers/virtio/virtio_pci.c rename to drivers/virtio/virtio_pci_legacy.c --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -1,5 +1,5 @@ /* - * Virtio PCI driver + * Virtio PCI driver (legacy mode) * * This module allows virtio devices to be used over a virtual PCI device. * This can be used with QEMU based VMMs like KVM or Xen. @@ -27,7 +27,7 @@ #include <linux/spinlock.h> MODULE_AUTHOR("Anthony Liguori <aliguori at us.ibm.com>"); -MODULE_DESCRIPTION("virtio-pci"); +MODULE_DESCRIPTION("virtio-pci-legacy"); MODULE_LICENSE("GPL"); MODULE_VERSION("1"); @@ -629,7 +629,7 @@ static int __devinit virtio_pci_probe(st return -ENODEV; if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION) { - printk(KERN_ERR "virtio_pci: expected ABI version %d, got %d\n", + printk(KERN_ERR "virtio_pci_legacy: expected ABI version %d, got %d\n", VIRTIO_PCI_ABI_VERSION, pci_dev->revision); return -ENODEV; } @@ -654,7 +654,7 @@ static int __devinit virtio_pci_probe(st if (err) goto out; - err = pci_request_regions(pci_dev, "virtio-pci"); + err = pci_request_regions(pci_dev, "virtio-pci-legacy"); if (err) goto out_enable_device; @@ -721,8 +721,8 @@ static int virtio_pci_resume(struct pci_ } #endif -static struct pci_driver virtio_pci_driver = { - .name = "virtio-pci", +static struct pci_driver virtio_pci_driver_legacy = { + .name = "virtio-pci-legacy", .id_table = virtio_pci_id_table, .probe = virtio_pci_probe, .remove = __devexit_p(virtio_pci_remove), @@ -734,14 +734,14 @@ static struct pci_driver virtio_pci_driv static int __init virtio_pci_init(void) { - return pci_register_driver(&virtio_pci_driver); + return pci_register_driver(&virtio_pci_driver_legacy); } module_init(virtio_pci_init); static void __exit virtio_pci_exit(void) { - pci_unregister_driver(&virtio_pci_driver); + pci_unregister_driver(&virtio_pci_driver_legacy); } module_exit(virtio_pci_exit); diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h --- a/include/linux/virtio_pci.h +++ b/include/linux/virtio_pci.h @@ -42,56 +42,75 @@ #include <linux/virtio_config.h> /* A 32-bit r/o bitmask of the features supported by the host */ -#define VIRTIO_PCI_HOST_FEATURES 0 +#define VIRTIO_PCI_LEGACY_HOST_FEATURES 0 /* A 32-bit r/w bitmask of features activated by the guest */ -#define VIRTIO_PCI_GUEST_FEATURES 4 +#define VIRTIO_PCI_LEGACY_GUEST_FEATURES 4 /* A 32-bit r/w PFN for the currently selected queue */ -#define VIRTIO_PCI_QUEUE_PFN 8 +#define VIRTIO_PCI_LEGACY_QUEUE_PFN 8 /* A 16-bit r/o queue size for the currently selected queue */ -#define VIRTIO_PCI_QUEUE_NUM 12 +#define VIRTIO_PCI_LEGACY_QUEUE_NUM 12 /* A 16-bit r/w queue selector */ -#define VIRTIO_PCI_QUEUE_SEL 14 +#define VIRTIO_PCI_LEGACY_QUEUE_SEL 14 /* A 16-bit r/w queue notifier */ -#define VIRTIO_PCI_QUEUE_NOTIFY 16 +#define VIRTIO_PCI_LEGACY_QUEUE_NOTIFY 16 /* An 8-bit device status register. */ -#define VIRTIO_PCI_STATUS 18 +#define VIRTIO_PCI_LEGACY_STATUS 18 /* An 8-bit r/o interrupt status register. Reading the value will return the * current contents of the ISR and will also clear it. This is effectively * a read-and-acknowledge. */ -#define VIRTIO_PCI_ISR 19 - -/* The bit of the ISR which indicates a device configuration change. */ -#define VIRTIO_PCI_ISR_CONFIG 0x2 +#define VIRTIO_PCI_LEGACY_ISR 19 /* MSI-X registers: only enabled if MSI-X is enabled. */ /* A 16-bit vector for configuration changes. */ -#define VIRTIO_MSI_CONFIG_VECTOR 20 +#define VIRTIO_MSI_LEGACY_CONFIG_VECTOR 20 /* A 16-bit vector for selected queue notifications. */ -#define VIRTIO_MSI_QUEUE_VECTOR 22 -/* Vector value used to disable MSI for queue */ -#define VIRTIO_MSI_NO_VECTOR 0xffff +#define VIRTIO_MSI_LEGACY_QUEUE_VECTOR 22 /* The remaining space is defined by each driver as the per-driver * configuration space */ -#define VIRTIO_PCI_CONFIG(dev) ((dev)->msix_enabled ? 24 : 20) +#define VIRTIO_PCI_LEGACY_CONFIG(dev) ((dev)->msix_enabled ? 24 : 20) + +/* How many bits to shift physical queue address written to QUEUE_PFN. + * 12 is historical, and due to x86 page size. */ +#define VIRTIO_PCI_LEGACY_QUEUE_ADDR_SHIFT 12 + +/* The alignment to use between consumer and producer parts of vring. + * x86 pagesize again. */ +#define VIRTIO_PCI_LEGACY_VRING_ALIGN 4096 + +#ifndef VIRTIO_PCI_NO_LEGACY +/* Don't break compile of old userspace code. These will go away. */ +#warning "Please support virtio_pci non-legacy mode!" +#define VIRTIO_PCI_HOST_FEATURES VIRTIO_PCI_LEGACY_HOST_FEATURES +#define VIRTIO_PCI_GUEST_FEATURES VIRTIO_PCI_LEGACY_GUEST_FEATURES +#define VIRTIO_PCI_QUEUE_PFN VIRTIO_PCI_LEGACY_QUEUE_PFN +#define VIRTIO_PCI_QUEUE_NUM VIRTIO_PCI_LEGACY_QUEUE_NUM +#define VIRTIO_PCI_QUEUE_SEL VIRTIO_PCI_LEGACY_QUEUE_SEL +#define VIRTIO_PCI_QUEUE_NOTIFY VIRTIO_PCI_LEGACY_QUEUE_NOTIFY +#define VIRTIO_PCI_STATUS VIRTIO_PCI_LEGACY_STATUS +#define VIRTIO_PCI_ISR VIRTIO_PCI_LEGACY_ISR +#define VIRTIO_MSI_CONFIG_VECTOR VIRTIO_MSI_LEGACY_CONFIG_VECTOR +#define VIRTIO_MSI_QUEUE_VECTOR VIRTIO_MSI_LEGACY_QUEUE_VECTOR +#define VIRTIO_PCI_CONFIG(dev) VIRTIO_PCI_LEGACY_CONFIG(dev) +#define VIRTIO_PCI_QUEUE_ADDR_SHIFT VIRTIO_PCI_LEGACY_QUEUE_ADDR_SHIFT +#define VIRTIO_PCI_VRING_ALIGN VIRTIO_PCI_LEGACY_VRING_ALIGN +#endif /* ...!KERNEL */ /* Virtio ABI version, this must match exactly */ #define VIRTIO_PCI_ABI_VERSION 0 -/* How many bits to shift physical queue address written to QUEUE_PFN. - * 12 is historical, and due to x86 page size. */ -#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12 +/* Vector value used to disable MSI for queue */ +#define VIRTIO_MSI_NO_VECTOR 0xffff -/* The alignment to use between consumer and producer parts of vring. - * x86 pagesize again. */ -#define VIRTIO_PCI_VRING_ALIGN 4096 +/* The bit of the ISR which indicates a device configuration change. */ +#define VIRTIO_PCI_ISR_CONFIG 0x2 /* IDs for different capabilities. Must all exist. */ /* FIXME: Do we win from separating ISR, NOTIFY and COMMON? */
Rusty Russell
2011-Dec-08 10:38 UTC
[RFC 6/11] virtio_pci: don't use the legacy driver if we find the new PCI capabilities.
With module option to override. I assume I can call pci_find_capability() before pci_request_regions? --- drivers/virtio/virtio_pci_legacy.c | 20 +++++++++++++++++++- include/linux/virtio_pci.h | 19 +++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -26,6 +26,10 @@ #include <linux/highmem.h> #include <linux/spinlock.h> +static bool force_nonlegacy; +module_param(force_nonlegacy, bool, 0644); +MODULE_PARM_DESC(force_nonlegacy, "Take over non-legacy virtio devices too"); + MODULE_AUTHOR("Anthony Liguori <aliguori at us.ibm.com>"); MODULE_DESCRIPTION("virtio-pci-legacy"); MODULE_LICENSE("GPL"); @@ -622,7 +626,7 @@ static int __devinit virtio_pci_probe(st const struct pci_device_id *id) { struct virtio_pci_device *vp_dev; - int err; + int err, cap; /* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */ if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f) @@ -654,6 +658,21 @@ static int __devinit virtio_pci_probe(st if (err) goto out; + /* We leave modern virtio-pci for the modern driver. */ + cap = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_COMMON_CFG); + if (cap) { + if (force_nonlegacy) + dev_info(&pci_dev->dev, + "virtio_pci_legacy: forcing legacy mode!\n"); + else { + dev_info(&pci_dev->dev, + "virtio_pci_legacy: leaving to" + " non-legacy driver\n"); + err = -ENODEV; + goto out_enable_device; + } + } + err = pci_request_regions(pci_dev, "virtio-pci-legacy"); if (err) goto out_enable_device; diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h --- a/include/linux/virtio_pci.h +++ b/include/linux/virtio_pci.h @@ -152,4 +152,23 @@ struct virtio_pci_common_cfg { __le16 queue_msix_vector;/* read-write */ __le64 queue_address; /* read-write: 0xFFFFFFFFFFFFFFFF == DNE. */ }; + +#ifdef __KERNEL__ +/* Returns offset of the capability, or 0. */ +static inline int virtio_pci_find_capability(struct pci_dev *dev, u8 cfg_type) +{ + int pos; + + for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR); + pos > 0; + pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) { + u8 type; + pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap, + cfg_type), &type); + if (type == cfg_type) + return pos; + } + return 0; +} +#endif /* __KERNEL__ */ #endif
Differences: 1) Uses 4 pci capabilities to demark common, irq, notify and dev-specific areas. 2) Guest sets queue size, using host-provided maximum. 3) Guest sets queue alignment, rather than ABI-defined 4096. 4) More than 32 feature bits (a lot more!). --- drivers/virtio/Makefile | 1 drivers/virtio/virtio_pci.c | 868 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 869 insertions(+) diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile --- a/drivers/virtio/Makefile +++ b/drivers/virtio/Makefile @@ -1,5 +1,6 @@ obj-$(CONFIG_VIRTIO) += virtio.o obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o +obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o obj-$(CONFIG_VIRTIO_PCI_LEGACY) += virtio_pci_legacy.o obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c new file mode 100644 --- /dev/null +++ b/drivers/virtio/virtio_pci.c @@ -0,0 +1,869 @@ +/* + * Virtio PCI driver + * + * This module allows virtio devices to be used over a virtual PCI + * device. Copyright 2011, Rusty Russell IBM Corporation, but based + * on the older virtio_pci_legacy.c, which was Copyright IBM + * Corp. 2007. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ +#define VIRTIO_PCI_NO_LEGACY +#include <linux/module.h> +#include <linux/list.h> +#include <linux/pci.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/virtio.h> +#include <linux/virtio_config.h> +#include <linux/virtio_ring.h> +#include <linux/virtio_pci.h> +#include <linux/highmem.h> +#include <linux/spinlock.h> + +MODULE_AUTHOR("Rusty Russell <rusty at rustcorp.com.au>"); +MODULE_DESCRIPTION("virtio-pci"); +MODULE_LICENSE("GPL"); +MODULE_VERSION("2"); + +/* Use cacheline size as a good guess at a nice alignment. */ +#define VIRTIO_PCI_ALIGN SMP_CACHE_BYTES + +/* Our device structure */ +struct virtio_pci_device +{ + struct virtio_device vdev; + struct pci_dev *pci_dev; + + /* The IO mapping for the PCI config space */ + struct virtio_pci_common_cfg __iomem *common; + /* Where to read and clear interrupt */ + u8 __iomem *isr; + /* Write the virtqueue index here to notify device of activity. */ + __le16 __iomem *notify; + /* Device-specific data. */ + void __iomem *device; + + /* a list of queues so we can dispatch IRQs */ + spinlock_t lock; + struct list_head virtqueues; + + /* MSI-X support */ + int msix_enabled; + int intx_enabled; + struct msix_entry *msix_entries; + /* Name strings for interrupts. This size should be enough, + * and I'm too lazy to allocate each name separately. */ + char (*msix_names)[256]; + /* Number of available vectors */ + unsigned msix_vectors; + /* Vectors allocated, excluding per-vq vectors if any */ + unsigned msix_used_vectors; + /* Whether we have vector per vq */ + bool per_vq_vectors; +}; + +/* Constants for MSI-X */ +/* Use first vector for configuration changes, second and the rest for + * virtqueues Thus, we need at least 2 vectors for MSI. */ +enum { + VP_MSIX_CONFIG_VECTOR = 0, + VP_MSIX_VQ_VECTOR = 1, +}; + +struct virtio_pci_vq_info +{ + /* the actual virtqueue */ + struct virtqueue *vq; + + /* the number of entries in the queue */ + int num; + + /* the index of the queue */ + int queue_index; + + /* the virtual address of the ring queue */ + void *queue; + + /* the list node for the virtqueues list */ + struct list_head node; + + /* MSI-X vector (or none) */ + unsigned msix_vector; +}; + +/* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */ +static struct pci_device_id virtio_pci_id_table[] = { + { 0x1af4, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, + { 0 }, +}; + +MODULE_DEVICE_TABLE(pci, virtio_pci_id_table); + +/* Convert a generic virtio device to our structure */ +static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev) +{ + return container_of(vdev, struct virtio_pci_device, vdev); +} + +/* There is no iowrite64. We use two 32-bit ops. */ +static void iowrite64(u64 val, const __le64 *addr) +{ + iowrite32((u32)val, (__le32 *)addr); + iowrite32(val >> 32, (__le32 *)addr + 1); +} + +/* There is no ioread64. We use two 32-bit ops. */ +static u64 ioread64(__le64 *addr) +{ + return ioread32(addr) | ((u64)ioread32((__le32 *)addr + 1) << 32); +} + +static u64 vp_get_features(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + u64 features; + + iowrite32(0, &vp_dev->common->device_feature_select); + features = ioread32(&vp_dev->common->device_feature); + iowrite32(1, &vp_dev->common->device_feature_select); + features |= ((u64)ioread32(&vp_dev->common->device_feature) << 32); + return features; +} + +static void vp_finalize_features(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + + /* Give virtio_ring a chance to accept features. */ + vring_transport_features(vdev); + + iowrite32(0, &vp_dev->common->guest_feature_select); + iowrite32((u32)vdev->features, &vp_dev->common->guest_feature); + iowrite32(1, &vp_dev->common->guest_feature_select); + iowrite32(vdev->features >> 32, &vp_dev->common->guest_feature); +} + +/* virtio config->get() implementation */ +static void vp_get(struct virtio_device *vdev, unsigned offset, + void *buf, unsigned len) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + void __iomem *ioaddr = vp_dev->device + offset; + u8 *ptr = buf; + int i; + + for (i = 0; i < len; i++) + ptr[i] = ioread8(ioaddr + i); +} + +/* the config->set() implementation. it's symmetric to the config->get() + * implementation */ +static void vp_set(struct virtio_device *vdev, unsigned offset, + const void *buf, unsigned len) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + void __iomem *ioaddr = vp_dev->device + offset; + const u8 *ptr = buf; + int i; + + for (i = 0; i < len; i++) + iowrite8(ptr[i], ioaddr + i); +} + +/* config->{get,set}_status() implementations */ +static u8 vp_get_status(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + return ioread8(&vp_dev->common->device_status); +} + +static void vp_set_status(struct virtio_device *vdev, u8 status) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + /* We should never be setting status to 0. */ + BUG_ON(status == 0); + iowrite8(status, &vp_dev->common->device_status); +} + +/* wait for pending irq handlers */ +static void vp_synchronize_vectors(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + int i; + + if (vp_dev->intx_enabled) + synchronize_irq(vp_dev->pci_dev->irq); + + for (i = 0; i < vp_dev->msix_vectors; ++i) + synchronize_irq(vp_dev->msix_entries[i].vector); +} + +static void vp_reset(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + /* 0 status means a reset. */ + iowrite8(0, &vp_dev->common->device_status); + /* Flush out the status write, and flush in device writes, + * including MSi-X interrupts, if any. */ + ioread8(&vp_dev->common->device_status); + /* Flush pending VQ/configuration callbacks. */ + vp_synchronize_vectors(vdev); +} + +/* the notify function used when creating a virt queue */ +static void vp_notify(struct virtqueue *vq) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); + struct virtio_pci_vq_info *info = vq->priv; + + /* we write the queue's selector into the notification register to + * signal the other end */ + iowrite16(info->queue_index, vp_dev->notify); +} + +/* Handle a configuration change: Tell driver if it wants to know. */ +static irqreturn_t vp_config_changed(int irq, void *opaque) +{ + struct virtio_pci_device *vp_dev = opaque; + struct virtio_driver *drv; + drv = container_of(vp_dev->vdev.dev.driver, + struct virtio_driver, driver); + + if (drv->config_changed) + drv->config_changed(&vp_dev->vdev); + return IRQ_HANDLED; +} + +/* Notify all virtqueues on an interrupt. */ +static irqreturn_t vp_vring_interrupt(int irq, void *opaque) +{ + struct virtio_pci_device *vp_dev = opaque; + struct virtio_pci_vq_info *info; + irqreturn_t ret = IRQ_NONE; + unsigned long flags; + + spin_lock_irqsave(&vp_dev->lock, flags); + list_for_each_entry(info, &vp_dev->virtqueues, node) { + if (vring_interrupt(irq, info->vq) == IRQ_HANDLED) + ret = IRQ_HANDLED; + } + spin_unlock_irqrestore(&vp_dev->lock, flags); + + return ret; +} + +/* A small wrapper to also acknowledge the interrupt when it's handled. + * I really need an EIO hook for the vring so I can ack the interrupt once we + * know that we'll be handling the IRQ but before we invoke the callback since + * the callback may notify the host which results in the host attempting to + * raise an interrupt that we would then mask once we acknowledged the + * interrupt. */ +static irqreturn_t vp_interrupt(int irq, void *opaque) +{ + struct virtio_pci_device *vp_dev = opaque; + u8 isr; + + /* reading the ISR has the effect of also clearing it so it's very + * important to save off the value. */ + isr = ioread8(vp_dev->isr); + + /* It's definitely not us if the ISR was not high */ + if (!isr) + return IRQ_NONE; + + /* Configuration change? Tell driver if it wants to know. */ + if (isr & VIRTIO_PCI_ISR_CONFIG) + vp_config_changed(irq, opaque); + + return vp_vring_interrupt(irq, opaque); +} + +static void vp_free_vectors(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + int i; + + if (vp_dev->intx_enabled) { + free_irq(vp_dev->pci_dev->irq, vp_dev); + vp_dev->intx_enabled = 0; + } + + for (i = 0; i < vp_dev->msix_used_vectors; ++i) + free_irq(vp_dev->msix_entries[i].vector, vp_dev); + + if (vp_dev->msix_enabled) { + /* Disable the vector used for configuration */ + iowrite16(VIRTIO_MSI_NO_VECTOR, &vp_dev->common->msix_config); + /* Flush the write out to device */ + ioread16(&vp_dev->common->msix_config); + + pci_disable_msix(vp_dev->pci_dev); + vp_dev->msix_enabled = 0; + vp_dev->msix_vectors = 0; + } + + vp_dev->msix_used_vectors = 0; + kfree(vp_dev->msix_names); + vp_dev->msix_names = NULL; + kfree(vp_dev->msix_entries); + vp_dev->msix_entries = NULL; +} + +static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, + bool per_vq_vectors) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + const char *name = dev_name(&vp_dev->vdev.dev); + unsigned i, v; + int err = -ENOMEM; + + vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries, + GFP_KERNEL); + if (!vp_dev->msix_entries) + goto error; + vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names, + GFP_KERNEL); + if (!vp_dev->msix_names) + goto error; + + for (i = 0; i < nvectors; ++i) + vp_dev->msix_entries[i].entry = i; + + /* pci_enable_msix returns positive if we can't get this many. */ + err = pci_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries, nvectors); + if (err > 0) + err = -ENOSPC; + if (err) + goto error; + vp_dev->msix_vectors = nvectors; + vp_dev->msix_enabled = 1; + + /* Set the vector used for configuration */ + v = vp_dev->msix_used_vectors; + snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, + "%s-config", name); + err = request_irq(vp_dev->msix_entries[v].vector, + vp_config_changed, 0, vp_dev->msix_names[v], + vp_dev); + if (err) + goto error; + ++vp_dev->msix_used_vectors; + + iowrite16(v, &vp_dev->common->msix_config); + /* Verify we had enough resources to assign the vector */ + v = ioread16(&vp_dev->common->msix_config); + if (v == VIRTIO_MSI_NO_VECTOR) { + err = -EBUSY; + goto error; + } + + if (!per_vq_vectors) { + /* Shared vector for all VQs */ + v = vp_dev->msix_used_vectors; + snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, + "%s-virtqueues", name); + err = request_irq(vp_dev->msix_entries[v].vector, + vp_vring_interrupt, 0, vp_dev->msix_names[v], + vp_dev); + if (err) + goto error; + ++vp_dev->msix_used_vectors; + } + return 0; +error: + vp_free_vectors(vdev); + return err; +} + +static int vp_request_intx(struct virtio_device *vdev) +{ + int err; + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + + err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, + IRQF_SHARED, dev_name(&vdev->dev), vp_dev); + if (!err) + vp_dev->intx_enabled = 1; + return err; +} + +static void *alloc_virtqueue_pages(u16 *num) +{ + void *pages; + + /* 1024 entries uses about 32k */ + if (*num > 1024) + *num = 1024; + + for (; *num; *num /= 2) { + size_t size = PAGE_ALIGN(vring_size(*num, VIRTIO_PCI_ALIGN)); + pages = alloc_pages_exact(size, + GFP_KERNEL|__GFP_ZERO|__GFP_NOWARN); + if (pages) + return pages; + } + return NULL; +} + +static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index, + void (*callback)(struct virtqueue *vq), + const char *name, + u16 msix_vec) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtio_pci_vq_info *info; + struct virtqueue *vq; + u16 num; + int err; + + /* Select the queue we're interested in */ + iowrite16(index, &vp_dev->common->queue_select); + + switch (ioread64(&vp_dev->common->queue_address)) { + case 0xFFFFFFFFFFFFFFFFULL: + return ERR_PTR(-ENOENT); + case 0: + /* Uninitialized. Excellent. */ + break; + default: + /* We've already set this up? */ + return ERR_PTR(-EBUSY); + } + + /* Maximum size must be a power of 2. */ + num = ioread16(&vp_dev->common->queue_size); + if (num & (num - 1)) { + dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u", num); + return ERR_PTR(-EINVAL); + } + + /* allocate and fill out our structure the represents an active + * queue */ + info = kmalloc(sizeof(struct virtio_pci_vq_info), GFP_KERNEL); + if (!info) + return ERR_PTR(-ENOMEM); + + info->queue_index = index; + info->msix_vector = msix_vec; + + info->queue = alloc_virtqueue_pages(&num); + if (info->queue == NULL) { + err = -ENOMEM; + goto out_info; + } + info->num = num; + + /* create the vring */ + vq = vring_new_virtqueue(info->num, VIRTIO_PCI_ALIGN, + vdev, info->queue, vp_notify, callback, name); + if (!vq) { + err = -ENOMEM; + goto out_alloc_pages; + } + + vq->priv = info; + info->vq = vq; + + if (msix_vec != VIRTIO_MSI_NO_VECTOR) { + iowrite16(msix_vec, &vp_dev->common->queue_msix_vector); + msix_vec = ioread16(&vp_dev->common->queue_msix_vector); + if (msix_vec == VIRTIO_MSI_NO_VECTOR) { + err = -EBUSY; + goto out_new_virtqueue; + } + } + + if (callback) { + unsigned long flags; + spin_lock_irqsave(&vp_dev->lock, flags); + list_add(&info->node, &vp_dev->virtqueues); + spin_unlock_irqrestore(&vp_dev->lock, flags); + } else { + INIT_LIST_HEAD(&info->node); + } + + /* Activate the queue. */ + iowrite64(virt_to_phys(info->queue), &vp_dev->common->queue_address); + iowrite16(VIRTIO_PCI_ALIGN, &vp_dev->common->queue_align); + iowrite16(num, &vp_dev->common->queue_size); + + return vq; + +out_new_virtqueue: + vring_del_virtqueue(vq); +out_alloc_pages: + free_pages_exact(info->queue, + PAGE_ALIGN(vring_size(num, VIRTIO_PCI_ALIGN))); +out_info: + kfree(info); + return ERR_PTR(err); +} + +static void vp_del_vq(struct virtqueue *vq) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); + struct virtio_pci_vq_info *info = vq->priv; + unsigned long flags, size; + + spin_lock_irqsave(&vp_dev->lock, flags); + list_del(&info->node); + spin_unlock_irqrestore(&vp_dev->lock, flags); + + /* Select and deactivate the queue */ + iowrite16(info->queue_index, &vp_dev->common->queue_select); + + if (vp_dev->msix_enabled) { + iowrite16(VIRTIO_MSI_NO_VECTOR, + &vp_dev->common->queue_msix_vector); + /* Flush the write out to device */ + ioread16(&vp_dev->common->queue_msix_vector); + } + + vring_del_virtqueue(vq); + + /* This is for our own benefit, not the device's! */ + iowrite64(0, &vp_dev->common->queue_address); + iowrite16(0, &vp_dev->common->queue_size); + iowrite16(0, &vp_dev->common->queue_align); + + size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_ALIGN)); + free_pages_exact(info->queue, size); + kfree(info); +} + +/* the config->del_vqs() implementation */ +static void vp_del_vqs(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtqueue *vq, *n; + struct virtio_pci_vq_info *info; + + list_for_each_entry_safe(vq, n, &vdev->vqs, list) { + info = vq->priv; + if (vp_dev->per_vq_vectors && + info->msix_vector != VIRTIO_MSI_NO_VECTOR) + free_irq(vp_dev->msix_entries[info->msix_vector].vector, + vq); + vp_del_vq(vq); + } + vp_dev->per_vq_vectors = false; + + vp_free_vectors(vdev); +} + +static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char *names[], + bool use_msix, + bool per_vq_vectors) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + u16 msix_vec; + int i, err, nvectors, allocated_vectors; + + if (!use_msix) { + /* Old style: one normal interrupt for change and all vqs. */ + err = vp_request_intx(vdev); + if (err) + goto error_request; + } else { + if (per_vq_vectors) { + /* Best option: one for change interrupt, one per vq. */ + nvectors = 1; + for (i = 0; i < nvqs; ++i) + if (callbacks[i]) + ++nvectors; + } else { + /* Second best: one for change, shared for all vqs. */ + nvectors = 2; + } + + err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors); + if (err) + goto error_request; + } + + vp_dev->per_vq_vectors = per_vq_vectors; + allocated_vectors = vp_dev->msix_used_vectors; + for (i = 0; i < nvqs; ++i) { + if (!callbacks[i] || !vp_dev->msix_enabled) + msix_vec = VIRTIO_MSI_NO_VECTOR; + else if (vp_dev->per_vq_vectors) + msix_vec = allocated_vectors++; + else + msix_vec = VP_MSIX_VQ_VECTOR; + vqs[i] = setup_vq(vdev, i, callbacks[i], names[i], msix_vec); + if (IS_ERR(vqs[i])) { + err = PTR_ERR(vqs[i]); + goto error_find; + } + + if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR) + continue; + + /* allocate per-vq irq if available and necessary */ + snprintf(vp_dev->msix_names[msix_vec], + sizeof *vp_dev->msix_names, + "%s-%s", + dev_name(&vp_dev->vdev.dev), names[i]); + err = request_irq(vp_dev->msix_entries[msix_vec].vector, + vring_interrupt, 0, + vp_dev->msix_names[msix_vec], + vqs[i]); + if (err) { + vp_del_vq(vqs[i]); + goto error_find; + } + } + return 0; + +error_find: + vp_del_vqs(vdev); + +error_request: + return err; +} + +/* the config->find_vqs() implementation */ +static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char *names[]) +{ + int err; + + /* Try MSI-X with one vector per queue. */ + err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, true); + if (!err) + return 0; + /* Fallback: MSI-X with one vector for config, one shared for queues. */ + err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, + true, false); + if (!err) + return 0; + /* Finally fall back to regular interrupts. */ + return vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, + false, false); +} + +static struct virtio_config_ops virtio_pci_config_ops = { + .get = vp_get, + .set = vp_set, + .get_status = vp_get_status, + .set_status = vp_set_status, + .reset = vp_reset, + .find_vqs = vp_find_vqs, + .del_vqs = vp_del_vqs, + .get_features = vp_get_features, + .finalize_features = vp_finalize_features, +}; + +static void virtio_pci_release_dev(struct device *_d) +{ + /* + * No need for a release method as we allocate/free + * all devices together with the pci devices. + * Provide an empty one to avoid getting a warning from core. + */ +} + +static void __iomem *map_capability(struct pci_dev *dev, int off, size_t expect) +{ + u8 bar; + u32 offset, length; + void __iomem *p; + + pci_read_config_byte(dev, off + offsetof(struct virtio_pci_cap, bar), + &bar); + pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, offset), + &offset); + pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, length), + &length); + + if (length < expect) { + /* FIXME: I assume we want to report errors as PCI device? */ + dev_err(&dev->dev, + "virtio_pci: small capability len %u (%u expected)\n", + length, expect); + return NULL; + } + + p = pci_iomap_range(dev, bar, offset, length, PAGE_SIZE); + if (!p) + /* FIXME: I assume we want to report errors as PCI device? */ + dev_err(&dev->dev, + "virtio_pci: unable to map virtio %u@%u on bar %i\n", + length, offset, bar); + return p; +} + + +/* the PCI probing function */ +static int __devinit virtio_pci_probe(struct pci_dev *pci_dev, + const struct pci_device_id *id) +{ + struct virtio_pci_device *vp_dev; + int err, common, isr, notify, device; + + /* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */ + if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f) + return -ENODEV; + + if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION) { + printk(KERN_ERR "virtio_pci: expected ABI version %d, got %d\n", + VIRTIO_PCI_ABI_VERSION, pci_dev->revision); + return -ENODEV; + } + + /* allocate our structure and fill it out */ + vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL); + if (vp_dev == NULL) + return -ENOMEM; + + vp_dev->vdev.dev.parent = &pci_dev->dev; + vp_dev->vdev.dev.release = virtio_pci_release_dev; + vp_dev->vdev.config = &virtio_pci_config_ops; + vp_dev->pci_dev = pci_dev; + INIT_LIST_HEAD(&vp_dev->virtqueues); + spin_lock_init(&vp_dev->lock); + + /* Disable MSI/MSIX to bring device to a known good state. */ + pci_msi_off(pci_dev); + + /* enable the device */ + err = pci_enable_device(pci_dev); + if (err) + goto out; + + /* check for a legacy bar0 device. */ + common = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_COMMON_CFG); + if (!common) { + dev_info(&pci_dev->dev, + "virtio_pci: leaving for legacy driver\n"); + err = -ENODEV; + goto out_enable_device; + } + isr = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_ISR_CFG); + notify = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_NOTIFY_CFG); + device = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_DEVICE_CFG); + if (!isr || !notify || !device) { + dev_err(&pci_dev->dev, + "virtio_pci: missing capabilities %i/%i/%i/%i\n", + common, isr, notify, device); + goto out_enable_device; + } + + err = pci_request_regions(pci_dev, "virtio-pci"); + if (err) + goto out_enable_device; + + vp_dev->common = map_capability(pci_dev, common, + sizeof(struct virtio_pci_common_cfg)); + if (!vp_dev->common) + goto out_req_regions; + vp_dev->isr = map_capability(pci_dev, isr, sizeof(u8)); + if (!vp_dev->isr) + goto out_map_common; + vp_dev->notify = map_capability(pci_dev, notify, sizeof(u16)); + if (!vp_dev->notify) + goto out_map_isr; + vp_dev->device = map_capability(pci_dev, device, 0); + if (!vp_dev->device) + goto out_map_notify; + + pci_set_drvdata(pci_dev, vp_dev); + pci_set_master(pci_dev); + + /* we use the subsystem vendor/device id as the virtio vendor/device + * id. this allows us to use the same PCI vendor/device id for all + * virtio devices and to identify the particular virtio driver by + * the subsystem ids */ + vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor; + vp_dev->vdev.id.device = pci_dev->subsystem_device; + + /* finally register the virtio device */ + err = register_virtio_device(&vp_dev->vdev); + if (err) + goto out_set_drvdata; + + return 0; + +out_set_drvdata: + pci_set_drvdata(pci_dev, NULL); + pci_iounmap(pci_dev, vp_dev->device); +out_map_notify: + pci_iounmap(pci_dev, vp_dev->notify); +out_map_isr: + pci_iounmap(pci_dev, vp_dev->isr); +out_map_common: + pci_iounmap(pci_dev, vp_dev->common); +out_req_regions: + pci_release_regions(pci_dev); +out_enable_device: + pci_disable_device(pci_dev); +out: + kfree(vp_dev); + return err; +} + +static void __devexit virtio_pci_remove(struct pci_dev *pci_dev) +{ + struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); + + unregister_virtio_device(&vp_dev->vdev); + + vp_del_vqs(&vp_dev->vdev); + pci_set_drvdata(pci_dev, NULL); + pci_iounmap(pci_dev, vp_dev->device); + pci_iounmap(pci_dev, vp_dev->notify); + pci_iounmap(pci_dev, vp_dev->isr); + pci_iounmap(pci_dev, vp_dev->common); + pci_release_regions(pci_dev); + pci_disable_device(pci_dev); + kfree(vp_dev); +} + +#ifdef CONFIG_PM +static int virtio_pci_suspend(struct pci_dev *pci_dev, pm_message_t state) +{ + pci_save_state(pci_dev); + pci_set_power_state(pci_dev, PCI_D3hot); + return 0; +} + +static int virtio_pci_resume(struct pci_dev *pci_dev) +{ + pci_restore_state(pci_dev); + pci_set_power_state(pci_dev, PCI_D0); + return 0; +} +#endif + +static struct pci_driver virtio_pci_driver = { + .name = "virtio-pci", + .id_table = virtio_pci_id_table, + .probe = virtio_pci_probe, + .remove = __devexit_p(virtio_pci_remove), +#ifdef CONFIG_PM + .suspend = virtio_pci_suspend, + .resume = virtio_pci_resume, +#endif +}; + +static int __init virtio_pci_init(void) +{ + return pci_register_driver(&virtio_pci_driver); +} + +module_init(virtio_pci_init); + +static void __exit virtio_pci_exit(void) +{ + pci_unregister_driver(&virtio_pci_driver); +} + +module_exit(virtio_pci_exit);
Rusty Russell
2011-Dec-08 10:40 UTC
[RFC 8/11] virtio_pci: share structure between legacy and modern.
They're almost identical: we add a "legacy" ioregion (what was "ioaddr" in the legacy driver), and move it out to virtio_pci-common.h. --- drivers/virtio/virtio_pci-common.h | 72 ++++++++++++++++++++++ drivers/virtio/virtio_pci.c | 64 ------------------- drivers/virtio/virtio_pci_legacy.c | 120 +++++++++---------------------------- 3 files changed, 105 insertions(+), 151 deletions(-) diff --git a/drivers/virtio/virtio_pci-common.h b/drivers/virtio/virtio_pci-common.h new file mode 100644 --- /dev/null +++ b/drivers/virtio/virtio_pci-common.h @@ -0,0 +1,72 @@ +#include <linux/pci.h> +#include <linux/virtio_pci.h> + +/* Our device structure: shared by virtio_pci and virtio_pci_legacy. */ +struct virtio_pci_device +{ + struct virtio_device vdev; + struct pci_dev *pci_dev; + + /* The IO mapping for the PCI config space (non-legacy mode) */ + struct virtio_pci_common_cfg __iomem *common; + /* Device-specific data (non-legacy mode). */ + void __iomem *device; + + /* In legacy mode, these two point to within ->legacy. */ + /* Where to read and clear interrupt */ + u8 __iomem *isr; + /* Write the virtqueue index here to notify device of activity. */ + __le16 __iomem *notify; + +#ifdef CONFIG_VIRTIO_PCI_LEGACY + /* Instead of common, notify and device, legacy uses this: */ + void __iomem *legacy; +#endif + + /* a list of queues so we can dispatch IRQs */ + spinlock_t lock; + struct list_head virtqueues; + + /* MSI-X support */ + int msix_enabled; + int intx_enabled; + struct msix_entry *msix_entries; + /* Name strings for interrupts. This size should be enough, + * and I'm too lazy to allocate each name separately. */ + char (*msix_names)[256]; + /* Number of available vectors */ + unsigned msix_vectors; + /* Vectors allocated, excluding per-vq vectors if any */ + unsigned msix_used_vectors; + /* Whether we have vector per vq */ + bool per_vq_vectors; +}; + +/* Constants for MSI-X */ +/* Use first vector for configuration changes, second and the rest for + * virtqueues Thus, we need at least 2 vectors for MSI. */ +enum { + VP_MSIX_CONFIG_VECTOR = 0, + VP_MSIX_VQ_VECTOR = 1, +}; + +struct virtio_pci_vq_info +{ + /* the actual virtqueue */ + struct virtqueue *vq; + + /* the number of entries in the queue */ + int num; + + /* the index of the queue */ + int queue_index; + + /* the virtual address of the ring queue */ + void *queue; + + /* the list node for the virtqueues list */ + struct list_head node; + + /* MSI-X vector (or none) */ + unsigned msix_vector; +}; diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -22,6 +22,7 @@ #include <linux/virtio_pci.h> #include <linux/highmem.h> #include <linux/spinlock.h> +#include "virtio_pci-common.h" MODULE_AUTHOR("Rusty Russell <rusty at rustcorp.com.au>"); MODULE_DESCRIPTION("virtio-pci"); @@ -31,69 +32,6 @@ MODULE_VERSION("2"); /* Use cacheline size as a good guess at a nice alignment. */ #define VIRTIO_PCI_ALIGN SMP_CACHE_BYTES -/* Our device structure */ -struct virtio_pci_device -{ - struct virtio_device vdev; - struct pci_dev *pci_dev; - - /* The IO mapping for the PCI config space */ - struct virtio_pci_common_cfg __iomem *common; - /* Where to read and clear interrupt */ - u8 __iomem *isr; - /* Write the virtqueue index here to notify device of activity. */ - __le16 __iomem *notify; - /* Device-specific data. */ - void __iomem *device; - - /* a list of queues so we can dispatch IRQs */ - spinlock_t lock; - struct list_head virtqueues; - - /* MSI-X support */ - int msix_enabled; - int intx_enabled; - struct msix_entry *msix_entries; - /* Name strings for interrupts. This size should be enough, - * and I'm too lazy to allocate each name separately. */ - char (*msix_names)[256]; - /* Number of available vectors */ - unsigned msix_vectors; - /* Vectors allocated, excluding per-vq vectors if any */ - unsigned msix_used_vectors; - /* Whether we have vector per vq */ - bool per_vq_vectors; -}; - -/* Constants for MSI-X */ -/* Use first vector for configuration changes, second and the rest for - * virtqueues Thus, we need at least 2 vectors for MSI. */ -enum { - VP_MSIX_CONFIG_VECTOR = 0, - VP_MSIX_VQ_VECTOR = 1, -}; - -struct virtio_pci_vq_info -{ - /* the actual virtqueue */ - struct virtqueue *vq; - - /* the number of entries in the queue */ - int num; - - /* the index of the queue */ - int queue_index; - - /* the virtual address of the ring queue */ - void *queue; - - /* the list node for the virtqueues list */ - struct list_head node; - - /* MSI-X vector (or none) */ - unsigned msix_vector; -}; - /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */ static struct pci_device_id virtio_pci_id_table[] = { { 0x1af4, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -25,6 +25,7 @@ #include <linux/virtio_pci.h> #include <linux/highmem.h> #include <linux/spinlock.h> +#include "virtio_pci-common.h" static bool force_nonlegacy; module_param(force_nonlegacy, bool, 0644); @@ -35,63 +36,6 @@ MODULE_DESCRIPTION("virtio-pci-legacy"); MODULE_LICENSE("GPL"); MODULE_VERSION("1"); -/* Our device structure */ -struct virtio_pci_device -{ - struct virtio_device vdev; - struct pci_dev *pci_dev; - - /* the IO mapping for the PCI config space */ - void __iomem *ioaddr; - - /* a list of queues so we can dispatch IRQs */ - spinlock_t lock; - struct list_head virtqueues; - - /* MSI-X support */ - int msix_enabled; - int intx_enabled; - struct msix_entry *msix_entries; - /* Name strings for interrupts. This size should be enough, - * and I'm too lazy to allocate each name separately. */ - char (*msix_names)[256]; - /* Number of available vectors */ - unsigned msix_vectors; - /* Vectors allocated, excluding per-vq vectors if any */ - unsigned msix_used_vectors; - /* Whether we have vector per vq */ - bool per_vq_vectors; -}; - -/* Constants for MSI-X */ -/* Use first vector for configuration changes, second and the rest for - * virtqueues Thus, we need at least 2 vectors for MSI. */ -enum { - VP_MSIX_CONFIG_VECTOR = 0, - VP_MSIX_VQ_VECTOR = 1, -}; - -struct virtio_pci_vq_info -{ - /* the actual virtqueue */ - struct virtqueue *vq; - - /* the number of entries in the queue */ - int num; - - /* the index of the queue */ - int queue_index; - - /* the virtual address of the ring queue */ - void *queue; - - /* the list node for the virtqueues list */ - struct list_head node; - - /* MSI-X vector (or none) */ - unsigned msix_vector; -}; - /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */ static struct pci_device_id virtio_pci_id_table[] = { { 0x1af4, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, @@ -112,7 +56,7 @@ static u64 vp_get_features(struct virtio struct virtio_pci_device *vp_dev = to_vp_device(vdev); /* We only support 32 feature bits. */ - return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES); + return ioread32(vp_dev->legacy + VIRTIO_PCI_HOST_FEATURES); } /* virtio config->finalize_features() implementation */ @@ -124,7 +68,7 @@ static void vp_finalize_features(struct vring_transport_features(vdev); /* We only support 32 feature bits. */ - iowrite32(vdev->features, vp_dev->ioaddr+VIRTIO_PCI_GUEST_FEATURES); + iowrite32(vdev->features, vp_dev->legacy+VIRTIO_PCI_GUEST_FEATURES); } /* virtio config->get() implementation */ @@ -132,13 +76,13 @@ static void vp_get(struct virtio_device void *buf, unsigned len) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - void __iomem *ioaddr = vp_dev->ioaddr + + void __iomem *legacy = vp_dev->legacy + VIRTIO_PCI_CONFIG(vp_dev) + offset; u8 *ptr = buf; int i; for (i = 0; i < len; i++) - ptr[i] = ioread8(ioaddr + i); + ptr[i] = ioread8(legacy + i); } /* the config->set() implementation. it's symmetric to the config->get() @@ -147,20 +91,20 @@ static void vp_set(struct virtio_device const void *buf, unsigned len) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - void __iomem *ioaddr = vp_dev->ioaddr + + void __iomem *legacy = vp_dev->legacy + VIRTIO_PCI_CONFIG(vp_dev) + offset; const u8 *ptr = buf; int i; for (i = 0; i < len; i++) - iowrite8(ptr[i], ioaddr + i); + iowrite8(ptr[i], legacy + i); } /* config->{get,set}_status() implementations */ static u8 vp_get_status(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - return ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS); + return ioread8(vp_dev->legacy + VIRTIO_PCI_STATUS); } static void vp_set_status(struct virtio_device *vdev, u8 status) @@ -168,7 +112,7 @@ static void vp_set_status(struct virtio_ struct virtio_pci_device *vp_dev = to_vp_device(vdev); /* We should never be setting status to 0. */ BUG_ON(status == 0); - iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS); + iowrite8(status, vp_dev->legacy + VIRTIO_PCI_STATUS); } /* wait for pending irq handlers */ @@ -188,10 +132,10 @@ static void vp_reset(struct virtio_devic { struct virtio_pci_device *vp_dev = to_vp_device(vdev); /* 0 status means a reset. */ - iowrite8(0, vp_dev->ioaddr + VIRTIO_PCI_STATUS); + iowrite8(0, vp_dev->legacy + VIRTIO_PCI_STATUS); /* Flush out the status write, and flush in device writes, * including MSi-X interrupts, if any. */ - ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS); + ioread8(vp_dev->legacy + VIRTIO_PCI_STATUS); /* Flush pending VQ/configuration callbacks. */ vp_synchronize_vectors(vdev); } @@ -204,7 +148,7 @@ static void vp_notify(struct virtqueue * /* we write the queue's selector into the notification register to * signal the other end */ - iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY); + iowrite16(info->queue_index, vp_dev->legacy + VIRTIO_PCI_QUEUE_NOTIFY); } /* Handle a configuration change: Tell driver if it wants to know. */ @@ -251,7 +195,7 @@ static irqreturn_t vp_interrupt(int irq, /* reading the ISR has the effect of also clearing it so it's very * important to save off the value. */ - isr = ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR); + isr = ioread8(vp_dev->legacy + VIRTIO_PCI_ISR); /* It's definitely not us if the ISR was not high */ if (!isr) @@ -280,9 +224,9 @@ static void vp_free_vectors(struct virti if (vp_dev->msix_enabled) { /* Disable the vector used for configuration */ iowrite16(VIRTIO_MSI_NO_VECTOR, - vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); + vp_dev->legacy + VIRTIO_MSI_CONFIG_VECTOR); /* Flush the write out to device */ - ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); + ioread16(vp_dev->legacy + VIRTIO_MSI_CONFIG_VECTOR); pci_disable_msix(vp_dev->pci_dev); vp_dev->msix_enabled = 0; @@ -336,9 +280,9 @@ static int vp_request_msix_vectors(struc goto error; ++vp_dev->msix_used_vectors; - iowrite16(v, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); + iowrite16(v, vp_dev->legacy + VIRTIO_MSI_CONFIG_VECTOR); /* Verify we had enough resources to assign the vector */ - v = ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); + v = ioread16(vp_dev->legacy + VIRTIO_MSI_CONFIG_VECTOR); if (v == VIRTIO_MSI_NO_VECTOR) { err = -EBUSY; goto error; @@ -387,11 +331,11 @@ static struct virtqueue *setup_vq(struct int err; /* Select the queue we're interested in */ - iowrite16(index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); + iowrite16(index, vp_dev->legacy + VIRTIO_PCI_QUEUE_SEL); /* Check if queue is either not available or already active. */ - num = ioread16(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NUM); - if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN)) + num = ioread16(vp_dev->legacy + VIRTIO_PCI_QUEUE_NUM); + if (!num || ioread32(vp_dev->legacy + VIRTIO_PCI_QUEUE_PFN)) return ERR_PTR(-ENOENT); /* allocate and fill out our structure the represents an active @@ -413,7 +357,7 @@ static struct virtqueue *setup_vq(struct /* activate the queue */ iowrite32(virt_to_phys(info->queue) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT, - vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); + vp_dev->legacy + VIRTIO_PCI_QUEUE_PFN); /* create the vring */ vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN, @@ -427,8 +371,8 @@ static struct virtqueue *setup_vq(struct info->vq = vq; if (msix_vec != VIRTIO_MSI_NO_VECTOR) { - iowrite16(msix_vec, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); - msix_vec = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); + iowrite16(msix_vec, vp_dev->legacy + VIRTIO_MSI_QUEUE_VECTOR); + msix_vec = ioread16(vp_dev->legacy + VIRTIO_MSI_QUEUE_VECTOR); if (msix_vec == VIRTIO_MSI_NO_VECTOR) { err = -EBUSY; goto out_assign; @@ -448,7 +392,7 @@ static struct virtqueue *setup_vq(struct out_assign: vring_del_virtqueue(vq); out_activate_queue: - iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); + iowrite32(0, vp_dev->legacy + VIRTIO_PCI_QUEUE_PFN); free_pages_exact(info->queue, size); out_info: kfree(info); @@ -465,19 +409,19 @@ static void vp_del_vq(struct virtqueue * list_del(&info->node); spin_unlock_irqrestore(&vp_dev->lock, flags); - iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); + iowrite16(info->queue_index, vp_dev->legacy + VIRTIO_PCI_QUEUE_SEL); if (vp_dev->msix_enabled) { iowrite16(VIRTIO_MSI_NO_VECTOR, - vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); + vp_dev->legacy + VIRTIO_MSI_QUEUE_VECTOR); /* Flush the write out to device */ - ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR); + ioread8(vp_dev->legacy + VIRTIO_PCI_ISR); } vring_del_virtqueue(vq); /* Select and deactivate the queue */ - iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); + iowrite32(0, vp_dev->legacy + VIRTIO_PCI_QUEUE_PFN); size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_VRING_ALIGN)); free_pages_exact(info->queue, size); @@ -676,8 +620,8 @@ static int __devinit virtio_pci_probe(st if (err) goto out_enable_device; - vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0); - if (vp_dev->ioaddr == NULL) + vp_dev->legacy = pci_iomap(pci_dev, 0, 0); + if (vp_dev->legacy == NULL) goto out_req_regions; pci_set_drvdata(pci_dev, vp_dev); @@ -699,7 +643,7 @@ static int __devinit virtio_pci_probe(st out_set_drvdata: pci_set_drvdata(pci_dev, NULL); - pci_iounmap(pci_dev, vp_dev->ioaddr); + pci_iounmap(pci_dev, vp_dev->legacy); out_req_regions: pci_release_regions(pci_dev); out_enable_device: @@ -717,7 +661,7 @@ static void __devexit virtio_pci_remove( vp_del_vqs(&vp_dev->vdev); pci_set_drvdata(pci_dev, NULL); - pci_iounmap(pci_dev, vp_dev->ioaddr); + pci_iounmap(pci_dev, vp_dev->legacy); pci_release_regions(pci_dev); pci_disable_device(pci_dev); kfree(vp_dev);
Rusty Russell
2011-Dec-08 10:41 UTC
[RFC 9/11] virtio_pci: share interrupt/notify handlers between legacy and modern.
If we make the legacy driver set up the ->notify and ->isr pointers in the struct virtio_pci_device structure, we can use them in common code (the positions have changed, but the semantics haven't). --- drivers/virtio/Makefile | 4 - drivers/virtio/virtio_pci-common.c | 81 ++++++++++++++++++++++++++++++++ drivers/virtio/virtio_pci-common.h | 34 +++++++++++++ drivers/virtio/virtio_pci.c | 87 ++--------------------------------- drivers/virtio/virtio_pci_legacy.c | 91 ++++--------------------------------- 5 files changed, 135 insertions(+), 162 deletions(-) diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile --- a/drivers/virtio/Makefile +++ b/drivers/virtio/Makefile @@ -1,6 +1,6 @@ obj-$(CONFIG_VIRTIO) += virtio.o obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o -obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o -obj-$(CONFIG_VIRTIO_PCI_LEGACY) += virtio_pci_legacy.o +obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o virtio_pci-common.o +obj-$(CONFIG_VIRTIO_PCI_LEGACY) += virtio_pci_legacy.o virtio_pci-common.o obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o diff --git a/drivers/virtio/virtio_pci-common.c b/drivers/virtio/virtio_pci-common.c new file mode 100644 --- /dev/null +++ b/drivers/virtio/virtio_pci-common.c @@ -0,0 +1,81 @@ +/* + * Virtio PCI driver - common code for legacy and non-legacy. + * + * Copyright 2011, Rusty Russell IBM Corporation, but based on the + * older virtio_pci_legacy.c, which was Copyright IBM Corp. 2007. But + * most of the interrupt setup code was written by Michael S. Tsirkin. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#define VIRTIO_PCI_NO_LEGACY +#include "virtio_pci-common.h" +#include <linux/virtio_ring.h> + +/* the notify function used when creating a virt queue */ +void virtio_pci_notify(struct virtqueue *vq) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); + struct virtio_pci_vq_info *info = vq->priv; + + /* we write the queue's selector into the notification register to + * signal the other end */ + iowrite16(info->queue_index, vp_dev->notify); +} + +/* Handle a configuration change: Tell driver if it wants to know. */ +irqreturn_t virtio_pci_config_changed(int irq, void *opaque) +{ + struct virtio_pci_device *vp_dev = opaque; + struct virtio_driver *drv; + drv = container_of(vp_dev->vdev.dev.driver, + struct virtio_driver, driver); + + if (drv->config_changed) + drv->config_changed(&vp_dev->vdev); + return IRQ_HANDLED; +} + +/* Notify all virtqueues on an interrupt. */ +irqreturn_t virtio_pci_vring_interrupt(int irq, void *opaque) +{ + struct virtio_pci_device *vp_dev = opaque; + struct virtio_pci_vq_info *info; + irqreturn_t ret = IRQ_NONE; + unsigned long flags; + + spin_lock_irqsave(&vp_dev->lock, flags); + list_for_each_entry(info, &vp_dev->virtqueues, node) { + if (vring_interrupt(irq, info->vq) == IRQ_HANDLED) + ret = IRQ_HANDLED; + } + spin_unlock_irqrestore(&vp_dev->lock, flags); + + return ret; +} + +/* A small wrapper to also acknowledge the interrupt when it's handled. + * I really need an EIO hook for the vring so I can ack the interrupt once we + * know that we'll be handling the IRQ but before we invoke the callback since + * the callback may notify the host which results in the host attempting to + * raise an interrupt that we would then mask once we acknowledged the + * interrupt. */ +irqreturn_t virtio_pci_interrupt(int irq, void *opaque) +{ + struct virtio_pci_device *vp_dev = opaque; + u8 isr; + + /* reading the ISR has the effect of also clearing it so it's very + * important to save off the value. */ + isr = ioread8(vp_dev->isr); + + /* It's definitely not us if the ISR was not high */ + if (!isr) + return IRQ_NONE; + + /* Configuration change? Tell driver if it wants to know. */ + if (isr & VIRTIO_PCI_ISR_CONFIG) + virtio_pci_config_changed(irq, opaque); + + return virtio_pci_vring_interrupt(irq, opaque); +} diff --git a/drivers/virtio/virtio_pci-common.h b/drivers/virtio/virtio_pci-common.h --- a/drivers/virtio/virtio_pci-common.h +++ b/drivers/virtio/virtio_pci-common.h @@ -42,6 +42,12 @@ struct virtio_pci_device bool per_vq_vectors; }; +/* Convert a generic virtio device to our structure */ +static inline struct virtio_pci_device *to_vp_device(struct virtio_device *vdev) +{ + return container_of(vdev, struct virtio_pci_device, vdev); +} + /* Constants for MSI-X */ /* Use first vector for configuration changes, second and the rest for * virtqueues Thus, we need at least 2 vectors for MSI. */ @@ -70,3 +76,31 @@ struct virtio_pci_vq_info /* MSI-X vector (or none) */ unsigned msix_vector; }; + +/* the notify function used when creating a virt queue */ +void virtio_pci_notify(struct virtqueue *vq); +/* Handle a configuration change: Tell driver if it wants to know. */ +irqreturn_t virtio_pci_config_changed(int irq, void *opaque); +/* Notify all virtqueues on an interrupt. */ +irqreturn_t virtio_pci_vring_interrupt(int irq, void *opaque); +/* Acknowledge, check for config or vq interrupt. */ +irqreturn_t virtio_pci_interrupt(int irq, void *opaque); + +/* Core of a config->find_vqs() implementation */ +int virtio_pci_find_vqs(struct virtio_pci_device *vp_dev, + __le16 __iomem *msix_config, + struct virtqueue *(setup_vq)(struct virtio_pci_device *, + unsigned, + void (*)(struct virtqueue*), + const char *, + u16 msix_vec), + void (*del_vq)(struct virtqueue *vq), + unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char *names[]); + +/* the core of a config->del_vqs() implementation */ +void virtio_pci_del_vqs(struct virtio_pci_device *vp_dev, + __le16 __iomem *msix_config, + void (*del_vq)(struct virtqueue *vq)); diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -40,12 +40,6 @@ static struct pci_device_id virtio_pci_i MODULE_DEVICE_TABLE(pci, virtio_pci_id_table); -/* Convert a generic virtio device to our structure */ -static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev) -{ - return container_of(vdev, struct virtio_pci_device, vdev); -} - /* There is no iowrite64. We use two 32-bit ops. */ static void iowrite64(u64 val, const __le64 *addr) { @@ -151,74 +145,6 @@ static void vp_reset(struct virtio_devic vp_synchronize_vectors(vdev); } -/* the notify function used when creating a virt queue */ -static void vp_notify(struct virtqueue *vq) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); - struct virtio_pci_vq_info *info = vq->priv; - - /* we write the queue's selector into the notification register to - * signal the other end */ - iowrite16(info->queue_index, vp_dev->notify); -} - -/* Handle a configuration change: Tell driver if it wants to know. */ -static irqreturn_t vp_config_changed(int irq, void *opaque) -{ - struct virtio_pci_device *vp_dev = opaque; - struct virtio_driver *drv; - drv = container_of(vp_dev->vdev.dev.driver, - struct virtio_driver, driver); - - if (drv->config_changed) - drv->config_changed(&vp_dev->vdev); - return IRQ_HANDLED; -} - -/* Notify all virtqueues on an interrupt. */ -static irqreturn_t vp_vring_interrupt(int irq, void *opaque) -{ - struct virtio_pci_device *vp_dev = opaque; - struct virtio_pci_vq_info *info; - irqreturn_t ret = IRQ_NONE; - unsigned long flags; - - spin_lock_irqsave(&vp_dev->lock, flags); - list_for_each_entry(info, &vp_dev->virtqueues, node) { - if (vring_interrupt(irq, info->vq) == IRQ_HANDLED) - ret = IRQ_HANDLED; - } - spin_unlock_irqrestore(&vp_dev->lock, flags); - - return ret; -} - -/* A small wrapper to also acknowledge the interrupt when it's handled. - * I really need an EIO hook for the vring so I can ack the interrupt once we - * know that we'll be handling the IRQ but before we invoke the callback since - * the callback may notify the host which results in the host attempting to - * raise an interrupt that we would then mask once we acknowledged the - * interrupt. */ -static irqreturn_t vp_interrupt(int irq, void *opaque) -{ - struct virtio_pci_device *vp_dev = opaque; - u8 isr; - - /* reading the ISR has the effect of also clearing it so it's very - * important to save off the value. */ - isr = ioread8(vp_dev->isr); - - /* It's definitely not us if the ISR was not high */ - if (!isr) - return IRQ_NONE; - - /* Configuration change? Tell driver if it wants to know. */ - if (isr & VIRTIO_PCI_ISR_CONFIG) - vp_config_changed(irq, opaque); - - return vp_vring_interrupt(irq, opaque); -} - static void vp_free_vectors(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); @@ -284,7 +210,7 @@ static int vp_request_msix_vectors(struc snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, "%s-config", name); err = request_irq(vp_dev->msix_entries[v].vector, - vp_config_changed, 0, vp_dev->msix_names[v], + virtio_pci_config_changed, 0, vp_dev->msix_names[v], vp_dev); if (err) goto error; @@ -304,8 +230,8 @@ static int vp_request_msix_vectors(struc snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, "%s-virtqueues", name); err = request_irq(vp_dev->msix_entries[v].vector, - vp_vring_interrupt, 0, vp_dev->msix_names[v], - vp_dev); + virtio_pci_vring_interrupt, 0, + vp_dev->msix_names[v], vp_dev); if (err) goto error; ++vp_dev->msix_used_vectors; @@ -321,7 +247,7 @@ static int vp_request_intx(struct virtio int err; struct virtio_pci_device *vp_dev = to_vp_device(vdev); - err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, + err = request_irq(vp_dev->pci_dev->irq, virtio_pci_interrupt, IRQF_SHARED, dev_name(&vdev->dev), vp_dev); if (!err) vp_dev->intx_enabled = 1; @@ -396,7 +322,8 @@ static struct virtqueue *setup_vq(struct /* create the vring */ vq = vring_new_virtqueue(info->num, VIRTIO_PCI_ALIGN, - vdev, info->queue, vp_notify, callback, name); + vdev, info->queue, virtio_pci_notify, + callback, name); if (!vq) { err = -ENOMEM; goto out_alloc_pages; diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -44,12 +44,6 @@ static struct pci_device_id virtio_pci_i MODULE_DEVICE_TABLE(pci, virtio_pci_id_table); -/* Convert a generic virtio device to our structure */ -static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev) -{ - return container_of(vdev, struct virtio_pci_device, vdev); -} - /* virtio config->get_features() implementation */ static u64 vp_get_features(struct virtio_device *vdev) { @@ -140,74 +134,6 @@ static void vp_reset(struct virtio_devic vp_synchronize_vectors(vdev); } -/* the notify function used when creating a virt queue */ -static void vp_notify(struct virtqueue *vq) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); - struct virtio_pci_vq_info *info = vq->priv; - - /* we write the queue's selector into the notification register to - * signal the other end */ - iowrite16(info->queue_index, vp_dev->legacy + VIRTIO_PCI_QUEUE_NOTIFY); -} - -/* Handle a configuration change: Tell driver if it wants to know. */ -static irqreturn_t vp_config_changed(int irq, void *opaque) -{ - struct virtio_pci_device *vp_dev = opaque; - struct virtio_driver *drv; - drv = container_of(vp_dev->vdev.dev.driver, - struct virtio_driver, driver); - - if (drv && drv->config_changed) - drv->config_changed(&vp_dev->vdev); - return IRQ_HANDLED; -} - -/* Notify all virtqueues on an interrupt. */ -static irqreturn_t vp_vring_interrupt(int irq, void *opaque) -{ - struct virtio_pci_device *vp_dev = opaque; - struct virtio_pci_vq_info *info; - irqreturn_t ret = IRQ_NONE; - unsigned long flags; - - spin_lock_irqsave(&vp_dev->lock, flags); - list_for_each_entry(info, &vp_dev->virtqueues, node) { - if (vring_interrupt(irq, info->vq) == IRQ_HANDLED) - ret = IRQ_HANDLED; - } - spin_unlock_irqrestore(&vp_dev->lock, flags); - - return ret; -} - -/* A small wrapper to also acknowledge the interrupt when it's handled. - * I really need an EIO hook for the vring so I can ack the interrupt once we - * know that we'll be handling the IRQ but before we invoke the callback since - * the callback may notify the host which results in the host attempting to - * raise an interrupt that we would then mask once we acknowledged the - * interrupt. */ -static irqreturn_t vp_interrupt(int irq, void *opaque) -{ - struct virtio_pci_device *vp_dev = opaque; - u8 isr; - - /* reading the ISR has the effect of also clearing it so it's very - * important to save off the value. */ - isr = ioread8(vp_dev->legacy + VIRTIO_PCI_ISR); - - /* It's definitely not us if the ISR was not high */ - if (!isr) - return IRQ_NONE; - - /* Configuration change? Tell driver if it wants to know. */ - if (isr & VIRTIO_PCI_ISR_CONFIG) - vp_config_changed(irq, opaque); - - return vp_vring_interrupt(irq, opaque); -} - static void vp_free_vectors(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); @@ -274,7 +200,7 @@ static int vp_request_msix_vectors(struc snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, "%s-config", name); err = request_irq(vp_dev->msix_entries[v].vector, - vp_config_changed, 0, vp_dev->msix_names[v], + virtio_pci_config_changed, 0, vp_dev->msix_names[v], vp_dev); if (err) goto error; @@ -294,8 +220,8 @@ static int vp_request_msix_vectors(struc snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, "%s-virtqueues", name); err = request_irq(vp_dev->msix_entries[v].vector, - vp_vring_interrupt, 0, vp_dev->msix_names[v], - vp_dev); + virtio_pci_vring_interrupt, 0, + vp_dev->msix_names[v], vp_dev); if (err) goto error; ++vp_dev->msix_used_vectors; @@ -311,7 +237,7 @@ static int vp_request_intx(struct virtio int err; struct virtio_pci_device *vp_dev = to_vp_device(vdev); - err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, + err = request_irq(vp_dev->pci_dev->irq, virtio_pci_interrupt, IRQF_SHARED, dev_name(&vdev->dev), vp_dev); if (!err) vp_dev->intx_enabled = 1; @@ -361,7 +287,8 @@ static struct virtqueue *setup_vq(struct /* create the vring */ vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN, - vdev, info->queue, vp_notify, callback, name); + vdev, info->queue, virtio_pci_notify, + callback, name); if (!vq) { err = -ENOMEM; goto out_activate_queue; @@ -624,6 +551,10 @@ static int __devinit virtio_pci_probe(st if (vp_dev->legacy == NULL) goto out_req_regions; + /* Setting this lets us share interrupt handlers with virtio_pci */ + vp_dev->isr = vp_dev->legacy + VIRTIO_PCI_LEGACY_ISR; + vp_dev->notify = vp_dev->legacy + VIRTIO_PCI_LEGACY_QUEUE_NOTIFY; + pci_set_drvdata(pci_dev, vp_dev); pci_set_master(pci_dev);
Rusty Russell
2011-Dec-08 10:42 UTC
[RFC 10/11] virtio_pci: share virtqueue setup/teardown between modern and legacy driver.
There's a great deal of work in setting up and disabling interrupts, particularly with MSI-X, which is generic. So we move most of the work out to helpers which take the location of the msix_config register, and setup_vq and del_vq functions. --- drivers/virtio/virtio_pci-common.c | 249 +++++++++++++++++++++++++++++++++++++ drivers/virtio/virtio_pci-common.h | 19 ++ drivers/virtio/virtio_pci.c | 224 +-------------------------------- drivers/virtio/virtio_pci_legacy.c | 229 ++-------------------------------- 4 files changed, 291 insertions(+), 430 deletions(-) diff --git a/drivers/virtio/virtio_pci-common.c b/drivers/virtio/virtio_pci-common.c --- a/drivers/virtio/virtio_pci-common.c +++ b/drivers/virtio/virtio_pci-common.c @@ -11,6 +11,7 @@ #define VIRTIO_PCI_NO_LEGACY #include "virtio_pci-common.h" #include <linux/virtio_ring.h> +#include <linux/interrupt.h> /* the notify function used when creating a virt queue */ void virtio_pci_notify(struct virtqueue *vq) @@ -79,3 +80,251 @@ irqreturn_t virtio_pci_interrupt(int irq return virtio_pci_vring_interrupt(irq, opaque); } + +static void vp_free_vectors(struct virtio_device *vdev, + __le16 __iomem *msix_config) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + int i; + + if (vp_dev->intx_enabled) { + free_irq(vp_dev->pci_dev->irq, vp_dev); + vp_dev->intx_enabled = 0; + } + + for (i = 0; i < vp_dev->msix_used_vectors; ++i) + free_irq(vp_dev->msix_entries[i].vector, vp_dev); + + if (vp_dev->msix_enabled) { + /* Disable the vector used for configuration */ + iowrite16(VIRTIO_MSI_NO_VECTOR, msix_config); + /* Flush the write out to device */ + ioread16(msix_config); + + pci_disable_msix(vp_dev->pci_dev); + vp_dev->msix_enabled = 0; + vp_dev->msix_vectors = 0; + } + + vp_dev->msix_used_vectors = 0; + kfree(vp_dev->msix_names); + vp_dev->msix_names = NULL; + kfree(vp_dev->msix_entries); + vp_dev->msix_entries = NULL; +} + +static int vp_request_msix_vectors(struct virtio_device *vdev, + __le16 __iomem *msix_config, + int nvectors, bool per_vq_vectors) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + const char *name = dev_name(&vp_dev->vdev.dev); + unsigned i, v; + int err = -ENOMEM; + + vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries, + GFP_KERNEL); + if (!vp_dev->msix_entries) + goto error; + vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names, + GFP_KERNEL); + if (!vp_dev->msix_names) + goto error; + + for (i = 0; i < nvectors; ++i) + vp_dev->msix_entries[i].entry = i; + + /* pci_enable_msix returns positive if we can't get this many. */ + err = pci_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries, nvectors); + if (err > 0) + err = -ENOSPC; + if (err) + goto error; + vp_dev->msix_vectors = nvectors; + vp_dev->msix_enabled = 1; + + /* Set the vector used for configuration */ + v = vp_dev->msix_used_vectors; + snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, + "%s-config", name); + err = request_irq(vp_dev->msix_entries[v].vector, + virtio_pci_config_changed, 0, vp_dev->msix_names[v], + vp_dev); + if (err) + goto error; + ++vp_dev->msix_used_vectors; + + iowrite16(v, msix_config); + /* Verify we had enough resources to assign the vector */ + v = ioread16(msix_config); + if (v == VIRTIO_MSI_NO_VECTOR) { + err = -EBUSY; + goto error; + } + + if (!per_vq_vectors) { + /* Shared vector for all VQs */ + v = vp_dev->msix_used_vectors; + snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, + "%s-virtqueues", name); + err = request_irq(vp_dev->msix_entries[v].vector, + virtio_pci_vring_interrupt, 0, + vp_dev->msix_names[v], vp_dev); + if (err) + goto error; + ++vp_dev->msix_used_vectors; + } + return 0; +error: + vp_free_vectors(vdev, msix_config); + return err; +} + +static int vp_request_intx(struct virtio_device *vdev) +{ + int err; + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + + err = request_irq(vp_dev->pci_dev->irq, virtio_pci_interrupt, + IRQF_SHARED, dev_name(&vdev->dev), vp_dev); + if (!err) + vp_dev->intx_enabled = 1; + return err; +} + +static int vp_try_to_find_vqs(struct virtio_device *vdev, + __le16 __iomem *msix_config, + struct virtqueue *(setup_vq)(struct virtio_device*, + unsigned, + void (*)(struct + virtqueue *), + const char *, + u16 msix_vec), + void (*del_vq)(struct virtqueue *vq), + unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char *names[], + bool use_msix, + bool per_vq_vectors) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + u16 msix_vec; + int i, err, nvectors, allocated_vectors; + + if (!use_msix) { + /* Old style: one normal interrupt for change and all vqs. */ + err = vp_request_intx(vdev); + if (err) + goto error_request; + } else { + if (per_vq_vectors) { + /* Best option: one for change interrupt, one per vq. */ + nvectors = 1; + for (i = 0; i < nvqs; ++i) + if (callbacks[i]) + ++nvectors; + } else { + /* Second best: one for change, shared for all vqs. */ + nvectors = 2; + } + + err = vp_request_msix_vectors(vdev, msix_config, + nvectors, per_vq_vectors); + if (err) + goto error_request; + } + + vp_dev->per_vq_vectors = per_vq_vectors; + allocated_vectors = vp_dev->msix_used_vectors; + for (i = 0; i < nvqs; ++i) { + if (!callbacks[i] || !vp_dev->msix_enabled) + msix_vec = VIRTIO_MSI_NO_VECTOR; + else if (vp_dev->per_vq_vectors) + msix_vec = allocated_vectors++; + else + msix_vec = VP_MSIX_VQ_VECTOR; + vqs[i] = setup_vq(vdev, i, callbacks[i], names[i], msix_vec); + if (IS_ERR(vqs[i])) { + err = PTR_ERR(vqs[i]); + goto error_find; + } + + if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR) + continue; + + /* allocate per-vq irq if available and necessary */ + snprintf(vp_dev->msix_names[msix_vec], + sizeof *vp_dev->msix_names, + "%s-%s", + dev_name(&vp_dev->vdev.dev), names[i]); + err = request_irq(vp_dev->msix_entries[msix_vec].vector, + vring_interrupt, 0, + vp_dev->msix_names[msix_vec], + vqs[i]); + if (err) { + del_vq(vqs[i]); + goto error_find; + } + } + return 0; + +error_find: + virtio_pci_del_vqs(vdev, msix_config, del_vq); + +error_request: + return err; +} + +/* the config->find_vqs() implementation */ +int virtio_pci_find_vqs(struct virtio_device *vdev, + __le16 __iomem *msix_config, + struct virtqueue *(setup_vq)(struct virtio_device *, + unsigned, + void (*)(struct virtqueue*), + const char *, + u16 msix_vec), + void (*del_vq)(struct virtqueue *vq), + unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char *names[]) +{ + int err; + + /* Try MSI-X with one vector per queue. */ + err = vp_try_to_find_vqs(vdev, msix_config, setup_vq, del_vq, + nvqs, vqs, callbacks, names, true, true); + if (!err) + return 0; + /* Fallback: MSI-X with one vector for config, one shared for queues. */ + err = vp_try_to_find_vqs(vdev, msix_config, setup_vq, del_vq, + nvqs, vqs, callbacks, names, true, false); + if (!err) + return 0; + /* Finally fall back to regular interrupts. */ + return vp_try_to_find_vqs(vdev, msix_config, setup_vq, del_vq, + nvqs, vqs, callbacks, names, false, false); +} + +/* the core of a config->del_vqs() implementation */ +void virtio_pci_del_vqs(struct virtio_device *vdev, + __le16 __iomem *msix_config, + void (*del_vq)(struct virtqueue *vq)) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtqueue *vq, *n; + struct virtio_pci_vq_info *info; + + list_for_each_entry_safe(vq, n, &vdev->vqs, list) { + info = vq->priv; + if (vp_dev->per_vq_vectors && + info->msix_vector != VIRTIO_MSI_NO_VECTOR) + free_irq(vp_dev->msix_entries[info->msix_vector].vector, + vq); + del_vq(vq); + } + vp_dev->per_vq_vectors = false; + + vp_free_vectors(vdev, msix_config); +} diff --git a/drivers/virtio/virtio_pci-common.h b/drivers/virtio/virtio_pci-common.h --- a/drivers/virtio/virtio_pci-common.h +++ b/drivers/virtio/virtio_pci-common.h @@ -87,6 +87,25 @@ irqreturn_t virtio_pci_vring_interrupt(i irqreturn_t virtio_pci_interrupt(int irq, void *opaque); /* Core of a config->find_vqs() implementation */ +int virtio_pci_find_vqs(struct virtio_device *vdev, + __le16 __iomem *msix_config, + struct virtqueue *(setup_vq)(struct virtio_device *, + unsigned, + void (*)(struct virtqueue*), + const char *, + u16 msix_vec), + void (*del_vq)(struct virtqueue *vq), + unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char *names[]); + +/* the core of a config->del_vqs() implementation */ +void virtio_pci_del_vqs(struct virtio_device *vdev, + __le16 __iomem *msix_config, + void (*del_vq)(struct virtqueue *vq)); + +/* Core of a config->find_vqs() implementation */ int virtio_pci_find_vqs(struct virtio_pci_device *vp_dev, __le16 __iomem *msix_config, struct virtqueue *(setup_vq)(struct virtio_pci_device *, diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -145,115 +145,6 @@ static void vp_reset(struct virtio_devic vp_synchronize_vectors(vdev); } -static void vp_free_vectors(struct virtio_device *vdev) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - int i; - - if (vp_dev->intx_enabled) { - free_irq(vp_dev->pci_dev->irq, vp_dev); - vp_dev->intx_enabled = 0; - } - - for (i = 0; i < vp_dev->msix_used_vectors; ++i) - free_irq(vp_dev->msix_entries[i].vector, vp_dev); - - if (vp_dev->msix_enabled) { - /* Disable the vector used for configuration */ - iowrite16(VIRTIO_MSI_NO_VECTOR, &vp_dev->common->msix_config); - /* Flush the write out to device */ - ioread16(&vp_dev->common->msix_config); - - pci_disable_msix(vp_dev->pci_dev); - vp_dev->msix_enabled = 0; - vp_dev->msix_vectors = 0; - } - - vp_dev->msix_used_vectors = 0; - kfree(vp_dev->msix_names); - vp_dev->msix_names = NULL; - kfree(vp_dev->msix_entries); - vp_dev->msix_entries = NULL; -} - -static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, - bool per_vq_vectors) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - const char *name = dev_name(&vp_dev->vdev.dev); - unsigned i, v; - int err = -ENOMEM; - - vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries, - GFP_KERNEL); - if (!vp_dev->msix_entries) - goto error; - vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names, - GFP_KERNEL); - if (!vp_dev->msix_names) - goto error; - - for (i = 0; i < nvectors; ++i) - vp_dev->msix_entries[i].entry = i; - - /* pci_enable_msix returns positive if we can't get this many. */ - err = pci_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries, nvectors); - if (err > 0) - err = -ENOSPC; - if (err) - goto error; - vp_dev->msix_vectors = nvectors; - vp_dev->msix_enabled = 1; - - /* Set the vector used for configuration */ - v = vp_dev->msix_used_vectors; - snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, - "%s-config", name); - err = request_irq(vp_dev->msix_entries[v].vector, - virtio_pci_config_changed, 0, vp_dev->msix_names[v], - vp_dev); - if (err) - goto error; - ++vp_dev->msix_used_vectors; - - iowrite16(v, &vp_dev->common->msix_config); - /* Verify we had enough resources to assign the vector */ - v = ioread16(&vp_dev->common->msix_config); - if (v == VIRTIO_MSI_NO_VECTOR) { - err = -EBUSY; - goto error; - } - - if (!per_vq_vectors) { - /* Shared vector for all VQs */ - v = vp_dev->msix_used_vectors; - snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, - "%s-virtqueues", name); - err = request_irq(vp_dev->msix_entries[v].vector, - virtio_pci_vring_interrupt, 0, - vp_dev->msix_names[v], vp_dev); - if (err) - goto error; - ++vp_dev->msix_used_vectors; - } - return 0; -error: - vp_free_vectors(vdev); - return err; -} - -static int vp_request_intx(struct virtio_device *vdev) -{ - int err; - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - - err = request_irq(vp_dev->pci_dev->irq, virtio_pci_interrupt, - IRQF_SHARED, dev_name(&vdev->dev), vp_dev); - if (!err) - vp_dev->intx_enabled = 1; - return err; -} - static void *alloc_virtqueue_pages(u16 *num) { void *pages; @@ -403,116 +294,19 @@ static void vp_del_vq(struct virtqueue * static void vp_del_vqs(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtqueue *vq, *n; - struct virtio_pci_vq_info *info; - - list_for_each_entry_safe(vq, n, &vdev->vqs, list) { - info = vq->priv; - if (vp_dev->per_vq_vectors && - info->msix_vector != VIRTIO_MSI_NO_VECTOR) - free_irq(vp_dev->msix_entries[info->msix_vector].vector, - vq); - vp_del_vq(vq); - } - vp_dev->per_vq_vectors = false; - - vp_free_vectors(vdev); + virtio_pci_del_vqs(vdev, &vp_dev->common->msix_config, vp_del_vq); } -static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char *names[], - bool use_msix, - bool per_vq_vectors) +static int vp_find_vqs(struct virtio_device *vdev, + unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char *names[]) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - u16 msix_vec; - int i, err, nvectors, allocated_vectors; - - if (!use_msix) { - /* Old style: one normal interrupt for change and all vqs. */ - err = vp_request_intx(vdev); - if (err) - goto error_request; - } else { - if (per_vq_vectors) { - /* Best option: one for change interrupt, one per vq. */ - nvectors = 1; - for (i = 0; i < nvqs; ++i) - if (callbacks[i]) - ++nvectors; - } else { - /* Second best: one for change, shared for all vqs. */ - nvectors = 2; - } - - err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors); - if (err) - goto error_request; - } - - vp_dev->per_vq_vectors = per_vq_vectors; - allocated_vectors = vp_dev->msix_used_vectors; - for (i = 0; i < nvqs; ++i) { - if (!callbacks[i] || !vp_dev->msix_enabled) - msix_vec = VIRTIO_MSI_NO_VECTOR; - else if (vp_dev->per_vq_vectors) - msix_vec = allocated_vectors++; - else - msix_vec = VP_MSIX_VQ_VECTOR; - vqs[i] = setup_vq(vdev, i, callbacks[i], names[i], msix_vec); - if (IS_ERR(vqs[i])) { - err = PTR_ERR(vqs[i]); - goto error_find; - } - - if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR) - continue; - - /* allocate per-vq irq if available and necessary */ - snprintf(vp_dev->msix_names[msix_vec], - sizeof *vp_dev->msix_names, - "%s-%s", - dev_name(&vp_dev->vdev.dev), names[i]); - err = request_irq(vp_dev->msix_entries[msix_vec].vector, - vring_interrupt, 0, - vp_dev->msix_names[msix_vec], - vqs[i]); - if (err) { - vp_del_vq(vqs[i]); - goto error_find; - } - } - return 0; - -error_find: - vp_del_vqs(vdev); - -error_request: - return err; -} - -/* the config->find_vqs() implementation */ -static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char *names[]) -{ - int err; - - /* Try MSI-X with one vector per queue. */ - err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, true); - if (!err) - return 0; - /* Fallback: MSI-X with one vector for config, one shared for queues. */ - err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, - true, false); - if (!err) - return 0; - /* Finally fall back to regular interrupts. */ - return vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, - false, false); + return virtio_pci_find_vqs(vdev, &vp_dev->common->msix_config, + setup_vq, vp_del_vq, + nvqs, vqs, callbacks, names); } static struct virtio_config_ops virtio_pci_config_ops = { diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -134,120 +134,11 @@ static void vp_reset(struct virtio_devic vp_synchronize_vectors(vdev); } -static void vp_free_vectors(struct virtio_device *vdev) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - int i; - - if (vp_dev->intx_enabled) { - free_irq(vp_dev->pci_dev->irq, vp_dev); - vp_dev->intx_enabled = 0; - } - - for (i = 0; i < vp_dev->msix_used_vectors; ++i) - free_irq(vp_dev->msix_entries[i].vector, vp_dev); - - if (vp_dev->msix_enabled) { - /* Disable the vector used for configuration */ - iowrite16(VIRTIO_MSI_NO_VECTOR, - vp_dev->legacy + VIRTIO_MSI_CONFIG_VECTOR); - /* Flush the write out to device */ - ioread16(vp_dev->legacy + VIRTIO_MSI_CONFIG_VECTOR); - - pci_disable_msix(vp_dev->pci_dev); - vp_dev->msix_enabled = 0; - vp_dev->msix_vectors = 0; - } - - vp_dev->msix_used_vectors = 0; - kfree(vp_dev->msix_names); - vp_dev->msix_names = NULL; - kfree(vp_dev->msix_entries); - vp_dev->msix_entries = NULL; -} - -static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, - bool per_vq_vectors) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - const char *name = dev_name(&vp_dev->vdev.dev); - unsigned i, v; - int err = -ENOMEM; - - vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries, - GFP_KERNEL); - if (!vp_dev->msix_entries) - goto error; - vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names, - GFP_KERNEL); - if (!vp_dev->msix_names) - goto error; - - for (i = 0; i < nvectors; ++i) - vp_dev->msix_entries[i].entry = i; - - /* pci_enable_msix returns positive if we can't get this many. */ - err = pci_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries, nvectors); - if (err > 0) - err = -ENOSPC; - if (err) - goto error; - vp_dev->msix_vectors = nvectors; - vp_dev->msix_enabled = 1; - - /* Set the vector used for configuration */ - v = vp_dev->msix_used_vectors; - snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, - "%s-config", name); - err = request_irq(vp_dev->msix_entries[v].vector, - virtio_pci_config_changed, 0, vp_dev->msix_names[v], - vp_dev); - if (err) - goto error; - ++vp_dev->msix_used_vectors; - - iowrite16(v, vp_dev->legacy + VIRTIO_MSI_CONFIG_VECTOR); - /* Verify we had enough resources to assign the vector */ - v = ioread16(vp_dev->legacy + VIRTIO_MSI_CONFIG_VECTOR); - if (v == VIRTIO_MSI_NO_VECTOR) { - err = -EBUSY; - goto error; - } - - if (!per_vq_vectors) { - /* Shared vector for all VQs */ - v = vp_dev->msix_used_vectors; - snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, - "%s-virtqueues", name); - err = request_irq(vp_dev->msix_entries[v].vector, - virtio_pci_vring_interrupt, 0, - vp_dev->msix_names[v], vp_dev); - if (err) - goto error; - ++vp_dev->msix_used_vectors; - } - return 0; -error: - vp_free_vectors(vdev); - return err; -} - -static int vp_request_intx(struct virtio_device *vdev) -{ - int err; - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - - err = request_irq(vp_dev->pci_dev->irq, virtio_pci_interrupt, - IRQF_SHARED, dev_name(&vdev->dev), vp_dev); - if (!err) - vp_dev->intx_enabled = 1; - return err; -} - -static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index, - void (*callback)(struct virtqueue *vq), - const char *name, - u16 msix_vec) +static struct virtqueue *setup_legacy_vq(struct virtio_device *vdev, + unsigned index, + void (*callback)(struct virtqueue *vq), + const char *name, + u16 msix_vec) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_vq_info *info; @@ -326,7 +217,7 @@ out_info: return ERR_PTR(err); } -static void vp_del_vq(struct virtqueue *vq) +static void del_legacy_vq(struct virtqueue *vq) { struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); struct virtio_pci_vq_info *info = vq->priv; @@ -359,94 +250,10 @@ static void vp_del_vq(struct virtqueue * static void vp_del_vqs(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtqueue *vq, *n; - struct virtio_pci_vq_info *info; - list_for_each_entry_safe(vq, n, &vdev->vqs, list) { - info = vq->priv; - if (vp_dev->per_vq_vectors && - info->msix_vector != VIRTIO_MSI_NO_VECTOR) - free_irq(vp_dev->msix_entries[info->msix_vector].vector, - vq); - vp_del_vq(vq); - } - vp_dev->per_vq_vectors = false; - - vp_free_vectors(vdev); -} - -static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char *names[], - bool use_msix, - bool per_vq_vectors) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - u16 msix_vec; - int i, err, nvectors, allocated_vectors; - - if (!use_msix) { - /* Old style: one normal interrupt for change and all vqs. */ - err = vp_request_intx(vdev); - if (err) - goto error_request; - } else { - if (per_vq_vectors) { - /* Best option: one for change interrupt, one per vq. */ - nvectors = 1; - for (i = 0; i < nvqs; ++i) - if (callbacks[i]) - ++nvectors; - } else { - /* Second best: one for change, shared for all vqs. */ - nvectors = 2; - } - - err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors); - if (err) - goto error_request; - } - - vp_dev->per_vq_vectors = per_vq_vectors; - allocated_vectors = vp_dev->msix_used_vectors; - for (i = 0; i < nvqs; ++i) { - if (!callbacks[i] || !vp_dev->msix_enabled) - msix_vec = VIRTIO_MSI_NO_VECTOR; - else if (vp_dev->per_vq_vectors) - msix_vec = allocated_vectors++; - else - msix_vec = VP_MSIX_VQ_VECTOR; - vqs[i] = setup_vq(vdev, i, callbacks[i], names[i], msix_vec); - if (IS_ERR(vqs[i])) { - err = PTR_ERR(vqs[i]); - goto error_find; - } - - if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR) - continue; - - /* allocate per-vq irq if available and necessary */ - snprintf(vp_dev->msix_names[msix_vec], - sizeof *vp_dev->msix_names, - "%s-%s", - dev_name(&vp_dev->vdev.dev), names[i]); - err = request_irq(vp_dev->msix_entries[msix_vec].vector, - vring_interrupt, 0, - vp_dev->msix_names[msix_vec], - vqs[i]); - if (err) { - vp_del_vq(vqs[i]); - goto error_find; - } - } - return 0; - -error_find: - vp_del_vqs(vdev); - -error_request: - return err; + return virtio_pci_del_vqs(vdev, vp_dev->legacy + + VIRTIO_MSI_LEGACY_CONFIG_VECTOR, + del_legacy_vq); } /* the config->find_vqs() implementation */ @@ -455,20 +262,12 @@ static int vp_find_vqs(struct virtio_dev vq_callback_t *callbacks[], const char *names[]) { - int err; + struct virtio_pci_device *vp_dev = to_vp_device(vdev); - /* Try MSI-X with one vector per queue. */ - err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, true); - if (!err) - return 0; - /* Fallback: MSI-X with one vector for config, one shared for queues. */ - err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, - true, false); - if (!err) - return 0; - /* Finally fall back to regular interrupts. */ - return vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, - false, false); + return virtio_pci_find_vqs(vdev, vp_dev->legacy + + VIRTIO_MSI_LEGACY_CONFIG_VECTOR, + setup_legacy_vq, del_legacy_vq, + nvqs, vqs, callbacks, names); } static struct virtio_config_ops virtio_pci_config_ops = {
Our helpers can take a virtio_pci_device, rather than converting from a virtio_device all the time. They couldn't do this when they were called from the common virtio code, but now we wrap them anyway, it simplifies things. --- drivers/virtio/virtio_pci-common.c | 54 ++++++++++++++++--------------------- drivers/virtio/virtio_pci-common.h | 6 ++-- drivers/virtio/virtio_pci.c | 10 +++--- drivers/virtio/virtio_pci_legacy.c | 9 ++---- 4 files changed, 36 insertions(+), 43 deletions(-) diff --git a/drivers/virtio/virtio_pci-common.c b/drivers/virtio/virtio_pci-common.c --- a/drivers/virtio/virtio_pci-common.c +++ b/drivers/virtio/virtio_pci-common.c @@ -81,10 +81,9 @@ irqreturn_t virtio_pci_interrupt(int irq return virtio_pci_vring_interrupt(irq, opaque); } -static void vp_free_vectors(struct virtio_device *vdev, +static void vp_free_vectors(struct virtio_pci_device *vp_dev, __le16 __iomem *msix_config) { - struct virtio_pci_device *vp_dev = to_vp_device(vdev); int i; if (vp_dev->intx_enabled) { @@ -113,11 +112,10 @@ static void vp_free_vectors(struct virti vp_dev->msix_entries = NULL; } -static int vp_request_msix_vectors(struct virtio_device *vdev, +static int vp_request_msix_vectors(struct virtio_pci_device *vp_dev, __le16 __iomem *msix_config, int nvectors, bool per_vq_vectors) { - struct virtio_pci_device *vp_dev = to_vp_device(vdev); const char *name = dev_name(&vp_dev->vdev.dev); unsigned i, v; int err = -ENOMEM; @@ -176,30 +174,28 @@ static int vp_request_msix_vectors(struc } return 0; error: - vp_free_vectors(vdev, msix_config); + vp_free_vectors(vp_dev, msix_config); return err; } -static int vp_request_intx(struct virtio_device *vdev) +static int vp_request_intx(struct virtio_pci_device *vp_dev) { int err; - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - err = request_irq(vp_dev->pci_dev->irq, virtio_pci_interrupt, - IRQF_SHARED, dev_name(&vdev->dev), vp_dev); + IRQF_SHARED, dev_name(&vp_dev->vdev.dev), vp_dev); if (!err) vp_dev->intx_enabled = 1; return err; } -static int vp_try_to_find_vqs(struct virtio_device *vdev, +static int vp_try_to_find_vqs(struct virtio_pci_device *vp_dev, __le16 __iomem *msix_config, - struct virtqueue *(setup_vq)(struct virtio_device*, - unsigned, - void (*)(struct - virtqueue *), - const char *, - u16 msix_vec), + struct virtqueue *(setup_vq) + (struct virtio_pci_device *, + unsigned, + void (*)(struct virtqueue *), + const char *, + u16 msix_vec), void (*del_vq)(struct virtqueue *vq), unsigned nvqs, struct virtqueue *vqs[], @@ -208,13 +204,12 @@ static int vp_try_to_find_vqs(struct vir bool use_msix, bool per_vq_vectors) { - struct virtio_pci_device *vp_dev = to_vp_device(vdev); u16 msix_vec; int i, err, nvectors, allocated_vectors; if (!use_msix) { /* Old style: one normal interrupt for change and all vqs. */ - err = vp_request_intx(vdev); + err = vp_request_intx(vp_dev); if (err) goto error_request; } else { @@ -229,7 +224,7 @@ static int vp_try_to_find_vqs(struct vir nvectors = 2; } - err = vp_request_msix_vectors(vdev, msix_config, + err = vp_request_msix_vectors(vp_dev, msix_config, nvectors, per_vq_vectors); if (err) goto error_request; @@ -244,7 +239,7 @@ static int vp_try_to_find_vqs(struct vir msix_vec = allocated_vectors++; else msix_vec = VP_MSIX_VQ_VECTOR; - vqs[i] = setup_vq(vdev, i, callbacks[i], names[i], msix_vec); + vqs[i] = setup_vq(vp_dev, i, callbacks[i], names[i], msix_vec); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); goto error_find; @@ -270,16 +265,16 @@ static int vp_try_to_find_vqs(struct vir return 0; error_find: - virtio_pci_del_vqs(vdev, msix_config, del_vq); + virtio_pci_del_vqs(vp_dev, msix_config, del_vq); error_request: return err; } /* the config->find_vqs() implementation */ -int virtio_pci_find_vqs(struct virtio_device *vdev, +int virtio_pci_find_vqs(struct virtio_pci_device *vp_dev, __le16 __iomem *msix_config, - struct virtqueue *(setup_vq)(struct virtio_device *, + struct virtqueue *(setup_vq)(struct virtio_pci_device *, unsigned, void (*)(struct virtqueue*), const char *, @@ -293,30 +288,29 @@ int virtio_pci_find_vqs(struct virtio_de int err; /* Try MSI-X with one vector per queue. */ - err = vp_try_to_find_vqs(vdev, msix_config, setup_vq, del_vq, + err = vp_try_to_find_vqs(vp_dev, msix_config, setup_vq, del_vq, nvqs, vqs, callbacks, names, true, true); if (!err) return 0; /* Fallback: MSI-X with one vector for config, one shared for queues. */ - err = vp_try_to_find_vqs(vdev, msix_config, setup_vq, del_vq, + err = vp_try_to_find_vqs(vp_dev, msix_config, setup_vq, del_vq, nvqs, vqs, callbacks, names, true, false); if (!err) return 0; /* Finally fall back to regular interrupts. */ - return vp_try_to_find_vqs(vdev, msix_config, setup_vq, del_vq, + return vp_try_to_find_vqs(vp_dev, msix_config, setup_vq, del_vq, nvqs, vqs, callbacks, names, false, false); } /* the core of a config->del_vqs() implementation */ -void virtio_pci_del_vqs(struct virtio_device *vdev, +void virtio_pci_del_vqs(struct virtio_pci_device *vp_dev, __le16 __iomem *msix_config, void (*del_vq)(struct virtqueue *vq)) { - struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtqueue *vq, *n; struct virtio_pci_vq_info *info; - list_for_each_entry_safe(vq, n, &vdev->vqs, list) { + list_for_each_entry_safe(vq, n, &vp_dev->vdev.vqs, list) { info = vq->priv; if (vp_dev->per_vq_vectors && info->msix_vector != VIRTIO_MSI_NO_VECTOR) @@ -326,5 +320,5 @@ void virtio_pci_del_vqs(struct virtio_de } vp_dev->per_vq_vectors = false; - vp_free_vectors(vdev, msix_config); + vp_free_vectors(vp_dev, msix_config); } diff --git a/drivers/virtio/virtio_pci-common.h b/drivers/virtio/virtio_pci-common.h --- a/drivers/virtio/virtio_pci-common.h +++ b/drivers/virtio/virtio_pci-common.h @@ -87,9 +87,9 @@ irqreturn_t virtio_pci_vring_interrupt(i irqreturn_t virtio_pci_interrupt(int irq, void *opaque); /* Core of a config->find_vqs() implementation */ -int virtio_pci_find_vqs(struct virtio_device *vdev, +int virtio_pci_find_vqs(struct virtio_pci_device *vp_dev, __le16 __iomem *msix_config, - struct virtqueue *(setup_vq)(struct virtio_device *, + struct virtqueue *(setup_vq)(struct virtio_pci_device *, unsigned, void (*)(struct virtqueue*), const char *, @@ -101,7 +101,7 @@ int virtio_pci_find_vqs(struct virtio_de const char *names[]); /* the core of a config->del_vqs() implementation */ -void virtio_pci_del_vqs(struct virtio_device *vdev, +void virtio_pci_del_vqs(struct virtio_pci_device *vp_dev, __le16 __iomem *msix_config, void (*del_vq)(struct virtqueue *vq)); diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -163,12 +163,12 @@ static void *alloc_virtqueue_pages(u16 * return NULL; } -static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index, +static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, + unsigned index, void (*callback)(struct virtqueue *vq), const char *name, u16 msix_vec) { - struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_vq_info *info; struct virtqueue *vq; u16 num; @@ -213,7 +213,7 @@ static struct virtqueue *setup_vq(struct /* create the vring */ vq = vring_new_virtqueue(info->num, VIRTIO_PCI_ALIGN, - vdev, info->queue, virtio_pci_notify, + &vp_dev->vdev, info->queue, virtio_pci_notify, callback, name); if (!vq) { err = -ENOMEM; @@ -294,7 +294,7 @@ static void vp_del_vq(struct virtqueue * static void vp_del_vqs(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - virtio_pci_del_vqs(vdev, &vp_dev->common->msix_config, vp_del_vq); + virtio_pci_del_vqs(vp_dev, &vp_dev->common->msix_config, vp_del_vq); } static int vp_find_vqs(struct virtio_device *vdev, @@ -304,7 +304,7 @@ static int vp_find_vqs(struct virtio_dev const char *names[]) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - return virtio_pci_find_vqs(vdev, &vp_dev->common->msix_config, + return virtio_pci_find_vqs(vp_dev, &vp_dev->common->msix_config, setup_vq, vp_del_vq, nvqs, vqs, callbacks, names); } diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -134,13 +134,12 @@ static void vp_reset(struct virtio_devic vp_synchronize_vectors(vdev); } -static struct virtqueue *setup_legacy_vq(struct virtio_device *vdev, +static struct virtqueue *setup_legacy_vq(struct virtio_pci_device *vp_dev, unsigned index, void (*callback)(struct virtqueue *vq), const char *name, u16 msix_vec) { - struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_vq_info *info; struct virtqueue *vq; unsigned long flags, size; @@ -178,7 +177,7 @@ static struct virtqueue *setup_legacy_vq /* create the vring */ vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN, - vdev, info->queue, virtio_pci_notify, + &vp_dev->vdev, info->queue, virtio_pci_notify, callback, name); if (!vq) { err = -ENOMEM; @@ -251,7 +250,7 @@ static void vp_del_vqs(struct virtio_dev { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - return virtio_pci_del_vqs(vdev, vp_dev->legacy + + return virtio_pci_del_vqs(vp_dev, vp_dev->legacy + VIRTIO_MSI_LEGACY_CONFIG_VECTOR, del_legacy_vq); } @@ -264,7 +263,7 @@ static int vp_find_vqs(struct virtio_dev { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - return virtio_pci_find_vqs(vdev, vp_dev->legacy + + return virtio_pci_find_vqs(vp_dev, vp_dev->legacy + VIRTIO_MSI_LEGACY_CONFIG_VECTOR, setup_legacy_vq, del_legacy_vq, nvqs, vqs, callbacks, names);
On Thu, 2011-12-08 at 20:52 +1030, Rusty Russell wrote:> Here's the patch series I ended up with. I haven't coded up the QEMU > side yet, so no idea if the new driver works. > > Questions: > (1) Do we win from separating ISR, NOTIFY and COMMON? > (2) I used a "u8 bar"; should I use a bir and pack it instead? BIR > seems a little obscure (noone else in the kernel source seems to > refer to it).I started implementing it for KVM tools, when I noticed a strange thing: my vq creating was failing because the driver was reading a value other than 0 from the address field of a new vq, and failing. I've added simple prints in the usermode code, and saw the following ordering: 1. queue select vq 0 2. queue read address (returns 0 - new vq) 3. queue write address (good address of vq) 4. queue read address (returns !=0, fails) 4. queue select vq 1
On Thu, Dec 08, 2011 at 09:02:46PM +1030, Rusty Russell wrote:> From: Michael S Tsirkin <mst at redhat.com> > > Virtio drivers should map the part of the range they need, not necessarily > all of it. > > Signed-off-by: Michael S. Tsirkin <mst at redhat.com> > Signed-off-by: Rusty Russell <rusty at rustcorp.com.au>I think that we should add a forcenocache flag. This will let devices put the cap structure in the prefetcheable BAR. That has an advantage that it can be located anywhere in the 2^64 space, while non-prefetcheable BARs are limited to lower 4G for devices behind a PCI-to-PCI bridge.> --- > include/asm-generic/io.h | 4 ++++ > include/asm-generic/iomap.h | 11 +++++++++++ > lib/iomap.c | 41 ++++++++++++++++++++++++++++++++++++----- > 3 files changed, 51 insertions(+), 5 deletions(-) > > diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h > index 9120887..3cf1787 100644 > --- a/include/asm-generic/io.h > +++ b/include/asm-generic/io.h > @@ -286,6 +286,10 @@ static inline void writesb(const void __iomem *addr, const void *buf, int len) > /* Create a virtual mapping cookie for a PCI BAR (memory or IO) */ > struct pci_dev; > extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max); > +extern void __iomem *pci_iomap_range(struct pci_dev *dev, int bar, > + unsigned offset, > + unsigned long minlen, > + unsigned long maxlen); > static inline void pci_iounmap(struct pci_dev *dev, void __iomem *p) > { > } > diff --git a/include/asm-generic/iomap.h b/include/asm-generic/iomap.h > index 98dcd76..6f192d4 100644 > --- a/include/asm-generic/iomap.h > +++ b/include/asm-generic/iomap.h > @@ -70,8 +70,19 @@ extern void ioport_unmap(void __iomem *); > /* Create a virtual mapping cookie for a PCI BAR (memory or IO) */ > struct pci_dev; > extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max); > +extern void __iomem *pci_iomap_range(struct pci_dev *dev, int bar, > + unsigned offset, > + unsigned long minlen, > + unsigned long maxlen); > extern void pci_iounmap(struct pci_dev *dev, void __iomem *); > #else > +static inline void __iomem *pci_iomap_range(struct pci_dev *dev, int bar, > + unsigned offset, > + unsigned long minlen, > + unsigned long maxlen) > +{ > + return NULL; > +} > struct pci_dev; > static inline void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max) > { > diff --git a/lib/iomap.c b/lib/iomap.c > index 5dbcb4b..93ae915 100644 > --- a/lib/iomap.c > +++ b/lib/iomap.c > @@ -243,26 +243,37 @@ EXPORT_SYMBOL(ioport_unmap); > > #ifdef CONFIG_PCI > /** > - * pci_iomap - create a virtual mapping cookie for a PCI BAR > + * pci_iomap_range - create a virtual mapping cookie for a PCI BAR > * @dev: PCI device that owns the BAR > * @bar: BAR number > - * @maxlen: length of the memory to map > + * @offset: map memory at the given offset in BAR > + * @minlen: min length of the memory to map > + * @maxlen: max length of the memory to map > * > * Using this function you will get a __iomem address to your device BAR. > * You can access it using ioread*() and iowrite*(). These functions hide > * the details if this is a MMIO or PIO address space and will just do what > * you expect from them in the correct way. > * > + * @minlen specifies the minimum length to map. We check that BAR is > + * large enough. > * @maxlen specifies the maximum length to map. If you want to get access to > - * the complete BAR without checking for its length first, pass %0 here. > + * the complete BAR from offset to the end, pass %0 here. > * */ > -void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) > +void __iomem *pci_iomap_range(struct pci_dev *dev, int bar, > + unsigned offset, > + unsigned long minlen, > + unsigned long maxlen) > { > resource_size_t start = pci_resource_start(dev, bar); > resource_size_t len = pci_resource_len(dev, bar); > unsigned long flags = pci_resource_flags(dev, bar); > > - if (!len || !start) > + if (len <= offset || !start) > + return NULL; > + len -= offset; > + start += offset; > + if (len < minlen) > return NULL; > if (maxlen && len > maxlen) > len = maxlen; > @@ -277,10 +288,30 @@ void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) > return NULL; > } > > +/** > + * pci_iomap - create a virtual mapping cookie for a PCI BAR > + * @dev: PCI device that owns the BAR > + * @bar: BAR number > + * @maxlen: length of the memory to map > + * > + * Using this function you will get a __iomem address to your device BAR. > + * You can access it using ioread*() and iowrite*(). These functions hide > + * the details if this is a MMIO or PIO address space and will just do what > + * you expect from them in the correct way. > + * > + * @maxlen specifies the maximum length to map. If you want to get access to > + * the complete BAR without checking for its length first, pass %0 here. > + * */ > +void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) > +{ > + return pci_iomap_range(dev, bar, 0, 0, maxlen); > +} > + > void pci_iounmap(struct pci_dev *dev, void __iomem * addr) > { > IO_COND(addr, /* nothing */, iounmap(addr)); > } > EXPORT_SYMBOL(pci_iomap); > +EXPORT_SYMBOL(pci_iomap_range); > EXPORT_SYMBOL(pci_iounmap); > #endif /* CONFIG_PCI */