Hi, This patchset allows vhost to be used with legacy virtio when guest and host have a different endianness. Patch 7 got rewritten according to Cornelia's and Michael's comments. I have also introduced patch 8 that brings BE vnet headers support to tun/macvtap. This series is enough to have vhost_net working flawlessly. I could succesfully reboot guests from ppc64 to ppc64le and vice-versa on ppc64 and ppc64le hosts. --- Greg Kurz (8): virtio: introduce virtio_is_little_endian() helper tun: add tun_is_little_endian() helper macvtap: introduce macvtap_is_little_endian() helper vringh: introduce vringh_is_little_endian() helper vhost: introduce vhost_is_little_endian() helper virtio: add explicit big-endian support to memory accessors vhost: feature to set the vring endianness macvtap/tun: add VNET_BE flag drivers/net/Kconfig | 12 ++++++ drivers/net/macvtap.c | 69 ++++++++++++++++++++++++++++++++++- drivers/net/tun.c | 71 +++++++++++++++++++++++++++++++++++- drivers/vhost/Kconfig | 10 +++++ drivers/vhost/vhost.c | 76 ++++++++++++++++++++++++++++++++++++++ drivers/vhost/vhost.h | 25 ++++++++++--- include/linux/virtio_byteorder.h | 24 +++++++----- include/linux/virtio_config.h | 19 +++++++--- include/linux/vringh.h | 19 +++++++--- include/uapi/linux/if_tun.h | 2 + include/uapi/linux/vhost.h | 9 +++++ 11 files changed, 303 insertions(+), 33 deletions(-) -- Greg
Greg Kurz
2015-Apr-10 10:15 UTC
[PATCH v4 1/8] virtio: introduce virtio_is_little_endian() helper
Signed-off-by: Greg Kurz <gkurz at linux.vnet.ibm.com> --- include/linux/virtio_config.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index ca3ed78..bd1a582 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -205,35 +205,40 @@ int virtqueue_set_affinity(struct virtqueue *vq, int cpu) return 0; } +static inline bool virtio_is_little_endian(struct virtio_device *vdev) +{ + return virtio_has_feature(vdev, VIRTIO_F_VERSION_1); +} + /* Memory accessors */ static inline u16 virtio16_to_cpu(struct virtio_device *vdev, __virtio16 val) { - return __virtio16_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __virtio16_to_cpu(virtio_is_little_endian(vdev), val); } static inline __virtio16 cpu_to_virtio16(struct virtio_device *vdev, u16 val) { - return __cpu_to_virtio16(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __cpu_to_virtio16(virtio_is_little_endian(vdev), val); } static inline u32 virtio32_to_cpu(struct virtio_device *vdev, __virtio32 val) { - return __virtio32_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __virtio32_to_cpu(virtio_is_little_endian(vdev), val); } static inline __virtio32 cpu_to_virtio32(struct virtio_device *vdev, u32 val) { - return __cpu_to_virtio32(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __cpu_to_virtio32(virtio_is_little_endian(vdev), val); } static inline u64 virtio64_to_cpu(struct virtio_device *vdev, __virtio64 val) { - return __virtio64_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __virtio64_to_cpu(virtio_is_little_endian(vdev), val); } static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val) { - return __cpu_to_virtio64(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __cpu_to_virtio64(virtio_is_little_endian(vdev), val); } /* Config space accessors. */
Signed-off-by: Greg Kurz <gkurz at linux.vnet.ibm.com> --- drivers/net/tun.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 857dca4..3c3d6c0 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -206,14 +206,19 @@ struct tun_struct { u32 flow_count; }; +static inline bool tun_is_little_endian(struct tun_struct *tun) +{ + return tun->flags & TUN_VNET_LE; +} + static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val) { - return __virtio16_to_cpu(tun->flags & TUN_VNET_LE, val); + return __virtio16_to_cpu(tun_is_little_endian(tun), val); } static inline __virtio16 cpu_to_tun16(struct tun_struct *tun, u16 val) { - return __cpu_to_virtio16(tun->flags & TUN_VNET_LE, val); + return __cpu_to_virtio16(tun_is_little_endian(tun), val); } static inline u32 tun_hashfn(u32 rxhash)
Greg Kurz
2015-Apr-10 10:15 UTC
[PATCH v4 3/8] macvtap: introduce macvtap_is_little_endian() helper
Signed-off-by: Greg Kurz <gkurz at linux.vnet.ibm.com> --- drivers/net/macvtap.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 27ecc5c..a2f2958 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -49,14 +49,19 @@ struct macvtap_queue { #define MACVTAP_VNET_LE 0x80000000 +static inline bool macvtap_is_little_endian(struct macvtap_queue *q) +{ + return q->flags & MACVTAP_VNET_LE; +} + static inline u16 macvtap16_to_cpu(struct macvtap_queue *q, __virtio16 val) { - return __virtio16_to_cpu(q->flags & MACVTAP_VNET_LE, val); + return __virtio16_to_cpu(macvtap_is_little_endian(q), val); } static inline __virtio16 cpu_to_macvtap16(struct macvtap_queue *q, u16 val) { - return __cpu_to_virtio16(q->flags & MACVTAP_VNET_LE, val); + return __cpu_to_virtio16(macvtap_is_little_endian(q), val); } static struct proto macvtap_proto = {
Greg Kurz
2015-Apr-10 10:15 UTC
[PATCH v4 4/8] vringh: introduce vringh_is_little_endian() helper
Signed-off-by: Greg Kurz <gkurz at linux.vnet.ibm.com> --- include/linux/vringh.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/include/linux/vringh.h b/include/linux/vringh.h index a3fa537..3ed62ef 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -226,33 +226,38 @@ static inline void vringh_notify(struct vringh *vrh) vrh->notify(vrh); } +static inline bool vringh_is_little_endian(const struct vringh *vrh) +{ + return vrh->little_endian; +} + static inline u16 vringh16_to_cpu(const struct vringh *vrh, __virtio16 val) { - return __virtio16_to_cpu(vrh->little_endian, val); + return __virtio16_to_cpu(vringh_is_little_endian(vrh), val); } static inline __virtio16 cpu_to_vringh16(const struct vringh *vrh, u16 val) { - return __cpu_to_virtio16(vrh->little_endian, val); + return __cpu_to_virtio16(vringh_is_little_endian(vrh), val); } static inline u32 vringh32_to_cpu(const struct vringh *vrh, __virtio32 val) { - return __virtio32_to_cpu(vrh->little_endian, val); + return __virtio32_to_cpu(vringh_is_little_endian(vrh), val); } static inline __virtio32 cpu_to_vringh32(const struct vringh *vrh, u32 val) { - return __cpu_to_virtio32(vrh->little_endian, val); + return __cpu_to_virtio32(vringh_is_little_endian(vrh), val); } static inline u64 vringh64_to_cpu(const struct vringh *vrh, __virtio64 val) { - return __virtio64_to_cpu(vrh->little_endian, val); + return __virtio64_to_cpu(vringh_is_little_endian(vrh), val); } static inline __virtio64 cpu_to_vringh64(const struct vringh *vrh, u64 val) { - return __cpu_to_virtio64(vrh->little_endian, val); + return __cpu_to_virtio64(vringh_is_little_endian(vrh), val); } #endif /* _LINUX_VRINGH_H */
Greg Kurz
2015-Apr-10 10:16 UTC
[PATCH v4 5/8] vhost: introduce vhost_is_little_endian() helper
Signed-off-by: Greg Kurz <gkurz at linux.vnet.ibm.com> --- drivers/vhost/vhost.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 8c1c792..6a49960 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -173,34 +173,39 @@ static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit) return vq->acked_features & (1ULL << bit); } +static inline bool vhost_is_little_endian(struct vhost_virtqueue *vq) +{ + return vhost_has_feature(vq, VIRTIO_F_VERSION_1); +} + /* Memory accessors */ static inline u16 vhost16_to_cpu(struct vhost_virtqueue *vq, __virtio16 val) { - return __virtio16_to_cpu(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val); + return __virtio16_to_cpu(vhost_is_little_endian(vq), val); } static inline __virtio16 cpu_to_vhost16(struct vhost_virtqueue *vq, u16 val) { - return __cpu_to_virtio16(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val); + return __cpu_to_virtio16(vhost_is_little_endian(vq), val); } static inline u32 vhost32_to_cpu(struct vhost_virtqueue *vq, __virtio32 val) { - return __virtio32_to_cpu(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val); + return __virtio32_to_cpu(vhost_is_little_endian(vq), val); } static inline __virtio32 cpu_to_vhost32(struct vhost_virtqueue *vq, u32 val) { - return __cpu_to_virtio32(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val); + return __cpu_to_virtio32(vhost_is_little_endian(vq), val); } static inline u64 vhost64_to_cpu(struct vhost_virtqueue *vq, __virtio64 val) { - return __virtio64_to_cpu(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val); + return __virtio64_to_cpu(vhost_is_little_endian(vq), val); } static inline __virtio64 cpu_to_vhost64(struct vhost_virtqueue *vq, u64 val) { - return __cpu_to_virtio64(vhost_has_feature(vq, VIRTIO_F_VERSION_1), val); + return __cpu_to_virtio64(vhost_is_little_endian(vq), val); } #endif
Greg Kurz
2015-Apr-10 10:16 UTC
[PATCH v4 6/8] virtio: add explicit big-endian support to memory accessors
The current memory accessors logic is: - little endian if little_endian - native endian (i.e. no byteswap) if !little_endian If we want to fully support cross-endian vhost, we also need to be able to convert to big endian. Instead of changing the little_endian argument to some 3-value enum, this patch changes the logic to: - little endian if little_endian - big endian if !little_endian The native endian case is handled by all users with a trivial helper. This patch doesn't change any functionality, nor it does add overhead. Signed-off-by: Greg Kurz <gkurz at linux.vnet.ibm.com> --- drivers/net/macvtap.c | 4 +++- drivers/net/tun.c | 4 +++- drivers/vhost/vhost.h | 4 +++- include/linux/virtio_byteorder.h | 24 ++++++++++++++---------- include/linux/virtio_config.h | 4 +++- include/linux/vringh.h | 4 +++- 6 files changed, 29 insertions(+), 15 deletions(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index a2f2958..0a03a66 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -51,7 +51,9 @@ struct macvtap_queue { static inline bool macvtap_is_little_endian(struct macvtap_queue *q) { - return q->flags & MACVTAP_VNET_LE; + if (q->flags & MACVTAP_VNET_LE) + return true; + return virtio_legacy_is_little_endian(); } static inline u16 macvtap16_to_cpu(struct macvtap_queue *q, __virtio16 val) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 3c3d6c0..053f9b6 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -208,7 +208,9 @@ struct tun_struct { static inline bool tun_is_little_endian(struct tun_struct *tun) { - return tun->flags & TUN_VNET_LE; + if (tun->flags & TUN_VNET_LE) + return true; + return virtio_legacy_is_little_endian(); } static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val) diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 6a49960..4e9a186 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -175,7 +175,9 @@ static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit) static inline bool vhost_is_little_endian(struct vhost_virtqueue *vq) { - return vhost_has_feature(vq, VIRTIO_F_VERSION_1); + if (vhost_has_feature(vq, VIRTIO_F_VERSION_1)) + return true; + return virtio_legacy_is_little_endian(); } /* Memory accessors */ diff --git a/include/linux/virtio_byteorder.h b/include/linux/virtio_byteorder.h index 51865d0..ce63a2c 100644 --- a/include/linux/virtio_byteorder.h +++ b/include/linux/virtio_byteorder.h @@ -3,17 +3,21 @@ #include <linux/types.h> #include <uapi/linux/virtio_types.h> -/* - * Low-level memory accessors for handling virtio in modern little endian and in - * compatibility native endian format. - */ +static inline bool virtio_legacy_is_little_endian(void) +{ +#ifdef __LITTLE_ENDIAN + return true; +#else + return false; +#endif +} static inline u16 __virtio16_to_cpu(bool little_endian, __virtio16 val) { if (little_endian) return le16_to_cpu((__force __le16)val); else - return (__force u16)val; + return be16_to_cpu((__force __be16)val); } static inline __virtio16 __cpu_to_virtio16(bool little_endian, u16 val) @@ -21,7 +25,7 @@ static inline __virtio16 __cpu_to_virtio16(bool little_endian, u16 val) if (little_endian) return (__force __virtio16)cpu_to_le16(val); else - return (__force __virtio16)val; + return (__force __virtio16)cpu_to_be16(val); } static inline u32 __virtio32_to_cpu(bool little_endian, __virtio32 val) @@ -29,7 +33,7 @@ static inline u32 __virtio32_to_cpu(bool little_endian, __virtio32 val) if (little_endian) return le32_to_cpu((__force __le32)val); else - return (__force u32)val; + return be32_to_cpu((__force __be32)val); } static inline __virtio32 __cpu_to_virtio32(bool little_endian, u32 val) @@ -37,7 +41,7 @@ static inline __virtio32 __cpu_to_virtio32(bool little_endian, u32 val) if (little_endian) return (__force __virtio32)cpu_to_le32(val); else - return (__force __virtio32)val; + return (__force __virtio32)cpu_to_be32(val); } static inline u64 __virtio64_to_cpu(bool little_endian, __virtio64 val) @@ -45,7 +49,7 @@ static inline u64 __virtio64_to_cpu(bool little_endian, __virtio64 val) if (little_endian) return le64_to_cpu((__force __le64)val); else - return (__force u64)val; + return be64_to_cpu((__force __be64)val); } static inline __virtio64 __cpu_to_virtio64(bool little_endian, u64 val) @@ -53,7 +57,7 @@ static inline __virtio64 __cpu_to_virtio64(bool little_endian, u64 val) if (little_endian) return (__force __virtio64)cpu_to_le64(val); else - return (__force __virtio64)val; + return (__force __virtio64)cpu_to_be64(val); } #endif /* _LINUX_VIRTIO_BYTEORDER */ diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index bd1a582..36a6daa 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -207,7 +207,9 @@ int virtqueue_set_affinity(struct virtqueue *vq, int cpu) static inline bool virtio_is_little_endian(struct virtio_device *vdev) { - return virtio_has_feature(vdev, VIRTIO_F_VERSION_1); + if (virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) + return true; + return virtio_legacy_is_little_endian(); } /* Memory accessors */ diff --git a/include/linux/vringh.h b/include/linux/vringh.h index 3ed62ef..d786c2d 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -228,7 +228,9 @@ static inline void vringh_notify(struct vringh *vrh) static inline bool vringh_is_little_endian(const struct vringh *vrh) { - return vrh->little_endian; + if (vrh->little_endian) + return true; + return virtio_legacy_is_little_endian(); } static inline u16 vringh16_to_cpu(const struct vringh *vrh, __virtio16 val)
This patch brings cross-endian support to vhost when used to implement legacy virtio devices. Since it is a relatively rare situation, the feature availability is controlled by a kernel config option (not set by default). The vq->is_le boolean field is added to cache the endianness to be used for ring accesses. It defaults to native endian, as expected by legacy virtio devices. When the ring gets active, we force little endian if the device is modern. When the ring is deactivated, we revert to the native endian default. If cross-endian was compiled in, a vq->user_be boolean field is added so that userspace may request a specific endianness. This field is used to override the default when activating the ring of a legacy device. It has no effect on modern devices. Signed-off-by: Greg Kurz <gkurz at linux.vnet.ibm.com> --- drivers/vhost/Kconfig | 10 ++++++ drivers/vhost/vhost.c | 76 +++++++++++++++++++++++++++++++++++++++++++- drivers/vhost/vhost.h | 12 +++++-- include/uapi/linux/vhost.h | 9 +++++ 4 files changed, 103 insertions(+), 4 deletions(-) Changes since v3: - VHOST_SET_VRING_ENDIAN_LEGACY ioctl renamed to VHOST_SET_VRING_BIG_ENDIAN - ioctl API is now: 0 for le, 1 for be, other values are EINVAL - ioctl doesn't filter out modern devices - ioctl stubs return ENOIOCTLCMD - forbid endianness changes when vring is active - logic now handled with vq->is_le and vq->user_be according to device start/stop as suggested by Michael diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 017a1e8..0aec88c 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -32,3 +32,13 @@ config VHOST ---help--- This option is selected by any driver which needs to access the core of vhost. + +config VHOST_SET_ENDIAN_LEGACY + bool "Cross-endian support for host kernel accelerator" + default n + ---help--- + This option allows vhost to support guests with a different byte + ordering from host. It is disabled by default since it adds overhead + and it is only needed by a few platforms (powerpc and arm). + + If unsure, say "N". diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 2ee2826..3eb756b 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -199,6 +199,10 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->call = NULL; vq->log_ctx = NULL; vq->memory = NULL; + vq->is_le = virtio_legacy_is_little_endian(); +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY + vq->user_be = !vq->is_le; +#endif } static int vhost_worker(void *data) @@ -630,6 +634,53 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) return 0; } +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY +static long vhost_set_vring_big_endian(struct vhost_virtqueue *vq, + int __user *argp) +{ + struct vhost_vring_state s; + + if (vq->private_data) + return -EBUSY; + + if (copy_from_user(&s, argp, sizeof(s))) + return -EFAULT; + + if (s.num && s.num != 1) + return -EINVAL; + + vq->user_be = s.num; + + return 0; +} + +static long vhost_get_vring_big_endian(struct vhost_virtqueue *vq, u32 idx, + int __user *argp) +{ + struct vhost_vring_state s = { + .index = idx, + .num = vq->user_be + }; + + if (copy_to_user(argp, &s, sizeof(s))) + return -EFAULT; + + return 0; +} +#else +static long vhost_set_vring_big_endian(struct vhost_virtqueue *vq, + int __user *argp) +{ + return -ENOIOCTLCMD; +} + +static long vhost_get_vring_big_endian(struct vhost_virtqueue *vq, u32 idx, + int __user *argp) +{ + return -ENOIOCTLCMD; +} +#endif /* CONFIG_VHOST_SET_ENDIAN_LEGACY */ + long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) { struct file *eventfp, *filep = NULL; @@ -806,6 +857,12 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) } else filep = eventfp; break; + case VHOST_SET_VRING_BIG_ENDIAN: + r = vhost_set_vring_big_endian(vq, argp); + break; + case VHOST_GET_VRING_BIG_ENDIAN: + r = vhost_get_vring_big_endian(vq, idx, argp); + break; default: r = -ENOIOCTLCMD; } @@ -1040,12 +1097,29 @@ static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) return 0; } +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY +static void vhost_init_is_le(struct vhost_virtqueue *vq) +{ + vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be; +} +#else +static void vhost_init_is_le(struct vhost_virtqueue *vq) +{ + if (vhost_has_feature(vq, VIRTIO_F_VERSION_1)) + vq->is_le = true; +} +#endif + int vhost_init_used(struct vhost_virtqueue *vq) { __virtio16 last_used_idx; int r; - if (!vq->private_data) + if (!vq->private_data) { + vq->is_le = virtio_legacy_is_little_endian(); return 0; + } + + vhost_init_is_le(vq); r = vhost_update_used_flags(vq); if (r) diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 4e9a186..04b2add 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -106,6 +106,14 @@ struct vhost_virtqueue { /* Log write descriptors */ void __user *log_base; struct vhost_log *log; + + /* Ring endianness. Defaults to legacy native endianness. + * Set to true when starting a modern virtio device. */ + bool is_le; +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY + /* Ring endianness requested by userspace for cross-endian support. */ + bool user_be; +#endif }; struct vhost_dev { @@ -175,9 +183,7 @@ static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit) static inline bool vhost_is_little_endian(struct vhost_virtqueue *vq) { - if (vhost_has_feature(vq, VIRTIO_F_VERSION_1)) - return true; - return virtio_legacy_is_little_endian(); + return vq->is_le; } /* Memory accessors */ diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index bb6a5b4..5cdebbc 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -103,6 +103,15 @@ struct vhost_memory { /* Get accessor: reads index, writes value in num */ #define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state) +/* Set the vring byte order in num. This is a legacy only API that is simply + * ignored when VIRTIO_F_VERSION_1 is set. + * 0 to set to little-endian + * 1 to set to big-endian + * other values return EINVAL. + */ +#define VHOST_SET_VRING_BIG_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state) +#define VHOST_GET_VRING_BIG_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state) + /* The following ioctls use eventfd file descriptors to signal and poll * for events. */
The VNET_LE flag was introduced to fix accesses to virtio 1.0 headers that are always little-endian. It can also be used to handle the special case of a legacy little-endian device implemented by a big-endian host. Let's add a flag and ioctls for big-endian devices as well. If both flags are set, little-endian wins. Since this is isn't a common usecase, the feature is controlled by a kernel config option (not set by default). Both macvtap and tun are covered by this patch since they share the same API with userland. Signed-off-by: Greg Kurz <gkurz at linux.vnet.ibm.com> --- drivers/net/Kconfig | 12 ++++++++ drivers/net/macvtap.c | 60 +++++++++++++++++++++++++++++++++++++++++- drivers/net/tun.c | 62 ++++++++++++++++++++++++++++++++++++++++++- include/uapi/linux/if_tun.h | 2 + 4 files changed, 134 insertions(+), 2 deletions(-) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index df51d60..f0e23a0 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -244,6 +244,18 @@ config TUN If you don't know what to use this for, you don't need it. +config TUN_VNET_BE + bool "Support for big-endian vnet headers" + default n + ---help--- + This option allows TUN/TAP and MACVTAP device drivers to parse + vnet headers that are in big-endian byte order. It is useful + when the headers come from a big-endian legacy virtio driver and + the host is little-endian. + + Unless you have a little-endian system hosting a big-endian virtual + machine with a virtio NIC, you should say N. + config VETH tristate "Virtual ethernet pair device" ---help--- diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 0a03a66..e0ab1b7 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -48,12 +48,27 @@ struct macvtap_queue { #define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) #define MACVTAP_VNET_LE 0x80000000 +#define MACVTAP_VNET_BE 0x40000000 + +#ifdef CONFIG_TUN_VNET_BE +static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q) +{ + if (q->flags & MACVTAP_VNET_BE) + return false; + return virtio_legacy_is_little_endian(); +} +#else +static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q) +{ + return virtio_legacy_is_little_endian(); +} +#endif static inline bool macvtap_is_little_endian(struct macvtap_queue *q) { if (q->flags & MACVTAP_VNET_LE) return true; - return virtio_legacy_is_little_endian(); + return macvtap_legacy_is_little_endian(q); } static inline u16 macvtap16_to_cpu(struct macvtap_queue *q, __virtio16 val) @@ -1000,6 +1015,43 @@ static int set_offload(struct macvtap_queue *q, unsigned long arg) return 0; } +#ifdef CONFIG_TUN_VNET_BE +static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *sp) +{ + int s = !!(q->flags & MACVTAP_VNET_BE); + + if (put_user(s, sp)) + return -EFAULT; + + return 0; +} + +static long macvtap_set_vnet_be(struct macvtap_queue *q, int __user *sp) +{ + int s; + + if (get_user(s, sp)) + return -EFAULT; + + if (s) + q->flags |= MACVTAP_VNET_BE; + else + q->flags &= ~MACVTAP_VNET_BE; + + return 0; +} +#else +static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *argp) +{ + return -EINVAL; +} + +static long macvtap_set_vnet_be(struct macvtap_queue *q, int __user *argp) +{ + return -EINVAL; +} +#endif /* CONFIG_TUN_VNET_BE */ + /* * provide compatibility with generic tun/tap interface */ @@ -1097,6 +1149,12 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, q->flags &= ~MACVTAP_VNET_LE; return 0; + case TUNGETVNETBE: + return macvtap_get_vnet_be(q, sp); + + case TUNSETVNETBE: + return macvtap_set_vnet_be(q, sp); + case TUNSETOFFLOAD: /* let the user check for future flags */ if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 053f9b6..4e12488 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -111,6 +111,7 @@ do { \ #define TUN_FASYNC IFF_ATTACH_QUEUE /* High bits in flags field are unused. */ #define TUN_VNET_LE 0x80000000 +#define TUN_VNET_BE 0x40000000 #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \ IFF_MULTI_QUEUE) @@ -206,11 +207,25 @@ struct tun_struct { u32 flow_count; }; +#ifdef CONFIG_TUN_VNET_BE +static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) +{ + if (tun->flags & TUN_VNET_BE) + return false; + return virtio_legacy_is_little_endian(); +} +#else +static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) +{ + return virtio_legacy_is_little_endian(); +} +#endif + static inline bool tun_is_little_endian(struct tun_struct *tun) { if (tun->flags & TUN_VNET_LE) return true; - return virtio_legacy_is_little_endian(); + return tun_legacy_is_little_endian(tun); } static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val) @@ -1836,6 +1851,43 @@ unlock: return ret; } +#ifdef CONFIG_TUN_VNET_BE +static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) +{ + int be = !!(tun->flags & TUN_VNET_BE); + + if (put_user(be, argp)) + return EFAULT; + + return 0; +} + +static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) +{ + int be; + + if (get_user(be, argp)) + return -EFAULT; + + if (be) + tun->flags |= TUN_VNET_BE; + else + tun->flags &= ~TUN_VNET_BE; + + return 0; +} +#else +static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) +{ + return -EINVAL; +} + +static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) +{ + return -EINVAL; +} +#endif /* CONFIG_TUN_VNET_BE */ + static long __tun_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg, int ifreq_len) { @@ -2065,6 +2117,14 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, tun->flags &= ~TUN_VNET_LE; break; + case TUNGETVNETBE: + ret = tun_get_vnet_be(tun, argp); + break; + + case TUNSETVNETBE: + ret = tun_set_vnet_be(tun, argp); + break; + case TUNATTACHFILTER: /* Can be set only for TAPs */ ret = -EINVAL; diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index 50ae243..bcac4c0 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -50,6 +50,8 @@ #define TUNGETFILTER _IOR('T', 219, struct sock_fprog) #define TUNSETVNETLE _IOW('T', 220, int) #define TUNGETVNETLE _IOR('T', 221, int) +#define TUNSETVNETBE _IOW('T', 222, int) +#define TUNGETVNETBE _IOR('T', 223, int) /* TUNSETIFF ifr flags */ #define IFF_TUN 0x0001
Cornelia Huck
2015-Apr-14 14:20 UTC
[PATCH v4 7/8] vhost: feature to set the vring endianness
On Fri, 10 Apr 2015 12:19:16 +0200 Greg Kurz <gkurz at linux.vnet.ibm.com> wrote:> This patch brings cross-endian support to vhost when used to implement > legacy virtio devices. Since it is a relatively rare situation, the > feature availability is controlled by a kernel config option (not set > by default). > > The vq->is_le boolean field is added to cache the endianness to be > used for ring accesses. It defaults to native endian, as expected > by legacy virtio devices. When the ring gets active, we force little > endian if the device is modern. When the ring is deactivated, we > revert to the native endian default. > > If cross-endian was compiled in, a vq->user_be boolean field is added > so that userspace may request a specific endianness. This field is > used to override the default when activating the ring of a legacy > device. It has no effect on modern devices. > > Signed-off-by: Greg Kurz <gkurz at linux.vnet.ibm.com> > --- > drivers/vhost/Kconfig | 10 ++++++ > drivers/vhost/vhost.c | 76 +++++++++++++++++++++++++++++++++++++++++++- > drivers/vhost/vhost.h | 12 +++++-- > include/uapi/linux/vhost.h | 9 +++++ > 4 files changed, 103 insertions(+), 4 deletions(-)> +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY > +static long vhost_set_vring_big_endian(struct vhost_virtqueue *vq, > + int __user *argp) > +{ > + struct vhost_vring_state s; > + > + if (vq->private_data) > + return -EBUSY; > + > + if (copy_from_user(&s, argp, sizeof(s))) > + return -EFAULT; > + > + if (s.num && s.num != 1)Checking for s.num > 1 might be more obvious at a glance?> + return -EINVAL; > + > + vq->user_be = s.num; > + > + return 0; > +} > +(...)> +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY > +static void vhost_init_is_le(struct vhost_virtqueue *vq) > +{ > + vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be;So if the endianness is not explicitly set to BE, it will be forced to LE (instead of native endian)? Won't that break userspace that does not yet use the new interface when CONFIG_VHOST_SET_ENDIAN_LEGACY is set?> +} > +#else > +static void vhost_init_is_le(struct vhost_virtqueue *vq) > +{ > + if (vhost_has_feature(vq, VIRTIO_F_VERSION_1)) > + vq->is_le = true; > +} > +#endif > + > int vhost_init_used(struct vhost_virtqueue *vq) > { > __virtio16 last_used_idx; > int r; > - if (!vq->private_data) > + if (!vq->private_data) { > + vq->is_le = virtio_legacy_is_little_endian(); > return 0; > + } > + > + vhost_init_is_le(vq); > > r = vhost_update_used_flags(vq); > if (r)
On Fri, 10 Apr 2015 12:15:00 +0200 Greg Kurz <gkurz at linux.vnet.ibm.com> wrote:> Hi, > > This patchset allows vhost to be used with legacy virtio when guest and host > have a different endianness. > > Patch 7 got rewritten according to Cornelia's and Michael's comments. I have > also introduced patch 8 that brings BE vnet headers support to tun/macvtap. > > This series is enough to have vhost_net working flawlessly. I could > succesfully reboot guests from ppc64 to ppc64le and vice-versa on ppc64 > and ppc64le hosts. >Michael, I am ready to post v5 with the changes suggested by Cornelia... Do you have any extra comments on this v4 ? -- Greg> --- > > Greg Kurz (8): > virtio: introduce virtio_is_little_endian() helper > tun: add tun_is_little_endian() helper > macvtap: introduce macvtap_is_little_endian() helper > vringh: introduce vringh_is_little_endian() helper > vhost: introduce vhost_is_little_endian() helper > virtio: add explicit big-endian support to memory accessors > vhost: feature to set the vring endianness > macvtap/tun: add VNET_BE flag > > > drivers/net/Kconfig | 12 ++++++ > drivers/net/macvtap.c | 69 ++++++++++++++++++++++++++++++++++- > drivers/net/tun.c | 71 +++++++++++++++++++++++++++++++++++- > drivers/vhost/Kconfig | 10 +++++ > drivers/vhost/vhost.c | 76 ++++++++++++++++++++++++++++++++++++++ > drivers/vhost/vhost.h | 25 ++++++++++--- > include/linux/virtio_byteorder.h | 24 +++++++----- > include/linux/virtio_config.h | 19 +++++++--- > include/linux/vringh.h | 19 +++++++--- > include/uapi/linux/if_tun.h | 2 + > include/uapi/linux/vhost.h | 9 +++++ > 11 files changed, 303 insertions(+), 33 deletions(-) > > -- > Greg > > _______________________________________________ > Virtualization mailing list > Virtualization at lists.linux-foundation.org > https://lists.linuxfoundation.org/mailman/listinfo/virtualization >
On Fri, 17 Apr 2015 11:18:13 +0200 Greg Kurz <gkurz at linux.vnet.ibm.com> wrote:> On Fri, 10 Apr 2015 12:15:00 +0200 > Greg Kurz <gkurz at linux.vnet.ibm.com> wrote: > > > Hi, > > > > This patchset allows vhost to be used with legacy virtio when guest and host > > have a different endianness. > > > > Patch 7 got rewritten according to Cornelia's and Michael's comments. I have > > also introduced patch 8 that brings BE vnet headers support to tun/macvtap. > > > > This series is enough to have vhost_net working flawlessly. I could > > succesfully reboot guests from ppc64 to ppc64le and vice-versa on ppc64 > > and ppc64le hosts. > > > > Michael, > > I am ready to post v5 with the changes suggested by Cornelia... Do you have any > extra comments on this v4 ? >Ping ?> -- > Greg > > > --- > > > > Greg Kurz (8): > > virtio: introduce virtio_is_little_endian() helper > > tun: add tun_is_little_endian() helper > > macvtap: introduce macvtap_is_little_endian() helper > > vringh: introduce vringh_is_little_endian() helper > > vhost: introduce vhost_is_little_endian() helper > > virtio: add explicit big-endian support to memory accessors > > vhost: feature to set the vring endianness > > macvtap/tun: add VNET_BE flag > > > > > > drivers/net/Kconfig | 12 ++++++ > > drivers/net/macvtap.c | 69 ++++++++++++++++++++++++++++++++++- > > drivers/net/tun.c | 71 +++++++++++++++++++++++++++++++++++- > > drivers/vhost/Kconfig | 10 +++++ > > drivers/vhost/vhost.c | 76 ++++++++++++++++++++++++++++++++++++++ > > drivers/vhost/vhost.h | 25 ++++++++++--- > > include/linux/virtio_byteorder.h | 24 +++++++----- > > include/linux/virtio_config.h | 19 +++++++--- > > include/linux/vringh.h | 19 +++++++--- > > include/uapi/linux/if_tun.h | 2 + > > include/uapi/linux/vhost.h | 9 +++++ > > 11 files changed, 303 insertions(+), 33 deletions(-) > > > > -- > > Greg > > > > _______________________________________________ > > Virtualization mailing list > > Virtualization at lists.linux-foundation.org > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization > > >
Michael S. Tsirkin
2015-Apr-21 14:04 UTC
[PATCH v4 7/8] vhost: feature to set the vring endianness
On Fri, Apr 10, 2015 at 12:19:16PM +0200, Greg Kurz wrote:> This patch brings cross-endian support to vhost when used to implement > legacy virtio devices. Since it is a relatively rare situation, the > feature availability is controlled by a kernel config option (not set > by default). > > The vq->is_le boolean field is added to cache the endianness to be > used for ring accesses. It defaults to native endian, as expected > by legacy virtio devices. When the ring gets active, we force little > endian if the device is modern. When the ring is deactivated, we > revert to the native endian default. > > If cross-endian was compiled in, a vq->user_be boolean field is added > so that userspace may request a specific endianness. This field is > used to override the default when activating the ring of a legacy > device. It has no effect on modern devices. > > Signed-off-by: Greg Kurz <gkurz at linux.vnet.ibm.com> > --- > drivers/vhost/Kconfig | 10 ++++++ > drivers/vhost/vhost.c | 76 +++++++++++++++++++++++++++++++++++++++++++- > drivers/vhost/vhost.h | 12 +++++-- > include/uapi/linux/vhost.h | 9 +++++ > 4 files changed, 103 insertions(+), 4 deletions(-) > > Changes since v3: > - VHOST_SET_VRING_ENDIAN_LEGACY ioctl renamed to VHOST_SET_VRING_BIG_ENDIAN > - ioctl API is now: 0 for le, 1 for be, other values are EINVAL > - ioctl doesn't filter out modern devices > - ioctl stubs return ENOIOCTLCMD > - forbid endianness changes when vring is active > - logic now handled with vq->is_le and vq->user_be according to device > start/stop as suggested by Michael > > diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig > index 017a1e8..0aec88c 100644 > --- a/drivers/vhost/Kconfig > +++ b/drivers/vhost/Kconfig > @@ -32,3 +32,13 @@ config VHOST > ---help--- > This option is selected by any driver which needs to access > the core of vhost. > + > +config VHOST_SET_ENDIAN_LEGACYI'd prefer namin this VHOST_CROSS_ENDIAN_LEGACY> + bool "Cross-endian support for host kernel accelerator" > + default n > + ---help--- > + This option allows vhost to support guests with a different byte > + ordering from host. It is disabled by default since it adds overhead > + and it is only needed by a few platforms (powerpc and arm).and is only useful on a few platforms (powerpc and arm). "it" seems to refer to "overhead", which is rarely needed. needed is a bit too strong, you can always e.g. run virtio in userspace.> + > + If unsure, say "N". > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > index 2ee2826..3eb756b 100644 > --- a/drivers/vhost/vhost.c > +++ b/drivers/vhost/vhost.c > @@ -199,6 +199,10 @@ static void vhost_vq_reset(struct vhost_dev *dev, > vq->call = NULL; > vq->log_ctx = NULL; > vq->memory = NULL; > + vq->is_le = virtio_legacy_is_little_endian(); > +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY > + vq->user_be = !vq->is_le; > +#endifadd a wrapper for this too?> } > > static int vhost_worker(void *data) > @@ -630,6 +634,53 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) > return 0; > } > > +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY > +static long vhost_set_vring_big_endian(struct vhost_virtqueue *vq, > + int __user *argp) > +{ > + struct vhost_vring_state s; > + > + if (vq->private_data) > + return -EBUSY; > + > + if (copy_from_user(&s, argp, sizeof(s))) > + return -EFAULT; > + > + if (s.num && s.num != 1)s.num & ~0x1> + return -EINVAL; > + > + vq->user_be = s.num; > + > + return 0; > +} > + > +static long vhost_get_vring_big_endian(struct vhost_virtqueue *vq, u32 idx, > + int __user *argp) > +{ > + struct vhost_vring_state s = { > + .index = idx, > + .num = vq->user_be > + }; > + > + if (copy_to_user(argp, &s, sizeof(s))) > + return -EFAULT; > + > + return 0; > +} > +#else > +static long vhost_set_vring_big_endian(struct vhost_virtqueue *vq, > + int __user *argp) > +{ > + return -ENOIOCTLCMD; > +} > + > +static long vhost_get_vring_big_endian(struct vhost_virtqueue *vq, u32 idx, > + int __user *argp) > +{ > + return -ENOIOCTLCMD; > +} > +#endif /* CONFIG_VHOST_SET_ENDIAN_LEGACY */ > + > long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) > { > struct file *eventfp, *filep = NULL; > @@ -806,6 +857,12 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) > } else > filep = eventfp; > break; > + case VHOST_SET_VRING_BIG_ENDIAN: > + r = vhost_set_vring_big_endian(vq, argp); > + break; > + case VHOST_GET_VRING_BIG_ENDIAN: > + r = vhost_get_vring_big_endian(vq, idx, argp); > + break; > default: > r = -ENOIOCTLCMD; > } > @@ -1040,12 +1097,29 @@ static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) > return 0; > } > > +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY > +static void vhost_init_is_le(struct vhost_virtqueue *vq) > +{ > + vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be; > +} > +#else > +static void vhost_init_is_le(struct vhost_virtqueue *vq) > +{ > + if (vhost_has_feature(vq, VIRTIO_F_VERSION_1)) > + vq->is_le = true; > +} > +#endif > +I'd prefer localizing ifdefery somewhere near top of file.> int vhost_init_used(struct vhost_virtqueue *vq) > { > __virtio16 last_used_idx; > int r; > - if (!vq->private_data) > + if (!vq->private_data) { > + vq->is_le = virtio_legacy_is_little_endian(); > return 0; > + } > + > + vhost_init_is_le(vq); > > r = vhost_update_used_flags(vq); > if (r) > diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h > index 4e9a186..04b2add 100644 > --- a/drivers/vhost/vhost.h > +++ b/drivers/vhost/vhost.h > @@ -106,6 +106,14 @@ struct vhost_virtqueue { > /* Log write descriptors */ > void __user *log_base; > struct vhost_log *log; > + > + /* Ring endianness. Defaults to legacy native endianness. > + * Set to true when starting a modern virtio device. */ > + bool is_le; > +#ifdef CONFIG_VHOST_SET_ENDIAN_LEGACY > + /* Ring endianness requested by userspace for cross-endian support. */ > + bool user_be; > +#endif > }; > > struct vhost_dev { > @@ -175,9 +183,7 @@ static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit) > > static inline bool vhost_is_little_endian(struct vhost_virtqueue *vq) > { > - if (vhost_has_feature(vq, VIRTIO_F_VERSION_1)) > - return true; > - return virtio_legacy_is_little_endian(); > + return vq->is_le; > } > > /* Memory accessors */ > diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h > index bb6a5b4..5cdebbc 100644 > --- a/include/uapi/linux/vhost.h > +++ b/include/uapi/linux/vhost.h > @@ -103,6 +103,15 @@ struct vhost_memory { > /* Get accessor: reads index, writes value in num */ > #define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state) > > +/* Set the vring byte order in num. This is a legacy only API that is simply > + * ignored when VIRTIO_F_VERSION_1 is set. > + * 0 to set to little-endian > + * 1 to set to big-endianHow about defines for these?> + * other values return EINVAL. > + */ > +#define VHOST_SET_VRING_BIG_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state) > +#define VHOST_GET_VRING_BIG_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state) > + > /* The following ioctls use eventfd file descriptors to signal and poll > * for events. */ >
On Fri, Apr 10, 2015 at 12:20:21PM +0200, Greg Kurz wrote:> The VNET_LE flag was introduced to fix accesses to virtio 1.0 headers > that are always little-endian. It can also be used to handle the special > case of a legacy little-endian device implemented by a big-endian host. > > Let's add a flag and ioctls for big-endian devices as well. If both flags > are set, little-endian wins. > > Since this is isn't a common usecase, the feature is controlled by a kernel > config option (not set by default). > > Both macvtap and tun are covered by this patch since they share the same > API with userland. > > Signed-off-by: Greg Kurz <gkurz at linux.vnet.ibm.com> > --- > drivers/net/Kconfig | 12 ++++++++ > drivers/net/macvtap.c | 60 +++++++++++++++++++++++++++++++++++++++++- > drivers/net/tun.c | 62 ++++++++++++++++++++++++++++++++++++++++++- > include/uapi/linux/if_tun.h | 2 + > 4 files changed, 134 insertions(+), 2 deletions(-) > > diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig > index df51d60..f0e23a0 100644 > --- a/drivers/net/Kconfig > +++ b/drivers/net/Kconfig > @@ -244,6 +244,18 @@ config TUN > > If you don't know what to use this for, you don't need it. > > +config TUN_VNET_BE > + bool "Support for big-endian vnet headers" > + default n > + ---help--- > + This option allows TUN/TAP and MACVTAP device drivers to parse > + vnet headers that are in big-endian byte order. It is useful > + when the headers come from a big-endian legacy virtio driver and > + the host is little-endian. > + > + Unless you have a little-endian system hosting a big-endian virtual > + machine with a virtio NIC, you should say N. > +should mention cross-endian, not big-endian, right?> config VETH > tristate "Virtual ethernet pair device" > ---help--- > diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c > index 0a03a66..e0ab1b7 100644 > --- a/drivers/net/macvtap.c > +++ b/drivers/net/macvtap.c > @@ -48,12 +48,27 @@ struct macvtap_queue { > #define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) > > #define MACVTAP_VNET_LE 0x80000000 > +#define MACVTAP_VNET_BE 0x40000000 > + > +#ifdef CONFIG_TUN_VNET_BE > +static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q) > +{ > + if (q->flags & MACVTAP_VNET_BE) > + return false; > + return virtio_legacy_is_little_endian(); > +} > +#else > +static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q) > +{ > + return virtio_legacy_is_little_endian(); > +} > +#endif > > static inline bool macvtap_is_little_endian(struct macvtap_queue *q) > { > if (q->flags & MACVTAP_VNET_LE) > return true; > - return virtio_legacy_is_little_endian(); > + return macvtap_legacy_is_little_endian(q); > } > > static inline u16 macvtap16_to_cpu(struct macvtap_queue *q, __virtio16 val) > @@ -1000,6 +1015,43 @@ static int set_offload(struct macvtap_queue *q, unsigned long arg) > return 0; > } > > +#ifdef CONFIG_TUN_VNET_BE > +static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *sp) > +{ > + int s = !!(q->flags & MACVTAP_VNET_BE); > + > + if (put_user(s, sp)) > + return -EFAULT; > + > + return 0; > +} > + > +static long macvtap_set_vnet_be(struct macvtap_queue *q, int __user *sp) > +{ > + int s; > + > + if (get_user(s, sp)) > + return -EFAULT; > + > + if (s) > + q->flags |= MACVTAP_VNET_BE; > + else > + q->flags &= ~MACVTAP_VNET_BE; > + > + return 0; > +} > +#else > +static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *argp) > +{ > + return -EINVAL; > +} > + > +static long macvtap_set_vnet_be(struct macvtap_queue *q, int __user *argp) > +{ > + return -EINVAL; > +} > +#endif /* CONFIG_TUN_VNET_BE */ > + > /* > * provide compatibility with generic tun/tap interface > */ > @@ -1097,6 +1149,12 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, > q->flags &= ~MACVTAP_VNET_LE; > return 0; > > + case TUNGETVNETBE: > + return macvtap_get_vnet_be(q, sp); > + > + case TUNSETVNETBE: > + return macvtap_set_vnet_be(q, sp); > + > case TUNSETOFFLOAD: > /* let the user check for future flags */ > if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | > diff --git a/drivers/net/tun.c b/drivers/net/tun.c > index 053f9b6..4e12488 100644 > --- a/drivers/net/tun.c > +++ b/drivers/net/tun.c > @@ -111,6 +111,7 @@ do { \ > #define TUN_FASYNC IFF_ATTACH_QUEUE > /* High bits in flags field are unused. */ > #define TUN_VNET_LE 0x80000000 > +#define TUN_VNET_BE 0x40000000 > > #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \ > IFF_MULTI_QUEUE) > @@ -206,11 +207,25 @@ struct tun_struct { > u32 flow_count; > }; > > +#ifdef CONFIG_TUN_VNET_BE > +static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) > +{ > + if (tun->flags & TUN_VNET_BE) > + return false; > + return virtio_legacy_is_little_endian(); > +} > +#else > +static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) > +{ > + return virtio_legacy_is_little_endian(); > +} > +#endif > + > static inline bool tun_is_little_endian(struct tun_struct *tun) > { > if (tun->flags & TUN_VNET_LE) > return true; > - return virtio_legacy_is_little_endian(); > + return tun_legacy_is_little_endian(tun); > } > > static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val) > @@ -1836,6 +1851,43 @@ unlock: > return ret; > } > > +#ifdef CONFIG_TUN_VNET_BE > +static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) > +{ > + int be = !!(tun->flags & TUN_VNET_BE); > + > + if (put_user(be, argp)) > + return EFAULT; > + > + return 0; > +} > + > +static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) > +{ > + int be; > + > + if (get_user(be, argp)) > + return -EFAULT; > + > + if (be) > + tun->flags |= TUN_VNET_BE; > + else > + tun->flags &= ~TUN_VNET_BE; > + > + return 0; > +} > +#else > +static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) > +{ > + return -EINVAL; > +} > + > +static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) > +{ > + return -EINVAL; > +} > +#endif /* CONFIG_TUN_VNET_BE */ > + > static long __tun_chr_ioctl(struct file *file, unsigned int cmd, > unsigned long arg, int ifreq_len) > { > @@ -2065,6 +2117,14 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, > tun->flags &= ~TUN_VNET_LE; > break; > > + case TUNGETVNETBE: > + ret = tun_get_vnet_be(tun, argp); > + break; > + > + case TUNSETVNETBE: > + ret = tun_set_vnet_be(tun, argp); > + break; > + > case TUNATTACHFILTER: > /* Can be set only for TAPs */ > ret = -EINVAL; > diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h > index 50ae243..bcac4c0 100644 > --- a/include/uapi/linux/if_tun.h > +++ b/include/uapi/linux/if_tun.h > @@ -50,6 +50,8 @@ > #define TUNGETFILTER _IOR('T', 219, struct sock_fprog) > #define TUNSETVNETLE _IOW('T', 220, int) > #define TUNGETVNETLE _IOR('T', 221, int) > +#define TUNSETVNETBE _IOW('T', 222, int) > +#define TUNGETVNETBE _IOR('T', 223, int) > > /* TUNSETIFF ifr flags */ > #define IFF_TUN 0x0001
Michael S. Tsirkin
2015-Apr-21 14:09 UTC
[PATCH v4 6/8] virtio: add explicit big-endian support to memory accessors
On Fri, Apr 10, 2015 at 12:16:20PM +0200, Greg Kurz wrote:> The current memory accessors logic is: > - little endian if little_endian > - native endian (i.e. no byteswap) if !little_endian > > If we want to fully support cross-endian vhost, we also need to be > able to convert to big endian. > > Instead of changing the little_endian argument to some 3-value enum, this > patch changes the logic to: > - little endian if little_endian > - big endian if !little_endian > > The native endian case is handled by all users with a trivial helper. This > patch doesn't change any functionality, nor it does add overhead. > > Signed-off-by: Greg Kurz <gkurz at linux.vnet.ibm.com> > ---OK overall. Style comment:> drivers/net/macvtap.c | 4 +++- > drivers/net/tun.c | 4 +++- > drivers/vhost/vhost.h | 4 +++- > include/linux/virtio_byteorder.h | 24 ++++++++++++++---------- > include/linux/virtio_config.h | 4 +++- > include/linux/vringh.h | 4 +++- > 6 files changed, 29 insertions(+), 15 deletions(-) > > diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c > index a2f2958..0a03a66 100644 > --- a/drivers/net/macvtap.c > +++ b/drivers/net/macvtap.c > @@ -51,7 +51,9 @@ struct macvtap_queue { > > static inline bool macvtap_is_little_endian(struct macvtap_queue *q) > { > - return q->flags & MACVTAP_VNET_LE; > + if (q->flags & MACVTAP_VNET_LE) > + return true; > + return virtio_legacy_is_little_endian(); > } >I'd prefer a bit more symmetry: + if (q->flags & MACVTAP_VNET_LE) + return true; + else + return virtio_legacy_is_little_endian(); Or better just: return (q->flags & MACVTAP_VNET_LE) ? true : virtio_legacy_is_little_endian(); might make line long, but your follow-up patch makes it short again, so that's ok.> static inline u16 macvtap16_to_cpu(struct macvtap_queue *q, __virtio16 val) > diff --git a/drivers/net/tun.c b/drivers/net/tun.c > index 3c3d6c0..053f9b6 100644 > --- a/drivers/net/tun.c > +++ b/drivers/net/tun.c > @@ -208,7 +208,9 @@ struct tun_struct { > > static inline bool tun_is_little_endian(struct tun_struct *tun) > { > - return tun->flags & TUN_VNET_LE; > + if (tun->flags & TUN_VNET_LE) > + return true; > + return virtio_legacy_is_little_endian(); > } > > static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val) > diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h > index 6a49960..4e9a186 100644 > --- a/drivers/vhost/vhost.h > +++ b/drivers/vhost/vhost.h > @@ -175,7 +175,9 @@ static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit) > > static inline bool vhost_is_little_endian(struct vhost_virtqueue *vq) > { > - return vhost_has_feature(vq, VIRTIO_F_VERSION_1); > + if (vhost_has_feature(vq, VIRTIO_F_VERSION_1)) > + return true; > + return virtio_legacy_is_little_endian(); > } > > /* Memory accessors */ > diff --git a/include/linux/virtio_byteorder.h b/include/linux/virtio_byteorder.h > index 51865d0..ce63a2c 100644 > --- a/include/linux/virtio_byteorder.h > +++ b/include/linux/virtio_byteorder.h > @@ -3,17 +3,21 @@ > #include <linux/types.h> > #include <uapi/linux/virtio_types.h> > > -/* > - * Low-level memory accessors for handling virtio in modern little endian and in > - * compatibility native endian format. > - */ > +static inline bool virtio_legacy_is_little_endian(void) > +{ > +#ifdef __LITTLE_ENDIAN > + return true; > +#else > + return false; > +#endif > +} > > static inline u16 __virtio16_to_cpu(bool little_endian, __virtio16 val) > { > if (little_endian) > return le16_to_cpu((__force __le16)val); > else > - return (__force u16)val; > + return be16_to_cpu((__force __be16)val); > } > > static inline __virtio16 __cpu_to_virtio16(bool little_endian, u16 val) > @@ -21,7 +25,7 @@ static inline __virtio16 __cpu_to_virtio16(bool little_endian, u16 val) > if (little_endian) > return (__force __virtio16)cpu_to_le16(val); > else > - return (__force __virtio16)val; > + return (__force __virtio16)cpu_to_be16(val); > } > > static inline u32 __virtio32_to_cpu(bool little_endian, __virtio32 val) > @@ -29,7 +33,7 @@ static inline u32 __virtio32_to_cpu(bool little_endian, __virtio32 val) > if (little_endian) > return le32_to_cpu((__force __le32)val); > else > - return (__force u32)val; > + return be32_to_cpu((__force __be32)val); > } > > static inline __virtio32 __cpu_to_virtio32(bool little_endian, u32 val) > @@ -37,7 +41,7 @@ static inline __virtio32 __cpu_to_virtio32(bool little_endian, u32 val) > if (little_endian) > return (__force __virtio32)cpu_to_le32(val); > else > - return (__force __virtio32)val; > + return (__force __virtio32)cpu_to_be32(val); > } > > static inline u64 __virtio64_to_cpu(bool little_endian, __virtio64 val) > @@ -45,7 +49,7 @@ static inline u64 __virtio64_to_cpu(bool little_endian, __virtio64 val) > if (little_endian) > return le64_to_cpu((__force __le64)val); > else > - return (__force u64)val; > + return be64_to_cpu((__force __be64)val); > } > > static inline __virtio64 __cpu_to_virtio64(bool little_endian, u64 val) > @@ -53,7 +57,7 @@ static inline __virtio64 __cpu_to_virtio64(bool little_endian, u64 val) > if (little_endian) > return (__force __virtio64)cpu_to_le64(val); > else > - return (__force __virtio64)val; > + return (__force __virtio64)cpu_to_be64(val); > } > > #endif /* _LINUX_VIRTIO_BYTEORDER */ > diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h > index bd1a582..36a6daa 100644 > --- a/include/linux/virtio_config.h > +++ b/include/linux/virtio_config.h > @@ -207,7 +207,9 @@ int virtqueue_set_affinity(struct virtqueue *vq, int cpu) > > static inline bool virtio_is_little_endian(struct virtio_device *vdev) > { > - return virtio_has_feature(vdev, VIRTIO_F_VERSION_1); > + if (virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) > + return true; > + return virtio_legacy_is_little_endian(); > } > > /* Memory accessors */ > diff --git a/include/linux/vringh.h b/include/linux/vringh.h > index 3ed62ef..d786c2d 100644 > --- a/include/linux/vringh.h > +++ b/include/linux/vringh.h > @@ -228,7 +228,9 @@ static inline void vringh_notify(struct vringh *vrh) > > static inline bool vringh_is_little_endian(const struct vringh *vrh) > { > - return vrh->little_endian; > + if (vrh->little_endian) > + return true; > + return virtio_legacy_is_little_endian(); > } > > static inline u16 vringh16_to_cpu(const struct vringh *vrh, __virtio16 val)
Michael S. Tsirkin
2015-Apr-21 14:10 UTC
[PATCH v4 0/8] vhost: support for cross endian guests
On Fri, Apr 10, 2015 at 12:15:00PM +0200, Greg Kurz wrote:> Hi, > > This patchset allows vhost to be used with legacy virtio when guest and host > have a different endianness. > > Patch 7 got rewritten according to Cornelia's and Michael's comments. I have > also introduced patch 8 that brings BE vnet headers support to tun/macvtap. > > This series is enough to have vhost_net working flawlessly. I could > succesfully reboot guests from ppc64 to ppc64le and vice-versa on ppc64 > and ppc64le hosts.Looks good overall. A couple of style comments. Thanks!> --- > > Greg Kurz (8): > virtio: introduce virtio_is_little_endian() helper > tun: add tun_is_little_endian() helper > macvtap: introduce macvtap_is_little_endian() helper > vringh: introduce vringh_is_little_endian() helper > vhost: introduce vhost_is_little_endian() helper > virtio: add explicit big-endian support to memory accessors > vhost: feature to set the vring endianness > macvtap/tun: add VNET_BE flag > > > drivers/net/Kconfig | 12 ++++++ > drivers/net/macvtap.c | 69 ++++++++++++++++++++++++++++++++++- > drivers/net/tun.c | 71 +++++++++++++++++++++++++++++++++++- > drivers/vhost/Kconfig | 10 +++++ > drivers/vhost/vhost.c | 76 ++++++++++++++++++++++++++++++++++++++ > drivers/vhost/vhost.h | 25 ++++++++++--- > include/linux/virtio_byteorder.h | 24 +++++++----- > include/linux/virtio_config.h | 19 +++++++--- > include/linux/vringh.h | 19 +++++++--- > include/uapi/linux/if_tun.h | 2 + > include/uapi/linux/vhost.h | 9 +++++ > 11 files changed, 303 insertions(+), 33 deletions(-) > > -- > Greg
On Tue, 21 Apr 2015 16:10:18 +0200 "Michael S. Tsirkin" <mst at redhat.com> wrote:> On Fri, Apr 10, 2015 at 12:15:00PM +0200, Greg Kurz wrote: > > Hi, > > > > This patchset allows vhost to be used with legacy virtio when guest and host > > have a different endianness. > > > > Patch 7 got rewritten according to Cornelia's and Michael's comments. I have > > also introduced patch 8 that brings BE vnet headers support to tun/macvtap. > > > > This series is enough to have vhost_net working flawlessly. I could > > succesfully reboot guests from ppc64 to ppc64le and vice-versa on ppc64 > > and ppc64le hosts. > > Looks good overall. > A couple of style comments. > > Thanks! >Thanks for your time Michael.> > --- > > > > Greg Kurz (8): > > virtio: introduce virtio_is_little_endian() helper > > tun: add tun_is_little_endian() helper > > macvtap: introduce macvtap_is_little_endian() helper > > vringh: introduce vringh_is_little_endian() helper > > vhost: introduce vhost_is_little_endian() helper > > virtio: add explicit big-endian support to memory accessors > > vhost: feature to set the vring endianness > > macvtap/tun: add VNET_BE flag > > > > > > drivers/net/Kconfig | 12 ++++++ > > drivers/net/macvtap.c | 69 ++++++++++++++++++++++++++++++++++- > > drivers/net/tun.c | 71 +++++++++++++++++++++++++++++++++++- > > drivers/vhost/Kconfig | 10 +++++ > > drivers/vhost/vhost.c | 76 ++++++++++++++++++++++++++++++++++++++ > > drivers/vhost/vhost.h | 25 ++++++++++--- > > include/linux/virtio_byteorder.h | 24 +++++++----- > > include/linux/virtio_config.h | 19 +++++++--- > > include/linux/vringh.h | 19 +++++++--- > > include/uapi/linux/if_tun.h | 2 + > > include/uapi/linux/vhost.h | 9 +++++ > > 11 files changed, 303 insertions(+), 33 deletions(-) > > > > -- > > Greg >
Apparently Analagous Threads
- [PATCH v4 0/8] vhost: support for cross endian guests
- [PATCH v5 0/8] vhost: support for cross endian guests
- [PATCH v5 0/8] vhost: support for cross endian guests
- [PATCH v6 0/8] vhost: support for cross endian guests
- [PATCH v6 0/8] vhost: support for cross endian guests