On Fri, Aug 03, 2012 at 11:24:20PM +0100, Jean Guyader
wrote:
> This is a Linux driver for the V4V inter VM communication system.
>
> I've posted the V4V Xen patches for comments, to find more info
about
> V4V you can check out this link.
> http://osdir.com/ml/general/2012-08/msg05904.html
>
> This linux driver exposes two char devices one for TCP one for UDP.
> The interface exposed to userspace are made of IOCTLs, one per
> network operation (listen, bind, accept, send, recv, ...).
I haven't had a chance to take a look at this and won't until next
week. But just a couple of quick questions:
- Is there a test application for this? If so where can I get it
- Is there any code in the Xen repository that uses it.
- Who are the users?
- Why .. TCP and UDP ? Does that mean it masquerades as an Ethernet
device? Why the choice of using a char device?
Thx.>
> Signed-off-by: Jean Guyader <jean.guyader@citrix.com>
> ---
> drivers/xen/Kconfig | 4 +
> drivers/xen/Makefile | 1 +
> drivers/xen/v4v.c | 2639
+++++++++++++++++++++++++++++++++++++++++++
> drivers/xen/v4v_utils.h | 278 +++++
> include/xen/interface/v4v.h | 299 +++++
> include/xen/interface/xen.h | 1 +
> include/xen/v4vdev.h | 34 +
> 7 files changed, 3256 insertions(+)
> create mode 100644 drivers/xen/v4v.c
> create mode 100644 drivers/xen/v4v_utils.h
> create mode 100644 include/xen/interface/v4v.h
> create mode 100644 include/xen/v4vdev.h
>
> diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
> index 8d2501e..db500cc 100644
> --- a/drivers/xen/Kconfig
> +++ b/drivers/xen/Kconfig
> @@ -196,4 +196,8 @@ config XEN_ACPI_PROCESSOR
> called xen_acpi_processor If you do not know what to choose, select
> M here. If the CPUFREQ drivers are built in, select Y here.
>
> +config XEN_V4V
> + tristate "Xen V4V driver"
> + default m
> +
> endmenu
> diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
> index fc34886..a3d3014 100644
> --- a/drivers/xen/Makefile
> +++ b/drivers/xen/Makefile
> @@ -21,6 +21,7 @@ obj-$(CONFIG_XEN_DOM0) += pci.o acpi.o
> obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/
> obj-$(CONFIG_XEN_PRIVCMD) += xen-privcmd.o
> obj-$(CONFIG_XEN_ACPI_PROCESSOR) += xen-acpi-processor.o
> +obj-$(CONFIG_XEN_V4V) += v4v.o
> xen-evtchn-y := evtchn.o
> xen-gntdev-y := gntdev.o
> xen-gntalloc-y := gntalloc.o
> diff --git a/drivers/xen/v4v.c b/drivers/xen/v4v.c
> new file mode 100644
> index 0000000..141be66
> --- /dev/null
> +++ b/drivers/xen/v4v.c
> @@ -0,0 +1,2639 @@
>
+/******************************************************************************
> + * drivers/xen/v4v/v4v.c
> + *
> + * V4V interdomain communication driver.
> + *
> + * Copyright (c) 2012 Jean Guyader
> + * Copyright (c) 2009 Ross Philipson
> + * Copyright (c) 2009 James McKenzie
> + * Copyright (c) 2009 Citrix Systems, Inc.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License version 2
> + * as published by the Free Software Foundation; or, when distributed
> + * separately from the Linux kernel or incorporated into other
> + * software packages, subject to the following license:
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
copy
> + * of this source file (the "Software"), to deal in the Software
without
> + * restriction, including without limitation the rights to use, copy,
modify,
> + * merge, publish, distribute, sublicense, and/or sell copies of the
Software,
> + * and to permit persons to whom the Software is furnished to do so,
subject to
> + * the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included
in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include <linux/mm.h>
> +#include <linux/init.h>
> +#include <linux/module.h>
> +#include <linux/vmalloc.h>
> +#include <linux/interrupt.h>
> +#include <linux/spinlock.h>
> +#include <linux/list.h>
> +#include <linux/socket.h>
> +#include <linux/sched.h>
> +#include <xen/events.h>
> +#include <xen/evtchn.h>
> +#include <xen/page.h>
> +#include <xen/xen.h>
> +#include <linux/fs.h>
> +#include <linux/platform_device.h>
> +#include <linux/miscdevice.h>
> +#include <linux/major.h>
> +#include <linux/proc_fs.h>
> +#include <linux/poll.h>
> +#include <linux/random.h>
> +#include <linux/wait.h>
> +#include <linux/file.h>
> +#include <linux/mount.h>
> +
> +#include <xen/interface/v4v.h>
> +#include <xen/v4vdev.h>
> +#include "v4v_utils.h"
> +
> +#define DEFAULT_RING_SIZE \
> + (V4V_ROUNDUP((((PAGE_SIZE)*32) - sizeof(v4v_ring_t)-V4V_ROUNDUP(1))))
> +
> +/* The type of a ring*/
> +typedef enum {
> + V4V_RTYPE_IDLE = 0,
> + V4V_RTYPE_DGRAM,
> + V4V_RTYPE_LISTENER,
> + V4V_RTYPE_CONNECTOR,
> +} v4v_rtype;
> +
> +/* the state of a v4V_private*/
> +typedef enum {
> + V4V_STATE_IDLE = 0,
> + V4V_STATE_BOUND,
> + V4V_STATE_LISTENING,
> + V4V_STATE_ACCEPTED,
> + V4V_STATE_CONNECTING,
> + V4V_STATE_CONNECTED,
> + V4V_STATE_DISCONNECTED
> +} v4v_state;
> +
> +typedef enum {
> + V4V_PTYPE_DGRAM = 1,
> + V4V_PTYPE_STREAM,
> +} v4v_ptype;
> +
> +static rwlock_t list_lock;
> +static struct list_head ring_list;
> +
> +struct v4v_private;
> +
> +/*
> + * Ring pointer itself is protected by the refcnt the lists its in by
list_lock.
> + *
> + * It's permissible to decrement the refcnt whilst holding the
read lock, and then
> + * clean up refcnt=0 rings later.
> + *
> + * If a ring has refcnt!=0 we expect ->ring to be non NULL, and for the
ring to
> + * be registered with Xen.
> + */
> +
> +struct ring {
> + struct list_head node;
> + atomic_t refcnt;
> +
> + spinlock_t lock; /* Protects the data in the v4v_ring_t
also privates and sponsor */
> +
> + struct list_head privates; /* Protected by lock */
> + struct v4v_private *sponsor; /* Protected by lock */
> +
> + v4v_rtype type;
> +
> + /* Ring */
> + v4v_ring_t *ring;
> + v4v_pfn_t *pfn_list;
> + size_t pfn_list_npages;
> + int order;
> +};
> +
> +struct v4v_private {
> + struct list_head node;
> + v4v_state state;
> + v4v_ptype ptype;
> + uint32_t desired_ring_size;
> + struct ring *r;
> + wait_queue_head_t readq;
> + wait_queue_head_t writeq;
> + v4v_addr_t peer;
> + uint32_t conid;
> + spinlock_t pending_recv_lock; /* Protects pending messages, and
pending_error */
> + struct list_head pending_recv_list; /* For LISTENER contains
only ... */
> + atomic_t pending_recv_count;
> + int pending_error;
> + int full;
> + int send_blocked;
> + int rx;
> +};
> +
> +struct pending_recv {
> + struct list_head node;
> + v4v_addr_t from;
> + size_t data_len, data_ptr;
> + struct v4v_stream_header sh;
> + uint8_t data[0];
> +} V4V_PACKED;
> +
> +static spinlock_t interrupt_lock;
> +static spinlock_t pending_xmit_lock;
> +static struct list_head pending_xmit_list;
> +static atomic_t pending_xmit_count;
> +
> +enum v4v_pending_xmit_type {
> + V4V_PENDING_XMIT_INLINE = 1, /* Send the inline xmit */
> + V4V_PENDING_XMIT_WAITQ_MATCH_SPONSOR, /* Wake up writeq of
sponsor of the ringid from */
> + V4V_PENDING_XMIT_WAITQ_MATCH_PRIVATES, /* Wake up writeq of a
private of ringid from with conid */
> +};
> +
> +struct pending_xmit {
> + struct list_head node;
> + enum v4v_pending_xmit_type type;
> + uint32_t conid;
> + struct v4v_ring_id from;
> + v4v_addr_t to;
> + size_t len;
> + uint32_t protocol;
> + uint8_t data[0];
> +};
> +
> +#define MAX_PENDING_RECVS 16
> +
> +/* Hypercalls */
> +
> +static inline int __must_check
> +HYPERVISOR_v4v_op(int cmd, void *arg1, void *arg2,
> + uint32_t arg3, uint32_t arg4)
> +{
> + return _hypercall5(int, v4v_op, cmd, arg1, arg2, arg3, arg4);
> +}
> +
> +static int v4v_info(v4v_info_t *info)
> +{
> + (void)(*(volatile int*)info);
> + return HYPERVISOR_v4v_op (V4VOP_info, info, NULL, 0, 0);
> +}
> +
> +static int H_v4v_register_ring(v4v_ring_t * r, v4v_pfn_t * l, size_t
npages)
> +{
> + (void)(*(volatile int *)r);
> + return HYPERVISOR_v4v_op(V4VOP_register_ring, r, l, npages, 0);
> +}
> +
> +static int H_v4v_unregister_ring(v4v_ring_t * r)
> +{
> + (void)(*(volatile int *)r);
> + return HYPERVISOR_v4v_op(V4VOP_unregister_ring, r, NULL, 0, 0);
> +}
> +
> +static int
> +H_v4v_send(v4v_addr_t * s, v4v_addr_t * d, const void *buf, uint32_t len,
> + uint32_t protocol)
> +{
> + v4v_send_addr_t addr;
> + addr.src = *s;
> + addr.dst = *d;
> + return HYPERVISOR_v4v_op(V4VOP_send, &addr, (void *)buf, len,
protocol);
> +}
> +
> +static int
> +H_v4v_sendv(v4v_addr_t * s, v4v_addr_t * d, const v4v_iov_t * iovs,
> + uint32_t niov, uint32_t protocol)
> +{
> + v4v_send_addr_t addr;
> + addr.src = *s;
> + addr.dst = *d;
> + return HYPERVISOR_v4v_op(V4VOP_sendv, &addr, (void *)iovs,
niov,
> + protocol);
> +}
> +
> +static int H_v4v_notify(v4v_ring_data_t * rd)
> +{
> + return HYPERVISOR_v4v_op(V4VOP_notify, rd, NULL, 0, 0);
> +}
> +
> +static int H_v4v_viptables_add(v4v_viptables_rule_t * rule, int position)
> +{
> + return HYPERVISOR_v4v_op(V4VOP_viptables_add, rule, NULL,
> + position, 0);
> +}
> +
> +static int H_v4v_viptables_del(v4v_viptables_rule_t * rule, int position)
> +{
> + return HYPERVISOR_v4v_op(V4VOP_viptables_del, rule, NULL,
> + position, 0);
> +}
> +
> +static int H_v4v_viptables_list(struct v4v_viptables_list *list)
> +{
> + return HYPERVISOR_v4v_op(V4VOP_viptables_list, list, NULL, 0, 0);
> +}
> +
> +/* Port/Ring uniqueness */
> +
> +/* Need to hold write lock for all of these */
> +
> +static int v4v_id_in_use(struct v4v_ring_id *id)
> +{
> + struct ring *r;
> +
> + list_for_each_entry(r, &ring_list, node) {
> + if ((r->ring->id.addr.port == id->addr.port)
> + && (r->ring->id.partner ==
id->partner))
> + return 1;
> + }
> +
> + return 0;
> +}
> +
> +static int v4v_port_in_use(uint32_t port, uint32_t * max)
> +{
> + uint32_t ret = 0;
> + struct ring *r;
> +
> + list_for_each_entry(r, &ring_list, node) {
> + if (r->ring->id.addr.port == port)
> + ret++;
> + if (max && (r->ring->id.addr.port >
*max))
> + *max = r->ring->id.addr.port;
> + }
> +
> + return ret;
> +}
> +
> +static uint32_t v4v_random_port(void)
> +{
> + uint32_t port;
> +
> + port = random32();
> + port |= 0x80000000U;
> + if (port > 0xf0000000U) {
> + port -= 0x10000000;
> + }
> +
> + return port;
> +}
> +
> +/* Caller needs to hold lock */
> +static uint32_t v4v_find_spare_port_number(void)
> +{
> + uint32_t port, max = 0x80000000U;
> +
> + port = v4v_random_port();
> + if (!v4v_port_in_use(port, &max)) {
> + return port;
> + } else {
> + port = max + 1;
> + }
> +
> + return port;
> +}
> +
> +/* Ring Goo */
> +
> +static int register_ring(struct ring *r)
> +{
> + return H_v4v_register_ring((void *)r->ring,
> + r->pfn_list,
> + r->pfn_list_npages);
> +}
> +
> +static int unregister_ring(struct ring *r)
> +{
> + return H_v4v_unregister_ring((void *)r->ring);
> +}
> +
> +static void refresh_pfn_list(struct ring *r)
> +{
> + uint8_t *b = (void *)r->ring;
> + int i;
> +
> + for (i = 0; i < r->pfn_list_npages; ++i) {
> + r->pfn_list[i] = pfn_to_mfn(vmalloc_to_pfn(b));
> + b += PAGE_SIZE;
> + }
> +}
> +
> +static void allocate_pfn_list(struct ring *r)
> +{
> + int n = (r->ring->len + PAGE_SIZE - 1) >> PAGE_SHIFT;
> + int len = sizeof(v4v_pfn_t) * n;
> +
> + r->pfn_list = kmalloc(len, GFP_KERNEL);
> + if (!r->pfn_list)
> + return;
> + r->pfn_list_npages = n;
> +
> + refresh_pfn_list(r);
> +}
> +
> +static int allocate_ring(struct ring *r, int ring_len)
> +{
> + int len = ring_len + sizeof(v4v_ring_t);
> + int ret = 0;
> +
> + if (ring_len != V4V_ROUNDUP(ring_len)) {
> + ret = -EINVAL;
> + goto fail;
> + }
> +
> + r->ring = NULL;
> + r->pfn_list = NULL;
> + r->order = 0;
> +
> + r->order = get_order(len);
> +
> + r->ring = vmalloc(len);
> +
> + if (!r->ring) {
> + ret = -ENOMEM;
> + goto fail;
> + }
> +
> + memset((void *)r->ring, 0, len);
> +
> + r->ring->magic = V4V_RING_MAGIC;
> + r->ring->len = ring_len;
> + r->ring->rx_ptr = r->ring->tx_ptr = 0;
> +
> + memset((void *)r->ring->ring, 0x5a, ring_len);
> +
> + allocate_pfn_list(r);
> + if (!r->pfn_list) {
> +
> + ret = -ENOMEM;
> + goto fail;
> + }
> +
> + return 0;
> + fail:
> + if (r->ring)
> + vfree(r->ring);
> + if (r->pfn_list)
> + kfree(r->pfn_list);
> +
> + r->ring = NULL;
> + r->pfn_list = NULL;
> +
> + return ret;
> +}
> +
> +/* Caller must hold lock */
> +static void recover_ring(struct ring *r)
> +{
> + /* It's all gone horribly wrong */
> + r->ring->rx_ptr = r->ring->tx_ptr;
> + /* Xen updates tx_ptr atomically to always be pointing somewhere
sensible */
> +}
> +
> +/* Caller must hold no locks, ring is allocated with a refcnt of 1 */
> +static int new_ring(struct v4v_private *sponsor, struct v4v_ring_id *pid)
> +{
> + struct v4v_ring_id id = *pid;
> + struct ring *r;
> + int ret;
> + unsigned long flags;
> +
> + if (id.addr.domain != V4V_DOMID_NONE)
> + return -EINVAL;
> +
> + r = kmalloc(sizeof(struct ring), GFP_KERNEL);
> + memset(r, 0, sizeof(struct ring));
> +
> + ret = allocate_ring(r, sponsor->desired_ring_size);
> + if (ret) {
> + kfree(r);
> + return ret;
> + }
> +
> + INIT_LIST_HEAD(&r->privates);
> + spin_lock_init(&r->lock);
> + atomic_set(&r->refcnt, 1);
> +
> + write_lock_irqsave(&list_lock, flags);
> + if (sponsor->state != V4V_STATE_IDLE) {
> + ret = -EINVAL;
> + goto fail;
> + }
> +
> + if (!id.addr.port) {
> + id.addr.port = v4v_find_spare_port_number();
> + } else if (v4v_id_in_use(&id)) {
> + ret = -EADDRINUSE;
> + goto fail;
> + }
> +
> + r->ring->id = id;
> + r->sponsor = sponsor;
> + sponsor->r = r;
> + sponsor->state = V4V_STATE_BOUND;
> +
> + ret = register_ring(r);
> + if (ret)
> + goto fail;
> +
> + list_add(&r->node, &ring_list);
> + write_unlock_irqrestore(&list_lock, flags);
> + return 0;
> +
> + fail:
> + write_unlock_irqrestore(&list_lock, flags);
> +
> + vfree(r->ring);
> + kfree(r->pfn_list);
> + kfree(r);
> +
> + sponsor->r = NULL;
> + sponsor->state = V4V_STATE_IDLE;
> +
> + return ret;
> +}
> +
> +/* Cleans up old rings */
> +static void delete_ring(struct ring *r)
> +{
> + int ret;
> +
> + list_del(&r->node);
> +
> + if ((ret = unregister_ring(r))) {
> + printk(KERN_ERR
> + "unregister_ring hypercall failed: %d. Leaking
ring.\n",
> + ret);
> + } else {
> + vfree(r->ring);
> + }
> +
> + kfree(r->pfn_list);
> + kfree(r);
> +}
> +
> +/* Returns !0 if you successfully got a reference to the ring */
> +static int get_ring(struct ring *r)
> +{
> + return atomic_add_unless(&r->refcnt, 1, 0);
> +}
> +
> +/* Must be called with DEBUG_WRITELOCK; v4v_write_lock */
> +static void put_ring(struct ring *r)
> +{
> + if (!r)
> + return;
> +
> + if (atomic_dec_and_test(&r->refcnt)) {
> + delete_ring(r);
> + }
> +}
> +
> +/* Caller must hold ring_lock */
> +static struct ring *find_ring_by_id(struct v4v_ring_id *id)
> +{
> + struct ring *r;
> +
> + list_for_each_entry(r, &ring_list, node) {
> + if (!memcmp
> + ((void *)&r->ring->id, id, sizeof(struct
v4v_ring_id)))
> + return r;
> + }
> + return NULL;
> +}
> +
> +/* Caller must hold ring_lock */
> +struct ring *find_ring_by_id_type(struct v4v_ring_id *id, v4v_rtype t)
> +{
> + struct ring *r;
> +
> + list_for_each_entry(r, &ring_list, node) {
> + if (r->type != t)
> + continue;
> + if (!memcmp
> + ((void *)&r->ring->id, id, sizeof(struct
v4v_ring_id)))
> + return r;
> + }
> +
> + return NULL;
> +}
> +
> +/* Pending xmits */
> +
> +/* Caller must hold pending_xmit_lock */
> +
> +static void
> +xmit_queue_wakeup_private(struct v4v_ring_id *from,
> + uint32_t conid, v4v_addr_t * to, int len, int
delete)
> +{
> + struct pending_xmit *p;
> +
> + list_for_each_entry(p, &pending_xmit_list, node) {
> + if (p->type != V4V_PENDING_XMIT_WAITQ_MATCH_PRIVATES)
> + continue;
> + if (p->conid != conid)
> + continue;
> +
> + if ((!memcmp(from, &p->from, sizeof(struct
v4v_ring_id)))
> + && (!memcmp(to, &p->to,
sizeof(v4v_addr_t)))) {
> + if (delete) {
> + atomic_dec(&pending_xmit_count);
> + list_del(&p->node);
> + } else {
> + p->len = len;
> + }
> + return;
> + }
> + }
> +
> + if (delete)
> + return;
> +
> + p = kmalloc(sizeof(struct pending_xmit), GFP_ATOMIC);
> + if (!p) {
> + printk(KERN_ERR
> + "Out of memory trying to queue an xmit sponsor
wakeup\n");
> + return;
> + }
> + p->type = V4V_PENDING_XMIT_WAITQ_MATCH_PRIVATES;
> + p->conid = conid;
> + p->from = *from;
> + p->to = *to;
> + p->len = len;
> +
> + atomic_inc(&pending_xmit_count);
> + list_add_tail(&p->node, &pending_xmit_list);
> +}
> +
> +/* Caller must hold pending_xmit_lock */
> +static void
> +xmit_queue_wakeup_sponsor(struct v4v_ring_id *from, v4v_addr_t * to,
> + int len, int delete)
> +{
> + struct pending_xmit *p;
> +
> + list_for_each_entry(p, &pending_xmit_list, node) {
> + if (p->type != V4V_PENDING_XMIT_WAITQ_MATCH_SPONSOR)
> + continue;
> + if ((!memcmp(from, &p->from, sizeof(struct
v4v_ring_id)))
> + && (!memcmp(to, &p->to,
sizeof(v4v_addr_t)))) {
> + if (delete) {
> + atomic_dec(&pending_xmit_count);
> + list_del(&p->node);
> + } else {
> + p->len = len;
> + }
> + return;
> + }
> + }
> +
> + if (delete)
> + return;
> +
> + p = kmalloc(sizeof(struct pending_xmit), GFP_ATOMIC);
> + if (!p) {
> + printk(KERN_ERR
> + "Out of memory trying to queue an xmit sponsor
wakeup\n");
> + return;
> + }
> + p->type = V4V_PENDING_XMIT_WAITQ_MATCH_SPONSOR;
> + p->from = *from;
> + p->to = *to;
> + p->len = len;
> + atomic_inc(&pending_xmit_count);
> + list_add_tail(&p->node, &pending_xmit_list);
> +}
> +
> +static int
> +xmit_queue_inline(struct v4v_ring_id *from, v4v_addr_t * to,
> + void *buf, size_t len, uint32_t protocol)
> +{
> + ssize_t ret;
> + unsigned long flags;
> + struct pending_xmit *p;
> +
> + spin_lock_irqsave(&pending_xmit_lock, flags);
> +
> + ret = H_v4v_send(&from->addr, to, buf, len, protocol);
> + if (ret != -EAGAIN) {
> + spin_unlock_irqrestore(&pending_xmit_lock, flags);
> + return ret;
> + }
> +
> + p = kmalloc(sizeof(struct pending_xmit) + len, GFP_ATOMIC);
> + if (!p) {
> + spin_unlock_irqrestore(&pending_xmit_lock, flags);
> + printk(KERN_ERR
> + "Out of memory trying to queue an xmit of %zu
bytes\n",
> + len);
> +
> + return -ENOMEM;
> + }
> +
> + p->type = V4V_PENDING_XMIT_INLINE;
> + p->from = *from;
> + p->to = *to;
> + p->len = len;
> + p->protocol = protocol;
> +
> + if (len)
> + memcpy(p->data, buf, len);
> +
> + list_add_tail(&p->node, &pending_xmit_list);
> + atomic_inc(&pending_xmit_count);
> + spin_unlock_irqrestore(&pending_xmit_lock, flags);
> +
> + return len;
> +}
> +
> +static void
> +xmit_queue_rst_to(struct v4v_ring_id *from, uint32_t conid, v4v_addr_t *
to)
> +{
> + struct v4v_stream_header sh;
> +
> + if (!to)
> + return;
> +
> + sh.conid = conid;
> + sh.flags = V4V_SHF_RST;
> + xmit_queue_inline(from, to, &sh, sizeof(sh),
V4V_PROTO_STREAM);
> +}
> +
> +/* RX */
> +
> +static int
> +copy_into_pending_recv(struct ring *r, int len, struct v4v_private *p)
> +{
> + struct pending_recv *pending;
> + int k;
> +
> + /* Too much queued? Let the ring take the strain */
> + if (atomic_read(&p->pending_recv_count) >
MAX_PENDING_RECVS) {
> + spin_lock(&p->pending_recv_lock);
> + p->full = 1;
> + spin_unlock(&p->pending_recv_lock);
> +
> + return -1;
> + }
> +
> + pending = kmalloc(sizeof(struct pending_recv) -
> + sizeof(struct v4v_stream_header) + len, GFP_ATOMIC);
> +
> + if (!pending)
> + return -1;
> +
> + pending->data_ptr = 0;
> + pending->data_len = len - sizeof(struct v4v_stream_header);
> +
> + k = v4v_copy_out(r->ring, &pending->from, NULL,
&pending->sh, len, 1);
> +
> + spin_lock(&p->pending_recv_lock);
> + list_add_tail(&pending->node,
&p->pending_recv_list);
> + atomic_inc(&p->pending_recv_count);
> + p->full = 0;
> + spin_unlock(&p->pending_recv_lock);
> +
> + return 0;
> +}
> +
> +/* Notify */
> +
> +/* Caller must hold list_lock */
> +static void
> +wakeup_privates(struct v4v_ring_id *id, v4v_addr_t * peer, uint32_t conid)
> +{
> + struct ring *r = find_ring_by_id_type(id, V4V_RTYPE_LISTENER);
> + struct v4v_private *p;
> +
> + if (!r)
> + return;
> +
> + list_for_each_entry(p, &r->privates, node) {
> + if ((p->conid == conid)
> + && !memcmp(peer, &p->peer,
sizeof(v4v_addr_t))) {
> + p->send_blocked = 0;
> + wake_up_interruptible_all(&p->writeq);
> + return;
> + }
> + }
> +}
> +
> +/* Caller must hold list_lock */
> +static void wakeup_sponsor(struct v4v_ring_id *id)
> +{
> + struct ring *r = find_ring_by_id(id);
> +
> + if (!r)
> + return;
> +
> + if (!r->sponsor)
> + return;
> +
> + r->sponsor->send_blocked = 0;
> + wake_up_interruptible_all(&r->sponsor->writeq);
> +}
> +
> +static void v4v_null_notify(void)
> +{
> + H_v4v_notify(NULL);
> +}
> +
> +/* Caller must hold list_lock */
> +static void v4v_notify(void)
> +{
> + unsigned long flags;
> + int ret;
> + int nent;
> + struct pending_xmit *p, *n;
> + v4v_ring_data_t *d;
> + int i = 0;
> +
> + spin_lock_irqsave(&pending_xmit_lock, flags);
> +
> + nent = atomic_read(&pending_xmit_count);
> + d = kmalloc(sizeof(v4v_ring_data_t) +
> + nent * sizeof(v4v_ring_data_ent_t), GFP_ATOMIC);
> + if (!d) {
> + spin_unlock_irqrestore(&pending_xmit_lock, flags);
> + return;
> + }
> + memset(d, 0, sizeof(v4v_ring_data_t));
> +
> + d->magic = V4V_RING_DATA_MAGIC;
> +
> + list_for_each_entry(p, &pending_xmit_list, node) {
> + if (i != nent) {
> + d->data[i].ring = p->to;
> + d->data[i].space_required = p->len;
> + i++;
> + }
> + }
> + d->nent = i;
> +
> + if (H_v4v_notify(d)) {
> + kfree(d);
> + spin_unlock_irqrestore(&pending_xmit_lock, flags);
> + //MOAN;
> + return;
> + }
> +
> + i = 0;
> + list_for_each_entry_safe(p, n, &pending_xmit_list, node) {
> + int processed = 1;
> +
> + if (i == nent)
> + continue;
> +
> + if (d->data[i].flags & V4V_RING_DATA_F_EXISTS) {
> + switch (p->type) {
> + case V4V_PENDING_XMIT_INLINE:
> + if (!
> + (d->data[i].flags &
> + V4V_RING_DATA_F_SUFFICIENT)) {
> + processed = 0;
> + break;
> + }
> + ret = H_v4v_send(&p->from.addr, &p->to, p->data,
> + p->len, p->protocol);
> + if (ret == -EAGAIN)
> + processed = 0;
> + break;
> + case V4V_PENDING_XMIT_WAITQ_MATCH_SPONSOR:
> + if (d->
> + data[i].flags &
V4V_RING_DATA_F_SUFFICIENT)
> + {
> + wakeup_sponsor(&p->from);
> + } else {
> + processed = 0;
> + }
> + break;
> + case V4V_PENDING_XMIT_WAITQ_MATCH_PRIVATES:
> + if (d->
> + data[i].flags &
V4V_RING_DATA_F_SUFFICIENT)
> + {
> + wakeup_privates(&p->from,
&p->to,
> + p->conid);
> + } else {
> + processed = 0;
> + }
> + break;
> + }
> + }
> + if (processed) {
> + list_del(&p->node); /* No one to talk
to */
> + atomic_dec(&pending_xmit_count);
> + kfree(p);
> + }
> + i++;
> + }
> +
> + spin_unlock_irqrestore(&pending_xmit_lock, flags);
> + kfree(d);
> +}
> +
> +/* VIPtables */
> +static void
> +v4v_viptables_add(struct v4v_private *p, struct v4v_viptables_rule *rule,
> + int position)
> +{
> + H_v4v_viptables_add(rule, position);
> +}
> +
> +static void
> +v4v_viptables_del(struct v4v_private *p, struct v4v_viptables_rule *rule,
> + int position)
> +{
> + H_v4v_viptables_del(rule, position);
> +}
> +
> +static int v4v_viptables_list(struct v4v_private *p, struct
v4v_viptables_list *list)
> +{
> + return H_v4v_viptables_list(list);
> +}
> +
> +/* State Machines */
> +static int
> +connector_state_machine(struct v4v_private *p, struct v4v_stream_header
*sh)
> +{
> + if (sh->flags & V4V_SHF_ACK) {
> + switch (p->state) {
> + case V4V_STATE_CONNECTING:
> + p->state = V4V_STATE_CONNECTED;
> +
> + spin_lock(&p->pending_recv_lock);
> + p->pending_error = 0;
> + spin_unlock(&p->pending_recv_lock);
> +
> + wake_up_interruptible_all(&p->writeq);
> + return 0;
> + case V4V_STATE_CONNECTED:
> + case V4V_STATE_DISCONNECTED:
> + p->state = V4V_STATE_DISCONNECTED;
> +
> + wake_up_interruptible_all(&p->readq);
> + wake_up_interruptible_all(&p->writeq);
> + return 1; /* Send RST */
> + default:
> + break;
> + }
> + }
> +
> + if (sh->flags & V4V_SHF_RST) {
> + switch (p->state) {
> + case V4V_STATE_CONNECTING:
> + spin_lock(&p->pending_recv_lock);
> + p->pending_error = -ECONNREFUSED;
> + spin_unlock(&p->pending_recv_lock);
> + case V4V_STATE_CONNECTED:
> + p->state = V4V_STATE_DISCONNECTED;
> + wake_up_interruptible_all(&p->readq);
> + wake_up_interruptible_all(&p->writeq);
> + return 0;
> + default:
> + break;
> + }
> + }
> +
> + return 0;
> +}
> +
> +static void
> +acceptor_state_machine(struct v4v_private *p, struct v4v_stream_header
*sh)
> +{
> + if ((sh->flags & V4V_SHF_RST)
> + && ((p->state == V4V_STATE_ACCEPTED))) {
> + p->state = V4V_STATE_DISCONNECTED;
> + wake_up_interruptible_all(&p->readq);
> + wake_up_interruptible_all(&p->writeq);
> + }
> +}
> +
> +/* Interrupt handler */
> +
> +static int connector_interrupt(struct ring *r)
> +{
> + ssize_t msg_len;
> + uint32_t protocol;
> + struct v4v_stream_header sh;
> + v4v_addr_t from;
> + int ret = 0;
> +
> + if (!r->sponsor) {
> + //MOAN;
> + return -1;
> + }
> +
> + msg_len = v4v_copy_out(r->ring, &from, &protocol,
&sh, sizeof(sh), 0); /* Peek the header */
> + if (msg_len == -1) {
> + recover_ring(r);
> + return ret;
> + }
> +
> + if ((protocol != V4V_PROTO_STREAM) || (msg_len < sizeof(sh))) {
> + /* Wrong protocol bin it */
> + v4v_copy_out(r->ring, NULL, NULL, NULL, 0, 1);
> + return ret;
> + }
> +
> + if (sh.flags & V4V_SHF_SYN) { /* This is a connector no-one
should send SYN, send RST back */
> + msg_len = v4v_copy_out(r->ring,
&from, &protocol, &sh, sizeof(sh), 1);
> + if (msg_len == sizeof(sh))
> + xmit_queue_rst_to(&r->ring->id,
sh.conid, &from);
> + return ret;
> + }
> +
> + /* Right connexion? */
> + if (sh.conid != r->sponsor->conid) {
> + msg_len = v4v_copy_out(r->ring,
&from, &protocol, &sh, sizeof(sh), 1);
> + xmit_queue_rst_to(&r->ring->id, sh.conid,
&from);
> + return ret;
> + }
> +
> + /* Any messages to eat? */
> + if (sh.flags & (V4V_SHF_ACK | V4V_SHF_RST)) {
> + msg_len = v4v_copy_out(r->ring,
&from, &protocol, &sh, sizeof(sh), 1);
> + if (msg_len == sizeof(sh)) {
> + if (connector_state_machine(r->sponsor,
&sh))
> + xmit_queue_rst_to(&r->ring->id,
sh.conid,
> + &from);
> + }
> + return ret;
> + }
> + //FIXME set a flag to say wake up the userland process next time,
and do that rather than copy
> + ret = copy_into_pending_recv(r, msg_len, r->sponsor);
> + wake_up_interruptible_all(&r->sponsor->readq);
> +
> + return ret;
> +}
> +
> +static int
> +acceptor_interrupt(struct v4v_private *p, struct ring *r,
> + struct v4v_stream_header *sh, ssize_t msg_len)
> +{
> + v4v_addr_t from;
> + int ret = 0;
> +
> + if (sh->flags & (V4V_SHF_SYN | V4V_SHF_ACK)) { /* This is
an acceptor no-one should send SYN or ACK, send RST back */
> + msg_len = v4v_copy_out(r->ring,
&from, NULL, sh, sizeof(*sh), 1);
> + if (msg_len == sizeof(*sh))
> + xmit_queue_rst_to(&r->ring->id,
sh->conid, &from);
> + return ret;
> + }
> +
> + /* Is it all over */
> + if (sh->flags & V4V_SHF_RST) {
> + /* Consume the RST */
> + msg_len = v4v_copy_out(r->ring,
&from, NULL, sh, sizeof(*sh), 1);
> + if (msg_len == sizeof(*sh))
> + acceptor_state_machine(p, sh);
> + return ret;
> + }
> +
> + /* Copy the message out */
> + ret = copy_into_pending_recv(r, msg_len, p);
> + wake_up_interruptible_all(&p->readq);
> +
> + return ret;
> +}
> +
> +static int listener_interrupt(struct ring *r)
> +{
> + int ret = 0;
> + ssize_t msg_len;
> + uint32_t protocol;
> + struct v4v_stream_header sh;
> + struct v4v_private *p;
> + v4v_addr_t from;
> +
> + msg_len = v4v_copy_out(r->ring, &from, &protocol,
&sh, sizeof(sh), 0); /* Peek the header */
> + if (msg_len == -1) {
> + recover_ring(r);
> + return ret;
> + }
> +
> + if ((protocol != V4V_PROTO_STREAM) || (msg_len < sizeof(sh))) {
> + /* Wrong protocol bin it */
> + v4v_copy_out(r->ring, NULL, NULL, NULL, 0, 1);
> + return ret;
> + }
> +
> + list_for_each_entry(p, &r->privates, node) {
> + if ((p->conid == sh.conid)
> + && (!memcmp(&p->peer, &from,
sizeof(v4v_addr_t)))) {
> + ret = acceptor_interrupt(p, r, &sh, msg_len);
> + return ret;
> + }
> + }
> +
> + /* Consume it */
> + if (r->sponsor && (sh.flags & V4V_SHF_RST)) {
> + /*
> + * If we previously received a SYN which has not been
pulled by
> + * v4v_accept() from the pending queue yet, the RST will
be dropped here
> + * and the connection will never be closed.
> + * Hence we must make sure to evict the SYN header from
the pending queue
> + * before it gets picked up by v4v_accept().
> + */
> + struct pending_recv *pending, *t;
> +
> + spin_lock(&r->sponsor->pending_recv_lock);
> + list_for_each_entry_safe(pending, t,
> +
&r->sponsor->pending_recv_list, node) {
> + if (pending->sh.flags & V4V_SHF_SYN
> + && pending->sh.conid == sh.conid) {
> + list_del(&pending->node);
> +
atomic_dec(&r->sponsor->pending_recv_count);
> + kfree(pending);
> + break;
> + }
> + }
> + spin_unlock(&r->sponsor->pending_recv_lock);
> +
> + /* Rst to a listener, should be picked up above for the
connexion, drop it */
> + v4v_copy_out(r->ring, NULL, NULL, NULL, sizeof(sh), 1);
> + return ret;
> + }
> +
> + if (sh.flags & V4V_SHF_SYN) {
> + /* Syn to new connexion */
> + if ((!r->sponsor) || (msg_len != sizeof(sh))) {
> + v4v_copy_out(r->ring, NULL, NULL, NULL,
> + sizeof(sh), 1);
> + return ret;
> + }
> + ret = copy_into_pending_recv(r, msg_len, r->sponsor);
> + wake_up_interruptible_all(&r->sponsor->readq);
> + return ret;
> + }
> +
> + v4v_copy_out(r->ring, NULL, NULL, NULL, sizeof(sh), 1);
> + /* Data for unknown destination, RST them */
> + xmit_queue_rst_to(&r->ring->id, sh.conid, &from);
> +
> + return ret;
> +}
> +
> +static void v4v_interrupt_rx(void)
> +{
> + struct ring *r;
> +
> + read_lock(&list_lock);
> +
> + /* Wake up anyone pending */
> + list_for_each_entry(r, &ring_list, node) {
> + if (r->ring->tx_ptr == r->ring->rx_ptr)
> + continue;
> +
> + switch (r->type) {
> + case V4V_RTYPE_IDLE:
> + v4v_copy_out(r->ring, NULL, NULL, NULL, 1, 1);
> + break;
> + case V4V_RTYPE_DGRAM: /* For datagrams we just wake up
the reader */
> + if (r->sponsor)
> +
wake_up_interruptible_all(&r->sponsor->readq);
> + break;
> + case V4V_RTYPE_CONNECTOR:
> + spin_lock(&r->lock);
> + while ((r->ring->tx_ptr !=
r->ring->rx_ptr)
> + && !connector_interrupt(r)) ;
> + spin_unlock(&r->lock);
> + break;
> + case V4V_RTYPE_LISTENER:
> + spin_lock(&r->lock);
> + while ((r->ring->tx_ptr !=
r->ring->rx_ptr)
> + && !listener_interrupt(r)) ;
> + spin_unlock(&r->lock);
> + break;
> + default: /* enum warning */
> + break;
> + }
> + }
> + read_unlock(&list_lock);
> +}
> +
> +static irqreturn_t v4v_interrupt(int irq, void *dev_id)
> +{
> + unsigned long flags;
> +
> + spin_lock_irqsave(&interrupt_lock, flags);
> + v4v_interrupt_rx();
> + v4v_notify();
> + spin_unlock_irqrestore(&interrupt_lock, flags);
> +
> + return IRQ_HANDLED;
> +}
> +
> +static void v4v_fake_irq(void)
> +{
> + unsigned long flags;
> +
> + spin_lock_irqsave(&interrupt_lock, flags);
> + v4v_interrupt_rx();
> + v4v_null_notify();
> + spin_unlock_irqrestore(&interrupt_lock, flags);
> +}
> +
> +/* Filesystem gunge */
> +
> +#define V4VFS_MAGIC 0x56345644 /* "V4VD" */
> +
> +static struct vfsmount *v4v_mnt = NULL;
> +static const struct file_operations v4v_fops_stream;
> +
> +static struct dentry *v4vfs_mount_pseudo(struct file_system_type *fs_type,
> + int flags, const char *dev_name,
> + void *data)
> +{
> + return mount_pseudo(fs_type, "v4v:", NULL, NULL,
V4VFS_MAGIC);
> +}
> +
> +static struct file_system_type v4v_fs = {
> + /* No owner field so module can be unloaded */
> + .name = "v4vfs",
> + .mount = v4vfs_mount_pseudo,
> + .kill_sb = kill_litter_super
> +};
> +
> +static int setup_fs(void)
> +{
> + int ret;
> +
> + ret = register_filesystem(&v4v_fs);
> + if (ret) {
> + printk(KERN_ERR
"v4v: couldn't register tedious
filesystem thingy\n");
> + return ret;
> + }
> +
> + v4v_mnt = kern_mount(&v4v_fs);
> + if (IS_ERR(v4v_mnt)) {
> + unregister_filesystem(&v4v_fs);
> + ret = PTR_ERR(v4v_mnt);
> + printk(KERN_ERR
> + "v4v: couldn''t mount tedious
filesystem thingy\n");
> + return ret;
> + }
> +
> + return 0;
> +}
> +
> +static void unsetup_fs(void)
> +{
> + mntput(v4v_mnt);
> + unregister_filesystem(&v4v_fs);
> +}
> +
> +/* Methods */
> +
> +static int stream_connected(struct v4v_private *p)
> +{
> + switch (p->state) {
> + case V4V_STATE_ACCEPTED:
> + case V4V_STATE_CONNECTED:
> + return 1;
> + default:
> + return 0;
> + }
> +}
> +
> +static size_t
> +v4v_try_send_sponsor(struct v4v_private *p,
> + v4v_addr_t * dest,
> + const void *buf, size_t len, uint32_t protocol)
> +{
> + size_t ret;
> + unsigned long flags;
> +
> + ret = H_v4v_send(&p->r->ring->id.addr, dest, buf,
len, protocol);
> + spin_lock_irqsave(&pending_xmit_lock, flags);
> + if (ret == -EAGAIN) {
> + /* Add pending xmit */
> + xmit_queue_wakeup_sponsor(&p->r->ring->id,
dest, len, 0);
> + p->send_blocked++;
> +
> + } else {
> + /* Remove pending xmit */
> + xmit_queue_wakeup_sponsor(&p->r->ring->id,
dest, len, 1);
> + p->send_blocked = 0;
> + }
> +
> + spin_unlock_irqrestore(&pending_xmit_lock, flags);
> +
> + return ret;
> +}
> +
> +static size_t
> +v4v_try_sendv_sponsor(struct v4v_private *p,
> + v4v_addr_t * dest,
> + const v4v_iov_t * iovs, size_t niov, size_t len,
> + uint32_t protocol)
> +{
> + size_t ret;
> + unsigned long flags;
> +
> + ret = H_v4v_sendv(&p->r->ring->id.addr, dest, iovs,
niov, protocol);
> +
> + spin_lock_irqsave(&pending_xmit_lock, flags);
> + if (ret == -EAGAIN) {
> + /* Add pending xmit */
> + xmit_queue_wakeup_sponsor(&p->r->ring->id,
dest, len, 0);
> + p->send_blocked++;
> +
> + } else {
> + /* Remove pending xmit */
> + xmit_queue_wakeup_sponsor(&p->r->ring->id,
dest, len, 1);
> + p->send_blocked = 0;
> + }
> + spin_unlock_irqrestore(&pending_xmit_lock, flags);
> +
> + return ret;
> +}
> +
> +/*
> + * Try to send from one of the ring''s privates (not its sponsor),
> + * and queue an writeq wakeup if we fail
> + */
> +static size_t
> +v4v_try_sendv_privates(struct v4v_private *p,
> + v4v_addr_t * dest,
> + const v4v_iov_t * iovs, size_t niov, size_t len,
> + uint32_t protocol)
> +{
> + size_t ret;
> + unsigned long flags;
> +
> + ret = H_v4v_sendv(&p->r->ring->id.addr, dest, iovs,
niov, protocol);
> +
> + spin_lock_irqsave(&pending_xmit_lock, flags);
> + if (ret == -EAGAIN) {
> + /* Add pending xmit */
> + xmit_queue_wakeup_private(&p->r->ring->id,
p->conid, dest, len,
> + 0);
> + p->send_blocked++;
> + } else {
> + /* Remove pending xmit */
> + xmit_queue_wakeup_private(&p->r->ring->id,
p->conid, dest, len,
> + 1);
> + p->send_blocked = 0;
> + }
> + spin_unlock_irqrestore(&pending_xmit_lock, flags);
> +
> + return ret;
> +}
> +
> +static ssize_t
> +v4v_sendto_from_sponsor(struct v4v_private *p,
> + const void *buf, size_t len,
> + int nonblock, v4v_addr_t * dest, uint32_t
protocol)
> +{
> + size_t ret = 0, ts_ret;
> +
> + switch (p->state) {
> + case V4V_STATE_CONNECTING:
> + ret = -ENOTCONN;
> + break;
> + case V4V_STATE_DISCONNECTED:
> + ret = -EPIPE;
> + break;
> + case V4V_STATE_BOUND:
> + case V4V_STATE_CONNECTED:
> + break;
> + default:
> + ret = -EINVAL;
> + }
> +
> + if (len > (p->r->ring->len - sizeof(struct
v4v_ring_message_header)))
> + return -EMSGSIZE;
> +
> + if (ret)
> + return ret;
> +
> + if (nonblock) {
> + return H_v4v_send(&p->r->ring->id.addr, dest,
buf, len,
> + protocol);;
> + }
> + /*
> + * I happen to know that wait_event_interruptible will never
> + * evaluate the 2nd argument once it has returned true but
> + * I shouldn''t.
> + *
> + * The EAGAIN will cause xen to send an interrupt which will
> + * via the pending_xmit_list and writeq wake us up.
> + */
> + ret = wait_event_interruptible(p->writeq,
> + ((ts_ret > +
v4v_try_send_sponsor
> + (p, dest,
> + buf, len, protocol)) !=
-EAGAIN));
> + if (ret)
> + ret = ts_ret;
> +
> + return ret;
> +}
> +
> +static ssize_t
> +v4v_stream_sendvto_from_sponsor(struct v4v_private *p,
> + const v4v_iov_t * iovs, size_t niov,
> + size_t len, int nonblock,
> + v4v_addr_t * dest, uint32_t protocol)
> +{
> + size_t ret = 0, ts_ret;
> +
> + switch (p->state) {
> + case V4V_STATE_CONNECTING:
> + return -ENOTCONN;
> + case V4V_STATE_DISCONNECTED:
> + return -EPIPE;
> + case V4V_STATE_BOUND:
> + case V4V_STATE_CONNECTED:
> + break;
> + default:
> + return -EINVAL;
> + }
> +
> + if (len > (p->r->ring->len - sizeof(struct
v4v_ring_message_header)))
> + return -EMSGSIZE;
> +
> + if (ret)
> + return ret;
> +
> + if (nonblock) {
> + return H_v4v_sendv(&p->r->ring->id.addr,
dest, iovs, niov,
> + protocol);
> + }
> + /*
> + * I happen to know that wait_event_interruptible will never
> + * evaluate the 2nd argument once it has returned true but
> + * I shouldn''t.
> + *
> + * The EAGAIN will cause xen to send an interrupt which will
> + * via the pending_xmit_list and writeq wake us up.
> + */
> + ret = wait_event_interruptible(p->writeq,
> + ((ts_ret > +
v4v_try_sendv_sponsor
> + (p, dest,
> + iovs, niov, len,
> + protocol)) != -EAGAIN)
> + || !stream_connected(p));
> + if (ret == 0)
> + ret = ts_ret;
> +
> + return ret;
> +}
> +static ssize_t
> +v4v_stream_sendvto_from_private(struct v4v_private *p,
> + const v4v_iov_t * iovs, size_t niov,
> + size_t len, int nonblock,
> + v4v_addr_t * dest, uint32_t protocol)
> +{
> + size_t ret = 0, ts_ret;
> +
> + switch (p->state) {
> + case V4V_STATE_DISCONNECTED:
> + return -EPIPE;
> + case V4V_STATE_ACCEPTED:
> + break;
> + default:
> + return -EINVAL;
> + }
> +
> + if (len > (p->r->ring->len - sizeof(struct
v4v_ring_message_header)))
> + return -EMSGSIZE;
> +
> + if (ret)
> + return ret;
> +
> + if (nonblock) {
> + return H_v4v_sendv(&p->r->ring->id.addr,
dest, iovs, niov,
> + protocol);
> + }
> + /*
> + * I happen to know that wait_event_interruptible will never
> + * evaluate the 2nd argument once it has returned true but
> + * I shouldn''t.
> + *
> + * The EAGAIN will cause xen to send an interrupt which will
> + * via the pending_xmit_list and writeq wake us up.
> + */
> + ret = wait_event_interruptible(p->writeq,
> + ((ts_ret > +
v4v_try_sendv_privates
> + (p, dest,
> + iovs, niov, len,
> + protocol)) != -EAGAIN)
> + || !stream_connected(p));
> + if (ret == 0)
> + ret = ts_ret;
> +
> + return ret;
> +}
> +
> +static int v4v_get_sock_name(struct v4v_private *p, struct v4v_ring_id
*id)
> +{
> + int rc = 0;
> +
> + read_lock(&list_lock);
> + if ((p->r) && (p->r->ring)) {
> + *id = p->r->ring->id;
> + } else {
> + rc = -EINVAL;
> + }
> + read_unlock(&list_lock);
> +
> + return rc;
> +}
> +
> +static int v4v_get_peer_name(struct v4v_private *p, v4v_addr_t * id)
> +{
> + int rc = 0;
> + read_lock(&list_lock);
> +
> + switch (p->state) {
> + case V4V_STATE_CONNECTING:
> + case V4V_STATE_CONNECTED:
> + case V4V_STATE_ACCEPTED:
> + *id = p->peer;
> + break;
> + default:
> + rc = -ENOTCONN;
> + }
> +
> + read_unlock(&list_lock);
> + return rc;
> +}
> +
> +static int v4v_set_ring_size(struct v4v_private *p, uint32_t ring_size)
> +{
> +
> + if (ring_size <
> + (sizeof(struct v4v_ring_message_header) + V4V_ROUNDUP(1)))
> + return -EINVAL;
> + if (ring_size != V4V_ROUNDUP(ring_size))
> + return -EINVAL;
> +
> + read_lock(&list_lock);
> + if (p->state != V4V_STATE_IDLE) {
> + read_unlock(&list_lock);
> + return -EINVAL;
> + }
> +
> + p->desired_ring_size = ring_size;
> + read_unlock(&list_lock);
> +
> + return 0;
> +}
> +
> +static ssize_t
> +v4v_recvfrom_dgram(struct v4v_private *p, void *buf, size_t len,
> + int nonblock, int peek, v4v_addr_t * src)
> +{
> + ssize_t ret;
> + uint32_t protocol;
> + v4v_addr_t lsrc;
> +
> + if (!src)
> + src = &lsrc;
> +
> +retry:
> + if (!nonblock) {
> + ret = wait_event_interruptible(p->readq,
> +
(p->r->ring->rx_ptr !> +
p->r->ring->tx_ptr));
> + if (ret)
> + return ret;
> + }
> +
> + read_lock(&list_lock);
> +
> + /*
> + * For datagrams, we know the interrrupt handler will never use
> + * the ring, leave irqs on
> + */
> + spin_lock(&p->r->lock);
> + if (p->r->ring->rx_ptr == p->r->ring->tx_ptr) {
> + spin_unlock(&p->r->lock);
> + if (nonblock) {
> + ret = -EAGAIN;
> + goto unlock;
> + }
> + read_unlock(&list_lock);
> + goto retry;
> + }
> + ret = v4v_copy_out(p->r->ring, src, &protocol, buf, len,
!peek);
> + if (ret < 0) {
> + recover_ring(p->r);
> + spin_unlock(&p->r->lock);
> + read_unlock(&list_lock);
> + goto retry;
> + }
> + spin_unlock(&p->r->lock);
> +
> + if (!peek)
> + v4v_null_notify();
> +
> + if (protocol != V4V_PROTO_DGRAM) {
> + /* If peeking consume the rubbish */
> + if (peek)
> + v4v_copy_out(p->r->ring, NULL, NULL, NULL,
1, 1);
> + read_unlock(&list_lock);
> + goto retry;
> + }
> +
> + if ((p->state == V4V_STATE_CONNECTED) &&
> + memcmp(src, &p->peer, sizeof(v4v_addr_t))) {
> + /* Wrong source - bin it */
> + if (peek)
> + v4v_copy_out(p->r->ring, NULL, NULL, NULL,
1, 1);
> + read_unlock(&list_lock);
> + goto retry;
> + }
> +
> +unlock:
> + read_unlock(&list_lock);
> +
> + return ret;
> +}
> +
> +static ssize_t
> +v4v_recv_stream(struct v4v_private *p, void *_buf, int len, int
recv_flags,
> + int nonblock)
> +{
> + size_t count = 0;
> + int ret = 0;
> + unsigned long flags;
> + int schedule_irq = 0;
> + uint8_t *buf = (void *)_buf;
> +
> + read_lock(&list_lock);
> +
> + switch (p->state) {
> + case V4V_STATE_DISCONNECTED:
> + ret = -EPIPE;
> + goto unlock;
> + case V4V_STATE_CONNECTING:
> + ret = -ENOTCONN;
> + goto unlock;
> + case V4V_STATE_CONNECTED:
> + case V4V_STATE_ACCEPTED:
> + break;
> + default:
> + ret = -EINVAL;
> + goto unlock;
> + }
> +
> + do {
> + if (!nonblock) {
> + ret = wait_event_interruptible(p->readq,
> +
(!list_empty(&p->pending_recv_list)
> + ||
!stream_connected(p)));
> +
> + if (ret)
> + break;
> + }
> +
> + spin_lock_irqsave(&p->pending_recv_lock, flags);
> +
> + while (!list_empty(&p->pending_recv_list)
&& len) {
> + size_t to_copy;
> + struct pending_recv *pending;
> + int unlink = 0;
> +
> + pending =
list_first_entry(&p->pending_recv_list,
> + struct pending_recv,
node);
> +
> + if ((pending->data_len - pending->data_ptr)
> len) {
> + to_copy = len;
> + } else {
> + unlink = 1;
> + to_copy = pending->data_len -
pending->data_ptr;
> + }
> +
> + if (!access_ok(VERIFY_WRITE, buf, to_copy)) {
> + printk(KERN_ERR
> + "V4V - ERROR: buf invalid
_buf=%p buf=%p len=%d to_copy=%zu count=%zu\n",
> + _buf, buf, len, to_copy, count);
> +
spin_unlock_irqrestore(&p->pending_recv_lock, flags);
> + read_unlock(&list_lock);
> + return -EFAULT;
> + }
> +
> + if (copy_to_user(buf, pending->data +
pending->data_ptr, to_copy))
> + {
> +
spin_unlock_irqrestore(&p->pending_recv_lock, flags);
> + read_unlock(&list_lock);
> + return -EFAULT;
> + }
> +
> + if (unlink) {
> + list_del(&pending->node);
> + kfree(pending);
> + atomic_dec(&p->pending_recv_count);
> + if (p->full)
> + schedule_irq = 1;
> + } else
> + pending->data_ptr += to_copy;
> +
> + buf += to_copy;
> + count += to_copy;
> + len -= to_copy;
> + }
> +
> + spin_unlock_irqrestore(&p->pending_recv_lock,
flags);
> +
> + if (p->state == V4V_STATE_DISCONNECTED) {
> + ret = -EPIPE;
> + break;
> + }
> +
> + if (nonblock)
> + ret = -EAGAIN;
> +
> + } while ((recv_flags & MSG_WAITALL) && len);
> +
> +unlock:
> + read_unlock(&list_lock);
> +
> + if (schedule_irq)
> + v4v_fake_irq();
> +
> + return count ? count : ret;
> +}
> +
> +static ssize_t
> +v4v_send_stream(struct v4v_private *p, const void *_buf, int len, int
nonblock)
> +{
> + int write_lump;
> + const uint8_t *buf = _buf;
> + size_t count = 0;
> + ssize_t ret;
> + int to_send;
> +
> + write_lump = DEFAULT_RING_SIZE >> 2;
> +
> + switch (p->state) {
> + case V4V_STATE_DISCONNECTED:
> + return -EPIPE;
> + case V4V_STATE_CONNECTING:
> + return -ENOTCONN;
> + case V4V_STATE_CONNECTED:
> + case V4V_STATE_ACCEPTED:
> + break;
> + default:
> + return -EINVAL;
> + }
> +
> + while (len) {
> + struct v4v_stream_header sh;
> + v4v_iov_t iovs[2];
> +
> + to_send = len > write_lump ? write_lump : len;
> + sh.flags = 0;
> + sh.conid = p->conid;
> +
> + iovs[0].iov_base = (uintptr_t)&sh;
> + iovs[0].iov_len = sizeof (sh);
> +
> + iovs[1].iov_base = (uintptr_t)buf;
> + iovs[1].iov_len = to_send;
> +
> + if (p->state == V4V_STATE_CONNECTED)
> + ret = v4v_stream_sendvto_from_sponsor(
> + p, iovs, 2,
> + to_send + sizeof(struct
v4v_stream_header),
> + nonblock, &p->peer,
V4V_PROTO_STREAM);
> + else
> + ret = v4v_stream_sendvto_from_private(
> + p, iovs, 2,
> + to_send + sizeof(struct
v4v_stream_header),
> + nonblock, &p->peer,
V4V_PROTO_STREAM);
> +
> + if (ret < 0) {
> + return count ? count : ret;
> + }
> +
> + len -= to_send;
> + buf += to_send;
> + count += to_send;
> +
> + if (nonblock)
> + return count;
> + }
> +
> + return count;
> +}
> +
> +static int v4v_bind(struct v4v_private *p, struct v4v_ring_id *ring_id)
> +{
> + int ret = 0;
> +
> + if (ring_id->addr.domain != V4V_DOMID_NONE) {
> + return -EINVAL;
> + }
> +
> + switch (p->ptype) {
> + case V4V_PTYPE_DGRAM:
> + ret = new_ring(p, ring_id);
> + if (!ret)
> + p->r->type = V4V_RTYPE_DGRAM;
> + break;
> + case V4V_PTYPE_STREAM:
> + ret = new_ring(p, ring_id);
> + break;
> + }
> +
> + return ret;
> +}
> +
> +static int v4v_listen(struct v4v_private *p)
> +{
> + if (p->ptype != V4V_PTYPE_STREAM)
> + return -EINVAL;
> +
> + if (p->state != V4V_STATE_BOUND) {
> + return -EINVAL;
> + }
> +
> + p->r->type = V4V_RTYPE_LISTENER;
> + p->state = V4V_STATE_LISTENING;
> +
> + return 0;
> +}
> +
> +static int v4v_connect(struct v4v_private *p, v4v_addr_t * peer, int
nonblock)
> +{
> + struct v4v_stream_header sh;
> + int ret = -EINVAL;
> +
> + if (p->ptype == V4V_PTYPE_DGRAM) {
> + switch (p->state) {
> + case V4V_STATE_BOUND:
> + case V4V_STATE_CONNECTED:
> + if (peer) {
> + p->state = V4V_STATE_CONNECTED;
> + memcpy(&p->peer, peer,
sizeof(v4v_addr_t));
> + } else {
> + p->state = V4V_STATE_BOUND;
> + }
> + return 0;
> + default:
> + return -EINVAL;
> + }
> + }
> + if (p->ptype != V4V_PTYPE_STREAM) {
> + return -EINVAL;
> + }
> +
> + /* Irritiatingly we need to be restartable */
> + switch (p->state) {
> + case V4V_STATE_BOUND:
> + p->r->type = V4V_RTYPE_CONNECTOR;
> + p->state = V4V_STATE_CONNECTING;
> + p->conid = random32();
> + p->peer = *peer;
> +
> + sh.flags = V4V_SHF_SYN;
> + sh.conid = p->conid;
> +
> + ret > +
xmit_queue_inline(&p->r->ring->id, &p->peer, &sh,
> + sizeof(sh), V4V_PROTO_STREAM);
> + if (ret == sizeof(sh))
> + ret = 0;
> +
> + if (ret && (ret != -EAGAIN)) {
> + p->state = V4V_STATE_BOUND;
> + p->r->type = V4V_RTYPE_DGRAM;
> + return ret;
> + }
> +
> + break;
> + case V4V_STATE_CONNECTED:
> + if (memcmp(peer, &p->peer, sizeof(v4v_addr_t))) {
> + return -EINVAL;
> + } else {
> + return 0;
> + }
> + case V4V_STATE_CONNECTING:
> + if (memcmp(peer, &p->peer, sizeof(v4v_addr_t))) {
> + return -EINVAL;
> + }
> + break;
> + default:
> + return -EINVAL;
> + }
> +
> + if (nonblock) {
> + return -EINPROGRESS;
> + }
> +
> + while (p->state != V4V_STATE_CONNECTED) {
> + ret > +
wait_event_interruptible(p->writeq,
> + (p->state !> +
V4V_STATE_CONNECTING));
> + if (ret)
> + return ret;
> +
> + if (p->state == V4V_STATE_DISCONNECTED) {
> + p->state = V4V_STATE_BOUND;
> + p->r->type = V4V_RTYPE_DGRAM;
> + ret = -ECONNREFUSED;
> + break;
> + }
> + }
> +
> + return ret;
> +}
> +
> +static int allocate_fd_with_private(void *private)
> +{
> + int fd;
> + struct file *f;
> + struct qstr name = {.name = "" };
> + struct path path;
> + struct inode *ind;
> +
> + fd = get_unused_fd();
> + if (fd < 0)
> + return fd;
> +
> + path.dentry = d_alloc_pseudo(v4v_mnt->mnt_sb, &name);
> + if (unlikely(!path.dentry)) {
> + put_unused_fd(fd);
> + return -ENOMEM;
> + }
> + ind = new_inode(v4v_mnt->mnt_sb);
> + ind->i_ino = get_next_ino();
> + ind->i_fop = v4v_mnt->mnt_root->d_inode->i_fop;
> + ind->i_state = v4v_mnt->mnt_root->d_inode->i_state;
> + ind->i_mode = v4v_mnt->mnt_root->d_inode->i_mode;
> + ind->i_uid = current_fsuid();
> + ind->i_gid = current_fsgid();
> + d_instantiate(path.dentry, ind);
> +
> + path.mnt = mntget(v4v_mnt);
> +
> + f = alloc_file(&path, FMODE_READ | FMODE_WRITE,
&v4v_fops_stream);
> + if (!f) {
> + /* Put back fd ? */
> + return -ENFILE;
> + }
> +
> + f->private_data = private;
> + fd_install(fd, f);
> +
> + return fd;
> +}
> +
> +static int
> +v4v_accept(struct v4v_private *p, struct v4v_addr *peer, int nonblock)
> +{
> + int fd;
> + int ret = 0;
> + struct v4v_private *a = NULL;
> + struct pending_recv *r = NULL;
> + unsigned long flags;
> + struct v4v_stream_header sh;
> +
> + if (p->ptype != V4V_PTYPE_STREAM)
> + return -ENOTTY;
> +
> + if (p->state != V4V_STATE_LISTENING) {
> + return -EINVAL;
> + }
> +
> + /* FIXME: leak! */
> + for (;;) {
> + ret > +
wait_event_interruptible(p->readq,
> + (!list_empty
> +
(&p->pending_recv_list))
> + || nonblock);
> + if (ret)
> + return ret;
> +
> + /* Write lock implicitly has pending_recv_lock */
> + write_lock_irqsave(&list_lock, flags);
> +
> + if (!list_empty(&p->pending_recv_list)) {
> + r = list_first_entry(&p->pending_recv_list,
> + struct pending_recv, node);
> +
> + list_del(&r->node);
> + atomic_dec(&p->pending_recv_count);
> +
> + if ((!r->data_len) && (r->sh.flags
& V4V_SHF_SYN))
> + break;
> +
> + kfree(r);
> + }
> +
> + write_unlock_irqrestore(&list_lock, flags);
> + if (nonblock)
> + return -EAGAIN;
> + }
> + write_unlock_irqrestore(&list_lock, flags);
> +
> + a = kmalloc(sizeof(struct v4v_private), GFP_KERNEL);
> + if (!a) {
> + ret = -ENOMEM;
> + goto release;
> + }
> +
> + memset(a, 0, sizeof(struct v4v_private));
> + a->state = V4V_STATE_ACCEPTED;
> + a->ptype = V4V_PTYPE_STREAM;
> + a->r = p->r;
> + if (!get_ring(a->r)) {
> + a->r = NULL;
> + ret = -EINVAL;
> + goto release;
> + }
> +
> + init_waitqueue_head(&a->readq);
> + init_waitqueue_head(&a->writeq);
> + spin_lock_init(&a->pending_recv_lock);
> + INIT_LIST_HEAD(&a->pending_recv_list);
> + atomic_set(&a->pending_recv_count, 0);
> +
> + a->send_blocked = 0;
> + a->peer = r->from;
> + a->conid = r->sh.conid;
> +
> + if (peer)
> + *peer = r->from;
> +
> + fd = allocate_fd_with_private(a);
> + if (fd < 0) {
> + ret = fd;
> + goto release;
> + }
> +
> + write_lock_irqsave(&list_lock, flags);
> + list_add(&a->node, &a->r->privates);
> + write_unlock_irqrestore(&list_lock, flags);
> +
> + /* Ship the ACK */
> + sh.conid = a->conid;
> + sh.flags = V4V_SHF_ACK;
> +
> + xmit_queue_inline(&a->r->ring->id, &a->peer,
&sh,
> + sizeof(sh), V4V_PROTO_STREAM);
> + kfree(r);
> +
> + return fd;
> +
> + release:
> + kfree(r);
> + if (a) {
> + write_lock_irqsave(&list_lock, flags);
> + if (a->r)
> + put_ring(a->r);
> + write_unlock_irqrestore(&list_lock, flags);
> + kfree(a);
> + }
> + return ret;
> +}
> +
> +ssize_t
> +v4v_sendto(struct v4v_private * p, const void *buf, size_t len, int flags,
> + v4v_addr_t * addr, int nonblock)
> +{
> + ssize_t rc;
> +
> + if (!access_ok(VERIFY_READ, buf, len))
> + return -EFAULT;
> + if (!access_ok(VERIFY_READ, addr, len))
> + return -EFAULT;
> +
> + if (flags & MSG_DONTWAIT)
> + nonblock++;
> +
> + switch (p->ptype) {
> + case V4V_PTYPE_DGRAM:
> + switch (p->state) {
> + case V4V_STATE_BOUND:
> + if (!addr)
> + return -ENOTCONN;
> + rc = v4v_sendto_from_sponsor(p, buf, len,
nonblock,
> + addr,
V4V_PROTO_DGRAM);
> + break;
> +
> + case V4V_STATE_CONNECTED:
> + if (addr)
> + return -EISCONN;
> +
> + rc = v4v_sendto_from_sponsor(p, buf, len,
nonblock,
> + &p->peer,
V4V_PROTO_DGRAM);
> + break;
> +
> + default:
> + return -EINVAL;
> + }
> + break;
> + case V4V_PTYPE_STREAM:
> + if (addr)
> + return -EISCONN;
> + switch (p->state) {
> + case V4V_STATE_CONNECTING:
> + case V4V_STATE_BOUND:
> + return -ENOTCONN;
> + case V4V_STATE_CONNECTED:
> + case V4V_STATE_ACCEPTED:
> + rc = v4v_send_stream(p, buf, len, nonblock);
> + break;
> + case V4V_STATE_DISCONNECTED:
> +
> + rc = -EPIPE;
> + break;
> + default:
> +
> + return -EINVAL;
> + }
> + break;
> + default:
> +
> + return -ENOTTY;
> + }
> +
> + if ((rc == -EPIPE) && !(flags & MSG_NOSIGNAL))
> + send_sig(SIGPIPE, current, 0);
> +
> + return rc;
> +}
> +
> +ssize_t
> +v4v_recvfrom(struct v4v_private * p, void *buf, size_t len, int flags,
> + v4v_addr_t * addr, int nonblock)
> +{
> + int peek = 0;
> + ssize_t rc = 0;
> +
> + if (!access_ok(VERIFY_WRITE, buf, len))
> + return -EFAULT;
> + if ((addr) && (!access_ok(VERIFY_WRITE, addr,
sizeof(v4v_addr_t))))
> + return -EFAULT;
> +
> + if (flags & MSG_DONTWAIT)
> + nonblock++;
> + if (flags & MSG_PEEK)
> + peek++;
> +
> + switch (p->ptype) {
> + case V4V_PTYPE_DGRAM:
> + rc = v4v_recvfrom_dgram(p, buf, len, nonblock, peek,
addr);
> + break;
> + case V4V_PTYPE_STREAM:
> + if (peek)
> + return -EINVAL;
> +
> + switch (p->state) {
> + case V4V_STATE_BOUND:
> + return -ENOTCONN;
> + case V4V_STATE_CONNECTED:
> + case V4V_STATE_ACCEPTED:
> + if (addr)
> + *addr = p->peer;
> + rc = v4v_recv_stream(p, buf, len, flags,
nonblock);
> + break;
> + case V4V_STATE_DISCONNECTED:
> + rc = 0;
> + break;
> + default:
> + rc = -EINVAL;
> + }
> + }
> +
> + if ((rc > (ssize_t) len) && !(flags & MSG_TRUNC))
> + rc = len;
> +
> + return rc;
> +}
> +
> +/* fops */
> +
> +static int v4v_open_dgram(struct inode *inode, struct file *f)
> +{
> + struct v4v_private *p;
> +
> + p = kmalloc(sizeof(struct v4v_private), GFP_KERNEL);
> + if (!p)
> + return -ENOMEM;
> +
> + memset(p, 0, sizeof(struct v4v_private));
> + p->state = V4V_STATE_IDLE;
> + p->desired_ring_size = DEFAULT_RING_SIZE;
> + p->r = NULL;
> + p->ptype = V4V_PTYPE_DGRAM;
> + p->send_blocked = 0;
> +
> + init_waitqueue_head(&p->readq);
> + init_waitqueue_head(&p->writeq);
> +
> + spin_lock_init(&p->pending_recv_lock);
> + INIT_LIST_HEAD(&p->pending_recv_list);
> + atomic_set(&p->pending_recv_count, 0);
> +
> + f->private_data = p;
> + return 0;
> +}
> +
> +static int v4v_open_stream(struct inode *inode, struct file *f)
> +{
> + struct v4v_private *p;
> +
> + p = kmalloc(sizeof(struct v4v_private), GFP_KERNEL);
> + if (!p)
> + return -ENOMEM;
> +
> + memset(p, 0, sizeof(struct v4v_private));
> + p->state = V4V_STATE_IDLE;
> + p->desired_ring_size = DEFAULT_RING_SIZE;
> + p->r = NULL;
> + p->ptype = V4V_PTYPE_STREAM;
> + p->send_blocked = 0;
> +
> + init_waitqueue_head(&p->readq);
> + init_waitqueue_head(&p->writeq);
> +
> + spin_lock_init(&p->pending_recv_lock);
> + INIT_LIST_HEAD(&p->pending_recv_list);
> + atomic_set(&p->pending_recv_count, 0);
> +
> + f->private_data = p;
> + return 0;
> +}
> +
> +static int v4v_release(struct inode *inode, struct file *f)
> +{
> + struct v4v_private *p = (struct v4v_private *)f->private_data;
> + unsigned long flags;
> + struct pending_recv *pending;
> +
> + if (p->ptype == V4V_PTYPE_STREAM) {
> + switch (p->state) {
> + case V4V_STATE_CONNECTED:
> + case V4V_STATE_CONNECTING:
> + case V4V_STATE_ACCEPTED:
> + xmit_queue_rst_to(&p->r->ring->id,
p->conid, &p->peer);
> + break;
> + default:
> + break;
> + }
> + }
> +
> + write_lock_irqsave(&list_lock, flags);
> + if (!p->r) {
> + write_unlock_irqrestore(&list_lock, flags);
> + goto release;
> + }
> +
> + if (p != p->r->sponsor) {
> + put_ring(p->r);
> + list_del(&p->node);
> + write_unlock_irqrestore(&list_lock, flags);
> + goto release;
> + }
> +
> + p->r->sponsor = NULL;
> + put_ring(p->r);
> + write_unlock_irqrestore(&list_lock, flags);
> +
> + while (!list_empty(&p->pending_recv_list)) {
> + pending > +
list_first_entry(&p->pending_recv_list,
> + struct pending_recv, node);
> +
> + list_del(&pending->node);
> + kfree(pending);
> + atomic_dec(&p->pending_recv_count);
> + }
> +
> + release:
> + kfree(p);
> +
> + return 0;
> +}
> +
> +static ssize_t
> +v4v_write(struct file *f, const char __user * buf, size_t count, loff_t *
ppos)
> +{
> + struct v4v_private *p = f->private_data;
> + int nonblock = f->f_flags & O_NONBLOCK;
> +
> + return v4v_sendto(p, buf, count, 0, NULL, nonblock);
> +}
> +
> +static ssize_t
> +v4v_read(struct file *f, char __user * buf, size_t count, loff_t * ppos)
> +{
> + struct v4v_private *p = f->private_data;
> + int nonblock = f->f_flags & O_NONBLOCK;
> +
> + return v4v_recvfrom(p, (void *)buf, count, 0, NULL, nonblock);
> +}
> +
> +static long v4v_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
> +{
> + int rc = -ENOTTY;
> +
> + int nonblock = f->f_flags & O_NONBLOCK;
> + struct v4v_private *p = f->private_data;
> +
> + if (_IOC_TYPE(cmd) != V4V_TYPE)
> + return rc;
> +
> + switch (cmd) {
> + case V4VIOCSETRINGSIZE:
> + if (!access_ok(VERIFY_READ, arg, sizeof(uint32_t)))
> + return -EFAULT;
> + rc = v4v_set_ring_size(p, *(uint32_t *) arg);
> + break;
> + case V4VIOCBIND:
> + if (!access_ok(VERIFY_READ, arg, sizeof(struct
v4v_ring_id)))
> + return -EFAULT;
> + rc = v4v_bind(p, (struct v4v_ring_id *)arg);
> + break;
> + case V4VIOCGETSOCKNAME:
> + if (!access_ok(VERIFY_WRITE, arg, sizeof(struct
v4v_ring_id)))
> + return -EFAULT;
> + rc = v4v_get_sock_name(p, (struct v4v_ring_id *)arg);
> + break;
> + case V4VIOCGETPEERNAME:
> + if (!access_ok(VERIFY_WRITE, arg, sizeof(v4v_addr_t)))
> + return -EFAULT;
> + rc = v4v_get_peer_name(p, (v4v_addr_t *) arg);
> + break;
> + case V4VIOCCONNECT:
> + if (!access_ok(VERIFY_READ, arg, sizeof(v4v_addr_t)))
> + return -EFAULT;
> + /* Bind if not done */
> + if (p->state == V4V_STATE_IDLE) {
> + struct v4v_ring_id id;
> + memset(&id, 0, sizeof(id));
> + id.partner = V4V_DOMID_NONE;
> + id.addr.domain = V4V_DOMID_NONE;
> + id.addr.port = 0;
> + rc = v4v_bind(p, &id);
> + if (rc)
> + break;
> + }
> + rc = v4v_connect(p, (v4v_addr_t *) arg, nonblock);
> + break;
> + case V4VIOCGETCONNECTERR:
> + {
> + unsigned long flags;
> + if (!access_ok(VERIFY_WRITE, arg, sizeof(int)))
> + return -EFAULT;
> +
> + spin_lock_irqsave(&p->pending_recv_lock,
flags);
> + *(int *)arg = p->pending_error;
> + p->pending_error = 0;
> +
spin_unlock_irqrestore(&p->pending_recv_lock, flags);
> + rc = 0;
> + }
> + break;
> + case V4VIOCLISTEN:
> + rc = v4v_listen(p);
> + break;
> + case V4VIOCACCEPT:
> + if (!access_ok(VERIFY_WRITE, arg, sizeof(v4v_addr_t)))
> + return -EFAULT;
> + rc = v4v_accept(p, (v4v_addr_t *) arg, nonblock);
> + break;
> + case V4VIOCSEND:
> + if (!access_ok(VERIFY_READ, arg, sizeof(struct v4v_dev)))
> + return -EFAULT;
> + {
> + struct v4v_dev a = *(struct v4v_dev *)arg;
> +
> + rc = v4v_sendto(p, a.buf, a.len, a.flags, a.addr,
> + nonblock);
> + }
> + break;
> + case V4VIOCRECV:
> + if (!access_ok(VERIFY_READ, arg, sizeof(struct v4v_dev)))
> + return -EFAULT;
> + {
> + struct v4v_dev a = *(struct v4v_dev *)arg;
> + rc = v4v_recvfrom(p, a.buf, a.len, a.flags,
a.addr,
> + nonblock);
> + }
> + break;
> + case V4VIOCVIPTABLESADD:
> + if (!access_ok
> + (VERIFY_READ, arg, sizeof(struct
v4v_viptables_rule_pos)))
> + return -EFAULT;
> + {
> + struct v4v_viptables_rule_pos *rule > +
(struct v4v_viptables_rule_pos *)arg;
> + v4v_viptables_add(p, rule->rule,
rule->position);
> + rc = 0;
> + }
> + break;
> + case V4VIOCVIPTABLESDEL:
> + if (!access_ok
> + (VERIFY_READ, arg, sizeof(struct
v4v_viptables_rule_pos)))
> + return -EFAULT;
> + {
> + struct v4v_viptables_rule_pos *rule > +
(struct v4v_viptables_rule_pos *)arg;
> + v4v_viptables_del(p, rule->rule,
rule->position);
> + rc = 0;
> + }
> + break;
> + case V4VIOCVIPTABLESLIST:
> + if (!access_ok
> + (VERIFY_READ, arg, sizeof(struct v4v_viptables_list)))
> + return -EFAULT;
> + {
> + struct v4v_viptables_list *list > +
(struct v4v_viptables_list *)arg;
> + rc = v4v_viptables_list(p, list);
> + }
> + break;
> + default:
> + printk(KERN_ERR "v4v: unkown ioctl, cmd:0x%x nr:%d
size:0x%x\n",
> + cmd, _IOC_NR(cmd), _IOC_SIZE(cmd));
> + }
> +
> + return rc;
> +}
> +
> +static unsigned int v4v_poll(struct file *f, poll_table * pt)
> +{
> + unsigned int mask = 0;
> + struct v4v_private *p = f->private_data;
> +
> + read_lock(&list_lock);
> +
> + switch (p->ptype) {
> + case V4V_PTYPE_DGRAM:
> + switch (p->state) {
> + case V4V_STATE_CONNECTED:
> + case V4V_STATE_BOUND:
> + poll_wait(f, &p->readq, pt);
> + mask |= POLLOUT | POLLWRNORM;
> + if (p->r->ring->tx_ptr !=
p->r->ring->rx_ptr)
> + mask |= POLLIN | POLLRDNORM;
> + break;
> + default:
> + break;
> + }
> + break;
> + case V4V_PTYPE_STREAM:
> + switch (p->state) {
> + case V4V_STATE_BOUND:
> + break;
> + case V4V_STATE_LISTENING:
> + poll_wait(f, &p->readq, pt);
> + if (!list_empty(&p->pending_recv_list))
> + mask |= POLLIN | POLLRDNORM;
> + break;
> + case V4V_STATE_ACCEPTED:
> + case V4V_STATE_CONNECTED:
> + poll_wait(f, &p->readq, pt);
> + poll_wait(f, &p->writeq, pt);
> + if (!p->send_blocked)
> + mask |= POLLOUT | POLLWRNORM;
> + if (!list_empty(&p->pending_recv_list))
> + mask |= POLLIN | POLLRDNORM;
> + break;
> + case V4V_STATE_CONNECTING:
> + poll_wait(f, &p->writeq, pt);
> + break;
> + case V4V_STATE_DISCONNECTED:
> + mask |= POLLOUT | POLLWRNORM;
> + mask |= POLLIN | POLLRDNORM;
> + break;
> + case V4V_STATE_IDLE:
> + break;
> + }
> + break;
> + }
> +
> + read_unlock(&list_lock);
> + return mask;
> +}
> +
> +static const struct file_operations v4v_fops_stream = {
> + .owner = THIS_MODULE,
> + .write = v4v_write,
> + .read = v4v_read,
> + .unlocked_ioctl = v4v_ioctl,
> + .open = v4v_open_stream,
> + .release = v4v_release,
> + .poll = v4v_poll,
> +};
> +
> +static const struct file_operations v4v_fops_dgram = {
> + .owner = THIS_MODULE,
> + .write = v4v_write,
> + .read = v4v_read,
> + .unlocked_ioctl = v4v_ioctl,
> + .open = v4v_open_dgram,
> + .release = v4v_release,
> + .poll = v4v_poll,
> +};
> +
> +/* Xen VIRQ */
> +static int v4v_irq = -1;
> +
> +static void unbind_virq(void)
> +{
> + unbind_from_irqhandler (v4v_irq, NULL);
> + v4v_irq = -1;
> +}
> +
> +static int bind_evtchn(void)
> +{
> + v4v_info_t info;
> + int result;
> +
> + v4v_info(&info);
> + if (info.ring_magic != V4V_RING_MAGIC)
> + return 1;
> +
> + result > +
bind_interdomain_evtchn_to_irqhandler(
> + 0, info.evtchn,
> + v4v_interrupt, IRQF_SAMPLE_RANDOM,
"v4v", NULL);
> +
> + if (result < 0) {
> + unbind_virq();
> + return result;
> + }
> +
> + v4v_irq = result;
> +
> + return 0;
> +}
> +
> +/* V4V Device */
> +
> +static struct miscdevice v4v_miscdev_dgram = {
> + .minor = MISC_DYNAMIC_MINOR,
> + .name = "v4v_dgram",
> + .fops = &v4v_fops_dgram,
> +};
> +
> +static struct miscdevice v4v_miscdev_stream = {
> + .minor = MISC_DYNAMIC_MINOR,
> + .name = "v4v_stream",
> + .fops = &v4v_fops_stream,
> +};
> +
> +static int v4v_suspend(struct platform_device *dev, pm_message_t state)
> +{
> + unbind_virq();
> + return 0;
> +}
> +
> +static int v4v_resume(struct platform_device *dev)
> +{
> + struct ring *r;
> +
> + read_lock(&list_lock);
> + list_for_each_entry(r, &ring_list, node) {
> + refresh_pfn_list(r);
> + if (register_ring(r)) {
> + printk(KERN_ERR
> + "Failed to re-register a v4v ring on
resume, port=0x%08x\n",
> + r->ring->id.addr.port);
> + }
> + }
> + read_unlock(&list_lock);
> +
> + if (bind_evtchn()) {
> + printk(KERN_ERR "v4v_resume: failed to bind v4v
evtchn\n");
> + return -ENODEV;
> + }
> +
> + return 0;
> +}
> +
/* Nothing to do at shutdown; hook kept so the driver struct is complete. */
static void v4v_shutdown(struct platform_device *dev)
{
}
> +
> +static int __devinit v4v_probe(struct platform_device *dev)
> +{
> + int err = 0;
> + int ret;
> +
> + ret = setup_fs();
> + if (ret)
> + return ret;
> +
> + INIT_LIST_HEAD(&ring_list);
> + rwlock_init(&list_lock);
> + INIT_LIST_HEAD(&pending_xmit_list);
> + spin_lock_init(&pending_xmit_lock);
> + spin_lock_init(&interrupt_lock);
> + atomic_set(&pending_xmit_count, 0);
> +
> + if (bind_evtchn()) {
> + printk(KERN_ERR "failed to bind v4v evtchn\n");
> + unsetup_fs();
> + return -ENODEV;
> + }
> +
> + err = misc_register(&v4v_miscdev_dgram);
> + if (err != 0) {
> + printk(KERN_ERR "Could not register
/dev/v4v_dgram\n");
> + unsetup_fs();
> + return err;
> + }
> +
> + err = misc_register(&v4v_miscdev_stream);
> + if (err != 0) {
> + printk(KERN_ERR "Could not register
/dev/v4v_stream\n");
> + unsetup_fs();
> + return err;
> + }
> +
> + printk(KERN_INFO "Xen V4V device installed.\n");
> + return 0;
> +}
> +
> +/* Platform Gunge */
> +
> +static int __devexit v4v_remove(struct platform_device *dev)
> +{
> + unbind_virq();
> + misc_deregister(&v4v_miscdev_dgram);
> + misc_deregister(&v4v_miscdev_stream);
> + unsetup_fs();
> + return 0;
> +}
> +
> +static struct platform_driver v4v_driver = {
> + .driver = {
> + .name = "v4v",
> + .owner = THIS_MODULE,
> + },
> + .probe = v4v_probe,
> + .remove = __devexit_p(v4v_remove),
> + .shutdown = v4v_shutdown,
> + .suspend = v4v_suspend,
> + .resume = v4v_resume,
> +};
> +
> +static struct platform_device *v4v_platform_device;
> +
> +static int __init v4v_init(void)
> +{
> + int error;
> +
> + if (!xen_domain())
> + {
> + printk(KERN_ERR "v4v only works under Xen\n");
> + return -ENODEV;
> + }
> +
> + error = platform_driver_register(&v4v_driver);
> + if (error)
> + return error;
> +
> + v4v_platform_device = platform_device_alloc("v4v", -1);
> + if (!v4v_platform_device) {
> + platform_driver_unregister(&v4v_driver);
> + return -ENOMEM;
> + }
> +
> + error = platform_device_add(v4v_platform_device);
> + if (error) {
> + platform_device_put(v4v_platform_device);
> + platform_driver_unregister(&v4v_driver);
> + return error;
> + }
> +
> + return 0;
> +}
> +
> +static void __exit v4v_cleanup(void)
> +{
> + platform_device_unregister(v4v_platform_device);
> + platform_driver_unregister(&v4v_driver);
> +}
> +
> +module_init(v4v_init);
> +module_exit(v4v_cleanup);
> +MODULE_LICENSE("GPL");
> diff --git a/drivers/xen/v4v_utils.h b/drivers/xen/v4v_utils.h
> new file mode 100644
> index 0000000..91c00b6
> --- /dev/null
> +++ b/drivers/xen/v4v_utils.h
> @@ -0,0 +1,278 @@
>
+/******************************************************************************
> + * V4V
> + *
> + * Version 2 of v2v (Virtual-to-Virtual)
> + *
> + * Copyright (c) 2010, Citrix Systems
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
USA
> + */
> +
> +#ifndef __V4V_UTILS_H__
> +# define __V4V_UTILS_H__
> +
> +/* Compiler specific hacks */
> +#if defined(__GNUC__)
> +# define V4V_UNUSED __attribute__ ((unused))
> +# ifndef __STRICT_ANSI__
> +# define V4V_INLINE inline
> +# else
> +# define V4V_INLINE
> +# endif
> +#else /* !__GNUC__ */
> +# define V4V_UNUSED
> +# define V4V_INLINE
> +#endif
> +
> +
> +/*
> + * Utility functions
> + */
> +static V4V_INLINE uint32_t
> +v4v_ring_bytes_to_read (volatile struct v4v_ring *r)
> +{
> + int32_t ret;
> + ret = r->tx_ptr - r->rx_ptr;
> + if (ret >= 0)
> + return ret;
> + return (uint32_t) (r->len + ret);
> +}
> +
> +
> +/*
> + * Copy at most t bytes of the next message in the ring, into the buffer
> + * at _buf, setting from and protocol if they are not NULL, returns
> + * the actual length of the message, or -1 if there is nothing to read
> + */
> +V4V_UNUSED static V4V_INLINE ssize_t
> +v4v_copy_out (struct v4v_ring *r, struct v4v_addr *from, uint32_t *
protocol,
> + void *_buf, size_t t, int consume)
> +{
> + volatile struct v4v_ring_message_header *mh;
> + /* unnecessary cast from void * required by MSVC compiler */
> + uint8_t *buf = (uint8_t *) _buf;
> + uint32_t btr = v4v_ring_bytes_to_read (r);
> + uint32_t rxp = r->rx_ptr;
> + uint32_t bte;
> + uint32_t len;
> + ssize_t ret;
> +
> +
> + if (btr < sizeof (*mh))
> + return -1;
> +
> + /*
> + * Because the message_header is 128 bits long and the ring is 128
bit
> + * aligned, we're guaranteed never to wrap
> + */
> + mh = (volatile struct v4v_ring_message_header *)
&r->ring[r->rx_ptr];
> +
> + len = mh->len;
> +
> + if (btr < len)
> + {
> + return -1;
> + }
> +
> +#if defined(__GNUC__)
> + if (from)
> + *from = mh->source;
> +#else
> + /* MSVC can't do the above */
> + if (from)
> + memcpy((void *) from, (void *) &(mh->source),
sizeof(struct v4v_addr));
> +#endif
> +
> + if (protocol)
> + *protocol = mh->protocol;
> +
> + rxp += sizeof (*mh);
> + if (rxp == r->len)
> + rxp = 0;
> + len -= sizeof (*mh);
> + ret = len;
> +
> + bte = r->len - rxp;
> +
> + if (bte < len)
> + {
> + if (t < bte)
> + {
> + if (buf)
> + {
> + memcpy (buf, (void *)
&r->ring[rxp], t);
> + buf += t;
> + }
> +
> + rxp = 0;
> + len -= bte;
> + t = 0;
> + }
> + else
> + {
> + if (buf)
> + {
> + memcpy (buf, (void *)
&r->ring[rxp], bte);
> + buf += bte;
> + }
> + rxp = 0;
> + len -= bte;
> + t -= bte;
> + }
> + }
> +
> + if (buf && t)
> + memcpy (buf, (void *) &r->ring[rxp], (t < len) ?
t : len);
> +
> +
> + rxp += V4V_ROUNDUP (len);
> + if (rxp == r->len)
> + rxp = 0;
> +
> + mb ();
> +
> + if (consume)
> + r->rx_ptr = rxp;
> +
> + return ret;
> +}
> +
> +static V4V_INLINE void
> +v4v_memcpy_skip (void *_dst, const void *_src, size_t len, size_t *skip)
> +{
> + const uint8_t *src = (const uint8_t *) _src;
> + uint8_t *dst = (uint8_t *) _dst;
> +
> + if (!*skip)
> + {
> + memcpy (dst, src, len);
> + return;
> + }
> +
> + if (*skip >= len)
> + {
> + *skip -= len;
> + return;
> + }
> +
> + src += *skip;
> + dst += *skip;
> + len -= *skip;
> + *skip = 0;
> +
> + memcpy (dst, src, len);
> +}
> +
> +/*
> + * Copy at most t bytes of the next message in the ring, into the buffer
> + * at _buf, skipping skip bytes, setting from and protocol if they are not
> + * NULL, returns the actual length of the message, or -1 if there is
> + * nothing to read
> + */
> +static ssize_t
> +v4v_copy_out_offset(struct v4v_ring *r, struct v4v_addr *from,
> + uint32_t * protocol, void *_buf, size_t t, int
consume,
> + size_t skip) V4V_UNUSED;
> +
> +V4V_INLINE static ssize_t
> +v4v_copy_out_offset(struct v4v_ring *r, struct v4v_addr *from,
> + uint32_t * protocol, void *_buf, size_t t, int
consume,
> + size_t skip)
> +{
> + volatile struct v4v_ring_message_header *mh;
> + /* unnecessary cast from void * required by MSVC compiler */
> + uint8_t *buf = (uint8_t *) _buf;
> + uint32_t btr = v4v_ring_bytes_to_read (r);
> + uint32_t rxp = r->rx_ptr;
> + uint32_t bte;
> + uint32_t len;
> + ssize_t ret;
> +
> + buf -= skip;
> +
> + if (btr < sizeof (*mh))
> + return -1;
> +
> + /*
> + * Because the message_header is 128 bits long and the ring is 128
bit
> + * aligned, we're guaranteed never to wrap
> + */
> + mh = (volatile struct v4v_ring_message_header
*)&r->ring[r->rx_ptr];
> +
> + len = mh->len;
> + if (btr < len)
> + return -1;
> +
> +#if defined(__GNUC__)
> + if (from)
> + *from = mh->source;
> +#else
> + /* MSVC can't do the above */
> + if (from)
> + memcpy((void *)from, (void *)&(mh->source),
sizeof(struct v4v_addr));
> +#endif
> +
> + if (protocol)
> + *protocol = mh->protocol;
> +
> + rxp += sizeof (*mh);
> + if (rxp == r->len)
> + rxp = 0;
> + len -= sizeof (*mh);
> + ret = len;
> +
> + bte = r->len - rxp;
> +
> + if (bte < len)
> + {
> + if (t < bte)
> + {
> + if (buf)
> + {
> + v4v_memcpy_skip (buf, (void *)
&r->ring[rxp], t, &skip);
> + buf += t;
> + }
> +
> + rxp = 0;
> + len -= bte;
> + t = 0;
> + }
> + else
> + {
> + if (buf)
> + {
> + v4v_memcpy_skip (buf, (void *)
&r->ring[rxp], bte,
> + &skip);
> + buf += bte;
> + }
> + rxp = 0;
> + len -= bte;
> + t -= bte;
> + }
> + }
> +
> + if (buf && t)
> + v4v_memcpy_skip (buf, (void *) &r->ring[rxp], (t
< len) ? t : len,
> + &skip);
> +
> +
> + rxp += V4V_ROUNDUP (len);
> + if (rxp == r->len)
> + rxp = 0;
> +
> + mb ();
> +
> + if (consume)
> + r->rx_ptr = rxp;
> +
> + return ret;
> +}
> +
> +#endif /* !__V4V_UTILS_H__ */
> diff --git a/include/xen/interface/v4v.h b/include/xen/interface/v4v.h
> new file mode 100644
> index 0000000..36ff95c
> --- /dev/null
> +++ b/include/xen/interface/v4v.h
> @@ -0,0 +1,299 @@
>
+/******************************************************************************
> + * V4V
> + *
> + * Version 2 of v2v (Virtual-to-Virtual)
> + *
> + * Copyright (c) 2010, Citrix Systems
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
USA
> + */
> +
> +#ifndef __XEN_PUBLIC_V4V_H__
> +#define __XEN_PUBLIC_V4V_H__
> +
> +/*
> + * Structure definitions
> + */
> +
> +#define V4V_RING_MAGIC 0xA822F72BB0B9D8CC
> +#define V4V_RING_DATA_MAGIC 0x45FE852220B801E4
> +
> +#define V4V_PROTO_DGRAM 0x3c2c1db8
> +#define V4V_PROTO_STREAM 0x70f6a8e5
> +
> +#define V4V_DOMID_INVALID (0x7FFFU)
> +#define V4V_DOMID_NONE V4V_DOMID_INVALID
> +#define V4V_DOMID_ANY V4V_DOMID_INVALID
> +#define V4V_PORT_NONE 0
> +
> +typedef struct v4v_iov
> +{
> + uint64_t iov_base;
> + uint64_t iov_len;
> +} v4v_iov_t;
> +
> +typedef struct v4v_addr
> +{
> + uint32_t port;
> + domid_t domain;
> + uint16_t pad;
> +} v4v_addr_t;
> +
> +typedef struct v4v_ring_id
> +{
> + v4v_addr_t addr;
> + domid_t partner;
> + uint16_t pad;
> +} v4v_ring_id_t;
> +
> +typedef uint64_t v4v_pfn_t;
> +
> +typedef struct
> +{
> + v4v_addr_t src;
> + v4v_addr_t dst;
> +} v4v_send_addr_t;
> +
> +/*
> + * v4v_ring
> + * id:
> + * xen only looks at this during register/unregister
> + * and will fill in id.addr.domain
> + *
> + * rx_ptr: rx pointer, modified by domain
> + * tx_ptr: tx pointer, modified by xen
> + *
> + */
> +struct v4v_ring
> +{
> + uint64_t magic;
> + v4v_ring_id_t id;
> + uint32_t len;
> + uint32_t rx_ptr;
> + uint32_t tx_ptr;
> + uint8_t reserved[32];
> + uint8_t ring[0];
> +};
> +typedef struct v4v_ring v4v_ring_t;
> +
> +#define V4V_RING_DATA_F_EMPTY (1U << 0) /* Ring is empty */
> +#define V4V_RING_DATA_F_EXISTS (1U << 1) /* Ring exists */
> +#define V4V_RING_DATA_F_PENDING (1U << 2) /* Pending interrupt
exists - do not
> + * rely on this field - for
> + * profiling only */
> +#define V4V_RING_DATA_F_SUFFICIENT (1U << 3) /* Sufficient space to
queue
> + * space_required bytes
exists */
> +
> +#if defined(__GNUC__)
> +# define V4V_RING_DATA_ENT_FULLRING
> +# define V4V_RING_DATA_ENT_FULL
> +#else
> +# define V4V_RING_DATA_ENT_FULLRING fullring
> +# define V4V_RING_DATA_ENT_FULL full
> +#endif
> +typedef struct v4v_ring_data_ent
> +{
> + v4v_addr_t ring;
> + uint16_t flags;
> + uint16_t pad;
> + uint32_t space_required;
> + uint32_t max_message_size;
> +} v4v_ring_data_ent_t;
> +
> +typedef struct v4v_ring_data
> +{
> + uint64_t magic;
> + uint32_t nent;
> + uint32_t pad;
> + uint64_t reserved[4];
> + v4v_ring_data_ent_t data[0];
> +} v4v_ring_data_t;
> +
> +struct v4v_info
> +{
> + uint64_t ring_magic;
> + uint64_t data_magic;
> + evtchn_port_t evtchn;
> +};
> +typedef struct v4v_info v4v_info_t;
> +
> +#define V4V_ROUNDUP(a) (((a) +0xf ) & ~0xf)
> +/*
> + * Messages on the ring are padded to 128 bits
> + * Len here refers to the exact length of the data not including the
> + * 128 bit header. The message uses
> + * ((len +0xf) & ~0xf) + sizeof(v4v_ring_message_header) bytes
> + */
> +
> +#define V4V_SHF_SYN (1 << 0)
> +#define V4V_SHF_ACK (1 << 1)
> +#define V4V_SHF_RST (1 << 2)
> +
> +#define V4V_SHF_PING (1 << 8)
> +#define V4V_SHF_PONG (1 << 9)
> +
> +struct v4v_stream_header
> +{
> + uint32_t flags;
> + uint32_t conid;
> +};
> +
> +struct v4v_ring_message_header
> +{
> + uint32_t len;
> + uint32_t pad0;
> + v4v_addr_t source;
> + uint32_t protocol;
> + uint32_t pad1;
> + uint8_t data[0];
> +};
> +
> +typedef struct v4v_viptables_rule
> +{
> + v4v_addr_t src;
> + v4v_addr_t dst;
> + uint32_t accept;
> + uint32_t pad;
> +} v4v_viptables_rule_t;
> +
> +typedef struct v4v_viptables_list
> +{
> + uint32_t start_rule;
> + uint32_t nb_rules;
> + struct v4v_viptables_rule rules[0];
> +} v4v_viptables_list_t;
> +
> +/*
> + * HYPERCALLS
> + */
> +
> +#define V4VOP_register_ring 1
> +/*
> + * Registers a ring with Xen, if a ring with the same v4v_ring_id exists,
> + * this ring takes its place, registration will not change tx_ptr
> + * unless it is invalid
> + *
> + * do_v4v_op(V4VOP_register_ring,
> + * v4v_ring, XEN_GUEST_HANDLE(v4v_pfn),
> + * npage, 0)
> + */
> +
> +
> +#define V4VOP_unregister_ring 2
> +/*
> + * Unregister a ring.
> + *
> + * v4v_hypercall(V4VOP_unregister_ring, v4v_ring, NULL, 0, 0)
> + */
> +
> +#define V4VOP_send 3
> +/*
> + * Sends len bytes of buf to dst, giving src as the source address (xen
will
> + * ignore src->domain and put your domain in the actual message), xen
> + * first looks for a ring with id.addr==dst and id.partner==sending_domain
> + * if that fails it looks for id.addr==dst and id.partner==DOMID_ANY.
> + * protocol is the 32 bit protocol number used for the message,
> + * most likely V4V_PROTO_DGRAM or STREAM. If insufficient space exists
> + * it will return -EAGAIN and xen will trigger the V4V_INTERRUPT when
> + * sufficient space becomes available
> + *
> + * v4v_hypercall(V4VOP_send,
> + * v4v_send_addr_t addr,
> + * void* buf,
> + * uint32_t len,
> + * uint32_t protocol)
> + */
> +
> +
> +#define V4VOP_notify 4
> +/* Asks xen for information about other rings in the system
> + *
> + * ent->ring is the v4v_addr_t of the ring you want information on
> + * the same matching rules are used as for V4VOP_send.
> + *
> + * ent->space_required if this field is not null xen will check
> + * that there is space in the destination ring for this many bytes
> + * of payload. If there is it will set the V4V_RING_DATA_F_SUFFICIENT
> + * and CANCEL any pending interrupt for that ent->ring, if insufficient
> + * space is available it will schedule an interrupt and the flag will
> + * not be set.
> + *
> + * The flags are set by xen when notify replies
> + * V4V_RING_DATA_F_EMPTY ring is empty
> + * V4V_RING_DATA_F_PENDING interrupt is pending - don't rely on
this
> + * V4V_RING_DATA_F_SUFFICIENT sufficient space for space_required is there
> + * V4V_RING_DATA_F_EXISTS ring exists
> + *
> + * v4v_hypercall(V4VOP_notify,
> + * XEN_GUEST_HANDLE(v4v_ring_data_ent) ent,
> + * NULL, nent, 0)
> + */
> +
> +#define V4VOP_sendv 5
> +/*
> + * Identical to V4VOP_send except rather than buf and len it takes
> + * an array of v4v_iov and a length of the array.
> + *
> + * v4v_hypercall(V4VOP_sendv,
> + * v4v_send_addr_t addr,
> + * v4v_iov iov,
> + * uint32_t niov,
> + * uint32_t protocol)
> + */
> +
> +#define V4VOP_viptables_add 6
> +/*
> + * Insert a filtering rule after a given position.
> + *
> + * v4v_hypercall(V4VOP_viptables_add,
> + * v4v_viptables_rule_t rule,
> + * NULL,
> + * uint32_t position, 0)
> + */
> +
> +#define V4VOP_viptables_del 7
> +/*
> + * Delete a filtering rule at a given position, or the rule
> + * that matches "rule".
> + *
> + * v4v_hypercall(V4VOP_viptables_del,
> + * v4v_viptables_rule_t rule,
> + * NULL,
> + * uint32_t position, 0)
> + */
> +
> +#define V4VOP_viptables_list 8
> +/*
> + * List the currently installed filtering rules.
> + *
> + * v4v_hypercall(V4VOP_viptables_list,
> + * v4v_viptables_list_t list,
> + * NULL, 0, 0)
> + */
> +
> +#define V4VOP_info 9
> +/*
> + * v4v_hypercall(V4VOP_info,
> + * XEN_GUEST_HANDLE(v4v_info_t) info,
> + * NULL, 0, 0)
> + */
> +
> +#endif /* __XEN_PUBLIC_V4V_H__ */
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-set-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
> index a890804..395f6cd 100644
> --- a/include/xen/interface/xen.h
> +++ b/include/xen/interface/xen.h
> @@ -59,6 +59,7 @@
> #define __HYPERVISOR_physdev_op 33
> #define __HYPERVISOR_hvm_op 34
> #define __HYPERVISOR_tmem_op 38
> +#define __HYPERVISOR_v4v_op 39
>
> /* Architecture-specific hypercall definitions. */
> #define __HYPERVISOR_arch_0 48
> diff --git a/include/xen/v4vdev.h b/include/xen/v4vdev.h
> new file mode 100644
> index 0000000..a30b608
> --- /dev/null
> +++ b/include/xen/v4vdev.h
> @@ -0,0 +1,34 @@
> +#ifndef __V4V_DGRAM_H__
> +#define __V4V_DGRAM_H__
> +
> +struct v4v_dev
> +{
> + void *buf;
> + size_t len;
> + int flags;
> + v4v_addr_t *addr;
> +};
> +
> +struct v4v_viptables_rule_pos
> +{
> + struct v4v_viptables_rule* rule;
> + int position;
> +};
> +
> +#define V4V_TYPE ''W''
> +
> +#define V4VIOCSETRINGSIZE _IOW (V4V_TYPE, 1, uint32_t)
> +#define V4VIOCBIND _IOW (V4V_TYPE, 2, v4v_ring_id_t)
> +#define V4VIOCGETSOCKNAME _IOW (V4V_TYPE, 3, v4v_ring_id_t)
> +#define V4VIOCGETPEERNAME _IOW (V4V_TYPE, 4, v4v_addr_t)
> +#define V4VIOCCONNECT _IOW (V4V_TYPE, 5, v4v_addr_t)
> +#define V4VIOCGETCONNECTERR _IOW (V4V_TYPE, 6, int)
> +#define V4VIOCLISTEN _IOW (V4V_TYPE, 7, uint32_t) /*unused args */
> +#define V4VIOCACCEPT _IOW (V4V_TYPE, 8, v4v_addr_t)
> +#define V4VIOCSEND _IOW (V4V_TYPE, 9, struct v4v_dev)
> +#define V4VIOCRECV _IOW (V4V_TYPE, 10, struct v4v_dev)
> +#define V4VIOCVIPTABLESADD _IOW (V4V_TYPE, 11, struct
v4v_viptables_rule_pos)
> +#define V4VIOCVIPTABLESDEL _IOW (V4V_TYPE, 12, struct
v4v_viptables_rule_pos)
> +#define V4VIOCVIPTABLESLIST _IOW (V4V_TYPE, 13, struct v4v_viptables_list)
> +
> +#endif
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel