This converts the virtio_net driver from Rusty's
draft III implementation to use the generic bus.
Since every device needs to get its MAC address from
somewhere, we read it from the virtio configuration
space. It's up to the virtio host to fill in valid
data here.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Index: linux-2.6/drivers/net/virtio_net.c
==================================================================---
linux-2.6.orig/drivers/net/virtio_net.c
+++ linux-2.6/drivers/net/virtio_net.c
@@ -43,7 +43,7 @@ struct virtnet_info
 
 static bool skb_xmit_done(struct virtio_device *vdev)
 {
-	struct virtnet_info *vi = vdev->priv;
+	struct virtnet_info *vi = vdev->dev.driver_data;
 
 	/* In case we were waiting for output buffers. */
 	netif_wake_queue(vi->ndev);
@@ -100,7 +100,7 @@ static void try_fill_recv(struct virtnet
 
 static bool skb_recv_done(struct virtio_device *vdev)
 {
-	struct virtnet_info *vi = vdev->priv;
+	struct virtnet_info *vi = vdev->dev.driver_data;
 
 	netif_rx_schedule(vi->ndev);
 	/* Suppress further interrupts. */
@@ -217,21 +217,17 @@ static int virtnet_close(struct net_devi
 	return 0;
 }
 
-static struct virtio_driver_ops virtnet_ops = {
-	.in = skb_recv_done,
-	.out = skb_xmit_done,
-};
-
-struct net_device *virtnet_probe(struct virtio_device *vdev,
-				 const u8 mac[ETH_ALEN])
+static int virtnet_probe(struct device *self)
 {
+	struct virtio_device *vdev = to_virtio_dev(self);
+	const u8 *mac = (void *)&vdev->config.driver;
 	int err;
 	struct net_device *dev;
 	struct virtnet_info *vi;
 
 	dev = alloc_etherdev(sizeof(struct virtnet_info));
 	if (!dev)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 
 	SET_MODULE_OWNER(dev);
 
@@ -242,13 +238,12 @@ struct net_device *virtnet_probe(struct 
 	dev->poll = virtnet_poll;
 	dev->hard_start_xmit = start_xmit;
 	dev->weight = 16;
-	SET_NETDEV_DEV(dev, vdev->dev);
+	SET_NETDEV_DEV(dev, &vdev->dev);
 
 	vi = netdev_priv(dev);
 	vi->vdev = vdev;
 	vi->ndev = dev;
-	vdev->priv = vi;
-	vdev->driver_ops = &virtnet_ops;
+	vdev->dev.driver_data = vi;
 	skb_queue_head_init(&vi->recv);
 	skb_queue_head_init(&vi->send);
 
@@ -258,20 +253,49 @@ struct net_device *virtnet_probe(struct 
 		goto free;
 	}
 	pr_debug("virtnet: registered device %s\n", dev->name);
-	return dev;
+	return 0;
 
 free:
 	free_netdev(dev);
-	return ERR_PTR(err);
+	return err;
 }
-EXPORT_SYMBOL_GPL(virtnet_probe);
 
-void virtnet_remove(struct net_device *dev)
+int virtnet_remove(struct device *dev)
 {
-	unregister_netdev(dev);
-	free_netdev(dev);
+	struct virtnet_info *vi = dev->driver_data;
+	unregister_netdev(vi->ndev);
+	free_netdev(vi->ndev);
+	return 0;
+}
+
+static struct virtio_device_id virtnet_device_ids[] = {
+	{ .device_type = "virtnet" },
+	{ },
+};
+
+static struct virtio_driver virtio_net = {
+	.ids = virtnet_device_ids,
+	.drv = {
+		.name = "virtnet",
+		.owner = THIS_MODULE,
+		.probe = virtnet_probe,
+		.remove = virtnet_remove,
+	},
+	.in = skb_recv_done,
+	.out = skb_xmit_done,
+};
+
+static int __init virtnet_init(void)
+{
+	return virtio_driver_register(&virtio_net);
+}
+module_init(virtnet_init);
+
+static void __exit virtnet_exit(void)
+{
+	return virtio_driver_unregister(&virtio_net);
 }
-EXPORT_SYMBOL_GPL(virtnet_remove);
+module_exit(virtnet_exit);
 
 MODULE_DESCRIPTION("Virtio network driver");
 MODULE_LICENSE("GPL");
Index: linux-2.6/include/linux/virtio_net.h
==================================================================---
linux-2.6.orig/include/linux/virtio_net.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _LINUX_VIRTIO_NET_H
-#define _LINUX_VIRTIO_NET_H
-#include <linux/types.h>
-#include <linux/etherdevice.h>
-struct net_device;
-struct virtio_device;
-
-struct net_device *virtnet_probe(struct virtio_device *vdev,
-				 const u8 mac[ETH_ALEN]);
-void virtnet_remove(struct net_device *dev);
-
-#endif /* _LINUX_VIRTIO_NET_H */
--
Similar to the virtio_net patch, this converts the
block driver from draft III. The virtio config space
here contains the block device size and the
max_hw_segments setting.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
Index: linux-2.6/drivers/block/virtio_blk.c
==================================================================---
linux-2.6.orig/drivers/block/virtio_blk.c
+++ linux-2.6/drivers/block/virtio_blk.c
@@ -66,7 +66,7 @@ static bool finish(struct virtio_blk *vb
  * they might still have read access after we free them. */
 static bool blk_out_done(struct virtio_device *vdev)
 {
-	struct virtio_blk *vblk = vdev->priv;
+	struct virtio_blk *vblk = vdev->dev.driver_data;
 	struct virtblk_req *vbr;
 	unsigned int len, finished = 0;
 	unsigned long flags;
@@ -86,7 +86,7 @@ static bool blk_out_done(struct virtio_d
 
 static bool blk_in_done(struct virtio_device *vdev)
 {
-	struct virtio_blk *vblk = vdev->priv;
+	struct virtio_blk *vblk = vdev->dev.driver_data;
 	struct virtblk_req *vbr;
 	unsigned int len, finished = 0;
 	unsigned long flags;
@@ -290,22 +290,20 @@ static int virtblk_ioctl(struct inode *i
 			      (void __user *)data);
 }
 
-static struct virtio_driver_ops virtblk_ops = {
-	.in = blk_in_done,
-	.out = blk_out_done,
-};
-
-
 static struct block_device_operations virtblk_fops = {
 	.ioctl = virtblk_ioctl,
 	.owner = THIS_MODULE,
 };
 
-struct gendisk *virtblk_probe(struct virtio_device *vdev)
+static int virtblk_probe(struct device *dev)
 {
+	struct virtio_device *vdev = to_virtio_dev(dev);
 	struct virtio_blk *vblk;
+	struct virtio_blk_config *config;
 	int err, major;
 
+	config = (void *)&vdev->config.driver;
+
 	vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
 	if (!vblk) {
 		err = -ENOMEM;
@@ -315,8 +313,7 @@ struct gendisk *virtblk_probe(struct vir
 	INIT_LIST_HEAD(&vblk->reqs);
 	spin_lock_init(&vblk->lock);
 	vblk->vdev = vdev;
-	vdev->priv = vblk;
-	vdev->driver_ops = &virtblk_ops;
+	vdev->dev.driver_data = vblk;
 
 	vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req));
 	if (!vblk->pool) {
@@ -350,10 +347,12 @@ struct gendisk *virtblk_probe(struct vir
 	vblk->disk->fops = &virtblk_fops;
 
 	blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG, NULL);
+	set_capacity(vblk->disk, config->capacity);
+	blk_queue_max_hw_segments(vblk->disk->queue,
config->max_hw_segments);
 
 	/* Caller can do blk_queue_max_hw_segments(), set_capacity()
 	 * etc then add_disk(). */
-	return vblk->disk;
+	return 0;
 
 out_put_disk:
 	put_disk(vblk->disk);
@@ -364,13 +363,12 @@ out_mempool:
 out_free_vblk:
 	kfree(vblk);
 out:
-	return ERR_PTR(err);
+	return err;
 }
-EXPORT_SYMBOL_GPL(virtblk_probe);
 
-void virtblk_remove(struct gendisk *disk)
+static int virtblk_remove(struct device *dev)
 {
-	struct virtio_blk *vblk = disk->private_data;
+	struct virtio_blk *vblk = dev->driver_data;
 	int major = vblk->disk->major;
 
 	BUG_ON(!list_empty(&vblk->reqs));
@@ -379,5 +377,36 @@ void virtblk_remove(struct gendisk *disk
 	unregister_blkdev(major, "virtblk");
 	mempool_destroy(vblk->pool);
 	kfree(vblk);
+	return 0;
 }
-EXPORT_SYMBOL_GPL(virtblk_remove);
+
+static struct virtio_device_id virtblk_ids[] = {
+	{ .device_type = "block" },
+	{ },
+};
+
+static struct virtio_driver virtblk_driver = {
+	.drv = {
+		.name = "virtblk",
+		.owner = THIS_MODULE,
+		.probe = virtblk_probe,
+		.remove = virtblk_remove,
+	},
+	.ids = virtblk_ids,
+	.in = blk_in_done,
+	.out = blk_out_done,
+};
+
+static int __init virtblk_init(void)
+{
+	return virtio_driver_register(&virtblk_driver);
+}
+module_init(virtblk_init);
+
+static void __exit virtblk_exit(void)
+{
+	virtio_driver_unregister(&virtblk_driver);
+}
+module_exit(virtblk_exit);
+
+MODULE_LICENSE("GPL");
Index: linux-2.6/include/linux/virtio_blk.h
==================================================================---
linux-2.6.orig/include/linux/virtio_blk.h
+++ linux-2.6/include/linux/virtio_blk.h
@@ -29,11 +29,10 @@ struct virtio_blk_inhdr
 	unsigned char status;
 };
 
-#ifdef __KERNEL__
-struct gendisk;
-struct virtio_device;
+struct virtio_blk_config
+{
+	unsigned long long capacity;
+	unsigned long max_hw_segments;
+};
 
-struct gendisk *virtblk_probe(struct virtio_device *vdev);
-void virtblk_remove(struct gendisk *disk);
-#endif /* __KERNEL__ */
 #endif /* _LINUX_VIRTIO_BLK_H */
--
arnd@arndb.de
2007-Jul-06  06:00 UTC
[RFC 4/4] Example virtio host implementation, using chardev
Since I still haven't set up lguest and don't have a kvm
capable machine, I decided to write my own host implementation,
based on Rusty's read/write based lguest host from virtio
draft III.
Now this does _not_ use any hypervisor at all, but instead
runs expects a user application to do the actual device
emulation, communicating through a character device.
There are a number of problems with this that are not solved
yet, so please regard this code as demonstration only and
don't try to run it.
Locking is one problem, since the code I based this on
expected to be able to hold a spinlock for the duration of
the hcall. This is not possible during copy_{to,from}_user,
so bad things can happen if a driver detaches a buffer
while it's being accessed from user space.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Index: linux-2.6/drivers/char/virtiosrv.c
==================================================================--- /dev/null
+++ linux-2.6/drivers/char/virtiosrv.c
@@ -0,0 +1,573 @@
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/ioctl.h>
+#include <linux/highmem.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/poll.h>
+#include <linux/scatterlist.h>
+#include <linux/spinlock.h>
+#include <linux/virtio.h>
+
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+#define VIRTIOSRV_MAX_SGLEN 18
+#define VIRTIOSRV_MAX_INBUFS 16
+
+struct buf_head
+{
+	unsigned long len;
+};
+
+struct virtiosrv_inbuf
+{
+	unsigned int num;
+	unsigned int used;
+	bool finished;
+	struct scatterlist sg[VIRTIOSRV_MAX_SGLEN];
+	void *data;
+};
+
+struct virtiosrv_device {
+	struct virtio_device vdev;
+
+	/* Outgoing */
+	bool out_running;
+	unsigned out_junk;
+	unsigned sg_elem, sg_num, sg_off, sg_done;
+	void *out_data;
+
+	/* This is the first entry of the scatter list. */
+	struct buf_head out_head;
+	struct scatterlist sg[1+VIRTIOSRV_MAX_SGLEN];
+
+	/* Incoming */
+	bool in_running;
+	unsigned discard;
+	unsigned in_sg;
+	unsigned in_done;
+	struct virtiosrv_inbuf *curr_in;
+	struct buf_head in_head;
+		struct virtiosrv_inbuf in[VIRTIOSRV_MAX_INBUFS];
+
+	wait_queue_head_t in_wq;
+	wait_queue_head_t out_wq;
+
+	spinlock_t lock;
+	struct virtiosrv_config_data {
+		char device_type[16];
+		char device_id[16];
+	} *data;
+};
+
+static inline struct virtiosrv_device *to_virtiosrv_dev(struct virtio_device
*vdev)
+{
+	return container_of(vdev, struct virtiosrv_device, vdev);
+}
+
+static void *virtiosrv_get_outbuf(struct virtio_device *vdev, unsigned int
*len)
+{
+	struct virtiosrv_device *sdev = to_virtiosrv_dev(vdev);
+	void *ret;
+
+	spin_lock(&sdev->lock);
+	if (sdev->sg_elem == sdev->sg_num) {
+		ret = sdev->out_data;
+		sdev->sg_num = 0;
+	} else
+		ret = NULL;
+	spin_unlock(&sdev->lock);
+
+	return ret;
+}
+
+static void *virtiosrv_get_inbuf(struct virtio_device *vdev, unsigned int *len)
+{
+	struct virtiosrv_device *sdev = to_virtiosrv_dev(vdev);
+	unsigned int i;
+	void *ret = NULL;
+
+	spin_lock(&sdev->lock);
+	for (i = 0; i < ARRAY_SIZE(sdev->in); i++) {
+		if (sdev->in[i].finished) {
+			ret = sdev->in[i].data;
+			*len = sdev->in[i].used;
+			sdev->in[i].num = 0;
+			sdev->in[i].finished = false;
+			break;
+		}
+	}
+	spin_unlock(&sdev->lock);
+	return ret;
+}
+
+static struct virtiosrv_inbuf *find_inbuf(struct virtiosrv_device *sdev)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(sdev->in); i++)
+		if (sdev->in[i].num)
+			return &sdev->in[i];
+
+	return NULL;
+}
+
+static unsigned long sg_len(const struct scatterlist sg[],
+			    unsigned int num)
+{
+	unsigned long len, i;
+
+	for (i = len = 0; i < num; i++)
+		len += sg[i].length;
+	return len;
+}
+
+static unsigned long virtiosrv_add_outbuf(struct virtio_device *vdev,
+				       const struct scatterlist sg[],
+				       unsigned int num,
+				       void *data)
+{
+	struct virtiosrv_device *sdev = to_virtiosrv_dev(vdev);
+
+	BUG_ON(num > VIRTIOSRV_MAX_SGLEN);
+	BUG_ON(num == 0);
+
+	spin_lock(&sdev->lock);
+	/* We force them into single-file */
+	if (sdev->sg_num) {
+		spin_unlock(&sdev->lock);
+		return -ENOSPC;
+	}
+
+	sdev->sg_elem = sdev->sg_off = sdev->sg_done = 0;
+	sdev->sg_num = 1 + num;
+	sdev->out_head.len = sg_len(sg, num);
+	sdev->out_data = data;
+	memcpy(sdev->sg + 1, sg, num * sizeof(*sg));
+
+	/* First descriptor points at metadata */
+	sdev->sg[0].page =
pfn_to_page(virt_to_phys(&sdev->out_head)/PAGE_SIZE);
+	sdev->sg[0].offset = offset_in_page(&sdev->out_head);
+	sdev->sg[0].length = sizeof(sdev->out_head);
+	spin_unlock(&sdev->lock);
+
+	/* With only one, id is always 0 */
+	return 0;
+}
+
+static unsigned long virtiosrv_add_inbuf(struct virtio_device *vdev,
+				      struct scatterlist sg[],
+				      unsigned int num,
+				      void *data)
+
+{
+	struct virtiosrv_device *sdev = to_virtiosrv_dev(vdev);
+	unsigned int i;
+
+	BUG_ON(num > VIRTIOSRV_MAX_SGLEN);
+	BUG_ON(num == 0);
+
+	spin_lock(&sdev->lock);
+	/* Find empty inbuf. */
+	for (i = 0; sdev->in[i].num != 0; i++) {
+		if (i == ARRAY_SIZE(sdev->in) - 1) {
+			spin_unlock(&sdev->lock);
+			return -ENOSPC;
+		}
+	}
+
+	sdev->in[i].num = num;
+	sdev->in[i].finished = false;
+	sdev->in[i].data = data;
+	memcpy(sdev->in[i].sg, sg, num * sizeof(*sg));
+	spin_unlock(&sdev->lock);
+
+	return i;
+}
+
+static void virtiosrv_sync(struct virtio_device *vdev, enum virtio_dir inout)
+{
+	struct virtiosrv_device *sdev = to_virtiosrv_dev(vdev);
+
+	if (sdev->out_running)
+		wake_up_all(&sdev->out_wq);
+	if (sdev->in_running)
+		wake_up_all(&sdev->in_wq);
+}
+
+static void virtiosrv_detach_outbuf(struct virtio_device *vdev, unsigned long
id)
+{
+	struct virtiosrv_device *sdev = to_virtiosrv_dev(vdev);
+
+	spin_lock(&sdev->lock);
+	BUG_ON(id != 0);
+	BUG_ON(sdev->sg_num == 0);
+
+	/* Already started sending?  Fill with junk. */
+	if (sdev->sg_done > 0)
+		sdev->out_junk = sdev->out_head.len - sdev->sg_done;
+	else
+		sdev->sg_num = 0;
+	spin_unlock(&sdev->lock);
+}
+
+static void virtiosrv_detach_inbuf(struct virtio_device *vdev, unsigned long
id)
+{
+	struct virtiosrv_device *sdev = to_virtiosrv_dev(vdev);
+
+	spin_lock(&sdev->lock);
+	BUG_ON(id >= VIRTIOSRV_MAX_INBUFS);
+	BUG_ON(!sdev->in[id].num);
+
+	/* Detach while being used?  Discard the rest. */
+	if (sdev->curr_in == &sdev->in[id]) {
+		sdev->discard = sdev->in_head.len - sdev->in_done;
+		sdev->curr_in = NULL;
+	}
+	sdev->in[id].num = 0;
+	spin_unlock(&sdev->lock);
+}
+
+static bool virtiosrv_restart_in(struct virtio_device *vdev)
+{
+	struct virtiosrv_device *sdev = to_virtiosrv_dev(vdev);
+
+	spin_lock(&sdev->lock);
+	BUG_ON(sdev->in_running);
+	sdev->in_running = true;
+	spin_unlock(&sdev->lock);
+
+	wake_up_all(&sdev->in_wq);
+	return true;
+}
+
+static bool virtiosrv_restart_out(struct virtio_device *vdev)
+{
+	struct virtiosrv_device *sdev = to_virtiosrv_dev(vdev);
+
+	spin_lock(&sdev->lock);
+	BUG_ON(sdev->out_running);
+	sdev->out_running = true;
+	spin_unlock(&sdev->lock);
+
+	wake_up_all(&sdev->out_wq);
+	return true;
+}
+
+static struct virtio_ops virtiosrv_ops = {
+	.add_outbuf = virtiosrv_add_outbuf,
+	.add_inbuf = virtiosrv_add_inbuf,
+	.sync = virtiosrv_sync,
+	.get_outbuf = virtiosrv_get_outbuf,
+	.get_inbuf = virtiosrv_get_inbuf,
+	.detach_outbuf = virtiosrv_detach_outbuf,
+	.detach_inbuf = virtiosrv_detach_inbuf,
+	.restart_in = virtiosrv_restart_in,
+	.restart_out = virtiosrv_restart_out,
+};
+
+static struct device virtiosrv_device = {
+	.bus_id = "virtio",
+};
+
+static int virtiosrv_register_device(struct virtiosrv_device *sdev, struct file
*file)
+{
+	/*
+	 * make sure only one thread gets to do the setup, the mutex
+	 * protects both the write to the file->private_data pointer
+	 * and the idr.
+	 */
+	static DEFINE_MUTEX(setup_mutex);
+	int ret;
+
+	mutex_lock(&setup_mutex);
+	ret = -EBUSY;
+	if (file->private_data)
+		goto out;
+
+	ret = virtio_device_register(&sdev->vdev);
+	if (ret)
+		goto out;
+	file->private_data = sdev;
+out:
+	mutex_unlock(&setup_mutex);
+	return ret;
+}
+
+static int virtiosrv_setup(struct file *file, const void __user *buf, size_t
len)
+{
+	struct virtiosrv_device *sdev;
+	struct virtio_device *vdev;
+	int ret;
+
+	/* allocate device */
+	if (len != sizeof (vdev->config))
+		return -EINVAL;
+	ret = -ENOMEM;
+	sdev = kzalloc(sizeof (*sdev), GFP_KERNEL);
+	if (!sdev)
+		goto out;
+	vdev = &sdev->vdev;
+
+	/* set up device data */
+	ret = -EFAULT;
+	if (copy_from_user(&vdev->config, buf, len))
+		goto out;
+	vdev->id.device_type = sdev->data->device_type;
+	vdev->ops = &virtiosrv_ops;
+	vdev->dev.parent = &virtiosrv_device;
+	sdev->data = (void*)&vdev->config.host;
+	snprintf(vdev->dev.bus_id, BUS_ID_SIZE, "virtio:%s",
sdev->data->device_id);
+	init_waitqueue_head(&sdev->in_wq);
+	init_waitqueue_head(&sdev->out_wq);
+	spin_lock_init(&sdev->lock);
+
+	ret = virtiosrv_register_device(sdev, file);
+	if (ret)
+		goto out;
+	return len;
+
+out:
+	/* something went wrong, clean up */
+	kfree(sdev);
+	return ret;
+}
+
+static int virtiosrv_close(struct inode *inode, struct file *file)
+{
+	struct virtiosrv_device *sdev = file->private_data;
+
+	if (sdev)
+		return 0;
+
+	file->private_data = NULL;
+	virtio_device_unregister(&sdev->vdev);
+	kfree(sdev);
+	return 0;
+}
+
+/**
+ * virtiosrv_read - read one data element into the virtio device
+ *
+ * TODO: locking against virtio_ops
+ */
+static ssize_t virtiosrv_read(struct file *file, char __user *buf,
+				size_t len, loff_t *off)
+{
+	struct virtiosrv_device *sdev = file->private_data;
+	struct virtio_driver *vdrv = to_virtio_drv(sdev->vdev.dev.driver);
+	struct scatterlist *sg;
+	void *page;
+	int err;
+
+	if (!sdev)
+		return -ENODEV;
+
+	/* no data available at device, user needs to wait */
+	if ((file->f_flags & O_NONBLOCK) && (sdev->sg_elem >=
sdev->sg_num))
+		return -EAGAIN;
+	err = wait_event_interruptible(sdev->out_wq,
+					sdev->sg_elem >= sdev->sg_num);
+	if (err)
+		return -ERESTARTSYS;
+
+	/*
+	 * user requested too small buffer, don't lose length
+	 * information
+	 */
+	sg = &sdev->sg[sdev->sg_elem];
+	if (sg->length > len)
+		return -ENOSPC;
+
+	/*
+	 * If buffer was detached while we're reading, send
+	 * junk after header
+	 */
+	if (sdev->sg_elem > 0 && sdev->out_junk) {
+		if (sdev->out_junk > len)
+			return -ENOSPC;
+
+		while (sdev->out_junk) {
+			err = put_user(42, buf);
+			if (err)
+				return -EFAULT;
+			buf++;
+			sdev->out_junk--;
+		}
+		sdev->sg_num = 0;
+		return sdev->out_junk;
+	}
+
+	/* copy one sg element to user space */
+	page = kmap(sg->page);
+	err = copy_to_user(buf, page + sg->offset, sg->length);
+	kunmap(page);
+	if (err)
+		return -EFAULT;
+
+	sdev->sg_done += sg->length;
+	sdev->sg_elem++;
+
+	if (sdev->sg_elem == sdev->sg_num)
+		sdev->out_running = vdrv->out(&sdev->vdev);
+
+	return sg->length;
+}
+
+/**
+ * virtiosrv_write - write data from the virtio device
+ *
+ * TODO: locking against virtio_ops
+ */
+static ssize_t virtiosrv_write(struct file *file, const char __user *buf,
+				size_t len, loff_t *off)
+{
+	struct virtiosrv_device *sdev = file->private_data;
+	ssize_t ret;
+	if (!sdev)
+		return virtiosrv_setup(file, buf, len);
+
+	while (unlikely(sdev->discard)) {
+		size_t discard = min_t(size_t, sdev->discard, len);
+		sdev->discard -= discard;
+		return discard;
+	}
+
+	ret = 0;
+	if (!sdev->curr_in) {
+		/* Haven't got the whole head yet?  Try reading more. */
+		if (sdev->in_done < sizeof(sdev->in_head)) {
+			size_t head_len;
+			head_len = sizeof (sdev->in_head) - sdev->in_done;
+			head_len = min(len, head_len);
+
+			ret = copy_from_user(&sdev->in_head + sdev->in_done,
+						buf, head_len);
+			if (ret)
+				return -EFAULT;
+			sdev->in_done += head_len;
+			len -= head_len;
+			if (sdev->in_done < sizeof(sdev->in_head))
+				return head_len;
+			buf += head_len;
+			ret = head_len;
+		}
+
+		/* try to find a free inbuf, wait if necessary */
+		sdev->curr_in = find_inbuf(sdev);
+		if (!sdev->curr_in) {
+			int err;
+			if (file->f_flags & O_NONBLOCK)
+				return -EAGAIN;
+
+			err = wait_event_interruptible(sdev->in_wq,
+				(sdev->curr_in = find_inbuf(sdev)) != NULL);
+			if (err)
+				return -ERESTARTSYS;
+		}
+
+		sdev->in_sg = 0;
+		sdev->in_done = 0;
+	}
+
+	/* Continue reading this buffer.  If it fills, we discard the rest. */
+	do {
+		struct scatterlist *sg;
+		void *page;
+		unsigned read;
+
+		sg = &sdev->curr_in->sg[sdev->in_sg];
+
+		read = min_t(size_t, sg->length, len);
+		if (sdev->in_head.len - sdev->in_done < read)
+			read = sdev->in_head.len - sdev->in_done;
+
+		page = kmap(sg->page);
+		ret = copy_from_user(page + sg->offset, buf, read);
+		kunmap(page);
+		if (ret)
+			return -EFAULT;
+
+		sg->offset += read;
+		sg->length -= read;
+		sdev->in_done += read;
+		buf += read;
+		ret += read;
+		len -= read;
+
+		if (sdev->in_done == sdev->in_head.len)
+			break;
+
+		if (sg->length != 0)
+			return ret;
+
+		sdev->in_sg++;
+	} while (sdev->in_sg < sdev->curr_in->num);
+
+	/* We finished the buffer: may need to discard some more data. */
+	if (sdev->in_done < sdev->in_head.len)
+		sdev->discard = sdev->in_head.len - sdev->in_done;
+
+	sdev->curr_in->finished = true;
+	sdev->curr_in->used = sdev->in_done;
+	sdev->curr_in = NULL;
+	sdev->in_done = 0;
+	return ret;
+}
+
+static unsigned int virtiosrv_poll(struct file *file,
+			struct poll_table_struct *wait)
+{
+	struct virtiosrv_device *sdev = file->private_data;
+	int mask = 0;
+
+	if (!sdev)
+		return 0;
+
+	poll_wait(file, &sdev->in_wq, wait);
+	poll_wait(file, &sdev->out_wq, wait);
+	if (sdev->sg_elem < sdev->sg_num)
+		mask |= POLLIN | POLLRDNORM;
+	if (find_inbuf(sdev))
+		mask |= POLLOUT | POLLWRNORM;
+
+	return mask;
+}
+
+static struct file_operations virtiosrv_fops = {
+	.owner = THIS_MODULE,
+	.open = nonseekable_open,
+	.release = virtiosrv_close,
+	.read = virtiosrv_read,
+	.write = virtiosrv_write,
+	.poll = virtiosrv_poll,
+};
+
+static struct miscdevice virtiosrv = {
+	.name = "virtiosrv",
+	.fops = &virtiosrv_fops,
+};
+
+static int __init virtiosrv_init(void)
+{
+	int ret;
+	ret = device_register(&virtiosrv_device);
+	if (ret)
+		return ret;
+
+	ret = misc_register(&virtiosrv);
+	if (ret)
+		device_unregister(&virtiosrv_device);
+
+	return ret;
+}
+module_init(virtiosrv_init);
+
+static void __exit virtiosrv_exit(void)
+{
+	misc_deregister(&virtiosrv);
+	device_unregister(&virtiosrv_device);
+}
+module_exit(virtiosrv_exit);
Index: linux-2.6/drivers/char/Makefile
==================================================================---
linux-2.6.orig/drivers/char/Makefile
+++ linux-2.6/drivers/char/Makefile
@@ -103,6 +103,7 @@ obj-$(CONFIG_IPMI_HANDLER)	+= ipmi/
 
 obj-$(CONFIG_HANGCHECK_TIMER)	+= hangcheck-timer.o
 obj-$(CONFIG_TCG_TPM)		+= tpm/
+obj-$(CONFIG_VIRTIO_SERVER)	+= virtiosrv.o
 
 # Files generated that shall be removed upon make clean
 clean-files := consolemap_deftbl.c defkeymap.c
--
This adds a new bus_type for virtio that is intended to
be completely agnostic of the underlying host transport
and the upper-level protocol.
Device drivers and host drivers register here, and operations
are provided that let a device driver talk to the device
emulation in the hypervisor without knowing the kind of hypervisor.
Module autoloading through udev should also work.
Loosely based on Rusty's Virtio draft III.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
--- /dev/null
+++ linux-2.6/include/linux/virtio.h
@@ -0,0 +1,174 @@
+#ifndef _LINUX_VIRTIO_H
+#define _LINUX_VIRTIO_H
+#include <linux/types.h>
+#include <linux/scatterlist.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+
+/**
+ * virtio_device_id - match a virtio device to a driver
+ * @device_type: string identifying the virtio interface.
+ * @driver_data: used internally by the driver.
+ */
+struct virtio_device_id {
+	const char *device_type;
+	unsigned long driver_data;
+};
+
+/**
+ * virtio_config - virtual device configuration.
+ * @host: structured data interpreted by the host driver.
+ * @driver: structured data interpreted by the device driver.
+ *
+ * The configuration space is what gets used to tell a driver
+ * about the device, e.g. MAC address or block device size.
+ * All fields in here are read-only in the virtual machine,
+ * they are set up by the host.
+ *
+ * The host part remains opaque to the device driver, it can
+ * contain e.g. lguest device index numbers or part of a PCI
+ * configuration space.
+ *
+ * The 256 bytes total intentionally match the size of the
+ * legacy PCI config registers, but the driver should not
+ * expect the layout to be derived from PCI.
+ *
+ * Every virtio_driver should define a data structure for the
+ * virtio_config->driver data, which becomes part of its ABI.
+ */
+struct virtio_config {
+	const char host[128];
+	const char driver[128];
+};
+
+/**
+ * virtio_device - description and routines to drive a virtual device.
+ * @id: identifier for the device type
+ * @dev: the device itself.
+ * @ops: the operations for this virtual device.
+ * @driver_ops: set by the driver for callbacks.
+ * @config: driver specific configuration data.
+ * @config_size: size of the @config data.
+ */
+struct virtio_device {
+	struct virtio_device_id id;
+	struct device dev;
+	struct virtio_ops *ops;
+	struct virtio_config config;
+};
+
+static inline struct virtio_device *to_virtio_dev(struct device *dev)
+{
+	return container_of(dev, struct virtio_device, dev);
+}
+
+/**
+ * virtio_driver - driver callbacks for a virtual device.
+ * @device_table: identifiers for compatible devices,
+ *  zero-terminated array
+ * @drv: the driver as known to the driver core.
+ * @in: inbufs have been completed.
+ *	Usually called from an interrupt handler.
+ *	Return false to suppress further inbuf callbacks.
+ * @out: outbufs have been completed.
+ *	Usually called from an interrupt handler.
+ *	Return false to suppress further outbuf callbacks.
+ */
+struct virtio_driver {
+	struct virtio_device_id *ids;
+	struct device_driver drv;
+	bool (*in)(struct virtio_device *dev);
+	bool (*out)(struct virtio_device *dev);
+};
+
+static inline struct virtio_driver *to_virtio_drv(struct device_driver *drv)
+{
+	return container_of(drv, struct virtio_driver, drv);
+}
+
+int virtio_device_register(struct virtio_device *vdev);
+void virtio_device_unregister(struct virtio_device *vdev);
+int virtio_driver_register(struct virtio_driver *vdrv);
+void virtio_driver_unregister(struct virtio_driver *vdrv);
+
+enum virtio_dir {
+	VIRTIO_IN = 0x1,
+	VIRTIO_OUT = 0x2,
+};
+
+/**
+ * virtio_ops - virtio abstraction layer
+ * @add_outbuf: prepare to send data to the other end:
+ *	vdev: the virtio_device
+ *	sg: the description of the buffer(s).
+ *	num: the size of the sg array.
+ *	data: the token returned by the get_outbuf function.
+ *      Returns a unique id or an error.
+ * @add_inbuf: prepare to receive data from the other end:
+ *	vdev: the virtio_device
+ *	sg: the description of the buffer(s).
+ *	num: the size of the sg array.
+ *	data: the token returned by the get_inbuf function.
+ *      Returns a unique id or an error (eg. -ENOSPC).
+ * @sync: update after add_inbuf and/or add_outbuf
+ *	vdev: the virtio_device we're talking about.
+ *	inout: VIRTIO_IN and/or VIRTIO_OUT
+ *	After one or more add_inbuf/add_outbuf calls, invoke this to kick
+ *	the virtio layer.
+ * @get_outbuf: get the next used outbuf.
+ *	vdev: the virtio_device we're talking about.
+ *	len: the length written into the outbuf
+ *	Returns NULL or the "data" token handed to add_outbuf (which has
been
+ *	detached).
+ * @get_inbuf: get the next used inbuf.
+ *	vdev: the virtio_device we're talking about.
+ *	len: the length read from the inbuf
+ *	Returns NULL or the "data" token handed to add_inbuf (which has
been
+ *	detached).
+ * @detach_outbuf: make sure sent sg can no longer be read.
+ *	vdev: the virtio_device we're talking about.
+ *	id: the id returned from add_outbuf.
+ *	This is usually used for shutdown.  Don't try to detach twice.
+ * @detach_inbuf: make sure sent sg can no longer be written to.
+ *	vdev: the virtio_device we're talking about.
+ *	id: the id returned from add_inbuf.
+ *	This is usually used for shutdown.  Don't try to detach twice.
+ * @restart_in: restart calls to driver_ops->in after it returned false.
+ *	vdev: the virtio_device we're talking about.
+ *	This returns "false" (and doesn't re-enable) if there are
pending
+ *	inbufs, to avoid a race.
+ * @restart_out: restart calls to driver_ops->out after it returned false.
+ *	vdev: the virtio_device we're talking about.
+ *	This returns "false" (and doesn't re-enable) if there are
pending
+ *	outbufs, to avoid a race.
+ *
+ * Locking rules are straightforward: the driver is responsible for
+ * locking.  Outbuf operations can be called in parallel to inbuf
+ * operations, but no two outbuf operations nor two inbuf operations
+ * may be invoked simultaneously.
+ *
+ * All operations can be called in any context.
+ */
+struct virtio_ops {
+	unsigned long (*add_outbuf)(struct virtio_device *vdev,
+				    const struct scatterlist sg[],
+				    unsigned int num,
+				    void *data);
+
+	unsigned long (*add_inbuf)(struct virtio_device *vdev,
+				   struct scatterlist sg[],
+				   unsigned int num,
+				   void *data);
+
+	void (*sync)(struct virtio_device *vdev, enum virtio_dir inout);
+
+	void *(*get_outbuf)(struct virtio_device *vdev, unsigned int *len);
+	void *(*get_inbuf)(struct virtio_device *vdev, unsigned int *len);
+
+	void (*detach_outbuf)(struct virtio_device *vdev, unsigned long id);
+	void (*detach_inbuf)(struct virtio_device *vdev, unsigned long id);
+
+	bool (*restart_in)(struct virtio_device *vdev);
+	bool (*restart_out)(struct virtio_device *vdev);
+};
+#endif /* _LINUX_VIRTIO_H */
Index: linux-2.6/drivers/base/virtio.c
==================================================================--- /dev/null
+++ linux-2.6/drivers/base/virtio.c
@@ -0,0 +1,153 @@
+/* Virtual I/O bus implementation
+ *
+ * Copyright 2007 Arnd Bergmann <arnd@arndb.de> IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <linux/device.h>
+#include <linux/virtio.h>
+
+/**
+ * virtio_match - match a virtio device to its driver by device ID
+ *
+ * Used internally by the bus type implementation. Whenever a new
+ * device or driver is added to the system, the match function gets
+ * called for all possible combinations with existing drivers or
+ * devices.
+ *
+ * When a match is found, we attempt to bind the device to its
+ * driver by calling the driver's probe function.
+ */
+static int virtio_match(struct device *dev, struct device_driver *drv)
+{
+	struct virtio_device *vdev = to_virtio_dev(dev);
+	struct virtio_driver *vdrv = to_virtio_drv(drv);
+	struct virtio_device_id *id;
+
+	/* return true if one id in the driver matches */
+	for (id = vdrv->ids; id->device_type; id++)
+		if (strcmp(id->device_type, vdev->id.device_type) == 0)
+			return 1;
+
+	/* did not match any */
+	return 0;
+}
+
+/**
+ * virtio_uevent - add modalias field to uevent message
+ *
+ * Used internally by the uevent mechanism to add more
+ * information to a virtio device event.
+ */
+static int virtio_uevent(struct device *dev, char **envp, int num_envp,
+				char *buf, int buf_size)
+{
+	struct virtio_device *vdev = to_virtio_dev(dev);
+
+	envp[0] = buf;
+	snprintf(buf, buf_size, "MODALIAS=virtio:%s",
+			vdev->id.device_type);
+	return 0;
+}
+
+static ssize_t modalias_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct virtio_device *vdev = to_virtio_dev(dev);
+	return scnprintf(buf, PAGE_SIZE, "virtio:%s\n",
vdev->id.device_type);
+}
+
+static struct device_attribute virtio_dev_attrs[] = {
+	__ATTR_RO(modalias),
+	__ATTR_NULL,
+};
+
+static struct bus_type virtio_bus = {
+	.name = "virtio",
+	.owner = THIS_MODULE,
+	.match = virtio_match,
+	.uevent = virtio_uevent,
+	.dev_attrs = virtio_dev_attrs,
+};
+
+/**
+ * virtio_device_register - add a new virtio device
+ * @vdev: device that will be registered
+ *
+ * A virtio bus driver implementation calls this
+ * function for every device it discovers on its
+ * respective bus implementation.
+ */
+int virtio_device_register(struct virtio_device *vdev)
+{
+	vdev->dev.bus = &virtio_bus;
+	return device_register(&vdev->dev);
+}
+EXPORT_SYMBOL_GPL(virtio_device_register);
+
+/**
+ * virtio_device_unregister - remove a virtio device
+ * @vdev: device that will be unregistered
+ *
+ * If a virtio bus driver supports hot unplugging of
+ * devices, this function will be used at remove
+ * time.
+ */
+void virtio_device_unregister(struct virtio_device *vdev)
+{
+	device_unregister(&vdev->dev);
+}
+EXPORT_SYMBOL_GPL(virtio_device_unregister);
+
+/**
+ * virtio_driver_register - add a new virtio driver
+ * @vdrv: driver that will be registered
+ *
+ * A virtio device driver module needs to call this
+ * function in its module_init handler, and the
+ * respective unregister function for its module_exit
+ * handler.
+ */
+int virtio_driver_register(struct virtio_driver *vdrv)
+{
+	vdrv->drv.bus = &virtio_bus;
+	return driver_register(&vdrv->drv);
+}
+EXPORT_SYMBOL_GPL(virtio_driver_register);
+
+/**
+ * virtio_driver_unregister - remove a virtio driver
+ * @vdrv: driver that will be unregistered
+ */
+void virtio_driver_unregister(struct virtio_driver *vdrv)
+{
+	driver_unregister(&vdrv->drv);
+}
+EXPORT_SYMBOL_GPL(virtio_driver_unregister);
+
+static int __init virtio_bus_init(void)
+{
+	return bus_register(&virtio_bus);
+}
+module_init(virtio_bus_init);
+
+static void __exit virtio_bus_exit(void)
+{
+	bus_unregister(&virtio_bus);
+}
+module_exit(virtio_bus_exit);
+
+MODULE_AUTHOR("Arnd Bergmann <arnd@arndb.de>");
+MODULE_LICENSE("GPL");
Index: linux-2.6/drivers/base/Kconfig
==================================================================---
linux-2.6.orig/drivers/base/Kconfig
+++ linux-2.6/drivers/base/Kconfig
@@ -53,4 +53,7 @@ config SYS_HYPERVISOR
 	bool
 	default n
 
+config VIRTIO
+	tristate "Virtual I/O device bus"
+
 endmenu
Index: linux-2.6/drivers/base/Makefile
==================================================================---
linux-2.6.orig/drivers/base/Makefile
+++ linux-2.6/drivers/base/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_NUMA)	+= node.o
 obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o
 obj-$(CONFIG_SMP)	+= topology.o
 obj-$(CONFIG_SYS_HYPERVISOR) += hypervisor.o
+obj-$(CONFIG_VIRTIO)	+= virtio.o
 
 ifeq ($(CONFIG_DEBUG_DRIVER),y)
 EXTRA_CFLAGS += -DDEBUG
--
This is a subject that came up in the virtio BOF session at OLS. I decided to go forward and implement something that I like, based on the latest virtio proposal at the time, which was draft III. It's not a drop-in replacement, because it's missing a host implementation. I first started my own, which is not done yet, but wanted to do one for lguest and one for emulated PCI next. It's also entirely untested. As things evolved, draft IV is completely different, and these patches don't make sense any more on them, because there is no longer the concept of a virtio_device, but instead there are devices that have an arbitrary number of virtqueue structures. I'd still like to discuss my approach to see if there is reason to continue down that road, so I'm posting what I have right now. I think among the options we have to go on are: 1. screw the virtio_bus, and let every host do its own stuff -- no autoloading, standalone drivers, chardev host etc. 2. get virtio_device back from the dead, and allow it to have multiple virtqueues, either two or an unlimited number. 3. screw the multiple-virtqueue idea, go back to the draft III stuff with my changes. Arnd <><
On Fri, 2007-07-06 at 14:42 +0200, arnd@arndb.de wrote:> This is a subject that came up in the virtio BOF session > at OLS. I decided to go forward and implement something > that I like, based on the latest virtio proposal at the > time, which was draft III. > > It's not a drop-in replacement, because it's missing a > host implementation. I first started my own, which is > not done yet, but wanted to do one for lguest and one > for emulated PCI next. It's also entirely untested.Hi Arnd, I think it will come down to how neat PCI<->virtio is. Can we push further towards PCI without screwing non-PCI? eg. can we use pci_device_id? struct pci_driver? (Might be pushing it, but should probably be considered: it'd be neat if some platforms could #define virtio_driver_register pci_driver_register). Standardizing how to pack the info for each device into the config space would be especially useful. Our drivers are going to get more featureful, and we're going to need a versioning/compatibility scheme too. Basically, I'd like to see someone start with work from the PCI side, then make sure non-PCI isn't overly burdened. Cheers, Rusty.
arnd@arndb.de wrote:> This is a subject that came up in the virtio BOF session > at OLS. I decided to go forward and implement something > that I like, based on the latest virtio proposal at the > time, which was draft III. > > It's not a drop-in replacement, because it's missing a > host implementation. I first started my own, which is > not done yet, but wanted to do one for lguest and one > for emulated PCI next. It's also entirely untested. > > As things evolved, draft IV is completely different, and > these patches don't make sense any more on them, because > there is no longer the concept of a virtio_device, but > instead there are devices that have an arbitrary number > of virtqueue structures. > > I'd still like to discuss my approach to see if there is > reason to continue down that road, so I'm posting what > I have right now. > > I think among the options we have to go on are: > > 1. screw the virtio_bus, and let every host do its own > stuff -- no autoloading, standalone drivers, chardev > host etc. > >That's a bit against the spirit of virtio. I think that the control path will become quite large over time, so it makes sense to share as much as possible.> 2. get virtio_device back from the dead, and allow it > to have multiple virtqueues, either two or an unlimited > number. >This is the best option IMO.> 3. screw the multiple-virtqueue idea, go back to the > draft III stuff with my changes. >I'm sure Rusty will just love this one. 4. Delegate the responsibility for extracting the queues to hypervisor-specific code, but keep the rest shared. Just for completeness; option 2 is better. -- error compiling committee.c: too many arguments to function