plain text document attachment (lguest64-device.patch)
We started working a little bit on the devices for lguest64.
This is still very much a work-in-progress and needs much more work.
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Glauber de Oliveira Costa <glommer@gmail.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Index: work-pv/include/asm-x86_64/lguest_device.h
===================================================================
--- /dev/null
+++ work-pv/include/asm-x86_64/lguest_device.h
@@ -0,0 +1,31 @@
+#ifndef _ASM_LGUEST_DEVICE_H
+#define _ASM_LGUEST_DEVICE_H
+/* Everything you need to know about lguest devices. */
+#include <linux/device.h>
+#include <asm/lguest.h>
+#include <asm/lguest_user.h>
+
+struct lguest_device {
+ /* Unique busid, and index into lguest_page->devices[] */
+ /* By convention, each device can use irq index+1 if it wants to. */
+ unsigned int index;
+
+ struct device dev;
+
+ /* Driver can hang data off here. */
+ void *private;
+};
+
+struct lguest_driver {
+ const char *name;
+ struct module *owner;
+ u16 device_type;
+ int (*probe)(struct lguest_device *dev);
+ void (*remove)(struct lguest_device *dev);
+
+ struct device_driver drv;
+};
+
+extern int register_lguest_driver(struct lguest_driver *drv);
+extern void unregister_lguest_driver(struct lguest_driver *drv);
+#endif /* _ASM_LGUEST_DEVICE_H */
Index: work-pv/arch/x86_64/lguest/lguest_bus.c
===================================================================
--- /dev/null
+++ work-pv/arch/x86_64/lguest/lguest_bus.c
@@ -0,0 +1,180 @@
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <asm/lguest_device.h>
+#include <asm/lguest.h>
+#include <asm/io.h>
+
+static ssize_t type_show(struct device *_dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
+ return sprintf(buf, "%hu", lguest_devices[dev->index].type);
+}
+static ssize_t features_show(struct device *_dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
+ return sprintf(buf, "%hx", lguest_devices[dev->index].features);
+}
+static ssize_t pfn_show(struct device *_dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
+ return sprintf(buf, "%llu", lguest_devices[dev->index].pfn);
+}
+static ssize_t status_show(struct device *_dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
+ return sprintf(buf, "%hx", lguest_devices[dev->index].status);
+}
+static ssize_t status_store(struct device *_dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
+	if (sscanf(buf, "%hi", &lguest_devices[dev->index].status) != 1)
+ return -EINVAL;
+ return count;
+}
+static struct device_attribute lguest_dev_attrs[] = {
+ __ATTR_RO(type),
+ __ATTR_RO(features),
+ __ATTR_RO(pfn),
+ __ATTR(status, 0644, status_show, status_store),
+ __ATTR_NULL
+};
+
+static int lguest_dev_match(struct device *_dev, struct device_driver *_drv)
+{
+ struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
+ struct lguest_driver *drv = container_of(_drv,struct lguest_driver,drv);
+
+ return (drv->device_type == lguest_devices[dev->index].type);
+}
+
+struct lguest_bus {
+ struct bus_type bus;
+ struct device dev;
+};
+
+static struct lguest_bus lguest_bus = {
+ .bus = {
+ .name = "lguest",
+ .match = lguest_dev_match,
+ .dev_attrs = lguest_dev_attrs,
+ },
+ .dev = {
+ .parent = NULL,
+ .bus_id = "lguest",
+ }
+};
+
+static int lguest_dev_probe(struct device *_dev)
+{
+ int ret;
+ struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
+ struct lguest_driver *drv = container_of(dev->dev.driver,
+ struct lguest_driver, drv);
+
+ lguest_devices[dev->index].status |= LGUEST_DEVICE_S_DRIVER;
+ ret = drv->probe(dev);
+ if (ret == 0)
+ lguest_devices[dev->index].status |= LGUEST_DEVICE_S_DRIVER_OK;
+ return ret;
+}
+
+static int lguest_dev_remove(struct device *_dev)
+{
+ struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
+ struct lguest_driver *drv = container_of(dev->dev.driver,
+ struct lguest_driver, drv);
+
+ if (dev->dev.driver && drv->remove)
+ drv->remove(dev);
+ put_device(&dev->dev);
+ return 0;
+}
+
+int register_lguest_driver(struct lguest_driver *drv)
+{
+ if (!lguest_devices)
+ return 0;
+
+ drv->drv.bus = &lguest_bus.bus;
+ drv->drv.name = drv->name;
+ drv->drv.owner = drv->owner;
+ drv->drv.probe = lguest_dev_probe;
+ drv->drv.remove = lguest_dev_remove;
+
+ return driver_register(&drv->drv);
+}
+EXPORT_SYMBOL_GPL(register_lguest_driver);
+
+void unregister_lguest_driver(struct lguest_driver *drv)
+{
+ if (!lguest_devices)
+ return;
+
+ driver_unregister(&drv->drv);
+}
+EXPORT_SYMBOL_GPL(unregister_lguest_driver);
+
+static void release_lguest_device(struct device *_dev)
+{
+ struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
+
+ lguest_devices[dev->index].status |= LGUEST_DEVICE_S_REMOVED_ACK;
+ kfree(dev);
+}
+
+static void add_lguest_device(unsigned int index)
+{
+ struct lguest_device *new;
+
+ lguest_devices[index].status |= LGUEST_DEVICE_S_ACKNOWLEDGE;
+ new = kmalloc(sizeof(struct lguest_device), GFP_KERNEL);
+ if (!new) {
+ printk(KERN_EMERG "Cannot allocate lguest device %u\n", index);
+ lguest_devices[index].status |= LGUEST_DEVICE_S_FAILED;
+ return;
+ }
+
+ new->index = index;
+ new->private = NULL;
+ memset(&new->dev, 0, sizeof(new->dev));
+ new->dev.parent = &lguest_bus.dev;
+ new->dev.bus = &lguest_bus.bus;
+ new->dev.release = release_lguest_device;
+ sprintf(new->dev.bus_id, "%u", index);
+ if (device_register(&new->dev) != 0) {
+ printk(KERN_EMERG "Cannot register lguest device %u\n", index);
+ lguest_devices[index].status |= LGUEST_DEVICE_S_FAILED;
+ kfree(new);
+ }
+}
+
+static void scan_devices(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < LGUEST_MAX_DEVICES; i++)
+ if (lguest_devices[i].type)
+ add_lguest_device(i);
+}
+
+static int __init lguest_bus_init(void)
+{
+ if (strcmp(paravirt_ops.name, "lguest") != 0)
+ return 0;
+
+ /* Devices are in page above top of "normal" mem. */
+ lguest_devices = ioremap(max_pfn << PAGE_SHIFT, PAGE_SIZE);
+
+ if (bus_register(&lguest_bus.bus) != 0
+ || device_register(&lguest_bus.dev) != 0)
+ panic("lguest bus registration failed");
+
+ scan_devices();
+ return 0;
+}
+postcore_initcall(lguest_bus_init);
Index: work-pv/arch/x86_64/lguest/io.c
===================================================================
--- /dev/null
+++ work-pv/arch/x86_64/lguest/io.c
@@ -0,0 +1,425 @@
+/* Simple I/O model for guests, based on shared memory.
+ * Copyright (C) 2006 Rusty Russell IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <linux/types.h>
+#include <linux/futex.h>
+#include <linux/jhash.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/uaccess.h>
+#include <asm/lguest.h>
+#include <asm/lguest_user.h>
+#include "lguest.h"
+
+static struct list_head dma_hash[64];
+
+/* FIXME: allow multi-page lengths. */
+static int check_dma_list(struct lguest_guest_info *linfo,
+ const struct lguest_dma *dma)
+{
+ unsigned int i;
+
+ for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) {
+ if (!dma->len[i])
+ return 1;
+ if (!lguest_address_ok(linfo, dma->addr[i]))
+ goto kill;
+ if (dma->len[i] > PAGE_SIZE)
+ goto kill;
+ /* We could do over a page, but is it worth it? */
+ if ((dma->addr[i] % PAGE_SIZE) + dma->len[i] > PAGE_SIZE)
+ goto kill;
+ }
+ return 1;
+
+kill:
+	kill_guest(linfo, "bad DMA entry: %u@%#llx", dma->len[i], dma->addr[i]);
+ return 0;
+}
+
+static unsigned int hash(const union futex_key *key)
+{
+ return jhash2((u32*)&key->both.word,
+ (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
+ key->both.offset)
+ % ARRAY_SIZE(dma_hash);
+}
+
+/* Must hold read lock on dmainfo owner's current->mm->mmap_sem */
+static void unlink_dma(struct lguest_dma_info *dmainfo)
+{
+ BUG_ON(!mutex_is_locked(&lguest_lock));
+ dmainfo->interrupt = 0;
+ list_del(&dmainfo->list);
+ drop_futex_key_refs(&dmainfo->key);
+}
+
+static inline int key_eq(const union futex_key *a, const union futex_key *b)
+{
+ return (a->both.word == b->both.word
+ && a->both.ptr == b->both.ptr
+ && a->both.offset == b->both.offset);
+}
+
+static u32 unbind_dma(struct lguest_guest_info *linfo,
+ const union futex_key *key,
+ unsigned long dmas)
+{
+ int i, ret = 0;
+
+ for (i = 0; i < LGUEST_MAX_DMA; i++) {
+		if (key_eq(key, &linfo->dma[i].key) && dmas == linfo->dma[i].dmas) {
+ unlink_dma(&linfo->dma[i]);
+ ret = 1;
+ break;
+ }
+ }
+ return ret;
+}
+
+u32 bind_dma(struct lguest_guest_info *linfo, unsigned long addr,
+ unsigned long dmas, u16 numdmas, u8 interrupt)
+{
+ unsigned int i;
+ u32 ret = 0;
+ union futex_key key;
+
+	printk("inside the handler, with args: %lx, %lx, %x, %x\n",addr,dmas,numdmas,interrupt);
+ if (interrupt >= LGUEST_IRQS)
+ return 0;
+
+ mutex_lock(&lguest_lock);
+	down_read(&current->mm->mmap_sem);
+ printk("Trying to get futex key... ");
+ if (get_futex_key((u32 __user *)addr, &key) != 0) {
+ kill_guest(linfo, "bad dma address %#lx", addr);
+ goto unlock;
+ }
+ printk("Got it.\n");
+ get_futex_key_refs(&key);
+
+ if (interrupt == 0)
+ ret = unbind_dma(linfo, &key, dmas);
+ else {
+ for (i = 0; i < LGUEST_MAX_DMA; i++) {
+ if (linfo->dma[i].interrupt == 0) {
+ linfo->dma[i].dmas = dmas;
+ linfo->dma[i].num_dmas = numdmas;
+ linfo->dma[i].next_dma = 0;
+ linfo->dma[i].key = key;
+ linfo->dma[i].guest_id = linfo->guest_id;
+ linfo->dma[i].interrupt = interrupt;
+ list_add(&linfo->dma[i].list,
+ &dma_hash[hash(&key)]);
+ ret = 1;
+ printk("Will return, holding a reference\n");
+ goto unlock;
+ }
+ }
+ }
+ printk("Will return, _without_ a reference\n");
+ drop_futex_key_refs(&key);
+unlock:
+	up_read(&current->mm->mmap_sem);
+ mutex_unlock(&lguest_lock);
+ return ret;
+}
+/* lhread from another guest */
+static int lhread_other(struct lguest_guest_info *linfo,
+ void *buf, u32 addr, unsigned bytes)
+{
+ if (addr + bytes < addr
+ || !lguest_address_ok(linfo, addr+bytes)
+ || access_process_vm(linfo->tsk, addr, buf, bytes, 0) != bytes) {
+ memset(buf, 0, bytes);
+ kill_guest(linfo, "bad address in registered DMA struct");
+ return 0;
+ }
+ return 1;
+}
+
+/* lhwrite to another guest */
+static int lhwrite_other(struct lguest_guest_info *linfo, u32 addr,
+ const void *buf, unsigned bytes)
+{
+ if (addr + bytes < addr
+ || !lguest_address_ok(linfo, addr+bytes)
+ || (access_process_vm(linfo->tsk, addr, (void *)buf, bytes, 1)
+ != bytes)) {
+ kill_guest(linfo, "bad address writing to registered DMA");
+ return 0;
+ }
+ return 1;
+}
+
+static u32 copy_data(const struct lguest_dma *src,
+ const struct lguest_dma *dst,
+ struct page *pages[])
+{
+ unsigned int totlen, si, di, srcoff, dstoff;
+ void *maddr = NULL;
+
+ totlen = 0;
+ si = di = 0;
+ srcoff = dstoff = 0;
+	while (si < LGUEST_MAX_DMA_SECTIONS && src->len[si]
+	       && di < LGUEST_MAX_DMA_SECTIONS && dst->len[di]) {
+ u32 len = min(src->len[si] - srcoff, dst->len[di] - dstoff);
+
+ if (!maddr)
+ maddr = kmap(pages[di]);
+
+ /* FIXME: This is not completely portable, since
+ archs do different things for copy_to_user_page. */
+ if (copy_from_user(maddr + (dst->addr[di] + dstoff)%PAGE_SIZE,
+ (void *__user)src->addr[si], len) != 0) {
+ totlen = 0;
+ break;
+ }
+
+ totlen += len;
+ srcoff += len;
+ dstoff += len;
+ if (srcoff == src->len[si]) {
+ si++;
+ srcoff = 0;
+ }
+ if (dstoff == dst->len[di]) {
+ kunmap(pages[di]);
+ maddr = NULL;
+ di++;
+ dstoff = 0;
+ }
+ }
+
+ if (maddr)
+ kunmap(pages[di]);
+
+ return totlen;
+}
+
+/* Src is us, ie. current. */
+static u32 do_dma(struct lguest_guest_info *srclg, const struct lguest_dma *src,
+ struct lguest_guest_info *dstlg, const struct lguest_dma *dst)
+{
+ int i;
+ u32 ret;
+ struct page *pages[LGUEST_MAX_DMA_SECTIONS];
+
+ if (!check_dma_list(dstlg, dst) || !check_dma_list(srclg, src))
+ return 0;
+
+ /* First get the destination pages */
+ for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) {
+ if (dst->len[i] == 0)
+ break;
+ if (get_user_pages(dstlg->tsk, dstlg->mm,
+ dst->addr[i], 1, 1, 1, pages+i, NULL)
+ != 1) {
+ ret = 0;
+ goto drop_pages;
+ }
+ }
+
+ /* Now copy until we run out of src or dst. */
+ ret = copy_data(src, dst, pages);
+
+drop_pages:
+ while (--i >= 0)
+ put_page(pages[i]);
+ return ret;
+}
+
+/* We cache one process to wakeup: helps for batching & wakes outside locks. */
+void set_wakeup_process(struct lguest_guest_info *linfo,
+ struct task_struct *p)
+{
+ if (p == linfo->wake)
+ return;
+
+ if (linfo->wake) {
+ wake_up_process(linfo->wake);
+ put_task_struct(linfo->wake);
+ }
+ linfo->wake = p;
+ if (linfo->wake)
+ get_task_struct(linfo->wake);
+}
+
+static int dma_transfer(struct lguest_guest_info *srclg,
+ unsigned long udma,
+ struct lguest_dma_info *dst)
+{
+#if 0
+ struct lguest_dma dst_dma, src_dma;
+ struct lguest_guest_info *dstlg;
+ u32 i, dma = 0;
+
+ dstlg = &lguests[dst->guest_id];
+ /* Get our dma list. */
+ lhread(srclg, &src_dma, udma, sizeof(src_dma));
+
+ /* We can't deadlock against them dmaing to us, because this
+ * is all under the lguest_lock. */
+ down_read(&dstlg->mm->mmap_sem);
+
+ for (i = 0; i < dst->num_dmas; i++) {
+ dma = (dst->next_dma + i) % dst->num_dmas;
+ if (!lhread_other(dstlg, &dst_dma,
+ dst->dmas + dma * sizeof(struct lguest_dma),
+ sizeof(dst_dma))) {
+ goto fail;
+ }
+ if (!dst_dma.used_len)
+ break;
+ }
+ if (i != dst->num_dmas) {
+ unsigned long used_lenp;
+ unsigned int ret;
+
+ ret = do_dma(srclg, &src_dma, dstlg, &dst_dma);
+ /* Put used length in src. */
+ lhwrite_u32(srclg,
+ udma+offsetof(struct lguest_dma, used_len), ret);
+ if (ret == 0 && src_dma.len[0] != 0)
+ goto fail;
+
+ /* Make sure destination sees contents before length. */
+ mb();
+ used_lenp = dst->dmas
+ + dma * sizeof(struct lguest_dma)
+ + offsetof(struct lguest_dma, used_len);
+ lhwrite_other(dstlg, used_lenp, &ret, sizeof(ret));
+ dst->next_dma++;
+ }
+ up_read(&dstlg->mm->mmap_sem);
+
+ /* Do this last so dst doesn't simply sleep on lock. */
+ set_bit(dst->interrupt, dstlg->irqs_pending);
+ set_wakeup_process(srclg, dstlg->tsk);
+ return i == dst->num_dmas;
+
+fail:
+ up_read(&dstlg->mm->mmap_sem);
+#endif
+ return 0;
+}
+
+int send_dma(struct lguest_guest_info *linfo, unsigned long addr,
+ unsigned long udma)
+{
+ union futex_key key;
+ int pending = 0, empty = 0;
+
+ printk("inside send_dma, with args: %lx, %lx\n",addr,udma);
+again:
+ mutex_lock(&lguest_lock);
+	down_read(&current->mm->mmap_sem);
+ if (get_futex_key((u32 __user *)addr, &key) != 0) {
+ kill_guest(linfo, "bad sending DMA address");
+ goto unlock;
+ }
+ /* Shared mapping? Look for other guests... */
+ if (key.shared.offset & 1) {
+ struct lguest_dma_info *i, *n;
+ list_for_each_entry_safe(i, n, &dma_hash[hash(&key)], list) {
+ if (i->guest_id == linfo->guest_id)
+ continue;
+ if (!key_eq(&key, &i->key))
+ continue;
+
+ empty += dma_transfer(linfo, udma, i);
+ break;
+ }
+ if (empty == 1) {
+ /* Give any recipients one chance to restock. */
+			up_read(&current->mm->mmap_sem);
+ mutex_unlock(&lguest_lock);
+ yield();
+ empty++;
+ goto again;
+ }
+ pending = 0;
+ } else {
+ /* Private mapping: tell our userspace. */
+ linfo->dma_is_pending = 1;
+ linfo->pending_dma = udma;
+ linfo->pending_addr = addr;
+ pending = 1;
+ }
+unlock:
+	up_read(&current->mm->mmap_sem);
+ mutex_unlock(&lguest_lock);
+ printk("Returning send_dma with pending: %x\n",pending);
+ return pending;
+}
+void release_all_dma(struct lguest_guest_info *linfo)
+{
+ unsigned int i;
+
+ BUG_ON(!mutex_is_locked(&lguest_lock));
+
+ down_read(&linfo->mm->mmap_sem);
+ for (i = 0; i < LGUEST_MAX_DMA; i++) {
+ if (linfo->dma[i].interrupt)
+ unlink_dma(&linfo->dma[i]);
+ }
+ up_read(&linfo->mm->mmap_sem);
+}
+
+/* Userspace wants a dma buffer from this guest. */
+unsigned long get_dma_buffer(struct lguest_guest_info *linfo,
+ unsigned long addr, unsigned long *interrupt)
+{
+ unsigned long ret = 0;
+ union futex_key key;
+ struct lguest_dma_info *i;
+
+ mutex_lock(&lguest_lock);
+	down_read(&current->mm->mmap_sem);
+ if (get_futex_key((u32 __user *)addr, &key) != 0) {
+ kill_guest(linfo, "bad registered DMA buffer");
+ goto unlock;
+ }
+ list_for_each_entry(i, &dma_hash[hash(&key)], list) {
+		if (key_eq(&key, &i->key) && i->guest_id == linfo->guest_id) {
+ unsigned int j;
+ for (j = 0; j < i->num_dmas; j++) {
+ struct lguest_dma dma;
+
+ ret = i->dmas + j * sizeof(struct lguest_dma);
+ lhread(linfo, &dma, ret, sizeof(dma));
+ if (dma.used_len == 0)
+ break;
+ }
+ *interrupt = i->interrupt;
+ break;
+ }
+ }
+unlock:
+	up_read(&current->mm->mmap_sem);
+ mutex_unlock(&lguest_lock);
+ return ret;
+}
+
+void lguest_io_init(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(dma_hash); i++)
+ INIT_LIST_HEAD(&dma_hash[i]);
+}
--