plain text document attachment (lguest64-device.patch)
We started working a little bit on the devices for lguest64.
This is still very much a work-in-progress and needs much more work.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Glauber de Oliveira Costa <glommer@gmail.com>
Cc: Chris Wright <chrisw@sous-sol.org>

Index: work-pv/include/asm-x86_64/lguest_device.h
==================================================================
--- /dev/null
+++ work-pv/include/asm-x86_64/lguest_device.h
@@ -0,0 +1,31 @@
+#ifndef _ASM_LGUEST_DEVICE_H
+#define _ASM_LGUEST_DEVICE_H
+/* Everything you need to know about lguest devices. */
+#include <linux/device.h>
+#include <asm/lguest.h>
+#include <asm/lguest_user.h>
+
+struct lguest_device {
+	/* Unique busid, and index into lguest_page->devices[] */
+	/* By convention, each device can use irq index+1 if it wants to. */
+	unsigned int index;
+
+	struct device dev;
+
+	/* Driver can hang data off here. */
+	void *private;
+};
+
+struct lguest_driver {
+	const char *name;
+	struct module *owner;
+	u16 device_type;
+	int (*probe)(struct lguest_device *dev);
+	void (*remove)(struct lguest_device *dev);
+
+	struct device_driver drv;
+};
+
+extern int register_lguest_driver(struct lguest_driver *drv);
+extern void unregister_lguest_driver(struct lguest_driver *drv);
+#endif /* _ASM_LGUEST_DEVICE_H */
Index: work-pv/arch/x86_64/lguest/lguest_bus.c
==================================================================
--- /dev/null
+++ work-pv/arch/x86_64/lguest/lguest_bus.c
@@ -0,0 +1,211 @@
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <asm/lguest_device.h>
+#include <asm/lguest.h>
+#include <asm/io.h>
+
+/* sysfs "type": prints the slot's type field as an unsigned short. */
+static ssize_t type_show(struct device *_dev,
+			 struct device_attribute *attr, char *buf)
+{
+	struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
+	return sprintf(buf, "%hu", lguest_devices[dev->index].type);
+}
+
+/* sysfs "features": prints the slot's features field in hex. */
+static ssize_t features_show(struct device *_dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
+	return sprintf(buf, "%hx", lguest_devices[dev->index].features);
+}
+
+/* sysfs "pfn": prints the slot's pfn field in decimal. */
+static ssize_t pfn_show(struct device *_dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
+	return sprintf(buf, "%llu", lguest_devices[dev->index].pfn);
+}
+
+/* sysfs "status" (read): prints the slot's status field in hex. */
+static ssize_t status_show(struct device *_dev,
+			   struct device_attribute *attr, char *buf)
+{
+	struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
+	return sprintf(buf, "%hx", lguest_devices[dev->index].status);
+}
+
+/* sysfs "status" (write): parses one integer straight into the status field. */
+static ssize_t status_store(struct device *_dev, struct device_attribute *attr,
+			    const char *buf, size_t count)
+{
+	struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
+	if (sscanf(buf, "%hi", &lguest_devices[dev->index].status) != 1)
+		return -EINVAL;
+	return count;
+}
+
+/* Device attributes handed to the bus through lguest_bus.bus.dev_attrs. */
+static struct device_attribute lguest_dev_attrs[] = {
+	__ATTR_RO(type),
+	__ATTR_RO(features),
+	__ATTR_RO(pfn),
+	__ATTR(status, 0644, status_show, status_store),
+	__ATTR_NULL
+};
+
+/* Bus match callback: a driver matches when its device_type equals the
+ * type recorded in the device's lguest_devices[] slot. */
+static int lguest_dev_match(struct device *_dev, struct device_driver *_drv)
+{
+	struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
+	struct lguest_driver *drv = container_of(_drv,struct lguest_driver,drv);
+
+	return (drv->device_type == lguest_devices[dev->index].type);
+}
+
+struct lguest_bus {
+	struct bus_type bus;
+	struct device dev;
+};
+
+static struct lguest_bus lguest_bus = {
+	.bus = {
+		.name  = "lguest",
+		.match = lguest_dev_match,
+		.dev_attrs = lguest_dev_attrs,
+	},
+	.dev = {
+		.parent = NULL,
+		.bus_id = "lguest",
+	}
+};
+
+/* Generic probe hook: sets LGUEST_DEVICE_S_DRIVER, calls the lguest
+ * driver's probe() and sets LGUEST_DEVICE_S_DRIVER_OK if it returned 0. */
+static int lguest_dev_probe(struct device *_dev)
+{
+	int ret;
+	struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
+	struct lguest_driver *drv = container_of(dev->dev.driver,
+						 struct lguest_driver, drv);
+
+	lguest_devices[dev->index].status |= LGUEST_DEVICE_S_DRIVER;
+	ret = drv->probe(dev);
+	if (ret == 0)
+		lguest_devices[dev->index].status |= LGUEST_DEVICE_S_DRIVER_OK;
+	return ret;
+}
+
+/* Generic remove hook: calls the lguest driver's remove() if it has one,
+ * then drops a reference on the device.  Always returns 0. */
+static int lguest_dev_remove(struct device *_dev)
+{
+	struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
+	struct lguest_driver *drv = container_of(dev->dev.driver,
+						 struct lguest_driver, drv);
+
+	if (dev->dev.driver && drv->remove)
+		drv->remove(dev);
+	put_device(&dev->dev);
+	return 0;
+}
+
+/* Attach @drv to the lguest bus and register it with the driver core.
+ * Returns 0 without registering anything when lguest_devices is unset,
+ * otherwise whatever driver_register() returns. */
+int register_lguest_driver(struct lguest_driver *drv)
+{
+	if (!lguest_devices)
+		return 0;
+
+	drv->drv.bus = &lguest_bus.bus;
+	drv->drv.name = drv->name;
+	drv->drv.owner = drv->owner;
+	drv->drv.probe = lguest_dev_probe;
+	drv->drv.remove = lguest_dev_remove;
+
+	return driver_register(&drv->drv);
+}
+EXPORT_SYMBOL_GPL(register_lguest_driver);
+
+/* Unregister @drv; does nothing when lguest_devices is unset. */
+void unregister_lguest_driver(struct lguest_driver *drv)
+{
+	if (!lguest_devices)
+		return;
+
+	driver_unregister(&drv->drv);
+}
+EXPORT_SYMBOL_GPL(unregister_lguest_driver);
+
+/* Release callback: sets REMOVED_ACK in the slot and frees the device. */
+static void release_lguest_device(struct device *_dev)
+{
+	struct lguest_device *dev = container_of(_dev,struct lguest_device,dev);
+
+	lguest_devices[dev->index].status |= LGUEST_DEVICE_S_REMOVED_ACK;
+	kfree(dev);
+}
+
+/* Allocate and register a struct lguest_device for slot @index.  The slot
+ * is marked ACKNOWLEDGE first, and FAILED if allocation or registration
+ * fails. */
+static void add_lguest_device(unsigned int index)
+{
+	struct lguest_device *new;
+
+	lguest_devices[index].status |= LGUEST_DEVICE_S_ACKNOWLEDGE;
+	new = kmalloc(sizeof(struct lguest_device), GFP_KERNEL);
+	if (!new) {
+		printk(KERN_EMERG "Cannot allocate lguest device %u\n", index);
+		lguest_devices[index].status |= LGUEST_DEVICE_S_FAILED;
+		return;
+	}
+
+	new->index = index;
+	new->private = NULL;
+	memset(&new->dev, 0, sizeof(new->dev));
+	new->dev.parent = &lguest_bus.dev;
+	new->dev.bus = &lguest_bus.bus;
+	new->dev.release = release_lguest_device;
+	sprintf(new->dev.bus_id, "%u", index);
+	if (device_register(&new->dev) != 0) {
+		printk(KERN_EMERG "Cannot register lguest device %u\n", index);
+		lguest_devices[index].status |= LGUEST_DEVICE_S_FAILED;
+		kfree(new);
+	}
+}
+
+/* Add a device for every slot whose type field is non-zero. */
+static void scan_devices(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < LGUEST_MAX_DEVICES; i++)
+		if (lguest_devices[i].type)
+			add_lguest_device(i);
+}
+
+/* Map the device page, register the bus and its root device, then scan
+ * for devices.  Does nothing unless paravirt_ops.name is "lguest". */
+static int __init lguest_bus_init(void)
+{
+	if (strcmp(paravirt_ops.name, "lguest") != 0)
+		return 0;
+
+	/* Devices are in page above top of "normal" mem. */
+	lguest_devices = ioremap(max_pfn << PAGE_SHIFT, PAGE_SIZE);
+	/* ioremap() can fail; scan_devices() would then dereference NULL. */
+	if (!lguest_devices)
+		panic("lguest device page mapping failed");
+
+	if (bus_register(&lguest_bus.bus) != 0
+	    || device_register(&lguest_bus.dev) != 0)
+		panic("lguest bus registration failed");
+
+	scan_devices();
+	return 0;
+}
+postcore_initcall(lguest_bus_init);
Index: work-pv/arch/x86_64/lguest/io.c
==================================================================
--- /dev/null
+++ work-pv/arch/x86_64/lguest/io.c
@@ -0,0 +1,425 @@
+/* Simple I/O model for guests, based on shared memory.
+ * Copyright (C) 2006 Rusty Russell IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <linux/types.h>
+#include <linux/futex.h>
+#include <linux/jhash.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/uaccess.h>
+#include <asm/lguest.h>
+#include <asm/lguest_user.h>
+#include "lguest.h"
+
+static struct list_head dma_hash[64];
+
+/* FIXME: allow multi-page lengths.
*/
+/* Validate a DMA descriptor: every section up to the first zero length must
+ * have an address lguest_address_ok() accepts, be at most PAGE_SIZE long and
+ * stay within one page.  Returns 1 if fine; otherwise kills the guest and
+ * returns 0. */
+static int check_dma_list(struct lguest_guest_info *linfo,
+			  const struct lguest_dma *dma)
+{
+	unsigned int i;
+
+	for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) {
+		if (!dma->len[i])
+			return 1;
+		if (!lguest_address_ok(linfo, dma->addr[i]))
+			goto kill;
+		if (dma->len[i] > PAGE_SIZE)
+			goto kill;
+		/* We could do over a page, but is it worth it? */
+		if ((dma->addr[i] % PAGE_SIZE) + dma->len[i] > PAGE_SIZE)
+			goto kill;
+	}
+	return 1;
+
+kill:
+	kill_guest(linfo, "bad DMA entry: %u@%#llx", dma->len[i], dma->addr[i]);
+	return 0;
+}
+
+/* Hash a futex key down to an index into dma_hash[]. */
+static unsigned int hash(const union futex_key *key)
+{
+	return jhash2((u32*)&key->both.word,
+		      (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
+		      key->both.offset)
+		% ARRAY_SIZE(dma_hash);
+}
+
+/* Must hold read lock on dmainfo owner's current->mm->mmap_sem */
+/* Marks the slot free (interrupt = 0), takes it off its hash chain and
+ * drops the futex key reference.  lguest_lock must be held. */
+static void unlink_dma(struct lguest_dma_info *dmainfo)
+{
+	BUG_ON(!mutex_is_locked(&lguest_lock));
+	dmainfo->interrupt = 0;
+	list_del(&dmainfo->list);
+	drop_futex_key_refs(&dmainfo->key);
+}
+
+/* Two futex keys are equal when word, ptr and offset all match. */
+static inline int key_eq(const union futex_key *a, const union futex_key *b)
+{
+	return (a->both.word == b->both.word
+		&& a->both.ptr == b->both.ptr
+		&& a->both.offset == b->both.offset);
+}
+
+/* Unlink the first slot of @linfo bound to @key with DMA array @dmas.
+ * Returns 1 if one was found, 0 otherwise. */
+static u32 unbind_dma(struct lguest_guest_info *linfo,
+		      const union futex_key *key,
+		      unsigned long dmas)
+{
+	int i, ret = 0;
+
+	for (i = 0; i < LGUEST_MAX_DMA; i++) {
+		if (key_eq(key, &linfo->dma[i].key) && dmas == linfo->dma[i].dmas) {
+			unlink_dma(&linfo->dma[i]);
+			ret = 1;
+			break;
+		}
+	}
+	return ret;
+}
+
+/* Bind (@interrupt != 0) or unbind (@interrupt == 0) the array of @numdmas
+ * struct lguest_dma at guest address @dmas to the futex key of @addr.
+ * Returns 1 on success; 0 if @interrupt is out of range, @addr is bad (the
+ * guest is killed), nothing matched an unbind, or no slot is free. */
+u32 bind_dma(struct lguest_guest_info *linfo, unsigned long addr,
+	     unsigned long dmas, u16 numdmas, u8 interrupt)
+{
+	unsigned int i;
+	u32 ret = 0;
+	union futex_key key;
+
+	printk("inside the handler, with args: %lx, %lx, %x, %x\n",addr,dmas,numdmas,interrupt);
+	if (interrupt >= LGUEST_IRQS)
+		return 0;
+
+	mutex_lock(&lguest_lock);
+	down_read(&current->mm->mmap_sem);
+	printk("Trying to get futex key... ");
+	if (get_futex_key((u32 __user *)addr, &key) != 0) {
+		kill_guest(linfo, "bad dma address %#lx", addr);
+		goto unlock;
+	}
+	printk("Got it.\n");
+	get_futex_key_refs(&key);
+
+	if (interrupt == 0)
+		ret = unbind_dma(linfo, &key, dmas);
+	else {
+		/* A slot with interrupt == 0 is free. */
+		for (i = 0; i < LGUEST_MAX_DMA; i++) {
+			if (linfo->dma[i].interrupt == 0) {
+				linfo->dma[i].dmas = dmas;
+				linfo->dma[i].num_dmas = numdmas;
+				linfo->dma[i].next_dma = 0;
+				linfo->dma[i].key = key;
+				linfo->dma[i].guest_id = linfo->guest_id;
+				linfo->dma[i].interrupt = interrupt;
+				list_add(&linfo->dma[i].list,
+					 &dma_hash[hash(&key)]);
+				ret = 1;
+				/* The key reference now belongs to the slot. */
+				printk("Will return, holding a reference\n");
+				goto unlock;
+			}
+		}
+	}
+	printk("Will return, _without_ a reference\n");
+	drop_futex_key_refs(&key);
+unlock:
+	up_read(&current->mm->mmap_sem);
+	mutex_unlock(&lguest_lock);
+	return ret;
+}
+
+/* lhread from another guest */
+/* Reads @bytes at @addr from @linfo's task into @buf.  On wrap-around, an
+ * address lguest_address_ok() rejects, or a short read: zeroes @buf, kills
+ * the guest and returns 0.  Returns 1 on success.
+ * NOTE(review): @addr is u32 although this is the 64-bit port - confirm
+ * whether guest addresses can exceed 32 bits here. */
+static int lhread_other(struct lguest_guest_info *linfo,
+			void *buf, u32 addr, unsigned bytes)
+{
+	if (addr + bytes < addr
+	    || !lguest_address_ok(linfo, addr+bytes)
+	    || access_process_vm(linfo->tsk, addr, buf, bytes, 0) != bytes) {
+		memset(buf, 0, bytes);
+		kill_guest(linfo, "bad address in registered DMA struct");
+		return 0;
+	}
+	return 1;
+}
+
+/* lhwrite to another guest */
+/* Writes @bytes from @buf to @addr in @linfo's task.  Same checks as
+ * lhread_other(); kills the guest and returns 0 on failure, 1 on success.
+ * NOTE(review): same u32 @addr question as lhread_other(). */
+static int lhwrite_other(struct lguest_guest_info *linfo, u32 addr,
+			 const void *buf, unsigned bytes)
+{
+	if (addr + bytes < addr
+	    || !lguest_address_ok(linfo, addr+bytes)
+	    || (access_process_vm(linfo->tsk, addr, (void *)buf, bytes, 1)
+		!= bytes)) {
+		kill_guest(linfo, "bad address writing to registered DMA");
+		return 0;
+	}
+	return 1;
+}
+
+/* Copy the source sections (read from current with copy_from_user()) into
+ * the destination sections through kmap()ed pages; pages[i] is the page
+ * backing destination section i.  Stops when either list runs out.
+ * Returns the number of bytes copied, or 0 if a copy faulted. */
+static u32 copy_data(const struct lguest_dma *src,
+		     const struct lguest_dma *dst,
+		     struct page *pages[])
+{
+	unsigned int totlen, si, di, srcoff, dstoff;
+	void *maddr = NULL;
+
+	totlen = 0;
+	si = di = 0;
+	srcoff = dstoff = 0;
+	while (si < LGUEST_MAX_DMA_SECTIONS && src->len[si]
+	       && di < LGUEST_MAX_DMA_SECTIONS && dst->len[di]) {
+		u32 len = min(src->len[si] - srcoff, dst->len[di] - dstoff);
+
+		if (!maddr)
+			maddr = kmap(pages[di]);
+
+		/* FIXME: This is not completely portable, since
+		   archs do different things for copy_to_user_page. */
+		/* Read from srcoff: one source section may be spread over
+		 * several destination sections. */
+		if (copy_from_user(maddr + (dst->addr[di] + dstoff)%PAGE_SIZE,
+				   (void __user *)(src->addr[si] + srcoff),
+				   len) != 0) {
+			totlen = 0;
+			break;
+		}
+
+		totlen += len;
+		srcoff += len;
+		dstoff += len;
+		if (srcoff == src->len[si]) {
+			si++;
+			srcoff = 0;
+		}
+		if (dstoff == dst->len[di]) {
+			kunmap(pages[di]);
+			maddr = NULL;
+			di++;
+			dstoff = 0;
+		}
+	}
+
+	/* A destination page is still mapped if we stopped part-way. */
+	if (maddr)
+		kunmap(pages[di]);
+
+	return totlen;
+}
+
+/* Src is us, ie. current. */
+/* Checks both descriptors, pins one page per used destination section,
+ * copies with copy_data() and releases the pages.  Returns the number of
+ * bytes copied, 0 on any failure. */
+static u32 do_dma(struct lguest_guest_info *srclg, const struct lguest_dma *src,
+		  struct lguest_guest_info *dstlg, const struct lguest_dma *dst)
+{
+	int i;
+	u32 ret;
+	struct page *pages[LGUEST_MAX_DMA_SECTIONS];
+
+	if (!check_dma_list(dstlg, dst) || !check_dma_list(srclg, src))
+		return 0;
+
+	/* First get the destination pages */
+	for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) {
+		if (dst->len[i] == 0)
+			break;
+		if (get_user_pages(dstlg->tsk, dstlg->mm,
+				   dst->addr[i], 1, 1, 1, pages+i, NULL)
+		    != 1) {
+			ret = 0;
+			goto drop_pages;
+		}
+	}
+
+	/* Now copy until we run out of src or dst. */
+	ret = copy_data(src, dst, pages);
+
+drop_pages:
+	/* i is the number of pages successfully pinned. */
+	while (--i >= 0)
+		put_page(pages[i]);
+	return ret;
+}
+
+/* We cache one process to wakeup: helps for batching & wakes outside locks. */
+/* Replacing the cached task wakes and releases the previous one; passing
+ * NULL therefore just flushes the cache. */
+void set_wakeup_process(struct lguest_guest_info *linfo,
+			struct task_struct *p)
+{
+	if (p == linfo->wake)
+		return;
+
+	if (linfo->wake) {
+		wake_up_process(linfo->wake);
+		put_task_struct(linfo->wake);
+	}
+	linfo->wake = p;
+	if (linfo->wake)
+		get_task_struct(linfo->wake);
+}
+
+/* Transfer the DMA at @udma from @srclg into the buffers bound by @dst.
+ * The whole body is compiled out (#if 0), so this currently always
+ * returns 0. */
+static int dma_transfer(struct lguest_guest_info *srclg,
+			unsigned long udma,
+			struct lguest_dma_info *dst)
+{
+#if 0
+	struct lguest_dma dst_dma, src_dma;
+	struct lguest_guest_info *dstlg;
+	u32 i, dma = 0;
+
+	dstlg = &lguests[dst->guest_id];
+	/* Get our dma list. */
+	lhread(srclg, &src_dma, udma, sizeof(src_dma));
+
+	/* We can't deadlock against them dmaing to us, because this
+	 * is all under the lguest_lock. */
+	down_read(&dstlg->mm->mmap_sem);
+
+	for (i = 0; i < dst->num_dmas; i++) {
+		dma = (dst->next_dma + i) % dst->num_dmas;
+		if (!lhread_other(dstlg, &dst_dma,
+				  dst->dmas + dma * sizeof(struct lguest_dma),
+				  sizeof(dst_dma))) {
+			goto fail;
+		}
+		if (!dst_dma.used_len)
+			break;
+	}
+	if (i != dst->num_dmas) {
+		unsigned long used_lenp;
+		unsigned int ret;
+
+		ret = do_dma(srclg, &src_dma, dstlg, &dst_dma);
+		/* Put used length in src. */
+		lhwrite_u32(srclg,
+			    udma+offsetof(struct lguest_dma, used_len), ret);
+		if (ret == 0 && src_dma.len[0] != 0)
+			goto fail;
+
+		/* Make sure destination sees contents before length. */
+		mb();
+		used_lenp = dst->dmas
+			+ dma * sizeof(struct lguest_dma)
+			+ offsetof(struct lguest_dma, used_len);
+		lhwrite_other(dstlg, used_lenp, &ret, sizeof(ret));
+		dst->next_dma++;
+	}
+	up_read(&dstlg->mm->mmap_sem);
+
+	/* Do this last so dst doesn't simply sleep on lock. */
+	set_bit(dst->interrupt, dstlg->irqs_pending);
+	set_wakeup_process(srclg, dstlg->tsk);
+	return i == dst->num_dmas;
+
+fail:
+	up_read(&dstlg->mm->mmap_sem);
+#endif
+	return 0;
+}
+
+/* Send the DMA described at @udma to whoever is bound to @addr.  For a
+ * shared mapping, tries the first other guest bound to the same key (with
+ * one retry after yield()).  For a private mapping, records the request in
+ * @linfo for our userspace.  Returns 1 only in the private-mapping case,
+ * 0 otherwise (including a bad @addr, which kills the guest). */
+int send_dma(struct lguest_guest_info *linfo, unsigned long addr,
+	     unsigned long udma)
+{
+	union futex_key key;
+	int pending = 0, empty = 0;
+
+	printk("inside send_dma, with args: %lx, %lx\n",addr,udma);
+again:
+	mutex_lock(&lguest_lock);
+	down_read(&current->mm->mmap_sem);
+	if (get_futex_key((u32 __user *)addr, &key) != 0) {
+		kill_guest(linfo, "bad sending DMA address");
+		goto unlock;
+	}
+	/* Shared mapping?  Look for other guests... */
+	if (key.shared.offset & 1) {
+		struct lguest_dma_info *i, *n;
+		list_for_each_entry_safe(i, n, &dma_hash[hash(&key)], list) {
+			if (i->guest_id == linfo->guest_id)
+				continue;
+			if (!key_eq(&key, &i->key))
+				continue;
+
+			empty += dma_transfer(linfo, udma, i);
+			break;
+		}
+		if (empty == 1) {
+			/* Give any recipients one chance to restock. */
+			up_read(&current->mm->mmap_sem);
+			mutex_unlock(&lguest_lock);
+			yield();
+			/* empty is now >= 2, so we retry only once. */
+			empty++;
+			goto again;
+		}
+		pending = 0;
+	} else {
+		/* Private mapping: tell our userspace. */
+		linfo->dma_is_pending = 1;
+		linfo->pending_dma = udma;
+		linfo->pending_addr = addr;
+		pending = 1;
+	}
+unlock:
+	up_read(&current->mm->mmap_sem);
+	mutex_unlock(&lguest_lock);
+	printk("Returning send_dma with pending: %x\n",pending);
+	return pending;
+}
+
+/* Unlink every bound DMA slot of @linfo.  Caller holds lguest_lock. */
+void release_all_dma(struct lguest_guest_info *linfo)
+{
+	unsigned int i;
+
+	BUG_ON(!mutex_is_locked(&lguest_lock));
+
+	down_read(&linfo->mm->mmap_sem);
+	for (i = 0; i < LGUEST_MAX_DMA; i++) {
+		if (linfo->dma[i].interrupt)
+			unlink_dma(&linfo->dma[i]);
+	}
+	up_read(&linfo->mm->mmap_sem);
+}
+
+/* Userspace wants a dma buffer from this guest.
*/
+/* Finds the slot this guest bound to the futex key of @addr and walks its
+ * DMA array.  Returns the guest address of the first struct lguest_dma whose
+ * used_len is 0 (or of the last entry read if none is unused) and stores the
+ * slot's interrupt in *@interrupt.  Returns 0, leaving *@interrupt untouched,
+ * when @addr is bad (the guest is killed) or nothing is bound there. */
+unsigned long get_dma_buffer(struct lguest_guest_info *linfo,
+			     unsigned long addr, unsigned long *interrupt)
+{
+	unsigned long ret = 0;
+	union futex_key key;
+	struct lguest_dma_info *i;
+
+	mutex_lock(&lguest_lock);
+	down_read(&current->mm->mmap_sem);
+	if (get_futex_key((u32 __user *)addr, &key) != 0) {
+		kill_guest(linfo, "bad registered DMA buffer");
+		goto unlock;
+	}
+	list_for_each_entry(i, &dma_hash[hash(&key)], list) {
+		if (key_eq(&key, &i->key) && i->guest_id == linfo->guest_id) {
+			unsigned int j;
+			for (j = 0; j < i->num_dmas; j++) {
+				struct lguest_dma dma;
+
+				ret = i->dmas + j * sizeof(struct lguest_dma);
+				lhread(linfo, &dma, ret, sizeof(dma));
+				if (dma.used_len == 0)
+					break;
+			}
+			*interrupt = i->interrupt;
+			break;
+		}
+	}
+unlock:
+	up_read(&current->mm->mmap_sem);
+	mutex_unlock(&lguest_lock);
+	return ret;
+}
+
+/* Initialise every bucket of dma_hash[] to an empty list. */
+void lguest_io_init(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(dma_hash); i++)
+		INIT_LIST_HEAD(&dma_hash[i]);
+}
--