Daniel De Graaf
2011-Jan-21 15:59 UTC
[Xen-devel] [SPAM] [PATCH v5] Userspace grant communication
Changes since v4: - Fixed patches to pass checkpatch.pl - Removed the old patch #2 changing memory allocation - One new patch, unmap notification, inspired by robust futex code [PATCH 1/6] xen-gntdev: Change page limit to be global instead of per-open [PATCH 2/6] xen-gntdev: Use find_vma rather than iterating our vma list manually [PATCH 3/6] xen-gntdev: Add reference counting to maps [PATCH 4/6] xen-gntdev: Support mapping in HVM domains [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver [PATCH 6/6] xen/gntalloc,gntdev: Add unmap notify ioctl _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Jan-21 15:59 UTC
[Xen-devel] [PATCH 1/6] xen-gntdev: Change page limit to be global instead of per-open
Because there is no limitation on how many times a user can open a given device file, an per-file-description limit on the number of pages granted offers little to no benefit. Change to a global limit and remove the ioctl() as the parameter can now be changed via sysfs. Xen tools changeset 22768:f8d801e5573e is needed to eliminate the error this change produces in xc_gnttab_set_max_grants. Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov> --- drivers/xen/gntdev.c | 50 ++++++++++++++------------------------------------ 1 files changed, 14 insertions(+), 36 deletions(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 1e31cdc..23d208a 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -45,15 +45,15 @@ MODULE_AUTHOR("Derek G. Murray <Derek.Murray@cl.cam.ac.uk>, " "Gerd Hoffmann <kraxel@redhat.com>"); MODULE_DESCRIPTION("User-space granted page access driver"); -static int limit = 1024; +static int limit = 1024*1024; module_param(limit, int, 0644); -MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped at " - "once by a gntdev instance"); +MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by " + "the gntdev device"); + +static atomic_t pages_mapped = ATOMIC_INIT(0); struct gntdev_priv { struct list_head maps; - uint32_t used; - uint32_t limit; /* lock protects maps from concurrent changes */ spinlock_t lock; struct mm_struct *mm; @@ -82,9 +82,7 @@ static void gntdev_print_maps(struct gntdev_priv *priv, #ifdef DEBUG struct grant_map *map; - pr_debug("maps list (priv %p, usage %d/%d)\n", - priv, priv->used, priv->limit); - + pr_debug("%s: maps list (priv %p)\n", __func__, priv); list_for_each_entry(map, &priv->maps, next) pr_debug(" index %2d, count %2d %s\n", map->index, map->count, @@ -121,9 +119,6 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) add->count = count; add->priv = priv; - if (add->count + priv->used > priv->limit) - goto err; - return add; err: @@ -154,7 +149,6 @@ static void gntdev_add_map(struct gntdev_priv *priv, struct grant_map *add) list_add_tail(&add->next, &priv->maps); done: - priv->used += add->count; gntdev_print_maps(priv, "[new]", add->index); } @@ -200,7 +194,7 @@ static int gntdev_del_map(struct grant_map *map) if (map->unmap_ops[i].handle) return -EBUSY; - map->priv->used -= map->count; + atomic_sub(map->count, &pages_mapped); list_del(&map->next); return 0; } @@ -386,7 +380,6 @@ static int gntdev_open(struct inode *inode, struct file *flip) INIT_LIST_HEAD(&priv->maps); spin_lock_init(&priv->lock); - priv->limit = limit; priv->mm = get_task_mm(current); if (!priv->mm) { @@ -443,19 +436,24 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv, pr_debug("priv %p, add %d\n", priv, op.count); if (unlikely(op.count <= 0)) return -EINVAL; - if (unlikely(op.count > priv->limit)) - return -EINVAL; err = -ENOMEM; map = gntdev_alloc_map(priv, op.count); if (!map) return err; + if (copy_from_user(map->grants, &u->refs, sizeof(map->grants[0]) * op.count) != 0) { gntdev_free_map(map); return err; } + if (unlikely(atomic_add_return(op.count, &pages_mapped) > limit)) { + pr_debug("can''t map: over limit\n"); + gntdev_free_map(map); + return err; + } + spin_lock(&priv->lock); gntdev_add_map(priv, map); op.index = map->index << PAGE_SHIFT; @@ -518,23 +516,6 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv, return 0; } -static long gntdev_ioctl_set_max_grants(struct gntdev_priv *priv, - struct ioctl_gntdev_set_max_grants __user *u) -{ - struct ioctl_gntdev_set_max_grants op; - - if (copy_from_user(&op, u, sizeof(op)) != 0) - return -EFAULT; - pr_debug("priv %p, limit %d\n", priv, op.count); - if (op.count > limit) - return -E2BIG; - - spin_lock(&priv->lock); - priv->limit = op.count; - spin_unlock(&priv->lock); - return 0; -} - static long gntdev_ioctl(struct file *flip, unsigned int cmd, unsigned long arg) { @@ -551,9 +532,6 @@ static long gntdev_ioctl(struct file *flip, case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR: return gntdev_ioctl_get_offset_for_vaddr(priv, ptr); - case IOCTL_GNTDEV_SET_MAX_GRANTS: - return gntdev_ioctl_set_max_grants(priv, ptr); - default: pr_debug("priv %p, unknown cmd %x\n", priv, cmd); return -ENOIOCTLCMD; -- 1.7.3.4 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Jan-21 15:59 UTC
[Xen-devel] [PATCH 2/6] xen-gntdev: Use find_vma rather than iterating our vma list manually
This should be faster if many mappings exist, and also removes the only user of map->vma not related to PTE modification. Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov> --- drivers/xen/gntdev.c | 32 ++++++++------------------------ 1 files changed, 8 insertions(+), 24 deletions(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 23d208a..ce8c37c 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -167,23 +167,6 @@ static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv, return NULL; } -static struct grant_map *gntdev_find_map_vaddr(struct gntdev_priv *priv, - unsigned long vaddr) -{ - struct grant_map *map; - - list_for_each_entry(map, &priv->maps, next) { - if (!map->vma) - continue; - if (vaddr < map->vma->vm_start) - continue; - if (vaddr >= map->vma->vm_end) - continue; - return map; - } - return NULL; -} - static int gntdev_del_map(struct grant_map *map) { int i; @@ -494,22 +477,23 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv, struct ioctl_gntdev_get_offset_for_vaddr __user *u) { struct ioctl_gntdev_get_offset_for_vaddr op; + struct vm_area_struct *vma; struct grant_map *map; if (copy_from_user(&op, u, sizeof(op)) != 0) return -EFAULT; pr_debug("priv %p, offset for vaddr %lx\n", priv, (unsigned long)op.vaddr); - spin_lock(&priv->lock); - map = gntdev_find_map_vaddr(priv, op.vaddr); - if (map == NULL || - map->vma->vm_start != op.vaddr) { - spin_unlock(&priv->lock); + vma = find_vma(current->mm, op.vaddr); + if (!vma || vma->vm_ops != &gntdev_vmops) return -EINVAL; - } + + map = vma->vm_private_data; + if (!map) + return -EINVAL; + op.offset = map->index << PAGE_SHIFT; op.count = map->count; - spin_unlock(&priv->lock); if (copy_to_user(u, &op, sizeof(op)) != 0) return -EFAULT; -- 1.7.3.4 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Jan-21 15:59 UTC
[Xen-devel] [PATCH 3/6] xen-gntdev: Add reference counting to maps
This allows userspace to perform mmap() on the gntdev device and then immediately close the filehandle or remove the mapping using the remove ioctl, with the mapped area remaining valid until unmapped. This also fixes an infinite loop when a gntdev device is closed without first unmapping all areas. Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov> --- drivers/xen/gntdev.c | 67 ++++++++++++++++++++----------------------------- 1 files changed, 27 insertions(+), 40 deletions(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index ce8c37c..256162b 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -62,12 +62,12 @@ struct gntdev_priv { struct grant_map { struct list_head next; - struct gntdev_priv *priv; struct vm_area_struct *vma; int index; int count; int flags; int is_mapped; + atomic_t users; struct ioctl_gntdev_grant_ref *grants; struct gnttab_map_grant_ref *map_ops; struct gnttab_unmap_grant_ref *unmap_ops; @@ -117,7 +117,7 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) add->index = 0; add->count = count; - add->priv = priv; + atomic_set(&add->users, 1); return add; @@ -167,28 +167,18 @@ static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv, return NULL; } -static int gntdev_del_map(struct grant_map *map) -{ - int i; - - if (map->vma) - return -EBUSY; - for (i = 0; i < map->count; i++) - if (map->unmap_ops[i].handle) - return -EBUSY; - - atomic_sub(map->count, &pages_mapped); - list_del(&map->next); - return 0; -} - -static void gntdev_free_map(struct grant_map *map) +static void gntdev_put_map(struct grant_map *map) { int i; if (!map) return; + if (!atomic_dec_and_test(&map->users)) + return; + + atomic_sub(map->count, &pages_mapped); + if (map->pages) for (i = 0; i < map->count; i++) { if (map->pages[i]) @@ -267,6 +257,7 @@ static void gntdev_vma_close(struct vm_area_struct *vma) map->is_mapped = 0; map->vma = NULL; vma->vm_private_data = NULL; + gntdev_put_map(map); } static int gntdev_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) @@ -388,17 +379,14 @@ static int gntdev_release(struct inode *inode, struct file *flip) { struct gntdev_priv *priv = flip->private_data; struct grant_map *map; - int err; pr_debug("priv %p\n", priv); spin_lock(&priv->lock); while (!list_empty(&priv->maps)) { map = list_entry(priv->maps.next, struct grant_map, next); - err = gntdev_del_map(map); - if (WARN_ON(err)) - gntdev_free_map(map); - + list_del(&map->next); + gntdev_put_map(map); } spin_unlock(&priv->lock); @@ -425,15 +413,15 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv, if (!map) return err; - if (copy_from_user(map->grants, &u->refs, - sizeof(map->grants[0]) * op.count) != 0) { - gntdev_free_map(map); + if (unlikely(atomic_add_return(op.count, &pages_mapped) > limit)) { + pr_debug("can''t map: over limit\n"); + gntdev_put_map(map); return err; } - if (unlikely(atomic_add_return(op.count, &pages_mapped) > limit)) { - pr_debug("can''t map: over limit\n"); - gntdev_free_map(map); + if (copy_from_user(map->grants, &u->refs, + sizeof(map->grants[0]) * op.count) != 0) { + gntdev_put_map(map); return err; } @@ -442,13 +430,9 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv, op.index = map->index << PAGE_SHIFT; spin_unlock(&priv->lock); - if (copy_to_user(u, &op, sizeof(op)) != 0) { - spin_lock(&priv->lock); - gntdev_del_map(map); - spin_unlock(&priv->lock); - gntdev_free_map(map); - return err; - } + if (copy_to_user(u, &op, sizeof(op)) != 0) + return -EFAULT; + return 0; } @@ -465,11 +449,12 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv, spin_lock(&priv->lock); map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count); - if (map) - err = gntdev_del_map(map); + if (map) { + list_del(&map->next); + gntdev_put_map(map); + err = 0; + } spin_unlock(&priv->lock); - if (!err) - gntdev_free_map(map); return err; } @@ -549,6 +534,8 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) goto unlock_out; } + atomic_inc(&map->users); + vma->vm_ops = &gntdev_vmops; vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP; -- 1.7.3.4 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Jan-21 15:59 UTC
[Xen-devel] [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
HVM does not allow direct PTE modification, so instead we request that Xen change its internal p2m mappings on the allocated pages and map the memory into userspace normally. Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov> --- drivers/xen/gntdev.c | 115 +++++++++++++++++++++++++++++++------------- drivers/xen/grant-table.c | 6 ++ 2 files changed, 87 insertions(+), 34 deletions(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 256162b..a5710e8 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -32,6 +32,7 @@ #include <linux/sched.h> #include <linux/spinlock.h> #include <linux/slab.h> +#include <linux/highmem.h> #include <xen/xen.h> #include <xen/grant_table.h> @@ -52,6 +53,8 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by " static atomic_t pages_mapped = ATOMIC_INIT(0); +static int use_ptemod; + struct gntdev_priv { struct list_head maps; /* lock protects maps from concurrent changes */ @@ -74,6 +77,8 @@ struct grant_map { struct page **pages; }; +static int unmap_grant_pages(struct grant_map *map, int offset, int pages); + /* ------------------------------------------------------------------ */ static void gntdev_print_maps(struct gntdev_priv *priv, @@ -179,11 +184,32 @@ static void gntdev_put_map(struct grant_map *map) atomic_sub(map->count, &pages_mapped); - if (map->pages) + if (map->pages) { + if (!use_ptemod) + unmap_grant_pages(map, 0, map->count); + for (i = 0; i < map->count; i++) { - if (map->pages[i]) + uint32_t check, *tmp; + if (!map->pages[i]) + continue; + /* XXX When unmapping in an HVM domain, Xen will + * sometimes end up mapping the GFN to an invalid MFN. + * In this case, writes will be discarded and reads will + * return all 0xFF bytes. Leak these unusable GFNs + * until Xen supports fixing their p2m mapping. + */ + tmp = kmap(map->pages[i]); + *tmp = 0xdeaddead; + mb(); + check = *tmp; + kunmap(map->pages[i]); + if (check == 0xdeaddead) __free_page(map->pages[i]); + else + pr_debug("Discard page %d=%ld\n", i, + page_to_pfn(map->pages[i])); } + } kfree(map->pages); kfree(map->grants); kfree(map->map_ops); @@ -198,17 +224,16 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token, { struct grant_map *map = data; unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT; + int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte; u64 pte_maddr; BUG_ON(pgnr >= map->count); pte_maddr = arbitrary_virt_to_machine(pte).maddr; - gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, - GNTMAP_contains_pte | map->flags, + gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags, map->grants[pgnr].ref, map->grants[pgnr].domid); - gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, - GNTMAP_contains_pte | map->flags, + gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags, 0 /* handle */); return 0; } @@ -216,6 +241,19 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token, static int map_grant_pages(struct grant_map *map) { int i, err = 0; + phys_addr_t addr; + + if (!use_ptemod) { + for (i = 0; i < map->count; i++) { + addr = (phys_addr_t) + pfn_to_kaddr(page_to_pfn(map->pages[i])); + gnttab_set_map_op(&map->map_ops[i], addr, map->flags, + map->grants[i].ref, + map->grants[i].domid); + gnttab_set_unmap_op(&map->unmap_ops[i], addr, + map->flags, 0 /* handle */); + } + } pr_debug("map %d+%d\n", map->index, map->count); err = gnttab_map_refs(map->map_ops, map->pages, map->count); @@ -260,17 +298,8 @@ static void gntdev_vma_close(struct vm_area_struct *vma) gntdev_put_map(map); } -static int gntdev_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - pr_debug("vaddr %p, pgoff %ld (shouldn''t happen)\n", - vmf->virtual_address, vmf->pgoff); - vmf->flags = VM_FAULT_ERROR; - return 0; -} - static struct vm_operations_struct gntdev_vmops = { .close = gntdev_vma_close, - .fault = gntdev_vma_fault, }; /* ------------------------------------------------------------------ */ @@ -355,14 +384,16 @@ static int gntdev_open(struct inode *inode, struct file *flip) INIT_LIST_HEAD(&priv->maps); spin_lock_init(&priv->lock); - priv->mm = get_task_mm(current); - if (!priv->mm) { - kfree(priv); - return -ENOMEM; + if (use_ptemod) { + priv->mm = get_task_mm(current); + if (!priv->mm) { + kfree(priv); + return -ENOMEM; + } + priv->mn.ops = &gntdev_mmu_ops; + ret = mmu_notifier_register(&priv->mn, priv->mm); + mmput(priv->mm); } - priv->mn.ops = &gntdev_mmu_ops; - ret = mmu_notifier_register(&priv->mn, priv->mm); - mmput(priv->mm); if (ret) { kfree(priv); @@ -390,7 +421,8 @@ static int gntdev_release(struct inode *inode, struct file *flip) } spin_unlock(&priv->lock); - mmu_notifier_unregister(&priv->mn, priv->mm); + if (use_ptemod) + mmu_notifier_unregister(&priv->mn, priv->mm); kfree(priv); return 0; } @@ -515,7 +547,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) int index = vma->vm_pgoff; int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; struct grant_map *map; - int err = -EINVAL; + int i, err = -EINVAL; if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED)) return -EINVAL; @@ -527,9 +559,9 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) map = gntdev_find_map_index(priv, index, count); if (!map) goto unlock_out; - if (map->vma) + if (use_ptemod && map->vma) goto unlock_out; - if (priv->mm != vma->vm_mm) { + if (use_ptemod && priv->mm != vma->vm_mm) { printk(KERN_WARNING "Huh? Other mm?\n"); goto unlock_out; } @@ -541,20 +573,24 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP; vma->vm_private_data = map; - map->vma = vma; - map->flags = GNTMAP_host_map | GNTMAP_application_map; + if (use_ptemod) + map->vma = vma; + + map->flags = GNTMAP_host_map; if (!(vma->vm_flags & VM_WRITE)) map->flags |= GNTMAP_readonly; spin_unlock(&priv->lock); - err = apply_to_page_range(vma->vm_mm, vma->vm_start, - vma->vm_end - vma->vm_start, - find_grant_ptes, map); - if (err) { - printk(KERN_WARNING "find_grant_ptes() failure.\n"); - return err; + if (use_ptemod) { + err = apply_to_page_range(vma->vm_mm, vma->vm_start, + vma->vm_end - vma->vm_start, + find_grant_ptes, map); + if (err) { + printk(KERN_WARNING "find_grant_ptes() failure.\n"); + return err; + } } err = map_grant_pages(map); @@ -565,6 +601,15 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) map->is_mapped = 1; + if (!use_ptemod) { + for (i = 0; i < count; i++) { + err = vm_insert_page(vma, vma->vm_start + i*PAGE_SIZE, + map->pages[i]); + if (err) + return err; + } + } + return 0; unlock_out: @@ -595,6 +640,8 @@ static int __init gntdev_init(void) if (!xen_domain()) return -ENODEV; + use_ptemod = xen_pv_domain(); + err = misc_register(&gntdev_miscdev); if (err != 0) { printk(KERN_ERR "Could not register gntdev device\n"); diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 9ef54eb..9428ced 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -458,6 +458,9 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, if (ret) return ret; + if (xen_feature(XENFEAT_auto_translated_physmap)) + return ret; + for (i = 0; i < count; i++) { /* m2p override only supported for GNTMAP_contains_pte mappings */ if (!(map_ops[i].flags & GNTMAP_contains_pte)) @@ -483,6 +486,9 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, if (ret) return ret; + if (xen_feature(XENFEAT_auto_translated_physmap)) + return ret; + for (i = 0; i < count; i++) { ret = m2p_remove_override(pages[i]); if (ret) -- 1.7.3.4 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Jan-21 15:59 UTC
[Xen-devel] [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
This allows a userspace application to allocate a shared page for implementing inter-domain communication or device drivers. These shared pages can be mapped using the gntdev device or by the kernel in another domain. Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov> --- drivers/xen/Kconfig | 8 + drivers/xen/Makefile | 2 + drivers/xen/gntalloc.c | 477 ++++++++++++++++++++++++++++++++++++++++++++++++ include/xen/gntalloc.h | 70 +++++++ 4 files changed, 557 insertions(+), 0 deletions(-) create mode 100644 drivers/xen/gntalloc.c create mode 100644 include/xen/gntalloc.h diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 07bec09..a3d7afb 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -80,6 +80,14 @@ config XEN_GNTDEV help Allows userspace processes to use grants. +config XEN_GRANT_DEV_ALLOC + tristate "User-space grant reference allocator driver" + depends on XEN + help + Allows userspace processes to create pages with access granted + to other domains. This can be used to implement frontend drivers + or as part of an inter-domain shared memory channel. + config XEN_PLATFORM_PCI tristate "xen platform pci device driver" depends on XEN_PVHVM && PCI diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 5088cc2..9585a1d 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_XEN_XENCOMM) += xencomm.o obj-$(CONFIG_XEN_BALLOON) += balloon.o obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o +obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o obj-$(CONFIG_XENFS) += xenfs/ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o @@ -18,5 +19,6 @@ obj-$(CONFIG_XEN_DOM0) += pci.o xen-evtchn-y := evtchn.o xen-gntdev-y := gntdev.o +xen-gntalloc-y := gntalloc.o xen-platform-pci-y := platform-pci.o diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c new file mode 100644 index 0000000..a230dc4 --- /dev/null +++ b/drivers/xen/gntalloc.c @@ -0,0 +1,477 @@ +/****************************************************************************** + * gntalloc.c + * + * Device for creating grant references (in user-space) that may be shared + * with other domains. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * This driver exists to allow userspace programs in Linux to allocate kernel + * memory that will later be shared with another domain. Without this device, + * Linux userspace programs cannot create grant references. + * + * How this stuff works: + * X -> granting a page to Y + * Y -> mapping the grant from X + * + * 1. X uses the gntalloc device to allocate a page of kernel memory, P. + * 2. X creates an entry in the grant table that says domid(Y) can access P. + * This is done without a hypercall unless the grant table needs expansion. + * 3. X gives the grant reference identifier, GREF, to Y. + * 4. Y maps the page, either directly into kernel memory for use in a backend + * driver, or via a the gntdev device to map into the address space of an + * application running in Y. This is the first point at which Xen does any + * tracking of the page. + * 5. A program in X mmap()s a segment of the gntalloc device that corresponds + * to the shared page, and can now communicate with Y over the shared page. + * + * + * NOTE TO USERSPACE LIBRARIES: + * The grant allocation and mmap()ing are, naturally, two separate operations. + * You set up the sharing by calling the create ioctl() and then the mmap(). + * Teardown requires munmap() and either close() or ioctl(). + * + * WARNING: Since Xen does not allow a guest to forcibly end the use of a grant + * reference, this device can be used to consume kernel memory by leaving grant + * references mapped by another domain when an application exits. Therefore, + * there is a global limit on the number of pages that can be allocated. When + * all references to the page are unmapped, it will be freed during the next + * grant operation. + */ + +#include <linux/atomic.h> +#include <linux/module.h> +#include <linux/miscdevice.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/device.h> +#include <linux/mm.h> +#include <linux/uaccess.h> +#include <linux/types.h> +#include <linux/list.h> + +#include <xen/xen.h> +#include <xen/page.h> +#include <xen/grant_table.h> +#include <xen/gntalloc.h> + +static int limit = 1024; +module_param(limit, int, 0644); +MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by " + "the gntalloc device"); + +static LIST_HEAD(gref_list); +static DEFINE_SPINLOCK(gref_lock); +static int gref_size; + +/* Metadata on a grant reference. */ +struct gntalloc_gref { + struct list_head next_all; /* list entry gref_list */ + struct list_head next_file; /* list entry file->list, if open */ + struct page *page; /* The shared page */ + uint64_t file_index; /* File offset for mmap() */ + unsigned int users; /* Use count - when zero, waiting on Xen */ + grant_ref_t gref_id; /* The grant reference number */ +}; + +struct gntalloc_file_private_data { + struct list_head list; + uint64_t index; +}; + +static void __del_gref(struct gntalloc_gref *gref); + +static void do_cleanup(void) +{ + struct gntalloc_gref *gref, *n; + list_for_each_entry_safe(gref, n, &gref_list, next_all) { + if (!gref->users) + __del_gref(gref); + } +} + +static int add_grefs(struct ioctl_gntalloc_alloc_gref *op, + uint32_t *gref_ids, struct gntalloc_file_private_data *priv) +{ + int i, rc, readonly; + LIST_HEAD(queue_all); + LIST_HEAD(queue_file); + struct gntalloc_gref *gref; + + readonly = !(op->flags & GNTALLOC_FLAG_WRITABLE); + rc = -ENOMEM; + for (i = 0; i < op->count; i++) { + gref = kzalloc(sizeof(*gref), GFP_KERNEL); + if (!gref) + goto undo; + list_add_tail(&gref->next_all, &queue_all); + list_add_tail(&gref->next_file, &queue_file); + gref->users = 1; + gref->file_index = op->index + i * PAGE_SIZE; + gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO); + if (!gref->page) + goto undo; + + /* Grant foreign access to the page. */ + gref->gref_id = gnttab_grant_foreign_access(op->domid, + pfn_to_mfn(page_to_pfn(gref->page)), readonly); + if (gref->gref_id < 0) { + rc = gref->gref_id; + goto undo; + } + gref_ids[i] = gref->gref_id; + } + + /* Add to gref lists. */ + spin_lock(&gref_lock); + list_splice_tail(&queue_all, &gref_list); + list_splice_tail(&queue_file, &priv->list); + spin_unlock(&gref_lock); + + return 0; + +undo: + list_for_each_entry(gref, &queue_file, next_file) { + if (gref->page && gref->gref_id >= 0) { + __del_gref(gref); + } else { + if (gref->page) + __free_page(gref->page); + list_del(&gref->next_all); + kfree(gref); + } + } + + /* It''s possible for the target domain to map the just-allocated grant + * references by blindly guessing their IDs; if this is done, then + * __del_gref will leave them in the queue_all list. They need to be + * added to the global list so that we can free them when they are no + * longer referenced. + */ + if (unlikely(!list_empty(&queue_all))) { + spin_lock(&gref_lock); + list_splice_tail(&queue_all, &gref_list); + spin_unlock(&gref_lock); + } + return rc; +} + +static void __del_gref(struct gntalloc_gref *gref) +{ + if (gnttab_query_foreign_access(gref->gref_id)) + return; + + if (!gnttab_end_foreign_access_ref(gref->gref_id, 0)) + return; + + gref_size--; + list_del(&gref->next_all); + + __free_page(gref->page); + kfree(gref); +} + +/* finds contiguous grant references in a file, returns the first */ +static struct gntalloc_gref *find_grefs(struct gntalloc_file_private_data *priv, + uint64_t index, uint32_t count) +{ + struct gntalloc_gref *rv = NULL, *gref; + list_for_each_entry(gref, &priv->list, next_file) { + if (gref->file_index == index && !rv) + rv = gref; + if (rv) { + if (gref->file_index != index) + return NULL; + index += PAGE_SIZE; + count--; + if (count == 0) + return rv; + } + } + return NULL; +} + +/* + * ------------------------------------- + * File operations. + * ------------------------------------- + */ +static int gntalloc_open(struct inode *inode, struct file *filp) +{ + struct gntalloc_file_private_data *priv; + + try_module_get(THIS_MODULE); + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + goto out_nomem; + INIT_LIST_HEAD(&priv->list); + + filp->private_data = priv; + + pr_debug("%s: priv %p\n", __func__, priv); + + return 0; + +out_nomem: + return -ENOMEM; +} + +static int gntalloc_release(struct inode *inode, struct file *filp) +{ + struct gntalloc_file_private_data *priv = filp->private_data; + struct gntalloc_gref *gref; + + pr_debug("%s: priv %p\n", __func__, priv); + + spin_lock(&gref_lock); + while (!list_empty(&priv->list)) { + gref = list_entry(priv->list.next, + struct gntalloc_gref, next_file); + list_del(&gref->next_file); + gref->users--; + if (gref->users == 0) + __del_gref(gref); + } + kfree(priv); + spin_unlock(&gref_lock); + + module_put(THIS_MODULE); + + return 0; +} + +static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv, + struct ioctl_gntalloc_alloc_gref __user *arg) +{ + int rc = 0; + struct ioctl_gntalloc_alloc_gref op; + uint32_t *gref_ids; + + pr_debug("%s: priv %p\n", __func__, priv); + + if (copy_from_user(&op, arg, sizeof(op))) { + rc = -EFAULT; + goto out; + } + + gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY); + if (!gref_ids) { + rc = -ENOMEM; + goto out; + } + + spin_lock(&gref_lock); + do_cleanup(); + if (gref_size + op.count > limit) { + spin_unlock(&gref_lock); + rc = -ENOSPC; + goto out_free; + } + gref_size += op.count; + op.index = priv->index; + priv->index += op.count * PAGE_SIZE; + spin_unlock(&gref_lock); + + rc = add_grefs(&op, gref_ids, priv); + if (rc < 0) + goto out_free; + + if (copy_to_user(arg, &op, sizeof(op))) { + rc = -EFAULT; + goto out_free; + } + if (copy_to_user(arg->gref_ids, gref_ids, + sizeof(gref_ids[0]) * op.count)) { + rc = -EFAULT; + goto out_free; + } + +out_free: + kfree(gref_ids); +out: + return rc; +} + +static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv, + void __user *arg) +{ + int i, rc = 0; + struct ioctl_gntalloc_dealloc_gref op; + struct gntalloc_gref *gref, *n; + + pr_debug("%s: priv %p\n", __func__, priv); + + if (copy_from_user(&op, arg, sizeof(op))) { + rc = -EFAULT; + goto dealloc_grant_out; + } + + spin_lock(&gref_lock); + gref = find_grefs(priv, op.index, op.count); + if (gref) { + for (i = 0; i < op.count; i++) { + n = list_entry(gref->next_file.next, + struct gntalloc_gref, next_file); + list_del(&gref->next_file); + gref->users--; + gref = n; + } + } else { + rc = -EINVAL; + } + + do_cleanup(); + + spin_unlock(&gref_lock); +dealloc_grant_out: + return rc; +} + +static long gntalloc_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct gntalloc_file_private_data *priv = filp->private_data; + + switch (cmd) { + case IOCTL_GNTALLOC_ALLOC_GREF: + return gntalloc_ioctl_alloc(priv, (void __user *)arg); + + case IOCTL_GNTALLOC_DEALLOC_GREF: + return gntalloc_ioctl_dealloc(priv, (void __user *)arg); + + default: + return -ENOIOCTLCMD; + } + + return 0; +} + +static void gntalloc_vma_close(struct vm_area_struct *vma) +{ + struct gntalloc_gref *gref = vma->vm_private_data; + if (!gref) + return; + + spin_lock(&gref_lock); + gref->users--; + if (gref->users == 0) + __del_gref(gref); + spin_unlock(&gref_lock); +} + +static struct vm_operations_struct gntalloc_vmops = { + .close = gntalloc_vma_close, +}; + +static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct gntalloc_file_private_data *priv = filp->private_data; + struct gntalloc_gref *gref; + int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + int rv, i; + + pr_debug("%s: priv %p, page %lu+%d\n", __func__, + priv, vma->vm_pgoff, count); + + if (!(vma->vm_flags & VM_SHARED)) { + printk(KERN_ERR "%s: Mapping must be shared.\n", __func__); + return -EINVAL; + } + + spin_lock(&gref_lock); + gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count); + if (gref == NULL) { + rv = -ENOENT; + pr_debug("%s: Could not find grant reference", + __func__); + goto out_unlock; + } + + vma->vm_private_data = gref; + + vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_DONTCOPY; + vma->vm_flags |= VM_IO; + vma->vm_flags |= VM_PFNMAP | VM_PFN_AT_MMAP; + + vma->vm_ops = &gntalloc_vmops; + + for (i = 0; i < count; i++) { + gref->users++; + rv = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE, + gref->page); + if (rv) + goto out_unlock; + + gref = list_entry(gref->next_file.next, + struct gntalloc_gref, next_file); + } + rv = 0; + +out_unlock: + spin_unlock(&gref_lock); + return rv; +} + +static const struct file_operations gntalloc_fops = { + .owner = THIS_MODULE, + .open = gntalloc_open, + .release = gntalloc_release, + .unlocked_ioctl = gntalloc_ioctl, + .mmap = gntalloc_mmap +}; + +/* + * ------------------------------------- + * Module creation/destruction. + * ------------------------------------- + */ +static struct miscdevice gntalloc_miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "xen/gntalloc", + .fops = &gntalloc_fops, +}; + +static int __init gntalloc_init(void) +{ + int err; + + if (!xen_domain()) + return -ENODEV; + + err = misc_register(&gntalloc_miscdev); + if (err != 0) { + printk(KERN_ERR "Could not register misc gntalloc device\n"); + return err; + } + + pr_debug("Created grant allocation device at %d,%d\n", + MISC_MAJOR, gntalloc_miscdev.minor); + + return 0; +} + +static void __exit gntalloc_exit(void) +{ + misc_deregister(&gntalloc_miscdev); +} + +module_init(gntalloc_init); +module_exit(gntalloc_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Carter Weatherly <carter.weatherly@jhuapl.edu>, " + "Daniel De Graaf <dgdegra@tycho.nsa.gov>"); +MODULE_DESCRIPTION("User-space grant reference allocator driver"); diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h new file mode 100644 index 0000000..e1d6d0f --- /dev/null +++ b/include/xen/gntalloc.h @@ -0,0 +1,70 @@ +/****************************************************************************** + * gntalloc.h + * + * Interface to /dev/xen/gntalloc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __LINUX_PUBLIC_GNTALLOC_H__ +#define __LINUX_PUBLIC_GNTALLOC_H__ + +/* + * Allocates a new page and creates a new grant reference. + */ +#define IOCTL_GNTALLOC_ALLOC_GREF \ +_IOC(_IOC_NONE, ''G'', 5, sizeof(struct ioctl_gntalloc_alloc_gref)) +struct ioctl_gntalloc_alloc_gref { + /* IN parameters */ + /* The ID of the domain to be given access to the grants. */ + uint16_t domid; + /* Flags for this mapping */ + uint16_t flags; + /* Number of pages to map */ + uint32_t count; + /* OUT parameters */ + /* The offset to be used on a subsequent call to mmap(). */ + uint64_t index; + /* The grant references of the newly created grant, one per page */ + /* Variable size, depending on count */ + uint32_t gref_ids[1]; +}; + +#define GNTALLOC_FLAG_WRITABLE 1 + +/* + * Deallocates the grant reference, allowing the associated page to be freed if + * no other domains are using it. + */ +#define IOCTL_GNTALLOC_DEALLOC_GREF \ +_IOC(_IOC_NONE, ''G'', 6, sizeof(struct ioctl_gntalloc_dealloc_gref)) +struct ioctl_gntalloc_dealloc_gref { + /* IN parameters */ + /* The offset returned in the map operation */ + uint64_t index; + /* Number of references to unmap */ + uint32_t count; +}; +#endif /* __LINUX_PUBLIC_GNTALLOC_H__ */ -- 1.7.3.4 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Jan-21 15:59 UTC
[Xen-devel] [PATCH 6/6] xen/gntalloc, gntdev: Add unmap notify ioctl
This ioctl allows the users of a shared page to be notified when the other end exits abnormally. Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov> --- drivers/xen/gntalloc.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/xen/gntdev.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++- include/xen/gntalloc.h | 28 ++++++++++++++++++++++++ include/xen/gntdev.h | 27 +++++++++++++++++++++++ 4 files changed, 165 insertions(+), 1 deletions(-) diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c index a230dc4..8ac0dfc 100644 --- a/drivers/xen/gntalloc.c +++ b/drivers/xen/gntalloc.c @@ -60,11 +60,13 @@ #include <linux/uaccess.h> #include <linux/types.h> #include <linux/list.h> +#include <linux/highmem.h> #include <xen/xen.h> #include <xen/page.h> #include <xen/grant_table.h> #include <xen/gntalloc.h> +#include <xen/events.h> static int limit = 1024; module_param(limit, int, 0644); @@ -83,6 +85,9 @@ struct gntalloc_gref { uint64_t file_index; /* File offset for mmap() */ unsigned int users; /* Use count - when zero, waiting on Xen */ grant_ref_t gref_id; /* The grant reference number */ + unsigned notify_flags:2; /* Unmap notification flags */ + unsigned notify_pgoff:12; /* Offset of the byte to clear */ + int notify_event; /* Port (event channel) to notify */ }; struct gntalloc_file_private_data { @@ -169,6 +174,16 @@ undo: static void __del_gref(struct gntalloc_gref *gref) { + if (gref->notify_flags & UNMAP_NOTIFY_CLEAR_BYTE) { + uint8_t *tmp = kmap(gref->page); + tmp[gref->notify_pgoff] = 0; + kunmap(gref->page); + } + if (gref->notify_flags & UNMAP_NOTIFY_SEND_EVENT) + notify_remote_via_evtchn(gref->notify_event); + + gref->notify_flags = 0; + if (gnttab_query_foreign_access(gref->gref_id)) return; @@ -339,6 +354,43 @@ dealloc_grant_out: return rc; } +static long gntalloc_ioctl_notify(struct gntalloc_file_private_data *priv, + void __user *arg) +{ + struct ioctl_gntalloc_unmap_notify op; + struct gntalloc_gref *gref; + uint64_t index; + int pgoff; + int rc; + + if (copy_from_user(&op, arg, sizeof(op))) + return -EFAULT; + + index = op.index & ~(PAGE_SIZE - 1); + pgoff = op.index & (PAGE_SIZE - 1); + + spin_lock(&gref_lock); + + gref = find_grefs(priv, index, 1); + if (!gref) { + rc = -ENOENT; + goto unlock_out; + } + + if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) { + rc = -EINVAL; + goto unlock_out; + } + + gref->notify_flags = op.action; + gref->notify_pgoff = pgoff; + gref->notify_event = op.event_channel_port; + rc = 0; + unlock_out: + spin_unlock(&gref_lock); + return rc; +} + static long gntalloc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -351,6 +403,9 @@ static long gntalloc_ioctl(struct file *filp, unsigned int cmd, case IOCTL_GNTALLOC_DEALLOC_GREF: return gntalloc_ioctl_dealloc(priv, (void __user *)arg); + case IOCTL_GNTALLOC_SET_UNMAP_NOTIFY: + return gntalloc_ioctl_notify(priv, (void __user *)arg); + default: return -ENOIOCTLCMD; } diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index a5710e8..e360d0a 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -37,6 +37,7 @@ #include <xen/xen.h> #include <xen/grant_table.h> #include <xen/gntdev.h> +#include <xen/events.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> #include <asm/xen/page.h> @@ -70,6 +71,9 @@ struct grant_map { int count; int flags; int is_mapped; + int notify_flags; + int notify_offset; + int notify_event; atomic_t users; struct ioctl_gntdev_grant_ref *grants; struct gnttab_map_grant_ref *map_ops; @@ -165,7 +169,7 @@ static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv, list_for_each_entry(map, &priv->maps, next) { if (map->index != index) continue; - if (map->count != count) + if (count && map->count != count) continue; return map; } @@ -184,6 +188,10 @@ static void gntdev_put_map(struct grant_map *map) atomic_sub(map->count, &pages_mapped); + if (map->notify_flags & UNMAP_NOTIFY_SEND_EVENT) { + notify_remote_via_evtchn(map->notify_event); + } + if (map->pages) { if (!use_ptemod) unmap_grant_pages(map, 0, map->count); @@ -272,6 +280,16 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages) { int i, err = 0; + if (map->notify_flags & UNMAP_NOTIFY_CLEAR_BYTE) { + int pgno = (map->notify_offset >> PAGE_SHIFT); + if (pgno >= offset && pgno < pages + offset) { + uint8_t *tmp = kmap(map->pages[pgno]); + tmp[map->notify_offset & (PAGE_SIZE-1)] = 0; + kunmap(map->pages[pgno]); + map->notify_flags &= ~UNMAP_NOTIFY_CLEAR_BYTE; + } + } + pr_debug("map %d+%d [%d+%d]\n", map->index, map->count, offset, pages); err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages, pages); if (err) @@ -517,6 +535,39 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv, return 0; } +static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u) +{ + struct ioctl_gntdev_unmap_notify op; + struct grant_map *map; + int rc; + + if (copy_from_user(&op, u, sizeof(op))) + return -EFAULT; + + if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) + return -EINVAL; + + spin_lock(&priv->lock); + + list_for_each_entry(map, &priv->maps, next) { + uint64_t begin = map->index << PAGE_SHIFT; + uint64_t end = (map->index + map->count) << PAGE_SHIFT; + if (op.index >= begin && op.index < end) + goto found; + } + rc = -ENOENT; + goto unlock_out; + + found: + map->notify_flags = op.action; + map->notify_offset = op.index - (map->index << PAGE_SHIFT); + map->notify_event = op.event_channel_port; + rc = 0; + unlock_out: + spin_unlock(&priv->lock); + return rc; +} + static long gntdev_ioctl(struct file *flip, unsigned int cmd, unsigned long arg) { @@ -533,6 +584,9 @@ static long gntdev_ioctl(struct file *flip, case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR: return gntdev_ioctl_get_offset_for_vaddr(priv, ptr); + case IOCTL_GNTDEV_SET_UNMAP_NOTIFY: + return gntdev_ioctl_notify(priv, ptr); + default: pr_debug("priv %p, unknown cmd %x\n", priv, cmd); return -ENOIOCTLCMD; diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h index e1d6d0f..92339f5 100644 --- a/include/xen/gntalloc.h +++ b/include/xen/gntalloc.h @@ -67,4 +67,32 @@ struct ioctl_gntalloc_dealloc_gref { /* Number of references to unmap */ uint32_t count; }; + +/* + * Sets up an unmap notification within the page, so that the other side can do + * cleanup if this side crashes. Required to implement cross-domain robust + * mutexes or close notification on communication channels. + * + * Each mapped page only supports one notification; multiple calls referring to + * the same page overwrite the previous notification. You must clear the + * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it + * to occur. + */ +#define IOCTL_GNTALLOC_SET_UNMAP_NOTIFY \ +_IOC(_IOC_NONE, ''G'', 7, sizeof(struct ioctl_gntalloc_unmap_notify)) +struct ioctl_gntalloc_unmap_notify { + /* IN parameters */ + /* Index of a byte in the page */ + uint64_t index; + /* Action(s) to take on unmap */ + uint32_t action; + /* Event channel to notify */ + uint32_t event_channel_port; +}; + +/* Clear (set to zero) the byte specified by index */ +#define UNMAP_NOTIFY_CLEAR_BYTE 0x1 +/* Send an interrupt on the indicated event channel */ +#define UNMAP_NOTIFY_SEND_EVENT 0x2 + #endif /* __LINUX_PUBLIC_GNTALLOC_H__ */ diff --git a/include/xen/gntdev.h b/include/xen/gntdev.h index eb23f41..5d9b9b4 100644 --- a/include/xen/gntdev.h +++ b/include/xen/gntdev.h @@ -116,4 +116,31 @@ struct ioctl_gntdev_set_max_grants { uint32_t count; }; +/* + * Sets up an unmap notification within the page, so that the other side can do + * cleanup if this side crashes. Required to implement cross-domain robust + * mutexes or close notification on communication channels. + * + * Each mapped page only supports one notification; multiple calls referring to + * the same page overwrite the previous notification. You must clear the + * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it + * to occur. + */ +#define IOCTL_GNTDEV_SET_UNMAP_NOTIFY \ +_IOC(_IOC_NONE, ''G'', 7, sizeof(struct ioctl_gntdev_unmap_notify)) +struct ioctl_gntdev_unmap_notify { + /* IN parameters */ + /* Index of a byte in the page */ + uint64_t index; + /* Action(s) to take on unmap */ + uint32_t action; + /* Event channel to notify */ + uint32_t event_channel_port; +}; + +/* Clear (set to zero) the byte specified by index */ +#define UNMAP_NOTIFY_CLEAR_BYTE 0x1 +/* Send an interrupt on the indicated event channel */ +#define UNMAP_NOTIFY_SEND_EVENT 0x2 + #endif /* __LINUX_PUBLIC_GNTDEV_H__ */ -- 1.7.3.4 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Konrad Rzeszutek Wilk
2011-Jan-27 18:52 UTC
Re: [Xen-devel] [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
On Fri, Jan 21, 2011 at 10:59:07AM -0500, Daniel De Graaf wrote:> This allows a userspace application to allocate a shared page for > implementing inter-domain communication or device drivers. These > shared pages can be mapped using the gntdev device or by the kernel > in another domain. > > Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov> > --- > drivers/xen/Kconfig | 8 + > drivers/xen/Makefile | 2 + > drivers/xen/gntalloc.c | 477 ++++++++++++++++++++++++++++++++++++++++++++++++ > include/xen/gntalloc.h | 70 +++++++ > 4 files changed, 557 insertions(+), 0 deletions(-) > create mode 100644 drivers/xen/gntalloc.c > create mode 100644 include/xen/gntalloc.h > > diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig > index 07bec09..a3d7afb 100644 > --- a/drivers/xen/Kconfig > +++ b/drivers/xen/Kconfig > @@ -80,6 +80,14 @@ config XEN_GNTDEV > help > Allows userspace processes to use grants. > > +config XEN_GRANT_DEV_ALLOC > + tristate "User-space grant reference allocator driver" > + depends on XEN > + help > + Allows userspace processes to create pages with access granted > + to other domains. This can be used to implement frontend drivers > + or as part of an inter-domain shared memory channel. > + > config XEN_PLATFORM_PCI > tristate "xen platform pci device driver" > depends on XEN_PVHVM && PCI > diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile > index 5088cc2..9585a1d 100644 > --- a/drivers/xen/Makefile > +++ b/drivers/xen/Makefile > @@ -10,6 +10,7 @@ obj-$(CONFIG_XEN_XENCOMM) += xencomm.o > obj-$(CONFIG_XEN_BALLOON) += balloon.o > obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o > obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o > +obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o > obj-$(CONFIG_XENFS) += xenfs/ > obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o > obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o > @@ -18,5 +19,6 @@ obj-$(CONFIG_XEN_DOM0) += pci.o > > xen-evtchn-y := evtchn.o > xen-gntdev-y := gntdev.o > +xen-gntalloc-y := gntalloc.o > > xen-platform-pci-y := platform-pci.o > diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c > new file mode 100644 > index 0000000..a230dc4 > --- /dev/null > +++ b/drivers/xen/gntalloc.c > @@ -0,0 +1,477 @@ > +/****************************************************************************** > + * gntalloc.c > + * > + * Device for creating grant references (in user-space) that may be shared > + * with other domains. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program; if not, write to the Free Software > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA > + */ > + > +/* > + * This driver exists to allow userspace programs in Linux to allocate kernel > + * memory that will later be shared with another domain. Without this device, > + * Linux userspace programs cannot create grant references. > + * > + * How this stuff works: > + * X -> granting a page to Y > + * Y -> mapping the grant from X > + * > + * 1. X uses the gntalloc device to allocate a page of kernel memory, P. > + * 2. X creates an entry in the grant table that says domid(Y) can access P. > + * This is done without a hypercall unless the grant table needs expansion. > + * 3. X gives the grant reference identifier, GREF, to Y. > + * 4. Y maps the page, either directly into kernel memory for use in a backend > + * driver, or via a the gntdev device to map into the address space of an > + * application running in Y. This is the first point at which Xen does any > + * tracking of the page. > + * 5. A program in X mmap()s a segment of the gntalloc device that corresponds > + * to the shared page, and can now communicate with Y over the shared page. > + * > + * > + * NOTE TO USERSPACE LIBRARIES: > + * The grant allocation and mmap()ing are, naturally, two separate operations. > + * You set up the sharing by calling the create ioctl() and then the mmap(). > + * Teardown requires munmap() and either close() or ioctl(). > + * > + * WARNING: Since Xen does not allow a guest to forcibly end the use of a grant > + * reference, this device can be used to consume kernel memory by leaving grant > + * references mapped by another domain when an application exits. Therefore, > + * there is a global limit on the number of pages that can be allocated. When > + * all references to the page are unmapped, it will be freed during the next > + * grant operation. > + */ > + > +#include <linux/atomic.h> > +#include <linux/module.h> > +#include <linux/miscdevice.h> > +#include <linux/kernel.h> > +#include <linux/init.h> > +#include <linux/slab.h> > +#include <linux/fs.h> > +#include <linux/device.h> > +#include <linux/mm.h> > +#include <linux/uaccess.h> > +#include <linux/types.h> > +#include <linux/list.h> > + > +#include <xen/xen.h> > +#include <xen/page.h> > +#include <xen/grant_table.h> > +#include <xen/gntalloc.h> > + > +static int limit = 1024; > +module_param(limit, int, 0644); > +MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by " > + "the gntalloc device"); > + > +static LIST_HEAD(gref_list); > +static DEFINE_SPINLOCK(gref_lock); > +static int gref_size; > + > +/* Metadata on a grant reference. */ > +struct gntalloc_gref { > + struct list_head next_all; /* list entry gref_list */How about ''next_gref'' ?> + struct list_head next_file; /* list entry file->list, if open */ > + struct page *page; /* The shared page */ > + uint64_t file_index; /* File offset for mmap() */ > + unsigned int users; /* Use count - when zero, waiting on Xen */ > + grant_ref_t gref_id; /* The grant reference number */ > +}; > + > +struct gntalloc_file_private_data { > + struct list_head list; > + uint64_t index; > +}; > + > +static void __del_gref(struct gntalloc_gref *gref); > + > +static void do_cleanup(void) > +{ > + struct gntalloc_gref *gref, *n; > + list_for_each_entry_safe(gref, n, &gref_list, next_all) { > + if (!gref->users) > + __del_gref(gref); > + } > +} > + > +static int add_grefs(struct ioctl_gntalloc_alloc_gref *op, > + uint32_t *gref_ids, struct gntalloc_file_private_data *priv) > +{ > + int i, rc, readonly; > + LIST_HEAD(queue_all); > + LIST_HEAD(queue_file); > + struct gntalloc_gref *gref; > + > + readonly = !(op->flags & GNTALLOC_FLAG_WRITABLE); > + rc = -ENOMEM; > + for (i = 0; i < op->count; i++) { > + gref = kzalloc(sizeof(*gref), GFP_KERNEL); > + if (!gref) > + goto undo; > + list_add_tail(&gref->next_all, &queue_all); > + list_add_tail(&gref->next_file, &queue_file); > + gref->users = 1; > + gref->file_index = op->index + i * PAGE_SIZE; > + gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO); > + if (!gref->page) > + goto undo; > + > + /* Grant foreign access to the page. */ > + gref->gref_id = gnttab_grant_foreign_access(op->domid, > + pfn_to_mfn(page_to_pfn(gref->page)), readonly); > + if (gref->gref_id < 0) { > + rc = gref->gref_id; > + goto undo; > + } > + gref_ids[i] = gref->gref_id; > + } > + > + /* Add to gref lists. */ > + spin_lock(&gref_lock); > + list_splice_tail(&queue_all, &gref_list); > + list_splice_tail(&queue_file, &priv->list); > + spin_unlock(&gref_lock); > + > + return 0; > + > +undo: > + list_for_each_entry(gref, &queue_file, next_file) { > + if (gref->page && gref->gref_id >= 0) { > + __del_gref(gref); > + } else { > + if (gref->page) > + __free_page(gref->page); > + list_del(&gref->next_all);Ohh, you are actually deleting them from queue_all, ah. Can you add a comment about that since the first time I read this I thought you were iterating over the queue_file and deleting from _that_ list. Tricky.. Can collapse this deleting into __del_gref? Looks like you should just preface the gnttab with "if gref->page && gref->gref_id" and also the gref_size, and the "if (gref->page)" before deleting?> + kfree(gref); > + } > + } > + > + /* It''s possible for the target domain to map the just-allocated grant > + * references by blindly guessing their IDs; if this is done, then > + * __del_gref will leave them in the queue_all list. They need to be > + * added to the global list so that we can free them when they are no > + * longer referenced. > + */ > + if (unlikely(!list_empty(&queue_all))) { > + spin_lock(&gref_lock); > + list_splice_tail(&queue_all, &gref_list); > + spin_unlock(&gref_lock); > + } > + return rc; > +} > + > +static void __del_gref(struct gntalloc_gref *gref) > +{ > + if (gnttab_query_foreign_access(gref->gref_id)) > + return; > + > + if (!gnttab_end_foreign_access_ref(gref->gref_id, 0)) > + return; > + > + gref_size--; > + list_del(&gref->next_all); > + > + __free_page(gref->page); > + kfree(gref); > +} > + > +/* finds contiguous grant references in a file, returns the first */ > +static struct gntalloc_gref *find_grefs(struct gntalloc_file_private_data *priv, > + uint64_t index, uint32_t count) > +{ > + struct gntalloc_gref *rv = NULL, *gref; > + list_for_each_entry(gref, &priv->list, next_file) { > + if (gref->file_index == index && !rv) > + rv = gref; > + if (rv) { > + if (gref->file_index != index) > + return NULL; > + index += PAGE_SIZE; > + count--; > + if (count == 0) > + return rv; > + } > + } > + return NULL; > +} > + > +/* > + * ------------------------------------- > + * File operations. > + * ------------------------------------- > + */ > +static int gntalloc_open(struct inode *inode, struct file *filp) > +{ > + struct gntalloc_file_private_data *priv; > + > + try_module_get(THIS_MODULE);No checking if it fails?> + > + priv = kzalloc(sizeof(*priv), GFP_KERNEL); > + if (!priv) > + goto out_nomem; > + INIT_LIST_HEAD(&priv->list); > + > + filp->private_data = priv; > + > + pr_debug("%s: priv %p\n", __func__, priv); > + > + return 0; > + > +out_nomem:You should also do: module_put(THIS_MODULE);> + return -ENOMEM; > +} > + > +static int gntalloc_release(struct inode *inode, struct file *filp) > +{ > + struct gntalloc_file_private_data *priv = filp->private_data; > + struct gntalloc_gref *gref; > + > + pr_debug("%s: priv %p\n", __func__, priv); > + > + spin_lock(&gref_lock); > + while (!list_empty(&priv->list)) { > + gref = list_entry(priv->list.next, > + struct gntalloc_gref, next_file);> + list_del(&gref->next_file); > + gref->users--; > + if (gref->users == 0) > + __del_gref(gref); > + } > + kfree(priv); > + spin_unlock(&gref_lock); > + > + module_put(THIS_MODULE); > + > + return 0; > +} > + > +static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv, > + struct ioctl_gntalloc_alloc_gref __user *arg) > +{ > + int rc = 0; > + struct ioctl_gntalloc_alloc_gref op; > + uint32_t *gref_ids; > + > + pr_debug("%s: priv %p\n", __func__, priv); > + > + if (copy_from_user(&op, arg, sizeof(op))) { > + rc = -EFAULT; > + goto out; > + } > + > + gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY); > + if (!gref_ids) { > + rc = -ENOMEM; > + goto out; > + } > + > + spin_lock(&gref_lock); > + do_cleanup();Hmm, why the cleanup here? I see it gntalloc_ioctl_dealloc which makes sense since the users-- might have been decremented to zero. But here?> + if (gref_size + op.count > limit) { > + spin_unlock(&gref_lock); > + rc = -ENOSPC; > + goto out_free; > + } > + gref_size += op.count; > + op.index = priv->index; > + priv->index += op.count * PAGE_SIZE; > + spin_unlock(&gref_lock); > + > + rc = add_grefs(&op, gref_ids, priv); > + if (rc < 0) > + goto out_free; > + > + if (copy_to_user(arg, &op, sizeof(op))) { > + rc = -EFAULT; > + goto out_free;Not something that would clean the newly added grant? Say the code I suggested below the ''out'' label.> + } > + if (copy_to_user(arg->gref_ids, gref_ids, > + sizeof(gref_ids[0]) * op.count)) { > + rc = -EFAULT; > + goto out_free;Ditto.> + } > + > +out_free: > + kfree(gref_ids); > +out: > + return rc;out_del_gref: gref->users--; __del_gref(gref); goto out_free;> +} > + > +static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv, > + void __user *arg) > +{ > + int i, rc = 0; > + struct ioctl_gntalloc_dealloc_gref op; > + struct gntalloc_gref *gref, *n; > + > + pr_debug("%s: priv %p\n", __func__, priv); > + > + if (copy_from_user(&op, arg, sizeof(op))) { > + rc = -EFAULT; > + goto dealloc_grant_out; > + } > + > + spin_lock(&gref_lock); > + gref = find_grefs(priv, op.index, op.count); > + if (gref) { > + for (i = 0; i < op.count; i++) { > + n = list_entry(gref->next_file.next, > + struct gntalloc_gref, next_file);Might want to add a comment that you are deleting only from ->next_file b/c the do_cleanpup will take of deleting from the other list (that is if the users is at zero).> + list_del(&gref->next_file); > + gref->users--; > + gref = n; > + } > + } else { > + rc = -EINVAL; > + } > + > + do_cleanup(); > + > + spin_unlock(&gref_lock); > +dealloc_grant_out: > + return rc; > +} > + > +static long gntalloc_ioctl(struct file *filp, unsigned int cmd, > + unsigned long arg) > +{ > + struct gntalloc_file_private_data *priv = filp->private_data; > + > + switch (cmd) { > + case IOCTL_GNTALLOC_ALLOC_GREF: > + return gntalloc_ioctl_alloc(priv, (void __user *)arg); > + > + case IOCTL_GNTALLOC_DEALLOC_GREF: > + return gntalloc_ioctl_dealloc(priv, (void __user *)arg); > + > + default: > + return -ENOIOCTLCMD; > + } > + > + return 0; > +} > + > +static void gntalloc_vma_close(struct vm_area_struct *vma) > +{ > + struct gntalloc_gref *gref = vma->vm_private_data; > + if (!gref) > + return; > + > + spin_lock(&gref_lock); > + gref->users--; > + if (gref->users == 0) > + __del_gref(gref);I just want to be convienced here that I am wrong. If the ''ioctl_deallo'' has not been done, what will if unmap this VMA? Will it be OK to yank the gref from gref_list while (and kfree it) while it is still referenced in the filp->private_data? Or would end up trying to derefence the *priv and go BOOM?> + spin_unlock(&gref_lock); > +} > + > +static struct vm_operations_struct gntalloc_vmops = { > + .close = gntalloc_vma_close, > +}; > + > +static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma) > +{ > + struct gntalloc_file_private_data *priv = filp->private_data; > + struct gntalloc_gref *gref; > + int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; > + int rv, i; > + > + pr_debug("%s: priv %p, page %lu+%d\n", __func__, > + priv, vma->vm_pgoff, count); > + > + if (!(vma->vm_flags & VM_SHARED)) { > + printk(KERN_ERR "%s: Mapping must be shared.\n", __func__); > + return -EINVAL; > + } > + > + spin_lock(&gref_lock); > + gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count); > + if (gref == NULL) { > + rv = -ENOENT; > + pr_debug("%s: Could not find grant reference", > + __func__); > + goto out_unlock; > + } > + > + vma->vm_private_data = gref; > + > + vma->vm_flags |= VM_RESERVED; > + vma->vm_flags |= VM_DONTCOPY; > + vma->vm_flags |= VM_IO;Hmm, I don''t think you need the VM_IO.> + vma->vm_flags |= VM_PFNMAP | VM_PFN_AT_MMAP; > + > + vma->vm_ops = &gntalloc_vmops; > + > + for (i = 0; i < count; i++) { > + gref->users++; > + rv = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE, > + gref->page); > + if (rv) > + goto out_unlock; > + > + gref = list_entry(gref->next_file.next, > + struct gntalloc_gref, next_file); > + } > + rv = 0; > + > +out_unlock: > + spin_unlock(&gref_lock); > + return rv; > +} > + > +static const struct file_operations gntalloc_fops = { > + .owner = THIS_MODULE, > + .open = gntalloc_open, > + .release = gntalloc_release, > + .unlocked_ioctl = gntalloc_ioctl, > + .mmap = gntalloc_mmap > +}; > + > +/* > + * ------------------------------------- > + * Module creation/destruction. > + * ------------------------------------- > + */ > +static struct miscdevice gntalloc_miscdev = { > + .minor = MISC_DYNAMIC_MINOR, > + .name = "xen/gntalloc", > + .fops = &gntalloc_fops, > +}; > + > +static int __init gntalloc_init(void) > +{ > + int err; > + > + if (!xen_domain()) > + return -ENODEV; > + > + err = misc_register(&gntalloc_miscdev); > + if (err != 0) { > + printk(KERN_ERR "Could not register misc gntalloc device\n"); > + return err; > + } > + > + pr_debug("Created grant allocation device at %d,%d\n", > + MISC_MAJOR, gntalloc_miscdev.minor); > + > + return 0; > +} > + > +static void __exit gntalloc_exit(void) > +{ > + misc_deregister(&gntalloc_miscdev); > +} > + > +module_init(gntalloc_init); > +module_exit(gntalloc_exit); > + > +MODULE_LICENSE("GPL"); > +MODULE_AUTHOR("Carter Weatherly <carter.weatherly@jhuapl.edu>, " > + "Daniel De Graaf <dgdegra@tycho.nsa.gov>"); > +MODULE_DESCRIPTION("User-space grant reference allocator driver"); > diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h > new file mode 100644 > index 0000000..e1d6d0f > --- /dev/null > +++ b/include/xen/gntalloc.h > @@ -0,0 +1,70 @@ > +/****************************************************************************** > + * gntalloc.h > + *Copyright?> + * Interface to /dev/xen/gntalloc. > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License version 2 > + * as published by the Free Software Foundation; or, when distributed > + * separately from the Linux kernel or incorporated into other > + * software packages, subject to the following license: > + * > + * Permission is hereby granted, free of charge, to any person obtaining a copy > + * of this source file (the "Software"), to deal in the Software without > + * restriction, including without limitation the rights to use, copy, modify, > + * merge, publish, distribute, sublicense, and/or sell copies of the Software, > + * and to permit persons to whom the Software is furnished to do so, subject to > + * the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS > + * IN THE SOFTWARE. > + */ > + > +#ifndef __LINUX_PUBLIC_GNTALLOC_H__ > +#define __LINUX_PUBLIC_GNTALLOC_H__ > + > +/* > + * Allocates a new page and creates a new grant reference. > + */ > +#define IOCTL_GNTALLOC_ALLOC_GREF \ > +_IOC(_IOC_NONE, ''G'', 5, sizeof(struct ioctl_gntalloc_alloc_gref)) > +struct ioctl_gntalloc_alloc_gref { > + /* IN parameters */ > + /* The ID of the domain to be given access to the grants. */ > + uint16_t domid; > + /* Flags for this mapping */ > + uint16_t flags; > + /* Number of pages to map */ > + uint32_t count; > + /* OUT parameters */ > + /* The offset to be used on a subsequent call to mmap(). */ > + uint64_t index; > + /* The grant references of the newly created grant, one per page */ > + /* Variable size, depending on count */ > + uint32_t gref_ids[1]; > +}; > + > +#define GNTALLOC_FLAG_WRITABLE 1 > + > +/* > + * Deallocates the grant reference, allowing the associated page to be freed if > + * no other domains are using it. > + */ > +#define IOCTL_GNTALLOC_DEALLOC_GREF \ > +_IOC(_IOC_NONE, ''G'', 6, sizeof(struct ioctl_gntalloc_dealloc_gref)) > +struct ioctl_gntalloc_dealloc_gref { > + /* IN parameters */ > + /* The offset returned in the map operation */ > + uint64_t index; > + /* Number of references to unmap */ > + uint32_t count; > +}; > +#endif /* __LINUX_PUBLIC_GNTALLOC_H__ */ > -- > 1.7.3.4 > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xensource.com > http://lists.xensource.com/xen-devel_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Konrad Rzeszutek Wilk
2011-Jan-27 18:52 UTC
[Xen-devel] Re: [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
On Fri, Jan 21, 2011 at 10:59:06AM -0500, Daniel De Graaf wrote:> HVM does not allow direct PTE modification, so instead we request > that Xen change its internal p2m mappings on the allocated pages and > map the memory into userspace normally. > > Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov> > --- > drivers/xen/gntdev.c | 115 +++++++++++++++++++++++++++++++------------- > drivers/xen/grant-table.c | 6 ++ > 2 files changed, 87 insertions(+), 34 deletions(-) > > diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c > index 256162b..a5710e8 100644 > --- a/drivers/xen/gntdev.c > +++ b/drivers/xen/gntdev.c > @@ -32,6 +32,7 @@ > #include <linux/sched.h> > #include <linux/spinlock.h> > #include <linux/slab.h> > +#include <linux/highmem.h> > > #include <xen/xen.h> > #include <xen/grant_table.h> > @@ -52,6 +53,8 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by " > > static atomic_t pages_mapped = ATOMIC_INIT(0); > > +static int use_ptemod; > + > struct gntdev_priv { > struct list_head maps; > /* lock protects maps from concurrent changes */ > @@ -74,6 +77,8 @@ struct grant_map { > struct page **pages; > }; > > +static int unmap_grant_pages(struct grant_map *map, int offset, int pages); > + > /* ------------------------------------------------------------------ */ > > static void gntdev_print_maps(struct gntdev_priv *priv, > @@ -179,11 +184,32 @@ static void gntdev_put_map(struct grant_map *map) > > atomic_sub(map->count, &pages_mapped); > > - if (map->pages) > + if (map->pages) { > + if (!use_ptemod) > + unmap_grant_pages(map, 0, map->count); > + > for (i = 0; i < map->count; i++) { > - if (map->pages[i]) > + uint32_t check, *tmp; > + if (!map->pages[i]) > + continue; > + /* XXX When unmapping in an HVM domain, Xen will > + * sometimes end up mapping the GFN to an invalid MFN. > + * In this case, writes will be discarded and reads will > + * return all 0xFF bytes. Leak these unusable GFNsI forgot to ask, under what version of Xen did you run this? I want to add that to the comment so when it gets fixed we know what the failing version is.> + * until Xen supports fixing their p2m mapping. > + */ > + tmp = kmap(map->pages[i]); > + *tmp = 0xdeaddead; > + mb(); > + check = *tmp; > + kunmap(map->pages[i]); > + if (check == 0xdeaddead) > __free_page(map->pages[i]); > + else > + pr_debug("Discard page %d=%ld\n", i, > + page_to_pfn(map->pages[i])); > } > + } > kfree(map->pages); > kfree(map->grants); > kfree(map->map_ops); > @@ -198,17 +224,16 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token, > { > struct grant_map *map = data; > unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT; > + int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte; > u64 pte_maddr; > > BUG_ON(pgnr >= map->count); > pte_maddr = arbitrary_virt_to_machine(pte).maddr; > > - gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, > - GNTMAP_contains_pte | map->flags, > + gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags, > map->grants[pgnr].ref, > map->grants[pgnr].domid); > - gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, > - GNTMAP_contains_pte | map->flags, > + gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags, > 0 /* handle */); > return 0; > } > @@ -216,6 +241,19 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token, > static int map_grant_pages(struct grant_map *map) > { > int i, err = 0; > + phys_addr_t addr; > + > + if (!use_ptemod) { > + for (i = 0; i < map->count; i++) { > + addr = (phys_addr_t) > + pfn_to_kaddr(page_to_pfn(map->pages[i])); > + gnttab_set_map_op(&map->map_ops[i], addr, map->flags, > + map->grants[i].ref, > + map->grants[i].domid); > + gnttab_set_unmap_op(&map->unmap_ops[i], addr, > + map->flags, 0 /* handle */); > + } > + } > > pr_debug("map %d+%d\n", map->index, map->count); > err = gnttab_map_refs(map->map_ops, map->pages, map->count); > @@ -260,17 +298,8 @@ static void gntdev_vma_close(struct vm_area_struct *vma) > gntdev_put_map(map); > } > > -static int gntdev_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) > -{ > - pr_debug("vaddr %p, pgoff %ld (shouldn''t happen)\n", > - vmf->virtual_address, vmf->pgoff); > - vmf->flags = VM_FAULT_ERROR; > - return 0; > -} > - > static struct vm_operations_struct gntdev_vmops = { > .close = gntdev_vma_close, > - .fault = gntdev_vma_fault, > }; > > /* ------------------------------------------------------------------ */ > @@ -355,14 +384,16 @@ static int gntdev_open(struct inode *inode, struct file *flip) > INIT_LIST_HEAD(&priv->maps); > spin_lock_init(&priv->lock); > > - priv->mm = get_task_mm(current); > - if (!priv->mm) { > - kfree(priv); > - return -ENOMEM; > + if (use_ptemod) { > + priv->mm = get_task_mm(current); > + if (!priv->mm) { > + kfree(priv); > + return -ENOMEM; > + } > + priv->mn.ops = &gntdev_mmu_ops; > + ret = mmu_notifier_register(&priv->mn, priv->mm); > + mmput(priv->mm); > } > - priv->mn.ops = &gntdev_mmu_ops; > - ret = mmu_notifier_register(&priv->mn, priv->mm); > - mmput(priv->mm); > > if (ret) { > kfree(priv); > @@ -390,7 +421,8 @@ static int gntdev_release(struct inode *inode, struct file *flip) > } > spin_unlock(&priv->lock); > > - mmu_notifier_unregister(&priv->mn, priv->mm); > + if (use_ptemod) > + mmu_notifier_unregister(&priv->mn, priv->mm); > kfree(priv); > return 0; > } > @@ -515,7 +547,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) > int index = vma->vm_pgoff; > int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; > struct grant_map *map; > - int err = -EINVAL; > + int i, err = -EINVAL; > > if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED)) > return -EINVAL; > @@ -527,9 +559,9 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) > map = gntdev_find_map_index(priv, index, count); > if (!map) > goto unlock_out; > - if (map->vma) > + if (use_ptemod && map->vma) > goto unlock_out; > - if (priv->mm != vma->vm_mm) { > + if (use_ptemod && priv->mm != vma->vm_mm) { > printk(KERN_WARNING "Huh? Other mm?\n"); > goto unlock_out; > } > @@ -541,20 +573,24 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) > vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP; > > vma->vm_private_data = map; > - map->vma = vma; > > - map->flags = GNTMAP_host_map | GNTMAP_application_map; > + if (use_ptemod) > + map->vma = vma; > + > + map->flags = GNTMAP_host_map; > if (!(vma->vm_flags & VM_WRITE)) > map->flags |= GNTMAP_readonly; > > spin_unlock(&priv->lock); > > - err = apply_to_page_range(vma->vm_mm, vma->vm_start, > - vma->vm_end - vma->vm_start, > - find_grant_ptes, map); > - if (err) { > - printk(KERN_WARNING "find_grant_ptes() failure.\n"); > - return err; > + if (use_ptemod) { > + err = apply_to_page_range(vma->vm_mm, vma->vm_start, > + vma->vm_end - vma->vm_start, > + find_grant_ptes, map); > + if (err) { > + printk(KERN_WARNING "find_grant_ptes() failure.\n"); > + return err; > + } > } > > err = map_grant_pages(map); > @@ -565,6 +601,15 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) > > map->is_mapped = 1; > > + if (!use_ptemod) { > + for (i = 0; i < count; i++) { > + err = vm_insert_page(vma, vma->vm_start + i*PAGE_SIZE, > + map->pages[i]); > + if (err) > + return err; > + } > + } > + > return 0; > > unlock_out: > @@ -595,6 +640,8 @@ static int __init gntdev_init(void) > if (!xen_domain()) > return -ENODEV; > > + use_ptemod = xen_pv_domain(); > + > err = misc_register(&gntdev_miscdev); > if (err != 0) { > printk(KERN_ERR "Could not register gntdev device\n"); > diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c > index 9ef54eb..9428ced 100644 > --- a/drivers/xen/grant-table.c > +++ b/drivers/xen/grant-table.c > @@ -458,6 +458,9 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, > if (ret) > return ret; > > + if (xen_feature(XENFEAT_auto_translated_physmap)) > + return ret; > + > for (i = 0; i < count; i++) { > /* m2p override only supported for GNTMAP_contains_pte mappings */ > if (!(map_ops[i].flags & GNTMAP_contains_pte)) > @@ -483,6 +486,9 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, > if (ret) > return ret; > > + if (xen_feature(XENFEAT_auto_translated_physmap)) > + return ret; > + > for (i = 0; i < count; i++) { > ret = m2p_remove_override(pages[i]); > if (ret) > -- > 1.7.3.4_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Konrad Rzeszutek Wilk
2011-Jan-27 19:20 UTC
[Xen-devel] Re: [PATCH 6/6] xen/gntalloc, gntdev: Add unmap notify ioctl
On Fri, Jan 21, 2011 at 10:59:08AM -0500, Daniel De Graaf wrote:> This ioctl allows the users of a shared page to be notified when > the other end exits abnormally. > > Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov> > --- > drivers/xen/gntalloc.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++ > drivers/xen/gntdev.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++- > include/xen/gntalloc.h | 28 ++++++++++++++++++++++++ > include/xen/gntdev.h | 27 +++++++++++++++++++++++ > 4 files changed, 165 insertions(+), 1 deletions(-) > > diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c > index a230dc4..8ac0dfc 100644 > --- a/drivers/xen/gntalloc.c > +++ b/drivers/xen/gntalloc.c > @@ -60,11 +60,13 @@ > #include <linux/uaccess.h> > #include <linux/types.h> > #include <linux/list.h> > +#include <linux/highmem.h> > > #include <xen/xen.h> > #include <xen/page.h> > #include <xen/grant_table.h> > #include <xen/gntalloc.h> > +#include <xen/events.h> > > static int limit = 1024; > module_param(limit, int, 0644); > @@ -83,6 +85,9 @@ struct gntalloc_gref { > uint64_t file_index; /* File offset for mmap() */ > unsigned int users; /* Use count - when zero, waiting on Xen */ > grant_ref_t gref_id; /* The grant reference number */ > + unsigned notify_flags:2; /* Unmap notification flags */Can you add to the comment: bits 0-1> + unsigned notify_pgoff:12; /* Offset of the byte to clear */^^ in the page And "bits 2-14.".> + int notify_event; /* Port (event channel) to notify */ > }; > > struct gntalloc_file_private_data { > @@ -169,6 +174,16 @@ undo: > > static void __del_gref(struct gntalloc_gref *gref) > { > + if (gref->notify_flags & UNMAP_NOTIFY_CLEAR_BYTE) { > + uint8_t *tmp = kmap(gref->page); > + tmp[gref->notify_pgoff] = 0; > + kunmap(gref->page); > + } > + if (gref->notify_flags & UNMAP_NOTIFY_SEND_EVENT) > + notify_remote_via_evtchn(gref->notify_event); > + > + gref->notify_flags = 0; > + > if (gnttab_query_foreign_access(gref->gref_id)) > return; > > @@ -339,6 +354,43 @@ dealloc_grant_out: > return rc; > } > > +static long gntalloc_ioctl_notify(struct gntalloc_file_private_data *priv, > + void __user *arg)Call it ''..ioctl_unmap_notify''. When I read it first time I thought this would do just notify.. while in actuality it can do both.> +{ > + struct ioctl_gntalloc_unmap_notify op; > + struct gntalloc_gref *gref; > + uint64_t index; > + int pgoff; > + int rc; > + > + if (copy_from_user(&op, arg, sizeof(op))) > + return -EFAULT; > + > + index = op.index & ~(PAGE_SIZE - 1); > + pgoff = op.index & (PAGE_SIZE - 1);You don''t want to tell the user if the messed up? Say 0xdeadf00d?> + > + spin_lock(&gref_lock); > + > + gref = find_grefs(priv, index, 1); > + if (!gref) { > + rc = -ENOENT; > + goto unlock_out; > + } > + > + if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) { > + rc = -EINVAL; > + goto unlock_out; > + } > + > + gref->notify_flags = op.action; > + gref->notify_pgoff = pgoff; > + gref->notify_event = op.event_channel_port; > + rc = 0; > + unlock_out: > + spin_unlock(&gref_lock); > + return rc; > +} > + > static long gntalloc_ioctl(struct file *filp, unsigned int cmd, > unsigned long arg) > { > @@ -351,6 +403,9 @@ static long gntalloc_ioctl(struct file *filp, unsigned int cmd, > case IOCTL_GNTALLOC_DEALLOC_GREF: > return gntalloc_ioctl_dealloc(priv, (void __user *)arg); > > + case IOCTL_GNTALLOC_SET_UNMAP_NOTIFY: > + return gntalloc_ioctl_notify(priv, (void __user *)arg); > + > default: > return -ENOIOCTLCMD; > } > diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c > index a5710e8..e360d0a 100644 > --- a/drivers/xen/gntdev.c > +++ b/drivers/xen/gntdev.c > @@ -37,6 +37,7 @@ > #include <xen/xen.h> > #include <xen/grant_table.h> > #include <xen/gntdev.h> > +#include <xen/events.h> > #include <asm/xen/hypervisor.h> > #include <asm/xen/hypercall.h> > #include <asm/xen/page.h> > @@ -70,6 +71,9 @@ struct grant_map { > int count; > int flags; > int is_mapped; > + int notify_flags; > + int notify_offset; > + int notify_event;Would it make sense to put these inside a struct?> atomic_t users; > struct ioctl_gntdev_grant_ref *grants; > struct gnttab_map_grant_ref *map_ops; > @@ -165,7 +169,7 @@ static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv, > list_for_each_entry(map, &priv->maps, next) { > if (map->index != index) > continue; > - if (map->count != count) > + if (count && map->count != count) > continue; > return map; > } > @@ -184,6 +188,10 @@ static void gntdev_put_map(struct grant_map *map) > > atomic_sub(map->count, &pages_mapped); > > + if (map->notify_flags & UNMAP_NOTIFY_SEND_EVENT) { > + notify_remote_via_evtchn(map->notify_event); > + } > + > if (map->pages) { > if (!use_ptemod) > unmap_grant_pages(map, 0, map->count); > @@ -272,6 +280,16 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages) > { > int i, err = 0; > > + if (map->notify_flags & UNMAP_NOTIFY_CLEAR_BYTE) { > + int pgno = (map->notify_offset >> PAGE_SHIFT); > + if (pgno >= offset && pgno < pages + offset) {Whoa, that looks to violate what ''notify_offset'' actually means. Perhaps a better name for that variable? notify_addr?> + uint8_t *tmp = kmap(map->pages[pgno]); > + tmp[map->notify_offset & (PAGE_SIZE-1)] = 0; > + kunmap(map->pages[pgno]); > + map->notify_flags &= ~UNMAP_NOTIFY_CLEAR_BYTE; > + } > + } > + > pr_debug("map %d+%d [%d+%d]\n", map->index, map->count, offset, pages); > err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages, pages); > if (err) > @@ -517,6 +535,39 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv, > return 0; > } > > +static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)> +{ > + struct ioctl_gntdev_unmap_notify op; > + struct grant_map *map; > + int rc; > + > + if (copy_from_user(&op, u, sizeof(op))) > + return -EFAULT; > + > + if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) > + return -EINVAL; > + > + spin_lock(&priv->lock); > + > + list_for_each_entry(map, &priv->maps, next) { > + uint64_t begin = map->index << PAGE_SHIFT; > + uint64_t end = (map->index + map->count) << PAGE_SHIFT; > + if (op.index >= begin && op.index < end) > + goto found; > + } > + rc = -ENOENT; > + goto unlock_out; > + > + found: > + map->notify_flags = op.action; > + map->notify_offset = op.index - (map->index << PAGE_SHIFT);Minus? So say the index is 0xdeadbeef, wouldn''t this throw this off? Should you also check the index to make sure it is within a PAGE_SIZE offset? (the ioctl accepts 64-bit values for the index).> + map->notify_event = op.event_channel_port; > + rc = 0; > + unlock_out: > + spin_unlock(&priv->lock); > + return rc; > +} > + > static long gntdev_ioctl(struct file *flip, > unsigned int cmd, unsigned long arg) > { > @@ -533,6 +584,9 @@ static long gntdev_ioctl(struct file *flip, > case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR: > return gntdev_ioctl_get_offset_for_vaddr(priv, ptr); > > + case IOCTL_GNTDEV_SET_UNMAP_NOTIFY: > + return gntdev_ioctl_notify(priv, ptr); > + > default: > pr_debug("priv %p, unknown cmd %x\n", priv, cmd); > return -ENOIOCTLCMD; > diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h > index e1d6d0f..92339f5 100644 > --- a/include/xen/gntalloc.h > +++ b/include/xen/gntalloc.h > @@ -67,4 +67,32 @@ struct ioctl_gntalloc_dealloc_gref { > /* Number of references to unmap */ > uint32_t count; > }; > + > +/* > + * Sets up an unmap notification within the page, so that the other side can do > + * cleanup if this side crashes. Required to implement cross-domain robust > + * mutexes or close notification on communication channels. > + * > + * Each mapped page only supports one notification; multiple calls referring to > + * the same page overwrite the previous notification. You must clear the > + * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it > + * to occur. > + */ > +#define IOCTL_GNTALLOC_SET_UNMAP_NOTIFY \ > +_IOC(_IOC_NONE, ''G'', 7, sizeof(struct ioctl_gntalloc_unmap_notify)) > +struct ioctl_gntalloc_unmap_notify { > + /* IN parameters */ > + /* Index of a byte in the page */ > + uint64_t index; > + /* Action(s) to take on unmap */ > + uint32_t action; > + /* Event channel to notify */ > + uint32_t event_channel_port; > +}; > + > +/* Clear (set to zero) the byte specified by index */ > +#define UNMAP_NOTIFY_CLEAR_BYTE 0x1 > +/* Send an interrupt on the indicated event channel */ > +#define UNMAP_NOTIFY_SEND_EVENT 0x2 > + > #endif /* __LINUX_PUBLIC_GNTALLOC_H__ */ > diff --git a/include/xen/gntdev.h b/include/xen/gntdev.h > index eb23f41..5d9b9b4 100644 > --- a/include/xen/gntdev.h > +++ b/include/xen/gntdev.h > @@ -116,4 +116,31 @@ struct ioctl_gntdev_set_max_grants { > uint32_t count; > }; > > +/* > + * Sets up an unmap notification within the page, so that the other side can do > + * cleanup if this side crashes. Required to implement cross-domain robust > + * mutexes or close notification on communication channels. > + * > + * Each mapped page only supports one notification; multiple calls referring to > + * the same page overwrite the previous notification. You must clear the > + * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it > + * to occur. > + */ > +#define IOCTL_GNTDEV_SET_UNMAP_NOTIFY \ > +_IOC(_IOC_NONE, ''G'', 7, sizeof(struct ioctl_gntdev_unmap_notify)) > +struct ioctl_gntdev_unmap_notify { > + /* IN parameters */ > + /* Index of a byte in the page */ > + uint64_t index; > + /* Action(s) to take on unmap */ > + uint32_t action; > + /* Event channel to notify */ > + uint32_t event_channel_port; > +}; > + > +/* Clear (set to zero) the byte specified by index */ > +#define UNMAP_NOTIFY_CLEAR_BYTE 0x1 > +/* Send an interrupt on the indicated event channel */ > +#define UNMAP_NOTIFY_SEND_EVENT 0x2 > + > #endif /* __LINUX_PUBLIC_GNTDEV_H__ */ > -- > 1.7.3.4_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Konrad Rzeszutek Wilk
2011-Jan-27 19:23 UTC
Re: [Xen-devel] [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
On Thu, Jan 27, 2011 at 01:52:10PM -0500, Konrad Rzeszutek Wilk wrote:> On Fri, Jan 21, 2011 at 10:59:07AM -0500, Daniel De Graaf wrote: > > This allows a userspace application to allocate a shared page for > > implementing inter-domain communication or device drivers. These > > shared pages can be mapped using the gntdev device or by the kernel > > in another domain. > > > > Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov> > > --- > > drivers/xen/Kconfig | 8 + > > drivers/xen/Makefile | 2 + > > drivers/xen/gntalloc.c | 477 ++++++++++++++++++++++++++++++++++++++++++++++++ > > include/xen/gntalloc.h | 70 +++++++ > > 4 files changed, 557 insertions(+), 0 deletions(-) > > create mode 100644 drivers/xen/gntalloc.c > > create mode 100644 include/xen/gntalloc.h > > > > diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig > > index 07bec09..a3d7afb 100644 > > --- a/drivers/xen/Kconfig > > +++ b/drivers/xen/Kconfig > > @@ -80,6 +80,14 @@ config XEN_GNTDEV > > help > > Allows userspace processes to use grants. > > > > +config XEN_GRANT_DEV_ALLOC > > + tristate "User-space grant reference allocator driver" > > + depends on XEN > > + help > > + Allows userspace processes to create pages with access granted > > + to other domains. This can be used to implement frontend drivers > > + or as part of an inter-domain shared memory channel.> > + > > config XEN_PLATFORM_PCI > > tristate "xen platform pci device driver" > > depends on XEN_PVHVM && PCI > > diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile > > index 5088cc2..9585a1d 100644 > > --- a/drivers/xen/Makefile > > +++ b/drivers/xen/Makefile > > @@ -10,6 +10,7 @@ obj-$(CONFIG_XEN_XENCOMM) += xencomm.o > > obj-$(CONFIG_XEN_BALLOON) += balloon.o > > obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o > > obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o > > +obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o > > obj-$(CONFIG_XENFS) += xenfs/ > > obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o > > obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o > > @@ -18,5 +19,6 @@ obj-$(CONFIG_XEN_DOM0) += pci.o > > > > xen-evtchn-y := evtchn.o > > xen-gntdev-y := gntdev.o > > +xen-gntalloc-y := gntalloc.o > > > > xen-platform-pci-y := platform-pci.o > > diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c > > new file mode 100644 > > index 0000000..a230dc4 > > --- /dev/null > > +++ b/drivers/xen/gntalloc.c > > @@ -0,0 +1,477 @@ > > +/****************************************************************************** > > + * gntalloc.c > > + * > > + * Device for creating grant references (in user-space) that may be shared > > + * with other domains. > > + * > > + * This program is distributed in the hope that it will be useful, > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > > + * GNU General Public License for more details. > > + * > > + * You should have received a copy of the GNU General Public License > > + * along with this program; if not, write to the Free Software > > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA > > + */ > > + > > +/* > > + * This driver exists to allow userspace programs in Linux to allocate kernel > > + * memory that will later be shared with another domain. Without this device, > > + * Linux userspace programs cannot create grant references.Why can''t this be done in gntdev? Is there a simple test program for this? _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Jan-27 19:26 UTC
[Xen-devel] Re: [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
On 01/27/2011 01:52 PM, Konrad Rzeszutek Wilk wrote:> On Fri, Jan 21, 2011 at 10:59:06AM -0500, Daniel De Graaf wrote: >> HVM does not allow direct PTE modification, so instead we request >> that Xen change its internal p2m mappings on the allocated pages and >> map the memory into userspace normally. >> >> Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov> >> --- >> drivers/xen/gntdev.c | 115 +++++++++++++++++++++++++++++++------------- >> drivers/xen/grant-table.c | 6 ++ >> 2 files changed, 87 insertions(+), 34 deletions(-) >> >> diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c >> index 256162b..a5710e8 100644 >> --- a/drivers/xen/gntdev.c >> +++ b/drivers/xen/gntdev.c >> @@ -32,6 +32,7 @@ >> #include <linux/sched.h> >> #include <linux/spinlock.h> >> #include <linux/slab.h> >> +#include <linux/highmem.h> >> >> #include <xen/xen.h> >> #include <xen/grant_table.h> >> @@ -52,6 +53,8 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by " >> >> static atomic_t pages_mapped = ATOMIC_INIT(0); >> >> +static int use_ptemod; >> + >> struct gntdev_priv { >> struct list_head maps; >> /* lock protects maps from concurrent changes */ >> @@ -74,6 +77,8 @@ struct grant_map { >> struct page **pages; >> }; >> >> +static int unmap_grant_pages(struct grant_map *map, int offset, int pages); >> + >> /* ------------------------------------------------------------------ */ >> >> static void gntdev_print_maps(struct gntdev_priv *priv, >> @@ -179,11 +184,32 @@ static void gntdev_put_map(struct grant_map *map) >> >> atomic_sub(map->count, &pages_mapped); >> >> - if (map->pages) >> + if (map->pages) { >> + if (!use_ptemod) >> + unmap_grant_pages(map, 0, map->count); >> + >> for (i = 0; i < map->count; i++) { >> - if (map->pages[i]) >> + uint32_t check, *tmp; >> + if (!map->pages[i]) >> + continue; >> + /* XXX When unmapping in an HVM domain, Xen will >> + * sometimes end up mapping the GFN to an invalid MFN. >> + * In this case, writes will be discarded and reads will >> + * return all 0xFF bytes. Leak these unusable GFNs > > I forgot to ask, under what version of Xen did you run this? I want to add > that to the comment so when it gets fixed we know what the failing version is.4.1-unstable. In particular, 22641:4e108cf56d07 exhibits the bug, and I have not found a version of 4.1 that doesn''t (although I haven''t searched for that in particular). I could try to find other versions also exhibit this behavior, if it would be useful to have a list. Note for reproducing: at r22641, you need to revert 22402:7d2fdc083c9c or the grant table of the HVM guest will not be readable by Xen. My test that exercises the bug is an HVM-to-HVM grant.>> + * until Xen supports fixing their p2m mapping. >> + */ >> + tmp = kmap(map->pages[i]); >> + *tmp = 0xdeaddead; >> + mb(); >> + check = *tmp; >> + kunmap(map->pages[i]); >> + if (check == 0xdeaddead) >> __free_page(map->pages[i]); >> + else >> + pr_debug("Discard page %d=%ld\n", i, >> + page_to_pfn(map->pages[i])); >> } >> + } >> kfree(map->pages); >> kfree(map->grants); >> kfree(map->map_ops); >> @@ -198,17 +224,16 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token, >> { >> struct grant_map *map = data; >> unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT; >> + int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte; >> u64 pte_maddr; >> >> BUG_ON(pgnr >= map->count); >> pte_maddr = arbitrary_virt_to_machine(pte).maddr; >> >> - gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, >> - GNTMAP_contains_pte | map->flags, >> + gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags, >> map->grants[pgnr].ref, >> map->grants[pgnr].domid); >> - gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, >> - GNTMAP_contains_pte | map->flags, >> + gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags, >> 0 /* handle */); >> return 0; >> } >> @@ -216,6 +241,19 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token, >> static int map_grant_pages(struct grant_map *map) >> { >> int i, err = 0; >> + phys_addr_t addr; >> + >> + if (!use_ptemod) { >> + for (i = 0; i < map->count; i++) { >> + addr = (phys_addr_t) >> + pfn_to_kaddr(page_to_pfn(map->pages[i])); >> + gnttab_set_map_op(&map->map_ops[i], addr, map->flags, >> + map->grants[i].ref, >> + map->grants[i].domid); >> + gnttab_set_unmap_op(&map->unmap_ops[i], addr, >> + map->flags, 0 /* handle */); >> + } >> + } >> >> pr_debug("map %d+%d\n", map->index, map->count); >> err = gnttab_map_refs(map->map_ops, map->pages, map->count); >> @@ -260,17 +298,8 @@ static void gntdev_vma_close(struct vm_area_struct *vma) >> gntdev_put_map(map); >> } >> >> -static int gntdev_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) >> -{ >> - pr_debug("vaddr %p, pgoff %ld (shouldn''t happen)\n", >> - vmf->virtual_address, vmf->pgoff); >> - vmf->flags = VM_FAULT_ERROR; >> - return 0; >> -} >> - >> static struct vm_operations_struct gntdev_vmops = { >> .close = gntdev_vma_close, >> - .fault = gntdev_vma_fault, >> }; >> >> /* ------------------------------------------------------------------ */ >> @@ -355,14 +384,16 @@ static int gntdev_open(struct inode *inode, struct file *flip) >> INIT_LIST_HEAD(&priv->maps); >> spin_lock_init(&priv->lock); >> >> - priv->mm = get_task_mm(current); >> - if (!priv->mm) { >> - kfree(priv); >> - return -ENOMEM; >> + if (use_ptemod) { >> + priv->mm = get_task_mm(current); >> + if (!priv->mm) { >> + kfree(priv); >> + return -ENOMEM; >> + } >> + priv->mn.ops = &gntdev_mmu_ops; >> + ret = mmu_notifier_register(&priv->mn, priv->mm); >> + mmput(priv->mm); >> } >> - priv->mn.ops = &gntdev_mmu_ops; >> - ret = mmu_notifier_register(&priv->mn, priv->mm); >> - mmput(priv->mm); >> >> if (ret) { >> kfree(priv); >> @@ -390,7 +421,8 @@ static int gntdev_release(struct inode *inode, struct file *flip) >> } >> spin_unlock(&priv->lock); >> >> - mmu_notifier_unregister(&priv->mn, priv->mm); >> + if (use_ptemod) >> + mmu_notifier_unregister(&priv->mn, priv->mm); >> kfree(priv); >> return 0; >> } >> @@ -515,7 +547,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) >> int index = vma->vm_pgoff; >> int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; >> struct grant_map *map; >> - int err = -EINVAL; >> + int i, err = -EINVAL; >> >> if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED)) >> return -EINVAL; >> @@ -527,9 +559,9 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) >> map = gntdev_find_map_index(priv, index, count); >> if (!map) >> goto unlock_out; >> - if (map->vma) >> + if (use_ptemod && map->vma) >> goto unlock_out; >> - if (priv->mm != vma->vm_mm) { >> + if (use_ptemod && priv->mm != vma->vm_mm) { >> printk(KERN_WARNING "Huh? Other mm?\n"); >> goto unlock_out; >> } >> @@ -541,20 +573,24 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) >> vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP; >> >> vma->vm_private_data = map; >> - map->vma = vma; >> >> - map->flags = GNTMAP_host_map | GNTMAP_application_map; >> + if (use_ptemod) >> + map->vma = vma; >> + >> + map->flags = GNTMAP_host_map; >> if (!(vma->vm_flags & VM_WRITE)) >> map->flags |= GNTMAP_readonly; >> >> spin_unlock(&priv->lock); >> >> - err = apply_to_page_range(vma->vm_mm, vma->vm_start, >> - vma->vm_end - vma->vm_start, >> - find_grant_ptes, map); >> - if (err) { >> - printk(KERN_WARNING "find_grant_ptes() failure.\n"); >> - return err; >> + if (use_ptemod) { >> + err = apply_to_page_range(vma->vm_mm, vma->vm_start, >> + vma->vm_end - vma->vm_start, >> + find_grant_ptes, map); >> + if (err) { >> + printk(KERN_WARNING "find_grant_ptes() failure.\n"); >> + return err; >> + } >> } >> >> err = map_grant_pages(map); >> @@ -565,6 +601,15 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) >> >> map->is_mapped = 1; >> >> + if (!use_ptemod) { >> + for (i = 0; i < count; i++) { >> + err = vm_insert_page(vma, vma->vm_start + i*PAGE_SIZE, >> + map->pages[i]); >> + if (err) >> + return err; >> + } >> + } >> + >> return 0; >> >> unlock_out: >> @@ -595,6 +640,8 @@ static int __init gntdev_init(void) >> if (!xen_domain()) >> return -ENODEV; >> >> + use_ptemod = xen_pv_domain(); >> + >> err = misc_register(&gntdev_miscdev); >> if (err != 0) { >> printk(KERN_ERR "Could not register gntdev device\n"); >> diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c >> index 9ef54eb..9428ced 100644 >> --- a/drivers/xen/grant-table.c >> +++ b/drivers/xen/grant-table.c >> @@ -458,6 +458,9 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, >> if (ret) >> return ret; >> >> + if (xen_feature(XENFEAT_auto_translated_physmap)) >> + return ret; >> + >> for (i = 0; i < count; i++) { >> /* m2p override only supported for GNTMAP_contains_pte mappings */ >> if (!(map_ops[i].flags & GNTMAP_contains_pte)) >> @@ -483,6 +486,9 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, >> if (ret) >> return ret; >> >> + if (xen_feature(XENFEAT_auto_translated_physmap)) >> + return ret; >> + >> for (i = 0; i < count; i++) { >> ret = m2p_remove_override(pages[i]); >> if (ret) >> -- >> 1.7.3.4_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Jan-27 19:51 UTC
Re: [Xen-devel] [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
On 01/27/2011 02:23 PM, Konrad Rzeszutek Wilk wrote:> On Thu, Jan 27, 2011 at 01:52:10PM -0500, Konrad Rzeszutek Wilk wrote: >> On Fri, Jan 21, 2011 at 10:59:07AM -0500, Daniel De Graaf wrote: >>> This allows a userspace application to allocate a shared page for >>> implementing inter-domain communication or device drivers. These >>> shared pages can be mapped using the gntdev device or by the kernel >>> in another domain. >>> >>> Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov> >>> --- >>> drivers/xen/Kconfig | 8 + >>> drivers/xen/Makefile | 2 + >>> drivers/xen/gntalloc.c | 477 ++++++++++++++++++++++++++++++++++++++++++++++++ >>> include/xen/gntalloc.h | 70 +++++++ >>> 4 files changed, 557 insertions(+), 0 deletions(-) >>> create mode 100644 drivers/xen/gntalloc.c >>> create mode 100644 include/xen/gntalloc.h >>> >>> diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig >>> index 07bec09..a3d7afb 100644 >>> --- a/drivers/xen/Kconfig >>> +++ b/drivers/xen/Kconfig >>> @@ -80,6 +80,14 @@ config XEN_GNTDEV >>> help >>> Allows userspace processes to use grants. >>> >>> +config XEN_GRANT_DEV_ALLOC >>> + tristate "User-space grant reference allocator driver" >>> + depends on XEN >>> + help >>> + Allows userspace processes to create pages with access granted >>> + to other domains. This can be used to implement frontend drivers >>> + or as part of an inter-domain shared memory channel. > > >>> + >>> config XEN_PLATFORM_PCI >>> tristate "xen platform pci device driver" >>> depends on XEN_PVHVM && PCI >>> diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile >>> index 5088cc2..9585a1d 100644 >>> --- a/drivers/xen/Makefile >>> +++ b/drivers/xen/Makefile >>> @@ -10,6 +10,7 @@ obj-$(CONFIG_XEN_XENCOMM) += xencomm.o >>> obj-$(CONFIG_XEN_BALLOON) += balloon.o >>> obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o >>> obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o >>> +obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o >>> obj-$(CONFIG_XENFS) += xenfs/ >>> obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o >>> obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o >>> @@ -18,5 +19,6 @@ obj-$(CONFIG_XEN_DOM0) += pci.o >>> >>> xen-evtchn-y := evtchn.o >>> xen-gntdev-y := gntdev.o >>> +xen-gntalloc-y := gntalloc.o >>> >>> xen-platform-pci-y := platform-pci.o >>> diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c >>> new file mode 100644 >>> index 0000000..a230dc4 >>> --- /dev/null >>> +++ b/drivers/xen/gntalloc.c >>> @@ -0,0 +1,477 @@ >>> +/****************************************************************************** >>> + * gntalloc.c >>> + * >>> + * Device for creating grant references (in user-space) that may be shared >>> + * with other domains. >>> + * >>> + * This program is distributed in the hope that it will be useful, >>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >>> + * GNU General Public License for more details. >>> + * >>> + * You should have received a copy of the GNU General Public License >>> + * along with this program; if not, write to the Free Software >>> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA >>> + */ >>> + >>> +/* >>> + * This driver exists to allow userspace programs in Linux to allocate kernel >>> + * memory that will later be shared with another domain. Without this device, >>> + * Linux userspace programs cannot create grant references. > > Why can''t this be done in gntdev? Is there a simple test program for this? >I didn''t do this in gntdev so that it would be possible to allow programs to map grants without also giving them rights to allocate (or vice versa). The actions taken on the pages are also different (gntdev remaps them, gntalloc just shares) so much of the surrounding map/unmap logic is different. If it''s preferred, I could try to add the ioctls to gntdev; there is some overlap in functionality, especially when using the HVM code in gntdev. The limits for gntalloc need to remain different, because a page allocated by gntalloc is impossible to reclaim using the OOM killer (unlike pages allocated by gntdev). Simple test program (for gntdev and gntalloc) attached. -- Daniel De Graaf National Security Agency _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Jan-27 20:09 UTC
[Xen-devel] Re: [PATCH 6/6] xen/gntalloc, gntdev: Add unmap notify ioctl
On 01/27/2011 02:20 PM, Konrad Rzeszutek Wilk wrote:> On Fri, Jan 21, 2011 at 10:59:08AM -0500, Daniel De Graaf wrote: >> This ioctl allows the users of a shared page to be notified when >> the other end exits abnormally. >> >> Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov> >> --- >> drivers/xen/gntalloc.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++ >> drivers/xen/gntdev.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++- >> include/xen/gntalloc.h | 28 ++++++++++++++++++++++++ >> include/xen/gntdev.h | 27 +++++++++++++++++++++++ >> 4 files changed, 165 insertions(+), 1 deletions(-) >> >> diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c >> index a230dc4..8ac0dfc 100644 >> --- a/drivers/xen/gntalloc.c >> +++ b/drivers/xen/gntalloc.c >> @@ -60,11 +60,13 @@ >> #include <linux/uaccess.h> >> #include <linux/types.h> >> #include <linux/list.h> >> +#include <linux/highmem.h> >> >> #include <xen/xen.h> >> #include <xen/page.h> >> #include <xen/grant_table.h> >> #include <xen/gntalloc.h> >> +#include <xen/events.h> >> >> static int limit = 1024; >> module_param(limit, int, 0644); >> @@ -83,6 +85,9 @@ struct gntalloc_gref { >> uint64_t file_index; /* File offset for mmap() */ >> unsigned int users; /* Use count - when zero, waiting on Xen */ >> grant_ref_t gref_id; /* The grant reference number */ >> + unsigned notify_flags:2; /* Unmap notification flags */ > > Can you add to the comment: bits 0-1 >> + unsigned notify_pgoff:12; /* Offset of the byte to clear */ > ^^ in the page > And "bits 2-14.".OK, will do. I''m just using a bitfield here to save memory, so noting the exact bits didn''t seem necessary.> >> + int notify_event; /* Port (event channel) to notify */ >> }; >> >> struct gntalloc_file_private_data { >> @@ -169,6 +174,16 @@ undo: >> >> static void __del_gref(struct gntalloc_gref *gref) >> { >> + if (gref->notify_flags & UNMAP_NOTIFY_CLEAR_BYTE) { >> + uint8_t *tmp = kmap(gref->page); >> + tmp[gref->notify_pgoff] = 0; >> + kunmap(gref->page); >> + } >> + if (gref->notify_flags & UNMAP_NOTIFY_SEND_EVENT) >> + notify_remote_via_evtchn(gref->notify_event); >> + >> + gref->notify_flags = 0; >> + >> if (gnttab_query_foreign_access(gref->gref_id)) >> return; >> >> @@ -339,6 +354,43 @@ dealloc_grant_out: >> return rc; >> } >> >> +static long gntalloc_ioctl_notify(struct gntalloc_file_private_data *priv, >> + void __user *arg) > > Call it ''..ioctl_unmap_notify''. When I read it first time I thought this > would do just notify.. while in actuality it can do both.It''s not doing anything else besides adding/removing the notification, but it does do the notify only on unmap or delete, so that''s a good name.>> +{ >> + struct ioctl_gntalloc_unmap_notify op; >> + struct gntalloc_gref *gref; >> + uint64_t index; >> + int pgoff; >> + int rc; >> + >> + if (copy_from_user(&op, arg, sizeof(op))) >> + return -EFAULT; >> + >> + index = op.index & ~(PAGE_SIZE - 1); >> + pgoff = op.index & (PAGE_SIZE - 1); > > You don''t want to tell the user if the messed up? Say 0xdeadf00d?I''m not sure how the user would mess up without notification: either it will be a valid offset, or find_grefs will fail and return -ENOENT.>> + >> + spin_lock(&gref_lock); >> + >> + gref = find_grefs(priv, index, 1); >> + if (!gref) { >> + rc = -ENOENT; >> + goto unlock_out; >> + } >> + >> + if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) { >> + rc = -EINVAL; >> + goto unlock_out; >> + } >> + >> + gref->notify_flags = op.action; >> + gref->notify_pgoff = pgoff; >> + gref->notify_event = op.event_channel_port; >> + rc = 0; >> + unlock_out: >> + spin_unlock(&gref_lock); >> + return rc; >> +} >> + >> static long gntalloc_ioctl(struct file *filp, unsigned int cmd, >> unsigned long arg) >> { >> @@ -351,6 +403,9 @@ static long gntalloc_ioctl(struct file *filp, unsigned int cmd, >> case IOCTL_GNTALLOC_DEALLOC_GREF: >> return gntalloc_ioctl_dealloc(priv, (void __user *)arg); >> >> + case IOCTL_GNTALLOC_SET_UNMAP_NOTIFY: >> + return gntalloc_ioctl_notify(priv, (void __user *)arg); >> + >> default: >> return -ENOIOCTLCMD; >> } >> diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c >> index a5710e8..e360d0a 100644 >> --- a/drivers/xen/gntdev.c >> +++ b/drivers/xen/gntdev.c >> @@ -37,6 +37,7 @@ >> #include <xen/xen.h> >> #include <xen/grant_table.h> >> #include <xen/gntdev.h> >> +#include <xen/events.h> >> #include <asm/xen/hypervisor.h> >> #include <asm/xen/hypercall.h> >> #include <asm/xen/page.h> >> @@ -70,6 +71,9 @@ struct grant_map { >> int count; >> int flags; >> int is_mapped; >> + int notify_flags; >> + int notify_offset; >> + int notify_event; > > Would it make sense to put these inside a struct?Yes, they''re related enough. I''ll do that in the next revision.>> atomic_t users; >> struct ioctl_gntdev_grant_ref *grants; >> struct gnttab_map_grant_ref *map_ops; >> @@ -165,7 +169,7 @@ static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv, >> list_for_each_entry(map, &priv->maps, next) { >> if (map->index != index) >> continue; >> - if (map->count != count) >> + if (count && map->count != count) >> continue; >> return map; >> } >> @@ -184,6 +188,10 @@ static void gntdev_put_map(struct grant_map *map) >> >> atomic_sub(map->count, &pages_mapped); >> >> + if (map->notify_flags & UNMAP_NOTIFY_SEND_EVENT) { >> + notify_remote_via_evtchn(map->notify_event); >> + } >> + >> if (map->pages) { >> if (!use_ptemod) >> unmap_grant_pages(map, 0, map->count); >> @@ -272,6 +280,16 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages) >> { >> int i, err = 0; >> >> + if (map->notify_flags & UNMAP_NOTIFY_CLEAR_BYTE) { >> + int pgno = (map->notify_offset >> PAGE_SHIFT); >> + if (pgno >= offset && pgno < pages + offset) { > > Whoa, that looks to violate what ''notify_offset'' actually means. > Perhaps a better name for that variable? notify_addr?It''s not an absolute address within the device, just within the multi-page structure pointed to by map. notify_addr is also a good name, if you think that it makes it clearer.>> + uint8_t *tmp = kmap(map->pages[pgno]); >> + tmp[map->notify_offset & (PAGE_SIZE-1)] = 0; >> + kunmap(map->pages[pgno]); >> + map->notify_flags &= ~UNMAP_NOTIFY_CLEAR_BYTE; >> + } >> + } >> + >> pr_debug("map %d+%d [%d+%d]\n", map->index, map->count, offset, pages); >> err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages, pages); >> if (err) >> @@ -517,6 +535,39 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv, >> return 0; >> } >> >> +static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u) > >> +{ >> + struct ioctl_gntdev_unmap_notify op; >> + struct grant_map *map; >> + int rc; >> + >> + if (copy_from_user(&op, u, sizeof(op))) >> + return -EFAULT; >> + >> + if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) >> + return -EINVAL; >> + >> + spin_lock(&priv->lock); >> + >> + list_for_each_entry(map, &priv->maps, next) { >> + uint64_t begin = map->index << PAGE_SHIFT; >> + uint64_t end = (map->index + map->count) << PAGE_SHIFT; >> + if (op.index >= begin && op.index < end) >> + goto found; >> + } >> + rc = -ENOENT; >> + goto unlock_out; >> + >> + found: >> + map->notify_flags = op.action; >> + map->notify_offset = op.index - (map->index << PAGE_SHIFT); > > Minus? So say the index is 0xdeadbeef, wouldn''t this throw this off?If op.index = 0xdeadbeef and map->index = 0xdead9, then we want 0x2eef to be the offset.> Should you also check the index to make sure it is within a PAGE_SIZE > offset? (the ioctl accepts 64-bit values for the index).If we get here, the offset is within the multi-page entry pointed to by map. It is valid to have notify_offset > PAGE_SIZE if map->count > 1; in this case, the notify points to a byte on the 2nd or later page.>> + map->notify_event = op.event_channel_port; >> + rc = 0; >> + unlock_out: >> + spin_unlock(&priv->lock); >> + return rc; >> +} >> + >> static long gntdev_ioctl(struct file *flip, >> unsigned int cmd, unsigned long arg) >> { >> @@ -533,6 +584,9 @@ static long gntdev_ioctl(struct file *flip, >> case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR: >> return gntdev_ioctl_get_offset_for_vaddr(priv, ptr); >> >> + case IOCTL_GNTDEV_SET_UNMAP_NOTIFY: >> + return gntdev_ioctl_notify(priv, ptr); >> + >> default: >> pr_debug("priv %p, unknown cmd %x\n", priv, cmd); >> return -ENOIOCTLCMD; >> diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h >> index e1d6d0f..92339f5 100644 >> --- a/include/xen/gntalloc.h >> +++ b/include/xen/gntalloc.h >> @@ -67,4 +67,32 @@ struct ioctl_gntalloc_dealloc_gref { >> /* Number of references to unmap */ >> uint32_t count; >> }; >> + >> +/* >> + * Sets up an unmap notification within the page, so that the other side can do >> + * cleanup if this side crashes. Required to implement cross-domain robust >> + * mutexes or close notification on communication channels. >> + * >> + * Each mapped page only supports one notification; multiple calls referring to >> + * the same page overwrite the previous notification. You must clear the >> + * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it >> + * to occur. >> + */ >> +#define IOCTL_GNTALLOC_SET_UNMAP_NOTIFY \ >> +_IOC(_IOC_NONE, ''G'', 7, sizeof(struct ioctl_gntalloc_unmap_notify)) >> +struct ioctl_gntalloc_unmap_notify { >> + /* IN parameters */ >> + /* Index of a byte in the page */ >> + uint64_t index; >> + /* Action(s) to take on unmap */ >> + uint32_t action; >> + /* Event channel to notify */ >> + uint32_t event_channel_port; >> +}; >> + >> +/* Clear (set to zero) the byte specified by index */ >> +#define UNMAP_NOTIFY_CLEAR_BYTE 0x1 >> +/* Send an interrupt on the indicated event channel */ >> +#define UNMAP_NOTIFY_SEND_EVENT 0x2 >> + >> #endif /* __LINUX_PUBLIC_GNTALLOC_H__ */ >> diff --git a/include/xen/gntdev.h b/include/xen/gntdev.h >> index eb23f41..5d9b9b4 100644 >> --- a/include/xen/gntdev.h >> +++ b/include/xen/gntdev.h >> @@ -116,4 +116,31 @@ struct ioctl_gntdev_set_max_grants { >> uint32_t count; >> }; >> >> +/* >> + * Sets up an unmap notification within the page, so that the other side can do >> + * cleanup if this side crashes. Required to implement cross-domain robust >> + * mutexes or close notification on communication channels. >> + * >> + * Each mapped page only supports one notification; multiple calls referring to >> + * the same page overwrite the previous notification. You must clear the >> + * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it >> + * to occur. >> + */ >> +#define IOCTL_GNTDEV_SET_UNMAP_NOTIFY \ >> +_IOC(_IOC_NONE, ''G'', 7, sizeof(struct ioctl_gntdev_unmap_notify)) >> +struct ioctl_gntdev_unmap_notify { >> + /* IN parameters */ >> + /* Index of a byte in the page */ >> + uint64_t index; >> + /* Action(s) to take on unmap */ >> + uint32_t action; >> + /* Event channel to notify */ >> + uint32_t event_channel_port; >> +}; >> + >> +/* Clear (set to zero) the byte specified by index */ >> +#define UNMAP_NOTIFY_CLEAR_BYTE 0x1 >> +/* Send an interrupt on the indicated event channel */ >> +#define UNMAP_NOTIFY_SEND_EVENT 0x2 >> + >> #endif /* __LINUX_PUBLIC_GNTDEV_H__ */ >> -- >> 1.7.3.4 >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Jan-27 20:55 UTC
Re: [Xen-devel] [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
On 01/27/2011 01:52 PM, Konrad Rzeszutek Wilk wrote:> On Fri, Jan 21, 2011 at 10:59:07AM -0500, Daniel De Graaf wrote: >> This allows a userspace application to allocate a shared page for >> implementing inter-domain communication or device drivers. These >> shared pages can be mapped using the gntdev device or by the kernel >> in another domain. >> >> Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov> >> --- >> drivers/xen/Kconfig | 8 + >> drivers/xen/Makefile | 2 + >> drivers/xen/gntalloc.c | 477 ++++++++++++++++++++++++++++++++++++++++++++++++ >> include/xen/gntalloc.h | 70 +++++++ >> 4 files changed, 557 insertions(+), 0 deletions(-) >> create mode 100644 drivers/xen/gntalloc.c >> create mode 100644 include/xen/gntalloc.h >> >> diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig >> index 07bec09..a3d7afb 100644 >> --- a/drivers/xen/Kconfig >> +++ b/drivers/xen/Kconfig >> @@ -80,6 +80,14 @@ config XEN_GNTDEV >> help >> Allows userspace processes to use grants. >> >> +config XEN_GRANT_DEV_ALLOC >> + tristate "User-space grant reference allocator driver" >> + depends on XEN >> + help >> + Allows userspace processes to create pages with access granted >> + to other domains. This can be used to implement frontend drivers >> + or as part of an inter-domain shared memory channel. >> + >> config XEN_PLATFORM_PCI >> tristate "xen platform pci device driver" >> depends on XEN_PVHVM && PCI >> diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile >> index 5088cc2..9585a1d 100644 >> --- a/drivers/xen/Makefile >> +++ b/drivers/xen/Makefile >> @@ -10,6 +10,7 @@ obj-$(CONFIG_XEN_XENCOMM) += xencomm.o >> obj-$(CONFIG_XEN_BALLOON) += balloon.o >> obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o >> obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o >> +obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o >> obj-$(CONFIG_XENFS) += xenfs/ >> obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o >> obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o >> @@ -18,5 +19,6 @@ obj-$(CONFIG_XEN_DOM0) += pci.o >> >> xen-evtchn-y := evtchn.o >> xen-gntdev-y := gntdev.o >> +xen-gntalloc-y := gntalloc.o >> >> xen-platform-pci-y := platform-pci.o >> diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c >> new file mode 100644 >> index 0000000..a230dc4 >> --- /dev/null >> +++ b/drivers/xen/gntalloc.c >> @@ -0,0 +1,477 @@ >> +/****************************************************************************** >> + * gntalloc.c >> + * >> + * Device for creating grant references (in user-space) that may be shared >> + * with other domains. >> + * >> + * This program is distributed in the hope that it will be useful, >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >> + * GNU General Public License for more details. >> + * >> + * You should have received a copy of the GNU General Public License >> + * along with this program; if not, write to the Free Software >> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA >> + */ >> + >> +/* >> + * This driver exists to allow userspace programs in Linux to allocate kernel >> + * memory that will later be shared with another domain. Without this device, >> + * Linux userspace programs cannot create grant references. >> + * >> + * How this stuff works: >> + * X -> granting a page to Y >> + * Y -> mapping the grant from X >> + * >> + * 1. X uses the gntalloc device to allocate a page of kernel memory, P. >> + * 2. X creates an entry in the grant table that says domid(Y) can access P. >> + * This is done without a hypercall unless the grant table needs expansion. >> + * 3. X gives the grant reference identifier, GREF, to Y. >> + * 4. Y maps the page, either directly into kernel memory for use in a backend >> + * driver, or via a the gntdev device to map into the address space of an >> + * application running in Y. This is the first point at which Xen does any >> + * tracking of the page. >> + * 5. A program in X mmap()s a segment of the gntalloc device that corresponds >> + * to the shared page, and can now communicate with Y over the shared page. >> + * >> + * >> + * NOTE TO USERSPACE LIBRARIES: >> + * The grant allocation and mmap()ing are, naturally, two separate operations. >> + * You set up the sharing by calling the create ioctl() and then the mmap(). >> + * Teardown requires munmap() and either close() or ioctl(). >> + * >> + * WARNING: Since Xen does not allow a guest to forcibly end the use of a grant >> + * reference, this device can be used to consume kernel memory by leaving grant >> + * references mapped by another domain when an application exits. Therefore, >> + * there is a global limit on the number of pages that can be allocated. When >> + * all references to the page are unmapped, it will be freed during the next >> + * grant operation. >> + */ >> + >> +#include <linux/atomic.h> >> +#include <linux/module.h> >> +#include <linux/miscdevice.h> >> +#include <linux/kernel.h> >> +#include <linux/init.h> >> +#include <linux/slab.h> >> +#include <linux/fs.h> >> +#include <linux/device.h> >> +#include <linux/mm.h> >> +#include <linux/uaccess.h> >> +#include <linux/types.h> >> +#include <linux/list.h> >> + >> +#include <xen/xen.h> >> +#include <xen/page.h> >> +#include <xen/grant_table.h> >> +#include <xen/gntalloc.h> >> + >> +static int limit = 1024; >> +module_param(limit, int, 0644); >> +MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by " >> + "the gntalloc device"); >> + >> +static LIST_HEAD(gref_list); >> +static DEFINE_SPINLOCK(gref_lock); >> +static int gref_size; >> + >> +/* Metadata on a grant reference. */ >> +struct gntalloc_gref { >> + struct list_head next_all; /* list entry gref_list */ > > How about ''next_gref'' ?Seems like a good name. Will change.>> + struct list_head next_file; /* list entry file->list, if open */ >> + struct page *page; /* The shared page */ >> + uint64_t file_index; /* File offset for mmap() */ >> + unsigned int users; /* Use count - when zero, waiting on Xen */ >> + grant_ref_t gref_id; /* The grant reference number */ >> +}; >> + >> +struct gntalloc_file_private_data { >> + struct list_head list; >> + uint64_t index; >> +}; >> + >> +static void __del_gref(struct gntalloc_gref *gref); >> + >> +static void do_cleanup(void) >> +{ >> + struct gntalloc_gref *gref, *n; >> + list_for_each_entry_safe(gref, n, &gref_list, next_all) { >> + if (!gref->users) >> + __del_gref(gref); >> + } >> +} >> + >> +static int add_grefs(struct ioctl_gntalloc_alloc_gref *op, >> + uint32_t *gref_ids, struct gntalloc_file_private_data *priv) >> +{ >> + int i, rc, readonly; >> + LIST_HEAD(queue_all); >> + LIST_HEAD(queue_file); >> + struct gntalloc_gref *gref; >> + >> + readonly = !(op->flags & GNTALLOC_FLAG_WRITABLE); >> + rc = -ENOMEM; >> + for (i = 0; i < op->count; i++) { >> + gref = kzalloc(sizeof(*gref), GFP_KERNEL); >> + if (!gref) >> + goto undo; >> + list_add_tail(&gref->next_all, &queue_all); >> + list_add_tail(&gref->next_file, &queue_file); >> + gref->users = 1; >> + gref->file_index = op->index + i * PAGE_SIZE; >> + gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO); >> + if (!gref->page) >> + goto undo; >> + >> + /* Grant foreign access to the page. */ >> + gref->gref_id = gnttab_grant_foreign_access(op->domid, >> + pfn_to_mfn(page_to_pfn(gref->page)), readonly); >> + if (gref->gref_id < 0) { >> + rc = gref->gref_id; >> + goto undo; >> + } >> + gref_ids[i] = gref->gref_id; >> + } >> + >> + /* Add to gref lists. */ >> + spin_lock(&gref_lock); >> + list_splice_tail(&queue_all, &gref_list); >> + list_splice_tail(&queue_file, &priv->list); >> + spin_unlock(&gref_lock); >> + >> + return 0; >> + >> +undo: >> + list_for_each_entry(gref, &queue_file, next_file) { >> + if (gref->page && gref->gref_id >= 0) { >> + __del_gref(gref); >> + } else { >> + if (gref->page) >> + __free_page(gref->page); >> + list_del(&gref->next_all); > > Ohh, you are actually deleting them from queue_all, ah. > Can you add a comment about that since the first time I read this > I thought you were iterating over the queue_file and deleting > from _that_ list. Tricky.. > > Can collapse this deleting into __del_gref? Looks like > you should just preface the gnttab with "if gref->page && gref->gref_id" > and also the gref_size, and the "if (gref->page)" before deleting?Yes, this can be collapsed into __del_gref. That will probably clarify the deletion, but I''ll stick a comment about __del_gref removing from queue_all and not queue_file so it''s clearer what will/won''t break.> >> + kfree(gref); >> + } >> + } >> + >> + /* It''s possible for the target domain to map the just-allocated grant >> + * references by blindly guessing their IDs; if this is done, then >> + * __del_gref will leave them in the queue_all list. They need to be >> + * added to the global list so that we can free them when they are no >> + * longer referenced. >> + */ >> + if (unlikely(!list_empty(&queue_all))) { >> + spin_lock(&gref_lock); >> + list_splice_tail(&queue_all, &gref_list); >> + spin_unlock(&gref_lock); >> + } >> + return rc; >> +} >> + >> +static void __del_gref(struct gntalloc_gref *gref) >> +{ >> + if (gnttab_query_foreign_access(gref->gref_id)) >> + return; >> + >> + if (!gnttab_end_foreign_access_ref(gref->gref_id, 0)) >> + return; >> + >> + gref_size--; >> + list_del(&gref->next_all); >> + >> + __free_page(gref->page); >> + kfree(gref); >> +} >> + >> +/* finds contiguous grant references in a file, returns the first */ >> +static struct gntalloc_gref *find_grefs(struct gntalloc_file_private_data *priv, >> + uint64_t index, uint32_t count) >> +{ >> + struct gntalloc_gref *rv = NULL, *gref; >> + list_for_each_entry(gref, &priv->list, next_file) { >> + if (gref->file_index == index && !rv) >> + rv = gref; >> + if (rv) { >> + if (gref->file_index != index) >> + return NULL; >> + index += PAGE_SIZE; >> + count--; >> + if (count == 0) >> + return rv; >> + } >> + } >> + return NULL; >> +} >> + >> +/* >> + * ------------------------------------- >> + * File operations. >> + * ------------------------------------- >> + */ >> +static int gntalloc_open(struct inode *inode, struct file *filp) >> +{ >> + struct gntalloc_file_private_data *priv; >> + >> + try_module_get(THIS_MODULE); > > No checking if it fails?Actually, looking at it again, this seems redundant: won''t open() itself prevent rmmod of the module until everything is closed? Regardless, while the failure seems unlikely it should be checked for.>> + >> + priv = kzalloc(sizeof(*priv), GFP_KERNEL); >> + if (!priv) >> + goto out_nomem; >> + INIT_LIST_HEAD(&priv->list); >> + >> + filp->private_data = priv; >> + >> + pr_debug("%s: priv %p\n", __func__, priv); >> + >> + return 0; >> + >> +out_nomem: > > You should also do: > > module_put(THIS_MODULE); >Agreed, or just get rid of the module_get.>> + return -ENOMEM; >> +} >> + >> +static int gntalloc_release(struct inode *inode, struct file *filp) >> +{ >> + struct gntalloc_file_private_data *priv = filp->private_data; >> + struct gntalloc_gref *gref; >> + >> + pr_debug("%s: priv %p\n", __func__, priv); >> + >> + spin_lock(&gref_lock); >> + while (!list_empty(&priv->list)) { >> + gref = list_entry(priv->list.next, >> + struct gntalloc_gref, next_file); > > >> + list_del(&gref->next_file); >> + gref->users--; >> + if (gref->users == 0) >> + __del_gref(gref); >> + } >> + kfree(priv); >> + spin_unlock(&gref_lock); >> + >> + module_put(THIS_MODULE); >> + >> + return 0; >> +} >> + >> +static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv, >> + struct ioctl_gntalloc_alloc_gref __user *arg) >> +{ >> + int rc = 0; >> + struct ioctl_gntalloc_alloc_gref op; >> + uint32_t *gref_ids; >> + >> + pr_debug("%s: priv %p\n", __func__, priv); >> + >> + if (copy_from_user(&op, arg, sizeof(op))) { >> + rc = -EFAULT; >> + goto out; >> + } >> + >> + gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY); >> + if (!gref_ids) { >> + rc = -ENOMEM; >> + goto out; >> + } >> + >> + spin_lock(&gref_lock); >> + do_cleanup(); > > Hmm, why the cleanup here? I see it gntalloc_ioctl_dealloc > which makes sense since the users-- might have been decremented to zero. > But here?This is to clean up pages that were at zero (local) users but were still mapped by remote domains. Since those pages count towards the limit that we are about to enforce, it is a good idea to remove any pages that have been unmapped by remote domains since last time we checked. This would be much cleaner if Xen allowed a domain to force others to unmap its pages, but that''s a significant change to the semantics of shared memory in the hypervisor.>> + if (gref_size + op.count > limit) { >> + spin_unlock(&gref_lock); >> + rc = -ENOSPC; >> + goto out_free; >> + } >> + gref_size += op.count; >> + op.index = priv->index; >> + priv->index += op.count * PAGE_SIZE; >> + spin_unlock(&gref_lock); >> + >> + rc = add_grefs(&op, gref_ids, priv); >> + if (rc < 0) >> + goto out_free; >> + >> + if (copy_to_user(arg, &op, sizeof(op))) { >> + rc = -EFAULT; >> + goto out_free; > > Not something that would clean the newly added grant? Say > the code I suggested below the ''out'' label. >That races with a concurrent removal operation that has guessed the offset we just added, and removed the gref. As soon as we unlock gref_lock at the end of add_grefs, gref is unsafe to dereference. This could be solved by a per-file lock, or by holding gref_lock for longer, but the copy_to_user producing -EFAULT seemed unlikely enough that forcing a close() seemed the better choice - especially since the userspace application will be segfaulting soon if it is trying to read the offsets.>> + } >> + if (copy_to_user(arg->gref_ids, gref_ids, >> + sizeof(gref_ids[0]) * op.count)) { >> + rc = -EFAULT; >> + goto out_free; > > Ditto. >> + } >> + >> +out_free: >> + kfree(gref_ids); >> +out: >> + return rc; > > out_del_gref: > gref->users--; > __del_gref(gref); > > goto out_free; >> +} >> + >> +static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv, >> + void __user *arg) >> +{ >> + int i, rc = 0; >> + struct ioctl_gntalloc_dealloc_gref op; >> + struct gntalloc_gref *gref, *n; >> + >> + pr_debug("%s: priv %p\n", __func__, priv); >> + >> + if (copy_from_user(&op, arg, sizeof(op))) { >> + rc = -EFAULT; >> + goto dealloc_grant_out; >> + } >> + >> + spin_lock(&gref_lock); >> + gref = find_grefs(priv, op.index, op.count); >> + if (gref) { >> + for (i = 0; i < op.count; i++) { >> + n = list_entry(gref->next_file.next, >> + struct gntalloc_gref, next_file); > > Might want to add a comment that you are deleting only from ->next_file > b/c the do_cleanpup will take of deleting from the other list (that is > if the users is at zero).Right.>> + list_del(&gref->next_file); >> + gref->users--; >> + gref = n; >> + } >> + } else { >> + rc = -EINVAL; >> + } >> + >> + do_cleanup(); >> + >> + spin_unlock(&gref_lock); >> +dealloc_grant_out: >> + return rc; >> +} >> + >> +static long gntalloc_ioctl(struct file *filp, unsigned int cmd, >> + unsigned long arg) >> +{ >> + struct gntalloc_file_private_data *priv = filp->private_data; >> + >> + switch (cmd) { >> + case IOCTL_GNTALLOC_ALLOC_GREF: >> + return gntalloc_ioctl_alloc(priv, (void __user *)arg); >> + >> + case IOCTL_GNTALLOC_DEALLOC_GREF: >> + return gntalloc_ioctl_dealloc(priv, (void __user *)arg); >> + >> + default: >> + return -ENOIOCTLCMD; >> + } >> + >> + return 0; >> +} >> + >> +static void gntalloc_vma_close(struct vm_area_struct *vma) >> +{ >> + struct gntalloc_gref *gref = vma->vm_private_data; >> + if (!gref) >> + return; >> + >> + spin_lock(&gref_lock); >> + gref->users--; >> + if (gref->users == 0) >> + __del_gref(gref); > > I just want to be convienced here that I am wrong. > > If the ''ioctl_deallo'' has not been done, what will if unmap this VMA? > Will it be OK to yank the gref from gref_list while (and kfree it) while > it is still referenced in the filp->private_data? Or would end up trying > to derefence the *priv and go BOOM?The VMA itself is unmapped regardless. The gref structure (and the pages pointed to by the vma) is deallocated when the last reference goes away. In your example, it would be on _release() of the file or a later dealloc ioctl. The only time __del_gref is called here is when the file has been closed or the segment has already had ioctl_dealloc run on it.>> + spin_unlock(&gref_lock); >> +} >> + >> +static struct vm_operations_struct gntalloc_vmops = { >> + .close = gntalloc_vma_close, >> +}; >> + >> +static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma) >> +{ >> + struct gntalloc_file_private_data *priv = filp->private_data; >> + struct gntalloc_gref *gref; >> + int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; >> + int rv, i; >> + >> + pr_debug("%s: priv %p, page %lu+%d\n", __func__, >> + priv, vma->vm_pgoff, count); >> + >> + if (!(vma->vm_flags & VM_SHARED)) { >> + printk(KERN_ERR "%s: Mapping must be shared.\n", __func__); >> + return -EINVAL; >> + } >> + >> + spin_lock(&gref_lock); >> + gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count); >> + if (gref == NULL) { >> + rv = -ENOENT; >> + pr_debug("%s: Could not find grant reference", >> + __func__); >> + goto out_unlock; >> + } >> + >> + vma->vm_private_data = gref; >> + >> + vma->vm_flags |= VM_RESERVED; >> + vma->vm_flags |= VM_DONTCOPY; >> + vma->vm_flags |= VM_IO; > > Hmm, I don''t think you need the VM_IO.Agreed, in fact it''ll probably trigger warnings now that we force VM_IO to imply pfn=mfn.>> + vma->vm_flags |= VM_PFNMAP | VM_PFN_AT_MMAP; >> + >> + vma->vm_ops = &gntalloc_vmops; >> + >> + for (i = 0; i < count; i++) { >> + gref->users++; >> + rv = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE, >> + gref->page); >> + if (rv) >> + goto out_unlock; >> + >> + gref = list_entry(gref->next_file.next, >> + struct gntalloc_gref, next_file); >> + } >> + rv = 0; >> + >> +out_unlock: >> + spin_unlock(&gref_lock); >> + return rv; >> +} >> + >> +static const struct file_operations gntalloc_fops = { >> + .owner = THIS_MODULE, >> + .open = gntalloc_open, >> + .release = gntalloc_release, >> + .unlocked_ioctl = gntalloc_ioctl, >> + .mmap = gntalloc_mmap >> +}; >> + >> +/* >> + * ------------------------------------- >> + * Module creation/destruction. >> + * ------------------------------------- >> + */ >> +static struct miscdevice gntalloc_miscdev = { >> + .minor = MISC_DYNAMIC_MINOR, >> + .name = "xen/gntalloc", >> + .fops = &gntalloc_fops, >> +}; >> + >> +static int __init gntalloc_init(void) >> +{ >> + int err; >> + >> + if (!xen_domain()) >> + return -ENODEV; >> + >> + err = misc_register(&gntalloc_miscdev); >> + if (err != 0) { >> + printk(KERN_ERR "Could not register misc gntalloc device\n"); >> + return err; >> + } >> + >> + pr_debug("Created grant allocation device at %d,%d\n", >> + MISC_MAJOR, gntalloc_miscdev.minor); >> + >> + return 0; >> +} >> + >> +static void __exit gntalloc_exit(void) >> +{ >> + misc_deregister(&gntalloc_miscdev); >> +} >> + >> +module_init(gntalloc_init); >> +module_exit(gntalloc_exit); >> + >> +MODULE_LICENSE("GPL"); >> +MODULE_AUTHOR("Carter Weatherly <carter.weatherly@jhuapl.edu>, " >> + "Daniel De Graaf <dgdegra@tycho.nsa.gov>"); >> +MODULE_DESCRIPTION("User-space grant reference allocator driver"); >> diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h >> new file mode 100644 >> index 0000000..e1d6d0f >> --- /dev/null >> +++ b/include/xen/gntalloc.h >> @@ -0,0 +1,70 @@ >> +/****************************************************************************** >> + * gntalloc.h >> + * > > Copyright?Public domain. The GPL text will need to be removed.>> + * Interface to /dev/xen/gntalloc. >> + * >> + * This program is free software; you can redistribute it and/or >> + * modify it under the terms of the GNU General Public License version 2 >> + * as published by the Free Software Foundation; or, when distributed >> + * separately from the Linux kernel or incorporated into other >> + * software packages, subject to the following license: >> + * >> + * Permission is hereby granted, free of charge, to any person obtaining a copy >> + * of this source file (the "Software"), to deal in the Software without >> + * restriction, including without limitation the rights to use, copy, modify, >> + * merge, publish, distribute, sublicense, and/or sell copies of the Software, >> + * and to permit persons to whom the Software is furnished to do so, subject to >> + * the following conditions: >> + * >> + * The above copyright notice and this permission notice shall be included in >> + * all copies or substantial portions of the Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE >> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER >> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING >> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS >> + * IN THE SOFTWARE. >> + */ >> + >> +#ifndef __LINUX_PUBLIC_GNTALLOC_H__ >> +#define __LINUX_PUBLIC_GNTALLOC_H__ >> + >> +/* >> + * Allocates a new page and creates a new grant reference. >> + */ >> +#define IOCTL_GNTALLOC_ALLOC_GREF \ >> +_IOC(_IOC_NONE, ''G'', 5, sizeof(struct ioctl_gntalloc_alloc_gref)) >> +struct ioctl_gntalloc_alloc_gref { >> + /* IN parameters */ >> + /* The ID of the domain to be given access to the grants. */ >> + uint16_t domid; >> + /* Flags for this mapping */ >> + uint16_t flags; >> + /* Number of pages to map */ >> + uint32_t count; >> + /* OUT parameters */ >> + /* The offset to be used on a subsequent call to mmap(). */ >> + uint64_t index; >> + /* The grant references of the newly created grant, one per page */ >> + /* Variable size, depending on count */ >> + uint32_t gref_ids[1]; >> +}; >> + >> +#define GNTALLOC_FLAG_WRITABLE 1 >> + >> +/* >> + * Deallocates the grant reference, allowing the associated page to be freed if >> + * no other domains are using it. >> + */ >> +#define IOCTL_GNTALLOC_DEALLOC_GREF \ >> +_IOC(_IOC_NONE, ''G'', 6, sizeof(struct ioctl_gntalloc_dealloc_gref)) >> +struct ioctl_gntalloc_dealloc_gref { >> + /* IN parameters */ >> + /* The offset returned in the map operation */ >> + uint64_t index; >> + /* Number of references to unmap */ >> + uint32_t count; >> +}; >> +#endif /* __LINUX_PUBLIC_GNTALLOC_H__ */ >> -- >> 1.7.3.4 >>_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Konrad Rzeszutek Wilk
2011-Jan-27 21:29 UTC
Re: [Xen-devel] [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
> >> + try_module_get(THIS_MODULE); > > > > No checking if it fails? > > Actually, looking at it again, this seems redundant: won''t open() itself > prevent rmmod of the module until everything is closed?I hope so :-)> > Regardless, while the failure seems unlikely it should be checked for.OK. .. big snip ..> >> + spin_lock(&gref_lock); > >> + do_cleanup(); > > > > Hmm, why the cleanup here? I see it gntalloc_ioctl_dealloc > > which makes sense since the users-- might have been decremented to zero. > > But here? > > This is to clean up pages that were at zero (local) users but were still > mapped by remote domains. Since those pages count towards the limit that > we are about to enforce, it is a good idea to remove any pages that have > been unmapped by remote domains since last time we checked.Ok, can you put that as comment right before calling do_cleanup, please?> > This would be much cleaner if Xen allowed a domain to force others to > unmap its pages, but that''s a significant change to the semantics of > shared memory in the hypervisor. > > >> + if (gref_size + op.count > limit) { > >> + spin_unlock(&gref_lock); > >> + rc = -ENOSPC; > >> + goto out_free; > >> + } > >> + gref_size += op.count; > >> + op.index = priv->index; > >> + priv->index += op.count * PAGE_SIZE; > >> + spin_unlock(&gref_lock); > >> + > >> + rc = add_grefs(&op, gref_ids, priv); > >> + if (rc < 0) > >> + goto out_free; > >> + > >> + if (copy_to_user(arg, &op, sizeof(op))) { > >> + rc = -EFAULT; > >> + goto out_free; > > > > Not something that would clean the newly added grant? Say > > the code I suggested below the ''out'' label. > > > > That races with a concurrent removal operation that has guessed > the offset we just added, and removed the gref. As soon as we unlock > gref_lock at the end of add_grefs, gref is unsafe to dereference.Aha! Can you put a comment about this so in the future we won''t try to correct this "mistake" ?> > This could be solved by a per-file lock, or by holding gref_lock > for longer, but the copy_to_user producing -EFAULT seemed unlikely > enough that forcing a close() seemed the better choice - especially > since the userspace application will be segfaulting soon if it is > trying to read the offsets.True enought. .. big snip..> >> + spin_lock(&gref_lock); > >> + gref->users--; > >> + if (gref->users == 0) > >> + __del_gref(gref); > > > > I just want to be convienced here that I am wrong. > > > > If the ''ioctl_deallo'' has not been done, what will if unmap this VMA? > > Will it be OK to yank the gref from gref_list while (and kfree it) while > > it is still referenced in the filp->private_data? Or would end up trying > > to derefence the *priv and go BOOM? > > The VMA itself is unmapped regardless. The gref structure (and the pages > pointed to by the vma) is deallocated when the last reference goes away. > In your example, it would be on _release() of the file or a later dealloc > ioctl.OK, you convienced me.> > The only time __del_gref is called here is when the file has been closed > or the segment has already had ioctl_dealloc run on it.<nods> _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Ian Campbell
2011-Mar-04 15:57 UTC
[Xen-devel] Re: [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
On Thu, 2011-01-27 at 18:52 +0000, Konrad Rzeszutek Wilk wrote:> > @@ -179,11 +184,32 @@ static void gntdev_put_map(struct grant_map *map) > > > > atomic_sub(map->count, &pages_mapped); > > > > - if (map->pages) > > + if (map->pages) { > > + if (!use_ptemod) > > + unmap_grant_pages(map, 0, map->count); > > + > > for (i = 0; i < map->count; i++) { > > - if (map->pages[i]) > > + uint32_t check, *tmp; > > + if (!map->pages[i]) > > + continue; > > + /* XXX When unmapping in an HVM domain, Xen will > > + * sometimes end up mapping the GFN to an invalid MFN. > > + * In this case, writes will be discarded and reads will > > + * return all 0xFF bytes. Leak these unusable GFNs > > I forgot to ask, under what version of Xen did you run this? I want to add > that to the comment so when it gets fixed we know what the failing version is. > > > + * until Xen supports fixing their p2m mapping. > > + */ > > + tmp = kmap(map->pages[i]); > > + *tmp = 0xdeaddead;I''ve just tripped over this check which faults in my PV guest. Seems to be related to the handling failures of map_grant_pages()? Was the underlying Xen issue here reported somewhere more obvious than this comment buried in a patch to the kernel? If not please can you raise it as a separate thread clearly marked as a hypervisor issue/question, all I can find is bits and pieces spread through the threads associated with this kernel patch. I don''t think I''ve got a clear enough picture of the issue from those fragments to pull it together into a sensible report. Ian.> > + mb(); > > + check = *tmp; > > + kunmap(map->pages[i]);_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Mar-04 16:34 UTC
[Xen-devel] Re: [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
On 03/04/2011 10:57 AM, Ian Campbell wrote:> On Thu, 2011-01-27 at 18:52 +0000, Konrad Rzeszutek Wilk wrote: >>> @@ -179,11 +184,32 @@ static void gntdev_put_map(struct grant_map *map) >>> >>> atomic_sub(map->count, &pages_mapped); >>> >>> - if (map->pages) >>> + if (map->pages) { >>> + if (!use_ptemod) >>> + unmap_grant_pages(map, 0, map->count); >>> + >>> for (i = 0; i < map->count; i++) { >>> - if (map->pages[i]) >>> + uint32_t check, *tmp; >>> + if (!map->pages[i]) >>> + continue; >>> + /* XXX When unmapping in an HVM domain, Xen will >>> + * sometimes end up mapping the GFN to an invalid MFN. >>> + * In this case, writes will be discarded and reads will >>> + * return all 0xFF bytes. Leak these unusable GFNs >> >> I forgot to ask, under what version of Xen did you run this? I want to add >> that to the comment so when it gets fixed we know what the failing version is. >> >>> + * until Xen supports fixing their p2m mapping. >>> + */ >>> + tmp = kmap(map->pages[i]); >>> + *tmp = 0xdeaddead; > > I''ve just tripped over this check which faults in my PV guest. Seems to > be related to the handling failures of map_grant_pages()? > > Was the underlying Xen issue here reported somewhere more obvious than > this comment buried in a patch to the kernel? > > If not please can you raise it as a separate thread clearly marked as a > hypervisor issue/question, all I can find is bits and pieces spread > through the threads associated with this kernel patch. I don''t think > I''ve got a clear enough picture of the issue from those fragments to > pull it together into a sensible report. > > Ian. >I think there may be other bugs lurking here with these freed pages; I have observed that even pages that pass this kmap check can become bad at a later time. This might be due to TLB issues; I haven''t had a chance to debug it. I do have a patch that prevents the pages that have been granted from being recycled into general use by the kernel; I hadn''t posted it yet because it didn''t resolve the issue completely. 8<--------------------------------------------------------- xen-gntdev: avoid reuse of possibly-bad pages In HVM, the unmap hypercall does not reliably associate a valid MFN with a just-unmapped GFN. A simple validity test of the page is not sufficient to determine if it will remain valid; pages have been observed to remain mapped and later become invalid. Instead of releasing the pages to the allocator, keep them in a list to reuse their GFNs for future mappings, which should always produce a valid mapping. ** Note: this patch is an RFC, not for a stable patch queue ** Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov> diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index d43ff30..b9b1577 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -54,6 +54,12 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by " static atomic_t pages_mapped = ATOMIC_INIT(0); +/* Pages that are unsafe to use except as gntdev pages due to bad PFN/MFN + * mapping after an unmap. + */ +static LIST_HEAD(page_reuse); +static DEFINE_SPINLOCK(page_reuse_lock); + static int use_ptemod; struct gntdev_priv { @@ -122,13 +128,21 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) NULL == add->pages) goto err; + spin_lock(&page_reuse_lock); for (i = 0; i < count; i++) { - add->pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); - if (add->pages[i] == NULL) - goto err; + if (list_empty(&page_reuse)) { + add->pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); + if (add->pages[i] == NULL) + goto err_unlock; + } else { + add->pages[i] = list_entry(page_reuse.next, + struct page, lru); + list_del(&add->pages[i]->lru); + } add->map_ops[i].handle = -1; add->unmap_ops[i].handle = -1; } + spin_unlock(&page_reuse_lock); add->index = 0; add->count = count; @@ -136,12 +150,13 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) return add; +err_unlock: + for (i = 0; i < count; i++) { + if (add->pages[i]) + list_add(&add->pages[i]->lru, &page_reuse); + } + spin_unlock(&page_reuse_lock); err: - if (add->pages) - for (i = 0; i < count; i++) { - if (add->pages[i]) - __free_page(add->pages[i]); - } kfree(add->pages); kfree(add->grants); kfree(add->map_ops); @@ -202,29 +217,14 @@ static void gntdev_put_map(struct grant_map *map) if (!use_ptemod) unmap_grant_pages(map, 0, map->count); + spin_lock(&page_reuse_lock); for (i = 0; i < map->count; i++) { uint32_t check, *tmp; if (!map->pages[i]) continue; - /* XXX When unmapping in an HVM domain, Xen will - * sometimes end up mapping the GFN to an invalid MFN. - * In this case, writes will be discarded and reads will - * return all 0xFF bytes. Leak these unusable GFNs - * until Xen supports fixing their p2m mapping. - * - * Confirmed present in Xen 4.1-RC3 with HVM source - */ - tmp = kmap(map->pages[i]); - *tmp = 0xdeaddead; - mb(); - check = *tmp; - kunmap(map->pages[i]); - if (check == 0xdeaddead) - __free_page(map->pages[i]); - else - pr_debug("Discard page %d=%ld\n", i, - page_to_pfn(map->pages[i])); + list_add(&map->pages[i]->lru, &page_reuse); } + spin_unlock(&page_reuse_lock); } kfree(map->pages); kfree(map->grants); _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel