Daniel De Graaf
2011-Feb-03  17:18 UTC
[Xen-devel] [PATCH v6] Userspace grant communication
Changes since v5:
  - Added a tested xen version to workaround in #4
  - Cleaned up variable names & structures
  - Clarified some of the cleanup in gntalloc
  - Removed copyright statement from public-domain files
[PATCH 1/6] xen-gntdev: Change page limit to be global instead of per-open
[PATCH 2/6] xen-gntdev: Use find_vma rather than iterating our vma list manually
[PATCH 3/6] xen-gntdev: Add reference counting to maps
[PATCH 4/6] xen-gntdev: Support mapping in HVM domains
[PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
[PATCH 6/6] xen/gntalloc,gntdev: Add unmap notify ioctl
Test/Demo code (also updated):
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
struct ioctl_gntdev_grant_ref {
	/* The domain ID of the grant to be mapped. */
	uint32_t domid;
	/* The grant reference of the grant to be mapped. */
	uint32_t ref;
};
/*
 * Allocates a new page and creates a new grant reference.
 */
#define IOCTL_GNTALLOC_ALLOC_GREF \
_IOC(_IOC_NONE, ''G'', 5, sizeof(struct
ioctl_gntalloc_alloc_gref))
struct ioctl_gntalloc_alloc_gref {
	/* IN parameters */
	/* The ID of the domain to be given access to the grants. */
	uint16_t domid;
	/* Flags for this mapping */
	uint16_t flags;
	/* Number of pages to map */
	uint32_t count;
	/* OUT parameters */
	/* The offset to be used on a subsequent call to mmap(). */
	uint64_t index;
	/* The grant references of the newly created grant, one per page */
	/* Variable size, depending on count */
	uint32_t gref_ids[1];
};
#define GNTALLOC_FLAG_WRITABLE 1
/*
 * Deallocates the grant reference, allowing the associated page to be freed if
 * no other domains are using it.
 */
#define IOCTL_GNTALLOC_DEALLOC_GREF \
_IOC(_IOC_NONE, ''G'', 6, sizeof(struct
ioctl_gntalloc_dealloc_gref))
struct ioctl_gntalloc_dealloc_gref {
	/* IN parameters */
	/* The offset returned in the map operation */
	uint64_t index;
	/* Number of references to unmap */
	uint32_t count;
};
#define IOCTL_GNTDEV_MAP_GRANT_REF \
_IOC(_IOC_NONE, ''G'', 0, sizeof(struct
ioctl_gntdev_map_grant_ref))
struct ioctl_gntdev_map_grant_ref {
    /* IN parameters */
    /* The number of grants to be mapped. */
    uint32_t count;
    uint32_t pad;
    /* OUT parameters */
    /* The offset to be used on a subsequent call to mmap(). */
    uint64_t index;
    /* Variable IN parameter. */
    /* Array of grant references, of size @count. */
    struct ioctl_gntdev_grant_ref refs[1];
};
#define GNTDEV_MAP_WRITABLE 0x1
#define IOCTL_GNTDEV_UNMAP_GRANT_REF \
_IOC(_IOC_NONE, ''G'', 1, sizeof(struct
ioctl_gntdev_unmap_grant_ref))
struct ioctl_gntdev_unmap_grant_ref {
    /* IN parameters */
    /* The offset was returned by the corresponding map operation. */
    uint64_t index;
    /* The number of pages to be unmapped. */
    uint32_t count;
    uint32_t pad;
};
/*
 * Sets up an unmap notification within the page, so that the other side can do
 * cleanup if this side crashes. Required to implement cross-domain robust
 * mutexes or close notification on communication channels.
 *
 * Each mapped page only supports one notification; multiple calls referring to
 * the same page overwrite the previous notification. You must clear the
 * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
 * to occur.
 */
#define IOCTL_GNTDEV_SET_UNMAP_NOTIFY \
_IOC(_IOC_NONE, ''G'', 7, sizeof(struct
ioctl_gntdev_unmap_notify))
struct ioctl_gntdev_unmap_notify {
	/* IN parameters */
	/* Index of a byte in the page */
	uint64_t index;
	/* Action(s) to take on unmap */
	uint32_t action;
	/* Event channel to notify */
	uint32_t event_channel_port;
};
/* Clear (set to zero) the byte specified by index */
#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
/* Send an interrupt on the indicated event channel */
#define UNMAP_NOTIFY_SEND_EVENT 0x2
/*
 * Sets up an unmap notification within the page, so that the other side can do
 * cleanup if this side crashes. Required to implement cross-domain robust
 * mutexes or close notification on communication channels.
 *
 * Each mapped page only supports one notification; multiple calls referring to
 * the same page overwrite the previous notification. You must clear the
 * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
 * to occur.
 */
#define IOCTL_GNTALLOC_SET_UNMAP_NOTIFY \
_IOC(_IOC_NONE, ''G'', 7, sizeof(struct
ioctl_gntalloc_unmap_notify))
struct ioctl_gntalloc_unmap_notify {
	/* IN parameters */
	/* Index of a byte in the page */
	uint64_t index;
	/* Action(s) to take on unmap */
	uint32_t action;
	/* Event channel to notify */
	uint32_t event_channel_port;
};
/* Clear (set to zero) the byte specified by index */
#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
/* Send an interrupt on the indicated event channel */
#define UNMAP_NOTIFY_SEND_EVENT 0x2
#ifndef offsetof
#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
#endif
int a_fd;
int d_fd;
struct shr_page {
	uint64_t id;
	char buffer[64];
	uint8_t notifies[8];
};
struct data {
	struct shr_page* mem;
	int handle;
} items[128];
void sa(int id)
{
	struct ioctl_gntalloc_alloc_gref arg = {
		.domid = id,
		.flags = GNTALLOC_FLAG_WRITABLE,
		.count = 1
	};
	int rv = ioctl(a_fd, IOCTL_GNTALLOC_ALLOC_GREF, &arg);
	if (rv) {
		printf("src-add error: %s (rv=%d)\n", strerror(errno), rv);
		return;
	}
	int i=0;
	while (items[i].mem) i++;
	items[i].mem = mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, a_fd,
arg.index);
	if (items[i].mem == MAP_FAILED) {
		items[i].mem = 0;
		printf("mmap failed: SHOULD NOT HAPPEN\n");
		return;
	}
	items[i].handle = arg.index;
	printf("Created shared page with domain %d, grant #%d. Mapped locally at
%d=%p\n",
		id, arg.gref_ids[0], arg.index, items[i].mem);
	items[i].mem->id = rand() | ((long)(getpid()) << 32);
	items[i].mem->notifies[0] = 1;
	struct ioctl_gntalloc_unmap_notify uarg = {
		.index = arg.index + offsetof(struct shr_page, notifies[0]),
		.action = UNMAP_NOTIFY_CLEAR_BYTE
	};
	rv = ioctl(a_fd, IOCTL_GNTALLOC_SET_UNMAP_NOTIFY, &uarg);
	if (rv)
		printf("gntalloc unmap notify error: %s (rv=%d)\n", strerror(errno),
rv);
}
void sd(int ref) {
	struct ioctl_gntalloc_dealloc_gref arg = {
		.index = ref,
		.count = 1
	};
	int rv = ioctl(a_fd, IOCTL_GNTALLOC_DEALLOC_GREF, &arg);
	if (rv)
		printf("src-del error: %s (rv=%d)\n", strerror(errno), rv);
	else
		printf("Stopped offering grant at offset %d\n", ref);
}
void mm(int domid, int refid) {
	struct ioctl_gntdev_map_grant_ref arg = {
		.count = 1,
		.refs[0].domid = domid,
		.refs[0].ref = refid,
	};
	int rv = ioctl(d_fd, IOCTL_GNTDEV_MAP_GRANT_REF, &arg);
	if (rv) {
		printf("Could not map grant %d.%d: %s (rv=%d)\n", domid, refid,
strerror(errno), rv);
		return;
	}
	int i=0,j=1;
	while (items[i].mem) i++;
	items[i].mem = mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, d_fd,
arg.index);
	if (items[i].mem == MAP_FAILED) {
		items[i].mem = 0;
		printf("Could not map grant %d.%d: %s (map failed)\n", domid, refid,
strerror(errno), rv);
		return;
	}
	items[i].handle = arg.index;
	printf("Mapped grant %d.%d as %d=%p\n", domid, refid, arg.index,
items[i].mem);
	while (items[i].mem->notifies[j]) j++;
	items[i].mem->notifies[j] = 1;
	struct ioctl_gntalloc_unmap_notify uarg = {
		.index = arg.index + offsetof(struct shr_page, notifies[j]),
		.action = UNMAP_NOTIFY_CLEAR_BYTE
	};
	rv = ioctl(d_fd, IOCTL_GNTDEV_SET_UNMAP_NOTIFY, &uarg);
	if (rv)
		printf("gntdev unmap notify error: %s (rv=%d)\n", strerror(errno),
rv);
}
void gu(int index) {
	struct ioctl_gntdev_unmap_grant_ref arg = {
		.index = index,
		.count = 1,
	};
	int rv = ioctl(d_fd, IOCTL_GNTDEV_UNMAP_GRANT_REF, &arg);
	if (rv)
		printf("gu error: %s (rv=%d)\n", strerror(errno), rv);
	else
		printf("Unhooked mapped grant at offset %d\n", index);
}
void mu(void* addr) {
	int i = 0;
	munmap(addr, 4096);
	while (i < 128)
	{
		if (items[i].mem == addr)
			items[i].mem = 0;
		i++;
	}
	printf("Unmapped page at %p\n", addr);
}
void show(char* word) {
	int i;
	int wlen = strlen(word);
	for(i=0; i < 128; i++) {
		if (!items[i].mem)
			continue;
		memmove(items[i].mem->buffer + wlen, items[i].mem->buffer, 63 - wlen);
		memcpy(items[i].mem->buffer, word, wlen);
		printf("%02d(%ld,%d): id %16lx n=%d%d%d%d%d%d%d%d b=%s\n",
			i, items[i].mem, items[i].handle, items[i].mem->id,
			items[i].mem->notifies[0], items[i].mem->notifies[1],
items[i].mem->notifies[2], items[i].mem->notifies[3],
			items[i].mem->notifies[4], items[i].mem->notifies[5],
items[i].mem->notifies[6], items[i].mem->notifies[7],
			items[i].mem->buffer);
	}
	printf("END\n");
}
int main(int argc, char** argv) {
	a_fd = open("/dev/xen/gntalloc", O_RDWR);
	d_fd = open("/dev/xen/gntdev", O_RDWR);
	printf(
		"add <domid>           return gntref, address\n"
		"map <domid> <ref>     return index, address\n"
		"adel <gntref>         delete <add> internal\n"
		"ddel <index>          delete <map> internal\n"
		"unmap <address>       unmap memory\n"
		"show                  show all pages\n"
		"<word>                append word to all mapped pages,
show\n"
		" PID %x\n", getpid()
	);
	while (1) {
		char line[80];
		char word[80];
		long a, b;
		printf("\n> ");
		fflush(stdout);
		fgets(line, 80, stdin);
		sscanf(line, "%s %ld %ld", word, &a, &b);
		if (!strcmp(word, "add")) {
			sa(a);
		} else if (!strcmp(word, "map")) {
			mm(a, b);
		} else if (!strcmp(word, "adel")) {
			sd(a);
		} else if (!strcmp(word, "ddel")) {
			gu(a);
		} else if (!strcmp(word, "unmap")) {
			mu((void*)a);
		} else if (!strcmp(word, "show")) {
			show("");
		} else {
			show(word);
		}
	}
}
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Feb-03  17:18 UTC
[Xen-devel] [PATCH 1/6] xen-gntdev: Change page limit to be global instead of per-open
Because there is no limitation on how many times a user can open a
given device file, an per-file-description limit on the number of
pages granted offers little to no benefit. Change to a global limit
and remove the ioctl() as the parameter can now be changed via sysfs.
Xen tools changeset 22768:f8d801e5573e is needed to eliminate the
error this change produces in xc_gnttab_set_max_grants.
Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/gntdev.c |   50 ++++++++++++++------------------------------------
 1 files changed, 14 insertions(+), 36 deletions(-)
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 2b777c0..3ca47d1 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -45,15 +45,15 @@ MODULE_AUTHOR("Derek G. Murray
<Derek.Murray@cl.cam.ac.uk>, "
 	      "Gerd Hoffmann <kraxel@redhat.com>");
 MODULE_DESCRIPTION("User-space granted page access driver");
 
-static int limit = 1024;
+static int limit = 1024*1024;
 module_param(limit, int, 0644);
-MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped at
"
-		"once by a gntdev instance");
+MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by
"
+		"the gntdev device");
+
+static atomic_t pages_mapped = ATOMIC_INIT(0);
 
 struct gntdev_priv {
 	struct list_head maps;
-	uint32_t used;
-	uint32_t limit;
 	/* lock protects maps from concurrent changes */
 	spinlock_t lock;
 	struct mm_struct *mm;
@@ -82,9 +82,7 @@ static void gntdev_print_maps(struct gntdev_priv *priv,
 #ifdef DEBUG
 	struct grant_map *map;
 
-	pr_debug("maps list (priv %p, usage %d/%d)\n",
-	       priv, priv->used, priv->limit);
-
+	pr_debug("%s: maps list (priv %p)\n", __func__, priv);
 	list_for_each_entry(map, &priv->maps, next)
 		pr_debug("  index %2d, count %2d %s\n",
 		       map->index, map->count,
@@ -121,9 +119,6 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv
*priv, int count)
 	add->count = count;
 	add->priv  = priv;
 
-	if (add->count + priv->used > priv->limit)
-		goto err;
-
 	return add;
 
 err:
@@ -154,7 +149,6 @@ static void gntdev_add_map(struct gntdev_priv *priv, struct
grant_map *add)
 	list_add_tail(&add->next, &priv->maps);
 
 done:
-	priv->used += add->count;
 	gntdev_print_maps(priv, "[new]", add->index);
 }
 
@@ -200,7 +194,7 @@ static int gntdev_del_map(struct grant_map *map)
 		if (map->unmap_ops[i].handle)
 			return -EBUSY;
 
-	map->priv->used -= map->count;
+	atomic_sub(map->count, &pages_mapped);
 	list_del(&map->next);
 	return 0;
 }
@@ -385,7 +379,6 @@ static int gntdev_open(struct inode *inode, struct file
*flip)
 
 	INIT_LIST_HEAD(&priv->maps);
 	spin_lock_init(&priv->lock);
-	priv->limit = limit;
 
 	priv->mm = get_task_mm(current);
 	if (!priv->mm) {
@@ -442,19 +435,24 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv
*priv,
 	pr_debug("priv %p, add %d\n", priv, op.count);
 	if (unlikely(op.count <= 0))
 		return -EINVAL;
-	if (unlikely(op.count > priv->limit))
-		return -EINVAL;
 
 	err = -ENOMEM;
 	map = gntdev_alloc_map(priv, op.count);
 	if (!map)
 		return err;
+
 	if (copy_from_user(map->grants, &u->refs,
 			   sizeof(map->grants[0]) * op.count) != 0) {
 		gntdev_free_map(map);
 		return err;
 	}
 
+	if (unlikely(atomic_add_return(op.count, &pages_mapped) > limit)) {
+		pr_debug("can''t map: over limit\n");
+		gntdev_free_map(map);
+		return err;
+	}
+
 	spin_lock(&priv->lock);
 	gntdev_add_map(priv, map);
 	op.index = map->index << PAGE_SHIFT;
@@ -517,23 +515,6 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct
gntdev_priv *priv,
 	return 0;
 }
 
-static long gntdev_ioctl_set_max_grants(struct gntdev_priv *priv,
-					struct ioctl_gntdev_set_max_grants __user *u)
-{
-	struct ioctl_gntdev_set_max_grants op;
-
-	if (copy_from_user(&op, u, sizeof(op)) != 0)
-		return -EFAULT;
-	pr_debug("priv %p, limit %d\n", priv, op.count);
-	if (op.count > limit)
-		return -E2BIG;
-
-	spin_lock(&priv->lock);
-	priv->limit = op.count;
-	spin_unlock(&priv->lock);
-	return 0;
-}
-
 static long gntdev_ioctl(struct file *flip,
 			 unsigned int cmd, unsigned long arg)
 {
@@ -550,9 +531,6 @@ static long gntdev_ioctl(struct file *flip,
 	case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
 		return gntdev_ioctl_get_offset_for_vaddr(priv, ptr);
 
-	case IOCTL_GNTDEV_SET_MAX_GRANTS:
-		return gntdev_ioctl_set_max_grants(priv, ptr);
-
 	default:
 		pr_debug("priv %p, unknown cmd %x\n", priv, cmd);
 		return -ENOIOCTLCMD;
-- 
1.7.3.4
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Feb-03  17:19 UTC
[Xen-devel] [PATCH 2/6] xen-gntdev: Use find_vma rather than iterating our vma list manually
This should be faster if many mappings exist, and also removes
the only user of map->vma not related to PTE modification.
Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/gntdev.c |   32 ++++++++------------------------
 1 files changed, 8 insertions(+), 24 deletions(-)
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 3ca47d1..a42554a 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -167,23 +167,6 @@ static struct grant_map *gntdev_find_map_index(struct
gntdev_priv *priv,
 	return NULL;
 }
 
-static struct grant_map *gntdev_find_map_vaddr(struct gntdev_priv *priv,
-					       unsigned long vaddr)
-{
-	struct grant_map *map;
-
-	list_for_each_entry(map, &priv->maps, next) {
-		if (!map->vma)
-			continue;
-		if (vaddr < map->vma->vm_start)
-			continue;
-		if (vaddr >= map->vma->vm_end)
-			continue;
-		return map;
-	}
-	return NULL;
-}
-
 static int gntdev_del_map(struct grant_map *map)
 {
 	int i;
@@ -493,22 +476,23 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct
gntdev_priv *priv,
 					      struct ioctl_gntdev_get_offset_for_vaddr __user *u)
 {
 	struct ioctl_gntdev_get_offset_for_vaddr op;
+	struct vm_area_struct *vma;
 	struct grant_map *map;
 
 	if (copy_from_user(&op, u, sizeof(op)) != 0)
 		return -EFAULT;
 	pr_debug("priv %p, offset for vaddr %lx\n", priv, (unsigned
long)op.vaddr);
 
-	spin_lock(&priv->lock);
-	map = gntdev_find_map_vaddr(priv, op.vaddr);
-	if (map == NULL ||
-	    map->vma->vm_start != op.vaddr) {
-		spin_unlock(&priv->lock);
+	vma = find_vma(current->mm, op.vaddr);
+	if (!vma || vma->vm_ops != &gntdev_vmops)
 		return -EINVAL;
-	}
+
+	map = vma->vm_private_data;
+	if (!map)
+		return -EINVAL;
+
 	op.offset = map->index << PAGE_SHIFT;
 	op.count = map->count;
-	spin_unlock(&priv->lock);
 
 	if (copy_to_user(u, &op, sizeof(op)) != 0)
 		return -EFAULT;
-- 
1.7.3.4
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Feb-03  17:19 UTC
[Xen-devel] [PATCH 3/6] xen-gntdev: Add reference counting to maps
This allows userspace to perform mmap() on the gntdev device and then
immediately close the filehandle or remove the mapping using the
remove ioctl, with the mapped area remaining valid until unmapped.
This also fixes an infinite loop when a gntdev device is closed
without first unmapping all areas.
Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/gntdev.c |   67 ++++++++++++++++++++-----------------------------
 1 files changed, 27 insertions(+), 40 deletions(-)
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index a42554a..1581403 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -62,12 +62,12 @@ struct gntdev_priv {
 
 struct grant_map {
 	struct list_head next;
-	struct gntdev_priv *priv;
 	struct vm_area_struct *vma;
 	int index;
 	int count;
 	int flags;
 	int is_mapped;
+	atomic_t users;
 	struct ioctl_gntdev_grant_ref *grants;
 	struct gnttab_map_grant_ref   *map_ops;
 	struct gnttab_unmap_grant_ref *unmap_ops;
@@ -117,7 +117,7 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv
*priv, int count)
 
 	add->index = 0;
 	add->count = count;
-	add->priv  = priv;
+	atomic_set(&add->users, 1);
 
 	return add;
 
@@ -167,28 +167,18 @@ static struct grant_map *gntdev_find_map_index(struct
gntdev_priv *priv,
 	return NULL;
 }
 
-static int gntdev_del_map(struct grant_map *map)
-{
-	int i;
-
-	if (map->vma)
-		return -EBUSY;
-	for (i = 0; i < map->count; i++)
-		if (map->unmap_ops[i].handle)
-			return -EBUSY;
-
-	atomic_sub(map->count, &pages_mapped);
-	list_del(&map->next);
-	return 0;
-}
-
-static void gntdev_free_map(struct grant_map *map)
+static void gntdev_put_map(struct grant_map *map)
 {
 	int i;
 
 	if (!map)
 		return;
 
+	if (!atomic_dec_and_test(&map->users))
+		return;
+
+	atomic_sub(map->count, &pages_mapped);
+
 	if (map->pages)
 		for (i = 0; i < map->count; i++) {
 			if (map->pages[i])
@@ -266,6 +256,7 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
 	map->is_mapped = 0;
 	map->vma = NULL;
 	vma->vm_private_data = NULL;
+	gntdev_put_map(map);
 }
 
 static int gntdev_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -387,17 +378,14 @@ static int gntdev_release(struct inode *inode, struct file
*flip)
 {
 	struct gntdev_priv *priv = flip->private_data;
 	struct grant_map *map;
-	int err;
 
 	pr_debug("priv %p\n", priv);
 
 	spin_lock(&priv->lock);
 	while (!list_empty(&priv->maps)) {
 		map = list_entry(priv->maps.next, struct grant_map, next);
-		err = gntdev_del_map(map);
-		if (WARN_ON(err))
-			gntdev_free_map(map);
-
+		list_del(&map->next);
+		gntdev_put_map(map);
 	}
 	spin_unlock(&priv->lock);
 
@@ -424,15 +412,15 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv
*priv,
 	if (!map)
 		return err;
 
-	if (copy_from_user(map->grants, &u->refs,
-			   sizeof(map->grants[0]) * op.count) != 0) {
-		gntdev_free_map(map);
+	if (unlikely(atomic_add_return(op.count, &pages_mapped) > limit)) {
+		pr_debug("can''t map: over limit\n");
+		gntdev_put_map(map);
 		return err;
 	}
 
-	if (unlikely(atomic_add_return(op.count, &pages_mapped) > limit)) {
-		pr_debug("can''t map: over limit\n");
-		gntdev_free_map(map);
+	if (copy_from_user(map->grants, &u->refs,
+			   sizeof(map->grants[0]) * op.count) != 0) {
+		gntdev_put_map(map);
 		return err;
 	}
 
@@ -441,13 +429,9 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv
*priv,
 	op.index = map->index << PAGE_SHIFT;
 	spin_unlock(&priv->lock);
 
-	if (copy_to_user(u, &op, sizeof(op)) != 0) {
-		spin_lock(&priv->lock);
-		gntdev_del_map(map);
-		spin_unlock(&priv->lock);
-		gntdev_free_map(map);
-		return err;
-	}
+	if (copy_to_user(u, &op, sizeof(op)) != 0)
+		return -EFAULT;
+
 	return 0;
 }
 
@@ -464,11 +448,12 @@ static long gntdev_ioctl_unmap_grant_ref(struct
gntdev_priv *priv,
 
 	spin_lock(&priv->lock);
 	map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
-	if (map)
-		err = gntdev_del_map(map);
+	if (map) {
+		list_del(&map->next);
+		gntdev_put_map(map);
+		err = 0;
+	}
 	spin_unlock(&priv->lock);
-	if (!err)
-		gntdev_free_map(map);
 	return err;
 }
 
@@ -548,6 +533,8 @@ static int gntdev_mmap(struct file *flip, struct
vm_area_struct *vma)
 		goto unlock_out;
 	}
 
+	atomic_inc(&map->users);
+
 	vma->vm_ops = &gntdev_vmops;
 
 	vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP;
-- 
1.7.3.4
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Feb-03  17:19 UTC
[Xen-devel] [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
HVM does not allow direct PTE modification, so instead we request
that Xen change its internal p2m mappings on the allocated pages and
map the memory into userspace normally.
Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/gntdev.c      |  117 ++++++++++++++++++++++++++++++++-------------
 drivers/xen/grant-table.c |    6 ++
 2 files changed, 89 insertions(+), 34 deletions(-)
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 1581403..58fddf3 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -32,6 +32,7 @@
 #include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
+#include <linux/highmem.h>
 
 #include <xen/xen.h>
 #include <xen/grant_table.h>
@@ -52,6 +53,8 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that
may be mapped by "
 
 static atomic_t pages_mapped = ATOMIC_INIT(0);
 
+static int use_ptemod;
+
 struct gntdev_priv {
 	struct list_head maps;
 	/* lock protects maps from concurrent changes */
@@ -74,6 +77,8 @@ struct grant_map {
 	struct page **pages;
 };
 
+static int unmap_grant_pages(struct grant_map *map, int offset, int pages);
+
 /* ------------------------------------------------------------------ */
 
 static void gntdev_print_maps(struct gntdev_priv *priv,
@@ -179,11 +184,34 @@ static void gntdev_put_map(struct grant_map *map)
 
 	atomic_sub(map->count, &pages_mapped);
 
-	if (map->pages)
+	if (map->pages) {
+		if (!use_ptemod)
+			unmap_grant_pages(map, 0, map->count);
+
 		for (i = 0; i < map->count; i++) {
-			if (map->pages[i])
+			uint32_t check, *tmp;
+			if (!map->pages[i])
+				continue;
+			/* XXX When unmapping in an HVM domain, Xen will
+			 * sometimes end up mapping the GFN to an invalid MFN.
+			 * In this case, writes will be discarded and reads will
+			 * return all 0xFF bytes.  Leak these unusable GFNs
+			 * until Xen supports fixing their p2m mapping.
+			 *
+			 * Confirmed present in Xen 4.1-RC3 with HVM source
+			 */
+			tmp = kmap(map->pages[i]);
+			*tmp = 0xdeaddead;
+			mb();
+			check = *tmp;
+			kunmap(map->pages[i]);
+			if (check == 0xdeaddead)
 				__free_page(map->pages[i]);
+			else
+				pr_debug("Discard page %d=%ld\n", i,
+					page_to_pfn(map->pages[i]));
 		}
+	}
 	kfree(map->pages);
 	kfree(map->grants);
 	kfree(map->map_ops);
@@ -197,17 +225,16 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr,
void *data)
 {
 	struct grant_map *map = data;
 	unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
+	int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte;
 	u64 pte_maddr;
 
 	BUG_ON(pgnr >= map->count);
 	pte_maddr = arbitrary_virt_to_machine(pte).maddr;
 
-	gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr,
-			  GNTMAP_contains_pte | map->flags,
+	gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags,
 			  map->grants[pgnr].ref,
 			  map->grants[pgnr].domid);
-	gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr,
-			    GNTMAP_contains_pte | map->flags,
+	gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags,
 			    0 /* handle */);
 	return 0;
 }
@@ -215,6 +242,19 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr,
void *data)
 static int map_grant_pages(struct grant_map *map)
 {
 	int i, err = 0;
+	phys_addr_t addr;
+
+	if (!use_ptemod) {
+		for (i = 0; i < map->count; i++) {
+			addr = (phys_addr_t)
+				pfn_to_kaddr(page_to_pfn(map->pages[i]));
+			gnttab_set_map_op(&map->map_ops[i], addr, map->flags,
+				map->grants[i].ref,
+				map->grants[i].domid);
+			gnttab_set_unmap_op(&map->unmap_ops[i], addr,
+				map->flags, 0 /* handle */);
+		}
+	}
 
 	pr_debug("map %d+%d\n", map->index, map->count);
 	err = gnttab_map_refs(map->map_ops, map->pages, map->count);
@@ -259,17 +299,8 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
 	gntdev_put_map(map);
 }
 
-static int gntdev_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	pr_debug("vaddr %p, pgoff %ld (shouldn''t happen)\n",
-			vmf->virtual_address, vmf->pgoff);
-	vmf->flags = VM_FAULT_ERROR;
-	return 0;
-}
-
 static struct vm_operations_struct gntdev_vmops = {
 	.close = gntdev_vma_close,
-	.fault = gntdev_vma_fault,
 };
 
 /* ------------------------------------------------------------------ */
@@ -354,14 +385,16 @@ static int gntdev_open(struct inode *inode, struct file
*flip)
 	INIT_LIST_HEAD(&priv->maps);
 	spin_lock_init(&priv->lock);
 
-	priv->mm = get_task_mm(current);
-	if (!priv->mm) {
-		kfree(priv);
-		return -ENOMEM;
+	if (use_ptemod) {
+		priv->mm = get_task_mm(current);
+		if (!priv->mm) {
+			kfree(priv);
+			return -ENOMEM;
+		}
+		priv->mn.ops = &gntdev_mmu_ops;
+		ret = mmu_notifier_register(&priv->mn, priv->mm);
+		mmput(priv->mm);
 	}
-	priv->mn.ops = &gntdev_mmu_ops;
-	ret = mmu_notifier_register(&priv->mn, priv->mm);
-	mmput(priv->mm);
 
 	if (ret) {
 		kfree(priv);
@@ -389,7 +422,8 @@ static int gntdev_release(struct inode *inode, struct file
*flip)
 	}
 	spin_unlock(&priv->lock);
 
-	mmu_notifier_unregister(&priv->mn, priv->mm);
+	if (use_ptemod)
+		mmu_notifier_unregister(&priv->mn, priv->mm);
 	kfree(priv);
 	return 0;
 }
@@ -514,7 +548,7 @@ static int gntdev_mmap(struct file *flip, struct
vm_area_struct *vma)
 	int index = vma->vm_pgoff;
 	int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 	struct grant_map *map;
-	int err = -EINVAL;
+	int i, err = -EINVAL;
 
 	if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags &
VM_SHARED))
 		return -EINVAL;
@@ -526,9 +560,9 @@ static int gntdev_mmap(struct file *flip, struct
vm_area_struct *vma)
 	map = gntdev_find_map_index(priv, index, count);
 	if (!map)
 		goto unlock_out;
-	if (map->vma)
+	if (use_ptemod && map->vma)
 		goto unlock_out;
-	if (priv->mm != vma->vm_mm) {
+	if (use_ptemod && priv->mm != vma->vm_mm) {
 		printk(KERN_WARNING "Huh? Other mm?\n");
 		goto unlock_out;
 	}
@@ -540,20 +574,24 @@ static int gntdev_mmap(struct file *flip, struct
vm_area_struct *vma)
 	vma->vm_flags |= VM_RESERVED|VM_DONTCOPY|VM_DONTEXPAND|VM_PFNMAP;
 
 	vma->vm_private_data = map;
-	map->vma = vma;
 
-	map->flags = GNTMAP_host_map | GNTMAP_application_map;
+	if (use_ptemod)
+		map->vma = vma;
+
+	map->flags = GNTMAP_host_map;
 	if (!(vma->vm_flags & VM_WRITE))
 		map->flags |= GNTMAP_readonly;
 
 	spin_unlock(&priv->lock);
 
-	err = apply_to_page_range(vma->vm_mm, vma->vm_start,
-				  vma->vm_end - vma->vm_start,
-				  find_grant_ptes, map);
-	if (err) {
-		printk(KERN_WARNING "find_grant_ptes() failure.\n");
-		return err;
+	if (use_ptemod) {
+		err = apply_to_page_range(vma->vm_mm, vma->vm_start,
+					  vma->vm_end - vma->vm_start,
+					  find_grant_ptes, map);
+		if (err) {
+			printk(KERN_WARNING "find_grant_ptes() failure.\n");
+			return err;
+		}
 	}
 
 	err = map_grant_pages(map);
@@ -564,6 +602,15 @@ static int gntdev_mmap(struct file *flip, struct
vm_area_struct *vma)
 
 	map->is_mapped = 1;
 
+	if (!use_ptemod) {
+		for (i = 0; i < count; i++) {
+			err = vm_insert_page(vma, vma->vm_start + i*PAGE_SIZE,
+				map->pages[i]);
+			if (err)
+				return err;
+		}
+	}
+
 	return 0;
 
 unlock_out:
@@ -594,6 +641,8 @@ static int __init gntdev_init(void)
 	if (!xen_domain())
 		return -ENODEV;
 
+	use_ptemod = xen_pv_domain();
+
 	err = misc_register(&gntdev_miscdev);
 	if (err != 0) {
 		printk(KERN_ERR "Could not register gntdev device\n");
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 9ef54eb..9428ced 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -458,6 +458,9 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 	if (ret)
 		return ret;
 
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return ret;
+
 	for (i = 0; i < count; i++) {
 		/* m2p override only supported for GNTMAP_contains_pte mappings */
 		if (!(map_ops[i].flags & GNTMAP_contains_pte))
@@ -483,6 +486,9 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref
*unmap_ops,
 	if (ret)
 		return ret;
 
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return ret;
+
 	for (i = 0; i < count; i++) {
 		ret = m2p_remove_override(pages[i]);
 		if (ret)
-- 
1.7.3.4
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Feb-03  17:19 UTC
[Xen-devel] [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
This allows a userspace application to allocate a shared page for
implementing inter-domain communication or device drivers. These
shared pages can be mapped using the gntdev device or by the kernel
in another domain.
Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/Kconfig    |    8 +
 drivers/xen/Makefile   |    2 +
 drivers/xen/gntalloc.c |  486 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/xen/gntalloc.h |   50 +++++
 4 files changed, 546 insertions(+), 0 deletions(-)
 create mode 100644 drivers/xen/gntalloc.c
 create mode 100644 include/xen/gntalloc.h
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 19f1f3c..69d2cd5 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -142,6 +142,14 @@ config XEN_GNTDEV
 	help
 	  Allows userspace processes to use grants.
 
+config XEN_GRANT_DEV_ALLOC
+	tristate "User-space grant reference allocator driver"
+	depends on XEN
+	help
+	  Allows userspace processes to create pages with access granted
+	  to other domains. This can be used to implement frontend drivers
+	  or as part of an inter-domain shared memory channel.
+
 config XEN_PLATFORM_PCI
 	tristate "xen platform pci device driver"
 	depends on XEN_PVHVM && PCI
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 5c3b031..09364b9 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
 obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
 obj-$(CONFIG_XEN_DEV_EVTCHN)	+= xen-evtchn.o
 obj-$(CONFIG_XEN_GNTDEV)	+= xen-gntdev.o
+obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)	+= xen-gntalloc.o
 obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= pciback/
 obj-$(CONFIG_XENFS)		+= xenfs/
 obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
@@ -19,5 +20,6 @@ obj-$(CONFIG_XEN_DOM0)		+= pci.o
 
 xen-evtchn-y			:= evtchn.o
 xen-gntdev-y				:= gntdev.o
+xen-gntalloc-y				:= gntalloc.o
 
 xen-platform-pci-y		:= platform-pci.o
diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
new file mode 100644
index 0000000..d06bf2b
--- /dev/null
+++ b/drivers/xen/gntalloc.c
@@ -0,0 +1,486 @@
+/******************************************************************************
+ * gntalloc.c
+ *
+ * Device for creating grant references (in user-space) that may be shared
+ * with other domains.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * This driver exists to allow userspace programs in Linux to allocate kernel
+ * memory that will later be shared with another domain.  Without this device,
+ * Linux userspace programs cannot create grant references.
+ *
+ * How this stuff works:
+ *   X -> granting a page to Y
+ *   Y -> mapping the grant from X
+ *
+ *   1. X uses the gntalloc device to allocate a page of kernel memory, P.
+ *   2. X creates an entry in the grant table that says domid(Y) can access P.
+ *      This is done without a hypercall unless the grant table needs
expansion.
+ *   3. X gives the grant reference identifier, GREF, to Y.
+ *   4. Y maps the page, either directly into kernel memory for use in a
backend
+ *      driver, or via a the gntdev device to map into the address space of an
+ *      application running in Y. This is the first point at which Xen does any
+ *      tracking of the page.
+ *   5. A program in X mmap()s a segment of the gntalloc device that
corresponds
+ *      to the shared page, and can now communicate with Y over the shared
page.
+ *
+ *
+ * NOTE TO USERSPACE LIBRARIES:
+ *   The grant allocation and mmap()ing are, naturally, two separate
operations.
+ *   You set up the sharing by calling the create ioctl() and then the mmap().
+ *   Teardown requires munmap() and either close() or ioctl().
+ *
+ * WARNING: Since Xen does not allow a guest to forcibly end the use of a grant
+ * reference, this device can be used to consume kernel memory by leaving grant
+ * references mapped by another domain when an application exits. Therefore,
+ * there is a global limit on the number of pages that can be allocated. When
+ * all references to the page are unmapped, it will be freed during the next
+ * grant operation.
+ */
+
+#include <linux/atomic.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+#include <linux/types.h>
+#include <linux/list.h>
+
+#include <xen/xen.h>
+#include <xen/page.h>
+#include <xen/grant_table.h>
+#include <xen/gntalloc.h>
+
+static int limit = 1024;
+module_param(limit, int, 0644);
+MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by
"
+		"the gntalloc device");
+
+static LIST_HEAD(gref_list);
+static DEFINE_SPINLOCK(gref_lock);
+static int gref_size;
+
+/* Metadata on a grant reference. */
+struct gntalloc_gref {
+	struct list_head next_gref;  /* list entry gref_list */
+	struct list_head next_file;  /* list entry file->list, if open */
+	struct page *page;	     /* The shared page */
+	uint64_t file_index;         /* File offset for mmap() */
+	unsigned int users;          /* Use count - when zero, waiting on Xen */
+	grant_ref_t gref_id;         /* The grant reference number */
+};
+
+struct gntalloc_file_private_data {
+	struct list_head list;
+	uint64_t index;
+};
+
+static void __del_gref(struct gntalloc_gref *gref);
+
+static void do_cleanup(void)
+{
+	struct gntalloc_gref *gref, *n;
+	list_for_each_entry_safe(gref, n, &gref_list, next_gref) {
+		if (!gref->users)
+			__del_gref(gref);
+	}
+}
+
+static int add_grefs(struct ioctl_gntalloc_alloc_gref *op,
+	uint32_t *gref_ids, struct gntalloc_file_private_data *priv)
+{
+	int i, rc, readonly;
+	LIST_HEAD(queue_gref);
+	LIST_HEAD(queue_file);
+	struct gntalloc_gref *gref;
+
+	readonly = !(op->flags & GNTALLOC_FLAG_WRITABLE);
+	rc = -ENOMEM;
+	for (i = 0; i < op->count; i++) {
+		gref = kzalloc(sizeof(*gref), GFP_KERNEL);
+		if (!gref)
+			goto undo;
+		list_add_tail(&gref->next_gref, &queue_gref);
+		list_add_tail(&gref->next_file, &queue_file);
+		gref->users = 1;
+		gref->file_index = op->index + i * PAGE_SIZE;
+		gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO);
+		if (!gref->page)
+			goto undo;
+
+		/* Grant foreign access to the page. */
+		gref->gref_id = gnttab_grant_foreign_access(op->domid,
+			pfn_to_mfn(page_to_pfn(gref->page)), readonly);
+		if (gref->gref_id < 0) {
+			rc = gref->gref_id;
+			goto undo;
+		}
+		gref_ids[i] = gref->gref_id;
+	}
+
+	/* Add to gref lists. */
+	spin_lock(&gref_lock);
+	list_splice_tail(&queue_gref, &gref_list);
+	list_splice_tail(&queue_file, &priv->list);
+	spin_unlock(&gref_lock);
+
+	return 0;
+
+undo:
+	spin_lock(&gref_lock);
+	gref_size -= (op->count - i);
+
+	list_for_each_entry(gref, &queue_file, next_file) {
+		/* __del_gref does not remove from queue_file */
+		__del_gref(gref);
+	}
+
+	/* It''s possible for the target domain to map the just-allocated
grant
+	 * references by blindly guessing their IDs; if this is done, then
+	 * __del_gref will leave them in the queue_gref list. They need to be
+	 * added to the global list so that we can free them when they are no
+	 * longer referenced.
+	 */
+	if (unlikely(!list_empty(&queue_gref)))
+		list_splice_tail(&queue_gref, &gref_list);
+	spin_unlock(&gref_lock);
+	return rc;
+}
+
+static void __del_gref(struct gntalloc_gref *gref)
+{
+	if (gref->gref_id > 0) {
+		if (gnttab_query_foreign_access(gref->gref_id))
+			return;
+
+		if (!gnttab_end_foreign_access_ref(gref->gref_id, 0))
+			return;
+	}
+
+	gref_size--;
+	list_del(&gref->next_gref);
+
+	if (gref->page)
+		__free_page(gref->page);
+
+	kfree(gref);
+}
+
+/* finds contiguous grant references in a file, returns the first */
+static struct gntalloc_gref *find_grefs(struct gntalloc_file_private_data
*priv,
+		uint64_t index, uint32_t count)
+{
+	struct gntalloc_gref *rv = NULL, *gref;
+	list_for_each_entry(gref, &priv->list, next_file) {
+		if (gref->file_index == index && !rv)
+			rv = gref;
+		if (rv) {
+			if (gref->file_index != index)
+				return NULL;
+			index += PAGE_SIZE;
+			count--;
+			if (count == 0)
+				return rv;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * -------------------------------------
+ *  File operations.
+ * -------------------------------------
+ */
+static int gntalloc_open(struct inode *inode, struct file *filp)
+{
+	struct gntalloc_file_private_data *priv;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		goto out_nomem;
+	INIT_LIST_HEAD(&priv->list);
+
+	filp->private_data = priv;
+
+	pr_debug("%s: priv %p\n", __func__, priv);
+
+	return 0;
+
+out_nomem:
+	return -ENOMEM;
+}
+
+static int gntalloc_release(struct inode *inode, struct file *filp)
+{
+	struct gntalloc_file_private_data *priv = filp->private_data;
+	struct gntalloc_gref *gref;
+
+	pr_debug("%s: priv %p\n", __func__, priv);
+
+	spin_lock(&gref_lock);
+	while (!list_empty(&priv->list)) {
+		gref = list_entry(priv->list.next,
+			struct gntalloc_gref, next_file);
+		list_del(&gref->next_file);
+		gref->users--;
+		if (gref->users == 0)
+			__del_gref(gref);
+	}
+	kfree(priv);
+	spin_unlock(&gref_lock);
+
+	return 0;
+}
+
+static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv,
+		struct ioctl_gntalloc_alloc_gref __user *arg)
+{
+	int rc = 0;
+	struct ioctl_gntalloc_alloc_gref op;
+	uint32_t *gref_ids;
+
+	pr_debug("%s: priv %p\n", __func__, priv);
+
+	if (copy_from_user(&op, arg, sizeof(op))) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY);
+	if (!gref_ids) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	spin_lock(&gref_lock);
+	/* Clean up pages that were at zero (local) users but were still mapped
+	 * by remote domains. Since those pages count towards the limit that we
+	 * are about to enforce, removing them here is a good idea.
+	 */
+	do_cleanup();
+	if (gref_size + op.count > limit) {
+		spin_unlock(&gref_lock);
+		rc = -ENOSPC;
+		goto out_free;
+	}
+	gref_size += op.count;
+	op.index = priv->index;
+	priv->index += op.count * PAGE_SIZE;
+	spin_unlock(&gref_lock);
+
+	rc = add_grefs(&op, gref_ids, priv);
+	if (rc < 0)
+		goto out_free;
+
+	/* Once we finish add_grefs, it is unsafe to touch the new reference,
+	 * since it is possible for a concurrent ioctl to remove it (by guessing
+	 * its index). If the userspace application doesn''t provide valid
memory
+	 * to write the IDs to, then it will need to close the file in order to
+	 * release - which it will do by segfaulting when it tries to access the
+	 * IDs to close them.
+	 */
+	if (copy_to_user(arg, &op, sizeof(op))) {
+		rc = -EFAULT;
+		goto out_free;
+	}
+	if (copy_to_user(arg->gref_ids, gref_ids,
+			sizeof(gref_ids[0]) * op.count)) {
+		rc = -EFAULT;
+		goto out_free;
+	}
+
+out_free:
+	kfree(gref_ids);
+out:
+	return rc;
+}
+
+static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv,
+		void __user *arg)
+{
+	int i, rc = 0;
+	struct ioctl_gntalloc_dealloc_gref op;
+	struct gntalloc_gref *gref, *n;
+
+	pr_debug("%s: priv %p\n", __func__, priv);
+
+	if (copy_from_user(&op, arg, sizeof(op))) {
+		rc = -EFAULT;
+		goto dealloc_grant_out;
+	}
+
+	spin_lock(&gref_lock);
+	gref = find_grefs(priv, op.index, op.count);
+	if (gref) {
+		/* Remove from the file list only, and decrease reference count.
+		 * The later call to do_cleanup() will remove from gref_list and
+		 * free the memory if the pages aren''t mapped anywhere.
+		 */
+		for (i = 0; i < op.count; i++) {
+			n = list_entry(gref->next_file.next,
+				struct gntalloc_gref, next_file);
+			list_del(&gref->next_file);
+			gref->users--;
+			gref = n;
+		}
+	} else {
+		rc = -EINVAL;
+	}
+
+	do_cleanup();
+
+	spin_unlock(&gref_lock);
+dealloc_grant_out:
+	return rc;
+}
+
+static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
+		unsigned long arg)
+{
+	struct gntalloc_file_private_data *priv = filp->private_data;
+
+	switch (cmd) {
+	case IOCTL_GNTALLOC_ALLOC_GREF:
+		return gntalloc_ioctl_alloc(priv, (void __user *)arg);
+
+	case IOCTL_GNTALLOC_DEALLOC_GREF:
+		return gntalloc_ioctl_dealloc(priv, (void __user *)arg);
+
+	default:
+		return -ENOIOCTLCMD;
+	}
+
+	return 0;
+}
+
+static void gntalloc_vma_close(struct vm_area_struct *vma)
+{
+	struct gntalloc_gref *gref = vma->vm_private_data;
+	if (!gref)
+		return;
+
+	spin_lock(&gref_lock);
+	gref->users--;
+	if (gref->users == 0)
+		__del_gref(gref);
+	spin_unlock(&gref_lock);
+}
+
+static struct vm_operations_struct gntalloc_vmops = {
+	.close = gntalloc_vma_close,
+};
+
+static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct gntalloc_file_private_data *priv = filp->private_data;
+	struct gntalloc_gref *gref;
+	int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	int rv, i;
+
+	pr_debug("%s: priv %p, page %lu+%d\n", __func__,
+		       priv, vma->vm_pgoff, count);
+
+	if (!(vma->vm_flags & VM_SHARED)) {
+		printk(KERN_ERR "%s: Mapping must be shared.\n", __func__);
+		return -EINVAL;
+	}
+
+	spin_lock(&gref_lock);
+	gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count);
+	if (gref == NULL) {
+		rv = -ENOENT;
+		pr_debug("%s: Could not find grant reference",
+				__func__);
+		goto out_unlock;
+	}
+
+	vma->vm_private_data = gref;
+
+	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_DONTCOPY;
+	vma->vm_flags |= VM_PFNMAP | VM_PFN_AT_MMAP;
+
+	vma->vm_ops = &gntalloc_vmops;
+
+	for (i = 0; i < count; i++) {
+		gref->users++;
+		rv = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE,
+				gref->page);
+		if (rv)
+			goto out_unlock;
+
+		gref = list_entry(gref->next_file.next,
+				struct gntalloc_gref, next_file);
+	}
+	rv = 0;
+
+out_unlock:
+	spin_unlock(&gref_lock);
+	return rv;
+}
+
+static const struct file_operations gntalloc_fops = {
+	.owner = THIS_MODULE,
+	.open = gntalloc_open,
+	.release = gntalloc_release,
+	.unlocked_ioctl = gntalloc_ioctl,
+	.mmap = gntalloc_mmap
+};
+
+/*
+ * -------------------------------------
+ * Module creation/destruction.
+ * -------------------------------------
+ */
+static struct miscdevice gntalloc_miscdev = {
+	.minor	= MISC_DYNAMIC_MINOR,
+	.name	= "xen/gntalloc",
+	.fops	= &gntalloc_fops,
+};
+
+static int __init gntalloc_init(void)
+{
+	int err;
+
+	if (!xen_domain())
+		return -ENODEV;
+
+	err = misc_register(&gntalloc_miscdev);
+	if (err != 0) {
+		printk(KERN_ERR "Could not register misc gntalloc device\n");
+		return err;
+	}
+
+	pr_debug("Created grant allocation device at %d,%d\n",
+			MISC_MAJOR, gntalloc_miscdev.minor);
+
+	return 0;
+}
+
+static void __exit gntalloc_exit(void)
+{
+	misc_deregister(&gntalloc_miscdev);
+}
+
+module_init(gntalloc_init);
+module_exit(gntalloc_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Carter Weatherly <carter.weatherly@jhuapl.edu>,
"
+		"Daniel De Graaf <dgdegra@tycho.nsa.gov>");
+MODULE_DESCRIPTION("User-space grant reference allocator driver");
diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h
new file mode 100644
index 0000000..bc3b85e
--- /dev/null
+++ b/include/xen/gntalloc.h
@@ -0,0 +1,50 @@
+/******************************************************************************
+ * gntalloc.h
+ *
+ * Interface to /dev/xen/gntalloc.
+ *
+ * Author: Daniel De Graaf <dgdegra@tycho.nsa.gov>
+ *
+ * This file is in the public domain.
+ */
+
+#ifndef __LINUX_PUBLIC_GNTALLOC_H__
+#define __LINUX_PUBLIC_GNTALLOC_H__
+
+/*
+ * Allocates a new page and creates a new grant reference.
+ */
+#define IOCTL_GNTALLOC_ALLOC_GREF \
+_IOC(_IOC_NONE, ''G'', 5, sizeof(struct
ioctl_gntalloc_alloc_gref))
+struct ioctl_gntalloc_alloc_gref {
+	/* IN parameters */
+	/* The ID of the domain to be given access to the grants. */
+	uint16_t domid;
+	/* Flags for this mapping */
+	uint16_t flags;
+	/* Number of pages to map */
+	uint32_t count;
+	/* OUT parameters */
+	/* The offset to be used on a subsequent call to mmap(). */
+	uint64_t index;
+	/* The grant references of the newly created grant, one per page */
+	/* Variable size, depending on count */
+	uint32_t gref_ids[1];
+};
+
+#define GNTALLOC_FLAG_WRITABLE 1
+
+/*
+ * Deallocates the grant reference, allowing the associated page to be freed if
+ * no other domains are using it.
+ */
+#define IOCTL_GNTALLOC_DEALLOC_GREF \
+_IOC(_IOC_NONE, ''G'', 6, sizeof(struct
ioctl_gntalloc_dealloc_gref))
+struct ioctl_gntalloc_dealloc_gref {
+	/* IN parameters */
+	/* The offset returned in the map operation */
+	uint64_t index;
+	/* Number of references to unmap */
+	uint32_t count;
+};
+#endif /* __LINUX_PUBLIC_GNTALLOC_H__ */
-- 
1.7.3.4
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Feb-03  17:19 UTC
[Xen-devel] [PATCH 6/6] xen/gntalloc, gntdev: Add unmap notify ioctl
This ioctl allows the users of a shared page to be notified when
the other end exits abnormally.
Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/gntalloc.c |   59 ++++++++++++++++++++++++++++++++++++++++++++++
 drivers/xen/gntdev.c   |   61 +++++++++++++++++++++++++++++++++++++++++++++++-
 include/xen/gntalloc.h |   28 ++++++++++++++++++++++
 include/xen/gntdev.h   |   27 +++++++++++++++++++++
 4 files changed, 174 insertions(+), 1 deletions(-)
diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
index d06bf2b..a7ffdfe 100644
--- a/drivers/xen/gntalloc.c
+++ b/drivers/xen/gntalloc.c
@@ -60,11 +60,13 @@
 #include <linux/uaccess.h>
 #include <linux/types.h>
 #include <linux/list.h>
+#include <linux/highmem.h>
 
 #include <xen/xen.h>
 #include <xen/page.h>
 #include <xen/grant_table.h>
 #include <xen/gntalloc.h>
+#include <xen/events.h>
 
 static int limit = 1024;
 module_param(limit, int, 0644);
@@ -75,6 +77,12 @@ static LIST_HEAD(gref_list);
 static DEFINE_SPINLOCK(gref_lock);
 static int gref_size;
 
+struct notify_info {
+	uint16_t pgoff:12;    /* Bits 0-11: Offset of the byte to clear */
+	uint16_t flags:2;     /* Bits 12-13: Unmap notification flags */
+	int event;            /* Port (event channel) to notify */
+};
+
 /* Metadata on a grant reference. */
 struct gntalloc_gref {
 	struct list_head next_gref;  /* list entry gref_list */
@@ -83,6 +91,7 @@ struct gntalloc_gref {
 	uint64_t file_index;         /* File offset for mmap() */
 	unsigned int users;          /* Use count - when zero, waiting on Xen */
 	grant_ref_t gref_id;         /* The grant reference number */
+	struct notify_info notify;   /* Unmap notification */
 };
 
 struct gntalloc_file_private_data {
@@ -164,6 +173,16 @@ undo:
 
 static void __del_gref(struct gntalloc_gref *gref)
 {
+	if (gref->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
+		uint8_t *tmp = kmap(gref->page);
+		tmp[gref->notify.pgoff] = 0;
+		kunmap(gref->page);
+	}
+	if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT)
+		notify_remote_via_evtchn(gref->notify.event);
+
+	gref->notify.flags = 0;
+
 	if (gref->gref_id > 0) {
 		if (gnttab_query_foreign_access(gref->gref_id))
 			return;
@@ -349,6 +368,43 @@ dealloc_grant_out:
 	return rc;
 }
 
+static long gntalloc_ioctl_unmap_notify(struct gntalloc_file_private_data
*priv,
+		void __user *arg)
+{
+	struct ioctl_gntalloc_unmap_notify op;
+	struct gntalloc_gref *gref;
+	uint64_t index;
+	int pgoff;
+	int rc;
+
+	if (copy_from_user(&op, arg, sizeof(op)))
+		return -EFAULT;
+
+	index = op.index & ~(PAGE_SIZE - 1);
+	pgoff = op.index & (PAGE_SIZE - 1);
+
+	spin_lock(&gref_lock);
+
+	gref = find_grefs(priv, index, 1);
+	if (!gref) {
+		rc = -ENOENT;
+		goto unlock_out;
+	}
+
+	if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) {
+		rc = -EINVAL;
+		goto unlock_out;
+	}
+
+	gref->notify.flags = op.action;
+	gref->notify.pgoff = pgoff;
+	gref->notify.event = op.event_channel_port;
+	rc = 0;
+ unlock_out:
+	spin_unlock(&gref_lock);
+	return rc;
+}
+
 static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
 		unsigned long arg)
 {
@@ -361,6 +417,9 @@ static long gntalloc_ioctl(struct file *filp, unsigned int
cmd,
 	case IOCTL_GNTALLOC_DEALLOC_GREF:
 		return gntalloc_ioctl_dealloc(priv, (void __user *)arg);
 
+	case IOCTL_GNTALLOC_SET_UNMAP_NOTIFY:
+		return gntalloc_ioctl_unmap_notify(priv, (void __user *)arg);
+
 	default:
 		return -ENOIOCTLCMD;
 	}
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 58fddf3..91706c2 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -37,6 +37,7 @@
 #include <xen/xen.h>
 #include <xen/grant_table.h>
 #include <xen/gntdev.h>
+#include <xen/events.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 #include <asm/xen/page.h>
@@ -63,6 +64,13 @@ struct gntdev_priv {
 	struct mmu_notifier mn;
 };
 
+struct unmap_notify {
+	int flags;
+	/* Address relative to the start of the grant_map */
+	int addr;
+	int event;
+};
+
 struct grant_map {
 	struct list_head next;
 	struct vm_area_struct *vma;
@@ -71,6 +79,7 @@ struct grant_map {
 	int flags;
 	int is_mapped;
 	atomic_t users;
+	struct unmap_notify notify;
 	struct ioctl_gntdev_grant_ref *grants;
 	struct gnttab_map_grant_ref   *map_ops;
 	struct gnttab_unmap_grant_ref *unmap_ops;
@@ -165,7 +174,7 @@ static struct grant_map *gntdev_find_map_index(struct
gntdev_priv *priv,
 	list_for_each_entry(map, &priv->maps, next) {
 		if (map->index != index)
 			continue;
-		if (map->count != count)
+		if (count && map->count != count)
 			continue;
 		return map;
 	}
@@ -184,6 +193,10 @@ static void gntdev_put_map(struct grant_map *map)
 
 	atomic_sub(map->count, &pages_mapped);
 
+	if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
+		notify_remote_via_evtchn(map->notify.event);
+	}
+
 	if (map->pages) {
 		if (!use_ptemod)
 			unmap_grant_pages(map, 0, map->count);
@@ -273,6 +286,16 @@ static int unmap_grant_pages(struct grant_map *map, int
offset, int pages)
 {
 	int i, err = 0;
 
+	if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
+		int pgno = (map->notify.addr >> PAGE_SHIFT);
+		if (pgno >= offset && pgno < offset + pages) {
+			uint8_t *tmp = kmap(map->pages[pgno]);
+			tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
+			kunmap(map->pages[pgno]);
+			map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
+		}
+	}
+
 	pr_debug("map %d+%d [%d+%d]\n", map->index, map->count,
offset, pages);
 	err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages, pages);
 	if (err)
@@ -518,6 +541,39 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct
gntdev_priv *priv,
 	return 0;
 }
 
+static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
+{
+	struct ioctl_gntdev_unmap_notify op;
+	struct grant_map *map;
+	int rc;
+
+	if (copy_from_user(&op, u, sizeof(op)))
+		return -EFAULT;
+
+	if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT))
+		return -EINVAL;
+
+	spin_lock(&priv->lock);
+
+	list_for_each_entry(map, &priv->maps, next) {
+		uint64_t begin = map->index << PAGE_SHIFT;
+		uint64_t end = (map->index + map->count) << PAGE_SHIFT;
+		if (op.index >= begin && op.index < end)
+			goto found;
+	}
+	rc = -ENOENT;
+	goto unlock_out;
+
+ found:
+	map->notify.flags = op.action;
+	map->notify.addr = op.index - (map->index << PAGE_SHIFT);
+	map->notify.event = op.event_channel_port;
+	rc = 0;
+ unlock_out:
+	spin_unlock(&priv->lock);
+	return rc;
+}
+
 static long gntdev_ioctl(struct file *flip,
 			 unsigned int cmd, unsigned long arg)
 {
@@ -534,6 +590,9 @@ static long gntdev_ioctl(struct file *flip,
 	case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
 		return gntdev_ioctl_get_offset_for_vaddr(priv, ptr);
 
+	case IOCTL_GNTDEV_SET_UNMAP_NOTIFY:
+		return gntdev_ioctl_notify(priv, ptr);
+
 	default:
 		pr_debug("priv %p, unknown cmd %x\n", priv, cmd);
 		return -ENOIOCTLCMD;
diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h
index bc3b85e..257cc8d 100644
--- a/include/xen/gntalloc.h
+++ b/include/xen/gntalloc.h
@@ -47,4 +47,32 @@ struct ioctl_gntalloc_dealloc_gref {
 	/* Number of references to unmap */
 	uint32_t count;
 };
+
+/*
+ * Sets up an unmap notification within the page, so that the other side can do
+ * cleanup if this side crashes. Required to implement cross-domain robust
+ * mutexes or close notification on communication channels.
+ *
+ * Each mapped page only supports one notification; multiple calls referring to
+ * the same page overwrite the previous notification. You must clear the
+ * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
+ * to occur.
+ */
+#define IOCTL_GNTALLOC_SET_UNMAP_NOTIFY \
+_IOC(_IOC_NONE, ''G'', 7, sizeof(struct
ioctl_gntalloc_unmap_notify))
+struct ioctl_gntalloc_unmap_notify {
+	/* IN parameters */
+	/* Index of a byte in the page */
+	uint64_t index;
+	/* Action(s) to take on unmap */
+	uint32_t action;
+	/* Event channel to notify */
+	uint32_t event_channel_port;
+};
+
+/* Clear (set to zero) the byte specified by index */
+#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
+/* Send an interrupt on the indicated event channel */
+#define UNMAP_NOTIFY_SEND_EVENT 0x2
+
 #endif /* __LINUX_PUBLIC_GNTALLOC_H__ */
diff --git a/include/xen/gntdev.h b/include/xen/gntdev.h
index eb23f41..5d9b9b4 100644
--- a/include/xen/gntdev.h
+++ b/include/xen/gntdev.h
@@ -116,4 +116,31 @@ struct ioctl_gntdev_set_max_grants {
 	uint32_t count;
 };
 
+/*
+ * Sets up an unmap notification within the page, so that the other side can do
+ * cleanup if this side crashes. Required to implement cross-domain robust
+ * mutexes or close notification on communication channels.
+ *
+ * Each mapped page only supports one notification; multiple calls referring to
+ * the same page overwrite the previous notification. You must clear the
+ * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
+ * to occur.
+ */
+#define IOCTL_GNTDEV_SET_UNMAP_NOTIFY \
+_IOC(_IOC_NONE, ''G'', 7, sizeof(struct
ioctl_gntdev_unmap_notify))
+struct ioctl_gntdev_unmap_notify {
+	/* IN parameters */
+	/* Index of a byte in the page */
+	uint64_t index;
+	/* Action(s) to take on unmap */
+	uint32_t action;
+	/* Event channel to notify */
+	uint32_t event_channel_port;
+};
+
+/* Clear (set to zero) the byte specified by index */
+#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
+/* Send an interrupt on the indicated event channel */
+#define UNMAP_NOTIFY_SEND_EVENT 0x2
+
 #endif /* __LINUX_PUBLIC_GNTDEV_H__ */
-- 
1.7.3.4
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Feb-03  19:16 UTC
[Xen-devel] [PATCH] xen-gntdev: Fix memory leak when mmap fails
Fix for a memory leak introduced in patch 3. Doesn''t merge cleanly
across
patch 4; I can post a corrected version of both patches if that would be
preferred.
------------------------------------------------------------->8
The error path did not decrement the reference count of the grant structure.
Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/gntdev.c |   14 ++++++++------
 1 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 91706c2..6c754a9 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -649,15 +649,13 @@ static int gntdev_mmap(struct file *flip, struct
vm_area_struct *vma)
 					  find_grant_ptes, map);
 		if (err) {
 			printk(KERN_WARNING "find_grant_ptes() failure.\n");
-			return err;
+			goto out_put_map;
 		}
 	}
 
 	err = map_grant_pages(map);
-	if (err) {
-		printk(KERN_WARNING "map_grant_pages() failure.\n");
-		return err;
-	}
+	if (err)
+		goto out_put_map;
 
 	map->is_mapped = 1;
 
@@ -666,7 +664,7 @@ static int gntdev_mmap(struct file *flip, struct
vm_area_struct *vma)
 			err = vm_insert_page(vma, vma->vm_start + i*PAGE_SIZE,
 				map->pages[i]);
 			if (err)
-				return err;
+				goto out_put_map;
 		}
 	}
 
@@ -675,6 +673,10 @@ static int gntdev_mmap(struct file *flip, struct
vm_area_struct *vma)
 unlock_out:
 	spin_unlock(&priv->lock);
 	return err;
+
+out_put_map:
+	gntdev_put_map(map);
+	return err;
 }
 
 static const struct file_operations gntdev_fops = {
-- 
1.7.3.4
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Konrad Rzeszutek Wilk
2011-Feb-07  23:14 UTC
Re: [Xen-devel] [PATCH v6] Userspace grant communication
On Thu, Feb 03, 2011 at 12:18:58PM -0500, Daniel De Graaf wrote:> Changes since v5: > - Added a tested xen version to workaround in #4 > - Cleaned up variable names & structures > - Clarified some of the cleanup in gntalloc > - Removed copyright statement from public-domain files > > [PATCH 1/6] xen-gntdev: Change page limit to be global instead of per-open > [PATCH 2/6] xen-gntdev: Use find_vma rather than iterating our vma list manually > [PATCH 3/6] xen-gntdev: Add reference counting to maps > [PATCH 4/6] xen-gntdev: Support mapping in HVM domains > [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver > [PATCH 6/6] xen/gntalloc,gntdev: Add unmap notify ioctl > > Test/Demo code (also updated):I played with this (two PV domains) and I got two failures: 1). When forgetting to unmap a grant page and quitting the tool 2). when unmapping appropriately. Attached are the logs from the domain exporting the grants (domain 4), and the faulting (domain 3 and domain 5). This is using this patchset (devel/gntalloc.v6) and sticking it on top of 2.6.38-rc2 with a whole bunch of patches. To be specific: git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git #master _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Feb-08  14:14 UTC
[Xen-devel] [PATCH] xen-gntdev: Fix unmap notify on PV domains
In paravirtualized guests, the struct page* for mappings is only a
placeholder, and cannot be used to access the granted memory. Use the
userspace mapping that we have set up in order to implement
UNMAP_NOTIFY_CLEAR_BYTE.
    
Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 06de2c0..2b67f15 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -287,7 +287,12 @@ static int unmap_grant_pages(struct grant_map *map, int
offset, int pages)
 
 	if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
 		int pgno = (map->notify.addr >> PAGE_SHIFT);
-		if (pgno >= offset && pgno < offset + pages) {
+		if (pgno >= offset && pgno < offset + pages &&
use_ptemod) {
+			void __user *tmp;
+			tmp = map->vma->vm_start + map->notify.addr;
+			copy_to_user(tmp, &err, 1);
+			map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
+		} else if (pgno >= offset && pgno < offset + pages) {
 			uint8_t *tmp = kmap(map->pages[pgno]);
 			tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
 			kunmap(map->pages[pgno]);
@@ -296,7 +301,7 @@ static int unmap_grant_pages(struct grant_map *map, int
offset, int pages)
 	}
 
 	pr_debug("map %d+%d [%d+%d]\n", map->index, map->count,
offset, pages);
-	err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages, pages);
+	err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset,
pages);
 	if (err)
 		return err;
 
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Konrad Rzeszutek Wilk
2011-Feb-08  21:49 UTC
Re: [Xen-devel] [PATCH v6] Userspace grant communication
On Mon, Feb 07, 2011 at 06:14:16PM -0500, Konrad Rzeszutek Wilk wrote:> On Thu, Feb 03, 2011 at 12:18:58PM -0500, Daniel De Graaf wrote: > > Changes since v5: > > - Added a tested xen version to workaround in #4 > > - Cleaned up variable names & structures > > - Clarified some of the cleanup in gntalloc > > - Removed copyright statement from public-domain files > > > > [PATCH 1/6] xen-gntdev: Change page limit to be global instead of per-open > > [PATCH 2/6] xen-gntdev: Use find_vma rather than iterating our vma list manually > > [PATCH 3/6] xen-gntdev: Add reference counting to maps > > [PATCH 4/6] xen-gntdev: Support mapping in HVM domains > > [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver > > [PATCH 6/6] xen/gntalloc,gntdev: Add unmap notify ioctl > > > > Test/Demo code (also updated): > > I played with this (two PV domains) and I got two failures:With your latest patch the issue described earlier disappear, but if I try to map an non-existed page (say I am confused) I get this: diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 75f8037..2dd2efa 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -17,7 +17,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#undef DEBUG +#define DEBUG 1 #include <linux/module.h> #include <linux/kernel.h> @@ -304,12 +304,16 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages) pr_debug("map %d+%d [%d+%d]\n", map->index, map->count, offset, pages); err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset, pages); - if (err) + if (err) { + printk(KERN_WARNING "grant unmapping hypercall failed: %d\n", err); return err; - + } for (i = 0; i < pages; i++) { - if (map->unmap_ops[offset+i].status) + if (map->unmap_ops[offset+i].status) { + printk(KERN_WARNING "%lx is still active: %d\n", + offset+i, map->unmap_ops[offset+i].status); err = -EINVAL; + } map->unmap_ops[offset+i].handle = 0; } return err; (it differs from #master with this patch):> map 1 9[ 234.522158] priv ffff88001afac420, add 1 [ 234.522742] gntdev_print_maps: maps list (priv ffff88001afac420) [ 234.523061] index 0, count 1 [new] [ 234.523892] map 0+1 at 7fdee5890000 (pgoff 0) [ 234.524464] map 0+1 Could not map grant 1.9: Invalid argument (map failed)> map 1 8[ 236.313135] priv ffff88001afac420, add 1 [ 236.313739] gntdev_print_maps: maps list (priv ffff88001afac420) [ 236.314062] index 0, count 1 [ 236.314062] index 1, count 1 [new] [ 236.315521] map 1+1 at 7fdee588f000 (pgoff 1) [ 236.316120] map 1+1 Mapped grant 1.8 as 4096=0x7fdee588f000> map 1 10[ 242.833149] priv ffff88001afac420, add 1 [ 242.833813] gntdev_print_maps: maps list (priv ffff88001afac420) [ 242.834063] index 0, count 1 [ 242.834063] index 1, count 1 [ 242.834063] index 2, count 1 [new] [ 242.836265] map 2+1 at 7fdee588e000 (pgoff 2) [ 242.836866] map 2+1 Could not map grant 1.10: Invalid argument (map failed)> map 3 10[ 247.224151] priv ffff88001afac420, add 1 [ 247.224808] gntdev_print_maps: maps list (priv ffff88001afac420) [ 247.225062] index 0, count 1 [ 247.225062] index 1, count 1 [ 247.225062] index 2, count 1 [ 247.225062] index 3, count 1 [new][ 247.227637] map 3+1 at 7fdee588d000 (pgoff 3) [ 247.228180] map 3+1 Could not map grant 3.10: Invalid argument (map failed)> unmap 140366365671424Unmapped page at 0x7fa9975d4000> show00(140595310424064,4096): id 99c6b8b4567 n=11000000 b=adjakasdaadaskda END> ^C[ 252.265142] map 0+1 (7fdee588f000 7fdee5890000)[ 252.265724] map 0+1 [0+1] [ 252.296327] 0 is still active: -1 [ 252.296834] ------------[ cut here ]------------ [ 252.297310] WARNING: at /home/konrad/ssd/linux/drivers/xen/gntdev.c:396 mn_release+0x6e/0x90() [ 252.297310] Modules linked in: xen_evtchn fbcon tileblit font bitblit softcursor ttm drm_kms_helper xen_blkfront xen_netfront xen_fbfront fb_sys_fops sysimgblt sysfillrect syscopyarea xen_kbdfront xenfs [last unloaded: dump_dma] [ 252.297310] Pid: 2313, comm: test_gnt Tainted: G W 2.6.38-rc4-xtt-00169-gb68565e-dirty #3 [ 252.297310] Call Trace: [ 252.297310] [<ffffffff81049580>] ? warn_slowpath_common+0x80/0x98 [ 252.297310] [<ffffffff810495ad>] ? warn_slowpath_null+0x15/0x17 [ 252.297310] [<ffffffff8126dcd6>] ? mn_release+0x6e/0x90 [ 252.297310] [<ffffffff810ec5f1>] ? __mmu_notifier_release+0x51/0x85 [ 252.297310] [<ffffffff810d9886>] ? exit_mmap+0x27/0x101 [ 252.297310] [<ffffffff810473c2>] ? mmput+0x30/0xd7 [ 252.297310] [<ffffffff8104b197>] ? exit_mm+0x129/0x136 [ 252.297310] [<ffffffff8104cc3f>] ? do_exit+0x208/0x7aa [ 252.297310] [<ffffffff81006689>] ? xen_force_evtchn_callback+0xd/0xf [ 252.297310] [<ffffffff81006cd2>] ? check_events+0x12/0x20 [ 252.297310] [<ffffffff8127ad0a>] ? put_ldisc+0xac/0xb1 [ 252.297310] [<ffffffff8127adf9>] ? tty_ldisc_deref+0x9/0xb [ 252.297310] [<ffffffff81273a97>] ? tty_read+0x8c/0xc8 [ 252.297310] [<ffffffff8100a598>] ? do_notify_resume+0x27/0x5f [ 252.297310] [<ffffffff8100ac60>] ? int_signal+0x12/0x17 [ 252.297310] ---[ end trace e3791f053b755549 ]--- [ 252.297310] map 1+1 (7fdee588f000 7fdee5890000) [ 252.297310] map 1+1 [0+1] [ 252.315756] map 2+1 (7fdee588d000 7fdee588e000) [ 252.316341] map 2+1 [0+1] [ 252.346990] 0 is still active: -4 [ 252.347569] ------------[ cut here ]------------ [ 252.347976] WARNING: at /home/konrad/ssd/linux/drivers/xen/gntdev.c:396 mn_release+0x6e/0x90() [ 252.347976] Modules linked in: xen_evtchn fbcon tileblit font bitblit softcursor ttm drm_kms_helper xen_blkfront xen_netfront xen_fbfront fb_sys_fops sysimgblt sysfillrect syscopyarea xen_kbdfront xenfs [last unloaded: dump_dma] [ 252.347976] Pid: 2313, comm: test_gnt Tainted: G W 2.6.38-rc4-xtt-00169-gb68565e-dirty #3 [ 252.347976] Call Trace: [ 252.347976] [<ffffffff81049580>] ? warn_slowpath_common+0x80/0x98 [ 252.347976] [<ffffffff810495ad>] ? warn_slowpath_null+0x15/0x17 [ 252.347976] [<ffffffff8126dcd6>] ? mn_release+0x6e/0x90 [ 252.347976] [<ffffffff810ec5f1>] ? __mmu_notifier_release+0x51/0x85 [ 252.347976] [<ffffffff810d9886>] ? exit_mmap+0x27/0x101 [ 252.347976] [<ffffffff810473c2>] ? mmput+0x30/0xd7 [ 252.347976] [<ffffffff8104b197>] ? exit_mm+0x129/0x136 [ 252.347976] [<ffffffff8104cc3f>] ? do_exit+0x208/0x7aa [ 252.347976] [<ffffffff81006689>] ? xen_force_evtchn_callback+0xd/0xf [ 252.347976] [<ffffffff81006cd2>] ? check_events+0x12/0x20 [ 252.347976] [<ffffffff811fd1ff>] ? do_raw_spin_lock+0x6b/0x120 [ 252.347976] [<ffffffff8104d253>] ? do_group_exit+0x72/0x9a [ 252.347976] [<ffffffff81059fcd>] ? get_signal_to_deliver+0x360/0x37f [ 252.347976] [<ffffffff812785e6>] ? n_tty_read+0x6d5/0x7ad [ 252.347976] [<ffffffff81009f53>] ? do_signal+0x6d/0x68b [ 252.347976] [<ffffffff8103dbd8>] ? __wake_up+0x3f/0x48 [ 252.347976] [<ffffffff8127ad0a>] ? put_ldisc+0xac/0xb1 [ 252.347976] [<ffffffff8127adf9>] ? tty_ldisc_deref+0x9/0xb [ 252.347976] [<ffffffff81273a97>] ? tty_read+0x8c/0xc8 [ 252.347976] [<ffffffff8100a598>] ? do_notify_resume+0x27/0x5f [ 252.347976] [<ffffffff8100ac60>] ? int_signal+0x12/0x17 [ 252.347976] map 3+1 (7fdee588d000 7fdee588e000) [ 252.347976] map 3+1 [0+1] [ 252.396334] 0 is still active: -4 [ 252.396925] ------------[ cut here ]------------ [ 252.397301] WARNING: at /home/konrad/ssd/linux/drivers/xen/gntdev.c:396 mn_release+0x6e/0x90() [ 252.397301] Modules linked in: xen_evtchn fbcon tileblit font bitblit softcursor ttm drm_kms_helper xen_blkfront xen_netfront xen_fbfront fb_sys_fops sysimgblt sysfillrect syscopyarea xen_kbdfront xenfs [last unloaded: dump_dma] [ 252.397301] Pid: 2313, comm: test_gnt Tainted: G W 2.6.38-rc4-xtt-00169-gb68565e-dirty #3 [ 252.397301] Call Trace: [ 252.397301] [<ffffffff81049580>] ? warn_slowpath_common+0x80/0x98 [ 252.397301] [<ffffffff810495ad>] ? warn_slowpath_null+0x15/0x17 [ 252.397301] [<ffffffff8126dcd6>] ? mn_release+0x6e/0x90 [ 252.397301] [<ffffffff810ec5f1>] ? __mmu_notifier_release+0x51/0x85 [ 252.397301] [<ffffffff810d9886>] ? exit_mmap+0x27/0x101 [ 252.397301] [<ffffffff810473c2>] ? mmput+0x30/0xd7 [ 252.397301] [<ffffffff8104b197>] ? exit_mm+0x129/0x136 [ 252.397301] [<ffffffff8104cc3f>] ? do_exit+0x208/0x7aa [ 252.397301] [<ffffffff81006689>] ? xen_force_evtchn_callback+0xd/0xf [ 252.397301] [<ffffffff81006cd2>] ? check_events+0x12/0x20 [ 252.397301] [<ffffffff811fd1ff>] ? do_raw_spin_lock+0x6b/0x120 [ 252.397301] [<ffffffff8104d253>] ? do_group_exit+0x72/0x9a [ 252.397301] [<ffffffff81059fcd>] ? get_signal_to_deliver+0x360/0x37f [ 252.397301] [<ffffffff812785e6>] ? n_tty_read+0x6d5/0x7ad [ 252.397301] [<ffffffff81009f53>] ? do_signal+0x6d/0x68b [ 252.397301] [<ffffffff8103dbd8>] ? __wake_up+0x3f/0x48 [ 252.397301] [<ffffffff8127ad0a>] ? put_ldisc+0xac/0xb1 [ 252.397301] [<ffffffff8127adf9>] ? tty_ldisc_deref+0x9/0xb [ 252.397301] [<ffffffff81273a97>] ? tty_read+0x8c/0xc8 [ 252.397301] [<ffffffff8100a598>] ? do_notify_resume+0x27/0x5f [ 252.397301] [<ffffffff8100ac60>] ? int_signal+0x12/0x17 [ 252.397301] ---[ end trace e3791f053b75554b ]--- [ 252.414807] close ffff88001e35ea10 [ 252.415439] priv ffff88001afac420 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Konrad Rzeszutek Wilk
2011-Feb-08  22:48 UTC
Re: [Xen-devel] [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
On Thu, Feb 03, 2011 at 12:19:03PM -0500, Daniel De Graaf wrote:> This allows a userspace application to allocate a shared page for > implementing inter-domain communication or device drivers. These > shared pages can be mapped using the gntdev device or by the kernel > in another domain. > > Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov> > --- > drivers/xen/Kconfig | 8 + > drivers/xen/Makefile | 2 + > drivers/xen/gntalloc.c | 486 ++++++++++++++++++++++++++++++++++++++++++++++++ > include/xen/gntalloc.h | 50 +++++ > 4 files changed, 546 insertions(+), 0 deletions(-) > create mode 100644 drivers/xen/gntalloc.c > create mode 100644 include/xen/gntalloc.h > > diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig > index 19f1f3c..69d2cd5 100644 > --- a/drivers/xen/Kconfig > +++ b/drivers/xen/Kconfig > @@ -142,6 +142,14 @@ config XEN_GNTDEV > help > Allows userspace processes to use grants. > > +config XEN_GRANT_DEV_ALLOC > + tristate "User-space grant reference allocator driver" > + depends on XEN > + help > + Allows userspace processes to create pages with access granted > + to other domains. This can be used to implement frontend drivers > + or as part of an inter-domain shared memory channel. > + > config XEN_PLATFORM_PCI > tristate "xen platform pci device driver" > depends on XEN_PVHVM && PCI > diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile > index 5c3b031..09364b9 100644 > --- a/drivers/xen/Makefile > +++ b/drivers/xen/Makefile > @@ -10,6 +10,7 @@ obj-$(CONFIG_XEN_XENCOMM) += xencomm.o > obj-$(CONFIG_XEN_BALLOON) += balloon.o > obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o > obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o > +obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o > obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback/ > obj-$(CONFIG_XENFS) += xenfs/ > obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o > @@ -19,5 +20,6 @@ obj-$(CONFIG_XEN_DOM0) += pci.o > > xen-evtchn-y := evtchn.o > xen-gntdev-y := gntdev.o > +xen-gntalloc-y := gntalloc.o > > xen-platform-pci-y := platform-pci.o > diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c > new file mode 100644 > index 0000000..d06bf2b > --- /dev/null > +++ b/drivers/xen/gntalloc.c > @@ -0,0 +1,486 @@ > +/****************************************************************************** > + * gntalloc.c > + * > + * Device for creating grant references (in user-space) that may be shared > + * with other domains. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program; if not, write to the Free Software > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA > + */ > + > +/* > + * This driver exists to allow userspace programs in Linux to allocate kernel > + * memory that will later be shared with another domain. Without this device, > + * Linux userspace programs cannot create grant references. > + * > + * How this stuff works: > + * X -> granting a page to Y > + * Y -> mapping the grant from X > + * > + * 1. X uses the gntalloc device to allocate a page of kernel memory, P. > + * 2. X creates an entry in the grant table that says domid(Y) can access P. > + * This is done without a hypercall unless the grant table needs expansion. > + * 3. X gives the grant reference identifier, GREF, to Y. > + * 4. Y maps the page, either directly into kernel memory for use in a backend > + * driver, or via a the gntdev device to map into the address space of an > + * application running in Y. This is the first point at which Xen does any > + * tracking of the page. > + * 5. A program in X mmap()s a segment of the gntalloc device that corresponds > + * to the shared page, and can now communicate with Y over the shared page. > + * > + * > + * NOTE TO USERSPACE LIBRARIES: > + * The grant allocation and mmap()ing are, naturally, two separate operations. > + * You set up the sharing by calling the create ioctl() and then the mmap(). > + * Teardown requires munmap() and either close() or ioctl(). > + * > + * WARNING: Since Xen does not allow a guest to forcibly end the use of a grant > + * reference, this device can be used to consume kernel memory by leaving grant > + * references mapped by another domain when an application exits. Therefore, > + * there is a global limit on the number of pages that can be allocated. When > + * all references to the page are unmapped, it will be freed during the next > + * grant operation. > + */ > + > +#include <linux/atomic.h> > +#include <linux/module.h> > +#include <linux/miscdevice.h> > +#include <linux/kernel.h> > +#include <linux/init.h> > +#include <linux/slab.h> > +#include <linux/fs.h> > +#include <linux/device.h> > +#include <linux/mm.h> > +#include <linux/uaccess.h> > +#include <linux/types.h> > +#include <linux/list.h> > + > +#include <xen/xen.h> > +#include <xen/page.h> > +#include <xen/grant_table.h> > +#include <xen/gntalloc.h> > + > +static int limit = 1024; > +module_param(limit, int, 0644); > +MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by " > + "the gntalloc device"); > + > +static LIST_HEAD(gref_list); > +static DEFINE_SPINLOCK(gref_lock); > +static int gref_size; > + > +/* Metadata on a grant reference. */ > +struct gntalloc_gref { > + struct list_head next_gref; /* list entry gref_list */ > + struct list_head next_file; /* list entry file->list, if open */ > + struct page *page; /* The shared page */ > + uint64_t file_index; /* File offset for mmap() */ > + unsigned int users; /* Use count - when zero, waiting on Xen */ > + grant_ref_t gref_id; /* The grant reference number */ > +}; > + > +struct gntalloc_file_private_data { > + struct list_head list; > + uint64_t index; > +}; > + > +static void __del_gref(struct gntalloc_gref *gref); > + > +static void do_cleanup(void) > +{ > + struct gntalloc_gref *gref, *n; > + list_for_each_entry_safe(gref, n, &gref_list, next_gref) { > + if (!gref->users) > + __del_gref(gref); > + } > +} > + > +static int add_grefs(struct ioctl_gntalloc_alloc_gref *op, > + uint32_t *gref_ids, struct gntalloc_file_private_data *priv) > +{ > + int i, rc, readonly; > + LIST_HEAD(queue_gref); > + LIST_HEAD(queue_file); > + struct gntalloc_gref *gref; > + > + readonly = !(op->flags & GNTALLOC_FLAG_WRITABLE); > + rc = -ENOMEM; > + for (i = 0; i < op->count; i++) { > + gref = kzalloc(sizeof(*gref), GFP_KERNEL); > + if (!gref) > + goto undo; > + list_add_tail(&gref->next_gref, &queue_gref); > + list_add_tail(&gref->next_file, &queue_file); > + gref->users = 1; > + gref->file_index = op->index + i * PAGE_SIZE; > + gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO); > + if (!gref->page) > + goto undo; > + > + /* Grant foreign access to the page. */ > + gref->gref_id = gnttab_grant_foreign_access(op->domid, > + pfn_to_mfn(page_to_pfn(gref->page)), readonly); > + if (gref->gref_id < 0) { > + rc = gref->gref_id; > + goto undo; > + } > + gref_ids[i] = gref->gref_id; > + } > + > + /* Add to gref lists. */ > + spin_lock(&gref_lock); > + list_splice_tail(&queue_gref, &gref_list); > + list_splice_tail(&queue_file, &priv->list); > + spin_unlock(&gref_lock); > + > + return 0; > + > +undo: > + spin_lock(&gref_lock); > + gref_size -= (op->count - i);So we decrease the gref_size by the count of the ones that we allocated..> + > + list_for_each_entry(gref, &queue_file, next_file) { > + /* __del_gref does not remove from queue_file */ > + __del_gref(gref);.. but __del_gref decreases the gref_size by one, so wouldn''t we decrease by too much?> + } > + > + /* It''s possible for the target domain to map the just-allocated grant > + * references by blindly guessing their IDs; if this is done, then > + * __del_gref will leave them in the queue_gref list. They need to be > + * added to the global list so that we can free them when they are no > + * longer referenced. > + */ > + if (unlikely(!list_empty(&queue_gref))) > + list_splice_tail(&queue_gref, &gref_list); > + spin_unlock(&gref_lock); > + return rc; > +} > + > +static void __del_gref(struct gntalloc_gref *gref) > +{ > + if (gref->gref_id > 0) { > + if (gnttab_query_foreign_access(gref->gref_id)) > + return; > + > + if (!gnttab_end_foreign_access_ref(gref->gref_id, 0)) > + return; > + } > + > + gref_size--; > + list_del(&gref->next_gref); > + > + if (gref->page) > + __free_page(gref->page); > + > + kfree(gref); > +} > + > +/* finds contiguous grant references in a file, returns the first */ > +static struct gntalloc_gref *find_grefs(struct gntalloc_file_private_data *priv, > + uint64_t index, uint32_t count) > +{ > + struct gntalloc_gref *rv = NULL, *gref; > + list_for_each_entry(gref, &priv->list, next_file) { > + if (gref->file_index == index && !rv) > + rv = gref; > + if (rv) { > + if (gref->file_index != index) > + return NULL; > + index += PAGE_SIZE; > + count--; > + if (count == 0) > + return rv; > + } > + } > + return NULL; > +} > + > +/* > + * ------------------------------------- > + * File operations. > + * ------------------------------------- > + */ > +static int gntalloc_open(struct inode *inode, struct file *filp) > +{ > + struct gntalloc_file_private_data *priv; > + > + priv = kzalloc(sizeof(*priv), GFP_KERNEL); > + if (!priv) > + goto out_nomem; > + INIT_LIST_HEAD(&priv->list); > + > + filp->private_data = priv; > + > + pr_debug("%s: priv %p\n", __func__, priv); > + > + return 0; > + > +out_nomem: > + return -ENOMEM; > +} > + > +static int gntalloc_release(struct inode *inode, struct file *filp) > +{ > + struct gntalloc_file_private_data *priv = filp->private_data; > + struct gntalloc_gref *gref; > + > + pr_debug("%s: priv %p\n", __func__, priv); > + > + spin_lock(&gref_lock); > + while (!list_empty(&priv->list)) { > + gref = list_entry(priv->list.next, > + struct gntalloc_gref, next_file); > + list_del(&gref->next_file); > + gref->users--; > + if (gref->users == 0) > + __del_gref(gref); > + } > + kfree(priv); > + spin_unlock(&gref_lock); > + > + return 0; > +} > + > +static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv, > + struct ioctl_gntalloc_alloc_gref __user *arg) > +{ > + int rc = 0; > + struct ioctl_gntalloc_alloc_gref op; > + uint32_t *gref_ids; > + > + pr_debug("%s: priv %p\n", __func__, priv); > + > + if (copy_from_user(&op, arg, sizeof(op))) { > + rc = -EFAULT; > + goto out; > + } > + > + gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY); > + if (!gref_ids) { > + rc = -ENOMEM; > + goto out; > + } > + > + spin_lock(&gref_lock); > + /* Clean up pages that were at zero (local) users but were still mapped > + * by remote domains. Since those pages count towards the limit that we > + * are about to enforce, removing them here is a good idea. > + */ > + do_cleanup(); > + if (gref_size + op.count > limit) { > + spin_unlock(&gref_lock); > + rc = -ENOSPC; > + goto out_free; > + } > + gref_size += op.count; > + op.index = priv->index; > + priv->index += op.count * PAGE_SIZE; > + spin_unlock(&gref_lock); > + > + rc = add_grefs(&op, gref_ids, priv); > + if (rc < 0) > + goto out_free;Should we cleanup up priv->index to its earlier value?> + > + /* Once we finish add_grefs, it is unsafe to touch the new reference, > + * since it is possible for a concurrent ioctl to remove it (by guessing > + * its index). If the userspace application doesn''t provide valid memory > + * to write the IDs to, then it will need to close the file in order to > + * release - which it will do by segfaulting when it tries to access the > + * IDs to close them. > + */ > + if (copy_to_user(arg, &op, sizeof(op))) { > + rc = -EFAULT; > + goto out_free; > + } > + if (copy_to_user(arg->gref_ids, gref_ids, > + sizeof(gref_ids[0]) * op.count)) { > + rc = -EFAULT; > + goto out_free; > + } > + > +out_free: > + kfree(gref_ids); > +out: > + return rc; > +} > + > +static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv, > + void __user *arg) > +{ > + int i, rc = 0; > + struct ioctl_gntalloc_dealloc_gref op; > + struct gntalloc_gref *gref, *n; > + > + pr_debug("%s: priv %p\n", __func__, priv); > + > + if (copy_from_user(&op, arg, sizeof(op))) { > + rc = -EFAULT; > + goto dealloc_grant_out; > + } > + > + spin_lock(&gref_lock); > + gref = find_grefs(priv, op.index, op.count); > + if (gref) { > + /* Remove from the file list only, and decrease reference count. > + * The later call to do_cleanup() will remove from gref_list and > + * free the memory if the pages aren''t mapped anywhere. > + */ > + for (i = 0; i < op.count; i++) { > + n = list_entry(gref->next_file.next, > + struct gntalloc_gref, next_file); > + list_del(&gref->next_file); > + gref->users--; > + gref = n; > + } > + } else { > + rc = -EINVAL; > + } > + > + do_cleanup(); > + > + spin_unlock(&gref_lock); > +dealloc_grant_out: > + return rc; > +} > + > +static long gntalloc_ioctl(struct file *filp, unsigned int cmd, > + unsigned long arg) > +{ > + struct gntalloc_file_private_data *priv = filp->private_data; > + > + switch (cmd) { > + case IOCTL_GNTALLOC_ALLOC_GREF: > + return gntalloc_ioctl_alloc(priv, (void __user *)arg); > + > + case IOCTL_GNTALLOC_DEALLOC_GREF: > + return gntalloc_ioctl_dealloc(priv, (void __user *)arg); > + > + default: > + return -ENOIOCTLCMD; > + } > + > + return 0; > +} > + > +static void gntalloc_vma_close(struct vm_area_struct *vma) > +{ > + struct gntalloc_gref *gref = vma->vm_private_data; > + if (!gref) > + return; > + > + spin_lock(&gref_lock); > + gref->users--; > + if (gref->users == 0) > + __del_gref(gref); > + spin_unlock(&gref_lock); > +} > + > +static struct vm_operations_struct gntalloc_vmops = { > + .close = gntalloc_vma_close, > +}; > + > +static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma) > +{ > + struct gntalloc_file_private_data *priv = filp->private_data; > + struct gntalloc_gref *gref; > + int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; > + int rv, i; > + > + pr_debug("%s: priv %p, page %lu+%d\n", __func__, > + priv, vma->vm_pgoff, count); > + > + if (!(vma->vm_flags & VM_SHARED)) { > + printk(KERN_ERR "%s: Mapping must be shared.\n", __func__); > + return -EINVAL; > + } > + > + spin_lock(&gref_lock); > + gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count); > + if (gref == NULL) { > + rv = -ENOENT; > + pr_debug("%s: Could not find grant reference", > + __func__); > + goto out_unlock; > + } > + > + vma->vm_private_data = gref; > + > + vma->vm_flags |= VM_RESERVED; > + vma->vm_flags |= VM_DONTCOPY; > + vma->vm_flags |= VM_PFNMAP | VM_PFN_AT_MMAP; > + > + vma->vm_ops = &gntalloc_vmops; > + > + for (i = 0; i < count; i++) { > + gref->users++; > + rv = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE, > + gref->page); > + if (rv) > + goto out_unlock; > + > + gref = list_entry(gref->next_file.next, > + struct gntalloc_gref, next_file); > + } > + rv = 0; > + > +out_unlock: > + spin_unlock(&gref_lock); > + return rv; > +} > + > +static const struct file_operations gntalloc_fops = { > + .owner = THIS_MODULE, > + .open = gntalloc_open, > + .release = gntalloc_release, > + .unlocked_ioctl = gntalloc_ioctl, > + .mmap = gntalloc_mmap > +}; > + > +/* > + * ------------------------------------- > + * Module creation/destruction. > + * ------------------------------------- > + */ > +static struct miscdevice gntalloc_miscdev = { > + .minor = MISC_DYNAMIC_MINOR, > + .name = "xen/gntalloc", > + .fops = &gntalloc_fops, > +}; > + > +static int __init gntalloc_init(void) > +{ > + int err; > + > + if (!xen_domain()) > + return -ENODEV; > + > + err = misc_register(&gntalloc_miscdev); > + if (err != 0) { > + printk(KERN_ERR "Could not register misc gntalloc device\n"); > + return err; > + } > + > + pr_debug("Created grant allocation device at %d,%d\n", > + MISC_MAJOR, gntalloc_miscdev.minor); > + > + return 0; > +} > + > +static void __exit gntalloc_exit(void) > +{ > + misc_deregister(&gntalloc_miscdev); > +} > + > +module_init(gntalloc_init); > +module_exit(gntalloc_exit); > + > +MODULE_LICENSE("GPL"); > +MODULE_AUTHOR("Carter Weatherly <carter.weatherly@jhuapl.edu>, " > + "Daniel De Graaf <dgdegra@tycho.nsa.gov>"); > +MODULE_DESCRIPTION("User-space grant reference allocator driver"); > diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h > new file mode 100644 > index 0000000..bc3b85e > --- /dev/null > +++ b/include/xen/gntalloc.h > @@ -0,0 +1,50 @@ > +/****************************************************************************** > + * gntalloc.h > + * > + * Interface to /dev/xen/gntalloc. > + * > + * Author: Daniel De Graaf <dgdegra@tycho.nsa.gov> > + * > + * This file is in the public domain. > + */ > + > +#ifndef __LINUX_PUBLIC_GNTALLOC_H__ > +#define __LINUX_PUBLIC_GNTALLOC_H__ > + > +/* > + * Allocates a new page and creates a new grant reference. > + */ > +#define IOCTL_GNTALLOC_ALLOC_GREF \ > +_IOC(_IOC_NONE, ''G'', 5, sizeof(struct ioctl_gntalloc_alloc_gref)) > +struct ioctl_gntalloc_alloc_gref { > + /* IN parameters */ > + /* The ID of the domain to be given access to the grants. */ > + uint16_t domid; > + /* Flags for this mapping */ > + uint16_t flags; > + /* Number of pages to map */ > + uint32_t count; > + /* OUT parameters */ > + /* The offset to be used on a subsequent call to mmap(). */ > + uint64_t index; > + /* The grant references of the newly created grant, one per page */ > + /* Variable size, depending on count */ > + uint32_t gref_ids[1]; > +}; > + > +#define GNTALLOC_FLAG_WRITABLE 1 > + > +/* > + * Deallocates the grant reference, allowing the associated page to be freed if > + * no other domains are using it. > + */ > +#define IOCTL_GNTALLOC_DEALLOC_GREF \ > +_IOC(_IOC_NONE, ''G'', 6, sizeof(struct ioctl_gntalloc_dealloc_gref)) > +struct ioctl_gntalloc_dealloc_gref { > + /* IN parameters */ > + /* The offset returned in the map operation */ > + uint64_t index; > + /* Number of references to unmap */ > + uint32_t count; > +}; > +#endif /* __LINUX_PUBLIC_GNTALLOC_H__ */ > -- > 1.7.3.4 > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xensource.com > http://lists.xensource.com/xen-devel_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Konrad Rzeszutek Wilk
2011-Feb-08  22:58 UTC
[Xen-devel] Re: [PATCH] xen-gntdev: Fix unmap notify on PV domains
On Tue, Feb 08, 2011 at 09:14:06AM -0500, Daniel De Graaf wrote:> In paravirtualized guests, the struct page* for mappings is only a > placeholder, and cannot be used to access the granted memory. Use the > userspace mapping that we have set up in order to implement > UNMAP_NOTIFY_CLEAR_BYTE. > > Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov> > > diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c > index 06de2c0..2b67f15 100644 > --- a/drivers/xen/gntdev.c > +++ b/drivers/xen/gntdev.c > @@ -287,7 +287,12 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages) > > if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { > int pgno = (map->notify.addr >> PAGE_SHIFT); > - if (pgno >= offset && pgno < offset + pages) { > + if (pgno >= offset && pgno < offset + pages && use_ptemod) { > + void __user *tmp; > + tmp = map->vma->vm_start + map->notify.addr; > + copy_to_user(tmp, &err, 1);The compiler really hates that. You could use a uninitialized_value macro to ignore it.> + map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE; > + } else if (pgno >= offset && pgno < offset + pages) { > uint8_t *tmp = kmap(map->pages[pgno]); > tmp[map->notify.addr & (PAGE_SIZE-1)] = 0; > kunmap(map->pages[pgno]); > @@ -296,7 +301,7 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages) > } > > pr_debug("map %d+%d [%d+%d]\n", map->index, map->count, offset, pages); > - err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages, pages); > + err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset, pages); > if (err) > return err; >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Feb-09  18:52 UTC
Re: [Xen-devel] [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver
On 02/08/2011 05:48 PM, Konrad Rzeszutek Wilk wrote:> On Thu, Feb 03, 2011 at 12:19:03PM -0500, Daniel De Graaf wrote: >> This allows a userspace application to allocate a shared page for >> implementing inter-domain communication or device drivers. These >> shared pages can be mapped using the gntdev device or by the kernel >> in another domain. >> >> Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov> >> --- >> drivers/xen/Kconfig | 8 + >> drivers/xen/Makefile | 2 + >> drivers/xen/gntalloc.c | 486 ++++++++++++++++++++++++++++++++++++++++++++++++ >> include/xen/gntalloc.h | 50 +++++ >> 4 files changed, 546 insertions(+), 0 deletions(-) >> create mode 100644 drivers/xen/gntalloc.c >> create mode 100644 include/xen/gntalloc.h >>[snip]>> +static int add_grefs(struct ioctl_gntalloc_alloc_gref *op, >> + uint32_t *gref_ids, struct gntalloc_file_private_data *priv) >> +{ >> + int i, rc, readonly; >> + LIST_HEAD(queue_gref); >> + LIST_HEAD(queue_file); >> + struct gntalloc_gref *gref; >> + >> + readonly = !(op->flags & GNTALLOC_FLAG_WRITABLE); >> + rc = -ENOMEM; >> + for (i = 0; i < op->count; i++) { >> + gref = kzalloc(sizeof(*gref), GFP_KERNEL); >> + if (!gref) >> + goto undo; >> + list_add_tail(&gref->next_gref, &queue_gref); >> + list_add_tail(&gref->next_file, &queue_file); >> + gref->users = 1; >> + gref->file_index = op->index + i * PAGE_SIZE; >> + gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO); >> + if (!gref->page) >> + goto undo; >> + >> + /* Grant foreign access to the page. */ >> + gref->gref_id = gnttab_grant_foreign_access(op->domid, >> + pfn_to_mfn(page_to_pfn(gref->page)), readonly); >> + if (gref->gref_id < 0) { >> + rc = gref->gref_id; >> + goto undo; >> + } >> + gref_ids[i] = gref->gref_id; >> + } >> + >> + /* Add to gref lists. */ >> + spin_lock(&gref_lock); >> + list_splice_tail(&queue_gref, &gref_list); >> + list_splice_tail(&queue_file, &priv->list); >> + spin_unlock(&gref_lock); >> + >> + return 0; >> + >> +undo: >> + spin_lock(&gref_lock); >> + gref_size -= (op->count - i); > > So we decrease the gref_size by the count of the ones that we > allocated..No, (op->count - i) is the number that we haven''t yet allocated. Those aren''t in queue_file, so __del_gref is never called on them.>> + >> + list_for_each_entry(gref, &queue_file, next_file) { >> + /* __del_gref does not remove from queue_file */ >> + __del_gref(gref); > > .. but __del_gref decreases the gref_size by one, so wouldn''t > we decrease by too much?[snip]>> +static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv, >> + struct ioctl_gntalloc_alloc_gref __user *arg) >> +{ >> + int rc = 0; >> + struct ioctl_gntalloc_alloc_gref op; >> + uint32_t *gref_ids; >> + >> + pr_debug("%s: priv %p\n", __func__, priv); >> + >> + if (copy_from_user(&op, arg, sizeof(op))) { >> + rc = -EFAULT; >> + goto out; >> + } >> + >> + gref_ids = kzalloc(sizeof(gref_ids[0]) * op.count, GFP_TEMPORARY); >> + if (!gref_ids) { >> + rc = -ENOMEM; >> + goto out; >> + } >> + >> + spin_lock(&gref_lock); >> + /* Clean up pages that were at zero (local) users but were still mapped >> + * by remote domains. Since those pages count towards the limit that we >> + * are about to enforce, removing them here is a good idea. >> + */ >> + do_cleanup(); >> + if (gref_size + op.count > limit) { >> + spin_unlock(&gref_lock); >> + rc = -ENOSPC; >> + goto out_free; >> + } >> + gref_size += op.count; >> + op.index = priv->index; >> + priv->index += op.count * PAGE_SIZE; >> + spin_unlock(&gref_lock); >> + >> + rc = add_grefs(&op, gref_ids, priv); >> + if (rc < 0) >> + goto out_free; > > Should we cleanup up priv->index to its earlier value?We could, but this could also introduce a possible race if two threads were running the ioctl at the same time. There''s no harm in letting the index increase, since it is 64-bit so doesn''t have overflow issues. -- Daniel De Graaf National Security Agency _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Feb-09  20:11 UTC
[Xen-devel] [PATCH] xen-gntdev: Use map->vma for checking map validity
The is_mapped flag used to be set at the completion of the map operation,
but was not checked in all error paths. Use map->vma instead, which will
now be cleared if the initial grant mapping fails.
Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/gntdev.c |    8 ++------
 1 files changed, 2 insertions(+), 6 deletions(-)
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 75f8037..4ca4262 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -77,7 +77,6 @@ struct grant_map {
 	int index;
 	int count;
 	int flags;
-	int is_mapped;
 	atomic_t users;
 	struct unmap_notify notify;
 	struct ioctl_gntdev_grant_ref *grants;
@@ -322,7 +321,6 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
 	struct grant_map *map = vma->vm_private_data;
 
 	pr_debug("close %p\n", vma);
-	map->is_mapped = 0;
 	map->vma = NULL;
 	vma->vm_private_data = NULL;
 	gntdev_put_map(map);
@@ -347,8 +345,6 @@ static void mn_invl_range_start(struct mmu_notifier *mn,
 	list_for_each_entry(map, &priv->maps, next) {
 		if (!map->vma)
 			continue;
-		if (!map->is_mapped)
-			continue;
 		if (map->vma->vm_start >= end)
 			continue;
 		if (map->vma->vm_end <= start)
@@ -663,8 +659,6 @@ static int gntdev_mmap(struct file *flip, struct
vm_area_struct *vma)
 	if (err)
 		goto out_put_map;
 
-	map->is_mapped = 1;
-
 	if (!use_ptemod) {
 		for (i = 0; i < count; i++) {
 			err = vm_insert_page(vma, vma->vm_start + i*PAGE_SIZE,
@@ -681,6 +675,8 @@ unlock_out:
 	return err;
 
 out_put_map:
+	if (use_ptemod)
+		map->vma = NULL;
 	gntdev_put_map(map);
 	return err;
 }
-- 
1.7.3.4
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Feb-09  20:12 UTC
[Xen-devel] [PATCH] xen-gntdev: Avoid unmapping ranges twice
In paravirtualized domains, mn_invl_page or mn_invl_range_start can
unmap a segment of a mapped region without unmapping all pages. When
the region is later released, the pages will be unmapped twice, leading
to an incorrect -EINVAL return.
Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/gntdev.c |   33 +++++++++++++++++++++++++++++++--
 1 files changed, 31 insertions(+), 2 deletions(-)
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 4ca4262..4687cd5 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -282,7 +282,7 @@ static int map_grant_pages(struct grant_map *map)
 	return err;
 }
 
-static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
+static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
 {
 	int i, err = 0;
 
@@ -301,7 +301,6 @@ static int unmap_grant_pages(struct grant_map *map, int
offset, int pages)
 		}
 	}
 
-	pr_debug("map %d+%d [%d+%d]\n", map->index, map->count,
offset, pages);
 	err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset,
pages);
 	if (err)
 		return err;
@@ -314,6 +313,36 @@ static int unmap_grant_pages(struct grant_map *map, int
offset, int pages)
 	return err;
 }
 
+static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
+{
+	int range, err = 0;
+
+	pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count,
offset, pages);
+
+	/* It is possible the requested range will have a "hole" where we
+	 * already unmapped some of the grants. Only unmap valid ranges.
+	 */
+	while (pages && !err) {
+		while (pages && !map->unmap_ops[offset].handle) {
+			offset++;
+			pages--;
+		}
+		range = 0;
+		while (range < pages) {
+			if (!map->unmap_ops[offset+range].handle) {
+				range--;
+				break;
+			}
+			range++;
+		}
+		err = __unmap_grant_pages(map, offset, range);
+		offset += range;
+		pages -= range;
+	}
+
+	return err;
+}
+
 /* ------------------------------------------------------------------ */
 
 static void gntdev_vma_close(struct vm_area_struct *vma)
-- 
1.7.3.4
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Feb-09  20:33 UTC
[Xen-devel] [PATCH] xen-gntdev: prevent using UNMAP_NOTIFY_CLEAR_BYTE on read-only mappings
Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 4687cd5..00e4644 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -291,7 +291,7 @@ static int __unmap_grant_pages(struct grant_map *map, int
offset, int pages)
 		if (pgno >= offset && pgno < offset + pages &&
use_ptemod) {
 			void __user *tmp;
 			tmp = map->vma->vm_start + map->notify.addr;
-			copy_to_user(tmp, &err, 1);
+			WARN_ON(copy_to_user(tmp, &err, 1));
 			map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
 		} else if (pgno >= offset && pgno < offset + pages) {
 			uint8_t *tmp = kmap(map->pages[pgno]);
@@ -596,6 +596,12 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv,
void __user *u)
 	goto unlock_out;
 
  found:
+	if ((op.action & UNMAP_NOTIFY_CLEAR_BYTE) &&
+			(op.flags & GNTMAP_readonly)) {
+		rc = -EINVAL;
+		goto unlock_out;
+	}
+
 	map->notify.flags = op.action;
 	map->notify.addr = op.index - (map->index << PAGE_SHIFT);
 	map->notify.event = op.event_channel_port;
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Feb-09  21:09 UTC
[Xen-devel] [PATCH v2] xen-gntdev: prevent using UNMAP_NOTIFY_CLEAR_BYTE on read-only mappings
Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 4687cd5..00e4644 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -291,7 +291,7 @@ static int __unmap_grant_pages(struct grant_map *map, int
offset, int pages)
 		if (pgno >= offset && pgno < offset + pages &&
use_ptemod) {
 			void __user *tmp;
 			tmp = map->vma->vm_start + map->notify.addr;
-			copy_to_user(tmp, &err, 1);
+			WARN_ON(copy_to_user(tmp, &err, 1));
 			map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
 		} else if (pgno >= offset && pgno < offset + pages) {
 			uint8_t *tmp = kmap(map->pages[pgno]);
@@ -596,6 +596,12 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv,
void __user *u)
 	goto unlock_out;
 
  found:
+	if ((op.action & UNMAP_NOTIFY_CLEAR_BYTE) &&
+			(map->flags & GNTMAP_readonly)) {
+		rc = -EINVAL;
+		goto unlock_out;
+	}
+
 	map->notify.flags = op.action;
 	map->notify.addr = op.index - (map->index << PAGE_SHIFT);
 	map->notify.event = op.event_channel_port;
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Feb-09  21:11 UTC
[Xen-devel] [PATCH] xen-gntdev: Avoid double-mapping memory
If an already-mapped area of the device was mapped into userspace a
second time, a hypercall was incorrectly made to remap the memory
again. Avoid the hypercall on later mmap calls, and fail the mmap call
if a writable mapping is attempted on a read-only range.
Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/gntdev.c |   15 ++++++++++++---
 1 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 00e4644..e9d9180 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -258,6 +258,9 @@ static int map_grant_pages(struct grant_map *map)
 	phys_addr_t addr;
 
 	if (!use_ptemod) {
+		/* Note: it could already be mapped */
+		if (map->map_ops[0].handle)
+			return 0;
 		for (i = 0; i < map->count; i++) {
 			addr = (phys_addr_t)
 				pfn_to_kaddr(page_to_pfn(map->pages[i]));
@@ -674,9 +677,15 @@ static int gntdev_mmap(struct file *flip, struct
vm_area_struct *vma)
 	if (use_ptemod)
 		map->vma = vma;
 
-	map->flags = GNTMAP_host_map;
-	if (!(vma->vm_flags & VM_WRITE))
-		map->flags |= GNTMAP_readonly;
+	if (map->flags) {
+		if ((vma->vm_flags & VM_WRITE) &&
+				(map->flags & GNTMAP_readonly))
+			return -EINVAL;
+	} else {
+		map->flags = GNTMAP_host_map;
+		if (!(vma->vm_flags & VM_WRITE))
+			map->flags |= GNTMAP_readonly;
+	}
 
 	spin_unlock(&priv->lock);
 
-- 
1.7.3.4
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Jeremy Fitzhardinge
2011-Feb-09  22:22 UTC
[Xen-devel] Re: [PATCH] xen-gntdev: prevent using UNMAP_NOTIFY_CLEAR_BYTE on read-only mappings
On 02/09/2011 12:33 PM, Daniel De Graaf wrote:> Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov> > > diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c > index 4687cd5..00e4644 100644 > --- a/drivers/xen/gntdev.c > +++ b/drivers/xen/gntdev.c > @@ -291,7 +291,7 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages) > if (pgno >= offset && pgno < offset + pages && use_ptemod) { > void __user *tmp; > tmp = map->vma->vm_start + map->notify.addr; > - copy_to_user(tmp, &err, 1); > + WARN_ON(copy_to_user(tmp, &err, 1));Please don''t put side-effecty predicates in WARN_ON/BUG_ON. There''s no useful report we can return? J> map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE; > } else if (pgno >= offset && pgno < offset + pages) { > uint8_t *tmp = kmap(map->pages[pgno]); > @@ -596,6 +596,12 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u) > goto unlock_out; > > found: > + if ((op.action & UNMAP_NOTIFY_CLEAR_BYTE) && > + (op.flags & GNTMAP_readonly)) { > + rc = -EINVAL; > + goto unlock_out; > + } > + > map->notify.flags = op.action; > map->notify.addr = op.index - (map->index << PAGE_SHIFT); > map->notify.event = op.event_channel_port; >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Feb-09  23:11 UTC
[Xen-devel] Re: [PATCH] xen-gntdev: prevent using UNMAP_NOTIFY_CLEAR_BYTE on read-only mappings
On 02/09/2011 05:22 PM, Jeremy Fitzhardinge wrote:> On 02/09/2011 12:33 PM, Daniel De Graaf wrote: >> Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov> >> >> diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c >> index 4687cd5..00e4644 100644 >> --- a/drivers/xen/gntdev.c >> +++ b/drivers/xen/gntdev.c >> @@ -291,7 +291,7 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages) >> if (pgno >= offset && pgno < offset + pages && use_ptemod) { >> void __user *tmp; >> tmp = map->vma->vm_start + map->notify.addr; >> - copy_to_user(tmp, &err, 1); >> + WARN_ON(copy_to_user(tmp, &err, 1)); > > Please don''t put side-effecty predicates in WARN_ON/BUG_ON. > > There''s no useful report we can return? > > JThis code is called when the application may be crashing or exiting, so there is not guaranteed to be a return path to the program. The change in the second part of this patch should prevent the copy_to_user from failing. Placing the call inside WARN_ON is clearly a bad idea. Will resend a more sane version of this patch with a comment explaining why we don''t return.> >> map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE; >> } else if (pgno >= offset && pgno < offset + pages) { >> uint8_t *tmp = kmap(map->pages[pgno]); >> @@ -596,6 +596,12 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u) >> goto unlock_out; >> >> found: >> + if ((op.action & UNMAP_NOTIFY_CLEAR_BYTE) && >> + (op.flags & GNTMAP_readonly)) { >> + rc = -EINVAL; >> + goto unlock_out; >> + } >> + >> map->notify.flags = op.action; >> map->notify.addr = op.index - (map->index << PAGE_SHIFT); >> map->notify.event = op.event_channel_port; >> >-- Daniel De Graaf National Security Agency _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Feb-09  23:15 UTC
[Xen-devel] [PATCH v3] xen-gntdev: prevent using UNMAP_NOTIFY_CLEAR_BYTE on read-only mappings
Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 drivers/xen/gntdev.c |   10 +++++++++-
 1 files changed, 9 insertions(+), 1 deletions(-)
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 2c4cc94..2a4733c 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -294,7 +294,9 @@ static int __unmap_grant_pages(struct grant_map *map, int
offset, int pages)
 		if (pgno >= offset && pgno < offset + pages &&
use_ptemod) {
 			void __user *tmp;
 			tmp = map->vma->vm_start + map->notify.addr;
-			copy_to_user(tmp, &err, 1);
+			err = copy_to_user(tmp, &err, 1);
+			if (err)
+				return err;
 			map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
 		} else if (pgno >= offset && pgno < offset + pages) {
 			uint8_t *tmp = kmap(map->pages[pgno]);
@@ -599,6 +601,12 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv,
void __user *u)
 	goto unlock_out;
 
  found:
+	if ((op.action & UNMAP_NOTIFY_CLEAR_BYTE) &&
+			(map->flags & GNTMAP_readonly)) {
+		rc = -EINVAL;
+		goto unlock_out;
+	}
+
 	map->notify.flags = op.action;
 	map->notify.addr = op.index - (map->index << PAGE_SHIFT);
 	map->notify.event = op.event_channel_port;
-- 
1.7.3.4
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Konrad Rzeszutek Wilk
2011-Feb-14  15:37 UTC
Re: [Xen-devel] [PATCH 6/6] xen/gntalloc, gntdev: Add unmap notify ioctl
> diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h > index bc3b85e..257cc8d 100644 > --- a/include/xen/gntalloc.h > +++ b/include/xen/gntalloc.h > @@ -47,4 +47,32 @@ struct ioctl_gntalloc_dealloc_gref { > /* Number of references to unmap */ > uint32_t count; > }; > + > +/* > + * Sets up an unmap notification within the page, so that the other side can do > + * cleanup if this side crashes. Required to implement cross-domain robust > + * mutexes or close notification on communication channels. > + * > + * Each mapped page only supports one notification; multiple calls referring to > + * the same page overwrite the previous notification. You must clear the > + * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it > + * to occur. > + */ > +#define IOCTL_GNTALLOC_SET_UNMAP_NOTIFY \ > +_IOC(_IOC_NONE, ''G'', 7, sizeof(struct ioctl_gntalloc_unmap_notify)) > +struct ioctl_gntalloc_unmap_notify { > + /* IN parameters */ > + /* Index of a byte in the page */ > + uint64_t index;That isn''t actually the whole truth. It is the index within the page and also the "offset used on a subsequent call to mmap()" (ioctl_gntdev_map_grant_ref) It might make sense to change the description to: "offset of the mmap region and the index within the page" perhaps?> + /* Action(s) to take on unmap */ > + uint32_t action; > + /* Event channel to notify */ > + uint32_t event_channel_port; > +}; > + > +/* Clear (set to zero) the byte specified by index */ > +#define UNMAP_NOTIFY_CLEAR_BYTE 0x1 > +/* Send an interrupt on the indicated event channel */ > +#define UNMAP_NOTIFY_SEND_EVENT 0x2 > + > #endif /* __LINUX_PUBLIC_GNTALLOC_H__ */ > diff --git a/include/xen/gntdev.h b/include/xen/gntdev.h > index eb23f41..5d9b9b4 100644 > --- a/include/xen/gntdev.h > +++ b/include/xen/gntdev.h > @@ -116,4 +116,31 @@ struct ioctl_gntdev_set_max_grants { > uint32_t count; > }; > > +/* > + * Sets up an unmap notification within the page, so that the other side can do > + * cleanup if this side crashes. Required to implement cross-domain robust > + * mutexes or close notification on communication channels. > + * > + * Each mapped page only supports one notification; multiple calls referring to > + * the same page overwrite the previous notification. You must clear the > + * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it > + * to occur. > + */ > +#define IOCTL_GNTDEV_SET_UNMAP_NOTIFY \ > +_IOC(_IOC_NONE, ''G'', 7, sizeof(struct ioctl_gntdev_unmap_notify)) > +struct ioctl_gntdev_unmap_notify { > + /* IN parameters */ > + /* Index of a byte in the page */ > + uint64_t index;Ditto.> + /* Action(s) to take on unmap */ > + uint32_t action; > + /* Event channel to notify */ > + uint32_t event_channel_port; > +}; > + > +/* Clear (set to zero) the byte specified by index */ > +#define UNMAP_NOTIFY_CLEAR_BYTE 0x1 > +/* Send an interrupt on the indicated event channel */ > +#define UNMAP_NOTIFY_SEND_EVENT 0x2 > + > #endif /* __LINUX_PUBLIC_GNTDEV_H__ */ > -- > 1.7.3.4 > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xensource.com > http://lists.xensource.com/xen-devel_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Konrad Rzeszutek Wilk
2011-Feb-14  15:51 UTC
Re: [Xen-devel] [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
> +static int unmap_grant_pages(struct grant_map *map, int offset, int pages); > + > /* ------------------------------------------------------------------ */ > > static void gntdev_print_maps(struct gntdev_priv *priv, > @@ -179,11 +184,34 @@ static void gntdev_put_map(struct grant_map *map) > > atomic_sub(map->count, &pages_mapped); > > - if (map->pages) > + if (map->pages) { > + if (!use_ptemod) > + unmap_grant_pages(map, 0, map->count);In the past (before this patch) the unmap_grant_pages would be called on the .ioctl, .release, and .close (on VMA). This adds it now also on the mmu_notifier_ops paths. Why? _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Konrad Rzeszutek Wilk
2011-Feb-14  16:14 UTC
[Xen-devel] Re: [PATCH v6] Userspace grant communication
On Thu, Feb 03, 2011 at 12:18:58PM -0500, Daniel De Graaf wrote:> Changes since v5: > - Added a tested xen version to workaround in #4 > - Cleaned up variable names & structures > - Clarified some of the cleanup in gntalloc > - Removed copyright statement from public-domain files > > [PATCH 1/6] xen-gntdev: Change page limit to be global instead of per-open > [PATCH 2/6] xen-gntdev: Use find_vma rather than iterating our vma list manually > [PATCH 3/6] xen-gntdev: Add reference counting to maps > [PATCH 4/6] xen-gntdev: Support mapping in HVM domains > [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver > [PATCH 6/6] xen/gntalloc,gntdev: Add unmap notify ioctlHey Daniel, I took a look at the patchset and then the bug-fixes: Daniel De Graaf (12): xen-gntdev: Change page limit to be global instead of per-open xen-gntdev: Use find_vma rather than iterating our vma list manually xen-gntdev: Add reference counting to maps xen-gntdev: Support mapping in HVM domains xen-gntalloc: Userspace grant allocation driver xen/gntalloc,gntdev: Add unmap notify ioctl xen-gntdev: Fix memory leak when mmap fails xen-gntdev: Fix unmap notify on PV domains xen-gntdev: Use map->vma for checking map validity xen-gntdev: Avoid unmapping ranges twice xen-gntdev: prevent using UNMAP_NOTIFY_CLEAR_BYTE on read-only mappings xen-gntdev: Avoid double-mapping memory And besides the two questions I posted today they look OK to me. However I have on question that I think points to a bug. Say that I call GNTDEV_MAP_GRANT_REF three times. The first time I provide a count of 4, then 1, and then once more 1. The first call would end up with priv having: priv-map[0] => map.count=4, map.user=1, map.index=0. We return op.index as 0. The next call: priv-map[0] => map.count=4, map.user=1, map.index=0. priv-map[1] => map.count=1, map.user=1, map.index=5 (gntdev_add_map ends up adding the index and the count from the previous map to it). We return op.index as 20480. The last call ends up with priv-map[0] => map.count=4, map.user=1, map.index=0. priv-map[1] => map.count=1, map.user=1, map.index=5 priv-map[2] => map.count=1, map.user=1, map.index=0. And we return op.index as = 0. It looks as gntdev_add_map ends does not do anything to the map.index if the "if (add->index + add->count < map->index)" comes out true, and we end up with op.index=0. Which naturally is incorrect as that is associated with grant_map that has four entries! I hadn''t yet tried to modify the nice test-case program you provided to see if this is can happen in practice, but it sure looks like it could? _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Konrad Rzeszutek Wilk
2011-Feb-14  16:38 UTC
Re: [Xen-devel] Re: [PATCH v6] Userspace grant communication
On Mon, Feb 14, 2011 at 11:14:03AM -0500, Konrad Rzeszutek Wilk wrote:> On Thu, Feb 03, 2011 at 12:18:58PM -0500, Daniel De Graaf wrote: > > Changes since v5: > > - Added a tested xen version to workaround in #4 > > - Cleaned up variable names & structures > > - Clarified some of the cleanup in gntalloc > > - Removed copyright statement from public-domain files > > > > [PATCH 1/6] xen-gntdev: Change page limit to be global instead of per-open > > [PATCH 2/6] xen-gntdev: Use find_vma rather than iterating our vma list manually > > [PATCH 3/6] xen-gntdev: Add reference counting to maps > > [PATCH 4/6] xen-gntdev: Support mapping in HVM domains > > [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver > > [PATCH 6/6] xen/gntalloc,gntdev: Add unmap notify ioctl > > Hey Daniel, > > I took a look at the patchset and then the bug-fixes: > > Daniel De Graaf (12): > xen-gntdev: Change page limit to be global instead of per-open > xen-gntdev: Use find_vma rather than iterating our vma list manually > xen-gntdev: Add reference counting to maps > xen-gntdev: Support mapping in HVM domains > xen-gntalloc: Userspace grant allocation driver > xen/gntalloc,gntdev: Add unmap notify ioctl > xen-gntdev: Fix memory leak when mmap fails > xen-gntdev: Fix unmap notify on PV domains > xen-gntdev: Use map->vma for checking map validity > xen-gntdev: Avoid unmapping ranges twice > xen-gntdev: prevent using UNMAP_NOTIFY_CLEAR_BYTE on read-only mappings > xen-gntdev: Avoid double-mapping memory > > > And besides the two questions I posted today they look OK to me. HoweverOh, and Jeremy''s comment about the WARN_ON on the copy_to_user.. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Feb-14  17:43 UTC
Re: [Xen-devel] [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
On 02/14/2011 10:51 AM, Konrad Rzeszutek Wilk wrote:>> +static int unmap_grant_pages(struct grant_map *map, int offset, int pages); >> + >> /* ------------------------------------------------------------------ */ >> >> static void gntdev_print_maps(struct gntdev_priv *priv, >> @@ -179,11 +184,34 @@ static void gntdev_put_map(struct grant_map *map) >> >> atomic_sub(map->count, &pages_mapped); >> >> - if (map->pages) >> + if (map->pages) { >> + if (!use_ptemod) >> + unmap_grant_pages(map, 0, map->count); > > In the past (before this patch) the unmap_grant_pages would be called > on the .ioctl, .release, and .close (on VMA). This adds it now also > on the mmu_notifier_ops paths. Why? >This does not actually add the unmap on the mmu_notifier path. The MMU notifier is used only if use_ptemod is true, and unmap_grant_pages is only called when use_ptemod is false. The HVM path for map and unmap is slightly different: HVM keeps the pages mapped until the area is deleted, while the PV case (use_ptemod being true) must unmap them when userspace unmaps the range. In the normal use case, this makes no difference to users since unmap time is deletion time. -- Daniel De Graaf National Security Agency _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Feb-14  17:55 UTC
[Xen-devel] Re: [PATCH v6] Userspace grant communication
On 02/14/2011 11:14 AM, Konrad Rzeszutek Wilk wrote:> On Thu, Feb 03, 2011 at 12:18:58PM -0500, Daniel De Graaf wrote: >> Changes since v5: >> - Added a tested xen version to workaround in #4 >> - Cleaned up variable names & structures >> - Clarified some of the cleanup in gntalloc >> - Removed copyright statement from public-domain files >> >> [PATCH 1/6] xen-gntdev: Change page limit to be global instead of per-open >> [PATCH 2/6] xen-gntdev: Use find_vma rather than iterating our vma list manually >> [PATCH 3/6] xen-gntdev: Add reference counting to maps >> [PATCH 4/6] xen-gntdev: Support mapping in HVM domains >> [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver >> [PATCH 6/6] xen/gntalloc,gntdev: Add unmap notify ioctl > > Hey Daniel, > > I took a look at the patchset and then the bug-fixes: > > Daniel De Graaf (12): > xen-gntdev: Change page limit to be global instead of per-open > xen-gntdev: Use find_vma rather than iterating our vma list manually > xen-gntdev: Add reference counting to maps > xen-gntdev: Support mapping in HVM domains > xen-gntalloc: Userspace grant allocation driver > xen/gntalloc,gntdev: Add unmap notify ioctl > xen-gntdev: Fix memory leak when mmap fails > xen-gntdev: Fix unmap notify on PV domains > xen-gntdev: Use map->vma for checking map validity > xen-gntdev: Avoid unmapping ranges twice > xen-gntdev: prevent using UNMAP_NOTIFY_CLEAR_BYTE on read-only mappings > xen-gntdev: Avoid double-mapping memory > > > And besides the two questions I posted today they look OK to me. However > I have on question that I think points to a bug. > > Say that I call GNTDEV_MAP_GRANT_REF three times. The first time I provide > a count of 4, then 1, and then once more 1. > > The first call would end up with priv having: > > priv-map[0] => map.count=4, map.user=1, map.index=0. We return op.index as 0. > > The next call: > > priv-map[0] => map.count=4, map.user=1, map.index=0. > priv-map[1] => map.count=1, map.user=1, map.index=5 (gntdev_add_map > ends up adding the index and the count from the previous map to it). We return op.index as 20480. >I think this will come out with map.index=4, op.index=8192, since the only entry in priv->maps has map->index = 0 and map->count = 4.> The last call ends up with > priv-map[0] => map.count=4, map.user=1, map.index=0. > priv-map[1] => map.count=1, map.user=1, map.index=5 > priv-map[2] => map.count=1, map.user=1, map.index=0. And we return > op.index as = 0. >How do we return that? The "goto done" branch should not be taken unless there is a hole in the existing priv->maps list created by a previous deletion. I see add->index starting at 0, then set to 4 and then 5, its final value.> It looks as gntdev_add_map ends does not do anything to the > map.index if the "if (add->index + add->count < map->index)" comes > out true, and we end up with op.index=0. Which naturally is > incorrect as that is associated with grant_map that has four entries! > > I hadn''t yet tried to modify the nice test-case program you provided > to see if this is can happen in practice, but it sure looks like it could?This code wasn''t changed from the old gntdev code. In gntalloc, I went with the much simpler method of an always-incrementing index for generating offsets; there''s no reason that that can''t be done here if it looks like there''s a mistake. The code does look correct to me, and I have tested it with variable-size grants (although not in the 4/1/1 configuration you suggested). -- Daniel De Graaf National Security Agency _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Feb-14  17:56 UTC
Re: [Xen-devel] Re: [PATCH v6] Userspace grant communication
On 02/14/2011 11:38 AM, Konrad Rzeszutek Wilk wrote:> On Mon, Feb 14, 2011 at 11:14:03AM -0500, Konrad Rzeszutek Wilk wrote: >> On Thu, Feb 03, 2011 at 12:18:58PM -0500, Daniel De Graaf wrote: >>> Changes since v5: >>> - Added a tested xen version to workaround in #4 >>> - Cleaned up variable names & structures >>> - Clarified some of the cleanup in gntalloc >>> - Removed copyright statement from public-domain files >>> >>> [PATCH 1/6] xen-gntdev: Change page limit to be global instead of per-open >>> [PATCH 2/6] xen-gntdev: Use find_vma rather than iterating our vma list manually >>> [PATCH 3/6] xen-gntdev: Add reference counting to maps >>> [PATCH 4/6] xen-gntdev: Support mapping in HVM domains >>> [PATCH 5/6] xen-gntalloc: Userspace grant allocation driver >>> [PATCH 6/6] xen/gntalloc,gntdev: Add unmap notify ioctl >> >> Hey Daniel, >> >> I took a look at the patchset and then the bug-fixes: >> >> Daniel De Graaf (12): >> xen-gntdev: Change page limit to be global instead of per-open >> xen-gntdev: Use find_vma rather than iterating our vma list manually >> xen-gntdev: Add reference counting to maps >> xen-gntdev: Support mapping in HVM domains >> xen-gntalloc: Userspace grant allocation driver >> xen/gntalloc,gntdev: Add unmap notify ioctl >> xen-gntdev: Fix memory leak when mmap fails >> xen-gntdev: Fix unmap notify on PV domains >> xen-gntdev: Use map->vma for checking map validity >> xen-gntdev: Avoid unmapping ranges twice >> xen-gntdev: prevent using UNMAP_NOTIFY_CLEAR_BYTE on read-only mappings >> xen-gntdev: Avoid double-mapping memory >> >> >> And besides the two questions I posted today they look OK to me. However > > Oh, and Jeremy''s comment about the WARN_ON on the copy_to_user.. >I did post an updated version of that patch without the bad WARN_ON. It still resolves to a WARN_ON in all paths, but it''s at a higher level. -- Daniel De Graaf National Security Agency _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Feb-14  18:07 UTC
Re: [Xen-devel] [PATCH 6/6] xen/gntalloc, gntdev: Add unmap notify ioctl
On 02/14/2011 10:37 AM, Konrad Rzeszutek Wilk wrote:>> diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h >> index bc3b85e..257cc8d 100644 >> --- a/include/xen/gntalloc.h >> +++ b/include/xen/gntalloc.h >> @@ -47,4 +47,32 @@ struct ioctl_gntalloc_dealloc_gref { >> /* Number of references to unmap */ >> uint32_t count; >> }; >> + >> +/* >> + * Sets up an unmap notification within the page, so that the other side can do >> + * cleanup if this side crashes. Required to implement cross-domain robust >> + * mutexes or close notification on communication channels. >> + * >> + * Each mapped page only supports one notification; multiple calls referring to >> + * the same page overwrite the previous notification. You must clear the >> + * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it >> + * to occur. >> + */ >> +#define IOCTL_GNTALLOC_SET_UNMAP_NOTIFY \ >> +_IOC(_IOC_NONE, ''G'', 7, sizeof(struct ioctl_gntalloc_unmap_notify)) >> +struct ioctl_gntalloc_unmap_notify { >> + /* IN parameters */ >> + /* Index of a byte in the page */ >> + uint64_t index; > > That isn''t actually the whole truth. It is the index within the page > and also the "offset used on a subsequent call to mmap()" (ioctl_gntdev_map_grant_ref) > > It might make sense to change the description to: "offset of the mmap region > and the index within the page" perhaps?How about: /* Offset in the file descriptor for a byte within the page (same as * used in mmap). If using UNMAP_NOTIFY_CLEAR_BYTE, this is the byte to * be cleared. Otherwise, it can be any byte in the page whose * notification we are adjusting. */ Would you prefer a patch that just changes the descriptions, or a repost of the patch that adds the ioctl with the better descriptions?>> + /* Action(s) to take on unmap */ >> + uint32_t action; >> + /* Event channel to notify */ >> + uint32_t event_channel_port; >> +}; >> + >> +/* Clear (set to zero) the byte specified by index */ >> +#define UNMAP_NOTIFY_CLEAR_BYTE 0x1 >> +/* Send an interrupt on the indicated event channel */ >> +#define UNMAP_NOTIFY_SEND_EVENT 0x2 >> + >> #endif /* __LINUX_PUBLIC_GNTALLOC_H__ */[and the same for gntdev] -- Daniel De Graaf National Security Agency _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Konrad Rzeszutek Wilk
2011-Feb-14  18:52 UTC
Re: [Xen-devel] [PATCH 4/6] xen-gntdev: Support mapping in HVM domains
On Mon, Feb 14, 2011 at 12:43:10PM -0500, Daniel De Graaf wrote:> On 02/14/2011 10:51 AM, Konrad Rzeszutek Wilk wrote: > >> +static int unmap_grant_pages(struct grant_map *map, int offset, int pages); > >> + > >> /* ------------------------------------------------------------------ */ > >> > >> static void gntdev_print_maps(struct gntdev_priv *priv, > >> @@ -179,11 +184,34 @@ static void gntdev_put_map(struct grant_map *map) > >> > >> atomic_sub(map->count, &pages_mapped); > >> > >> - if (map->pages) > >> + if (map->pages) { > >> + if (!use_ptemod) > >> + unmap_grant_pages(map, 0, map->count); > > > > In the past (before this patch) the unmap_grant_pages would be called > > on the .ioctl, .release, and .close (on VMA). This adds it now also > > on the mmu_notifier_ops paths. Why? > > > This does not actually add the unmap on the mmu_notifier path. The MMUDuh, you are right. I misread my notes. I meant that in the past the .ioctl, .release, and .close would just do gntdev_put_map, but now they are also calling the unmap_grant_pages (on the HVM path).> notifier is used only if use_ptemod is true, and unmap_grant_pages is > only called when use_ptemod is false... and that would explain it - you need to call it on, and the MMU notifier is unavailable for you on HVM case. Not exactly sure why that wasn''t clear before.> > The HVM path for map and unmap is slightly different: HVM keeps the pages > mapped until the area is deleted, while the PV case (use_ptemod being true) > must unmap them when userspace unmaps the range. In the normal use case, > this makes no difference to users since unmap time is deletion time.<nods> Let me augment the path description to contain this.> > -- > Daniel De Graaf > National Security Agency_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Konrad Rzeszutek Wilk
2011-Feb-14  19:04 UTC
[Xen-devel] Re: [PATCH v6] Userspace grant communication
> > And besides the two questions I posted today they look OK to me. However > > I have on question that I think points to a bug... snip ..> I think this will come out with map.index=4, op.index=8192, since the only > entry in priv->maps has map->index = 0 and map->count = 4.Duh! Yes, and that changes how this all works out. ..> much simpler method of an always-incrementing index for generating offsets; > there''s no reason that that can''t be done here if it looks like there''s a > mistake. The code does look correct to me, and I have tested it with variable-size > grants (although not in the 4/1/1 configuration you suggested).That is OK. Thanks for working through this example. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Konrad Rzeszutek Wilk
2011-Feb-14  19:21 UTC
Re: [Xen-devel] Re: [PATCH v6] Userspace grant communication
> >> And besides the two questions I posted today they look OK to me. However > > > > Oh, and Jeremy''s comment about the WARN_ON on the copy_to_user.. > > > > I did post an updated version of that patch without the bad WARN_ON. It still > resolves to a WARN_ON in all paths, but it''s at a higher level.Ah there it is. Look in git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git stable/gntalloc.v6 [maybe in an hour - git.kernel.org takes a bit of time to resync to the mirrors] to see if I am missing anything please. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel De Graaf
2011-Feb-14  20:55 UTC
Re: [Xen-devel] Re: [PATCH v6] Userspace grant communication
On 02/14/2011 02:21 PM, Konrad Rzeszutek Wilk wrote:>>>> And besides the two questions I posted today they look OK to me. However >>> >>> Oh, and Jeremy''s comment about the WARN_ON on the copy_to_user.. >>> >> >> I did post an updated version of that patch without the bad WARN_ON. It still >> resolves to a WARN_ON in all paths, but it''s at a higher level. > > Ah there it is. > > Look in git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git stable/gntalloc.v6 > > [maybe in an hour - git.kernel.org takes a bit of time to resync to the mirrors] > to see if I am missing anything please.9960be970cea52c1cb7d7c747ff6da367e1c01b5 looks good to me, nothing missing. -- Daniel De Graaf National Security Agency _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel