Olaf Hering
2011-Nov-08  21:04 UTC
[Xen-devel] [PATCH] RFC: mem_event: use wait queue when ring is full
# HG changeset patch
# User Olaf Hering <olaf@aepfle.de>
# Date 1320786230 -3600
# Node ID 601d9ee4f1364e5e05b69f24be5e6c3b33e428ca
# Parent  455f064fe54eeb57a43aa0c45a56cc4c4847d7a0
RFC: mem_event: use wait queue when ring is full
CAUTION: while the patch by itself is supposed to be complete,
the added usage of waitqueues will lead to guest hangs and
even sudden host reboots!
This change is based on an idea/patch from Adin Scannell.
If the ring is full, put the current vcpu to sleep if it belongs to the
target domain. The wakeup happens in the p2m_*_resume functions.
A request from another domain will use the existing ->req_producers
functionality because sleeping is not possible if the vcpu does not
belong to the target domain.
This change fixes also a bug in p2m_mem_paging_drop_page(). Up to now a
full ring will lead to harmless inconsistency in the pager,
Signed-off-by: Olaf Hering <olaf@aepfle.de>
diff -r 455f064fe54e -r 601d9ee4f136 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -4026,7 +4026,7 @@ static int hvm_memory_event_traps(long p
         return 1;
     
     rc = mem_event_check_ring(d, &d->mem_access);
-    if ( rc )
+    if ( rc < 0 )
         return rc;
     
     memset(&req, 0, sizeof(req));
diff -r 455f064fe54e -r 601d9ee4f136 xen/arch/x86/mm/mem_event.c
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -23,6 +23,7 @@
 
 #include <asm/domain.h>
 #include <xen/event.h>
+#include <xen/wait.h>
 #include <asm/p2m.h>
 #include <asm/mem_event.h>
 #include <asm/mem_paging.h>
@@ -39,6 +40,7 @@
 
 static int mem_event_enable(struct domain *d,
                             xen_domctl_mem_event_op_t *mec,
+                            int mem_event_bit,
                             struct mem_event_domain *med)
 {
     int rc;
@@ -94,8 +96,12 @@ static int mem_event_enable(struct domai
 
     mem_event_ring_lock_init(med);
 
+    med->mem_event_bit = mem_event_bit;
+
+    init_waitqueue_head(&med->wq);
+
     /* Wake any VCPUs paused for memory events */
-    mem_event_unpause_vcpus(d);
+    mem_event_unpause_vcpus(d, med);
 
     return 0;
 
@@ -111,6 +117,9 @@ static int mem_event_enable(struct domai
 
 static int mem_event_disable(struct mem_event_domain *med)
 {
+    if (!list_empty(&med->wq.list))
+        return -EBUSY;
+
     unmap_domain_page(med->ring_page);
     med->ring_page = NULL;
 
@@ -120,13 +129,18 @@ static int mem_event_disable(struct mem_
     return 0;
 }
 
-void mem_event_put_request(struct domain *d, struct mem_event_domain *med,
mem_event_request_t *req)
+static int _mem_event_put_request(struct domain *d, struct mem_event_domain
*med, mem_event_request_t *req)
 {
     mem_event_front_ring_t *front_ring;
     RING_IDX req_prod;
 
     mem_event_ring_lock(med);
 
+    if (RING_FREE_REQUESTS(&med->front_ring) == 0) {
+        mem_event_ring_unlock(med);
+        return 0;
+    }
+
     front_ring = &med->front_ring;
     req_prod = front_ring->req_prod_pvt;
 
@@ -142,6 +156,20 @@ void mem_event_put_request(struct domain
     mem_event_ring_unlock(med);
 
     notify_via_xen_event_channel(d, med->xen_port);
+
+    return 1;
+}
+
+void mem_event_put_request(struct domain *d, struct mem_event_domain *med,
mem_event_request_t *req)
+{
+	if (_mem_event_put_request(d, med, req))
+		return;
+	if (current->domain == d) {
+		wait_event(med->wq, _mem_event_put_request(d, med, req));
+		return;
+	}
+	/* Ring was full, unable to sleep */
+	printk("Failed to put memreq: d %u t %x f %x gfn %lx\n",
d->domain_id, req->type, req->flags, (unsigned long)req->gfn);
 }
 
 void mem_event_get_response(struct mem_event_domain *med, mem_event_response_t
*rsp)
@@ -165,21 +193,27 @@ void mem_event_get_response(struct mem_e
     mem_event_ring_unlock(med);
 }
 
-void mem_event_unpause_vcpus(struct domain *d)
+void mem_event_unpause_vcpus(struct domain *d, struct mem_event_domain *med)
 {
     struct vcpu *v;
 
     for_each_vcpu ( d, v )
-        if ( test_and_clear_bit(_VPF_mem_event, &v->pause_flags) )
+        if ( test_and_clear_bit(med->mem_event_bit, &v->pause_flags)
)
             vcpu_wake(v);
 }
 
-void mem_event_mark_and_pause(struct vcpu *v)
+void mem_event_mark_and_pause(struct vcpu *v, struct mem_event_domain *med)
 {
-    set_bit(_VPF_mem_event, &v->pause_flags);
+    set_bit(med->mem_event_bit, &v->pause_flags);
     vcpu_sleep_nosync(v);
 }
 
+/**
+ * mem_event_put_req_producers - Release a claimed slot
+ * @med: mem_event ring
+ *
+ * mem_event_put_req_producers() releases a claimed slot in the mem_event ring.
+ */
 void mem_event_put_req_producers(struct mem_event_domain *med)
 {
     mem_event_ring_lock(med);
@@ -187,9 +221,26 @@ void mem_event_put_req_producers(struct 
     mem_event_ring_unlock(med);
 }
 
+/**
+ * mem_event_check_ring - Check state of a mem_event ring
+ * @d: guest domain
+ * @med: mem_event ring
+ *
+ * Return codes: < 0: the ring is not yet configured
+ *                 0: the ring has some room
+ *               > 0: the ring is full
+ *
+ * mem_event_check_ring() checks the state of the given mem_event ring.
+ * If the current vcpu belongs to the guest domain, the function assumes that
+ * mem_event_put_request() will sleep until the ring has room again.
+ *
+ * If the current vcpu does not belong to the target domain the caller must try
+ * again until there is room. A slot is claimed and the caller can place a
+ * request. If the caller does not need to send a request, the claimed slot has
+ * to be released with mem_event_put_req_producers().
+ */
 int mem_event_check_ring(struct domain *d, struct mem_event_domain *med)
 {
-    struct vcpu *curr = current;
     int free_requests;
     int ring_full = 1;
 
@@ -205,8 +256,8 @@ int mem_event_check_ring(struct domain *
         ring_full = 0;
     }
 
-    if ( ring_full && (curr->domain == d) )
-        mem_event_mark_and_pause(curr);
+    if ( current->domain == d )
+        ring_full = 0;
 
     mem_event_ring_unlock(med);
 
@@ -274,7 +325,7 @@ int mem_event_domctl(struct domain *d, x
             if ( p2m->pod.entry_count )
                 break;
 
-            rc = mem_event_enable(d, mec, med);
+            rc = mem_event_enable(d, mec, _VPF_me_mem_paging, med);
         }
         break;
 
@@ -313,7 +364,7 @@ int mem_event_domctl(struct domain *d, x
             if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
                 break;
 
-            rc = mem_event_enable(d, mec, med);
+            rc = mem_event_enable(d, mec, _VPF_me_mem_access, med);
         }
         break;
 
diff -r 455f064fe54e -r 601d9ee4f136 xen/arch/x86/mm/mem_sharing.c
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -253,19 +253,11 @@ static void mem_sharing_audit(void)
 #endif
 
 
-static struct page_info* mem_sharing_alloc_page(struct domain *d, 
-                                                unsigned long gfn)
+static void mem_sharing_notify_helper(struct domain *d, unsigned long gfn)
 {
-    struct page_info* page;
     struct vcpu *v = current;
     mem_event_request_t req;
 
-    page = alloc_domheap_page(d, 0); 
-    if(page != NULL) return page;
-
-    memset(&req, 0, sizeof(req));
-    req.type = MEM_EVENT_TYPE_SHARED;
-
     if ( v->domain != d )
     {
         /* XXX This path needs some attention.  For now, just fail foreign 
@@ -275,20 +267,19 @@ static struct page_info* mem_sharing_all
         gdprintk(XENLOG_ERR, 
                  "Failed alloc on unshare path for foreign (%d)
lookup\n",
                  d->domain_id);
-        return page;
+        return;
     }
 
-    vcpu_pause_nosync(v);
-    req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
+    if (mem_event_check_ring(d, &d->mem_share) < 0)
+        return;
 
-    if(mem_event_check_ring(d, &d->mem_share)) return page;
-
+    memset(&req, 0, sizeof(req));
+    req.type = MEM_EVENT_TYPE_SHARED;
+    req.flags = MEM_EVENT_FLAG_VCPU_PAUSED;
     req.gfn = gfn;
     req.p2mt = p2m_ram_shared;
     req.vcpu_id = v->vcpu_id;
     mem_event_put_request(d, &d->mem_share, &req);
-
-    return page;
 }
 
 unsigned int mem_sharing_get_nr_saved_mfns(void)
@@ -647,13 +638,14 @@ gfn_found:
     if(ret == 0) goto private_page_found;
         
     old_page = page;
-    page = mem_sharing_alloc_page(d, gfn);
+    page = alloc_domheap_page(d, 0); 
     if(!page) 
     {
         /* We''ve failed to obtain memory for private page. Need to
re-add the
          * gfn_info to relevant list */
         list_add(&gfn_info->list, &hash_entry->gfns);
         shr_unlock();
+        mem_sharing_notify_helper(d, gfn);
         return -ENOMEM;
     }
 
diff -r 455f064fe54e -r 601d9ee4f136 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -840,17 +840,13 @@ void p2m_mem_paging_drop_page(struct dom
     struct vcpu *v = current;
     mem_event_request_t req;
 
-    /* Check that there''s space on the ring for this request */
-    if ( mem_event_check_ring(d, &d->mem_paging) == 0)
-    {
-        /* Send release notification to pager */
-        memset(&req, 0, sizeof(req));
-        req.flags |= MEM_EVENT_FLAG_DROP_PAGE;
-        req.gfn = gfn;
-        req.vcpu_id = v->vcpu_id;
+   /* Send release notification to pager */
+   memset(&req, 0, sizeof(req));
+   req.flags |= MEM_EVENT_FLAG_DROP_PAGE;
+   req.gfn = gfn;
+   req.vcpu_id = v->vcpu_id;
 
-        mem_event_put_request(d, &d->mem_paging, &req);
-    }
+   mem_event_put_request(d, &d->mem_paging, &req);
 }
 
 /**
@@ -1027,8 +1023,8 @@ void p2m_mem_paging_resume(struct domain
     if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
         vcpu_unpause(d->vcpu[rsp.vcpu_id]);
 
-    /* Unpause any domains that were paused because the ring was full */
-    mem_event_unpause_vcpus(d);
+    /* Unpause all vcpus that were paused because the ring was full */
+    wake_up(&d->mem_paging.wq);
 }
 
 void p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long
gla,
@@ -1067,7 +1063,7 @@ void p2m_mem_access_check(unsigned long 
                    "Memory access permissions failure, no mem_event
listener: pausing VCPU %d, dom %d\n",
                    v->vcpu_id, d->domain_id);
 
-            mem_event_mark_and_pause(v);
+            mem_event_mark_and_pause(v, &d->mem_access);
         }
         else
         {
@@ -1117,9 +1113,11 @@ void p2m_mem_access_resume(struct p2m_do
     if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
         vcpu_unpause(d->vcpu[rsp.vcpu_id]);
 
-    /* Unpause any domains that were paused because the ring was full or no
listener
-     * was available */
-    mem_event_unpause_vcpus(d);
+    /* Wakeup all vcpus waiting because the ring was full */
+    wake_up(&d->mem_access.wq);
+
+    /* Unpause all vcpus that were paused because no listener was available */
+    mem_event_unpause_vcpus(d, &d->mem_access);
 }
 
 
diff -r 455f064fe54e -r 601d9ee4f136 xen/include/asm-x86/mem_event.h
--- a/xen/include/asm-x86/mem_event.h
+++ b/xen/include/asm-x86/mem_event.h
@@ -25,12 +25,12 @@
 #define __MEM_EVENT_H__
 
 /* Pauses VCPU while marking pause flag for mem event */
-void mem_event_mark_and_pause(struct vcpu *v);
+void mem_event_mark_and_pause(struct vcpu *v, struct mem_event_domain *med);
 int mem_event_check_ring(struct domain *d, struct mem_event_domain *med);
 void mem_event_put_req_producers(struct mem_event_domain *med);
 void mem_event_put_request(struct domain *d, struct mem_event_domain *med,
mem_event_request_t *req);
 void mem_event_get_response(struct mem_event_domain *med, mem_event_response_t
*rsp);
-void mem_event_unpause_vcpus(struct domain *d);
+void mem_event_unpause_vcpus(struct domain *d, struct mem_event_domain *med);
 
 int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
                      XEN_GUEST_HANDLE(void) u_domctl);
diff -r 455f064fe54e -r 601d9ee4f136 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -14,6 +14,7 @@
 #include <xen/nodemask.h>
 #include <xen/radix-tree.h>
 #include <xen/multicall.h>
+#include <xen/wait.h>
 #include <public/xen.h>
 #include <public/domctl.h>
 #include <public/sysctl.h>
@@ -192,6 +193,10 @@ struct mem_event_domain
     mem_event_front_ring_t front_ring;
     /* event channel port (vcpu0 only) */
     int xen_port;
+    /* mem_event bit for vcpu->pause_flags */
+    int mem_event_bit;
+    /* list of vcpus waiting for room in the ring */
+    struct waitqueue_head wq;
 };
 
 struct domain
@@ -588,9 +593,12 @@ extern struct domain *domain_list;
  /* VCPU affinity has changed: migrating to a new CPU. */
 #define _VPF_migrating       3
 #define VPF_migrating        (1UL<<_VPF_migrating)
- /* VCPU is blocked on memory-event ring. */
-#define _VPF_mem_event       4
-#define VPF_mem_event        (1UL<<_VPF_mem_event)
+ /* VCPU is blocked on mem_paging ring. */
+#define _VPF_me_mem_paging   4
+#define VPF_me_mem_paging    (1UL<<_VPF_me_mem_paging)
+ /* VCPU is blocked on mem_access ring. */
+#define _VPF_me_mem_access   5
+#define VPF_me_mem_access    (1UL<<_VPF_me_mem_access)
 
 static inline int vcpu_runnable(struct vcpu *v)
 {
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel