# HG changeset patch # User Olaf Hering <olaf@aepfle.de> # Date 1323083831 -3600 # Node ID cd163bcd0f066e52ee74e42090188d632f0086bf # Parent a4d7c27ec1f190ecbb9a909609f6ef0eca250c00 mem_event: use wait queue when ring is full This change is based on an idea/patch from Adin Scannell. If the ring is full, put the current vcpu to sleep if it belongs to the target domain. The wakeup happens in the p2m_*_resume functions. Wakeup will take the number of free slots into account. A request from foreign domain has to succeed once a slot was claimed because such vcpus can not sleep. This change fixes also a bug in p2m_mem_paging_drop_page(). Up to now a full ring will lead to harmless inconsistency in the pager. v5: - rename mem_event_check_ring() to mem_event_claim_slot() - rename mem_event_put_req_producers() to mem_event_release_slot() - add local/foreign request accounting - keep room for at least one guest request v4: - fix off-by-one bug in _mem_event_put_request - add mem_event_wake_requesters() and use wake_up_nr() - rename mem_event_mark_and_pause() and mem_event_mark_and_pause() functions - req_producers counts foreign request producers, rename member v3: - rename ->mem_event_bit to ->bit - remove me_ from new VPF_ defines v2: - p2m_mem_paging_populate: move vcpu_pause after put_request, otherwise the vcpu will not wake_up after a wait_event because the pause_count was increased twice. Fixes guest hangs. - update free space check in _mem_event_put_request() - simplify mem_event_put_request() Signed-off-by: Olaf Hering <olaf@aepfle.de> diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -4156,8 +4156,8 @@ static int hvm_memory_event_traps(long p if ( (p & HVMPME_onchangeonly) && (value == old) ) return 1; - rc = mem_event_check_ring(d, &d->mem_event->access); - if ( rc ) + rc = mem_event_claim_slot(d, &d->mem_event->access); + if ( rc < 0 ) return rc; memset(&req, 0, sizeof(req)); diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/arch/x86/mm/mem_event.c --- a/xen/arch/x86/mm/mem_event.c +++ b/xen/arch/x86/mm/mem_event.c @@ -23,6 +23,7 @@ #include <asm/domain.h> #include <xen/event.h> +#include <xen/wait.h> #include <asm/p2m.h> #include <asm/mem_event.h> #include <asm/mem_paging.h> @@ -39,6 +40,7 @@ static int mem_event_enable(struct domain *d, xen_domctl_mem_event_op_t *mec, + int bit, struct mem_event_domain *med) { int rc; @@ -107,8 +109,12 @@ static int mem_event_enable(struct domai mem_event_ring_lock_init(med); + med->bit = bit; + + init_waitqueue_head(&med->wq); + /* Wake any VCPUs paused for memory events */ - mem_event_unpause_vcpus(d); + mem_event_wake_waiters(d, med); return 0; @@ -124,6 +130,9 @@ static int mem_event_enable(struct domai static int mem_event_disable(struct mem_event_domain *med) { + if (!list_empty(&med->wq.list)) + return -EBUSY; + unmap_domain_page(med->ring_page); med->ring_page = NULL; @@ -133,13 +142,24 @@ static int mem_event_disable(struct mem_ return 0; } -void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req) +static int _mem_event_put_request(struct domain *d, + struct mem_event_domain *med, + mem_event_request_t *req) { mem_event_front_ring_t *front_ring; + int free_req, claimed_req; RING_IDX req_prod; mem_event_ring_lock(med); + free_req = RING_FREE_REQUESTS(&med->front_ring); + /* Foreign requests must succeed because their vcpus can not sleep */ + claimed_req = med->foreign_producers; + if ( !free_req || ( current->domain == d && free_req <= 
claimed_req ) ) { + mem_event_ring_unlock(med); + return 0; + } + front_ring = &med->front_ring; req_prod = front_ring->req_prod_pvt; @@ -147,14 +167,35 @@ void mem_event_put_request(struct domain memcpy(RING_GET_REQUEST(front_ring, req_prod), req, sizeof(*req)); req_prod++; + /* Update accounting */ + if ( current->domain == d ) + med->target_producers--; + else + med->foreign_producers--; + /* Update ring */ - med->req_producers--; front_ring->req_prod_pvt = req_prod; RING_PUSH_REQUESTS(front_ring); mem_event_ring_unlock(med); notify_via_xen_event_channel(d, med->xen_port); + + return 1; +} + +void mem_event_put_request(struct domain *d, struct mem_event_domain *med, + mem_event_request_t *req) +{ + /* Go to sleep if request came from guest */ + if (current->domain == d) { + wait_event(med->wq, _mem_event_put_request(d, med, req)); + return; + } + /* Ring was full anyway, unable to sleep in non-guest context */ + if (!_mem_event_put_request(d, med, req)) + printk("Failed to put memreq: d %u t %x f %x gfn %lx\n", d->domain_id, + req->type, req->flags, (unsigned long)req->gfn); } void mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp) @@ -178,32 +219,97 @@ void mem_event_get_response(struct mem_e mem_event_ring_unlock(med); } -void mem_event_unpause_vcpus(struct domain *d) +/** + * mem_event_wake_requesters - Wake vcpus waiting for room in the ring + * @d: guest domain + * @med: mem_event ring + * + * mem_event_wake_requesters() will wakeup vcpus waiting for room in the + * ring. Only as many as can place another request in the ring will + * resume execution. + */ +void mem_event_wake_requesters(struct mem_event_domain *med) +{ + int free_req; + + mem_event_ring_lock(med); + + free_req = RING_FREE_REQUESTS(&med->front_ring); + if ( free_req ) + wake_up_nr(&med->wq, free_req); + + mem_event_ring_unlock(med); +} + +/** + * mem_event_wake_waiters - Wake all vcpus waiting for the ring + * @d: guest domain + * @med: mem_event ring + * + * mem_event_wake_waiters() will wakeup all vcpus waiting for the ring to + * become available. + */ +void mem_event_wake_waiters(struct domain *d, struct mem_event_domain *med) { struct vcpu *v; for_each_vcpu ( d, v ) - if ( test_and_clear_bit(_VPF_mem_event, &v->pause_flags) ) + if ( test_and_clear_bit(med->bit, &v->pause_flags) ) vcpu_wake(v); } -void mem_event_mark_and_pause(struct vcpu *v) +/** + * mem_event_mark_and_sleep - Put vcpu to sleep + * @v: guest vcpu + * @med: mem_event ring + * + * mem_event_mark_and_sleep() tags vcpu and put it to sleep. + * The vcpu will resume execution in mem_event_wake_waiters(). + */ +void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain *med) { - set_bit(_VPF_mem_event, &v->pause_flags); + set_bit(med->bit, &v->pause_flags); vcpu_sleep_nosync(v); } -void mem_event_put_req_producers(struct mem_event_domain *med) +/** + * mem_event_release_slot - Release a claimed slot + * @med: mem_event ring + * + * mem_event_release_slot() releases a claimed slot in the mem_event ring. 
+ */ +void mem_event_release_slot(struct domain *d, struct mem_event_domain *med) { mem_event_ring_lock(med); - med->req_producers--; + if ( current->domain == d ) + med->target_producers--; + else + med->foreign_producers--; mem_event_ring_unlock(med); } -int mem_event_check_ring(struct domain *d, struct mem_event_domain *med) +/** + * mem_event_claim_slot - Check state of a mem_event ring + * @d: guest domain + * @med: mem_event ring + * + * Return codes: < 0: the ring is not yet configured + * 0: the ring has some room + * > 0: the ring is full + * + * mem_event_claim_slot() checks the state of the given mem_event ring. + * If the current vcpu belongs to the guest domain, the function assumes that + * mem_event_put_request() will sleep until the ring has room again. + * A guest can always place at least one request. + * + * If the current vcpu does not belong to the target domain the caller must try + * again until there is room. A slot is claimed and the caller can place a + * request. If the caller does not need to send a request, the claimed slot has + * to be released with mem_event_release_slot(). + */ +int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med) { - struct vcpu *curr = current; - int free_requests; + int free_req; int ring_full = 1; if ( !med->ring_page ) @@ -211,16 +317,17 @@ int mem_event_check_ring(struct domain * mem_event_ring_lock(med); - free_requests = RING_FREE_REQUESTS(&med->front_ring); - if ( med->req_producers < free_requests ) + free_req = RING_FREE_REQUESTS(&med->front_ring); + + if ( current->domain == d ) { + med->target_producers++; + ring_full = 0; + } else if ( med->foreign_producers + med->target_producers + 1 < free_req ) { - med->req_producers++; + med->foreign_producers++; ring_full = 0; } - if ( ring_full && (curr->domain == d) ) - mem_event_mark_and_pause(curr); - mem_event_ring_unlock(med); return ring_full; @@ -287,7 +394,7 @@ int mem_event_domctl(struct domain *d, x if ( p2m->pod.entry_count ) break; - rc = mem_event_enable(d, mec, med); + rc = mem_event_enable(d, mec, _VPF_mem_paging, med); } break; @@ -326,7 +433,7 @@ int mem_event_domctl(struct domain *d, x if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ) break; - rc = mem_event_enable(d, mec, med); + rc = mem_event_enable(d, mec, _VPF_mem_access, med); } break; diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/arch/x86/mm/mem_sharing.c --- a/xen/arch/x86/mm/mem_sharing.c +++ b/xen/arch/x86/mm/mem_sharing.c @@ -253,18 +253,10 @@ static void mem_sharing_audit(void) #endif -static struct page_info* mem_sharing_alloc_page(struct domain *d, - unsigned long gfn) +static void mem_sharing_notify_helper(struct domain *d, unsigned long gfn) { - struct page_info* page; struct vcpu *v = current; - mem_event_request_t req; - - page = alloc_domheap_page(d, 0); - if(page != NULL) return page; - - memset(&req, 0, sizeof(req)); - req.type = MEM_EVENT_TYPE_SHARED; + mem_event_request_t req = { .type = MEM_EVENT_TYPE_SHARED }; if ( v->domain != d ) { @@ -275,20 +267,18 @@ static struct page_info* mem_sharing_all gdprintk(XENLOG_ERR, "Failed alloc on unshare path for foreign (%d) lookup\n", d->domain_id); - return page; + return; } - vcpu_pause_nosync(v); - req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED; + if (mem_event_claim_slot(d, &d->mem_event->share) < 0) + return; - if(mem_event_check_ring(d, &d->mem_event->share)) return page; - + req.flags = MEM_EVENT_FLAG_VCPU_PAUSED; req.gfn = gfn; req.p2mt = p2m_ram_shared; req.vcpu_id = v->vcpu_id; mem_event_put_request(d, &d->mem_event->share, &req); - - 
return page; + vcpu_pause_nosync(v); } unsigned int mem_sharing_get_nr_saved_mfns(void) @@ -653,7 +643,7 @@ gfn_found: if(ret == 0) goto private_page_found; old_page = page; - page = mem_sharing_alloc_page(d, gfn); + page = alloc_domheap_page(d, 0); if(!page) { /* We''ve failed to obtain memory for private page. Need to re-add the @@ -661,6 +651,7 @@ gfn_found: list_add(&gfn_info->list, &hash_entry->gfns); put_gfn(d, gfn); shr_unlock(); + mem_sharing_notify_helper(d, gfn); return -ENOMEM; } diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/arch/x86/mm/p2m.c --- a/xen/arch/x86/mm/p2m.c +++ b/xen/arch/x86/mm/p2m.c @@ -861,20 +861,12 @@ int p2m_mem_paging_evict(struct domain * */ void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn) { - struct vcpu *v = current; - mem_event_request_t req; + mem_event_request_t req = { .type = MEM_EVENT_TYPE_PAGING, .gfn = gfn }; - /* Check that there''s space on the ring for this request */ - if ( mem_event_check_ring(d, &d->mem_event->paging) == 0) - { - /* Send release notification to pager */ - memset(&req, 0, sizeof(req)); - req.flags |= MEM_EVENT_FLAG_DROP_PAGE; - req.gfn = gfn; - req.vcpu_id = v->vcpu_id; + /* Send release notification to pager */ + req.flags = MEM_EVENT_FLAG_DROP_PAGE; - mem_event_put_request(d, &d->mem_event->paging, &req); - } + mem_event_put_request(d, &d->mem_event->paging, &req); } /** @@ -908,7 +900,7 @@ void p2m_mem_paging_populate(struct doma struct p2m_domain *p2m = p2m_get_hostp2m(d); /* Check that there''s space on the ring for this request */ - if ( mem_event_check_ring(d, &d->mem_event->paging) ) + if ( mem_event_claim_slot(d, &d->mem_event->paging) ) return; memset(&req, 0, sizeof(req)); @@ -938,7 +930,7 @@ void p2m_mem_paging_populate(struct doma else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged ) { /* gfn is already on its way back and vcpu is not paused */ - mem_event_put_req_producers(&d->mem_event->paging); + mem_event_release_slot(d, &d->mem_event->paging); return; } @@ -1078,8 +1070,8 @@ void p2m_mem_paging_resume(struct domain if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED ) vcpu_unpause(d->vcpu[rsp.vcpu_id]); - /* Unpause any domains that were paused because the ring was full */ - mem_event_unpause_vcpus(d); + /* Wake vcpus waiting for room in the ring */ + mem_event_wake_requesters(&d->mem_event->paging); } void p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long gla, @@ -1108,7 +1100,7 @@ void p2m_mem_access_check(unsigned long p2m_unlock(p2m); /* Otherwise, check if there is a memory event listener, and send the message along */ - res = mem_event_check_ring(d, &d->mem_event->access); + res = mem_event_claim_slot(d, &d->mem_event->access); if ( res < 0 ) { /* No listener */ @@ -1118,7 +1110,7 @@ void p2m_mem_access_check(unsigned long "Memory access permissions failure, no mem_event listener: pausing VCPU %d, dom %d\n", v->vcpu_id, d->domain_id); - mem_event_mark_and_pause(v); + mem_event_mark_and_sleep(v, &d->mem_event->access); } else { @@ -1167,9 +1159,11 @@ void p2m_mem_access_resume(struct domain if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED ) vcpu_unpause(d->vcpu[rsp.vcpu_id]); - /* Unpause any domains that were paused because the ring was full or no listener - * was available */ - mem_event_unpause_vcpus(d); + /* Wake vcpus waiting for room in the ring */ + mem_event_wake_requesters(&d->mem_event->access); + + /* Unpause all vcpus that were paused because no listener was available */ + mem_event_wake_waiters(d, &d->mem_event->access); } diff -r a4d7c27ec1f1 -r cd163bcd0f06 
xen/include/asm-x86/mem_event.h --- a/xen/include/asm-x86/mem_event.h +++ b/xen/include/asm-x86/mem_event.h @@ -24,13 +24,13 @@ #ifndef __MEM_EVENT_H__ #define __MEM_EVENT_H__ -/* Pauses VCPU while marking pause flag for mem event */ -void mem_event_mark_and_pause(struct vcpu *v); -int mem_event_check_ring(struct domain *d, struct mem_event_domain *med); -void mem_event_put_req_producers(struct mem_event_domain *med); +int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med); +void mem_event_release_slot(struct domain *d, struct mem_event_domain *med); void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req); void mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp); -void mem_event_unpause_vcpus(struct domain *d); +void mem_event_wake_requesters(struct mem_event_domain *med); +void mem_event_wake_waiters(struct domain *d, struct mem_event_domain *med); +void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain *med); int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec, XEN_GUEST_HANDLE(void) u_domctl); diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/include/xen/sched.h --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -14,6 +14,7 @@ #include <xen/nodemask.h> #include <xen/radix-tree.h> #include <xen/multicall.h> +#include <xen/wait.h> #include <public/xen.h> #include <public/domctl.h> #include <public/sysctl.h> @@ -183,7 +184,8 @@ struct mem_event_domain { /* ring lock */ spinlock_t ring_lock; - unsigned int req_producers; + unsigned short foreign_producers; + unsigned short target_producers; /* shared page */ mem_event_shared_page_t *shared_page; /* shared ring page */ @@ -192,6 +194,10 @@ struct mem_event_domain mem_event_front_ring_t front_ring; /* event channel port (vcpu0 only) */ int xen_port; + /* mem_event bit for vcpu->pause_flags */ + int bit; + /* list of vcpus waiting for room in the ring */ + struct waitqueue_head wq; }; struct mem_event_per_domain @@ -615,9 +621,12 @@ static inline struct domain *next_domain /* VCPU affinity has changed: migrating to a new CPU. */ #define _VPF_migrating 3 #define VPF_migrating (1UL<<_VPF_migrating) - /* VCPU is blocked on memory-event ring. */ -#define _VPF_mem_event 4 -#define VPF_mem_event (1UL<<_VPF_mem_event) + /* VCPU is blocked due to missing mem_paging ring. */ +#define _VPF_mem_paging 4 +#define VPF_mem_paging (1UL<<_VPF_mem_paging) + /* VCPU is blocked due to missing mem_access ring. */ +#define _VPF_mem_access 5 +#define VPF_mem_access (1UL<<_VPF_mem_access) static inline int vcpu_runnable(struct vcpu *v) {
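The renames in this version amount to a small claim/release protocol for ring slots. A minimal sketch of the intended calling pattern, pieced together from the doc comments and from p2m_mem_paging_populate() above (the surrounding caller and the "no longer needed" condition are illustrative, not code from the patch):

    mem_event_request_t req;

    /* < 0: ring not configured; > 0: ring full for a foreign vcpu (try again later).
     * A vcpu of the target domain always gets a slot; for it, put_request may sleep. */
    if ( mem_event_claim_slot(d, &d->mem_event->paging) )
        return;

    memset(&req, 0, sizeof(req));
    /* ... fill in req.type, req.gfn, req.flags, req.vcpu_id ... */

    if ( request_no_longer_needed )   /* illustrative condition only */
    {
        /* Give the claimed slot back without sending anything. */
        mem_event_release_slot(d, &d->mem_event->paging);
        return;
    }

    mem_event_put_request(d, &d->mem_event->paging, &req);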
On Mon, Dec 05, Olaf Hering wrote:

> Wakeup will take the number of free slots into account.

This is not entirely true unless this additional change is applied:

diff -r cd163bcd0f06 xen/arch/x86/mm/mem_event.c
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -235,6 +235,7 @@ void mem_event_wake_requesters(struct me
     mem_event_ring_lock(med);
 
     free_req = RING_FREE_REQUESTS(&med->front_ring);
+    free_req -= med->foreign_producers;
     if ( free_req )
         wake_up_nr(&med->wq, free_req);

And perhaps the wake_up should be done outside the ring lock?

Olaf
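Folding in both of these points (subtract the slots already claimed by foreign producers, and do the wake_up_nr() after dropping the ring lock), the wake helper comes out roughly as below; this is essentially what the v6 repost further down in this thread ends up with:

    void mem_event_wake_requesters(struct mem_event_domain *med)
    {
        int free_req;

        /* Only hand out slots that sleeping guest vcpus can actually use:
         * slots claimed by foreign producers are not available to them. */
        mem_event_ring_lock(med);
        free_req = RING_FREE_REQUESTS(&med->front_ring);
        free_req -= med->foreign_producers;
        mem_event_ring_unlock(med);

        /* Wake at most that many waiters, outside the ring lock. */
        if ( free_req )
            wake_up_nr(&med->wq, free_req);
    }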
Andres Lagar-Cavilla
2011-Dec-05 15:45 UTC
Re: [PATCH] mem_event: use wait queue when ring is full
> Date: Mon, 05 Dec 2011 12:19:03 +0100 > From: Olaf Hering <olaf@aepfle.de> > To: xen-devel@lists.xensource.com > Subject: [Xen-devel] [PATCH] mem_event: use wait queue when ring is > full > Message-ID: <cd163bcd0f066e52ee74.1323083943@probook.site> > Content-Type: text/plain; charset="us-ascii" > > # HG changeset patch > # User Olaf Hering <olaf@aepfle.de> > # Date 1323083831 -3600 > # Node ID cd163bcd0f066e52ee74e42090188d632f0086bf > # Parent a4d7c27ec1f190ecbb9a909609f6ef0eca250c00 > mem_event: use wait queue when ring is full > > This change is based on an idea/patch from Adin Scannell. > > If the ring is full, put the current vcpu to sleep if it belongs to the > target domain. The wakeup happens in the p2m_*_resume functions. Wakeup > will take the number of free slots into account. > > A request from foreign domain has to succeed once a slot was claimed > because such vcpus can not sleep. > > This change fixes also a bug in p2m_mem_paging_drop_page(). Up to now a > full ring will lead to harmless inconsistency in the pager. > > > v5: > - rename mem_event_check_ring() to mem_event_claim_slot() > - rename mem_event_put_req_producers() to mem_event_release_slot() > - add local/foreign request accounting > - keep room for at least one guest request > > v4: > - fix off-by-one bug in _mem_event_put_request > - add mem_event_wake_requesters() and use wake_up_nr() > - rename mem_event_mark_and_pause() and mem_event_mark_and_pause() > functions > - req_producers counts foreign request producers, rename member > > v3: > - rename ->mem_event_bit to ->bit > - remove me_ from new VPF_ defines > > v2: > - p2m_mem_paging_populate: move vcpu_pause after put_request, otherwise > the > vcpu will not wake_up after a wait_event because the pause_count was > increased twice. Fixes guest hangs. > - update free space check in _mem_event_put_request() > - simplify mem_event_put_request() > > Signed-off-by: Olaf Hering <olaf@aepfle.de> > > diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/arch/x86/hvm/hvm.c > --- a/xen/arch/x86/hvm/hvm.c > +++ b/xen/arch/x86/hvm/hvm.c > @@ -4156,8 +4156,8 @@ static int hvm_memory_event_traps(long p > if ( (p & HVMPME_onchangeonly) && (value == old) ) > return 1; > > - rc = mem_event_check_ring(d, &d->mem_event->access); > - if ( rc ) > + rc = mem_event_claim_slot(d, &d->mem_event->access); > + if ( rc < 0 ) > return rc; > > memset(&req, 0, sizeof(req)); > diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/arch/x86/mm/mem_event.c > --- a/xen/arch/x86/mm/mem_event.c > +++ b/xen/arch/x86/mm/mem_event.c > @@ -23,6 +23,7 @@ > > #include <asm/domain.h> > #include <xen/event.h> > +#include <xen/wait.h> > #include <asm/p2m.h> > #include <asm/mem_event.h> > #include <asm/mem_paging.h> > @@ -39,6 +40,7 @@ > > static int mem_event_enable(struct domain *d, > xen_domctl_mem_event_op_t *mec, > + int bit, > struct mem_event_domain *med) > { > int rc; > @@ -107,8 +109,12 @@ static int mem_event_enable(struct domai > > mem_event_ring_lock_init(med); > > + med->bit = bit;I think it''s been asked before for this to have a more expressive name.> + > + init_waitqueue_head(&med->wq); > + > /* Wake any VCPUs paused for memory events */ > - mem_event_unpause_vcpus(d); > + mem_event_wake_waiters(d, med); > > return 0; > > @@ -124,6 +130,9 @@ static int mem_event_enable(struct domai > > static int mem_event_disable(struct mem_event_domain *med) > { > + if (!list_empty(&med->wq.list)) > + return -EBUSY; > +What does the caller do with EBUSY? 
Retry?> unmap_domain_page(med->ring_page); > med->ring_page = NULL; > > @@ -133,13 +142,24 @@ static int mem_event_disable(struct mem_ > return 0; > } > > -void mem_event_put_request(struct domain *d, struct mem_event_domain > *med, mem_event_request_t *req) > +static int _mem_event_put_request(struct domain *d, > + struct mem_event_domain *med, > + mem_event_request_t *req) > { > mem_event_front_ring_t *front_ring; > + int free_req, claimed_req; > RING_IDX req_prod; > > mem_event_ring_lock(med); > > + free_req = RING_FREE_REQUESTS(&med->front_ring); > + /* Foreign requests must succeed because their vcpus can not sleep */ > + claimed_req = med->foreign_producers; > + if ( !free_req || ( current->domain == d && free_req <= claimed_req ) > ) { > + mem_event_ring_unlock(med); > + return 0; > + } > + > front_ring = &med->front_ring; > req_prod = front_ring->req_prod_pvt; > > @@ -147,14 +167,35 @@ void mem_event_put_request(struct domain > memcpy(RING_GET_REQUEST(front_ring, req_prod), req, sizeof(*req)); > req_prod++; > > + /* Update accounting */ > + if ( current->domain == d ) > + med->target_producers--; > + else > + med->foreign_producers--; > + > /* Update ring */ > - med->req_producers--; > front_ring->req_prod_pvt = req_prod; > RING_PUSH_REQUESTS(front_ring); > > mem_event_ring_unlock(med); > > notify_via_xen_event_channel(d, med->xen_port); > + > + return 1; > +} > + > +void mem_event_put_request(struct domain *d, struct mem_event_domain > *med, > + mem_event_request_t *req) > +{ > + /* Go to sleep if request came from guest */ > + if (current->domain == d) { > + wait_event(med->wq, _mem_event_put_request(d, med, req)); > + return; > + } > + /* Ring was full anyway, unable to sleep in non-guest context */ > + if (!_mem_event_put_request(d, med, req)) > + printk("Failed to put memreq: d %u t %x f %x gfn %lx\n", > d->domain_id, > + req->type, req->flags, (unsigned long)req->gfn); > } > > void mem_event_get_response(struct mem_event_domain *med, > mem_event_response_t *rsp) > @@ -178,32 +219,97 @@ void mem_event_get_response(struct mem_e > mem_event_ring_unlock(med); > } > > -void mem_event_unpause_vcpus(struct domain *d) > +/** > + * mem_event_wake_requesters - Wake vcpus waiting for room in the ring > + * @d: guest domain > + * @med: mem_event ring > + * > + * mem_event_wake_requesters() will wakeup vcpus waiting for room in the > + * ring. Only as many as can place another request in the ring will > + * resume execution. > + */ > +void mem_event_wake_requesters(struct mem_event_domain *med) > +{ > + int free_req; > + > + mem_event_ring_lock(med); > + > + free_req = RING_FREE_REQUESTS(&med->front_ring); > + if ( free_req ) > + wake_up_nr(&med->wq, free_req); > + > + mem_event_ring_unlock(med); > +} > + > +/** > + * mem_event_wake_waiters - Wake all vcpus waiting for the ring > + * @d: guest domain > + * @med: mem_event ring > + * > + * mem_event_wake_waiters() will wakeup all vcpus waiting for the ring to > + * become available. > + */ > +void mem_event_wake_waiters(struct domain *d, struct mem_event_domain > *med) > { > struct vcpu *v; > > for_each_vcpu ( d, v ) > - if ( test_and_clear_bit(_VPF_mem_event, &v->pause_flags) ) > + if ( test_and_clear_bit(med->bit, &v->pause_flags) ) > vcpu_wake(v); > } > > -void mem_event_mark_and_pause(struct vcpu *v) > +/** > + * mem_event_mark_and_sleep - Put vcpu to sleep > + * @v: guest vcpu > + * @med: mem_event ring > + * > + * mem_event_mark_and_sleep() tags vcpu and put it to sleep. 
> + * The vcpu will resume execution in mem_event_wake_waiters(). > + */ > +void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain > *med) > { > - set_bit(_VPF_mem_event, &v->pause_flags); > + set_bit(med->bit, &v->pause_flags); > vcpu_sleep_nosync(v); > } > > -void mem_event_put_req_producers(struct mem_event_domain *med) > +/** > + * mem_event_release_slot - Release a claimed slot > + * @med: mem_event ring > + * > + * mem_event_release_slot() releases a claimed slot in the mem_event > ring. > + */ > +void mem_event_release_slot(struct domain *d, struct mem_event_domain > *med) > { > mem_event_ring_lock(med); > - med->req_producers--; > + if ( current->domain == d ) > + med->target_producers--; > + else > + med->foreign_producers--; > mem_event_ring_unlock(med); > } > > -int mem_event_check_ring(struct domain *d, struct mem_event_domain *med) > +/** > + * mem_event_claim_slot - Check state of a mem_event ring > + * @d: guest domain > + * @med: mem_event ring > + * > + * Return codes: < 0: the ring is not yet configured > + * 0: the ring has some room > + * > 0: the ring is full > + * > + * mem_event_claim_slot() checks the state of the given mem_event ring. > + * If the current vcpu belongs to the guest domain, the function assumes > that > + * mem_event_put_request() will sleep until the ring has room again. > + * A guest can always place at least one request. > + * > + * If the current vcpu does not belong to the target domain the caller > must try > + * again until there is room. A slot is claimed and the caller can place > a > + * request. If the caller does not need to send a request, the claimed > slot has > + * to be released with mem_event_release_slot(). > + */ > +int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med) > { > - struct vcpu *curr = current; > - int free_requests; > + int free_req; > int ring_full = 1; > > if ( !med->ring_page ) > @@ -211,16 +317,17 @@ int mem_event_check_ring(struct domain * > > mem_event_ring_lock(med); > > - free_requests = RING_FREE_REQUESTS(&med->front_ring); > - if ( med->req_producers < free_requests ) > + free_req = RING_FREE_REQUESTS(&med->front_ring); > + > + if ( current->domain == d ) { > + med->target_producers++; > + ring_full = 0; > + } else if ( med->foreign_producers + med->target_producers + 1 < > free_req ) > { > - med->req_producers++; > + med->foreign_producers++; > ring_full = 0; > } > > - if ( ring_full && (curr->domain == d) ) > - mem_event_mark_and_pause(curr); > - > mem_event_ring_unlock(med); > > return ring_full; > @@ -287,7 +394,7 @@ int mem_event_domctl(struct domain *d, x > if ( p2m->pod.entry_count ) > break; > > - rc = mem_event_enable(d, mec, med); > + rc = mem_event_enable(d, mec, _VPF_mem_paging, med); > } > break; > > @@ -326,7 +433,7 @@ int mem_event_domctl(struct domain *d, x > if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ) > break; > > - rc = mem_event_enable(d, mec, med); > + rc = mem_event_enable(d, mec, _VPF_mem_access, med);Ok, the idea of bit is that different vcpus will sleep with different pause flags, depending on the ring they''re sleeping on. But this is only used in wake_waiters, which is not used by all rings. 
In fact, why do we need wake_waiters with wait queues?> } > break; > > diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/arch/x86/mm/mem_sharing.c > --- a/xen/arch/x86/mm/mem_sharing.c > +++ b/xen/arch/x86/mm/mem_sharing.c > @@ -253,18 +253,10 @@ static void mem_sharing_audit(void) > #endif > > > -static struct page_info* mem_sharing_alloc_page(struct domain *d, > - unsigned long gfn) > +static void mem_sharing_notify_helper(struct domain *d, unsigned long > gfn) > { > - struct page_info* page; > struct vcpu *v = current; > - mem_event_request_t req; > - > - page = alloc_domheap_page(d, 0); > - if(page != NULL) return page; > - > - memset(&req, 0, sizeof(req)); > - req.type = MEM_EVENT_TYPE_SHARED; > + mem_event_request_t req = { .type = MEM_EVENT_TYPE_SHARED }; > > if ( v->domain != d ) > { > @@ -275,20 +267,18 @@ static struct page_info* mem_sharing_all > gdprintk(XENLOG_ERR, > "Failed alloc on unshare path for foreign (%d) > lookup\n", > d->domain_id); > - return page; > + return; > } > > - vcpu_pause_nosync(v); > - req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED; > + if (mem_event_claim_slot(d, &d->mem_event->share) < 0) > + return; > > - if(mem_event_check_ring(d, &d->mem_event->share)) return page; > - > + req.flags = MEM_EVENT_FLAG_VCPU_PAUSED; > req.gfn = gfn; > req.p2mt = p2m_ram_shared; > req.vcpu_id = v->vcpu_id; > mem_event_put_request(d, &d->mem_event->share, &req); > - > - return page; > + vcpu_pause_nosync(v); > } > > unsigned int mem_sharing_get_nr_saved_mfns(void) > @@ -653,7 +643,7 @@ gfn_found: > if(ret == 0) goto private_page_found; > > old_page = page; > - page = mem_sharing_alloc_page(d, gfn); > + page = alloc_domheap_page(d, 0); > if(!page) > { > /* We''ve failed to obtain memory for private page. Need to re-add > the > @@ -661,6 +651,7 @@ gfn_found: > list_add(&gfn_info->list, &hash_entry->gfns); > put_gfn(d, gfn); > shr_unlock(); > + mem_sharing_notify_helper(d, gfn);This is nice. Do you think PoD could use this, should it ever run into a ENOMEM situation? And what about mem_paging_prep? Perhaps, rather than a sharing ring (which is bit rotted) we could have an ENOMEM ring with a utility launched by xencommons listening. The problem, again, is what if ENOMEM is itself caused by dom0 (e.g. 
writable mapping of a shared page)> return -ENOMEM; > } > > diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/arch/x86/mm/p2m.c > --- a/xen/arch/x86/mm/p2m.c > +++ b/xen/arch/x86/mm/p2m.c > @@ -861,20 +861,12 @@ int p2m_mem_paging_evict(struct domain * > */ > void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn) > { > - struct vcpu *v = current; > - mem_event_request_t req; > + mem_event_request_t req = { .type = MEM_EVENT_TYPE_PAGING, .gfn = gfn > }; > > - /* Check that there''s space on the ring for this request */ > - if ( mem_event_check_ring(d, &d->mem_event->paging) == 0) > - { > - /* Send release notification to pager */ > - memset(&req, 0, sizeof(req)); > - req.flags |= MEM_EVENT_FLAG_DROP_PAGE; > - req.gfn = gfn; > - req.vcpu_id = v->vcpu_id; > + /* Send release notification to pager */ > + req.flags = MEM_EVENT_FLAG_DROP_PAGE; > > - mem_event_put_request(d, &d->mem_event->paging, &req); > - } > + mem_event_put_request(d, &d->mem_event->paging, &req); > } > > /** > @@ -908,7 +900,7 @@ void p2m_mem_paging_populate(struct doma > struct p2m_domain *p2m = p2m_get_hostp2m(d); > > /* Check that there''s space on the ring for this request */ > - if ( mem_event_check_ring(d, &d->mem_event->paging) ) > + if ( mem_event_claim_slot(d, &d->mem_event->paging) ) > return; > > memset(&req, 0, sizeof(req)); > @@ -938,7 +930,7 @@ void p2m_mem_paging_populate(struct doma > else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged ) > { > /* gfn is already on its way back and vcpu is not paused */ > - mem_event_put_req_producers(&d->mem_event->paging); > + mem_event_release_slot(d, &d->mem_event->paging); > return; > } > > @@ -1078,8 +1070,8 @@ void p2m_mem_paging_resume(struct domain > if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED ) > vcpu_unpause(d->vcpu[rsp.vcpu_id]); > > - /* Unpause any domains that were paused because the ring was full */ > - mem_event_unpause_vcpus(d); > + /* Wake vcpus waiting for room in the ring */ > + mem_event_wake_requesters(&d->mem_event->paging); > } > > void p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned > long gla, > @@ -1108,7 +1100,7 @@ void p2m_mem_access_check(unsigned long > p2m_unlock(p2m); > > /* Otherwise, check if there is a memory event listener, and send the > message along */ > - res = mem_event_check_ring(d, &d->mem_event->access); > + res = mem_event_claim_slot(d, &d->mem_event->access); > if ( res < 0 ) > { > /* No listener */ > @@ -1118,7 +1110,7 @@ void p2m_mem_access_check(unsigned long > "Memory access permissions failure, no mem_event > listener: pausing VCPU %d, dom %d\n", > v->vcpu_id, d->domain_id); > > - mem_event_mark_and_pause(v); > + mem_event_mark_and_sleep(v, &d->mem_event->access); > } > else > { > @@ -1167,9 +1159,11 @@ void p2m_mem_access_resume(struct domain > if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED ) > vcpu_unpause(d->vcpu[rsp.vcpu_id]); > > - /* Unpause any domains that were paused because the ring was full or > no listener > - * was available */ > - mem_event_unpause_vcpus(d); > + /* Wake vcpus waiting for room in the ring */ > + mem_event_wake_requesters(&d->mem_event->access); > + > + /* Unpause all vcpus that were paused because no listener was > available */ > + mem_event_wake_waiters(d, &d->mem_event->access);Is this not used in p2m_mem_paging_resume? Why the difference? Why are two mechanisms needed (wake_requesters, wake_sleepers)? 
Thanks Andres> } > > > diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/include/asm-x86/mem_event.h > --- a/xen/include/asm-x86/mem_event.h > +++ b/xen/include/asm-x86/mem_event.h > @@ -24,13 +24,13 @@ > #ifndef __MEM_EVENT_H__ > #define __MEM_EVENT_H__ > > -/* Pauses VCPU while marking pause flag for mem event */ > -void mem_event_mark_and_pause(struct vcpu *v); > -int mem_event_check_ring(struct domain *d, struct mem_event_domain *med); > -void mem_event_put_req_producers(struct mem_event_domain *med); > +int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med); > +void mem_event_release_slot(struct domain *d, struct mem_event_domain > *med); > void mem_event_put_request(struct domain *d, struct mem_event_domain > *med, mem_event_request_t *req); > void mem_event_get_response(struct mem_event_domain *med, > mem_event_response_t *rsp); > -void mem_event_unpause_vcpus(struct domain *d); > +void mem_event_wake_requesters(struct mem_event_domain *med); > +void mem_event_wake_waiters(struct domain *d, struct mem_event_domain > *med); > +void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain > *med); > > int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec, > XEN_GUEST_HANDLE(void) u_domctl); > diff -r a4d7c27ec1f1 -r cd163bcd0f06 xen/include/xen/sched.h > --- a/xen/include/xen/sched.h > +++ b/xen/include/xen/sched.h > @@ -14,6 +14,7 @@ > #include <xen/nodemask.h> > #include <xen/radix-tree.h> > #include <xen/multicall.h> > +#include <xen/wait.h> > #include <public/xen.h> > #include <public/domctl.h> > #include <public/sysctl.h> > @@ -183,7 +184,8 @@ struct mem_event_domain > { > /* ring lock */ > spinlock_t ring_lock; > - unsigned int req_producers; > + unsigned short foreign_producers; > + unsigned short target_producers; > /* shared page */ > mem_event_shared_page_t *shared_page; > /* shared ring page */ > @@ -192,6 +194,10 @@ struct mem_event_domain > mem_event_front_ring_t front_ring; > /* event channel port (vcpu0 only) */ > int xen_port; > + /* mem_event bit for vcpu->pause_flags */ > + int bit; > + /* list of vcpus waiting for room in the ring */ > + struct waitqueue_head wq; > }; > > struct mem_event_per_domain > @@ -615,9 +621,12 @@ static inline struct domain *next_domain > /* VCPU affinity has changed: migrating to a new CPU. */ > #define _VPF_migrating 3 > #define VPF_migrating (1UL<<_VPF_migrating) > - /* VCPU is blocked on memory-event ring. */ > -#define _VPF_mem_event 4 > -#define VPF_mem_event (1UL<<_VPF_mem_event) > + /* VCPU is blocked due to missing mem_paging ring. */ > +#define _VPF_mem_paging 4 > +#define VPF_mem_paging (1UL<<_VPF_mem_paging) > + /* VCPU is blocked due to missing mem_access ring. */ > +#define _VPF_mem_access 5 > +#define VPF_mem_access (1UL<<_VPF_mem_access) > > static inline int vcpu_runnable(struct vcpu *v) > { > > > > ------------------------------ > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xensource.com > http://lists.xensource.com/xen-devel > > > End of Xen-devel Digest, Vol 82, Issue 66 > ***************************************** >
On Mon, Dec 05, Andres Lagar-Cavilla wrote:> > + med->bit = bit; > I think it''s been asked before for this to have a more expressive name.I have to recheck, AFAIK it was the mem_bit where mem_ is redundant.> > static int mem_event_disable(struct mem_event_domain *med) > > { > > + if (!list_empty(&med->wq.list)) > > + return -EBUSY; > > + > What does the caller do with EBUSY? Retry?Yes, and mail the devs at xen-devel that something isn''t right ;-) At least the pager uses this just in the exit path. I dont know about access and sharing, wether these tools enable/disable more than once at guest runtime.> > @@ -287,7 +394,7 @@ int mem_event_domctl(struct domain *d, x > > if ( p2m->pod.entry_count ) > > break; > > > > - rc = mem_event_enable(d, mec, med); > > + rc = mem_event_enable(d, mec, _VPF_mem_paging, med); > > } > > break; > > > > @@ -326,7 +433,7 @@ int mem_event_domctl(struct domain *d, x > > if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ) > > break; > > > > - rc = mem_event_enable(d, mec, med); > > + rc = mem_event_enable(d, mec, _VPF_mem_access, med);> Ok, the idea of bit is that different vcpus will sleep with different > pause flags, depending on the ring they''re sleeping on. But this is only > used in wake_waiters, which is not used by all rings. In fact, why do we > need wake_waiters with wait queues?Before this patch, mem_event_unpause_vcpus() was used to resume waiters for the ring itself and for room in the ring. Now there is mem_event_wake_waiters(), which indicates the ring is active, and there is mem_event_wake_requesters() which indicates the ring has room to place guest requests. I agree that only _VPF_mem_access is really needed, and _VPF_mem_paging could be removed because paging without having a ring first is not possible.> > @@ -653,7 +643,7 @@ gfn_found: > > if(ret == 0) goto private_page_found; > > > > old_page = page; > > - page = mem_sharing_alloc_page(d, gfn); > > + page = alloc_domheap_page(d, 0); > > if(!page) > > { > > /* We''ve failed to obtain memory for private page. Need to re-add > > the > > @@ -661,6 +651,7 @@ gfn_found: > > list_add(&gfn_info->list, &hash_entry->gfns); > > put_gfn(d, gfn); > > shr_unlock(); > > + mem_sharing_notify_helper(d, gfn); > This is nice. Do you think PoD could use this, should it ever run into a > ENOMEM situation? And what about mem_paging_prep? Perhaps, rather than a > sharing ring (which is bit rotted) we could have an ENOMEM ring with a > utility launched by xencommons listening. The problem, again, is what if > ENOMEM is itself caused by dom0 (e.g. writable mapping of a shared page)I have no idea about mem_sharing. I just move the existing code outside the lock so that mem_event_put_request() is (hopefully) called without any locks from mem_sharing_get_nr_saved_mfns(). Since there is appearently no user of a sharing ring, this whole new mem_sharing_notify_helper() is a big no-op.> > @@ -1167,9 +1159,11 @@ void p2m_mem_access_resume(struct domain > > if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED ) > > vcpu_unpause(d->vcpu[rsp.vcpu_id]); > > > > - /* Unpause any domains that were paused because the ring was full or > > no listener > > - * was available */ > > - mem_event_unpause_vcpus(d); > > + /* Wake vcpus waiting for room in the ring */ > > + mem_event_wake_requesters(&d->mem_event->access); > > + > > + /* Unpause all vcpus that were paused because no listener was > > available */ > > + mem_event_wake_waiters(d, &d->mem_event->access); > Is this not used in p2m_mem_paging_resume? Why the difference? 
> Why are two mechanisms needed (wake_requesters, wake_sleepers)?

As said above, wake_sleepers is for those who wait for the ring itself,
and wake_requesters is for room in the ring.
p2m_mem_paging_resume() always has a ring, so it does not need to call
wake_sleepers.

Do you have a suggestion for a better name?

Olaf
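To put the two mechanisms side by side in code terms (every call below appears in the patch above, this is only a summary of the pairing):

    /* Waiting for the ring itself to appear (no listener yet): per-ring pause flag. */
    mem_event_mark_and_sleep(v, med);   /* set_bit(med->pause_flag, &v->pause_flags) + vcpu_sleep_nosync() */
    /* ... once the ring is enabled, or a response arrives on the access ring ... */
    mem_event_wake_waiters(d, med);     /* test_and_clear_bit() + vcpu_wake() for each vcpu */

    /* Waiting for room in an already active ring: wait queue. */
    wait_event(med->wq, _mem_event_put_request(d, med, req));
    /* ... once responses have freed up slots ... */
    mem_event_wake_requesters(med);     /* wake_up_nr(&med->wq, number of usable free slots) */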
Andres Lagar-Cavilla
2011-Dec-05 16:34 UTC
Re: [PATCH] mem_event: use wait queue when ring is full
> On Mon, Dec 05, Andres Lagar-Cavilla wrote: > >> > + med->bit = bit; >> I think it''s been asked before for this to have a more expressive name. > > I have to recheck, AFAIK it was the mem_bit where mem_ is redundant.how about pause_flag?> >> > static int mem_event_disable(struct mem_event_domain *med) >> > { >> > + if (!list_empty(&med->wq.list)) >> > + return -EBUSY; >> > + >> What does the caller do with EBUSY? Retry? > > Yes, and mail the devs at xen-devel that something isn''t right ;-)Heh, good one :)> > At least the pager uses this just in the exit path. I dont know about > access and sharing, wether these tools enable/disable more than once at > guest runtime. > >> > @@ -287,7 +394,7 @@ int mem_event_domctl(struct domain *d, x >> > if ( p2m->pod.entry_count ) >> > break; >> > >> > - rc = mem_event_enable(d, mec, med); >> > + rc = mem_event_enable(d, mec, _VPF_mem_paging, med); >> > } >> > break; >> > >> > @@ -326,7 +433,7 @@ int mem_event_domctl(struct domain *d, x >> > if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ) >> > break; >> > >> > - rc = mem_event_enable(d, mec, med); >> > + rc = mem_event_enable(d, mec, _VPF_mem_access, med); > >> Ok, the idea of bit is that different vcpus will sleep with different >> pause flags, depending on the ring they''re sleeping on. But this is only >> used in wake_waiters, which is not used by all rings. In fact, why do we >> need wake_waiters with wait queues? > > Before this patch, mem_event_unpause_vcpus() was used to resume waiters > for the ring itself and for room in the ring. > Now there is mem_event_wake_waiters(), which indicates the ring is > active, and there is mem_event_wake_requesters() which indicates the > ring has room to place guest requests.I think that if there is no ring where one is expected, harsher actions should happen. That is what we do in our patch. e.g. p2m_mem_paging_populate -> no ring -> crash domain, or p2m_mem_access_check -> access_required -> no ring -> crash domain. That would eliminate wake_waiters, methinks?> > I agree that only _VPF_mem_access is really needed, and _VPF_mem_paging > could be removed because paging without having a ring first is not > possible. > > >> > @@ -653,7 +643,7 @@ gfn_found: >> > if(ret == 0) goto private_page_found; >> > >> > old_page = page; >> > - page = mem_sharing_alloc_page(d, gfn); >> > + page = alloc_domheap_page(d, 0); >> > if(!page) >> > { >> > /* We''ve failed to obtain memory for private page. Need to >> re-add >> > the >> > @@ -661,6 +651,7 @@ gfn_found: >> > list_add(&gfn_info->list, &hash_entry->gfns); >> > put_gfn(d, gfn); >> > shr_unlock(); >> > + mem_sharing_notify_helper(d, gfn); >> This is nice. Do you think PoD could use this, should it ever run into a >> ENOMEM situation? And what about mem_paging_prep? Perhaps, rather than a >> sharing ring (which is bit rotted) we could have an ENOMEM ring with a >> utility launched by xencommons listening. The problem, again, is what if >> ENOMEM is itself caused by dom0 (e.g. writable mapping of a shared page) > > I have no idea about mem_sharing. I just move the existing code outside > the lock so that mem_event_put_request() is (hopefully) called without > any locks from mem_sharing_get_nr_saved_mfns(). > Since there is appearently no user of a sharing ring, this whole new > mem_sharing_notify_helper() is a big no-op.Fair enough. I do think that generally, for x86/mm, an ENOMEM mem_event ring is a good idea. 
Later...> >> > @@ -1167,9 +1159,11 @@ void p2m_mem_access_resume(struct domain >> > if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED ) >> > vcpu_unpause(d->vcpu[rsp.vcpu_id]); >> > >> > - /* Unpause any domains that were paused because the ring was full >> or >> > no listener >> > - * was available */ >> > - mem_event_unpause_vcpus(d); >> > + /* Wake vcpus waiting for room in the ring */ >> > + mem_event_wake_requesters(&d->mem_event->access); >> > + >> > + /* Unpause all vcpus that were paused because no listener was >> > available */ >> > + mem_event_wake_waiters(d, &d->mem_event->access); >> Is this not used in p2m_mem_paging_resume? Why the difference? Why are >> two >> mechanisms needed (wake_requesters, wake_sleepers)? > > As said above, wake_sleepers is for those who wait for the ring itself, > and wake_requesters is for room in the ring. > p2m_mem_paging_resume() always has a ring, so it does not need to call > wake_sleepers. > > > Do you have a suggestion for a better name? > > Olaf >
On Mon, Dec 05, Andres Lagar-Cavilla wrote:

> > On Mon, Dec 05, Andres Lagar-Cavilla wrote:
> >
> >> > + med->bit = bit;
> >> I think it's been asked before for this to have a more expressive name.
> >
> > I have to recheck, AFAIK it was the mem_bit where mem_ is redundant.
> how about pause_flag?

I made this change in my patch.

> > Before this patch, mem_event_unpause_vcpus() was used to resume waiters
> > for the ring itself and for room in the ring.
> > Now there is mem_event_wake_waiters(), which indicates the ring is
> > active, and there is mem_event_wake_requesters() which indicates the
> > ring has room to place guest requests.
>
> I think that if there is no ring where one is expected, harsher actions
> should happen. That is what we do in our patch. e.g.
> p2m_mem_paging_populate -> no ring -> crash domain, or
> p2m_mem_access_check -> access_required -> no ring -> crash domain.
>
> That would eliminate wake_waiters, methinks?

In p2m_mem_paging_populate() a sanity check could be added. I think it
would indicate bad p2mt state because nominate was called without ring.
How else can a gfn enter paging state?

Olaf
Andres Lagar-Cavilla
2011-Dec-07 16:27 UTC
Re: [PATCH] mem_event: use wait queue when ring is full
> On Mon, Dec 05, Andres Lagar-Cavilla wrote:
>
>> > On Mon, Dec 05, Andres Lagar-Cavilla wrote:
>> >
>> >> > + med->bit = bit;
>> >> I think it's been asked before for this to have a more expressive name.
>> >
>> > I have to recheck, AFAIK it was the mem_bit where mem_ is redundant.
>> how about pause_flag?
>
> I made this change in my patch.
>
>> > Before this patch, mem_event_unpause_vcpus() was used to resume waiters
>> > for the ring itself and for room in the ring.
>> > Now there is mem_event_wake_waiters(), which indicates the ring is
>> > active, and there is mem_event_wake_requesters() which indicates the
>> > ring has room to place guest requests.
>>
>> I think that if there is no ring where one is expected, harsher actions
>> should happen. That is what we do in our patch. e.g.
>> p2m_mem_paging_populate -> no ring -> crash domain, or
>> p2m_mem_access_check -> access_required -> no ring -> crash domain.
>>
>> That would eliminate wake_waiters, methinks?
>
> In p2m_mem_paging_populate() a sanity check could be added. I think it
> would indicate bad p2mt state because nominate was called without ring.
> How else can a gfn enter paging state?

Definitely. Crash that domain. Maybe the pager crashed and burned, or quit carelessly.

Andres

> Olaf
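For illustration only, the check being agreed on here might look roughly like this near the top of p2m_mem_paging_populate(); the message text and exact placement are made up, this is not part of the posted patch:

    /* A gfn can only be in a paging p2m type if a pager set up the paging ring,
     * so getting here without a ring indicates corrupted state: crash the domain. */
    if ( !d->mem_event->paging.ring_page )
    {
        gdprintk(XENLOG_ERR, "paged gfn %lx but no paging ring, dom %d\n",
                 gfn, d->domain_id);
        domain_crash(d);
        return;
    }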
# HG changeset patch # User Olaf Hering <olaf@aepfle.de> # Date 1323456817 -3600 # Node ID b25b2bcc2c6a987086bf37ec67a64d989813eb4d # Parent 1c58bb664d8d55e475d179cb5f81693991859fc8 mem_event: use wait queue when ring is full This change is based on an idea/patch from Adin Scannell. If the ring is full, put the current vcpu to sleep if it belongs to the target domain. The wakeup happens in the p2m_*_resume functions. Wakeup will take the number of free slots into account. A request from foreign domain has to succeed once a slot was claimed because such vcpus can not sleep. This change fixes also a bug in p2m_mem_paging_drop_page(). Up to now a full ring will lead to harmless inconsistency in the pager. v6: - take foreign requests into account before calling wake_up_nr() - call wake_up_nr() outside of ring lock - rename ->bit to ->pause_flag - update comment in mem_event_enable() v5: - rename mem_event_check_ring() to mem_event_claim_slot() - rename mem_event_put_req_producers() to mem_event_release_slot() - add local/foreign request accounting - keep room for at least one guest request v4: - fix off-by-one bug in _mem_event_put_request - add mem_event_wake_requesters() and use wake_up_nr() - rename mem_event_mark_and_pause() and mem_event_mark_and_pause() functions - req_producers counts foreign request producers, rename member v3: - rename ->mem_event_bit to ->bit - remove me_ from new VPF_ defines v2: - p2m_mem_paging_populate: move vcpu_pause after put_request, otherwise the vcpu will not wake_up after a wait_event because the pause_count was increased twice. Fixes guest hangs. - update free space check in _mem_event_put_request() - simplify mem_event_put_request() Signed-off-by: Olaf Hering <olaf@aepfle.de> diff -r 1c58bb664d8d -r b25b2bcc2c6a xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -4172,8 +4172,8 @@ static int hvm_memory_event_traps(long p if ( (p & HVMPME_onchangeonly) && (value == old) ) return 1; - rc = mem_event_check_ring(d, &d->mem_event->access); - if ( rc ) + rc = mem_event_claim_slot(d, &d->mem_event->access); + if ( rc < 0 ) return rc; memset(&req, 0, sizeof(req)); diff -r 1c58bb664d8d -r b25b2bcc2c6a xen/arch/x86/mm/mem_event.c --- a/xen/arch/x86/mm/mem_event.c +++ b/xen/arch/x86/mm/mem_event.c @@ -23,6 +23,7 @@ #include <asm/domain.h> #include <xen/event.h> +#include <xen/wait.h> #include <asm/p2m.h> #include <asm/mem_event.h> #include <asm/mem_paging.h> @@ -41,6 +42,7 @@ static int mem_event_enable( struct domain *d, xen_domctl_mem_event_op_t *mec, struct mem_event_domain *med, + int pause_flag, xen_event_channel_notification_t notification_fn) { int rc; @@ -110,8 +112,12 @@ static int mem_event_enable( mem_event_ring_lock_init(med); - /* Wake any VCPUs paused for memory events */ - mem_event_unpause_vcpus(d); + med->pause_flag = pause_flag; + + init_waitqueue_head(&med->wq); + + /* Wake any VCPUs waiting for the ring to appear */ + mem_event_wake_waiters(d, med); return 0; @@ -127,6 +133,9 @@ static int mem_event_enable( static int mem_event_disable(struct mem_event_domain *med) { + if (!list_empty(&med->wq.list)) + return -EBUSY; + unmap_domain_page(med->ring_page); med->ring_page = NULL; @@ -136,13 +145,24 @@ static int mem_event_disable(struct mem_ return 0; } -void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req) +static int _mem_event_put_request(struct domain *d, + struct mem_event_domain *med, + mem_event_request_t *req) { mem_event_front_ring_t *front_ring; + int free_req, 
claimed_req; RING_IDX req_prod; mem_event_ring_lock(med); + free_req = RING_FREE_REQUESTS(&med->front_ring); + /* Foreign requests must succeed because their vcpus can not sleep */ + claimed_req = med->foreign_producers; + if ( !free_req || ( current->domain == d && free_req <= claimed_req ) ) { + mem_event_ring_unlock(med); + return 0; + } + front_ring = &med->front_ring; req_prod = front_ring->req_prod_pvt; @@ -156,14 +176,35 @@ void mem_event_put_request(struct domain memcpy(RING_GET_REQUEST(front_ring, req_prod), req, sizeof(*req)); req_prod++; + /* Update accounting */ + if ( current->domain == d ) + med->target_producers--; + else + med->foreign_producers--; + /* Update ring */ - med->req_producers--; front_ring->req_prod_pvt = req_prod; RING_PUSH_REQUESTS(front_ring); mem_event_ring_unlock(med); notify_via_xen_event_channel(d, med->xen_port); + + return 1; +} + +void mem_event_put_request(struct domain *d, struct mem_event_domain *med, + mem_event_request_t *req) +{ + /* Go to sleep if request came from guest */ + if (current->domain == d) { + wait_event(med->wq, _mem_event_put_request(d, med, req)); + return; + } + /* Ring was full anyway, unable to sleep in non-guest context */ + if (!_mem_event_put_request(d, med, req)) + printk("Failed to put memreq: d %u t %x f %x gfn %lx\n", d->domain_id, + req->type, req->flags, (unsigned long)req->gfn); } int mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp) @@ -195,32 +236,97 @@ int mem_event_get_response(struct mem_ev return 1; } -void mem_event_unpause_vcpus(struct domain *d) +/** + * mem_event_wake_requesters - Wake vcpus waiting for room in the ring + * @d: guest domain + * @med: mem_event ring + * + * mem_event_wake_requesters() will wakeup vcpus waiting for room in the + * ring. Only as many as can place another request in the ring will + * resume execution. + */ +void mem_event_wake_requesters(struct mem_event_domain *med) +{ + int free_req; + + mem_event_ring_lock(med); + free_req = RING_FREE_REQUESTS(&med->front_ring); + free_req -= med->foreign_producers; + mem_event_ring_unlock(med); + + if ( free_req ) + wake_up_nr(&med->wq, free_req); +} + +/** + * mem_event_wake_waiters - Wake all vcpus waiting for the ring + * @d: guest domain + * @med: mem_event ring + * + * mem_event_wake_waiters() will wakeup all vcpus waiting for the ring to + * become available. + */ +void mem_event_wake_waiters(struct domain *d, struct mem_event_domain *med) { struct vcpu *v; for_each_vcpu ( d, v ) - if ( test_and_clear_bit(_VPF_mem_event, &v->pause_flags) ) + if ( test_and_clear_bit(med->pause_flag, &v->pause_flags) ) vcpu_wake(v); } -void mem_event_mark_and_pause(struct vcpu *v) +/** + * mem_event_mark_and_sleep - Put vcpu to sleep + * @v: guest vcpu + * @med: mem_event ring + * + * mem_event_mark_and_sleep() tags vcpu and put it to sleep. + * The vcpu will resume execution in mem_event_wake_waiters(). + */ +void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain *med) { - set_bit(_VPF_mem_event, &v->pause_flags); + set_bit(med->pause_flag, &v->pause_flags); vcpu_sleep_nosync(v); } -void mem_event_put_req_producers(struct mem_event_domain *med) +/** + * mem_event_release_slot - Release a claimed slot + * @med: mem_event ring + * + * mem_event_release_slot() releases a claimed slot in the mem_event ring. 
+ */ +void mem_event_release_slot(struct domain *d, struct mem_event_domain *med) { mem_event_ring_lock(med); - med->req_producers--; + if ( current->domain == d ) + med->target_producers--; + else + med->foreign_producers--; mem_event_ring_unlock(med); } -int mem_event_check_ring(struct domain *d, struct mem_event_domain *med) +/** + * mem_event_claim_slot - Check state of a mem_event ring + * @d: guest domain + * @med: mem_event ring + * + * Return codes: < 0: the ring is not yet configured + * 0: the ring has some room + * > 0: the ring is full + * + * mem_event_claim_slot() checks the state of the given mem_event ring. + * If the current vcpu belongs to the guest domain, the function assumes that + * mem_event_put_request() will sleep until the ring has room again. + * A guest can always place at least one request. + * + * If the current vcpu does not belong to the target domain the caller must try + * again until there is room. A slot is claimed and the caller can place a + * request. If the caller does not need to send a request, the claimed slot has + * to be released with mem_event_release_slot(). + */ +int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med) { - struct vcpu *curr = current; - int free_requests; + int free_req; int ring_full = 1; if ( !med->ring_page ) @@ -228,16 +334,17 @@ int mem_event_check_ring(struct domain * mem_event_ring_lock(med); - free_requests = RING_FREE_REQUESTS(&med->front_ring); - if ( med->req_producers < free_requests ) + free_req = RING_FREE_REQUESTS(&med->front_ring); + + if ( current->domain == d ) { + med->target_producers++; + ring_full = 0; + } else if ( med->foreign_producers + med->target_producers + 1 < free_req ) { - med->req_producers++; + med->foreign_producers++; ring_full = 0; } - if ( ring_full && (curr->domain == d) ) - mem_event_mark_and_pause(curr); - mem_event_ring_unlock(med); return ring_full; @@ -316,7 +423,7 @@ int mem_event_domctl(struct domain *d, x if ( p2m->pod.entry_count ) break; - rc = mem_event_enable(d, mec, med, mem_paging_notification); + rc = mem_event_enable(d, mec, med, _VPF_mem_paging, mem_paging_notification); } break; @@ -355,7 +462,7 @@ int mem_event_domctl(struct domain *d, x if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ) break; - rc = mem_event_enable(d, mec, med, mem_access_notification); + rc = mem_event_enable(d, mec, med, _VPF_mem_access, mem_access_notification); } break; diff -r 1c58bb664d8d -r b25b2bcc2c6a xen/arch/x86/mm/mem_sharing.c --- a/xen/arch/x86/mm/mem_sharing.c +++ b/xen/arch/x86/mm/mem_sharing.c @@ -253,18 +253,10 @@ static void mem_sharing_audit(void) #endif -static struct page_info* mem_sharing_alloc_page(struct domain *d, - unsigned long gfn) +static void mem_sharing_notify_helper(struct domain *d, unsigned long gfn) { - struct page_info* page; struct vcpu *v = current; - mem_event_request_t req; - - page = alloc_domheap_page(d, 0); - if(page != NULL) return page; - - memset(&req, 0, sizeof(req)); - req.type = MEM_EVENT_TYPE_SHARED; + mem_event_request_t req = { .type = MEM_EVENT_TYPE_SHARED }; if ( v->domain != d ) { @@ -275,20 +267,18 @@ static struct page_info* mem_sharing_all gdprintk(XENLOG_ERR, "Failed alloc on unshare path for foreign (%d) lookup\n", d->domain_id); - return page; + return; } - vcpu_pause_nosync(v); - req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED; + if (mem_event_claim_slot(d, &d->mem_event->share) < 0) + return; - if(mem_event_check_ring(d, &d->mem_event->share)) return page; - + req.flags = MEM_EVENT_FLAG_VCPU_PAUSED; req.gfn = gfn; req.p2mt = 
p2m_ram_shared; req.vcpu_id = v->vcpu_id; mem_event_put_request(d, &d->mem_event->share, &req); - - return page; + vcpu_pause_nosync(v); } unsigned int mem_sharing_get_nr_saved_mfns(void) @@ -656,7 +646,7 @@ gfn_found: if(ret == 0) goto private_page_found; old_page = page; - page = mem_sharing_alloc_page(d, gfn); + page = alloc_domheap_page(d, 0); if(!page) { /* We''ve failed to obtain memory for private page. Need to re-add the @@ -664,6 +654,7 @@ gfn_found: list_add(&gfn_info->list, &hash_entry->gfns); put_gfn(d, gfn); shr_unlock(); + mem_sharing_notify_helper(d, gfn); return -ENOMEM; } diff -r 1c58bb664d8d -r b25b2bcc2c6a xen/arch/x86/mm/p2m.c --- a/xen/arch/x86/mm/p2m.c +++ b/xen/arch/x86/mm/p2m.c @@ -861,20 +861,12 @@ int p2m_mem_paging_evict(struct domain * */ void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn) { - struct vcpu *v = current; - mem_event_request_t req; + mem_event_request_t req = { .type = MEM_EVENT_TYPE_PAGING, .gfn = gfn }; - /* Check that there''s space on the ring for this request */ - if ( mem_event_check_ring(d, &d->mem_event->paging) == 0) - { - /* Send release notification to pager */ - memset(&req, 0, sizeof(req)); - req.flags |= MEM_EVENT_FLAG_DROP_PAGE; - req.gfn = gfn; - req.vcpu_id = v->vcpu_id; + /* Send release notification to pager */ + req.flags = MEM_EVENT_FLAG_DROP_PAGE; - mem_event_put_request(d, &d->mem_event->paging, &req); - } + mem_event_put_request(d, &d->mem_event->paging, &req); } /** @@ -908,7 +900,7 @@ void p2m_mem_paging_populate(struct doma struct p2m_domain *p2m = p2m_get_hostp2m(d); /* Check that there''s space on the ring for this request */ - if ( mem_event_check_ring(d, &d->mem_event->paging) ) + if ( mem_event_claim_slot(d, &d->mem_event->paging) ) return; memset(&req, 0, sizeof(req)); @@ -938,7 +930,7 @@ void p2m_mem_paging_populate(struct doma else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged ) { /* gfn is already on its way back and vcpu is not paused */ - mem_event_put_req_producers(&d->mem_event->paging); + mem_event_release_slot(d, &d->mem_event->paging); return; } @@ -1080,8 +1072,8 @@ void p2m_mem_paging_resume(struct domain vcpu_unpause(d->vcpu[rsp.vcpu_id]); } - /* Unpause any domains that were paused because the ring was full */ - mem_event_unpause_vcpus(d); + /* Wake vcpus waiting for room in the ring */ + mem_event_wake_requesters(&d->mem_event->paging); } bool_t p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long gla, @@ -1115,7 +1107,7 @@ bool_t p2m_mem_access_check(unsigned lon p2m_unlock(p2m); /* Otherwise, check if there is a memory event listener, and send the message along */ - res = mem_event_check_ring(d, &d->mem_event->access); + res = mem_event_claim_slot(d, &d->mem_event->access); if ( res < 0 ) { /* No listener */ @@ -1125,7 +1117,7 @@ bool_t p2m_mem_access_check(unsigned lon "Memory access permissions failure, no mem_event listener: pausing VCPU %d, dom %d\n", v->vcpu_id, d->domain_id); - mem_event_mark_and_pause(v); + mem_event_mark_and_sleep(v, &d->mem_event->access); } else { @@ -1186,9 +1178,11 @@ void p2m_mem_access_resume(struct domain vcpu_unpause(d->vcpu[rsp.vcpu_id]); } - /* Unpause any domains that were paused because the ring was full or no listener - * was available */ - mem_event_unpause_vcpus(d); + /* Wake vcpus waiting for room in the ring */ + mem_event_wake_requesters(&d->mem_event->access); + + /* Unpause all vcpus that were paused because no listener was available */ + mem_event_wake_waiters(d, &d->mem_event->access); } diff -r 1c58bb664d8d 
-r b25b2bcc2c6a xen/include/asm-x86/mem_event.h --- a/xen/include/asm-x86/mem_event.h +++ b/xen/include/asm-x86/mem_event.h @@ -24,13 +24,13 @@ #ifndef __MEM_EVENT_H__ #define __MEM_EVENT_H__ -/* Pauses VCPU while marking pause flag for mem event */ -void mem_event_mark_and_pause(struct vcpu *v); -int mem_event_check_ring(struct domain *d, struct mem_event_domain *med); -void mem_event_put_req_producers(struct mem_event_domain *med); +int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med); +void mem_event_release_slot(struct domain *d, struct mem_event_domain *med); void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req); int mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp); -void mem_event_unpause_vcpus(struct domain *d); +void mem_event_wake_requesters(struct mem_event_domain *med); +void mem_event_wake_waiters(struct domain *d, struct mem_event_domain *med); +void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain *med); int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec, XEN_GUEST_HANDLE(void) u_domctl); diff -r 1c58bb664d8d -r b25b2bcc2c6a xen/include/xen/sched.h --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -14,6 +14,7 @@ #include <xen/nodemask.h> #include <xen/radix-tree.h> #include <xen/multicall.h> +#include <xen/wait.h> #include <public/xen.h> #include <public/domctl.h> #include <public/sysctl.h> @@ -183,7 +184,8 @@ struct mem_event_domain { /* ring lock */ spinlock_t ring_lock; - unsigned int req_producers; + unsigned short foreign_producers; + unsigned short target_producers; /* shared page */ mem_event_shared_page_t *shared_page; /* shared ring page */ @@ -192,6 +194,10 @@ struct mem_event_domain mem_event_front_ring_t front_ring; /* event channel port (vcpu0 only) */ int xen_port; + /* mem_event bit for vcpu->pause_flags */ + int pause_flag; + /* list of vcpus waiting for room in the ring */ + struct waitqueue_head wq; }; struct mem_event_per_domain @@ -615,9 +621,12 @@ static inline struct domain *next_domain /* VCPU affinity has changed: migrating to a new CPU. */ #define _VPF_migrating 3 #define VPF_migrating (1UL<<_VPF_migrating) - /* VCPU is blocked on memory-event ring. */ -#define _VPF_mem_event 4 -#define VPF_mem_event (1UL<<_VPF_mem_event) + /* VCPU is blocked due to missing mem_paging ring. */ +#define _VPF_mem_paging 4 +#define VPF_mem_paging (1UL<<_VPF_mem_paging) + /* VCPU is blocked due to missing mem_access ring. */ +#define _VPF_mem_access 5 +#define VPF_mem_access (1UL<<_VPF_mem_access) static inline int vcpu_runnable(struct vcpu *v) {
Andres Lagar-Cavilla
2011-Dec-10 05:22 UTC
Re: [PATCH] mem_event: use wait queue when ring is full
Olaf,
Tim pointed out we need both solutions to ring management in the
hypervisor. With our patch ("Improve ring management for memory events. Do
not lose guest events."), we can handle the common case quickly, without
preempting VMs. With your patch, we can handle extreme situations of ring
congestion with the big hammer called wait queue.

After thinking a little bit about how to integrate both solutions, I've
come to one way of doing it. Might not be the best, but it's an option.

In our patch, we have this pseudo-code snippet arch/x86/mm/mem_event.c:

mem_event_put_request
{
    if (foreign vcpu) and (space_in_ring < vcpus)
        /* Foreign vcpus need to retry. No mercy */
        return -EAGAIN

    if ( mem_event_ring_free(d, med) == 0 )
    {
        /* This *should* never happen */
        gdprintk(XENLOG_WARNING, "possible lost mem_event for domain %d\n",
                 d->domain_id);
        ret = -EBUSY;   <-- *go to sleep on a waitqueue here, instead*
    }
    else
        put_event

    if (space_in_ring < vcpus) && (guest vcpu)
        pause this vcpu
        ret = -EBUSY
}

I hope my pseudo-code description makes sense. All the meta data for queue
management from your patch is necessary. But there is one single,
well-defined, point for wait queue invocation. And the rest is taken care
of (hopefully)

A thought,
Andres

> Date: Fri, 09 Dec 2011 20:23:55 +0100
> From: Olaf Hering <olaf@aepfle.de>
> To: xen-devel@lists.xensource.com
> Subject: [Xen-devel] [PATCH] mem_event: use wait queue when ring is full
> Message-ID: <b25b2bcc2c6a987086bf.1323458635@probook.site>
> Content-Type: text/plain; charset="us-ascii"
>
> # HG changeset patch
> # User Olaf Hering <olaf@aepfle.de>
> # Date 1323456817 -3600
> # Node ID b25b2bcc2c6a987086bf37ec67a64d989813eb4d
> # Parent 1c58bb664d8d55e475d179cb5f81693991859fc8
> mem_event: use wait queue when ring is full
>
> This change is based on an idea/patch from Adin Scannell.
>
> If the ring is full, put the current vcpu to sleep if it belongs to the
> target domain. The wakeup happens in the p2m_*_resume functions. Wakeup
> will take the number of free slots into account.
>
> A request from foreign domain has to succeed once a slot was claimed
> because such vcpus can not sleep.
>
> This change fixes also a bug in p2m_mem_paging_drop_page(). Up to now a
> full ring will lead to harmless inconsistency in the pager.
>
> v6:
> - take foreign requests into account before calling wake_up_nr()
> - call wake_up_nr() outside of ring lock
> - rename ->bit to ->pause_flag
> - update comment in mem_event_enable()
>
> v5:
> - rename mem_event_check_ring() to mem_event_claim_slot()
> - rename mem_event_put_req_producers() to mem_event_release_slot()
> - add local/foreign request accounting
> - keep room for at least one guest request
>
> v4:
> - fix off-by-one bug in _mem_event_put_request
> - add mem_event_wake_requesters() and use wake_up_nr()
> - rename mem_event_mark_and_pause() and mem_event_mark_and_pause()
>   functions
> - req_producers counts foreign request producers, rename member
>
> v3:
> - rename ->mem_event_bit to ->bit
> - remove me_ from new VPF_ defines
>
> v2:
> - p2m_mem_paging_populate: move vcpu_pause after put_request, otherwise the
>   vcpu will not wake_up after a wait_event because the pause_count was
>   increased twice. Fixes guest hangs.
> - update free space check in _mem_event_put_request()
> - simplify mem_event_put_request()
>
> Signed-off-by: Olaf Hering <olaf@aepfle.de>
>
> [...]
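For readers following the discussion, here is a rough C rendering of the
single-invocation-point idea sketched in the pseudo-code above. It is an
illustration only, not either patch verbatim: ring_space(),
guest_vcpu_count() and put_event() are made-up helpers standing in for the
real ring accessors, and the error codes simply mirror the pseudo-code.

/* Illustrative sketch only -- not either patch verbatim. */
static int mem_event_try_put(struct domain *d, struct mem_event_domain *med,
                             mem_event_request_t *req)
{
    int ret = 0;

    mem_event_ring_lock(med);

    if ( current->domain != d && ring_space(med) < guest_vcpu_count(d) )
    {
        /* Foreign vcpus need to retry, they can not sleep. No mercy. */
        ret = -EAGAIN;
    }
    else if ( ring_space(med) == 0 )
    {
        /* The single, well-defined point where a guest vcpu would go to
         * sleep on med->wq instead of losing the event. */
        ret = -EBUSY;
    }
    else
    {
        put_event(med, req);
        /* Keep one slot in reserve per guest vcpu: pause this vcpu. */
        if ( current->domain == d && ring_space(med) < guest_vcpu_count(d) )
            ret = -EBUSY;
    }

    mem_event_ring_unlock(med);
    return ret;
}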
On Fri, Dec 09, Andres Lagar-Cavilla wrote:

> Olaf,
> Tim pointed out we need both solutions to ring management in the
> hypervisor. With our patch ("Improve ring management for memory events. Do
> not lose guest events."), we can handle the common case quickly, without
> preempting VMs. With your patch, we can handle extreme situations of ring
> congestion with the big hammer called wait queue.

With my patch the requests get processed as they come in, both foreign and
target requests get handled equally. There is no special accounting.

A few questions about your requirements:

- Is the goal that each guest vcpu can always put at least one request?
- How many requests should foreign vcpus place in the ring if the guest
has more vcpus than available slots in the ring? Just a single one so
that foreigners can also make some progress?
- Should access and paging have the same rules for accounting?

Olaf
Hi,

At 20:23 +0100 on 09 Dec (1323462235), Olaf Hering wrote:
> mem_event: use wait queue when ring is full
>
> This change is based on an idea/patch from Adin Scannell.
>
> If the ring is full, put the current vcpu to sleep if it belongs to the
> target domain. The wakeup happens in the p2m_*_resume functions. Wakeup
> will take the number of free slots into account.
>
> A request from foreign domain has to succeed once a slot was claimed
> because such vcpus can not sleep.
>
> This change fixes also a bug in p2m_mem_paging_drop_page(). Up to now a
> full ring will lead to harmless inconsistency in the pager.

[...]

> Signed-off-by: Olaf Hering <olaf@aepfle.de>

This patch is OK by me as it stands, so

Acked-by: Tim Deegan <tim@xen.org>

but I'll wait for an explicit ack from Andres or Adin before applying it.

Cheers,

Tim.
On Thu, Dec 15, Tim Deegan wrote:

> This patch is OK by me as it stands, so
> Acked-by: Tim Deegan <tim@xen.org>
>
> but I'll wait for an explicit ack from Andres or Adin before applying
> it.

Thanks. I will add some sort of accounting as it is done in the version
from Andres, so that each vcpu can place at least one request. Hopefully I
will send v7 of my change out today.

Olaf
Andres Lagar-Cavilla
2011-Dec-15 14:56 UTC
Re: [PATCH] mem_event: use wait queue when ring is full
> Date: Tue, 13 Dec 2011 14:40:16 +0100
> From: Olaf Hering <olaf@aepfle.de>
> To: Andres Lagar-Cavilla <andres@lagarcavilla.org>
> Cc: xen-devel@lists.xensource.com, tim@xen.org
> Subject: Re: [Xen-devel] [PATCH] mem_event: use wait queue when ring is full
> Message-ID: <20111213134016.GA20700@aepfle.de>
> Content-Type: text/plain; charset=utf-8
>
> On Fri, Dec 09, Andres Lagar-Cavilla wrote:
>
>> Olaf,
>> Tim pointed out we need both solutions to ring management in the
>> hypervisor. With our patch ("Improve ring management for memory events. Do
>> not lose guest events."), we can handle the common case quickly, without
>> preempting VMs. With your patch, we can handle extreme situations of ring
>> congestion with the big hammer called wait queue.
>
> With my patch the requests get processed as they come in, both foreign
> and target requests get handled equally. There is no special accounting.
>
> A few questions about your requirements:
> - Is the goal that each guest vcpu can always put at least one request?

Yes

> - How many requests should foreign vcpus place in the ring if the guest
> has more vcpus than available slots in the ring? Just a single one so
> that foreigners can also make some progress?

The idea is that foreign vcpus can place as many events as they want as
long as each guest vcpu that is not blocked on a mem event has room to
send one mem event. When we reach that border condition, no more foreign
mem events.

The case for which there are way too many guest vcpus needs to be handled,
either by capping the max number of vcpus for domains using a mem event,
or by growing the ring size.

> - Should access and paging have the same rules for accounting?

Absolutely.

And both should use wait queues in extreme cases in which a guest vcpu
with a single action generates multiple memory events. Given that when we
hit a border condition the guest vcpu will place one event and be flagged
VPF_mem_event_paused (or whatever that flag is named), if a guest vcpu
generates another event when flagged, that's our cue for putting the
vcpu on a wait queue.

Thanks
Andres

> Olaf
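To make the border condition described above concrete, here is a small
stand-alone model of the policy. This is not Xen code; the names
(claim_foreign_slot, foreign_claims, RING_ENTRIES) are invented for the
example, and the 64-entry ring only matches the one-page ring size
mentioned later in the thread.

#include <stdbool.h>
#include <stdio.h>

#define RING_ENTRIES 64

static int foreign_claims;      /* slots claimed by foreign (tool stack) vcpus */

/* Grant a foreign claim only while every unblocked guest vcpu could still
 * place one request of its own. */
static bool claim_foreign_slot(int used, int unblocked_guest_vcpus)
{
    int free_slots = RING_ENTRIES - used - foreign_claims;

    if ( free_slots <= unblocked_guest_vcpus )
        return false;           /* border condition reached: foreigners must retry */
    foreign_claims++;
    return true;
}

int main(void)
{
    int i, granted = 0;

    /* Empty ring, 8 unblocked guest vcpus: foreign claims stop once only
     * 8 free slots remain. */
    for ( i = 0; i < RING_ENTRIES; i++ )
        if ( claim_foreign_slot(0, 8) )
            granted++;

    printf("foreign claims granted: %d of %d\n", granted, RING_ENTRIES); /* 56 of 64 */
    return 0;
}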
On Thu, Dec 15, Andres Lagar-Cavilla wrote:

> > - How many requests should foreign vcpus place in the ring if the guest
> > has more vcpus than available slots in the ring? Just a single one so
> > that foreigners can also make some progress?
>
> The idea is that foreign vcpus can place as many events as they want as
> long as each guest vcpu that is not blocked on a mem event has room to
> send one mem event. When we reach that border condition, no more foreign
> mem events.
>
> The case for which there are way too many guest vcpus needs to be handled,
> either by capping the max number of vcpus for domains using a mem event,
> or by growing the ring size.

Right now the ring is one page, so it can not hold more than 64 entries.
If that ever changes, the accounting can be adjusted.

> > - Should access and paging have the same rules for accounting?
>
> Absolutely.
>
> And both should use wait queues in extreme cases in which a guest vcpu
> with a single action generates multiple memory events. Given that when we
> hit a border condition the guest vcpu will place one event and be flagged
> VPF_mem_event_paused (or whatever that flag is named), if a guest vcpu
> generates another event when flagged, that's our cue for putting the
> vcpu on a wait queue.

An extra flag is not needed.

Below is an incremental patch (on top of v6) which does some accounting.
It's just compile tested.

Olaf


diff -r 5d5d10e1568b xen/arch/x86/mm/mem_event.c
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -114,6 +114,19 @@ static int mem_event_enable(
 
     med->pause_flag = pause_flag;
 
+    /*
+     * Configure ring accounting:
+     * Each guest vcpu should be able to place at least one request.
+     * If there are more vcpus than available slots in the ring, not all vcpus
+     * can place requests in the ring anyway. A minimum (arbitrary) number of
+     * foreign requests will be allowed in this case.
+     */
+    if ( d->max_vcpus < RING_SIZE(&med->front_ring) )
+        med->max_foreign = RING_SIZE(&med->front_ring) - d->max_vcpus;
+    if ( med->max_foreign < 13 )
+        med->max_foreign = 13;
+    med->max_target = RING_SIZE(&med->front_ring) - med->max_foreign;
+
     init_waitqueue_head(&med->wq);
 
     /* Wake any VCPUs waiting for the ring to appear */
@@ -147,23 +160,28 @@ static int mem_event_disable(struct mem_
 
 static int _mem_event_put_request(struct domain *d,
                                   struct mem_event_domain *med,
-                                  mem_event_request_t *req)
+                                  mem_event_request_t *req,
+                                  int *done)
 {
     mem_event_front_ring_t *front_ring;
-    int free_req, claimed_req;
+    int free_req, claimed_req, ret;
     RING_IDX req_prod;
 
+    if ( *done )
+        return 1;
+
     mem_event_ring_lock(med);
 
-    free_req = RING_FREE_REQUESTS(&med->front_ring);
+    front_ring = &med->front_ring;
+
     /* Foreign requests must succeed because their vcpus can not sleep */
     claimed_req = med->foreign_producers;
+    free_req = RING_FREE_REQUESTS(front_ring);
     if ( !free_req || ( current->domain == d && free_req <= claimed_req ) ) {
         mem_event_ring_unlock(med);
         return 0;
     }
 
-    front_ring = &med->front_ring;
     req_prod = front_ring->req_prod_pvt;
 
     if ( current->domain != d )
@@ -176,9 +194,18 @@ static int _mem_event_put_request(struct
     memcpy(RING_GET_REQUEST(front_ring, req_prod), req, sizeof(*req));
     req_prod++;
 
+    ret = 1;
+    *done = 1;
+    free_req--;
+
     /* Update accounting */
     if ( current->domain == d )
+    {
         med->target_producers--;
+        /* Ring is full, no more requests from this vcpu, go to sleep */
+        if ( free_req < med->max_target )
+            ret = 0;
+    }
     else
         med->foreign_producers--;
 
@@ -190,19 +217,20 @@ static int _mem_event_put_request(struct
 
     notify_via_xen_event_channel(d, med->xen_port);
 
-    return 1;
+    return ret;
 }
 
 void mem_event_put_request(struct domain *d, struct mem_event_domain *med,
                            mem_event_request_t *req)
 {
+    int done = 0;
     /* Go to sleep if request came from guest */
     if (current->domain == d) {
-        wait_event(med->wq, _mem_event_put_request(d, med, req));
+        wait_event(med->wq, _mem_event_put_request(d, med, req, &done));
         return;
     }
     /* Ring was full anyway, unable to sleep in non-guest context */
-    if (!_mem_event_put_request(d, med, req))
+    if (!_mem_event_put_request(d, med, req, &done))
         printk("Failed to put memreq: d %u t %x f %x gfn %lx\n", d->domain_id,
                req->type, req->flags, (unsigned long)req->gfn);
 }
@@ -341,7 +369,8 @@ int mem_event_claim_slot(struct domain *
         med->target_producers++;
         ring_full = 0;
     }
-    else if ( med->foreign_producers + med->target_producers + 1 < free_req )
+    else if ( med->foreign_producers + med->target_producers < free_req &&
+              med->foreign_producers < med->max_foreign )
     {
         med->foreign_producers++;
         ring_full = 0;
diff -r 5d5d10e1568b xen/include/xen/sched.h
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -184,8 +184,11 @@ struct mem_event_domain
 {
     /* ring lock */
     spinlock_t ring_lock;
-    unsigned short foreign_producers;
-    unsigned short target_producers;
+    /* The ring has 64 entries */
+    unsigned char foreign_producers;
+    unsigned char max_foreign;
+    unsigned char target_producers;
+    unsigned char max_target;
     /* shared page */
     mem_event_shared_page_t *shared_page;
     /* shared ring page */
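For a concrete feel of the numbers in this incremental patch, assuming the
one-page ring really holds 64 entries as noted above: a guest with 8 vcpus
gets max_foreign = 64 - 8 = 56 and max_target = 8, so every vcpu keeps one
guaranteed slot. A hypothetical guest with 60 vcpus would compute
64 - 60 = 4, which the arbitrary floor raises to max_foreign = 13 and
max_target = 51, so foreign mappers can still make some progress even
though not every guest vcpu can have a reserved slot.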
Andres Lagar-Cavilla
2011-Dec-16 17:04 UTC
Re: [PATCH] mem_event: use wait queue when ring is full
> On Thu, Dec 15, Andres Lagar-Cavilla wrote:
>
>> > - How many requests should foreign vcpus place in the ring if the guest
>> > has more vcpus than available slots in the ring? Just a single one so
>> > that foreigners can also make some progress?
>> The idea is that foreign vcpus can place as many events as they want as
>> long as each guest vcpu that is not blocked on a mem event has room to
>> send one mem event. When we reach that border condition, no more foreign
>> mem events.
>>
>> The case for which there are way too many guest vcpus needs to be handled,
>> either by capping the max number of vcpus for domains using a mem event,
>> or by growing the ring size.
>
> Right now the ring is one page, so it can not hold more than 64 entries.
> If that ever changes, the accounting can be adjusted.
>
>> > - Should access and paging have the same rules for accounting?
>> Absolutely.
>>
>> And both should use wait queues in extreme cases in which a guest vcpu
>> with a single action generates multiple memory events. Given that when we
>> hit a border condition the guest vcpu will place one event and be flagged
>> VPF_mem_event_paused (or whatever that flag is named), if a guest vcpu
>> generates another event when flagged, that's our cue for putting the
>> vcpu on a wait queue.
>
> An extra flag is not needed.

Can you elaborate? Which flag is not needed? And why?

> Below is an incremental patch (on top of v6) which does some accounting.
> It's just compile tested.
>
> Olaf
>
> diff -r 5d5d10e1568b xen/arch/x86/mm/mem_event.c
> --- a/xen/arch/x86/mm/mem_event.c
> +++ b/xen/arch/x86/mm/mem_event.c
> @@ -114,6 +114,19 @@ static int mem_event_enable(
>
>     med->pause_flag = pause_flag;
>
> +    /*
> +     * Configure ring accounting:
> +     * Each guest vcpu should be able to place at least one request.
> +     * If there are more vcpus than available slots in the ring, not all vcpus
> +     * can place requests in the ring anyway. A minimum (arbitrary) number of
> +     * foreign requests will be allowed in this case.
> +     */
> +    if ( d->max_vcpus < RING_SIZE(&med->front_ring) )
> +        med->max_foreign = RING_SIZE(&med->front_ring) - d->max_vcpus;
> +    if ( med->max_foreign < 13 )
> +        med->max_foreign = 13;

Magic number! Why?

More generally, does this patch apply on top of a previous patch? What's
the context here?
Thanks
Andres

> [...]
On Fri, Dec 16, Andres Lagar-Cavilla wrote:

> >> And both should use wait queues in extreme cases in which a guest
> >> vcpu with a single action generates multiple memory events. Given
> >> that when we hit a border condition the guest vcpu will place one
> >> event and be flagged VPF_mem_event_paused (or whatever that flag is
> >> named), if a guest vcpu generates another event when flagged,
> >> that's our cue for putting the vcpu on a wait queue.
> >
> > An extra flag is not needed.
>
> Can you elaborate? Which flag is not needed? And why?

The flag you mentioned in your earlier reply, VPF_mem_event_paused. Since
the vcpu is preempted, a new pause_flag will be no gain.

> > +    /*
> > +     * Configure ring accounting:
> > +     * Each guest vcpu should be able to place at least one request.
> > +     * If there are more vcpus than available slots in the ring, not all vcpus
> > +     * can place requests in the ring anyway. A minimum (arbitrary) number of
> > +     * foreign requests will be allowed in this case.
> > +     */
> > +    if ( d->max_vcpus < RING_SIZE(&med->front_ring) )
> > +        med->max_foreign = RING_SIZE(&med->front_ring) - d->max_vcpus;
> > +    if ( med->max_foreign < 13 )
> > +        med->max_foreign = 13;
>
> Magic number! Why?

Yes, an arbitrary number of slots for foreign requests. Which amount is
correct? 1? 5? 10? 1 is probably closer to the goal of 'let each vcpu put
at least one request'.

> More generally, does this patch apply on top of a previous patch? What's
> the context here?

As I said, it's on top of v6 of my patch. I will send out the full patch
later, but I won't be able to actually test the newer version this year.

Olaf
# HG changeset patch # User Olaf Hering <olaf@aepfle.de> # Date 1324294734 -3600 # Node ID d910d738f31b4ee1a6b0727ac0a66cff4c8933b0 # Parent 03138a08366b895d79e143119d4c9c72833cdbcd mem_event: use wait queue when ring is full This change is based on an idea/patch from Adin Scannell. If the ring is full, put the current vcpu to sleep if it belongs to the target domain. The wakeup happens in the p2m_*_resume functions. Wakeup will take the number of free slots into account. A request from foreign domain has to succeed once a slot was claimed because such vcpus can not sleep. This change fixes also a bug in p2m_mem_paging_drop_page(). Up to now a full ring will lead to harmless inconsistency in the pager. v7: - add ring accounting so that each vcpu can place at least one request v6: - take foreign requests into account before calling wake_up_nr() - call wake_up_nr() outside of ring lock - rename ->bit to ->pause_flag - update comment in mem_event_enable() v5: - rename mem_event_check_ring() to mem_event_claim_slot() - rename mem_event_put_req_producers() to mem_event_release_slot() - add local/foreign request accounting - keep room for at least one guest request v4: - fix off-by-one bug in _mem_event_put_request - add mem_event_wake_requesters() and use wake_up_nr() - rename mem_event_mark_and_pause() and mem_event_mark_and_pause() functions - req_producers counts foreign request producers, rename member v3: - rename ->mem_event_bit to ->bit - remove me_ from new VPF_ defines v2: - p2m_mem_paging_populate: move vcpu_pause after put_request, otherwise the vcpu will not wake_up after a wait_event because the pause_count was increased twice. Fixes guest hangs. - update free space check in _mem_event_put_request() - simplify mem_event_put_request() Signed-off-by: Olaf Hering <olaf@aepfle.de> diff -r 03138a08366b -r d910d738f31b xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -4172,8 +4172,8 @@ static int hvm_memory_event_traps(long p if ( (p & HVMPME_onchangeonly) && (value == old) ) return 1; - rc = mem_event_check_ring(d, &d->mem_event->access); - if ( rc ) + rc = mem_event_claim_slot(d, &d->mem_event->access); + if ( rc < 0 ) return rc; memset(&req, 0, sizeof(req)); diff -r 03138a08366b -r d910d738f31b xen/arch/x86/mm/mem_event.c --- a/xen/arch/x86/mm/mem_event.c +++ b/xen/arch/x86/mm/mem_event.c @@ -23,6 +23,7 @@ #include <asm/domain.h> #include <xen/event.h> +#include <xen/wait.h> #include <asm/p2m.h> #include <asm/mem_event.h> #include <asm/mem_paging.h> @@ -41,6 +42,7 @@ static int mem_event_enable( struct domain *d, xen_domctl_mem_event_op_t *mec, struct mem_event_domain *med, + int pause_flag, xen_event_channel_notification_t notification_fn) { int rc; @@ -110,8 +112,25 @@ static int mem_event_enable( mem_event_ring_lock_init(med); - /* Wake any VCPUs paused for memory events */ - mem_event_unpause_vcpus(d); + med->pause_flag = pause_flag; + + /* + * Configure ring accounting: + * Each guest vcpu should be able to place at least one request. + * If there are more vcpus than available slots in the ring, not all vcpus + * can place requests in the ring anyway. A minimum (arbitrary) number of + * foreign requests will be allowed in this case. 
+ */ + if ( d->max_vcpus < RING_SIZE(&med->front_ring) ) + med->max_foreign = RING_SIZE(&med->front_ring) - d->max_vcpus; + if ( med->max_foreign < 13 ) + med->max_foreign = 13; + med->max_target = RING_SIZE(&med->front_ring) - med->max_foreign; + + init_waitqueue_head(&med->wq); + + /* Wake any VCPUs waiting for the ring to appear */ + mem_event_wake_waiters(d, med); return 0; @@ -127,6 +146,9 @@ static int mem_event_enable( static int mem_event_disable(struct mem_event_domain *med) { + if (!list_empty(&med->wq.list)) + return -EBUSY; + unmap_domain_page(med->ring_page); med->ring_page = NULL; @@ -136,14 +158,30 @@ static int mem_event_disable(struct mem_ return 0; } -void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req) +static int _mem_event_put_request(struct domain *d, + struct mem_event_domain *med, + mem_event_request_t *req, + int *done) { mem_event_front_ring_t *front_ring; + int free_req, claimed_req, ret; RING_IDX req_prod; + if ( *done ) + return 1; + mem_event_ring_lock(med); front_ring = &med->front_ring; + + /* Foreign requests must succeed because their vcpus can not sleep */ + claimed_req = med->foreign_producers; + free_req = RING_FREE_REQUESTS(front_ring); + if ( !free_req || ( current->domain == d && free_req <= claimed_req ) ) { + mem_event_ring_unlock(med); + return 0; + } + req_prod = front_ring->req_prod_pvt; if ( current->domain != d ) @@ -156,14 +194,45 @@ void mem_event_put_request(struct domain memcpy(RING_GET_REQUEST(front_ring, req_prod), req, sizeof(*req)); req_prod++; + ret = 1; + *done = 1; + free_req--; + + /* Update accounting */ + if ( current->domain == d ) + { + med->target_producers--; + /* Ring is full, no more requests from this vcpu, go to sleep */ + if ( free_req < med->max_target ) + ret = 0; + } + else + med->foreign_producers--; + /* Update ring */ - med->req_producers--; front_ring->req_prod_pvt = req_prod; RING_PUSH_REQUESTS(front_ring); mem_event_ring_unlock(med); notify_via_xen_event_channel(d, med->xen_port); + + return ret; +} + +void mem_event_put_request(struct domain *d, struct mem_event_domain *med, + mem_event_request_t *req) +{ + int done = 0; + /* Go to sleep if request came from guest */ + if (current->domain == d) { + wait_event(med->wq, _mem_event_put_request(d, med, req, &done)); + return; + } + /* Ring was full anyway, unable to sleep in non-guest context */ + if (!_mem_event_put_request(d, med, req, &done)) + printk("Failed to put memreq: d %u t %x f %x gfn %lx\n", d->domain_id, + req->type, req->flags, (unsigned long)req->gfn); } int mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp) @@ -195,32 +264,97 @@ int mem_event_get_response(struct mem_ev return 1; } -void mem_event_unpause_vcpus(struct domain *d) +/** + * mem_event_wake_requesters - Wake vcpus waiting for room in the ring + * @d: guest domain + * @med: mem_event ring + * + * mem_event_wake_requesters() will wakeup vcpus waiting for room in the + * ring. Only as many as can place another request in the ring will + * resume execution. 
+ */ +void mem_event_wake_requesters(struct mem_event_domain *med) +{ + int free_req; + + mem_event_ring_lock(med); + free_req = RING_FREE_REQUESTS(&med->front_ring); + free_req -= med->foreign_producers; + mem_event_ring_unlock(med); + + if ( free_req ) + wake_up_nr(&med->wq, free_req); +} + +/** + * mem_event_wake_waiters - Wake all vcpus waiting for the ring + * @d: guest domain + * @med: mem_event ring + * + * mem_event_wake_waiters() will wakeup all vcpus waiting for the ring to + * become available. + */ +void mem_event_wake_waiters(struct domain *d, struct mem_event_domain *med) { struct vcpu *v; for_each_vcpu ( d, v ) - if ( test_and_clear_bit(_VPF_mem_event, &v->pause_flags) ) + if ( test_and_clear_bit(med->pause_flag, &v->pause_flags) ) vcpu_wake(v); } -void mem_event_mark_and_pause(struct vcpu *v) +/** + * mem_event_mark_and_sleep - Put vcpu to sleep + * @v: guest vcpu + * @med: mem_event ring + * + * mem_event_mark_and_sleep() tags vcpu and put it to sleep. + * The vcpu will resume execution in mem_event_wake_waiters(). + */ +void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain *med) { - set_bit(_VPF_mem_event, &v->pause_flags); + set_bit(med->pause_flag, &v->pause_flags); vcpu_sleep_nosync(v); } -void mem_event_put_req_producers(struct mem_event_domain *med) +/** + * mem_event_release_slot - Release a claimed slot + * @med: mem_event ring + * + * mem_event_release_slot() releases a claimed slot in the mem_event ring. + */ +void mem_event_release_slot(struct domain *d, struct mem_event_domain *med) { mem_event_ring_lock(med); - med->req_producers--; + if ( current->domain == d ) + med->target_producers--; + else + med->foreign_producers--; mem_event_ring_unlock(med); } -int mem_event_check_ring(struct domain *d, struct mem_event_domain *med) +/** + * mem_event_claim_slot - Check state of a mem_event ring + * @d: guest domain + * @med: mem_event ring + * + * Return codes: < 0: the ring is not yet configured + * 0: the ring has some room + * > 0: the ring is full + * + * mem_event_claim_slot() checks the state of the given mem_event ring. + * If the current vcpu belongs to the guest domain, the function assumes that + * mem_event_put_request() will sleep until the ring has room again. + * A guest can always place at least one request. + * + * If the current vcpu does not belong to the target domain the caller must try + * again until there is room. A slot is claimed and the caller can place a + * request. If the caller does not need to send a request, the claimed slot has + * to be released with mem_event_release_slot(). 
+ */ +int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med) { - struct vcpu *curr = current; - int free_requests; + int free_req; int ring_full = 1; if ( !med->ring_page ) @@ -228,15 +362,19 @@ int mem_event_check_ring(struct domain * mem_event_ring_lock(med); - free_requests = RING_FREE_REQUESTS(&med->front_ring); - if ( med->req_producers < free_requests ) + free_req = RING_FREE_REQUESTS(&med->front_ring); + + if ( current->domain == d ) { - med->req_producers++; + med->target_producers++; ring_full = 0; } - - if ( ring_full && (curr->domain == d) ) - mem_event_mark_and_pause(curr); + else if ( med->foreign_producers + med->target_producers < free_req && + med->foreign_producers < med->max_foreign ) + { + med->foreign_producers++; + ring_full = 0; + } mem_event_ring_unlock(med); @@ -316,7 +454,7 @@ int mem_event_domctl(struct domain *d, x if ( p2m->pod.entry_count ) break; - rc = mem_event_enable(d, mec, med, mem_paging_notification); + rc = mem_event_enable(d, mec, med, _VPF_mem_paging, mem_paging_notification); } break; @@ -355,7 +493,7 @@ int mem_event_domctl(struct domain *d, x if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ) break; - rc = mem_event_enable(d, mec, med, mem_access_notification); + rc = mem_event_enable(d, mec, med, _VPF_mem_access, mem_access_notification); } break; diff -r 03138a08366b -r d910d738f31b xen/arch/x86/mm/mem_sharing.c --- a/xen/arch/x86/mm/mem_sharing.c +++ b/xen/arch/x86/mm/mem_sharing.c @@ -253,18 +253,10 @@ static void mem_sharing_audit(void) #endif -static struct page_info* mem_sharing_alloc_page(struct domain *d, - unsigned long gfn) +static void mem_sharing_notify_helper(struct domain *d, unsigned long gfn) { - struct page_info* page; struct vcpu *v = current; - mem_event_request_t req; - - page = alloc_domheap_page(d, 0); - if(page != NULL) return page; - - memset(&req, 0, sizeof(req)); - req.type = MEM_EVENT_TYPE_SHARED; + mem_event_request_t req = { .type = MEM_EVENT_TYPE_SHARED }; if ( v->domain != d ) { @@ -275,20 +267,18 @@ static struct page_info* mem_sharing_all gdprintk(XENLOG_ERR, "Failed alloc on unshare path for foreign (%d) lookup\n", d->domain_id); - return page; + return; } - vcpu_pause_nosync(v); - req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED; + if (mem_event_claim_slot(d, &d->mem_event->share) < 0) + return; - if(mem_event_check_ring(d, &d->mem_event->share)) return page; - + req.flags = MEM_EVENT_FLAG_VCPU_PAUSED; req.gfn = gfn; req.p2mt = p2m_ram_shared; req.vcpu_id = v->vcpu_id; mem_event_put_request(d, &d->mem_event->share, &req); - - return page; + vcpu_pause_nosync(v); } unsigned int mem_sharing_get_nr_saved_mfns(void) @@ -656,7 +646,7 @@ gfn_found: if(ret == 0) goto private_page_found; old_page = page; - page = mem_sharing_alloc_page(d, gfn); + page = alloc_domheap_page(d, 0); if(!page) { /* We''ve failed to obtain memory for private page. 
Need to re-add the @@ -664,6 +654,7 @@ gfn_found: list_add(&gfn_info->list, &hash_entry->gfns); put_gfn(d, gfn); shr_unlock(); + mem_sharing_notify_helper(d, gfn); return -ENOMEM; } diff -r 03138a08366b -r d910d738f31b xen/arch/x86/mm/p2m.c --- a/xen/arch/x86/mm/p2m.c +++ b/xen/arch/x86/mm/p2m.c @@ -861,20 +861,12 @@ int p2m_mem_paging_evict(struct domain * */ void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn) { - struct vcpu *v = current; - mem_event_request_t req; + mem_event_request_t req = { .type = MEM_EVENT_TYPE_PAGING, .gfn = gfn }; - /* Check that there''s space on the ring for this request */ - if ( mem_event_check_ring(d, &d->mem_event->paging) == 0) - { - /* Send release notification to pager */ - memset(&req, 0, sizeof(req)); - req.flags |= MEM_EVENT_FLAG_DROP_PAGE; - req.gfn = gfn; - req.vcpu_id = v->vcpu_id; + /* Send release notification to pager */ + req.flags = MEM_EVENT_FLAG_DROP_PAGE; - mem_event_put_request(d, &d->mem_event->paging, &req); - } + mem_event_put_request(d, &d->mem_event->paging, &req); } /** @@ -908,7 +900,7 @@ void p2m_mem_paging_populate(struct doma struct p2m_domain *p2m = p2m_get_hostp2m(d); /* Check that there''s space on the ring for this request */ - if ( mem_event_check_ring(d, &d->mem_event->paging) ) + if ( mem_event_claim_slot(d, &d->mem_event->paging) ) return; memset(&req, 0, sizeof(req)); @@ -938,7 +930,7 @@ void p2m_mem_paging_populate(struct doma else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged ) { /* gfn is already on its way back and vcpu is not paused */ - mem_event_put_req_producers(&d->mem_event->paging); + mem_event_release_slot(d, &d->mem_event->paging); return; } @@ -1080,8 +1072,8 @@ void p2m_mem_paging_resume(struct domain vcpu_unpause(d->vcpu[rsp.vcpu_id]); } - /* Unpause any domains that were paused because the ring was full */ - mem_event_unpause_vcpus(d); + /* Wake vcpus waiting for room in the ring */ + mem_event_wake_requesters(&d->mem_event->paging); } bool_t p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long gla, @@ -1115,7 +1107,7 @@ bool_t p2m_mem_access_check(unsigned lon p2m_unlock(p2m); /* Otherwise, check if there is a memory event listener, and send the message along */ - res = mem_event_check_ring(d, &d->mem_event->access); + res = mem_event_claim_slot(d, &d->mem_event->access); if ( res < 0 ) { /* No listener */ @@ -1125,7 +1117,7 @@ bool_t p2m_mem_access_check(unsigned lon "Memory access permissions failure, no mem_event listener: pausing VCPU %d, dom %d\n", v->vcpu_id, d->domain_id); - mem_event_mark_and_pause(v); + mem_event_mark_and_sleep(v, &d->mem_event->access); } else { @@ -1186,9 +1178,11 @@ void p2m_mem_access_resume(struct domain vcpu_unpause(d->vcpu[rsp.vcpu_id]); } - /* Unpause any domains that were paused because the ring was full or no listener - * was available */ - mem_event_unpause_vcpus(d); + /* Wake vcpus waiting for room in the ring */ + mem_event_wake_requesters(&d->mem_event->access); + + /* Unpause all vcpus that were paused because no listener was available */ + mem_event_wake_waiters(d, &d->mem_event->access); } diff -r 03138a08366b -r d910d738f31b xen/include/asm-x86/mem_event.h --- a/xen/include/asm-x86/mem_event.h +++ b/xen/include/asm-x86/mem_event.h @@ -24,13 +24,13 @@ #ifndef __MEM_EVENT_H__ #define __MEM_EVENT_H__ -/* Pauses VCPU while marking pause flag for mem event */ -void mem_event_mark_and_pause(struct vcpu *v); -int mem_event_check_ring(struct domain *d, struct mem_event_domain *med); -void mem_event_put_req_producers(struct 
mem_event_domain *med); +int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med); +void mem_event_release_slot(struct domain *d, struct mem_event_domain *med); void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req); int mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp); -void mem_event_unpause_vcpus(struct domain *d); +void mem_event_wake_requesters(struct mem_event_domain *med); +void mem_event_wake_waiters(struct domain *d, struct mem_event_domain *med); +void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain *med); int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec, XEN_GUEST_HANDLE(void) u_domctl); diff -r 03138a08366b -r d910d738f31b xen/include/xen/sched.h --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -14,6 +14,7 @@ #include <xen/nodemask.h> #include <xen/radix-tree.h> #include <xen/multicall.h> +#include <xen/wait.h> #include <public/xen.h> #include <public/domctl.h> #include <public/sysctl.h> @@ -183,7 +184,11 @@ struct mem_event_domain { /* ring lock */ spinlock_t ring_lock; - unsigned int req_producers; + /* The ring has 64 entries */ + unsigned char foreign_producers; + unsigned char max_foreign; + unsigned char target_producers; + unsigned char max_target; /* shared page */ mem_event_shared_page_t *shared_page; /* shared ring page */ @@ -192,6 +197,10 @@ struct mem_event_domain mem_event_front_ring_t front_ring; /* event channel port (vcpu0 only) */ int xen_port; + /* mem_event bit for vcpu->pause_flags */ + int pause_flag; + /* list of vcpus waiting for room in the ring */ + struct waitqueue_head wq; }; struct mem_event_per_domain @@ -615,9 +624,12 @@ static inline struct domain *next_domain /* VCPU affinity has changed: migrating to a new CPU. */ #define _VPF_migrating 3 #define VPF_migrating (1UL<<_VPF_migrating) - /* VCPU is blocked on memory-event ring. */ -#define _VPF_mem_event 4 -#define VPF_mem_event (1UL<<_VPF_mem_event) + /* VCPU is blocked due to missing mem_paging ring. */ +#define _VPF_mem_paging 4 +#define VPF_mem_paging (1UL<<_VPF_mem_paging) + /* VCPU is blocked due to missing mem_access ring. */ +#define _VPF_mem_access 5 +#define VPF_mem_access (1UL<<_VPF_mem_access) static inline int vcpu_runnable(struct vcpu *v) {
At 12:39 +0100 on 19 Dec (1324298386), Olaf Hering wrote:
> mem_event: use wait queue when ring is full
>
> This change is based on an idea/patch from Adin Scannell.
>
> If the ring is full, put the current vcpu to sleep if it belongs to the
> target domain. The wakeup happens in the p2m_*_resume functions. Wakeup
> will take the number of free slots into account.
>
> A request from foreign domain has to succeed once a slot was claimed
> because such vcpus can not sleep.
>
> This change fixes also a bug in p2m_mem_paging_drop_page(). Up to now a
> full ring will lead to harmless inconsistency in the pager.

Thanks. As before, I'm happy to apply this when Adin or Andres acks it.

Cheers,

Tim.