After getting a report of 3.2.3''s xenmon crashing Xen (as it turned out
this was because c/s 17000 was backported to that tree without also
applying c/s 17515), I figured that the hypervisor shouldn''t rely on
any
specific state of the actual trace buffer (as it is shared writable with
Dom0)
I chose to use ''volatile'' qualifiers rather than sprinkling
around
barriers - this can certainly be changed if considered unacceptable.
To make clear what purpose specific variables have and/or where they
got loaded from, the patch also changes the type of some of them to
be explicitly u32/s32, and removes pointless assertions (like checking
an unsigned variable to be >= 0).
I also took the prototype adjustment of __trace_var() as an opportunity
to simplify the TRACE_xD() macros. Similar simplification could be done
on the (quite numerous) direct callers of the function.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- 2010-06-15.orig/xen/common/trace.c	2010-06-29 17:04:45.000000000 +0200
+++ 2010-06-15/xen/common/trace.c	2010-06-29 17:05:09.000000000 +0200
@@ -52,14 +52,14 @@ static struct t_info *t_info;
 #define T_INFO_SIZE ((T_INFO_PAGES)*(PAGE_SIZE))
 /* t_info.tbuf_size + list of mfn offsets + 1 to round up / sizeof uint32_t */
 #define T_INFO_FIRST_OFFSET (((2 + NR_CPUS) * sizeof(uint16_t)) /
sizeof(uint32_t))
-static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
+static DEFINE_PER_CPU_READ_MOSTLY(volatile struct t_buf *, t_bufs);
 static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
 static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
-static int data_size;
+static u32 data_size;
 
 /* High water mark for trace buffers; */
 /* Send virtual interrupt when buffer level reaches this point */
-static int t_buf_highwater;
+static u32 t_buf_highwater;
 
 /* Number of records lost due to per-CPU trace buffer being full. */
 static DEFINE_PER_CPU(unsigned long, lost_records);
@@ -145,7 +145,7 @@ static int alloc_trace_bufs(void)
 
         spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
 
-        buf = per_cpu(t_bufs, cpu) = (struct t_buf *)rawbuf;
+        per_cpu(t_bufs, cpu) = buf = (struct t_buf *)rawbuf;
         buf->cons = buf->prod = 0;
         per_cpu(t_data, cpu) = (unsigned char *)(buf + 1);
 
@@ -196,6 +196,7 @@ out_dealloc:
         spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
         if ( (rawbuf = (char *)per_cpu(t_bufs, cpu)) )
         {
+            per_cpu(t_bufs, cpu) = NULL;
             ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated));
             free_xenheap_pages(rawbuf, order);
         }
@@ -410,19 +411,37 @@ int tb_control(xen_sysctl_tbuf_op_t *tbc
     return rc;
 }
 
-static inline int calc_rec_size(int cycles, int extra) 
+static inline unsigned int calc_rec_size(bool_t cycles, unsigned int extra)
 {
-    int rec_size;
-    rec_size = 4;
+    unsigned int rec_size = 4;
+
     if ( cycles )
         rec_size += 8;
     rec_size += extra;
     return rec_size;
 }
 
-static inline int calc_unconsumed_bytes(struct t_buf *buf)
+static inline bool_t bogus(u32 prod, u32 cons)
+{
+    if ( unlikely(prod & 3) || unlikely(prod >= 2 * data_size) ||
+         unlikely(cons & 3) || unlikely(cons >= 2 * data_size) )
+    {
+        tb_init_done = 0;
+        printk(XENLOG_WARNING "trc#%u: bogus prod (%08x) and/or cons
(%08x)\n",
+               smp_processor_id(), prod, cons);
+        return 1;
+    }
+    return 0;
+}
+
+static inline u32 calc_unconsumed_bytes(const volatile struct t_buf *buf)
 {
-    int x = buf->prod - buf->cons;
+    u32 prod = buf->prod, cons = buf->cons;
+    s32 x = prod - cons;
+
+    if ( bogus(prod, cons) )
+        return data_size;
+
     if ( x < 0 )
         x += 2*data_size;
 
@@ -432,9 +451,14 @@ static inline int calc_unconsumed_bytes(
     return x;
 }
 
-static inline int calc_bytes_to_wrap(struct t_buf *buf)
+static inline u32 calc_bytes_to_wrap(const volatile struct t_buf *buf)
 {
-    int x = data_size - buf->prod;
+    u32 prod = buf->prod;
+    s32 x = data_size - prod;
+
+    if ( bogus(prod, buf->cons) )
+        return 0;
+
     if ( x <= 0 )
         x += data_size;
 
@@ -444,35 +468,37 @@ static inline int calc_bytes_to_wrap(str
     return x;
 }
 
-static inline int calc_bytes_avail(struct t_buf *buf)
+static inline u32 calc_bytes_avail(const volatile struct t_buf *buf)
 {
     return data_size - calc_unconsumed_bytes(buf);
 }
 
-static inline struct t_rec *
-next_record(struct t_buf *buf)
+static inline struct t_rec *next_record(const volatile struct t_buf *buf)
 {
-    int x = buf->prod;
+    u32 x = buf->prod;
+
+    if ( !tb_init_done || bogus(x, buf->cons) )
+        return NULL;
+
     if ( x >= data_size )
         x -= data_size;
 
-    ASSERT(x >= 0);
     ASSERT(x < data_size);
 
     return (struct t_rec *)&this_cpu(t_data)[x];
 }
 
-static inline int __insert_record(struct t_buf *buf,
-                                  unsigned long event,
-                                  int extra,
-                                  int cycles,
-                                  int rec_size,
-                                  unsigned char *extra_data)
+static inline void __insert_record(volatile struct t_buf *buf,
+                                   unsigned long event,
+                                   unsigned int extra,
+                                   bool_t cycles,
+                                   unsigned int rec_size,
+                                   const void *extra_data)
 {
     struct t_rec *rec;
     unsigned char *dst;
-    unsigned long extra_word = extra/sizeof(u32);
-    int local_rec_size = calc_rec_size(cycles, extra);
+    unsigned int extra_word = extra / sizeof(u32);
+    unsigned int local_rec_size = calc_rec_size(cycles, extra);
     uint32_t next;
 
     BUG_ON(local_rec_size != rec_size);
@@ -488,11 +514,13 @@ static inline int __insert_record(struct
             printk(XENLOG_WARNING
                    "%s: avail=%u (size=%08x prod=%08x cons=%08x)
rec=%u\n",
                    __func__, next, data_size, buf->prod, buf->cons,
rec_size);
-        return 0;
+        return;
     }
     rmb();
 
     rec = next_record(buf);
+    if ( !rec )
+        return;
     rec->event = event;
     rec->extra_u32 = extra_word;
     dst = (unsigned char *)rec->u.nocycles.extra_u32;
@@ -509,21 +537,22 @@ static inline int __insert_record(struct
 
     wmb();
 
-    next = buf->prod + rec_size;
+    next = buf->prod;
+    if ( bogus(next, buf->cons) )
+        return;
+    next += rec_size;
     if ( next >= 2*data_size )
         next -= 2*data_size;
-    ASSERT(next >= 0);
     ASSERT(next < 2*data_size);
     buf->prod = next;
-
-    return rec_size;
 }
 
-static inline int insert_wrap_record(struct t_buf *buf, int size)
+static inline void insert_wrap_record(volatile struct t_buf *buf,
+                                      unsigned int size)
 {
-    int space_left = calc_bytes_to_wrap(buf);
-    unsigned long extra_space = space_left - sizeof(u32);
-    int cycles = 0;
+    u32 space_left = calc_bytes_to_wrap(buf);
+    unsigned int extra_space = space_left - sizeof(u32);
+    bool_t cycles = 0;
 
     BUG_ON(space_left > size);
 
@@ -535,17 +564,13 @@ static inline int insert_wrap_record(str
         ASSERT((extra_space/sizeof(u32)) <= TRACE_EXTRA_MAX);
     }
 
-    return __insert_record(buf,
-                    TRC_TRACE_WRAP_BUFFER,
-                    extra_space,
-                    cycles,
-                    space_left,
-                    NULL);
+    __insert_record(buf, TRC_TRACE_WRAP_BUFFER, extra_space, cycles,
+                    space_left, NULL);
 }
 
 #define LOST_REC_SIZE (4 + 8 + 16) /* header + tsc + sizeof(struct ed) */
 
-static inline int insert_lost_records(struct t_buf *buf)
+static inline void insert_lost_records(volatile struct t_buf *buf)
 {
     struct {
         u32 lost_records;
@@ -560,12 +585,8 @@ static inline int insert_lost_records(st
 
     this_cpu(lost_records) = 0;
 
-    return __insert_record(buf,
-                           TRC_LOST_RECORDS,
-                           sizeof(ed),
-                           1 /* cycles */,
-                           LOST_REC_SIZE,
-                           (unsigned char *)&ed);
+    __insert_record(buf, TRC_LOST_RECORDS, sizeof(ed), 1 /* cycles */,
+                    LOST_REC_SIZE, &ed);
 }
 
 /*
@@ -587,13 +608,15 @@ static DECLARE_TASKLET(trace_notify_dom0
  * failure, otherwise 0.  Failure occurs only if the trace buffers are not yet
  * initialised.
  */
-void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data)
+void __trace_var(u32 event, bool_t cycles, unsigned int extra,
+                 const void *extra_data)
 {
-    struct t_buf *buf;
-    unsigned long flags, bytes_to_tail, bytes_to_wrap;
-    int rec_size, total_size;
-    int extra_word;
-    int started_below_highwater = 0;
+    volatile struct t_buf *buf;
+    unsigned long flags;
+    u32 bytes_to_tail, bytes_to_wrap;
+    unsigned int rec_size, total_size;
+    unsigned int extra_word;
+    bool_t started_below_highwater;
 
     if( !tb_init_done )
         return;
@@ -626,13 +649,11 @@ void __trace_var(u32 event, int cycles, 
 
     /* Read tb_init_done /before/ t_bufs. */
     rmb();
-
-    spin_lock_irqsave(&this_cpu(t_lock), flags);
-
     buf = this_cpu(t_bufs);
-
     if ( unlikely(!buf) )
-        goto unlock;
+        return;
+
+    spin_lock_irqsave(&this_cpu(t_lock), flags);
 
     started_below_highwater = (calc_unconsumed_bytes(buf) <
t_buf_highwater);
 
--- 2010-06-15.orig/xen/include/xen/trace.h	2010-06-29 16:53:25.000000000 +0200
+++ 2010-06-15/xen/include/xen/trace.h	2010-06-25 12:08:36.000000000 +0200
@@ -36,7 +36,7 @@ int tb_control(struct xen_sysctl_tbuf_op
 
 int trace_will_trace_event(u32 event);
 
-void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data);
+void __trace_var(u32 event, bool_t cycles, unsigned int extra, const void *);
 
 static inline void trace_var(u32 event, int cycles, int extra,
                                unsigned char *extra_data)
@@ -57,7 +57,7 @@ static inline void trace_var(u32 event, 
         {                                                       \
             u32 _d[1];                                          \
             _d[0] = d1;                                         \
-            __trace_var(_e, 1, sizeof(*_d), (unsigned char *)_d); \
+            __trace_var(_e, 1, sizeof(_d), _d);                 \
         }                                                       \
     } while ( 0 )
  
@@ -68,7 +68,7 @@ static inline void trace_var(u32 event, 
             u32 _d[2];                                          \
             _d[0] = d1;                                         \
             _d[1] = d2;                                         \
-            __trace_var(_e, 1, sizeof(*_d)*2, (unsigned char *)_d); \
+            __trace_var(_e, 1, sizeof(_d), _d);                 \
         }                                                       \
     } while ( 0 )
  
@@ -80,7 +80,7 @@ static inline void trace_var(u32 event, 
             _d[0] = d1;                                         \
             _d[1] = d2;                                         \
             _d[2] = d3;                                         \
-            __trace_var(_e, 1, sizeof(*_d)*3, (unsigned char *)_d); \
+            __trace_var(_e, 1, sizeof(_d), _d);                 \
         }                                                       \
     } while ( 0 )
  
@@ -93,7 +93,7 @@ static inline void trace_var(u32 event, 
             _d[1] = d2;                                         \
             _d[2] = d3;                                         \
             _d[3] = d4;                                         \
-            __trace_var(_e, 1, sizeof(*_d)*4, (unsigned char *)_d); \
+            __trace_var(_e, 1, sizeof(_d), _d);                 \
         }                                                       \
     } while ( 0 )
  
@@ -107,7 +107,7 @@ static inline void trace_var(u32 event, 
             _d[2] = d3;                                         \
             _d[3] = d4;                                         \
             _d[4] = d5;                                         \
-            __trace_var(_e, 1, sizeof(*_d)*5, (unsigned char *)_d); \
+            __trace_var(_e, 1, sizeof(_d), _d);                 \
         }                                                       \
     } while ( 0 )
 
@@ -122,7 +122,7 @@ static inline void trace_var(u32 event, 
             _d[3] = d4;                                         \
             _d[4] = d5;                                         \
             _d[5] = d6;                                         \
-            __trace_var(_e, 1, sizeof(*_d)*6, (unsigned char *)_d); \
+            __trace_var(_e, 1, sizeof(_d), _d);                 \
         }                                                       \
     } while ( 0 )
 
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
George Dunlap
2010-Jun-30  16:58 UTC
[Xen-devel] Re: [PATCH 5/6] trace: fix security issues
Most of the patch, other than the volatiles, looks good; thanks for doing this. Can you explain exactly what it is you''re trying to accomplish by making things volatile? What kinds of failure modes are you expecting to protect against? Glancing through the code, I can''t really see that making things volatile will make much of a difference. Further comments inline below. On Tue, Jun 29, 2010 at 4:37 PM, Jan Beulich <JBeulich@novell.com> wrote:> --- 2010-06-15.orig/xen/common/trace.c 2010-06-29 17:04:45.000000000 +0200 > +++ 2010-06-15/xen/common/trace.c 2010-06-29 17:05:09.000000000 +0200[snip]> @@ -626,13 +649,11 @@ void __trace_var(u32 event, int cycles, > > /* Read tb_init_done /before/ t_bufs. */ > rmb(); > - > - spin_lock_irqsave(&this_cpu(t_lock), flags); > - > buf = this_cpu(t_bufs); > - > if ( unlikely(!buf) ) > - goto unlock; > + return; > + > + spin_lock_irqsave(&this_cpu(t_lock), flags);This isn''t any good: the whole point of this lock is to keep t_bufs from disappearing under our feet.> > started_below_highwater = (calc_unconsumed_bytes(buf) < t_buf_highwater); > > --- 2010-06-15.orig/xen/include/xen/trace.h 2010-06-29 16:53:25.000000000 +0200 > +++ 2010-06-15/xen/include/xen/trace.h 2010-06-25 12:08:36.000000000 +0200 > @@ -36,7 +36,7 @@ int tb_control(struct xen_sysctl_tbuf_op > > int trace_will_trace_event(u32 event); > > -void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data); > +void __trace_var(u32 event, bool_t cycles, unsigned int extra, const void *); > > static inline void trace_var(u32 event, int cycles, int extra, > unsigned char *extra_data) > @@ -57,7 +57,7 @@ static inline void trace_var(u32 event, > { \ > u32 _d[1]; \ > _d[0] = d1; \ > - __trace_var(_e, 1, sizeof(*_d), (unsigned char *)_d); \ > + __trace_var(_e, 1, sizeof(_d), _d); \ > } \ > } while ( 0 ) > > @@ -68,7 +68,7 @@ static inline void trace_var(u32 event, > u32 _d[2]; \ > _d[0] = d1; \ > _d[1] = d2; \ > - __trace_var(_e, 1, sizeof(*_d)*2, (unsigned char *)_d); \ > + __trace_var(_e, 1, sizeof(_d), _d); \ > } \ > } while ( 0 ) > > @@ -80,7 +80,7 @@ static inline void trace_var(u32 event, > _d[0] = d1; \ > _d[1] = d2; \ > _d[2] = d3; \ > - __trace_var(_e, 1, sizeof(*_d)*3, (unsigned char *)_d); \ > + __trace_var(_e, 1, sizeof(_d), _d); \ > } \ > } while ( 0 ) > > @@ -93,7 +93,7 @@ static inline void trace_var(u32 event, > _d[1] = d2; \ > _d[2] = d3; \ > _d[3] = d4; \ > - __trace_var(_e, 1, sizeof(*_d)*4, (unsigned char *)_d); \ > + __trace_var(_e, 1, sizeof(_d), _d); \ > } \ > } while ( 0 ) > > @@ -107,7 +107,7 @@ static inline void trace_var(u32 event, > _d[2] = d3; \ > _d[3] = d4; \ > _d[4] = d5; \ > - __trace_var(_e, 1, sizeof(*_d)*5, (unsigned char *)_d); \ > + __trace_var(_e, 1, sizeof(_d), _d); \ > } \ > } while ( 0 ) > > @@ -122,7 +122,7 @@ static inline void trace_var(u32 event, > _d[3] = d4; \ > _d[4] = d5; \ > _d[5] = d6; \ > - __trace_var(_e, 1, sizeof(*_d)*6, (unsigned char *)_d); \ > + __trace_var(_e, 1, sizeof(_d), _d); \ > } \ > } while ( 0 ) > > > >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
>>> On 30.06.10 at 18:58, George Dunlap <dunlapg@umich.edu> wrote: > Most of the patch, other than the volatiles, looks good; thanks for doing > this. > > Can you explain exactly what it is you''re trying to accomplish by > making things volatile? What kinds of failure modes are you expecting > to protect against? Glancing through the code, I can''t really see > that making things volatile will make much of a difference.The point is to prevent the compiler from doing the memory reads more than once (which it is permitted to do on non-volatiles). As said in the submission comment, this could be done via barriers too, but one of the two has to be there.> Further comments inline below. > > On Tue, Jun 29, 2010 at 4:37 PM, Jan Beulich <JBeulich@novell.com> wrote: >> --- 2010-06-15.orig/xen/common/trace.c 2010-06-29 17:04:45.000000000 +0200 >> +++ 2010-06-15/xen/common/trace.c 2010-06-29 17:05:09.000000000 +0200 > [snip] >> @@ -626,13 +649,11 @@ void __trace_var(u32 event, int cycles, >> >> /* Read tb_init_done /before/ t_bufs. */ >> rmb(); >> - >> - spin_lock_irqsave(&this_cpu(t_lock), flags); >> - >> buf = this_cpu(t_bufs); >> - >> if ( unlikely(!buf) ) >> - goto unlock; >> + return; >> + >> + spin_lock_irqsave(&this_cpu(t_lock), flags); > > This isn''t any good: the whole point of this lock is to keep t_bufs > from disappearing under our feet.Not really - the buffer can''t disappear if we make it here, it can only disappear when tb_init_done wasn''t set yet. But if that''s a major concern, I can of course put the check back into the locked region. And wait, I think in the end there''s no change needed at all - originally I thought going to the "unlock" label here is unsafe as the code there would de-reference buf, but started_below_highwater still being zero would prevent this. So I''ll submit another version in any case, after we settled on whether using volatile qualifiers is acceptable here. Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
George Dunlap
2010-Jul-01  08:32 UTC
[Xen-devel] Re: [PATCH 5/6] trace: fix security issues
On Thu, Jul 1, 2010 at 9:04 AM, Jan Beulich <JBeulich@novell.com> wrote:> The point is to prevent the compiler from doing the memory reads > more than once (which it is permitted to do on non-volatiles). As > said in the submission comment, this could be done via barriers too, > but one of the two has to be there.I understand what the volatile modifier is supposed to do. :-) (Although I think you''ve got it backwards -- it forces the compiler to do memory reads every time, instead of doing them only once and caching them in a register.) My question is, why do you think the volatile modifier is necessary? What kinds of situations are you trying to protect against? What terrible havoc can a broken / rogue xentrace binary wreak upon the hypervisor if we don''t have the volatiles in? Moreover, the purpose of volatile is slightly different than memory barriers. AFAICT from the rather sparse documentation, "volatile" doesn''t prevent a compiler from re-ordering memory accesses that it deems independent. That''s what the memory barriers are for. At least one specific example where volatile helps would be appreciated.> Not really - the buffer can''t disappear if we make it here, it can only > disappear when tb_init_done wasn''t set yet. But if that''s a major > concern, I can of course put the check back into the locked region.Hmm -- that happens to be the case now, but it''s not guaranteed to be that way forever, and this is essentially a "trap" laid for anyone who changes that invariant. At some point I would like to implement re-sizable trace buffers, in which case the buffer may disappear after tb_init_done is set. I''d rather not rely on my memory of this discussion. :-)> And wait, I think in the end there''s no change needed at all - > originally I thought going to the "unlock" label here is unsafe as > the code there would de-reference buf, but started_below_highwater > still being zero would prevent this.Yes, that''s in part what setting that was for. I''ll add a comment to that effect.> So I''ll submit another version in any case, after we settled on > whether using volatile qualifiers is acceptable here.I''ve already got things in a nice patchqueue, including my modified patches -- I''ll just pull out the volatile code, make the simple changes we''ve discussed, and post the patch queue. -George _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
>>> On 01.07.10 at 10:32, George Dunlap <dunlapg@umich.edu> wrote: > On Thu, Jul 1, 2010 at 9:04 AM, Jan Beulich <JBeulich@novell.com> wrote: >> The point is to prevent the compiler from doing the memory reads >> more than once (which it is permitted to do on non-volatiles). As >> said in the submission comment, this could be done via barriers too, >> but one of the two has to be there. > > I understand what the volatile modifier is supposed to do. :-) > (Although I think you''ve got it backwards -- it forces the compiler to > do memory reads every time, instead of doing them only once and > caching them in a register.) My question is, why do you think theI don''t think so - I made each function read the fields exactly once, storing into local variables as necessary.> volatile modifier is necessary? What kinds of situations are you > trying to protect against? What terrible havoc can a broken / rogue > xentrace binary wreak upon the hypervisor if we don''t have the > volatiles in?The issue we had got reported: A crashed hypervisor. The thing is that code that uses fields that code outside the hypervisor may modify must in no case imply that consistency checks done earlier in the code still apply when re-reading the data from memory. Hence we have to make sure that reads happen exactly once, and checks plus any consumption happen on/from the values kept locally, not by re-reading the shared buffer fields.> Moreover, the purpose of volatile is slightly different than memory > barriers. AFAICT from the rather sparse documentation, "volatile" > doesn''t prevent a compiler from re-ordering memory accesses that it > deems independent. That''s what the memory barriers are for.Ordering isn''t a problem here, but enforcing the read-once requirement can be done either way afaict.> At least one specific example where volatile helps would be appreciated.static inline struct t_rec *next_record(const struct t_buf *buf) { u32 x = buf->prod; *** no read at all *** if ( !tb_init_done || bogus(x, buf->cons) ) *** read buf->prod *** return NULL; if ( x >= data_size ) *** read buf->prod again *** x -= data_size; *** x = buf->prod - data_size; (reading buf->prod a third time) *** *** else *** *** x = buf->prod; *** ASSERT(x < data_size); return (struct t_rec *)&this_cpu(t_data)[x]; } Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
George Dunlap
2010-Jul-01  10:15 UTC
[Xen-devel] Re: [PATCH 5/6] trace: fix security issues
On Thu, Jul 1, 2010 at 9:49 AM, Jan Beulich <JBeulich@novell.com> wrote:> Ordering isn''t a problem here, but enforcing the read-once > requirement can be done either way afaict.I see. Yeah, I agree with Keir; memory barriers are the right way to solve this problem. Can you work up a patch add in the appropriate memory barriers? -George> >> At least one specific example where volatile helps would be appreciated. > > static inline struct t_rec *next_record(const struct t_buf *buf) > { > u32 x = buf->prod; > *** no read at all *** > > if ( !tb_init_done || bogus(x, buf->cons) ) > *** read buf->prod *** > return NULL; > > if ( x >= data_size ) > *** read buf->prod again *** > x -= data_size; > *** x = buf->prod - data_size; (reading buf->prod a third time) *** > *** else *** > *** x = buf->prod; *** > > ASSERT(x < data_size); > > return (struct t_rec *)&this_cpu(t_data)[x]; > } > > Jan > >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
>>> On 01.07.10 at 12:15, George Dunlap <George.Dunlap@eu.citrix.com> wrote: > On Thu, Jul 1, 2010 at 9:49 AM, Jan Beulich <JBeulich@novell.com> wrote: >> Ordering isn''t a problem here, but enforcing the read-once >> requirement can be done either way afaict. > > I see. Yeah, I agree with Keir; memory barriers are the right way to > solve this problem. > > Can you work up a patch add in the appropriate memory barriers?Here we go - I suppose the patch will apply cleanly on top of your queue. Subject: trace: insert compiler memory barriers This is to ensure fields shared writably with Dom0 get read only once for any consistency checking followed by actual calculations. I realized there was another multiple-read issue, a fix for which is also included (which at once simplifies __insert_record()). Signed-off-by: Jan Beulich <jbeulich@novell.com> --- a/xen/common/trace.c 2010-07-01 10:05:54.000000000 +0200 +++ b/xen/common/trace.c 2010-07-01 14:01:47.000000000 +0200 @@ -437,11 +437,13 @@ static inline bool_t bogus(u32 prod, u32 static inline u32 calc_unconsumed_bytes(const struct t_buf *buf) { u32 prod = buf->prod, cons = buf->cons; - s32 x = prod - cons; + s32 x; + barrier(); /* must read buf->prod and buf->cons only once */ if ( bogus(prod, cons) ) return data_size; + x = prod - cons; if ( x < 0 ) x += 2*data_size; @@ -453,12 +455,14 @@ static inline u32 calc_unconsumed_bytes( static inline u32 calc_bytes_to_wrap(const struct t_buf *buf) { - u32 prod = buf->prod; - s32 x = data_size - prod; + u32 prod = buf->prod, cons = buf->cons; + s32 x; - if ( bogus(prod, buf->cons) ) + barrier(); /* must read buf->prod and buf->cons only once */ + if ( bogus(prod, cons) ) return 0; + x = data_size - prod; if ( x <= 0 ) x += data_size; @@ -473,11 +477,14 @@ static inline u32 calc_bytes_avail(const return data_size - calc_unconsumed_bytes(buf); } -static inline struct t_rec *next_record(const struct t_buf *buf) +static inline struct t_rec *next_record(const struct t_buf *buf, + uint32_t *next) { - u32 x = buf->prod; + u32 x = buf->prod, cons = buf->cons; - if ( !tb_init_done || bogus(x, buf->cons) ) + barrier(); /* must read buf->prod and buf->cons only once */ + *next = x; + if ( !tb_init_done || bogus(x, cons) ) return NULL; if ( x >= data_size ) @@ -504,23 +511,21 @@ static inline void __insert_record(volat BUG_ON(local_rec_size != rec_size); BUG_ON(extra & 3); + rec = next_record(buf, &next); + if ( !rec ) + return; /* Double-check once more that we have enough space. * Don''t bugcheck here, in case the userland tool is doing * something stupid. */ - next = calc_bytes_avail(buf); - if ( next < rec_size ) + if ( (unsigned char *)rec + rec_size > this_cpu(t_data) + data_size ) { if ( printk_ratelimit() ) printk(XENLOG_WARNING - "%s: avail=%u (size=%08x prod=%08x cons=%08x) rec=%u\n", - __func__, next, data_size, buf->prod, buf->cons, rec_size); + "%s: size=%08x prod=%08x cons=%08x rec=%u\n", + __func__, data_size, next, buf->cons, rec_size); return; } - rmb(); - rec = next_record(buf); - if ( !rec ) - return; rec->event = event; rec->extra_u32 = extra_word; dst = (unsigned char *)rec->u.nocycles.extra_u32; @@ -537,9 +542,6 @@ static inline void __insert_record(volat wmb(); - next = buf->prod; - if ( bogus(next, buf->cons) ) - return; next += rec_size; if ( next >= 2*data_size ) next -= 2*data_size; _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel