After getting a report of 3.2.3''s xenmon crashing Xen (as it turned out this was because c/s 17000 was backported to that tree without also applying c/s 17515), this is the result of reviewing the code for security issues: - the hypervisor should not rely on any specific state of the actual trace buffer (as it is shared writable with Dom0) - there''s no need to share writably the t_info pages (Dom0 only wants [and needs] to read it) - T_INFO_FIRST_OFFSET wasn''t defined correctly, thus allowing in the num_online_cpus() == NR_CPUS case to pass a corrupted MFN to Dom0 - check_tbuf_size() didn''t consider the case of the incoming size not allowing for the 2*data_size range for t_buf->{prod,cons} - printk()-s should be lower level or rate limited For the first of those items, I chose to use ''volatile'' qualifiers rather than sprinkling around barriers - this can certainly be changed if considered unacceptable. To make clear what purpose specific variables have and/or where they got loaded from, the patch also changes the type of some of them to be explicitly u32/s32, and removes pointless assertions (like checking an unsigned variable to be >= 0). I also took the prototype adjustment of __trace_var() as an opportunity to simplify the TRACE_xD() macros. Similar simplification could be done on the (quite numerous) direct callers of the function. Signed-off-by: Jan Beulich <jbeulich@novell.com> --- 2010-06-15.orig/tools/xenmon/xenbaked.c 2010-06-01 13:39:57.000000000 +0200 +++ 2010-06-15/tools/xenmon/xenbaked.c 2010-06-28 10:23:01.000000000 +0200 @@ -84,7 +84,7 @@ typedef struct settings_st { } settings_t; struct t_struct { - struct t_info *t_info; /* Structure with information about individual buffers */ + const struct t_info *t_info; /* Structure with information about individual buffers */ struct t_buf **meta; /* Pointers to trace buffer metadata */ unsigned char **data; /* Pointers to trace buffer data areas */ }; @@ -376,9 +376,8 @@ static struct t_struct *map_tbufs(unsign } /* Map t_info metadata structure */ - tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN, - tinfo_size, PROT_READ | PROT_WRITE, - tbufs_mfn); + tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN, tinfo_size, + PROT_READ, tbufs_mfn); if ( tbufs.t_info == 0 ) { @@ -404,7 +403,8 @@ static struct t_struct *map_tbufs(unsign for(i=0; i<num; i++) { - uint32_t *mfn_list = ((uint32_t *)tbufs.t_info) + tbufs.t_info->mfn_offset[i]; + const uint32_t *mfn_list = (const uint32_t *)tbufs.t_info + + tbufs.t_info->mfn_offset[i]; int j; xen_pfn_t pfn_list[tbufs.t_info->tbuf_size]; --- 2010-06-15.orig/tools/xentrace/xentrace.c 2010-06-01 13:39:57.000000000 +0200 +++ 2010-06-15/tools/xentrace/xentrace.c 2010-06-28 10:23:26.000000000 +0200 @@ -63,7 +63,7 @@ typedef struct settings_st { } settings_t; struct t_struct { - struct t_info *t_info; /* Structure with information about individual buffers */ + const struct t_info *t_info; /* Structure with information about individual buffers */ struct t_buf **meta; /* Pointers to trace buffer metadata */ unsigned char **data; /* Pointers to trace buffer data areas */ }; @@ -475,9 +475,8 @@ static struct t_struct *map_tbufs(unsign int i; /* Map t_info metadata structure */ - tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN, - tinfo_size, PROT_READ | PROT_WRITE, - tbufs_mfn); + tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN, tinfo_size, + PROT_READ, tbufs_mfn); if ( tbufs.t_info == 0 ) { @@ -503,7 +502,8 @@ static struct t_struct *map_tbufs(unsign for(i=0; i<num; i++) { - uint32_t *mfn_list = ((uint32_t *)tbufs.t_info) + tbufs.t_info->mfn_offset[i]; + const uint32_t *mfn_list = (const uint32_t *)tbufs.t_info + + tbufs.t_info->mfn_offset[i]; int j; xen_pfn_t pfn_list[tbufs.t_info->tbuf_size]; --- 2010-06-15.orig/xen/common/trace.c 2010-06-28 09:43:58.000000000 +0200 +++ 2010-06-15/xen/common/trace.c 2010-06-28 11:58:37.000000000 +0200 @@ -51,15 +51,15 @@ static struct t_info *t_info; #define T_INFO_PAGES 2 /* Size fixed at 2 pages for now. */ #define T_INFO_SIZE ((T_INFO_PAGES)*(PAGE_SIZE)) /* t_info.tbuf_size + list of mfn offsets + 1 to round up / sizeof uint32_t */ -#define T_INFO_FIRST_OFFSET ((sizeof(int16_t) + NR_CPUS * sizeof(int16_t) + 1) / sizeof(uint32_t)) -static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs); +#define T_INFO_FIRST_OFFSET (((2 + NR_CPUS) * sizeof(uint16_t)) / sizeof(uint32_t)) +static DEFINE_PER_CPU_READ_MOSTLY(volatile struct t_buf *, t_bufs); static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data); static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock); -static int data_size; +static u32 data_size; /* High water mark for trace buffers; */ /* Send virtual interrupt when buffer level reaches this point */ -static int t_buf_highwater; +static u32 t_buf_highwater; /* Number of records lost due to per-CPU trace buffer being full. */ static DEFINE_PER_CPU(unsigned long, lost_records); @@ -77,11 +77,16 @@ static u32 tb_event_mask = TRC_ALL; /** * check_tbuf_size - check to make sure that the proposed size will fit - * in the currently sized struct t_info. + * in the currently sized struct t_info and allows prod and cons to + * reach double the value without overflow. */ -static inline int check_tbuf_size(int size) +static int check_tbuf_size(u32 pages) { - return (num_online_cpus() * size + T_INFO_FIRST_OFFSET) > (T_INFO_SIZE / sizeof(uint32_t)); + u32 size = pages * PAGE_SIZE; + + return (size / PAGE_SIZE != pages) || (size + size < size) || + (num_online_cpus() * pages + T_INFO_FIRST_OFFSET > + T_INFO_SIZE / sizeof(uint32_t)); } /** @@ -115,7 +120,7 @@ static int alloc_trace_bufs(void) } t_info->tbuf_size = opt_tbuf_size; - printk("tbuf_size %d\n", t_info->tbuf_size); + printk(XENLOG_INFO "tbuf_size %d\n", t_info->tbuf_size); nr_pages = opt_tbuf_size; order = get_order_from_pages(nr_pages); @@ -139,7 +144,7 @@ static int alloc_trace_bufs(void) spin_lock_irqsave(&per_cpu(t_lock, cpu), flags); - buf = per_cpu(t_bufs, cpu) = (struct t_buf *)rawbuf; + per_cpu(t_bufs, cpu) = buf = (struct t_buf *)rawbuf; buf->cons = buf->prod = 0; per_cpu(t_data, cpu) = (unsigned char *)(buf + 1); @@ -171,7 +176,7 @@ static int alloc_trace_bufs(void) /* Write list first, then write per-cpu offset. */ wmb(); t_info->mfn_offset[cpu]=offset; - printk("p%d mfn %"PRIx32" offset %d\n", + printk(XENLOG_INFO "p%d mfn %"PRIx32" offset %d\n", cpu, mfn, offset); offset+=i; } @@ -190,6 +195,7 @@ out_dealloc: spin_lock_irqsave(&per_cpu(t_lock, cpu), flags); if ( (rawbuf = (char *)per_cpu(t_bufs, cpu)) ) { + per_cpu(t_bufs, cpu) = NULL; ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated)); free_xenheap_pages(rawbuf, order); } @@ -308,7 +314,7 @@ void __init init_trace_bufs(void) for(i=0; i<T_INFO_PAGES; i++) share_xen_page_with_privileged_guests( - virt_to_page(t_info) + i, XENSHARE_writable); + virt_to_page(t_info) + i, XENSHARE_readonly); if ( opt_tbuf_size == 0 ) { @@ -404,19 +410,37 @@ int tb_control(xen_sysctl_tbuf_op_t *tbc return rc; } -static inline int calc_rec_size(int cycles, int extra) +static inline unsigned int calc_rec_size(bool_t cycles, unsigned int extra) { - int rec_size; - rec_size = 4; + unsigned int rec_size = 4; + if ( cycles ) rec_size += 8; rec_size += extra; return rec_size; } -static inline int calc_unconsumed_bytes(struct t_buf *buf) +static inline bool_t bogus(u32 prod, u32 cons) +{ + if ( unlikely(prod & 3) || unlikely(prod >= 2 * data_size) || + unlikely(cons & 3) || unlikely(cons >= 2 * data_size) ) + { + tb_init_done = 0; + printk(XENLOG_WARNING "trc#%u: bogus prod (%08x) and/or cons (%08x)\n", + smp_processor_id(), prod, cons); + return 1; + } + return 0; +} + +static inline u32 calc_unconsumed_bytes(const volatile struct t_buf *buf) { - int x = buf->prod - buf->cons; + u32 prod = buf->prod, cons = buf->cons; + s32 x = prod - cons; + + if ( bogus(prod, cons) ) + return data_size; + if ( x < 0 ) x += 2*data_size; @@ -426,9 +450,14 @@ static inline int calc_unconsumed_bytes( return x; } -static inline int calc_bytes_to_wrap(struct t_buf *buf) +static inline u32 calc_bytes_to_wrap(const volatile struct t_buf *buf) { - int x = data_size - buf->prod; + u32 prod = buf->prod; + s32 x = data_size - prod; + + if ( bogus(prod, buf->cons) ) + return 0; + if ( x <= 0 ) x += data_size; @@ -438,35 +467,37 @@ static inline int calc_bytes_to_wrap(str return x; } -static inline int calc_bytes_avail(struct t_buf *buf) +static inline u32 calc_bytes_avail(const volatile struct t_buf *buf) { return data_size - calc_unconsumed_bytes(buf); } -static inline struct t_rec * -next_record(struct t_buf *buf) +static inline struct t_rec *next_record(const volatile struct t_buf *buf) { - int x = buf->prod; + u32 x = buf->prod; + + if ( !tb_init_done || bogus(x, buf->cons) ) + return NULL; + if ( x >= data_size ) x -= data_size; - ASSERT(x >= 0); ASSERT(x < data_size); return (struct t_rec *)&this_cpu(t_data)[x]; } -static inline int __insert_record(struct t_buf *buf, - unsigned long event, - int extra, - int cycles, - int rec_size, - unsigned char *extra_data) +static inline void __insert_record(volatile struct t_buf *buf, + unsigned long event, + unsigned int extra, + bool_t cycles, + unsigned int rec_size, + const void *extra_data) { struct t_rec *rec; unsigned char *dst; - unsigned long extra_word = extra/sizeof(u32); - int local_rec_size = calc_rec_size(cycles, extra); + unsigned int extra_word = extra / sizeof(u32); + unsigned int local_rec_size = calc_rec_size(cycles, extra); uint32_t next; BUG_ON(local_rec_size != rec_size); @@ -475,17 +506,20 @@ static inline int __insert_record(struct /* Double-check once more that we have enough space. * Don''t bugcheck here, in case the userland tool is doing * something stupid. */ - if ( calc_bytes_avail(buf) < rec_size ) + next = calc_bytes_avail(buf); + if ( next < rec_size ) { - printk("%s: %u bytes left (%u - ((%u - %u) %% %u) recsize %u.\n", - __func__, - calc_bytes_avail(buf), - data_size, buf->prod, buf->cons, data_size, rec_size); - return 0; + if ( printk_ratelimit() ) + printk(XENLOG_WARNING + "%s: avail=%u (size=%08x prod=%08x cons=%08x) rec=%u\n", + __func__, next, data_size, buf->prod, buf->cons, rec_size); + return; } rmb(); rec = next_record(buf); + if ( !rec ) + return; rec->event = event; rec->extra_u32 = extra_word; dst = (unsigned char *)rec->u.nocycles.extra_u32; @@ -502,21 +536,22 @@ static inline int __insert_record(struct wmb(); - next = buf->prod + rec_size; + next = buf->prod; + if ( bogus(next, buf->cons) ) + return; + next += rec_size; if ( next >= 2*data_size ) next -= 2*data_size; - ASSERT(next >= 0); ASSERT(next < 2*data_size); buf->prod = next; - - return rec_size; } -static inline int insert_wrap_record(struct t_buf *buf, int size) +static inline void insert_wrap_record(volatile struct t_buf *buf, + unsigned int size) { - int space_left = calc_bytes_to_wrap(buf); - unsigned long extra_space = space_left - sizeof(u32); - int cycles = 0; + u32 space_left = calc_bytes_to_wrap(buf); + unsigned int extra_space = space_left - sizeof(u32); + bool_t cycles = 0; BUG_ON(space_left > size); @@ -528,17 +563,13 @@ static inline int insert_wrap_record(str ASSERT((extra_space/sizeof(u32)) <= TRACE_EXTRA_MAX); } - return __insert_record(buf, - TRC_TRACE_WRAP_BUFFER, - extra_space, - cycles, - space_left, - NULL); + __insert_record(buf, TRC_TRACE_WRAP_BUFFER, extra_space, cycles, + space_left, NULL); } #define LOST_REC_SIZE (4 + 8 + 16) /* header + tsc + sizeof(struct ed) */ -static inline int insert_lost_records(struct t_buf *buf) +static inline void insert_lost_records(volatile struct t_buf *buf) { struct { u32 lost_records; @@ -553,12 +584,8 @@ static inline int insert_lost_records(st this_cpu(lost_records) = 0; - return __insert_record(buf, - TRC_LOST_RECORDS, - sizeof(ed), - 1 /* cycles */, - LOST_REC_SIZE, - (unsigned char *)&ed); + __insert_record(buf, TRC_LOST_RECORDS, sizeof(ed), 1 /* cycles */, + LOST_REC_SIZE, &ed); } /* @@ -580,13 +607,15 @@ static DECLARE_TASKLET(trace_notify_dom0 * failure, otherwise 0. Failure occurs only if the trace buffers are not yet * initialised. */ -void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data) +void __trace_var(u32 event, bool_t cycles, unsigned int extra, + const void *extra_data) { - struct t_buf *buf; - unsigned long flags, bytes_to_tail, bytes_to_wrap; - int rec_size, total_size; - int extra_word; - int started_below_highwater = 0; + volatile struct t_buf *buf; + unsigned long flags; + u32 bytes_to_tail, bytes_to_wrap; + unsigned int rec_size, total_size; + unsigned int extra_word; + bool_t started_below_highwater; if( !tb_init_done ) return; @@ -619,13 +648,11 @@ void __trace_var(u32 event, int cycles, /* Read tb_init_done /before/ t_bufs. */ rmb(); - - spin_lock_irqsave(&this_cpu(t_lock), flags); - buf = this_cpu(t_bufs); - if ( unlikely(!buf) ) - goto unlock; + return; + + spin_lock_irqsave(&this_cpu(t_lock), flags); started_below_highwater = (calc_unconsumed_bytes(buf) < t_buf_highwater); --- 2010-06-15.orig/xen/include/xen/trace.h 2010-06-28 09:43:58.000000000 +0200 +++ 2010-06-15/xen/include/xen/trace.h 2010-06-25 12:08:36.000000000 +0200 @@ -36,7 +36,7 @@ int tb_control(struct xen_sysctl_tbuf_op int trace_will_trace_event(u32 event); -void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data); +void __trace_var(u32 event, bool_t cycles, unsigned int extra, const void *); static inline void trace_var(u32 event, int cycles, int extra, unsigned char *extra_data) @@ -57,7 +57,7 @@ static inline void trace_var(u32 event, { \ u32 _d[1]; \ _d[0] = d1; \ - __trace_var(_e, 1, sizeof(*_d), (unsigned char *)_d); \ + __trace_var(_e, 1, sizeof(_d), _d); \ } \ } while ( 0 ) @@ -68,7 +68,7 @@ static inline void trace_var(u32 event, u32 _d[2]; \ _d[0] = d1; \ _d[1] = d2; \ - __trace_var(_e, 1, sizeof(*_d)*2, (unsigned char *)_d); \ + __trace_var(_e, 1, sizeof(_d), _d); \ } \ } while ( 0 ) @@ -80,7 +80,7 @@ static inline void trace_var(u32 event, _d[0] = d1; \ _d[1] = d2; \ _d[2] = d3; \ - __trace_var(_e, 1, sizeof(*_d)*3, (unsigned char *)_d); \ + __trace_var(_e, 1, sizeof(_d), _d); \ } \ } while ( 0 ) @@ -93,7 +93,7 @@ static inline void trace_var(u32 event, _d[1] = d2; \ _d[2] = d3; \ _d[3] = d4; \ - __trace_var(_e, 1, sizeof(*_d)*4, (unsigned char *)_d); \ + __trace_var(_e, 1, sizeof(_d), _d); \ } \ } while ( 0 ) @@ -107,7 +107,7 @@ static inline void trace_var(u32 event, _d[2] = d3; \ _d[3] = d4; \ _d[4] = d5; \ - __trace_var(_e, 1, sizeof(*_d)*5, (unsigned char *)_d); \ + __trace_var(_e, 1, sizeof(_d), _d); \ } \ } while ( 0 ) @@ -122,7 +122,7 @@ static inline void trace_var(u32 event, _d[3] = d4; \ _d[4] = d5; \ _d[5] = d6; \ - __trace_var(_e, 1, sizeof(*_d)*6, (unsigned char *)_d); \ + __trace_var(_e, 1, sizeof(_d), _d); \ } \ } while ( 0 ) _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Both of these xentrace patches need an Ack from George Dunlap. Also I will need to know if they belong in any tree other than xen-unstable. I would never tolerate use of volatiles rather than proper barriers in general. But for xentrace I''ll leave it up to George. -- Keir On 28/06/2010 11:39, "Jan Beulich" <JBeulich@novell.com> wrote:> After getting a report of 3.2.3''s xenmon crashing Xen (as it turned out > this was because c/s 17000 was backported to that tree without also > applying c/s 17515), this is the result of reviewing the code for > security issues: > - the hypervisor should not rely on any specific state of the actual > trace buffer (as it is shared writable with Dom0) > - there''s no need to share writably the t_info pages (Dom0 only wants > [and needs] to read it) > - T_INFO_FIRST_OFFSET wasn''t defined correctly, thus allowing in the > num_online_cpus() == NR_CPUS case to pass a corrupted MFN to Dom0 > - check_tbuf_size() didn''t consider the case of the incoming size not > allowing for the 2*data_size range for t_buf->{prod,cons} > - printk()-s should be lower level or rate limited > > For the first of those items, I chose to use ''volatile'' qualifiers > rather than sprinkling around barriers - this can certainly be changed > if considered unacceptable. > > To make clear what purpose specific variables have and/or where they > got loaded from, the patch also changes the type of some of them to > be explicitly u32/s32, and removes pointless assertions (like checking > an unsigned variable to be >= 0). > > I also took the prototype adjustment of __trace_var() as an opportunity > to simplify the TRACE_xD() macros. Similar simplification could be done > on the (quite numerous) direct callers of the function. > > Signed-off-by: Jan Beulich <jbeulich@novell.com> > > --- 2010-06-15.orig/tools/xenmon/xenbaked.c 2010-06-01 13:39:57.000000000 > +0200 > +++ 2010-06-15/tools/xenmon/xenbaked.c 2010-06-28 10:23:01.000000000 +0200 > @@ -84,7 +84,7 @@ typedef struct settings_st { > } settings_t; > > struct t_struct { > - struct t_info *t_info; /* Structure with information about individual > buffers */ > + const struct t_info *t_info; /* Structure with information about > individual buffers */ > struct t_buf **meta; /* Pointers to trace buffer metadata */ > unsigned char **data; /* Pointers to trace buffer data areas */ > }; > @@ -376,9 +376,8 @@ static struct t_struct *map_tbufs(unsign > } > > /* Map t_info metadata structure */ > - tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN, > - tinfo_size, PROT_READ | PROT_WRITE, > - tbufs_mfn); > + tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN, tinfo_size, > + PROT_READ, tbufs_mfn); > > if ( tbufs.t_info == 0 ) > { > @@ -404,7 +403,8 @@ static struct t_struct *map_tbufs(unsign > for(i=0; i<num; i++) > { > > - uint32_t *mfn_list = ((uint32_t *)tbufs.t_info) + > tbufs.t_info->mfn_offset[i]; > + const uint32_t *mfn_list = (const uint32_t *)tbufs.t_info > + + tbufs.t_info->mfn_offset[i]; > int j; > xen_pfn_t pfn_list[tbufs.t_info->tbuf_size]; > > --- 2010-06-15.orig/tools/xentrace/xentrace.c 2010-06-01 13:39:57.000000000 > +0200 > +++ 2010-06-15/tools/xentrace/xentrace.c 2010-06-28 10:23:26.000000000 > +0200 > @@ -63,7 +63,7 @@ typedef struct settings_st { > } settings_t; > > struct t_struct { > - struct t_info *t_info; /* Structure with information about individual > buffers */ > + const struct t_info *t_info; /* Structure with information about > individual buffers */ > struct t_buf **meta; /* Pointers to trace buffer metadata */ > unsigned char **data; /* Pointers to trace buffer data areas */ > }; > @@ -475,9 +475,8 @@ static struct t_struct *map_tbufs(unsign > int i; > > /* Map t_info metadata structure */ > - tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN, > - tinfo_size, PROT_READ | PROT_WRITE, > - tbufs_mfn); > + tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN, tinfo_size, > + PROT_READ, tbufs_mfn); > > if ( tbufs.t_info == 0 ) > { > @@ -503,7 +502,8 @@ static struct t_struct *map_tbufs(unsign > for(i=0; i<num; i++) > { > > - uint32_t *mfn_list = ((uint32_t *)tbufs.t_info) + > tbufs.t_info->mfn_offset[i]; > + const uint32_t *mfn_list = (const uint32_t *)tbufs.t_info > + + tbufs.t_info->mfn_offset[i]; > int j; > xen_pfn_t pfn_list[tbufs.t_info->tbuf_size]; > > --- 2010-06-15.orig/xen/common/trace.c 2010-06-28 09:43:58.000000000 +0200 > +++ 2010-06-15/xen/common/trace.c 2010-06-28 11:58:37.000000000 +0200 > @@ -51,15 +51,15 @@ static struct t_info *t_info; > #define T_INFO_PAGES 2 /* Size fixed at 2 pages for now. */ > #define T_INFO_SIZE ((T_INFO_PAGES)*(PAGE_SIZE)) > /* t_info.tbuf_size + list of mfn offsets + 1 to round up / sizeof uint32_t > */ > -#define T_INFO_FIRST_OFFSET ((sizeof(int16_t) + NR_CPUS * sizeof(int16_t) + > 1) / sizeof(uint32_t)) > -static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs); > +#define T_INFO_FIRST_OFFSET (((2 + NR_CPUS) * sizeof(uint16_t)) / > sizeof(uint32_t)) > +static DEFINE_PER_CPU_READ_MOSTLY(volatile struct t_buf *, t_bufs); > static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data); > static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock); > -static int data_size; > +static u32 data_size; > > /* High water mark for trace buffers; */ > /* Send virtual interrupt when buffer level reaches this point */ > -static int t_buf_highwater; > +static u32 t_buf_highwater; > > /* Number of records lost due to per-CPU trace buffer being full. */ > static DEFINE_PER_CPU(unsigned long, lost_records); > @@ -77,11 +77,16 @@ static u32 tb_event_mask = TRC_ALL; > > /** > * check_tbuf_size - check to make sure that the proposed size will fit > - * in the currently sized struct t_info. > + * in the currently sized struct t_info and allows prod and cons to > + * reach double the value without overflow. > */ > -static inline int check_tbuf_size(int size) > +static int check_tbuf_size(u32 pages) > { > - return (num_online_cpus() * size + T_INFO_FIRST_OFFSET) > (T_INFO_SIZE / > sizeof(uint32_t)); > + u32 size = pages * PAGE_SIZE; > + > + return (size / PAGE_SIZE != pages) || (size + size < size) || > + (num_online_cpus() * pages + T_INFO_FIRST_OFFSET > > + T_INFO_SIZE / sizeof(uint32_t)); > } > > /** > @@ -115,7 +120,7 @@ static int alloc_trace_bufs(void) > } > > t_info->tbuf_size = opt_tbuf_size; > - printk("tbuf_size %d\n", t_info->tbuf_size); > + printk(XENLOG_INFO "tbuf_size %d\n", t_info->tbuf_size); > > nr_pages = opt_tbuf_size; > order = get_order_from_pages(nr_pages); > @@ -139,7 +144,7 @@ static int alloc_trace_bufs(void) > > spin_lock_irqsave(&per_cpu(t_lock, cpu), flags); > > - buf = per_cpu(t_bufs, cpu) = (struct t_buf *)rawbuf; > + per_cpu(t_bufs, cpu) = buf = (struct t_buf *)rawbuf; > buf->cons = buf->prod = 0; > per_cpu(t_data, cpu) = (unsigned char *)(buf + 1); > > @@ -171,7 +176,7 @@ static int alloc_trace_bufs(void) > /* Write list first, then write per-cpu offset. */ > wmb(); > t_info->mfn_offset[cpu]=offset; > - printk("p%d mfn %"PRIx32" offset %d\n", > + printk(XENLOG_INFO "p%d mfn %"PRIx32" offset %d\n", > cpu, mfn, offset); > offset+=i; > } > @@ -190,6 +195,7 @@ out_dealloc: > spin_lock_irqsave(&per_cpu(t_lock, cpu), flags); > if ( (rawbuf = (char *)per_cpu(t_bufs, cpu)) ) > { > + per_cpu(t_bufs, cpu) = NULL; > ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated)); > free_xenheap_pages(rawbuf, order); > } > @@ -308,7 +314,7 @@ void __init init_trace_bufs(void) > > for(i=0; i<T_INFO_PAGES; i++) > share_xen_page_with_privileged_guests( > - virt_to_page(t_info) + i, XENSHARE_writable); > + virt_to_page(t_info) + i, XENSHARE_readonly); > > if ( opt_tbuf_size == 0 ) > { > @@ -404,19 +410,37 @@ int tb_control(xen_sysctl_tbuf_op_t *tbc > return rc; > } > > -static inline int calc_rec_size(int cycles, int extra) > +static inline unsigned int calc_rec_size(bool_t cycles, unsigned int extra) > { > - int rec_size; > - rec_size = 4; > + unsigned int rec_size = 4; > + > if ( cycles ) > rec_size += 8; > rec_size += extra; > return rec_size; > } > > -static inline int calc_unconsumed_bytes(struct t_buf *buf) > +static inline bool_t bogus(u32 prod, u32 cons) > +{ > + if ( unlikely(prod & 3) || unlikely(prod >= 2 * data_size) || > + unlikely(cons & 3) || unlikely(cons >= 2 * data_size) ) > + { > + tb_init_done = 0; > + printk(XENLOG_WARNING "trc#%u: bogus prod (%08x) and/or cons > (%08x)\n", > + smp_processor_id(), prod, cons); > + return 1; > + } > + return 0; > +} > + > +static inline u32 calc_unconsumed_bytes(const volatile struct t_buf *buf) > { > - int x = buf->prod - buf->cons; > + u32 prod = buf->prod, cons = buf->cons; > + s32 x = prod - cons; > + > + if ( bogus(prod, cons) ) > + return data_size; > + > if ( x < 0 ) > x += 2*data_size; > > @@ -426,9 +450,14 @@ static inline int calc_unconsumed_bytes( > return x; > } > > -static inline int calc_bytes_to_wrap(struct t_buf *buf) > +static inline u32 calc_bytes_to_wrap(const volatile struct t_buf *buf) > { > - int x = data_size - buf->prod; > + u32 prod = buf->prod; > + s32 x = data_size - prod; > + > + if ( bogus(prod, buf->cons) ) > + return 0; > + > if ( x <= 0 ) > x += data_size; > > @@ -438,35 +467,37 @@ static inline int calc_bytes_to_wrap(str > return x; > } > > -static inline int calc_bytes_avail(struct t_buf *buf) > +static inline u32 calc_bytes_avail(const volatile struct t_buf *buf) > { > return data_size - calc_unconsumed_bytes(buf); > } > > -static inline struct t_rec * > -next_record(struct t_buf *buf) > +static inline struct t_rec *next_record(const volatile struct t_buf *buf) > { > - int x = buf->prod; > + u32 x = buf->prod; > + > + if ( !tb_init_done || bogus(x, buf->cons) ) > + return NULL; > + > if ( x >= data_size ) > x -= data_size; > > - ASSERT(x >= 0); > ASSERT(x < data_size); > > return (struct t_rec *)&this_cpu(t_data)[x]; > } > > -static inline int __insert_record(struct t_buf *buf, > - unsigned long event, > - int extra, > - int cycles, > - int rec_size, > - unsigned char *extra_data) > +static inline void __insert_record(volatile struct t_buf *buf, > + unsigned long event, > + unsigned int extra, > + bool_t cycles, > + unsigned int rec_size, > + const void *extra_data) > { > struct t_rec *rec; > unsigned char *dst; > - unsigned long extra_word = extra/sizeof(u32); > - int local_rec_size = calc_rec_size(cycles, extra); > + unsigned int extra_word = extra / sizeof(u32); > + unsigned int local_rec_size = calc_rec_size(cycles, extra); > uint32_t next; > > BUG_ON(local_rec_size != rec_size); > @@ -475,17 +506,20 @@ static inline int __insert_record(struct > /* Double-check once more that we have enough space. > * Don''t bugcheck here, in case the userland tool is doing > * something stupid. */ > - if ( calc_bytes_avail(buf) < rec_size ) > + next = calc_bytes_avail(buf); > + if ( next < rec_size ) > { > - printk("%s: %u bytes left (%u - ((%u - %u) %% %u) recsize %u.\n", > - __func__, > - calc_bytes_avail(buf), > - data_size, buf->prod, buf->cons, data_size, rec_size); > - return 0; > + if ( printk_ratelimit() ) > + printk(XENLOG_WARNING > + "%s: avail=%u (size=%08x prod=%08x cons=%08x) rec=%u\n", > + __func__, next, data_size, buf->prod, buf->cons, > rec_size); > + return; > } > rmb(); > > rec = next_record(buf); > + if ( !rec ) > + return; > rec->event = event; > rec->extra_u32 = extra_word; > dst = (unsigned char *)rec->u.nocycles.extra_u32; > @@ -502,21 +536,22 @@ static inline int __insert_record(struct > > wmb(); > > - next = buf->prod + rec_size; > + next = buf->prod; > + if ( bogus(next, buf->cons) ) > + return; > + next += rec_size; > if ( next >= 2*data_size ) > next -= 2*data_size; > - ASSERT(next >= 0); > ASSERT(next < 2*data_size); > buf->prod = next; > - > - return rec_size; > } > > -static inline int insert_wrap_record(struct t_buf *buf, int size) > +static inline void insert_wrap_record(volatile struct t_buf *buf, > + unsigned int size) > { > - int space_left = calc_bytes_to_wrap(buf); > - unsigned long extra_space = space_left - sizeof(u32); > - int cycles = 0; > + u32 space_left = calc_bytes_to_wrap(buf); > + unsigned int extra_space = space_left - sizeof(u32); > + bool_t cycles = 0; > > BUG_ON(space_left > size); > > @@ -528,17 +563,13 @@ static inline int insert_wrap_record(str > ASSERT((extra_space/sizeof(u32)) <= TRACE_EXTRA_MAX); > } > > - return __insert_record(buf, > - TRC_TRACE_WRAP_BUFFER, > - extra_space, > - cycles, > - space_left, > - NULL); > + __insert_record(buf, TRC_TRACE_WRAP_BUFFER, extra_space, cycles, > + space_left, NULL); > } > > #define LOST_REC_SIZE (4 + 8 + 16) /* header + tsc + sizeof(struct ed) */ > > -static inline int insert_lost_records(struct t_buf *buf) > +static inline void insert_lost_records(volatile struct t_buf *buf) > { > struct { > u32 lost_records; > @@ -553,12 +584,8 @@ static inline int insert_lost_records(st > > this_cpu(lost_records) = 0; > > - return __insert_record(buf, > - TRC_LOST_RECORDS, > - sizeof(ed), > - 1 /* cycles */, > - LOST_REC_SIZE, > - (unsigned char *)&ed); > + __insert_record(buf, TRC_LOST_RECORDS, sizeof(ed), 1 /* cycles */, > + LOST_REC_SIZE, &ed); > } > > /* > @@ -580,13 +607,15 @@ static DECLARE_TASKLET(trace_notify_dom0 > * failure, otherwise 0. Failure occurs only if the trace buffers are not > yet > * initialised. > */ > -void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data) > +void __trace_var(u32 event, bool_t cycles, unsigned int extra, > + const void *extra_data) > { > - struct t_buf *buf; > - unsigned long flags, bytes_to_tail, bytes_to_wrap; > - int rec_size, total_size; > - int extra_word; > - int started_below_highwater = 0; > + volatile struct t_buf *buf; > + unsigned long flags; > + u32 bytes_to_tail, bytes_to_wrap; > + unsigned int rec_size, total_size; > + unsigned int extra_word; > + bool_t started_below_highwater; > > if( !tb_init_done ) > return; > @@ -619,13 +648,11 @@ void __trace_var(u32 event, int cycles, > > /* Read tb_init_done /before/ t_bufs. */ > rmb(); > - > - spin_lock_irqsave(&this_cpu(t_lock), flags); > - > buf = this_cpu(t_bufs); > - > if ( unlikely(!buf) ) > - goto unlock; > + return; > + > + spin_lock_irqsave(&this_cpu(t_lock), flags); > > started_below_highwater = (calc_unconsumed_bytes(buf) < t_buf_highwater); > > --- 2010-06-15.orig/xen/include/xen/trace.h 2010-06-28 09:43:58.000000000 > +0200 > +++ 2010-06-15/xen/include/xen/trace.h 2010-06-25 12:08:36.000000000 +0200 > @@ -36,7 +36,7 @@ int tb_control(struct xen_sysctl_tbuf_op > > int trace_will_trace_event(u32 event); > > -void __trace_var(u32 event, int cycles, int extra, unsigned char > *extra_data); > +void __trace_var(u32 event, bool_t cycles, unsigned int extra, const void *); > > static inline void trace_var(u32 event, int cycles, int extra, > unsigned char *extra_data) > @@ -57,7 +57,7 @@ static inline void trace_var(u32 event, > { \ > u32 _d[1]; \ > _d[0] = d1; \ > - __trace_var(_e, 1, sizeof(*_d), (unsigned char *)_d); \ > + __trace_var(_e, 1, sizeof(_d), _d); \ > } \ > } while ( 0 ) > > @@ -68,7 +68,7 @@ static inline void trace_var(u32 event, > u32 _d[2]; \ > _d[0] = d1; \ > _d[1] = d2; \ > - __trace_var(_e, 1, sizeof(*_d)*2, (unsigned char *)_d); \ > + __trace_var(_e, 1, sizeof(_d), _d); \ > } \ > } while ( 0 ) > > @@ -80,7 +80,7 @@ static inline void trace_var(u32 event, > _d[0] = d1; \ > _d[1] = d2; \ > _d[2] = d3; \ > - __trace_var(_e, 1, sizeof(*_d)*3, (unsigned char *)_d); \ > + __trace_var(_e, 1, sizeof(_d), _d); \ > } \ > } while ( 0 ) > > @@ -93,7 +93,7 @@ static inline void trace_var(u32 event, > _d[1] = d2; \ > _d[2] = d3; \ > _d[3] = d4; \ > - __trace_var(_e, 1, sizeof(*_d)*4, (unsigned char *)_d); \ > + __trace_var(_e, 1, sizeof(_d), _d); \ > } \ > } while ( 0 ) > > @@ -107,7 +107,7 @@ static inline void trace_var(u32 event, > _d[2] = d3; \ > _d[3] = d4; \ > _d[4] = d5; \ > - __trace_var(_e, 1, sizeof(*_d)*5, (unsigned char *)_d); \ > + __trace_var(_e, 1, sizeof(_d), _d); \ > } \ > } while ( 0 ) > > @@ -122,7 +122,7 @@ static inline void trace_var(u32 event, > _d[3] = d4; \ > _d[4] = d5; \ > _d[5] = d6; \ > - __trace_var(_e, 1, sizeof(*_d)*6, (unsigned char *)_d); \ > + __trace_var(_e, 1, sizeof(_d), _d); \ > } \ > } while ( 0 ) > > >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
>>> On 28.06.10 at 13:12, Keir Fraser <keir.fraser@eu.citrix.com> wrote: > Both of these xentrace patches need an Ack from George Dunlap. Also I will > need to know if they belong in any tree other than xen-unstable.I would see them both as candidates for 4.0.x at least. Jan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jan, Thanks for your work in this. Could you please break this patch down into its component changes (described in the list below) so that I can review, discuss, and ack/nack components individually? (And so they show up as individual changesets when someone''s doing archeology...) So far I agree with the following changes:> - there''s no need to share writably the t_info pages (Dom0 only wants > [and needs] to read it) > - T_INFO_FIRST_OFFSET wasn''t defined correctly, thus allowing in the > num_online_cpus() == NR_CPUS case to pass a corrupted MFN to Dom0 > - printk()-s should be lower level or rate limitedBut probably disagree with this one:> - the hypervisor should not rely on any specific state of the actual > trace buffer (as it is shared writable with Dom0)And would like to be able to see the impact of following more clearly (i.e., in separate patches):> - check_tbuf_size() didn''t consider the case of the incoming size not > allowing for the 2*data_size range for t_buf->{prod,cons}> To make clear what purpose specific variables have and/or where they > got loaded from, the patch also changes the type of some of them to > be explicitly u32/s32, and removes pointless assertions (like checking > an unsigned variable to be >= 0). > > I also took the prototype adjustment of __trace_var() as an opportunity > to simplify the TRACE_xD() macros. Similar simplification could be done > on the (quite numerous) direct callers of the function.Thanks again, -George> > Signed-off-by: Jan Beulich <jbeulich@novell.com> > > --- 2010-06-15.orig/tools/xenmon/xenbaked.c 2010-06-01 13:39:57.000000000 +0200 > +++ 2010-06-15/tools/xenmon/xenbaked.c 2010-06-28 10:23:01.000000000 +0200 > @@ -84,7 +84,7 @@ typedef struct settings_st { > } settings_t; > > struct t_struct { > - struct t_info *t_info; /* Structure with information about individual buffers */ > + const struct t_info *t_info; /* Structure with information about individual buffers */ > struct t_buf **meta; /* Pointers to trace buffer metadata */ > unsigned char **data; /* Pointers to trace buffer data areas */ > }; > @@ -376,9 +376,8 @@ static struct t_struct *map_tbufs(unsign > } > > /* Map t_info metadata structure */ > - tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN, > - tinfo_size, PROT_READ | PROT_WRITE, > - tbufs_mfn); > + tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN, tinfo_size, > + PROT_READ, tbufs_mfn); > > if ( tbufs.t_info == 0 ) > { > @@ -404,7 +403,8 @@ static struct t_struct *map_tbufs(unsign > for(i=0; i<num; i++) > { > > - uint32_t *mfn_list = ((uint32_t *)tbufs.t_info) + tbufs.t_info->mfn_offset[i]; > + const uint32_t *mfn_list = (const uint32_t *)tbufs.t_info > + + tbufs.t_info->mfn_offset[i]; > int j; > xen_pfn_t pfn_list[tbufs.t_info->tbuf_size]; > > --- 2010-06-15.orig/tools/xentrace/xentrace.c 2010-06-01 13:39:57.000000000 +0200 > +++ 2010-06-15/tools/xentrace/xentrace.c 2010-06-28 10:23:26.000000000 +0200 > @@ -63,7 +63,7 @@ typedef struct settings_st { > } settings_t; > > struct t_struct { > - struct t_info *t_info; /* Structure with information about individual buffers */ > + const struct t_info *t_info; /* Structure with information about individual buffers */ > struct t_buf **meta; /* Pointers to trace buffer metadata */ > unsigned char **data; /* Pointers to trace buffer data areas */ > }; > @@ -475,9 +475,8 @@ static struct t_struct *map_tbufs(unsign > int i; > > /* Map t_info metadata structure */ > - tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN, > - tinfo_size, PROT_READ | PROT_WRITE, > - tbufs_mfn); > + tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN, tinfo_size, > + PROT_READ, tbufs_mfn); > > if ( tbufs.t_info == 0 ) > { > @@ -503,7 +502,8 @@ static struct t_struct *map_tbufs(unsign > for(i=0; i<num; i++) > { > > - uint32_t *mfn_list = ((uint32_t *)tbufs.t_info) + tbufs.t_info->mfn_offset[i]; > + const uint32_t *mfn_list = (const uint32_t *)tbufs.t_info > + + tbufs.t_info->mfn_offset[i]; > int j; > xen_pfn_t pfn_list[tbufs.t_info->tbuf_size]; > > --- 2010-06-15.orig/xen/common/trace.c 2010-06-28 09:43:58.000000000 +0200 > +++ 2010-06-15/xen/common/trace.c 2010-06-28 11:58:37.000000000 +0200 > @@ -51,15 +51,15 @@ static struct t_info *t_info; > #define T_INFO_PAGES 2 /* Size fixed at 2 pages for now. */ > #define T_INFO_SIZE ((T_INFO_PAGES)*(PAGE_SIZE)) > /* t_info.tbuf_size + list of mfn offsets + 1 to round up / sizeof uint32_t */ > -#define T_INFO_FIRST_OFFSET ((sizeof(int16_t) + NR_CPUS * sizeof(int16_t) + 1) / sizeof(uint32_t)) > -static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs); > +#define T_INFO_FIRST_OFFSET (((2 + NR_CPUS) * sizeof(uint16_t)) / sizeof(uint32_t)) > +static DEFINE_PER_CPU_READ_MOSTLY(volatile struct t_buf *, t_bufs); > static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data); > static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock); > -static int data_size; > +static u32 data_size; > > /* High water mark for trace buffers; */ > /* Send virtual interrupt when buffer level reaches this point */ > -static int t_buf_highwater; > +static u32 t_buf_highwater; > > /* Number of records lost due to per-CPU trace buffer being full. */ > static DEFINE_PER_CPU(unsigned long, lost_records); > @@ -77,11 +77,16 @@ static u32 tb_event_mask = TRC_ALL; > > /** > * check_tbuf_size - check to make sure that the proposed size will fit > - * in the currently sized struct t_info. > + * in the currently sized struct t_info and allows prod and cons to > + * reach double the value without overflow. > */ > -static inline int check_tbuf_size(int size) > +static int check_tbuf_size(u32 pages) > { > - return (num_online_cpus() * size + T_INFO_FIRST_OFFSET) > (T_INFO_SIZE / sizeof(uint32_t)); > + u32 size = pages * PAGE_SIZE; > + > + return (size / PAGE_SIZE != pages) || (size + size < size) || > + (num_online_cpus() * pages + T_INFO_FIRST_OFFSET > > + T_INFO_SIZE / sizeof(uint32_t)); > } > > /** > @@ -115,7 +120,7 @@ static int alloc_trace_bufs(void) > } > > t_info->tbuf_size = opt_tbuf_size; > - printk("tbuf_size %d\n", t_info->tbuf_size); > + printk(XENLOG_INFO "tbuf_size %d\n", t_info->tbuf_size); > > nr_pages = opt_tbuf_size; > order = get_order_from_pages(nr_pages); > @@ -139,7 +144,7 @@ static int alloc_trace_bufs(void) > > spin_lock_irqsave(&per_cpu(t_lock, cpu), flags); > > - buf = per_cpu(t_bufs, cpu) = (struct t_buf *)rawbuf; > + per_cpu(t_bufs, cpu) = buf = (struct t_buf *)rawbuf; > buf->cons = buf->prod = 0; > per_cpu(t_data, cpu) = (unsigned char *)(buf + 1); > > @@ -171,7 +176,7 @@ static int alloc_trace_bufs(void) > /* Write list first, then write per-cpu offset. */ > wmb(); > t_info->mfn_offset[cpu]=offset; > - printk("p%d mfn %"PRIx32" offset %d\n", > + printk(XENLOG_INFO "p%d mfn %"PRIx32" offset %d\n", > cpu, mfn, offset); > offset+=i; > } > @@ -190,6 +195,7 @@ out_dealloc: > spin_lock_irqsave(&per_cpu(t_lock, cpu), flags); > if ( (rawbuf = (char *)per_cpu(t_bufs, cpu)) ) > { > + per_cpu(t_bufs, cpu) = NULL; > ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated)); > free_xenheap_pages(rawbuf, order); > } > @@ -308,7 +314,7 @@ void __init init_trace_bufs(void) > > for(i=0; i<T_INFO_PAGES; i++) > share_xen_page_with_privileged_guests( > - virt_to_page(t_info) + i, XENSHARE_writable); > + virt_to_page(t_info) + i, XENSHARE_readonly); > > if ( opt_tbuf_size == 0 ) > { > @@ -404,19 +410,37 @@ int tb_control(xen_sysctl_tbuf_op_t *tbc > return rc; > } > > -static inline int calc_rec_size(int cycles, int extra) > +static inline unsigned int calc_rec_size(bool_t cycles, unsigned int extra) > { > - int rec_size; > - rec_size = 4; > + unsigned int rec_size = 4; > + > if ( cycles ) > rec_size += 8; > rec_size += extra; > return rec_size; > } > > -static inline int calc_unconsumed_bytes(struct t_buf *buf) > +static inline bool_t bogus(u32 prod, u32 cons) > +{ > + if ( unlikely(prod & 3) || unlikely(prod >= 2 * data_size) || > + unlikely(cons & 3) || unlikely(cons >= 2 * data_size) ) > + { > + tb_init_done = 0; > + printk(XENLOG_WARNING "trc#%u: bogus prod (%08x) and/or cons (%08x)\n", > + smp_processor_id(), prod, cons); > + return 1; > + } > + return 0; > +} > + > +static inline u32 calc_unconsumed_bytes(const volatile struct t_buf *buf) > { > - int x = buf->prod - buf->cons; > + u32 prod = buf->prod, cons = buf->cons; > + s32 x = prod - cons; > + > + if ( bogus(prod, cons) ) > + return data_size; > + > if ( x < 0 ) > x += 2*data_size; > > @@ -426,9 +450,14 @@ static inline int calc_unconsumed_bytes( > return x; > } > > -static inline int calc_bytes_to_wrap(struct t_buf *buf) > +static inline u32 calc_bytes_to_wrap(const volatile struct t_buf *buf) > { > - int x = data_size - buf->prod; > + u32 prod = buf->prod; > + s32 x = data_size - prod; > + > + if ( bogus(prod, buf->cons) ) > + return 0; > + > if ( x <= 0 ) > x += data_size; > > @@ -438,35 +467,37 @@ static inline int calc_bytes_to_wrap(str > return x; > } > > -static inline int calc_bytes_avail(struct t_buf *buf) > +static inline u32 calc_bytes_avail(const volatile struct t_buf *buf) > { > return data_size - calc_unconsumed_bytes(buf); > } > > -static inline struct t_rec * > -next_record(struct t_buf *buf) > +static inline struct t_rec *next_record(const volatile struct t_buf *buf) > { > - int x = buf->prod; > + u32 x = buf->prod; > + > + if ( !tb_init_done || bogus(x, buf->cons) ) > + return NULL; > + > if ( x >= data_size ) > x -= data_size; > > - ASSERT(x >= 0); > ASSERT(x < data_size); > > return (struct t_rec *)&this_cpu(t_data)[x]; > } > > -static inline int __insert_record(struct t_buf *buf, > - unsigned long event, > - int extra, > - int cycles, > - int rec_size, > - unsigned char *extra_data) > +static inline void __insert_record(volatile struct t_buf *buf, > + unsigned long event, > + unsigned int extra, > + bool_t cycles, > + unsigned int rec_size, > + const void *extra_data) > { > struct t_rec *rec; > unsigned char *dst; > - unsigned long extra_word = extra/sizeof(u32); > - int local_rec_size = calc_rec_size(cycles, extra); > + unsigned int extra_word = extra / sizeof(u32); > + unsigned int local_rec_size = calc_rec_size(cycles, extra); > uint32_t next; > > BUG_ON(local_rec_size != rec_size); > @@ -475,17 +506,20 @@ static inline int __insert_record(struct > /* Double-check once more that we have enough space. > * Don''t bugcheck here, in case the userland tool is doing > * something stupid. */ > - if ( calc_bytes_avail(buf) < rec_size ) > + next = calc_bytes_avail(buf); > + if ( next < rec_size ) > { > - printk("%s: %u bytes left (%u - ((%u - %u) %% %u) recsize %u.\n", > - __func__, > - calc_bytes_avail(buf), > - data_size, buf->prod, buf->cons, data_size, rec_size); > - return 0; > + if ( printk_ratelimit() ) > + printk(XENLOG_WARNING > + "%s: avail=%u (size=%08x prod=%08x cons=%08x) rec=%u\n", > + __func__, next, data_size, buf->prod, buf->cons, rec_size); > + return; > } > rmb(); > > rec = next_record(buf); > + if ( !rec ) > + return; > rec->event = event; > rec->extra_u32 = extra_word; > dst = (unsigned char *)rec->u.nocycles.extra_u32; > @@ -502,21 +536,22 @@ static inline int __insert_record(struct > > wmb(); > > - next = buf->prod + rec_size; > + next = buf->prod; > + if ( bogus(next, buf->cons) ) > + return; > + next += rec_size; > if ( next >= 2*data_size ) > next -= 2*data_size; > - ASSERT(next >= 0); > ASSERT(next < 2*data_size); > buf->prod = next; > - > - return rec_size; > } > > -static inline int insert_wrap_record(struct t_buf *buf, int size) > +static inline void insert_wrap_record(volatile struct t_buf *buf, > + unsigned int size) > { > - int space_left = calc_bytes_to_wrap(buf); > - unsigned long extra_space = space_left - sizeof(u32); > - int cycles = 0; > + u32 space_left = calc_bytes_to_wrap(buf); > + unsigned int extra_space = space_left - sizeof(u32); > + bool_t cycles = 0; > > BUG_ON(space_left > size); > > @@ -528,17 +563,13 @@ static inline int insert_wrap_record(str > ASSERT((extra_space/sizeof(u32)) <= TRACE_EXTRA_MAX); > } > > - return __insert_record(buf, > - TRC_TRACE_WRAP_BUFFER, > - extra_space, > - cycles, > - space_left, > - NULL); > + __insert_record(buf, TRC_TRACE_WRAP_BUFFER, extra_space, cycles, > + space_left, NULL); > } > > #define LOST_REC_SIZE (4 + 8 + 16) /* header + tsc + sizeof(struct ed) */ > > -static inline int insert_lost_records(struct t_buf *buf) > +static inline void insert_lost_records(volatile struct t_buf *buf) > { > struct { > u32 lost_records; > @@ -553,12 +584,8 @@ static inline int insert_lost_records(st > > this_cpu(lost_records) = 0; > > - return __insert_record(buf, > - TRC_LOST_RECORDS, > - sizeof(ed), > - 1 /* cycles */, > - LOST_REC_SIZE, > - (unsigned char *)&ed); > + __insert_record(buf, TRC_LOST_RECORDS, sizeof(ed), 1 /* cycles */, > + LOST_REC_SIZE, &ed); > } > > /* > @@ -580,13 +607,15 @@ static DECLARE_TASKLET(trace_notify_dom0 > * failure, otherwise 0. Failure occurs only if the trace buffers are not yet > * initialised. > */ > -void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data) > +void __trace_var(u32 event, bool_t cycles, unsigned int extra, > + const void *extra_data) > { > - struct t_buf *buf; > - unsigned long flags, bytes_to_tail, bytes_to_wrap; > - int rec_size, total_size; > - int extra_word; > - int started_below_highwater = 0; > + volatile struct t_buf *buf; > + unsigned long flags; > + u32 bytes_to_tail, bytes_to_wrap; > + unsigned int rec_size, total_size; > + unsigned int extra_word; > + bool_t started_below_highwater; > > if( !tb_init_done ) > return; > @@ -619,13 +648,11 @@ void __trace_var(u32 event, int cycles, > > /* Read tb_init_done /before/ t_bufs. */ > rmb(); > - > - spin_lock_irqsave(&this_cpu(t_lock), flags); > - > buf = this_cpu(t_bufs); > - > if ( unlikely(!buf) ) > - goto unlock; > + return; > + > + spin_lock_irqsave(&this_cpu(t_lock), flags); > > started_below_highwater = (calc_unconsumed_bytes(buf) < t_buf_highwater); > > --- 2010-06-15.orig/xen/include/xen/trace.h 2010-06-28 09:43:58.000000000 +0200 > +++ 2010-06-15/xen/include/xen/trace.h 2010-06-25 12:08:36.000000000 +0200 > @@ -36,7 +36,7 @@ int tb_control(struct xen_sysctl_tbuf_op > > int trace_will_trace_event(u32 event); > > -void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data); > +void __trace_var(u32 event, bool_t cycles, unsigned int extra, const void *); > > static inline void trace_var(u32 event, int cycles, int extra, > unsigned char *extra_data) > @@ -57,7 +57,7 @@ static inline void trace_var(u32 event, > { \ > u32 _d[1]; \ > _d[0] = d1; \ > - __trace_var(_e, 1, sizeof(*_d), (unsigned char *)_d); \ > + __trace_var(_e, 1, sizeof(_d), _d); \ > } \ > } while ( 0 ) > > @@ -68,7 +68,7 @@ static inline void trace_var(u32 event, > u32 _d[2]; \ > _d[0] = d1; \ > _d[1] = d2; \ > - __trace_var(_e, 1, sizeof(*_d)*2, (unsigned char *)_d); \ > + __trace_var(_e, 1, sizeof(_d), _d); \ > } \ > } while ( 0 ) > > @@ -80,7 +80,7 @@ static inline void trace_var(u32 event, > _d[0] = d1; \ > _d[1] = d2; \ > _d[2] = d3; \ > - __trace_var(_e, 1, sizeof(*_d)*3, (unsigned char *)_d); \ > + __trace_var(_e, 1, sizeof(_d), _d); \ > } \ > } while ( 0 ) > > @@ -93,7 +93,7 @@ static inline void trace_var(u32 event, > _d[1] = d2; \ > _d[2] = d3; \ > _d[3] = d4; \ > - __trace_var(_e, 1, sizeof(*_d)*4, (unsigned char *)_d); \ > + __trace_var(_e, 1, sizeof(_d), _d); \ > } \ > } while ( 0 ) > > @@ -107,7 +107,7 @@ static inline void trace_var(u32 event, > _d[2] = d3; \ > _d[3] = d4; \ > _d[4] = d5; \ > - __trace_var(_e, 1, sizeof(*_d)*5, (unsigned char *)_d); \ > + __trace_var(_e, 1, sizeof(_d), _d); \ > } \ > } while ( 0 ) > > @@ -122,7 +122,7 @@ static inline void trace_var(u32 event, > _d[3] = d4; \ > _d[4] = d5; \ > _d[5] = d6; \ > - __trace_var(_e, 1, sizeof(*_d)*6, (unsigned char *)_d); \ > + __trace_var(_e, 1, sizeof(_d), _d); \ > } \ > } while ( 0 ) > > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xensource.com > http://lists.xensource.com/xen-devel > >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
After getting a report of 3.2.3''s xenmon crashing Xen (as it turned out this was because c/s 17000 was backported to that tree without also applying c/s 17515), this is the result of reviewing the code for security issues: - the hypervisor should not rely on any specific state of the actual trace buffer (as it is shared writable with Dom0) - there''s no need to share writably the t_info pages (Dom0 only wants [and needs] to read it) - T_INFO_FIRST_OFFSET wasn''t defined correctly, thus allowing in the num_online_cpus() == NR_CPUS case to pass a corrupted MFN to Dom0 - check_tbuf_size() didn''t consider the case of the incoming size not allowing for the 2*data_size range for t_buf->{prod,cons} - printk()-s should be lower level or rate limited For the first of those items, I chose to use ''volatile'' qualifiers rather than sprinkling around barriers - this can certainly be changed if considered unacceptable. To make clear what purpose specific variables have and/or where they got loaded from, the patch also changes the type of some of them to be explicitly u32/s32, and removes pointless assertions (like checking an unsigned variable to be >= 0). I also took the prototype adjustment of __trace_var() as an opportunity to simplify the TRACE_xD() macros. Similar simplification could be done on the (quite numerous) direct callers of the function. Signed-off-by: Jan Beulich <jbeulich@novell.com> _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel