Olaf Hering
2011-May-06 18:25 UTC
[Xen-devel] [PATCH 0 of 5] xentrace: non-contiguous allocation of per-cpu buffer
This series implements non-contiguous trace buffers. Please review.

For some reason it's not possible to allocate more than 128MB with repeated
calls to alloc_xenheap_pages(). Any ideas how to reach the theoretical limit
of 256MB per cpu?

Also the error path in alloc_trace_bufs() needs a fix, I always run into the
assert there.

Olaf
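An aside on the 256MB figure: it matches the cap implied by 16-bit per-cpu
page accounting (2^16 4-KiB pages per cpu). A back-of-the-envelope sketch
under that assumption — the 16-bit field width is an assumption here, not
something stated in the thread:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Assumption: the per-cpu trace-page count travels in a 16-bit
     * field of struct t_info, so at most 2^16 - 1 pages fit per cpu. */
    uint32_t max_pages = UINT16_MAX;                  /* 65535 pages */
    uint64_t max_bytes = (uint64_t)max_pages * 4096;  /* 4 KiB pages */

    printf("per-cpu cap: %u pages = %llu MiB\n",
           max_pages, (unsigned long long)(max_bytes >> 20)); /* ~256 MiB */
    return 0;
}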
Olaf Hering
2011-May-06 18:25 UTC
[Xen-devel] [PATCH 1 of 5] Move the global variable t_info_first_offset into calculate_tbuf_size()
# HG changeset patch
# User Olaf Hering <olaf@aepfle.de>
# Date 1304697395 -7200
# Node ID a19b5f66ce46efd6f8f697583f9bdbc2b567fdbd
# Parent 39f2942fe56bda90d3285b9f2d4e214f0712375f
Move the global variable t_info_first_offset into calculate_tbuf_size()
because it is only used there. Change the type from u32 to uint32_t to
match the type used in other places.

Signed-off-by: Olaf Hering <olaf@aepfle.de>

diff -r 39f2942fe56b -r a19b5f66ce46 xen/common/trace.c
--- a/xen/common/trace.c	Wed May 04 14:46:32 2011 +0100
+++ b/xen/common/trace.c	Fri May 06 17:56:35 2011 +0200
@@ -55,7 +55,6 @@ static DEFINE_PER_CPU_READ_MOSTLY(struct
 static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
 static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
 static u32 data_size;
-static u32 t_info_first_offset __read_mostly;
 
 /* High water mark for trace buffers; */
 /* Send virtual interrupt when buffer level reaches this point */
@@ -94,10 +93,10 @@ static struct notifier_block cpu_nfb = {
     .notifier_call = cpu_callback
 };
 
-static void calc_tinfo_first_offset(void)
+static uint32_t calc_tinfo_first_offset(void)
 {
     int offset_in_bytes = offsetof(struct t_info, mfn_offset[NR_CPUS]);
-    t_info_first_offset = fit_to_type(uint32_t, offset_in_bytes);
+    return fit_to_type(uint32_t, offset_in_bytes);
 }
 
 /**
@@ -107,7 +106,7 @@ static void calc_tinfo_first_offset(void
  * The t_info layout is fixed and cant be changed without breaking xentrace.
  * Initialize t_info_pages based on number of trace pages.
  */
-static int calculate_tbuf_size(unsigned int pages)
+static int calculate_tbuf_size(unsigned int pages, uint32_t t_info_first_offset)
 {
     struct t_buf dummy_size;
     typeof(dummy_size.prod) max_size;
@@ -156,6 +155,7 @@ static int alloc_trace_bufs(unsigned int
     int i, cpu, order;
     /* Start after a fixed-size array of NR_CPUS */
     uint32_t *t_info_mfn_list;
+    uint32_t t_info_first_offset;
     int offset;
 
     if ( t_info )
@@ -165,9 +165,9 @@ static int alloc_trace_bufs(unsigned int
         return -EINVAL;
 
     /* Calculate offset in u32 of first mfn */
-    calc_tinfo_first_offset();
+    t_info_first_offset = calc_tinfo_first_offset();
 
-    pages = calculate_tbuf_size(pages);
+    pages = calculate_tbuf_size(pages, t_info_first_offset);
     order = get_order_from_pages(pages);
 
     t_info = alloc_xenheap_pages(get_order_from_pages(t_info_pages), 0);
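For readers unfamiliar with fit_to_type(): the return value is the byte
offset expressed in uint32_t-sized slots, rounded up, so the MFN list starts
on a uint32_t boundary right after the fixed-size header array. A minimal
standalone sketch; the macro body and the demo struct are assumptions
modeled on how the result is used above:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* Assumed equivalent of Xen's fit_to_type(): number of _type-sized
 * slots needed to hold _x bytes, i.e. a round-up division. */
#define fit_to_type(_type, _x) (((_x) + sizeof(_type) - 1) / sizeof(_type))

struct t_info_demo {            /* simplified stand-in for struct t_info */
    uint16_t tbuf_size;
    uint16_t mfn_offset[64];    /* 64 plays the role of NR_CPUS */
};

int main(void)
{
    size_t offset_in_bytes = offsetof(struct t_info_demo, mfn_offset[64]);

    /* First uint32_t slot that lies past the fixed-size header array. */
    printf("first mfn offset: %zu u32 slots (%zu bytes)\n",
           fit_to_type(uint32_t, offset_in_bytes), offset_in_bytes);
    return 0;
}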
Olaf Hering
2011-May-06 18:25 UTC
[Xen-devel] [PATCH 2 of 5] Mark data_size __read_mostly because it's only written once
# HG changeset patch
# User Olaf Hering <olaf@aepfle.de>
# Date 1304697395 -7200
# Node ID 91c40bb4c01a331a41ab6a14a2b5ec7d12e86a76
# Parent a19b5f66ce46efd6f8f697583f9bdbc2b567fdbd
Mark data_size __read_mostly because it's only written once.

Signed-off-by: Olaf Hering <olaf@aepfle.de>

diff -r a19b5f66ce46 -r 91c40bb4c01a xen/common/trace.c
--- a/xen/common/trace.c	Fri May 06 17:56:35 2011 +0200
+++ b/xen/common/trace.c	Fri May 06 17:56:35 2011 +0200
@@ -54,7 +54,7 @@ static unsigned int t_info_pages;
 static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
 static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
 static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
-static u32 data_size;
+static u32 data_size __read_mostly;
 
 /* High water mark for trace buffers; */
 /* Send virtual interrupt when buffer level reaches this point */
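Background on the annotation: in Xen, as in Linux, __read_mostly typically
places a variable in a dedicated section so that rarely written data is not
packed onto cache lines that see frequent writes. A compilable sketch of the
idea; the section name is an assumption, the real definition lives in Xen's
headers:

#include <stdio.h>

/* Assumed definition; the real one lives in Xen's header files. */
#define __read_mostly __attribute__((__section__(".data.read_mostly")))

/* Written once at setup, then read on every trace record. */
static unsigned int data_size __read_mostly;

int main(void)
{
    data_size = 4096;   /* the single write, during initialization */
    printf("data_size=%u\n", data_size);
    return 0;
}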
Olaf Hering
2011-May-06 18:25 UTC
[Xen-devel] [PATCH 3 of 5] Remove unneeded cast when assigning pointer value to dst
# HG changeset patch
# User Olaf Hering <olaf@aepfle.de>
# Date 1304697395 -7200
# Node ID 1631b61acaa8e88437d0f1861409ab1824de2721
# Parent 91c40bb4c01a331a41ab6a14a2b5ec7d12e86a76
Remove unneeded cast when assigning pointer value to dst.
Both arrays are uint32_t and memcpy takes a void pointer.

Signed-off-by: Olaf Hering <olaf@aepfle.de>

diff -r 91c40bb4c01a -r 1631b61acaa8 xen/common/trace.c
--- a/xen/common/trace.c	Fri May 06 17:56:35 2011 +0200
+++ b/xen/common/trace.c	Fri May 06 17:56:35 2011 +0200
@@ -483,7 +483,7 @@ static inline void __insert_record(struc
                                    const void *extra_data)
 {
     struct t_rec *rec;
-    unsigned char *dst;
+    uint32_t *dst;
     unsigned int extra_word = extra / sizeof(u32);
     unsigned int local_rec_size = calc_rec_size(cycles, extra);
     uint32_t next;
@@ -508,13 +508,13 @@ static inline void __insert_record(struc
 
     rec->event = event;
     rec->extra_u32 = extra_word;
-    dst = (unsigned char *)rec->u.nocycles.extra_u32;
+    dst = rec->u.nocycles.extra_u32;
     if ( (rec->cycles_included = cycles) != 0 )
     {
         u64 tsc = (u64)get_cycles();
         rec->u.cycles.cycles_lo = (uint32_t)tsc;
         rec->u.cycles.cycles_hi = (uint32_t)(tsc >> 32);
-        dst = (unsigned char *)rec->u.cycles.extra_u32;
+        dst = rec->u.cycles.extra_u32;
     }
 
     if ( extra_data && extra )
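Why the cast was never needed: extra_u32 is a uint32_t array, which decays
to uint32_t * on assignment, and memcpy() accepts any object pointer through
its void * parameters. A self-contained illustration with a simplified
stand-in for struct t_rec:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

struct rec_demo {               /* simplified stand-in for struct t_rec */
    uint32_t extra_u32[7];
};

int main(void)
{
    struct rec_demo r;
    uint32_t extra_data[2] = { 0xdead, 0xbeef };

    uint32_t *dst = r.extra_u32;        /* array decays to uint32_t *, no cast */
    memcpy(dst, extra_data, sizeof(extra_data)); /* memcpy takes void *, no cast */

    printf("%x %x\n", r.extra_u32[0], r.extra_u32[1]);
    return 0;
}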
Olaf Hering
2011-May-06 18:25 UTC
[Xen-devel] [PATCH 4 of 5] Update __insert_record() to copy the trace record to individual mfns
# HG changeset patch
# User Olaf Hering <olaf@aepfle.de>
# Date 1304700881 -7200
# Node ID 1c5da4d9e33c821b9e3276d7aefe7ee16ce7b162
# Parent 1631b61acaa8e88437d0f1861409ab1824de2721
Update __insert_record() to copy the trace record to individual mfns.
This is a prereq before changing the per-cpu allocation from contiguous
to non-contiguous allocation.

Signed-off-by: Olaf Hering <olaf@aepfle.de>

diff -r 1631b61acaa8 -r 1c5da4d9e33c xen/common/trace.c
--- a/xen/common/trace.c	Fri May 06 17:56:35 2011 +0200
+++ b/xen/common/trace.c	Fri May 06 18:54:41 2011 +0200
@@ -52,7 +52,6 @@ static struct t_info *t_info;
 static unsigned int t_info_pages;
 
 static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
-static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
 static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
 static u32 data_size __read_mostly;
 
@@ -193,7 +192,6 @@ static int alloc_trace_bufs(unsigned int
 
         per_cpu(t_bufs, cpu) = buf = rawbuf;
         buf->cons = buf->prod = 0;
-        per_cpu(t_data, cpu) = (unsigned char *)(buf + 1);
     }
 
     offset = t_info_first_offset;
@@ -457,10 +455,16 @@ static inline u32 calc_bytes_avail(const
     return data_size - calc_unconsumed_bytes(buf);
 }
 
-static inline struct t_rec *next_record(const struct t_buf *buf,
-                                        uint32_t *next)
+static unsigned char *next_record(const struct t_buf *buf, uint32_t *next,
+                                  unsigned char **next_page,
+                                  uint32_t *offset_in_page)
 {
     u32 x = buf->prod, cons = buf->cons;
+    uint32_t per_cpu_mfn_offset;
+    uint32_t per_cpu_mfn_nr;
+    uint32_t *mfn_list;
+    uint32_t mfn;
+    unsigned char *this_page;
 
     barrier(); /* must read buf->prod and buf->cons only once */
     *next = x;
@@ -472,7 +476,27 @@ static inline struct t_rec *next_record(
 
     ASSERT(x < data_size);
 
-    return (struct t_rec *)&this_cpu(t_data)[x];
+    /* add leading header to get total offset of next record */
+    x += sizeof(struct t_buf);
+    *offset_in_page = x % PAGE_SIZE;
+
+    /* offset into array of mfns */
+    per_cpu_mfn_nr = x / PAGE_SIZE;
+    per_cpu_mfn_offset = t_info->mfn_offset[smp_processor_id()];
+    mfn_list = (uint32_t *)t_info;
+    mfn = mfn_list[per_cpu_mfn_offset + per_cpu_mfn_nr];
+    this_page = mfn_to_virt(mfn);
+    if (per_cpu_mfn_nr + 1 >= opt_tbuf_size)
+    {
+        /* reached end of buffer? */
+        *next_page = NULL;
+    }
+    else
+    {
+        mfn = mfn_list[per_cpu_mfn_offset + per_cpu_mfn_nr + 1];
+        *next_page = mfn_to_virt(mfn);
+    }
+    return this_page;
 }
 
 static inline void __insert_record(struct t_buf *buf,
@@ -482,28 +506,37 @@ static inline void __insert_record(struc
                                    unsigned int rec_size,
                                    const void *extra_data)
 {
-    struct t_rec *rec;
+    struct t_rec split_rec, *rec;
     uint32_t *dst;
+    unsigned char *this_page, *next_page;
     unsigned int extra_word = extra / sizeof(u32);
     unsigned int local_rec_size = calc_rec_size(cycles, extra);
     uint32_t next;
+    uint32_t offset;
+    uint32_t remaining;
 
     BUG_ON(local_rec_size != rec_size);
     BUG_ON(extra & 3);
 
-    rec = next_record(buf, &next);
-    if ( !rec )
+    this_page = next_record(buf, &next, &next_page, &offset);
+    if ( !this_page )
         return;
-    /* Double-check once more that we have enough space.
-     * Don't bugcheck here, in case the userland tool is doing
-     * something stupid. */
-    if ( (unsigned char *)rec + rec_size > this_cpu(t_data) + data_size )
+
+    remaining = PAGE_SIZE - offset;
+
+    if ( unlikely(rec_size > remaining) )
     {
-        if ( printk_ratelimit() )
+        if ( next_page == NULL )
+        {
+            /* access beyond end of buffer */
             printk(XENLOG_WARNING
-                   "%s: size=%08x prod=%08x cons=%08x rec=%u\n",
-                   __func__, data_size, next, buf->cons, rec_size);
-        return;
+                   "%s: size=%08x prod=%08x cons=%08x rec=%u remaining=%u\n",
+                   __func__, data_size, next, buf->cons, rec_size, remaining);
+            return;
+        }
+        rec = &split_rec;
+    } else {
+        rec = (struct t_rec*)(this_page + offset);
     }
 
     rec->event = event;
@@ -520,6 +553,12 @@ static inline void __insert_record(struc
     if ( extra_data && extra )
        memcpy(dst, extra_data, extra);
 
+    if ( unlikely(rec_size > remaining) )
+    {
+        memcpy(this_page + offset, rec, remaining);
+        memcpy(next_page, (char *)rec + remaining, rec_size - remaining);
+    }
+
     wmb();
 
     next += rec_size;
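The heart of this patch is the split-record path: when a record straddles a
page boundary, it is assembled in a private buffer (split_rec) and then
copied out with two memcpy() calls. The scheme in isolation, with a made-up
tiny page size so the split is visible — none of the Xen types or sizes are
used:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define DEMO_PAGE_SIZE 16u   /* tiny "page" so the split is easy to see */

/* Copy rec_size bytes to offset in this_page, spilling the tail into
 * next_page when the record does not fit into the remaining space --
 * the same two-memcpy scheme the patch uses for split records. */
static void insert_split(unsigned char *this_page, unsigned char *next_page,
                         uint32_t offset, const void *rec, uint32_t rec_size)
{
    uint32_t remaining = DEMO_PAGE_SIZE - offset;

    if (rec_size > remaining) {
        memcpy(this_page + offset, rec, remaining);
        memcpy(next_page, (const char *)rec + remaining, rec_size - remaining);
    } else {
        memcpy(this_page + offset, rec, rec_size);
    }
}

int main(void)
{
    unsigned char page0[DEMO_PAGE_SIZE], page1[DEMO_PAGE_SIZE];
    const char rec[] = "0123456789";   /* 11 bytes including the NUL */

    insert_split(page0, page1, 12, rec, sizeof(rec));
    /* First 4 bytes land at the end of page0, the rest at the start of page1. */
    printf("tail of page0: %.4s / head of page1: %s\n", page0 + 12, page1);
    return 0;
}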
Olaf Hering
2011-May-06 18:25 UTC
[Xen-devel] [PATCH 5 of 5] Allocate non-contiguous per-cpu trace buffers
# HG changeset patch
# User Olaf Hering <olaf@aepfle.de>
# Date 1304706230 -7200
# Node ID bcd0b17bf8a3ab08760b8dcc1ca276defab1ed71
# Parent 1c5da4d9e33c821b9e3276d7aefe7ee16ce7b162
Allocate non-contiguous per-cpu trace buffers.

Signed-off-by: Olaf Hering <olaf@aepfle.de>

diff -r 1c5da4d9e33c -r bcd0b17bf8a3 xen/common/trace.c
--- a/xen/common/trace.c	Fri May 06 18:54:41 2011 +0200
+++ b/xen/common/trace.c	Fri May 06 20:23:50 2011 +0200
@@ -151,7 +151,7 @@ static int calculate_tbuf_size(unsigned
  */
 static int alloc_trace_bufs(unsigned int pages)
 {
-    int i, cpu, order;
+    int i, cpu;
     /* Start after a fixed-size array of NR_CPUS */
     uint32_t *t_info_mfn_list;
     uint32_t t_info_first_offset;
@@ -167,32 +167,10 @@ static int alloc_trace_bufs(unsigned int
     t_info_first_offset = calc_tinfo_first_offset();
 
     pages = calculate_tbuf_size(pages, t_info_first_offset);
-    order = get_order_from_pages(pages);
 
     t_info = alloc_xenheap_pages(get_order_from_pages(t_info_pages), 0);
     if ( t_info == NULL )
-        goto out_dealloc;
-
-    /*
-     * First, allocate buffers for all of the cpus. If any
-     * fails, deallocate what you have so far and exit.
-     */
-    for_each_online_cpu(cpu)
-    {
-        void *rawbuf;
-        struct t_buf *buf;
-
-        if ( (rawbuf = alloc_xenheap_pages(
-                order, MEMF_bits(32 + PAGE_SHIFT))) == NULL )
-        {
-            printk(XENLOG_INFO "xentrace: memory allocation failed "
-                   "on cpu %d\n", cpu);
-            goto out_dealloc;
-        }
-
-        per_cpu(t_bufs, cpu) = buf = rawbuf;
-        buf->cons = buf->prod = 0;
-    }
+        goto out_dealloc_t_info;
 
     offset = t_info_first_offset;
     t_info_mfn_list = (uint32_t *)t_info;
@@ -204,27 +182,50 @@ static int alloc_trace_bufs(unsigned int
     t_info->tbuf_size = pages;
 
     /*
-     * Now share the pages so xentrace can map them, and write them in
-     * the global t_info structure.
+     * Allocate buffers for all of the cpus.
+     * If any fails, deallocate what you have so far and exit.
      */
     for_each_online_cpu(cpu)
     {
-        void *rawbuf = per_cpu(t_bufs, cpu);
-        struct page_info *p = virt_to_page(rawbuf);
-        uint32_t mfn = virt_to_mfn(rawbuf);
-
-        for ( i = 0; i < pages; i++ )
-        {
-            share_xen_page_with_privileged_guests(p + i, XENSHARE_writable);
-
-            t_info_mfn_list[offset + i]=mfn + i;
-        }
-        t_info->mfn_offset[cpu]=offset;
-        printk(XENLOG_INFO "xentrace: p%d mfn %"PRIx32" offset %d\n",
-               cpu, mfn, offset);
-        offset+=i;
+        void *p;
+        struct t_buf *buf;
+        struct page_info *pg;
 
         spin_lock_init(&per_cpu(t_lock, cpu));
+
+        /* first allocate the first page, it contains the per-cpu metadata */
+        p = alloc_xenheap_pages(0, MEMF_bits(32 + PAGE_SHIFT));
+        if ( !p )
+        {
+            printk(XENLOG_INFO "xentrace: memory allocation failed "
+                   "on cpu %d after %d pages\n", cpu, 0);
+            goto out_dealloc;
+        }
+        per_cpu(t_bufs, cpu) = buf = p;
+        buf->cons = buf->prod = 0;
+
+        t_info->mfn_offset[cpu] = offset;
+        t_info_mfn_list[offset] = virt_to_mfn(p);
+        pg = virt_to_page(p);
+        share_xen_page_with_privileged_guests(pg, XENSHARE_writable);
+
+        printk(XENLOG_INFO "xentrace: p%d mfn %lx offset %d\n",
+               cpu, virt_to_mfn(p), offset);
+
+        /* now the remaining trace pages */
+        offset++;
+        for ( i = 1; i < pages; i++ )
+        {
+            p = alloc_xenheap_pages(0, MEMF_bits(32 + PAGE_SHIFT));
+            if ( !p )
+            {
+                printk(XENLOG_INFO "xentrace: memory allocation failed "
+                       "on cpu %d after %d pages\n", cpu, i);
+                goto out_dealloc;
+            }
+            t_info_mfn_list[offset++] = virt_to_mfn(p);
+            pg = virt_to_page(p);
+            share_xen_page_with_privileged_guests(pg, XENSHARE_writable);
+        }
     }
 
     data_size = (pages * PAGE_SIZE - sizeof(struct t_buf));
@@ -240,14 +241,18 @@ static int alloc_trace_bufs(unsigned int
 out_dealloc:
     for_each_online_cpu(cpu)
     {
-        void *rawbuf = per_cpu(t_bufs, cpu);
         per_cpu(t_bufs, cpu) = NULL;
-        if ( rawbuf )
+        offset = t_info->mfn_offset[cpu];
+        for ( i = 0; i < pages; i++ )
         {
-            ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated));
-            free_xenheap_pages(rawbuf, order);
+            uint32_t mfn = t_info_mfn_list[offset + i];
+            if ( !mfn )
+                break;
+            ASSERT(!(mfn_to_page(mfn)->count_info & PGC_allocated));
+            free_xenheap_pages(mfn_to_virt(mfn), 0);
         }
     }
+out_dealloc_t_info:
     free_xenheap_pages(t_info, get_order_from_pages(t_info_pages));
     t_info = NULL;
     printk(XENLOG_WARNING "xentrace: allocation failed! Tracing disabled.\n");
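The allocation pattern introduced here — many independent single-page
allocations with rollback on failure — is easy to get wrong, as the cover
letter's note about the error-path assert suggests. A standalone sketch
using malloc() in place of alloc_xenheap_pages():

#include <stdlib.h>
#include <stdio.h>

#define DEMO_PAGE_SIZE 4096

/* Allocate npages independent "pages", recording each in pages[].
 * On failure, free everything allocated so far and return -1. */
static int alloc_noncontig(void **pages, unsigned int npages)
{
    unsigned int i;

    for (i = 0; i < npages; i++) {
        pages[i] = malloc(DEMO_PAGE_SIZE); /* stands in for alloc_xenheap_pages(0, ...) */
        if (!pages[i]) {
            fprintf(stderr, "allocation failed after %u pages\n", i);
            while (i--)                    /* unwind only what we own */
                free(pages[i]);
            return -1;
        }
    }
    return 0;
}

int main(void)
{
    void *pages[8];

    if (alloc_noncontig(pages, 8) == 0) {
        puts("allocated 8 non-contiguous pages");
        for (unsigned int i = 0; i < 8; i++)
            free(pages[i]);
    }
    return 0;
}

Note that the sketch frees only pages it recorded itself; the patch's
out_dealloc path instead walks the shared mfn list and stops at the first
zero entry, which is only safe if that list starts out zeroed.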
Olaf Hering
2011-May-08 15:07 UTC
Re: [Xen-devel] [PATCH 0 of 5] xentrace: non-contiguous allocation of per-cpu buffer
On Fri, May 06, Olaf Hering wrote:

> For some reason it's not possible to allocate more than 128MB with repeated
> calls to alloc_xenheap_pages(). Any ideas how to reach the theoretical limit
> of 256MB per cpu?

The dom0 needs to be ballooned down to make room for the trace buffers.

> Also the error path in alloc_trace_bufs() needs a fix, I always run into the
> assert there.

Besides this issue, the checks in the bogus() function trigger with a 256MB
per-cpu buffer.

I will revisit the series and post a new version once I have fixes for these
issues.

Olaf
Keir Fraser
2011-May-09 09:03 UTC
Re: [Xen-devel] [PATCH 4 of 5] Update __insert_record() to copy the trace record to individual mfns
On 06/05/2011 19:25, "Olaf Hering" <olaf@aepfle.de> wrote:

> # HG changeset patch
> # User Olaf Hering <olaf@aepfle.de>
> # Date 1304700881 -7200
> # Node ID 1c5da4d9e33c821b9e3276d7aefe7ee16ce7b162
> # Parent 1631b61acaa8e88437d0f1861409ab1824de2721
> Update __insert_record() to copy the trace record to individual mfns.
> This is a prereq before changing the per-cpu allocation from contiguous
> to non-contiguous allocation.

I applied the trivial patches 1-3. I'll wait for Acks from George for
patches 4-5.

 -- Keir
Olaf Hering
2011-May-09 09:31 UTC
Re: [Xen-devel] [PATCH 4 of 5] Update __insert_record() to copy the trace record to individual mfns
On Mon, May 09, Keir Fraser wrote:

> I applied the trivial patches 1-3. I'll wait for Acks from George for
> patches 4-5.

Thanks Keir.

There are still issues with large buffers, I will post a new series.

Olaf
George Dunlap
2011-May-09 11:24 UTC
[Xen-devel] Re: [PATCH 4 of 5] Update __insert_record() to copy the trace record to individual mfns
On Fri, 2011-05-06 at 19:25 +0100, Olaf Hering wrote:

> Update __insert_record() to copy the trace record to individual mfns.
> This is a prereq before changing the per-cpu allocation from contiguous
> to non-contiguous allocation.

[...]

> @@ -472,7 +476,27 @@ static inline struct t_rec *next_record(
>  
>      ASSERT(x < data_size);
>  
> -    return (struct t_rec *)&this_cpu(t_data)[x];
> +    /* add leading header to get total offset of next record */
> +    x += sizeof(struct t_buf);
> +    *offset_in_page = x % PAGE_SIZE;
> +
> +    /* offset into array of mfns */
> +    per_cpu_mfn_nr = x / PAGE_SIZE;
> +    per_cpu_mfn_offset = t_info->mfn_offset[smp_processor_id()];
> +    mfn_list = (uint32_t *)t_info;
> +    mfn = mfn_list[per_cpu_mfn_offset + per_cpu_mfn_nr];
> +    this_page = mfn_to_virt(mfn);
> +    if (per_cpu_mfn_nr + 1 >= opt_tbuf_size)
> +    {
> +        /* reached end of buffer? */
> +        *next_page = NULL;
> +    }
> +    else
> +    {
> +        mfn = mfn_list[per_cpu_mfn_offset + per_cpu_mfn_nr + 1];
> +        *next_page = mfn_to_virt(mfn);
> +    }
> +    return this_page;
>  }

General approach here looks good, but I'm wondering if there's a way to
reduce the math here. The amount of work done for each trace record posted
is really getting pretty big.

I guess first of all the '%' and '/' should be &(PAGE_SIZE-1) and >>(PAGE_SHIFT),
respectively.

Would it make sense to pre-cache the virtual address of the various MFNs
(i.e., keep a per-cpu virtual address list) rather than doing the
calculation each time? That might reduce the number of instructions to find
the appropriate virtual addresses.

 -George
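George's two suggestions can be sketched together: replace '%' and '/' with
a mask and a shift (valid because the page size is a power of two), and
translate each page's MFN to a virtual address once at allocation time so
the fast path only indexes a per-cpu array. A sketch of the suggestion, not
the code that eventually went in; t_page_vaddr and the demo constants are
hypothetical:

#include <stdint.h>
#include <stdio.h>

#define DEMO_PAGE_SHIFT 12
#define DEMO_PAGE_SIZE  (1u << DEMO_PAGE_SHIFT)

/* Hypothetical per-cpu cache of page virtual addresses, filled once at
 * buffer-allocation time so the fast path never touches the mfn list. */
static unsigned char *t_page_vaddr[64];   /* 64 pages as an example */

static unsigned char *record_ptr(uint32_t byte_offset)
{
    /* Mask and shift instead of '%' and '/': valid because the page
     * size is a power of two. */
    uint32_t page_nr = byte_offset >> DEMO_PAGE_SHIFT;
    uint32_t in_page = byte_offset & (DEMO_PAGE_SIZE - 1);

    return t_page_vaddr[page_nr] + in_page;
}

int main(void)
{
    static unsigned char demo_page[DEMO_PAGE_SIZE];

    t_page_vaddr[1] = demo_page;          /* pretend page 1 was allocated */
    printf("offset 0x1010 -> page byte %td\n",
           record_ptr(0x1010) - demo_page); /* prints 16 */
    return 0;
}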