To help in vhost analyzing, the following series adding basic tracepoints to vhost. Operations of both virtqueues and vhost works were traced in current implementation, net code were untouched. A top-like satistics displaying script were introduced to help the troubleshooting. TODO: - net specific tracepoints? --- Jason Wang (2): vhost: basic tracepoints tools: virtio: add a top-like utility for displaying vhost satistics drivers/vhost/trace.h | 153 ++++++++++++++++++++ drivers/vhost/vhost.c | 17 ++ tools/virtio/vhost_stat | 360 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 528 insertions(+), 2 deletions(-) create mode 100644 drivers/vhost/trace.h create mode 100755 tools/virtio/vhost_stat -- Jason Wang
To help for the performance optimizations and debugging, this patch tracepoints for vhost. Pay attention that the tracepoints are only for vhost, net code are not touched. Two kinds of activities were traced: virtio and vhost work. Signed-off-by: Jason Wang <jasowang at redhat.com> --- drivers/vhost/trace.h | 153 +++++++++++++++++++++++++++++++++++++++++++++++++ drivers/vhost/vhost.c | 17 +++++ 2 files changed, 168 insertions(+), 2 deletions(-) create mode 100644 drivers/vhost/trace.h diff --git a/drivers/vhost/trace.h b/drivers/vhost/trace.h new file mode 100644 index 0000000..0423899 --- /dev/null +++ b/drivers/vhost/trace.h @@ -0,0 +1,153 @@ +#if !defined(_TRACE_VHOST_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_VHOST_H + +#include <linux/tracepoint.h> +#include "vhost.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM vhost + +/* + * Tracepoint for updating used flag. + */ +TRACE_EVENT(vhost_virtio_update_used_flags, + TP_PROTO(struct vhost_virtqueue *vq), + TP_ARGS(vq), + + TP_STRUCT__entry( + __field(struct vhost_virtqueue *, vq) + __field(u16, used_flags) + ), + + TP_fast_assign( + __entry->vq = vq; + __entry->used_flags = vq->used_flags; + ), + + TP_printk("vhost update used flag %x to vq %p notify %s", + __entry->used_flags, __entry->vq, + (__entry->used_flags & VRING_USED_F_NO_NOTIFY) ? + "disabled" : "enabled") +); + +/* + * Tracepoint for updating avail event. + */ +TRACE_EVENT(vhost_virtio_update_avail_event, + TP_PROTO(struct vhost_virtqueue *vq), + TP_ARGS(vq), + + TP_STRUCT__entry( + __field(struct vhost_virtqueue *, vq) + __field(u16, avail_idx) + ), + + TP_fast_assign( + __entry->vq = vq; + __entry->avail_idx = vq->avail_idx; + ), + + TP_printk("vhost update avail idx %u(%u) for vq %p", + __entry->avail_idx, __entry->avail_idx % + __entry->vq->num, __entry->vq) +); + +/* + * Tracepoint for processing descriptor. + */ +TRACE_EVENT(vhost_virtio_get_vq_desc, + TP_PROTO(struct vhost_virtqueue *vq, unsigned int index, + unsigned out, unsigned int in), + TP_ARGS(vq, index, out, in), + + TP_STRUCT__entry( + __field(struct vhost_virtqueue *, vq) + __field(unsigned int, head) + __field(unsigned int, out) + __field(unsigned int, in) + ), + + TP_fast_assign( + __entry->vq = vq; + __entry->head = index; + __entry->out = out; + __entry->in = in; + ), + + TP_printk("vhost get vq %p head %u out %u in %u", + __entry->vq, __entry->head, __entry->out, __entry->in) + +); + +/* + * Tracepoint for signal guest. + */ +TRACE_EVENT(vhost_virtio_signal, + TP_PROTO(struct vhost_virtqueue *vq), + TP_ARGS(vq), + + TP_STRUCT__entry( + __field(struct vhost_virtqueue *, vq) + ), + + TP_fast_assign( + __entry->vq = vq; + ), + + TP_printk("vhost signal vq %p", __entry->vq) +); + +DECLARE_EVENT_CLASS(vhost_work_template, + TP_PROTO(struct vhost_dev *dev, struct vhost_work *work), + TP_ARGS(dev, work), + + TP_STRUCT__entry( + __field(struct vhost_dev *, dev) + __field(struct vhost_work *, work) + __field(void *, function) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->work = work; + __entry->function = work->fn; + ), + + TP_printk("%pf for work %p dev %p", + __entry->function, __entry->work, __entry->dev) +); + +DEFINE_EVENT(vhost_work_template, vhost_work_queue_wakeup, + TP_PROTO(struct vhost_dev *dev, struct vhost_work *work), + TP_ARGS(dev, work)); + +DEFINE_EVENT(vhost_work_template, vhost_work_queue_coalesce, + TP_PROTO(struct vhost_dev *dev, struct vhost_work *work), + TP_ARGS(dev, work)); + +DEFINE_EVENT(vhost_work_template, vhost_poll_start, + TP_PROTO(struct vhost_dev *dev, struct vhost_work *work), + TP_ARGS(dev, work)); + +DEFINE_EVENT(vhost_work_template, vhost_poll_stop, + TP_PROTO(struct vhost_dev *dev, struct vhost_work *work), + TP_ARGS(dev, work)); + +DEFINE_EVENT(vhost_work_template, vhost_work_execute_start, + TP_PROTO(struct vhost_dev *dev, struct vhost_work *work), + TP_ARGS(dev, work)); + +DEFINE_EVENT(vhost_work_template, vhost_work_execute_end, + TP_PROTO(struct vhost_dev *dev, struct vhost_work *work), + TP_ARGS(dev, work)); + +#endif /* _TRACE_VHOST_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../drivers/vhost +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace + +/* This part must be outside protection */ +#include <trace/define_trace.h> + diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index c14c42b..23f8d85 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -31,6 +31,8 @@ #include <linux/if_arp.h> #include "vhost.h" +#define CREATE_TRACE_POINTS +#include "trace.h" enum { VHOST_MEMORY_MAX_NREGIONS = 64, @@ -50,6 +52,7 @@ static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, poll = container_of(pt, struct vhost_poll, table); poll->wqh = wqh; add_wait_queue(wqh, &poll->wait); + trace_vhost_poll_start(NULL, &poll->work); } static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync, @@ -101,6 +104,7 @@ void vhost_poll_start(struct vhost_poll *poll, struct file *file) void vhost_poll_stop(struct vhost_poll *poll) { remove_wait_queue(poll->wqh, &poll->wait); + trace_vhost_poll_stop(NULL, &poll->work); } static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work, @@ -147,7 +151,9 @@ static inline void vhost_work_queue(struct vhost_dev *dev, list_add_tail(&work->node, &dev->work_list); work->queue_seq++; wake_up_process(dev->worker); - } + trace_vhost_work_queue_wakeup(dev, work); + } else + trace_vhost_work_queue_coalesce(dev, work); spin_unlock_irqrestore(&dev->work_lock, flags); } @@ -221,7 +227,9 @@ static int vhost_worker(void *data) if (work) { __set_current_state(TASK_RUNNING); + trace_vhost_work_execute_start(dev, work); work->fn(work); + trace_vhost_work_execute_end(dev, work); } else schedule(); @@ -1011,6 +1019,7 @@ static int vhost_update_used_flags(struct vhost_virtqueue *vq) if (vq->log_ctx) eventfd_signal(vq->log_ctx, 1); } + trace_vhost_virtio_update_used_flags(vq); return 0; } @@ -1030,6 +1039,7 @@ static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) if (vq->log_ctx) eventfd_signal(vq->log_ctx, 1); } + trace_vhost_virtio_update_avail_event(vq); return 0; } @@ -1319,6 +1329,7 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, /* Assume notifications from guest are disabled at this point, * if they aren't we would need to update avail_event index. */ BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY)); + trace_vhost_virtio_get_vq_desc(vq, head, *out_num, *in_num); return head; } @@ -1485,8 +1496,10 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) { /* Signal the Guest tell them we used something up. */ - if (vq->call_ctx && vhost_notify(dev, vq)) + if (vq->call_ctx && vhost_notify(dev, vq)) { eventfd_signal(vq->call_ctx, 1); + trace_vhost_virtio_signal(vq); + } } /* And here's the combo meal deal. Supersize me! */
Jason Wang
2012-Apr-10 02:58 UTC
[PATCH 2/2] tools: virtio: add a top-like utility for displaying vhost satistics
This patch adds simple python to display vhost satistics of vhost, the codes were based on kvm_stat script from qemu. As work function has been recored, filters could be used to distinguish which kinds of work are being executed or queued: vhost statistics vhost_virtio_get_vq_desc 1460997 219682 vhost_work_execute_start 101248 12842 vhost_work_execute_end 101247 12842 vhost_work_queue_wakeup 101263 12841 vhost_virtio_signal 68452 8659 vhost_work_queue_wakeup(rx_net) 51797 6584 vhost_work_execute_start(rx_net) 51795 6584 vhost_work_queue_coalesce 35737 6571 vhost_work_queue_coalesce(rx_net) 35709 6566 vhost_virtio_update_avail_event 49512 6271 vhost_work_execute_start(tx_kick) 49429 6254 vhost_work_queue_wakeup(tx_kick) 49442 6252 vhost_work_queue_coalesce(tx_kick) 28 5 vhost_work_execute_start(rx_kick) 22 3 vhost_work_queue_wakeup(rx_kick) 22 3 vhost_poll_start 4 0 Signed-off-by: Jason Wang <jasowang at redhat.com> --- tools/virtio/vhost_stat | 360 +++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 360 insertions(+), 0 deletions(-) create mode 100755 tools/virtio/vhost_stat diff --git a/tools/virtio/vhost_stat b/tools/virtio/vhost_stat new file mode 100755 index 0000000..b730f3b --- /dev/null +++ b/tools/virtio/vhost_stat @@ -0,0 +1,360 @@ +#!/usr/bin/python +# +# top-like utility for displaying vhost statistics +# +# Copyright 2012 Red Hat, Inc. +# +# Modified from kvm_stat from qemu +# +# This work is licensed under the terms of the GNU GPL, version 2. See +# the COPYING file in the top-level directory. + +import curses +import sys, os, time, optparse + +work_types = { + "handle_rx_kick" : "rx_kick", + "handle_tx_kick" : "tx_kick", + "handle_rx_net" : "rx_net", + "handle_tx_net" : "tx_net", + "vhost_attach_cgroups_work": "cg_attach" + } + +addr = {} + +kallsyms = file("/proc/kallsyms").readlines() +for kallsym in kallsyms: + entry = kallsym.split() + if entry[2] in work_types.keys(): + addr["0x%s" % entry[0]] = work_types[entry[2]] + +filters = { + 'vhost_work_queue_wakeup': ('function', addr), + 'vhost_work_queue_coalesce' : ('function', addr), + 'vhost_work_execute_start' : ('function', addr), + 'vhost_poll_start' : ('function', addr), + 'vhost_poll_stop' : ('function', addr), +} + +def invert(d): + return dict((x[1], x[0]) for x in d.iteritems()) + +for f in filters: + filters[f] = (filters[f][0], invert(filters[f][1])) + +import ctypes, struct, array + +libc = ctypes.CDLL('libc.so.6') +syscall = libc.syscall +class perf_event_attr(ctypes.Structure): + _fields_ = [('type', ctypes.c_uint32), + ('size', ctypes.c_uint32), + ('config', ctypes.c_uint64), + ('sample_freq', ctypes.c_uint64), + ('sample_type', ctypes.c_uint64), + ('read_format', ctypes.c_uint64), + ('flags', ctypes.c_uint64), + ('wakeup_events', ctypes.c_uint32), + ('bp_type', ctypes.c_uint32), + ('bp_addr', ctypes.c_uint64), + ('bp_len', ctypes.c_uint64), + ] +def _perf_event_open(attr, pid, cpu, group_fd, flags): + return syscall(298, ctypes.pointer(attr), ctypes.c_int(pid), + ctypes.c_int(cpu), ctypes.c_int(group_fd), + ctypes.c_long(flags)) + +PERF_TYPE_HARDWARE = 0 +PERF_TYPE_SOFTWARE = 1 +PERF_TYPE_TRACEPOINT = 2 +PERF_TYPE_HW_CACHE = 3 +PERF_TYPE_RAW = 4 +PERF_TYPE_BREAKPOINT = 5 + +PERF_SAMPLE_IP = 1 << 0 +PERF_SAMPLE_TID = 1 << 1 +PERF_SAMPLE_TIME = 1 << 2 +PERF_SAMPLE_ADDR = 1 << 3 +PERF_SAMPLE_READ = 1 << 4 +PERF_SAMPLE_CALLCHAIN = 1 << 5 +PERF_SAMPLE_ID = 1 << 6 +PERF_SAMPLE_CPU = 1 << 7 +PERF_SAMPLE_PERIOD = 1 << 8 +PERF_SAMPLE_STREAM_ID = 1 << 9 +PERF_SAMPLE_RAW = 1 << 10 + +PERF_FORMAT_TOTAL_TIME_ENABLED = 1 << 0 +PERF_FORMAT_TOTAL_TIME_RUNNING = 1 << 1 +PERF_FORMAT_ID = 1 << 2 +PERF_FORMAT_GROUP = 1 << 3 + +import re + +sys_tracing = '/sys/kernel/debug/tracing' + +class Group(object): + def __init__(self, cpu): + self.events = [] + self.group_leader = None + self.cpu = cpu + def add_event(self, name, event_set, tracepoint, filter = None): + self.events.append(Event(group = self, + name = name, event_set = event_set, + tracepoint = tracepoint, filter = filter)) + if len(self.events) == 1: + self.file = os.fdopen(self.events[0].fd) + def read(self): + bytes = 8 * (1 + len(self.events)) + fmt = 'xxxxxxxx' + 'q' * len(self.events) + return dict(zip([event.name for event in self.events], + struct.unpack(fmt, self.file.read(bytes)))) + +class Event(object): + def __init__(self, group, name, event_set, tracepoint, filter = None): + self.name = name + attr = perf_event_attr() + attr.type = PERF_TYPE_TRACEPOINT + attr.size = ctypes.sizeof(attr) + id_path = os.path.join(sys_tracing, 'events', event_set, + tracepoint, 'id') + id = int(file(id_path).read()) + attr.config = id + attr.sample_type = (PERF_SAMPLE_RAW + | PERF_SAMPLE_TIME + | PERF_SAMPLE_CPU) + attr.sample_period = 1 + attr.read_format = PERF_FORMAT_GROUP + group_leader = -1 + if group.events: + group_leader = group.events[0].fd + fd = _perf_event_open(attr, -1, group.cpu, group_leader, 0) + if fd == -1: + raise Exception('perf_event_open failed') + if filter: + import fcntl + fcntl.ioctl(fd, 0x40082406, filter) + self.fd = fd + def enable(self): + import fcntl + fcntl.ioctl(self.fd, 0x00002400, 0) + def disable(self): + import fcntl + fcntl.ioctl(self.fd, 0x00002401, 0) + +class TracepointProvider(object): + def __init__(self): + path = os.path.join(sys_tracing, 'events', 'vhost') + fields = [f + for f in os.listdir(path) + if os.path.isdir(os.path.join(path, f))] + extra = [] + for f in fields: + if f in filters: + subfield, values = filters[f] + for name, number in values.iteritems(): + # kvm_exit(MMIO) + extra.append(f + '(' + name + ')') + fields += extra + self._setup(fields) + self.select(fields) + def fields(self): + return self._fields + def _setup(self, _fields): + self._fields = _fields + cpure = r'cpu([0-9]+)' + self.cpus = [int(re.match(cpure, x).group(1)) + for x in os.listdir('/sys/devices/system/cpu') + if re.match(cpure, x)] + import resource + nfiles = len(self.cpus) * 1000 + resource.setrlimit(resource.RLIMIT_NOFILE, (nfiles, nfiles)) + events = [] + self.group_leaders = [] + for cpu in self.cpus: + group = Group(cpu) + for name in _fields: + tracepoint = name + filter = None + # for field like kvm_exit(MMIO) + m = re.match(r'(.*)\((.*)\)', name) + if m: + tracepoint, sub = m.groups() + filter = '%s==%s\0' % (filters[tracepoint][0], + filters[tracepoint][1][sub]) + event = group.add_event(name, event_set = 'vhost', + tracepoint = tracepoint, + filter = filter) + self.group_leaders.append(group) + def select(self, fields): + for group in self.group_leaders: + for event in group.events: + if event.name in fields: + event.enable() + else: + event.disable() + def read(self): + from collections import defaultdict + ret = defaultdict(int) + for group in self.group_leaders: + for name, val in group.read().iteritems(): + ret[name] += val + return ret + +class Stats: + def __init__(self, provider, fields = None): + self.provider = provider + self.fields_filter = fields + self._update() + def _update(self): + def wanted(key): + import re + if not self.fields_filter: + return True + return re.match(self.fields_filter, key) is not None + self.values = dict([(key, None) + for key in provider.fields() + if wanted(key)]) + self.provider.select(self.values.keys()) + def set_fields_filter(self, fields_filter): + self.fields_filter = fields_filter + self._update() + def get(self): + new = self.provider.read() + for key in self.provider.fields(): + oldval = self.values.get(key, (0, 0)) + newval = new[key] + newdelta = None + if oldval is not None: + newdelta = newval - oldval[0] + self.values[key] = (newval, newdelta) + return self.values + +if not os.access('/sys/kernel/debug', os.F_OK): + print 'Please enable CONFIG_DEBUG_FS in your kernel' + sys.exit(1) +if not os.access('/sys/module/vhost_net', os.F_OK): + print 'Please make sure vhost module are loaded' + sys.exit(1) + +label_width = 40 +number_width = 10 + +def tui(screen, stats): + curses.use_default_colors() + curses.noecho() + drilldown = False + fields_filter = stats.fields_filter + def update_drilldown(): + if not fields_filter: + if drilldown: + stats.set_fields_filter(None) + else: + stats.set_fields_filter(r'^[^\(]*$') + update_drilldown() + def refresh(sleeptime): + screen.erase() + screen.addstr(0, 0, 'vhost statistics') + row = 2 + s = stats.get() + def sortkey(x): + if s[x][1]: + return (-s[x][1], -s[x][0]) + else: + return (0, -s[x][0]) + for key in sorted(s.keys(), key = sortkey): + if row >= screen.getmaxyx()[0]: + break + values = s[key] + if not values[0] and not values[1]: + break + col = 1 + screen.addstr(row, col, key) + col += label_width + screen.addstr(row, col, '%10d' % (values[0],)) + col += number_width + if values[1] is not None: + screen.addstr(row, col, '%8d' % (values[1] / sleeptime,)) + row += 1 + screen.refresh() + + sleeptime = 0.25 + while True: + refresh(sleeptime) + curses.halfdelay(int(sleeptime * 10)) + sleeptime = 3 + try: + c = screen.getkey() + if c == 'x': + drilldown = not drilldown + update_drilldown() + if c == 'q': + break + except KeyboardInterrupt: + break + except curses.error: + continue + +def batch(stats): + s = stats.get() + time.sleep(1) + s = stats.get() + for key in sorted(s.keys()): + values = s[key] + print '%-22s%10d%10d' % (key, values[0], values[1]) + +def log(stats): + keys = sorted(stats.get().iterkeys()) + def banner(): + for k in keys: + print '%10s' % k[0:9], + print + def statline(): + s = stats.get() + for k in keys: + print ' %9d' % s[k][1], + print + line = 0 + banner_repeat = 20 + while True: + time.sleep(1) + if line % banner_repeat == 0: + banner() + statline() + line += 1 + +options = optparse.OptionParser() +options.add_option('-1', '--once', '--batch', + action = 'store_true', + default = False, + dest = 'once', + help = 'run in batch mode for one second', + ) +options.add_option('-l', '--log', + action = 'store_true', + default = False, + dest = 'log', + help = 'run in logging mode (like vmstat)', + ) +options.add_option('-f', '--fields', + action = 'store', + default = None, + dest = 'fields', + help = 'fields to display (regex)', + ) +(options, args) = options.parse_args(sys.argv) + +try: + provider = TracepointProvider() +except: + print "Could not initialize tracepoint" + sys.exit(1) + +stats = Stats(provider, fields = options.fields) + +if options.log: + log(stats) +elif not options.once: + import curses.wrapper + curses.wrapper(tui, stats) +else: + batch(stats)
On Tue, Apr 10, 2012 at 3:58 AM, Jason Wang <jasowang at redhat.com> wrote:> To help in vhost analyzing, the following series adding basic tracepoints to > vhost. Operations of both virtqueues and vhost works were traced in current > implementation, net code were untouched. A top-like satistics displaying script > were introduced to help the troubleshooting. > > TODO: > - net specific tracepoints? > > --- > > Jason Wang (2): > ? ? ?vhost: basic tracepoints > ? ? ?tools: virtio: add a top-like utility for displaying vhost satistics > > > ?drivers/vhost/trace.h ? | ?153 ++++++++++++++++++++ > ?drivers/vhost/vhost.c ? | ? 17 ++ > ?tools/virtio/vhost_stat | ?360 +++++++++++++++++++++++++++++++++++++++++++++++ > ?3 files changed, 528 insertions(+), 2 deletions(-) > ?create mode 100644 drivers/vhost/trace.h > ?create mode 100755 tools/virtio/vhost_statPerhaps this can replace the vhost log feature? I'm not sure if tracepoints support the right data types but it seems like vhost debugging could be done using tracing with less code. Stefan
Reasonably Related Threads
- [PATCH 0/2] adding tracepoints to vhost
- [PATCH RFC V2 0/4] Adding tracepoints to vhost/net
- [PATCH RFC V2 0/4] Adding tracepoints to vhost/net
- [PATCH RFC V2 4/4] tools: virtio: add a top-like utility for displaying vhost satistics
- [PATCH] tracing/events: Add bounce tracing to swiotbl-xen