Hi folks, This patchset adds vhost-blk support. vhost-blk is an in-kernel virtio-blk device accelerator. Compared to userspace virtio-blk implementation, vhost-blk gives about 5% to 15% performance improvement. Asias He (5): aio: Export symbols and struct kiocb_batch for in kernel aio usage eventfd: Export symbol eventfd_file_create() vhost: Make vhost a separate module vhost-net: Use VHOST_NET_FEATURES for vhost-net vhost-blk: Add vhost-blk support drivers/vhost/Kconfig | 20 +- drivers/vhost/Makefile | 6 +- drivers/vhost/blk.c | 600 ++++++++++++++++++++++++++++++++++++++++++++++++ drivers/vhost/net.c | 4 +- drivers/vhost/test.c | 4 +- drivers/vhost/vhost.c | 48 ++++ drivers/vhost/vhost.h | 18 +- fs/aio.c | 37 ++- fs/eventfd.c | 1 + include/linux/aio.h | 21 ++ include/linux/vhost.h | 1 + 11 files changed, 729 insertions(+), 31 deletions(-) create mode 100644 drivers/vhost/blk.c -- 1.7.10.4
Asias He
2012-Jul-12 15:35 UTC
[PATCH 1/5] aio: Export symbols and struct kiocb_batch for in kernel aio usage
This is useful for people who want to use aio in kernel, e.g. vhost-blk. Signed-off-by: Asias He <asias at redhat.com> --- fs/aio.c | 37 ++++++++++++++++++------------------- include/linux/aio.h | 21 +++++++++++++++++++++ 2 files changed, 39 insertions(+), 19 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 55c4c76..93dfbdd 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -224,22 +224,24 @@ static void __put_ioctx(struct kioctx *ctx) call_rcu(&ctx->rcu_head, ctx_rcu_free); } -static inline int try_get_ioctx(struct kioctx *kioctx) +inline int try_get_ioctx(struct kioctx *kioctx) { return atomic_inc_not_zero(&kioctx->users); } +EXPORT_SYMBOL(try_get_ioctx); -static inline void put_ioctx(struct kioctx *kioctx) +inline void put_ioctx(struct kioctx *kioctx) { BUG_ON(atomic_read(&kioctx->users) <= 0); if (unlikely(atomic_dec_and_test(&kioctx->users))) __put_ioctx(kioctx); } +EXPORT_SYMBOL(put_ioctx); /* ioctx_alloc * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. */ -static struct kioctx *ioctx_alloc(unsigned nr_events) +struct kioctx *ioctx_alloc(unsigned nr_events) { struct mm_struct *mm; struct kioctx *ctx; @@ -303,6 +305,7 @@ out_freectx: dprintk("aio: error allocating ioctx %d\n", err); return ERR_PTR(err); } +EXPORT_SYMBOL(ioctx_alloc); /* kill_ctx * Cancels all outstanding aio requests on an aio context. Used @@ -436,23 +439,14 @@ static struct kiocb *__aio_get_req(struct kioctx *ctx) return req; } -/* - * struct kiocb's are allocated in batches to reduce the number of - * times the ctx lock is acquired and released. 
- */ -#define KIOCB_BATCH_SIZE 32L -struct kiocb_batch { - struct list_head head; - long count; /* number of requests left to allocate */ -}; - -static void kiocb_batch_init(struct kiocb_batch *batch, long total) +void kiocb_batch_init(struct kiocb_batch *batch, long total) { INIT_LIST_HEAD(&batch->head); batch->count = total; } +EXPORT_SYMBOL(kiocb_batch_init); -static void kiocb_batch_free(struct kioctx *ctx, struct kiocb_batch *batch) +void kiocb_batch_free(struct kioctx *ctx, struct kiocb_batch *batch) { struct kiocb *req, *n; @@ -470,6 +464,7 @@ static void kiocb_batch_free(struct kioctx *ctx, struct kiocb_batch *batch) wake_up_all(&ctx->wait); spin_unlock_irq(&ctx->ctx_lock); } +EXPORT_SYMBOL(kiocb_batch_free); /* * Allocate a batch of kiocbs. This avoids taking and dropping the @@ -540,7 +535,7 @@ out: return allocated; } -static inline struct kiocb *aio_get_req(struct kioctx *ctx, +inline struct kiocb *aio_get_req(struct kioctx *ctx, struct kiocb_batch *batch) { struct kiocb *req; @@ -552,6 +547,7 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx, list_del(&req->ki_batch); return req; } +EXPORT_SYMBOL(aio_get_req); static inline void really_put_req(struct kioctx *ctx, struct kiocb *req) { @@ -721,7 +717,7 @@ static inline int __queue_kicked_iocb(struct kiocb *iocb) * simplifies the coding of individual aio operations as * it avoids various potential races. 
*/ -static ssize_t aio_run_iocb(struct kiocb *iocb) +ssize_t aio_run_iocb(struct kiocb *iocb) { struct kioctx *ctx = iocb->ki_ctx; ssize_t (*retry)(struct kiocb *); @@ -815,6 +811,7 @@ out: } return ret; } +EXPORT_SYMBOL(aio_run_iocb); /* * __aio_run_iocbs: @@ -1136,7 +1133,7 @@ static inline void clear_timeout(struct aio_timeout *to) del_singleshot_timer_sync(&to->timer); } -static int read_events(struct kioctx *ctx, +int read_events(struct kioctx *ctx, long min_nr, long nr, struct io_event __user *event, struct timespec __user *timeout) @@ -1252,6 +1249,7 @@ out: destroy_timer_on_stack(&to.timer); return i ? i : ret; } +EXPORT_SYMBOL(read_events); /* Take an ioctx and remove it from the list of ioctx's. Protects * against races with itself via ->dead. @@ -1492,7 +1490,7 @@ static ssize_t aio_setup_single_vector(int type, struct file * file, struct kioc * Performs the initial checks and aio retry method * setup for the kiocb at the time of io submission. */ -static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat) +ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat) { struct file *file = kiocb->ki_filp; ssize_t ret = 0; @@ -1570,6 +1568,7 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat) return 0; } +EXPORT_SYMBOL(aio_setup_iocb); static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, struct iocb *iocb, struct kiocb_batch *batch, diff --git a/include/linux/aio.h b/include/linux/aio.h index b1a520e..4731da5 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -126,6 +126,16 @@ struct kiocb { struct eventfd_ctx *ki_eventfd; }; +/* + * struct kiocb's are allocated in batches to reduce the number of + * times the ctx lock is acquired and released. 
+ */ +#define KIOCB_BATCH_SIZE 32L +struct kiocb_batch { + struct list_head head; + long count; /* number of requests left to allocate */ +}; + #define is_sync_kiocb(iocb) ((iocb)->ki_key == KIOCB_SYNC_KEY) #define init_sync_kiocb(x, filp) \ do { \ @@ -216,6 +226,17 @@ struct mm_struct; extern void exit_aio(struct mm_struct *mm); extern long do_io_submit(aio_context_t ctx_id, long nr, struct iocb __user *__user *iocbpp, bool compat); +extern struct kioctx *ioctx_alloc(unsigned nr_events); +extern ssize_t aio_run_iocb(struct kiocb *iocb); +extern int read_events(struct kioctx *ctx, long min_nr, long nr, + struct io_event __user *event, + struct timespec __user *timeout); +extern ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat); +extern void kiocb_batch_init(struct kiocb_batch *batch, long total); +extern void kiocb_batch_free(struct kioctx *ctx, struct kiocb_batch *batch); +extern struct kiocb *aio_get_req(struct kioctx *ctx, struct kiocb_batch *batch); +extern int try_get_ioctx(struct kioctx *kioctx); +extern void put_ioctx(struct kioctx *kioctx); #else static inline ssize_t wait_on_sync_kiocb(struct kiocb *iocb) { return 0; } static inline int aio_put_req(struct kiocb *iocb) { return 0; } -- 1.7.10.4
Asias He <asias at redhat.com> writes:> Hi folks, > > This patchset adds vhost-blk support. vhost-blk is a in kernel virito-blk > device accelerator. Compared to userspace virtio-blk implementation, vhost-blk > gives about 5% to 15% performance improvement. > > Asias He (5): > aio: Export symbols and struct kiocb_batch for in kernel aio usage > eventfd: Export symbol eventfd_file_create() > vhost: Make vhost a separate module > vhost-net: Use VHOST_NET_FEATURES for vhost-net > vhost-blk: Add vhost-blk supportI only saw patches 0 and 1. Where are the other 4? If the answer is, "not on lkml," then please resend them, CC'ing lkml. I'd like to be able to see the usage of the aio routines. Cheers, Jeff
Hello Jeff, On 07/13/2012 12:06 AM, Jeff Moyer wrote:> Asias He <asias at redhat.com> writes: > >> Hi folks, >> >> This patchset adds vhost-blk support. vhost-blk is a in kernel virito-blk >> device accelerator. Compared to userspace virtio-blk implementation, vhost-blk >> gives about 5% to 15% performance improvement. >> >> Asias He (5): >> aio: Export symbols and struct kiocb_batch for in kernel aio usage >> eventfd: Export symbol eventfd_file_create() >> vhost: Make vhost a separate module >> vhost-net: Use VHOST_NET_FEATURES for vhost-net >> vhost-blk: Add vhost-blk support > > I only saw patches 0 and 1. Where are the other 4? If the answer is, > "not on lkml," then please resend them, CC'ing lkml.I did send all the 0-5 patches to lkml, but I somehow messed up the thread. Will CC you next time.> I'd like to be able to see the usage of the aio routines.OK. It'd be nice if you could review. Thanks. -- Asias
Asias He
2012-Jul-13 01:40 UTC
[PATCH 1/5] aio: Export symbols and struct kiocb_batch for in kernel aio usage
Hi James, On 07/13/2012 01:50 AM, James Bottomley wrote:> On Thu, 2012-07-12 at 23:35 +0800, Asias He wrote: >> This is useful for people who want to use aio in kernel, e.g. vhost-blk. >> >> Signed-off-by: Asias He <asias at redhat.com> >> --- >> fs/aio.c | 37 ++++++++++++++++++------------------- >> include/linux/aio.h | 21 +++++++++++++++++++++ >> 2 files changed, 39 insertions(+), 19 deletions(-) > > Um, I think you don't quite understand how aio in the kernel would work; > it's not as simple as just exporting the interfaces. There's already a > (very long) patch set from oracle to do this so loop can use aio: > > http://marc.info/?l=linux-fsdevel&m=133312234313122Oh, I did not see this patch set. Thanks for pointing it out! This bit hasn't merged, right? I'd love to use the aio_kernel_() interface if it is merged. It will simplify vhost-blk. Due to lack of better kernel aio interface, we are currently doing io_setup, io_submit, etc. in vhost-blk on our own. -- Asias
On Thu, Jul 12, 2012 at 4:35 PM, Asias He <asias at redhat.com> wrote:> This patchset adds vhost-blk support. vhost-blk is a in kernel virito-blk > device accelerator. Compared to userspace virtio-blk implementation, vhost-blk > gives about 5% to 15% performance improvement.Why is it 5-15% faster? vhost-blk and the userspace virtio-blk you benchmarked should be doing basically the same thing: 1. An eventfd file descriptor is signalled when the vring has new requests available from the guest. 2. A thread wakes up and processes the virtqueue. 3. Linux AIO is used to issue host I/O. 4. An interrupt is injected into the guest. Does the vhost-blk implementation do anything fundamentally different from userspace? Where is the overhead that userspace virtio-blk has? I'm asking because it would be beneficial to fix the overhead (especially if that could speed up all userspace applications) instead of adding a special-purpose kernel module to work around the overhead. Stefan
On Tue, Jul 17, 2012 at 11:32:45AM +0200, Paolo Bonzini wrote:> Il 17/07/2012 11:21, Asias He ha scritto: > >> It depends. Like vhost-scsi, vhost-blk has the problem of a crippled > >> feature set: no support for block device formats, non-raw protocols, > >> etc. This makes it different from vhost-net. > > > > Data-plane qemu also has this cripppled feature set problem, no? > > Yes, but that is just a proof of concept. We can implement a separate > I/O thread within the QEMU block layer, and add fast paths that resemble > data-path QEMU, without limiting the feature set. > > > Does user always choose to use block devices format like qcow2? What > > if they prefer raw image or raw block device? > > If they do, the code should hit fast paths and be fast. But it should > be automatic, without the need for extra knobs. aio=thread vs. > aio=native is already one knob too much IMHO.Well one extra knob at qemu level is harmless IMO since the complexity can be handled by libvirt. For vhost-net libvirt already enables vhost automatically depending on backend used and I imagine a similar thing can happen here.> >> So it begs the question, is it going to be used in production, or just a > >> useful reference tool? > > > > This should be decided by user, I can not speak for them. What is wrong > > with adding one option for user which they can decide? > > Having to explain the user about the relative benefits;This can just be done automatically by libvirt.> having to > support the API; having to handle transition from one more thing when > something better comes out. > > PaoloWell this is true for any code. If the limited featureset which vhost-blk can accelerate is something many people use, then accelerating by 5-15% might outweigh support costs. -- MST