Richard W.M. Jones
2019-Apr-01 19:38 UTC
[Libguestfs] [PATCH nbdkit] Add readahead filter.
Here is a suggested readahead filter. I've only lightly tested it, but it seems to work fine with "qemu-img convert". The commit still needs proper tests. Rich.
Richard W.M. Jones
2019-Apr-01 19:38 UTC
[Libguestfs] [PATCH nbdkit] Add readahead filter.
--- filters/cache/nbdkit-cache-filter.pod | 1 + filters/readahead/nbdkit-readahead-filter.pod | 39 +++ plugins/curl/nbdkit-curl-plugin.pod | 1 + configure.ac | 2 + filters/readahead/readahead.c | 243 ++++++++++++++++++ filters/readahead/Makefile.am | 61 +++++ 6 files changed, 347 insertions(+) diff --git a/filters/cache/nbdkit-cache-filter.pod b/filters/cache/nbdkit-cache-filter.pod index f1fde13..431a720 100644 --- a/filters/cache/nbdkit-cache-filter.pod +++ b/filters/cache/nbdkit-cache-filter.pod @@ -135,6 +135,7 @@ environment variable before starting nbdkit. L<nbdkit(1)>, L<nbdkit-file-plugin(1)>, +L<nbdkit-readahead-filter(1)>, L<nbdkit-filter(3)>, L<qemu-img(1)>. diff --git a/filters/readahead/nbdkit-readahead-filter.pod b/filters/readahead/nbdkit-readahead-filter.pod new file mode 100644 index 0000000..75c133b --- /dev/null +++ b/filters/readahead/nbdkit-readahead-filter.pod @@ -0,0 +1,39 @@ +=head1 NAME + +nbdkit-readahead-filter - prefetch data when reading sequentially + +=head1 SYNOPSIS + + nbdkit --filter=readahead plugin + +=head1 DESCRIPTION + +C<nbdkit-readahead-filter> is a filter that prefetches data when the +client is reading sequentially. + +A common use for this filter is to accelerate sequential copy +operations (like L<qemu-img(1)> convert) when the plugin is slow (like +L<nbdkit-curl-plugin(1)>). For example: + + nbdkit -U - --filter=readahead curl https://example.com/disk.img \ + --run 'qemu-img convert $nbd disk.img' + +=head1 PARAMETERS + +There are no parameters specific to nbdkit-readahead-filter. Any +parameters are passed through to and processed by the underlying +plugin in the normal way. + +=head1 SEE ALSO + +L<nbdkit(1)>, +L<nbdkit-curl-plugin(1)>, +L<nbdkit-filter(3)>. + +=head1 AUTHORS + +Richard W.M. Jones + +=head1 COPYRIGHT + +Copyright (C) 2019 Red Hat Inc. 
diff --git a/plugins/curl/nbdkit-curl-plugin.pod b/plugins/curl/nbdkit-curl-plugin.pod index 1dadf3c..e941417 100644 --- a/plugins/curl/nbdkit-curl-plugin.pod +++ b/plugins/curl/nbdkit-curl-plugin.pod @@ -157,6 +157,7 @@ L<libcurl(3)>, L<CURLOPT_COOKIE(3)> L<CURLOPT_VERBOSE(3)>, L<nbdkit(1)>, +L<nbdkit-readahead-filter(1)>, L<nbdkit-ssh-plugin(1)>, L<nbdkit-plugin(3)>. diff --git a/configure.ac b/configure.ac index 3fed2e7..42933f9 100644 --- a/configure.ac +++ b/configure.ac @@ -835,6 +835,7 @@ filters="\ offset \ partition \ rate \ + readahead \ truncate \ xz \ " @@ -908,6 +909,7 @@ AC_CONFIG_FILES([Makefile filters/offset/Makefile filters/partition/Makefile filters/rate/Makefile + filters/readahead/Makefile filters/truncate/Makefile filters/xz/Makefile fuzzing/Makefile diff --git a/filters/readahead/readahead.c b/filters/readahead/readahead.c new file mode 100644 index 0000000..c56412f --- /dev/null +++ b/filters/readahead/readahead.c @@ -0,0 +1,243 @@ +/* nbdkit + * Copyright (C) 2019 Red Hat Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of Red Hat nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL RED HAT OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <config.h> + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <assert.h> + +#include <pthread.h> + +#include <nbdkit-filter.h> + +#include "minmax.h" + +#define THREAD_MODEL NBDKIT_THREAD_MODEL_PARALLEL + +/* Copied from server/plugins.c. */ +#define MAX_REQUEST_SIZE (64 * 1024 * 1024) + +#define READAHEAD_MIN 65536 +#define READAHEAD_MAX MAX_REQUEST_SIZE + +/* This lock protects the global state. */ +static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; + +/* The real size of the underlying plugin. */ +static uint64_t size; + +/* Size of the readahead window. */ +static uint64_t window = READAHEAD_MIN; + +/* The prefetch buffer and its location in the virtual disk. */ +static char buffer[MAX_REQUEST_SIZE]; +static uint64_t position; +static uint32_t length = 0; + +static int64_t readahead_get_size (struct nbdkit_next_ops *next_ops, + void *nxdata, void *handle); + +/* In prepare, force a call to get_size which sets the size global. */ +static int +readahead_prepare (struct nbdkit_next_ops *next_ops, void *nxdata, + void *handle) +{ + int64_t r; + + r = readahead_get_size (next_ops, nxdata, handle); + return r >= 0 ? 0 : -1; +} + +/* Get the size. 
*/ +static int64_t +readahead_get_size (struct nbdkit_next_ops *next_ops, void *nxdata, + void *handle) +{ + int64_t r; + + r = next_ops->get_size (nxdata); + if (r == -1) + return -1; + + pthread_mutex_lock (&lock); + size = r; + pthread_mutex_unlock (&lock); + + return r; +} + +/* Read data. */ +static int +readahead_pread (struct nbdkit_next_ops *next_ops, void *nxdata, + void *handle, void *buf, uint32_t count, uint64_t offset, + uint32_t flags, int *err) +{ + /* The buffer must always be larger than the largest possible read. + * This is assured as long as READAHEAD_MAX == MAX_REQUEST_SIZE and + * MAX_REQUEST_SIZE is the same as defined in server/plugins.c + */ + assert (count <= sizeof (buffer)); + + pthread_mutex_lock (&lock); + + again: + if (length == 0) { + /* We don't have a prefetch buffer at all. This could be the + * first request or reset after a miss. + */ + window = READAHEAD_MIN; + position = offset; + /* Read at least window bytes, but if count is larger read that. + * Note that the count cannot be bigger than the buffer size. + */ + length = MAX (count, window); + /* Don't go beyond the end of the underlying file. */ + length = MIN (length, size - position); + + if (next_ops->pread (nxdata, buffer, length, offset, flags, err) == -1) { + length = 0; /* failed to fill the prefetch buffer */ + goto err; + } + + memcpy (buf, buffer, count); + count = 0; + } + + while (count > 0) { + uint32_t n; + + /* Can we satisfy this request partly or entirely from the prefetch + * buffer? + */ + if (position <= offset && offset < position + length) { + n = MIN (position - offset + length, count); + memcpy (buf, &buffer[offset-position], n); + buf += n; + offset += n; + count -= n; + } + + /* Does the request start immediately after the prefetch buffer? */ + else if (offset == position + length) { + window = MIN (window * 2, READAHEAD_MAX); + position = offset; + /* Read at least window bytes, but if count is larger read that. 
+ * Note that the count cannot be bigger than the buffer size. + */ + length = MAX (count, window); + /* Don't go beyond the end of the underlying file. */ + length = MIN (length, size - position); + + if (next_ops->pread (nxdata, buffer, length, offset, flags, err) == -1) { + length = 0; /* failed to fill the prefetch buffer */ + goto err; + } + + memcpy (buf, buffer, count); + count = 0; + } + + /* Else it's a miss. Reset everything and start again. */ + else { + length = 0; + goto again; + } + } + + pthread_mutex_unlock (&lock); + return 0; + + err: + pthread_mutex_unlock (&lock); + return -1; +} + +/* Any writes or write-like operation kill the prefetch buffer. + * + * We could do better here, but for the current use case of this + * filter it doesn't matter. XXX + */ +static void +kill_readahead (void) +{ + pthread_mutex_lock (&lock); + window = READAHEAD_MIN; + length = 0; + pthread_mutex_unlock (&lock); +} + +static int +readahead_pwrite (struct nbdkit_next_ops *next_ops, void *nxdata, + void *handle, + const void *buf, uint32_t count, uint64_t offset, + uint32_t flags, int *err) +{ + kill_readahead (); + return next_ops->pwrite (nxdata, buf, count, offset, flags, err); +} + +static int +readahead_trim (struct nbdkit_next_ops *next_ops, void *nxdata, + void *handle, + uint32_t count, uint64_t offset, uint32_t flags, + int *err) +{ + kill_readahead (); + return next_ops->trim (nxdata, count, offset, flags, err); +} + +static int +readahead_zero (struct nbdkit_next_ops *next_ops, void *nxdata, + void *handle, + uint32_t count, uint64_t offset, uint32_t flags, + int *err) +{ + kill_readahead (); + return next_ops->zero (nxdata, count, offset, flags, err); +} + +static struct nbdkit_filter filter = { + .name = "readahead", + .longname = "nbdkit readahead filter", + .version = PACKAGE_VERSION, + .prepare = readahead_prepare, + .get_size = readahead_get_size, + .pread = readahead_pread, + .pwrite = readahead_pwrite, + .trim = readahead_trim, + .zero = 
readahead_zero, +}; + +NBDKIT_REGISTER_FILTER(filter) diff --git a/filters/readahead/Makefile.am b/filters/readahead/Makefile.am new file mode 100644 index 0000000..0e7a4a8 --- /dev/null +++ b/filters/readahead/Makefile.am @@ -0,0 +1,61 @@ +# nbdkit +# Copyright (C) 2019 Red Hat Inc. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# * Neither the name of Red Hat nor the names of its contributors may be +# used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. 
+ +include $(top_srcdir)/common-rules.mk + +EXTRA_DIST = nbdkit-readahead-filter.pod + +filter_LTLIBRARIES = nbdkit-readahead-filter.la + +nbdkit_readahead_filter_la_SOURCES = \ + readahead.c \ + $(top_srcdir)/include/nbdkit-filter.h + +nbdkit_readahead_filter_la_CPPFLAGS = \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/common/include +nbdkit_readahead_filter_la_CFLAGS = \ + $(WARNINGS_CFLAGS) +nbdkit_readahead_filter_la_LDFLAGS = \ + -module -avoid-version -shared \ + -Wl,--version-script=$(top_srcdir)/filters/filters.syms + +if HAVE_POD + +man_MANS = nbdkit-readahead-filter.1 +CLEANFILES += $(man_MANS) + +nbdkit-readahead-filter.1: nbdkit-readahead-filter.pod + $(PODWRAPPER) --section=1 --man $@ \ + --html $(top_builddir)/html/$@.html \ + $< + +endif HAVE_POD -- 2.20.1