Richard W.M. Jones
2020-Jan-30 14:05 UTC
[Libguestfs] [PATCH libnbd] python: Add AIO buffer is_zero method.
Fast testing whether the AIO buffer (or regions within it) contain all zeroes, which allows Python code to quickly do sparsification when copying. This includes the iszero.h header from nbdkit which is distributed under a compatible license. --- common/include/Makefile.am | 5 +-- common/include/iszero.h | 63 +++++++++++++++++++++++++++++++++++++ generator/generator | 17 ++++++++-- python/Makefile.am | 3 +- python/handle.c | 47 +++++++++++++++++++++++++++ python/t/580-aio-is-zero.py | 53 +++++++++++++++++++++++++++++++ 6 files changed, 183 insertions(+), 5 deletions(-) diff --git a/common/include/Makefile.am b/common/include/Makefile.am index 89cc61e..79f0351 100644 --- a/common/include/Makefile.am +++ b/common/include/Makefile.am @@ -1,5 +1,5 @@ # nbd client library in userspace -# Copyright (C) 2013-2019 Red Hat Inc. +# Copyright (C) 2013-2020 Red Hat Inc. # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -18,4 +18,5 @@ include $(top_srcdir)/subdir-rules.mk EXTRA_DIST = \ - byte-swapping.h + byte-swapping.h \ + iszero.h diff --git a/common/include/iszero.h b/common/include/iszero.h new file mode 100644 index 0000000..7fdbb52 --- /dev/null +++ b/common/include/iszero.h @@ -0,0 +1,63 @@ +/* nbdkit + * Copyright (C) 2018 Red Hat Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of Red Hat nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef NBDKIT_ISZERO_H +#define NBDKIT_ISZERO_H + +#include <string.h> +#include <stdbool.h> + +/* Return true iff the buffer is all zero bytes. + * + * The clever approach here was suggested by Eric Blake. See: + * https://www.redhat.com/archives/libguestfs/2017-April/msg00171.html + * https://rusty.ozlabs.org/?p=560 + * + * See also: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69908 + */ +static inline bool __attribute__((__nonnull__ (1))) +is_zero (const char *buffer, size_t size) +{ + size_t i; + const size_t limit = size < 16 ? size : 16; + + for (i = 0; i < limit; ++i) + if (buffer[i]) + return false; + if (size != limit) + return ! memcmp (buffer, buffer + 16, size - 16); + + return true; +} + +#endif /* NBDKIT_ISZERO_H */ diff --git a/generator/generator b/generator/generator index 31fe9dd..20fd872 100755 --- a/generator/generator +++ b/generator/generator @@ -5014,7 +5014,8 @@ raise_exception () "alloc_aio_buffer"; "aio_buffer_from_bytearray"; "aio_buffer_to_bytearray"; - "aio_buffer_size" ] @ List.map fst handle_calls); + "aio_buffer_size"; + "aio_buffer_is_zero" ] @ List.map fst handle_calls); pr "\n"; pr "#endif /* LIBNBD_METHODS_H */\n" @@ -5044,7 +5045,8 @@ let generate_python_libnbdmod_c () "alloc_aio_buffer"; "aio_buffer_from_bytearray"; "aio_buffer_to_bytearray"; - "aio_buffer_size" ] @ List.map fst handle_calls); + "aio_buffer_size"; + "aio_buffer_is_zero" ] @ List.map fst handle_calls); pr " { NULL, NULL, 0, NULL }\n"; pr "};\n"; pr "\n"; @@ -5667,6 +5669,17 @@ class Buffer (object): '''return the size of an AIO buffer''' return libnbdmod.aio_buffer_size (self._o) + def is_zero (self, offset=0, size=-1): + ''' + Returns true if and only if the buffer contains zero bytes. + Note that a freshly allocated buffer is uninitialized, not zero. + + By default this tests the whole buffer, but you can restrict + the test to a sub-range of the buffer using the optional + offset and size parameters. + ''' + return libnbdmod.aio_buffer_is_zero (self._o, offset, size) + class NBD (object): '''NBD handle''' diff --git a/python/Makefile.am b/python/Makefile.am index 48aabea..c621eb7 100644 --- a/python/Makefile.am +++ b/python/Makefile.am @@ -50,7 +50,8 @@ libnbdmod_la_SOURCES = \ utils.c libnbdmod_la_CPPFLAGS = \ $(PYTHON_CFLAGS) \ - -I$(top_srcdir)/include + -I$(top_srcdir)/include \ + -I$(top_srcdir)/common/include libnbdmod_la_CFLAGS = \ $(WARNINGS_CFLAGS) libnbdmod_la_LIBADD = \ diff --git a/python/handle.c b/python/handle.c index ef81ab8..da0c417 100644 --- a/python/handle.c +++ b/python/handle.c @@ -35,6 +35,8 @@ #include <libnbd.h> +#include "iszero.h" + #include "methods.h" static inline PyObject * @@ -213,3 +215,48 @@ nbd_internal_py_aio_buffer_size (PyObject *self, PyObject *args) return PyLong_FromSsize_t (buf->len); } + +PyObject * +nbd_internal_py_aio_buffer_is_zero (PyObject *self, PyObject *args) +{ + PyObject *obj; + struct py_aio_buffer *buf; + Py_ssize_t offset, size; + + if (!PyArg_ParseTuple (args, + (char *) "Onn:nbd_internal_py_aio_buffer_size", + &obj, &offset, &size)) + return NULL; + + if (size == 0) + Py_RETURN_TRUE; + + buf = nbd_internal_py_get_aio_buffer (obj); + if (buf == NULL) + return NULL; + + /* Check the bounds of the offset. */ + if (offset < 0 || offset > buf->len) { + PyErr_SetString (PyExc_IndexError, "offset out of range"); + return NULL; + } + + /* Compute or check the length. */ + if (size == -1) + size = buf->len - offset; + else if (size < 0) { + PyErr_SetString (PyExc_IndexError, + "size cannot be negative, " + "except -1 to mean to the end of the buffer"); + return NULL; + } + else if ((size_t) offset + size > buf->len) { + PyErr_SetString (PyExc_IndexError, "size out of range"); + return NULL; + } + + if (is_zero (buf->data + offset, size)) + Py_RETURN_TRUE; + else + Py_RETURN_FALSE; +} diff --git a/python/t/580-aio-is-zero.py b/python/t/580-aio-is-zero.py new file mode 100644 index 0000000..5bda9ff --- /dev/null +++ b/python/t/580-aio-is-zero.py @@ -0,0 +1,53 @@ +# libnbd Python bindings +# Copyright (C) 2010-2020 Red Hat Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +# This tests the nbd.Buffer is_zero method. We can do this entirely +# without using the NBD protocol. + +import os +import nbd + +# Simplest case: A Buffer initialized with zeros should be zero. +ba = bytearray (2**20) +buf = nbd.Buffer.from_bytearray (ba) +assert buf.is_zero () + +# The above buffer is 2**20 (= 1MB), slices of it should also be zero. +for i in range (0, 7): + assert buf.is_zero (i * 2**17, 2**17) + +# A Buffer initialized with non-zeroes should not be zero. +ba = bytearray (b'\xff') * 2**20 +buf = nbd.Buffer.from_bytearray (ba) +assert not buf.is_zero () + +# Slices should not be zero. +for i in range (0, 15): + assert not buf.is_zero (i * 2**16, 2**16) + +# Buffer with a block of non-zeroes and block of zeroes. +ba = bytearray (b'\xff') * 2**20 + bytearray (2**20) +buf = nbd.Buffer.from_bytearray (ba) +assert not buf.is_zero () +assert not buf.is_zero (0, 2**20) +assert buf.is_zero (2**20) +assert not buf.is_zero (2**19) +assert buf.is_zero (2**20, 2**19) +assert not buf.is_zero (2**20-1, 1) +assert buf.is_zero (2**20, 1) +assert not buf.is_zero (0, 1) +assert buf.is_zero (2**21-1, 1) -- 2.24.1
Eric Blake
2020-Jan-30 14:16 UTC
Re: [Libguestfs] [PATCH libnbd] python: Add AIO buffer is_zero method.
On 1/30/20 8:05 AM, Richard W.M. Jones wrote:> Fast testing whether the AIO buffer (or regions within it) contain all > zeroes, which allows Python code to quickly do sparsification when > copying. > > This includes the iszero.h header from nbdkit which is distributed > under a compatible license. > --- > common/include/Makefile.am | 5 +-- > common/include/iszero.h | 63 +++++++++++++++++++++++++++++++++++++ > generator/generator | 17 ++++++++-- > python/Makefile.am | 3 +- > python/handle.c | 47 +++++++++++++++++++++++++++ > python/t/580-aio-is-zero.py | 53 +++++++++++++++++++++++++++++++ > 6 files changed, 183 insertions(+), 5 deletions(-)ACK.> +++ b/common/include/Makefile.am > @@ -1,5 +1,5 @@ > # nbd client library in userspace > -# Copyright (C) 2013-2019 Red Hat Inc. > +# Copyright (C) 2013-2020 Red Hat Inc.Unrelated question: we are just now making our first libnbd commits of 2020. Should we have the 'nbdsh --version' output a copyright date? And if so, should we automate the process to automatically update it to the current year, rather than remembering to touch files manually when making the first commit in a year? Should we manually update the copyright date in ALL files in a single pass, rather than one-off edits as we make per-file changes (if so, the gnulib project has a nice script for automating a tree-wide copyright range update). -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Richard W.M. Jones
2020-Jan-30 14:29 UTC
Re: [Libguestfs] [PATCH libnbd] python: Add AIO buffer is_zero method.
On Thu, Jan 30, 2020 at 08:16:30AM -0600, Eric Blake wrote:> On 1/30/20 8:05 AM, Richard W.M. Jones wrote: > >Fast testing whether the AIO buffer (or regions within it) contain all > >zeroes, which allows Python code to quickly do sparsification when > >copying. > > > >This includes the iszero.h header from nbdkit which is distributed > >under a compatible license. > >--- > > common/include/Makefile.am | 5 +-- > > common/include/iszero.h | 63 +++++++++++++++++++++++++++++++++++++ > > generator/generator | 17 ++++++++-- > > python/Makefile.am | 3 +- > > python/handle.c | 47 +++++++++++++++++++++++++++ > > python/t/580-aio-is-zero.py | 53 +++++++++++++++++++++++++++++++ > > 6 files changed, 183 insertions(+), 5 deletions(-) > > ACK. > > > >+++ b/common/include/Makefile.am > >@@ -1,5 +1,5 @@ > > # nbd client library in userspace > >-# Copyright (C) 2013-2019 Red Hat Inc. > >+# Copyright (C) 2013-2020 Red Hat Inc. > > Unrelated question: we are just now making our first libnbd commits > of 2020. Should we have the 'nbdsh --version' output a copyright > date?I guess the first line of output must remain as it is now, but we could add further lines if we wanted to. Seems like GNU tools follow some kind of standard where they print <name> <version> on the first line, then copyright and license information on subsequent lines.> And if so, should we automate the process to automatically update it > to the current year, rather than remembering to touch files manually > when making the first commit in a year?Need to be careful about not breaking reproducible builds. This was a problem in the past with libguestfs where it would insert a copyright year into certain output files based on the time of compilation. https://github.com/libguestfs/libguestfs/commit/4d39faaa30eace5f52e92e344d3a62dcce52d71c> Should we > manually update the copyright date in ALL files in a single pass, > rather than one-off edits as we make per-file changes (if so, the > gnulib project has a nice script for automating a tree-wide > copyright range update).We could run the following command over all repository: https://github.com/libguestfs/libguestfs/commit/05d4fcb64d98d4ff1d57560c566ca8e66b695277 Rich. -- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Read my programming and virtualization blog: http://rwmj.wordpress.com virt-df lists disk usage of guests without needing to install any software inside the virtual machine. Supports Linux and Windows. http://people.redhat.com/~rjones/virt-df/
Possibly Parallel Threads
- Re: [PATCH libnbd] python: Add AIO buffer is_zero method.
- Re: [PATCH libnbd] python: Add AIO buffer is_zero method.
- [PATCH libnbd] python: Add AIO buffer is_zero method.
- Re: [PATCH libnbd] python: Add AIO buffer is_zero method.
- [libnbd PATCH 0/6] common: catch up with nbdkit