Richard W.M. Jones
2023-May-17 10:06 UTC
[Libguestfs] [PATCH nbdkit v2 1/6] Add new public nbdkit_parse_probability function
In nbdkit-error-filter we need to parse parameters as probabilities. This is useful enough to add to nbdkit, since we will use it in another filter in future. --- docs/nbdkit-plugin.pod | 19 +++++++ plugins/python/nbdkit-python-plugin.pod | 6 ++ include/nbdkit-common.h | 2 + server/nbdkit.syms | 1 + server/public.c | 37 +++++++++++++ server/test-public.c | 73 +++++++++++++++++++++++++ plugins/ocaml/NBDKit.mli | 11 ++-- plugins/ocaml/NBDKit.ml | 2 + plugins/ocaml/bindings.c | 16 ++++++ plugins/python/modfunctions.c | 21 +++++++ tests/test-python-plugin.py | 12 ++++ tests/test_ocaml_plugin.ml | 1 + 12 files changed, 196 insertions(+), 5 deletions(-) diff --git a/docs/nbdkit-plugin.pod b/docs/nbdkit-plugin.pod index 860c5cecb..e8d30a98e 100644 --- a/docs/nbdkit-plugin.pod +++ b/docs/nbdkit-plugin.pod @@ -1433,6 +1433,25 @@ C<str> can be a string containing a case-insensitive form of various common toggle values. The function returns 0 or 1 if the parse was successful. If there was an error, it returns C<-1>. +=head2 Parsing probabilities + +Use the C<nbdkit_parse_probability> utility function to parse +probabilities. Common formats understood include: C<"0.1">, C<"10%"> +or C<"1:10">, which all mean a probability of 1 in 10. + + int nbdkit_parse_probability (const char *what, const char *str, + double *ret); + +The C<what> parameter is printed in error messages to provide context. +The C<str> parameter is the probability string. + +On success the function returns C<0> and sets C<*ret>. B<Note> that +the probability returned may be outside the range S<[ 0.0..1.0 ]>, for +example if C<str == "200%">. If you want to clamp the result you must +check that yourself. + +On error, nbdkit_error is called and C<-1> is returned. + =head2 Reading passwords The C<nbdkit_read_password> utility function can be used to read diff --git a/plugins/python/nbdkit-python-plugin.pod b/plugins/python/nbdkit-python-plugin.pod index e328cc2e0..0e55dcfcc 100644 --- a/plugins/python/nbdkit-python-plugin.pod +++ b/plugins/python/nbdkit-python-plugin.pod @@ -136,6 +136,12 @@ C<import errno>. Parse a string (such as "100M") into a size in bytes. Wraps the C<nbdkit_parse_size()> C function. +=head3 C<nbdkit.parse_probability(what, str)> + +Parse a string (such as "100%") into a probability, returning a +floating point number. Wraps the C<nbdkit_parse_probability()> C +function. + =head3 C<nbdkit.shutdown()> Request asynchronous server shutdown. diff --git a/include/nbdkit-common.h b/include/nbdkit-common.h index e070245b8..b44e77323 100644 --- a/include/nbdkit-common.h +++ b/include/nbdkit-common.h @@ -105,6 +105,8 @@ NBDKIT_EXTERN_DECL (void, nbdkit_vdebug, NBDKIT_EXTERN_DECL (char *, nbdkit_absolute_path, (const char *path)); NBDKIT_EXTERN_DECL (int64_t, nbdkit_parse_size, (const char *str)); +NBDKIT_EXTERN_DECL (int, nbdkit_parse_probability, + (const char *what, const char *str, double *r)); NBDKIT_EXTERN_DECL (int, nbdkit_parse_bool, (const char *str)); NBDKIT_EXTERN_DECL (int, nbdkit_parse_int, (const char *what, const char *str, int *r)); diff --git a/server/nbdkit.syms b/server/nbdkit.syms index 353ea2a98..c6480e43e 100644 --- a/server/nbdkit.syms +++ b/server/nbdkit.syms @@ -67,6 +67,7 @@ nbdkit_parse_int64_t; nbdkit_parse_int8_t; nbdkit_parse_int; + nbdkit_parse_probability; nbdkit_parse_size; nbdkit_parse_uint16_t; nbdkit_parse_uint32_t; diff --git a/server/public.c b/server/public.c index 71ea6779d..fe2b48c7c 100644 --- a/server/public.c +++ b/server/public.c @@ -421,6 +421,43 @@ nbdkit_parse_size (const char *str) return size * scale; } +NBDKIT_DLL_PUBLIC int +nbdkit_parse_probability (const char *what, const char *str, + double *retp) +{ + double d, d2; + char c; + int n; + + if (sscanf (str, "%lg%[:/]%lg%n", &d, &c, &d2, &n) == 3 && + strcmp (&str[n], "") == 0) { /* N:M or N/M */ + if (d == 0 && d2 == 0) /* 0/0 is OK */ + ; + else if (d2 == 0) /* N/0 is bad */ + goto bad_parse; + else + d /= d2; + } + else if (sscanf (str, "%lg%n", &d, &n) == 1) { + if (strcmp (&str[n], "%") == 0) /* percentage */ + d /= 100.0; + else if (strcmp (&str[n], "") == 0) /* probability */ + ; + else + goto bad_parse; + } + else + goto bad_parse; + + if (retp) + *retp = d; + return 0; + + bad_parse: + nbdkit_error ("%s: could not parse '%s'", what, str); + return -1; +} + /* Parse a string as a boolean, or return -1 after reporting the error. */ NBDKIT_DLL_PUBLIC int diff --git a/server/test-public.c b/server/test-public.c index 676411290..0d84abdd2 100644 --- a/server/test-public.c +++ b/server/test-public.c @@ -200,6 +200,78 @@ test_nbdkit_parse_size (void) return pass; } +static bool +test_nbdkit_parse_probability (void) +{ + size_t i; + bool pass = true; + struct pair { + const char *str; + int result; + double expected; + } tests[] = { + /* Bogus strings */ + { "", -1 }, + { "garbage", -1 }, + { "0garbage", -1 }, + { "1X", -1 }, + { "1%%", -1 }, + { "1:", -1 }, + { "1:1:1", -1 }, + { "1:0", -1 }, /* format is valid but divide by zero is not allowed */ + { "1/", -1 }, + { "1/2/3", -1 }, + + /* Numbers. */ + { "0", 0, 0 }, + { "1", 0, 1 }, + { "2", 0, 2 }, /* values outside [0..1] range are allowed */ + { "0.1", 0, 0.1 }, + { "0.5", 0, 0.5 }, + { "0.9", 0, 0.9 }, + { "1.0000", 0, 1 }, + + /* Percentages. */ + { "0%", 0, 0 }, + { "50%", 0, 0.5 }, + { "100%", 0, 1 }, + { "90.25%", 0, 0.9025 }, + + /* N in M */ + { "1:1000", 0, 0.001 }, + { "1/1000", 0, 0.001 }, + { "2:99", 0, 2.0/99 }, + { "2/99", 0, 2.0/99 }, + { "0:1000000", 0, 0 }, + }; + + for (i = 0; i < ARRAY_SIZE (tests); i++) { + int r; + double d; + + error_flagged = false; + r = nbdkit_parse_probability ("test", tests[i].str, &d); + if (r != tests[i].result) { + fprintf (stderr, + "Wrong return value for %s, got %d, expected %d\n", + tests[i].str, r, tests[i].result); + pass = false; + } + if (r == 0 && d != tests[i].expected) { + fprintf (stderr, + "Wrong result for %s, got %g, expected %g\n", + tests[i].str, d, tests[i].expected); + pass = false; + } + if ((r == -1) != error_flagged) { + fprintf (stderr, "Wrong error message handling for %s\n", tests[i].str); + pass = false; + } + } + + return pass; +} + static bool test_nbdkit_parse_ints (void) { @@ -503,6 +575,7 @@ main (int argc, char *argv[]) { bool pass = true; pass &= test_nbdkit_parse_size (); + pass &= test_nbdkit_parse_probability (); pass &= test_nbdkit_parse_ints (); pass &= test_nbdkit_read_password (); /* nbdkit_absolute_path and nbdkit_nanosleep not unit-tested here, but diff --git a/plugins/ocaml/NBDKit.mli b/plugins/ocaml/NBDKit.mli index 81447d07d..bc190f267 100644 --- a/plugins/ocaml/NBDKit.mli +++ b/plugins/ocaml/NBDKit.mli @@ -134,9 +134,9 @@ val register_plugin : into [EINVAL]. *) val set_error : Unix.error -> unit -(** Bindings for [nbdkit_parse_size], [nbdkit_parse_bool] and - [nbdkit_read_password]. See nbdkit-plugin(3) for information - about these functions. +(** Bindings for [nbdkit_parse_size], [nbdkit_parse_probability], + [nbdkit_parse_bool] and [nbdkit_read_password]. See + nbdkit-plugin(3) for information about these functions. On error these functions all raise [Invalid_argument]. The actual error is sent to the nbdkit error log and is not @@ -145,10 +145,11 @@ val set_error : Unix.error -> unit (* Note OCaml has functions already for parsing other integers, so * there is no need to bind them here. We only bind the functions * which have special abilities in nbdkit: [parse_size] can parse - * human sizes, [parse_bool] parses a range of nbdkit-specific - * boolean strings, and [read_password] suppresses echo. + * human sizes, [parse_probability] and [parse_bool] parses a range + * of nbdkit-specific strings, and [read_password] suppresses echo. *) val parse_size : string -> int64 +val parse_probability : string -> string -> float val parse_bool : string -> bool val read_password : string -> string diff --git a/plugins/ocaml/NBDKit.ml b/plugins/ocaml/NBDKit.ml index e1cf28c94..2d1696917 100644 --- a/plugins/ocaml/NBDKit.ml +++ b/plugins/ocaml/NBDKit.ml @@ -160,6 +160,8 @@ let register_plugin ~name (* Bindings to nbdkit server functions. *) external set_error : Unix.error -> unit = "ocaml_nbdkit_set_error" [@@noalloc] external parse_size : string -> int64 = "ocaml_nbdkit_parse_size" +external parse_probability : string -> string -> float + "ocaml_nbdkit_parse_probability" external parse_bool : string -> bool = "ocaml_nbdkit_parse_bool" external read_password : string -> string = "ocaml_nbdkit_read_password" external realpath : string -> string = "ocaml_nbdkit_realpath" diff --git a/plugins/ocaml/bindings.c b/plugins/ocaml/bindings.c index f2c9ca07d..4885feac5 100644 --- a/plugins/ocaml/bindings.c +++ b/plugins/ocaml/bindings.c @@ -74,6 +74,22 @@ ocaml_nbdkit_parse_size (value strv) CAMLreturn (rv); } +NBDKIT_DLL_PUBLIC value +ocaml_nbdkit_parse_probability (value whatv, value strv) +{ + CAMLparam2 (whatv, strv); + CAMLlocal1 (dv); + int r; + double d; + + r = nbdkit_parse_probability (String_val (whatv), String_val (strv), &d); + if (r == -1) + caml_invalid_argument ("nbdkit_parse_probability"); + dv = caml_copy_double (d); + + CAMLreturn (dv); +} + NBDKIT_DLL_PUBLIC value ocaml_nbdkit_parse_bool (value strv) { diff --git a/plugins/python/modfunctions.c b/plugins/python/modfunctions.c index 479707e78..74bfd6f27 100644 --- a/plugins/python/modfunctions.c +++ b/plugins/python/modfunctions.c @@ -122,11 +122,32 @@ parse_size (PyObject *self, PyObject *args) return PyLong_FromSize_t ((size_t)size); } +/* nbdkit.parse_probability */ +static PyObject * +parse_probability (PyObject *self, PyObject *args) +{ + const char *what, *str; + double d; + + if (!PyArg_ParseTuple (args, "ss:parse_probability", &what, &str)) + return NULL; + + if (nbdkit_parse_probability (what, str, &d) == -1) { + PyErr_SetString (PyExc_ValueError, + "Unable to parse string as probability"); + return NULL; + } + + return PyFloat_FromDouble (d); +} + static PyMethodDef NbdkitMethods[] = { { "debug", debug, METH_VARARGS, "Print a debug message" }, { "export_name", export_name, METH_NOARGS, "Return the optional export name negotiated with the client" }, + { "parse_probability", parse_probability, METH_VARARGS, + "Parse probability strings into floating point number" }, { "parse_size", parse_size, METH_VARARGS, "Parse human-readable size strings into bytes" }, { "set_error", set_error, METH_VARARGS, diff --git a/tests/test-python-plugin.py b/tests/test-python-plugin.py index c3232a112..7f3a2c2e4 100644 --- a/tests/test-python-plugin.py +++ b/tests/test-python-plugin.py @@ -55,8 +55,20 @@ class TestAPI(unittest.TestCase): with self.assertRaises(ValueError): nbdkit.parse_size('foo') + def test_parse_probability(self): + self.assertEqual(nbdkit.parse_probability('test', '1:10'), 0.1) + self.assertEqual(nbdkit.parse_probability('test', '100%'), 1) + self.assertEqual(nbdkit.parse_probability('test', '0'), 0) + + with self.assertRaises(TypeError): + nbdkit.parse_probability('test', 17) + + with self.assertRaises(ValueError): + nbdkit.parse_probability('test', 'bar') + TestAPI().test_parse_size() +TestAPI().test_parse_probability() def config(k, v): diff --git a/tests/test_ocaml_plugin.ml b/tests/test_ocaml_plugin.ml index 8132de8f8..bf998d361 100644 --- a/tests/test_ocaml_plugin.ml +++ b/tests/test_ocaml_plugin.ml @@ -39,6 +39,7 @@ let sparse = Bytes.make nr_sectors '\000' (* sparseness bitmap *) (* Test parse_* functions. *) let () assert (NBDKit.parse_size "1M" = Int64.of_int (1024*1024)); + assert (NBDKit.parse_probability "test parse probability" "1:10" = 0.1); assert (NBDKit.parse_bool "true" = true); assert (NBDKit.parse_bool "0" = false) -- 2.39.2
Eric Blake
2023-May-17 21:05 UTC
[Libguestfs] [PATCH nbdkit v2 1/6] Add new public nbdkit_parse_probability function
On Wed, May 17, 2023 at 11:06:54AM +0100, Richard W.M. Jones wrote:> In nbdkit-error-filter we need to parse parameters as probabilities. > This is useful enough to add to nbdkit, since we will use it in > another filter in future. > --- > docs/nbdkit-plugin.pod | 19 +++++++ > plugins/python/nbdkit-python-plugin.pod | 6 ++ > include/nbdkit-common.h | 2 + > server/nbdkit.syms | 1 + > server/public.c | 37 +++++++++++++ > server/test-public.c | 73 +++++++++++++++++++++++++ > plugins/ocaml/NBDKit.mli | 11 ++-- > plugins/ocaml/NBDKit.ml | 2 + > plugins/ocaml/bindings.c | 16 ++++++ > plugins/python/modfunctions.c | 21 +++++++ > tests/test-python-plugin.py | 12 ++++ > tests/test_ocaml_plugin.ml | 1 + > 12 files changed, 196 insertions(+), 5 deletions(-) > > diff --git a/docs/nbdkit-plugin.pod b/docs/nbdkit-plugin.pod > index 860c5cecb..e8d30a98e 100644 > --- a/docs/nbdkit-plugin.pod > +++ b/docs/nbdkit-plugin.pod > @@ -1433,6 +1433,25 @@ C<str> can be a string containing a case-insensitive form of various > common toggle values. The function returns 0 or 1 if the parse was > successful. If there was an error, it returns C<-1>. > > +=head2 Parsing probabilities > + > +Use the C<nbdkit_parse_probability> utility function to parse > +probabilities. Common formats understood include: C<"0.1">, C<"10%"> > +or C<"1:10">, which all mean a probability of 1 in 10. > + > + int nbdkit_parse_probability (const char *what, const char *str, > + double *ret); > + > +The C<what> parameter is printed in error messages to provide context. > +The C<str> parameter is the probability string. > + > +On success the function returns C<0> and sets C<*ret>. B<Note> that > +the probability returned may be outside the range S<[ 0.0..1.0 ]>, for > +example if C<str == "200%">. If you want to clamp the result you must > +check that yourself.Should we at least guarantee that the return value is non-negative (other than potentially -0.0 which compares equal to 0.0) and finite? I don't see how a negative or infinite probability will ever be useful (with apologies to Douglas Adams' infinite improbability drive).> +++ b/server/public.c > @@ -421,6 +421,43 @@ nbdkit_parse_size (const char *str) > return size * scale; > } > > +NBDKIT_DLL_PUBLIC int > +nbdkit_parse_probability (const char *what, const char *str, > + double *retp) > +{ > + double d, d2; > + char c; > + int n; > + > + if (sscanf (str, "%lg%[:/]%lg%n", &d, &c, &d2, &n) == 3 && > + strcmp (&str[n], "") == 0) { /* N:M or N/M */ > + if (d == 0 && d2 == 0) /* 0/0 is OK */I'd write this 'if (d == 0.0 && d2 == 0.0)' to make it obvious that we know we are doing floating point comparisons here (semantics are the same either way, though, because of how integer 0 promotes under standard arithmetic promotion).> + ; > + else if (d2 == 0) /* N/0 is bad */ > + goto bad_parse; > + else > + d /= d2; > + } > + else if (sscanf (str, "%lg%n", &d, &n) == 1) { > + if (strcmp (&str[n], "%") == 0) /* percentage */ > + d /= 100.0; > + else if (strcmp (&str[n], "") == 0) /* probability */ > + ; > + else > + goto bad_parse; > + } > + else > + goto bad_parse;Here's where it might be worth adding: if (d < 0.0 || !isfinite (d)) goto bad_parse; to filter out the cases where the user passed in "inf", "nan", or even some form of large/small that results in overflow. You caould also use 'signbit (d)' instead of 'd < 0.0' if you want to prevent -0.0 from causing surprises (while IEEE 754 and therefore the compiler treats '0.0 == -0.0' as true despite being different bit patterns, x/0.0 and x/-0.0 for finite x differ in behavior).> + > + if (retp) > + *retp = d; > + return 0; > + > + bad_parse: > + nbdkit_error ("%s: could not parse '%s'", what, str);Should this say "could not parse '%s' as a probability" to help the user search the documntation for what forms can be parsed?> + return -1;If you get here, *retp was unchanged. [1]> +} > + > /* Parse a string as a boolean, or return -1 after reporting the error. > */ > NBDKIT_DLL_PUBLIC int > diff --git a/server/test-public.c b/server/test-public.c > index 676411290..0d84abdd2 100644 > --- a/server/test-public.c > +++ b/server/test-public.c > @@ -200,6 +200,78 @@ test_nbdkit_parse_size (void) > return pass; > } > > +static bool > +test_nbdkit_parse_probability (void) > +{ > + size_t i; > + bool pass = true; > + struct pair { > + const char *str; > + int result; > + double expected; > + } tests[] = { > + /* Bogus strings */ > + { "", -1 }, > + { "garbage", -1 }, > + { "0garbage", -1 }, > + { "1X", -1 }, > + { "1%%", -1 }, > + { "1:", -1 }, > + { "1:1:1", -1 }, > + { "1:0", -1 }, /* format is valid but divide by zero is not allowed */ > + { "1/", -1 }, > + { "1/2/3", -1 },If we add my proposed check above for filtering out negatives and non-finite values, we could also test that things like "-1", "inf", "nan", "1e200/1e-200" are rejected. Likewise worth adding a test for "-0" (whether you decide to permit or reject it).> + > + /* Numbers. */ > + { "0", 0, 0 }, > + { "1", 0, 1 }, > + { "2", 0, 2 }, /* values outside [0..1] range are allowed */ > + { "0.1", 0, 0.1 }, > + { "0.5", 0, 0.5 }, > + { "0.9", 0, 0.9 }, > + { "1.0000", 0, 1 }, > + > + /* Percentages. */ > + { "0%", 0, 0 }, > + { "50%", 0, 0.5 }, > + { "100%", 0, 1 }, > + { "90.25%", 0, 0.9025 }, > + > + /* N in M */ > + { "1:1000", 0, 0.001 }, > + { "1/1000", 0, 0.001 }, > + { "2:99", 0, 2.0/99 }, > + { "2/99", 0, 2.0/99 }, > + { "0:1000000", 0, 0 },Since you document in patch 5 that we have a default percentage of "1e-8", it is worth testing that as an explicitly supported string.> + }; > + > + for (i = 0; i < ARRAY_SIZE (tests); i++) { > + int r; > + double d;Uninitialized...> + > + error_flagged = false; > + r = nbdkit_parse_probability ("test", tests[i].str, &d);...and per [1] above, there are code paths where d is not assigned...> + if (r != tests[i].result) { > + fprintf (stderr, > + "Wrong return value for %s, got %d, expected %d\n", > + tests[i].str, r, tests[i].result); > + pass = false; > + } > + if (r == 0 && d != tests[i].expected) { > + fprintf (stderr, > + "Wrong result for %s, got %g, expected %g\n", > + tests[i].str, d, tests[i].expected);...so this can end up printing garbage. You may want to consider having nbdkit_parse_probability assign into d on all code paths, not just success.> + pass = false; > + } > + if ((r == -1) != error_flagged) { > + fprintf (stderr, "Wrong error message handling for %s\n", tests[i].str); > + pass = false; > + } > + } > + > + return pass; > +} > + > static bool > test_nbdkit_parse_ints (void) > { > @@ -503,6 +575,7 @@ main (int argc, char *argv[]) > { > bool pass = true; > pass &= test_nbdkit_parse_size (); > + pass &= test_nbdkit_parse_probability (); > pass &= test_nbdkit_parse_ints (); > pass &= test_nbdkit_read_password (); > /* nbdkit_absolute_path and nbdkit_nanosleep not unit-tested here, but> +++ b/plugins/ocaml/NBDKit.ml > @@ -160,6 +160,8 @@ let register_plugin ~name > (* Bindings to nbdkit server functions. *) > external set_error : Unix.error -> unit = "ocaml_nbdkit_set_error" [@@noalloc] > external parse_size : string -> int64 = "ocaml_nbdkit_parse_size" > +external parse_probability : string -> string -> float > + "ocaml_nbdkit_parse_probability"Is OCaml 'float' required to be any specific floating point (such as IEEE 754 binary64), or is it some nebulous hardware-specific floating point (possibly 32-bit instead of 64-bit, or even permitting VAX instead of IEEE)? But how OCaml maps floating point is not a show-stopper to this patch, since we already state OCaml bindings don't have ABI stability like C bindings. (I understand why modern languages have picked 'float' to mean floating-point while still favoring the IEEE 754 binary64 representations, where C is the odd one out that picked 'float' as binary32 and is stuck with the type name 'double' which has no resemblance to floating-point but rather to its size difference.) [Side note: if you really want a trip, read the 2023 SIGBOVIK article on "GradIEEEnt half decent" about 16-bit floating point values being exploited for their non-linear rounding properties as a way to create non-monotonic functions that can in turn form the basis of a Turing complete system capable of running a 36-second solution of Mario level 1-1 in 19k minutes of wall time using only half-precision floating-point operations... https://sigbovik.org/2023/, http://tom7.org/grad/murphy2023grad.pdf]> +++ b/plugins/python/modfunctions.c > @@ -122,11 +122,32 @@ parse_size (PyObject *self, PyObject *args) > return PyLong_FromSize_t ((size_t)size); > } > > +/* nbdkit.parse_probability */ > +static PyObject * > +parse_probability (PyObject *self, PyObject *args) > +{ > + const char *what, *str; > + double d; > + > + if (!PyArg_ParseTuple (args, "ss:parse_probability", &what, &str)) > + return NULL; > + > + if (nbdkit_parse_probability (what, str, &d) == -1) { > + PyErr_SetString (PyExc_ValueError, > + "Unable to parse string as probability"); > + return NULL; > + } > + > + return PyFloat_FromDouble (d);Another language binding that does not directly guarantee that 'Float' is an IEEE 64-bit value, but where we are probably safe: https://stackoverflow.com/questions/70184494/on-what-systems-does-python-not-use-ieee-754-double-precision-floats -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org