Richard W.M. Jones
2018-Dec-28  20:55 UTC
[Libguestfs] [PATCH v2 nbdkit] common: Improve pseudo-random number generation.
v2: - Fix seeding. - Add a test that nbdkit-random-plugin is producing something which looks at least somewhat random. Rich.
Richard W.M. Jones
2018-Dec-28  20:55 UTC
[Libguestfs] [PATCH v2 nbdkit] common: Improve pseudo-random number generation.
Currently we use non-cryptographically secure random numbers in two
places, the error filter (to inject errors at random) and the random
plugin.  For this we have used either random_r or a home-brew-ish
Linear Congruential Generator.  Use of random_r is problematic on BSDs
because it doesn't exist there.  Use of the LCG is simply a bad
choice.
Replace both uses with a better quality and faster PRNG from David
Blackman and Sebastiano Vigna called ‘xoshiro256** 1.0’
(http://xoshiro.di.unimi.it/).  This is released into the public
domain (where possible) so it compatible with the licensing of nbdkit.
This also fixes a bug in the random plugin where it could never
generate the byte 255 because I used modulo 255 instead of modulo 256
arithmetic.  Ooops.
This also adds a simple sniff test that the random plugin is producing
something which at least looks like random data.
---
 plugins/random/nbdkit-random-plugin.pod |   5 +-
 common/include/random.h                 | 101 ++++++++++++++++++++++++
 filters/error/error.c                   |  28 +++----
 plugins/random/random.c                 |  46 +++++------
 tests/test-random.c                     |  23 ++++++
 common/include/Makefile.am              |   1 +
 plugins/random/Makefile.am              |   1 +
 7 files changed, 160 insertions(+), 45 deletions(-)
diff --git a/plugins/random/nbdkit-random-plugin.pod
b/plugins/random/nbdkit-random-plugin.pod
index 5dfe801..750daab 100644
--- a/plugins/random/nbdkit-random-plugin.pod
+++ b/plugins/random/nbdkit-random-plugin.pod
@@ -15,9 +15,8 @@ The size of the virtual disk must be specified using the
C<size>
 parameter.  If you specify the C<seed> parameter then you will get the
 same random data over multiple runs with the same seed.
 
-The random data is generated using an I<in>secure method (a Linear
-Congruential Generator).  This plugin is mainly good for testing NBD
-clients.
+The random data is generated using an I<insecure> method.  This plugin
+is mainly good for testing NBD clients.
 
 =head1 PARAMETERS
 
diff --git a/common/include/random.h b/common/include/random.h
new file mode 100644
index 0000000..9d90352
--- /dev/null
+++ b/common/include/random.h
@@ -0,0 +1,101 @@
+/* nbdkit
+ * Copyright (C) 2018 Red Hat Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS
IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef NBDKIT_RANDOM_H
+#define NBDKIT_RANDOM_H
+
+#include <stdint.h>
+
+/* Generate pseudo-random numbers, quickly, with explicit state.
+ *
+ * This is based on the xoshiro/xoroshiro generators by David Blackman
+ * and Sebastiano Vigna at http://xoshiro.di.unimi.it/ .  Specifically
+ * this is ‘xoshiro256** 1.0’.
+ *
+ * This does _NOT_ generate cryptographically secure random numbers
+ * (CSPRNG) and so should not be used when cryptography or security is
+ * required - use gcrypt if you need those.
+ */
+
+/* You can seed ‘struct random_state’ by setting the s[] elements
+ * directly - but not you must NOT set it all to zero.  OR if you have
+ * a 64 bit seed, you can use xsrandom below to initialize the state.
+ */
+struct random_state {
+  uint64_t s[4];
+};
+
+static inline uint64_t
+snext (uint64_t *seed)
+{
+  uint64_t z = (*seed += 0x9e3779b97f4a7c15);
+  z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
+  z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
+  return z ^ (z >> 31);
+}
+
+static inline void
+xsrandom (uint64_t seed, struct random_state *state)
+{
+  state->s[0] = snext (&seed);
+  state->s[1] = snext (&seed);
+  state->s[2] = snext (&seed);
+  state->s[3] = snext (&seed);
+}
+
+static inline uint64_t
+rotl (const uint64_t x, int k)
+{
+  return (x << k) | (x >> (64 - k));
+}
+
+/* Returns 64 random bits.  Updates the state. */
+static inline uint64_t
+xrandom (struct random_state *state)
+{
+  const uint64_t result_starstar = rotl (state->s[1] * 5, 7) * 9;
+  const uint64_t t = state->s[1] << 17;
+
+  state->s[2] ^= state->s[0];
+  state->s[3] ^= state->s[1];
+  state->s[1] ^= state->s[2];
+  state->s[0] ^= state->s[3];
+
+  state->s[2] ^= t;
+
+  state->s[3] = rotl (state->s[3], 45);
+
+  return result_starstar;
+}
+
+#endif /* NBDKIT_RANDOM_H */
diff --git a/filters/error/error.c b/filters/error/error.c
index 8509a16..9f33910 100644
--- a/filters/error/error.c
+++ b/filters/error/error.c
@@ -44,6 +44,8 @@
 
 #include <nbdkit-filter.h>
 
+#include "random.h"
+
 #define THREAD_MODEL NBDKIT_THREAD_MODEL_PARALLEL
 
 struct error_settings {
@@ -222,17 +224,13 @@ error_config (nbdkit_next_config *next, void *nxdata,
   "                               Apply settings only to
read/write/trim/zero"
 
 struct handle {
-#ifdef __GNU_LIBRARY__
-  struct random_data rd;
-  char rd_state[32];
-#endif
+  struct random_state random_state;
 };
 
 static void *
 error_open (nbdkit_next_open *next, void *nxdata, int readonly)
 {
   struct handle *h;
-  time_t t;
 
   if (next (nxdata, readonly) == -1)
     return NULL;
@@ -242,11 +240,7 @@ error_open (nbdkit_next_open *next, void *nxdata, int
readonly)
     nbdkit_error ("malloc: %m");
     return NULL;
   }
-#ifdef __GNU_LIBRARY__
-  memset (&h->rd, 0, sizeof h->rd);
-  time (&t);
-  initstate_r (t, (char *) &h->rd_state, sizeof h->rd_state,
&h->rd);
-#endif
+  xsrandom (0, &h->random_state);
   return h;
 }
 
@@ -264,7 +258,7 @@ random_error (struct handle *h,
               const struct error_settings *error_settings,
               const char *fn, int *err)
 {
-  int32_t rand;
+  uint64_t rand;
 
   if (error_settings->rate <= 0)       /* 0% = never inject */
     return false;
@@ -278,12 +272,12 @@ random_error (struct handle *h,
   if (error_settings->rate >= 1)       /* 100% = always inject */
     goto inject;
 
-#ifdef __GNU_LIBRARY__
-  random_r (&h->rd, &rand);
-#else
-  rand = random ();
-#endif
-  if (rand >= error_settings->rate * RAND_MAX)
+  /* To avoid the question if (double)1.0 * UINT64_MAX is
+   * representable in a 64 bit integer, and because we don't need all
+   * this precision anyway, let's work in 32 bits.
+   */
+  rand = xrandom (&h->random_state) & UINT32_MAX;
+  if (rand >= error_settings->rate * UINT32_MAX)
     return false;
 
  inject:
diff --git a/plugins/random/random.c b/plugins/random/random.c
index 8adc26e..72caaed 100644
--- a/plugins/random/random.c
+++ b/plugins/random/random.c
@@ -44,32 +44,14 @@
 #define NBDKIT_API_VERSION 2
 #include <nbdkit-plugin.h>
 
+#include "random.h"
+
 /* The size of disk in bytes (initialized by size=<SIZE> parameter). */
 static int64_t size = 0;
 
 /* Seed. */
 static uint32_t seed;
 
-/* We use a Linear Congruential Generator (LCG) to make low quality
- * but easy to compute random numbers.  However we're not quite using
- * it in the ordinary way.  In order to be able to read any byte of
- * data without needing to run the LCG from the start, the random data
- * is computed from the index and seed through two rounds of LCG:
- *
- * index i     LCG(seed) -> +i   -> LCG -> LCG -> mod 256 ->
b[i]
- * index i+1   LCG(seed) -> +i+1 -> LCG -> LCG -> mod 256 ->
b[i+1]
- * etc
- *
- * This LCG is the same one as used in glibc.
- */
-static inline uint32_t
-LCG (uint32_t s)
-{
-  s *= 1103515245;
-  s += 12345;
-  return s;
-}
-
 static void
 random_load (void)
 {
@@ -135,13 +117,27 @@ random_pread (void *handle, void *buf, uint32_t count,
uint64_t offset,
 {
   uint32_t i;
   unsigned char *b = buf;
-  uint32_t s;
+  uint64_t s;
 
   for (i = 0; i < count; ++i) {
-    s = LCG (seed) + offset + i;
-    s = LCG (s);
-    s = LCG (s);
-    s = s % 255;
+    /* We use nbdkit common/include/random.h to make random numbers.
+     *
+     * However we're not quite using it in the ordinary way.  In order
+     * to be able to read any byte of data without needing to run the
+     * PRNG from the start, the random data is computed from the index
+     * and seed through three rounds of PRNG:
+     *
+     * index i     PRNG(seed+i)   -> PRNG -> PRNG -> mod 256 ->
b[i]
+     * index i+1   PRNG(seed+i+1) -> PRNG -> PRNG -> mod 256 ->
b[i+1]
+     * etc
+     */
+    struct random_state state;
+
+    xsrandom (seed + offset + i, &state);
+    xrandom (&state);
+    xrandom (&state);
+    s = xrandom (&state);
+    s &= 255;
     b[i] = s;
   }
   return 0;
diff --git a/tests/test-random.c b/tests/test-random.c
index 168331c..c02fdc2 100644
--- a/tests/test-random.c
+++ b/tests/test-random.c
@@ -48,6 +48,8 @@
 #define SIZE 1024*1024
 static char data[SIZE];
 
+static unsigned histogram[256];
+
 /* After reading the whole disk above, we then read randomly selected
  * subsets of the disk and compare the data (it should be identical).
  */
@@ -98,6 +100,27 @@ main (int argc, char *argv[])
   memcpy (data, rdata, SIZE);
   free (rdata);
 
+  /* Test that the data is sufficiently random using a simple
+   * histogram.  This just tests for gross errors and is not a
+   * complete statistical study.
+   */
+  for (i = 0; i < SIZE; ++i) {
+    unsigned char c = (unsigned char) data[i];
+    histogram[c]++;
+  }
+  for (i = 0; i < 256; ++i) {
+    const unsigned expected = SIZE / 256;
+    if (histogram[i] < 80 * expected / 100) {
+      fprintf (stderr, "test-random: "
+               "random data is not uniformly distributed\n"
+               "eg. byte %d occurs %u times (expected about %u
times)\n",
+               i, histogram[i], expected);
+      exit (EXIT_FAILURE);
+    }
+  }
+
+  /* Randomly read parts of the disk to ensure we get the same data.
+   */
   srandom (time (NULL));
   for (i = 0; i < NR_READS; ++i) {
     offset = random ();
diff --git a/common/include/Makefile.am b/common/include/Makefile.am
index 81f4804..f96bab5 100644
--- a/common/include/Makefile.am
+++ b/common/include/Makefile.am
@@ -41,4 +41,5 @@ EXTRA_DIST = \
 	isaligned.h \
 	ispowerof2.h \
 	iszero.h \
+	random.h \
 	rounding.h
diff --git a/plugins/random/Makefile.am b/plugins/random/Makefile.am
index d990158..0a84774 100644
--- a/plugins/random/Makefile.am
+++ b/plugins/random/Makefile.am
@@ -41,6 +41,7 @@ nbdkit_random_plugin_la_SOURCES = \
 	$(top_srcdir)/include/nbdkit-plugin.h
 
 nbdkit_random_plugin_la_CPPFLAGS = \
+	-I$(top_srcdir)/common/include \
 	-I$(top_srcdir)/include
 nbdkit_random_plugin_la_CFLAGS = \
 	$(WARNINGS_CFLAGS)
-- 
2.19.2
Eric Blake
2018-Dec-31  15:35 UTC
Re: [Libguestfs] [PATCH v2 nbdkit] common: Improve pseudo-random number generation.
On 12/28/18 2:55 PM, Richard W.M. Jones wrote:> Currently we use non-cryptographically secure random numbers in two > places, the error filter (to inject errors at random) and the random > plugin. For this we have used either random_r or a home-brew-ish > Linear Congruential Generator. Use of random_r is problematic on BSDs > because it doesn't exist there. Use of the LCG is simply a bad > choice. > > Replace both uses with a better quality and faster PRNG from David > Blackman and Sebastiano Vigna called ‘xoshiro256** 1.0’ > (http://xoshiro.di.unimi.it/). This is released into the public > domain (where possible) so it compatible with the licensing of nbdkit. > > This also fixes a bug in the random plugin where it could never > generate the byte 255 because I used modulo 255 instead of modulo 256 > arithmetic. Ooops. > > This also adds a simple sniff test that the random plugin is producing > something which at least looks like random data. > ---> +#include <stdint.h> > + > +/* Generate pseudo-random numbers, quickly, with explicit state. > + * > + * This is based on the xoshiro/xoroshiro generators by David Blackman > + * and Sebastiano Vigna at http://xoshiro.di.unimi.it/ . Specifically > + * this is ‘xoshiro256** 1.0’. > + * > + * This does _NOT_ generate cryptographically secure random numbers > + * (CSPRNG) and so should not be used when cryptography or security is > + * required - use gcrypt if you need those. > + */ > + > +/* You can seed ‘struct random_state’ by setting the s[] elements > + * directly - but not you must NOT set it all to zero. OR if you haves/not you/note you/> + * a 64 bit seed, you can use xsrandom below to initialize the state. > + */ > +struct random_state { > + uint64_t s[4]; > +}; > + > +static inline uint64_t > +snext (uint64_t *seed) > +{ > + uint64_t z = (*seed += 0x9e3779b97f4a7c15); > + z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; > + z = (z ^ (z >> 27)) * 0x94d049bb133111eb; > + return z ^ (z >> 31); > +}Okay, that matches http://xoshiro.di.unimi.it/splitmix64.c. And even though there is one particular value of seed which can result in snext() returning 0 (namely 0x61c8864680b583eb), the fact that xsrandom() calls snext() four times means that random_state is guaranteed to have at least three non-zero values, which was a documented prerequisite of the seeding.> +/* Returns 64 random bits. Updates the state. */ > +static inline uint64_t > +xrandom (struct random_state *state) > +{ > + const uint64_t result_starstar = rotl (state->s[1] * 5, 7) * 9; > + const uint64_t t = state->s[1] << 17; > + > + state->s[2] ^= state->s[0]; > + state->s[3] ^= state->s[1]; > + state->s[1] ^= state->s[2]; > + state->s[0] ^= state->s[3]; > + > + state->s[2] ^= t; > + > + state->s[3] = rotl (state->s[3], 45); > + > + return result_starstar;and that matches http://xoshiro.di.unimi.it/xoshiro256starstar.c Looks like a reasonable copy of the PRNG implementation, and it is nice that the reference site includes enough information to justify this choice of a PRNG.> +++ b/plugins/random/random.c> /* Seed. */ > static uint32_t seed; >Interesting that you kept this as a 32-bit number; I don't see it as being a real drawback, though, especially since...> @@ -135,13 +117,27 @@ random_pread (void *handle, void *buf, uint32_t count, uint64_t offset, > { > uint32_t i; > unsigned char *b = buf; > - uint32_t s; > + uint64_t s; > > for (i = 0; i < count; ++i) { > - s = LCG (seed) + offset + i; > - s = LCG (s); > - s = LCG (s); > - s = s % 255;Oops indeed - %255 and &255 are quite different operations :)> + /* We use nbdkit common/include/random.h to make random numbers. > + * > + * However we're not quite using it in the ordinary way. In order > + * to be able to read any byte of data without needing to run the > + * PRNG from the start, the random data is computed from the index > + * and seed through three rounds of PRNG: > + * > + * index i PRNG(seed+i) -> PRNG -> PRNG -> mod 256 -> b[i] > + * index i+1 PRNG(seed+i+1) -> PRNG -> PRNG -> mod 256 -> b[i+1] > + * etc > + */ > + struct random_state state; > + > + xsrandom (seed + offset + i, &state);...you are adding in 64-bit offset for every re-seed. And my above analysis of the seeding function shows that there is no magic seed value which can result in random_state being all zeros. It's also nice that you keep explicit state here and separately in error.c, so that using the filter and the plugin together does not cause contention where the repeated reseeding from the plugin could negatively affect the error rates.> +++ b/tests/test-random.c> + * complete statistical study. > + */ > + for (i = 0; i < SIZE; ++i) { > + unsigned char c = (unsigned char) data[i]; > + histogram[c]++; > + } > + for (i = 0; i < 256; ++i) { > + const unsigned expected = SIZE / 256; > + if (histogram[i] < 80 * expected / 100) { > + fprintf (stderr, "test-random: " > + "random data is not uniformly distributed\n" > + "eg. byte %d occurs %u times (expected about %u times)\n", > + i, histogram[i], expected);Sane enough for a first-order analysis. The typo fix is trivial, then this should be good to go. -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Apparently Analagous Threads
- [PATCH v2 nbdkit] common: Improve pseudo-random number generation.
- Re: [PATCH v2 nbdkit] common: Improve pseudo-random number generation.
- [PATCH nbdkit] common: Improve pseudo-random number generation.
- [PATCH nbdkit] include: Annotate function parameters with attribute((nonnull)).
- [PATCH nbdkit v2 0/2] Use of attribute(()).