Richard W.M. Jones
2012-Feb-15  19:31 UTC
[Libguestfs] [PATCH 0/2] Make appliance building thread-safe (RHBZ#790721).
These two patches make appliance building thread-safe. The first adds a test which easily demonstrates the problem. The second fixes the issue. For more information see Ian McLeod's analysis of the bug here: https://bugzilla.redhat.com/show_bug.cgi?id=790528#c5 Rich.
Richard W.M. Jones
2012-Feb-15  19:31 UTC
[Libguestfs] [PATCH 1/2] tests: Test parallel launch from multiple threads.
From: "Richard W.M. Jones" <rjones at redhat.com>
---
 .gitignore                     |    1 +
 tests/regressions/Makefile.am  |   12 +++-
 tests/regressions/rhbz790721.c |  171 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 183 insertions(+), 1 deletions(-)
 create mode 100644 tests/regressions/rhbz790721.c
diff --git a/.gitignore b/.gitignore
index adce922..785e763 100644
--- a/.gitignore
+++ b/.gitignore
@@ -389,6 +389,7 @@ tests/guests/guest-aux/fedora-packages.db
 tests/guests/ubuntu.img
 tests/guests/windows.img
 tests/regressions/rhbz501893
+tests/regressions/rhbz790721
 test-tool/libguestfs-test-tool
 test-tool/libguestfs-test-tool.1
 test-tool/libguestfs-test-tool-helper
diff --git a/tests/regressions/Makefile.am b/tests/regressions/Makefile.am
index 3aed600..a2a3673 100644
--- a/tests/regressions/Makefile.am
+++ b/tests/regressions/Makefile.am
@@ -27,6 +27,7 @@ TESTS = \
 	rhbz602997.sh \
 	rhbz690819.sh \
 	rhbz789960.sh \
+	rhbz790721 \
 	test-noexec-stack.pl
 
 tests_not_run = \
@@ -40,7 +41,8 @@ TESTS_ENVIRONMENT = \
 	$(top_builddir)/run
 
 check_PROGRAMS = \
-	rhbz501893
+	rhbz501893 \
+	rhbz790721
 
 rhbz501893_SOURCES = rhbz501893.c
 rhbz501893_CFLAGS = \
@@ -49,6 +51,14 @@ rhbz501893_CFLAGS = \
 rhbz501893_LDADD = \
 	$(top_builddir)/src/libguestfs.la
 
+rhbz790721_SOURCES = rhbz790721.c
+rhbz790721_CFLAGS = \
+	-pthread \
+	-I$(top_srcdir)/src -I$(top_builddir)/src \
+	$(WARN_CFLAGS) $(WERROR_CFLAGS)
+rhbz790721_LDADD = \
+	$(top_builddir)/src/libguestfs.la
+
 EXTRA_DIST = \
 	$(TESTS) \
 	$(tests_not_run) \
diff --git a/tests/regressions/rhbz790721.c b/tests/regressions/rhbz790721.c
new file mode 100644
index 0000000..b30e3db
--- /dev/null
+++ b/tests/regressions/rhbz790721.c
@@ -0,0 +1,171 @@
+/* libguestfs
+ * Copyright (C) 2012 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
USA.
+ */
+
+/* Regression test for RHBZ#790721.
+ *
+ * This bug involves locking issues when building the appliance in
+ * parallel from multiple threads in the same process.  We use a read
+ * lock on the 'checksum' file, and it turns out this causes two
+ * problems: (1) locks don't have any effect on threads in the same
+ * process, and (2) because the PID is identical in different threads,
+ * the file we are trying to overwrite has the same name.
+ *
+ * To test this we want to create the appliance repeatedly from
+ * multiple threads, but we don't really care about launching the full
+ * qemu (a waste of time and memory for this test).  Therefore replace
+ * qemu with a fake process and just look for the linking error.
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include <pthread.h>
+
+#include "guestfs.h"
+
+/* Number of worker threads running the test. */
+#define NR_THREADS 20
+
+static pthread_barrier_t barrier;
+static void *start_thread (void *);
+
+int
+main (int argc, char *argv[])
+{
+  pthread_t thread[NR_THREADS];
+  int data[NR_THREADS];
+  int i, r, errors;
+
+  /* Ensure error messages are not translated. */
+  setenv ("LC_ALL", "C", 1);
+
+  pthread_barrier_init (&barrier, NULL, NR_THREADS);
+
+  /* Create the other threads which will set up their own libguestfs
+   * handle then wait at a barrier before launching.
+   */
+  for (i = 0; i < NR_THREADS; ++i) {
+    data[i] = i;
+    r = pthread_create (&thread[i], NULL, start_thread, &data[i]);
+    if (r != 0) {
+      fprintf (stderr, "pthread_create: %s\n", strerror (r));
+      exit (EXIT_FAILURE);
+    }
+  }
+
+  /* Wait for the threads to exit. */
+  errors = 0;
+
+  for (i = 0; i < NR_THREADS; ++i) {
+    int *ret;
+
+    r = pthread_join (thread[i], (void **) &ret);
+    if (r != 0) {
+      fprintf (stderr, "pthread_join: %s\n", strerror (r));
+      exit (EXIT_FAILURE);
+    }
+    if (*ret == -1)
+      errors++;
+  }
+
+  exit (errors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
+}
+
+static void *
+start_thread (void *vi)
+{
+  int thread_num = *(int *)vi;
+  guestfs_h *g;
+  int r;
+  guestfs_error_handler_cb old_error_cb;
+  void *old_error_data;
+  const char *error;
+
+  g = guestfs_create ();
+  if (g == NULL) {
+    perror ("guestfs_create");
+    *(int *)vi = -1;
+    pthread_exit (vi);
+  }
+
+  if (guestfs_add_drive_opts (g, "/dev/null",
+                              GUESTFS_ADD_DRIVE_OPTS_FORMAT, "raw",
+                              GUESTFS_ADD_DRIVE_OPTS_READONLY, 1,
+                              -1) == -1) {
+    *(int *)vi = -1;
+    pthread_exit (vi);
+  }
+
+  /* Fake out qemu. */
+  if (guestfs_set_qemu (g, "/bin/true") == -1) {
+    *(int *)vi = -1;
+    pthread_exit (vi);
+  }
+
+  /* Wait for the other threads to finish starting up. */
+  r = pthread_barrier_wait (&barrier);
+  if (r != 0 && r != PTHREAD_BARRIER_SERIAL_THREAD) {
+    fprintf (stderr, "pthread_barrier_wait: %s\n", strerror (r));
+    *(int *)vi = -1;
+    pthread_exit (vi);
+  }
+
+  /* Launch the handle.  Because of the faked out qemu, we expect this
+   * will fail with "child process died unexpectedly".  We are
+   * interested in other failures.
+   */
+  old_error_cb = guestfs_get_error_handler (g, &old_error_data);
+  guestfs_set_error_handler (g, NULL, NULL);
+  r = guestfs_launch (g);
+  error = guestfs_last_error (g);
+
+  if (r == 0) { /* This should NOT happen. */
+    fprintf (stderr, "rhbz790721: strangeness in test: expected launch to
fail, but it didn't!\n");
+    *(int *)vi = -1;
+    pthread_exit (vi);
+  }
+
+  if (error == NULL) { /* This also should NOT happen. */
+    fprintf (stderr, "rhbz790721: strangeness in test: no error
message!\n");
+    *(int *)vi = -1;
+    pthread_exit (vi);
+  }
+
+  /* If this happens, it indicates a bug/race in the appliance
+   * building code which is what this regression test is designed to
+   * spot.
+   */
+  if (strcmp (error, "child process died unexpectedly") != 0) {
+    fprintf (stderr, "rhbz790721: error: %s\n", error);
+    *(int *)vi = -1;
+    pthread_exit (vi);
+  }
+
+  guestfs_set_error_handler (g, old_error_cb, old_error_data);
+
+  /* Close the handle. */
+  guestfs_close (g);
+
+  *(int *)vi = 0;
+  pthread_exit (vi);
+}
-- 
1.7.9
Richard W.M. Jones
2012-Feb-15  19:31 UTC
[Libguestfs] [PATCH 2/2] appliance: Make appliance building thread-safe (RHBZ#790721).
From: "Richard W.M. Jones" <rjones at redhat.com>
Appliance building can be called from multiple processes, but this is
only safe if each process holds a lock on the 'checksum' file.
However threads within a process are not excluded by a file lock, and
so this strategy completely failed for a multithreaded program calling
guestfs_launch in parallel.
Since it makes no sense for threads in a single program to race each
other to try to create the appliance, add a lock around appliance
building.
This serialises building the appliance, but the rest of guestfs_launch
(eg. starting up qemu) can run in parallel.
---
 src/launch.c |   13 ++++++++++++-
 1 files changed, 12 insertions(+), 1 deletions(-)
diff --git a/src/launch.c b/src/launch.c
index 4e2fba9..124e758 100644
--- a/src/launch.c
+++ b/src/launch.c
@@ -477,6 +477,13 @@ guestfs__launch (guestfs_h *g)
   }
 }
 
+/* RHBZ#790721: It makes no sense to have multiple threads racing to
+ * build the appliance from within a single process, and the code
+ * isn't safe for that anyway.  Therefore put a thread lock around
+ * appliance building.
+ */
+gl_lock_define_initialized (static, building_lock);
+
 static int
 launch_appliance (guestfs_h *g)
 {
@@ -501,8 +508,12 @@ launch_appliance (guestfs_h *g)
 
   /* Locate and/or build the appliance. */
   char *kernel = NULL, *initrd = NULL, *appliance = NULL;
-  if (guestfs___build_appliance (g, &kernel, &initrd, &appliance)
== -1)
+  gl_lock_lock (building_lock);
+  if (guestfs___build_appliance (g, &kernel, &initrd, &appliance)
== -1) {
+    gl_lock_unlock (building_lock);
     return -1;
+  }
+  gl_lock_unlock (building_lock);
 
   TRACE0 (launch_build_appliance_end);
 
-- 
1.7.9
Apparently Analagous Threads
- Fix virt-edit so it preserves permissions (RHBZ#788641)
- [PATCH 0/3] Fix guestfish edit command.
- [PATCH 0/4] fish: Allow the glob command to expand device patterns (RHBZ#635971).
- [PATCH libguestfs 0/4] Add a libvirt backend to libguestfs.
- [PATCH] arm: appliance: Add support for device trees (dtb's).