Richard W.M. Jones
2016-May-16 18:20 UTC
[Libguestfs] [PATCH] launch: direct: Add DAX root filesystem support.
Allow the appliance / root filesystem to be placed on a virtual NVDIMM and accessed directly by the guest kernel (DAX). This requires corresponding changes in supermin. --- src/guestfs-internal.h | 1 + src/launch-direct.c | 68 ++++++++++++++++++++++++++++++++++++++++---------- src/launch.c | 8 +++++- 3 files changed, 63 insertions(+), 14 deletions(-) diff --git a/src/guestfs-internal.h b/src/guestfs-internal.h index d325f50..3655219 100644 --- a/src/guestfs-internal.h +++ b/src/guestfs-internal.h @@ -782,6 +782,7 @@ extern int64_t guestfs_int_timeval_diff (const struct timeval *x, const struct t extern void guestfs_int_launch_send_progress (guestfs_h *g, int perdozen); extern char *guestfs_int_appliance_command_line (guestfs_h *g, const char *appliance_dev, int flags); #define APPLIANCE_COMMAND_LINE_IS_TCG 1 +#define APPLIANCE_COMMAND_LINE_USE_ACPI 2 const char *guestfs_int_get_cpu_model (int kvm); int guestfs_int_create_socketname (guestfs_h *g, const char *filename, char (*sockname)[UNIX_PATH_MAX]); extern void guestfs_int_register_backend (const char *name, const struct backend_ops *); diff --git a/src/launch-direct.c b/src/launch-direct.c index 01b7e07..a005bda 100644 --- a/src/launch-direct.c +++ b/src/launch-direct.c @@ -234,6 +234,7 @@ launch_direct (guestfs_h *g, void *datav, const char *arg) struct hv_param *hp; bool has_kvm; int force_tcg; + bool dax; const char *cpu_model; /* At present you must add drives before starting the appliance. In @@ -371,15 +372,29 @@ launch_direct (guestfs_h *g, void *datav, const char *arg) warning (g, "qemu debugging is enabled, connect gdb to tcp::1234 to begin"); } + /* Can we use DAX? */ +#ifdef __x86_64__ + dax = guestfs_int_qemu_version_ge (data->qemu_data, 2, 6) && + guestfs_int_qemu_supports_device (g, data->qemu_data, "nvdimm"); +#else + dax = false; +#endif + ADD_CMDLINE ("-machine"); ADD_CMDLINE_PRINTF ( #ifdef MACHINE_TYPE MACHINE_TYPE "," + "%s" +#elif __x86_64__ + "pc,%s" +#else + "%s" #endif #ifdef __aarch64__ "gic-version=host," #endif "accel=%s", + dax ? "nvdimm," : "", !force_tcg ? "kvm:tcg" : "tcg"); cpu_model = guestfs_int_get_cpu_model (has_kvm && !force_tcg); @@ -394,7 +409,10 @@ launch_direct (guestfs_h *g, void *datav, const char *arg) } ADD_CMDLINE ("-m"); - ADD_CMDLINE_PRINTF ("%d", g->memsize); + if (dax) + ADD_CMDLINE_PRINTF ("%d,maxmem=32G,slots=32", g->memsize); + else + ADD_CMDLINE_PRINTF ("%d", g->memsize); /* Force exit instead of reboot on panic */ ADD_CMDLINE ("-no-reboot"); @@ -541,21 +559,43 @@ launch_direct (guestfs_h *g, void *datav, const char *arg) /* Add the ext2 appliance drive (after all the drives). */ if (has_appliance_drive) { - ADD_CMDLINE ("-drive"); - ADD_CMDLINE_PRINTF ("file=%s,snapshot=on,id=appliance," - "cache=unsafe,if=none,format=raw", - appliance); + if (dax) { + struct stat statbuf; - if (virtio_scsi) { - ADD_CMDLINE ("-device"); - ADD_CMDLINE ("scsi-hd,drive=appliance"); - } - else { + if (stat (appliance, &statbuf) == -1) { + perrorf (g, "stat: %s", appliance); + goto cleanup0; + } + + ADD_CMDLINE ("-object"); + /* share=off corresponds to mmap MAP_PRIVATE inside qemu, so + * this should not affect the underlying file. IOW parallel + * access should be fine. + */ + ADD_CMDLINE_PRINTF ("memory-backend-file,id=mem1,share=off," + "mem-path=%s,size=%" PRIu64 "b", + appliance, (uint64_t) statbuf.st_size); ADD_CMDLINE ("-device"); - ADD_CMDLINE (VIRTIO_BLK ",drive=appliance"); - } + ADD_CMDLINE ("nvdimm,memdev=mem1,id=nv1"); - appliance_dev = make_appliance_dev (g, virtio_scsi); + appliance_dev = safe_strdup (g, "/dev/pmem0"); + } else { + ADD_CMDLINE ("-drive"); + ADD_CMDLINE_PRINTF ("file=%s,snapshot=on,id=appliance," + "cache=unsafe,if=none,format=raw", + appliance); + + if (virtio_scsi) { + ADD_CMDLINE ("-device"); + ADD_CMDLINE ("scsi-hd,drive=appliance"); + } + else { + ADD_CMDLINE ("-device"); + ADD_CMDLINE (VIRTIO_BLK ",drive=appliance"); + } + + appliance_dev = make_appliance_dev (g, virtio_scsi); + } } /* Create the virtio serial bus. */ @@ -597,6 +637,8 @@ launch_direct (guestfs_h *g, void *datav, const char *arg) flags = 0; if (!has_kvm || force_tcg) flags |= APPLIANCE_COMMAND_LINE_IS_TCG; + if (dax) + flags |= APPLIANCE_COMMAND_LINE_USE_ACPI; ADD_CMDLINE_STRING_NODUP (guestfs_int_appliance_command_line (g, appliance_dev, flags)); diff --git a/src/launch.c b/src/launch.c index 72a8b29..49f0455 100644 --- a/src/launch.c +++ b/src/launch.c @@ -318,6 +318,10 @@ guestfs_impl_config (guestfs_h *g, * If we are launching a qemu TCG guest (ie. KVM is known to be * disabled or unavailable). If you don't know, don't pass this flag. * + * =item C<APPLIANCE_COMMAND_LINE_USE_ACPI> + * + * Use ACPI in the appliance. Normally disabled because it is slow. + * * =back * * Note that this function returns a newly allocated buffer which must @@ -331,6 +335,7 @@ guestfs_int_appliance_command_line (guestfs_h *g, const char *appliance_dev, char *term = getenv ("TERM"); char *ret; bool tcg = flags & APPLIANCE_COMMAND_LINE_IS_TCG; + bool use_acpi = flags & APPLIANCE_COMMAND_LINE_USE_ACPI; char lpj_s[64] = ""; if (appliance_dev) @@ -367,7 +372,7 @@ guestfs_int_appliance_command_line (guestfs_h *g, const char *appliance_dev, " udev.event-timeout=6000" /* for newer udevd */ " no_timer_check" /* fix for RHBZ#502058 */ "%s" /* lpj */ - " acpi=off" /* ACPI is slow - 150-200ms extra on my laptop */ + "%s" /* acpi=off: ACPI is slow, 150-200ms on my laptop */ " printk.time=1" /* display timestamp before kernel messages */ " cgroup_disable=memory" /* saves us about 5 MB of RAM */ " usbcore.nousb" /* disable USB, only saves about 1ms */ @@ -386,6 +391,7 @@ guestfs_int_appliance_command_line (guestfs_h *g, const char *appliance_dev, g->memsize, #endif lpj_s, + !use_acpi ? " acpi=off" : "", root, g->selinux ? "selinux=1 enforcing=0" : "selinux=0", g->verbose ? "guestfs_verbose=1" : "quiet", -- 2.7.4
Seemingly Similar Threads
- [PATCH] launch: rework handling of --enable-valgrind-daemon
- [PATCH] lib: direct: Remove support for virtio-blk as the default.
- [PATCH 1/2] launch: direct: Use a single -machine [type, ]accel=... option.
- [PATCH 1/2] launch: Rationalize how we construct the Linux kernel command line.
- [PATCH v2 1/9] build: Remove ./configure --enable-valgrind-daemon.