Richard W.M. Jones
2010-Jul-16 17:28 UTC
[Libguestfs] [PATCH 0/2] Improve the performance of virt-df
Currently virt-df launches the appliance once for each guest. With these two patches the appliance is launched at most once for every 26 block devices, batching as many guests as will fit into each launch. In theory we could do better still by adding support for disk hot-add (supported by qemu, but not exposed through libguestfs at the moment). However these patches are still a clear improvement on the current situation.

Rich.

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
virt-p2v converts physical machines to virtual machines.  Boot with a
live CD or over the network (PXE) and turn machines into Xen guests.
http://et.redhat.com/~rjones/virt-p2v
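[Editorial note: to make the batching claim above concrete, here is a rough sketch of the packing policy patch 2 implements — whole guests are accumulated into one appliance launch until adding the next guest would exceed 26 disks. The guest names and disk counts below are made up for illustration; this is not code from either patch.]

  #!/usr/bin/perl
  # Editorial sketch of the batching policy, with hypothetical guest data.
  use strict;
  use warnings;

  my $max_disks = 26;    # only /dev/sd[a-z] is assumed to be safe

  my @domains = (
      { name => "web",  disks => [ "web-a.img", "web-b.img" ] },
      { name => "db",   disks => [ "db-a.img" ] },
      { name => "mail", disks => [ "mail-a.img", "mail-b.img", "mail-c.img" ] },
  );

  while (@domains) {
      my @batch;
      my $n = 0;

      # Pack whole guests while the running disk total stays within the cap.
      while (@domains && $n + @{$domains[0]->{disks}} <= $max_disks) {
          $n += @{$domains[0]->{disks}};
          push @batch, shift @domains;
      }

      # A guest with more than 26 disks can never fit; take it on its own
      # rather than looping forever (disk hot-add would be the real fix).
      if (!@batch) {
          my $d = shift @domains;
          $n = @{$d->{disks}};
          push @batch, $d;
      }

      # One appliance launch would serve every guest in @batch.
      printf "one launch: %s (%d disks)\n",
             join (", ", map { $_->{name} } @batch), $n;
  }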
Richard W.M. Jones
2010-Jul-16 17:28 UTC
[Libguestfs] [PATCH 1/2] New APIs: lvm-set-filter and lvm-clear-filter.
-- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones virt-p2v converts physical machines to virtual machines. Boot with a live CD or over the network (PXE) and turn machines into Xen guests. http://et.redhat.com/~rjones/virt-p2v -------------- next part -------------->From d2cf9a15a9f22623dbbed33fb66c5077f1275df2 Mon Sep 17 00:00:00 2001From: Richard Jones <rjones at redhat.com> Date: Fri, 16 Jul 2010 13:01:21 +0100 Subject: [PATCH 1/2] New APIs: lvm-set-filter and lvm-clear-filter. These APIs allow you to change the device filter, the list of block devices that LVM "sees". Either you can set it to a fixed list of devices / partitions, or you can clear it so that LVM sees everything. --- .gitignore | 1 + daemon/Makefile.am | 1 + daemon/lvm-filter.c | 244 +++++++++++++++++++++++++++++++++++++ po/POTFILES.in | 1 + regressions/Makefile.am | 1 + regressions/test-lvm-filtering.sh | 92 ++++++++++++++ src/MAX_PROC_NR | 2 +- src/generator.ml | 41 ++++++ 8 files changed, 382 insertions(+), 1 deletions(-) create mode 100644 daemon/lvm-filter.c create mode 100755 regressions/test-lvm-filtering.sh diff --git a/.gitignore b/.gitignore index 15b496a..5583737 100644 --- a/.gitignore +++ b/.gitignore @@ -201,6 +201,7 @@ python/guestfs-py.c python/guestfs.pyc regressions/rhbz501893 regressions/test1.img +regressions/test2.img regressions/test.err regressions/test.out ruby/bindtests.rb diff --git a/daemon/Makefile.am b/daemon/Makefile.am index 3fa2b31..cf9f7ca 100644 --- a/daemon/Makefile.am +++ b/daemon/Makefile.am @@ -99,6 +99,7 @@ guestfsd_SOURCES = \ link.c \ ls.c \ lvm.c \ + lvm-filter.c \ mkfs.c \ mknod.c \ modprobe.c \ diff --git a/daemon/lvm-filter.c b/daemon/lvm-filter.c new file mode 100644 index 0000000..e487c6b --- /dev/null +++ b/daemon/lvm-filter.c @@ -0,0 +1,244 @@ +/* libguestfs - the guestfsd daemon + * Copyright (C) 2010 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <config.h> + +#include <stdio.h> +#include <stdlib.h> +#include <inttypes.h> +#include <string.h> +#include <unistd.h> + +#include "daemon.h" +#include "c-ctype.h" +#include "actions.h" + +/* Does the current line match the regexp /^\s*filter\s*=/ */ +static int +is_filter_line (const char *line) +{ + while (*line && c_isspace (*line)) + line++; + if (!*line) + return 0; + + if (! STRPREFIX (line, "filter")) + return 0; + line += 6; + + while (*line && c_isspace (*line)) + line++; + if (!*line) + return 0; + + if (*line != '=') + return 0; + + return 1; +} + +/* Rewrite the 'filter = [ ... ]' line in /etc/lvm/lvm.conf. 
*/ +static int +set_filter (const char *filter) +{ + if (verbose) + fprintf (stderr, "LVM: setting device filter to %s\n", filter); + + FILE *ifp = fopen ("/etc/lvm/lvm.conf", "r"); + if (ifp == NULL) { + reply_with_perror ("open: /etc/lvm/lvm.conf"); + return -1; + } + FILE *ofp = fopen ("/etc/lvm/lvm.conf.new", "w"); + if (ofp == NULL) { + reply_with_perror ("open: /etc/lvm/lvm.conf.new"); + fclose (ifp); + return -1; + } + + char *line = NULL; + size_t len = 0; + while (getline (&line, &len, ifp) != -1) { + int r; + if (is_filter_line (line)) { + if (verbose) + fprintf (stderr, "LVM: replacing config line:\n%s", line); + r = fprintf (ofp, " filter = [ %s ]\n", filter); + } else { + r = fprintf (ofp, "%s", line); + } + if (r < 0) { + /* NB. fprintf doesn't set errno on error. */ + reply_with_error ("/etc/lvm/lvm.conf.new: write failed"); + fclose (ifp); + fclose (ofp); + free (line); + unlink ("/etc/lvm/lvm.conf.new"); + return -1; + } + } + + free (line); + + if (fclose (ifp) == EOF) { + reply_with_perror ("/etc/lvm/lvm.conf.new"); + unlink ("/etc/lvm/lvm.conf.new"); + fclose (ofp); + return -1; + } + if (fclose (ofp) == EOF) { + reply_with_perror ("/etc/lvm/lvm.conf.new"); + unlink ("/etc/lvm/lvm.conf.new"); + return -1; + } + + if (rename ("/etc/lvm/lvm.conf.new", "/etc/lvm/lvm.conf") == -1) { + reply_with_perror ("rename: /etc/lvm/lvm.conf"); + unlink ("/etc/lvm/lvm.conf.new"); + return -1; + } + + return 0; +} + +static int +vgchange (const char *vgchange_flag) +{ + char *err; + int r = command (NULL, &err, "lvm", "vgchange", vgchange_flag, NULL); + if (r == -1) { + reply_with_error ("vgscan: %s", err); + free (err); + return -1; + } + + free (err); + return 0; +} + +/* Deactivate all VGs. */ +static int +deactivate (void) +{ + return vgchange ("-an"); +} + +/* Reactivate all VGs. */ +static int +reactivate (void) +{ + return vgchange ("-ay"); +} + +/* Clear the cache and rescan. */ +static int +rescan (void) +{ + unlink ("/etc/lvm/cache/.cache"); + + char *err; + int r = command (NULL, &err, "lvm", "vgscan", NULL); + if (r == -1) { + reply_with_error ("vgscan: %s", err); + free (err); + return -1; + } + + free (err); + return 0; +} + +/* Construct the new, specific filter string. We can assume that + * the 'devices' array does not contain any regexp metachars, + * because it's already been checked by the stub code. + */ +static char * +make_filter_string (char *const *devices) +{ + size_t i; + size_t len = 64; + for (i = 0; devices[i] != NULL; ++i) + len += strlen (devices[i]) + 16; + + char *filter = malloc (len); + if (filter == NULL) { + reply_with_perror ("malloc"); + return NULL; + } + + char *p = filter; + for (i = 0; devices[i] != NULL; ++i) { + /* Because of the way matching works in LVM, each match clause + * should be either: + * "a|^/dev/sda|", for whole block devices, or + * "a|^/dev/sda1$|", for single partitions + * (the assumption being we have <= 26 block devices XXX). + */ + size_t slen = strlen (devices[i]); + char str[slen+16]; + + if (c_isdigit (devices[i][slen-1])) + snprintf (str, slen+16, "\"a|^%s$|\", ", devices[i]); + else + snprintf (str, slen+16, "\"a|^%s|\", ", devices[i]); + + strcpy (p, str); + p += strlen (str); + } + strcpy (p, "\"r|.*|\""); + + return filter; /* Caller must free. 
*/ +} + +int +do_lvm_set_filter (char *const *devices) +{ + char *filter = make_filter_string (devices); + if (filter == NULL) + return -1; + + if (deactivate () == -1) { + free (filter); + return -1; + } + + int r = set_filter (filter); + free (filter); + if (r == -1) + return -1; + + if (rescan () == -1) + return -1; + + return reactivate (); +} + +int +do_lvm_clear_filter (void) +{ + if (deactivate () == -1) + return -1; + + if (set_filter ("\"a/.*/\"") == -1) + return -1; + + if (rescan () == -1) + return -1; + + return reactivate (); +} diff --git a/po/POTFILES.in b/po/POTFILES.in index bb53a76..62df1fd 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -33,6 +33,7 @@ daemon/initrd.c daemon/inotify.c daemon/link.c daemon/ls.c +daemon/lvm-filter.c daemon/lvm.c daemon/mkfs.c daemon/mknod.c diff --git a/regressions/Makefile.am b/regressions/Makefile.am index 385de45..ff00321 100644 --- a/regressions/Makefile.am +++ b/regressions/Makefile.am @@ -34,6 +34,7 @@ TESTS = \ test-cancellation-download-librarycancels.sh \ test-cancellation-upload-daemoncancels.sh \ test-find0.sh \ + test-lvm-filtering.sh \ test-lvm-mapping.pl \ test-noexec-stack.pl \ test-qemudie-killsub.sh \ diff --git a/regressions/test-lvm-filtering.sh b/regressions/test-lvm-filtering.sh new file mode 100755 index 0000000..d7c4e7c --- /dev/null +++ b/regressions/test-lvm-filtering.sh @@ -0,0 +1,92 @@ +#!/bin/bash - +# libguestfs +# Copyright (C) 2010 Red Hat Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +# Test LVM device filtering. + +set -e + +rm -f test1.img test2.img + +actual=$(../fish/guestfish <<'EOF' +sparse test1.img 1G +sparse test2.img 1G + +run + +part-disk /dev/sda mbr +part-disk /dev/sdb mbr + +pvcreate /dev/sda1 +pvcreate /dev/sdb1 + +vgcreate VG1 /dev/sda1 +vgcreate VG2 /dev/sdb1 + +# Should see VG1 and VG2 +vgs + +# Should see just VG1 +lvm-set-filter /dev/sda +vgs +lvm-set-filter /dev/sda1 +vgs + +# Should see just VG2 +lvm-set-filter /dev/sdb +vgs +lvm-set-filter /dev/sdb1 +vgs + +# Should see VG1 and VG2 +lvm-set-filter "/dev/sda /dev/sdb" +vgs +lvm-set-filter "/dev/sda1 /dev/sdb1" +vgs +lvm-set-filter "/dev/sda /dev/sdb1" +vgs +lvm-set-filter "/dev/sda1 /dev/sdb" +vgs +lvm-clear-filter +vgs +EOF +) + +expected="VG1 +VG2 +VG1 +VG1 +VG2 +VG2 +VG1 +VG2 +VG1 +VG2 +VG1 +VG2 +VG1 +VG2 +VG1 +VG2" + +rm -f test1.img test2.img + +if [ "$actual" != "$expected" ]; then + echo "LVM filter test failed. Actual output was:" + echo "$actual" + exit 1 +fi diff --git a/src/MAX_PROC_NR b/src/MAX_PROC_NR index f1aaa90..9183bf0 100644 --- a/src/MAX_PROC_NR +++ b/src/MAX_PROC_NR @@ -1 +1 @@ -254 +256 diff --git a/src/generator.ml b/src/generator.ml index d427558..6ef85fd 100755 --- a/src/generator.ml +++ b/src/generator.ml @@ -4832,6 +4832,47 @@ C<device>. 
If the filesystem does not have a UUID, this returns the empty string.");

+  ("lvm_set_filter", (RErr, [DeviceList "devices"]), 255, [Optional "lvm2"],
+   (* Can't be tested with the current framework because
+    * the VG is being used by the mounted filesystem, so
+    * the vgchange -an command we do first will fail.
+    *)
+   [],
+   "set LVM device filter",
+   "\
+This sets the LVM device filter so that LVM will only be
+able to \"see\" the block devices in the list C<devices>,
+and will ignore all other attached block devices.
+
+Where disk image(s) contain duplicate PVs or VGs, this
+command is useful to get LVM to ignore the duplicates, otherwise
+LVM can get confused. Note also there are two types
+of duplication possible: either cloned PVs/VGs which have
+identical UUIDs; or VGs that are not cloned but just happen
+to have the same name. In normal operation you cannot
+create this situation, but you can do it outside LVM, eg.
+by cloning disk images or by bit twiddling inside the LVM
+metadata.
+
+This command also clears the LVM cache and performs a volume
+group scan.
+
+You can filter whole block devices or individual partitions.
+
+You cannot use this if any VG is currently in use (eg.
+contains a mounted filesystem), even if you are not
+filtering out that VG.");
+
+  ("lvm_clear_filter", (RErr, []), 256, [],
+   [], (* see note on lvm_set_filter *)
+   "clear LVM device filter",
+   "\
+This undoes the effect of C<guestfs_lvm_set_filter>. LVM
+will be able to see every block device.
+
+This command also clears the LVM cache and performs a volume
+group scan.");
+
 ]

 let all_functions = non_daemon_functions @ daemon_functions
-- 
1.7.1
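[Editorial note: a minimal usage sketch of the two new calls, driven from the Perl bindings in the spirit of the guestfish regression test above. The image names and VG layout are assumed for illustration, not taken from the patch; Sys::Guestfs methods croak on error, so no explicit error checks are shown.]

  #!/usr/bin/perl
  # Sketch: restrict LVM scanning to one of two attached disks.
  # Assumes disk1.img and disk2.img each already contain a PV and a VG
  # (e.g. VG1 on the first disk, VG2 on the second), as in the test above.
  use strict;
  use warnings;
  use Sys::Guestfs;

  my $g = Sys::Guestfs->new ();
  $g->add_drive_ro ("disk1.img");
  $g->add_drive_ro ("disk2.img");
  $g->launch ();

  # Only let LVM see the first disk; the VG on the second disappears.
  $g->lvm_set_filter (["/dev/sda"]);
  print "filtered:   ", join (" ", $g->vgs ()), "\n";

  # Remove the filter again; both VGs are visible.
  $g->lvm_clear_filter ();
  print "unfiltered: ", join (" ", $g->vgs ()), "\n";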
Richard W.M. Jones
2010-Jul-16 17:29 UTC
[Libguestfs] [PATCH 2/2] df: Minimize the number of times we launch the libguestfs appliance.
-- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones virt-df lists disk usage of guests without needing to install any software inside the virtual machine. Supports Linux and Windows. http://et.redhat.com/~rjones/virt-df/ -------------- next part -------------->From 45a4cd79215752234c4a5a47fb4c9c6741b1594c Mon Sep 17 00:00:00 2001From: Richard Jones <rjones at redhat.com> Date: Fri, 16 Jul 2010 18:11:56 +0100 Subject: [PATCH 2/2] df: Minimize the number of times we launch the libguestfs appliance. This commit greatly improves the performance of the 'virt-df' command by batching as many disks as possible onto a single appliance. In many situations this means the appliance is launched only once, versus one launch per domain as before. However doing it this way is a lot more complex: (1) Because of limits in Linux and virtio-blk, we can only attach 26 disks maximum at a time to the appliance. (2) We have to use LVM filters (lvm-set-filter) to confine LVM to the disks of a single guest. --- tools/virt-df | 217 ++++++++++++++++++++++++++++++++++++++++++++++----------- 1 files changed, 177 insertions(+), 40 deletions(-) diff --git a/tools/virt-df b/tools/virt-df index 45b7869..790dd6a 100755 --- a/tools/virt-df +++ b/tools/virt-df @@ -20,12 +20,11 @@ use warnings; use strict; use Sys::Guestfs; -use Sys::Guestfs::Lib qw(open_guest get_partitions); +use Sys::Guestfs::Lib qw(feature_available); use Pod::Usage; use Getopt::Long; -use Data::Dumper; -use XML::Writer; +use File::Basename qw(basename); use POSIX qw(ceil); use Locale::TextDomain 'libguestfs'; @@ -151,9 +150,22 @@ if ($version) { # RHBZ#600977 die __"virt-df: cannot use -h and --csv options together\n" if $human && $csv; -# Open the guest handle. +# Get the list of domains and block devices. +# +# We can't use Sys::Guestfs::Lib::open_guest here because we want to +# create the libguestfs handle/appliance as few times as possible. +# +# If virt-df is called with no parameters, then run the libvirt +# equivalent of "virsh list --all", get the XML for each domain, and +# get the disk devices. +# +# If virt-df is called with parameters, assume it must either be a +# single disk image filename, a list of disk image filenames, or a +# single libvirt guest name. Construct disk devices accordingly. -if (@ARGV == 0) { +my @domains = (); + +if (@ARGV == 0) { # No params, use libvirt. my $conn; if ($uri) { @@ -168,61 +180,186 @@ if (@ARGV == 0) { # https://bugzilla.redhat.com/show_bug.cgi?id=538041 @doms = grep { $_->get_id () != 0 } @doms; - my @domnames = sort (map { $_->get_name () } @doms); + exit 0 unless @doms; + + foreach my $dom (@doms) { + my @disks = get_disks_from_libvirt ($dom); + push @domains, { dom => $dom, + name => $dom->get_name (), + disks => \@disks } + } +} elsif (@ARGV == 1) { # One param, could be disk image or domname. 
+ if (-e $ARGV[0]) { + push @domains, { name => basename ($ARGV[0]), + disks => [ $ARGV[0] ] } + } else { + my $conn; - if (@domnames) { - print_title (); - foreach (@domnames) { - eval { do_df ($_); }; - warn $@ if $@; + if ($uri) { + $conn = Sys::Virt->new (readonly => 1, address => $uri); + } else { + $conn = Sys::Virt->new (readonly => 1); } + + my $dom = $conn->get_domain_by_name ($ARGV[0]) + or die __x("{name} is not the name of a libvirt domain\n", + name => $ARGV[0]); + my @disks = get_disks_from_libvirt ($dom); + push @domains, { dom => $dom, + name => $dom->get_name (), + disks => \@disks } } -} else { - print_title (); - do_df (@ARGV); +} else { # >= 2 params, all disk images. + push @domains, { name => basename ($ARGV[0]), + disks => \@ARGV } } -sub do_df +sub get_disks_from_libvirt { - my $g; + my $dom = shift; + my $xml = $dom->get_xml_description (); - if ($uri) { - $g = open_guest (\@_, address => $uri); - } else { - $g = open_guest (\@_); + my $p = XML::XPath->new (xml => $xml); + my @disks = $p->findnodes ('//devices/disk/source/@dev'); + push (@disks, $p->findnodes ('//devices/disk/source/@file')); + + # Code in Sys::Guestfs::Lib dies here if there are no disks at all. + + return map { $_->getData } @disks; +} + +# Sort the domains by name for display. + at domains = sort { $a->{name} cmp $b->{name} } @domains; + +# Since we got this far, we're somewhat sure we're going to +# get to print the result, so display the title. +print_title (); + +# To minimize the number of times we have to launch the appliance, +# shuffle as many domains together as we can, but not exceeding 26 +# disks per request. (26 = # of letters in the English alphabet, and +# we are only confident that /dev/sd[a-z] will work because of various +# limits). +my $n = 0; +my @request = (); +while (@domains) { + while (@domains) { + my $c = @{$domains[0]->{disks}}; + last if $n + $c > 26; + push @request, shift @domains; + } + multi_df (@request); +} + +sub multi_df +{ + local $_; + my $g = Sys::Guestfs->new (); + + my ($d, $disk); + + foreach $d (@_) { + foreach $disk (@{$d->{disks}}) { + $g->add_drive_ro ($disk); + } } $g->launch (); + my $has_lvm2 = feature_available ($g, "lvm2"); - my @partitions = get_partitions ($g); - - # Think of a printable name for this domain. Just choose the - # first parameter passed to this function, which will work for - # most cases (it'll either be the domain name or the first disk - # image name). - my $domname = $_[0]; - - # Mount each partition in turn, and if mountable, do a statvfs on it. - foreach my $partition (@partitions) { - my %stat; - eval { - $g->mount_ro ($partition, "/"); - %stat = $g->statvfs ("/"); - }; - if (!$@) { - print_stat ($domname, $partition, \%stat); + my @devices = $g->list_devices (); + my @partitions = $g->list_partitions (); + + my $n = 0; + foreach $d (@_) { + my $name = $d->{name}; + my $nr_disks = @{$d->{disks}}; + + # Filter LVM to only the devices applying to the original domain. + my @devs = @devices[$n .. $n+$nr_disks-1]; + $g->lvm_set_filter (\@devs) if $has_lvm2; + + # Find which whole devices (RHBZ#590167), partitions and LVs + # contain mountable filesystems. Stat those which are + # mountable, and ignore the others. 
+ foreach (@devs) { + try_df ($name, $g, $_, canonical_dev ($_, $n)); + } + foreach (filter_partitions (\@devs, @partitions)) { + try_df ($name, $g, $_, canonical_dev ($_, $n)); + } + if ($has_lvm2) { + foreach ($g->lvs ()) { + try_df ($name, $g, $_); + } + } + + $n += $nr_disks; + } +} + +sub filter_partitions +{ + my $devs = shift; + my @devs = @$devs; + my @r; + + foreach my $p (@_) { + foreach my $d (@devs) { + if ($p =~ /^$d\d/) { + push @r, $p; + last; + } } - $g->umount_all (); } + + return @r; +} + +# Calculate the canonical name for a device. +# eg: /dev/vdb1 when offset = 1 +# => canonical name is /dev/sda1 +sub canonical_dev +{ + local $_; + my $dev = shift; + my $offset = shift; + + return $dev unless $dev =~ m{^/dev/.d([a-z])(\d*)$}; + my $disk = $1; + my $partnum = $2; + + $disk = chr (ord ($disk) - $offset); + + return "/dev/sd$disk$partnum" +} + +sub try_df +{ + local $_; + my $domname = shift; + my $g = shift; + my $dev = shift; + my $display = shift || $dev; + + my %stat; + eval { + $g->mount_ro ($dev, "/"); + %stat = $g->statvfs ("/"); + }; + if (!$@) { + print_stat ($domname, $display, \%stat); + } + $g->umount_all (); } sub print_stat { my $domname = shift; - my $partition = shift; + my $dev = shift; my $stat = shift; - my @cols = ($domname, $partition); + my @cols = ($domname, $dev); if (!$inodes) { my $bsize = $stat->{bsize}; # block size -- 1.7.1
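[Editorial note: a small standalone illustration of the canonical_dev renaming used in the patch. Inside a batched appliance, the second guest's disks may be attached as, say, /dev/sdc and /dev/sdd, so the drive letter is shifted back by the guest's disk offset before the name is printed. The device names below are examples only.]

  #!/usr/bin/perl
  # Illustration of the canonical_dev logic from the patch above.
  use strict;
  use warnings;

  sub canonical_dev
  {
      my ($dev, $offset) = @_;

      # Same pattern as the patch: /dev/Xd<letter><optional partition>,
      # e.g. /dev/sdc1 or /dev/vdb.
      return $dev unless $dev =~ m{^/dev/.d([a-z])(\d*)$};
      my ($disk, $partnum) = ($1, $2);

      # Shift the drive letter back so the guest's first disk shows as sda.
      $disk = chr (ord ($disk) - $offset);
      return "/dev/sd$disk$partnum";
  }

  # Second guest in a batch, whose two disks were attached third and fourth:
  print canonical_dev ("/dev/sdc",  2), "\n";   # prints /dev/sda
  print canonical_dev ("/dev/sdd1", 2), "\n";   # prints /dev/sdb1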