Chris Lalancette
2008-Nov-05 15:57 UTC
[Ovirt-devel] [PATCH server] Implement LVM taskomatic
This patch implements LVM scanning, creation, deletion, and VM use in the
taskomatic backend. This required a re-thinking of the way we do things, and
because of this the implementation ended up being much simpler and more
object-oriented.

Note that in order for this to have any chance to work, you need an updated
libvirt (a pre-0.5.0 snapshot) and an updated ruby-libvirt package, both of
which I am working on and will presently upload to the ovirt yum repositories.

Signed-off-by: Chris Lalancette <clalance at redhat.com>
---
 src/app/models/iscsi_storage_volume.rb |    4 +
 src/app/models/lvm_storage_volume.rb   |    4 +
 src/app/models/nfs_storage_volume.rb   |    4 +
 src/task-omatic/task_storage.rb        |  301 ++++++++++++++++++++++++++------
 src/task-omatic/task_vm.rb             |  135 ++++++++++++---
 src/task-omatic/taskomatic.rb          |    2 +
 src/task-omatic/utils.rb               |  242 ++++++++++++--------------
 7 files changed, 480 insertions(+), 212 deletions(-)

diff --git a/src/app/models/iscsi_storage_volume.rb b/src/app/models/iscsi_storage_volume.rb
index 00d7db2..48edbd8 100644
--- a/src/app/models/iscsi_storage_volume.rb
+++ b/src/app/models/iscsi_storage_volume.rb
@@ -22,6 +22,10 @@ class IscsiStorageVolume < StorageVolume
     "#{storage_pool[:target]}:#{lun}"
   end
 
+  def volume_name
+    "lun"
+  end
+
   #FIXME: should also take available free space into account
   def supports_lvm_subdivision
     return true
diff --git a/src/app/models/lvm_storage_volume.rb b/src/app/models/lvm_storage_volume.rb
index 7dde2d1..4aac265 100644
--- a/src/app/models/lvm_storage_volume.rb
+++ b/src/app/models/lvm_storage_volume.rb
@@ -21,4 +21,8 @@ class LvmStorageVolume < StorageVolume
   def display_name
     "#{get_type_label}: #{storage_pool.vg_name}:#{lv_name}"
   end
+
+  def volume_name
+    "lv_name"
+  end
 end
diff --git a/src/app/models/nfs_storage_volume.rb b/src/app/models/nfs_storage_volume.rb
index f220930..2c18d67 100644
--- a/src/app/models/nfs_storage_volume.rb
+++ b/src/app/models/nfs_storage_volume.rb
@@ -21,4 +21,8 @@ class NfsStorageVolume < StorageVolume
   def label_components
     "#{storage_pool.export_path}/#{filename}"
   end
+
+  def volume_name
+    "filename"
+  end
 end
diff --git a/src/task-omatic/task_storage.rb b/src/task-omatic/task_storage.rb
index 5cddf2b..44ba164 100644
--- a/src/task-omatic/task_storage.rb
+++ b/src/task-omatic/task_storage.rb
@@ -20,26 +20,65 @@ require 'utils'
 
 require 'libvirt'
 
-def refresh_pool(task)
-  puts "refresh_pool"
+def build_libvirt_vol_xml(name, size, owner, group, mode)
+  vol_xml = Document.new
+  vol_xml.add_element("volume", {"type" => "logical"})
+  vol_xml.root.add_element("name").add_text(name)
+  vol_xml.root.add_element("capacity", {"unit" => "K"}).add_text(size.to_s)
+  vol_xml.root.add_element("target")
+  vol_xml.root.elements["target"].add_element("permissions")
+  vol_xml.root.elements["target"].elements["permissions"].add_element("owner").add_text(owner)
+  vol_xml.root.elements["target"].elements["permissions"].add_element("group").add_text(group)
+  vol_xml.root.elements["target"].elements["permissions"].add_element("mode").add_text(mode)
 
-  pool = task.storage_pool
+  return vol_xml
+end
 
-  if pool == nil
-    raise "Could not find storage pool"
-  end
+def add_volumes_to_db(db_pool, libvirt_pool, owner = nil, group = nil, mode = nil)
+  # FIXME: this is currently broken if you do something like:
+  # 1.  Add an iscsi pool with 3 volumes (lun-1, lun-2, lun-3)
+  # 2.  Scan it in
+  # 3.  Remove lun-3 from the pool
+  # 4.  Re-scan it
+  # What will happen is that you will still have lun-3 available in the
+  # database, even though it's not available in the pool anymore.  It's a
+  # little tricky, though; we have to make sure that we don't pull the
+  # database entry out from underneath a possibly running VM (or do we?)
+  libvirt_pool.list_volumes.each do |volname|
+    storage_volume = StorageVolume.factory(db_pool.get_type_label)
 
-  if pool[:type] == "IscsiStoragePool"
-    storage = Iscsi.new(pool.ip_addr, pool[:target])
-  elsif pool[:type] == "NfsStoragePool"
-    storage = NFS.new(pool.ip_addr, pool.export_path)
-  else
-    raise "Unknown storage pool type " + pool[:type].to_s
+    # NOTE: it is safe (and, in fact, necessary) to use
+    # #{storage_volume.volume_name} here without sanitizing it.  This is
+    # because this is *not* based on user modifiable data, but rather, on an
+    # internal implementation detail
+    existing_vol = StorageVolume.find(:first, :conditions =>
+                     [ "storage_pool_id = ? AND #{storage_volume.volume_name} = ?",
+                       db_pool.id, volname])
+    if existing_vol != nil
+      # in this case, this path already exists in the database; just skip
+      next
+    end
+
+    volptr = libvirt_pool.lookup_vol_by_name(volname)
+
+    volinfo = volptr.info
+
+    storage_volume = StorageVolume.factory(db_pool.get_type_label)
+    storage_volume.path = volptr.path
+    storage_volume.size = volinfo.capacity / 1024
+    storage_volume.storage_pool_id = db_pool.id
+    storage_volume.write_attribute(storage_volume.volume_name, volname)
+    storage_volume.lv_owner_perms = owner
+    storage_volume.lv_group_perms = group
+    storage_volume.lv_mode_perms = mode
+    storage_volume.save
   end
+end
 
+def storage_find_suitable_host(pool_id)
   # find all of the hosts in the same pool as the storage
   hosts = Host.find(:all, :conditions =>
-                    [ "hardware_pool_id = ?", pool.hardware_pool_id ])
+                    [ "hardware_pool_id = ?", pool_id ])
 
   conn = nil
   hosts.each do |host|
@@ -59,66 +98,212 @@ def refresh_pool(task)
   end
 
   if conn == nil
+    # last ditch effort; if we didn't find any hosts, just use ourselves.
+    # this may or may not work
+    begin
+      conn = Libvirt::open("qemu:///system")
+    rescue
+    end
+  end
+
+  if conn == nil
     raise "Could not find a host to scan storage"
   end
 
-  remote_pool_defined = false
-  remote_pool_started = false
-  remote_pool = nil
+  return conn
+end
 
-  # here, run through the list of already defined pools on the remote side
-  # and see if a pool matching the XML already exists.  If it does
-  # we don't try to define it again, we just scan it
-  pool_defined = false
-  all_storage_pools(conn).each do |remote_pool_name|
-    tmppool = conn.lookup_storage_pool_by_name(remote_pool_name)
-    doc = Document.new(tmppool.xml_desc(0))
+# The words "pool" and "volume" are ridiculously overloaded in our context.
+# Therefore, the refresh_pool method adopts this convention:
+# phys_db_pool: The underlying physical storage pool, as it is represented in
+#               the database
+# phys_libvirt_pool: The underlying physical storage, as it is represented in
+#                    libvirt
+# lvm_db_pool: The logical storage pool (if it exists), as it is represented
+#              in the database
+# lvm_libvirt_pool: The logical storage pool (if it exists), as it is
+#                   represented in libvirt
 
-    if storage.xmlequal?(doc.root)
-      pool_defined = true
-      remote_pool = tmppool
-      break
+def refresh_pool(task)
+  puts "refresh_pool"
+
+  phys_db_pool = task.storage_pool
+  if phys_db_pool == nil
+    raise "Could not find storage pool"
+  end
+
+  conn = storage_find_suitable_host(phys_db_pool.hardware_pool_id)
+
+  begin
+    phys_libvirt_pool = LibvirtPool.factory(phys_db_pool)
+    phys_libvirt_pool.connect(conn)
+
+    begin
+      # OK, the pool is all set.  Add in all of the volumes
+      add_volumes_to_db(phys_db_pool, phys_libvirt_pool)
+
+      # OK, now we've scanned the underlying hardware pool and added the
+      # volumes.  Next we scan for pre-existing LVM volumes
+      logical_xml = conn.discover_storage_pool_sources("logical")
+
+      Document.new(logical_xml).elements.each('sources/source') do |source|
+        vgname = source.elements["name"].text
+
+        begin
+          source.elements.each("device") do |device|
+            byid_device = phys_libvirt_pool.lookup_vol_by_path(device.attributes["path"]).path
+          end
+        rescue
+          # If matching any of the <device> sections in the LVM XML fails
+          # against the storage pool, then it is likely that this is a storage
+          # pool not associated with the one we connected above.  Go on
+          # FIXME: it would be nicer to catch the right exception here, and
+          # fail on other exceptions
+          puts "One of the logical volumes in #{vgname} is not part of the pool of type #{phys_db_pool[:type]} that we are scanning; ignore the previous error!"
+          next
+        end
+
+        # if we make it here, then we were able to resolve all of the devices,
+        # so we know we need to use a new pool
+        lvm_db_pool = LvmStoragePool.find(:first, :conditions =>
+                                          [ "vg_name = ?", vgname ])
+        if lvm_db_pool == nil
+          lvm_db_pool = LvmStoragePool.new
+          lvm_db_pool[:type] = "LvmStoragePool"
+          # set the LVM pool to the same hardware pool as the underlying storage
+          lvm_db_pool.hardware_pool_id = phys_db_pool.hardware_pool_id
+          lvm_db_pool.vg_name = vgname
+          lvm_db_pool.save
+        end
+
+        source.elements.each("device") do |device|
+          byid_device = phys_libvirt_pool.lookup_vol_by_path(device.attributes["path"]).path
+          physical_vol = StorageVolume.find(:first, :conditions =>
+                                            [ "path = ?",  byid_device])
+          if physical_vol == nil
+            # Hm.  We didn't find the device in the storage volumes already.
+            # something went wrong internally, and we have to bail
+            raise "Storage internal physical volume error"
+          end
+
+          # OK, put the right lvm_pool_id in place
+          physical_vol.lvm_pool_id = lvm_db_pool.id
+          physical_vol.save
+        end
+
+        lvm_libvirt_pool = LibvirtPool.factory(lvm_db_pool)
+        lvm_libvirt_pool.connect(conn)
+
+        begin
+          add_volumes_to_db(lvm_db_pool, lvm_libvirt_pool, "0744", "0744", "0744")
+        ensure
+          lvm_libvirt_pool.shutdown
+        end
+      end
+    ensure
+      phys_libvirt_pool.shutdown
     end
+  ensure
+    conn.close
   end
-  if not pool_defined
-    remote_pool = conn.define_storage_pool_xml(storage.getxml, 0)
-    remote_pool.build(0)
-    remote_pool_defined = true
+end
+
+def create_volume(task)
+  puts "create_volume"
+
+  lvm_db_volume = task.storage_volume
+  if lvm_db_volume == nil
+    raise "Could not find storage volume to create"
+  end
+  if lvm_db_volume[:type] != "LvmStorageVolume"
+    raise "The volume to create must be of type LvmStorageVolume, not type #{lvm_db_volume[:type]}"
   end
 
-  remote_pool_info = remote_pool.info
-  if remote_pool_info.state == Libvirt::StoragePool::INACTIVE
-    # only try to start the pool if it is currently inactive; in all other
-    # states, assume it is already running
-    remote_pool.create(0)
-    remote_pool_started = true
+  lvm_db_pool = lvm_db_volume.storage_pool
+  if lvm_db_pool == nil
+    raise "Could not find storage pool"
+  end
+  if lvm_db_pool[:type] != "LvmStoragePool"
+    raise "The pool for the volume must be of type LvmStoragePool, not type #{lvm_db_pool[:type]}"
   end
 
-  vols = remote_pool.list_volumes
-  vols.each do |volname|
-    volptr = remote_pool.lookup_volume_by_name(volname)
-    existing_vol = StorageVolume.find(:first, :conditions =>
-                                      [ "path = ?", volptr.path ])
-    if existing_vol != nil
-      # in this case, this path already exists in the database; just skip
-      next
+  conn = storage_find_suitable_host(lvm_db_pool.hardware_pool_id)
+
+  begin
+    phys_libvirt_pool = get_libvirt_pool_from_volume(lvm_db_volume)
+    phys_libvirt_pool.connect(conn)
+
+    begin
+      lvm_libvirt_pool = LibvirtPool.factory(lvm_db_pool)
+      lvm_libvirt_pool.connect(conn)
+
+      begin
+        vol_xml = build_libvirt_vol_xml(lvm_db_volume.lv_name,
+                                        lvm_db_volume.size,
+                                        lvm_db_volume.lv_owner_perms,
+                                        lvm_db_volume.lv_group_perms,
+                                        lvm_db_volume.lv_mode_perms)
+
+        lvm_libvirt_pool.create_vol_xml(vol_xml.to_s)
+      ensure
+        lvm_libvirt_pool.shutdown
+      end
+    ensure
+      phys_libvirt_pool.shutdown
     end
+  ensure
+    conn.close
+  end
+end
 
-    volinfo = volptr.info
+def delete_volume(task)
+  puts "delete_volume"
 
-    storage_volume = StorageVolume.new
-    storage_volume.path = volptr.path
-    storage_volume.size = volinfo.capacity / 1024
-    storage_volume.storage_pool_id = pool.id
-    storage_volume[:type] = StoragePool::STORAGE_TYPES[pool.get_type_label] + "StorageVolume"
-    storage_volume.write_attribute(storage.db_column, volname)
-    storage_volume.save
+  lvm_db_volume = task.storage_volume
+  if lvm_db_volume == nil
+    raise "Could not find storage volume to delete"
+  end
+  if lvm_db_volume[:type] != "LvmStorageVolume"
+    raise "The volume to delete must be of type LvmStorageVolume, not type #{lvm_db_volume[:type]}"
+  end
+
+  lvm_db_pool = lvm_db_volume.storage_pool
+  if lvm_db_pool == nil
+    raise "Could not find storage pool"
   end
-  if remote_pool_started
-    remote_pool.destroy
+  if lvm_db_pool[:type] != "LvmStoragePool"
+    raise "The pool for the volume must be of type LvmStoragePool, not type #{lvm_db_pool[:type]}"
   end
-  if remote_pool_defined
-    remote_pool.undefine
+
+  conn = storage_find_suitable_host(lvm_db_pool.hardware_pool_id)
+
+  begin
+    phys_libvirt_pool = get_libvirt_pool_from_volume(lvm_db_volume)
+    phys_libvirt_pool.connect(conn)
+
+    begin
+      lvm_libvirt_pool = LibvirtPool.factory(lvm_db_pool)
+      lvm_libvirt_pool.connect(conn)
+
+      begin
+        libvirt_volume = lvm_libvirt_pool.lookup_vol_by_name(lvm_db_volume.lv_name)
+        # FIXME: we actually probably want to zero out the whole volume here, so
+        # we aren't potentially leaking data from one user to another.  There
+        # are two problems, though:
+        # 1)  I'm not sure how I would go about zero'ing the data on a remote
+        #     machine, since there is no "libvirt_write_data" call
+        # 2)  This could potentially take quite a while, so we want to spawn
+        #     off another thread to do it
+        libvirt_volume.delete
+
+        LvmStorageVolume.delete(lvm_db_volume.id)
+      ensure
+        lvm_libvirt_pool.shutdown
+      end
+    ensure
+      phys_libvirt_pool.shutdown
+    end
+  ensure
+    conn.close
   end
-  conn.close
 end
diff --git a/src/task-omatic/task_vm.rb b/src/task-omatic/task_vm.rb
index b5f888d..c30c6a9 100644
--- a/src/task-omatic/task_vm.rb
+++ b/src/task-omatic/task_vm.rb
@@ -24,46 +24,140 @@ require 'utils'
 
 gem 'cobbler'
 require 'cobbler'
 
+def findHostSLA(vm)
+  host = nil
+
+  vm.vm_resource_pool.get_hardware_pool.hosts.each do |curr|
+    # FIXME: we probably need to add in some notion of "load" into this check
+    if curr.num_cpus >= vm.num_vcpus_allocated \
+       and curr.memory >= vm.memory_allocated \
+       and not curr.is_disabled.nil? and curr.is_disabled == 0 \
+       and curr.state == Host::STATE_AVAILABLE \
+       and (vm.host_id.nil? or (not vm.host_id.nil? and vm.host_id != curr.id))
+      host = curr
+      break
+    end
+  end
+
+  if host == nil
+    # we couldn't find a host that matches this criteria
+    raise "No host matching VM parameters could be found"
+  end
+
+  return host
+end
+
+def findHost(host_id)
+  host = Host.find(:first, :conditions => [ "id = ?", host_id])
+
+  if host == nil
+    # Hm, we didn't find the host_id.  Seems odd.  Return a failure
+    raise "Could not find host_id " + host_id.to_s
+  end
+
+  return host
+end
+
+def connect_storage_pools(conn, storage_volumes)
+  storagedevs = []
+  storage_volumes.each do |volume|
+    # here, we need to iterate through each volume and possibly attach it
+    # to the host we are going to be using
+    db_pool = volume.storage_pool
+    if db_pool == nil
+      # Hum.  Specified by the VM description, but not in the storage pool?
+      # continue on and hope for the best
+      puts "Couldn't find pool for volume #{volume.path}; skipping"
+      next
+    end
+
+    # we have to special case LVM pools.  In that case, we need to first
+    # activate the underlying physical device, and then do the logical one
+    if volume[:type] == "LvmStorageVolume"
+      phys_libvirt_pool = get_libvirt_pool_from_volume(volume)
+      phys_libvirt_pool.connect(conn)
+    end
+
+    libvirt_pool = LibvirtPool.factory(db_pool)
+    libvirt_pool.connect(conn)
+
+    # OK, the pool should be all set.  The last thing we need to do is get
+    # the path based on the volume name
+    storagedevs << libvirt_pool.lookup_vol_by_name(volume.read_attribute(volume.volume_name)).path
+  end
+
+  return storagedevs
+end
+
+def remove_pools(conn, type = nil)
+  all_storage_pools(conn).each do |remote_pool_name|
+    pool = conn.lookup_storage_pool_by_name(remote_pool_name)
+
+    if type == nil or type == Document.new(pool.xml_desc).root.attributes['type']
+      begin
+        pool.destroy
+      rescue
+      end
+
+      begin
+        # if the destroy failed, we still try to undefine; it may be a pool
+        # that was previously destroyed but not undefined for whatever reason
+        pool.undefine
+      rescue
+        # do nothing if any of this failed; the worst that happens is that
+        # we leave a pool configured
+        puts "Could not teardown pool " + remote_pool_name + "; skipping"
+      end
+    end
+  end
+end
+
+def teardown_storage_pools(conn)
+  # FIXME: this needs to get a *lot* smarter.  In particular, we want to make
+  # sure we can tear down unused pools even when there are other guests running
+  if conn.list_domains.empty?
+    # OK, there are no running guests on this host anymore.  We can teardown
+    # any storage pools that are there without fear
+
+    # we first have to tear-down LVM pools, because they might depend on the
+    # underlying physical pools
+    remove_pools(conn, "logical")
+
+    # now tear down the rest of the pools
+    remove_pools(conn)
+  end
+end
+
 def create_vm_xml(name, uuid, memAllocated, memUsed, vcpus, bootDevice,
                   macAddr, bridge, diskDevices)
   doc = Document.new
 
   doc.add_element("domain", {"type" => "kvm"})
 
-  doc.root.add_element("name")
-  doc.root.elements["name"].text = name
+  doc.root.add_element("name").add_text(name)
 
-  doc.root.add_element("uuid")
-  doc.root.elements["uuid"].text = uuid
+  doc.root.add_element("uuid").add_text(uuid)
 
-  doc.root.add_element("memory")
-  doc.root.elements["memory"].text = memAllocated
+  doc.root.add_element("memory").add_text(memAllocated.to_s)
 
-  doc.root.add_element("currentMemory")
-  doc.root.elements["currentMemory"].text = memUsed
+  doc.root.add_element("currentMemory").add_text(memUsed.to_s)
 
-  doc.root.add_element("vcpu")
-  doc.root.elements["vcpu"].text = vcpus
+  doc.root.add_element("vcpu").add_text(vcpus.to_s)
 
   doc.root.add_element("os")
-  doc.root.elements["os"].add_element("type")
-  doc.root.elements["os"].elements["type"].text = "hvm"
+  doc.root.elements["os"].add_element("type").add_text("hvm")
   doc.root.elements["os"].add_element("boot", {"dev" => bootDevice})
 
   doc.root.add_element("clock", {"offset" => "utc"})
 
-  doc.root.add_element("on_poweroff")
-  doc.root.elements["on_poweroff"].text = "destroy"
+  doc.root.add_element("on_poweroff").add_text("destroy")
 
-  doc.root.add_element("on_reboot")
-  doc.root.elements["on_reboot"].text = "restart"
+  doc.root.add_element("on_reboot").add_text("restart")
 
-  doc.root.add_element("on_crash")
-  doc.root.elements["on_crash"].text = "destroy"
+  doc.root.add_element("on_crash").add_text("destroy")
 
   doc.root.add_element("devices")
-  doc.root.elements["devices"].add_element("emulator")
-  doc.root.elements["devices"].elements["emulator"].text = "/usr/bin/qemu-kvm"
+  doc.root.elements["devices"].add_element("emulator").add_text("/usr/bin/qemu-kvm")
 
   devs = ['hda', 'hdb', 'hdc', 'hdd']
   which_device = 0
@@ -115,7 +209,6 @@ def setVmVncPort(vm, domain)
   vm.save
 end
 
-
 def findVM(task, fail_on_nil_host_id = true)
   # find the matching VM in the vms table
   vm = task.vm
diff --git a/src/task-omatic/taskomatic.rb b/src/task-omatic/taskomatic.rb
index 0af68bf..1264207 100755
--- a/src/task-omatic/taskomatic.rb
+++ b/src/task-omatic/taskomatic.rb
@@ -104,6 +104,8 @@ loop do
       when VmTask::ACTION_UPDATE_STATE_VM then update_state_vm(task)
       when VmTask::ACTION_MIGRATE_VM then migrate_vm(task)
       when StorageTask::ACTION_REFRESH_POOL then refresh_pool(task)
+      when StorageVolumeTask::ACTION_CREATE_VOLUME then create_volume(task)
+      when StorageVolumeTask::ACTION_DELETE_VOLUME then delete_volume(task)
       when HostTask::ACTION_CLEAR_VMS then clear_vms_host(task)
       else
         puts "unknown task " + task.action
diff --git a/src/task-omatic/utils.rb b/src/task-omatic/utils.rb
index 47a4543..26516ae 100644
--- a/src/task-omatic/utils.rb
+++ b/src/task-omatic/utils.rb
@@ -1,40 +1,6 @@
 require 'rexml/document'
 include REXML
 
-def findHostSLA(vm)
-  host = nil
-
-  vm.vm_resource_pool.get_hardware_pool.hosts.each do |curr|
-    # FIXME: we probably need to add in some notion of "load" into this check
-    if curr.num_cpus >= vm.num_vcpus_allocated \
-       and curr.memory >= vm.memory_allocated \
-       and not curr.is_disabled.nil? and curr.is_disabled == 0 \
-       and curr.state == Host::STATE_AVAILABLE \
-       and (vm.host_id.nil? or (not vm.host_id.nil? and vm.host_id != curr.id))
-      host = curr
-      break
-    end
-  end
-
-  if host == nil
-    # we couldn't find a host that matches this criteria
-    raise "No host matching VM parameters could be found"
-  end
-
-  return host
-end
-
-def findHost(host_id)
-  host = Host.find(:first, :conditions => [ "id = ?", host_id])
-
-  if host == nil
-    # Hm, we didn't find the host_id.  Seems odd.  Return a failure
-    raise "Could not find host_id " + host_id.to_s
-  end
-
-  return host
-end
-
 def String.random_alphanumeric(size=16)
   s = ""
   size.times { s << (i = Kernel.rand(62); i += ((i < 10) ? 48 : ((i < 36) ? 55 : 61 ))).chr }
@@ -47,156 +13,166 @@ def all_storage_pools(conn)
   return all_pools
 end
 
-def teardown_storage_pools(conn)
-  # FIXME: this needs to get a *lot* smarter.  In particular, we want to make
-  # sure we can tear down unused pools even when there are other guests running
-  if conn.list_domains.empty?
-    # OK, there are no running guests on this host anymore.  We can teardown
-    # any storage pools that are there without fear
-    all_storage_pools(conn).each do |remote_pool_name|
-      begin
-        pool = conn.lookup_storage_pool_by_name(remote_pool_name)
-        pool.destroy
-        pool.undefine
-      rescue
-        # do nothing if any of this failed; the worst that happens is that
-        # we leave a pool configured
-        puts "Could not teardown pool " + remote_pool_name + "; skipping"
-      end
-    end
-  end
-end
-
-def connect_storage_pools(conn, storage_volumes)
-  # here, build up a list of already defined pools.  We'll use it
-  # later to see if we need to define new pools for the storage or just
-  # keep using existing ones
+def get_libvirt_pool_from_volume(db_volume)
+  phys_volume = StorageVolume.find(:first, :conditions =>
+                                   [ "lvm_pool_id = ?", db_volume.storage_pool_id])
 
-  defined_pools = []
-  all_storage_pools(conn).each do |remote_pool_name|
-    defined_pools << conn.lookup_storage_pool_by_name(remote_pool_name)
-  end
+  return LibvirtPool.factory(phys_volume.storage_pool)
+end
 
-  storagedevs = []
-  storage_volumes.each do |volume|
-    # here, we need to iterate through each volume and possibly attach it
-    # to the host we are going to be using
-    storage_pool = volume.storage_pool
-
-    if storage_pool == nil
-      # Hum.  Specified by the VM description, but not in the storage pool?
-      # continue on and hope for the best
-      # FIXME: probably want a print to the logs here
-      next
-    end
+
+class LibvirtPool
+  def initialize(type, name = nil)
+    @remote_pool = nil
+    @build_on_start = true
+    @remote_pool_defined = false
+    @remote_pool_started = false
 
-    if storage_pool[:type] == "IscsiStoragePool"
-      thisstorage = Iscsi.new(storage_pool.ip_addr, storage_pool[:target])
-    elsif storage_pool[:type] == "NfsStoragePool"
-      thisstorage = NFS.new(storage_pool.ip_addr, storage_pool.export_path)
+    if name == nil
+      @name = type + "-" + String.random_alphanumeric
     else
-      # Hm, a storage type we don't understand; skip it
-      puts "Storage type " + storage_pool[:type] + " is not understood; skipping"
-      next
+      @name = name
     end
 
-    thepool = nil
-    defined_pools.each do |pool|
-      doc = Document.new(pool.xml_desc)
-      root = doc.root
+    @xml = Document.new
+    @xml.add_element("pool", {"type" => type})
+
+    @xml.root.add_element("name").add_text(@name)
+
+    @xml.root.add_element("source")
+
+    @xml.root.add_element("target")
+    @xml.root.elements["target"].add_element("path")
+  end
+
+  def connect(conn)
+    all_storage_pools(conn).each do |remote_pool_name|
+      tmppool = conn.lookup_storage_pool_by_name(remote_pool_name)
 
-      if thisstorage.xmlequal?(doc.root)
-        thepool = pool
+      if self.xmlequal?(Document.new(tmppool.xml_desc).root)
+        @remote_pool = tmppool
         break
       end
     end
 
-    if thepool == nil
-      thepool = conn.define_storage_pool_xml(thisstorage.getxml)
-      thepool.build
-      thepool.create
-    elsif thepool.info.state == Libvirt::StoragePool::INACTIVE
+    if @remote_pool == nil
+      @remote_pool = conn.define_storage_pool_xml(@xml.to_s)
+      # we need this because we don't want to "build" LVM pools, which would
+      # destroy existing data
+      if @build_on_start
+        @remote_pool.build
+      end
+      @remote_pool_defined = true
+    end
+
+    if @remote_pool.info.state == Libvirt::StoragePool::INACTIVE
       # only try to start the pool if it is currently inactive; in all other
       # states, assume it is already running
-      thepool.create
+      @remote_pool.create
+      @remote_pool_started = true
     end
+  end
 
-    storagedevs << thepool.lookup_volume_by_name(volume.read_attribute(thisstorage.db_column)).path
+  def list_volumes
+    return @remote_pool.list_volumes
   end
 
-  return storagedevs
-end
+  def lookup_vol_by_path(dev)
+    return @remote_pool.lookup_volume_by_path(dev)
+  end
 
-class StorageType
-  attr_reader :db_column
+  def lookup_vol_by_name(name)
+    return @remote_pool.lookup_volume_by_name(name)
+  end
+
+  def create_vol_xml(xml)
+    return @remote_pool.create_vol_xml(xml)
+  end
+
+  def shutdown
+    if @remote_pool_started
+      @remote_pool.destroy
+    end
+    if @remote_pool_defined
+      @remote_pool.undefine
+    end
+  end
 
   def xmlequal?(docroot)
     return false
   end
 
-  def getxml
-    return @xml.to_s
+  def self.factory(pool)
+    if pool[:type] == "IscsiStoragePool"
+      return IscsiLibvirtPool.new(pool.ip_addr, pool[:target])
+    elsif pool[:type] == "NfsStoragePool"
+      return NFSLibvirtPool.new(pool.ip_addr, pool.export_path)
+    elsif pool[:type] == "LvmStoragePool"
+      return LVMLibvirtPool.new(pool.vg_name)
+    else
+      raise "Unknown storage pool type " + pool[:type].to_s
+    end
   end
 end
 
-class Iscsi < StorageType
-  def initialize(ipaddr, target)
+class IscsiLibvirtPool < LibvirtPool
+  def initialize(ip_addr, target)
+    super('iscsi')
+
     @type = 'iscsi'
-    @ipaddr = ipaddr
+    @ipaddr = ip_addr
     @target = target
-    @db_column = 'lun'
-
-    @xml = Document.new
-    @xml.add_element("pool", {"type" => @type})
-
-    @xml.root.add_element("name")
-    @xml.root.elements["name"].text = String.random_alphanumeric
-
-    @xml.root.add_element("source")
     @xml.root.elements["source"].add_element("host", {"name" => @ipaddr})
     @xml.root.elements["source"].add_element("device", {"path" => @target})
 
-    @xml.root.add_element("target")
-    @xml.root.elements["target"].add_element("path")
     @xml.root.elements["target"].elements["path"].text = "/dev/disk/by-id"
   end
 
   def xmlequal?(docroot)
     return (docroot.attributes['type'] == @type and
-            docroot.elements['source'].elements['host'].attributes['name'] == @ipaddr and
-            docroot.elements['source'].elements['device'].attributes['path'] == @target)
+      docroot.elements['source'].elements['host'].attributes['name'] == @ipaddr and
+      docroot.elements['source'].elements['device'].attributes['path'] == @target)
   end
 end
 
-class NFS < StorageType
-  def initialize(host, remote_path)
+class NFSLibvirtPool < LibvirtPool
+  def initialize(ip_addr, export_path)
+    super('netfs')
+
     @type = 'netfs'
-    @host = host
-    @remote_path = remote_path
+    @host = ip_addr
+    @remote_path = export_path
     @name = String.random_alphanumeric
-    @db_column = 'filename'
-
-    @xml = Document.new
-    @xml.add_element("pool", {"type" => @type})
-
-    @xml.root.add_element("name")
-
-    @xml.root.elements["name"].text = @name
-
-    @xml.root.add_element("source")
     @xml.root.elements["source"].add_element("host", {"name" => @host})
     @xml.root.elements["source"].add_element("dir", {"path" => @remote_path})
     @xml.root.elements["source"].add_element("format", {"type" => "nfs"})
 
-    @xml.root.add_element("target")
-    @xml.root.elements["target"].add_element("path")
     @xml.root.elements["target"].elements["path"].text = "/mnt/" + @name
   end
 
   def xmlequal?(docroot)
     return (docroot.attributes['type'] == @type and
-            docroot.elements['source'].elements['host'].attributes['name'] == @host and
-            docroot.elements['source'].elements['dir'].attributes['path'] == @remote_path)
+      docroot.elements['source'].elements['host'].attributes['name'] == @host and
+      docroot.elements['source'].elements['dir'].attributes['path'] == @remote_path)
   end
 end
+
+class LVMLibvirtPool < LibvirtPool
+  def initialize(vg_name)
+    super('logical', vg_name)
+
+    @type = 'logical'
+    @build_on_start = false
+
+    @xml.root.elements["source"].add_element("name").add_text(@name)
+
+    @xml.root.elements["target"].elements["path"].text = "/dev/" + @name
+  end
+
+  def xmlequal?(docroot)
+    return (docroot.attributes['type'] == @type and
+      docroot.elements['name'].text == @name and
+      docroot.elements['source'].elements['name'] == @name)
  end
 end
-- 
1.5.4.3
Just a few comments inline -- I haven't had a chance to test this.

Chris Lalancette wrote:
> This patch implements LVM scanning, creation, deletion, and VM use in the
> taskomatic backend. This required a re-thinking of the way we do things, and
> because of this the implementation ended up being much simpler and more
> object-oriented.
> [...]
> +++ b/src/app/models/iscsi_storage_volume.rb
> @@ -22,6 +22,10 @@ class IscsiStorageVolume < StorageVolume
>      "#{storage_pool[:target]}:#{lun}"
>    end
>
> +  def volume_name
> +    "lun"
> +  end
> +

For now we need this -- but once you no longer need the db name here you can
just return the lun directly. Same comment applies to the other storage types.

> +++ b/src/task-omatic/task_storage.rb
> [...]
> +    storage_volume.write_attribute(storage_volume.volume_name, volname)
> +    storage_volume.lv_owner_perms = owner
> +    storage_volume.lv_group_perms = group
> +    storage_volume.lv_mode_perms = mode
> +    storage_volume.save
>    end
> +end

You should probably use save! here as we're doing elsewhere so that it raises
an exception upon failure.

> +        if lvm_db_pool == nil
> +          lvm_db_pool = LvmStoragePool.new
> +          lvm_db_pool[:type] = "LvmStoragePool"
> +          # set the LVM pool to the same hardware pool as the underlying storage
> +          lvm_db_pool.hardware_pool_id = phys_db_pool.hardware_pool_id
> +          lvm_db_pool.vg_name = vgname
> +          lvm_db_pool.save
> +        end

use save!

> +          # OK, put the right lvm_pool_id in place
> +          physical_vol.lvm_pool_id = lvm_db_pool.id
> +          physical_vol.save

save!

> +        libvirt_volume.delete
> +
> +        LvmStorageVolume.delete(lvm_db_volume.id)

We need to get this working with lvm_db_volume.destroy, since the error you
were getting with that might mean that it's failing to do some necessary
cleanup that delete is skipping. But we can probably do that after pushing
this patch.

> +++ b/src/task-omatic/task_vm.rb
> [...]
> +    # OK, the pool should be all set.  The last thing we need to do is get
> +    # the path based on the volume name
> +    storagedevs << libvirt_pool.lookup_vol_by_name(volume.read_attribute(volume.volume_name)).path

And when the volume_name method gets fixed this will also be simplified.

> [...]
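[Illustrative aside on the ActiveRecord behavior the review is relying on. This is a minimal sketch, not code from the patch; it assumes the lvm_db_pool and lvm_db_volume objects from refresh_pool/delete_volume are in scope.]

  # save returns false when the record cannot be saved, so a failure here
  # would go unnoticed and taskomatic would carry on with an unsaved record:
  lvm_db_pool.save

  # save! raises (e.g. ActiveRecord::RecordInvalid) instead, so the caller
  # actually sees the failure:
  lvm_db_pool.save!

  # Class-level delete issues a bare SQL DELETE: no callbacks run and no
  # :dependent associations get cleaned up:
  LvmStorageVolume.delete(lvm_db_volume.id)

  # destroy instantiates the record and runs the full callback chain, which
  # is why the review wants the delete path moved over to it eventually:
  lvm_db_volume.destroy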
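[Likewise, the volume_name cleanup suggested above could eventually look something like the following. This is only a guess at the future shape based on the review comment, not code from the patch.]

  # Hypothetical follow-up: once nothing needs the column *name* any more,
  # each subclass could return the stored value itself ...
  class IscsiStorageVolume < StorageVolume
    def volume_name
      lun
    end
  end

  # ... and callers such as connect_storage_pools could then drop the
  # read_attribute indirection:
  storagedevs << libvirt_pool.lookup_vol_by_name(volume.volume_name).path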