Tomáš Golembiovský
2022-Apr-20 15:14 UTC
[Libguestfs] [v2v PATCH v3] -o rhv-upload: wait for VM creation task
oVirt API call for VM creation finishes before the VM is actually created. Entities may be still locked after virt-v2v terminates and if user tries to perform (scripted) actions after virt-v2v those operations may fail. To prevent this it is useful to monitor the task and wait for the completion. This will also help to prevent some corner case scenarios (that would be difficult to debug) when the VM creation job fails after virt-v2v already terminates with success. Thanks: Nir Soffer Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1985827 Signed-off-by: Tomáš Golembiovský <tgolembi at redhat.com> Reviewed-by: Arik Hadas <ahadas at redhat.com> Reviewed-by: Nir Soffer <nsoffer at redhat.com> --- output/rhv-upload-createvm.py | 57 ++++++++++++++++++- .../ovirtsdk4/__init__.py | 10 +++- .../ovirtsdk4/types.py | 19 +++++++ 3 files changed, 84 insertions(+), 2 deletions(-) Changes in v3: - this time really increased sleep and decreased timeout diff --git a/output/rhv-upload-createvm.py b/output/rhv-upload-createvm.py index 50bb7e34..8887c52b 100644 --- a/output/rhv-upload-createvm.py +++ b/output/rhv-upload-createvm.py @@ -19,12 +19,54 @@ import json import logging import sys +import time +import uuid from urllib.parse import urlparse import ovirtsdk4 as sdk import ovirtsdk4.types as types + +def debug(s): + if params['verbose']: + print(s, file=sys.stderr) + sys.stderr.flush() + + +def jobs_completed(system_service, correlation_id): + jobs_service = system_service.jobs_service() + + try: + jobs = jobs_service.list( + search="correlation_id=%s" % correlation_id) + except sdk.Error as e: + debug( + "Error searching for jobs with correlation id %s: %s" % + (correlation_id, e)) + # We don't know, assume that jobs did not complete yet. + return False + + # STARTED is the only "in progress" status, anything else means the job + # has already terminated. 
+ if all(job.status != types.JobStatus.STARTED for job in jobs): + failed_jobs = [(job.description, str(job.status)) + for job in jobs + if job.status != types.JobStatus.FINISHED] + if failed_jobs: + raise RuntimeError( + "Failed to create a VM! Failed jobs: %r" % failed_jobs) + return True + else: + running_jobs = [(job.description, str(job.status)) for job in jobs] + debug("Some jobs with correlation id %s are running: %s" % + (correlation_id, running_jobs)) + return False + + +# Seconds to wait for the VM import job to complete in oVirt. +timeout = 3 * 60 + # Parameters are passed in via a JSON doc from the OCaml code. # Because this Python code ships embedded inside virt-v2v there # is no formal API here. @@ -67,6 +109,7 @@ system_service = connection.system_service() cluster = system_service.clusters_service().cluster_service(params['rhv_cluster_uuid']) cluster = cluster.get() +correlation_id = str(uuid.uuid4()) vms_service = system_service.vms_service() vm = vms_service.add( types.Vm( @@ -77,5 +120,17 @@ vm = vms_service.add( data=ovf, ) ) - ) + ), + query={'correlation_id': correlation_id}, ) + +# Wait for the import job to finish. +endt = time.monotonic() + timeout +while True: + time.sleep(10) + if jobs_completed(system_service, correlation_id): + break + if time.monotonic() > endt: + raise RuntimeError( + "Timed out waiting for VM creation!" 
+ " Jobs still running for correlation id %s" % correlation_id) diff --git a/tests/test-v2v-o-rhv-upload-module/ovirtsdk4/__init__.py b/tests/test-v2v-o-rhv-upload-module/ovirtsdk4/__init__.py index 0d8f33b3..e33d0714 100644 --- a/tests/test-v2v-o-rhv-upload-module/ovirtsdk4/__init__.py +++ b/tests/test-v2v-o-rhv-upload-module/ovirtsdk4/__init__.py @@ -59,6 +59,9 @@ class SystemService(object): def disks_service(self): return DisksService() + def jobs_service(self): + return JobsService() + def image_transfers_service(self): return ImageTransfersService() @@ -104,6 +107,11 @@ class DisksService(object): return DiskService(disk_id) +class JobsService(object): + def list(self, search=None): + return [types.Job()] + + class ImageTransferService(object): def __init__(self): self._finalized = False @@ -135,7 +143,7 @@ class StorageDomainsService(object): class VmsService(object): - def add(self, vm): + def add(self, vm, query=None): return vm def list(self, search=None): diff --git a/tests/test-v2v-o-rhv-upload-module/ovirtsdk4/types.py b/tests/test-v2v-o-rhv-upload-module/ovirtsdk4/types.py index 5707fa3e..38d89573 100644 --- a/tests/test-v2v-o-rhv-upload-module/ovirtsdk4/types.py +++ b/tests/test-v2v-o-rhv-upload-module/ovirtsdk4/types.py @@ -141,6 +141,25 @@ class Initialization(object): pass +class JobStatus(Enum): + ABORTED = "aborted" + FAILED = "failed" + FINISHED = "finished" + STARTED = "started" + UNKNOWN = "unknown" + + def __init__(self, image): + self._image = image + + def __str__(self): + return self._image + + +class Job(object): + description = "Fake job" + status = JobStatus.FINISHED + + class StorageDomain(object): def __init__(self, name=None): pass -- 2.35.1
Richard W.M. Jones
2022-Apr-20 16:39 UTC
[Libguestfs] [v2v PATCH v3] -o rhv-upload: wait for VM creation task
On Wed, Apr 20, 2022 at 05:14:26PM +0200, Tomáš Golembiovský wrote: > oVirt API call for VM creation finishes before the VM is actually > created. Entities may be still locked after virt-v2v terminates and if > user tries to perform (scripted) actions after virt-v2v those operations > may fail. To prevent this it is useful to monitor the task and wait for > the completion. This will also help to prevent some corner case > scenarios (that would be difficult to debug) when the VM creation job > fails after virt-v2v already terminates with success. > > Thanks: Nir Soffer > Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1985827 > Signed-off-by: Tomáš Golembiovský <tgolembi at redhat.com> > Reviewed-by: Arik Hadas <ahadas at redhat.com> > Reviewed-by: Nir Soffer <nsoffer at redhat.com> Thanks Tomas, this works for me and I've pushed it just now: 06e69bae25..291edb363e master -> master Rich. > output/rhv-upload-createvm.py | 57 ++++++++++++++++++- > .../ovirtsdk4/__init__.py | 10 +++- > .../ovirtsdk4/types.py | 19 +++++++ > 3 files changed, 84 insertions(+), 2 deletions(-) > > Changes in v3: > > - this time really increased sleep and decreased timeout > > diff --git a/output/rhv-upload-createvm.py b/output/rhv-upload-createvm.py > index 50bb7e34..8887c52b 100644 > --- a/output/rhv-upload-createvm.py > +++ b/output/rhv-upload-createvm.py > @@ -19,12 +19,54 @@ > import json > import logging > import sys > +import time > +import uuid > > from urllib.parse import urlparse > > import ovirtsdk4 as sdk > import ovirtsdk4.types as types > > + > +def debug(s): > + if params['verbose']: > + print(s, file=sys.stderr) > + sys.stderr.flush() > + > + > +def jobs_completed(system_service, correlation_id): > + jobs_service = system_service.jobs_service() > + > + try: > + jobs = jobs_service.list( > + search="correlation_id=%s" % correlation_id) > + except sdk.Error as e: > + debug( > + "Error searching for jobs with correlation id %s: %s" % > + (correlation_id, e)) > + # We don't 
know, assume that jobs did not complete yet. > + return False > + > + # STARTED is the only "in progress" status, anything else means the job > + # has already terminated. > + if all(job.status != types.JobStatus.STARTED for job in jobs): > + failed_jobs = [(job.description, str(job.status)) > + for job in jobs > + if job.status != types.JobStatus.FINISHED] > + if failed_jobs: > + raise RuntimeError( > + "Failed to create a VM! Failed jobs: %r" % failed_jobs) > + return True > + else: > + running_jobs = [(job.description, str(job.status)) for job in jobs] > + debug("Some jobs with correlation id %s are running: %s" % > + (correlation_id, running_jobs)) > + return False > + > + > +# Seconds to wait for the VM import job to complete in oVirt. > +timeout = 3 * 60 > + > # Parameters are passed in via a JSON doc from the OCaml code. > # Because this Python code ships embedded inside virt-v2v there > # is no formal API here. > @@ -67,6 +109,7 @@ system_service = connection.system_service() > cluster = system_service.clusters_service().cluster_service(params['rhv_cluster_uuid']) > cluster = cluster.get() > > +correlation_id = str(uuid.uuid4()) > vms_service = system_service.vms_service() > vm = vms_service.add( > types.Vm( > @@ -77,5 +120,17 @@ vm = vms_service.add( > data=ovf, > ) > ) > - ) > + ), > + query={'correlation_id': correlation_id}, > ) > + > +# Wait for the import job to finish. > +endt = time.monotonic() + timeout > +while True: > + time.sleep(10) > + if jobs_completed(system_service, correlation_id): > + break > + if time.monotonic() > endt: > + raise RuntimeError( > + "Timed out waiting for VM creation!" 
> + " Jobs still running for correlation id %s" % correlation_id) > diff --git a/tests/test-v2v-o-rhv-upload-module/ovirtsdk4/__init__.py b/tests/test-v2v-o-rhv-upload-module/ovirtsdk4/__init__.py > index 0d8f33b3..e33d0714 100644 > --- a/tests/test-v2v-o-rhv-upload-module/ovirtsdk4/__init__.py > +++ b/tests/test-v2v-o-rhv-upload-module/ovirtsdk4/__init__.py > @@ -59,6 +59,9 @@ class SystemService(object): > def disks_service(self): > return DisksService() > > + def jobs_service(self): > + return JobsService() > + > def image_transfers_service(self): > return ImageTransfersService() > > @@ -104,6 +107,11 @@ class DisksService(object): > return DiskService(disk_id) > > > +class JobsService(object): > + def list(self, search=None): > + return [types.Job()] > + > + > class ImageTransferService(object): > def __init__(self): > self._finalized = False > @@ -135,7 +143,7 @@ class StorageDomainsService(object): > > > class VmsService(object): > - def add(self, vm): > + def add(self, vm, query=None): > return vm > > def list(self, search=None): > diff --git a/tests/test-v2v-o-rhv-upload-module/ovirtsdk4/types.py b/tests/test-v2v-o-rhv-upload-module/ovirtsdk4/types.py > index 5707fa3e..38d89573 100644 > --- a/tests/test-v2v-o-rhv-upload-module/ovirtsdk4/types.py > +++ b/tests/test-v2v-o-rhv-upload-module/ovirtsdk4/types.py > @@ -141,6 +141,25 @@ class Initialization(object): > pass > > > +class JobStatus(Enum): > + ABORTED = "aborted" > + FAILED = "failed" > + FINISHED = "finished" > + STARTED = "started" > + UNKNOWN = "unknown" > + > + def __init__(self, image): > + self._image = image > + > + def __str__(self): > + return self._image > + > + > +class Job(object): > + description = "Fake job" > + status = JobStatus.FINISHED > + > + > class StorageDomain(object): > def __init__(self, name=None): > pass > -- > 2.35.1 > > _______________________________________________ > Libguestfs mailing list > Libguestfs at redhat.com > 
https://listman.redhat.com/mailman/listinfo/libguestfs -- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Read my programming and virtualization blog: http://rwmj.wordpress.com Fedora Windows cross-compiler. Compile Windows programs, test, and build Windows installers. Over 100 libraries supported. http://fedoraproject.org/wiki/MinGW