Richard W.M. Jones
2018-Jun-22 11:59 UTC
[Libguestfs] v2v: -o rhv-upload: Use Unix domain socket to access imageio (RHBZ#1588088).
v1 was here: https://www.redhat.com/archives/libguestfs/2018-June/msg00099.html v2: - Just fixes the two problems noted in the review of the previous version. Rich.
Richard W.M. Jones
2018-Jun-22 11:59 UTC
[Libguestfs] [PATCH v2 1/2] v2v: -o rhv-upload: Always fetch server options when opening the connection.
Previously we lazily requested the server options in the can_* callbacks. The can_* callbacks are always called by nbdkit straight after open, so this just adds complexity for no benefit. This change simply makes the code always fetch the server options during the open callback. This is — functionally at least — mostly just refactoring. However I also added a useful debug message so we can see what features the imageio server is offering. --- v2v/rhv-upload-plugin.py | 77 +++++++++++++++++++++++------------------------- 1 file changed, 37 insertions(+), 40 deletions(-) diff --git a/v2v/rhv-upload-plugin.py b/v2v/rhv-upload-plugin.py index 7c5084efd..5be426897 100644 --- a/v2v/rhv-upload-plugin.py +++ b/v2v/rhv-upload-plugin.py @@ -165,63 +165,60 @@ def open(readonly): context = context ) + # The first request is to fetch the features of the server. + needs_auth = not params['rhv_direct'] + can_flush = False + can_trim = False + can_zero = False + + http.putrequest("OPTIONS", destination_url.path) + http.putheader("Authorization", transfer.signed_ticket) + http.endheaders() + + r = http.getresponse() + if r.status == 200: + # New imageio never needs authentication. + needs_auth = False + + j = json.loads(r.read()) + can_flush = "flush" in j['features'] + can_trim = "trim" in j['features'] + can_zero = "zero" in j['features'] + + # Old imageio servers returned either 405 Method Not Allowed or + # 204 No Content (with an empty body). If we see that we leave + # all the features as False and they will be emulated. + elif r.status == 405 or r.status == 204: + pass + + else: + raise RuntimeError("could not use OPTIONS request: %d: %s" % + (r.status, r.reason)) + + debug("imageio features: flush=%r trim=%r zero=%r" % + (can_flush, can_trim, can_zero)) + # Save everything we need to make requests in the handle. 
return { - 'can_flush': False, - 'can_trim': False, - 'can_zero': False, + 'can_flush': can_flush, + 'can_trim': can_trim, + 'can_zero': can_zero, 'connection': connection, 'disk': disk, 'disk_service': disk_service, 'failed': False, - 'got_options': False, 'highestwrite': 0, 'http': http, - 'needs_auth': not params['rhv_direct'], + 'needs_auth': needs_auth, 'path': destination_url.path, 'transfer': transfer, 'transfer_service': transfer_service, } -# Can we issue zero, trim or flush requests? -def get_options(h): - if h['got_options']: - return - h['got_options'] = True - - http = h['http'] - transfer = h['transfer'] - - http.putrequest("OPTIONS", h['path']) - http.putheader("Authorization", transfer.signed_ticket) - http.endheaders() - - r = http.getresponse() - if r.status == 200: - # New imageio never needs authentication. - h['needs_auth'] = False - - j = json.loads(r.read()) - h['can_zero'] = "zero" in j['features'] - h['can_trim'] = "trim" in j['features'] - h['can_flush'] = "flush" in j['features'] - - # Old imageio servers returned either 405 Method Not Allowed or - # 204 No Content (with an empty body). If we see that we leave - # all the features as False and they will be emulated. - elif r.status == 405 or r.status == 204: - pass - - else: - raise RuntimeError("could not use OPTIONS request: %d: %s" % - (r.status, r.reason)) - def can_trim(h): - get_options(h) return h['can_trim'] def can_flush(h): - get_options(h) return h['can_flush'] def get_size(h): -- 2.16.2
Richard W.M. Jones
2018-Jun-22 11:59 UTC
[Libguestfs] [PATCH v2 2/2] v2v: -o rhv-upload: Use Unix domain socket to access imageio (RHBZ#1588088).
In the case where virt-v2v runs on the same server as the imageio daemon that we are talking to, it may be possible to optimize access using a Unix domain socket. This is only an optimization. If it fails or if we're not running on the same server it will fall back to the usual HTTPS over TCP connection. --- v2v/rhv-upload-plugin.py | 65 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 62 insertions(+), 3 deletions(-) diff --git a/v2v/rhv-upload-plugin.py b/v2v/rhv-upload-plugin.py index 5be426897..a83b95305 100644 --- a/v2v/rhv-upload-plugin.py +++ b/v2v/rhv-upload-plugin.py @@ -19,11 +19,12 @@ import builtins import json import logging +import socket import ssl import sys import time -from http.client import HTTPSConnection +from http.client import HTTPSConnection, HTTPConnection from urllib.parse import urlparse import ovirtsdk4 as sdk @@ -117,6 +118,25 @@ def open(readonly): if time.time() > endt: raise RuntimeError("timed out waiting for disk to become unlocked") + # Get the current host. If it fails, don't worry. + host = None + try: + with builtin_open("/etc/vdsm/vdsm.id") as f: + vdsm_id = f.readline().strip() + + hosts_service = connection.system_service().hosts_service() + hosts = hosts_service.list( + search="hw_id=%s" % vdsm_id, + case_sensitive=False, + ) + if len(hosts) > 0: + host = hosts[0] + debug("host.id = %r" % host.id) + else: + debug("could not retrieve host with hw_id=%s" % vdsm_id) + except: + pass + # Get a reference to the transfer service. 
transfers_service = system_service.image_transfers_service() @@ -124,6 +144,7 @@ def open(readonly): transfer = transfers_service.add( types.ImageTransfer( disk = types.Disk(id = disk.id), + host = types.Host(id = host.id) if host is not None else None, inactivity_timeout = 3600, ) ) @@ -170,6 +191,7 @@ def open(readonly): can_flush = False can_trim = False can_zero = False + unix_socket = None http.putrequest("OPTIONS", destination_url.path) http.putheader("Authorization", transfer.signed_ticket) @@ -184,6 +206,7 @@ def open(readonly): can_flush = "flush" in j['features'] can_trim = "trim" in j['features'] can_zero = "zero" in j['features'] + unix_socket = j.get('unix_socket') # Old imageio servers returned either 405 Method Not Allowed or # 204 No Content (with an empty body). If we see that we leave @@ -195,8 +218,17 @@ def open(readonly): raise RuntimeError("could not use OPTIONS request: %d: %s" % (r.status, r.reason)) - debug("imageio features: flush=%r trim=%r zero=%r" % - (can_flush, can_trim, can_zero)) + debug("imageio features: flush=%r trim=%r zero=%r unix_socket=%r" % + (can_flush, can_trim, can_zero, unix_socket)) + + # If we are connected to the local host and the host features + # a unix_socket then we can reconnect to that. + if host is not None and unix_socket is not None: + try: + http = UnixHTTPConnection(unix_socket) + debug("optimizing connection using unix socket %r" % unix_socket) + except: + pass # Save everything we need to make requests in the handle. return { @@ -451,3 +483,30 @@ def close(h): raise connection.close() + +# Modify http.client to work over a Unix domain socket. +# Derived from uhttplib written by Erik van Zijst under an MIT license. +# (https://pypi.org/project/uhttplib/) +# Ported to Python 3 by Irit Goihman. 
+ +class UnsupportedError(Exception): + pass + +class _UnixMixin(object): + def set_tunnel(self, host, port=None, headers=None): + raise UnsupportedError("tunneling is not supported") + +class UnixHTTPConnection(_UnixMixin, HTTPConnection): + def __init__(self, path, timeout=socket._GLOBAL_DEFAULT_TIMEOUT): + self.path = path + HTTPConnection.__init__(self, "localhost", timeout=timeout) + + def connect(self): + self.sock = _create_unix_socket(self.timeout) + self.sock.connect(self.path) + +def _create_unix_socket(timeout): + sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: + sock.settimeout(timeout) + return sock -- 2.16.2
Nir Soffer
2018-Jun-25 22:50 UTC
Re: [Libguestfs] [PATCH v2 1/2] v2v: -o rhv-upload: Always fetch server options when opening the connection.
On Fri, Jun 22, 2018 at 2:59 PM Richard W.M. Jones <rjones@redhat.com> wrote:> Previously we lazily requested the server options in the can_* > callbacks. The can_* callbacks are always called by nbdkit straight > after open, so this just adds complexity for no benefit. This change > simply makes the code always fetch the server options during the open > callback. >I agree, the simple way is much better.> > This is — functionally at least — mostly just refactoring. However I > also added a useful debug message so we can see what features the > imageio server is offering. > --- > v2v/rhv-upload-plugin.py | 77 > +++++++++++++++++++++++------------------------- > 1 file changed, 37 insertions(+), 40 deletions(-) > > diff --git a/v2v/rhv-upload-plugin.py b/v2v/rhv-upload-plugin.py > index 7c5084efd..5be426897 100644 > --- a/v2v/rhv-upload-plugin.py > +++ b/v2v/rhv-upload-plugin.py > @@ -165,63 +165,60 @@ def open(readonly): > context = context > ) > > + # The first request is to fetch the features of the server. > + needs_auth = not params['rhv_direct'] > + can_flush = False > + can_trim = False > + can_zero = False > + > + http.putrequest("OPTIONS", destination_url.path) > + http.putheader("Authorization", transfer.signed_ticket)+ http.endheaders()> + > + r = http.getresponse() >We should read the response data here to make sure we consume the entire response before sending the next request.> + if r.status == 200: > + # New imageio never needs authentication. > + needs_auth = False > + > + j = json.loads(r.read()) > + can_flush = "flush" in j['features'] > + can_trim = "trim" in j['features'] > + can_zero = "zero" in j['features'] > + > + # Old imageio servers returned either 405 Method Not Allowed or > + # 204 No Content (with an empty body). If we see that we leave > + # all the features as False and they will be emulated. 
> + elif r.status == 405 or r.status == 204: > + pass > + > + else: > + raise RuntimeError("could not use OPTIONS request: %d: %s" % > + (r.status, r.reason)) >> + > + debug("imageio features: flush=%r trim=%r zero=%r" % > + (can_flush, can_trim, can_zero)) > + > # Save everything we need to make requests in the handle. > return { > - 'can_flush': False, > - 'can_trim': False, > - 'can_zero': False, > + 'can_flush': can_flush, > + 'can_trim': can_trim, > + 'can_zero': can_zero, > 'connection': connection, > 'disk': disk, > 'disk_service': disk_service, > 'failed': False, > - 'got_options': False, > 'highestwrite': 0, > 'http': http, > - 'needs_auth': not params['rhv_direct'], > + 'needs_auth': needs_auth, > 'path': destination_url.path, > 'transfer': transfer, > 'transfer_service': transfer_service, > } > > -# Can we issue zero, trim or flush requests? > -def get_options(h): > - if h['got_options']: > - return > - h['got_options'] = True > - > - http = h['http'] > - transfer = h['transfer'] > - > - http.putrequest("OPTIONS", h['path']) > - http.putheader("Authorization", transfer.signed_ticket) > - http.endheaders() > - > - r = http.getresponse() > - if r.status == 200: > - # New imageio never needs authentication. > - h['needs_auth'] = False > - > - j = json.loads(r.read()) > - h['can_zero'] = "zero" in j['features'] > - h['can_trim'] = "trim" in j['features'] > - h['can_flush'] = "flush" in j['features'] > - > - # Old imageio servers returned either 405 Method Not Allowed or > - # 204 No Content (with an empty body). If we see that we leave > - # all the features as False and they will be emulated. > - elif r.status == 405 or r.status == 204: > - pass > - > - else: > - raise RuntimeError("could not use OPTIONS request: %d: %s" % > - (r.status, r.reason)) > - > def can_trim(h): > - get_options(h) > return h['can_trim'] > > def can_flush(h): > - get_options(h) > return h['can_flush'] > > def get_size(h): >Otherwise looks good. Nir
Nir Soffer
2018-Jun-25 23:28 UTC
Re: [Libguestfs] [PATCH v2 2/2] v2v: -o rhv-upload: Use Unix domain socket to access imageio (RHBZ#1588088).
On Fri, Jun 22, 2018 at 2:59 PM Richard W.M. Jones <rjones@redhat.com> wrote:> In the case where virt-v2v runs on the same server as the imageio > daemon that we are talking to, it may be possible to optimize access > using a Unix domain socket. > > This is only an optimization. If it fails or if we're not running on > the same server it will fall back to the usual HTTPS over TCP > connection. >I wonder how this can happen. If we do: - get current host hardware id - ask engine to start transfer on this host Then there is no way that the transfer will not run on the current host, and we can use the unix socket if imageio supports it.> --- > v2v/rhv-upload-plugin.py | 65 > +++++++++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 62 insertions(+), 3 deletions(-) > > diff --git a/v2v/rhv-upload-plugin.py b/v2v/rhv-upload-plugin.py > index 5be426897..a83b95305 100644 > --- a/v2v/rhv-upload-plugin.py > +++ b/v2v/rhv-upload-plugin.py > @@ -19,11 +19,12 @@ > import builtins > import json > import logging > +import socket > import ssl > import sys > import time > > -from http.client import HTTPSConnection > +from http.client import HTTPSConnection, HTTPConnection > from urllib.parse import urlparse > > import ovirtsdk4 as sdk > @@ -117,6 +118,25 @@ def open(readonly): > if time.time() > endt: > raise RuntimeError("timed out waiting for disk to become > unlocked") > > + # Get the current host. If it fails, don't worry. 
> + host = None > + try: > + with builtin_open("/etc/vdsm/vdsm.id") as f: >Why use builtin_open() instead of open() or io.open()?> + vdsm_id = f.readline().strip() >debug log with the host hardware id would be nice here.> + > + hosts_service = connection.system_service().hosts_service() > + hosts = hosts_service.list( > + search="hw_id=%s" % vdsm_id, > + case_sensitive=False, > + ) > + if len(hosts) > 0: > + host = hosts[0] > + debug("host.id = %r" % host.id) > + else: > + debug("could not retrieve host with hw_id=%s" % vdsm_id) > + except: > + pass >This will make it very hard to debug slow uploads. I agree that optimization can be skipped on errors, but we must log a detailed error message before we fall back to HTTPS. Also bare except should be used only in one case, when you do: try: ... except: cleanup raise If you don't raise, you should use Exception. Otherwise you will swallow KeyboardInterrupt, SystemExit, and GeneratorExit and any other user defined exceptions inheriting from BaseException. Also it will be easier to maintain this if it is in a separate function like: def find_host(): read hardware id or return None... get hosts from engine or return None... return types.Host(host.id)> + > # Get a reference to the transfer service. 
> transfers_service = system_service.image_transfers_service() > > @@ -124,6 +144,7 @@ def open(readonly): > transfer = transfers_service.add( > types.ImageTransfer( > disk = types.Disk(id = disk.id), > + host = types.Host(id = host.id) if host is not None else > None, >We could use here: host = find_host()> inactivity_timeout = 3600, > ) > ) > @@ -170,6 +191,7 @@ def open(readonly): > can_flush = False > can_trim = False > can_zero = False > + unix_socket = None > > http.putrequest("OPTIONS", destination_url.path) > http.putheader("Authorization", transfer.signed_ticket) > @@ -184,6 +206,7 @@ def open(readonly): > can_flush = "flush" in j['features'] > can_trim = "trim" in j['features'] > can_zero = "zero" in j['features'] > + unix_socket = j.get('unix_socket') > > # Old imageio servers returned either 405 Method Not Allowed or > # 204 No Content (with an empty body). If we see that we leave > @@ -195,8 +218,17 @@ def open(readonly): > raise RuntimeError("could not use OPTIONS request: %d: %s" % > (r.status, r.reason)) > > - debug("imageio features: flush=%r trim=%r zero=%r" % > - (can_flush, can_trim, can_zero)) > + debug("imageio features: flush=%r trim=%r zero=%r unix_socket=%r" % > + (can_flush, can_trim, can_zero, unix_socket)) > + > + # If we are connected to the local host and the host features > + # a unix_socket then we can reconnect to that. >"We are connected" sounds strange. Maybe something like: if we started the transfer on the current host...> + if host is not None and unix_socket is not None: > + try: > + http = UnixHTTPConnection(unix_socket) > + debug("optimizing connection using unix socket %r" % > unix_socket) > + except: > + pass> # Save everything we need to make requests in the handle. > return { > @@ -451,3 +483,30 @@ def close(h): > raise > > connection.close() > + > +# Modify http.client to work over a Unix domain socket. >http.client.HTTPConnection> +# Derived from uhttplib written by Erik van Zijst under an MIT license. 
> +# (https://pypi.org/project/uhttplib/) > +# Ported to Python 3 by Irit Goihman. > + > +class UnsupportedError(Exception): > + pass > + > +class _UnixMixin(object): > + def set_tunnel(self, host, port=None, headers=None): > + raise UnsupportedError("tunneling is not supported") >I think we can drop both the mixin and the error, I don't know why I complicated things in this way when I wrote this :-). Since this is not a library and you are not using set_tunnel, there is no need for this.> + > +class UnixHTTPConnection(_UnixMixin, HTTPConnection): > + def __init__(self, path, timeout=socket._GLOBAL_DEFAULT_TIMEOUT): > + self.path = path > + HTTPConnection.__init__(self, "localhost", timeout=timeout) > + > + def connect(self): > + self.sock = _create_unix_socket(self.timeout) > + self.sock.connect(self.path) > + > +def _create_unix_socket(timeout): > + sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) > + if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: > + sock.settimeout(timeout) > + return sock >I would inline this into connect. I added similar change here: https://gerrit.ovirt.org/#/c/92477/1/examples/upload Otherwise this looks fine. Nir
Possibly Parallel Threads
- v2v: -o rhv-upload: Use Unix domain socket to access imageio (RHBZ#1588088).
- [PATCH 00/18] rvh-upload: Various fixes and cleanups
- [PATCH v3] v2v: -o rhv-upload: Use Unix domain socket to access imageio (RHBZ#1588088).
- Re: [PATCH v3] v2v: -o rhv-upload: Use Unix domain socket to access imageio (RHBZ#1588088).
- [PATCH v2 00/11] rvh-upload: Various fixes and cleanups