# HG changeset patch # User john.levon@sun.com # Date 1198195706 28800 # Node ID e57a1b9ef7ebb5fbd3d99af512d202eacd51662c # Parent a87624a82a68cbee7487aea465431ac1415637c5 Fix xend xenstore handling. xend can get into a situation where two processes are attempting to interact with the xenstore socket, with disastrous results. Fix the two bad users of xstransact, add a big warning, and fix the destructor so future mistakes will be detected earlier. Signed-off-by: John Levon <john.levon@sun.com> diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py +++ b/tools/python/xen/xend/XendDomainInfo.py @@ -1537,10 +1537,9 @@ class XendDomainInfo: except: # Log and swallow any exceptions in removal -- # there''s nothing more we can do. - log.exception("Device release failed: %s; %s; %s", - self.info[''name_label''], devclass, dev) - - + log.exception("Device release failed: %s; %s; %s", + self.info[''name_label''], devclass, dev) + t.abort() def getDeviceController(self, name): """Get the device controller for this domain, and if it @@ -1852,7 +1851,6 @@ class XendDomainInfo: # build list of phantom devices to be removed after normal devices plist = [] if self.domid is not None: - from xen.xend.xenstore.xstransact import xstransact t = xstransact("%s/device/vbd" % GetDomainPath(self.domid)) for dev in t.list(): backend_phantom_vbd = xstransact.Read("%s/device/vbd/%s/phantom_vbd" \ @@ -1862,6 +1860,7 @@ class XendDomainInfo: % backend_phantom_vbd) plist.append(backend_phantom_vbd) plist.append(frontend_phantom_vbd) + t.abort() return plist def _cleanup_phantom_devs(self, plist): diff --git a/tools/python/xen/xend/server/pciif.py b/tools/python/xen/xend/server/pciif.py --- a/tools/python/xen/xend/server/pciif.py +++ b/tools/python/xen/xend/server/pciif.py @@ -22,8 +22,6 @@ from xen.xend import sxp from xen.xend import sxp from xen.xend.XendError import VmError from xen.xend.XendLogging import log - -from xen.xend.xenstore.xstransact import xstransact from xen.xend.server.DevController import DevController diff --git a/tools/python/xen/xend/xenstore/xstransact.py b/tools/python/xen/xend/xenstore/xstransact.py --- a/tools/python/xen/xend/xenstore/xstransact.py +++ b/tools/python/xen/xend/xenstore/xstransact.py @@ -7,8 +7,16 @@ from xen.xend.xenstore.xsutil import xshandle +class xstransact: + """WARNING: Be very careful if you''re instantiating an xstransact object + yourself (i.e. not using the capitalized static helpers like .Read(). + It is essential that you clean up the object in place via + t.commit/abort(): GC can happen at any time, including contexts where + it''s not safe to to use the shared xenstore socket fd. In particular, + if xend forks, and GC occurs, we can have two processes trying to + use the same xenstore fd, and all hell breaks loose. + """ -class xstransact: def __init__(self, path = ""): @@ -22,8 +30,9 @@ class xstransact: self.in_transaction = True def __del__(self): + # see above. if self.in_transaction: - xshandle().transaction_end(self.transaction, True) + raise RuntimeError("ERROR: GC of live transaction") def commit(self): if not self.in_transaction: _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
# HG changeset patch # User john.levon@sun.com # Date 1198248324 28800 # Node ID b29e155a85ef663170b0651798c84db79bafcdd8 # Parent d7974e8d8f5131356e7c8fc87f880d737044e91b Fix xend xenstore handling. xend can get into a situation where two processes are attempting to interact with the xenstore socket, with disastrous results. Fix the two bad users of xstransact, add a big warning, and fix the destructor so future mistakes will be detected earlier. Signed-off-by: John Levon <john.levon@sun.com> diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py +++ b/tools/python/xen/xend/XendDomainInfo.py @@ -1533,10 +1533,9 @@ class XendDomainInfo: except: # Log and swallow any exceptions in removal -- # there''s nothing more we can do. - log.exception("Device release failed: %s; %s; %s", - self.info[''name_label''], devclass, dev) - - + log.exception("Device release failed: %s; %s; %s", + self.info[''name_label''], devclass, dev) + t.abort() def getDeviceController(self, name): """Get the device controller for this domain, and if it @@ -1848,7 +1847,6 @@ class XendDomainInfo: # build list of phantom devices to be removed after normal devices plist = [] if self.domid is not None: - from xen.xend.xenstore.xstransact import xstransact t = xstransact("%s/device/vbd" % GetDomainPath(self.domid)) for dev in t.list(): backend_phantom_vbd = xstransact.Read("%s/device/vbd/%s/phantom_vbd" \ @@ -1858,6 +1856,7 @@ class XendDomainInfo: % backend_phantom_vbd) plist.append(backend_phantom_vbd) plist.append(frontend_phantom_vbd) + t.abort() return plist def _cleanup_phantom_devs(self, plist): diff --git a/tools/python/xen/xend/server/pciif.py b/tools/python/xen/xend/server/pciif.py --- a/tools/python/xen/xend/server/pciif.py +++ b/tools/python/xen/xend/server/pciif.py @@ -22,8 +22,6 @@ from xen.xend import sxp from xen.xend import sxp from xen.xend.XendError import VmError from xen.xend.XendLogging import log - -from xen.xend.xenstore.xstransact import xstransact from xen.xend.server.DevController import DevController diff --git a/tools/python/xen/xend/xenstore/xstransact.py b/tools/python/xen/xend/xenstore/xstransact.py --- a/tools/python/xen/xend/xenstore/xstransact.py +++ b/tools/python/xen/xend/xenstore/xstransact.py @@ -7,8 +7,16 @@ from xen.xend.xenstore.xsutil import xshandle +class xstransact: + """WARNING: Be very careful if you''re instantiating an xstransact object + yourself (i.e. not using the capitalized static helpers like .Read(). + It is essential that you clean up the object in place via + t.commit/abort(): GC can happen at any time, including contexts where + it''s not safe to to use the shared xenstore socket fd. In particular, + if xend forks, and GC occurs, we can have two processes trying to + use the same xenstore fd, and all hell breaks loose. + """ -class xstransact: def __init__(self, path = ""): @@ -22,8 +30,9 @@ class xstransact: self.in_transaction = True def __del__(self): + # see above. if self.in_transaction: - xshandle().transaction_end(self.transaction, True) + raise RuntimeError("ERROR: GC of live transaction") def commit(self): if not self.in_transaction: _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
This is the cause of the "reboot loop" xend failures I reported earlier. Note that I''ve only tested this patch against 3.1, however the code, and the fix, is the same in unstable. I realise it''s late in the cycle but this bug is bad enough to need fixing IMHO. cheers, john PS you might get duplicate mails later, Sun''s mail server was playing up # HG changeset patch # User john.levon@sun.com # Date 1198248324 28800 # Node ID b29e155a85ef663170b0651798c84db79bafcdd8 # Parent d7974e8d8f5131356e7c8fc87f880d737044e91b Fix xend xenstore handling. xend can get into a situation where two processes are attempting to interact with the xenstore socket, with disastrous results. Fix the two bad users of xstransact, add a big warning, and fix the destructor so future mistakes will be detected earlier. Signed-off-by: John Levon <john.levon@sun.com> diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py +++ b/tools/python/xen/xend/XendDomainInfo.py @@ -1533,10 +1533,9 @@ class XendDomainInfo: except: # Log and swallow any exceptions in removal -- # there''s nothing more we can do. - log.exception("Device release failed: %s; %s; %s", - self.info[''name_label''], devclass, dev) - - + log.exception("Device release failed: %s; %s; %s", + self.info[''name_label''], devclass, dev) + t.abort() def getDeviceController(self, name): """Get the device controller for this domain, and if it @@ -1848,7 +1847,6 @@ class XendDomainInfo: # build list of phantom devices to be removed after normal devices plist = [] if self.domid is not None: - from xen.xend.xenstore.xstransact import xstransact t = xstransact("%s/device/vbd" % GetDomainPath(self.domid)) for dev in t.list(): backend_phantom_vbd = xstransact.Read("%s/device/vbd/%s/phantom_vbd" \ @@ -1858,6 +1856,7 @@ class XendDomainInfo: % backend_phantom_vbd) plist.append(backend_phantom_vbd) plist.append(frontend_phantom_vbd) + t.abort() return plist def _cleanup_phantom_devs(self, plist): diff --git a/tools/python/xen/xend/server/pciif.py b/tools/python/xen/xend/server/pciif.py --- a/tools/python/xen/xend/server/pciif.py +++ b/tools/python/xen/xend/server/pciif.py @@ -22,8 +22,6 @@ from xen.xend import sxp from xen.xend import sxp from xen.xend.XendError import VmError from xen.xend.XendLogging import log - -from xen.xend.xenstore.xstransact import xstransact from xen.xend.server.DevController import DevController diff --git a/tools/python/xen/xend/xenstore/xstransact.py b/tools/python/xen/xend/xenstore/xstransact.py --- a/tools/python/xen/xend/xenstore/xstransact.py +++ b/tools/python/xen/xend/xenstore/xstransact.py @@ -7,8 +7,16 @@ from xen.xend.xenstore.xsutil import xshandle +class xstransact: + """WARNING: Be very careful if you''re instantiating an xstransact object + yourself (i.e. not using the capitalized static helpers like .Read(). + It is essential that you clean up the object in place via + t.commit/abort(): GC can happen at any time, including contexts where + it''s not safe to to use the shared xenstore socket fd. In particular, + if xend forks, and GC occurs, we can have two processes trying to + use the same xenstore fd, and all hell breaks loose. + """ -class xstransact: def __init__(self, path = ""): @@ -22,8 +30,9 @@ class xstransact: self.in_transaction = True def __del__(self): + # see above. if self.in_transaction: - xshandle().transaction_end(self.transaction, True) + raise RuntimeError("ERROR: GC of live transaction") def commit(self): if not self.in_transaction: _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel P. Berrange
2007-Dec-21 15:08 UTC
Re: [Xen-devel] [PATCH] Fix xend xenstore handling
On Fri, Dec 21, 2007 at 02:57:26PM +0000, John Levon wrote:> > This is the cause of the "reboot loop" xend failures I reported earlier. > Note that I''ve only tested this patch against 3.1, however the code, > and the fix, is the same in unstable. I realise it''s late in the cycle > but this bug is bad enough to need fixing IMHO.I think the entire code segment from the point at which xstransact() is created needs to be in a try...finally block to be safe against the code throwing exceptions, otherwise you could very ocassionally get the final xs.abort() being missed in error conditions. Regards, Dan. -- |=- Red Hat, Engineering, Emerging Technologies, Boston. +1 978 392 2496 -=| |=- Perl modules: http://search.cpan.org/~danberr/ -=| |=- Projects: http://freshmeat.net/~danielpb/ -=| |=- GnuPG: 7D3B9505 F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 -=| _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
On Fri, Dec 21, 2007 at 03:08:03PM +0000, Daniel P. Berrange wrote:> > This is the cause of the "reboot loop" xend failures I reported earlier. > > Note that I''ve only tested this patch against 3.1, however the code, > > and the fix, is the same in unstable. I realise it''s late in the cycle > > but this bug is bad enough to need fixing IMHO. > > I think the entire code segment from the point at which xstransact() is > created needs to be in a try...finally block to be safe against the > code throwing exceptions, otherwise you could very ocassionally get the > final xs.abort() being missed in error conditions.OK, how about this one? (I''m starting testing it now) regards john # HG changeset patch # User john.levon@sun.com # Date 1198250774 28800 # Node ID c847f62cbad09ccbf0ba63ee30204228c16d60fb # Parent 5092403708afc964a908cf1a193f3fc68fb4b950 Fix xend xenstore handling. xend can get into a situation where two processes are attempting to interact with the xenstore socket, with disastrous results. Fix the two bad users of xstransact, add a big warning, and fix the destructor so future mistakes will be detected earlier. Signed-off-by: John Levon <john.levon@sun.com> diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py +++ b/tools/python/xen/xend/XendDomainInfo.py @@ -1529,18 +1529,19 @@ class XendDomainInfo: log.debug("Releasing devices") t = xstransact("%s/device" % self.dompath) - for devclass in XendDevices.valid_devices(): - for dev in t.list(devclass): - try: - log.debug("Removing %s", dev); - self.destroyDevice(devclass, dev, False); - except: - # Log and swallow any exceptions in removal -- - # there''s nothing more we can do. + try: + for devclass in XendDevices.valid_devices(): + for dev in t.list(devclass): + try: + log.debug("Removing %s", dev); + self.destroyDevice(devclass, dev, False); + except: + # Log and swallow any exceptions in removal -- + # there''s nothing more we can do. log.exception("Device release failed: %s; %s; %s", self.info[''name_label''], devclass, dev) - - + finally: + t.abort() def getDeviceController(self, name): """Get the device controller for this domain, and if it @@ -1852,16 +1853,18 @@ class XendDomainInfo: # build list of phantom devices to be removed after normal devices plist = [] if self.domid is not None: - from xen.xend.xenstore.xstransact import xstransact t = xstransact("%s/device/vbd" % GetDomainPath(self.domid)) - for dev in t.list(): - backend_phantom_vbd = xstransact.Read("%s/device/vbd/%s/phantom_vbd" \ - % (self.dompath, dev)) - if backend_phantom_vbd is not None: - frontend_phantom_vbd = xstransact.Read("%s/frontend" \ - % backend_phantom_vbd) - plist.append(backend_phantom_vbd) - plist.append(frontend_phantom_vbd) + try: + for dev in t.list(): + backend_phantom_vbd = xstransact.Read("%s/device/vbd/%s/phantom_vbd" \ + % (self.dompath, dev)) + if backend_phantom_vbd is not None: + frontend_phantom_vbd = xstransact.Read("%s/frontend" \ + % backend_phantom_vbd) + plist.append(backend_phantom_vbd) + plist.append(frontend_phantom_vbd) + finally: + t.abort() return plist def _cleanup_phantom_devs(self, plist): diff --git a/tools/python/xen/xend/server/pciif.py b/tools/python/xen/xend/server/pciif.py --- a/tools/python/xen/xend/server/pciif.py +++ b/tools/python/xen/xend/server/pciif.py @@ -22,8 +22,6 @@ from xen.xend import sxp from xen.xend import sxp from xen.xend.XendError import VmError from xen.xend.XendLogging import log - -from xen.xend.xenstore.xstransact import xstransact from xen.xend.server.DevController import DevController diff --git a/tools/python/xen/xend/xenstore/xstransact.py b/tools/python/xen/xend/xenstore/xstransact.py --- a/tools/python/xen/xend/xenstore/xstransact.py +++ b/tools/python/xen/xend/xenstore/xstransact.py @@ -7,8 +7,16 @@ from xen.xend.xenstore.xsutil import xshandle +class xstransact: + """WARNING: Be very careful if you''re instantiating an xstransact object + yourself (i.e. not using the capitalized static helpers like .Read(). + It is essential that you clean up the object in place via + t.commit/abort(): GC can happen at any time, including contexts where + it''s not safe to to use the shared xenstore socket fd. In particular, + if xend forks, and GC occurs, we can have two processes trying to + use the same xenstore fd, and all hell breaks loose. + """ -class xstransact: def __init__(self, path = ""): @@ -22,8 +30,9 @@ class xstransact: self.in_transaction = True def __del__(self): + # see above. if self.in_transaction: - xshandle().transaction_end(self.transaction, True) + raise RuntimeError("ERROR: GC of live transaction") def commit(self): if not self.in_transaction: _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel P. Berrange
2007-Dec-21 15:31 UTC
Re: [Xen-devel] [PATCH] Fix xend xenstore handling
On Fri, Dec 21, 2007 at 03:27:13PM +0000, John Levon wrote:> On Fri, Dec 21, 2007 at 03:08:03PM +0000, Daniel P. Berrange wrote: > > > > This is the cause of the "reboot loop" xend failures I reported earlier. > > > Note that I''ve only tested this patch against 3.1, however the code, > > > and the fix, is the same in unstable. I realise it''s late in the cycle > > > but this bug is bad enough to need fixing IMHO. > > > > I think the entire code segment from the point at which xstransact() is > > created needs to be in a try...finally block to be safe against the > > code throwing exceptions, otherwise you could very ocassionally get the > > final xs.abort() being missed in error conditions. > > OK, how about this one? (I''m starting testing it now)Yep, that looks good to me. Dan. -- |=- Red Hat, Engineering, Emerging Technologies, Boston. +1 978 392 2496 -=| |=- Perl modules: http://search.cpan.org/~danberr/ -=| |=- Projects: http://freshmeat.net/~danielpb/ -=| |=- GnuPG: 7D3B9505 F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 -=| _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
On Thu, Dec 20, 2007 at 07:42:29PM -0800, john.levon@sun.com wrote:> # HG changeset patch > # User john.levon@sun.com > # Date 1198195706 28800 > # Node ID e57a1b9ef7ebb5fbd3d99af512d202eacd51662c > # Parent a87624a82a68cbee7487aea465431ac1415637c5 > Fix xend xenstore handling.These two are the delayed emails, please ignore thanks john _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel