Olaf Hering
2011-Jan-21 16:09 UTC
[Xen-devel] initial kdump support for domU, and xswatch question
This week I worked on kdump support with SLES11SP1 as dom0/domU. I came up with the patch below which works ok in my testing. There is also a kernel patch, which is not finished yet (proper crashkernel detection missing). During my testing and "fine-tuning" I came across an issue that I haven't figured out yet: In the added function _handleCrashDumpWatch() a new watch on ${backend}/state should be registered. This watch never triggers for some reason, even though the values do change. Is registering a watch within a watch supposed to work? I would like to send an event from _handleCrashDumpWatchCallback() when the backend switched state to avoid a hardcoded sleep, similar to what the hotplug scripts do. Any ideas what is wrong with my attempt? Olaf tools/python/xen/xend/XendDomainInfo.py | 107 ++++++++++++++ --- xen-4.0.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py +++ xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py @@ -35,6 +35,7 @@ import stat import shutil import traceback from types import StringTypes +from threading import Event import xen.lowlevel.xc from xen.util import asserts, auxbin, mkdir @@ -2391,6 +2392,96 @@ class XendDomainInfo: return self.getDeviceController(deviceClass).reconfigureDevice( devid, devconfig) + def _handleCrashDumpWatchCallback(self, arg, ev): + log.debug("_handleCrashDumpWatchCallback called with ''%s''" % arg) + v = None + try: + v = xstransact.Read(arg) + except: + log.exception("_handleCrashDumpWatchCallback exception") + ev.set() + return False + if not int(v) == 4: + ev.set() + return True + + # reset all devices where frontend and backend is in state XenbusStateConnected + # protocol: + # initial value is 0 + # crashed guest writes 1, this function resets all devices + # this function writes 2, to notify the guest about the finished reset process + def _handleCrashDumpWatch(self, arg): + log.debug("_handleCrashDumpWatch called with ''%s''" % arg) + v = None + delay = 3.0 + try: + v = xstransact.Read(arg) 
+ except: + log.exception("_handleCrashDumpWatch exception") + log.debug("_handleCrashDumpWatch trigger value %s" % v) + if not int(v) == 1: + return True + try: + ev = Event() + t = xstransact("%s/device" % self.vmpath) + try: + for devclass in XendDevices.valid_devices(): + for dev in t.list(devclass): + self.crashWatchCallback = backend = frontend = f_state = b_state = None + try: + log.debug("Reading dev %s", dev) + frontend = xstransact.Read("%s/device/%s" % (self.vmpath, dev), "frontend") + f_state = xstransact.Read(frontend, "state") + backend = xstransact.Read("%s/device/%s" % (self.vmpath, dev), "backend") + b_state = xstransact.Read(backend, "state") + log.debug(''backend %s(%s) frontend %s(%s)'', backend, b_state, frontend, f_state) + except: + log.exception("Reading frontend/backend state failed: %s; %s; %s", + self.info[''name_label''], + devclass, dev) + pass + try: + # XenbusStateConnected + if b_state == "4" and f_state == "4": + ev.clear() + self.crashWatchCallback = xswatch(backend + ''/state'', self._handleCrashDumpWatchCallback, ev) + # XenbusStateClosing + log.debug("Set %s to XenbusStateClosing", frontend) + xstransact.Write(frontend, "state", "5") + ev.wait(delay) + b_state = xstransact.Read(backend, "state") + log.debug(''backend %s/state == %s'', backend, b_state) + # XenbusStateClosed + log.debug("Set %s to XenbusStateClosed", frontend) + xstransact.Write(frontend, "state", "6") + ev.wait(delay) + b_state = xstransact.Read(backend, "state") + log.debug(''backend %s/state == %s'', backend, b_state) + # XenbusStateInitialising + log.debug("Set %s to XenbusStateInitialising", frontend) + xstransact.Write(frontend, "state", "1") + ev.wait(delay) + b_state = xstransact.Read(backend, "state") + log.debug(''backend %s/state == %s'', backend, b_state) + if self.crashWatchCallback: + try: + self.crashWatchCallback.unwatch() + except: + pass + except: + log.debug("state write failed for %s" % frontend) + pass + finally: + t.abort() + 
log.debug("_handleCrashDumpWatch notify guest") + xstransact.Write(self.dompath, ''device-reset-trigger'', "2") + except: + log.debug("_handleCrashDumpWatch exception") + pass + log.debug("_handleCrashDumpWatch done") + # release this watch + return False + def _createDevices(self, resume = False): """Create the devices for a vm. @@ -2439,6 +2530,12 @@ class XendDomainInfo: self.info[''devices''][dev_uuid][1][''devid''] = devid + xstransact.Write(self.dompath, ''device-reset-trigger'', "0") + xstransact.SetPermissions(self.dompath + ''/device-reset-trigger'', + { ''dom'': self.getDomid(), ''read'': True, ''write'': True }) + self.crashWatch = xswatch(self.dompath + ''/device-reset-trigger'', + self._handleCrashDumpWatch) + if self.image: self.image.createDeviceModel(resume) self.image.createXenPaging() @@ -2479,6 +2576,16 @@ class XendDomainInfo: finally: t.abort() + try: + if self.crashWatch: + try: + self.crashWatch.unwatch() + except: + pass + finally: + self.crashWatch = None + + def getDeviceController(self, name): """Get the device controller for this domain, and if it doesn''t exist, create it. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel