From: Hank Janssen <hjanssen at microsoft.com>
If we get a SCSI host bus reset we now gracefully handle it, and we take the
device offline.
This before sometimes caused hangs.
Signed-off-by:Hank Janssen <hjanssen at microsoft.com>
Signed-off-by:Haiyang Zhang <haiyangz at microsoft.com>
---
drivers/staging/hv/storvsc.c | 36 +++++++++++++++++++++++++++++++++++-
1 files changed, 35 insertions(+), 1 deletions(-)
diff --git a/drivers/staging/hv/storvsc.c b/drivers/staging/hv/storvsc.c index
6bd2ff1..5f222cf 100644
--- a/drivers/staging/hv/storvsc.c
+++ b/drivers/staging/hv/storvsc.c
@@ -48,7 +48,9 @@ struct storvsc_device {
/* 0 indicates the device is being destroyed */
atomic_t RefCount;
-
+
+ int reset;
+ spinlock_t lock;
atomic_t NumOutstandingRequests;
/*
@@ -93,6 +95,9 @@ static inline struct storvsc_device *AllocStorDevice(struct
hv_device *Device)
atomic_cmpxchg(&storDevice->RefCount, 0, 2);
storDevice->Device = Device;
+ storDevice->reset = 0;
+ spin_lock_init(&storDevice->lock);
+
Device->Extension = storDevice;
return storDevice;
@@ -101,6 +106,7 @@ static inline struct storvsc_device *AllocStorDevice(struct
hv_device *Device) static inline void FreeStorDevice(struct storvsc_device
*Device) {
/* ASSERT(atomic_read(&Device->RefCount) == 0); */
+ /*kfree(Device->lock);*/
kfree(Device);
}
@@ -108,13 +114,24 @@ static inline void FreeStorDevice(struct storvsc_device
*Device) static inline struct storvsc_device *GetStorDevice(struct hv_device
*Device) {
struct storvsc_device *storDevice;
+ unsigned long flags;
storDevice = (struct storvsc_device *)Device->Extension;
+
+ spin_lock_irqsave(&storDevice->lock, flags);
+
+ if (storDevice->reset == 1) {
+ spin_unlock_irqrestore(&storDevice->lock, flags);
+ return NULL;
+ }
+
if (storDevice && atomic_read(&storDevice->RefCount) > 1)
atomic_inc(&storDevice->RefCount);
else
storDevice = NULL;
+ spin_unlock_irqrestore(&storDevice->lock, flags);
+
return storDevice;
}
@@ -122,13 +139,19 @@ static inline struct storvsc_device *GetStorDevice(struct
hv_device *Device) static inline struct storvsc_device
*MustGetStorDevice(struct hv_device *Device) {
struct storvsc_device *storDevice;
+ unsigned long flags;
storDevice = (struct storvsc_device *)Device->Extension;
+
+ spin_lock_irqsave(&storDevice->lock, flags);
+
if (storDevice && atomic_read(&storDevice->RefCount))
atomic_inc(&storDevice->RefCount);
else
storDevice = NULL;
+ spin_unlock_irqrestore(&storDevice->lock, flags);
+
return storDevice;
}
@@ -614,6 +637,7 @@ int StorVscOnHostReset(struct hv_device *Device)
struct storvsc_device *storDevice;
struct storvsc_request_extension *request;
struct vstor_packet *vstorPacket;
+ unsigned long flags;
int ret;
DPRINT_INFO(STORVSC, "resetting host adapter..."); @@ -625,6 +649,16
@@ int StorVscOnHostReset(struct hv_device *Device)
return -1;
}
+ spin_lock_irqsave(&storDevice->lock, flags);
+ storDevice->reset = 1;
+ spin_unlock_irqrestore(&storDevice->lock, flags);
+
+ /*
+ * Wait for traffic in transit to complete
+ */
+ while (atomic_read(&storDevice->NumOutstandingRequests))
+ udelay(1000);
+
request = &storDevice->ResetRequest;
vstorPacket = &request->VStorPacket;
--
1.6.0.2
On Tue, Aug 03, 2010 at 05:31:56PM +0000, Hank Janssen wrote:> From: Hank Janssen <hjanssen at microsoft.com> > > If we get a SCSI host bus reset we now gracefully handle it, and we take the device offline. > This before sometimes caused hangs.Is this a problem for all older versions as well? If so, should it be backported to the -stable kernel releases?> > Signed-off-by:Hank Janssen <hjanssen at microsoft.com> > Signed-off-by:Haiyang Zhang <haiyangz at microsoft.com> > > > --- > drivers/staging/hv/storvsc.c | 36 +++++++++++++++++++++++++++++++++++- > 1 files changed, 35 insertions(+), 1 deletions(-) > > diff --git a/drivers/staging/hv/storvsc.c b/drivers/staging/hv/storvsc.c index 6bd2ff1..5f222cf 100644 > --- a/drivers/staging/hv/storvsc.c > +++ b/drivers/staging/hv/storvsc.c > @@ -48,7 +48,9 @@ struct storvsc_device { > > /* 0 indicates the device is being destroyed */ > atomic_t RefCount; > - > +Trailing whitespace :(> + int reset;Can't this be a bool?> + spinlock_t lock; > atomic_t NumOutstandingRequests; > > /* > @@ -93,6 +95,9 @@ static inline struct storvsc_device *AllocStorDevice(struct hv_device *Device) > atomic_cmpxchg(&storDevice->RefCount, 0, 2); > > storDevice->Device = Device; > + storDevice->reset = 0; > + spin_lock_init(&storDevice->lock); > + > Device->Extension = storDevice; > > return storDevice; > @@ -101,6 +106,7 @@ static inline struct storvsc_device *AllocStorDevice(struct hv_device *Device) static inline void FreeStorDevice(struct storvsc_device *Device) { > /* ASSERT(atomic_read(&Device->RefCount) == 0); */ > + /*kfree(Device->lock);*/Why add a commented out line? Especially one that is incorrect? :)> kfree(Device); > } > > @@ -108,13 +114,24 @@ static inline void FreeStorDevice(struct storvsc_device *Device) static inline struct storvsc_device *GetStorDevice(struct hv_device *Device) { > struct storvsc_device *storDevice; > + unsigned long flags; > > storDevice = (struct storvsc_device *)Device->Extension; > + > + spin_lock_irqsave(&storDevice->lock, flags); > + > + if (storDevice->reset == 1) { > + spin_unlock_irqrestore(&storDevice->lock, flags); > + return NULL;Don't return here, jump to the end of the function and return there. That way you only have one lock/unlock pair and it's much easier to maintain and audit over time that you got everything correct.> + } > + > if (storDevice && atomic_read(&storDevice->RefCount) > 1) > atomic_inc(&storDevice->RefCount); > else > storDevice = NULL; > > + spin_unlock_irqrestore(&storDevice->lock, flags); > + > return storDevice; > } > > @@ -122,13 +139,19 @@ static inline struct storvsc_device *GetStorDevice(struct hv_device *Device) static inline struct storvsc_device *MustGetStorDevice(struct hv_device *Device) { > struct storvsc_device *storDevice; > + unsigned long flags; > > storDevice = (struct storvsc_device *)Device->Extension; > + > + spin_lock_irqsave(&storDevice->lock, flags); > + > if (storDevice && atomic_read(&storDevice->RefCount)) > atomic_inc(&storDevice->RefCount); > else > storDevice = NULL; > > + spin_unlock_irqrestore(&storDevice->lock, flags); > + > return storDevice; > } > > @@ -614,6 +637,7 @@ int StorVscOnHostReset(struct hv_device *Device) > struct storvsc_device *storDevice; > struct storvsc_request_extension *request; > struct vstor_packet *vstorPacket; > + unsigned long flags; > int ret; > > DPRINT_INFO(STORVSC, "resetting host adapter..."); @@ -625,6 +649,16 @@ int StorVscOnHostReset(struct hv_device *Device) > return -1; > } > > + spin_lock_irqsave(&storDevice->lock, flags); > + storDevice->reset = 1; > + spin_unlock_irqrestore(&storDevice->lock, flags); > + > + /* > + * Wait for traffic in transit to complete > + */ > + while (atomic_read(&storDevice->NumOutstandingRequests)) > + udelay(1000);What's ever going to get us out of this loop? You need a fall-back in case this read never succeeds. And why an atomic value if you have a lock protecting it? That's major overkill and is probably not needed. thanks, greg k-h
And then Greg spoke;>>On Tue, Aug 03, 2010 at 05:31:56PM +0000, Hank Janssen wrote: >> From: Hank Janssen <hjanssen at microsoft.com> >> >> If we get a SCSI host bus reset we now gracefully handle it, and we take the device offline. >> This before sometimes caused hangs. > >Is this a problem for all older versions as well? If so, should it be backported to the -stable kernel releases?Yes, this should be backported to all stable kernel releases that have the Hyper-V drivers In them. This fixes a VM freeze. All patches I submitted in the batch of 6 (Except for the TODO patch) fix bugs. This one Fixes the worst one. Thanks, Hank.
Reasonably Related Threads
- [PATCH 6/6] staging: hv: Gracefully handle SCSI resets
- [PATCH 1/8] staging: hv: Convert camel case struct fields in vstorage.h to lowercase
- [PATCH 1/8] staging: hv: Convert camel case struct fields in vstorage.h to lowercase
- [PATCH 2/6] Staging: hv: Rename vm_device to hyperv_device
- [PATCH 2/6] Staging: hv: Rename vm_device to hyperv_device