From: Hank Janssen <hjanssen at microsoft.com> If we get a SCSI host bus reset we now gracefully handle it, and we take the device offline. This before sometimes caused hangs. Signed-off-by:Hank Janssen <hjanssen at microsoft.com> Signed-off-by:Haiyang Zhang <haiyangz at microsoft.com> --- drivers/staging/hv/storvsc.c | 36 +++++++++++++++++++++++++++++++++++- 1 files changed, 35 insertions(+), 1 deletions(-) diff --git a/drivers/staging/hv/storvsc.c b/drivers/staging/hv/storvsc.c index 6bd2ff1..5f222cf 100644 --- a/drivers/staging/hv/storvsc.c +++ b/drivers/staging/hv/storvsc.c @@ -48,7 +48,9 @@ struct storvsc_device { /* 0 indicates the device is being destroyed */ atomic_t RefCount; - + + int reset; + spinlock_t lock; atomic_t NumOutstandingRequests; /* @@ -93,6 +95,9 @@ static inline struct storvsc_device *AllocStorDevice(struct hv_device *Device) atomic_cmpxchg(&storDevice->RefCount, 0, 2); storDevice->Device = Device; + storDevice->reset = 0; + spin_lock_init(&storDevice->lock); + Device->Extension = storDevice; return storDevice; @@ -101,6 +106,7 @@ static inline struct storvsc_device *AllocStorDevice(struct hv_device *Device) static inline void FreeStorDevice(struct storvsc_device *Device) { /* ASSERT(atomic_read(&Device->RefCount) == 0); */ + /*kfree(Device->lock);*/ kfree(Device); } @@ -108,13 +114,24 @@ static inline void FreeStorDevice(struct storvsc_device *Device) static inline struct storvsc_device *GetStorDevice(struct hv_device *Device) { struct storvsc_device *storDevice; + unsigned long flags; storDevice = (struct storvsc_device *)Device->Extension; + + spin_lock_irqsave(&storDevice->lock, flags); + + if (storDevice->reset == 1) { + spin_unlock_irqrestore(&storDevice->lock, flags); + return NULL; + } + if (storDevice && atomic_read(&storDevice->RefCount) > 1) atomic_inc(&storDevice->RefCount); else storDevice = NULL; + spin_unlock_irqrestore(&storDevice->lock, flags); + return storDevice; } @@ -122,13 +139,19 @@ static inline struct storvsc_device *GetStorDevice(struct hv_device *Device) static inline struct storvsc_device *MustGetStorDevice(struct hv_device *Device) { struct storvsc_device *storDevice; + unsigned long flags; storDevice = (struct storvsc_device *)Device->Extension; + + spin_lock_irqsave(&storDevice->lock, flags); + if (storDevice && atomic_read(&storDevice->RefCount)) atomic_inc(&storDevice->RefCount); else storDevice = NULL; + spin_unlock_irqrestore(&storDevice->lock, flags); + return storDevice; } @@ -614,6 +637,7 @@ int StorVscOnHostReset(struct hv_device *Device) struct storvsc_device *storDevice; struct storvsc_request_extension *request; struct vstor_packet *vstorPacket; + unsigned long flags; int ret; DPRINT_INFO(STORVSC, "resetting host adapter..."); @@ -625,6 +649,16 @@ int StorVscOnHostReset(struct hv_device *Device) return -1; } + spin_lock_irqsave(&storDevice->lock, flags); + storDevice->reset = 1; + spin_unlock_irqrestore(&storDevice->lock, flags); + + /* + * Wait for traffic in transit to complete + */ + while (atomic_read(&storDevice->NumOutstandingRequests)) + udelay(1000); + request = &storDevice->ResetRequest; vstorPacket = &request->VStorPacket; -- 1.6.0.2
On Tue, Aug 03, 2010 at 05:31:56PM +0000, Hank Janssen wrote:> From: Hank Janssen <hjanssen at microsoft.com> > > If we get a SCSI host bus reset we now gracefully handle it, and we take the device offline. > This before sometimes caused hangs.Is this a problem for all older versions as well? If so, should it be backported to the -stable kernel releases?> > Signed-off-by:Hank Janssen <hjanssen at microsoft.com> > Signed-off-by:Haiyang Zhang <haiyangz at microsoft.com> > > > --- > drivers/staging/hv/storvsc.c | 36 +++++++++++++++++++++++++++++++++++- > 1 files changed, 35 insertions(+), 1 deletions(-) > > diff --git a/drivers/staging/hv/storvsc.c b/drivers/staging/hv/storvsc.c index 6bd2ff1..5f222cf 100644 > --- a/drivers/staging/hv/storvsc.c > +++ b/drivers/staging/hv/storvsc.c > @@ -48,7 +48,9 @@ struct storvsc_device { > > /* 0 indicates the device is being destroyed */ > atomic_t RefCount; > - > +Trailing whitespace :(> + int reset;Can't this be a bool?> + spinlock_t lock; > atomic_t NumOutstandingRequests; > > /* > @@ -93,6 +95,9 @@ static inline struct storvsc_device *AllocStorDevice(struct hv_device *Device) > atomic_cmpxchg(&storDevice->RefCount, 0, 2); > > storDevice->Device = Device; > + storDevice->reset = 0; > + spin_lock_init(&storDevice->lock); > + > Device->Extension = storDevice; > > return storDevice; > @@ -101,6 +106,7 @@ static inline struct storvsc_device *AllocStorDevice(struct hv_device *Device) static inline void FreeStorDevice(struct storvsc_device *Device) { > /* ASSERT(atomic_read(&Device->RefCount) == 0); */ > + /*kfree(Device->lock);*/Why add a commented out line? Especially one that is incorrect? :)> kfree(Device); > } > > @@ -108,13 +114,24 @@ static inline void FreeStorDevice(struct storvsc_device *Device) static inline struct storvsc_device *GetStorDevice(struct hv_device *Device) { > struct storvsc_device *storDevice; > + unsigned long flags; > > storDevice = (struct storvsc_device *)Device->Extension; > + > + spin_lock_irqsave(&storDevice->lock, flags); > + > + if (storDevice->reset == 1) { > + spin_unlock_irqrestore(&storDevice->lock, flags); > + return NULL;Don't return here, jump to the end of the function and return there. That way you only have one lock/unlock pair and it's much easier to maintain and audit over time that you got everything correct.> + } > + > if (storDevice && atomic_read(&storDevice->RefCount) > 1) > atomic_inc(&storDevice->RefCount); > else > storDevice = NULL; > > + spin_unlock_irqrestore(&storDevice->lock, flags); > + > return storDevice; > } > > @@ -122,13 +139,19 @@ static inline struct storvsc_device *GetStorDevice(struct hv_device *Device) static inline struct storvsc_device *MustGetStorDevice(struct hv_device *Device) { > struct storvsc_device *storDevice; > + unsigned long flags; > > storDevice = (struct storvsc_device *)Device->Extension; > + > + spin_lock_irqsave(&storDevice->lock, flags); > + > if (storDevice && atomic_read(&storDevice->RefCount)) > atomic_inc(&storDevice->RefCount); > else > storDevice = NULL; > > + spin_unlock_irqrestore(&storDevice->lock, flags); > + > return storDevice; > } > > @@ -614,6 +637,7 @@ int StorVscOnHostReset(struct hv_device *Device) > struct storvsc_device *storDevice; > struct storvsc_request_extension *request; > struct vstor_packet *vstorPacket; > + unsigned long flags; > int ret; > > DPRINT_INFO(STORVSC, "resetting host adapter..."); @@ -625,6 +649,16 @@ int StorVscOnHostReset(struct hv_device *Device) > return -1; > } > > + spin_lock_irqsave(&storDevice->lock, flags); > + storDevice->reset = 1; > + spin_unlock_irqrestore(&storDevice->lock, flags); > + > + /* > + * Wait for traffic in transit to complete > + */ > + while (atomic_read(&storDevice->NumOutstandingRequests)) > + udelay(1000);What's ever going to get us out of this loop? You need a fall-back in case this read never succeeds. And why an atomic value if you have a lock protecting it? That's major overkill and is probably not needed. thanks, greg k-h
And then Greg spoke;>>On Tue, Aug 03, 2010 at 05:31:56PM +0000, Hank Janssen wrote: >> From: Hank Janssen <hjanssen at microsoft.com> >> >> If we get a SCSI host bus reset we now gracefully handle it, and we take the device offline. >> This before sometimes caused hangs. > >Is this a problem for all older versions as well? If so, should it be backported to the -stable kernel releases?Yes, this should be backported to all stable kernel releases that have the Hyper-V drivers In them. This fixes a VM freeze. All patches I submitted in the batch of 6 (Except for the TODO patch) fix bugs. This one Fixes the worst one. Thanks, Hank.
Reasonably Related Threads
- [PATCH 6/6] staging: hv: Gracefully handle SCSI resets
- [PATCH 1/8] staging: hv: Convert camel case struct fields in vstorage.h to lowercase
- [PATCH 1/8] staging: hv: Convert camel case struct fields in vstorage.h to lowercase
- [PATCH 2/6] Staging: hv: Rename vm_device to hyperv_device
- [PATCH 2/6] Staging: hv: Rename vm_device to hyperv_device