Michael S. Tsirkin
2022-Aug-09 20:16 UTC
[PATCH v2 4/4] hwrng: virtio - always add a pending request
On Wed, Aug 03, 2022 at 02:12:25PM +0100, Vladimir Murzin wrote:> On 8/3/22 13:55, Michael S. Tsirkin wrote: > > On Wed, Aug 03, 2022 at 01:25:15PM +0100, Vladimir Murzin wrote: > >> On 8/3/22 12:39, Michael S. Tsirkin wrote: > >>> On Wed, Aug 03, 2022 at 09:57:35AM +0100, Vladimir Murzin wrote: > >>>> On 8/2/22 13:49, Vladimir Murzin wrote: > >>>>> Hi Laurent, > >>>>> > >>>>> On 10/28/21 11:11, Laurent Vivier wrote: > >>>>>> If we ensure we have already some data available by enqueuing > >>>>>> again the buffer once data are exhausted, we can return what we > >>>>>> have without waiting for the device answer. > >>>>>> > >>>>>> Signed-off-by: Laurent Vivier <lvivier at redhat.com> > >>>>>> --- > >>>>>> drivers/char/hw_random/virtio-rng.c | 26 ++++++++++++-------------- > >>>>>> 1 file changed, 12 insertions(+), 14 deletions(-) > >>>>>> > >>>>>> diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c > >>>>>> index 8ba97cf4ca8f..0a7dde135db1 100644 > >>>>>> --- a/drivers/char/hw_random/virtio-rng.c > >>>>>> +++ b/drivers/char/hw_random/virtio-rng.c > >>>>>> @@ -20,7 +20,6 @@ struct virtrng_info { > >>>>>> struct virtqueue *vq; > >>>>>> char name[25]; > >>>>>> int index; > >>>>>> - bool busy; > >>>>>> bool hwrng_register_done; > >>>>>> bool hwrng_removed; > >>>>>> /* data transfer */ > >>>>>> @@ -44,16 +43,18 @@ static void random_recv_done(struct virtqueue *vq) > >>>>>> return; > >>>>>> > >>>>>> vi->data_idx = 0; > >>>>>> - vi->busy = false; > >>>>>> > >>>>>> complete(&vi->have_data); > >>>>>> } > >>>>>> > >>>>>> -/* The host will fill any buffer we give it with sweet, sweet randomness. */ > >>>>>> -static void register_buffer(struct virtrng_info *vi) > >>>>>> +static void request_entropy(struct virtrng_info *vi) > >>>>>> { > >>>>>> struct scatterlist sg; > >>>>>> > >>>>>> + reinit_completion(&vi->have_data); > >>>>>> + vi->data_avail = 0; > >>>>>> + vi->data_idx = 0; > >>>>>> + > >>>>>> sg_init_one(&sg, vi->data, sizeof(vi->data)); > >>>>>> > >>>>>> /* There should always be room for one buffer. */ > >>>>>> @@ -69,6 +70,8 @@ static unsigned int copy_data(struct virtrng_info *vi, void *buf, > >>>>>> memcpy(buf, vi->data + vi->data_idx, size); > >>>>>> vi->data_idx += size; > >>>>>> vi->data_avail -= size; > >>>>>> + if (vi->data_avail == 0) > >>>>>> + request_entropy(vi); > >>>>>> return size; > >>>>>> } > >>>>>> > >>>>>> @@ -98,13 +101,7 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) > >>>>>> * so either size is 0 or data_avail is 0 > >>>>>> */ > >>>>>> while (size != 0) { > >>>>>> - /* data_avail is 0 */ > >>>>>> - if (!vi->busy) { > >>>>>> - /* no pending request, ask for more */ > >>>>>> - vi->busy = true; > >>>>>> - reinit_completion(&vi->have_data); > >>>>>> - register_buffer(vi); > >>>>>> - } > >>>>>> + /* data_avail is 0 but a request is pending */ > >>>>>> ret = wait_for_completion_killable(&vi->have_data); > >>>>>> if (ret < 0) > >>>>>> return ret; > >>>>>> @@ -126,8 +123,7 @@ static void virtio_cleanup(struct hwrng *rng) > >>>>>> { > >>>>>> struct virtrng_info *vi = (struct virtrng_info *)rng->priv; > >>>>>> > >>>>>> - if (vi->busy) > >>>>>> - complete(&vi->have_data); > >>>>>> + complete(&vi->have_data); > >>>>>> } > >>>>>> > >>>>>> static int probe_common(struct virtio_device *vdev) > >>>>>> @@ -163,6 +159,9 @@ static int probe_common(struct virtio_device *vdev) > >>>>>> goto err_find; > >>>>>> } > >>>>>> > >>>>>> + /* we always have a pending entropy request */ > >>>>>> + request_entropy(vi); > >>>>>> + > >>>>>> return 0; > >>>>>> > >>>>>> err_find: > >>>>>> @@ -181,7 +180,6 @@ static void remove_common(struct virtio_device *vdev) > >>>>>> vi->data_idx = 0; > >>>>>> complete(&vi->have_data); > >>>>>> vdev->config->reset(vdev); > >>>>>> - vi->busy = false; > >>>>>> if (vi->hwrng_register_done) > >>>>>> hwrng_unregister(&vi->hwrng); > >>>>>> vdev->config->del_vqs(vdev); > >>>>> > >>>>> We observed that after this commit virtio-rng implementation in FVP doesn't > >>>>> work > >>>>> > >>>>> INFO: bp.virtio_rng: Selected Random Generator Device: XORSHIFT DEVICE > >>>>> INFO: bp.virtio_rng: Using seed value: 0x5674bba8 > >>>>> Error: FVP_Base_AEMvA: bp.virtio_rng: <vq0-requestq> Found invalid descriptor index > >>>>> In file: (unknown):0 > >>>>> In process: FVP_Base_AEMvA.thread_p_12 @ 935500020 ns > >>>>> Info: FVP_Base_AEMvA: bp.virtio_rng: Could not extract buffer > >>>>> > >>>>> while basic baremetal test works as expected > >>>>> > >>>>> INFO: bp.virtio_rng: Selected Random Generator Device: XORSHIFT DEVICE > >>>>> INFO: bp.virtio_rng: Using seed value: 0x541c142e > >>>>> Info: FVP_Base_AEMv8A: bp.virtio_rng: Generated Number: 0x4b098991ceb377e6 > >>>>> Info: FVP_Base_AEMv8A: bp.virtio_rng: Generated Number: 0xbdcbe3f765ba62f7 > >>>>> > >>>>> We are trying to get an idea what is missing and where, yet none of us familiar > >>>>> with the driver :( > >>>>> > >>>>> I'm looping Kevin who originally reported that and Mauricio who is looking form > >>>>> the FVP side. > >>>> > >>>> With the following diff FVP works agin > >>>> > >>>> diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c > >>>> index a6f3a8a2ac..042503ad6c 100644 > >>>> --- a/drivers/char/hw_random/virtio-rng.c > >>>> +++ b/drivers/char/hw_random/virtio-rng.c > >>>> @@ -54,6 +54,7 @@ static void request_entropy(struct virtrng_info *vi) > >>>> reinit_completion(&vi->have_data); > >>>> vi->data_avail = 0; > >>>> vi->data_idx = 0; > >>>> + smp_mb(); > >>>> > >>>> sg_init_one(&sg, vi->data, sizeof(vi->data)); > >>>> > >>>> > >>>> What do you reckon? > >>>> > >>>> Cheers > >>>> Vladimir > >>> > >>> Thanks for debugging this! > >>> > >>> OK, interesting. > >>> > >>> data_idx and data_avail are accessed from virtio_read. > >>> > >>> Which as far as I can tell is invoked just with reading_mutex. > >>> > >>> > >>> But, request_entropy is called from probe when device is registered > >>> this time without locks > >>> so it can trigger while another thread is calling virtio_read. > >>> > >>> Second request_entropy is called from a callback random_recv_done > >>> also without locks. > >>> > >>> So it's great that smp_mb helped here but I suspect in fact we > >>> need locking. Laurent? > >>> > >> > >> I'm sorry for the noise, but it looks like I'm seeing issue for some different reasons. > >> I manage to reproduce issue even with smb_mb() in place. The reason I though it helped > >> is because I changed both environment and added smb_mb(). > >> > >> Anyway, thank you for your time! > >> > >> Cheers > >> Vladimir > > > > Well we at least have a race condition found by code review here. Here's > > a quick hack attempting to fix it. I don't like it much since > > it adds buffers with GFP_ATOMIC and kicks under a spinlock, but > > for now we can at least test it. I did a quick build but that's > > all, a bit rushed now sorry. Would appreciate knowing whether > > this addresses the issue for you. > > > > > > diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c > > index a6f3a8a2aca6..36121c8d0315 100644 > > --- a/drivers/char/hw_random/virtio-rng.c > > +++ b/drivers/char/hw_random/virtio-rng.c > > @@ -23,6 +23,7 @@ struct virtrng_info { > > bool hwrng_register_done; > > bool hwrng_removed; > > /* data transfer */ > > + spinlock_t lock; > > struct completion have_data; > > unsigned int data_avail; > > unsigned int data_idx; > > @@ -37,6 +38,9 @@ struct virtrng_info { > > static void random_recv_done(struct virtqueue *vq) > > { > > struct virtrng_info *vi = vq->vdev->priv; > > + unsigned long flags; > > + > > + spin_lock_irqsave(&vi->lock, flags); > > > > /* We can get spurious callbacks, e.g. shared IRQs + virtio_pci. */ > > if (!virtqueue_get_buf(vi->vq, &vi->data_avail)) > > @@ -45,20 +49,20 @@ static void random_recv_done(struct virtqueue *vq) > > vi->data_idx = 0; > > > > complete(&vi->have_data); > > + spin_unlock_irqrestore(&vi->lock, flags); > > } > > > > static void request_entropy(struct virtrng_info *vi) > > { > > struct scatterlist sg; > > > > - reinit_completion(&vi->have_data); > > - vi->data_avail = 0; > > + BUG_ON(vi->data_avail != 0); > > vi->data_idx = 0; > > > > sg_init_one(&sg, vi->data, sizeof(vi->data)); > > > > /* There should always be room for one buffer. */ > > - virtqueue_add_inbuf(vi->vq, &sg, 1, vi->data, GFP_KERNEL); > > + virtqueue_add_inbuf(vi->vq, &sg, 1, vi->data, GFP_ATOMIC); > > > > virtqueue_kick(vi->vq); > > } > > @@ -70,8 +74,10 @@ static unsigned int copy_data(struct virtrng_info *vi, void *buf, > > memcpy(buf, vi->data + vi->data_idx, size); > > vi->data_idx += size; > > vi->data_avail -= size; > > - if (vi->data_avail == 0) > > + if (vi->data_avail == 0) { > > + reinit_completion(&vi->have_data); > > request_entropy(vi); > > + } > > return size; > > } > > > > @@ -81,18 +87,21 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) > > struct virtrng_info *vi = (struct virtrng_info *)rng->priv; > > unsigned int chunk; > > size_t read; > > + unsigned long flags; > > > > if (vi->hwrng_removed) > > return -ENODEV; > > > > read = 0; > > > > + spin_lock_irqsave(&vi->lock, flags); > > /* copy available data */ > > if (vi->data_avail) { > > chunk = copy_data(vi, buf, size); > > size -= chunk; > > read += chunk; > > } > > + spin_unlock_irqrestore(&vi->lock, flags); > > > > if (!wait) > > return read; > > @@ -108,12 +117,14 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) > > /* if vi->data_avail is 0, we have been interrupted > > * by a cleanup, but buffer stays in the queue > > */ > > + spin_lock_irqsave(&vi->lock, flags); > > if (vi->data_avail == 0) > > return read; > > > > chunk = copy_data(vi, buf + read, size); > > size -= chunk; > > read += chunk; > > + spin_unlock_irqrestore(&vi->lock, flags); > > } > > > > return read; > > @@ -122,19 +133,25 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) > > static void virtio_cleanup(struct hwrng *rng) > > { > > struct virtrng_info *vi = (struct virtrng_info *)rng->priv; > > + unsigned long flags; > > > > + spin_lock_irqsave(&vi->lock, flags); > > complete(&vi->have_data); > > + spin_unlock_irqrestore(&vi->lock, flags); > > } > > > > static int probe_common(struct virtio_device *vdev) > > { > > int err, index; > > struct virtrng_info *vi = NULL; > > + unsigned long flags; > > > > vi = kzalloc(sizeof(struct virtrng_info), GFP_KERNEL); > > if (!vi) > > return -ENOMEM; > > > > + spin_lock_init(&vi->lock); > > + > > vi->index = index = ida_simple_get(&rng_index_ida, 0, 0, GFP_KERNEL); > > if (index < 0) { > > err = index; > > @@ -162,7 +179,10 @@ static int probe_common(struct virtio_device *vdev) > > virtio_device_ready(vdev); > > > > /* we always have a pending entropy request */ > > - request_entropy(vi); > > + spin_lock_irqsave(&vi->lock, flags); > > + if (vi->data_avail == 0) > > + request_entropy(vi); > > + spin_unlock_irqrestore(&vi->lock, flags); > > > > return 0; > > > > > > Thanks a lot! I gave it a go and it did not help. I think I need to find out how exactly > my environment affected... it not necessary need to be kernel related.Okay ... I'll wait to hear your report then. You are saying maybe there's no bug in kernel, something else changed in your environment? -- MST