Rusty Russell
2014-Sep-18 02:48 UTC
[PATCH 1/5] hw_random: place mutex around read functions and buffers.
There's currently a big lock around everything, and it means that we can't query sysfs (eg /sys/devices/virtual/misc/hw_random/rng_current) while the rng is reading. This is a real problem when the rng is slow, or blocked (eg. virtio_rng with qemu's default /dev/random backend) This doesn't help (it leaves the current lock untouched), just adds a lock to protect the read function and the static buffers, in preparation for transition. Signed-off-by: Rusty Russell <rusty at rustcorp.com.au> --- drivers/char/hw_random/core.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index aa30a25c8d49..b1b6042ad85c 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -53,7 +53,10 @@ static struct hwrng *current_rng; static struct task_struct *hwrng_fill; static LIST_HEAD(rng_list); +/* Protects rng_list and current_rng */ static DEFINE_MUTEX(rng_mutex); +/* Protects rng read functions, data_avail, rng_buffer and rng_fillbuf */ +static DEFINE_MUTEX(reading_mutex); static int data_avail; static u8 *rng_buffer, *rng_fillbuf; static unsigned short current_quality; @@ -81,7 +84,9 @@ static void add_early_randomness(struct hwrng *rng) unsigned char bytes[16]; int bytes_read; + mutex_lock(&reading_mutex); bytes_read = rng_get_data(rng, bytes, sizeof(bytes), 1); + mutex_unlock(&reading_mutex); if (bytes_read > 0) add_device_randomness(bytes, bytes_read); } @@ -128,6 +133,7 @@ static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size, int wait) { int present; + BUG_ON(!mutex_is_locked(&reading_mutex)); if (rng->read) return rng->read(rng, (void *)buffer, size, wait); @@ -160,13 +166,14 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, goto out_unlock; } + mutex_lock(&reading_mutex); if (!data_avail) { bytes_read = rng_get_data(current_rng, rng_buffer, rng_buffer_size(), !(filp->f_flags & O_NONBLOCK)); if (bytes_read < 0) { err = bytes_read; - goto out_unlock; + goto out_unlock_reading; } data_avail = bytes_read; } @@ -174,7 +181,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, if (!data_avail) { if (filp->f_flags & O_NONBLOCK) { err = -EAGAIN; - goto out_unlock; + goto out_unlock_reading; } } else { len = data_avail; @@ -186,7 +193,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, if (copy_to_user(buf + ret, rng_buffer + data_avail, len)) { err = -EFAULT; - goto out_unlock; + goto out_unlock_reading; } size -= len; @@ -194,6 +201,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, } mutex_unlock(&rng_mutex); + mutex_unlock(&reading_mutex); if (need_resched()) schedule_timeout_interruptible(1); @@ -208,6 +216,9 @@ out: out_unlock: mutex_unlock(&rng_mutex); goto out; +out_unlock_reading: + mutex_unlock(&reading_mutex); + goto out_unlock; } @@ -348,13 +359,16 @@ static int hwrng_fillfn(void *unused) while (!kthread_should_stop()) { if (!current_rng) break; + mutex_lock(&reading_mutex); rc = rng_get_data(current_rng, rng_fillbuf, rng_buffer_size(), 1); + mutex_unlock(&reading_mutex); if (rc <= 0) { pr_warn("hwrng: no data available\n"); msleep_interruptible(10000); continue; } + /* Outside lock, sure, but y'know: randomness. */ add_hwgenerator_randomness((void *)rng_fillbuf, rc, rc * current_quality * 8 >> 10); } -- 1.9.1
Amos Kong
2014-Sep-18 12:22 UTC
[PATCH 2/5] hw_random: use reference counts on each struct hwrng.
On Thu, Sep 18, 2014 at 12:18:23PM +0930, Rusty Russell wrote:> current_rng holds one reference, and we bump it every time we want > to do a read from it. > > This means we only hold the rng_mutex to grab or drop a reference, > so accessing /sys/devices/virtual/misc/hw_random/rng_current doesn't > block on read of /dev/hwrng. > > Using a kref is overkill (we're always under the rng_mutex), but > a standard pattern. > > This also solves the problem that the hwrng_fillfn thread was > accessing current_rng without a lock, which could change (eg. to NULL) > underneath it.Hi Rusty,> Signed-off-by: Rusty Russell <rusty at rustcorp.com.au> > --- > drivers/char/hw_random/core.c | 135 ++++++++++++++++++++++++++++-------------- > include/linux/hw_random.h | 2 + > 2 files changed, 94 insertions(+), 43 deletions(-)...> static int rng_dev_open(struct inode *inode, struct file *filp) > { > /* enforce read-only access to this chrdev */ > @@ -154,21 +202,22 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, > ssize_t ret = 0; > int err = 0; > int bytes_read, len; > + struct hwrng *rng; > > while (size) { > - if (mutex_lock_interruptible(&rng_mutex)) { > - err = -ERESTARTSYS; > + rng = get_current_rng(); > + if (IS_ERR(rng)) { > + err = PTR_ERR(rng); > goto out; > } > - > - if (!current_rng) { > + if (!rng) { > err = -ENODEV; > - goto out_unlock; > + goto out; > } > > mutex_lock(&reading_mutex); > if (!data_avail) { > - bytes_read = rng_get_data(current_rng, rng_buffer, > + bytes_read = rng_get_data(rng, rng_buffer, > rng_buffer_size(), > !(filp->f_flags & O_NONBLOCK)); > if (bytes_read < 0) { > @@ -200,7 +249,6 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, > ret += len; > } > > - mutex_unlock(&rng_mutex); > mutex_unlock(&reading_mutex); > > if (need_resched()) > @@ -210,15 +258,16 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, > err = -ERESTARTSYS;We need put_rng() in this error path. Otherwise, unhotplug will hang in the end of hwrng_unregister() | /* Just in case rng is reading right now, wait. */ | wait_event(rng_done, atomic_read(&rng->ref.refcount) == 0); Steps to reproduce the hang: guest) # dd if=/dev/hwrng of=/dev/null cancel dd process after 10 seconds guest) # dd if=/dev/hwrng of=/dev/null & hotunplug rng device from qemu monitor result: device can't be removed (still can find in QEMU monitor) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 96fa067..4e22d70 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -258,6 +258,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, if (signal_pending(current)) { err = -ERESTARTSYS; + put_rng(rng); goto out; }> goto out; > } > + > + put_rng(rng); > } > out: > return ret ? : err; > -out_unlock: > - mutex_unlock(&rng_mutex); > - goto out; > + > out_unlock_reading: > mutex_unlock(&reading_mutex); > - goto out_unlock; > + put_rng(rng); > + goto out; > } > > > @@ -257,8 +306,8 @@ static ssize_t hwrng_attr_current_store(struct device *dev, > err = hwrng_init(rng); > if (err) > break; > - hwrng_cleanup(current_rng); > - current_rng = rng; > + drop_current_rng(); > + set_current_rng(rng); > err = 0; > break; > } > @@ -272,17 +321,15 @@ static ssize_t hwrng_attr_current_show(struct device *dev, > struct device_attribute *attr, > char *buf) > { > - int err; > ssize_t ret; > - const char *name = "none"; > + struct hwrng *rng; > > - err = mutex_lock_interruptible(&rng_mutex); > - if (err) > - return -ERESTARTSYS; > - if (current_rng) > - name = current_rng->name; > - ret = snprintf(buf, PAGE_SIZE, "%s\n", name); > - mutex_unlock(&rng_mutex); > + rng = get_current_rng(); > + if (IS_ERR(rng)) > + return PTR_ERR(rng); > + > + ret = snprintf(buf, PAGE_SIZE, "%s\n", rng ? rng->name : "none"); > + put_rng(rng); > > return ret; > } > @@ -357,12 +404,16 @@ static int hwrng_fillfn(void *unused) > long rc; > > while (!kthread_should_stop()) { > - if (!current_rng) > + struct hwrng *rng; > + > + rng = get_current_rng(); > + if (IS_ERR(rng) || !rng) > break; > mutex_lock(&reading_mutex); > - rc = rng_get_data(current_rng, rng_fillbuf, > + rc = rng_get_data(rng, rng_fillbuf, > rng_buffer_size(), 1); > mutex_unlock(&reading_mutex); > + put_rng(rng);^^^ This put_rng() called a deadlock. I will describe in the bottom.> if (rc <= 0) { > pr_warn("hwrng: no data available\n"); > msleep_interruptible(10000); > @@ -423,14 +474,13 @@ int hwrng_register(struct hwrng *rng) > err = hwrng_init(rng); > if (err) > goto out_unlock; > - current_rng = rng; > + set_current_rng(rng); > } > err = 0; > if (!old_rng) { > err = register_miscdev(); > if (err) { > - hwrng_cleanup(rng); > - current_rng = NULL; > + drop_current_rng(); > goto out_unlock; > } > } > @@ -457,22 +507,21 @@ EXPORT_SYMBOL_GPL(hwrng_register); > > void hwrng_unregister(struct hwrng *rng) > { > - int err; > - > mutex_lock(&rng_mutex); > > list_del(&rng->list); > if (current_rng == rng) { > - hwrng_cleanup(rng); > - if (list_empty(&rng_list)) { > - current_rng = NULL; > - } else { > - current_rng = list_entry(rng_list.prev, struct hwrng, list); > - err = hwrng_init(current_rng); > - if (err) > - current_rng = NULL; > + drop_current_rng(); > + if (!list_empty(&rng_list)) { > + struct hwrng *tail; > + > + tail = list_entry(rng_list.prev, struct hwrng, list); > + > + if (hwrng_init(tail) == 0) > + set_current_rng(tail); > } > } > + > if (list_empty(&rng_list)) { > unregister_miscdev(); > if (hwrng_fill)hwrng_unregister() and put_rng() grab the lock, if hwrng_unregister() takes the lock, hwrng_fillfn() will stay at put_rng() to wait the lock. Right now, thread_stop() is insider lock protection, but we try to wake up the fillfn thread and wait for its completion. | wake_up_process(k); | wait_for_completion(&kthread->exited); The solution is moving kthread_stop() outsider of lock protection. @@ -524,11 +525,11 @@ void hwrng_unregister(struct hwrng *rng) if (list_empty(&rng_list)) { unregister_miscdev(); + mutex_unlock(&rng_mutex); if (hwrng_fill) kthread_stop(hwrng_fill); - } - - mutex_unlock(&rng_mutex); + } else + mutex_unlock(&rng_mutex); /* Just in case rng is reading right now, wait. */ wait_event(rng_done, atomic_read(&rng->ref.refcount) == 0); ===============After applied my additional two fixes, both cating hung and hotunplug issues were resolved. | test 0: | hotunplug rng device from qemu monitor | | test 1: | guest) # dd if=/dev/hwrng of=/dev/null & | hotunplug rng device from qemu monitor | | test 2: | guest) # dd if=/dev/random of=/dev/null & | hotunplug rng device from qemu monitor | | test 4: | guest) # dd if=/dev/hwrng of=/dev/null & | cat /sys/devices/virtual/misc/hw_random/rng_* | | test 5: | guest) # dd if=/dev/hwrng of=/dev/null | cancel dd process after 10 seconds | guest) # dd if=/dev/hwrng of=/dev/null & | hotunplug rng device from qemu monitor | | test 6: | use a fifo as rng backend, execute test 0 ~ 5 with no input of fifo Test are all passed :-) I know you are going or you already started your holiday, I will post a v2 with my additional patches. Thanks, Amos> diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h > index 914bb08cd738..c212e71ea886 100644 > --- a/include/linux/hw_random.h > +++ b/include/linux/hw_random.h > @@ -14,6 +14,7 @@ > > #include <linux/types.h> > #include <linux/list.h> > +#include <linux/kref.h> > > /** > * struct hwrng - Hardware Random Number Generator driver > @@ -44,6 +45,7 @@ struct hwrng { > > /* internal. */ > struct list_head list; > + struct kref ref; > }; > > /** Register a new Hardware Random Number Generator driver. */ > -- > 1.9.1
On Thu, Sep 18, 2014 at 12:18:24PM +0930, Rusty Russell wrote:> The previous patch added one potential problem: we can still be > reading from a hwrng when it's unregistered. Add a wait for zero > in the hwrng_unregister path. > > Signed-off-by: Rusty Russell <rusty at rustcorp.com.au> > --- > drivers/char/hw_random/core.c | 5 +++++ > 1 file changed, 5 insertions(+) > > diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c > index dc9092a1075d..b4a21e9521cf 100644 > --- a/drivers/char/hw_random/core.c > +++ b/drivers/char/hw_random/core.c > @@ -60,6 +60,7 @@ static DEFINE_MUTEX(rng_mutex); > static DEFINE_MUTEX(reading_mutex); > static int data_avail; > static u8 *rng_buffer, *rng_fillbuf; > +static DECLARE_WAIT_QUEUE_HEAD(rng_done); > static unsigned short current_quality; > static unsigned short default_quality; /* = 0; default to "off" */ > > @@ -98,6 +99,7 @@ static inline void cleanup_rng(struct kref *kref) > > if (rng->cleanup) > rng->cleanup(rng); > + wake_up_all(&rng_done); > } > > static void set_current_rng(struct hwrng *rng) > @@ -529,6 +531,9 @@ void hwrng_unregister(struct hwrng *rng) > } > > mutex_unlock(&rng_mutex); > + > + /* Just in case rng is reading right now, wait. */ > + wait_event(rng_done, atomic_read(&rng->ref.refcount) == 0);While it's obviously better than what we have now, I don't believe this is 100% safe as the cleanup function might still be running even after the ref count hits zero. Once we return from this function the module may be unloaded so we need to ensure that nothing is running at this point. Cheers, -- Email: Herbert Xu <herbert at gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt