Mike Christie
2021-Jun-24 03:08 UTC
[PATCH 0/3] kthread: pass in user and check RLIMIT_NPROC
The vhost driver will create a kthread when userspace does a VHOST_SET_OWNER ioctl, but the thread is charged to the kthreadd thread. We can then end up violating the userspace process's RLIMIT_NPROC. This patchset allows drivers to pass in the user to charge/check. The patches were made over Linus's current tree.
Mike Christie
2021-Jun-24 03:08 UTC
[PATCH 1/3] kthread: allow caller to pass in user_struct
Currently, the kthreadd's user_struct has its processes checked against the RLIMIT_NPROC limit. In cases like for vhost where the driver is making a thread for userspace, we want the userspace process to have its processes count checked and incremented. This patch allows the kthread code to take a user_struct and pass it to copy_process. The next patches will then convert the fork/cred code. Signed-off-by: Mike Christie <michael.christie at oracle.com> --- include/linux/kthread.h | 5 ++++ include/linux/sched/task.h | 2 ++ kernel/kthread.c | 58 ++++++++++++++++++++++++++++++++------ 3 files changed, 56 insertions(+), 9 deletions(-) diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 2484ed97e72f..3c64bd8bf34c 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -28,6 +28,11 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), kthread_create_on_node(threadfn, data, NUMA_NO_NODE, namefmt, ##arg) +struct task_struct *kthread_create_for_user(int (*threadfn)(void *data), + void *data, + struct user_struct *user, + const char namefmt[], ...); + struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), void *data, unsigned int cpu, diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index ef02be869cf2..357e95679e33 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -34,6 +34,8 @@ struct kernel_clone_args { int io_thread; struct cgroup *cgrp; struct css_set *cset; + /* User to check RLIMIT_NPROC against */ + struct user_struct *user; }; /* diff --git a/kernel/kthread.c b/kernel/kthread.c index fe3f2a40d61e..9e7e4d04664f 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -41,6 +41,7 @@ struct kthread_create_info int (*threadfn)(void *data); void *data; int node; + struct user_struct *user; /* Result passed back to kthread_create() from kthreadd. 
*/ struct task_struct *result; @@ -327,13 +328,21 @@ int tsk_fork_get_node(struct task_struct *tsk) static void create_kthread(struct kthread_create_info *create) { + /* We want our own signal handler (we take no signals by default). */ + struct kernel_clone_args clone_args = { + .flags = CLONE_FS | CLONE_FILES | CLONE_VM | + CLONE_UNTRACED, + .exit_signal = SIGCHLD, + .stack = (unsigned long)kthread, + .stack_size = (unsigned long)create, + .user = create->user, + }; int pid; #ifdef CONFIG_NUMA current->pref_node_fork = create->node; #endif - /* We want our own signal handler (we take no signals by default). */ - pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); + pid = kernel_clone(&clone_args); if (pid < 0) { /* If user was SIGKILLed, I release the structure. */ struct completion *done = xchg(&create->done, NULL); @@ -347,11 +356,11 @@ static void create_kthread(struct kthread_create_info *create) } } -static __printf(4, 0) +static __printf(5, 0) struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data), - void *data, int node, - const char namefmt[], - va_list args) + void *data, int node, + struct user_struct *user, + const char namefmt[], va_list args) { DECLARE_COMPLETION_ONSTACK(done); struct task_struct *task; @@ -364,6 +373,7 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data), create->data = data; create->node = node; create->done = &done; + create->user = user; spin_lock(&kthread_create_lock); list_add_tail(&create->list, &kthread_create_list); @@ -444,13 +454,43 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), va_list args; va_start(args, namefmt); - task = __kthread_create_on_node(threadfn, data, node, namefmt, args); + task = __kthread_create_on_node(threadfn, data, node, NULL, namefmt, + args); va_end(args); return task; } EXPORT_SYMBOL(kthread_create_on_node); +/** + * kthread_create_for_user - create a kthread and check @user's RLIMIT_NPROC + * @threadfn: the 
function to run until signal_pending(current). + * @data: data ptr for @threadfn. + * @user: user_struct that will have its RLIMIT_NPROC checked + * @namefmt: printf-style name for the thread. + * + * This will create a kthread on the current node, leaving it in the stopped + * state. This is just a helper for kthread_create_on_node() that will check + * @user's process count against its RLIMIT_NPROC. See the + * kthread_create_on_node() documentation for more details. + */ +struct task_struct *kthread_create_for_user(int (*threadfn)(void *data), + void *data, + struct user_struct *user, + const char namefmt[], ...) +{ + struct task_struct *task; + va_list args; + + va_start(args, namefmt); + task = __kthread_create_on_node(threadfn, data, NUMA_NO_NODE, user, + namefmt, args); + va_end(args); + + return task; +} +EXPORT_SYMBOL(kthread_create_for_user); + static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mask, long state) { unsigned long flags; @@ -785,8 +825,8 @@ __kthread_create_worker(int cpu, unsigned int flags, if (cpu >= 0) node = cpu_to_node(cpu); - task = __kthread_create_on_node(kthread_worker_fn, worker, - node, namefmt, args); + task = __kthread_create_on_node(kthread_worker_fn, worker, node, NULL, + namefmt, args); if (IS_ERR(task)) goto fail_task; -- 2.25.1
Mike Christie
2021-Jun-24 03:08 UTC
[PATCH 2/3] kernel/fork, cred.c: allow copy_process to take user
This allows kthread to pass copy_process the user whose RLIMIT_NPROC limit we want to check and who should be charged for the new process. It will be used by vhost, where userspace has the driver create threads but the kthreadd thread is currently the one checked/charged. Signed-off-by: Mike Christie <michael.christie at oracle.com> --- include/linux/cred.h | 3 ++- kernel/cred.c | 7 ++++--- kernel/fork.c | 12 +++++++----- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/include/linux/cred.h b/include/linux/cred.h index 14971322e1a0..9a2c1398cdd4 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -153,7 +153,8 @@ struct cred { extern void __put_cred(struct cred *); extern void exit_creds(struct task_struct *); -extern int copy_creds(struct task_struct *, unsigned long); +extern int copy_creds(struct task_struct *, unsigned long, + struct user_struct *); extern const struct cred *get_task_cred(struct task_struct *); extern struct cred *cred_alloc_blank(void); extern struct cred *prepare_creds(void); diff --git a/kernel/cred.c b/kernel/cred.c index e1d274cd741b..e006aafa8f05 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -330,7 +330,8 @@ struct cred *prepare_exec_creds(void) * The new process gets the current process's subjective credentials as its * objective and subjective credentials */ -int copy_creds(struct task_struct *p, unsigned long clone_flags) +int copy_creds(struct task_struct *p, unsigned long clone_flags, + struct user_struct *user) { struct cred *new; int ret; @@ -351,7 +352,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) kdebug("share_creds(%p{%d,%d})", p->cred, atomic_read(&p->cred->usage), read_cred_subscribers(p->cred)); - atomic_inc(&p->cred->user->processes); + atomic_inc(&user->processes); return 0; } @@ -384,7 +385,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) } #endif - atomic_inc(&new->user->processes); + atomic_inc(&user->processes); p->cred = p->real_cred = get_cred(new); 
alter_cred_subscribers(new, 2); validate_creds(new); diff --git a/kernel/fork.c b/kernel/fork.c index dc06afd725cb..6389aea6d3eb 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1860,6 +1860,7 @@ static __latent_entropy struct task_struct *copy_process( struct file *pidfile = NULL; u64 clone_flags = args->flags; struct nsproxy *nsp = current->nsproxy; + struct user_struct *user = args->user; /* * Don't allow sharing the root directory with processes in a different @@ -1976,16 +1977,17 @@ static __latent_entropy struct task_struct *copy_process( #ifdef CONFIG_PROVE_LOCKING DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif + if (!user) + user = p->real_cred->user; retval = -EAGAIN; - if (atomic_read(&p->real_cred->user->processes) >= - task_rlimit(p, RLIMIT_NPROC)) { - if (p->real_cred->user != INIT_USER && + if (atomic_read(&user->processes) >= task_rlimit(p, RLIMIT_NPROC)) { + if (user != INIT_USER && !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) goto bad_fork_free; } current->flags &= ~PF_NPROC_EXCEEDED; - retval = copy_creds(p, clone_flags); + retval = copy_creds(p, clone_flags, user); if (retval < 0) goto bad_fork_free; @@ -2385,7 +2387,7 @@ static __latent_entropy struct task_struct *copy_process( #endif delayacct_tsk_free(p); bad_fork_cleanup_count: - atomic_dec(&p->cred->user->processes); + atomic_dec(&user->processes); exit_creds(p); bad_fork_free: p->state = TASK_DEAD; -- 2.25.1
Mike Christie
2021-Jun-24 03:08 UTC
[PATCH 3/3] vhost: pass kthread user to check RLIMIT_NPROC
This has vhost pass in the user to the kthread API, so the process doing the ioctl has its RLIMIT_NPROC checked and its processes count incremented. Signed-off-by: Mike Christie <michael.christie at oracle.com> --- drivers/vhost/vhost.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 5ccb0705beae..141cca6fd50a 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -595,8 +595,9 @@ long vhost_dev_set_owner(struct vhost_dev *dev) dev->kcov_handle = kcov_common_handle(); if (dev->use_worker) { - worker = kthread_create(vhost_worker, dev, - "vhost-%d", current->pid); + worker = kthread_create_for_user(vhost_worker, dev, + current->real_cred->user, + "vhost-%d", current->pid); if (IS_ERR(worker)) { err = PTR_ERR(worker); goto err_worker; -- 2.25.1
Michael S. Tsirkin
2021-Jun-24 07:34 UTC
[PATCH 0/3] kthread: pass in user and check RLIMIT_NPROC
On Wed, Jun 23, 2021 at 10:08:01PM -0500, Mike Christie wrote: > The vhost driver will create a kthread when userspace does a > VHOST_SET_OWNER ioctl, but the thread is charged to the kthreadd thread. > We can then end up violating the userspace process's RLIMIT_NPROC. This > patchset allows drivers to pass in the user to charge/check. > > The patches were made over Linus's current tree. > Makes sense I guess. Acked-by: Michael S. Tsirkin <mst at redhat.com>
Stefan Hajnoczi
2021-Jun-24 09:40 UTC
[PATCH 0/3] kthread: pass in user and check RLIMIT_NPROC
On Wed, Jun 23, 2021 at 10:08:01PM -0500, Mike Christie wrote: > The vhost driver will create a kthread when userspace does a > VHOST_SET_OWNER ioctl, but the thread is charged to the kthreadd thread. > We can then end up violating the userspace process's RLIMIT_NPROC. This > patchset allows drivers to pass in the user to charge/check. > > The patches were made over Linus's current tree. Makes sense from a vhost perspective and for future users, but I'm not familiar with the kthread internals: Acked-by: Stefan Hajnoczi <stefanha at redhat.com> -------------- next part -------------- A non-text attachment was scrubbed... Name: signature.asc Type: application/pgp-signature Size: 488 bytes Desc: not available URL: <http://lists.linuxfoundation.org/pipermail/virtualization/attachments/20210624/e755c3ba/attachment.sig>