Mike Christie
2021-Jun-24 03:08 UTC
[PATCH 0/3] kthread: pass in user and check RLIMIT_NPROC
The vhost driver will create a kthread when userspace does a VHOST_SET_OWNER ioctl, but the thread is charged to the kthreadd thread. We can then end up violating the userspace process's RLIMIT_NPROC. This patchset allows drivers to pass in the user to charge/check. The patches were made over Linus's current tree.
Mike Christie
2021-Jun-24 03:08 UTC
[PATCH 1/3] kthread: allow caller to pass in user_struct
Currently, the kthreadd's user_struct has its process count checked against
the RLIMIT_NPROC limit. In cases like vhost, where the driver is creating
a thread on behalf of userspace, we want the userspace process to have its
process count checked and incremented instead.
This patch allows the kthread code to take a user_struct and pass it to
copy_process. The next patches will then convert the fork/cred code.
Signed-off-by: Mike Christie <michael.christie at oracle.com>
---
include/linux/kthread.h | 5 ++++
include/linux/sched/task.h | 2 ++
kernel/kthread.c | 58 ++++++++++++++++++++++++++++++++------
3 files changed, 56 insertions(+), 9 deletions(-)
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 2484ed97e72f..3c64bd8bf34c 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -28,6 +28,11 @@ struct task_struct *kthread_create_on_node(int
(*threadfn)(void *data),
kthread_create_on_node(threadfn, data, NUMA_NO_NODE, namefmt, ##arg)
+struct task_struct *kthread_create_for_user(int (*threadfn)(void *data),
+ void *data,
+ struct user_struct *user,
+ const char namefmt[], ...);
+
struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
void *data,
unsigned int cpu,
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index ef02be869cf2..357e95679e33 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -34,6 +34,8 @@ struct kernel_clone_args {
int io_thread;
struct cgroup *cgrp;
struct css_set *cset;
+ /* User to check RLIMIT_NPROC against */
+ struct user_struct *user;
};
/*
diff --git a/kernel/kthread.c b/kernel/kthread.c
index fe3f2a40d61e..9e7e4d04664f 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -41,6 +41,7 @@ struct kthread_create_info
int (*threadfn)(void *data);
void *data;
int node;
+ struct user_struct *user;
/* Result passed back to kthread_create() from kthreadd. */
struct task_struct *result;
@@ -327,13 +328,21 @@ int tsk_fork_get_node(struct task_struct *tsk)
static void create_kthread(struct kthread_create_info *create)
{
+ /* We want our own signal handler (we take no signals by default). */
+ struct kernel_clone_args clone_args = {
+ .flags = CLONE_FS | CLONE_FILES | CLONE_VM |
+ CLONE_UNTRACED,
+ .exit_signal = SIGCHLD,
+ .stack = (unsigned long)kthread,
+ .stack_size = (unsigned long)create,
+ .user = create->user,
+ };
int pid;
#ifdef CONFIG_NUMA
current->pref_node_fork = create->node;
#endif
- /* We want our own signal handler (we take no signals by default). */
- pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
+ pid = kernel_clone(&clone_args);
if (pid < 0) {
/* If user was SIGKILLed, I release the structure. */
struct completion *done = xchg(&create->done, NULL);
@@ -347,11 +356,11 @@ static void create_kthread(struct kthread_create_info
*create)
}
}
-static __printf(4, 0)
+static __printf(5, 0)
struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
- void *data, int node,
- const char namefmt[],
- va_list args)
+ void *data, int node,
+ struct user_struct *user,
+ const char namefmt[], va_list args)
{
DECLARE_COMPLETION_ONSTACK(done);
struct task_struct *task;
@@ -364,6 +373,7 @@ struct task_struct *__kthread_create_on_node(int
(*threadfn)(void *data),
create->data = data;
create->node = node;
create->done = &done;
+ create->user = user;
spin_lock(&kthread_create_lock);
list_add_tail(&create->list, &kthread_create_list);
@@ -444,13 +454,43 @@ struct task_struct *kthread_create_on_node(int
(*threadfn)(void *data),
va_list args;
va_start(args, namefmt);
- task = __kthread_create_on_node(threadfn, data, node, namefmt, args);
+ task = __kthread_create_on_node(threadfn, data, node, NULL, namefmt,
+ args);
va_end(args);
return task;
}
EXPORT_SYMBOL(kthread_create_on_node);
+/**
+ * kthread_create_for_user - create a kthread and check @user's RLIMIT_NPROC
+ * @threadfn: the function to run until signal_pending(current).
+ * @data: data ptr for @threadfn.
+ * @user: user_struct that will have its RLIMIT_NPROC checked
+ * @namefmt: printf-style name for the thread.
+ *
+ * This will create a kthread on the current node, leaving it in the stopped
+ * state. This is just a helper for kthread_create_on_node() that will check
+ * @user's process count against its RLIMIT_NPROC. See the
+ * kthread_create_on_node() documentation for more details.
+ */
+struct task_struct *kthread_create_for_user(int (*threadfn)(void *data),
+ void *data,
+ struct user_struct *user,
+ const char namefmt[], ...)
+{
+ struct task_struct *task;
+ va_list args;
+
+ va_start(args, namefmt);
+ task = __kthread_create_on_node(threadfn, data, NUMA_NO_NODE, user,
+ namefmt, args);
+ va_end(args);
+
+ return task;
+}
+EXPORT_SYMBOL(kthread_create_for_user);
+
static void __kthread_bind_mask(struct task_struct *p, const struct cpumask
*mask, long state)
{
unsigned long flags;
@@ -785,8 +825,8 @@ __kthread_create_worker(int cpu, unsigned int flags,
if (cpu >= 0)
node = cpu_to_node(cpu);
- task = __kthread_create_on_node(kthread_worker_fn, worker,
- node, namefmt, args);
+ task = __kthread_create_on_node(kthread_worker_fn, worker, node, NULL,
+ namefmt, args);
if (IS_ERR(task))
goto fail_task;
--
2.25.1
Mike Christie
2021-Jun-24 03:08 UTC
[PATCH 2/3] kernel/fork, cred.c: allow copy_process to take user
This allows kthread to pass copy_process the user whose RLIMIT_NPROC limit
we want to check and who should be charged for the new process. It will be
used by vhost, where userspace has the driver create threads but currently
the kthreadd thread is the one that is checked/charged.
Signed-off-by: Mike Christie <michael.christie at oracle.com>
---
include/linux/cred.h | 3 ++-
kernel/cred.c | 7 ++++---
kernel/fork.c | 12 +++++++-----
3 files changed, 13 insertions(+), 9 deletions(-)
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 14971322e1a0..9a2c1398cdd4 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -153,7 +153,8 @@ struct cred {
extern void __put_cred(struct cred *);
extern void exit_creds(struct task_struct *);
-extern int copy_creds(struct task_struct *, unsigned long);
+extern int copy_creds(struct task_struct *, unsigned long,
+ struct user_struct *);
extern const struct cred *get_task_cred(struct task_struct *);
extern struct cred *cred_alloc_blank(void);
extern struct cred *prepare_creds(void);
diff --git a/kernel/cred.c b/kernel/cred.c
index e1d274cd741b..e006aafa8f05 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -330,7 +330,8 @@ struct cred *prepare_exec_creds(void)
* The new process gets the current process's subjective credentials as its
* objective and subjective credentials
*/
-int copy_creds(struct task_struct *p, unsigned long clone_flags)
+int copy_creds(struct task_struct *p, unsigned long clone_flags,
+ struct user_struct *user)
{
struct cred *new;
int ret;
@@ -351,7 +352,7 @@ int copy_creds(struct task_struct *p, unsigned long
clone_flags)
kdebug("share_creds(%p{%d,%d})",
p->cred, atomic_read(&p->cred->usage),
read_cred_subscribers(p->cred));
- atomic_inc(&p->cred->user->processes);
+ atomic_inc(&user->processes);
return 0;
}
@@ -384,7 +385,7 @@ int copy_creds(struct task_struct *p, unsigned long
clone_flags)
}
#endif
- atomic_inc(&new->user->processes);
+ atomic_inc(&user->processes);
p->cred = p->real_cred = get_cred(new);
alter_cred_subscribers(new, 2);
validate_creds(new);
diff --git a/kernel/fork.c b/kernel/fork.c
index dc06afd725cb..6389aea6d3eb 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1860,6 +1860,7 @@ static __latent_entropy struct task_struct *copy_process(
struct file *pidfile = NULL;
u64 clone_flags = args->flags;
struct nsproxy *nsp = current->nsproxy;
+ struct user_struct *user = args->user;
/*
* Don't allow sharing the root directory with processes in a different
@@ -1976,16 +1977,17 @@ static __latent_entropy struct task_struct
*copy_process(
#ifdef CONFIG_PROVE_LOCKING
DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
#endif
+ if (!user)
+ user = p->real_cred->user;
retval = -EAGAIN;
- if (atomic_read(&p->real_cred->user->processes) >=
- task_rlimit(p, RLIMIT_NPROC)) {
- if (p->real_cred->user != INIT_USER &&
+ if (atomic_read(&user->processes) >= task_rlimit(p, RLIMIT_NPROC)) {
+ if (user != INIT_USER &&
!capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
goto bad_fork_free;
}
current->flags &= ~PF_NPROC_EXCEEDED;
- retval = copy_creds(p, clone_flags);
+ retval = copy_creds(p, clone_flags, user);
if (retval < 0)
goto bad_fork_free;
@@ -2385,7 +2387,7 @@ static __latent_entropy struct task_struct *copy_process(
#endif
delayacct_tsk_free(p);
bad_fork_cleanup_count:
- atomic_dec(&p->cred->user->processes);
+ atomic_dec(&user->processes);
exit_creds(p);
bad_fork_free:
p->state = TASK_DEAD;
--
2.25.1
Mike Christie
2021-Jun-24 03:08 UTC
[PATCH 3/3] vhost: pass kthread user to check RLIMIT_NPROC
This has vhost pass in the user to the kthread API, so the process doing
the ioctl has its RLIMIT_NPROC checked and its processes count
incremented.
Signed-off-by: Mike Christie <michael.christie at oracle.com>
---
drivers/vhost/vhost.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 5ccb0705beae..141cca6fd50a 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -595,8 +595,9 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
dev->kcov_handle = kcov_common_handle();
if (dev->use_worker) {
- worker = kthread_create(vhost_worker, dev,
- "vhost-%d", current->pid);
+ worker = kthread_create_for_user(vhost_worker, dev,
+ current->real_cred->user,
+ "vhost-%d", current->pid);
if (IS_ERR(worker)) {
err = PTR_ERR(worker);
goto err_worker;
--
2.25.1
Michael S. Tsirkin
2021-Jun-24 07:34 UTC
[PATCH 0/3] kthread: pass in user and check RLIMIT_NPROC
On Wed, Jun 23, 2021 at 10:08:01PM -0500, Mike Christie wrote:
> The vhost driver will create a kthread when userspace does a
> VHOST_SET_OWNER ioctl, but the thread is charged to the kthreadd thread.
> We can then end up violating the userspace process's RLIMIT_NPROC. This
> patchset allows drivers to pass in the user to charge/check.
>
> The patches were made over Linus's current tree.
>

Makes sense I guess.

Acked-by: Michael S. Tsirkin <mst at redhat.com>
Stefan Hajnoczi
2021-Jun-24 09:40 UTC
[PATCH 0/3] kthread: pass in user and check RLIMIT_NPROC
On Wed, Jun 23, 2021 at 10:08:01PM -0500, Mike Christie wrote:
> The vhost driver will create a kthread when userspace does a
> VHOST_SET_OWNER ioctl, but the thread is charged to the kthreadd thread.
> We can then end up violating the userspace process's RLIMIT_NPROC. This
> patchset allows drivers to pass in the user to charge/check.
>
> The patches were made over Linus's current tree.

Makes sense from a vhost perspective and for future users, but I'm not familiar with the kthread internals:

Acked-by: Stefan Hajnoczi <stefanha at redhat.com>

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 488 bytes
Desc: not available
URL: <http://lists.linuxfoundation.org/pipermail/virtualization/attachments/20210624/e755c3ba/attachment.sig>