Hi all, this series improves the use_mm / unuse_mm interface by better documenting the assumptions, and my taking the set_fs manipulations spread over the callers into the core API. Changes since v1: - drop a few patches - fix a comment typo - cover the newly merged use_mm/unuse_mm caller in vfio
Christoph Hellwig
2020-Apr-16 05:31 UTC
[PATCH 1/3] kernel: move use_mm/unuse_mm to kthread.c
These helpers are only for use with kernel threads, and I will tie them more into the kthread infrastructure going forward. Also move the prototypes to kthread.h - mmu_context.h was a little weird to start with as it otherwise contains very low-level MM bits. Signed-off-by: Christoph Hellwig <hch at lst.de> Acked-by: Felix Kuehling <Felix.Kuehling at amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 1 - .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 1 - .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 2 - .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 2 - .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 2 - drivers/gpu/drm/i915/gvt/kvmgt.c | 2 +- drivers/usb/gadget/function/f_fs.c | 2 +- drivers/usb/gadget/legacy/inode.c | 2 +- drivers/vfio/vfio_iommu_type1.c | 2 +- drivers/vhost/vhost.c | 1 - fs/aio.c | 1 - fs/io-wq.c | 1 - fs/io_uring.c | 1 - include/linux/kthread.h | 5 ++ include/linux/mmu_context.h | 5 -- kernel/kthread.c | 56 ++++++++++++++++ mm/Makefile | 2 +- mm/mmu_context.c | 64 ------------------- 19 files changed, 67 insertions(+), 86 deletions(-) delete mode 100644 mm/mmu_context.c diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 13feb313e9b3..b820c8fc689f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -27,6 +27,7 @@ #include <linux/types.h> #include <linux/mm.h> +#include <linux/kthread.h> #include <linux/workqueue.h> #include <kgd_kfd_interface.h> #include <drm/ttm/ttm_execbuf_util.h> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 6529caca88fe..35d4a5ab0228 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -22,7 +22,6 @@ #include <linux/module.h> #include <linux/fdtable.h> #include <linux/uaccess.h> -#include <linux/mmu_context.h> #include <linux/firmware.h> #include "amdgpu.h" #include "amdgpu_amdkfd.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 4ec6d0c03201..b1655054b919 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -19,7 +19,6 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ -#include <linux/mmu_context.h> #include "amdgpu.h" #include "amdgpu_amdkfd.h" #include "gc/gc_10_1_0_offset.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 0b7e78748540..7d01420c0c85 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -20,8 +20,6 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include <linux/mmu_context.h> - #include "amdgpu.h" #include "amdgpu_amdkfd.h" #include "cikd.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index ccd635b812b5..635cd1a26bed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -20,8 +20,6 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include <linux/mmu_context.h> - #include "amdgpu.h" #include "amdgpu_amdkfd.h" #include "gfx_v8_0.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index df841c2ac5e7..c7fd0c47b254 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -19,8 +19,6 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ -#include <linux/mmu_context.h> - #include "amdgpu.h" #include "amdgpu_amdkfd.h" #include "gc/gc_9_0_offset.h" diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 074c4efb58eb..ca1dd6e6f395 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -31,7 +31,7 @@ #include <linux/init.h> #include <linux/device.h> #include <linux/mm.h> -#include <linux/mmu_context.h> +#include <linux/kthread.h> #include <linux/sched/mm.h> #include <linux/types.h> #include <linux/list.h> diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index c81023b195c3..c57b1b2507c6 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -32,7 +32,7 @@ #include <linux/usb/functionfs.h> #include <linux/aio.h> -#include <linux/mmu_context.h> +#include <linux/kthread.h> #include <linux/poll.h> #include <linux/eventfd.h> diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index aa0de9e35afa..8b5233888bf8 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -21,7 +21,7 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/poll.h> -#include <linux/mmu_context.h> +#include <linux/kthread.h> #include <linux/aio.h> #include <linux/uio.h> #include <linux/refcount.h> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 85b32c325282..5f50866a8b01 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -27,7 +27,7 @@ #include <linux/iommu.h> #include <linux/module.h> #include <linux/mm.h> -#include <linux/mmu_context.h> +#include <linux/kthread.h> #include <linux/rbtree.h> #include <linux/sched/signal.h> #include <linux/sched/mm.h> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index d450e16c5c25..ead1deed80d3 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -14,7 +14,6 @@ #include <linux/vhost.h> #include <linux/uio.h> #include <linux/mm.h> -#include <linux/mmu_context.h> #include <linux/miscdevice.h> #include <linux/mutex.h> #include <linux/poll.h> diff --git a/fs/aio.c b/fs/aio.c index 5f3d3d814928..328829f0343b 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -27,7 +27,6 @@ #include <linux/file.h> #include <linux/mm.h> #include <linux/mman.h> -#include <linux/mmu_context.h> #include <linux/percpu.h> #include <linux/slab.h> #include <linux/timer.h> diff --git a/fs/io-wq.c b/fs/io-wq.c index 4023c9846860..5f590bf27bff 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -10,7 +10,6 @@ #include <linux/errno.h> #include <linux/sched/signal.h> #include <linux/mm.h> -#include <linux/mmu_context.h> #include <linux/sched/mm.h> #include <linux/percpu.h> #include <linux/slab.h> diff --git a/fs/io_uring.c b/fs/io_uring.c index 5190bfb6a665..f5a80f6f28f5 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -55,7 +55,6 @@ #include <linux/fdtable.h> #include <linux/mm.h> #include <linux/mman.h> -#include <linux/mmu_context.h> #include <linux/percpu.h> #include <linux/slab.h> #include <linux/kthread.h> diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 8bbcaad7ef0f..c2d40c9672d6 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -5,6 +5,8 @@ #include <linux/err.h> #include <linux/sched.h> +struct mm_struct; + __printf(4, 5) struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), void *data, @@ -198,6 +200,9 @@ bool kthread_cancel_delayed_work_sync(struct kthread_delayed_work *work); void kthread_destroy_worker(struct kthread_worker *worker); +void use_mm(struct mm_struct *mm); +void unuse_mm(struct mm_struct *mm); + struct cgroup_subsys_state; #ifdef CONFIG_BLK_CGROUP diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h index d9a543a9e1cc..c51a84132d7c 100644 --- a/include/linux/mmu_context.h +++ b/include/linux/mmu_context.h @@ -4,11 +4,6 @@ #include <asm/mmu_context.h> -struct mm_struct; - -void use_mm(struct mm_struct *mm); -void unuse_mm(struct mm_struct *mm); - /* Architectures that care about IRQ state in switch_mm can override this. */ #ifndef switch_mm_irqs_off # define switch_mm_irqs_off switch_mm diff --git a/kernel/kthread.c b/kernel/kthread.c index bfbfa481be3a..ce4610316377 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -1,13 +1,17 @@ // SPDX-License-Identifier: GPL-2.0-only /* Kernel thread helper functions. * Copyright (C) 2004 IBM Corporation, Rusty Russell. + * Copyright (C) 2009 Red Hat, Inc. * * Creation is done via kthreadd, so that we get a clean environment * even if we're invoked from userspace (think modprobe, hotplug cpu, * etc.). */ #include <uapi/linux/sched/types.h> +#include <linux/mm.h> +#include <linux/mmu_context.h> #include <linux/sched.h> +#include <linux/sched/mm.h> #include <linux/sched/task.h> #include <linux/kthread.h> #include <linux/completion.h> @@ -25,6 +29,7 @@ #include <linux/numa.h> #include <trace/events/sched.h> + static DEFINE_SPINLOCK(kthread_create_lock); static LIST_HEAD(kthread_create_list); struct task_struct *kthreadd_task; @@ -1203,6 +1208,57 @@ void kthread_destroy_worker(struct kthread_worker *worker) } EXPORT_SYMBOL(kthread_destroy_worker); +/* + * use_mm + * Makes the calling kernel thread take on the specified + * mm context. + * (Note: this routine is intended to be called only + * from a kernel thread context) + */ +void use_mm(struct mm_struct *mm) +{ + struct mm_struct *active_mm; + struct task_struct *tsk = current; + + task_lock(tsk); + active_mm = tsk->active_mm; + if (active_mm != mm) { + mmgrab(mm); + tsk->active_mm = mm; + } + tsk->mm = mm; + switch_mm(active_mm, mm, tsk); + task_unlock(tsk); +#ifdef finish_arch_post_lock_switch + finish_arch_post_lock_switch(); +#endif + + if (active_mm != mm) + mmdrop(active_mm); +} +EXPORT_SYMBOL_GPL(use_mm); + +/* + * unuse_mm + * Reverses the effect of use_mm, i.e. releases the + * specified mm context which was earlier taken on + * by the calling kernel thread + * (Note: this routine is intended to be called only + * from a kernel thread context) + */ +void unuse_mm(struct mm_struct *mm) +{ + struct task_struct *tsk = current; + + task_lock(tsk); + sync_mm_rss(mm); + tsk->mm = NULL; + /* active_mm is still 'mm' */ + enter_lazy_tlb(mm, tsk); + task_unlock(tsk); +} +EXPORT_SYMBOL_GPL(unuse_mm); + #ifdef CONFIG_BLK_CGROUP /** * kthread_associate_blkcg - associate blkcg to current kthread diff --git a/mm/Makefile b/mm/Makefile index fccd3756b25f..cafde277ed8d 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -41,7 +41,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ maccess.o page-writeback.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ util.o mmzone.o vmstat.o backing-dev.o \ - mm_init.o mmu_context.o percpu.o slab_common.o \ + mm_init.o percpu.o slab_common.o \ compaction.o vmacache.o \ interval_tree.o list_lru.o workingset.o \ debug.o gup.o $(mmu-y) diff --git a/mm/mmu_context.c b/mm/mmu_context.c deleted file mode 100644 index 3e612ae748e9..000000000000 --- a/mm/mmu_context.c +++ /dev/null @@ -1,64 +0,0 @@ -/* Copyright (C) 2009 Red Hat, Inc. - * - * See ../COPYING for licensing terms. - */ - -#include <linux/mm.h> -#include <linux/sched.h> -#include <linux/sched/mm.h> -#include <linux/sched/task.h> -#include <linux/mmu_context.h> -#include <linux/export.h> - -#include <asm/mmu_context.h> - -/* - * use_mm - * Makes the calling kernel thread take on the specified - * mm context. - * (Note: this routine is intended to be called only - * from a kernel thread context) - */ -void use_mm(struct mm_struct *mm) -{ - struct mm_struct *active_mm; - struct task_struct *tsk = current; - - task_lock(tsk); - active_mm = tsk->active_mm; - if (active_mm != mm) { - mmgrab(mm); - tsk->active_mm = mm; - } - tsk->mm = mm; - switch_mm(active_mm, mm, tsk); - task_unlock(tsk); -#ifdef finish_arch_post_lock_switch - finish_arch_post_lock_switch(); -#endif - - if (active_mm != mm) - mmdrop(active_mm); -} -EXPORT_SYMBOL_GPL(use_mm); - -/* - * unuse_mm - * Reverses the effect of use_mm, i.e. releases the - * specified mm context which was earlier taken on - * by the calling kernel thread - * (Note: this routine is intended to be called only - * from a kernel thread context) - */ -void unuse_mm(struct mm_struct *mm) -{ - struct task_struct *tsk = current; - - task_lock(tsk); - sync_mm_rss(mm); - tsk->mm = NULL; - /* active_mm is still 'mm' */ - enter_lazy_tlb(mm, tsk); - task_unlock(tsk); -} -EXPORT_SYMBOL_GPL(unuse_mm); -- 2.25.1
Christoph Hellwig
2020-Apr-16 05:31 UTC
[PATCH 2/3] kernel: better document the use_mm/unuse_mm API contract
Switch the function documentation to kerneldoc comments, and add WARN_ON_ONCE asserts that the calling thread is a kernel thread and does not have ->mm set (or has ->mm set in the case of unuse_mm). Also give the functions a kthread_ prefix to better document the use case. Signed-off-by: Christoph Hellwig <hch at lst.de> Acked-by: Felix Kuehling <Felix.Kuehling at amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 4 +-- drivers/gpu/drm/i915/gvt/kvmgt.c | 4 +-- drivers/usb/gadget/function/f_fs.c | 4 +-- drivers/usb/gadget/legacy/inode.c | 4 +-- drivers/vfio/vfio_iommu_type1.c | 4 +-- drivers/vhost/vhost.c | 4 +-- fs/io-wq.c | 6 ++-- fs/io_uring.c | 6 ++-- include/linux/kthread.h | 4 +-- kernel/kthread.c | 33 +++++++++++----------- mm/oom_kill.c | 6 ++-- mm/vmacache.c | 4 +-- 12 files changed, 41 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index b820c8fc689f..b063bd7f41d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -192,9 +192,9 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s if ((mmptr) == current->mm) { \ valid = !get_user((dst), (wptr)); \ } else if (current->mm == NULL) { \ - use_mm(mmptr); \ + kthread_use_mm(mmptr); \ valid = !get_user((dst), (wptr)); \ - unuse_mm(mmptr); \ + kthread_unuse_mm(mmptr); \ } \ pagefault_enable(); \ } \ diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index ca1dd6e6f395..f2927575b793 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -2048,7 +2048,7 @@ static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa, if (kthread) { if (!mmget_not_zero(kvm->mm)) return -EFAULT; - use_mm(kvm->mm); + kthread_use_mm(kvm->mm); } idx = srcu_read_lock(&kvm->srcu); @@ -2057,7 +2057,7 @@ static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa, srcu_read_unlock(&kvm->srcu, idx); if (kthread) { - unuse_mm(kvm->mm); + kthread_unuse_mm(kvm->mm); mmput(kvm->mm); } diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index c57b1b2507c6..d9e48bd7c692 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -827,9 +827,9 @@ static void ffs_user_copy_worker(struct work_struct *work) mm_segment_t oldfs = get_fs(); set_fs(USER_DS); - use_mm(io_data->mm); + kthread_use_mm(io_data->mm); ret = ffs_copy_to_iter(io_data->buf, ret, &io_data->data); - unuse_mm(io_data->mm); + kthread_unuse_mm(io_data->mm); set_fs(oldfs); } diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 8b5233888bf8..a05552bc2ff8 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -462,9 +462,9 @@ static void ep_user_copy_worker(struct work_struct *work) struct kiocb *iocb = priv->iocb; size_t ret; - use_mm(mm); + kthread_use_mm(mm); ret = copy_to_iter(priv->buf, priv->actual, &priv->to); - unuse_mm(mm); + kthread_unuse_mm(mm); if (!ret) ret = -EFAULT; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 5f50866a8b01..2eb105aa9723 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -2333,7 +2333,7 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu, return -EPERM; if (kthread) - use_mm(mm); + kthread_use_mm(mm); else if (current->mm != mm) goto out; @@ -2351,7 +2351,7 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu, *copied = __copy_from_user(data, (void __user *)vaddr, count) ? 0 : count; if (kthread) - unuse_mm(mm); + kthread_unuse_mm(mm); out: mmput(mm); return *copied ? 0 : -EFAULT; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index ead1deed80d3..17d598e74780 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -332,7 +332,7 @@ static int vhost_worker(void *data) mm_segment_t oldfs = get_fs(); set_fs(USER_DS); - use_mm(dev->mm); + kthread_use_mm(dev->mm); for (;;) { /* mb paired w/ kthread_stop */ @@ -360,7 +360,7 @@ static int vhost_worker(void *data) schedule(); } } - unuse_mm(dev->mm); + kthread_unuse_mm(dev->mm); set_fs(oldfs); return 0; } diff --git a/fs/io-wq.c b/fs/io-wq.c index 5f590bf27bff..748621f7391e 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -170,7 +170,7 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker) } __set_current_state(TASK_RUNNING); set_fs(KERNEL_DS); - unuse_mm(worker->mm); + kthread_unuse_mm(worker->mm); mmput(worker->mm); worker->mm = NULL; } @@ -417,7 +417,7 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe) static void io_wq_switch_mm(struct io_worker *worker, struct io_wq_work *work) { if (worker->mm) { - unuse_mm(worker->mm); + kthread_unuse_mm(worker->mm); mmput(worker->mm); worker->mm = NULL; } @@ -426,7 +426,7 @@ static void io_wq_switch_mm(struct io_worker *worker, struct io_wq_work *work) return; } if (mmget_not_zero(work->mm)) { - use_mm(work->mm); + kthread_use_mm(work->mm); if (!worker->mm) set_fs(USER_DS); worker->mm = work->mm; diff --git a/fs/io_uring.c b/fs/io_uring.c index f5a80f6f28f5..8a8148512da7 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5876,7 +5876,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr, err = -EFAULT; goto fail_req; } - use_mm(ctx->sqo_mm); + kthread_use_mm(ctx->sqo_mm); *mm = ctx->sqo_mm; } @@ -5948,7 +5948,7 @@ static int io_sq_thread(void *data) * may sleep. */ if (cur_mm) { - unuse_mm(cur_mm); + kthread_unuse_mm(cur_mm); mmput(cur_mm); cur_mm = NULL; } @@ -6025,7 +6025,7 @@ static int io_sq_thread(void *data) set_fs(old_fs); if (cur_mm) { - unuse_mm(cur_mm); + kthread_unuse_mm(cur_mm); mmput(cur_mm); } revert_creds(old_cred); diff --git a/include/linux/kthread.h b/include/linux/kthread.h index c2d40c9672d6..12258ea077cf 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -200,8 +200,8 @@ bool kthread_cancel_delayed_work_sync(struct kthread_delayed_work *work); void kthread_destroy_worker(struct kthread_worker *worker); -void use_mm(struct mm_struct *mm); -void unuse_mm(struct mm_struct *mm); +void kthread_use_mm(struct mm_struct *mm); +void kthread_unuse_mm(struct mm_struct *mm); struct cgroup_subsys_state; diff --git a/kernel/kthread.c b/kernel/kthread.c index ce4610316377..8ed4b4fbec7c 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -1208,18 +1208,18 @@ void kthread_destroy_worker(struct kthread_worker *worker) } EXPORT_SYMBOL(kthread_destroy_worker); -/* - * use_mm - * Makes the calling kernel thread take on the specified - * mm context. - * (Note: this routine is intended to be called only - * from a kernel thread context) +/** + * kthread_use_mm - make the calling kthread operate on an address space + * @mm: address space to operate on */ -void use_mm(struct mm_struct *mm) +void kthread_use_mm(struct mm_struct *mm) { struct mm_struct *active_mm; struct task_struct *tsk = current; + WARN_ON_ONCE(!(tsk->flags & PF_KTHREAD)); + WARN_ON_ONCE(tsk->mm); + task_lock(tsk); active_mm = tsk->active_mm; if (active_mm != mm) { @@ -1236,20 +1236,19 @@ void use_mm(struct mm_struct *mm) if (active_mm != mm) mmdrop(active_mm); } -EXPORT_SYMBOL_GPL(use_mm); +EXPORT_SYMBOL_GPL(kthread_use_mm); -/* - * unuse_mm - * Reverses the effect of use_mm, i.e. releases the - * specified mm context which was earlier taken on - * by the calling kernel thread - * (Note: this routine is intended to be called only - * from a kernel thread context) +/** + * kthread_unuse_mm - reverse the effect of kthread_use_mm() + * @mm: address space to operate on */ -void unuse_mm(struct mm_struct *mm) +void kthread_unuse_mm(struct mm_struct *mm) { struct task_struct *tsk = current; + WARN_ON_ONCE(!(tsk->flags & PF_KTHREAD)); + WARN_ON_ONCE(!tsk->mm); + task_lock(tsk); sync_mm_rss(mm); tsk->mm = NULL; @@ -1257,7 +1256,7 @@ void unuse_mm(struct mm_struct *mm) enter_lazy_tlb(mm, tsk); task_unlock(tsk); } -EXPORT_SYMBOL_GPL(unuse_mm); +EXPORT_SYMBOL_GPL(kthread_unuse_mm); #ifdef CONFIG_BLK_CGROUP /** diff --git a/mm/oom_kill.c b/mm/oom_kill.c index dfc357614e56..958d2972313f 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -126,7 +126,7 @@ static bool oom_cpuset_eligible(struct task_struct *tsk, struct oom_control *oc) /* * The process p may have detached its own ->mm while exiting or through - * use_mm(), but one or more of its subthreads may still have a valid + * kthread_use_mm(), but one or more of its subthreads may still have a valid * pointer. Return p, or any of its subthreads with a valid ->mm, with * task_lock() held. */ @@ -919,8 +919,8 @@ static void __oom_kill_process(struct task_struct *victim, const char *message) continue; } /* - * No use_mm() user needs to read from the userspace so we are - * ok to reap it. + * No kthead_use_mm() user needs to read from the userspace so + * we are ok to reap it. */ if (unlikely(p->flags & PF_KTHREAD)) continue; diff --git a/mm/vmacache.c b/mm/vmacache.c index cdc32a3b02fa..ceedbab82106 100644 --- a/mm/vmacache.c +++ b/mm/vmacache.c @@ -25,8 +25,8 @@ * task's vmacache pertains to a different mm (ie, its own). There is * nothing we can do here. * - * Also handle the case where a kernel thread has adopted this mm via use_mm(). - * That kernel thread's vmacache is not applicable to this mm. + * Also handle the case where a kernel thread has adopted this mm via + * kthread_use_mm(). That kernel thread's vmacache is not applicable to this mm. */ static inline bool vmacache_valid_mm(struct mm_struct *mm) { -- 2.25.1
Some architectures like arm64 and s390 require USER_DS to be set for kernel threads to access user address space, which is the whole purpose of kthread_use_mm, but other like x86 don't. That has lead to a huge mess where some callers are fixed up once they are tested on said architectures, while others linger around and yet other like io_uring try to do "clever" optimizations for what usually is just a trivial asignment to a member in the thread_struct for most architectures. Make kthread_use_mm set USER_DS, and kthread_unuse_mm restore to the previous value instead. Signed-off-by: Christoph Hellwig <hch at lst.de> Acked-by: Michael S. Tsirkin <mst at redhat.com> [vhost] --- drivers/usb/gadget/function/f_fs.c | 4 ---- drivers/vhost/vhost.c | 3 --- fs/io-wq.c | 8 ++------ fs/io_uring.c | 4 ---- kernel/kthread.c | 6 ++++++ 5 files changed, 8 insertions(+), 17 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index d9e48bd7c692..a1198f4c527c 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -824,13 +824,9 @@ static void ffs_user_copy_worker(struct work_struct *work) bool kiocb_has_eventfd = io_data->kiocb->ki_flags & IOCB_EVENTFD; if (io_data->read && ret > 0) { - mm_segment_t oldfs = get_fs(); - - set_fs(USER_DS); kthread_use_mm(io_data->mm); ret = ffs_copy_to_iter(io_data->buf, ret, &io_data->data); kthread_unuse_mm(io_data->mm); - set_fs(oldfs); } io_data->kiocb->ki_complete(io_data->kiocb, ret, ret); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 17d598e74780..b2abfbdf3cb2 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -329,9 +329,7 @@ static int vhost_worker(void *data) struct vhost_dev *dev = data; struct vhost_work *work, *work_next; struct llist_node *node; - mm_segment_t oldfs = get_fs(); - set_fs(USER_DS); kthread_use_mm(dev->mm); for (;;) { @@ -361,7 +359,6 @@ static int vhost_worker(void *data) } } kthread_unuse_mm(dev->mm); - set_fs(oldfs); return 0; } diff --git a/fs/io-wq.c b/fs/io-wq.c index 748621f7391e..a5e90ac39e4d 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -169,7 +169,6 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker) dropped_lock = true; } __set_current_state(TASK_RUNNING); - set_fs(KERNEL_DS); kthread_unuse_mm(worker->mm); mmput(worker->mm); worker->mm = NULL; @@ -421,14 +420,11 @@ static void io_wq_switch_mm(struct io_worker *worker, struct io_wq_work *work) mmput(worker->mm); worker->mm = NULL; } - if (!work->mm) { - set_fs(KERNEL_DS); + if (!work->mm) return; - } + if (mmget_not_zero(work->mm)) { kthread_use_mm(work->mm); - if (!worker->mm) - set_fs(USER_DS); worker->mm = work->mm; /* hang on to this mm */ work->mm = NULL; diff --git a/fs/io_uring.c b/fs/io_uring.c index 8a8148512da7..40f90b98a18a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5908,15 +5908,12 @@ static int io_sq_thread(void *data) struct io_ring_ctx *ctx = data; struct mm_struct *cur_mm = NULL; const struct cred *old_cred; - mm_segment_t old_fs; DEFINE_WAIT(wait); unsigned long timeout; int ret = 0; complete(&ctx->completions[1]); - old_fs = get_fs(); - set_fs(USER_DS); old_cred = override_creds(ctx->creds); timeout = jiffies + ctx->sq_thread_idle; @@ -6023,7 +6020,6 @@ static int io_sq_thread(void *data) if (current->task_works) task_work_run(); - set_fs(old_fs); if (cur_mm) { kthread_unuse_mm(cur_mm); mmput(cur_mm); diff --git a/kernel/kthread.c b/kernel/kthread.c index 8ed4b4fbec7c..86357cd38eb2 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -52,6 +52,7 @@ struct kthread { unsigned long flags; unsigned int cpu; void *data; + mm_segment_t oldfs; struct completion parked; struct completion exited; #ifdef CONFIG_BLK_CGROUP @@ -1235,6 +1236,9 @@ void kthread_use_mm(struct mm_struct *mm) if (active_mm != mm) mmdrop(active_mm); + + to_kthread(tsk)->oldfs = get_fs(); + set_fs(USER_DS); } EXPORT_SYMBOL_GPL(kthread_use_mm); @@ -1249,6 +1253,8 @@ void kthread_unuse_mm(struct mm_struct *mm) WARN_ON_ONCE(!(tsk->flags & PF_KTHREAD)); WARN_ON_ONCE(!tsk->mm); + set_fs(to_kthread(tsk)->oldfs); + task_lock(tsk); sync_mm_rss(mm); tsk->mm = NULL; -- 2.25.1
On Thu, Apr 16, 2020 at 07:31:58AM +0200, Christoph Hellwig wrote:> Some architectures like arm64 and s390 require USER_DS to be set for > kernel threads to access user address space, which is the whole purpose > of kthread_use_mm, but other like x86 don't. That has lead to a huge > mess where some callers are fixed up once they are tested on said > architectures, while others linger around and yet other like io_uring > try to do "clever" optimizations for what usually is just a trivial > asignment to a member in the thread_struct for most architectures. > > Make kthread_use_mm set USER_DS, and kthread_unuse_mm restore to the > previous value instead. > > Signed-off-by: Christoph Hellwig <hch at lst.de> > Acked-by: Michael S. Tsirkin <mst at redhat.com> [vhost] > ---Acked-by: Greg Kroah-Hartman <gregkh at linuxfoundation.org> [usb]
Greg KH
2020-Apr-16 06:43 UTC
[PATCH 2/3] kernel: better document the use_mm/unuse_mm API contract
On Thu, Apr 16, 2020 at 07:31:57AM +0200, Christoph Hellwig wrote:> Switch the function documentation to kerneldoc comments, and add > WARN_ON_ONCE asserts that the calling thread is a kernel thread and > does not have ->mm set (or has ->mm set in the case of unuse_mm). > > Also give the functions a kthread_ prefix to better document the > use case. > > Signed-off-by: Christoph Hellwig <hch at lst.de> > Acked-by: Felix Kuehling <Felix.Kuehling at amd.com>Acked-by: Greg Kroah-Hartman <gregkh at linuxfoundation.org> [usb]
On Thu, Apr 16, 2020 at 07:31:55AM +0200, Christoph Hellwig wrote:> this series improves the use_mm / unuse_mm interface by better > documenting the assumptions, and my taking the set_fs manipulations > spread over the callers into the core API.I appreciate all the work you're doing here. Do you have plans to introduce a better-named API than set_fs() / get_fs()? Also, having set_fs() return the previous value of 'fs' would simplify a lot of the callers.
On Thu, Apr 16, 2020 at 08:17:44PM -0700, Matthew Wilcox wrote:> On Thu, Apr 16, 2020 at 07:31:55AM +0200, Christoph Hellwig wrote: > > this series improves the use_mm / unuse_mm interface by better > > documenting the assumptions, and my taking the set_fs manipulations > > spread over the callers into the core API. > > I appreciate all the work you're doing here. > > Do you have plans to introduce a better-named API than set_fs() / get_fs()?Eventually. For now I just plan to kill as many as possible.> Also, having set_fs() return the previous value of 'fs' would simplify > a lot of the callers.One thing that should go relatively soon is the need to store the previous value because we'll have so few callers left that we know we can't recurse. We should be able to get there around 5.9 / 5.10.
On 4/15/20 11:31 PM, Christoph Hellwig wrote:> Hi all, > > this series improves the use_mm / unuse_mm interface by better > documenting the assumptions, and my taking the set_fs manipulations > spread over the callers into the core API. > > Changes since v1: > - drop a few patches > - fix a comment typo > - cover the newly merged use_mm/unuse_mm caller in vfioYou can add my reviewed-by/tested-by to the patches, passes the io_uring regression tests. -- Jens Axboe