On Sat, Jul 22, 2017 at 03:37:30PM +0700, Eugene Grosbein wrote:
> On 22.07.2017 15:00, Konstantin Belousov wrote:
> > On Sat, Jul 22, 2017 at 02:40:59PM +0700, Eugene Grosbein wrote:
> >> Also, I've always wondered what load pattern one should have
> >> to exhibit real kernel stack problems due to KVA fragmentation
> >> and KSTACK_PAGES>2 on i386?
> > In fact each stack consumes 3 contiguous pages because there is also
> > the guard, which catches the double faults.
> >
> > You need to use the machine, e.g. to run something that creates and
> > destroys kernel threads, while doing something that consumes
> > kernel_arena KVA. Plain malloc/zmalloc is enough.
>
> Does an i386 box running PPPoE connection to an ISP (mpd5) plus several
> IPSEC tunnels plus PPtP tunnel plus WiFi access point plus
> "transmission" torrent client with 2TB UFS volume over GEOM_CACHE
> over GEOM_JOURNAL over USB qualify? There are ospfd, racoon,
> sendmail, ssh and several periodic cron jobs too.
I doubt that any tunnel activity causes creation and destruction of threads.
Same for hostapd or routing daemons or UFS over really weird geom classes.
Sendmail and cron indeed cause process creation, but the overhead of
processing of these programs prevents high turnaround of new processes,
typically. No idea about your torrent client.
From this description, I would not even be surprised if your machine
load fits into the kstacks cache, despite the cache's quite conservative
settings. In other words, almost definitely your machine is not
representative of the problematic load. Something that creates a lot of
short- and middle-lived threads would be.
>
> > In other words, any non-static load would cause fragmentation
> > preventing allocations of the kernel stacks for new threads.
> >
> >> How can I get the ddb backtrace you asked for? I'm not very
> >> familiar with ddb.
> >> I have a serial console to such an i386 system.
> >
> > The bt command for the given thread provides the backtrace. I have no
> > idea how you obtained the numbers that you show.
>
> Not sure which kernel thread I should trace... If you just need the
> name of the function:
>
> $ objdump -d vm_object.o | grep -B 8 'sub .*0x...,%esp' |less
>
> 00003b30 <sysctl_vm_object_list>:
> 3b30: 55 push %ebp
> 3b31: 89 e5 mov %esp,%ebp
> 3b33: 53 push %ebx
> 3b34: 57 push %edi
> 3b35: 56 push %esi
> 3b36: 83 e4 f8 and $0xfffffff8,%esp
> 3b39: 81 ec 30 05 00 00 sub $0x530,%esp
>
> It uses the stack for a pretty large struct kinfo_vmobject (includes
> char kvo_path[PATH_MAX]) and several others.
I see. It is enough information to fix your observation for vm_object.o.
Patch below reduces the frame size for sysctl_vm_object_list from 1.3K
to 200 bytes. This function is only executed by explicit user query.
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 6c6137d5fb2..b92d31c3e60 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -2275,7 +2315,7 @@ vm_object_vnode(vm_object_t object)
static int
sysctl_vm_object_list(SYSCTL_HANDLER_ARGS)
{
- struct kinfo_vmobject kvo;
+ struct kinfo_vmobject *kvo;
char *fullpath, *freepath;
struct vnode *vp;
struct vattr va;
@@ -2300,6 +2340,7 @@ sysctl_vm_object_list(SYSCTL_HANDLER_ARGS)
count * 11 / 10));
}
+ kvo = malloc(sizeof(*kvo), M_TEMP, M_WAITOK);
error = 0;
/*
@@ -2317,13 +2358,13 @@ sysctl_vm_object_list(SYSCTL_HANDLER_ARGS)
continue;
}
mtx_unlock(&vm_object_list_mtx);
- kvo.kvo_size = ptoa(obj->size);
- kvo.kvo_resident = obj->resident_page_count;
- kvo.kvo_ref_count = obj->ref_count;
- kvo.kvo_shadow_count = obj->shadow_count;
- kvo.kvo_memattr = obj->memattr;
- kvo.kvo_active = 0;
- kvo.kvo_inactive = 0;
+ kvo->kvo_size = ptoa(obj->size);
+ kvo->kvo_resident = obj->resident_page_count;
+ kvo->kvo_ref_count = obj->ref_count;
+ kvo->kvo_shadow_count = obj->shadow_count;
+ kvo->kvo_memattr = obj->memattr;
+ kvo->kvo_active = 0;
+ kvo->kvo_inactive = 0;
TAILQ_FOREACH(m, &obj->memq, listq) {
/*
* A page may belong to the object but be
@@ -2335,46 +2376,46 @@ sysctl_vm_object_list(SYSCTL_HANDLER_ARGS)
* approximation of the system anyway.
*/
if (vm_page_active(m))
- kvo.kvo_active++;
+ kvo->kvo_active++;
else if (vm_page_inactive(m))
- kvo.kvo_inactive++;
+ kvo->kvo_inactive++;
}
- kvo.kvo_vn_fileid = 0;
- kvo.kvo_vn_fsid = 0;
- kvo.kvo_vn_fsid_freebsd11 = 0;
+ kvo->kvo_vn_fileid = 0;
+ kvo->kvo_vn_fsid = 0;
+ kvo->kvo_vn_fsid_freebsd11 = 0;
freepath = NULL;
fullpath = "";
vp = NULL;
switch (obj->type) {
case OBJT_DEFAULT:
- kvo.kvo_type = KVME_TYPE_DEFAULT;
+ kvo->kvo_type = KVME_TYPE_DEFAULT;
break;
case OBJT_VNODE:
- kvo.kvo_type = KVME_TYPE_VNODE;
+ kvo->kvo_type = KVME_TYPE_VNODE;
vp = obj->handle;
vref(vp);
break;
case OBJT_SWAP:
- kvo.kvo_type = KVME_TYPE_SWAP;
+ kvo->kvo_type = KVME_TYPE_SWAP;
break;
case OBJT_DEVICE:
- kvo.kvo_type = KVME_TYPE_DEVICE;
+ kvo->kvo_type = KVME_TYPE_DEVICE;
break;
case OBJT_PHYS:
- kvo.kvo_type = KVME_TYPE_PHYS;
+ kvo->kvo_type = KVME_TYPE_PHYS;
break;
case OBJT_DEAD:
- kvo.kvo_type = KVME_TYPE_DEAD;
+ kvo->kvo_type = KVME_TYPE_DEAD;
break;
case OBJT_SG:
- kvo.kvo_type = KVME_TYPE_SG;
+ kvo->kvo_type = KVME_TYPE_SG;
break;
case OBJT_MGTDEVICE:
- kvo.kvo_type = KVME_TYPE_MGTDEVICE;
+ kvo->kvo_type = KVME_TYPE_MGTDEVICE;
break;
default:
- kvo.kvo_type = KVME_TYPE_UNKNOWN;
+ kvo->kvo_type = KVME_TYPE_UNKNOWN;
break;
}
VM_OBJECT_RUNLOCK(obj);
@@ -2382,29 +2423,30 @@ sysctl_vm_object_list(SYSCTL_HANDLER_ARGS)
vn_fullpath(curthread, vp, &fullpath, &freepath);
vn_lock(vp, LK_SHARED | LK_RETRY);
if (VOP_GETATTR(vp, &va, curthread->td_ucred) == 0) {
- kvo.kvo_vn_fileid = va.va_fileid;
- kvo.kvo_vn_fsid = va.va_fsid;
- kvo.kvo_vn_fsid_freebsd11 = va.va_fsid;
+ kvo->kvo_vn_fileid = va.va_fileid;
+ kvo->kvo_vn_fsid = va.va_fsid;
+ kvo->kvo_vn_fsid_freebsd11 = va.va_fsid;
/* truncate */
}
vput(vp);
}
- strlcpy(kvo.kvo_path, fullpath, sizeof(kvo.kvo_path));
+ strlcpy(kvo->kvo_path, fullpath, sizeof(kvo->kvo_path));
if (freepath != NULL)
free(freepath, M_TEMP);
/* Pack record size down */
- kvo.kvo_structsize = offsetof(struct kinfo_vmobject, kvo_path) +
- strlen(kvo.kvo_path) + 1;
- kvo.kvo_structsize = roundup(kvo.kvo_structsize,
+ kvo->kvo_structsize = offsetof(struct kinfo_vmobject, kvo_path)
+ + strlen(kvo->kvo_path) + 1;
+ kvo->kvo_structsize = roundup(kvo->kvo_structsize,
sizeof(uint64_t));
- error = SYSCTL_OUT(req, &kvo, kvo.kvo_structsize);
+ error = SYSCTL_OUT(req, kvo, kvo->kvo_structsize);
mtx_lock(&vm_object_list_mtx);
if (error)
break;
}
mtx_unlock(&vm_object_list_mtx);
+ free(kvo, M_TEMP);
return (error);
}
SYSCTL_PROC(_vm, OID_AUTO, objects, CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_SKIP
|