Luis Chamberlain
2020-Jun-26 02:54 UTC
[Bridge] linux-next: umh: fix processed error when UMH_WAIT_PROC is used seems to break linux bridge on s390x (bisected)
On Wed, Jun 24, 2020 at 08:37:55PM +0200, Christian Borntraeger wrote:> > > On 24.06.20 20:32, Christian Borntraeger wrote: > [...]> > > So the translations look correct. But your change is actually a sematic change > > if(ret) will only trigger if there is an error > > if (KWIFEXITED(ret)) will always trigger when the process ends. So we will always overwrite -ECHILD > > and we did not do it before. > > > > So the right fix is > > diff --git a/kernel/umh.c b/kernel/umh.c > index f81e8698e36e..a3a3196e84d1 100644 > --- a/kernel/umh.c > +++ b/kernel/umh.c > @@ -154,7 +154,7 @@ static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info) > * the real error code is already in sub_info->retval or > * sub_info->retval is 0 anyway, so don't mess with it then. > */ > - if (KWIFEXITED(ret)) > + if (KWEXITSTATUS(ret)) > sub_info->retval = KWEXITSTATUS(ret); > } > > I think.Nope, the right form is to check for WIFEXITED() before using WEXITSTATUS(). I'm not able to reproduce this on x86 with a bridge. What type of bridge are you using on a guest, or did you mean using KVM so that the *host* can spawn kvm guests? It would be good if you can try to add a bridge manually and see where things fail. Can you do something like this: brctl addbr br0 brctl addif br0 ens6 ip link set dev br0 up Note that most callers are for modprobe. I'd be curious to see which umh is failing which breaks bridge for you. Can you trut this so we can see which umh call is failing? diff --git a/kernel/umh.c b/kernel/umh.c index f81e8698e36e..5ad74bc301d8 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -2,6 +2,9 @@ /* * umh - the kernel usermode helper */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/sched.h> #include <linux/sched/task.h> @@ -154,8 +157,12 @@ static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info) * the real error code is already in sub_info->retval or * sub_info->retval is 0 anyway, so don't mess with it then. */ - if (KWIFEXITED(ret)) + printk("== ret: %02x\n", ret); + printk("== KWIFEXITED(ret): %02x\n", KWIFEXITED(ret)); + if (KWIFEXITED(ret)) { + printk("KWEXITSTATUS(ret): %d\n", KWEXITSTATUS(ret)); sub_info->retval = KWEXITSTATUS(ret); + } } /* Restore default kernel sig handler */ @@ -383,6 +390,7 @@ struct subprocess_info *call_usermodehelper_setup(const char *path, char **argv, void *data) { struct subprocess_info *sub_info; + unsigned int i = 0; sub_info = kzalloc(sizeof(struct subprocess_info), gfp_mask); if (!sub_info) goto out; @@ -394,6 +402,11 @@ struct subprocess_info *call_usermodehelper_setup(const char *path, char **argv, #else sub_info->path = path; #endif + pr_info("sub_info->path: %s\n", sub_info->path); + while (argv[i]) + printk(KERN_INFO "%s ", argv[i++]); + printk(KERN_INFO "\n"); + sub_info->argv = argv; sub_info->envp = envp;
Christian Borntraeger
2020-Jun-26 05:24 UTC
[Bridge] linux-next: umh: fix processed error when UMH_WAIT_PROC is used seems to break linux bridge on s390x (bisected)
On 26.06.20 04:54, Luis Chamberlain wrote:> On Wed, Jun 24, 2020 at 08:37:55PM +0200, Christian Borntraeger wrote: >> >> >> On 24.06.20 20:32, Christian Borntraeger wrote: >> [...]> >>> So the translations look correct. But your change is actually a sematic change >>> if(ret) will only trigger if there is an error >>> if (KWIFEXITED(ret)) will always trigger when the process ends. So we will always overwrite -ECHILD >>> and we did not do it before. >>> >> >> So the right fix is >> >> diff --git a/kernel/umh.c b/kernel/umh.c >> index f81e8698e36e..a3a3196e84d1 100644 >> --- a/kernel/umh.c >> +++ b/kernel/umh.c >> @@ -154,7 +154,7 @@ static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info) >> * the real error code is already in sub_info->retval or >> * sub_info->retval is 0 anyway, so don't mess with it then. >> */ >> - if (KWIFEXITED(ret)) >> + if (KWEXITSTATUS(ret)) >> sub_info->retval = KWEXITSTATUS(ret); >> } >> >> I think. > > Nope, the right form is to check for WIFEXITED() before using WEXITSTATUS().But this IS a change over the previous code, no? I will test next week as I am travelling right now.
Luis Chamberlain
2020-Jun-30 17:57 UTC
[Bridge] linux-next: umh: fix processed error when UMH_WAIT_PROC is used seems to break linux bridge on s390x (bisected)
On Fri, Jun 26, 2020 at 02:54:10AM +0000, Luis Chamberlain wrote:> On Wed, Jun 24, 2020 at 08:37:55PM +0200, Christian Borntraeger wrote: > > > > > > On 24.06.20 20:32, Christian Borntraeger wrote: > > [...]> > > > So the translations look correct. But your change is actually a sematic change > > > if(ret) will only trigger if there is an error > > > if (KWIFEXITED(ret)) will always trigger when the process ends. So we will always overwrite -ECHILD > > > and we did not do it before. > > > > > > > So the right fix is > > > > diff --git a/kernel/umh.c b/kernel/umh.c > > index f81e8698e36e..a3a3196e84d1 100644 > > --- a/kernel/umh.c > > +++ b/kernel/umh.c > > @@ -154,7 +154,7 @@ static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info) > > * the real error code is already in sub_info->retval or > > * sub_info->retval is 0 anyway, so don't mess with it then. > > */ > > - if (KWIFEXITED(ret)) > > + if (KWEXITSTATUS(ret)) > > sub_info->retval = KWEXITSTATUS(ret); > > } > > > > I think. > > Nope, the right form is to check for WIFEXITED() before using WEXITSTATUS(). > I'm not able to reproduce this on x86 with a bridge. What type of bridge > are you using on a guest, or did you mean using KVM so that the *host* > can spawn kvm guests? > > It would be good if you can try to add a bridge manually and see where > things fail. Can you do something like this: > > brctl addbr br0 > brctl addif br0 ens6 > ip link set dev br0 up > > Note that most callers are for modprobe. I'd be curious to see which > umh is failing which breaks bridge for you. Can you trut this so we can > see which umh call is failing?Christian, any luck getting to test the code below to see what this reveals? Luis> > diff --git a/kernel/umh.c b/kernel/umh.c > index f81e8698e36e..5ad74bc301d8 100644 > --- a/kernel/umh.c > +++ b/kernel/umh.c > @@ -2,6 +2,9 @@ > /* > * umh - the kernel usermode helper > */ > + > +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt > + > #include <linux/module.h> > #include <linux/sched.h> > #include <linux/sched/task.h> > @@ -154,8 +157,12 @@ static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info) > * the real error code is already in sub_info->retval or > * sub_info->retval is 0 anyway, so don't mess with it then. > */ > - if (KWIFEXITED(ret)) > + printk("== ret: %02x\n", ret); > + printk("== KWIFEXITED(ret): %02x\n", KWIFEXITED(ret)); > + if (KWIFEXITED(ret)) { > + printk("KWEXITSTATUS(ret): %d\n", KWEXITSTATUS(ret)); > sub_info->retval = KWEXITSTATUS(ret); > + } > } > > /* Restore default kernel sig handler */ > @@ -383,6 +390,7 @@ struct subprocess_info *call_usermodehelper_setup(const char *path, char **argv, > void *data) > { > struct subprocess_info *sub_info; > + unsigned int i = 0; > sub_info = kzalloc(sizeof(struct subprocess_info), gfp_mask); > if (!sub_info) > goto out; > @@ -394,6 +402,11 @@ struct subprocess_info *call_usermodehelper_setup(const char *path, char **argv, > #else > sub_info->path = path; > #endif > + pr_info("sub_info->path: %s\n", sub_info->path); > + while (argv[i]) > + printk(KERN_INFO "%s ", argv[i++]); > + printk(KERN_INFO "\n"); > + > sub_info->argv = argv; > sub_info->envp = envp; > >