This is recent i386 stable/7. The problem is 100% reproducible with the same stack trace. The problem happens when I start a Linux program that performs a check on a cd-rom device (an ATAPI cd-rom that is presented as cd0 by the atapicam driver). I examined the crash dump in kgdb but couldn't find anything peculiar in the variables, so I guess that this must be a stack overflow or something like that.

In fact, the difference between the values of %esp in frame 4 and frame 22 is 7500, which is quite close to KSTACK_PAGES * PAGE_SIZE for i386. If I did my calculations correctly, then it seems that linux_ioctl_cdrom uses slightly more than 4K of stack, then cam_periph_error uses 828 bytes, scsi_command_string uses 696, and cam_error_print uses 540.

The backtrace is below. I am keeping the dump.

(kgdb) bt
#0 doadump () at pcpu.h:196
#1 0xc0556523 in boot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:418
#2 0xc055676f in panic (fmt=Variable "fmt" is not available.
) at /usr/src/sys/kern/kern_shutdown.c:574
#3 0xc0712b69 in dblfault_handler () at /usr/src/sys/i386/i386/trap.c:972
#4 0xc058380a in kvprintf (fmt=0xc075d204 " ", func=0xc05828e0 <snprintf_func>, arg=0xdabf0120, radix=10, ap=0xdabf015c "B") at /usr/src/sys/kern/subr_prf.c:823
#5 0xc0583c0b in vsnprintf (str=0xdabf03eb "", size=49, format=0xc075d202 "%x ", ap=0xdabf0158 "B") at /usr/src/sys/kern/subr_prf.c:483
#6 0xc0583cf9 in snprintf (str=0xdabf03eb "", size=49, format=0xc075d202 "%x ") at /usr/src/sys/kern/subr_prf.c:467
#7 0xc0449dc0 in scsi_cdb_string (cdb_ptr=0xc3187c9c "B", cdb_string=0xdabf03eb "", len=49) at /usr/src/sys/cam/scsi/scsi_all.c:2943
#8 0xc0449ffd in scsi_command_string (csio=0xc3187c00, sb=0xdabf0440) at /usr/src/sys/cam/scsi/scsi_all.c:3031
#9 0xc043db61 in cam_error_string (ccb=0xc3187c00, str=0xdabf04bc "(cd0:ata0:0:0:0): ", str_len=512, flags=Variable "flags" is not available.
) at /usr/src/sys/cam/cam.c:262
#10 0xc043dcb4 in cam_error_print (ccb=0xc3187c00, flags=CAM_ESF_ALL, proto_flags=CAM_EPF_ALL) at /usr/src/sys/cam/cam.c:341
#11 0xc043e500 in cam_periph_error (ccb=0xc3187c00, camflags=CAM_RETRY_SELTO, sense_flags=1, save_ccb=0xc32b3034) at /usr/src/sys/cam/cam_periph.c:1548
#12 0xc044bcb0 in cderror (ccb=0xc3187c00, cam_flags=2, sense_flags=1) at /usr/src/sys/cam/scsi/scsi_cd.c:3133
#13 0xc043ee1a in cam_periph_runccb (ccb=0xc3187c00, error_routine=0xc044bc10 <cderror>, camflags=CAM_RETRY_SELTO, sense_flags=1, ds=0xc329fb40) at /usr/src/sys/cam/cam_periph.c:902
#14 0xc044c2cb in cdrunccb (ccb=0xc3187c00, error_routine=0xc044bc10 <cderror>, cam_flags=2, sense_flags=1) at /usr/src/sys/cam/scsi/scsi_cd.c:1318
#15 0xc044f684 in cdioctl (dp=0xc3286000, cmd=3222037279, addr=0xdabf1c1c, flag=5, td=0xc354c230) at /usr/src/sys/cam/scsi/scsi_cd.c:3227
#16 0xc04f9fa4 in g_disk_ioctl (pp=0xc32c7000, cmd=3222037279, data=0xdabf1c1c, fflag=5, td=0xc354c230) at /usr/src/sys/geom/geom_disk.c:231
#17 0xc04f92dd in g_dev_ioctl (dev=0xc33faa00, cmd=3222037279, data=0xdabf1c1c "\001\001", fflag=5, td=0xc354c230) at /usr/src/sys/geom/geom_dev.c:332
#18 0xc04e5b87 in devfs_ioctl_f (fp=0xc32d1474, com=3222037279, data=0xdabf1c1c, cred=0xc4077800, td=0xc354c230) at /usr/src/sys/fs/devfs/devfs_vnops.c:602
#19 0xc3449ea4 in linux_ioctl_cdrom (td=0xc354c230, args=0xdabf1cfc) at file.h:269
#20 0xc344978a in linux_ioctl (td=0xc354c230, args=0xdabf1cfc) at /usr/src/sys/modules/linux/../../compat/linux/linux_ioctl.c:2621
#21 0xc0713495 in syscall (frame=0xdabf1d38) at /usr/src/sys/i386/i386/trap.c:1090
#22 0xc07006d0 in Xint0x80_syscall () at /usr/src/sys/i386/i386/exception.s:255
#23 0x00000033 in ?? ()

--
Andriy Gapon
Andriy Gapon
2009-May-18 14:09 UTC
stack abuse by linux_ioctl_cdrom [Was: double-fault in linux ioctl for atapicam cd device]
on 18/05/2009 16:41 Andriy Gapon said the following:
> This is recent i386 stable/7. The problem is 100% reproducible with the same stack > trace. The problem happens when I start a Linux program that performs a check on a > cd-rom device (atapi cd-rom that is presented as cd0 by atapicam driver). > I examined the crash dump in kgdb but couldn't find anything peculiar in the > variables, so I guess that this must be stack overflow or something like that. > > In fact, difference between values in %esp in frame 4 and frame 22 is 7500 which > is quite close to KSTACK_PAGES * PAGE_SIZE for i386. > If I did my calculations correctly then it seems that linux_ioctl_cdrom uses > slightly more than 4K of stack, then cam_periph_error uses 828 bytes, > scsi_command_string uses 696, cam_error_print uses 540.

In fact, almost all of the stack usage in linux_ioctl_cdrom comes from the struct dvd_struct (2060 bytes) and l_dvd_struct (2056 bytes) variables declared on the stack (for the LINUX_DVD_READ_STRUCT case). I am not sure what the best way to fix this is - move them to the heap?

> The backtrace is below. I am keeping the dump. > > (kgdb) bt > #0 doadump () at pcpu.h:196 > #1 0xc0556523 in boot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:418 > #2 0xc055676f in panic (fmt=Variable "fmt" is not available. 
> ) at /usr/src/sys/kern/kern_shutdown.c:574 > #3 0xc0712b69 in dblfault_handler () at /usr/src/sys/i386/i386/trap.c:972 > #4 0xc058380a in kvprintf (fmt=0xc075d204 " ", func=0xc05828e0 <snprintf_func>, > arg=0xdabf0120, radix=10, ap=0xdabf015c "B") at /usr/src/sys/kern/subr_prf.c:823 > #5 0xc0583c0b in vsnprintf (str=0xdabf03eb "", size=49, format=0xc075d202 "%x ", > ap=0xdabf0158 "B") at /usr/src/sys/kern/subr_prf.c:483 > #6 0xc0583cf9 in snprintf (str=0xdabf03eb "", size=49, format=0xc075d202 "%x ") > at /usr/src/sys/kern/subr_prf.c:467 > #7 0xc0449dc0 in scsi_cdb_string (cdb_ptr=0xc3187c9c "B", cdb_string=0xdabf03eb > "", len=49) at /usr/src/sys/cam/scsi/scsi_all.c:2943 > #8 0xc0449ffd in scsi_command_string (csio=0xc3187c00, sb=0xdabf0440) at > /usr/src/sys/cam/scsi/scsi_all.c:3031 > #9 0xc043db61 in cam_error_string (ccb=0xc3187c00, str=0xdabf04bc > "(cd0:ata0:0:0:0): ", str_len=512, flags=Variable "flags" is not available. > ) at /usr/src/sys/cam/cam.c:262 > #10 0xc043dcb4 in cam_error_print (ccb=0xc3187c00, flags=CAM_ESF_ALL, > proto_flags=CAM_EPF_ALL) at /usr/src/sys/cam/cam.c:341 > #11 0xc043e500 in cam_periph_error (ccb=0xc3187c00, camflags=CAM_RETRY_SELTO, > sense_flags=1, save_ccb=0xc32b3034) at /usr/src/sys/cam/cam_periph.c:1548 > #12 0xc044bcb0 in cderror (ccb=0xc3187c00, cam_flags=2, sense_flags=1) at > /usr/src/sys/cam/scsi/scsi_cd.c:3133 > #13 0xc043ee1a in cam_periph_runccb (ccb=0xc3187c00, error_routine=0xc044bc10 > <cderror>, camflags=CAM_RETRY_SELTO, sense_flags=1, ds=0xc329fb40) at > /usr/src/sys/cam/cam_periph.c:902 > #14 0xc044c2cb in cdrunccb (ccb=0xc3187c00, error_routine=0xc044bc10 <cderror>, > cam_flags=2, sense_flags=1) at /usr/src/sys/cam/scsi/scsi_cd.c:1318 > #15 0xc044f684 in cdioctl (dp=0xc3286000, cmd=3222037279, addr=0xdabf1c1c, flag=5, > td=0xc354c230) at /usr/src/sys/cam/scsi/scsi_cd.c:3227 > #16 0xc04f9fa4 in g_disk_ioctl (pp=0xc32c7000, cmd=3222037279, data=0xdabf1c1c, > fflag=5, td=0xc354c230) at 
/usr/src/sys/geom/geom_disk.c:231 > #17 0xc04f92dd in g_dev_ioctl (dev=0xc33faa00, cmd=3222037279, data=0xdabf1c1c > "\001\001", fflag=5, td=0xc354c230) at /usr/src/sys/geom/geom_dev.c:332 > #18 0xc04e5b87 in devfs_ioctl_f (fp=0xc32d1474, com=3222037279, data=0xdabf1c1c, > cred=0xc4077800, td=0xc354c230) at /usr/src/sys/fs/devfs/devfs_vnops.c:602 > #19 0xc3449ea4 in linux_ioctl_cdrom (td=0xc354c230, args=0xdabf1cfc) at file.h:269 > #20 0xc344978a in linux_ioctl (td=0xc354c230, args=0xdabf1cfc) at > /usr/src/sys/modules/linux/../../compat/linux/linux_ioctl.c:2621 > #21 0xc0713495 in syscall (frame=0xdabf1d38) at /usr/src/sys/i386/i386/trap.c:1090 > #22 0xc07006d0 in Xint0x80_syscall () at /usr/src/sys/i386/i386/exception.s:255 > #23 0x00000033 in ?? () >-- Andriy Gapon
This is the patch that I currently use to fix the problem for myself - both 2KB structs are allocated on the heap. I am not sure what the proper style is for chaining calls with if-else, but I think that the chaining is the best way to organize that piece of code, so that there is only one exit point from the case-block and FREE is always called.

diff --git a/sys/compat/linux/linux_ioctl.c b/sys/compat/linux/linux_ioctl.c
index 8e42ec1..7e3453c 100644
--- a/sys/compat/linux/linux_ioctl.c
+++ b/sys/compat/linux/linux_ioctl.c
@@ -1538,23 +1538,28 @@ linux_ioctl_cdrom(struct thread *td, struct linux_ioctl_args *args)
 	/* LINUX_CDROMAUDIOBUFSIZ */
 
 	case LINUX_DVD_READ_STRUCT: {
-		l_dvd_struct lds;
-		struct dvd_struct bds;
+		l_dvd_struct *p_lds;
+		struct dvd_struct *p_bds;
 
-		error = copyin((void *)args->arg, &lds, sizeof(lds));
-		if (error)
-			break;
-		error = linux_to_bsd_dvd_struct(&lds, &bds);
-		if (error)
-			break;
-		error = fo_ioctl(fp, DVDIOCREADSTRUCTURE, (caddr_t)&bds,
-		    td->td_ucred, td);
-		if (error)
-			break;
-		error = bsd_to_linux_dvd_struct(&bds, &lds);
-		if (error)
-			break;
-		error = copyout(&lds, (void *)args->arg, sizeof(lds));
+		MALLOC(p_lds, l_dvd_struct *, sizeof(*p_lds),
+		    M_LINUX, M_WAITOK);
+		MALLOC(p_bds, struct dvd_struct *, sizeof(*p_bds),
+		    M_LINUX, M_WAITOK);
+		if ((error = copyin((void *)args->arg, p_lds, sizeof(*p_lds)))
+		    != 0)
+			; /* nothing */
+		else if ((error = linux_to_bsd_dvd_struct(p_lds, p_bds)) != 0)
+			; /* nothing */
+		else if ((error = fo_ioctl(fp, DVDIOCREADSTRUCTURE,
+		    (caddr_t)p_bds, td->td_ucred, td)) != 0)
+			; /* nothing */
+		else if ((error = bsd_to_linux_dvd_struct(p_bds, p_lds)) != 0)
+			; /* nothing */
+		else
+			error = copyout(p_lds, (void *)args->arg,
+			    sizeof(*p_lds));
+		FREE(p_bds, M_LINUX);
+		FREE(p_lds, M_LINUX);
 		break;
 	}
 

--
Andriy Gapon