Jon Zhu
2011-Jun-11 15:02 UTC
[Lustre-discuss] mkfs.lustre hangs the system (SUSE Linux 11 sp1, Lustre MGS/MDT)
Hi, Does anyone know why mkfs.lustre hangs the system? This problem occurred with Lustre 1.8.5 (SUSE Linux 11, x86_64 version); the hardware platform is Amazon EC2. Here's the full strace log: ldiskfs # strace mkfs.lustre --fsname=temp --mgs --mdt /dev/sdf execve("/usr/sbin/mkfs.lustre", ["mkfs.lustre", "--fsname=temp", "--mgs", "--mdt ", "/dev/sdf"], [/* 51 vars */]) = 0 brk(0) = 0x612000 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f30124b4000 access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) open("/etc/ld.so.cache", O_RDONLY) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=50926, ...}) = 0 mmap(NULL, 50926, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f30124a7000 close(3) = 0 open("/lib64/libc.so.6", O_RDONLY) = 3 read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\340\354\1\0\0\0\0\0"..., 832) = 832 fstat(3, {st_mode=S_IFREG|0755, st_size=1661454, ...}) = 0 mmap(NULL, 3528776, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f3011f39000 fadvise64(3, 0, 3528776, POSIX_FADV_WILLNEED) = 0 mprotect(0x7f301208d000, 2097152, PROT_NONE) = 0 mmap(0x7f301228d000, 20480, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x154000) = 0x7f301228d000 mmap(0x7f3012292000, 18504, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f3012292000 close(3) = 0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f30124a6000 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f30124a5000 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f30124a4000 arch_prctl(ARCH_SET_FS, 0x7f30124a5700) = 0 mprotect(0x7f301228d000, 16384, PROT_READ) = 0 mprotect(0x608000, 4096, PROT_READ) = 0 mprotect(0x7f30124b5000, 4096, PROT_READ) = 0 munmap(0x7f30124a7000, 50926) = 0 open("/proc/sys/kernel/osrelease", O_RDONLY) = 3 read(3, "2.6.", 4) = 4 close(3) = 0 access("/dev/sdf", F_OK) = 0 stat("/dev/sdf", {st_mode=S_IFBLK|0660, st_rdev=makedev(8, 80), 
...}) = 0 access("/usr/sbin/l_getgroups", R_OK|X_OK) = 0 fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 0), ...}) = 0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) 0x7f3 0124b3000 write(1, "\n", 1 ) = 1 write(1, " Permanent disk data:\n", 24 Permanent disk data: ) = 24 write(1, "Target: temp-MDTffff\n", 25Target: temp-MDTffff ) = 25 write(1, "Index: unassigned\n", 23Index: unassigned ) = 23 write(1, "Lustre FS: temp\n", 17Lustre FS: temp ) = 17 write(1, "Mount type: ldiskfs\n", 20Mount type: ldiskfs ) = 20 write(1, "Flags: 0x75\n", 17Flags: 0x75 ) = 17 write(1, " (MDT MGS needs_ind"..., 55 (MDT MGS needs_i ndex first_time update ) ) = 55 write(1, "Persistent mount opts: iopen_nop"..., 65Persistent mount opts: iopen_n opriv,user_xattr,errors=remount-ro ) = 65 write(1, "Parameters: mdt.group_upcall=/us"..., 51Parameters: mdt.group_upcall=/ usr/sbin/l_getgroups ) = 51 write(1, "\n", 1 ) = 1 brk(0) = 0x612000 brk(0x633000) = 0x633000 open("/etc/mtab", O_RDONLY) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=349, ...}) = 0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) 0x7f3 0124b2000 read(3, "/dev/sda1 / ext3 rw,acl,user_xat"..., 4096) = 349 read(3, "", 4096) = 0 close(3) = 0 munmap(0x7f30124b2000, 4096) = 0 syscall_293(0x7fff65afd9c0, 0x80000, 0x406af9, 0x7f30124a5700, 0, 0x612100, 0x61 1820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x61 1820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x61 1820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x61 1820, 0x611820) 0 clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, chil d_tidptr=0x7f30124a59d0) 3978 close(4) = 0 fcntl(3, F_SETFD, 0) = 0 fstat(3, {st_mode=S_IFIFO|0600, st_size=0, ...}) = 0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) 0x7f3 0124b2000 read(3, "", 4096) = 0 --- SIGCHLD (Child exited) @ 0 (0) --- read(3, "", 4096) = 0 
close(3) = 0 wait4(3978, [{WIFEXITED(s) && WEXITSTATUS(s) == 1}], 0, NULL) = 3978 munmap(0x7f30124b2000, 4096) = 0 syscall_293(0x7fff65afd9c0, 0x80000, 0x406af9, 0x7f30124a5700, 0x1, 0x612100, 0x 611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x 611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x 611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x611820, 0x 611820, 0x611820) = 0 clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, chil d_tidptr=0x7f30124a59d0) 3981 close(4) = 0 fcntl(3, F_SETFD, 0) = 0 fstat(3, {st_mode=S_IFIFO|0600, st_size=0, ...}) = 0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) 0x7f3 0124b2000 read(3, "", 4096) = 0 --- SIGCHLD (Child exited) @ 0 (0) --- read(3, "", 4096) = 0 close(3) = 0 wait4(3981, [{WIFEXITED(s) && WEXITSTATUS(s) == 1}], 0, NULL) = 3981 munmap(0x7f30124b2000, 4096) = 0 write(1, "checking for existing Lustre dat"..., 45checking for existing Lustre d ata: not found ) = 45 open("/dev/sdf", O_RDONLY) = 3 ioctl(3, BLKGETSIZE64, 0x7fff65afc910) = 0 close(3) = 0 write(1, "device size = 5120MB\n", 21device size = 5120MB ) = 21 syscall_293(0x7fff65afb870, 0x80000, 0x406af9, 0x7f30124a5700, 0x1, 0x612100, 0x 609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x 609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x 609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x609210, 0x 609210, 0x609210) = 0 clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, chil d_tidptr=0x7f30124a59d0) 3984 close(4) = 0 fcntl(3, F_SETFD, 0) = 0 fstat(3, {st_mode=S_IFIFO|0600, st_size=0, ...}) = 0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) 0x7f3 0124b2000 read(3, "debugfs 1.41.9 (22-Aug-2009)\n", 4096) = 29 read(3, "Supported feature: uninit_groups"..., 4096) = 33 read(3, "", 4096) = 0 uname({sys="Linux", 
node="ip-10-196-34-143", ...}) = 0 write(1, "2 6 32\n", 72 6 32 ) = 7 write(1, "formatting backing filesystem ld"..., 50formatting backing filesystem ldiskfs on /dev/sdf ) = 50 write(1, "\ttarget name temp-MDTffff\n", 27 target name temp-MDTffff ) = 27 write(1, "\t4k blocks 1310720\n", 23 4k blocks 1310720 ) = 23 write(1, "\toptions -J size=204 -i 4"..., 76 options -J size=2 04 -i 4096 -I 512 -q -O dir_index,uninit_groups -F ) = 76 write(1, "mkfs_cmd = mke2fs -j -b 4096 -L "..., 123mkfs_cmd = mke2fs -j -b 4096 -L temp-MDTffff -J size=204 -i 4096 -I 512 -q -O dir_index,uninit_groups -F /dev/sdf 1310720 ) = 123 gettimeofday({1307462203, 738431}, NULL) = 0 getpid() = 3977 open("/tmp/run_command_logiIQsXq", O_RDWR|O_CREAT|O_EXCL, 0600) = 4 close(4) = 0 rt_sigaction(SIGINT, {0x1, [], SA_RESTORER, 0x7f3011f6b9e0}, {SIG_DFL, [], 0}, 8 ) = 0 rt_sigaction(SIGQUIT, {0x1, [], SA_RESTORER, 0x7f3011f6b9e0}, {SIG_DFL, [], 0}, 8) = 0 rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0 clone(child_stack=0, flags=CLONE_PARENT_SETTID|SIGCHLD, parent_tidptr=0x7fff65af c828) 3986 wait4(3986, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0, NULL) = 3986 rt_sigaction(SIGINT, {SIG_DFL, [], SA_RESTORER, 0x7f3011f6b9e0}, NULL, 8) 0 rt_sigaction(SIGQUIT, {SIG_DFL, [], SA_RESTORER, 0x7f3011f6b9e0}, NULL, 8) 0 rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0 --- SIGCHLD (Child exited) @ 0 (0) --- unlink("/tmp/run_command_logiIQsXq") = 0 mkdir("/tmp/mntFdZiJN", 0700) = 0 mount("/dev/sdf", "/tmp/mntFdZiJN", "ldiskfs", 0, "iopen_nopriv,user_xattr,error s=r"... Thanks, -Jon jon.zhu at gmail.com -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.lustre.org/pipermail/lustre-discuss/attachments/20110611/f1e8afaa/attachment-0001.html