Hi, Trying to start MDS/MDT and OST on the same node ... MDS/MDT combo appears to be working fine ... [root@scratchy ~]# cat /proc/fs/lustre/devices 0 UP mgs MGS MGS 5 1 UP mgc MGC10.3.1.40@tcp 3c892ff4-3c64-c307-517e-f0d06ecfde8d 5 2 UP mdt MDS MDS_uuid 3 3 UP lov testfs-mdtlov testfs-mdtlov_UUID 4 4 UP mds testfs-MDT0000 testfs-MDT0000_UUID 3 [root@scratchy ~]# Has anyone run into or had experience with this issue? OST won't start for me in 1.5.97. [root@scratchy ~]# mkfs.lustre --reformat --fsname=testfs --ost --mgsnode=10.3.1.40@tcp /dev/sdb1 Permanent disk data: Target: testfs-OSTffff Index: unassigned Lustre FS: testfs Mount type: ldiskfs Flags: 0x72 (OST needs_index first_time update ) Persistent mount opts: errors=remount-ro,extents,mballoc Parameters: mgsnode=10.3.1.40@tcp device size = 8191MB formatting backing filesystem ldiskfs on /dev/sdb1 target name testfs-OSTffff 4k blocks 0 options -J size=324 -i 16384 -I 256 -q -O dir_index -F mkfs_cmd = mkfs.ext2 -j -b 4096 -L testfs-OSTffff -J size=324 -i 16384 -I 256 -q -O dir_index -F /dev/sdb1 Writing CONFIGS/mountdata [root@scratchy ~]# mount -t lustre /dev/sdb1 /mnt/test/ost0 mount.lustre: mount /dev/sdb1 at /mnt/test/ost0 failed: Invalid argument This may have multiple causes. Are the mount options correct? Check the syslog for more info. [root@scratchy ~]# Corresponding console messages ... 
LDISKFS-fs: file extents enabled LDISKFS-fs: mballoc enabled LustreError: 10154:0:(service.c:1072:ptlrpc_start_threads()) cannot start ll_ost thread #512: rc -24 LustreError: 10154:0:(service.c:1072:ptlrpc_start_threads()) cannot start ll_ost thread #513: rc -24 LustreError: 10154:0:(service.c:1072:ptlrpc_start_threads()) cannot start ll_ost thread #514: rc -24 LustreError: 10154:0:(service.c:1072:ptlrpc_start_threads()) cannot start ll_ost thread #515: rc -24 LustreError: 10154:0:(service.c:1072:ptlrpc_start_threads()) cannot start ll_ost thread #516: rc -24 LustreError: 10154:0:(service.c:1072:ptlrpc_start_threads()) cannot start ll_ost thread #517: rc -24 LustreError: 10154:0:(service.c:1072:ptlrpc_start_threads()) cannot start ll_ost thread #518: rc -24 LustreError: 10154:0:(obd_config.c:299:class_setup()) setup OSS failed (-22) LustreError: 10154:0:(obd_mount.c:450:lustre_start_simple()) OSS setup error -22 LustreError: 10154:0:(obd_mount.c:1030:server_start_targets()) failed to start OSS: -22 LustreError: 10154:0:(obd_mount.c:1560:server_fill_super()) Unable to start targets: -22 LustreError: 10154:0:(mgc_request.c:150:config_log_find()) can't get log testfs-OSTffff LustreError: 10154:0:(obd_mount.c:1347:server_put_super()) no obd testfs-OSTffff LustreError: 10154:0:(obd_mount.c:118:server_deregister_mount()) testfs-OSTffff not registered LDISKFS-fs: mballoc: 0 blocks 0 reqs (0 success) LDISKFS-fs: mballoc: 0 extents scanned, 0 goal hits, 0 2^N hits, 0 breaks LDISKFS-fs: mballoc: 0 generated and it took 0 Lustre: server umount testfs-OSTffff complete LustreError: 10154:0:(obd_mount.c:1911:lustre_fill_super()) Unable to mount (-22) [root@scratchy ~]# lsmod Module Size Used by loop 17552 1 ost 91068 0 mds 441944 2 fsfilt_ldiskfs 50944 2 mgs 135224 1 mgc 52008 2 mgs ldiskfs 218784 2 fsfilt_ldiskfs lustre 402192 0 lov 272200 2 lustre lquota 110096 4 mds mdc 99928 1 lustre ksocklnd 119624 1 ptlrpc 601864 8 ost,mds,mgs,mgc,lustre,lov,lquota,mdc obdclass 498888 11 
ost,mds,fsfilt_ldiskfs,mgs,mgc,lustre,lov,lquota,mdc,ptlrpc lnet 226696 4 lustre,ksocklnd,ptlrpc,obdclass lvfs 40712 12 ost,mds,fsfilt_ldiskfs,mgs,mgc,lustre,lov,lquota,mdc,ptlrpc,obdclass libcfs 118648 14 ost,mds,fsfilt_ldiskfs,mgs,mgc,lustre,lov,lquota,mdc,ksocklnd,ptlrpc,obdclass,lnet,lvfs ipv6 284384 22 autofs4 24840 0 i2c_dev 14208 0 i2c_core 29184 1 i2c_dev nfs 245808 3 lockd 78768 2 nfs nfs_acl 5632 1 nfs sunrpc 176248 7 nfs,lockd,nfs_acl dm_multipath 22544 0 ohci_hcd 24848 0 ehci_hcd 33924 0 floppy 66384 0 iscsi_init 14976 0 dm_snapshot 19264 0 dm_zero 4224 0 dm_mirror 29720 0 ext3 140688 1 jbd 76472 3 fsfilt_ldiskfs,ldiskfs,ext3 dm_mod 68416 4 dm_multipath,dm_snapshot,dm_zero,dm_mirror iscsi_sfnet 95388 4 scsi_transport_iscsi 13952 1 iscsi_sfnet md5 6272 1 crc32c 3840 54 libcrc32c 4480 1 crc32c sd_mod 25112 4 scsi_mod 142032 2 iscsi_sfnet,sd_mod e100 43904 0 mii 7680 1 e100 [root@scratchy ~]# Cheers, Paul -------------- next part -------------- An HTML attachment was scrubbed... URL: http://mail.clusterfs.com/pipermail/lustre-discuss/attachments/20070220/4f22304f/attachment.html
On Feb 20, 2007 10:28 -0600, Paul Bertels wrote: > LustreError: 10154:0:(service.c:1072:ptlrpc_start_threads()) cannot start ll_ost thread #517: rc -24 > LustreError: 10154:0:(service.c:1072:ptlrpc_start_threads()) cannot start ll_ost thread #518: rc -24 It's a bit strange that it is trying to start so many threads at startup time. How much RAM and how many CPUs on this node? Cheers, Andreas -- Andreas Dilger Principal Software Engineer Cluster File Systems, Inc.
Andreas Dilger wrote: > On Feb 20, 2007 10:28 -0600, Paul Bertels wrote: > >> LustreError: 10154:0:(service.c:1072:ptlrpc_start_threads()) cannot start ll_ost thread #517: rc -24 >> LustreError: 10154:0:(service.c:1072:ptlrpc_start_threads()) cannot start ll_ost thread #518: rc -24 >> > > It's a bit strange that it is trying to start so many threads at startup > time. How much RAM and how many CPUs on this node? > Indeed strange, since there is a hard-coded max of 512. And that error message seems to indicate that min > max. Hmm - oops. As Andreas suspected, min can be arbitrarily large depending on number of CPUs and memory. Need to fix that immediately. In the meantime, you can set this in modprobe.conf: options ost ost_num_threads=512
Thanks for the quick reply ... this node is one of our CMs; it has 4 sockets, each with a dual-core 875 Opteron. [root@scratchy ~]# cat /proc/meminfo | fgrep Mem MemTotal: 13833540 kB MemFree: 12997216 kB [root@scratchy ~]# [root@scratchy ~]# cat /proc/cpuinfo | fgrep processor processor : 0 processor : 1 processor : 2 processor : 3 processor : 4 processor : 5 processor : 6 processor : 7 [root@scratchy ~]# -----Original Message----- From: Andreas Dilger [mailto:adilger@clusterfs.com] Sent: Wed 2/21/2007 3:20 AM To: Paul Bertels Cc: lustre-discuss@clusterfs.com Subject: Re: [Lustre-discuss] OST won't start in 1.5.97 On Feb 20, 2007 10:28 -0600, Paul Bertels wrote: > LustreError: 10154:0:(service.c:1072:ptlrpc_start_threads()) cannot start ll_ost thread #517: rc -24 > LustreError: 10154:0:(service.c:1072:ptlrpc_start_threads()) cannot start ll_ost thread #518: rc -24 It's a bit strange that it is trying to start so many threads at startup time. How much RAM and how many CPUs on this node? Cheers, Andreas -- Andreas Dilger Principal Software Engineer Cluster File Systems, Inc. -------------- next part -------------- An HTML attachment was scrubbed... URL: http://mail.clusterfs.com/pipermail/lustre-discuss/attachments/20070221/db40a864/attachment.html
Hi Nathaniel, Setting oss_num_threads=500 for the ost module fixed the issue. Thanks very much for your and Andreas's help. Looking forward to 1.6.0 final. I can now start the OST and mount from the client. (To reiterate, this machine has 8 CPUs and 16 GB of RAM.) [root@scratchy ~]# mount -t lustre /dev/sdb1 /mnt/test/ost0 [root@scratchy ~]# mount -t lustre 10.3.1.40:/testfs /mnt/client/ [root@scratchy ~]# mount /dev/sda1 on / type ext3 (rw) none on /proc type proc (rw) none on /sys type sysfs (rw) none on /dev/pts type devpts (rw,gid=5,mode=620) usbfs on /proc/bus/usb type usbfs (rw) none on /tmp type tmpfs (rw) none on /dev/shm type tmpfs (rw) none on /proc/sys/fs/binfmt_misc type binfmt_misc (rw) sunrpc on /var/lib/nfs/rpc_pipefs type rpc_pipefs (rw) lab777-ecp:/share on /lab777/share type nfs (rw,hard,intr,bg,tcp,nfsvers=3,rsize=32768,wsize=32768,addr=10.3.254.10) lab777-ecp:/opt on /opt type nfs (rw,hard,intr,bg,tcp,nfsvers=3,rsize=32768,wsize=32768,addr=10.3.254.10) lab777-ecp:/home on /home type nfs (rw,hard,intr,bg,tcp,nfsvers=3,rsize=32768,wsize=32768,addr=10.3.254.10) /dev/sdd1 on /mnt/test/mdt type lustre (rw) /dev/sdb1 on /mnt/test/ost0 type lustre (rw) 10.3.1.40@tcp:/testfs on /mnt/client type lustre (rw) [root@scratchy ~]# cat /etc/modprobe.conf alias eth0 e100 alias scsi_hostadapter iscsi_init alias usb-controller ehci-hcd alias usb-controller1 ohci-hcd #options lnet networks=tcp options lnet ip2nets="tcp0(eth0) 10.3.1.[1-254]" options ost oss_num_threads=500 [root@scratchy ~]# cat /proc/fs/lustre/devices 0 UP mgs MGS MGS 5 1 UP mgc MGC10.3.1.40@tcp 8eb860ba-9a26-3dba-aa03-98d9794e7acd 5 2 UP mdt MDS MDS_uuid 3 3 UP lov testfs-mdtlov testfs-mdtlov_UUID 4 4 UP mds testfs-MDT0000 testfs-MDT0000_UUID 5 5 UP ost OSS OSS_uuid 3 6 UP obdfilter testfs-OST0000 testfs-OST0000_UUID 7 7 UP osc testfs-OST0000-osc testfs-mdtlov_UUID 5 8 UP lov testfs-clilov-00000101d11abc00 042be540-6eb1-787a-9cbf-63781a32b170 4 9 UP mdc testfs-MDT0000-mdc-00000101d11abc00 
042be540-6eb1-787a-9cbf-63781a32b170 5 10 UP osc testfs-OST0000-osc-00000101d11abc00 042be540-6eb1-787a-9cbf-63781a32b170 5 [root@scratchy ~]# Cheers, Paul -------------- next part -------------- An HTML attachment was scrubbed... URL: http://mail.clusterfs.com/pipermail/lustre-discuss/attachments/20070221/191b974a/attachment.html