Hi,
Trying to start MDS/MDT and OST on the same node ...
MDS /MDT combo appears to be working fine ...
[root@scratchy ~]# cat /proc/fs/lustre/devices
0 UP mgs MGS MGS 5
1 UP mgc MGC10.3.1.40@tcp 3c892ff4-3c64-c307-517e-f0d06ecfde8d 5
2 UP mdt MDS MDS_uuid 3
3 UP lov testfs-mdtlov testfs-mdtlov_UUID 4
4 UP mds testfs-MDT0000 testfs-MDT0000_UUID 3
[root@scratchy ~]#
Has anyone run into or had experience with this issue? OST won't start
for me in 1.5.97.
[root@scratchy ~]# mkfs.lustre --reformat --fsname=testfs --ost
--mgsnode=10.3.1.40@tcp /dev/sdb1
Permanent disk data:
Target: testfs-OSTffff
Index: unassigned
Lustre FS: testfs
Mount type: ldiskfs
Flags: 0x72
(OST needs_index first_time update )
Persistent mount opts: errors=remount-ro,extents,mballoc
Parameters: mgsnode=10.3.1.40@tcp
device size = 8191MB
formatting backing filesystem ldiskfs on /dev/sdb1
target name testfs-OSTffff
4k blocks 0
options -J size=324 -i 16384 -I 256 -q -O dir_index -F
mkfs_cmd = mkfs.ext2 -j -b 4096 -L testfs-OSTffff -J size=324 -i 16384 -I 256
-q -O dir_index -F /dev/sdb1
Writing CONFIGS/mountdata
[root@scratchy ~]# mount -t lustre /dev/sdb1 /mnt/test/ost0
mount.lustre: mount /dev/sdb1 at /mnt/test/ost0 failed: Invalid argument
This may have multiple causes.
Are the mount options correct?
Check the syslog for more info.
[root@scratchy ~]#
Corresponding console messages ...
LDISKFS-fs: file extents enabled
LDISKFS-fs: mballoc enabled
LustreError: 10154:0:(service.c:1072:ptlrpc_start_threads()) cannot start ll_ost
thread #512: rc -24
LustreError: 10154:0:(service.c:1072:ptlrpc_start_threads()) cannot start ll_ost
thread #513: rc -24
LustreError: 10154:0:(service.c:1072:ptlrpc_start_threads()) cannot start ll_ost
thread #514: rc -24
LustreError: 10154:0:(service.c:1072:ptlrpc_start_threads()) cannot start ll_ost
thread #515: rc -24
LustreError: 10154:0:(service.c:1072:ptlrpc_start_threads()) cannot start ll_ost
thread #516: rc -24
LustreError: 10154:0:(service.c:1072:ptlrpc_start_threads()) cannot start ll_ost
thread #517: rc -24
LustreError: 10154:0:(service.c:1072:ptlrpc_start_threads()) cannot start ll_ost
thread #518: rc -24
LustreError: 10154:0:(obd_config.c:299:class_setup()) setup OSS failed (-22)
LustreError: 10154:0:(obd_mount.c:450:lustre_start_simple()) OSS setup error -22
LustreError: 10154:0:(obd_mount.c:1030:server_start_targets()) failed to start
OSS: -22
LustreError: 10154:0:(obd_mount.c:1560:server_fill_super()) Unable to start
targets: -22
LustreError: 10154:0:(mgc_request.c:150:config_log_find()) can't get
log testfs-OSTffff
LustreError: 10154:0:(obd_mount.c:1347:server_put_super()) no obd testfs-OSTffff
LustreError: 10154:0:(obd_mount.c:118:server_deregister_mount()) testfs-OSTffff
not registered
LDISKFS-fs: mballoc: 0 blocks 0 reqs (0 success)
LDISKFS-fs: mballoc: 0 extents scanned, 0 goal hits, 0 2^N hits, 0 breaks
LDISKFS-fs: mballoc: 0 generated and it took 0
Lustre: server umount testfs-OSTffff complete
LustreError: 10154:0:(obd_mount.c:1911:lustre_fill_super()) Unable to mount
(-22)
[root@scratchy ~]# lsmod
Module Size Used by
loop 17552 1
ost 91068 0
mds 441944 2
fsfilt_ldiskfs 50944 2
mgs 135224 1
mgc 52008 2 mgs
ldiskfs 218784 2 fsfilt_ldiskfs
lustre 402192 0
lov 272200 2 lustre
lquota 110096 4 mds
mdc 99928 1 lustre
ksocklnd 119624 1
ptlrpc 601864 8 ost,mds,mgs,mgc,lustre,lov,lquota,mdc
obdclass 498888 11
ost,mds,fsfilt_ldiskfs,mgs,mgc,lustre,lov,lquota,mdc,ptlrpc
lnet 226696 4 lustre,ksocklnd,ptlrpc,obdclass
lvfs 40712 12
ost,mds,fsfilt_ldiskfs,mgs,mgc,lustre,lov,lquota,mdc,ptlrpc,obdclass
libcfs 118648 14
ost,mds,fsfilt_ldiskfs,mgs,mgc,lustre,lov,lquota,mdc,ksocklnd,ptlrpc,obdclass,lnet,lvfs
ipv6 284384 22
autofs4 24840 0
i2c_dev 14208 0
i2c_core 29184 1 i2c_dev
nfs 245808 3
lockd 78768 2 nfs
nfs_acl 5632 1 nfs
sunrpc 176248 7 nfs,lockd,nfs_acl
dm_multipath 22544 0
ohci_hcd 24848 0
ehci_hcd 33924 0
floppy 66384 0
iscsi_init 14976 0
dm_snapshot 19264 0
dm_zero 4224 0
dm_mirror 29720 0
ext3 140688 1
jbd 76472 3 fsfilt_ldiskfs,ldiskfs,ext3
dm_mod 68416 4 dm_multipath,dm_snapshot,dm_zero,dm_mirror
iscsi_sfnet 95388 4
scsi_transport_iscsi 13952 1 iscsi_sfnet
md5 6272 1
crc32c 3840 54
libcrc32c 4480 1 crc32c
sd_mod 25112 4
scsi_mod 142032 2 iscsi_sfnet,sd_mod
e100 43904 0
mii 7680 1 e100
[root@scratchy ~]#
Cheers,
Paul
-------------- next part --------------
An HTML attachment was scrubbed...
URL:
http://mail.clusterfs.com/pipermail/lustre-discuss/attachments/20070220/4f22304f/attachment.html
On Feb 20, 2007 10:28 -0600, Paul Bertels wrote:
> LustreError: 10154:0:(service.c:1072:ptlrpc_start_threads()) cannot start ll_ost thread #517: rc -24
> LustreError: 10154:0:(service.c:1072:ptlrpc_start_threads()) cannot start ll_ost thread #518: rc -24
It's a bit strange that it is trying to start so many threads at startup time. How much RAM and how many CPUs on this node?
Cheers, Andreas
--
Andreas Dilger
Principal Software Engineer
Cluster File Systems, Inc.
Andreas Dilger wrote:
> On Feb 20, 2007 10:28 -0600, Paul Bertels wrote:
>> LustreError: 10154:0:(service.c:1072:ptlrpc_start_threads()) cannot start ll_ost thread #517: rc -24
>> LustreError: 10154:0:(service.c:1072:ptlrpc_start_threads()) cannot start ll_ost thread #518: rc -24
>
> It's a bit strange that it is trying to start so many threads at startup
> time. How much RAM and how many CPUs on this node?
Indeed strange, since there is a hard-coded max of 512. And that error message seems to indicate that min > max. Hmm - oops. As Andreas suspected, min can be arbitrarily large depending on the number of CPUs and the amount of memory. Need to fix that immediately. In the meantime, you can set this in modprobe.conf:
options ost oss_num_threads=512
Thanks for the quick reply ... this node is one of our CMs; it has 4 sockets, each with a dual-core 875 Opteron.
[root@scratchy ~]# cat /proc/meminfo | fgrep Mem
MemTotal: 13833540 kB
MemFree: 12997216 kB
[root@scratchy ~]#
[root@scratchy ~]# cat /proc/cpuinfo | fgrep processor
processor : 0
processor : 1
processor : 2
processor : 3
processor : 4
processor : 5
processor : 6
processor : 7
[root@scratchy ~]#
-----Original Message-----
From: Andreas Dilger [mailto:adilger@clusterfs.com]
Sent: Wed 2/21/2007 3:20 AM
To: Paul Bertels
Cc: lustre-discuss@clusterfs.com
Subject: Re: [Lustre-discuss] OST won't start in 1.5.97
On Feb 20, 2007 10:28 -0600, Paul Bertels wrote:
> LustreError: 10154:0:(service.c:1072:ptlrpc_start_threads()) cannot start ll_ost thread #517: rc -24
> LustreError: 10154:0:(service.c:1072:ptlrpc_start_threads()) cannot start ll_ost thread #518: rc -24
It's a bit strange that it is trying to start so many threads at startup time. How much RAM and how many CPUs on this node?
Cheers, Andreas
--
Andreas Dilger
Principal Software Engineer
Cluster File Systems, Inc.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://mail.clusterfs.com/pipermail/lustre-discuss/attachments/20070221/db40a864/attachment.html
Hi Nathaniel,
Setting oss_num_threads=500 for the ost module fixed the issue. Thanks very much for your and Andreas's help. Looking forward to 1.6.0 final.
I can now start the OST and mount from the client. (To reiterate, this machine has 8 CPUs and 16 Gigs of RAM.)
[root@scratchy ~]# mount -t lustre /dev/sdb1 /mnt/test/ost0
[root@scratchy ~]# mount -t lustre 10.3.1.40:/testfs /mnt/client/
[root@scratchy ~]# mount
/dev/sda1 on / type ext3 (rw)
none on /proc type proc (rw)
none on /sys type sysfs (rw)
none on /dev/pts type devpts (rw,gid=5,mode=620)
usbfs on /proc/bus/usb type usbfs (rw)
none on /tmp type tmpfs (rw)
none on /dev/shm type tmpfs (rw)
none on /proc/sys/fs/binfmt_misc type binfmt_misc (rw)
sunrpc on /var/lib/nfs/rpc_pipefs type rpc_pipefs (rw)
lab777-ecp:/share on /lab777/share type nfs (rw,hard,intr,bg,tcp,nfsvers=3,rsize=32768,wsize=32768,addr=10.3.254.10)
lab777-ecp:/opt on /opt type nfs (rw,hard,intr,bg,tcp,nfsvers=3,rsize=32768,wsize=32768,addr=10.3.254.10)
lab777-ecp:/home on /home type nfs (rw,hard,intr,bg,tcp,nfsvers=3,rsize=32768,wsize=32768,addr=10.3.254.10)
/dev/sdd1 on /mnt/test/mdt type lustre (rw)
/dev/sdb1 on /mnt/test/ost0 type lustre (rw)
10.3.1.40@tcp:/testfs on /mnt/client type lustre (rw)
[root@scratchy ~]# cat /etc/modprobe.conf
alias eth0 e100
alias scsi_hostadapter iscsi_init
alias usb-controller ehci-hcd
alias usb-controller1 ohci-hcd
#options lnet networks=tcp
options lnet ip2nets="tcp0(eth0) 10.3.1.[1-254]"
options ost oss_num_threads=500
[root@scratchy ~]# cat /proc/fs/lustre/devices
0 UP mgs MGS MGS 5
1 UP mgc MGC10.3.1.40@tcp 8eb860ba-9a26-3dba-aa03-98d9794e7acd 5
2 UP mdt MDS MDS_uuid 3
3 UP lov testfs-mdtlov testfs-mdtlov_UUID 4
4 UP mds testfs-MDT0000 testfs-MDT0000_UUID 5
5 UP ost OSS OSS_uuid 3
6 UP obdfilter testfs-OST0000 testfs-OST0000_UUID 7
7 UP osc testfs-OST0000-osc testfs-mdtlov_UUID 5
8 UP lov testfs-clilov-00000101d11abc00 042be540-6eb1-787a-9cbf-63781a32b170 4
9 UP mdc testfs-MDT0000-mdc-00000101d11abc00 042be540-6eb1-787a-9cbf-63781a32b170 5
10 UP osc testfs-OST0000-osc-00000101d11abc00 042be540-6eb1-787a-9cbf-63781a32b170 5
[root@scratchy ~]#
Cheers,
Paul
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://mail.clusterfs.com/pipermail/lustre-discuss/attachments/20070221/191b974a/attachment.html