Hi all,
Problem solved. Some nodes had iptables blocking port 988 and some
didn't. :-)
Scott
On Apr 13, 2007, at 10:49 PM, Scott Atchley wrote:
> Hi all,
>
> I am trying to set up Lustre using TCP. I have the following in
> /etc/modprobe.conf:
>
> options lnet networks="tcp0(eth2)"
>
> to specify the third NIC only. I have two OSSs and one MDS. They
> start up and see each other fine. My XML is pasted below.
>
> When I try to have a client start with:
>
> # lconf --node client lustre-fs.xml
>
> it hangs at:
>
> + mount -t lustre_lite -o
> osc=lov1,mdc=MDC_compute-0-1.local_mds1_MNT_client lustre-fs
> /mnt/lustre
>
> If I check its NIDs, I see:
>
> # cat /proc/sys/lnet/nis
> nid refs peer max tx min
> 0@lo 2 0 0 0 0
> 192.168.1.254@tcp 2 8 256 256 255
>
> which is the correct address for this client. If, instead of using
> lconf, I simply modprobe lnet, run lctl, and try to ping the MDS,
> it fails:
>
> # lctl
> lctl > network up
> LNET configured
> lctl > network tcp
> lctl > ping 192.168.1.250
> failed to ping 192.168.1.250@tcp: Input/output error
>
> Yet, I can ping the node on the command line:
>
> # ping -s 9000 192.168.1.250
> PING 192.168.1.250 (192.168.1.250) 9000(9028) bytes of data.
> 9008 bytes from 192.168.1.250: icmp_seq=0 ttl=64 time=0.142 ms
>
> # ping -s 9000 nas-0-0-m
> PING nas-0-0-m.local (192.168.1.250) 9000(9028) bytes of data.
> 9008 bytes from nas-0-0-m.local (192.168.1.250): icmp_seq=0 ttl=64
> time=0.111 ms
>
> If I try using lctl on the MDS to ping the client, it fails as well
> but I can ping the two OSSs.
>
> Any suggestions?
>
> Thanks,
>
> Scott
>
>
> <?xml version='1.0' encoding='UTF-8'?>
> <lustre version='2003070801' mtime='1176488849'>
> <ldlm name='ldlm' uuid='ldlm_UUID'/>
> <node uuid='client_UUID' name='client'>
> <profile_ref uuidref='PROFILE_client_UUID'/>
> <network uuid='NET_client_lnet_UUID' nettype='lnet'
> name='NET_client_lnet'>
> <nid>*</nid>
> <clusterid>0</clusterid>
> <port>988</port>
> </network>
> </node>
> <profile uuid='PROFILE_client_UUID' name='PROFILE_client'>
> <ldlm_ref uuidref='ldlm_UUID'/>
> <network_ref uuidref='NET_client_lnet_UUID'/>
> <mountpoint_ref uuidref='MNT_client_UUID'/>
> </profile>
> <node uuid='storenode1-m_UUID' name='storenode1-m'>
> <profile_ref uuidref='PROFILE_storenode1-m_UUID'/>
> <network uuid='NET_storenode1-m_lnet_UUID' nettype='lnet'
> name='NET_storenode1-m_lnet'>
> <nid>192.168.1.2</nid>
> <clusterid>0</clusterid>
> <port>988</port>
> </network>
> </node>
> <profile uuid='PROFILE_storenode1-m_UUID'
> name='PROFILE_storenode1-m'>
> <ldlm_ref uuidref='ldlm_UUID'/>
> <network_ref uuidref='NET_storenode1-m_lnet_UUID'/>
> <osd_ref uuidref='OSD_ost1_storenode1-m_UUID'/>
> </profile>
> <node uuid='storenode2-m_UUID' name='storenode2-m'>
> <profile_ref uuidref='PROFILE_storenode2-m_UUID'/>
> <network uuid='NET_storenode2-m_lnet_UUID' nettype='lnet'
> name='NET_storenode2-m_lnet'>
> <nid>192.168.1.4</nid>
> <clusterid>0</clusterid>
> <port>988</port>
> </network>
> </node>
> <profile uuid='PROFILE_storenode2-m_UUID'
> name='PROFILE_storenode2-m'>
> <ldlm_ref uuidref='ldlm_UUID'/>
> <network_ref uuidref='NET_storenode2-m_lnet_UUID'/>
> <osd_ref uuidref='OSD_ost2_storenode2-m_UUID'/>
> </profile>
> <node uuid='nas-0-0-m_UUID' name='nas-0-0-m'>
> <profile_ref uuidref='PROFILE_nas-0-0-m_UUID'/>
> <network uuid='NET_nas-0-0-m_lnet_UUID' nettype='lnet'
> name='NET_nas-0-0-m_lnet'>
> <nid>192.168.1.250</nid>
> <clusterid>0</clusterid>
> <port>988</port>
> </network>
> </node>
> <profile uuid='PROFILE_nas-0-0-m_UUID' name='PROFILE_nas-0-0-m'>
> <ldlm_ref uuidref='ldlm_UUID'/>
> <network_ref uuidref='NET_nas-0-0-m_lnet_UUID'/>
> <mdsdev_ref uuidref='MDD_mds1_nas-0-0-m_UUID'/>
> </profile>
> <mds uuid='mds1_UUID' name='mds1'>
> <active_ref uuidref='MDD_mds1_nas-0-0-m_UUID'/>
> <lovconfig_ref uuidref='LVCFG_lov1_UUID'/>
> <filesystem_ref uuidref='FS_fsname_UUID'/>
> </mds>
> <mdsdev uuid='MDD_mds1_nas-0-0-m_UUID' name='MDD_mds1_nas-0-0-m'>
> <fstype>ldiskfs</fstype>
> <devpath>/var/run/lustre/mds</devpath>
> <autoformat>yes</autoformat>
> <devsize>5000000</devsize>
> <journalsize>0</journalsize>
> <inodesize>0</inodesize>
> <node_ref uuidref='nas-0-0-m_UUID'/>
> <target_ref uuidref='mds1_UUID'/>
> </mdsdev>
> <lov stripesize='4194304' stripecount='-1' stripepattern='0'
> uuid='lov1_UUID' name='lov1'>
> <mds_ref uuidref='mds1_UUID'/>
> <obd_ref uuidref='ost1_UUID'/>
> <obd_ref uuidref='ost2_UUID'/>
> </lov>
> <lovconfig uuid='LVCFG_lov1_UUID' name='LVCFG_lov1'>
> <lov_ref uuidref='lov1_UUID'/>
> </lovconfig>
> <ost uuid='ost1_UUID' name='ost1'>
> <active_ref uuidref='OSD_ost1_storenode1-m_UUID'/>
> </ost>
> <osd osdtype='obdfilter' uuid='OSD_ost1_storenode1-m_UUID'
> name='OSD_ost1_storenode1-m'>
> <target_ref uuidref='ost1_UUID'/>
> <node_ref uuidref='storenode1-m_UUID'/>
> <fstype>ldiskfs</fstype>
> <devpath>/dev/sda1</devpath>
> <autoformat>no</autoformat>
> <devsize>0</devsize>
> <journalsize>0</journalsize>
> <inodesize>0</inodesize>
> </osd>
> <ost uuid='ost2_UUID' name='ost2'>
> <active_ref uuidref='OSD_ost2_storenode2-m_UUID'/>
> </ost>
> <osd osdtype='obdfilter' uuid='OSD_ost2_storenode2-m_UUID'
> name='OSD_ost2_storenode2-m'>
> <target_ref uuidref='ost2_UUID'/>
> <node_ref uuidref='storenode2-m_UUID'/>
> <fstype>ldiskfs</fstype>
> <devpath>/dev/sda1</devpath>
> <autoformat>no</autoformat>
> <devsize>0</devsize>
> <journalsize>0</journalsize>
> <inodesize>0</inodesize>
> </osd>
> <filesystem uuid='FS_fsname_UUID' name='FS_fsname'>
> <mds_ref uuidref='mds1_UUID'/>
> <obd_ref uuidref='lov1_UUID'/>
> </filesystem>
> <mountpoint uuid='MNT_client_UUID' name='MNT_client'>
> <filesystem_ref uuidref='FS_fsname_UUID'/>
> <path>/mnt/lustre</path>
> </mountpoint>
> </lustre>
>
>
> _______________________________________________
> Lustre-discuss mailing list
> Lustre-discuss@clusterfs.com
> https://mail.clusterfs.com/mailman/listinfo/lustre-discuss
>