On 2010-01-25, at 16:32, Christopher J. Morrone wrote:
> Richard Lefebvre wrote:
>> I have been going through the operations manual, but I can't find
>> the answer to how to find out which OSS each OST is on?
>
> Yes, that seems like an oversight to me too. I'd like to see lfs
> have a new command to make this lookup easier.
Chris, Richard,
I agree it makes sense to have a command to do this. As I write this
I'm offline, so I can't poke at my test filesystem, but I thought that
/proc/fs/lustre/osc/*/import will contain the OST name and NID(s) to
which they are connected. If this doesn't contain the information
needed, can you please suggest what else might be added?
There is the "lfs osts" command, which will list the OSTs themselves,
but it doesn't print the NIDs. That seems like the logical place to
add additional information about the OSTs.
> Attached is my "ostmapping" script that you can run on a lustre
> client. It farms various places: lfs, lctl, /proc, and lists all
> oss nids showing the osts that are currently being exported to the
> client on which you run the command. If the nid looks like an IP
> address, it even tries to look up the hostname associated with the
> address.
>
> If anyone has a better way to do this, let me know!
It looks useful, though I suspect that a lot of what you are
extracting from /proc can actually be gotten via a single ioctl or
preferably via an llapi_* function.
> Run "ostmapping -h" to see the options. You can also ask it to just
> show the oss and osts for one filesystem, or just the osts for one
> oss.
>
> Here's a sample of its output:
>
> 172.16.2.1@tcp (tycho1-lnet0)
>  lc1-OST0000_UUID     0     ACTIVE
>  lc1-OST0008_UUID     8     ACTIVE
>  lc1-OST0010_UUID     16    ACTIVE
> 172.16.2.2@tcp (tycho2-lnet0)
>  lc1-OST0001_UUID     1     ACTIVE
>  lc1-OST0009_UUID     9     ACTIVE
>  lc1-OST0011_UUID     17    ACTIVE
> 172.16.2.3@tcp (tycho3-lnet0)
>  lc1-OST0002_UUID     2     ACTIVE
>  lc1-OST000a_UUID     10    ACTIVE
>  lc1-OST0012_UUID     18    ACTIVE
> 172.16.2.4@tcp (tycho4-lnet0)
>  lc1-OST0003_UUID     3     ACTIVE
>  lc1-OST000b_UUID     11    ACTIVE
>  lc1-OST0013_UUID     19    ACTIVE
> #!/usr/bin/env python
>
import functools
import operator
import os
import re
import socket
import subprocess
import sys
from optparse import OptionParser
>
# /proc directories read by this script: PROC_LLITE is scanned for
# per-mount subdirectories that each hold a "uuid" file, and PROC_OSC for
# per-OSC subdirectories that each hold an "ost_server_uuid" file.
PROC_LLITE = '/proc/fs/lustre/llite'
PROC_OSC = '/proc/fs/lustre/osc'
>
def uuid_from_file(path, filename):
    """Return the first whitespace-separated token on the first line of a file.

    Works for files that have uuid and status, like "ost_server_uuid",
    and also for files that just contain the uuid, like "uuid".

    path     -- directory containing the file
    filename -- name of the file inside that directory

    Raises IOError/OSError if the file cannot be opened and IndexError if
    its first line is empty.
    """
    full_path = os.path.join(path, filename)
    # Close the handle deterministically instead of leaking it to the GC.
    with open(full_path) as uuid_file:
        return uuid_file.readline().split()[0]
def get_fs_uuid_by_name():
    """Return a dictionary of filesystem uuids.  Key is filesystem name.

    Every subdirectory of PROC_LLITE contributes one entry whose value is
    read from that subdirectory's "uuid" file.  An empty dictionary is
    returned when PROC_LLITE cannot be listed (for example when it does
    not exist).
    """
    try:
        entries = os.listdir(PROC_LLITE)
    except OSError:
        # No llite directory to scan: report "no filesystems" rather than
        # aborting with a traceback.
        return {}

    fs_uuids = {}
    for entry in entries:
        entry_path = os.path.join(PROC_LLITE, entry)
        if not os.path.isdir(entry_path):
            continue

        # Assume that the portion of the llite directory name before the
        # last "-" is the name of the filesystem.
        filesystem_name = entry.rsplit('-', 1)[0]
        fs_uuids[filesystem_name] = uuid_from_file(entry_path, 'uuid')

    return fs_uuids
>
def get_fs_name_by_mntpt(mounts_path='/proc/mounts'):
    """Return dictionary of filesystem names.  Key is mount point.

    mounts_path -- mount table to parse; each line is split on whitespace
                   into device, mount point and filesystem type.

    Only lines whose type field is "lustre" are used.  The filesystem name
    is the part of the device field that follows the first ":/".  Lines
    with fewer than three fields, or whose device has no ":/", are
    skipped.
    """
    names = {}
    with open(mounts_path) as mounts:
        for line in mounts:
            field = line.split()
            if len(field) < 3 or field[2] != 'lustre':
                continue
            device_parts = field[0].split(':/')
            if len(device_parts) < 2:
                # Device has no ":/" separator, so no name to extract.
                continue
            names[field[1]] = device_parts[1]
    return names
>
def path_to_fs_uuid(path):
    """Return the uuid of the lustre filesystem containing path, or None.

    The absolute form of path and then each of its parent directories is
    checked against the lustre mount points.  None is returned when no
    ancestor is a lustre mount point, or when the mounted filesystem's
    name has no uuid entry.
    """
    fs_uuid_by_name = get_fs_uuid_by_name()
    fs_name_by_mntpt = get_fs_name_by_mntpt()

    path = os.path.abspath(path)
    # Keep checking dirname of path until the mount point is found.
    while True:
        if path in fs_name_by_mntpt:
            # .get(): a mounted name missing from the uuid table is
            # reported as "not found" instead of raising KeyError.
            return fs_uuid_by_name.get(fs_name_by_mntpt[path])
        parent = os.path.dirname(path)
        if parent == path:
            # Reached the root without finding a lustre mount point.
            return None
        path = parent
>
def ostuuid_from_oscuuid(osc_name):
    """Return the uuid stored in PROC_OSC/<osc_name>/ost_server_uuid.

    osc_name -- name of an OSC directory under PROC_OSC
    """
    return uuid_from_file(os.path.join(PROC_OSC, osc_name),
                          'ost_server_uuid')
>
def _spawn_or_exit(argv, description):
    """Start argv with a text-mode stdout pipe; exit(3) if it cannot run."""
    try:
        return subprocess.Popen(argv, stdout=subprocess.PIPE,
                                universal_newlines=True)
    except OSError:
        sys.stderr.write('Unable to execute "%s".\n' % description)
        sys.exit(3)


def get_osts_by_fs_uuid():
    """Return {filesystem uuid: {ost uuid: [ost uuid, index, status, nid]}}.

    Index and status come from "lfs osts"; the filesystem uuid and server
    nid come from "/usr/sbin/lctl dl -t"; the ost uuid for each osc is
    read from /proc via ostuuid_from_oscuuid().

    Exits the process with status 3 if either command cannot be executed.
    OSTs that "lfs osts" did not report are skipped with a warning on
    stderr.
    """
    lfs = _spawn_or_exit(['lfs', 'osts'], 'lfs')

    osts_info_dict = {}
    for line in lfs.stdout:
        # Lines are split on whitespace and ":"; only lines that yield
        # exactly three tokens (index, uuid, status) are used.
        args = re.split(r'[\s:]+', line.strip())
        if len(args) != 3:
            continue
        ost_idx, ost_uuid, ost_status = args
        osts_info_dict[ost_uuid] = [ost_uuid, ost_idx, ost_status]
    lfs.stdout.close()
    lfs.wait()

    # Get all of the osts on the system by walking through
    # all of the oscs on the system.  The lctl output listing
    # the oscs associates each osc with a filesystem UUID,
    # and then we can find out which ost the osc is associated with
    # from /proc.  Keep in mind that each ost can be in use by
    # multiple oscs, so just ignore the duplicates.
    lctl = _spawn_or_exit(['/usr/sbin/lctl', 'dl', '-t'],
                          '/usr/sbin/lctl dl -t')

    osts_by_filesystem = {}
    for line in lctl.stdout:
        field = line.split()
        # Fields used below: [2] device type, [3] osc name,
        # [4] filesystem uuid, [6] server nid.
        if len(field) < 7 or field[2] != 'osc':
            continue
        osc_name = field[3]
        filesystem_uuid = field[4]
        osts_dict = osts_by_filesystem.setdefault(filesystem_uuid, {})

        ost_uuid = ostuuid_from_oscuuid(osc_name)
        if ost_uuid in osts_dict:
            # already found this ost through another osc
            continue
        if ost_uuid not in osts_info_dict:
            sys.stderr.write('Skipping %s: not listed by "lfs osts".\n'
                             % ost_uuid)
            continue
        # Build a fresh record rather than appending to the shared one, so
        # an ost seen under several filesystems never collects extra nids.
        osts_dict[ost_uuid] = osts_info_dict[ost_uuid] + [field[6]]
    lctl.stdout.close()
    lctl.wait()

    return osts_by_filesystem
>
def _cmp(a, b):
    """Three-way compare returning -1, 0 or 1 (replacement for cmp())."""
    return (a > b) - (a < b)


def nid_sort(nida, nidb):
    """Compare two NIDs of the form "address@network", cmp()-style.

    Ordering is by network name first, then by the number of
    "."-separated address components, then component by component --
    numerically when both components are integers, as strings otherwise.
    A NID without "@" is treated as having an empty network name.

    Returns a negative number, zero or a positive number.
    """
    addr_a, _, net_a = nida.partition('@')
    addr_b, _, net_b = nidb.partition('@')
    if net_a != net_b:
        return _cmp(net_a, net_b)

    parts_a = addr_a.split('.')
    parts_b = addr_b.split('.')
    if len(parts_a) != len(parts_b):
        return _cmp(len(parts_a), len(parts_b))

    for x, y in zip(parts_a, parts_b):
        if x == y:
            continue
        try:
            rc = _cmp(int(x), int(y))
        except ValueError:
            rc = _cmp(x, y)
        # "01" and "1" differ as text but are numerically equal; keep
        # going so later components still decide the order.
        if rc:
            return rc
    return 0
>
def print_ost_mapping(ost_mapping):
    """Print each server nid followed by its osts, in nid_sort() order.

    ost_mapping -- dictionary keyed by server nid; each value is a list of
                   ost records whose first three items (uuid, index,
                   status) are printed.

    For nids on the "tcp" network the address is reverse-resolved and the
    hostname is shown in parentheses ("unknown" if resolution fails).
    """
    sorted_nids = sorted(ost_mapping, key=functools.cmp_to_key(nid_sort))
    for server_nid in sorted_nids:
        # partition() tolerates a nid without "@" (empty network name).
        addr, _, domain = server_nid.partition('@')
        if domain == 'tcp':
            try:
                hostname = socket.gethostbyaddr(addr)[0]
            except (socket.error, UnicodeError):
                hostname = 'unknown'
            print('%s (%s)' % (server_nid, hostname))
        else:
            print(server_nid)
        for ost in ost_mapping[server_nid]:
            print(' %-20s %-5s %s' % (ost[0], ost[1], ost[2]))
>
def main():
    """Parse the command line and print the oss-to-ost mapping.

    Options:
      -d/--directory DIR  only show the filesystem that contains DIR
      -s/--server NID     only show the osts of the server with this nid

    Returns the process exit status: 0 on success, 2 when no filesystems
    are found or the requested server nid is unknown, 3 when DIR is not on
    a lustre filesystem.
    """
    p = OptionParser()
    p.add_option('-d', '--directory', dest='dir', metavar='DIR',
                 help='path to a lustre filesystem')
    p.add_option('-s', '--server', dest='server_nid', metavar='NID',
                 help='list the osts on a single server')
    (opts, args) = p.parse_args()

    osts_by_fs_uuid = get_osts_by_fs_uuid()

    if not osts_by_fs_uuid:
        print('No lustre filesystems mounted on this node?')
        return 2

    if opts.dir:
        fs_uuid = path_to_fs_uuid(opts.dir)
        if not fs_uuid:
            print('"' + opts.dir + '" is not a lustre filesystem')
            return 3
        fs_uuids = [fs_uuid]
    else:
        fs_uuids = list(osts_by_fs_uuid)

    # Make a simple list of the ost info arrays
    osts_list = []
    for fs_uuid in fs_uuids:
        # .get(): a filesystem with no osc entries contributes nothing
        # instead of raising KeyError.
        osts_list.extend(osts_by_fs_uuid.get(fs_uuid, {}).values())

    # Now make a dict of the arrays of ost info arrays,
    # keyed by oss nids.
    osts_dict = {}
    for ost in osts_list:
        osts_dict.setdefault(ost[3], []).append(ost)
    # Sort the osts by index
    for osts in osts_dict.values():
        osts.sort(key=lambda x: int(x[1]))

    if opts.server_nid:
        if opts.server_nid not in osts_dict:
            print('Server NID not found')
            return 2
        osts_dict = {opts.server_nid: osts_dict[opts.server_nid]}

    print_ost_mapping(osts_dict)

    return 0
>
if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    sys.exit(main())
> _______________________________________________
> Lustre-discuss mailing list
> Lustre-discuss at lists.lustre.org
> http://lists.lustre.org/mailman/listinfo/lustre-discuss
Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.