Cc: Bcc: Subject: [Emoly.Liu at Sun.COM: some fixes for lustre adio driver] Reply-To: Here is LiuYing's message from last month. If all looks OK, once I've merged the two patch sets, all the known bugs will be fixed. Thanks for the work! ==rob ----- Forwarded message from "emoly.liu" <Emoly.Liu at Sun.COM> ----- Sender: Emoly.Liu at Sun.COM From: "emoly.liu" <Emoly.Liu at Sun.COM> Subject: some fixes for lustre adio driver Date: Mon, 14 Dec 2009 18:32:34 +0800 Message-id: <4B261442.5010909 at sun.com> User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.1.4pre) Gecko/20090922 Fedora/3.0-3.9.b4.fc12 Thunderbird/3.0b4 To: Robert Latham <robl at mcs.anl.gov> Cc: "Tom.Wang" <Tom.Wang at Sun.COM> X-Spam-Status: No, score=-3.043 tagged_above=-10 required=6.6 tests=[AWL=-0.444, BAYES_00=-2.599] Hi Rob, Since our discussion in June, I haven't been working on lustre adio driver. Now, I'm back. Here is a patch(based on romio revision 5963) to fix the following problems in the driver: 1. unsafe lum initialization in ADIOI_LUSTRE_Open() 2. wrong avail_cb_nodes calculation in ADIOI_LUSTRE_Get_striping_info(). Wei-keng found this bug and I remember I ever submitted this correction, but probably missed. 3. enabling ROMIO_LUSTRE in ad_fstype.c I test this patch on my local machine, it passed all the cases in romio/test/runtest, except i_noncontig and noncontig, which should be not related to collective I/O. I will continue to investigate them, so please land this patch first. If you have any problems on the patch or lustre adio driver, please let me know. Thanks, LiuYing -- Best regards, LiuYing System Software Engineer, Lustre Group Sun Microsystems ( China ) Co. 
Limited Index: adio/ad_lustre/ad_lustre_open.c ==================================================================--- adio/ad_lustre/ad_lustre_open.c (revision 5963) +++ adio/ad_lustre/ad_lustre_open.c (working copy) @@ -10,10 +10,13 @@ #include "ad_lustre.h" +#define MAX_LOV_UUID_COUNT 1000 + void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code) { int perm, old_mask, amode, amode_direct; - struct lov_user_md lum = { 0 }; + int lumlen; + struct lov_user_md *lum = NULL; char *value; #if defined(MPICH2) || !defined(PRINT_ERR_MSG) @@ -46,30 +49,34 @@ if (fd->fd_sys != -1) { int err; - value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - /* get file striping information and set it in info */ - lum.lmm_magic = LOV_USER_MAGIC; - err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *) &lum); + lumlen = sizeof(struct lov_user_md) + + MAX_LOV_UUID_COUNT * sizeof(struct lov_user_ost_data); + lum = (struct lov_user_md *)ADIOI_Malloc(lumlen); + lum->lmm_magic = LOV_USER_MAGIC; + err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *)lum); + if (!err) { + value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); - if (!err) { - fd->hints->striping_unit = lum.lmm_stripe_size; - sprintf(value, "%d", lum.lmm_stripe_size); + fd->hints->striping_unit = lum->lmm_stripe_size; + sprintf(value, "%d", lum->lmm_stripe_size); MPI_Info_set(fd->info, "striping_unit", value); - fd->hints->striping_factor = lum.lmm_stripe_count; - sprintf(value, "%d", lum.lmm_stripe_count); + fd->hints->striping_factor = lum->lmm_stripe_count; + sprintf(value, "%d", lum->lmm_stripe_count); MPI_Info_set(fd->info, "striping_factor", value); - fd->hints->fs_hints.lustre.start_iodevice = lum.lmm_stripe_offset; - sprintf(value, "%d", lum.lmm_stripe_offset); + fd->hints->fs_hints.lustre.start_iodevice = lum->lmm_stripe_offset; + sprintf(value, "%d", lum->lmm_stripe_offset); MPI_Info_set(fd->info, "romio_lustre_start_iodevice", value); + + ADIOI_Free(value); } - ADIOI_Free(value); + 
ADIOI_Free(lum); if (fd->access_mode & ADIO_APPEND) fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END); - } + } if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND)) fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END); @@ -86,7 +93,7 @@ } /* --BEGIN ERROR HANDLING-- */ - if (fd->fd_sys == -1 || ((fd->fd_direct == -1) && + if (fd->fd_sys == -1 || ((fd->fd_direct == -1) && (fd->direct_write || fd->direct_read))) { if (errno == ENAMETOOLONG) *error_code = MPIO_Err_create_code(MPI_SUCCESS, Index: adio/ad_lustre/ad_lustre_aggregate.c ==================================================================--- adio/ad_lustre/ad_lustre_aggregate.c (revision 5963) +++ adio/ad_lustre/ad_lustre_aggregate.c (working copy) @@ -59,7 +59,7 @@ * so that each OST is accessed by only one or more constant clients. */ CO_nodes = stripe_count * CO; avail_cb_nodes = ADIOI_MIN(nprocs_for_coll, CO_nodes); - if (avail_cb_nodes == CO_nodes) { + if (avail_cb_nodes < CO_nodes) { do { /* find the divisor of CO_nodes */ divisor = 1; Index: adio/common/ad_fstype.c ==================================================================--- adio/common/ad_fstype.c (revision 5963) +++ adio/common/ad_fstype.c (working copy) @@ -347,7 +347,7 @@ # endif /*#if defined(LINUX) && defined(ROMIO_LUSTRE)*/ -#if 0 +#if 1 /* disable lustre auto-detection until we figure out why collective i/o * broken */ #ifdef ROMIO_LUSTRE ----- End forwarded message ----- -- Rob Latham Mathematics and Computer Science Division Argonne National Lab, IL USA