Igor Kozhkuhov
2013-Oct-26 16:09 UTC
[PATCH] 1. changes for vdiskadm on illumos-based platform
2. update ZFS in libfsimage from illumos for pygrub diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/Rules.mk --- a/tools/libfsimage/Rules.mk Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/Rules.mk Sat Oct 26 20:03:06 2013 +0400 @@ -2,11 +2,19 @@ include $(XEN_ROOT)/tools/Rules.mk CFLAGS += -Wno-unknown-pragmas -I$(XEN_ROOT)/tools/libfsimage/common/ -DFSIMAGE_FSDIR=\"$(FSDIR)\" CFLAGS += -Werror -D_GNU_SOURCE +# need for build illumos ZFS +CFLAGS += -Wno-parentheses +CFLAGS += -Wno-unused +# end LDFLAGS += -L../common/ PIC_OBJS := $(patsubst %.c,%.opic,$(LIB_SRCS-y)) -FSDIR = $(LIBDIR)/fs +FSDIR-y = $(LIBDIR)/fs/$(FS) +FSDIR-$(CONFIG_SunOS)-x86_64 = $(PREFIX)/lib/fs/$(FS)/64 +FSDIR-$(CONFIG_SunOS)-x86_32 = $(PREFIX)/lib/fs/$(FS)/ +FSDIR-$(CONFIG_SunOS) = $(FSDIR-$(CONFIG_SunOS)-$(XEN_TARGET_ARCH)) +FSDIR = $(FSDIR-y) FSLIB = fsimage.so @@ -15,11 +23,14 @@ fs-all: $(FSLIB) .PHONY: fs-install fs-install: fs-all - $(INSTALL_DIR) $(DESTDIR)$(FSDIR)/$(FS) - $(INSTALL_PROG) $(FSLIB) $(DESTDIR)$(FSDIR)/$(FS) + $(INSTALL_DIR) $(DESTDIR)$(FSDIR) + $(INSTALL_PROG) $(FSLIB) $(DESTDIR)$(FSDIR) + +BUILD_LINE-y = $(CC) $(LDFLAGS) $(SHLIB_LDFLAGS) -o $@ $^ -lfsimage $(FS_LIBDEPS) $(APPEND_LDFLAGS) +BUILD_LINE-$(CONFIG_SunOS) = $(CC) $(CFLAGS) $(LDFLAGS) $(SHLIB_LDFLAGS) -o $@ $^ -lfsimage $(FS_LIBDEPS) $(FSLIB): $(PIC_OBJS) - $(CC) $(LDFLAGS) $(SHLIB_LDFLAGS) -o $@ $^ -lfsimage $(FS_LIBDEPS) $(APPEND_LDFLAGS) + $(BUILD_LINE-y) clean distclean:: rm -f $(PIC_OBJS) $(FSLIB) $(DEPS) diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/common/Makefile --- a/tools/libfsimage/common/Makefile Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/common/Makefile Sat Oct 26 20:03:06 2013 +0400 @@ -4,11 +4,16 @@ include $(XEN_ROOT)/tools/libfsimage/Rul MAJOR = 1.0 MINOR = 0 -LDFLAGS-$(CONFIG_SunOS) = -Wl,-M -Wl,mapfile-SunOS +CFLAGS-ADDS-$(CONFIG_SunOS) += -Werror -Wp,-MD,.$(@F).d $(ADD_INCLUDES) +CFLAGS-ADDS-$(CONFIG_SunOS) += -I/usr/include/libxml2 
+CFLAGS-ADDS-$(CONFIG_Linux)+ +LDFLAGS-$(CONFIG_SunOS) = -Wl,-M -Wl,mapfile-SunOS $(ADD_PATH_LIBS) LDFLAGS-$(CONFIG_Linux) = -Wl,mapfile-GNU LDFLAGS = $(LDFLAGS-y) CFLAGS += $(PTHREAD_CFLAGS) +CFLAGS += $(CFLAGS-ADDS-y) LDFLAGS += $(PTHREAD_LDFLAGS) LIB_SRCS-y = fsimage.c fsimage_plugin.c fsimage_grub.c @@ -32,15 +37,18 @@ install: all $(INSTALL_DATA) fsimage_grub.h $(DESTDIR)$(INCLUDEDIR) clean distclean:: - rm -f $(LIB) + rm -f $(PIC_OBJS) $(LIB) $(DEPS) libfsimage.so: libfsimage.so.$(MAJOR) ln -sf $< $@ libfsimage.so.$(MAJOR): libfsimage.so.$(MAJOR).$(MINOR) ln -sf $< $@ +BUILD_LINE-y = $(CC) $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,libfsimage.so.$(MAJOR) $(SHLIB_LDFLAGS) -o $@ $^ $(PTHREAD_LIBS) +BUILD_LINE-$(CONFIG_SunOS) = $(CC) $(CFLAGS $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,libfsimage.so.$(MAJOR) $(SHLIB_LDFLAGS) -o $@ $^ $(PTHREAD_LIBS) -lvdisk -lvboxdisk -lxml2 -lgen -lc + libfsimage.so.$(MAJOR).$(MINOR): $(PIC_OBJS) - $(CC) $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,libfsimage.so.$(MAJOR) $(SHLIB_LDFLAGS) -o $@ $^ $(PTHREAD_LIBS) + $(BUILD_LINE-y) -include $(DEPS) diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/common/fsimage.c --- a/tools/libfsimage/common/fsimage.c Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/common/fsimage.c Sat Oct 26 20:03:06 2013 +0400 @@ -36,22 +36,43 @@ static pthread_mutex_t fsi_lock = PTHREAD_MUTEX_INITIALIZER; +#ifdef _VDISK_ +#include "vdisk.h" +#endif + fsi_t *fsi_open_fsimage(const char *path, uint64_t off, const char *options) { fsi_t *fsi = NULL; - int fd; + int fd = -1; int err; + void *pvd = NULL; +#ifdef _VDISK_ + if (vdisk_check_vdisk(path)) { + if ((pvd = vdisk_open(path)) == NULL) + goto fail; + } else { + if ((fd = open(path, O_RDONLY)) == -1) + goto fail; + } +#else if ((fd = open(path, O_RDONLY)) == -1) goto fail; +#endif if ((fsi = malloc(sizeof(*fsi))) == NULL) goto fail; - fsi->f_fd = fd; fsi->f_off = off; fsi->f_data = NULL; fsi->f_bootstring = NULL; + if (pvd) { + fsi->f_fd = NULL; + fsi->f_pvdisk = pvd; + } 
else { + fsi->f_fd = fd; + fsi->f_pvdisk = NULL; + } pthread_mutex_lock(&fsi_lock); err = find_plugin(fsi, path, options); @@ -73,8 +94,16 @@ fail: void fsi_close_fsimage(fsi_t *fsi) { pthread_mutex_lock(&fsi_lock); - fsi->f_plugin->fp_ops->fpo_umount(fsi); - (void) close(fsi->f_fd); + fsi->f_plugin->fp_ops->fpo_umount(fsi); +#ifdef _VDISK_ + if (fsi->f_pvdisk) { + vdisk_close(fsi->f_pvdisk); + } else { + (void) close(fsi->f_fd); + } +#else + (void) close(fsi->f_fd); +#endif free(fsi); pthread_mutex_unlock(&fsi_lock); } diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/common/fsimage_grub.c --- a/tools/libfsimage/common/fsimage_grub.c Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/common/fsimage_grub.c Sat Oct 26 20:03:06 2013 +0400 @@ -31,6 +31,10 @@ #include "fsimage_grub.h" #include "fsimage_priv.h" +#ifdef _VDISK_ +#include "vdisk.h" +#endif + static char *disk_read_junk; typedef struct fsig_data { @@ -176,7 +180,17 @@ fsig_devread(fsi_file_t *ffi, unsigned i r = SECTOR_SIZE - n; if (r > bufsize) r = bufsize; +#ifdef _VDISK_ + if (ffi->ff_fsi->f_pvdisk) { + ret = vdisk_read(ffi->ff_fsi->f_pvdisk, + (off_t)(off - n), tmp, SECTOR_SIZE); + } else { + ret = pread(ffi->ff_fsi->f_fd, tmp, SECTOR_SIZE, + off - n); + } +#else ret = pread(ffi->ff_fsi->f_fd, tmp, SECTOR_SIZE, off - n); +#endif if (ret < n + r) return (0); memcpy(buf, tmp + n, r); @@ -187,7 +201,16 @@ fsig_devread(fsi_file_t *ffi, unsigned i n = (bufsize & ~(SECTOR_SIZE - 1)); if (n > 0) { +#ifdef _VDISK_ + if (ffi->ff_fsi->f_pvdisk) { + ret = vdisk_read(ffi->ff_fsi->f_pvdisk, (off_t)off, + buf, n); + } else { + ret = pread(ffi->ff_fsi->f_fd, buf, n, off); + } +#else ret = pread(ffi->ff_fsi->f_fd, buf, n, off); +#endif if (ret < n) return (0); buf += n; @@ -195,7 +218,16 @@ fsig_devread(fsi_file_t *ffi, unsigned i off += n; } if (bufsize > 0) { +#ifdef _VDISK_ + if (ffi->ff_fsi->f_pvdisk) { + ret = vdisk_read(ffi->ff_fsi->f_pvdisk, (off_t)off, + tmp, SECTOR_SIZE); + } else { + ret = 
pread(ffi->ff_fsi->f_fd, tmp, SECTOR_SIZE, off); + } +#else ret = pread(ffi->ff_fsi->f_fd, tmp, SECTOR_SIZE, off); +#endif if (ret < bufsize) return (0); memcpy(buf, tmp, bufsize); diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/common/fsimage_plugin.c --- a/tools/libfsimage/common/fsimage_plugin.c Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/common/fsimage_plugin.c Sat Oct 26 20:03:06 2013 +0400 @@ -122,6 +122,7 @@ fail: static int load_plugins(void) { const char *fsdir = getenv("FSIMAGE_FSDIR"); + const char *isadir = ""; struct dirent *dp = NULL; struct dirent *dpp; DIR *dir = NULL; @@ -130,8 +131,26 @@ static int load_plugins(void) int err; int ret = -1; +#if defined(FSIMAGE_FSDIR) if (fsdir == NULL) fsdir = FSIMAGE_FSDIR; +#elif defined(__sun__) + if (fsdir == NULL) + fsdir = "/usr/lib/fs"; + + if (sizeof(void *) == 8) + isadir = "64/"; +#elif defined(__ia64__) + if (fsdir == NULL) + fsdir = "/usr/lib/fs"; +#else + if (fsdir == NULL) { + if (sizeof(void *) == 8) + fsdir = "/usr/lib64/fs"; + else + fsdir = "/usr/lib/fs"; + } +#endif if ((name_max = pathconf(fsdir, _PC_NAME_MAX)) == -1) goto fail; @@ -153,8 +172,8 @@ static int load_plugins(void) if (strcmp(dpp->d_name, "..") == 0) continue; - (void) snprintf(tmp, name_max, "%s/%s/fsimage.so", fsdir, - dpp->d_name); + (void) snprintf(tmp, name_max, "%s/%s/%sfsimage.so", fsdir, + dpp->d_name, isadir); if (init_plugin(tmp) != 0) goto fail; diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/common/fsimage_priv.h --- a/tools/libfsimage/common/fsimage_priv.h Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/common/fsimage_priv.h Sat Oct 26 20:03:06 2013 +0400 @@ -47,6 +47,7 @@ struct fsi { void *f_data; fsi_plugin_t *f_plugin; char *f_bootstring; + void *f_pvdisk; }; struct fsi_file { diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/Makefile --- a/tools/libfsimage/zfs/Makefile Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/Makefile Sat Oct 26 20:03:06 2013 +0400 @@ -25,7 
+25,7 @@ XEN_ROOT = $(CURDIR)/../../.. CFLAGS += -DFSYS_ZFS -DFSIMAGE -I$(XEN_ROOT)/tools/libfsimage/zfs -LIB_SRCS-y = zfs_lzjb.c zfs_sha256.c zfs_fletcher.c fsi_zfs.c fsys_zfs.c +LIB_SRCS-y = zfs_lzjb.c zfs_sha256.c zfs_fletcher.c fsi_zfs.c fsys_zfs.c zfs_lz4.c FS = zfs diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/fsi_zfs.h --- a/tools/libfsimage/zfs/fsi_zfs.h Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/fsi_zfs.h Sat Oct 26 20:03:06 2013 +0400 @@ -36,6 +36,8 @@ /* Boot signature related defines for the findroot command */ #define BOOTSIGN_DIR "/boot/grub/bootsign" +#define BOOTSIGN_ARGLEN (MAXNAMELEN + 10) /* (<sign>,0,d) */ +#define BOOTSIGN_LEN (sizeof (BOOTSIGN_DIR) + 1 + BOOTSIGN_ARGLEN) #define BOOTSIGN_BACKUP "/etc/bootsign" /* Maybe redirect memory requests through grub_scratch_mem. */ @@ -60,6 +62,7 @@ #define grub_strstr strstr #define grub_strlen strlen #define grub_memmove memmove +#define grub_isspace isspace extern char current_bootpath[MAXPATHLEN]; extern char current_rootpool[MAXNAMELEN]; diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/fsys_zfs.c --- a/tools/libfsimage/zfs/fsys_zfs.c Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/fsys_zfs.c Sat Oct 26 20:03:06 2013 +0400 @@ -16,12 +16,18 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* + * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. 
+ */ + +/* * The zfs plug-in routines for GRUB are: * * zfs_mount() - locates a valid uberblock of the root pool and reads @@ -69,7 +75,18 @@ decomp_entry_t decomp_table[ZIO_COMPRESS {"on", lzjb_decompress}, /* ZIO_COMPRESS_ON */ {"off", 0}, /* ZIO_COMPRESS_OFF */ {"lzjb", lzjb_decompress}, /* ZIO_COMPRESS_LZJB */ - {"empty", 0} /* ZIO_COMPRESS_EMPTY */ + {"empty", 0}, /* ZIO_COMPRESS_EMPTY */ + {"gzip-1", 0}, /* ZIO_COMPRESS_GZIP_1 */ + {"gzip-2", 0}, /* ZIO_COMPRESS_GZIP_2 */ + {"gzip-3", 0}, /* ZIO_COMPRESS_GZIP_3 */ + {"gzip-4", 0}, /* ZIO_COMPRESS_GZIP_4 */ + {"gzip-5", 0}, /* ZIO_COMPRESS_GZIP_5 */ + {"gzip-6", 0}, /* ZIO_COMPRESS_GZIP_6 */ + {"gzip-7", 0}, /* ZIO_COMPRESS_GZIP_7 */ + {"gzip-8", 0}, /* ZIO_COMPRESS_GZIP_8 */ + {"gzip-9", 0}, /* ZIO_COMPRESS_GZIP_9 */ + {"zle", 0}, /* ZIO_COMPRESS_ZLE */ + {"lz4", lz4_decompress} /* ZIO_COMPRESS_LZ4 */ }; static int zio_read_data(blkptr_t *bp, void *buf, char *stack); @@ -80,8 +97,8 @@ static int zio_read_data(blkptr_t *bp, v static int zfs_bcmp(const void *s1, const void *s2, size_t n) { - const uint8_t *ps1 = s1; - const uint8_t *ps2 = s2; + const uchar_t *ps1 = s1; + const uchar_t *ps2 = s2; if (s1 != s2 && n != 0) { do { @@ -118,16 +135,16 @@ zio_checksum_off(const void *buf, uint64 /* Checksum Table and Values */ zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = { - { { NULL, NULL }, 0, 0, "inherit" }, - { { NULL, NULL }, 0, 0, "on" }, - { { zio_checksum_off, zio_checksum_off }, 0, 0, "off" }, - { { zio_checksum_SHA256, zio_checksum_SHA256 }, 1, 1, "label" }, - { { zio_checksum_SHA256, zio_checksum_SHA256 }, 1, 1, "gang_header" }, - { { NULL, NULL }, 0, 0, "zilog" }, - { { fletcher_2_native, fletcher_2_byteswap }, 0, 0, "fletcher2" }, - { { fletcher_4_native, fletcher_4_byteswap }, 1, 0, "fletcher4" }, - { { zio_checksum_SHA256, zio_checksum_SHA256 }, 1, 0, "SHA256" }, - { { NULL, NULL }, 0, 0, "zilog2" } + {{NULL, NULL}, 0, 0, "inherit"}, + {{NULL, NULL}, 0, 0, "on"}, + {{zio_checksum_off, 
zio_checksum_off}, 0, 0, "off"}, + {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, "label"}, + {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, "gang_header"}, + {{NULL, NULL}, 0, 0, "zilog"}, + {{fletcher_2_native, fletcher_2_byteswap}, 0, 0, "fletcher2"}, + {{fletcher_4_native, fletcher_4_byteswap}, 1, 0, "fletcher4"}, + {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 0, "SHA256"}, + {{NULL, NULL}, 0, 0, "zilog2"}, }; /* @@ -217,18 +234,13 @@ vdev_uberblock_compare(uberblock_t *ub1, * Three pieces of information are needed to verify an uberblock: the magic * number, the version number, and the checksum. * - * Currently Implemented: version number, magic number - * Need to Implement: checksum - * * Return: * 0 - Success * -1 - Failure */ static int -uberblock_verify(uberblock_phys_t *ub, uint64_t offset) +uberblock_verify(uberblock_t *uber, uint64_t ub_size, uint64_t offset) { - - uberblock_t *uber = &ub->ubp_uberblock; blkptr_t bp; BP_ZERO(&bp); @@ -236,11 +248,11 @@ uberblock_verify(uberblock_phys_t *ub, u BP_SET_BYTEORDER(&bp, ZFS_HOST_BYTEORDER); ZIO_SET_CHECKSUM(&bp.blk_cksum, offset, 0, 0, 0); - if (zio_checksum_verify(&bp, (char *)ub, UBERBLOCK_SIZE) != 0) + if (zio_checksum_verify(&bp, (char *)uber, ub_size) != 0) return (-1); if (uber->ub_magic == UBERBLOCK_MAGIC && - uber->ub_version > 0 && uber->ub_version <= SPA_VERSION) + SPA_VERSION_IS_SUPPORTED(uber->ub_version)) return (0); return (-1); @@ -252,25 +264,28 @@ uberblock_verify(uberblock_phys_t *ub, u * Success - Pointer to the best uberblock. 
* Failure - NULL */ -static uberblock_phys_t * -find_bestub(uberblock_phys_t *ub_array, uint64_t sector) +static uberblock_t * +find_bestub(char *ub_array, uint64_t ashift, uint64_t sector) { - uberblock_phys_t *ubbest = NULL; - uint64_t offset; + uberblock_t *ubbest = NULL; + uberblock_t *ubnext; + uint64_t offset, ub_size; int i; - for (i = 0; i < (VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT); i++) { + ub_size = VDEV_UBERBLOCK_SIZE(ashift); + + for (i = 0; i < VDEV_UBERBLOCK_COUNT(ashift); i++) { + ubnext = (uberblock_t *)ub_array; + ub_array += ub_size; offset = (sector << SPA_MINBLOCKSHIFT) + - VDEV_UBERBLOCK_OFFSET(i); - if (uberblock_verify(&ub_array[i], offset) == 0) { - if (ubbest == NULL) { - ubbest = &ub_array[i]; - } else if (vdev_uberblock_compare( - &(ub_array[i].ubp_uberblock), - &(ubbest->ubp_uberblock)) > 0) { - ubbest = &ub_array[i]; - } - } + VDEV_UBERBLOCK_OFFSET(ashift, i); + + if (uberblock_verify(ubnext, ub_size, offset) != 0) + continue; + + if (ubbest == NULL || + vdev_uberblock_compare(ubnext, ubbest) > 0) + ubbest = ubnext; } return (ubbest); @@ -295,7 +310,7 @@ zio_read_gang(blkptr_t *bp, dva_t *dva, zio_gb = (zio_gbh_phys_t *)stack; stack += SPA_GANGBLOCKSIZE; offset = DVA_GET_OFFSET(dva); - sector = DVA_OFFSET_TO_PHYS_SECTOR(offset); + sector = DVA_OFFSET_TO_PHYS_SECTOR(offset); /* read in the gang block header */ if (devread(sector, 0, SPA_GANGBLOCKSIZE, (char *)zio_gb) == 0) { @@ -354,8 +369,8 @@ zio_read_data(blkptr_t *bp, void *buf, c } else { /* read in a data block */ offset = DVA_GET_OFFSET(&bp->blk_dva[i]); - sector = DVA_OFFSET_TO_PHYS_SECTOR(offset); - if (devread(sector, 0, psize, buf)) + sector = DVA_OFFSET_TO_PHYS_SECTOR(offset); + if (devread(sector, 0, psize, buf) != 0) return (0); } } @@ -399,7 +414,7 @@ zio_read(blkptr_t *bp, void *buf, char * stack += psize; } - if (zio_read_data(bp, buf, stack)) { + if (zio_read_data(bp, buf, stack) != 0) { grub_printf("zio_read_data failed\n"); return (ERR_FSYS_CORRUPT); } @@ -409,8 
+424,13 @@ zio_read(blkptr_t *bp, void *buf, char * return (ERR_FSYS_CORRUPT); } - if (comp != ZIO_COMPRESS_OFF) - decomp_table[comp].decomp_func(buf, retbuf, psize, lsize); + if (comp != ZIO_COMPRESS_OFF) { + if (decomp_table[comp].decomp_func(buf, retbuf, psize, + lsize) != 0) { + grub_printf("zio_read decompression failed\n"); + return (ERR_FSYS_CORRUPT); + } + } return (0); } @@ -446,7 +466,7 @@ dmu_read(dnode_phys_t *dn, uint64_t blki grub_memset(buf, 0, dn->dn_datablkszsec << SPA_MINBLOCKSHIFT); break; - } else if ((errnum = zio_read(bp, tmpbuf, stack))) { + } else if (errnum = zio_read(bp, tmpbuf, stack)) { return (errnum); } @@ -465,13 +485,13 @@ dmu_read(dnode_phys_t *dn, uint64_t blki * errnum - failure */ static int -mzap_lookup(mzap_phys_t *zapobj, int objsize, char *name, +mzap_lookup(mzap_phys_t *zapobj, int objsize, const char *name, uint64_t *value) { int i, chunks; mzap_ent_phys_t *mzap_ent = zapobj->mz_chunk; - chunks = objsize/MZAP_ENT_LEN - 1; + chunks = objsize / MZAP_ENT_LEN - 1; for (i = 0; i < chunks; i++) { if (grub_strcmp(mzap_ent[i].mze_name, name) == 0) { *value = mzap_ent[i].mze_value; @@ -511,8 +531,8 @@ zap_hash(uint64_t salt, const char *name /* * Only use 28 bits, since we need 4 bits in the cookie for the * collision differentiator. We MUST use the high bits, since - * those are the onces that we first pay attention to when - * chosing the bucket. + * those are the ones that we first pay attention to when + * choosing the bucket. 
*/ crc &= ~((1ULL << (64 - 28)) - 1); @@ -617,7 +637,7 @@ zap_leaf_lookup(zap_leaf_phys_t *l, int */ static int fzap_lookup(dnode_phys_t *zap_dnode, zap_phys_t *zap, - char *name, uint64_t *value, char *stack) + const char *name, uint64_t *value, char *stack) { zap_leaf_phys_t *l; uint64_t hash, idx, blkid; @@ -645,7 +665,7 @@ fzap_lookup(dnode_phys_t *zap_dnode, zap stack += 1<<blksft; if ((1<<blksft) < sizeof (zap_leaf_phys_t)) return (ERR_FSYS_CORRUPT); - if ((errnum = dmu_read(zap_dnode, blkid, l, stack))) + if (errnum = dmu_read(zap_dnode, blkid, l, stack)) return (errnum); return (zap_leaf_lookup(l, blksft, hash, name, value)); @@ -660,7 +680,8 @@ fzap_lookup(dnode_phys_t *zap_dnode, zap * errnum - failure */ static int -zap_lookup(dnode_phys_t *zap_dnode, char *name, uint64_t *val, char *stack) +zap_lookup(dnode_phys_t *zap_dnode, const char *name, uint64_t *val, + char *stack) { uint64_t block_type; int size; @@ -671,7 +692,7 @@ zap_lookup(dnode_phys_t *zap_dnode, char size = zap_dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT; stack += size; - if ((errnum = dmu_read(zap_dnode, 0, zapbuf, stack))) + if ((errnum = dmu_read(zap_dnode, 0, zapbuf, stack)) != 0) return (errnum); block_type = *((uint64_t *)zapbuf); @@ -687,6 +708,56 @@ zap_lookup(dnode_phys_t *zap_dnode, char return (ERR_FSYS_CORRUPT); } +typedef struct zap_attribute { + int za_integer_length; + uint64_t za_num_integers; + uint64_t za_first_integer; + char *za_name; +} zap_attribute_t; + +typedef int (zap_cb_t)(zap_attribute_t *za, void *arg, char *stack); + +static int +zap_iterate(dnode_phys_t *zap_dnode, zap_cb_t *cb, void *arg, char *stack) +{ + uint32_t size = zap_dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT; + zap_attribute_t za; + int i; + mzap_phys_t *mzp = (mzap_phys_t *)stack; + stack += size; + + if ((errnum = dmu_read(zap_dnode, 0, mzp, stack)) != 0) + return (errnum); + + /* + * Iteration over fatzap objects has not yet been implemented. 
+ * If we encounter a pool in which there are more features for + * read than can fit inside a microzap (i.e., more than 2048 + * features for read), we can add support for fatzap iteration. + * For now, fail. + */ + if (mzp->mz_block_type != ZBT_MICRO) { + grub_printf("feature information stored in fatzap, pool " + "version not supported\n"); + return (1); + } + + za.za_integer_length = 8; + za.za_num_integers = 1; + for (i = 0; i < size / MZAP_ENT_LEN - 1; i++) { + mzap_ent_phys_t *mzep = &mzp->mz_chunk[i]; + int err; + + za.za_first_integer = mzep->mze_value; + za.za_name = mzep->mze_name; + err = cb(&za, arg, stack); + if (err != 0) + return (err); + } + + return (0); +} + /* * Get the dnode of an object number from the metadnode of an object set. * @@ -731,7 +802,7 @@ dnode_get(dnode_phys_t *mdn, uint64_t ob stack += blksz; } - if ((errnum = dmu_read(mdn, blkid, (char *)dnbuf, stack))) + if (errnum = dmu_read(mdn, blkid, (char *)dnbuf, stack)) return (errnum); grub_memmove(buf, &dnbuf[idx], DNODE_SIZE); @@ -766,6 +837,24 @@ is_top_dataset_file(char *str) return (0); } +static int +check_feature(zap_attribute_t *za, void *arg, char *stack) +{ + const char **names = arg; + int i; + + if (za->za_first_integer == 0) + return (0); + + for (i = 0; names[i] != NULL; i++) { + if (grub_strcmp(za->za_name, names[i]) == 0) { + return (0); + } + } + grub_printf("missing feature for read ''%s''\n", za->za_name); + return (ERR_NEWER_VERSION); +} + /* * Get the file dnode for a given file name where mdn is the meta dnode * for this ZFS object set. When found, place the file dnode in dn. 
@@ -782,40 +871,40 @@ dnode_get_path(dnode_phys_t *mdn, char * uint64_t objnum, version; char *cname, ch; - if ((errnum = dnode_get(mdn, MASTER_NODE_OBJ, DMU_OT_MASTER_NODE, - dn, stack))) + if (errnum = dnode_get(mdn, MASTER_NODE_OBJ, DMU_OT_MASTER_NODE, + dn, stack)) return (errnum); - if ((errnum = zap_lookup(dn, ZPL_VERSION_STR, &version, stack))) + if (errnum = zap_lookup(dn, ZPL_VERSION_STR, &version, stack)) return (errnum); if (version > ZPL_VERSION) return (-1); - if ((errnum = zap_lookup(dn, ZFS_ROOT_OBJ, &objnum, stack))) + if (errnum = zap_lookup(dn, ZFS_ROOT_OBJ, &objnum, stack)) return (errnum); - if ((errnum = dnode_get(mdn, objnum, DMU_OT_DIRECTORY_CONTENTS, - dn, stack))) + if (errnum = dnode_get(mdn, objnum, DMU_OT_DIRECTORY_CONTENTS, + dn, stack)) return (errnum); /* skip leading slashes */ while (*path == ''/'') path++; - while (*path && !isspace((uint8_t)*path)) { + while (*path && !grub_isspace(*path)) { /* get the next component name */ cname = path; - while (*path && !isspace((uint8_t)*path) && *path != ''/'') + while (*path && !grub_isspace(*path) && *path != ''/'') path++; ch = *path; *path = 0; /* ensure null termination */ - if ((errnum = zap_lookup(dn, cname, &objnum, stack))) + if (errnum = zap_lookup(dn, cname, &objnum, stack)) return (errnum); objnum = ZFS_DIRENT_OBJ(objnum); - if ((errnum = dnode_get(mdn, objnum, 0, dn, stack))) + if (errnum = dnode_get(mdn, objnum, 0, dn, stack)) return (errnum); *path = ch; @@ -843,8 +932,8 @@ get_default_bootfsobj(dnode_phys_t *mosm dnode_phys_t *dn = (dnode_phys_t *)stack; stack += DNODE_SIZE; - if ((errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT, - DMU_OT_OBJECT_DIRECTORY, dn, stack))) + if (errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT, + DMU_OT_OBJECT_DIRECTORY, dn, stack)) return (errnum); /* @@ -854,7 +943,7 @@ get_default_bootfsobj(dnode_phys_t *mosm if (zap_lookup(dn, DMU_POOL_PROPS, &objnum, stack)) return (ERR_FILESYSTEM_NOT_FOUND); - if ((errnum = dnode_get(mosmdn, objnum, 
DMU_OT_POOL_PROPS, dn, stack))) + if (errnum = dnode_get(mosmdn, objnum, DMU_OT_POOL_PROPS, dn, stack)) return (errnum); if (zap_lookup(dn, ZPOOL_PROP_BOOTFS, &objnum, stack)) @@ -868,6 +957,57 @@ get_default_bootfsobj(dnode_phys_t *mosm } /* + * List of pool features that the grub implementation of ZFS supports for + * read. Note that features that are only required for write do not need + * to be listed here since grub opens pools in read-only mode. + * + * When this list is updated the version number in usr/src/grub/capability + * must be incremented to ensure the new grub gets installed. + */ +static const char *spa_feature_names[] = { + "org.illumos:lz4_compress", + NULL +}; + +/* + * Checks whether the MOS features that are active are supported by this + * (GRUB''s) implementation of ZFS. + * + * Return: + * 0: Success. + * errnum: Failure. + */ +static int +check_mos_features(dnode_phys_t *mosmdn, char *stack) +{ + uint64_t objnum; + dnode_phys_t *dn; + uint8_t error = 0; + + dn = (dnode_phys_t *)stack; + stack += DNODE_SIZE; + + if ((errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT, + DMU_OT_OBJECT_DIRECTORY, dn, stack)) != 0) + return (errnum); + + /* + * Find the object number for ''features_for_read'' and retrieve its + * corresponding dnode. Note that we don''t check features_for_write + * because GRUB is not opening the pool for write. + */ + if ((errnum = zap_lookup(dn, DMU_POOL_FEATURES_FOR_READ, &objnum, + stack)) != 0) + return (errnum); + + if ((errnum = dnode_get(mosmdn, objnum, DMU_OTN_ZAP_METADATA, + dn, stack)) != 0) + return (errnum); + + return (zap_iterate(dn, check_feature, spa_feature_names, stack)); +} + +/* * Given a MOS metadnode, get the metadnode of a given filesystem name (fsname), * e.g. pool/rootfs, or a given object number (obj), e.g. the object number * of pool/rootfs. 
@@ -896,15 +1036,15 @@ get_objset_mdn(dnode_phys_t *mosmdn, cha goto skip; } - if ((errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT, - DMU_OT_OBJECT_DIRECTORY, mdn, stack))) + if (errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT, + DMU_OT_OBJECT_DIRECTORY, mdn, stack)) return (errnum); - if ((errnum = zap_lookup(mdn, DMU_POOL_ROOT_DATASET, &objnum, - stack))) + if (errnum = zap_lookup(mdn, DMU_POOL_ROOT_DATASET, &objnum, + stack)) return (errnum); - if ((errnum = dnode_get(mosmdn, objnum, DMU_OT_DSL_DIR, mdn, stack))) + if (errnum = dnode_get(mosmdn, objnum, DMU_OT_DSL_DIR, mdn, stack)) return (errnum); if (fsname == NULL) { @@ -914,23 +1054,24 @@ get_objset_mdn(dnode_phys_t *mosmdn, cha } /* take out the pool name */ - while (*fsname && !isspace((uint8_t)*fsname) && *fsname != ''/'') + while (*fsname && !grub_isspace(*fsname) && *fsname != ''/'') fsname++; - while (*fsname && !isspace((uint8_t)*fsname)) { + while (*fsname && !grub_isspace(*fsname)) { uint64_t childobj; while (*fsname == ''/'') fsname++; cname = fsname; - while (*fsname && !isspace((uint8_t)*fsname) && *fsname != ''/'') + while (*fsname && !grub_isspace(*fsname) && *fsname != ''/'') fsname++; ch = *fsname; *fsname = 0; snapname = cname; - while (*snapname && !isspace((uint8_t)*snapname) && *snapname != ''@'') + while (*snapname && !grub_isspace(*snapname) && *snapname != + ''@'') snapname++; if (*snapname == ''@'') { issnapshot = 1; @@ -938,15 +1079,15 @@ get_objset_mdn(dnode_phys_t *mosmdn, cha } childobj = ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_child_dir_zapobj; - if ((errnum = dnode_get(mosmdn, childobj, - DMU_OT_DSL_DIR_CHILD_MAP, mdn, stack))) + if (errnum = dnode_get(mosmdn, childobj, + DMU_OT_DSL_DIR_CHILD_MAP, mdn, stack)) return (errnum); if (zap_lookup(mdn, cname, &objnum, stack)) return (ERR_FILESYSTEM_NOT_FOUND); - if ((errnum = dnode_get(mosmdn, objnum, DMU_OT_DSL_DIR, - mdn, stack))) + if (errnum = dnode_get(mosmdn, objnum, DMU_OT_DSL_DIR, + mdn, stack)) return (errnum); *fsname = 
ch; @@ -958,7 +1099,7 @@ get_objset_mdn(dnode_phys_t *mosmdn, cha *obj = headobj; skip: - if ((errnum = dnode_get(mosmdn, headobj, DMU_OT_DSL_DATASET, mdn, stack))) + if (errnum = dnode_get(mosmdn, headobj, DMU_OT_DSL_DATASET, mdn, stack)) return (errnum); if (issnapshot) { uint64_t snapobj; @@ -966,13 +1107,13 @@ skip: snapobj = ((dsl_dataset_phys_t *)DN_BONUS(mdn))-> ds_snapnames_zapobj; - if ((errnum = dnode_get(mosmdn, snapobj, - DMU_OT_DSL_DS_SNAP_MAP, mdn, stack))) + if (errnum = dnode_get(mosmdn, snapobj, + DMU_OT_DSL_DS_SNAP_MAP, mdn, stack)) return (errnum); if (zap_lookup(mdn, snapname + 1, &headobj, stack)) return (ERR_FILESYSTEM_NOT_FOUND); - if ((errnum = dnode_get(mosmdn, headobj, - DMU_OT_DSL_DATASET, mdn, stack))) + if (errnum = dnode_get(mosmdn, headobj, + DMU_OT_DSL_DATASET, mdn, stack)) return (errnum); if (obj) *obj = headobj; @@ -981,7 +1122,7 @@ skip: bp = &((dsl_dataset_phys_t *)DN_BONUS(mdn))->ds_bp; osp = (objset_phys_t *)stack; stack += sizeof (objset_phys_t); - if ((errnum = zio_read(bp, osp, stack))) + if (errnum = zio_read(bp, osp, stack)) return (errnum); grub_memmove((char *)mdn, (char *)&osp->os_meta_dnode, DNODE_SIZE); @@ -1019,8 +1160,7 @@ nvlist_unpack(char *nvlist, char **out) if (nvlist[0] != NV_ENCODE_XDR || nvlist[1] != HOST_ENDIAN) return (1); - nvlist += 4; - *out = nvlist; + *out = nvlist + 4; return (0); } @@ -1033,7 +1173,7 @@ nvlist_array(char *nvlist, int index) /* skip the header, nvl_version, and nvl_nvflag */ nvlist = nvlist + 4 * 2; - while ((encode_size = BSWAP_32(*(uint32_t *)nvlist))) + while (encode_size = BSWAP_32(*(uint32_t *)nvlist)) nvlist += encode_size; /* goto the next nvpair */ nvlist = nvlist + 4 * 2; /* skip the ending 2 zeros - 8 bytes */ @@ -1042,69 +1182,159 @@ nvlist_array(char *nvlist, int index) return (nvlist); } +/* + * The nvlist_next_nvpair() function returns a handle to the next nvpair in the + * list following nvpair. If nvpair is NULL, the first pair is returned. 
If + * nvpair is the last pair in the nvlist, NULL is returned. + */ +static char * +nvlist_next_nvpair(char *nvl, char *nvpair) +{ + char *cur, *prev; + int encode_size; + + if (nvl == NULL) + return (NULL); + + if (nvpair == NULL) { + /* skip over nvl_version and nvl_nvflag */ + nvpair = nvl + 4 * 2; + } else { + /* skip to the next nvpair */ + encode_size = BSWAP_32(*(uint32_t *)nvpair); + nvpair += encode_size; + } + + /* 8 bytes of 0 marks the end of the list */ + if (*(uint64_t *)nvpair == 0) + return (NULL); + + return (nvpair); +} + +/* + * This function returns 0 on success and 1 on failure. On success, a string + * containing the name of nvpair is saved in buf. + */ +static int +nvpair_name(char *nvp, char *buf, int buflen) +{ + int len; + + /* skip over encode/decode size */ + nvp += 4 * 2; + + len = BSWAP_32(*(uint32_t *)nvp); + if (buflen < len + 1) + return (1); + + grub_memmove(buf, nvp + 4, len); + buf[len] = ''\0''; + + return (0); +} + +/* + * This function retrieves the value of the nvpair in the form of enumerated + * type data_type_t. This is used to determine the appropriate type to pass to + * nvpair_value(). 
+ */ +static int +nvpair_type(char *nvp) +{ + int name_len, type; + + /* skip over encode/decode size */ + nvp += 4 * 2; + + /* skip over name_len */ + name_len = BSWAP_32(*(uint32_t *)nvp); + nvp += 4; + + /* skip over name */ + nvp = nvp + ((name_len + 3) & ~3); /* align */ + + type = BSWAP_32(*(uint32_t *)nvp); + + return (type); +} + +static int +nvpair_value(char *nvp, void *val, int valtype, int *nelmp) +{ + int name_len, type, slen; + char *strval = val; + uint64_t *intval = val; + + /* skip over encode/decode size */ + nvp += 4 * 2; + + /* skip over name_len */ + name_len = BSWAP_32(*(uint32_t *)nvp); + nvp += 4; + + /* skip over name */ + nvp = nvp + ((name_len + 3) & ~3); /* align */ + + /* skip over type */ + type = BSWAP_32(*(uint32_t *)nvp); + nvp += 4; + + if (type == valtype) { + int nelm; + + nelm = BSWAP_32(*(uint32_t *)nvp); + if (valtype != DATA_TYPE_BOOLEAN && nelm < 1) + return (1); + nvp += 4; + + switch (valtype) { + case DATA_TYPE_BOOLEAN: + return (0); + + case DATA_TYPE_STRING: + slen = BSWAP_32(*(uint32_t *)nvp); + nvp += 4; + grub_memmove(strval, nvp, slen); + strval[slen] = ''\0''; + return (0); + + case DATA_TYPE_UINT64: + *intval = BSWAP_64(*(uint64_t *)nvp); + return (0); + + case DATA_TYPE_NVLIST: + *(void **)val = (void *)nvp; + return (0); + + case DATA_TYPE_NVLIST_ARRAY: + *(void **)val = (void *)nvp; + if (nelmp) + *nelmp = nelm; + return (0); + } + } + + return (1); +} + static int nvlist_lookup_value(char *nvlist, char *name, void *val, int valtype, int *nelmp) { - int name_len, type, slen, encode_size; - char *nvpair, *nvp_name, *strval = val; - uint64_t *intval = val; + char *nvpair; - /* skip the header, nvl_version, and nvl_nvflag */ - nvlist = nvlist + 4 * 2; - - /* - * Loop thru the nvpair list - * The XDR representation of an integer is in big-endian byte order. 
- */ - while ((encode_size = BSWAP_32(*(uint32_t *)nvlist))) { - - nvpair = nvlist + 4 * 2; /* skip the encode/decode size */ - - name_len = BSWAP_32(*(uint32_t *)nvpair); - nvpair += 4; - - nvp_name = nvpair; - nvpair = nvpair + ((name_len + 3) & ~3); /* align */ - - type = BSWAP_32(*(uint32_t *)nvpair); - nvpair += 4; + for (nvpair = nvlist_next_nvpair(nvlist, NULL); + nvpair != NULL; + nvpair = nvlist_next_nvpair(nvlist, nvpair)) { + int name_len = BSWAP_32(*(uint32_t *)(nvpair + 4 * 2)); + char *nvp_name = nvpair + 4 * 3; if ((grub_strncmp(nvp_name, name, name_len) == 0) && - type == valtype) { - int nelm; - - if ((nelm = BSWAP_32(*(uint32_t *)nvpair)) < 1) - return (1); - nvpair += 4; - - switch (valtype) { - case DATA_TYPE_STRING: - slen = BSWAP_32(*(uint32_t *)nvpair); - nvpair += 4; - grub_memmove(strval, nvpair, slen); - strval[slen] = ''\0''; - return (0); - - case DATA_TYPE_UINT64: - *intval = BSWAP_64(*(uint64_t *)nvpair); - return (0); - - case DATA_TYPE_NVLIST: - *(void **)val = (void *)nvpair; - return (0); - - case DATA_TYPE_NVLIST_ARRAY: - *(void **)val = (void *)nvpair; - if (nelmp) - *nelmp = nelm; - return (0); - } + nvpair_type(nvpair) == valtype) { + return (nvpair_value(nvpair, val, valtype, nelmp)); } - - nvlist += encode_size; /* goto the next nvpair */ } - return (1); } @@ -1141,7 +1371,7 @@ vdev_get_bootpath(char *nv, uint64_t ing NULL)) return (ERR_FSYS_CORRUPT); - if (strcmp(type, VDEV_TYPE_DISK) == 0) { + if (grub_strcmp(type, VDEV_TYPE_DISK) == 0) { uint64_t guid; if (vdev_validate(nv) != 0) @@ -1171,15 +1401,15 @@ vdev_get_bootpath(char *nv, uint64_t ing devid, DATA_TYPE_STRING, NULL) != 0) devid[0] = ''\0''; - if (strlen(bootpath) >= MAXPATHLEN || - strlen(devid) >= MAXPATHLEN) + if (grub_strlen(bootpath) >= MAXPATHLEN || + grub_strlen(devid) >= MAXPATHLEN) return (ERR_WONT_FIT); return (0); - } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 || - strcmp(type, VDEV_TYPE_REPLACING) == 0 || - (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 
0))) { + } else if (grub_strcmp(type, VDEV_TYPE_MIRROR) == 0 || + grub_strcmp(type, VDEV_TYPE_REPLACING) == 0 || + (is_spare = (grub_strcmp(type, VDEV_TYPE_SPARE) == 0))) { int nelm, i; char *child; @@ -1207,15 +1437,14 @@ vdev_get_bootpath(char *nv, uint64_t ing * 0 - success * ERR_* - failure */ -int +static int check_pool_label(uint64_t sector, char *stack, char *outdevid, - char *outpath, uint64_t *outguid) + char *outpath, uint64_t *outguid, uint64_t *outashift, uint64_t *outversion) { vdev_phys_t *vdev; uint64_t pool_state, txg = 0; - char *nvlist, *nv; + char *nvlist, *nv, *features; uint64_t diskguid; - uint64_t version; sector += (VDEV_SKIP_SIZE >> SPA_MINBLOCKSHIFT); @@ -1248,10 +1477,10 @@ check_pool_label(uint64_t sector, char * if (txg == 0) return (ERR_NO_BOOTPATH); - if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VERSION, &version, + if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VERSION, outversion, DATA_TYPE_UINT64, NULL)) return (ERR_FSYS_CORRUPT); - if (version > SPA_VERSION) + if (!SPA_VERSION_IS_SUPPORTED(*outversion)) return (ERR_NEWER_VERSION); if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VDEV_TREE, &nv, DATA_TYPE_NVLIST, NULL)) @@ -1259,11 +1488,38 @@ check_pool_label(uint64_t sector, char * if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_GUID, &diskguid, DATA_TYPE_UINT64, NULL)) return (ERR_FSYS_CORRUPT); + if (nvlist_lookup_value(nv, ZPOOL_CONFIG_ASHIFT, outashift, + DATA_TYPE_UINT64, NULL) != 0) + return (ERR_FSYS_CORRUPT); if (vdev_get_bootpath(nv, diskguid, outdevid, outpath, 0)) return (ERR_NO_BOOTPATH); if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_GUID, outguid, DATA_TYPE_UINT64, NULL)) return (ERR_FSYS_CORRUPT); + + if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_FEATURES_FOR_READ, + &features, DATA_TYPE_NVLIST, NULL) == 0) { + char *nvp; + char *name = stack; + stack += MAXNAMELEN; + + for (nvp = nvlist_next_nvpair(features, NULL); + nvp != NULL; + nvp = nvlist_next_nvpair(features, nvp)) { + zap_attribute_t za; + + if (nvpair_name(nvp, 
name, MAXNAMELEN) != 0) + return (ERR_FSYS_CORRUPT); + + za.za_integer_length = 8; + za.za_num_integers = 1; + za.za_first_integer = 1; + za.za_name = name; + if (check_feature(&za, spa_feature_names, stack) != 0) + return (ERR_NEWER_VERSION); + } + } + return (0); } @@ -1278,15 +1534,16 @@ check_pool_label(uint64_t sector, char * int zfs_mount(void) { - char *stack; + char *stack, *ub_array; int label = 0; - uberblock_phys_t *ub_array, *ubbest; + uberblock_t *ubbest; objset_phys_t *osp; char tmp_bootpath[MAXNAMELEN]; char tmp_devid[MAXNAMELEN]; - uint64_t tmp_guid; + uint64_t tmp_guid, ashift, version; uint64_t adjpl = (uint64_t)part_length << SPA_MINBLOCKSHIFT; int err = errnum; /* preserve previous errnum state */ + uint64_t sector; /* if it''s our first time here, zero the best uberblock out */ if (best_drive == 0 && best_part == 0 && find_best_root) { @@ -1296,7 +1553,7 @@ zfs_mount(void) stackbase = ZFS_SCRATCH; stack = stackbase; - ub_array = (uberblock_phys_t *)stack; + ub_array = stack; stack += VDEV_UBERBLOCK_RING; osp = (objset_phys_t *)stack; @@ -1305,8 +1562,6 @@ zfs_mount(void) for (label = 0; label < VDEV_LABELS; label++) { - uint64_t sector; - /* * some eltorito stacks don''t give us a size and * we end up setting the size to MAXUINT, further @@ -1324,39 +1579,38 @@ zfs_mount(void) /* Read in the uberblock ring (128K). 
*/ if (devread(sector + - ((VDEV_SKIP_SIZE + VDEV_PHYS_SIZE) >> - SPA_MINBLOCKSHIFT), 0, VDEV_UBERBLOCK_RING, - (char *)ub_array) == 0) + ((VDEV_SKIP_SIZE + VDEV_PHYS_SIZE) >> SPA_MINBLOCKSHIFT), + 0, VDEV_UBERBLOCK_RING, ub_array) == 0) continue; - if ((ubbest = find_bestub(ub_array, sector)) != NULL && - zio_read(&ubbest->ubp_uberblock.ub_rootbp, osp, stack) - == 0) { + if (check_pool_label(sector, stack, tmp_devid, + tmp_bootpath, &tmp_guid, &ashift, &version)) + continue; - VERIFY_OS_TYPE(osp, DMU_OST_META); + if (pool_guid == 0) + pool_guid = tmp_guid; - if (check_pool_label(sector, stack, tmp_devid, - tmp_bootpath, &tmp_guid)) - continue; - if (pool_guid == 0) - pool_guid = tmp_guid; + if ((ubbest = find_bestub(ub_array, ashift, sector)) == NULL || + zio_read(&ubbest->ub_rootbp, osp, stack) != 0) + continue; - if (find_best_root && ((pool_guid != tmp_guid) || - vdev_uberblock_compare(&ubbest->ubp_uberblock, - &(current_uberblock)) <= 0)) - continue; + VERIFY_OS_TYPE(osp, DMU_OST_META); - /* Got the MOS. Save it at the memory addr MOS. */ - grub_memmove(MOS, &osp->os_meta_dnode, DNODE_SIZE); - grub_memmove(¤t_uberblock, - &ubbest->ubp_uberblock, sizeof (uberblock_t)); - grub_memmove(current_bootpath, tmp_bootpath, - MAXNAMELEN); - grub_memmove(current_devid, tmp_devid, - grub_strlen(tmp_devid)); - is_zfs_mount = 1; - return (1); - } + if (version >= SPA_VERSION_FEATURES && + check_mos_features(&osp->os_meta_dnode, stack) != 0) + continue; + + if (find_best_root && ((pool_guid != tmp_guid) || + vdev_uberblock_compare(ubbest, &(current_uberblock)) <= 0)) + continue; + + /* Got the MOS. Save it at the memory addr MOS. 
*/ + grub_memmove(MOS, &osp->os_meta_dnode, DNODE_SIZE); + grub_memmove(¤t_uberblock, ubbest, sizeof (uberblock_t)); + grub_memmove(current_bootpath, tmp_bootpath, MAXNAMELEN); + grub_memmove(current_devid, tmp_devid, grub_strlen(tmp_devid)); + is_zfs_mount = 1; + return (1); } /* @@ -1399,23 +1653,23 @@ zfs_open(char *filename) * do not goto ''current_bootfs''. */ if (is_top_dataset_file(filename)) { - if ((errnum = get_objset_mdn(MOS, NULL, NULL, mdn, stack))) + if (errnum = get_objset_mdn(MOS, NULL, NULL, mdn, stack)) return (0); current_bootfs_obj = 0; } else { if (current_bootfs[0] == ''\0'') { /* Get the default root filesystem object number */ - if ((errnum = get_default_bootfsobj(MOS, - ¤t_bootfs_obj, stack))) + if (errnum = get_default_bootfsobj(MOS, + ¤t_bootfs_obj, stack)) return (0); - if ((errnum = get_objset_mdn(MOS, NULL, - ¤t_bootfs_obj, mdn, stack))) + if (errnum = get_objset_mdn(MOS, NULL, + ¤t_bootfs_obj, mdn, stack)) return (0); } else { - if ((errnum = get_objset_mdn(MOS, current_bootfs, - ¤t_bootfs_obj, mdn, stack))) { + if (errnum = get_objset_mdn(MOS, current_bootfs, + ¤t_bootfs_obj, mdn, stack)) { grub_memset(current_bootfs, 0, MAXNAMELEN); return (0); } @@ -1515,7 +1769,7 @@ zfs_read(char *buf, int len) */ uint64_t blkid = filepos / blksz; - if ((errnum = dmu_read(DNODE, blkid, file_buf, stack))) + if (errnum = dmu_read(DNODE, blkid, file_buf, stack)) return (0); file_start = blkid * blksz; diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/fsys_zfs.h --- a/tools/libfsimage/zfs/fsys_zfs.h Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/fsys_zfs.h Sat Oct 26 20:03:06 2013 +0400 @@ -16,10 +16,17 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ + +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Saso Kiselkov. 
All rights reserved. + */ + #ifndef _FSYS_ZFS_H #define _FSYS_ZFS_H @@ -95,26 +102,8 @@ typedef unsigned int size_t; #define BSWAP_64(x) ((BSWAP_32(x) << 32) | BSWAP_32((x) >> 32)) #define P2ROUNDUP(x, align) (-(-(x) & -(align))) -/* - * XXX Match these macro up with real zfs once we have nvlist support so that we - * can support large sector disks. - */ -#define UBERBLOCK_SIZE (1ULL << UBERBLOCK_SHIFT) -#define VDEV_UBERBLOCK_SHIFT UBERBLOCK_SHIFT -#include <stddef.h> -#define VDEV_UBERBLOCK_OFFSET(n) \ -offsetof(vdev_label_t, vl_uberblock[(n) << VDEV_UBERBLOCK_SHIFT]) - typedef struct uberblock uberblock_t; -/* XXX Uberblock_phys_t is no longer in the kernel zfs */ -typedef struct uberblock_phys { - uberblock_t ubp_uberblock; - char ubp_pad[UBERBLOCK_SIZE - sizeof (uberblock_t) - - sizeof (zio_eck_t)]; - zio_eck_t ubp_zec; -} uberblock_phys_t; - /* * Macros to get fields in a bp or DVA. */ @@ -137,10 +126,36 @@ typedef struct uberblock_phys { #define NV_ENCODE_NATIVE 0 #define NV_ENCODE_XDR 1 #define HOST_ENDIAN 1 /* for x86 machine */ -#define DATA_TYPE_UINT64 8 -#define DATA_TYPE_STRING 9 -#define DATA_TYPE_NVLIST 19 -#define DATA_TYPE_NVLIST_ARRAY 20 +typedef enum { + DATA_TYPE_UNKNOWN = 0, + DATA_TYPE_BOOLEAN, + DATA_TYPE_BYTE, + DATA_TYPE_INT16, + DATA_TYPE_UINT16, + DATA_TYPE_INT32, + DATA_TYPE_UINT32, + DATA_TYPE_INT64, + DATA_TYPE_UINT64, + DATA_TYPE_STRING, + DATA_TYPE_BYTE_ARRAY, + DATA_TYPE_INT16_ARRAY, + DATA_TYPE_UINT16_ARRAY, + DATA_TYPE_INT32_ARRAY, + DATA_TYPE_UINT32_ARRAY, + DATA_TYPE_INT64_ARRAY, + DATA_TYPE_UINT64_ARRAY, + DATA_TYPE_STRING_ARRAY, + DATA_TYPE_HRTIME, + DATA_TYPE_NVLIST, + DATA_TYPE_NVLIST_ARRAY, + DATA_TYPE_BOOLEAN_VALUE, + DATA_TYPE_INT8, + DATA_TYPE_UINT8, + DATA_TYPE_BOOLEAN_ARRAY, + DATA_TYPE_INT8_ARRAY, + DATA_TYPE_UINT8_ARRAY, + DATA_TYPE_DOUBLE +} data_type_t; /* * Decompression Entry - lzjb @@ -199,6 +214,7 @@ extern void fletcher_4_native(const void extern void fletcher_4_byteswap(const void *, uint64_t, zio_cksum_t *); 
extern void zio_checksum_SHA256(const void *, uint64_t, zio_cksum_t *); extern int lzjb_decompress(void *, void *, size_t, size_t); +extern int lz4_decompress(void *, void *, size_t, size_t); #endif /* FSYS_ZFS */ diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/dmu.h --- a/tools/libfsimage/zfs/zfs-include/dmu.h Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/zfs-include/dmu.h Sat Oct 26 20:03:06 2013 +0400 @@ -16,11 +16,16 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + #ifndef _SYS_DMU_H #define _SYS_DMU_H @@ -31,6 +36,41 @@ * The DMU also interacts with the SPA. That interface is described in * dmu_spa.h. */ + +#define B_FALSE 0 +#define B_TRUE 1 + +#define DMU_OT_NEWTYPE 0x80 +#define DMU_OT_METADATA 0x40 +#define DMU_OT_BYTESWAP_MASK 0x3f + +#define DMU_OT(byteswap, metadata) \ + (DMU_OT_NEWTYPE | \ + ((metadata) ? DMU_OT_METADATA : 0) | \ + ((byteswap) & DMU_OT_BYTESWAP_MASK)) + +#define DMU_OT_IS_VALID(ot) (((ot) & DMU_OT_NEWTYPE) ? \ + ((ot) & DMU_OT_BYTESWAP_MASK) < DMU_BSWAP_NUMFUNCS : \ + (ot) < DMU_OT_NUMTYPES) + +#define DMU_OT_IS_METADATA(ot) (((ot) & DMU_OT_NEWTYPE) ? 
\ + ((ot) & DMU_OT_METADATA) : \ + dmu_ot[(ot)].ot_metadata) + +typedef enum dmu_object_byteswap { + DMU_BSWAP_UINT8, + DMU_BSWAP_UINT16, + DMU_BSWAP_UINT32, + DMU_BSWAP_UINT64, + DMU_BSWAP_ZAP, + DMU_BSWAP_DNODE, + DMU_BSWAP_OBJSET, + DMU_BSWAP_ZNODE, + DMU_BSWAP_OLDACL, + DMU_BSWAP_ACL, + DMU_BSWAP_NUMFUNCS +} dmu_object_byteswap_t; + typedef enum dmu_object_type { DMU_OT_NONE, /* general: */ @@ -38,8 +78,8 @@ typedef enum dmu_object_type { DMU_OT_OBJECT_ARRAY, /* UINT64 */ DMU_OT_PACKED_NVLIST, /* UINT8 (XDR by nvlist_pack/unpack) */ DMU_OT_PACKED_NVLIST_SIZE, /* UINT64 */ - DMU_OT_BPLIST, /* UINT64 */ - DMU_OT_BPLIST_HDR, /* UINT64 */ + DMU_OT_BPOBJ, /* UINT64 */ + DMU_OT_BPOBJ_HDR, /* UINT64 */ /* spa: */ DMU_OT_SPACE_MAP_HEADER, /* UINT64 */ DMU_OT_SPACE_MAP, /* UINT64 */ @@ -56,7 +96,7 @@ typedef enum dmu_object_type { DMU_OT_DSL_DATASET, /* UINT64 */ /* zpl: */ DMU_OT_ZNODE, /* ZNODE */ - DMU_OT_OLDACL, /* OLD ACL */ + DMU_OT_OLDACL, /* Old ACL */ DMU_OT_PLAIN_FILE_CONTENTS, /* UINT8 */ DMU_OT_DIRECTORY_CONTENTS, /* ZAP */ DMU_OT_MASTER_NODE, /* ZAP */ @@ -79,7 +119,7 @@ typedef enum dmu_object_type { DMU_OT_FUID, /* FUID table (Packed NVLIST UINT8) */ DMU_OT_FUID_SIZE, /* FUID table size UINT64 */ DMU_OT_NEXT_CLONES, /* ZAP */ - DMU_OT_SCRUB_QUEUE, /* ZAP */ + DMU_OT_SCAN_QUEUE, /* ZAP */ DMU_OT_USERGROUP_USED, /* ZAP */ DMU_OT_USERGROUP_QUOTA, /* ZAP */ DMU_OT_USERREFS, /* ZAP */ @@ -89,7 +129,24 @@ typedef enum dmu_object_type { DMU_OT_SA_MASTER_NODE, /* ZAP */ DMU_OT_SA_ATTR_REGISTRATION, /* ZAP */ DMU_OT_SA_ATTR_LAYOUTS, /* ZAP */ - DMU_OT_NUMTYPES + DMU_OT_SCAN_XLATE, /* ZAP */ + DMU_OT_DEDUP, /* fake dedup BP from ddt_bp_create() */ + DMU_OT_DEADLIST, /* ZAP */ + DMU_OT_DEADLIST_HDR, /* UINT64 */ + DMU_OT_DSL_CLONES, /* ZAP */ + DMU_OT_BPOBJ_SUBOBJ, /* UINT64 */ + DMU_OT_NUMTYPES, + + DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE), + DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE), + DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, 
B_FALSE), + DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE), + DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE), + DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE), + DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE), + DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE), + DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE), + DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE), } dmu_object_type_t; typedef enum dmu_objset_type { @@ -107,6 +164,9 @@ typedef enum dmu_objset_type { */ #define DMU_POOL_DIRECTORY_OBJECT 1 #define DMU_POOL_CONFIG "config" +#define DMU_POOL_FEATURES_FOR_READ "features_for_read" +#define DMU_POOL_FEATURES_FOR_WRITE "features_for_write" +#define DMU_POOL_FEATURE_DESCRIPTIONS "feature_descriptions" #define DMU_POOL_ROOT_DATASET "root_dataset" #define DMU_POOL_SYNC_BPLIST "sync_bplist" #define DMU_POOL_ERRLOG_SCRUB "errlog_scrub" diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/dsl_dataset.h --- a/tools/libfsimage/zfs/zfs-include/dsl_dataset.h Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/zfs-include/dsl_dataset.h Sat Oct 26 20:03:06 2013 +0400 @@ -24,8 +24,6 @@ #ifndef _SYS_DSL_DATASET_H #define _SYS_DSL_DATASET_H -#pragma ident "%Z%%M% %I% %E% SMI" - typedef struct dsl_dataset_phys { uint64_t ds_dir_obj; uint64_t ds_prev_snap_obj; diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/dsl_dir.h --- a/tools/libfsimage/zfs/zfs-include/dsl_dir.h Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/zfs-include/dsl_dir.h Sat Oct 26 20:03:06 2013 +0400 @@ -24,8 +24,6 @@ #ifndef _SYS_DSL_DIR_H #define _SYS_DSL_DIR_H -#pragma ident "%Z%%M% %I% %E% SMI" - typedef struct dsl_dir_phys { uint64_t dd_creation_time; /* not actually used */ uint64_t dd_head_dataset_obj; diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/spa.h --- a/tools/libfsimage/zfs/zfs-include/spa.h Thu Oct 24 22:46:20 2013 +0100 +++ 
b/tools/libfsimage/zfs/zfs-include/spa.h Sat Oct 26 20:03:06 2013 +0400 @@ -16,11 +16,16 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + #ifndef _SYS_SPA_H #define _SYS_SPA_H @@ -65,7 +70,7 @@ /* * Size of block to hold the configuration data (a packed nvlist) */ -#define SPA_CONFIG_BLOCKSIZE (1 << 14) +#define SPA_CONFIG_BLOCKSIZE (1ULL << 14) /* * The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB. diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/uberblock_impl.h --- a/tools/libfsimage/zfs/zfs-include/uberblock_impl.h Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/zfs-include/uberblock_impl.h Sat Oct 26 20:03:06 2013 +0400 @@ -24,8 +24,6 @@ #ifndef _SYS_UBERBLOCK_IMPL_H #define _SYS_UBERBLOCK_IMPL_H -#pragma ident "%Z%%M% %I% %E% SMI" - /* * The uberblock version is incremented whenever an incompatible on-disk * format change is made to the SPA, DMU, or ZAP. diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/vdev_impl.h --- a/tools/libfsimage/zfs/zfs-include/vdev_impl.h Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/zfs-include/vdev_impl.h Sat Oct 26 20:03:06 2013 +0400 @@ -24,12 +24,30 @@ #ifndef _SYS_VDEV_IMPL_H #define _SYS_VDEV_IMPL_H +/* helper macros */ +#undef offsetof +#if defined(__GNUC__) +#define offsetof(s, m) __builtin_offsetof(s, m) +#else +#define offsetof(s, m) ((size_t)(&(((s *)0)->m))) +#endif +#define MAX(x, y) ((x) > (y) ? 
(x) : (y)) + #define VDEV_PAD_SIZE (8 << 10) /* 2 padding areas (vl_pad1 and vl_pad2) to skip */ #define VDEV_SKIP_SIZE VDEV_PAD_SIZE * 2 #define VDEV_PHYS_SIZE (112 << 10) #define VDEV_UBERBLOCK_RING (128 << 10) +#define VDEV_UBERBLOCK_SHIFT(sh) \ + MAX((sh), UBERBLOCK_SHIFT) +#define VDEV_UBERBLOCK_COUNT(sh) \ + (VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT(sh)) +#define VDEV_UBERBLOCK_OFFSET(sh, n) \ + offsetof(vdev_label_t, vl_uberblock[(n) << VDEV_UBERBLOCK_SHIFT(sh)]) +#define VDEV_UBERBLOCK_SIZE(sh) \ + (1ULL << VDEV_UBERBLOCK_SHIFT(sh)) + typedef struct vdev_phys { char vp_nvlist[VDEV_PHYS_SIZE - sizeof (zio_eck_t)]; zio_eck_t vp_zbt; diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/zap_leaf.h --- a/tools/libfsimage/zfs/zfs-include/zap_leaf.h Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/zfs-include/zap_leaf.h Sat Oct 26 20:03:06 2013 +0400 @@ -24,8 +24,6 @@ #ifndef _SYS_ZAP_LEAF_H #define _SYS_ZAP_LEAF_H -#pragma ident "%Z%%M% %I% %E% SMI" - #define ZAP_LEAF_MAGIC 0x2AB1EAF /* chunk size = 24 bytes */ diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/zfs.h --- a/tools/libfsimage/zfs/zfs-include/zfs.h Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/zfs-include/zfs.h Sat Oct 26 20:03:06 2013 +0400 @@ -16,9 +16,10 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_FS_ZFS_H @@ -27,7 +28,14 @@ /* * On-disk version number. 
*/ -#define SPA_VERSION 24ULL +#define SPA_VERSION_INITIAL 1ULL +#define SPA_VERSION_BEFORE_FEATURES 28ULL +#define SPA_VERSION 5000ULL +#define SPA_VERSION_FEATURES 5000ULL + +#define SPA_VERSION_IS_SUPPORTED(v) \ + (((v) >= SPA_VERSION_INITIAL && (v) <= SPA_VERSION_BEFORE_FEATURES) || \ + ((v) >= SPA_VERSION_FEATURES && (v) <= SPA_VERSION)) /* * The following are configuration names used in the nvlist describing a pool''s @@ -67,6 +75,7 @@ #define ZPOOL_CONFIG_DDT_HISTOGRAM "ddt_histogram" #define ZPOOL_CONFIG_DDT_OBJ_STATS "ddt_object_stats" #define ZPOOL_CONFIG_DDT_STATS "ddt_stats" +#define ZPOOL_CONFIG_FEATURES_FOR_READ "features_for_read" /* * The persistent vdev state is stored as separate values rather than a single * ''vdev_state'' entry. This is because a device can be in multiple states, such diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/zfs_acl.h --- a/tools/libfsimage/zfs/zfs-include/zfs_acl.h Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/zfs-include/zfs_acl.h Sat Oct 26 20:03:06 2013 +0400 @@ -24,8 +24,6 @@ #ifndef _SYS_FS_ZFS_ACL_H #define _SYS_FS_ZFS_ACL_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifndef _UID_T #define _UID_T typedef unsigned int uid_t; /* UID type */ diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/zio.h --- a/tools/libfsimage/zfs/zfs-include/zio.h Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/zfs-include/zio.h Sat Oct 26 20:03:06 2013 +0400 @@ -20,6 +20,9 @@ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright 2013 by Saso Kiselkov. All rights reserved. 
+ */ #ifndef _ZIO_H #define _ZIO_H @@ -73,6 +76,17 @@ enum zio_compress { ZIO_COMPRESS_OFF, ZIO_COMPRESS_LZJB, ZIO_COMPRESS_EMPTY, + ZIO_COMPRESS_GZIP_1, + ZIO_COMPRESS_GZIP_2, + ZIO_COMPRESS_GZIP_3, + ZIO_COMPRESS_GZIP_4, + ZIO_COMPRESS_GZIP_5, + ZIO_COMPRESS_GZIP_6, + ZIO_COMPRESS_GZIP_7, + ZIO_COMPRESS_GZIP_8, + ZIO_COMPRESS_GZIP_9, + ZIO_COMPRESS_ZLE, + ZIO_COMPRESS_LZ4, ZIO_COMPRESS_FUNCTIONS }; diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs_fletcher.c --- a/tools/libfsimage/zfs/zfs_fletcher.c Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/zfs_fletcher.c Sat Oct 26 20:03:06 2013 +0400 @@ -21,8 +21,6 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include "fsys_zfs.h" diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs_lz4.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/libfsimage/zfs/zfs_lz4.c Sat Oct 26 20:03:06 2013 +0400 @@ -0,0 +1,313 @@ +/* + * LZ4 - Fast LZ compression algorithm + * Header File + * Copyright (C) 2011-2013, Yann Collet. + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at : + * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + * - LZ4 source repository : http://code.google.com/p/lz4/ + */ + +#include "fsys_zfs.h" +#include <string.h> + +static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest, + int isize, int maxOutputSize); + +int +lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len) +{ + const uint8_t *src = s_start; + uint32_t bufsiz = (src[0] << 24) | (src[1] << 16) | (src[2] << 8) | + src[3]; + + /* invalid compressed buffer size encoded at start */ + if (bufsiz + 4 > s_len) + return (1); + + /* + * Returns 0 on success (decompression function returned non-negative) + * and non-zero on failure (decompression function returned negative). + */ + return (LZ4_uncompress_unknownOutputSize(s_start + 4, d_start, bufsiz, + d_len) < 0); +} + +/* + * CPU Feature Detection + */ + +/* 32 or 64 bits ? */ +#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \ + defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \ + defined(__LP64__) || defined(_LP64)) +#define LZ4_ARCH64 1 +#else +#define LZ4_ARCH64 0 +#endif + +/* + * Little Endian or Big Endian? + * Note: overwrite the below #define if you know your architecture endianess. 
+ */ +#if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \ + defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \ + defined(__PPC) || defined(PPC) || defined(__powerpc__) || \ + defined(__powerpc) || defined(powerpc) || \ + ((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))) +#define LZ4_BIG_ENDIAN 1 +#else + /* + * Little Endian assumed. PDP Endian and other very rare endian format + * are unsupported. + */ +#endif + +/* + * Compiler Options + */ +#if __STDC_VERSION__ >= 199901L /* C99 */ +/* "restrict" is a known keyword */ +#else +/* Disable restrict */ +#define restrict +#endif + +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +#define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) \ + | (((x) & 0xffu) << 8))) + +#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) +#define expect(expr, value) (__builtin_expect((expr), (value))) +#else +#define expect(expr, value) (expr) +#endif + +#define likely(expr) expect((expr) != 0, 1) +#define unlikely(expr) expect((expr) != 0, 0) + +/* Basic types */ +#define BYTE uint8_t +#define U16 uint16_t +#define U32 uint32_t +#define S32 int32_t +#define U64 uint64_t + +typedef struct _U16_S { + U16 v; +} U16_S; +typedef struct _U32_S { + U32 v; +} U32_S; +typedef struct _U64_S { + U64 v; +} U64_S; + +#define A64(x) (((U64_S *)(x))->v) +#define A32(x) (((U32_S *)(x))->v) +#define A16(x) (((U16_S *)(x))->v) + +/* + * Constants + */ +#define MINMATCH 4 + +#define COPYLENGTH 8 +#define LASTLITERALS 5 + +#define ML_BITS 4 +#define ML_MASK ((1U<<ML_BITS)-1) +#define RUN_BITS (8-ML_BITS) +#define RUN_MASK ((1U<<RUN_BITS)-1) + +/* + * Architecture-specific macros + */ +#if LZ4_ARCH64 +#define STEPSIZE 8 +#define UARCH U64 +#define AARCH A64 +#define LZ4_COPYSTEP(s, d) A64(d) = A64(s); d += 8; s += 8; +#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d) +#define LZ4_SECURECOPY(s, d, e) if (d < e) LZ4_WILDCOPY(s, d, e) +#define HTYPE U32 +#define 
INITBASE(base) const BYTE* const base = ip +#else +#define STEPSIZE 4 +#define UARCH U32 +#define AARCH A32 +#define LZ4_COPYSTEP(s, d) A32(d) = A32(s); d += 4; s += 4; +#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d); +#define LZ4_SECURECOPY LZ4_WILDCOPY +#define HTYPE const BYTE* +#define INITBASE(base) const int base = 0 +#endif + +#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE)) +#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ + { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; } +#define LZ4_WRITE_LITTLEENDIAN_16(p, i) \ + { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; } +#else +#define LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); } +#define LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = v; p += 2; } +#endif + +/* Macros */ +#define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e); + +/* Decompression functions */ + +static int +LZ4_uncompress_unknownOutputSize(const char *source, + char *dest, int isize, int maxOutputSize) +{ + /* Local Variables */ + const BYTE *restrict ip = (const BYTE *) source; + const BYTE *const iend = ip + isize; + const BYTE *restrict ref; + + BYTE *restrict op = (BYTE *) dest; + BYTE *const oend = op + maxOutputSize; + BYTE *cpy; + + size_t dec[] = { 0, 3, 2, 3, 0, 0, 0, 0 }; + + /* Main Loop */ + while (ip < iend) { + BYTE token; + int length; + + /* get runlength */ + token = *ip++; + if ((length = (token >> ML_BITS)) == RUN_MASK) { + int s = 255; + while ((ip < iend) && (s == 255)) { + s = *ip++; + length += s; + } + } + /* copy literals */ + cpy = op + length; + if ((cpy > oend - COPYLENGTH) || + (ip + length > iend - COPYLENGTH)) { + if (cpy > oend) + /* + * Error: request to write beyond destination + * buffer. + */ + goto _output_error; + if (ip + length > iend) + /* + * Error : request to read beyond source + * buffer. 
+ */ + goto _output_error; + memcpy(op, ip, length); + op += length; + ip += length; + if (ip < iend) + /* Error : LZ4 format violation */ + goto _output_error; + /* Necessarily EOF, due to parsing restrictions. */ + break; + } + LZ4_WILDCOPY(ip, op, cpy); + ip -= (op - cpy); + op = cpy; + + /* get offset */ + LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); + ip += 2; + if (ref < (BYTE * const) dest) + /* + * Error: offset creates reference outside of + * destination buffer. + */ + goto _output_error; + + /* get matchlength */ + if ((length = (token & ML_MASK)) == ML_MASK) { + while (ip < iend) { + int s = *ip++; + length += s; + if (s == 255) + continue; + break; + } + } + /* copy repeated sequence */ + if unlikely(op - ref < STEPSIZE) { +#if LZ4_ARCH64 + size_t dec2table[] = { 0, 0, 0, -1, 0, 1, 2, 3 }; + size_t dec2 = dec2table[op - ref]; +#else + const int dec2 = 0; +#endif + *op++ = *ref++; + *op++ = *ref++; + *op++ = *ref++; + *op++ = *ref++; + ref -= dec[op - ref]; + A32(op) = A32(ref); + op += STEPSIZE - 4; + ref -= dec2; + } else { + LZ4_COPYSTEP(ref, op); + } + cpy = op + length - (STEPSIZE - 4); + if (cpy > oend - COPYLENGTH) { + if (cpy > oend) + /* + * Error: request to write outside of + * destination buffer. + */ + goto _output_error; + LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); + while (op < cpy) + *op++ = *ref++; + op = cpy; + if (op == oend) + /* + * Check EOF (should never happen, since last + * 5 bytes are supposed to be literals). + */ + break; + continue; + } + LZ4_SECURECOPY(ref, op, cpy); + op = cpy; /* correction */ + } + + /* end of decoding */ + return (int)(((char *)op) - dest); + + /* write overflow error detected */ + _output_error: + return (int)(-(((char *)ip) - source)); +} diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs_lzjb.c --- a/tools/libfsimage/zfs/zfs_lzjb.c Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/zfs_lzjb.c Sat Oct 26 20:03:06 2013 +0400 @@ -21,8 +21,6 @@ * Use is subject to license terms. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - #include "fsys_zfs.h" #define MATCH_BITS 6 @@ -34,10 +32,10 @@ int lzjb_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len) { - uint8_t *src = s_start; - uint8_t *dst = d_start; - uint8_t *d_end = (uint8_t *)d_start + d_len; - uint8_t *cpy, copymap = ''\0''; + uchar_t *src = s_start; + uchar_t *dst = d_start; + uchar_t *d_end = (uchar_t *)d_start + d_len; + uchar_t *cpy, copymap = ''\0''; int copymask = 1 << (NBBY - 1); while (dst < d_end) { @@ -49,7 +47,7 @@ lzjb_decompress(void *s_start, void *d_s int mlen = (src[0] >> (NBBY - MATCH_BITS)) + MATCH_MIN; int offset = ((src[0] << NBBY) | src[1]) & OFFSET_MASK; src += 2; - if ((cpy = dst - offset) < (uint8_t *)d_start) + if ((cpy = dst - offset) < (uchar_t *)d_start) return (-1); while (--mlen >= 0 && dst < d_end) *dst++ = *cpy++; diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs_sha256.c --- a/tools/libfsimage/zfs/zfs_sha256.c Thu Oct 24 22:46:20 2013 +0100 +++ b/tools/libfsimage/zfs/zfs_sha256.c Sat Oct 26 20:03:06 2013 +0400 @@ -21,8 +21,6 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include "fsys_zfs.h" /*
Wei Liu
2013-Oct-27 00:07 UTC
Re: [PATCH] 1. changes for vdiskadm on illumos based platform
On Sat, Oct 26, 2013 at 08:09:04PM +0400, Igor Kozhkuhov wrote:> 2. update ZFS in libfsimage from illumos for pygrub >Wow, this is such a huge diff. TBH this is not a proper patch. Please have a look at http://wiki.xen.org/wiki/Submitting_Xen_Patches And don't forget to CC respective maintainers. In your case you should CC Ian Campbell and Ian Jackson as they are toolstack maintainers. Wei.> diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/Rules.mk > --- a/tools/libfsimage/Rules.mk Thu Oct 24 22:46:20 2013 +0100 > +++ b/tools/libfsimage/Rules.mk Sat Oct 26 20:03:06 2013 +0400 > @@ -2,11 +2,19 @@ include $(XEN_ROOT)/tools/Rules.mk > > CFLAGS += -Wno-unknown-pragmas -I$(XEN_ROOT)/tools/libfsimage/common/ -DFSIMAGE_FSDIR=\"$(FSDIR)\" > CFLAGS += -Werror -D_GNU_SOURCE > +# need for build illumos ZFS > +CFLAGS += -Wno-parentheses > +CFLAGS += -Wno-unused > +# end > LDFLAGS += -L../common/ > > PIC_OBJS := $(patsubst %.c,%.opic,$(LIB_SRCS-y)) > > -FSDIR = $(LIBDIR)/fs > +FSDIR-y = $(LIBDIR)/fs/$(FS) > +FSDIR-$(CONFIG_SunOS)-x86_64 = $(PREFIX)/lib/fs/$(FS)/64 > +FSDIR-$(CONFIG_SunOS)-x86_32 = $(PREFIX)/lib/fs/$(FS)/ > +FSDIR-$(CONFIG_SunOS) = $(FSDIR-$(CONFIG_SunOS)-$(XEN_TARGET_ARCH)) > +FSDIR = $(FSDIR-y) > > FSLIB = fsimage.so > > @@ -15,11 +23,14 @@ fs-all: $(FSLIB) > > .PHONY: fs-install > fs-install: fs-all > - $(INSTALL_DIR) $(DESTDIR)$(FSDIR)/$(FS) > - $(INSTALL_PROG) $(FSLIB) $(DESTDIR)$(FSDIR)/$(FS) > + $(INSTALL_DIR) $(DESTDIR)$(FSDIR) > + $(INSTALL_PROG) $(FSLIB) $(DESTDIR)$(FSDIR) > + > +BUILD_LINE-y = $(CC) $(LDFLAGS) $(SHLIB_LDFLAGS) -o $@ $^ -lfsimage $(FS_LIBDEPS) $(APPEND_LDFLAGS) > +BUILD_LINE-$(CONFIG_SunOS) = $(CC) $(CFLAGS) $(LDFLAGS) $(SHLIB_LDFLAGS) -o $@ $^ -lfsimage $(FS_LIBDEPS) > > $(FSLIB): $(PIC_OBJS) > - $(CC) $(LDFLAGS) $(SHLIB_LDFLAGS) -o $@ $^ -lfsimage $(FS_LIBDEPS) $(APPEND_LDFLAGS) > + $(BUILD_LINE-y) > > clean distclean:: > rm -f $(PIC_OBJS) $(FSLIB) $(DEPS) > diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/common/Makefile 
> --- a/tools/libfsimage/common/Makefile Thu Oct 24 22:46:20 2013 +0100 > +++ b/tools/libfsimage/common/Makefile Sat Oct 26 20:03:06 2013 +0400 > @@ -4,11 +4,16 @@ include $(XEN_ROOT)/tools/libfsimage/Rul > MAJOR = 1.0 > MINOR = 0 > > -LDFLAGS-$(CONFIG_SunOS) = -Wl,-M -Wl,mapfile-SunOS > +CFLAGS-ADDS-$(CONFIG_SunOS) += -Werror -Wp,-MD,.$(@F).d $(ADD_INCLUDES) > +CFLAGS-ADDS-$(CONFIG_SunOS) += -I/usr/include/libxml2 > +CFLAGS-ADDS-$(CONFIG_Linux)> + > +LDFLAGS-$(CONFIG_SunOS) = -Wl,-M -Wl,mapfile-SunOS $(ADD_PATH_LIBS) > LDFLAGS-$(CONFIG_Linux) = -Wl,mapfile-GNU > LDFLAGS = $(LDFLAGS-y) > > CFLAGS += $(PTHREAD_CFLAGS) > +CFLAGS += $(CFLAGS-ADDS-y) > LDFLAGS += $(PTHREAD_LDFLAGS) > > LIB_SRCS-y = fsimage.c fsimage_plugin.c fsimage_grub.c > @@ -32,15 +37,18 @@ install: all > $(INSTALL_DATA) fsimage_grub.h $(DESTDIR)$(INCLUDEDIR) > > clean distclean:: > - rm -f $(LIB) > + rm -f $(PIC_OBJS) $(LIB) $(DEPS) > > libfsimage.so: libfsimage.so.$(MAJOR) > ln -sf $< $@ > libfsimage.so.$(MAJOR): libfsimage.so.$(MAJOR).$(MINOR) > ln -sf $< $@ > > +BUILD_LINE-y = $(CC) $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,libfsimage.so.$(MAJOR) $(SHLIB_LDFLAGS) -o $@ $^ $(PTHREAD_LIBS) > +BUILD_LINE-$(CONFIG_SunOS) = $(CC) $(CFLAGS $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,libfsimage.so.$(MAJOR) $(SHLIB_LDFLAGS) -o $@ $^ $(PTHREAD_LIBS) -lvdisk -lvboxdisk -lxml2 -lgen -lc > + > libfsimage.so.$(MAJOR).$(MINOR): $(PIC_OBJS) > - $(CC) $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,libfsimage.so.$(MAJOR) $(SHLIB_LDFLAGS) -o $@ $^ $(PTHREAD_LIBS) > + $(BUILD_LINE-y) > > -include $(DEPS) > > diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/common/fsimage.c > --- a/tools/libfsimage/common/fsimage.c Thu Oct 24 22:46:20 2013 +0100 > +++ b/tools/libfsimage/common/fsimage.c Sat Oct 26 20:03:06 2013 +0400 > @@ -36,22 +36,43 @@ > > static pthread_mutex_t fsi_lock = PTHREAD_MUTEX_INITIALIZER; > > +#ifdef _VDISK_ > +#include "vdisk.h" > +#endif > + > fsi_t *fsi_open_fsimage(const char *path, uint64_t off, const char 
*options) > { > fsi_t *fsi = NULL; > - int fd; > + int fd = -1; > int err; > + void *pvd = NULL; > > +#ifdef _VDISK_ > + if (vdisk_check_vdisk(path)) { > + if ((pvd = vdisk_open(path)) == NULL) > + goto fail; > + } else { > + if ((fd = open(path, O_RDONLY)) == -1) > + goto fail; > + } > +#else > if ((fd = open(path, O_RDONLY)) == -1) > goto fail; > +#endif > > if ((fsi = malloc(sizeof(*fsi))) == NULL) > goto fail; > > - fsi->f_fd = fd; > fsi->f_off = off; > fsi->f_data = NULL; > fsi->f_bootstring = NULL; > + if (pvd) { > + fsi->f_fd = NULL; > + fsi->f_pvdisk = pvd; > + } else { > + fsi->f_fd = fd; > + fsi->f_pvdisk = NULL; > + } > > pthread_mutex_lock(&fsi_lock); > err = find_plugin(fsi, path, options); > @@ -73,8 +94,16 @@ fail: > void fsi_close_fsimage(fsi_t *fsi) > { > pthread_mutex_lock(&fsi_lock); > - fsi->f_plugin->fp_ops->fpo_umount(fsi); > - (void) close(fsi->f_fd); > + fsi->f_plugin->fp_ops->fpo_umount(fsi); > +#ifdef _VDISK_ > + if (fsi->f_pvdisk) { > + vdisk_close(fsi->f_pvdisk); > + } else { > + (void) close(fsi->f_fd); > + } > +#else > + (void) close(fsi->f_fd); > +#endif > free(fsi); > pthread_mutex_unlock(&fsi_lock); > } > diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/common/fsimage_grub.c > --- a/tools/libfsimage/common/fsimage_grub.c Thu Oct 24 22:46:20 2013 +0100 > +++ b/tools/libfsimage/common/fsimage_grub.c Sat Oct 26 20:03:06 2013 +0400 > @@ -31,6 +31,10 @@ > #include "fsimage_grub.h" > #include "fsimage_priv.h" > > +#ifdef _VDISK_ > +#include "vdisk.h" > +#endif > + > static char *disk_read_junk; > > typedef struct fsig_data { > @@ -176,7 +180,17 @@ fsig_devread(fsi_file_t *ffi, unsigned i > r = SECTOR_SIZE - n; > if (r > bufsize) > r = bufsize; > +#ifdef _VDISK_ > + if (ffi->ff_fsi->f_pvdisk) { > + ret = vdisk_read(ffi->ff_fsi->f_pvdisk, > + (off_t)(off - n), tmp, SECTOR_SIZE); > + } else { > + ret = pread(ffi->ff_fsi->f_fd, tmp, SECTOR_SIZE, > + off - n); > + } > +#else > ret = pread(ffi->ff_fsi->f_fd, tmp, SECTOR_SIZE, off - n); > 
+#endif > if (ret < n + r) > return (0); > memcpy(buf, tmp + n, r); > @@ -187,7 +201,16 @@ fsig_devread(fsi_file_t *ffi, unsigned i > > n = (bufsize & ~(SECTOR_SIZE - 1)); > if (n > 0) { > +#ifdef _VDISK_ > + if (ffi->ff_fsi->f_pvdisk) { > + ret = vdisk_read(ffi->ff_fsi->f_pvdisk, (off_t)off, > + buf, n); > + } else { > + ret = pread(ffi->ff_fsi->f_fd, buf, n, off); > + } > +#else > ret = pread(ffi->ff_fsi->f_fd, buf, n, off); > +#endif > if (ret < n) > return (0); > buf += n; > @@ -195,7 +218,16 @@ fsig_devread(fsi_file_t *ffi, unsigned i > off += n; > } > if (bufsize > 0) { > +#ifdef _VDISK_ > + if (ffi->ff_fsi->f_pvdisk) { > + ret = vdisk_read(ffi->ff_fsi->f_pvdisk, (off_t)off, > + tmp, SECTOR_SIZE); > + } else { > + ret = pread(ffi->ff_fsi->f_fd, tmp, SECTOR_SIZE, off); > + } > +#else > ret = pread(ffi->ff_fsi->f_fd, tmp, SECTOR_SIZE, off); > +#endif > if (ret < bufsize) > return (0); > memcpy(buf, tmp, bufsize); > diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/common/fsimage_plugin.c > --- a/tools/libfsimage/common/fsimage_plugin.c Thu Oct 24 22:46:20 2013 +0100 > +++ b/tools/libfsimage/common/fsimage_plugin.c Sat Oct 26 20:03:06 2013 +0400 > @@ -122,6 +122,7 @@ fail: > static int load_plugins(void) > { > const char *fsdir = getenv("FSIMAGE_FSDIR"); > + const char *isadir = ""; > struct dirent *dp = NULL; > struct dirent *dpp; > DIR *dir = NULL; > @@ -130,8 +131,26 @@ static int load_plugins(void) > int err; > int ret = -1; > > +#if defined(FSIMAGE_FSDIR) > if (fsdir == NULL) > fsdir = FSIMAGE_FSDIR; > +#elif defined(__sun__) > + if (fsdir == NULL) > + fsdir = "/usr/lib/fs"; > + > + if (sizeof(void *) == 8) > + isadir = "64/"; > +#elif defined(__ia64__) > + if (fsdir == NULL) > + fsdir = "/usr/lib/fs"; > +#else > + if (fsdir == NULL) { > + if (sizeof(void *) == 8) > + fsdir = "/usr/lib64/fs"; > + else > + fsdir = "/usr/lib/fs"; > + } > +#endif > > if ((name_max = pathconf(fsdir, _PC_NAME_MAX)) == -1) > goto fail; > @@ -153,8 +172,8 @@ static int 
load_plugins(void) > if (strcmp(dpp->d_name, "..") == 0) > continue; > > - (void) snprintf(tmp, name_max, "%s/%s/fsimage.so", fsdir, > - dpp->d_name); > + (void) snprintf(tmp, name_max, "%s/%s/%sfsimage.so", fsdir, > + dpp->d_name, isadir); > > if (init_plugin(tmp) != 0) > goto fail; > diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/common/fsimage_priv.h > --- a/tools/libfsimage/common/fsimage_priv.h Thu Oct 24 22:46:20 2013 +0100 > +++ b/tools/libfsimage/common/fsimage_priv.h Sat Oct 26 20:03:06 2013 +0400 > @@ -47,6 +47,7 @@ struct fsi { > void *f_data; > fsi_plugin_t *f_plugin; > char *f_bootstring; > + void *f_pvdisk; > }; > > struct fsi_file { > diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/Makefile > --- a/tools/libfsimage/zfs/Makefile Thu Oct 24 22:46:20 2013 +0100 > +++ b/tools/libfsimage/zfs/Makefile Sat Oct 26 20:03:06 2013 +0400 > @@ -25,7 +25,7 @@ > XEN_ROOT = $(CURDIR)/../../.. > > CFLAGS += -DFSYS_ZFS -DFSIMAGE -I$(XEN_ROOT)/tools/libfsimage/zfs > -LIB_SRCS-y = zfs_lzjb.c zfs_sha256.c zfs_fletcher.c fsi_zfs.c fsys_zfs.c > +LIB_SRCS-y = zfs_lzjb.c zfs_sha256.c zfs_fletcher.c fsi_zfs.c fsys_zfs.c zfs_lz4.c > > FS = zfs > > diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/fsi_zfs.h > --- a/tools/libfsimage/zfs/fsi_zfs.h Thu Oct 24 22:46:20 2013 +0100 > +++ b/tools/libfsimage/zfs/fsi_zfs.h Sat Oct 26 20:03:06 2013 +0400 > @@ -36,6 +36,8 @@ > > /* Boot signature related defines for the findroot command */ > #define BOOTSIGN_DIR "/boot/grub/bootsign" > +#define BOOTSIGN_ARGLEN (MAXNAMELEN + 10) /* (<sign>,0,d) */ > +#define BOOTSIGN_LEN (sizeof (BOOTSIGN_DIR) + 1 + BOOTSIGN_ARGLEN) > #define BOOTSIGN_BACKUP "/etc/bootsign" > > /* Maybe redirect memory requests through grub_scratch_mem. 
*/ > @@ -60,6 +62,7 @@ > #define grub_strstr strstr > #define grub_strlen strlen > #define grub_memmove memmove > +#define grub_isspace isspace > > extern char current_bootpath[MAXPATHLEN]; > extern char current_rootpool[MAXNAMELEN]; > diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/fsys_zfs.c > --- a/tools/libfsimage/zfs/fsys_zfs.c Thu Oct 24 22:46:20 2013 +0100 > +++ b/tools/libfsimage/zfs/fsys_zfs.c Sat Oct 26 20:03:06 2013 +0400 > @@ -16,12 +16,18 @@ > * along with this program; if not, write to the Free Software > * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. > */ > + > /* > * Copyright 2010 Sun Microsystems, Inc. All rights reserved. > * Use is subject to license terms. > */ > > /* > + * Copyright (c) 2012 by Delphix. All rights reserved. > + * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. > + */ > + > +/* > * The zfs plug-in routines for GRUB are: > * > * zfs_mount() - locates a valid uberblock of the root pool and reads > @@ -69,7 +75,18 @@ decomp_entry_t decomp_table[ZIO_COMPRESS > {"on", lzjb_decompress}, /* ZIO_COMPRESS_ON */ > {"off", 0}, /* ZIO_COMPRESS_OFF */ > {"lzjb", lzjb_decompress}, /* ZIO_COMPRESS_LZJB */ > - {"empty", 0} /* ZIO_COMPRESS_EMPTY */ > + {"empty", 0}, /* ZIO_COMPRESS_EMPTY */ > + {"gzip-1", 0}, /* ZIO_COMPRESS_GZIP_1 */ > + {"gzip-2", 0}, /* ZIO_COMPRESS_GZIP_2 */ > + {"gzip-3", 0}, /* ZIO_COMPRESS_GZIP_3 */ > + {"gzip-4", 0}, /* ZIO_COMPRESS_GZIP_4 */ > + {"gzip-5", 0}, /* ZIO_COMPRESS_GZIP_5 */ > + {"gzip-6", 0}, /* ZIO_COMPRESS_GZIP_6 */ > + {"gzip-7", 0}, /* ZIO_COMPRESS_GZIP_7 */ > + {"gzip-8", 0}, /* ZIO_COMPRESS_GZIP_8 */ > + {"gzip-9", 0}, /* ZIO_COMPRESS_GZIP_9 */ > + {"zle", 0}, /* ZIO_COMPRESS_ZLE */ > + {"lz4", lz4_decompress} /* ZIO_COMPRESS_LZ4 */ > }; > > static int zio_read_data(blkptr_t *bp, void *buf, char *stack); > @@ -80,8 +97,8 @@ static int zio_read_data(blkptr_t *bp, v > static int > zfs_bcmp(const void *s1, const void *s2, size_t n) > { > - const uint8_t *ps1 = s1; > - 
const uint8_t *ps2 = s2; > + const uchar_t *ps1 = s1; > + const uchar_t *ps2 = s2; > > if (s1 != s2 && n != 0) { > do { > @@ -118,16 +135,16 @@ zio_checksum_off(const void *buf, uint64 > > /* Checksum Table and Values */ > zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = { > - { { NULL, NULL }, 0, 0, "inherit" }, > - { { NULL, NULL }, 0, 0, "on" }, > - { { zio_checksum_off, zio_checksum_off }, 0, 0, "off" }, > - { { zio_checksum_SHA256, zio_checksum_SHA256 }, 1, 1, "label" }, > - { { zio_checksum_SHA256, zio_checksum_SHA256 }, 1, 1, "gang_header" }, > - { { NULL, NULL }, 0, 0, "zilog" }, > - { { fletcher_2_native, fletcher_2_byteswap }, 0, 0, "fletcher2" }, > - { { fletcher_4_native, fletcher_4_byteswap }, 1, 0, "fletcher4" }, > - { { zio_checksum_SHA256, zio_checksum_SHA256 }, 1, 0, "SHA256" }, > - { { NULL, NULL }, 0, 0, "zilog2" } > + {{NULL, NULL}, 0, 0, "inherit"}, > + {{NULL, NULL}, 0, 0, "on"}, > + {{zio_checksum_off, zio_checksum_off}, 0, 0, "off"}, > + {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, "label"}, > + {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, "gang_header"}, > + {{NULL, NULL}, 0, 0, "zilog"}, > + {{fletcher_2_native, fletcher_2_byteswap}, 0, 0, "fletcher2"}, > + {{fletcher_4_native, fletcher_4_byteswap}, 1, 0, "fletcher4"}, > + {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 0, "SHA256"}, > + {{NULL, NULL}, 0, 0, "zilog2"}, > }; > > /* > @@ -217,18 +234,13 @@ vdev_uberblock_compare(uberblock_t *ub1, > * Three pieces of information are needed to verify an uberblock: the magic > * number, the version number, and the checksum. 
> * > - * Currently Implemented: version number, magic number > - * Need to Implement: checksum > - * > * Return: > * 0 - Success > * -1 - Failure > */ > static int > -uberblock_verify(uberblock_phys_t *ub, uint64_t offset) > +uberblock_verify(uberblock_t *uber, uint64_t ub_size, uint64_t offset) > { > - > - uberblock_t *uber = &ub->ubp_uberblock; > blkptr_t bp; > > BP_ZERO(&bp); > @@ -236,11 +248,11 @@ uberblock_verify(uberblock_phys_t *ub, u > BP_SET_BYTEORDER(&bp, ZFS_HOST_BYTEORDER); > ZIO_SET_CHECKSUM(&bp.blk_cksum, offset, 0, 0, 0); > > - if (zio_checksum_verify(&bp, (char *)ub, UBERBLOCK_SIZE) != 0) > + if (zio_checksum_verify(&bp, (char *)uber, ub_size) != 0) > return (-1); > > if (uber->ub_magic == UBERBLOCK_MAGIC && > - uber->ub_version > 0 && uber->ub_version <= SPA_VERSION) > + SPA_VERSION_IS_SUPPORTED(uber->ub_version)) > return (0); > > return (-1); > @@ -252,25 +264,28 @@ uberblock_verify(uberblock_phys_t *ub, u > * Success - Pointer to the best uberblock. > * Failure - NULL > */ > -static uberblock_phys_t * > -find_bestub(uberblock_phys_t *ub_array, uint64_t sector) > +static uberblock_t * > +find_bestub(char *ub_array, uint64_t ashift, uint64_t sector) > { > - uberblock_phys_t *ubbest = NULL; > - uint64_t offset; > + uberblock_t *ubbest = NULL; > + uberblock_t *ubnext; > + uint64_t offset, ub_size; > int i; > > - for (i = 0; i < (VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT); i++) { > + ub_size = VDEV_UBERBLOCK_SIZE(ashift); > + > + for (i = 0; i < VDEV_UBERBLOCK_COUNT(ashift); i++) { > + ubnext = (uberblock_t *)ub_array; > + ub_array += ub_size; > offset = (sector << SPA_MINBLOCKSHIFT) + > - VDEV_UBERBLOCK_OFFSET(i); > - if (uberblock_verify(&ub_array[i], offset) == 0) { > - if (ubbest == NULL) { > - ubbest = &ub_array[i]; > - } else if (vdev_uberblock_compare( > - &(ub_array[i].ubp_uberblock), > - &(ubbest->ubp_uberblock)) > 0) { > - ubbest = &ub_array[i]; > - } > - } > + VDEV_UBERBLOCK_OFFSET(ashift, i); > + > + if (uberblock_verify(ubnext, 
ub_size, offset) != 0) > + continue; > + > + if (ubbest == NULL || > + vdev_uberblock_compare(ubnext, ubbest) > 0) > + ubbest = ubnext; > } > > return (ubbest); > @@ -295,7 +310,7 @@ zio_read_gang(blkptr_t *bp, dva_t *dva, > zio_gb = (zio_gbh_phys_t *)stack; > stack += SPA_GANGBLOCKSIZE; > offset = DVA_GET_OFFSET(dva); > - sector = DVA_OFFSET_TO_PHYS_SECTOR(offset); > + sector = DVA_OFFSET_TO_PHYS_SECTOR(offset); > > /* read in the gang block header */ > if (devread(sector, 0, SPA_GANGBLOCKSIZE, (char *)zio_gb) == 0) { > @@ -354,8 +369,8 @@ zio_read_data(blkptr_t *bp, void *buf, c > } else { > /* read in a data block */ > offset = DVA_GET_OFFSET(&bp->blk_dva[i]); > - sector = DVA_OFFSET_TO_PHYS_SECTOR(offset); > - if (devread(sector, 0, psize, buf)) > + sector = DVA_OFFSET_TO_PHYS_SECTOR(offset); > + if (devread(sector, 0, psize, buf) != 0) > return (0); > } > } > @@ -399,7 +414,7 @@ zio_read(blkptr_t *bp, void *buf, char * > stack += psize; > } > > - if (zio_read_data(bp, buf, stack)) { > + if (zio_read_data(bp, buf, stack) != 0) { > grub_printf("zio_read_data failed\n"); > return (ERR_FSYS_CORRUPT); > } > @@ -409,8 +424,13 @@ zio_read(blkptr_t *bp, void *buf, char * > return (ERR_FSYS_CORRUPT); > } > > - if (comp != ZIO_COMPRESS_OFF) > - decomp_table[comp].decomp_func(buf, retbuf, psize, lsize); > + if (comp != ZIO_COMPRESS_OFF) { > + if (decomp_table[comp].decomp_func(buf, retbuf, psize, > + lsize) != 0) { > + grub_printf("zio_read decompression failed\n"); > + return (ERR_FSYS_CORRUPT); > + } > + } > > return (0); > } > @@ -446,7 +466,7 @@ dmu_read(dnode_phys_t *dn, uint64_t blki > grub_memset(buf, 0, > dn->dn_datablkszsec << SPA_MINBLOCKSHIFT); > break; > - } else if ((errnum = zio_read(bp, tmpbuf, stack))) { > + } else if (errnum = zio_read(bp, tmpbuf, stack)) { > return (errnum); > } > > @@ -465,13 +485,13 @@ dmu_read(dnode_phys_t *dn, uint64_t blki > * errnum - failure > */ > static int > -mzap_lookup(mzap_phys_t *zapobj, int objsize, char *name, > 
+mzap_lookup(mzap_phys_t *zapobj, int objsize, const char *name, > uint64_t *value) > { > int i, chunks; > mzap_ent_phys_t *mzap_ent = zapobj->mz_chunk; > > - chunks = objsize/MZAP_ENT_LEN - 1; > + chunks = objsize / MZAP_ENT_LEN - 1; > for (i = 0; i < chunks; i++) { > if (grub_strcmp(mzap_ent[i].mze_name, name) == 0) { > *value = mzap_ent[i].mze_value; > @@ -511,8 +531,8 @@ zap_hash(uint64_t salt, const char *name > /* > * Only use 28 bits, since we need 4 bits in the cookie for the > * collision differentiator. We MUST use the high bits, since > - * those are the onces that we first pay attention to when > - * chosing the bucket. > + * those are the ones that we first pay attention to when > + * choosing the bucket. > */ > crc &= ~((1ULL << (64 - 28)) - 1); > > @@ -617,7 +637,7 @@ zap_leaf_lookup(zap_leaf_phys_t *l, int > */ > static int > fzap_lookup(dnode_phys_t *zap_dnode, zap_phys_t *zap, > - char *name, uint64_t *value, char *stack) > + const char *name, uint64_t *value, char *stack) > { > zap_leaf_phys_t *l; > uint64_t hash, idx, blkid; > @@ -645,7 +665,7 @@ fzap_lookup(dnode_phys_t *zap_dnode, zap > stack += 1<<blksft; > if ((1<<blksft) < sizeof (zap_leaf_phys_t)) > return (ERR_FSYS_CORRUPT); > - if ((errnum = dmu_read(zap_dnode, blkid, l, stack))) > + if (errnum = dmu_read(zap_dnode, blkid, l, stack)) > return (errnum); > > return (zap_leaf_lookup(l, blksft, hash, name, value)); > @@ -660,7 +680,8 @@ fzap_lookup(dnode_phys_t *zap_dnode, zap > * errnum - failure > */ > static int > -zap_lookup(dnode_phys_t *zap_dnode, char *name, uint64_t *val, char *stack) > +zap_lookup(dnode_phys_t *zap_dnode, const char *name, uint64_t *val, > + char *stack) > { > uint64_t block_type; > int size; > @@ -671,7 +692,7 @@ zap_lookup(dnode_phys_t *zap_dnode, char > size = zap_dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT; > stack += size; > > - if ((errnum = dmu_read(zap_dnode, 0, zapbuf, stack))) > + if ((errnum = dmu_read(zap_dnode, 0, zapbuf, stack)) != 0) > return 
(errnum); > > block_type = *((uint64_t *)zapbuf); > @@ -687,6 +708,56 @@ zap_lookup(dnode_phys_t *zap_dnode, char > return (ERR_FSYS_CORRUPT); > } > > +typedef struct zap_attribute { > + int za_integer_length; > + uint64_t za_num_integers; > + uint64_t za_first_integer; > + char *za_name; > +} zap_attribute_t; > + > +typedef int (zap_cb_t)(zap_attribute_t *za, void *arg, char *stack); > + > +static int > +zap_iterate(dnode_phys_t *zap_dnode, zap_cb_t *cb, void *arg, char *stack) > +{ > + uint32_t size = zap_dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT; > + zap_attribute_t za; > + int i; > + mzap_phys_t *mzp = (mzap_phys_t *)stack; > + stack += size; > + > + if ((errnum = dmu_read(zap_dnode, 0, mzp, stack)) != 0) > + return (errnum); > + > + /* > + * Iteration over fatzap objects has not yet been implemented. > + * If we encounter a pool in which there are more features for > + * read than can fit inside a microzap (i.e., more than 2048 > + * features for read), we can add support for fatzap iteration. > + * For now, fail. > + */ > + if (mzp->mz_block_type != ZBT_MICRO) { > + grub_printf("feature information stored in fatzap, pool " > + "version not supported\n"); > + return (1); > + } > + > + za.za_integer_length = 8; > + za.za_num_integers = 1; > + for (i = 0; i < size / MZAP_ENT_LEN - 1; i++) { > + mzap_ent_phys_t *mzep = &mzp->mz_chunk[i]; > + int err; > + > + za.za_first_integer = mzep->mze_value; > + za.za_name = mzep->mze_name; > + err = cb(&za, arg, stack); > + if (err != 0) > + return (err); > + } > + > + return (0); > +} > + > /* > * Get the dnode of an object number from the metadnode of an object set. 
> * > @@ -731,7 +802,7 @@ dnode_get(dnode_phys_t *mdn, uint64_t ob > stack += blksz; > } > > - if ((errnum = dmu_read(mdn, blkid, (char *)dnbuf, stack))) > + if (errnum = dmu_read(mdn, blkid, (char *)dnbuf, stack)) > return (errnum); > > grub_memmove(buf, &dnbuf[idx], DNODE_SIZE); > @@ -766,6 +837,24 @@ is_top_dataset_file(char *str) > return (0); > } > > +static int > +check_feature(zap_attribute_t *za, void *arg, char *stack) > +{ > + const char **names = arg; > + int i; > + > + if (za->za_first_integer == 0) > + return (0); > + > + for (i = 0; names[i] != NULL; i++) { > + if (grub_strcmp(za->za_name, names[i]) == 0) { > + return (0); > + } > + } > + grub_printf("missing feature for read ''%s''\n", za->za_name); > + return (ERR_NEWER_VERSION); > +} > + > /* > * Get the file dnode for a given file name where mdn is the meta dnode > * for this ZFS object set. When found, place the file dnode in dn. > @@ -782,40 +871,40 @@ dnode_get_path(dnode_phys_t *mdn, char * > uint64_t objnum, version; > char *cname, ch; > > - if ((errnum = dnode_get(mdn, MASTER_NODE_OBJ, DMU_OT_MASTER_NODE, > - dn, stack))) > + if (errnum = dnode_get(mdn, MASTER_NODE_OBJ, DMU_OT_MASTER_NODE, > + dn, stack)) > return (errnum); > > - if ((errnum = zap_lookup(dn, ZPL_VERSION_STR, &version, stack))) > + if (errnum = zap_lookup(dn, ZPL_VERSION_STR, &version, stack)) > return (errnum); > if (version > ZPL_VERSION) > return (-1); > > - if ((errnum = zap_lookup(dn, ZFS_ROOT_OBJ, &objnum, stack))) > + if (errnum = zap_lookup(dn, ZFS_ROOT_OBJ, &objnum, stack)) > return (errnum); > > - if ((errnum = dnode_get(mdn, objnum, DMU_OT_DIRECTORY_CONTENTS, > - dn, stack))) > + if (errnum = dnode_get(mdn, objnum, DMU_OT_DIRECTORY_CONTENTS, > + dn, stack)) > return (errnum); > > /* skip leading slashes */ > while (*path == ''/'') > path++; > > - while (*path && !isspace((uint8_t)*path)) { > + while (*path && !grub_isspace(*path)) { > > /* get the next component name */ > cname = path; > - while (*path && 
!isspace((uint8_t)*path) && *path != ''/'') > + while (*path && !grub_isspace(*path) && *path != ''/'') > path++; > ch = *path; > *path = 0; /* ensure null termination */ > > - if ((errnum = zap_lookup(dn, cname, &objnum, stack))) > + if (errnum = zap_lookup(dn, cname, &objnum, stack)) > return (errnum); > > objnum = ZFS_DIRENT_OBJ(objnum); > - if ((errnum = dnode_get(mdn, objnum, 0, dn, stack))) > + if (errnum = dnode_get(mdn, objnum, 0, dn, stack)) > return (errnum); > > *path = ch; > @@ -843,8 +932,8 @@ get_default_bootfsobj(dnode_phys_t *mosm > dnode_phys_t *dn = (dnode_phys_t *)stack; > stack += DNODE_SIZE; > > - if ((errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT, > - DMU_OT_OBJECT_DIRECTORY, dn, stack))) > + if (errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT, > + DMU_OT_OBJECT_DIRECTORY, dn, stack)) > return (errnum); > > /* > @@ -854,7 +943,7 @@ get_default_bootfsobj(dnode_phys_t *mosm > if (zap_lookup(dn, DMU_POOL_PROPS, &objnum, stack)) > return (ERR_FILESYSTEM_NOT_FOUND); > > - if ((errnum = dnode_get(mosmdn, objnum, DMU_OT_POOL_PROPS, dn, stack))) > + if (errnum = dnode_get(mosmdn, objnum, DMU_OT_POOL_PROPS, dn, stack)) > return (errnum); > > if (zap_lookup(dn, ZPOOL_PROP_BOOTFS, &objnum, stack)) > @@ -868,6 +957,57 @@ get_default_bootfsobj(dnode_phys_t *mosm > } > > /* > + * List of pool features that the grub implementation of ZFS supports for > + * read. Note that features that are only required for write do not need > + * to be listed here since grub opens pools in read-only mode. > + * > + * When this list is updated the version number in usr/src/grub/capability > + * must be incremented to ensure the new grub gets installed. > + */ > +static const char *spa_feature_names[] = { > + "org.illumos:lz4_compress", > + NULL > +}; > + > +/* > + * Checks whether the MOS features that are active are supported by this > + * (GRUB''s) implementation of ZFS. > + * > + * Return: > + * 0: Success. > + * errnum: Failure. 
> + */ > +static int > +check_mos_features(dnode_phys_t *mosmdn, char *stack) > +{ > + uint64_t objnum; > + dnode_phys_t *dn; > + uint8_t error = 0; > + > + dn = (dnode_phys_t *)stack; > + stack += DNODE_SIZE; > + > + if ((errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT, > + DMU_OT_OBJECT_DIRECTORY, dn, stack)) != 0) > + return (errnum); > + > + /* > + * Find the object number for ''features_for_read'' and retrieve its > + * corresponding dnode. Note that we don''t check features_for_write > + * because GRUB is not opening the pool for write. > + */ > + if ((errnum = zap_lookup(dn, DMU_POOL_FEATURES_FOR_READ, &objnum, > + stack)) != 0) > + return (errnum); > + > + if ((errnum = dnode_get(mosmdn, objnum, DMU_OTN_ZAP_METADATA, > + dn, stack)) != 0) > + return (errnum); > + > + return (zap_iterate(dn, check_feature, spa_feature_names, stack)); > +} > + > +/* > * Given a MOS metadnode, get the metadnode of a given filesystem name (fsname), > * e.g. pool/rootfs, or a given object number (obj), e.g. the object number > * of pool/rootfs. 
> @@ -896,15 +1036,15 @@ get_objset_mdn(dnode_phys_t *mosmdn, cha > goto skip; > } > > - if ((errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT, > - DMU_OT_OBJECT_DIRECTORY, mdn, stack))) > + if (errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT, > + DMU_OT_OBJECT_DIRECTORY, mdn, stack)) > return (errnum); > > - if ((errnum = zap_lookup(mdn, DMU_POOL_ROOT_DATASET, &objnum, > - stack))) > + if (errnum = zap_lookup(mdn, DMU_POOL_ROOT_DATASET, &objnum, > + stack)) > return (errnum); > > - if ((errnum = dnode_get(mosmdn, objnum, DMU_OT_DSL_DIR, mdn, stack))) > + if (errnum = dnode_get(mosmdn, objnum, DMU_OT_DSL_DIR, mdn, stack)) > return (errnum); > > if (fsname == NULL) { > @@ -914,23 +1054,24 @@ get_objset_mdn(dnode_phys_t *mosmdn, cha > } > > /* take out the pool name */ > - while (*fsname && !isspace((uint8_t)*fsname) && *fsname != ''/'') > + while (*fsname && !grub_isspace(*fsname) && *fsname != ''/'') > fsname++; > > - while (*fsname && !isspace((uint8_t)*fsname)) { > + while (*fsname && !grub_isspace(*fsname)) { > uint64_t childobj; > > while (*fsname == ''/'') > fsname++; > > cname = fsname; > - while (*fsname && !isspace((uint8_t)*fsname) && *fsname != ''/'') > + while (*fsname && !grub_isspace(*fsname) && *fsname != ''/'') > fsname++; > ch = *fsname; > *fsname = 0; > > snapname = cname; > - while (*snapname && !isspace((uint8_t)*snapname) && *snapname != ''@'') > + while (*snapname && !grub_isspace(*snapname) && *snapname !> + ''@'') > snapname++; > if (*snapname == ''@'') { > issnapshot = 1; > @@ -938,15 +1079,15 @@ get_objset_mdn(dnode_phys_t *mosmdn, cha > } > childobj > ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_child_dir_zapobj; > - if ((errnum = dnode_get(mosmdn, childobj, > - DMU_OT_DSL_DIR_CHILD_MAP, mdn, stack))) > + if (errnum = dnode_get(mosmdn, childobj, > + DMU_OT_DSL_DIR_CHILD_MAP, mdn, stack)) > return (errnum); > > if (zap_lookup(mdn, cname, &objnum, stack)) > return (ERR_FILESYSTEM_NOT_FOUND); > > - if ((errnum = dnode_get(mosmdn, objnum, 
DMU_OT_DSL_DIR, > - mdn, stack))) > + if (errnum = dnode_get(mosmdn, objnum, DMU_OT_DSL_DIR, > + mdn, stack)) > return (errnum); > > *fsname = ch; > @@ -958,7 +1099,7 @@ get_objset_mdn(dnode_phys_t *mosmdn, cha > *obj = headobj; > > skip: > - if ((errnum = dnode_get(mosmdn, headobj, DMU_OT_DSL_DATASET, mdn, stack))) > + if (errnum = dnode_get(mosmdn, headobj, DMU_OT_DSL_DATASET, mdn, stack)) > return (errnum); > if (issnapshot) { > uint64_t snapobj; > @@ -966,13 +1107,13 @@ skip: > snapobj = ((dsl_dataset_phys_t *)DN_BONUS(mdn))-> > ds_snapnames_zapobj; > > - if ((errnum = dnode_get(mosmdn, snapobj, > - DMU_OT_DSL_DS_SNAP_MAP, mdn, stack))) > + if (errnum = dnode_get(mosmdn, snapobj, > + DMU_OT_DSL_DS_SNAP_MAP, mdn, stack)) > return (errnum); > if (zap_lookup(mdn, snapname + 1, &headobj, stack)) > return (ERR_FILESYSTEM_NOT_FOUND); > - if ((errnum = dnode_get(mosmdn, headobj, > - DMU_OT_DSL_DATASET, mdn, stack))) > + if (errnum = dnode_get(mosmdn, headobj, > + DMU_OT_DSL_DATASET, mdn, stack)) > return (errnum); > if (obj) > *obj = headobj; > @@ -981,7 +1122,7 @@ skip: > bp = &((dsl_dataset_phys_t *)DN_BONUS(mdn))->ds_bp; > osp = (objset_phys_t *)stack; > stack += sizeof (objset_phys_t); > - if ((errnum = zio_read(bp, osp, stack))) > + if (errnum = zio_read(bp, osp, stack)) > return (errnum); > > grub_memmove((char *)mdn, (char *)&osp->os_meta_dnode, DNODE_SIZE); > @@ -1019,8 +1160,7 @@ nvlist_unpack(char *nvlist, char **out) > if (nvlist[0] != NV_ENCODE_XDR || nvlist[1] != HOST_ENDIAN) > return (1); > > - nvlist += 4; > - *out = nvlist; > + *out = nvlist + 4; > return (0); > } > > @@ -1033,7 +1173,7 @@ nvlist_array(char *nvlist, int index) > /* skip the header, nvl_version, and nvl_nvflag */ > nvlist = nvlist + 4 * 2; > > - while ((encode_size = BSWAP_32(*(uint32_t *)nvlist))) > + while (encode_size = BSWAP_32(*(uint32_t *)nvlist)) > nvlist += encode_size; /* goto the next nvpair */ > > nvlist = nvlist + 4 * 2; /* skip the ending 2 zeros - 8 bytes */ > @@ -1042,69 
+1182,159 @@ nvlist_array(char *nvlist, int index) > return (nvlist); > } > > +/* > + * The nvlist_next_nvpair() function returns a handle to the next nvpair in the > + * list following nvpair. If nvpair is NULL, the first pair is returned. If > + * nvpair is the last pair in the nvlist, NULL is returned. > + */ > +static char * > +nvlist_next_nvpair(char *nvl, char *nvpair) > +{ > + char *cur, *prev; > + int encode_size; > + > + if (nvl == NULL) > + return (NULL); > + > + if (nvpair == NULL) { > + /* skip over nvl_version and nvl_nvflag */ > + nvpair = nvl + 4 * 2; > + } else { > + /* skip to the next nvpair */ > + encode_size = BSWAP_32(*(uint32_t *)nvpair); > + nvpair += encode_size; > + } > + > + /* 8 bytes of 0 marks the end of the list */ > + if (*(uint64_t *)nvpair == 0) > + return (NULL); > + > + return (nvpair); > +} > + > +/* > + * This function returns 0 on success and 1 on failure. On success, a string > + * containing the name of nvpair is saved in buf. > + */ > +static int > +nvpair_name(char *nvp, char *buf, int buflen) > +{ > + int len; > + > + /* skip over encode/decode size */ > + nvp += 4 * 2; > + > + len = BSWAP_32(*(uint32_t *)nvp); > + if (buflen < len + 1) > + return (1); > + > + grub_memmove(buf, nvp + 4, len); > + buf[len] = ''\0''; > + > + return (0); > +} > + > +/* > + * This function retrieves the value of the nvpair in the form of enumerated > + * type data_type_t. This is used to determine the appropriate type to pass to > + * nvpair_value(). 
> + */ > +static int > +nvpair_type(char *nvp) > +{ > + int name_len, type; > + > + /* skip over encode/decode size */ > + nvp += 4 * 2; > + > + /* skip over name_len */ > + name_len = BSWAP_32(*(uint32_t *)nvp); > + nvp += 4; > + > + /* skip over name */ > + nvp = nvp + ((name_len + 3) & ~3); /* align */ > + > + type = BSWAP_32(*(uint32_t *)nvp); > + > + return (type); > +} > + > +static int > +nvpair_value(char *nvp, void *val, int valtype, int *nelmp) > +{ > + int name_len, type, slen; > + char *strval = val; > + uint64_t *intval = val; > + > + /* skip over encode/decode size */ > + nvp += 4 * 2; > + > + /* skip over name_len */ > + name_len = BSWAP_32(*(uint32_t *)nvp); > + nvp += 4; > + > + /* skip over name */ > + nvp = nvp + ((name_len + 3) & ~3); /* align */ > + > + /* skip over type */ > + type = BSWAP_32(*(uint32_t *)nvp); > + nvp += 4; > + > + if (type == valtype) { > + int nelm; > + > + nelm = BSWAP_32(*(uint32_t *)nvp); > + if (valtype != DATA_TYPE_BOOLEAN && nelm < 1) > + return (1); > + nvp += 4; > + > + switch (valtype) { > + case DATA_TYPE_BOOLEAN: > + return (0); > + > + case DATA_TYPE_STRING: > + slen = BSWAP_32(*(uint32_t *)nvp); > + nvp += 4; > + grub_memmove(strval, nvp, slen); > + strval[slen] = ''\0''; > + return (0); > + > + case DATA_TYPE_UINT64: > + *intval = BSWAP_64(*(uint64_t *)nvp); > + return (0); > + > + case DATA_TYPE_NVLIST: > + *(void **)val = (void *)nvp; > + return (0); > + > + case DATA_TYPE_NVLIST_ARRAY: > + *(void **)val = (void *)nvp; > + if (nelmp) > + *nelmp = nelm; > + return (0); > + } > + } > + > + return (1); > +} > + > static int > nvlist_lookup_value(char *nvlist, char *name, void *val, int valtype, > int *nelmp) > { > - int name_len, type, slen, encode_size; > - char *nvpair, *nvp_name, *strval = val; > - uint64_t *intval = val; > + char *nvpair; > > - /* skip the header, nvl_version, and nvl_nvflag */ > - nvlist = nvlist + 4 * 2; > - > - /* > - * Loop thru the nvpair list > - * The XDR representation of an integer 
is in big-endian byte order. > - */ > - while ((encode_size = BSWAP_32(*(uint32_t *)nvlist))) { > - > - nvpair = nvlist + 4 * 2; /* skip the encode/decode size */ > - > - name_len = BSWAP_32(*(uint32_t *)nvpair); > - nvpair += 4; > - > - nvp_name = nvpair; > - nvpair = nvpair + ((name_len + 3) & ~3); /* align */ > - > - type = BSWAP_32(*(uint32_t *)nvpair); > - nvpair += 4; > + for (nvpair = nvlist_next_nvpair(nvlist, NULL); > + nvpair != NULL; > + nvpair = nvlist_next_nvpair(nvlist, nvpair)) { > + int name_len = BSWAP_32(*(uint32_t *)(nvpair + 4 * 2)); > + char *nvp_name = nvpair + 4 * 3; > > if ((grub_strncmp(nvp_name, name, name_len) == 0) && > - type == valtype) { > - int nelm; > - > - if ((nelm = BSWAP_32(*(uint32_t *)nvpair)) < 1) > - return (1); > - nvpair += 4; > - > - switch (valtype) { > - case DATA_TYPE_STRING: > - slen = BSWAP_32(*(uint32_t *)nvpair); > - nvpair += 4; > - grub_memmove(strval, nvpair, slen); > - strval[slen] = ''\0''; > - return (0); > - > - case DATA_TYPE_UINT64: > - *intval = BSWAP_64(*(uint64_t *)nvpair); > - return (0); > - > - case DATA_TYPE_NVLIST: > - *(void **)val = (void *)nvpair; > - return (0); > - > - case DATA_TYPE_NVLIST_ARRAY: > - *(void **)val = (void *)nvpair; > - if (nelmp) > - *nelmp = nelm; > - return (0); > - } > + nvpair_type(nvpair) == valtype) { > + return (nvpair_value(nvpair, val, valtype, nelmp)); > } > - > - nvlist += encode_size; /* goto the next nvpair */ > } > - > return (1); > } > > @@ -1141,7 +1371,7 @@ vdev_get_bootpath(char *nv, uint64_t ing > NULL)) > return (ERR_FSYS_CORRUPT); > > - if (strcmp(type, VDEV_TYPE_DISK) == 0) { > + if (grub_strcmp(type, VDEV_TYPE_DISK) == 0) { > uint64_t guid; > > if (vdev_validate(nv) != 0) > @@ -1171,15 +1401,15 @@ vdev_get_bootpath(char *nv, uint64_t ing > devid, DATA_TYPE_STRING, NULL) != 0) > devid[0] = ''\0''; > > - if (strlen(bootpath) >= MAXPATHLEN || > - strlen(devid) >= MAXPATHLEN) > + if (grub_strlen(bootpath) >= MAXPATHLEN || > + grub_strlen(devid) >= 
MAXPATHLEN) > return (ERR_WONT_FIT); > > return (0); > > - } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 || > - strcmp(type, VDEV_TYPE_REPLACING) == 0 || > - (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) { > + } else if (grub_strcmp(type, VDEV_TYPE_MIRROR) == 0 || > + grub_strcmp(type, VDEV_TYPE_REPLACING) == 0 || > + (is_spare = (grub_strcmp(type, VDEV_TYPE_SPARE) == 0))) { > int nelm, i; > char *child; > > @@ -1207,15 +1437,14 @@ vdev_get_bootpath(char *nv, uint64_t ing > * 0 - success > * ERR_* - failure > */ > -int > +static int > check_pool_label(uint64_t sector, char *stack, char *outdevid, > - char *outpath, uint64_t *outguid) > + char *outpath, uint64_t *outguid, uint64_t *outashift, uint64_t *outversion) > { > vdev_phys_t *vdev; > uint64_t pool_state, txg = 0; > - char *nvlist, *nv; > + char *nvlist, *nv, *features; > uint64_t diskguid; > - uint64_t version; > > sector += (VDEV_SKIP_SIZE >> SPA_MINBLOCKSHIFT); > > @@ -1248,10 +1477,10 @@ check_pool_label(uint64_t sector, char * > if (txg == 0) > return (ERR_NO_BOOTPATH); > > - if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VERSION, &version, > + if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VERSION, outversion, > DATA_TYPE_UINT64, NULL)) > return (ERR_FSYS_CORRUPT); > - if (version > SPA_VERSION) > + if (!SPA_VERSION_IS_SUPPORTED(*outversion)) > return (ERR_NEWER_VERSION); > if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VDEV_TREE, &nv, > DATA_TYPE_NVLIST, NULL)) > @@ -1259,11 +1488,38 @@ check_pool_label(uint64_t sector, char * > if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_GUID, &diskguid, > DATA_TYPE_UINT64, NULL)) > return (ERR_FSYS_CORRUPT); > + if (nvlist_lookup_value(nv, ZPOOL_CONFIG_ASHIFT, outashift, > + DATA_TYPE_UINT64, NULL) != 0) > + return (ERR_FSYS_CORRUPT); > if (vdev_get_bootpath(nv, diskguid, outdevid, outpath, 0)) > return (ERR_NO_BOOTPATH); > if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_GUID, outguid, > DATA_TYPE_UINT64, NULL)) > return (ERR_FSYS_CORRUPT); > + > + if 
(nvlist_lookup_value(nvlist, ZPOOL_CONFIG_FEATURES_FOR_READ, > + &features, DATA_TYPE_NVLIST, NULL) == 0) { > + char *nvp; > + char *name = stack; > + stack += MAXNAMELEN; > + > + for (nvp = nvlist_next_nvpair(features, NULL); > + nvp != NULL; > + nvp = nvlist_next_nvpair(features, nvp)) { > + zap_attribute_t za; > + > + if (nvpair_name(nvp, name, MAXNAMELEN) != 0) > + return (ERR_FSYS_CORRUPT); > + > + za.za_integer_length = 8; > + za.za_num_integers = 1; > + za.za_first_integer = 1; > + za.za_name = name; > + if (check_feature(&za, spa_feature_names, stack) != 0) > + return (ERR_NEWER_VERSION); > + } > + } > + > return (0); > } > > @@ -1278,15 +1534,16 @@ check_pool_label(uint64_t sector, char * > int > zfs_mount(void) > { > - char *stack; > + char *stack, *ub_array; > int label = 0; > - uberblock_phys_t *ub_array, *ubbest; > + uberblock_t *ubbest; > objset_phys_t *osp; > char tmp_bootpath[MAXNAMELEN]; > char tmp_devid[MAXNAMELEN]; > - uint64_t tmp_guid; > + uint64_t tmp_guid, ashift, version; > uint64_t adjpl = (uint64_t)part_length << SPA_MINBLOCKSHIFT; > int err = errnum; /* preserve previous errnum state */ > + uint64_t sector; > > /* if it''s our first time here, zero the best uberblock out */ > if (best_drive == 0 && best_part == 0 && find_best_root) { > @@ -1296,7 +1553,7 @@ zfs_mount(void) > > stackbase = ZFS_SCRATCH; > stack = stackbase; > - ub_array = (uberblock_phys_t *)stack; > + ub_array = stack; > stack += VDEV_UBERBLOCK_RING; > > osp = (objset_phys_t *)stack; > @@ -1305,8 +1562,6 @@ zfs_mount(void) > > for (label = 0; label < VDEV_LABELS; label++) { > > - uint64_t sector; > - > /* > * some eltorito stacks don''t give us a size and > * we end up setting the size to MAXUINT, further > @@ -1324,39 +1579,38 @@ zfs_mount(void) > > /* Read in the uberblock ring (128K). 
*/ > if (devread(sector + > - ((VDEV_SKIP_SIZE + VDEV_PHYS_SIZE) >> > - SPA_MINBLOCKSHIFT), 0, VDEV_UBERBLOCK_RING, > - (char *)ub_array) == 0) > + ((VDEV_SKIP_SIZE + VDEV_PHYS_SIZE) >> SPA_MINBLOCKSHIFT), > + 0, VDEV_UBERBLOCK_RING, ub_array) == 0) > continue; > > - if ((ubbest = find_bestub(ub_array, sector)) != NULL && > - zio_read(&ubbest->ubp_uberblock.ub_rootbp, osp, stack) > - == 0) { > + if (check_pool_label(sector, stack, tmp_devid, > + tmp_bootpath, &tmp_guid, &ashift, &version)) > + continue; > > - VERIFY_OS_TYPE(osp, DMU_OST_META); > + if (pool_guid == 0) > + pool_guid = tmp_guid; > > - if (check_pool_label(sector, stack, tmp_devid, > - tmp_bootpath, &tmp_guid)) > - continue; > - if (pool_guid == 0) > - pool_guid = tmp_guid; > + if ((ubbest = find_bestub(ub_array, ashift, sector)) == NULL || > + zio_read(&ubbest->ub_rootbp, osp, stack) != 0) > + continue; > > - if (find_best_root && ((pool_guid != tmp_guid) || > - vdev_uberblock_compare(&ubbest->ubp_uberblock, > - &(current_uberblock)) <= 0)) > - continue; > + VERIFY_OS_TYPE(osp, DMU_OST_META); > > - /* Got the MOS. Save it at the memory addr MOS. */ > - grub_memmove(MOS, &osp->os_meta_dnode, DNODE_SIZE); > - grub_memmove(&current_uberblock, > - &ubbest->ubp_uberblock, sizeof (uberblock_t)); > - grub_memmove(current_bootpath, tmp_bootpath, > - MAXNAMELEN); > - grub_memmove(current_devid, tmp_devid, > - grub_strlen(tmp_devid)); > - is_zfs_mount = 1; > - return (1); > - } > + if (version >= SPA_VERSION_FEATURES && > + check_mos_features(&osp->os_meta_dnode, stack) != 0) > + continue; > + > + if (find_best_root && ((pool_guid != tmp_guid) || > + vdev_uberblock_compare(ubbest, &(current_uberblock)) <= 0)) > + continue; > + > + /* Got the MOS. Save it at the memory addr MOS. 
*/ > + grub_memmove(MOS, &osp->os_meta_dnode, DNODE_SIZE); > + grub_memmove(&current_uberblock, ubbest, sizeof (uberblock_t)); > + grub_memmove(current_bootpath, tmp_bootpath, MAXNAMELEN); > + grub_memmove(current_devid, tmp_devid, grub_strlen(tmp_devid)); > + is_zfs_mount = 1; > + return (1); > } > > /* > @@ -1399,23 +1653,23 @@ zfs_open(char *filename) > * do not goto 'current_bootfs'. > */ > if (is_top_dataset_file(filename)) { > - if ((errnum = get_objset_mdn(MOS, NULL, NULL, mdn, stack))) > + if (errnum = get_objset_mdn(MOS, NULL, NULL, mdn, stack)) > return (0); > > current_bootfs_obj = 0; > } else { > if (current_bootfs[0] == '\0') { > /* Get the default root filesystem object number */ > - if ((errnum = get_default_bootfsobj(MOS, > - &current_bootfs_obj, stack))) > + if (errnum = get_default_bootfsobj(MOS, > + &current_bootfs_obj, stack)) > return (0); > > - if ((errnum = get_objset_mdn(MOS, NULL, > - &current_bootfs_obj, mdn, stack))) > + if (errnum = get_objset_mdn(MOS, NULL, > + &current_bootfs_obj, mdn, stack)) > return (0); > } else { > - if ((errnum = get_objset_mdn(MOS, current_bootfs, > - &current_bootfs_obj, mdn, stack))) { > + if (errnum = get_objset_mdn(MOS, current_bootfs, > + &current_bootfs_obj, mdn, stack)) { > grub_memset(current_bootfs, 0, MAXNAMELEN); > return (0); > } > @@ -1515,7 +1769,7 @@ zfs_read(char *buf, int len) > */ > uint64_t blkid = filepos / blksz; > > - if ((errnum = dmu_read(DNODE, blkid, file_buf, stack))) > + if (errnum = dmu_read(DNODE, blkid, file_buf, stack)) > return (0); > > file_start = blkid * blksz; > diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/fsys_zfs.h > --- a/tools/libfsimage/zfs/fsys_zfs.h Thu Oct 24 22:46:20 2013 +0100 > +++ b/tools/libfsimage/zfs/fsys_zfs.h Sat Oct 26 20:03:06 2013 +0400 > @@ -16,10 +16,17 @@ > * along with this program; if not, write to the Free Software > * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. > */ > + > /* > * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 
> * Use is subject to license terms. > */ > + > +/* > + * Copyright (c) 2012 by Delphix. All rights reserved. > + * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. > + */ > + > #ifndef _FSYS_ZFS_H > #define _FSYS_ZFS_H > > @@ -95,26 +102,8 @@ typedef unsigned int size_t; > #define BSWAP_64(x) ((BSWAP_32(x) << 32) | BSWAP_32((x) >> 32)) > #define P2ROUNDUP(x, align) (-(-(x) & -(align))) > > -/* > - * XXX Match these macro up with real zfs once we have nvlist support so that we > - * can support large sector disks. > - */ > -#define UBERBLOCK_SIZE (1ULL << UBERBLOCK_SHIFT) > -#define VDEV_UBERBLOCK_SHIFT UBERBLOCK_SHIFT > -#include <stddef.h> > -#define VDEV_UBERBLOCK_OFFSET(n) \ > -offsetof(vdev_label_t, vl_uberblock[(n) << VDEV_UBERBLOCK_SHIFT]) > - > typedef struct uberblock uberblock_t; > > -/* XXX Uberblock_phys_t is no longer in the kernel zfs */ > -typedef struct uberblock_phys { > - uberblock_t ubp_uberblock; > - char ubp_pad[UBERBLOCK_SIZE - sizeof (uberblock_t) - > - sizeof (zio_eck_t)]; > - zio_eck_t ubp_zec; > -} uberblock_phys_t; > - > /* > * Macros to get fields in a bp or DVA. 
> */ > @@ -137,10 +126,36 @@ typedef struct uberblock_phys { > #define NV_ENCODE_NATIVE 0 > #define NV_ENCODE_XDR 1 > #define HOST_ENDIAN 1 /* for x86 machine */ > -#define DATA_TYPE_UINT64 8 > -#define DATA_TYPE_STRING 9 > -#define DATA_TYPE_NVLIST 19 > -#define DATA_TYPE_NVLIST_ARRAY 20 > +typedef enum { > + DATA_TYPE_UNKNOWN = 0, > + DATA_TYPE_BOOLEAN, > + DATA_TYPE_BYTE, > + DATA_TYPE_INT16, > + DATA_TYPE_UINT16, > + DATA_TYPE_INT32, > + DATA_TYPE_UINT32, > + DATA_TYPE_INT64, > + DATA_TYPE_UINT64, > + DATA_TYPE_STRING, > + DATA_TYPE_BYTE_ARRAY, > + DATA_TYPE_INT16_ARRAY, > + DATA_TYPE_UINT16_ARRAY, > + DATA_TYPE_INT32_ARRAY, > + DATA_TYPE_UINT32_ARRAY, > + DATA_TYPE_INT64_ARRAY, > + DATA_TYPE_UINT64_ARRAY, > + DATA_TYPE_STRING_ARRAY, > + DATA_TYPE_HRTIME, > + DATA_TYPE_NVLIST, > + DATA_TYPE_NVLIST_ARRAY, > + DATA_TYPE_BOOLEAN_VALUE, > + DATA_TYPE_INT8, > + DATA_TYPE_UINT8, > + DATA_TYPE_BOOLEAN_ARRAY, > + DATA_TYPE_INT8_ARRAY, > + DATA_TYPE_UINT8_ARRAY, > + DATA_TYPE_DOUBLE > +} data_type_t; > > /* > * Decompression Entry - lzjb > @@ -199,6 +214,7 @@ extern void fletcher_4_native(const void > extern void fletcher_4_byteswap(const void *, uint64_t, zio_cksum_t *); > extern void zio_checksum_SHA256(const void *, uint64_t, zio_cksum_t *); > extern int lzjb_decompress(void *, void *, size_t, size_t); > +extern int lz4_decompress(void *, void *, size_t, size_t); > > #endif /* FSYS_ZFS */ > > diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/dmu.h > --- a/tools/libfsimage/zfs/zfs-include/dmu.h Thu Oct 24 22:46:20 2013 +0100 > +++ b/tools/libfsimage/zfs/zfs-include/dmu.h Sat Oct 26 20:03:06 2013 +0400 > @@ -16,11 +16,16 @@ > * along with this program; if not, write to the Free Software > * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. > */ > + > /* > * Copyright 2010 Sun Microsystems, Inc. All rights reserved. > * Use is subject to license terms. > */ > > +/* > + * Copyright (c) 2012 by Delphix. All rights reserved. 
> + */ > + > #ifndef _SYS_DMU_H > #define _SYS_DMU_H > > @@ -31,6 +36,41 @@ > * The DMU also interacts with the SPA. That interface is described in > * dmu_spa.h. > */ > + > +#define B_FALSE 0 > +#define B_TRUE 1 > + > +#define DMU_OT_NEWTYPE 0x80 > +#define DMU_OT_METADATA 0x40 > +#define DMU_OT_BYTESWAP_MASK 0x3f > + > +#define DMU_OT(byteswap, metadata) \ > + (DMU_OT_NEWTYPE | \ > + ((metadata) ? DMU_OT_METADATA : 0) | \ > + ((byteswap) & DMU_OT_BYTESWAP_MASK)) > + > +#define DMU_OT_IS_VALID(ot) (((ot) & DMU_OT_NEWTYPE) ? \ > + ((ot) & DMU_OT_BYTESWAP_MASK) < DMU_BSWAP_NUMFUNCS : \ > + (ot) < DMU_OT_NUMTYPES) > + > +#define DMU_OT_IS_METADATA(ot) (((ot) & DMU_OT_NEWTYPE) ? \ > + ((ot) & DMU_OT_METADATA) : \ > + dmu_ot[(ot)].ot_metadata) > + > +typedef enum dmu_object_byteswap { > + DMU_BSWAP_UINT8, > + DMU_BSWAP_UINT16, > + DMU_BSWAP_UINT32, > + DMU_BSWAP_UINT64, > + DMU_BSWAP_ZAP, > + DMU_BSWAP_DNODE, > + DMU_BSWAP_OBJSET, > + DMU_BSWAP_ZNODE, > + DMU_BSWAP_OLDACL, > + DMU_BSWAP_ACL, > + DMU_BSWAP_NUMFUNCS > +} dmu_object_byteswap_t; > + > typedef enum dmu_object_type { > DMU_OT_NONE, > /* general: */ > @@ -38,8 +78,8 @@ typedef enum dmu_object_type { > DMU_OT_OBJECT_ARRAY, /* UINT64 */ > DMU_OT_PACKED_NVLIST, /* UINT8 (XDR by nvlist_pack/unpack) */ > DMU_OT_PACKED_NVLIST_SIZE, /* UINT64 */ > - DMU_OT_BPLIST, /* UINT64 */ > - DMU_OT_BPLIST_HDR, /* UINT64 */ > + DMU_OT_BPOBJ, /* UINT64 */ > + DMU_OT_BPOBJ_HDR, /* UINT64 */ > /* spa: */ > DMU_OT_SPACE_MAP_HEADER, /* UINT64 */ > DMU_OT_SPACE_MAP, /* UINT64 */ > @@ -56,7 +96,7 @@ typedef enum dmu_object_type { > DMU_OT_DSL_DATASET, /* UINT64 */ > /* zpl: */ > DMU_OT_ZNODE, /* ZNODE */ > - DMU_OT_OLDACL, /* OLD ACL */ > + DMU_OT_OLDACL, /* Old ACL */ > DMU_OT_PLAIN_FILE_CONTENTS, /* UINT8 */ > DMU_OT_DIRECTORY_CONTENTS, /* ZAP */ > DMU_OT_MASTER_NODE, /* ZAP */ > @@ -79,7 +119,7 @@ typedef enum dmu_object_type { > DMU_OT_FUID, /* FUID table (Packed NVLIST UINT8) */ > DMU_OT_FUID_SIZE, /* FUID table size UINT64 */ > 
DMU_OT_NEXT_CLONES, /* ZAP */ > - DMU_OT_SCRUB_QUEUE, /* ZAP */ > + DMU_OT_SCAN_QUEUE, /* ZAP */ > DMU_OT_USERGROUP_USED, /* ZAP */ > DMU_OT_USERGROUP_QUOTA, /* ZAP */ > DMU_OT_USERREFS, /* ZAP */ > @@ -89,7 +129,24 @@ typedef enum dmu_object_type { > DMU_OT_SA_MASTER_NODE, /* ZAP */ > DMU_OT_SA_ATTR_REGISTRATION, /* ZAP */ > DMU_OT_SA_ATTR_LAYOUTS, /* ZAP */ > - DMU_OT_NUMTYPES > + DMU_OT_SCAN_XLATE, /* ZAP */ > + DMU_OT_DEDUP, /* fake dedup BP from ddt_bp_create() */ > + DMU_OT_DEADLIST, /* ZAP */ > + DMU_OT_DEADLIST_HDR, /* UINT64 */ > + DMU_OT_DSL_CLONES, /* ZAP */ > + DMU_OT_BPOBJ_SUBOBJ, /* UINT64 */ > + DMU_OT_NUMTYPES, > + > + DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE), > + DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE), > + DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE), > + DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE), > + DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE), > + DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE), > + DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE), > + DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE), > + DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE), > + DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE), > } dmu_object_type_t; > > typedef enum dmu_objset_type { > @@ -107,6 +164,9 @@ typedef enum dmu_objset_type { > */ > #define DMU_POOL_DIRECTORY_OBJECT 1 > #define DMU_POOL_CONFIG "config" > +#define DMU_POOL_FEATURES_FOR_READ "features_for_read" > +#define DMU_POOL_FEATURES_FOR_WRITE "features_for_write" > +#define DMU_POOL_FEATURE_DESCRIPTIONS "feature_descriptions" > #define DMU_POOL_ROOT_DATASET "root_dataset" > #define DMU_POOL_SYNC_BPLIST "sync_bplist" > #define DMU_POOL_ERRLOG_SCRUB "errlog_scrub" > diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/dsl_dataset.h > --- a/tools/libfsimage/zfs/zfs-include/dsl_dataset.h Thu Oct 24 22:46:20 2013 +0100 > +++ b/tools/libfsimage/zfs/zfs-include/dsl_dataset.h Sat Oct 26 
20:03:06 2013 +0400 > @@ -24,8 +24,6 @@ > #ifndef _SYS_DSL_DATASET_H > #define _SYS_DSL_DATASET_H > > -#pragma ident "%Z%%M% %I% %E% SMI" > - > typedef struct dsl_dataset_phys { > uint64_t ds_dir_obj; > uint64_t ds_prev_snap_obj; > diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/dsl_dir.h > --- a/tools/libfsimage/zfs/zfs-include/dsl_dir.h Thu Oct 24 22:46:20 2013 +0100 > +++ b/tools/libfsimage/zfs/zfs-include/dsl_dir.h Sat Oct 26 20:03:06 2013 +0400 > @@ -24,8 +24,6 @@ > #ifndef _SYS_DSL_DIR_H > #define _SYS_DSL_DIR_H > > -#pragma ident "%Z%%M% %I% %E% SMI" > - > typedef struct dsl_dir_phys { > uint64_t dd_creation_time; /* not actually used */ > uint64_t dd_head_dataset_obj; > diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/spa.h > --- a/tools/libfsimage/zfs/zfs-include/spa.h Thu Oct 24 22:46:20 2013 +0100 > +++ b/tools/libfsimage/zfs/zfs-include/spa.h Sat Oct 26 20:03:06 2013 +0400 > @@ -16,11 +16,16 @@ > * along with this program; if not, write to the Free Software > * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. > */ > + > /* > * Copyright 2010 Sun Microsystems, Inc. All rights reserved. > * Use is subject to license terms. > */ > > +/* > + * Copyright (c) 2012 by Delphix. All rights reserved. > + */ > + > #ifndef _SYS_SPA_H > #define _SYS_SPA_H > > @@ -65,7 +70,7 @@ > /* > * Size of block to hold the configuration data (a packed nvlist) > */ > -#define SPA_CONFIG_BLOCKSIZE (1 << 14) > +#define SPA_CONFIG_BLOCKSIZE (1ULL << 14) > > /* > * The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB. 
> diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/uberblock_impl.h > --- a/tools/libfsimage/zfs/zfs-include/uberblock_impl.h Thu Oct 24 22:46:20 2013 +0100 > +++ b/tools/libfsimage/zfs/zfs-include/uberblock_impl.h Sat Oct 26 20:03:06 2013 +0400 > @@ -24,8 +24,6 @@ > #ifndef _SYS_UBERBLOCK_IMPL_H > #define _SYS_UBERBLOCK_IMPL_H > > -#pragma ident "%Z%%M% %I% %E% SMI" > - > /* > * The uberblock version is incremented whenever an incompatible on-disk > * format change is made to the SPA, DMU, or ZAP. > diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/vdev_impl.h > --- a/tools/libfsimage/zfs/zfs-include/vdev_impl.h Thu Oct 24 22:46:20 2013 +0100 > +++ b/tools/libfsimage/zfs/zfs-include/vdev_impl.h Sat Oct 26 20:03:06 2013 +0400 > @@ -24,12 +24,30 @@ > #ifndef _SYS_VDEV_IMPL_H > #define _SYS_VDEV_IMPL_H > > +/* helper macros */ > +#undef offsetof > +#if defined(__GNUC__) > +#define offsetof(s, m) __builtin_offsetof(s, m) > +#else > +#define offsetof(s, m) ((size_t)(&(((s *)0)->m))) > +#endif > +#define MAX(x, y) ((x) > (y) ? 
(x) : (y)) > + > #define VDEV_PAD_SIZE (8 << 10) > /* 2 padding areas (vl_pad1 and vl_pad2) to skip */ > #define VDEV_SKIP_SIZE VDEV_PAD_SIZE * 2 > #define VDEV_PHYS_SIZE (112 << 10) > #define VDEV_UBERBLOCK_RING (128 << 10) > > +#define VDEV_UBERBLOCK_SHIFT(sh) \ > + MAX((sh), UBERBLOCK_SHIFT) > +#define VDEV_UBERBLOCK_COUNT(sh) \ > + (VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT(sh)) > +#define VDEV_UBERBLOCK_OFFSET(sh, n) \ > + offsetof(vdev_label_t, vl_uberblock[(n) << VDEV_UBERBLOCK_SHIFT(sh)]) > +#define VDEV_UBERBLOCK_SIZE(sh) \ > + (1ULL << VDEV_UBERBLOCK_SHIFT(sh)) > + > typedef struct vdev_phys { > char vp_nvlist[VDEV_PHYS_SIZE - sizeof (zio_eck_t)]; > zio_eck_t vp_zbt; > diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/zap_leaf.h > --- a/tools/libfsimage/zfs/zfs-include/zap_leaf.h Thu Oct 24 22:46:20 2013 +0100 > +++ b/tools/libfsimage/zfs/zfs-include/zap_leaf.h Sat Oct 26 20:03:06 2013 +0400 > @@ -24,8 +24,6 @@ > #ifndef _SYS_ZAP_LEAF_H > #define _SYS_ZAP_LEAF_H > > -#pragma ident "%Z%%M% %I% %E% SMI" > - > #define ZAP_LEAF_MAGIC 0x2AB1EAF > > /* chunk size = 24 bytes */ > diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/zfs.h > --- a/tools/libfsimage/zfs/zfs-include/zfs.h Thu Oct 24 22:46:20 2013 +0100 > +++ b/tools/libfsimage/zfs/zfs-include/zfs.h Sat Oct 26 20:03:06 2013 +0400 > @@ -16,9 +16,10 @@ > * along with this program; if not, write to the Free Software > * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. > */ > + > /* > - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. > - * Use is subject to license terms. > + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. > + * Copyright (c) 2012 by Delphix. All rights reserved. > */ > > #ifndef _SYS_FS_ZFS_H > @@ -27,7 +28,14 @@ > /* > * On-disk version number. 
> */ > -#define SPA_VERSION 24ULL > +#define SPA_VERSION_INITIAL 1ULL > +#define SPA_VERSION_BEFORE_FEATURES 28ULL > +#define SPA_VERSION 5000ULL > +#define SPA_VERSION_FEATURES 5000ULL > + > +#define SPA_VERSION_IS_SUPPORTED(v) \ > + (((v) >= SPA_VERSION_INITIAL && (v) <= SPA_VERSION_BEFORE_FEATURES) || \ > + ((v) >= SPA_VERSION_FEATURES && (v) <= SPA_VERSION)) > > /* > * The following are configuration names used in the nvlist describing a pool''s > @@ -67,6 +75,7 @@ > #define ZPOOL_CONFIG_DDT_HISTOGRAM "ddt_histogram" > #define ZPOOL_CONFIG_DDT_OBJ_STATS "ddt_object_stats" > #define ZPOOL_CONFIG_DDT_STATS "ddt_stats" > +#define ZPOOL_CONFIG_FEATURES_FOR_READ "features_for_read" > /* > * The persistent vdev state is stored as separate values rather than a single > * ''vdev_state'' entry. This is because a device can be in multiple states, such > diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/zfs_acl.h > --- a/tools/libfsimage/zfs/zfs-include/zfs_acl.h Thu Oct 24 22:46:20 2013 +0100 > +++ b/tools/libfsimage/zfs/zfs-include/zfs_acl.h Sat Oct 26 20:03:06 2013 +0400 > @@ -24,8 +24,6 @@ > #ifndef _SYS_FS_ZFS_ACL_H > #define _SYS_FS_ZFS_ACL_H > > -#pragma ident "%Z%%M% %I% %E% SMI" > - > #ifndef _UID_T > #define _UID_T > typedef unsigned int uid_t; /* UID type */ > diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs-include/zio.h > --- a/tools/libfsimage/zfs/zfs-include/zio.h Thu Oct 24 22:46:20 2013 +0100 > +++ b/tools/libfsimage/zfs/zfs-include/zio.h Sat Oct 26 20:03:06 2013 +0400 > @@ -20,6 +20,9 @@ > * Copyright 2010 Sun Microsystems, Inc. All rights reserved. > * Use is subject to license terms. > */ > +/* > + * Copyright 2013 by Saso Kiselkov. All rights reserved. 
> + */ > > #ifndef _ZIO_H > #define _ZIO_H > @@ -73,6 +76,17 @@ enum zio_compress { > ZIO_COMPRESS_OFF, > ZIO_COMPRESS_LZJB, > ZIO_COMPRESS_EMPTY, > + ZIO_COMPRESS_GZIP_1, > + ZIO_COMPRESS_GZIP_2, > + ZIO_COMPRESS_GZIP_3, > + ZIO_COMPRESS_GZIP_4, > + ZIO_COMPRESS_GZIP_5, > + ZIO_COMPRESS_GZIP_6, > + ZIO_COMPRESS_GZIP_7, > + ZIO_COMPRESS_GZIP_8, > + ZIO_COMPRESS_GZIP_9, > + ZIO_COMPRESS_ZLE, > + ZIO_COMPRESS_LZ4, > ZIO_COMPRESS_FUNCTIONS > }; > > diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs_fletcher.c > --- a/tools/libfsimage/zfs/zfs_fletcher.c Thu Oct 24 22:46:20 2013 +0100 > +++ b/tools/libfsimage/zfs/zfs_fletcher.c Sat Oct 26 20:03:06 2013 +0400 > @@ -21,8 +21,6 @@ > * Use is subject to license terms. > */ > > -#pragma ident "%Z%%M% %I% %E% SMI" > - > #include "fsys_zfs.h" > > > diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs_lz4.c > --- /dev/null Thu Jan 01 00:00:00 1970 +0000 > +++ b/tools/libfsimage/zfs/zfs_lz4.c Sat Oct 26 20:03:06 2013 +0400 > @@ -0,0 +1,313 @@ > +/* > + * LZ4 - Fast LZ compression algorithm > + * Header File > + * Copyright (C) 2011-2013, Yann Collet. > + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions are > + * met: > + * > + * * Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. > + * * Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following disclaimer > + * in the documentation and/or other materials provided with the > + * distribution. 
> + * > + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS > + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT > + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR > + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT > + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > + * > + * You can contact the author at : > + * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html > + * - LZ4 source repository : http://code.google.com/p/lz4/ > + */ > + > +#include "fsys_zfs.h" > +#include <string.h> > + > +static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest, > + int isize, int maxOutputSize); > + > +int > +lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len) > +{ > + const uint8_t *src = s_start; > + uint32_t bufsiz = (src[0] << 24) | (src[1] << 16) | (src[2] << 8) | > + src[3]; > + > + /* invalid compressed buffer size encoded at start */ > + if (bufsiz + 4 > s_len) > + return (1); > + > + /* > + * Returns 0 on success (decompression function returned non-negative) > + * and non-zero on failure (decompression function returned negative). > + */ > + return (LZ4_uncompress_unknownOutputSize(s_start + 4, d_start, bufsiz, > + d_len) < 0); > +} > + > +/* > + * CPU Feature Detection > + */ > + > +/* 32 or 64 bits ? 
*/ > +#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \ > + defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \ > + defined(__LP64__) || defined(_LP64)) > +#define LZ4_ARCH64 1 > +#else > +#define LZ4_ARCH64 0 > +#endif > + > +/* > + * Little Endian or Big Endian? > + * Note: overwrite the below #define if you know your architecture endianess. > + */ > +#if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \ > + defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \ > + defined(__PPC) || defined(PPC) || defined(__powerpc__) || \ > + defined(__powerpc) || defined(powerpc) || \ > + ((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))) > +#define LZ4_BIG_ENDIAN 1 > +#else > + /* > + * Little Endian assumed. PDP Endian and other very rare endian format > + * are unsupported. > + */ > +#endif > + > +/* > + * Compiler Options > + */ > +#if __STDC_VERSION__ >= 199901L /* C99 */ > +/* "restrict" is a known keyword */ > +#else > +/* Disable restrict */ > +#define restrict > +#endif > + > +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) > + > +#define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) \ > + | (((x) & 0xffu) << 8))) > + > +#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) > +#define expect(expr, value) (__builtin_expect((expr), (value))) > +#else > +#define expect(expr, value) (expr) > +#endif > + > +#define likely(expr) expect((expr) != 0, 1) > +#define unlikely(expr) expect((expr) != 0, 0) > + > +/* Basic types */ > +#define BYTE uint8_t > +#define U16 uint16_t > +#define U32 uint32_t > +#define S32 int32_t > +#define U64 uint64_t > + > +typedef struct _U16_S { > + U16 v; > +} U16_S; > +typedef struct _U32_S { > + U32 v; > +} U32_S; > +typedef struct _U64_S { > + U64 v; > +} U64_S; > + > +#define A64(x) (((U64_S *)(x))->v) > +#define A32(x) (((U32_S *)(x))->v) > +#define A16(x) (((U16_S *)(x))->v) > + > +/* > + * Constants > + */ > +#define MINMATCH 4 
> + > +#define COPYLENGTH 8 > +#define LASTLITERALS 5 > + > +#define ML_BITS 4 > +#define ML_MASK ((1U<<ML_BITS)-1) > +#define RUN_BITS (8-ML_BITS) > +#define RUN_MASK ((1U<<RUN_BITS)-1) > + > +/* > + * Architecture-specific macros > + */ > +#if LZ4_ARCH64 > +#define STEPSIZE 8 > +#define UARCH U64 > +#define AARCH A64 > +#define LZ4_COPYSTEP(s, d) A64(d) = A64(s); d += 8; s += 8; > +#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d) > +#define LZ4_SECURECOPY(s, d, e) if (d < e) LZ4_WILDCOPY(s, d, e) > +#define HTYPE U32 > +#define INITBASE(base) const BYTE* const base = ip > +#else > +#define STEPSIZE 4 > +#define UARCH U32 > +#define AARCH A32 > +#define LZ4_COPYSTEP(s, d) A32(d) = A32(s); d += 4; s += 4; > +#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d); > +#define LZ4_SECURECOPY LZ4_WILDCOPY > +#define HTYPE const BYTE* > +#define INITBASE(base) const int base = 0 > +#endif > + > +#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE)) > +#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ > + { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; } > +#define LZ4_WRITE_LITTLEENDIAN_16(p, i) \ > + { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; } > +#else > +#define LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); } > +#define LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = v; p += 2; } > +#endif > + > +/* Macros */ > +#define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e); > + > +/* Decompression functions */ > + > +static int > +LZ4_uncompress_unknownOutputSize(const char *source, > + char *dest, int isize, int maxOutputSize) > +{ > + /* Local Variables */ > + const BYTE *restrict ip = (const BYTE *) source; > + const BYTE *const iend = ip + isize; > + const BYTE *restrict ref; > + > + BYTE *restrict op = (BYTE *) dest; > + BYTE *const oend = op + maxOutputSize; > + BYTE *cpy; > + > + size_t dec[] = { 0, 3, 2, 3, 0, 0, 0, 0 }; > + > + /* Main Loop */ > + while (ip < iend) { > + BYTE token; > + int 
length; > + > + /* get runlength */ > + token = *ip++; > + if ((length = (token >> ML_BITS)) == RUN_MASK) { > + int s = 255; > + while ((ip < iend) && (s == 255)) { > + s = *ip++; > + length += s; > + } > + } > + /* copy literals */ > + cpy = op + length; > + if ((cpy > oend - COPYLENGTH) || > + (ip + length > iend - COPYLENGTH)) { > + if (cpy > oend) > + /* > + * Error: request to write beyond destination > + * buffer. > + */ > + goto _output_error; > + if (ip + length > iend) > + /* > + * Error : request to read beyond source > + * buffer. > + */ > + goto _output_error; > + memcpy(op, ip, length); > + op += length; > + ip += length; > + if (ip < iend) > + /* Error : LZ4 format violation */ > + goto _output_error; > + /* Necessarily EOF, due to parsing restrictions. */ > + break; > + } > + LZ4_WILDCOPY(ip, op, cpy); > + ip -= (op - cpy); > + op = cpy; > + > + /* get offset */ > + LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); > + ip += 2; > + if (ref < (BYTE * const) dest) > + /* > + * Error: offset creates reference outside of > + * destination buffer. > + */ > + goto _output_error; > + > + /* get matchlength */ > + if ((length = (token & ML_MASK)) == ML_MASK) { > + while (ip < iend) { > + int s = *ip++; > + length += s; > + if (s == 255) > + continue; > + break; > + } > + } > + /* copy repeated sequence */ > + if unlikely(op - ref < STEPSIZE) { > +#if LZ4_ARCH64 > + size_t dec2table[] = { 0, 0, 0, -1, 0, 1, 2, 3 }; > + size_t dec2 = dec2table[op - ref]; > +#else > + const int dec2 = 0; > +#endif > + *op++ = *ref++; > + *op++ = *ref++; > + *op++ = *ref++; > + *op++ = *ref++; > + ref -= dec[op - ref]; > + A32(op) = A32(ref); > + op += STEPSIZE - 4; > + ref -= dec2; > + } else { > + LZ4_COPYSTEP(ref, op); > + } > + cpy = op + length - (STEPSIZE - 4); > + if (cpy > oend - COPYLENGTH) { > + if (cpy > oend) > + /* > + * Error: request to write outside of > + * destination buffer. 
> + */ > + goto _output_error; > + LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); > + while (op < cpy) > + *op++ = *ref++; > + op = cpy; > + if (op == oend) > + /* > + * Check EOF (should never happen, since last > + * 5 bytes are supposed to be literals). > + */ > + break; > + continue; > + } > + LZ4_SECURECOPY(ref, op, cpy); > + op = cpy; /* correction */ > + } > + > + /* end of decoding */ > + return (int)(((char *)op) - dest); > + > + /* write overflow error detected */ > + _output_error: > + return (int)(-(((char *)ip) - source)); > +} > diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs_lzjb.c > --- a/tools/libfsimage/zfs/zfs_lzjb.c Thu Oct 24 22:46:20 2013 +0100 > +++ b/tools/libfsimage/zfs/zfs_lzjb.c Sat Oct 26 20:03:06 2013 +0400 > @@ -21,8 +21,6 @@ > * Use is subject to license terms. > */ > > -#pragma ident "%Z%%M% %I% %E% SMI" > - > #include "fsys_zfs.h" > > #define MATCH_BITS 6 > @@ -34,10 +32,10 @@ > int > lzjb_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len) > { > - uint8_t *src = s_start; > - uint8_t *dst = d_start; > - uint8_t *d_end = (uint8_t *)d_start + d_len; > - uint8_t *cpy, copymap = ''\0''; > + uchar_t *src = s_start; > + uchar_t *dst = d_start; > + uchar_t *d_end = (uchar_t *)d_start + d_len; > + uchar_t *cpy, copymap = ''\0''; > int copymask = 1 << (NBBY - 1); > > while (dst < d_end) { > @@ -49,7 +47,7 @@ lzjb_decompress(void *s_start, void *d_s > int mlen = (src[0] >> (NBBY - MATCH_BITS)) + MATCH_MIN; > int offset = ((src[0] << NBBY) | src[1]) & OFFSET_MASK; > src += 2; > - if ((cpy = dst - offset) < (uint8_t *)d_start) > + if ((cpy = dst - offset) < (uchar_t *)d_start) > return (-1); > while (--mlen >= 0 && dst < d_end) > *dst++ = *cpy++; > diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/zfs_sha256.c > --- a/tools/libfsimage/zfs/zfs_sha256.c Thu Oct 24 22:46:20 2013 +0100 > +++ b/tools/libfsimage/zfs/zfs_sha256.c Sat Oct 26 20:03:06 2013 +0400 > @@ -21,8 +21,6 @@ > * Use is subject to license 
terms. > */ > > -#pragma ident "%Z%%M% %I% %E% SMI" > - > #include "fsys_zfs.h" > > /* > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel
Ian Campbell
2013-Oct-31 12:49 UTC
Re: [PATCH] 1. changes for vdiskadm on illumos based platform
On Sat, 2013-10-26 at 20:09 +0400, Igor Kozhkuhov wrote:> 2. update ZFS in libfsimage from illumos for pygrubThis needs a Signed-off-by, please see http://wiki.xenproject.org/wiki/Submitting_Xen_Patches for information. Also please construct the commit message as a single line summary (shown in the short logs etc) followed by a longer description. The wiki has some hints on this too. Also you seem to be mixing various different types of fixes (make fixes, CFLAGS fixes, CC line fixes) which are then not well explained in the commit log. It is best to split these out into separate changes. As it stands this patch is huge and almost unreviewable.> diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/Rules.mk > --- a/tools/libfsimage/Rules.mk Thu Oct 24 22:46:20 2013 +0100 > +++ b/tools/libfsimage/Rules.mk Sat Oct 26 20:03:06 2013 +0400 > @@ -2,11 +2,19 @@ include $(XEN_ROOT)/tools/Rules.mk > > CFLAGS += -Wno-unknown-pragmas -I$(XEN_ROOT)/tools/libfsimage/common/ -DFSIMAGE_FSDIR=\"$(FSDIR)\" > CFLAGS += -Werror -D_GNU_SOURCE > +# need for build illumos ZFS > +CFLAGS += -Wno-parentheses > +CFLAGS += -Wno-unusedShould either be in zfs/Makefile or in config/SunOS.mk or at least added based on CONFIG_SunOS then I think.> +# end > LDFLAGS += -L../common/ > > PIC_OBJS := $(patsubst %.c,%.opic,$(LIB_SRCS-y)) > > -FSDIR = $(LIBDIR)/fs > +FSDIR-y = $(LIBDIR)/fs/$(FS) > +FSDIR-$(CONFIG_SunOS)-x86_64 = $(PREFIX)/lib/fs/$(FS)/64 > +FSDIR-$(CONFIG_SunOS)-x86_32 = $(PREFIX)/lib/fs/$(FS)/ > +FSDIR-$(CONFIG_SunOS) = $(FSDIR-$(CONFIG_SunOS)-$(XEN_TARGET_ARCH)) > +FSDIR = $(FSDIR-y)This seems to imply that LIBDIR is defined wrongly for the platform. 
Shouldn't this be fixed in config/SunOS.mk?> FSLIB = fsimage.so > > @@ -15,11 +23,14 @@ fs-all: $(FSLIB) > > .PHONY: fs-install > fs-install: fs-all > - $(INSTALL_DIR) $(DESTDIR)$(FSDIR)/$(FS) > - $(INSTALL_PROG) $(FSLIB) $(DESTDIR)$(FSDIR)/$(FS) > + $(INSTALL_DIR) $(DESTDIR)$(FSDIR) > + $(INSTALL_PROG) $(FSLIB) $(DESTDIR)$(FSDIR) > + > +BUILD_LINE-y = $(CC) $(LDFLAGS) $(SHLIB_LDFLAGS) -o $@ $^ -lfsimage $(FS_LIBDEPS) $(APPEND_LDFLAGS) > +BUILD_LINE-$(CONFIG_SunOS) = $(CC) $(CFLAGS) $(LDFLAGS) $(SHLIB_LDFLAGS) -o $@ $^ -lfsimage $(FS_LIBDEPS)This seems to be some simple reordering plus (unintentionally?) dropping $(APPEND_LDFLAGS). Do you have reason to think that the reordering will break on non-Illumos -- otherwise can't we just change it? Oh, you add $(CFLAGS), why? This is a link invocation. Perhaps this indicates that something is in CFLAGS which should be LDFLAGS?> $(FSLIB): $(PIC_OBJS) > - $(CC) $(LDFLAGS) $(SHLIB_LDFLAGS) -o $@ $^ -lfsimage $(FS_LIBDEPS) $(APPEND_LDFLAGS) > + $(BUILD_LINE-y) > > clean distclean:: > rm -f $(PIC_OBJS) $(FSLIB) $(DEPS) > diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/common/Makefile > --- a/tools/libfsimage/common/Makefile Thu Oct 24 22:46:20 2013 +0100 > +++ b/tools/libfsimage/common/Makefile Sat Oct 26 20:03:06 2013 +0400 > @@ -4,11 +4,16 @@ include $(XEN_ROOT)/tools/libfsimage/Rul > MAJOR = 1.0 > MINOR = 0 > > -LDFLAGS-$(CONFIG_SunOS) = -Wl,-M -Wl,mapfile-SunOS > +CFLAGS-ADDS-$(CONFIG_SunOS) += -Werror -Wp,-MD,.$(@F).d $(ADD_INCLUDES) > +CFLAGS-ADDS-$(CONFIG_SunOS) += -I/usr/include/libxml2This path should be detected (if it isn't already) by using pkgconfig from the configure script. Or perhaps xml2-config is the right thing. Either way hardcoding this here is wrong. 
Some of the comments I made previously seem to apply to the rest of this file too.> [...]> diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/common/fsimage.c > --- a/tools/libfsimage/common/fsimage.c Thu Oct 24 22:46:20 2013 +0100 > +++ b/tools/libfsimage/common/fsimage.c Sat Oct 26 20:03:06 2013 +0400 > @@ -36,22 +36,43 @@ > > static pthread_mutex_t fsi_lock = PTHREAD_MUTEX_INITIALIZER; > > +#ifdef _VDISK_What is _VDISK_? Where does it come from? I don't see it being defined here, nor do I see vdisk.h being added. There seems to be an awful lot of ifdeffery being added on the back of this. Without knowing more about vdisk I can't advise fully but it seems that some sort of refactoring would be preferable.> +#include "vdisk.h" > +#endif> ++#ifdef _VDISK_ > + if (ffi->ff_fsi->f_pvdisk) { > + ret = vdisk_read(ffi->ff_fsi->f_pvdisk, (off_t)off, > + buf, n); > + } else { > + ret = pread(ffi->ff_fsi->f_fd, buf, n, off); > + } > +#else > ret = pread(ffi->ff_fsi->f_fd, buf, n, off); > +#endifThis pattern seems to have been repeated a lot. I think it should be refactored into a function and expressed as { #ifdef _VDISK_ if (...->f_pvdisk) return vdisk_read(...) #endif return pread(...) } and equivalents for other ops like write etc. Or maybe a set of vdisk ops which are implemented in terms of pread etc is the way to go? 
(I'd need to know more about VDISK to say)> diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/common/fsimage_plugin.c > --- a/tools/libfsimage/common/fsimage_plugin.c Thu Oct 24 22:46:20 2013 +0100 > +++ b/tools/libfsimage/common/fsimage_plugin.c Sat Oct 26 20:03:06 2013 +0400 > @@ -122,6 +122,7 @@ fail: > static int load_plugins(void) > { > const char *fsdir = getenv("FSIMAGE_FSDIR"); > + const char *isadir = ""; > struct dirent *dp = NULL; > struct dirent *dpp; > DIR *dir = NULL; > @@ -130,8 +131,26 @@ static int load_plugins(void) > int err; > int ret = -1; > > +#if defined(FSIMAGE_FSDIR) > if (fsdir == NULL) > fsdir = FSIMAGE_FSDIR; > +#elif defined(__sun__) > + if (fsdir == NULL) > + fsdir = "/usr/lib/fs"; > + > + if (sizeof(void *) == 8) > + isadir = "64/";Can't all this come from configure and/or config.mk for the platform? Also, can't you just set fsdir to /usr/lib/fs or /usr/lib/fs/64 as desired and avoid adding isadir?> +#elif defined(__ia64__)I think you must have carried this ia64 support over a rebase, it has now been deleted.> + if (fsdir == NULL) > + fsdir = "/usr/lib/fs"; > +#else > + if (fsdir == NULL) { > + if (sizeof(void *) == 8) > + fsdir = "/usr/lib64/fs"; > + else > + fsdir = "/usr/lib/fs";This seems to have differing behaviour on non-SunOS from what was there before. Another rebasing artefact perhaps?> diff -r 7c12aaa128e3 -r c2e11847cac0 tools/libfsimage/zfs/fsi_zfs.h > --- a/tools/libfsimage/zfs/fsi_zfs.h Thu Oct 24 22:46:20 2013 +0100 > +++ b/tools/libfsimage/zfs/fsi_zfs.h Sat Oct 26 20:03:06 2013 +0400 > @@ -36,6 +36,8 @@There are a lot of ZFS updates here. Are they resynchronising with some existing upstream (perhaps grub?). If so please can you provide a reference to the version sync'd in the commit log. Ian.
Reasonably Related Threads
- [PATCH 0 of 1 v2] tools: honour --libdir when it is passed to ./configure
- Bug#609517: xen-utils-3.2-1: Pygrub can't find grub.conf in a reiserfs partion on amd64 arch
- [PATCH] Compile issue with tools/libfsimage/iso9660
- Re: [Xen-staging] [xen-unstable] Add iso9660 support to libfsimage.
- Re: [Xen-changelog] [xen-unstable] tools: Rationalise library soname versions.