Josef Bacik
2012-Jul-20 19:15 UTC
[PATCH] Btrfs-progs: detect if the disk we are formatting is a ssd V2
SSD''s do not gain anything by having metadata DUP turned on. The underlying file system that is a part of all SSD''s could easily map duplicate metadat blocks into the same erase block which effectively eliminates the benefit of duplicating the metadata on disk. So detect if we are formatting a single SSD drive and if we are do not use DUP. Thanks, Signed-off-by: Josef Bacik <jbacik@fusionio.com> --- V1->V2: use blkid to get the full disk in case we happen to be formatting a partition. Makefile | 2 +- mkfs.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 52 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 9694444..d827216 100644 --- a/Makefile +++ b/Makefile @@ -65,7 +65,7 @@ btrfsck: $(objects) btrfsck.o $(CC) $(CFLAGS) -o btrfsck btrfsck.o $(objects) $(LDFLAGS) $(LIBS) mkfs.btrfs: $(objects) mkfs.o - $(CC) $(CFLAGS) -o mkfs.btrfs $(objects) mkfs.o $(LDFLAGS) $(LIBS) + $(CC) $(CFLAGS) -o mkfs.btrfs $(objects) mkfs.o $(LDFLAGS) $(LIBS) -lblkid btrfs-debug-tree: $(objects) debug-tree.o $(CC) $(CFLAGS) -o btrfs-debug-tree $(objects) debug-tree.o $(LDFLAGS) $(LIBS) diff --git a/mkfs.c b/mkfs.c index dff5eb8..fc2b6ed 100644 --- a/mkfs.c +++ b/mkfs.c @@ -37,6 +37,7 @@ #include <linux/fs.h> #include <ctype.h> #include <attr/xattr.h> +#include <blkid/blkid.h> #include "kerncompat.h" #include "ctree.h" #include "disk-io.h" @@ -234,7 +235,7 @@ static int create_one_raid_group(struct btrfs_trans_handle *trans, static int create_raid_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 data_profile, int data_profile_opt, u64 metadata_profile, - int metadata_profile_opt, int mixed) + int metadata_profile_opt, int mixed, int ssd) { u64 num_devices = btrfs_super_num_devices(&root->fs_info->super_copy); u64 allowed; @@ -246,7 +247,7 @@ static int create_raid_groups(struct btrfs_trans_handle *trans, */ if (!metadata_profile_opt && !mixed) { metadata_profile = (num_devices > 1) ? - BTRFS_BLOCK_GROUP_RAID1 : BTRFS_BLOCK_GROUP_DUP; + BTRFS_BLOCK_GROUP_RAID1 : (ssd) ? 0: BTRFS_BLOCK_GROUP_DUP; } if (!data_profile_opt && !mixed) { data_profile = (num_devices > 1) ? @@ -1201,6 +1202,49 @@ static int zero_output_file(int out_fd, u64 size, u32 sectorsize) return ret; } +static int is_ssd(const char *file) +{ + char *devname; + blkid_probe probe; + char *dev; + char path[PATH_MAX]; + dev_t disk; + int fd; + char rotational; + + probe = blkid_new_probe_from_filename(file); + if (!probe) + return 0; + + /* + * We want to use blkid_devno_to_wholedisk() but it''s broken for some + * reason on F17 at least so we''ll do this trickery + */ + disk = blkid_probe_get_wholedisk_devno(probe); + devname = blkid_devno_to_devname(disk); + + dev = strrchr(devname, ''/''); + dev++; + + snprintf(path, PATH_MAX, "/sys/block/%s/queue/rotational", dev); + + free(devname); + blkid_free_probe(probe); + + fd = open(path, O_RDONLY); + if (fd < 0) { + return 0; + } + + if (read(fd, &rotational, sizeof(char)) < sizeof(char)) { + close(fd); + return 0; + } + close(fd); + + return !atoi((const char *)&rotational); +} + int main(int ac, char **av) { char *file; @@ -1227,6 +1271,7 @@ int main(int ac, char **av) int data_profile_opt = 0; int metadata_profile_opt = 0; int nodiscard = 0; + int ssd = 0; char *source_dir = NULL; int source_dir_set = 0; @@ -1352,6 +1397,9 @@ int main(int ac, char **av) exit(1); } } + + ssd = is_ssd(file); + if (mixed) { if (metadata_profile != data_profile) { fprintf(stderr, "With mixed block groups data and metadata " @@ -1438,7 +1486,7 @@ raid_groups: if (!source_dir_set) { ret = create_raid_groups(trans, root, data_profile, data_profile_opt, metadata_profile, - metadata_profile_opt, mixed); + metadata_profile_opt, mixed, ssd); BUG_ON(ret); } -- 1.7.7.6 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Goffredo Baroncelli
2012-Jul-20 19:36 UTC
Re: [PATCH] Btrfs-progs: detect if the disk we are formatting is a ssd V2
On 07/20/2012 09:15 PM, Josef Bacik wrote:> SSD''s do not gain anything by having metadata DUP turned on. The underlying > file system that is a part of all SSD''s could easily map duplicate metadatIf I understood correctly you are stating that because an SSD *might* "eliminates the benefit of duplicating the metadata" mkfs.btrfs *must* remove _silently_ this behaviour on all SSD ? To me it seems too strong; or almost it should be documented in the man page and/or issuing a warning during the format process.> blocks into the same erase block which effectively eliminates the benefit of > duplicating the metadata on disk. So detect if we are formatting a single > SSD drive and if we are do not use DUP. Thanks, > > Signed-off-by: Josef Bacik <jbacik@fusionio.com> > --- > V1->V2: use blkid to get the full disk in case we happen to be formatting a > partition. > > Makefile | 2 +- > mkfs.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++--- > 2 files changed, 52 insertions(+), 4 deletions(-) > > diff --git a/Makefile b/Makefile > index 9694444..d827216 100644 > --- a/Makefile > +++ b/Makefile > @@ -65,7 +65,7 @@ btrfsck: $(objects) btrfsck.o > $(CC) $(CFLAGS) -o btrfsck btrfsck.o $(objects) $(LDFLAGS) $(LIBS) > > mkfs.btrfs: $(objects) mkfs.o > - $(CC) $(CFLAGS) -o mkfs.btrfs $(objects) mkfs.o $(LDFLAGS) $(LIBS) > + $(CC) $(CFLAGS) -o mkfs.btrfs $(objects) mkfs.o $(LDFLAGS) $(LIBS) -lblkid > > btrfs-debug-tree: $(objects) debug-tree.o > $(CC) $(CFLAGS) -o btrfs-debug-tree $(objects) debug-tree.o $(LDFLAGS) $(LIBS) > diff --git a/mkfs.c b/mkfs.c > index dff5eb8..fc2b6ed 100644 > --- a/mkfs.c > +++ b/mkfs.c > @@ -37,6 +37,7 @@ > #include <linux/fs.h> > #include <ctype.h> > #include <attr/xattr.h> > +#include <blkid/blkid.h> > #include "kerncompat.h" > #include "ctree.h" > #include "disk-io.h" > @@ -234,7 +235,7 @@ static int create_one_raid_group(struct btrfs_trans_handle *trans, > static int create_raid_groups(struct btrfs_trans_handle *trans, > struct btrfs_root *root, u64 data_profile, > int data_profile_opt, u64 metadata_profile, > - int metadata_profile_opt, int mixed) > + int metadata_profile_opt, int mixed, int ssd) > { > u64 num_devices = btrfs_super_num_devices(&root->fs_info->super_copy); > u64 allowed; > @@ -246,7 +247,7 @@ static int create_raid_groups(struct btrfs_trans_handle *trans, > */ > if (!metadata_profile_opt && !mixed) {Please put something like + if(ssd) + printf("SSD detected: remove the metadata duplication;");> metadata_profile = (num_devices > 1) ? > - BTRFS_BLOCK_GROUP_RAID1 : BTRFS_BLOCK_GROUP_DUP; > + BTRFS_BLOCK_GROUP_RAID1 : (ssd) ? 0: BTRFS_BLOCK_GROUP_DUP; > } > if (!data_profile_opt && !mixed) { > data_profile = (num_devices > 1) ? > @@ -1201,6 +1202,49 @@ static int zero_output_file(int out_fd, u64 size, u32 sectorsize) > return ret; > } > > +static int is_ssd(const char *file) > +{ > + char *devname; > + blkid_probe probe; > + char *dev; > + char path[PATH_MAX]; > + dev_t disk; > + int fd; > + char rotational; > + > + probe = blkid_new_probe_from_filename(file); > + if (!probe) > + return 0; > + > + /* > + * We want to use blkid_devno_to_wholedisk() but it''s broken for some > + * reason on F17 at least so we''ll do this trickery > + */ > + disk = blkid_probe_get_wholedisk_devno(probe); > + devname = blkid_devno_to_devname(disk); > + > + dev = strrchr(devname, ''/''); > + dev++; > + > + snprintf(path, PATH_MAX, "/sys/block/%s/queue/rotational", dev); > + > + free(devname); > + blkid_free_probe(probe); > + > + fd = open(path, O_RDONLY); > + if (fd < 0) { > + return 0; > + } > + > + if (read(fd, &rotational, sizeof(char)) < sizeof(char)) { > + close(fd); > + return 0; > + } > + close(fd); > + > + return !atoi((const char *)&rotational); > +} > + > int main(int ac, char **av) > { > char *file; > @@ -1227,6 +1271,7 @@ int main(int ac, char **av) > int data_profile_opt = 0; > int metadata_profile_opt = 0; > int nodiscard = 0; > + int ssd = 0; > > char *source_dir = NULL; > int source_dir_set = 0; > @@ -1352,6 +1397,9 @@ int main(int ac, char **av) > exit(1); > } > } > + > + ssd = is_ssd(file); > +> if (mixed) { > if (metadata_profile != data_profile) { > fprintf(stderr, "With mixed block groups data and metadata " > @@ -1438,7 +1486,7 @@ raid_groups: > if (!source_dir_set) { > ret = create_raid_groups(trans, root, data_profile, > data_profile_opt, metadata_profile, > - metadata_profile_opt, mixed); > + metadata_profile_opt, mixed, ssd); > BUG_ON(ret); > } >-- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Wendy Cheng
2012-Jul-20 22:38 UTC
Re: [PATCH] Btrfs-progs: detect if the disk we are formatting is a ssd V2
On Fri, Jul 20, 2012 at 12:36 PM, Goffredo Baroncelli <kreijack@libero.it> wrote:> On 07/20/2012 09:15 PM, Josef Bacik wrote: >> SSD''s do not gain anything by having metadata DUP turned on. The underlying >> file system that is a part of all SSD''s could easily map duplicate metadat > > If I understood correctly you are stating that because an SSD *might* > "eliminates the benefit of duplicating the metadata" mkfs.btrfs *must* > remove _silently_ this behaviour on all SSD ? > > To me it seems too strong; or almost it should be documented in the man > page and/or issuing a warning during the format process.I''ll have to second this .. this is my first time looking into btrfs - do feel free to correct me if my reading is not correct. Based on https://btrfs.wiki.kernel.org/index.php/Glossary, I assume the DUP is a flag to ask for meta-data duplication within the same device entity. This implies whenever an FS (meta-data) block is updated, the duplicated FS block needs to be modified as well (true ?). So within a conventional SSD firmware implementation, it is true that both FS blocks could end up in the same SSD block that get erased and re-allocated together. Similar thing could happen with disks that have embedded de-duplication feature turned on. However, this should have been a task for the admin (or whoever types this mkfs command). It is not a filesystem''s job to assume how the firmware works and silently ignore the DUP request, *unless* there is a standard specification clearly describes linux devices that claim to be not "rotational" should behave this way. -- Wendy -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Josef Bacik
2012-Jul-23 12:46 UTC
Re: [PATCH] Btrfs-progs: detect if the disk we are formatting is a ssd V2
On Fri, Jul 20, 2012 at 04:38:59PM -0600, Wendy Cheng wrote:> On Fri, Jul 20, 2012 at 12:36 PM, Goffredo Baroncelli > <kreijack@libero.it> wrote: > > On 07/20/2012 09:15 PM, Josef Bacik wrote: > >> SSD''s do not gain anything by having metadata DUP turned on. The underlying > >> file system that is a part of all SSD''s could easily map duplicate metadat > > > > If I understood correctly you are stating that because an SSD *might* > > "eliminates the benefit of duplicating the metadata" mkfs.btrfs *must* > > remove _silently_ this behaviour on all SSD ? > > > > To me it seems too strong; or almost it should be documented in the man > > page and/or issuing a warning during the format process. > > I''ll have to second this .. this is my first time looking into btrfs - > do feel free to correct me if my reading is not correct. > > Based on https://btrfs.wiki.kernel.org/index.php/Glossary, I assume > the DUP is a flag to ask for meta-data duplication within the same > device entity. This implies whenever an FS (meta-data) block is > updated, the duplicated FS block needs to be modified as well (true > ?). So within a conventional SSD firmware implementation, it is true > that both FS blocks could end up in the same SSD block that get erased > and re-allocated together. Similar thing could happen with disks that > have embedded de-duplication feature turned on. > > However, this should have been a task for the admin (or whoever types > this mkfs command). It is not a filesystem''s job to assume how the > firmware works and silently ignore the DUP request, *unless* there is > a standard specification clearly describes linux devices that claim to > be not "rotational" should behave this way. >The admin can still use -m dup if he wants the added possiblity of protection, this just makes the default not dup. Thanks, Josef -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Goffredo Baroncelli
2012-Jul-23 17:01 UTC
Re: [PATCH] Btrfs-progs: detect if the disk we are formatting is a ssd V2
On 07/23/2012 02:46 PM, Josef Bacik wrote:> On Fri, Jul 20, 2012 at 04:38:59PM -0600, Wendy Cheng wrote:>> However, this should have been a task for the admin (or whoever types >> this mkfs command). It is not a filesystem''s job to assume how the >> firmware works and silently ignore the DUP request, *unless* there is >> a standard specification clearly describes linux devices that claim to >> be not "rotational" should behave this way. >> > > The admin can still use -m dup if he wants the added possiblity of protection, > this just makes the default not dup.Josef, this was clear to us with the the code at hand. However, what was pointed out is that is change of a well established behaviour without documenting it not informing it. I don''t arguing about the rationale, what I am telling is : ok to the change but you have to inform the user. Saying "but the user can revert the change passing ''-m dup''" is not a valid response, because the user *could* revert the change if he *would* be informed about the change. BR G.Baroncelli>Thanks, > > Josef > . >-- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Josef Bacik
2012-Jul-23 17:06 UTC
Re: [PATCH] Btrfs-progs: detect if the disk we are formatting is a ssd V2
On Mon, Jul 23, 2012 at 11:01:17AM -0600, Goffredo Baroncelli wrote:> On 07/23/2012 02:46 PM, Josef Bacik wrote: > > On Fri, Jul 20, 2012 at 04:38:59PM -0600, Wendy Cheng wrote: > > >> However, this should have been a task for the admin (or whoever types > >> this mkfs command). It is not a filesystem''s job to assume how the > >> firmware works and silently ignore the DUP request, *unless* there is > >> a standard specification clearly describes linux devices that claim to > >> be not "rotational" should behave this way. > >> > > > > The admin can still use -m dup if he wants the added possiblity of protection, > > this just makes the default not dup. > > Josef, > > this was clear to us with the the code at hand. However, what was > pointed out is that is change of a well established behaviour without > documenting it not informing it. > > I don''t arguing about the rationale, what I am telling is : ok to the > change but you have to inform the user. > > Saying "but the user can revert the change passing ''-m dup''" is not a > valid response, because the user *could* revert the change if he *would* > be informed about the change. >Yeah sorry Goffredo I agree and I changed it locally, I just haven''t sent it out yet. Thanks, Josef -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Josef Bacik
2012-Jul-23 17:22 UTC
[PATCH] Btrfs-progs: detect if the disk we are formatting is a ssd V2
SSD''s do not gain anything by having metadata DUP turned on. The underlying file system that is a part of all SSD''s could easily map duplicate metadat blocks into the same erase block which effectively eliminates the benefit of duplicating the metadata on disk. So detect if we are formatting a single SSD drive and if we are do not use DUP. Thanks, Signed-off-by: Josef Bacik <jbacik@fusionio.com> --- V1->V2: add explanation to man page and print a message if we default to single metadata in the case of an SSD. Makefile | 2 +- man/mkfs.btrfs.8.in | 5 +++- mkfs.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 60 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 9694444..d827216 100644 --- a/Makefile +++ b/Makefile @@ -65,7 +65,7 @@ btrfsck: $(objects) btrfsck.o $(CC) $(CFLAGS) -o btrfsck btrfsck.o $(objects) $(LDFLAGS) $(LIBS) mkfs.btrfs: $(objects) mkfs.o - $(CC) $(CFLAGS) -o mkfs.btrfs $(objects) mkfs.o $(LDFLAGS) $(LIBS) + $(CC) $(CFLAGS) -o mkfs.btrfs $(objects) mkfs.o $(LDFLAGS) $(LIBS) -lblkid btrfs-debug-tree: $(objects) debug-tree.o $(CC) $(CFLAGS) -o btrfs-debug-tree $(objects) debug-tree.o $(LDFLAGS) $(LIBS) diff --git a/man/mkfs.btrfs.8.in b/man/mkfs.btrfs.8.in index fc2e1d2..dfa7996 100644 --- a/man/mkfs.btrfs.8.in +++ b/man/mkfs.btrfs.8.in @@ -46,7 +46,10 @@ Specify a label for the filesystem. .TP \fB\-m\fR, \fB\-\-metadata \fIprofile\fR Specify how metadata must be spanned across the devices specified. Valid -values are raid0, raid1, raid10 or single. +values are raid0, raid1, raid10, single or dup. Single device will have dup +set by default except in the case of SSDs which will default to single. This is +because SSDs can remap blocks internally so duplicate blocks could end up in the +same erase block which negates the benefits of doing metadata duplication. .TP \fB\-M\fR, \fB\-\-mixed\fR Mix data and metadata chunks together for more efficient space diff --git a/mkfs.c b/mkfs.c index dff5eb8..8816db8 100644 --- a/mkfs.c +++ b/mkfs.c @@ -37,6 +37,7 @@ #include <linux/fs.h> #include <ctype.h> #include <attr/xattr.h> +#include <blkid/blkid.h> #include "kerncompat.h" #include "ctree.h" #include "disk-io.h" @@ -234,7 +235,7 @@ static int create_one_raid_group(struct btrfs_trans_handle *trans, static int create_raid_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 data_profile, int data_profile_opt, u64 metadata_profile, - int metadata_profile_opt, int mixed) + int metadata_profile_opt, int mixed, int ssd) { u64 num_devices = btrfs_super_num_devices(&root->fs_info->super_copy); u64 allowed; @@ -245,8 +246,12 @@ static int create_raid_groups(struct btrfs_trans_handle *trans, * For mixed groups defaults are single/single. */ if (!metadata_profile_opt && !mixed) { + if (num_devices == 1 && ssd) + printf("Detected a SSD, turning off metadata " + "duplication. Mkfs with -m dup if you want to " + "force metadata duplication.\n"); metadata_profile = (num_devices > 1) ? - BTRFS_BLOCK_GROUP_RAID1 : BTRFS_BLOCK_GROUP_DUP; + BTRFS_BLOCK_GROUP_RAID1 : (ssd) ? 0: BTRFS_BLOCK_GROUP_DUP; } if (!data_profile_opt && !mixed) { data_profile = (num_devices > 1) ? @@ -1201,6 +1206,49 @@ static int zero_output_file(int out_fd, u64 size, u32 sectorsize) return ret; } +static int is_ssd(const char *file) +{ + char *devname; + blkid_probe probe; + char *dev; + char path[PATH_MAX]; + dev_t disk; + int fd; + char rotational; + + probe = blkid_new_probe_from_filename(file); + if (!probe) + return 0; + + /* + * We want to use blkid_devno_to_wholedisk() but it''s broken for some + * reason on F17 at least so we''ll do this trickery + */ + disk = blkid_probe_get_wholedisk_devno(probe); + devname = blkid_devno_to_devname(disk); + + dev = strrchr(devname, ''/''); + dev++; + + snprintf(path, PATH_MAX, "/sys/block/%s/queue/rotational", dev); + + free(devname); + blkid_free_probe(probe); + + fd = open(path, O_RDONLY); + if (fd < 0) { + return 0; + } + + if (read(fd, &rotational, sizeof(char)) < sizeof(char)) { + close(fd); + return 0; + } + close(fd); + + return !atoi((const char *)&rotational); +} + int main(int ac, char **av) { char *file; @@ -1227,6 +1275,7 @@ int main(int ac, char **av) int data_profile_opt = 0; int metadata_profile_opt = 0; int nodiscard = 0; + int ssd = 0; char *source_dir = NULL; int source_dir_set = 0; @@ -1352,6 +1401,9 @@ int main(int ac, char **av) exit(1); } } + + ssd = is_ssd(file); + if (mixed) { if (metadata_profile != data_profile) { fprintf(stderr, "With mixed block groups data and metadata " @@ -1438,7 +1490,7 @@ raid_groups: if (!source_dir_set) { ret = create_raid_groups(trans, root, data_profile, data_profile_opt, metadata_profile, - metadata_profile_opt, mixed); + metadata_profile_opt, mixed, ssd); BUG_ON(ret); } -- 1.7.7.6 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html