Stefan Bader
2013-May-13 17:47 UTC
[PATCH] xen-blk(front|back): Handle large physical sector disks
I accidentally realized today that any domU''s using the paravirt disk driver potentially suffer from poor performance when they get handed in a physical volume and partitioning is done inside the guest. The physical volume passed in has to be one that has the compat 512 logical sector size but hints its real sector size (eg. 4096) as physical sector size. In dom0 handling is correct and partitions or logical volumes there would be aligned to 4k. But the blkfront driver just gets one sector size (the logical) passed in from netback. And so inside the guest the drive appears to be an old style 512/512 drive. Alignment of partitions created in the guest very likely go wrong somewhere (they do for me). I tried to fix this in a way that works with all four combinations of dom0 and domU drivers. Tested those with a PVM guest that was set up to have a logical volume passed in as xvdb). Sidenote: PVM guests that map files or volume directly to partitions may be accidentally ok as ext4 uses 4k blocks by default). What I am not sure about is whether this also is sufficient for handling migration (possible to another host with other sectors). But I think that the units of tables is still 512, only alignment is changed. So it should more or less work. How does this look to you? -Stefan From 6c200e6666cd4d632e2234d267e387b72d69a95c Mon Sep 17 00:00:00 2001 From: Stefan Bader <stefan.bader@canonical.com> Date: Mon, 13 May 2013 16:28:15 +0200 Subject: [PATCH] xen/blk: Use physical sector size for setup Currently xen-blkback passes the logical sector size over xenbus and xen-blkfront sets up the paravirt disk with that logical block size. But newer drives usually have the logical sector size set to 512 for compatibility reasons and would show the actual sector size only in physical sector size. This results in the device being partitioned and accessed in dom0 with the correct sector size, but the guest thinks 512 bytes is the correct block size. And that results in poor performance. To fix this, blkback gets modified to pass also physical-sector-size over xenbus and blkfront to use both values to set up the paravirt disk. I did not just change the passed in sector-size because I am not sure having a bigger logical sector size than the physical one is valid (and that would happen if a newer dom0 kernel hits an older domU kernel). Also this way a domU set up before should still be accessible (just some tools might detect the unaligned setup). Signed-off-by: Stefan Bader <stefan.bader@canonical.com> --- drivers/block/xen-blkback/xenbus.c | 7 +++++++ drivers/block/xen-blkfront.c | 24 ++++++++++++++++++++---- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 8bfd1bc..1404204 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -704,6 +704,13 @@ again: dev->nodename); goto abort; } + err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u", + bdev_physical_block_size(be->blkif->vbd.bdev)); + if (err) { + xenbus_dev_fatal(dev, err, "writing %s/physical-sector-size", + dev->nodename); + goto abort; + } err = xenbus_transaction_end(xbt, 0); if (err == -EAGAIN) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index d89ef86..60ec315 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -542,7 +542,8 @@ wait: flush_requests(info); } -static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) +static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, + unsigned int physical_sector_size) { struct request_queue *rq; struct blkfront_info *info = gd->private_data; @@ -564,6 +565,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) /* Hard sector size and max sectors impersonate the equiv. hardware. */ blk_queue_logical_block_size(rq, sector_size); + blk_queue_physical_block_size(rq, physical_sector_size); blk_queue_max_hw_sectors(rq, 512); /* Each segment in a request is up to an aligned page in size. */ @@ -667,7 +669,8 @@ static char *encode_disk_name(char *ptr, unsigned int n) static int xlvbd_alloc_gendisk(blkif_sector_t capacity, struct blkfront_info *info, - u16 vdisk_info, u16 sector_size) + u16 vdisk_info, u16 sector_size, + unsigned int physical_sector_size) { struct gendisk *gd; int nr_minors = 1; @@ -734,7 +737,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, gd->driverfs_dev = &(info->xbdev->dev); set_capacity(gd, capacity); - if (xlvbd_init_blk_queue(gd, sector_size)) { + if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size)) { del_gendisk(gd); goto release; } @@ -1395,6 +1398,7 @@ static void blkfront_connect(struct blkfront_info *info) { unsigned long long sectors; unsigned long sector_size; + unsigned int physical_sector_size; unsigned int binfo; int err; int barrier, flush, discard, persistent; @@ -1437,6 +1441,17 @@ static void blkfront_connect(struct blkfront_info *info) return; } + /* + * physcial-sector-size is a newer field, so old backends may not + * provide this. Assume physical sector size to be the same as + * sector_size in that case. + */ + err = xenbus_gather(XBT_NIL, info->xbdev->otherend, + "physical-sector-size", "%u", &physical_sector_size, + NULL); + if (err) + physical_sector_size = sector_size; + info->feature_flush = 0; info->flush_op = 0; @@ -1483,7 +1498,8 @@ static void blkfront_connect(struct blkfront_info *info) else info->feature_persistent = persistent; - err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); + err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size, + physical_sector_size); if (err) { xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", info->xbdev->otherend); -- 1.7.9.5 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Jan Beulich
2013-May-14 08:04 UTC
Re: [PATCH] xen-blk(front|back): Handle large physical sector disks
>>> On 13.05.13 at 19:47, Stefan Bader <stefan.bader@canonical.com> wrote: > --- a/drivers/block/xen-blkback/xenbus.c > +++ b/drivers/block/xen-blkback/xenbus.c > @@ -704,6 +704,13 @@ again: > dev->nodename); > goto abort; > } > + err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u", > + bdev_physical_block_size(be->blkif->vbd.bdev)); > + if (err) { > + xenbus_dev_fatal(dev, err, "writing %s/physical-sector-size", > + dev->nodename); > + goto abort;Failure here should not be fatal (as with any other protocol extensions). Beyond that the patch looks good to me. Jan
Roger Pau Monné
2013-May-14 14:49 UTC
Re: [PATCH] xen-blk(front|back): Handle large physical sector disks
On 13/05/13 19:47, Stefan Bader wrote:> I accidentally realized today that any domU''s using the paravirt disk driver > potentially suffer from poor performance when they get handed in a physical > volume and partitioning is done inside the guest. The physical volume passed in > has to be one that has the compat 512 logical sector size but hints its real > sector size (eg. 4096) as physical sector size. > > In dom0 handling is correct and partitions or logical volumes there would be > aligned to 4k. But the blkfront driver just gets one sector size (the logical) > passed in from netback. And so inside the guest the drive appears to be an old > style 512/512 drive. Alignment of partitions created in the guest very likely go > wrong somewhere (they do for me). > > I tried to fix this in a way that works with all four combinations of dom0 and > domU drivers. Tested those with a PVM guest that was set up to have a logical > volume passed in as xvdb). Sidenote: PVM guests that map files or volume > directly to partitions may be accidentally ok as ext4 uses 4k blocks by default). > > What I am not sure about is whether this also is sufficient for handling > migration (possible to another host with other sectors). But I think that the > units of tables is still 512, only alignment is changed. So it should more or > less work. > > How does this look to you?Thanks for the patch, apart from Jan''s comment, it looks good to me. I just have one question, if for example we are using iscsi disks, do different initiators report different physical sector sizes for the same disk? I''m asking this because AFAICS blk_queue_physical_block_size will only be set when the disk is first attached, but not when recovering from migration, and if different initiators report different physical sector sizes we should probably call blk_queue_physical_block_size with the new value when recovering from migration.
Stefan Bader
2013-May-14 16:11 UTC
Re: [PATCH] xen-blk(front|back): Handle large physical sector disks
On 14.05.2013 16:49, Roger Pau Monné wrote:> On 13/05/13 19:47, Stefan Bader wrote: >> I accidentally realized today that any domU''s using the paravirt disk driver >> potentially suffer from poor performance when they get handed in a physical >> volume and partitioning is done inside the guest. The physical volume passed in >> has to be one that has the compat 512 logical sector size but hints its real >> sector size (eg. 4096) as physical sector size. >> >> In dom0 handling is correct and partitions or logical volumes there would be >> aligned to 4k. But the blkfront driver just gets one sector size (the logical) >> passed in from netback. And so inside the guest the drive appears to be an old >> style 512/512 drive. Alignment of partitions created in the guest very likely go >> wrong somewhere (they do for me). >> >> I tried to fix this in a way that works with all four combinations of dom0 and >> domU drivers. Tested those with a PVM guest that was set up to have a logical >> volume passed in as xvdb). Sidenote: PVM guests that map files or volume >> directly to partitions may be accidentally ok as ext4 uses 4k blocks by default). >> >> What I am not sure about is whether this also is sufficient for handling >> migration (possible to another host with other sectors). But I think that the >> units of tables is still 512, only alignment is changed. So it should more or >> less work. >> >> How does this look to you? > > Thanks for the patch, apart from Jan''s comment, it looks good to me. IThanks, I will re-spin/-send the patch soonish.> just have one question, if for example we are using iscsi disks, do > different initiators report different physical sector sizes for the same > disk?Personally I have not yet used an iscsi setup, yet. But if it is the same physical disk used by the target, the initiators should report the same sizes. An interesting case would be stacks that combine multiple disks. But that is another thing. But would the usual iscsi setup not rather be to have the guest directly talk to the target? Thus not using blkfront at all?> > I''m asking this because AFAICS blk_queue_physical_block_size will only > be set when the disk is first attached, but not when recovering from > migration, and if different initiators report different physical sector > sizes we should probably call blk_queue_physical_block_size with the new > value when recovering from migration.Initially I was wondering whether this needs to support cases where a guest is saved, and then this dump and the contents of a disk were transfered (ending up on a different physical disk). But that if probably just crazy, impossible or both.> > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Stefan Bader
2013-May-15 09:26 UTC
Re: [PATCH] xen-blk(front|back): Handle large physical sector disks
On 14.05.2013 10:04, Jan Beulich wrote:>>>> On 13.05.13 at 19:47, Stefan Bader <stefan.bader@canonical.com> wrote: >> --- a/drivers/block/xen-blkback/xenbus.c >> +++ b/drivers/block/xen-blkback/xenbus.c >> @@ -704,6 +704,13 @@ again: >> dev->nodename); >> goto abort; >> } >> + err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u", >> + bdev_physical_block_size(be->blkif->vbd.bdev)); >> + if (err) { >> + xenbus_dev_fatal(dev, err, "writing %s/physical-sector-size", >> + dev->nodename); >> + goto abort; > > Failure here should not be fatal (as with any other protocol > extensions).So I suppose that should be xenbus_dev_error and no abort here. Just wondering (and sorry for being thick headed here) why would a failure here be different in severity for an extension or not. Is that not just adding an element to the xenstore object and failure would not be related to this being an extension?> > Beyond that the patch looks good to me. > > Jan >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Jan Beulich
2013-May-15 09:47 UTC
Re: [PATCH] xen-blk(front|back): Handle large physical sector disks
>>> On 15.05.13 at 11:26, Stefan Bader <stefan.bader@canonical.com> wrote: > On 14.05.2013 10:04, Jan Beulich wrote: >>>>> On 13.05.13 at 19:47, Stefan Bader <stefan.bader@canonical.com> wrote: >>> --- a/drivers/block/xen-blkback/xenbus.c >>> +++ b/drivers/block/xen-blkback/xenbus.c >>> @@ -704,6 +704,13 @@ again: >>> dev->nodename); >>> goto abort; >>> } >>> + err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u", >>> + bdev_physical_block_size(be->blkif->vbd.bdev)); >>> + if (err) { >>> + xenbus_dev_fatal(dev, err, "writing %s/physical-sector-size", >>> + dev->nodename); >>> + goto abort; >> >> Failure here should not be fatal (as with any other protocol >> extensions). > > So I suppose that should be xenbus_dev_error and no abort here. Just > wondering > (and sorry for being thick headed here) why would a failure here be > different in > severity for an extension or not. Is that not just adding an element to the > xenstore object and failure would not be related to this being an extension?A driver should only bail upon encountering a problem that it can''t recover from. Failure to write a xenstore node that neither the backend nor the frontend really require for their work is certainly not among those. Yes, it''s only a xenstore write, but it can fail at least theoretically (or else there wouldn''t be a need for error handling here in the first place), and you shouldn''t handle such failure in undue ways (i.e. failure to write required nodes is fatal, but failure to write nodes related to extensions isn''t). Jan
James Harper
2013-May-15 10:04 UTC
Re: [PATCH] xen-blk(front|back): Handle large physical sector disks
> > >>> On 15.05.13 at 11:26, Stefan Bader <stefan.bader@canonical.com> > wrote: > > On 14.05.2013 10:04, Jan Beulich wrote: > >>>>> On 13.05.13 at 19:47, Stefan Bader <stefan.bader@canonical.com> > wrote: > >>> --- a/drivers/block/xen-blkback/xenbus.c > >>> +++ b/drivers/block/xen-blkback/xenbus.c > >>> @@ -704,6 +704,13 @@ again: > >>> dev->nodename); > >>> goto abort; > >>> } > >>> + err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", > "%u", > >>> + bdev_physical_block_size(be->blkif->vbd.bdev)); > >>> + if (err) { > >>> + xenbus_dev_fatal(dev, err, "writing %s/physical-sector-size", > >>> + dev->nodename); > >>> + goto abort; > >> > >> Failure here should not be fatal (as with any other protocol > >> extensions). > > > > So I suppose that should be xenbus_dev_error and no abort here. Just > > wondering > > (and sorry for being thick headed here) why would a failure here be > > different in > > severity for an extension or not. Is that not just adding an element to the > > xenstore object and failure would not be related to this being an > extension? > > A driver should only bail upon encountering a problem that it can''t > recover from. Failure to write a xenstore node that neither the > backend nor the frontend really require for their work is certainly > not among those. Yes, it''s only a xenstore write, but it can fail at > least theoretically (or else there wouldn''t be a need for error > handling here in the first place), and you shouldn''t handle such > failure in undue ways (i.e. failure to write required nodes is fatal, > but failure to write nodes related to extensions isn''t). >What is the recovery though? If the physical block size is unusual (eg not 512) and the write has failed, what is the outcome? I suspect that it''s going to not be what the user expected - partitions could be incorrectly aligned if doing an install, etc. If it were my system then in the (vanishingly rare?) case that this write failed, I''d prefer a hard failure. If a simple write to xenstore fails then isn''t the world coming to an end anyway? James
Stefan Bader
2013-May-15 10:10 UTC
Re: [PATCH] xen-blk(front|back): Handle large physical sector disks
On 15.05.2013 12:04, James Harper wrote:>> >>>>> On 15.05.13 at 11:26, Stefan Bader <stefan.bader@canonical.com> >> wrote: >>> On 14.05.2013 10:04, Jan Beulich wrote: >>>>>>> On 13.05.13 at 19:47, Stefan Bader <stefan.bader@canonical.com> >> wrote: >>>>> --- a/drivers/block/xen-blkback/xenbus.c +++ >>>>> b/drivers/block/xen-blkback/xenbus.c @@ -704,6 +704,13 @@ again: >>>>> dev->nodename); goto abort; } + err = xenbus_printf(xbt, >>>>> dev->nodename, "physical-sector-size", >> "%u", >>>>> + bdev_physical_block_size(be->blkif->vbd.bdev)); + if (err) { >>>>> + xenbus_dev_fatal(dev, err, "writing %s/physical-sector-size", + >>>>> dev->nodename); + goto abort; >>>> >>>> Failure here should not be fatal (as with any other protocol >>>> extensions). >>> >>> So I suppose that should be xenbus_dev_error and no abort here. Just >>> wondering (and sorry for being thick headed here) why would a failure >>> here be different in severity for an extension or not. Is that not just >>> adding an element to the xenstore object and failure would not be related >>> to this being an >> extension? >> >> A driver should only bail upon encountering a problem that it can''t recover >> from. Failure to write a xenstore node that neither the backend nor the >> frontend really require for their work is certainly not among those. Yes, >> it''s only a xenstore write, but it can fail at least theoretically (or else >> there wouldn''t be a need for error handling here in the first place), and >> you shouldn''t handle such failure in undue ways (i.e. failure to write >> required nodes is fatal, but failure to write nodes related to extensions >> isn''t). >> > > What is the recovery though? If the physical block size is unusual (eg not > 512) and the write has failed, what is the outcome? I suspect that it''s going > to not be what the user expected - partitions could be incorrectly aligned if > doing an install, etc. If it were my system then in the (vanishingly rare?) > case that this write failed, I''d prefer a hard failure.If the write fails the frontend just behaves as today and you may get the wrong alignment. It is not a fatal error, the access to the device will be possible in all variations. Just performance may be unexpectedly poor.> > If a simple write to xenstore fails then isn''t the world coming to an end > anyway?Probably true. And I wonder whether its moot one way or the other as probably those cases where this write would fail, it would have failed before and one doe not get there at all.> > James >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Stefan Bader
2013-May-15 10:58 UTC
Re: [PATCH] xen-blk(front|back): Handle large physical sector disks
On 14.05.2013 10:04, Jan Beulich wrote:>>>> On 13.05.13 at 19:47, Stefan Bader <stefan.bader@canonical.com> wrote: >> --- a/drivers/block/xen-blkback/xenbus.c >> +++ b/drivers/block/xen-blkback/xenbus.c >> @@ -704,6 +704,13 @@ again: >> dev->nodename); >> goto abort; >> } >> + err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u", >> + bdev_physical_block_size(be->blkif->vbd.bdev)); >> + if (err) { >> + xenbus_dev_fatal(dev, err, "writing %s/physical-sector-size", >> + dev->nodename); >> + goto abort; > > Failure here should not be fatal (as with any other protocol > extensions). > > Beyond that the patch looks good to me. > > Jan >Ok, here the version with the fatal error changed to just error message. From 922e5f4c3074559f5cb5475956bd4a1723cc4fdc Mon Sep 17 00:00:00 2001 From: Stefan Bader <stefan.bader@canonical.com> Date: Mon, 13 May 2013 16:28:15 +0200 Subject: [PATCH] xen/blk: Use physical sector size for setup Currently xen-blkback passes the logical sector size over xenbus and xen-blkfront sets up the paravirt disk with that logical block size. But newer drives usually have the logical sector size set to 512 for compatibility reasons and would show the actual sector size only in physical sector size. This results in the device being partitioned and accessed in dom0 with the correct sector size, but the guest thinks 512 bytes is the correct block size. And that results in poor performance. To fix this, blkback gets modified to pass also physical-sector-size over xenbus and blkfront to use both values to set up the paravirt disk. I did not just change the passed in sector-size because I am not sure having a bigger logical sector size than the physical one is valid (and that would happen if a newer dom0 kernel hits an older domU kernel). Also this way a domU set up before should still be accessible (just some tools might detect the unaligned setup). Signed-off-by: Stefan Bader <stefan.bader@canonical.com> --- drivers/block/xen-blkback/xenbus.c | 5 +++++ drivers/block/xen-blkfront.c | 24 ++++++++++++++++++++---- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 8bfd1bc..25463e3 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -704,6 +704,11 @@ again: dev->nodename); goto abort; } + err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u", + bdev_physical_block_size(be->blkif->vbd.bdev)); + if (err) + xenbus_dev_error(dev, err, "writing %s/physical-sector-size", + dev->nodename); err = xenbus_transaction_end(xbt, 0); if (err == -EAGAIN) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index d89ef86..60ec315 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -542,7 +542,8 @@ wait: flush_requests(info); } -static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) +static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, + unsigned int physical_sector_size) { struct request_queue *rq; struct blkfront_info *info = gd->private_data; @@ -564,6 +565,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) /* Hard sector size and max sectors impersonate the equiv. hardware. */ blk_queue_logical_block_size(rq, sector_size); + blk_queue_physical_block_size(rq, physical_sector_size); blk_queue_max_hw_sectors(rq, 512); /* Each segment in a request is up to an aligned page in size. */ @@ -667,7 +669,8 @@ static char *encode_disk_name(char *ptr, unsigned int n) static int xlvbd_alloc_gendisk(blkif_sector_t capacity, struct blkfront_info *info, - u16 vdisk_info, u16 sector_size) + u16 vdisk_info, u16 sector_size, + unsigned int physical_sector_size) { struct gendisk *gd; int nr_minors = 1; @@ -734,7 +737,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, gd->driverfs_dev = &(info->xbdev->dev); set_capacity(gd, capacity); - if (xlvbd_init_blk_queue(gd, sector_size)) { + if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size)) { del_gendisk(gd); goto release; } @@ -1395,6 +1398,7 @@ static void blkfront_connect(struct blkfront_info *info) { unsigned long long sectors; unsigned long sector_size; + unsigned int physical_sector_size; unsigned int binfo; int err; int barrier, flush, discard, persistent; @@ -1437,6 +1441,17 @@ static void blkfront_connect(struct blkfront_info *info) return; } + /* + * physcial-sector-size is a newer field, so old backends may not + * provide this. Assume physical sector size to be the same as + * sector_size in that case. + */ + err = xenbus_gather(XBT_NIL, info->xbdev->otherend, + "physical-sector-size", "%u", &physical_sector_size, + NULL); + if (err) + physical_sector_size = sector_size; + info->feature_flush = 0; info->flush_op = 0; @@ -1483,7 +1498,8 @@ static void blkfront_connect(struct blkfront_info *info) else info->feature_persistent = persistent; - err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); + err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size, + physical_sector_size); if (err) { xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", info->xbdev->otherend); -- 1.7.9.5 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Jan Beulich
2013-May-15 12:23 UTC
Re: [PATCH] xen-blk(front|back): Handle large physical sector disks
>>> On 15.05.13 at 12:04, James Harper <james.harper@bendigoit.com.au> wrote: >> >> >>> On 15.05.13 at 11:26, Stefan Bader <stefan.bader@canonical.com> >> wrote: >> > On 14.05.2013 10:04, Jan Beulich wrote: >> >>>>> On 13.05.13 at 19:47, Stefan Bader <stefan.bader@canonical.com> >> wrote: >> >>> --- a/drivers/block/xen-blkback/xenbus.c >> >>> +++ b/drivers/block/xen-blkback/xenbus.c >> >>> @@ -704,6 +704,13 @@ again: >> >>> dev->nodename); >> >>> goto abort; >> >>> } >> >>> + err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", >> "%u", >> >>> + bdev_physical_block_size(be->blkif->vbd.bdev)); >> >>> + if (err) { >> >>> + xenbus_dev_fatal(dev, err, "writing %s/physical-sector-size", >> >>> + dev->nodename); >> >>> + goto abort; >> >> >> >> Failure here should not be fatal (as with any other protocol >> >> extensions). >> > >> > So I suppose that should be xenbus_dev_error and no abort here. Just >> > wondering >> > (and sorry for being thick headed here) why would a failure here be >> > different in >> > severity for an extension or not. Is that not just adding an element to the >> > xenstore object and failure would not be related to this being an >> extension? >> >> A driver should only bail upon encountering a problem that it can''t >> recover from. Failure to write a xenstore node that neither the >> backend nor the frontend really require for their work is certainly >> not among those. Yes, it''s only a xenstore write, but it can fail at >> least theoretically (or else there wouldn''t be a need for error >> handling here in the first place), and you shouldn''t handle such >> failure in undue ways (i.e. failure to write required nodes is fatal, >> but failure to write nodes related to extensions isn''t). >> > > What is the recovery though? If the physical block size is unusual (eg not > 512) and the write has failed, what is the outcome? I suspect that it''s going > to not be what the user expected - partitions could be incorrectly aligned if > doing an install, etc. If it were my system then in the (vanishingly rare?) > case that this write failed, I''d prefer a hard failure.That''s a policy decision, but the driver should not enforce policy. And remember, till now this information isn''t even being passed, so the question of hard failure being preferable is pretty mute.> If a simple write to xenstore fails then isn''t the world coming to an end > anyway?Possibly, albeit xenstore could fail this just because there''s no xenstore space left for one of the involved entities. And again, my main motivation to request the change is to not set bad precedents. The error here conceptually should not be fatal, with no regard to the actual functionality it represents. Jan
Jan Beulich
2013-May-15 12:28 UTC
Re: [PATCH] xen-blk(front|back): Handle large physical sector disks
>>> On 15.05.13 at 12:58, Stefan Bader <stefan.bader@canonical.com> wrote: > @@ -1437,6 +1441,17 @@ static void blkfront_connect(struct blkfront_info *info) > return; > } > > + /* > + * physcial-sector-size is a newer field, so old backends may not > + * provide this. Assume physical sector size to be the same as > + * sector_size in that case. > + */ > + err = xenbus_gather(XBT_NIL, info->xbdev->otherend, > + "physical-sector-size", "%u", &physical_sector_size, > + NULL);Sorry I didn''t pay attention to this in the first round: xenbus_scanf() should be preferred (for it having better type checking) over xenbus_gather() for single item operations. Jan
Konrad Rzeszutek Wilk
2013-May-15 13:54 UTC
Re: [PATCH] xen-blk(front|back): Handle large physical sector disks
On Wed, May 15, 2013 at 11:26:25AM +0200, Stefan Bader wrote:> On 14.05.2013 10:04, Jan Beulich wrote: > >>>> On 13.05.13 at 19:47, Stefan Bader <stefan.bader@canonical.com> wrote: > >> --- a/drivers/block/xen-blkback/xenbus.c > >> +++ b/drivers/block/xen-blkback/xenbus.c > >> @@ -704,6 +704,13 @@ again: > >> dev->nodename); > >> goto abort; > >> } > >> + err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u", > >> + bdev_physical_block_size(be->blkif->vbd.bdev)); > >> + if (err) { > >> + xenbus_dev_fatal(dev, err, "writing %s/physical-sector-size", > >> + dev->nodename); > >> + goto abort; > > > > Failure here should not be fatal (as with any other protocol > > extensions). > > So I suppose that should be xenbus_dev_error and no abort here. Just wonderingOr dev_warn(&dev->dev).> (and sorry for being thick headed here) why would a failure here be different in > severity for an extension or not. Is that not just adding an element to the > xenstore object and failure would not be related to this being an extension?Doing a failure tears down the whole XenBus connection. We don''t want that.> > > > > Beyond that the patch looks good to me. > > > > Jan > > > >
Stefan Bader
2013-May-15 15:02 UTC
[PATCH v3] xen-blk(front|back): Handle large physical sector disks
Changed the xenbus_gather but kept the xenbus_dev_error (without being fatal) as that seems closer to the style of the other calls. -Stefan From cddea87d842cc5feb427f3eedefc0566d69fb9b6 Mon Sep 17 00:00:00 2001 From: Stefan Bader <stefan.bader@canonical.com> Date: Mon, 13 May 2013 16:28:15 +0200 Subject: [PATCH] xen/blk: Use physical sector size for setup Currently xen-blkback passes the logical sector size over xenbus and xen-blkfront sets up the paravirt disk with that logical block size. But newer drives usually have the logical sector size set to 512 for compatibility reasons and would show the actual sector size only in physical sector size. This results in the device being partitioned and accessed in dom0 with the correct sector size, but the guest thinks 512 bytes is the correct block size. And that results in poor performance. To fix this, blkback gets modified to pass also physical-sector-size over xenbus and blkfront to use both values to set up the paravirt disk. I did not just change the passed in sector-size because I am not sure having a bigger logical sector size than the physical one is valid (and that would happen if a newer dom0 kernel hits an older domU kernel). Also this way a domU set up before should still be accessible (just some tools might detect the unaligned setup). [v2: Make xenbus write failure non-fatal] [v3: Use xenbus_scanf instead of xenbus_gather] Signed-off-by: Stefan Bader <stefan.bader@canonical.com> --- drivers/block/xen-blkback/xenbus.c | 5 +++++ drivers/block/xen-blkfront.c | 23 +++++++++++++++++++---- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 8bfd1bc..25463e3 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -704,6 +704,11 @@ again: dev->nodename); goto abort; } + err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u", + bdev_physical_block_size(be->blkif->vbd.bdev)); + if (err) + xenbus_dev_error(dev, err, "writing %s/physical-sector-size", + dev->nodename); err = xenbus_transaction_end(xbt, 0); if (err == -EAGAIN) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index d89ef86..33b5618 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -542,7 +542,8 @@ wait: flush_requests(info); } -static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) +static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, + unsigned int physical_sector_size) { struct request_queue *rq; struct blkfront_info *info = gd->private_data; @@ -564,6 +565,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) /* Hard sector size and max sectors impersonate the equiv. hardware. */ blk_queue_logical_block_size(rq, sector_size); + blk_queue_physical_block_size(rq, physical_sector_size); blk_queue_max_hw_sectors(rq, 512); /* Each segment in a request is up to an aligned page in size. */ @@ -667,7 +669,8 @@ static char *encode_disk_name(char *ptr, unsigned int n) static int xlvbd_alloc_gendisk(blkif_sector_t capacity, struct blkfront_info *info, - u16 vdisk_info, u16 sector_size) + u16 vdisk_info, u16 sector_size, + unsigned int physical_sector_size) { struct gendisk *gd; int nr_minors = 1; @@ -734,7 +737,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, gd->driverfs_dev = &(info->xbdev->dev); set_capacity(gd, capacity); - if (xlvbd_init_blk_queue(gd, sector_size)) { + if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size)) { del_gendisk(gd); goto release; } @@ -1395,6 +1398,7 @@ static void blkfront_connect(struct blkfront_info *info) { unsigned long long sectors; unsigned long sector_size; + unsigned int physical_sector_size; unsigned int binfo; int err; int barrier, flush, discard, persistent; @@ -1437,6 +1441,16 @@ static void blkfront_connect(struct blkfront_info *info) return; } + /* + * physcial-sector-size is a newer field, so old backends may not + * provide this. Assume physical sector size to be the same as + * sector_size in that case. + */ + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "physical-sector-size", "%u", &physical_sector_size); + if (err != 1) + physical_sector_size = sector_size; + info->feature_flush = 0; info->flush_op = 0; @@ -1483,7 +1497,8 @@ static void blkfront_connect(struct blkfront_info *info) else info->feature_persistent = persistent; - err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); + err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size, + physical_sector_size); if (err) { xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", info->xbdev->otherend); -- 1.7.9.5 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Stefan Bader
2013-May-22 11:48 UTC
Re: [PATCH v3] xen-blk(front|back): Handle large physical sector disks
On 15.05.2013 17:02, Stefan Bader wrote:> Changed the xenbus_gather but kept the xenbus_dev_error (without being fatal) as > that seems closer to the style of the other calls.Was this ok as is or pending some answers/discussion which I may have not given/missed? -Stefan> > -Stefan > > From cddea87d842cc5feb427f3eedefc0566d69fb9b6 Mon Sep 17 00:00:00 2001 > From: Stefan Bader <stefan.bader@canonical.com> > Date: Mon, 13 May 2013 16:28:15 +0200 > Subject: [PATCH] xen/blk: Use physical sector size for setup > > Currently xen-blkback passes the logical sector size over xenbus and > xen-blkfront sets up the paravirt disk with that logical block size. > But newer drives usually have the logical sector size set to 512 for > compatibility reasons and would show the actual sector size only in > physical sector size. > This results in the device being partitioned and accessed in dom0 with > the correct sector size, but the guest thinks 512 bytes is the correct > block size. And that results in poor performance. > > To fix this, blkback gets modified to pass also physical-sector-size > over xenbus and blkfront to use both values to set up the paravirt > disk. I did not just change the passed in sector-size because I am > not sure having a bigger logical sector size than the physical one > is valid (and that would happen if a newer dom0 kernel hits an older > domU kernel). Also this way a domU set up before should still be > accessible (just some tools might detect the unaligned setup). > > [v2: Make xenbus write failure non-fatal] > [v3: Use xenbus_scanf instead of xenbus_gather] > > Signed-off-by: Stefan Bader <stefan.bader@canonical.com> > --- > drivers/block/xen-blkback/xenbus.c | 5 +++++ > drivers/block/xen-blkfront.c | 23 +++++++++++++++++++---- > 2 files changed, 24 insertions(+), 4 deletions(-) > > diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c > index 8bfd1bc..25463e3 100644 > --- a/drivers/block/xen-blkback/xenbus.c > +++ b/drivers/block/xen-blkback/xenbus.c > @@ -704,6 +704,11 @@ again: > dev->nodename); > goto abort; > } > + err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u", > + bdev_physical_block_size(be->blkif->vbd.bdev)); > + if (err) > + xenbus_dev_error(dev, err, "writing %s/physical-sector-size", > + dev->nodename); > > err = xenbus_transaction_end(xbt, 0); > if (err == -EAGAIN) > diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c > index d89ef86..33b5618 100644 > --- a/drivers/block/xen-blkfront.c > +++ b/drivers/block/xen-blkfront.c > @@ -542,7 +542,8 @@ wait: > flush_requests(info); > } > > -static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) > +static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, > + unsigned int physical_sector_size) > { > struct request_queue *rq; > struct blkfront_info *info = gd->private_data; > @@ -564,6 +565,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 > sector_size) > > /* Hard sector size and max sectors impersonate the equiv. hardware. */ > blk_queue_logical_block_size(rq, sector_size); > + blk_queue_physical_block_size(rq, physical_sector_size); > blk_queue_max_hw_sectors(rq, 512); > > /* Each segment in a request is up to an aligned page in size. */ > @@ -667,7 +669,8 @@ static char *encode_disk_name(char *ptr, unsigned int n) > > static int xlvbd_alloc_gendisk(blkif_sector_t capacity, > struct blkfront_info *info, > - u16 vdisk_info, u16 sector_size) > + u16 vdisk_info, u16 sector_size, > + unsigned int physical_sector_size) > { > struct gendisk *gd; > int nr_minors = 1; > @@ -734,7 +737,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, > gd->driverfs_dev = &(info->xbdev->dev); > set_capacity(gd, capacity); > > - if (xlvbd_init_blk_queue(gd, sector_size)) { > + if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size)) { > del_gendisk(gd); > goto release; > } > @@ -1395,6 +1398,7 @@ static void blkfront_connect(struct blkfront_info *info) > { > unsigned long long sectors; > unsigned long sector_size; > + unsigned int physical_sector_size; > unsigned int binfo; > int err; > int barrier, flush, discard, persistent; > @@ -1437,6 +1441,16 @@ static void blkfront_connect(struct blkfront_info *info) > return; > } > > + /* > + * physcial-sector-size is a newer field, so old backends may not > + * provide this. Assume physical sector size to be the same as > + * sector_size in that case. > + */ > + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, > + "physical-sector-size", "%u", &physical_sector_size); > + if (err != 1) > + physical_sector_size = sector_size; > + > info->feature_flush = 0; > info->flush_op = 0; > > @@ -1483,7 +1497,8 @@ static void blkfront_connect(struct blkfront_info *info) > else > info->feature_persistent = persistent; > > - err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); > + err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size, > + physical_sector_size); > if (err) { > xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", > info->xbdev->otherend); > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Jan Beulich
2013-May-22 12:21 UTC
Re: [PATCH v3] xen-blk(front|back): Handle large physical sector disks
>>> On 22.05.13 at 13:48, Stefan Bader <stefan.bader@canonical.com> wrote: > On 15.05.2013 17:02, Stefan Bader wrote: >> Changed the xenbus_gather but kept the xenbus_dev_error (without being > fatal) as >> that seems closer to the style of the other calls. > > Was this ok as is or pending some answers/discussion which I may have not > given/missed?All my concerns have been taken care of - thanks. The only thing I haven''t seen so far is a patch to the master copy of blkif.h to document the new xenstore node. Jan>> From cddea87d842cc5feb427f3eedefc0566d69fb9b6 Mon Sep 17 00:00:00 2001 >> From: Stefan Bader <stefan.bader@canonical.com> >> Date: Mon, 13 May 2013 16:28:15 +0200 >> Subject: [PATCH] xen/blk: Use physical sector size for setup >> >> Currently xen-blkback passes the logical sector size over xenbus and >> xen-blkfront sets up the paravirt disk with that logical block size. >> But newer drives usually have the logical sector size set to 512 for >> compatibility reasons and would show the actual sector size only in >> physical sector size. >> This results in the device being partitioned and accessed in dom0 with >> the correct sector size, but the guest thinks 512 bytes is the correct >> block size. And that results in poor performance. >> >> To fix this, blkback gets modified to pass also physical-sector-size >> over xenbus and blkfront to use both values to set up the paravirt >> disk. I did not just change the passed in sector-size because I am >> not sure having a bigger logical sector size than the physical one >> is valid (and that would happen if a newer dom0 kernel hits an older >> domU kernel). Also this way a domU set up before should still be >> accessible (just some tools might detect the unaligned setup). >> >> [v2: Make xenbus write failure non-fatal] >> [v3: Use xenbus_scanf instead of xenbus_gather] >> >> Signed-off-by: Stefan Bader <stefan.bader@canonical.com> >> --- >> drivers/block/xen-blkback/xenbus.c | 5 +++++ >> drivers/block/xen-blkfront.c | 23 +++++++++++++++++++---- >> 2 files changed, 24 insertions(+), 4 deletions(-) >> >> diff --git a/drivers/block/xen-blkback/xenbus.c > b/drivers/block/xen-blkback/xenbus.c >> index 8bfd1bc..25463e3 100644 >> --- a/drivers/block/xen-blkback/xenbus.c >> +++ b/drivers/block/xen-blkback/xenbus.c >> @@ -704,6 +704,11 @@ again: >> dev->nodename); >> goto abort; >> } >> + err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u", >> + bdev_physical_block_size(be->blkif->vbd.bdev)); >> + if (err) >> + xenbus_dev_error(dev, err, "writing %s/physical-sector-size", >> + dev->nodename); >> >> err = xenbus_transaction_end(xbt, 0); >> if (err == -EAGAIN) >> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c >> index d89ef86..33b5618 100644 >> --- a/drivers/block/xen-blkfront.c >> +++ b/drivers/block/xen-blkfront.c >> @@ -542,7 +542,8 @@ wait: >> flush_requests(info); >> } >> >> -static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) >> +static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, >> + unsigned int physical_sector_size) >> { >> struct request_queue *rq; >> struct blkfront_info *info = gd->private_data; >> @@ -564,6 +565,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 >> sector_size) >> >> /* Hard sector size and max sectors impersonate the equiv. hardware. */ >> blk_queue_logical_block_size(rq, sector_size); >> + blk_queue_physical_block_size(rq, physical_sector_size); >> blk_queue_max_hw_sectors(rq, 512); >> >> /* Each segment in a request is up to an aligned page in size. */ >> @@ -667,7 +669,8 @@ static char *encode_disk_name(char *ptr, unsigned int n) >> >> static int xlvbd_alloc_gendisk(blkif_sector_t capacity, >> struct blkfront_info *info, >> - u16 vdisk_info, u16 sector_size) >> + u16 vdisk_info, u16 sector_size, >> + unsigned int physical_sector_size) >> { >> struct gendisk *gd; >> int nr_minors = 1; >> @@ -734,7 +737,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, >> gd->driverfs_dev = &(info->xbdev->dev); >> set_capacity(gd, capacity); >> >> - if (xlvbd_init_blk_queue(gd, sector_size)) { >> + if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size)) { >> del_gendisk(gd); >> goto release; >> } >> @@ -1395,6 +1398,7 @@ static void blkfront_connect(struct blkfront_info > *info) >> { >> unsigned long long sectors; >> unsigned long sector_size; >> + unsigned int physical_sector_size; >> unsigned int binfo; >> int err; >> int barrier, flush, discard, persistent; >> @@ -1437,6 +1441,16 @@ static void blkfront_connect(struct blkfront_info > *info) >> return; >> } >> >> + /* >> + * physcial-sector-size is a newer field, so old backends may not >> + * provide this. Assume physical sector size to be the same as >> + * sector_size in that case. >> + */ >> + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, >> + "physical-sector-size", "%u", &physical_sector_size); >> + if (err != 1) >> + physical_sector_size = sector_size; >> + >> info->feature_flush = 0; >> info->flush_op = 0; >> >> @@ -1483,7 +1497,8 @@ static void blkfront_connect(struct blkfront_info > *info) >> else >> info->feature_persistent = persistent; >> >> - err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); >> + err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size, >> + physical_sector_size); >> if (err) { >> xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", >> info->xbdev->otherend); >> >> >> >> _______________________________________________ >> Xen-devel mailing list >> Xen-devel@lists.xen.org >> http://lists.xen.org/xen-devel >>
Konrad Rzeszutek Wilk
2013-May-22 13:02 UTC
Re: [PATCH v3] xen-blk(front|back): Handle large physical sector disks
On Wed, May 22, 2013 at 01:48:02PM +0200, Stefan Bader wrote:> On 15.05.2013 17:02, Stefan Bader wrote: > > Changed the xenbus_gather but kept the xenbus_dev_error (without being fatal) as > > that seems closer to the style of the other calls. > > Was this ok as is or pending some answers/discussion which I may have not > given/missed?I think that was it. Thanks for ping!> > -Stefan > > > > > -Stefan > > > > From cddea87d842cc5feb427f3eedefc0566d69fb9b6 Mon Sep 17 00:00:00 2001 > > From: Stefan Bader <stefan.bader@canonical.com> > > Date: Mon, 13 May 2013 16:28:15 +0200 > > Subject: [PATCH] xen/blk: Use physical sector size for setup > > > > Currently xen-blkback passes the logical sector size over xenbus and > > xen-blkfront sets up the paravirt disk with that logical block size. > > But newer drives usually have the logical sector size set to 512 for > > compatibility reasons and would show the actual sector size only in > > physical sector size. > > This results in the device being partitioned and accessed in dom0 with > > the correct sector size, but the guest thinks 512 bytes is the correct > > block size. And that results in poor performance. > > > > To fix this, blkback gets modified to pass also physical-sector-size > > over xenbus and blkfront to use both values to set up the paravirt > > disk. I did not just change the passed in sector-size because I am > > not sure having a bigger logical sector size than the physical one > > is valid (and that would happen if a newer dom0 kernel hits an older > > domU kernel). Also this way a domU set up before should still be > > accessible (just some tools might detect the unaligned setup). > > > > [v2: Make xenbus write failure non-fatal] > > [v3: Use xenbus_scanf instead of xenbus_gather] > > > > Signed-off-by: Stefan Bader <stefan.bader@canonical.com> > > --- > > drivers/block/xen-blkback/xenbus.c | 5 +++++ > > drivers/block/xen-blkfront.c | 23 +++++++++++++++++++---- > > 2 files changed, 24 insertions(+), 4 deletions(-) > > > > diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c > > index 8bfd1bc..25463e3 100644 > > --- a/drivers/block/xen-blkback/xenbus.c > > +++ b/drivers/block/xen-blkback/xenbus.c > > @@ -704,6 +704,11 @@ again: > > dev->nodename); > > goto abort; > > } > > + err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u", > > + bdev_physical_block_size(be->blkif->vbd.bdev)); > > + if (err) > > + xenbus_dev_error(dev, err, "writing %s/physical-sector-size", > > + dev->nodename); > > > > err = xenbus_transaction_end(xbt, 0); > > if (err == -EAGAIN) > > diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c > > index d89ef86..33b5618 100644 > > --- a/drivers/block/xen-blkfront.c > > +++ b/drivers/block/xen-blkfront.c > > @@ -542,7 +542,8 @@ wait: > > flush_requests(info); > > } > > > > -static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) > > +static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, > > + unsigned int physical_sector_size) > > { > > struct request_queue *rq; > > struct blkfront_info *info = gd->private_data; > > @@ -564,6 +565,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 > > sector_size) > > > > /* Hard sector size and max sectors impersonate the equiv. hardware. */ > > blk_queue_logical_block_size(rq, sector_size); > > + blk_queue_physical_block_size(rq, physical_sector_size); > > blk_queue_max_hw_sectors(rq, 512); > > > > /* Each segment in a request is up to an aligned page in size. */ > > @@ -667,7 +669,8 @@ static char *encode_disk_name(char *ptr, unsigned int n) > > > > static int xlvbd_alloc_gendisk(blkif_sector_t capacity, > > struct blkfront_info *info, > > - u16 vdisk_info, u16 sector_size) > > + u16 vdisk_info, u16 sector_size, > > + unsigned int physical_sector_size) > > { > > struct gendisk *gd; > > int nr_minors = 1; > > @@ -734,7 +737,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, > > gd->driverfs_dev = &(info->xbdev->dev); > > set_capacity(gd, capacity); > > > > - if (xlvbd_init_blk_queue(gd, sector_size)) { > > + if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size)) { > > del_gendisk(gd); > > goto release; > > } > > @@ -1395,6 +1398,7 @@ static void blkfront_connect(struct blkfront_info *info) > > { > > unsigned long long sectors; > > unsigned long sector_size; > > + unsigned int physical_sector_size; > > unsigned int binfo; > > int err; > > int barrier, flush, discard, persistent; > > @@ -1437,6 +1441,16 @@ static void blkfront_connect(struct blkfront_info *info) > > return; > > } > > > > + /* > > + * physcial-sector-size is a newer field, so old backends may not > > + * provide this. Assume physical sector size to be the same as > > + * sector_size in that case. > > + */ > > + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, > > + "physical-sector-size", "%u", &physical_sector_size); > > + if (err != 1) > > + physical_sector_size = sector_size; > > + > > info->feature_flush = 0; > > info->flush_op = 0; > > > > @@ -1483,7 +1497,8 @@ static void blkfront_connect(struct blkfront_info *info) > > else > > info->feature_persistent = persistent; > > > > - err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); > > + err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size, > > + physical_sector_size); > > if (err) { > > xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", > > info->xbdev->otherend); > > > > > > > > _______________________________________________ > > Xen-devel mailing list > > Xen-devel@lists.xen.org > > http://lists.xen.org/xen-devel > > > >> _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel
Stefan Bader
2013-May-22 13:15 UTC
Re: [PATCH v3] xen-blk(front|back): Handle large physical sector disks
On 22.05.2013 14:21, Jan Beulich wrote:> > The only thing I haven''t seen so far is a patch to the master > copy of blkif.h to document the new xenstore node. >Ok, maybe something like this. What I realize is that I deliberately used unsigned int as this is defined as 32bit on x86. But maybe it should be changed to uint32_t? -Stefan From 8d1023ce11b9067346e9794d95b2876d98484f43 Mon Sep 17 00:00:00 2001 From: Stefan Bader <stefan.bader@canonical.com> Date: Wed, 22 May 2013 15:11:18 +0200 Subject: [PATCH] blkif.h: Document the physical-sector-size extension Signed-off-by: Stefan Bader <stefan.bader@canonical.com> --- xen/include/public/io/blkif.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/xen/include/public/io/blkif.h b/xen/include/public/io/blkif.h index 97b423b..f7c3366 100644 --- a/xen/include/public/io/blkif.h +++ b/xen/include/public/io/blkif.h @@ -208,12 +208,17 @@ * sector-size * Values: <uint32_t> * - * The native sector size, in bytes, of the backend device. + * The logical sector size, in bytes, of the backend device. + * + * physical-sector-size + * Values: <unsigned int> + * + * The physical sector size, in bytes, of the backend device. * * sectors * Values: <uint64_t> * - * The size of the backend device, expressed in units of its native + * The size of the backend device, expressed in units of its logical * sector size ("sector-size"). * ***************************************************************************** @@ -473,8 +478,9 @@ * NB. first_sect and last_sect in blkif_request_segment, as well as * sector_number in blkif_request, are always expressed in 512-byte units. * However they must be properly aligned to the real sector size of the - * physical disk, which is reported in the "sector-size" node in the backend - * xenbus info. Also the xenbus "sectors" node is expressed in 512-byte units. + * physical disk, which is reported in the "physical-sector-size" node in + * the backend xenbus info. Also the xenbus "sectors" node is expressed in + * 512-byte units. */ struct blkif_request_segment { grant_ref_t gref; /* reference to I/O buffer frame */ -- 1.7.9.5 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Jan Beulich
2013-May-22 13:22 UTC
Re: [PATCH v3] xen-blk(front|back): Handle large physical sector disks
>>> On 22.05.13 at 15:15, Stefan Bader <stefan.bader@canonical.com> wrote: > On 22.05.2013 14:21, Jan Beulich wrote: >> >> The only thing I haven''t seen so far is a patch to the master >> copy of blkif.h to document the new xenstore node. >> > > Ok, maybe something like this. What I realize is that I deliberately used > unsigned int as this is defined as 32bit on x86. But maybe it should be > changed > to uint32_t?I don''t think this matters much, as the values are represented as strings anyway. Personally I''m not even sure that specifying a type here is really necessary. Let''s see what others think. Jan> From 8d1023ce11b9067346e9794d95b2876d98484f43 Mon Sep 17 00:00:00 2001 > From: Stefan Bader <stefan.bader@canonical.com> > Date: Wed, 22 May 2013 15:11:18 +0200 > Subject: [PATCH] blkif.h: Document the physical-sector-size extension > > Signed-off-by: Stefan Bader <stefan.bader@canonical.com> > --- > xen/include/public/io/blkif.h | 14 ++++++++++---- > 1 file changed, 10 insertions(+), 4 deletions(-) > > diff --git a/xen/include/public/io/blkif.h b/xen/include/public/io/blkif.h > index 97b423b..f7c3366 100644 > --- a/xen/include/public/io/blkif.h > +++ b/xen/include/public/io/blkif.h > @@ -208,12 +208,17 @@ > * sector-size > * Values: <uint32_t> > * > - * The native sector size, in bytes, of the backend device. > + * The logical sector size, in bytes, of the backend device. > + * > + * physical-sector-size > + * Values: <unsigned int> > + * > + * The physical sector size, in bytes, of the backend device. > * > * sectors > * Values: <uint64_t> > * > - * The size of the backend device, expressed in units of its native > + * The size of the backend device, expressed in units of its logical > * sector size ("sector-size"). > * > > ***************************************************************************** > @@ -473,8 +478,9 @@ > * NB. first_sect and last_sect in blkif_request_segment, as well as > * sector_number in blkif_request, are always expressed in 512-byte units. > * However they must be properly aligned to the real sector size of the > - * physical disk, which is reported in the "sector-size" node in the backend > - * xenbus info. Also the xenbus "sectors" node is expressed in 512-byte > units. > + * physical disk, which is reported in the "physical-sector-size" node in > + * the backend xenbus info. Also the xenbus "sectors" node is expressed in > + * 512-byte units. > */ > struct blkif_request_segment { > grant_ref_t gref; /* reference to I/O buffer frame */ > -- > 1.7.9.5
Konrad Rzeszutek Wilk
2013-May-28 12:47 UTC
Re: [PATCH v3] xen-blk(front|back): Handle large physical sector disks
On Wed, May 15, 2013 at 05:02:35PM +0200, Stefan Bader wrote:> Changed the xenbus_gather but kept the xenbus_dev_error (without being fatal) as > that seems closer to the style of the other calls.Hm, could you rebase this on top of stable/for-jens-3.10 please? I get this: patching file drivers/block/xen-blkback/xenbus.c Hunk #1 FAILED at 704. 1 out of 1 hunk FAILED -- saving rejects to file drivers/block/xen-blkback/xenbus.c.rej patching file drivers/block/xen-blkfront.c Hunk #1 FAILED at 542. patch: **** malformed patch at line 133: 16> > -Stefan > > From cddea87d842cc5feb427f3eedefc0566d69fb9b6 Mon Sep 17 00:00:00 2001 > From: Stefan Bader <stefan.bader@canonical.com> > Date: Mon, 13 May 2013 16:28:15 +0200 > Subject: [PATCH] xen/blk: Use physical sector size for setup > > Currently xen-blkback passes the logical sector size over xenbus and > xen-blkfront sets up the paravirt disk with that logical block size. > But newer drives usually have the logical sector size set to 512 for > compatibility reasons and would show the actual sector size only in > physical sector size. > This results in the device being partitioned and accessed in dom0 with > the correct sector size, but the guest thinks 512 bytes is the correct > block size. And that results in poor performance. > > To fix this, blkback gets modified to pass also physical-sector-size > over xenbus and blkfront to use both values to set up the paravirt > disk. I did not just change the passed in sector-size because I am > not sure having a bigger logical sector size than the physical one > is valid (and that would happen if a newer dom0 kernel hits an older > domU kernel). Also this way a domU set up before should still be > accessible (just some tools might detect the unaligned setup). > > [v2: Make xenbus write failure non-fatal] > [v3: Use xenbus_scanf instead of xenbus_gather] > > Signed-off-by: Stefan Bader <stefan.bader@canonical.com> > --- > drivers/block/xen-blkback/xenbus.c | 5 +++++ > drivers/block/xen-blkfront.c | 23 +++++++++++++++++++---- > 2 files changed, 24 insertions(+), 4 deletions(-) > > diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c > index 8bfd1bc..25463e3 100644 > --- a/drivers/block/xen-blkback/xenbus.c > +++ b/drivers/block/xen-blkback/xenbus.c > @@ -704,6 +704,11 @@ again: > dev->nodename); > goto abort; > } > + err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u", > + bdev_physical_block_size(be->blkif->vbd.bdev)); > + if (err) > + xenbus_dev_error(dev, err, "writing %s/physical-sector-size", > + dev->nodename); > > err = xenbus_transaction_end(xbt, 0); > if (err == -EAGAIN) > diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c > index d89ef86..33b5618 100644 > --- a/drivers/block/xen-blkfront.c > +++ b/drivers/block/xen-blkfront.c > @@ -542,7 +542,8 @@ wait: > flush_requests(info); > } > > -static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) > +static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, > + unsigned int physical_sector_size) > { > struct request_queue *rq; > struct blkfront_info *info = gd->private_data; > @@ -564,6 +565,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 > sector_size) > > /* Hard sector size and max sectors impersonate the equiv. hardware. */ > blk_queue_logical_block_size(rq, sector_size); > + blk_queue_physical_block_size(rq, physical_sector_size); > blk_queue_max_hw_sectors(rq, 512); > > /* Each segment in a request is up to an aligned page in size. */ > @@ -667,7 +669,8 @@ static char *encode_disk_name(char *ptr, unsigned int n) > > static int xlvbd_alloc_gendisk(blkif_sector_t capacity, > struct blkfront_info *info, > - u16 vdisk_info, u16 sector_size) > + u16 vdisk_info, u16 sector_size, > + unsigned int physical_sector_size) > { > struct gendisk *gd; > int nr_minors = 1; > @@ -734,7 +737,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, > gd->driverfs_dev = &(info->xbdev->dev); > set_capacity(gd, capacity); > > - if (xlvbd_init_blk_queue(gd, sector_size)) { > + if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size)) { > del_gendisk(gd); > goto release; > } > @@ -1395,6 +1398,7 @@ static void blkfront_connect(struct blkfront_info *info) > { > unsigned long long sectors; > unsigned long sector_size; > + unsigned int physical_sector_size; > unsigned int binfo; > int err; > int barrier, flush, discard, persistent; > @@ -1437,6 +1441,16 @@ static void blkfront_connect(struct blkfront_info *info) > return; > } > > + /* > + * physcial-sector-size is a newer field, so old backends may not > + * provide this. Assume physical sector size to be the same as > + * sector_size in that case. > + */ > + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, > + "physical-sector-size", "%u", &physical_sector_size); > + if (err != 1) > + physical_sector_size = sector_size; > + > info->feature_flush = 0; > info->flush_op = 0; > > @@ -1483,7 +1497,8 @@ static void blkfront_connect(struct blkfront_info *info) > else > info->feature_persistent = persistent; > > - err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); > + err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size, > + physical_sector_size); > if (err) { > xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", > info->xbdev->otherend); > -- > 1.7.9.5 >> From cddea87d842cc5feb427f3eedefc0566d69fb9b6 Mon Sep 17 00:00:00 2001 > From: Stefan Bader <stefan.bader@canonical.com> > Date: Mon, 13 May 2013 16:28:15 +0200 > Subject: [PATCH] xen/blk: Use physical sector size for setup > > Currently xen-blkback passes the logical sector size over xenbus and > xen-blkfront sets up the paravirt disk with that logical block size. > But newer drives usually have the logical sector size set to 512 for > compatibility reasons and would show the actual sector size only in > physical sector size. > This results in the device being partitioned and accessed in dom0 with > the correct sector size, but the guest thinks 512 bytes is the correct > block size. And that results in poor performance. > > To fix this, blkback gets modified to pass also physical-sector-size > over xenbus and blkfront to use both values to set up the paravirt > disk. I did not just change the passed in sector-size because I am > not sure having a bigger logical sector size than the physical one > is valid (and that would happen if a newer dom0 kernel hits an older > domU kernel). Also this way a domU set up before should still be > accessible (just some tools might detect the unaligned setup). > > [v2: Make xenbus write failure non-fatal] > [v3: Use xenbus_scanf instead of xenbus_gather] > > Signed-off-by: Stefan Bader <stefan.bader@canonical.com> > --- > drivers/block/xen-blkback/xenbus.c | 5 +++++ > drivers/block/xen-blkfront.c | 23 +++++++++++++++++++---- > 2 files changed, 24 insertions(+), 4 deletions(-) > > diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c > index 8bfd1bc..25463e3 100644 > --- a/drivers/block/xen-blkback/xenbus.c > +++ b/drivers/block/xen-blkback/xenbus.c > @@ -704,6 +704,11 @@ again: > dev->nodename); > goto abort; > } > + err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u", > + bdev_physical_block_size(be->blkif->vbd.bdev)); > + if (err) > + xenbus_dev_error(dev, err, "writing %s/physical-sector-size", > + dev->nodename); > > err = xenbus_transaction_end(xbt, 0); > if (err == -EAGAIN) > diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c > index d89ef86..33b5618 100644 > --- a/drivers/block/xen-blkfront.c > +++ b/drivers/block/xen-blkfront.c > @@ -542,7 +542,8 @@ wait: > flush_requests(info); > } > > -static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) > +static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, > + unsigned int physical_sector_size) > { > struct request_queue *rq; > struct blkfront_info *info = gd->private_data; > @@ -564,6 +565,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) > > /* Hard sector size and max sectors impersonate the equiv. hardware. */ > blk_queue_logical_block_size(rq, sector_size); > + blk_queue_physical_block_size(rq, physical_sector_size); > blk_queue_max_hw_sectors(rq, 512); > > /* Each segment in a request is up to an aligned page in size. */ > @@ -667,7 +669,8 @@ static char *encode_disk_name(char *ptr, unsigned int n) > > static int xlvbd_alloc_gendisk(blkif_sector_t capacity, > struct blkfront_info *info, > - u16 vdisk_info, u16 sector_size) > + u16 vdisk_info, u16 sector_size, > + unsigned int physical_sector_size) > { > struct gendisk *gd; > int nr_minors = 1; > @@ -734,7 +737,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, > gd->driverfs_dev = &(info->xbdev->dev); > set_capacity(gd, capacity); > > - if (xlvbd_init_blk_queue(gd, sector_size)) { > + if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size)) { > del_gendisk(gd); > goto release; > } > @@ -1395,6 +1398,7 @@ static void blkfront_connect(struct blkfront_info *info) > { > unsigned long long sectors; > unsigned long sector_size; > + unsigned int physical_sector_size; > unsigned int binfo; > int err; > int barrier, flush, discard, persistent; > @@ -1437,6 +1441,16 @@ static void blkfront_connect(struct blkfront_info *info) > return; > } > > + /* > + * physcial-sector-size is a newer field, so old backends may not > + * provide this. Assume physical sector size to be the same as > + * sector_size in that case. > + */ > + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, > + "physical-sector-size", "%u", &physical_sector_size); > + if (err != 1) > + physical_sector_size = sector_size; > + > info->feature_flush = 0; > info->flush_op = 0; > > @@ -1483,7 +1497,8 @@ static void blkfront_connect(struct blkfront_info *info) > else > info->feature_persistent = persistent; > > - err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); > + err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size, > + physical_sector_size); > if (err) { > xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", > info->xbdev->otherend); > -- > 1.7.9.5 >
Stefan Bader
2013-May-28 12:55 UTC
Re: [PATCH v3] xen-blk(front|back): Handle large physical sector disks
On 28.05.2013 14:47, Konrad Rzeszutek Wilk wrote:> On Wed, May 15, 2013 at 05:02:35PM +0200, Stefan Bader wrote: >> Changed the xenbus_gather but kept the xenbus_dev_error (without being fatal) as >> that seems closer to the style of the other calls. > > Hm, could you rebase this on top of stable/for-jens-3.10 please? > > I get this: > > patching file drivers/block/xen-blkback/xenbus.c > Hunk #1 FAILED at 704. > 1 out of 1 hunk FAILED -- saving rejects to file drivers/block/xen-blkback/xenbus.c.rej > patching file drivers/block/xen-blkfront.c > Hunk #1 FAILED at 542. > patch: **** malformed patch at line 133: 16 > >I rather suspect another TB fail. Lets try the same on-top-of-Linus and xen-git as attachements... -Stefan _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Konrad Rzeszutek Wilk
2013-May-28 16:17 UTC
Re: [PATCH v3] xen-blk(front|back): Handle large physical sector disks
On Tue, May 28, 2013 at 02:55:22PM +0200, Stefan Bader wrote:> On 28.05.2013 14:47, Konrad Rzeszutek Wilk wrote: > > On Wed, May 15, 2013 at 05:02:35PM +0200, Stefan Bader wrote: > >> Changed the xenbus_gather but kept the xenbus_dev_error (without being fatal) as > >> that seems closer to the style of the other calls. > > > > Hm, could you rebase this on top of stable/for-jens-3.10 please? > > > > I get this: > > > > patching file drivers/block/xen-blkback/xenbus.c > > Hunk #1 FAILED at 704. > > 1 out of 1 hunk FAILED -- saving rejects to file drivers/block/xen-blkback/xenbus.c.rej > > patching file drivers/block/xen-blkfront.c > > Hunk #1 FAILED at 542. > > patch: **** malformed patch at line 133: 16 > > > > > > I rather suspect another TB fail. Lets try the same on-top-of-Linus and xen-git > as attachements...So better, but stable/for-jens-3.10 has some extra changes. patch -p1 < /tmp/0001-xen-blk-Use-physical-sector-size-for-setup.patch patching file drivers/block/xen-blkback/xenbus.c Hunk #1 succeeded at 782 (offset 78 lines). patching file drivers/block/xen-blkfront.c Hunk #1 FAILED at 542. Hunk #2 succeeded at 629 (offset 65 lines). Hunk #3 succeeded at 736 (offset 68 lines). Hunk #4 FAILED at 736. Hunk #5 succeeded at 1698 (offset 301 lines). Hunk #6 succeeded at 1748 (offset 308 lines). Hunk #7 succeeded at 1811 with fuzz 2 (offset 315 lines). 2 out of 7 hunks FAILED -- saving rejects to file drivers/block/xen-blkfront.c.rej> > -Stefan >> From e1bc2a0027c20679bfc81beb3f2e888b283a44f2 Mon Sep 17 00:00:00 2001 > From: Stefan Bader <stefan.bader@canonical.com> > Date: Mon, 13 May 2013 16:28:15 +0200 > Subject: [PATCH] xen/blk: Use physical sector size for setup > > Currently xen-blkback passes the logical sector size over xenbus and > xen-blkfront sets up the paravirt disk with that logical block size. > But newer drives usually have the logical sector size set to 512 for > compatibility reasons and would show the actual sector size only in > physical sector size. > This results in the device being partitioned and accessed in dom0 with > the correct sector size, but the guest thinks 512 bytes is the correct > block size. And that results in poor performance. > > To fix this, blkback gets modified to pass also physical-sector-size > over xenbus and blkfront to use both values to set up the paravirt > disk. I did not just change the passed in sector-size because I am > not sure having a bigger logical sector size than the physical one > is valid (and that would happen if a newer dom0 kernel hits an older > domU kernel). Also this way a domU set up before should still be > accessible (just some tools might detect the unaligned setup). > > [v2: Make xenbus write failure non-fatal] > [v3: Use xenbus_scanf instead of xenbus_gather] > > Signed-off-by: Stefan Bader <stefan.bader@canonical.com> > --- > drivers/block/xen-blkback/xenbus.c | 5 +++++ > drivers/block/xen-blkfront.c | 23 +++++++++++++++++++---- > 2 files changed, 24 insertions(+), 4 deletions(-) > > diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c > index 8bfd1bc..25463e3 100644 > --- a/drivers/block/xen-blkback/xenbus.c > +++ b/drivers/block/xen-blkback/xenbus.c > @@ -704,6 +704,11 @@ again: > dev->nodename); > goto abort; > } > + err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u", > + bdev_physical_block_size(be->blkif->vbd.bdev)); > + if (err) > + xenbus_dev_error(dev, err, "writing %s/physical-sector-size", > + dev->nodename); > > err = xenbus_transaction_end(xbt, 0); > if (err == -EAGAIN) > diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c > index d89ef86..33b5618 100644 > --- a/drivers/block/xen-blkfront.c > +++ b/drivers/block/xen-blkfront.c > @@ -542,7 +542,8 @@ wait: > flush_requests(info); > } > > -static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) > +static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, > + unsigned int physical_sector_size) > { > struct request_queue *rq; > struct blkfront_info *info = gd->private_data; > @@ -564,6 +565,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) > > /* Hard sector size and max sectors impersonate the equiv. hardware. */ > blk_queue_logical_block_size(rq, sector_size); > + blk_queue_physical_block_size(rq, physical_sector_size); > blk_queue_max_hw_sectors(rq, 512); > > /* Each segment in a request is up to an aligned page in size. */ > @@ -667,7 +669,8 @@ static char *encode_disk_name(char *ptr, unsigned int n) > > static int xlvbd_alloc_gendisk(blkif_sector_t capacity, > struct blkfront_info *info, > - u16 vdisk_info, u16 sector_size) > + u16 vdisk_info, u16 sector_size, > + unsigned int physical_sector_size) > { > struct gendisk *gd; > int nr_minors = 1; > @@ -734,7 +737,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, > gd->driverfs_dev = &(info->xbdev->dev); > set_capacity(gd, capacity); > > - if (xlvbd_init_blk_queue(gd, sector_size)) { > + if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size)) { > del_gendisk(gd); > goto release; > } > @@ -1395,6 +1398,7 @@ static void blkfront_connect(struct blkfront_info *info) > { > unsigned long long sectors; > unsigned long sector_size; > + unsigned int physical_sector_size; > unsigned int binfo; > int err; > int barrier, flush, discard, persistent; > @@ -1437,6 +1441,16 @@ static void blkfront_connect(struct blkfront_info *info) > return; > } > > + /* > + * physcial-sector-size is a newer field, so old backends may not > + * provide this. Assume physical sector size to be the same as > + * sector_size in that case. > + */ > + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, > + "physical-sector-size", "%u", &physical_sector_size); > + if (err != 1) > + physical_sector_size = sector_size; > + > info->feature_flush = 0; > info->flush_op = 0; > > @@ -1483,7 +1497,8 @@ static void blkfront_connect(struct blkfront_info *info) > else > info->feature_persistent = persistent; > > - err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); > + err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size, > + physical_sector_size); > if (err) { > xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", > info->xbdev->otherend); > -- > 1.7.9.5 >> From 8d1023ce11b9067346e9794d95b2876d98484f43 Mon Sep 17 00:00:00 2001 > From: Stefan Bader <stefan.bader@canonical.com> > Date: Wed, 22 May 2013 15:11:18 +0200 > Subject: [PATCH] blkif.h: Document the physical-sector-size extension > > Signed-off-by: Stefan Bader <stefan.bader@canonical.com> > --- > xen/include/public/io/blkif.h | 14 ++++++++++---- > 1 file changed, 10 insertions(+), 4 deletions(-) > > diff --git a/xen/include/public/io/blkif.h b/xen/include/public/io/blkif.h > index 97b423b..f7c3366 100644 > --- a/xen/include/public/io/blkif.h > +++ b/xen/include/public/io/blkif.h > @@ -208,12 +208,17 @@ > * sector-size > * Values: <uint32_t> > * > - * The native sector size, in bytes, of the backend device. > + * The logical sector size, in bytes, of the backend device. > + * > + * physical-sector-size > + * Values: <unsigned int> > + * > + * The physical sector size, in bytes, of the backend device. > * > * sectors > * Values: <uint64_t> > * > - * The size of the backend device, expressed in units of its native > + * The size of the backend device, expressed in units of its logical > * sector size ("sector-size"). > * > ***************************************************************************** > @@ -473,8 +478,9 @@ > * NB. first_sect and last_sect in blkif_request_segment, as well as > * sector_number in blkif_request, are always expressed in 512-byte units. > * However they must be properly aligned to the real sector size of the > - * physical disk, which is reported in the "sector-size" node in the backend > - * xenbus info. Also the xenbus "sectors" node is expressed in 512-byte units. > + * physical disk, which is reported in the "physical-sector-size" node in > + * the backend xenbus info. Also the xenbus "sectors" node is expressed in > + * 512-byte units. > */ > struct blkif_request_segment { > grant_ref_t gref; /* reference to I/O buffer frame */ > -- > 1.7.9.5 >
Stefan Bader
2013-May-28 18:03 UTC
Re: [PATCH v3] xen-blk(front|back): Handle large physical sector disks
On 28.05.2013 18:17, Konrad Rzeszutek Wilk wrote:> On Tue, May 28, 2013 at 02:55:22PM +0200, Stefan Bader wrote: >> On 28.05.2013 14:47, Konrad Rzeszutek Wilk wrote: >>> On Wed, May 15, 2013 at 05:02:35PM +0200, Stefan Bader wrote: >>>> Changed the xenbus_gather but kept the xenbus_dev_error (without being fatal) as >>>> that seems closer to the style of the other calls. >>> >>> Hm, could you rebase this on top of stable/for-jens-3.10 please? >>> >>> I get this: >>> >>> patching file drivers/block/xen-blkback/xenbus.c >>> Hunk #1 FAILED at 704. >>> 1 out of 1 hunk FAILED -- saving rejects to file drivers/block/xen-blkback/xenbus.c.rej >>> patching file drivers/block/xen-blkfront.c >>> Hunk #1 FAILED at 542. >>> patch: **** malformed patch at line 133: 16 >>> >>> >> >> I rather suspect another TB fail. Lets try the same on-top-of-Linus and xen-git >> as attachements... > > So better, but stable/for-jens-3.10 has some extra changes. > > patch -p1 < /tmp/0001-xen-blk-Use-physical-sector-size-for-setup.patch > patching file drivers/block/xen-blkback/xenbus.c > Hunk #1 succeeded at 782 (offset 78 lines). > patching file drivers/block/xen-blkfront.c > Hunk #1 FAILED at 542. > Hunk #2 succeeded at 629 (offset 65 lines). > Hunk #3 succeeded at 736 (offset 68 lines). > Hunk #4 FAILED at 736. > Hunk #5 succeeded at 1698 (offset 301 lines). > Hunk #6 succeeded at 1748 (offset 308 lines). > Hunk #7 succeeded at 1811 with fuzz 2 (offset 315 lines). > 2 out of 7 hunks FAILED -- saving rejects to file drivers/block/xen-blkfront.c.rej > >> >> -Stefan >> >Here a quickly done rebase. Best needs a compile test before but I did not get to it. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Konrad Rzeszutek Wilk
2013-Jun-05 20:25 UTC
Re: [PATCH v3] xen-blk(front|back): Handle large physical sector disks
On Tue, May 28, 2013 at 08:03:40PM +0200, Stefan Bader wrote:> On 28.05.2013 18:17, Konrad Rzeszutek Wilk wrote: > > On Tue, May 28, 2013 at 02:55:22PM +0200, Stefan Bader wrote: > >> On 28.05.2013 14:47, Konrad Rzeszutek Wilk wrote: > >>> On Wed, May 15, 2013 at 05:02:35PM +0200, Stefan Bader wrote: > >>>> Changed the xenbus_gather but kept the xenbus_dev_error (without being fatal) as > >>>> that seems closer to the style of the other calls. > >>> > >>> Hm, could you rebase this on top of stable/for-jens-3.10 please? > >>> > >>> I get this: > >>> > >>> patching file drivers/block/xen-blkback/xenbus.c > >>> Hunk #1 FAILED at 704. > >>> 1 out of 1 hunk FAILED -- saving rejects to file drivers/block/xen-blkback/xenbus.c.rej > >>> patching file drivers/block/xen-blkfront.c > >>> Hunk #1 FAILED at 542. > >>> patch: **** malformed patch at line 133: 16 > >>> > >>> > >> > >> I rather suspect another TB fail. Lets try the same on-top-of-Linus and xen-git > >> as attachements... > > > > So better, but stable/for-jens-3.10 has some extra changes. > > > > patch -p1 < /tmp/0001-xen-blk-Use-physical-sector-size-for-setup.patch > > patching file drivers/block/xen-blkback/xenbus.c > > Hunk #1 succeeded at 782 (offset 78 lines). > > patching file drivers/block/xen-blkfront.c > > Hunk #1 FAILED at 542. > > Hunk #2 succeeded at 629 (offset 65 lines). > > Hunk #3 succeeded at 736 (offset 68 lines). > > Hunk #4 FAILED at 736. > > Hunk #5 succeeded at 1698 (offset 301 lines). > > Hunk #6 succeeded at 1748 (offset 308 lines). > > Hunk #7 succeeded at 1811 with fuzz 2 (offset 315 lines). > > 2 out of 7 hunks FAILED -- saving rejects to file drivers/block/xen-blkfront.c.rej > > > >> > >> -Stefan > >> > > > Here a quickly done rebase. Best needs a compile test before but I did not get > to it.Applied.> > >> From 4f90805519c9380a9b88853112ed57cc6bd51759 Mon Sep 17 00:00:00 2001 > From: Stefan Bader <stefan.bader@canonical.com> > Date: Mon, 13 May 2013 16:28:15 +0200 > Subject: [PATCH] xen/blk: Use physical sector size for setup > > Currently xen-blkback passes the logical sector size over xenbus and > xen-blkfront sets up the paravirt disk with that logical block size. > But newer drives usually have the logical sector size set to 512 for > compatibility reasons and would show the actual sector size only in > physical sector size. > This results in the device being partitioned and accessed in dom0 with > the correct sector size, but the guest thinks 512 bytes is the correct > block size. And that results in poor performance. > > To fix this, blkback gets modified to pass also physical-sector-size > over xenbus and blkfront to use both values to set up the paravirt > disk. I did not just change the passed in sector-size because I am > not sure having a bigger logical sector size than the physical one > is valid (and that would happen if a newer dom0 kernel hits an older > domU kernel). Also this way a domU set up before should still be > accessible (just some tools might detect the unaligned setup). > > [v2: Make xenbus write failure non-fatal] > [v3: Use xenbus_scanf instead of xenbus_gather] > [v4: Rebased against segment changes] > > Signed-off-by: Stefan Bader <stefan.bader@canonical.com> > --- > drivers/block/xen-blkback/xenbus.c | 5 +++++ > drivers/block/xen-blkfront.c | 21 ++++++++++++++++++--- > 2 files changed, 23 insertions(+), 3 deletions(-) > > diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c > index 4a4749c..7b06f94 100644 > --- a/drivers/block/xen-blkback/xenbus.c > +++ b/drivers/block/xen-blkback/xenbus.c > @@ -782,6 +782,11 @@ again: > dev->nodename); > goto abort; > } > + err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u", > + bdev_physical_block_size(be->blkif->vbd.bdev)); > + if (err) > + xenbus_dev_error(dev, err, "writing %s/physical-sector-size", > + dev->nodename); > > err = xenbus_transaction_end(xbt, 0); > if (err == -EAGAIN) > diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c > index bac8cf3..f1f75c6 100644 > --- a/drivers/block/xen-blkfront.c > +++ b/drivers/block/xen-blkfront.c > @@ -607,6 +607,7 @@ wait: > } > > static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, > + unsigned int physical_sector_size, > unsigned int segments) > { > struct request_queue *rq; > @@ -629,6 +630,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, > > /* Hard sector size and max sectors impersonate the equiv. hardware. */ > blk_queue_logical_block_size(rq, sector_size); > + blk_queue_physical_block_size(rq, physical_sector_size); > blk_queue_max_hw_sectors(rq, 512); > > /* Each segment in a request is up to an aligned page in size. */ > @@ -735,7 +737,8 @@ static char *encode_disk_name(char *ptr, unsigned int n) > > static int xlvbd_alloc_gendisk(blkif_sector_t capacity, > struct blkfront_info *info, > - u16 vdisk_info, u16 sector_size) > + u16 vdisk_info, u16 sector_size, > + unsigned int physical_sector_size) > { > struct gendisk *gd; > int nr_minors = 1; > @@ -802,7 +805,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, > gd->driverfs_dev = &(info->xbdev->dev); > set_capacity(gd, capacity); > > - if (xlvbd_init_blk_queue(gd, sector_size, > + if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size, > info->max_indirect_segments ? : > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { > del_gendisk(gd); > @@ -1696,6 +1699,7 @@ static void blkfront_connect(struct blkfront_info *info) > { > unsigned long long sectors; > unsigned long sector_size; > + unsigned int physical_sector_size; > unsigned int binfo; > int err; > int barrier, flush, discard, persistent; > @@ -1745,6 +1749,16 @@ static void blkfront_connect(struct blkfront_info *info) > return; > } > > + /* > + * physcial-sector-size is a newer field, so old backends may not > + * provide this. Assume physical sector size to be the same as > + * sector_size in that case. > + */ > + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, > + "physical-sector-size", "%u", &physical_sector_size); > + if (err != 1) > + physical_sector_size = sector_size; > + > info->feature_flush = 0; > info->flush_op = 0; > > @@ -1798,7 +1812,8 @@ static void blkfront_connect(struct blkfront_info *info) > return; > } > > - err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); > + err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size, > + physical_sector_size); > if (err) { > xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", > info->xbdev->otherend); > -- > 1.7.9.5 >> _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel