Li Dongyang
2011-Aug-22 09:19 UTC
[Xen-devel] Re: [PATCH V2 2/3] xen-blkfront: teach blkfront driver handle trim request

On Thu, Aug 18, 2011 at 6:18 PM, Jan Beulich <JBeulich@novell.com> wrote:
> >>> On 18.08.11 at 11:35, Li Dongyang <lidongyang@novell.com> wrote:
>> JBeulich@novell.com
>> Subject: [PATCH V2 2/3] xen-blkfront: teach blkfront driver handle trim request
>> Date: Thu, 18 Aug 2011 17:34:30 +0800
>> Message-Id: <1313660071-25230-3-git-send-email-lidongyang@novell.com>
>> X-Mailer: git-send-email 1.7.6
>> In-Reply-To: <1313660071-25230-1-git-send-email-lidongyang@novell.com>
>> References: <1313660071-25230-1-git-send-email-lidongyang@novell.com>
>>
>> The blkfront driver will now read feature-trim from xenstore and set up
>> the request queue with the trim params, so we can forward discard
>> requests to the backend driver.
>>
>> Signed-off-by: Li Dongyang <lidongyang@novell.com>
>> ---
>>  drivers/block/xen-blkfront.c |   96 ++++++++++++++++++++++++++++++++----------
>>  1 files changed, 73 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
>> index b536a9c..c22fb07 100644
>> --- a/drivers/block/xen-blkfront.c
>> +++ b/drivers/block/xen-blkfront.c
>> @@ -98,6 +98,9 @@ struct blkfront_info
>>      unsigned long shadow_free;
>>      unsigned int feature_flush;
>>      unsigned int flush_op;
>> +    unsigned int feature_trim;
>> +    unsigned int discard_granularity;
>> +    unsigned int discard_alignment;
>>      int is_ready;
>>  };
>>
>> @@ -302,29 +305,36 @@ static int blkif_queue_request(struct request *req)
>>              ring_req->operation = info->flush_op;
>>      }
>>
>> -    ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
>> -    BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
>> +    if (unlikely(req->cmd_flags & REQ_DISCARD)) {
>> +        /* id, sector_number and handle are set above. */
>> +        ring_req->operation = BLKIF_OP_TRIM;
>> +        ring_req->nr_segments = 0;
>> +        ring_req->u.trim.nr_sectors = blk_rq_sectors(req);
>> +    } else {
>> +        ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
>> +        BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
>>
>> -    for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
>> -        buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
>> -        fsect = sg->offset >> 9;
>> -        lsect = fsect + (sg->length >> 9) - 1;
>> -        /* install a grant reference. */
>> -        ref = gnttab_claim_grant_reference(&gref_head);
>> -        BUG_ON(ref == -ENOSPC);
>> +        for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
>> +            buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
>> +            fsect = sg->offset >> 9;
>> +            lsect = fsect + (sg->length >> 9) - 1;
>> +            /* install a grant reference. */
>> +            ref = gnttab_claim_grant_reference(&gref_head);
>> +            BUG_ON(ref == -ENOSPC);
>>
>> -        gnttab_grant_foreign_access_ref(
>> -                ref,
>> -                info->xbdev->otherend_id,
>> -                buffer_mfn,
>> -                rq_data_dir(req) );
>> -
>> -        info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
>> -        ring_req->u.rw.seg[i] =
>> -                (struct blkif_request_segment) {
>> -                    .gref = ref,
>> -                    .first_sect = fsect,
>> -                    .last_sect = lsect };
>> +            gnttab_grant_foreign_access_ref(
>> +                    ref,
>> +                    info->xbdev->otherend_id,
>> +                    buffer_mfn,
>> +                    rq_data_dir(req));
>> +
>> +            info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
>> +            ring_req->u.rw.seg[i] =
>> +                    (struct blkif_request_segment) {
>> +                        .gref = ref,
>> +                        .first_sect = fsect,
>> +                        .last_sect = lsect };
>> +        }
>>      }
>>
>>      info->ring.req_prod_pvt++;
>> @@ -399,6 +409,7 @@ wait:
>>  static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
>>  {
>>      struct request_queue *rq;
>> +    struct blkfront_info *info = gd->private_data;
>>
>>      rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
>>      if (rq == NULL)
>> @@ -406,6 +417,13 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
>>
>>      queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
>>
>> +    if (info->feature_trim) {
>> +        queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
>> +        blk_queue_max_discard_sectors(rq, get_capacity(gd));
>> +        rq->limits.discard_granularity = info->discard_granularity;
>> +        rq->limits.discard_alignment = info->discard_alignment;
>> +    }
>> +
>>      /* Hard sector size and max sectors impersonate the equiv. hardware. */
>>      blk_queue_logical_block_size(rq, sector_size);
>>      blk_queue_max_hw_sectors(rq, 512);
>> @@ -722,6 +740,19 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>>
>>          error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
>>          switch (bret->operation) {
>> +        case BLKIF_OP_TRIM:
>> +            if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
>> +                struct request_queue *rq = info->rq;
>> +                printk(KERN_WARNING "blkfront: %s: trim op failed\n",
>> +                       info->gd->disk_name);
>> +                error = -EOPNOTSUPP;
>> +                info->feature_trim = 0;
>> +                spin_lock_irq(rq->queue_lock);
>> +                queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
>> +                spin_unlock_irq(rq->queue_lock);
>
> Are you sure you want to re-enable interrupts here unconditionally?

hm, you're right, as we called spin_lock_irqsave() above, I think it's
safe for us to just use spin_lock and spin_unlock here.

>
>> +            }
>> +            __blk_end_request_all(req, error);
>> +            break;
>>          case BLKIF_OP_FLUSH_DISKCACHE:
>>          case BLKIF_OP_WRITE_BARRIER:
>>              if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
>> @@ -1108,7 +1139,9 @@ static void blkfront_connect(struct blkfront_info *info)
>>      unsigned long sector_size;
>>      unsigned int binfo;
>>      int err;
>> -    int barrier, flush;
>> +    unsigned int discard_granularity;
>> +    unsigned int discard_alignment;
>> +    int barrier, flush, trim;
>>
>>      switch (info->connected) {
>>      case BLKIF_STATE_CONNECTED:
>> @@ -1178,7 +1211,24 @@ static void blkfront_connect(struct blkfront_info *info)
>>          info->feature_flush = REQ_FLUSH;
>>          info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
>>      }
>> -
>> +
>> +    err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
>> +                "feature-trim", "%d", &trim,
>> +                NULL);
>
> So you switched this to use "trim", but ...
>
>> +
>> +    if (!err && trim) {
>> +        info->feature_trim = 1;
>> +        err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
>> +            "discard_granularity", "%u", &discard_granularity,
>> +            "discard_alignment", "%u", &discard_alignment,
>
> ... you kept these using "discard" - quite inconsistent.

the discard_granularity and discard_alignment are taken from struct
queue_limits; they are needed to set up the queue in the guest if we
are using a phy device that has trim, so I think we can keep the names
here.

>
> Also, I think (but I may be wrong) that dashes are preferred over
> underscores in xenstore node names.

that could be done in xenstore, I used underscores because they are
taken from struct queue_limits

>
>> +            NULL);
>> +        if (!err) {
>> +            info->discard_granularity = discard_granularity;
>> +            info->discard_alignment = discard_alignment;
>
> I think you should set info->feature_trim only here (and then you can
> eliminate the local variables).

those discard_granularity and discard_alignment are only meaningful if
the backend is a phy device that has trim, and the two args are read
from the queue limits on the host. if the backend is a file, we can go
without discard_granularity and discard_alignment because we are just
going to punch a hole in the image file.

>
> Jan
>
>> +        }
>> +    } else
>> +        info->feature_trim = 0;
>> +
>>      err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
>>      if (err) {
>>          xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
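As a footnote to the locking exchange above, this is a minimal sketch of
the agreed change, not the posted patch: assuming blkif_interrupt() still
takes blkif_io_lock with spin_lock_irqsave() as in the current driver,
interrupts are already disabled here, so rq->queue_lock can be taken with
plain spin_lock()/spin_unlock() instead of the _irq variants:

    case BLKIF_OP_TRIM:
        if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
            struct request_queue *rq = info->rq;
            printk(KERN_WARNING "blkfront: %s: trim op failed\n",
                   info->gd->disk_name);
            error = -EOPNOTSUPP;
            info->feature_trim = 0;
            /* Interrupts are already off (blkif_io_lock was taken
             * with spin_lock_irqsave), so a plain spin_lock neither
             * disables nor, crucially, re-enables them on unlock. */
            spin_lock(rq->queue_lock);
            queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
            spin_unlock(rq->queue_lock);
        }
        __blk_end_request_all(req, error);
        break;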
Jan Beulich
2011-Aug-22 09:55 UTC
[Xen-devel] Re: [PATCH V2 2/3] xen-blkfront: teach blkfront driver handle trim request
>>> On 22.08.11 at 11:19, Li Dongyang <jerry87905@gmail.com> wrote:
> On Thu, Aug 18, 2011 at 6:18 PM, Jan Beulich <JBeulich@novell.com> wrote:
>> >>> On 18.08.11 at 11:35, Li Dongyang <lidongyang@novell.com> wrote:
>>> @@ -1178,7 +1211,24 @@ static void blkfront_connect(struct blkfront_info *info)
>>>          info->feature_flush = REQ_FLUSH;
>>>          info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
>>>      }
>>> -
>>> +
>>> +    err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
>>> +                "feature-trim", "%d", &trim,
>>> +                NULL);
>>
>> So you switched this to use "trim", but ...
>>
>>> +
>>> +    if (!err && trim) {
>>> +        info->feature_trim = 1;
>>> +        err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
>>> +            "discard_granularity", "%u", &discard_granularity,
>>> +            "discard_alignment", "%u", &discard_alignment,
>>
>> ... you kept these using "discard" - quite inconsistent.
> the discard_granularity and discard_alignment are taken from struct
> queue_limits; they are needed to set up the queue in the guest if we
> are using a phy device that has trim, so I think we can keep the names
> here.

The way Linux names them doesn't matter for the xenstore interface. I
think they should be named so that the connection to the base node's
name is obvious.

>>
>> Also, I think (but I may be wrong) that dashes are preferred over
>> underscores in xenstore node names.
> that could be done in xenstore, I used underscores because they are
> taken from struct queue_limits
>>
>>> +            NULL);
>>> +        if (!err) {
>>> +            info->discard_granularity = discard_granularity;
>>> +            info->discard_alignment = discard_alignment;
>>
>> I think you should set info->feature_trim only here (and then you can
>> eliminate the local variables).
> those discard_granularity and discard_alignment are only meaningful if
> the backend is a phy device that has trim, and the two args are read
> from the queue limits on the host. if the backend is a file, we can go
> without discard_granularity and discard_alignment because we are just
> going to punch a hole in the image file.

Then you should update the condition accordingly. Setting
info->feature_trim prematurely is just calling for later problems.

Jan
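To make that last point concrete, here is one possible shape for the
blkfront_connect() fragment. This is a sketch only: the node names follow
the quoted patch, the local variables are eliminated as suggested in the
review, and treating a failed gather as "file-backed backend, no limits
published" is an assumption based on the discussion, not something the
thread settled:

    err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
                "feature-trim", "%d", &trim,
                NULL);
    info->feature_trim = 0;
    if (!err && trim) {
        /* Read the limits straight into the info fields, so the
         * local discard_granularity/discard_alignment go away. */
        err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
            "discard_granularity", "%u", &info->discard_granularity,
            "discard_alignment", "%u", &info->discard_alignment,
            NULL);
        if (err) {
            /* Assumed fallback: a file-backed backend publishes no
             * limits, and hole punching needs none, so zero them. */
            info->discard_granularity = 0;
            info->discard_alignment = 0;
        }
        /* Commit the feature only after the related reads. */
        info->feature_trim = 1;
    }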