Konrad Rzeszutek Wilk
2011-Nov-30 18:15 UTC
[PATCH] Xen block improvements for 3.3 [secure-discard flag](v1)
Way back in 3.2 time-frame we worked out some of these patches and how to improve the code. I don''t think I had posted them after we chatted about what to do or how to do it... Either way, posting them here to kick-start the conversation and see if we can improve these patches some more. I''ve been running with these patches for some time now so I am quite confident that I got the 32-bit to 64-bit structure padding worked out. (Ran it with old guests/new guests/32-bit/64-bit/Linux/Windows combination. Around 18 different scenarios). Konrad Rzeszutek Wilk (3): xen/blk[front|back]: Squash blkif_request_rw and blkif_request_discard together xen/blk[front|back]: Enhance discard support with secure erasing support. xen/blkback: Move processing of BLKIF_OP_DISCARD from dispatch_rw_block_io Li Dongyang (1): xen-blkback: convert hole punching to discard request on loop devices drivers/block/xen-blkback/blkback.c | 84 ++++++++++++++-------------------- drivers/block/xen-blkback/common.h | 67 ++++++++++++++++------------ drivers/block/xen-blkback/xenbus.c | 12 +++++ drivers/block/xen-blkfront.c | 77 +++++++++++++++++++++----------- include/xen/interface/io/blkif.h | 40 ++++++++++++++--- 5 files changed, 169 insertions(+), 111 deletions(-)
Konrad Rzeszutek Wilk
2011-Nov-30 18:15 UTC
[PATCH 1/4] xen/blk[front|back]: Squash blkif_request_rw and blkif_request_discard together
In a union type structure to deal with the overlapping attributes in a easier manner. Suggested-by: Ian Campbell <Ian.Campbell@citrix.com> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> --- drivers/block/xen-blkback/blkback.c | 13 ++++--- drivers/block/xen-blkback/common.h | 64 +++++++++++++++++++--------------- drivers/block/xen-blkfront.c | 44 ++++++++++++----------- include/xen/interface/io/blkif.h | 24 +++++++++---- 4 files changed, 83 insertions(+), 62 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 15ec4db..d7104ab 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -362,7 +362,7 @@ static int xen_blkbk_map(struct blkif_request *req, { struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; int i; - int nseg = req->nr_segments; + int nseg = req->u.rw.nr_segments; int ret = 0; /* @@ -449,7 +449,7 @@ static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req) } else if (err) status = BLKIF_RSP_ERROR; - make_response(blkif, req->id, req->operation, status); + make_response(blkif, req->u.discard.id, req->operation, status); } static void xen_blk_drain_io(struct xen_blkif *blkif) @@ -644,7 +644,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, } /* Check that the number of segments is sane. */ - nseg = req->nr_segments; + nseg = req->u.rw.nr_segments; + if (unlikely(nseg == 0 && operation != WRITE_FLUSH && operation != REQ_DISCARD) || unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { @@ -654,12 +655,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, goto fail_response; } - preq.dev = req->handle; + preq.dev = req->u.rw.handle; preq.sector_number = req->u.rw.sector_number; preq.nr_sects = 0; pending_req->blkif = blkif; - pending_req->id = req->id; + pending_req->id = req->u.rw.id; pending_req->operation = req->operation; pending_req->status = BLKIF_RSP_OKAY; pending_req->nr_pages = nseg; @@ -784,7 +785,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, xen_blkbk_unmap(pending_req); fail_response: /* Haven''t submitted any bio''s yet. */ - make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); + make_response(blkif, req->u.rw.id, req->operation, BLKIF_RSP_ERROR); free_req(pending_req); msleep(1); /* back off a bit */ return -EIO; diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index dfb1b3a..dbfe7b3 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -60,58 +60,66 @@ struct blkif_common_response { char dummy; }; -/* i386 protocol version */ -#pragma pack(push, 4) - struct blkif_x86_32_request_rw { + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + uint64_t id; /* private guest value, echoed in resp */ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; -}; +} __attribute__((__packed__)); struct blkif_x86_32_request_discard { + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t _pad1; /* was "handle" for read/write requests */ + uint64_t id; /* private guest value, echoed in resp */ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ - uint64_t nr_sectors; -}; + uint64_t nr_sectors; +} __attribute__((__packed__)); struct blkif_x86_32_request { uint8_t operation; /* BLKIF_OP_??? */ - uint8_t nr_segments; /* number of segments */ - blkif_vdev_t handle; /* only for read/write requests */ - uint64_t id; /* private guest value, echoed in resp */ union { struct blkif_x86_32_request_rw rw; struct blkif_x86_32_request_discard discard; } u; -}; +} __attribute__((__packed__)); + +/* i386 protocol version */ +#pragma pack(push, 4) struct blkif_x86_32_response { uint64_t id; /* copied from request */ uint8_t operation; /* copied from request */ int16_t status; /* BLKIF_RSP_??? */ }; #pragma pack(pop) - /* x86_64 protocol version */ struct blkif_x86_64_request_rw { + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + uint32_t _pad1; /* offsetof(blkif_reqest..,u.rw.id)==8 */ + uint64_t id; blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; -}; +} __attribute__((__packed__)); struct blkif_x86_64_request_discard { + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t _pad1; /* was "handle" for read/write requests */ + uint32_t _pad2; /* offsetof(blkif_..,u.discard.id)==8 */ + uint64_t id; blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ - uint64_t nr_sectors; -}; + uint64_t nr_sectors; +} __attribute__((__packed__)); struct blkif_x86_64_request { uint8_t operation; /* BLKIF_OP_??? */ - uint8_t nr_segments; /* number of segments */ - blkif_vdev_t handle; /* only for read/write requests */ - uint64_t __attribute__((__aligned__(8))) id; union { struct blkif_x86_64_request_rw rw; struct blkif_x86_64_request_discard discard; } u; -}; +} __attribute__((__packed__)); + struct blkif_x86_64_response { uint64_t __attribute__((__aligned__(8))) id; uint8_t operation; /* copied from request */ @@ -237,18 +245,18 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst, { int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; dst->operation = src->operation; - dst->nr_segments = src->nr_segments; - dst->handle = src->handle; - dst->id = src->id; switch (src->operation) { case BLKIF_OP_READ: case BLKIF_OP_WRITE: case BLKIF_OP_WRITE_BARRIER: case BLKIF_OP_FLUSH_DISKCACHE: + dst->u.rw.nr_segments = src->u.rw.nr_segments; + dst->u.rw.handle = src->u.rw.handle; + dst->u.rw.id = src->u.rw.id; dst->u.rw.sector_number = src->u.rw.sector_number; barrier(); - if (n > dst->nr_segments) - n = dst->nr_segments; + if (n > dst->u.rw.nr_segments) + n = dst->u.rw.nr_segments; for (i = 0; i < n; i++) dst->u.rw.seg[i] = src->u.rw.seg[i]; break; @@ -266,18 +274,18 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst, { int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; dst->operation = src->operation; - dst->nr_segments = src->nr_segments; - dst->handle = src->handle; - dst->id = src->id; switch (src->operation) { case BLKIF_OP_READ: case BLKIF_OP_WRITE: case BLKIF_OP_WRITE_BARRIER: case BLKIF_OP_FLUSH_DISKCACHE: + dst->u.rw.nr_segments = src->u.rw.nr_segments; + dst->u.rw.handle = src->u.rw.handle; + dst->u.rw.id = src->u.rw.id; dst->u.rw.sector_number = src->u.rw.sector_number; barrier(); - if (n > dst->nr_segments) - n = dst->nr_segments; + if (n > dst->u.rw.nr_segments) + n = dst->u.rw.nr_segments; for (i = 0; i < n; i++) dst->u.rw.seg[i] = src->u.rw.seg[i]; break; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 7b2ec59..2c2c4be 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -135,15 +135,15 @@ static int get_id_from_freelist(struct blkfront_info *info) { unsigned long free = info->shadow_free; BUG_ON(free >= BLK_RING_SIZE); - info->shadow_free = info->shadow[free].req.id; - info->shadow[free].req.id = 0x0fffffee; /* debug */ + info->shadow_free = info->shadow[free].req.u.rw.id; + info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */ return free; } static void add_id_to_freelist(struct blkfront_info *info, unsigned long id) { - info->shadow[id].req.id = info->shadow_free; + info->shadow[id].req.u.rw.id = info->shadow_free; info->shadow[id].request = NULL; info->shadow_free = id; } @@ -287,9 +287,9 @@ static int blkif_queue_request(struct request *req) id = get_id_from_freelist(info); info->shadow[id].request = req; - ring_req->id = id; + ring_req->u.rw.id = id; ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req); - ring_req->handle = info->handle; + ring_req->u.rw.handle = info->handle; ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : BLKIF_OP_READ; @@ -308,13 +308,15 @@ static int blkif_queue_request(struct request *req) if (unlikely(req->cmd_flags & REQ_DISCARD)) { /* id, sector_number and handle are set above. */ ring_req->operation = BLKIF_OP_DISCARD; - ring_req->nr_segments = 0; + ring_req->u.discard.nr_segments = 0; ring_req->u.discard.nr_sectors = blk_rq_sectors(req); } else { - ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); - BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); + ring_req->u.rw.nr_segments = blk_rq_map_sg(req->q, req, + info->sg); + BUG_ON(ring_req->u.rw.nr_segments > + BLKIF_MAX_SEGMENTS_PER_REQUEST); - for_each_sg(info->sg, sg, ring_req->nr_segments, i) { + for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) { buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg))); fsect = sg->offset >> 9; lsect = fsect + (sg->length >> 9) - 1; @@ -705,7 +707,7 @@ static void blkif_free(struct blkfront_info *info, int suspend) static void blkif_completion(struct blk_shadow *s) { int i; - for (i = 0; i < s->req.nr_segments; i++) + for (i = 0; i < s->req.u.rw.nr_segments; i++) gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL); } @@ -763,7 +765,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) error = -EOPNOTSUPP; } if (unlikely(bret->status == BLKIF_RSP_ERROR && - info->shadow[id].req.nr_segments == 0)) { + info->shadow[id].req.u.rw.nr_segments == 0)) { printk(KERN_WARNING "blkfront: %s: empty write %s op failed\n", info->flush_op == BLKIF_OP_WRITE_BARRIER ? "barrier" : "flush disk cache", @@ -984,8 +986,8 @@ static int blkfront_probe(struct xenbus_device *dev, INIT_WORK(&info->work, blkif_restart_queue); for (i = 0; i < BLK_RING_SIZE; i++) - info->shadow[i].req.id = i+1; - info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; + info->shadow[i].req.u.rw.id = i+1; + info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; /* Front end dir is a number, which is used as the id. */ info->handle = simple_strtoul(strrchr(dev->nodename, ''/'')+1, NULL, 0); @@ -1019,9 +1021,9 @@ static int blkif_recover(struct blkfront_info *info) /* Stage 2: Set up free list. */ memset(&info->shadow, 0, sizeof(info->shadow)); for (i = 0; i < BLK_RING_SIZE; i++) - info->shadow[i].req.id = i+1; + info->shadow[i].req.u.rw.id = i+1; info->shadow_free = info->ring.req_prod_pvt; - info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; + info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; /* Stage 3: Find pending requests and requeue them. */ for (i = 0; i < BLK_RING_SIZE; i++) { @@ -1034,17 +1036,17 @@ static int blkif_recover(struct blkfront_info *info) *req = copy[i].req; /* We get a new request id, and must reset the shadow state. */ - req->id = get_id_from_freelist(info); - memcpy(&info->shadow[req->id], ©[i], sizeof(copy[i])); + req->u.rw.id = get_id_from_freelist(info); + memcpy(&info->shadow[req->u.rw.id], ©[i], sizeof(copy[i])); /* Rewrite any grant references invalidated by susp/resume. */ - for (j = 0; j < req->nr_segments; j++) + for (j = 0; j < req->u.rw.nr_segments; j++) gnttab_grant_foreign_access_ref( req->u.rw.seg[j].gref, info->xbdev->otherend_id, - pfn_to_mfn(info->shadow[req->id].frame[j]), - rq_data_dir(info->shadow[req->id].request)); - info->shadow[req->id].req = *req; + pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]), + rq_data_dir(info->shadow[req->u.rw.id].request)); + info->shadow[req->u.rw.id].req = *req; info->ring.req_prod_pvt++; } diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h index 9324488..f88e28b 100644 --- a/include/xen/interface/io/blkif.h +++ b/include/xen/interface/io/blkif.h @@ -95,6 +95,12 @@ typedef uint64_t blkif_sector_t; #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 struct blkif_request_rw { + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ +#ifdef CONFIG_X86_64 + uint32_t _pad1; /* offsetof(blkif_request,u.rw.id) == 8 */ +#endif + uint64_t id; /* private guest value, echoed in resp */ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ struct blkif_request_segment { grant_ref_t gref; /* reference to I/O buffer frame */ @@ -102,23 +108,27 @@ struct blkif_request_rw { /* @last_sect: last sector in frame to transfer (inclusive). */ uint8_t first_sect, last_sect; } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; -}; +} __attribute__((__packed__)); struct blkif_request_discard { + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t _pad1; /* only for read/write requests */ +#ifdef CONFIG_X86_64 + uint32_t _pad2; /* offsetof(blkif_req..,u.discard.id)==8*/ +#endif + uint64_t id; /* private guest value, echoed in resp */ blkif_sector_t sector_number; - uint64_t nr_sectors; -}; + uint64_t nr_sectors; + uint8_t _pad3; +} __attribute__((__packed__)); struct blkif_request { uint8_t operation; /* BLKIF_OP_??? */ - uint8_t nr_segments; /* number of segments */ - blkif_vdev_t handle; /* only for read/write requests */ - uint64_t id; /* private guest value, echoed in resp */ union { struct blkif_request_rw rw; struct blkif_request_discard discard; } u; -}; +} __attribute__((__packed__)); struct blkif_response { uint64_t id; /* copied from request */ -- 1.7.7.3
Konrad Rzeszutek Wilk
2011-Nov-30 18:15 UTC
[PATCH 2/4] xen/blk[front|back]: Enhance discard support with secure erasing support.
Part of the blkdev_issue_discard(xx) operation is that it can also issue a secure discard operation that will permanantly remove the sectors in question. We advertise that we can support that via the ''discard-secure'' attribute and on the request, if the ''secure'' bit is set, we will attempt to pass in REQ_DISCARD | REQ_SECURE. CC: Li Dongyang <lidongyang@novell.com> [v1: Used ''flag'' instead of ''secure:1'' bit] [v2: Use ''reserved'' uint8_t instead of adding a new value] [v3: Check for nseg when mapping instead of operation] Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> --- drivers/block/xen-blkback/blkback.c | 18 ++++++++++----- drivers/block/xen-blkback/common.h | 7 ++++- drivers/block/xen-blkback/xenbus.c | 12 ++++++++++ drivers/block/xen-blkfront.c | 41 ++++++++++++++++++++++++++-------- include/xen/interface/io/blkif.h | 18 ++++++++++++++- 5 files changed, 77 insertions(+), 19 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index d7104ab..9d2261b 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -422,13 +422,16 @@ static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req) int status = BLKIF_RSP_OKAY; struct block_device *bdev = blkif->vbd.bdev; - if (blkif->blk_backend_type == BLKIF_BACKEND_PHY) + if (blkif->blk_backend_type == BLKIF_BACKEND_PHY) { + unsigned long secure = (blkif->vbd.discard_secure && + (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ? + BLKDEV_DISCARD_SECURE : 0; /* just forward the discard request */ err = blkdev_issue_discard(bdev, req->u.discard.sector_number, req->u.discard.nr_sectors, - GFP_KERNEL, 0); - else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) { + GFP_KERNEL, secure); + } else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) { /* punch a hole in the backing file */ struct loop_device *lo = bdev->bd_disk->private_data; struct file *file = lo->lo_backing_file; @@ -643,8 +646,11 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, break; } - /* Check that the number of segments is sane. */ - nseg = req->u.rw.nr_segments; + if (unlikely(operation == REQ_DISCARD)) + nseg = 0; + else + /* Check that the number of segments is sane. */ + nseg = req->u.rw.nr_segments; if (unlikely(nseg == 0 && operation != WRITE_FLUSH && operation != REQ_DISCARD) || @@ -708,7 +714,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, * the hypercall to unmap the grants - that is all done in * xen_blkbk_unmap. */ - if (operation != REQ_DISCARD && xen_blkbk_map(req, pending_req, seg)) + if (nseg && xen_blkbk_map(req, pending_req, seg)) goto fail_flush; /* diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index dbfe7b3..d0ee7ed 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -69,7 +69,7 @@ struct blkif_x86_32_request_rw { } __attribute__((__packed__)); struct blkif_x86_32_request_discard { - uint8_t nr_segments; /* number of segments */ + uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */ blkif_vdev_t _pad1; /* was "handle" for read/write requests */ uint64_t id; /* private guest value, echoed in resp */ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ @@ -104,7 +104,7 @@ struct blkif_x86_64_request_rw { } __attribute__((__packed__)); struct blkif_x86_64_request_discard { - uint8_t nr_segments; /* number of segments */ + uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */ blkif_vdev_t _pad1; /* was "handle" for read/write requests */ uint32_t _pad2; /* offsetof(blkif_..,u.discard.id)==8 */ uint64_t id; @@ -164,6 +164,7 @@ struct xen_vbd { /* Cached size parameter. */ sector_t size; bool flush_support; + bool discard_secure; }; struct backend_info; @@ -261,6 +262,7 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst, dst->u.rw.seg[i] = src->u.rw.seg[i]; break; case BLKIF_OP_DISCARD: + dst->u.discard.flag = src->u.discard.flag; dst->u.discard.sector_number = src->u.discard.sector_number; dst->u.discard.nr_sectors = src->u.discard.nr_sectors; break; @@ -290,6 +292,7 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst, dst->u.rw.seg[i] = src->u.rw.seg[i]; break; case BLKIF_OP_DISCARD: + dst->u.discard.flag = src->u.discard.flag; dst->u.discard.sector_number = src->u.discard.sector_number; dst->u.discard.nr_sectors = src->u.discard.nr_sectors; break; diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index f759ad4..187fd2c 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -338,6 +338,9 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, if (q && q->flush_flags) vbd->flush_support = true; + if (q && blk_queue_secdiscard(q)) + vbd->discard_secure = true; + DPRINTK("Successful creation of handle=%04x (dom=%u)\n", handle, blkif->domid); return 0; @@ -420,6 +423,15 @@ int xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be) state = 1; blkif->blk_backend_type = BLKIF_BACKEND_PHY; } + /* Optional. */ + err = xenbus_printf(xbt, dev->nodename, + "discard-secure", "%d", + blkif->vbd.discard_secure); + if (err) { + xenbus_dev_fatal(dev, err, + "writting discard-secure"); + goto kfree; + } } } else { err = PTR_ERR(type); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 2c2c4be..351ddef 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -98,7 +98,8 @@ struct blkfront_info unsigned long shadow_free; unsigned int feature_flush; unsigned int flush_op; - unsigned int feature_discard; + unsigned int feature_discard:1; + unsigned int feature_secdiscard:1; unsigned int discard_granularity; unsigned int discard_alignment; int is_ready; @@ -305,11 +306,14 @@ static int blkif_queue_request(struct request *req) ring_req->operation = info->flush_op; } - if (unlikely(req->cmd_flags & REQ_DISCARD)) { + if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) { /* id, sector_number and handle are set above. */ ring_req->operation = BLKIF_OP_DISCARD; - ring_req->u.discard.nr_segments = 0; ring_req->u.discard.nr_sectors = blk_rq_sectors(req); + if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard) + ring_req->u.discard.flag = BLKIF_DISCARD_SECURE; + else + ring_req->u.discard.flag = 0; } else { ring_req->u.rw.nr_segments = blk_rq_map_sg(req->q, req, info->sg); @@ -426,6 +430,8 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) blk_queue_max_discard_sectors(rq, get_capacity(gd)); rq->limits.discard_granularity = info->discard_granularity; rq->limits.discard_alignment = info->discard_alignment; + if (info->feature_secdiscard) + queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, rq); } /* Hard sector size and max sectors impersonate the equiv. hardware. */ @@ -707,6 +713,8 @@ static void blkif_free(struct blkfront_info *info, int suspend) static void blkif_completion(struct blk_shadow *s) { int i; + /* Do not let BLKIF_OP_DISCARD as nr_segment is in the same place + * flag. */ for (i = 0; i < s->req.u.rw.nr_segments; i++) gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL); } @@ -738,7 +746,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) id = bret->id; req = info->shadow[id].request; - blkif_completion(&info->shadow[id]); + if (bret->operation != BLKIF_OP_DISCARD) + blkif_completion(&info->shadow[id]); add_id_to_freelist(info, id); @@ -751,7 +760,9 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) info->gd->disk_name); error = -EOPNOTSUPP; info->feature_discard = 0; + info->feature_secdiscard = 0; queue_flag_clear(QUEUE_FLAG_DISCARD, rq); + queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq); } __blk_end_request_all(req, error); break; @@ -1039,13 +1050,15 @@ static int blkif_recover(struct blkfront_info *info) req->u.rw.id = get_id_from_freelist(info); memcpy(&info->shadow[req->u.rw.id], ©[i], sizeof(copy[i])); + if (req->operation != BLKIF_OP_DISCARD) { /* Rewrite any grant references invalidated by susp/resume. */ - for (j = 0; j < req->u.rw.nr_segments; j++) - gnttab_grant_foreign_access_ref( - req->u.rw.seg[j].gref, - info->xbdev->otherend_id, - pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]), - rq_data_dir(info->shadow[req->u.rw.id].request)); + for (j = 0; j < req->u.rw.nr_segments; j++) + gnttab_grant_foreign_access_ref( + req->u.rw.seg[j].gref, + info->xbdev->otherend_id, + pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]), + rq_data_dir(info->shadow[req->u.rw.id].request)); + } info->shadow[req->u.rw.id].req = *req; info->ring.req_prod_pvt++; @@ -1137,11 +1150,13 @@ static void blkfront_setup_discard(struct blkfront_info *info) char *type; unsigned int discard_granularity; unsigned int discard_alignment; + unsigned int discard_secure; type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL); if (IS_ERR(type)) return; + info->feature_secdiscard = 0; if (strncmp(type, "phy", 3) == 0) { err = xenbus_gather(XBT_NIL, info->xbdev->otherend, "discard-granularity", "%u", &discard_granularity, @@ -1152,6 +1167,12 @@ static void blkfront_setup_discard(struct blkfront_info *info) info->discard_granularity = discard_granularity; info->discard_alignment = discard_alignment; } + err = xenbus_gather(XBT_NIL, info->xbdev->otherend, + "discard-secure", "%d", &discard_secure, + NULL); + if (!err) + info->feature_secdiscard = discard_secure; + } else if (strncmp(type, "file", 4) == 0) info->feature_discard = 1; diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h index f88e28b..ee338bf 100644 --- a/include/xen/interface/io/blkif.h +++ b/include/xen/interface/io/blkif.h @@ -84,6 +84,21 @@ typedef uint64_t blkif_sector_t; * e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc * http://www.seagate.com/staticfiles/support/disc/manuals/ * Interface%20manuals/100293068c.pdf + * The backend can optionally provide three extra XenBus attributes to + * further optimize the discard functionality: + * ''discard-aligment'' - Devices that support discard functionality may + * internally allocate space in units that are bigger than the exported + * logical block size. The discard-alignment parameter indicates how many bytes + * the beginning of the partition is offset from the internal allocation unit''s + * natural alignment. + * ''discard-granularity'' - Devices that support discard functionality may + * internally allocate space using units that are bigger than the logical block + * size. The discard-granularity parameter indicates the size of the internal + * allocation unit in bytes if reported by the device. Otherwise the + * discard-granularity will be set to match the device''s physical block size. + * ''discard-secure'' - All copies of the discarded sectors (potentially created + * by garbage collection) must also be erased. To use this feature, the flag + * BLKIF_DISCARD_SECURE must be set in the blkif_request_trim. */ #define BLKIF_OP_DISCARD 5 @@ -111,7 +126,8 @@ struct blkif_request_rw { } __attribute__((__packed__)); struct blkif_request_discard { - uint8_t nr_segments; /* number of segments */ + uint8_t flag; /* BLKIF_DISCARD_SECURE or zero. */ +#define BLKIF_DISCARD_SECURE (1<<0) /* ignored if discard-secure=0 */ blkif_vdev_t _pad1; /* only for read/write requests */ #ifdef CONFIG_X86_64 uint32_t _pad2; /* offsetof(blkif_req..,u.discard.id)==8*/ -- 1.7.7.3
Konrad Rzeszutek Wilk
2011-Nov-30 18:15 UTC
[PATCH 3/4] xen/blkback: Move processing of BLKIF_OP_DISCARD from dispatch_rw_block_io
. and move it to its own function that will deal with the discard operation. Suggested-by: Jan Beulich <JBeulich@novell.com> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> --- drivers/block/xen-blkback/blkback.c | 54 +++++++++++++++------------------- 1 files changed, 24 insertions(+), 30 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 9d2261b..b058de7 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -416,12 +416,16 @@ static int xen_blkbk_map(struct blkif_request *req, return ret; } -static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req) +static int dispatch_discard_io(struct xen_blkif *blkif, + struct blkif_request *req) { int err = 0; int status = BLKIF_RSP_OKAY; struct block_device *bdev = blkif->vbd.bdev; + blkif->st_ds_req++; + + xen_blkif_get(blkif); if (blkif->blk_backend_type == BLKIF_BACKEND_PHY) { unsigned long secure = (blkif->vbd.discard_secure && (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ? @@ -453,6 +457,8 @@ static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req) status = BLKIF_RSP_ERROR; make_response(blkif, req->u.discard.id, req->operation, status); + xen_blkif_put(blkif); + return err; } static void xen_blk_drain_io(struct xen_blkif *blkif) @@ -576,8 +582,11 @@ __do_block_io_op(struct xen_blkif *blkif) /* Apply all sanity checks to /private copy/ of request. */ barrier(); - - if (dispatch_rw_block_io(blkif, &req, pending_req)) + if (unlikely(req.operation == BLKIF_OP_DISCARD)) { + free_req(pending_req); + if (dispatch_discard_io(blkif, &req)) + break; + } else if (dispatch_rw_block_io(blkif, &req, pending_req)) break; /* Yield point for this unbounded loop. */ @@ -636,24 +645,16 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, blkif->st_f_req++; operation = WRITE_FLUSH; break; - case BLKIF_OP_DISCARD: - blkif->st_ds_req++; - operation = REQ_DISCARD; - break; default: operation = 0; /* make gcc happy */ goto fail_response; break; } - if (unlikely(operation == REQ_DISCARD)) - nseg = 0; - else - /* Check that the number of segments is sane. */ - nseg = req->u.rw.nr_segments; + /* Check that the number of segments is sane. */ + nseg = req->u.rw.nr_segments; - if (unlikely(nseg == 0 && operation != WRITE_FLUSH && - operation != REQ_DISCARD) || + if (unlikely(nseg == 0 && operation != WRITE_FLUSH) || unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { pr_debug(DRV_PFX "Bad number of segments in request (%d)\n", nseg); @@ -714,7 +715,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, * the hypercall to unmap the grants - that is all done in * xen_blkbk_unmap. */ - if (nseg && xen_blkbk_map(req, pending_req, seg)) + if (xen_blkbk_map(req, pending_req, seg)) goto fail_flush; /* @@ -746,23 +747,16 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, /* This will be hit if the operation was a flush or discard. */ if (!bio) { - BUG_ON(operation != WRITE_FLUSH && operation != REQ_DISCARD); + BUG_ON(operation != WRITE_FLUSH); - if (operation == WRITE_FLUSH) { - bio = bio_alloc(GFP_KERNEL, 0); - if (unlikely(bio == NULL)) - goto fail_put_bio; + bio = bio_alloc(GFP_KERNEL, 0); + if (unlikely(bio == NULL)) + goto fail_put_bio; - biolist[nbio++] = bio; - bio->bi_bdev = preq.bdev; - bio->bi_private = pending_req; - bio->bi_end_io = end_block_io_op; - } else if (operation == REQ_DISCARD) { - xen_blk_discard(blkif, req); - xen_blkif_put(blkif); - free_req(pending_req); - return 0; - } + biolist[nbio++] = bio; + bio->bi_bdev = preq.bdev; + bio->bi_private = pending_req; + bio->bi_end_io = end_block_io_op; } /* -- 1.7.7.3
Konrad Rzeszutek Wilk
2011-Nov-30 18:15 UTC
[PATCH 4/4] xen-blkback: convert hole punching to discard request on loop devices
From: Li Dongyang <lidongyang@novell.com> As of dfaa2ef68e80c378e610e3c8c536f1c239e8d3ef, loop devices support discard request now. We could just issue a discard request, and the loop driver will punch the hole for us, so we don''t need to touch the internals of loop device and punch the hole ourselves, Thanks. V0->V1: rebased on devel/for-jens-3.3 Signed-off-by: Li Dongyang <lidongyang@novell.com> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> --- drivers/block/xen-blkback/blkback.c | 19 ++----------------- 1 files changed, 2 insertions(+), 17 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index b058de7..0088bf6 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -39,9 +39,6 @@ #include <linux/list.h> #include <linux/delay.h> #include <linux/freezer.h> -#include <linux/loop.h> -#include <linux/falloc.h> -#include <linux/fs.h> #include <xen/events.h> #include <xen/page.h> @@ -426,27 +423,15 @@ static int dispatch_discard_io(struct xen_blkif *blkif, blkif->st_ds_req++; xen_blkif_get(blkif); - if (blkif->blk_backend_type == BLKIF_BACKEND_PHY) { + if (blkif->blk_backend_type == BLKIF_BACKEND_PHY || + blkif->blk_backend_type == BLKIF_BACKEND_FILE) { unsigned long secure = (blkif->vbd.discard_secure && (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ? BLKDEV_DISCARD_SECURE : 0; - /* just forward the discard request */ err = blkdev_issue_discard(bdev, req->u.discard.sector_number, req->u.discard.nr_sectors, GFP_KERNEL, secure); - } else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) { - /* punch a hole in the backing file */ - struct loop_device *lo = bdev->bd_disk->private_data; - struct file *file = lo->lo_backing_file; - - if (file->f_op->fallocate) - err = file->f_op->fallocate(file, - FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, - req->u.discard.sector_number << 9, - req->u.discard.nr_sectors << 9); - else - err = -EOPNOTSUPP; } else err = -EOPNOTSUPP; -- 1.7.7.3