Duan, Ronghui
2012-Aug-16 10:23 UTC
[RFC v1 1/5] VBD: enlarge max segment per request in blkfront
refactoring the blkfront Signed-off-by: Ronghui Duan <ronghui.duan@intel.com> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 4e86393..a263faf 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -64,6 +64,12 @@ enum blkif_state { BLKIF_STATE_SUSPENDED, }; +enum blkif_ring_type { + RING_TYPE_UNDEFINED = 0, + RING_TYPE_1 = 1, + RING_TYPE_2 = 2, +}; + struct blk_shadow { struct blkif_request req; struct request *request; @@ -91,12 +97,14 @@ struct blkfront_info enum blkif_state connected; int ring_ref; struct blkif_front_ring ring; - struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct scatterlist *sg; unsigned int evtchn, irq; struct request_queue *rq; struct work_struct work; struct gnttab_free_callback callback; - struct blk_shadow shadow[BLK_RING_SIZE]; + struct blk_shadow *shadow; + struct blk_front_operations *ops; + enum blkif_ring_type ring_type; unsigned long shadow_free; unsigned int feature_flush; unsigned int flush_op; @@ -107,6 +115,36 @@ struct blkfront_info int is_ready; }; +/* interface of blkfront ring operation */ +static struct blk_front_operations { + void *(*ring_get_request) (struct blkfront_info *info); + struct blkif_response *(*ring_get_response) (struct blkfront_info *info); + struct blkif_request_segment *(*ring_get_segment) + (struct blkfront_info *info, int i); + unsigned long (*get_id) (struct blkfront_info *info); + void (*add_id) (struct blkfront_info *info, unsigned long id); + void (*save_seg_shadow) (struct blkfront_info *info, unsigned long mfn, + unsigned long id, int i); + void (*save_req_shadow) (struct blkfront_info *info, + struct request *req, unsigned long id); + struct request *(*get_req_from_shadow)(struct blkfront_info *info, + unsigned long id); + RING_IDX (*get_rsp_prod) (struct blkfront_info *info); + RING_IDX (*get_rsp_cons) (struct blkfront_info *info); + RING_IDX (*get_req_prod_pvt) (struct blkfront_info *info); + void (*check_left_response) 
(struct blkfront_info *info, int *more_to_do); + void (*update_rsp_event) (struct blkfront_info *info, int i); + void (*update_rsp_cons) (struct blkfront_info *info); + void (*update_req_prod_pvt) (struct blkfront_info *info); + void (*ring_push) (struct blkfront_info *info, int *notify); + int (*recover) (struct blkfront_info *info); + int (*ring_full) (struct blkfront_info *info); + int (*setup_blkring) (struct xenbus_device *dev, struct blkfront_info *info); + void (*free_blkring) (struct blkfront_info *info, int suspend); + void (*blkif_completion) (struct blkfront_info *info, unsigned long id); + unsigned int max_seg; +} blk_front_ops; + static unsigned int nr_minors; static unsigned long *minors; static DEFINE_SPINLOCK(minor_lock); @@ -132,7 +170,7 @@ static DEFINE_SPINLOCK(minor_lock); #define DEV_NAME "xvd" /* name in /dev */ -static int get_id_from_freelist(struct blkfront_info *info) +static unsigned long get_id_from_freelist(struct blkfront_info *info) { unsigned long free = info->shadow_free; BUG_ON(free >= BLK_RING_SIZE); @@ -141,7 +179,7 @@ static int get_id_from_freelist(struct blkfront_info *info) return free; } -static void add_id_to_freelist(struct blkfront_info *info, +void add_id_to_freelist(struct blkfront_info *info, unsigned long id) { info->shadow[id].req.u.rw.id = info->shadow_free; @@ -251,6 +289,42 @@ static int blkif_ioctl(struct block_device *bdev, fmode_t mode, return 0; } +static int ring_full(struct blkfront_info *info) +{ + return RING_FULL(&info->ring); +} + +void *ring_get_request(struct blkfront_info *info) +{ + return (void *)RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); +} + +struct blkif_request_segment *ring_get_segment(struct blkfront_info *info, int i) +{ + struct blkif_request *ring_req = + (struct blkif_request *)info->ops->ring_get_request(info); + return &ring_req->u.rw.seg[i]; +} + +void save_seg_shadow(struct blkfront_info *info, + unsigned long mfn, unsigned long id, int i) +{ + info->shadow[id].frame[i] = 
mfn_to_pfn(mfn); +} + +void save_req_shadow(struct blkfront_info *info, + struct request *req, unsigned long id) +{ + struct blkif_request *ring_req = + (struct blkif_request *)info->ops->ring_get_request(info); + info->shadow[id].req = *ring_req; + info->shadow[id].request = req; +} + +void update_req_prod_pvt(struct blkfront_info *info) +{ + info->ring.req_prod_pvt++; +} /* * Generate a Xen blkfront IO request from a blk layer request. Reads * and writes are handled as expected. @@ -262,6 +336,7 @@ static int blkif_queue_request(struct request *req) struct blkfront_info *info = req->rq_disk->private_data; unsigned long buffer_mfn; struct blkif_request *ring_req; + struct blkif_request_segment *ring_seg; unsigned long id; unsigned int fsect, lsect; int i, ref; @@ -282,9 +357,9 @@ static int blkif_queue_request(struct request *req) } /* Fill out a communications ring structure. */ - ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); - id = get_id_from_freelist(info); - info->shadow[id].request = req; + ring_req = (struct blkif_request *)info->ops->ring_get_request(info); + id = info->ops->get_id(info); + //info->shadow[id].request = req; ring_req->u.rw.id = id; ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req); @@ -315,8 +390,7 @@ static int blkif_queue_request(struct request *req) } else { ring_req->u.rw.nr_segments = blk_rq_map_sg(req->q, req, info->sg); - BUG_ON(ring_req->u.rw.nr_segments > - BLKIF_MAX_SEGMENTS_PER_REQUEST); + BUG_ON(ring_req->u.rw.nr_segments > info->ops->max_seg); for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) { buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg))); @@ -332,31 +406,35 @@ static int blkif_queue_request(struct request *req) buffer_mfn, rq_data_dir(req)); - info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn); - ring_req->u.rw.seg[i] = - (struct blkif_request_segment) { - .gref = ref, - .first_sect = fsect, - .last_sect = lsect }; + ring_seg = info->ops->ring_get_segment(info, i); + *ring_seg =(struct 
blkif_request_segment) { + .gref = ref, + .first_sect = fsect, + .last_sect = lsect }; + info->ops->save_seg_shadow(info, buffer_mfn, id, i); } } - info->ring.req_prod_pvt++; - /* Keep a private copy so we can reissue requests when recovering. */ - info->shadow[id].req = *ring_req; + info->ops->save_req_shadow(info, req, id); + + info->ops->update_req_prod_pvt(info); gnttab_free_grant_references(gref_head); return 0; } +void ring_push(struct blkfront_info *info, int *notify) +{ + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, *notify); +} static inline void flush_requests(struct blkfront_info *info) { int notify; - RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify); + info->ops->ring_push(info, &notify); if (notify) notify_remote_via_irq(info->irq); @@ -379,7 +457,7 @@ static void do_blkif_request(struct request_queue *rq) while ((req = blk_peek_request(rq)) != NULL) { info = req->rq_disk->private_data; - if (RING_FULL(&info->ring)) + if (info->ops->ring_full(info)) goto wait; blk_start_request(req); @@ -434,14 +512,15 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) /* Hard sector size and max sectors impersonate the equiv. hardware. */ blk_queue_logical_block_size(rq, sector_size); - blk_queue_max_hw_sectors(rq, 512); /* Each segment in a request is up to an aligned page in size. */ blk_queue_segment_boundary(rq, PAGE_SIZE - 1); blk_queue_max_segment_size(rq, PAGE_SIZE); /* Ensure a merged request will fit in a single I/O ring slot. */ - blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); + blk_queue_max_segments(rq, info->ops->max_seg); + blk_queue_max_hw_sectors(rq, info->ops->max_seg * PAGE_SIZE + / sector_size); /* Make sure buffer addresses are sector-aligned. 
*/ blk_queue_dma_alignment(rq, 511); @@ -661,7 +740,7 @@ static void xlvbd_release_gendisk(struct blkfront_info *info) static void kick_pending_request_queues(struct blkfront_info *info) { - if (!RING_FULL(&info->ring)) { + if (!ring_full(info)) { /* Re-enable calldowns. */ blk_start_queue(info->rq); /* Kick things off immediately. */ @@ -696,20 +775,17 @@ static void blkif_free(struct blkfront_info *info, int suspend) flush_work_sync(&info->work); /* Free resources associated with old device channel. */ - if (info->ring_ref != GRANT_INVALID_REF) { - gnttab_end_foreign_access(info->ring_ref, 0, - (unsigned long)info->ring.sring); - info->ring_ref = GRANT_INVALID_REF; - info->ring.sring = NULL; - } + info->ops->free_blkring(info, suspend); + if (info->irq) unbind_from_irqhandler(info->irq, info); info->evtchn = info->irq = 0; } -static void blkif_completion(struct blk_shadow *s) +static void blkif_completion(struct blkfront_info *info, unsigned long id) { + struct blk_shadow *s = &info->shadow[id]; int i; /* Do not let BLKIF_OP_DISCARD as nr_segment is in the same place * flag. 
*/ @@ -717,6 +793,39 @@ static void blkif_completion(struct blk_shadow *s) gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL); } +struct blkif_response *ring_get_response(struct blkfront_info *info) +{ + return RING_GET_RESPONSE(&info->ring, info->ring.rsp_cons); +} +RING_IDX get_rsp_prod(struct blkfront_info *info) +{ + return info->ring.sring->rsp_prod; +} +RING_IDX get_rsp_cons(struct blkfront_info *info) +{ + return info->ring.rsp_cons; +} +struct request *get_req_from_shadow(struct blkfront_info *info, + unsigned long id) +{ + return info->shadow[id].request; +} +void update_rsp_cons(struct blkfront_info *info) +{ + info->ring.rsp_cons++; +} +RING_IDX get_req_prod_pvt(struct blkfront_info *info) +{ + return info->ring.req_prod_pvt; +} +void check_left_response(struct blkfront_info *info, int *more_to_do) +{ + RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, *more_to_do); +} +void update_rsp_event(struct blkfront_info *info, int i) +{ + info->ring.sring->rsp_event = i + 1; +} static irqreturn_t blkif_interrupt(int irq, void *dev_id) { struct request *req; @@ -734,20 +843,20 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) } again: - rp = info->ring.sring->rsp_prod; + rp = info->ops->get_rsp_prod(info); rmb(); /* Ensure we see queued responses up to 'rp'. */ - for (i = info->ring.rsp_cons; i != rp; i++) { + for (i = info->ops->get_rsp_cons(info); i != rp; i++) { unsigned long id; - bret = RING_GET_RESPONSE(&info->ring, i); + bret = info->ops->ring_get_response(info); id = bret->id; - req = info->shadow[id].request; + req = info->ops->get_req_from_shadow(info, id); if (bret->operation != BLKIF_OP_DISCARD) - blkif_completion(&info->shadow[id]); + info->ops->blkif_completion(info, id); - add_id_to_freelist(info, id); + info->ops->add_id(info, id); error = (bret->status == BLKIF_RSP_OKAY) ? 
0 : -EIO; switch (bret->operation) { @@ -800,17 +909,17 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) default: BUG(); } + info->ops->update_rsp_cons(info); } - info->ring.rsp_cons = i; - - if (i != info->ring.req_prod_pvt) { + rp = info->ops->get_req_prod_pvt(info); + if (i != rp) { int more_to_do; - RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do); + info->ops->check_left_response(info, &more_to_do); if (more_to_do) goto again; } else - info->ring.sring->rsp_event = i + 1; + info->ops->update_rsp_event(info, i); kick_pending_request_queues(info); @@ -819,6 +928,26 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } +static int init_shadow(struct blkfront_info *info) +{ + unsigned int ring_size; + int i; + if (info->ring_type != RING_TYPE_UNDEFINED) + return 0; + + info->ring_type = RING_TYPE_1; + ring_size = BLK_RING_SIZE; + info->shadow = kzalloc(sizeof(struct blk_shadow) * ring_size, + GFP_KERNEL); + if (!info->shadow) + return -ENOMEM; + + for (i = 0; i < ring_size; i++) + info->shadow[i].req.u.rw.id = i+1; + info->shadow[ring_size - 1].req.u.rw.id = 0x0fffffff; + + return 0; +} static int setup_blkring(struct xenbus_device *dev, struct blkfront_info *info) @@ -836,8 +965,6 @@ static int setup_blkring(struct xenbus_device *dev, SHARED_RING_INIT(sring); FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); - sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); - err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); if (err < 0) { free_page((unsigned long)sring); @@ -846,6 +973,16 @@ static int setup_blkring(struct xenbus_device *dev, } info->ring_ref = err; + info->sg = kzalloc(sizeof(struct scatterlist) * info->ops->max_seg, GFP_KERNEL); + if (!info->sg) { + err = -ENOMEM; + goto fail; + } + sg_init_table(info->sg, info->ops->max_seg); + + err = init_shadow(info); + if (err) + goto fail; err = xenbus_alloc_evtchn(dev, &info->evtchn); if (err) goto fail; @@ -866,6 +1003,20 @@ fail: return err; } +static 
void free_blkring(struct blkfront_info *info, int suspend) +{ + if (info->ring_ref != GRANT_INVALID_REF) { + gnttab_end_foreign_access(info->ring_ref, 0, + (unsigned long)info->ring.sring); + info->ring_ref = GRANT_INVALID_REF; + info->ring.sring = NULL; + } + + kfree(info->sg); + + if (!suspend) + kfree(info->shadow); +} /* Common code used when first setting up, and when resuming. */ static int talk_to_blkback(struct xenbus_device *dev, @@ -875,8 +1026,11 @@ static int talk_to_blkback(struct xenbus_device *dev, struct xenbus_transaction xbt; int err; + /* register ring ops */ + info->ops = &blk_front_ops; + /* Create shared ring, alloc event channel. */ - err = setup_blkring(dev, info); + err = info->ops->setup_blkring(dev, info); if (err) goto out; @@ -937,7 +1091,7 @@ again: static int blkfront_probe(struct xenbus_device *dev, const struct xenbus_device_id *id) { - int err, vdevice, i; + int err, vdevice; struct blkfront_info *info; /* FIXME: Use dynamic device id if this is not set. */ @@ -995,10 +1149,6 @@ static int blkfront_probe(struct xenbus_device *dev, info->connected = BLKIF_STATE_DISCONNECTED; INIT_WORK(&info->work, blkif_restart_queue); - for (i = 0; i < BLK_RING_SIZE; i++) - info->shadow[i].req.u.rw.id = i+1; - info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; - /* Front end dir is a number, which is used as the id. */ info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); dev_set_drvdata(&dev->dev, info); @@ -1022,14 +1172,14 @@ static int blkif_recover(struct blkfront_info *info) int j; /* Stage 1: Make a safe copy of the shadow state. */ - copy = kmalloc(sizeof(info->shadow), + copy = kmalloc(sizeof(struct blk_shadow) * BLK_RING_SIZE, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH); if (!copy) return -ENOMEM; - memcpy(copy, info->shadow, sizeof(info->shadow)); + memcpy(copy, info->shadow, sizeof(struct blk_shadow) * BLK_RING_SIZE); /* Stage 2: Set up free list. 
*/ - memset(&info->shadow, 0, sizeof(info->shadow)); + memset(info->shadow, 0, sizeof(struct blk_shadow) * BLK_RING_SIZE); for (i = 0; i < BLK_RING_SIZE; i++) info->shadow[i].req.u.rw.id = i+1; info->shadow_free = info->ring.req_prod_pvt; @@ -1042,11 +1192,11 @@ static int blkif_recover(struct blkfront_info *info) continue; /* Grab a request slot and copy shadow state into it. */ - req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); + req = (struct blkif_request *)info->ops->ring_get_request(info); *req = copy[i].req; /* We get a new request id, and must reset the shadow state. */ - req->u.rw.id = get_id_from_freelist(info); + req->u.rw.id = info->ops->get_id(info); memcpy(&info->shadow[req->u.rw.id], &copy[i], sizeof(copy[i])); if (req->operation != BLKIF_OP_DISCARD) { @@ -1100,7 +1250,7 @@ static int blkfront_resume(struct xenbus_device *dev) err = talk_to_blkback(dev, info); if (info->connected == BLKIF_STATE_SUSPENDED && !err) - err = blkif_recover(info); + err = info->ops->recover(info); return err; } @@ -1280,7 +1430,6 @@ static void blkfront_connect(struct blkfront_info *info) info->connected = BLKIF_STATE_CONNECTED; kick_pending_request_queues(info); spin_unlock_irq(&info->io_lock); - add_disk(info->gd); info->is_ready = 1; @@ -1444,6 +1593,31 @@ out: return 0; } +static struct blk_front_operations blk_front_ops = { + .ring_get_request = ring_get_request, + .ring_get_response = ring_get_response, + .ring_get_segment = ring_get_segment, + .get_id = get_id_from_freelist, + .add_id = add_id_to_freelist, + .save_seg_shadow = save_seg_shadow, + .save_req_shadow = save_req_shadow, + .get_req_from_shadow = get_req_from_shadow, + .get_rsp_prod = get_rsp_prod, + .get_rsp_cons = get_rsp_cons, + .get_req_prod_pvt = get_req_prod_pvt, + .check_left_response = check_left_response, + .update_rsp_event = update_rsp_event, + .update_rsp_cons = update_rsp_cons, + .update_req_prod_pvt = update_req_prod_pvt, + .ring_push = ring_push, + .recover = blkif_recover, + 
.ring_full = ring_full, + .setup_blkring = setup_blkring, + .free_blkring = free_blkring, + .blkif_completion = blkif_completion, + .max_seg = BLKIF_MAX_SEGMENTS_PER_REQUEST, +}; + static const struct block_device_operations xlvbd_block_fops = { .owner = THIS_MODULE, -ronghui _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel