Please pull upstream/xen/dom0/backend/blktap2 at git://xenbits.xensource.com/people/dstodden/linux.git Summary: - Move blktap to drivers/block - Takes a .config update setting BLK_DEV_TAP - Redo the ABI definitions, in linux/blktap.h - Some prerequisites for new commands, such as cache flushing and trim support. Plus more device configuration driven from userspace, such as setting physical sector sizes. - A new CREATE_DEVICE ioctl to flag extensions, like the above-mentioned ones. - Bugfix from Dominic Curran (Fix reference to freed struct request). Thanks, Daniel _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel Stodden
2011-Mar-09 00:42 UTC
[Xen-devel] [PATCH 01/10] blktap: Add include/linux/blktap.h
Moves blktap2 definitions into a common header file. Includes xen/interface/io/ring.h and new ring definitions. Makes blktap build independently from xen-devel headers. New blktap_ring structs are fully congruent to blkif rings, for binary compat. Signed-off-by: Daniel Stodden <daniel.stodden@citrix.com> --- drivers/xen/blktap/blktap.h | 66 ++++---------------------------- drivers/xen/blktap/control.c | 14 +++--- drivers/xen/blktap/device.c | 12 +++--- drivers/xen/blktap/request.c | 8 ++-- drivers/xen/blktap/ring.c | 51 ++++++++++++++----------- drivers/xen/blktap/sysfs.c | 6 +- include/linux/blktap.h | 85 ++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 142 insertions(+), 100 deletions(-) create mode 100644 include/linux/blktap.h diff --git a/drivers/xen/blktap/blktap.h b/drivers/xen/blktap/blktap.h index fe63fc9..1318cad 100644 --- a/drivers/xen/blktap/blktap.h +++ b/drivers/xen/blktap/blktap.h @@ -6,7 +6,7 @@ #include <linux/cdev.h> #include <linux/init.h> #include <linux/scatterlist.h> -#include <xen/blkif.h> +#include <linux/blktap.h> extern int blktap_debug_level; extern int blktap_ring_major; @@ -30,74 +30,26 @@ extern int blktap_device_major; #define BLKTAP_DEVICE_CLOSED 5 #define BLKTAP_SHUTDOWN_REQUESTED 8 -/* blktap IOCTLs: */ -#define BLKTAP2_IOCTL_KICK_FE 1 -#define BLKTAP2_IOCTL_ALLOC_TAP 200 -#define BLKTAP2_IOCTL_FREE_TAP 201 -#define BLKTAP2_IOCTL_CREATE_DEVICE 202 -#define BLKTAP2_IOCTL_REMOVE_DEVICE 207 - -#define BLKTAP2_MAX_MESSAGE_LEN 256 - -#define BLKTAP2_RING_MESSAGE_CLOSE 3 - #define BLKTAP_REQUEST_FREE 0 #define BLKTAP_REQUEST_PENDING 1 -/* - * The maximum number of requests that can be outstanding at any time - * is determined by - * - * [mmap_alloc * MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST] - * - * where mmap_alloc < MAX_DYNAMIC_MEM. - * - * TODO: - * mmap_alloc is initialised to 2 and should be adjustable on the fly via - * sysfs. 
- */ -#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE) -#define MAX_DYNAMIC_MEM BLK_RING_SIZE -#define MAX_PENDING_REQS BLK_RING_SIZE -#define MMAP_PAGES (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST) -#define MMAP_VADDR(_start, _req, _seg) \ - (_start + \ - ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \ - ((_seg) * PAGE_SIZE)) - -struct grant_handle_pair { - grant_handle_t kernel; - grant_handle_t user; -}; -#define INVALID_GRANT_HANDLE 0xFFFF - -struct blktap_handle { - unsigned int ring; - unsigned int device; - unsigned int minor; -}; - -struct blktap_params { - char name[BLKTAP2_MAX_MESSAGE_LEN]; - unsigned long long capacity; - unsigned long sector_size; -}; - struct blktap_device { spinlock_t lock; struct gendisk *gd; }; +struct blktap_request; + struct blktap_ring { struct task_struct *task; struct vm_area_struct *vma; - struct blkif_front_ring ring; + blktap_front_ring_t ring; unsigned long ring_vstart; unsigned long user_vstart; int n_pending; - struct blktap_request *pending[MAX_PENDING_REQS]; + struct blktap_request *pending[BLKTAP_RING_SIZE]; wait_queue_head_t poll_wait; @@ -128,8 +80,8 @@ struct blktap_request { int operation; struct timeval time; - struct scatterlist sg_table[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct scatterlist sg_table[BLKTAP_SEGMENT_MAX]; + struct page *pages[BLKTAP_SEGMENT_MAX]; int nr_pages; }; @@ -148,7 +100,7 @@ struct blktap { wait_queue_head_t remove_wait; struct work_struct remove_work; - char name[BLKTAP2_MAX_MESSAGE_LEN]; + char name[BLKTAP_NAME_MAX]; struct blktap_statistics stats; }; @@ -189,7 +141,7 @@ void blktap_sysfs_destroy(struct blktap *); int blktap_device_init(void); void blktap_device_exit(void); size_t blktap_device_debug(struct blktap *, char *, size_t); -int blktap_device_create(struct blktap *, struct blktap_params *); +int blktap_device_create(struct blktap *, struct blktap_device_info *); int 
blktap_device_destroy(struct blktap *); void blktap_device_destroy_sync(struct blktap *); void blktap_device_run_queue(struct blktap *); diff --git a/drivers/xen/blktap/control.c b/drivers/xen/blktap/control.c index f339bba..57b1a10 100644 --- a/drivers/xen/blktap/control.c +++ b/drivers/xen/blktap/control.c @@ -127,19 +127,19 @@ blktap_control_ioctl(struct inode *inode, struct file *filp, struct blktap *tap; switch (cmd) { - case BLKTAP2_IOCTL_ALLOC_TAP: { - struct blktap_handle h; + case BLKTAP_IOCTL_ALLOC_TAP: { + struct blktap_info info; void __user *ptr = (void __user*)arg; tap = blktap_control_create_tap(); if (!tap) return -ENOMEM; - h.ring = blktap_ring_major; - h.device = blktap_device_major; - h.minor = tap->minor; + info.ring_major = blktap_ring_major; + info.bdev_major = blktap_device_major; + info.ring_minor = tap->minor; - if (copy_to_user(ptr, &h, sizeof(h))) { + if (copy_to_user(ptr, &info, sizeof(info))) { blktap_control_destroy_tap(tap); return -EFAULT; } @@ -147,7 +147,7 @@ blktap_control_ioctl(struct inode *inode, struct file *filp, return 0; } - case BLKTAP2_IOCTL_FREE_TAP: { + case BLKTAP_IOCTL_FREE_TAP: { int minor = arg; if (minor > MAX_BLKTAP_DEVICE) diff --git a/drivers/xen/blktap/device.c b/drivers/xen/blktap/device.c index fce2769..6bb04bd 100644 --- a/drivers/xen/blktap/device.c +++ b/drivers/xen/blktap/device.c @@ -186,7 +186,7 @@ blktap_device_make_request(struct blktap *tap, struct request *rq) write ? 'w' : 'r', blk_rq_bytes(rq), nsegs); request->rq = rq; - request->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ; + request->operation = write ? 
BLKTAP_OP_WRITE : BLKTAP_OP_READ; err = blktap_request_get_pages(tap, request, nsegs); if (err) @@ -276,7 +276,7 @@ blktap_device_do_request(struct request_queue *rq) static void blktap_device_configure(struct blktap *tap, - struct blktap_params *params) + struct blktap_device_info *params) { struct request_queue *rq; struct blktap_device *dev = &tap->device; @@ -297,8 +297,8 @@ blktap_device_configure(struct blktap *tap, blk_queue_max_segment_size(rq, PAGE_SIZE); /* Ensure a merged request will fit in a single I/O ring slot. */ - blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); - blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); + blk_queue_max_phys_segments(rq, BLKTAP_SEGMENT_MAX); + blk_queue_max_hw_segments(rq, BLKTAP_SEGMENT_MAX); /* Make sure buffer addresses are sector-aligned. */ blk_queue_dma_alignment(rq, 511); @@ -311,7 +311,7 @@ blktap_device_configure(struct blktap *tap, static int blktap_device_validate_params(struct blktap *tap, - struct blktap_params *params) + struct blktap_device_info *params) { struct device *dev = tap->ring.dev; int sector_order, name_sz; @@ -425,7 +425,7 @@ blktap_device_destroy_sync(struct blktap *tap) } int -blktap_device_create(struct blktap *tap, struct blktap_params *params) +blktap_device_create(struct blktap *tap, struct blktap_device_info *params) { int minor, err; struct gendisk *gd; diff --git a/drivers/xen/blktap/request.c b/drivers/xen/blktap/request.c index 9bef48c..8cfd6c9 100644 --- a/drivers/xen/blktap/request.c +++ b/drivers/xen/blktap/request.c @@ -7,18 +7,18 @@ #include "blktap.h" /* max pages per shared pool. just to prevent accidental dos. */ -#define POOL_MAX_PAGES (256*BLKIF_MAX_SEGMENTS_PER_REQUEST) +#define POOL_MAX_PAGES (256*BLKTAP_SEGMENT_MAX) /* default page pool size. when considering to shrink a shared pool, * note that paused tapdisks may grab a whole lot of pages for a long * time. 
*/ -#define POOL_DEFAULT_PAGES (2 * MMAP_PAGES) +#define POOL_DEFAULT_PAGES (2 * BLKTAP_RING_SIZE * BLKTAP_SEGMENT_MAX) /* max number of pages allocatable per request. */ -#define POOL_MAX_REQUEST_PAGES BLKIF_MAX_SEGMENTS_PER_REQUEST +#define POOL_MAX_REQUEST_PAGES BLKTAP_SEGMENT_MAX /* min request structs per pool. These grow dynamically. */ -#define POOL_MIN_REQS BLK_RING_SIZE +#define POOL_MIN_REQS BLKTAP_RING_SIZE static struct kset *pool_set; diff --git a/drivers/xen/blktap/ring.c b/drivers/xen/blktap/ring.c index 6b86be5..9442a64 100644 --- a/drivers/xen/blktap/ring.c +++ b/drivers/xen/blktap/ring.c @@ -18,7 +18,7 @@ static struct cdev blktap_ring_cdev; static void blktap_ring_read_response(struct blktap *tap, - const struct blkif_response *rsp) + const blktap_ring_rsp_t *rsp) { struct blktap_ring *ring = &tap->ring; struct blktap_request *request; @@ -27,7 +27,7 @@ blktap_ring_read_response(struct blktap *tap, request = NULL; usr_idx = rsp->id; - if (usr_idx < 0 || usr_idx >= MAX_PENDING_REQS) { + if (usr_idx < 0 || usr_idx >= BLKTAP_RING_SIZE) { err = -ERANGE; goto invalid; } @@ -48,7 +48,7 @@ blktap_ring_read_response(struct blktap *tap, "request %d [%p] response: %d\n", request->usr_idx, request, rsp->status); - err = rsp->status == BLKIF_RSP_OKAY ? 0 : -EIO; + err = rsp->status == BLKTAP_RSP_OKAY ? 
0 : -EIO; end_request: blktap_device_end_request(tap, request, err); return; @@ -67,7 +67,7 @@ static void blktap_read_ring(struct blktap *tap) { struct blktap_ring *ring = &tap->ring; - struct blkif_response rsp; + blktap_ring_rsp_t rsp; RING_IDX rc, rp; down_read(&current->mm->mmap_sem); @@ -90,6 +90,11 @@ blktap_read_ring(struct blktap *tap) up_read(&current->mm->mmap_sem); } +#define MMAP_VADDR(_start, _req, _seg) \ + ((_start) + \ + ((_req) * BLKTAP_SEGMENT_MAX * BLKTAP_PAGE_SIZE) + \ + ((_seg) * BLKTAP_PAGE_SIZE)) + static int blktap_ring_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { return VM_FAULT_SIGBUS; @@ -102,7 +107,7 @@ blktap_ring_fail_pending(struct blktap *tap) struct blktap_request *request; int usr_idx; - for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) { + for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) { request = ring->pending[usr_idx]; if (!request) continue; @@ -154,7 +159,7 @@ blktap_ring_map_request(struct blktap *tap, int seg, err = 0; int write; - write = request->operation == BLKIF_OP_WRITE; + write = request->operation == BLKTAP_OP_WRITE; for (seg = 0; seg < request->nr_pages; seg++) { if (write) @@ -182,7 +187,7 @@ blktap_ring_unmap_request(struct blktap *tap, uaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, 0); size = request->nr_pages << PAGE_SHIFT; - read = request->operation == BLKIF_OP_READ; + read = request->operation == BLKTAP_OP_READ; if (read) for (seg = 0; seg < request->nr_pages; seg++) @@ -217,11 +222,11 @@ blktap_ring_make_request(struct blktap *tap) if (!request) return ERR_PTR(-ENOMEM); - for (usr_idx = 0; usr_idx < BLK_RING_SIZE; usr_idx++) + for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) if (!ring->pending[usr_idx]) break; - BUG_ON(usr_idx >= BLK_RING_SIZE); + BUG_ON(usr_idx >= BLKTAP_RING_SIZE); request->tap = tap; request->usr_idx = usr_idx; @@ -237,7 +242,7 @@ blktap_ring_submit_request(struct blktap *tap, struct blktap_request *request) { struct blktap_ring *ring = 
&tap->ring; - 
struct blkif_request *breq; + blktap_ring_req_t *breq; struct scatterlist *sg; int i, nsecs = 0; @@ -248,12 +253,12 @@ blktap_ring_submit_request(struct blktap *tap, breq->id = request->usr_idx; breq->sector_number = blk_rq_pos(request->rq); - breq->handle = 0; + breq->__pad = 0; breq->operation = request->operation; breq->nr_segments = request->nr_pages; blktap_for_each_sg(sg, request, i) { - struct blkif_request_segment *seg = &breq->seg[i]; + struct blktap_segment *seg = &breq->seg[i]; int first, count; count = sg->length >> 9; @@ -270,12 +275,12 @@ blktap_ring_submit_request(struct blktap *tap, do_gettimeofday(&request->time); - if (request->operation == BLKIF_OP_WRITE) { + if (request->operation == BLKTAP_OP_WRITE) { tap->stats.st_wr_sect += nsecs; tap->stats.st_wr_req++; } - if (request->operation == BLKIF_OP_READ) { + if (request->operation == BLKTAP_OP_READ) { tap->stats.st_rd_sect += nsecs; tap->stats.st_rd_req++; } @@ -327,7 +332,7 @@ blktap_ring_mmap(struct file *filp, struct vm_area_struct *vma) { struct blktap *tap = filp->private_data; struct blktap_ring *ring = &tap->ring; - struct blkif_sring *sring; + blktap_sring_t *sring; struct page *page = NULL; int err; @@ -384,25 +389,25 @@ blktap_ring_ioctl(struct inode *inode, struct file *filp, return -EACCES; switch(cmd) { - case BLKTAP2_IOCTL_KICK_FE: + case BLKTAP_IOCTL_RESPOND: blktap_read_ring(tap); return 0; - case BLKTAP2_IOCTL_CREATE_DEVICE: { - struct blktap_params params; + case BLKTAP_IOCTL_CREATE_DEVICE: { + struct blktap_device_info info; void __user *ptr = (void *)arg; if (!arg) return -EINVAL; - if (copy_from_user(&params, ptr, sizeof(params))) + if (copy_from_user(&info, ptr, sizeof(info))) return -EFAULT; - return blktap_device_create(tap, &params); + return blktap_device_create(tap, &info); } - case BLKTAP2_IOCTL_REMOVE_DEVICE: + case BLKTAP_IOCTL_REMOVE_DEVICE: return blktap_device_destroy(tap); } @@ -482,7 +487,7 @@ blktap_ring_debug(struct blktap *tap, char *buf, size_t size) s += 
snprintf(s, 
end - s, "begin pending:%d\n", ring->n_pending); - for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) { + for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) { struct blktap_request *request; struct timeval *time; int write; @@ -491,7 +496,7 @@ blktap_ring_debug(struct blktap *tap, char *buf, size_t size) if (!request) continue; - write = request->operation == BLKIF_OP_WRITE; + write = request->operation == BLKTAP_OP_WRITE; time = &request->time; s += snprintf(s, end - s, diff --git a/drivers/xen/blktap/sysfs.c b/drivers/xen/blktap/sysfs.c index 7bbfea8..182de9a 100644 --- a/drivers/xen/blktap/sysfs.c +++ b/drivers/xen/blktap/sysfs.c @@ -20,7 +20,7 @@ blktap_sysfs_set_name(struct device *dev, struct device_attribute *attr, const c if (!tap) return 0; - if (size >= BLKTAP2_MAX_MESSAGE_LEN) + if (size >= BLKTAP_NAME_MAX) return -ENAMETOOLONG; if (strnlen(buf, size) != size) @@ -75,8 +75,8 @@ blktap_sysfs_remove_device(struct device *dev, goto wait; if (tap->ring.vma) { - blkif_sring_t *sring = tap->ring.ring.sring; - sring->private.tapif_user.msg = BLKTAP2_RING_MESSAGE_CLOSE; + blktap_sring_t *sring = tap->ring.ring.sring; + sring->private.tapif_user.msg = BLKTAP_RING_MESSAGE_CLOSE; blktap_ring_kick_user(tap); } else { INIT_WORK(&tap->remove_work, blktap_sysfs_remove_work); diff --git a/include/linux/blktap.h b/include/linux/blktap.h new file mode 100644 index 0000000..ec33429 --- /dev/null +++ b/include/linux/blktap.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2011, XenSource Inc. + * All rights reserved. 
+ */ + +#ifndef _LINUX_BLKTAP_H +#define _LINUX_BLKTAP_H + +/* + * Control + */ + +#define BLKTAP_IOCTL_RESPOND 1 +#define BLKTAP_IOCTL_ALLOC_TAP 200 +#define BLKTAP_IOCTL_FREE_TAP 201 +#define BLKTAP_IOCTL_CREATE_DEVICE 202 +#define BLKTAP_IOCTL_REMOVE_DEVICE 207 + +#define BLKTAP_NAME_MAX 256 + +struct blktap_info { + unsigned int ring_major; + unsigned int bdev_major; + unsigned int ring_minor; +}; + +struct blktap_device_info { + char name[BLKTAP_NAME_MAX]; + unsigned long long capacity; + unsigned long sector_size; +}; + +/* + * I/O ring + */ + +#ifdef __KERNEL__ +#define BLKTAP_PAGE_SIZE PAGE_SIZE +#endif + +#include <xen/interface/io/ring.h> + +typedef struct blktap_ring_request blktap_ring_req_t; +typedef struct blktap_ring_response blktap_ring_rsp_t; + +struct blktap_segment { + uint32_t __pad; + uint8_t first_sect; + uint8_t last_sect; +}; + +#define BLKTAP_OP_READ 0 +#define BLKTAP_OP_WRITE 1 + +#define BLKTAP_SEGMENT_MAX 11 + +struct blktap_ring_request { + uint8_t operation; + uint8_t nr_segments; + uint16_t __pad; + uint64_t id; + uint64_t sector_number; + struct blktap_segment seg[BLKTAP_SEGMENT_MAX]; +}; + +#define BLKTAP_RSP_EOPNOTSUPP -2 +#define BLKTAP_RSP_ERROR -1 +#define BLKTAP_RSP_OKAY 0 + +struct blktap_ring_response { + uint64_t id; + uint8_t operation; + int16_t status; +}; + +DEFINE_RING_TYPES(blktap, struct blktap_ring_request, struct blktap_ring_response); +#define BLKTAP_RING_SIZE __CONST_RING_SIZE(blktap, BLKTAP_PAGE_SIZE) + +/* + * Ring messages (DEPRECATED) + */ + +#define BLKTAP_RING_MESSAGE_CLOSE 3 + +#endif /* _LINUX_BLKTAP_H */ -- 1.7.0.4 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel Stodden
2011-Mar-09 00:42 UTC
[Xen-devel] [PATCH 02/10] blktap: Upgrade CREATE_DEVICE ioctl.
Alternative for the blktap2_params-compatible call. * drops the image name parameter. * flags: adds support for disk R/O-mode, and later extensions. Bumps up the relevant ioctl nr, for new tapdisk sources. The old slot remains supported through translation. Note that set_disk_ro might sleep, so just drop a gratuitous spinlock. Signed-off-by: Daniel Stodden <daniel.stodden@citrix.com> --- drivers/xen/blktap/device.c | 55 ++++++++++++++++-------------------------- drivers/xen/blktap/ring.c | 48 ++++++++++++++++++++++++++++++++++--- include/linux/blktap.h | 18 ++++++++++---- 3 files changed, 78 insertions(+), 43 deletions(-) diff --git a/drivers/xen/blktap/device.c b/drivers/xen/blktap/device.c index 6bb04bd..9a09457 100644 --- a/drivers/xen/blktap/device.c +++ b/drivers/xen/blktap/device.c @@ -276,20 +276,17 @@ blktap_device_do_request(struct request_queue *rq) static void blktap_device_configure(struct blktap *tap, - struct blktap_device_info *params) + struct blktap_device_info *info) { - struct request_queue *rq; - struct blktap_device *dev = &tap->device; - - dev = &tap->device; - rq = dev->gd->queue; - - spin_lock_irq(&dev->lock); + struct blktap_device *tapdev = &tap->device; + struct gendisk *gd = tapdev->gd; + struct request_queue *rq = gd->queue; - set_capacity(dev->gd, params->capacity); + set_capacity(gd, info->capacity); + set_disk_ro(gd, !!(info->flags & BLKTAP_DEVICE_FLAG_RO)); /* Hard sector size and max sectors impersonate the equiv. hardware. */ - blk_queue_logical_block_size(rq, params->sector_size); + blk_queue_logical_block_size(rq, info->sector_size); blk_queue_max_sectors(rq, 512); /* Each segment in a request is up to an aligned page in size. */ @@ -305,38 +302,30 @@ blktap_device_configure(struct blktap *tap, /* We are reordering, but cacheless. 
*/ blk_queue_ordered(rq, QUEUE_ORDERED_DRAIN, NULL); - - spin_unlock_irq(&dev->lock); } static int -blktap_device_validate_params(struct blktap *tap, - struct blktap_device_info *params) +blktap_device_validate_info(struct blktap *tap, + struct blktap_device_info *info) { struct device *dev = tap->ring.dev; - int sector_order, name_sz; - - sector_order = ffs(params->sector_size) - 1; + int sector_order; + sector_order = ffs(info->sector_size) - 1; if (sector_order < 9 || sector_order > 12 || - params->sector_size != 1U<<sector_order) - goto fail; - - if (!params->capacity || - (params->capacity > ULLONG_MAX >> sector_order)) + info->sector_size != 1U<<sector_order) goto fail; - name_sz = min(sizeof(params->name), sizeof(tap->name)); - if (strnlen(params->name, name_sz) >= name_sz) + if (!info->capacity || + (info->capacity > ULLONG_MAX >> sector_order)) goto fail; return 0; fail: - params->name[name_sz-1] = 0; - dev_err(dev, "capacity: %llu, sector-size: %lu, name: %s\n", - params->capacity, params->sector_size, params->name); + dev_err(dev, "capacity: %llu, sector-size: %u\n", + info->capacity, info->sector_size); return -EINVAL; } @@ -425,7 +414,7 @@ blktap_device_destroy_sync(struct blktap *tap) } int -blktap_device_create(struct blktap *tap, struct blktap_device_info *params) +blktap_device_create(struct blktap *tap, struct blktap_device_info *info) { int minor, err; struct gendisk *gd; @@ -440,7 +429,7 @@ blktap_device_create(struct blktap *tap, struct blktap_device_info *params) if (test_bit(BLKTAP_DEVICE, &tap->dev_inuse)) return -EEXIST; - if (blktap_device_validate_params(tap, params)) + if (blktap_device_validate_info(tap, info)) return -EINVAL; gd = alloc_disk(1); @@ -479,16 +468,14 @@ blktap_device_create(struct blktap *tap, struct blktap_device_info *params) rq->queuedata = tapdev; tapdev->gd = gd; - blktap_device_configure(tap, params); + blktap_device_configure(tap, info); add_disk(gd); - if (params->name[0]) - strncpy(tap->name, params->name, 
sizeof(tap->name)-1); - set_bit(BLKTAP_DEVICE, &tap->dev_inuse); - dev_info(disk_to_dev(gd), "sector-size: %u capacity: %llu\n", + dev_info(disk_to_dev(gd), "sector-size: %u/%u capacity: %llu\n", queue_logical_block_size(rq), + queue_physical_block_size(rq), (unsigned long long)get_capacity(gd)); return 0; diff --git a/drivers/xen/blktap/ring.c b/drivers/xen/blktap/ring.c index 9442a64..635f1fd 100644 --- a/drivers/xen/blktap/ring.c +++ b/drivers/xen/blktap/ring.c @@ -16,6 +16,10 @@ static struct cdev blktap_ring_cdev; */ #define RING_PAGES 1 +#define BLKTAP_INFO_SIZE_AT(_memb) \ + offsetof(struct blktap_device_info, _memb) + \ + sizeof(((struct blktap_device_info*)0)->_memb) + static void blktap_ring_read_response(struct blktap *tap, const blktap_ring_rsp_t *rsp) @@ -382,6 +386,8 @@ blktap_ring_ioctl(struct inode *inode, struct file *filp, { struct blktap *tap = filp->private_data; struct blktap_ring *ring = &tap->ring; + void __user *ptr = (void *)arg; + int err; BTDBG("%d: cmd: %u, arg: %lu\n", tap->minor, cmd, arg); @@ -394,14 +400,48 @@ blktap_ring_ioctl(struct inode *inode, struct file *filp, blktap_read_ring(tap); return 0; + case BLKTAP_IOCTL_CREATE_DEVICE_COMPAT: { + struct blktap_device_info info; + struct blktap2_params params; + + if (copy_from_user(&params, ptr, sizeof(params))) + return -EFAULT; + + info.capacity = params.capacity; + info.sector_size = params.sector_size; + info.flags = 0; + + err = blktap_device_create(tap, &info); + if (err) + return err; + + if (params.name[0]) { + strncpy(tap->name, params.name, sizeof(params.name)); + tap->name[sizeof(tap->name)-1] = 0; + } + + return 0; + } + case BLKTAP_IOCTL_CREATE_DEVICE: { + struct blktap_device_info __user *ptr = (void *)arg; struct blktap_device_info info; - void __user *ptr = (void *)arg; + unsigned long mask; + size_t base_sz, sz; + + mask = BLKTAP_DEVICE_FLAG_RO; + + memset(&info, 0, sizeof(info)); + sz = base_sz = BLKTAP_INFO_SIZE_AT(flags); + + if (copy_from_user(&info, ptr, sz)) + 
return 
-EFAULT; - if (!arg) - return -EINVAL; + if (sz > base_sz) + if (copy_from_user(&info, ptr, sz)) + return -EFAULT; - if (copy_from_user(&info, ptr, sizeof(info))) + if (put_user(info.flags & mask, &ptr->flags)) return -EFAULT; return blktap_device_create(tap, &info); diff --git a/include/linux/blktap.h b/include/linux/blktap.h index ec33429..2c3c924 100644 --- a/include/linux/blktap.h +++ b/include/linux/blktap.h @@ -13,10 +13,10 @@ #define BLKTAP_IOCTL_RESPOND 1 #define BLKTAP_IOCTL_ALLOC_TAP 200 #define BLKTAP_IOCTL_FREE_TAP 201 -#define BLKTAP_IOCTL_CREATE_DEVICE 202 +#define BLKTAP_IOCTL_CREATE_DEVICE 208 #define BLKTAP_IOCTL_REMOVE_DEVICE 207 -#define BLKTAP_NAME_MAX 256 +#define BLKTAP_DEVICE_FLAG_RO 0x00000001UL /* disk is R/O */ struct blktap_info { unsigned int ring_major; @@ -25,9 +25,9 @@ struct blktap_info { }; struct blktap_device_info { - char name[BLKTAP_NAME_MAX]; unsigned long long capacity; - unsigned long sector_size; + unsigned int sector_size; + unsigned long flags; }; /* @@ -77,9 +77,17 @@ DEFINE_RING_TYPES(blktap, struct blktap_ring_request, struct blktap_ring_respons #define BLKTAP_RING_SIZE __CONST_RING_SIZE(blktap, BLKTAP_PAGE_SIZE) /* - * Ring messages (DEPRECATED) + * Ring messages + old ioctls (DEPRECATED) */ #define BLKTAP_RING_MESSAGE_CLOSE 3 +#define BLKTAP_IOCTL_CREATE_DEVICE_COMPAT 202 +#define BLKTAP_NAME_MAX 256 + +struct blktap2_params { + char name[BLKTAP_NAME_MAX]; + unsigned long long capacity; + unsigned long sector_size; +}; #endif /* _LINUX_BLKTAP_H */ -- 1.7.0.4 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel Stodden
2011-Mar-09 00:42 UTC
[Xen-devel] [PATCH 03/10] blktap: Move from drivers/xen to drivers/block
From: Daniel Stodden <dns@somacoma.net> Signed-off-by: Daniel Stodden <daniel.stodden@citrix.com> --- drivers/block/Kconfig | 9 + drivers/block/Makefile | 1 + drivers/block/blktap/Makefile | 3 + drivers/block/blktap/blktap.h | 161 +++++++++++ drivers/block/blktap/control.c | 315 +++++++++++++++++++++ drivers/block/blktap/device.c | 551 +++++++++++++++++++++++++++++++++++++ drivers/block/blktap/request.c | 418 ++++++++++++++++++++++++++++ drivers/block/blktap/ring.c | 595 ++++++++++++++++++++++++++++++++++++++++ drivers/block/blktap/sysfs.c | 288 +++++++++++++++++++ drivers/xen/Kconfig | 11 - drivers/xen/Makefile | 1 - drivers/xen/blktap/Makefile | 3 - drivers/xen/blktap/blktap.h | 161 ----------- drivers/xen/blktap/control.c | 315 --------------------- drivers/xen/blktap/device.c | 551 ------------------------------------- drivers/xen/blktap/request.c | 418 ---------------------------- drivers/xen/blktap/ring.c | 595 ---------------------------------------- drivers/xen/blktap/sysfs.c | 288 ------------------- 18 files changed, 2341 insertions(+), 2343 deletions(-) create mode 100644 drivers/block/blktap/Makefile create mode 100644 drivers/block/blktap/blktap.h create mode 100644 drivers/block/blktap/control.c create mode 100644 drivers/block/blktap/device.c create mode 100644 drivers/block/blktap/request.c create mode 100644 drivers/block/blktap/ring.c create mode 100644 drivers/block/blktap/sysfs.c delete mode 100644 drivers/xen/blktap/Makefile delete mode 100644 drivers/xen/blktap/blktap.h delete mode 100644 drivers/xen/blktap/control.c delete mode 100644 drivers/xen/blktap/device.c delete mode 100644 drivers/xen/blktap/request.c delete mode 100644 drivers/xen/blktap/ring.c delete mode 100644 drivers/xen/blktap/sysfs.c diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index bea8ae7..c4a55a3 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -471,4 +471,13 @@ config BLK_DEV_HD If unsure, say N. 
+config BLK_DEV_TAP + tristate "Blktap userspace devices" + help + The block tap driver allows block device requests to be + redirected to processes, through a device interface. + Doing so allows user-space development of high-performance + block storage backends, where disk images may be implemented + as files, in memory, or on other hosts across the network. + endif # BLK_DEV diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 7755a5e..8389917 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -28,6 +28,7 @@ obj-$(CONFIG_BLK_DEV_UMEM) += umem.o obj-$(CONFIG_BLK_DEV_NBD) += nbd.o obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o +obj-$(CONFIG_BLK_DEV_TAP) += blktap/ obj-$(CONFIG_VIODASD) += viodasd.o obj-$(CONFIG_BLK_DEV_SX8) += sx8.o diff --git a/drivers/block/blktap/Makefile b/drivers/block/blktap/Makefile new file mode 100644 index 0000000..923a7c5 --- /dev/null +++ b/drivers/block/blktap/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_BLK_DEV_TAP) := blktap.o + +blktap-objs := control.o ring.o device.o request.o sysfs.o diff --git a/drivers/block/blktap/blktap.h b/drivers/block/blktap/blktap.h new file mode 100644 index 0000000..1318cad --- /dev/null +++ b/drivers/block/blktap/blktap.h @@ -0,0 +1,161 @@ +#ifndef _BLKTAP_H_ +#define _BLKTAP_H_ + +#include <linux/mm.h> +#include <linux/fs.h> +#include <linux/cdev.h> +#include <linux/init.h> +#include <linux/scatterlist.h> +#include <linux/blktap.h> + +extern int blktap_debug_level; +extern int blktap_ring_major; +extern int blktap_device_major; + +#define BTPRINTK(level, tag, force, _f, _a...) \ + do { \ + if (blktap_debug_level > level && \ + (force || printk_ratelimit())) \ + printk(tag "%s: " _f, __func__, ##_a); \ + } while (0) + +#define BTDBG(_f, _a...) BTPRINTK(8, KERN_DEBUG, 1, _f, ##_a) +#define BTINFO(_f, _a...) BTPRINTK(0, KERN_INFO, 0, _f, ##_a) +#define BTWARN(_f, _a...) BTPRINTK(0, KERN_WARNING, 0, _f, ##_a) +#define BTERR(_f, _a...) 
BTPRINTK(0, KERN_ERR, 0, _f, ##_a) + +#define MAX_BLKTAP_DEVICE 1024 + +#define BLKTAP_DEVICE 4 +#define BLKTAP_DEVICE_CLOSED 5 +#define BLKTAP_SHUTDOWN_REQUESTED 8 + +#define BLKTAP_REQUEST_FREE 0 +#define BLKTAP_REQUEST_PENDING 1 + +struct blktap_device { + spinlock_t lock; + struct gendisk *gd; +}; + +struct blktap_request; + +struct blktap_ring { + struct task_struct *task; + + struct vm_area_struct *vma; + blktap_front_ring_t ring; + unsigned long ring_vstart; + unsigned long user_vstart; + + int n_pending; + struct blktap_request *pending[BLKTAP_RING_SIZE]; + + wait_queue_head_t poll_wait; + + dev_t devno; + struct device *dev; +}; + +struct blktap_statistics { + unsigned long st_print; + int st_rd_req; + int st_wr_req; + int st_oo_req; + int st_rd_sect; + int st_wr_sect; + s64 st_rd_cnt; + s64 st_rd_sum_usecs; + s64 st_rd_max_usecs; + s64 st_wr_cnt; + s64 st_wr_sum_usecs; + s64 st_wr_max_usecs; +}; + +struct blktap_request { + struct blktap *tap; + struct request *rq; + int usr_idx; + + int operation; + struct timeval time; + + struct scatterlist sg_table[BLKTAP_SEGMENT_MAX]; + struct page *pages[BLKTAP_SEGMENT_MAX]; + int nr_pages; +}; + +#define blktap_for_each_sg(_sg, _req, _i) \ + for (_sg = (_req)->sg_table, _i = 0; \ + _i < (_req)->nr_pages; \ + (_sg)++, (_i)++) + +struct blktap { + int minor; + unsigned long dev_inuse; + + struct blktap_ring ring; + struct blktap_device device; + struct blktap_page_pool *pool; + + wait_queue_head_t remove_wait; + struct work_struct remove_work; + char name[BLKTAP_NAME_MAX]; + + struct blktap_statistics stats; +}; + +struct blktap_page_pool { + struct mempool_s *bufs; + spinlock_t lock; + struct kobject kobj; + wait_queue_head_t wait; +}; + +extern struct mutex blktap_lock; +extern struct blktap **blktaps; +extern int blktap_max_minor; + +int blktap_control_destroy_tap(struct blktap *); +size_t blktap_control_debug(struct blktap *, char *, size_t); + +int blktap_ring_init(void); +void blktap_ring_exit(void); +size_t 
blktap_ring_debug(struct blktap *, char *, size_t); +int blktap_ring_create(struct blktap *); +int blktap_ring_destroy(struct blktap *); +struct blktap_request *blktap_ring_make_request(struct blktap *); +void blktap_ring_free_request(struct blktap *,struct blktap_request *); +void blktap_ring_submit_request(struct blktap *, struct blktap_request *); +int blktap_ring_map_request_segment(struct blktap *, struct blktap_request *, int); +int blktap_ring_map_request(struct blktap *, struct blktap_request *); +void blktap_ring_unmap_request(struct blktap *, struct blktap_request *); +void blktap_ring_set_message(struct blktap *, int); +void blktap_ring_kick_user(struct blktap *); + +int blktap_sysfs_init(void); +void blktap_sysfs_exit(void); +int blktap_sysfs_create(struct blktap *); +void blktap_sysfs_destroy(struct blktap *); + +int blktap_device_init(void); +void blktap_device_exit(void); +size_t blktap_device_debug(struct blktap *, char *, size_t); +int blktap_device_create(struct blktap *, struct blktap_device_info *); +int blktap_device_destroy(struct blktap *); +void blktap_device_destroy_sync(struct blktap *); +void blktap_device_run_queue(struct blktap *); +void blktap_device_end_request(struct blktap *, struct blktap_request *, int); + +int blktap_page_pool_init(struct kobject *); +void blktap_page_pool_exit(void); +struct blktap_page_pool *blktap_page_pool_get(const char *); + +size_t blktap_request_debug(struct blktap *, char *, size_t); +struct blktap_request *blktap_request_alloc(struct blktap *); +int blktap_request_get_pages(struct blktap *, struct blktap_request *, int); +void blktap_request_free(struct blktap *, struct blktap_request *); +void blktap_request_bounce(struct blktap *, struct blktap_request *, int, int); + + +#endif diff --git a/drivers/block/blktap/control.c b/drivers/block/blktap/control.c new file mode 100644 index 0000000..57b1a10 --- /dev/null +++ b/drivers/block/blktap/control.c @@ -0,0 +1,315 @@ +#include <linux/module.h> +#include 
<linux/sched.h>
+#include <linux/miscdevice.h>
+#include <linux/device.h>
+#include <asm/uaccess.h>
+
+#include "blktap.h"
+
+DEFINE_MUTEX(blktap_lock);
+
+struct blktap **blktaps;
+int blktap_max_minor;
+static struct blktap_page_pool *default_pool;
+
+/*
+ * Allocate a tap and bind it to the lowest free slot in blktaps[].
+ * When every slot below blktap_max_minor is taken, the map is doubled
+ * (capped at MAX_BLKTAP_DEVICE entries) and the new tail is zeroed.
+ * Takes a module reference on success. Returns NULL on failure.
+ */
+static struct blktap *
+blktap_control_get_minor(void)
+{
+	int minor;
+	struct blktap *tap;
+
+	tap = kzalloc(sizeof(*tap), GFP_KERNEL);
+	if (unlikely(!tap))
+		return NULL;
+
+	mutex_lock(&blktap_lock);
+
+	for (minor = 0; minor < blktap_max_minor; minor++)
+		if (!blktaps[minor])
+			break;
+
+	if (minor == MAX_BLKTAP_DEVICE)
+		goto fail;
+
+	if (minor == blktap_max_minor) {
+		void *p;
+		int n;
+
+		n = min(2 * blktap_max_minor, MAX_BLKTAP_DEVICE);
+		p = krealloc(blktaps, n * sizeof(blktaps[0]), GFP_KERNEL);
+		if (!p)
+			goto fail;
+
+		blktaps = p;
+		minor = blktap_max_minor;
+		blktap_max_minor = n;
+
+		memset(&blktaps[minor], 0, (n - minor) * sizeof(blktaps[0]));
+	}
+
+	tap->minor = minor;
+	blktaps[minor] = tap;
+
+	__module_get(THIS_MODULE);
+out:
+	mutex_unlock(&blktap_lock);
+	return tap;
+
+fail:
+	/*
+	 * blktap_lock is still held here and 'out' drops it. Unlocking
+	 * in this path as well would release the mutex twice.
+	 */
+	kfree(tap);
+	tap = NULL;
+	goto out;
+}
+
+/*
+ * Clear the tap's slot in blktaps[], free the tap and drop the module
+ * reference taken by blktap_control_get_minor().
+ */
+static void
+blktap_control_put_minor(struct blktap* tap)
+{
+	blktaps[tap->minor] = NULL;
+	kfree(tap);
+
+	module_put(THIS_MODULE);
+}
+
+/*
+ * Create a tap: reserve a minor, attach the current default page pool
+ * (holding a kobject reference on it), then create the ring and the
+ * sysfs entries. Returns NULL if any step fails.
+ */
+static struct blktap*
+blktap_control_create_tap(void)
+{
+	struct blktap *tap;
+	int err;
+
+	tap = blktap_control_get_minor();
+	if (!tap)
+		return NULL;
+
+	kobject_get(&default_pool->kobj);
+	tap->pool = default_pool;
+
+	err = blktap_ring_create(tap);
+	if (err)
+		goto fail_tap;
+
+	err = blktap_sysfs_create(tap);
+	if (err)
+		goto fail_ring;
+
+	return tap;
+
+fail_ring:
+	blktap_ring_destroy(tap);
+fail_tap:
+	/* Drop the pool reference taken above; it was leaked before. */
+	kobject_put(&tap->pool->kobj);
+	blktap_control_put_minor(tap);
+
+	return NULL;
+}
+
+/*
+ * Tear a tap down in the reverse order of creation. Returns the error
+ * from blktap_ring_destroy() without touching anything else if the
+ * ring cannot be destroyed.
+ */
+int
+blktap_control_destroy_tap(struct blktap *tap)
+{
+	int err;
+
+	err = blktap_ring_destroy(tap);
+	if (err)
+		return err;
+
+	kobject_put(&tap->pool->kobj);
+
+	blktap_sysfs_destroy(tap);
+
+	blktap_control_put_minor(tap);
+
+	return 0;
+}
+
+/*
+ * ioctl handler of the blktap-control misc device.
+ *
+ * BLKTAP_IOCTL_ALLOC_TAP creates a tap and copies the ring/bdev majors
+ * and the ring minor to the struct blktap_info at 'arg'.
+ * BLKTAP_IOCTL_FREE_TAP destroys the tap whose minor is passed in 'arg'.
+ * Any other command yields -ENOIOCTLCMD.
+ */
+static int
+blktap_control_ioctl(struct inode *inode, struct file *filp,
+		     unsigned int cmd, unsigned long arg)
+{
+	struct blktap *tap;
+
+	switch (cmd) {
+	case BLKTAP_IOCTL_ALLOC_TAP: {
+		struct blktap_info info;
+		void __user *ptr = (void __user*)arg;
+
+		tap = blktap_control_create_tap();
+		if (!tap)
+			return -ENOMEM;
+
+		info.ring_major = blktap_ring_major;
+		info.bdev_major = blktap_device_major;
+		info.ring_minor = tap->minor;
+
+		if (copy_to_user(ptr, &info, sizeof(info))) {
+			blktap_control_destroy_tap(tap);
+			return -EFAULT;
+		}
+
+		return 0;
+	}
+
+	case BLKTAP_IOCTL_FREE_TAP: {
+		int minor = arg;
+
+		/*
+		 * 'arg' comes straight from userspace. blktaps[] holds
+		 * blktap_max_minor entries, not MAX_BLKTAP_DEVICE + 1,
+		 * and the int conversion can go negative, so bound the
+		 * index on both sides before using it.
+		 */
+		if (minor < 0 || minor >= MAX_BLKTAP_DEVICE)
+			return -EINVAL;
+
+		if (minor >= blktap_max_minor)
+			return -ENODEV;
+
+		tap = blktaps[minor];
+		if (!tap)
+			return -ENODEV;
+
+		return blktap_control_destroy_tap(tap);
+	}
+	}
+
+	return -ENOIOCTLCMD;
+}
+
+static struct file_operations blktap_control_file_operations = {
+	.owner    = THIS_MODULE,
+	.ioctl    = blktap_control_ioctl,
+};
+
+static struct miscdevice blktap_control = {
+	.minor    = MISC_DYNAMIC_MINOR,
+	.name     = "blktap-control",
+	.fops     = &blktap_control_file_operations,
+};
+
+static struct device *control_device;
+
+/* sysfs: print the kobject name of the current default page pool. */
+static ssize_t
+blktap_control_show_default_pool(struct device *device,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	return sprintf(buf, "%s", kobject_name(&default_pool->kobj));
+}
+
+/*
+ * sysfs: make the pool named by 'buf' the default one. The new pool is
+ * looked up (or created) first; only then is the reference on the old
+ * default dropped.
+ */
+static ssize_t
+blktap_control_store_default_pool(struct device *device,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	struct blktap_page_pool *pool, *tmp = default_pool;
+
+	pool = blktap_page_pool_get(buf);
+	if (IS_ERR(pool))
+		return PTR_ERR(pool);
+
+	default_pool = pool;
+	kobject_put(&tmp->kobj);
+
+	return size;
+}
+
+static DEVICE_ATTR(default_pool, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH,
+		   blktap_control_show_default_pool,
+		   blktap_control_store_default_pool);
+
+/* Format a one-line summary of the tap into buf; returns bytes written. */
+size_t
+blktap_control_debug(struct blktap *tap, char *buf, size_t size)
+{
+	char *s = buf, *end = buf + size;
+
+	s += snprintf(s, end - s,
+		      "tap %u:%u name:'%s' 
flags:%#08lx\n",
+		      MAJOR(tap->ring.devno), MINOR(tap->ring.devno),
+		      tap->name, tap->dev_inuse);
+
+	return s - buf;
+}
+
+/*
+ * Register the control misc device, allocate the initial minor map,
+ * set up the page pools and publish the default_pool attribute.
+ *
+ * Nothing is unwound here on failure: the caller, blktap_init(), runs
+ * blktap_exit(), and blktap_control_exit() releases whatever was set
+ * up so far.
+ */
+static int __init
+blktap_control_init(void)
+{
+	struct blktap_page_pool *pool;
+	int err;
+
+	err = misc_register(&blktap_control);
+	if (err)
+		return err;
+
+	control_device = blktap_control.this_device;
+
+	blktap_max_minor = min(64, MAX_BLKTAP_DEVICE);
+	blktaps = kzalloc(blktap_max_minor * sizeof(blktaps[0]), GFP_KERNEL);
+	if (!blktaps) {
+		BTERR("failed to allocate blktap minor map");
+		return -ENOMEM;
+	}
+
+	err = blktap_page_pool_init(&control_device->kobj);
+	if (err)
+		return err;
+
+	/*
+	 * blktap_page_pool_get() reports failure as ERR_PTR(), never as
+	 * NULL. Keep default_pool NULL on error so that
+	 * blktap_control_exit() does not kobject_put() an error pointer.
+	 */
+	pool = blktap_page_pool_get("default");
+	if (IS_ERR(pool))
+		return PTR_ERR(pool);
+
+	default_pool = pool;
+
+	err = device_create_file(control_device, &dev_attr_default_pool);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+/*
+ * Undo blktap_control_init(). Every step is guarded, so this is safe
+ * to call after a partial or skipped init.
+ */
+static void
+blktap_control_exit(void)
+{
+	if (default_pool) {
+		kobject_put(&default_pool->kobj);
+		default_pool = NULL;
+	}
+
+	blktap_page_pool_exit();
+
+	if (blktaps) {
+		kfree(blktaps);
+		blktaps = NULL;
+	}
+
+	if (control_device) {
+		misc_deregister(&blktap_control);
+		control_device = NULL;
+	}
+}
+
+/* Module teardown; also used as the error path of blktap_init(). */
+static void
+blktap_exit(void)
+{
+	blktap_control_exit();
+	blktap_ring_exit();
+	blktap_sysfs_exit();
+	blktap_device_exit();
+}
+
+/*
+ * Module init: bring up device, ring, sysfs and control in that order.
+ * On the first failure everything is torn down via blktap_exit().
+ */
+static int __init
+blktap_init(void)
+{
+	int err;
+
+	err = blktap_device_init();
+	if (err)
+		goto fail;
+
+	err = blktap_ring_init();
+	if (err)
+		goto fail;
+
+	err = blktap_sysfs_init();
+	if (err)
+		goto fail;
+
+	err = blktap_control_init();
+	if (err)
+		goto fail;
+
+	return 0;
+
+fail:
+	blktap_exit();
+	return err;
+}
+
+module_init(blktap_init);
+module_exit(blktap_exit);
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/block/blktap/device.c b/drivers/block/blktap/device.c
new file mode 100644
index 0000000..9a09457
--- /dev/null
+++ b/drivers/block/blktap/device.c
@@ -0,0 +1,551 @@
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+#include <linux/cdrom.h>
+#include <linux/hdreg.h>
+#include <scsi/scsi.h>
+#include 
<scsi/scsi_ioctl.h> + +#include "blktap.h" + +int blktap_device_major; + +#define dev_to_blktap(_dev) container_of(_dev, struct blktap, device) + +static int +blktap_device_open(struct block_device *bdev, fmode_t mode) +{ + struct gendisk *disk = bdev->bd_disk; + struct blktap_device *tapdev = disk->private_data; + + if (!tapdev) + return -ENXIO; + + /* NB. we might have bounced a bd trylock by tapdisk. when + * failing for reasons not !tapdev, make sure to kick tapdisk + * out of destroy wait state again. */ + + return 0; +} + +static int +blktap_device_release(struct gendisk *disk, fmode_t mode) +{ + struct blktap_device *tapdev = disk->private_data; + struct block_device *bdev = bdget_disk(disk, 0); + struct blktap *tap = dev_to_blktap(tapdev); + + bdput(bdev); + + if (!bdev->bd_openers) { + set_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse); + blktap_ring_kick_user(tap); + } + + return 0; +} + +static int +blktap_device_getgeo(struct block_device *bd, struct hd_geometry *hg) +{ + /* We don''t have real geometry info, but let''s at least return + values consistent with the size of the device */ + sector_t nsect = get_capacity(bd->bd_disk); + sector_t cylinders = nsect; + + hg->heads = 0xff; + hg->sectors = 0x3f; + sector_div(cylinders, hg->heads * hg->sectors); + hg->cylinders = cylinders; + if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect) + hg->cylinders = 0xffff; + return 0; +} + +static int +blktap_device_ioctl(struct block_device *bd, fmode_t mode, + unsigned command, unsigned long argument) +{ + int i; + + switch (command) { + case CDROMMULTISESSION: + BTDBG("FIXME: support multisession CDs later\n"); + for (i = 0; i < sizeof(struct cdrom_multisession); i++) + if (put_user(0, (char __user *)(argument + i))) + return -EFAULT; + return 0; + + case SCSI_IOCTL_GET_IDLUN: + if (!access_ok(VERIFY_WRITE, argument, + sizeof(struct scsi_idlun))) + return -EFAULT; + + /* return 0 for now. 
*/ + __put_user(0, &((struct scsi_idlun __user *)argument)->dev_id); + __put_user(0, + &((struct scsi_idlun __user *)argument)->host_unique_id); + return 0; + + default: + /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", + command);*/ + return -EINVAL; /* same return as native Linux */ + } + + return 0; +} + +static struct block_device_operations blktap_device_file_operations = { + .owner = THIS_MODULE, + .open = blktap_device_open, + .release = blktap_device_release, + .ioctl = blktap_device_ioctl, + .getgeo = blktap_device_getgeo +}; + +/* NB. __blktap holding the queue lock; blktap where unlocked */ + +static inline struct request* +__blktap_next_queued_rq(struct request_queue *q) +{ + return blk_peek_request(q); +} + +static inline void +__blktap_dequeue_rq(struct request *rq) +{ + blk_start_request(rq); +} + +/* NB. err == 0 indicates success, failures < 0 */ + +static inline void +__blktap_end_queued_rq(struct request *rq, int err) +{ + blk_start_request(rq); + __blk_end_request(rq, err, blk_rq_bytes(rq)); +} + +static inline void +__blktap_end_rq(struct request *rq, int err) +{ + __blk_end_request(rq, err, blk_rq_bytes(rq)); +} + +static inline void +blktap_end_rq(struct request *rq, int err) +{ + spin_lock_irq(rq->q->queue_lock); + __blktap_end_rq(rq, err); + spin_unlock_irq(rq->q->queue_lock); +} + +void +blktap_device_end_request(struct blktap *tap, + struct blktap_request *request, + int error) +{ + struct blktap_device *tapdev = &tap->device; + struct request *rq = request->rq; + + blktap_ring_unmap_request(tap, request); + + blktap_ring_free_request(tap, request); + + dev_dbg(disk_to_dev(tapdev->gd), + "end_request: op=%d error=%d bytes=%d\n", + rq_data_dir(rq), error, blk_rq_bytes(rq)); + + blktap_end_rq(rq, error); +} + +int +blktap_device_make_request(struct blktap *tap, struct request *rq) +{ + struct blktap_device *tapdev = &tap->device; + struct blktap_request *request; + int write, nsegs; + int err; + + request = 
blktap_ring_make_request(tap); + if (IS_ERR(request)) { + err = PTR_ERR(request); + request = NULL; + + if (err == -ENOSPC || err == -ENOMEM) + goto stop; + + goto fail; + } + + write = rq_data_dir(rq) == WRITE; + nsegs = blk_rq_map_sg(rq->q, rq, request->sg_table); + + dev_dbg(disk_to_dev(tapdev->gd), + "make_request: op=%c bytes=%d nsegs=%d\n", + write ? ''w'' : ''r'', blk_rq_bytes(rq), nsegs); + + request->rq = rq; + request->operation = write ? BLKTAP_OP_WRITE : BLKTAP_OP_READ; + + err = blktap_request_get_pages(tap, request, nsegs); + if (err) + goto stop; + + err = blktap_ring_map_request(tap, request); + if (err) + goto fail; + + blktap_ring_submit_request(tap, request); + + return 0; + +stop: + tap->stats.st_oo_req++; + err = -EBUSY; + +_out: + if (request) + blktap_ring_free_request(tap, request); + + return err; +fail: + if (printk_ratelimit()) + dev_warn(disk_to_dev(tapdev->gd), + "make request: %d, failing\n", err); + goto _out; +} + +/* + * called from tapdisk context + */ +void +blktap_device_run_queue(struct blktap *tap) +{ + struct blktap_device *tapdev = &tap->device; + struct request_queue *q; + struct request *rq; + int err; + + if (!tapdev->gd) + return; + + q = tapdev->gd->queue; + + spin_lock_irq(&tapdev->lock); + queue_flag_clear(QUEUE_FLAG_STOPPED, q); + + do { + rq = __blktap_next_queued_rq(q); + if (!rq) + break; + + if (!blk_fs_request(rq)) { + __blktap_end_queued_rq(rq, -EOPNOTSUPP); + continue; + } + + spin_unlock_irq(&tapdev->lock); + + err = blktap_device_make_request(tap, rq); + + spin_lock_irq(&tapdev->lock); + + if (err == -EBUSY) { + blk_stop_queue(q); + break; + } + + __blktap_dequeue_rq(rq); + + if (unlikely(err)) + __blktap_end_rq(rq, err); + } while (1); + + spin_unlock_irq(&tapdev->lock); +} + +static void +blktap_device_do_request(struct request_queue *rq) +{ + struct blktap_device *tapdev = rq->queuedata; + struct blktap *tap = dev_to_blktap(tapdev); + + blktap_ring_kick_user(tap); +} + +static void 
+blktap_device_configure(struct blktap *tap, + struct blktap_device_info *info) +{ + struct blktap_device *tapdev = &tap->device; + struct gendisk *gd = tapdev->gd; + struct request_queue *rq = gd->queue; + + set_capacity(gd, info->capacity); + set_disk_ro(gd, !!(info->flags & BLKTAP_DEVICE_FLAG_RO)); + + /* Hard sector size and max sectors impersonate the equiv. hardware. */ + blk_queue_logical_block_size(rq, info->sector_size); + blk_queue_max_sectors(rq, 512); + + /* Each segment in a request is up to an aligned page in size. */ + blk_queue_segment_boundary(rq, PAGE_SIZE - 1); + blk_queue_max_segment_size(rq, PAGE_SIZE); + + /* Ensure a merged request will fit in a single I/O ring slot. */ + blk_queue_max_phys_segments(rq, BLKTAP_SEGMENT_MAX); + blk_queue_max_hw_segments(rq, BLKTAP_SEGMENT_MAX); + + /* Make sure buffer addresses are sector-aligned. */ + blk_queue_dma_alignment(rq, 511); + + /* We are reordering, but cacheless. */ + blk_queue_ordered(rq, QUEUE_ORDERED_DRAIN, NULL); +} + +static int +blktap_device_validate_info(struct blktap *tap, + struct blktap_device_info *info) +{ + struct device *dev = tap->ring.dev; + int sector_order; + + sector_order = ffs(info->sector_size) - 1; + if (sector_order < 9 || + sector_order > 12 || + info->sector_size != 1U<<sector_order) + goto fail; + + if (!info->capacity || + (info->capacity > ULLONG_MAX >> sector_order)) + goto fail; + + return 0; + +fail: + dev_err(dev, "capacity: %llu, sector-size: %u\n", + info->capacity, info->sector_size); + return -EINVAL; +} + +int +blktap_device_destroy(struct blktap *tap) +{ + struct blktap_device *tapdev = &tap->device; + struct block_device *bdev; + struct gendisk *gd; + int err; + + gd = tapdev->gd; + if (!gd) + return 0; + + bdev = bdget_disk(gd, 0); + + err = !mutex_trylock(&bdev->bd_mutex); + if (err) { + /* NB. avoid a deadlock. the last opener syncs the + * bdev holding bd_mutex. 
*/ + err = -EBUSY; + goto out_nolock; + } + + if (bdev->bd_openers) { + err = -EBUSY; + goto out; + } + + del_gendisk(gd); + gd->private_data = NULL; + + blk_cleanup_queue(gd->queue); + + put_disk(gd); + tapdev->gd = NULL; + + clear_bit(BLKTAP_DEVICE, &tap->dev_inuse); + err = 0; +out: + mutex_unlock(&bdev->bd_mutex); +out_nolock: + bdput(bdev); + + return err; +} + +static void +blktap_device_fail_queue(struct blktap *tap) +{ + struct blktap_device *tapdev = &tap->device; + struct request_queue *q = tapdev->gd->queue; + + spin_lock_irq(&tapdev->lock); + queue_flag_clear(QUEUE_FLAG_STOPPED, q); + + do { + struct request *rq = __blktap_next_queued_rq(q); + if (!rq) + break; + + __blktap_end_queued_rq(rq, -EIO); + } while (1); + + spin_unlock_irq(&tapdev->lock); +} + +static int +blktap_device_try_destroy(struct blktap *tap) +{ + int err; + + err = blktap_device_destroy(tap); + if (err) + blktap_device_fail_queue(tap); + + return err; +} + +void +blktap_device_destroy_sync(struct blktap *tap) +{ + wait_event(tap->ring.poll_wait, + !blktap_device_try_destroy(tap)); +} + +int +blktap_device_create(struct blktap *tap, struct blktap_device_info *info) +{ + int minor, err; + struct gendisk *gd; + struct request_queue *rq; + struct blktap_device *tapdev; + + gd = NULL; + rq = NULL; + tapdev = &tap->device; + minor = tap->minor; + + if (test_bit(BLKTAP_DEVICE, &tap->dev_inuse)) + return -EEXIST; + + if (blktap_device_validate_info(tap, info)) + return -EINVAL; + + gd = alloc_disk(1); + if (!gd) { + err = -ENOMEM; + goto fail; + } + + if (minor < 26) { + sprintf(gd->disk_name, "td%c", ''a'' + minor % 26); + } else if (minor < (26 + 1) * 26) { + sprintf(gd->disk_name, "td%c%c", + ''a'' + minor / 26 - 1,''a'' + minor % 26); + } else { + const unsigned int m1 = (minor / 26 - 1) / 26 - 1; + const unsigned int m2 = (minor / 26 - 1) % 26; + const unsigned int m3 = minor % 26; + sprintf(gd->disk_name, "td%c%c%c", + ''a'' + m1, ''a'' + m2, ''a'' + m3); + } + + gd->major = 
blktap_device_major; + gd->first_minor = minor; + gd->fops = &blktap_device_file_operations; + gd->private_data = tapdev; + + spin_lock_init(&tapdev->lock); + rq = blk_init_queue(blktap_device_do_request, &tapdev->lock); + if (!rq) { + err = -ENOMEM; + goto fail; + } + elevator_init(rq, "noop"); + + gd->queue = rq; + rq->queuedata = tapdev; + tapdev->gd = gd; + + blktap_device_configure(tap, info); + add_disk(gd); + + set_bit(BLKTAP_DEVICE, &tap->dev_inuse); + + dev_info(disk_to_dev(gd), "sector-size: %u/%u capacity: %llu\n", + queue_logical_block_size(rq), + queue_physical_block_size(rq), + (unsigned long long)get_capacity(gd)); + + return 0; + +fail: + if (gd) + del_gendisk(gd); + if (rq) + blk_cleanup_queue(rq); + + return err; +} + +size_t +blktap_device_debug(struct blktap *tap, char *buf, size_t size) +{ + struct gendisk *disk = tap->device.gd; + struct request_queue *q; + struct block_device *bdev; + char *s = buf, *end = buf + size; + + if (!disk) + return 0; + + q = disk->queue; + + s += snprintf(s, end - s, + "disk capacity:%llu sector size:%u\n", + (unsigned long long)get_capacity(disk), + queue_logical_block_size(q)); + + s += snprintf(s, end - s, + "queue flags:%#lx plugged:%d stopped:%d empty:%d\n", + q->queue_flags, + blk_queue_plugged(q), blk_queue_stopped(q), + elv_queue_empty(q)); + + bdev = bdget_disk(disk, 0); + if (bdev) { + s += snprintf(s, end - s, + "bdev openers:%d closed:%d\n", + bdev->bd_openers, + test_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse)); + bdput(bdev); + } + + return s - buf; +} + +int __init +blktap_device_init() +{ + int major; + + /* Dynamically allocate a major for this device */ + major = register_blkdev(0, "tapdev"); + if (major < 0) { + BTERR("Couldn''t register blktap device\n"); + return -ENOMEM; + } + + blktap_device_major = major; + BTINFO("blktap device major %d\n", major); + + return 0; +} + +void +blktap_device_exit(void) +{ + if (blktap_device_major) + unregister_blkdev(blktap_device_major, "tapdev"); +} diff 
--git a/drivers/block/blktap/request.c b/drivers/block/blktap/request.c new file mode 100644 index 0000000..8cfd6c9 --- /dev/null +++ b/drivers/block/blktap/request.c @@ -0,0 +1,418 @@ +#include <linux/mempool.h> +#include <linux/spinlock.h> +#include <linux/mutex.h> +#include <linux/sched.h> +#include <linux/device.h> + +#include "blktap.h" + +/* max pages per shared pool. just to prevent accidental dos. */ +#define POOL_MAX_PAGES (256*BLKTAP_SEGMENT_MAX) + +/* default page pool size. when considering to shrink a shared pool, + * note that paused tapdisks may grab a whole lot of pages for a long + * time. */ +#define POOL_DEFAULT_PAGES (2 * BLKTAP_RING_SIZE * BLKTAP_SEGMENT_MAX) + +/* max number of pages allocatable per request. */ +#define POOL_MAX_REQUEST_PAGES BLKTAP_SEGMENT_MAX + +/* min request structs per pool. These grow dynamically. */ +#define POOL_MIN_REQS BLKTAP_RING_SIZE + +static struct kset *pool_set; + +#define kobj_to_pool(_kobj) \ + container_of(_kobj, struct blktap_page_pool, kobj) + +static struct kmem_cache *request_cache; +static mempool_t *request_pool; + +static void +__page_pool_wake(struct blktap_page_pool *pool) +{ + mempool_t *mem = pool->bufs; + + /* + NB. slightly wasteful to always wait for a full segment + set. but this ensures the next disk makes + progress. presently, the repeated request struct + alloc/release cycles would otherwise keep everyone spinning. + */ + + if (mem->curr_nr >= POOL_MAX_REQUEST_PAGES) + wake_up(&pool->wait); +} + +int +blktap_request_get_pages(struct blktap *tap, + struct blktap_request *request, int nr_pages) +{ + struct blktap_page_pool *pool = tap->pool; + mempool_t *mem = pool->bufs; + struct page *page; + + BUG_ON(request->nr_pages != 0); + BUG_ON(nr_pages > POOL_MAX_REQUEST_PAGES); + + if (mem->curr_nr < nr_pages) + return -ENOMEM; + + /* NB. avoid thundering herds of tapdisks colliding. 
*/ + spin_lock(&pool->lock); + + if (mem->curr_nr < nr_pages) { + spin_unlock(&pool->lock); + return -ENOMEM; + } + + while (request->nr_pages < nr_pages) { + page = mempool_alloc(mem, GFP_NOWAIT); + BUG_ON(!page); + request->pages[request->nr_pages++] = page; + } + + spin_unlock(&pool->lock); + + return 0; +} + +static void +blktap_request_put_pages(struct blktap *tap, + struct blktap_request *request) +{ + struct blktap_page_pool *pool = tap->pool; + struct page *page; + + while (request->nr_pages) { + page = request->pages[--request->nr_pages]; + mempool_free(page, pool->bufs); + } +} + +size_t +blktap_request_debug(struct blktap *tap, char *buf, size_t size) +{ + struct blktap_page_pool *pool = tap->pool; + mempool_t *mem = pool->bufs; + char *s = buf, *end = buf + size; + + s += snprintf(buf, end - s, + "pool:%s pages:%d free:%d\n", + kobject_name(&pool->kobj), + mem->min_nr, mem->curr_nr); + + return s - buf; +} + +struct blktap_request* +blktap_request_alloc(struct blktap *tap) +{ + struct blktap_request *request; + + request = mempool_alloc(request_pool, GFP_NOWAIT); + if (request) + request->tap = tap; + + return request; +} + +void +blktap_request_free(struct blktap *tap, + struct blktap_request *request) +{ + blktap_request_put_pages(tap, request); + + mempool_free(request, request_pool); + + __page_pool_wake(tap->pool); +} + +void +blktap_request_bounce(struct blktap *tap, + struct blktap_request *request, + int seg, int write) +{ + struct scatterlist *sg = &request->sg_table[seg]; + void *s, *p; + + BUG_ON(seg >= request->nr_pages); + + s = sg_virt(sg); + p = page_address(request->pages[seg]) + sg->offset; + + if (write) + memcpy(p, s, sg->length); + else + memcpy(s, p, sg->length); +} + +static void +blktap_request_ctor(void *obj) +{ + struct blktap_request *request = obj; + + memset(request, 0, sizeof(*request)); + sg_init_table(request->sg_table, ARRAY_SIZE(request->sg_table)); +} + +static int +blktap_page_pool_resize(struct blktap_page_pool *pool, 
int target) +{ + mempool_t *bufs = pool->bufs; + int err; + + /* NB. mempool asserts min_nr >= 1 */ + target = max(1, target); + + err = mempool_resize(bufs, target, GFP_KERNEL); + if (err) + return err; + + __page_pool_wake(pool); + + return 0; +} + +struct pool_attribute { + struct attribute attr; + + ssize_t (*show)(struct blktap_page_pool *pool, + char *buf); + + ssize_t (*store)(struct blktap_page_pool *pool, + const char *buf, size_t count); +}; + +#define kattr_to_pool_attr(_kattr) \ + container_of(_kattr, struct pool_attribute, attr) + +static ssize_t +blktap_page_pool_show_size(struct blktap_page_pool *pool, + char *buf) +{ + mempool_t *mem = pool->bufs; + return sprintf(buf, "%d", mem->min_nr); +} + +static ssize_t +blktap_page_pool_store_size(struct blktap_page_pool *pool, + const char *buf, size_t size) +{ + int target; + + /* + * NB. target fixup to avoid undesired results. less than a + * full segment set can wedge the disk. much more than a + * couple times the physical queue depth is rarely useful. + */ + + target = simple_strtoul(buf, NULL, 0); + target = max(POOL_MAX_REQUEST_PAGES, target); + target = min(target, POOL_MAX_PAGES); + + return blktap_page_pool_resize(pool, target) ? 
: size;
+}
+
+static struct pool_attribute blktap_page_pool_attr_size =
+	__ATTR(size, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH,
+	       blktap_page_pool_show_size,
+	       blktap_page_pool_store_size);
+
+/* sysfs: print the number of elements currently held by the pool. */
+static ssize_t
+blktap_page_pool_show_free(struct blktap_page_pool *pool,
+			   char *buf)
+{
+	mempool_t *mem = pool->bufs;
+	return sprintf(buf, "%d", mem->curr_nr);
+}
+
+/* Read-only attribute: no store handler. */
+static struct pool_attribute blktap_page_pool_attr_free =
+	__ATTR(free, S_IRUSR|S_IRGRP|S_IROTH,
+	       blktap_page_pool_show_free,
+	       NULL);
+
+static struct attribute *blktap_page_pool_attrs[] = {
+	&blktap_page_pool_attr_size.attr,
+	&blktap_page_pool_attr_free.attr,
+	NULL,
+};
+
+/*
+ * Look up a kobject by name in kset, under the kset's list lock.
+ * Returns the object with an extra reference held, or NULL if no
+ * member has that name.
+ */
+static inline struct kobject*
+__blktap_kset_find_obj(struct kset *kset, const char *name)
+{
+	struct kobject *k;
+	struct kobject *ret = NULL;
+
+	spin_lock(&kset->list_lock);
+	list_for_each_entry(k, &kset->list, entry) {
+		if (kobject_name(k) && !strcmp(kobject_name(k), name)) {
+			ret = kobject_get(k);
+			break;
+		}
+	}
+	spin_unlock(&kset->list_lock);
+	return ret;
+}
+
+/* sysfs_ops.show: dispatch to the attribute's show handler, if any. */
+static ssize_t
+blktap_page_pool_show_attr(struct kobject *kobj, struct attribute *kattr,
+			   char *buf)
+{
+	struct blktap_page_pool *pool = kobj_to_pool(kobj);
+	struct pool_attribute *attr = kattr_to_pool_attr(kattr);
+
+	if (attr->show)
+		return attr->show(pool, buf);
+
+	return -EIO;
+}
+
+/* sysfs_ops.store: dispatch to the attribute's store handler, if any. */
+static ssize_t
+blktap_page_pool_store_attr(struct kobject *kobj, struct attribute *kattr,
+			    const char *buf, size_t size)
+{
+	struct blktap_page_pool *pool = kobj_to_pool(kobj);
+	struct pool_attribute *attr = kattr_to_pool_attr(kattr);
+
+	/*
+	 * Test the handler that is about to be called. The "free"
+	 * attribute has a show handler but a NULL store handler, so
+	 * testing ->show here would call through a NULL pointer.
+	 */
+	if (attr->store)
+		return attr->store(pool, buf, size);
+
+	return -EIO;
+}
+
+static struct sysfs_ops blktap_page_pool_sysfs_ops = {
+	.show		= blktap_page_pool_show_attr,
+	.store		= blktap_page_pool_store_attr,
+};
+
+/* kobject release: destroy the page mempool and free the pool itself. */
+static void
+blktap_page_pool_release(struct kobject *kobj)
+{
+	struct blktap_page_pool *pool = kobj_to_pool(kobj);
+	mempool_destroy(pool->bufs);
+	kfree(pool);
+}
+
+struct kobj_type blktap_page_pool_ktype = {
+	.release       = 
blktap_page_pool_release,
+	.sysfs_ops     = &blktap_page_pool_sysfs_ops,
+	.default_attrs = blktap_page_pool_attrs,
+};
+
+/*
+ * mempool element allocator. Refuses requests that may not wait, so
+ * such callers are only served from elements already in the pool.
+ * Pages handed out are marked reserved.
+ */
+static void*
+__mempool_page_alloc(gfp_t gfp_mask, void *pool_data)
+{
+	struct page *page;
+
+	if (!(gfp_mask & __GFP_WAIT))
+		return NULL;
+
+	page = alloc_page(gfp_mask);
+	if (page)
+		SetPageReserved(page);
+
+	return page;
+}
+
+/* mempool element destructor: undo __mempool_page_alloc(). */
+static void
+__mempool_page_free(void *element, void *pool_data)
+{
+	struct page *page = element;
+
+	ClearPageReserved(page);
+	put_page(page);
+}
+
+/*
+ * Create a page pool of nr_pages pages and add it to pool_set under
+ * the given name. Returns the pool's kobject, or NULL on failure.
+ */
+static struct kobject*
+blktap_page_pool_create(const char *name, int nr_pages)
+{
+	struct blktap_page_pool *pool;
+	int err;
+
+	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+	if (!pool)
+		goto fail;
+
+	spin_lock_init(&pool->lock);
+	init_waitqueue_head(&pool->wait);
+
+	pool->bufs = mempool_create(nr_pages,
+				    __mempool_page_alloc, __mempool_page_free,
+				    pool);
+	if (!pool->bufs)
+		goto fail_pool;
+
+	kobject_init(&pool->kobj, &blktap_page_pool_ktype);
+	pool->kobj.kset = pool_set;
+	err = kobject_add(&pool->kobj, &pool_set->kobj, "%s", name);
+	if (err) {
+		/*
+		 * Once initialised, a kobject must be released with
+		 * kobject_put(), also when kobject_add() fails. The
+		 * ktype release callback, blktap_page_pool_release(),
+		 * then destroys the mempool and frees the pool.
+		 */
+		kobject_put(&pool->kobj);
+		goto fail;
+	}
+
+	return &pool->kobj;
+
+fail_pool:
+	kfree(pool);
+fail:
+	return NULL;
+}
+
+/*
+ * Return the pool with the given name, creating it with
+ * POOL_DEFAULT_PAGES pages if it does not exist yet. A reference is
+ * held on the returned pool. Failure is reported as ERR_PTR(-ENOMEM),
+ * never as NULL.
+ */
+struct blktap_page_pool*
+blktap_page_pool_get(const char *name)
+{
+	struct kobject *kobj;
+
+	kobj = __blktap_kset_find_obj(pool_set, name);
+	if (!kobj)
+		kobj = blktap_page_pool_create(name,
+					       POOL_DEFAULT_PAGES);
+	if (!kobj)
+		return ERR_PTR(-ENOMEM);
+
+	return kobj_to_pool(kobj);
+}
+
+/*
+ * Set up the request slab cache, the request mempool and the "pools"
+ * kset below 'parent'. Partial setup is not unwound here;
+ * blktap_page_pool_exit() releases whatever exists.
+ */
+int __init
+blktap_page_pool_init(struct kobject *parent)
+{
+	request_cache =
+		kmem_cache_create("blktap-request",
+				  sizeof(struct blktap_request), 0,
+				  0, blktap_request_ctor);
+	if (!request_cache)
+		return -ENOMEM;
+
+	request_pool =
+		mempool_create_slab_pool(POOL_MIN_REQS, request_cache);
+	if (!request_pool)
+		return -ENOMEM;
+
+	pool_set = kset_create_and_add("pools", NULL, parent);
+	if (!pool_set)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void
+blktap_page_pool_exit(void) +{ + if (pool_set) { + BUG_ON(!list_empty(&pool_set->list)); + kset_unregister(pool_set); + pool_set = NULL; + } + + if (request_pool) { + mempool_destroy(request_pool); + request_pool = NULL; + } + + if (request_cache) { + kmem_cache_destroy(request_cache); + request_cache = NULL; + } +} diff --git a/drivers/block/blktap/ring.c b/drivers/block/blktap/ring.c new file mode 100644 index 0000000..635f1fd --- /dev/null +++ b/drivers/block/blktap/ring.c @@ -0,0 +1,595 @@ + +#include <linux/device.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/poll.h> +#include <linux/blkdev.h> + +#include "blktap.h" + +int blktap_ring_major; +static struct cdev blktap_ring_cdev; + + /* + * BLKTAP - immediately before the mmap area, + * we have a bunch of pages reserved for shared memory rings. + */ +#define RING_PAGES 1 + +#define BLKTAP_INFO_SIZE_AT(_memb) \ + offsetof(struct blktap_device_info, _memb) + \ + sizeof(((struct blktap_device_info*)0)->_memb) + +static void +blktap_ring_read_response(struct blktap *tap, + const blktap_ring_rsp_t *rsp) +{ + struct blktap_ring *ring = &tap->ring; + struct blktap_request *request; + int usr_idx, err; + + request = NULL; + + usr_idx = rsp->id; + if (usr_idx < 0 || usr_idx >= BLKTAP_RING_SIZE) { + err = -ERANGE; + goto invalid; + } + + request = ring->pending[usr_idx]; + + if (!request) { + err = -ESRCH; + goto invalid; + } + + if (rsp->operation != request->operation) { + err = -EINVAL; + goto invalid; + } + + dev_dbg(ring->dev, + "request %d [%p] response: %d\n", + request->usr_idx, request, rsp->status); + + err = rsp->status == BLKTAP_RSP_OKAY ? 
0 : -EIO;
+end_request:
+	blktap_device_end_request(tap, request, err);
+	return;
+
+invalid:
+	/*
+	 * 'request' is still NULL when the response id was out of range
+	 * or named an empty slot, so do not dereference it for the log
+	 * line; print -1 as the expected operation in that case.
+	 */
+	dev_warn(ring->dev,
+		 "invalid response, idx:%d status:%d op:%d/%d: err %d\n",
+		 usr_idx, rsp->status,
+		 rsp->operation, request ? (int)request->operation : -1,
+		 err);
+	if (request)
+		goto end_request;
+}
+
+/*
+ * Consume every response between the private consumer index and the
+ * producer index published in the shared ring. Each response is
+ * copied out of the shared page before it is handled. Runs with the
+ * caller's mmap_sem held for reading and does nothing if the ring is
+ * not currently mapped.
+ */
+static void
+blktap_read_ring(struct blktap *tap)
+{
+	struct blktap_ring *ring = &tap->ring;
+	blktap_ring_rsp_t rsp;
+	RING_IDX rc, rp;
+
+	down_read(&current->mm->mmap_sem);
+	if (!ring->vma) {
+		up_read(&current->mm->mmap_sem);
+		return;
+	}
+
+	/* for each outstanding message on the ring */
+	rp = ring->ring.sring->rsp_prod;
+	rmb();
+
+	for (rc = ring->ring.rsp_cons; rc != rp; rc++) {
+		memcpy(&rsp, RING_GET_RESPONSE(&ring->ring, rc), sizeof(rsp));
+		blktap_ring_read_response(tap, &rsp);
+	}
+
+	ring->ring.rsp_cons = rc;
+
+	up_read(&current->mm->mmap_sem);
+}
+
+/* User address of segment _seg of request slot _req, relative to _start. */
+#define MMAP_VADDR(_start, _req, _seg)				\
+	((_start) +						\
+	 ((_req) * BLKTAP_SEGMENT_MAX * BLKTAP_PAGE_SIZE) +	\
+	 ((_seg) * BLKTAP_PAGE_SIZE))
+
+/* Any fault inside the ring mapping is answered with SIGBUS. */
+static int blktap_ring_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	return VM_FAULT_SIGBUS;
+}
+
+/* Complete every request still pending on the ring with -EIO. */
+static void
+blktap_ring_fail_pending(struct blktap *tap)
+{
+	struct blktap_ring *ring = &tap->ring;
+	struct blktap_request *request;
+	int usr_idx;
+
+	for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) {
+		request = ring->pending[usr_idx];
+		if (!request)
+			continue;
+
+		blktap_device_end_request(tap, request, -EIO);
+	}
+}
+
+/*
+ * The ring mapping is going away: fail all pending requests, unmap and
+ * free the shared ring page, and finish a requested shutdown.
+ */
+static void
+blktap_ring_vm_close(struct vm_area_struct *vma)
+{
+	struct blktap *tap = vma->vm_private_data;
+	struct blktap_ring *ring = &tap->ring;
+	struct page *page = virt_to_page(ring->ring.sring);
+
+	blktap_ring_fail_pending(tap);
+
+	zap_page_range(vma, vma->vm_start, PAGE_SIZE, NULL);
+	ClearPageReserved(page);
+	__free_page(page);
+
+	ring->vma = NULL;
+
+	if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
+		blktap_control_destroy_tap(tap);
+}
+
+static struct vm_operations_struct blktap_ring_vm_operations = {
+	.close    = 
blktap_ring_vm_close, + .fault = blktap_ring_fault, +}; + +int +blktap_ring_map_segment(struct blktap *tap, + struct blktap_request *request, + int seg) +{ + struct blktap_ring *ring = &tap->ring; + unsigned long uaddr; + + uaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, seg); + return vm_insert_page(ring->vma, uaddr, request->pages[seg]); +} + +int +blktap_ring_map_request(struct blktap *tap, + struct blktap_request *request) +{ + int seg, err = 0; + int write; + + write = request->operation == BLKTAP_OP_WRITE; + + for (seg = 0; seg < request->nr_pages; seg++) { + if (write) + blktap_request_bounce(tap, request, seg, write); + + err = blktap_ring_map_segment(tap, request, seg); + if (err) + break; + } + + if (err) + blktap_ring_unmap_request(tap, request); + + return err; +} + +void +blktap_ring_unmap_request(struct blktap *tap, + struct blktap_request *request) +{ + struct blktap_ring *ring = &tap->ring; + unsigned long uaddr; + unsigned size; + int seg, read; + + uaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, 0); + size = request->nr_pages << PAGE_SHIFT; + read = request->operation == BLKTAP_OP_READ; + + if (read) + for (seg = 0; seg < request->nr_pages; seg++) + blktap_request_bounce(tap, request, seg, !read); + + zap_page_range(ring->vma, uaddr, size, NULL); +} + +void +blktap_ring_free_request(struct blktap *tap, + struct blktap_request *request) +{ + struct blktap_ring *ring = &tap->ring; + + ring->pending[request->usr_idx] = NULL; + ring->n_pending--; + + blktap_request_free(tap, request); +} + +struct blktap_request* +blktap_ring_make_request(struct blktap *tap) +{ + struct blktap_ring *ring = &tap->ring; + struct blktap_request *request; + int usr_idx; + + if (RING_FULL(&ring->ring)) + return ERR_PTR(-ENOSPC); + + request = blktap_request_alloc(tap); + if (!request) + return ERR_PTR(-ENOMEM); + + for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) + if (!ring->pending[usr_idx]) + break; + + BUG_ON(usr_idx >= BLKTAP_RING_SIZE); + + 
request->tap = tap; + request->usr_idx = usr_idx; + + ring->pending[usr_idx] = request; + ring->n_pending++; + + return request; +} + +void +blktap_ring_submit_request(struct blktap *tap, + struct blktap_request *request) +{ + struct blktap_ring *ring = &tap->ring; + blktap_ring_req_t *breq; + struct scatterlist *sg; + int i, nsecs = 0; + + dev_dbg(ring->dev, + "request %d [%p] submit\n", request->usr_idx, request); + + breq = RING_GET_REQUEST(&ring->ring, ring->ring.req_prod_pvt); + + breq->id = request->usr_idx; + breq->sector_number = blk_rq_pos(request->rq); + breq->__pad = 0; + breq->operation = request->operation; + breq->nr_segments = request->nr_pages; + + blktap_for_each_sg(sg, request, i) { + struct blktap_segment *seg = &breq->seg[i]; + int first, count; + + count = sg->length >> 9; + first = sg->offset >> 9; + + seg->first_sect = first; + seg->last_sect = first + count - 1; + + nsecs += count; + } + + ring->ring.req_prod_pvt++; + + do_gettimeofday(&request->time); + + + if (request->operation == BLKTAP_OP_WRITE) { + tap->stats.st_wr_sect += nsecs; + tap->stats.st_wr_req++; + } + + if (request->operation == BLKTAP_OP_READ) { + tap->stats.st_rd_sect += nsecs; + tap->stats.st_rd_req++; + } +} + +static int +blktap_ring_open(struct inode *inode, struct file *filp) +{ + struct blktap *tap = NULL; + int minor; + + minor = iminor(inode); + + if (minor < blktap_max_minor) + tap = blktaps[minor]; + + if (!tap) + return -ENXIO; + + if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) + return -ENXIO; + + if (tap->ring.task) + return -EBUSY; + + filp->private_data = tap; + tap->ring.task = current; + + return 0; +} + +static int +blktap_ring_release(struct inode *inode, struct file *filp) +{ + struct blktap *tap = filp->private_data; + + blktap_device_destroy_sync(tap); + + tap->ring.task = NULL; + + if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) + blktap_control_destroy_tap(tap); + + return 0; +} + +static int +blktap_ring_mmap(struct file *filp, 
struct vm_area_struct *vma) +{ + struct blktap *tap = filp->private_data; + struct blktap_ring *ring = &tap->ring; + blktap_sring_t *sring; + struct page *page = NULL; + int err; + + if (ring->vma) + return -EBUSY; + + page = alloc_page(GFP_KERNEL|__GFP_ZERO); + if (!page) + return -ENOMEM; + + SetPageReserved(page); + + err = vm_insert_page(vma, vma->vm_start, page); + if (err) + goto fail; + + sring = page_address(page); + SHARED_RING_INIT(sring); + FRONT_RING_INIT(&ring->ring, sring, PAGE_SIZE); + + ring->ring_vstart = vma->vm_start; + ring->user_vstart = ring->ring_vstart + PAGE_SIZE; + + vma->vm_private_data = tap; + + vma->vm_flags |= VM_DONTCOPY; + vma->vm_flags |= VM_RESERVED; + + vma->vm_ops = &blktap_ring_vm_operations; + + ring->vma = vma; + return 0; + +fail: + if (page) { + zap_page_range(vma, vma->vm_start, PAGE_SIZE, NULL); + ClearPageReserved(page); + __free_page(page); + } + + return err; +} + +static int +blktap_ring_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct blktap *tap = filp->private_data; + struct blktap_ring *ring = &tap->ring; + void __user *ptr = (void *)arg; + int err; + + BTDBG("%d: cmd: %u, arg: %lu\n", tap->minor, cmd, arg); + + if (!ring->vma || ring->vma->vm_mm != current->mm) + return -EACCES; + + switch(cmd) { + case BLKTAP_IOCTL_RESPOND: + + blktap_read_ring(tap); + return 0; + + case BLKTAP_IOCTL_CREATE_DEVICE_COMPAT: { + struct blktap_device_info info; + struct blktap2_params params; + + if (copy_from_user(&params, ptr, sizeof(params))) + return -EFAULT; + + info.capacity = params.capacity; + info.sector_size = params.sector_size; + info.flags = 0; + + err = blktap_device_create(tap, &info); + if (err) + return err; + + if (params.name[0]) { + strncpy(tap->name, params.name, sizeof(params.name)); + tap->name[sizeof(tap->name)-1] = 0; + } + + return 0; + } + + case BLKTAP_IOCTL_CREATE_DEVICE: { + struct blktap_device_info __user *ptr = (void *)arg; + struct blktap_device_info info; 
+ unsigned long mask; + size_t base_sz, sz; + + mask = BLKTAP_DEVICE_FLAG_RO; + + memset(&info, 0, sizeof(info)); + sz = base_sz = BLKTAP_INFO_SIZE_AT(flags); + + if (copy_from_user(&info, ptr, sz)) + return -EFAULT; + + if (sz > base_sz) + if (copy_from_user(&info, ptr, sz)) + return -EFAULT; + + if (put_user(info.flags & mask, &ptr->flags)) + return -EFAULT; + + return blktap_device_create(tap, &info); + } + + case BLKTAP_IOCTL_REMOVE_DEVICE: + + return blktap_device_destroy(tap); + } + + return -ENOIOCTLCMD; +} + +static unsigned int blktap_ring_poll(struct file *filp, poll_table *wait) +{ + struct blktap *tap = filp->private_data; + struct blktap_ring *ring = &tap->ring; + int work; + + poll_wait(filp, &tap->pool->wait, wait); + poll_wait(filp, &ring->poll_wait, wait); + + down_read(&current->mm->mmap_sem); + if (ring->vma && tap->device.gd) + blktap_device_run_queue(tap); + up_read(&current->mm->mmap_sem); + + work = ring->ring.req_prod_pvt - ring->ring.sring->req_prod; + RING_PUSH_REQUESTS(&ring->ring); + + if (work || + ring->ring.sring->private.tapif_user.msg || + test_and_clear_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse)) + return POLLIN | POLLRDNORM; + + return 0; +} + +static struct file_operations blktap_ring_file_operations = { + .owner = THIS_MODULE, + .open = blktap_ring_open, + .release = blktap_ring_release, + .ioctl = blktap_ring_ioctl, + .mmap = blktap_ring_mmap, + .poll = blktap_ring_poll, +}; + +void +blktap_ring_kick_user(struct blktap *tap) +{ + wake_up(&tap->ring.poll_wait); +} + +int +blktap_ring_destroy(struct blktap *tap) +{ + struct blktap_ring *ring = &tap->ring; + + if (ring->task || ring->vma) + return -EBUSY; + + return 0; +} + +int +blktap_ring_create(struct blktap *tap) +{ + struct blktap_ring *ring = &tap->ring; + + init_waitqueue_head(&ring->poll_wait); + ring->devno = MKDEV(blktap_ring_major, tap->minor); + + return 0; +} + +size_t +blktap_ring_debug(struct blktap *tap, char *buf, size_t size) +{ + struct blktap_ring *ring = &tap->ring; + 
char *s = buf, *end = buf + size; + int usr_idx; + + s += snprintf(s, end - s, + "begin pending:%d\n", ring->n_pending); + + for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) { + struct blktap_request *request; + struct timeval *time; + int write; + + request = ring->pending[usr_idx]; + if (!request) + continue; + + write = request->operation == BLKTAP_OP_WRITE; + time = &request->time; + + s += snprintf(s, end - s, + "%02d: usr_idx:%02d " + "op:%c nr_pages:%02d time:%lu.%09lu\n", + usr_idx, request->usr_idx, + write ? 'W' : 'R', request->nr_pages, + time->tv_sec, time->tv_usec); + } + + s += snprintf(s, end - s, "end pending\n"); + + return s - buf; +} + + +int __init +blktap_ring_init(void) +{ + dev_t dev = 0; + int err; + + cdev_init(&blktap_ring_cdev, &blktap_ring_file_operations); + blktap_ring_cdev.owner = THIS_MODULE; + + err = alloc_chrdev_region(&dev, 0, MAX_BLKTAP_DEVICE, "blktap2"); + if (err < 0) { + BTERR("error registering ring devices: %d\n", err); + return err; + } + + err = cdev_add(&blktap_ring_cdev, dev, MAX_BLKTAP_DEVICE); + if (err) { + BTERR("error adding ring device: %d\n", err); + unregister_chrdev_region(dev, MAX_BLKTAP_DEVICE); + return err; + } + + blktap_ring_major = MAJOR(dev); + BTINFO("blktap ring major: %d\n", blktap_ring_major); + + return 0; +} + +void +blktap_ring_exit(void) +{ + if (!blktap_ring_major) + return; + + cdev_del(&blktap_ring_cdev); + unregister_chrdev_region(MKDEV(blktap_ring_major, 0), + MAX_BLKTAP_DEVICE); + + blktap_ring_major = 0; +} diff --git a/drivers/block/blktap/sysfs.c b/drivers/block/blktap/sysfs.c new file mode 100644 index 0000000..182de9a --- /dev/null +++ b/drivers/block/blktap/sysfs.c @@ -0,0 +1,288 @@ +#include <linux/types.h> +#include <linux/device.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/genhd.h> +#include <linux/blkdev.h> + +#include "blktap.h" + +int blktap_debug_level = 1; + +static struct class *class; + +static ssize_t +blktap_sysfs_set_name(struct 
device *dev, struct device_attribute *attr, const char *buf, size_t size) +{ + struct blktap *tap; + + tap = dev_get_drvdata(dev); + if (!tap) + return 0; + + if (size >= BLKTAP_NAME_MAX) + return -ENAMETOOLONG; + + if (strnlen(buf, size) != size) + return -EINVAL; + + strcpy(tap->name, buf); + + return size; +} + +static ssize_t +blktap_sysfs_get_name(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct blktap *tap; + ssize_t size; + + tap = dev_get_drvdata(dev); + if (!tap) + return 0; + + if (tap->name[0]) + size = sprintf(buf, "%s\n", tap->name); + else + size = sprintf(buf, "%d\n", tap->minor); + + return size; +} +static DEVICE_ATTR(name, S_IRUGO|S_IWUSR, + blktap_sysfs_get_name, blktap_sysfs_set_name); + +static void +blktap_sysfs_remove_work(struct work_struct *work) +{ + struct blktap *tap + = container_of(work, struct blktap, remove_work); + blktap_control_destroy_tap(tap); +} + +static ssize_t +blktap_sysfs_remove_device(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + struct blktap *tap; + int err; + + tap = dev_get_drvdata(dev); + if (!tap) + return size; + + if (test_and_set_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) + goto wait; + + if (tap->ring.vma) { + blktap_sring_t *sring = tap->ring.ring.sring; + sring->private.tapif_user.msg = BLKTAP_RING_MESSAGE_CLOSE; + blktap_ring_kick_user(tap); + } else { + INIT_WORK(&tap->remove_work, blktap_sysfs_remove_work); + schedule_work(&tap->remove_work); + } +wait: + err = wait_event_interruptible(tap->remove_wait, + !dev_get_drvdata(dev)); + if (err) + return err; + + return size; +} +static DEVICE_ATTR(remove, S_IWUSR, NULL, blktap_sysfs_remove_device); + +static ssize_t +blktap_sysfs_debug_device(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct blktap *tap; + char *s = buf, *end = buf + PAGE_SIZE; + + tap = dev_get_drvdata(dev); + if (!tap) + return 0; + + s += blktap_control_debug(tap, s, end - s); + + s += 
blktap_request_debug(tap, s, end - s); + + s += blktap_device_debug(tap, s, end - s); + + s += blktap_ring_debug(tap, s, end - s); + + return s - buf; +} +static DEVICE_ATTR(debug, S_IRUGO, blktap_sysfs_debug_device, NULL); + +static ssize_t +blktap_sysfs_show_task(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct blktap *tap; + ssize_t rv = 0; + + tap = dev_get_drvdata(dev); + if (!tap) + return 0; + + if (tap->ring.task) + rv = sprintf(buf, "%d\n", tap->ring.task->pid); + + return rv; +} +static DEVICE_ATTR(task, S_IRUGO, blktap_sysfs_show_task, NULL); + +static ssize_t +blktap_sysfs_show_pool(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct blktap *tap = dev_get_drvdata(dev); + return sprintf(buf, "%s", kobject_name(&tap->pool->kobj)); +} + +static ssize_t +blktap_sysfs_store_pool(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + struct blktap *tap = dev_get_drvdata(dev); + struct blktap_page_pool *pool, *tmp = tap->pool; + + if (tap->device.gd) + return -EBUSY; + + pool = blktap_page_pool_get(buf); + if (IS_ERR(pool)) + return PTR_ERR(pool); + + tap->pool = pool; + kobject_put(&tmp->kobj); + + return size; +} +DEVICE_ATTR(pool, S_IRUSR|S_IWUSR, + blktap_sysfs_show_pool, blktap_sysfs_store_pool); + +int +blktap_sysfs_create(struct blktap *tap) +{ + struct blktap_ring *ring = &tap->ring; + struct device *dev; + int err = 0; + + init_waitqueue_head(&tap->remove_wait); + + dev = device_create(class, NULL, ring->devno, + tap, "blktap%d", tap->minor); + if (IS_ERR(dev)) + err = PTR_ERR(dev); + if (!err) + err = device_create_file(dev, &dev_attr_name); + if (!err) + err = device_create_file(dev, &dev_attr_remove); + if (!err) + err = device_create_file(dev, &dev_attr_debug); + if (!err) + err = device_create_file(dev, &dev_attr_task); + if (!err) + err = device_create_file(dev, &dev_attr_pool); + if (!err) + ring->dev = dev; + else + device_unregister(dev); + + return err; +} 
+ +void +blktap_sysfs_destroy(struct blktap *tap) +{ + struct blktap_ring *ring = &tap->ring; + struct device *dev; + + dev = ring->dev; + + if (!dev) + return; + + dev_set_drvdata(dev, NULL); + wake_up(&tap->remove_wait); + + device_unregister(dev); + ring->dev = NULL; +} + +static ssize_t +blktap_sysfs_show_verbosity(struct class *class, char *buf) +{ + return sprintf(buf, "%d\n", blktap_debug_level); +} + +static ssize_t +blktap_sysfs_set_verbosity(struct class *class, const char *buf, size_t size) +{ + int level; + + if (sscanf(buf, "%d", &level) == 1) { + blktap_debug_level = level; + return size; + } + + return -EINVAL; +} +static CLASS_ATTR(verbosity, S_IRUGO|S_IWUSR, + blktap_sysfs_show_verbosity, blktap_sysfs_set_verbosity); + +static ssize_t +blktap_sysfs_show_devices(struct class *class, char *buf) +{ + int i, ret; + struct blktap *tap; + + mutex_lock(&blktap_lock); + + ret = 0; + for (i = 0; i < blktap_max_minor; i++) { + tap = blktaps[i]; + if (!tap) + continue; + + if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse)) + continue; + + ret += sprintf(buf + ret, "%d %s\n", tap->minor, tap->name); + } + + mutex_unlock(&blktap_lock); + + return ret; +} +static CLASS_ATTR(devices, S_IRUGO, blktap_sysfs_show_devices, NULL); + +void +blktap_sysfs_exit(void) +{ + if (class) + class_destroy(class); +} + +int __init +blktap_sysfs_init(void) +{ + struct class *cls; + int err = 0; + + cls = class_create(THIS_MODULE, "blktap2"); + if (IS_ERR(cls)) + err = PTR_ERR(cls); + if (!err) + err = class_create_file(cls, &class_attr_verbosity); + if (!err) + err = class_create_file(cls, &class_attr_devices); + if (!err) + class = cls; + else + class_destroy(cls); + + return err; +} diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index c34e71c..b951b83 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -45,17 +45,6 @@ config XEN_BLKDEV_BACKEND interface. 
-config XEN_BLKDEV_TAP - tristate "Block-device tap backend driver" - depends on XEN_BACKEND && BLOCK - help - The block tap driver is an alternative to the block back driver - and allows VM block requests to be redirected to userspace through - a device interface. The tap allows user-space development of - high-performance block backends, where disk images may be implemented - as files, in memory, or on other hosts across the network. This - driver can safely coexist with the existing blockback driver. - config XEN_BLKBACK_PAGEMAP tristate depends on XEN_BLKDEV_BACKEND != n && XEN_BLKDEV_TAP != n diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index aa4d6e2..44f835e 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -6,6 +6,5 @@ obj-$(CONFIG_XEN_XENCOMM) += xencomm.o obj-$(CONFIG_XEN_BALLOON) += balloon.o obj-$(CONFIG_XEN_DEV_EVTCHN) += evtchn.o obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/ -obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/ obj-$(CONFIG_XENFS) += xenfs/ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o \ No newline at end of file diff --git a/drivers/xen/blktap/Makefile b/drivers/xen/blktap/Makefile deleted file mode 100644 index 822b4e4..0000000 --- a/drivers/xen/blktap/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -obj-$(CONFIG_XEN_BLKDEV_TAP) := blktap.o - -blktap-objs := control.o ring.o device.o request.o sysfs.o diff --git a/drivers/xen/blktap/blktap.h b/drivers/xen/blktap/blktap.h deleted file mode 100644 index 1318cad..0000000 --- a/drivers/xen/blktap/blktap.h +++ /dev/null @@ -1,161 +0,0 @@ -#ifndef _BLKTAP_H_ -#define _BLKTAP_H_ - -#include <linux/mm.h> -#include <linux/fs.h> -#include <linux/cdev.h> -#include <linux/init.h> -#include <linux/scatterlist.h> -#include <linux/blktap.h> - -extern int blktap_debug_level; -extern int blktap_ring_major; -extern int blktap_device_major; - -#define BTPRINTK(level, tag, force, _f, _a...) 
\ - do { \ - if (blktap_debug_level > level && \ - (force || printk_ratelimit())) \ - printk(tag "%s: " _f, __func__, ##_a); \ - } while (0) - -#define BTDBG(_f, _a...) BTPRINTK(8, KERN_DEBUG, 1, _f, ##_a) -#define BTINFO(_f, _a...) BTPRINTK(0, KERN_INFO, 0, _f, ##_a) -#define BTWARN(_f, _a...) BTPRINTK(0, KERN_WARNING, 0, _f, ##_a) -#define BTERR(_f, _a...) BTPRINTK(0, KERN_ERR, 0, _f, ##_a) - -#define MAX_BLKTAP_DEVICE 1024 - -#define BLKTAP_DEVICE 4 -#define BLKTAP_DEVICE_CLOSED 5 -#define BLKTAP_SHUTDOWN_REQUESTED 8 - -#define BLKTAP_REQUEST_FREE 0 -#define BLKTAP_REQUEST_PENDING 1 - -struct blktap_device { - spinlock_t lock; - struct gendisk *gd; -}; - -struct blktap_request; - -struct blktap_ring { - struct task_struct *task; - - struct vm_area_struct *vma; - blktap_front_ring_t ring; - unsigned long ring_vstart; - unsigned long user_vstart; - - int n_pending; - struct blktap_request *pending[BLKTAP_RING_SIZE]; - - wait_queue_head_t poll_wait; - - dev_t devno; - struct device *dev; -}; - -struct blktap_statistics { - unsigned long st_print; - int st_rd_req; - int st_wr_req; - int st_oo_req; - int st_rd_sect; - int st_wr_sect; - s64 st_rd_cnt; - s64 st_rd_sum_usecs; - s64 st_rd_max_usecs; - s64 st_wr_cnt; - s64 st_wr_sum_usecs; - s64 st_wr_max_usecs; -}; - -struct blktap_request { - struct blktap *tap; - struct request *rq; - int usr_idx; - - int operation; - struct timeval time; - - struct scatterlist sg_table[BLKTAP_SEGMENT_MAX]; - struct page *pages[BLKTAP_SEGMENT_MAX]; - int nr_pages; -}; - -#define blktap_for_each_sg(_sg, _req, _i) \ - for (_sg = (_req)->sg_table, _i = 0; \ - _i < (_req)->nr_pages; \ - (_sg)++, (_i)++) - -struct blktap { - int minor; - unsigned long dev_inuse; - - struct blktap_ring ring; - struct blktap_device device; - struct blktap_page_pool *pool; - - wait_queue_head_t remove_wait; - struct work_struct remove_work; - char name[BLKTAP_NAME_MAX]; - - struct blktap_statistics stats; -}; - -struct blktap_page_pool { - struct mempool_s 
*bufs; - spinlock_t lock; - struct kobject kobj; - wait_queue_head_t wait; -}; - -extern struct mutex blktap_lock; -extern struct blktap **blktaps; -extern int blktap_max_minor; - -int blktap_control_destroy_tap(struct blktap *); -size_t blktap_control_debug(struct blktap *, char *, size_t); - -int blktap_ring_init(void); -void blktap_ring_exit(void); -size_t blktap_ring_debug(struct blktap *, char *, size_t); -int blktap_ring_create(struct blktap *); -int blktap_ring_destroy(struct blktap *); -struct blktap_request *blktap_ring_make_request(struct blktap *); -void blktap_ring_free_request(struct blktap *,struct blktap_request *); -void blktap_ring_submit_request(struct blktap *, struct blktap_request *); -int blktap_ring_map_request_segment(struct blktap *, struct blktap_request *, int); -int blktap_ring_map_request(struct blktap *, struct blktap_request *); -void blktap_ring_unmap_request(struct blktap *, struct blktap_request *); -void blktap_ring_set_message(struct blktap *, int); -void blktap_ring_kick_user(struct blktap *); - -int blktap_sysfs_init(void); -void blktap_sysfs_exit(void); -int blktap_sysfs_create(struct blktap *); -void blktap_sysfs_destroy(struct blktap *); - -int blktap_device_init(void); -void blktap_device_exit(void); -size_t blktap_device_debug(struct blktap *, char *, size_t); -int blktap_device_create(struct blktap *, struct blktap_device_info *); -int blktap_device_destroy(struct blktap *); -void blktap_device_destroy_sync(struct blktap *); -void blktap_device_run_queue(struct blktap *); -void blktap_device_end_request(struct blktap *, struct blktap_request *, int); - -int blktap_page_pool_init(struct kobject *); -void blktap_page_pool_exit(void); -struct blktap_page_pool *blktap_page_pool_get(const char *); - -size_t blktap_request_debug(struct blktap *, char *, size_t); -struct blktap_request *blktap_request_alloc(struct blktap *); -int blktap_request_get_pages(struct blktap *, struct blktap_request *, int); -void 
blktap_request_free(struct blktap *, struct blktap_request *); -void blktap_request_bounce(struct blktap *, struct blktap_request *, int, int); - - -#endif diff --git a/drivers/xen/blktap/control.c b/drivers/xen/blktap/control.c deleted file mode 100644 index 57b1a10..0000000 --- a/drivers/xen/blktap/control.c +++ /dev/null @@ -1,315 +0,0 @@ -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/miscdevice.h> -#include <linux/device.h> -#include <asm/uaccess.h> - -#include "blktap.h" - -DEFINE_MUTEX(blktap_lock); - -struct blktap **blktaps; -int blktap_max_minor; -static struct blktap_page_pool *default_pool; - -static struct blktap * -blktap_control_get_minor(void) -{ - int minor; - struct blktap *tap; - - tap = kzalloc(sizeof(*tap), GFP_KERNEL); - if (unlikely(!tap)) - return NULL; - - mutex_lock(&blktap_lock); - - for (minor = 0; minor < blktap_max_minor; minor++) - if (!blktaps[minor]) - break; - - if (minor == MAX_BLKTAP_DEVICE) - goto fail; - - if (minor == blktap_max_minor) { - void *p; - int n; - - n = min(2 * blktap_max_minor, MAX_BLKTAP_DEVICE); - p = krealloc(blktaps, n * sizeof(blktaps[0]), GFP_KERNEL); - if (!p) - goto fail; - - blktaps = p; - minor = blktap_max_minor; - blktap_max_minor = n; - - memset(&blktaps[minor], 0, (n - minor) * sizeof(blktaps[0])); - } - - tap->minor = minor; - blktaps[minor] = tap; - - __module_get(THIS_MODULE); -out: - mutex_unlock(&blktap_lock); - return tap; - -fail: - mutex_unlock(&blktap_lock); - kfree(tap); - tap = NULL; - goto out; -} - -static void -blktap_control_put_minor(struct blktap* tap) -{ - blktaps[tap->minor] = NULL; - kfree(tap); - - module_put(THIS_MODULE); -} - -static struct blktap* -blktap_control_create_tap(void) -{ - struct blktap *tap; - int err; - - tap = blktap_control_get_minor(); - if (!tap) - return NULL; - - kobject_get(&default_pool->kobj); - tap->pool = default_pool; - - err = blktap_ring_create(tap); - if (err) - goto fail_tap; - - err = blktap_sysfs_create(tap); - if (err) - 
goto fail_ring; - - return tap; - -fail_ring: - blktap_ring_destroy(tap); -fail_tap: - blktap_control_put_minor(tap); - - return NULL; -} - -int -blktap_control_destroy_tap(struct blktap *tap) -{ - int err; - - err = blktap_ring_destroy(tap); - if (err) - return err; - - kobject_put(&tap->pool->kobj); - - blktap_sysfs_destroy(tap); - - blktap_control_put_minor(tap); - - return 0; -} - -static int -blktap_control_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - struct blktap *tap; - - switch (cmd) { - case BLKTAP_IOCTL_ALLOC_TAP: { - struct blktap_info info; - void __user *ptr = (void __user*)arg; - - tap = blktap_control_create_tap(); - if (!tap) - return -ENOMEM; - - info.ring_major = blktap_ring_major; - info.bdev_major = blktap_device_major; - info.ring_minor = tap->minor; - - if (copy_to_user(ptr, &info, sizeof(info))) { - blktap_control_destroy_tap(tap); - return -EFAULT; - } - - return 0; - } - - case BLKTAP_IOCTL_FREE_TAP: { - int minor = arg; - - if (minor > MAX_BLKTAP_DEVICE) - return -EINVAL; - - tap = blktaps[minor]; - if (!tap) - return -ENODEV; - - return blktap_control_destroy_tap(tap); - } - } - - return -ENOIOCTLCMD; -} - -static struct file_operations blktap_control_file_operations = { - .owner = THIS_MODULE, - .ioctl = blktap_control_ioctl, -}; - -static struct miscdevice blktap_control = { - .minor = MISC_DYNAMIC_MINOR, - .name = "blktap-control", - .fops = &blktap_control_file_operations, -}; - -static struct device *control_device; - -static ssize_t -blktap_control_show_default_pool(struct device *device, - struct device_attribute *attr, - char *buf) -{ - return sprintf(buf, "%s", kobject_name(&default_pool->kobj)); -} - -static ssize_t -blktap_control_store_default_pool(struct device *device, - struct device_attribute *attr, - const char *buf, size_t size) -{ - struct blktap_page_pool *pool, *tmp = default_pool; - - pool = blktap_page_pool_get(buf); - if (IS_ERR(pool)) - return PTR_ERR(pool); - - 
default_pool = pool; - kobject_put(&tmp->kobj); - - return size; -} - -static DEVICE_ATTR(default_pool, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH, - blktap_control_show_default_pool, - blktap_control_store_default_pool); - -size_t -blktap_control_debug(struct blktap *tap, char *buf, size_t size) -{ - char *s = buf, *end = buf + size; - - s += snprintf(s, end - s, - "tap %u:%u name:'%s' flags:%#08lx\n", - MAJOR(tap->ring.devno), MINOR(tap->ring.devno), - tap->name, tap->dev_inuse); - - return s - buf; -} - -static int __init -blktap_control_init(void) -{ - int err; - - err = misc_register(&blktap_control); - if (err) - return err; - - control_device = blktap_control.this_device; - - blktap_max_minor = min(64, MAX_BLKTAP_DEVICE); - blktaps = kzalloc(blktap_max_minor * sizeof(blktaps[0]), GFP_KERNEL); - if (!blktaps) { - BTERR("failed to allocate blktap minor map"); - return -ENOMEM; - } - - err = blktap_page_pool_init(&control_device->kobj); - if (err) - return err; - - default_pool = blktap_page_pool_get("default"); - if (!default_pool) - return -ENOMEM; - - err = device_create_file(control_device, &dev_attr_default_pool); - if (err) - return err; - - return 0; -} - -static void -blktap_control_exit(void) -{ - if (default_pool) { - kobject_put(&default_pool->kobj); - default_pool = NULL; - } - - blktap_page_pool_exit(); - - if (blktaps) { - kfree(blktaps); - blktaps = NULL; - } - - if (control_device) { - misc_deregister(&blktap_control); - control_device = NULL; - } -} - -static void -blktap_exit(void) -{ - blktap_control_exit(); - blktap_ring_exit(); - blktap_sysfs_exit(); - blktap_device_exit(); -} - -static int __init -blktap_init(void) -{ - int err; - - err = blktap_device_init(); - if (err) - goto fail; - - err = blktap_ring_init(); - if (err) - goto fail; - - err = blktap_sysfs_init(); - if (err) - goto fail; - - err = blktap_control_init(); - if (err) - goto fail; - - return 0; - -fail: - blktap_exit(); - return err; -} - -module_init(blktap_init); 
-module_exit(blktap_exit); -MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/xen/blktap/device.c b/drivers/xen/blktap/device.c deleted file mode 100644 index 9a09457..0000000 --- a/drivers/xen/blktap/device.c +++ /dev/null @@ -1,551 +0,0 @@ -#include <linux/fs.h> -#include <linux/blkdev.h> -#include <linux/cdrom.h> -#include <linux/hdreg.h> -#include <scsi/scsi.h> -#include <scsi/scsi_ioctl.h> - -#include "blktap.h" - -int blktap_device_major; - -#define dev_to_blktap(_dev) container_of(_dev, struct blktap, device) - -static int -blktap_device_open(struct block_device *bdev, fmode_t mode) -{ - struct gendisk *disk = bdev->bd_disk; - struct blktap_device *tapdev = disk->private_data; - - if (!tapdev) - return -ENXIO; - - /* NB. we might have bounced a bd trylock by tapdisk. when - * failing for reasons not !tapdev, make sure to kick tapdisk - * out of destroy wait state again. */ - - return 0; -} - -static int -blktap_device_release(struct gendisk *disk, fmode_t mode) -{ - struct blktap_device *tapdev = disk->private_data; - struct block_device *bdev = bdget_disk(disk, 0); - struct blktap *tap = dev_to_blktap(tapdev); - - bdput(bdev); - - if (!bdev->bd_openers) { - set_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse); - blktap_ring_kick_user(tap); - } - - return 0; -} - -static int -blktap_device_getgeo(struct block_device *bd, struct hd_geometry *hg) -{ - /* We don't have real geometry info, but let's at least return - values consistent with the size of the device */ - sector_t nsect = get_capacity(bd->bd_disk); - sector_t cylinders = nsect; - - hg->heads = 0xff; - hg->sectors = 0x3f; - sector_div(cylinders, hg->heads * hg->sectors); - hg->cylinders = cylinders; - if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect) - hg->cylinders = 0xffff; - return 0; -} - -static int -blktap_device_ioctl(struct block_device *bd, fmode_t mode, - unsigned command, unsigned long argument) -{ - int i; - - switch (command) { - case CDROMMULTISESSION: - BTDBG("FIXME: 
support multisession CDs later\n"); - for (i = 0; i < sizeof(struct cdrom_multisession); i++) - if (put_user(0, (char __user *)(argument + i))) - return -EFAULT; - return 0; - - case SCSI_IOCTL_GET_IDLUN: - if (!access_ok(VERIFY_WRITE, argument, - sizeof(struct scsi_idlun))) - return -EFAULT; - - /* return 0 for now. */ - __put_user(0, &((struct scsi_idlun __user *)argument)->dev_id); - __put_user(0, - &((struct scsi_idlun __user *)argument)->host_unique_id); - return 0; - - default: - /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", - command);*/ - return -EINVAL; /* same return as native Linux */ - } - - return 0; -} - -static struct block_device_operations blktap_device_file_operations = { - .owner = THIS_MODULE, - .open = blktap_device_open, - .release = blktap_device_release, - .ioctl = blktap_device_ioctl, - .getgeo = blktap_device_getgeo -}; - -/* NB. __blktap holding the queue lock; blktap where unlocked */ - -static inline struct request* -__blktap_next_queued_rq(struct request_queue *q) -{ - return blk_peek_request(q); -} - -static inline void -__blktap_dequeue_rq(struct request *rq) -{ - blk_start_request(rq); -} - -/* NB. 
err == 0 indicates success, failures < 0 */ - -static inline void -__blktap_end_queued_rq(struct request *rq, int err) -{ - blk_start_request(rq); - __blk_end_request(rq, err, blk_rq_bytes(rq)); -} - -static inline void -__blktap_end_rq(struct request *rq, int err) -{ - __blk_end_request(rq, err, blk_rq_bytes(rq)); -} - -static inline void -blktap_end_rq(struct request *rq, int err) -{ - spin_lock_irq(rq->q->queue_lock); - __blktap_end_rq(rq, err); - spin_unlock_irq(rq->q->queue_lock); -} - -void -blktap_device_end_request(struct blktap *tap, - struct blktap_request *request, - int error) -{ - struct blktap_device *tapdev = &tap->device; - struct request *rq = request->rq; - - blktap_ring_unmap_request(tap, request); - - blktap_ring_free_request(tap, request); - - dev_dbg(disk_to_dev(tapdev->gd), - "end_request: op=%d error=%d bytes=%d\n", - rq_data_dir(rq), error, blk_rq_bytes(rq)); - - blktap_end_rq(rq, error); -} - -int -blktap_device_make_request(struct blktap *tap, struct request *rq) -{ - struct blktap_device *tapdev = &tap->device; - struct blktap_request *request; - int write, nsegs; - int err; - - request = blktap_ring_make_request(tap); - if (IS_ERR(request)) { - err = PTR_ERR(request); - request = NULL; - - if (err == -ENOSPC || err == -ENOMEM) - goto stop; - - goto fail; - } - - write = rq_data_dir(rq) == WRITE; - nsegs = blk_rq_map_sg(rq->q, rq, request->sg_table); - - dev_dbg(disk_to_dev(tapdev->gd), - "make_request: op=%c bytes=%d nsegs=%d\n", - write ? 'w' : 'r', blk_rq_bytes(rq), nsegs); - - request->rq = rq; - request->operation = write ? 
BLKTAP_OP_WRITE : BLKTAP_OP_READ; - - err = blktap_request_get_pages(tap, request, nsegs); - if (err) - goto stop; - - err = blktap_ring_map_request(tap, request); - if (err) - goto fail; - - blktap_ring_submit_request(tap, request); - - return 0; - -stop: - tap->stats.st_oo_req++; - err = -EBUSY; - -_out: - if (request) - blktap_ring_free_request(tap, request); - - return err; -fail: - if (printk_ratelimit()) - dev_warn(disk_to_dev(tapdev->gd), - "make request: %d, failing\n", err); - goto _out; -} - -/* - * called from tapdisk context - */ -void -blktap_device_run_queue(struct blktap *tap) -{ - struct blktap_device *tapdev = &tap->device; - struct request_queue *q; - struct request *rq; - int err; - - if (!tapdev->gd) - return; - - q = tapdev->gd->queue; - - spin_lock_irq(&tapdev->lock); - queue_flag_clear(QUEUE_FLAG_STOPPED, q); - - do { - rq = __blktap_next_queued_rq(q); - if (!rq) - break; - - if (!blk_fs_request(rq)) { - __blktap_end_queued_rq(rq, -EOPNOTSUPP); - continue; - } - - spin_unlock_irq(&tapdev->lock); - - err = blktap_device_make_request(tap, rq); - - spin_lock_irq(&tapdev->lock); - - if (err == -EBUSY) { - blk_stop_queue(q); - break; - } - - __blktap_dequeue_rq(rq); - - if (unlikely(err)) - __blktap_end_rq(rq, err); - } while (1); - - spin_unlock_irq(&tapdev->lock); -} - -static void -blktap_device_do_request(struct request_queue *rq) -{ - struct blktap_device *tapdev = rq->queuedata; - struct blktap *tap = dev_to_blktap(tapdev); - - blktap_ring_kick_user(tap); -} - -static void -blktap_device_configure(struct blktap *tap, - struct blktap_device_info *info) -{ - struct blktap_device *tapdev = &tap->device; - struct gendisk *gd = tapdev->gd; - struct request_queue *rq = gd->queue; - - set_capacity(gd, info->capacity); - set_disk_ro(gd, !!(info->flags & BLKTAP_DEVICE_FLAG_RO)); - - /* Hard sector size and max sectors impersonate the equiv. hardware. 
*/ - blk_queue_logical_block_size(rq, info->sector_size); - blk_queue_max_sectors(rq, 512); - - /* Each segment in a request is up to an aligned page in size. */ - blk_queue_segment_boundary(rq, PAGE_SIZE - 1); - blk_queue_max_segment_size(rq, PAGE_SIZE); - - /* Ensure a merged request will fit in a single I/O ring slot. */ - blk_queue_max_phys_segments(rq, BLKTAP_SEGMENT_MAX); - blk_queue_max_hw_segments(rq, BLKTAP_SEGMENT_MAX); - - /* Make sure buffer addresses are sector-aligned. */ - blk_queue_dma_alignment(rq, 511); - - /* We are reordering, but cacheless. */ - blk_queue_ordered(rq, QUEUE_ORDERED_DRAIN, NULL); -} - -static int -blktap_device_validate_info(struct blktap *tap, - struct blktap_device_info *info) -{ - struct device *dev = tap->ring.dev; - int sector_order; - - sector_order = ffs(info->sector_size) - 1; - if (sector_order < 9 || - sector_order > 12 || - info->sector_size != 1U<<sector_order) - goto fail; - - if (!info->capacity || - (info->capacity > ULLONG_MAX >> sector_order)) - goto fail; - - return 0; - -fail: - dev_err(dev, "capacity: %llu, sector-size: %u\n", - info->capacity, info->sector_size); - return -EINVAL; -} - -int -blktap_device_destroy(struct blktap *tap) -{ - struct blktap_device *tapdev = &tap->device; - struct block_device *bdev; - struct gendisk *gd; - int err; - - gd = tapdev->gd; - if (!gd) - return 0; - - bdev = bdget_disk(gd, 0); - - err = !mutex_trylock(&bdev->bd_mutex); - if (err) { - /* NB. avoid a deadlock. the last opener syncs the - * bdev holding bd_mutex. 
*/ - err = -EBUSY; - goto out_nolock; - } - - if (bdev->bd_openers) { - err = -EBUSY; - goto out; - } - - del_gendisk(gd); - gd->private_data = NULL; - - blk_cleanup_queue(gd->queue); - - put_disk(gd); - tapdev->gd = NULL; - - clear_bit(BLKTAP_DEVICE, &tap->dev_inuse); - err = 0; -out: - mutex_unlock(&bdev->bd_mutex); -out_nolock: - bdput(bdev); - - return err; -} - -static void -blktap_device_fail_queue(struct blktap *tap) -{ - struct blktap_device *tapdev = &tap->device; - struct request_queue *q = tapdev->gd->queue; - - spin_lock_irq(&tapdev->lock); - queue_flag_clear(QUEUE_FLAG_STOPPED, q); - - do { - struct request *rq = __blktap_next_queued_rq(q); - if (!rq) - break; - - __blktap_end_queued_rq(rq, -EIO); - } while (1); - - spin_unlock_irq(&tapdev->lock); -} - -static int -blktap_device_try_destroy(struct blktap *tap) -{ - int err; - - err = blktap_device_destroy(tap); - if (err) - blktap_device_fail_queue(tap); - - return err; -} - -void -blktap_device_destroy_sync(struct blktap *tap) -{ - wait_event(tap->ring.poll_wait, - !blktap_device_try_destroy(tap)); -} - -int -blktap_device_create(struct blktap *tap, struct blktap_device_info *info) -{ - int minor, err; - struct gendisk *gd; - struct request_queue *rq; - struct blktap_device *tapdev; - - gd = NULL; - rq = NULL; - tapdev = &tap->device; - minor = tap->minor; - - if (test_bit(BLKTAP_DEVICE, &tap->dev_inuse)) - return -EEXIST; - - if (blktap_device_validate_info(tap, info)) - return -EINVAL; - - gd = alloc_disk(1); - if (!gd) { - err = -ENOMEM; - goto fail; - } - - if (minor < 26) { - sprintf(gd->disk_name, "td%c", 'a' + minor % 26); - } else if (minor < (26 + 1) * 26) { - sprintf(gd->disk_name, "td%c%c", - 'a' + minor / 26 - 1,'a' + minor % 26); - } else { - const unsigned int m1 = (minor / 26 - 1) / 26 - 1; - const unsigned int m2 = (minor / 26 - 1) % 26; - const unsigned int m3 = minor % 26; - sprintf(gd->disk_name, "td%c%c%c", - 'a' + m1, 'a' + m2, 'a' + m3); - } - - gd->major = 
blktap_device_major; - gd->first_minor = minor; - gd->fops = &blktap_device_file_operations; - gd->private_data = tapdev; - - spin_lock_init(&tapdev->lock); - rq = blk_init_queue(blktap_device_do_request, &tapdev->lock); - if (!rq) { - err = -ENOMEM; - goto fail; - } - elevator_init(rq, "noop"); - - gd->queue = rq; - rq->queuedata = tapdev; - tapdev->gd = gd; - - blktap_device_configure(tap, info); - add_disk(gd); - - set_bit(BLKTAP_DEVICE, &tap->dev_inuse); - - dev_info(disk_to_dev(gd), "sector-size: %u/%u capacity: %llu\n", - queue_logical_block_size(rq), - queue_physical_block_size(rq), - (unsigned long long)get_capacity(gd)); - - return 0; - -fail: - if (gd) - del_gendisk(gd); - if (rq) - blk_cleanup_queue(rq); - - return err; -} - -size_t -blktap_device_debug(struct blktap *tap, char *buf, size_t size) -{ - struct gendisk *disk = tap->device.gd; - struct request_queue *q; - struct block_device *bdev; - char *s = buf, *end = buf + size; - - if (!disk) - return 0; - - q = disk->queue; - - s += snprintf(s, end - s, - "disk capacity:%llu sector size:%u\n", - (unsigned long long)get_capacity(disk), - queue_logical_block_size(q)); - - s += snprintf(s, end - s, - "queue flags:%#lx plugged:%d stopped:%d empty:%d\n", - q->queue_flags, - blk_queue_plugged(q), blk_queue_stopped(q), - elv_queue_empty(q)); - - bdev = bdget_disk(disk, 0); - if (bdev) { - s += snprintf(s, end - s, - "bdev openers:%d closed:%d\n", - bdev->bd_openers, - test_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse)); - bdput(bdev); - } - - return s - buf; -} - -int __init -blktap_device_init() -{ - int major; - - /* Dynamically allocate a major for this device */ - major = register_blkdev(0, "tapdev"); - if (major < 0) { - BTERR("Couldn't register blktap device\n"); - return -ENOMEM; - } - - blktap_device_major = major; - BTINFO("blktap device major %d\n", major); - - return 0; -} - -void -blktap_device_exit(void) -{ - if (blktap_device_major) - unregister_blkdev(blktap_device_major, "tapdev"); -} diff
--git a/drivers/xen/blktap/request.c b/drivers/xen/blktap/request.c deleted file mode 100644 index 8cfd6c9..0000000 --- a/drivers/xen/blktap/request.c +++ /dev/null @@ -1,418 +0,0 @@ -#include <linux/mempool.h> -#include <linux/spinlock.h> -#include <linux/mutex.h> -#include <linux/sched.h> -#include <linux/device.h> - -#include "blktap.h" - -/* max pages per shared pool. just to prevent accidental dos. */ -#define POOL_MAX_PAGES (256*BLKTAP_SEGMENT_MAX) - -/* default page pool size. when considering to shrink a shared pool, - * note that paused tapdisks may grab a whole lot of pages for a long - * time. */ -#define POOL_DEFAULT_PAGES (2 * BLKTAP_RING_SIZE * BLKTAP_SEGMENT_MAX) - -/* max number of pages allocatable per request. */ -#define POOL_MAX_REQUEST_PAGES BLKTAP_SEGMENT_MAX - -/* min request structs per pool. These grow dynamically. */ -#define POOL_MIN_REQS BLKTAP_RING_SIZE - -static struct kset *pool_set; - -#define kobj_to_pool(_kobj) \ - container_of(_kobj, struct blktap_page_pool, kobj) - -static struct kmem_cache *request_cache; -static mempool_t *request_pool; - -static void -__page_pool_wake(struct blktap_page_pool *pool) -{ - mempool_t *mem = pool->bufs; - - /* - NB. slightly wasteful to always wait for a full segment - set. but this ensures the next disk makes - progress. presently, the repeated request struct - alloc/release cycles would otherwise keep everyone spinning. - */ - - if (mem->curr_nr >= POOL_MAX_REQUEST_PAGES) - wake_up(&pool->wait); -} - -int -blktap_request_get_pages(struct blktap *tap, - struct blktap_request *request, int nr_pages) -{ - struct blktap_page_pool *pool = tap->pool; - mempool_t *mem = pool->bufs; - struct page *page; - - BUG_ON(request->nr_pages != 0); - BUG_ON(nr_pages > POOL_MAX_REQUEST_PAGES); - - if (mem->curr_nr < nr_pages) - return -ENOMEM; - - /* NB. avoid thundering herds of tapdisks colliding. 
*/ - spin_lock(&pool->lock); - - if (mem->curr_nr < nr_pages) { - spin_unlock(&pool->lock); - return -ENOMEM; - } - - while (request->nr_pages < nr_pages) { - page = mempool_alloc(mem, GFP_NOWAIT); - BUG_ON(!page); - request->pages[request->nr_pages++] = page; - } - - spin_unlock(&pool->lock); - - return 0; -} - -static void -blktap_request_put_pages(struct blktap *tap, - struct blktap_request *request) -{ - struct blktap_page_pool *pool = tap->pool; - struct page *page; - - while (request->nr_pages) { - page = request->pages[--request->nr_pages]; - mempool_free(page, pool->bufs); - } -} - -size_t -blktap_request_debug(struct blktap *tap, char *buf, size_t size) -{ - struct blktap_page_pool *pool = tap->pool; - mempool_t *mem = pool->bufs; - char *s = buf, *end = buf + size; - - s += snprintf(buf, end - s, - "pool:%s pages:%d free:%d\n", - kobject_name(&pool->kobj), - mem->min_nr, mem->curr_nr); - - return s - buf; -} - -struct blktap_request* -blktap_request_alloc(struct blktap *tap) -{ - struct blktap_request *request; - - request = mempool_alloc(request_pool, GFP_NOWAIT); - if (request) - request->tap = tap; - - return request; -} - -void -blktap_request_free(struct blktap *tap, - struct blktap_request *request) -{ - blktap_request_put_pages(tap, request); - - mempool_free(request, request_pool); - - __page_pool_wake(tap->pool); -} - -void -blktap_request_bounce(struct blktap *tap, - struct blktap_request *request, - int seg, int write) -{ - struct scatterlist *sg = &request->sg_table[seg]; - void *s, *p; - - BUG_ON(seg >= request->nr_pages); - - s = sg_virt(sg); - p = page_address(request->pages[seg]) + sg->offset; - - if (write) - memcpy(p, s, sg->length); - else - memcpy(s, p, sg->length); -} - -static void -blktap_request_ctor(void *obj) -{ - struct blktap_request *request = obj; - - memset(request, 0, sizeof(*request)); - sg_init_table(request->sg_table, ARRAY_SIZE(request->sg_table)); -} - -static int -blktap_page_pool_resize(struct blktap_page_pool *pool, 
int target) -{ - mempool_t *bufs = pool->bufs; - int err; - - /* NB. mempool asserts min_nr >= 1 */ - target = max(1, target); - - err = mempool_resize(bufs, target, GFP_KERNEL); - if (err) - return err; - - __page_pool_wake(pool); - - return 0; -} - -struct pool_attribute { - struct attribute attr; - - ssize_t (*show)(struct blktap_page_pool *pool, - char *buf); - - ssize_t (*store)(struct blktap_page_pool *pool, - const char *buf, size_t count); -}; - -#define kattr_to_pool_attr(_kattr) \ - container_of(_kattr, struct pool_attribute, attr) - -static ssize_t -blktap_page_pool_show_size(struct blktap_page_pool *pool, - char *buf) -{ - mempool_t *mem = pool->bufs; - return sprintf(buf, "%d", mem->min_nr); -} - -static ssize_t -blktap_page_pool_store_size(struct blktap_page_pool *pool, - const char *buf, size_t size) -{ - int target; - - /* - * NB. target fixup to avoid undesired results. less than a - * full segment set can wedge the disk. much more than a - * couple times the physical queue depth is rarely useful. - */ - - target = simple_strtoul(buf, NULL, 0); - target = max(POOL_MAX_REQUEST_PAGES, target); - target = min(target, POOL_MAX_PAGES); - - return blktap_page_pool_resize(pool, target) ? 
: size; -} - -static struct pool_attribute blktap_page_pool_attr_size = - __ATTR(size, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH, - blktap_page_pool_show_size, - blktap_page_pool_store_size); - -static ssize_t -blktap_page_pool_show_free(struct blktap_page_pool *pool, - char *buf) -{ - mempool_t *mem = pool->bufs; - return sprintf(buf, "%d", mem->curr_nr); -} - -static struct pool_attribute blktap_page_pool_attr_free = - __ATTR(free, S_IRUSR|S_IRGRP|S_IROTH, - blktap_page_pool_show_free, - NULL); - -static struct attribute *blktap_page_pool_attrs[] = { - &blktap_page_pool_attr_size.attr, - &blktap_page_pool_attr_free.attr, - NULL, -}; - -static inline struct kobject* -__blktap_kset_find_obj(struct kset *kset, const char *name) -{ - struct kobject *k; - struct kobject *ret = NULL; - - spin_lock(&kset->list_lock); - list_for_each_entry(k, &kset->list, entry) { - if (kobject_name(k) && !strcmp(kobject_name(k), name)) { - ret = kobject_get(k); - break; - } - } - spin_unlock(&kset->list_lock); - return ret; -} - -static ssize_t -blktap_page_pool_show_attr(struct kobject *kobj, struct attribute *kattr, - char *buf) -{ - struct blktap_page_pool *pool = kobj_to_pool(kobj); - struct pool_attribute *attr = kattr_to_pool_attr(kattr); - - if (attr->show) - return attr->show(pool, buf); - - return -EIO; -} - -static ssize_t -blktap_page_pool_store_attr(struct kobject *kobj, struct attribute *kattr, - const char *buf, size_t size) -{ - struct blktap_page_pool *pool = kobj_to_pool(kobj); - struct pool_attribute *attr = kattr_to_pool_attr(kattr); - - if (attr->show) - return attr->store(pool, buf, size); - - return -EIO; -} - -static struct sysfs_ops blktap_page_pool_sysfs_ops = { - .show = blktap_page_pool_show_attr, - .store = blktap_page_pool_store_attr, -}; - -static void -blktap_page_pool_release(struct kobject *kobj) -{ - struct blktap_page_pool *pool = kobj_to_pool(kobj); - mempool_destroy(pool->bufs); - kfree(pool); -} - -struct kobj_type blktap_page_pool_ktype = { - .release =
blktap_page_pool_release, - .sysfs_ops = &blktap_page_pool_sysfs_ops, - .default_attrs = blktap_page_pool_attrs, -}; - -static void* -__mempool_page_alloc(gfp_t gfp_mask, void *pool_data) -{ - struct page *page; - - if (!(gfp_mask & __GFP_WAIT)) - return NULL; - - page = alloc_page(gfp_mask); - if (page) - SetPageReserved(page); - - return page; -} - -static void -__mempool_page_free(void *element, void *pool_data) -{ - struct page *page = element; - - ClearPageReserved(page); - put_page(page); -} - -static struct kobject* -blktap_page_pool_create(const char *name, int nr_pages) -{ - struct blktap_page_pool *pool; - int err; - - pool = kzalloc(sizeof(*pool), GFP_KERNEL); - if (!pool) - goto fail; - - spin_lock_init(&pool->lock); - init_waitqueue_head(&pool->wait); - - pool->bufs = mempool_create(nr_pages, - __mempool_page_alloc, __mempool_page_free, - pool); - if (!pool->bufs) - goto fail_pool; - - kobject_init(&pool->kobj, &blktap_page_pool_ktype); - pool->kobj.kset = pool_set; - err = kobject_add(&pool->kobj, &pool_set->kobj, "%s", name); - if (err) - goto fail_bufs; - - return &pool->kobj; - - kobject_del(&pool->kobj); -fail_bufs: - mempool_destroy(pool->bufs); -fail_pool: - kfree(pool); -fail: - return NULL; -} - -struct blktap_page_pool* -blktap_page_pool_get(const char *name) -{ - struct kobject *kobj; - - kobj = __blktap_kset_find_obj(pool_set, name); - if (!kobj) - kobj = blktap_page_pool_create(name, - POOL_DEFAULT_PAGES); - if (!kobj) - return ERR_PTR(-ENOMEM); - - return kobj_to_pool(kobj); -} - -int __init -blktap_page_pool_init(struct kobject *parent) -{ - request_cache = - kmem_cache_create("blktap-request", - sizeof(struct blktap_request), 0, - 0, blktap_request_ctor); - if (!request_cache) - return -ENOMEM; - - request_pool = - mempool_create_slab_pool(POOL_MIN_REQS, request_cache); - if (!request_pool) - return -ENOMEM; - - pool_set = kset_create_and_add("pools", NULL, parent); - if (!pool_set) - return -ENOMEM; - - return 0; -} - -void
-blktap_page_pool_exit(void) -{ - if (pool_set) { - BUG_ON(!list_empty(&pool_set->list)); - kset_unregister(pool_set); - pool_set = NULL; - } - - if (request_pool) { - mempool_destroy(request_pool); - request_pool = NULL; - } - - if (request_cache) { - kmem_cache_destroy(request_cache); - request_cache = NULL; - } -} diff --git a/drivers/xen/blktap/ring.c b/drivers/xen/blktap/ring.c deleted file mode 100644 index 635f1fd..0000000 --- a/drivers/xen/blktap/ring.c +++ /dev/null @@ -1,595 +0,0 @@ - -#include <linux/device.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/poll.h> -#include <linux/blkdev.h> - -#include "blktap.h" - -int blktap_ring_major; -static struct cdev blktap_ring_cdev; - - /* - * BLKTAP - immediately before the mmap area, - * we have a bunch of pages reserved for shared memory rings. - */ -#define RING_PAGES 1 - -#define BLKTAP_INFO_SIZE_AT(_memb) \ - offsetof(struct blktap_device_info, _memb) + \ - sizeof(((struct blktap_device_info*)0)->_memb) - -static void -blktap_ring_read_response(struct blktap *tap, - const blktap_ring_rsp_t *rsp) -{ - struct blktap_ring *ring = &tap->ring; - struct blktap_request *request; - int usr_idx, err; - - request = NULL; - - usr_idx = rsp->id; - if (usr_idx < 0 || usr_idx >= BLKTAP_RING_SIZE) { - err = -ERANGE; - goto invalid; - } - - request = ring->pending[usr_idx]; - - if (!request) { - err = -ESRCH; - goto invalid; - } - - if (rsp->operation != request->operation) { - err = -EINVAL; - goto invalid; - } - - dev_dbg(ring->dev, - "request %d [%p] response: %d\n", - request->usr_idx, request, rsp->status); - - err = rsp->status == BLKTAP_RSP_OKAY ? 
0 : -EIO; -end_request: - blktap_device_end_request(tap, request, err); - return; - -invalid: - dev_warn(ring->dev, - "invalid response, idx:%d status:%d op:%d/%d: err %d\n", - usr_idx, rsp->status, - rsp->operation, request->operation, - err); - if (request) - goto end_request; -} - -static void -blktap_read_ring(struct blktap *tap) -{ - struct blktap_ring *ring = &tap->ring; - blktap_ring_rsp_t rsp; - RING_IDX rc, rp; - - down_read(&current->mm->mmap_sem); - if (!ring->vma) { - up_read(&current->mm->mmap_sem); - return; - } - - /* for each outstanding message on the ring */ - rp = ring->ring.sring->rsp_prod; - rmb(); - - for (rc = ring->ring.rsp_cons; rc != rp; rc++) { - memcpy(&rsp, RING_GET_RESPONSE(&ring->ring, rc), sizeof(rsp)); - blktap_ring_read_response(tap, &rsp); - } - - ring->ring.rsp_cons = rc; - - up_read(&current->mm->mmap_sem); -} - -#define MMAP_VADDR(_start, _req, _seg) \ - ((_start) + \ - ((_req) * BLKTAP_SEGMENT_MAX * BLKTAP_PAGE_SIZE) + \ - ((_seg) * BLKTAP_PAGE_SIZE)) - -static int blktap_ring_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - return VM_FAULT_SIGBUS; -} - -static void -blktap_ring_fail_pending(struct blktap *tap) -{ - struct blktap_ring *ring = &tap->ring; - struct blktap_request *request; - int usr_idx; - - for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) { - request = ring->pending[usr_idx]; - if (!request) - continue; - - blktap_device_end_request(tap, request, -EIO); - } -} - -static void -blktap_ring_vm_close(struct vm_area_struct *vma) -{ - struct blktap *tap = vma->vm_private_data; - struct blktap_ring *ring = &tap->ring; - struct page *page = virt_to_page(ring->ring.sring); - - blktap_ring_fail_pending(tap); - - zap_page_range(vma, vma->vm_start, PAGE_SIZE, NULL); - ClearPageReserved(page); - __free_page(page); - - ring->vma = NULL; - - if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) - blktap_control_destroy_tap(tap); -} - -static struct vm_operations_struct blktap_ring_vm_operations = { - .close =
blktap_ring_vm_close, - .fault = blktap_ring_fault, -}; - -int -blktap_ring_map_segment(struct blktap *tap, - struct blktap_request *request, - int seg) -{ - struct blktap_ring *ring = &tap->ring; - unsigned long uaddr; - - uaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, seg); - return vm_insert_page(ring->vma, uaddr, request->pages[seg]); -} - -int -blktap_ring_map_request(struct blktap *tap, - struct blktap_request *request) -{ - int seg, err = 0; - int write; - - write = request->operation == BLKTAP_OP_WRITE; - - for (seg = 0; seg < request->nr_pages; seg++) { - if (write) - blktap_request_bounce(tap, request, seg, write); - - err = blktap_ring_map_segment(tap, request, seg); - if (err) - break; - } - - if (err) - blktap_ring_unmap_request(tap, request); - - return err; -} - -void -blktap_ring_unmap_request(struct blktap *tap, - struct blktap_request *request) -{ - struct blktap_ring *ring = &tap->ring; - unsigned long uaddr; - unsigned size; - int seg, read; - - uaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, 0); - size = request->nr_pages << PAGE_SHIFT; - read = request->operation == BLKTAP_OP_READ; - - if (read) - for (seg = 0; seg < request->nr_pages; seg++) - blktap_request_bounce(tap, request, seg, !read); - - zap_page_range(ring->vma, uaddr, size, NULL); -} - -void -blktap_ring_free_request(struct blktap *tap, - struct blktap_request *request) -{ - struct blktap_ring *ring = &tap->ring; - - ring->pending[request->usr_idx] = NULL; - ring->n_pending--; - - blktap_request_free(tap, request); -} - -struct blktap_request* -blktap_ring_make_request(struct blktap *tap) -{ - struct blktap_ring *ring = &tap->ring; - struct blktap_request *request; - int usr_idx; - - if (RING_FULL(&ring->ring)) - return ERR_PTR(-ENOSPC); - - request = blktap_request_alloc(tap); - if (!request) - return ERR_PTR(-ENOMEM); - - for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) - if (!ring->pending[usr_idx]) - break; - - BUG_ON(usr_idx >= BLKTAP_RING_SIZE); - - 
request->tap = tap; - request->usr_idx = usr_idx; - - ring->pending[usr_idx] = request; - ring->n_pending++; - - return request; -} - -void -blktap_ring_submit_request(struct blktap *tap, - struct blktap_request *request) -{ - struct blktap_ring *ring = &tap->ring; - blktap_ring_req_t *breq; - struct scatterlist *sg; - int i, nsecs = 0; - - dev_dbg(ring->dev, - "request %d [%p] submit\n", request->usr_idx, request); - - breq = RING_GET_REQUEST(&ring->ring, ring->ring.req_prod_pvt); - - breq->id = request->usr_idx; - breq->sector_number = blk_rq_pos(request->rq); - breq->__pad = 0; - breq->operation = request->operation; - breq->nr_segments = request->nr_pages; - - blktap_for_each_sg(sg, request, i) { - struct blktap_segment *seg = &breq->seg[i]; - int first, count; - - count = sg->length >> 9; - first = sg->offset >> 9; - - seg->first_sect = first; - seg->last_sect = first + count - 1; - - nsecs += count; - } - - ring->ring.req_prod_pvt++; - - do_gettimeofday(&request->time); - - - if (request->operation == BLKTAP_OP_WRITE) { - tap->stats.st_wr_sect += nsecs; - tap->stats.st_wr_req++; - } - - if (request->operation == BLKTAP_OP_READ) { - tap->stats.st_rd_sect += nsecs; - tap->stats.st_rd_req++; - } -} - -static int -blktap_ring_open(struct inode *inode, struct file *filp) -{ - struct blktap *tap = NULL; - int minor; - - minor = iminor(inode); - - if (minor < blktap_max_minor) - tap = blktaps[minor]; - - if (!tap) - return -ENXIO; - - if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) - return -ENXIO; - - if (tap->ring.task) - return -EBUSY; - - filp->private_data = tap; - tap->ring.task = current; - - return 0; -} - -static int -blktap_ring_release(struct inode *inode, struct file *filp) -{ - struct blktap *tap = filp->private_data; - - blktap_device_destroy_sync(tap); - - tap->ring.task = NULL; - - if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) - blktap_control_destroy_tap(tap); - - return 0; -} - -static int -blktap_ring_mmap(struct file *filp, 
struct vm_area_struct *vma) -{ - struct blktap *tap = filp->private_data; - struct blktap_ring *ring = &tap->ring; - blktap_sring_t *sring; - struct page *page = NULL; - int err; - - if (ring->vma) - return -EBUSY; - - page = alloc_page(GFP_KERNEL|__GFP_ZERO); - if (!page) - return -ENOMEM; - - SetPageReserved(page); - - err = vm_insert_page(vma, vma->vm_start, page); - if (err) - goto fail; - - sring = page_address(page); - SHARED_RING_INIT(sring); - FRONT_RING_INIT(&ring->ring, sring, PAGE_SIZE); - - ring->ring_vstart = vma->vm_start; - ring->user_vstart = ring->ring_vstart + PAGE_SIZE; - - vma->vm_private_data = tap; - - vma->vm_flags |= VM_DONTCOPY; - vma->vm_flags |= VM_RESERVED; - - vma->vm_ops = &blktap_ring_vm_operations; - - ring->vma = vma; - return 0; - -fail: - if (page) { - zap_page_range(vma, vma->vm_start, PAGE_SIZE, NULL); - ClearPageReserved(page); - __free_page(page); - } - - return err; -} - -static int -blktap_ring_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - struct blktap *tap = filp->private_data; - struct blktap_ring *ring = &tap->ring; - void __user *ptr = (void *)arg; - int err; - - BTDBG("%d: cmd: %u, arg: %lu\n", tap->minor, cmd, arg); - - if (!ring->vma || ring->vma->vm_mm != current->mm) - return -EACCES; - - switch(cmd) { - case BLKTAP_IOCTL_RESPOND: - - blktap_read_ring(tap); - return 0; - - case BLKTAP_IOCTL_CREATE_DEVICE_COMPAT: { - struct blktap_device_info info; - struct blktap2_params params; - - if (copy_from_user(&params, ptr, sizeof(params))) - return -EFAULT; - - info.capacity = params.capacity; - info.sector_size = params.sector_size; - info.flags = 0; - - err = blktap_device_create(tap, &info); - if (err) - return err; - - if (params.name[0]) { - strncpy(tap->name, params.name, sizeof(params.name)); - tap->name[sizeof(tap->name)-1] = 0; - } - - return 0; - } - - case BLKTAP_IOCTL_CREATE_DEVICE: { - struct blktap_device_info __user *ptr = (void *)arg; - struct blktap_device_info info;
- unsigned long mask; - size_t base_sz, sz; - - mask = BLKTAP_DEVICE_FLAG_RO; - - memset(&info, 0, sizeof(info)); - sz = base_sz = BLKTAP_INFO_SIZE_AT(flags); - - if (copy_from_user(&info, ptr, sz)) - return -EFAULT; - - if (sz > base_sz) - if (copy_from_user(&info, ptr, sz)) - return -EFAULT; - - if (put_user(info.flags & mask, &ptr->flags)) - return -EFAULT; - - return blktap_device_create(tap, &info); - } - - case BLKTAP_IOCTL_REMOVE_DEVICE: - - return blktap_device_destroy(tap); - } - - return -ENOIOCTLCMD; -} - -static unsigned int blktap_ring_poll(struct file *filp, poll_table *wait) -{ - struct blktap *tap = filp->private_data; - struct blktap_ring *ring = &tap->ring; - int work; - - poll_wait(filp, &tap->pool->wait, wait); - poll_wait(filp, &ring->poll_wait, wait); - - down_read(&current->mm->mmap_sem); - if (ring->vma && tap->device.gd) - blktap_device_run_queue(tap); - up_read(&current->mm->mmap_sem); - - work = ring->ring.req_prod_pvt - ring->ring.sring->req_prod; - RING_PUSH_REQUESTS(&ring->ring); - - if (work || - ring->ring.sring->private.tapif_user.msg || - test_and_clear_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse)) - return POLLIN | POLLRDNORM; - - return 0; -} - -static struct file_operations blktap_ring_file_operations = { - .owner = THIS_MODULE, - .open = blktap_ring_open, - .release = blktap_ring_release, - .ioctl = blktap_ring_ioctl, - .mmap = blktap_ring_mmap, - .poll = blktap_ring_poll, -}; - -void -blktap_ring_kick_user(struct blktap *tap) -{ - wake_up(&tap->ring.poll_wait); -} - -int -blktap_ring_destroy(struct blktap *tap) -{ - struct blktap_ring *ring = &tap->ring; - - if (ring->task || ring->vma) - return -EBUSY; - - return 0; -} - -int -blktap_ring_create(struct blktap *tap) -{ - struct blktap_ring *ring = &tap->ring; - - init_waitqueue_head(&ring->poll_wait); - ring->devno = MKDEV(blktap_ring_major, tap->minor); - - return 0; -} - -size_t -blktap_ring_debug(struct blktap *tap, char *buf, size_t size) -{ - struct blktap_ring *ring = &tap->ring; -
char *s = buf, *end = buf + size; - int usr_idx; - - s += snprintf(s, end - s, - "begin pending:%d\n", ring->n_pending); - - for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) { - struct blktap_request *request; - struct timeval *time; - int write; - - request = ring->pending[usr_idx]; - if (!request) - continue; - - write = request->operation == BLKTAP_OP_WRITE; - time = &request->time; - - s += snprintf(s, end - s, - "%02d: usr_idx:%02d " - "op:%c nr_pages:%02d time:%lu.%09lu\n", - usr_idx, request->usr_idx, - write ? 'W' : 'R', request->nr_pages, - time->tv_sec, time->tv_usec); - } - - s += snprintf(s, end - s, "end pending\n"); - - return s - buf; -} - - -int __init -blktap_ring_init(void) -{ - dev_t dev = 0; - int err; - - cdev_init(&blktap_ring_cdev, &blktap_ring_file_operations); - blktap_ring_cdev.owner = THIS_MODULE; - - err = alloc_chrdev_region(&dev, 0, MAX_BLKTAP_DEVICE, "blktap2"); - if (err < 0) { - BTERR("error registering ring devices: %d\n", err); - return err; - } - - err = cdev_add(&blktap_ring_cdev, dev, MAX_BLKTAP_DEVICE); - if (err) { - BTERR("error adding ring device: %d\n", err); - unregister_chrdev_region(dev, MAX_BLKTAP_DEVICE); - return err; - } - - blktap_ring_major = MAJOR(dev); - BTINFO("blktap ring major: %d\n", blktap_ring_major); - - return 0; -} - -void -blktap_ring_exit(void) -{ - if (!blktap_ring_major) - return; - - cdev_del(&blktap_ring_cdev); - unregister_chrdev_region(MKDEV(blktap_ring_major, 0), - MAX_BLKTAP_DEVICE); - - blktap_ring_major = 0; -} diff --git a/drivers/xen/blktap/sysfs.c b/drivers/xen/blktap/sysfs.c deleted file mode 100644 index 182de9a..0000000 --- a/drivers/xen/blktap/sysfs.c +++ /dev/null @@ -1,288 +0,0 @@ -#include <linux/types.h> -#include <linux/device.h> -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/genhd.h> -#include <linux/blkdev.h> - -#include "blktap.h" - -int blktap_debug_level = 1; - -static struct class *class; - -static ssize_t -blktap_sysfs_set_name(struct
device *dev, struct device_attribute *attr, const char *buf, size_t size) -{ - struct blktap *tap; - - tap = dev_get_drvdata(dev); - if (!tap) - return 0; - - if (size >= BLKTAP_NAME_MAX) - return -ENAMETOOLONG; - - if (strnlen(buf, size) != size) - return -EINVAL; - - strcpy(tap->name, buf); - - return size; -} - -static ssize_t -blktap_sysfs_get_name(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct blktap *tap; - ssize_t size; - - tap = dev_get_drvdata(dev); - if (!tap) - return 0; - - if (tap->name[0]) - size = sprintf(buf, "%s\n", tap->name); - else - size = sprintf(buf, "%d\n", tap->minor); - - return size; -} -static DEVICE_ATTR(name, S_IRUGO|S_IWUSR, - blktap_sysfs_get_name, blktap_sysfs_set_name); - -static void -blktap_sysfs_remove_work(struct work_struct *work) -{ - struct blktap *tap - = container_of(work, struct blktap, remove_work); - blktap_control_destroy_tap(tap); -} - -static ssize_t -blktap_sysfs_remove_device(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t size) -{ - struct blktap *tap; - int err; - - tap = dev_get_drvdata(dev); - if (!tap) - return size; - - if (test_and_set_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) - goto wait; - - if (tap->ring.vma) { - blktap_sring_t *sring = tap->ring.ring.sring; - sring->private.tapif_user.msg = BLKTAP_RING_MESSAGE_CLOSE; - blktap_ring_kick_user(tap); - } else { - INIT_WORK(&tap->remove_work, blktap_sysfs_remove_work); - schedule_work(&tap->remove_work); - } -wait: - err = wait_event_interruptible(tap->remove_wait, - !dev_get_drvdata(dev)); - if (err) - return err; - - return size; -} -static DEVICE_ATTR(remove, S_IWUSR, NULL, blktap_sysfs_remove_device); - -static ssize_t -blktap_sysfs_debug_device(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct blktap *tap; - char *s = buf, *end = buf + PAGE_SIZE; - - tap = dev_get_drvdata(dev); - if (!tap) - return 0; - - s += blktap_control_debug(tap, s, end - s); - - s += 
blktap_request_debug(tap, s, end - s); - - s += blktap_device_debug(tap, s, end - s); - - s += blktap_ring_debug(tap, s, end - s); - - return s - buf; -} -static DEVICE_ATTR(debug, S_IRUGO, blktap_sysfs_debug_device, NULL); - -static ssize_t -blktap_sysfs_show_task(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct blktap *tap; - ssize_t rv = 0; - - tap = dev_get_drvdata(dev); - if (!tap) - return 0; - - if (tap->ring.task) - rv = sprintf(buf, "%d\n", tap->ring.task->pid); - - return rv; -} -static DEVICE_ATTR(task, S_IRUGO, blktap_sysfs_show_task, NULL); - -static ssize_t -blktap_sysfs_show_pool(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct blktap *tap = dev_get_drvdata(dev); - return sprintf(buf, "%s", kobject_name(&tap->pool->kobj)); -} - -static ssize_t -blktap_sysfs_store_pool(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t size) -{ - struct blktap *tap = dev_get_drvdata(dev); - struct blktap_page_pool *pool, *tmp = tap->pool; - - if (tap->device.gd) - return -EBUSY; - - pool = blktap_page_pool_get(buf); - if (IS_ERR(pool)) - return PTR_ERR(pool); - - tap->pool = pool; - kobject_put(&tmp->kobj); - - return size; -} -DEVICE_ATTR(pool, S_IRUSR|S_IWUSR, - blktap_sysfs_show_pool, blktap_sysfs_store_pool); - -int -blktap_sysfs_create(struct blktap *tap) -{ - struct blktap_ring *ring = &tap->ring; - struct device *dev; - int err = 0; - - init_waitqueue_head(&tap->remove_wait); - - dev = device_create(class, NULL, ring->devno, - tap, "blktap%d", tap->minor); - if (IS_ERR(dev)) - err = PTR_ERR(dev); - if (!err) - err = device_create_file(dev, &dev_attr_name); - if (!err) - err = device_create_file(dev, &dev_attr_remove); - if (!err) - err = device_create_file(dev, &dev_attr_debug); - if (!err) - err = device_create_file(dev, &dev_attr_task); - if (!err) - err = device_create_file(dev, &dev_attr_pool); - if (!err) - ring->dev = dev; - else - device_unregister(dev); - - return err; -} 
- -void -blktap_sysfs_destroy(struct blktap *tap) -{ - struct blktap_ring *ring = &tap->ring; - struct device *dev; - - dev = ring->dev; - - if (!dev) - return; - - dev_set_drvdata(dev, NULL); - wake_up(&tap->remove_wait); - - device_unregister(dev); - ring->dev = NULL; -} - -static ssize_t -blktap_sysfs_show_verbosity(struct class *class, char *buf) -{ - return sprintf(buf, "%d\n", blktap_debug_level); -} - -static ssize_t -blktap_sysfs_set_verbosity(struct class *class, const char *buf, size_t size) -{ - int level; - - if (sscanf(buf, "%d", &level) == 1) { - blktap_debug_level = level; - return size; - } - - return -EINVAL; -} -static CLASS_ATTR(verbosity, S_IRUGO|S_IWUSR, - blktap_sysfs_show_verbosity, blktap_sysfs_set_verbosity); - -static ssize_t -blktap_sysfs_show_devices(struct class *class, char *buf) -{ - int i, ret; - struct blktap *tap; - - mutex_lock(&blktap_lock); - - ret = 0; - for (i = 0; i < blktap_max_minor; i++) { - tap = blktaps[i]; - if (!tap) - continue; - - if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse)) - continue; - - ret += sprintf(buf + ret, "%d %s\n", tap->minor, tap->name); - } - - mutex_unlock(&blktap_lock); - - return ret; -} -static CLASS_ATTR(devices, S_IRUGO, blktap_sysfs_show_devices, NULL); - -void -blktap_sysfs_exit(void) -{ - if (class) - class_destroy(class); -} - -int __init -blktap_sysfs_init(void) -{ - struct class *cls; - int err = 0; - - cls = class_create(THIS_MODULE, "blktap2"); - if (IS_ERR(cls)) - err = PTR_ERR(cls); - if (!err) - err = class_create_file(cls, &class_attr_verbosity); - if (!err) - err = class_create_file(cls, &class_attr_devices); - if (!err) - class = cls; - else - class_destroy(cls); - - return err; -} -- 1.7.0.4 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel Stodden
2011-Mar-09 00:42 UTC
[Xen-devel] [PATCH 04/10] blktap: Fix reference to freed struct request.
The request will be freed by the call to __blktap_end_rq(), so rq->q is invalid before spin_unlock_irq(). Signed-off-by: Dominic Curran <dominic.curran@citrix.com> Acked-by: Daniel Stodden <daniel.stodden@citrix.com> --- drivers/block/blktap/device.c | 6 ++++-- 1 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/block/blktap/device.c b/drivers/block/blktap/device.c index 9a09457..8caff9f 100644 --- a/drivers/block/blktap/device.c +++ b/drivers/block/blktap/device.c @@ -135,9 +135,11 @@ __blktap_end_rq(struct request *rq, int err) static inline void blktap_end_rq(struct request *rq, int err) { - spin_lock_irq(rq->q->queue_lock); + struct request_queue *q = rq->q; + + spin_lock_irq(q->queue_lock); __blktap_end_rq(rq, err); - spin_unlock_irq(rq->q->queue_lock); + spin_unlock_irq(q->queue_lock); } void -- 1.7.0.4 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel Stodden
2011-Mar-09 00:42 UTC
[Xen-devel] [PATCH 05/10] blktap: Support non-R/W requests
Special cases segment vector writes on the blktap ring. Anticipate non-fs requests on the queue. Add some switches, for trim or flush operations to settle. Signed-off-by: Daniel Stodden <daniel.stodden@citrix.com> --- drivers/block/blktap/device.c | 24 ++++++-------- drivers/block/blktap/ring.c | 68 +++++++++++++++++++++++++--------------- 2 files changed, 53 insertions(+), 39 deletions(-) diff --git a/drivers/block/blktap/device.c b/drivers/block/blktap/device.c index 8caff9f..fcf16c9 100644 --- a/drivers/block/blktap/device.c +++ b/drivers/block/blktap/device.c @@ -166,7 +166,7 @@ blktap_device_make_request(struct blktap *tap, struct request *rq) { struct blktap_device *tapdev = &tap->device; struct blktap_request *request; - int write, nsegs; + int nsegs; int err; request = blktap_ring_make_request(tap); @@ -180,15 +180,17 @@ blktap_device_make_request(struct blktap *tap, struct request *rq) goto fail; } - write = rq_data_dir(rq) == WRITE; + if (!blk_fs_request(rq)) { + err = -EOPNOTSUPP; + goto fail; + } + nsegs = blk_rq_map_sg(rq->q, rq, request->sg_table); - dev_dbg(disk_to_dev(tapdev->gd), - "make_request: op=%c bytes=%d nsegs=%d\n", - write ? 'w' : 'r', blk_rq_bytes(rq), nsegs); - - request->rq = rq; - request->operation = write ?
BLKTAP_OP_WRITE : BLKTAP_OP_READ; + if (rq_data_dir(rq) == WRITE) + request->operation = BLKTAP_OP_WRITE; + else + request->operation = BLKTAP_OP_READ; err = blktap_request_get_pages(tap, request, nsegs); if (err) @@ -198,6 +200,7 @@ blktap_device_make_request(struct blktap *tap, struct request *rq) if (err) goto fail; + request->rq = rq; blktap_ring_submit_request(tap, request); return 0; @@ -242,11 +245,6 @@ blktap_device_run_queue(struct blktap *tap) if (!rq) break; - if (!blk_fs_request(rq)) { - __blktap_end_queued_rq(rq, -EOPNOTSUPP); - continue; - } - spin_unlock_irq(&tapdev->lock); err = blktap_device_make_request(tap, rq); diff --git a/drivers/block/blktap/ring.c b/drivers/block/blktap/ring.c index 635f1fd..bae6f82 100644 --- a/drivers/block/blktap/ring.c +++ b/drivers/block/blktap/ring.c @@ -241,14 +241,39 @@ blktap_ring_make_request(struct blktap *tap) return request; } +static int +blktap_ring_make_rw_request(struct blktap *tap, + struct blktap_request *request, + struct blktap_ring_request *breq) +{ + struct scatterlist *sg; + unsigned int i, nsecs = 0; + + blktap_for_each_sg(sg, request, i) { + struct blktap_segment *seg = &breq->seg[i]; + int first, count; + + count = sg->length >> 9; + first = sg->offset >> 9; + + seg->first_sect = first; + seg->last_sect = first + count - 1; + + nsecs += count; + } + + breq->sector_number = blk_rq_pos(request->rq); + + return nsecs; +} + void blktap_ring_submit_request(struct blktap *tap, struct blktap_request *request) { struct blktap_ring *ring = &tap->ring; blktap_ring_req_t *breq; - struct scatterlist *sg; - int i, nsecs = 0; + int nsecs; dev_dbg(ring->dev, "request %d [%p] submit\n", request->usr_idx, request); @@ -256,38 +281,31 @@ blktap_ring_submit_request(struct blktap *tap, breq = RING_GET_REQUEST(&ring->ring, ring->ring.req_prod_pvt); breq->id = request->usr_idx; - breq->sector_number = blk_rq_pos(request->rq); breq->__pad = 0; breq->operation = request->operation; breq->nr_segments = request->nr_pages; - 
blktap_for_each_sg(sg, request, i) { - struct blktap_segment *seg = &breq->seg[i]; - int first, count; + switch (breq->operation) { + case BLKTAP_OP_READ: + nsecs = blktap_ring_make_rw_request(tap, request, breq); - count = sg->length >> 9; - first = sg->offset >> 9; + tap->stats.st_rd_sect += nsecs; + tap->stats.st_rd_req++; + break; - seg->first_sect = first; - seg->last_sect = first + count - 1; + case BLKTAP_OP_WRITE: + nsecs = blktap_ring_make_rw_request(tap, request, breq); - nsecs += count; + tap->stats.st_wr_sect += nsecs; + tap->stats.st_wr_req++; + break; + default: + BUG(); } ring->ring.req_prod_pvt++; do_gettimeofday(&request->time); - - - if (request->operation == BLKTAP_OP_WRITE) { - tap->stats.st_wr_sect += nsecs; - tap->stats.st_wr_req++; - } - - if (request->operation == BLKTAP_OP_READ) { - tap->stats.st_rd_sect += nsecs; - tap->stats.st_rd_req++; - } } static int @@ -530,20 +548,18 @@ blktap_ring_debug(struct blktap *tap, char *buf, size_t size) for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) { struct blktap_request *request; struct timeval *time; - int write; request = ring->pending[usr_idx]; if (!request) continue; - write = request->operation == BLKTAP_OP_WRITE; time = &request->time; s += snprintf(s, end - s, "%02d: usr_idx:%02d " - "op:%c nr_pages:%02d time:%lu.%09lu\n", + "op:%x nr_pages:%02d time:%lu.%09lu\n", usr_idx, request->usr_idx, - write ? ''W'' : ''R'', request->nr_pages, + request->operation, request->nr_pages, time->tv_sec, time->tv_usec); } -- 1.7.0.4 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel Stodden
2011-Mar-09 00:42 UTC
[Xen-devel] [PATCH 06/10] blktap: Drop the ring message timestamp.
Using gettimeofday() seems overkill. The debug node now just dumps struct request age at jiffy resolution, not time of ring submission. Looks ok because we never really differentiated from block request time anyway. Signed-off-by: Daniel Stodden <daniel.stodden@citrix.com> --- drivers/block/blktap/blktap.h | 1 - drivers/block/blktap/ring.c | 8 +++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/block/blktap/blktap.h b/drivers/block/blktap/blktap.h index 1318cad..05cfd1f 100644 --- a/drivers/block/blktap/blktap.h +++ b/drivers/block/blktap/blktap.h @@ -78,7 +78,6 @@ struct blktap_request { int usr_idx; int operation; - struct timeval time; struct scatterlist sg_table[BLKTAP_SEGMENT_MAX]; struct page *pages[BLKTAP_SEGMENT_MAX]; diff --git a/drivers/block/blktap/ring.c b/drivers/block/blktap/ring.c index bae6f82..e667d2d 100644 --- a/drivers/block/blktap/ring.c +++ b/drivers/block/blktap/ring.c @@ -304,8 +304,6 @@ blktap_ring_submit_request(struct blktap *tap, } ring->ring.req_prod_pvt++; - - do_gettimeofday(&request->time); } static int @@ -547,20 +545,20 @@ blktap_ring_debug(struct blktap *tap, char *buf, size_t size) for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) { struct blktap_request *request; - struct timeval *time; + struct timeval t; request = ring->pending[usr_idx]; if (!request) continue; - time = &request->time; + jiffies_to_timeval(jiffies - request->rq->start_time, &t); s += snprintf(s, end - s, "%02d: usr_idx:%02d " "op:%x nr_pages:%02d time:%lu.%09lu\n", usr_idx, request->usr_idx, request->operation, request->nr_pages, - time->tv_sec, time->tv_usec); + t.tv_sec, t.tv_usec); } s += snprintf(s, end - s, "end pending\n"); -- 1.7.0.4 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel Stodden
2011-Mar-09 00:42 UTC
[Xen-devel] [PATCH 07/10] blktap: Avoid some header quirks.
- Include linux/slab.h where needed. - Avoid the ring typedefs, use struct blktap_x instead. Signed-off-by: Daniel Stodden <daniel.stodden@citrix.com> --- drivers/block/blktap/blktap.h | 2 +- drivers/block/blktap/control.c | 1 + drivers/block/blktap/request.c | 1 + drivers/block/blktap/ring.c | 2 +- drivers/block/blktap/sysfs.c | 2 +- 5 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/block/blktap/blktap.h b/drivers/block/blktap/blktap.h index 05cfd1f..e0f7d99 100644 --- a/drivers/block/blktap/blktap.h +++ b/drivers/block/blktap/blktap.h @@ -44,7 +44,7 @@ struct blktap_ring { struct task_struct *task; struct vm_area_struct *vma; - blktap_front_ring_t ring; + struct blktap_front_ring ring; unsigned long ring_vstart; unsigned long user_vstart; diff --git a/drivers/block/blktap/control.c b/drivers/block/blktap/control.c index 57b1a10..9294a13 100644 --- a/drivers/block/blktap/control.c +++ b/drivers/block/blktap/control.c @@ -2,6 +2,7 @@ #include <linux/sched.h> #include <linux/miscdevice.h> #include <linux/device.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include "blktap.h" diff --git a/drivers/block/blktap/request.c b/drivers/block/blktap/request.c index 8cfd6c9..e799d08 100644 --- a/drivers/block/blktap/request.c +++ b/drivers/block/blktap/request.c @@ -3,6 +3,7 @@ #include <linux/mutex.h> #include <linux/sched.h> #include <linux/device.h> +#include <linux/slab.h> #include "blktap.h" diff --git a/drivers/block/blktap/ring.c b/drivers/block/blktap/ring.c index e667d2d..134583d 100644 --- a/drivers/block/blktap/ring.c +++ b/drivers/block/blktap/ring.c @@ -352,7 +352,7 @@ blktap_ring_mmap(struct file *filp, struct vm_area_struct *vma) { struct blktap *tap = filp->private_data; struct blktap_ring *ring = &tap->ring; - blktap_sring_t *sring; + struct blktap_sring *sring; struct page *page = NULL; int err; diff --git a/drivers/block/blktap/sysfs.c b/drivers/block/blktap/sysfs.c index 182de9a..97ebc53 100644 --- a/drivers/block/blktap/sysfs.c 
+++ b/drivers/block/blktap/sysfs.c @@ -75,7 +75,7 @@ blktap_sysfs_remove_device(struct device *dev, goto wait; if (tap->ring.vma) { - blktap_sring_t *sring = tap->ring.ring.sring; + struct blktap_sring *sring = tap->ring.ring.sring; sring->private.tapif_user.msg = BLKTAP_RING_MESSAGE_CLOSE; blktap_ring_kick_user(tap); } else { -- 1.7.0.4 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel Stodden
2011-Mar-09 00:42 UTC
[Xen-devel] [PATCH 08/10] blktap: Set up physical sector size and alignment offsets.
Misaligned access with respect to the physical block size is a major performance killer. Alignment offsets are for devices which are 'naturally misaligned', e.g. to compensate for stuff like DOS 63S/track partitioning. Accept a BLKTAP_DEVICE_FLAG_PSZ here, and expect userspace to figure out the physical details. Signed-off-by: Daniel Stodden <daniel.stodden@citrix.com> --- drivers/block/blktap/device.c | 37 +++++++++++++++++++++++++++++++++---- drivers/block/blktap/ring.c | 4 ++++ include/linux/blktap.h | 3 +++ 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/drivers/block/blktap/device.c b/drivers/block/blktap/device.c index fcf16c9..0e8eb0a 100644 --- a/drivers/block/blktap/device.c +++ b/drivers/block/blktap/device.c @@ -285,10 +285,13 @@ blktap_device_configure(struct blktap *tap, set_capacity(gd, info->capacity); set_disk_ro(gd, !!(info->flags & BLKTAP_DEVICE_FLAG_RO)); - /* Hard sector size and max sectors impersonate the equiv. hardware. */ blk_queue_logical_block_size(rq, info->sector_size); blk_queue_max_sectors(rq, 512); + /* Hard sector size and alignment in hardware */ + blk_queue_physical_block_size(rq, info->phys_sector_size); + blk_queue_alignment_offset(rq, info->phys_sector_offset); + /* Each segment in a request is up to an aligned page in size. 
*/ blk_queue_segment_boundary(rq, PAGE_SIZE - 1); blk_queue_max_segment_size(rq, PAGE_SIZE); @@ -309,6 +312,7 @@ blktap_device_validate_info(struct blktap *tap, struct blktap_device_info *info) { struct device *dev = tap->ring.dev; + unsigned int phys_mask; int sector_order; sector_order = ffs(info->sector_size) - 1; @@ -321,11 +325,34 @@ blktap_device_validate_info(struct blktap *tap, (info->capacity > ULLONG_MAX >> sector_order)) goto fail; + /* physical blocks default to logical ones */ + if (!(info->flags & BLKTAP_DEVICE_FLAG_PSZ)) { + info->phys_sector_size = info->sector_size; + info->phys_sector_offset = 0; + } + + /* phys block size is 2^n and >= logical */ + sector_order = ilog2(info->phys_sector_size); + if (sector_order < 9 || + info->phys_sector_size != 1U<<sector_order || + info->phys_sector_size < info->sector_size) + goto fail; + + /* alignment offset < physical/logical */ + phys_mask = (info->phys_sector_size / + info->sector_size) - 1; + if ((info->phys_sector_offset & ~phys_mask) != 0) + goto fail; + return 0; fail: - dev_err(dev, "capacity: %llu, sector-size: %u\n", - info->capacity, info->sector_size); + dev_err(dev, + "capacity: %llu, sector-size: %u/%u, " + "phys-offset: %u\n", + info->capacity, info->sector_size, + info->phys_sector_size, info->phys_sector_offset); + return -EINVAL; } @@ -473,9 +500,11 @@ blktap_device_create(struct blktap *tap, struct blktap_device_info *info) set_bit(BLKTAP_DEVICE, &tap->dev_inuse); - dev_info(disk_to_dev(gd), "sector-size: %u/%u capacity: %llu\n", + dev_info(disk_to_dev(gd), + "sector-size: %u/%u phys-offset: %d capacity: %llu\n", queue_logical_block_size(rq), queue_physical_block_size(rq), + queue_alignment_offset(rq), (unsigned long long)get_capacity(gd)); return 0; diff --git a/drivers/block/blktap/ring.c b/drivers/block/blktap/ring.c index 134583d..25bd311 100644 --- a/drivers/block/blktap/ring.c +++ b/drivers/block/blktap/ring.c @@ -446,6 +446,7 @@ blktap_ring_ioctl(struct inode *inode, struct file 
*filp, size_t base_sz, sz; mask = BLKTAP_DEVICE_FLAG_RO; + mask |= BLKTAP_DEVICE_FLAG_PSZ; memset(&info, 0, sizeof(info)); sz = base_sz = BLKTAP_INFO_SIZE_AT(flags); @@ -453,6 +454,9 @@ blktap_ring_ioctl(struct inode *inode, struct file *filp, if (copy_from_user(&info, ptr, sz)) return -EFAULT; + if ((info.flags & BLKTAP_DEVICE_FLAG_PSZ) != 0) + sz = BLKTAP_INFO_SIZE_AT(phys_sector_offset); + if (sz > base_sz) if (copy_from_user(&info, ptr, sz)) return -EFAULT; diff --git a/include/linux/blktap.h b/include/linux/blktap.h index 2c3c924..9a280d9 100644 --- a/include/linux/blktap.h +++ b/include/linux/blktap.h @@ -17,6 +17,7 @@ #define BLKTAP_IOCTL_REMOVE_DEVICE 207 #define BLKTAP_DEVICE_FLAG_RO 0x00000001UL /* disk is R/O */ +#define BLKTAP_DEVICE_FLAG_PSZ 0x00000002UL /* physical sector size */ struct blktap_info { unsigned int ring_major; @@ -28,6 +29,8 @@ struct blktap_device_info { unsigned long long capacity; unsigned int sector_size; unsigned long flags; + unsigned int phys_sector_size; + unsigned int phys_sector_offset; }; /* -- 1.7.0.4 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel Stodden
2011-Mar-09 00:42 UTC
[Xen-devel] [PATCH 09/10] blktap: device_info validation fixes.
- Requiring a logical block size <= 4k is too paranoid. - Use ilog2 and add more comments. --- drivers/block/blktap/device.c | 5 +++-- 1 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/block/blktap/device.c b/drivers/block/blktap/device.c index 0e8eb0a..2379643 100644 --- a/drivers/block/blktap/device.c +++ b/drivers/block/blktap/device.c @@ -315,12 +315,13 @@ blktap_device_validate_info(struct blktap *tap, unsigned int phys_mask; int sector_order; - sector_order = ffs(info->sector_size) - 1; + /* sector size is 2^(n >= 9) */ + sector_order = ilog2(info->sector_size); if (sector_order < 9 || - sector_order > 12 || info->sector_size != 1U<<sector_order) goto fail; + /* make sure capacity doesn't overflow */ if (!info->capacity || (info->capacity > ULLONG_MAX >> sector_order)) goto fail; -- 1.7.0.4 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel Stodden
2011-Mar-09 00:42 UTC
[Xen-devel] [PATCH 10/10] blktap: Add BLKTAP_OP_FLUSH command option.
Wants to be enabled by userspace with a device flag. Semantics equal a cache flush, i.e. durability for completed I/O but no particular ordering constraints of in-flight requests. Signed-off-by: Daniel Stodden <daniel.stodden@citrix.com> --- drivers/block/blktap/blktap.h | 1 + drivers/block/blktap/device.c | 30 ++++++++++++++++++++++++++---- drivers/block/blktap/ring.c | 7 +++++++ include/linux/blktap.h | 2 ++ 4 files changed, 36 insertions(+), 4 deletions(-) diff --git a/drivers/block/blktap/blktap.h b/drivers/block/blktap/blktap.h index e0f7d99..1b63d0e 100644 --- a/drivers/block/blktap/blktap.h +++ b/drivers/block/blktap/blktap.h @@ -62,6 +62,7 @@ struct blktap_statistics { int st_rd_req; int st_wr_req; int st_oo_req; + int st_fl_req; int st_rd_sect; int st_wr_sect; s64 st_rd_cnt; diff --git a/drivers/block/blktap/device.c b/drivers/block/blktap/device.c index 2379643..74c3daf 100644 --- a/drivers/block/blktap/device.c +++ b/drivers/block/blktap/device.c @@ -161,6 +161,15 @@ blktap_device_end_request(struct blktap *tap, blktap_end_rq(rq, error); } +static void +blktap_device_prepare_flush(struct request_queue *q, struct request *rq) +{ + rq->cmd_type = REQ_TYPE_BLOCK_PC; + rq->timeout = q->rq_timeout; + rq->cmd[0] = BLKTAP_OP_FLUSH; + rq->cmd_len = 1; +} + int blktap_device_make_request(struct blktap *tap, struct request *rq) { @@ -180,6 +189,12 @@ blktap_device_make_request(struct blktap *tap, struct request *rq) goto fail; } + if (blk_pc_request(rq)) { + request->operation = rq->cmd[0]; + request->nr_pages = 0; + goto submit; + } + if (!blk_fs_request(rq)) { err = -EOPNOTSUPP; goto fail; @@ -200,6 +215,7 @@ blktap_device_make_request(struct blktap *tap, struct request *rq) if (err) goto fail; +submit: request->rq = rq; blktap_ring_submit_request(tap, request); @@ -303,8 +319,12 @@ blktap_device_configure(struct blktap *tap, /* Make sure buffer addresses are sector-aligned. */ blk_queue_dma_alignment(rq, 511); - /* We are reordering, but cacheless. 
*/ - blk_queue_ordered(rq, QUEUE_ORDERED_DRAIN, NULL); + /* Enable cache control */ + if (info->flags & BLKTAP_DEVICE_FLAG_FLUSH) + blk_queue_ordered(rq, QUEUE_ORDERED_DRAIN_FLUSH, + blktap_device_prepare_flush); + else + blk_queue_ordered(rq, QUEUE_ORDERED_DRAIN, NULL); } static int @@ -502,11 +522,13 @@ blktap_device_create(struct blktap *tap, struct blktap_device_info *info) set_bit(BLKTAP_DEVICE, &tap->dev_inuse); dev_info(disk_to_dev(gd), - "sector-size: %u/%u phys-offset: %d capacity: %llu\n", + "sector-size: %u/%u phys-offset: %d capacity: %llu" + " ordered: %#x\n", queue_logical_block_size(rq), queue_physical_block_size(rq), queue_alignment_offset(rq), - (unsigned long long)get_capacity(gd)); + (unsigned long long)get_capacity(gd), + rq->ordered); return 0; diff --git a/drivers/block/blktap/ring.c b/drivers/block/blktap/ring.c index 25bd311..9a7696e 100644 --- a/drivers/block/blktap/ring.c +++ b/drivers/block/blktap/ring.c @@ -299,6 +299,12 @@ blktap_ring_submit_request(struct blktap *tap, tap->stats.st_wr_sect += nsecs; tap->stats.st_wr_req++; break; + + case BLKTAP_OP_FLUSH: + breq->sector_number = 0; + tap->stats.st_fl_req++; + break; + default: BUG(); } @@ -447,6 +453,7 @@ blktap_ring_ioctl(struct inode *inode, struct file *filp, mask = BLKTAP_DEVICE_FLAG_RO; mask |= BLKTAP_DEVICE_FLAG_PSZ; + mask |= BLKTAP_DEVICE_FLAG_FLUSH; memset(&info, 0, sizeof(info)); sz = base_sz = BLKTAP_INFO_SIZE_AT(flags); diff --git a/include/linux/blktap.h b/include/linux/blktap.h index 9a280d9..4739320 100644 --- a/include/linux/blktap.h +++ b/include/linux/blktap.h @@ -18,6 +18,7 @@ #define BLKTAP_DEVICE_FLAG_RO 0x00000001UL /* disk is R/O */ #define BLKTAP_DEVICE_FLAG_PSZ 0x00000002UL /* physical sector size */ +#define BLKTAP_DEVICE_FLAG_FLUSH 0x00000004UL /* supports FLUSH */ struct blktap_info { unsigned int ring_major; @@ -54,6 +55,7 @@ struct blktap_segment { #define BLKTAP_OP_READ 0 #define BLKTAP_OP_WRITE 1 +#define BLKTAP_OP_FLUSH 2 #define BLKTAP_SEGMENT_MAX 11 
-- 1.7.0.4 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Ian Campbell
2011-Mar-09 10:18 UTC
Re: [Xen-devel] [PATCH 01/10] blktap: Add include/linux/blktap.h
On Wed, 2011-03-09 at 00:42 +0000, Daniel Stodden wrote: > Moves blktap2 definitions into a common header file. > > Includes xen/interface/io/ring.h and new ring definitions. Makes > blktap build independently from xen-devel headers. > > New blktap_ring structs are fully congrent to blkif rings, for binary > compat. > > Signed-off-by: Daniel Stodden <daniel.stodden@citrix.com> > --- > drivers/xen/blktap/blktap.h | 66 ++++---------------------------- > drivers/xen/blktap/control.c | 14 +++--- > drivers/xen/blktap/device.c | 12 +++--- > drivers/xen/blktap/request.c | 8 ++-- > drivers/xen/blktap/ring.c | 51 ++++++++++++++----------- > drivers/xen/blktap/sysfs.c | 6 +- > include/linux/blktap.h | 85 ++++++++++++++++++++++++++++++++++++++++++ This new file defines the kernel<->user (tapdisk process) ring protocol, right? I think its proper home would be under include/xen somewhere, which is where the gntdev and evtchn etc. driver interfaces are defined. Where is the canonical definition of this interface stored? In the kernel tree or the hypervisor tree? 
Ian.> 7 files changed, 142 insertions(+), 100 deletions(-) > create mode 100644 include/linux/blktap.h > > diff --git a/drivers/xen/blktap/blktap.h b/drivers/xen/blktap/blktap.h > index fe63fc9..1318cad 100644 > --- a/drivers/xen/blktap/blktap.h > +++ b/drivers/xen/blktap/blktap.h > @@ -6,7 +6,7 @@ > #include <linux/cdev.h> > #include <linux/init.h> > #include <linux/scatterlist.h> > -#include <xen/blkif.h> > +#include <linux/blktap.h> > > extern int blktap_debug_level; > extern int blktap_ring_major; > @@ -30,74 +30,26 @@ extern int blktap_device_major; > #define BLKTAP_DEVICE_CLOSED 5 > #define BLKTAP_SHUTDOWN_REQUESTED 8 > > -/* blktap IOCTLs: */ > -#define BLKTAP2_IOCTL_KICK_FE 1 > -#define BLKTAP2_IOCTL_ALLOC_TAP 200 > -#define BLKTAP2_IOCTL_FREE_TAP 201 > -#define BLKTAP2_IOCTL_CREATE_DEVICE 202 > -#define BLKTAP2_IOCTL_REMOVE_DEVICE 207 > - > -#define BLKTAP2_MAX_MESSAGE_LEN 256 > - > -#define BLKTAP2_RING_MESSAGE_CLOSE 3 > - > #define BLKTAP_REQUEST_FREE 0 > #define BLKTAP_REQUEST_PENDING 1 > > -/* > - * The maximum number of requests that can be outstanding at any time > - * is determined by > - * > - * [mmap_alloc * MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST] > - * > - * where mmap_alloc < MAX_DYNAMIC_MEM. > - * > - * TODO: > - * mmap_alloc is initialised to 2 and should be adjustable on the fly via > - * sysfs. 
> - */ > -#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE) > -#define MAX_DYNAMIC_MEM BLK_RING_SIZE > -#define MAX_PENDING_REQS BLK_RING_SIZE > -#define MMAP_PAGES (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST) > -#define MMAP_VADDR(_start, _req, _seg) \ > - (_start + \ > - ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \ > - ((_seg) * PAGE_SIZE)) > - > -struct grant_handle_pair { > - grant_handle_t kernel; > - grant_handle_t user; > -}; > -#define INVALID_GRANT_HANDLE 0xFFFF > - > -struct blktap_handle { > - unsigned int ring; > - unsigned int device; > - unsigned int minor; > -}; > - > -struct blktap_params { > - char name[BLKTAP2_MAX_MESSAGE_LEN]; > - unsigned long long capacity; > - unsigned long sector_size; > -}; > - > struct blktap_device { > spinlock_t lock; > struct gendisk *gd; > }; > > +struct blktap_request; > + > struct blktap_ring { > struct task_struct *task; > > struct vm_area_struct *vma; > - struct blkif_front_ring ring; > + blktap_front_ring_t ring; > unsigned long ring_vstart; > unsigned long user_vstart; > > int n_pending; > - struct blktap_request *pending[MAX_PENDING_REQS]; > + struct blktap_request *pending[BLKTAP_RING_SIZE]; > > wait_queue_head_t poll_wait; > > @@ -128,8 +80,8 @@ struct blktap_request { > int operation; > struct timeval time; > > - struct scatterlist sg_table[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > - struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > + struct scatterlist sg_table[BLKTAP_SEGMENT_MAX]; > + struct page *pages[BLKTAP_SEGMENT_MAX]; > int nr_pages; > }; > > @@ -148,7 +100,7 @@ struct blktap { > > wait_queue_head_t remove_wait; > struct work_struct remove_work; > - char name[BLKTAP2_MAX_MESSAGE_LEN]; > + char name[BLKTAP_NAME_MAX]; > > struct blktap_statistics stats; > }; > @@ -189,7 +141,7 @@ void blktap_sysfs_destroy(struct blktap *); > int blktap_device_init(void); > void blktap_device_exit(void); > size_t blktap_device_debug(struct blktap *, char *, size_t); > -int 
blktap_device_create(struct blktap *, struct blktap_params *); > +int blktap_device_create(struct blktap *, struct blktap_device_info *); > int blktap_device_destroy(struct blktap *); > void blktap_device_destroy_sync(struct blktap *); > void blktap_device_run_queue(struct blktap *); > diff --git a/drivers/xen/blktap/control.c b/drivers/xen/blktap/control.c > index f339bba..57b1a10 100644 > --- a/drivers/xen/blktap/control.c > +++ b/drivers/xen/blktap/control.c > @@ -127,19 +127,19 @@ blktap_control_ioctl(struct inode *inode, struct file *filp, > struct blktap *tap; > > switch (cmd) { > - case BLKTAP2_IOCTL_ALLOC_TAP: { > - struct blktap_handle h; > + case BLKTAP_IOCTL_ALLOC_TAP: { > + struct blktap_info info; > void __user *ptr = (void __user*)arg; > > tap = blktap_control_create_tap(); > if (!tap) > return -ENOMEM; > > - h.ring = blktap_ring_major; > - h.device = blktap_device_major; > - h.minor = tap->minor; > + info.ring_major = blktap_ring_major; > + info.bdev_major = blktap_device_major; > + info.ring_minor = tap->minor; > > - if (copy_to_user(ptr, &h, sizeof(h))) { > + if (copy_to_user(ptr, &info, sizeof(info))) { > blktap_control_destroy_tap(tap); > return -EFAULT; > } > @@ -147,7 +147,7 @@ blktap_control_ioctl(struct inode *inode, struct file *filp, > return 0; > } > > - case BLKTAP2_IOCTL_FREE_TAP: { > + case BLKTAP_IOCTL_FREE_TAP: { > int minor = arg; > > if (minor > MAX_BLKTAP_DEVICE) > diff --git a/drivers/xen/blktap/device.c b/drivers/xen/blktap/device.c > index fce2769..6bb04bd 100644 > --- a/drivers/xen/blktap/device.c > +++ b/drivers/xen/blktap/device.c > @@ -186,7 +186,7 @@ blktap_device_make_request(struct blktap *tap, struct request *rq) > write ? ''w'' : ''r'', blk_rq_bytes(rq), nsegs); > > request->rq = rq; > - request->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ; > + request->operation = write ? 
BLKTAP_OP_WRITE : BLKTAP_OP_READ; > > err = blktap_request_get_pages(tap, request, nsegs); > if (err) > @@ -276,7 +276,7 @@ blktap_device_do_request(struct request_queue *rq) > > static void > blktap_device_configure(struct blktap *tap, > - struct blktap_params *params) > + struct blktap_device_info *params) > { > struct request_queue *rq; > struct blktap_device *dev = &tap->device; > @@ -297,8 +297,8 @@ blktap_device_configure(struct blktap *tap, > blk_queue_max_segment_size(rq, PAGE_SIZE); > > /* Ensure a merged request will fit in a single I/O ring slot. */ > - blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); > - blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); > + blk_queue_max_phys_segments(rq, BLKTAP_SEGMENT_MAX); > + blk_queue_max_hw_segments(rq, BLKTAP_SEGMENT_MAX); > > /* Make sure buffer addresses are sector-aligned. */ > blk_queue_dma_alignment(rq, 511); > @@ -311,7 +311,7 @@ blktap_device_configure(struct blktap *tap, > > static int > blktap_device_validate_params(struct blktap *tap, > - struct blktap_params *params) > + struct blktap_device_info *params) > { > struct device *dev = tap->ring.dev; > int sector_order, name_sz; > @@ -425,7 +425,7 @@ blktap_device_destroy_sync(struct blktap *tap) > } > > int > -blktap_device_create(struct blktap *tap, struct blktap_params *params) > +blktap_device_create(struct blktap *tap, struct blktap_device_info *params) > { > int minor, err; > struct gendisk *gd; > diff --git a/drivers/xen/blktap/request.c b/drivers/xen/blktap/request.c > index 9bef48c..8cfd6c9 100644 > --- a/drivers/xen/blktap/request.c > +++ b/drivers/xen/blktap/request.c > @@ -7,18 +7,18 @@ > #include "blktap.h" > > /* max pages per shared pool. just to prevent accidental dos. */ > -#define POOL_MAX_PAGES (256*BLKIF_MAX_SEGMENTS_PER_REQUEST) > +#define POOL_MAX_PAGES (256*BLKTAP_SEGMENT_MAX) > > /* default page pool size. 
when considering to shrink a shared pool, > * note that paused tapdisks may grab a whole lot of pages for a long > * time. */ > -#define POOL_DEFAULT_PAGES (2 * MMAP_PAGES) > +#define POOL_DEFAULT_PAGES (2 * BLKTAP_RING_SIZE * BLKTAP_SEGMENT_MAX) > > /* max number of pages allocatable per request. */ > -#define POOL_MAX_REQUEST_PAGES BLKIF_MAX_SEGMENTS_PER_REQUEST > +#define POOL_MAX_REQUEST_PAGES BLKTAP_SEGMENT_MAX > > /* min request structs per pool. These grow dynamically. */ > -#define POOL_MIN_REQS BLK_RING_SIZE > +#define POOL_MIN_REQS BLKTAP_RING_SIZE > > static struct kset *pool_set; > > diff --git a/drivers/xen/blktap/ring.c b/drivers/xen/blktap/ring.c > index 6b86be5..9442a64 100644 > --- a/drivers/xen/blktap/ring.c > +++ b/drivers/xen/blktap/ring.c > @@ -18,7 +18,7 @@ static struct cdev blktap_ring_cdev; > > static void > blktap_ring_read_response(struct blktap *tap, > - const struct blkif_response *rsp) > + const blktap_ring_rsp_t *rsp) > { > struct blktap_ring *ring = &tap->ring; > struct blktap_request *request; > @@ -27,7 +27,7 @@ blktap_ring_read_response(struct blktap *tap, > request = NULL; > > usr_idx = rsp->id; > - if (usr_idx < 0 || usr_idx >= MAX_PENDING_REQS) { > + if (usr_idx < 0 || usr_idx >= BLKTAP_RING_SIZE) { > err = -ERANGE; > goto invalid; > } > @@ -48,7 +48,7 @@ blktap_ring_read_response(struct blktap *tap, > "request %d [%p] response: %d\n", > request->usr_idx, request, rsp->status); > > - err = rsp->status == BLKIF_RSP_OKAY ? 0 : -EIO; > + err = rsp->status == BLKTAP_RSP_OKAY ? 
0 : -EIO; > end_request: > blktap_device_end_request(tap, request, err); > return; > @@ -67,7 +67,7 @@ static void > blktap_read_ring(struct blktap *tap) > { > struct blktap_ring *ring = &tap->ring; > - struct blkif_response rsp; > + blktap_ring_rsp_t rsp; > RING_IDX rc, rp; > > down_read(&current->mm->mmap_sem); > @@ -90,6 +90,11 @@ blktap_read_ring(struct blktap *tap) > up_read(&current->mm->mmap_sem); > } > > +#define MMAP_VADDR(_start, _req, _seg) \ > + ((_start) + \ > + ((_req) * BLKTAP_SEGMENT_MAX * BLKTAP_PAGE_SIZE) + \ > + ((_seg) * BLKTAP_PAGE_SIZE)) > + > static int blktap_ring_fault(struct vm_area_struct *vma, struct vm_fault *vmf) > { > return VM_FAULT_SIGBUS; > @@ -102,7 +107,7 @@ blktap_ring_fail_pending(struct blktap *tap) > struct blktap_request *request; > int usr_idx; > > - for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) { > + for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) { > request = ring->pending[usr_idx]; > if (!request) > continue; > @@ -154,7 +159,7 @@ blktap_ring_map_request(struct blktap *tap, > int seg, err = 0; > int write; > > - write = request->operation == BLKIF_OP_WRITE; > + write = request->operation == BLKTAP_OP_WRITE; > > for (seg = 0; seg < request->nr_pages; seg++) { > if (write) > @@ -182,7 +187,7 @@ blktap_ring_unmap_request(struct blktap *tap, > > uaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, 0); > size = request->nr_pages << PAGE_SHIFT; > - read = request->operation == BLKIF_OP_READ; > + read = request->operation == BLKTAP_OP_READ; > > if (read) > for (seg = 0; seg < request->nr_pages; seg++) > @@ -217,11 +222,11 @@ blktap_ring_make_request(struct blktap *tap) > if (!request) > return ERR_PTR(-ENOMEM); > > - for (usr_idx = 0; usr_idx < BLK_RING_SIZE; usr_idx++) > + for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) > if (!ring->pending[usr_idx]) > break; > > - BUG_ON(usr_idx >= BLK_RING_SIZE); > + BUG_ON(usr_idx >= BLKTAP_RING_SIZE); > > request->tap = tap; > request->usr_idx = usr_idx; > @@ -237,7 
+242,7 @@ blktap_ring_submit_request(struct blktap *tap, > struct blktap_request *request) > { > struct blktap_ring *ring = &tap->ring; > - struct blkif_request *breq; > + blktap_ring_req_t *breq; > struct scatterlist *sg; > int i, nsecs = 0; > > @@ -248,12 +253,12 @@ blktap_ring_submit_request(struct blktap *tap, > > breq->id = request->usr_idx; > breq->sector_number = blk_rq_pos(request->rq); > - breq->handle = 0; > + breq->__pad = 0; > breq->operation = request->operation; > breq->nr_segments = request->nr_pages; > > blktap_for_each_sg(sg, request, i) { > - struct blkif_request_segment *seg = &breq->seg[i]; > + struct blktap_segment *seg = &breq->seg[i]; > int first, count; > > count = sg->length >> 9; > @@ -270,12 +275,12 @@ blktap_ring_submit_request(struct blktap *tap, > do_gettimeofday(&request->time); > > > - if (request->operation == BLKIF_OP_WRITE) { > + if (request->operation == BLKTAP_OP_WRITE) { > tap->stats.st_wr_sect += nsecs; > tap->stats.st_wr_req++; > } > > - if (request->operation == BLKIF_OP_READ) { > + if (request->operation == BLKTAP_OP_READ) { > tap->stats.st_rd_sect += nsecs; > tap->stats.st_rd_req++; > } > @@ -327,7 +332,7 @@ blktap_ring_mmap(struct file *filp, struct vm_area_struct *vma) > { > struct blktap *tap = filp->private_data; > struct blktap_ring *ring = &tap->ring; > - struct blkif_sring *sring; > + blktap_sring_t *sring; > struct page *page = NULL; > int err; > > @@ -384,25 +389,25 @@ blktap_ring_ioctl(struct inode *inode, struct file *filp, > return -EACCES; > > switch(cmd) { > - case BLKTAP2_IOCTL_KICK_FE: > + case BLKTAP_IOCTL_RESPOND: > > blktap_read_ring(tap); > return 0; > > - case BLKTAP2_IOCTL_CREATE_DEVICE: { > - struct blktap_params params; > + case BLKTAP_IOCTL_CREATE_DEVICE: { > + struct blktap_device_info info; > void __user *ptr = (void *)arg; > > if (!arg) > return -EINVAL; > > - if (copy_from_user(&params, ptr, sizeof(params))) > + if (copy_from_user(&info, ptr, sizeof(info))) > return -EFAULT; > > - return 
blktap_device_create(tap, &params); > + return blktap_device_create(tap, &info); > } > > - case BLKTAP2_IOCTL_REMOVE_DEVICE: > + case BLKTAP_IOCTL_REMOVE_DEVICE: > > return blktap_device_destroy(tap); > } > @@ -482,7 +487,7 @@ blktap_ring_debug(struct blktap *tap, char *buf, size_t size) > s += snprintf(s, end - s, > "begin pending:%d\n", ring->n_pending); > > - for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) { > + for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) { > struct blktap_request *request; > struct timeval *time; > int write; > @@ -491,7 +496,7 @@ blktap_ring_debug(struct blktap *tap, char *buf, size_t size) > if (!request) > continue; > > - write = request->operation == BLKIF_OP_WRITE; > + write = request->operation == BLKTAP_OP_WRITE; > time = &request->time; > > s += snprintf(s, end - s, > diff --git a/drivers/xen/blktap/sysfs.c b/drivers/xen/blktap/sysfs.c > index 7bbfea8..182de9a 100644 > --- a/drivers/xen/blktap/sysfs.c > +++ b/drivers/xen/blktap/sysfs.c > @@ -20,7 +20,7 @@ blktap_sysfs_set_name(struct device *dev, struct device_attribute *attr, const c > if (!tap) > return 0; > > - if (size >= BLKTAP2_MAX_MESSAGE_LEN) > + if (size >= BLKTAP_NAME_MAX) > return -ENAMETOOLONG; > > if (strnlen(buf, size) != size) > @@ -75,8 +75,8 @@ blktap_sysfs_remove_device(struct device *dev, > goto wait; > > if (tap->ring.vma) { > - blkif_sring_t *sring = tap->ring.ring.sring; > - sring->private.tapif_user.msg = BLKTAP2_RING_MESSAGE_CLOSE; > + blktap_sring_t *sring = tap->ring.ring.sring; > + sring->private.tapif_user.msg = BLKTAP_RING_MESSAGE_CLOSE; > blktap_ring_kick_user(tap); > } else { > INIT_WORK(&tap->remove_work, blktap_sysfs_remove_work); > diff --git a/include/linux/blktap.h b/include/linux/blktap.h > new file mode 100644 > index 0000000..ec33429 > --- /dev/null > +++ b/include/linux/blktap.h > @@ -0,0 +1,85 @@ > +/* > + * Copyright (c) 2011, XenSource Inc. > + * All rights reserved. 
> + */ > + > +#ifndef _LINUX_BLKTAP_H > +#define _LINUX_BLKTAP_H > + > +/* > + * Control > + */ > + > +#define BLKTAP_IOCTL_RESPOND 1 > +#define BLKTAP_IOCTL_ALLOC_TAP 200 > +#define BLKTAP_IOCTL_FREE_TAP 201 > +#define BLKTAP_IOCTL_CREATE_DEVICE 202 > +#define BLKTAP_IOCTL_REMOVE_DEVICE 207 > + > +#define BLKTAP_NAME_MAX 256 > + > +struct blktap_info { > + unsigned int ring_major; > + unsigned int bdev_major; > + unsigned int ring_minor; > +}; > + > +struct blktap_device_info { > + char name[BLKTAP_NAME_MAX]; > + unsigned long long capacity; > + unsigned long sector_size; > +}; > + > +/* > + * I/O ring > + */ > + > +#ifdef __KERNEL__ > +#define BLKTAP_PAGE_SIZE PAGE_SIZE > +#endif > + > +#include <xen/interface/io/ring.h> > + > +typedef struct blktap_ring_request blktap_ring_req_t; > +typedef struct blktap_ring_response blktap_ring_rsp_t; > + > +struct blktap_segment { > + uint32_t __pad; > + uint8_t first_sect; > + uint8_t last_sect; > +}; > + > +#define BLKTAP_OP_READ 0 > +#define BLKTAP_OP_WRITE 1 > + > +#define BLKTAP_SEGMENT_MAX 11 > + > +struct blktap_ring_request { > + uint8_t operation; > + uint8_t nr_segments; > + uint16_t __pad; > + uint64_t id; > + uint64_t sector_number; > + struct blktap_segment seg[BLKTAP_SEGMENT_MAX]; > +}; > + > +#define BLKTAP_RSP_EOPNOTSUPP -2 > +#define BLKTAP_RSP_ERROR -1 > +#define BLKTAP_RSP_OKAY 0 > + > +struct blktap_ring_response { > + uint64_t id; > + uint8_t operation; > + int16_t status; > +}; > + > +DEFINE_RING_TYPES(blktap, struct blktap_ring_request, struct blktap_ring_response); > +#define BLKTAP_RING_SIZE __CONST_RING_SIZE(blktap, BLKTAP_PAGE_SIZE) > + > +/* > + * Ring messages (DEPRECATED) > + */ > + > +#define BLKTAP_RING_MESSAGE_CLOSE 3 > + > +#endif /* _LINUX_BLKTAP_H */ > -- > 1.7.0.4 > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xensource.com > http://lists.xensource.com/xen-devel_______________________________________________ Xen-devel mailing list 
Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Ian Campbell
2011-Mar-09 10:23 UTC
Re: [Xen-devel] [PATCH 03/10] blktap: Move from drivers/xen to drivers/block
On Wed, 2011-03-09 at 00:42 +0000, Daniel Stodden wrote: > From: Daniel Stodden <dns@somacoma.net> > > Signed-off-by: Daniel Stodden <daniel.stodden@citrix.com> > --- > drivers/block/Kconfig | 9 + > drivers/block/Makefile | 1 + > drivers/block/blktap/Makefile | 3 + > drivers/block/blktap/blktap.h | 161 +++++++++++ > drivers/block/blktap/control.c | 315 +++++++++++++++++++++ > drivers/block/blktap/device.c | 551 +++++++++++++++++++++++++++++++++++++ > drivers/block/blktap/request.c | 418 ++++++++++++++++++++++++++++ > drivers/block/blktap/ring.c | 595 ++++++++++++++++++++++++++++++++++++++++ > drivers/block/blktap/sysfs.c | 288 +++++++++++++++++++ > drivers/xen/Kconfig | 11 - > drivers/xen/Makefile | 1 - > drivers/xen/blktap/Makefile | 3 - > drivers/xen/blktap/blktap.h | 161 ----------- > drivers/xen/blktap/control.c | 315 --------------------- > drivers/xen/blktap/device.c | 551 ------------------------------------- > drivers/xen/blktap/request.c | 418 ---------------------------- > drivers/xen/blktap/ring.c | 595 ---------------------------------------- > drivers/xen/blktap/sysfs.c | 288 ------------------- > 18 files changed, 2341 insertions(+), 2343 deletions(-) > create mode 100644 drivers/block/blktap/Makefile > create mode 100644 drivers/block/blktap/blktap.h > create mode 100644 drivers/block/blktap/control.c > create mode 100644 drivers/block/blktap/device.c > create mode 100644 drivers/block/blktap/request.c > create mode 100644 drivers/block/blktap/ring.c > create mode 100644 drivers/block/blktap/sysfs.c > delete mode 100644 drivers/xen/blktap/Makefile > delete mode 100644 drivers/xen/blktap/blktap.h > delete mode 100644 drivers/xen/blktap/control.c > delete mode 100644 drivers/xen/blktap/device.c > delete mode 100644 drivers/xen/blktap/request.c > delete mode 100644 drivers/xen/blktap/ring.c > delete mode 100644 drivers/xen/blktap/sysfs.c Given the right options "git format-patch" should display this as: drivers/block/Kconfig | 9 +++++++++ 
drivers/block/Makefile | 1 + drivers/{xen => block}/blktap/Makefile | 2 +- drivers/{xen => block}/blktap/blktap.h | 0 drivers/{xen => block}/blktap/control.c | 0 drivers/{xen => block}/blktap/device.c | 0 drivers/{xen => block}/blktap/request.c | 0 drivers/{xen => block}/blktap/ring.c | 0 drivers/{xen => block}/blktap/sysfs.c | 0 drivers/xen/Kconfig | 11 ----------- drivers/xen/Makefile | 1 - 11 files changed, 11 insertions(+), 13 deletions(-) [...] diff --git a/drivers/xen/blktap/blktap.h b/drivers/block/blktap/blktap.h similarity index 100% rename from drivers/xen/blktap/blktap.h rename to drivers/block/blktap/blktap.h [...] which is far easier to review. I think it's -M you want. Ian. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel Stodden
2011-Mar-09 21:50 UTC
Re: [Xen-devel] [PATCH 03/10] blktap: Move from drivers/xen to drivers/block
On Wed, 2011-03-09 at 05:23 -0500, Ian Campbell wrote:> On Wed, 2011-03-09 at 00:42 +0000, Daniel Stodden wrote: > > From: Daniel Stodden <dns@somacoma.net> > > > > Signed-off-by: Daniel Stodden <daniel.stodden@citrix.com> > > --- > > drivers/block/Kconfig | 9 + > > drivers/block/Makefile | 1 + > > drivers/block/blktap/Makefile | 3 + > > drivers/block/blktap/blktap.h | 161 +++++++++++ > > drivers/block/blktap/control.c | 315 +++++++++++++++++++++ > > drivers/block/blktap/device.c | 551 +++++++++++++++++++++++++++++++++++++ > > drivers/block/blktap/request.c | 418 ++++++++++++++++++++++++++++ > > drivers/block/blktap/ring.c | 595 ++++++++++++++++++++++++++++++++++++++++ > > drivers/block/blktap/sysfs.c | 288 +++++++++++++++++++ > > drivers/xen/Kconfig | 11 - > > drivers/xen/Makefile | 1 - > > drivers/xen/blktap/Makefile | 3 - > > drivers/xen/blktap/blktap.h | 161 ----------- > > drivers/xen/blktap/control.c | 315 --------------------- > > drivers/xen/blktap/device.c | 551 ------------------------------------- > > drivers/xen/blktap/request.c | 418 ---------------------------- > > drivers/xen/blktap/ring.c | 595 ---------------------------------------- > > drivers/xen/blktap/sysfs.c | 288 ------------------- > > 18 files changed, 2341 insertions(+), 2343 deletions(-) > > create mode 100644 drivers/block/blktap/Makefile > > create mode 100644 drivers/block/blktap/blktap.h > > create mode 100644 drivers/block/blktap/control.c > > create mode 100644 drivers/block/blktap/device.c > > create mode 100644 drivers/block/blktap/request.c > > create mode 100644 drivers/block/blktap/ring.c > > create mode 100644 drivers/block/blktap/sysfs.c > > delete mode 100644 drivers/xen/blktap/Makefile > > delete mode 100644 drivers/xen/blktap/blktap.h > > delete mode 100644 drivers/xen/blktap/control.c > > delete mode 100644 drivers/xen/blktap/device.c > > delete mode 100644 drivers/xen/blktap/request.c > > delete mode 100644 drivers/xen/blktap/ring.c > > delete mode 100644 
drivers/xen/blktap/sysfs.c > > Given the right options "git format-patch" should display this as: > drivers/block/Kconfig | 9 +++++++++ > drivers/block/Makefile | 1 + > drivers/{xen => block}/blktap/Makefile | 2 +- > drivers/{xen => block}/blktap/blktap.h | 0 > drivers/{xen => block}/blktap/control.c | 0 > drivers/{xen => block}/blktap/device.c | 0 > drivers/{xen => block}/blktap/request.c | 0 > drivers/{xen => block}/blktap/ring.c | 0 > drivers/{xen => block}/blktap/sysfs.c | 0 > drivers/xen/Kconfig | 11 ----------- > drivers/xen/Makefile | 1 - > 11 files changed, 11 insertions(+), 13 deletions(-) > [...] > diff --git a/drivers/xen/blktap/blktap.h b/drivers/block/blktap/blktap.h > similarity index 100% > rename from drivers/xen/blktap/blktap.h > rename to drivers/block/blktap/blktap.h > [...] > which is far easier to review. > > I think it's -M you want. Hmm, sounds true. Back then when I created that change, I'm pretty sure I performed a number of git mv, applied some fixup and committed. I rebased that tree a couple times, though. Do you mean to say I must have broken that commit somewhere? Okay, I'll try it again and see if that improves the output. Daniel _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel Stodden
2011-Mar-09 22:26 UTC
Re: [Xen-devel] [PATCH 03/10] blktap: Move from drivers/xen to drivers/block
On Wed, 2011-03-09 at 16:50 -0500, Daniel Stodden wrote:> On Wed, 2011-03-09 at 05:23 -0500, Ian Campbell wrote: > > On Wed, 2011-03-09 at 00:42 +0000, Daniel Stodden wrote: > > > From: Daniel Stodden <dns@somacoma.net> > > > > > > Signed-off-by: Daniel Stodden <daniel.stodden@citrix.com> > > > --- > > > drivers/block/Kconfig | 9 + > > > drivers/block/Makefile | 1 + > > > drivers/block/blktap/Makefile | 3 + > > > drivers/block/blktap/blktap.h | 161 +++++++++++ > > > drivers/block/blktap/control.c | 315 +++++++++++++++++++++ > > > drivers/block/blktap/device.c | 551 +++++++++++++++++++++++++++++++++++++ > > > drivers/block/blktap/request.c | 418 ++++++++++++++++++++++++++++ > > > drivers/block/blktap/ring.c | 595 ++++++++++++++++++++++++++++++++++++++++ > > > drivers/block/blktap/sysfs.c | 288 +++++++++++++++++++ > > > drivers/xen/Kconfig | 11 - > > > drivers/xen/Makefile | 1 - > > > drivers/xen/blktap/Makefile | 3 - > > > drivers/xen/blktap/blktap.h | 161 ----------- > > > drivers/xen/blktap/control.c | 315 --------------------- > > > drivers/xen/blktap/device.c | 551 ------------------------------------- > > > drivers/xen/blktap/request.c | 418 ---------------------------- > > > drivers/xen/blktap/ring.c | 595 ---------------------------------------- > > > drivers/xen/blktap/sysfs.c | 288 ------------------- > > > 18 files changed, 2341 insertions(+), 2343 deletions(-) > > > create mode 100644 drivers/block/blktap/Makefile > > > create mode 100644 drivers/block/blktap/blktap.h > > > create mode 100644 drivers/block/blktap/control.c > > > create mode 100644 drivers/block/blktap/device.c > > > create mode 100644 drivers/block/blktap/request.c > > > create mode 100644 drivers/block/blktap/ring.c > > > create mode 100644 drivers/block/blktap/sysfs.c > > > delete mode 100644 drivers/xen/blktap/Makefile > > > delete mode 100644 drivers/xen/blktap/blktap.h > > > delete mode 100644 drivers/xen/blktap/control.c > > > delete mode 100644 drivers/xen/blktap/device.c > > > 
delete mode 100644 drivers/xen/blktap/request.c > > > delete mode 100644 drivers/xen/blktap/ring.c > > > delete mode 100644 drivers/xen/blktap/sysfs.c > > > > Given the right options "git format-patch" should display this as: > > drivers/block/Kconfig | 9 +++++++++ > > drivers/block/Makefile | 1 + > > drivers/{xen => block}/blktap/Makefile | 2 +- > > drivers/{xen => block}/blktap/blktap.h | 0 > > drivers/{xen => block}/blktap/control.c | 0 > > drivers/{xen => block}/blktap/device.c | 0 > > drivers/{xen => block}/blktap/request.c | 0 > > drivers/{xen => block}/blktap/ring.c | 0 > > drivers/{xen => block}/blktap/sysfs.c | 0 > > drivers/xen/Kconfig | 11 ----------- > > drivers/xen/Makefile | 1 - > > 11 files changed, 11 insertions(+), 13 deletions(-) > > [...] > > diff --git a/drivers/xen/blktap/blktap.h b/drivers/block/blktap/blktap.h > > similarity index 100% > > rename from drivers/xen/blktap/blktap.h > > rename to drivers/block/blktap/blktap.h > > [...] > > which is far easier to review. > > > > I think it's -M you want. > > Hmm, sounds true. > > Back then when I created that change, I'm pretty sure I performed a > number of git mv, applied some fixup and committed. > > I rebased that tree a couple times, though. Do you mean to say I must > have broken that commit somehere. > > Okay, I'll try it again and see if that improves the output. Thinking of it: IIRC this diff is not only a plain rename, there were some additional changes made in source to have everything compile again. Could that be the reason? Want those split out? Thanks, Daniel _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel Stodden
2011-Mar-09 22:37 UTC
Re: [Xen-devel] [PATCH 01/10] blktap: Add include/linux/blktap.h
On Wed, 2011-03-09 at 05:18 -0500, Ian Campbell wrote: > On Wed, 2011-03-09 at 00:42 +0000, Daniel Stodden wrote: > > Moves blktap2 definitions into a common header file. > > > > Includes xen/interface/io/ring.h and new ring definitions. Makes > > blktap build independently from xen-devel headers. > > > > New blktap_ring structs are fully congrent to blkif rings, for binary > > compat. > > > > Signed-off-by: Daniel Stodden <daniel.stodden@citrix.com> > > --- > > drivers/xen/blktap/blktap.h | 66 ++++---------------------------- > > drivers/xen/blktap/control.c | 14 +++--- > > drivers/xen/blktap/device.c | 12 +++--- > > drivers/xen/blktap/request.c | 8 ++-- > > drivers/xen/blktap/ring.c | 51 ++++++++++++++----------- > > drivers/xen/blktap/sysfs.c | 6 +- > > include/linux/blktap.h | 85 ++++++++++++++++++++++++++++++++++++++++++ > > This new file defines the kernel<->user (tapdisk process) ring protocol, > right? Yes. It's exactly as far as I can go right now maintaining compatibility. The main objective was rather to get off xen-devel headers in favour of kernel sources. - includes xen/interface/io/ring. - doesn't include xen/interface/io/blkif. - certainly doesn't include xen/interface/blkif.h (the alignment stuff for guests). The old code used blkif and struct blkif_* definitions. The new one got its own struct blktap_*s, identical as far as READ/WRITE commands go. But this also means one can develop the userland stuff independently from blkif.h. New commands (flush, trim, ...) get quite a bit more useful freedom. > I think its proper home would be under include/xen somewhere, which is > where the gntdev and evtchn etc driver interfaces are defined. A very long time ago, a somewhat obvious choice was made to use xen ring headers for the blktap user <-> kernel interface. So this header presently still wants xen/interface. It doesn't depend on anything xenish, nor is this a Xen driver anymore. 
I thought even with that header dependency, that's somewhat a linux/blktap.h already, so I made it so. I'm feeling some heat from boston-newxen people because in XCP I'm actually building blktap.hg against the kernel devel rpm contents right now. That's got to vanish. It's great for hacking extensions, but the component dependency is a bit gross, admittedly. Once doing so, it's a standalone kernel blktap.h which can be copied over into userland trees, without additional definitions included. This isn't sick: Blktap2 doesn't need the full ring.h macro contents with memory barriers etc anyway, because the userland dispatching is synchronous. It could be just bare structs, and the standard PUSH/PULL macros are rather decoration and could be dropped (or reimplemented as memcpy()s). Will this justify linux/blktap.h? One could also revert that ring.h pad space hack. I'm not passionate about it. If you still disagree, I'll give up and we move it elsewhere. In this case, it could as well go back into drivers/block/blktap, and I'll just give up on 'development mode' hacks to verify tapdisk builds against the kernel tree altogether. > Where is the canonical definition of this interface stored? In the > kernel tree or the hypervisor tree? You mean blktap.h? This is not a xen driver. I'd call this the canonical definition, a reference with what that kernel/driver revision supports, that's why I put it there. It wouldn't belong elsewhere, except for occasionally updated verbatim copies in updated blktap sources, to unstress build dependencies. Daniel > Ian. 
> > > 7 files changed, 142 insertions(+), 100 deletions(-) > > create mode 100644 include/linux/blktap.h > > > > diff --git a/drivers/xen/blktap/blktap.h b/drivers/xen/blktap/blktap.h > > index fe63fc9..1318cad 100644 > > --- a/drivers/xen/blktap/blktap.h > > +++ b/drivers/xen/blktap/blktap.h > > @@ -6,7 +6,7 @@ > > #include <linux/cdev.h> > > #include <linux/init.h> > > #include <linux/scatterlist.h> > > -#include <xen/blkif.h> > > +#include <linux/blktap.h> > > > > extern int blktap_debug_level; > > extern int blktap_ring_major; > > @@ -30,74 +30,26 @@ extern int blktap_device_major; > > #define BLKTAP_DEVICE_CLOSED 5 > > #define BLKTAP_SHUTDOWN_REQUESTED 8 > > > > -/* blktap IOCTLs: */ > > -#define BLKTAP2_IOCTL_KICK_FE 1 > > -#define BLKTAP2_IOCTL_ALLOC_TAP 200 > > -#define BLKTAP2_IOCTL_FREE_TAP 201 > > -#define BLKTAP2_IOCTL_CREATE_DEVICE 202 > > -#define BLKTAP2_IOCTL_REMOVE_DEVICE 207 > > - > > -#define BLKTAP2_MAX_MESSAGE_LEN 256 > > - > > -#define BLKTAP2_RING_MESSAGE_CLOSE 3 > > - > > #define BLKTAP_REQUEST_FREE 0 > > #define BLKTAP_REQUEST_PENDING 1 > > > > -/* > > - * The maximum number of requests that can be outstanding at any time > > - * is determined by > > - * > > - * [mmap_alloc * MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST] > > - * > > - * where mmap_alloc < MAX_DYNAMIC_MEM. > > - * > > - * TODO: > > - * mmap_alloc is initialised to 2 and should be adjustable on the fly via > > - * sysfs. 
> > - */ > > -#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE) > > -#define MAX_DYNAMIC_MEM BLK_RING_SIZE > > -#define MAX_PENDING_REQS BLK_RING_SIZE > > -#define MMAP_PAGES (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST) > > -#define MMAP_VADDR(_start, _req, _seg) \ > > - (_start + \ > > - ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \ > > - ((_seg) * PAGE_SIZE)) > > - > > -struct grant_handle_pair { > > - grant_handle_t kernel; > > - grant_handle_t user; > > -}; > > -#define INVALID_GRANT_HANDLE 0xFFFF > > - > > -struct blktap_handle { > > - unsigned int ring; > > - unsigned int device; > > - unsigned int minor; > > -}; > > - > > -struct blktap_params { > > - char name[BLKTAP2_MAX_MESSAGE_LEN]; > > - unsigned long long capacity; > > - unsigned long sector_size; > > -}; > > - > > struct blktap_device { > > spinlock_t lock; > > struct gendisk *gd; > > }; > > > > +struct blktap_request; > > + > > struct blktap_ring { > > struct task_struct *task; > > > > struct vm_area_struct *vma; > > - struct blkif_front_ring ring; > > + blktap_front_ring_t ring; > > unsigned long ring_vstart; > > unsigned long user_vstart; > > > > int n_pending; > > - struct blktap_request *pending[MAX_PENDING_REQS]; > > + struct blktap_request *pending[BLKTAP_RING_SIZE]; > > > > wait_queue_head_t poll_wait; > > > > @@ -128,8 +80,8 @@ struct blktap_request { > > int operation; > > struct timeval time; > > > > - struct scatterlist sg_table[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > > - struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > > + struct scatterlist sg_table[BLKTAP_SEGMENT_MAX]; > > + struct page *pages[BLKTAP_SEGMENT_MAX]; > > int nr_pages; > > }; > > > > @@ -148,7 +100,7 @@ struct blktap { > > > > wait_queue_head_t remove_wait; > > struct work_struct remove_work; > > - char name[BLKTAP2_MAX_MESSAGE_LEN]; > > + char name[BLKTAP_NAME_MAX]; > > > > struct blktap_statistics stats; > > }; > > @@ -189,7 +141,7 @@ void blktap_sysfs_destroy(struct blktap *); > > 
int blktap_device_init(void); > > void blktap_device_exit(void); > > size_t blktap_device_debug(struct blktap *, char *, size_t); > > -int blktap_device_create(struct blktap *, struct blktap_params *); > > +int blktap_device_create(struct blktap *, struct blktap_device_info *); > > int blktap_device_destroy(struct blktap *); > > void blktap_device_destroy_sync(struct blktap *); > > void blktap_device_run_queue(struct blktap *); > > diff --git a/drivers/xen/blktap/control.c b/drivers/xen/blktap/control.c > > index f339bba..57b1a10 100644 > > --- a/drivers/xen/blktap/control.c > > +++ b/drivers/xen/blktap/control.c > > @@ -127,19 +127,19 @@ blktap_control_ioctl(struct inode *inode, struct file *filp, > > struct blktap *tap; > > > > switch (cmd) { > > - case BLKTAP2_IOCTL_ALLOC_TAP: { > > - struct blktap_handle h; > > + case BLKTAP_IOCTL_ALLOC_TAP: { > > + struct blktap_info info; > > void __user *ptr = (void __user*)arg; > > > > tap = blktap_control_create_tap(); > > if (!tap) > > return -ENOMEM; > > > > - h.ring = blktap_ring_major; > > - h.device = blktap_device_major; > > - h.minor = tap->minor; > > + info.ring_major = blktap_ring_major; > > + info.bdev_major = blktap_device_major; > > + info.ring_minor = tap->minor; > > > > - if (copy_to_user(ptr, &h, sizeof(h))) { > > + if (copy_to_user(ptr, &info, sizeof(info))) { > > blktap_control_destroy_tap(tap); > > return -EFAULT; > > } > > @@ -147,7 +147,7 @@ blktap_control_ioctl(struct inode *inode, struct file *filp, > > return 0; > > } > > > > - case BLKTAP2_IOCTL_FREE_TAP: { > > + case BLKTAP_IOCTL_FREE_TAP: { > > int minor = arg; > > > > if (minor > MAX_BLKTAP_DEVICE) > > diff --git a/drivers/xen/blktap/device.c b/drivers/xen/blktap/device.c > > index fce2769..6bb04bd 100644 > > --- a/drivers/xen/blktap/device.c > > +++ b/drivers/xen/blktap/device.c > > @@ -186,7 +186,7 @@ blktap_device_make_request(struct blktap *tap, struct request *rq) > > write ? 
''w'' : ''r'', blk_rq_bytes(rq), nsegs); > > > > request->rq = rq; > > - request->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ; > > + request->operation = write ? BLKTAP_OP_WRITE : BLKTAP_OP_READ; > > > > err = blktap_request_get_pages(tap, request, nsegs); > > if (err) > > @@ -276,7 +276,7 @@ blktap_device_do_request(struct request_queue *rq) > > > > static void > > blktap_device_configure(struct blktap *tap, > > - struct blktap_params *params) > > + struct blktap_device_info *params) > > { > > struct request_queue *rq; > > struct blktap_device *dev = &tap->device; > > @@ -297,8 +297,8 @@ blktap_device_configure(struct blktap *tap, > > blk_queue_max_segment_size(rq, PAGE_SIZE); > > > > /* Ensure a merged request will fit in a single I/O ring slot. */ > > - blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); > > - blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); > > + blk_queue_max_phys_segments(rq, BLKTAP_SEGMENT_MAX); > > + blk_queue_max_hw_segments(rq, BLKTAP_SEGMENT_MAX); > > > > /* Make sure buffer addresses are sector-aligned. */ > > blk_queue_dma_alignment(rq, 511); > > @@ -311,7 +311,7 @@ blktap_device_configure(struct blktap *tap, > > > > static int > > blktap_device_validate_params(struct blktap *tap, > > - struct blktap_params *params) > > + struct blktap_device_info *params) > > { > > struct device *dev = tap->ring.dev; > > int sector_order, name_sz; > > @@ -425,7 +425,7 @@ blktap_device_destroy_sync(struct blktap *tap) > > } > > > > int > > -blktap_device_create(struct blktap *tap, struct blktap_params *params) > > +blktap_device_create(struct blktap *tap, struct blktap_device_info *params) > > { > > int minor, err; > > struct gendisk *gd; > > diff --git a/drivers/xen/blktap/request.c b/drivers/xen/blktap/request.c > > index 9bef48c..8cfd6c9 100644 > > --- a/drivers/xen/blktap/request.c > > +++ b/drivers/xen/blktap/request.c > > @@ -7,18 +7,18 @@ > > #include "blktap.h" > > > > /* max pages per shared pool. 
just to prevent accidental dos. */ > > -#define POOL_MAX_PAGES (256*BLKIF_MAX_SEGMENTS_PER_REQUEST) > > +#define POOL_MAX_PAGES (256*BLKTAP_SEGMENT_MAX) > > > > /* default page pool size. when considering to shrink a shared pool, > > * note that paused tapdisks may grab a whole lot of pages for a long > > * time. */ > > -#define POOL_DEFAULT_PAGES (2 * MMAP_PAGES) > > +#define POOL_DEFAULT_PAGES (2 * BLKTAP_RING_SIZE * BLKTAP_SEGMENT_MAX) > > > > /* max number of pages allocatable per request. */ > > -#define POOL_MAX_REQUEST_PAGES BLKIF_MAX_SEGMENTS_PER_REQUEST > > +#define POOL_MAX_REQUEST_PAGES BLKTAP_SEGMENT_MAX > > > > /* min request structs per pool. These grow dynamically. */ > > -#define POOL_MIN_REQS BLK_RING_SIZE > > +#define POOL_MIN_REQS BLKTAP_RING_SIZE > > > > static struct kset *pool_set; > > > > diff --git a/drivers/xen/blktap/ring.c b/drivers/xen/blktap/ring.c > > index 6b86be5..9442a64 100644 > > --- a/drivers/xen/blktap/ring.c > > +++ b/drivers/xen/blktap/ring.c > > @@ -18,7 +18,7 @@ static struct cdev blktap_ring_cdev; > > > > static void > > blktap_ring_read_response(struct blktap *tap, > > - const struct blkif_response *rsp) > > + const blktap_ring_rsp_t *rsp) > > { > > struct blktap_ring *ring = &tap->ring; > > struct blktap_request *request; > > @@ -27,7 +27,7 @@ blktap_ring_read_response(struct blktap *tap, > > request = NULL; > > > > usr_idx = rsp->id; > > - if (usr_idx < 0 || usr_idx >= MAX_PENDING_REQS) { > > + if (usr_idx < 0 || usr_idx >= BLKTAP_RING_SIZE) { > > err = -ERANGE; > > goto invalid; > > } > > @@ -48,7 +48,7 @@ blktap_ring_read_response(struct blktap *tap, > > "request %d [%p] response: %d\n", > > request->usr_idx, request, rsp->status); > > > > - err = rsp->status == BLKIF_RSP_OKAY ? 0 : -EIO; > > + err = rsp->status == BLKTAP_RSP_OKAY ? 
0 : -EIO; > > end_request: > > blktap_device_end_request(tap, request, err); > > return; > > @@ -67,7 +67,7 @@ static void > > blktap_read_ring(struct blktap *tap) > > { > > struct blktap_ring *ring = &tap->ring; > > - struct blkif_response rsp; > > + blktap_ring_rsp_t rsp; > > RING_IDX rc, rp; > > > > down_read(&current->mm->mmap_sem); > > @@ -90,6 +90,11 @@ blktap_read_ring(struct blktap *tap) > > up_read(&current->mm->mmap_sem); > > } > > > > +#define MMAP_VADDR(_start, _req, _seg) \ > > + ((_start) + \ > > + ((_req) * BLKTAP_SEGMENT_MAX * BLKTAP_PAGE_SIZE) + \ > > + ((_seg) * BLKTAP_PAGE_SIZE)) > > + > > static int blktap_ring_fault(struct vm_area_struct *vma, struct vm_fault *vmf) > > { > > return VM_FAULT_SIGBUS; > > @@ -102,7 +107,7 @@ blktap_ring_fail_pending(struct blktap *tap) > > struct blktap_request *request; > > int usr_idx; > > > > - for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) { > > + for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) { > > request = ring->pending[usr_idx]; > > if (!request) > > continue; > > @@ -154,7 +159,7 @@ blktap_ring_map_request(struct blktap *tap, > > int seg, err = 0; > > int write; > > > > - write = request->operation == BLKIF_OP_WRITE; > > + write = request->operation == BLKTAP_OP_WRITE; > > > > for (seg = 0; seg < request->nr_pages; seg++) { > > if (write) > > @@ -182,7 +187,7 @@ blktap_ring_unmap_request(struct blktap *tap, > > > > uaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, 0); > > size = request->nr_pages << PAGE_SHIFT; > > - read = request->operation == BLKIF_OP_READ; > > + read = request->operation == BLKTAP_OP_READ; > > > > if (read) > > for (seg = 0; seg < request->nr_pages; seg++) > > @@ -217,11 +222,11 @@ blktap_ring_make_request(struct blktap *tap) > > if (!request) > > return ERR_PTR(-ENOMEM); > > > > - for (usr_idx = 0; usr_idx < BLK_RING_SIZE; usr_idx++) > > + for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) > > if (!ring->pending[usr_idx]) > > break; > > > > - BUG_ON(usr_idx >= 
BLK_RING_SIZE); > > + BUG_ON(usr_idx >= BLKTAP_RING_SIZE); > > > > request->tap = tap; > > request->usr_idx = usr_idx; > > @@ -237,7 +242,7 @@ blktap_ring_submit_request(struct blktap *tap, > > struct blktap_request *request) > > { > > struct blktap_ring *ring = &tap->ring; > > - struct blkif_request *breq; > > + blktap_ring_req_t *breq; > > struct scatterlist *sg; > > int i, nsecs = 0; > > > > @@ -248,12 +253,12 @@ blktap_ring_submit_request(struct blktap *tap, > > > > breq->id = request->usr_idx; > > breq->sector_number = blk_rq_pos(request->rq); > > - breq->handle = 0; > > + breq->__pad = 0; > > breq->operation = request->operation; > > breq->nr_segments = request->nr_pages; > > > > blktap_for_each_sg(sg, request, i) { > > - struct blkif_request_segment *seg = &breq->seg[i]; > > + struct blktap_segment *seg = &breq->seg[i]; > > int first, count; > > > > count = sg->length >> 9; > > @@ -270,12 +275,12 @@ blktap_ring_submit_request(struct blktap *tap, > > do_gettimeofday(&request->time); > > > > > > - if (request->operation == BLKIF_OP_WRITE) { > > + if (request->operation == BLKTAP_OP_WRITE) { > > tap->stats.st_wr_sect += nsecs; > > tap->stats.st_wr_req++; > > } > > > > - if (request->operation == BLKIF_OP_READ) { > > + if (request->operation == BLKTAP_OP_READ) { > > tap->stats.st_rd_sect += nsecs; > > tap->stats.st_rd_req++; > > } > > @@ -327,7 +332,7 @@ blktap_ring_mmap(struct file *filp, struct vm_area_struct *vma) > > { > > struct blktap *tap = filp->private_data; > > struct blktap_ring *ring = &tap->ring; > > - struct blkif_sring *sring; > > + blktap_sring_t *sring; > > struct page *page = NULL; > > int err; > > > > @@ -384,25 +389,25 @@ blktap_ring_ioctl(struct inode *inode, struct file *filp, > > return -EACCES; > > > > switch(cmd) { > > - case BLKTAP2_IOCTL_KICK_FE: > > + case BLKTAP_IOCTL_RESPOND: > > > > blktap_read_ring(tap); > > return 0; > > > > - case BLKTAP2_IOCTL_CREATE_DEVICE: { > > - struct blktap_params params; > > + case 
BLKTAP_IOCTL_CREATE_DEVICE: { > > + struct blktap_device_info info; > > void __user *ptr = (void *)arg; > > > > if (!arg) > > return -EINVAL; > > > > - if (copy_from_user(&params, ptr, sizeof(params))) > > + if (copy_from_user(&info, ptr, sizeof(info))) > > return -EFAULT; > > > > - return blktap_device_create(tap, &params); > > + return blktap_device_create(tap, &info); > > } > > > > - case BLKTAP2_IOCTL_REMOVE_DEVICE: > > + case BLKTAP_IOCTL_REMOVE_DEVICE: > > > > return blktap_device_destroy(tap); > > } > > @@ -482,7 +487,7 @@ blktap_ring_debug(struct blktap *tap, char *buf, size_t size) > > s += snprintf(s, end - s, > > "begin pending:%d\n", ring->n_pending); > > > > - for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) { > > + for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) { > > struct blktap_request *request; > > struct timeval *time; > > int write; > > @@ -491,7 +496,7 @@ blktap_ring_debug(struct blktap *tap, char *buf, size_t size) > > if (!request) > > continue; > > > > - write = request->operation == BLKIF_OP_WRITE; > > + write = request->operation == BLKTAP_OP_WRITE; > > time = &request->time; > > > > s += snprintf(s, end - s, > > diff --git a/drivers/xen/blktap/sysfs.c b/drivers/xen/blktap/sysfs.c > > index 7bbfea8..182de9a 100644 > > --- a/drivers/xen/blktap/sysfs.c > > +++ b/drivers/xen/blktap/sysfs.c > > @@ -20,7 +20,7 @@ blktap_sysfs_set_name(struct device *dev, struct device_attribute *attr, const c > > if (!tap) > > return 0; > > > > - if (size >= BLKTAP2_MAX_MESSAGE_LEN) > > + if (size >= BLKTAP_NAME_MAX) > > return -ENAMETOOLONG; > > > > if (strnlen(buf, size) != size) > > @@ -75,8 +75,8 @@ blktap_sysfs_remove_device(struct device *dev, > > goto wait; > > > > if (tap->ring.vma) { > > - blkif_sring_t *sring = tap->ring.ring.sring; > > - sring->private.tapif_user.msg = BLKTAP2_RING_MESSAGE_CLOSE; > > + blktap_sring_t *sring = tap->ring.ring.sring; > > + sring->private.tapif_user.msg = BLKTAP_RING_MESSAGE_CLOSE; > > 
blktap_ring_kick_user(tap); > > } else { > > INIT_WORK(&tap->remove_work, blktap_sysfs_remove_work); > > diff --git a/include/linux/blktap.h b/include/linux/blktap.h > > new file mode 100644 > > index 0000000..ec33429 > > --- /dev/null > > +++ b/include/linux/blktap.h > > @@ -0,0 +1,85 @@ > > +/* > > + * Copyright (c) 2011, XenSource Inc. > > + * All rights reserved. > > + */ > > + > > +#ifndef _LINUX_BLKTAP_H > > +#define _LINUX_BLKTAP_H > > + > > +/* > > + * Control > > + */ > > + > > +#define BLKTAP_IOCTL_RESPOND 1 > > +#define BLKTAP_IOCTL_ALLOC_TAP 200 > > +#define BLKTAP_IOCTL_FREE_TAP 201 > > +#define BLKTAP_IOCTL_CREATE_DEVICE 202 > > +#define BLKTAP_IOCTL_REMOVE_DEVICE 207 > > + > > +#define BLKTAP_NAME_MAX 256 > > + > > +struct blktap_info { > > + unsigned int ring_major; > > + unsigned int bdev_major; > > + unsigned int ring_minor; > > +}; > > + > > +struct blktap_device_info { > > + char name[BLKTAP_NAME_MAX]; > > + unsigned long long capacity; > > + unsigned long sector_size; > > +}; > > + > > +/* > > + * I/O ring > > + */ > > + > > +#ifdef __KERNEL__ > > +#define BLKTAP_PAGE_SIZE PAGE_SIZE > > +#endif > > + > > +#include <xen/interface/io/ring.h> > > + > > +typedef struct blktap_ring_request blktap_ring_req_t; > > +typedef struct blktap_ring_response blktap_ring_rsp_t; > > + > > +struct blktap_segment { > > + uint32_t __pad; > > + uint8_t first_sect; > > + uint8_t last_sect; > > +}; > > + > > +#define BLKTAP_OP_READ 0 > > +#define BLKTAP_OP_WRITE 1 > > + > > +#define BLKTAP_SEGMENT_MAX 11 > > + > > +struct blktap_ring_request { > > + uint8_t operation; > > + uint8_t nr_segments; > > + uint16_t __pad; > > + uint64_t id; > > + uint64_t sector_number; > > + struct blktap_segment seg[BLKTAP_SEGMENT_MAX]; > > +}; > > + > > +#define BLKTAP_RSP_EOPNOTSUPP -2 > > +#define BLKTAP_RSP_ERROR -1 > > +#define BLKTAP_RSP_OKAY 0 > > + > > +struct blktap_ring_response { > > + uint64_t id; > > + uint8_t operation; > > + int16_t status; > > +}; > > + > > 
+DEFINE_RING_TYPES(blktap, struct blktap_ring_request, struct blktap_ring_response); > > +#define BLKTAP_RING_SIZE __CONST_RING_SIZE(blktap, BLKTAP_PAGE_SIZE) > > + > > +/* > > + * Ring messages (DEPRECATED) > > + */ > > + > > +#define BLKTAP_RING_MESSAGE_CLOSE 3 > > + > > +#endif /* _LINUX_BLKTAP_H */ > > -- > > 1.7.0.4 > > > > > > _______________________________________________ > > Xen-devel mailing list > > Xen-devel@lists.xensource.com > > http://lists.xensource.com/xen-devel > >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel Stodden
2011-Mar-09 23:57 UTC
Re: [Xen-devel] [PATCH 03/10] blktap: Move from drivers/xen to drivers/block
On Wed, 2011-03-09 at 17:26 -0500, Daniel Stodden wrote:> On Wed, 2011-03-09 at 16:50 -0500, Daniel Stodden wrote: > > On Wed, 2011-03-09 at 05:23 -0500, Ian Campbell wrote: > > > On Wed, 2011-03-09 at 00:42 +0000, Daniel Stodden wrote: > > > > From: Daniel Stodden <dns@somacoma.net> > > > > > > > > Signed-off-by: Daniel Stodden <daniel.stodden@citrix.com> > > > > --- > > > > drivers/block/Kconfig | 9 + > > > > drivers/block/Makefile | 1 + > > > > drivers/block/blktap/Makefile | 3 + > > > > drivers/block/blktap/blktap.h | 161 +++++++++++ > > > > drivers/block/blktap/control.c | 315 +++++++++++++++++++++ > > > > drivers/block/blktap/device.c | 551 +++++++++++++++++++++++++++++++++++++ > > > > drivers/block/blktap/request.c | 418 ++++++++++++++++++++++++++++ > > > > drivers/block/blktap/ring.c | 595 ++++++++++++++++++++++++++++++++++++++++ > > > > drivers/block/blktap/sysfs.c | 288 +++++++++++++++++++ > > > > drivers/xen/Kconfig | 11 - > > > > drivers/xen/Makefile | 1 - > > > > drivers/xen/blktap/Makefile | 3 - > > > > drivers/xen/blktap/blktap.h | 161 ----------- > > > > drivers/xen/blktap/control.c | 315 --------------------- > > > > drivers/xen/blktap/device.c | 551 ------------------------------------- > > > > drivers/xen/blktap/request.c | 418 ---------------------------- > > > > drivers/xen/blktap/ring.c | 595 ---------------------------------------- > > > > drivers/xen/blktap/sysfs.c | 288 ------------------- > > > > 18 files changed, 2341 insertions(+), 2343 deletions(-) > > > > create mode 100644 drivers/block/blktap/Makefile > > > > create mode 100644 drivers/block/blktap/blktap.h > > > > create mode 100644 drivers/block/blktap/control.c > > > > create mode 100644 drivers/block/blktap/device.c > > > > create mode 100644 drivers/block/blktap/request.c > > > > create mode 100644 drivers/block/blktap/ring.c > > > > create mode 100644 drivers/block/blktap/sysfs.c > > > > delete mode 100644 drivers/xen/blktap/Makefile > > > > delete mode 100644 
drivers/xen/blktap/blktap.h > > > > delete mode 100644 drivers/xen/blktap/control.c > > > > delete mode 100644 drivers/xen/blktap/device.c > > > > delete mode 100644 drivers/xen/blktap/request.c > > > > delete mode 100644 drivers/xen/blktap/ring.c > > > > delete mode 100644 drivers/xen/blktap/sysfs.c > > > > > > Given the right options "git format-patch" should display this as: > > > drivers/block/Kconfig | 9 +++++++++ > > > drivers/block/Makefile | 1 + > > > drivers/{xen => block}/blktap/Makefile | 2 +- > > > drivers/{xen => block}/blktap/blktap.h | 0 > > > drivers/{xen => block}/blktap/control.c | 0 > > > drivers/{xen => block}/blktap/device.c | 0 > > > drivers/{xen => block}/blktap/request.c | 0 > > > drivers/{xen => block}/blktap/ring.c | 0 > > > drivers/{xen => block}/blktap/sysfs.c | 0 > > > drivers/xen/Kconfig | 11 ----------- > > > drivers/xen/Makefile | 1 - > > > 11 files changed, 11 insertions(+), 13 deletions(-) > > > [...] > > > diff --git a/drivers/xen/blktap/blktap.h b/drivers/block/blktap/blktap.h > > > similarity index 100% > > > rename from drivers/xen/blktap/blktap.h > > > rename to drivers/block/blktap/blktap.h > > > [...] > > > which is far easier to review. > > > > > > I think it''s -M you want. > > > > Hmm, sounds true. > > > > Back then when I created that change, I''m pretty sure I performed a > > number of git mv, applied some fixup and committed. > > > > I rebased that tree a couple times, though. Do you mean to say I must > > have broken that commit somehere. > > > > Okay, I''ll try it again and see if that improves the output. > > Thinking of it: Iirc this diff is not only a plain rename, there were > some additional changes made in source to have everything compile again. > Could that be the reason? > > Want those split out?Oooh, scratch that, that''s not even the case. I seem to finally be getting what you''re really talking about. This is not really a broken commit, it''s solely about send-email not enabling rename *detection*. 
Interesting, I always assumed the renaming case must be a property of the commit object, not a diff feature. That correct now? Daniel _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel Stodden
2011-Mar-10 00:02 UTC
[Xen-devel] [PATCH] blktap: Move from drivers/xen to drivers/block
From: Daniel Stodden <dns@somacoma.net> Signed-off-by: Daniel Stodden <daniel.stodden@citrix.com> --- drivers/block/Kconfig | 9 +++++++++ drivers/block/Makefile | 1 + drivers/{xen => block}/blktap/Makefile | 2 +- drivers/{xen => block}/blktap/blktap.h | 0 drivers/{xen => block}/blktap/control.c | 0 drivers/{xen => block}/blktap/device.c | 0 drivers/{xen => block}/blktap/request.c | 0 drivers/{xen => block}/blktap/ring.c | 0 drivers/{xen => block}/blktap/sysfs.c | 0 drivers/xen/Kconfig | 11 ----------- drivers/xen/Makefile | 1 - 11 files changed, 11 insertions(+), 13 deletions(-) rename drivers/{xen => block}/blktap/Makefile (59%) rename drivers/{xen => block}/blktap/blktap.h (100%) rename drivers/{xen => block}/blktap/control.c (100%) rename drivers/{xen => block}/blktap/device.c (100%) rename drivers/{xen => block}/blktap/request.c (100%) rename drivers/{xen => block}/blktap/ring.c (100%) rename drivers/{xen => block}/blktap/sysfs.c (100%) diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index bea8ae7..c4a55a3 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -471,4 +471,13 @@ config BLK_DEV_HD If unsure, say N. +config BLK_DEV_TAP + tristate "Blktap userspace devices" + help + The block tap driver allows block device requests to be + redirected to processes, through a device interface. + Doing so allows user-space development of high-performance + block storage backends, where disk images may be implemented + as files, in memory, or on other hosts across the network. 
+ endif # BLK_DEV diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 7755a5e..8389917 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -28,6 +28,7 @@ obj-$(CONFIG_BLK_DEV_UMEM) += umem.o obj-$(CONFIG_BLK_DEV_NBD) += nbd.o obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o +obj-$(CONFIG_BLK_DEV_TAP) += blktap/ obj-$(CONFIG_VIODASD) += viodasd.o obj-$(CONFIG_BLK_DEV_SX8) += sx8.o diff --git a/drivers/xen/blktap/Makefile b/drivers/block/blktap/Makefile similarity index 59% rename from drivers/xen/blktap/Makefile rename to drivers/block/blktap/Makefile index 822b4e4..923a7c5 100644 --- a/drivers/xen/blktap/Makefile +++ b/drivers/block/blktap/Makefile @@ -1,3 +1,3 @@ -obj-$(CONFIG_XEN_BLKDEV_TAP) := blktap.o +obj-$(CONFIG_BLK_DEV_TAP) := blktap.o blktap-objs := control.o ring.o device.o request.o sysfs.o diff --git a/drivers/xen/blktap/blktap.h b/drivers/block/blktap/blktap.h similarity index 100% rename from drivers/xen/blktap/blktap.h rename to drivers/block/blktap/blktap.h diff --git a/drivers/xen/blktap/control.c b/drivers/block/blktap/control.c similarity index 100% rename from drivers/xen/blktap/control.c rename to drivers/block/blktap/control.c diff --git a/drivers/xen/blktap/device.c b/drivers/block/blktap/device.c similarity index 100% rename from drivers/xen/blktap/device.c rename to drivers/block/blktap/device.c diff --git a/drivers/xen/blktap/request.c b/drivers/block/blktap/request.c similarity index 100% rename from drivers/xen/blktap/request.c rename to drivers/block/blktap/request.c diff --git a/drivers/xen/blktap/ring.c b/drivers/block/blktap/ring.c similarity index 100% rename from drivers/xen/blktap/ring.c rename to drivers/block/blktap/ring.c diff --git a/drivers/xen/blktap/sysfs.c b/drivers/block/blktap/sysfs.c similarity index 100% rename from drivers/xen/blktap/sysfs.c rename to drivers/block/blktap/sysfs.c diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 
c34e71c..b951b83 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -45,17 +45,6 @@ config XEN_BLKDEV_BACKEND interface. -config XEN_BLKDEV_TAP - tristate "Block-device tap backend driver" - depends on XEN_BACKEND && BLOCK - help - The block tap driver is an alternative to the block back driver - and allows VM block requests to be redirected to userspace through - a device interface. The tap allows user-space development of - high-performance block backends, where disk images may be implemented - as files, in memory, or on other hosts across the network. This - driver can safely coexist with the existing blockback driver. - config XEN_BLKBACK_PAGEMAP tristate depends on XEN_BLKDEV_BACKEND != n && XEN_BLKDEV_TAP != n diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index aa4d6e2..44f835e 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -6,6 +6,5 @@ obj-$(CONFIG_XEN_XENCOMM) += xencomm.o obj-$(CONFIG_XEN_BALLOON) += balloon.o obj-$(CONFIG_XEN_DEV_EVTCHN) += evtchn.o obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/ -obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/ obj-$(CONFIG_XENFS) += xenfs/ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o \ No newline at end of file -- 1.7.0.4 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
On Tue, 2011-03-08 at 19:42 -0500, Daniel Stodden wrote: > Please pull upstream/xen/dom0/backend/blktap2 at > git://xenbits.xensource.com/people/dstodden/linux.git Revoke. Could use some further improvements and I'd like to at least fix some missing comments, and maybe modify stuff according to Ian's input. I'll resend it then. Thanks, Daniel > Summary: > > - Move blktap to drivers/block > - Takes a .config update setting BLK_DEV_TAP > - Redo the ABI definitions, in linux/blktap.h > > - Some prerequisites for new commands, such as cache flushing and > trim support. Plus more device configuration driven from userspace, > such as setting physical sector sizes. > > - a new CREATE_DEVICE ioctls to flag extensions, like the above > mentioned ones. > > - Bugfix from Dominic Curran (Fix reference to freed struct request). _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Ian Campbell
2011-Mar-10 08:24 UTC
Re: [Xen-devel] [PATCH 03/10] blktap: Move from drivers/xen to drivers/block
On Wed, 2011-03-09 at 23:57 +0000, Daniel Stodden wrote:> On Wed, 2011-03-09 at 17:26 -0500, Daniel Stodden wrote: > > On Wed, 2011-03-09 at 16:50 -0500, Daniel Stodden wrote: > > > On Wed, 2011-03-09 at 05:23 -0500, Ian Campbell wrote: > > > > On Wed, 2011-03-09 at 00:42 +0000, Daniel Stodden wrote: > > > > > From: Daniel Stodden <dns@somacoma.net> > > > > > > > > > > Signed-off-by: Daniel Stodden <daniel.stodden@citrix.com> > > > > > --- > > > > > drivers/block/Kconfig | 9 + > > > > > drivers/block/Makefile | 1 + > > > > > drivers/block/blktap/Makefile | 3 + > > > > > drivers/block/blktap/blktap.h | 161 +++++++++++ > > > > > drivers/block/blktap/control.c | 315 +++++++++++++++++++++ > > > > > drivers/block/blktap/device.c | 551 +++++++++++++++++++++++++++++++++++++ > > > > > drivers/block/blktap/request.c | 418 ++++++++++++++++++++++++++++ > > > > > drivers/block/blktap/ring.c | 595 ++++++++++++++++++++++++++++++++++++++++ > > > > > drivers/block/blktap/sysfs.c | 288 +++++++++++++++++++ > > > > > drivers/xen/Kconfig | 11 - > > > > > drivers/xen/Makefile | 1 - > > > > > drivers/xen/blktap/Makefile | 3 - > > > > > drivers/xen/blktap/blktap.h | 161 ----------- > > > > > drivers/xen/blktap/control.c | 315 --------------------- > > > > > drivers/xen/blktap/device.c | 551 ------------------------------------- > > > > > drivers/xen/blktap/request.c | 418 ---------------------------- > > > > > drivers/xen/blktap/ring.c | 595 ---------------------------------------- > > > > > drivers/xen/blktap/sysfs.c | 288 ------------------- > > > > > 18 files changed, 2341 insertions(+), 2343 deletions(-) > > > > > create mode 100644 drivers/block/blktap/Makefile > > > > > create mode 100644 drivers/block/blktap/blktap.h > > > > > create mode 100644 drivers/block/blktap/control.c > > > > > create mode 100644 drivers/block/blktap/device.c > > > > > create mode 100644 drivers/block/blktap/request.c > > > > > create mode 100644 drivers/block/blktap/ring.c > > > > > create mode 100644 
drivers/block/blktap/sysfs.c > > > > > delete mode 100644 drivers/xen/blktap/Makefile > > > > > delete mode 100644 drivers/xen/blktap/blktap.h > > > > > delete mode 100644 drivers/xen/blktap/control.c > > > > > delete mode 100644 drivers/xen/blktap/device.c > > > > > delete mode 100644 drivers/xen/blktap/request.c > > > > > delete mode 100644 drivers/xen/blktap/ring.c > > > > > delete mode 100644 drivers/xen/blktap/sysfs.c > > > > > > > > Given the right options "git format-patch" should display this as: > > > > drivers/block/Kconfig | 9 +++++++++ > > > > drivers/block/Makefile | 1 + > > > > drivers/{xen => block}/blktap/Makefile | 2 +- > > > > drivers/{xen => block}/blktap/blktap.h | 0 > > > > drivers/{xen => block}/blktap/control.c | 0 > > > > drivers/{xen => block}/blktap/device.c | 0 > > > > drivers/{xen => block}/blktap/request.c | 0 > > > > drivers/{xen => block}/blktap/ring.c | 0 > > > > drivers/{xen => block}/blktap/sysfs.c | 0 > > > > drivers/xen/Kconfig | 11 ----------- > > > > drivers/xen/Makefile | 1 - > > > > 11 files changed, 11 insertions(+), 13 deletions(-) > > > > [...] > > > > diff --git a/drivers/xen/blktap/blktap.h b/drivers/block/blktap/blktap.h > > > > similarity index 100% > > > > rename from drivers/xen/blktap/blktap.h > > > > rename to drivers/block/blktap/blktap.h > > > > [...] > > > > which is far easier to review. > > > > > > > > I think it''s -M you want. > > > > > > Hmm, sounds true. > > > > > > Back then when I created that change, I''m pretty sure I performed a > > > number of git mv, applied some fixup and committed. > > > > > > I rebased that tree a couple times, though. Do you mean to say I must > > > have broken that commit somehere. > > > > > > Okay, I''ll try it again and see if that improves the output. > > > > Thinking of it: Iirc this diff is not only a plain rename, there were > > some additional changes made in source to have everything compile again. > > Could that be the reason? > > > > Want those split out? 
> > Oooh, scratch that, that''s not even the case. > > I seem to finally be getting what you''re really talking about. This is > not really a broken commit, it''s solely about send-email not enabling > rename *detection*. Interesting, I always assumed the renaming case must > be a property of the commit object, not a diff feature.I don''t know if it is a feature of the commit metadata (which FWIW I''d expect to be retained over a rebase etc) or something which is computed at diff generation time. In any case -M is required to get the git patch syntax, the default is to be compatible with old-school patch utils.> That correct now?<1299715326-26236-1-git-send-email-daniel.stodden@citrix.com> looks good.> > Daniel > > > > > >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Ian Campbell
2011-Mar-10 08:28 UTC
Re: [Xen-devel] [PATCH 01/10] blktap: Add include/linux/blktap.h
On Wed, 2011-03-09 at 22:37 +0000, Daniel Stodden wrote:> On Wed, 2011-03-09 at 05:18 -0500, Ian Campbell wrote: > > On Wed, 2011-03-09 at 00:42 +0000, Daniel Stodden wrote: > > > Moves blktap2 definitions into a common header file. > > > > > > Includes xen/interface/io/ring.h and new ring definitions. Makes > > > blktap build independently from xen-devel headers. > > > > > > New blktap_ring structs are fully congrent to blkif rings, for binary > > > compat. > > > > > > Signed-off-by: Daniel Stodden <daniel.stodden@citrix.com> > > > --- > > > drivers/xen/blktap/blktap.h | 66 ++++---------------------------- > > > drivers/xen/blktap/control.c | 14 +++--- > > > drivers/xen/blktap/device.c | 12 +++--- > > > drivers/xen/blktap/request.c | 8 ++-- > > > drivers/xen/blktap/ring.c | 51 ++++++++++++++----------- > > > drivers/xen/blktap/sysfs.c | 6 +- > > > include/linux/blktap.h | 85 ++++++++++++++++++++++++++++++++++++++++++ > > > > This new file defines the kernel<->user (tapdisk process) ring protocol, > > right? > > Yes. It''s exactly as far as I can go right now maintaining > compatibility. The main objective was rather to get off xen-devel > headers in favour of kernel sources. > > - includes xen/interface/io/ring. > - doesn''t include xen/interface/io/blkif. > - certainly doesn''t include xen/interface/blkif.h > (the alignment stuff for guests). > > The old code used blkif and struct blkif_* definitions. The new one got > it''s own struct blktap_*s, identical as far as READ/WRITE commands go. > > But this also means one can develop the userland stuff independently > from blkif.h. New commands (flush, trim, ...) get quite a bit more > useful freedom. > > > I think its proper home would be under include/xen somewhere, which is > > where the gntdev and evtchn etc driver interfaces are defined. > > A very long time ago, a somewhat obvious choice was made to use xen ring > headers for the blktap user <-> kernel interface. 
So this header > presently still wants xen/interface. > > It doesn't depend on anything xenish, nor is this a Xen driver anymore. > I thought even with that header dependency, that's somewhat a > linux/blktap.h already, so I made it so. OK. > I'm feeling some heat from boston-newxen people because in XCP I'm > actually building blktap.hg against the kernel devel rpm contents right > now. That's got to vanish. It's great for hacking extensions, but the > component dependency is a bit gross, admittedly. Sure, but you could build against a copy of the kernel source tree, which would remove the dependency on the kernel binary RPMs. > Once doing so, it's a standalone kernel blktap.h which can be copied > over into userland trees, without additional definitions included. If the other user of this interface is the tapdisk userspace, but that includes a copy of the interface header (note: I'm not convinced that is a good idea) then I think the right place for this copy of the header is drivers/block/blktap/. If on the other hand userland is building against this exact header then include/linux is probably right given that the driver has no Xen dependency. > This isn't sick: Blktap2 doesn't need the full ring.h macro contents > with memory barriers etc anyway, because the userland dispatching is > synchronous. It could be just bare structs, and the standard PUSH/PULL > macros are rather decoration and could be dropped (or reimplemented as > memcpy()s). > > Will this justify linux/blktap.h? > > One could also revert that ring.h pad space hack. > > I'm not passionate about it. If you still disagree, I'll give up and we > move it elsewhere. > > In this case, it could as well go back into drivers/block/blktap, and > I'll just give up on 'development mode' hacks to verify tapdisk builds > against the kernel tree altogether. What sort of "'development mode' hacks"? > > Where is the canonical definition of this interface stored? 
In the > > kernel tree or the hypervisor tree? > > You mean blktap.h? This is not a xen driver. I''d call this the canonical > definition, a reference with what that kernel/driver revision supports, > that''s why I put it there. > > It wouldn''t belong elsewhere, except for occasionally updated verbatim > copies in updated blktap sources, to unstress build dependencies. > > Daniel > > > Ian. > > > > > 7 files changed, 142 insertions(+), 100 deletions(-) > > > create mode 100644 include/linux/blktap.h > > > > > > diff --git a/drivers/xen/blktap/blktap.h b/drivers/xen/blktap/blktap.h > > > index fe63fc9..1318cad 100644 > > > --- a/drivers/xen/blktap/blktap.h > > > +++ b/drivers/xen/blktap/blktap.h > > > @@ -6,7 +6,7 @@ > > > #include <linux/cdev.h> > > > #include <linux/init.h> > > > #include <linux/scatterlist.h> > > > -#include <xen/blkif.h> > > > +#include <linux/blktap.h> > > > > > > extern int blktap_debug_level; > > > extern int blktap_ring_major; > > > @@ -30,74 +30,26 @@ extern int blktap_device_major; > > > #define BLKTAP_DEVICE_CLOSED 5 > > > #define BLKTAP_SHUTDOWN_REQUESTED 8 > > > > > > -/* blktap IOCTLs: */ > > > -#define BLKTAP2_IOCTL_KICK_FE 1 > > > -#define BLKTAP2_IOCTL_ALLOC_TAP 200 > > > -#define BLKTAP2_IOCTL_FREE_TAP 201 > > > -#define BLKTAP2_IOCTL_CREATE_DEVICE 202 > > > -#define BLKTAP2_IOCTL_REMOVE_DEVICE 207 > > > - > > > -#define BLKTAP2_MAX_MESSAGE_LEN 256 > > > - > > > -#define BLKTAP2_RING_MESSAGE_CLOSE 3 > > > - > > > #define BLKTAP_REQUEST_FREE 0 > > > #define BLKTAP_REQUEST_PENDING 1 > > > > > > -/* > > > - * The maximum number of requests that can be outstanding at any time > > > - * is determined by > > > - * > > > - * [mmap_alloc * MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST] > > > - * > > > - * where mmap_alloc < MAX_DYNAMIC_MEM. > > > - * > > > - * TODO: > > > - * mmap_alloc is initialised to 2 and should be adjustable on the fly via > > > - * sysfs. 
> > > - */ > > > -#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE) > > > -#define MAX_DYNAMIC_MEM BLK_RING_SIZE > > > -#define MAX_PENDING_REQS BLK_RING_SIZE > > > -#define MMAP_PAGES (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST) > > > -#define MMAP_VADDR(_start, _req, _seg) \ > > > - (_start + \ > > > - ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \ > > > - ((_seg) * PAGE_SIZE)) > > > - > > > -struct grant_handle_pair { > > > - grant_handle_t kernel; > > > - grant_handle_t user; > > > -}; > > > -#define INVALID_GRANT_HANDLE 0xFFFF > > > - > > > -struct blktap_handle { > > > - unsigned int ring; > > > - unsigned int device; > > > - unsigned int minor; > > > -}; > > > - > > > -struct blktap_params { > > > - char name[BLKTAP2_MAX_MESSAGE_LEN]; > > > - unsigned long long capacity; > > > - unsigned long sector_size; > > > -}; > > > - > > > struct blktap_device { > > > spinlock_t lock; > > > struct gendisk *gd; > > > }; > > > > > > +struct blktap_request; > > > + > > > struct blktap_ring { > > > struct task_struct *task; > > > > > > struct vm_area_struct *vma; > > > - struct blkif_front_ring ring; > > > + blktap_front_ring_t ring; > > > unsigned long ring_vstart; > > > unsigned long user_vstart; > > > > > > int n_pending; > > > - struct blktap_request *pending[MAX_PENDING_REQS]; > > > + struct blktap_request *pending[BLKTAP_RING_SIZE]; > > > > > > wait_queue_head_t poll_wait; > > > > > > @@ -128,8 +80,8 @@ struct blktap_request { > > > int operation; > > > struct timeval time; > > > > > > - struct scatterlist sg_table[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > > > - struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > > > + struct scatterlist sg_table[BLKTAP_SEGMENT_MAX]; > > > + struct page *pages[BLKTAP_SEGMENT_MAX]; > > > int nr_pages; > > > }; > > > > > > @@ -148,7 +100,7 @@ struct blktap { > > > > > > wait_queue_head_t remove_wait; > > > struct work_struct remove_work; > > > - char name[BLKTAP2_MAX_MESSAGE_LEN]; > > > + char 
name[BLKTAP_NAME_MAX]; > > > > > > struct blktap_statistics stats; > > > }; > > > @@ -189,7 +141,7 @@ void blktap_sysfs_destroy(struct blktap *); > > > int blktap_device_init(void); > > > void blktap_device_exit(void); > > > size_t blktap_device_debug(struct blktap *, char *, size_t); > > > -int blktap_device_create(struct blktap *, struct blktap_params *); > > > +int blktap_device_create(struct blktap *, struct blktap_device_info *); > > > int blktap_device_destroy(struct blktap *); > > > void blktap_device_destroy_sync(struct blktap *); > > > void blktap_device_run_queue(struct blktap *); > > > diff --git a/drivers/xen/blktap/control.c b/drivers/xen/blktap/control.c > > > index f339bba..57b1a10 100644 > > > --- a/drivers/xen/blktap/control.c > > > +++ b/drivers/xen/blktap/control.c > > > @@ -127,19 +127,19 @@ blktap_control_ioctl(struct inode *inode, struct file *filp, > > > struct blktap *tap; > > > > > > switch (cmd) { > > > - case BLKTAP2_IOCTL_ALLOC_TAP: { > > > - struct blktap_handle h; > > > + case BLKTAP_IOCTL_ALLOC_TAP: { > > > + struct blktap_info info; > > > void __user *ptr = (void __user*)arg; > > > > > > tap = blktap_control_create_tap(); > > > if (!tap) > > > return -ENOMEM; > > > > > > - h.ring = blktap_ring_major; > > > - h.device = blktap_device_major; > > > - h.minor = tap->minor; > > > + info.ring_major = blktap_ring_major; > > > + info.bdev_major = blktap_device_major; > > > + info.ring_minor = tap->minor; > > > > > > - if (copy_to_user(ptr, &h, sizeof(h))) { > > > + if (copy_to_user(ptr, &info, sizeof(info))) { > > > blktap_control_destroy_tap(tap); > > > return -EFAULT; > > > } > > > @@ -147,7 +147,7 @@ blktap_control_ioctl(struct inode *inode, struct file *filp, > > > return 0; > > > } > > > > > > - case BLKTAP2_IOCTL_FREE_TAP: { > > > + case BLKTAP_IOCTL_FREE_TAP: { > > > int minor = arg; > > > > > > if (minor > MAX_BLKTAP_DEVICE) > > > diff --git a/drivers/xen/blktap/device.c b/drivers/xen/blktap/device.c > > > index fce2769..6bb04bd 
100644 > > > --- a/drivers/xen/blktap/device.c > > > +++ b/drivers/xen/blktap/device.c > > > @@ -186,7 +186,7 @@ blktap_device_make_request(struct blktap *tap, struct request *rq) > > > write ? 'w' : 'r', blk_rq_bytes(rq), nsegs); > > > > > > request->rq = rq; > > > - request->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ; > > > + request->operation = write ? BLKTAP_OP_WRITE : BLKTAP_OP_READ; > > > > > > err = blktap_request_get_pages(tap, request, nsegs); > > > if (err) > > > @@ -276,7 +276,7 @@ blktap_device_do_request(struct request_queue *rq) > > > > > > static void > > > blktap_device_configure(struct blktap *tap, > > > - struct blktap_params *params) > > > + struct blktap_device_info *params) > > > { > > > struct request_queue *rq; > > > struct blktap_device *dev = &tap->device; > > > @@ -297,8 +297,8 @@ blktap_device_configure(struct blktap *tap, > > > blk_queue_max_segment_size(rq, PAGE_SIZE); > > > > > > /* Ensure a merged request will fit in a single I/O ring slot. */ > > > - blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); > > > - blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); > > > + blk_queue_max_phys_segments(rq, BLKTAP_SEGMENT_MAX); > > > + blk_queue_max_hw_segments(rq, BLKTAP_SEGMENT_MAX); > > > > > > /* Make sure buffer addresses are sector-aligned. 
*/ > > > blk_queue_dma_alignment(rq, 511); > > > @@ -311,7 +311,7 @@ blktap_device_configure(struct blktap *tap, > > > > > > static int > > > blktap_device_validate_params(struct blktap *tap, > > > - struct blktap_params *params) > > > + struct blktap_device_info *params) > > > { > > > struct device *dev = tap->ring.dev; > > > int sector_order, name_sz; > > > @@ -425,7 +425,7 @@ blktap_device_destroy_sync(struct blktap *tap) > > > } > > > > > > int > > > -blktap_device_create(struct blktap *tap, struct blktap_params *params) > > > +blktap_device_create(struct blktap *tap, struct blktap_device_info *params) > > > { > > > int minor, err; > > > struct gendisk *gd; > > > diff --git a/drivers/xen/blktap/request.c b/drivers/xen/blktap/request.c > > > index 9bef48c..8cfd6c9 100644 > > > --- a/drivers/xen/blktap/request.c > > > +++ b/drivers/xen/blktap/request.c > > > @@ -7,18 +7,18 @@ > > > #include "blktap.h" > > > > > > /* max pages per shared pool. just to prevent accidental dos. */ > > > -#define POOL_MAX_PAGES (256*BLKIF_MAX_SEGMENTS_PER_REQUEST) > > > +#define POOL_MAX_PAGES (256*BLKTAP_SEGMENT_MAX) > > > > > > /* default page pool size. when considering to shrink a shared pool, > > > * note that paused tapdisks may grab a whole lot of pages for a long > > > * time. */ > > > -#define POOL_DEFAULT_PAGES (2 * MMAP_PAGES) > > > +#define POOL_DEFAULT_PAGES (2 * BLKTAP_RING_SIZE * BLKTAP_SEGMENT_MAX) > > > > > > /* max number of pages allocatable per request. */ > > > -#define POOL_MAX_REQUEST_PAGES BLKIF_MAX_SEGMENTS_PER_REQUEST > > > +#define POOL_MAX_REQUEST_PAGES BLKTAP_SEGMENT_MAX > > > > > > /* min request structs per pool. These grow dynamically. 
*/ > > > -#define POOL_MIN_REQS BLK_RING_SIZE > > > +#define POOL_MIN_REQS BLKTAP_RING_SIZE > > > > > > static struct kset *pool_set; > > > > > > diff --git a/drivers/xen/blktap/ring.c b/drivers/xen/blktap/ring.c > > > index 6b86be5..9442a64 100644 > > > --- a/drivers/xen/blktap/ring.c > > > +++ b/drivers/xen/blktap/ring.c > > > @@ -18,7 +18,7 @@ static struct cdev blktap_ring_cdev; > > > > > > static void > > > blktap_ring_read_response(struct blktap *tap, > > > - const struct blkif_response *rsp) > > > + const blktap_ring_rsp_t *rsp) > > > { > > > struct blktap_ring *ring = &tap->ring; > > > struct blktap_request *request; > > > @@ -27,7 +27,7 @@ blktap_ring_read_response(struct blktap *tap, > > > request = NULL; > > > > > > usr_idx = rsp->id; > > > - if (usr_idx < 0 || usr_idx >= MAX_PENDING_REQS) { > > > + if (usr_idx < 0 || usr_idx >= BLKTAP_RING_SIZE) { > > > err = -ERANGE; > > > goto invalid; > > > } > > > @@ -48,7 +48,7 @@ blktap_ring_read_response(struct blktap *tap, > > > "request %d [%p] response: %d\n", > > > request->usr_idx, request, rsp->status); > > > > > > - err = rsp->status == BLKIF_RSP_OKAY ? 0 : -EIO; > > > + err = rsp->status == BLKTAP_RSP_OKAY ? 
0 : -EIO; > > > end_request: > > > blktap_device_end_request(tap, request, err); > > > return; > > > @@ -67,7 +67,7 @@ static void > > > blktap_read_ring(struct blktap *tap) > > > { > > > struct blktap_ring *ring = &tap->ring; > > > - struct blkif_response rsp; > > > + blktap_ring_rsp_t rsp; > > > RING_IDX rc, rp; > > > > > > down_read(&current->mm->mmap_sem); > > > @@ -90,6 +90,11 @@ blktap_read_ring(struct blktap *tap) > > > up_read(&current->mm->mmap_sem); > > > } > > > > > > +#define MMAP_VADDR(_start, _req, _seg) \ > > > + ((_start) + \ > > > + ((_req) * BLKTAP_SEGMENT_MAX * BLKTAP_PAGE_SIZE) + \ > > > + ((_seg) * BLKTAP_PAGE_SIZE)) > > > + > > > static int blktap_ring_fault(struct vm_area_struct *vma, struct vm_fault *vmf) > > > { > > > return VM_FAULT_SIGBUS; > > > @@ -102,7 +107,7 @@ blktap_ring_fail_pending(struct blktap *tap) > > > struct blktap_request *request; > > > int usr_idx; > > > > > > - for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) { > > > + for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) { > > > request = ring->pending[usr_idx]; > > > if (!request) > > > continue; > > > @@ -154,7 +159,7 @@ blktap_ring_map_request(struct blktap *tap, > > > int seg, err = 0; > > > int write; > > > > > > - write = request->operation == BLKIF_OP_WRITE; > > > + write = request->operation == BLKTAP_OP_WRITE; > > > > > > for (seg = 0; seg < request->nr_pages; seg++) { > > > if (write) > > > @@ -182,7 +187,7 @@ blktap_ring_unmap_request(struct blktap *tap, > > > > > > uaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, 0); > > > size = request->nr_pages << PAGE_SHIFT; > > > - read = request->operation == BLKIF_OP_READ; > > > + read = request->operation == BLKTAP_OP_READ; > > > > > > if (read) > > > for (seg = 0; seg < request->nr_pages; seg++) > > > @@ -217,11 +222,11 @@ blktap_ring_make_request(struct blktap *tap) > > > if (!request) > > > return ERR_PTR(-ENOMEM); > > > > > > - for (usr_idx = 0; usr_idx < BLK_RING_SIZE; usr_idx++) > > > + for (usr_idx = 0; 
usr_idx < BLKTAP_RING_SIZE; usr_idx++) > > > if (!ring->pending[usr_idx]) > > > break; > > > > > > - BUG_ON(usr_idx >= BLK_RING_SIZE); > > > + BUG_ON(usr_idx >= BLKTAP_RING_SIZE); > > > > > > request->tap = tap; > > > request->usr_idx = usr_idx; > > > @@ -237,7 +242,7 @@ blktap_ring_submit_request(struct blktap *tap, > > > struct blktap_request *request) > > > { > > > struct blktap_ring *ring = &tap->ring; > > > - struct blkif_request *breq; > > > + blktap_ring_req_t *breq; > > > struct scatterlist *sg; > > > int i, nsecs = 0; > > > > > > @@ -248,12 +253,12 @@ blktap_ring_submit_request(struct blktap *tap, > > > > > > breq->id = request->usr_idx; > > > breq->sector_number = blk_rq_pos(request->rq); > > > - breq->handle = 0; > > > + breq->__pad = 0; > > > breq->operation = request->operation; > > > breq->nr_segments = request->nr_pages; > > > > > > blktap_for_each_sg(sg, request, i) { > > > - struct blkif_request_segment *seg = &breq->seg[i]; > > > + struct blktap_segment *seg = &breq->seg[i]; > > > int first, count; > > > > > > count = sg->length >> 9; > > > @@ -270,12 +275,12 @@ blktap_ring_submit_request(struct blktap *tap, > > > do_gettimeofday(&request->time); > > > > > > > > > - if (request->operation == BLKIF_OP_WRITE) { > > > + if (request->operation == BLKTAP_OP_WRITE) { > > > tap->stats.st_wr_sect += nsecs; > > > tap->stats.st_wr_req++; > > > } > > > > > > - if (request->operation == BLKIF_OP_READ) { > > > + if (request->operation == BLKTAP_OP_READ) { > > > tap->stats.st_rd_sect += nsecs; > > > tap->stats.st_rd_req++; > > > } > > > @@ -327,7 +332,7 @@ blktap_ring_mmap(struct file *filp, struct vm_area_struct *vma) > > > { > > > struct blktap *tap = filp->private_data; > > > struct blktap_ring *ring = &tap->ring; > > > - struct blkif_sring *sring; > > > + blktap_sring_t *sring; > > > struct page *page = NULL; > > > int err; > > > > > > @@ -384,25 +389,25 @@ blktap_ring_ioctl(struct inode *inode, struct file *filp, > > > return -EACCES; > > > > > > 
switch(cmd) { > > > - case BLKTAP2_IOCTL_KICK_FE: > > > + case BLKTAP_IOCTL_RESPOND: > > > > > > blktap_read_ring(tap); > > > return 0; > > > > > > - case BLKTAP2_IOCTL_CREATE_DEVICE: { > > > - struct blktap_params params; > > > + case BLKTAP_IOCTL_CREATE_DEVICE: { > > > + struct blktap_device_info info; > > > void __user *ptr = (void *)arg; > > > > > > if (!arg) > > > return -EINVAL; > > > > > > - if (copy_from_user(&params, ptr, sizeof(params))) > > > + if (copy_from_user(&info, ptr, sizeof(info))) > > > return -EFAULT; > > > > > > - return blktap_device_create(tap, &params); > > > + return blktap_device_create(tap, &info); > > > } > > > > > > - case BLKTAP2_IOCTL_REMOVE_DEVICE: > > > + case BLKTAP_IOCTL_REMOVE_DEVICE: > > > > > > return blktap_device_destroy(tap); > > > } > > > @@ -482,7 +487,7 @@ blktap_ring_debug(struct blktap *tap, char *buf, size_t size) > > > s += snprintf(s, end - s, > > > "begin pending:%d\n", ring->n_pending); > > > > > > - for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) { > > > + for (usr_idx = 0; usr_idx < BLKTAP_RING_SIZE; usr_idx++) { > > > struct blktap_request *request; > > > struct timeval *time; > > > int write; > > > @@ -491,7 +496,7 @@ blktap_ring_debug(struct blktap *tap, char *buf, size_t size) > > > if (!request) > > > continue; > > > > > > - write = request->operation == BLKIF_OP_WRITE; > > > + write = request->operation == BLKTAP_OP_WRITE; > > > time = &request->time; > > > > > > s += snprintf(s, end - s, > > > diff --git a/drivers/xen/blktap/sysfs.c b/drivers/xen/blktap/sysfs.c > > > index 7bbfea8..182de9a 100644 > > > --- a/drivers/xen/blktap/sysfs.c > > > +++ b/drivers/xen/blktap/sysfs.c > > > @@ -20,7 +20,7 @@ blktap_sysfs_set_name(struct device *dev, struct device_attribute *attr, const c > > > if (!tap) > > > return 0; > > > > > > - if (size >= BLKTAP2_MAX_MESSAGE_LEN) > > > + if (size >= BLKTAP_NAME_MAX) > > > return -ENAMETOOLONG; > > > > > > if (strnlen(buf, size) != size) > > > @@ -75,8 +75,8 @@ 
blktap_sysfs_remove_device(struct device *dev, > > > goto wait; > > > > > > if (tap->ring.vma) { > > > - blkif_sring_t *sring = tap->ring.ring.sring; > > > - sring->private.tapif_user.msg = BLKTAP2_RING_MESSAGE_CLOSE; > > > + blktap_sring_t *sring = tap->ring.ring.sring; > > > + sring->private.tapif_user.msg = BLKTAP_RING_MESSAGE_CLOSE; > > > blktap_ring_kick_user(tap); > > > } else { > > > INIT_WORK(&tap->remove_work, blktap_sysfs_remove_work); > > > diff --git a/include/linux/blktap.h b/include/linux/blktap.h > > > new file mode 100644 > > > index 0000000..ec33429 > > > --- /dev/null > > > +++ b/include/linux/blktap.h > > > @@ -0,0 +1,85 @@ > > > +/* > > > + * Copyright (c) 2011, XenSource Inc. > > > + * All rights reserved. > > > + */ > > > + > > > +#ifndef _LINUX_BLKTAP_H > > > +#define _LINUX_BLKTAP_H > > > + > > > +/* > > > + * Control > > > + */ > > > + > > > +#define BLKTAP_IOCTL_RESPOND 1 > > > +#define BLKTAP_IOCTL_ALLOC_TAP 200 > > > +#define BLKTAP_IOCTL_FREE_TAP 201 > > > +#define BLKTAP_IOCTL_CREATE_DEVICE 202 > > > +#define BLKTAP_IOCTL_REMOVE_DEVICE 207 > > > + > > > +#define BLKTAP_NAME_MAX 256 > > > + > > > +struct blktap_info { > > > + unsigned int ring_major; > > > + unsigned int bdev_major; > > > + unsigned int ring_minor; > > > +}; > > > + > > > +struct blktap_device_info { > > > + char name[BLKTAP_NAME_MAX]; > > > + unsigned long long capacity; > > > + unsigned long sector_size; > > > +}; > > > + > > > +/* > > > + * I/O ring > > > + */ > > > + > > > +#ifdef __KERNEL__ > > > +#define BLKTAP_PAGE_SIZE PAGE_SIZE > > > +#endif > > > + > > > +#include <xen/interface/io/ring.h> > > > + > > > +typedef struct blktap_ring_request blktap_ring_req_t; > > > +typedef struct blktap_ring_response blktap_ring_rsp_t; > > > + > > > +struct blktap_segment { > > > + uint32_t __pad; > > > + uint8_t first_sect; > > > + uint8_t last_sect; > > > +}; > > > + > > > +#define BLKTAP_OP_READ 0 > > > +#define BLKTAP_OP_WRITE 1 > > > + > > > +#define BLKTAP_SEGMENT_MAX 11 
> > > + > > > +struct blktap_ring_request { > > > + uint8_t operation; > > > + uint8_t nr_segments; > > > + uint16_t __pad; > > > + uint64_t id; > > > + uint64_t sector_number; > > > + struct blktap_segment seg[BLKTAP_SEGMENT_MAX]; > > > +}; > > > + > > > +#define BLKTAP_RSP_EOPNOTSUPP -2 > > > +#define BLKTAP_RSP_ERROR -1 > > > +#define BLKTAP_RSP_OKAY 0 > > > + > > > +struct blktap_ring_response { > > > + uint64_t id; > > > + uint8_t operation; > > > + int16_t status; > > > +}; > > > + > > > +DEFINE_RING_TYPES(blktap, struct blktap_ring_request, struct blktap_ring_response); > > > +#define BLKTAP_RING_SIZE __CONST_RING_SIZE(blktap, BLKTAP_PAGE_SIZE) > > > + > > > +/* > > > + * Ring messages (DEPRECATED) > > > + */ > > > + > > > +#define BLKTAP_RING_MESSAGE_CLOSE 3 > > > + > > > +#endif /* _LINUX_BLKTAP_H */ > > > -- > > > 1.7.0.4 > > > > > > > > > _______________________________________________ > > > Xen-devel mailing list > > > Xen-devel@lists.xensource.com > > > http://lists.xensource.com/xen-devel > > > > > >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel Stodden
2011-Mar-10 10:38 UTC
Re: [Xen-devel] [PATCH 03/10] blktap: Move from drivers/xen to drivers/block
On Thu, 2011-03-10 at 03:24 -0500, Ian Campbell wrote: > > Oooh, scratch that, that's not even the case. > > > > I seem to finally be getting what you're really talking about. This is > > not really a broken commit, it's solely about send-email not enabling > > rename *detection*. Interesting, I always assumed the renaming case must > > be a property of the commit object, not a diff feature. > > I don't know if it is a feature of the commit metadata (which FWIW I'd > expect to be retained over a rebase etc) or something which is computed > at diff generation time. In any case -M is required to get the git patch > syntax, the default is to be compatible with old-school patch utils. > > > That correct now? > > <1299715326-26236-1-git-send-email-daniel.stodden@citrix.com> looks > good. Okay. I learned -M is actually a git-diff option passed through. The man page mentions it's O(n^2). I had to learn all that while wondering if there's a config option to turn it on, and that took me O(n). It's [diff] renames = true btw, but might be a bad idea. Cheers. Daniel > > > > Daniel > > > > > > > > > > > > > > _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Daniel Stodden
2011-Mar-10 11:40 UTC
Re: [Xen-devel] [PATCH 01/10] blktap: Add include/linux/blktap.h
On Thu, 2011-03-10 at 03:28 -0500, Ian Campbell wrote: > On Wed, 2011-03-09 at 22:37 +0000, Daniel Stodden wrote: > > On Wed, 2011-03-09 at 05:18 -0500, Ian Campbell wrote: > > > On Wed, 2011-03-09 at 00:42 +0000, Daniel Stodden wrote: > > > > Moves blktap2 definitions into a common header file. > > > > > > > > Includes xen/interface/io/ring.h and new ring definitions. Makes > > > > blktap build independently from xen-devel headers. > > > > > > > > New blktap_ring structs are fully congrent to blkif rings, for binary > > > > compat. > > > > > > > > Signed-off-by: Daniel Stodden <daniel.stodden@citrix.com> > > > > --- > > > > drivers/xen/blktap/blktap.h | 66 ++++---------------------------- > > > > drivers/xen/blktap/control.c | 14 +++--- > > > > drivers/xen/blktap/device.c | 12 +++--- > > > > drivers/xen/blktap/request.c | 8 ++-- > > > > drivers/xen/blktap/ring.c | 51 ++++++++++++++----------- > > > > drivers/xen/blktap/sysfs.c | 6 +- > > > > include/linux/blktap.h | 85 ++++++++++++++++++++++++++++++++++++++++++ > > > > > > This new file defines the kernel<->user (tapdisk process) ring protocol, > > > right? > > > > Yes. It's exactly as far as I can go right now maintaining > > compatibility. The main objective was rather to get off xen-devel > > headers in favour of kernel sources. > > > > - includes xen/interface/io/ring. > > - doesn't include xen/interface/io/blkif. > > - certainly doesn't include xen/interface/blkif.h > > (the alignment stuff for guests). > > > > The old code used blkif and struct blkif_* definitions. The new one got > > its own struct blktap_*s, identical as far as READ/WRITE commands go. > > > > But this also means one can develop the userland stuff independently > > from blkif.h. New commands (flush, trim, ...) get quite a bit more > > useful freedom. > > > > > I think its proper home would be under include/xen somewhere, which is > > > where the gntdev and evtchn etc driver interfaces are defined. 
> > > > A very long time ago, a somewhat obvious choice was made to use xen ring > > headers for the blktap user <-> kernel interface. So this header > > presently still wants xen/interface. > > > > It doesn't depend on anything xenish, nor is this a Xen driver anymore. > > I thought even with that header dependency, that's somewhat a > > linux/blktap.h already, so I made it so. > > OK. > > > I'm feeling some heat from boston-newxen people because in XCP I'm > > actually building blktap.hg against the kernel devel rpm contents right > > now. That's got to vanish. It's great for hacking extensions, but the > > component dependency is a bit gross, admittedly. > > Sure, but you could build against a copy of the kernel source tree, > which would remove the dependency on the kernel binary RPMs. > > > Once doing so, it's a standalone kernel blktap.h which can be copied > > over into userland trees, without additional definitions included. > > If the other user of this interface is the tapdisk userspace, but that > includes a copy of the interface header (note: I'm not convinced that is > a good idea) then I think the right place for this copy of the header is > drivers/block/blktap/. > > If on the other hand userland is building against this exact header then > include/linux is probably right given that the driver has no Xen > dependency. If include/linux is acceptable, good, I'd keep it. Copies might sound dangerous, but building against kernel headers means sources need to be in sync. The compile-time #ifdef-mess would cause much more grief. Without, it's just negotiating at runtime, that's much more flexible. > > This isn't sick: Blktap2 doesn't need the full ring.h macro contents > > with memory barriers etc anyway, because the userland dispatching is > > synchronous. It could be just bare structs, and the standard PUSH/PULL > > macros are rather decoration and could be dropped (or reimplemented as > > memcpy()s). > > > > Will this justify linux/blktap.h? 
> > > > One could also revert that ring.h pad space hack. > > > > I'm not passionate about it. If you still disagree, I'll give up and we > > move it elsewhere. > > > > In this case, it could as well go back into drivers/block/blktap, and > > I'll just give up on 'development mode' hacks to verify tapdisk builds > > against the kernel tree altogether. > > What sort of "'development mode' hacks"? Just referring to the above: Userspace requiring original kernel includes. Nice for development, but iirc mainline stopped promoting that entirely, a long time ago. Daniel > > > Where is the canonical definition of this interface stored? In the > > > kernel tree or the hypervisor tree? > > > > You mean blktap.h? This is not a xen driver. I'd call this the canonical > > definition, a reference with what that kernel/driver revision supports, > > that's why I put it there. > > > > It wouldn't belong elsewhere, except for occasionally updated verbatim > > copies in updated blktap sources, to unstress build dependencies. > > > > Daniel > > > > > Ian. > > > > > > > 7 files changed, 142 insertions(+), 100 deletions(-) > > > > create mode 100644 include/linux/blktap.h > > > > > > > > diff --git a/drivers/xen/blktap/blktap.h b/drivers/xen/blktap/blktap.h > > > > index fe63fc9..1318cad 100644 > > > > --- a/drivers/xen/blktap/blktap.h > > > > +++ b/drivers/xen/blktap/blktap.h > _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel