Thanos Makatos
2013-Apr-19 15:40 UTC
[PATCH 00 of 21] blktap3/drivers: Introduce tapdisk server.
This patch series copies the core of the tapdisk process from blktap2, with updates coming from blktap2.5. Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com>
Thanos Makatos
2013-Apr-19 15:40 UTC
[PATCH 01 of 21] blktap3/drivers: driver for RAW images
This patch copies from blktap2 (with few changes coming from blktap2.5) the driver for RAW images. Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com> diff --git a/tools/blktap2/drivers/block-aio.c b/tools/blktap3/drivers/block-aio.c copy from tools/blktap2/drivers/block-aio.c copy to tools/blktap3/drivers/block-aio.c --- a/tools/blktap2/drivers/block-aio.c +++ b/tools/blktap3/drivers/block-aio.c @@ -4,14 +4,14 @@ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of XenSource Inc. nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of XenSource Inc. nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT @@ -26,7 +26,6 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ - #include <errno.h> #include <fcntl.h> #include <stdio.h> @@ -35,8 +34,8 @@ #include <sys/statvfs.h> #include <sys/stat.h> #include <sys/ioctl.h> +#include <linux/fs.h> -#include "blk.h" #include "tapdisk.h" #include "tapdisk-driver.h" #include "tapdisk-interface.h" @@ -64,9 +63,7 @@ struct tdaio_state { static int tdaio_get_image_info(int fd, td_disk_info_t *info) { int ret; - long size; - unsigned long total_size; - struct statvfs statBuf; + unsigned long long bytes; struct stat stat; ret = fstat(fd, &stat); @@ -78,17 +75,32 @@ static int tdaio_get_image_info(int fd, if (S_ISBLK(stat.st_mode)) { /*Accessing block device directly*/ info->size = 0; - if (blk_getimagesize(fd, &info->size) != 0) + if (ioctl(fd, BLKGETSIZE64, &bytes) == 0) { + info->size = bytes >> SECTOR_SHIFT; + } else if (ioctl(fd, BLKGETSIZE, &info->size) != 0) { + DPRINTF + ("ERR: BLKGETSIZE and BLKGETSIZE64 failed, couldn''t stat image"); return -EINVAL; + } DPRINTF("Image size: \n\tpre sector_shift [%llu]\n\tpost " "sector_shift [%llu]\n", (long long unsigned)(info->size << SECTOR_SHIFT), (long long unsigned)info->size); - /*Get the sector size*/ - if (blk_getsectorsize(fd, &info->sector_size) != 0) + /*Get the sector size */ +#if defined(BLKSSZGET) + { info->sector_size = DEFAULT_SECTOR_SIZE; + ioctl(fd, BLKSSZGET, &info->sector_size); + + if (info->sector_size != DEFAULT_SECTOR_SIZE) + DPRINTF("Note: sector size is %ld (not %d)\n", + info->sector_size, DEFAULT_SECTOR_SIZE); + } +#else + info->sector_size = DEFAULT_SECTOR_SIZE; +#endif } else { /*Local file? try fstat instead*/ @@ -129,23 +141,25 @@ int tdaio_open(td_driver_t *driver, cons /* Open the file */ o_flags = O_DIRECT | O_LARGEFILE | ((flags & TD_OPEN_RDONLY) ? O_RDONLY : O_RDWR); - fd = open(name, o_flags); + fd = open(name, o_flags); - if ( (fd == -1) && (errno == EINVAL) ) { + if ( (fd == -1) && (errno == EINVAL)) { - /* Maybe O_DIRECT isn''t supported. */ + /* Maybe O_DIRECT isn''t supported. 
*/ o_flags &= ~O_DIRECT; - fd = open(name, o_flags); - if (fd != -1) DPRINTF("WARNING: Accessing image without" - "O_DIRECT! (%s)\n", name); + fd = open(name, o_flags); + if (fd != -1) + DPRINTF("WARNING: Accessing image without" + "O_DIRECT! (%s)\n", name); - } else if (fd != -1) DPRINTF("open(%s) with O_DIRECT\n", name); - - if (fd == -1) { + } else if (fd != -1) + DPRINTF("open(%s) with O_DIRECT\n", name); + + if (fd == -1) { DPRINTF("Unable to open [%s] (%d)!\n", name, 0 - errno); - ret = 0 - errno; - goto done; - } + ret = 0 - errno; + goto done; + } ret = tdaio_get_image_info(fd, &driver->info); if (ret) { @@ -153,106 +167,126 @@ int tdaio_open(td_driver_t *driver, cons goto done; } - prv->fd = fd; + prv->fd = fd; -done: - return ret; + done: + return ret; } -void tdaio_complete(void *arg, struct tiocb *tiocb, int err) +void +tdaio_complete(void *arg, struct tiocb *tiocb __attribute__((unused)), + int err) { - struct aio_request *aio = (struct aio_request *)arg; + struct aio_request *aio = (struct aio_request *) arg; struct tdaio_state *prv = aio->state; td_complete_request(aio->treq, err); prv->aio_free_list[prv->aio_free_count++] = aio; } -void tdaio_queue_read(td_driver_t *driver, td_request_t treq) +void tdaio_queue_read(td_driver_t * driver, td_request_t treq) { int size; uint64_t offset; struct aio_request *aio; struct tdaio_state *prv; - prv = (struct tdaio_state *)driver->data; - size = treq.secs * driver->info.sector_size; - offset = treq.sec * (uint64_t)driver->info.sector_size; + prv = (struct tdaio_state *) driver->data; + size = treq.secs * driver->info.sector_size; + offset = treq.sec * (uint64_t) driver->info.sector_size; if (prv->aio_free_count == 0) goto fail; - aio = prv->aio_free_list[--prv->aio_free_count]; - aio->treq = treq; + aio = prv->aio_free_list[--prv->aio_free_count]; + aio->treq = treq; aio->state = prv; td_prep_read(&aio->tiocb, prv->fd, treq.buf, - size, offset, tdaio_complete, aio); + size, offset, tdaio_complete, aio); 
td_queue_tiocb(driver, &aio->tiocb); return; -fail: + fail: td_complete_request(treq, -EBUSY); } -void tdaio_queue_write(td_driver_t *driver, td_request_t treq) +void tdaio_queue_write(td_driver_t * driver, td_request_t treq) { int size; uint64_t offset; struct aio_request *aio; struct tdaio_state *prv; - prv = (struct tdaio_state *)driver->data; - size = treq.secs * driver->info.sector_size; - offset = treq.sec * (uint64_t)driver->info.sector_size; + prv = (struct tdaio_state *) driver->data; + size = treq.secs * driver->info.sector_size; + offset = treq.sec * (uint64_t) driver->info.sector_size; if (prv->aio_free_count == 0) goto fail; - aio = prv->aio_free_list[--prv->aio_free_count]; - aio->treq = treq; + aio = prv->aio_free_list[--prv->aio_free_count]; + aio->treq = treq; aio->state = prv; td_prep_write(&aio->tiocb, prv->fd, treq.buf, - size, offset, tdaio_complete, aio); + size, offset, tdaio_complete, aio); td_queue_tiocb(driver, &aio->tiocb); return; -fail: + fail: td_complete_request(treq, -EBUSY); } -int tdaio_close(td_driver_t *driver) +int tdaio_close(td_driver_t * driver) { - struct tdaio_state *prv = (struct tdaio_state *)driver->data; - + struct tdaio_state *prv = (struct tdaio_state *) driver->data; + close(prv->fd); return 0; } -int tdaio_get_parent_id(td_driver_t *driver, td_disk_id_t *id) +int +tdaio_get_parent_id(td_driver_t * driver __attribute__((unused)), + td_disk_id_t * id __attribute__((unused))) { return TD_NO_PARENT; } -int tdaio_validate_parent(td_driver_t *driver, - td_driver_t *pdriver, td_flag_t flags) +int +tdaio_validate_parent(td_driver_t * driver __attribute__((unused)), + td_driver_t * pdriver __attribute__((unused)), + td_flag_t flags __attribute__((unused))) { return -EINVAL; } +void tdaio_stats(td_driver_t * driver, td_stats_t * st) +{ + struct tdaio_state *prv = (struct tdaio_state *) driver->data; + int n_pending; + + n_pending = MAX_AIO_REQS - prv->aio_free_count; + + tapdisk_stats_field(st, "reqs", "{"); + 
tapdisk_stats_field(st, "max", "lu", MAX_AIO_REQS); + tapdisk_stats_field(st, "pending", "d", n_pending); + tapdisk_stats_leave(st, ''}''); +} + struct tap_disk tapdisk_aio = { - .disk_type = "tapdisk_aio", - .flags = 0, - .private_data_size = sizeof(struct tdaio_state), - .td_open = tdaio_open, - .td_close = tdaio_close, - .td_queue_read = tdaio_queue_read, - .td_queue_write = tdaio_queue_write, - .td_get_parent_id = tdaio_get_parent_id, + .disk_type = "tapdisk_aio", + .flags = 0, + .private_data_size = sizeof(struct tdaio_state), + .td_open = tdaio_open, + .td_close = tdaio_close, + .td_queue_read = tdaio_queue_read, + .td_queue_write = tdaio_queue_write, + .td_get_parent_id = tdaio_get_parent_id, .td_validate_parent = tdaio_validate_parent, - .td_debug = NULL, + .td_debug = NULL, + .td_stats = tdaio_stats, };
Thanos Makatos
2013-Apr-19 15:40 UTC
[PATCH 02 of 21] blktap3/drivers: Introduce I/O optimisation routines
This patch copies files io-optimize.[ch] from blktap2 (with changes coming from blktap2.5). I haven't looked thoroughly into them; they seem to contain functionality for optimising the processing of I/O requests, e.g. merging of requests etc. Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com> diff --git a/tools/blktap2/drivers/io-optimize.c b/tools/blktap3/drivers/io-optimize.c copy from tools/blktap2/drivers/io-optimize.c copy to tools/blktap3/drivers/io-optimize.c --- a/tools/blktap2/drivers/io-optimize.c +++ b/tools/blktap3/drivers/io-optimize.c @@ -1,5 +1,7 @@ /* - * Copyright (c) 2008, XenSource Inc. + * Copyright (c) 2007, XenSource Inc. + * Copyright (c) 2010, Citrix Systems, Inc. + * * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -44,11 +46,7 @@ #define DBG(ctx, f, a...) ((void)0) #endif -static void print_merged_iocbs(struct opioctx *ctx, - struct iocb **iocbs, int num_iocbs); - -void -opio_free(struct opioctx *ctx) +void opio_free(struct opioctx *ctx) { free(ctx->opios); ctx->opios = NULL; @@ -214,12 +212,48 @@ merge(struct opioctx *ctx, struct iocb * return merge_tail(ctx, head, io); } -int -io_merge(struct opioctx *ctx, struct iocb **queue, int num) +#if (defined(TEST) || defined(DEBUG)) +static void +print_optimized_iocbs(struct opioctx *ctx, struct opio *op, int *cnt) +{ + char pref[10]; + + while (op) { + snprintf(pref, 10, " %d: ", (*cnt)++); + __print_iocb(ctx, op->iocb, pref); + op = op->next; + } +} + +static void +print_merged_iocbs(struct opioctx *ctx, struct iocb **iocbs, int num_iocbs) +{ + int i, cnt; + char pref[10]; + struct iocb *io; + struct opio *op; + + DBG(ctx, "merged iocbs:\n"); + for (i = 0, cnt = 0; i < num_iocbs; i++) { + io = iocbs[i]; + snprintf(pref, 10, "%d: ", cnt++); + __print_iocb(ctx, io, pref); + + if (iocb_optimized(ctx, io)) { + op = (struct opio *) io->data; + print_optimized_iocbs(ctx, op->next, &cnt); + } + } +} +#else +#define print_optimized_iocbs(...) 
+#define print_merged_iocbs(...) +#endif + +int io_merge(struct opioctx *ctx, struct iocb **queue, int num) { int i, on_queue; struct iocb *io, **q; - struct opio *ophead; if (!num) return 0; @@ -234,9 +268,7 @@ io_merge(struct opioctx *ctx, struct ioc queue[++on_queue] = io; } -#if (defined(TEST) || defined(DEBUG)) print_merged_iocbs(ctx, queue, on_queue + 1); -#endif return ++on_queue; } @@ -346,80 +378,20 @@ io_split(struct opioctx *ctx, struct io_ debug print functions ******************************************************************************/ static inline void -__print_iocb(struct opioctx *ctx, struct iocb *io, char *prefix) +__print_iocb(struct opioctx *ctx __attribute__((unused)), + struct iocb *io __attribute__((unused)), + char *prefix __attribute__((unused))) { - char *type; - - type = (io->aio_lio_opcode == IO_CMD_PREAD ? "read" : "write"); - - DBG(ctx, "%soff: %08llx, nbytes: %04lx, buf: %p, type: %s, data: %08lx," + DBG(ctx, + "%soff: %08llx, nbytes: %04lx, buf: %p, type: %s, data: %08lx," " optimized: %d\n", prefix, io->u.c.offset, io->u.c.nbytes, - io->u.c.buf, type, (unsigned long)io->data, - iocb_optimized(ctx, io)); + io->u.c.buf, + (io->aio_lio_opcode == IO_CMD_PREAD ? 
"read" : "write"), + (unsigned long) io->data, iocb_optimized(ctx, io)); } -static char *null_prefix = ""; -#define print_iocb(ctx, io) __print_iocb(ctx, io, null_prefix) +#define print_iocb(ctx, io) __print_iocb(ctx, io, "") -static void -print_iocbs(struct opioctx *ctx, struct iocb **iocbs, int num_iocbs) -{ - int i; - char pref[10]; - struct iocb *io; - - DBG(ctx, "iocbs:\n"); - for (i = 0; i < num_iocbs; i++) { - io = iocbs[i]; - snprintf(pref, 10, "%d: ", i); - __print_iocb(ctx, io, pref); - } -} - -static void -print_optimized_iocbs(struct opioctx *ctx, struct opio *op, int *cnt) -{ - char pref[10]; - - while (op) { - snprintf(pref, 10, " %d: ", (*cnt)++); - __print_iocb(ctx, op->iocb, pref); - op = op->next; - } -} - -static void -print_merged_iocbs(struct opioctx *ctx, struct iocb **iocbs, int num_iocbs) -{ - int i, cnt; - char pref[10]; - struct iocb *io; - struct opio *op; - - DBG(ctx, "merged iocbs:\n"); - for (i = 0, cnt = 0; i < num_iocbs; i++) { - io = iocbs[i]; - snprintf(pref, 10, "%d: ", cnt++); - __print_iocb(ctx, io, pref); - - if (iocb_optimized(ctx, io)) { - op = (struct opio *)io->data; - print_optimized_iocbs(ctx, op->next, &cnt); - } - } -} - -static void -print_events(struct opioctx *ctx, struct io_event *events, int num_events) -{ - int i; - struct iocb *io; - - for (i = 0; i < num_events; i++) { - io = events[i].obj; - print_iocb(ctx, io); - } -} /****************************************************************************** end debug print functions ******************************************************************************/ @@ -571,8 +543,34 @@ init_optest(struct iocb *iocb_list, iocbs[i] = &iocb_list[i]; } -int -main(int argc, char **argv) +static void +print_iocbs(struct opioctx *ctx, struct iocb **iocbs, int num_iocbs) +{ + int i; + char pref[10]; + struct iocb *io; + + DBG(ctx, "iocbs:\n"); + for (i = 0; i < num_iocbs; i++) { + io = iocbs[i]; + snprintf(pref, 10, "%d: ", i); + __print_iocb(ctx, io, pref); + } +} + +static void 
+print_events(struct opioctx *ctx, struct io_event *events, int num_events) +{ + int i; + struct iocb *io; + + for (i = 0; i < num_events; i++) { + io = events[i].obj; + print_iocb(ctx, io); + } +} + +int main(int argc, char **argv) { uint64_t num_secs; struct opioctx ctx;
Thanos Makatos
2013-Apr-19 15:40 UTC
[PATCH 03 of 21] blktap3/drivers: Introduce libaio compatibility
This patch copies libaio-compat.h from blktap2. Not sure what it's used for; a comment inside it suggests this header addresses a temporary problem. Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com> diff --git a/tools/blktap2/drivers/libaio-compat.h b/tools/blktap3/drivers/libaio-compat.h copy from tools/blktap2/drivers/libaio-compat.h copy to tools/blktap3/drivers/libaio-compat.h
Thanos Makatos
2013-Apr-19 15:40 UTC
[PATCH 04 of 21] blktap3/drivers: Introduce locking functionality
This patch copies files lock.[ch] from blktap2, with minor changes coming from blktap2.5. I haven't looked thoroughly into them; they seem to implement some kind of locking functionality. Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com> diff --git a/tools/blktap2/drivers/lock.c b/tools/blktap3/drivers/lock.c copy from tools/blktap2/drivers/lock.c copy to tools/blktap3/drivers/lock.c --- a/tools/blktap2/drivers/lock.c +++ b/tools/blktap3/drivers/lock.c @@ -41,10 +41,9 @@ #include <time.h> #include <dirent.h> #include <limits.h> +#include "blktap3.h" #include "lock.h" -#define unlikely(x) __builtin_expect(!!(x), 0) - /* format: xenlk.hostname.uuid.<xf><rw>*/ #define LF_POSTFIX ".xenlk" #define LFXL_FORMAT LF_POSTFIX ".%s.%s.x%s" @@ -106,7 +105,8 @@ static char *create_lockfn_link(char *fn return lockfn_link; } -static int NFSnormalizedStatTime(char *fn, struct stat *statnow, int *reterrno) +static int NFSnormalizedStatTime(char *fn, struct stat *statnow, + int *reterrno) { int result = LOCK_OK; int uniq; @@ -121,13 +121,20 @@ static int NFSnormalizedStatTime(char *f srandom((int)time(0) ^ pid); uniq = random() % 0xffffff; buf = malloc(strlen(fn) + 24); - if (unlikely(!buf)) { result = LOCK_ENOMEM; goto finish; } + if (unlikely(!buf)) { + result = LOCK_ENOMEM; + goto finish; + } strcpy(buf, fn); sprintf(buf + strlen(buf), ".xen%08d.tmp", uniq); fd = open(buf, O_WRONLY | O_CREAT, 0644); - if (fd == -1) { *reterrno = errno; result = LOCK_EOPEN; goto finish; } + if (fd == -1) { + *reterrno = errno; + result = LOCK_EOPEN; + goto finish; + } clstat = close(fd); if (unlikely(clstat == -1)) { LOG("fail on close\n"); @@ -144,7 +151,7 @@ finish: return result; } -static int writer_eval(char *name, int readonly) +static int writer_eval(char *name, int readonly __attribute__((unused))) { return name[strlen(name)-1] == ''w''; } @@ -173,8 +180,10 @@ static int lock_holder(char *fn, char *l *ioerror = 0; *elt = 0; - if (!dirname) goto finish; - if (!uname) goto finish; + 
if (!dirname) + goto finish; + if (!uname) + goto finish; /* get directory */ ptr = strrchr(lockfn, ''/''); @@ -207,9 +216,12 @@ static int lock_holder(char *fn, char *l char *p1 = strrchr(fn, ''/''); char *p2 = strrchr(lockfn, ''/''); char *p3 = strrchr(lockfn_link, ''/''); - if (p1) p1+=1; - if (p2) p2+=1; - if (p3) p3+=1; + if (p1) + p1 += 1; + if (p2) + p2 += 1; + if (p3) + p3 += 1; if (strcmp(dptr->d_name, p1 ? p1 : fn) && strcmp(dptr->d_name, p2 ? p2 : lockfn) && strcmp(dptr->d_name, p3 ? p3 : lockfn_link) && @@ -250,7 +262,7 @@ static int lock_holder(char *fn, char *l } } dptr = readdir(pd); - if (!dptr && errno) { + if (!dptr && !errno) { *ioerror = EIO; } } @@ -265,7 +277,8 @@ finish: return (*ioerror) ? 1 : status; } -int lock(char *fn_to_lock, char *uuid, int force, int readonly, int *lease_time, int *retstatus) +int lock(char *fn_to_lock, char *uuid, int force, int readonly, + int *lease_time, int *retstatus) { char *lockfn = 0; char *lockfn_xlink = 0; @@ -296,15 +309,27 @@ int lock(char *fn_to_lock, char *uuid, i /* build lock file strings */ lockfn = create_lockfn(fn_to_lock); - if (unlikely(!lockfn)) { status = ENOMEM; *retstatus = LOCK_ENOMEM; goto finish; } + if (unlikely(!lockfn)) { + status = ENOMEM; + *retstatus = LOCK_ENOMEM; + goto finish; + } lockfn_xlink = create_lockfn_link(fn_to_lock, LFXL_FORMAT, uuid, readonly); - if (unlikely(!lockfn_xlink)) { status = ENOMEM; *retstatus = LOCK_ENOMEM; goto finish; } + if (unlikely(!lockfn_xlink)) { + status = ENOMEM; + *retstatus = LOCK_ENOMEM; + goto finish; + } lockfn_flink = create_lockfn_link(fn_to_lock, LFFL_FORMAT, uuid, readonly); - if (unlikely(!lockfn_flink)) { status = ENOMEM; *retstatus = LOCK_ENOMEM; goto finish; } + if (unlikely(!lockfn_flink)) { + status = ENOMEM; + *retstatus = LOCK_ENOMEM; + goto finish; + } try_again: if (retry_attempts++ > RETRY_MAX) { @@ -454,8 +479,7 @@ skip: } tmpstat = unlink(lockfn_xlink); if (unlikely(tmpstat == -1)) { - LOG("error removing linked lock file %s", 
- lockfn_xlink); + LOG("error removing linked lock file %s", lockfn_xlink); } XSLEEP; status = LOCK_ESTAT; @@ -470,8 +494,7 @@ skip: status = 0; tmpstat = unlink(lockfn_xlink); if (unlikely(tmpstat == -1)) { - LOG("error removing linked lock file %s", - lockfn_xlink); + LOG("error removing linked lock file %s", lockfn_xlink); } goto finish; } else { @@ -483,8 +506,7 @@ skip: } tmpstat = unlink(lockfn_xlink); if (unlikely(tmpstat == -1)) { - LOG("error removing linked lock file %s", - lockfn_xlink); + LOG("error removing linked lock file %s", lockfn_xlink); } XSLEEP; *retstatus = LOCK_EINODE; @@ -554,8 +576,8 @@ finish: } *retstatus = LOCK_EHELD_RD; } - if (established_lease_time) *lease_time = - established_lease_time; + if (established_lease_time) + *lease_time = established_lease_time; } skip_scan: @@ -573,10 +595,12 @@ skip_scan: failed_write = write(fd, lockfn_flink, strlen(lockfn_flink)) != strlen(lockfn_flink); - if (failed_write) status = errno; + if (failed_write) + status = errno; failed_write |= write(fd, tmpbuf, strlen(tmpbuf)) != strlen(tmpbuf); - if (failed_write) status = errno; + if (failed_write) + status = errno; if (failed_write) { clstat = close(fd); if (unlikely(clstat == -1)) { @@ -605,8 +629,7 @@ skip_scan: /* remove exclusive lock, final read/write locks will hold */ tmpstat = unlink(lockfn); if (unlikely(tmpstat == -1)) { - LOG("error removing exclusive lock file %s", - lockfn); + LOG("error removing exclusive lock file %s", lockfn); } free(lockfn); @@ -614,7 +637,8 @@ skip_scan: free(lockfn_flink); /* set lease time to -1 if error, so no one is apt to use it */ - if (*retstatus < 0) *lease_time = -1; + if (*retstatus < 0) + *lease_time = -1; LOG("returning status %d, errno=%d\n", status, errno); return status; @@ -633,7 +657,10 @@ int unlock(char *fn_to_unlock, char *uui lockfn_link = create_lockfn_link(fn_to_unlock, LFFL_FORMAT, uuid, readonly); - if (unlikely(!lockfn_link)) { *status = LOCK_ENOMEM; goto finish; } + if 
(unlikely(!lockfn_link)) { + *status = LOCK_ENOMEM; + goto finish; + } if (unlink(lockfn_link) == -1) { LOG("error removing linked lock file %s", lockfn_link); @@ -686,7 +713,10 @@ int lock_delta(char *fn, int *ret_lease, ptr += 1; } pd = opendir(dirname); - if (!pd) { reterrno = errno; goto finish; } + if (!pd) { + reterrno = errno; + goto finish; + } dptr = readdir(pd); while (dptr) { @@ -703,8 +733,7 @@ int lock_delta(char *fn, int *ret_lease, strcat(fpath, "/"); strcat(fpath, dptr->d_name); if (lstat(fpath, &statbuf) != -1) { - int diff = (int)statnow.st_mtime - - (int)statbuf.st_mtime; + int diff = (int) statnow.st_mtime - (int) statbuf.st_mtime; /* adjust diff if someone updated the lock between now and when we created the "now" file @@ -744,10 +773,12 @@ finish: free(uname); /* returns smallest lock time, or error */ - if (result == INT_MAX) result = LOCK_ENOLOCK; + if (result == INT_MAX) + result = LOCK_ENOLOCK; /* set lease time to -1 if error, so no one is apt to use it */ - if ((result < 0) || reterrno) *max_lease = -1; + if ((result < 0) || reterrno) + *max_lease = -1; *ret_lease = result; return reterrno; } @@ -769,7 +800,8 @@ static void usage(char *prg) printf(" t : test the file (after random locks)\n"); printf(" r : random lock tests (must ^C)\n"); printf(" u : unlock, readonly? uniqID (default is PID)\n"); - printf(" l : lock, readonly? force?, uniqID (default is PID), lease time\n"); + printf + (" l : lock, readonly? 
force?, uniqID (default is PID), lease time\n"); } static void test_file(char *fn) @@ -792,6 +824,7 @@ static void test_file(char *fn) } prev_count = count + 1; } + fclose(fptr); } static void random_locks(char *fn) @@ -841,10 +874,10 @@ static void random_locks(char *fn) int bw = bytes-2; while (bw && filebuf[bw]!=''\n'') bw--; - if (!bw) bw = -1; + if (!bw) + bw = -1; sscanf(&filebuf[bw+1], - "%d %d %d", - &count, &dummy, &dummy); + "%d %d %d", &count, &dummy, &dummy); count += 1; } lseek(fd, 0, SEEK_END); @@ -919,15 +952,27 @@ int main(int argc, char *argv[]) } else if (!strcmp(argv[1],"p")) { perf_lock(argv[2], argc < 3 ? 100000 : atoi(argv[3])); } else if (!strcmp(argv[1],"l")) { - if (argc < 4) force = 0; else force = atoi(argv[3]); - if (argc < 5) readonly = 0; else readonly = atoi(argv[4]); - if (argc >= 6) ptr = argv[5]; - if (argc == 7) lease = atoi(argv[6]); + if (argc < 4) + force = 0; + else + force = atoi(argv[3]); + if (argc < 5) + readonly = 0; + else + readonly = atoi(argv[4]); + if (argc >= 6) + ptr = argv[5]; + if (argc == 7) + lease = atoi(argv[6]); status = lock(argv[2], ptr, readonly, force, &lease, &intstatus); printf("lock status = %d\n", status); } else if (!strcmp(argv[1],"u") ) { - if (argc < 5) readonly = 0; else readonly = atoi(argv[3]); - if (argc == 5) ptr = argv[4]; + if (argc < 5) + readonly = 0; + else + readonly = atoi(argv[3]); + if (argc == 5) + ptr = argv[4]; status = unlock(argv[2], ptr, readonly, &intstatus); printf("unlock status = %d\n", intstatus); } else { @@ -949,17 +994,19 @@ static void usage(char *prg) " unlock <filename> <r|w> <uniqid>\n" " lock <filename> <r|w> <0|1> <uniqid> <leasetime>\n", prg); printf(" delta : get time since lock last refreshed\n"); - printf(" returns delta time and max lease time in seconds\n"); + printf + (" returns delta time and max lease time in seconds\n"); printf(" unlock: unlock request filename, r|w, uniqID\n"); printf(" returns status (success is 0)\n"); - printf(" lock : lock request 
filename, r|w, force?, uniqID, lease time request\n"); - printf(" returns status (success is 0) and established lease time in seconds\n"); + printf + (" lock : lock request filename, r|w, force?, uniqID, lease time request\n"); + printf + (" returns status (success is 0) and established lease time in seconds\n"); } int main(int argc, char *argv[]) { int status = 0; - int dlock; char *ptr; int force; int readonly; diff --git a/tools/blktap2/drivers/lock.h b/tools/blktap3/drivers/lock.h copy from tools/blktap2/drivers/lock.h copy to tools/blktap3/drivers/lock.h
Thanos Makatos
2013-Apr-19 15:40 UTC
[PATCH 05 of 21] blktap3/drivers: Introduce logging for the tapdisk
This patch copies logging functionality from blktap2, with changes coming from blktap2.5. Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com> diff --git a/tools/blktap2/drivers/log.h b/tools/blktap3/drivers/log.h copy from tools/blktap2/drivers/log.h copy to tools/blktap3/drivers/log.h --- a/tools/blktap2/drivers/log.h +++ b/tools/blktap3/drivers/log.h @@ -32,10 +32,7 @@ #define __LOG_H__ 1 #include <inttypes.h> - #include <xen/io/ring.h> -/* for wmb et al */ -#include <xenctrl.h> #define LOGCMD_SHMP "shmp" #define LOGCMD_PEEK "peek" diff --git a/tools/blktap2/drivers/tapdisk-client.c b/tools/blktap3/drivers/tapdisk-client.c copy from tools/blktap2/drivers/tapdisk-client.c copy to tools/blktap3/drivers/tapdisk-client.c --- a/tools/blktap2/drivers/tapdisk-client.c +++ b/tools/blktap3/drivers/tapdisk-client.c @@ -39,6 +39,7 @@ #include <sys/socket.h> #include <sys/types.h> #include <sys/un.h> +#include <xenctrl.h> #include "log.h" @@ -109,7 +110,7 @@ static int tdctl_open(const char* sockpa saddr.sun_family = AF_UNIX; memcpy(saddr.sun_path, sockpath, strlen(sockpath)); - if (connect(fd, (struct sockaddr *)&saddr, sizeof(saddr)) < 0) { + if (connect(fd, &saddr, sizeof(saddr)) < 0) { BWPRINTF("error connecting to socket %s: %s", sockpath, strerror(errno)); close(fd); return -1; @@ -392,7 +393,7 @@ int await_responses(struct writelog* wl, BWPRINTF("EOF on control socket"); return -1; } else if (rc < sizeof(msg)) { - BWPRINTF("short reply (%d/%d bytes)", rc, (int) sizeof(msg)); + BWPRINTF("short reply (%d/%lu bytes)", rc, sizeof(msg)); return -1; } diff --git a/tools/blktap2/drivers/tapdisk-log.c b/tools/blktap3/drivers/tapdisk-log.c copy from tools/blktap2/drivers/tapdisk-log.c copy to tools/blktap3/drivers/tapdisk-log.c --- a/tools/blktap2/drivers/tapdisk-log.c +++ b/tools/blktap3/drivers/tapdisk-log.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, XenSource Inc. + * Copyright (c) 2008, 2009, XenSource Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -26,232 +26,241 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include <stdio.h> -#include <fcntl.h> +#include <stdlib.h> #include <unistd.h> -#include <stdlib.h> -#include <string.h> +#include <errno.h> #include <stdarg.h> #include <syslog.h> -#include <inttypes.h> #include <sys/time.h> +#include <sys/stat.h> +#include <sys/types.h> #include "tapdisk-log.h" #include "tapdisk-utils.h" +#include "tapdisk-logfile.h" +#include "tapdisk-syslog.h" +#include "tapdisk-server.h" + +#define TLOG_LOGFILE_BUFSZ (16<<10) +#define TLOG_SYSLOG_BUFSZ (8<<10) #define MAX_ENTRY_LEN 512 -#define MAX_ERROR_MESSAGES 16 -struct error { - int cnt; - int err; - char *func; - char msg[MAX_ENTRY_LEN]; +struct tlog { + char *name; + td_logfile_t logfile; + int precious; + int level; + + char *ident; + td_syslog_t syslog; + unsigned long errors; }; -struct ehandle { - int cnt; - int dropped; - struct error errors[MAX_ERROR_MESSAGES]; -}; - -struct tlog { - char *p; - int size; - uint64_t cnt; - char *buf; - int level; - char *file; - int append; -}; - -static struct ehandle tapdisk_err; static struct tlog tapdisk_log; -void -open_tlog(char *file, size_t bytes, int level, int append) +static void tlog_logfile_vprint(const char *fmt, va_list ap) { - tapdisk_log.size = ((bytes + 511) & (~511)); + tapdisk_logfile_vprintf(&tapdisk_log.logfile, fmt, ap); +} - if (asprintf(&tapdisk_log.file, "%s.%d", file, getpid()) == -1) - return; +static void __printf(1, 2) tlog_logfile_print(const char *fmt, ...) +{ + va_list ap; - if (posix_memalign((void **)&tapdisk_log.buf, 512, tapdisk_log.size)) { - free(tapdisk_log.file); - tapdisk_log.buf = NULL; - return; + va_start(ap, fmt); + tlog_logfile_vprint(fmt, ap); + va_end(ap); } - memset(tapdisk_log.buf, 0, tapdisk_log.size); +#define tlog_info(_fmt, _args ...) 
\ + tlog_logfile_print("%s: "_fmt, tapdisk_log.ident, ##_args) - tapdisk_log.p = tapdisk_log.buf; - tapdisk_log.level = level; - tapdisk_log.append = append; +static void tlog_logfile_save(void) +{ + td_logfile_t *logfile = &tapdisk_log.logfile; + const char *name = tapdisk_log.name; + int err; + + tlog_info("saving log, %lu errors", tapdisk_log.errors); + + tapdisk_logfile_flush(logfile); + + err = tapdisk_logfile_rename(logfile, TLOG_DIR, name, ".log"); + + tlog_syslog(LOG_INFO, "logfile saved to %s: %d\n", logfile->path, err); +} + +static void tlog_logfile_close(void) +{ + td_logfile_t *logfile = &tapdisk_log.logfile; + int keep; + + keep = tapdisk_log.precious || tapdisk_log.errors; + + tlog_info("closing log, %lu errors", tapdisk_log.errors); + + if (keep) + tlog_logfile_save(); + + tapdisk_logfile_close(logfile); + + if (!keep) + tapdisk_logfile_unlink(logfile); +} + +static int tlog_logfile_open(const char *name, int level) +{ + td_logfile_t *logfile = &tapdisk_log.logfile; + int mode, err; + + err = mkdir(TLOG_DIR, 0755); + if (err) { + err = -errno; + if (err != -EEXIST) + goto fail; + } + + err = tapdisk_logfile_open(logfile, + TLOG_DIR, name, ".tmp", TLOG_LOGFILE_BUFSZ); + if (err) + goto fail; + + mode = (level == TLOG_DBG) ? 
_IOLBF : _IOFBF; + + err = tapdisk_logfile_setvbuf(logfile, mode); + if (err) + goto fail; + + tlog_info("log start, level %d", level); + + return 0; + + fail: + tlog_logfile_close(); + return err; +} + +static void tlog_syslog_close(void) +{ + td_syslog_t *syslog = &tapdisk_log.syslog; + + tapdisk_syslog_stats(syslog, LOG_INFO); + tapdisk_syslog_flush(syslog); + tapdisk_syslog_close(syslog); + } + +static int tlog_syslog_open(const char *ident __attribute__((unused)), + int facility) +{ + td_syslog_t *syslog = &tapdisk_log.syslog; + int err; + + err = tapdisk_syslog_open(syslog, + tapdisk_log.ident, facility, + TLOG_SYSLOG_BUFSZ); + return err; + } + +void tlog_vsyslog(int prio, const char *fmt, va_list ap) +{ + td_syslog_t *syslog = &tapdisk_log.syslog; + + tapdisk_vsyslog(syslog, prio, fmt, ap); +} + +void tlog_syslog(int prio, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + tlog_vsyslog(prio, fmt, ap); + va_end(ap); +} + +int +tlog_open(const char *name, int facility, int level) +{ + int err; + + DPRINTF("tapdisk-log: started, level %d\n", level); + + tapdisk_log.level = level; + tapdisk_log.name = strdup(name); + tapdisk_log.ident = tapdisk_syslog_ident(name); + + if (!tapdisk_log.name || !tapdisk_log.ident) { + err = -errno; + goto fail; + } + + err = tlog_logfile_open(tapdisk_log.name, level); + if (err) + goto fail; + + err = tlog_syslog_open(tapdisk_log.ident, facility); + if (err) + goto fail; + + return 0; + + fail: + tlog_close(); + return err; } void -close_tlog(void) +tlog_close(void) { - if (!tapdisk_log.buf) - return; + DPRINTF("tapdisk-log: closing after %lu errors\n", tapdisk_log.errors); - if (tapdisk_log.append) - tlog_flush(); + tlog_logfile_close(); + tlog_syslog_close(); - free(tapdisk_log.buf); - free(tapdisk_log.file); + free(tapdisk_log.ident); + tapdisk_log.ident = NULL; +} - memset(&tapdisk_log, 0, sizeof(struct tlog)); +void tlog_precious(void) +{ + if (!tapdisk_log.precious) + tlog_logfile_save(); + + 
tapdisk_log.precious = 1; } void -__tlog_write(int level, const char *func, const char *fmt, ...) +__tlog_write(int level, const char *fmt, ...) { - char *buf; - va_list ap; - struct timeval t; - int ret, len, avail; + va_list ap; - if (!tapdisk_log.buf) - return; - - if (level > tapdisk_log.level) - return; - - avail = tapdisk_log.size - (tapdisk_log.p - tapdisk_log.buf); - if (avail < MAX_ENTRY_LEN) { - if (tapdisk_log.append) - tlog_flush(); - tapdisk_log.p = tapdisk_log.buf; - } - - buf = tapdisk_log.p; - gettimeofday(&t, NULL); - len = snprintf(buf, MAX_ENTRY_LEN - 1, "%08"PRIu64":%010ld.%06lld:" - "%s ", tapdisk_log.cnt, - t.tv_sec, (unsigned long long)t.tv_usec, func); - - va_start(ap, fmt); - ret = vsnprintf(buf + len, MAX_ENTRY_LEN - (len + 1), fmt, ap); - va_end(ap); - - len = (ret < MAX_ENTRY_LEN - (len + 1) ? - len + ret : MAX_ENTRY_LEN - 1); - buf[len] = ''\0''; - - tapdisk_log.cnt++; - tapdisk_log.p += len; + if (level <= tapdisk_log.level) { + va_start(ap, fmt); + tlog_logfile_vprint(fmt, ap); + va_end(ap); + } } -void -__tlog_error(int err, const char *func, const char *fmt, ...) +void __tlog_error(const char *fmt, ...) { - va_list ap; - int i, len, ret; - struct error *e; - struct timeval t; + va_list ap; - err = (err > 0 ? err : -err); + va_start(ap, fmt); + tlog_vsyslog(LOG_ERR, fmt, ap); + va_end(ap); - for (i = 0; i < tapdisk_err.cnt; i++) { - e = &tapdisk_err.errors[i]; - if (e->err == err && e->func == func) { - e->cnt++; - return; - } - } - - if (tapdisk_err.cnt >= MAX_ERROR_MESSAGES) { - tapdisk_err.dropped++; - return; - } - - gettimeofday(&t, NULL); - e = &tapdisk_err.errors[tapdisk_err.cnt]; - - len = snprintf(e->msg, MAX_ENTRY_LEN - 1, "%010ld.%06lld:%s ", - t.tv_sec, (unsigned long long)t.tv_usec, func); - - va_start(ap, fmt); - ret = vsnprintf(e->msg + len, MAX_ENTRY_LEN - (len + 1), fmt, ap); - va_end(ap); - - len = (ret < MAX_ENTRY_LEN - (len + 1) ? 
- len + ret : MAX_ENTRY_LEN - 1); - e->msg[len] = ''\0''; - - e->cnt++; - e->err = err; - e->func = (char *)func; - tapdisk_err.cnt++; + tapdisk_log.errors++; } -void -tlog_print_errors(void) +void tapdisk_start_logging(const char *ident, const char *_facility) { - int i; - struct error *e; + int facility; - for (i = 0; i < tapdisk_err.cnt; i++) { - e = &tapdisk_err.errors[i]; - syslog(LOG_INFO, "TAPDISK ERROR: errno %d at %s (cnt = %d): " - "%s\n", e->err, e->func, e->cnt, e->msg); - } - - if (tapdisk_err.dropped) - syslog(LOG_INFO, "TAPDISK ERROR: %d other error messages " - "dropped\n", tapdisk_err.dropped); + facility = tapdisk_syslog_facility(_facility); + tapdisk_server_openlog(ident, LOG_CONS | LOG_ODELAY, facility); } -void -tlog_flush_errors(void) +void tapdisk_stop_logging(void) { - int i; - struct error *e; - - for (i = 0; i < tapdisk_err.cnt; i++) { - e = &tapdisk_err.errors[i]; - tlog_write(TLOG_WARN, "TAPDISK ERROR: errno %d at %s " - "(cnt = %d): %s\n", e->err, e->func, e->cnt, - e->msg); - } - - if (tapdisk_err.dropped) - tlog_write(TLOG_WARN, "TAPDISK ERROR: %d other error messages " - "dropped\n", tapdisk_err.dropped); + tapdisk_server_closelog(); } - -void -tlog_flush(void) -{ - int fd, flags; - size_t size, wsize; - - if (!tapdisk_log.buf) - return; - - flags = O_CREAT | O_WRONLY | O_DIRECT | O_NONBLOCK; - if (!tapdisk_log.append) - flags |= O_TRUNC; - - fd = open(tapdisk_log.file, flags, 0644); - if (fd == -1) - return; - - if (tapdisk_log.append) - if (lseek(fd, 0, SEEK_END) == (off_t)-1) - goto out; - - tlog_flush_errors(); - - size = tapdisk_log.p - tapdisk_log.buf; - wsize = ((size + 511) & (~511)); - - memset(tapdisk_log.buf + size, ''\n'', wsize - size); - write_exact(fd, tapdisk_log.buf, wsize); - - tapdisk_log.p = tapdisk_log.buf; - -out: - close(fd); -} diff --git a/tools/blktap3/drivers/tapdisk-logfile.c b/tools/blktap3/drivers/tapdisk-logfile.c new file mode 100644 --- /dev/null +++ b/tools/blktap3/drivers/tapdisk-logfile.c @@ -0,0 
+1,246 @@ +/* + * Copyright (c) 2009, XenSource Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of XenSource Inc. nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <time.h> +#include <stdarg.h> +#include <sys/time.h> +#include <sys/mman.h> + +#include "tapdisk-logfile.h" +#include "tapdisk-utils.h" + +#define MIN(a,b) (((a) < (b)) ? 
(a) : (b)) + +static inline size_t page_align(size_t size) +{ + size_t page_size = sysconf(_SC_PAGE_SIZE); + return (size + page_size - 1) & ~(page_size - 1); +} + +static void tapdisk_logfile_free_buffer(td_logfile_t * log) +{ + if (log->vbuf) { + munmap(log->vbuf, page_align(log->vbufsz)); + log->vbuf = NULL; + } +} + +static int tapdisk_logfile_init_buffer(td_logfile_t * log, size_t size) +{ + int prot, flags, err; + + if (!size) + return -EINVAL; + + prot = PROT_READ | PROT_WRITE; + flags = MAP_ANONYMOUS | MAP_PRIVATE; + + log->vbuf = mmap(NULL, page_align(size), prot, flags, -1, 0); + if (log->vbuf == MAP_FAILED) { + log->vbuf = NULL; + goto fail; + } + + err = mlock(log->vbuf, page_align(size)); + if (err) + goto fail; + + log->vbufsz = size; + + return 0; + + fail: + tapdisk_logfile_free_buffer(log); + err = -errno; + return err; +} + +int tapdisk_logfile_unlink(td_logfile_t * log) +{ + int err; + + err = unlink(log->path); + if (err) + err = -errno; + + return err; +} + +static int +__tapdisk_logfile_rename(td_logfile_t * log, const char *newpath) +{ + const size_t max = sizeof(log->path); + int err; + + if (!strcmp(log->path, newpath)) + return 0; + + if (strlen(newpath) > max) + return -ENAMETOOLONG; + + err = rename(log->path, newpath); + if (err) { + err = -errno; + return err; + } + + strncpy(log->path, newpath, max); + + return 0; +} + +static int +tapdisk_logfile_name(char *path, size_t size, + const char *dir, const char *ident, + const char *suffix) +{ + const size_t max = MIN(size, TD_LOGFILE_PATH_MAX); + return snprintf(path, max, "%s/%s.%d%s", dir, ident, getpid(), suffix); +} + +int +tapdisk_logfile_rename(td_logfile_t * log, + const char *dir, const char *ident, + const char *suffix) +{ + char newpath[TD_LOGFILE_PATH_MAX + 1]; + + tapdisk_logfile_name(newpath, sizeof(newpath), dir, ident, suffix); + + return __tapdisk_logfile_rename(log, newpath); +} + +void tapdisk_logfile_close(td_logfile_t * log) +{ + if (log->file) { + fclose(log->file); + 
log->file = NULL; + } + + tapdisk_logfile_free_buffer(log); +} + +int +tapdisk_logfile_open(td_logfile_t * log, + const char *dir, const char *ident, const char *ext, + size_t bufsz) +{ + int err; + + memset(log, 0, sizeof(log)); + + tapdisk_logfile_name(log->path, sizeof(log->path), dir, ident, ext); + + log->file = fopen(log->path, "w"); + if (!log->file) { + err = -errno; + goto fail; + } + + err = tapdisk_logfile_init_buffer(log, bufsz); + if (err) + goto fail; + + return 0; + + fail: + tapdisk_logfile_unlink(log); + tapdisk_logfile_close(log); + return err; +} + +int tapdisk_logfile_setvbuf(td_logfile_t * log, int mode) +{ + int err = 0; + + if (log->file) { + err = setvbuf(log->file, log->vbuf, mode, log->vbufsz); + if (err) + err = -errno; + } + + return err; +} + +ssize_t +tapdisk_logfile_vprintf(td_logfile_t * log, const char *fmt, va_list ap) +{ + char buf[1024]; + size_t size, n; + ssize_t len; + struct timeval tv; + + if (!log->file) + return -EBADF; + + gettimeofday(&tv, NULL); + + size = sizeof(buf); + len = 0; + + len += tapdisk_syslog_strftime(buf, size, &tv); + len += snprintf(buf + len, size - len, ": "); + len += tapdisk_syslog_strftv(buf + len, size - len, &tv); + len += snprintf(buf + len, size - len, " "); + len += vsnprintf(buf + len, size - len, fmt, ap); + + if (buf[len - 1] != ''\n'') + len += snprintf(buf + len, size - len, "\n"); + + n = fwrite(buf, len, 1, log->file); + if (n != len) + len = -ferror(log->file); + + return len; +} + +ssize_t tapdisk_logfile_printf(td_logfile_t * log, const char *fmt, ...) 
+{ + va_list ap; + int rv; + + va_start(ap, fmt); + rv = tapdisk_logfile_vprintf(log, fmt, ap); + va_end(ap); + + return rv; +} + +int tapdisk_logfile_flush(td_logfile_t * log) +{ + int rv = EOF; + + if (log->file) + rv = fflush(log->file); + + return rv; +} diff --git a/tools/blktap3/drivers/tapdisk-logfile.h b/tools/blktap3/drivers/tapdisk-logfile.h new file mode 100644 --- /dev/null +++ b/tools/blktap3/drivers/tapdisk-logfile.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2009, XenSource Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of XenSource Inc. nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __TAPDISK_LOGFILE_H__ +#define __TAPDISK_LOGFILE_H__ + +#include <stdio.h> + +typedef struct _td_logfile td_logfile_t; + +#define TD_LOGFILE_PATH_MAX 128UL + +struct _td_logfile { + char path[TD_LOGFILE_PATH_MAX]; + FILE *file; + char *vbuf; + size_t vbufsz; +}; + +int tapdisk_logfile_open(td_logfile_t *, + const char *dir, const char *ident, + const char *ext, size_t bufsz); + +ssize_t tapdisk_logfile_printf(td_logfile_t *, const char *fmt, ...); +ssize_t tapdisk_logfile_vprintf(td_logfile_t *, const char *fmt, + va_list ap); + +void tapdisk_logfile_close(td_logfile_t *); +int tapdisk_logfile_unlink(td_logfile_t *); +int tapdisk_logfile_rename(td_logfile_t *, + const char *dir, const char *ident, + const char *ext); + +int tapdisk_logfile_setvbuf(td_logfile_t * log, int mode); +int tapdisk_logfile_flush(td_logfile_t *); + +#endif /* __TAPDISK_LOGFILE_H__ */ diff --git a/tools/blktap3/drivers/tapdisk-loglimit.c b/tools/blktap3/drivers/tapdisk-loglimit.c new file mode 100644 --- /dev/null +++ b/tools/blktap3/drivers/tapdisk-loglimit.c @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2011, XenSource Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of XenSource Inc. nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Simple log rate limiting. Allow for bursts, then drop messages + * until some interval expired. 
+ */ + +#include <stdio.h> +#include "blktap3.h" +#include "tapdisk-loglimit.h" + +void tapdisk_loglimit_init(td_loglimit_t * rl, int burst, int interval) +{ + rl->burst = burst; + rl->interval = interval; + + rl->count = 0; + rl->dropped = 0; + + gettimeofday(&rl->ts, NULL); +} + +static void timeradd_ms(struct timeval *tv, long ms) +{ + tv->tv_usec += ms * 1000; + if (tv->tv_usec > 1000000) { + tv->tv_sec += tv->tv_usec / 1000000; + tv->tv_usec %= 1000000; + } +} + +static void +tapdisk_loglimit_update(td_loglimit_t * rl, struct timeval *now) +{ + struct timeval next = rl->ts; + + timeradd_ms(&next, rl->interval); + + if (timercmp(&next, now, <)) { + rl->count = 0; + rl->ts = *now; + } +} + +static void tapdisk_loglimit_update_now(td_loglimit_t * rl) +{ + struct timeval now; + + gettimeofday(&now, NULL); + + tapdisk_loglimit_update(rl, &now); +} + +int tapdisk_loglimit_pass(td_loglimit_t * rl) +{ + if (!rl->interval) + return 1; /* unlimited */ + + if (unlikely(rl->count >= rl->burst)) { + + tapdisk_loglimit_update_now(rl); + + if (rl->count >= rl->burst) { + rl->dropped++; + return 0; + } + } + + rl->count++; + return 1; +} diff --git a/tools/blktap3/drivers/tapdisk-loglimit.h b/tools/blktap3/drivers/tapdisk-loglimit.h new file mode 100644 --- /dev/null +++ b/tools/blktap3/drivers/tapdisk-loglimit.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2011, XenSource Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of XenSource Inc. 
nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __TAPDISK_LOGLIMIT_H__ +#define __TAPDISK_LOGLIMIT_H__ + +#include <sys/time.h> + +typedef struct td_loglimit td_loglimit_t; + +struct td_loglimit { + int burst; + int interval; + + int count; + int dropped; + + struct timeval ts; +}; + +void tapdisk_loglimit_init(td_loglimit_t * rl, int burst, int interval); + +int tapdisk_loglimit_pass(td_loglimit_t *); + +#endif /* __TAPDISK_LOGLIMIT_H__ */ diff --git a/tools/blktap3/drivers/tapdisk-syslog.c b/tools/blktap3/drivers/tapdisk-syslog.c new file mode 100644 --- /dev/null +++ b/tools/blktap3/drivers/tapdisk-syslog.c @@ -0,0 +1,544 @@ +/* + * Copyright (c) 2009, XenSource Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of XenSource Inc. nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * A non-blocking, buffered BSD syslog client. + * + * http://www.ietf.org/rfc/rfc3164.txt (FIXME: Read this.) 
+ */ + +#define _ISOC99_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <unistd.h> +#include <time.h> +#include <fcntl.h> +#include <stdarg.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <sys/param.h> + +#include "tapdisk-server.h" +#include "tapdisk-syslog.h" +#include "tapdisk-utils.h" + +static int tapdisk_syslog_sock_send(td_syslog_t * log, + const void *msg, size_t size); +static int tapdisk_syslog_sock_connect(td_syslog_t * log); + +static void tapdisk_syslog_sock_mask(td_syslog_t * log); +static void tapdisk_syslog_sock_unmask(td_syslog_t * log); + +static const struct sockaddr_un syslog_addr = { + .sun_family = AF_UNIX, + .sun_path = "/dev/log" +}; + +#define RING_PTR(_log, _idx) \ + (&(_log)->ring[(_idx) % (_log)->ringsz]) + +#define RING_FREE(_log) \ + ((_log)->ringsz - ((_log)->prod - (_log)->cons)) + +/* + * NB. Ring buffer. + * + * We allocate a number of pages as indicated by @bufsz during + * initialization. From that, 1K is reserved for message staging, the + * rest is cyclic ring space. + * + * All producer/consumer offsets wrap on size_t range, not buffer + * size. Hence the RING() macros. 
+ */ + +static void __tapdisk_syslog_ring_init(td_syslog_t * log) +{ + log->buf = NULL; + log->bufsz = 0; + log->msg = NULL; + log->ring = NULL; + log->ringsz = 0; +} + +static inline size_t page_align(size_t size) +{ + size_t page_size = sysconf(_SC_PAGE_SIZE); + return (size + page_size - 1) & ~(page_size - 1); +} + +static void tapdisk_syslog_ring_uninit(td_syslog_t * log) +{ + if (log->buf) + munmap(log->buf, log->bufsz); + + __tapdisk_syslog_ring_init(log); +} + +static int tapdisk_syslog_ring_init(td_syslog_t * log, size_t size) +{ + int prot, flags, err; + + __tapdisk_syslog_ring_init(log); + + log->bufsz = page_align(size); + + prot = PROT_READ | PROT_WRITE; + flags = MAP_ANONYMOUS | MAP_PRIVATE; + + log->buf = mmap(NULL, log->bufsz, prot, flags, -1, 0); + if (log->buf == MAP_FAILED) { + log->buf = NULL; + err = -ENOMEM; + goto fail; + } + + err = mlock(log->buf, size); + if (err) { + err = -errno; + goto fail; + } + + log->msg = log->buf; + log->ring = log->buf + TD_SYSLOG_PACKET_MAX; + log->ringsz = size - TD_SYSLOG_PACKET_MAX; + + return 0; + + fail: + tapdisk_syslog_ring_uninit(log); + + return err; +} + +static int +tapdisk_syslog_ring_write_str(td_syslog_t * log, const char *msg, + size_t len) +{ + size_t size, prod, i; + + len = MIN(len, TD_SYSLOG_PACKET_MAX); + size = len + 1; + + if (size > RING_FREE(log)) + return -ENOBUFS; + + prod = log->prod; + + for (i = 0; i < len; ++i) { + char c; + + c = msg[i]; + if (c == 0) + break; + + *RING_PTR(log, prod) = c; + prod++; + } + + *RING_PTR(log, prod) = 0; + + log->prod = prod + 1; + + return 0; +} + +static ssize_t +tapdisk_syslog_ring_read_pkt(td_syslog_t * log, char *msg, size_t size) +{ + size_t cons; + ssize_t sz; + + size = MIN(size, TD_SYSLOG_PACKET_MAX); + + sz = 0; + cons = log->cons; + + while (sz < size) { + char c; + + if (cons == log->prod) + break; + + c = *RING_PTR(log, cons); + msg[sz++] = c; + cons++; + + if (c == 0) + break; + } + + return sz - 1; +} + +static int 
tapdisk_syslog_ring_dispatch_one(td_syslog_t * log) +{ + size_t len; + int err; + + len = tapdisk_syslog_ring_read_pkt(log, log->msg, + TD_SYSLOG_PACKET_MAX); + if (len == -1) + return -ENOMSG; + + err = tapdisk_syslog_sock_send(log, log->msg, len); + + if (err == -EAGAIN) + return err; + + if (err) + goto fail; + + done: + log->cons += len + 1; + return 0; + + fail: + log->stats.fails++; + goto done; +} + +static void tapdisk_syslog_ring_warning(td_syslog_t * log) +{ + int n, err; + + n = log->oom; + log->oom = 0; + + err = tapdisk_syslog(log, LOG_WARNING, + "tapdisk-syslog: %d messages dropped", n); + if (err) + log->oom = n; +} + +static void tapdisk_syslog_ring_dispatch(td_syslog_t * log) +{ + int err; + + do { + err = tapdisk_syslog_ring_dispatch_one(log); + } while (!err); + + if (log->oom) + tapdisk_syslog_ring_warning(log); +} + +static int +tapdisk_syslog_vsprintf(char *buf, size_t size, + int prio, const struct timeval *tv, + const char *ident, const char *fmt, va_list ap) +{ + char tsbuf[TD_SYSLOG_STRTIME_LEN + 1]; + size_t len; + + /* + * PKT := PRI HEADER MSG + * PRI := "<" {"0" .. "9"} ">" + * HEADER := TIMESTAMP HOSTNAME + * MSG := <TAG> <SEP> <CONTENT> + * SEP := ":" | " " | "[" + */ + + tapdisk_syslog_strftime(tsbuf, sizeof(tsbuf), tv); + + len = 0; + + /* NB. meant to work with c99 null buffers */ + + len += snprintf(buf ? buf + len : NULL, buf ? size - len : 0, + "<%d>%s %s: ", prio, tsbuf, ident); + + len += vsnprintf(buf ? buf + len : NULL, buf ? size - len : 0, + fmt, ap); + + return MIN(len, size); +} + +/* + * NB. Sockets. + * + * Syslog is based on a connectionless (DGRAM) unix transport. + * + * While it is reliable, we cannot block on syslogd because -- as with + * any IPC in tapdisk -- we could deadlock in page I/O writeback. + * Hence the syslog(3) avoidance on the datapath, which this code + * facilitates. + * + * This type of socket has a single (global) receive buffer on + * syslogd''s end, but no send buffer at all. 
The ring buffer does just that: + * headroom on the sender side. + * + * The transport is rather stateless, but we still need to connect() + * the socket, or select() will find no receive buffer to block + * on. While we never disconnect, connections are unreliable because + * syslog may shut down. + * + * Reconnection will be attempted with every user message submitted. + * Any send() or connect() failure other than EAGAIN discards the + * message. Also, the write event handler will go on to discard any + * remaining ring contents as well, once the socket is disconnected. + * + * In summary, no attempts to mask service blackouts in here. + */ + +int +tapdisk_vsyslog(td_syslog_t * log, int prio, const char *fmt, va_list ap) +{ + struct timeval now; + size_t len; + int err; + + gettimeofday(&now, NULL); + + len = tapdisk_syslog_vsprintf(log->msg, TD_SYSLOG_PACKET_MAX, + prio | log->facility, + &now, log->ident, fmt, ap); + + log->stats.count += 1; + log->stats.bytes += len; + + if (log->cons != log->prod) + goto busy; + + send: + err = tapdisk_syslog_sock_send(log, log->msg, len); + if (!err) + return 0; + + if (err == -ENOTCONN) { + err = tapdisk_syslog_sock_connect(log); + if (!err) + goto send; + } + + if (err != -EAGAIN) + goto fail; + + tapdisk_syslog_sock_unmask(log); + + busy: + if (log->oom) { + err = -ENOBUFS; + goto oom; + } + + err = tapdisk_syslog_ring_write_str(log, log->msg, len); + if (!err) + return 0; + + log->oom_tv = now; + + oom: + log->oom++; + log->stats.drops++; + return err; + + fail: + log->stats.fails++; + return err; +} + +int tapdisk_syslog(td_syslog_t * log, int prio, const char *fmt, ...) 
+{ + va_list ap; + int err; + + va_start(ap, fmt); + err = tapdisk_vsyslog(log, prio, fmt, ap); + va_end(ap); + + return err; +} + +static int +tapdisk_syslog_sock_send(td_syslog_t * log, const void *msg, size_t size) +{ + ssize_t n; + + log->stats.xmits++; + + n = send(log->sock, msg, size, MSG_DONTWAIT); + if (n < 0) + return -errno; + + return 0; +} + +static void +tapdisk_syslog_sock_event(event_id_t id __attribute__((unused)), + char mode __attribute__((unused)), void *private) +{ + td_syslog_t *log = private; + + tapdisk_syslog_ring_dispatch(log); + + if (log->cons == log->prod) + tapdisk_syslog_sock_mask(log); +} + +static void __tapdisk_syslog_sock_init(td_syslog_t * log) +{ + log->sock = -1; + log->event_id = -1; +} + +static void tapdisk_syslog_sock_close(td_syslog_t * log) +{ + if (log->sock >= 0) + close(log->sock); + + if (log->event_id >= 0) + tapdisk_server_unregister_event(log->event_id); + + __tapdisk_syslog_sock_init(log); +} + +static int tapdisk_syslog_sock_open(td_syslog_t * log) +{ + event_id_t id; + int s, err; + + __tapdisk_syslog_sock_init(log); + + s = socket(PF_UNIX, SOCK_DGRAM, 0); + if (s < 0) { + err = -errno; + goto fail; + } + + log->sock = s; + +#if 0 + err = fcntl(s, F_SETFL, O_NONBLOCK); + if (err < 0) { + err = -errno; + goto fail; + } +#endif + + id = tapdisk_server_register_event(SCHEDULER_POLL_WRITE_FD, + s, 0, + tapdisk_syslog_sock_event, log); + if (id < 0) { + err = id; + goto fail; + } + + log->event_id = id; + + tapdisk_syslog_sock_mask(log); + + return 0; + + fail: + tapdisk_syslog_sock_close(log); + return err; +} + +static int tapdisk_syslog_sock_connect(td_syslog_t * log) +{ + int err; + + err = connect(log->sock, &syslog_addr, sizeof(syslog_addr)); + if (err < 0) + err = -errno; + + return err; +} + +static void tapdisk_syslog_sock_mask(td_syslog_t * log) +{ + tapdisk_server_mask_event(log->event_id, 1); +} + +static void tapdisk_syslog_sock_unmask(td_syslog_t * log) +{ + tapdisk_server_mask_event(log->event_id, 0); 
+} + +void __tapdisk_syslog_init(td_syslog_t * log) +{ + memset(log, 0, sizeof(td_syslog_t)); + __tapdisk_syslog_sock_init(log); + __tapdisk_syslog_ring_init(log); +} + +void tapdisk_syslog_close(td_syslog_t * log) +{ + tapdisk_syslog_ring_uninit(log); + tapdisk_syslog_sock_close(log); + + if (log->ident) + free(log->ident); + + __tapdisk_syslog_init(log); +} + +int +tapdisk_syslog_open(td_syslog_t * log, const char *ident, int facility, + size_t bufsz) +{ + int err; + + __tapdisk_syslog_init(log); + + log->facility = facility; + log->ident = ident ? strndup(ident, TD_SYSLOG_IDENT_MAX) : NULL; + + err = tapdisk_syslog_sock_open(log); + if (err) + goto fail; + + err = tapdisk_syslog_ring_init(log, bufsz); + if (err) + goto fail; + + return 0; + + fail: + tapdisk_syslog_close(log); + + return err; +} + +void tapdisk_syslog_stats(td_syslog_t * log, int prio) +{ + struct _td_syslog_stats *s = &log->stats; + + tapdisk_syslog(log, prio, + "tapdisk-syslog: %llu messages, %llu bytes, " + "xmits: %llu, failed: %llu, dropped: %llu", + s->count, s->bytes, s->xmits, s->fails, s->drops); +} + +void tapdisk_syslog_flush(td_syslog_t * log) +{ + while (log->cons != log->prod) + tapdisk_server_iterate(); +} diff --git a/tools/blktap3/drivers/tapdisk-syslog.h b/tools/blktap3/drivers/tapdisk-syslog.h new file mode 100644 --- /dev/null +++ b/tools/blktap3/drivers/tapdisk-syslog.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2009, XenSource Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of XenSource Inc. nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __TAPDISK_SYSLOG_H__ +#define __TAPDISK_SYSLOG_H__ + +#include <syslog.h> +#include <stdarg.h> +#include "scheduler.h" + +typedef struct _td_syslog td_syslog_t; + +#define TD_SYSLOG_PACKET_MAX 1024 + +struct _td_syslog_stats { + unsigned long long count; + unsigned long long bytes; + unsigned long long xmits; + unsigned long long fails; + unsigned long long drops; +}; + +struct _td_syslog { + char *ident; + int facility; + + int sock; + event_id_t event_id; + + void *buf; + size_t bufsz; + + char *msg; + + char *ring; + size_t ringsz; + + size_t prod; + size_t cons; + + int oom; + struct timeval oom_tv; + + struct _td_syslog_stats stats; +}; + +int tapdisk_syslog_open(td_syslog_t *, + const char *ident, int facility, size_t bufsz); +void tapdisk_syslog_close(td_syslog_t *); +void tapdisk_syslog_flush(td_syslog_t *); +void tapdisk_syslog_stats(td_syslog_t *, int prio); + +int tapdisk_vsyslog(td_syslog_t *, int prio, const char *fmt, va_list ap); +int 
tapdisk_syslog(td_syslog_t *, int prio, const char *fmt, ...); + +#endif /* __TAPDISK_SYSLOG_H__ */
Thanos Makatos
2013-Apr-19 15:40 UTC
[PATCH 06 of 21] blktap3/drivers: Introduce tapdisk profiling
This patch copies what appears to be the profiling functionality from blktap2. Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com> diff --git a/tools/blktap2/drivers/profile.h b/tools/blktap3/drivers/profile.h copy from tools/blktap2/drivers/profile.h copy to tools/blktap3/drivers/profile.h
Thanos Makatos
2013-Apr-19 15:40 UTC
[PATCH 07 of 21] blktap3/drivers: Introduce scheduling of events inside tapdisks
This patch copies the event scheduling functionality from blktap2 with most changes coming from blktap2.5. Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com> diff --git a/tools/blktap2/drivers/scheduler.c b/tools/blktap3/drivers/scheduler.c copy from tools/blktap2/drivers/scheduler.c copy to tools/blktap3/drivers/scheduler.c --- a/tools/blktap2/drivers/scheduler.c +++ b/tools/blktap3/drivers/scheduler.c @@ -25,59 +25,54 @@ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + #include <errno.h> #include <stdlib.h> #include <unistd.h> #include <string.h> #include <sys/time.h> +#include <sys/select.h> +#include "tapdisk.h" #include "scheduler.h" #include "tapdisk-log.h" -#define DBG(_f, _a...) tlog_write(TLOG_DBG, _f, ##_a) +#define DBG(_f, _a...) if (0) { tlog_syslog(TLOG_DBG, _f, ##_a); } +#define BUG_ON(_cond) if (_cond) td_panic() #define SCHEDULER_MAX_TIMEOUT 600 #define SCHEDULER_POLL_FD (SCHEDULER_POLL_READ_FD | \ SCHEDULER_POLL_WRITE_FD | \ SCHEDULER_POLL_EXCEPT_FD) -#define MIN(a, b) ((a) <= (b) ? (a) : (b)) -#define MAX(a, b) ((a) >= (b) ? 
(a) : (b)) +#include <sys/param.h> -#define scheduler_for_each_event(s, event, tmp) \ - list_for_each_entry_safe(event, tmp, &(s)->events, next) +#define scheduler_for_each_event(s, event) \ + TAILQ_FOREACH(event, &(s)->events, entry) -typedef struct event { - char mode; - event_id_t id; - - int fd; - int timeout; - int deadline; - - event_cb_t cb; - void *private; - - struct list_head next; -} event_t; +#define scheduler_for_each_event_safe(s, event, tmp) \ + TAILQ_FOREACH_SAFE(event,&(s)->events, entry, tmp) static void scheduler_prepare_events(scheduler_t *s) { int diff; struct timeval now; - event_t *event, *tmp; + event_t *event; FD_ZERO(&s->read_fds); FD_ZERO(&s->write_fds); FD_ZERO(&s->except_fds); - s->max_fd = 0; + s->max_fd = -1; s->timeout = SCHEDULER_MAX_TIMEOUT; gettimeofday(&now, NULL); - scheduler_for_each_event(s, event, tmp) { + scheduler_for_each_event(s, event) { + if (event->masked || event->dead) + continue; + if (event->mode & SCHEDULER_POLL_READ_FD) { FD_SET(event->fd, &s->read_fds); s->max_fd = MAX(event->fd, s->max_fd); @@ -105,61 +100,118 @@ scheduler_prepare_events(scheduler_t *s) s->timeout = MIN(s->timeout, s->max_timeout); } -static void -scheduler_event_callback(event_t *event, char mode) +static int +scheduler_check_fd_events(scheduler_t *s, int nfds) { - if (event->mode & SCHEDULER_POLL_TIMEOUT) { - struct timeval now; - gettimeofday(&now, NULL); - event->deadline = now.tv_sec + event->timeout; - } + event_t *event; - event->cb(event->id, mode, event->private); -} + scheduler_for_each_event(s, event) { + if (!nfds) + break; -static void -scheduler_run_events(scheduler_t *s) -{ - struct timeval now; - event_t *event, *tmp; + if (event->dead) + continue; - gettimeofday(&now, NULL); - - again: - s->restart = 0; - - scheduler_for_each_event(s, event, tmp) { if ((event->mode & SCHEDULER_POLL_READ_FD) && FD_ISSET(event->fd, &s->read_fds)) { FD_CLR(event->fd, &s->read_fds); - scheduler_event_callback(event, SCHEDULER_POLL_READ_FD); - goto 
next; + event->pending |= SCHEDULER_POLL_READ_FD; + --nfds; } if ((event->mode & SCHEDULER_POLL_WRITE_FD) && FD_ISSET(event->fd, &s->write_fds)) { FD_CLR(event->fd, &s->write_fds); - scheduler_event_callback(event, SCHEDULER_POLL_WRITE_FD); - goto next; + event->pending |= SCHEDULER_POLL_WRITE_FD; + --nfds; } if ((event->mode & SCHEDULER_POLL_EXCEPT_FD) && FD_ISSET(event->fd, &s->except_fds)) { FD_CLR(event->fd, &s->except_fds); - scheduler_event_callback(event, SCHEDULER_POLL_EXCEPT_FD); - goto next; + event->pending |= SCHEDULER_POLL_EXCEPT_FD; + --nfds; + } } - if ((event->mode & SCHEDULER_POLL_TIMEOUT) && - (event->deadline <= now.tv_sec)) - scheduler_event_callback(event, SCHEDULER_POLL_TIMEOUT); + return nfds; +} - next: - if (s->restart) - goto again; +static void +scheduler_check_timeouts(scheduler_t *s) +{ + struct timeval now; + event_t *event; + + gettimeofday(&now, NULL); + + scheduler_for_each_event(s, event) { + BUG_ON(event->pending && event->masked); + + if (event->dead) + continue; + + if (event->pending) + continue; + + if (!(event->mode & SCHEDULER_POLL_TIMEOUT)) + continue; + + if (event->deadline > now.tv_sec) + continue; + + event->pending = SCHEDULER_POLL_TIMEOUT; } } +static int +scheduler_check_events(scheduler_t *s, int nfds) +{ + if (nfds) + nfds = scheduler_check_fd_events(s, nfds); + + scheduler_check_timeouts(s); + + return nfds; +} + +static void +scheduler_event_callback(event_t *event, char mode) +{ + if (event->mode & SCHEDULER_POLL_TIMEOUT) { + struct timeval now; + gettimeofday(&now, NULL); + event->deadline = now.tv_sec + event->timeout; + } + + if (!event->masked) + event->cb(event->id, mode, event->private); +} + +static int +scheduler_run_events(scheduler_t *s) +{ + event_t *event; + int n_dispatched = 0; + + scheduler_for_each_event(s, event) { + char pending; + + if (event->dead) + continue; + + pending = event->pending; + if (pending) { + event->pending = 0; + /* NB. 
must clear before cb */ + scheduler_event_callback(event, pending); + n_dispatched++; + } + } + + return n_dispatched; +} + int scheduler_register_event(scheduler_t *s, char mode, int fd, int timeout, event_cb_t cb, void *private) @@ -179,8 +231,6 @@ scheduler_register_event(scheduler_t *s, gettimeofday(&now, NULL); - INIT_LIST_HEAD(&event->next); - event->mode = mode; event->fd = fd; event->timeout = timeout; @@ -188,11 +238,12 @@ scheduler_register_event(scheduler_t *s, event->cb = cb; event->private = private; event->id = s->uuid++; + event->masked = 0; if (!s->uuid) s->uuid++; - list_add_tail(&event->next, &s->events); + TAILQ_INSERT_TAIL(&s->events, event, entry); return event->id; } @@ -200,20 +251,44 @@ scheduler_register_event(scheduler_t *s, void scheduler_unregister_event(scheduler_t *s, event_id_t id) { - event_t *event, *tmp; + event_t *event; if (!id) return; - scheduler_for_each_event(s, event, tmp) + scheduler_for_each_event(s, event) if (event->id == id) { - list_del(&event->next); - free(event); - s->restart = 1; + event->dead = 1; + break; + } +} + +void scheduler_mask_event(scheduler_t * s, event_id_t id, int masked) +{ + event_t *event; + + if (!id) + return; + + scheduler_for_each_event(s, event) + if (event->id == id) { + event->masked = ! !masked; break; } } +static void +scheduler_gc_events(scheduler_t *s) +{ + event_t *event, *next; + + scheduler_for_each_event_safe(s, event, next) + if (event->dead) { + TAILQ_REMOVE(&s->events, event, entry); + free(event); + } +} + void scheduler_set_max_timeout(scheduler_t *s, int timeout) { @@ -227,25 +302,41 @@ scheduler_wait_for_events(scheduler_t *s int ret; struct timeval tv; + s->depth++; + ret = 0; + + if (s->depth > 1 && scheduler_run_events(s)) + /* NB. recursive invocations continue with the pending + * event set. We return as soon as we made some + * progress. 
*/ + goto out; + scheduler_prepare_events(s); tv.tv_sec = s->timeout; tv.tv_usec = 0; - DBG("timeout: %d, max_timeout: %d\n", - s->timeout, s->max_timeout); + DBG("timeout: %d, max_timeout: %d\n", s->timeout, s->max_timeout); ret = select(s->max_fd + 1, &s->read_fds, &s->write_fds, &s->except_fds, &tv); - s->restart = 0; + if (ret < 0) + goto out; + + ret = scheduler_check_events(s, ret); + BUG_ON(ret); + s->timeout = SCHEDULER_MAX_TIMEOUT; s->max_timeout = SCHEDULER_MAX_TIMEOUT; - if (ret < 0) - return ret; + scheduler_run_events(s); - scheduler_run_events(s); + if (s->depth == 1) + scheduler_gc_events(s); + + out: + s->depth--; return ret; } @@ -256,10 +347,11 @@ scheduler_initialize(scheduler_t *s) memset(s, 0, sizeof(scheduler_t)); s->uuid = 1; + s->depth = 0; FD_ZERO(&s->read_fds); FD_ZERO(&s->write_fds); FD_ZERO(&s->except_fds); - INIT_LIST_HEAD(&s->events); + TAILQ_INIT(&s->events); }
Thanos Makatos
2013-Apr-19 15:40 UTC
[PATCH 08 of 21] blktap3/drivers: Introduce handling of control commands
This patch copies the functionality of handling control commands from blktap2, with changes coming from blktap2.5. Also, it contains the following blktap3-related changes: * Replaced the minor number with type:/path/to/file or /path/to/file, depending on the occasion. * Removed VBD.attach/detach message handlers. And the following optimisations/clean-up: * Removed the unused uuid member from struct tapdisk_control. * Simplified message handling: all message handler functions accept the response as an argument and the caller of these functions initialises this response and handles errors. Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com> diff --git a/tools/blktap2/drivers/tapdisk-control.c b/tools/blktap3/drivers/tapdisk-control.c copy from tools/blktap2/drivers/tapdisk-control.c copy to tools/blktap3/drivers/tapdisk-control.c --- a/tools/blktap2/drivers/tapdisk-control.c +++ b/tools/blktap3/drivers/tapdisk-control.c @@ -25,6 +25,7 @@ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + #include <stdio.h> #include <errno.h> #include <fcntl.h> @@ -37,42 +38,350 @@ #include <sys/types.h> #include <sys/ioctl.h> #include <sys/socket.h> +#include <sys/mman.h> +#include <sys/select.h> -#include "list.h" #include "tapdisk.h" -#include "blktap2.h" -#include "blktaplib.h" #include "tapdisk-vbd.h" #include "tapdisk-utils.h" #include "tapdisk-server.h" #include "tapdisk-message.h" #include "tapdisk-disktype.h" +#include "tapdisk-stats.h" +#include "tapdisk-control.h" +#include "sring/td-blkif.h" + +#define TD_CTL_MAX_CONNECTIONS 10 +#define TD_CTL_SOCK_BACKLOG 32 +#define TD_CTL_RECV_TIMEOUT 10 +#define TD_CTL_SEND_TIMEOUT 10 +#define TD_CTL_SEND_BUFSZ ((size_t)4096) + +#define DBG(_f, _a...) tlog_syslog(LOG_DEBUG, "%s:%d " _f, \ + __FILE__, __LINE__, ##_a) +#define ERR(err, _f, _a...) 
tlog_error(err, "%s:%d " _f, __FILE__, \ + __LINE__, ##_a) + +#define ASSERT(_p) \ + if (!(_p)) { \ + EPRINTF("%s:%d: FAILED ASSERTION: ''%s''\n", \ + __FILE__, __LINE__, #_p); \ + td_panic(); \ + } + +#define WARN_ON(_p) \ + if (_p) { \ + EPRINTF("%s:%d: WARNING: ''%s''\n", \ + __FILE__, __LINE__, #_p); \ + } + +struct tapdisk_ctl_conn { + int fd; + + struct { + void *buf; + size_t bufsz; + int event_id; + int done; + + void *prod; + void *cons; + } out; + + struct { + int event_id; + int busy; + } in; + + struct tapdisk_control_info *info; +}; + +#define TAPDISK_MSG_REENTER (1<<0) /* non-blocking, idempotent */ +#define TAPDISK_MSG_VERBOSE (1<<1) /* tell syslog about it */ +#define TAPDISK_MSG_VERBOSE_ERROR (1<<2) /* tell syslog about it, with errors */ + +struct tapdisk_control_info { + int (*handler) (struct tapdisk_ctl_conn *, tapdisk_message_t *, + tapdisk_message_t * const); + int flags; +}; struct tapdisk_control { char *path; int socket; int event_id; -}; + int busy; -struct tapdisk_control_connection { - int socket; - event_id_t event_id; + int n_conn; + struct tapdisk_ctl_conn __conn[TD_CTL_MAX_CONNECTIONS]; + struct tapdisk_ctl_conn *conn[TD_CTL_MAX_CONNECTIONS]; }; static struct tapdisk_control td_control; +static inline size_t page_align(size_t size) +{ + size_t page_size = sysconf(_SC_PAGE_SIZE); + return (size + page_size - 1) & ~(page_size - 1); +} + +static void tapdisk_ctl_conn_uninit(struct tapdisk_ctl_conn *conn) +{ + if (conn->out.buf) { + munmap(conn->out.buf, conn->out.bufsz); + conn->out.buf = NULL; + } +} + +static int +tapdisk_ctl_conn_init(struct tapdisk_ctl_conn *conn, size_t bufsz) +{ + int prot, flags, err; + + memset(conn, 0, sizeof(*conn)); + conn->out.event_id = -1; + conn->in.event_id = -1; + + prot = PROT_READ | PROT_WRITE; + flags = MAP_ANONYMOUS | MAP_PRIVATE; + + conn->out.buf = mmap(NULL, bufsz, prot, flags, -1, 0); + if (conn->out.buf == MAP_FAILED) { + conn->out.buf = NULL; + err = -ENOMEM; + goto fail; + } + 
conn->out.bufsz = page_align(bufsz); + + return 0; + + fail: + tapdisk_ctl_conn_uninit(conn); + return err; +} + +static int tapdisk_ctl_conn_connected(struct tapdisk_ctl_conn *conn) +{ + return conn->fd >= 1; +} + +static void tapdisk_ctl_conn_free(struct tapdisk_ctl_conn *conn) +{ + struct tapdisk_ctl_conn *prev, *next; + int i; + + i = --td_control.n_conn; + /* NB. bubble the freed connection off the active list. */ + prev = conn; + do { + ASSERT(i >= 0); + next = td_control.conn[i]; + td_control.conn[i] = prev; + prev = next; + i--; + } while (next != conn); +} + +static void tapdisk_ctl_conn_close(struct tapdisk_ctl_conn *conn) +{ + if (conn->out.event_id >= 0) { + tapdisk_server_unregister_event(conn->out.event_id); + conn->out.event_id = -1; + } + + if (conn->fd >= 0) { + close(conn->fd); + conn->fd = -1; + + tapdisk_ctl_conn_free(conn); + tapdisk_server_mask_event(td_control.event_id, 0); + } +} + +static void tapdisk_ctl_conn_mask_out(struct tapdisk_ctl_conn *conn) +{ + tapdisk_server_mask_event(conn->out.event_id, 1); +} + +static void tapdisk_ctl_conn_unmask_out(struct tapdisk_ctl_conn *conn) +{ + tapdisk_server_mask_event(conn->out.event_id, 0); +} + +static ssize_t tapdisk_ctl_conn_send_buf(struct tapdisk_ctl_conn *conn) +{ + ssize_t size; + + size = conn->out.prod - conn->out.cons; + if (!size) + return 0; + + size = send(conn->fd, conn->out.cons, size, MSG_DONTWAIT); + if (size < 0) + return -errno; + + conn->out.cons += size; + + return size; +} + static void -tapdisk_control_initialize(void) +tapdisk_ctl_conn_send_event(event_id_t id __attribute__((unused)), char mode, + void *private) { + struct tapdisk_ctl_conn *conn = private; + ssize_t rv; + + do { + rv = tapdisk_ctl_conn_send_buf(conn); + } while (rv > 0); + + if (rv == -EAGAIN) + return; + + if (rv < 0) + ERR(rv, "failure sending message at offset %td/%td\n", + conn->out.cons - conn->out.buf, + conn->out.prod - conn->out.buf); + + if (rv || conn->out.done || mode & SCHEDULER_POLL_TIMEOUT) + 
tapdisk_ctl_conn_close(conn); + else + tapdisk_ctl_conn_mask_out(conn); +} + +/* + * NB. the control interface is still not properly integrated into the + * server, therefore neither the scheduler. After the last close, the + * server will exit but we still have a pending close response in the + * output buffer. + */ +static void tapdisk_ctl_conn_drain(struct tapdisk_ctl_conn *conn) +{ + struct timeval tv = {.tv_sec = TD_CTL_SEND_TIMEOUT, + .tv_usec = 0 + }; + fd_set wfds; + int n, mode; + + ASSERT(conn->out.done); + ASSERT(conn->fd >= 0); + + while (tapdisk_ctl_conn_connected(conn)) { + FD_ZERO(&wfds); + FD_SET(conn->fd, &wfds); + + n = select(conn->fd + 1, NULL, &wfds, NULL, &tv); + if (n < 0) + break; + + if (n) + mode = SCHEDULER_POLL_WRITE_FD; + else + mode = SCHEDULER_POLL_TIMEOUT; + + tapdisk_ctl_conn_send_event(conn->out.event_id, mode, conn); + } +} + + +struct tapdisk_ctl_conn *tapdisk_ctl_conn_open(int fd) +{ + struct tapdisk_ctl_conn *conn; + + if (td_control.n_conn >= TD_CTL_MAX_CONNECTIONS) + return NULL; + + conn = td_control.conn[td_control.n_conn++]; + + conn->out.event_id = + tapdisk_server_register_event(SCHEDULER_POLL_WRITE_FD, + fd, TD_CTL_SEND_TIMEOUT, + tapdisk_ctl_conn_send_event, conn); + if (conn->out.event_id < 0) + return NULL; + + conn->fd = fd; + conn->out.prod = conn->out.buf; + conn->out.cons = conn->out.buf; + + tapdisk_ctl_conn_mask_out(conn); + + if (td_control.n_conn >= TD_CTL_MAX_CONNECTIONS) + tapdisk_server_mask_event(td_control.event_id, 1); + + return conn; +} + +static size_t +tapdisk_ctl_conn_write(struct tapdisk_ctl_conn *conn, void *buf, + size_t size) +{ + size_t rest; + + rest = conn->out.buf + conn->out.bufsz - conn->out.prod; + if (rest < size) + size = rest; + if (!size) + return 0; + + memcpy(conn->out.prod, buf, size); + conn->out.prod += size; + tapdisk_ctl_conn_unmask_out(conn); + + return size; +} + +static void tapdisk_ctl_conn_release(struct tapdisk_ctl_conn *conn) +{ + conn->out.done = 1; + + if 
(conn->out.prod == conn->out.cons) + tapdisk_ctl_conn_close(conn); +} + +static void tapdisk_control_initialize(void) +{ + struct tapdisk_ctl_conn *conn; + int i; + td_control.socket = -1; td_control.event_id = -1; signal(SIGPIPE, SIG_IGN); + + for (i = 0; i < TD_CTL_MAX_CONNECTIONS; i++) { + conn = &td_control.__conn[i]; + tapdisk_ctl_conn_init(conn, TD_CTL_SEND_BUFSZ); + td_control.conn[i] = conn; } -void -tapdisk_control_close(void) + td_control.n_conn = 0; + + DPRINTF("tapdisk-control: init, %d x %zuk buffers\n", + TD_CTL_MAX_CONNECTIONS, TD_CTL_SEND_BUFSZ >> 10); +} + +void tapdisk_control_close(void) { + struct tapdisk_ctl_conn *conn; + int i; + + DPRINTF("tapdisk-control: draining %d connections\n", + td_control.n_conn); + + while (td_control.n_conn) { + conn = td_control.conn[td_control.n_conn - 1]; + tapdisk_ctl_conn_drain(conn); + } + + for (i = 0; i < TD_CTL_MAX_CONNECTIONS; i++) { + conn = &td_control.__conn[i]; + tapdisk_ctl_conn_uninit(conn); + } + + DPRINTF("tapdisk-control: done\n"); + if (td_control.path) { unlink(td_control.path); free(td_control.path); @@ -85,40 +394,41 @@ tapdisk_control_close(void) } } -static struct tapdisk_control_connection * -tapdisk_control_allocate_connection(int fd) +static void +tapdisk_control_release_connection(struct tapdisk_ctl_conn *conn) { - struct tapdisk_control_connection *connection; - size_t sz; - - connection = calloc(1, sizeof(*connection)); - if (!connection) { - EPRINTF("calloc"); - return NULL; + if (conn->in.event_id) { + tapdisk_server_unregister_event(conn->in.event_id); + conn->in.event_id = -1; } - connection->socket = fd; - return connection; + tapdisk_ctl_conn_release(conn); } static void -tapdisk_control_close_connection(struct tapdisk_control_connection *connection) +tapdisk_control_close_connection(struct tapdisk_ctl_conn *conn) { - tapdisk_server_unregister_event(connection->event_id); - close(connection->socket); - free(connection); + tapdisk_control_release_connection(conn); + + if 
(tapdisk_ctl_conn_connected(conn)) + /* NB. best effort for write/close sequences. */ + tapdisk_ctl_conn_send_buf(conn); + + tapdisk_ctl_conn_close(conn); } + static int -tapdisk_control_read_message(int fd, tapdisk_message_t *message, int timeout) +tapdisk_control_read_message(int fd, tapdisk_message_t * message, + int timeout) { + const int len = sizeof(tapdisk_message_t); fd_set readfds; - int ret, len, offset; + int ret, offset, err = 0; struct timeval tv, *t; t = NULL; offset = 0; - len = sizeof(tapdisk_message_t); if (timeout) { tv.tv_sec = timeout; @@ -144,67 +454,31 @@ tapdisk_control_read_message(int fd, tap break; } - if (offset != len) { - EPRINTF("failure reading message (wanted %d but got %d)\n", - len, offset); - return -EIO; - } + if (ret < 0) + err = -errno; + else if (offset != len) + err = -EIO; + if (err) + ERR(err, "failure reading message at offset %d/%d\n", offset, len); - DPRINTF("received ''%s'' message (uuid = %u)\n", - tapdisk_message_name(message->type), message->cookie); - return 0; + return err; } -static int -tapdisk_control_write_message(int fd, tapdisk_message_t *message, int timeout) +static void +tapdisk_control_write_message(struct tapdisk_ctl_conn *conn, + tapdisk_message_t * message) { - fd_set writefds; - int ret, len, offset; - struct timeval tv, *t; + size_t size = sizeof(*message), count; - t = NULL; - offset = 0; - len = sizeof(tapdisk_message_t); + if (conn->info->flags & TAPDISK_MSG_VERBOSE) + DBG("sending ''%s'' message\n", tapdisk_message_name(message->type)); - if (timeout) { - tv.tv_sec = timeout; - tv.tv_usec = 0; - t = &tv; - } - - DPRINTF("sending ''%s'' message (uuid = %u)\n", - tapdisk_message_name(message->type), message->cookie); - - while (offset < len) { - FD_ZERO(&writefds); - FD_SET(fd, &writefds); - - /* we don''t bother reinitializing tv. at worst, it will wait a - * bit more time than expected. 
*/ - - ret = select(fd + 1, NULL, &writefds, NULL, t); - if (ret == -1) - break; - else if (FD_ISSET(fd, &writefds)) { - ret = write(fd, message + offset, len - offset); - if (ret <= 0) - break; - offset += ret; - } else - break; - } - - if (offset != len) { - EPRINTF("failure writing message\n"); - return -EIO; - } - - return 0; + count = tapdisk_ctl_conn_write(conn, message, size); + WARN_ON(count != size); } -static int -tapdisk_control_validate_request(tapdisk_message_t *request) +static int tapdisk_control_validate_request(tapdisk_message_t * request) { if (strnlen(request->u.params.path, TAPDISK_MESSAGE_MAX_PATH_LENGTH) >@@ -214,8 +488,10 @@ tapdisk_control_validate_request(tapdisk return 0; } +/* XXX Commented out in blktap2.5. */ +#if 0 static void -tapdisk_control_list_minors(struct tapdisk_control_connection *connection, +tapdisk_control_list_minors(struct tapdisk_ctl_conn *conn, tapdisk_message_t *request) { int i; @@ -232,7 +508,11 @@ tapdisk_control_list_minors(struct tapdi head = tapdisk_server_get_all_vbds(); list_for_each_entry(vbd, head, next) { - response.u.minors.list[i++] = vbd->minor; + td_blktap_t *tap = vbd->tap; + if (!tap) + continue; + + response.u.minors.list[i++] = tap->minor; if (i >= TAPDISK_MESSAGE_MAX_MINORS) { response.type = TAPDISK_MESSAGE_ERROR; response.u.response.error = ERANGE; @@ -241,194 +521,101 @@ tapdisk_control_list_minors(struct tapdi } response.u.minors.count = i; - tapdisk_control_write_message(connection->socket, &response, 2); - tapdisk_control_close_connection(connection); + tapdisk_ctl_conn_write(conn, &response, 2); } +#endif -static void -tapdisk_control_list(struct tapdisk_control_connection *connection, - tapdisk_message_t *request) +static int +tapdisk_control_list(struct tapdisk_ctl_conn *conn, + tapdisk_message_t *request, tapdisk_message_t * const response) { td_vbd_t *vbd; - struct list_head *head; - tapdisk_message_t response; - int count, i; + struct tqh_td_vbd_handle *head; + int count; - 
memset(&response, 0, sizeof(response)); - response.type = TAPDISK_MESSAGE_LIST_RSP; - response.cookie = request->cookie; + assert(conn); + assert(request); + assert(response); + + response->type = TAPDISK_MESSAGE_LIST_RSP; head = tapdisk_server_get_all_vbds(); + /* + * Count all the VBDs. + * TODO avoid this by maintaining this number? + */ count = 0; - list_for_each_entry(vbd, head, next) + TAILQ_FOREACH(vbd, head, entry) count++; - list_for_each_entry(vbd, head, next) { - response.u.list.count = count--; - response.u.list.minor = vbd->minor; - response.u.list.state = vbd->state; - response.u.list.path[0] = 0; + TAILQ_FOREACH(vbd, head, entry) { + response->u.list.count = count--; + response->u.list.state = vbd->state; + response->u.list.path[0] = 0; - if (!list_empty(&vbd->images)) { - td_image_t *image = list_entry(vbd->images.next, - td_image_t, next); - snprintf(response.u.list.path, - sizeof(response.u.list.path), - "%s:%s", - tapdisk_disk_types[image->type]->name, - image->name); - } + if (vbd->name) + strncpy(response->u.list.path, vbd->name, + sizeof(response->u.list.path)); - tapdisk_control_write_message(connection->socket, &response, 2); + tapdisk_control_write_message(conn, response); } - response.u.list.count = count; - response.u.list.minor = -1; - response.u.list.path[0] = 0; + response->u.list.count = count; + response->u.list.path[0] = 0; - tapdisk_control_write_message(connection->socket, &response, 2); - tapdisk_control_close_connection(connection); + return 0; } -static void -tapdisk_control_get_pid(struct tapdisk_control_connection *connection, - tapdisk_message_t *request) +static int +tapdisk_control_get_pid(struct tapdisk_ctl_conn *conn __attribute__((unused)), + tapdisk_message_t *request __attribute__((unused)), + tapdisk_message_t * const response) { - tapdisk_message_t response; + assert(response); - memset(&response, 0, sizeof(response)); - response.type = TAPDISK_MESSAGE_PID_RSP; - response.cookie = request->cookie; - 
response.u.tapdisk_pid = getpid(); + response->type = TAPDISK_MESSAGE_PID_RSP; + response->u.tapdisk_pid = getpid(); - tapdisk_control_write_message(connection->socket, &response, 2); - tapdisk_control_close_connection(connection); + return 0; } -static void -tapdisk_control_attach_vbd(struct tapdisk_control_connection *connection, - tapdisk_message_t *request) +static int +tapdisk_control_open_image( + struct tapdisk_ctl_conn *conn __attribute__((unused)), + tapdisk_message_t *request, tapdisk_message_t * const response) { - tapdisk_message_t response; - char *devname; + int err; td_vbd_t *vbd; - struct blktap2_params params; - image_t image; - int minor, err; + td_flag_t flags; + td_disk_info_t info; + int prt_path_len; + char * prt_path; - /* - * TODO: check for max vbds per process - */ + assert(request); + assert(response); - vbd = tapdisk_server_get_vbd(request->cookie); - if (vbd) { - err = -EEXIST; - goto out; - } + /* TODO Check whether the image is already open by another VBD? */ + prt_path_len = strnlen(request->u.params.prt_path, + TAPDISK_MESSAGE_MAX_PATH_LENGTH); + if (unlikely(prt_path_len == TAPDISK_MESSAGE_MAX_PATH_LENGTH)) { + err = -EINVAL; + goto out; + } else if (prt_path_len == 0) + prt_path = NULL; + else + prt_path = request->u.params.prt_path; - minor = request->cookie; - if (minor < 0) { - err = -EINVAL; - goto out; - } - - vbd = tapdisk_vbd_create(minor); + vbd = tapdisk_vbd_create(); if (!vbd) { err = -ENOMEM; goto out; } - err = asprintf(&devname, BLKTAP2_RING_DEVICE"%d", minor); - if (err == -1) { - err = -ENOMEM; - goto fail_vbd; - } - - err = tapdisk_vbd_attach(vbd, devname, minor); - free(devname); - if (err) - goto fail_vbd; - + /* TODO Add after everything has been initialised? 
*/ tapdisk_server_add_vbd(vbd); -out: - memset(&response, 0, sizeof(response)); - response.type = TAPDISK_MESSAGE_ATTACH_RSP; - response.cookie = request->cookie; - response.u.response.error = -err; - - tapdisk_control_write_message(connection->socket, &response, 2); - tapdisk_control_close_connection(connection); - - return; - -fail_vbd: - tapdisk_vbd_detach(vbd); - free(vbd); - goto out; -} - - -static void -tapdisk_control_detach_vbd(struct tapdisk_control_connection *connection, - tapdisk_message_t *request) -{ - tapdisk_message_t response; - td_vbd_t *vbd; - int err; - - vbd = tapdisk_server_get_vbd(request->cookie); - if (!vbd) { - err = -EINVAL; - goto out; - } - - tapdisk_vbd_detach(vbd); - - if (list_empty(&vbd->images)) { - tapdisk_server_remove_vbd(vbd); - free(vbd); - } - - err = 0; -out: - memset(&response, 0, sizeof(response)); - response.type = TAPDISK_MESSAGE_DETACH_RSP; - response.cookie = request->cookie; - response.u.response.error = -err; - - tapdisk_control_write_message(connection->socket, &response, 2); - tapdisk_control_close_connection(connection); -} - -static void -tapdisk_control_open_image(struct tapdisk_control_connection *connection, - tapdisk_message_t *request) -{ - int err; - image_t image; - td_vbd_t *vbd; - td_flag_t flags; - tapdisk_message_t response; - struct blktap2_params params; - - vbd = tapdisk_server_get_vbd(request->cookie); - if (!vbd) { - err = -EINVAL; - goto out; - } - - if (vbd->minor == -1) { - err = -EINVAL; - goto out; - } - - if (vbd->name) { - err = -EALREADY; - goto out; - } - + /* TODO check for unsupported flags */ flags = 0; if (request->u.params.flags & TAPDISK_MESSAGE_FLAG_RDONLY) flags |= TD_OPEN_RDONLY; @@ -440,83 +627,92 @@ tapdisk_control_open_image(struct tapdis flags |= TD_OPEN_VHD_INDEX; if (request->u.params.flags & TAPDISK_MESSAGE_FLAG_LOG_DIRTY) flags |= TD_OPEN_LOG_DIRTY; + if (request->u.params.flags & TAPDISK_MESSAGE_FLAG_ADD_LCACHE) + flags |= TD_OPEN_LOCAL_CACHE; + if 
(request->u.params.flags & TAPDISK_MESSAGE_FLAG_REUSE_PRT) + flags |= TD_OPEN_REUSE_PARENT; + if (request->u.params.flags & TAPDISK_MESSAGE_FLAG_STANDBY) + flags |= TD_OPEN_STANDBY; + if (request->u.params.flags & TAPDISK_MESSAGE_FLAG_SECONDARY) { + char *name = strdup(request->u.params.secondary); + if (!name) { + err = -errno; + goto out; + } + vbd->secondary_name = name; + flags |= TD_OPEN_SECONDARY; + } - vbd->name = strndup(request->u.params.path, - sizeof(request->u.params.path)); - if (!vbd->name) { - err = -ENOMEM; + err = tapdisk_vbd_open_vdi(vbd, request->u.params.path, flags, prt_path); + if (err) { + EPRINTF("failed to open VDI: %s\n", strerror(-err)); goto out; - } + } - err = tapdisk_vbd_parse_stack(vbd, request->u.params.path); - if (err) - goto out; - - err = tapdisk_vbd_open_stack(vbd, request->u.params.storage, flags); - if (err) - goto out; - - err = tapdisk_vbd_get_image_info(vbd, &image); - if (err) - goto fail_close; - - params.capacity = image.size; - params.sector_size = image.secsize; - strncpy(params.name, vbd->name, BLKTAP2_MAX_MESSAGE_LEN); - - err = ioctl(vbd->ring.fd, BLKTAP2_IOCTL_CREATE_DEVICE, &params); - if (err && errno != EEXIST) { - err = -errno; - EPRINTF("create device failed: %d\n", err); + err = tapdisk_vbd_get_disk_info(vbd, &info); + if (err) { + EPRINTF("failed to get disk info: %s\n", strerror(-err)); goto fail_close; } err = 0; out: - memset(&response, 0, sizeof(response)); - response.cookie = request->cookie; + if (!err) { + response->u.image.sectors = info.size; + response->u.image.sector_size = info.sector_size; + response->u.image.info = info.info; + response->type = TAPDISK_MESSAGE_OPEN_RSP; + } - if (err) { - response.type = TAPDISK_MESSAGE_ERROR; - response.u.response.error = -err; - } else { - response.u.image.sectors = image.size; - response.u.image.sector_size = image.secsize; - response.u.image.info = image.info; - response.type = TAPDISK_MESSAGE_OPEN_RSP; - } - - tapdisk_control_write_message(connection->socket, 
&response, 2); - tapdisk_control_close_connection(connection); - - return; + return err; fail_close: tapdisk_vbd_close_vdi(vbd); - free(vbd->name); - vbd->name = NULL; + + if (vbd->name) { + free(vbd->name); + vbd->name = NULL; + } + goto out; } -static void -tapdisk_control_close_image(struct tapdisk_control_connection *connection, - tapdisk_message_t *request) +static int +tapdisk_control_close_image(struct tapdisk_ctl_conn *conn, + tapdisk_message_t *request, tapdisk_message_t * const response) { - tapdisk_message_t response; td_vbd_t *vbd; - int err; + int err = 0; + int len; - vbd = tapdisk_server_get_vbd(request->cookie); + assert(conn); + assert(request); + assert(response); + + len = strnlen(request->u.string.text, TAPDISK_MESSAGE_STRING_LENGTH); + if (len < 1) { + err = -EINVAL; + goto out; + } + if (len >= TAPDISK_MESSAGE_STRING_LENGTH) { + err = -ENAMETOOLONG; + goto out; + } + + vbd = tapdisk_server_get_vbd(request->u.string.text); if (!vbd) { - err = -EINVAL; + EPRINTF("no VBD \''%s\''", request->u.string.text); + err = -ENODEV; goto out; } - if (!list_empty(&vbd->pending_requests)) { - err = -EAGAIN; - goto out; - } + /* TODO How do we make sure that new requests won''t enter the ring? + * I assume we have disconnected from the ring before? If yes, then + * make sure we check this. 
*/ + + while (!TAILQ_EMPTY(&vbd->pending_requests)) + tapdisk_server_iterate(); tapdisk_vbd_close_vdi(vbd); @@ -526,35 +722,45 @@ tapdisk_control_close_image(struct tapdi free(vbd->name); vbd->name = NULL; - if (vbd->minor == -1) { - tapdisk_server_remove_vbd(vbd); - tapdisk_vbd_free(vbd); + tapdisk_server_remove_vbd(vbd); + +out: + if (!err) { + response->type = TAPDISK_MESSAGE_CLOSE_RSP; + response->u.response.error = -err; } - err = 0; -out: - memset(&response, 0, sizeof(response)); - response.type = TAPDISK_MESSAGE_CLOSE_RSP; - response.cookie = request->cookie; - response.u.response.error = -err; - - tapdisk_control_write_message(connection->socket, &response, 2); - tapdisk_control_close_connection(connection); + return err; } -static void -tapdisk_control_pause_vbd(struct tapdisk_control_connection *connection, - tapdisk_message_t *request) +static int +tapdisk_control_pause_vbd(struct tapdisk_ctl_conn *conn, + tapdisk_message_t *request, tapdisk_message_t * const response) { int err; td_vbd_t *vbd; - tapdisk_message_t response; + int len; - memset(&response, 0, sizeof(response)); + assert(conn); + assert(request); + assert(response); - response.type = TAPDISK_MESSAGE_PAUSE_RSP; + len = strnlen(request->u.string.text, TAPDISK_MESSAGE_STRING_LENGTH); - vbd = tapdisk_server_get_vbd(request->cookie); + /* TODO boilerplate */ + if (len < 1) { + err = -EINVAL; + goto out; + } + if (len >= TAPDISK_MESSAGE_STRING_LENGTH) { + err = -ENAMETOOLONG; + goto out; + } + + response->type = TAPDISK_MESSAGE_PAUSE_RSP; + + /* TODO Need to fix this in control/tap-ctl-pause.c */ + vbd = tapdisk_server_get_vbd(request->u.string.text); if (!vbd) { err = -EINVAL; goto out; @@ -567,155 +773,429 @@ tapdisk_control_pause_vbd(struct tapdisk break; tapdisk_server_iterate(); - } while (1); + + } while (conn->fd >= 0); out: - response.cookie = request->cookie; - response.u.response.error = -err; - tapdisk_control_write_message(connection->socket, &response, 2); - 
tapdisk_control_close_connection(connection); + if (!err) + /* TODO useless? */ + response->u.response.error = -err; + return err; } -static void -tapdisk_control_resume_vbd(struct tapdisk_control_connection *connection, - tapdisk_message_t *request) +static int +tapdisk_control_resume_vbd( + struct tapdisk_ctl_conn *conn __attribute__((unused)), + tapdisk_message_t *request, tapdisk_message_t * const response) { int err; td_vbd_t *vbd; - tapdisk_message_t response; + const char *desc = NULL; + int len; - memset(&response, 0, sizeof(response)); + assert(request); + assert(response); - response.type = TAPDISK_MESSAGE_RESUME_RSP; + len = strnlen(request->u.string.text, TAPDISK_MESSAGE_STRING_LENGTH); - vbd = tapdisk_server_get_vbd(request->cookie); + /* TODO boilerplate */ + if (len < 1) { + err = -EINVAL; + goto out; + } + if (len >= TAPDISK_MESSAGE_STRING_LENGTH) { + err = -ENAMETOOLONG; + goto out; + } + + response->type = TAPDISK_MESSAGE_RESUME_RSP; + + /* TODO Need to fix this in control/tap-ctl-pause.c */ + vbd = tapdisk_server_get_vbd(request->u.string.text); if (!vbd) { err = -EINVAL; goto out; } - if (!td_flag_test(vbd->state, TD_VBD_PAUSED)) { - err = -EINVAL; - goto out; + /* TODO What''s this path? */ + if (request->u.params.path[0]) + desc = request->u.params.path; + + err = tapdisk_vbd_resume(vbd, desc); +out: + if (!err) + /* TODO useless? 
*/ + response->u.response.error = -err; + return err; +} + +static int +tapdisk_control_stats(struct tapdisk_ctl_conn *conn __attribute__((unused)), + tapdisk_message_t * request, tapdisk_message_t * const response) +{ + td_stats_t _st, *st = &_st; + td_vbd_t *vbd; + size_t rv = 0; + int err = 0; + int len; + + assert(request); + assert(response); + + len = strnlen(request->u.string.text, TAPDISK_MESSAGE_STRING_LENGTH); + + tapdisk_stats_init(st, + conn->out.buf + sizeof(*response), + conn->out.bufsz - sizeof(*response)); + if (len > 1) { + if (len >= TAPDISK_MESSAGE_STRING_LENGTH) { + err = -ENAMETOOLONG; + goto out; + } + + vbd = tapdisk_server_get_vbd(request->u.string.text); + if (!vbd) { + err = -ENODEV; + goto out; + } + + tapdisk_vbd_stats(vbd, st); + + } else { + struct tqh_td_vbd_handle *list = tapdisk_server_get_all_vbds(); + + tapdisk_stats_enter(st, ''[''); + + TAILQ_FOREACH(vbd, list, entry) + tapdisk_vbd_stats(vbd, st); + + tapdisk_stats_leave(st, '']''); } - if (request->u.params.path[0]) { - free(vbd->name); - vbd->name = strndup(request->u.params.path, - sizeof(request->u.params.path)); - if (!vbd->name) { - err = -ENOMEM; - goto out; - } - } else if (!vbd->name) { - err = -EINVAL; + rv = tapdisk_stats_length(st); +out: + if (!err) { + response->type = TAPDISK_MESSAGE_STATS_RSP; + response->u.info.length = rv; + } + + /* TODO Should only be executed if err == 0? */ + if (rv > 0) + conn->out.prod += rv; + + return err; +} + +/** + * Message handler executed for TAPDISK_MESSAGE_XENBLKIF_CONNECT. + * + * This is the entry point for connecting the tapdisk to the shared ring. It + * also sets up the necessary structures/descriptors (TODO explain). + */ +static int +tapdisk_control_xenblkif_connect( + struct tapdisk_ctl_conn *conn __attribute__((unused)), + tapdisk_message_t *request, tapdisk_message_t * const response) +{ + /* + * Get the block interface parameters (domain ID, device ID, etc.). 
+ */ + tapdisk_message_blkif_t *blkif; + + td_vbd_t *vbd; + const char *pool; + size_t len; + int err; + + assert(request); + assert(response); + + len = strnlen(request->u.string.text, TAPDISK_MESSAGE_STRING_LENGTH); + /* TODO boilerplate */ + if (len < 1) { + err = -EINVAL; + goto out; + } + if (len >= TAPDISK_MESSAGE_STRING_LENGTH) { + err = -ENAMETOOLONG; + goto out; + } + + vbd = tapdisk_server_get_vbd(request->u.blkif.params); + if (!vbd) { + err = -ENODEV; goto out; + } + + blkif = &request->u.blkif; + len = strnlen(blkif->pool, sizeof(blkif->pool)); + if (!len) + pool = NULL; + else if (len >= sizeof(blkif->pool)) { + err = -EINVAL; + goto out; + } else + pool = blkif->pool; + + DPRINTF("connecting VBD domid=%d, devid=%d, pool %s, evt %d\n", + blkif->domid, blkif->devid, pool, blkif->port); + + err = tapdisk_xenblkif_connect(blkif->domid, blkif->devid, blkif->gref, + blkif->order, blkif->port, blkif->proto, pool, vbd); +out: + if (!err) { + response->type = TAPDISK_MESSAGE_XENBLKIF_CONNECT_RSP; + /* TODO Useless? */ + response->u.response.error = -err; + } + return err; +} + +static int +tapdisk_control_xenblkif_disconnect( + struct tapdisk_ctl_conn *conn __attribute__((unused)), + tapdisk_message_t * request, tapdisk_message_t * const response) +{ + tapdisk_message_blkif_t *blkif; + int err; + + assert(request); + assert(response); + + blkif = &request->u.blkif; + + DPRINTF("disconnecting VBD domid=%d, devid=%d\n", blkif->domid, + blkif->devid); + + err = tapdisk_xenblkif_disconnect(blkif->domid, blkif->devid); + + if (!err) { + response->type = TAPDISK_MESSAGE_XENBLKIF_DISCONNECT_RSP; + /* TODO Useless? 
*/ + response->u.response.error = -err; + } + return err; +} + +static int +tapdisk_control_disk_info( + struct tapdisk_ctl_conn *conn __attribute__((unused)), + tapdisk_message_t * request, tapdisk_message_t * const response) +{ + tapdisk_message_image_t *image; + int err; + td_vbd_t *vbd; + td_disk_info_t info; + int len; + + assert(request); + assert(response); + + image = &response->u.image; + len = strnlen(request->u.string.text, TAPDISK_MESSAGE_STRING_LENGTH); + + /* TODO boilerplate */ + if (len < 1) { + err = -EINVAL; + goto out; + } + if (len >= TAPDISK_MESSAGE_STRING_LENGTH) { + err = -ENAMETOOLONG; + goto out; + } + + DPRINTF("getting info vbd %s\n", request->u.string.text); + + vbd = tapdisk_server_get_vbd(request->u.string.text); + if (!vbd) { + err = -ENODEV; + goto out; } - err = tapdisk_vbd_parse_stack(vbd, vbd->name); - if (err) - goto out; + err = tapdisk_vbd_get_disk_info(vbd, &info); + if (err) { + EPRINTF("tapdisk_vbd_get_disk_info failed %d\n", err); + goto out; + } - err = tapdisk_vbd_resume(vbd, NULL, -1); - if (err) - goto out; - + EPRINTF("got disk info: %ld %d\n", info.sector_size, err); out: - response.cookie = request->cookie; - response.u.response.error = -err; - tapdisk_control_write_message(connection->socket, &response, 2); - tapdisk_control_close_connection(connection); + if (!err) { + response->type = TAPDISK_MESSAGE_DISK_INFO_RSP; + image->sectors = info.size; + image->sector_size = info.sector_size; + image->info = info.info; + } + return err; } -static void -tapdisk_control_handle_request(event_id_t id, char mode, void *private) +struct tapdisk_control_info message_infos[] = { + [TAPDISK_MESSAGE_PID] = { + .handler = tapdisk_control_get_pid, + .flags = TAPDISK_MSG_REENTER, + }, + [TAPDISK_MESSAGE_LIST] = { + .handler = tapdisk_control_list, + .flags = TAPDISK_MSG_REENTER, + }, + [TAPDISK_MESSAGE_OPEN] = { + .handler = tapdisk_control_open_image, + .flags = TAPDISK_MSG_VERBOSE, + }, + [TAPDISK_MESSAGE_PAUSE] = { + .handler = 
tapdisk_control_pause_vbd, + .flags = TAPDISK_MSG_VERBOSE, + }, + [TAPDISK_MESSAGE_RESUME] = { + .handler = tapdisk_control_resume_vbd, + .flags = TAPDISK_MSG_VERBOSE, + }, + [TAPDISK_MESSAGE_CLOSE] = { + .handler = tapdisk_control_close_image, + .flags = TAPDISK_MSG_VERBOSE, + }, + [TAPDISK_MESSAGE_STATS] = { + .handler = tapdisk_control_stats, + .flags = TAPDISK_MSG_REENTER, + }, + [TAPDISK_MESSAGE_XENBLKIF_CONNECT] = { + .handler + tapdisk_control_xenblkif_connect, + .flags + TAPDISK_MSG_VERBOSE | + TAPDISK_MSG_VERBOSE_ERROR, + }, + [TAPDISK_MESSAGE_XENBLKIF_DISCONNECT] = { + .handler + tapdisk_control_xenblkif_disconnect, + .flags = TAPDISK_MSG_VERBOSE + || TAPDISK_MSG_VERBOSE_ERROR, + }, + [TAPDISK_MESSAGE_DISK_INFO] = { + .handler = tapdisk_control_disk_info, + .flags + TAPDISK_MSG_VERBOSE | + TAPDISK_MSG_VERBOSE_ERROR, + }, +}; + + +static void tapdisk_control_handle_request( + event_id_t id __attribute__((unused)), + char mode __attribute__((unused)), void *private) { - int err; - tapdisk_message_t message; - struct tapdisk_control_connection *connection - (struct tapdisk_control_connection *)private; + int err, excl; + tapdisk_message_t message, response; + struct tapdisk_ctl_conn *conn = private; + struct tapdisk_control_info *info; - if (tapdisk_control_read_message(connection->socket, &message, 2)) { - EPRINTF("failed to read message from %d\n", connection->socket); - tapdisk_control_close_connection(connection); + err = tapdisk_control_read_message(conn->fd, &message, 2); + if (err) + goto close; + + if (conn->in.busy) + goto busy; + + err = tapdisk_control_validate_request(&message); + if (err) + goto invalid; + + if (message.type > TAPDISK_MESSAGE_EXIT) + goto invalid; + + info = &message_infos[message.type]; + + if (!info->handler) + goto invalid; + + if (info->flags & TAPDISK_MSG_VERBOSE) + DBG("received ''%s'' message\n", + tapdisk_message_name(message.type)); + + excl = !(info->flags & TAPDISK_MSG_REENTER); + if (excl) { + if (td_control.busy) + 
goto busy; + + td_control.busy = 1; + } + conn->in.busy = 1; + conn->info = info; + + memset(&response, 0, sizeof(response)); + + err = info->handler(conn, &message, &response); + if (err) { + response.type = TAPDISK_MESSAGE_ERROR; + response.u.response.error = -err; + } + tapdisk_control_write_message(conn, &response); + + conn->in.busy = 0; + if (excl) + td_control.busy = 0; + + tapdisk_control_release_connection(conn); + return; + + error: + memset(&response, 0, sizeof(response)); + response.type = TAPDISK_MESSAGE_ERROR; + response.u.response.error = (err ? -err : EINVAL); + tapdisk_control_write_message(conn, &response); + + close: + tapdisk_control_close_connection(conn); + return; + + busy: + err = -EBUSY; + ERR(err, "rejecting message ''%s'' while busy\n", + tapdisk_message_name(message.type)); + goto error; + + invalid: + err = -EINVAL; + ERR(err, "rejecting unsupported message ''%s''\n", + tapdisk_message_name(message.type)); + goto error; +} + +static void tapdisk_control_accept(event_id_t id __attribute__((unused)), + char mode __attribute__((unused)), + void *private __attribute__((unused))) +{ + int err, fd; + struct tapdisk_ctl_conn *conn; + + fd = accept(td_control.socket, NULL, NULL); + if (fd == -1) { + ERR(-errno, "failed to accept new control connection: %d\n", + errno); return; } - err = tapdisk_control_validate_request(&message); - if (err) - goto fail; - - switch (message.type) { - case TAPDISK_MESSAGE_PID: - return tapdisk_control_get_pid(connection, &message); - case TAPDISK_MESSAGE_LIST_MINORS: - return tapdisk_control_list_minors(connection, &message); - case TAPDISK_MESSAGE_LIST: - return tapdisk_control_list(connection, &message); - case TAPDISK_MESSAGE_ATTACH: - return tapdisk_control_attach_vbd(connection, &message); - case TAPDISK_MESSAGE_DETACH: - return tapdisk_control_detach_vbd(connection, &message); - case TAPDISK_MESSAGE_OPEN: - return tapdisk_control_open_image(connection, &message); - case TAPDISK_MESSAGE_PAUSE: - return 
tapdisk_control_pause_vbd(connection, &message); - case TAPDISK_MESSAGE_RESUME: - return tapdisk_control_resume_vbd(connection, &message); - case TAPDISK_MESSAGE_CLOSE: - return tapdisk_control_close_image(connection, &message); - default: { - tapdisk_message_t response; - fail: - - EPRINTF("received unsupported message ''%s''\n", - tapdisk_message_name(message.type)); - - memset(&response, 0, sizeof(response)); - - response.type = TAPDISK_MESSAGE_ERROR; - response.u.response.error = (err ? -err : EINVAL); - tapdisk_control_write_message(connection->socket, &response, 2); - - tapdisk_control_close_connection(connection); - break; - } - } -} - -static void -tapdisk_control_accept(event_id_t id, char mode, void *private) -{ - int err, fd; - struct tapdisk_control_connection *connection; - - fd = accept(td_control.socket, NULL, NULL); - if (fd == -1) { - EPRINTF("failed to accept new control connection: %d\n", errno); - return; - } - - connection = tapdisk_control_allocate_connection(fd); - if (!connection) { + conn = tapdisk_ctl_conn_open(fd); + if (!conn) { close(fd); - EPRINTF("failed to allocate new control connection\n"); + ERR(-ENOMEM, "failed to allocate new control connection\n"); + return; } err = tapdisk_server_register_event(SCHEDULER_POLL_READ_FD, - connection->socket, 0, + conn->fd, TD_CTL_RECV_TIMEOUT, tapdisk_control_handle_request, - connection); + conn); if (err == -1) { - close(fd); - free(connection); - EPRINTF("failed to register new control event: %d\n", err); + tapdisk_control_close_connection(conn); + ERR(err, "failed to register new control event\n"); + return; } - connection->event_id = err; + conn->in.event_id = err; } -static int -tapdisk_control_mkdir(const char *dir) +static int tapdisk_control_mkdir(const char *dir) { int err; char *ptr, *name, *start; @@ -738,8 +1218,7 @@ tapdisk_control_mkdir(const char *dir) err = mkdir(name, 0755); if (err && errno != EEXIST) { err = -errno; - EPRINTF("failed to create directory %s: %d\n", - name, 
err); + EPRINTF("failed to create directory %s: %d\n", name, err); break; } @@ -755,21 +1234,20 @@ tapdisk_control_mkdir(const char *dir) return err; } -static int -tapdisk_control_create_socket(char **socket_path) +static int tapdisk_control_create_socket(char **socket_path) { - int err, flags; struct sockaddr_un saddr; + int err; - err = tapdisk_control_mkdir(BLKTAP2_CONTROL_DIR); + err = tapdisk_control_mkdir(BLKTAP3_CONTROL_DIR); if (err) { EPRINTF("failed to create directory %s: %d\n", - BLKTAP2_CONTROL_DIR, err); + BLKTAP3_CONTROL_DIR, err); return err; } err = asprintf(&td_control.path, "%s/%s%d", - BLKTAP2_CONTROL_DIR, BLKTAP2_CONTROL_SOCKET, getpid()); + BLKTAP3_CONTROL_DIR, BLKTAP3_CONTROL_SOCKET, getpid()); if (err == -1) { td_control.path = NULL; err = (errno ? : ENOMEM); @@ -801,7 +1279,7 @@ tapdisk_control_create_socket(char **soc goto fail; } - err = listen(td_control.socket, 10); + err = listen(td_control.socket, TD_CTL_SOCK_BACKLOG); if (err == -1) { err = errno; EPRINTF("failed to listen: %d\n", err); @@ -826,11 +1304,8 @@ fail: return err; } -int -tapdisk_control_open(char **path) +int tapdisk_control_open(char **path) { - int err; - tapdisk_control_initialize(); return tapdisk_control_create_socket(path); diff --git a/tools/blktap2/drivers/tapdisk-control.h b/tools/blktap3/drivers/tapdisk-control.h copy from tools/blktap2/drivers/tapdisk-control.h copy to tools/blktap3/drivers/tapdisk-control.h
Thanos Makatos
2013-Apr-19 15:40 UTC
[PATCH 09 of 21] blktap3/drivers: Introduce back-end driver types
This patch copies the back-end driver types from blktap2 with changes coming from blktap2.5. Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com> diff --git a/tools/blktap2/drivers/tapdisk-disktype.c b/tools/blktap3/drivers/tapdisk-disktype.c copy from tools/blktap2/drivers/tapdisk-disktype.c copy to tools/blktap3/drivers/tapdisk-disktype.c --- a/tools/blktap2/drivers/tapdisk-disktype.c +++ b/tools/blktap3/drivers/tapdisk-disktype.c @@ -30,8 +30,10 @@ #include <string.h> #include <errno.h> +#include "blktap3.h" +#include "tapdisk.h" + #include "tapdisk-disktype.h" -#include "tapdisk-message.h" static const disk_info_t aio_disk = { "aio", @@ -91,15 +93,39 @@ static const disk_info_t vhd_index_disk static const disk_info_t log_disk = { "log", "write logger (log)", - 0, + DISK_TYPE_FILTER, }; -static const disk_info_t remus_disk = { - "remus", +static disk_info_t remus_disk = { "remus disk replicator (remus)", + "remus", + 0, +}; + +static const disk_info_t lcache_disk = { + "lc", + "local parent cache (lc)", + DISK_TYPE_FILTER, +}; + +static const disk_info_t llpcache_disk = { + "llp", + "local leaf cache, persistent (llp)", + 0, +}; + +static const disk_info_t llecache_disk = { + "lle", + "local leaf cache, ephemeral (lle)", 0, }; +static const disk_info_t valve_disk = { + "valve", + "group rate limiting (valve)", + DISK_TYPE_FILTER, +}; + const disk_info_t *tapdisk_disk_types[] = { [DISK_TYPE_AIO] = &aio_disk, [DISK_TYPE_SYNC] = &sync_disk, @@ -109,47 +135,85 @@ const disk_info_t *tapdisk_disk_types[] [DISK_TYPE_RAM] = &ram_disk, [DISK_TYPE_QCOW] = &qcow_disk, [DISK_TYPE_BLOCK_CACHE] = &block_cache_disk, - [DISK_TYPE_LOG] = &log_disk, [DISK_TYPE_VINDEX] = &vhd_index_disk, + [DISK_TYPE_LOG] = &log_disk, [DISK_TYPE_REMUS] = &remus_disk, + [DISK_TYPE_LCACHE] = &lcache_disk, + [DISK_TYPE_VALVE] = &valve_disk, + [DISK_TYPE_LLPCACHE] = &llpcache_disk, + [DISK_TYPE_LLECACHE] = &llecache_disk, 0, }; extern struct tap_disk tapdisk_aio; + +/* + * TODO Why commented out? 
+ */ +#if 0 extern struct tap_disk tapdisk_sync; extern struct tap_disk tapdisk_vmdk; extern struct tap_disk tapdisk_vhdsync; +#endif + extern struct tap_disk tapdisk_vhd; extern struct tap_disk tapdisk_ram; + +/* + * TODO Why commented out? + */ +#if 0 extern struct tap_disk tapdisk_qcow; -extern struct tap_disk tapdisk_block_cache; +#endif + extern struct tap_disk tapdisk_vhd_index; + +/* + * TODO Why commented out? + */ +#if 0 extern struct tap_disk tapdisk_log; -extern struct tap_disk tapdisk_remus; +#endif const struct tap_disk *tapdisk_disk_drivers[] = { [DISK_TYPE_AIO] = &tapdisk_aio, + +/* + * TODO Why commented out? + */ #if 0 [DISK_TYPE_SYNC] = &tapdisk_sync, [DISK_TYPE_VMDK] = &tapdisk_vmdk, + [DISK_TYPE_VHDSYNC] = &tapdisk_vhdsync_disk #endif - [DISK_TYPE_VHD] = &tapdisk_vhd, - [DISK_TYPE_RAM] = &tapdisk_ram, + +/* + * TODO Why commented out? + */ +#if 0 [DISK_TYPE_QCOW] = &tapdisk_qcow, - [DISK_TYPE_BLOCK_CACHE] = &tapdisk_block_cache, - [DISK_TYPE_VINDEX] = &tapdisk_vhd_index, +#endif + +/* + * TODO Why commented out? 
+ */ +#if 0 [DISK_TYPE_LOG] = &tapdisk_log, - [DISK_TYPE_REMUS] = &tapdisk_remus, +#endif 0, }; int tapdisk_disktype_find(const char *name) { - const disk_info_t *info; int i; - for (i = 0; info = tapdisk_disk_types[i], info != NULL; ++i) { + for (i = 0; i < ARRAY_SIZE(tapdisk_disk_types); i++) { + const disk_info_t *info = tapdisk_disk_types[i]; + + if (!info) + continue; + if (strcmp(name, info->name)) continue; @@ -183,22 +247,7 @@ tapdisk_disktype_parse_params(const char type = tapdisk_disktype_find(name); - if (type >= 0) - *_path = params + len + 1; + *_path = params + len + 1; return type; } - -int -tapdisk_parse_disk_type(const char *params, const char **_path, int *_type) -{ - int type; - - type = tapdisk_disktype_parse_params(params, _path); - if (type < 0) - return type; - - *_type = type; - - return 0; -} diff --git a/tools/blktap2/drivers/tapdisk-disktype.h b/tools/blktap3/drivers/tapdisk-disktype.h copy from tools/blktap2/drivers/tapdisk-disktype.h copy to tools/blktap3/drivers/tapdisk-disktype.h --- a/tools/blktap2/drivers/tapdisk-disktype.h +++ b/tools/blktap3/drivers/tapdisk-disktype.h @@ -37,9 +37,13 @@ #define DISK_TYPE_RAM 5 #define DISK_TYPE_QCOW 6 #define DISK_TYPE_BLOCK_CACHE 7 -#define DISK_TYPE_LOG 8 -#define DISK_TYPE_REMUS 9 -#define DISK_TYPE_VINDEX 10 +#define DISK_TYPE_VINDEX 8 +#define DISK_TYPE_LOG 9 +#define DISK_TYPE_REMUS 10 +#define DISK_TYPE_LCACHE 11 +#define DISK_TYPE_LLECACHE 12 +#define DISK_TYPE_LLPCACHE 13 +#define DISK_TYPE_VALVE 14 #define DISK_TYPE_NAME_MAX 32 @@ -55,8 +59,10 @@ extern const struct tap_disk *tapdisk_di /* one single controller for all instances of disk type */ #define DISK_TYPE_SINGLE_CONTROLLER (1<<0) +/* filter driver without physical image data */ +#define DISK_TYPE_FILTER (1<<1) + int tapdisk_disktype_find(const char *name); int tapdisk_disktype_parse_params(const char *params, const char **_path); -int tapdisk_parse_disk_type(const char *, const char **, int *); #endif
Thanos Makatos
2013-Apr-19 15:40 UTC
[PATCH 10 of 21] blktap3/drivers: Introduce back-end driver abstraction
This patch copies the back-end driver abstraction layer from blktap2, with changes coming from blktap2.5. Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com> diff --git a/tools/blktap2/drivers/tapdisk-driver.c b/tools/blktap3/drivers/tapdisk-driver.c copy from tools/blktap2/drivers/tapdisk-driver.c copy to tools/blktap3/drivers/tapdisk-driver.c --- a/tools/blktap2/drivers/tapdisk-driver.c +++ b/tools/blktap3/drivers/tapdisk-driver.c @@ -25,14 +25,49 @@ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + #include <stdlib.h> +#include <stdio.h> #include "tapdisk-driver.h" #include "tapdisk-server.h" #include "tapdisk-disktype.h" +#include "tapdisk-stats.h" + +static void +tapdisk_driver_log_flush(td_driver_t * driver, const char *__caller) +{ + td_loglimit_t *rl = &driver->loglimit; + + if (rl->dropped) { + tlog_syslog(LOG_WARNING, + "%s: %s: %d messages suppressed", + driver->name, __caller, rl->dropped); + rl->dropped = 0; + } +} + +int tapdisk_driver_log_pass(td_driver_t * driver, const char *__caller) +{ + td_loglimit_t *rl = &driver->loglimit; + int dropping = rl->dropped; + + if (tapdisk_loglimit_pass(rl)) { + tapdisk_driver_log_flush(driver, __caller); + return 1; + } + + if (!dropping) + tlog_syslog(LOG_WARNING, + "%s: %s: too many errors, dropped.", + driver->name, __caller); + + return 0; +} td_driver_t * -tapdisk_driver_allocate(int type, char *name, td_flag_t flags, int storage) +tapdisk_driver_allocate(int type, const char *name, + td_flag_t flags) { int err; td_driver_t *driver; @@ -52,7 +87,7 @@ tapdisk_driver_allocate(int type, char * driver->ops = ops; driver->type = type; - driver->storage = storage; + driver->storage = -1; driver->data = calloc(1, ops->private_data_size); if (!driver->data) goto fail; @@ -60,6 +95,9 @@ tapdisk_driver_allocate(int type, char * if (td_flag_test(flags, TD_OPEN_RDONLY)) td_flag_set(driver->state, TD_DRIVER_RDONLY); + 
tapdisk_loglimit_init(&driver->loglimit, 16 /* msgs */ , + 90 * 1000 /* ms */ ); + return driver; fail: @@ -82,13 +120,16 @@ tapdisk_driver_free(td_driver_t *driver) EPRINTF("freeing open driver %s (state 0x%08x)\n", driver->name, driver->state); + tapdisk_driver_log_flush(driver, __func__); + free(driver->name); free(driver->data); free(driver); } void -tapdisk_driver_queue_tiocb(td_driver_t *driver, struct tiocb *tiocb) +tapdisk_driver_queue_tiocb(td_driver_t *driver __attribute__((unused)), + struct tiocb *tiocb) { tapdisk_server_queue_tiocb(tiocb); } @@ -99,3 +140,21 @@ tapdisk_driver_debug(td_driver_t *driver if (driver->ops->td_debug) driver->ops->td_debug(driver); } + +void tapdisk_driver_stats(td_driver_t * driver, td_stats_t * st) +{ + const disk_info_t *info; + + tapdisk_stats_field(st, "type", "d", driver->type); + + info = tapdisk_disk_types[driver->type]; + tapdisk_stats_field(st, "name", "s", info->name); + + if (driver->ops->td_stats) { + tapdisk_stats_field(st, "status", "{"); + driver->ops->td_stats(driver, st); + tapdisk_stats_leave(st, ''}''); + } else + tapdisk_stats_field(st, "status", NULL); + +} diff --git a/tools/blktap2/drivers/tapdisk-driver.h b/tools/blktap3/drivers/tapdisk-driver.h copy from tools/blktap2/drivers/tapdisk-driver.h copy to tools/blktap3/drivers/tapdisk-driver.h --- a/tools/blktap2/drivers/tapdisk-driver.h +++ b/tools/blktap3/drivers/tapdisk-driver.h @@ -31,6 +31,7 @@ #include "tapdisk.h" #include "scheduler.h" #include "tapdisk-queue.h" +#include "tapdisk-loglimit.h" #define TD_DRIVER_OPEN 0x0001 #define TD_DRIVER_RDONLY 0x0002 @@ -49,14 +50,19 @@ struct td_driver_handle { void *data; const struct tap_disk *ops; - struct list_head next; + td_loglimit_t loglimit; + TAILQ_ENTRY(td_driver_handle) next; }; -td_driver_t *tapdisk_driver_allocate(int, char *, td_flag_t, int); +td_driver_t *tapdisk_driver_allocate(int, const char *, td_flag_t); void tapdisk_driver_free(td_driver_t *); void tapdisk_driver_queue_tiocb(td_driver_t *, 
struct tiocb *); void tapdisk_driver_debug(td_driver_t *); +void tapdisk_driver_stats(td_driver_t *, td_stats_t *); + +int tapdisk_driver_log_pass(td_driver_t *, const char *caller); + #endif
Thanos Makatos
2013-Apr-19 15:40 UTC
[PATCH 11 of 21] blktap3/drivers: Introduce I/O request filtering functionality
This patch copies from blktap2 what seems to be I/O request filtering functionality (e.g. failure injection, data integrity check), with changes coming from blktap2.5. Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com> diff --git a/tools/blktap2/drivers/tapdisk-filter.c b/tools/blktap3/drivers/tapdisk-filter.c copy from tools/blktap2/drivers/tapdisk-filter.c copy to tools/blktap3/drivers/tapdisk-filter.c --- a/tools/blktap2/drivers/tapdisk-filter.c +++ b/tools/blktap3/drivers/tapdisk-filter.c @@ -1,5 +1,7 @@ /* - * Copyright (c) 2008, XenSource Inc. + * Copyright (c) 2007, XenSource Inc. + * Copyright (c) 2010, Citrix Systems, Inc. + * * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -113,11 +115,10 @@ check_hash(struct tfilter *filter, uint6 if (hash->hash != chksum(buf)) { struct timeval now; gettimeofday(&now, NULL); - DBG("%s: hash table: 0x%020" PRIx64 " at %012lu.%06llu, " - "from disk: 0x%020" PRIx64 " at %012lu.%06llu\n", + DBG("%s: hash table: 0x%020" PRIx64 " at %012lu.%06lu, " + "from disk: 0x%020" PRIx64 " at %012lu.%06lu\n", type, hash->hash, hash->time.tv_sec, - (unsigned long long)hash->time.tv_usec, sum, - now.tv_sec, (unsigned long long)now.tv_usec); + hash->time.tv_usec, sum, now.tv_sec, now.tv_usec); } } @@ -134,13 +135,9 @@ insert_hash(struct tfilter *filter, uint static void check_sector(struct tfilter *filter, int type, int rw, uint64_t sec, char *buf) { - struct dhash *hash; - if (sec >= filter->secs) return; - hash = filter->dhash + sec; - if (rw) { if (type == PRE_CHECK) insert_hash(filter, sec, buf); @@ -156,7 +153,7 @@ static void check_data(struct tfilter *filter, int type, struct iocb *io) { int rw; - uint64_t i, sec; + uint64_t i; rw = (io->aio_lio_opcode == IO_CMD_PWRITE); diff --git a/tools/blktap2/drivers/tapdisk-filter.h b/tools/blktap3/drivers/tapdisk-filter.h copy from tools/blktap2/drivers/tapdisk-filter.h copy to tools/blktap3/drivers/tapdisk-filter.h
Thanos Makatos
2013-Apr-19 15:40 UTC
[PATCH 12 of 21] blktap3/drivers: Introduce back-end image abstraction layer
This patch copies from blktap2 the functionality that allows back-end drivers to be used transparently, with most changes coming from blktap2.5. Also, the parent minor number is replaced with the parent /path/to/file in functions __tapdisk_image_open_chain and tapdisk_image_open_chain, as there is no minor number in blktap3. Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com> diff --git a/tools/blktap2/drivers/tapdisk-image.c b/tools/blktap3/drivers/tapdisk-image.c copy from tools/blktap2/drivers/tapdisk-image.c copy to tools/blktap3/drivers/tapdisk-image.c --- a/tools/blktap2/drivers/tapdisk-image.c +++ b/tools/blktap3/drivers/tapdisk-image.c @@ -25,22 +25,39 @@ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + + #include <errno.h> #include <unistd.h> #include <stdlib.h> -#ifdef MEMSHR -#include <memshr.h> -#endif +#include <stdio.h> +#include <limits.h> +#include <regex.h> +#include <inttypes.h> #include "tapdisk-image.h" #include "tapdisk-driver.h" #include "tapdisk-server.h" +#include "tapdisk-stats.h" +#include "tapdisk-interface.h" +#include "tapdisk-disktype.h" +#include "tapdisk-storage.h" +/* TODO already defined in tapdisk.h/tapdisk-log.h */ +#define DBG(_f, _a...) tlog_syslog(TLOG_DBG, _f, ##_a) +#define INFO(_f, _a...) tlog_syslog(TLOG_INFO, _f, ##_a) #define ERR(_err, _f, _a...) 
tlog_error(_err, _f, ##_a) -td_image_t * -tapdisk_image_allocate(const char *file, int type, int storage, - td_flag_t flags, void *private) +#define BUG() td_panic() + +#define BUG_ON(_cond) \ + if (unlikely(_cond)) { \ + ERR(-EINVAL, "(%s) = %d", #_cond, _cond); \ + BUG(); \ + } + +td_image_t *tapdisk_image_allocate(const char *file, const int type, + const td_flag_t flags) { int err; td_image_t *image; @@ -57,27 +74,19 @@ tapdisk_image_allocate(const char *file, image->type = type; image->flags = flags; - image->storage = storage; - image->private = private; -#ifdef MEMSHR - image->memshr_id = memshr_vbd_image_get(file); -#endif - INIT_LIST_HEAD(&image->next); return image; } void -tapdisk_image_free(td_image_t *image) +tapdisk_image_free(td_image_t * image, struct tqh_td_image_handle *head) { if (!image) return; - list_del(&image->next); + if (head) + TAILQ_REMOVE(head, image, entry); -#ifdef MEMSHR - memshr_vbd_image_put(image->memshr_id); -#endif free(image->name); tapdisk_driver_free(image->driver); free(image); @@ -86,9 +95,42 @@ tapdisk_image_free(td_image_t *image) int tapdisk_image_check_td_request(td_image_t *image, td_request_t treq) { - int rdonly; + int rdonly, err; + td_disk_info_t *info; + + err = -EINVAL; + + info = &image->info; + rdonly = td_flag_test(image->flags, TD_OPEN_RDONLY); + + if (treq.op != TD_OP_READ && treq.op != TD_OP_WRITE) + goto fail; + + if (treq.op == TD_OP_WRITE && rdonly) + { + err = -EPERM; + goto fail; + } + + if (treq.secs <= 0 || treq.sec + treq.secs > info->size) + goto fail; + + return 0; + +fail: + ERR(err, "bad td request on %s (%s, %" PRIu64 "): %d at %" PRIu64, + image->name, (rdonly ? 
"ro" : "rw"), info->size, treq.op, + treq.sec + treq.secs); + return err; + +} + +int +tapdisk_image_check_request(td_image_t * image, td_vbd_request_t * vreq) +{ td_driver_t *driver; td_disk_info_t *info; + int i, rdonly, secs, err; driver = image->driver; if (!driver) @@ -97,73 +139,442 @@ tapdisk_image_check_td_request(td_image_ info = &driver->info; rdonly = td_flag_test(image->flags, TD_OPEN_RDONLY); - if (treq.op != TD_OP_READ && treq.op != TD_OP_WRITE) + secs = 0; + + if (vreq->iovcnt < 0) { + err = -EINVAL; + goto fail; + } + + for (i = 0; i < vreq->iovcnt; i++) + secs += vreq->iov[i].secs; + + switch (vreq->op) { + case TD_OP_WRITE: + if (rdonly) { + err = -EPERM; + goto fail; + } + /* continue */ + case TD_OP_READ: + if (vreq->sec + secs > info->size) { + err = -EINVAL; + goto fail; + } + break; + default: + err = -EOPNOTSUPP; + goto fail; + } + + return 0; + + fail: + ERR(err, + "bad request on %s (%s, %" PRIu64 "): req %s op %d at %" PRIu64, + image->name, (rdonly ? "ro" : "rw"), info->size, vreq->name, + vreq->op, vreq->sec + secs); + + return err; +} + +void +tapdisk_image_close(td_image_t * image, struct tqh_td_image_handle *head) +{ + td_close(image); + tapdisk_image_free(image, head); +} + +int +tapdisk_image_open(const int type, const char *name, const int flags, + td_image_t ** _image) +{ + td_image_t *image; + int err; + + image = tapdisk_image_allocate(name, type, flags); + if (!image) { + err = -ENOMEM; + goto fail; + } + + err = td_load(image); + if (!err) + goto done; + + image->driver = tapdisk_driver_allocate(image->type, + image->name, image->flags); + if (!image->driver) { + err = -ENOMEM; + goto fail; + } + + err = td_open(image); + if (err) { + EPRINTF("failed to open image \''%s\'': %s\n", image->name, + strerror(-err)); + goto fail; + } + +done: + *_image = image; + return 0; + +fail: + if (image) + tapdisk_image_close(image, NULL); + return err; +} + +/** + * Opens the parent of the image. 
+ * + * @param image the image to open + * @param _parent output parameter that receives the parent + * @returns 0 on success + */ +static int +tapdisk_image_open_parent(td_image_t * image, td_image_t ** _parent) +{ + td_image_t *parent = NULL; + td_disk_id_t id; + int err; + + memset(&id, 0, sizeof(id)); + id.flags = image->flags; + + err = td_get_parent_id(image, &id); + if (err == TD_NO_PARENT) { + err = 0; + goto out; + } + if (err) + return err; + + err = tapdisk_image_open(id.type, id.name, id.flags, &parent); + if (err) + return err; + + out: + *_parent = parent; + return 0; +} + +/** + * Opens all parents of the image, adding them to the parent list (first is + * youngest). + * + * @param image the image whose parents to open + * @returns 0 on success + */ +static int +tapdisk_image_open_parents(td_image_t * image, + struct tqh_td_image_handle *head) +{ + td_image_t *parent; + int err; + + do { + err = tapdisk_image_open_parent(image, &parent); + if (err) + break; + + if (parent) { + TAILQ_INSERT_AFTER(head, image, parent, entry); + image = parent; + } + } while (parent); + + return err; +} + +void tapdisk_image_close_chain(struct tqh_td_image_handle *list) +{ + td_image_t *image, *next; + + tapdisk_for_each_image_safe(image, next, list) + tapdisk_image_close(image, list); +} + +/** + * Opens the image and all of its parents. 
+ * + * @param type DISK_TYPE_* (see tapdisk-disktype.h) + * @param name /path/to/file + * @param flags + * @param _head + * @param prt_params parent type:/path/to/file (optional) + * @returns + */ +static int +__tapdisk_image_open_chain(int type, const char *name, int flags, + struct tqh_td_image_handle *_head, const char *prt_path) +{ + struct tqh_td_image_handle head = TAILQ_HEAD_INITIALIZER(head); + td_image_t *image; + int err; + + err = tapdisk_image_open(type, name, flags, &image); + if (err) + goto fail; + + TAILQ_INSERT_TAIL(&head, image, entry); + + if (unlikely(prt_path)) { + err = tapdisk_image_open(DISK_TYPE_AIO, prt_path, + flags | TD_OPEN_RDONLY, &image); + if (err) + goto fail; + + TAILQ_INSERT_TAIL(&head, image, entry); + goto done; + } + + err = tapdisk_image_open_parents(image, &head); + if (err) goto fail; - if (treq.op == TD_OP_WRITE && rdonly) - goto fail; - - if (treq.secs <= 0 || treq.sec + treq.secs > info->size) - goto fail; - +done: + TAILQ_CONCAT(_head, &head, entry); return 0; fail: - ERR(-EINVAL, "bad td request on %s (%s, %"PRIu64"): %d at %"PRIu64, - image->name, (rdonly ? "ro" : "rw"), info->size, treq.op, - treq.sec + treq.secs); + tapdisk_image_close_chain(&head); + return err; +} + +static int tapdisk_image_parse_flags(char *args, unsigned long *_flags) +{ + unsigned long flags = 0; + char *token; + + BUG_ON(!args); + + do { + token = strtok(args, ","); + if (!token) + break; + + switch (token[0]) { + case ''r'': + if (!strcmp(token, "ro")) { + flags |= TD_OPEN_RDONLY; + break; + } + goto fail; + + default: + goto fail; + } + + args = NULL; + } while (1); + + *_flags |= flags; + + return 0; + + fail: + ERR(-EINVAL, "Invalid token ''%s''", token); return -EINVAL; +} +/** + * TODO opens the image chain? 
+ */ +static int +tapdisk_image_open_x_chain(const char *path, + struct tqh_td_image_handle *_head) +{ + struct tqh_td_image_handle head = TAILQ_HEAD_INITIALIZER(head); + td_image_t *image = NULL, *next; + regex_t _im, *im = NULL, _ws, *ws = NULL; + FILE *s; + int err; + + s = fopen(path, "r"); + if (!s) { + err = -errno; + goto fail; + } + + err = regcomp(&_ws, "^[:space:]*$", REG_NOSUB); + if (err) + goto fail; + ws = &_ws; + + err = regcomp(&_im, + "^([^:]+):([^ \t]+)([ \t]+([a-z,]+))?", + REG_EXTENDED | REG_NEWLINE); + if (err) + goto fail; + im = &_im; + + do { + char line[512], *l; + regmatch_t match[5]; + char *typename, *path, *args = NULL; + unsigned long flags; + int type; + + l = fgets(line, sizeof(line), s); + if (!l) + break; + + err = regexec(im, line, ARRAY_SIZE(match), match, 0); + if (err) { + err = regexec(ws, line, ARRAY_SIZE(match), match, 0); + if (!err) + continue; + err = -EINVAL; + goto fail; + } + + line[match[1].rm_eo] = 0; + typename = line + match[1].rm_so; + + line[match[2].rm_eo] = 0; + path = line + match[2].rm_so; + + if (match[4].rm_so >= 0) { + line[match[4].rm_eo] = 0; + args = line + match[4].rm_so; + } + + type = tapdisk_disktype_find(typename); + if (type < 0) { + err = type; + goto fail; + } + + flags = 0; + + if (args) { + err = tapdisk_image_parse_flags(args, &flags); + if (err) + goto fail; + } + + err = tapdisk_image_open(type, path, flags, &image); + if (err) + goto fail; + + TAILQ_INSERT_TAIL(&head, image, entry); + } while (1); + + if (!image) { + err = -EINVAL; + goto fail; + } + + err = tapdisk_image_open_parents(image, &head); + if (err) + goto fail; + + TAILQ_CONCAT(&head, _head, entry); + out: + if (im) + regfree(im); + if (ws) + regfree(ws); + if (s) + fclose(s); + + return err; + + fail: + tapdisk_for_each_image_safe(image, next, &head) + tapdisk_image_free(image, &head); + + goto out; } int -tapdisk_image_check_ring_request(td_image_t *image, blkif_request_t *req) +tapdisk_image_open_chain(const char *params, 
int flags, const char * prt_path, + struct tqh_td_image_handle *head) { - td_driver_t *driver; - td_disk_info_t *info; - int i, psize, rdonly; - uint64_t nsects, total; + const char *name; + int type, err; - driver = image->driver; - if (!driver) - return -ENODEV; + type = tapdisk_disktype_parse_params(params, &name); + if (type >= 0) { + err = __tapdisk_image_open_chain(type, name, flags, head, prt_path); + BUG_ON(TAILQ_EMPTY(head)); + return err; + } - nsects = 0; - total = 0; - info = &driver->info; + err = type; - rdonly = td_flag_test(image->flags, TD_OPEN_RDONLY); + if (err == -ENOENT && strlen(params) >= 3) { + switch (params[2]) { + case ''c'': + if (!strncmp(params, "x-chain", strlen("x-chain"))) + err = tapdisk_image_open_x_chain(name, head); + break; + } + } - if (req->operation != BLKIF_OP_READ && - req->operation != BLKIF_OP_WRITE) - goto fail; + return err; +} - if (req->operation == BLKIF_OP_WRITE && rdonly) - goto fail; +int tapdisk_image_validate_chain(struct tqh_td_image_handle *head) +{ + td_image_t *image, *parent; + int flags, err; - if (!req->nr_segments || req->nr_segments > MAX_SEGMENTS_PER_REQ) - goto fail; + INFO("VBD CHAIN:\n"); - total = 0; - psize = getpagesize(); + tapdisk_for_each_image_reverse(parent, head) { + image = TAILQ_PREV(parent, tqh_td_image_handle, entry); - for (i = 0; i < req->nr_segments; i++) { - nsects = req->seg[i].last_sect - req->seg[i].first_sect + 1; - - if (req->seg[i].last_sect >= psize >> 9 || nsects <= 0) - goto fail; + /* + * FIXME this was: image == TAILQ_FIRST(head), not sure if the new + * check is correct + */ + if (image == NULL) + break; - total += nsects; - } + err = td_validate_parent(image, parent); + if (err) + return err; - if (req->sector_number + nsects > info->size) - goto fail; + flags = tapdisk_disk_types[image->type]->flags; + if (flags & DISK_TYPE_FILTER) { + image->driver->info = parent->driver->info; + image->info = parent->info; + } + } - return 0; + tapdisk_for_each_image(image, head) { 
+ INFO("%s: type:%s(%d) storage:%s(%d)\n", + image->name, + tapdisk_disk_types[image->type]->name, + image->type, + tapdisk_storage_name(image->driver->storage), + image->driver->storage); + } -fail: - ERR(-EINVAL, "bad request on %s (%s, %"PRIu64"): id: %"PRIu64": %d at %"PRIu64, - image->name, (rdonly ? "ro" : "rw"), info->size, req->id, - req->operation, req->sector_number + total); - return -EINVAL; + return 0; } + +void tapdisk_image_stats(td_image_t * image, td_stats_t * st) +{ + tapdisk_stats_enter(st, ''{''); + tapdisk_stats_field(st, "name", "s", image->name); + + tapdisk_stats_field(st, "hits", "["); + tapdisk_stats_val(st, "llu", image->stats.hits.rd); + tapdisk_stats_val(st, "llu", image->stats.hits.wr); + tapdisk_stats_leave(st, '']''); + + tapdisk_stats_field(st, "fail", "["); + tapdisk_stats_val(st, "llu", image->stats.fail.rd); + tapdisk_stats_val(st, "llu", image->stats.fail.wr); + tapdisk_stats_leave(st, '']''); + + tapdisk_stats_field(st, "driver", "{"); + tapdisk_driver_stats(image->driver, st); + tapdisk_stats_leave(st, ''}''); + + tapdisk_stats_leave(st, ''}''); +} diff --git a/tools/blktap2/drivers/tapdisk-interface.c b/tools/blktap3/drivers/tapdisk-interface.c copy from tools/blktap2/drivers/tapdisk-interface.c copy to tools/blktap3/drivers/tapdisk-interface.c --- a/tools/blktap2/drivers/tapdisk-interface.c +++ b/tools/blktap3/drivers/tapdisk-interface.c @@ -25,6 +25,9 @@ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ + +#include <signal.h> +#include <unistd.h> #include <errno.h> #include "tapdisk.h" @@ -33,11 +36,11 @@ #include "tapdisk-driver.h" #include "tapdisk-server.h" #include "tapdisk-interface.h" +#include "tapdisk-log.h" int td_load(td_image_t *image) { - int err; td_image_t *shared; td_driver_t *driver; @@ -68,8 +71,7 @@ int if (!driver) { driver = tapdisk_driver_allocate(image->type, image->name, - image->flags, - image->storage); + image->flags); if (!driver) return -ENOMEM; @@ -86,9 +88,10 @@ int } td_flag_set(driver->state, TD_DRIVER_OPEN); - DPRINTF("opened image %s (%d users, state: 0x%08x, type: %d)\n", - driver->name, driver->refcnt + 1, - driver->state, driver->type); + DPRINTF("opened image %s (%d users, state: 0x%08x, type: %d, %s)\n", + driver->name, driver->refcnt + 1, + driver->state, driver->type, + td_flag_test(image->flags, TD_OPEN_RDONLY) ? "ro" : "rw"); } image->driver = driver; @@ -104,7 +107,7 @@ td_open(td_image_t *image) } int -td_close(td_image_t *image) +td_close(td_image_t * image) { td_driver_t *driver; @@ -153,6 +156,7 @@ td_validate_parent(td_image_t *image, td !td_flag_test(pdriver->state, TD_DRIVER_OPEN)) return -EBADF; + /* TODO wtf? 
*/ return 0; return driver->ops->td_validate_parent(driver, pdriver, 0); } @@ -174,6 +178,11 @@ td_queue_write(td_image_t *image, td_req goto fail; } + if (!driver->ops->td_queue_write) { + err = -EOPNOTSUPP; + goto fail; + } + err = tapdisk_image_check_td_request(image, treq); if (err) goto fail; @@ -202,6 +211,11 @@ td_queue_read(td_image_t *image, td_requ goto fail; } + if (!driver->ops->td_queue_read) { + err = -EOPNOTSUPP; + goto fail; + } + err = tapdisk_image_check_td_request(image, treq); if (err) goto fail; @@ -222,7 +236,7 @@ td_forward_request(td_request_t treq) void td_complete_request(td_request_t treq, int res) { - ((td_callback_t)treq.cb)(treq, res); + treq.cb(treq, res); } void @@ -257,3 +271,13 @@ td_debug(td_image_t *image) tapdisk_driver_debug(driver); } + +__attribute__ ((noreturn)) +void td_panic(void) +{ + tlog_precious(); + raise(SIGABRT); + + /* TODO delete? */ + _exit(-1); /* not reached */ +} diff --git a/tools/blktap2/drivers/tapdisk-interface.h b/tools/blktap3/drivers/tapdisk-interface.h copy from tools/blktap2/drivers/tapdisk-interface.h copy to tools/blktap3/drivers/tapdisk-interface.h --- a/tools/blktap2/drivers/tapdisk-interface.h +++ b/tools/blktap3/drivers/tapdisk-interface.h @@ -50,5 +50,6 @@ void td_prep_read(struct tiocb *, int, c long long, td_queue_callback_t, void *); void td_prep_write(struct tiocb *, int, char *, size_t, long long, td_queue_callback_t, void *); +void td_panic(void) __attribute__ ((noreturn)); #endif
Thanos Makatos
2013-Apr-19 15:40 UTC
[PATCH 13 of 21] blktap3/drivers: Introduce queuing and queue management for I/O requests
This patch copies I/O request queue functionality from blktap2, with changes coming from blktap2.5. Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com> diff --git a/tools/blktap2/drivers/tapdisk-queue.c b/tools/blktap3/drivers/tapdisk-queue.c copy from tools/blktap2/drivers/tapdisk-queue.c copy to tools/blktap3/drivers/tapdisk-queue.c --- a/tools/blktap2/drivers/tapdisk-queue.c +++ b/tools/blktap3/drivers/tapdisk-queue.c @@ -1,5 +1,7 @@ /* - * Copyright (c) 2008, XenSource Inc. + * Copyright (c) 2007, XenSource Inc. + * Copyright (c) 2010, Citrix Systems, Inc. + * * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -55,8 +57,7 @@ */ #define REQUEST_ASYNC_FD ((io_context_t)1) -static inline void -queue_tiocb(struct tqueue *queue, struct tiocb *tiocb) +static inline void queue_tiocb(struct tqueue *queue, struct tiocb *tiocb) { struct iocb *iocb = &tiocb->iocb; @@ -69,14 +70,12 @@ queue_tiocb(struct tqueue *queue, struct queue->iocbs[queue->queued++] = iocb; } -static inline int -deferred_tiocbs(struct tqueue *queue) +static inline int deferred_tiocbs(struct tqueue *queue) { return (queue->deferred.head != NULL); } -static inline void -defer_tiocb(struct tqueue *queue, struct tiocb *tiocb) +static inline void defer_tiocb(struct tqueue *queue, struct tiocb *tiocb) { struct tlist *list = &queue->deferred; @@ -89,8 +88,7 @@ defer_tiocb(struct tqueue *queue, struct queue->deferrals++; } -static inline void -queue_deferred_tiocb(struct tqueue *queue) +static inline void queue_deferred_tiocb(struct tqueue *queue) { struct tlist *list = &queue->deferred; @@ -106,8 +104,7 @@ queue_deferred_tiocb(struct tqueue *queu } } -static inline void -queue_deferred_tiocbs(struct tqueue *queue) +static inline void queue_deferred_tiocbs(struct tqueue *queue) { while (!tapdisk_queue_full(queue) && deferred_tiocbs(queue)) queue_deferred_tiocb(queue); @@ -116,8 +113,8 @@ queue_deferred_tiocbs(struct tqueue *que /* * td_complete may queue 
more tiocbs */ -static void -complete_tiocb(struct tqueue *queue, struct tiocb *tiocb, unsigned long res) +static void complete_tiocb(struct tqueue *queue __attribute__((unused)), + struct tiocb *tiocb, unsigned long res) { int err; struct iocb *iocb = &tiocb->iocb; @@ -132,8 +129,7 @@ complete_tiocb(struct tqueue *queue, str tiocb->cb(tiocb->arg, tiocb, err); } -static int -cancel_tiocbs(struct tqueue *queue, int err) +static int cancel_tiocbs(struct tqueue *queue, int err) { int queued; struct tiocb *tiocb; @@ -160,8 +156,7 @@ cancel_tiocbs(struct tqueue *queue, int static int fail_tiocbs(struct tqueue *queue, int succeeded, int total, int err) { - ERR(err, "io_submit error: %d of %d failed", - total - succeeded, total); + ERR(err, "io_submit error: %d of %d failed", total - succeeded, total); /* take any non-submitted, merged iocbs * off of the queue, split them, and fail them */ @@ -179,8 +174,7 @@ struct rwio { struct io_event *aio_events; }; -static void -tapdisk_rwio_destroy(struct tqueue *queue) +static void tapdisk_rwio_destroy(struct tqueue *queue) { struct rwio *rwio = queue->tio_data; @@ -190,11 +184,9 @@ tapdisk_rwio_destroy(struct tqueue *queu } } -static int -tapdisk_rwio_setup(struct tqueue *queue, int size) +static int tapdisk_rwio_setup(struct tqueue *queue, int size) { struct rwio *rwio = queue->tio_data; - int err; rwio->aio_events = calloc(size, sizeof(struct io_event)); if (!rwio->aio_events) @@ -203,8 +195,7 @@ tapdisk_rwio_setup(struct tqueue *queue, return 0; } -static inline ssize_t -tapdisk_rwio_rw(const struct iocb *iocb) +static inline ssize_t tapdisk_rwio_rw(const struct iocb *iocb) { int fd = iocb->aio_fildes; char *buf = iocb->u.c.buf; @@ -213,7 +204,7 @@ tapdisk_rwio_rw(const struct iocb *iocb) ssize_t (*func)(int, void *, size_t) = (iocb->aio_lio_opcode == IO_CMD_PWRITE ? 
vwrite : read); - if (lseek(fd, off, SEEK_SET) == (off_t)-1) + if (lseek64(fd, off, SEEK_SET) == (off64_t) - 1) return -errno; if (atomicio(func, fd, buf, size) != size) @@ -222,8 +213,7 @@ tapdisk_rwio_rw(const struct iocb *iocb) return size; } -static int -tapdisk_rwio_submit(struct tqueue *queue) +static int tapdisk_rwio_submit(struct tqueue *queue) { struct rwio *rwio = queue->tio_data; int i, merged, split; @@ -263,8 +253,8 @@ tapdisk_rwio_submit(struct tqueue *queue static const struct tio td_tio_rwio = { .name = "rwio", .data_size = 0, - .tio_setup = NULL, - .tio_destroy = NULL, + .tio_setup = tapdisk_rwio_setup, + .tio_destroy = tapdisk_rwio_destroy, .tio_submit = tapdisk_rwio_submit }; @@ -284,18 +274,12 @@ struct lio { #define LIO_FLAG_EVENTFD (1<<0) -static int -tapdisk_lio_check_resfd(void) +static int tapdisk_lio_check_resfd(void) { -#if defined(__linux__) return tapdisk_linux_version() >= KERNEL_VERSION(2, 6, 22); -#else - return 1; -#endif } -static void -tapdisk_lio_destroy_aio(struct tqueue *queue) +static void tapdisk_lio_destroy_aio(struct tqueue *queue) { struct lio *lio = queue->tio_data; @@ -310,8 +294,7 @@ tapdisk_lio_destroy_aio(struct tqueue *q } } -static int -__lio_setup_aio_poll(struct tqueue *queue, int qlen) +static int __lio_setup_aio_poll(struct tqueue *queue, int qlen) { struct lio *lio = queue->tio_data; int err, fd; @@ -341,8 +324,7 @@ fail: return err; } -static int -__lio_setup_aio_eventfd(struct tqueue *queue, int qlen) +static int __lio_setup_aio_eventfd(struct tqueue *queue, int qlen) { struct lio *lio = queue->tio_data; int err; @@ -362,8 +344,7 @@ static int return 0; } -static int -tapdisk_lio_setup_aio(struct tqueue *queue, int qlen) +static int tapdisk_lio_setup_aio(struct tqueue *queue, int qlen) { struct lio *lio = queue->tio_data; int err; @@ -396,8 +377,7 @@ fail_rsv: } -static void -tapdisk_lio_destroy(struct tqueue *queue) +static void tapdisk_lio_destroy(struct tqueue *queue) { struct lio *lio = queue->tio_data; 
@@ -428,18 +408,20 @@ tapdisk_lio_set_eventfd(struct tqueue *q __io_set_eventfd(iocbs[i], lio->event_fd); } -static void -tapdisk_lio_ack_event(struct tqueue *queue) +static void tapdisk_lio_ack_event(struct tqueue *queue) { struct lio *lio = queue->tio_data; uint64_t val; - if (lio->flags & LIO_FLAG_EVENTFD) - read_exact(lio->event_fd, &val, sizeof(val)); + if (lio->flags & LIO_FLAG_EVENTFD) { + int gcc = read(lio->event_fd, &val, sizeof(val)); + if (gcc) { + }; + } } -static void -tapdisk_lio_event(event_id_t id, char mode, void *private) +static void tapdisk_lio_event(event_id_t id __attribute__((unused)), + char mode __attribute__((unused)), void *private) { struct tqueue *queue = private; struct lio *lio; @@ -470,11 +452,9 @@ tapdisk_lio_event(event_id_t id, char mo queue_deferred_tiocbs(queue); } -static int -tapdisk_lio_setup(struct tqueue *queue, int qlen) +static int tapdisk_lio_setup(struct tqueue *queue, int qlen) { struct lio *lio = queue->tio_data; - size_t sz; int err; lio->event_id = -1; @@ -486,8 +466,7 @@ tapdisk_lio_setup(struct tqueue *queue, lio->event_id tapdisk_server_register_event(SCHEDULER_POLL_READ_FD, lio->event_fd, 0, - tapdisk_lio_event, - queue); + tapdisk_lio_event, queue); err = lio->event_id; if (err < 0) goto fail; @@ -505,8 +484,7 @@ fail: return err; } -static int -tapdisk_lio_submit(struct tqueue *queue) +static int tapdisk_lio_submit(struct tqueue *queue) { struct lio *lio = queue->tio_data; int merged, submitted, err = 0; @@ -547,8 +525,7 @@ static const struct tio td_tio_lio = { .tio_submit = tapdisk_lio_submit, }; -static void -tapdisk_queue_free_io(struct tqueue *queue) +static void tapdisk_queue_free_io(struct tqueue *queue) { if (queue->tio) { if (queue->tio->tio_destroy) @@ -562,8 +539,7 @@ tapdisk_queue_free_io(struct tqueue *que } } -static int -tapdisk_queue_init_io(struct tqueue *queue, int drv) +static int tapdisk_queue_init_io(struct tqueue *queue, int drv) { const struct tio *tio; int err; @@ -608,7 +584,7 @@ int 
tapdisk_init_queue(struct tqueue *queue, int size, int drv, struct tfilter *filter) { - int i, err; + int err; memset(queue, 0, sizeof(struct tqueue)); @@ -639,8 +615,7 @@ tapdisk_init_queue(struct tqueue *queue, return err; } -void -tapdisk_free_queue(struct tqueue *queue) +void tapdisk_free_queue(struct tqueue *queue) { tapdisk_queue_free_io(queue); @@ -650,16 +625,16 @@ tapdisk_free_queue(struct tqueue *queue) opio_free(&queue->opioctx); } -void -tapdisk_debug_queue(struct tqueue *queue) +void tapdisk_debug_queue(struct tqueue *queue) { struct tiocb *tiocb = queue->deferred.head; WARN("TAPDISK QUEUE:\n"); WARN("size: %d, tio: %s, queued: %d, iocbs_pending: %d, " - "tiocbs_pending: %d, tiocbs_deferred: %d, deferrals: %"PRIx64"\n", - queue->size, queue->tio->name, queue->queued, queue->iocbs_pending, - queue->tiocbs_pending, queue->tiocbs_deferred, queue->deferrals); + "tiocbs_pending: %d, tiocbs_deferred: %d, deferrals: %" PRIx64 + "\n", queue->size, queue->tio->name, queue->queued, + queue->iocbs_pending, queue->tiocbs_pending, + queue->tiocbs_deferred, queue->deferrals); if (tiocb) { WARN("deferred:\n"); @@ -667,15 +642,15 @@ tapdisk_debug_queue(struct tqueue *queue struct iocb *io = &tiocb->iocb; WARN("%s of %lu bytes at %lld\n", (io->aio_lio_opcode == IO_CMD_PWRITE ? 
- "write" : "read"), - io->u.c.nbytes, io->u.c.offset); + "write" : "read"), io->u.c.nbytes, io->u.c.offset); } } } void -tapdisk_prep_tiocb(struct tiocb *tiocb, int fd, int rw, char *buf, size_t size, - long long offset, td_queue_callback_t cb, void *arg) +tapdisk_prep_tiocb(struct tiocb *tiocb, int fd, int rw, char *buf, + size_t size, long long offset, td_queue_callback_t cb, + void *arg) { struct iocb *iocb = &tiocb->iocb; @@ -690,8 +665,7 @@ tapdisk_prep_tiocb(struct tiocb *tiocb, tiocb->next = NULL; } -void -tapdisk_queue_tiocb(struct tqueue *queue, struct tiocb *tiocb) +void tapdisk_queue_tiocb(struct tqueue *queue, struct tiocb *tiocb) { if (!tapdisk_queue_full(queue)) queue_tiocb(queue, tiocb); @@ -703,14 +677,12 @@ tapdisk_queue_tiocb(struct tqueue *queue /* * fail_tiocbs may queue more tiocbs */ -int -tapdisk_submit_tiocbs(struct tqueue *queue) +int tapdisk_submit_tiocbs(struct tqueue *queue) { return queue->tio->tio_submit(queue); } -int -tapdisk_submit_all_tiocbs(struct tqueue *queue) +int tapdisk_submit_all_tiocbs(struct tqueue *queue) { int submitted = 0; @@ -724,14 +696,12 @@ tapdisk_submit_all_tiocbs(struct tqueue /* * cancel_tiocbs may queue more tiocbs */ -int -tapdisk_cancel_tiocbs(struct tqueue *queue) +int tapdisk_cancel_tiocbs(struct tqueue *queue) { return cancel_tiocbs(queue, -EIO); } -int -tapdisk_cancel_all_tiocbs(struct tqueue *queue) +int tapdisk_cancel_all_tiocbs(struct tqueue *queue) { int cancelled = 0;
Thanos Makatos
2013-Apr-19 15:40 UTC
[PATCH 14 of 21] blktap3/drivers: Introduce core tapdisk server
This patch copies from blktap2 the core of the tapdisk process, with most changes coming from blktap2.5. Also, it replaces the minor number with type:/path/to/file in function tapdisk_server_get_vbd as there is no minor number in blktap3. Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com> diff --git a/tools/blktap2/drivers/tapdisk-server.c b/tools/blktap3/drivers/tapdisk-server.c copy from tools/blktap2/drivers/tapdisk-server.c copy to tools/blktap3/drivers/tapdisk-server.c --- a/tools/blktap2/drivers/tapdisk-server.c +++ b/tools/blktap3/drivers/tapdisk-server.c @@ -25,6 +25,7 @@ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + #include <stdio.h> #include <errno.h> #include <unistd.h> @@ -32,18 +33,32 @@ #include <sys/ioctl.h> #include <sys/signal.h> -#include "tapdisk-utils.h" +#include "tapdisk-syslog.h" #include "tapdisk-server.h" #include "tapdisk-driver.h" #include "tapdisk-interface.h" +#include "tapdisk-log.h" #define DBG(_level, _f, _a...) tlog_write(_level, _f, ##_a) #define ERR(_err, _f, _a...) 
tlog_error(_err, _f, ##_a) - tapdisk_server_t server; +#define TAPDISK_TIOCBS (TAPDISK_DATA_REQUESTS + 50) + +typedef struct tapdisk_server { + int run; + + struct tqh_td_vbd_handle vbds; + scheduler_t scheduler; + struct tqueue aio_queue; + char *name; + char *ident; + int facility; +} tapdisk_server_t; + +static tapdisk_server_t server; #define tapdisk_server_for_each_vbd(vbd, tmp) \ - list_for_each_entry_safe(vbd, tmp, &server.vbds, next) + TAILQ_FOREACH_SAFE(vbd, &server.vbds, entry, tmp) td_image_t * tapdisk_server_get_shared_image(td_image_t *image) @@ -56,26 +71,26 @@ tapdisk_server_get_shared_image(td_image tapdisk_server_for_each_vbd(vbd, tmpv) tapdisk_vbd_for_each_image(vbd, img, tmpi) - if (img->type == image->type && - !strcmp(img->name, image->name)) + if (img->type == image->type && !strcmp(img->name, image->name)) return img; return NULL; } -struct list_head * -tapdisk_server_get_all_vbds(void) +struct tqh_td_vbd_handle *tapdisk_server_get_all_vbds(void) { return &server.vbds; } td_vbd_t * -tapdisk_server_get_vbd(uint16_t uuid) +tapdisk_server_get_vbd(const char *params) { td_vbd_t *vbd, *tmp; + assert(params); + tapdisk_server_for_each_vbd(vbd, tmp) - if (vbd->uuid == uuid) + if (!strcmp(vbd->name, params)) return vbd; return NULL; @@ -84,14 +99,13 @@ tapdisk_server_get_vbd(uint16_t uuid) void tapdisk_server_add_vbd(td_vbd_t *vbd) { - list_add_tail(&vbd->next, &server.vbds); + TAILQ_INSERT_TAIL(&server.vbds, vbd, entry); } void tapdisk_server_remove_vbd(td_vbd_t *vbd) { - list_del(&vbd->next); - INIT_LIST_HEAD(&vbd->next); + TAILQ_REMOVE(&server.vbds, vbd, entry); tapdisk_server_check_state(); } @@ -111,13 +125,14 @@ tapdisk_server_debug(void) tapdisk_server_for_each_vbd(vbd, tmp) tapdisk_vbd_debug(vbd); - tlog_flush(); + DBG(TLOG_INFO, "debug log completed\n"); + tlog_precious(); } void tapdisk_server_check_state(void) { - if (list_empty(&server.vbds)) + if (TAILQ_EMPTY(&server.vbds)) server.run = 0; } @@ -136,7 +151,12 @@ 
tapdisk_server_unregister_event(event_id } void -tapdisk_server_set_max_timeout(int seconds) +tapdisk_server_mask_event(event_id_t event, int masked) +{ + return scheduler_mask_event(&server.scheduler, event, masked); +} + +void tapdisk_server_set_max_timeout(int seconds) { scheduler_set_max_timeout(&server.scheduler, seconds); } @@ -180,7 +200,6 @@ tapdisk_server_submit_tiocbs(void) static void tapdisk_server_kick_responses(void) { - int n; td_vbd_t *vbd, *tmp; tapdisk_server_for_each_vbd(vbd, tmp) @@ -196,8 +215,19 @@ tapdisk_server_check_vbds(void) tapdisk_vbd_check_state(vbd); } -static void -tapdisk_server_stop_vbds(void) +static int +tapdisk_server_recheck_vbds(void) +{ + td_vbd_t *vbd, *tmp; + int rv = 0; + + tapdisk_server_for_each_vbd(vbd, tmp) + rv += tapdisk_vbd_recheck_state(vbd); + + return rv; +} + +static void tapdisk_server_stop_vbds(void) { td_vbd_t *vbd, *tmp; @@ -218,9 +248,49 @@ tapdisk_server_close_aio(void) tapdisk_free_queue(&server.aio_queue); } -static void -tapdisk_server_close(void) +int tapdisk_server_openlog(const char *name, int options, int facility) { + server.facility = facility; + server.name = strdup(name); + server.ident = tapdisk_syslog_ident(name); + + if (!server.name || !server.ident) + return -errno; + + openlog(server.ident, options, facility); + + return 0; +} + +void tapdisk_server_closelog(void) +{ + closelog(); + + free(server.name); + server.name = NULL; + + free(server.ident); + server.ident = NULL; +} + +static int tapdisk_server_open_tlog(void) +{ + int err = 0; + + if (server.name) + err = tlog_open(server.name, server.facility, TLOG_WARN); + + return err; +} + +static void tapdisk_server_close_tlog(void) +{ + tlog_close(); +} + +static void tapdisk_server_close(void) +{ + tapdisk_server_close_tlog(); tapdisk_server_close_aio(); } @@ -238,8 +308,12 @@ tapdisk_server_iterate(void) DBG(TLOG_WARN, "server wait returned %d\n", ret); tapdisk_server_check_vbds(); - tapdisk_server_submit_tiocbs(); - 
tapdisk_server_kick_responses(); + do { + tapdisk_server_submit_tiocbs(); + tapdisk_server_kick_responses(); + + ret = tapdisk_server_recheck_vbds(); + } while (ret); } static void @@ -272,6 +346,7 @@ tapdisk_server_signal_handler(int signal break; case SIGUSR1: + DBG(TLOG_INFO, "debugging on signal %d\n", signal); tapdisk_server_debug(); break; } @@ -281,7 +356,7 @@ int tapdisk_server_init(void) { memset(&server, 0, sizeof(server)); - INIT_LIST_HEAD(&server.vbds); + TAILQ_INIT(&server.vbds); scheduler_initialize(&server.scheduler); @@ -297,17 +372,23 @@ tapdisk_server_complete(void) if (err) goto fail; + err = tapdisk_server_open_tlog(); + if (err) + goto fail; + server.run = 1; return 0; fail: + tapdisk_server_close_tlog(); tapdisk_server_close_aio(); return err; } int -tapdisk_server_initialize(void) +tapdisk_server_initialize(const char *read __attribute__((unused)), + const char *write __attribute__((unused))) { int err;
Thanos Makatos
2013-Apr-19 15:40 UTC
[PATCH 15 of 21] blktap3/drivers: Introduce stats reporting
This patch copies from blktap2.5 stats reporting functionality. Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com> diff --git a/tools/blktap3/drivers/tapdisk-stats.c b/tools/blktap3/drivers/tapdisk-stats.c new file mode 100644 --- /dev/null +++ b/tools/blktap3/drivers/tapdisk-stats.c @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2010, Citrix Systems, Inc. + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of XenSource Inc. nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <stdarg.h> + +#include "tapdisk.h" +#include "tapdisk-stats.h" + +/* TODO already defined elsewhere */ +#define BUG_ON(_cond) if (_cond) { td_panic(); } + +static void __stats_vsprintf(td_stats_t * st, const char *fmt, va_list ap) +{ + size_t size = st->buf + st->size - st->pos; + st->pos += vsnprintf(st->pos, size, fmt, ap); +} + +static void __printf(2, 3) +__stats_sprintf(td_stats_t * st, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + __stats_vsprintf(st, fmt, ap); + va_end(ap); +} + +static void __stats_enter(td_stats_t * st) +{ + st->depth++; + BUG_ON(st->depth > TD_STATS_MAX_DEPTH); + st->n_elem[st->depth] = 0; +} + +static void __stats_leave(td_stats_t * st) +{ + st->depth--; +} + +static void __stats_next(td_stats_t * st) +{ + int n_elem; + + n_elem = st->n_elem[st->depth]; + if (n_elem > 0) + __stats_sprintf(st, ", "); + st->n_elem[st->depth]++; +} + +static void __tapdisk_stats_enter(td_stats_t * st, char t) +{ + __stats_sprintf(st, "%c ", t); + __stats_enter(st); +} + +void tapdisk_stats_enter(td_stats_t * st, char t) +{ + __stats_next(st); + __tapdisk_stats_enter(st, t); +} + +void tapdisk_stats_leave(td_stats_t * st, char t) +{ + __stats_leave(st); + __stats_sprintf(st, " %c", t); +} + +static void +tapdisk_stats_vval(td_stats_t * st, const char *conv, va_list ap) +{ + char t = conv[0], fmt[32]; + + __stats_next(st); + + switch (t) { + case 's': + __stats_vsprintf(st, "\"%s\"", ap); + break; + + default: + sprintf(fmt, "%%%s", conv); + __stats_vsprintf(st, fmt, ap); + break; + } +} + +void tapdisk_stats_val(td_stats_t * st, const char *conv, ...) +{ + va_list ap; + + va_start(ap, conv); + tapdisk_stats_vval(st, conv, ap); + va_end(ap); +} + +void +tapdisk_stats_field(td_stats_t * st, const char *key, const char *conv, + ...) 
+{ + va_list ap; + int n_elem; + char t; + + n_elem = st->n_elem[st->depth]++; + if (n_elem > 0) + __stats_sprintf(st, ", "); + + __stats_sprintf(st, "\"%s\": ", key); + + if (!conv) { + __stats_sprintf(st, "null"); + return; + } + + t = conv[0]; + switch (t) { + case '[': + case '{': + __tapdisk_stats_enter(st, t); + break; + default: + va_start(ap, conv); + __stats_enter(st); + tapdisk_stats_vval(st, conv, ap); + __stats_leave(st); + va_end(ap); + } +}
Thanos Makatos
2013-Apr-19 15:40 UTC
[PATCH 16 of 21] blktap3/drivers: Introduce back-end storage type discovery
This patch copies from blktap2.5 functionality that retrieves the type of the back-end storage. Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com> diff --git a/tools/blktap3/drivers/tapdisk-storage.c b/tools/blktap3/drivers/tapdisk-storage.c new file mode 100644 --- /dev/null +++ b/tools/blktap3/drivers/tapdisk-storage.c @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2010, Citrix Systems, Inc. + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of XenSource Inc. nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <errno.h> +#include <limits.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <sys/vfs.h> + +#include "tapdisk-storage.h" + +#ifndef NFS_SUPER_MAGIC +#define NFS_SUPER_MAGIC 0x6969 +#endif + +static int __tapdisk_fs_storage_type(const char *rpath) +{ + struct statfs fst; + int type, err; + + err = statfs(rpath, &fst); + if (err) + return -errno; + + switch (fst.f_type) { + case NFS_SUPER_MAGIC: + type = TAPDISK_STORAGE_TYPE_NFS; + break; + default: + type = TAPDISK_STORAGE_TYPE_EXT; + break; + } + + return type; +} + +static int +__tapdisk_blk_storage_type(const char *rpath __attribute__((unused))) +{ + return TAPDISK_STORAGE_TYPE_LVM; +} + +int tapdisk_storage_type(const char *path) +{ + char rpath[PATH_MAX], *p; + struct stat st; + int err, rv; + + p = realpath(path, rpath); + if (!p) + return -errno; + + err = stat(rpath, &st); + if (err) + return -errno; + + switch (st.st_mode & S_IFMT) { + case S_IFBLK: + rv = __tapdisk_blk_storage_type(rpath); + break; + case S_IFREG: + rv = __tapdisk_fs_storage_type(rpath); + break; + default: + rv = -EINVAL; + break; + } + + return rv; +} + +const char *tapdisk_storage_name(int type) +{ + switch (type) { + case TAPDISK_STORAGE_TYPE_NFS: + return "nfs"; + case TAPDISK_STORAGE_TYPE_EXT: + return "ext"; + case TAPDISK_STORAGE_TYPE_LVM: + return "lvm"; + case -1: + return "n/a"; + default: + return "<unknown-type>"; + } +} diff --git a/tools/blktap3/drivers/tapdisk-storage.h b/tools/blktap3/drivers/tapdisk-storage.h new file mode 100644 --- /dev/null +++ b/tools/blktap3/drivers/tapdisk-storage.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2008, XenSource Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of XenSource Inc. nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _TAPDISK_STORAGE_H_ +#define _TAPDISK_STORAGE_H_ + +#define TAPDISK_STORAGE_TYPE_NFS 1 +#define TAPDISK_STORAGE_TYPE_EXT 2 +#define TAPDISK_STORAGE_TYPE_LVM 3 + +int tapdisk_storage_type(const char *path); +const char *tapdisk_storage_name(int type); + +#endif
Thanos Makatos
2013-Apr-19 15:40 UTC
[PATCH 17 of 21] blktap3/drivers: Introduce representation and management of Virtual Block Devices in tapdisk
This patch copies the representation and management of Virtual Block Devices in tapdisk from blktap2. Most changes come from blktap2.5. Also, it contains the following blktap3-related changes: * Don't set the minor number in function tapdisk_vbd_create as it has been removed. * Function tapdisk_vbd_initialize now uses the type:/path/to/file instead of the minor number. * Function signal_enospc uses the /path/to/file instead of the minor number. * Function tapdisk_vbd_open_vdi uses the type:/path/to/file instead of the minor number, and /path/to/file instead of the parent minor number. * Remove functions tapdisk_vbd_detach, tapdisk_vbd_attach, and tapdisk_vbd_open. Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com> diff --git a/tools/blktap2/drivers/tapdisk-vbd.c b/tools/blktap3/drivers/tapdisk-vbd.c copy from tools/blktap2/drivers/tapdisk-vbd.c copy to tools/blktap3/drivers/tapdisk-vbd.c --- a/tools/blktap2/drivers/tapdisk-vbd.c +++ b/tools/blktap3/drivers/tapdisk-vbd.c @@ -1,5 +1,7 @@ -/* +/* * Copyright (c) 2008, XenSource Inc. + * Copyright (c) 2010, Citrix Systems, Inc. + * * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -25,6 +27,7 @@ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + #include <stdio.h> #include <errno.h> #include <fcntl.h> @@ -34,22 +37,22 @@ #include <libgen.h> #include <sys/mman.h> #include <sys/ioctl.h> -#ifdef MEMSHR -#include <memshr.h> -#endif +#include "libvhd.h" #include "tapdisk-image.h" #include "tapdisk-driver.h" #include "tapdisk-server.h" +#include "tapdisk-vbd.h" +#include "tapdisk-disktype.h" #include "tapdisk-interface.h" -#include "tapdisk-disktype.h" -#include "tapdisk-vbd.h" -#include "blktap2.h" +#include "tapdisk-stats.h" +#include "sring/td-stats.h" +#include "tapdisk-storage.h" #define DBG(_level, _f, _a...) tlog_write(_level, _f, ##_a) #define ERR(_err, _f, _a...) 
tlog_error(_err, _f, ##_a) -#if 1 +#if 1 #define ASSERT(p) \ do { \ if (!(p)) { \ @@ -60,43 +63,30 @@ } while (0) #else #define ASSERT(p) ((void)0) -#endif +#endif #define TD_VBD_EIO_RETRIES 10 #define TD_VBD_EIO_SLEEP 1 #define TD_VBD_WATCHDOG_TIMEOUT 10 -static void tapdisk_vbd_ring_event(event_id_t, char, void *); -static void tapdisk_vbd_callback(void *, blkif_response_t *); +static void tapdisk_vbd_complete_vbd_request(td_vbd_t *, + td_vbd_request_t *); +static int tapdisk_vbd_queue_ready(td_vbd_t *); +static void tapdisk_vbd_check_queue_state(td_vbd_t *); -/* +/* * initialization */ -static inline void -tapdisk_vbd_initialize_vreq(td_vbd_request_t *vreq) +static void tapdisk_vbd_mark_progress(td_vbd_t * vbd) { - memset(vreq, 0, sizeof(td_vbd_request_t)); - INIT_LIST_HEAD(&vreq->next); + gettimeofday(&vbd->ts, NULL); } -void -tapdisk_vbd_free(td_vbd_t *vbd) -{ - if (vbd) { - tapdisk_vbd_free_stack(vbd); - list_del_init(&vbd->next); - free(vbd->name); - free(vbd); - } -} - -td_vbd_t* -tapdisk_vbd_create(uint16_t uuid) +td_vbd_t *tapdisk_vbd_create(void) { td_vbd_t *vbd; - int i; vbd = calloc(1, sizeof(td_vbd_t)); if (!vbd) { @@ -104,103 +94,62 @@ tapdisk_vbd_create(uint16_t uuid) return NULL; } - vbd->uuid = uuid; - vbd->minor = -1; - vbd->ring.fd = -1; - - /* default blktap ring completion */ - vbd->callback = tapdisk_vbd_callback; - vbd->argument = vbd; - -#ifdef MEMSHR - memshr_vbd_initialize(); -#endif - - INIT_LIST_HEAD(&vbd->driver_stack); - INIT_LIST_HEAD(&vbd->images); - INIT_LIST_HEAD(&vbd->new_requests); - INIT_LIST_HEAD(&vbd->pending_requests); - INIT_LIST_HEAD(&vbd->failed_requests); - INIT_LIST_HEAD(&vbd->completed_requests); - INIT_LIST_HEAD(&vbd->next); - gettimeofday(&vbd->ts, NULL); - - for (i = 0; i < MAX_REQUESTS; i++) - tapdisk_vbd_initialize_vreq(vbd->request_list + i); + TAILQ_INIT(&vbd->images); + TAILQ_INIT(&vbd->new_requests); + TAILQ_INIT(&vbd->pending_requests); + TAILQ_INIT(&vbd->failed_requests); + 
TAILQ_INIT(&vbd->completed_requests); + tapdisk_vbd_mark_progress(vbd); return vbd; } -int -tapdisk_vbd_initialize(uint16_t uuid) +int tapdisk_vbd_initialize(int rfd __attribute__((unused)), + int wfd __attribute__((unused)), const char * params) { td_vbd_t *vbd; - vbd = tapdisk_server_get_vbd(uuid); + assert(params); + + vbd = tapdisk_server_get_vbd(params); if (vbd) { - EPRINTF("duplicate vbds! %u\n", uuid); + EPRINTF("duplicate vbds %s\n", params); return -EEXIST; } - vbd = tapdisk_vbd_create(uuid); + vbd = tapdisk_vbd_create(); tapdisk_server_add_vbd(vbd); return 0; } -void -tapdisk_vbd_set_callback(td_vbd_t *vbd, td_vbd_cb_t callback, void *argument) +static int tapdisk_vbd_validate_chain(td_vbd_t * vbd) { - vbd->callback = callback; - vbd->argument = argument; + return tapdisk_image_validate_chain(&vbd->images); } -static int -tapdisk_vbd_validate_chain(td_vbd_t *vbd) +void tapdisk_vbd_close_vdi(td_vbd_t * vbd) { - int err; - td_image_t *image, *parent, *tmp; + tapdisk_image_close_chain(&vbd->images); - DPRINTF("VBD CHAIN:\n"); - - tapdisk_vbd_for_each_image(vbd, image, tmp) { - DPRINTF("%s: %d\n", image->name, image->type); - - if (tapdisk_vbd_is_last_image(vbd, image)) - break; - - parent = tapdisk_vbd_next_image(image); - err = td_validate_parent(image, parent); - if (err) - return err; + if (vbd->secondary && vbd->secondary_mode != TD_VBD_SECONDARY_MIRROR) { + tapdisk_image_close(vbd->secondary, NULL); + vbd->secondary = NULL; } - return 0; + if (vbd->retired) { + tapdisk_image_close(vbd->retired, NULL); + vbd->retired = NULL; + } + + td_flag_set(vbd->state, TD_VBD_CLOSED); } -void -tapdisk_vbd_close_vdi(td_vbd_t *vbd) +static int tapdisk_vbd_add_block_cache(td_vbd_t * vbd) { - td_image_t *image, *tmp; - - tapdisk_vbd_for_each_image(vbd, image, tmp) { - td_close(image); - tapdisk_image_free(image); - } - - INIT_LIST_HEAD(&vbd->images); - td_flag_set(vbd->state, TD_VBD_CLOSED); - - tapdisk_vbd_free_stack(vbd); -} - -static int 
-tapdisk_vbd_add_block_cache(td_vbd_t *vbd) -{ - int err; - td_driver_t *driver; td_image_t *cache, *image, *target, *tmp; + int err; target = NULL; @@ -215,10 +164,7 @@ tapdisk_vbd_add_block_cache(td_vbd_t *vb return 0; cache = tapdisk_image_allocate(target->name, - DISK_TYPE_BLOCK_CACHE, - target->storage, - target->flags, - target->private); + DISK_TYPE_BLOCK_CACHE, target->flags); if (!cache) return -ENOMEM; @@ -234,9 +180,7 @@ tapdisk_vbd_add_block_cache(td_vbd_t *vb } cache->driver = tapdisk_driver_allocate(cache->type, - cache->name, - cache->flags, - cache->storage); + cache->name, cache->flags); if (!cache->driver) { err = -ENOMEM; goto fail; @@ -251,468 +195,297 @@ tapdisk_vbd_add_block_cache(td_vbd_t *vb fail: /* give up */ - tapdisk_image_free(target); + tapdisk_image_free(target, NULL); return err; done: /* insert cache before image */ - list_add(&cache->next, target->next.prev); + TAILQ_INSERT_BEFORE(target, cache, entry); return 0; } -static int -tapdisk_vbd_add_dirty_log(td_vbd_t *vbd) +static int tapdisk_vbd_add_local_cache(td_vbd_t * vbd) { + td_image_t *cache, *parent; int err; - td_driver_t *driver; - td_image_t *log, *parent; - - driver = NULL; - log = NULL; parent = tapdisk_vbd_first_image(vbd); + if (tapdisk_vbd_is_last_image(vbd, parent)) { + DPRINTF("Single-image chain, nothing to cache"); + return 0; + } - log = tapdisk_image_allocate(parent->name, - DISK_TYPE_LOG, - parent->storage, - parent->flags, - vbd); - if (!log) + cache = tapdisk_image_allocate(parent->name, + DISK_TYPE_LCACHE, parent->flags); + + if (!cache) return -ENOMEM; - driver = tapdisk_driver_allocate(log->type, - log->name, - log->flags, - log->storage); - if (!driver) { + /* try to load existing cache */ + err = td_load(cache); + if (!err) + goto done; + + cache->driver = tapdisk_driver_allocate(cache->type, + cache->name, cache->flags); + if (!cache->driver) { err = -ENOMEM; goto fail; } - driver->info = parent->driver->info; - log->driver = driver; + cache->driver->info 
= parent->driver->info; - err = td_open(log); + /* try to open new cache */ + err = td_open(cache); + if (!err) + goto done; + +fail: + tapdisk_image_free(cache, NULL); + return err; + + done: + /* insert cache right above leaf image */ + TAILQ_INSERT_AFTER(&vbd->images, parent, cache, entry); + + DPRINTF("Added local_cache driver\n"); + return 0; +} + +/** + * Adds a secondary VBD to this VBD. + */ +static int tapdisk_vbd_add_secondary(td_vbd_t * vbd) +{ + td_image_t *leaf, *second = NULL; + const char *path; + int type, err; + + DPRINTF("Adding secondary image: %s\n", vbd->secondary_name); + + type = tapdisk_disktype_parse_params(vbd->secondary_name, &path); + if (type < 0) + return type; + + leaf = tapdisk_vbd_first_image(vbd); + if (!leaf) { + err = -EINVAL; + goto fail; + } + + err = tapdisk_image_open(type, path, leaf->flags, &second); + if (err) + goto fail; + + if (second->info.size != leaf->info.size) { + EPRINTF("Secondary image size %" PRIu64 " != image size %" PRIu64 + "\n", second->info.size, leaf->info.size); + err = -EINVAL; + goto fail; + } + + vbd->secondary = second; + leaf->flags |= TD_IGNORE_ENOSPC; + if (td_flag_test(vbd->flags, TD_OPEN_STANDBY)) { + DPRINTF("In standby mode\n"); + vbd->secondary_mode = TD_VBD_SECONDARY_STANDBY; + } else { + DPRINTF("In mirror mode\n"); + vbd->secondary_mode = TD_VBD_SECONDARY_MIRROR; + /* we actually need this image to also be part of the chain, + * since it may already contain data */ + TAILQ_INSERT_AFTER(&vbd->images, leaf, second, entry); + } + + DPRINTF("Added secondary image\n"); + return 0; + + fail: + if (second) + tapdisk_image_close(second, NULL); + return err; + } + +static void signal_enospc(td_vbd_t * vbd) +{ + int fd, err; + char *fn; + + /* TODO Some external tool is probably using this, figure out which and + * update it. 
*/ + assert(vbd->name); + err = asprintf(&fn, BLKTAP3_ENOSPC_SIGNAL_FILE "%s", vbd->name); + if (err == -1) { + EPRINTF("Failed to signal ENOSPC condition\n"); + return; + } + + fd = open(fn, O_WRONLY | O_CREAT | O_NONBLOCK, 0666); + if (fd == -1) + EPRINTF("Failed to open file to signal ENOSPC condition\n"); + else + close(fd); + + free(fn); +} + +/* XXX This is commented out in blktap2.5. */ +#if 0 +static int tapdisk_vbd_open_index(td_vbd_t * vbd) +{ + int err; + char *path; + td_flag_t flags; + td_image_t *last, *image; + + last = tapdisk_vbd_last_image(vbd); + err = asprintf(&path, "%s.bat", last->name); + if (err == -1) + return -errno; + + err = access(path, R_OK); + if (err == -1) { + free(path); + return -errno; + } + + flags = vbd->flags | TD_OPEN_RDONLY | TD_OPEN_SHAREABLE; + image = tapdisk_image_allocate(path, DISK_TYPE_VINDEX, flags); + if (!image) { + err = -ENOMEM; + goto fail; + } + + err = td_open(image); if (err) goto fail; - list_add(&log->next, &vbd->images); + tapdisk_vbd_add_image(vbd, image); return 0; fail: - tapdisk_image_free(log); + if (image) + tapdisk_image_free(image); + free(path); return err; } +#endif -static int -tapdisk_vbd_open_level(td_vbd_t *vbd, struct list_head *head, - const char *params, int driver_type, - td_disk_info_t *driver_info, td_flag_t flags) +static int tapdisk_vbd_add_dirty_log(td_vbd_t * vbd) { - const char *name; - int type, err; - td_image_t *image; - td_disk_id_t id; - td_driver_t *driver; + int err; + td_driver_t *driver; + td_image_t *log, *parent; - name = params; - id.name = NULL; - type = driver_type; - INIT_LIST_HEAD(head); + driver = NULL; + log = NULL; - for (;;) { - err = -ENOMEM; - image = tapdisk_image_allocate(name, type, - vbd->storage, flags, vbd); + parent = tapdisk_vbd_first_image(vbd); - free(id.name); + log = tapdisk_image_allocate(parent->name, + DISK_TYPE_LOG, parent->flags); + if (!log) + return -ENOMEM; - if (!image) - goto out; - - - /* this breaks if a driver modifies its info within 
a layer */ - err = __td_open(image, driver_info); - if (err) - goto out; - - /* TODO: non-sink drivers that don''t care about their child - * currently return EINVAL. Could return TD_PARENT_OK or - * TD_ANY_PARENT */ - - err = td_get_parent_id(image, &id); - if (err && (err != TD_NO_PARENT && err != -EINVAL)) { - td_close(image); - goto out; - } - - /* add this image to the end of the list */ - list_add_tail(&image->next, head); - image = NULL; - - /* if the image does not have a parent we return the - * list of images generated by this level of the stack */ - if (err == TD_NO_PARENT || err == -EINVAL) { - err = 0; - goto out; - } - - name = id.name; - type = id.drivertype; - - flags |= (TD_OPEN_RDONLY | TD_OPEN_SHAREABLE); - } - -out: - if (err) { - if (image) { - td_close(image); - tapdisk_image_free(image); - } - while (!list_empty(head)) { - image = list_entry(&head->next, td_image_t, next); - td_close(image); - tapdisk_image_free(image); - } - } - - return err; + driver = tapdisk_driver_allocate(log->type, log->name, log->flags); + if (!driver) { + err = -ENOMEM; + goto fail; } -static int -__tapdisk_vbd_open_vdi(td_vbd_t *vbd, td_flag_t extra_flags) -{ - int err; - td_flag_t flags; - td_image_t *tmp; - td_vbd_driver_info_t *driver_info; - struct list_head *images; - td_disk_info_t *parent_info = NULL; + driver->info = parent->driver->info; + log->driver = driver; - if (list_empty(&vbd->driver_stack)) - return -ENOENT; - - flags = (vbd->flags & ~TD_OPEN_SHAREABLE) | extra_flags; - - /* loop on each user specified driver. - * NOTE: driver_info is in reverse order. 
That is, the first - item is the 'parent' or 'sink' driver */ - list_for_each_entry(driver_info, &vbd->driver_stack, next) { - LIST_HEAD(images); - - err = tapdisk_vbd_open_level(vbd, &images, - driver_info->params, - driver_info->type, - parent_info, flags); - if (err) - goto fail; - - /* after each loop, - * append the created stack to the result stack */ - list_splice(&images, &vbd->images); - - /* set the parent_info to the first diskinfo on the stack */ - tmp = tapdisk_vbd_first_image(vbd); - parent_info = &tmp->info; - } - - if (td_flag_test(vbd->flags, TD_OPEN_LOG_DIRTY)) { - err = tapdisk_vbd_add_dirty_log(vbd); - if (err) - goto fail; - } - - if (td_flag_test(vbd->flags, TD_OPEN_ADD_CACHE)) { - err = tapdisk_vbd_add_block_cache(vbd); - if (err) - goto fail; - } - - err = tapdisk_vbd_validate_chain(vbd); + err = td_open(log); if (err) goto fail; - td_flag_clear(vbd->state, TD_VBD_CLOSED); - - return 0; - -fail: - tapdisk_vbd_close_vdi(vbd); - return err; -} - -/* this populates a vbd type based on path */ -int -tapdisk_vbd_parse_stack(td_vbd_t *vbd, const char *path) -{ - int err; - char *params, *driver_str; - td_vbd_driver_info_t *driver; - - err = tapdisk_namedup(&params, path); - if (err) - return err; - - /* tokenize params based on pipe '|' */ - driver_str = strtok(params, "|"); - while (driver_str != NULL) { - const char *path; - int type; - - /* parse driver info and add to vbd */ - driver = calloc(1, sizeof(td_vbd_driver_info_t)); - if (!driver) { - PERROR("malloc"); - err = -errno; - goto out; - } - INIT_LIST_HEAD(&driver->next); - - err = tapdisk_parse_disk_type(driver_str, &path, &type); - if (err) { - free(driver); - goto out; - } - - driver->type = type; - driver->params = strdup(path); - if (!driver->params) { - err = -ENOMEM; - free(driver); - goto out; - } - - /* build the list backwards as the last driver will be the - * first driver to open in the stack */ - list_add(&driver->next, &vbd->driver_stack); - - /* get next driver string */ 
- driver_str = strtok(NULL, "|"); - } - -out: - free(params); - if (err) - tapdisk_vbd_free_stack(vbd); - - return err; -} - -void -tapdisk_vbd_free_stack(td_vbd_t *vbd) -{ - td_vbd_driver_info_t *driver; - - while (!list_empty(&vbd->driver_stack)) { - driver = list_entry(vbd->driver_stack.next, - td_vbd_driver_info_t, next); - list_del(&driver->next); - free(driver->params); - free(driver); - } -} - -/* NOTE: driver type, etc. must be set */ -int -tapdisk_vbd_open_stack(td_vbd_t *vbd, uint16_t storage, td_flag_t flags) -{ - int i, err; - - vbd->flags = flags; - vbd->storage = storage; - - for (i = 0; i < TD_VBD_EIO_RETRIES; i++) { - err = __tapdisk_vbd_open_vdi(vbd, 0); - if (err != -EIO) - break; - - sleep(TD_VBD_EIO_SLEEP); - } - if (err) - goto fail; - + tapdisk_vbd_add_image(vbd, log); return 0; fail: + tapdisk_image_free(log, NULL); return err; } int -tapdisk_vbd_open_vdi(td_vbd_t *vbd, const char *path, - uint16_t drivertype, uint16_t storage, td_flag_t flags) +tapdisk_vbd_open_vdi(td_vbd_t * vbd, const char *params, td_flag_t flags, + const char * prt_path) { - int i, err; - const struct tap_disk *ops; + char *tmp = vbd->name; + int err; - ops = tapdisk_disk_drivers[drivertype]; - if (!ops) - return -EINVAL; - DPRINTF("Loaded %s driver for vbd %u %s 0x%08x\n", - ops->disk_type, vbd->uuid, path, flags); + if (!TAILQ_EMPTY(&vbd->images)) { + err = -EBUSY; + goto fail; + } - err = tapdisk_namedup(&vbd->name, path); - if (err) - return err; + if (!params && !vbd->name) { + err = -EINVAL; + goto fail; + } - vbd->flags = flags; - vbd->storage = storage; + if (params) { + vbd->name = strdup(params); + if (!vbd->name) { + err = -errno; + goto fail; + } + } - for (i = 0; i < TD_VBD_EIO_RETRIES; i++) { - err = __tapdisk_vbd_open_vdi(vbd, 0); - if (err != -EIO) - break; + err = tapdisk_image_open_chain(vbd->name, flags, prt_path, &vbd->images); + if (err) + goto fail; + assert(!TAILQ_EMPTY(&vbd->images)); - sleep(TD_VBD_EIO_SLEEP); - } - if (err) - goto fail; + 
td_flag_clear(vbd->state, TD_VBD_CLOSED); + vbd->flags = flags; - return 0; + if (td_flag_test(vbd->flags, TD_OPEN_LOG_DIRTY)) { + err = tapdisk_vbd_add_dirty_log(vbd); + if (err) + goto fail; + } -fail: - free(vbd->name); - vbd->name = NULL; - return err; -} - -static int -tapdisk_vbd_register_event_watches(td_vbd_t *vbd) -{ - event_id_t id; - - id = tapdisk_server_register_event(SCHEDULER_POLL_READ_FD, - vbd->ring.fd, 0, - tapdisk_vbd_ring_event, vbd); - if (id < 0) - return id; - - vbd->ring_event_id = id; - - return 0; -} - -static void -tapdisk_vbd_unregister_events(td_vbd_t *vbd) -{ - if (vbd->ring_event_id) - tapdisk_server_unregister_event(vbd->ring_event_id); -} - -static int -tapdisk_vbd_map_device(td_vbd_t *vbd, const char *devname) -{ - - int err, psize; - td_ring_t *ring; - - ring = &vbd->ring; - psize = getpagesize(); - - ring->fd = open(devname, O_RDWR); - if (ring->fd == -1) { - err = -errno; - EPRINTF("failed to open %s: %d\n", devname, err); + if (td_flag_test(vbd->flags, TD_OPEN_ADD_CACHE)) { + err = tapdisk_vbd_add_block_cache(vbd); + if (err) goto fail; } - ring->mem = mmap(0, psize * BLKTAP_MMAP_REGION_SIZE, - PROT_READ | PROT_WRITE, MAP_SHARED, ring->fd, 0); - if (ring->mem == MAP_FAILED) { - err = -errno; - EPRINTF("failed to mmap %s: %d\n", devname, err); + if (td_flag_test(vbd->flags, TD_OPEN_LOCAL_CACHE)) { + err = tapdisk_vbd_add_local_cache(vbd); + if (err) goto fail; } - ring->sring = (blkif_sring_t *)((unsigned long)ring->mem); - BACK_RING_INIT(&ring->fe_ring, ring->sring, psize); + err = tapdisk_vbd_validate_chain(vbd); + if (err) + goto fail; - ring->vstart - (unsigned long)ring->mem + (BLKTAP_RING_PAGES * psize); + if (td_flag_test(vbd->flags, TD_OPEN_SECONDARY)) { + err = tapdisk_vbd_add_secondary(vbd); + if (err) + goto fail; + } - ioctl(ring->fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE); + if (tmp != vbd->name) + free(tmp); - return 0; + return err; fail: - if (ring->mem && ring->mem != MAP_FAILED) - munmap(ring->mem, psize 
* BLKTAP_MMAP_REGION_SIZE); - if (ring->fd != -1) - close(ring->fd); - ring->fd = -1; - ring->mem = NULL; - return err; -} + if (vbd->name != tmp) { + free(vbd->name); + vbd->name = tmp; + } -static int -tapdisk_vbd_unmap_device(td_vbd_t *vbd) -{ - int psize; + if (!TAILQ_EMPTY(&vbd->images)) + tapdisk_image_close_chain(&vbd->images); - psize = getpagesize(); + vbd->flags = 0; - if (vbd->ring.fd != -1) - close(vbd->ring.fd); - if (vbd->ring.mem > 0) - munmap(vbd->ring.mem, psize * BLKTAP_MMAP_REGION_SIZE); - - return 0; -} - -void -tapdisk_vbd_detach(td_vbd_t *vbd) -{ - tapdisk_vbd_unregister_events(vbd); - - tapdisk_vbd_unmap_device(vbd); - vbd->minor = -1; -} - - -int -tapdisk_vbd_attach(td_vbd_t *vbd, const char *devname, int minor) -{ - int err; - - err = tapdisk_vbd_map_device(vbd, devname); - if (err) - goto fail; - - err = tapdisk_vbd_register_event_watches(vbd); - if (err) - goto fail; - - vbd->minor = minor; - - return 0; - -fail: - tapdisk_vbd_detach(vbd); - - return err; -} - -int -tapdisk_vbd_open(td_vbd_t *vbd, const char *name, uint16_t type, - uint16_t storage, int minor, const char *ring, td_flag_t flags) -{ - int err; - - err = tapdisk_vbd_open_stack(vbd, storage, flags); - if (err) - goto out; - - err = tapdisk_vbd_attach(vbd, ring, minor); - if (err) - goto out; - - return 0; - -out: - tapdisk_vbd_detach(vbd); - tapdisk_vbd_close_vdi(vbd); - free(vbd->name); - vbd->name = NULL; - return err; + return err; } static void @@ -745,44 +518,39 @@ tapdisk_vbd_queue_count(td_vbd_t *vbd, i *completed = c; } -static int -tapdisk_vbd_shutdown(td_vbd_t *vbd) +static int tapdisk_vbd_shutdown(td_vbd_t * vbd) { int new, pending, failed, completed; - if (!list_empty(&vbd->pending_requests)) + if (!TAILQ_EMPTY(&vbd->pending_requests)) return -EAGAIN; - tapdisk_vbd_kick(vbd); tapdisk_vbd_queue_count(vbd, &new, &pending, &failed, &completed); DPRINTF("%s: state: 0x%08x, new: 0x%02x, pending: 0x%02x, " "failed: 0x%02x, completed: 0x%02x\n", vbd->name, vbd->state, 
new, pending, failed, completed); - DPRINTF("last activity: %010ld.%06lld, errors: 0x%04"PRIx64", " + DPRINTF("last activity: %010ld.%06ld, errors: 0x%04" PRIx64 ", " "retries: 0x%04"PRIx64", received: 0x%08"PRIx64", " "returned: 0x%08"PRIx64", kicked: 0x%08"PRIx64"\n", - vbd->ts.tv_sec, (unsigned long long)vbd->ts.tv_usec, + vbd->ts.tv_sec, vbd->ts.tv_usec, vbd->errors, vbd->retries, vbd->received, vbd->returned, vbd->kicked); tapdisk_vbd_close_vdi(vbd); - tapdisk_vbd_detach(vbd); tapdisk_server_remove_vbd(vbd); - tapdisk_vbd_free(vbd); - - tlog_print_errors(); + free(vbd->name); + free(vbd); return 0; } -int -tapdisk_vbd_close(td_vbd_t *vbd) +int tapdisk_vbd_close(td_vbd_t * vbd) { /* * don''t close if any requests are pending in the aio layer */ - if (!list_empty(&vbd->pending_requests)) + if (!TAILQ_EMPTY(&vbd->pending_requests)) goto fail; /* @@ -790,9 +558,9 @@ tapdisk_vbd_close(td_vbd_t *vbd) * requests, try to complete them before closing. */ if (tapdisk_vbd_queue_ready(vbd) && - (!list_empty(&vbd->new_requests) || - !list_empty(&vbd->failed_requests) || - !list_empty(&vbd->completed_requests))) + (!TAILQ_EMPTY(&vbd->new_requests) || + !TAILQ_EMPTY(&vbd->failed_requests) || + !TAILQ_EMPTY(&vbd->completed_requests))) goto fail; return tapdisk_vbd_shutdown(vbd); @@ -807,8 +575,7 @@ fail: * control operations */ -void -tapdisk_vbd_debug(td_vbd_t *vbd) +void tapdisk_vbd_debug(td_vbd_t * vbd) { td_image_t *image, *tmp; int new, pending, failed, completed; @@ -816,49 +583,41 @@ tapdisk_vbd_debug(td_vbd_t *vbd) tapdisk_vbd_queue_count(vbd, &new, &pending, &failed, &completed); DBG(TLOG_WARN, "%s: state: 0x%08x, new: 0x%02x, pending: 0x%02x, " - "failed: 0x%02x, completed: 0x%02x, last activity: %010ld.%06lld, " - "errors: 0x%04"PRIx64", retries: 0x%04"PRIx64", received: 0x%08"PRIx64", " - "returned: 0x%08"PRIx64", kicked: 0x%08"PRIx64"\n", + "failed: 0x%02x, completed: 0x%02x, last activity: %010ld.%06ld, " + "errors: 0x%04" PRIx64 ", retries: 0x%04" PRIx64 ", " + 
"received: 0x%08" PRIx64 ", returned: 0x%08" PRIx64 ", " + "kicked: 0x%08" PRIx64 "\n", vbd->name, vbd->state, new, pending, failed, completed, - vbd->ts.tv_sec, (unsigned long long)vbd->ts.tv_usec, - vbd->errors, vbd->retries, + vbd->ts.tv_sec, vbd->ts.tv_usec, vbd->errors, vbd->retries, vbd->received, vbd->returned, vbd->kicked); tapdisk_vbd_for_each_image(vbd, image, tmp) td_debug(image); } -static void -tapdisk_vbd_drop_log(td_vbd_t *vbd) +static void tapdisk_vbd_drop_log(td_vbd_t * vbd) { if (td_flag_test(vbd->state, TD_VBD_LOG_DROPPED)) return; tapdisk_vbd_debug(vbd); - tlog_flush(); + tlog_precious(); td_flag_set(vbd->state, TD_VBD_LOG_DROPPED); } -int -tapdisk_vbd_get_image_info(td_vbd_t *vbd, image_t *img) +int tapdisk_vbd_get_disk_info(td_vbd_t * vbd, td_disk_info_t * info) { - td_image_t *image; + if (TAILQ_EMPTY(&vbd->images)) + { + EPRINTF("no images\n"); + return -EINVAL; + } - memset(img, 0, sizeof(image_t)); - - if (list_empty(&vbd->images)) - return -EINVAL; - - image = tapdisk_vbd_first_image(vbd); - img->size = image->info.size; - img->secsize = image->info.sector_size; - img->info = image->info.info; - + *info = tapdisk_vbd_first_image(vbd)->info; return 0; } -int -tapdisk_vbd_queue_ready(td_vbd_t *vbd) +static int tapdisk_vbd_queue_ready(td_vbd_t * vbd) { return (!td_flag_test(vbd->state, TD_VBD_DEAD) && !td_flag_test(vbd->state, TD_VBD_CLOSED) && @@ -866,22 +625,15 @@ tapdisk_vbd_queue_ready(td_vbd_t *vbd) !td_flag_test(vbd->state, TD_VBD_QUIESCE_REQUESTED)); } -int -tapdisk_vbd_retry_needed(td_vbd_t *vbd) +int tapdisk_vbd_retry_needed(td_vbd_t * vbd) { - return td_flag_test(vbd->state, TD_VBD_RETRY_NEEDED); + return !(TAILQ_EMPTY(&vbd->failed_requests) && + TAILQ_EMPTY(&vbd->new_requests)); } -int -tapdisk_vbd_lock(td_vbd_t *vbd) +int tapdisk_vbd_quiesce_queue(td_vbd_t * vbd) { - return 0; -} - -int -tapdisk_vbd_quiesce_queue(td_vbd_t *vbd) -{ - if (!list_empty(&vbd->pending_requests)) { + if (!TAILQ_EMPTY(&vbd->pending_requests)) { 
td_flag_set(vbd->state, TD_VBD_QUIESCE_REQUESTED); return -EAGAIN; } @@ -891,24 +643,24 @@ tapdisk_vbd_quiesce_queue(td_vbd_t *vbd) return 0; } -int -tapdisk_vbd_start_queue(td_vbd_t *vbd) +int tapdisk_vbd_start_queue(td_vbd_t * vbd) { td_flag_clear(vbd->state, TD_VBD_QUIESCED); td_flag_clear(vbd->state, TD_VBD_QUIESCE_REQUESTED); + tapdisk_vbd_mark_progress(vbd); return 0; } -int -tapdisk_vbd_kill_queue(td_vbd_t *vbd) +int tapdisk_vbd_kill_queue(td_vbd_t * vbd) { tapdisk_vbd_quiesce_queue(vbd); td_flag_set(vbd->state, TD_VBD_DEAD); return 0; } -static int -tapdisk_vbd_open_image(td_vbd_t *vbd, td_image_t *image) +/* XXX This is commented out in blktap2.5. */ +#if 0 +static int tapdisk_vbd_open_image(td_vbd_t * vbd, td_image_t * image) { int err; td_image_t *parent; @@ -928,33 +680,14 @@ tapdisk_vbd_open_image(td_vbd_t *vbd, td return 0; } +#endif -static int -tapdisk_vbd_close_and_reopen_image(td_vbd_t *vbd, td_image_t *image) -{ - int i, err; - - td_close(image); - - for (i = 0; i < TD_VBD_EIO_RETRIES; i++) { - err = tapdisk_vbd_open_image(vbd, image); - if (err != -EIO) - break; - - sleep(TD_VBD_EIO_SLEEP); - } - - if (err) - td_flag_set(vbd->state, TD_VBD_CLOSED); - - return err; -} - -int -tapdisk_vbd_pause(td_vbd_t *vbd) +int tapdisk_vbd_pause(td_vbd_t * vbd) { int err; + DBG(TLOG_DBG, "pause requested\n"); + td_flag_set(vbd->state, TD_VBD_PAUSE_REQUESTED); err = tapdisk_vbd_quiesce_queue(vbd); @@ -963,34 +696,29 @@ tapdisk_vbd_pause(td_vbd_t *vbd) tapdisk_vbd_close_vdi(vbd); + DBG(TLOG_DBG, "pause completed\n"); + td_flag_clear(vbd->state, TD_VBD_PAUSE_REQUESTED); td_flag_set(vbd->state, TD_VBD_PAUSED); return 0; } -int -tapdisk_vbd_resume(td_vbd_t *vbd, const char *path, uint16_t drivertype) +int tapdisk_vbd_resume(td_vbd_t * vbd, const char *name) { int i, err; + DBG(TLOG_DBG, "resume requested\n"); + if (!td_flag_test(vbd->state, TD_VBD_PAUSED)) { EPRINTF("resume request for unpaused vbd %s\n", vbd->name); return -EINVAL; } - if (path) { - 
free(vbd->name); - vbd->name = strdup(path); - if (!vbd->name) { - EPRINTF("copying new vbd %s name failed\n", path); - return -EINVAL; - } - } - for (i = 0; i < TD_VBD_EIO_RETRIES; i++) { - err = __tapdisk_vbd_open_vdi(vbd, TD_OPEN_STRICT); - if (err != -EIO) + err = + tapdisk_vbd_open_vdi(vbd, name, vbd->flags | TD_OPEN_STRICT, NULL); + if (!err) break; sleep(TD_VBD_EIO_SLEEP); @@ -999,101 +727,68 @@ tapdisk_vbd_resume(td_vbd_t *vbd, const if (err) return err; + DBG(TLOG_DBG, "resume completed\n"); + tapdisk_vbd_start_queue(vbd); td_flag_clear(vbd->state, TD_VBD_PAUSED); td_flag_clear(vbd->state, TD_VBD_PAUSE_REQUESTED); tapdisk_vbd_check_state(vbd); + DBG(TLOG_DBG, "state checked\n"); + return 0; } -int -tapdisk_vbd_kick(td_vbd_t *vbd) +static int +tapdisk_vbd_request_ttl(td_vbd_request_t * vreq, const struct timeval *now) { - int n; - td_ring_t *ring; - - tapdisk_vbd_check_state(vbd); - - ring = &vbd->ring; - if (!ring->sring) - return 0; - - n = (ring->fe_ring.rsp_prod_pvt - ring->fe_ring.sring->rsp_prod); - if (!n) - return 0; - - vbd->kicked += n; - RING_PUSH_RESPONSES(&ring->fe_ring); - ioctl(ring->fd, BLKTAP_IOCTL_KICK_FE, 0); - - DBG(TLOG_INFO, "kicking %d: rec: 0x%08"PRIx64", ret: 0x%08"PRIx64", kicked: " - "0x%08"PRIx64"\n", n, vbd->received, vbd->returned, vbd->kicked); - - return n; + struct timeval delta; + timersub(now, &vreq->ts, &delta); + return TD_VBD_REQUEST_TIMEOUT - delta.tv_sec; } -static inline void -tapdisk_vbd_write_response_to_ring(td_vbd_t *vbd, blkif_response_t *rsp) +static int +__tapdisk_vbd_request_timeout(td_vbd_request_t * vreq, + const struct timeval *now) { - td_ring_t *ring; - blkif_response_t *rspp; + int timeout; - ring = &vbd->ring; - rspp = RING_GET_RESPONSE(&ring->fe_ring, ring->fe_ring.rsp_prod_pvt); - memcpy(rspp, rsp, sizeof(blkif_response_t)); - ring->fe_ring.rsp_prod_pvt++; + timeout = tapdisk_vbd_request_ttl(vreq, now) < 0; + if (timeout) + ERR(vreq->error, + "req %s timed out, retried %d times\n", + vreq->name, 
vreq->num_retries); + + return timeout; } -static void -tapdisk_vbd_callback(void *arg, blkif_response_t *rsp) +static int tapdisk_vbd_request_timeout(td_vbd_request_t * vreq) { - td_vbd_t *vbd = (td_vbd_t *)arg; - tapdisk_vbd_write_response_to_ring(vbd, rsp); + struct timeval now; + gettimeofday(&now, NULL); + return __tapdisk_vbd_request_timeout(vreq, &now); } -static void -tapdisk_vbd_make_response(td_vbd_t *vbd, td_vbd_request_t *vreq) -{ - blkif_request_t tmp; - blkif_response_t *rsp; - - tmp = vreq->req; - rsp = (blkif_response_t *)&vreq->req; - - rsp->id = tmp.id; - rsp->operation = tmp.operation; - rsp->status = vreq->status; - - DBG(TLOG_DBG, "writing req %d, sec 0x%08"PRIx64", res %d to ring\n", - (int)tmp.id, tmp.sector_number, vreq->status); - - if (rsp->status != BLKIF_RSP_OKAY) - ERR(EIO, "returning BLKIF_RSP %d", rsp->status); - - vbd->returned++; - vbd->callback(vbd->argument, rsp); -} - -void -tapdisk_vbd_check_state(td_vbd_t *vbd) +static void tapdisk_vbd_check_queue_state(td_vbd_t * vbd) { td_vbd_request_t *vreq, *tmp; + struct timeval now; + gettimeofday(&now, NULL); tapdisk_vbd_for_each_request(vreq, tmp, &vbd->failed_requests) - if (vreq->num_retries >= TD_VBD_MAX_RETRIES) + if (__tapdisk_vbd_request_timeout(vreq, &now)) tapdisk_vbd_complete_vbd_request(vbd, vreq); - if (!list_empty(&vbd->new_requests) || - !list_empty(&vbd->failed_requests)) + if (!TAILQ_EMPTY(&vbd->new_requests) || + !TAILQ_EMPTY(&vbd->failed_requests)) tapdisk_vbd_issue_requests(vbd); - tapdisk_vbd_for_each_request(vreq, tmp, &vbd->completed_requests) { - tapdisk_vbd_make_response(vbd, vreq); - list_del(&vreq->next); - tapdisk_vbd_initialize_vreq(vreq); } +void tapdisk_vbd_check_state(td_vbd_t * vbd) +{ + tapdisk_vbd_check_queue_state(vbd); + if (td_flag_test(vbd->state, TD_VBD_QUIESCE_REQUESTED)) tapdisk_vbd_quiesce_queue(vbd); @@ -1104,21 +799,21 @@ tapdisk_vbd_check_state(td_vbd_t *vbd) tapdisk_vbd_close(vbd); } -void -tapdisk_vbd_check_progress(td_vbd_t *vbd) +void 
tapdisk_vbd_check_progress(td_vbd_t * vbd) { - int diff; - struct timeval now; + time_t diff; + struct timeval now, delta; - if (list_empty(&vbd->pending_requests)) + if (TAILQ_EMPTY(&vbd->pending_requests)) return; gettimeofday(&now, NULL); - diff = now.tv_sec - vbd->ts.tv_sec; + timersub(&now, &vbd->ts, &delta); + diff = delta.tv_sec; - if (diff >= TD_VBD_WATCHDOG_TIMEOUT) { + if (diff >= TD_VBD_WATCHDOG_TIMEOUT && tapdisk_vbd_queue_ready(vbd)) { DBG(TLOG_WARN, "%s: watchdog timeout: pending requests " - "idle for %d seconds\n", vbd->name, diff); + "idle for %ld seconds\n", vbd->name, diff); tapdisk_vbd_drop_log(vbd); return; } @@ -1130,106 +825,90 @@ tapdisk_vbd_check_progress(td_vbd_t *vbd * request submission */ -static int -tapdisk_vbd_check_queue(td_vbd_t *vbd) +static int tapdisk_vbd_check_queue(td_vbd_t * vbd) { - int err; - td_image_t *image; - - if (list_empty(&vbd->images)) + if (TAILQ_EMPTY(&vbd->images)) return -ENOSYS; if (!tapdisk_vbd_queue_ready(vbd)) return -EAGAIN; - if (!vbd->reopened) { - if (td_flag_test(vbd->state, TD_VBD_LOCKING)) { - err = tapdisk_vbd_lock(vbd); - if (err) - return err; + return 0; } - image = tapdisk_vbd_first_image(vbd); - td_flag_set(image->flags, TD_OPEN_STRICT); +static int +tapdisk_vbd_request_should_retry(td_vbd_t * vbd, td_vbd_request_t * vreq) +{ + if (td_flag_test(vbd->state, TD_VBD_DEAD) || + td_flag_test(vbd->state, TD_VBD_SHUTDOWN_REQUESTED)) + return 0; - if (tapdisk_vbd_close_and_reopen_image(vbd, image)) - EPRINTF("reopening disks failed\n"); - else { - DPRINTF("reopening disks succeeded\n"); - vbd->reopened = 1; - } + switch (abs(vreq->error)) { + case EPERM: + case ENOSYS: + case ESTALE: + case ENOSPC: + return 0; } + if (tapdisk_vbd_request_timeout(vreq)) return 0; + + return 1; } -void +static void tapdisk_vbd_complete_vbd_request(td_vbd_t *vbd, td_vbd_request_t *vreq) { if (!vreq->submitting && !vreq->secs_pending) { - if (vreq->status == BLKIF_RSP_ERROR && - vreq->num_retries < TD_VBD_MAX_RETRIES && - 
!td_flag_test(vbd->state, TD_VBD_DEAD) && - !td_flag_test(vbd->state, TD_VBD_SHUTDOWN_REQUESTED)) + if (vreq->error && tapdisk_vbd_request_should_retry(vbd, vreq)) tapdisk_vbd_move_request(vreq, &vbd->failed_requests); else tapdisk_vbd_move_request(vreq, &vbd->completed_requests); } } -static uint64_t -tapdisk_vbd_breq_get_sector(blkif_request_t *breq, td_request_t treq) +static void +FIXME_maybe_count_enospc_redirect(td_vbd_t * vbd, td_request_t treq) { - int seg, nsects; - uint64_t sector_nr = breq->sector_number; - - for(seg=0; seg < treq.sidx; seg++) { - nsects = breq->seg[seg].last_sect - breq->seg[seg].first_sect + 1; - sector_nr += nsects; - } - - return sector_nr; + int write = treq.op == TD_OP_WRITE; + if (write && + treq.image == tapdisk_vbd_first_image(vbd) && + vbd->FIXME_enospc_redirect_count_enabled) + vbd->FIXME_enospc_redirect_count += treq.secs; } static void __tapdisk_vbd_complete_td_request(td_vbd_t *vbd, td_vbd_request_t *vreq, td_request_t treq, int res) { - int err; td_image_t *image = treq.image; + int err; err = (res <= 0 ? res : -res); vbd->secs_pending -= treq.secs; vreq->secs_pending -= treq.secs; - vreq->blocked = treq.blocked; + if (err != -EBUSY) { + int write = treq.op == TD_OP_WRITE; + td_sector_count_add(&image->stats.hits, treq.secs, write); + if (err) + td_sector_count_add(&image->stats.fail, treq.secs, write); + + FIXME_maybe_count_enospc_redirect(vbd, treq); + } if (err) { - vreq->status = BLKIF_RSP_ERROR; - vreq->error = (vreq->error ? : err); if (err != -EBUSY) { - vbd->errors++; - ERR(err, "req %"PRIu64": %s 0x%04x secs to " - "0x%08"PRIx64, vreq->req.id, + if (!vreq->error && err != vreq->prev_error) + tlog_drv_error(image->driver, err, + "req %s: %s 0x%04x secs @ 0x%08" PRIx64, + vreq->name, (treq.op == TD_OP_WRITE ? 
"write" : "read"), treq.secs, treq.sec); + vbd->errors++; } - } else { -#ifdef MEMSHR - if (treq.op == TD_OP_READ - && td_flag_test(image->flags, TD_OPEN_RDONLY)) { - share_tuple_t hnd = treq.memshr_hnd; - uint16_t uid = image->memshr_id; - blkif_request_t *breq = &vreq->req; - uint64_t sec = tapdisk_vbd_breq_get_sector(breq, treq); - int secs = breq->seg[treq.sidx].last_sect - - breq->seg[treq.sidx].first_sect + 1; - - if (hnd.handle != 0) - memshr_vbd_complete_ro_request(hnd, uid, - sec, secs); - } -#endif + vreq->error = (vreq->error ? : err); } tapdisk_vbd_complete_vbd_request(vbd, vreq); @@ -1242,7 +921,7 @@ static void td_image_t *parent; td_vbd_request_t *vreq; - vreq = (td_vbd_request_t *)treq.private; + vreq = treq.vreq; gettimeofday(&vreq->last_try, NULL); vreq->submitting++; @@ -1282,27 +961,6 @@ static void break; case TD_OP_READ: -#ifdef MEMSHR - if(td_flag_test(parent->flags, TD_OPEN_RDONLY)) { - int ret, seg = treq.sidx; - blkif_request_t *breq = &vreq->req; - - ret = memshr_vbd_issue_ro_request(treq.buf, - breq->seg[seg].gref, - parent->memshr_id, - treq.sec, - treq.secs, - &treq.memshr_hnd); - if(ret == 0) { - /* Reset memshr handle. 
This''ll prevent - * memshr_vbd_complete_ro_request being called - */ - treq.memshr_hnd.handle = 0; - td_complete_request(treq, 0); - } else - td_queue_read(parent, treq); - } else -#endif td_queue_read(parent, treq); break; } @@ -1313,114 +971,144 @@ done: tapdisk_vbd_complete_vbd_request(vbd, vreq); } -void -tapdisk_vbd_forward_request(td_request_t treq) +void tapdisk_vbd_forward_request(td_request_t treq) { td_vbd_t *vbd; td_image_t *image; td_vbd_request_t *vreq; image = treq.image; - vbd = (td_vbd_t *)image->private; - vreq = (td_vbd_request_t *)treq.private; + vreq = treq.vreq; + vbd = vreq->vbd; - gettimeofday(&vbd->ts, NULL); + tapdisk_vbd_mark_progress(vbd); if (tapdisk_vbd_queue_ready(vbd)) __tapdisk_vbd_reissue_td_request(vbd, image, treq); else - __tapdisk_vbd_complete_td_request(vbd, vreq, treq, -EIO); + __tapdisk_vbd_complete_td_request(vbd, vreq, treq, -EBUSY); } -static void -tapdisk_vbd_complete_td_request(td_request_t treq, int res) +void tapdisk_vbd_complete_td_request(td_request_t treq, int res) { td_vbd_t *vbd; - td_image_t *image; + td_image_t *image, *leaf; td_vbd_request_t *vreq; image = treq.image; - vbd = (td_vbd_t *)image->private; - vreq = (td_vbd_request_t *)treq.private; + vreq = treq.vreq; + vbd = vreq->vbd; - gettimeofday(&vbd->ts, NULL); - DBG(TLOG_DBG, "%s: req %d seg %d sec 0x%08"PRIx64" " + tapdisk_vbd_mark_progress(vbd); + + if (abs(res) == ENOSPC && td_flag_test(image->flags, TD_IGNORE_ENOSPC)) { + res = 0; + leaf = tapdisk_vbd_first_image(vbd); + if (vbd->secondary_mode == TD_VBD_SECONDARY_MIRROR) { + DPRINTF("ENOSPC: disabling mirroring\n"); + TAILQ_REMOVE(&vbd->images, leaf, entry); + vbd->retired = leaf; + } else if (vbd->secondary_mode == TD_VBD_SECONDARY_STANDBY) { + DPRINTF("ENOSPC: failing over to secondary image\n"); + TAILQ_INSERT_BEFORE(leaf, vbd->secondary, entry); + vbd->FIXME_enospc_redirect_count_enabled = 1; + } + if (vbd->secondary_mode != TD_VBD_SECONDARY_DISABLED) { + vbd->secondary = NULL; + 
vbd->secondary_mode = TD_VBD_SECONDARY_DISABLED; + signal_enospc(vbd); + } + } + + DBG(TLOG_DBG, "%s: req %s seg %d sec 0x%08" PRIx64 "secs 0x%04x buf %p op %d res %d\n", image->name, - (int)treq.id, treq.sidx, treq.sec, treq.secs, - treq.buf, (int)vreq->req.operation, res); + vreq->name, treq.sidx, treq.sec, treq.secs, + treq.buf, vreq->op, res); __tapdisk_vbd_complete_td_request(vbd, vreq, treq, res); } +static inline void queue_mirror_req(td_vbd_t * vbd, td_request_t clone) +{ + clone.image = vbd->secondary; + td_queue_write(vbd->secondary, clone); +} + static int tapdisk_vbd_issue_request(td_vbd_t *vbd, td_vbd_request_t *vreq) { - char *page; - td_ring_t *ring; td_image_t *image; td_request_t treq; - uint64_t sector_nr; - blkif_request_t *req; - int i, err, id, nsects; + td_sector_t sec; + int i, err; - req = &vreq->req; - id = req->id; - ring = &vbd->ring; - sector_nr = req->sector_number; + sec = vreq->sec; image = tapdisk_vbd_first_image(vbd); vreq->submitting = 1; - gettimeofday(&vbd->ts, NULL); - gettimeofday(&vreq->last_try, NULL); + + tapdisk_vbd_mark_progress(vbd); + vreq->last_try = vbd->ts; + tapdisk_vbd_move_request(vreq, &vbd->pending_requests); -#if 0 err = tapdisk_vbd_check_queue(vbd); - if (err) + if (err) { + vreq->error = err; goto fail; -#endif + } - err = tapdisk_image_check_ring_request(image, req); - if (err) + err = tapdisk_image_check_request(image, vreq); + if (err) { + vreq->error = err; goto fail; + } - for (i = 0; i < req->nr_segments; i++) { - nsects = req->seg[i].last_sect - req->seg[i].first_sect + 1; - page = (char *)MMAP_VADDR(ring->vstart, - (unsigned long)req->id, i); - page += (req->seg[i].first_sect << SECTOR_SHIFT); + for (i = 0; i < vreq->iovcnt; i++) { + struct td_iovec *iov = &vreq->iov[i]; - treq.id = id; treq.sidx = i; - treq.blocked = 0; - treq.buf = page; - treq.sec = sector_nr; - treq.secs = nsects; + treq.buf = iov->base; + treq.sec = sec; + treq.secs = iov->secs; treq.image = image; treq.cb = 
tapdisk_vbd_complete_td_request; treq.cb_data = NULL; - treq.private = vreq; + treq.vreq = vreq; - DBG(TLOG_DBG, "%s: req %d seg %d sec 0x%08"PRIx64" secs 0x%04x " - "buf %p op %d\n", image->name, id, i, treq.sec, treq.secs, - treq.buf, (int)req->operation); - vreq->secs_pending += nsects; - vbd->secs_pending += nsects; + vreq->secs_pending += iov->secs; + vbd->secs_pending += iov->secs; + if (vbd->secondary_mode == TD_VBD_SECONDARY_MIRROR && + vreq->op == TD_OP_WRITE) { + vreq->secs_pending += iov->secs; + vbd->secs_pending += iov->secs; + } - switch (req->operation) { - case BLKIF_OP_WRITE: + switch (vreq->op) { + case TD_OP_WRITE: treq.op = TD_OP_WRITE; - td_queue_write(image, treq); + /* it''s important to queue the mirror request before queuing + * the main one. If the main image runs into ENOSPC, the + * mirroring could be disabled before td_queue_write returns, + * so if the mirror request was queued after (which would then + * not happen), we''d lose that write and cause the process to + * hang with unacknowledged writes */ + if (vbd->secondary_mode == TD_VBD_SECONDARY_MIRROR) + queue_mirror_req(vbd, treq); + td_queue_write(treq.image, treq); break; - case BLKIF_OP_READ: + case TD_OP_READ: treq.op = TD_OP_READ; - td_queue_read(image, treq); + td_queue_read(treq.image, treq); break; } - sector_nr += nsects; + DBG(TLOG_DBG, "%s: req %s seg %d sec 0x%08" PRIx64 " secs 0x%04x " + "buf %p op %d\n", image->name, vreq->name, i, treq.sec, + treq.secs, treq.buf, vreq->op); + sec += iov->secs; } err = 0; @@ -1435,12 +1123,17 @@ out: return err; fail: - vreq->status = BLKIF_RSP_ERROR; + vreq->error = err; goto out; } static int -tapdisk_vbd_reissue_failed_requests(td_vbd_t *vbd) +tapdisk_vbd_request_completed(td_vbd_t * vbd, td_vbd_request_t * vreq) +{ + return vreq->list_head == &vbd->completed_requests; +} + +static int tapdisk_vbd_reissue_failed_requests(td_vbd_t * vbd) { int err; struct timeval now; @@ -1453,93 +1146,109 @@ tapdisk_vbd_reissue_failed_requests(td_v 
if (vreq->secs_pending) continue; - if (td_flag_test(vbd->state, TD_VBD_SHUTDOWN_REQUESTED)) - goto fail; + if (td_flag_test(vbd->state, TD_VBD_SHUTDOWN_REQUESTED)) { + tapdisk_vbd_complete_vbd_request(vbd, vreq); + continue; + } if (vreq->error != -EBUSY && now.tv_sec - vreq->last_try.tv_sec < TD_VBD_RETRY_INTERVAL) continue; - if (vreq->num_retries >= TD_VBD_MAX_RETRIES) { - fail: - DBG(TLOG_INFO, "req %"PRIu64"retried %d times\n", - vreq->req.id, vreq->num_retries); - tapdisk_vbd_complete_vbd_request(vbd, vreq); - continue; - } - - /* - * never fail due to too many retries if we are blocked on a - * dependency - */ - if (vreq->blocked) { - vreq->blocked = 0; - } else { vbd->retries++; vreq->num_retries++; - } + + vreq->prev_error = vreq->error; vreq->error = 0; - vreq->status = BLKIF_RSP_OKAY; - DBG(TLOG_DBG, "retry #%d of req %"PRIu64", " - "sec 0x%08"PRIx64", nr_segs: %d\n", vreq->num_retries, - vreq->req.id, vreq->req.sector_number, - vreq->req.nr_segments); + + DBG(TLOG_DBG, "retry #%d of req %s, " + "sec 0x%08" PRIx64 ", iovcnt: %d\n", vreq->num_retries, + vreq->name, vreq->sec, vreq->iovcnt); err = tapdisk_vbd_issue_request(vbd, vreq); - if (err) + /* + * if this request failed, but was not completed, + * we''ll back off for a while. 
+ */ + if (err && !tapdisk_vbd_request_completed(vbd, vreq)) break; } - if (list_empty(&vbd->failed_requests)) - td_flag_clear(vbd->state, TD_VBD_RETRY_NEEDED); - else - td_flag_set(vbd->state, TD_VBD_RETRY_NEEDED); - - return err; + return 0; } -static int -tapdisk_vbd_issue_new_requests(td_vbd_t *vbd) +static void +tapdisk_vbd_count_new_request(td_vbd_t * vbd, td_vbd_request_t * vreq) +{ + struct td_iovec *iov; + int write; + + write = vreq->op == TD_OP_WRITE; + + for (iov = &vreq->iov[0]; iov < &vreq->iov[vreq->iovcnt]; iov++) + td_sector_count_add(&vbd->secs, iov->secs, write); +} + +static int tapdisk_vbd_issue_new_requests(td_vbd_t * vbd) { int err; td_vbd_request_t *vreq, *tmp; tapdisk_vbd_for_each_request(vreq, tmp, &vbd->new_requests) { err = tapdisk_vbd_issue_request(vbd, vreq); - if (err) + /* + * if this request failed, but was not completed, + * we''ll back off for a while. + */ + if (err && !tapdisk_vbd_request_completed(vbd, vreq)) return err; + + tapdisk_vbd_count_new_request(vbd, vreq); } return 0; } -static int -tapdisk_vbd_kill_requests(td_vbd_t *vbd) +int tapdisk_vbd_recheck_state(td_vbd_t * vbd) +{ + if (TAILQ_EMPTY(&vbd->new_requests)) + return 0; + + if (td_flag_test(vbd->state, TD_VBD_QUIESCED) || + td_flag_test(vbd->state, TD_VBD_QUIESCE_REQUESTED)) + return 0; + + tapdisk_vbd_issue_new_requests(vbd); + + return 1; +} + +static int tapdisk_vbd_kill_requests(td_vbd_t * vbd) { td_vbd_request_t *vreq, *tmp; tapdisk_vbd_for_each_request(vreq, tmp, &vbd->new_requests) { - vreq->status = BLKIF_RSP_ERROR; + vreq->error = -ESHUTDOWN; tapdisk_vbd_move_request(vreq, &vbd->completed_requests); } tapdisk_vbd_for_each_request(vreq, tmp, &vbd->failed_requests) { - vreq->status = BLKIF_RSP_ERROR; + vreq->error = -ESHUTDOWN; tapdisk_vbd_move_request(vreq, &vbd->completed_requests); } return 0; } -int -tapdisk_vbd_issue_requests(td_vbd_t *vbd) +int tapdisk_vbd_issue_requests(td_vbd_t * vbd) { int err; if (td_flag_test(vbd->state, TD_VBD_DEAD)) return 
tapdisk_vbd_kill_requests(vbd); - if (!tapdisk_vbd_queue_ready(vbd)) + if (td_flag_test(vbd->state, TD_VBD_QUIESCED) || + td_flag_test(vbd->state, TD_VBD_QUIESCE_REQUESTED)) return -EAGAIN; err = tapdisk_vbd_reissue_failed_requests(vbd); @@ -1549,175 +1258,71 @@ tapdisk_vbd_issue_requests(td_vbd_t *vbd return tapdisk_vbd_issue_new_requests(vbd); } -static void -tapdisk_vbd_pull_ring_requests(td_vbd_t *vbd) +int tapdisk_vbd_queue_request(td_vbd_t * vbd, td_vbd_request_t * vreq) { - int idx; - RING_IDX rp, rc; - td_ring_t *ring; - blkif_request_t *req; - td_vbd_request_t *vreq; - - ring = &vbd->ring; - if (!ring->sring) - return; - - rp = ring->fe_ring.sring->req_prod; - xen_rmb(); - - for (rc = ring->fe_ring.req_cons; rc != rp; rc++) { - req = RING_GET_REQUEST(&ring->fe_ring, rc); - ++ring->fe_ring.req_cons; - - idx = req->id; - vreq = &vbd->request_list[idx]; - - ASSERT(list_empty(&vreq->next)); - ASSERT(vreq->secs_pending == 0); - - memcpy(&vreq->req, req, sizeof(blkif_request_t)); - vbd->received++; + gettimeofday(&vreq->ts, NULL); vreq->vbd = vbd; - tapdisk_vbd_move_request(vreq, &vbd->new_requests); + vreq->list_head = &vbd->new_requests; + TAILQ_INSERT_TAIL(&vbd->new_requests, vreq, next); + vbd->received++; - DBG(TLOG_DBG, "%s: request %d \n", vbd->name, idx); + return 0; +} + +void tapdisk_vbd_kick(td_vbd_t * vbd) +{ + struct tqh_td_vbd_request *list = &vbd->completed_requests; + td_vbd_request_t *vreq, *prev, *next; + + vbd->kicked++; + + while (!TAILQ_EMPTY(list)) { + prev = TAILQ_FIRST(list); + TAILQ_REMOVE(list, prev, next); + + tapdisk_vbd_for_each_request(vreq, next, list) { + if (vreq->token == prev->token) { + + prev->cb(prev, prev->error, prev->token, 0); + vbd->returned++; + + TAILQ_REMOVE(list, vreq, next); + prev = vreq; + } + } + + prev->cb(prev, prev->error, prev->token, 1); + vbd->returned++; } } -static int -tapdisk_vbd_pause_ring(td_vbd_t *vbd) +void tapdisk_vbd_stats(td_vbd_t * vbd, td_stats_t * st) { - int err; + td_image_t *image, *next; 
- if (td_flag_test(vbd->state, TD_VBD_PAUSED)) - return 0; + tapdisk_stats_enter(st, ''{''); + tapdisk_stats_field(st, "name", "s", vbd->name); - td_flag_set(vbd->state, TD_VBD_PAUSE_REQUESTED); + tapdisk_stats_field(st, "secs", "["); + tapdisk_stats_val(st, "llu", vbd->secs.rd); + tapdisk_stats_val(st, "llu", vbd->secs.wr); + tapdisk_stats_leave(st, '']''); - err = tapdisk_vbd_quiesce_queue(vbd); - if (err) { - EPRINTF("%s: ring pause request on active queue\n", vbd->name); - return err; + tapdisk_stats_field(st, "images", "["); + tapdisk_vbd_for_each_image(vbd, image, next) + tapdisk_image_stats(image, st); + tapdisk_stats_leave(st, '']''); + + if (vbd->tap) { + tapdisk_stats_field(st, "tap", "{"); + tapdisk_xenblkif_stats(vbd->tap, st); + tapdisk_stats_leave(st, ''}''); } - tapdisk_vbd_close_vdi(vbd); + tapdisk_stats_field(st, + "FIXME_enospc_redirect_count", + "llu", vbd->FIXME_enospc_redirect_count); - err = ioctl(vbd->ring.fd, BLKTAP2_IOCTL_PAUSE, 0); - if (err) - EPRINTF("%s: pause ioctl failed: %d\n", vbd->name, errno); - else { - td_flag_clear(vbd->state, TD_VBD_PAUSE_REQUESTED); - td_flag_set(vbd->state, TD_VBD_PAUSED); - } - - return err; + tapdisk_stats_leave(st, ''}''); } - -static int -tapdisk_vbd_resume_ring(td_vbd_t *vbd) -{ - int i, err, type; - char message[BLKTAP2_MAX_MESSAGE_LEN]; - const char *path; - - memset(message, 0, sizeof(message)); - - if (!td_flag_test(vbd->state, TD_VBD_PAUSED)) { - EPRINTF("%s: resume message for unpaused vbd\n", vbd->name); - return -EINVAL; - } - - err = ioctl(vbd->ring.fd, BLKTAP2_IOCTL_REOPEN, &message); - if (err) { - EPRINTF("%s: resume ioctl failed: %d\n", vbd->name, errno); - return err; - } - - err = tapdisk_parse_disk_type(message, &path, &type); - if (err) { - EPRINTF("%s: invalid resume string %s\n", vbd->name, message); - goto out; - } - - free(vbd->name); - vbd->name = strdup(path); - if (!vbd->name) { - EPRINTF("resume malloc failed\n"); - err = -ENOMEM; - goto out; - } - - 
tapdisk_vbd_start_queue(vbd); - - for (i = 0; i < TD_VBD_EIO_RETRIES; i++) { - err = __tapdisk_vbd_open_vdi(vbd, TD_OPEN_STRICT); - if (err != -EIO) - break; - - sleep(TD_VBD_EIO_SLEEP); - } - -out: - if (!err) { - image_t image; - struct blktap2_params params; - - memset(&params, 0, sizeof(params)); - tapdisk_vbd_get_image_info(vbd, &image); - - params.sector_size = image.secsize; - params.capacity = image.size; - snprintf(params.name, sizeof(params.name) - 1, "%s", message); - - ioctl(vbd->ring.fd, BLKTAP2_IOCTL_SET_PARAMS, &params); - td_flag_clear(vbd->state, TD_VBD_PAUSED); - } - - ioctl(vbd->ring.fd, BLKTAP2_IOCTL_RESUME, err); - return err; -} - -static int -tapdisk_vbd_check_ring_message(td_vbd_t *vbd) -{ - if (!vbd->ring.sring) - return -EINVAL; - - switch (vbd->ring.sring->private.tapif_user.msg) { - case 0: - return 0; - - case BLKTAP2_RING_MESSAGE_PAUSE: - return tapdisk_vbd_pause_ring(vbd); - - case BLKTAP2_RING_MESSAGE_RESUME: - return tapdisk_vbd_resume_ring(vbd); - - case BLKTAP2_RING_MESSAGE_CLOSE: - return tapdisk_vbd_close(vbd); - - default: - return -EINVAL; - } -} - -static void -tapdisk_vbd_ring_event(event_id_t id, char mode, void *private) -{ - td_vbd_t *vbd; - - vbd = (td_vbd_t *)private; - - tapdisk_vbd_pull_ring_requests(vbd); - tapdisk_vbd_issue_requests(vbd); - - /* vbd may be destroyed after this call */ - tapdisk_vbd_check_ring_message(vbd); -} - -td_image_t * -tapdisk_vbd_first_image(td_vbd_t *vbd) -{ - return list_entry(vbd->images.next, td_image_t, next); -}
Thanos Makatos
2013-Apr-19 15:40 UTC
[PATCH 18 of 21] blktap3/drivers: Introduce tapdisk utility functions
This patch copies assorted utility functions from blktap2. Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com> diff --git a/tools/blktap2/drivers/tapdisk-utils.c b/tools/blktap3/drivers/tapdisk-utils.c copy from tools/blktap2/drivers/tapdisk-utils.c copy to tools/blktap3/drivers/tapdisk-utils.c --- a/tools/blktap2/drivers/tapdisk-utils.c +++ b/tools/blktap3/drivers/tapdisk-utils.c @@ -25,10 +25,13 @@ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + +#include <stdlib.h> #include <errno.h> #include <stdio.h> #include <string.h> #include <unistd.h> +#include <linux/fs.h> #include <sys/stat.h> #include <sys/mman.h> #include <sys/ioctl.h> @@ -38,31 +41,102 @@ #include <linux/version.h> #endif -#include "blk.h" +#define SYSLOG_NAMES +#include <syslog.h> + +#include <time.h> + #include "tapdisk.h" -#include "blktaplib.h" #include "tapdisk-log.h" #include "tapdisk-utils.h" +#include "tapdisk-syslog.h" -void -tapdisk_start_logging(const char *name) +#define MIN(a,b) (((a) < (b)) ? 
(a) : (b)) + +static int tapdisk_syslog_facility_by_name(const char *name) { - static char buf[128]; + int facility; + CODE *c; - snprintf(buf, sizeof(buf), "%s[%d]", name, getpid()); - openlog(buf, LOG_CONS | LOG_ODELAY, LOG_DAEMON); - open_tlog("/tmp/tapdisk.log", (64 << 10), TLOG_WARN, 0); + facility = -1; + + for (c = facilitynames; c->c_name != NULL; ++c) + if (!strcmp(c->c_name, name)) { + facility = c->c_val; + break; } -void -tapdisk_stop_logging(void) -{ - closelog(); - close_tlog(); + return facility; } -int -tapdisk_set_resource_limits(void) +int tapdisk_syslog_facility(const char *arg) +{ + int facility; + char *endptr; + + if (arg) { + facility = strtol(arg, &endptr, 0); + if (*endptr == 0) + return facility; + + facility = tapdisk_syslog_facility_by_name(arg); + if (facility >= 0) + return facility; +} + + return LOG_DAEMON; +} + +char *tapdisk_syslog_ident(const char *name) +{ + char ident[TD_SYSLOG_IDENT_MAX + 1]; + size_t size, len; + pid_t pid; + + pid = getpid(); + size = sizeof(ident); + len = 0; + + len = snprintf(NULL, 0, "[%d]", pid); + len = snprintf(ident, size - len, "%s", name); + len += snprintf(ident + len, size - len, "[%d]", pid); + + return strdup(ident); +} + +size_t +tapdisk_syslog_strftime(char *buf, size_t size, const struct timeval * tv) +{ + const char *mon[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" + }; + struct tm tm; + + /* + * TIMESTAMP := <Mmm> " " <dd> " " <hh> ":" <mm> ":" <ss>. + * Local time, no locales. 
+ */ + + localtime_r(&tv->tv_sec, &tm); + + return snprintf(buf, size, "%s %2d %02d:%02d:%02d", + mon[tm.tm_mon], tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec); +} + +size_t +tapdisk_syslog_strftv(char *buf, size_t size, const struct timeval * tv) +{ + struct tm tm; + + localtime_r(&tv->tv_sec, &tm); + + return snprintf(buf, size, "[%02d:%02d:%02d.%03ld]", + tm.tm_hour, tm.tm_min, tm.tm_sec, + (long) tv->tv_usec / 1000); +} + +int tapdisk_set_resource_limits(void) { int err; struct rlimit rlim; @@ -111,10 +185,9 @@ tapdisk_namedup(char **dup, const char * int tapdisk_get_image_size(int fd, uint64_t *_sectors, uint32_t *_sector_size) { - int ret; struct stat stat; - uint64_t sectors; - uint64_t sector_size; + uint64_t sectors, bytes; + uint32_t sector_size; sectors = 0; sector_size = 0; @@ -128,12 +201,28 @@ tapdisk_get_image_size(int fd, uint64_t if (S_ISBLK(stat.st_mode)) { /*Accessing block device directly*/ - if (blk_getimagesize(fd, &sectors) != 0) + if (ioctl(fd, BLKGETSIZE64, &bytes) == 0) { + sectors = bytes >> SECTOR_SHIFT; + } else if (ioctl(fd, BLKGETSIZE, &sectors) != 0) { + DPRINTF + ("ERR: BLKGETSIZE and BLKGETSIZE64 failed, couldn't stat image"); return -EINVAL; + } /*Get the sector size*/ - if (blk_getsectorsize(fd, &sector_size) != 0) +#if defined(BLKSSZGET) + { sector_size = DEFAULT_SECTOR_SIZE; + ioctl(fd, BLKSSZGET, &sector_size); + + if (sector_size != DEFAULT_SECTOR_SIZE) + DPRINTF("Note: sector size is %u (not %d)\n", + sector_size, DEFAULT_SECTOR_SIZE); + } +#else + sector_size = DEFAULT_SECTOR_SIZE; +#endif + } else { /*Local file? 
try fstat instead*/ sectors = (stat.st_size >> SECTOR_SHIFT); @@ -175,40 +264,3 @@ int tapdisk_linux_version(void) } #endif -int read_exact(int fd, void *data, size_t size) -{ - size_t offset = 0; - ssize_t len; - - while ( offset < size ) - { - len = read(fd, (char *)data + offset, size - offset); - if ( (len == -1) && (errno == EINTR) ) - continue; - if ( len == 0 ) - errno = 0; - if ( len <= 0 ) - return -1; - offset += len; - } - - return 0; -} - -int write_exact(int fd, const void *data, size_t size) -{ - size_t offset = 0; - ssize_t len; - - while ( offset < size ) - { - len = write(fd, (const char *)data + offset, size - offset); - if ( (len == -1) && (errno == EINTR) ) - continue; - if ( len <= 0 ) - return -1; - offset += len; - } - - return 0; -}
Thanos Makatos
2013-Apr-19 15:40 UTC
[PATCH 19 of 21] blktap3/drivers: Introduce tapdisk's main function
This patch copies from blktap2.5 tapdisk''s main function. Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com> diff --git a/tools/blktap3/drivers/tapdisk.c b/tools/blktap3/drivers/tapdisk.c new file mode 100644 --- /dev/null +++ b/tools/blktap3/drivers/tapdisk.c @@ -0,0 +1,140 @@ + /* + * Copyright (c) 2008, XenSource Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of XenSource Inc. nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdio.h> +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <unistd.h> + +#include "tapdisk.h" +#include "tapdisk-utils.h" +#include "tapdisk-server.h" +#include "tapdisk-control.h" + +static void usage(const char *app, int err) +{ + fprintf(stderr, "usage: %s <-u uuid> <-c control socket>\n", app); + exit(err); +} + +static FILE +*fdup(FILE * stream __attribute__((unused)), const char *mode) +{ + int fd, err; + FILE *f; + + fd = dup(STDOUT_FILENO); + if (fd < 0) + goto fail; + + f = fdopen(fd, mode); + if (!f) + goto fail; + + return f; + + fail: + err = -errno; + if (fd >= 0) + close(fd); + errno = -err; + + return NULL; +} + +int main(int argc, char *argv[]) +{ + char *control; + int c, err, nodaemon; + FILE *out; + + control = NULL; + nodaemon = 0; + + while ((c = getopt(argc, argv, "Dh")) != -1) { + switch (c) { + case ''D'': + nodaemon = 1; + break; + case ''h'': + usage(argv[0], 0); + break; + default: + usage(argv[0], EINVAL); + } + } + + if (optind != argc) + usage(argv[0], EINVAL); + + err = tapdisk_server_init(); + if (err) { + DPRINTF("failed to initialize server: %d\n", err); + goto out; + } + + out = fdup(stdout, "w"); + if (!out) { + err = -errno; + DPRINTF("failed to dup stdout: %d\n", err); + goto out; + } + + if (!nodaemon) { + err = daemon(0, 0); + if (err) { + DPRINTF("failed to daemonize: %d\n", errno); + goto out; + } + } + + tapdisk_start_logging("tapdisk", NULL); + + err = tapdisk_control_open(&control); + if (err) { + DPRINTF("failed to open control socket: %d\n", err); + goto out; + } + + err = tapdisk_server_complete(); + if (err) { + DPRINTF("failed to complete server: %d\n", err); + goto out; + } + + fprintf(out, "%s\n", control); + fclose(out); + + err = tapdisk_server_run(); + + out: + tapdisk_control_close(); + tapdisk_stop_logging(); + return -err; +}
Thanos Makatos
2013-Apr-19 15:40 UTC
[PATCH 20 of 21] blktap3/drivers: Introduce tapdisk makefile
This patch copies from blktap2.5 the makefile that builds tapdisk. Any object that doesn''t seem to be necessary for the basic operation of tapdisk has been left out. Also, we use a distinct binary name for the blktap3 tapdisk process (tapdisk3) so that blktap2 and blktap3 can co-exist. Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com> diff --git a/.hgignore b/.hgignore --- a/.hgignore +++ b/.hgignore @@ -377,3 +377,4 @@ # blktap3 ^tools/blktap3/tapback/tapback$ +^tools/blktap3/drivers/tapdisk3$ diff --git a/tools/blktap2/drivers/Makefile b/tools/blktap3/drivers/Makefile copy from tools/blktap2/drivers/Makefile copy to tools/blktap3/drivers/Makefile --- a/tools/blktap2/drivers/Makefile +++ b/tools/blktap3/drivers/Makefile @@ -1,43 +1,65 @@ XEN_ROOT=$(CURDIR)/../../.. -BLKTAP_ROOT= .. include $(XEN_ROOT)/tools/Rules.mk +BLKTAP_ROOT = .. + +SUBDIRS-y :+SUBDIRS-y += sring + LIBVHDDIR = $(BLKTAP_ROOT)/vhd/lib -IBIN = tapdisk2 td-util tapdisk-client tapdisk-stream tapdisk-diff -QCOW_UTIL = img2qcow qcow-create qcow2raw +# FIXME tapdisk-client tapdisk-stream tapdisk-diff not in blktap2.5 +IBIN = tapdisk3 LOCK_UTIL = lock-util INST_DIR = $(SBINDIR) -CFLAGS += -Werror -g -CFLAGS += -Wno-unused -CFLAGS += -fno-strict-aliasing -CFLAGS += -I$(BLKTAP_ROOT)/include -I$(BLKTAP_ROOT)/drivers -CFLAGS += $(CFLAGS_libxenctrl) -CFLAGS += -D_GNU_SOURCE -CFLAGS += -DUSE_NFS_LOCKS +override CFLAGS += \ + -fno-strict-aliasing \ + -I$(BLKTAP_ROOT)/include \ + -I$(BLKTAP_ROOT)/drivers \ + -I$(BLKTAP_ROOT)/vhd/lib \ + $(CFLAGS_libxenctrl) \ + -D_GNU_SOURCE \ + -DUSE_NFS_LOCKS \ + -Werror \ + -Wall \ + -Wextra +# FIXME cause trouble +override CFLAGS += \ + -Wno-override-init \ + -Wno-sign-compare \ + -Wno-type-limits + +# TODO Why only in 64-bit? 
ifeq ($(CONFIG_X86_64),y) CFLAGS += -fPIC endif +override LDFLAGS += \ + -L$(XEN_ROOT)/tools/libxc \ + -lxenctrl \ + -luuid + VHDLIBS := -L$(LIBVHDDIR) -lvhd -REMUS-OBJS := block-remus.o -REMUS-OBJS += hashtable.o -REMUS-OBJS += hashtable_itr.o -REMUS-OBJS += hashtable_utility.o +# FIXME The following exist in blktap2 but not in blktap2.5. +# REMUS-OBJS := block-remus.o +# REMUS-OBJS += hashtable.o +# REMUS-OBJS += hashtable_itr.o +# REMUS-OBJS += hashtable_utility.o ifneq ($(CONFIG_SYSTEM_LIBAIO),y) CFLAGS += -I $(LIBAIO_DIR) LIBAIO_DIR = $(XEN_ROOT)/tools/libaio/src -tapdisk2 tapdisk-stream tapdisk-diff $(QCOW_UTIL): AIOLIBS := $(LIBAIO_DIR)/libaio.a -tapdisk-client tapdisk-stream tapdisk-diff $(QCOW_UTIL): CFLAGS += -I$(LIBAIO_DIR) +tapdisk3 tapdisk-stream tapdisk-diff: AIOLIBS := $(LIBAIO_DIR)/libaio.a +tapdisk-client tapdisk-stream tapdisk-diff: CFLAGS += -I$(LIBAIO_DIR) else -tapdisk2 tapdisk-stream tapdisk-diff $(QCOW_UTIL): AIOLIBS := -laio +tapdisk3 tapdisk-stream tapdisk-diff: AIOLIBS := -laio endif MEMSHRLIBS := +# FIXME __fixme__? 
ifeq ($(CONFIG_Linux), __fixme__) MEMSHR_DIR = $(XEN_ROOT)/tools/memshr CFLAGS += -DMEMSHR @@ -45,14 +67,6 @@ CFLAGS += -I $(MEMSHR_DIR) MEMSHRLIBS += -L$(XEN_ROOT)/tools/libxc -lxenctrl $(MEMSHR_DIR)/libmemshr.a endif -ifeq ($(VHD_STATIC),y) -td-util: CFLAGS += -static -endif - -PORTABLE-OBJS-y := -PORTABLE-OBJS-$(CONFIG_Linux) += blk_linux.o -PORTABLE-OBJS-$(CONFIG_NetBSD) += blk_netbsd.o - TAP-OBJS-y := scheduler.o TAP-OBJS-y += tapdisk-vbd.o TAP-OBJS-y += tapdisk-control.o @@ -63,54 +77,57 @@ TAP-OBJS-y += tapdisk-interface.o TAP-OBJS-y += tapdisk-server.o TAP-OBJS-y += tapdisk-queue.o TAP-OBJS-y += tapdisk-filter.o -TAP-OBJS-y += tapdisk-log.o TAP-OBJS-y += tapdisk-utils.o +TAP-OBJS-y += tapdisk-log.o TAP-OBJS-y += io-optimize.o -TAP-OBJS-y += lock.o -TAP-OBJS-y += $(PORTABLE-OBJS-y) +#TAP-OBJS-y += lock.o +#TAP-OBJS-y += tapdisk-blktap.o +TAP-OBJS-y += tapdisk-stats.o +TAP-OBJS-y += tapdisk-storage.o +TAP-OBJS-y += tapdisk-loglimit.o +TAP-OBJS-y += tapdisk-logfile.o +TAP-OBJS-y += tapdisk-syslog.o +#TAP-OBJS-y += $(PORTABLE-OBJS-y) -MISC-OBJS-y := atomicio.o +LIBSRING := sring/libsring.a +$(LIBSRING): subdirs-all + +#MISC-OBJS-y := atomicio.o BLK-OBJS-y := block-aio.o -BLK-OBJS-y += block-ram.o -BLK-OBJS-y += block-cache.o -BLK-OBJS-y += block-vhd.o -BLK-OBJS-y += block-log.o -BLK-OBJS-y += block-qcow.o -BLK-OBJS-y += aes.o -BLK-OBJS-y += md5.o +# FIXME The following exist in blktap2 but not in blktap2.5. 
+#BLK-OBJS-y += aes.o +#BLK-OBJS-y += md5.o BLK-OBJS-y += $(PORTABLE-OBJS-y) BLK-OBJS-y += $(REMUS-OBJS) -all: $(IBIN) lock-util qcow-util +# FIXME qcow-util not in blktap2.5 +all: $(IBIN) +$(BLKTAP_ROOT)/vhd/lib/libvhd.a: + make -C $(BLKTAP_ROOT)/vhd/lib libvhd.a -tapdisk2: $(TAP-OBJS-y) $(BLK-OBJS-y) $(MISC-OBJS-y) tapdisk2.o +tapdisk3: $(TAP-OBJS-y) $(BLK-OBJS-y) $(MISC-OBJS-y) tapdisk.o \ + $(BLKTAP_ROOT)/vhd/lib/libvhd.a $(LIBSRING) $(CC) -o $@ $^ $(LDFLAGS) -lrt -lz $(VHDLIBS) $(AIOLIBS) $(MEMSHRLIBS) -lm tapdisk-client: tapdisk-client.o $(CC) -o $@ $^ $(LDFLAGS) -lrt -tapdisk-stream tapdisk-diff: %: %.o $(TAP-OBJS-y) $(BLK-OBJS-y) - $(CC) -o $@ $^ $(LDFLAGS) -lrt -lz $(VHDLIBS) $(AIOLIBS) $(MEMSHRLIBS) -lm - -td-util: td.o tapdisk-utils.o tapdisk-log.o $(PORTABLE-OBJS-y) - $(CC) -o $@ $^ $(LDFLAGS) $(VHDLIBS) +# FIXME what's tapdisk-stream? +#tapdisk-stream tapdisk-diff: %: %.o $(TAP-OBJS-y) $(BLK-OBJS-y) +# $(CC) -o $@ $^ $(LDFLAGS) -lrt -lz $(VHDLIBS) $(AIOLIBS) $(MEMSHRLIBS) -lm lock-util: lock.c $(CC) $(CFLAGS) -DUTIL -o lock-util lock.c $(LDFLAGS) -.PHONY: qcow-util -qcow-util: img2qcow qcow2raw qcow-create - -img2qcow qcow2raw qcow-create: %: %.o $(TAP-OBJS-y) $(BLK-OBJS-y) - $(CC) -o $@ $^ $(LDFLAGS) -lrt -lz $(VHDLIBS) $(AIOLIBS) $(MEMSHRLIBS) -lm - +# FIXME img2qcow, qcow-create, qcow2raw not built so not installed +# FIXME lock-util should be installed install: all $(INSTALL_DIR) -p $(DESTDIR)$(INST_DIR) - $(INSTALL_PROG) $(IBIN) $(LOCK_UTIL) $(QCOW_UTIL) $(DESTDIR)$(INST_DIR) + $(INSTALL_PROG) $(IBIN) $(DESTDIR)$(INST_DIR) -clean: - rm -rf .*.d *.o *~ xen TAGS $(IBIN) $(LIB) $(LOCK_UTIL) $(QCOW_UTIL) +clean: subdirs-clean + rm -rf .*.d *.o *~ xen TAGS $(IBIN) $(LIB) $(LOCK_UTIL) .PHONY: clean install
Thanos Makatos
2013-Apr-19 15:40 UTC
[PATCH 21 of 21] blktap3: Introduce top-level blktap3 makefile
Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com> diff --git a/tools/blktap2/Makefile b/tools/blktap3/Makefile copy from tools/blktap2/Makefile copy to tools/blktap3/Makefile --- a/tools/blktap2/Makefile +++ b/tools/blktap3/Makefile @@ -1,18 +1,24 @@ XEN_ROOT = $(CURDIR)/../.. include $(XEN_ROOT)/tools/Rules.mk -CFLAGS += $(CFLAGS_libxenctrl) LDLIBS += $(LDLIBS_libxenctrl) +override CPPCHECK_DIR ?= . + SUBDIRS-y := -SUBDIRS-y += include -SUBDIRS-y += lvm SUBDIRS-y += vhd -SUBDIRS-$(CONFIG_Linux) += drivers -SUBDIRS-$(CONFIG_Linux) += control +SUBDIRS-y += control +SUBDIRS-y += tapback +SUBDIRS-y += drivers + +tags: + ctags -R --language-force=C --c-kinds=+px clean: - rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) TAGS + rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) TAGS tags -.PHONY: all clean install +check: + cppcheck --enable=all -q $(CPPCHECK_DIR) + +.PHONY: all clean install tags check all clean install: %: subdirs-%