This patch adds dm-userspace to the -xen Linux kernel. I'd like to get it into the tree so that people that want to can play with it. Anyone wishing to do so can download the tools separately, but they need the kernel module to be able to use it. The tools are available here: http://static.danplanet.com/dm-userspace/libdmu-0.3.tar.gz http://static.danplanet.com/dm-userspace/cowd-0.3.tar.gz -- Dan Smith IBM Linux Technology Center Open Hypervisor Team email: danms@us.ibm.com _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Anthony Liguori
2006-Jun-09 21:48 UTC
Re: [Xen-devel] [PATCH] Add dm-userspace to the Xen kernel
Shouldn't this go to LKML? Regards, Anthony Liguori Dan Smith wrote:> This patch adds dm-userspace to the -xen Linux kernel. I'd like to > get it into the tree so that people that want to can play with it. > Anyone wishing to do so can download the tools separately, but they > need the kernel module to be able to use it. > > The tools are available here: > > http://static.danplanet.com/dm-userspace/libdmu-0.3.tar.gz > http://static.danplanet.com/dm-userspace/cowd-0.3.tar.gz > > ------------------------------------------------------------------------ > > # HG changeset patch > # User Dan Smith <danms@us.ibm.com> > # Node ID db178a1b30f3e92da9ce6fd14f757efa9f6763c5 > # Parent 5a0ed6c476732da229c3307ea5357cdd196e5462 > This adds dm-userspace to the xen linux kernel via another entry in the > patches/ directory. The dm-userspace module is completely self-contained > and will not affect anything unless it is loaded. People wishing to > experiment with dm-userspace can download the tools packages separately, but > they need this module to use them. 
> > Signed-off-by: Dan Smith <danms@us.ibm.com> > > diff -r 5a0ed6c47673 -r db178a1b30f3 buildconfigs/linux-defconfig_xen_x86_32 > --- a/buildconfigs/linux-defconfig_xen_x86_32 Fri Jun 9 14:29:00 2006 +0100 > +++ b/buildconfigs/linux-defconfig_xen_x86_32 Fri Jun 9 13:27:36 2006 -0700 > @@ -1187,6 +1187,7 @@ CONFIG_DM_ZERO=m > CONFIG_DM_ZERO=m > CONFIG_DM_MULTIPATH=m > CONFIG_DM_MULTIPATH_EMC=m > +CONFIG_DM_USERSPACE=m > > # > # Fusion MPT device support > diff -r 5a0ed6c47673 -r db178a1b30f3 patches/linux-2.6.16.13/dm-userspace.patch > --- /dev/null Thu Jan 1 00:00:00 1970 +0000 > +++ b/patches/linux-2.6.16.13/dm-userspace.patch Fri Jun 9 13:27:36 2006 -0700 > @@ -0,0 +1,1737 @@ > +diff -Naur ./drivers/md/dm-userspace.c ../linux-2.6.16.13-dmu/drivers/md/dm-userspace.c > +--- ./drivers/md/dm-userspace.c 1969-12-31 16:00:00.000000000 -0800 > ++++ ../linux-2.6.16.13-dmu/drivers/md/dm-userspace.c 2006-06-09 12:00:32.627933616 -0700 > +@@ -0,0 +1,1613 @@ > ++/* > ++ * Copyright (C) International Business Machines Corp., 2006 > ++ * Author: Dan Smith <danms@us.ibm.com> > ++ * > ++ * This program is free software; you can redistribute it and/or modify > ++ * it under the terms of the GNU General Public License as published by > ++ * the Free Software Foundation; under version 2 of the License. > ++ * > ++ * This program is distributed in the hope that it will be useful, > ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of > ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > ++ * GNU General Public License for more details. 
> ++ * > ++ * You should have received a copy of the GNU General Public License > ++ * along with this program; if not, write to the Free Software > ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA > ++ * > ++ */ > ++ > ++#include <linux/module.h> > ++#include <linux/init.h> > ++#include <linux/blkdev.h> > ++#include <linux/bio.h> > ++#include <linux/slab.h> > ++#include <linux/spinlock.h> > ++#include <linux/list.h> > ++#include <linux/fs.h> > ++#include <linux/cdev.h> > ++#include <linux/types.h> > ++#include <linux/poll.h> > ++ > ++#include <linux/dm-userspace.h> > ++ > ++#include "dm.h" > ++#include "dm-bio-list.h" > ++#include "kcopyd.h" > ++ > ++#define DMU_DEBUG 0 > ++ > ++#define DMU_COPY_PAGES 256 > ++#define DMU_KEY_LEN 256 > ++ > ++#define DMU_PREFIX "dm-userspace: " > ++#define DMU_SET_ERROR(ti, msg) ti->error = DMU_PREFIX msg > ++ > ++#define DMU_LIFETIME 128 > ++ > ++#if DMU_DEBUG > ++#define DPRINTK( s, arg... ) printk(DMU_PREFIX s, ##arg) > ++#else > ++#define DPRINTK( s, arg... 
) > ++#endif > ++ > ++kmem_cache_t *request_cache; > ++kmem_cache_t *remap_cache; > ++ > ++static int enable_watchdog = 0; > ++static struct work_struct wd; > ++ > ++static spinlock_t devices_lock; > ++static LIST_HEAD(devices); > ++ > ++/* Device number for the control device */ > ++static dev_t our_dev; > ++ > ++struct target_device { > ++ struct list_head list; > ++ struct block_device *bdev; > ++ struct kref users; > ++}; > ++ > ++struct hash_table { > ++ struct list_head *table; > ++ uint64_t size; > ++ uint32_t mask; > ++ uint64_t count; > ++}; > ++ > ++/* A dm-userspace device, which consists of multiple targets sharing a > ++ * common key > ++ */ > ++struct dmu_device { > ++ spinlock_t lock; > ++ struct list_head list; > ++ struct list_head requests; > ++ struct list_head target_devs; > ++ > ++ struct hash_table remaps; > ++ > ++ struct cdev cdev; > ++ dev_t ctl_dev; > ++ > ++ char key[DMU_KEY_LEN]; > ++ struct kref users; > ++ > ++ wait_queue_head_t wqueue; > ++ > ++ uint64_t block_size; > ++ uint64_t block_mask; > ++ unsigned int block_shift; > ++ > ++ struct kcopyd_client *kcopyd_client; > ++ > ++ /* > ++ * Count of the number of outstanding requests that have been > ++ * made against this device, but have not yet been flushed > ++ */ > ++ atomic_t remap_ct; > ++ > ++ uint32_t id_counter; > ++}; > ++ > ++struct userspace_request { > ++ spinlock_t lock; > ++ struct list_head list; > ++ struct dmu_device *dev; > ++ int type; > ++ int sent; > ++ uint32_t flags; > ++ uint32_t id; > ++ union { > ++ struct bio_list bios; > ++ uint64_t block; > ++ } u; > ++ atomic_t refcnt; > ++}; > ++ > ++struct dmu_map { > ++ spinlock_t lock; > ++ uint64_t org_block; /* Original block */ > ++ uint64_t new_block; /* Destination block */ > ++ int64_t offset; > ++ uint32_t flags; > ++ struct target_device *src; > ++ struct target_device *dest; > ++ struct bio_list bios; > ++ struct list_head list; > ++ struct dmu_device *dev; > ++ > ++ uint32_t use_count; > ++ > ++ struct 
dmu_map *next; /* Next remap that is dependent on this one */ > ++}; > ++ > ++/* Forward delcarations */ > ++static struct file_operations ctl_fops; > ++static void copy_block(struct dmu_map *remap); > ++ > ++/* > ++ * Return the block number for @sector > ++ */ > ++static inline u64 dmu_block(struct dmu_device *dev, > ++ sector_t sector) > ++{ > ++ return sector >> dev->block_shift; > ++} > ++ > ++/* > ++ * Return the sector offset in a block for @sector > ++ */ > ++static inline u64 dmu_sector_offset(struct dmu_device *dev, > ++ sector_t sector) > ++{ > ++ return sector & dev->block_mask; > ++} > ++ > ++/* > ++ * Return the starting sector for @block > ++ */ > ++static inline u64 dmu_sector(struct dmu_device *dev, > ++ uint64_t block) > ++{ > ++ return block << dev->block_shift; > ++} > ++ > ++static void error_bios(struct bio_list *bios) > ++{ > ++ struct bio *bio; > ++ int count = 0; > ++ > ++ while ((bio = bio_list_pop(bios)) != NULL) { > ++ bio_io_error(bio, bio->bi_size); > ++ count++; > ++ } > ++ > ++ if (count) > ++ printk(KERN_ERR DMU_PREFIX > ++ "*** Failed %i requests\n", count); > ++} > ++ > ++static void init_remap(struct dmu_device *dev, struct dmu_map *remap) > ++{ > ++ spin_lock_init(&remap->lock); > ++ remap->org_block = remap->new_block = 0; > ++ remap->offset = 0; > ++ remap->flags = 0; > ++ remap->src = remap->dest = NULL; > ++ bio_list_init(&remap->bios); > ++ INIT_LIST_HEAD(&remap->list); > ++ remap->dev = dev; > ++ remap->use_count = DMU_LIFETIME; > ++ remap->next = NULL; > ++} > ++ > ++static void init_request(struct dmu_device *dev, > ++ int type, > ++ struct userspace_request *req) > ++{ > ++ spin_lock_init(&req->lock); > ++ INIT_LIST_HEAD(&req->list); > ++ req->dev = dev; > ++ req->type = type; > ++ req->sent = 0; > ++ req->flags = 0; > ++ if (type == DM_USERSPACE_COPY_FINISHED) { > ++ req->u.block = 0; > ++ req->id = 0; > ++ } else { > ++ bio_list_init(&req->u.bios); > ++ spin_lock(&dev->lock); > ++ dev->id_counter++; > ++ if 
(dev->id_counter == 0) > ++ dev->id_counter = 1; > ++ req->id = dev->id_counter; > ++ spin_unlock(&dev->lock); > ++ } > ++ atomic_set(&req->refcnt, 0); > ++} > ++ > ++static void destroy_remap(struct dmu_map *remap) > ++{ > ++ error_bios(&remap->bios); > ++} > ++ > ++/* > ++ * For an even block distribution, this is not too bad, but it could > ++ * probably be better > ++ */ > ++static uint32_t ht_hash(struct hash_table *ht, uint64_t block) > ++{ > ++ return (uint32_t)block & ht->mask; > ++} > ++ > ++static int ht_init(struct hash_table *ht, unsigned long size) > ++{ > ++ uint64_t i; > ++ unsigned long pages; > ++ unsigned int order = ffs((size * sizeof(struct list_head *)) / > ++ PAGE_SIZE); > ++ > ++ DPRINTK("Going to allocate 2^%u pages for %lu-entry table\n", > ++ order, size); > ++ > ++ pages = __get_free_pages(GFP_ATOMIC, order); > ++ if (!pages) { > ++ DPRINTK("Failed to allocate hash table (%lu)\n", size); > ++ return 0; > ++ } > ++ > ++ ht->table = (void *)pages; > ++ ht->size = size; > ++ ht->count = 0; > ++ ht->mask = size - 1; > ++ > ++ for (i = 0; i < size; i++) > ++ INIT_LIST_HEAD(&ht->table[i]); > ++ > ++ return 1; > ++} > ++ > ++static void ht_insert_bucket(struct dmu_map *map, struct list_head *list) > ++{ > ++ list_add_tail(&map->list, list); > ++} > ++ > ++/* > ++ * I''m sure this is quite dumb, but it works for now > ++ */ > ++static int ht_should_grow(struct hash_table *ht) > ++{ > ++ return ht->count > (2 * (ht->size / 4)); > ++} > ++ > ++static void ht_grow_table(struct hash_table *ht); > ++static void ht_insert_map(struct hash_table *ht, struct dmu_map *map) > ++{ > ++ uint32_t addr; > ++ > ++ addr = ht_hash(ht, map->org_block) & ht->mask; > ++ > ++ BUG_ON(addr >= ht->size); > ++ > ++ ht_insert_bucket(map, &ht->table[addr]); > ++ ht->count++; > ++ > ++ if (ht_should_grow(ht)) > ++ ht_grow_table(ht); > ++} > ++ > ++static void ht_insert_map_dev(struct dmu_device *dev, struct dmu_map *map) > ++{ > ++ spin_lock(&dev->lock); > ++ 
ht_insert_map(&dev->remaps, map); > ++ spin_unlock(&dev->lock); > ++} > ++ > ++static void ht_delete_map(struct hash_table *ht, struct dmu_map *map) > ++{ > ++ list_del(&map->list); > ++ BUG_ON(ht->count == 0); > ++ ht->count--; > ++} > ++ > ++static void ht_delete_map_dev(struct dmu_device *dev, struct dmu_map *map) > ++{ > ++ spin_lock(&dev->lock); > ++ ht_delete_map(&dev->remaps, map); > ++ spin_unlock(&dev->lock); > ++} > ++ > ++static struct dmu_map *ht_find_map(struct hash_table *ht, uint64_t block) > ++{ > ++ uint32_t addr; > ++ struct dmu_map *m; > ++ > ++ addr = ht_hash(ht, block) & ht->mask; > ++ > ++ BUG_ON(addr >= ht->size); > ++ > ++ list_for_each_entry(m, &ht->table[addr], list) { > ++ if (m->org_block == block) > ++ return m; > ++ } > ++ > ++ return NULL; > ++} > ++ > ++static struct dmu_map *ht_find_map_dev(struct dmu_device *dev, uint64_t block) > ++{ > ++ struct dmu_map *remap; > ++ > ++ spin_lock(&dev->lock); > ++ > ++ remap = ht_find_map(&dev->remaps, block); > ++ > ++ spin_unlock(&dev->lock); > ++ > ++ return remap; > ++} > ++ > ++static void ht_grow_table(struct hash_table *ht) > ++{ > ++ struct hash_table old_table; > ++ uint64_t i; > ++ > ++ old_table = *ht; > ++ > ++ if (!ht_init(ht, old_table.size * 2)) { > ++ DPRINTK("Can''t grow table to %llu\n", > ++ old_table.size * 2); > ++ return; > ++ } > ++ > ++ DPRINTK("Growing from %llu to %llu\n", > ++ old_table.size, ht->size); > ++ > ++ for (i = 0; i < old_table.size; i++ ) { > ++ struct dmu_map *m, *n; > ++ list_for_each_entry_safe(m, n, &old_table.table[i], > ++ list) { > ++ list_del_init(&m->list); > ++ ht_insert_map(ht, m); > ++ } > ++ } > ++ > ++ free_pages((unsigned long)old_table.table, > ++ ffs((old_table.size * sizeof(struct list_head *)) > ++ / PAGE_SIZE)); > ++} > ++ > ++static uint64_t ht_destroy_table(struct hash_table *ht) > ++{ > ++ uint64_t i, count = 0; > ++ struct dmu_map *m, *n; > ++ > ++ for (i = 0; i < ht->size; i++) { > ++ list_for_each_entry_safe(m, n, &ht->table[i], 
list) { > ++ ht_delete_map(ht, m); > ++ kmem_cache_free(remap_cache, m); > ++ count++; > ++ } > ++ } > ++ > ++ return count; > ++} > ++ > ++static struct target_device *get_target(struct dmu_device *dev, > ++ dev_t devno) > ++{ > ++ > ++ struct target_device *target; > ++ struct block_device *bdev; > ++ > ++ spin_lock(&dev->lock); > ++ list_for_each_entry(target, &dev->target_devs, list) { > ++ if (target->bdev->bd_dev == devno) { > ++ spin_unlock(&dev->lock); > ++ goto out; > ++ } > ++ } > ++ spin_unlock(&dev->lock); > ++ > ++ bdev = open_by_devnum(devno, FMODE_READ | FMODE_WRITE); > ++ if (IS_ERR(bdev)) { > ++ printk(KERN_ERR DMU_PREFIX "Unable to lookup device %x\n", > ++ devno); > ++ return NULL; > ++ } > ++ > ++ target = kmalloc(sizeof(*target), GFP_KERNEL); > ++ if (!target) { > ++ printk(KERN_ERR DMU_PREFIX > ++ "Unable to alloc new target device\n"); > ++ return NULL; > ++ } > ++ > ++ target->bdev = bdev; > ++ INIT_LIST_HEAD(&target->list); > ++ > ++ spin_lock(&dev->lock); > ++ list_add_tail(&target->list, &dev->target_devs); > ++ spin_unlock(&dev->lock); > ++ > ++ out: > ++ return target; > ++} > ++ > ++/* Caller must hold dev->lock */ > ++static void put_target(struct dmu_device *dev, > ++ struct target_device *target) > ++{ > ++ list_del(&target->list); > ++ > ++ bd_release(target->bdev); > ++ blkdev_put(target->bdev); > ++ > ++ kfree(target); > ++} > ++ > ++/* > ++ * Add a request to the device''s request queue > ++ */ > ++static void add_request(struct dmu_device *dev, > ++ struct userspace_request *req) > ++{ > ++ spin_lock(&dev->lock); > ++ list_add_tail(&req->list, &dev->requests); > ++ spin_unlock(&dev->lock); > ++ > ++ wake_up(&dev->wqueue); > ++} > ++ > ++/* > ++ * > ++ */ > ++static int have_pending_requests(struct dmu_device *dev) > ++{ > ++ struct userspace_request *req; > ++ int ret = 0; > ++ > ++ spin_lock(&dev->lock); > ++ > ++ list_for_each_entry(req, &dev->requests, list) { > ++ if (!req->sent) { > ++ ret = 1; > ++ break; > ++ } > ++ } > 
++ > ++ spin_unlock(&dev->lock); > ++ > ++ return ret; > ++} > ++ > ++/* > ++ * This periodically dumps out some debug information. It''s really > ++ * only useful while developing. > ++ */ > ++static void watchdog(void *data) > ++{ > ++ unsigned int v_remaps, i_remaps, reqs, s_reqs, devs = 0; > ++ struct dmu_device *dev; > ++ struct dmu_map *map; > ++ struct userspace_request *req; > ++ uint64_t i; > ++ > ++ spin_lock(&devices_lock); > ++ > ++ list_for_each_entry(dev, &devices, list) { > ++ spin_lock(&dev->lock); > ++ > ++ v_remaps = i_remaps = reqs = s_reqs = 0; > ++ > ++ for (i = 0; i < dev->remaps.size; i++) { > ++ list_for_each_entry(map, &dev->remaps.table[i], list) > ++ if (dmu_get_flag(&map->flags, DMU_FLAG_VALID)) > ++ v_remaps++; > ++ else > ++ i_remaps++; > ++ } > ++ > ++ list_for_each_entry(req, &dev->requests, list) > ++ if (req->sent) > ++ s_reqs++; > ++ else > ++ reqs++; > ++ > ++ printk("Device %x:%x: " > ++ " reqs: %u/%u " > ++ " inv maps: %u " > ++ " val maps: %u (%i)\n", > ++ MAJOR(dev->ctl_dev), MINOR(dev->ctl_dev), > ++ reqs, s_reqs, i_remaps, v_remaps, > ++ atomic_read(&dev->remap_ct)); > ++ devs++; > ++ > ++ spin_unlock(&dev->lock); > ++ } > ++ > ++ spin_unlock(&devices_lock); > ++ > ++ schedule_delayed_work(&wd, HZ); > ++} > ++ > ++static void __bio_remap(struct bio *bio, > ++ struct dmu_map *remap) > ++{ > ++ BUG_ON(remap->dest == NULL); > ++ > ++ bio->bi_sector = dmu_sector(remap->dev, remap->new_block) + > ++ dmu_sector_offset(remap->dev, bio->bi_sector) + > ++ remap->offset; > ++ > ++ bio->bi_bdev = remap->dest->bdev; > ++} > ++ > ++/* > ++ Pop, remap, and flush a bio. 
Set VALID flag if no bios > ++ available > ++*/ > ++static struct bio *pop_and_remap(struct dmu_map *remap) > ++{ > ++ struct bio *bio = NULL; > ++ > ++ spin_lock(&remap->lock); > ++ > ++ bio = bio_list_pop(&remap->bios); > ++ if (bio) > ++ __bio_remap(bio, remap); > ++ else { > ++ /* If there are no more bios, we must set the VALID > ++ flag before we release the lock */ > ++ dmu_set_flag(&remap->flags, DMU_FLAG_VALID); > ++ } > ++ > ++ spin_unlock(&remap->lock); > ++ > ++ return bio; > ++} > ++ > ++static void get_remap_attrs(struct dmu_map *remap, > ++ int *copy_first, > ++ int *temporary, > ++ struct dmu_map **next) > ++{ > ++ spin_lock(&remap->lock); > ++ > ++ *copy_first = dmu_get_flag(&remap->flags, DMU_FLAG_COPY_FIRST); > ++ *temporary = dmu_get_flag(&remap->flags, DMU_FLAG_TEMPORARY); > ++ *next = remap->next; > ++ remap->next = NULL; > ++ > ++ spin_unlock(&remap->lock); > ++} > ++ > ++static void remap_flusher(struct dmu_map *remap) > ++{ > ++ struct bio *bio; > ++ struct userspace_request *req; > ++ int copy_first = 0, temporary = 0; > ++ struct dmu_map *next; > ++ > ++ DPRINTK("Flushing bios for block %llu:%llu\n", > ++ remap->org_block, remap->new_block); > ++ > ++ while (1) { > ++ > ++ bio = pop_and_remap(remap); > ++ > ++ if (bio) > ++ generic_make_request(bio); > ++ else > ++ break; > ++ > ++ atomic_dec(&remap->dev->remap_ct); > ++ > ++ DPRINTK("Flushed %llu:%llu (%u bytes)\n", > ++ dmu_block(remap->dev, bio->bi_sector), > ++ dmu_sector_offset(remap->dev, bio->bi_sector), > ++ bio->bi_size); > ++ } > ++ > ++ get_remap_attrs(remap, ©_first, &temporary, &next); > ++ > ++ if (next) { > ++ /* FIXME: Make copy_block check for this flag and just > ++ flush if not set to avoid this non-locked access */ > ++ if (dmu_get_flag(&next->flags, DMU_FLAG_COPY_FIRST)) > ++ copy_block(next); > ++ else > ++ remap_flusher(next); > ++ } > ++ > ++ /* Notify userspace */ > ++ if (copy_first) { > ++ req = kmem_cache_alloc(request_cache, GFP_KERNEL); > ++ if (!req) { > ++ 
printk(KERN_ERR DMU_PREFIX > ++ "Failed to allocate copy response\n"); > ++ return; > ++ } > ++ > ++ init_request(remap->dev, DM_USERSPACE_COPY_FINISHED, req); > ++ > ++ req->u.block = remap->org_block; > ++ > ++ add_request(remap->dev, req); > ++ } > ++ > ++ if (temporary) { > ++ destroy_remap(remap); > ++ kmem_cache_free(remap_cache, remap); > ++ } > ++} > ++ > ++static void destroy_dmu_device(struct kref *ref) > ++{ > ++ struct dmu_device *dev; > ++ struct list_head *cursor, *next; > ++ uint64_t remaps; > ++ > ++ dev = container_of(ref, struct dmu_device, users); > ++ > ++ DPRINTK("Destroying device: %s\n", dev->key); > ++ > ++ spin_lock(&devices_lock); > ++ list_del(&dev->list); > ++ spin_unlock(&devices_lock); > ++ > ++ list_for_each_safe(cursor, next, &dev->target_devs) { > ++ struct target_device *target; > ++ > ++ target = list_entry(cursor, > ++ struct target_device, > ++ list); > ++ > ++ put_target(dev, target); > ++ } > ++ > ++ remaps = ht_destroy_table(&dev->remaps); > ++ DPRINTK("Destroyed %llu/%llu remaps\n", remaps, dev->remaps.count); > ++ > ++ list_for_each_safe(cursor, next, &dev->requests) { > ++ struct userspace_request *req; > ++ > ++ req = list_entry(cursor, > ++ struct userspace_request, > ++ list); > ++ > ++ list_del(&req->list); > ++ > ++ error_bios(&req->u.bios); > ++ > ++ kmem_cache_free(request_cache, req); > ++ } > ++ > ++ kcopyd_client_destroy(dev->kcopyd_client); > ++ > ++ cdev_del(&dev->cdev); > ++ kfree(dev); > ++} > ++ > ++static inline void get_dev(struct dmu_device *dev) > ++{ > ++ DPRINTK("get on %s\n", dev->key); > ++ kref_get(&dev->users); > ++} > ++ > ++static inline void put_dev(struct dmu_device *dev) > ++{ > ++ DPRINTK("put on %s\n", dev->key); > ++ kref_put(&dev->users, destroy_dmu_device); > ++} > ++ > ++static int get_free_minor(void) > ++{ > ++ struct dmu_device *dev; > ++ int minor = 0; > ++ > ++ spin_lock(&devices_lock); > ++ > ++ list_for_each_entry(dev, &devices, list) { > ++ if (MINOR(dev->ctl_dev) != minor) > ++ 
break; > ++ minor++; > ++ } > ++ > ++ spin_unlock(&devices_lock); > ++ > ++ return minor; > ++} > ++ > ++static int init_dmu_device(struct dmu_device *dev, u32 block_size) > ++{ > ++ int ret; > ++ > ++ cdev_init(&dev->cdev, &ctl_fops); > ++ dev->cdev.owner = THIS_MODULE; > ++ dev->cdev.ops = &ctl_fops; > ++ > ++ init_waitqueue_head(&dev->wqueue); > ++ INIT_LIST_HEAD(&dev->list); > ++ INIT_LIST_HEAD(&dev->requests); > ++ INIT_LIST_HEAD(&dev->target_devs); > ++ kref_init(&dev->users); > ++ spin_lock_init(&dev->lock); > ++ > ++ atomic_set(&dev->remap_ct, 0); > ++ dev->id_counter = 1; /* reserve 0 for unsolicited maps */ > ++ > ++ if (!ht_init(&dev->remaps, 2048)) { > ++ printk(KERN_ERR DMU_PREFIX > ++ "Unable to allocate hash table\n"); > ++ return 0; > ++ } > ++ > ++ dev->block_size = block_size; > ++ dev->block_mask = block_size - 1; > ++ dev->block_shift = ffs(block_size) - 1; > ++ > ++ ret = kcopyd_client_create(DMU_COPY_PAGES, &dev->kcopyd_client); > ++ if (ret) { > ++ printk(DMU_PREFIX "Failed to initialize kcopyd client\n"); > ++ return 0; > ++ } > ++ > ++ return 1; > ++} > ++ > ++static struct dmu_device *new_dmu_device(char *key, > ++ struct dm_target *ti, > ++ u32 block_size) > ++{ > ++ struct dmu_device *dev, *ptr; > ++ int ret; > ++ > ++ dev = kmalloc(sizeof(*dev), GFP_KERNEL); > ++ if (dev == NULL) { > ++ printk(DMU_PREFIX "Failed to allocate new userspace device\n"); > ++ return NULL; > ++ } > ++ > ++ if (!init_dmu_device(dev, block_size)) > ++ goto bad1; > ++ > ++ snprintf(dev->key, DMU_KEY_LEN, "%s", key); > ++ > ++ DPRINTK("New device with size %llu mask 0x%llX shift %u\n", > ++ dev->block_size, dev->block_mask, dev->block_shift); > ++ > ++ dev->ctl_dev = MKDEV(MAJOR(our_dev), get_free_minor()); > ++ > ++ ret = cdev_add(&dev->cdev, dev->ctl_dev, 1); > ++ if (ret < 0) { > ++ printk(DMU_PREFIX "Failed to register control device %d:%d\n", > ++ MAJOR(dev->ctl_dev), MINOR(dev->ctl_dev)); > ++ goto bad2; > ++ } > ++ > ++ DPRINTK("Registered new control 
interface: %i:%i\n", > ++ MAJOR(dev->ctl_dev), MINOR(dev->ctl_dev)); > ++ > ++ spin_lock(&devices_lock); > ++ if (list_empty(&devices)) > ++ list_add(&dev->list, &devices); > ++ else > ++ list_for_each_entry(ptr, &devices, list) > ++ if (MINOR(ptr->ctl_dev) < MINOR(dev->ctl_dev)) > ++ list_add(&dev->list, &ptr->list); > ++ spin_unlock(&devices_lock); > ++ > ++ return dev; > ++ > ++ bad2: > ++ cdev_del(&dev->cdev); > ++ bad1: > ++ kfree(dev); > ++ printk(KERN_ERR DMU_PREFIX "Failed to create device\n"); > ++ return NULL; > ++} > ++ > ++static struct dmu_device *find_dmu_device(const char *key) > ++{ > ++ struct dmu_device *dev; > ++ struct dmu_device *match = NULL; > ++ > ++ spin_lock(&devices_lock); > ++ > ++ list_for_each_entry(dev, &devices, list) { > ++ spin_lock(&dev->lock); > ++ if (strncmp(dev->key, key, DMU_KEY_LEN) == 0) { > ++ match = dev; > ++ spin_unlock(&dev->lock); > ++ break; > ++ } > ++ spin_unlock(&dev->lock); > ++ } > ++ > ++ spin_unlock(&devices_lock); > ++ > ++ return match; > ++} > ++ > ++static int dmu_ctr(struct dm_target *ti, unsigned int argc, char **argv) > ++{ > ++ uint64_t block_size; > ++ struct dmu_device *dev; > ++ char *device_key; > ++ char *block_size_param; > ++ > ++ if (argc < 2) { > ++ DMU_SET_ERROR(ti, "Invalid argument count"); > ++ return -EINVAL; > ++ } > ++ > ++ device_key = argv[0]; > ++ block_size_param = argv[1]; > ++ > ++ block_size = simple_strtoul(block_size_param, NULL, 10) / 512; > ++ > ++ dev = find_dmu_device(device_key); > ++ if (dev == NULL) { > ++ dev = new_dmu_device(device_key, > ++ ti, > ++ block_size); > ++ if (dev == NULL) { > ++ DMU_SET_ERROR(ti, "Failed to create device"); > ++ goto bad; > ++ } > ++ } else { > ++ get_dev(dev); > ++ } > ++ > ++ spin_lock(&dev->lock); > ++ if (dev->block_size != block_size) { > ++ DMU_SET_ERROR(ti, "Invalid block size"); > ++ goto bad; > ++ } > ++ spin_unlock(&dev->lock); > ++ > ++ ti->private = dev; > ++ ti->split_io = block_size; > ++ > ++ DPRINTK(" block-size: %llu 
sectors\n", dev->block_size); > ++ DPRINTK(" block-shift: %u\n", dev->block_shift); > ++ DPRINTK(" block-mask: %llx\n", dev->block_mask); > ++ > ++ return 0; > ++ > ++ bad: > ++ if (dev) { > ++ spin_unlock(&dev->lock); > ++ put_dev(dev); > ++ } > ++ > ++ return -EINVAL; > ++} > ++ > ++static void dmu_dtr(struct dm_target *ti) > ++{ > ++ struct dmu_device *dev = (struct dmu_device *) ti->private; > ++ > ++ put_dev(dev); > ++ > ++ DPRINTK("destroyed %d:%d\n", (int)ti->begin, (int)ti->len); > ++} > ++ > ++/* Search @dev for an outstanding request for remapping @block */ > ++static struct userspace_request *find_existing_req(struct dmu_device *dev, > ++ uint64_t block) > ++{ > ++ struct userspace_request *req; > ++ struct userspace_request *maybe = NULL; > ++ > ++ spin_lock(&dev->lock); > ++ > ++ list_for_each_entry(req, &dev->requests, list) { > ++ /* FIXME: Blech */ > ++ if ((req->type == DM_USERSPACE_MAP_BLOCK) && > ++ (dmu_block(dev, req->u.bios.head->bi_sector) == block)) { > ++ if (maybe) { > ++ atomic_dec(&maybe->refcnt); > ++ } > ++ maybe = req; > ++ atomic_inc(&maybe->refcnt); > ++ } > ++ } > ++ > ++ spin_unlock(&dev->lock); > ++ > ++ return maybe; > ++} > ++ > ++static int make_new_request(struct dmu_device *dev, struct bio *bio) > ++{ > ++ struct userspace_request *req; > ++ > ++ req = kmem_cache_alloc(request_cache, GFP_KERNEL); > ++ if (req == NULL) > ++ goto bad; > ++ > ++ init_request(dev, DM_USERSPACE_MAP_BLOCK, req); > ++ > ++ dmu_set_flag(&req->flags, DMU_FLAG_RD); > ++ if (bio_rw(bio)) > ++ dmu_set_flag(&req->flags, DMU_FLAG_WR); > ++ else > ++ dmu_clr_flag(&req->flags, DMU_FLAG_WR); > ++ bio_list_add(&req->u.bios, bio); > ++ > ++ add_request(dev, req); > ++ > ++ DPRINTK("Queued %s request for sector " SECTOR_FORMAT "\n", > ++ dmu_get_flag(&req->flags, DMU_FLAG_WR) ? 
"write" : "read", > ++ bio->bi_sector); > ++ > ++ return 0; > ++ > ++ bad: > ++ printk(KERN_ERR DMU_PREFIX "Failed to queue bio!\n"); > ++ return -1; > ++} > ++ > ++static int dmu_map_remap_case(struct dmu_device *dev, > ++ struct dmu_map *remap, > ++ struct bio *bio) > ++{ > ++ int ret = 0; > ++ > ++ spin_lock(&remap->lock); > ++ > ++ if (dmu_get_flag(&remap->flags, DMU_FLAG_WR) != bio_rw(bio)) { > ++ ret = -1; > ++ } else { > ++ if (dmu_get_flag(&remap->flags, DMU_FLAG_VALID)) { > ++ __bio_remap(bio, remap); > ++ ret = 1; > ++ atomic_dec(&dev->remap_ct); > ++ } else { > ++ bio_list_add(&remap->bios, bio); > ++ } > ++ } > ++ > ++ spin_unlock(&remap->lock); > ++ > ++ return ret; > ++} > ++ > ++static int dmu_map_request_case(struct dmu_device *dev, > ++ struct userspace_request *req, > ++ struct bio *bio) > ++{ > ++ int ret = 0; > ++ int req_rw = dmu_get_flag(&req->flags, DMU_FLAG_WR); > ++ > ++ spin_lock(&req->lock); > ++ > ++ if (!req_rw && bio_rw(bio) && !req->sent) { > ++ /* Convert to R/W and Queue */ > ++ dmu_set_flag(&req->flags, DMU_FLAG_WR); > ++ bio_list_add(&req->u.bios, bio); > ++ } else if (!req_rw && bio_rw(bio) && req->sent) { > ++ /* Can''t convert, must re-request */ > ++ ret = -1; > ++ } else { > ++ /* Queue */ > ++ bio_list_add(&req->u.bios, bio); > ++ } > ++ > ++ spin_unlock(&req->lock); > ++ > ++ return ret; > ++} > ++ > ++static int dmu_map(struct dm_target *ti, struct bio *bio, > ++ union map_info *map_context) > ++{ > ++ struct dmu_device *dev = (struct dmu_device *) ti->private; > ++ struct dmu_map *remap; > ++ struct userspace_request *req; > ++ int ret = 0; > ++ u64 block; > ++ > ++ atomic_inc(&dev->remap_ct); > ++ > ++ block = dmu_block(dev, bio->bi_sector); > ++ > ++ remap = ht_find_map_dev(dev, block); > ++ if (remap) { > ++ ret = dmu_map_remap_case(dev, remap, bio); > ++ if (ret >= 0) > ++ goto done; > ++ } > ++ > ++ req = find_existing_req(dev, block); > ++ if (req) { > ++ ret = dmu_map_request_case(dev, req, bio); > ++ 
atomic_dec(&req->refcnt); > ++ if (ret >= 0) > ++ goto done; > ++ } > ++ > ++ ret = make_new_request(dev, bio); > ++ > ++ done: > ++ return ret; > ++} > ++ > ++static int dmu_status(struct dm_target *ti, status_type_t type, > ++ char *result, unsigned int maxlen) > ++{ > ++ struct dmu_device *dev = (struct dmu_device *) ti->private; > ++ > ++ switch (type) { > ++ case STATUSTYPE_INFO: > ++ snprintf(result, maxlen, "%x:%x\n", > ++ MAJOR(dev->ctl_dev), > ++ MINOR(dev->ctl_dev)); > ++ break; > ++ > ++ case STATUSTYPE_TABLE: > ++ snprintf(result, maxlen, "%s %llu", > ++ dev->key, > ++ dev->block_size * 512); > ++ break; > ++ } > ++ > ++ return 0; > ++} > ++ > ++static struct target_type userspace_target = { > ++ .name = "userspace", > ++ .version = {0, 1, 0}, > ++ .module = THIS_MODULE, > ++ .ctr = dmu_ctr, > ++ .dtr = dmu_dtr, > ++ .map = dmu_map, > ++ .status = dmu_status, > ++}; > ++ > ++static int format_userspace_message(struct dmu_write *msg, > ++ struct userspace_request *req) > ++{ > ++ int ret = 1; > ++ > ++ spin_lock(&req->lock); > ++ > ++ if (req->sent) > ++ goto out; > ++ > ++ msg->id = req->id; > ++ msg->type = req->type; > ++ dmu_cpy_flag(&msg->flags, req->flags, DMU_FLAG_RD); > ++ dmu_cpy_flag(&msg->flags, req->flags, DMU_FLAG_WR); > ++ > ++ if (msg->type == DM_USERSPACE_MAP_BLOCK) { > ++ msg->org_block = dmu_block(req->dev, > ++ req->u.bios.head->bi_sector); > ++ DPRINTK("Asking userspace to map %llu (%c)\n", > ++ msg->org_block, > ++ dmu_get_flag(&msg->flags, DMU_FLAG_WR) ? 
''W'' : ''R''); > ++ } else if (msg->type == DM_USERSPACE_COPY_FINISHED) { > ++ msg->org_block = req->u.block; > ++ } else { > ++ printk(KERN_INFO DMU_PREFIX > ++ "Userspace sent unknown message type %i\n", > ++ msg->type); > ++ list_del(&req->list); > ++ ret = 0; > ++ } > ++ > ++ req->sent = 1; > ++ > ++ out: > ++ spin_unlock(&req->lock); > ++ > ++ if (msg->type == DM_USERSPACE_COPY_FINISHED) { > ++ /* COPY_FINISHED messages don''t get responses, so > ++ * we take them off the request queue here > ++ */ > ++ list_del(&req->list); > ++ kmem_cache_free(request_cache, req); > ++ } > ++ > ++ return ret; > ++} > ++ > ++ssize_t dmu_ctl_read(struct file *file, char __user *buffer, > ++ size_t size, loff_t *offset) > ++{ > ++ > ++ struct dmu_device *dev = (struct dmu_device *)file->private_data; > ++ struct dmu_write *msg; > ++ struct userspace_request *req = NULL; > ++ struct userspace_request *next; > ++ int ret = 0; > ++ int num_reqs, req_idx = 0; > ++ > ++ num_reqs = size / sizeof(*msg); > ++ > ++ if (num_reqs == 0) > ++ return -EINVAL; > ++ > ++ msg = kmalloc(num_reqs * sizeof(*msg), GFP_KERNEL); > ++ if (!msg) { > ++ printk(KERN_ERR DMU_PREFIX > ++ "Failed to alloc %i reqs!\n", num_reqs); > ++ return -ENOMEM; > ++ } > ++ > ++ while (!have_pending_requests(dev)) { > ++ if (file->f_flags & O_NONBLOCK) { > ++ return 0; > ++ } > ++ > ++ if (wait_event_interruptible(dev->wqueue, > ++ have_pending_requests(dev))) > ++ return -ERESTARTSYS; > ++ } > ++ > ++ spin_lock(&dev->lock); > ++ > ++ list_for_each_entry_safe(req, next, &dev->requests, list) { > ++ > ++ if (!format_userspace_message(&msg[req_idx], req)) > ++ continue; > ++ > ++ ret += sizeof(*msg); > ++ if (++req_idx >= num_reqs) { > ++ break; > ++ } > ++ } > ++ > ++ spin_unlock(&dev->lock); > ++ > ++ if (copy_to_user(buffer, msg, sizeof(*msg) * req_idx)) { > ++ DPRINTK("control read copy_to_user failed!\n"); > ++ ret = -EFAULT; > ++ } > ++ > ++ kfree(msg); > ++ > ++ return ret; > ++} > ++ > ++static void 
copy_callback(int read_err, > ++ unsigned int write_err, > ++ void *data) > ++{ > ++ remap_flusher((struct dmu_map *)data); > ++} > ++ > ++static void copy_block(struct dmu_map *remap) > ++{ > ++ struct io_region src, dst; > ++ struct kcopyd_client *client; > ++ > ++ spin_lock(&remap->lock); > ++ > ++ src.bdev = remap->src->bdev; > ++ src.sector = remap->org_block << remap->dev->block_shift; > ++ src.count = remap->dev->block_size; > ++ > ++ dst.bdev = remap->dest->bdev; > ++ dst.sector = (remap->new_block << remap->dev->block_shift); > ++ dst.sector += remap->offset; > ++ dst.count = remap->dev->block_size; > ++ > ++ DPRINTK("Copying: " > ++ SECTOR_FORMAT "(" SECTOR_FORMAT "):" SECTOR_FORMAT " -> " > ++ SECTOR_FORMAT "(" SECTOR_FORMAT "):" SECTOR_FORMAT "\n", > ++ remap->org_block, > ++ src.sector, > ++ src.count * 512, > ++ remap->new_block, > ++ dst.sector, > ++ dst.count * 512); > ++ > ++ client = remap->dev->kcopyd_client; > ++ > ++ spin_unlock(&remap->lock); > ++ > ++ kcopyd_copy(client, &src, 1, &dst, 0, copy_callback, remap); > ++} > ++ > ++static int remap_request(struct dmu_write *msg, > ++ struct dmu_device *dev, > ++ struct userspace_request *req) > ++ > ++{ > ++ struct dmu_map *remap = NULL, *parent = NULL; > ++ struct target_device *s_dev = NULL, *d_dev = NULL; > ++ int is_chained = 0; > ++ struct bio_list bio_holder; > ++ > ++ if (dmu_get_flag(&msg->flags, DMU_FLAG_COPY_FIRST)) { > ++ s_dev = get_target(dev, MKDEV(msg->src_maj, msg->src_min)); > ++ if (!s_dev) { > ++ printk(KERN_ERR DMU_PREFIX > ++ "Failed to find src device %i:%i\n", > ++ msg->src_maj, msg->src_min); > ++ goto bad; > ++ } > ++ } > ++ > ++ d_dev = get_target(dev, MKDEV(msg->dest_maj, msg->dest_min)); > ++ if (!d_dev) { > ++ printk(KERN_ERR DMU_PREFIX "Failed to find dst device %i:%i\n", > ++ msg->dest_maj, msg->dest_min); > ++ goto bad; > ++ } > ++ > ++ if (req) { > ++ while (atomic_read(&req->refcnt) != 0) { > ++ DPRINTK("Waiting for exclusive use of request\n"); > ++ schedule(); > 
++ } > ++ > ++ spin_lock(&req->lock); > ++ bio_holder = req->u.bios; > ++ spin_unlock(&req->lock); > ++ } else { > ++ bio_list_init(&bio_holder); > ++ } > ++ > ++ /* Allocate a new remap early (before grabbing locks), since > ++ we will most likely need it */ > ++ remap = kmem_cache_alloc(remap_cache, GFP_KERNEL); > ++ if (!remap) { > ++ printk(KERN_ERR DMU_PREFIX "Failed to alloc remap!"); > ++ goto bad; > ++ } > ++ init_remap(dev, remap); > ++ spin_lock(&remap->lock); > ++ remap->org_block = msg->org_block; > ++ > ++ spin_lock(&dev->lock); > ++ > ++ /* Here, we insert the new remap into the table, and remove > ++ the existing map, if present, all in one locked operation */ > ++ > ++ parent = ht_find_map(&dev->remaps, msg->org_block); > ++ if (parent) { > ++ > ++ spin_lock(&parent->lock); > ++ > ++ if (!dmu_get_flag(&parent->flags, DMU_FLAG_VALID)) { > ++ if (dmu_get_flag(&parent->flags, DMU_FLAG_WR) => ++ dmu_get_flag(&msg->flags, DMU_FLAG_WR)) { > ++ /* Perms match for this not-yet-valid remap, > ++ so tag our bios on to it and bail */ > ++ bio_list_merge(&parent->bios, > ++ &bio_holder); > ++ > ++ spin_unlock(&parent->lock); > ++ spin_unlock(&dev->lock); > ++ kmem_cache_free(remap_cache, remap); > ++ return 1; > ++ } else { > ++ /* Remove parent from remap table, and > ++ chain our new remap to this one so > ++ it will fire when parent goes > ++ valid */ > ++ list_del(&parent->list); > ++ if (parent->next) { > ++ DPRINTK("Parent already chained!\n"); > ++ BUG(); > ++ } > ++ parent->next = remap; > ++ dmu_set_flag(&parent->flags, > ++ DMU_FLAG_TEMPORARY); > ++ is_chained = 1; > ++ } > ++ } else { > ++ /* Remove existing valid remap */ > ++ list_del(&parent->list); > ++ destroy_remap(parent); > ++ kmem_cache_free(remap_cache, parent); > ++ } > ++ > ++ spin_unlock(&parent->lock); > ++ } > ++ > ++ ht_insert_map(&dev->remaps, remap); > ++ > ++ spin_unlock(&dev->lock); > ++ > ++ remap->new_block = msg->new_block; > ++ remap->use_count = DMU_LIFETIME; > ++ 
remap->offset = msg->offset; > ++ remap->src = s_dev; > ++ remap->dest = d_dev; > ++ remap->dev = dev; > ++ > ++ dmu_clr_flag(&remap->flags, DMU_FLAG_VALID); > ++ dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_TEMPORARY); > ++ dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_WR); > ++ dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_RD); > ++ dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_COPY_FIRST); > ++ > ++ remap->bios = bio_holder; > ++ > ++ spin_unlock(&remap->lock); > ++ > ++ if (! is_chained) { > ++ if (dmu_get_flag(&msg->flags, DMU_FLAG_COPY_FIRST)) > ++ copy_block(remap); > ++ else { > ++ remap_flusher(remap); > ++ } > ++ } > ++ > ++ return 1; > ++ > ++ bad: > ++ printk(KERN_ERR DMU_PREFIX "Remap error: chaos may ensue\n"); > ++ > ++ return 0; > ++} > ++ > ++static int invalidate_request(struct dmu_write *msg, > ++ struct dmu_device *dev) > ++{ > ++ struct dmu_map *remap; > ++ struct userspace_request *req; > ++ int ret = 1; > ++ > ++ remap = ht_find_map_dev(dev, msg->org_block); > ++ if (!remap) > ++ ret = 0; > ++ else { > ++ spin_lock(&dev->lock); > ++ spin_lock(&remap->lock); > ++ if (dmu_get_flag(&remap->flags, DMU_FLAG_VALID)) > ++ ht_delete_map(&dev->remaps, remap); > ++ else > ++ ret = 0; > ++ spin_unlock(&remap->lock); > ++ spin_unlock(&dev->lock); > ++ } > ++ > ++ req = kmem_cache_alloc(request_cache, GFP_KERNEL); > ++ if (!req) { > ++ printk(KERN_ERR DMU_PREFIX > ++ "Failed to allocate request\n"); > ++ return 0; > ++ } > ++ > ++ if (ret) > ++ init_request(dev, DM_USERSPACE_INVAL_COMPLETE, req); > ++ else > ++ init_request(dev, DM_USERSPACE_INVAL_FAILED, req); > ++ > ++ req->u.block = msg->org_block; > ++ > ++ add_request(dev, req); > ++ > ++ return ret; > ++} > ++ > ++ssize_t dmu_ctl_write(struct file *file, const char __user *buffer, > ++ size_t size, loff_t *offset) > ++{ > ++ > ++ struct dmu_device *dev = (struct dmu_device *)file->private_data; > ++ struct dmu_write msg; > ++ struct userspace_request *next; > ++ struct userspace_request 
*req = NULL, *match = NULL; > ++ int num_resp, resp_idx; > ++ int ret = 0; > ++ > ++ num_resp = size / sizeof(struct dmu_write); > ++ > ++ if (num_resp == 0) > ++ return -EINVAL; > ++ > ++ for (resp_idx = 0; resp_idx < num_resp; resp_idx++) { > ++ if (copy_from_user(&msg, buffer+ret, sizeof(msg))) { > ++ printk(DMU_PREFIX > ++ "control_write copy_from_user failed!\n"); > ++ ret = -EFAULT; > ++ goto out; > ++ } > ++ > ++ ret += sizeof(msg); > ++ > ++ match = NULL; > ++ /* See if we have a pending request that matches this */ > ++ spin_lock(&dev->lock); > ++ list_for_each_entry_safe(req, next, &dev->requests, list) { > ++ if ((req->type == DM_USERSPACE_MAP_BLOCK) && > ++ (req->id == msg.id)) { > ++ list_del(&req->list); > ++ match = req; > ++ break; > ++ } > ++ } > ++ spin_unlock(&dev->lock); > ++ > ++ if (!match) > ++ DPRINTK("Processing unsolicited request: %u\n", > ++ msg.id); > ++ > ++ switch (msg.type) { > ++ > ++ case DM_USERSPACE_MAP_BLOCK: > ++ DPRINTK("Got map: %llu -> %llu:%lli (%i:%i) [%c]\n", > ++ msg.org_block, > ++ msg.new_block, > ++ msg.offset, > ++ msg.dest_maj, > ++ msg.dest_min, > ++ dmu_get_flag(&msg.flags, DMU_FLAG_WR)?''W'':''R''); > ++ remap_request(&msg, dev, match); > ++ break; > ++ > ++ case DM_USERSPACE_MAP_FAILED: > ++ if (match) { > ++ printk(KERN_EMERG DMU_PREFIX > ++ "userspace reported " > ++ "failure to map sector %lu\n", > ++ (unsigned long) > ++ match->u.bios.head->bi_sector); > ++ > ++ spin_lock(&match->lock); > ++ error_bios(&match->u.bios); > ++ spin_unlock(&match->lock); > ++ } > ++ break; > ++ default: > ++ printk(KERN_ERR DMU_PREFIX > ++ "Unknown request type: %i\n", msg.type); > ++ } > ++ > ++ if (match) > ++ kmem_cache_free(request_cache, match); > ++ } > ++ out: > ++ return ret; > ++} > ++ > ++int dmu_ctl_open(struct inode *inode, struct file *file) > ++{ > ++ struct dmu_device *dev; > ++ > ++ dev = container_of(inode->i_cdev, struct dmu_device, cdev); > ++ > ++ get_dev(dev); > ++ > ++ file->private_data = dev; > ++ > ++ 
return 0; > ++} > ++ > ++int dmu_ctl_release(struct inode *inode, struct file *file) > ++{ > ++ struct dmu_device *dev; > ++ > ++ dev = (struct dmu_device *)file->private_data; > ++ > ++ put_dev(dev); > ++ > ++ return 0; > ++} > ++ > ++unsigned dmu_ctl_poll(struct file *file, poll_table *wait) > ++{ > ++ struct dmu_device *dev; > ++ unsigned mask = 0; > ++ > ++ dev = (struct dmu_device *)file->private_data; > ++ > ++ poll_wait(file, &dev->wqueue, wait); > ++ > ++ if (have_pending_requests(dev)) > ++ mask |= POLLIN | POLLRDNORM; > ++ > ++ return mask; > ++} > ++ > ++static struct file_operations ctl_fops = { > ++ .open = dmu_ctl_open, > ++ .release = dmu_ctl_release, > ++ .read = dmu_ctl_read, > ++ .write = dmu_ctl_write, > ++ .poll = dmu_ctl_poll, > ++ .owner = THIS_MODULE, > ++}; > ++ > ++int __init dm_userspace_init(void) > ++{ > ++ int r = dm_register_target(&userspace_target); > ++ if (r < 0) { > ++ DMERR(DMU_PREFIX "Register failed %d", r); > ++ return 0; > ++ } > ++ > ++ spin_lock_init(&devices_lock); > ++ > ++ if (enable_watchdog) { > ++ INIT_WORK(&wd, watchdog, NULL); > ++ schedule_delayed_work(&wd, HZ); > ++ } > ++ > ++ request_cache > ++ kmem_cache_create("dm-userspace-requests", > ++ sizeof(struct userspace_request), > ++ __alignof__ (struct userspace_request), > ++ 0, NULL, NULL); > ++ if (!request_cache) { > ++ DMERR(DMU_PREFIX "Failed to allocate request cache\n"); > ++ goto bad; > ++ } > ++ > ++ remap_cache > ++ kmem_cache_create("dm-userspace-remaps", > ++ sizeof(struct dmu_map), > ++ __alignof__ (struct dmu_map), > ++ 0, NULL, NULL); > ++ if (!remap_cache) { > ++ DMERR(DMU_PREFIX "Failed to allocate remap cache\n"); > ++ goto bad2; > ++ } > ++ > ++ r = alloc_chrdev_region(&our_dev, 0, 10, "dm-userspace"); > ++ if (r) { > ++ DMERR(DMU_PREFIX "Failed to allocate chardev region\n"); > ++ goto bad3; > ++ } > ++ > ++ DPRINTK(DMU_PREFIX "Loaded (major %i)\n", MAJOR(our_dev)); > ++ > ++ return 1; > ++ > ++ bad3: > ++ kmem_cache_destroy(remap_cache); > ++ 
bad2: > ++ kmem_cache_destroy(request_cache); > ++ bad: > ++ dm_unregister_target(&userspace_target); > ++ return 0; > ++ > ++} > ++ > ++void __exit dm_userspace_exit(void) > ++{ > ++ int r; > ++ struct list_head *cursor, *next; > ++ struct dmu_device *dev; > ++ > ++ DPRINTK(DMU_PREFIX "Unloading\n"); > ++ > ++ if (enable_watchdog) > ++ if (!cancel_delayed_work(&wd)) > ++ flush_scheduled_work(); > ++ > ++ spin_lock(&devices_lock); > ++ > ++ list_for_each_safe(cursor, next, &devices) { > ++ dev = list_entry(cursor, struct dmu_device, list); > ++ list_del(cursor); > ++ } > ++ > ++ spin_unlock(&devices_lock); > ++ > ++ unregister_chrdev_region(our_dev, 10); > ++ > ++ kmem_cache_destroy(request_cache); > ++ kmem_cache_destroy(remap_cache); > ++ > ++ r = dm_unregister_target(&userspace_target); > ++ if (r < 0) > ++ DMERR(DMU_PREFIX "unregister failed %d", r); > ++} > ++ > ++module_init(dm_userspace_init); > ++module_exit(dm_userspace_exit); > ++ > ++module_param(enable_watchdog, int, S_IRUGO); > ++ > ++MODULE_DESCRIPTION(DM_NAME " userspace target"); > ++MODULE_AUTHOR("Dan Smith"); > ++MODULE_LICENSE("GPL"); > +diff -Naur ./drivers/md/Kconfig ../linux-2.6.16.13-dmu/drivers/md/Kconfig > +--- ./drivers/md/Kconfig 2006-05-02 14:38:44.000000000 -0700 > ++++ ../linux-2.6.16.13-dmu/drivers/md/Kconfig 2006-06-09 10:20:35.701604992 -0700 > +@@ -210,6 +210,12 @@ > + ---help--- > + Allow volume managers to take writeable snapshots of a device. 
> + > ++config DM_USERSPACE > ++ tristate "Userspace target (EXPERIMENTAL)" > ++ depends on BLK_DEV_DM && EXPERIMENTAL > ++ ---help--- > ++ A target that provides a userspace interface to device-mapper > ++ > + config DM_MIRROR > + tristate "Mirror target (EXPERIMENTAL)" > + depends on BLK_DEV_DM && EXPERIMENTAL > +diff -Naur ./drivers/md/Makefile ../linux-2.6.16.13-dmu/drivers/md/Makefile > +--- ./drivers/md/Makefile 2006-05-02 14:38:44.000000000 -0700 > ++++ ../linux-2.6.16.13-dmu/drivers/md/Makefile 2006-06-09 10:20:35.701604992 -0700 > +@@ -37,6 +37,7 @@ > + obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o > + obj-$(CONFIG_DM_MIRROR) += dm-mirror.o > + obj-$(CONFIG_DM_ZERO) += dm-zero.o > ++obj-$(CONFIG_DM_USERSPACE) += dm-userspace.o > + > + quiet_cmd_unroll = UNROLL $@ > + cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \ > +diff -Naur ./include/linux/dm-userspace.h ../linux-2.6.16.13-dmu/include/linux/dm-userspace.h > +--- ./include/linux/dm-userspace.h 1969-12-31 16:00:00.000000000 -0800 > ++++ ../linux-2.6.16.13-dmu/include/linux/dm-userspace.h 2006-06-09 12:00:32.630933160 -0700 > +@@ -0,0 +1,89 @@ > ++/* > ++ * Copyright (C) International Business Machines Corp., 2006 > ++ * Author: Dan Smith <danms@us.ibm.com> > ++ * > ++ * This program is free software; you can redistribute it and/or modify > ++ * it under the terms of the GNU General Public License as published by > ++ * the Free Software Foundation; under version 2 of the License. > ++ * > ++ * This program is distributed in the hope that it will be useful, > ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of > ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > ++ * GNU General Public License for more details. 
> ++ * > ++ * You should have received a copy of the GNU General Public License > ++ * along with this program; if not, write to the Free Software > ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA > ++ * > ++ */ > ++ > ++#ifndef __DM_USERSPACE_H > ++#define __DM_USERSPACE_H > ++ > ++#ifdef __KERNEL__ > ++# include <linux/types.h> > ++#else > ++# include <stdint.h> > ++#endif > ++ > ++/* > ++ * Message Types > ++ */ > ++#define DM_USERSPACE_MAP_BLOCK 1 > ++#define DM_USERSPACE_MAP_FAILED 2 > ++#define DM_USERSPACE_MAP_INVALIDATE 3 > ++#define DM_USERSPACE_COPY_FINISHED 100 > ++#define DM_USERSPACE_INVAL_COMPLETE 101 > ++#define DM_USERSPACE_INVAL_FAILED 102 > ++ > ++/* > ++ * Flags and associated macros > ++ */ > ++#define DMU_FLAG_VALID 1 > ++#define DMU_FLAG_RD 2 > ++#define DMU_FLAG_WR 4 > ++#define DMU_FLAG_COPY_FIRST 8 > ++#define DMU_FLAG_TEMPORARY 16 > ++ > ++static int dmu_get_flag(uint32_t *flags, uint32_t flag) > ++{ > ++ return (*flags & flag) != 0; > ++} > ++ > ++static void dmu_set_flag(uint32_t *flags, uint32_t flag) > ++{ > ++ *flags |= flag; > ++} > ++ > ++static void dmu_clr_flag(uint32_t *flags, uint32_t flag) > ++{ > ++ *flags &= (~flag); > ++} > ++ > ++static void dmu_cpy_flag(uint32_t *flags, uint32_t src, uint32_t flag) > ++{ > ++ *flags = (*flags & ~flag) | (src & flag); > ++} > ++ > ++/* > ++ * This is the message that is passed back and forth between the > ++ * kernel and the user application > ++ */ > ++struct dmu_write { > ++ uint32_t id; > ++ uint32_t type; /* Type of request */ > ++ uint32_t flags; /* Flags */ > ++ > ++ uint64_t org_block; /* Block that was accessed */ > ++ uint64_t new_block; /* The new block it should go to */ > ++ int64_t offset; /* Sector offset of the block, if needed */ > ++ > ++ uint32_t src_maj; /* The source device for copying */ > ++ uint32_t src_min; > ++ > ++ uint32_t dest_maj; /* Destination device for copying, and */ > ++ uint32_t dest_min; /* for the block access */ > ++ > ++}; 
> ++ > ++#endif > ------------------------------------------------------------------------ > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xensource.com > http://lists.xensource.com/xen-devel_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Dan Smith
2006-Jun-09 23:12 UTC
Re: [Xen-devel] [PATCH] Add dm-userspace to the Xen kernel
AL> Shouldn't this go to LKML? Perhaps so. I haven't received much feedback from the device-mapper maintainer and community, although I plan to push it out for acceptance on Monday. However, it may take a while before it makes it into mainline, and even when it does, the linux version in the xen tree will have to be updated before it will be easily available to xen users. My thought is that since it's a clean patch applied at build time, it would be ok to get it in the tree for testing and experimentation in the meantime. If/when it goes into mainline and is merged into xen, then the patch can just come out. What do the maintainers think about this plan? -- Dan Smith IBM Linux Technology Center Open Hypervisor Team email: danms@us.ibm.com _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Keir Fraser
2006-Jun-10 08:55 UTC
Re: [Xen-devel] [PATCH] Add dm-userspace to the Xen kernel
On 10 Jun 2006, at 00:12, Dan Smith wrote:> Perhaps so. I haven''t received much feedback from the device-mapper > maintainer and community, although I plan to push it out for > acceptance on Monday. However, it may take a while before it makes it > into mainline, and even when it does, the linux version in the xen > tree will have to be updated before it will be easily available to xen > users. My thought is that since it''s a clean patch applied at build > time, that it would be ok to get it in the tree for testing and > experimentation in the meantime. If/when it goes into mainline and is > merged into xen, then the patch can just come out. > > What do the maintainers think about this plan?If there are potential users who will shout for it to go in then it seems reasonable. -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Bastian Blank
2006-Jun-10 20:40 UTC
Re: [Xen-devel] [PATCH] Add dm-userspace to the Xen kernel
On Fri, Jun 09, 2006 at 02:08:15PM -0700, Dan Smith wrote:> ++/* > ++ * This is the message that is passed back and forth between the > ++ * kernel and the user application > ++ */ > ++struct dmu_write { > ++ uint32_t id; > ++ uint32_t type; /* Type of request */ > ++ uint32_t flags; /* Flags */ > ++ > ++ uint64_t org_block; /* Block that was accessed */ > ++ uint64_t new_block; /* The new block it should go to */ > ++ int64_t offset; /* Sector offset of the block, if needed */ > ++ > ++ uint32_t src_maj; /* The source device for copying */ > ++ uint32_t src_min; > ++ > ++ uint32_t dest_maj; /* Destination device for copying, and */ > ++ uint32_t dest_min; /* for the block access */ > ++ > ++}; > ++ > ++#endifAny reason why 1. this struct differs from the old patches published on dm-devel and lkml? 2. Why do you use a construct which gives different alignment on i386 and x86_64? (i386 aligns 64bit ints on 32bit, x86_64 on 64bit) Bastian -- A woman should have compassion. -- Kirk, "Catspaw", stardate 3018.2 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Dan Smith
2006-Jun-12 14:52 UTC
Re: [Xen-devel] [PATCH] Add dm-userspace to the Xen kernel
BB> 1. this struct differs from the old patches published on dm-devel BB> and lkml? I added an 'id' field, which contains a unique integer for each request, which helps me to match up the response to the correct item in the kernel queue. Previously, I was matching based on the original block, which has the potential to be wrong if there are two requests on the queue for the same block (i.e. one for a read mapping and one for a write mapping). I thought the id would be easier. BB> 2. Why do you use a construct which gives different allignment on BB> i386 and x86_64? (i386 alligns 64bit ints on 32bit, x86_64 on BB> 64bit I assume you mean because there are an odd number of 32-bit fields, is that correct? The answer to the question is: "because I haven't given much thought to x86_64 issues yet" :). This week, I plan to test on x86_64, so I can submit another patch with resolutions to any other x86_64 issues that may be present, if it's likely to be accepted. -- Dan Smith IBM Linux Technology Center Open Hypervisor Team email: danms@us.ibm.com _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Bastian Blank
2006-Jun-13 08:57 UTC
Re: [Xen-devel] [PATCH] Add dm-userspace to the Xen kernel
On Mon, Jun 12, 2006 at 07:52:06AM -0700, Dan Smith wrote:> BB> 2. Why do you use a construct which gives different allignment on > BB> i386 and x86_64? (i386 alligns 64bit ints on 32bit, x86_64 on > BB> 64bit > > I assume you mean because there are an odd number of 32-bit fields, is > that correct? The answer to the question is: "because I haven''t given > much thought to x86_64 issues yet" :). This week, I plan to test on > x86_64, so I can submit another patch with resolutions to any other > x86_64 issues that may be present, if it''s likely to be accepted.The problem is not between x86_64 kernel and userspace, but if you mix x86_64 kernel and i386 userland, which is allowed. Bastian -- "That unit is a woman." "A mass of conflicting impulses." -- Spock and Nomad, "The Changeling", stardate 3541.9 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
This patch adds dm-userspace to the -xen Linux kernel. I'd like to get it into the tree so that people that want to can play with it. Anyone wishing to do so can download the tools separately, but they need the kernel module to be able to use it. I've updated the code for, and tested it on, x86_64. The tools are available here: http://static.danplanet.com/dm-userspace/libdmu-0.3.tar.gz http://static.danplanet.com/dm-userspace/cowd-0.3.tar.gz -- Dan Smith IBM Linux Technology Center Open Hypervisor Team email: danms@us.ibm.com _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Anthony Liguori
2006-Jun-15 21:07 UTC
Re: [Xen-devel] [PATCH] Add dm-userspace to the Xen kernel
I think dm-userspace is great, but I don't think it belongs in patches/. To quote Chris W.: "Good things there are bits that are ready to go upstream or fixes to base Linux that are transient by version (backport of a fix that's in the -rc for the next kernel or somesuch)." Until this ends up in -mm, I really don't think we should add this to the -xen tree as it has no dependence on Xen. We should be good members of the kernel community here and go through the normal channels. At any rate, I would think that you would want it to be useful first before adding it, which means bringing in all of the tools into the tree. If a user already has to fetch a bunch of tools and compile from source, adding an additional patch to the Xen build isn't so bad. Regards, Anthony Liguori Dan Smith wrote: _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Nivedita Singhvi
2006-Jun-15 21:38 UTC
Re: [Xen-devel] [PATCH] Add dm-userspace to the Xen kernel
Anthony Liguori wrote:> I think dm-userspace is great, but I don''t think it belongs in > patches/. To quote Chris W.: > > "Good things there are bits that are ready to go upstream or fixes to > base Linux that are transient by version (backport of a fix that''s in > the -rc for the next kernel or somesuch)." > > Until this ends up in -mm, I really don''t think we should add this to > the -xen tree as it has no dependence on Xen. We should be good members > of the kernel community here and go through the normal channels. At any > rate, I would think that you would want it to be useful first before > adding it which means bringing in all of the tools into the tree.It won''t be useful in the kernel unless there are actual consumers, of which Xen could/might be one. Having a use case flesh out problems and get the performance and design architecture examined in detail will be useful to the Linux maintainers - they get something more robust and better designed, although it''s certainly not Xen''s obligation to provide that, per se. I''m just seeing the possibility of mutual benefit, here. That said, I believe since the patch is in the device-mapper tree for review, there is a good chance it will go into kernel in any case. thanks, Nivedita _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Anthony Liguori
2006-Jun-15 21:44 UTC
Re: [Xen-devel] [PATCH] Add dm-userspace to the Xen kernel
Nivedita Singhvi wrote:> Anthony Liguori wrote: >> I think dm-userspace is great, but I don''t think it belongs in >> patches/. To quote Chris W.: >> >> "Good things there are bits that are ready to go upstream or fixes to >> base Linux that are transient by version (backport of a fix that''s in >> the -rc for the next kernel or somesuch)." >> >> Until this ends up in -mm, I really don''t think we should add this to >> the -xen tree as it has no dependence on Xen. We should be good >> members of the kernel community here and go through the normal >> channels. At any rate, I would think that you would want it to be >> useful first before adding it which means bringing in all of the >> tools into the tree. > > It won''t be useful in the kernel unless there are actual > consumers, of which Xen could/might be one. Having a use > case flesh out problems and get the performance and design > architecture examined in detail will be useful to the Linux > maintainers - they get something more robust and better > designed, although it''s certainly not Xen''s obligation to > provide that, per se. I''m just seeing the possibility of > mutual benefit, here. > > That said, I believe since the patch is in the device-mapper > tree for review, there is a good chance it will go into > kernel in any case.Agreed. I think it''s a good idea to let it go through that review process before going into the Xen tree. As I said, once it''s in -mm, I would imagine that it the interfaces would be stable enough that there''s no harm putting it into the Xen tree. Especially since libdmu is being considered for inclusion in the device-mapper userspace. Things would get pretty ugly if the dm-userspace ABI changes between now and when it shows up in -mm as there''d be conflicting userspace packages. Regards, Anthony Liguori> thanks, > Nivedita >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Dan Smith
2006-Jun-15 22:09 UTC
Re: [Xen-devel] [PATCH] Add dm-userspace to the Xen kernel
AL> Agreed. I think it's a good idea to let it go through that review AL> process before going into the Xen tree. As I said, once it's in AL> -mm, I would imagine that the interfaces would be stable enough AL> that there's no harm putting it into the Xen tree. That may be true; however, putting it into the Xen tree at the moment would only be temporary anyway. I think that its changing shape while in the Xen tree isn't likely to break anything, as people using it at that point would have to know that it was subject to change. I don't think there is any danger in anyone snapshotting the Xen tree with a not-yet-final version of dm-userspace in it and assuming it's production-ready. To quote your quote: CW> "Good things there are bits that are ready to go upstream or fixes CW> to base Linux that are transient by version (backport of a fix CW> that's in the -rc for the next kernel or somesuch)." The code has been accepted into the device-mapper review tree, which may qualify it as almost "ready to go upstream". So, if we fall under that umbrella, I think it's reasonable to get it into the Xen tree for testing and review. Perhaps Chris can weigh in here? -- Dan Smith IBM Linux Technology Center Open Hypervisor Team email: danms@us.ibm.com _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Niraj Tolia
2006-Jun-16 18:31 UTC
Re: [Xen-devel] [PATCH] Add dm-userspace to the Xen kernel
Hi Dan, I had a question or two about dm-userspace. It looks very interesting and I hope to find time sometime soon to experiment with it. My question was, would this allow me to export a logical disk without having all the data present locally. The scenario I am thinking of would be applicable for VM migration across WANs. In that case, if I was to represent a virtual disk as a number of logical blocks, I could have the userspace application page the logical blocks in on demand. This would be useful when a VM migrates to a machine it has never been to earlier. Does this make sense? Would it work with dm-userspace? Cheers, Niraj On 6/15/06, Dan Smith <danms@us.ibm.com> wrote:> > This patch adds dm-userspace to the -xen Linux kernel. I''d like to > get it into the tree so that people that want to can play with it. > Anyone wishing to to do so can download the tools separately, but they > need the kernel module to be able to use it. > > I''ve updated the code for and tested it on x86_64. > > The tools are available here: > > http://static.danplanet.com/dm-userspace/libdmu-0.3.tar.gz > http://static.danplanet.com/dm-userspace/cowd-0.3.tar.gz > > -- > Dan Smith > IBM Linux Technology Center > Open Hypervisor Team > email: danms@us.ibm.com > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xensource.com > http://lists.xensource.com/xen-devel > > > >-- http://www.cs.cmu.edu/~ntolia _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Dan Smith
2006-Jun-16 18:50 UTC
Re: [Xen-devel] [PATCH] Add dm-userspace to the Xen kernel
NT> My question was, would this allow me to export a logical disk NT> without having all the data present locally. The scenario I am NT> thinking of would be applicable for VM migration across WANs. Sure. NT> In that case, if I was to represent a virtual disk as a number of NT> logical blocks, I could have the userspace application page the NT> logical blocks in on demand. This would be useful when a VM NT> migrates to a machine it has never been to earlier. Does this make NT> sense? Would it work with dm-userspace? Definitely! You could migrate the domain to another machine, and then start a background process that starts copying its blocks to the other machine. If a request comes in for a block (dm-userspace will let you handle it from the userspace app doing the copy), you immediately pull it across to satisfy the request. This would be a very interesting way to support migration of domains with block devices, without needing to wait for the block migration before starting the domain back up on the target machine. Very cool idea! -- Dan Smith IBM Linux Technology Center Open Hypervisor Team email: danms@us.ibm.com _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Anthony Liguori
2006-Jun-16 18:56 UTC
Re: [Xen-devel] [PATCH] Add dm-userspace to the Xen kernel
You probably want to check out drbd. A number of people have used it successfully to do storage migration with Xen domains. Regards, Anthony Liguori Niraj Tolia wrote:> Hi Dan, > > I had a question or two about dm-userspace. It looks very interesting > and I hope to find time sometime soon to experiment with it. My > question was, would this allow me to export a logical disk without > having all the data present locally. The scenario I am thinking of > would be applicable for VM migration across WANs. > > In that case, if I was to represent a virtual disk as a number of > logical blocks, I could have the userspace application page the > logical blocks in on demand. This would be useful when a VM migrates > to a machine it has never been to earlier. Does this make sense? Would > it work with dm-userspace? > > Cheers, > Niraj > > On 6/15/06, *Dan Smith* <danms@us.ibm.com <mailto:danms@us.ibm.com>> > wrote: > > This patch adds dm-userspace to the -xen Linux kernel. I''d like to > get it into the tree so that people that want to can play with it. > Anyone wishing to to do so can download the tools separately, but they > need the kernel module to be able to use it. > > I''ve updated the code for and tested it on x86_64. 
> > The tools are available here: > > http://static.danplanet.com/dm-userspace/libdmu-0.3.tar.gz > <http://static.danplanet.com/dm-userspace/libdmu-0.3.tar.gz> > http://static.danplanet.com/dm-userspace/cowd-0.3.tar.gz > > -- > Dan Smith > IBM Linux Technology Center > Open Hypervisor Team > email: danms@us.ibm.com <mailto:danms@us.ibm.com> > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xensource.com <mailto:Xen-devel@lists.xensource.com> > http://lists.xensource.com/xen-devel > > > > > > > -- > http://www.cs.cmu.edu/~ntolia <http://www.cs.cmu.edu/%7Entolia> > ------------------------------------------------------------------------ > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xensource.com > http://lists.xensource.com/xen-devel >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Andrew Warfield
2006-Jun-16 19:39 UTC
Re: [Xen-devel] [PATCH] Add dm-userspace to the Xen kernel
On 6/16/06, Anthony Liguori <aliguori@us.ibm.com> wrote:> You probably want to check out drbd. A number of people have used it > successfully to do storage migration with Xen domains.gnbd is also worth a peek... a. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Niraj Tolia
2006-Jun-16 19:41 UTC
Re: [Xen-devel] [PATCH] Add dm-userspace to the Xen kernel
On 6/16/06, Anthony Liguori <aliguori@us.ibm.com> wrote:> > You probably want to check out drbd. A number of people have used it > successfully to do storage migration with Xen domains.DRBD has a number of disadvantages. The last time I checked, it only mirrors data between two nodes. Further, it requires a complete copy of the disk to be present on both nodes. Niraj Regards,> > Anthony Liguori > > Niraj Tolia wrote: > > Hi Dan, > > > > I had a question or two about dm-userspace. It looks very interesting > > and I hope to find time sometime soon to experiment with it. My > > question was, would this allow me to export a logical disk without > > having all the data present locally. The scenario I am thinking of > > would be applicable for VM migration across WANs. > > > > In that case, if I was to represent a virtual disk as a number of > > logical blocks, I could have the userspace application page the > > logical blocks in on demand. This would be useful when a VM migrates > > to a machine it has never been to earlier. Does this make sense? Would > > it work with dm-userspace? > > > > Cheers, > > Niraj > > > > On 6/15/06, *Dan Smith* <danms@us.ibm.com <mailto:danms@us.ibm.com>> > > wrote: > > > > This patch adds dm-userspace to the -xen Linux kernel. I''d like to > > get it into the tree so that people that want to can play with it. > > Anyone wishing to to do so can download the tools separately, but > they > > need the kernel module to be able to use it. > > > > I''ve updated the code for and tested it on x86_64. 
> > > > The tools are available here: > > > > http://static.danplanet.com/dm-userspace/libdmu-0.3.tar.gz > > <http://static.danplanet.com/dm-userspace/libdmu-0.3.tar.gz> > > http://static.danplanet.com/dm-userspace/cowd-0.3.tar.gz > > > > -- > > Dan Smith > > IBM Linux Technology Center > > Open Hypervisor Team > > email: danms@us.ibm.com <mailto:danms@us.ibm.com> > > > > > > > > _______________________________________________ > > Xen-devel mailing list > > Xen-devel@lists.xensource.com <mailto:Xen-devel@lists.xensource.com> > > http://lists.xensource.com/xen-devel > > > > > > > > > > > > > > -- > > http://www.cs.cmu.edu/~ntolia <http://www.cs.cmu.edu/%7Entolia> > > ------------------------------------------------------------------------ > > > > _______________________________________________ > > Xen-devel mailing list > > Xen-devel@lists.xensource.com > > http://lists.xensource.com/xen-devel > > > >-- http://www.cs.cmu.edu/~ntolia _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel