# HG changeset patch # User Ryan Grimm <grimm@us.ibm.com> # Date 1156190582 18000 # Node ID 9ebba79efbe99774c4063174ab569783017c7e78 # Parent 6a8204e4619d5bd182364b768b1ec2c8a43e8d15 dm-userspace xen kernel patch Signed-off-by: Ryan Grimm <grimm@us.ibm.com> Signed-off-by: Dan Smith <danms@us.ibm.com> diff -r 6a8204e4619d -r 9ebba79efbe9 buildconfigs/linux-defconfig_xen0_ia64 --- a/buildconfigs/linux-defconfig_xen0_ia64 Mon Aug 21 13:36:05 2006 +0100 +++ b/buildconfigs/linux-defconfig_xen0_ia64 Mon Aug 21 15:03:02 2006 -0500 @@ -485,6 +485,7 @@ CONFIG_MD=y CONFIG_MD=y # CONFIG_BLK_DEV_MD is not set # CONFIG_BLK_DEV_DM is not set +CONFIG_DM_USERSPACE=n # # Fusion MPT device support diff -r 6a8204e4619d -r 9ebba79efbe9 buildconfigs/linux-defconfig_xen0_x86_32 --- a/buildconfigs/linux-defconfig_xen0_x86_32 Mon Aug 21 13:36:05 2006 +0100 +++ b/buildconfigs/linux-defconfig_xen0_x86_32 Mon Aug 21 15:03:02 2006 -0500 @@ -600,6 +600,7 @@ CONFIG_DM_MIRROR=y CONFIG_DM_MIRROR=y # CONFIG_DM_ZERO is not set # CONFIG_DM_MULTIPATH is not set +CONFIG_DM_USERSPACE=m # # Fusion MPT device support diff -r 6a8204e4619d -r 9ebba79efbe9 buildconfigs/linux-defconfig_xen0_x86_64 --- a/buildconfigs/linux-defconfig_xen0_x86_64 Mon Aug 21 13:36:05 2006 +0100 +++ b/buildconfigs/linux-defconfig_xen0_x86_64 Mon Aug 21 15:03:02 2006 -0500 @@ -552,6 +552,7 @@ CONFIG_DM_MIRROR=y # CONFIG_DM_ZERO is not set CONFIG_DM_MULTIPATH=y CONFIG_DM_MULTIPATH_EMC=y +CONFIG_DM_USERSPACE=m # # Fusion MPT device support diff -r 6a8204e4619d -r 9ebba79efbe9 buildconfigs/linux-defconfig_xen_x86_32 --- a/buildconfigs/linux-defconfig_xen_x86_32 Mon Aug 21 13:36:05 2006 +0100 +++ b/buildconfigs/linux-defconfig_xen_x86_32 Mon Aug 21 15:03:02 2006 -0500 @@ -1187,6 +1187,7 @@ CONFIG_DM_ZERO=m CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_EMC=m +CONFIG_DM_USERSPACE=m # # Fusion MPT device support diff -r 6a8204e4619d -r 9ebba79efbe9 buildconfigs/linux-defconfig_xen_x86_64 --- a/buildconfigs/linux-defconfig_xen_x86_64 Mon Aug 21 13:36:05 2006 +0100 +++ b/buildconfigs/linux-defconfig_xen_x86_64 Mon Aug 21 15:03:02 2006 -0500 @@ -1130,6 +1130,7 @@ CONFIG_DM_ZERO=m CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_EMC=m +CONFIG_DM_USERSPACE=m # # Fusion MPT device support diff -r 6a8204e4619d -r 9ebba79efbe9 patches/linux-2.6.16.13/series --- a/patches/linux-2.6.16.13/series Mon Aug 21 13:36:05 2006 +0100 +++ b/patches/linux-2.6.16.13/series Mon Aug 21 15:03:02 2006 -0500 @@ -1,5 +1,6 @@ blktap-aio-16_03_06.patch blktap-aio-16_03_06.patch device_bind.patch +dmu.patch fix-hz-suspend.patch fix-ide-cd-pio-mode.patch i386-mach-io-check-nmi.patch diff -r 6a8204e4619d -r 9ebba79efbe9 patches/linux-2.6.16.13/dmu.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.13/dmu.patch Mon Aug 21 15:03:02 2006 -0500 @@ -0,0 +1,2591 @@ +diff -purN ../pristine-linux-2.6.16.13/drivers/md/dm-user.h ./drivers/md/dm-user.h +--- ../pristine-linux-2.6.16.13/drivers/md/dm-user.h 1969-12-31 18:00:00.000000000 -0600 ++++ ./drivers/md/dm-user.h 2006-08-16 18:48:18.000000000 -0500 +@@ -0,0 +1,209 @@ ++/* ++ * Copyright (C) International Business Machines Corp., 2006 ++ * Author: Dan Smith <danms@us.ibm.com> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; under version 2 of the License. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ * ++ */ ++ ++#ifndef __DM_USER_H ++#define __DM_USER_H ++ ++#include <linux/hardirq.h> ++ ++#define DMU_KEY_LEN 256 ++ ++extern struct target_type userspace_target; ++extern mempool_t *request_pool; ++extern dev_t dmu_dev; ++extern spinlock_t devices_lock; ++extern struct list_head devices; ++ ++/* ++ * A hash table of remaps ++ */ ++struct hash_table { ++ struct list_head *table; /* Array of lists (buckets) */ ++ uint64_t size; /* Number of buckets */ ++ uint32_t mask; /* Mask used to determine bucket */ ++ uint64_t count; /* Number of remaps in entire table */ ++}; ++ ++/* ++ * A block device that we can send bios to ++ */ ++struct target_device { ++ struct list_head list; /* Our place in the targets list */ ++ struct block_device *bdev; /* The target block_device */ ++ struct kref users; /* Self-destructing reference count */ ++}; ++ ++/* ++ * A dm-userspace device, which consists of multiple targets sharing a ++ * common key ++ */ ++struct dmu_device { ++ struct list_head list; /* Our place in the devices list */ ++ ++ spinlock_t lock; /* Protects all the fields below */ ++ ++ struct list_head requests; /* List of pending requests */ ++ struct list_head target_devs; /* List of devices we can target */ ++ struct hash_table remaps; /* Hash table of all our maps */ ++ ++ void *transport_private; /* Private data for userspace comms */ ++ ++ char key[DMU_KEY_LEN]; /* Unique name string for device */ ++ struct kref users; /* Self-destructing reference count */ ++ ++ wait_queue_head_t wqueue; /* To block while waiting for reqs */ ++ ++ uint64_t block_size; /* Block size for this device */ ++ uint64_t block_mask; /* Mask for offset in block */ ++ unsigned int block_shift; /* Shift to convert to/from block */ ++ ++ struct kcopyd_client *kcopy; /* Interface to kcopyd */ ++ ++ uint32_t id_counter; /* Used to generate request IDs */ ++}; ++ ++struct userspace_request { ++ struct list_head list; /* Our place on the request queue */ ++ ++ spinlock_t lock; /* Protects all the fields below */ ++ ++ struct dmu_device *dev; /* The DMU device that owns us */ ++ ++ int type; /* Type of request */ ++ int sent; /* Non-zero if we''ve been sent */ ++ uint32_t flags; /* Attribute flags */ ++ uint32_t id; /* Unique ID for sync with userspace */ ++ union { ++ uint64_t block; /* The block in question */ ++ } u; ++ atomic_t refcnt; /* Reference count */ ++ ++ struct dmu_map *remap; /* The remap we represent */ ++}; ++ ++struct dmu_map { ++ struct list_head list; /* Our place in a remap bucket chain */ ++ struct list_head mru_list; /* Our place on the MRU list */ ++ ++ spinlock_t lock; /* Protects all the fields below */ ++ ++ uint64_t org_block; /* Original block */ ++ uint64_t new_block; /* Destination block */ ++ int64_t offset; /* Sectors to offset remapped block */ ++ uint32_t flags; /* Attribute flags */ ++ uint32_t id; /* Unique ID for sync with userspace */ ++ ++ struct target_device *src; /* Source blkdev for COPY_FIRST */ ++ struct target_device *dest; /* Where the remapped block is */ ++ ++ struct bio_list bios; /* Bios queued for 
remapping */ ++ struct bio_list bios_waiting; /* Bios waiting for endio sync */ ++ ++ struct dmu_device *dev; /* The DMU device that owns us */ ++ struct dmu_map *next; /* Next remap that depends on us */ ++ ++ struct work_struct endio_task;/* Work to be done on bio endios */ ++}; ++ ++/* Find and grab a reference to a target device */ ++struct target_device *find_target(struct dmu_device *dev, ++ dev_t devno); ++ ++/* Object allocation, destruction, and initialization routines */ ++void init_remap(struct dmu_device *dev, struct dmu_map *remap); ++void init_request(struct dmu_device *dev, ++ int type, ++ struct userspace_request *req); ++void free_remap(struct dmu_map *remap); ++void __free_remap(struct dmu_map *remap); ++struct dmu_map *alloc_remap_atomic(struct dmu_device *dev); ++ ++/* Hash table manipulation */ ++struct dmu_map *ht_find_map(struct hash_table *ht, uint64_t block); ++void ht_insert_map(struct hash_table *ht, struct dmu_map *map); ++struct dmu_map *ht_find_map_dev(struct dmu_device *dev, uint64_t block); ++void ht_delete_map(struct hash_table *ht, struct dmu_map *map); ++ ++/* Character device transport functions */ ++int register_chardev_transport(struct dmu_device *dev); ++void unregister_chardev_transport(struct dmu_device *dev); ++int init_chardev_transport(void); ++void cleanup_chardev_transport(void); ++void write_chardev_transport_info(struct dmu_device *dev, ++ char *buf, unsigned int maxlen); ++ ++/* Return the block number for @sector */ ++static inline u64 dmu_block(struct dmu_device *dev, ++ sector_t sector) ++{ ++ return sector >> dev->block_shift; ++} ++ ++/* Return the sector offset in a block for @sector */ ++static inline u64 dmu_sector_offset(struct dmu_device *dev, ++ sector_t sector) ++{ ++ return sector & dev->block_mask; ++} ++ ++/* Return the starting sector for @block */ ++static inline u64 dmu_sector(struct dmu_device *dev, ++ uint64_t block) ++{ ++ return block << dev->block_shift; ++} ++ ++/* Add a request to a device''s request queue */ ++static void add_request(struct dmu_device *dev, ++ struct userspace_request *req) ++{ ++ spin_lock(&dev->lock); ++ list_add_tail(&req->list, &dev->requests); ++ spin_unlock(&dev->lock); ++ ++ wake_up(&dev->wqueue); ++} ++ ++/* Remap @bio based on the information in @remap */ ++static void __bio_remap(struct bio *bio, ++ struct dmu_map *remap) ++{ ++ BUG_ON(remap->dest == NULL); ++ ++ bio->bi_sector = dmu_sector(remap->dev, remap->new_block) + ++ dmu_sector_offset(remap->dev, bio->bi_sector) + ++ remap->offset; ++ ++ bio->bi_bdev = remap->dest->bdev; ++} ++ ++/* Increase the usage count for @dev */ ++static inline void get_dev(struct dmu_device *dev) ++{ ++ kref_get(&dev->users); ++} ++ ++/* Decrease the usage count for @dev */ ++void destroy_dmu_device(struct kref *ref); ++static inline void put_dev(struct dmu_device *dev) ++{ ++ kref_put(&dev->users, destroy_dmu_device); ++} ++ ++#endif +diff -purN ../pristine-linux-2.6.16.13/drivers/md/dm-userspace.c ./drivers/md/dm-userspace.c +--- ../pristine-linux-2.6.16.13/drivers/md/dm-userspace.c 1969-12-31 18:00:00.000000000 -0600 ++++ ./drivers/md/dm-userspace.c 2006-08-16 18:48:18.000000000 -0500 +@@ -0,0 +1,1132 @@ ++/* ++ * Copyright (C) International Business Machines Corp., 2006 ++ * Author: Dan Smith <danms@us.ibm.com> ++ * Author: Ryan Grimm <grimm@us.ibm.com> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; under version 2 
of the License. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ * ++ */ ++ ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/blkdev.h> ++#include <linux/bio.h> ++#include <linux/slab.h> ++#include <linux/spinlock.h> ++#include <linux/list.h> ++#include <linux/fs.h> ++#include <linux/cdev.h> ++#include <linux/types.h> ++#include <linux/poll.h> ++ ++#include <linux/dm-userspace.h> ++ ++#include "dm.h" ++#include "dm-bio-list.h" ++#include "kcopyd.h" ++#include "dm-user.h" ++ ++#define DMU_COPY_PAGES 256 ++#define DMU_REMAP_RESERVE 128 ++ ++#define DM_MSG_PREFIX "dm-userspace" ++ ++static kmem_cache_t *request_cache; ++static kmem_cache_t *remap_cache; ++ ++mempool_t *request_pool; ++ ++static int enable_watchdog = 0; ++static struct work_struct wd; ++ ++spinlock_t devices_lock; ++LIST_HEAD(devices); ++ ++static spinlock_t mru_list_lock; ++static LIST_HEAD(mru_list); ++ ++/* Device number for the control device */ ++dev_t dmu_dev; ++ ++static int error_bios(struct bio_list *bios) ++{ ++ struct bio *bio; ++ int count = 0; ++ ++ while ((bio = bio_list_pop(bios)) != NULL) { ++ bio_io_error(bio, bio->bi_size); ++ count++; ++ } ++ ++ if (count) ++ DMERR("*** Failed %i requests", count); ++ ++ return count; ++} ++ ++static void remap_hit(struct dmu_map *remap) ++{ ++ spin_lock(&mru_list_lock); ++ ++ list_del_init(&remap->mru_list); ++ list_add(&remap->mru_list, &mru_list); ++ ++ spin_unlock(&mru_list_lock); ++} ++ ++struct dmu_map *alloc_remap_atomic(struct dmu_device *dev) ++{ ++ struct dmu_map *remap = NULL; ++ ++ /* Try to allocate one from the cache */ ++ remap = kmem_cache_alloc(remap_cache, GFP_NOIO); ++ if (remap) { ++ INIT_LIST_HEAD(&remap->mru_list); ++ ++ spin_lock(&mru_list_lock); ++ list_add_tail(&remap->mru_list, &mru_list); ++ spin_unlock(&mru_list_lock); ++ ++ goto out; ++ } ++ ++ /* Unable to alloc one, so get the LRU item off the list */ ++ spin_lock(&mru_list_lock); ++ remap = list_entry(mru_list.prev, struct dmu_map, mru_list); ++ spin_unlock(&mru_list_lock); ++ ++ if (remap) { ++ struct dmu_device *dev = remap->dev; ++ unsigned long flags; ++ ++ DMINFO("Memory is low. 
Stealing the LRU remap..."); ++ ++ spin_lock(&dev->lock); ++ spin_lock_irqsave(&remap->lock, flags); ++ if (dmu_get_flag(&remap->flags, DMU_FLAG_INUSE)) { ++ /* Remove it from whatever device owns it */ ++ ++ if (!list_empty(&remap->list)) ++ list_del_init(&remap->list); ++ ++ dmu_clr_flag(&remap->flags, DMU_FLAG_INUSE); ++ ++ if (!dmu_get_flag(&remap->flags, DMU_FLAG_VALID)) { ++ /* If the LRU remap is not valid, ++ we''re in trouble */ ++ spin_unlock_irqrestore(&remap->lock, flags); ++ spin_unlock(&dev->lock); ++ printk(KERN_EMERG ++ "dm-userspace: Unable to allocate " ++ "or steal a remap!\n"); ++ goto out; ++ } ++ } ++ ++ spin_unlock_irqrestore(&remap->lock, flags); ++ spin_unlock(&dev->lock); ++ ++ remap_hit(remap); ++ } else { ++ DMERR("Failed to alloc or steal a remap!"); ++ } ++ ++ out: ++ return remap; ++} ++ ++void free_remap(struct dmu_map *remap) ++{ ++ unsigned long flags; ++ ++ if (error_bios(&remap->bios)) { ++ DMERR("Freed a map with in-flight data!"); ++ BUG(); ++ } ++ ++ spin_lock_irqsave(&remap->lock, flags); ++ dmu_clr_flag(&remap->flags, DMU_FLAG_INUSE); ++ spin_unlock_irqrestore(&remap->lock, flags); ++ ++ spin_lock(&remap->dev->lock); ++ list_del(&remap->list); ++ spin_unlock(&remap->dev->lock); ++ ++ spin_lock(&mru_list_lock); ++ list_del_init(&remap->mru_list); ++ list_add_tail(&remap->mru_list, &mru_list); ++ spin_unlock(&mru_list_lock); ++} ++ ++void __free_remap(struct dmu_map *remap) ++{ ++ if (error_bios(&remap->bios)) { ++ DMERR("Freed a map with in-flight data!"); ++ BUG(); ++ } ++ ++ dmu_clr_flag(&remap->flags, DMU_FLAG_INUSE); ++ ++ list_del(&remap->list); ++ ++ spin_lock(&mru_list_lock); ++ list_del_init(&remap->mru_list); ++ list_add_tail(&remap->mru_list, &mru_list); ++ spin_unlock(&mru_list_lock); ++} ++ ++static struct userspace_request *make_sync_req(struct dmu_device *dev, ++ struct dmu_map *remap) ++{ ++ struct userspace_request *req; ++ unsigned long flags; ++ ++ req = mempool_alloc(request_pool, GFP_NOIO); ++ if (!req) { ++ DMERR("Failed to allocate copy response"); ++ return NULL; ++ } ++ init_request(dev, DM_USERSPACE_SYNC_COMPLETE, req); ++ ++ spin_lock_irqsave(&remap->lock, flags); ++ req->id = remap->id; ++ spin_unlock_irqrestore(&remap->lock, flags); ++ ++ req->remap = remap; ++ ++ return req; ++} ++ ++static void endio_worker(void *data) ++{ ++ struct dmu_map *remap = data; ++ struct userspace_request *req = NULL; ++ ++ req = make_sync_req(remap->dev, remap); ++ ++ if (req) ++ add_request(req->dev, req); ++} ++ ++void init_remap(struct dmu_device *dev, struct dmu_map *remap) ++{ ++ spin_lock_init(&remap->lock); ++ remap->org_block = remap->new_block = 0; ++ remap->offset = 0; ++ remap->flags = 0; ++ dmu_set_flag(&remap->flags, DMU_FLAG_INUSE); ++ remap->src = remap->dest = NULL; ++ bio_list_init(&remap->bios); ++ bio_list_init(&remap->bios_waiting); ++ INIT_LIST_HEAD(&remap->list); ++ remap->dev = dev; ++ remap->next = NULL; ++ ++ INIT_WORK(&remap->endio_task, endio_worker, remap); ++} ++ ++void init_request(struct dmu_device *dev, ++ int type, ++ struct userspace_request *req) ++{ ++ spin_lock_init(&req->lock); ++ INIT_LIST_HEAD(&req->list); ++ req->dev = dev; ++ req->type = type; ++ req->sent = 0; ++ req->flags = 0; ++ if (type == DM_USERSPACE_SYNC_COMPLETE) { ++ req->u.block = 0; ++ req->id = 0; ++ } else { ++ spin_lock(&dev->lock); ++ dev->id_counter++; ++ if (dev->id_counter == 0) ++ dev->id_counter = 1; ++ req->id = dev->id_counter; ++ spin_unlock(&dev->lock); ++ } ++ atomic_set(&req->refcnt, 0); ++} ++ ++/* ++ * For an even block 
distribution, this is not too bad, but it could ++ * probably be better ++ */ ++static uint32_t ht_hash(struct hash_table *ht, uint64_t block) ++{ ++ return (uint32_t)block & ht->mask; ++} ++ ++static int ht_init(struct hash_table *ht, unsigned long size) ++{ ++ uint64_t i; ++ unsigned long pages; ++ unsigned int order = ffs((size * sizeof(struct list_head *)) / ++ PAGE_SIZE); ++ ++ if (order > 9) ++ return 0; ++ ++ pages = __get_free_pages(GFP_ATOMIC, order); ++ if (!pages) ++ return 0; ++ ++ ht->table = (void *)pages; ++ ht->size = size; ++ ht->count = 0; ++ ht->mask = size - 1; ++ ++ for (i = 0; i < size; i++) ++ INIT_LIST_HEAD(&ht->table[i]); ++ ++ return 1; ++} ++ ++static void ht_insert_bucket(struct dmu_map *map, struct list_head *list) ++{ ++ list_add_tail(&map->list, list); ++} ++ ++/* ++ * I''m sure this is quite dumb, but it works for now ++ */ ++static int ht_should_grow(struct hash_table *ht) ++{ ++ return ht->count > (2 * (ht->size / 4)); ++} ++ ++static void ht_grow_table(struct hash_table *ht); ++void ht_insert_map(struct hash_table *ht, struct dmu_map *map) ++{ ++ uint32_t addr; ++ ++ addr = ht_hash(ht, map->org_block) & ht->mask; ++ ++ BUG_ON(addr >= ht->size); ++ ++ ht_insert_bucket(map, &ht->table[addr]); ++ ht->count++; ++ ++ if (ht_should_grow(ht)) ++ ht_grow_table(ht); ++} ++ ++void ht_delete_map(struct hash_table *ht, struct dmu_map *map) ++{ ++ list_del_init(&map->list); ++ BUG_ON(ht->count == 0); ++ ht->count--; ++} ++ ++struct dmu_map *ht_find_map(struct hash_table *ht, uint64_t block) ++{ ++ uint32_t addr; ++ struct dmu_map *m; ++ ++ addr = ht_hash(ht, block) & ht->mask; ++ ++ BUG_ON(addr >= ht->size); ++ ++ list_for_each_entry(m, &ht->table[addr], list) { ++ if (m->org_block == block) { ++ remap_hit(m); ++ return m; ++ } ++ } ++ ++ return NULL; ++} ++ ++struct dmu_map *ht_find_map_dev(struct dmu_device *dev, uint64_t block) ++{ ++ struct dmu_map *remap; ++ ++ spin_lock(&dev->lock); ++ ++ remap = ht_find_map(&dev->remaps, block); ++ ++ spin_unlock(&dev->lock); ++ ++ return remap; ++} ++ ++static void ht_grow_table(struct hash_table *ht) ++{ ++ struct hash_table old_table; ++ uint64_t i; ++ ++ old_table = *ht; ++ ++ if (!ht_init(ht, old_table.size * 2)) ++ return; ++ ++ for (i = 0; i < old_table.size; i++ ) { ++ struct dmu_map *m, *n; ++ list_for_each_entry_safe(m, n, &old_table.table[i], ++ list) { ++ list_del_init(&m->list); ++ ht_insert_map(ht, m); ++ } ++ } ++ ++ free_pages((unsigned long)old_table.table, ++ ffs((old_table.size * sizeof(struct list_head *)) ++ / PAGE_SIZE)); ++} ++ ++static uint64_t ht_destroy_table(struct hash_table *ht) ++{ ++ uint64_t i, count = 0; ++ struct dmu_map *m, *n; ++ ++ for (i = 0; i < ht->size; i++) { ++ list_for_each_entry_safe(m, n, &ht->table[i], list) { ++ ht_delete_map(ht, m); ++ free_remap(m); ++ count++; ++ } ++ } ++ ++ free_pages((unsigned long)ht->table, ++ ffs((ht->size * sizeof(struct list_head *)) ++ / PAGE_SIZE)); ++ ++ return count; ++} ++ ++struct target_device *find_target(struct dmu_device *dev, ++ dev_t devno) ++{ ++ struct target_device *target, *match = NULL; ++ ++ spin_lock(&dev->lock); ++ list_for_each_entry(target, &dev->target_devs, list) { ++ if (target->bdev->bd_dev == devno) { ++ match = target; ++ break; ++ } ++ } ++ spin_unlock(&dev->lock); ++ ++ return match; ++} ++ ++static struct target_device *get_target(struct dmu_device *dev, ++ dev_t devno) ++{ ++ ++ struct target_device *target; ++ struct block_device *bdev; ++ ++ target = find_target(dev, devno); ++ if (target) ++ return target; ++ ++ bdev = 
open_by_devnum(devno, FMODE_READ | FMODE_WRITE); ++ if (IS_ERR(bdev)) { ++ DMERR("Unable to lookup device %x", devno); ++ return NULL; ++ } ++ ++ target = kmalloc(sizeof(*target), GFP_KERNEL); ++ if (!target) { ++ DMERR("Unable to alloc new target device"); ++ return NULL; ++ } ++ ++ target->bdev = bdev; ++ INIT_LIST_HEAD(&target->list); ++ ++ spin_lock(&dev->lock); ++ list_add_tail(&target->list, &dev->target_devs); ++ spin_unlock(&dev->lock); ++ ++ return target; ++} ++ ++/* Caller must hold dev->lock */ ++static void put_target(struct dmu_device *dev, ++ struct target_device *target) ++{ ++ list_del(&target->list); ++ ++ bd_release(target->bdev); ++ blkdev_put(target->bdev); ++ ++ kfree(target); ++} ++ ++/* ++ * This periodically dumps out some debug information. It''s really ++ * only useful while developing. ++ */ ++static void watchdog(void *data) ++{ ++ unsigned int v_remaps, i_remaps, reqs, s_reqs, devs = 0; ++ struct dmu_device *dev; ++ struct dmu_map *map; ++ struct userspace_request *req; ++ uint64_t i; ++ ++ spin_lock(&devices_lock); ++ ++ list_for_each_entry(dev, &devices, list) { ++ spin_lock(&dev->lock); ++ ++ v_remaps = i_remaps = reqs = s_reqs = 0; ++ ++ for (i = 0; i < dev->remaps.size; i++) { ++ list_for_each_entry(map, &dev->remaps.table[i], list) ++ if (dmu_get_flag(&map->flags, DMU_FLAG_VALID)) ++ v_remaps++; ++ else ++ i_remaps++; ++ } ++ ++ list_for_each_entry(req, &dev->requests, list) ++ if (req->sent) ++ s_reqs++; ++ else ++ reqs++; ++ ++ printk("Device " ++ " reqs: %u/%u " ++ " inv maps: %u " ++ " val maps: %u\n", ++ reqs, s_reqs, i_remaps, v_remaps); ++ devs++; ++ ++ spin_unlock(&dev->lock); ++ } ++ ++ spin_unlock(&devices_lock); ++ ++ schedule_delayed_work(&wd, HZ); ++} ++ ++void destroy_dmu_device(struct kref *ref) ++{ ++ struct dmu_device *dev; ++ struct list_head *cursor, *next; ++ uint64_t remaps; ++ ++ dev = container_of(ref, struct dmu_device, users); ++ ++ spin_lock(&devices_lock); ++ list_del(&dev->list); ++ spin_unlock(&devices_lock); ++ ++ list_for_each_safe(cursor, next, &dev->target_devs) { ++ struct target_device *target; ++ ++ target = list_entry(cursor, ++ struct target_device, ++ list); ++ ++ put_target(dev, target); ++ } ++ ++ remaps = ht_destroy_table(&dev->remaps); ++ ++ list_for_each_safe(cursor, next, &dev->requests) { ++ struct userspace_request *req; ++ ++ req = list_entry(cursor, ++ struct userspace_request, ++ list); ++ ++ list_del(&req->list); ++ ++ mempool_free(req, request_pool); ++ } ++ ++ kcopyd_client_destroy(dev->kcopy); ++ unregister_chardev_transport(dev); ++ ++ kfree(dev); ++} ++ ++static int init_dmu_device(struct dmu_device *dev, u32 block_size) ++{ ++ int ret; ++ ++ init_waitqueue_head(&dev->wqueue); ++ INIT_LIST_HEAD(&dev->list); ++ INIT_LIST_HEAD(&dev->requests); ++ INIT_LIST_HEAD(&dev->target_devs); ++ kref_init(&dev->users); ++ spin_lock_init(&dev->lock); ++ ++ dev->id_counter = 1; /* reserve 0 for unsolicited maps */ ++ ++ if (!ht_init(&dev->remaps, 2048)) { ++ DMERR("Unable to allocate hash table"); ++ return 0; ++ } ++ ++ dev->block_size = block_size; ++ dev->block_mask = block_size - 1; ++ dev->block_shift = ffs(block_size) - 1; ++ ++ ret = kcopyd_client_create(DMU_COPY_PAGES, &dev->kcopy); ++ if (ret) { ++ DMERR("Failed to initialize kcopyd client"); ++ return 0; ++ } ++ ++ return 1; ++} ++ ++static struct dmu_device *new_dmu_device(char *key, ++ struct dm_target *ti, ++ u32 block_size) ++{ ++ struct dmu_device *dev; ++ int ret; ++ ++ dev = kmalloc(sizeof(*dev), GFP_KERNEL); ++ if (dev == NULL) { ++ DMERR("Failed 
to allocate new userspace device"); ++ return NULL; ++ } ++ ++ if (!init_dmu_device(dev, block_size)) ++ goto bad1; ++ ++ snprintf(dev->key, DMU_KEY_LEN, "%s", key); ++ ++ ret = register_chardev_transport(dev); ++ if (!ret) ++ goto bad2; ++ ++ spin_lock(&devices_lock); ++ list_add(&dev->list, &devices); ++ spin_unlock(&devices_lock); ++ ++ return dev; ++ ++ bad2: ++ put_dev(dev); ++ bad1: ++ kfree(dev); ++ DMERR("Failed to create device"); ++ return NULL; ++} ++ ++static struct dmu_device *find_dmu_device(const char *key) ++{ ++ struct dmu_device *dev; ++ struct dmu_device *match = NULL; ++ ++ spin_lock(&devices_lock); ++ ++ list_for_each_entry(dev, &devices, list) { ++ spin_lock(&dev->lock); ++ if (strncmp(dev->key, key, DMU_KEY_LEN) == 0) { ++ match = dev; ++ spin_unlock(&dev->lock); ++ break; ++ } ++ spin_unlock(&dev->lock); ++ } ++ ++ spin_unlock(&devices_lock); ++ ++ return match; ++} ++ ++static int dmu_ctr(struct dm_target *ti, unsigned int argc, char **argv) ++{ ++ uint64_t block_size; ++ struct dmu_device *dev; ++ char *device_key; ++ char *block_size_param; ++ int target_idx = 2; ++ ++ if (argc < 3) { ++ ti->error = "Invalid argument count"; ++ return -EINVAL; ++ } ++ ++ device_key = argv[0]; ++ block_size_param = argv[1]; ++ ++ block_size = simple_strtoul(block_size_param, NULL, 10) / 512; ++ ++ dev = find_dmu_device(device_key); ++ if (dev == NULL) { ++ dev = new_dmu_device(device_key, ++ ti, ++ block_size); ++ if (dev == NULL) { ++ ti->error = "Failed to create device"; ++ goto bad; ++ } ++ } else { ++ get_dev(dev); ++ } ++ ++ spin_lock(&dev->lock); ++ if (dev->block_size != block_size) { ++ ti->error = "Invalid block size"; ++ goto bad; ++ } ++ spin_unlock(&dev->lock); ++ ++ /* Resolve target devices */ ++ do { ++ int maj, min; ++ sscanf(argv[target_idx], "%i:%i", &maj, &min); ++ if (!get_target(dev, MKDEV(maj, min))) { ++ DMERR("Failed to find target device %i:%i (%s)", ++ maj, min, argv[target_idx]); ++ goto out; ++ } ++ } while (++target_idx < argc); ++ ++ ti->private = dev; ++ ti->split_io = block_size; ++ ++ return 0; ++ ++ bad: ++ if (dev) { ++ spin_unlock(&dev->lock); ++ } ++ out: ++ if (dev) { ++ put_dev(dev); ++ } ++ ++ return -EINVAL; ++} ++ ++static void dmu_dtr(struct dm_target *ti) ++{ ++ struct dmu_device *dev = (struct dmu_device *) ti->private; ++ ++ put_dev(dev); ++} ++ ++/* Search @dev for an outstanding request for remapping @block */ ++static struct userspace_request *find_existing_req(struct dmu_device *dev, ++ uint64_t block) ++{ ++ struct userspace_request *req; ++ struct userspace_request *match = NULL; ++ ++ spin_lock(&dev->lock); ++ ++ list_for_each_entry_reverse(req, &dev->requests, list) { ++ if ((req->type == DM_USERSPACE_MAP_BLOCK_REQ) && ++ (req->remap->org_block == block)) { ++ match = req; ++ atomic_inc(&match->refcnt); ++ break; ++ } ++ } ++ ++ spin_unlock(&dev->lock); ++ ++ return match; ++} ++ ++static int make_new_request(struct dmu_device *dev, ++ struct bio *bio, ++ void **ctxptr) ++{ ++ struct userspace_request *req; ++ ++ req = mempool_alloc(request_pool, GFP_NOIO); ++ if (req == NULL) ++ goto bad; ++ ++ init_request(dev, DM_USERSPACE_MAP_BLOCK_REQ, req); ++ ++ dmu_set_flag(&req->flags, DMU_FLAG_RD); ++ if (bio_rw(bio)) ++ dmu_set_flag(&req->flags, DMU_FLAG_WR); ++ else ++ dmu_clr_flag(&req->flags, DMU_FLAG_WR); ++ ++ req->remap = alloc_remap_atomic(dev); ++ if (!req->remap) { ++ DMERR("Failed to alloc remap!"); ++ goto bad; ++ } ++ init_remap(dev, req->remap); ++ ++ bio_list_add(&req->remap->bios, bio); ++ req->remap->org_block = 
dmu_block(dev, bio->bi_sector); ++ ++ *ctxptr = req->remap; ++ ++ add_request(dev, req); ++ ++ return 0; ++ ++ bad: ++ DMERR("Failed to queue bio!"); ++ return -1; ++} ++ ++static int dmu_map_remap_case(struct dmu_device *dev, ++ struct dmu_map *remap, ++ struct bio *bio) ++{ ++ int ret = 0; ++ int rw; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&remap->lock, flags); ++ ++ /* ++ * We've got it locked, so make sure the info is still valid ++ * before we use it ++ */ ++ if (!dmu_get_flag(&remap->flags, DMU_FLAG_INUSE)) { ++ ret = -1; ++ DMERR("Got an invalid remap from hashtable"); ++ goto unlock; ++ } else if (remap->org_block != dmu_block(dev, bio->bi_sector)) { ++ ret = -1; ++ DMERR("Aiee, org block changed underneath us!"); ++ goto unlock; ++ } ++ ++ rw = dmu_get_flag(&remap->flags, DMU_FLAG_WR); ++ ++ ++ if (rw || (bio_rw(bio) == rw)) { ++ if (dmu_get_flag(&remap->flags, DMU_FLAG_VALID)) { ++ __bio_remap(bio, remap); ++ ret = 1; ++ } else { ++ bio_list_add(&remap->bios, bio); ++ } ++ } else { ++ ret = -1; ++// printk("Remap doesn't match perms: %llu (%c!=%c)\n", ++// remap->org_block, ++// rw ? 'W':'R', ++// bio_rw(bio) ? 'W':'R'); ++ } ++ ++ unlock: ++ spin_unlock_irqrestore(&remap->lock, flags); ++ ++ return ret; ++} ++ ++static int dmu_map_request_case(struct dmu_device *dev, ++ struct userspace_request *req, ++ struct bio *bio) ++{ ++ int ret = 0; ++ int req_rw = dmu_get_flag(&req->flags, DMU_FLAG_WR); ++ unsigned long flags; ++ ++ spin_lock(&req->lock); ++ spin_lock_irqsave(&req->remap->lock, flags); ++ ++ if (!req_rw && bio_rw(bio) && !req->sent) { ++ /* Convert to R/W and Queue */ ++ dmu_set_flag(&req->flags, DMU_FLAG_WR); ++ bio_list_add(&req->remap->bios, bio); ++ } else if (!req_rw && bio_rw(bio) && req->sent) { ++ /* Can't convert, must re-request */ ++ ret = -1; ++ } else { ++ /* Queue */ ++ bio_list_add(&req->remap->bios, bio); ++ } ++ ++ spin_unlock_irqrestore(&req->remap->lock, flags); ++ spin_unlock(&req->lock); ++ ++ return ret; ++} ++ ++DECLARE_MUTEX(map_mutex); ++ ++static int dmu_map(struct dm_target *ti, struct bio *bio, ++ union map_info *map_context) ++{ ++ struct dmu_device *dev = (struct dmu_device *) ti->private; ++ struct dmu_map *remap; ++ struct userspace_request *req; ++ int ret = 0; ++ u64 block; ++ ++ down(&map_mutex); ++ ++ map_context->ptr = NULL; ++ ++ block = dmu_block(dev, bio->bi_sector); ++ ++ remap = ht_find_map_dev(dev, block); ++ if (remap) { ++ ret = dmu_map_remap_case(dev, remap, bio); ++ if (ret >= 0) { ++ map_context->ptr = remap; ++ goto done; ++ } ++ ++ } ++ ++ req = find_existing_req(dev, block); ++ if (req) { ++ ret = dmu_map_request_case(dev, req, bio); ++ atomic_dec(&req->refcnt); ++ if (ret >= 0) { ++ map_context->ptr = req->remap; ++ goto done; ++ } ++ } ++ ++ ret = make_new_request(dev, bio, &map_context->ptr); ++ ++ done: ++ up(&map_mutex); ++ ++ return ret; ++} ++ ++static int dmu_status(struct dm_target *ti, status_type_t type, ++ char *result, unsigned int maxlen) ++{ ++ struct dmu_device *dev = (struct dmu_device *) ti->private; ++ ++ switch (type) { ++ case STATUSTYPE_INFO: ++ write_chardev_transport_info(dev, result, maxlen); ++ break; ++ ++ case STATUSTYPE_TABLE: ++ snprintf(result, maxlen, "%s %llu", ++ dev->key, ++ dev->block_size * 512); ++ break; ++ } ++ ++ return 0; ++} ++ ++static int __handle_bio_endio(struct dmu_map *remap, ++ struct bio *bio, ++ struct userspace_request **req) ++{ ++ int ret = 0; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&remap->lock, flags); ++ if (dmu_get_flag(&remap->flags,
DMU_FLAG_WAITING) && ++ remap->bios_waiting.head == NULL) { ++ /* First endio and waiting for resp from userspace */ ++ bio_list_add(&remap->bios_waiting, bio); ++ ++ /* Schedule request worker */ ++ INIT_WORK(&remap->endio_task, endio_worker, remap); ++ schedule_work(&remap->endio_task); ++ ++ ret = 1; ++ } else if (dmu_get_flag(&remap->flags, DMU_FLAG_WAITING)) { ++ /* Still waiting for resp from userspace */ ++ bio_list_add(&remap->bios_waiting, bio); ++ ret = 1; ++ } else if (remap->bios_waiting.head != NULL) { ++ /* Got resp from userspace but bios waiting list nonempty */ ++ if (bio == remap->bios_waiting.head) { ++ bio_list_pop(&remap->bios_waiting); ++ ret = 0; ++ } else { ++ bio_list_add(&remap->bios_waiting, bio); ++ ret = 1; ++ } ++ } ++ spin_unlock_irqrestore(&remap->lock, flags); ++ ++ return ret; ++} ++ ++static int dmu_end_io(struct dm_target *ti, struct bio *bio, ++ int error, union map_info *map_context) ++{ ++ struct dmu_map *remap; ++ struct userspace_request *req = NULL; ++ int ret = 0; ++ ++ remap = map_context->ptr; ++ ++ if (error) { ++ DMERR("Error in dmu_end_io"); ++ return -1; ++ } else if (!remap) { ++ return 0; ++ } ++ ++ ret = __handle_bio_endio(remap, bio, &req); ++ ++ return ret; ++} ++ ++struct target_type userspace_target = { ++ .name = "userspace", ++ .version = {0, 1, 0}, ++ .module = THIS_MODULE, ++ .ctr = dmu_ctr, ++ .dtr = dmu_dtr, ++ .map = dmu_map, ++ .status = dmu_status, ++ .end_io = dmu_end_io ++}; ++ ++static int destroy_mru_list(void) ++{ ++ struct dmu_map *map, *next; ++ int count = 0; ++ ++ spin_lock(&mru_list_lock); ++ ++ list_for_each_entry_safe(map, next, &mru_list, mru_list) { ++ list_del(&map->mru_list); ++ kmem_cache_free(remap_cache, map); ++ count++; ++ } ++ ++ spin_unlock(&mru_list_lock); ++ ++ return count; ++} ++ ++int __init dm_userspace_init(void) ++{ ++ int i; ++ int r = dm_register_target(&userspace_target); ++ if (r < 0) { ++ DMERR("Register failed %d", r); ++ return 0; ++ } ++ ++ spin_lock_init(&devices_lock); ++ spin_lock_init(&mru_list_lock); ++ ++ if (enable_watchdog) { ++ INIT_WORK(&wd, watchdog, NULL); ++ schedule_delayed_work(&wd, HZ); ++ } ++ ++ request_cache = ++ kmem_cache_create("dm-userspace-requests", ++ sizeof(struct userspace_request), ++ __alignof__ (struct userspace_request), ++ 0, NULL, NULL); ++ if (!request_cache) { ++ DMERR("Failed to allocate request cache"); ++ goto bad; ++ } ++ ++ remap_cache = ++ kmem_cache_create("dm-userspace-remaps", ++ sizeof(struct dmu_map), ++ __alignof__ (struct dmu_map), ++ 0, NULL, NULL); ++ if (!remap_cache) { ++ DMERR("Failed to allocate remap cache"); ++ goto bad2; ++ } ++ ++ request_pool = mempool_create(64, ++ mempool_alloc_slab, mempool_free_slab, ++ request_cache); ++ if (!request_pool) { ++ DMERR("Failed to allocate request pool"); ++ goto bad3; ++ } ++ ++ r = init_chardev_transport(); ++ if (!r) ++ goto bad4; ++ ++ for (i = 0; i < DMU_REMAP_RESERVE; i++) { ++ struct dmu_map *remap; ++ ++ remap = alloc_remap_atomic(NULL); ++ if (!remap) { ++ DMERR("Failed to allocate %i/%i reserve remap", ++ i, DMU_REMAP_RESERVE); ++ goto bad5; ++ } ++ init_remap(NULL, remap); ++ remap_hit(remap); ++ } ++ ++ return 1; ++ ++ bad5: ++ destroy_mru_list(); ++ bad4: ++ mempool_destroy(request_pool); ++ bad3: ++ kmem_cache_destroy(remap_cache); ++ bad2: ++ kmem_cache_destroy(request_cache); ++ bad: ++ dm_unregister_target(&userspace_target); ++ ++ return 0; ++} ++ ++void __exit dm_userspace_exit(void) ++{ ++ int r; ++ struct list_head *cursor, *next; ++ struct dmu_device *dev; ++ ++ if
(enable_watchdog) ++ if (!cancel_delayed_work(&wd)) ++ flush_scheduled_work(); ++ ++ spin_lock(&devices_lock); ++ ++ list_for_each_safe(cursor, next, &devices) { ++ dev = list_entry(cursor, struct dmu_device, list); ++ list_del(cursor); ++ destroy_dmu_device(&dev->users); ++ DMERR("Destroying hanging device %s", dev->key); ++ } ++ ++ spin_unlock(&devices_lock); ++ ++ cleanup_chardev_transport(); ++ ++ r = destroy_mru_list(); ++ ++ mempool_destroy(request_pool); ++ kmem_cache_destroy(request_cache); ++ kmem_cache_destroy(remap_cache); ++ ++ r = dm_unregister_target(&userspace_target); ++ if (r < 0) ++ DMERR("unregister failed %d", r); ++} ++ ++module_init(dm_userspace_init); ++module_exit(dm_userspace_exit); ++ ++module_param(enable_watchdog, int, S_IRUGO); ++ ++MODULE_DESCRIPTION(DM_NAME " userspace target"); ++MODULE_AUTHOR("Dan Smith"); ++MODULE_LICENSE("GPL"); +diff -purN ../pristine-linux-2.6.16.13/drivers/md/dm-userspace-chardev.c ./drivers/md/dm-userspace-chardev.c +--- ../pristine-linux-2.6.16.13/drivers/md/dm-userspace-chardev.c 1969-12-31 18:00:00.000000000 -0600 ++++ ./drivers/md/dm-userspace-chardev.c 2006-08-16 18:48:18.000000000 -0500 +@@ -0,0 +1,900 @@ ++/* ++ * Copyright (C) International Business Machines Corp., 2006 ++ * Author: Dan Smith <danms@us.ibm.com> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; under version 2 of the License. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ * ++ */ ++ ++#include <linux/spinlock.h> ++#include <linux/blkdev.h> ++#include <linux/mempool.h> ++#include <linux/dm-userspace.h> ++#include <linux/list.h> ++#include <linux/sched.h> ++#include <linux/wait.h> ++#include <linux/poll.h> ++#include <linux/fs.h> ++#include <linux/cdev.h> ++#include <asm/uaccess.h> ++ ++#include "dm.h" ++#include "dm-bio-list.h" ++#include "kcopyd.h" ++#include "dm-user.h" ++ ++#define DM_MSG_PREFIX "dm-userspace" ++ ++/* This allows for a cleaner separation between the dm-userspace ++ * device-mapper target, and the userspace transport used. 
Right now, ++ * only a chardev transport exists, but it''s possible that there could ++ * be more in the future ++ */ ++struct chardev_transport { ++ struct cdev cdev; ++ dev_t ctl_dev; ++ struct dmu_device *parent; ++}; ++ ++static void remap_flusher(struct dmu_map *remap); ++ ++static int have_pending_requests(struct dmu_device *dev) ++{ ++ struct userspace_request *req; ++ int ret = 0; ++ ++ /* FIXME: We could keep a count of how many waiting reqs ++ * there are, eliminating the need to count, and possibly the ++ * need to lock ++ */ ++ ++ spin_lock(&dev->lock); ++ ++ list_for_each_entry(req, &dev->requests, list) { ++ if (!req->sent) { ++ ret = 1; ++ break; ++ } ++ } ++ ++ spin_unlock(&dev->lock); ++ ++ return ret; ++} ++ ++static void copy_callback(int read_err, ++ unsigned int write_err, ++ void *data) ++{ ++ remap_flusher((struct dmu_map *)data); ++} ++ ++static void copy_block(struct dmu_map *remap) ++{ ++ struct io_region src, dst; ++ struct kcopyd_client *client; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&remap->lock, flags); ++ ++ src.bdev = remap->src->bdev; ++ src.sector = remap->org_block << remap->dev->block_shift; ++ src.count = remap->dev->block_size; ++ ++ dst.bdev = remap->dest->bdev; ++ dst.sector = (remap->new_block << remap->dev->block_shift); ++ dst.sector += remap->offset; ++ dst.count = remap->dev->block_size; ++ ++ client = remap->dev->kcopy; ++ ++ spin_unlock_irqrestore(&remap->lock, flags); ++ ++ kcopyd_copy(client, &src, 1, &dst, 0, copy_callback, remap); ++} ++ ++static void copy_or_flush(struct dmu_map *remap) ++{ ++ int copy; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&remap->lock, flags); ++ copy = dmu_get_flag(&remap->flags, DMU_FLAG_COPY_FIRST); ++ spin_unlock_irqrestore(&remap->lock, flags); ++ ++ if (copy) ++ copy_block(remap); ++ else ++ remap_flusher(remap); ++} ++ ++static struct bio *pop_and_remap(struct dmu_map *remap) ++{ ++ struct bio *bio = NULL; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&remap->lock, flags); ++ ++ bio = bio_list_pop(&remap->bios); ++ if (bio) ++ __bio_remap(bio, remap); ++ else { ++ /* If there are no more bios, we must set the VALID ++ * flag before we release the lock ++ */ ++ dmu_set_flag(&remap->flags, DMU_FLAG_VALID); ++ } ++ ++ spin_unlock_irqrestore(&remap->lock, flags); ++ ++ return bio; ++} ++ ++static void get_remap_attrs(struct dmu_map *remap, ++ int *temporary, ++ struct dmu_map **next) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&remap->lock, flags); ++ ++ *temporary = dmu_get_flag(&remap->flags, DMU_FLAG_TEMPORARY); ++ *next = remap->next; ++ remap->next = NULL; ++ ++ spin_unlock_irqrestore(&remap->lock, flags); ++} ++ ++static void remap_flusher(struct dmu_map *remap) ++{ ++ struct bio *bio; ++ int temporary = 0; ++ struct dmu_map *next; ++ ++ while (1) { ++ ++ bio = pop_and_remap(remap); ++ ++ if (bio) ++ generic_make_request(bio); ++ else ++ break; ++ } ++ ++ get_remap_attrs(remap, &temporary, &next); ++ ++ if (next) ++ copy_or_flush(next); ++ ++ if (temporary) { ++ free_remap(remap); ++ } ++} ++ ++static int send_userspace_message(uint8_t __user *buffer, ++ struct userspace_request *req) ++{ ++ int ret = 0; ++ struct dmu_msg_header hdr; ++ union { ++ struct dmu_msg_map_request map_req; ++ struct dmu_msg_status status_req; ++ struct dmu_msg_version ver_req; ++ } msgs; ++ ++ memset(&msgs, 0, sizeof(msgs)); ++ spin_lock(&req->lock); ++ ++ hdr.id = req->id; ++ ++ switch (req->type) { ++ case DM_USERSPACE_GET_VERSION: ++ hdr.msg_type = req->type; ++ hdr.payload_len = sizeof(msgs.ver_req); 
++ msgs.ver_req.kernel_ver = ++ userspace_target.version[0] << 16 | ++ userspace_target.version[1] << 8 | ++ userspace_target.version[2]; ++ ++ break; ++ ++ case DM_USERSPACE_MAP_BLOCK_REQ: ++ hdr.msg_type = req->type; ++ hdr.payload_len = sizeof(msgs.map_req); ++ msgs.map_req.org_block = ++ dmu_block(req->dev, req->remap->bios.head->bi_sector); ++ dmu_cpy_flag(&msgs.map_req.flags, req->flags, DMU_FLAG_RD); ++ dmu_cpy_flag(&msgs.map_req.flags, req->flags, DMU_FLAG_WR); ++ ++ break; ++ ++ case DM_USERSPACE_SYNC_COMPLETE: ++ case DM_USERSPACE_INVAL_COMPLETE: ++ case DM_USERSPACE_INVAL_FAILED: ++ hdr.msg_type = DM_USERSPACE_STATUS; ++ hdr.payload_len = sizeof(msgs.status_req); ++ msgs.status_req.status = req->type; ++ msgs.status_req.id_of_op = req->id; ++ ++ break; ++ ++ default: ++ DMWARN("Unknown message type %i", req->type); ++ ret = 0; ++ } ++ ++ spin_unlock(&req->lock); ++ ++ if (copy_to_user(buffer, &hdr, sizeof(hdr))) ++ return -EFAULT; ++ if (copy_to_user(buffer + sizeof(hdr), &msgs, hdr.payload_len)) ++ return -EFAULT; ++ ++ ret = sizeof(hdr) + hdr.payload_len; ++ ++ if ((req->type != DM_USERSPACE_MAP_BLOCK_REQ) && ++ (req->type != DM_USERSPACE_SYNC_COMPLETE)) { ++ /* Only some requests get responses, so we take others ++ * off the request queue here ++ */ ++ spin_lock(&req->dev->lock); ++ list_del(&req->list); ++ spin_unlock(&req->dev->lock); ++ mempool_free(req, request_pool); ++ } ++ ++ return ret; ++} ++ ++struct userspace_request *pluck_next_request(struct dmu_device *dev, ++ int size_available) ++{ ++ struct userspace_request *req, *match = NULL; ++ ++ spin_lock(&dev->lock); ++ ++ list_for_each_entry(req, &dev->requests, list) { ++ spin_lock(&req->lock); ++ if (!req->sent) { ++ if (dmu_get_msg_len(req->type) < size_available) { ++ req->sent = 1; ++ match = req; ++ } else { ++ /* Must break here to preserve order */ ++ spin_unlock(&req->lock); ++ break; ++ } ++ } ++ spin_unlock(&req->lock); ++ ++ if (match) ++ break; ++ } ++ ++ spin_unlock(&dev->lock); ++ ++ return match; ++} ++ ++ssize_t dmu_ctl_read(struct file *file, char __user *buffer, ++ size_t size, loff_t *offset) ++{ ++ ++ struct dmu_device *dev = (struct dmu_device *)file->private_data; ++ struct userspace_request *req = NULL; ++ int ret = 0, r; ++ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EACCES; ++ ++ while (!have_pending_requests(dev)) { ++ if (file->f_flags & O_NONBLOCK) { ++ return 0; ++ } ++ ++ if (wait_event_interruptible(dev->wqueue, ++ have_pending_requests(dev))) ++ return -ERESTARTSYS; ++ } ++ ++ while(ret < size) { ++ req = pluck_next_request(dev, size - ret); ++ if (!req) ++ /* One or more of the following conditions is true: ++ * 1. No more requests available for sending ++ * 2.
No more room in the outgoing buffer ++ */ ++ break; ++ ++ r = send_userspace_message((void *)(buffer + ret), req); ++ if (r == 0) ++ continue; ++ else if (r < 0) ++ return r; ++ ++ ret += r; ++ } ++ ++ return ret; ++} ++ ++/* ++ * Returns: ++ * 1 if we're chained to our parent ++ * 0 if parent is valid and was removed ++ * -1 if we gave our bios to the invalid parent ++ */ ++static int handle_parent_remap(struct dmu_map *parent, ++ struct dmu_map *remap, ++ struct dmu_msg_map_response *msg) ++{ ++ int ret = 0; ++ int free_parent = 0; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&parent->lock, flags); ++ ++ if (!dmu_get_flag(&parent->flags, DMU_FLAG_INUSE)) { ++ /* This is in the process of being destroyed, ++ * so we can't use it ++ */ ++ goto end_parent; ++ } ++ ++ if (!dmu_get_flag(&parent->flags, DMU_FLAG_VALID)) { ++ if (dmu_get_flag(&parent->flags, DMU_FLAG_WR) == ++ dmu_get_flag(&msg->flags, DMU_FLAG_WR) && ++ (parent->new_block == msg->new_block)) { ++ /* Perms match for this not-yet-valid remap, ++ so tag our bios on to it and bail */ ++ bio_list_merge(&parent->bios, &remap->bios); ++ bio_list_init(&remap->bios); ++ ret = -1; ++ } else { ++ /* Remove parent from remap table, and ++ * chain our new remap to this one so ++ * it will fire when parent goes ++ * valid ++ */ ++ list_del_init(&parent->list); ++ if (parent->next) { ++ DMERR("Parent already chained!"); ++ BUG(); ++ } ++ parent->next = remap; ++ dmu_set_flag(&parent->flags, DMU_FLAG_TEMPORARY); ++ ret = 1; ++ } ++ } else { ++ /* Remove existing valid remap */ ++ free_parent = 1; ++ } ++ ++ end_parent: ++ if (free_parent) ++ __free_remap(parent); ++ ++ spin_unlock_irqrestore(&parent->lock, flags); ++ ++ return ret; ++} ++ ++static int remap_request(struct dmu_msg_map_response *msg, ++ struct dmu_device *dev, uint32_t id) ++{ ++ struct dmu_map *remap = NULL, *parent = NULL; ++ struct target_device *s_dev = NULL, *d_dev = NULL; ++ int is_chained = 0; ++ struct userspace_request *cursor, *next, *req = NULL; ++ ++ /* See if we have a pending request that matches */ ++ spin_lock(&dev->lock); ++ list_for_each_entry_safe(cursor, next, &dev->requests, list) { ++ if ((cursor->type == DM_USERSPACE_MAP_BLOCK_REQ) && ++ (cursor->id == msg->id_of_req)) { ++ req = cursor; ++ list_del(&req->list); ++ break; ++ } ++ } ++ spin_unlock(&dev->lock); ++ ++ if (dmu_get_flag(&msg->flags, DMU_FLAG_COPY_FIRST)) { ++ s_dev = find_target(dev, MKDEV(msg->src_maj, msg->src_min)); ++ if (!s_dev) { ++ DMERR("Failed to find src device %i:%i", ++ msg->src_maj, msg->src_min); ++ goto bad; ++ } ++ } ++ ++ d_dev = find_target(dev, MKDEV(msg->dst_maj, msg->dst_min)); ++ if (!d_dev) { ++ DMERR("Failed to find dest device %i:%i", ++ msg->dst_maj, msg->dst_min); ++ goto bad; ++ } ++ ++ if (req) { ++ while (atomic_read(&req->refcnt) != 0) ++ /* Wait for exclusive use of request. Even ++ * though we have removed it from the list, ++ * someone still has a pointer to it, which ++ * means we must wait for them to finish with ++ * it before continuing. ++ */ ++ schedule(); ++ remap = req->remap; ++ mempool_free(req, request_pool); ++ } else { ++ /* Allocate a new remap early (before grabbing locks), ++ * since we will most likely need it, and we didn't ++ * get one with the request ++ */ ++ /* FIXME */ ++ remap = alloc_remap_atomic(dev); ++ if (!remap) { ++ DMERR("Failed to alloc remap!"); ++ goto bad; ++ } ++ init_remap(dev, remap); ++ } ++ ++ spin_lock(&dev->lock); ++ ++ /* FIXME: Now that we pass the remap with the req, do we need ++ IRQs disabled here?
*/ ++ spin_lock(&remap->lock); ++ remap->org_block = msg->org_block; ++ ++ /* Now, we insert the new remap into the table, and remove the ++ * existing map, if present, all while the device is locked ++ */ ++ ++ parent = ht_find_map(&dev->remaps, msg->org_block); ++ if (parent) { ++ is_chained = handle_parent_remap(parent, remap, msg); ++ if (is_chained < 0) { ++ __free_remap(remap); ++ spin_unlock(&remap->lock); ++ spin_unlock(&dev->lock); ++ return 1; ++ } ++ } ++ ++ if (dmu_get_flag(&msg->flags, DMU_FLAG_SYNC)) ++ dmu_set_flag(&remap->flags, DMU_FLAG_WAITING); ++ ++ remap->new_block = msg->new_block; ++ remap->offset = msg->offset; ++ remap->src = s_dev; ++ remap->dest = d_dev; ++ remap->dev = dev; ++ remap->id = id; ++ ++ dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_COPY_FIRST); ++ dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_TEMPORARY); ++ dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_SYNC); ++ dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_WR); ++ dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_RD); ++ dmu_clr_flag(&remap->flags, DMU_FLAG_VALID); ++ ++ spin_unlock(&remap->lock); ++ ++ ht_insert_map(&dev->remaps, remap); ++ ++ spin_unlock(&dev->lock); ++ ++ if (! is_chained) ++ copy_or_flush(remap); ++ ++ return 1; ++ ++ bad: ++ DMERR("Remap error: chaos may ensue"); ++ ++ return 0; ++} ++ ++/* ++ * Adds the request to the front of the queue so it''s picked up first ++ */ ++static void add_urgent_request(struct dmu_device *dev, ++ struct userspace_request *req) ++{ ++ spin_lock(&dev->lock); ++ list_add(&req->list, &dev->requests); ++ spin_unlock(&dev->lock); ++ ++ wake_up(&dev->wqueue); ++} ++ ++static int version_request(struct dmu_msg_version *msg, ++ struct dmu_device *dev, uint32_t id) ++{ ++ struct userspace_request *req; ++ ++ req = mempool_alloc(request_pool, GFP_NOIO); ++ if (!req) { ++ DMERR("Failed to alloc version response"); ++ return 0; ++ } ++ ++ init_request(dev, DM_USERSPACE_GET_VERSION, req); ++ add_urgent_request(dev, req); ++ ++ return 1; ++} ++ ++static int invalidate_request(struct dmu_msg_invalidate_map *msg, ++ struct dmu_device *dev, uint32_t id) ++{ ++ struct dmu_map *remap; ++ struct userspace_request *req; ++ int ret = 1; ++ unsigned long flags; ++ ++ remap = ht_find_map_dev(dev, msg->org_block); ++ if (!remap) ++ ret = 0; ++ else { ++ spin_lock(&dev->lock); ++ spin_lock_irqsave(&remap->lock, flags); ++ if (dmu_get_flag(&remap->flags, DMU_FLAG_VALID)) ++ ht_delete_map(&dev->remaps, remap); ++ else ++ ret = 0; ++ spin_unlock_irqrestore(&remap->lock, flags); ++ spin_unlock(&dev->lock); ++ } ++ ++ req = mempool_alloc(request_pool, GFP_NOIO); ++ if (!req) { ++ DMERR("Failed to allocate request"); ++ return 0; ++ } ++ ++ if (ret) ++ init_request(dev, DM_USERSPACE_INVAL_COMPLETE, req); ++ else ++ init_request(dev, DM_USERSPACE_INVAL_FAILED, req); ++ ++ req->u.block = msg->org_block; ++ req->id = id; ++ ++ add_request(dev, req); ++ ++ return ret; ++} ++ ++static void sync_complete(struct dmu_device *dev, uint32_t id_of_op) { ++ struct dmu_map *remap = NULL; ++ struct bio *bio; ++ struct userspace_request *req, *next; ++ unsigned long flags; ++ ++ spin_lock(&dev->lock); ++ list_for_each_entry_safe(req, next, &dev->requests, list) { ++ if (req->id == id_of_op) { ++ list_del(&req->list); ++ break; ++ } ++ } ++ spin_unlock(&dev->lock); ++ ++ if (!req) { ++ DMERR("Unable to complete unknown request: %u\n", ++ id_of_op); ++ return; ++ } ++ ++ while (atomic_read(&req->refcnt) != 0) ++ /* Wait for exclusive use of request. 
Even ++ * though we have removed it from the list, ++ * someone still has a pointer to it, which ++ * means we must wait for them to finish with ++ * it before continuing. ++ */ ++ schedule(); ++ ++ remap = req->remap; ++ mempool_free(req, request_pool); ++ ++ if (remap) { ++ spin_lock_irqsave(&remap->lock, flags); ++ dmu_clr_flag(&remap->flags, DMU_FLAG_WAITING); ++ spin_unlock_irqrestore(&remap->lock, flags); ++ while(1) { ++ spin_lock_irqsave(&remap->lock, flags); ++ bio = remap->bios_waiting.head; ++ spin_unlock_irqrestore(&remap->lock, flags); ++ if (!bio) ++ break; ++ bio->bi_end_io(bio, 0, 0); ++ } ++ } else { ++ DMERR("Unable to complete empty request: %u\n", ++ id_of_op); ++ } ++} ++ ++ssize_t dmu_ctl_write(struct file *file, const char __user *buffer, ++ size_t size, loff_t *offset) ++{ ++ struct dmu_device *dev = (struct dmu_device *)file->private_data; ++ int ret = 0; ++ struct dmu_msg_header hdr; ++ union { ++ struct dmu_msg_map_response map_rsp; ++ struct dmu_msg_invalidate_map inval_rsp; ++ struct dmu_msg_version ver_req; ++ struct dmu_msg_status status_rsp; ++ } msgs; ++ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EACCES; ++ ++ while ((ret + sizeof(hdr)) < size) { ++ if (copy_from_user(&hdr, buffer+ret, sizeof(hdr))) { ++ DMERR("%s copy_from_user failed!", __FUNCTION__); ++ ret = -EFAULT; ++ goto out; ++ } ++ ++ ret += sizeof(hdr); ++ ++ switch (hdr.msg_type) { ++ ++ case DM_USERSPACE_GET_VERSION: ++ if (hdr.payload_len != sizeof(msgs.ver_req)) { ++ DMERR("Malformed version request"); ++ break; ++ } ++ ++ if (copy_from_user(&msgs.ver_req, buffer+ret, ++ sizeof(msgs.ver_req))) { ++ DMERR("%s copy_from_user failed!", ++ __FUNCTION__); ++ ret = -EFAULT; ++ goto out; ++ } ++ ++ version_request(&msgs.ver_req, dev, hdr.id); ++ break; ++ ++ case DM_USERSPACE_MAP_BLOCK_RESP: ++ if (hdr.payload_len != sizeof(msgs.map_rsp)) { ++ DMERR("Malformed block response"); ++ break; ++ } ++ ++ if (copy_from_user(&msgs.map_rsp, buffer+ret, ++ sizeof(msgs.map_rsp))) { ++ DMERR("%s copy_from_user failed!", ++ __FUNCTION__); ++ ret = -EFAULT; ++ goto out; ++ } ++ ++ remap_request(&msgs.map_rsp, dev, ++ msgs.map_rsp.id_of_req); ++ break; ++ ++ case DM_USERSPACE_MAP_FAILED: ++ if (hdr.payload_len != sizeof(msgs.map_rsp)) { ++ DMERR("Malformed block failed response"); ++ break; ++ } ++ ++ if (copy_from_user(&msgs.map_rsp, buffer+ret, ++ sizeof(msgs.map_rsp))) { ++ DMERR("%s copy_from_user failed", ++ __FUNCTION__); ++ ret = -EFAULT; ++ goto out; ++ } ++ ++ DMERR("Userspace map failed"); ++ break; ++ ++ case DM_USERSPACE_MAP_INVALIDATE: ++ if (hdr.payload_len != sizeof(msgs.inval_rsp)) { ++ DMERR("Malformed invalidate request"); ++ break; ++ } ++ ++ if (copy_from_user(&msgs.inval_rsp, buffer+ret, ++ sizeof(msgs.inval_rsp))) { ++ DMERR("%s copy_from_user failed", ++ __FUNCTION__); ++ ret = -EFAULT; ++ goto out; ++ } ++ ++ invalidate_request(&msgs.inval_rsp, dev, hdr.id); ++ break; ++ ++ case DM_USERSPACE_STATUS: ++ if (hdr.payload_len != sizeof(msgs.status_rsp)) { ++ DMERR("Malformed invalidate request"); ++ break; ++ } ++ ++ if (copy_from_user(&msgs.status_rsp, buffer+ret, ++ sizeof(msgs.status_rsp))) { ++ DMERR("%s copy_from_user failed", ++ __FUNCTION__); ++ ret = -EFAULT; ++ goto out; ++ } ++ ++ if (msgs.status_rsp.status == ++ DM_USERSPACE_SYNC_COMPLETE) { ++ /* FIXME: check req */ ++ sync_complete(dev, msgs.status_rsp.id_of_op); ++ } ++ break; ++ ++ default: ++ DMWARN("Unknown request type: %i", hdr.msg_type); ++ } ++ ++ ret += hdr.payload_len; ++ } ++ out: ++ return ret; ++} ++ ++int
dmu_ctl_open(struct inode *inode, struct file *file) ++{ ++ struct chardev_transport *t; ++ struct dmu_device *dev; ++ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EACCES; ++ ++ t = container_of(inode->i_cdev, struct chardev_transport, cdev); ++ dev = t->parent; ++ ++ get_dev(dev); ++ ++ file->private_data = dev; ++ ++ return 0; ++} ++ ++int dmu_ctl_release(struct inode *inode, struct file *file) ++{ ++ struct dmu_device *dev; ++ ++ dev = (struct dmu_device *)file->private_data; ++ ++ put_dev(dev); ++ ++ return 0; ++} ++ ++unsigned dmu_ctl_poll(struct file *file, poll_table *wait) ++{ ++ struct dmu_device *dev = (struct dmu_device *)file->private_data; ++ unsigned mask = 0; ++ ++ poll_wait(file, &dev->wqueue, wait); ++ ++ if (have_pending_requests(dev)) ++ mask |= POLLIN | POLLRDNORM; ++ ++ return mask; ++} ++ ++static struct file_operations ctl_fops = { ++ .open = dmu_ctl_open, ++ .release = dmu_ctl_release, ++ .read = dmu_ctl_read, ++ .write = dmu_ctl_write, ++ .poll = dmu_ctl_poll, ++ .owner = THIS_MODULE, ++}; ++ ++static int get_free_minor(void) ++{ ++ struct dmu_device *dev; ++ int minor = 0; ++ ++ spin_lock(&devices_lock); ++ ++ while (1) { ++ list_for_each_entry(dev, &devices, list) { ++ struct chardev_transport *t = dev->transport_private; ++ if (MINOR(t->ctl_dev) == minor) ++ goto dupe; ++ } ++ break; ++ dupe: ++ minor++; ++ } ++ ++ spin_unlock(&devices_lock); ++ ++ return minor; ++} ++ ++int register_chardev_transport(struct dmu_device *dev) ++{ ++ struct chardev_transport *t; ++ int ret; ++ ++ dev->transport_private = kmalloc(sizeof(struct chardev_transport), ++ GFP_KERNEL); ++ t = dev->transport_private; ++ ++ if (!t) { ++ DMERR("Failed to allocate chardev transport"); ++ goto bad; ++ } ++ ++ t->ctl_dev = MKDEV(MAJOR(dmu_dev), get_free_minor()); ++ t->parent = dev; ++ ++ cdev_init(&t->cdev, &ctl_fops); ++ t->cdev.owner = THIS_MODULE; ++ t->cdev.ops = &ctl_fops; ++ ++ ret = cdev_add(&t->cdev, t->ctl_dev, 1); ++ if (ret < 0) { ++ DMERR("Failed to register control device %d:%d", ++ MAJOR(t->ctl_dev), MINOR(t->ctl_dev)); ++ goto bad; ++ } ++ ++ return 1; ++ ++ bad: ++ kfree(t); ++ return 0; ++} ++ ++void unregister_chardev_transport(struct dmu_device *dev) ++{ ++ struct chardev_transport *t = dev->transport_private; ++ ++ cdev_del(&t->cdev); ++ kfree(t); ++} ++ ++int init_chardev_transport(void) ++{ ++ int r; ++ ++ r = alloc_chrdev_region(&dmu_dev, 0, 10, "dm-userspace"); ++ if (r) { ++ DMERR("Failed to allocate chardev region"); ++ return 0; ++ } else ++ return 1; ++} ++ ++void cleanup_chardev_transport(void) ++{ ++ unregister_chrdev_region(dmu_dev, 10); ++} ++ ++void write_chardev_transport_info(struct dmu_device *dev, ++ char *buf, unsigned int maxlen) ++{ ++ struct chardev_transport *t = dev->transport_private; ++ ++ snprintf(buf, maxlen, "%x:%x", ++ MAJOR(t->ctl_dev), MINOR(t->ctl_dev)); ++} +diff -purN ../pristine-linux-2.6.16.13/drivers/md/dm-userspace.h ./drivers/md/dm-userspace.h +--- ../pristine-linux-2.6.16.13/drivers/md/dm-userspace.h 1969-12-31 18:00:00.000000000 -0600 ++++ ./drivers/md/dm-userspace.h 2006-08-16 18:48:18.000000000 -0500 +@@ -0,0 +1,147 @@ ++/* ++ * Copyright (C) International Business Machines Corp., 2006 ++ * Author: Dan Smith <danms@us.ibm.com> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; under version 2 of the License. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ * ++ */ ++ ++#ifndef __DM_USERSPACE_H ++#define __DM_USERSPACE_H ++ ++#include <linux/types.h> ++ ++/* ++ * Message Types ++ */ ++#define DM_USERSPACE_GET_VERSION 1 ++#define DM_USERSPACE_MAP_BLOCK_REQ 2 ++#define DM_USERSPACE_MAP_BLOCK_RESP 3 ++#define DM_USERSPACE_MAP_FAILED 4 ++#define DM_USERSPACE_MAP_INVALIDATE 5 ++#define DM_USERSPACE_STATUS 6 ++ ++/* ++ * Status codes ++ */ ++#define DM_USERSPACE_INVAL_COMPLETE 101 ++#define DM_USERSPACE_INVAL_FAILED 102 ++#define DM_USERSPACE_SYNC_COMPLETE 103 ++ ++/* ++ * Flags and associated macros ++ */ ++#define DMU_FLAG_VALID 1 ++#define DMU_FLAG_RD 2 ++#define DMU_FLAG_WR 4 ++#define DMU_FLAG_COPY_FIRST 8 ++#define DMU_FLAG_TEMPORARY 16 ++#define DMU_FLAG_INUSE 32 ++#define DMU_FLAG_SYNC 64 ++#define DMU_FLAG_WAITING 128 ++ ++static int dmu_get_flag(uint32_t *flags, uint32_t flag) ++{ ++ return (*flags & flag) != 0; ++} ++ ++static void dmu_set_flag(uint32_t *flags, uint32_t flag) ++{ ++ *flags |= flag; ++} ++ ++static void dmu_clr_flag(uint32_t *flags, uint32_t flag) ++{ ++ *flags &= (~flag); ++} ++ ++static void dmu_cpy_flag(uint32_t *flags, uint32_t src, uint32_t flag) ++{ ++ *flags = (*flags & ~flag) | (src & flag); ++} ++ ++/* ++ * This message header is sent in front of every message, in both ++ * directions ++ */ ++struct dmu_msg_header { ++ uint32_t msg_type; ++ uint32_t payload_len; ++ uint32_t id; ++}; ++ ++/* DM_USERSPACE_GET_VERSION */ ++struct dmu_msg_version { ++ uint32_t userspace_ver; ++ uint32_t kernel_ver; ++}; ++ ++/* For status codes */ ++struct dmu_msg_status { ++ uint32_t id_of_op; ++ uint32_t status; ++}; ++ ++/* DM_USERSPACE_MAP_BLOCK_REQ */ ++struct dmu_msg_map_request { ++ uint64_t org_block; ++ ++ uint32_t flags; ++}; ++ ++/* DM_USERSPACE_MAP_BLOCK_RESP ++ * DM_USERSPACE_MAP_BLOCK_FAILED ++ */ ++struct dmu_msg_map_response { ++ uint64_t org_block; ++ uint64_t new_block; ++ int64_t offset; ++ ++ uint32_t id_of_req; ++ uint32_t flags; ++ ++ uint32_t src_maj; ++ uint32_t src_min; ++ ++ uint32_t dst_maj; ++ uint32_t dst_min; ++}; ++ ++/* DM_USERSPACE_MAP_INVALIDATE */ ++struct dmu_msg_invalidate_map { ++ uint64_t org_block; ++}; ++ ++static inline int dmu_get_msg_len(int type) ++{ ++ switch (type) { ++ case DM_USERSPACE_GET_VERSION: ++ return sizeof(struct dmu_msg_version); ++ case DM_USERSPACE_INVAL_COMPLETE: ++ case DM_USERSPACE_INVAL_FAILED: ++ case DM_USERSPACE_STATUS: ++ return sizeof(struct dmu_msg_status); ++ case DM_USERSPACE_MAP_BLOCK_REQ: ++ return sizeof(struct dmu_msg_map_request); ++ case DM_USERSPACE_MAP_BLOCK_RESP: ++ case DM_USERSPACE_MAP_FAILED: ++ return sizeof(struct dmu_msg_map_response); ++ case DM_USERSPACE_MAP_INVALIDATE: ++ return sizeof(struct dmu_msg_invalidate_map); ++ default: ++ return -1; ++ }; ++} ++ ++#endif +diff -purN ../pristine-linux-2.6.16.13/drivers/md/Kconfig ./drivers/md/Kconfig +--- ../pristine-linux-2.6.16.13/drivers/md/Kconfig 2006-05-02 16:38:44.000000000 -0500 ++++ ./drivers/md/Kconfig 2006-08-16 18:48:18.000000000 -0500 +@@ -210,6 +210,12 @@ config DM_SNAPSHOT + ---help--- + Allow volume managers to take 
writeable snapshots of a device. + ++config DM_USERSPACE ++ tristate "Userspace target (EXPERIMENTAL)" ++ depends on BLK_DEV_DM && EXPERIMENTAL ++ ---help--- ++ A target that provides a userspace interface to device-mapper ++ + config DM_MIRROR + tristate "Mirror target (EXPERIMENTAL)" + depends on BLK_DEV_DM && EXPERIMENTAL +diff -purN ../pristine-linux-2.6.16.13/drivers/md/Makefile ./drivers/md/Makefile +--- ../pristine-linux-2.6.16.13/drivers/md/Makefile 2006-05-02 16:38:44.000000000 -0500 ++++ ./drivers/md/Makefile 2006-08-16 18:48:18.000000000 -0500 +@@ -14,6 +14,7 @@ raid6-objs := raid6main.o raid6algos.o r + raid6altivec1.o raid6altivec2.o raid6altivec4.o \ + raid6altivec8.o \ + raid6mmx.o raid6sse1.o raid6sse2.o ++dm-user-objs := dm-userspace.o dm-userspace-chardev.o + hostprogs-y := mktables + + # Note: link order is important. All raid personalities +@@ -37,6 +38,7 @@ obj-$(CONFIG_DM_MULTIPATH_EMC) += dm-emc + obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o + obj-$(CONFIG_DM_MIRROR) += dm-mirror.o + obj-$(CONFIG_DM_ZERO) += dm-zero.o ++obj-$(CONFIG_DM_USERSPACE) += dm-user.o + + quiet_cmd_unroll = UNROLL $@ + cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \ +diff -purN ../pristine-linux-2.6.16.13/include/linux/dm-userspace.h ./include/linux/dm-userspace.h +--- ../pristine-linux-2.6.16.13/include/linux/dm-userspace.h 1969-12-31 18:00:00.000000000 -0600 ++++ ./include/linux/dm-userspace.h 2006-08-16 18:48:28.000000000 -0500 +@@ -0,0 +1,147 @@ ++/* ++ * Copyright (C) International Business Machines Corp., 2006 ++ * Author: Dan Smith <danms@us.ibm.com> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; under version 2 of the License. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ * ++ */ ++ ++#ifndef __DM_USERSPACE_H ++#define __DM_USERSPACE_H ++ ++#include <linux/types.h> ++ ++/* ++ * Message Types ++ */ ++#define DM_USERSPACE_GET_VERSION 1 ++#define DM_USERSPACE_MAP_BLOCK_REQ 2 ++#define DM_USERSPACE_MAP_BLOCK_RESP 3 ++#define DM_USERSPACE_MAP_FAILED 4 ++#define DM_USERSPACE_MAP_INVALIDATE 5 ++#define DM_USERSPACE_STATUS 6 ++ ++/* ++ * Status codes ++ */ ++#define DM_USERSPACE_INVAL_COMPLETE 101 ++#define DM_USERSPACE_INVAL_FAILED 102 ++#define DM_USERSPACE_SYNC_COMPLETE 103 ++ ++/* ++ * Flags and associated macros ++ */ ++#define DMU_FLAG_VALID 1 ++#define DMU_FLAG_RD 2 ++#define DMU_FLAG_WR 4 ++#define DMU_FLAG_COPY_FIRST 8 ++#define DMU_FLAG_TEMPORARY 16 ++#define DMU_FLAG_INUSE 32 ++#define DMU_FLAG_SYNC 64 ++#define DMU_FLAG_WAITING 128 ++ ++static int dmu_get_flag(uint32_t *flags, uint32_t flag) ++{ ++ return (*flags & flag) != 0; ++} ++ ++static void dmu_set_flag(uint32_t *flags, uint32_t flag) ++{ ++ *flags |= flag; ++} ++ ++static void dmu_clr_flag(uint32_t *flags, uint32_t flag) ++{ ++ *flags &= (~flag); ++} ++ ++static void dmu_cpy_flag(uint32_t *flags, uint32_t src, uint32_t flag) ++{ ++ *flags = (*flags & ~flag) | (src & flag); ++} ++ ++/* ++ * This message header is sent in front of every message, in both ++ * directions ++ */ ++struct dmu_msg_header { ++ uint32_t msg_type; ++ uint32_t payload_len; ++ uint32_t id; ++}; ++ ++/* DM_USERSPACE_GET_VERSION */ ++struct dmu_msg_version { ++ uint32_t userspace_ver; ++ uint32_t kernel_ver; ++}; ++ ++/* For status codes */ ++struct dmu_msg_status { ++ uint32_t id_of_op; ++ uint32_t status; ++}; ++ ++/* DM_USERSPACE_MAP_BLOCK_REQ */ ++struct dmu_msg_map_request { ++ uint64_t org_block; ++ ++ uint32_t flags; ++}; ++ ++/* DM_USERSPACE_MAP_BLOCK_RESP ++ * DM_USERSPACE_MAP_BLOCK_FAILED ++ */ ++struct dmu_msg_map_response { ++ uint64_t org_block; ++ uint64_t new_block; ++ int64_t offset; ++ ++ uint32_t id_of_req; ++ uint32_t flags; ++ ++ uint32_t src_maj; ++ uint32_t src_min; ++ ++ uint32_t dst_maj; ++ uint32_t dst_min; ++}; ++ ++/* DM_USERSPACE_MAP_INVALIDATE */ ++struct dmu_msg_invalidate_map { ++ uint64_t org_block; ++}; ++ ++static inline int dmu_get_msg_len(int type) ++{ ++ switch (type) { ++ case DM_USERSPACE_GET_VERSION: ++ return sizeof(struct dmu_msg_version); ++ case DM_USERSPACE_INVAL_COMPLETE: ++ case DM_USERSPACE_INVAL_FAILED: ++ case DM_USERSPACE_STATUS: ++ return sizeof(struct dmu_msg_status); ++ case DM_USERSPACE_MAP_BLOCK_REQ: ++ return sizeof(struct dmu_msg_map_request); ++ case DM_USERSPACE_MAP_BLOCK_RESP: ++ case DM_USERSPACE_MAP_FAILED: ++ return sizeof(struct dmu_msg_map_response); ++ case DM_USERSPACE_MAP_INVALIDATE: ++ return sizeof(struct dmu_msg_invalidate_map); ++ default: ++ return -1; ++ }; ++} ++ ++#endif _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
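For anyone wanting to drive the target from userspace: the control protocol above is symmetric. dmu_ctl_read() hands out a struct dmu_msg_header followed by a type-specific payload for every pending request, and userspace answers by write()ing a header plus payload back to the same character device (whose major:minor the target reports through its status line, and which requires CAP_SYS_ADMIN to open). The following is only an illustrative sketch and not part of the patch: the /dev/dmu0 node name and the 8:1 destination device are assumptions, and the "policy" is simply to answer every DM_USERSPACE_MAP_BLOCK_REQ with an identity remap onto that one device, echoing the request's header id in id_of_req as dmu_ctl_write() expects.

/* Hypothetical example only -- not part of the patch above. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <linux/dm-userspace.h>

int main(void)
{
	char buf[4096];
	int fd = open("/dev/dmu0", O_RDWR);	/* assumed node name */

	if (fd < 0) {
		perror("open");
		return 1;
	}

	for (;;) {
		ssize_t len = read(fd, buf, sizeof(buf));
		ssize_t off = 0;

		if (len <= 0)
			continue;	/* blocking read; retry on EINTR */

		while (off + (ssize_t)sizeof(struct dmu_msg_header) <= len) {
			struct dmu_msg_header hdr;

			memcpy(&hdr, buf + off, sizeof(hdr));
			off += sizeof(hdr);
			if (off + (ssize_t)hdr.payload_len > len)
				break;

			if (hdr.msg_type == DM_USERSPACE_MAP_BLOCK_REQ) {
				struct dmu_msg_map_request req;
				struct dmu_msg_map_response rsp;
				struct dmu_msg_header rhdr;
				char out[sizeof(rhdr) + sizeof(rsp)];

				memcpy(&req, buf + off, sizeof(req));
				memset(&rsp, 0, sizeof(rsp));
				rsp.id_of_req = hdr.id;		/* echo the request id */
				rsp.org_block = req.org_block;
				rsp.new_block = req.org_block;	/* identity mapping */
				rsp.offset    = 0;
				rsp.flags     = req.flags;	/* keep the RD/WR bits */
				rsp.dst_maj   = 8;		/* assumed backing device */
				rsp.dst_min   = 1;

				rhdr.msg_type    = DM_USERSPACE_MAP_BLOCK_RESP;
				rhdr.payload_len = sizeof(rsp);
				rhdr.id          = hdr.id;

				memcpy(out, &rhdr, sizeof(rhdr));
				memcpy(out + sizeof(rhdr), &rsp, sizeof(rsp));
				if (write(fd, out, sizeof(out)) != (ssize_t)sizeof(out))
					perror("write");
			}

			off += hdr.payload_len;
		}
	}
}

Given a table of the form "0 <sectors> userspace <key> <block-size-in-bytes> <maj:min> ..." (dmu_ctr() divides the byte count by 512 and also uses it as split_io), a loop like this is enough to bring the mapped device online. A real daemon would additionally use DMU_FLAG_COPY_FIRST with src_maj/src_min for copy-on-write placement and, when DMU_FLAG_SYNC is set, acknowledge the resulting DM_USERSPACE_STATUS traffic, but it all follows the same read/parse/write pattern.
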
Signed-off-by: Ryan Grimm <grimm@us.ibm.com> Signed-off-by: Dan Smith <danms@us.ibm.com> # HG changeset patch # User Ryan Grimm <grimm@us.ibm.com> # Date 1156521490 18000 # Node ID 2cb702dcea0e44dcfb9c243943d3e523245ad495 # Parent d5eb5205ff3588cc151fb15d6201c26c67e87506 dm-userspace xen kernel patch diff -r d5eb5205ff35 -r 2cb702dcea0e buildconfigs/linux-defconfig_xen0_ia64 --- a/buildconfigs/linux-defconfig_xen0_ia64 Thu Aug 24 16:25:49 2006 +0100 +++ b/buildconfigs/linux-defconfig_xen0_ia64 Fri Aug 25 10:58:10 2006 -0500 @@ -485,6 +485,7 @@ CONFIG_MD=y CONFIG_MD=y # CONFIG_BLK_DEV_MD is not set # CONFIG_BLK_DEV_DM is not set +CONFIG_DM_USERSPACE=n # # Fusion MPT device support diff -r d5eb5205ff35 -r 2cb702dcea0e buildconfigs/linux-defconfig_xen0_x86_32 --- a/buildconfigs/linux-defconfig_xen0_x86_32 Thu Aug 24 16:25:49 2006 +0100 +++ b/buildconfigs/linux-defconfig_xen0_x86_32 Fri Aug 25 10:58:10 2006 -0500 @@ -600,6 +600,7 @@ CONFIG_DM_MIRROR=y CONFIG_DM_MIRROR=y # CONFIG_DM_ZERO is not set # CONFIG_DM_MULTIPATH is not set +CONFIG_DM_USERSPACE=m # # Fusion MPT device support diff -r d5eb5205ff35 -r 2cb702dcea0e buildconfigs/linux-defconfig_xen0_x86_64 --- a/buildconfigs/linux-defconfig_xen0_x86_64 Thu Aug 24 16:25:49 2006 +0100 +++ b/buildconfigs/linux-defconfig_xen0_x86_64 Fri Aug 25 10:58:10 2006 -0500 @@ -552,6 +552,7 @@ CONFIG_DM_MIRROR=y # CONFIG_DM_ZERO is not set CONFIG_DM_MULTIPATH=y CONFIG_DM_MULTIPATH_EMC=y +CONFIG_DM_USERSPACE=m # # Fusion MPT device support diff -r d5eb5205ff35 -r 2cb702dcea0e buildconfigs/linux-defconfig_xen_x86_32 --- a/buildconfigs/linux-defconfig_xen_x86_32 Thu Aug 24 16:25:49 2006 +0100 +++ b/buildconfigs/linux-defconfig_xen_x86_32 Fri Aug 25 10:58:10 2006 -0500 @@ -1187,6 +1187,7 @@ CONFIG_DM_ZERO=m CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_EMC=m +CONFIG_DM_USERSPACE=m # # Fusion MPT device support diff -r d5eb5205ff35 -r 2cb702dcea0e buildconfigs/linux-defconfig_xen_x86_64 --- a/buildconfigs/linux-defconfig_xen_x86_64 Thu Aug 24 16:25:49 2006 +0100 +++ b/buildconfigs/linux-defconfig_xen_x86_64 Fri Aug 25 10:58:10 2006 -0500 @@ -1130,6 +1130,7 @@ CONFIG_DM_ZERO=m CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_EMC=m +CONFIG_DM_USERSPACE=m # # Fusion MPT device support diff -r d5eb5205ff35 -r 2cb702dcea0e patches/linux-2.6.16.13/series --- a/patches/linux-2.6.16.13/series Thu Aug 24 16:25:49 2006 +0100 +++ b/patches/linux-2.6.16.13/series Fri Aug 25 10:58:10 2006 -0500 @@ -1,5 +1,6 @@ blktap-aio-16_03_06.patch blktap-aio-16_03_06.patch device_bind.patch +dmu.patch fix-hz-suspend.patch fix-ide-cd-pio-mode.patch i386-mach-io-check-nmi.patch diff -r d5eb5205ff35 -r 2cb702dcea0e patches/linux-2.6.16.13/dmu.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.13/dmu.patch Fri Aug 25 10:58:10 2006 -0500 @@ -0,0 +1,2591 @@ +diff -purN ../pristine-linux-2.6.16.13/drivers/md/dm-user.h ./drivers/md/dm-user.h +--- ../pristine-linux-2.6.16.13/drivers/md/dm-user.h 1969-12-31 18:00:00.000000000 -0600 ++++ ./drivers/md/dm-user.h 2006-08-16 18:48:18.000000000 -0500 +@@ -0,0 +1,209 @@ ++/* ++ * Copyright (C) International Business Machines Corp., 2006 ++ * Author: Dan Smith <danms@us.ibm.com> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; under version 2 of the License. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ * ++ */ ++ ++#ifndef __DM_USER_H ++#define __DM_USER_H ++ ++#include <linux/hardirq.h> ++ ++#define DMU_KEY_LEN 256 ++ ++extern struct target_type userspace_target; ++extern mempool_t *request_pool; ++extern dev_t dmu_dev; ++extern spinlock_t devices_lock; ++extern struct list_head devices; ++ ++/* ++ * A hash table of remaps ++ */ ++struct hash_table { ++ struct list_head *table; /* Array of lists (buckets) */ ++ uint64_t size; /* Number of buckets */ ++ uint32_t mask; /* Mask used to determine bucket */ ++ uint64_t count; /* Number of remaps in entire table */ ++}; ++ ++/* ++ * A block device that we can send bios to ++ */ ++struct target_device { ++ struct list_head list; /* Our place in the targets list */ ++ struct block_device *bdev; /* The target block_device */ ++ struct kref users; /* Self-destructing reference count */ ++}; ++ ++/* ++ * A dm-userspace device, which consists of multiple targets sharing a ++ * common key ++ */ ++struct dmu_device { ++ struct list_head list; /* Our place in the devices list */ ++ ++ spinlock_t lock; /* Protects all the fields below */ ++ ++ struct list_head requests; /* List of pending requests */ ++ struct list_head target_devs; /* List of devices we can target */ ++ struct hash_table remaps; /* Hash table of all our maps */ ++ ++ void *transport_private; /* Private data for userspace comms */ ++ ++ char key[DMU_KEY_LEN]; /* Unique name string for device */ ++ struct kref users; /* Self-destructing reference count */ ++ ++ wait_queue_head_t wqueue; /* To block while waiting for reqs */ ++ ++ uint64_t block_size; /* Block size for this device */ ++ uint64_t block_mask; /* Mask for offset in block */ ++ unsigned int block_shift; /* Shift to convert to/from block */ ++ ++ struct kcopyd_client *kcopy; /* Interface to kcopyd */ ++ ++ uint32_t id_counter; /* Used to generate request IDs */ ++}; ++ ++struct userspace_request { ++ struct list_head list; /* Our place on the request queue */ ++ ++ spinlock_t lock; /* Protects all the fields below */ ++ ++ struct dmu_device *dev; /* The DMU device that owns us */ ++ ++ int type; /* Type of request */ ++ int sent; /* Non-zero if we''ve been sent */ ++ uint32_t flags; /* Attribute flags */ ++ uint32_t id; /* Unique ID for sync with userspace */ ++ union { ++ uint64_t block; /* The block in question */ ++ } u; ++ atomic_t refcnt; /* Reference count */ ++ ++ struct dmu_map *remap; /* The remap we represent */ ++}; ++ ++struct dmu_map { ++ struct list_head list; /* Our place in a remap bucket chain */ ++ struct list_head mru_list; /* Our place on the MRU list */ ++ ++ spinlock_t lock; /* Protects all the fields below */ ++ ++ uint64_t org_block; /* Original block */ ++ uint64_t new_block; /* Destination block */ ++ int64_t offset; /* Sectors to offset remapped block */ ++ uint32_t flags; /* Attribute flags */ ++ uint32_t id; /* Unique ID for sync with userspace */ ++ ++ struct target_device *src; /* Source blkdev for COPY_FIRST */ ++ struct target_device *dest; /* Where the remapped block is */ ++ ++ struct bio_list bios; /* Bios queued for 
remapping */ ++ struct bio_list bios_waiting; /* Bios waiting for endio sync */ ++ ++ struct dmu_device *dev; /* The DMU device that owns us */ ++ struct dmu_map *next; /* Next remap that depends on us */ ++ ++ struct work_struct endio_task;/* Work to be done on bio endios */ ++}; ++ ++/* Find and grab a reference to a target device */ ++struct target_device *find_target(struct dmu_device *dev, ++ dev_t devno); ++ ++/* Object allocation, destruction, and initialization routines */ ++void init_remap(struct dmu_device *dev, struct dmu_map *remap); ++void init_request(struct dmu_device *dev, ++ int type, ++ struct userspace_request *req); ++void free_remap(struct dmu_map *remap); ++void __free_remap(struct dmu_map *remap); ++struct dmu_map *alloc_remap_atomic(struct dmu_device *dev); ++ ++/* Hash table manipulation */ ++struct dmu_map *ht_find_map(struct hash_table *ht, uint64_t block); ++void ht_insert_map(struct hash_table *ht, struct dmu_map *map); ++struct dmu_map *ht_find_map_dev(struct dmu_device *dev, uint64_t block); ++void ht_delete_map(struct hash_table *ht, struct dmu_map *map); ++ ++/* Character device transport functions */ ++int register_chardev_transport(struct dmu_device *dev); ++void unregister_chardev_transport(struct dmu_device *dev); ++int init_chardev_transport(void); ++void cleanup_chardev_transport(void); ++void write_chardev_transport_info(struct dmu_device *dev, ++ char *buf, unsigned int maxlen); ++ ++/* Return the block number for @sector */ ++static inline u64 dmu_block(struct dmu_device *dev, ++ sector_t sector) ++{ ++ return sector >> dev->block_shift; ++} ++ ++/* Return the sector offset in a block for @sector */ ++static inline u64 dmu_sector_offset(struct dmu_device *dev, ++ sector_t sector) ++{ ++ return sector & dev->block_mask; ++} ++ ++/* Return the starting sector for @block */ ++static inline u64 dmu_sector(struct dmu_device *dev, ++ uint64_t block) ++{ ++ return block << dev->block_shift; ++} ++ ++/* Add a request to a device''s request queue */ ++static void add_request(struct dmu_device *dev, ++ struct userspace_request *req) ++{ ++ spin_lock(&dev->lock); ++ list_add_tail(&req->list, &dev->requests); ++ spin_unlock(&dev->lock); ++ ++ wake_up(&dev->wqueue); ++} ++ ++/* Remap @bio based on the information in @remap */ ++static void __bio_remap(struct bio *bio, ++ struct dmu_map *remap) ++{ ++ BUG_ON(remap->dest == NULL); ++ ++ bio->bi_sector = dmu_sector(remap->dev, remap->new_block) + ++ dmu_sector_offset(remap->dev, bio->bi_sector) + ++ remap->offset; ++ ++ bio->bi_bdev = remap->dest->bdev; ++} ++ ++/* Increase the usage count for @dev */ ++static inline void get_dev(struct dmu_device *dev) ++{ ++ kref_get(&dev->users); ++} ++ ++/* Decrease the usage count for @dev */ ++void destroy_dmu_device(struct kref *ref); ++static inline void put_dev(struct dmu_device *dev) ++{ ++ kref_put(&dev->users, destroy_dmu_device); ++} ++ ++#endif +diff -purN ../pristine-linux-2.6.16.13/drivers/md/dm-userspace.c ./drivers/md/dm-userspace.c +--- ../pristine-linux-2.6.16.13/drivers/md/dm-userspace.c 1969-12-31 18:00:00.000000000 -0600 ++++ ./drivers/md/dm-userspace.c 2006-08-16 18:48:18.000000000 -0500 +@@ -0,0 +1,1132 @@ ++/* ++ * Copyright (C) International Business Machines Corp., 2006 ++ * Author: Dan Smith <danms@us.ibm.com> ++ * Author: Ryan Grimm <grimm@us.ibm.com> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; under version 2 
of the License. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ * ++ */ ++ ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/blkdev.h> ++#include <linux/bio.h> ++#include <linux/slab.h> ++#include <linux/spinlock.h> ++#include <linux/list.h> ++#include <linux/fs.h> ++#include <linux/cdev.h> ++#include <linux/types.h> ++#include <linux/poll.h> ++ ++#include <linux/dm-userspace.h> ++ ++#include "dm.h" ++#include "dm-bio-list.h" ++#include "kcopyd.h" ++#include "dm-user.h" ++ ++#define DMU_COPY_PAGES 256 ++#define DMU_REMAP_RESERVE 128 ++ ++#define DM_MSG_PREFIX "dm-userspace" ++ ++static kmem_cache_t *request_cache; ++static kmem_cache_t *remap_cache; ++ ++mempool_t *request_pool; ++ ++static int enable_watchdog = 0; ++static struct work_struct wd; ++ ++spinlock_t devices_lock; ++LIST_HEAD(devices); ++ ++static spinlock_t mru_list_lock; ++static LIST_HEAD(mru_list); ++ ++/* Device number for the control device */ ++dev_t dmu_dev; ++ ++static int error_bios(struct bio_list *bios) ++{ ++ struct bio *bio; ++ int count = 0; ++ ++ while ((bio = bio_list_pop(bios)) != NULL) { ++ bio_io_error(bio, bio->bi_size); ++ count++; ++ } ++ ++ if (count) ++ DMERR("*** Failed %i requests", count); ++ ++ return count; ++} ++ ++static void remap_hit(struct dmu_map *remap) ++{ ++ spin_lock(&mru_list_lock); ++ ++ list_del_init(&remap->mru_list); ++ list_add(&remap->mru_list, &mru_list); ++ ++ spin_unlock(&mru_list_lock); ++} ++ ++struct dmu_map *alloc_remap_atomic(struct dmu_device *dev) ++{ ++ struct dmu_map *remap = NULL; ++ ++ /* Try to allocate one from the cache */ ++ remap = kmem_cache_alloc(remap_cache, GFP_NOIO); ++ if (remap) { ++ INIT_LIST_HEAD(&remap->mru_list); ++ ++ spin_lock(&mru_list_lock); ++ list_add_tail(&remap->mru_list, &mru_list); ++ spin_unlock(&mru_list_lock); ++ ++ goto out; ++ } ++ ++ /* Unable to alloc one, so get the LRU item off the list */ ++ spin_lock(&mru_list_lock); ++ remap = list_entry(mru_list.prev, struct dmu_map, mru_list); ++ spin_unlock(&mru_list_lock); ++ ++ if (remap) { ++ struct dmu_device *dev = remap->dev; ++ unsigned long flags; ++ ++ DMINFO("Memory is low. 
Stealing the LRU remap..."); ++ ++ spin_lock(&dev->lock); ++ spin_lock_irqsave(&remap->lock, flags); ++ if (dmu_get_flag(&remap->flags, DMU_FLAG_INUSE)) { ++ /* Remove it from whatever device owns it */ ++ ++ if (!list_empty(&remap->list)) ++ list_del_init(&remap->list); ++ ++ dmu_clr_flag(&remap->flags, DMU_FLAG_INUSE); ++ ++ if (!dmu_get_flag(&remap->flags, DMU_FLAG_VALID)) { ++ /* If the LRU remap is not valid, ++ we''re in trouble */ ++ spin_unlock_irqrestore(&remap->lock, flags); ++ spin_unlock(&dev->lock); ++ printk(KERN_EMERG ++ "dm-userspace: Unable to allocate " ++ "or steal a remap!\n"); ++ goto out; ++ } ++ } ++ ++ spin_unlock_irqrestore(&remap->lock, flags); ++ spin_unlock(&dev->lock); ++ ++ remap_hit(remap); ++ } else { ++ DMERR("Failed to alloc or steal a remap!"); ++ } ++ ++ out: ++ return remap; ++} ++ ++void free_remap(struct dmu_map *remap) ++{ ++ unsigned long flags; ++ ++ if (error_bios(&remap->bios)) { ++ DMERR("Freed a map with in-flight data!"); ++ BUG(); ++ } ++ ++ spin_lock_irqsave(&remap->lock, flags); ++ dmu_clr_flag(&remap->flags, DMU_FLAG_INUSE); ++ spin_unlock_irqrestore(&remap->lock, flags); ++ ++ spin_lock(&remap->dev->lock); ++ list_del(&remap->list); ++ spin_unlock(&remap->dev->lock); ++ ++ spin_lock(&mru_list_lock); ++ list_del_init(&remap->mru_list); ++ list_add_tail(&remap->mru_list, &mru_list); ++ spin_unlock(&mru_list_lock); ++} ++ ++void __free_remap(struct dmu_map *remap) ++{ ++ if (error_bios(&remap->bios)) { ++ DMERR("Freed a map with in-flight data!"); ++ BUG(); ++ } ++ ++ dmu_clr_flag(&remap->flags, DMU_FLAG_INUSE); ++ ++ list_del(&remap->list); ++ ++ spin_lock(&mru_list_lock); ++ list_del_init(&remap->mru_list); ++ list_add_tail(&remap->mru_list, &mru_list); ++ spin_unlock(&mru_list_lock); ++} ++ ++static struct userspace_request *make_sync_req(struct dmu_device *dev, ++ struct dmu_map *remap) ++{ ++ struct userspace_request *req; ++ unsigned long flags; ++ ++ req = mempool_alloc(request_pool, GFP_NOIO); ++ if (!req) { ++ DMERR("Failed to allocate copy response"); ++ return NULL; ++ } ++ init_request(dev, DM_USERSPACE_SYNC_COMPLETE, req); ++ ++ spin_lock_irqsave(&remap->lock, flags); ++ req->id = remap->id; ++ spin_unlock_irqrestore(&remap->lock, flags); ++ ++ req->remap = remap; ++ ++ return req; ++} ++ ++static void endio_worker(void *data) ++{ ++ struct dmu_map *remap = data; ++ struct userspace_request *req = NULL; ++ ++ req = make_sync_req(remap->dev, remap); ++ ++ if (req) ++ add_request(req->dev, req); ++} ++ ++void init_remap(struct dmu_device *dev, struct dmu_map *remap) ++{ ++ spin_lock_init(&remap->lock); ++ remap->org_block = remap->new_block = 0; ++ remap->offset = 0; ++ remap->flags = 0; ++ dmu_set_flag(&remap->flags, DMU_FLAG_INUSE); ++ remap->src = remap->dest = NULL; ++ bio_list_init(&remap->bios); ++ bio_list_init(&remap->bios_waiting); ++ INIT_LIST_HEAD(&remap->list); ++ remap->dev = dev; ++ remap->next = NULL; ++ ++ INIT_WORK(&remap->endio_task, endio_worker, remap); ++} ++ ++void init_request(struct dmu_device *dev, ++ int type, ++ struct userspace_request *req) ++{ ++ spin_lock_init(&req->lock); ++ INIT_LIST_HEAD(&req->list); ++ req->dev = dev; ++ req->type = type; ++ req->sent = 0; ++ req->flags = 0; ++ if (type == DM_USERSPACE_SYNC_COMPLETE) { ++ req->u.block = 0; ++ req->id = 0; ++ } else { ++ spin_lock(&dev->lock); ++ dev->id_counter++; ++ if (dev->id_counter == 0) ++ dev->id_counter = 1; ++ req->id = dev->id_counter; ++ spin_unlock(&dev->lock); ++ } ++ atomic_set(&req->refcnt, 0); ++} ++ ++/* ++ * For an even block 
distribution, this is not too bad, but it could ++ * probably be better ++ */ ++static uint32_t ht_hash(struct hash_table *ht, uint64_t block) ++{ ++ return (uint32_t)block & ht->mask; ++} ++ ++static int ht_init(struct hash_table *ht, unsigned long size) ++{ ++ uint64_t i; ++ unsigned long pages; ++ unsigned int order = ffs((size * sizeof(struct list_head *)) / ++ PAGE_SIZE); ++ ++ if (order > 9) ++ return 0; ++ ++ pages = __get_free_pages(GFP_ATOMIC, order); ++ if (!pages) ++ return 0; ++ ++ ht->table = (void *)pages; ++ ht->size = size; ++ ht->count = 0; ++ ht->mask = size - 1; ++ ++ for (i = 0; i < size; i++) ++ INIT_LIST_HEAD(&ht->table[i]); ++ ++ return 1; ++} ++ ++static void ht_insert_bucket(struct dmu_map *map, struct list_head *list) ++{ ++ list_add_tail(&map->list, list); ++} ++ ++/* ++ * I''m sure this is quite dumb, but it works for now ++ */ ++static int ht_should_grow(struct hash_table *ht) ++{ ++ return ht->count > (2 * (ht->size / 4)); ++} ++ ++static void ht_grow_table(struct hash_table *ht); ++void ht_insert_map(struct hash_table *ht, struct dmu_map *map) ++{ ++ uint32_t addr; ++ ++ addr = ht_hash(ht, map->org_block) & ht->mask; ++ ++ BUG_ON(addr >= ht->size); ++ ++ ht_insert_bucket(map, &ht->table[addr]); ++ ht->count++; ++ ++ if (ht_should_grow(ht)) ++ ht_grow_table(ht); ++} ++ ++void ht_delete_map(struct hash_table *ht, struct dmu_map *map) ++{ ++ list_del_init(&map->list); ++ BUG_ON(ht->count == 0); ++ ht->count--; ++} ++ ++struct dmu_map *ht_find_map(struct hash_table *ht, uint64_t block) ++{ ++ uint32_t addr; ++ struct dmu_map *m; ++ ++ addr = ht_hash(ht, block) & ht->mask; ++ ++ BUG_ON(addr >= ht->size); ++ ++ list_for_each_entry(m, &ht->table[addr], list) { ++ if (m->org_block == block) { ++ remap_hit(m); ++ return m; ++ } ++ } ++ ++ return NULL; ++} ++ ++struct dmu_map *ht_find_map_dev(struct dmu_device *dev, uint64_t block) ++{ ++ struct dmu_map *remap; ++ ++ spin_lock(&dev->lock); ++ ++ remap = ht_find_map(&dev->remaps, block); ++ ++ spin_unlock(&dev->lock); ++ ++ return remap; ++} ++ ++static void ht_grow_table(struct hash_table *ht) ++{ ++ struct hash_table old_table; ++ uint64_t i; ++ ++ old_table = *ht; ++ ++ if (!ht_init(ht, old_table.size * 2)) ++ return; ++ ++ for (i = 0; i < old_table.size; i++ ) { ++ struct dmu_map *m, *n; ++ list_for_each_entry_safe(m, n, &old_table.table[i], ++ list) { ++ list_del_init(&m->list); ++ ht_insert_map(ht, m); ++ } ++ } ++ ++ free_pages((unsigned long)old_table.table, ++ ffs((old_table.size * sizeof(struct list_head *)) ++ / PAGE_SIZE)); ++} ++ ++static uint64_t ht_destroy_table(struct hash_table *ht) ++{ ++ uint64_t i, count = 0; ++ struct dmu_map *m, *n; ++ ++ for (i = 0; i < ht->size; i++) { ++ list_for_each_entry_safe(m, n, &ht->table[i], list) { ++ ht_delete_map(ht, m); ++ free_remap(m); ++ count++; ++ } ++ } ++ ++ free_pages((unsigned long)ht->table, ++ ffs((ht->size * sizeof(struct list_head *)) ++ / PAGE_SIZE)); ++ ++ return count; ++} ++ ++struct target_device *find_target(struct dmu_device *dev, ++ dev_t devno) ++{ ++ struct target_device *target, *match = NULL; ++ ++ spin_lock(&dev->lock); ++ list_for_each_entry(target, &dev->target_devs, list) { ++ if (target->bdev->bd_dev == devno) { ++ match = target; ++ break; ++ } ++ } ++ spin_unlock(&dev->lock); ++ ++ return match; ++} ++ ++static struct target_device *get_target(struct dmu_device *dev, ++ dev_t devno) ++{ ++ ++ struct target_device *target; ++ struct block_device *bdev; ++ ++ target = find_target(dev, devno); ++ if (target) ++ return target; ++ ++ bdev = 
open_by_devnum(devno, FMODE_READ | FMODE_WRITE); ++ if (IS_ERR(bdev)) { ++ DMERR("Unable to lookup device %x", devno); ++ return NULL; ++ } ++ ++ target = kmalloc(sizeof(*target), GFP_KERNEL); ++ if (!target) { ++ DMERR("Unable to alloc new target device"); ++ return NULL; ++ } ++ ++ target->bdev = bdev; ++ INIT_LIST_HEAD(&target->list); ++ ++ spin_lock(&dev->lock); ++ list_add_tail(&target->list, &dev->target_devs); ++ spin_unlock(&dev->lock); ++ ++ return target; ++} ++ ++/* Caller must hold dev->lock */ ++static void put_target(struct dmu_device *dev, ++ struct target_device *target) ++{ ++ list_del(&target->list); ++ ++ bd_release(target->bdev); ++ blkdev_put(target->bdev); ++ ++ kfree(target); ++} ++ ++/* ++ * This periodically dumps out some debug information. It''s really ++ * only useful while developing. ++ */ ++static void watchdog(void *data) ++{ ++ unsigned int v_remaps, i_remaps, reqs, s_reqs, devs = 0; ++ struct dmu_device *dev; ++ struct dmu_map *map; ++ struct userspace_request *req; ++ uint64_t i; ++ ++ spin_lock(&devices_lock); ++ ++ list_for_each_entry(dev, &devices, list) { ++ spin_lock(&dev->lock); ++ ++ v_remaps = i_remaps = reqs = s_reqs = 0; ++ ++ for (i = 0; i < dev->remaps.size; i++) { ++ list_for_each_entry(map, &dev->remaps.table[i], list) ++ if (dmu_get_flag(&map->flags, DMU_FLAG_VALID)) ++ v_remaps++; ++ else ++ i_remaps++; ++ } ++ ++ list_for_each_entry(req, &dev->requests, list) ++ if (req->sent) ++ s_reqs++; ++ else ++ reqs++; ++ ++ printk("Device " ++ " reqs: %u/%u " ++ " inv maps: %u " ++ " val maps: %u\n", ++ reqs, s_reqs, i_remaps, v_remaps); ++ devs++; ++ ++ spin_unlock(&dev->lock); ++ } ++ ++ spin_unlock(&devices_lock); ++ ++ schedule_delayed_work(&wd, HZ); ++} ++ ++void destroy_dmu_device(struct kref *ref) ++{ ++ struct dmu_device *dev; ++ struct list_head *cursor, *next; ++ uint64_t remaps; ++ ++ dev = container_of(ref, struct dmu_device, users); ++ ++ spin_lock(&devices_lock); ++ list_del(&dev->list); ++ spin_unlock(&devices_lock); ++ ++ list_for_each_safe(cursor, next, &dev->target_devs) { ++ struct target_device *target; ++ ++ target = list_entry(cursor, ++ struct target_device, ++ list); ++ ++ put_target(dev, target); ++ } ++ ++ remaps = ht_destroy_table(&dev->remaps); ++ ++ list_for_each_safe(cursor, next, &dev->requests) { ++ struct userspace_request *req; ++ ++ req = list_entry(cursor, ++ struct userspace_request, ++ list); ++ ++ list_del(&req->list); ++ ++ mempool_free(req, request_pool); ++ } ++ ++ kcopyd_client_destroy(dev->kcopy); ++ unregister_chardev_transport(dev); ++ ++ kfree(dev); ++} ++ ++static int init_dmu_device(struct dmu_device *dev, u32 block_size) ++{ ++ int ret; ++ ++ init_waitqueue_head(&dev->wqueue); ++ INIT_LIST_HEAD(&dev->list); ++ INIT_LIST_HEAD(&dev->requests); ++ INIT_LIST_HEAD(&dev->target_devs); ++ kref_init(&dev->users); ++ spin_lock_init(&dev->lock); ++ ++ dev->id_counter = 1; /* reserve 0 for unsolicited maps */ ++ ++ if (!ht_init(&dev->remaps, 2048)) { ++ DMERR("Unable to allocate hash table"); ++ return 0; ++ } ++ ++ dev->block_size = block_size; ++ dev->block_mask = block_size - 1; ++ dev->block_shift = ffs(block_size) - 1; ++ ++ ret = kcopyd_client_create(DMU_COPY_PAGES, &dev->kcopy); ++ if (ret) { ++ DMERR("Failed to initialize kcopyd client"); ++ return 0; ++ } ++ ++ return 1; ++} ++ ++static struct dmu_device *new_dmu_device(char *key, ++ struct dm_target *ti, ++ u32 block_size) ++{ ++ struct dmu_device *dev; ++ int ret; ++ ++ dev = kmalloc(sizeof(*dev), GFP_KERNEL); ++ if (dev == NULL) { ++ DMERR("Failed 
to allocate new userspace device"); ++ return NULL; ++ } ++ ++ if (!init_dmu_device(dev, block_size)) ++ goto bad1; ++ ++ snprintf(dev->key, DMU_KEY_LEN, "%s", key); ++ ++ ret = register_chardev_transport(dev); ++ if (!ret) ++ goto bad2; ++ ++ spin_lock(&devices_lock); ++ list_add(&dev->list, &devices); ++ spin_unlock(&devices_lock); ++ ++ return dev; ++ ++ bad2: ++ put_dev(dev); ++ bad1: ++ kfree(dev); ++ DMERR("Failed to create device"); ++ return NULL; ++} ++ ++static struct dmu_device *find_dmu_device(const char *key) ++{ ++ struct dmu_device *dev; ++ struct dmu_device *match = NULL; ++ ++ spin_lock(&devices_lock); ++ ++ list_for_each_entry(dev, &devices, list) { ++ spin_lock(&dev->lock); ++ if (strncmp(dev->key, key, DMU_KEY_LEN) == 0) { ++ match = dev; ++ spin_unlock(&dev->lock); ++ break; ++ } ++ spin_unlock(&dev->lock); ++ } ++ ++ spin_unlock(&devices_lock); ++ ++ return match; ++} ++ ++static int dmu_ctr(struct dm_target *ti, unsigned int argc, char **argv) ++{ ++ uint64_t block_size; ++ struct dmu_device *dev; ++ char *device_key; ++ char *block_size_param; ++ int target_idx = 2; ++ ++ if (argc < 3) { ++ ti->error = "Invalid argument count"; ++ return -EINVAL; ++ } ++ ++ device_key = argv[0]; ++ block_size_param = argv[1]; ++ ++ block_size = simple_strtoul(block_size_param, NULL, 10) / 512; ++ ++ dev = find_dmu_device(device_key); ++ if (dev == NULL) { ++ dev = new_dmu_device(device_key, ++ ti, ++ block_size); ++ if (dev == NULL) { ++ ti->error = "Failed to create device"; ++ goto bad; ++ } ++ } else { ++ get_dev(dev); ++ } ++ ++ spin_lock(&dev->lock); ++ if (dev->block_size != block_size) { ++ ti->error = "Invalid block size"; ++ goto bad; ++ } ++ spin_unlock(&dev->lock); ++ ++ /* Resolve target devices */ ++ do { ++ int maj, min; ++ sscanf(argv[target_idx], "%i:%i", &maj, &min); ++ if (!get_target(dev, MKDEV(maj, min))) { ++ DMERR("Failed to find target device %i:%i (%s)", ++ maj, min, argv[target_idx]); ++ goto out; ++ } ++ } while (++target_idx < argc); ++ ++ ti->private = dev; ++ ti->split_io = block_size; ++ ++ return 0; ++ ++ bad: ++ if (dev) { ++ spin_unlock(&dev->lock); ++ } ++ out: ++ if (dev) { ++ put_dev(dev); ++ } ++ ++ return -EINVAL; ++} ++ ++static void dmu_dtr(struct dm_target *ti) ++{ ++ struct dmu_device *dev = (struct dmu_device *) ti->private; ++ ++ put_dev(dev); ++} ++ ++/* Search @dev for an outstanding request for remapping @block */ ++static struct userspace_request *find_existing_req(struct dmu_device *dev, ++ uint64_t block) ++{ ++ struct userspace_request *req; ++ struct userspace_request *match = NULL; ++ ++ spin_lock(&dev->lock); ++ ++ list_for_each_entry_reverse(req, &dev->requests, list) { ++ if ((req->type == DM_USERSPACE_MAP_BLOCK_REQ) && ++ (req->remap->org_block == block)) { ++ match = req; ++ atomic_inc(&match->refcnt); ++ break; ++ } ++ } ++ ++ spin_unlock(&dev->lock); ++ ++ return match; ++} ++ ++static int make_new_request(struct dmu_device *dev, ++ struct bio *bio, ++ void **ctxptr) ++{ ++ struct userspace_request *req; ++ ++ req = mempool_alloc(request_pool, GFP_NOIO); ++ if (req == NULL) ++ goto bad; ++ ++ init_request(dev, DM_USERSPACE_MAP_BLOCK_REQ, req); ++ ++ dmu_set_flag(&req->flags, DMU_FLAG_RD); ++ if (bio_rw(bio)) ++ dmu_set_flag(&req->flags, DMU_FLAG_WR); ++ else ++ dmu_clr_flag(&req->flags, DMU_FLAG_WR); ++ ++ req->remap = alloc_remap_atomic(dev); ++ if (!req->remap) { ++ DMERR("Failed to alloc remap!"); ++ goto bad; ++ } ++ init_remap(dev, req->remap); ++ ++ bio_list_add(&req->remap->bios, bio); ++ req->remap->org_block = 
dmu_block(dev, bio->bi_sector); ++ ++ *ctxptr = req->remap; ++ ++ add_request(dev, req); ++ ++ return 0; ++ ++ bad: ++ DMERR("Failed to queue bio!"); ++ return -1; ++} ++ ++static int dmu_map_remap_case(struct dmu_device *dev, ++ struct dmu_map *remap, ++ struct bio *bio) ++{ ++ int ret = 0; ++ int rw; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&remap->lock, flags); ++ ++ /* ++ * We''ve got it locked, so make sure the info is still valid ++ * before we use it ++ */ ++ if (!dmu_get_flag(&remap->flags, DMU_FLAG_INUSE)) { ++ ret = -1; ++ DMERR("Got an invalid remap from hashtable"); ++ goto unlock; ++ } else if (remap->org_block != dmu_block(dev, bio->bi_sector)) { ++ ret = -1; ++ DMERR("Aiee, org block changed underneath us!"); ++ goto unlock; ++ } ++ ++ rw = dmu_get_flag(&remap->flags, DMU_FLAG_WR); ++ ++ ++ if (rw || (bio_rw(bio) == rw)) { ++ if (dmu_get_flag(&remap->flags, DMU_FLAG_VALID)) { ++ __bio_remap(bio, remap); ++ ret = 1; ++ } else { ++ bio_list_add(&remap->bios, bio); ++ } ++ } else { ++ ret = -1; ++// printk("Remap doesn''t match perms: %llu (%c!=%c)\n", ++// remap->org_block, ++// rw ? ''W'':''R'', ++// bio_rw(bio) ? ''W'':''R''); ++ } ++ ++ unlock: ++ spin_unlock_irqrestore(&remap->lock, flags); ++ ++ return ret; ++} ++ ++static int dmu_map_request_case(struct dmu_device *dev, ++ struct userspace_request *req, ++ struct bio *bio) ++{ ++ int ret = 0; ++ int req_rw = dmu_get_flag(&req->flags, DMU_FLAG_WR); ++ unsigned long flags; ++ ++ spin_lock(&req->lock); ++ spin_lock_irqsave(&req->remap->lock, flags); ++ ++ if (!req_rw && bio_rw(bio) && !req->sent) { ++ /* Convert to R/W and Queue */ ++ dmu_set_flag(&req->flags, DMU_FLAG_WR); ++ bio_list_add(&req->remap->bios, bio); ++ } else if (!req_rw && bio_rw(bio) && req->sent) { ++ /* Can''t convert, must re-request */ ++ ret = -1; ++ } else { ++ /* Queue */ ++ bio_list_add(&req->remap->bios, bio); ++ } ++ ++ spin_unlock_irqrestore(&req->remap->lock, flags); ++ spin_unlock(&req->lock); ++ ++ return ret; ++} ++ ++DECLARE_MUTEX(map_mutex); ++ ++static int dmu_map(struct dm_target *ti, struct bio *bio, ++ union map_info *map_context) ++{ ++ struct dmu_device *dev = (struct dmu_device *) ti->private; ++ struct dmu_map *remap; ++ struct userspace_request *req; ++ int ret = 0; ++ u64 block; ++ ++ down(&map_mutex); ++ ++ map_context->ptr = NULL; ++ ++ block = dmu_block(dev, bio->bi_sector); ++ ++ remap = ht_find_map_dev(dev, block); ++ if (remap) { ++ ret = dmu_map_remap_case(dev, remap, bio); ++ if (ret >= 0) { ++ map_context->ptr = remap; ++ goto done; ++ } ++ ++ } ++ ++ req = find_existing_req(dev, block); ++ if (req) { ++ ret = dmu_map_request_case(dev, req, bio); ++ atomic_dec(&req->refcnt); ++ if (ret >= 0) { ++ map_context->ptr = req->remap; ++ goto done; ++ } ++ } ++ ++ ret = make_new_request(dev, bio, &map_context->ptr); ++ ++ done: ++ up(&map_mutex); ++ ++ return ret; ++} ++ ++static int dmu_status(struct dm_target *ti, status_type_t type, ++ char *result, unsigned int maxlen) ++{ ++ struct dmu_device *dev = (struct dmu_device *) ti->private; ++ ++ switch (type) { ++ case STATUSTYPE_INFO: ++ write_chardev_transport_info(dev, result, maxlen); ++ break; ++ ++ case STATUSTYPE_TABLE: ++ snprintf(result, maxlen, "%s %llu", ++ dev->key, ++ dev->block_size * 512); ++ break; ++ } ++ ++ return 0; ++} ++ ++static int __handle_bio_endio(struct dmu_map *remap, ++ struct bio *bio, ++ struct userspace_request **req) ++{ ++ int ret = 0; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&remap->lock, flags); ++ if (dmu_get_flag(&remap->flags, 
DMU_FLAG_WAITING) && ++ remap->bios_waiting.head == NULL) { ++ /* First endio and waiting for resp from userspace */ ++ bio_list_add(&remap->bios_waiting, bio); ++ ++ /* Schedule request worker */ ++ INIT_WORK(&remap->endio_task, endio_worker, remap); ++ schedule_work(&remap->endio_task); ++ ++ ret = 1; ++ } else if (dmu_get_flag(&remap->flags, DMU_FLAG_WAITING)) { ++ /* Still waiting for resp from userspace */ ++ bio_list_add(&remap->bios_waiting, bio); ++ ret = 1; ++ } else if (remap->bios_waiting.head != NULL) { ++ /* Got resp from userspace but bios waiting list nonempty */ ++ if (bio == remap->bios_waiting.head) { ++ bio_list_pop(&remap->bios_waiting); ++ ret = 0; ++ } else { ++ bio_list_add(&remap->bios_waiting, bio); ++ ret = 1; ++ } ++ } ++ spin_unlock_irqrestore(&remap->lock, flags); ++ ++ return ret; ++} ++ ++static int dmu_end_io(struct dm_target *ti, struct bio *bio, ++ int error, union map_info *map_context) ++{ ++ struct dmu_map *remap; ++ struct userspace_request *req = NULL; ++ int ret = 0; ++ ++ remap = map_context->ptr; ++ ++ if (error) { ++ DMERR("Error in dmu_end_io"); ++ return -1; ++ } else if (!remap) { ++ return 0; ++ } ++ ++ ret = __handle_bio_endio(remap, bio, &req); ++ ++ return ret; ++} ++ ++struct target_type userspace_target = { ++ .name = "userspace", ++ .version = {0, 1, 0}, ++ .module = THIS_MODULE, ++ .ctr = dmu_ctr, ++ .dtr = dmu_dtr, ++ .map = dmu_map, ++ .status = dmu_status, ++ .end_io = dmu_end_io ++}; ++ ++static int destroy_mru_list(void) ++{ ++ struct dmu_map *map, *next; ++ int count = 0; ++ ++ spin_lock(&mru_list_lock); ++ ++ list_for_each_entry_safe(map, next, &mru_list, mru_list) { ++ list_del(&map->mru_list); ++ kmem_cache_free(remap_cache, map); ++ count++; ++ } ++ ++ spin_unlock(&mru_list_lock); ++ ++ return count; ++} ++ ++int __init dm_userspace_init(void) ++{ ++ int i; ++ int r = dm_register_target(&userspace_target); ++ if (r < 0) { ++ DMERR("Register failed %d", r); ++ return 0; ++ } ++ ++ spin_lock_init(&devices_lock); ++ spin_lock_init(&mru_list_lock); ++ ++ if (enable_watchdog) { ++ INIT_WORK(&wd, watchdog, NULL); ++ schedule_delayed_work(&wd, HZ); ++ } ++ ++ request_cache ++ kmem_cache_create("dm-userspace-requests", ++ sizeof(struct userspace_request), ++ __alignof__ (struct userspace_request), ++ 0, NULL, NULL); ++ if (!request_cache) { ++ DMERR("Failed to allocate request cache"); ++ goto bad; ++ } ++ ++ remap_cache ++ kmem_cache_create("dm-userspace-remaps", ++ sizeof(struct dmu_map), ++ __alignof__ (struct dmu_map), ++ 0, NULL, NULL); ++ if (!remap_cache) { ++ DMERR("Failed to allocate remap cache"); ++ goto bad2; ++ } ++ ++ request_pool = mempool_create(64, ++ mempool_alloc_slab, mempool_free_slab, ++ request_cache); ++ if (!request_pool) { ++ DMERR("Failed to allocate request pool"); ++ goto bad3; ++ } ++ ++ r = init_chardev_transport(); ++ if (!r) ++ goto bad4; ++ ++ for (i = 0; i < DMU_REMAP_RESERVE; i++) { ++ struct dmu_map *remap; ++ ++ remap = alloc_remap_atomic(NULL); ++ if (!remap) { ++ DMERR("Failed to allocate %i/%i reserve remap", ++ i, DMU_REMAP_RESERVE); ++ goto bad5; ++ } ++ init_remap(NULL, remap); ++ remap_hit(remap); ++ } ++ ++ return 1; ++ ++ bad5: ++ destroy_mru_list(); ++ bad4: ++ mempool_destroy(request_pool); ++ bad3: ++ kmem_cache_destroy(remap_cache); ++ bad2: ++ kmem_cache_destroy(request_cache); ++ bad: ++ dm_unregister_target(&userspace_target); ++ ++ return 0; ++} ++ ++void __exit dm_userspace_exit(void) ++{ ++ int r; ++ struct list_head *cursor, *next; ++ struct dmu_device *dev; ++ ++ if 
(enable_watchdog) ++ if (!cancel_delayed_work(&wd)) ++ flush_scheduled_work(); ++ ++ spin_lock(&devices_lock); ++ ++ list_for_each_safe(cursor, next, &devices) { ++ dev = list_entry(cursor, struct dmu_device, list); ++ list_del(cursor); ++ destroy_dmu_device(&dev->users); ++ DMERR("Destroying hanging device %s", dev->key); ++ } ++ ++ spin_unlock(&devices_lock); ++ ++ cleanup_chardev_transport(); ++ ++ r = destroy_mru_list(); ++ ++ mempool_destroy(request_pool); ++ kmem_cache_destroy(request_cache); ++ kmem_cache_destroy(remap_cache); ++ ++ r = dm_unregister_target(&userspace_target); ++ if (r < 0) ++ DMERR("unregister failed %d", r); ++} ++ ++module_init(dm_userspace_init); ++module_exit(dm_userspace_exit); ++ ++module_param(enable_watchdog, int, S_IRUGO); ++ ++MODULE_DESCRIPTION(DM_NAME " userspace target"); ++MODULE_AUTHOR("Dan Smith"); ++MODULE_LICENSE("GPL"); +diff -purN ../pristine-linux-2.6.16.13/drivers/md/dm-userspace-chardev.c ./drivers/md/dm-userspace-chardev.c +--- ../pristine-linux-2.6.16.13/drivers/md/dm-userspace-chardev.c 1969-12-31 18:00:00.000000000 -0600 ++++ ./drivers/md/dm-userspace-chardev.c 2006-08-16 18:48:18.000000000 -0500 +@@ -0,0 +1,900 @@ ++/* ++ * Copyright (C) International Business Machines Corp., 2006 ++ * Author: Dan Smith <danms@us.ibm.com> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; under version 2 of the License. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ * ++ */ ++ ++#include <linux/spinlock.h> ++#include <linux/blkdev.h> ++#include <linux/mempool.h> ++#include <linux/dm-userspace.h> ++#include <linux/list.h> ++#include <linux/sched.h> ++#include <linux/wait.h> ++#include <linux/poll.h> ++#include <linux/fs.h> ++#include <linux/cdev.h> ++#include <asm/uaccess.h> ++ ++#include "dm.h" ++#include "dm-bio-list.h" ++#include "kcopyd.h" ++#include "dm-user.h" ++ ++#define DM_MSG_PREFIX "dm-userspace" ++ ++/* This allows for a cleaner separation between the dm-userspace ++ * device-mapper target, and the userspace transport used. 
Right now, ++ * only a chardev transport exists, but it''s possible that there could ++ * be more in the future ++ */ ++struct chardev_transport { ++ struct cdev cdev; ++ dev_t ctl_dev; ++ struct dmu_device *parent; ++}; ++ ++static void remap_flusher(struct dmu_map *remap); ++ ++static int have_pending_requests(struct dmu_device *dev) ++{ ++ struct userspace_request *req; ++ int ret = 0; ++ ++ /* FIXME: We could keep a count of how many waiting reqs ++ * there are, eliminating the need to count, and possibly the ++ * need to lock ++ */ ++ ++ spin_lock(&dev->lock); ++ ++ list_for_each_entry(req, &dev->requests, list) { ++ if (!req->sent) { ++ ret = 1; ++ break; ++ } ++ } ++ ++ spin_unlock(&dev->lock); ++ ++ return ret; ++} ++ ++static void copy_callback(int read_err, ++ unsigned int write_err, ++ void *data) ++{ ++ remap_flusher((struct dmu_map *)data); ++} ++ ++static void copy_block(struct dmu_map *remap) ++{ ++ struct io_region src, dst; ++ struct kcopyd_client *client; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&remap->lock, flags); ++ ++ src.bdev = remap->src->bdev; ++ src.sector = remap->org_block << remap->dev->block_shift; ++ src.count = remap->dev->block_size; ++ ++ dst.bdev = remap->dest->bdev; ++ dst.sector = (remap->new_block << remap->dev->block_shift); ++ dst.sector += remap->offset; ++ dst.count = remap->dev->block_size; ++ ++ client = remap->dev->kcopy; ++ ++ spin_unlock_irqrestore(&remap->lock, flags); ++ ++ kcopyd_copy(client, &src, 1, &dst, 0, copy_callback, remap); ++} ++ ++static void copy_or_flush(struct dmu_map *remap) ++{ ++ int copy; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&remap->lock, flags); ++ copy = dmu_get_flag(&remap->flags, DMU_FLAG_COPY_FIRST); ++ spin_unlock_irqrestore(&remap->lock, flags); ++ ++ if (copy) ++ copy_block(remap); ++ else ++ remap_flusher(remap); ++} ++ ++static struct bio *pop_and_remap(struct dmu_map *remap) ++{ ++ struct bio *bio = NULL; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&remap->lock, flags); ++ ++ bio = bio_list_pop(&remap->bios); ++ if (bio) ++ __bio_remap(bio, remap); ++ else { ++ /* If there are no more bios, we must set the VALID ++ * flag before we release the lock ++ */ ++ dmu_set_flag(&remap->flags, DMU_FLAG_VALID); ++ } ++ ++ spin_unlock_irqrestore(&remap->lock, flags); ++ ++ return bio; ++} ++ ++static void get_remap_attrs(struct dmu_map *remap, ++ int *temporary, ++ struct dmu_map **next) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&remap->lock, flags); ++ ++ *temporary = dmu_get_flag(&remap->flags, DMU_FLAG_TEMPORARY); ++ *next = remap->next; ++ remap->next = NULL; ++ ++ spin_unlock_irqrestore(&remap->lock, flags); ++} ++ ++static void remap_flusher(struct dmu_map *remap) ++{ ++ struct bio *bio; ++ int temporary = 0; ++ struct dmu_map *next; ++ ++ while (1) { ++ ++ bio = pop_and_remap(remap); ++ ++ if (bio) ++ generic_make_request(bio); ++ else ++ break; ++ } ++ ++ get_remap_attrs(remap, &temporary, &next); ++ ++ if (next) ++ copy_or_flush(next); ++ ++ if (temporary) { ++ free_remap(remap); ++ } ++} ++ ++static int send_userspace_message(uint8_t __user *buffer, ++ struct userspace_request *req) ++{ ++ int ret = 0; ++ struct dmu_msg_header hdr; ++ union { ++ struct dmu_msg_map_request map_req; ++ struct dmu_msg_status status_req; ++ struct dmu_msg_version ver_req; ++ } msgs; ++ ++ memset(&msgs, 0, sizeof(msgs)); ++ spin_lock(&req->lock); ++ ++ hdr.id = req->id; ++ ++ switch (req->type) { ++ case DM_USERSPACE_GET_VERSION: ++ hdr.msg_type = req->type; ++ hdr.payload_len = sizeof(msgs.ver_req); 
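++		/* Pack the target's {major, minor, patch} version triple into
++		 * one word as (major << 16) | (minor << 8) | patch; this is
++		 * the kernel_ver value that userspace reads back.
++		 */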
++ msgs.ver_req.kernel_ver ++ userspace_target.version[0] << 16 | ++ userspace_target.version[1] << 8 | ++ userspace_target.version[2]; ++ ++ break; ++ ++ case DM_USERSPACE_MAP_BLOCK_REQ: ++ hdr.msg_type = req->type; ++ hdr.payload_len = sizeof(msgs.map_req); ++ msgs.map_req.org_block ++ dmu_block(req->dev, req->remap->bios.head->bi_sector); ++ dmu_cpy_flag(&msgs.map_req.flags, req->flags, DMU_FLAG_RD); ++ dmu_cpy_flag(&msgs.map_req.flags, req->flags, DMU_FLAG_WR); ++ ++ break; ++ ++ case DM_USERSPACE_SYNC_COMPLETE: ++ case DM_USERSPACE_INVAL_COMPLETE: ++ case DM_USERSPACE_INVAL_FAILED: ++ hdr.msg_type = DM_USERSPACE_STATUS; ++ hdr.payload_len = sizeof(msgs.status_req); ++ msgs.status_req.status = req->type; ++ msgs.status_req.id_of_op = req->id; ++ ++ break; ++ ++ default: ++ DMWARN("Unknown message type %i", req->type); ++ ret = 0; ++ } ++ ++ spin_unlock(&req->lock); ++ ++ if (copy_to_user(buffer, &hdr, sizeof(hdr))) ++ return -EFAULT; ++ if (copy_to_user(buffer + sizeof(hdr), &msgs, hdr.payload_len)) ++ return -EFAULT; ++ ++ ret = sizeof(hdr) + hdr.payload_len; ++ ++ if ((req->type != DM_USERSPACE_MAP_BLOCK_REQ) && ++ (req->type != DM_USERSPACE_SYNC_COMPLETE)) { ++ /* Only some requests get responses, so we take others ++ * off the request queue here ++ */ ++ spin_lock(&req->dev->lock); ++ list_del(&req->list); ++ spin_unlock(&req->dev->lock); ++ mempool_free(req, request_pool); ++ } ++ ++ return ret; ++} ++ ++struct userspace_request *pluck_next_request(struct dmu_device *dev, ++ int size_available) ++{ ++ struct userspace_request *req, *match = NULL; ++ ++ spin_lock(&dev->lock); ++ ++ list_for_each_entry(req, &dev->requests, list) { ++ spin_lock(&req->lock); ++ if (!req->sent) { ++ if (dmu_get_msg_len(req->type) < size_available) { ++ req->sent = 1; ++ match = req; ++ } else { ++ /* Must break here to preserve order */ ++ spin_unlock(&req->lock); ++ break; ++ } ++ } ++ spin_unlock(&req->lock); ++ ++ if (match) ++ break; ++ } ++ ++ spin_unlock(&dev->lock); ++ ++ return match; ++} ++ ++ssize_t dmu_ctl_read(struct file *file, char __user *buffer, ++ size_t size, loff_t *offset) ++{ ++ ++ struct dmu_device *dev = (struct dmu_device *)file->private_data; ++ struct userspace_request *req = NULL; ++ int ret = 0, r; ++ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EACCES; ++ ++ while (!have_pending_requests(dev)) { ++ if (file->f_flags & O_NONBLOCK) { ++ return 0; ++ } ++ ++ if (wait_event_interruptible(dev->wqueue, ++ have_pending_requests(dev))) ++ return -ERESTARTSYS; ++ } ++ ++ while(ret < size) { ++ req = pluck_next_request(dev, size - ret); ++ if (!req) ++ /* One or more of the following conditions is true: ++ * 1. No more requests available for sending ++ * 2. 
No more room in the outgoing buffer ++ */ ++ break; ++ ++ r = send_userspace_message((void *)(buffer + ret), req); ++ if (r == 0) ++ continue; ++ else if (r < 0) ++ return r; ++ ++ ret += r; ++ } ++ ++ return ret; ++} ++ ++/* ++ * Returns: ++ * 1 if we''re chained to our parent ++ * 0 if parent is valid and was removed ++ * -1 if we gave our bios to the invalid parent ++ */ ++static int handle_parent_remap(struct dmu_map *parent, ++ struct dmu_map *remap, ++ struct dmu_msg_map_response *msg) ++{ ++ int ret = 0; ++ int free_parent = 0; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&parent->lock, flags); ++ ++ if (!dmu_get_flag(&parent->flags, DMU_FLAG_INUSE)) { ++ /* This is in the process of being destroyed, ++ * so we can''t use it ++ */ ++ goto end_parent; ++ } ++ ++ if (!dmu_get_flag(&parent->flags, DMU_FLAG_VALID)) { ++ if (dmu_get_flag(&parent->flags, DMU_FLAG_WR) =++ dmu_get_flag(&msg->flags, DMU_FLAG_WR) && ++ (parent->new_block == msg->new_block)) { ++ /* Perms match for this not-yet-valid remap, ++ so tag our bios on to it and bail */ ++ bio_list_merge(&parent->bios, &remap->bios); ++ bio_list_init(&remap->bios); ++ ret = -1; ++ } else { ++ /* Remove parent from remap table, and ++ * chain our new remap to this one so ++ * it will fire when parent goes ++ * valid ++ */ ++ list_del_init(&parent->list); ++ if (parent->next) { ++ DMERR("Parent already chained!"); ++ BUG(); ++ } ++ parent->next = remap; ++ dmu_set_flag(&parent->flags, DMU_FLAG_TEMPORARY); ++ ret = 1; ++ } ++ } else { ++ /* Remove existing valid remap */ ++ free_parent = 1; ++ } ++ ++ end_parent: ++ if (free_parent) ++ __free_remap(parent); ++ ++ spin_unlock_irqrestore(&parent->lock, flags); ++ ++ return ret; ++} ++ ++static int remap_request(struct dmu_msg_map_response *msg, ++ struct dmu_device *dev, uint32_t id) ++{ ++ struct dmu_map *remap = NULL, *parent = NULL; ++ struct target_device *s_dev = NULL, *d_dev = NULL; ++ int is_chained = 0; ++ struct userspace_request *cursor, *next, *req = NULL; ++ ++ /* See if we have a pending request that matches */ ++ spin_lock(&dev->lock); ++ list_for_each_entry_safe(cursor, next, &dev->requests, list) { ++ if ((cursor->type == DM_USERSPACE_MAP_BLOCK_REQ) && ++ (cursor->id == msg->id_of_req)) { ++ req = cursor; ++ list_del(&req->list); ++ break; ++ } ++ } ++ spin_unlock(&dev->lock); ++ ++ if (dmu_get_flag(&msg->flags, DMU_FLAG_COPY_FIRST)) { ++ s_dev = find_target(dev, MKDEV(msg->src_maj, msg->src_min)); ++ if (!s_dev) { ++ DMERR("Failed to find src device %i:%i", ++ msg->src_maj, msg->src_min); ++ goto bad; ++ } ++ } ++ ++ d_dev = find_target(dev, MKDEV(msg->dst_maj, msg->dst_min)); ++ if (!d_dev) { ++ DMERR("Failed to find dest device %i:%i", ++ msg->dst_maj, msg->dst_min); ++ goto bad; ++ } ++ ++ if (req) { ++ while (atomic_read(&req->refcnt) != 0) ++ /* Wait for exclusive use of request. Even ++ * though we have removed it from the list, ++ * someone still has a pointer to it, which ++ * means we must wait for them to finish with ++ * it before continuing. ++ */ ++ schedule(); ++ remap = req->remap; ++ mempool_free(req, request_pool); ++ } else { ++ /* Allocate a new remap early (before grabbing locks), ++ * since we will most likely need it, and we didn''t ++ * get one with the request ++ */ ++ /* FIXME */ ++ remap = alloc_remap_atomic(dev); ++ if (!remap) { ++ DMERR("Failed to alloc remap!"); ++ goto bad; ++ } ++ init_remap(dev, remap); ++ } ++ ++ spin_lock(&dev->lock); ++ ++ /* FIXME: Now that we pass the remap with the req, do we need ++ IRQs disabled here? 
*/ ++ spin_lock(&remap->lock); ++ remap->org_block = msg->org_block; ++ ++ /* Now, we insert the new remap into the table, and remove the ++ * existing map, if present, all while the device is locked ++ */ ++ ++ parent = ht_find_map(&dev->remaps, msg->org_block); ++ if (parent) { ++ is_chained = handle_parent_remap(parent, remap, msg); ++ if (is_chained < 0) { ++ __free_remap(remap); ++ spin_unlock(&remap->lock); ++ spin_unlock(&dev->lock); ++ return 1; ++ } ++ } ++ ++ if (dmu_get_flag(&msg->flags, DMU_FLAG_SYNC)) ++ dmu_set_flag(&remap->flags, DMU_FLAG_WAITING); ++ ++ remap->new_block = msg->new_block; ++ remap->offset = msg->offset; ++ remap->src = s_dev; ++ remap->dest = d_dev; ++ remap->dev = dev; ++ remap->id = id; ++ ++ dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_COPY_FIRST); ++ dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_TEMPORARY); ++ dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_SYNC); ++ dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_WR); ++ dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_RD); ++ dmu_clr_flag(&remap->flags, DMU_FLAG_VALID); ++ ++ spin_unlock(&remap->lock); ++ ++ ht_insert_map(&dev->remaps, remap); ++ ++ spin_unlock(&dev->lock); ++ ++ if (! is_chained) ++ copy_or_flush(remap); ++ ++ return 1; ++ ++ bad: ++ DMERR("Remap error: chaos may ensue"); ++ ++ return 0; ++} ++ ++/* ++ * Adds the request to the front of the queue so it''s picked up first ++ */ ++static void add_urgent_request(struct dmu_device *dev, ++ struct userspace_request *req) ++{ ++ spin_lock(&dev->lock); ++ list_add(&req->list, &dev->requests); ++ spin_unlock(&dev->lock); ++ ++ wake_up(&dev->wqueue); ++} ++ ++static int version_request(struct dmu_msg_version *msg, ++ struct dmu_device *dev, uint32_t id) ++{ ++ struct userspace_request *req; ++ ++ req = mempool_alloc(request_pool, GFP_NOIO); ++ if (!req) { ++ DMERR("Failed to alloc version response"); ++ return 0; ++ } ++ ++ init_request(dev, DM_USERSPACE_GET_VERSION, req); ++ add_urgent_request(dev, req); ++ ++ return 1; ++} ++ ++static int invalidate_request(struct dmu_msg_invalidate_map *msg, ++ struct dmu_device *dev, uint32_t id) ++{ ++ struct dmu_map *remap; ++ struct userspace_request *req; ++ int ret = 1; ++ unsigned long flags; ++ ++ remap = ht_find_map_dev(dev, msg->org_block); ++ if (!remap) ++ ret = 0; ++ else { ++ spin_lock(&dev->lock); ++ spin_lock_irqsave(&remap->lock, flags); ++ if (dmu_get_flag(&remap->flags, DMU_FLAG_VALID)) ++ ht_delete_map(&dev->remaps, remap); ++ else ++ ret = 0; ++ spin_unlock_irqrestore(&remap->lock, flags); ++ spin_unlock(&dev->lock); ++ } ++ ++ req = mempool_alloc(request_pool, GFP_NOIO); ++ if (!req) { ++ DMERR("Failed to allocate request"); ++ return 0; ++ } ++ ++ if (ret) ++ init_request(dev, DM_USERSPACE_INVAL_COMPLETE, req); ++ else ++ init_request(dev, DM_USERSPACE_INVAL_FAILED, req); ++ ++ req->u.block = msg->org_block; ++ req->id = id; ++ ++ add_request(dev, req); ++ ++ return ret; ++} ++ ++static void sync_complete(struct dmu_device *dev, uint32_t id_of_op) { ++ struct dmu_map *remap = NULL; ++ struct bio *bio; ++ struct userspace_request *req, *next; ++ unsigned long flags; ++ ++ spin_lock(&dev->lock); ++ list_for_each_entry_safe(req, next, &dev->requests, list) { ++ if (req->id == id_of_op) { ++ list_del(&req->list); ++ break; ++ } ++ } ++ spin_unlock(&dev->lock); ++ ++ if (!req) { ++ DMERR("Unable to complete unknown request: %u\n", ++ id_of_op); ++ return; ++ } ++ ++ while (atomic_read(&req->refcnt) != 0) ++ /* Wait for exclusive use of request. 
Even ++ * though we have removed it from the list, ++ * someone still has a pointer to it, which ++ * means we must wait for them to finish with ++ * it before continuing. ++ */ ++ schedule(); ++ ++ remap = req->remap; ++ mempool_free(req, request_pool); ++ ++ if (remap) { ++ spin_lock_irqsave(&remap->lock, flags); ++ dmu_clr_flag(&remap->flags, DMU_FLAG_WAITING); ++ spin_unlock_irqrestore(&remap->lock, flags); ++ while(1) { ++ spin_lock_irqsave(&remap->lock, flags); ++ bio = remap->bios_waiting.head; ++ spin_unlock_irqrestore(&remap->lock, flags); ++ if (!bio) ++ break; ++ bio->bi_end_io(bio, 0, 0); ++ } ++ } else { ++ DMERR("Unable to complete empty request: %u\n", ++ id_of_op); ++ } ++} ++ ++ssize_t dmu_ctl_write(struct file *file, const char __user *buffer, ++ size_t size, loff_t *offset) ++{ ++ struct dmu_device *dev = (struct dmu_device *)file->private_data; ++ int ret = 0; ++ struct dmu_msg_header hdr; ++ union { ++ struct dmu_msg_map_response map_rsp; ++ struct dmu_msg_invalidate_map inval_rsp; ++ struct dmu_msg_version ver_req; ++ struct dmu_msg_status status_rsp; ++ } msgs; ++ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EACCES; ++ ++ while ((ret + sizeof(hdr)) < size) { ++ if (copy_from_user(&hdr, buffer+ret, sizeof(hdr))) { ++ DMERR("%s copy_from_user failed!", __FUNCTION__); ++ ret = -EFAULT; ++ goto out; ++ } ++ ++ ret += sizeof(hdr); ++ ++ switch (hdr.msg_type) { ++ ++ case DM_USERSPACE_GET_VERSION: ++ if (hdr.payload_len != sizeof(msgs.ver_req)) { ++ DMERR("Malformed version request"); ++ break; ++ } ++ ++ if (copy_from_user(&msgs.ver_req, buffer+ret, ++ sizeof(msgs.ver_req))) { ++ DMERR("%s copy_from_user failed!", ++ __FUNCTION__); ++ ret = -EFAULT; ++ goto out; ++ } ++ ++ version_request(&msgs.ver_req, dev, hdr.id); ++ break; ++ ++ case DM_USERSPACE_MAP_BLOCK_RESP: ++ if (hdr.payload_len != sizeof(msgs.map_rsp)) { ++ DMERR("Malformed block response"); ++ break; ++ } ++ ++ if (copy_from_user(&msgs.map_rsp, buffer+ret, ++ sizeof(msgs.map_rsp))) { ++ DMERR("%s copy_from_user failed!", ++ __FUNCTION__); ++ ret = -EFAULT; ++ goto out; ++ } ++ ++ remap_request(&msgs.map_rsp, dev, ++ msgs.map_rsp.id_of_req); ++ break; ++ ++ case DM_USERSPACE_MAP_FAILED: ++ if (hdr.payload_len != sizeof(msgs.map_rsp)) { ++ DMERR("Malformed block failed response"); ++ break; ++ } ++ ++ if (copy_from_user(&msgs.map_rsp, buffer+ret, ++ sizeof(msgs.map_rsp))) { ++ DMERR("%s copy_from_user failed", ++ __FUNCTION__); ++ ret = -EFAULT; ++ goto out; ++ } ++ ++ DMERR("Userspace map failed"); ++ break; ++ ++ case DM_USERSPACE_MAP_INVALIDATE: ++ if (hdr.payload_len != sizeof(msgs.inval_rsp)) { ++ DMERR("Malformed invalidate request"); ++ break; ++ } ++ ++ if (copy_from_user(&msgs.inval_rsp, buffer+ret, ++ sizeof(msgs.inval_rsp))) { ++ DMERR("%s copy_from_user failed", ++ __FUNCTION__); ++ ret = -EFAULT; ++ goto out; ++ } ++ ++ invalidate_request(&msgs.inval_rsp, dev, hdr.id); ++ break; ++ ++ case DM_USERSPACE_STATUS: ++ if (hdr.payload_len != sizeof(msgs.status_rsp)) { ++ DMERR("Malformed invalidate request"); ++ break; ++ } ++ ++ if (copy_from_user(&msgs.status_rsp, buffer+ret, ++ sizeof(msgs.status_rsp))) { ++ DMERR("%s copy_from_user failed", ++ __FUNCTION__); ++ ret = -EFAULT; ++ goto out; ++ } ++ ++ if (msgs.status_rsp.status =++ DM_USERSPACE_SYNC_COMPLETE) { ++ /* FIXME: check req */ ++ sync_complete(dev, msgs.status_rsp.id_of_op); ++ } ++ break; ++ ++ default: ++ DMWARN("Unknown request type: %i", hdr.msg_type); ++ } ++ ++ ret += hdr.payload_len; ++ } ++ out: ++ return ret; ++} ++ ++int 
dmu_ctl_open(struct inode *inode, struct file *file) ++{ ++ struct chardev_transport *t; ++ struct dmu_device *dev; ++ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EACCES; ++ ++ t = container_of(inode->i_cdev, struct chardev_transport, cdev); ++ dev = t->parent; ++ ++ get_dev(dev); ++ ++ file->private_data = dev; ++ ++ return 0; ++} ++ ++int dmu_ctl_release(struct inode *inode, struct file *file) ++{ ++ struct dmu_device *dev; ++ ++ dev = (struct dmu_device *)file->private_data; ++ ++ put_dev(dev); ++ ++ return 0; ++} ++ ++unsigned dmu_ctl_poll(struct file *file, poll_table *wait) ++{ ++ struct dmu_device *dev = (struct dmu_device *)file->private_data; ++ unsigned mask = 0; ++ ++ poll_wait(file, &dev->wqueue, wait); ++ ++ if (have_pending_requests(dev)) ++ mask |= POLLIN | POLLRDNORM; ++ ++ return mask; ++} ++ ++static struct file_operations ctl_fops = { ++ .open = dmu_ctl_open, ++ .release = dmu_ctl_release, ++ .read = dmu_ctl_read, ++ .write = dmu_ctl_write, ++ .poll = dmu_ctl_poll, ++ .owner = THIS_MODULE, ++}; ++ ++static int get_free_minor(void) ++{ ++ struct dmu_device *dev; ++ int minor = 0; ++ ++ spin_lock(&devices_lock); ++ ++ while (1) { ++ list_for_each_entry(dev, &devices, list) { ++ struct chardev_transport *t = dev->transport_private; ++ if (MINOR(t->ctl_dev) == minor) ++ goto dupe; ++ } ++ break; ++ dupe: ++ minor++; ++ } ++ ++ spin_unlock(&devices_lock); ++ ++ return minor; ++} ++ ++int register_chardev_transport(struct dmu_device *dev) ++{ ++ struct chardev_transport *t; ++ int ret; ++ ++ dev->transport_private = kmalloc(sizeof(struct chardev_transport), ++ GFP_KERNEL); ++ t = dev->transport_private; ++ ++ if (!t) { ++ DMERR("Failed to allocate chardev transport"); ++ goto bad; ++ } ++ ++ t->ctl_dev = MKDEV(MAJOR(dmu_dev), get_free_minor()); ++ t->parent = dev; ++ ++ cdev_init(&t->cdev, &ctl_fops); ++ t->cdev.owner = THIS_MODULE; ++ t->cdev.ops = &ctl_fops; ++ ++ ret = cdev_add(&t->cdev, t->ctl_dev, 1); ++ if (ret < 0) { ++ DMERR("Failed to register control device %d:%d", ++ MAJOR(t->ctl_dev), MINOR(t->ctl_dev)); ++ goto bad; ++ } ++ ++ return 1; ++ ++ bad: ++ kfree(t); ++ return 0; ++} ++ ++void unregister_chardev_transport(struct dmu_device *dev) ++{ ++ struct chardev_transport *t = dev->transport_private; ++ ++ cdev_del(&t->cdev); ++ kfree(t); ++} ++ ++int init_chardev_transport(void) ++{ ++ int r; ++ ++ r = alloc_chrdev_region(&dmu_dev, 0, 10, "dm-userspace"); ++ if (r) { ++ DMERR("Failed to allocate chardev region"); ++ return 0; ++ } else ++ return 1; ++} ++ ++void cleanup_chardev_transport(void) ++{ ++ unregister_chrdev_region(dmu_dev, 10); ++} ++ ++void write_chardev_transport_info(struct dmu_device *dev, ++ char *buf, unsigned int maxlen) ++{ ++ struct chardev_transport *t = dev->transport_private; ++ ++ snprintf(buf, maxlen, "%x:%x", ++ MAJOR(t->ctl_dev), MINOR(t->ctl_dev)); ++} +diff -purN ../pristine-linux-2.6.16.13/drivers/md/dm-userspace.h ./drivers/md/dm-userspace.h +--- ../pristine-linux-2.6.16.13/drivers/md/dm-userspace.h 1969-12-31 18:00:00.000000000 -0600 ++++ ./drivers/md/dm-userspace.h 2006-08-16 18:48:18.000000000 -0500 +@@ -0,0 +1,147 @@ ++/* ++ * Copyright (C) International Business Machines Corp., 2006 ++ * Author: Dan Smith <danms@us.ibm.com> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; under version 2 of the License. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ * ++ */ ++ ++#ifndef __DM_USERSPACE_H ++#define __DM_USERSPACE_H ++ ++#include <linux/types.h> ++ ++/* ++ * Message Types ++ */ ++#define DM_USERSPACE_GET_VERSION 1 ++#define DM_USERSPACE_MAP_BLOCK_REQ 2 ++#define DM_USERSPACE_MAP_BLOCK_RESP 3 ++#define DM_USERSPACE_MAP_FAILED 4 ++#define DM_USERSPACE_MAP_INVALIDATE 5 ++#define DM_USERSPACE_STATUS 6 ++ ++/* ++ * Status codes ++ */ ++#define DM_USERSPACE_INVAL_COMPLETE 101 ++#define DM_USERSPACE_INVAL_FAILED 102 ++#define DM_USERSPACE_SYNC_COMPLETE 103 ++ ++/* ++ * Flags and associated macros ++ */ ++#define DMU_FLAG_VALID 1 ++#define DMU_FLAG_RD 2 ++#define DMU_FLAG_WR 4 ++#define DMU_FLAG_COPY_FIRST 8 ++#define DMU_FLAG_TEMPORARY 16 ++#define DMU_FLAG_INUSE 32 ++#define DMU_FLAG_SYNC 64 ++#define DMU_FLAG_WAITING 128 ++ ++static int dmu_get_flag(uint32_t *flags, uint32_t flag) ++{ ++ return (*flags & flag) != 0; ++} ++ ++static void dmu_set_flag(uint32_t *flags, uint32_t flag) ++{ ++ *flags |= flag; ++} ++ ++static void dmu_clr_flag(uint32_t *flags, uint32_t flag) ++{ ++ *flags &= (~flag); ++} ++ ++static void dmu_cpy_flag(uint32_t *flags, uint32_t src, uint32_t flag) ++{ ++ *flags = (*flags & ~flag) | (src & flag); ++} ++ ++/* ++ * This message header is sent in front of every message, in both ++ * directions ++ */ ++struct dmu_msg_header { ++ uint32_t msg_type; ++ uint32_t payload_len; ++ uint32_t id; ++}; ++ ++/* DM_USERSPACE_GET_VERSION */ ++struct dmu_msg_version { ++ uint32_t userspace_ver; ++ uint32_t kernel_ver; ++}; ++ ++/* For status codes */ ++struct dmu_msg_status { ++ uint32_t id_of_op; ++ uint32_t status; ++}; ++ ++/* DM_USERSPACE_MAP_BLOCK_REQ */ ++struct dmu_msg_map_request { ++ uint64_t org_block; ++ ++ uint32_t flags; ++}; ++ ++/* DM_USERSPACE_MAP_BLOCK_RESP ++ * DM_USERSPACE_MAP_BLOCK_FAILED ++ */ ++struct dmu_msg_map_response { ++ uint64_t org_block; ++ uint64_t new_block; ++ int64_t offset; ++ ++ uint32_t id_of_req; ++ uint32_t flags; ++ ++ uint32_t src_maj; ++ uint32_t src_min; ++ ++ uint32_t dst_maj; ++ uint32_t dst_min; ++}; ++ ++/* DM_USERSPACE_MAP_INVALIDATE */ ++struct dmu_msg_invalidate_map { ++ uint64_t org_block; ++}; ++ ++static inline int dmu_get_msg_len(int type) ++{ ++ switch (type) { ++ case DM_USERSPACE_GET_VERSION: ++ return sizeof(struct dmu_msg_version); ++ case DM_USERSPACE_INVAL_COMPLETE: ++ case DM_USERSPACE_INVAL_FAILED: ++ case DM_USERSPACE_STATUS: ++ return sizeof(struct dmu_msg_status); ++ case DM_USERSPACE_MAP_BLOCK_REQ: ++ return sizeof(struct dmu_msg_map_request); ++ case DM_USERSPACE_MAP_BLOCK_RESP: ++ case DM_USERSPACE_MAP_FAILED: ++ return sizeof(struct dmu_msg_map_response); ++ case DM_USERSPACE_MAP_INVALIDATE: ++ return sizeof(struct dmu_msg_invalidate_map); ++ default: ++ return -1; ++ }; ++} ++ ++#endif +diff -purN ../pristine-linux-2.6.16.13/drivers/md/Kconfig ./drivers/md/Kconfig +--- ../pristine-linux-2.6.16.13/drivers/md/Kconfig 2006-05-02 16:38:44.000000000 -0500 ++++ ./drivers/md/Kconfig 2006-08-16 18:48:18.000000000 -0500 +@@ -210,6 +210,12 @@ config DM_SNAPSHOT + ---help--- + Allow volume managers to take 
writeable snapshots of a device. + ++config DM_USERSPACE ++ tristate "Userspace target (EXPERIMENTAL)" ++ depends on BLK_DEV_DM && EXPERIMENTAL ++ ---help--- ++ A target that provides a userspace interface to device-mapper ++ + config DM_MIRROR + tristate "Mirror target (EXPERIMENTAL)" + depends on BLK_DEV_DM && EXPERIMENTAL +diff -purN ../pristine-linux-2.6.16.13/drivers/md/Makefile ./drivers/md/Makefile +--- ../pristine-linux-2.6.16.13/drivers/md/Makefile 2006-05-02 16:38:44.000000000 -0500 ++++ ./drivers/md/Makefile 2006-08-16 18:48:18.000000000 -0500 +@@ -14,6 +14,7 @@ raid6-objs := raid6main.o raid6algos.o r + raid6altivec1.o raid6altivec2.o raid6altivec4.o \ + raid6altivec8.o \ + raid6mmx.o raid6sse1.o raid6sse2.o ++dm-user-objs := dm-userspace.o dm-userspace-chardev.o + hostprogs-y := mktables + + # Note: link order is important. All raid personalities +@@ -37,6 +38,7 @@ obj-$(CONFIG_DM_MULTIPATH_EMC) += dm-emc + obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o + obj-$(CONFIG_DM_MIRROR) += dm-mirror.o + obj-$(CONFIG_DM_ZERO) += dm-zero.o ++obj-$(CONFIG_DM_USERSPACE) += dm-user.o + + quiet_cmd_unroll = UNROLL $@ + cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \ +diff -purN ../pristine-linux-2.6.16.13/include/linux/dm-userspace.h ./include/linux/dm-userspace.h +--- ../pristine-linux-2.6.16.13/include/linux/dm-userspace.h 1969-12-31 18:00:00.000000000 -0600 ++++ ./include/linux/dm-userspace.h 2006-08-16 18:48:28.000000000 -0500 +@@ -0,0 +1,147 @@ ++/* ++ * Copyright (C) International Business Machines Corp., 2006 ++ * Author: Dan Smith <danms@us.ibm.com> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; under version 2 of the License. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ * ++ */ ++ ++#ifndef __DM_USERSPACE_H ++#define __DM_USERSPACE_H ++ ++#include <linux/types.h> ++ ++/* ++ * Message Types ++ */ ++#define DM_USERSPACE_GET_VERSION 1 ++#define DM_USERSPACE_MAP_BLOCK_REQ 2 ++#define DM_USERSPACE_MAP_BLOCK_RESP 3 ++#define DM_USERSPACE_MAP_FAILED 4 ++#define DM_USERSPACE_MAP_INVALIDATE 5 ++#define DM_USERSPACE_STATUS 6 ++ ++/* ++ * Status codes ++ */ ++#define DM_USERSPACE_INVAL_COMPLETE 101 ++#define DM_USERSPACE_INVAL_FAILED 102 ++#define DM_USERSPACE_SYNC_COMPLETE 103 ++ ++/* ++ * Flags and associated macros ++ */ ++#define DMU_FLAG_VALID 1 ++#define DMU_FLAG_RD 2 ++#define DMU_FLAG_WR 4 ++#define DMU_FLAG_COPY_FIRST 8 ++#define DMU_FLAG_TEMPORARY 16 ++#define DMU_FLAG_INUSE 32 ++#define DMU_FLAG_SYNC 64 ++#define DMU_FLAG_WAITING 128 ++ ++static int dmu_get_flag(uint32_t *flags, uint32_t flag) ++{ ++ return (*flags & flag) != 0; ++} ++ ++static void dmu_set_flag(uint32_t *flags, uint32_t flag) ++{ ++ *flags |= flag; ++} ++ ++static void dmu_clr_flag(uint32_t *flags, uint32_t flag) ++{ ++ *flags &= (~flag); ++} ++ ++static void dmu_cpy_flag(uint32_t *flags, uint32_t src, uint32_t flag) ++{ ++ *flags = (*flags & ~flag) | (src & flag); ++} ++ ++/* ++ * This message header is sent in front of every message, in both ++ * directions ++ */ ++struct dmu_msg_header { ++ uint32_t msg_type; ++ uint32_t payload_len; ++ uint32_t id; ++}; ++ ++/* DM_USERSPACE_GET_VERSION */ ++struct dmu_msg_version { ++ uint32_t userspace_ver; ++ uint32_t kernel_ver; ++}; ++ ++/* For status codes */ ++struct dmu_msg_status { ++ uint32_t id_of_op; ++ uint32_t status; ++}; ++ ++/* DM_USERSPACE_MAP_BLOCK_REQ */ ++struct dmu_msg_map_request { ++ uint64_t org_block; ++ ++ uint32_t flags; ++}; ++ ++/* DM_USERSPACE_MAP_BLOCK_RESP ++ * DM_USERSPACE_MAP_BLOCK_FAILED ++ */ ++struct dmu_msg_map_response { ++ uint64_t org_block; ++ uint64_t new_block; ++ int64_t offset; ++ ++ uint32_t id_of_req; ++ uint32_t flags; ++ ++ uint32_t src_maj; ++ uint32_t src_min; ++ ++ uint32_t dst_maj; ++ uint32_t dst_min; ++}; ++ ++/* DM_USERSPACE_MAP_INVALIDATE */ ++struct dmu_msg_invalidate_map { ++ uint64_t org_block; ++}; ++ ++static inline int dmu_get_msg_len(int type) ++{ ++ switch (type) { ++ case DM_USERSPACE_GET_VERSION: ++ return sizeof(struct dmu_msg_version); ++ case DM_USERSPACE_INVAL_COMPLETE: ++ case DM_USERSPACE_INVAL_FAILED: ++ case DM_USERSPACE_STATUS: ++ return sizeof(struct dmu_msg_status); ++ case DM_USERSPACE_MAP_BLOCK_REQ: ++ return sizeof(struct dmu_msg_map_request); ++ case DM_USERSPACE_MAP_BLOCK_RESP: ++ case DM_USERSPACE_MAP_FAILED: ++ return sizeof(struct dmu_msg_map_response); ++ case DM_USERSPACE_MAP_INVALIDATE: ++ return sizeof(struct dmu_msg_invalidate_map); ++ default: ++ return -1; ++ }; ++} ++ ++#endif _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
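For illustration, the control interface added by this patch can be exercised from userspace roughly as follows. This is only a minimal sketch and not part of the patch: it assumes the control node named on the command line has already been created with mknod from the major:minor pair the target reports through write_chardev_transport_info(), that the destination device given as <dst_maj> <dst_min> has already been made known to the dmu device as a target, and that the dm-userspace.h header added above is visible on the userspace include path. Each DM_USERSPACE_MAP_BLOCK_REQ is answered with an identity remap onto that destination device.

/* dmu-idmap.c -- illustrative sketch only; paths and argument handling
 * are assumptions, not defined by the patch. */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <poll.h>
#include <linux/dm-userspace.h>

int main(int argc, char **argv)
{
	char inbuf[4096], outbuf[sizeof(struct dmu_msg_header) +
				 sizeof(struct dmu_msg_map_response)];
	struct dmu_msg_header hdr, rhdr;
	struct dmu_msg_map_request req;
	struct dmu_msg_map_response rsp;
	struct pollfd pfd;
	ssize_t n;
	size_t off;
	int fd;

	if (argc != 4) {
		fprintf(stderr, "usage: %s <ctl-node> <dst_maj> <dst_min>\n",
			argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDWR);	/* dmu_ctl_open requires CAP_SYS_ADMIN */
	if (fd < 0) {
		perror("open");
		return 1;
	}

	pfd.fd = fd;
	pfd.events = POLLIN;

	for (;;) {
		/* dmu_ctl_poll signals POLLIN once requests are pending */
		if (poll(&pfd, 1, -1) < 0)
			break;

		n = read(fd, inbuf, sizeof(inbuf));
		if (n <= 0)
			break;

		/* The kernel packs one or more (header, payload) pairs per read */
		for (off = 0; off + sizeof(hdr) <= (size_t)n; off += hdr.payload_len) {
			memcpy(&hdr, inbuf + off, sizeof(hdr));
			off += sizeof(hdr);

			if (hdr.msg_type != DM_USERSPACE_MAP_BLOCK_REQ ||
			    hdr.payload_len != sizeof(req) ||
			    off + sizeof(req) > (size_t)n)
				continue;

			memcpy(&req, inbuf + off, sizeof(req));

			/* Answer with an identity mapping onto the destination device */
			memset(&rsp, 0, sizeof(rsp));
			rsp.org_block = req.org_block;
			rsp.new_block = req.org_block;
			rsp.offset = 0;
			rsp.id_of_req = hdr.id;	/* matched to the pending request in remap_request() */
			dmu_cpy_flag(&rsp.flags, req.flags, DMU_FLAG_RD);
			dmu_cpy_flag(&rsp.flags, req.flags, DMU_FLAG_WR);
			rsp.dst_maj = atoi(argv[2]);
			rsp.dst_min = atoi(argv[3]);

			rhdr.msg_type = DM_USERSPACE_MAP_BLOCK_RESP;
			rhdr.payload_len = sizeof(rsp);
			rhdr.id = hdr.id;

			memcpy(outbuf, &rhdr, sizeof(rhdr));
			memcpy(outbuf + sizeof(rhdr), &rsp, sizeof(rsp));
			if (write(fd, outbuf, sizeof(outbuf)) != sizeof(outbuf)) {
				perror("write");
				return 1;
			}
		}
	}

	close(fd);
	return 0;
}

Note that the kernel side matches the response to the pending request through id_of_req, clears DMU_FLAG_VALID itself, and only consults src_maj/src_min when DMU_FLAG_COPY_FIRST is set, so the sketch leaves those fields zeroed.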