Sunil Mushran
2011-Apr-26 23:03 UTC
[Ocfs2-devel] [PATCH 1/3] ocfs2/dlm: dlm_is_lockres_migrateable() returns boolean
Patch cleans up the gunk added by commit 388c4bcb4e63e88fb1f312a2f5f9eb2623afcf5b. dlm_is_lockres_migrateable() now returns 1 if lockresource is deemed migrateable and 0 if not. Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/dlm/dlmcommon.h | 12 ++++ fs/ocfs2/dlm/dlmmaster.c | 135 +++++++++++++++++---------------------------- 2 files changed, 63 insertions(+), 84 deletions(-) diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 4bdf7ba..1aac42a 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -401,6 +401,18 @@ static inline int dlm_lvb_is_empty(char *lvb) return 1; } +static inline char *dlm_list_in_text(enum dlm_lockres_list idx) +{ + if (idx == DLM_GRANTED_LIST) + return "granted"; + else if (idx == DLM_CONVERTING_LIST) + return "converting"; + else if (idx == DLM_BLOCKED_LIST) + return "blocked"; + else + return "unknown"; +} + static inline struct list_head * dlm_list_idx_to_ptr(struct dlm_lock_resource *res, enum dlm_lockres_list idx) { diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 9d67610..3e59ff9 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2339,65 +2339,55 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data) dlm_lockres_put(res); } -/* Checks whether the lockres can be migrated. Returns 0 if yes, < 0 - * if not. If 0, numlocks is set to the number of locks in the lockres. +/* + * A migrateable resource is one that is : + * 1. locally mastered, and, + * 2. zero local locks, and, + * 3. one or more non-local locks, or, one or more references + * Returns 1 if yes, 0 if not. */ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - int *numlocks, - int *hasrefs) + struct dlm_lock_resource *res) { - int ret; - int i; - int count = 0; + enum dlm_lockres_list idx; + int nonlocal = 0; struct list_head *queue; struct dlm_lock *lock; + u64 cookie; assert_spin_locked(&res->spinlock); - *numlocks = 0; - *hasrefs = 0; - - ret = -EINVAL; - if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { - mlog(0, "cannot migrate lockres with unknown owner!\n"); - goto leave; - } - - if (res->owner != dlm->node_num) { - mlog(0, "cannot migrate lockres this node doesn't own!\n"); - goto leave; - } + if (res->owner != dlm->node_num) + return 0; - ret = 0; - queue = &res->granted; - for (i = 0; i < 3; i++) { + for (idx = DLM_GRANTED_LIST; idx <= DLM_BLOCKED_LIST; idx++) { + queue = dlm_list_idx_to_ptr(res, idx); list_for_each_entry(lock, queue, list) { - ++count; - if (lock->ml.node == dlm->node_num) { - mlog(0, "found a lock owned by this node still " - "on the %s queue! will not migrate this " - "lockres\n", (i == 0 ? "granted" : - (i == 1 ? 
"converting" : - "blocked"))); - ret = -ENOTEMPTY; - goto leave; + if (lock->ml.node != dlm->node_num) { + nonlocal++; + continue; } + cookie = be64_to_cpu(lock->ml.cookie); + mlog(0, "%s: Not migrateable res %.*s, lock %u:%llu on " + "%s list\n", dlm->name, res->lockname.len, + res->lockname.name, + dlm_get_lock_cookie_node(cookie), + dlm_get_lock_cookie_seq(cookie), + dlm_list_in_text(idx)); + return 0; } - queue++; } - *numlocks = count; - - count = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); - if (count < O2NM_MAX_NODES) - *hasrefs = 1; + if (!nonlocal) { + nonlocal = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); + if (nonlocal >= O2NM_MAX_NODES) + return 0; + } - mlog(0, "%s: res %.*s, Migrateable, locks %d, refs %d\n", dlm->name, - res->lockname.len, res->lockname.name, *numlocks, *hasrefs); + mlog(0, "%s: res %.*s, Migrateable\n", dlm->name, res->lockname.len, + res->lockname.name); -leave: - return ret; + return 1; } /* @@ -2416,7 +2406,6 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, const char *name; unsigned int namelen; int mle_added = 0; - int numlocks, hasrefs; int wake = 0; if (!dlm_grab(dlm)) @@ -2427,19 +2416,13 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, mlog(0, "%s: Migrating %.*s to %u\n", dlm->name, namelen, name, target); - /* - * ensure this lockres is a proper candidate for migration - */ + /* Ensure this lockres is a proper candidate for migration */ spin_lock(&res->spinlock); - ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs); - if (ret < 0) { - spin_unlock(&res->spinlock); - goto leave; - } + ret = dlm_is_lockres_migrateable(dlm, res); spin_unlock(&res->spinlock); - /* no work to do */ - if (numlocks == 0 && !hasrefs) + /* No work to do */ + if (!ret) goto leave; /* @@ -2655,44 +2638,35 @@ leave: dlm_put(dlm); - mlog(0, "returning %d\n", ret); + mlog(0, "%s: Migrating %.*s to %u, returns %d\n", dlm->name, namelen, + name, target, ret); return ret; } #define DLM_MIGRATION_RETRY_MS 100 -/* Should be called only after beginning the domain leave process. +/* + * Should be called only after beginning the domain leave process. * There should not be any remaining locks on nonlocal lock resources, * and there should be no local locks left on locally mastered resources. * * Called with the dlm spinlock held, may drop it to do migration, but * will re-acquire before exit. * - * Returns: 1 if dlm->spinlock was dropped/retaken, 0 if never dropped */ + * Returns: 1 if dlm->spinlock was dropped/retaken, 0 if never dropped + */ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { - int ret; + int mig, ret; int lock_dropped = 0; - int numlocks, hasrefs; - spin_lock(&res->spinlock); - if (res->owner != dlm->node_num) { - if (!__dlm_lockres_unused(res)) { - mlog(ML_ERROR, "%s:%.*s: this node is not master, " - "trying to free this but locks remain\n", - dlm->name, res->lockname.len, res->lockname.name); - } - spin_unlock(&res->spinlock); - goto leave; - } + assert_spin_locked(&dlm->spinlock); - /* No need to migrate a lockres having no locks */ - ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs); - if (ret >= 0 && numlocks == 0 && !hasrefs) { - spin_unlock(&res->spinlock); - goto leave; - } + spin_lock(&res->spinlock); + mig = dlm_is_lockres_migrateable(dlm, res); spin_unlock(&res->spinlock); + if (!mig) + goto leave; /* Wheee! Migrate lockres here! Will sleep so drop spinlock. 
*/ spin_unlock(&dlm->spinlock); @@ -2701,15 +2675,8 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) ret = dlm_migrate_lockres(dlm, res, O2NM_MAX_NODES); if (ret >= 0) break; - if (ret == -ENOTEMPTY) { - mlog(ML_ERROR, "lockres %.*s still has local locks!\n", - res->lockname.len, res->lockname.name); - BUG(); - } - - mlog(0, "lockres %.*s: migrate failed, " - "retrying\n", res->lockname.len, - res->lockname.name); + mlog(0, "%s: res %.*s, Migrate failed, retrying\n", dlm->name, + res->lockname.len, res->lockname.name); msleep(DLM_MIGRATION_RETRY_MS); } spin_lock(&dlm->spinlock); -- 1.7.1
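For readers skimming the diff above, the rule in the new comment block can be restated as a small decision function. The sketch below is a deliberately simplified userspace model -- the struct and field names are made up for illustration and are not the kernel's struct dlm_lock_resource -- and it only mirrors the three conditions listed in the commit message.

#include <stdbool.h>
#include <stdio.h>

/* Toy model of a lock resource; hypothetical fields, not the real struct. */
struct model_res {
	int owner;		/* node currently mastering the resource */
	int local_locks;	/* locks held by this node */
	int nonlocal_locks;	/* locks held by other nodes */
	int refs;		/* other nodes with a bit set in the refmap */
};

/* Mirrors the commit message: locally mastered, zero local locks, and at
 * least one non-local lock or reference. */
static bool is_migrateable(const struct model_res *res, int this_node)
{
	if (res->owner != this_node)
		return false;		/* not locally mastered */
	if (res->local_locks)
		return false;		/* local locks still attached */
	return res->nonlocal_locks || res->refs;
}

int main(void)
{
	struct model_res r = { .owner = 0, .local_locks = 0,
			       .nonlocal_locks = 1, .refs = 0 };

	printf("migrateable: %d\n", is_migrateable(&r, 0));	/* 1 */
	r.nonlocal_locks = 0;
	printf("migrateable: %d\n", is_migrateable(&r, 0));	/* 0: nothing to hand off */
	return 0;
}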
Sunil Mushran
2011-Apr-26 23:03 UTC
[Ocfs2-devel] [PATCH 2/3] ocfs2/dlm: Add new dlm message DLM_BEGIN_EXIT_DOMAIN_MSG
This patch adds a new dlm message DLM_BEGIN_EXIT_DOMAIN_MSG and ups the dlm protocol to 1.2. o2dlm sends this new message in dlm_unregister_domain() to mark the beginning of the exit domain. This message is sent to all nodes in the domain. Currently o2dlm has no way of informing other nodes of its impending exit. This information is useful as the other nodes could disregard the exiting node in certain operations. For example, in resource migration. If two or more nodes were umounting in parallel, it would be more efficient if o2dlm were to choose a non-exiting node to be the new master node rather than an exiting one. Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/dlm/dlmcommon.h | 2 + fs/ocfs2/dlm/dlmdebug.c | 6 +++ fs/ocfs2/dlm/dlmdomain.c | 84 ++++++++++++++++++++++++++++++++++++++------ fs/ocfs2/dlm/dlmrecovery.c | 1 + 4 files changed, 82 insertions(+), 11 deletions(-) diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 1aac42a..d602abb 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -144,6 +144,7 @@ struct dlm_ctxt wait_queue_head_t dlm_join_events; unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; + unsigned long exit_domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; unsigned long recovery_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; struct dlm_recovery_ctxt reco; spinlock_t master_lock; @@ -460,6 +461,7 @@ enum { DLM_FINALIZE_RECO_MSG = 518, DLM_QUERY_REGION = 519, DLM_QUERY_NODEINFO = 520, + DLM_BEGIN_EXIT_DOMAIN_MSG = 521, }; struct dlm_reco_node_data diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 04a32be..56f82cb 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -756,6 +756,12 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len) buf + out, len - out); out += snprintf(buf + out, len - out, "\n"); + /* Exit Domain Map: xx xx xx */ + out += snprintf(buf + out, len - out, "Exit Domain Map: "); + out += stringify_nodemap(dlm->exit_domain_map, O2NM_MAX_NODES, + buf + out, len - out); + out += snprintf(buf + out, len - out, "\n"); + /* Live Map: xx xx xx */ out += snprintf(buf + out, len - out, "Live Map: "); out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES, diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 3b179d6..3aff23f 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -132,10 +132,12 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events); * New in version 1.1: * - Message DLM_QUERY_REGION added to support global heartbeat * - Message DLM_QUERY_NODEINFO added to allow online node removes + * New in version 1.2: + * - Message DLM_BEGIN_EXIT_DOMAIN_MSG added to mark start of exit domain */ static const struct dlm_protocol_version dlm_protocol = { .pv_major = 1, - .pv_minor = 1, + .pv_minor = 2, }; #define DLM_DOMAIN_BACKOFF_MS 200 @@ -486,6 +488,28 @@ static int dlm_no_joining_node(struct dlm_ctxt *dlm) return ret; } +static int dlm_begin_exit_domain_handler(struct o2net_msg *msg, u32 len, + void *data, void **ret_data) +{ + struct dlm_ctxt *dlm = data; + unsigned int node; + struct dlm_exit_domain *exit_msg = (struct dlm_exit_domain *) msg->buf; + + if (!dlm_grab(dlm)) + return 0; + + node = exit_msg->node_idx; + mlog(0, "%s: Node %u sent a begin exit domain message\n", dlm->name, node); + + spin_lock(&dlm->spinlock); + set_bit(node, dlm->exit_domain_map); + spin_unlock(&dlm->spinlock); + + dlm_put(dlm); + + return 0; +} + static void 
dlm_mark_domain_leaving(struct dlm_ctxt *dlm) { /* Yikes, a double spinlock! I need domain_lock for the dlm @@ -542,6 +566,7 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, spin_lock(&dlm->spinlock); clear_bit(node, dlm->domain_map); + clear_bit(node, dlm->exit_domain_map); __dlm_print_nodes(dlm); /* notify anything attached to the heartbeat events */ @@ -554,29 +579,56 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, return 0; } -static int dlm_send_one_domain_exit(struct dlm_ctxt *dlm, +static int dlm_send_one_domain_exit(struct dlm_ctxt *dlm, u32 msg_type, unsigned int node) { int status; struct dlm_exit_domain leave_msg; - mlog(0, "Asking node %u if we can leave the domain %s me = %u\n", - node, dlm->name, dlm->node_num); + mlog(0, "%s: Sending domain exit message %u to node %u\n", dlm->name, + msg_type, node); memset(&leave_msg, 0, sizeof(leave_msg)); leave_msg.node_idx = dlm->node_num; - status = o2net_send_message(DLM_EXIT_DOMAIN_MSG, dlm->key, - &leave_msg, sizeof(leave_msg), node, - NULL); + status = o2net_send_message(msg_type, dlm->key, &leave_msg, + sizeof(leave_msg), node, NULL); if (status < 0) - mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " - "node %u\n", status, DLM_EXIT_DOMAIN_MSG, dlm->key, node); - mlog(0, "status return %d from o2net_send_message\n", status); + mlog(ML_ERROR, "Error %d sending domain exit message %u " + "to node %u on domain %s\n", status, msg_type, node, + dlm->name); return status; } +static void dlm_begin_exit_domain(struct dlm_ctxt *dlm) +{ + int node = -1; + + /* Support for begin exit domain was added in 1.2 */ + if (dlm->dlm_locking_proto.pv_major == 1 && + dlm->dlm_locking_proto.pv_minor < 2) + return; + + /* + * Unlike DLM_EXIT_DOMAIN_MSG, DLM_BEGIN_EXIT_DOMAIN_MSG is purely + * informational. Meaning if a node does not receive the message, + * so be it. + */ + spin_lock(&dlm->spinlock); + while (1) { + node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES, node + 1); + if (node >= O2NM_MAX_NODES) + break; + if (node == dlm->node_num) + continue; + + spin_unlock(&dlm->spinlock); + dlm_send_one_domain_exit(dlm, DLM_BEGIN_EXIT_DOMAIN_MSG, node); + spin_lock(&dlm->spinlock); + } + spin_unlock(&dlm->spinlock); +} static void dlm_leave_domain(struct dlm_ctxt *dlm) { @@ -602,7 +654,8 @@ static void dlm_leave_domain(struct dlm_ctxt *dlm) clear_node = 1; - status = dlm_send_one_domain_exit(dlm, node); + status = dlm_send_one_domain_exit(dlm, DLM_EXIT_DOMAIN_MSG, + node); if (status < 0 && status != -ENOPROTOOPT && status != -ENOTCONN) { @@ -677,6 +730,7 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm) if (leave) { mlog(0, "shutting down domain %s\n", dlm->name); + dlm_begin_exit_domain(dlm); /* We changed dlm state, notify the thread */ dlm_kick_thread(dlm, NULL); @@ -909,6 +963,7 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, * leftover join state. 
*/ BUG_ON(dlm->joining_node != assert->node_idx); set_bit(assert->node_idx, dlm->domain_map); + clear_bit(assert->node_idx, dlm->exit_domain_map); __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); printk(KERN_NOTICE "o2dlm: Node %u joins domain %s\n", @@ -1793,6 +1848,13 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm) if (status) goto bail; + status = o2net_register_handler(DLM_BEGIN_EXIT_DOMAIN_MSG, dlm->key, + sizeof(struct dlm_exit_domain), + dlm_begin_exit_domain_handler, + dlm, NULL, &dlm->dlm_domain_handlers); + if (status) + goto bail; + bail: if (status) dlm_unregister_domain_handlers(dlm); diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index f1beb6f..7efab6d 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -2393,6 +2393,7 @@ static void __dlm_hb_node_down(struct dlm_ctxt *dlm, int idx) mlog(0, "node %u being removed from domain map!\n", idx); clear_bit(idx, dlm->domain_map); + clear_bit(idx, dlm->exit_domain_map); /* wake up migration waiters if a node goes down. * perhaps later we can genericize this for other waiters. */ wake_up(&dlm->migration_wq); -- 1.7.1
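The key design point above is that DLM_BEGIN_EXIT_DOMAIN_MSG is advisory: the sender walks its domain map and fires one message per peer, each receiver only sets a bit in its exit_domain_map, and delivery failures are ignored. The following is a rough userspace model of that flow; the types and the direct function call standing in for o2net_send_message() are illustrative assumptions, not the o2net API.

#include <stdbool.h>

#define MAX_NODES 32

/* Toy model of the relevant dlm_ctxt state -- not the kernel struct. */
struct model_dlm {
	int node_num;
	int proto_major, proto_minor;	 /* negotiated dlm protocol */
	bool domain_map[MAX_NODES];	 /* nodes currently in the domain */
	bool exit_domain_map[MAX_NODES]; /* nodes that announced an exit */
};

/* Receiver side of DLM_BEGIN_EXIT_DOMAIN_MSG: just record the hint. */
void recv_begin_exit(struct model_dlm *dlm, int from_node)
{
	dlm->exit_domain_map[from_node] = true;
}

/* Sender side: notify every other live node.  The message is purely
 * informational, so there is no retry and no reply expected. */
void begin_exit_domain(struct model_dlm *self, struct model_dlm *peers[MAX_NODES])
{
	int i;

	/* Domains negotiated below protocol 1.2 do not know the message. */
	if (self->proto_major == 1 && self->proto_minor < 2)
		return;

	for (i = 0; i < MAX_NODES; i++) {
		if (i == self->node_num || !self->domain_map[i])
			continue;
		if (peers[i])
			recv_begin_exit(peers[i], self->node_num);
	}
}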
Sunil Mushran
2011-Apr-26 23:03 UTC
[Ocfs2-devel] [PATCH 3/3] ocfs2/dlm: Do not migrate resource to a node that is leaving the domain
During dlm domain shutdown, o2dlm has to free all the lock resources. Ones that have no locks and references are freed. Ones that have locks and/or references are migrated to another node. The first task in migration is finding a target. Currently we scan the lock resource and find one node that either has a lock or a reference. This is not very efficient in a parallel umount case as we might end up migrating the lock resource to a node which itself may have to migrate it to a third node. The patch scans the dlm->exit_domain_map to ensure the target node is not leaving the domain. If no valid target node is found, o2dlm does not migrate the resource but instead waits for the unlock and deref messages that will allow it to free the resource. Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/dlm/dlmdomain.c | 10 ++- fs/ocfs2/dlm/dlmmaster.c | 139 ++++++++++++++++----------------------------- 2 files changed, 57 insertions(+), 92 deletions(-) diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 3aff23f..6ed6b95 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -451,14 +451,18 @@ redo_bucket: dropped = dlm_empty_lockres(dlm, res); spin_lock(&res->spinlock); - __dlm_lockres_calc_usage(dlm, res); - iter = res->hash_node.next; + if (dropped) + __dlm_lockres_calc_usage(dlm, res); + else + iter = res->hash_node.next; spin_unlock(&res->spinlock); dlm_lockres_put(res); - if (dropped) + if (dropped) { + cond_resched_lock(&dlm->spinlock); goto redo_bucket; + } } cond_resched_lock(&dlm->spinlock); num += n; diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 3e59ff9..4499d86 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2396,8 +2396,7 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, static int dlm_migrate_lockres(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - u8 target) + struct dlm_lock_resource *res, u8 target) { struct dlm_master_list_entry *mle = NULL; struct dlm_master_list_entry *oldmle = NULL; @@ -2411,25 +2410,15 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, if (!dlm_grab(dlm)) return -EINVAL; + BUG_ON(target == O2NM_MAX_NODES); + name = res->lockname.name; namelen = res->lockname.len; - mlog(0, "%s: Migrating %.*s to %u\n", dlm->name, namelen, name, target); - - /* Ensure this lockres is a proper candidate for migration */ - spin_lock(&res->spinlock); - ret = dlm_is_lockres_migrateable(dlm, res); - spin_unlock(&res->spinlock); - - /* No work to do */ - if (!ret) - goto leave; - - /* - * preallocate up front - * if this fails, abort - */ + mlog(0, "%s: Migrating %.*s to node %u\n", dlm->name, namelen, name, + target); + /* preallocate up front. 
if this fails, abort */ ret = -ENOMEM; mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_NOFS); if (!mres) { @@ -2445,35 +2434,10 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, ret = 0; /* - * find a node to migrate the lockres to - */ - - spin_lock(&dlm->spinlock); - /* pick a new node */ - if (!test_bit(target, dlm->domain_map) || - target >= O2NM_MAX_NODES) { - target = dlm_pick_migration_target(dlm, res); - } - mlog(0, "%s: res %.*s, Node %u chosen for migration\n", dlm->name, - namelen, name, target); - - if (target >= O2NM_MAX_NODES || - !test_bit(target, dlm->domain_map)) { - /* target chosen is not alive */ - ret = -EINVAL; - } - - if (ret) { - spin_unlock(&dlm->spinlock); - goto fail; - } - - mlog(0, "continuing with target = %u\n", target); - - /* * clear any existing master requests and * add the migration mle to the list */ + spin_lock(&dlm->spinlock); spin_lock(&dlm->master_lock); ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name, namelen, target, dlm->node_num); @@ -2514,6 +2478,7 @@ fail: dlm_put_mle(mle); } else if (mle) { kmem_cache_free(dlm_mle_cache, mle); + mle = NULL; } goto leave; } @@ -2632,7 +2597,6 @@ leave: if (wake) wake_up(&res->wq); - /* TODO: cleanup */ if (mres) free_page((unsigned long)mres); @@ -2657,28 +2621,28 @@ leave: */ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { - int mig, ret; + int ret; int lock_dropped = 0; + u8 target = O2NM_MAX_NODES; assert_spin_locked(&dlm->spinlock); spin_lock(&res->spinlock); - mig = dlm_is_lockres_migrateable(dlm, res); + if (dlm_is_lockres_migrateable(dlm, res)) + target = dlm_pick_migration_target(dlm, res); spin_unlock(&res->spinlock); - if (!mig) + + if (target == O2NM_MAX_NODES) goto leave; /* Wheee! Migrate lockres here! Will sleep so drop spinlock. */ spin_unlock(&dlm->spinlock); lock_dropped = 1; - while (1) { - ret = dlm_migrate_lockres(dlm, res, O2NM_MAX_NODES); - if (ret >= 0) - break; - mlog(0, "%s: res %.*s, Migrate failed, retrying\n", dlm->name, - res->lockname.len, res->lockname.name); - msleep(DLM_MIGRATION_RETRY_MS); - } + ret = dlm_migrate_lockres(dlm, res, target); + if (ret) + mlog(0, "%s: res %.*s, Migrate to node %u failed with %d\n", + dlm->name, res->lockname.len, res->lockname.name, + target, ret); spin_lock(&dlm->spinlock); leave: return lock_dropped; @@ -2862,61 +2826,58 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, } } -/* for now this is not too intelligent. we will - * need stats to make this do the right thing. - * this just finds the first lock on one of the - * queues and uses that node as the target. */ +/* + * Pick a node to migrate the lock resource to. This function selects a + * potential target based first on the locks and then on refmap. It skips + * nodes that are in the process of exiting the domain. 
+ */ static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { - int i; + enum dlm_lockres_list idx; struct list_head *queue = &res->granted; struct dlm_lock *lock; int nodenum; assert_spin_locked(&dlm->spinlock); + assert_spin_locked(&res->spinlock); - spin_lock(&res->spinlock); - for (i=0; i<3; i++) { + /* Go through all the locks */ + for (idx = DLM_GRANTED_LIST; idx <= DLM_BLOCKED_LIST; idx++) { + queue = dlm_list_idx_to_ptr(res, idx); list_for_each_entry(lock, queue, list) { - /* up to the caller to make sure this node - * is alive */ - if (lock->ml.node != dlm->node_num) { - spin_unlock(&res->spinlock); - return lock->ml.node; - } + if (lock->ml.node == dlm->node_num) + continue; + if (test_bit(lock->ml.node, dlm->exit_domain_map)) + continue; + if (!test_bit(lock->ml.node, dlm->domain_map)) + continue; + nodenum = lock->ml.node; + goto bail; } - queue++; - } - - nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); - if (nodenum < O2NM_MAX_NODES) { - spin_unlock(&res->spinlock); - return nodenum; } - spin_unlock(&res->spinlock); - mlog(0, "have not found a suitable target yet! checking domain map\n"); - /* ok now we're getting desperate. pick anyone alive. */ + /* Go thru the refmap */ nodenum = -1; while (1) { - nodenum = find_next_bit(dlm->domain_map, - O2NM_MAX_NODES, nodenum+1); - mlog(0, "found %d in domain map\n", nodenum); + nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES, + nodenum + 1); if (nodenum >= O2NM_MAX_NODES) break; - if (nodenum != dlm->node_num) { - mlog(0, "picking %d\n", nodenum); - return nodenum; - } + if (nodenum == dlm->node_num) + continue; + if (test_bit(nodenum, dlm->exit_domain_map)) + continue; + if (!test_bit(lock->ml.node, dlm->domain_map)) + continue; + goto bail; } - mlog(0, "giving up. no master to migrate to\n"); - return DLM_LOCK_RES_OWNER_UNKNOWN; + nodenum = O2NM_MAX_NODES; +bail: + return nodenum; } - - /* this is called by the new master once all lockres * data has been received */ static int dlm_do_migrate_request(struct dlm_ctxt *dlm, -- 1.7.1
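In effect, the new target selection is a two-pass scan: prefer a node that already holds a lock on the resource, fall back to a node that holds a reference, and in both passes skip nodes that are dead or have announced an exit; if both passes come up empty, the caller skips the migration and lets the later unlock/deref messages free the resource. The model below restates that rule with plain arrays in place of the kernel bitmaps and lock queues -- an illustration of the intended behaviour, not the kernel code.

#include <stdbool.h>

#define MAX_NODES 32

struct model_res {
	bool has_lock[MAX_NODES];	/* node holds a lock on the resource */
	bool has_ref[MAX_NODES];	/* node holds a reference (refmap bit) */
};

struct model_dlm {
	int node_num;
	bool domain_map[MAX_NODES];
	bool exit_domain_map[MAX_NODES];
};

/* A node is a usable target only if it is someone else, alive in the
 * domain, and has not announced that it is leaving. */
static bool usable_target(const struct model_dlm *dlm, int node)
{
	return node != dlm->node_num &&
	       dlm->domain_map[node] &&
	       !dlm->exit_domain_map[node];
}

/* Returns MAX_NODES when no target exists; the caller then does not
 * migrate and waits for unlock/deref messages instead. */
int pick_migration_target(const struct model_dlm *dlm,
			  const struct model_res *res)
{
	int node;

	for (node = 0; node < MAX_NODES; node++)	/* pass 1: lock holders */
		if (res->has_lock[node] && usable_target(dlm, node))
			return node;

	for (node = 0; node < MAX_NODES; node++)	/* pass 2: reference holders */
		if (res->has_ref[node] && usable_target(dlm, node))
			return node;

	return MAX_NODES;
}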
Mark Fasheh
2011-May-05 22:09 UTC
[Ocfs2-devel] [PATCH 1/3] ocfs2/dlm: dlm_is_lockres_migrateable() returns boolean
On Tue, Apr 26, 2011 at 04:03:23PM -0700, Sunil Mushran wrote:> Patch cleans up the gunk added by commit 388c4bcb4e63e88fb1f312a2f5f9eb2623afcf5b. > dlm_is_lockres_migrateable() now returns 1 if lockresource is deemed > migrateable and 0 if not. > > Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> > --- > fs/ocfs2/dlm/dlmcommon.h | 12 ++++ > fs/ocfs2/dlm/dlmmaster.c | 135 +++++++++++++++++---------------------------- > 2 files changed, 63 insertions(+), 84 deletions(-) > > diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h > index 4bdf7ba..1aac42a 100644 > --- a/fs/ocfs2/dlm/dlmcommon.h > +++ b/fs/ocfs2/dlm/dlmcommon.h > @@ -401,6 +401,18 @@ static inline int dlm_lvb_is_empty(char *lvb) > return 1; > } > > +static inline char *dlm_list_in_text(enum dlm_lockres_list idx) > +{ > + if (idx == DLM_GRANTED_LIST) > + return "granted"; > + else if (idx == DLM_CONVERTING_LIST) > + return "converting"; > + else if (idx == DLM_BLOCKED_LIST) > + return "blocked"; > + else > + return "unknown"; > +} > + > static inline struct list_head * > dlm_list_idx_to_ptr(struct dlm_lock_resource *res, enum dlm_lockres_list idx) > { > diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c > index 9d67610..3e59ff9 100644 > --- a/fs/ocfs2/dlm/dlmmaster.c > +++ b/fs/ocfs2/dlm/dlmmaster.c > @@ -2339,65 +2339,55 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data) > dlm_lockres_put(res); > } > > -/* Checks whether the lockres can be migrated. Returns 0 if yes, < 0 > - * if not. If 0, numlocks is set to the number of locks in the lockres. > +/* > + * A migrateable resource is one that is : > + * 1. locally mastered, and, > + * 2. zero local locks, and, > + * 3. one or more non-local locks, or, one or more references > + * Returns 1 if yes, 0 if not. > */ > static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, > - struct dlm_lock_resource *res, > - int *numlocks, > - int *hasrefs) > + struct dlm_lock_resource *res) > { > - int ret; > - int i; > - int count = 0; > + enum dlm_lockres_list idx; > + int nonlocal = 0; > struct list_head *queue; > struct dlm_lock *lock; > + u64 cookie; > > assert_spin_locked(&res->spinlock); > > - *numlocks = 0; > - *hasrefs = 0; > - > - ret = -EINVAL; > - if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { > - mlog(0, "cannot migrate lockres with unknown owner!\n"); > - goto leave; > - } > - > - if (res->owner != dlm->node_num) { > - mlog(0, "cannot migrate lockres this node doesn't own!\n"); > - goto leave; > - } > + if (res->owner != dlm->node_num) > + return 0; > > - ret = 0; > - queue = &res->granted; > - for (i = 0; i < 3; i++) { > + for (idx = DLM_GRANTED_LIST; idx <= DLM_BLOCKED_LIST; idx++) { > + queue = dlm_list_idx_to_ptr(res, idx); > list_for_each_entry(lock, queue, list) { > - ++count; > - if (lock->ml.node == dlm->node_num) { > - mlog(0, "found a lock owned by this node still " > - "on the %s queue! will not migrate this " > - "lockres\n", (i == 0 ? "granted" : > - (i == 1 ? 
"converting" : > - "blocked"))); > - ret = -ENOTEMPTY; > - goto leave; > + if (lock->ml.node != dlm->node_num) { > + nonlocal++; > + continue; > } > + cookie = be64_to_cpu(lock->ml.cookie); > + mlog(0, "%s: Not migrateable res %.*s, lock %u:%llu on " > + "%s list\n", dlm->name, res->lockname.len, > + res->lockname.name, > + dlm_get_lock_cookie_node(cookie), > + dlm_get_lock_cookie_seq(cookie), > + dlm_list_in_text(idx)); > + return 0; > } > - queue++; > } > > - *numlocks = count; > - > - count = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); > - if (count < O2NM_MAX_NODES) > - *hasrefs = 1; > + if (!nonlocal) { > + nonlocal = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); > + if (nonlocal >= O2NM_MAX_NODES) > + return 0;Minor quibble, but can you use a new variable for these two lines? It took me a minute to realize that what you were doing was gettting a refcount on the lockres. Maybe: if (!nonlocal) { /* * We have no locks on the resource (local or remote). Check for * references now. */ node_ref = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); if (node_ref >= O2NM_MAX_NODES) return 0; } } Otherwise this looks much nicer than before, thanks. --Mark -- Mark Fasheh
Sunil Mushran
2011-May-05 22:15 UTC
[Ocfs2-devel] [PATCH 1/3] ocfs2/dlm: dlm_is_lockres_migrateable() returns boolean
On 05/05/2011 03:09 PM, Mark Fasheh wrote:> On Tue, Apr 26, 2011 at 04:03:23PM -0700, Sunil Mushran wrote: >> >> - *numlocks = count; >> - >> - count = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); >> - if (count< O2NM_MAX_NODES) >> - *hasrefs = 1; >> + if (!nonlocal) { >> + nonlocal = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); >> + if (nonlocal>= O2NM_MAX_NODES) >> + return 0; > Minor quibble, but can you use a new variable for these two lines? It took > me a minute to realize that what you were doing was gettting a refcount on > the lockres. Maybe: > > if (!nonlocal) { > /* > * We have no locks on the resource (local or remote). Check for > * references now. > */ > node_ref = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); > if (node_ref>= O2NM_MAX_NODES) > return 0; > } > }ok.
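With the separate variable Mark asks for, the tail of dlm_is_lockres_migrateable() would presumably end up looking something like the fragment below (node_ref being an additional int local; this is a sketch of the likely respin, not the committed hunk):

	if (!nonlocal) {
		/*
		 * No locks at all (local or remote); check whether some
		 * node still holds a reference on the resource.
		 */
		node_ref = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
		if (node_ref >= O2NM_MAX_NODES)
			return 0;	/* no locks, no refs: nothing to migrate */
	}

	mlog(0, "%s: res %.*s, Migrateable\n", dlm->name, res->lockname.len,
	     res->lockname.name);

	return 1;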
Mark Fasheh
2011-May-05 22:24 UTC
[Ocfs2-devel] [PATCH 3/3] ocfs2/dlm: Do not migrate resource to a node that is leaving the domain
On Tue, Apr 26, 2011 at 04:03:25PM -0700, Sunil Mushran wrote:> During dlm domain shutdown, o2dlm has to free all the lock resources. Ones that > have no locks and references are freed. Ones that have locks and/or references > are migrated to another node. > > The first task in migration is finding a target. Currently we scan the lock > resource and find one node that either has a lock or a reference. This is not > very efficient in a parallel umount case as we might end up migrating the > lock resource to a node which itself may have to migrate it to a third node. > > The patch scans the dlm->exit_domain_map to ensure the target node is not > leaving the domain. If no valid target node is found, o2dlm does not migrate > the resource but instead waits for the unlock and deref messages that will > allow it to free the resource. > > Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> > --- > fs/ocfs2/dlm/dlmdomain.c | 10 ++- > fs/ocfs2/dlm/dlmmaster.c | 139 ++++++++++++++++----------------------------- > 2 files changed, 57 insertions(+), 92 deletions(-) > > diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c > index 3aff23f..6ed6b95 100644 > --- a/fs/ocfs2/dlm/dlmdomain.c > +++ b/fs/ocfs2/dlm/dlmdomain.c > @@ -451,14 +451,18 @@ redo_bucket: > dropped = dlm_empty_lockres(dlm, res); > > spin_lock(&res->spinlock); > - __dlm_lockres_calc_usage(dlm, res); > - iter = res->hash_node.next; > + if (dropped) > + __dlm_lockres_calc_usage(dlm, res); > + else > + iter = res->hash_node.next; > spin_unlock(&res->spinlock); > > dlm_lockres_put(res); > > - if (dropped) > + if (dropped) { > + cond_resched_lock(&dlm->spinlock); > goto redo_bucket; > + } > } > cond_resched_lock(&dlm->spinlock); > num += n; > diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c > index 3e59ff9..4499d86 100644 > --- a/fs/ocfs2/dlm/dlmmaster.c > +++ b/fs/ocfs2/dlm/dlmmaster.c > @@ -2396,8 +2396,7 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, > > > static int dlm_migrate_lockres(struct dlm_ctxt *dlm, > - struct dlm_lock_resource *res, > - u8 target) > + struct dlm_lock_resource *res, u8 target) > { > struct dlm_master_list_entry *mle = NULL; > struct dlm_master_list_entry *oldmle = NULL; > @@ -2411,25 +2410,15 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, > if (!dlm_grab(dlm)) > return -EINVAL; > > + BUG_ON(target == O2NM_MAX_NODES); > + > name = res->lockname.name; > namelen = res->lockname.len; > > - mlog(0, "%s: Migrating %.*s to %u\n", dlm->name, namelen, name, target); > - > - /* Ensure this lockres is a proper candidate for migration */ > - spin_lock(&res->spinlock); > - ret = dlm_is_lockres_migrateable(dlm, res); > - spin_unlock(&res->spinlock); > - > - /* No work to do */ > - if (!ret) > - goto leave; > - > - /* > - * preallocate up front > - * if this fails, abort > - */ > + mlog(0, "%s: Migrating %.*s to node %u\n", dlm->name, namelen, name, > + target); > > + /* preallocate up front. 
if this fails, abort */ > ret = -ENOMEM; > mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_NOFS); > if (!mres) { > @@ -2445,35 +2434,10 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, > ret = 0; > > /* > - * find a node to migrate the lockres to > - */ > - > - spin_lock(&dlm->spinlock); > - /* pick a new node */ > - if (!test_bit(target, dlm->domain_map) || > - target >= O2NM_MAX_NODES) { > - target = dlm_pick_migration_target(dlm, res); > - } > - mlog(0, "%s: res %.*s, Node %u chosen for migration\n", dlm->name, > - namelen, name, target); > - > - if (target >= O2NM_MAX_NODES || > - !test_bit(target, dlm->domain_map)) { > - /* target chosen is not alive */ > - ret = -EINVAL; > - } > - > - if (ret) { > - spin_unlock(&dlm->spinlock); > - goto fail; > - } > - > - mlog(0, "continuing with target = %u\n", target); > - > - /* > * clear any existing master requests and > * add the migration mle to the list > */ > + spin_lock(&dlm->spinlock); > spin_lock(&dlm->master_lock); > ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name, > namelen, target, dlm->node_num); > @@ -2514,6 +2478,7 @@ fail: > dlm_put_mle(mle); > } else if (mle) { > kmem_cache_free(dlm_mle_cache, mle); > + mle = NULL; > } > goto leave; > } > @@ -2632,7 +2597,6 @@ leave: > if (wake) > wake_up(&res->wq); > > - /* TODO: cleanup */ > if (mres) > free_page((unsigned long)mres); > > @@ -2657,28 +2621,28 @@ leave: > */ > int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) > { > - int mig, ret; > + int ret; > int lock_dropped = 0; > + u8 target = O2NM_MAX_NODES; > > assert_spin_locked(&dlm->spinlock); > > spin_lock(&res->spinlock); > - mig = dlm_is_lockres_migrateable(dlm, res); > + if (dlm_is_lockres_migrateable(dlm, res)) > + target = dlm_pick_migration_target(dlm, res); > spin_unlock(&res->spinlock); > - if (!mig) > + > + if (target == O2NM_MAX_NODES) > goto leave; > > /* Wheee! Migrate lockres here! Will sleep so drop spinlock. */ > spin_unlock(&dlm->spinlock); > lock_dropped = 1; > - while (1) { > - ret = dlm_migrate_lockres(dlm, res, O2NM_MAX_NODES); > - if (ret >= 0) > - break; > - mlog(0, "%s: res %.*s, Migrate failed, retrying\n", dlm->name, > - res->lockname.len, res->lockname.name); > - msleep(DLM_MIGRATION_RETRY_MS); > - } > + ret = dlm_migrate_lockres(dlm, res, target); > + if (ret) > + mlog(0, "%s: res %.*s, Migrate to node %u failed with %d\n", > + dlm->name, res->lockname.len, res->lockname.name, > + target, ret); > spin_lock(&dlm->spinlock); > leave: > return lock_dropped; > @@ -2862,61 +2826,58 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, > } > } > > -/* for now this is not too intelligent. we will > - * need stats to make this do the right thing. > - * this just finds the first lock on one of the > - * queues and uses that node as the target. */ > +/* > + * Pick a node to migrate the lock resource to. This function selects a > + * potential target based first on the locks and then on refmap. It skips > + * nodes that are in the process of exiting the domain. 
> + */ > static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm, > struct dlm_lock_resource *res) > { > - int i; > + enum dlm_lockres_list idx; > struct list_head *queue = &res->granted; > struct dlm_lock *lock; > int nodenum; > > assert_spin_locked(&dlm->spinlock); > + assert_spin_locked(&res->spinlock); > > - spin_lock(&res->spinlock); > - for (i=0; i<3; i++) { > + /* Go through all the locks */ > + for (idx = DLM_GRANTED_LIST; idx <= DLM_BLOCKED_LIST; idx++) { > + queue = dlm_list_idx_to_ptr(res, idx); > list_for_each_entry(lock, queue, list) { > - /* up to the caller to make sure this node > - * is alive */ > - if (lock->ml.node != dlm->node_num) { > - spin_unlock(&res->spinlock); > - return lock->ml.node; > - } > + if (lock->ml.node == dlm->node_num) > + continue; > + if (test_bit(lock->ml.node, dlm->exit_domain_map)) > + continue; > + if (!test_bit(lock->ml.node, dlm->domain_map)) > + continue; > + nodenum = lock->ml.node; > + goto bail; > } > - queue++; > - } > - > - nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); > - if (nodenum < O2NM_MAX_NODES) { > - spin_unlock(&res->spinlock); > - return nodenum; > } > - spin_unlock(&res->spinlock); > - mlog(0, "have not found a suitable target yet! checking domain map\n"); > > - /* ok now we're getting desperate. pick anyone alive. */ > + /* Go thru the refmap */ > nodenum = -1; > while (1) { > - nodenum = find_next_bit(dlm->domain_map, > - O2NM_MAX_NODES, nodenum+1); > - mlog(0, "found %d in domain map\n", nodenum); > + nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES, > + nodenum + 1); > if (nodenum >= O2NM_MAX_NODES) > break; > - if (nodenum != dlm->node_num) { > - mlog(0, "picking %d\n", nodenum); > - return nodenum; > - } > + if (nodenum == dlm->node_num) > + continue; > + if (test_bit(nodenum, dlm->exit_domain_map)) > + continue; > + if (!test_bit(lock->ml.node, dlm->domain_map)) > + continue;If the lock's owning node isn't in the domain map, we're just ignoring it? I guess I'm not following what the last 'if (!test_bit(lock->ml.node, dlm->domain_map))' line is trying to do. --Mark PS: I'm rusty on fs/ocfs2/dlm stuff as you can tell :) -- Mark Fasheh
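Mark appears to be pointing at what looks like a leftover from the first loop: in the refmap pass the candidate node is nodenum, while lock is whatever the earlier list walk left behind, so testing lock->ml.node there does not describe the candidate at all. Assuming the intent is the same liveness check as in the lock-queue pass, the hunk would presumably read:

		if (nodenum == dlm->node_num)
			continue;
		if (test_bit(nodenum, dlm->exit_domain_map))
			continue;
		if (!test_bit(nodenum, dlm->domain_map))
			continue;
		goto bail;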
Wengang Wang
2011-May-10 06:19 UTC
[Ocfs2-devel] [PATCH 3/3] ocfs2/dlm: Do not migrate resource to a node that is leaving the domain
Hi Sunil, This series of patches fixes a problem in the mmap (and possibly other) tests in ocfs2-test. The problem is that when all three nodes unmount at the same time, two of them keep migrating a lockres to each other, so the umount on those two nodes hangs. I have no idea why dlm_run_purge_list gets no chance to purge it. Your patches fix it :) thanks, wengang. On 11-04-26 16:03, Sunil Mushran wrote:> During dlm domain shutdown, o2dlm has to free all the lock resources. Ones that > have no locks and references are freed. Ones that have locks and/or references > are migrated to another node. > > The first task in migration is finding a target. Currently we scan the lock > resource and find one node that either has a lock or a reference. This is not > very efficient in a parallel umount case as we might end up migrating the > lock resource to a node which itself may have to migrate it to a third node. > > The patch scans the dlm->exit_domain_map to ensure the target node is not > leaving the domain. If no valid target node is found, o2dlm does not migrate > the resource but instead waits for the unlock and deref messages that will > allow it to free the resource. > > Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> > --- > fs/ocfs2/dlm/dlmdomain.c | 10 ++- > fs/ocfs2/dlm/dlmmaster.c | 139 ++++++++++++++++----------------------------- > 2 files changed, 57 insertions(+), 92 deletions(-) > > diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c > index 3aff23f..6ed6b95 100644 > --- a/fs/ocfs2/dlm/dlmdomain.c > +++ b/fs/ocfs2/dlm/dlmdomain.c > @@ -451,14 +451,18 @@ redo_bucket: > dropped = dlm_empty_lockres(dlm, res); > > spin_lock(&res->spinlock); > - __dlm_lockres_calc_usage(dlm, res); > - iter = res->hash_node.next; > + if (dropped) > + __dlm_lockres_calc_usage(dlm, res); > + else > + iter = res->hash_node.next; > spin_unlock(&res->spinlock); > > dlm_lockres_put(res); > > - if (dropped) > + if (dropped) { > + cond_resched_lock(&dlm->spinlock); > goto redo_bucket; > + } > } > cond_resched_lock(&dlm->spinlock); > num += n; > diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c > index 3e59ff9..4499d86 100644 > --- a/fs/ocfs2/dlm/dlmmaster.c > +++ b/fs/ocfs2/dlm/dlmmaster.c > @@ -2396,8 +2396,7 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, > > > static int dlm_migrate_lockres(struct dlm_ctxt *dlm, > - struct dlm_lock_resource *res, > - u8 target) > + struct dlm_lock_resource *res, u8 target) > { > struct dlm_master_list_entry *mle = NULL; > struct dlm_master_list_entry *oldmle = NULL; > @@ -2411,25 +2410,15 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, > if (!dlm_grab(dlm)) > return -EINVAL; > > + BUG_ON(target == O2NM_MAX_NODES); > + > name = res->lockname.name; > namelen = res->lockname.len; > > - mlog(0, "%s: Migrating %.*s to %u\n", dlm->name, namelen, name, target); > - > - /* Ensure this lockres is a proper candidate for migration */ > - spin_lock(&res->spinlock); > - ret = dlm_is_lockres_migrateable(dlm, res); > - spin_unlock(&res->spinlock); > - > - /* No work to do */ > - if (!ret) > - goto leave; > - > - /* > - * preallocate up front > - * if this fails, abort > - */ > + mlog(0, "%s: Migrating %.*s to node %u\n", dlm->name, namelen, name, > + target); > > + /* preallocate up front. 
if this fails, abort */ > ret = -ENOMEM; > mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_NOFS); > if (!mres) { > @@ -2445,35 +2434,10 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, > ret = 0; > > /* > - * find a node to migrate the lockres to > - */ > - > - spin_lock(&dlm->spinlock); > - /* pick a new node */ > - if (!test_bit(target, dlm->domain_map) || > - target >= O2NM_MAX_NODES) { > - target = dlm_pick_migration_target(dlm, res); > - } > - mlog(0, "%s: res %.*s, Node %u chosen for migration\n", dlm->name, > - namelen, name, target); > - > - if (target >= O2NM_MAX_NODES || > - !test_bit(target, dlm->domain_map)) { > - /* target chosen is not alive */ > - ret = -EINVAL; > - } > - > - if (ret) { > - spin_unlock(&dlm->spinlock); > - goto fail; > - } > - > - mlog(0, "continuing with target = %u\n", target); > - > - /* > * clear any existing master requests and > * add the migration mle to the list > */ > + spin_lock(&dlm->spinlock); > spin_lock(&dlm->master_lock); > ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name, > namelen, target, dlm->node_num); > @@ -2514,6 +2478,7 @@ fail: > dlm_put_mle(mle); > } else if (mle) { > kmem_cache_free(dlm_mle_cache, mle); > + mle = NULL; > } > goto leave; > } > @@ -2632,7 +2597,6 @@ leave: > if (wake) > wake_up(&res->wq); > > - /* TODO: cleanup */ > if (mres) > free_page((unsigned long)mres); > > @@ -2657,28 +2621,28 @@ leave: > */ > int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) > { > - int mig, ret; > + int ret; > int lock_dropped = 0; > + u8 target = O2NM_MAX_NODES; > > assert_spin_locked(&dlm->spinlock); > > spin_lock(&res->spinlock); > - mig = dlm_is_lockres_migrateable(dlm, res); > + if (dlm_is_lockres_migrateable(dlm, res)) > + target = dlm_pick_migration_target(dlm, res); > spin_unlock(&res->spinlock); > - if (!mig) > + > + if (target == O2NM_MAX_NODES) > goto leave; > > /* Wheee! Migrate lockres here! Will sleep so drop spinlock. */ > spin_unlock(&dlm->spinlock); > lock_dropped = 1; > - while (1) { > - ret = dlm_migrate_lockres(dlm, res, O2NM_MAX_NODES); > - if (ret >= 0) > - break; > - mlog(0, "%s: res %.*s, Migrate failed, retrying\n", dlm->name, > - res->lockname.len, res->lockname.name); > - msleep(DLM_MIGRATION_RETRY_MS); > - } > + ret = dlm_migrate_lockres(dlm, res, target); > + if (ret) > + mlog(0, "%s: res %.*s, Migrate to node %u failed with %d\n", > + dlm->name, res->lockname.len, res->lockname.name, > + target, ret); > spin_lock(&dlm->spinlock); > leave: > return lock_dropped; > @@ -2862,61 +2826,58 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, > } > } > > -/* for now this is not too intelligent. we will > - * need stats to make this do the right thing. > - * this just finds the first lock on one of the > - * queues and uses that node as the target. */ > +/* > + * Pick a node to migrate the lock resource to. This function selects a > + * potential target based first on the locks and then on refmap. It skips > + * nodes that are in the process of exiting the domain. 
> + */ > static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm, > struct dlm_lock_resource *res) > { > - int i; > + enum dlm_lockres_list idx; > struct list_head *queue = &res->granted; > struct dlm_lock *lock; > int nodenum; > > assert_spin_locked(&dlm->spinlock); > + assert_spin_locked(&res->spinlock); > > - spin_lock(&res->spinlock); > - for (i=0; i<3; i++) { > + /* Go through all the locks */ > + for (idx = DLM_GRANTED_LIST; idx <= DLM_BLOCKED_LIST; idx++) { > + queue = dlm_list_idx_to_ptr(res, idx); > list_for_each_entry(lock, queue, list) { > - /* up to the caller to make sure this node > - * is alive */ > - if (lock->ml.node != dlm->node_num) { > - spin_unlock(&res->spinlock); > - return lock->ml.node; > - } > + if (lock->ml.node == dlm->node_num) > + continue; > + if (test_bit(lock->ml.node, dlm->exit_domain_map)) > + continue; > + if (!test_bit(lock->ml.node, dlm->domain_map)) > + continue; > + nodenum = lock->ml.node; > + goto bail; > } > - queue++; > - } > - > - nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); > - if (nodenum < O2NM_MAX_NODES) { > - spin_unlock(&res->spinlock); > - return nodenum; > } > - spin_unlock(&res->spinlock); > - mlog(0, "have not found a suitable target yet! checking domain map\n"); > > - /* ok now we're getting desperate. pick anyone alive. */ > + /* Go thru the refmap */ > nodenum = -1; > while (1) { > - nodenum = find_next_bit(dlm->domain_map, > - O2NM_MAX_NODES, nodenum+1); > - mlog(0, "found %d in domain map\n", nodenum); > + nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES, > + nodenum + 1); > if (nodenum >= O2NM_MAX_NODES) > break; > - if (nodenum != dlm->node_num) { > - mlog(0, "picking %d\n", nodenum); > - return nodenum; > - } > + if (nodenum == dlm->node_num) > + continue; > + if (test_bit(nodenum, dlm->exit_domain_map)) > + continue; > + if (!test_bit(lock->ml.node, dlm->domain_map)) > + continue; > + goto bail; > } > > - mlog(0, "giving up. no master to migrate to\n"); > - return DLM_LOCK_RES_OWNER_UNKNOWN; > + nodenum = O2NM_MAX_NODES; > +bail: > + return nodenum; > } > > - > - > /* this is called by the new master once all lockres > * data has been received */ > static int dlm_do_migrate_request(struct dlm_ctxt *dlm, > -- > 1.7.1 > > > _______________________________________________ > Ocfs2-devel mailing list > Ocfs2-devel at oss.oracle.com > http://oss.oracle.com/mailman/listinfo/ocfs2-devel