The changes from the last drop are:

1. Patch 11 removes struct dlm_lock_name.

2. Patch 12 is an unrelated bugfix. Actually, it is related to a bugfix that we are currently retracting in mainline. The patch may need more testing; while I did hit the condition in my testing, Marcos hasn't. I am sending it now so that it can be queued for 2.6.30, giving us more time to test.

3. Patch 13 will be useful when we later attempt to size the mle hash appropriately.

Sunil
Sunil Mushran
2009-Feb-26 23:00 UTC
[Ocfs2-devel] [PATCH 01/13] ocfs2/dlm: Encapsulate adding and removing of mle from dlm->master_list
This patch encapsulates adding and removing of the mle from the dlm->master_list. This patch is part of the series of patches that converts the mle list to a mle hash. Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/dlm/dlmcommon.h | 3 +++ fs/ocfs2/dlm/dlmmaster.c | 34 +++++++++++++++++++++++----------- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index bb53714..261e265 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -1008,6 +1008,9 @@ static inline void __dlm_wait_on_lockres(struct dlm_lock_resource *res) DLM_LOCK_RES_MIGRATING)); } +void __dlm_unlink_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle); +void __dlm_insert_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle); + /* create/destroy slab caches */ int dlm_init_master_caches(void); void dlm_destroy_master_caches(void); diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 54e182a..3b77703 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -318,6 +318,21 @@ static void dlm_init_mle(struct dlm_master_list_entry *mle, __dlm_mle_attach_hb_events(dlm, mle); } +void __dlm_unlink_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle) +{ + assert_spin_locked(&dlm->spinlock); + assert_spin_locked(&dlm->master_lock); + + if (!list_empty(&mle->list)) + list_del_init(&mle->list); +} + +void __dlm_insert_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle) +{ + assert_spin_locked(&dlm->master_lock); + + list_add(&mle->list, &dlm->master_list); +} /* returns 1 if found, 0 if not */ static int dlm_find_mle(struct dlm_ctxt *dlm, @@ -420,8 +435,7 @@ static void dlm_mle_release(struct kref *kref) assert_spin_locked(&dlm->master_lock); /* remove from list if not already */ - if (!list_empty(&mle->list)) - list_del_init(&mle->list); + __dlm_unlink_mle(dlm, mle); /* detach the mle from the domain node up/down events */ __dlm_mle_detach_hb_events(dlm, mle); @@ -843,7 +857,7 @@ lookup: alloc_mle = NULL; dlm_init_mle(mle, DLM_MLE_MASTER, dlm, res, NULL, 0); set_bit(dlm->node_num, mle->maybe_map); - list_add(&mle->list, &dlm->master_list); + __dlm_insert_mle(dlm, mle); /* still holding the dlm spinlock, check the recovery map * to see if there are any nodes that still need to be @@ -1575,7 +1589,7 @@ way_up_top: // "add the block.\n"); dlm_init_mle(mle, DLM_MLE_BLOCK, dlm, NULL, name, namelen); set_bit(request->node_idx, mle->maybe_map); - list_add(&mle->list, &dlm->master_list); + __dlm_insert_mle(dlm, mle); response = DLM_MASTER_RESP_NO; } else { // mlog(0, "mle was found\n"); @@ -1967,7 +1981,7 @@ ok: assert->node_idx, rr, extra_ref, mle->inuse); dlm_print_one_mle(mle); } - list_del_init(&mle->list); + __dlm_unlink_mle(dlm, mle); __dlm_mle_detach_hb_events(dlm, mle); __dlm_put_mle(mle); if (extra_ref) { @@ -3159,10 +3173,8 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm, tmp->master = master; atomic_set(&tmp->woken, 1); wake_up(&tmp->wq); - /* remove it from the list so that only one - * mle will be found */ - list_del_init(&tmp->list); - /* this was obviously WRONG. mle is uninited here. should be tmp. 
*/ + /* remove it so that only one mle will be found */ + __dlm_unlink_mle(dlm, tmp); __dlm_mle_detach_hb_events(dlm, tmp); ret = DLM_MIGRATE_RESPONSE_MASTERY_REF; mlog(0, "%s:%.*s: master=%u, newmaster=%u, " @@ -3181,7 +3193,7 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm, mle->master = master; /* do this for consistency with other mle types */ set_bit(new_master, mle->maybe_map); - list_add(&mle->list, &dlm->master_list); + __dlm_insert_mle(dlm, mle); return ret; } @@ -3264,7 +3276,7 @@ top: * list_head while in list_for_each_safe */ __dlm_mle_detach_hb_events(dlm, mle); spin_lock(&mle->spinlock); - list_del_init(&mle->list); + __dlm_unlink_mle(dlm, mle); atomic_set(&mle->woken, 1); spin_unlock(&mle->spinlock); wake_up(&mle->wq); -- 1.5.6.3
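The payoff of this encapsulation comes later in the series: once every caller goes through __dlm_insert_mle()/__dlm_unlink_mle(), switching the container from a list to a hash (patch 05) touches only those helpers. A minimal userspace model of the idea, with all names invented for this sketch:

/*
 * Callers only ever use the insert/unlink helpers, so the backing
 * container can later change in exactly one place.
 */
#include <stdio.h>

struct entry {
	const char *name;
	struct entry *next;
};

static struct entry *registry;	/* today: a simple singly linked list */

static void registry_insert(struct entry *e)
{
	e->next = registry;
	registry = e;
}

static void registry_unlink(struct entry *e)
{
	struct entry **p;

	for (p = &registry; *p; p = &(*p)->next) {
		if (*p == e) {
			*p = e->next;
			e->next = NULL;
			return;
		}
	}
}

int main(void)
{
	static struct entry a = { .name = "lockid-A", .next = NULL };

	registry_insert(&a);
	registry_unlink(&a);
	printf("callers never touch the container directly\n");
	return 0;
}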
Sunil Mushran
2009-Feb-26 23:00 UTC
[Ocfs2-devel] [PATCH 02/13] ocfs2/dlm: Clean up struct dlm_lock_name
For master mle, the name it stored in the attached lockres in struct qstr. For block and migration mle, the name is stored inline in struct dlm_lock_name. This patch attempts to make struct dlm_lock_name look like a struct qstr. While we could use struct qstr, we don't because we want to avoid having to malloc and free the lockname string as the mle's lifetime is fairly short. Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/dlm/dlmcommon.h | 8 ++-- fs/ocfs2/dlm/dlmdebug.c | 10 +++--- fs/ocfs2/dlm/dlmmaster.c | 79 +++++++++++++++++++++++++-------------------- 3 files changed, 53 insertions(+), 44 deletions(-) diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 261e265..b232aa0 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -56,8 +56,8 @@ enum dlm_mle_type { }; struct dlm_lock_name { - u8 len; - u8 name[DLM_LOCKID_NAME_MAX]; + unsigned int len; + unsigned char name[DLM_LOCKID_NAME_MAX]; }; struct dlm_master_list_entry { @@ -79,8 +79,8 @@ struct dlm_master_list_entry { struct o2hb_callback_func mle_hb_up; struct o2hb_callback_func mle_hb_down; union { - struct dlm_lock_resource *res; - struct dlm_lock_name name; + struct dlm_lock_resource *mleres; + struct dlm_lock_name mlename; } u; }; diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index b32f60a..c82feb7 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -288,15 +288,15 @@ static int dump_mle(struct dlm_master_list_entry *mle, char *buf, int len) { int out = 0; unsigned int namelen; - const char *name; + unsigned char *name; char *mle_type; if (mle->type != DLM_MLE_MASTER) { - namelen = mle->u.name.len; - name = mle->u.name.name; + name = mle->u.mlename.name; + namelen = mle->u.mlename.len; } else { - namelen = mle->u.res->lockname.len; - name = mle->u.res->lockname.name; + name = (unsigned char *)mle->u.mleres->lockname.name; + namelen = mle->u.mleres->lockname.len; } if (mle->type == DLM_MLE_BLOCK) diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 3b77703..e566d5e 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -68,27 +68,38 @@ static int dlm_do_assert_master(struct dlm_ctxt *dlm, void *nodemap, u32 flags); static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data); +static inline void __dlm_mle_name(struct dlm_master_list_entry *mle, + unsigned char **name, unsigned int *namelen) +{ + BUG_ON(mle->type != DLM_MLE_BLOCK && + mle->type != DLM_MLE_MASTER && + mle->type != DLM_MLE_MIGRATION); + + if (mle->type != DLM_MLE_MASTER) { + *name = mle->u.mlename.name; + *namelen = mle->u.mlename.len; + } else { + *name = (unsigned char *)mle->u.mleres->lockname.name; + *namelen = mle->u.mleres->lockname.len; + } +} + static inline int dlm_mle_equal(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle, const char *name, unsigned int namelen) { - struct dlm_lock_resource *res; + unsigned char *mlename; + unsigned int mlelen; if (dlm != mle->dlm) return 0; - if (mle->type == DLM_MLE_BLOCK || - mle->type == DLM_MLE_MIGRATION) { - if (namelen != mle->u.name.len || - memcmp(name, mle->u.name.name, namelen)!=0) - return 0; - } else { - res = mle->u.res; - if (namelen != res->lockname.len || - memcmp(res->lockname.name, name, namelen) != 0) - return 0; - } + __dlm_mle_name(mle, &mlename, &mlelen); + + if (namelen != mlelen || memcmp(name, mlename, namelen) != 0) + return 0; + return 1; } @@ -295,17 +306,17 @@ static void dlm_init_mle(struct dlm_master_list_entry *mle, mle->new_master = 
O2NM_MAX_NODES; mle->inuse = 0; + BUG_ON(mle->type != DLM_MLE_BLOCK && + mle->type != DLM_MLE_MASTER && + mle->type != DLM_MLE_MIGRATION); + if (mle->type == DLM_MLE_MASTER) { BUG_ON(!res); - mle->u.res = res; - } else if (mle->type == DLM_MLE_BLOCK) { - BUG_ON(!name); - memcpy(mle->u.name.name, name, namelen); - mle->u.name.len = namelen; - } else /* DLM_MLE_MIGRATION */ { + mle->u.mleres = res; + } else { BUG_ON(!name); - memcpy(mle->u.name.name, name, namelen); - mle->u.name.len = namelen; + memcpy(mle->u.mlename.name, name, namelen); + mle->u.mlename.len = namelen; } /* copy off the node_map and register hb callbacks on our copy */ @@ -425,11 +436,11 @@ static void dlm_mle_release(struct kref *kref) if (mle->type != DLM_MLE_MASTER) { mlog(0, "calling mle_release for %.*s, type %d\n", - mle->u.name.len, mle->u.name.name, mle->type); + mle->u.mlename.len, mle->u.mlename.name, mle->type); } else { mlog(0, "calling mle_release for %.*s, type %d\n", - mle->u.res->lockname.len, - mle->u.res->lockname.name, mle->type); + mle->u.mleres->lockname.len, + mle->u.mleres->lockname.name, mle->type); } assert_spin_locked(&dlm->spinlock); assert_spin_locked(&dlm->master_lock); @@ -1284,7 +1295,7 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm, res->lockname.len, res->lockname.name); mle->type = DLM_MLE_MASTER; - mle->u.res = res; + mle->u.mleres = res; } } } @@ -1323,20 +1334,18 @@ static int dlm_do_master_request(struct dlm_lock_resource *res, struct dlm_ctxt *dlm = mle->dlm; struct dlm_master_request request; int ret, response=0, resend; + unsigned char *mlename; + unsigned int mlenamelen; memset(&request, 0, sizeof(request)); request.node_idx = dlm->node_num; BUG_ON(mle->type == DLM_MLE_MIGRATION); - if (mle->type != DLM_MLE_MASTER) { - request.namelen = mle->u.name.len; - memcpy(request.name, mle->u.name.name, request.namelen); - } else { - request.namelen = mle->u.res->lockname.len; - memcpy(request.name, mle->u.res->lockname.name, - request.namelen); - } + __dlm_mle_name(mle, &mlename, &mlenamelen); + + request.namelen = (u8)mlenamelen; + memcpy(request.name, mlename, request.namelen); again: ret = o2net_send_message(DLM_MASTER_REQUEST_MSG, dlm->key, &request, @@ -3286,9 +3295,9 @@ top: mle->master, mle->new_master); /* if there is a lockres associated with this * mle, find it and set its owner to UNKNOWN */ - hash = dlm_lockid_hash(mle->u.name.name, mle->u.name.len); - res = __dlm_lookup_lockres(dlm, mle->u.name.name, - mle->u.name.len, hash); + hash = dlm_lockid_hash(mle->u.mlename.name, mle->u.mlename.len); + res = __dlm_lookup_lockres(dlm, mle->u.mlename.name, + mle->u.mlename.len, hash); if (res) { /* unfortunately if we hit this rare case, our * lock ordering is messed. we need to drop -- 1.5.6.3
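The tradeoff described above (inline storage so a short-lived mle never allocates or frees its name separately, versus struct qstr's pointer-plus-length) can be seen in a small standalone comparison. The structs below are simplified stand-ins with an assumed name cap, not the kernel definitions:

/* Simplified stand-ins; the real structs live in fs/ocfs2/dlm/dlmcommon.h
 * and include/linux/dcache.h. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define LOCKID_NAME_MAX 32		/* assumed cap for the sketch */

/* qstr-style: length plus a pointer, so the string itself must be
 * allocated and freed separately. */
struct qstr_like {
	unsigned int len;
	const unsigned char *name;
};

/* dlm_lock_name-style: the name is stored inline, so a short-lived mle
 * needs no extra allocation for it. */
struct lock_name_like {
	unsigned int len;
	unsigned char name[LOCKID_NAME_MAX];
};

int main(void)
{
	const char *lockid = "N000000000000001";

	struct qstr_like q = { .len = strlen(lockid),
			       .name = (const unsigned char *)strdup(lockid) };

	struct lock_name_like n = { .len = strlen(lockid) };
	memcpy(n.name, lockid, n.len);

	printf("qstr-like: %.*s (heap allocated)\n", (int)q.len, q.name);
	printf("inline:    %.*s (embedded)\n", (int)n.len, n.name);

	free((void *)q.name);
	return 0;
}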
Sunil Mushran
2009-Feb-26 23:00 UTC
[Ocfs2-devel] [PATCH 03/13] ocfs2/dlm: Refactor dlm_clean_master_list()
This patch refactors dlm_clean_master_list() so as to make it easier to convert the mle list to a hash. Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/dlm/dlmmaster.c | 148 ++++++++++++++++++++++++++------------------- 1 files changed, 85 insertions(+), 63 deletions(-) diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index e566d5e..a29f132 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -3207,12 +3207,87 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm, return ret; } +/* + * Sets the owner of the lockres, associated to the mle, to UNKNOWN + */ +static struct dlm_lock_resource *dlm_reset_mleres_owner(struct dlm_ctxt *dlm, + struct dlm_master_list_entry *mle) +{ + struct dlm_lock_resource *res; + unsigned int hash; + + /* Find the lockres associated to the mle and set its owner to UNK */ + hash = dlm_lockid_hash(mle->u.mlename.name, mle->u.mlename.len); + res = __dlm_lookup_lockres(dlm, mle->u.mlename.name, mle->u.mlename.len, + hash); + if (res) { + spin_unlock(&dlm->master_lock); + + /* move lockres onto recovery list */ + spin_lock(&res->spinlock); + dlm_set_lockres_owner(dlm, res, DLM_LOCK_RES_OWNER_UNKNOWN); + dlm_move_lockres_to_recovery_list(dlm, res); + spin_unlock(&res->spinlock); + dlm_lockres_put(res); + + /* about to get rid of mle, detach from heartbeat */ + __dlm_mle_detach_hb_events(dlm, mle); + + /* dump the mle */ + spin_lock(&dlm->master_lock); + __dlm_put_mle(mle); + spin_unlock(&dlm->master_lock); + } + + return res; +} + +static void dlm_clean_migration_mle(struct dlm_ctxt *dlm, + struct dlm_master_list_entry *mle) +{ + __dlm_mle_detach_hb_events(dlm, mle); + + spin_lock(&mle->spinlock); + __dlm_unlink_mle(dlm, mle); + atomic_set(&mle->woken, 1); + spin_unlock(&mle->spinlock); + + wake_up(&mle->wq); +} + +static void dlm_clean_block_mle(struct dlm_ctxt *dlm, + struct dlm_master_list_entry *mle, u8 dead_node) +{ + int bit; + + BUG_ON(mle->type != DLM_MLE_BLOCK); + + spin_lock(&mle->spinlock); + bit = find_next_bit(mle->maybe_map, O2NM_MAX_NODES, 0); + if (bit != dead_node) { + mlog(0, "mle found, but dead node %u would not have been " + "master\n", dead_node); + spin_unlock(&mle->spinlock); + } else { + /* Must drop the refcount by one since the assert_master will + * never arrive. This may result in the mle being unlinked and + * freed, but there may still be a process waiting in the + * dlmlock path which is fine. */ + mlog(0, "node %u was expected master\n", dead_node); + atomic_set(&mle->woken, 1); + spin_unlock(&mle->spinlock); + wake_up(&mle->wq); + + /* Do not need events any longer, so detach from heartbeat */ + __dlm_mle_detach_hb_events(dlm, mle); + __dlm_put_mle(mle); + } +} void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node) { struct dlm_master_list_entry *mle, *next; struct dlm_lock_resource *res; - unsigned int hash; mlog_entry("dlm=%s, dead node=%u\n", dlm->name, dead_node); top: @@ -3236,30 +3311,7 @@ top: * need to clean up if the dead node would have * been the master. */ if (mle->type == DLM_MLE_BLOCK) { - int bit; - - spin_lock(&mle->spinlock); - bit = find_next_bit(mle->maybe_map, O2NM_MAX_NODES, 0); - if (bit != dead_node) { - mlog(0, "mle found, but dead node %u would " - "not have been master\n", dead_node); - spin_unlock(&mle->spinlock); - } else { - /* must drop the refcount by one since the - * assert_master will never arrive. 
this - * may result in the mle being unlinked and - * freed, but there may still be a process - * waiting in the dlmlock path which is fine. */ - mlog(0, "node %u was expected master\n", - dead_node); - atomic_set(&mle->woken, 1); - spin_unlock(&mle->spinlock); - wake_up(&mle->wq); - /* do not need events any longer, so detach - * from heartbeat */ - __dlm_mle_detach_hb_events(dlm, mle); - __dlm_put_mle(mle); - } + dlm_clean_block_mle(dlm, mle, dead_node); continue; } @@ -3280,51 +3332,21 @@ top: /* if we have reached this point, this mle needs to * be removed from the list and freed. */ - - /* remove from the list early. NOTE: unlinking - * list_head while in list_for_each_safe */ - __dlm_mle_detach_hb_events(dlm, mle); - spin_lock(&mle->spinlock); - __dlm_unlink_mle(dlm, mle); - atomic_set(&mle->woken, 1); - spin_unlock(&mle->spinlock); - wake_up(&mle->wq); + dlm_clean_migration_mle(dlm, mle); mlog(0, "%s: node %u died during migration from " "%u to %u!\n", dlm->name, dead_node, mle->master, mle->new_master); - /* if there is a lockres associated with this - * mle, find it and set its owner to UNKNOWN */ - hash = dlm_lockid_hash(mle->u.mlename.name, mle->u.mlename.len); - res = __dlm_lookup_lockres(dlm, mle->u.mlename.name, - mle->u.mlename.len, hash); - if (res) { - /* unfortunately if we hit this rare case, our - * lock ordering is messed. we need to drop - * the master lock so that we can take the - * lockres lock, meaning that we will have to - * restart from the head of list. */ - spin_unlock(&dlm->master_lock); - - /* move lockres onto recovery list */ - spin_lock(&res->spinlock); - dlm_set_lockres_owner(dlm, res, - DLM_LOCK_RES_OWNER_UNKNOWN); - dlm_move_lockres_to_recovery_list(dlm, res); - spin_unlock(&res->spinlock); - dlm_lockres_put(res); - - /* about to get rid of mle, detach from heartbeat */ - __dlm_mle_detach_hb_events(dlm, mle); - - /* dump the mle */ - spin_lock(&dlm->master_lock); - __dlm_put_mle(mle); - spin_unlock(&dlm->master_lock); + /* If we find a lockres associated with the mle, we've + * hit this rare case that messes up our lock ordering. + * If so, we need to drop the master lock so that we can + * take the lockres lock, meaning that we will have to + * restart from the head of list. */ + res = dlm_reset_mleres_owner(dlm, mle); + if (res) /* restart */ goto top; - } /* this may be the last reference */ __dlm_put_mle(mle); -- 1.5.6.3
Sunil Mushran
2009-Feb-26 23:00 UTC
[Ocfs2-devel] [PATCH 04/13] ocfs2/dlm: Create and destroy the dlm->master_hash
This patch adds code to create and destroy the dlm->master_hash. Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/dlm/dlmcommon.h | 8 ++++++++ fs/ocfs2/dlm/dlmdomain.c | 18 ++++++++++++++++++ 2 files changed, 26 insertions(+), 0 deletions(-) diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index b232aa0..425653f 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -151,6 +151,7 @@ struct dlm_ctxt unsigned long recovery_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; struct dlm_recovery_ctxt reco; spinlock_t master_lock; + struct hlist_head **master_hash; struct list_head master_list; struct list_head mle_hb_events; @@ -195,6 +196,13 @@ static inline struct hlist_head *dlm_lockres_hash(struct dlm_ctxt *dlm, unsigned return dlm->lockres_hash[(i / DLM_BUCKETS_PER_PAGE) % DLM_HASH_PAGES] + (i % DLM_BUCKETS_PER_PAGE); } +static inline struct hlist_head *dlm_master_hash(struct dlm_ctxt *dlm, + unsigned i) +{ + return dlm->master_hash[(i / DLM_BUCKETS_PER_PAGE) % DLM_HASH_PAGES] + + (i % DLM_BUCKETS_PER_PAGE); +} + /* these keventd work queue items are for less-frequently * called functions that cannot be directly called from the * net message handlers for some reason, usually because diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index d8d578f..4531504 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -304,6 +304,9 @@ static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm) if (dlm->lockres_hash) dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); + if (dlm->master_hash) + dlm_free_pagevec((void **)dlm->master_hash, DLM_HASH_PAGES); + if (dlm->name) kfree(dlm->name); @@ -1534,12 +1537,27 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, for (i = 0; i < DLM_HASH_BUCKETS; i++) INIT_HLIST_HEAD(dlm_lockres_hash(dlm, i)); + dlm->master_hash = (struct hlist_head **) + dlm_alloc_pagevec(DLM_HASH_PAGES); + if (!dlm->master_hash) { + mlog_errno(-ENOMEM); + dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); + kfree(dlm->name); + kfree(dlm); + dlm = NULL; + goto leave; + } + + for (i = 0; i < DLM_HASH_BUCKETS; i++) + INIT_HLIST_HEAD(dlm_master_hash(dlm, i)); + strcpy(dlm->name, domain); dlm->key = key; dlm->node_num = o2nm_this_node(); ret = dlm_create_debugfs_subroot(dlm); if (ret < 0) { + dlm_free_pagevec((void **)dlm->master_hash, DLM_HASH_PAGES); dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); kfree(dlm->name); kfree(dlm); -- 1.5.6.3
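dlm_master_hash() mirrors the existing dlm_lockres_hash(): a flat bucket index picks a page out of the pagevec and an offset within that page. A tiny standalone model of that arithmetic follows; the constants are placeholders for this sketch, since the real DLM_HASH_PAGES and DLM_BUCKETS_PER_PAGE depend on PAGE_SIZE and the size of struct hlist_head:

/* Illustrative only: mirrors the bucket-to-page math used by
 * dlm_lockres_hash()/dlm_master_hash(). The constants below are assumed
 * values for the sketch, not the ones computed in the tree. */
#include <stdio.h>

#define DLM_HASH_PAGES		4	/* assumed */
#define DLM_BUCKETS_PER_PAGE	512	/* assumed */
#define DLM_HASH_BUCKETS	(DLM_HASH_PAGES * DLM_BUCKETS_PER_PAGE)

/* Map a flat bucket index onto (page, offset), as the inline helper does
 * against a pagevec of hlist_head pages. */
static void locate_bucket(unsigned int i, unsigned int *page,
			  unsigned int *offset)
{
	*page   = (i / DLM_BUCKETS_PER_PAGE) % DLM_HASH_PAGES;
	*offset = i % DLM_BUCKETS_PER_PAGE;
}

int main(void)
{
	unsigned int page, offset;

	locate_bucket(513, &page, &offset);
	printf("bucket 513 -> page %u, offset %u\n", page, offset);
	return 0;
}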
Sunil Mushran
2009-Feb-26 23:00 UTC
[Ocfs2-devel] [PATCH 05/13] ocfs2/dlm: Activate dlm->master_hash for master list entries
With this patch, the mles are stored in a hash and not a simple list. This should improve the mle lookup time when the number of outstanding masteries is large. Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/dlm/dlmcommon.h | 4 +- fs/ocfs2/dlm/dlmdebug.c | 24 +++++++++++------- fs/ocfs2/dlm/dlmdomain.c | 1 - fs/ocfs2/dlm/dlmmaster.c | 61 ++++++++++++++++++++++++++++++++------------- 4 files changed, 60 insertions(+), 30 deletions(-) diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 425653f..aa55271 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -56,12 +56,13 @@ enum dlm_mle_type { }; struct dlm_lock_name { + unsigned int hash; unsigned int len; unsigned char name[DLM_LOCKID_NAME_MAX]; }; struct dlm_master_list_entry { - struct list_head list; + struct hlist_node master_hash_node; struct list_head hb_events; struct dlm_ctxt *dlm; spinlock_t spinlock; @@ -152,7 +153,6 @@ struct dlm_ctxt struct dlm_recovery_ctxt reco; spinlock_t master_lock; struct hlist_head **master_hash; - struct list_head master_list; struct list_head mle_hb_events; /* these give a really vague idea of the system load */ diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index c82feb7..336a98e 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -501,18 +501,25 @@ static struct file_operations debug_purgelist_fops = { static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db) { struct dlm_master_list_entry *mle; - int out = 0; + struct hlist_head *bucket; + struct hlist_node *list; + int i, out = 0; unsigned long total = 0; out += snprintf(db->buf + out, db->len - out, "Dumping MLEs for Domain: %s\n", dlm->name); spin_lock(&dlm->master_lock); - list_for_each_entry(mle, &dlm->master_list, list) { - ++total; - if (db->len - out < 200) - continue; - out += dump_mle(mle, db->buf + out, db->len - out); + for (i = 0; i < DLM_HASH_BUCKETS; i++) { + bucket = dlm_master_hash(dlm, i); + hlist_for_each(list, bucket) { + mle = hlist_entry(list, struct dlm_master_list_entry, + master_hash_node); + ++total; + if (db->len - out < 200) + continue; + out += dump_mle(mle, db->buf + out, db->len - out); + } } spin_unlock(&dlm->master_lock); @@ -813,12 +820,11 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db) /* Lists: Dirty=Empty Purge=InUse PendingASTs=Empty ... */ out += snprintf(db->buf + out, db->len - out, "Lists: Dirty=%s Purge=%s PendingASTs=%s " - "PendingBASTs=%s Master=%s\n", + "PendingBASTs=%s\n", (list_empty(&dlm->dirty_list) ? "Empty" : "InUse"), (list_empty(&dlm->purge_list) ? "Empty" : "InUse"), (list_empty(&dlm->pending_asts) ? "Empty" : "InUse"), - (list_empty(&dlm->pending_basts) ? "Empty" : "InUse"), - (list_empty(&dlm->master_list) ? "Empty" : "InUse")); + (list_empty(&dlm->pending_basts) ? 
"Empty" : "InUse")); /* Purge Count: xxx Refs: xxx */ out += snprintf(db->buf + out, db->len - out, diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 4531504..869648c 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -1597,7 +1597,6 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, init_waitqueue_head(&dlm->reco.event); init_waitqueue_head(&dlm->ast_wq); init_waitqueue_head(&dlm->migration_wq); - INIT_LIST_HEAD(&dlm->master_list); INIT_LIST_HEAD(&dlm->mle_hb_events); dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN; diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index a29f132..f1673f2 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -69,7 +69,8 @@ static int dlm_do_assert_master(struct dlm_ctxt *dlm, static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data); static inline void __dlm_mle_name(struct dlm_master_list_entry *mle, - unsigned char **name, unsigned int *namelen) + unsigned char **name, unsigned int *namelen, + unsigned int *namehash) { BUG_ON(mle->type != DLM_MLE_BLOCK && mle->type != DLM_MLE_MASTER && @@ -78,9 +79,13 @@ static inline void __dlm_mle_name(struct dlm_master_list_entry *mle, if (mle->type != DLM_MLE_MASTER) { *name = mle->u.mlename.name; *namelen = mle->u.mlename.len; + if (namehash) + *namehash = mle->u.mlename.hash; } else { *name = (unsigned char *)mle->u.mleres->lockname.name; *namelen = mle->u.mleres->lockname.len; + if (namehash) + *namehash = mle->u.mleres->lockname.hash; } } @@ -95,7 +100,7 @@ static inline int dlm_mle_equal(struct dlm_ctxt *dlm, if (dlm != mle->dlm) return 0; - __dlm_mle_name(mle, &mlename, &mlelen); + __dlm_mle_name(mle, &mlename, &mlelen, NULL); if (namelen != mlelen || memcmp(name, mlename, namelen) != 0) return 0; @@ -294,7 +299,7 @@ static void dlm_init_mle(struct dlm_master_list_entry *mle, mle->dlm = dlm; mle->type = type; - INIT_LIST_HEAD(&mle->list); + INIT_HLIST_NODE(&mle->master_hash_node); INIT_LIST_HEAD(&mle->hb_events); memset(mle->maybe_map, 0, sizeof(mle->maybe_map)); spin_lock_init(&mle->spinlock); @@ -317,6 +322,7 @@ static void dlm_init_mle(struct dlm_master_list_entry *mle, BUG_ON(!name); memcpy(mle->u.mlename.name, name, namelen); mle->u.mlename.len = namelen; + mle->u.mlename.hash = dlm_lockid_hash(name, namelen); } /* copy off the node_map and register hb callbacks on our copy */ @@ -334,15 +340,21 @@ void __dlm_unlink_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle) assert_spin_locked(&dlm->spinlock); assert_spin_locked(&dlm->master_lock); - if (!list_empty(&mle->list)) - list_del_init(&mle->list); + if (!hlist_unhashed(&mle->master_hash_node)) + hlist_del_init(&mle->master_hash_node); } void __dlm_insert_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle) { + struct hlist_head *bucket; + unsigned char *mname; + unsigned int mlen, hash; + assert_spin_locked(&dlm->master_lock); - list_add(&mle->list, &dlm->master_list); + __dlm_mle_name(mle, &mname, &mlen, &hash); + bucket = dlm_master_hash(dlm, hash); + hlist_add_head(&mle->master_hash_node, bucket); } /* returns 1 if found, 0 if not */ @@ -351,10 +363,17 @@ static int dlm_find_mle(struct dlm_ctxt *dlm, char *name, unsigned int namelen) { struct dlm_master_list_entry *tmpmle; + struct hlist_head *bucket; + struct hlist_node *list; + unsigned int hash; assert_spin_locked(&dlm->master_lock); - list_for_each_entry(tmpmle, &dlm->master_list, list) { + hash = dlm_lockid_hash(name, namelen); + bucket = dlm_master_hash(dlm, hash); + 
hlist_for_each(list, bucket) { + tmpmle = hlist_entry(list, struct dlm_master_list_entry, + master_hash_node); if (!dlm_mle_equal(dlm, tmpmle, name, namelen)) continue; dlm_get_mle(tmpmle); @@ -428,23 +447,20 @@ static void dlm_mle_release(struct kref *kref) { struct dlm_master_list_entry *mle; struct dlm_ctxt *dlm; + unsigned char *mname; + unsigned int mlen; mlog_entry_void(); mle = container_of(kref, struct dlm_master_list_entry, mle_refs); dlm = mle->dlm; - if (mle->type != DLM_MLE_MASTER) { - mlog(0, "calling mle_release for %.*s, type %d\n", - mle->u.mlename.len, mle->u.mlename.name, mle->type); - } else { - mlog(0, "calling mle_release for %.*s, type %d\n", - mle->u.mleres->lockname.len, - mle->u.mleres->lockname.name, mle->type); - } assert_spin_locked(&dlm->spinlock); assert_spin_locked(&dlm->master_lock); + __dlm_mle_name(mle, &mname, &mlen, NULL); + mlog(0, "Releasing mle for %.*s, type %d\n", mlen, mname, mle->type); + /* remove from list if not already */ __dlm_unlink_mle(dlm, mle); @@ -1342,7 +1358,7 @@ static int dlm_do_master_request(struct dlm_lock_resource *res, BUG_ON(mle->type == DLM_MLE_MIGRATION); - __dlm_mle_name(mle, &mlename, &mlenamelen); + __dlm_mle_name(mle, &mlename, &mlenamelen, NULL); request.namelen = (u8)mlenamelen; memcpy(request.name, mlename, request.namelen); @@ -3286,8 +3302,11 @@ static void dlm_clean_block_mle(struct dlm_ctxt *dlm, void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node) { - struct dlm_master_list_entry *mle, *next; + struct dlm_master_list_entry *mle; struct dlm_lock_resource *res; + struct hlist_head *bucket; + struct hlist_node *list; + unsigned int i; mlog_entry("dlm=%s, dead node=%u\n", dlm->name, dead_node); top: @@ -3295,7 +3314,12 @@ top: /* clean the master list */ spin_lock(&dlm->master_lock); - list_for_each_entry_safe(mle, next, &dlm->master_list, list) { + for (i = 0; i < DLM_HASH_BUCKETS; i++) { + bucket = dlm_master_hash(dlm, i); + hlist_for_each(list, bucket) { + mle = hlist_entry(list, struct dlm_master_list_entry, + master_hash_node); + BUG_ON(mle->type != DLM_MLE_BLOCK && mle->type != DLM_MLE_MASTER && mle->type != DLM_MLE_MIGRATION); @@ -3351,6 +3375,7 @@ top: /* this may be the last reference */ __dlm_put_mle(mle); } + } spin_unlock(&dlm->master_lock); } -- 1.5.6.3
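What the hash buys is the lookup pattern: hash the lock name once, index into a single bucket, and compare against only the mles chained there instead of walking one global list. Below is a compact userspace model of that pattern; the hash function, sizes and type names are invented for the sketch, whereas the real code uses dlm_lockid_hash(), dlm_master_hash() and hlist walking under dlm->master_lock:

#include <stdio.h>
#include <string.h>

#define NUM_BUCKETS 128			/* assumed; the dlm uses DLM_HASH_BUCKETS */

struct mle {
	char name[64];
	unsigned int namelen;
	struct mle *next;		/* stands in for hlist_node */
};

static struct mle *buckets[NUM_BUCKETS];

static unsigned int name_hash(const char *name, unsigned int len)
{
	unsigned int hash = 0;

	while (len--)
		hash = hash * 31 + (unsigned char)*name++;	/* toy hash */
	return hash;
}

static void insert_mle(struct mle *m)
{
	unsigned int b = name_hash(m->name, m->namelen) % NUM_BUCKETS;

	m->next = buckets[b];
	buckets[b] = m;
}

static struct mle *find_mle(const char *name, unsigned int len)
{
	unsigned int b = name_hash(name, len) % NUM_BUCKETS;
	struct mle *m;

	for (m = buckets[b]; m; m = m->next)
		if (m->namelen == len && !memcmp(m->name, name, len))
			return m;
	return NULL;
}

int main(void)
{
	struct mle a = { .namelen = 0 };

	strcpy(a.name, "M000000000000000000000001");
	a.namelen = strlen(a.name);

	insert_mle(&a);
	printf("found: %s\n", find_mle(a.name, a.namelen) ? "yes" : "no");
	return 0;
}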
Sunil Mushran
2009-Feb-26 23:00 UTC
[Ocfs2-devel] [PATCH 06/13] ocfs2/dlm: Indent dlm_cleanup_master_list()
The previous patch explicitly did not indent dlm_cleanup_master_list() so as to make the patch readable. This patch properly indents the function. Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/dlm/dlmmaster.c | 106 ++++++++++++++++++++++----------------------- 1 files changed, 52 insertions(+), 54 deletions(-) diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index f1673f2..5ee4448 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -3320,66 +3320,64 @@ top: mle = hlist_entry(list, struct dlm_master_list_entry, master_hash_node); - BUG_ON(mle->type != DLM_MLE_BLOCK && - mle->type != DLM_MLE_MASTER && - mle->type != DLM_MLE_MIGRATION); - - /* MASTER mles are initiated locally. the waiting - * process will notice the node map change - * shortly. let that happen as normal. */ - if (mle->type == DLM_MLE_MASTER) - continue; - + BUG_ON(mle->type != DLM_MLE_BLOCK && + mle->type != DLM_MLE_MASTER && + mle->type != DLM_MLE_MIGRATION); + + /* MASTER mles are initiated locally. The waiting + * process will notice the node map change shortly. + * Let that happen as normal. */ + if (mle->type == DLM_MLE_MASTER) + continue; + + /* BLOCK mles are initiated by other nodes. Need to + * clean up if the dead node would have been the + * master. */ + if (mle->type == DLM_MLE_BLOCK) { + dlm_clean_block_mle(dlm, mle, dead_node); + continue; + } - /* BLOCK mles are initiated by other nodes. - * need to clean up if the dead node would have - * been the master. */ - if (mle->type == DLM_MLE_BLOCK) { - dlm_clean_block_mle(dlm, mle, dead_node); - continue; + /* Everything else is a MIGRATION mle */ + + /* The rule for MIGRATION mles is that the master + * becomes UNKNOWN if *either* the original or the new + * master dies. All UNKNOWN lockres' are sent to + * whichever node becomes the recovery master. The new + * master is responsible for determining if there is + * still a master for this lockres, or if he needs to + * take over mastery. Either way, this node should + * expect another message to resolve this. */ + + if (mle->master != dead_node && + mle->new_master != dead_node) + continue; + + /* If we have reached this point, this mle needs to be + * removed from the list and freed. */ + dlm_clean_migration_mle(dlm, mle); + + mlog(0, "%s: node %u died during migration from " + "%u to %u!\n", dlm->name, dead_node, mle->master, + mle->new_master); + + /* If we find a lockres associated with the mle, we've + * hit this rare case that messes up our lock ordering. + * If so, we need to drop the master lock so that we can + * take the lockres lock, meaning that we will have to + * restart from the head of list. */ + res = dlm_reset_mleres_owner(dlm, mle); + if (res) + /* restart */ + goto top; + + /* This may be the last reference */ + __dlm_put_mle(mle); } - - /* everything else is a MIGRATION mle */ - - /* the rule for MIGRATION mles is that the master - * becomes UNKNOWN if *either* the original or - * the new master dies. all UNKNOWN lockreses - * are sent to whichever node becomes the recovery - * master. the new master is responsible for - * determining if there is still a master for - * this lockres, or if he needs to take over - * mastery. either way, this node should expect - * another message to resolve this. */ - if (mle->master != dead_node && - mle->new_master != dead_node) - continue; - - /* if we have reached this point, this mle needs to - * be removed from the list and freed. 
*/ - dlm_clean_migration_mle(dlm, mle); - - mlog(0, "%s: node %u died during migration from " - "%u to %u!\n", dlm->name, dead_node, - mle->master, mle->new_master); - - /* If we find a lockres associated with the mle, we've - * hit this rare case that messes up our lock ordering. - * If so, we need to drop the master lock so that we can - * take the lockres lock, meaning that we will have to - * restart from the head of list. */ - res = dlm_reset_mleres_owner(dlm, mle); - if (res) - /* restart */ - goto top; - - /* this may be the last reference */ - __dlm_put_mle(mle); - } } spin_unlock(&dlm->master_lock); } - int dlm_finish_migration(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, u8 old_master) { -- 1.5.6.3
Sunil Mushran
2009-Feb-26 23:00 UTC
[Ocfs2-devel] [PATCH 07/13] ocfs2/dlm: Track number of mles
The lifetime of a mle is limited to the duration of the lockres mastery process. While typically this lifetime is fairly short, we have noticed the number of mles explode under certain circumstances. This patch tracks the number of each different types of mles and should help us determine how best to speed up the mastery process. Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/dlm/dlmcommon.h | 5 ++++- fs/ocfs2/dlm/dlmdomain.c | 5 +++++ fs/ocfs2/dlm/dlmmaster.c | 5 +++++ 3 files changed, 14 insertions(+), 1 deletions(-) diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index aa55271..67b3447 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -52,7 +52,8 @@ enum dlm_mle_type { DLM_MLE_BLOCK, DLM_MLE_MASTER, - DLM_MLE_MIGRATION + DLM_MLE_MIGRATION, + DLM_MLE_NUM_TYPES }; struct dlm_lock_name { @@ -156,6 +157,8 @@ struct dlm_ctxt struct list_head mle_hb_events; /* these give a really vague idea of the system load */ + atomic_t mle_tot_count[DLM_MLE_NUM_TYPES]; + atomic_t mle_cur_count[DLM_MLE_NUM_TYPES]; atomic_t local_resources; atomic_t remote_resources; atomic_t unknown_resources; diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 869648c..0479bdf 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -1608,6 +1608,11 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, atomic_set(&dlm->remote_resources, 0); atomic_set(&dlm->unknown_resources, 0); + for (i = 0; i < DLM_MLE_NUM_TYPES; ++i) { + atomic_set(&dlm->mle_tot_count[i], 0); + atomic_set(&dlm->mle_cur_count[i], 0); + } + spin_lock_init(&dlm->work_lock); INIT_LIST_HEAD(&dlm->work_list); INIT_WORK(&dlm->dispatched_work, dlm_dispatch_work); diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 5ee4448..aa1d75f 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -325,6 +325,9 @@ static void dlm_init_mle(struct dlm_master_list_entry *mle, mle->u.mlename.hash = dlm_lockid_hash(name, namelen); } + atomic_inc(&dlm->mle_tot_count[mle->type]); + atomic_inc(&dlm->mle_cur_count[mle->type]); + /* copy off the node_map and register hb callbacks on our copy */ memcpy(mle->node_map, dlm->domain_map, sizeof(mle->node_map)); memcpy(mle->vote_map, dlm->domain_map, sizeof(mle->vote_map)); @@ -467,6 +470,8 @@ static void dlm_mle_release(struct kref *kref) /* detach the mle from the domain node up/down events */ __dlm_mle_detach_hb_events(dlm, mle); + atomic_dec(&dlm->mle_cur_count[mle->type]); + /* NOTE: kfree under spinlock here. * if this is bad, we can move this to a freelist. */ kmem_cache_free(dlm_mle_cache, mle); -- 1.5.6.3
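The bookkeeping pattern is simple: the per-type total count only ever increases (mles created since the domain was created), while the current count goes up in dlm_init_mle() and down in dlm_mle_release(). A small standalone model using C11 atomics, with names invented for the sketch:

#include <stdatomic.h>
#include <stdio.h>

/* Mirrors the two counters per mle type: *_tot_count only increases,
 * *_cur_count tracks live objects. */
enum mle_type { MLE_BLOCK, MLE_MASTER, MLE_MIGRATION, MLE_NUM_TYPES };

static atomic_int tot_count[MLE_NUM_TYPES];
static atomic_int cur_count[MLE_NUM_TYPES];

static void mle_init(enum mle_type t)
{
	atomic_fetch_add(&tot_count[t], 1);
	atomic_fetch_add(&cur_count[t], 1);
}

static void mle_release(enum mle_type t)
{
	atomic_fetch_sub(&cur_count[t], 1);
}

int main(void)
{
	mle_init(MLE_MASTER);
	mle_init(MLE_MASTER);
	mle_release(MLE_MASTER);

	printf("master mles: current=%d total=%d\n",
	       atomic_load(&cur_count[MLE_MASTER]),
	       atomic_load(&tot_count[MLE_MASTER]));
	return 0;
}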
Sunil Mushran
2009-Feb-26 23:00 UTC
[Ocfs2-devel] [PATCH 08/13] ocfs2/dlm: Improve lockres counts
This patch replaces the lockres counts that tracked the number number of locally and remotely mastered lockres' with a current and total count. The total count is the number of lockres' that have been created since the dlm domain was created. The number of locally and remotely mastered counts can be computed using the locking_state output. Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/dlm/dlmcommon.h | 5 ++--- fs/ocfs2/dlm/dlmdebug.c | 12 ------------ fs/ocfs2/dlm/dlmdomain.c | 5 ++--- fs/ocfs2/dlm/dlmmaster.c | 27 +++++++-------------------- 4 files changed, 11 insertions(+), 38 deletions(-) diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 67b3447..e5026ce 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -159,9 +159,8 @@ struct dlm_ctxt /* these give a really vague idea of the system load */ atomic_t mle_tot_count[DLM_MLE_NUM_TYPES]; atomic_t mle_cur_count[DLM_MLE_NUM_TYPES]; - atomic_t local_resources; - atomic_t remote_resources; - atomic_t unknown_resources; + atomic_t res_tot_count; + atomic_t res_cur_count; struct dlm_debug_ctxt *dlm_debug_ctxt; struct dentry *dlm_debugfs_subroot; diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 336a98e..d7decaa 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -763,12 +763,6 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db) int out = 0; struct dlm_reco_node_data *node; char *state; - int lres, rres, ures, tres; - - lres = atomic_read(&dlm->local_resources); - rres = atomic_read(&dlm->remote_resources); - ures = atomic_read(&dlm->unknown_resources); - tres = lres + rres + ures; spin_lock(&dlm->spinlock); @@ -811,12 +805,6 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db) db->buf + out, db->len - out); out += snprintf(db->buf + out, db->len - out, "\n"); - /* Mastered Resources Total: xxx Locally: xxx Remotely: ... */ - out += snprintf(db->buf + out, db->len - out, - "Mastered Resources Total: %d Locally: %d " - "Remotely: %d Unknown: %d\n", - tres, lres, rres, ures); - /* Lists: Dirty=Empty Purge=InUse PendingASTs=Empty ... 
*/ out += snprintf(db->buf + out, db->len - out, "Lists: Dirty=%s Purge=%s PendingASTs=%s " diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 0479bdf..4d9e6b2 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -1604,10 +1604,9 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, dlm->reco.new_master = O2NM_INVALID_NODE_NUM; dlm->reco.dead_node = O2NM_INVALID_NODE_NUM; - atomic_set(&dlm->local_resources, 0); - atomic_set(&dlm->remote_resources, 0); - atomic_set(&dlm->unknown_resources, 0); + atomic_set(&dlm->res_tot_count, 0); + atomic_set(&dlm->res_cur_count, 0); for (i = 0; i < DLM_MLE_NUM_TYPES; ++i) { atomic_set(&dlm->mle_tot_count[i], 0); atomic_set(&dlm->mle_cur_count[i], 0); diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index aa1d75f..13df364 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -517,15 +517,6 @@ static void dlm_set_lockres_owner(struct dlm_ctxt *dlm, { assert_spin_locked(&res->spinlock); - mlog_entry("%.*s, %u\n", res->lockname.len, res->lockname.name, owner); - - if (owner == dlm->node_num) - atomic_inc(&dlm->local_resources); - else if (owner == DLM_LOCK_RES_OWNER_UNKNOWN) - atomic_inc(&dlm->unknown_resources); - else - atomic_inc(&dlm->remote_resources); - res->owner = owner; } @@ -534,17 +525,8 @@ void dlm_change_lockres_owner(struct dlm_ctxt *dlm, { assert_spin_locked(&res->spinlock); - if (owner == res->owner) - return; - - if (res->owner == dlm->node_num) - atomic_dec(&dlm->local_resources); - else if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) - atomic_dec(&dlm->unknown_resources); - else - atomic_dec(&dlm->remote_resources); - - dlm_set_lockres_owner(dlm, res, owner); + if (owner != res->owner) + dlm_set_lockres_owner(dlm, res, owner); } @@ -573,6 +555,8 @@ static void dlm_lockres_release(struct kref *kref) } spin_unlock(&dlm->track_lock); + atomic_dec(&dlm->res_cur_count); + dlm_put(dlm); if (!hlist_unhashed(&res->hash_node) || @@ -653,6 +637,9 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, kref_init(&res->refs); + atomic_inc(&dlm->res_tot_count); + atomic_inc(&dlm->res_cur_count); + /* just for consistency */ spin_lock(&res->spinlock); dlm_set_lockres_owner(dlm, res, DLM_LOCK_RES_OWNER_UNKNOWN); -- 1.5.6.3
Sunil Mushran
2009-Feb-26 23:00 UTC
[Ocfs2-devel] [PATCH 09/13] ocfs2/dlm: dlm_set_lockres_owner() and dlm_change_lockres_owner() inlined
This patch inlines dlm_set_lockres_owner() and dlm_change_lockres_owner(). Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/dlm/dlmcommon.h | 21 ++++++++++++++++++--- fs/ocfs2/dlm/dlmmaster.c | 19 ------------------- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index e5026ce..266fde9 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -858,9 +858,7 @@ struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm, unsigned int len); int dlm_is_host_down(int errno); -void dlm_change_lockres_owner(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - u8 owner); + struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, const char *lockid, int namelen, @@ -1123,6 +1121,23 @@ static inline int dlm_node_iter_next(struct dlm_node_iter *iter) return bit; } +static inline void dlm_set_lockres_owner(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res, + u8 owner) +{ + assert_spin_locked(&res->spinlock); + + res->owner = owner; +} +static inline void dlm_change_lockres_owner(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res, + u8 owner) +{ + assert_spin_locked(&res->spinlock); + + if (owner != res->owner) + dlm_set_lockres_owner(dlm, res, owner); +} #endif /* DLMCOMMON_H */ diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 13df364..2093133 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -511,25 +511,6 @@ void dlm_destroy_master_caches(void) kmem_cache_destroy(dlm_lockres_cache); } -static void dlm_set_lockres_owner(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - u8 owner) -{ - assert_spin_locked(&res->spinlock); - - res->owner = owner; -} - -void dlm_change_lockres_owner(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, u8 owner) -{ - assert_spin_locked(&res->spinlock); - - if (owner != res->owner) - dlm_set_lockres_owner(dlm, res, owner); -} - - static void dlm_lockres_release(struct kref *kref) { struct dlm_lock_resource *res; -- 1.5.6.3
Sunil Mushran
2009-Feb-26 23:00 UTC
[Ocfs2-devel] [PATCH 10/13] ocfs2/dlm: Show the number of lockres/mles in dlm_state
This patch shows the number of lockres' and mles in the debugfs file, dlm_state. Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/dlm/dlmdebug.c | 36 ++++++++++++++++++++++++++++++++++++ 1 files changed, 36 insertions(+), 0 deletions(-) diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index d7decaa..bf9fa27 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -763,6 +763,8 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db) int out = 0; struct dlm_reco_node_data *node; char *state; + int cur_mles = 0, tot_mles = 0; + int i; spin_lock(&dlm->spinlock); @@ -805,6 +807,40 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db) db->buf + out, db->len - out); out += snprintf(db->buf + out, db->len - out, "\n"); + /* Lock Resources: xxx (xxx) */ + out += snprintf(db->buf + out, db->len - out, + "Lock Resources: %d (%d)\n", + atomic_read(&dlm->res_cur_count), + atomic_read(&dlm->res_tot_count)); + + for (i = 0; i < DLM_MLE_NUM_TYPES; ++i) + tot_mles += atomic_read(&dlm->mle_tot_count[i]); + + for (i = 0; i < DLM_MLE_NUM_TYPES; ++i) + cur_mles += atomic_read(&dlm->mle_cur_count[i]); + + /* MLEs: xxx (xxx) */ + out += snprintf(db->buf + out, db->len - out, + "MLEs: %d (%d)\n", cur_mles, tot_mles); + + /* Blocking: xxx (xxx) */ + out += snprintf(db->buf + out, db->len - out, + " Blocking: %d (%d)\n", + atomic_read(&dlm->mle_cur_count[DLM_MLE_BLOCK]), + atomic_read(&dlm->mle_tot_count[DLM_MLE_BLOCK])); + + /* Mastery: xxx (xxx) */ + out += snprintf(db->buf + out, db->len - out, + " Mastery: %d (%d)\n", + atomic_read(&dlm->mle_cur_count[DLM_MLE_MASTER]), + atomic_read(&dlm->mle_tot_count[DLM_MLE_MASTER])); + + /* Migration: xxx (xxx) */ + out += snprintf(db->buf + out, db->len - out, + " Migration: %d (%d)\n", + atomic_read(&dlm->mle_cur_count[DLM_MLE_MIGRATION]), + atomic_read(&dlm->mle_tot_count[DLM_MLE_MIGRATION])); + /* Lists: Dirty=Empty Purge=InUse PendingASTs=Empty ... */ out += snprintf(db->buf + out, db->len - out, "Lists: Dirty=%s Purge=%s PendingASTs=%s " -- 1.5.6.3
Sunil Mushran
2009-Feb-26 23:00 UTC
[Ocfs2-devel] [PATCH 11/13] ocfs2/dlm: Remove struct dlm_lock_name in struct dlm_master_list_entry
This patch removes struct dlm_lock_name and adds the entries directly to struct dlm_master_list_entry. Under the new scheme, both mles that are backed by a lockres or not, will have the name populated in mle->mname. This allows us to get rid of code that was figuring out the location of the mle name. Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/dlm/dlmcommon.h | 14 +++------- fs/ocfs2/dlm/dlmdebug.c | 12 +------- fs/ocfs2/dlm/dlmmaster.c | 68 ++++++++++++--------------------------------- 3 files changed, 23 insertions(+), 71 deletions(-) diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 266fde9..0102be3 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -56,12 +56,6 @@ enum dlm_mle_type { DLM_MLE_NUM_TYPES }; -struct dlm_lock_name { - unsigned int hash; - unsigned int len; - unsigned char name[DLM_LOCKID_NAME_MAX]; -}; - struct dlm_master_list_entry { struct hlist_node master_hash_node; struct list_head hb_events; @@ -80,10 +74,10 @@ struct dlm_master_list_entry { enum dlm_mle_type type; struct o2hb_callback_func mle_hb_up; struct o2hb_callback_func mle_hb_down; - union { - struct dlm_lock_resource *mleres; - struct dlm_lock_name mlename; - } u; + struct dlm_lock_resource *mleres; + unsigned char mname[DLM_LOCKID_NAME_MAX]; + unsigned int mnamelen; + unsigned int mnamehash; }; enum dlm_ast_type { diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index bf9fa27..bdf1c78 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -287,18 +287,8 @@ static int stringify_nodemap(unsigned long *nodemap, int maxnodes, static int dump_mle(struct dlm_master_list_entry *mle, char *buf, int len) { int out = 0; - unsigned int namelen; - unsigned char *name; char *mle_type; - if (mle->type != DLM_MLE_MASTER) { - name = mle->u.mlename.name; - namelen = mle->u.mlename.len; - } else { - name = (unsigned char *)mle->u.mleres->lockname.name; - namelen = mle->u.mleres->lockname.len; - } - if (mle->type == DLM_MLE_BLOCK) mle_type = "BLK"; else if (mle->type == DLM_MLE_MASTER) @@ -306,7 +296,7 @@ static int dump_mle(struct dlm_master_list_entry *mle, char *buf, int len) else mle_type = "MIG"; - out += stringify_lockname(name, namelen, buf + out, len - out); + out += stringify_lockname(mle->mname, mle->mnamelen, buf + out, len - out); out += snprintf(buf + out, len - out, "\t%3s\tmas=%3u\tnew=%3u\tevt=%1d\tuse=%1d\tref=%3d\n", mle_type, mle->master, mle->new_master, diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 2093133..ba6d473 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -68,41 +68,16 @@ static int dlm_do_assert_master(struct dlm_ctxt *dlm, void *nodemap, u32 flags); static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data); -static inline void __dlm_mle_name(struct dlm_master_list_entry *mle, - unsigned char **name, unsigned int *namelen, - unsigned int *namehash) -{ - BUG_ON(mle->type != DLM_MLE_BLOCK && - mle->type != DLM_MLE_MASTER && - mle->type != DLM_MLE_MIGRATION); - - if (mle->type != DLM_MLE_MASTER) { - *name = mle->u.mlename.name; - *namelen = mle->u.mlename.len; - if (namehash) - *namehash = mle->u.mlename.hash; - } else { - *name = (unsigned char *)mle->u.mleres->lockname.name; - *namelen = mle->u.mleres->lockname.len; - if (namehash) - *namehash = mle->u.mleres->lockname.hash; - } -} - static inline int dlm_mle_equal(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle, const char *name, unsigned int namelen) { - unsigned char 
*mlename; - unsigned int mlelen; - if (dlm != mle->dlm) return 0; - __dlm_mle_name(mle, &mlename, &mlelen, NULL); - - if (namelen != mlelen || memcmp(name, mlename, namelen) != 0) + if (namelen != mle->mnamelen || + memcmp(name, mle->mname, namelen) != 0) return 0; return 1; @@ -317,12 +292,16 @@ static void dlm_init_mle(struct dlm_master_list_entry *mle, if (mle->type == DLM_MLE_MASTER) { BUG_ON(!res); - mle->u.mleres = res; + mle->mleres = res; + memcpy(mle->mname, res->lockname.name, res->lockname.len); + mle->mnamelen = res->lockname.len; + mle->mnamehash = res->lockname.hash; } else { BUG_ON(!name); - memcpy(mle->u.mlename.name, name, namelen); - mle->u.mlename.len = namelen; - mle->u.mlename.hash = dlm_lockid_hash(name, namelen); + mle->mleres = NULL; + memcpy(mle->mname, name, namelen); + mle->mnamelen = namelen; + mle->mnamehash = dlm_lockid_hash(name, namelen); } atomic_inc(&dlm->mle_tot_count[mle->type]); @@ -350,13 +329,10 @@ void __dlm_unlink_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle) void __dlm_insert_mle(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle) { struct hlist_head *bucket; - unsigned char *mname; - unsigned int mlen, hash; assert_spin_locked(&dlm->master_lock); - __dlm_mle_name(mle, &mname, &mlen, &hash); - bucket = dlm_master_hash(dlm, hash); + bucket = dlm_master_hash(dlm, mle->mnamehash); hlist_add_head(&mle->master_hash_node, bucket); } @@ -450,8 +426,6 @@ static void dlm_mle_release(struct kref *kref) { struct dlm_master_list_entry *mle; struct dlm_ctxt *dlm; - unsigned char *mname; - unsigned int mlen; mlog_entry_void(); @@ -461,8 +435,8 @@ static void dlm_mle_release(struct kref *kref) assert_spin_locked(&dlm->spinlock); assert_spin_locked(&dlm->master_lock); - __dlm_mle_name(mle, &mname, &mlen, NULL); - mlog(0, "Releasing mle for %.*s, type %d\n", mlen, mname, mle->type); + mlog(0, "Releasing mle for %.*s, type %d\n", mle->mnamelen, mle->mname, + mle->type); /* remove from list if not already */ __dlm_unlink_mle(dlm, mle); @@ -1284,7 +1258,7 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm, res->lockname.len, res->lockname.name); mle->type = DLM_MLE_MASTER; - mle->u.mleres = res; + mle->mleres = res; } } } @@ -1323,18 +1297,14 @@ static int dlm_do_master_request(struct dlm_lock_resource *res, struct dlm_ctxt *dlm = mle->dlm; struct dlm_master_request request; int ret, response=0, resend; - unsigned char *mlename; - unsigned int mlenamelen; memset(&request, 0, sizeof(request)); request.node_idx = dlm->node_num; BUG_ON(mle->type == DLM_MLE_MIGRATION); - __dlm_mle_name(mle, &mlename, &mlenamelen, NULL); - - request.namelen = (u8)mlenamelen; - memcpy(request.name, mlename, request.namelen); + request.namelen = (u8)mle->mnamelen; + memcpy(request.name, mle->mname, request.namelen); again: ret = o2net_send_message(DLM_MASTER_REQUEST_MSG, dlm->key, &request, @@ -3203,12 +3173,10 @@ static struct dlm_lock_resource *dlm_reset_mleres_owner(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle) { struct dlm_lock_resource *res; - unsigned int hash; /* Find the lockres associated to the mle and set its owner to UNK */ - hash = dlm_lockid_hash(mle->u.mlename.name, mle->u.mlename.len); - res = __dlm_lookup_lockres(dlm, mle->u.mlename.name, mle->u.mlename.len, - hash); + res = __dlm_lookup_lockres(dlm, mle->mname, mle->mnamelen, + mle->mnamehash); if (res) { spin_unlock(&dlm->master_lock); -- 1.5.6.3
Sunil Mushran
2009-Feb-26 23:00 UTC
[Ocfs2-devel] [PATCH 12/13] ocfs2/dlm: Do not purge lockres that is being migrated dlm_purge_lockres()
This patch attempts to fix a fine race between purging and migration. Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/dlm/dlmthread.c | 20 ++++++++++++++++++-- 1 files changed, 18 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index d129520..10a51ba 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -162,12 +162,28 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm, spin_lock(&res->spinlock); if (!__dlm_lockres_unused(res)) { - spin_unlock(&res->spinlock); mlog(0, "%s:%.*s: tried to purge but not unused\n", dlm->name, res->lockname.len, res->lockname.name); - return -ENOTEMPTY; + __dlm_print_one_lock_resource(res); + spin_unlock(&res->spinlock); + BUG(); } + + if (res->state & DLM_LOCK_RES_MIGRATING) { + mlog(0, "%s:%.*s: Delay dropref as this lockres is " + "being remastered\n", dlm->name, res->lockname.len, + res->lockname.name); + /* Re-add the lockres to the end of the purge list */ + if (!list_empty(&res->purge)) { + list_del_init(&res->purge); + list_add_tail(&res->purge, &dlm->purge_list); + } + spin_unlock(&res->spinlock); + return 0; + } + master = (res->owner == dlm->node_num); + if (!master) res->state |= DLM_LOCK_RES_DROPPING_REF; spin_unlock(&res->spinlock); -- 1.5.6.3
Sunil Mushran
2009-Feb-26 23:00 UTC
[Ocfs2-devel] [PATCH 13/13] ocfs2/dlm: Tweak mle_state output
The debugfs file, mle_state, now prints the number of largest number of mles in one hash link. Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/dlm/dlmdebug.c | 7 +++++-- 1 files changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index bdf1c78..df52f70 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -494,7 +494,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db) struct hlist_head *bucket; struct hlist_node *list; int i, out = 0; - unsigned long total = 0; + unsigned long total = 0, longest = 0, bktcnt; out += snprintf(db->buf + out, db->len - out, "Dumping MLEs for Domain: %s\n", dlm->name); @@ -506,15 +506,18 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db) mle = hlist_entry(list, struct dlm_master_list_entry, master_hash_node); ++total; + ++bktcnt; if (db->len - out < 200) continue; out += dump_mle(mle, db->buf + out, db->len - out); } + longest = max(longest, bktcnt); + bktcnt = 0; } spin_unlock(&dlm->master_lock); out += snprintf(db->buf + out, db->len - out, - "Total on list: %ld\n", total); + "Total: %ld, Longest: %ld\n", total, longest); return out; } -- 1.5.6.3
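The statistic is a running maximum over per-bucket entry counts taken while walking the hash under the master lock. A standalone model of that bookkeeping, using toy chain lengths in place of the mle hash and resetting the per-bucket counter at the top of each bucket:

#include <stdio.h>

/* Toy bucket occupancy standing in for the mle hash chains; values are
 * invented for the sketch. */
#define NUM_BUCKETS 8

int main(void)
{
	unsigned long chain_len[NUM_BUCKETS] = { 0, 3, 1, 0, 5, 2, 0, 1 };
	unsigned long total = 0, longest = 0, bktcnt;
	unsigned long j;
	int i;

	for (i = 0; i < NUM_BUCKETS; i++) {
		bktcnt = 0;
		/* in the real code this is an hlist_for_each() over the bucket */
		for (j = 0; j < chain_len[i]; j++) {
			++total;
			++bktcnt;
		}
		longest = (bktcnt > longest) ? bktcnt : longest;
	}

	printf("Total: %lu, Longest: %lu\n", total, longest);
	return 0;
}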