This series exposes o2hb and ocfs2 information via debugfs and includes some dlm fixes, among them a fix for the recently discovered race in flock(). Sunil
Sunil Mushran
2008-Dec-16 23:49 UTC
[Ocfs2-devel] [PATCH 1/9] ocfs2/hb: Exposes list of heartbeating nodes via debugfs
Patch creates a debugfs file, o2hb/livenodes, which exposes the aggregate list of heartbeating nodes across all heartbeat regions. Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/cluster/heartbeat.c | 140 +++++++++++++++++++++++++++++++++++++++- fs/ocfs2/cluster/heartbeat.h | 3 +- fs/ocfs2/cluster/nodemanager.c | 9 ++- 3 files changed, 148 insertions(+), 4 deletions(-) diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 6ebaa58..7e9d25c 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -33,6 +33,7 @@ #include <linux/random.h> #include <linux/crc32.h> #include <linux/time.h> +#include <linux/debugfs.h> #include "heartbeat.h" #include "tcp.h" @@ -60,6 +61,11 @@ static unsigned long o2hb_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; static LIST_HEAD(o2hb_node_events); static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue); +#define O2HB_DEBUG_DIR "o2hb" +#define O2HB_DEBUG_LIVENODES "livenodes" +static struct dentry *o2hb_debug_dir; +static struct dentry *o2hb_debug_livenodes; + static LIST_HEAD(o2hb_all_regions); static struct o2hb_callback { @@ -905,7 +911,121 @@ static int o2hb_thread(void *data) return 0; } -void o2hb_init(void) +#ifdef CONFIG_DEBUG_FS +struct o2hb_debug_buffer { + int len; + char *buf; +}; + +static int o2hb_debug_open(struct inode *inode, struct file *file) +{ + unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)]; + struct o2hb_debug_buffer *odb = NULL; + int i = -1; + int out = 0; + + odb = kzalloc(sizeof(struct o2hb_debug_buffer), GFP_KERNEL); + if (!odb) + goto bail; + + odb->len = PAGE_SIZE; + odb->buf = kmalloc(odb->len, GFP_KERNEL); + if (!odb->buf) { + kfree(odb); + goto bail; + } + + o2hb_fill_node_map(map, sizeof(map)); + + while ((i = find_next_bit(map, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) + out += snprintf(odb->buf + out, odb->len - out, "%d ", i); + out += snprintf(odb->buf + out, odb->len - out, "\n"); + + odb->len = out; + + file->private_data = odb; + 
+ return 0; +bail: + return -ENOMEM; +} + +static int o2hb_debug_release(struct inode *inode, struct file *file) +{ + struct o2hb_debug_buffer *odb = + (struct o2hb_debug_buffer *)file->private_data; + + kfree(odb->buf); + kfree(odb); + + return 0; +} + +static ssize_t o2hb_debug_read(struct file *file, char __user *buf, + size_t nbytes, loff_t *ppos) +{ + struct o2hb_debug_buffer *odb = + (struct o2hb_debug_buffer *)file->private_data; + + return simple_read_from_buffer(buf, nbytes, ppos, odb->buf, odb->len); +} + +static loff_t o2hb_debug_llseek(struct file *file, loff_t off, int whence) +{ + struct o2hb_debug_buffer *odb = + (struct o2hb_debug_buffer *)file->private_data; + loff_t new = -1; + + switch (whence) { + case 0: + new = off; + break; + case 1: + new = file->f_pos + off; + break; + } + + if (new < 0 || new > odb->len) + return -EINVAL; + + return (file->f_pos = new); +} +#else +static int o2hb_debug_open(struct inode *inode, struct file *file) +{ + return 0; +} +static int o2hb_debug_release(struct inode *inode, struct file *file) +{ + return 0; +} +static ssize_t o2hb_debug_read(struct file *file, char __user *buf, + size_t nbytes, loff_t *ppos) +{ + return 0; +} +static loff_t o2hb_debug_llseek(struct file *file, loff_t off, int whence) +{ + return 0; +} +#endif /* CONFIG_DEBUG_FS */ + +static struct file_operations o2hb_debug_fops = { + .open = o2hb_debug_open, + .release = o2hb_debug_release, + .read = o2hb_debug_read, + .llseek = o2hb_debug_llseek, +}; + +void o2hb_exit(void) +{ + if (o2hb_debug_livenodes) + debugfs_remove(o2hb_debug_livenodes); + if (o2hb_debug_dir) + debugfs_remove(o2hb_debug_dir); +} + +int o2hb_init(void) { int i; @@ -918,6 +1038,24 @@ void o2hb_init(void) INIT_LIST_HEAD(&o2hb_node_events); memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap)); + + o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); + if (!o2hb_debug_dir) { + mlog_errno(-ENOMEM); + return -ENOMEM; + } + + o2hb_debug_livenodes = 
debugfs_create_file(O2HB_DEBUG_LIVENODES, + S_IFREG|S_IRUSR, + o2hb_debug_dir, NULL, + &o2hb_debug_fops); + if (!o2hb_debug_livenodes) { + mlog_errno(-ENOMEM); + debugfs_remove(o2hb_debug_dir); + return -ENOMEM; + } + + return 0; } /* if we're already in a callback then we're already serialized by the sem */ diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h index e511339..2f16492 100644 --- a/fs/ocfs2/cluster/heartbeat.h +++ b/fs/ocfs2/cluster/heartbeat.h @@ -75,7 +75,8 @@ void o2hb_unregister_callback(const char *region_uuid, struct o2hb_callback_func *hc); void o2hb_fill_node_map(unsigned long *map, unsigned bytes); -void o2hb_init(void); +void o2hb_exit(void); +int o2hb_init(void); int o2hb_check_node_heartbeating(u8 node_num); int o2hb_check_node_heartbeating_from_callback(u8 node_num); int o2hb_check_local_node_heartbeating(void); diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index 816a3f6..2ce28d7 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c @@ -881,6 +881,7 @@ static void __exit exit_o2nm(void) o2cb_sys_shutdown(); o2net_exit(); + o2hb_exit(); } static int __init init_o2nm(void) @@ -889,11 +890,13 @@ static int __init init_o2nm(void) cluster_print_version(); - o2hb_init(); + ret = o2hb_init(); + if (ret) + goto out; ret = o2net_init(); if (ret) - goto out; + goto out_o2hb; ret = o2net_register_hb_callbacks(); if (ret) @@ -916,6 +919,8 @@ out_callbacks: o2net_unregister_hb_callbacks(); out_o2net: o2net_exit(); +out_o2hb: + o2hb_exit(); out: return ret; } -- 1.5.6.3
Sunil Mushran
2008-Dec-16 23:49 UTC
[Ocfs2-devel] [PATCH 2/9] ocfs2: Moves struct recovery_map to a header file
Moves the definition of struct ocfs2_recovery_map from journal.c to journal.h. Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/journal.c | 11 ----------- fs/ocfs2/journal.h | 9 +++++++++ 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 99fe9d5..3d2c81f 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -64,17 +64,6 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, int slot); static int ocfs2_commit_thread(void *arg); - -/* - * The recovery_list is a simple linked list of node numbers to recover. - * It is protected by the recovery_lock. - */ - -struct ocfs2_recovery_map { - unsigned int rm_used; - unsigned int *rm_entries; -}; - int ocfs2_recovery_init(struct ocfs2_super *osb) { struct ocfs2_recovery_map *rm; diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index d4d14e9..efbcf8e 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -43,6 +43,15 @@ enum ocfs2_journal_state { struct ocfs2_super; struct ocfs2_dinode; +/* + * The recovery_list is a simple linked list of node numbers to recover. + * It is protected by the recovery_lock. + */ +struct ocfs2_recovery_map { + unsigned int rm_used; + unsigned int *rm_entries; +}; + struct ocfs2_journal { enum ocfs2_journal_state j_state; /* Journals current state */ -- 1.5.6.3
Sunil Mushran
2008-Dec-16 23:49 UTC
[Ocfs2-devel] [PATCH 3/9] ocfs2: Exposes the file system state via debugfs
Patch creates a per mount debugfs file, fs_state, which exposes information like, cluster stack in use, states of the downconvert, recovery and commit threads, number of journal txns, some allocation stats, list of all slots, etc. Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/ocfs2.h | 1 + fs/ocfs2/super.c | 220 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 221 insertions(+), 0 deletions(-) diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 3fed9e3..d912b70 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -301,6 +301,7 @@ struct ocfs2_super struct ocfs2_dlm_debug *osb_dlm_debug; struct dentry *osb_debug_root; + struct dentry *osb_ctxt; wait_queue_head_t recovery_event; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 304b63a..7808866 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -619,6 +619,214 @@ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb, return 0; } +#ifdef CONFIG_DEBUG_FS +struct ocfs2_debug_buffer { + int len; + char *buf; +}; + +static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) +{ + int out = 0; + int i; + struct ocfs2_cluster_connection *cconn = osb->cconn; + struct ocfs2_recovery_map *rm = osb->recovery_map; + + out += snprintf(buf + out, len - out, + "%10s => Id: %-s Uuid: %-s Gen: 0x%X Label: %-s\n", + "Device", osb->dev_str, osb->uuid_str, + osb->fs_generation, osb->vol_label); + + out += snprintf(buf + out, len - out, + "%10s => State: %d Flags: 0x%lX\n", "Volume", + atomic_read(&osb->vol_state), osb->osb_flags); + + out += snprintf(buf + out, len - out, + "%10s => Block: %lu Cluster: %d\n", "Sizes", + osb->sb->s_blocksize, osb->s_clustersize); + + out += snprintf(buf + out, len - out, + "%10s => Compat: 0x%X Incompat: 0x%X " + "ROcompat: 0x%X\n", + "Features", osb->s_feature_compat, + osb->s_feature_incompat, osb->s_feature_ro_compat); + + out += snprintf(buf + out, len - out, + "%10s => Opts: 0x%lX AtimeQuanta: %u\n", "Mount", + 
osb->s_mount_opt, osb->s_atime_quantum); + + out += snprintf(buf + out, len - out, + "%10s => Stack: %s Name: %*s Version: %d.%d\n", + "Cluster", + (*osb->osb_cluster_stack == '\0' ? + "o2cb" : osb->osb_cluster_stack), + cconn->cc_namelen, cconn->cc_name, + cconn->cc_version.pv_major, cconn->cc_version.pv_minor); + + spin_lock(&osb->dc_task_lock); + out += snprintf(buf + out, len - out, + "%10s => Pid: %d Count: %lu WakeSeq: %lu " + "WorkSeq: %lu\n", "DownCnvt", + task_pid_nr(osb->dc_task), osb->blocked_lock_count, + osb->dc_wake_sequence, osb->dc_work_sequence); + spin_unlock(&osb->dc_task_lock); + + spin_lock(&osb->osb_lock); + out += snprintf(buf + out, len - out, "%10s => Pid: %d Nodes:", + "Recovery", + (osb->recovery_thread_task ? + task_pid_nr(osb->recovery_thread_task) : -1)); + if (rm->rm_used == 0) + out += snprintf(buf + out, len - out, " None\n"); + else { + for (i = 0; i < rm->rm_used; i++) + out += snprintf(buf + out, len - out, " %d", + rm->rm_entries[i]); + out += snprintf(buf + out, len - out, "\n"); + } + spin_unlock(&osb->osb_lock); + + out += snprintf(buf + out, len - out, + "%10s => Pid: %d Interval: %lu Needs: %d\n", "Commit", + task_pid_nr(osb->commit_task), osb->osb_commit_interval, + atomic_read(&osb->needs_checkpoint)); + + out += snprintf(buf + out, len - out, + "%10s => State: %d NumTxns: %d TxnId: %lu\n", + "Journal", osb->journal->j_state, + atomic_read(&osb->journal->j_num_trans), + osb->journal->j_trans_id); + + out += snprintf(buf + out, len - out, + "%10s => GlobalAllocs: %d LocalAllocs: %d " + "SubAllocs: %d LAWinMoves: %d SAExtends: %d\n", + "Stats", + atomic_read(&osb->alloc_stats.bitmap_data), + atomic_read(&osb->alloc_stats.local_data), + atomic_read(&osb->alloc_stats.bg_allocs), + atomic_read(&osb->alloc_stats.moves), + atomic_read(&osb->alloc_stats.bg_extends)); + + out += snprintf(buf + out, len - out, + "%10s => State: %u Descriptor: %llu Size: %u bits " + "Default: %u bits\n", + "LocalAlloc", osb->local_alloc_state, + 
(unsigned long long)osb->la_last_gd, + osb->local_alloc_bits, osb->local_alloc_default_bits); + + spin_lock(&osb->osb_lock); + out += snprintf(buf + out, len - out, + "%10s => Slot: %d NumStolen: %d\n", "Steal", + osb->s_inode_steal_slot, + atomic_read(&osb->s_num_inodes_stolen)); + spin_unlock(&osb->osb_lock); + + out += snprintf(buf + out, len - out, "%10s => %3s %10s\n", + "Slots", "Num", "RecoGen"); + + for (i = 0; i < osb->max_slots; ++i) { + out += snprintf(buf + out, len - out, + "%10s %c %3d %10d\n", + " ", + (i == osb->slot_num ? '*' : ' '), + i, osb->slot_recovery_generations[i]); + } + + return out; +} + +static int ocfs2_osb_debug_open(struct inode *inode, struct file *file) +{ + struct ocfs2_debug_buffer *odb = NULL; + struct ocfs2_super *osb = inode->i_private; + + odb = kzalloc(sizeof(struct ocfs2_debug_buffer), GFP_KERNEL); + if (!odb) + goto bail; + + odb->len = PAGE_SIZE; + odb->buf = kmalloc(odb->len, GFP_KERNEL); + if (!odb->buf) { + kfree(odb); + goto bail; + } + + odb->len = ocfs2_osb_dump(osb, odb->buf, odb->len); + + file->private_data = odb; + + return 0; +bail: + return -ENOMEM; +} + +static int ocfs2_debug_release(struct inode *inode, struct file *file) +{ + struct ocfs2_debug_buffer *odb = + (struct ocfs2_debug_buffer *)file->private_data; + + kfree(odb->buf); + kfree(odb); + + return 0; +} + +static ssize_t ocfs2_debug_read(struct file *file, char __user *buf, + size_t nbytes, loff_t *ppos) +{ + struct ocfs2_debug_buffer *odb = + (struct ocfs2_debug_buffer *)file->private_data; + + return simple_read_from_buffer(buf, nbytes, ppos, odb->buf, odb->len); +} + +static loff_t ocfs2_debug_llseek(struct file *file, loff_t off, int whence) +{ + struct ocfs2_debug_buffer *odb = + (struct ocfs2_debug_buffer *)file->private_data; + loff_t new = -1; + + switch (whence) { + case 0: + new = off; + break; + case 1: + new = file->f_pos + off; + break; + } + + if (new < 0 || new > odb->len) + return -EINVAL; + + return (file->f_pos = new); +} +#else +static 
int ocfs2_osb_debug_open(struct inode *inode, struct file *file) +{ + return 0; +} +static int ocfs2_debug_release(struct inode *inode, struct file *file) +{ + return 0; +} +static ssize_t ocfs2_debug_read(struct file *file, char __user *buf, + size_t nbytes, loff_t *ppos) +{ + return 0; +} +static loff_t ocfs2_debug_llseek(struct file *file, loff_t off, int whence) +{ + return NULL; +} +#endif /* CONFIG_DEBUG_FS */ + +static struct file_operations ocfs2_osb_debug_fops = { + .open = ocfs2_osb_debug_open, + .release = ocfs2_debug_release, + .read = ocfs2_debug_read, + .llseek = ocfs2_debug_llseek, +}; + static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) { struct dentry *root; @@ -720,6 +928,16 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) goto read_super_error; } + osb->osb_ctxt = debugfs_create_file("fs_state", S_IFREG|S_IRUSR, + osb->osb_debug_root, + osb, + &ocfs2_osb_debug_fops); + if (!osb->osb_ctxt) { + status = -EINVAL; + mlog_errno(status); + goto read_super_error; + } + status = ocfs2_mount_volume(sb); if (osb->root_inode) inode = igrab(osb->root_inode); @@ -1303,6 +1521,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) osb = OCFS2_SB(sb); BUG_ON(!osb); + debugfs_remove(osb->osb_ctxt); + ocfs2_shutdown_local_alloc(osb); ocfs2_truncate_log_shutdown(osb); -- 1.5.6.3
Sunil Mushran
2008-Dec-16 23:49 UTC
[Ocfs2-devel] [PATCH 4/9] ocfs2: Remove debugfs file local_alloc_stats
This patch removes the debugfs file local_alloc_stats as that information is now included in the fs_state debugfs file. Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/localalloc.c | 86 ------------------------------------------------- fs/ocfs2/ocfs2.h | 5 --- 2 files changed, 0 insertions(+), 91 deletions(-) diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 687b287..500f34d 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -28,7 +28,6 @@ #include <linux/slab.h> #include <linux/highmem.h> #include <linux/bitops.h> -#include <linux/debugfs.h> #define MLOG_MASK_PREFIX ML_DISK_ALLOC #include <cluster/masklog.h> @@ -74,84 +73,6 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, struct inode *local_alloc_inode); -#ifdef CONFIG_OCFS2_FS_STATS - -static int ocfs2_la_debug_open(struct inode *inode, struct file *file) -{ - file->private_data = inode->i_private; - return 0; -} - -#define LA_DEBUG_BUF_SZ PAGE_CACHE_SIZE -#define LA_DEBUG_VER 1 -static ssize_t ocfs2_la_debug_read(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) -{ - static DEFINE_MUTEX(la_debug_mutex); - struct ocfs2_super *osb = file->private_data; - int written, ret; - char *buf = osb->local_alloc_debug_buf; - - mutex_lock(&la_debug_mutex); - memset(buf, 0, LA_DEBUG_BUF_SZ); - - written = snprintf(buf, LA_DEBUG_BUF_SZ, - "0x%x\t0x%llx\t%u\t%u\t0x%x\n", - LA_DEBUG_VER, - (unsigned long long)osb->la_last_gd, - osb->local_alloc_default_bits, - osb->local_alloc_bits, osb->local_alloc_state); - - ret = simple_read_from_buffer(userbuf, count, ppos, buf, written); - - mutex_unlock(&la_debug_mutex); - return ret; -} - -static const struct file_operations ocfs2_la_debug_fops = { - .open = ocfs2_la_debug_open, - .read = ocfs2_la_debug_read, -}; - -static void ocfs2_init_la_debug(struct ocfs2_super *osb) -{ - osb->local_alloc_debug_buf = 
kmalloc(LA_DEBUG_BUF_SZ, GFP_NOFS); - if (!osb->local_alloc_debug_buf) - return; - - osb->local_alloc_debug = debugfs_create_file("local_alloc_stats", - S_IFREG|S_IRUSR, - osb->osb_debug_root, - osb, - &ocfs2_la_debug_fops); - if (!osb->local_alloc_debug) { - kfree(osb->local_alloc_debug_buf); - osb->local_alloc_debug_buf = NULL; - } -} - -static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb) -{ - if (osb->local_alloc_debug) - debugfs_remove(osb->local_alloc_debug); - - if (osb->local_alloc_debug_buf) - kfree(osb->local_alloc_debug_buf); - - osb->local_alloc_debug_buf = NULL; - osb->local_alloc_debug = NULL; -} -#else /* CONFIG_OCFS2_FS_STATS */ -static void ocfs2_init_la_debug(struct ocfs2_super *osb) -{ - return; -} -static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb) -{ - return; -} -#endif - static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb) { return (osb->local_alloc_state == OCFS2_LA_THROTTLED || @@ -225,8 +146,6 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) mlog_entry_void(); - ocfs2_init_la_debug(osb); - if (osb->local_alloc_bits == 0) goto bail; @@ -298,9 +217,6 @@ bail: if (inode) iput(inode); - if (status < 0) - ocfs2_shutdown_la_debug(osb); - mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits); mlog_exit(status); @@ -330,8 +246,6 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) cancel_delayed_work(&osb->la_enable_wq); flush_workqueue(ocfs2_wq); - ocfs2_shutdown_la_debug(osb); - if (osb->local_alloc_state == OCFS2_LA_UNUSED) goto out; diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index d912b70..22711d4 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -281,11 +281,6 @@ struct ocfs2_super u64 la_last_gd; -#ifdef CONFIG_OCFS2_FS_STATS - struct dentry *local_alloc_debug; - char *local_alloc_debug_buf; -#endif - /* Next two fields are for local node slot recovery during * mount. */ int dirty; -- 1.5.6.3
Sunil Mushran
2008-Dec-16 23:49 UTC
[Ocfs2-devel] [PATCH 5/9] ocfs2/dlm: Fixes race between migrate request and exit domain
This patch addresses a race between a migrate request message and an exit domain message. Instead of blocking exit domains for the duration of the migrate, we ignore failure to deliver that message. This is because an exiting domain should not have any active locks and thus has no role to play in the migration. Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/dlm/dlmmaster.c | 23 +++++++++++++++++++---- 1 files changed, 19 insertions(+), 4 deletions(-) diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 44f87ca..92fd1d7 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2949,7 +2949,7 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm, struct dlm_node_iter *iter) { struct dlm_migrate_request migrate; - int ret, status = 0; + int ret, skip, status = 0; int nodenum; memset(&migrate, 0, sizeof(migrate)); @@ -2966,12 +2966,27 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm, nodenum == new_master) continue; + /* We could race exit domain. If exited, skip. */ + spin_lock(&dlm->spinlock); + skip = (!test_bit(nodenum, dlm->domain_map)); + spin_unlock(&dlm->spinlock); + if (skip) { + clear_bit(nodenum, iter->node_map); + continue; + } + ret = o2net_send_message(DLM_MIGRATE_REQUEST_MSG, dlm->key, &migrate, sizeof(migrate), nodenum, &status); - if (ret < 0) - mlog_errno(ret); - else if (status < 0) { + if (ret < 0) { + mlog(0, "migrate_request returned %d!\n", ret); + if (!dlm_is_host_down(ret)) { + mlog(ML_ERROR, "unhandled error=%d!\n", ret); + BUG(); + } + clear_bit(nodenum, iter->node_map); + ret = 0; + } else if (status < 0) { mlog(0, "migrate request (node %u) returned %d!\n", nodenum, status); ret = status; -- 1.5.6.3
Sunil Mushran
2008-Dec-16 23:49 UTC
[Ocfs2-devel] [PATCH 6/9] ocfs2/dlm: Clean errors in dlm_proxy_ast_handler()
This patch cleans up the errors printed in dlm_proxy_ast_handler(). The errors now include the node number that sent the (b)ast. It also reduces the number of endian swaps of the cookie. Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/dlm/dlmast.c | 52 +++++++++++++++++++++++++----------------------- 1 files changed, 27 insertions(+), 25 deletions(-) diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index 644bee5..d07ddbe 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c @@ -275,6 +275,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, struct list_head *iter, *head=NULL; u64 cookie; u32 flags; + u8 node; if (!dlm_grab(dlm)) { dlm_error(DLM_REJECTED); @@ -286,18 +287,21 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, name = past->name; locklen = past->namelen; - cookie = be64_to_cpu(past->cookie); + cookie = past->cookie; flags = be32_to_cpu(past->flags); + node = past->node_idx; if (locklen > DLM_LOCKID_NAME_MAX) { ret = DLM_IVBUFLEN; - mlog(ML_ERROR, "Invalid name length in proxy ast handler!\n"); + mlog(ML_ERROR, "Invalid name length (%d) in proxy ast " + "handler!\n", locklen); goto leave; } if ((flags & (LKM_PUT_LVB|LKM_GET_LVB)) == (LKM_PUT_LVB|LKM_GET_LVB)) { - mlog(ML_ERROR, "both PUT and GET lvb specified\n"); + mlog(ML_ERROR, "Both PUT and GET lvb specified, (0x%x)\n", + flags); ret = DLM_BADARGS; goto leave; } @@ -310,22 +314,21 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, if (past->type != DLM_AST && past->type != DLM_BAST) { mlog(ML_ERROR, "Unknown ast type! 
%d, cookie=%u:%llu" - "name=%.*s\n", past->type, - dlm_get_lock_cookie_node(cookie), - dlm_get_lock_cookie_seq(cookie), - locklen, name); + "name=%.*s, node=%u\n", past->type, + dlm_get_lock_cookie_node(be64_to_cpu(cookie)), + dlm_get_lock_cookie_seq(be64_to_cpu(cookie)), + locklen, name, node); ret = DLM_IVLOCKID; goto leave; } res = dlm_lookup_lockres(dlm, name, locklen); if (!res) { - mlog(0, "got %sast for unknown lockres! " - "cookie=%u:%llu, name=%.*s, namelen=%u\n", - past->type == DLM_AST ? "" : "b", - dlm_get_lock_cookie_node(cookie), - dlm_get_lock_cookie_seq(cookie), - locklen, name, locklen); + mlog(0, "Got %sast for unknown lockres! cookie=%u:%llu, " + "name=%.*s, node=%u\n", (past->type == DLM_AST ? "" : "b"), + dlm_get_lock_cookie_node(be64_to_cpu(cookie)), + dlm_get_lock_cookie_seq(be64_to_cpu(cookie)), + locklen, name, node); ret = DLM_IVLOCKID; goto leave; } @@ -337,12 +340,12 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, spin_lock(&res->spinlock); if (res->state & DLM_LOCK_RES_RECOVERING) { - mlog(0, "responding with DLM_RECOVERING!\n"); + mlog(0, "Responding with DLM_RECOVERING!\n"); ret = DLM_RECOVERING; goto unlock_out; } if (res->state & DLM_LOCK_RES_MIGRATING) { - mlog(0, "responding with DLM_MIGRATING!\n"); + mlog(0, "Responding with DLM_MIGRATING!\n"); ret = DLM_MIGRATING; goto unlock_out; } @@ -351,7 +354,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, lock = NULL; list_for_each(iter, head) { lock = list_entry (iter, struct dlm_lock, list); - if (be64_to_cpu(lock->ml.cookie) == cookie) + if (lock->ml.cookie == cookie) goto do_ast; } @@ -363,15 +366,15 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, list_for_each(iter, head) { lock = list_entry (iter, struct dlm_lock, list); - if (be64_to_cpu(lock->ml.cookie) == cookie) + if (lock->ml.cookie == cookie) goto do_ast; } - mlog(0, "got %sast for unknown lock! 
cookie=%u:%llu, " - "name=%.*s, namelen=%u\n", past->type == DLM_AST ? "" : "b", - dlm_get_lock_cookie_node(cookie), - dlm_get_lock_cookie_seq(cookie), - locklen, name, locklen); + mlog(0, "Got %sast for unknown lock! cookie=%u:%llu, name=%.*s, " + "node=%u\n", past->type == DLM_AST ? "" : "b", + dlm_get_lock_cookie_node(be64_to_cpu(cookie)), + dlm_get_lock_cookie_seq(be64_to_cpu(cookie)), + locklen, name, node); ret = DLM_NORMAL; unlock_out: @@ -383,8 +386,8 @@ do_ast: if (past->type == DLM_AST) { /* do not alter lock refcount. switching lists. */ list_move_tail(&lock->list, &res->granted); - mlog(0, "ast: adding to granted list... type=%d, " - "convert_type=%d\n", lock->ml.type, lock->ml.convert_type); + mlog(0, "ast: Adding to granted list... type=%d, " + "convert_type=%d\n", lock->ml.type, lock->ml.convert_type); if (lock->ml.convert_type != LKM_IVMODE) { lock->ml.type = lock->ml.convert_type; lock->ml.convert_type = LKM_IVMODE; @@ -408,7 +411,6 @@ do_ast: dlm_do_local_bast(dlm, res, lock, past->blocked_type); leave: - if (res) dlm_lockres_put(res); -- 1.5.6.3
Sunil Mushran
2008-Dec-16 23:49 UTC
[Ocfs2-devel] [PATCH 7/9] ocfs2/dlm: Hold off sending lockres drop ref message while lockres is migrating
During lockres purge, o2dlm sends a drop reference message to the lockres master. This patch delays the message if the lockres is being migrated. Fixes oss bugzilla#1012 http://oss.oracle.com/bugzilla/show_bug.cgi?id=1012 Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/dlm/dlmthread.c | 3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index 4060bb3..d129520 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -181,7 +181,8 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm, spin_lock(&res->spinlock); /* This ensures that clear refmap is sent after the set */ - __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG); + __dlm_wait_on_lockres_flags(res, (DLM_LOCK_RES_SETREF_INPROG | + DLM_LOCK_RES_MIGRATING)); spin_unlock(&res->spinlock); /* clear our bit from the master's refmap, ignore errors */ -- 1.5.6.3
Sunil Mushran
2008-Dec-16 23:49 UTC
[Ocfs2-devel] [PATCH 8/9] ocfs2/dlm: Fix race in adding/removing lockres' to/from the tracking list
This patch adds a new lock, dlm->track_lock, to protect adding/removing lockres' to/from the dlm->tracking_list. We were previously using dlm->spinlock for the same, but that proved inadequate as we could be freeing a lockres from a context that did not hold that lock. As the new lock only protects this list, we can explicitly take it when removing the lockres from the tracking list. This bug was exposed when testing multiple processes concurrently calling flock() on the same file. Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/dlm/dlmcommon.h | 3 ++ fs/ocfs2/dlm/dlmdebug.c | 53 ++++++++++++++++++++------------------------- fs/ocfs2/dlm/dlmdomain.c | 1 + fs/ocfs2/dlm/dlmmaster.c | 10 ++++++++ 4 files changed, 38 insertions(+), 29 deletions(-) diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index d5a86fb..bb53714 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -140,6 +140,7 @@ struct dlm_ctxt unsigned int purge_count; spinlock_t spinlock; spinlock_t ast_lock; + spinlock_t track_lock; char *name; u8 node_num; u32 key; @@ -316,6 +317,8 @@ struct dlm_lock_resource * put on a list for the dlm thread to run. 
*/ unsigned long last_used; + struct dlm_ctxt *dlm; + unsigned migration_pending:1; atomic_t asts_reserved; spinlock_t spinlock; diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 1b81dcb..b32f60a 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -630,43 +630,38 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos) { struct debug_lockres *dl = m->private; struct dlm_ctxt *dlm = dl->dl_ctxt; + struct dlm_lock_resource *oldres = dl->dl_res; struct dlm_lock_resource *res = NULL; + struct list_head *track_list; - spin_lock(&dlm->spinlock); + spin_lock(&dlm->track_lock); + if (oldres) + track_list = &oldres->tracking; + else + track_list = &dlm->tracking_list; - if (dl->dl_res) { - list_for_each_entry(res, &dl->dl_res->tracking, tracking) { - if (dl->dl_res) { - dlm_lockres_put(dl->dl_res); - dl->dl_res = NULL; - } - if (&res->tracking == &dlm->tracking_list) { - mlog(0, "End of list found, %p\n", res); - dl = NULL; - break; - } + list_for_each_entry(res, track_list, tracking) { + if (&res->tracking == &dlm->tracking_list) + res = NULL; + else dlm_lockres_get(res); - dl->dl_res = res; - break; - } - } else { - if (!list_empty(&dlm->tracking_list)) { - list_for_each_entry(res, &dlm->tracking_list, tracking) - break; - dlm_lockres_get(res); - dl->dl_res = res; - } else - dl = NULL; + break; } + spin_unlock(&dlm->track_lock); - if (dl) { - spin_lock(&dl->dl_res->spinlock); - dump_lockres(dl->dl_res, dl->dl_buf, dl->dl_len - 1); - spin_unlock(&dl->dl_res->spinlock); - } + if (oldres) + dlm_lockres_put(oldres); - spin_unlock(&dlm->spinlock); + dl->dl_res = res; + + if (res) { + spin_lock(&res->spinlock); + dump_lockres(res, dl->dl_buf, dl->dl_len - 1); + spin_unlock(&res->spinlock); + } else + dl = NULL; + /* passed to seq_show */ return dl; } diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 63f8125..d8d578f 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -1550,6 +1550,7 @@ 
static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, spin_lock_init(&dlm->spinlock); spin_lock_init(&dlm->master_lock); spin_lock_init(&dlm->ast_lock); + spin_lock_init(&dlm->track_lock); INIT_LIST_HEAD(&dlm->list); INIT_LIST_HEAD(&dlm->dirty_list); INIT_LIST_HEAD(&dlm->reco.resources); diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 92fd1d7..cbf3abe 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -505,8 +505,10 @@ void dlm_change_lockres_owner(struct dlm_ctxt *dlm, static void dlm_lockres_release(struct kref *kref) { struct dlm_lock_resource *res; + struct dlm_ctxt *dlm; res = container_of(kref, struct dlm_lock_resource, refs); + dlm = res->dlm; /* This should not happen -- all lockres' have a name * associated with them at init time. */ @@ -515,6 +517,7 @@ static void dlm_lockres_release(struct kref *kref) mlog(0, "destroying lockres %.*s\n", res->lockname.len, res->lockname.name); + spin_lock(&dlm->track_lock); if (!list_empty(&res->tracking)) list_del_init(&res->tracking); else { @@ -522,6 +525,9 @@ static void dlm_lockres_release(struct kref *kref) res->lockname.len, res->lockname.name); dlm_print_one_lock_resource(res); } + spin_unlock(&dlm->track_lock); + + dlm_put(dlm); if (!hlist_unhashed(&res->hash_node) || !list_empty(&res->granted) || @@ -595,6 +601,10 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, res->migration_pending = 0; res->inflight_locks = 0; + /* put in dlm_lockres_release */ + dlm_grab(dlm); + res->dlm = dlm; + kref_init(&res->refs); /* just for consistency */ -- 1.5.6.3
Sunil Mushran
2008-Dec-16 23:49 UTC
[Ocfs2-devel] [PATCH 9/9] ocfs2/dlm: Fix race during lockres mastery
dlm_get_lock_resource() is supposed to return a lock resource with a proper master. If multiple concurrent threads attempt to look up the lockres for the same lockid while the lock mastery is underway, one or more threads are likely to return a lockres without a proper master. This patch makes the threads wait in dlm_get_lock_resource() while the mastery is underway, ensuring all threads return the lockres with a proper master. This issue is known to be limited to users using the flock() syscall. For all other fs operations, the ocfs2 dlmglue layer serializes the dlm op for each lockid. Patch fixes Novell bz#425491 https://bugzilla.novell.com/show_bug.cgi?id=425491 Users encountering this bug will see flock() return EINVAL and dmesg will show the following error: ERROR: Dlm error "DLM_BADARGS" while calling dlmlock on resource <LOCKID>: bad api args Reported-by: Coly Li <coyli at suse.de> Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/dlm/dlmmaster.c | 9 ++++++++- 1 files changed, 8 insertions(+), 1 deletions(-) diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index cbf3abe..54e182a 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -732,14 +732,21 @@ lookup: if (tmpres) { int dropping_ref = 0; + spin_unlock(&dlm->spinlock); + spin_lock(&tmpres->spinlock); + /* We wait for the other thread that is mastering the resource */ + if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { + __dlm_wait_on_lockres(tmpres); + BUG_ON(tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN); + } + if (tmpres->owner == dlm->node_num) { BUG_ON(tmpres->state & DLM_LOCK_RES_DROPPING_REF); dlm_lockres_grab_inflight_ref(dlm, tmpres); } else if (tmpres->state & DLM_LOCK_RES_DROPPING_REF) dropping_ref = 1; spin_unlock(&tmpres->spinlock); - spin_unlock(&dlm->spinlock); /* wait until done messaging the master, drop our ref to allow * the lockres to be purged, start over. */ -- 1.5.6.3
Mark Fasheh
2008-Dec-17 00:25 UTC
[Ocfs2-devel] [PATCH 9/9] ocfs2/dlm: Fix race during lockres mastery
On Tue, Dec 16, 2008 at 03:49:23PM -0800, Sunil Mushran wrote: > dlm_get_lock_resource() is supposed to return a lock resource with a proper > master. If multiple concurrent threads attempt to lookup the lockres for the > same lockid while the lock mastery in underway, one or more threads are likely > to return a lockres without a proper master. > > This patch makes the threads wait in dlm_get_lock_resource() while the mastery > is underway, ensuring all threads return the lockres with a proper master. > > This issue is known to be limited to users using the flock() syscall. For all > other fs operations, the ocfs2 dlmglue layer serializes the dlm op for each > lockid. > > Patch fixes Novell bz#425491 > https://bugzilla.novell.com/show_bug.cgi?id=425491 > > Users encountering this bug will see flock() return EINVAL and dmesg have the > following error: > ERROR: Dlm error "DLM_BADARGS" while calling dlmlock on resource <LOCKID>: bad api args > > Reported-by: Coly Li <coyli at suse.de> > Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> > --- > fs/ocfs2/dlm/dlmmaster.c | 9 ++++++++- > 1 files changed, 8 insertions(+), 1 deletions(-) > > diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c > index cbf3abe..54e182a 100644 > --- a/fs/ocfs2/dlm/dlmmaster.c > +++ b/fs/ocfs2/dlm/dlmmaster.c > @@ -732,14 +732,21 @@ lookup: > if (tmpres) { > int dropping_ref = 0; > > + spin_unlock(&dlm->spinlock); > + > spin_lock(&tmpres->spinlock); > + /* We wait for the other thread that is mastering the resource */ > + if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { > + __dlm_wait_on_lockres(tmpres); > + BUG_ON(tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN); > + }
Do we have a ref on tmpres here, when we decide to wait on it? Could it get destroyed during mastery (for example, due to mastery ending in error) and we wake up with a bad pointer? --Mark -- Mark Fasheh
Sunil Mushran
2008-Dec-17 00:40 UTC
[Ocfs2-devel] [PATCH 9/9] ocfs2/dlm: Fix race during lockres mastery
Mark Fasheh wrote: > On Tue, Dec 16, 2008 at 03:49:23PM -0800, Sunil Mushran wrote: > >> dlm_get_lock_resource() is supposed to return a lock resource with a proper >> master. If multiple concurrent threads attempt to lookup the lockres for the >> same lockid while the lock mastery in underway, one or more threads are likely >> to return a lockres without a proper master. >> >> This patch makes the threads wait in dlm_get_lock_resource() while the mastery >> is underway, ensuring all threads return the lockres with a proper master. >> >> This issue is known to be limited to users using the flock() syscall. For all >> other fs operations, the ocfs2 dlmglue layer serializes the dlm op for each >> lockid. >> >> Patch fixes Novell bz#425491 >> https://bugzilla.novell.com/show_bug.cgi?id=425491 >> >> Users encountering this bug will see flock() return EINVAL and dmesg have the >> following error: >> ERROR: Dlm error "DLM_BADARGS" while calling dlmlock on resource <LOCKID>: bad api args >> >> Reported-by: Coly Li <coyli at suse.de> >> Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> >> --- >> fs/ocfs2/dlm/dlmmaster.c | 9 ++++++++- >> 1 files changed, 8 insertions(+), 1 deletions(-) >> >> diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c >> index cbf3abe..54e182a 100644 >> --- a/fs/ocfs2/dlm/dlmmaster.c >> +++ b/fs/ocfs2/dlm/dlmmaster.c >> @@ -732,14 +732,21 @@ lookup: >> if (tmpres) { >> int dropping_ref = 0; >> >> + spin_unlock(&dlm->spinlock); >> + >> spin_lock(&tmpres->spinlock); >> + /* We wait for the other thread that is mastering the resource */ >> + if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { >> + __dlm_wait_on_lockres(tmpres); >> + BUG_ON(tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN); >> + } >> > > Do we have a ref on tmpres here, when we decide to wait on it? Could it get > destroyed during mastery (for example, due to mastery ending in error) and > we wake up with a bad pointer? >
Yes. __dlm_lookup_lockres_full() takes a reference.
Coly Li
2008-Dec-23 21:05 UTC
[Ocfs2-devel] [PATCH 9/9] ocfs2/dlm: Fix race during lockres mastery
Hi Sunil, I do not see this patch upstream yet. Is there a plan to push it upstream soon? Once this patch gets merged into Linus' tree, I can add it to the SLES10 SP2 kernel. Thanks. Sunil Mushran Wrote: > dlm_get_lock_resource() is supposed to return a lock resource with a proper > master. If multiple concurrent threads attempt to lookup the lockres for the > same lockid while the lock mastery in underway, one or more threads are likely > to return a lockres without a proper master. > > This patch makes the threads wait in dlm_get_lock_resource() while the mastery > is underway, ensuring all threads return the lockres with a proper master. > > This issue is known to be limited to users using the flock() syscall. For all > other fs operations, the ocfs2 dlmglue layer serializes the dlm op for each > lockid. > > Patch fixes Novell bz#425491 > https://bugzilla.novell.com/show_bug.cgi?id=425491 > > Users encountering this bug will see flock() return EINVAL and dmesg have the > following error: > ERROR: Dlm error "DLM_BADARGS" while calling dlmlock on resource <LOCKID>: bad api args > > Reported-by: Coly Li <coyli at suse.de> > Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> > --- > fs/ocfs2/dlm/dlmmaster.c | 9 ++++++++- > 1 files changed, 8 insertions(+), 1 deletions(-) > > diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c > index cbf3abe..54e182a 100644 > --- a/fs/ocfs2/dlm/dlmmaster.c > +++ b/fs/ocfs2/dlm/dlmmaster.c > @@ -732,14 +732,21 @@ lookup: > if (tmpres) { > int dropping_ref = 0; > > + spin_unlock(&dlm->spinlock); > + > spin_lock(&tmpres->spinlock); > + /* We wait for the other thread that is mastering the resource */ > + if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { > + __dlm_wait_on_lockres(tmpres); > + BUG_ON(tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN); > + } > + > if (tmpres->owner == dlm->node_num) { > BUG_ON(tmpres->state & DLM_LOCK_RES_DROPPING_REF); > dlm_lockres_grab_inflight_ref(dlm, tmpres); > } 
else if (tmpres->state & DLM_LOCK_RES_DROPPING_REF) > dropping_ref = 1; > spin_unlock(&tmpres->spinlock); > - spin_unlock(&dlm->spinlock); > > /* wait until done messaging the master, drop our ref to allow > * the lockres to be purged, start over. */-- Coly Li SuSE PRC Labs