This is the first drop of the global heartbeat patches for ocfs2/kernel. The first few patches add support for heartbeat mode in configfs, the new incompat clusterinfo flag and the new mount option heartbeat=global. 0001-ocfs2-cluster-Add-heartbeat-mode-configfs-parameter.patch 0002-ocfs2-Add-an-incompat-feature-flag-OCFS2_FEATURE_INC.patch 0003-ocfs2-Add-support-for-heartbeat-global-mount-option.patch 0004-ocfs2-dlm-Expose-dlm_protocol-in-dlm_state.patch The next few patches enhance the join domain protocol to get the list of configured nodes and heartbeating regions to ensure that all nodes in the cluster have the same view of the cluster. 0005-ocfs2-cluster-Get-all-heartbeat-regions.patch 0006-ocfs2-dlm-Add-message-DLM_QUERY_HBREGION.patch 0007-ocfs2-Print-message-if-user-mounts-without-starting-.patch 0008-ocfs2-dlm-Add-message-DLM_QUERY_NODEINFO.patch The one known missing bit concerns quorum calculation. I am still working on it. http://oss.oracle.com/osswiki/OCFS2/DesignDocs/NewGlobalHeartbeat Thanks Sunil
Sunil Mushran
2010-Jul-23 23:55 UTC
[Ocfs2-devel] [PATCH 1/8] ocfs2/cluster: Add heartbeat mode configfs parameter
Add heartbeat mode parameter to the configfs tree. This will be used to set/show the heartbeat mode. Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/cluster/heartbeat.c | 70 ++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 70 insertions(+), 0 deletions(-) diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index d191f45..1107629 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -76,7 +76,19 @@ static struct o2hb_callback *hbcall_from_type(enum o2hb_callback_type type); #define O2HB_DEFAULT_BLOCK_BITS 9 +enum o2hb_heartbeat_modes { + O2HB_HEARTBEAT_LOCAL = 0, + O2HB_HEARTBEAT_GLOBAL, + O2HB_HEARTBEAT_NUM_MODES, +}; + +char *o2hb_heartbeat_mode_desc[O2HB_HEARTBEAT_NUM_MODES] = { + "local", /* O2HB_HEARTBEAT_LOCAL */ + "global", /* O2HB_HEARTBEAT_GLOBAL */ +}; + unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD; +unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL; /* Only sets a new threshold if there are no active regions. 
* @@ -93,6 +105,22 @@ static void o2hb_dead_threshold_set(unsigned int threshold) } } +static int o2hb_global_hearbeat_mode_set(unsigned int hb_mode) +{ + int ret = -1; + + if (hb_mode < O2HB_HEARTBEAT_NUM_MODES) { + spin_lock(&o2hb_live_lock); + if (list_empty(&o2hb_all_regions)) { + o2hb_heartbeat_mode = hb_mode; + ret = 0; + } + spin_unlock(&o2hb_live_lock); + } + + return ret; +} + struct o2hb_node_event { struct list_head hn_item; enum o2hb_callback_type hn_event_type; @@ -1694,6 +1722,39 @@ static ssize_t o2hb_heartbeat_group_threshold_store(struct o2hb_heartbeat_group return count; } +static +ssize_t o2hb_heartbeat_group_mode_show(struct o2hb_heartbeat_group *group, + char *page) +{ + return sprintf(page, "%s\n", + o2hb_heartbeat_mode_desc[o2hb_heartbeat_mode]); +} + +static +ssize_t o2hb_heartbeat_group_mode_store(struct o2hb_heartbeat_group *group, + const char *page, size_t count) +{ + unsigned int i; + int ret; + size_t len; + + len = (page[count - 1] == '\n') ? count - 1 : count; + + for (i = 0; i < O2HB_HEARTBEAT_NUM_MODES; ++i) { + if (strnicmp(page, o2hb_heartbeat_mode_desc[i], len)) + continue; + + ret = o2hb_global_hearbeat_mode_set(i); + if (!ret) + printk(KERN_INFO "ocfs2: Heartbeat mode set to %s\n", + o2hb_heartbeat_mode_desc[i]); + return count; + } + + return -EINVAL; + +} + static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_threshold = { .attr = { .ca_owner = THIS_MODULE, .ca_name = "dead_threshold", @@ -1702,8 +1763,17 @@ static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_threshold .store = o2hb_heartbeat_group_threshold_store, }; +static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_mode = { + .attr = { .ca_owner = THIS_MODULE, + .ca_name = "mode", + .ca_mode = S_IRUGO | S_IWUSR }, + .show = o2hb_heartbeat_group_mode_show, + .store = o2hb_heartbeat_group_mode_store, +}; + static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = { &o2hb_heartbeat_group_attr_threshold.attr, + 
&o2hb_heartbeat_group_attr_mode.attr, NULL, }; -- 1.7.0.4
Sunil Mushran
2010-Jul-23 23:55 UTC
[Ocfs2-devel] [PATCH 2/8] ocfs2: Add an incompat feature flag OCFS2_FEATURE_INCOMPAT_CLUSTERINFO
OCFS2_FEATURE_INCOMPAT_CLUSTERINFO allows us to use sb->s_cluster_info for both userspace and o2cb cluster stacks. It also allows us to extend cluster info to include stack flags. This patch also adds stackflags to sb->s_clusterinfo. It also introduces a clusterinfo flag OCFS2_CLUSTER_O2CB_GLOBAL_HEARTBEAT to denote the enabled global heartbeat mode. This incompat flag can be set/cleared using tunefs.ocfs2 --fs-features. The clusterinfo flag is set/cleared using tunefs.ocfs2 --update-cluster-stack. Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/ocfs2.h | 31 +++++++++++++++++++++++++++++-- fs/ocfs2/ocfs2_fs.h | 40 ++++++++++++++++++++++++++++++++++------ fs/ocfs2/super.c | 4 +++- 3 files changed, 66 insertions(+), 9 deletions(-) diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 5a3d08d..259015a 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -366,6 +366,8 @@ struct ocfs2_super struct ocfs2_alloc_stats alloc_stats; char dev_str[20]; /* "major,minor" of the device */ + u8 osb_stackflags; + char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; struct ocfs2_cluster_connection *cconn; struct ocfs2_lock_res osb_super_lockres; @@ -592,10 +594,35 @@ static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb) return ret; } -static inline int ocfs2_userspace_stack(struct ocfs2_super *osb) +static inline int ocfs2_clusterinfo_valid(struct ocfs2_super *osb) { return (osb->s_feature_incompat & - OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK); + (OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK | + OCFS2_FEATURE_INCOMPAT_CLUSTERINFO)); +} + +static inline int ocfs2_userspace_stack(struct ocfs2_super *osb) +{ + if (ocfs2_clusterinfo_valid(osb) && + memcmp(osb->osb_cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK, + OCFS2_STACK_LABEL_LEN)) + return 1; + return 0; +} + +static inline int ocfs2_o2cb_stack(struct ocfs2_super *osb) +{ + if (ocfs2_clusterinfo_valid(osb) && + !memcmp(osb->osb_cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK, + OCFS2_STACK_LABEL_LEN)) + return 
1; + return 0; +} + +static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb) +{ + return ocfs2_o2cb_stack(osb) && + (osb->osb_stackflags & OCFS2_CLUSTER_O2CB_GLOBAL_HEARTBEAT); } static inline int ocfs2_mount_local(struct ocfs2_super *osb) diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index bb37218..c936cf0 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -100,7 +100,8 @@ | OCFS2_FEATURE_INCOMPAT_XATTR \ | OCFS2_FEATURE_INCOMPAT_META_ECC \ | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \ - | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE) + | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \ + | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO) #define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) @@ -166,6 +167,13 @@ #define OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE 0x1000 /* + * Incompat bit to indicate useable clusterinfo with stackflags for all + * cluster stacks (userspace and o2cb). If this bit is set, + * INCOMPAT_USERSPACE_STACK becomes superfluous and thus should not be set. + */ +#define OCFS2_FEATURE_INCOMPAT_CLUSTERINFO 0x2000 + +/* * backup superblock flag is used to indicate that this volume * has backup superblocks. 
*/ @@ -275,10 +283,13 @@ #define OCFS2_VOL_UUID_LEN 16 #define OCFS2_MAX_VOL_LABEL_LEN 64 -/* The alternate, userspace stack fields */ +/* The cluster stack fields */ #define OCFS2_STACK_LABEL_LEN 4 #define OCFS2_CLUSTER_NAME_LEN 16 +/* Classic (historically speaking) cluster stack */ +#define OCFS2_CLASSIC_CLUSTER_STACK "o2cb" + /* Journal limits (in bytes) */ #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) @@ -296,6 +307,11 @@ */ #define OCFS2_MIN_XATTR_INLINE_SIZE 256 +/* + * Cluster info flags (ocfs2_cluster_info.ci_stackflags) + */ +#define OCFS2_CLUSTER_O2CB_GLOBAL_HEARTBEAT (0x01) + struct ocfs2_system_inode_info { char *si_name; int si_iflags; @@ -554,9 +570,21 @@ struct ocfs2_slot_map_extended { */ }; +/* + * ci_stackflags is only valid if the incompat bit + * OCFS2_FEATURE_INCOMPAT_CLUSTERINFO is set. + */ struct ocfs2_cluster_info { /*00*/ __u8 ci_stack[OCFS2_STACK_LABEL_LEN]; - __le32 ci_reserved; + union { + __le32 ci_reserved; + struct { + __u8 ci_reserved1; + __u8 ci_reserved2; + __u8 ci_reserved3; + __u8 ci_stackflags; + }; + }; /*08*/ __u8 ci_cluster[OCFS2_CLUSTER_NAME_LEN]; /*18*/ }; @@ -593,9 +621,9 @@ struct ocfs2_super_block { * group header */ /*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ /*90*/ __u8 s_uuid[OCFS2_VOL_UUID_LEN]; /* 128-bit uuid */ -/*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Selected userspace - stack. Only valid - with INCOMPAT flag. */ +/*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Only valid if either + userspace or clusterinfo + INCOMPAT flag set. 
*/ /*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size for this fs*/ __le16 s_reserved0; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 7dfed85..6ecdc07 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -2162,7 +2162,9 @@ static int ocfs2_initialize_super(struct super_block *sb, goto bail; } - if (ocfs2_userspace_stack(osb)) { + if (ocfs2_clusterinfo_valid(osb)) { + osb->osb_stackflags = + OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags; memcpy(osb->osb_cluster_stack, OCFS2_RAW_SB(di)->s_cluster_info.ci_stack, OCFS2_STACK_LABEL_LEN); -- 1.7.0.4
Sunil Mushran
2010-Jul-23 23:55 UTC
[Ocfs2-devel] [PATCH 3/8] ocfs2: Add support for heartbeat=global mount option
Adds support for heartbeat=global mount option. Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/ocfs2.h | 4 ++- fs/ocfs2/ocfs2_fs.h | 1 + fs/ocfs2/super.c | 55 ++++++++++++++++++++++++++++++++++++++------------- 3 files changed, 45 insertions(+), 15 deletions(-) diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 259015a..db96bbd 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -249,7 +249,7 @@ enum ocfs2_local_alloc_state enum ocfs2_mount_options { - OCFS2_MOUNT_HB_LOCAL = 1 << 0, /* Heartbeat started in local mode */ + OCFS2_MOUNT_HB_LOCAL = 1 << 0, /* Local heartbeat */ OCFS2_MOUNT_BARRIER = 1 << 1, /* Use block barriers */ OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */ OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ @@ -262,6 +262,8 @@ enum ocfs2_mount_options control lists */ OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */ OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */ + OCFS2_MOUNT_HB_NONE = 1 << 12, /* No heartbeat */ + OCFS2_MOUNT_HB_GLOBAL = 1 << 13, /* Global heartbeat */ }; #define OCFS2_OSB_SOFT_RO 0x0001 diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index c936cf0..e5507d5 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -367,6 +367,7 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { /* Parameter passed from mount.ocfs2 to module */ #define OCFS2_HB_NONE "heartbeat=none" #define OCFS2_HB_LOCAL "heartbeat=local" +#define OCFS2_HB_GLOBAL "heartbeat=global" /* * OCFS2 directory file types. Only the low 3 bits are used. 
The diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 6ecdc07..1e280eb 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -169,6 +169,7 @@ enum { Opt_nointr, Opt_hb_none, Opt_hb_local, + Opt_hb_global, Opt_data_ordered, Opt_data_writeback, Opt_atime_quantum, @@ -202,6 +203,7 @@ static const match_table_t tokens = { {Opt_nointr, "nointr"}, {Opt_hb_none, OCFS2_HB_NONE}, {Opt_hb_local, OCFS2_HB_LOCAL}, + {Opt_hb_global, OCFS2_HB_GLOBAL}, {Opt_data_ordered, "data=ordered"}, {Opt_data_writeback, "data=writeback"}, {Opt_atime_quantum, "atime_quantum=%u"}, @@ -621,6 +623,7 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) int ret = 0; struct mount_options parsed_options; struct ocfs2_super *osb = OCFS2_SB(sb); + u32 tmp; lock_kernel(); @@ -630,8 +633,9 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) goto out; } - if ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) != - (parsed_options.mount_opt & OCFS2_MOUNT_HB_LOCAL)) { + tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | + OCFS2_MOUNT_HB_NONE; + if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { ret = -EINVAL; mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n"); goto out; @@ -824,23 +828,29 @@ bail: static int ocfs2_verify_heartbeat(struct ocfs2_super *osb) { - if (ocfs2_mount_local(osb)) { - if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) { + u32 hb_enabled = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL; + + if (osb->s_mount_opt & hb_enabled) { + if (ocfs2_mount_local(osb)) { mlog(ML_ERROR, "Cannot heartbeat on a locally " "mounted device.\n"); return -EINVAL; } - } - - if (ocfs2_userspace_stack(osb)) { - if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) { + if (ocfs2_userspace_stack(osb)) { mlog(ML_ERROR, "Userspace stack expected, but " "o2cb heartbeat arguments passed to mount\n"); return -EINVAL; } + if (((osb->s_mount_opt & OCFS2_MOUNT_HB_GLOBAL) && + !ocfs2_cluster_o2cb_global_heartbeat(osb)) || + ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) 
&& + ocfs2_cluster_o2cb_global_heartbeat(osb))) { + mlog(ML_ERROR, "Mismatching o2cb heartbeat modes\n"); + return -EINVAL; + } } - if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) { + if (!(osb->s_mount_opt & hb_enabled)) { if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb) && !ocfs2_userspace_stack(osb)) { mlog(ML_ERROR, "Heartbeat has to be started to mount " @@ -1319,6 +1329,7 @@ static int ocfs2_parse_options(struct super_block *sb, { int status; char *p; + u32 tmp; mlog_entry("remount: %d, options: \"%s\"\n", is_remount, options ? options : "(none)"); @@ -1349,7 +1360,10 @@ static int ocfs2_parse_options(struct super_block *sb, mopt->mount_opt |= OCFS2_MOUNT_HB_LOCAL; break; case Opt_hb_none: - mopt->mount_opt &= ~OCFS2_MOUNT_HB_LOCAL; + mopt->mount_opt |= OCFS2_MOUNT_HB_NONE; + break; + case Opt_hb_global: + mopt->mount_opt |= OCFS2_MOUNT_HB_GLOBAL; break; case Opt_barrier: if (match_int(&args[0], &option)) { @@ -1489,6 +1503,15 @@ static int ocfs2_parse_options(struct super_block *sb, } } + /* Ensure only one heartbeat mode */ + tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | + OCFS2_MOUNT_HB_NONE); + if (hweight32(tmp) != 1) { + mlog(ML_ERROR, "Invalid heartbeat mount option: %s\n", options); + status = 0; + goto bail; + } + status = 1; bail: @@ -1502,10 +1525,14 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) unsigned long opts = osb->s_mount_opt; unsigned int local_alloc_megs; - if (opts & OCFS2_MOUNT_HB_LOCAL) - seq_printf(s, ",_netdev,heartbeat=local"); - else - seq_printf(s, ",heartbeat=none"); + if (opts & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL)) { + seq_printf(s, ",_netdev"); + if (opts & OCFS2_MOUNT_HB_LOCAL) + seq_printf(s, ",%s", OCFS2_HB_LOCAL); + else + seq_printf(s, ",%s", OCFS2_HB_GLOBAL); + } else + seq_printf(s, ",%s", OCFS2_HB_NONE); if (opts & OCFS2_MOUNT_NOINTR) seq_printf(s, ",nointr"); -- 1.7.0.4
Sunil Mushran
2010-Jul-23 23:55 UTC
[Ocfs2-devel] [PATCH 4/8] ocfs2/dlm: Expose dlm_protocol in dlm_state
Add dlm_protocol to the list of info shown by the debugfs file, dlm_state. Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/dlm/dlmdebug.c | 4 +++- 1 files changed, 3 insertions(+), 1 deletions(-) diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 75efd45..cf27d81 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -779,7 +779,9 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db) /* Domain: xxxxxxxxxx Key: 0xdfbac769 */ out += snprintf(db->buf + out, db->len - out, - "Domain: %s Key: 0x%08x\n", dlm->name, dlm->key); + "Domain: %s Key: 0x%08x Protocol: %d.%d\n", + dlm->name, dlm->key, dlm->dlm_locking_proto.pv_major, + dlm->dlm_locking_proto.pv_minor); /* Thread Pid: xxx Node: xxx State: xxxxx */ out += snprintf(db->buf + out, db->len - out, -- 1.7.0.4
Sunil Mushran
2010-Jul-23 23:55 UTC
[Ocfs2-devel] [PATCH 5/8] ocfs2/cluster: Get all heartbeat regions
Export function in o2hb to get a list of heartbeat regions. Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/cluster/heartbeat.c | 34 ++++++++++++++++++++++++++++++++++ fs/ocfs2/cluster/heartbeat.h | 4 ++++ 2 files changed, 38 insertions(+), 0 deletions(-) diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 1107629..00a7fd6 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1629,6 +1629,9 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g if (reg == NULL) return ERR_PTR(-ENOMEM); + if (strlen(name) > O2HB_MAX_REGION_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + config_item_init_type_name(&reg->hr_item, name, &o2hb_region_type); spin_lock(&o2hb_live_lock); @@ -2039,3 +2042,34 @@ void o2hb_stop_all_regions(void) spin_unlock(&o2hb_live_lock); } EXPORT_SYMBOL_GPL(o2hb_stop_all_regions); + +int o2hb_get_all_regions(char *region_uuids, u8 max_regions) +{ + struct o2hb_region *reg; + int numregs = 0; + char *p; + + spin_lock(&o2hb_live_lock); + + p = region_uuids; + list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { + mlog(ML_NOTICE, "Region: %s\n", config_item_name(&reg->hr_item)); + if (numregs < max_regions) { + memcpy(p, config_item_name(&reg->hr_item), + O2HB_MAX_REGION_NAME_LEN); + p += O2HB_MAX_REGION_NAME_LEN; + } + numregs++; + } + + spin_unlock(&o2hb_live_lock); + + return numregs; +} +EXPORT_SYMBOL_GPL(o2hb_get_all_regions); + +int o2hb_global_heartbeat_active(void) +{ + return (o2hb_heartbeat_mode == O2HB_HEARTBEAT_GLOBAL); +} +EXPORT_SYMBOL(o2hb_global_heartbeat_active); diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h index 2f16492..00ad8e8 100644 --- a/fs/ocfs2/cluster/heartbeat.h +++ b/fs/ocfs2/cluster/heartbeat.h @@ -31,6 +31,8 @@ #define O2HB_REGION_TIMEOUT_MS 2000 +#define O2HB_MAX_REGION_NAME_LEN 32 + /* number of changes to be seen as live */ #define O2HB_LIVE_THRESHOLD 2 /* number of equal samples to be seen as dead 
*/ @@ -81,5 +83,7 @@ int o2hb_check_node_heartbeating(u8 node_num); int o2hb_check_node_heartbeating_from_callback(u8 node_num); int o2hb_check_local_node_heartbeating(void); void o2hb_stop_all_regions(void); +int o2hb_get_all_regions(char *region_uuids, u8 numregions); +int o2hb_global_heartbeat_active(void); #endif /* O2CLUSTER_HEARTBEAT_H */ -- 1.7.0.4
Sunil Mushran
2010-Jul-23 23:55 UTC
[Ocfs2-devel] [PATCH 6/8] ocfs2/dlm: Add message DLM_QUERY_HBREGION
Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/cluster/ocfs2_nodemanager.h | 2 + fs/ocfs2/dlm/dlmcommon.h | 12 ++- fs/ocfs2/dlm/dlmdomain.c | 222 +++++++++++++++++++++++++++++++++- 3 files changed, 234 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/cluster/ocfs2_nodemanager.h b/fs/ocfs2/cluster/ocfs2_nodemanager.h index 5b9854b..1829c01 100644 --- a/fs/ocfs2/cluster/ocfs2_nodemanager.h +++ b/fs/ocfs2/cluster/ocfs2_nodemanager.h @@ -36,4 +36,6 @@ /* host name, group name, cluster name all 64 bytes */ #define O2NM_MAX_NAME_LEN 64 // __NEW_UTS_LEN +#define O2NM_MAX_HBREGIONS 16 + #endif /* _OCFS2_NODEMANAGER_H */ diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index a13292a..2c05138 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -445,7 +445,8 @@ enum { DLM_LOCK_REQUEST_MSG, /* 515 */ DLM_RECO_DATA_DONE_MSG, /* 516 */ DLM_BEGIN_RECO_MSG, /* 517 */ - DLM_FINALIZE_RECO_MSG /* 518 */ + DLM_FINALIZE_RECO_MSG, /* 518 */ + DLM_QUERY_HBREGION, /* 519 */ }; struct dlm_reco_node_data @@ -727,6 +728,15 @@ struct dlm_cancel_join u8 domain[O2NM_MAX_NAME_LEN]; }; +struct dlm_query_hbregion { + u8 qhb_node; + u8 qhb_numregions; + u8 qhb_namelen; + u8 pad1; + u8 qhb_domain[O2NM_MAX_NAME_LEN]; + u8 qhb_hbregions[O2HB_MAX_REGION_NAME_LEN * O2NM_MAX_HBREGIONS]; +}; + struct dlm_exit_domain { u8 node_idx; diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 2408b9f..3521a00 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -128,10 +128,13 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events); * will have a negotiated version with the same major number and a minor * number equal or smaller. The dlm_ctxt->dlm_locking_proto field should * be used to determine what a running domain is actually using. 
+ * + * New in version 1.1: + * - Message DLM_QUERY_HBREGION added to support global heartbeat */ static const struct dlm_protocol_version dlm_protocol = { .pv_major = 1, - .pv_minor = 0, + .pv_minor = 1, }; #define DLM_DOMAIN_BACKOFF_MS 200 @@ -142,6 +145,8 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, void **ret_data); static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, void **ret_data); +static int dlm_query_hbregion_handler(struct o2net_msg *msg, u32 len, + void *data, void **ret_data); static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, void **ret_data); static int dlm_protocol_compare(struct dlm_protocol_version *existing, @@ -918,6 +923,205 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, return 0; } +static int dlm_match_hbregions(struct dlm_ctxt *dlm, + struct dlm_query_hbregion *qhb) +{ + char *local = NULL, *remote = qhb->qhb_hbregions; + char *l, *r; + int localnr, i, j, foundit; + int status = 0; + + if (!o2hb_global_heartbeat_active()) { + if (qhb->qhb_numregions) { + mlog(ML_ERROR, "Domain %s: Joining node %d has global " + "heartbeat enabled but local node %d does not\n", + qhb->qhb_domain, qhb->qhb_node, dlm->node_num); + status = -EINVAL; + } + goto bail; + } + + if (o2hb_global_heartbeat_active() && !qhb->qhb_numregions) { + mlog(ML_ERROR, "Domain %s: Local node %d has global " + "heartbeat enabled but joining node %d does not\n", + qhb->qhb_domain, dlm->node_num, qhb->qhb_node); + status = -EINVAL; + goto bail; + } + + r = remote; + for (i = 0; i < qhb->qhb_numregions; ++i) { + mlog(ML_NOTICE, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, r); + r += O2HB_MAX_REGION_NAME_LEN; + } + + local = kmalloc(sizeof(qhb->qhb_hbregions), GFP_KERNEL); + if (!local) { + status = -ENOMEM; + goto bail; + } + + localnr = o2hb_get_all_regions(local, O2NM_MAX_HBREGIONS); + + /* compare local regions with remote */ + l = local; + for (i = 0; i 
< localnr; ++i) { + foundit = 0; + r = remote; + for (j = 0; j <= qhb->qhb_numregions; ++j) { + if (!memcmp(l, r, O2HB_MAX_REGION_NAME_LEN)) { + foundit = 1; + break; + } + r += O2HB_MAX_REGION_NAME_LEN; + } + if (!foundit) { + status = -EINVAL; + mlog(ML_ERROR, "Domain %s: Region '%.*s' registered " + "in local node %d but not in joining node %d\n", + qhb->qhb_domain, O2HB_MAX_REGION_NAME_LEN, l, + dlm->node_num, qhb->qhb_node); + goto bail; + } + l += O2HB_MAX_REGION_NAME_LEN; + } + + /* compare remote with local regions */ + r = remote; + for (i = 0; i < qhb->qhb_numregions; ++i) { + foundit = 0; + l = local; + for (j = 0; j < localnr; ++j) { + if (!memcmp(r, l, O2HB_MAX_REGION_NAME_LEN)) { + foundit = 1; + break; + } + l += O2HB_MAX_REGION_NAME_LEN; + } + if (!foundit) { + status = -EINVAL; + mlog(ML_ERROR, "Domain %s: Region '%.*s' registered " + "in joining node %d but not in local node %d\n", + qhb->qhb_domain, O2HB_MAX_REGION_NAME_LEN, r, + qhb->qhb_node, dlm->node_num); + goto bail; + } + r += O2HB_MAX_REGION_NAME_LEN; + } + +bail: + kfree(local); + + return status; +} + +static int dlm_send_hbregions(struct dlm_ctxt *dlm, unsigned long *node_map) +{ + struct dlm_query_hbregion *qhb = NULL; + int status, ret = 0, i; + char *p; + + if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES) + goto bail; + + qhb = kmalloc(sizeof(struct dlm_query_hbregion), GFP_KERNEL); + if (!qhb) { + ret = -ENOMEM; + mlog_errno(ret); + goto bail; + } + + memset(qhb, 0, sizeof(struct dlm_query_hbregion)); + + qhb->qhb_node = dlm->node_num; + qhb->qhb_namelen = strlen(dlm->name); + memcpy(qhb->qhb_domain, dlm->name, qhb->qhb_namelen); + /* if local hb, the numregions will be zero */ + if (o2hb_global_heartbeat_active()) + qhb->qhb_numregions = o2hb_get_all_regions(qhb->qhb_hbregions, + O2NM_MAX_HBREGIONS); + + p = qhb->qhb_hbregions; + for (i = 0; i < qhb->qhb_numregions; ++i, p += O2HB_MAX_REGION_NAME_LEN) + mlog(ML_NOTICE, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, 
p); + + i = -1; + while ((i = find_next_bit(node_map, O2NM_MAX_NODES, + i + 1)) < O2NM_MAX_NODES) { + if (i == dlm->node_num) + continue; + + mlog(ML_NOTICE, "Sending hbregion to node %d\n", i); + + ret = o2net_send_message(DLM_QUERY_HBREGION, DLM_MOD_KEY, qhb, + sizeof(struct dlm_query_hbregion), + i, &status); + if (ret >= 0) + ret = status; + if (ret) { + mlog(ML_ERROR, "hbregion mismatch %d, node %d\n", + ret, i); + break; + } + } + +bail: + kfree(qhb); + return ret; +} + +static int dlm_query_hbregion_handler(struct o2net_msg *msg, u32 len, + void *data, void **ret_data) +{ + struct dlm_query_hbregion *qhb; + struct dlm_ctxt *dlm = NULL; + int status = 0; + int locked = 0; + + qhb = (struct dlm_query_hbregion *) msg->buf; + + mlog(ML_NOTICE, "Node %u queries hb regions on domain %s\n", + qhb->qhb_node, qhb->qhb_domain); + + status = -EINVAL; + + spin_lock(&dlm_domain_lock); + dlm = __dlm_lookup_domain_full(qhb->qhb_domain, qhb->qhb_namelen); + if (!dlm) { + mlog(ML_ERROR, "Node %d queried hb regions on domain %s " + "before join domain\n", qhb->qhb_node, qhb->qhb_domain); + goto bail; + } + + spin_lock(&dlm->spinlock); + locked = 1; + if (dlm->joining_node != qhb->qhb_node) { + mlog(ML_ERROR, "Node %d queried hb regions on domain %s " + "but joining node is %d\n", qhb->qhb_node, qhb->qhb_domain, + dlm->joining_node); + goto bail; + } + + /* Support for global heartbeat was added in 1.1 */ + if (dlm->dlm_locking_proto.pv_major == 1 && + dlm->dlm_locking_proto.pv_minor == 0) { + mlog(ML_ERROR, "Node %d queried hb regions on domain %s " + "but active dlm protocol is %d.%d\n", qhb->qhb_node, + qhb->qhb_domain, dlm->dlm_locking_proto.pv_major, + dlm->dlm_locking_proto.pv_minor); + goto bail; + } + + status = dlm_match_hbregions(dlm, qhb); + +bail: + if (locked) + spin_unlock(&dlm->spinlock); + spin_unlock(&dlm_domain_lock); + + return status; +} + static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, void **ret_data) { @@ -1233,6 +1437,15 
@@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm) set_bit(dlm->node_num, dlm->domain_map); spin_unlock(&dlm->spinlock); + /* Support for global heartbeat was added in 1.1 */ + if (dlm_protocol.pv_major >= 1 && dlm_protocol.pv_minor > 0) { + status = dlm_send_hbregions(dlm, ctxt->yes_resp_map); + if (status) { + mlog_errno(status); + goto bail; + } + } + dlm_send_join_asserts(dlm, ctxt->yes_resp_map); /* Joined state *must* be set before the joining node @@ -1799,6 +2012,13 @@ static int dlm_register_net_handlers(void) sizeof(struct dlm_cancel_join), dlm_cancel_join_handler, NULL, NULL, &dlm_join_handlers); + if (status) + goto bail; + + status = o2net_register_handler(DLM_QUERY_HBREGION, DLM_MOD_KEY, + sizeof(struct dlm_query_hbregion), + dlm_query_hbregion_handler, + NULL, NULL, &dlm_join_handlers); bail: if (status < 0) -- 1.7.0.4
Sunil Mushran
2010-Jul-23 23:55 UTC
[Ocfs2-devel] [PATCH 7/8] ocfs2: Print message if user mounts without starting global heartbeat
In global heartbeat mode, the heartbeat is started by the user. This patch prints an error if the user attempts to mount a volume without starting the heartbeat. Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/stack_o2cb.c | 2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index 7020e12..4b14b5b 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -282,6 +282,8 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn) /* for now we only have one cluster/node, make sure we see it * in the heartbeat universe */ if (!o2hb_check_local_node_heartbeating()) { + if (o2hb_global_heartbeat_active()) + mlog(ML_ERROR, "Global heartbeat not started\n"); rc = -EINVAL; goto out; } -- 1.7.0.4
Sunil Mushran
2010-Jul-23 23:55 UTC
[Ocfs2-devel] [PATCH 8/8] ocfs2/dlm: Add message DLM_QUERY_NODEINFO
Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com> --- fs/ocfs2/dlm/dlmcommon.h | 17 ++++ fs/ocfs2/dlm/dlmdomain.c | 188 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 204 insertions(+), 1 deletions(-) diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 2c05138..34d9cd8 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -447,6 +447,7 @@ enum { DLM_BEGIN_RECO_MSG, /* 517 */ DLM_FINALIZE_RECO_MSG, /* 518 */ DLM_QUERY_HBREGION, /* 519 */ + DLM_QUERY_NODEINFO, /* 520 */ }; struct dlm_reco_node_data @@ -737,6 +738,22 @@ struct dlm_query_hbregion { u8 qhb_hbregions[O2HB_MAX_REGION_NAME_LEN * O2NM_MAX_HBREGIONS]; }; +struct dlm_node_info { + u8 ni_nodenum; + u8 pad1; + u16 ni_ipv4_port; + u32 ni_ipv4_address; +}; + +struct dlm_query_nodeinfo { + u8 qn_nodenum; + u8 qn_numnodes; + u8 qn_namelen; + u8 pad1; + u8 qn_domain[O2NM_MAX_NAME_LEN]; + struct dlm_node_info qn_nodes[O2NM_MAX_NODES]; +}; + struct dlm_exit_domain { u8 node_idx; diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 3521a00..2325087 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -131,6 +131,7 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events); * * New in version 1.1: * - Message DLM_QUERY_HBREGION added to support global heartbeat + * - Message DLM_QUERY_NODEINFO added to allow online node removes */ static const struct dlm_protocol_version dlm_protocol = { .pv_major = 1, @@ -1122,6 +1123,179 @@ bail: return status; } +static int dlm_match_nodes(struct dlm_ctxt *dlm, struct dlm_query_nodeinfo *qn) +{ + struct o2nm_node *local; + struct dlm_node_info *remote; + int i, j; + int status = 0; + + for (j = 0; j < qn->qn_numnodes; ++j) + mlog(ML_NOTICE, "Node %3d, %u.%u.%u.%u:%u\n", + qn->qn_nodes[j].ni_nodenum, + NIPQUAD(qn->qn_nodes[j].ni_ipv4_address), + ntohs(qn->qn_nodes[j].ni_ipv4_port)); + + for (i = 0; i < O2NM_MAX_NODES && !status; ++i) { + local = o2nm_get_node_by_num(i); + remote = NULL; + for 
(j = 0; j < qn->qn_numnodes; ++j) { + if (qn->qn_nodes[j].ni_nodenum == i) { + remote = &(qn->qn_nodes[j]); + break; + } + } + + if (!local && !remote) + continue; + + if ((local && !remote) || (!local && remote)) + status = -EINVAL; + + if (!status && + ((remote->ni_nodenum != local->nd_num) || + (remote->ni_ipv4_port != local->nd_ipv4_port) || + (remote->ni_ipv4_address != local->nd_ipv4_address))) + status = -EINVAL; + + if (status) { + if (remote && !local) + mlog(ML_ERROR, "Domain %s: Node %d " + "(%u.%u.%u.%u:%u) registered in joining " + "node %d but not in local node %d\n", + qn->qn_domain, remote->ni_nodenum, + NIPQUAD(remote->ni_ipv4_address), + ntohs(remote->ni_ipv4_port), + qn->qn_nodenum, dlm->node_num); + if (local && !remote) + mlog(ML_ERROR, "Domain %s: Node %d " + "(%u.%u.%u.%u:%u) registered in local " + "node %d but not in joining node %d\n", + qn->qn_domain, local->nd_num, + NIPQUAD(local->nd_ipv4_address), + ntohs(local->nd_ipv4_port), + dlm->node_num, qn->qn_nodenum); + BUG_ON((!local && !remote)); + } + + if (local) + o2nm_node_put(local); + } + + return status; +} + +static int dlm_send_nodeinfo(struct dlm_ctxt *dlm, unsigned long *node_map) +{ + struct dlm_query_nodeinfo *qn = NULL; + struct o2nm_node *node; + int ret = 0, status, count, i; + + if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES) + goto bail; + + qn = kmalloc(sizeof(struct dlm_query_nodeinfo), GFP_KERNEL); + if (!qn) { + ret = -ENOMEM; + mlog_errno(ret); + goto bail; + } + + memset(qn, 0, sizeof(struct dlm_query_nodeinfo)); + + for (i = 0, count = 0; i < O2NM_MAX_NODES; ++i) { + node = o2nm_get_node_by_num(i); + if (!node) + continue; + qn->qn_nodes[count].ni_nodenum = node->nd_num; + qn->qn_nodes[count].ni_ipv4_port = node->nd_ipv4_port; + qn->qn_nodes[count].ni_ipv4_address = node->nd_ipv4_address; + mlog(ML_NOTICE, "Node %3d, %u.%u.%u.%u:%u\n", node->nd_num, + NIPQUAD(node->nd_ipv4_address), ntohs(node->nd_ipv4_port)); + ++count; + o2nm_node_put(node); + } + 
+ qn->qn_nodenum = dlm->node_num; + qn->qn_numnodes = count; + qn->qn_namelen = strlen(dlm->name); + memcpy(qn->qn_domain, dlm->name, qn->qn_namelen); + + i = -1; + while ((i = find_next_bit(node_map, O2NM_MAX_NODES, + i + 1)) < O2NM_MAX_NODES) { + if (i == dlm->node_num) + continue; + + mlog(ML_NOTICE, "Sending nodeinfo to node %d\n", i); + + ret = o2net_send_message(DLM_QUERY_NODEINFO, DLM_MOD_KEY, + qn, sizeof(struct dlm_query_nodeinfo), + i, &status); + if (ret >= 0) + ret = status; + if (ret) { + mlog(ML_ERROR, "node mismatch %d, node %d\n", ret, i); + break; + } + } + +bail: + kfree(qn); + return ret; +} + +static int dlm_query_nodeinfo_handler(struct o2net_msg *msg, u32 len, + void *data, void **ret_data) +{ + struct dlm_query_nodeinfo *qn; + struct dlm_ctxt *dlm = NULL; + int locked = 0, status = 0; + + qn = (struct dlm_query_nodeinfo *) msg->buf; + + mlog(ML_NOTICE, "Node %u queries nodes on domain %s\n", + qn->qn_nodenum, qn->qn_domain); + + status = -EINVAL; + + spin_lock(&dlm_domain_lock); + dlm = __dlm_lookup_domain_full(qn->qn_domain, qn->qn_namelen); + if (!dlm) { + mlog(ML_ERROR, "Node %d queried nodes on domain %s before " + "join domain\n", qn->qn_nodenum, qn->qn_domain); + goto bail; + } + + spin_lock(&dlm->spinlock); + locked = 1; + if (dlm->joining_node != qn->qn_nodenum) { + mlog(ML_ERROR, "Node %d queried nodes on domain %s but " + "joining node is %d\n", qn->qn_nodenum, qn->qn_domain, + dlm->joining_node); + goto bail; + } + + /* Support for node query was added in 1.1 */ + if (dlm->dlm_locking_proto.pv_major == 1 && + dlm->dlm_locking_proto.pv_minor == 0) { + mlog(ML_ERROR, "Node %d queried nodes on domain %s " + "but active dlm protocol is %d.%d\n", qn->qn_nodenum, + qn->qn_domain, dlm->dlm_locking_proto.pv_major, + dlm->dlm_locking_proto.pv_minor); + goto bail; + } + + status = dlm_match_nodes(dlm, qn); + +bail: + if (locked) + spin_unlock(&dlm->spinlock); + spin_unlock(&dlm_domain_lock); + + return status; +} + static int 
dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, void **ret_data) { @@ -1437,8 +1611,13 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm) set_bit(dlm->node_num, dlm->domain_map); spin_unlock(&dlm->spinlock); - /* Support for global heartbeat was added in 1.1 */ + /* Support for global heartbeat and node info was added in 1.1 */ if (dlm_protocol.pv_major >= 1 && dlm_protocol.pv_minor > 0) { + status = dlm_send_nodeinfo(dlm, ctxt->yes_resp_map); + if (status) { + mlog_errno(status); + goto bail; + } status = dlm_send_hbregions(dlm, ctxt->yes_resp_map); if (status) { mlog_errno(status); @@ -2020,6 +2199,13 @@ static int dlm_register_net_handlers(void) dlm_query_hbregion_handler, NULL, NULL, &dlm_join_handlers); + if (status) + goto bail; + + status = o2net_register_handler(DLM_QUERY_NODEINFO, DLM_MOD_KEY, + sizeof(struct dlm_query_nodeinfo), + dlm_query_nodeinfo_handler, + NULL, NULL, &dlm_join_handlers); bail: if (status < 0) dlm_unregister_net_handlers(); -- 1.7.0.4
Hi Sunil,

Global heartbeat also introduces a difference compared with local heartbeat. With global heartbeat (ghb), what if one or more non-heartbeat ocfs2 volumes fail? Say some lower layer (raid/disk driver) stops working. In that case there is no failover, since there is no self-fence, I think. So it could eventually make the domain in question unavailable, couldn't it? The original local heartbeat (lhb) scheme has no such problem. Is there a solution?

regards,
wengang.

On 10-07-23 16:55, Sunil Mushran wrote:
>
> This is the first drop of the global heartbeat patches for ocfs2/kernel.
>
> The first few patches add support for heartbeat mode in sysfs, the new
> incompat clusterinfo flag and the new mount option heartbeat=global.
>
> 0001-ocfs2-cluster-Add-heartbeat-mode-configfs-parameter.patch
> 0002-ocfs2-Add-an-incompat-feature-flag-OCFS2_FEATURE_INC.patch
> 0003-ocfs2-Add-support-for-heartbeat-global-mount-option.patch
> 0004-ocfs2-dlm-Expose-dlm_protocol-in-dlm_state.patch
>
> The next few patches enhance the join domain protocol to get the list
> of configured nodes and heartbeating regions to ensure that all nodes
> in the cluster have the same view of the cluster.
>
> 0005-ocfs2-cluster-Get-all-heartbeat-regions.patch
> 0006-ocfs2-dlm-Add-message-DLM_QUERY_HBREGION.patch
> 0007-ocfs2-Print-message-if-user-mounts-without-starting-.patch
> 0008-ocfs2-dlm-Add-message-DLM_QUERY_NODEINFO.patch
>
> The one known missing bit concerns quorum calculation. I am still
> working on it.
>
> http://oss.oracle.com/osswiki/OCFS2/DesignDocs/NewGlobalHeartbeat
>
> Thanks
> Sunil
>
> _______________________________________________
> Ocfs2-devel mailing list
> Ocfs2-devel at oss.oracle.com
> http://oss.oracle.com/mailman/listinfo/ocfs2-devel
Wengang Wang
2010-Jul-29 06:49 UTC
[Ocfs2-devel] [PATCH 8/8] ocfs2/dlm: Add message DLM_QUERY_NODEINFO
On 10-07-23 16:55, Sunil Mushran wrote:
> Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com>
> ---
> fs/ocfs2/dlm/dlmcommon.h | 17 ++++
> fs/ocfs2/dlm/dlmdomain.c | 188 +++++++++++++++++++++++++++++++++++++++++++++-
> 2 files changed, 204 insertions(+), 1 deletions(-)
>
> +	for (i = 0; i < O2NM_MAX_NODES && !status; ++i) {
> +		local = o2nm_get_node_by_num(i);
> +		remote = NULL;
> +		for (j = 0; j < qn->qn_numnodes; ++j) {
> +			if (qn->qn_nodes[j].ni_nodenum == i) {
> +				remote = &(qn->qn_nodes[j]);
> +				break;
> +			}
> +		}
> +
> +		if (!local && !remote)
> +			continue;
> +
> +		if ((local && !remote) || (!local && remote))
> +			status = -EINVAL;
> +
> +		if (!status &&
> +		    ((remote->ni_nodenum != local->nd_num) ||

ni_nodenum already checked in the for loop.

> +		     (remote->ni_ipv4_port != local->nd_ipv4_port) ||
> +		     (remote->ni_ipv4_address != local->nd_ipv4_address)))
> +			status = -EINVAL;
> +

> +static int dlm_send_nodeinfo(struct dlm_ctxt *dlm, unsigned long *node_map)
> +{
> +	struct dlm_query_nodeinfo *qn = NULL;
> +	struct o2nm_node *node;
> +	int ret = 0, status, count, i;
> +
> +	if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES)
> +		goto bail;
> +
> +	qn = kmalloc(sizeof(struct dlm_query_nodeinfo), GFP_KERNEL);
> +	if (!qn) {
> +		ret = -ENOMEM;
> +		mlog_errno(ret);
> +		goto bail;
> +	}
> +
> +	memset(qn, 0, sizeof(struct dlm_query_nodeinfo));

Any reason we are not using kzalloc() here and in dlm_send_hbregions()?

> +	for (i = 0, count = 0; i < O2NM_MAX_NODES; ++i) {
> +		node = o2nm_get_node_by_num(i);
> +		if (!node)
> +			continue;
> +		qn->qn_nodes[count].ni_nodenum = node->nd_num;
> +		qn->qn_nodes[count].ni_ipv4_port = node->nd_ipv4_port;
> +

> +static int dlm_query_nodeinfo_handler(struct o2net_msg *msg, u32 len,
> +				      void *data, void **ret_data)
> +{
> +	struct dlm_query_nodeinfo *qn;
> +	struct dlm_ctxt *dlm = NULL;
> +	int locked = 0, status = 0;
> +
> +	qn = (struct dlm_query_nodeinfo *) msg->buf;
> +
> +	mlog(ML_NOTICE, "Node %u queries nodes on domain %s\n",
> +	     qn->qn_nodenum, qn->qn_domain);
> +
> +	status = -EINVAL;

How about removing this line and initializing status in the declaration instead:

+	int locked = 0, status = -EINVAL;

regards,
wengang.

> +
> +	spin_lock(&dlm_domain_lock);
> +	dlm = __dlm_lookup_domain_full(qn->qn_domain, qn->qn_namelen);
> +	if (!dlm) {
> +		mlog(ML_ERROR, "Node %d queried nodes on domain %s before "
> +		     "join domain\n", qn->qn_nodenum, qn->qn_domain);
> +		goto bail;
> +	}
> +
Sunil Mushran
2010-Jul-29 18:59 UTC
[Ocfs2-devel] [PATCH 8/8] ocfs2/dlm: Add message DLM_QUERY_NODEINFO
On 07/28/2010 11:49 PM, Wengang Wang wrote:
> ni_nodenum already checked in the for loop.

For readability.

> Any reason we are not using kzalloc() here and in dlm_send_hbregions()?

ok.

> how about remove this line and
>
> + int locked = 0, status = -EINVAL;

ok.
Wengang Wang
2010-Jul-30 01:55 UTC
[Ocfs2-devel] [PATCH 6/8] ocfs2/dlm: Add message DLM_QUERY_HBREGION
Got your idea. Thanks for the explanation!

regards,
wengang.

On 10-07-29 10:22, Sunil Mushran wrote:
> On 07/28/2010 10:03 PM, Wengang Wang wrote:
> >Yes, I can see what you are doing there.
> >But you are comparing twice. I was emphasising "again" :)
> >The hbregions is like a collection. we say collection A is equal to
> >collection B, it can mean the number is equal, and all elements in
> >collection A are all in collection B. So no need to compare each region
> >again.
>
> Sorry, my example was not correct. What if node sends regions A, B, A
> while the receiving node expects A, B, C. It should not happen.
> But considering we rely on the hb regions being consistent, we can
> never be too careful.
>
> Agreed, I can come up with another scheme. But here the max
> is small. And in such cases, brute force typically works best.