Jeff Mahoney
2006-Jan-09 22:39 UTC
[Ocfs2-devel] [PATCH 07/11] ocfs2: allow per-resource node status queries
This patch allows callers of o2hb_fill_node_map* and o2hb_check_{,local_}node_heartbeating* to specify a particular heartbeat resource to find status on. If the query is just to discover if the node is up *at all*, NULL can be specified. The disk heartbeat currently doesn't make a distinction between different resources and simply maintains a unified node map. The userspace heartbeat resource will allow distinct per-resource membership, so this is needed to lay the groundwork. The actual file system now queries the UUID sooner and uses it to discover membership in the resource for that UUID. fs/ocfs2/cluster/disk_heartbeat.c | 3 + fs/ocfs2/cluster/heartbeat.c | 58 ++++++++++++++++++++++++++++------ fs/ocfs2/cluster/heartbeat.h | 24 +++++++++++--- fs/ocfs2/cluster/nodemanager.c | 13 ------- fs/ocfs2/cluster/nodemanager.h | 12 +++++++ fs/ocfs2/cluster/tcp.c | 2 - fs/ocfs2/dlm/dlmdomain.c | 7 ++-- fs/ocfs2/heartbeat.c | 21 ++++++++++-- fs/ocfs2/heartbeat.h | 2 - fs/ocfs2/ocfs2.h | 1 fs/ocfs2/super.c | 64 ++++++++++++++++++++++++++++---------- 11 files changed, 155 insertions(+), 52 deletions(-) Signed-off-by: Jeff Mahoney <jeffm at suse.com> diff -ruNpX dontdiff linux-2.6.15-staging1/fs/ocfs2/cluster/disk_heartbeat.c linux-2.6.15-staging1a/fs/ocfs2/cluster/disk_heartbeat.c --- linux-2.6.15-staging1/fs/ocfs2/cluster/disk_heartbeat.c 2006-01-08 19:20:38.491417304 -0500 +++ linux-2.6.15-staging1a/fs/ocfs2/cluster/disk_heartbeat.c 2006-01-08 19:19:50.967642016 -0500 @@ -853,11 +853,12 @@ static int o2hb_thread(void *data) } /* if we're already in a callback then we're already serialized by the sem */ -static void fill_node_map(unsigned long *map, size_t bytes) +static int fill_node_map(const char *resource, unsigned long *map, size_t bytes) { BUG_ON(bytes < (BITS_TO_LONGS(O2NM_MAX_NODES) * sizeof(unsigned long))); memcpy(map, &o2hb_live_node_bitmap, bytes); + return 0; } /* diff -ruNpX dontdiff linux-2.6.15-staging1/fs/ocfs2/cluster/heartbeat.c 
linux-2.6.15-staging1a/fs/ocfs2/cluster/heartbeat.c --- linux-2.6.15-staging1/fs/ocfs2/cluster/heartbeat.c 2006-01-08 19:20:38.492417152 -0500 +++ linux-2.6.15-staging1a/fs/ocfs2/cluster/heartbeat.c 2006-01-08 19:19:50.968641864 -0500 @@ -172,23 +172,26 @@ void o2hb_init(void) INIT_LIST_HEAD(&o2hb_node_events); } -void o2hb_fill_node_map_from_callback(unsigned long *map, unsigned bytes) +int o2hb_fill_node_map_from_callback(const char *resource, unsigned long *map, + unsigned bytes) { - o2hb_active_group->fill_node_map(map, bytes); + return o2hb_active_group->fill_node_map(resource, map, bytes); } /* * get a map of all nodes that are heartbeating in any regions */ -void o2hb_fill_node_map(unsigned long *map, unsigned bytes) +int o2hb_fill_node_map(const char *resource, unsigned long *map, unsigned bytes) { /* callers want to serialize this map and callbacks so that they * can trust that they don't miss nodes coming to the party */ + int err; down_read(&o2hb_callback_sem); spin_lock(&o2hb_live_lock); - o2hb_fill_node_map_from_callback(map, bytes); + err = o2hb_fill_node_map_from_callback(resource, map, bytes); spin_unlock(&o2hb_live_lock); up_read(&o2hb_callback_sem); + return err; } EXPORT_SYMBOL_GPL(o2hb_fill_node_map); @@ -341,6 +344,39 @@ int o2hb_unregister_callback(struct o2hb } EXPORT_SYMBOL_GPL(o2hb_unregister_callback); +struct o2hb_heartbeat_resource *o2hb_heartbeat_resource_get_by_name(const char *name) +{ + struct config_group *hb_root; + struct config_item *item; + struct o2hb_heartbeat_resource *hbset = NULL; + struct o2nm_cluster *cluster = o2nm_single_cluster; + int found = 0; + + if (cluster == NULL) + return NULL; + + /* This may change, but for now it's hard coded */ + hb_root = cluster->cl_group.default_groups[1]; + config_group_get(hb_root); + + /* There should definitely be locking in place here */ + list_for_each_entry(item, &hb_root->cg_children, ci_entry) { + config_item_get(item); + if (strcmp(item->ci_name, name) == 0) { + found = 1; + 
break; + } + config_item_put(item); + } + config_group_put(hb_root); + + if (found) + hbset = to_o2hb_heartbeat_resource(item); + + return hbset; +} +EXPORT_SYMBOL_GPL(o2hb_heartbeat_resource_get_by_name); + int o2hb_register_heartbeat_group(struct o2hb_heartbeat_group *group) { spin_lock(&o2hb_group_lock); @@ -367,11 +403,11 @@ int o2hb_unregister_heartbeat_group(stru } EXPORT_SYMBOL_GPL(o2hb_unregister_heartbeat_group); -int o2hb_check_node_heartbeating(u8 node_num) +int o2hb_check_node_heartbeating(const char *resource, u8 node_num) { unsigned long testing_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; - o2hb_fill_node_map(testing_map, sizeof(testing_map)); + o2hb_fill_node_map(resource, testing_map, sizeof(testing_map)); if (!test_bit(node_num, testing_map)) { mlog(ML_HEARTBEAT, "node (%u) does not have heartbeating enabled.\n", @@ -383,11 +419,13 @@ int o2hb_check_node_heartbeating(u8 node } EXPORT_SYMBOL_GPL(o2hb_check_node_heartbeating); -int o2hb_check_node_heartbeating_from_callback(u8 node_num) +int o2hb_check_node_heartbeating_from_callback(const char *resource, + u8 node_num) { unsigned long testing_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; - o2hb_fill_node_map_from_callback(testing_map, sizeof(testing_map)); + o2hb_fill_node_map_from_callback(resource, testing_map, + sizeof(testing_map)); if (!test_bit(node_num, testing_map)) { mlog(ML_HEARTBEAT, "node (%u) does not have heartbeating enabled.\n", @@ -401,7 +439,7 @@ EXPORT_SYMBOL_GPL(o2hb_check_node_heartb /* Makes sure our local node is configured with a node number, and is * heartbeating. 
*/ -int o2hb_check_local_node_heartbeating(void) +int o2hb_check_local_node_heartbeating(const char *resource) { u8 node_num; @@ -412,6 +450,6 @@ int o2hb_check_local_node_heartbeating(v return 0; } - return o2hb_check_node_heartbeating(node_num); + return o2hb_check_node_heartbeating(resource, node_num); } EXPORT_SYMBOL_GPL(o2hb_check_local_node_heartbeating); diff -ruNpX dontdiff linux-2.6.15-staging1/fs/ocfs2/cluster/heartbeat.h linux-2.6.15-staging1a/fs/ocfs2/cluster/heartbeat.h --- linux-2.6.15-staging1/fs/ocfs2/cluster/heartbeat.h 2006-01-08 19:20:38.492417152 -0500 +++ linux-2.6.15-staging1a/fs/ocfs2/cluster/heartbeat.h 2006-01-08 19:21:49.536616792 -0500 @@ -49,7 +49,8 @@ struct o2hb_heartbeat_group { const char *hs_name; int (*init)(struct o2hb_heartbeat_group *hs); void (*exit)(struct o2hb_heartbeat_group *hs); - void (*fill_node_map)(unsigned long *map, size_t bytes); + int (*fill_node_map)(const char *resource, unsigned long *map, + size_t bytes); atomic_t hs_count; struct list_head hs_list; }; @@ -98,15 +99,18 @@ void o2hb_setup_callback(struct o2hb_cal int priority, struct o2hb_heartbeat_resource *res); int o2hb_register_callback(struct o2hb_callback_func *hc); int o2hb_unregister_callback(struct o2hb_callback_func *hc); -void o2hb_fill_node_map(unsigned long *map, +int o2hb_fill_node_map(const char *resource, unsigned long *map, unsigned bytes); void o2hb_init(void); -int o2hb_check_node_heartbeating(u8 node_num); -int o2hb_check_node_heartbeating_from_callback(u8 node_num); -int o2hb_check_local_node_heartbeating(void); +int o2hb_check_node_heartbeating(const char *resource, u8 node_num); +int o2hb_check_node_heartbeating_from_callback(const char *resource, + u8 node_num); +int o2hb_check_local_node_heartbeating(const char *resource); void o2hb_notify(enum o2hb_callback_type type, struct o2nm_node *node, int node_num); +struct o2hb_heartbeat_resource *o2hb_heartbeat_resource_get_by_name(const char * name); + static inline struct o2hb_heartbeat_group 
*to_o2hb_heartbeat_group(struct config_group *group) { return container_of(group, struct o2hb_heartbeat_group, hs_group); @@ -116,4 +120,14 @@ static inline struct o2hb_heartbeat_reso { return container_of(item, struct o2hb_heartbeat_resource, hr_item); } + +static inline void o2hb_heartbeat_resource_get(struct o2hb_heartbeat_resource *hbres) +{ + config_item_get(&hbres->hr_item); +} + +static inline void o2hb_heartbeat_resource_put(struct o2hb_heartbeat_resource *hbres) +{ + config_item_put(&hbres->hr_item); +} #endif /* O2CLUSTER_HEARTBEAT_H */ diff -ruNpX dontdiff linux-2.6.15-staging1/fs/ocfs2/cluster/nodemanager.c linux-2.6.15-staging1a/fs/ocfs2/cluster/nodemanager.c --- linux-2.6.15-staging1/fs/ocfs2/cluster/nodemanager.c 2006-01-08 19:20:38.492417152 -0500 +++ linux-2.6.15-staging1a/fs/ocfs2/cluster/nodemanager.c 2006-01-08 19:19:50.969641712 -0500 @@ -36,7 +36,7 @@ /* for now we operate under the assertion that there can be only one * cluster active at a time. Changing this will require trickling * cluster references throughout where nodes are looked up */ -static struct o2nm_cluster *o2nm_single_cluster = NULL; +struct o2nm_cluster *o2nm_single_cluster = NULL; #define OCFS2_MAX_HB_CTL_PATH 256 static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; @@ -98,17 +98,6 @@ const char *o2nm_get_hb_ctl_path(void) } EXPORT_SYMBOL_GPL(o2nm_get_hb_ctl_path); -struct o2nm_cluster { - struct config_group cl_group; - unsigned cl_has_local:1; - u8 cl_local_node; - rwlock_t cl_nodes_lock; - struct o2nm_node *cl_nodes[O2NM_MAX_NODES]; - struct rb_root cl_node_ip_tree; - /* this bitmap is part of a hack for disk bitmap.. will go eventually. 
- zab */ - unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; -}; - struct o2nm_node *o2nm_get_node_by_num(u8 node_num) { struct o2nm_node *node = NULL; diff -ruNpX dontdiff linux-2.6.15-staging1/fs/ocfs2/cluster/nodemanager.h linux-2.6.15-staging1a/fs/ocfs2/cluster/nodemanager.h --- linux-2.6.15-staging1/fs/ocfs2/cluster/nodemanager.h 2006-01-08 19:20:38.493417000 -0500 +++ linux-2.6.15-staging1a/fs/ocfs2/cluster/nodemanager.h 2006-01-08 19:19:50.969641712 -0500 @@ -53,6 +53,18 @@ struct o2nm_node { unsigned long nd_set_attributes; }; +struct o2nm_cluster { + struct config_group cl_group; + unsigned cl_has_local:1; + u8 cl_local_node; + rwlock_t cl_nodes_lock; + struct o2nm_node *cl_nodes[O2NM_MAX_NODES]; + struct rb_root cl_node_ip_tree; + /* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */ + unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; +}; +extern struct o2nm_cluster *o2nm_single_cluster; + u8 o2nm_this_node(void); int o2nm_configured_node_map(unsigned long *map, unsigned bytes); diff -ruNpX dontdiff linux-2.6.15-staging1/fs/ocfs2/cluster/tcp.c linux-2.6.15-staging1a/fs/ocfs2/cluster/tcp.c --- linux-2.6.15-staging1/fs/ocfs2/cluster/tcp.c 2006-01-08 19:20:38.494416848 -0500 +++ linux-2.6.15-staging1a/fs/ocfs2/cluster/tcp.c 2006-01-08 19:19:50.970641560 -0500 @@ -1570,7 +1570,7 @@ static int o2net_accept_one(struct socke /* this happens all the time when the other node sees our heartbeat * and tries to connect before we see their heartbeat */ - if (!o2hb_check_node_heartbeating_from_callback(node->nd_num)) { + if (!o2hb_check_node_heartbeating_from_callback(NULL, node->nd_num)) { mlog(ML_CONN, "attempt to connect from node '%s' at " "%u.%u.%u.%u:%d but it isn't heartbeating\n", node->nd_name, NIPQUAD(sin.sin_addr.s_addr), diff -ruNpX dontdiff linux-2.6.15-staging1/fs/ocfs2/dlm/dlmdomain.c linux-2.6.15-staging1a/fs/ocfs2/dlm/dlmdomain.c --- linux-2.6.15-staging1/fs/ocfs2/dlm/dlmdomain.c 2006-01-08 
19:20:38.494416848 -0500 +++ linux-2.6.15-staging1a/fs/ocfs2/dlm/dlmdomain.c 2006-01-08 19:19:50.971641408 -0500 @@ -560,7 +560,7 @@ static int dlm_query_join_handler(struct * to back off and try again. This gives heartbeat a chance * to catch up. */ - if (!o2hb_check_node_heartbeating(query->node_idx)) { + if (!o2hb_check_node_heartbeating(query->domain, query->node_idx)) { mlog(0, "node %u is not in our live map yet\n", query->node_idx); @@ -876,7 +876,8 @@ static int dlm_try_to_join_domain(struct /* group sem locking should work for us here -- we're already * registered for heartbeat events so filling this should be * atomic wrt getting those handlers called. */ - o2hb_fill_node_map(dlm->live_nodes_map, sizeof(dlm->live_nodes_map)); + o2hb_fill_node_map(dlm->name, dlm->live_nodes_map, + sizeof(dlm->live_nodes_map)); spin_lock(&dlm->spinlock); memcpy(ctxt->live_map, dlm->live_nodes_map, sizeof(ctxt->live_map)); @@ -1266,7 +1267,7 @@ struct dlm_ctxt * dlm_register_domain(co goto leave; } - if (!o2hb_check_local_node_heartbeating()) { + if (!o2hb_check_local_node_heartbeating(domain)) { mlog(ML_ERROR, "the local node has not been configured, or is " "not heartbeating\n"); ret = -EPROTO; diff -ruNpX dontdiff linux-2.6.15-staging1/fs/ocfs2/heartbeat.c linux-2.6.15-staging1a/fs/ocfs2/heartbeat.c --- linux-2.6.15-staging1/fs/ocfs2/heartbeat.c 2006-01-08 19:20:38.495416696 -0500 +++ linux-2.6.15-staging1a/fs/ocfs2/heartbeat.c 2006-01-08 19:19:50.971641408 -0500 @@ -76,6 +76,13 @@ static void ocfs2_do_node_down(int node_ mlog(0, "ocfs2: node down event for %d\n", node_num); +#if 0 + if (osb->node_num == node_num) { + ocfs2_handle_fencing(node_num, osb); + return; + } +#endif + if (!osb->dlm) { /* * No DLM means we're not even ready to participate yet. 
@@ -132,21 +139,27 @@ static void ocfs2_hb_node_up_cb(struct o ocfs2_node_map_clear_bit(osb, &osb->umount_map, node_num); } -void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb) +int ocfs2_setup_hb_callbacks(struct ocfs2_super *osb) { + osb->osb_hb_res = o2hb_heartbeat_resource_get_by_name(osb->uuid_str); + + if (!osb->osb_hb_res) + return -EINVAL; + o2hb_setup_callback(&osb->osb_hb_down, O2HB_NODE_DOWN_CB, ocfs2_hb_node_down_cb, osb, - OCFS2_HB_NODE_DOWN_PRI, NULL); + OCFS2_HB_NODE_DOWN_PRI, osb->osb_hb_res); o2hb_setup_callback(&osb->osb_hb_up, O2HB_NODE_UP_CB, ocfs2_hb_node_up_cb, osb, - OCFS2_HB_NODE_UP_PRI, NULL); + OCFS2_HB_NODE_UP_PRI, osb->osb_hb_res); /* Not exactly a heartbeat callback, but leads to essentially * the same path so we set it up here. */ dlm_setup_eviction_cb(&osb->osb_eviction_cb, ocfs2_dlm_eviction_cb, osb); + return 0; } /* Most functions here are just stubs for now... */ @@ -179,6 +192,8 @@ void ocfs2_clear_hb_callbacks(struct ocf status = o2hb_unregister_callback(&osb->osb_hb_up); if (status < 0) mlog_errno(status); + + o2hb_heartbeat_resource_put(osb->osb_hb_res); } void ocfs2_stop_heartbeat(struct ocfs2_super *osb) diff -ruNpX dontdiff linux-2.6.15-staging1/fs/ocfs2/heartbeat.h linux-2.6.15-staging1a/fs/ocfs2/heartbeat.h --- linux-2.6.15-staging1/fs/ocfs2/heartbeat.h 2006-01-08 19:20:38.495416696 -0500 +++ linux-2.6.15-staging1a/fs/ocfs2/heartbeat.h 2006-01-08 19:19:50.972641256 -0500 @@ -28,7 +28,7 @@ void ocfs2_init_node_maps(struct ocfs2_super *osb); -void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb); +int ocfs2_setup_hb_callbacks(struct ocfs2_super *osb); int ocfs2_register_hb_callbacks(struct ocfs2_super *osb); void ocfs2_clear_hb_callbacks(struct ocfs2_super *osb); void ocfs2_stop_heartbeat(struct ocfs2_super *osb); diff -ruNpX dontdiff linux-2.6.15-staging1/fs/ocfs2/ocfs2.h linux-2.6.15-staging1a/fs/ocfs2/ocfs2.h --- linux-2.6.15-staging1/fs/ocfs2/ocfs2.h 2006-01-08 19:20:38.495416696 -0500 +++ 
linux-2.6.15-staging1a/fs/ocfs2/ocfs2.h 2006-01-08 19:19:50.972641256 -0500 @@ -278,6 +278,7 @@ struct ocfs2_super unsigned int net_response_ids; struct list_head net_response_list; + struct o2hb_heartbeat_resource *osb_hb_res; struct o2hb_callback_func osb_hb_up; struct o2hb_callback_func osb_hb_down; diff -ruNpX dontdiff linux-2.6.15-staging1/fs/ocfs2/super.c linux-2.6.15-staging1a/fs/ocfs2/super.c --- linux-2.6.15-staging1/fs/ocfs2/super.c 2006-01-08 19:20:38.496416544 -0500 +++ linux-2.6.15-staging1a/fs/ocfs2/super.c 2006-01-08 19:19:50.973641104 -0500 @@ -515,6 +515,27 @@ bail: return status; } +void +copy_uuid_from_super(char *buf, struct buffer_head *bh) +{ + struct ocfs2_dinode *di = NULL; + int i; + char *ptr; + int ret; + di = (struct ocfs2_dinode *)bh->b_data; + + for (i = 0, ptr = buf; i < OCFS2_VOL_UUID_LEN; i++) { + /* print with null */ + ret = snprintf(ptr, 3, "%02X", di->id2.i_super.s_uuid[i]); + if (ret != 2) { /* drop super cleans up */ + memset (buf, 0, OCFS2_VOL_UUID_LEN * 2); + return; + } + /* then only advance past the last char */ + ptr += 2; + } +} + static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) { struct dentry *root; @@ -523,16 +544,10 @@ static int ocfs2_fill_super(struct super struct inode *inode = NULL; struct ocfs2_super *osb = NULL; struct buffer_head *bh = NULL; + char uuid[33]; mlog_entry("%p, %p, %i", sb, data, silent); - /* for now we only have one cluster/node, make sure we see it - * in the heartbeat universe */ - if (!o2hb_check_local_node_heartbeating()) { - status = -EINVAL; - goto read_super_error; - } - /* probe for superblock */ status = ocfs2_sb_probe(sb, &bh, &sector_size); if (status < 0) { @@ -540,6 +555,17 @@ static int ocfs2_fill_super(struct super goto read_super_error; } + copy_uuid_from_super(uuid, bh); + +#if 0 + /* for now we only have one cluster/node, make sure we see it + * in the heartbeat universe */ + if (!o2hb_check_local_node_heartbeating(uuid)) { + status = -EINVAL; + goto
read_super_error; + } +#endif + status = ocfs2_initialize_super(sb, bh, sector_size); osb = OCFS2_SB(sb); if (status < 0) { @@ -1297,8 +1323,6 @@ static int ocfs2_initialize_super(struct osb->local_alloc_state = OCFS2_LA_UNUSED; osb->local_alloc_bh = NULL; - ocfs2_setup_hb_callbacks(osb); - init_waitqueue_head(&osb->osb_mount_event); osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL); @@ -1317,6 +1341,21 @@ static int ocfs2_initialize_super(struct di = (struct ocfs2_dinode *)bh->b_data; + if (ocfs2_setup_osb_uuid(osb, di->id2.i_super.s_uuid, + sizeof(di->id2.i_super.s_uuid))) { + mlog(ML_ERROR, "Out of memory trying to setup our uuid.\n"); + status = -ENOMEM; + goto bail; + } + + /* This moves way down here because we need the UUID to do it */ + if (ocfs2_setup_hb_callbacks(osb)) { + mlog(ML_ERROR, "Could not find heartbeat group for file " + "system %s\n", osb->uuid_str); + status = -EINVAL; + goto bail; + } + osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots); if (osb->max_slots > OCFS2_MAX_SLOTS || osb->max_slots == 0) { mlog(ML_ERROR, "Invalid number of node slots (%u)\n", @@ -1398,13 +1437,6 @@ static int ocfs2_initialize_super(struct goto bail; } - if (ocfs2_setup_osb_uuid(osb, di->id2.i_super.s_uuid, - sizeof(di->id2.i_super.s_uuid))) { - mlog(ML_ERROR, "Out of memory trying to setup our uuid.\n"); - status = -ENOMEM; - goto bail; - } - memcpy(&uuid_net_key, &osb->uuid[i], sizeof(osb->net_key)); osb->net_key = le32_to_cpu(uuid_net_key);