Sunil Mushran
2011-Feb-21 22:25 UTC
[Ocfs2-devel] [PATCH 1/2] ocfs2/cluster: Heartbeat mismatch message improved
If o2hb finds unexpected values in the heartbeat slot, it prints a message
"ERROR: Device "dm-6": another node is heartbeating in our
slot!"
This patch adds more information allowing us to see the actual mismatch.
The new message reads "ERROR: Heartbeat mismatch on "dm-6":
expected(92:0x234543,
0x76567) ondisk(92:0x234543, 0x76565)"
Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com>
---
fs/ocfs2/cluster/heartbeat.c | 34 +++++++++++++++++-----------------
1 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index b108e86..d633df0 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -543,25 +543,27 @@ static int o2hb_verify_crc(struct o2hb_region *reg,
/* We want to make sure that nobody is heartbeating on top of us --
* this will help detect an invalid configuration. */
-static int o2hb_check_last_timestamp(struct o2hb_region *reg)
+static void o2hb_check_last_timestamp(struct o2hb_region *reg)
{
- int node_num, ret;
struct o2hb_disk_slot *slot;
struct o2hb_disk_heartbeat_block *hb_block;
- node_num = o2nm_this_node();
-
- ret = 1;
- slot = ®->hr_slots[node_num];
+ slot = ®->hr_slots[o2nm_this_node()];
/* Don't check on our 1st timestamp */
- if (slot->ds_last_time) {
- hb_block = slot->ds_raw_block;
+ if (!slot->ds_last_time)
+ return;
- if (le64_to_cpu(hb_block->hb_seq) != slot->ds_last_time)
- ret = 0;
- }
+ hb_block = slot->ds_raw_block;
+ if (le64_to_cpu(hb_block->hb_seq) == slot->ds_last_time)
+ return;
- return ret;
+ mlog(ML_ERROR, "Heartbeat mismatch on \"%s\": "
+ "expected(%u:0x%llx, 0x%llx), ondisk(%u:0x%llx, 0x%llx)\n",
+ reg->hr_dev_name, slot->ds_node_num,
+ (unsigned long long)slot->ds_last_generation,
+ (unsigned long long)slot->ds_last_time, hb_block->hb_node,
+ (unsigned long long)le64_to_cpu(hb_block->hb_generation),
+ (unsigned long long)le64_to_cpu(hb_block->hb_seq));
}
static inline void o2hb_prepare_block(struct o2hb_region *reg,
@@ -987,9 +989,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
/* With an up to date view of the slots, we can check that no
* other node has been improperly configured to heartbeat in
* our slot. */
- if (!o2hb_check_last_timestamp(reg))
- mlog(ML_ERROR, "Device \"%s\": another node is heartbeating
"
- "in our slot!\n", reg->hr_dev_name);
+ o2hb_check_last_timestamp(reg);
/* fill in the proper info for our next heartbeat */
o2hb_prepare_block(reg, reg->hr_generation);
@@ -1003,8 +1003,8 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
}
i = -1;
- while((i = find_next_bit(configured_nodes, O2NM_MAX_NODES, i + 1)) <
O2NM_MAX_NODES) {
-
+ while((i = find_next_bit(configured_nodes,
+ O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
change |= o2hb_check_slot(reg, ®->hr_slots[i]);
}
--
1.7.1
Sunil Mushran
2011-Feb-21 22:25 UTC
[Ocfs2-devel] [PATCH 2/2] ocfs2/dlm: Move kmalloc() outside the spinlock
In dlm_query_region_handler(), move the kmalloc outside the spinlock.
This allows us to use GFP_KERNEL instead of GFP_ATOMIC.
Signed-off-by: Sunil Mushran <sunil.mushran at oracle.com>
---
fs/ocfs2/dlm/dlmdomain.c | 28 ++++++++++++++++------------
1 files changed, 16 insertions(+), 12 deletions(-)
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 7e38a07..99805d5 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -926,9 +926,10 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg,
u32 len, void *data,
}
static int dlm_match_regions(struct dlm_ctxt *dlm,
- struct dlm_query_region *qr)
+ struct dlm_query_region *qr,
+ char *local, int locallen)
{
- char *local = NULL, *remote = qr->qr_regions;
+ char *remote = qr->qr_regions;
char *l, *r;
int localnr, i, j, foundit;
int status = 0;
@@ -957,13 +958,8 @@ static int dlm_match_regions(struct dlm_ctxt *dlm,
r += O2HB_MAX_REGION_NAME_LEN;
}
- local = kmalloc(sizeof(qr->qr_regions), GFP_ATOMIC);
- if (!local) {
- status = -ENOMEM;
- goto bail;
- }
-
- localnr = o2hb_get_all_regions(local, O2NM_MAX_REGIONS);
+ localnr = min(O2NM_MAX_REGIONS, locallen/O2HB_MAX_REGION_NAME_LEN);
+ localnr = o2hb_get_all_regions(local, (u8)localnr);
/* compare local regions with remote */
l = local;
@@ -1012,8 +1008,6 @@ static int dlm_match_regions(struct dlm_ctxt *dlm,
}
bail:
- kfree(local);
-
return status;
}
@@ -1075,6 +1069,7 @@ static int dlm_query_region_handler(struct o2net_msg *msg,
u32 len,
{
struct dlm_query_region *qr;
struct dlm_ctxt *dlm = NULL;
+ char *local = NULL;
int status = 0;
int locked = 0;
@@ -1083,6 +1078,13 @@ static int dlm_query_region_handler(struct o2net_msg
*msg, u32 len,
mlog(0, "Node %u queries hb regions on domain %s\n", qr->qr_node,
qr->qr_domain);
+ /* buffer used in dlm_mast_regions() */
+ local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL);
+ if (!local) {
+ status = -ENOMEM;
+ goto bail;
+ }
+
status = -EINVAL;
spin_lock(&dlm_domain_lock);
@@ -1112,13 +1114,15 @@ static int dlm_query_region_handler(struct o2net_msg
*msg, u32 len,
goto bail;
}
- status = dlm_match_regions(dlm, qr);
+ status = dlm_match_regions(dlm, qr, local, sizeof(qr->qr_regions));
bail:
if (locked)
spin_unlock(&dlm->spinlock);
spin_unlock(&dlm_domain_lock);
+ kfree(local);
+
return status;
}
--
1.7.1
Joel Becker
2011-Feb-22 09:34 UTC
[Ocfs2-devel] [PATCH 1/2] ocfs2/cluster: Heartbeat mismatch message improved
On Mon, Feb 21, 2011 at 02:25:24PM -0800, Sunil Mushran wrote:> If o2hb finds unexpected values in the heartbeat slot, it prints a message > "ERROR: Device "dm-6": another node is heartbeating in our slot!" > > This patch adds more information allowing us to see the actual mismatch. > The new message reads "ERROR: Heartbeat mismatch on "dm-6": expected(92:0x234543, > 0x76567) ondisk(92:0x234543, 0x76565)"Can you still say 'another node is heartbeating in our slot', just with the detail? The new message doesn't give a clue as to what is happening. Joel -- "The first thing we do, let's kill all the lawyers." -Henry VI, IV:ii http://www.jlbec.org/ jlbec at evilplan.org