Atin Mukherjee
2015-Aug-10 17:23 UTC
[Gluster-users] volume not working after yum update - gluster 3.6.3
-Atin
Sent from one plus one

On Aug 10, 2015 10:34 PM, "Kingsley" <gluster at gluster.dogwind.com> wrote:
>
> On Mon, 2015-08-10 at 22:22 +0530, Atin Mukherjee wrote:
> [snip]
>
>> strace output claims the command exited successfully. Are you sure ls
>> got hung?
>
> Not sure, but this one definitely hung. 'mkdir("test", 0777' was the last
> output, and it's been stuck here for about 7 minutes now:
>
> [root at voicemail1b-1 14391.broken]# strace mkdir test
> execve("/usr/bin/mkdir", ["mkdir", "test"], [/* 27 vars */]) = 0
> brk(0) = 0x8db000
> mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f3468a89000
> access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
> open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
> fstat(3, {st_mode=S_IFREG|0644, st_size=31874, ...}) = 0
> mmap(NULL, 31874, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f3468a81000
> close(3) = 0
> open("/lib64/libselinux.so.1", O_RDONLY|O_CLOEXEC) = 3
> read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\240d\0\0\0\0\0\0"..., 832) = 832
> fstat(3, {st_mode=S_IFREG|0755, st_size=147120, ...}) = 0
> mmap(NULL, 2246784, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f3468644000
> mprotect(0x7f3468665000, 2097152, PROT_NONE) = 0
> mmap(0x7f3468865000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x21000) = 0x7f3468865000
> mmap(0x7f3468867000, 6272, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f3468867000
> close(3) = 0
> open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
> read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\0\34\2\0\0\0\0\0"..., 832) = 832
> fstat(3, {st_mode=S_IFREG|0755, st_size=2107760, ...}) = 0
> mmap(NULL, 3932736, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f3468283000
> mprotect(0x7f3468439000, 2097152, PROT_NONE) = 0
> mmap(0x7f3468639000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1b6000) = 0x7f3468639000
> mmap(0x7f346863f000, 16960, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f346863f000
> close(3) = 0
> open("/lib64/libpcre.so.1", O_RDONLY|O_CLOEXEC) = 3
> read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\360\25\0\0\0\0\0\0"..., 832) = 832
> fstat(3, {st_mode=S_IFREG|0755, st_size=398272, ...}) = 0
> mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f3468a80000
> mmap(NULL, 2490888, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f3468022000
> mprotect(0x7f3468081000, 2097152, PROT_NONE) = 0
> mmap(0x7f3468281000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x5f000) = 0x7f3468281000
> close(3) = 0
> open("/lib64/liblzma.so.5", O_RDONLY|O_CLOEXEC) = 3
> read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0000/\0\0\0\0\0\0"..., 832) = 832
> fstat(3, {st_mode=S_IFREG|0755, st_size=153184, ...}) = 0
> mmap(NULL, 2245240, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f3467dfd000
> mprotect(0x7f3467e21000, 2093056, PROT_NONE) = 0
> mmap(0x7f3468020000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x23000) = 0x7f3468020000
> close(3) = 0
> open("/lib64/libdl.so.2", O_RDONLY|O_CLOEXEC) = 3
> read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\320\16\0\0\0\0\0\0"..., 832) = 832
> fstat(3, {st_mode=S_IFREG|0755, st_size=19512, ...}) = 0
> mmap(NULL, 2109744, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f3467bf9000
> mprotect(0x7f3467bfc000, 2093056, PROT_NONE) = 0
> mmap(0x7f3467dfb000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x7f3467dfb000
> close(3) = 0
> open("/lib64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = 3
> read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\240l\0\0\0\0\0\0"..., 832) = 832
> fstat(3, {st_mode=S_IFREG|0755, st_size=141616, ...}) = 0
> mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f3468a7f000
> mmap(NULL, 2208864, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f34679dd000
> mprotect(0x7f34679f3000, 2097152, PROT_NONE) = 0
> mmap(0x7f3467bf3000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x16000) = 0x7f3467bf3000
> mmap(0x7f3467bf5000, 13408, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f3467bf5000
> close(3) = 0
> mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f3468a7e000
> mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f3468a7c000
> arch_prctl(ARCH_SET_FS, 0x7f3468a7c800) = 0
> mprotect(0x7f3468639000, 16384, PROT_READ) = 0
> mprotect(0x7f3467bf3000, 4096, PROT_READ) = 0
> mprotect(0x7f3467dfb000, 4096, PROT_READ) = 0
> mprotect(0x7f3468020000, 4096, PROT_READ) = 0
> mprotect(0x7f3468281000, 4096, PROT_READ) = 0
> mprotect(0x7f3468865000, 4096, PROT_READ) = 0
> mprotect(0x611000, 4096, PROT_READ) = 0
> mprotect(0x7f3468a8a000, 4096, PROT_READ) = 0
> munmap(0x7f3468a81000, 31874) = 0
> set_tid_address(0x7f3468a7cad0) = 24942
> set_robust_list(0x7f3468a7cae0, 24) = 0
> rt_sigaction(SIGRTMIN, {0x7f34679e3780, [], SA_RESTORER|SA_SIGINFO, 0x7f34679ec130}, NULL, 8) = 0
> rt_sigaction(SIGRT_1, {0x7f34679e3810, [], SA_RESTORER|SA_RESTART|SA_SIGINFO, 0x7f34679ec130}, NULL, 8) = 0
> rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0
> getrlimit(RLIMIT_STACK, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0
> statfs("/sys/fs/selinux", {f_type=0xf97cff8c, f_bsize=4096, f_blocks=0, f_bfree=0, f_bavail=0, f_files=0, f_ffree=0, f_fsid={0, 0}, f_namelen=255, f_frsize=4096}) = 0
> statfs("/sys/fs/selinux", {f_type=0xf97cff8c, f_bsize=4096, f_blocks=0, f_bfree=0, f_bavail=0, f_files=0, f_ffree=0, f_fsid={0, 0}, f_namelen=255, f_frsize=4096}) = 0
> stat("/sys/fs/selinux", {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
> brk(0) = 0x8db000
> brk(0x8fc000) = 0x8fc000
> mkdir("test", 0777

Can you also collect the statedump of all the brick processes when the
command is hung?

+ Ravi, could you check this?

>> >> > Then ... do I need to run something on one of the bricks while
>> >> > strace is running?
>> >> >
>> >> > Cheers,
>> >> > Kingsley.
>> [snip]
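One possible way to gather the statedumps Atin asks for above is sketched below. This is not from the thread itself: it assumes a stock RPM install where dumps are written under /var/run/gluster on each brick server (the directory is governed by the server.statedump-path volume option), and it reuses the volume name callrec and the brick PIDs that appear later in the thread.

# From any node in the pool, ask every brick of the volume to dump its state:
gluster volume statedump callrec

# If the management CLI is itself unresponsive, a brick process can be told to
# dump state directly with SIGUSR1, using the Pid column from
# "gluster volume status callrec" (for example 29041 on gluster1a-1):
kill -USR1 29041

# Then, on each brick server, collect the dump files (default location assumed):
tar czf /root/statedump-$(hostname).tar.gz /var/run/gluster/*.dump.*

The SIGUSR1 route is worth keeping in mind when a hang stops the CLI from completing, since the signal is handled by the brick process itself.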
Kingsley
2015-Aug-10 18:28 UTC
[Gluster-users] volume not working after yum update - gluster 3.6.3
On Mon, 2015-08-10 at 22:53 +0530, Atin Mukherjee wrote:
[snip]
> > stat("/sys/fs/selinux", {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
> > brk(0) = 0x8db000
> > brk(0x8fc000) = 0x8fc000
> > mkdir("test", 0777
>
> Can you also collect the statedump of all the brick processes when the
> command is hung?
>
> + Ravi, could you check this?

I ran the command but I could not find where it put the output:

[root at gluster1a-1 ~]# gluster volume statedump callrec all
volume statedump: success
[root at gluster1a-1 ~]# gluster volume info callrec

Volume Name: callrec
Type: Replicate
Volume ID: a39830b7-eddb-4061-b381-39411274131a
Status: Started
Number of Bricks: 1 x 4 = 4
Transport-type: tcp
Bricks:
Brick1: gluster1a-1:/data/brick/callrec
Brick2: gluster1b-1:/data/brick/callrec
Brick3: gluster2a-1:/data/brick/callrec
Brick4: gluster2b-1:/data/brick/callrec
Options Reconfigured:
performance.flush-behind: off
[root at gluster1a-1 ~]# gluster volume status callrec
Status of volume: callrec
Gluster process                                 Port    Online  Pid
------------------------------------------------------------------------------
Brick gluster1a-1:/data/brick/callrec           49153   Y       29041
Brick gluster1b-1:/data/brick/callrec           49153   Y       31260
Brick gluster2a-1:/data/brick/callrec           49153   Y       31585
Brick gluster2b-1:/data/brick/callrec           49153   Y       12153
NFS Server on localhost                         2049    Y       29733
Self-heal Daemon on localhost                   N/A     Y       29741
NFS Server on gluster1b-1                       2049    Y       31872
Self-heal Daemon on gluster1b-1                 N/A     Y       31882
NFS Server on gluster2a-1                       2049    Y       32216
Self-heal Daemon on gluster2a-1                 N/A     Y       32226
NFS Server on gluster2b-1                       2049    Y       12752
Self-heal Daemon on gluster2b-1                 N/A     Y       12762

Task Status of Volume callrec
------------------------------------------------------------------------------
There are no active volume tasks

[root at gluster1a-1 ~]# ls -l /tmp
total 144
drwx------. 3 root root    16 Aug  8 22:20 systemd-private-Dp10Pz
-rw-------. 1 root root  5818 Jul 31 06:39 yum_save_tx.2015-07-31.06-39.JCvHd5.yumtx
-rw-------. 1 root root  5818 Aug  1 06:58 yum_save_tx.2015-08-01.06-58.wBytr2.yumtx
-rw-------. 1 root root  5818 Aug  2 05:18 yum_save_tx.2015-08-02.05-18.AXIFSe.yumtx
-rw-------. 1 root root  5818 Aug  3 07:15 yum_save_tx.2015-08-03.07-15.EDd8rg.yumtx
-rw-------. 1 root root  5818 Aug  4 03:48 yum_save_tx.2015-08-04.03-48.XE513B.yumtx
-rw-------. 1 root root  5818 Aug  5 09:03 yum_save_tx.2015-08-05.09-03.mX8xXF.yumtx
-rw-------. 1 root root 28869 Aug  6 06:39 yum_save_tx.2015-08-06.06-39.166wJX.yumtx
-rw-------. 1 root root 28869 Aug  7 07:20 yum_save_tx.2015-08-07.07-20.rLqJnT.yumtx
-rw-------. 1 root root 28869 Aug  8 08:29 yum_save_tx.2015-08-08.08-29.KKaite.yumtx
[root at gluster1a-1 ~]#

Where should I find the output of the statedump command?

Cheers,
Kingsley.
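A hedged pointer on the question just above (not part of the original exchange): on a packaged GlusterFS 3.6.x install the statedump files are written on each brick server rather than on the node where the CLI was run, and the default directory is normally /var/run/gluster, not /tmp; it can be overridden per volume with the server.statedump-path option. A quick check, assuming those defaults:

# Run on each brick server; file names follow roughly
#   <brick-path-with-slashes-as-dashes>.<pid>.dump.<timestamp>
ls -lt /var/run/gluster/ | head

# If a different directory was configured, it would have been set with:
#   gluster volume set callrec server.statedump-path <directory>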
> >> >> > Then ... do I need to run something on one of the bricks while
> >> >> > strace is running?
> >> >> >
> >> >> > Cheers,
> >> >> > Kingsley.
> >> >> >
> >> >> > > > [root at gluster1b-1 ~]# gluster volume heal callrec info
> >> >> > > > Brick gluster1a-1.dns99.co.uk:/data/brick/callrec/
> >> >> > > > <gfid:164f888f-2049-49e6-ad26-c758ee091863>
> >> >> > > > /recordings/834723/14391 - Possibly undergoing heal
> >> >> > > >
> >> >> > > > <gfid:e280b40c-d8b7-43c5-9da7-4737054d7a7f>
> >> >> > > > <gfid:b1fbda4a-732f-4f5d-b5a1-8355d786073e>
> >> >> > > > <gfid:edb74524-b4b7-4190-85e7-4aad002f6e7c>
> >> >> > > > <gfid:9b8b8446-1e27-4113-93c2-6727b1f457eb>
> >> >> > > > <gfid:650efeca-b45c-413b-acc3-f0a5853ccebd>
> >> >> > > > Number of entries: 7
> >> >> > > >
> >> >> > > > Brick gluster1b-1.dns99.co.uk:/data/brick/callrec/
> >> >> > > > Number of entries: 0
> >> >> > > >
> >> >> > > > Brick gluster2a-1.dns99.co.uk:/data/brick/callrec/
> >> >> > > > <gfid:e280b40c-d8b7-43c5-9da7-4737054d7a7f>
> >> >> > > > <gfid:164f888f-2049-49e6-ad26-c758ee091863>
> >> >> > > > <gfid:650efeca-b45c-413b-acc3-f0a5853ccebd>
> >> >> > > > <gfid:b1fbda4a-732f-4f5d-b5a1-8355d786073e>
> >> >> > > > /recordings/834723/14391 - Possibly undergoing heal
> >> >> > > >
> >> >> > > > <gfid:edb74524-b4b7-4190-85e7-4aad002f6e7c>
> >> >> > > > <gfid:9b8b8446-1e27-4113-93c2-6727b1f457eb>
> >> >> > > > Number of entries: 7
> >> >> > > >
> >> >> > > > Brick gluster2b-1.dns99.co.uk:/data/brick/callrec/
> >> >> > > > Number of entries: 0
> >> >> > > >
> >> >> > > > If I query each brick directly for the number of files/directories
> >> >> > > > within that, I get 1731 on gluster1a-1 and gluster2a-1, but 1737 on
> >> >> > > > the other two, using this command:
> >> >> > > >
> >> >> > > > # find /data/brick/callrec/recordings/834723/14391 -print | wc -l
> >> >> > > >
> >> >> > > > Cheers,
> >> >> > > > Kingsley.
> >> >> > > >
> >> >> > > > On Mon, 2015-08-10 at 11:05 +0100, Kingsley wrote:
> >> >> > > > > Sorry for the blind panic - restarting the volume seems to have
> >> >> > > > > fixed it.
> >> >> > > > >
> >> >> > > > > But then my next question - why is this necessary? Surely it
> >> >> > > > > undermines the whole point of a high availability system?
> >> >> > > > >
> >> >> > > > > Cheers,
> >> >> > > > > Kingsley.
> >> >> > > > >
> >> >> > > > > On Mon, 2015-08-10 at 10:53 +0100, Kingsley wrote:
> >> >> > > > > > Hi,
> >> >> > > > > >
> >> >> > > > > > We have a 4 way replicated volume using gluster 3.6.3 on CentOS 7.
> >> >> > > > > >
> >> >> > > > > > Over the weekend I did a yum update on each of the bricks in turn,
> >> >> > > > > > but now when clients (using fuse mounts) try to access the volume,
> >> >> > > > > > it hangs. Gluster itself wasn't updated (we've disabled that repo
> >> >> > > > > > so that we keep to 3.6.3 for now).
> >> >> > > > > >
> >> >> > > > > > This was what I did:
> >> >> > > > > >
> >> >> > > > > >     * on first brick, "yum update"
> >> >> > > > > >     * reboot brick
> >> >> > > > > >     * watch "gluster volume status" on another brick and wait
> >> >> > > > > >       for it to say all 4 bricks are online before proceeding to
> >> >> > > > > >       update the next brick
> >> >> > > > > >
> >> >> > > > > > I was expecting the clients might pause 30 seconds while they
> >> >> > > > > > notice a brick is offline, but then recover.
> >> >> > > > > >
> >> >> > > > > > I've tried re-mounting clients, but that hasn't helped.
> >> >> > > > > >
> >> >> > > > > > I can't see much data in any of the log files.
> >> >> > > > > >
> >> >> > > > > > I've tried "gluster volume heal callrec" but it doesn't seem to
> >> >> > > > > > have helped.
> >> >> > > > > >
> >> >> > > > > > What shall I do next?
> >> >> > > > > >
> >> >> > > > > > I've pasted some stuff below in case any of it helps.
> >> >> > > > > >
> >> >> > > > > > Cheers,
> >> >> > > > > > Kingsley.
> >> >> > > > > >
> >> >> > > > > > [root at gluster1b-1 ~]# gluster volume info callrec
> >> >> > > > > >
> >> >> > > > > > Volume Name: callrec
> >> >> > > > > > Type: Replicate
> >> >> > > > > > Volume ID: a39830b7-eddb-4061-b381-39411274131a
> >> >> > > > > > Status: Started
> >> >> > > > > > Number of Bricks: 1 x 4 = 4
> >> >> > > > > > Transport-type: tcp
> >> >> > > > > > Bricks:
> >> >> > > > > > Brick1: gluster1a-1:/data/brick/callrec
> >> >> > > > > > Brick2: gluster1b-1:/data/brick/callrec
> >> >> > > > > > Brick3: gluster2a-1:/data/brick/callrec
> >> >> > > > > > Brick4: gluster2b-1:/data/brick/callrec
> >> >> > > > > > Options Reconfigured:
> >> >> > > > > > performance.flush-behind: off
> >> >> > > > > > [root at gluster1b-1 ~]#
> >> >> > > > > >
> >> >> > > > > > [root at gluster1b-1 ~]# gluster volume status callrec
> >> >> > > > > > Status of volume: callrec
> >> >> > > > > > Gluster process                          Port   Online  Pid
> >> >> > > > > > ------------------------------------------------------------------------------
> >> >> > > > > > Brick gluster1a-1:/data/brick/callrec    49153  Y       6803
> >> >> > > > > > Brick gluster1b-1:/data/brick/callrec    49153  Y       2614
> >> >> > > > > > Brick gluster2a-1:/data/brick/callrec    49153  Y       2645
> >> >> > > > > > Brick gluster2b-1:/data/brick/callrec    49153  Y       4325
> >> >> > > > > > NFS Server on localhost                  2049   Y       2769
> >> >> > > > > > Self-heal Daemon on localhost            N/A    Y       2789
> >> >> > > > > > NFS Server on gluster2a-1                2049   Y       2857
> >> >> > > > > > Self-heal Daemon on gluster2a-1          N/A    Y       2814
> >> >> > > > > > NFS Server on 88.151.41.100              2049   Y       6833
> >> >> > > > > > Self-heal Daemon on 88.151.41.100        N/A    Y       6824
> >> >> > > > > > NFS Server on gluster2b-1                2049   Y       4428
> >> >> > > > > > Self-heal Daemon on gluster2b-1          N/A    Y       4387
> >> >> > > > > >
> >> >> > > > > > Task Status of Volume callrec
> >> >> > > > > > ------------------------------------------------------------------------------
> >> >> > > > > > There are no active volume tasks
> >> >> > > > > >
> >> >> > > > > > [root at gluster1b-1 ~]#
> >> >> > > > > >
> >> >> > > > > > [root at gluster1b-1 ~]# gluster volume heal callrec info
> >> >> > > > > > Brick gluster1a-1.dns99.co.uk:/data/brick/callrec/
> >> >> > > > > > /to_process - Possibly undergoing heal
> >> >> > > > > >
> >> >> > > > > > Number of entries: 1
> >> >> > > > > >
> >> >> > > > > > Brick gluster1b-1.dns99.co.uk:/data/brick/callrec/
> >> >> > > > > > Number of entries: 0
> >> >> > > > > >
> >> >> > > > > > Brick gluster2a-1.dns99.co.uk:/data/brick/callrec/
> >> >> > > > > > /to_process - Possibly undergoing heal
> >> >> > > > > >
> >> >> > > > > > Number of entries: 1
> >> >> > > > > >
> >> >> > > > > > Brick gluster2b-1.dns99.co.uk:/data/brick/callrec/
> >> >> > > > > > Number of entries: 0
> >> >> > > > > >
> >> >> > > > > > [root at gluster1b-1 ~]#
> [snip]
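The quoted procedure above waits only for all four bricks to show Online in "gluster volume status" before rebooting the next server. A hedged sketch of an extra guard that the heal info output in this thread suggests, not an official upgrade procedure: also wait until every brick reports zero pending heal entries before moving on. The grep pattern simply matches the "Number of entries:" lines shown above; the 30-second sleep is arbitrary.

# After an updated brick is back online, wait for self-heal to drain
# before updating the next server:
while gluster volume heal callrec info | grep -E '^Number of entries: [1-9]' >/dev/null; do
    echo "self-heal still has pending entries; waiting..."
    sleep 30
done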