Has anyone seen anything remotely like this before?
The problem occurs when I create a new DomU with the following setup:
kernel = '/boot/vmlinuz-2.6.16.33-xenU-domU'
memory = 384
name = 'some_name'
vif = [ 'bridge=xenbr0' ]
disk = [
    'phy:/dev/working/lvm/device,sda1,w',
    'phy:/dev/working/lvm/device2,sda2,w',
#   'phy:/dev/etherd/e2.5,sdc,w',
#   'phy:/dev/etherd/e2.6,sdd,w',
]
root = '/dev/sda1 ro'
vcpus = 1
The /dev/etherd devices are Coraid ATA-over-Ethernet (AoE) devices (model SR1521), and without them the DomU boots fine.
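For anyone trying to reproduce this: a quick sanity check of the AoE devices from dom0 might look like the sketch below (assuming the aoetools package is installed; the shelf/slot numbers are the ones from the config above):

# List the AoE targets dom0 has discovered (aoetools).
aoe-stat
# Raw read test against one of the devices to be exported.
dd if=/dev/etherd/e2.5 of=/dev/null bs=1M count=16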
As soon as I boot it, the DomU itself appears to come up fine, but two xvd kernel threads immediately appear in dom0 and spike to 100% CPU usage, as shown in this "top" output:
top - 02:37:14 up 9 days, 1:17, 1 user, load average: 10.86, 8.05, 4.04
Tasks: 174 total, 10 running, 164 sleeping, 0 stopped, 0 zombie
Cpu(s): 0.0% us, 49.4% sy, 0.0% ni, 50.5% id, 0.0% wa, 0.0% hi, 0.2% si
Mem: 524288k total, 447200k used, 77088k free, 56028k buffers
Swap: 0k total, 0k used, 0k free, 140492k cached

  PID USER  PR NI  VIRT RES SHR S %CPU %MEM   TIME+ COMMAND
31832 root  11 -5     0   0   0 R   97  0.0 7:04.73 xvd 40 98:260
31833 root  11 -5     0   0   0 R   95  0.0 7:04.74 xvd 40 98:250
 5406 root  16  0 21616 920 680 S    1  0.2 15:08.71 collectd
 7295 root  10 -5     0   0   0 S    0  0.0 4:57.45 xvd 2 fe:16b
    1 root  16  0  2612 564 480 S    0  0.1 0:04.63 init
    2 root  RT  0     0   0   0 S    0  0.0 0:00.84 migration/0
    3 root  34 19     0   0   0 S    0  0.0 0:27.89 ksoftirqd/0
    4 root  RT  0     0   0   0 S    0  0.0 0:00.04 watchdog/0
    5 root  RT  0     0   0   0 R    0  0.0 0:00.59 migration/1
    6 root  34 19     0   0   0 S    0  0.0 0:00.01 ksoftirqd/1
    7 root  RT  0     0   0   0 R    0  0.0 0:00.00 watchdog/1
    8 root  RT  0     0   0   0 R    0  0.0 0:01.14 migration/2
    9 root  34 19     0   0   0 S    0  0.0 0:00.02 ksoftirqd/2
   10 root  RT  0     0   0   0 R    0  0.0 0:00.01 watchdog/2
   11 root  RT  0     0   0   0 S    0  0.0 0:00.67 migration/3
   12 root  34 19     0   0   0 S    0  0.0 0:00.00 ksoftirqd/3
   13 root  RT  0     0   0   0 S    0  0.0 0:00.00 watchdog/3
If I look at the dmesg log, it appears that each block device I create triggers a kernel BUG like the following:
[781896.696277] BUG: soft lockup detected on CPU#1!
[781896.696286] CPU 1:
[781896.696289] Modules linked in: raid1 md_mod dlm cman netconsole dummy dm_mod aoe e1000 tg3
[781896.696299] Pid: 31833, comm: xvd 40 98:250 Tainted: GF 2.6.16.33-xen0 #1
[781896.696303] RIP: e030:[<ffffffff8013d3a2>] <ffffffff8013d3a2>{lock_timer_base+34}
[781896.696313] RSP: e02b:ffff8800093d9d88 EFLAGS: 00000246
[781896.696316] RAX: 0000000000000001 RBX: 0000000000000000 RCX: 0000000000000000
[781896.696321] RDX: ffffffffff578000 RSI: ffff8800093d9dc0 RDI: ffff880015ae6190
[781896.696325] RBP: ffff8800093d9da8 R08: 0000000000240004 R09: 0000000000000000
[781896.696329] R10: 0000000000000001 R11: ffff88001f442cc0 R12: ffff880015ae6190
[781896.696334] R13: ffff8800093d9dc0 R14: ffff880014a09dd0 R15: ffff8800093d9e68
[781896.696341] FS: 00002ab3cc642080(0000) GS:ffffffff804f3080(0000) knlGS:0000000000000000
[781896.696344] CS: e033 DS: 0000 ES: 0000
[781896.696346]
[781896.696347] Call Trace: <ffffffff8013d3fd>{try_to_del_timer_sync+29}
[781896.696355] <ffffffff8013d468>{del_timer_sync+24} <ffffffff8025e980>{blk_sync_queue+16}
[781896.696365] <ffffffff8025e9c2>{blk_cleanup_queue+50} <ffffffff802e96c2>{unplug_queue+50}
[781896.696375] <ffffffff802ea0f8>{blkif_schedule+888} <ffffffff802e9d80>{blkif_schedule+0}
[781896.696384] <ffffffff8014931a>{kthread+218} <ffffffff8012ddcd>{schedule_tail+77}
[781896.696394] <ffffffff8010c16a>{child_rip+8} <ffffffff80149240>{kthread+0}
[781896.696403] <ffffffff8010c162>{child_rip+0}
[781897.148121] xenbr0: topology change detected, sending tcn bpdu
[781897.148145] xenbr0: port 31(vif40.0) entering forwarding state
[781897.532118] BUG: soft lockup detected on CPU#2!
[781897.532132] CPU 2:
[781897.532135] Modules linked in: raid1 md_mod dlm cman netconsole dummy dm_mod aoe e1000 tg3
[781897.532148] Pid: 31832, comm: xvd 40 98:260 Tainted: GF 2.6.16.33-xen0 #1
[781897.532152] RIP: e030:[<ffffffff8013d3c4>] <ffffffff8013d3c4>{lock_timer_base+68}
[781897.532164] RSP: e02b:ffff88000f28dd88 EFLAGS: 00000246
[781897.532167] RAX: 0000000000000001 RBX: 0000000000000000 RCX: 0000000000000000
[781897.532172] RDX: ffffffffff578000 RSI: ffff88000f28ddc0 RDI: ffff880006f5b190
[781897.532176] RBP: ffff88000f28dda8 R08: 0000000002000000 R09: 0000000000000000
[781897.532180] R10: 0000000000000001 R11: ffff88001f442cc0 R12: ffff880006f5b190
[781897.532185] R13: ffff88000f28ddc0 R14: ffff88001c628e80 R15: ffff88000f28de68
[781897.532192] FS: 00002b04f44c7080(0000) GS:ffffffff804f3100(0000) knlGS:0000000000000000
[781897.532195] CS: e033 DS: 0000 ES: 0000
[781897.532197]
[781897.532198] Call Trace: <ffffffff8013d3fd>{try_to_del_timer_sync+29}
[781897.532207] <ffffffff8013d468>{del_timer_sync+24} <ffffffff8025e980>{blk_sync_queue+16}
[781897.532217] <ffffffff8025e9c2>{blk_cleanup_queue+50} <ffffffff802e96c2>{unplug_queue+50}
[781897.532228] <ffffffff802ea0f8>{blkif_schedule+888} <ffffffff802e9d80>{blkif_schedule+0}
[781897.532237] <ffffffff8014931a>{kthread+218} <ffffffff8012ddcd>{schedule_tail+77}
[781897.532248] <ffffffff8010c16a>{child_rip+8} <ffffffff80149240>{kthread+0}
[781897.532257] <ffffffff8010c162>{child_rip+0}
For now I can work around this by creating a simple device-mapper table that maps each AoE device one-to-one onto a new dm device, and exporting that instead. Tedious, obviously, but better than not working.
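For reference, the workaround is roughly the sketch below; the dm target name (e2_5) is just an illustration. blockdev --getsz reports the device size in 512-byte sectors, which is what a dm table expects.

# Wrap the AoE device in a trivial one-to-one linear device-mapper table.
SECTORS=$(blockdev --getsz /dev/etherd/e2.5)
echo "0 $SECTORS linear /dev/etherd/e2.5 0" | dmsetup create e2_5
# Then export the dm node to the DomU instead of the raw AoE device:
#   'phy:/dev/mapper/e2_5,sdc,w',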
Does anyone have any idea what is behind this?
--
Jayson Vantuyl
Systems Architect
Engine Yard
jvantuyl@engineyard.com
_______________________________________________
Xen-users mailing list
Xen-users@lists.xensource.com
http://lists.xensource.com/xen-users

Hate to reply to myself, but I meant:

> disk = [
>     'phy:/dev/working/lvm/device,sda1,w',
>     'phy:/dev/working/lvm/device2,sda2,w',
>     'phy:/dev/etherd/e2.5,sdc,w',
>     'phy:/dev/etherd/e2.6,sdd,w',
> ]

not the version with the two /dev/etherd lines commented out, as in the config I originally posted. The DomU that hits this is the one that actually includes the AoE devices.

--
Jayson Vantuyl
Systems Architect
Engine Yard
jvantuyl@engineyard.com

> Has anyone seen anything remotely like this before?

This looks... weird. I can't say I have any ideas, but I'd recommend you bring this to xen-devel@lists.xensource.com if you don't find a solution on this list.

Cheers,
Mark

--
Dave: Just a question. What use is a unicycle with no seat? And no pedals!
Mark: To answer a question with a question: What use is a skateboard?
Dave: Skateboards have wheels.
Mark: My wheel has a wheel!