Hi, I upgraded our test Lustre file system to the latest version, 1.6.5.1, available from the Sun website. I have one OSS with one OST, and one MDS with a combined MGS and MDT. Both servers are running RHEL4 x86_64 with the 2.6.9-67.0.7.EL_lustre.1.6.5.1smp kernel; the interconnect is InfiniBand, and I am using the IB modules provided with Lustre. When I mount the filesystem and start writing to it, the OSS crashes with a kernel panic; see the log below: Lustre: 0:0:(watchdog.c:130:lcw_cb()) Watchdog triggered for pid 17398: it was inactive for 200s Lustre: 0:0:(linux-debug.c:167:libcfs_debug_dumpstack()) showing stack for process 17397 ll_ost_io_92 D 0000000000000002 0 17397 1 17398 17396 (L-TLB) 00000101156bf538 0000000000000046 00000101956bf616 ffffffff801ece0f 0000000000000000 ffffff0010776340 000001010e14c6c0 0000000100000001 0000010113f90030 000000000012b585 Call Trace:ll_ost_io_82 D 000001012ab79400 0 17387 1 17388 17386 (L-TLB) 000001011c252d88 0000000000000046 ffffffffa000288c 0000010115b213c0 0000000000000246 00000100cf851c00 000001012bafa940 0000000200000000 000001010f71f030 0000000000000814 Call Trace:<ffffffffa000288c>{:scsi_mod:scsi_done+0} <ffffffff801ece0f>{vsnprintf+1406} <ffffffff8024f658>{elv_next_request+238} <ffffffffa0007df8>{:scsi_mod:scsi_request_fn+1100} <ffffffff8030cc1f>{__down+147} <ffffffff80133804>{default_wake_function+0} <ffffffffa067b484>{:ko2iblnd:kiblnd_init_tx_msg+308} <ffffffff8030e2f6>{io_schedule+38} <ffffffff80179e24>{__wait_on_buffer+125} <ffffffff80179caa>{bh_wake_function+0} <ffffffff80179caa>{bh_wake_function+0} <ffffffffa07cad2b>{:ldiskfs:ldiskfs_mb_init_cache+635} <ffffffff8030e73d>{__down_failed+53} <ffffffffa06c6670>{:lquota:filter_quota_check+0} <ffffffffa0843acf>{:obdfilter:.text.lock.filter_io_26+35} <ffffffffa083b01e>{:obdfilter:filter_commitrw+126} <ffffffff80158b8e>{add_to_page_cache+167} <ffffffffa07cb450>{:ldiskfs:ldiskfs_mb_load_buddy+304} <ffffffffa07cc7b4>{:ldiskfs:ldiskfs_mb_regular_allocator+1028} <4>Lustre: 
0:0:(watchdog.c:130:lcw_cb()) Watchdog triggered for pid 17388: it was inactive for 200s Lustre: 0:0:(watchdog.c:130:lcw_cb()) Skipped 2 previous similar messages Lustre: 0:0:(linux-debug.c:167:libcfs_debug_dumpstack()) showing stack for process 17388 Lustre: 0:0:(linux-debug.c:167:libcfs_debug_dumpstack()) Skipped 1 previous similar message ll_ost_io_83 D 0000000000000002 0 17388 1 17389 17387 (L-TLB) 000001011c35f538 0000000000000046 000001019c35f616 ffffffff801ece0f 0000000000000000 ffffff0010776508 000001010e14c6c0 0000000000000001 000001011c336800 00000000000de7ad Call Trace:<ffffffff801351dc>{autoremove_wake_function+0} <ffffffffa07d10fd>{:ldiskfs:ldiskfs_mb_new_blocks+333} <ffffffff801351dc>{autoremove_wake_function+0} <ffffffffa0814eb4>{:fsfilt_ldiskfs:ldiskfs_ext_new_extent_cb+884} <ffffffffa028e06a>{:ib_ipath:ipath_verbs_send+1883} <ffffffffa07c6d7f>{:ldiskfs:ldiskfs_ext_find_extent+255} <ffffffffa07c8972>{:ldiskfs:ldiskfs_ext_walk_space+482} <ffffffffa0814b40>{:fsfilt_ldiskfs:ldiskfs_ext_new_extent_cb+0} <ffffffffa0815343>{:fsfilt_ldiskfs:fsfilt_map_nblocks+307} <ffffffffa028560f>{:ib_ipath:ipath_do_send+1852} <ffffffff8013f734>{__mod_timer+293} <ffffffffa08155bb>{:fsfilt_ldiskfs:fsfilt_ldiskfs_map_ext_inode_pages+539} <ffffffff801ece0f>{vsnprintf+1406} <ffffffff801ece0f>{vsnprintf+1406} <ffffffffa067cb38>{:ko2iblnd:kiblnd_check_sends+2040} <ffffffffa08410b4>{:obdfilter:filter_direct_io+1108} <ffffffffa01ba524>{:jbd:journal_start+223} <ffffffffa081384b>{:fsfilt_ldiskfs:fsfilt_ldiskfs_brw_start+763} <ffffffffa0842c1d>{:obdfilter:filter_commitrw_write+4957} <ffffffffa04f5539>{:lvfs:pop_ctxt+505} <ffffffff8030e4ce>{schedule_timeout+411} <ffffffffa07f8688>{:ost:ost_checksum_bulk+200} <ffffffffa083b01e>{:obdfilter:filter_commitrw+126} <ffffffffa07fddd1>{:ost:ost_brw_write+9505} <ffffffffa07f87ee>{:ost:ost_checksum_bulk+558} <ffffffffa07f8688>{:ost:ost_checksum_bulk+200} <ffffffffa07fddd1>{:ost:ost_brw_write+9505} <ffffffff80133804>{default_wake_function+0} 
<ffffffff80133804>{default_wake_function+0} <ffffffffa06015ef>{:ptlrpc:lustre_msg_get_version+95} <ffffffffa06015ef>{:ptlrpc:lustre_msg_get_version+95} <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} <ffffffffa06016e5>{:ptlrpc:lustre_msg_check_version+69} <ffffffffa06016e5>{:ptlrpc:lustre_msg_check_version+69} <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} <ffffffffa080364d>{:ost:ost_handle+11661} <ffffffffa080364d>{:ost:ost_handle+11661} <ffffffff8015c830>{__rmqueue+218} <ffffffff8015c830>{__rmqueue+218} <ffffffffa060a451>{:ptlrpc:ptlrpc_check_req+17} <ffffffffa060a451>{:ptlrpc:ptlrpc_check_req+17} <ffffffffa060c629>{:ptlrpc:ptlrpc_server_handle_request+2457} <ffffffffa060c629>{:ptlrpc:ptlrpc_server_handle_request+2457} <ffffffffa04df45e>{:libcfs:lcw_update_time+30} <ffffffffa04df45e>{:libcfs:lcw_update_time+30} <ffffffff80133855>{__wake_up_common+67} <ffffffffa060ed05>{:ptlrpc:ptlrpc_main+3989} <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} <ffffffff8013f734>{__mod_timer+293} <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} <ffffffffa060ed05>{:ptlrpc:ptlrpc_main+3989} <ffffffff80110de3>{child_rip+8} <ffffffffa060dd70>{:ptlrpc:ptlrpc_main+0} <ffffffff80110ddb>{child_rip+0} <ffffffff8030cc1f>{__down+147} <ffffffff80133804>{default_wake_function+0} <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} <ffffffff80110de3>{child_rip+8} <ffffffffa060dd70>{:ptlrpc:ptlrpc_main+0} <ffffffff80110ddb>{child_rip+0} ll_ost_io_84 D 0000000000000002 0 17389 1 17390 17388 (L-TLB) 000001011ac6d538 0000000000000046 000001019ac6d616 ffffffff801ece0f 0000000000000000 ffffff0010776340 000001010e14c6c0 0000000100000001 000001011c336030 000000000012c37b Call Trace:<ffffffff80133804>{default_wake_function+0} <ffffffffa067b484>{:ko2iblnd:kiblnd_init_tx_msg+308} <ffffffff801ece0f>{vsnprintf+1406} 
<ffffffff8030e73d>{__down_failed+53} <ffffffffa06c6670>{:lquota:filter_quota_check+0} <ffffffffa0843acf>{:obdfilter:.text.lock.filter_io_26+35} <ffffffff80131bc7>{recalc_task_prio+337} <ffffffffa05160a0>{:lnet:lnet_send+2544} <ffffffffa083b01e>{:obdfilter:filter_commitrw+126} <ffffffff8030cc1f>{__down+147} <ffffffff80133804>{default_wake_function+0} <ffffffffa067b484>{:ko2iblnd:kiblnd_init_tx_msg+308} <ffffffff8030e4ce>{schedule_timeout+411} <ffffffffa07f8688>{:ost:ost_checksum_bulk+200} <ffffffffa07fddd1>{:ost:ost_brw_write+9505} <ffffffff80133804>{default_wake_function+0} <ffffffffa06015ef>{:ptlrpc:lustre_msg_get_version+95} <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} <ffffffffa06016e5>{:ptlrpc:lustre_msg_check_version+69} <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} <ffffffffa080364d>{:ost:ost_handle+11661} <ffffffffa0516b48>{:lnet:lnet_match_blocked_msg+920} <ffffffff8015c830>{__rmqueue+218} <ffffffffa060a451>{:ptlrpc:ptlrpc_check_req+17} <ffffffffa060c629>{:ptlrpc:ptlrpc_server_handle_request+2457} <ffffffffa04df45e>{:libcfs:lcw_update_time+30} <ffffffff8030e73d>{__down_failed+53} <ffffffffa06c6670>{:lquota:filter_quota_check+0} <ffffffffa0843acf>{:obdfilter:.text.lock.filter_io_26+35} <ffffffff80133855>{__wake_up_common+67} <ffffffffa060ed05>{:ptlrpc:ptlrpc_main+3989} <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} <ffffffff80110de3>{child_rip+8} <ffffffffa060dd70>{:ptlrpc:ptlrpc_main+0} <ffffffff80131bc7>{recalc_task_prio+337} <ffffffff80110ddb>{child_rip+0} ll_ost_io_91 D<ffffffffa05160a0>{:lnet:lnet_send+2544} 0000000000000002 0 17396 1 17397 17395 (L-TLB) 0000010113fb3538 0000000000000046 0000010193fb3616 ffffffff801ece0f 0000000000000000 ffffff0010776638 000001010e14c6c0 0000000000000001 0000010113f90800 00000000000d4d86 Call Trace:<ffffffffa083b01e>{:obdfilter:filter_commitrw+126} <ffffffff801ece0f>{vsnprintf+1406} <ffffffff8030e4ce>{schedule_timeout+411} 
<ffffffffa07f8688>{:ost:ost_checksum_bulk+200} <ffffffffa07fddd1>{:ost:ost_brw_write+9505} <ffffffff80133804>{default_wake_function+0} <ffffffffa06015ef>{:ptlrpc:lustre_msg_get_version+95} <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} <ffffffffa06016e5>{:ptlrpc:lustre_msg_check_version+69} <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} <ffffffffa080364d>{:ost:ost_handle+11661} <ffffffffa0516ad7>{:lnet:lnet_match_blocked_msg+807} <ffffffffa0516b48>{:lnet:lnet_match_blocked_msg+920} <ffffffffa060a451>{:ptlrpc:ptlrpc_check_req+17} <ffffffffa060c629>{:ptlrpc:ptlrpc_server_handle_request+2457} <ffffffffa04df45e>{:libcfs:lcw_update_time+30} <ffffffff80133855>{__wake_up_common+67} <ffffffffa060ed05>{:ptlrpc:ptlrpc_main+3989} <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} <ffffffff80110de3>{child_rip+8} <ffffffffa060dd70>{:ptlrpc:ptlrpc_main+0} <ffffffff80110ddb>{child_rip+0} ll_ost_io_107 D 0000000000000002 0 17412 1 17413 17411 (L-TLB) 000001010ea6f538 0000000000000046 000001018ea6f616 ffffffff801ece0f 0000000000000000 ffffff0010776768 000001010e14c6c0 0000000100000001 000001010ea48800 000000000012d2f5 Call Trace:<ffffffff8030cc1f>{__down+147} <ffffffff80133804>{default_wake_function+0} <ffffffffa067b484>{:ko2iblnd:kiblnd_init_tx_msg+308} <ffffffff801ece0f>{vsnprintf+1406} <ffffffff8030e73d>{__down_failed+53} <ffffffffa06c6670>{:lquota:filter_quota_check+0} <ffffffffa0843acf>{:obdfilter:.text.lock.filter_io_26+35} <ffffffffa083b01e>{:obdfilter:filter_commitrw+126} <ffffffff8030cc1f>{__down+147} <ffffffff80133804>{default_wake_function+0} <ffffffffa067b484>{:ko2iblnd:kiblnd_init_tx_msg+308} <ffffffff8030e4ce>{schedule_timeout+411} <ffffffffa07f8688>{:ost:ost_checksum_bulk+200} <ffffffffa07fddd1>{:ost:ost_brw_write+9505} <ffffffff80133804>{default_wake_function+0} <ffffffffa06015ef>{:ptlrpc:lustre_msg_get_version+95} <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} 
<ffffffffa06016e5>{:ptlrpc:lustre_msg_check_version+69} <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} <ffffffffa080364d>{:ost:ost_handle+11661} <ffffffff8015c830>{__rmqueue+218} <ffffffff8030e73d>{__down_failed+53} <ffffffffa06c6670>{:lquota:filter_quota_check+0} <ffffffffa0843acf>{:obdfilter:.text.lock.filter_io_26+35} <ffffffffa060a451>{:ptlrpc:ptlrpc_check_req+17} <ffffffffa060c629>{:ptlrpc:ptlrpc_server_handle_request+2457} <ffffffffa04df45e>{:libcfs:lcw_update_time+30} <ffffffff80131bc7>{recalc_task_prio+337} <ffffffffa05160a0>{:lnet:lnet_send+2544} <ffffffffa083b01e>{:obdfilter:filter_commitrw+126} <ffffffff8013f734>{__mod_timer+293} <ffffffffa060ed05>{:ptlrpc:ptlrpc_main+3989} <ffffffff80133804>{default_wake_function+0} <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} <ffffffff80110de3>{child_rip+8} <ffffffffa060dd70>{:ptlrpc:ptlrpc_main+0} <ffffffff80110ddb>{child_rip+0} <ffffffff8030e4ce>{schedule_timeout+411} <ffffffffa07f8688>{:ost:ost_checksum_bulk+200} <ffffffffa07fddd1>{:ost:ost_brw_write+9505} <ffffffff80133804>{default_wake_function+0} <ffffffffa06015ef>{:ptlrpc:lustre_msg_get_version+95} <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} <ffffffffa06016e5>{:ptlrpc:lustre_msg_check_version+69} <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} <ffffffffa080364d>{:ost:ost_handle+11661} <ffffffffa0516b48>{:lnet:lnet_match_blocked_msg+920} <ffffffffa060a451>{:ptlrpc:ptlrpc_check_req+17} <ffffffffa060c629>{:ptlrpc:ptlrpc_server_handle_request+2457} <ffffffffa04df45e>{:libcfs:lcw_update_time+30} <ffffffff8013f734>{__mod_timer+293} <ffffffffa060ed05>{:ptlrpc:ptlrpc_main+3989} <ffffffff80133804>{default_wake_function+0} <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} <ffffffff80110de3>{child_rip+8} <ffffffffa060dd70>{:ptlrpc:ptlrpc_main+0} <ffffffff80110ddb>{child_rip+0} ll_ost_io_121 D 0000000000000002 0 17426 1 17427 17425 (L-TLB) 0000010110113538 0000000000000046 
0000010190113616 ffffffff801ece0f 0000000000000000 ffffff00107763d8 000001010e14c6c0 0000000100000001 00000101100e6800 0000000000113501 Call Trace:<ffffffff801ece0f>{vsnprintf+1406} <ffffffff8030cc1f>{__down+147} <ffffffff80133804>{default_wake_function+0} <ffffffffa067b484>{:ko2iblnd:kiblnd_init_tx_msg+308} <ffffffff8030e73d>{__down_failed+53} <ffffffffa06c6670>{:lquota:filter_quota_check+0} <ffffffffa0843acf>{:obdfilter:.text.lock.filter_io_26+35} <ffffffffa083b01e>{:obdfilter:filter_commitrw+126} <ffffffff8030e4ce>{schedule_timeout+411} <ffffffffa07f8688>{:ost:ost_checksum_bulk+200} <ffffffffa07fddd1>{:ost:ost_brw_write+9505} <ffffffff80133804>{default_wake_function+0} <ffffffffa06015ef>{:ptlrpc:lustre_msg_get_version+95} <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} <ffffffffa06016e5>{:ptlrpc:lustre_msg_check_version+69} <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} <ffffffffa080364d>{:ost:ost_handle+11661} <ffffffff8015c830>{__rmqueue+218} <ffffffffa060a451>{:ptlrpc:ptlrpc_check_req+17} <ffffffffa060c629>{:ptlrpc:ptlrpc_server_handle_request+2457} <ffffffffa04df45e>{:libcfs:lcw_update_time+30} <ffffffff8013f734>{__mod_timer+293} <ffffffffa060ed05>{:ptlrpc:ptlrpc_main+3989} <ffffffff80133804>{default_wake_function+0} <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} <ffffffff80110de3>{child_rip+8} <ffffffffa060dd70>{:ptlrpc:ptlrpc_main+0} <ffffffff80110ddb>{child_rip+0} Lustre: 0:0:(watchdog.c:130:lcw_cb()) Skipped 4 previous similar messages Lustre: 0:0:(linux-debug.c:167:libcfs_debug_dumpstack()) showing stack for process 17398 Lustre: 0:0:(linux-debug.c:167:libcfs_debug_dumpstack()) Skipped 4 previous similar messages ll_ost_io_93 D 0000000000000002 0 17398 1 17399 17397 (L-TLB) 00000101157cd538 0000000000000046 00000101957cd616 ffffffff00000073 0000000000000000 0000000010776508 0000010001053a20 0000000200000001 00000101157a7800 0000000000161205 Call Trace:<ffffffff8030ec6c>{.text.lock.spinlock+2} 
<ffffffff8030cc1f>{__down+147} <ffffffff80133804>{default_wake_function+0} <ffffffffa067b484>{:ko2iblnd:kiblnd_init_tx_msg+308} <ffffffff8030e73d>{__down_failed+53} <ffffffffa06c6670>{:lquota:filter_quota_check+0} <ffffffffa0843acf>{:obdfilter:.text.lock.filter_io_26+35} <ffffffffa04f5539>{:lvfs:pop_ctxt+505} <ffffffff80131bc7>{recalc_task_prio+337} <ffffffffa05160a0>{:lnet:lnet_send+2544} <ffffffffa083b01e>{:obdfilter:filter_commitrw+126} <ffffffffa07f8801>{:ost:ost_checksum_bulk+577} <ffffffffa07f8688>{:ost:ost_checksum_bulk+200} <ffffffffa07fddd1>{:ost:ost_brw_write+9505} <ffffffff8017a62e>{end_buffer_async_read+0} <ffffffff80133804>{default_wake_function+0} <ffffffffa06015ef>{:ptlrpc:lustre_msg_get_version+95} <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} <ffffffffa06016e5>{:ptlrpc:lustre_msg_check_version+69} <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} <ffffffffa080364d>{:ost:ost_handle+11661} <ffffffff8017a62e>{end_buffer_async_read+0} <ffffffff8017a62e>{end_buffer_async_read+0} <ffffffff8017a62e>{end_buffer_async_read+0} <ffffffff8017a62e>{end_buffer_async_read+0} <ffffffff8015c830>{__rmqueue+218} <ffffffff8015c830>{__rmqueue+218} <ffffffffa060a451>{:ptlrpc:ptlrpc_check_req+17} <ffffffffa060c629>{:ptlrpc:ptlrpc_server_handle_request+2457} <ffffffffa04df45e>{:libcfs:lcw_update_time+30} <ffffffff8013f734>{__mod_timer+293} <ffffffffa060ed05>{:ptlrpc:ptlrpc_main+3989} <ffffffff8017a62e>{end_buffer_async_read+0} <ffffffff80133804>{default_wake_function+0} <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} <ffffffff80110de3>{child_rip+8} <ffffffffa060dd70>{:ptlrpc:ptlrpc_main+0} <ffffffff80110ddb>{child_rip+0} LustreError: 17397:0:(filter_io_26.c:700:filter_commitrw_write()) testfs-OST0001: slow i_mutex 223s LustreError: 17398:0:(filter_io_26.c:700:filter_commitrw_write()) testfs-OST0001: slow i_mutex 223s LustreError: 17387:0:(filter_io_26.c:765:filter_commitrw_write()) testfs-OST0001: slow direct_io 223s Lustre: 
17397:0:(watchdog.c:312:lcw_update_time()) Expired watchdog for pid 17397 disabled after 223.2918s Lustre: 17387:0:(watchdog.c:312:lcw_update_time()) Expired watchdog for pid 17387 disabled after 223.3108s Lustre: 17387:0:(watchdog.c:312:lcw_update_time()) Skipped 5 previous similar messages slab: cache size-1620 error: slabs_full accounting error slab: cache size-1620 error: slabs_full accounting error slab: cache size-1620 error: slabs_full accounting error Unable to handle kernel NULL pointer dereference at 0000000000000000 RIP: <ffffffff801623c4>{s_show+62} PML4 112a23067 PGD 114d4d067 PMD 0 Oops: 0000 [1] SMP CPU 1 Modules linked in: obdfilter(U) fsfilt_ldiskfs(U) ost(U) mgc(U) ldiskfs(U) lustre(U) lov(U) mdc(U) lquota(U) osc(U) ko2iblnd(U) ptlrpc(U) obdclass(U) lnet(U) lvfs(U) libcfs(U) sg(U) dell_rbu(U) autofs4(U) i2c_nforce2(U) i2c_amd756(U) i2c_isa(U) i2c_amd8111(U) i2c_i801(U) i2c_core(U) qlgc_vnic(U) iw_cxgb3(U) cxgb3(U) mlx4_ib(U) mlx4_core(U) ib_mthca(U) ipmi_devintf(U) ipmi_si(U) ipmi_msghandler(U) rdma_ucm(U) ib_sdp(U) rdma_cm(U) iw_cm(U) ib_addr(U) ib_ipoib(U) md5(U) ipv6(U) cpufreq_powersave(U) mptctl(U) dm_mirror(U) dm_round_robin(U) dm_multipath(U) dm_mod(U) sr_mod(U) usb_storage(U) joydev(U) button(U) battery(U) ac(U) uhci_hcd(U) ehci_hcd(U) hw_random(U) ib_ipath(U) ib_umad(U) ib_ucm(U) ib_uverbs(U) ib_cm(U) ib_sa(U) ib_mad(U) ib_core(U) ata_piix(U) libata(U) ext3(U) jbd(U) tg3(U) s2io(U) qla2400(U) qla2xxx(U) scsi_transport_fc(U) nfs(U) nfs_acl(U) lockd(U) sunrpc(U) mptsas(U) mptscsi(U) mptbase(U) megaraid_sas(U) e1000(U) bnx2(U) sd_mod(U) scsi_mod(U) Pid: 15569, comm: collectl Not tainted 2.6.9-67.0.7.EL_lustre.1.6.5.1smp RIP: 0010:[<ffffffff801623c4>] <ffffffff801623c4>{s_show+62} RSP: 0018:0000010115823e68 EFLAGS: 00010012 RAX: ffffffff80329f7a RBX: 00000100cffa5580 RCX: 00000100cffa5501 RDX: 0000000000000004 RSI: 0000000000000000 RDI: 00000100cffa56e8 RBP: ffffffff80329f7a R08: 00000000fffffffd R09: 0000000000000000 R10: 0000000000000000 
R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000001000 R14: 000001012b617b80 R15: 0000000000000020 FS: 0000002a9630ee80(0000) GS:ffffffff8048e780(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000012bd38000 CR4: 00000000000006e0 Process collectl (pid: 15569, threadinfo 0000010115822000, task 00000101280a4800) Stack: 0000000000000000 0000000000000000 0000000000000008 00000100cffa5580 000001012b617b80 0000000000000000 0000000000001000 0000000000000ea2 0000000000000000 ffffffff80196c1a Call Trace:<ffffffff80196c1a>{seq_read+445} <ffffffff80178c28>{vfs_read+207} <ffffffff80178e84>{sys_read+69} <ffffffff8011022a>{system_call+126} Code: 48 8b 06 0f 18 08 48 8d 83 18 01 00 00 48 39 c6 74 2e 8b 93 RIP <ffffffff801623c4>{s_show+62} RSP <0000010115823e68> CR2: 0000000000000000 <0>Kernel panic - not syncing: Oops NMI Watchdog detected LOCKUP, CPU=2, registers: CPU 2 Modules linked in: obdfilter(U) fsfilt_ldiskfs(U) ost(U) mgc(U) ldiskfs(U) lustre(U) lov(U) mdc(U) lquota(U) osc(U) ko2iblnd(U) ptlrpc(U) obdclass(U) lnet(U) lvfs(U) libcfs(U) sg(U) dell_rbu(U) autofs4(U) i2c_nforce2(U) i2c_amd756(U) i2c_isa(U) i2c_amd8111(U) i2c_i801(U) i2c_core(U) qlgc_vnic(U) iw_cxgb3(U) cxgb3(U) mlx4_ib(U) mlx4_core(U) ib_mthca(U) ipmi_devintf(U) ipmi_si(U) ipmi_msghandler(U) rdma_ucm(U) ib_sdp(U) rdma_cm(U) iw_cm(U) ib_addr(U) ib_ipoib(U) md5(U) ipv6(U) cpufreq_powersave(U) mptctl(U) dm_mirror(U) dm_round_robin(U) dm_multipath(U) dm_mod(U) sr_mod(U) usb_storage(U) joydev(U) button(U) battery(U) ac(U) uhci_hcd(U) ehci_hcd(U) hw_random(U) ib_ipath(U) ib_umad(U) ib_ucm(U) ib_uverbs(U) ib_cm(U) ib_sa(U) ib_mad(U) ib_core(U) ata_piix(U) libata(U) ext3(U) jbd(U) tg3(U) s2io(U) qla2400(U) qla2xxx(U) scsi_transport_fc(U) nfs(U) nfs_acl(U) lockd(U) sunrpc(U) mptsas(U) mptscsi(U) mptbase(U) megaraid_sas(U) e1000(U) bnx2(U) sd_mod(U) scsi_mod(U) Pid: 12646, comm: klogd Not tainted 2.6.9-67.0.7.EL_lustre.1.6.5.1smp RIP: 
0010:[<ffffffff8030ec6c>] <ffffffff8030ec6c>{.text.lock.spinlock+2} RSP: 0018:000001012a0a7b88 EFLAGS: 00000086 RAX: 0000000000000010 RBX: 00000100cffa56e8 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 00000000000004d0 RDI: 00000100cffa56e8 RBP: 000001012bc1e0c0 R08: 000001012a0a7cf0 R09: 6d5f697363732029 R10: 0000000000000053 R11: 0000000000000246 R12: 00000100cffa5688 R13: 00000100cffa5580 R14: 00000000000004d0 R15: 00000000000003e6 FS: 0000002a958a5b00(0000) GS:ffffffff8048e800(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000007fbfffa868 CR3: 000000012bd6e000 CR4: 00000000000006e0 Process klogd (pid: 12646, threadinfo 000001012a0a6000, task 00000101299e2030) Stack: 000000000000000c ffffffff80160901 00000100cffa5580 00000100cffa5580 00000000000004d0 0000000000000400 0000000000000400 0000000000000000 00000000000003e6 ffffffff801607d0 Call Trace:<ffffffff80160901>{cache_alloc_refill+96} <ffffffff801607d0>{__kmalloc+123} <ffffffff802adc58>{alloc_skb+65} <ffffffff802ac770>{sock_alloc_send_pskb+135} <ffffffff80133855>{__wake_up_common+67} <ffffffff801338ab>{__wake_up+54} <ffffffff8030899a>{unix_dgram_sendmsg+364} <ffffffff802aa430>{sock_aio_write+306} <ffffffff80178d0f>{do_sync_write+178} <ffffffff80137822>{do_syslog+482} <ffffffff801351dc>{autoremove_wake_function+0} <ffffffff801351dc>{autoremove_wake_function+0} <ffffffff801351dc>{autoremove_wake_function+0} <ffffffff80193ed0>{dnotify_parent+34} <ffffffff80178e1d>{vfs_write+226} <ffffffff80178ef2>{sys_write+69} <ffffffff8011022a>{system_call+126} Code: 83 3b 00 7e f9 e9 60 fc ff ff f3 90 83 3b 00 7e f9 e9 ce fc Kernel panic - not syncing: nmi watchdog <1>Unable to handle kernel NULL pointer dereference at 00000000000000ff RIP: [<00000000000000ff>] PML4 11b4a6067 PGD 0 Oops: 0010 [2] SMP CPU 2 Modules linked in: obdfilter(U) fsfilt_ldiskfs(U) ost(U) mgc(U) ldiskfs(U) lustre(U) lov(U) mdc(U) lquota(U) osc(U) ko2iblnd(U) ptlrpc(U) obdclass(U) lnet(U) lvfs(U) libcfs(U) sg(U) 
dell_rbu(U) autofs4(U) i2c_nforce2(U) i2c_amd756(U) i2c_isa(U) i2c_amd8111(U) i2c_i801(U) i2c_core(U) qlgc_vnic(U) iw_cxgb3(U) cxgb3(U) mlx4_ib(U) mlx4_core(U) ib_mthca(U) ipmi_devintf(U) ipmi_si(U) ipmi_msghandler(U) rdma_ucm(U) ib_sdp(U) rdma_cm(U) iw_cm(U) ib_addr(U) ib_ipoib(U) md5(U) ipv6(U) cpufreq_powersave(U) mptctl(U) dm_mirror(U) dm_round_robin(U) dm_multipath(U) dm_mod(U) sr_mod(U) usb_storage(U) joydev(U) button(U) battery(U) ac(U) uhci_hcd(U) ehci_hcd(U) hw_random(U) ib_ipath(U) ib_umad(U) ib_ucm(U) ib_uverbs(U) ib_cm(U) ib_sa(U) ib_mad(U) ib_core(U) ata_piix(U) libata(U) ext3(U) jbd(U) tg3(U) s2io(U) qla2400(U) qla2xxx(U) scsi_transport_fc(U) nfs(U) nfs_acl(U) lockd(U) sunrpc(U) mptsas(U) mptscsi(U) mptbase(U) megaraid_sas(U) e1000(U) bnx2(U) sd_mod(U) scsi_mod(U) Pid: 12646, comm: klogd Not tainted 2.6.9-67.0.7.EL_lustre.1.6.5.1smp RIP: 0010:[<00000000000000ff>] [<00000000000000ff>] RSP: 0018:00000100cfb03fa0 EFLAGS: 00010006 RAX: 000001012a0a7fd8 RBX: 0000000000000000 RCX: 0000000000000002 RDX: 00000000000000ff RSI: 0000000000000000 RDI: 0000000000000002 RBP: 000001012bd71f58 R08: 0000000000000020 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 00000100cffa5688 R13: 00000100cffa5580 R14: 00000000000004d0 R15: 00000000000003e6 FS: 0000002a958a5b00(0000) GS:ffffffff8048e800(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00000000000000ff CR3: 000000012bd6e000 CR4: 00000000000006e0 Process klogd (pid: 12646, threadinfo 000001012a0a6000, task 00000101299e2030) Stack: ffffffff8011c56b ffffffff80321aac ffffffff80110a73 000001012bd71cf8 <EOI> 000000000000000d ffffffff80321aac 000000000000000d ffffffff80324122 0000000000000001 0000000000000000 Call Trace:<IRQ> <ffffffff8011c56b>{smp_call_function_interrupt+64} <ffffffff80110a73>{call_function_interrupt+99} <EOI> <ffffffff8011c51e>{smp_send_stop+76} <ffffffff8013744a>{panic+235} <ffffffff801116fc>{show_stack+241} <ffffffff80111826>{show_registers+277} 
<ffffffff80111b2d>{die_nmi+130} <ffffffff8011d042>{nmi_watchdog_tick+210} <ffffffff801123fa>{default_do_nmi+112} <ffffffff8011d0f8>{do_nmi+115} <ffffffff8011100f>{paranoid_exit+0} <ffffffff8030ec6c>{.text.lock.spinlock+2} Code: Bad RIP value. RIP [<00000000000000ff>] RSP <00000100cfb03fa0> CR2: 00000000000000ff <0>Kernel panic - not syncing: Oops Badness in panic at kernel/panic.c:118 Call Trace:<IRQ> <ffffffff8013756e>{panic+527} <ffffffff801130a5>{do_IRQ+266} <ffffffff801107d1>{ret_from_intr+0} <ffffffff80111988>{oops_end+38} <ffffffff801119a3>{oops_end+65} <ffffffff80123e29>{do_page_fault+1125} <ffffffff801ea99e>{kobject_release+0} <ffffffff80131c55>{activate_task+124} <ffffffff80132180>{try_to_wake_up+876} <ffffffff80110c2d>{error_exit+0} <ffffffff8011c56b>{smp_call_function_interrupt+64} <ffffffff80110a73>{call_function_interrupt+99} <EOI> <ffffffff8011c51e>{smp_send_stop+76} <ffffffff8013744a>{panic+235} <ffffffff801116fc>{show_stack+241} <ffffffff80111826>{show_registers+277} <ffffffff80111b2d>{die_nmi+130} <ffffffff8011d042>{nmi_watchdog_tick+210} <ffffffff801440f3>{notifier_call_chain+31} <ffffffff801123fa>{default_do_nmi+112} <ffffffff8011d0f8>{do_nmi+115} <ffffffff8011100f>{paranoid_exit+0} <ffffffff80111988>{oops_end+38} Badness in i8042_panic_blink at drivers/input/serio/i8042.c:987 Call Trace:<IRQ> <ffffffff8024478f>{i8042_panic_blink+238} <ffffffff8013751c>{panic+445} <ffffffff801130a5>{do_IRQ+266} <ffffffff801107d1>{ret_from_intr+0} <ffffffff80111988>{oops_end+38} <ffffffff801119a3>{oops_end+65} <ffffffff80123e29>{do_page_fault+1125} <ffffffff801ea99e>{kobject_release+0} <ffffffff80131c55>{activate_task+124} <ffffffff80132180>{try_to_wake_up+876} <ffffffff80110c2d>{error_exit+0} <ffffffff8011c56b>{smp_call_function_interrupt+64} <ffffffff80110a73>{call_function_interrupt+99} <EOI> <ffffffff8011c51e>{smp_send_stop+76} <ffffffff8013744a>{panic+235} <ffffffff801116fc>{show_stack+241} <ffffffff80111826>{show_registers+277} 
<ffffffff80111b2d>{die_nmi+130} <ffffffff8011d042>{nmi_watchdog_tick+210} <ffffffff801440f3>{notifier_call_chain+31} <ffffffff801123fa>{default_do_nmi+112} <ffffffff8011d0f8>{do_nmi+115} <ffffffff8011100f>{paranoid_exit+0} <ffffffff80111988>{oops_end+38} Badness in i8042_panic_blink at drivers/input/serio/i8042.c:990 Call Trace:<IRQ> <ffffffff80244821>{i8042_panic_blink+384} <ffffffff8013751c>{panic+445} <ffffffff801130a5>{do_IRQ+266} <ffffffff801107d1>{ret_from_intr+0} <ffffffff80111988>{oops_end+38} <ffffffff801119a3>{oops_end+65} <ffffffff80123e29>{do_page_fault+1125} <ffffffff801ea99e>{kobject_release+0} <ffffffff80131c55>{activate_task+124} <ffffffff80132180>{try_to_wake_up+876} <ffffffff80110c2d>{error_exit+0} <ffffffff8011c56b>{smp_call_function_interrupt+64} <ffffffff80110a73>{call_function_interrupt+99} <EOI> <ffffffff8011c51e>{smp_send_stop+76} <ffffffff8013744a>{panic+235} <ffffffff801116fc>{show_stack+241} <ffffffff80111826>{show_registers+277} <ffffffff80111b2d>{die_nmi+130} <ffffffff8011d042>{nmi_watchdog_tick+210} <ffffffff801440f3>{notifier_call_chain+31} <ffffffff801123fa>{default_do_nmi+112} <ffffffff8011d0f8>{do_nmi+115} <ffffffff8011100f>{paranoid_exit+0} <ffffffff80111988>{oops_end+38} Badness in i8042_panic_blink at drivers/input/serio/i8042.c:992 Call Trace:<IRQ> <ffffffff80244886>{i8042_panic_blink+485} <ffffffff8013751c>{panic+445} <ffffffff801130a5>{do_IRQ+266} <ffffffff801107d1>{ret_from_intr+0} <ffffffff80111988>{oops_end+38} <ffffffff801119a3>{oops_end+65} <ffffffff80123e29>{do_page_fault+1125} <ffffffff801ea99e>{kobject_release+0} <ffffffff80131c55>{activate_task+124} <ffffffff80132180>{try_to_wake_up+876} <ffffffff80110c2d>{error_exit+0} <ffffffff8011c56b>{smp_call_function_interrupt+64} <ffffffff80110a73>{call_function_interrupt+99} <EOI> <ffffffff8011c51e>{smp_send_stop+76} <ffffffff8013744a>{panic+235} <ffffffff801116fc>{show_stack+241} <ffffffff80111826>{show_registers+277} <ffffffff80111b2d>{die_nmi+130} 
<ffffffff8011d042>{nmi_watchdog_tick+210} <ffffffff801440f3>{notifier_call_chain+31} <ffffffff801123fa>{default_do_nmi+112} <ffffffff8011d0f8>{do_nmi+115} <ffffffff8011100f>{paranoid_exit+0} <ffffffff80111988>{oops_end+38} Thank you in advance for helping me with this. Regards, Wojciech -- Wojciech Turek Assistant System Manager High Performance Computing Service University of Cambridge
Hi, We had another kernel panic, this time on the MDS server. Since we use the Lustre-patched kernel downloaded from the Sun website, we would like to ask whether anyone else has seen such a problem when moving from 1.6.4.3 to 1.6.5.1 on RHEL4 x86_64. The log is below: slab: cache size-1620 error: slabs_full accounting error slab: cache size-1620 error: slabs_full accounting error slab: cache size-1620 error: slabs_full accounting error Unable to handle kernel paging request at 0000303a383a303a RIP: <ffffffff801623c4>{s_show+62} PML4 0 Oops: 0000 [1] SMP CPU 3 Modules linked in: mds(U) fsfilt_ldiskfs(U) mgs(U) mgc(U) ldiskfs(U) lustre(U) lov(U) mdc(U) lquota(U) osc(U) ko2iblnd(U) ptlrpc(U) obdclass(U) lnet(U) lvfs(U) libcfs(U) sg(U) dell_rbu(U) autofs4(U) i2c_nforce2(U) i2c_amd756(U) i2c_isa(U) i2c_amd8111(U) i2c_i801(U) i2c_core(U) qlgc_vnic(U) iw_cxgb3(U) cxgb3(U) mlx4_ib(U) mlx4_core(U) ib_mthca(U) ipmi_devintf(U) ipmi_si(U) ipmi_msghandler(U) rdma_ucm(U) ib_sdp(U) rdma_cm(U) iw_cm(U) ib_addr(U) ib_ipoib(U) md5(U) ipv6(U) cpufreq_powersave(U) mptctl(U) dm_mirror(U) dm_round_robin(U) dm_multipath(U) dm_mod(U) sr_mod(U) usb_storage(U) joydev(U) button(U) battery(U) ac(U) uhci_hcd(U) ehci_hcd(U) hw_random(U) ib_ipath(U) ib_umad(U) ib_ucm(U) ib_uverbs(U) ib_cm(U) ib_sa(U) ib_mad(U) ib_core(U) ata_piix(U) libata(U) ext3(U) jbd(U) tg3(U) s2io(U) qla2400(U) qla2xxx(U) scsi_transport_fc(U) nfs(U) nfs_acl(U) lockd(U) sunrpc(U) mptsas(U) mptscsi(U) mptbase(U) megaraid_sas(U) e1000(U) bnx2(U) sd_mod(U) scsi_mod(U) Pid: 15733, comm: collectl Not tainted 2.6.9-67.0.7.EL_lustre.1.6.5.1smp RIP: 0010:[<ffffffff801623c4>] <ffffffff801623c4>{s_show+62} RSP: 0018:0000010117989e68 EFLAGS: 00010006 RAX: ffffffff80329f7a RBX: 00000100cffa5580 RCX: 00000100cffa5501 RDX: 0000000000000004 RSI: 0000303a383a303a RDI: 00000100cffa56e8 RBP: ffffffff80329f7a R08: 00000000fffffffd R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000001000 R14: 000001004c636500 R15: 
0000000000000024 FS: 0000002a9630ee80(0000) GS:ffffffff8048e880(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000303a383a303a CR3: 00000000cfb24000 CR4: 00000000000006e0 Process collectl (pid: 15733, threadinfo 0000010117988000, task 0000010127176030) Stack: 0000000000000000 0000000000000000 0000000000000009 00000100cffa5580 000001004c636500 0000000000000000 0000000000001000 0000000000000f0d 0000000000000000 ffffffff80196c1a Call Trace:<ffffffff80196c1a>{seq_read+445} <ffffffff80178c28>{vfs_read+207} <ffffffff80178e84>{sys_read+69} <ffffffff8011022a>{system_call+126} Code: 48 8b 06 0f 18 08 48 8d 83 18 01 00 00 48 39 c6 74 2e 8b 93 RIP <ffffffff801623c4>{s_show+62} RSP <0000010117989e68> CR2: 0000303a383a303a <0>Kernel panic - not syncing: Oops Thanks, Wojciech Wojciech Turek wrote:> Hi, > > I upgraded our test lustre file system to the latest 1.6.5.1 version > available from the SUN website. > I have one OSS with one OST and one MDS with combined MGS and MDT > Both servers are running RHEL4 x86_64 and > 2.6.9-67.0.7.EL_lustre.1.6.5.1smp kernel, the interconnect is infiniband > and I am using ib modules provided with lustre. 
> When I mount filesystem and then start writing to it OSS crashes with > kernel panic, see log below: > > > Lustre: 0:0:(watchdog.c:130:lcw_cb()) Watchdog triggered for pid 17398: > it was inactive for 200s > Lustre: 0:0:(linux-debug.c:167:libcfs_debug_dumpstack()) showing stack > for process 17397 > ll_ost_io_92 D 0000000000000002 0 17397 1 17398 17396 > (L-TLB) > 00000101156bf538 0000000000000046 00000101956bf616 ffffffff801ece0f > 0000000000000000 ffffff0010776340 000001010e14c6c0 0000000100000001 > 0000010113f90030 000000000012b585 > Call Trace:ll_ost_io_82 D 000001012ab79400 0 17387 1 > 17388 17386 (L-TLB) > 000001011c252d88 0000000000000046 ffffffffa000288c 0000010115b213c0 > 0000000000000246 00000100cf851c00 000001012bafa940 0000000200000000 > 000001010f71f030 0000000000000814 > Call Trace:<ffffffffa000288c>{:scsi_mod:scsi_done+0} > <ffffffff801ece0f>{vsnprintf+1406} <ffffffff8024f658>{elv_next_request+238} > <ffffffffa0007df8>{:scsi_mod:scsi_request_fn+1100} > <ffffffff8030cc1f>{__down+147} > <ffffffff80133804>{default_wake_function+0} > <ffffffffa067b484>{:ko2iblnd:kiblnd_init_tx_msg+308} > <ffffffff8030e2f6>{io_schedule+38} > <ffffffff80179e24>{__wait_on_buffer+125} > <ffffffff80179caa>{bh_wake_function+0} > <ffffffff80179caa>{bh_wake_function+0} > <ffffffffa07cad2b>{:ldiskfs:ldiskfs_mb_init_cache+635} > <ffffffff8030e73d>{__down_failed+53} > <ffffffffa06c6670>{:lquota:filter_quota_check+0} > <ffffffffa0843acf>{:obdfilter:.text.lock.filter_io_26+35} > <ffffffffa083b01e>{:obdfilter:filter_commitrw+126} > <ffffffff80158b8e>{add_to_page_cache+167} > <ffffffffa07cb450>{:ldiskfs:ldiskfs_mb_load_buddy+304} > <ffffffffa07cc7b4>{:ldiskfs:ldiskfs_mb_regular_allocator+1028} > <4>Lustre: 0:0:(watchdog.c:130:lcw_cb()) Watchdog triggered for > pid 17388: it was inactive for 200s > Lustre: 0:0:(watchdog.c:130:lcw_cb()) Skipped 2 previous similar messages > Lustre: 0:0:(linux-debug.c:167:libcfs_debug_dumpstack()) showing stack > for process 17388 > Lustre: 
0:0:(linux-debug.c:167:libcfs_debug_dumpstack()) Skipped 1 > previous similar message > ll_ost_io_83 D 0000000000000002 0 17388 1 17389 17387 > (L-TLB) > 000001011c35f538 0000000000000046 000001019c35f616 ffffffff801ece0f > 0000000000000000 ffffff0010776508 000001010e14c6c0 0000000000000001 > 000001011c336800 00000000000de7ad > Call Trace:<ffffffff801351dc>{autoremove_wake_function+0} > <ffffffffa07d10fd>{:ldiskfs:ldiskfs_mb_new_blocks+333} > <ffffffff801351dc>{autoremove_wake_function+0} > <ffffffffa0814eb4>{:fsfilt_ldiskfs:ldiskfs_ext_new_extent_cb+884} > <ffffffffa028e06a>{:ib_ipath:ipath_verbs_send+1883} > <ffffffffa07c6d7f>{:ldiskfs:ldiskfs_ext_find_extent+255} > <ffffffffa07c8972>{:ldiskfs:ldiskfs_ext_walk_space+482} > <ffffffffa0814b40>{:fsfilt_ldiskfs:ldiskfs_ext_new_extent_cb+0} > <ffffffffa0815343>{:fsfilt_ldiskfs:fsfilt_map_nblocks+307} > <ffffffffa028560f>{:ib_ipath:ipath_do_send+1852} > <ffffffff8013f734>{__mod_timer+293} > > <ffffffffa08155bb>{:fsfilt_ldiskfs:fsfilt_ldiskfs_map_ext_inode_pages+539} > <ffffffff801ece0f>{vsnprintf+1406} > <ffffffff801ece0f>{vsnprintf+1406} > <ffffffffa067cb38>{:ko2iblnd:kiblnd_check_sends+2040} > <ffffffffa08410b4>{:obdfilter:filter_direct_io+1108} > <ffffffffa01ba524>{:jbd:journal_start+223} > <ffffffffa081384b>{:fsfilt_ldiskfs:fsfilt_ldiskfs_brw_start+763} > <ffffffffa0842c1d>{:obdfilter:filter_commitrw_write+4957} > <ffffffffa04f5539>{:lvfs:pop_ctxt+505} > <ffffffff8030e4ce>{schedule_timeout+411} > <ffffffffa07f8688>{:ost:ost_checksum_bulk+200} > <ffffffffa083b01e>{:obdfilter:filter_commitrw+126} > <ffffffffa07fddd1>{:ost:ost_brw_write+9505} > <ffffffffa07f87ee>{:ost:ost_checksum_bulk+558} > <ffffffffa07f8688>{:ost:ost_checksum_bulk+200} > <ffffffffa07fddd1>{:ost:ost_brw_write+9505} > <ffffffff80133804>{default_wake_function+0} > <ffffffff80133804>{default_wake_function+0} > <ffffffffa06015ef>{:ptlrpc:lustre_msg_get_version+95} > <ffffffffa06015ef>{:ptlrpc:lustre_msg_get_version+95} > 
<ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > <ffffffffa06016e5>{:ptlrpc:lustre_msg_check_version+69} > <ffffffffa06016e5>{:ptlrpc:lustre_msg_check_version+69} > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > <ffffffffa080364d>{:ost:ost_handle+11661} > <ffffffffa080364d>{:ost:ost_handle+11661} > <ffffffff8015c830>{__rmqueue+218} > <ffffffff8015c830>{__rmqueue+218} > <ffffffffa060a451>{:ptlrpc:ptlrpc_check_req+17} > <ffffffffa060a451>{:ptlrpc:ptlrpc_check_req+17} > <ffffffffa060c629>{:ptlrpc:ptlrpc_server_handle_request+2457} > <ffffffffa060c629>{:ptlrpc:ptlrpc_server_handle_request+2457} > <ffffffffa04df45e>{:libcfs:lcw_update_time+30} > <ffffffffa04df45e>{:libcfs:lcw_update_time+30} > <ffffffff80133855>{__wake_up_common+67} > <ffffffffa060ed05>{:ptlrpc:ptlrpc_main+3989} > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > <ffffffff8013f734>{__mod_timer+293} > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > <ffffffffa060ed05>{:ptlrpc:ptlrpc_main+3989} > <ffffffff80110de3>{child_rip+8} <ffffffffa060dd70>{:ptlrpc:ptlrpc_main+0} > <ffffffff80110ddb>{child_rip+0} > <ffffffff8030cc1f>{__down+147} > <ffffffff80133804>{default_wake_function+0} > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > <ffffffff80110de3>{child_rip+8} > <ffffffffa060dd70>{:ptlrpc:ptlrpc_main+0} > <ffffffff80110ddb>{child_rip+0} > ll_ost_io_84 D 0000000000000002 0 17389 1 17390 17388 > (L-TLB) > 000001011ac6d538 0000000000000046 000001019ac6d616 ffffffff801ece0f > 0000000000000000 ffffff0010776340 000001010e14c6c0 0000000100000001 > 000001011c336030 000000000012c37b > Call Trace:<ffffffff80133804>{default_wake_function+0} > <ffffffffa067b484>{:ko2iblnd:kiblnd_init_tx_msg+308} > <ffffffff801ece0f>{vsnprintf+1406} > <ffffffff8030e73d>{__down_failed+53} > <ffffffffa06c6670>{:lquota:filter_quota_check+0} > 
<ffffffffa0843acf>{:obdfilter:.text.lock.filter_io_26+35} > <ffffffff80131bc7>{recalc_task_prio+337} > <ffffffffa05160a0>{:lnet:lnet_send+2544} > <ffffffffa083b01e>{:obdfilter:filter_commitrw+126} > <ffffffff8030cc1f>{__down+147} > <ffffffff80133804>{default_wake_function+0} > <ffffffffa067b484>{:ko2iblnd:kiblnd_init_tx_msg+308} > <ffffffff8030e4ce>{schedule_timeout+411} > <ffffffffa07f8688>{:ost:ost_checksum_bulk+200} > <ffffffffa07fddd1>{:ost:ost_brw_write+9505} > <ffffffff80133804>{default_wake_function+0} > <ffffffffa06015ef>{:ptlrpc:lustre_msg_get_version+95} > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > <ffffffffa06016e5>{:ptlrpc:lustre_msg_check_version+69} > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > <ffffffffa080364d>{:ost:ost_handle+11661} > <ffffffffa0516b48>{:lnet:lnet_match_blocked_msg+920} > <ffffffff8015c830>{__rmqueue+218} > <ffffffffa060a451>{:ptlrpc:ptlrpc_check_req+17} > <ffffffffa060c629>{:ptlrpc:ptlrpc_server_handle_request+2457} > <ffffffffa04df45e>{:libcfs:lcw_update_time+30} > <ffffffff8030e73d>{__down_failed+53} > <ffffffffa06c6670>{:lquota:filter_quota_check+0} > <ffffffffa0843acf>{:obdfilter:.text.lock.filter_io_26+35} > <ffffffff80133855>{__wake_up_common+67} > <ffffffffa060ed05>{:ptlrpc:ptlrpc_main+3989} > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > <ffffffff80110de3>{child_rip+8} > <ffffffffa060dd70>{:ptlrpc:ptlrpc_main+0} > <ffffffff80131bc7>{recalc_task_prio+337} > <ffffffff80110ddb>{child_rip+0} > ll_ost_io_91 D<ffffffffa05160a0>{:lnet:lnet_send+2544} > 0000000000000002 0 17396 1 17397 17395 (L-TLB) > 0000010113fb3538 0000000000000046 0000010193fb3616 ffffffff801ece0f > 0000000000000000 ffffff0010776638 000001010e14c6c0 0000000000000001 > 0000010113f90800 00000000000d4d86 > Call Trace:<ffffffffa083b01e>{:obdfilter:filter_commitrw+126} > <ffffffff801ece0f>{vsnprintf+1406} > <ffffffff8030e4ce>{schedule_timeout+411} > 
<ffffffffa07f8688>{:ost:ost_checksum_bulk+200} > <ffffffffa07fddd1>{:ost:ost_brw_write+9505} > <ffffffff80133804>{default_wake_function+0} > <ffffffffa06015ef>{:ptlrpc:lustre_msg_get_version+95} > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > <ffffffffa06016e5>{:ptlrpc:lustre_msg_check_version+69} > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > <ffffffffa080364d>{:ost:ost_handle+11661} > <ffffffffa0516ad7>{:lnet:lnet_match_blocked_msg+807} > <ffffffffa0516b48>{:lnet:lnet_match_blocked_msg+920} > <ffffffffa060a451>{:ptlrpc:ptlrpc_check_req+17} > <ffffffffa060c629>{:ptlrpc:ptlrpc_server_handle_request+2457} > <ffffffffa04df45e>{:libcfs:lcw_update_time+30} > <ffffffff80133855>{__wake_up_common+67} > <ffffffffa060ed05>{:ptlrpc:ptlrpc_main+3989} > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > <ffffffff80110de3>{child_rip+8} > <ffffffffa060dd70>{:ptlrpc:ptlrpc_main+0} > <ffffffff80110ddb>{child_rip+0} > ll_ost_io_107 D 0000000000000002 0 17412 1 17413 17411 > (L-TLB) > 000001010ea6f538 0000000000000046 000001018ea6f616 ffffffff801ece0f > 0000000000000000 ffffff0010776768 000001010e14c6c0 0000000100000001 > 000001010ea48800 000000000012d2f5 > Call Trace:<ffffffff8030cc1f>{__down+147} > <ffffffff80133804>{default_wake_function+0} > <ffffffffa067b484>{:ko2iblnd:kiblnd_init_tx_msg+308} > <ffffffff801ece0f>{vsnprintf+1406} > <ffffffff8030e73d>{__down_failed+53} > <ffffffffa06c6670>{:lquota:filter_quota_check+0} > <ffffffffa0843acf>{:obdfilter:.text.lock.filter_io_26+35} > <ffffffffa083b01e>{:obdfilter:filter_commitrw+126} > <ffffffff8030cc1f>{__down+147} > <ffffffff80133804>{default_wake_function+0} > <ffffffffa067b484>{:ko2iblnd:kiblnd_init_tx_msg+308} > <ffffffff8030e4ce>{schedule_timeout+411} > <ffffffffa07f8688>{:ost:ost_checksum_bulk+200} > <ffffffffa07fddd1>{:ost:ost_brw_write+9505} > <ffffffff80133804>{default_wake_function+0} > 
<ffffffffa06015ef>{:ptlrpc:lustre_msg_get_version+95} > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > <ffffffffa06016e5>{:ptlrpc:lustre_msg_check_version+69} > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > <ffffffffa080364d>{:ost:ost_handle+11661} > <ffffffff8015c830>{__rmqueue+218} > <ffffffff8030e73d>{__down_failed+53} > <ffffffffa06c6670>{:lquota:filter_quota_check+0} > <ffffffffa0843acf>{:obdfilter:.text.lock.filter_io_26+35} > <ffffffffa060a451>{:ptlrpc:ptlrpc_check_req+17} > <ffffffffa060c629>{:ptlrpc:ptlrpc_server_handle_request+2457} > <ffffffffa04df45e>{:libcfs:lcw_update_time+30} > <ffffffff80131bc7>{recalc_task_prio+337} > <ffffffffa05160a0>{:lnet:lnet_send+2544} > <ffffffffa083b01e>{:obdfilter:filter_commitrw+126} > <ffffffff8013f734>{__mod_timer+293} > <ffffffffa060ed05>{:ptlrpc:ptlrpc_main+3989} > <ffffffff80133804>{default_wake_function+0} > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > <ffffffff80110de3>{child_rip+8} > <ffffffffa060dd70>{:ptlrpc:ptlrpc_main+0} > <ffffffff80110ddb>{child_rip+0} > <ffffffff8030e4ce>{schedule_timeout+411} > <ffffffffa07f8688>{:ost:ost_checksum_bulk+200} > <ffffffffa07fddd1>{:ost:ost_brw_write+9505} > <ffffffff80133804>{default_wake_function+0} > <ffffffffa06015ef>{:ptlrpc:lustre_msg_get_version+95} > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > <ffffffffa06016e5>{:ptlrpc:lustre_msg_check_version+69} > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > <ffffffffa080364d>{:ost:ost_handle+11661} > <ffffffffa0516b48>{:lnet:lnet_match_blocked_msg+920} > <ffffffffa060a451>{:ptlrpc:ptlrpc_check_req+17} > <ffffffffa060c629>{:ptlrpc:ptlrpc_server_handle_request+2457} > <ffffffffa04df45e>{:libcfs:lcw_update_time+30} > <ffffffff8013f734>{__mod_timer+293} > <ffffffffa060ed05>{:ptlrpc:ptlrpc_main+3989} > <ffffffff80133804>{default_wake_function+0} > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > <ffffffff80110de3>{child_rip+8} 
> <ffffffffa060dd70>{:ptlrpc:ptlrpc_main+0} > <ffffffff80110ddb>{child_rip+0} > ll_ost_io_121 D 0000000000000002 0 17426 1 17427 17425 > (L-TLB) > 0000010110113538 0000000000000046 0000010190113616 ffffffff801ece0f > 0000000000000000 ffffff00107763d8 000001010e14c6c0 0000000100000001 > 00000101100e6800 0000000000113501 > Call Trace:<ffffffff801ece0f>{vsnprintf+1406} <ffffffff8030cc1f>{__down+147} > <ffffffff80133804>{default_wake_function+0} > <ffffffffa067b484>{:ko2iblnd:kiblnd_init_tx_msg+308} > <ffffffff8030e73d>{__down_failed+53} > <ffffffffa06c6670>{:lquota:filter_quota_check+0} > <ffffffffa0843acf>{:obdfilter:.text.lock.filter_io_26+35} > <ffffffffa083b01e>{:obdfilter:filter_commitrw+126} > <ffffffff8030e4ce>{schedule_timeout+411} > <ffffffffa07f8688>{:ost:ost_checksum_bulk+200} > <ffffffffa07fddd1>{:ost:ost_brw_write+9505} > <ffffffff80133804>{default_wake_function+0} > <ffffffffa06015ef>{:ptlrpc:lustre_msg_get_version+95} > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > <ffffffffa06016e5>{:ptlrpc:lustre_msg_check_version+69} > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > <ffffffffa080364d>{:ost:ost_handle+11661} > <ffffffff8015c830>{__rmqueue+218} > <ffffffffa060a451>{:ptlrpc:ptlrpc_check_req+17} > <ffffffffa060c629>{:ptlrpc:ptlrpc_server_handle_request+2457} > <ffffffffa04df45e>{:libcfs:lcw_update_time+30} > <ffffffff8013f734>{__mod_timer+293} > <ffffffffa060ed05>{:ptlrpc:ptlrpc_main+3989} > <ffffffff80133804>{default_wake_function+0} > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > <ffffffff80110de3>{child_rip+8} > <ffffffffa060dd70>{:ptlrpc:ptlrpc_main+0} > <ffffffff80110ddb>{child_rip+0} > Lustre: 0:0:(watchdog.c:130:lcw_cb()) Skipped 4 previous similar messages > Lustre: 0:0:(linux-debug.c:167:libcfs_debug_dumpstack()) showing stack > for process 17398 > Lustre: 0:0:(linux-debug.c:167:libcfs_debug_dumpstack()) Skipped 4 > previous similar messages > ll_ost_io_93 D 0000000000000002 0 17398 1 17399 
17397 > (L-TLB) > 00000101157cd538 0000000000000046 00000101957cd616 ffffffff00000073 > 0000000000000000 0000000010776508 0000010001053a20 0000000200000001 > 00000101157a7800 0000000000161205 > Call Trace:<ffffffff8030ec6c>{.text.lock.spinlock+2} > <ffffffff8030cc1f>{__down+147} > <ffffffff80133804>{default_wake_function+0} > <ffffffffa067b484>{:ko2iblnd:kiblnd_init_tx_msg+308} > <ffffffff8030e73d>{__down_failed+53} > <ffffffffa06c6670>{:lquota:filter_quota_check+0} > <ffffffffa0843acf>{:obdfilter:.text.lock.filter_io_26+35} > <ffffffffa04f5539>{:lvfs:pop_ctxt+505} > <ffffffff80131bc7>{recalc_task_prio+337} > <ffffffffa05160a0>{:lnet:lnet_send+2544} > <ffffffffa083b01e>{:obdfilter:filter_commitrw+126} > <ffffffffa07f8801>{:ost:ost_checksum_bulk+577} > <ffffffffa07f8688>{:ost:ost_checksum_bulk+200} > <ffffffffa07fddd1>{:ost:ost_brw_write+9505} > <ffffffff8017a62e>{end_buffer_async_read+0} > <ffffffff80133804>{default_wake_function+0} > <ffffffffa06015ef>{:ptlrpc:lustre_msg_get_version+95} > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > <ffffffffa06016e5>{:ptlrpc:lustre_msg_check_version+69} > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > <ffffffffa080364d>{:ost:ost_handle+11661} > <ffffffff8017a62e>{end_buffer_async_read+0} > <ffffffff8017a62e>{end_buffer_async_read+0} > <ffffffff8017a62e>{end_buffer_async_read+0} > <ffffffff8017a62e>{end_buffer_async_read+0} > <ffffffff8015c830>{__rmqueue+218} <ffffffff8015c830>{__rmqueue+218} > <ffffffffa060a451>{:ptlrpc:ptlrpc_check_req+17} > <ffffffffa060c629>{:ptlrpc:ptlrpc_server_handle_request+2457} > <ffffffffa04df45e>{:libcfs:lcw_update_time+30} > <ffffffff8013f734>{__mod_timer+293} > <ffffffffa060ed05>{:ptlrpc:ptlrpc_main+3989} > <ffffffff8017a62e>{end_buffer_async_read+0} > <ffffffff80133804>{default_wake_function+0} > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > <ffffffff80110de3>{child_rip+8} > <ffffffffa060dd70>{:ptlrpc:ptlrpc_main+0} > 
<ffffffff80110ddb>{child_rip+0} > > LustreError: 17397:0:(filter_io_26.c:700:filter_commitrw_write()) > testfs-OST0001: slow i_mutex 223s > LustreError: 17398:0:(filter_io_26.c:700:filter_commitrw_write()) > testfs-OST0001: slow i_mutex 223s > LustreError: 17387:0:(filter_io_26.c:765:filter_commitrw_write()) > testfs-OST0001: slow direct_io 223s > Lustre: 17397:0:(watchdog.c:312:lcw_update_time()) Expired watchdog for > pid 17397 disabled after 223.2918s > Lustre: 17387:0:(watchdog.c:312:lcw_update_time()) Expired watchdog for > pid 17387 disabled after 223.3108s > Lustre: 17387:0:(watchdog.c:312:lcw_update_time()) Skipped 5 previous > similar messages > slab: cache size-1620 error: slabs_full accounting error > slab: cache size-1620 error: slabs_full accounting error > slab: cache size-1620 error: slabs_full accounting error > Unable to handle kernel NULL pointer dereference at 0000000000000000 RIP: > <ffffffff801623c4>{s_show+62} > PML4 112a23067 PGD 114d4d067 PMD 0 > Oops: 0000 [1] SMP > CPU 1 > Modules linked in: obdfilter(U) fsfilt_ldiskfs(U) ost(U) mgc(U) > ldiskfs(U) lustre(U) lov(U) mdc(U) lquota(U) osc(U) ko2iblnd(U) > ptlrpc(U) obdclass(U) lnet(U) lvfs(U) libcfs(U) sg(U) dell_rbu(U) > autofs4(U) i2c_nforce2(U) i2c_amd756(U) i2c_isa(U) i2c_amd8111(U) > i2c_i801(U) i2c_core(U) qlgc_vnic(U) iw_cxgb3(U) cxgb3(U) mlx4_ib(U) > mlx4_core(U) ib_mthca(U) ipmi_devintf(U) ipmi_si(U) ipmi_msghandler(U) > rdma_ucm(U) ib_sdp(U) rdma_cm(U) iw_cm(U) ib_addr(U) ib_ipoib(U) md5(U) > ipv6(U) cpufreq_powersave(U) mptctl(U) dm_mirror(U) dm_round_robin(U) > dm_multipath(U) dm_mod(U) sr_mod(U) usb_storage(U) joydev(U) button(U) > battery(U) ac(U) uhci_hcd(U) ehci_hcd(U) hw_random(U) ib_ipath(U) > ib_umad(U) ib_ucm(U) ib_uverbs(U) ib_cm(U) ib_sa(U) ib_mad(U) ib_core(U) > ata_piix(U) libata(U) ext3(U) jbd(U) tg3(U) s2io(U) qla2400(U) > qla2xxx(U) scsi_transport_fc(U) nfs(U) nfs_acl(U) lockd(U) sunrpc(U) > mptsas(U) mptscsi(U) mptbase(U) megaraid_sas(U) e1000(U) bnx2(U) > 
sd_mod(U) scsi_mod(U) > Pid: 15569, comm: collectl Not tainted 2.6.9-67.0.7.EL_lustre.1.6.5.1smp > RIP: 0010:[<ffffffff801623c4>] <ffffffff801623c4>{s_show+62} > RSP: 0018:0000010115823e68 EFLAGS: 00010012 > RAX: ffffffff80329f7a RBX: 00000100cffa5580 RCX: 00000100cffa5501 > RDX: 0000000000000004 RSI: 0000000000000000 RDI: 00000100cffa56e8 > RBP: ffffffff80329f7a R08: 00000000fffffffd R09: 0000000000000000 > R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 > R13: 0000000000001000 R14: 000001012b617b80 R15: 0000000000000020 > FS: 0000002a9630ee80(0000) GS:ffffffff8048e780(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > CR2: 0000000000000000 CR3: 000000012bd38000 CR4: 00000000000006e0 > Process collectl (pid: 15569, threadinfo 0000010115822000, task > 00000101280a4800) > Stack: 0000000000000000 0000000000000000 0000000000000008 00000100cffa5580 > 000001012b617b80 0000000000000000 0000000000001000 0000000000000ea2 > 0000000000000000 ffffffff80196c1a > Call Trace:<ffffffff80196c1a>{seq_read+445} <ffffffff80178c28>{vfs_read+207} > <ffffffff80178e84>{sys_read+69} <ffffffff8011022a>{system_call+126} > > > Code: 48 8b 06 0f 18 08 48 8d 83 18 01 00 00 48 39 c6 74 2e 8b 93 > RIP <ffffffff801623c4>{s_show+62} RSP <0000010115823e68> > CR2: 0000000000000000 > <0>Kernel panic - not syncing: Oops > NMI Watchdog detected LOCKUP, CPU=2, registers: > CPU 2 > Modules linked in: obdfilter(U) fsfilt_ldiskfs(U) ost(U) mgc(U) > ldiskfs(U) lustre(U) lov(U) mdc(U) lquota(U) osc(U) ko2iblnd(U) > ptlrpc(U) obdclass(U) lnet(U) lvfs(U) libcfs(U) sg(U) dell_rbu(U) > autofs4(U) i2c_nforce2(U) i2c_amd756(U) i2c_isa(U) i2c_amd8111(U) > i2c_i801(U) i2c_core(U) qlgc_vnic(U) iw_cxgb3(U) cxgb3(U) mlx4_ib(U) > mlx4_core(U) ib_mthca(U) ipmi_devintf(U) ipmi_si(U) ipmi_msghandler(U) > rdma_ucm(U) ib_sdp(U) rdma_cm(U) iw_cm(U) ib_addr(U) ib_ipoib(U) md5(U) > ipv6(U) cpufreq_powersave(U) mptctl(U) dm_mirror(U) dm_round_robin(U) > dm_multipath(U) dm_mod(U) 
sr_mod(U) usb_storage(U) joydev(U) button(U) > battery(U) ac(U) uhci_hcd(U) ehci_hcd(U) hw_random(U) ib_ipath(U) > ib_umad(U) ib_ucm(U) ib_uverbs(U) ib_cm(U) ib_sa(U) ib_mad(U) ib_core(U) > ata_piix(U) libata(U) ext3(U) jbd(U) tg3(U) s2io(U) qla2400(U) > qla2xxx(U) scsi_transport_fc(U) nfs(U) nfs_acl(U) lockd(U) sunrpc(U) > mptsas(U) mptscsi(U) mptbase(U) megaraid_sas(U) e1000(U) bnx2(U) > sd_mod(U) scsi_mod(U) > Pid: 12646, comm: klogd Not tainted 2.6.9-67.0.7.EL_lustre.1.6.5.1smp > RIP: 0010:[<ffffffff8030ec6c>] <ffffffff8030ec6c>{.text.lock.spinlock+2} > RSP: 0018:000001012a0a7b88 EFLAGS: 00000086 > RAX: 0000000000000010 RBX: 00000100cffa56e8 RCX: 0000000000000000 > RDX: 0000000000000000 RSI: 00000000000004d0 RDI: 00000100cffa56e8 > RBP: 000001012bc1e0c0 R08: 000001012a0a7cf0 R09: 6d5f697363732029 > R10: 0000000000000053 R11: 0000000000000246 R12: 00000100cffa5688 > R13: 00000100cffa5580 R14: 00000000000004d0 R15: 00000000000003e6 > FS: 0000002a958a5b00(0000) GS:ffffffff8048e800(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b > CR2: 0000007fbfffa868 CR3: 000000012bd6e000 CR4: 00000000000006e0 > Process klogd (pid: 12646, threadinfo 000001012a0a6000, task > 00000101299e2030) > Stack: 000000000000000c ffffffff80160901 00000100cffa5580 00000100cffa5580 > 00000000000004d0 0000000000000400 0000000000000400 0000000000000000 > 00000000000003e6 ffffffff801607d0 > Call Trace:<ffffffff80160901>{cache_alloc_refill+96} > <ffffffff801607d0>{__kmalloc+123} > <ffffffff802adc58>{alloc_skb+65} > <ffffffff802ac770>{sock_alloc_send_pskb+135} > <ffffffff80133855>{__wake_up_common+67} > <ffffffff801338ab>{__wake_up+54} > <ffffffff8030899a>{unix_dgram_sendmsg+364} > <ffffffff802aa430>{sock_aio_write+306} > <ffffffff80178d0f>{do_sync_write+178} > <ffffffff80137822>{do_syslog+482} > <ffffffff801351dc>{autoremove_wake_function+0} > <ffffffff801351dc>{autoremove_wake_function+0} > <ffffffff801351dc>{autoremove_wake_function+0} > 
<ffffffff80193ed0>{dnotify_parent+34} > <ffffffff80178e1d>{vfs_write+226} <ffffffff80178ef2>{sys_write+69} > <ffffffff8011022a>{system_call+126} > > Code: 83 3b 00 7e f9 e9 60 fc ff ff f3 90 83 3b 00 7e f9 e9 ce fc > Kernel panic - not syncing: nmi watchdog > <1>Unable to handle kernel NULL pointer dereference at 00000000000000ff > RIP: > [<00000000000000ff>] > PML4 11b4a6067 PGD 0 > Oops: 0010 [2] SMP > CPU 2 > Modules linked in: obdfilter(U) fsfilt_ldiskfs(U) ost(U) mgc(U) > ldiskfs(U) lustre(U) lov(U) mdc(U) lquota(U) osc(U) ko2iblnd(U) > ptlrpc(U) obdclass(U) lnet(U) lvfs(U) libcfs(U) sg(U) dell_rbu(U) > autofs4(U) i2c_nforce2(U) i2c_amd756(U) i2c_isa(U) i2c_amd8111(U) > i2c_i801(U) i2c_core(U) qlgc_vnic(U) iw_cxgb3(U) cxgb3(U) mlx4_ib(U) > mlx4_core(U) ib_mthca(U) ipmi_devintf(U) ipmi_si(U) ipmi_msghandler(U) > rdma_ucm(U) ib_sdp(U) rdma_cm(U) iw_cm(U) ib_addr(U) ib_ipoib(U) md5(U) > ipv6(U) cpufreq_powersave(U) mptctl(U) dm_mirror(U) dm_round_robin(U) > dm_multipath(U) dm_mod(U) sr_mod(U) usb_storage(U) joydev(U) button(U) > battery(U) ac(U) uhci_hcd(U) ehci_hcd(U) hw_random(U) ib_ipath(U) > ib_umad(U) ib_ucm(U) ib_uverbs(U) ib_cm(U) ib_sa(U) ib_mad(U) ib_core(U) > ata_piix(U) libata(U) ext3(U) jbd(U) tg3(U) s2io(U) qla2400(U) > qla2xxx(U) scsi_transport_fc(U) nfs(U) nfs_acl(U) lockd(U) sunrpc(U) > mptsas(U) mptscsi(U) mptbase(U) megaraid_sas(U) e1000(U) bnx2(U) > sd_mod(U) scsi_mod(U) > Pid: 12646, comm: klogd Not tainted 2.6.9-67.0.7.EL_lustre.1.6.5.1smp > RIP: 0010:[<00000000000000ff>] [<00000000000000ff>] > RSP: 0018:00000100cfb03fa0 EFLAGS: 00010006 > RAX: 000001012a0a7fd8 RBX: 0000000000000000 RCX: 0000000000000002 > RDX: 00000000000000ff RSI: 0000000000000000 RDI: 0000000000000002 > RBP: 000001012bd71f58 R08: 0000000000000020 R09: 0000000000000000 > R10: 0000000000000000 R11: 0000000000000000 R12: 00000100cffa5688 > R13: 00000100cffa5580 R14: 00000000000004d0 R15: 00000000000003e6 > FS: 0000002a958a5b00(0000) GS:ffffffff8048e800(0000) 
knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b > CR2: 00000000000000ff CR3: 000000012bd6e000 CR4: 00000000000006e0 > Process klogd (pid: 12646, threadinfo 000001012a0a6000, task > 00000101299e2030) > Stack: ffffffff8011c56b ffffffff80321aac ffffffff80110a73 > 000001012bd71cf8 <EOI> > 000000000000000d ffffffff80321aac 000000000000000d ffffffff80324122 > 0000000000000001 0000000000000000 > Call Trace:<IRQ> <ffffffff8011c56b>{smp_call_function_interrupt+64} > <ffffffff80110a73>{call_function_interrupt+99} <EOI> > <ffffffff8011c51e>{smp_send_stop+76} > <ffffffff8013744a>{panic+235} <ffffffff801116fc>{show_stack+241} > <ffffffff80111826>{show_registers+277} > <ffffffff80111b2d>{die_nmi+130} > <ffffffff8011d042>{nmi_watchdog_tick+210} > <ffffffff801123fa>{default_do_nmi+112} > <ffffffff8011d0f8>{do_nmi+115} <ffffffff8011100f>{paranoid_exit+0} > <ffffffff8030ec6c>{.text.lock.spinlock+2} > > Code: Bad RIP value. > RIP [<00000000000000ff>] RSP <00000100cfb03fa0> > CR2: 00000000000000ff > <0>Kernel panic - not syncing: Oops > Badness in panic at kernel/panic.c:118 > > Call Trace:<IRQ> <ffffffff8013756e>{panic+527} > <ffffffff801130a5>{do_IRQ+266} > <ffffffff801107d1>{ret_from_intr+0} <ffffffff80111988>{oops_end+38} > <ffffffff801119a3>{oops_end+65} > <ffffffff80123e29>{do_page_fault+1125} > <ffffffff801ea99e>{kobject_release+0} > <ffffffff80131c55>{activate_task+124} > <ffffffff80132180>{try_to_wake_up+876} > <ffffffff80110c2d>{error_exit+0} > <ffffffff8011c56b>{smp_call_function_interrupt+64} > <ffffffff80110a73>{call_function_interrupt+99} <EOI> > <ffffffff8011c51e>{smp_send_stop+76} > <ffffffff8013744a>{panic+235} <ffffffff801116fc>{show_stack+241} > <ffffffff80111826>{show_registers+277} > <ffffffff80111b2d>{die_nmi+130} > <ffffffff8011d042>{nmi_watchdog_tick+210} > <ffffffff801440f3>{notifier_call_chain+31} > <ffffffff801123fa>{default_do_nmi+112} <ffffffff8011d0f8>{do_nmi+115} > <ffffffff8011100f>{paranoid_exit+0} 
<ffffffff80111988>{oops_end+38} > > Badness in i8042_panic_blink at drivers/input/serio/i8042.c:987 > > Call Trace:<IRQ> <ffffffff8024478f>{i8042_panic_blink+238} > <ffffffff8013751c>{panic+445} > <ffffffff801130a5>{do_IRQ+266} <ffffffff801107d1>{ret_from_intr+0} > <ffffffff80111988>{oops_end+38} <ffffffff801119a3>{oops_end+65} > <ffffffff80123e29>{do_page_fault+1125} > <ffffffff801ea99e>{kobject_release+0} > <ffffffff80131c55>{activate_task+124} > <ffffffff80132180>{try_to_wake_up+876} > <ffffffff80110c2d>{error_exit+0} > <ffffffff8011c56b>{smp_call_function_interrupt+64} > <ffffffff80110a73>{call_function_interrupt+99} <EOI> > <ffffffff8011c51e>{smp_send_stop+76} > <ffffffff8013744a>{panic+235} <ffffffff801116fc>{show_stack+241} > <ffffffff80111826>{show_registers+277} > <ffffffff80111b2d>{die_nmi+130} > <ffffffff8011d042>{nmi_watchdog_tick+210} > <ffffffff801440f3>{notifier_call_chain+31} > <ffffffff801123fa>{default_do_nmi+112} <ffffffff8011d0f8>{do_nmi+115} > <ffffffff8011100f>{paranoid_exit+0} <ffffffff80111988>{oops_end+38} > > Badness in i8042_panic_blink at drivers/input/serio/i8042.c:990 > > Call Trace:<IRQ> <ffffffff80244821>{i8042_panic_blink+384} > <ffffffff8013751c>{panic+445} > <ffffffff801130a5>{do_IRQ+266} <ffffffff801107d1>{ret_from_intr+0} > <ffffffff80111988>{oops_end+38} <ffffffff801119a3>{oops_end+65} > <ffffffff80123e29>{do_page_fault+1125} > <ffffffff801ea99e>{kobject_release+0} > <ffffffff80131c55>{activate_task+124} > <ffffffff80132180>{try_to_wake_up+876} > <ffffffff80110c2d>{error_exit+0} > <ffffffff8011c56b>{smp_call_function_interrupt+64} > <ffffffff80110a73>{call_function_interrupt+99} <EOI> > <ffffffff8011c51e>{smp_send_stop+76} > <ffffffff8013744a>{panic+235} <ffffffff801116fc>{show_stack+241} > <ffffffff80111826>{show_registers+277} > <ffffffff80111b2d>{die_nmi+130} > <ffffffff8011d042>{nmi_watchdog_tick+210} > <ffffffff801440f3>{notifier_call_chain+31} > <ffffffff801123fa>{default_do_nmi+112} <ffffffff8011d0f8>{do_nmi+115} > 
<ffffffff8011100f>{paranoid_exit+0} <ffffffff80111988>{oops_end+38} > > Badness in i8042_panic_blink at drivers/input/serio/i8042.c:992 > > Call Trace:<IRQ> <ffffffff80244886>{i8042_panic_blink+485} > <ffffffff8013751c>{panic+445} > <ffffffff801130a5>{do_IRQ+266} <ffffffff801107d1>{ret_from_intr+0} > <ffffffff80111988>{oops_end+38} <ffffffff801119a3>{oops_end+65} > <ffffffff80123e29>{do_page_fault+1125} > <ffffffff801ea99e>{kobject_release+0} > <ffffffff80131c55>{activate_task+124} > <ffffffff80132180>{try_to_wake_up+876} > <ffffffff80110c2d>{error_exit+0} > <ffffffff8011c56b>{smp_call_function_interrupt+64} > <ffffffff80110a73>{call_function_interrupt+99} <EOI> > <ffffffff8011c51e>{smp_send_stop+76} > <ffffffff8013744a>{panic+235} <ffffffff801116fc>{show_stack+241} > <ffffffff80111826>{show_registers+277} > <ffffffff80111b2d>{die_nmi+130} > <ffffffff8011d042>{nmi_watchdog_tick+210} > <ffffffff801440f3>{notifier_call_chain+31} > <ffffffff801123fa>{default_do_nmi+112} <ffffffff8011d0f8>{do_nmi+115} > <ffffffff8011100f>{paranoid_exit+0} <ffffffff80111988>{oops_end+38} > > > Thank you in advance for helping me with this. > > Regards, > > Wojciech > >-- Wojciech Turek Assistant System Manager High Performance Computing Service University of Cambridge Email: wjt27 at cam.ac.uk Tel: (+)44 1223 763517
On Sep 26, 2008 15:14 +0100, Wojciech Turek wrote: > We had another kernel panic this time on MDS server. Since we use lustre > patched kernel downloaded from the SUN website we would like to ask if > anyone else have seen such a problem while moving from 1.6.4.3 to > 1.6.5.1 on RHEL4 x86_64 > > > slab: cache size-1620 error: slabs_full accounting error > slab: cache size-1620 error: slabs_full accounting error > slab: cache size-1620 error: slabs_full accounting error I've never seen these errors before - I didn't even know a "size-1620" slab existed. > Unable to handle kernel paging request at 0000303a383a303a RIP: > <ffffffff801623c4>{s_show+62} > PML4 0 > Oops: 0000 [1] SMP > CPU 3 > Modules linked in: mds(U) fsfilt_ldiskfs(U) mgs(U) mgc(U) ldiskfs(U) > lustre(U) lov(U) mdc(U) lquota(U) osc(U) ko2iblnd(U) ptlrpc(U) > obdclass(U) lnet(U) lvfs(U) libcfs(U) sg(U) dell_rbu(U) autofs4(U) > i2c_nforce2(U) i2c_amd756(U) i2c_isa(U) i2c_amd8111(U) i2c_i801(U) > i2c_core(U) qlgc_vnic(U) iw_cxgb3(U) cxgb3(U) mlx4_ib(U) mlx4_core(U) > ib_mthca(U) ipmi_devintf(U) ipmi_si(U) ipmi_msghandler(U) rdma_ucm(U) > ib_sdp(U) rdma_cm(U) iw_cm(U) ib_addr(U) ib_ipoib(U) md5(U) ipv6(U) > cpufreq_powersave(U) mptctl(U) dm_mirror(U) dm_round_robin(U) > dm_multipath(U) dm_mod(U) sr_mod(U) usb_storage(U) joydev(U) button(U) > battery(U) ac(U) uhci_hcd(U) ehci_hcd(U) hw_random(U) ib_ipath(U) > ib_umad(U) ib_ucm(U) ib_uverbs(U) ib_cm(U) ib_sa(U) ib_mad(U) ib_core(U) > ata_piix(U) libata(U) ext3(U) jbd(U) tg3(U) s2io(U) qla2400(U) > qla2xxx(U) scsi_transport_fc(U) nfs(U) nfs_acl(U) lockd(U) sunrpc(U) > mptsas(U) mptscsi(U) mptbase(U) megaraid_sas(U) e1000(U) bnx2(U) > sd_mod(U) scsi_mod(U) > Pid: 15733, comm: collectl Not tainted 2.6.9-67.0.7.EL_lustre.1.6.5.1smp > RIP: 0010:[<ffffffff801623c4>] <ffffffff801623c4>{s_show+62} > RSP: 0018:0000010117989e68 EFLAGS: 00010006 > RAX: ffffffff80329f7a RBX: 00000100cffa5580 RCX: 00000100cffa5501 > RDX: 0000000000000004 RSI: 0000303a383a303a RDI: 
00000100cffa56e8 > RBP: ffffffff80329f7a R08: 00000000fffffffd R09: 0000000000000000 > R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 > R13: 0000000000001000 R14: 000001004c636500 R15: 0000000000000024 > FS: 0000002a9630ee80(0000) GS:ffffffff8048e880(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > CR2: 0000303a383a303a CR3: 00000000cfb24000 CR4: 00000000000006e0 > Process collectl (pid: 15733, threadinfo 0000010117988000, task > 0000010127176030) > Stack: 0000000000000000 0000000000000000 0000000000000009 00000100cffa5580 > 000001004c636500 0000000000000000 0000000000001000 0000000000000f0d > 0000000000000000 ffffffff80196c1a > Call Trace:<ffffffff80196c1a>{seq_read+445} <ffffffff80178c28>{vfs_read+207} > <ffffffff80178e84>{sys_read+69} <ffffffff8011022a>{system_call+126} > > > Code: 48 8b 06 0f 18 08 48 8d 83 18 01 00 00 48 39 c6 74 2e 8b 93 > RIP <ffffffff801623c4>{s_show+62} RSP <0000010117989e68> > CR2: 0000303a383a303a > <0>Kernel panic - not syncing: Oops > > Thanks, > > Wojciech > > Wojciech Turek wrote: > > Hi, > > > > I upgraded our test lustre file system to the latest 1.6.5.1 version > > available from the SUN website. > > I have one OSS with one OST and one MDS with combined MGS and MDT > > Both servers are running RHEL4 x86_64 and > > 2.6.9-67.0.7.EL_lustre.1.6.5.1smp kernel, the interconnect is infiniband > > and I am using ib modules provided with lustre. 
> > When I mount filesystem and then start writing to it OSS crashes with > > kernel panic, see log below: > > > > > > Lustre: 0:0:(watchdog.c:130:lcw_cb()) Watchdog triggered for pid 17398: > > it was inactive for 200s > > Lustre: 0:0:(linux-debug.c:167:libcfs_debug_dumpstack()) showing stack > > for process 17397 > > ll_ost_io_92 D 0000000000000002 0 17397 1 17398 17396 > > (L-TLB) > > 00000101156bf538 0000000000000046 00000101956bf616 ffffffff801ece0f > > 0000000000000000 ffffff0010776340 000001010e14c6c0 0000000100000001 > > 0000010113f90030 000000000012b585 > > Call Trace:ll_ost_io_82 D 000001012ab79400 0 17387 1 > > 17388 17386 (L-TLB) > > 000001011c252d88 0000000000000046 ffffffffa000288c 0000010115b213c0 > > 0000000000000246 00000100cf851c00 000001012bafa940 0000000200000000 > > 000001010f71f030 0000000000000814 > > Call Trace:<ffffffffa000288c>{:scsi_mod:scsi_done+0} > > <ffffffff801ece0f>{vsnprintf+1406} <ffffffff8024f658>{elv_next_request+238} > > <ffffffffa0007df8>{:scsi_mod:scsi_request_fn+1100} > > <ffffffff8030cc1f>{__down+147} > > <ffffffff80133804>{default_wake_function+0} > > <ffffffffa067b484>{:ko2iblnd:kiblnd_init_tx_msg+308} > > <ffffffff8030e2f6>{io_schedule+38} > > <ffffffff80179e24>{__wait_on_buffer+125} > > <ffffffff80179caa>{bh_wake_function+0} > > <ffffffff80179caa>{bh_wake_function+0} > > <ffffffffa07cad2b>{:ldiskfs:ldiskfs_mb_init_cache+635} > > <ffffffff8030e73d>{__down_failed+53} > > <ffffffffa06c6670>{:lquota:filter_quota_check+0} > > <ffffffffa0843acf>{:obdfilter:.text.lock.filter_io_26+35} > > <ffffffffa083b01e>{:obdfilter:filter_commitrw+126} > > <ffffffff80158b8e>{add_to_page_cache+167} > > <ffffffffa07cb450>{:ldiskfs:ldiskfs_mb_load_buddy+304} > > <ffffffffa07cc7b4>{:ldiskfs:ldiskfs_mb_regular_allocator+1028} > > <4>Lustre: 0:0:(watchdog.c:130:lcw_cb()) Watchdog triggered for > > pid 17388: it was inactive for 200s > > Lustre: 0:0:(watchdog.c:130:lcw_cb()) Skipped 2 previous similar messages > > Lustre: 
0:0:(linux-debug.c:167:libcfs_debug_dumpstack()) showing stack > > for process 17388 > > Lustre: 0:0:(linux-debug.c:167:libcfs_debug_dumpstack()) Skipped 1 > > previous similar message > > ll_ost_io_83 D 0000000000000002 0 17388 1 17389 17387 > > (L-TLB) > > 000001011c35f538 0000000000000046 000001019c35f616 ffffffff801ece0f > > 0000000000000000 ffffff0010776508 000001010e14c6c0 0000000000000001 > > 000001011c336800 00000000000de7ad > > Call Trace:<ffffffff801351dc>{autoremove_wake_function+0} > > <ffffffffa07d10fd>{:ldiskfs:ldiskfs_mb_new_blocks+333} > > <ffffffff801351dc>{autoremove_wake_function+0} > > <ffffffffa0814eb4>{:fsfilt_ldiskfs:ldiskfs_ext_new_extent_cb+884} > > <ffffffffa028e06a>{:ib_ipath:ipath_verbs_send+1883} > > <ffffffffa07c6d7f>{:ldiskfs:ldiskfs_ext_find_extent+255} > > <ffffffffa07c8972>{:ldiskfs:ldiskfs_ext_walk_space+482} > > <ffffffffa0814b40>{:fsfilt_ldiskfs:ldiskfs_ext_new_extent_cb+0} > > <ffffffffa0815343>{:fsfilt_ldiskfs:fsfilt_map_nblocks+307} > > <ffffffffa028560f>{:ib_ipath:ipath_do_send+1852} > > <ffffffff8013f734>{__mod_timer+293} > > > > <ffffffffa08155bb>{:fsfilt_ldiskfs:fsfilt_ldiskfs_map_ext_inode_pages+539} > > <ffffffff801ece0f>{vsnprintf+1406} > > <ffffffff801ece0f>{vsnprintf+1406} > > <ffffffffa067cb38>{:ko2iblnd:kiblnd_check_sends+2040} > > <ffffffffa08410b4>{:obdfilter:filter_direct_io+1108} > > <ffffffffa01ba524>{:jbd:journal_start+223} > > <ffffffffa081384b>{:fsfilt_ldiskfs:fsfilt_ldiskfs_brw_start+763} > > <ffffffffa0842c1d>{:obdfilter:filter_commitrw_write+4957} > > <ffffffffa04f5539>{:lvfs:pop_ctxt+505} > > <ffffffff8030e4ce>{schedule_timeout+411} > > <ffffffffa07f8688>{:ost:ost_checksum_bulk+200} > > <ffffffffa083b01e>{:obdfilter:filter_commitrw+126} > > <ffffffffa07fddd1>{:ost:ost_brw_write+9505} > > <ffffffffa07f87ee>{:ost:ost_checksum_bulk+558} > > <ffffffffa07f8688>{:ost:ost_checksum_bulk+200} > > <ffffffffa07fddd1>{:ost:ost_brw_write+9505} > > <ffffffff80133804>{default_wake_function+0} > > 
<ffffffff80133804>{default_wake_function+0} > > <ffffffffa06015ef>{:ptlrpc:lustre_msg_get_version+95} > > <ffffffffa06015ef>{:ptlrpc:lustre_msg_get_version+95} > > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > > <ffffffffa06016e5>{:ptlrpc:lustre_msg_check_version+69} > > <ffffffffa06016e5>{:ptlrpc:lustre_msg_check_version+69} > > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > > <ffffffffa080364d>{:ost:ost_handle+11661} > > <ffffffffa080364d>{:ost:ost_handle+11661} > > <ffffffff8015c830>{__rmqueue+218} > > <ffffffff8015c830>{__rmqueue+218} > > <ffffffffa060a451>{:ptlrpc:ptlrpc_check_req+17} > > <ffffffffa060a451>{:ptlrpc:ptlrpc_check_req+17} > > <ffffffffa060c629>{:ptlrpc:ptlrpc_server_handle_request+2457} > > <ffffffffa060c629>{:ptlrpc:ptlrpc_server_handle_request+2457} > > <ffffffffa04df45e>{:libcfs:lcw_update_time+30} > > <ffffffffa04df45e>{:libcfs:lcw_update_time+30} > > <ffffffff80133855>{__wake_up_common+67} > > <ffffffffa060ed05>{:ptlrpc:ptlrpc_main+3989} > > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > > <ffffffff8013f734>{__mod_timer+293} > > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > > <ffffffffa060ed05>{:ptlrpc:ptlrpc_main+3989} > > <ffffffff80110de3>{child_rip+8} <ffffffffa060dd70>{:ptlrpc:ptlrpc_main+0} > > <ffffffff80110ddb>{child_rip+0} > > <ffffffff8030cc1f>{__down+147} > > <ffffffff80133804>{default_wake_function+0} > > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > > <ffffffff80110de3>{child_rip+8} > > <ffffffffa060dd70>{:ptlrpc:ptlrpc_main+0} > > <ffffffff80110ddb>{child_rip+0} > > ll_ost_io_84 D 0000000000000002 0 17389 1 17390 17388 > > (L-TLB) > > 000001011ac6d538 0000000000000046 000001019ac6d616 ffffffff801ece0f > > 0000000000000000 ffffff0010776340 000001010e14c6c0 0000000100000001 > > 000001011c336030 000000000012c37b > > 
Call Trace:<ffffffff80133804>{default_wake_function+0} > > <ffffffffa067b484>{:ko2iblnd:kiblnd_init_tx_msg+308} > > <ffffffff801ece0f>{vsnprintf+1406} > > <ffffffff8030e73d>{__down_failed+53} > > <ffffffffa06c6670>{:lquota:filter_quota_check+0} > > <ffffffffa0843acf>{:obdfilter:.text.lock.filter_io_26+35} > > <ffffffff80131bc7>{recalc_task_prio+337} > > <ffffffffa05160a0>{:lnet:lnet_send+2544} > > <ffffffffa083b01e>{:obdfilter:filter_commitrw+126} > > <ffffffff8030cc1f>{__down+147} > > <ffffffff80133804>{default_wake_function+0} > > <ffffffffa067b484>{:ko2iblnd:kiblnd_init_tx_msg+308} > > <ffffffff8030e4ce>{schedule_timeout+411} > > <ffffffffa07f8688>{:ost:ost_checksum_bulk+200} > > <ffffffffa07fddd1>{:ost:ost_brw_write+9505} > > <ffffffff80133804>{default_wake_function+0} > > <ffffffffa06015ef>{:ptlrpc:lustre_msg_get_version+95} > > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > > <ffffffffa06016e5>{:ptlrpc:lustre_msg_check_version+69} > > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > > <ffffffffa080364d>{:ost:ost_handle+11661} > > <ffffffffa0516b48>{:lnet:lnet_match_blocked_msg+920} > > <ffffffff8015c830>{__rmqueue+218} > > <ffffffffa060a451>{:ptlrpc:ptlrpc_check_req+17} > > <ffffffffa060c629>{:ptlrpc:ptlrpc_server_handle_request+2457} > > <ffffffffa04df45e>{:libcfs:lcw_update_time+30} > > <ffffffff8030e73d>{__down_failed+53} > > <ffffffffa06c6670>{:lquota:filter_quota_check+0} > > <ffffffffa0843acf>{:obdfilter:.text.lock.filter_io_26+35} > > <ffffffff80133855>{__wake_up_common+67} > > <ffffffffa060ed05>{:ptlrpc:ptlrpc_main+3989} > > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > > <ffffffff80110de3>{child_rip+8} > > <ffffffffa060dd70>{:ptlrpc:ptlrpc_main+0} > > <ffffffff80131bc7>{recalc_task_prio+337} > > <ffffffff80110ddb>{child_rip+0} > > ll_ost_io_91 D<ffffffffa05160a0>{:lnet:lnet_send+2544} > > 0000000000000002 0 17396 1 17397 17395 (L-TLB) > > 
0000010113fb3538 0000000000000046 0000010193fb3616 ffffffff801ece0f > > 0000000000000000 ffffff0010776638 000001010e14c6c0 0000000000000001 > > 0000010113f90800 00000000000d4d86 > > Call Trace:<ffffffffa083b01e>{:obdfilter:filter_commitrw+126} > > <ffffffff801ece0f>{vsnprintf+1406} > > <ffffffff8030e4ce>{schedule_timeout+411} > > <ffffffffa07f8688>{:ost:ost_checksum_bulk+200} > > <ffffffffa07fddd1>{:ost:ost_brw_write+9505} > > <ffffffff80133804>{default_wake_function+0} > > <ffffffffa06015ef>{:ptlrpc:lustre_msg_get_version+95} > > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > > <ffffffffa06016e5>{:ptlrpc:lustre_msg_check_version+69} > > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > > <ffffffffa080364d>{:ost:ost_handle+11661} > > <ffffffffa0516ad7>{:lnet:lnet_match_blocked_msg+807} > > <ffffffffa0516b48>{:lnet:lnet_match_blocked_msg+920} > > <ffffffffa060a451>{:ptlrpc:ptlrpc_check_req+17} > > <ffffffffa060c629>{:ptlrpc:ptlrpc_server_handle_request+2457} > > <ffffffffa04df45e>{:libcfs:lcw_update_time+30} > > <ffffffff80133855>{__wake_up_common+67} > > <ffffffffa060ed05>{:ptlrpc:ptlrpc_main+3989} > > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > > <ffffffff80110de3>{child_rip+8} > > <ffffffffa060dd70>{:ptlrpc:ptlrpc_main+0} > > <ffffffff80110ddb>{child_rip+0} > > ll_ost_io_107 D 0000000000000002 0 17412 1 17413 17411 > > (L-TLB) > > 000001010ea6f538 0000000000000046 000001018ea6f616 ffffffff801ece0f > > 0000000000000000 ffffff0010776768 000001010e14c6c0 0000000100000001 > > 000001010ea48800 000000000012d2f5 > > Call Trace:<ffffffff8030cc1f>{__down+147} > > <ffffffff80133804>{default_wake_function+0} > > <ffffffffa067b484>{:ko2iblnd:kiblnd_init_tx_msg+308} > > <ffffffff801ece0f>{vsnprintf+1406} > > <ffffffff8030e73d>{__down_failed+53} > > <ffffffffa06c6670>{:lquota:filter_quota_check+0} > > <ffffffffa0843acf>{:obdfilter:.text.lock.filter_io_26+35} > > 
<ffffffffa083b01e>{:obdfilter:filter_commitrw+126} > > <ffffffff8030cc1f>{__down+147} > > <ffffffff80133804>{default_wake_function+0} > > <ffffffffa067b484>{:ko2iblnd:kiblnd_init_tx_msg+308} > > <ffffffff8030e4ce>{schedule_timeout+411} > > <ffffffffa07f8688>{:ost:ost_checksum_bulk+200} > > <ffffffffa07fddd1>{:ost:ost_brw_write+9505} > > <ffffffff80133804>{default_wake_function+0} > > <ffffffffa06015ef>{:ptlrpc:lustre_msg_get_version+95} > > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > > <ffffffffa06016e5>{:ptlrpc:lustre_msg_check_version+69} > > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > > <ffffffffa080364d>{:ost:ost_handle+11661} > > <ffffffff8015c830>{__rmqueue+218} > > <ffffffff8030e73d>{__down_failed+53} > > <ffffffffa06c6670>{:lquota:filter_quota_check+0} > > <ffffffffa0843acf>{:obdfilter:.text.lock.filter_io_26+35} > > <ffffffffa060a451>{:ptlrpc:ptlrpc_check_req+17} > > <ffffffffa060c629>{:ptlrpc:ptlrpc_server_handle_request+2457} > > <ffffffffa04df45e>{:libcfs:lcw_update_time+30} > > <ffffffff80131bc7>{recalc_task_prio+337} > > <ffffffffa05160a0>{:lnet:lnet_send+2544} > > <ffffffffa083b01e>{:obdfilter:filter_commitrw+126} > > <ffffffff8013f734>{__mod_timer+293} > > <ffffffffa060ed05>{:ptlrpc:ptlrpc_main+3989} > > <ffffffff80133804>{default_wake_function+0} > > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > > <ffffffff80110de3>{child_rip+8} > > <ffffffffa060dd70>{:ptlrpc:ptlrpc_main+0} > > <ffffffff80110ddb>{child_rip+0} > > <ffffffff8030e4ce>{schedule_timeout+411} > > <ffffffffa07f8688>{:ost:ost_checksum_bulk+200} > > <ffffffffa07fddd1>{:ost:ost_brw_write+9505} > > <ffffffff80133804>{default_wake_function+0} > > <ffffffffa06015ef>{:ptlrpc:lustre_msg_get_version+95} > > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > > <ffffffffa06016e5>{:ptlrpc:lustre_msg_check_version+69} > > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > > <ffffffffa080364d>{:ost:ost_handle+11661} > > 
<ffffffffa0516b48>{:lnet:lnet_match_blocked_msg+920} > > <ffffffffa060a451>{:ptlrpc:ptlrpc_check_req+17} > > <ffffffffa060c629>{:ptlrpc:ptlrpc_server_handle_request+2457} > > <ffffffffa04df45e>{:libcfs:lcw_update_time+30} > > <ffffffff8013f734>{__mod_timer+293} > > <ffffffffa060ed05>{:ptlrpc:ptlrpc_main+3989} > > <ffffffff80133804>{default_wake_function+0} > > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > > <ffffffff80110de3>{child_rip+8} > > <ffffffffa060dd70>{:ptlrpc:ptlrpc_main+0} > > <ffffffff80110ddb>{child_rip+0} > > ll_ost_io_121 D 0000000000000002 0 17426 1 17427 17425 > > (L-TLB) > > 0000010110113538 0000000000000046 0000010190113616 ffffffff801ece0f > > 0000000000000000 ffffff00107763d8 000001010e14c6c0 0000000100000001 > > 00000101100e6800 0000000000113501 > > Call Trace:<ffffffff801ece0f>{vsnprintf+1406} <ffffffff8030cc1f>{__down+147} > > <ffffffff80133804>{default_wake_function+0} > > <ffffffffa067b484>{:ko2iblnd:kiblnd_init_tx_msg+308} > > <ffffffff8030e73d>{__down_failed+53} > > <ffffffffa06c6670>{:lquota:filter_quota_check+0} > > <ffffffffa0843acf>{:obdfilter:.text.lock.filter_io_26+35} > > <ffffffffa083b01e>{:obdfilter:filter_commitrw+126} > > <ffffffff8030e4ce>{schedule_timeout+411} > > <ffffffffa07f8688>{:ost:ost_checksum_bulk+200} > > <ffffffffa07fddd1>{:ost:ost_brw_write+9505} > > <ffffffff80133804>{default_wake_function+0} > > <ffffffffa06015ef>{:ptlrpc:lustre_msg_get_version+95} > > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > > <ffffffffa06016e5>{:ptlrpc:lustre_msg_check_version+69} > > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > > <ffffffffa080364d>{:ost:ost_handle+11661} > > <ffffffff8015c830>{__rmqueue+218} > > <ffffffffa060a451>{:ptlrpc:ptlrpc_check_req+17} > > <ffffffffa060c629>{:ptlrpc:ptlrpc_server_handle_request+2457} > > <ffffffffa04df45e>{:libcfs:lcw_update_time+30} > > <ffffffff8013f734>{__mod_timer+293} > > <ffffffffa060ed05>{:ptlrpc:ptlrpc_main+3989} > > 
<ffffffff80133804>{default_wake_function+0} > > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > > <ffffffff80110de3>{child_rip+8} > > <ffffffffa060dd70>{:ptlrpc:ptlrpc_main+0} > > <ffffffff80110ddb>{child_rip+0} > > Lustre: 0:0:(watchdog.c:130:lcw_cb()) Skipped 4 previous similar messages > > Lustre: 0:0:(linux-debug.c:167:libcfs_debug_dumpstack()) showing stack > > for process 17398 > > Lustre: 0:0:(linux-debug.c:167:libcfs_debug_dumpstack()) Skipped 4 > > previous similar messages > > ll_ost_io_93 D 0000000000000002 0 17398 1 17399 17397 > > (L-TLB) > > 00000101157cd538 0000000000000046 00000101957cd616 ffffffff00000073 > > 0000000000000000 0000000010776508 0000010001053a20 0000000200000001 > > 00000101157a7800 0000000000161205 > > Call Trace:<ffffffff8030ec6c>{.text.lock.spinlock+2} > > <ffffffff8030cc1f>{__down+147} > > <ffffffff80133804>{default_wake_function+0} > > <ffffffffa067b484>{:ko2iblnd:kiblnd_init_tx_msg+308} > > <ffffffff8030e73d>{__down_failed+53} > > <ffffffffa06c6670>{:lquota:filter_quota_check+0} > > <ffffffffa0843acf>{:obdfilter:.text.lock.filter_io_26+35} > > <ffffffffa04f5539>{:lvfs:pop_ctxt+505} > > <ffffffff80131bc7>{recalc_task_prio+337} > > <ffffffffa05160a0>{:lnet:lnet_send+2544} > > <ffffffffa083b01e>{:obdfilter:filter_commitrw+126} > > <ffffffffa07f8801>{:ost:ost_checksum_bulk+577} > > <ffffffffa07f8688>{:ost:ost_checksum_bulk+200} > > <ffffffffa07fddd1>{:ost:ost_brw_write+9505} > > <ffffffff8017a62e>{end_buffer_async_read+0} > > <ffffffff80133804>{default_wake_function+0} > > <ffffffffa06015ef>{:ptlrpc:lustre_msg_get_version+95} > > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > > <ffffffffa06016e5>{:ptlrpc:lustre_msg_check_version+69} > > <ffffffffa07f8060>{:ost:ost_bulk_timeout+0} > > <ffffffffa080364d>{:ost:ost_handle+11661} > > <ffffffff8017a62e>{end_buffer_async_read+0} > > <ffffffff8017a62e>{end_buffer_async_read+0} > > <ffffffff8017a62e>{end_buffer_async_read+0} > > 
<ffffffff8017a62e>{end_buffer_async_read+0} > > <ffffffff8015c830>{__rmqueue+218} <ffffffff8015c830>{__rmqueue+218} > > <ffffffffa060a451>{:ptlrpc:ptlrpc_check_req+17} > > <ffffffffa060c629>{:ptlrpc:ptlrpc_server_handle_request+2457} > > <ffffffffa04df45e>{:libcfs:lcw_update_time+30} > > <ffffffff8013f734>{__mod_timer+293} > > <ffffffffa060ed05>{:ptlrpc:ptlrpc_main+3989} > > <ffffffff8017a62e>{end_buffer_async_read+0} > > <ffffffff80133804>{default_wake_function+0} > > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > > <ffffffffa060d270>{:ptlrpc:ptlrpc_retry_rqbds+0} > > <ffffffff80110de3>{child_rip+8} > > <ffffffffa060dd70>{:ptlrpc:ptlrpc_main+0} > > <ffffffff80110ddb>{child_rip+0} > > > > LustreError: 17397:0:(filter_io_26.c:700:filter_commitrw_write()) > > testfs-OST0001: slow i_mutex 223s > > LustreError: 17398:0:(filter_io_26.c:700:filter_commitrw_write()) > > testfs-OST0001: slow i_mutex 223s > > LustreError: 17387:0:(filter_io_26.c:765:filter_commitrw_write()) > > testfs-OST0001: slow direct_io 223s > > Lustre: 17397:0:(watchdog.c:312:lcw_update_time()) Expired watchdog for > > pid 17397 disabled after 223.2918s > > Lustre: 17387:0:(watchdog.c:312:lcw_update_time()) Expired watchdog for > > pid 17387 disabled after 223.3108s > > Lustre: 17387:0:(watchdog.c:312:lcw_update_time()) Skipped 5 previous > > similar messages > > slab: cache size-1620 error: slabs_full accounting error > > slab: cache size-1620 error: slabs_full accounting error > > slab: cache size-1620 error: slabs_full accounting error > > Unable to handle kernel NULL pointer dereference at 0000000000000000 RIP: > > <ffffffff801623c4>{s_show+62} > > PML4 112a23067 PGD 114d4d067 PMD 0 > > Oops: 0000 [1] SMP > > CPU 1 > > Modules linked in: obdfilter(U) fsfilt_ldiskfs(U) ost(U) mgc(U) > > ldiskfs(U) lustre(U) lov(U) mdc(U) lquota(U) osc(U) ko2iblnd(U) > > ptlrpc(U) obdclass(U) lnet(U) lvfs(U) libcfs(U) sg(U) dell_rbu(U) > > autofs4(U) i2c_nforce2(U) i2c_amd756(U) i2c_isa(U) i2c_amd8111(U) > > 
i2c_i801(U) i2c_core(U) qlgc_vnic(U) iw_cxgb3(U) cxgb3(U) mlx4_ib(U) > > mlx4_core(U) ib_mthca(U) ipmi_devintf(U) ipmi_si(U) ipmi_msghandler(U) > > rdma_ucm(U) ib_sdp(U) rdma_cm(U) iw_cm(U) ib_addr(U) ib_ipoib(U) md5(U) > > ipv6(U) cpufreq_powersave(U) mptctl(U) dm_mirror(U) dm_round_robin(U) > > dm_multipath(U) dm_mod(U) sr_mod(U) usb_storage(U) joydev(U) button(U) > > battery(U) ac(U) uhci_hcd(U) ehci_hcd(U) hw_random(U) ib_ipath(U) > > ib_umad(U) ib_ucm(U) ib_uverbs(U) ib_cm(U) ib_sa(U) ib_mad(U) ib_core(U) > > ata_piix(U) libata(U) ext3(U) jbd(U) tg3(U) s2io(U) qla2400(U) > > qla2xxx(U) scsi_transport_fc(U) nfs(U) nfs_acl(U) lockd(U) sunrpc(U) > > mptsas(U) mptscsi(U) mptbase(U) megaraid_sas(U) e1000(U) bnx2(U) > > sd_mod(U) scsi_mod(U) > > Pid: 15569, comm: collectl Not tainted 2.6.9-67.0.7.EL_lustre.1.6.5.1smp > > RIP: 0010:[<ffffffff801623c4>] <ffffffff801623c4>{s_show+62} > > RSP: 0018:0000010115823e68 EFLAGS: 00010012 > > RAX: ffffffff80329f7a RBX: 00000100cffa5580 RCX: 00000100cffa5501 > > RDX: 0000000000000004 RSI: 0000000000000000 RDI: 00000100cffa56e8 > > RBP: ffffffff80329f7a R08: 00000000fffffffd R09: 0000000000000000 > > R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 > > R13: 0000000000001000 R14: 000001012b617b80 R15: 0000000000000020 > > FS: 0000002a9630ee80(0000) GS:ffffffff8048e780(0000) knlGS:0000000000000000 > > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > > CR2: 0000000000000000 CR3: 000000012bd38000 CR4: 00000000000006e0 > > Process collectl (pid: 15569, threadinfo 0000010115822000, task > > 00000101280a4800) > > Stack: 0000000000000000 0000000000000000 0000000000000008 00000100cffa5580 > > 000001012b617b80 0000000000000000 0000000000001000 0000000000000ea2 > > 0000000000000000 ffffffff80196c1a > > Call Trace:<ffffffff80196c1a>{seq_read+445} <ffffffff80178c28>{vfs_read+207} > > <ffffffff80178e84>{sys_read+69} <ffffffff8011022a>{system_call+126} > > > > > > Code: 48 8b 06 0f 18 08 48 8d 83 18 01 00 00 48 39 c6 74 2e 
8b 93 > > RIP <ffffffff801623c4>{s_show+62} RSP <0000010115823e68> > > CR2: 0000000000000000 > > <0>Kernel panic - not syncing: Oops > > NMI Watchdog detected LOCKUP, CPU=2, registers: > > CPU 2 > > Modules linked in: obdfilter(U) fsfilt_ldiskfs(U) ost(U) mgc(U) > > ldiskfs(U) lustre(U) lov(U) mdc(U) lquota(U) osc(U) ko2iblnd(U) > > ptlrpc(U) obdclass(U) lnet(U) lvfs(U) libcfs(U) sg(U) dell_rbu(U) > > autofs4(U) i2c_nforce2(U) i2c_amd756(U) i2c_isa(U) i2c_amd8111(U) > > i2c_i801(U) i2c_core(U) qlgc_vnic(U) iw_cxgb3(U) cxgb3(U) mlx4_ib(U) > > mlx4_core(U) ib_mthca(U) ipmi_devintf(U) ipmi_si(U) ipmi_msghandler(U) > > rdma_ucm(U) ib_sdp(U) rdma_cm(U) iw_cm(U) ib_addr(U) ib_ipoib(U) md5(U) > > ipv6(U) cpufreq_powersave(U) mptctl(U) dm_mirror(U) dm_round_robin(U) > > dm_multipath(U) dm_mod(U) sr_mod(U) usb_storage(U) joydev(U) button(U) > > battery(U) ac(U) uhci_hcd(U) ehci_hcd(U) hw_random(U) ib_ipath(U) > > ib_umad(U) ib_ucm(U) ib_uverbs(U) ib_cm(U) ib_sa(U) ib_mad(U) ib_core(U) > > ata_piix(U) libata(U) ext3(U) jbd(U) tg3(U) s2io(U) qla2400(U) > > qla2xxx(U) scsi_transport_fc(U) nfs(U) nfs_acl(U) lockd(U) sunrpc(U) > > mptsas(U) mptscsi(U) mptbase(U) megaraid_sas(U) e1000(U) bnx2(U) > > sd_mod(U) scsi_mod(U) > > Pid: 12646, comm: klogd Not tainted 2.6.9-67.0.7.EL_lustre.1.6.5.1smp > > RIP: 0010:[<ffffffff8030ec6c>] <ffffffff8030ec6c>{.text.lock.spinlock+2} > > RSP: 0018:000001012a0a7b88 EFLAGS: 00000086 > > RAX: 0000000000000010 RBX: 00000100cffa56e8 RCX: 0000000000000000 > > RDX: 0000000000000000 RSI: 00000000000004d0 RDI: 00000100cffa56e8 > > RBP: 000001012bc1e0c0 R08: 000001012a0a7cf0 R09: 6d5f697363732029 > > R10: 0000000000000053 R11: 0000000000000246 R12: 00000100cffa5688 > > R13: 00000100cffa5580 R14: 00000000000004d0 R15: 00000000000003e6 > > FS: 0000002a958a5b00(0000) GS:ffffffff8048e800(0000) knlGS:0000000000000000 > > CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b > > CR2: 0000007fbfffa868 CR3: 000000012bd6e000 CR4: 00000000000006e0 > > Process klogd 
(pid: 12646, threadinfo 000001012a0a6000, task > > 00000101299e2030) > > Stack: 000000000000000c ffffffff80160901 00000100cffa5580 00000100cffa5580 > > 00000000000004d0 0000000000000400 0000000000000400 0000000000000000 > > 00000000000003e6 ffffffff801607d0 > > Call Trace:<ffffffff80160901>{cache_alloc_refill+96} > > <ffffffff801607d0>{__kmalloc+123} > > <ffffffff802adc58>{alloc_skb+65} > > <ffffffff802ac770>{sock_alloc_send_pskb+135} > > <ffffffff80133855>{__wake_up_common+67} > > <ffffffff801338ab>{__wake_up+54} > > <ffffffff8030899a>{unix_dgram_sendmsg+364} > > <ffffffff802aa430>{sock_aio_write+306} > > <ffffffff80178d0f>{do_sync_write+178} > > <ffffffff80137822>{do_syslog+482} > > <ffffffff801351dc>{autoremove_wake_function+0} > > <ffffffff801351dc>{autoremove_wake_function+0} > > <ffffffff801351dc>{autoremove_wake_function+0} > > <ffffffff80193ed0>{dnotify_parent+34} > > <ffffffff80178e1d>{vfs_write+226} <ffffffff80178ef2>{sys_write+69} > > <ffffffff8011022a>{system_call+126} > > > > Code: 83 3b 00 7e f9 e9 60 fc ff ff f3 90 83 3b 00 7e f9 e9 ce fc > > Kernel panic - not syncing: nmi watchdog > > <1>Unable to handle kernel NULL pointer dereference at 00000000000000ff > > RIP: > > [<00000000000000ff>] > > PML4 11b4a6067 PGD 0 > > Oops: 0010 [2] SMP > > CPU 2 > > Modules linked in: obdfilter(U) fsfilt_ldiskfs(U) ost(U) mgc(U) > > ldiskfs(U) lustre(U) lov(U) mdc(U) lquota(U) osc(U) ko2iblnd(U) > > ptlrpc(U) obdclass(U) lnet(U) lvfs(U) libcfs(U) sg(U) dell_rbu(U) > > autofs4(U) i2c_nforce2(U) i2c_amd756(U) i2c_isa(U) i2c_amd8111(U) > > i2c_i801(U) i2c_core(U) qlgc_vnic(U) iw_cxgb3(U) cxgb3(U) mlx4_ib(U) > > mlx4_core(U) ib_mthca(U) ipmi_devintf(U) ipmi_si(U) ipmi_msghandler(U) > > rdma_ucm(U) ib_sdp(U) rdma_cm(U) iw_cm(U) ib_addr(U) ib_ipoib(U) md5(U) > > ipv6(U) cpufreq_powersave(U) mptctl(U) dm_mirror(U) dm_round_robin(U) > > dm_multipath(U) dm_mod(U) sr_mod(U) usb_storage(U) joydev(U) button(U) > > battery(U) ac(U) uhci_hcd(U) ehci_hcd(U) hw_random(U) 
ib_ipath(U) > > ib_umad(U) ib_ucm(U) ib_uverbs(U) ib_cm(U) ib_sa(U) ib_mad(U) ib_core(U) > > ata_piix(U) libata(U) ext3(U) jbd(U) tg3(U) s2io(U) qla2400(U) > > qla2xxx(U) scsi_transport_fc(U) nfs(U) nfs_acl(U) lockd(U) sunrpc(U) > > mptsas(U) mptscsi(U) mptbase(U) megaraid_sas(U) e1000(U) bnx2(U) > > sd_mod(U) scsi_mod(U) > > Pid: 12646, comm: klogd Not tainted 2.6.9-67.0.7.EL_lustre.1.6.5.1smp > > RIP: 0010:[<00000000000000ff>] [<00000000000000ff>] > > RSP: 0018:00000100cfb03fa0 EFLAGS: 00010006 > > RAX: 000001012a0a7fd8 RBX: 0000000000000000 RCX: 0000000000000002 > > RDX: 00000000000000ff RSI: 0000000000000000 RDI: 0000000000000002 > > RBP: 000001012bd71f58 R08: 0000000000000020 R09: 0000000000000000 > > R10: 0000000000000000 R11: 0000000000000000 R12: 00000100cffa5688 > > R13: 00000100cffa5580 R14: 00000000000004d0 R15: 00000000000003e6 > > FS: 0000002a958a5b00(0000) GS:ffffffff8048e800(0000) knlGS:0000000000000000 > > CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b > > CR2: 00000000000000ff CR3: 000000012bd6e000 CR4: 00000000000006e0 > > Process klogd (pid: 12646, threadinfo 000001012a0a6000, task > > 00000101299e2030) > > Stack: ffffffff8011c56b ffffffff80321aac ffffffff80110a73 > > 000001012bd71cf8 <EOI> > > 000000000000000d ffffffff80321aac 000000000000000d ffffffff80324122 > > 0000000000000001 0000000000000000 > > Call Trace:<IRQ> <ffffffff8011c56b>{smp_call_function_interrupt+64} > > <ffffffff80110a73>{call_function_interrupt+99} <EOI> > > <ffffffff8011c51e>{smp_send_stop+76} > > <ffffffff8013744a>{panic+235} <ffffffff801116fc>{show_stack+241} > > <ffffffff80111826>{show_registers+277} > > <ffffffff80111b2d>{die_nmi+130} > > <ffffffff8011d042>{nmi_watchdog_tick+210} > > <ffffffff801123fa>{default_do_nmi+112} > > <ffffffff8011d0f8>{do_nmi+115} <ffffffff8011100f>{paranoid_exit+0} > > <ffffffff8030ec6c>{.text.lock.spinlock+2} > > > > Code: Bad RIP value. 
> > RIP [<00000000000000ff>] RSP <00000100cfb03fa0> > > CR2: 00000000000000ff > > <0>Kernel panic - not syncing: Oops > > Badness in panic at kernel/panic.c:118 > > > > Call Trace:<IRQ> <ffffffff8013756e>{panic+527} > > <ffffffff801130a5>{do_IRQ+266} > > <ffffffff801107d1>{ret_from_intr+0} <ffffffff80111988>{oops_end+38} > > <ffffffff801119a3>{oops_end+65} > > <ffffffff80123e29>{do_page_fault+1125} > > <ffffffff801ea99e>{kobject_release+0} > > <ffffffff80131c55>{activate_task+124} > > <ffffffff80132180>{try_to_wake_up+876} > > <ffffffff80110c2d>{error_exit+0} > > <ffffffff8011c56b>{smp_call_function_interrupt+64} > > <ffffffff80110a73>{call_function_interrupt+99} <EOI> > > <ffffffff8011c51e>{smp_send_stop+76} > > <ffffffff8013744a>{panic+235} <ffffffff801116fc>{show_stack+241} > > <ffffffff80111826>{show_registers+277} > > <ffffffff80111b2d>{die_nmi+130} > > <ffffffff8011d042>{nmi_watchdog_tick+210} > > <ffffffff801440f3>{notifier_call_chain+31} > > <ffffffff801123fa>{default_do_nmi+112} <ffffffff8011d0f8>{do_nmi+115} > > <ffffffff8011100f>{paranoid_exit+0} <ffffffff80111988>{oops_end+38} > > > > Badness in i8042_panic_blink at drivers/input/serio/i8042.c:987 > > > > Call Trace:<IRQ> <ffffffff8024478f>{i8042_panic_blink+238} > > <ffffffff8013751c>{panic+445} > > <ffffffff801130a5>{do_IRQ+266} <ffffffff801107d1>{ret_from_intr+0} > > <ffffffff80111988>{oops_end+38} <ffffffff801119a3>{oops_end+65} > > <ffffffff80123e29>{do_page_fault+1125} > > <ffffffff801ea99e>{kobject_release+0} > > <ffffffff80131c55>{activate_task+124} > > <ffffffff80132180>{try_to_wake_up+876} > > <ffffffff80110c2d>{error_exit+0} > > <ffffffff8011c56b>{smp_call_function_interrupt+64} > > <ffffffff80110a73>{call_function_interrupt+99} <EOI> > > <ffffffff8011c51e>{smp_send_stop+76} > > <ffffffff8013744a>{panic+235} <ffffffff801116fc>{show_stack+241} > > <ffffffff80111826>{show_registers+277} > > <ffffffff80111b2d>{die_nmi+130} > > <ffffffff8011d042>{nmi_watchdog_tick+210} > > 
<ffffffff801440f3>{notifier_call_chain+31} > > <ffffffff801123fa>{default_do_nmi+112} <ffffffff8011d0f8>{do_nmi+115} > > <ffffffff8011100f>{paranoid_exit+0} <ffffffff80111988>{oops_end+38} > > > > Badness in i8042_panic_blink at drivers/input/serio/i8042.c:990 > > > > Call Trace:<IRQ> <ffffffff80244821>{i8042_panic_blink+384} > > <ffffffff8013751c>{panic+445} > > <ffffffff801130a5>{do_IRQ+266} <ffffffff801107d1>{ret_from_intr+0} > > <ffffffff80111988>{oops_end+38} <ffffffff801119a3>{oops_end+65} > > <ffffffff80123e29>{do_page_fault+1125} > > <ffffffff801ea99e>{kobject_release+0} > > <ffffffff80131c55>{activate_task+124} > > <ffffffff80132180>{try_to_wake_up+876} > > <ffffffff80110c2d>{error_exit+0} > > <ffffffff8011c56b>{smp_call_function_interrupt+64} > > <ffffffff80110a73>{call_function_interrupt+99} <EOI> > > <ffffffff8011c51e>{smp_send_stop+76} > > <ffffffff8013744a>{panic+235} <ffffffff801116fc>{show_stack+241} > > <ffffffff80111826>{show_registers+277} > > <ffffffff80111b2d>{die_nmi+130} > > <ffffffff8011d042>{nmi_watchdog_tick+210} > > <ffffffff801440f3>{notifier_call_chain+31} > > <ffffffff801123fa>{default_do_nmi+112} <ffffffff8011d0f8>{do_nmi+115} > > <ffffffff8011100f>{paranoid_exit+0} <ffffffff80111988>{oops_end+38} > > > > Badness in i8042_panic_blink at drivers/input/serio/i8042.c:992 > > > > Call Trace:<IRQ> <ffffffff80244886>{i8042_panic_blink+485} > > <ffffffff8013751c>{panic+445} > > <ffffffff801130a5>{do_IRQ+266} <ffffffff801107d1>{ret_from_intr+0} > > <ffffffff80111988>{oops_end+38} <ffffffff801119a3>{oops_end+65} > > <ffffffff80123e29>{do_page_fault+1125} > > <ffffffff801ea99e>{kobject_release+0} > > <ffffffff80131c55>{activate_task+124} > > <ffffffff80132180>{try_to_wake_up+876} > > <ffffffff80110c2d>{error_exit+0} > > <ffffffff8011c56b>{smp_call_function_interrupt+64} > > <ffffffff80110a73>{call_function_interrupt+99} <EOI> > > <ffffffff8011c51e>{smp_send_stop+76} > > <ffffffff8013744a>{panic+235} <ffffffff801116fc>{show_stack+241} > > 
<ffffffff80111826>{show_registers+277} > > <ffffffff80111b2d>{die_nmi+130} > > <ffffffff8011d042>{nmi_watchdog_tick+210} > > <ffffffff801440f3>{notifier_call_chain+31} > > <ffffffff801123fa>{default_do_nmi+112} <ffffffff8011d0f8>{do_nmi+115} > > <ffffffff8011100f>{paranoid_exit+0} <ffffffff80111988>{oops_end+38} > > > > > > Thank you in advance for helping me with this. > > > > Regards, > > > > Wojciech > > > > > > -- > Wojciech Turek > > Assistant System Manager > High Performance Computing Service > University of Cambridge > Email: wjt27 at cam.ac.uk > Tel: (+)44 1223 763517 > > _______________________________________________ > Lustre-discuss mailing list > Lustre-discuss at lists.lustre.org > http://lists.lustre.org/mailman/listinfo/lustre-discuss Cheers, Andreas -- Andreas Dilger Sr. Staff Engineer, Lustre Group Sun Microsystems of Canada, Inc.