I get the following crash when I try to start up a guest on a low memory machine BUG: unable to handle kernel NULL pointer dereference at (null) IP: [<c0632641>] balloon_process+0x3e5/0x596 *pdpt = 0000000000aac001 *pde = 0000000000000000 Oops: 0000 [#1] SMP last sysfs file: /sys/devices/system/xen_memory/xen_memory0/info/current_kb Modules linked in: nfs lockd fscache nfs_acl nf_conntrack_ftp bridge stp llc autofs4 rpcsec_gss_krb5 auth_rpcgss des_generic sunrpc ip6table_filter ip6_tables ipv6 xen_evtchn xenfs snd_intel8x0 snd_ac97_codec ac97_bus snd_seq snd_seq_device ppdev snd_pcm parport_pc parport snd_timer e100 snd soundcore mii iTCO_wdt snd_page_alloc i2c_i801 iTCO_vendor_support i915 drm_kms_helper drm i2c_algo_bit i2c_core video output [last unloaded: scsi_wait_scan] Pid: 6, comm: events/0 Not tainted (2.6.32.14-1.2.105.xendom0.fc12.i686.PAE #1) EIP: 0061:[<c0632641>] EFLAGS: 00010046 CPU: 0 EIP is at balloon_process+0x3e5/0x596 EAX: c25f6000 EBX: 00000f02 ECX: c269e038 EDX: 00000000 ESI: 00000000 EDI: 00018f02 EBP: dc09df6c ESP: dc09df08 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0069 Process events/0 (pid: 6, ti=dc09c000 task=dc083fc0 task.ti=dc09c000) Stack: dc09df38 dc09df4c c0ab0288 00000000 00000001 ffffb4d7 00000000 00018f02 00000f02 00000000 000003a9 00000000 c0407250 c2a10004 c2a19b04 c2a10004 c0407247 00000000 00000000 00000000 00000000 00007ff0 c2a1d460 c09db2dc Call Trace: [<c0407250>] ? check_events+0x8/0xc [<c0407247>] ? xen_restore_fl_direct_end+0x0/0x1 [<c0458d11>] ? worker_thread+0x140/0x1b9 [<c063225c>] ? balloon_process+0x0/0x596 [<c045c79d>] ? autoremove_wake_function+0x0/0x34 [<c0458bd1>] ? worker_thread+0x0/0x1b9 [<c045c565>] ? kthread+0x64/0x69 [<c045c501>] ? kthread+0x0/0x69 [<c040ac07>] ? kernel_thread_helper+0x7/0x10 Code: ff 25 ff ff ff 7f 8b 55 c8 39 f8 89 04 95 e0 7e b6 c0 b8 01 00 00 00 0f 44 45 a8 83 ca ff 89 45 a8 89 d8 e8 ed 3f dd ff 8b 55 b4 <8b> 02 c1 e8 1e 69 c0 00 0b 00 00 05 80 13 9f c0 2b 80 ec 0a 00 EIP: [<c0632641>] balloon_process+0x3e5/0x596 SS:ESP 0069:dc09df08 CR2: 0000000000000000 The relevant address is 0xc0632641 <balloon_process+997>: mov (%edx),%eax and the context is 0xc06325e1 <balloon_process+901>: mov (%eax),%edx 0xc06325e3 <balloon_process+903>: mov %edx,-0x44(%ebp) 0xc06325e6 <balloon_process+906>: mov -0x44(%ebp),%eax 0xc06325e9 <balloon_process+909>: shl $0x5,%eax 0xc06325ec <balloon_process+912>: add 0xc0b5c740,%eax 0xc06325f2 <balloon_process+918>: call 0xc06321e2 <balloon_append> 0xc06325f7 <balloon_process+923>: movl $0x0,-0x58(%ebp) 0xc06325fe <balloon_process+930>: movl $0x0,-0x38(%ebp) 0xc0632605 <balloon_process+937>: jmp 0xc063267f <balloon_process+1059> 0xc0632607 <balloon_process+939>: cmpb $0x0,0xc0a105ae 0xc063260e <balloon_process+946>: mov %ebx,%eax 0xc0632610 <balloon_process+948>: jne 0xc063261c <balloon_process+960> 0xc0632612 <balloon_process+950>: call 0xc0404eff <get_phys_to_machine> 0xc0632617 <balloon_process+955>: and $0x7fffffff,%eax 0xc063261c <balloon_process+960>: mov -0x38(%ebp),%edx 0xc063261f <balloon_process+963>: cmp %edi,%eax 0xc0632621 <balloon_process+965>: mov %eax,-0x3f498120(,%edx,4) 0xc0632628 <balloon_process+972>: mov $0x1,%eax 0xc063262d <balloon_process+977>: cmove -0x58(%ebp),%eax 0xc0632631 <balloon_process+981>: or $0xffffffff,%edx 0xc0632634 <balloon_process+984>: mov %eax,-0x58(%ebp) 0xc0632637 <balloon_process+987>: mov %ebx,%eax 0xc0632639 <balloon_process+989>: call 0xc040662b <set_phys_to_machine> 0xc063263e <balloon_process+994>: mov -0x4c(%ebp),%edx 0xc0632641 <balloon_process+997>: mov (%edx),%eax 0xc0632643 <balloon_process+999>: shr $0x1e,%eax 0xc0632646 <balloon_process+1002>: imul $0xb00,%eax,%eax 0xc063264c <balloon_process+1008>: add $0xc09f1380,%eax 0xc0632651 <balloon_process+1013>: sub 0xaec(%eax),%eax 0xc0632657 <balloon_process+1019>: cmp $0x1600,%eax 0xc063265c <balloon_process+1024>: je 0xc063267c <balloon_process+1056> 0xc063265e <balloon_process+1026>: cmp $0x2100,%eax 0xc0632663 <balloon_process+1031>: jne 0xc06327d1 <balloon_process+1397> 0xc0632669 <balloon_process+1037>: cmpl $0x2,0xc0b58d3c 0xc0632670 <balloon_process+1044>: jne 0xc06327d1 <balloon_process+1397> 0xc0632676 <balloon_process+1050>: jmp 0xc063267c <balloon_process+1056> 0xc0632678 <balloon_process+1052>: ud2a 0xc063267a <balloon_process+1054>: jmp 0xc063267a <balloon_process+1054> 0xc063267c <balloon_process+1056>: incl -0x38(%ebp) 0xc063267f <balloon_process+1059>: mov -0x38(%ebp),%edi 0xc0632682 <balloon_process+1062>: mov -0x38(%ebp),%ebx 0xc0632685 <balloon_process+1065>: mov 0xc0b67edc,%edx 0xc063268b <balloon_process+1071>: add -0x48(%ebp),%edi 0xc063268e <balloon_process+1074>: add -0x44(%ebp),%ebx 0xc0632691 <balloon_process+1077>: cmp %edx,-0x38(%ebp) 0xc0632694 <balloon_process+1080>: jb 0xc0632607 <balloon_process+939> 0xc063269a <balloon_process+1086>: cmpl $0x0,-0x58(%ebp) 0xc063269e <balloon_process+1090>: je 0xc06326e3 <balloon_process+1159> 0xc06326a0 <balloon_process+1092>: mov -0x64(%ebp),%edi 0xc06326a3 <balloon_process+1095>: xor %eax,%eax so the crash occurs just after a call to set_phys_to_machine Michael Young _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com lists.xensource.com/xen-devel
On Thu, 3 Jun 2010, M A Young wrote:> I get the following crash when I try to start up a guest on a low memory > machine > > BUG: unable to handle kernel NULL pointer dereference at (null) > IP: [<c0632641>] balloon_process+0x3e5/0x596 > *pdpt = 0000000000aac001 *pde = 0000000000000000 Oops: 0000 [#1] SMP last > sysfs file: /sys/devices/system/xen_memory/xen_memory0/info/current_kb > Modules linked in: nfs lockd fscache nfs_acl nf_conntrack_ftp bridge stp llc > autofs4 rpcsec_gss_krb5 auth_rpcgss des_generic sunrpc ip6table_filter > ip6_tables ipv6 xen_evtchn xenfs snd_intel8x0 snd_ac97_codec ac97_bus snd_seq > snd_seq_device ppdev snd_pcm parport_pc parport snd_timer e100 snd soundcore > mii iTCO_wdt snd_page_alloc i2c_i801 iTCO_vendor_support i915 drm_kms_helper > drm i2c_algo_bit i2c_core video output [last unloaded: scsi_wait_scan] > Pid: 6, comm: events/0 Not tainted (2.6.32.14-1.2.105.xendom0.fc12.i686.PAE > #1) EIP: 0061:[<c0632641>] EFLAGS: 00010046 CPU: 0 > EIP is at balloon_process+0x3e5/0x596 > EAX: c25f6000 EBX: 00000f02 ECX: c269e038 EDX: 00000000 > ESI: 00000000 EDI: 00018f02 EBP: dc09df6c ESP: dc09df08 > DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0069 > Process events/0 (pid: 6, ti=dc09c000 task=dc083fc0 task.ti=dc09c000) > Stack: > dc09df38 dc09df4c c0ab0288 00000000 00000001 ffffb4d7 00000000 00018f02 > 00000f02 00000000 000003a9 00000000 c0407250 c2a10004 c2a19b04 c2a10004 > c0407247 00000000 00000000 00000000 00000000 00007ff0 c2a1d460 c09db2dc > Call Trace: > [<c0407250>] ? check_events+0x8/0xc > [<c0407247>] ? xen_restore_fl_direct_end+0x0/0x1 > [<c0458d11>] ? worker_thread+0x140/0x1b9 > [<c063225c>] ? balloon_process+0x0/0x596 > [<c045c79d>] ? autoremove_wake_function+0x0/0x34 > [<c0458bd1>] ? worker_thread+0x0/0x1b9 > [<c045c565>] ? kthread+0x64/0x69 > [<c045c501>] ? kthread+0x0/0x69 > [<c040ac07>] ? kernel_thread_helper+0x7/0x10 > Code: ff 25 ff ff ff 7f 8b 55 c8 39 f8 89 04 95 e0 7e b6 c0 b8 01 00 00 00 0f > 44 45 a8 83 ca ff 89 45 a8 89 d8 e8 ed 3f dd ff 8b 55 b4 <8b> 02 c1 e8 1e 69 > c0 00 0b 00 00 05 80 13 9f c0 2b 80 ec 0a 00 EIP: [<c0632641>] > balloon_process+0x3e5/0x596 SS:ESP 0069:dc09df08 > CR2: 0000000000000000 > > The relevant address is > 0xc0632641 <balloon_process+997>: mov (%edx),%eaxThis seems to be about line 343 of drivers/xen/balloon.c in the subroutine decrease_reservation which is for (j = 0; j < balloon_npages; j++, lpfn++, mfn++) { if ((discontig_frame_list[j] = pfn_to_mfn(lpfn)) != mfn) discontig_free = 1; set_phys_to_machine(lpfn, INVALID_P2M_ENTRY); /* here */ if (!PageHighMem(page)) { ret = HYPERVISOR_update_va_mapping( (unsigned long)__va(lpfn << PAGE_SHIFT), __pte_ma(0), 0); BUG_ON(ret); } }>From what I can tell page is meaningless in this context as it is justa temporary variable used in the previous loop, so I would guess that PageHighMem should be checking something else, or page should be set somewhere eg. at a guess page=pfn_to_page(lpfn); Michael Young _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com lists.xensource.com/xen-devel
On Thu, 2010-06-03 at 20:37 +0100, M A Young wrote:> On Thu, 3 Jun 2010, M A Young wrote: > > > I get the following crash when I try to start up a guest on a low memory > > machine > > > > BUG: unable to handle kernel NULL pointer dereference at (null) > > IP: [<c0632641>] balloon_process+0x3e5/0x596 > > *pdpt = 0000000000aac001 *pde = 0000000000000000 Oops: 0000 [#1] SMP last > > sysfs file: /sys/devices/system/xen_memory/xen_memory0/info/current_kb > > Modules linked in: nfs lockd fscache nfs_acl nf_conntrack_ftp bridge stp llc > > autofs4 rpcsec_gss_krb5 auth_rpcgss des_generic sunrpc ip6table_filter > > ip6_tables ipv6 xen_evtchn xenfs snd_intel8x0 snd_ac97_codec ac97_bus snd_seq > > snd_seq_device ppdev snd_pcm parport_pc parport snd_timer e100 snd soundcore > > mii iTCO_wdt snd_page_alloc i2c_i801 iTCO_vendor_support i915 drm_kms_helper > > drm i2c_algo_bit i2c_core video output [last unloaded: scsi_wait_scan] > > Pid: 6, comm: events/0 Not tainted (2.6.32.14-1.2.105.xendom0.fc12.i686.PAE > > #1) EIP: 0061:[<c0632641>] EFLAGS: 00010046 CPU: 0 > > EIP is at balloon_process+0x3e5/0x596 > > EAX: c25f6000 EBX: 00000f02 ECX: c269e038 EDX: 00000000 > > ESI: 00000000 EDI: 00018f02 EBP: dc09df6c ESP: dc09df08 > > DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0069 > > Process events/0 (pid: 6, ti=dc09c000 task=dc083fc0 task.ti=dc09c000) > > Stack: > > dc09df38 dc09df4c c0ab0288 00000000 00000001 ffffb4d7 00000000 00018f02 > > 00000f02 00000000 000003a9 00000000 c0407250 c2a10004 c2a19b04 c2a10004 > > c0407247 00000000 00000000 00000000 00000000 00007ff0 c2a1d460 c09db2dc > > Call Trace: > > [<c0407250>] ? check_events+0x8/0xc > > [<c0407247>] ? xen_restore_fl_direct_end+0x0/0x1 > > [<c0458d11>] ? worker_thread+0x140/0x1b9 > > [<c063225c>] ? balloon_process+0x0/0x596 > > [<c045c79d>] ? autoremove_wake_function+0x0/0x34 > > [<c0458bd1>] ? worker_thread+0x0/0x1b9 > > [<c045c565>] ? kthread+0x64/0x69 > > [<c045c501>] ? kthread+0x0/0x69 > > [<c040ac07>] ? kernel_thread_helper+0x7/0x10 > > Code: ff 25 ff ff ff 7f 8b 55 c8 39 f8 89 04 95 e0 7e b6 c0 b8 01 00 00 00 0f > > 44 45 a8 83 ca ff 89 45 a8 89 d8 e8 ed 3f dd ff 8b 55 b4 <8b> 02 c1 e8 1e 69 > > c0 00 0b 00 00 05 80 13 9f c0 2b 80 ec 0a 00 EIP: [<c0632641>] > > balloon_process+0x3e5/0x596 SS:ESP 0069:dc09df08 > > CR2: 0000000000000000 > > > > The relevant address is > > 0xc0632641 <balloon_process+997>: mov (%edx),%eax > > This seems to be about line 343 of drivers/xen/balloon.c in the subroutine > decrease_reservation which is > for (j = 0; j < balloon_npages; j++, lpfn++, mfn++) { > if ((discontig_frame_list[j] = pfn_to_mfn(lpfn)) > != mfn) > discontig_free = 1; > > set_phys_to_machine(lpfn, INVALID_P2M_ENTRY); > /* here */ if (!PageHighMem(page)) { > ret = HYPERVISOR_update_va_mapping( > (unsigned long)__va(lpfn << > PAGE_SHIFT), > __pte_ma(0), 0); > BUG_ON(ret); > } > } > > >From what I can tell page is meaningless in this context as it is just > a temporary variable used in the previous loop, so I would > guess that PageHighMem should be checking something else, or page should > be set somewhere eg. at a guess page=pfn_to_page(lpfn);That would be my guess also. CCing Dave McCracken who looks to have introduced this code in 0e898d5e "Add hugepage support to balloon driver" Ian. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com lists.xensource.com/xen-devel
On Thursday, June 03, 2010, Ian Campbell wrote:> > This seems to be about line 343 of drivers/xen/balloon.c in the > > subroutine decrease_reservation which is > > > > for (j = 0; j < balloon_npages; j++, lpfn++, mfn++) { > > if ((discontig_frame_list[j] > >pfn_to_mfn(lpfn)) > > > > != mfn) > > > > discontig_free = 1; > > > > set_phys_to_machine(lpfn, INVALID_P2M_ENTRY); > > > > /* here */ if (!PageHighMem(page)) { > > > > ret = HYPERVISOR_update_va_mapping( > > (unsigned long)__va(lpfn << > > > > PAGE_SHIFT), > > > > __pte_ma(0), 0); > > BUG_ON(ret); > > } > > } > > > > > > >From what I can tell page is meaningless in this context as it is just > > > > a temporary variable used in the previous loop, so I would > > guess that PageHighMem should be checking something else, or page should > > be set somewhere eg. at a guess page=pfn_to_page(lpfn); > > That would be my guess also. CCing Dave McCracken who looks to have > introduced this code in 0e898d5e "Add hugepage support to balloon > driver"Yes, that is definitely an error. The offending line should be if (!PageHighMem(pfn_to_page(lpfn))) { Dave McCracken Oracle Corp. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com lists.xensource.com/xen-devel
On Thu, 3 Jun 2010, Dave McCracken wrote:> On Thursday, June 03, 2010, Ian Campbell wrote: >>> This seems to be about line 343 of drivers/xen/balloon.c in the >>> subroutine decrease_reservation which is >>> >>> for (j = 0; j < balloon_npages; j++, lpfn++, mfn++) { >>> if ((discontig_frame_list[j] >>> pfn_to_mfn(lpfn)) >>> >>> != mfn) >>> >>> discontig_free = 1; >>> >>> set_phys_to_machine(lpfn, INVALID_P2M_ENTRY); >>> >>> /* here */ if (!PageHighMem(page)) { >>> >>> ret = HYPERVISOR_update_va_mapping( >>> (unsigned long)__va(lpfn << >>> >>> PAGE_SHIFT), >>> >>> __pte_ma(0), 0); >>> BUG_ON(ret); >>> } >>> } >>> >>> >>>> From what I can tell page is meaningless in this context as it is just >>> >>> a temporary variable used in the previous loop, so I would >>> guess that PageHighMem should be checking something else, or page should >>> be set somewhere eg. at a guess page=pfn_to_page(lpfn); >> >> That would be my guess also. CCing Dave McCracken who looks to have >> introduced this code in 0e898d5e "Add hugepage support to balloon >> driver" > > Yes, that is definitely an error. The offending line should be > > if (!PageHighMem(pfn_to_page(lpfn))) { > > Dave McCracken > Oracle Corp.I have tested the corrected line and it does fix the crash I was seeing. Michael Young _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com lists.xensource.com/xen-devel
On Mon, 2010-06-07 at 20:29 +0100, M A Young wrote:> On Thu, 3 Jun 2010, Dave McCracken wrote: > > > On Thursday, June 03, 2010, Ian Campbell wrote: > >>> This seems to be about line 343 of drivers/xen/balloon.c in the > >>> subroutine decrease_reservation which is > >>> > >>> for (j = 0; j < balloon_npages; j++, lpfn++, mfn++) { > >>> if ((discontig_frame_list[j] > >>> pfn_to_mfn(lpfn)) > >>> > >>> != mfn) > >>> > >>> discontig_free = 1; > >>> > >>> set_phys_to_machine(lpfn, INVALID_P2M_ENTRY); > >>> > >>> /* here */ if (!PageHighMem(page)) { > >>> > >>> ret = HYPERVISOR_update_va_mapping( > >>> (unsigned long)__va(lpfn << > >>> > >>> PAGE_SHIFT), > >>> > >>> __pte_ma(0), 0); > >>> BUG_ON(ret); > >>> } > >>> } > >>> > >>> > >>>> From what I can tell page is meaningless in this context as it is just > >>> > >>> a temporary variable used in the previous loop, so I would > >>> guess that PageHighMem should be checking something else, or page should > >>> be set somewhere eg. at a guess page=pfn_to_page(lpfn); > >> > >> That would be my guess also. CCing Dave McCracken who looks to have > >> introduced this code in 0e898d5e "Add hugepage support to balloon > >> driver" > > > > Yes, that is definitely an error. The offending line should be > > > > if (!PageHighMem(pfn_to_page(lpfn))) { > > > > Dave McCracken > > Oracle Corp. > > I have tested the corrected line and it does fix the crash I was seeing.Please can one of you generate a patch and send it to the list. Ian. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com lists.xensource.com/xen-devel
On Tue, 8 Jun 2010, Ian Campbell wrote:> On Mon, 2010-06-07 at 20:29 +0100, M A Young wrote: >> On Thu, 3 Jun 2010, Dave McCracken wrote: >> >>> On Thursday, June 03, 2010, Ian Campbell wrote: >>>>> This seems to be about line 343 of drivers/xen/balloon.c in the >>>>> subroutine decrease_reservation which is >>>>> >>>>> for (j = 0; j < balloon_npages; j++, lpfn++, mfn++) { >>>>> if ((discontig_frame_list[j] >>>>> pfn_to_mfn(lpfn)) >>>>> >>>>> != mfn) >>>>> >>>>> discontig_free = 1; >>>>> >>>>> set_phys_to_machine(lpfn, INVALID_P2M_ENTRY); >>>>> >>>>> /* here */ if (!PageHighMem(page)) { >>>>> >>>>> ret = HYPERVISOR_update_va_mapping( >>>>> (unsigned long)__va(lpfn << >>>>> >>>>> PAGE_SHIFT), >>>>> >>>>> __pte_ma(0), 0); >>>>> BUG_ON(ret); >>>>> } >>>>> } >>>>> >>>>> >>>>>> From what I can tell page is meaningless in this context as it is just >>>>> >>>>> a temporary variable used in the previous loop, so I would >>>>> guess that PageHighMem should be checking something else, or page should >>>>> be set somewhere eg. at a guess page=pfn_to_page(lpfn); >>>> >>>> That would be my guess also. CCing Dave McCracken who looks to have >>>> introduced this code in 0e898d5e "Add hugepage support to balloon >>>> driver" >>> >>> Yes, that is definitely an error. The offending line should be >>> >>> if (!PageHighMem(pfn_to_page(lpfn))) { >>> >>> Dave McCracken >>> Oracle Corp. >> >> I have tested the corrected line and it does fix the crash I was seeing. > > Please can one of you generate a patch and send it to the list.Attached. Michael Young _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com lists.xensource.com/xen-devel
On Tue, 2010-06-08 at 09:28 +0100, M A Young wrote:> On Tue, 8 Jun 2010, Ian Campbell wrote: > > > On Mon, 2010-06-07 at 20:29 +0100, M A Young wrote: > >> On Thu, 3 Jun 2010, Dave McCracken wrote: > >> > >>> On Thursday, June 03, 2010, Ian Campbell wrote: > >>>>> This seems to be about line 343 of drivers/xen/balloon.c in the > >>>>> subroutine decrease_reservation which is > >>>>> > >>>>> for (j = 0; j < balloon_npages; j++, lpfn++, mfn++) { > >>>>> if ((discontig_frame_list[j] > >>>>> pfn_to_mfn(lpfn)) > >>>>> > >>>>> != mfn) > >>>>> > >>>>> discontig_free = 1; > >>>>> > >>>>> set_phys_to_machine(lpfn, INVALID_P2M_ENTRY); > >>>>> > >>>>> /* here */ if (!PageHighMem(page)) { > >>>>> > >>>>> ret = HYPERVISOR_update_va_mapping( > >>>>> (unsigned long)__va(lpfn << > >>>>> > >>>>> PAGE_SHIFT), > >>>>> > >>>>> __pte_ma(0), 0); > >>>>> BUG_ON(ret); > >>>>> } > >>>>> } > >>>>> > >>>>> > >>>>>> From what I can tell page is meaningless in this context as it is just > >>>>> > >>>>> a temporary variable used in the previous loop, so I would > >>>>> guess that PageHighMem should be checking something else, or page should > >>>>> be set somewhere eg. at a guess page=pfn_to_page(lpfn); > >>>> > >>>> That would be my guess also. CCing Dave McCracken who looks to have > >>>> introduced this code in 0e898d5e "Add hugepage support to balloon > >>>> driver" > >>> > >>> Yes, that is definitely an error. The offending line should be > >>> > >>> if (!PageHighMem(pfn_to_page(lpfn))) { > >>> > >>> Dave McCracken > >>> Oracle Corp. > >> > >> I have tested the corrected line and it does fix the crash I was seeing. > > > > Please can one of you generate a patch and send it to the list. > > Attached.Thanks, but I meant with a checkin comment and a signed-off-by etc, plus CC to Jeremy... Ian.> > Michael Young_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com lists.xensource.com/xen-devel
This patch fixes an error in commit f4685d0ed041523d1bd5124c5872459681aca876 "Add hugepage support to balloon driver" that was causing the balloon driver to crash when shrinking the memory used by Domain-0. Signed-off-by: Michael Young <m.a.young@durham.ac.uk> --- linux-2.6.32.x86_64/drivers/xen/balloon.c.orig 2010-06-05 22:43:11.000000000 +0100 +++ linux-2.6.32.x86_64/drivers/xen/balloon.c 2010-06-05 22:47:03.000000000 +0100 @@ -340,7 +340,7 @@ discontig_free = 1; set_phys_to_machine(lpfn, INVALID_P2M_ENTRY); - if (!PageHighMem(page)) { + if (!PageHighMem(pfn_to_page(lpfn))) { ret = HYPERVISOR_update_va_mapping( (unsigned long)__va(lpfn << PAGE_SHIFT), __pte_ma(0), 0); _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com lists.xensource.com/xen-devel
On Tue, Jun 08, 2010 at 10:25:35AM +0100, M A Young wrote:> This patch fixes an error in commit f4685d0ed041523d1bd5124c5872459681aca876 > "Add hugepage support to balloon driver" that was causing the balloon > driver to crash when shrinking the memory used by Domain-0. >Should this be applied to both xen/stable-2.6.31.x and xen/stable-2.6.32.x ? -- Pasi> Signed-off-by: Michael Young <m.a.young@durham.ac.uk> > > --- linux-2.6.32.x86_64/drivers/xen/balloon.c.orig 2010-06-05 22:43:11.000000000 +0100 > +++ linux-2.6.32.x86_64/drivers/xen/balloon.c 2010-06-05 22:47:03.000000000 +0100 > @@ -340,7 +340,7 @@ > discontig_free = 1; > > set_phys_to_machine(lpfn, INVALID_P2M_ENTRY); > - if (!PageHighMem(page)) { > + if (!PageHighMem(pfn_to_page(lpfn))) { > ret = HYPERVISOR_update_va_mapping( > (unsigned long)__va(lpfn << PAGE_SHIFT), > __pte_ma(0), 0); > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xensource.com > lists.xensource.com/xen-devel_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com lists.xensource.com/xen-devel
On Tue, 8 Jun 2010, Pasi Kärkkäinen wrote:> On Tue, Jun 08, 2010 at 10:25:35AM +0100, M A Young wrote: >> This patch fixes an error in commit f4685d0ed041523d1bd5124c5872459681aca876 >> "Add hugepage support to balloon driver" that was causing the balloon >> driver to crash when shrinking the memory used by Domain-0. >> > > Should this be applied to both xen/stable-2.6.31.x and xen/stable-2.6.32.x ?Yes. The patch I generated was based on xen/stable-2.6.32.x but xen/stable-2.6.31.x also has the "Add hugepage support to balloon driver" patch that needs fixing (commit 5f40d7cbebdd55598f38e97b1aae2a6e471b060) - perhaps other branches do as well. Michael Young _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com lists.xensource.com/xen-devel
On 06/08/2010 02:25 AM, M A Young wrote:> This patch fixes an error in commit > f4685d0ed041523d1bd5124c5872459681aca876 > "Add hugepage support to balloon driver" that was causing the balloon > driver to crash when shrinking the memory used by Domain-0.Thanks. But why haven''t I seen this? J> > Signed-off-by: Michael Young <m.a.young@durham.ac.uk> > > --- linux-2.6.32.x86_64/drivers/xen/balloon.c.orig 2010-06-05 > 22:43:11.000000000 +0100 > +++ linux-2.6.32.x86_64/drivers/xen/balloon.c 2010-06-05 > 22:47:03.000000000 +0100 > @@ -340,7 +340,7 @@ > discontig_free = 1; > > set_phys_to_machine(lpfn, INVALID_P2M_ENTRY); > - if (!PageHighMem(page)) { > + if (!PageHighMem(pfn_to_page(lpfn))) { > ret = HYPERVISOR_update_va_mapping( > (unsigned long)__va(lpfn << PAGE_SHIFT), > __pte_ma(0), 0); >_______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com lists.xensource.com/xen-devel
On Tue, 8 Jun 2010, Jeremy Fitzhardinge wrote:> On 06/08/2010 02:25 AM, M A Young wrote: >> This patch fixes an error in commit >> f4685d0ed041523d1bd5124c5872459681aca876 >> "Add hugepage support to balloon driver" that was causing the balloon >> driver to crash when shrinking the memory used by Domain-0. > > Thanks. But why haven''t I seen this?Good question, but I think you only see the crash in a limited memory situation. I don''t claim to be an expert in what is going on, but a few lines earlier you have the the loop for (i = 0; i < nr_pages; i++) { if ((page = alloc_pages(GFP_BALLOON, balloon_order)) == NULL) { nr_pages = i; need_sleep = 1; break; } pfn = page_to_pfn(page); frame_list[i] = pfn_to_mfn(pfn); scrub_page(page); } If alloc_pages(GFP_BALLOON, balloon_order) is never NULL during the loop then page is left set to be one of the pages you are freeing. From then on PageHighMem(pfn_to_page(lpfn)) and PageHighMem(page) could easily the same and you will only have a problem if some of the pages you are freeing are PageHighMem and some aren''t. If alloc_pages(GFP_BALLOON, balloon_order) is null somewhere in the loop (presumably because it can''t find enough pages to free) then the process will crash. Michael Young _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com lists.xensource.com/xen-devel
On 06/08/2010 11:08 AM, M A Young wrote:> On Tue, 8 Jun 2010, Jeremy Fitzhardinge wrote: > >> On 06/08/2010 02:25 AM, M A Young wrote: >>> This patch fixes an error in commit >>> f4685d0ed041523d1bd5124c5872459681aca876 >>> "Add hugepage support to balloon driver" that was causing the balloon >>> driver to crash when shrinking the memory used by Domain-0. >> >> Thanks. But why haven''t I seen this? > > Good question, but I think you only see the crash in a limited memory > situation. I don''t claim to be an expert in what is going on, but a > few lines earlier you have the the loop > > for (i = 0; i < nr_pages; i++) { > if ((page = alloc_pages(GFP_BALLOON, balloon_order)) > == NULL) { > nr_pages = i; > need_sleep = 1; > break; > } > > pfn = page_to_pfn(page); > frame_list[i] = pfn_to_mfn(pfn); > > scrub_page(page); > } > > If alloc_pages(GFP_BALLOON, balloon_order) is never NULL during the > loop then page is left set to be one of the pages you are freeing. > From then on PageHighMem(pfn_to_page(lpfn)) and PageHighMem(page) > could easily the same and you will only have a problem if some of the > pages you are freeing are PageHighMem and some aren''t. > > If alloc_pages(GFP_BALLOON, balloon_order) is null somewhere in the > loop (presumably because it can''t find enough pages to free) then the > process will crash.I think the short answer is that I always run 64-bit dom0, so there''s no highmem... J _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com lists.xensource.com/xen-devel
On 06/03/2010 03:38 PM, Dave McCracken wrote:> On Thursday, June 03, 2010, Ian Campbell wrote: > >>> This seems to be about line 343 of drivers/xen/balloon.c in the >>> subroutine decrease_reservation which is >>> >>> for (j = 0; j < balloon_npages; j++, lpfn++, mfn++) { >>> if ((discontig_frame_list[j] >>> pfn_to_mfn(lpfn)) >>> >>> != mfn) >>> >>> discontig_free = 1; >>> >>> set_phys_to_machine(lpfn, INVALID_P2M_ENTRY); >>> >>> /* here */ if (!PageHighMem(page)) { >>> >>> ret = HYPERVISOR_update_va_mapping( >>> (unsigned long)__va(lpfn << >>> >>> PAGE_SHIFT), >>> >>> __pte_ma(0), 0); >>> BUG_ON(ret); >>> } >>> } >>> >>> >>> >From what I can tell page is meaningless in this context as it is just >>> >>> a temporary variable used in the previous loop, so I would >>> guess that PageHighMem should be checking something else, or page should >>> be set somewhere eg. at a guess page=pfn_to_page(lpfn); >>> >> That would be my guess also. CCing Dave McCracken who looks to have >> introduced this code in 0e898d5e "Add hugepage support to balloon >> driver" >> > Yes, that is definitely an error. The offending line should be > > if (!PageHighMem(pfn_to_page(lpfn))) { >Actually it looks like this was fixed on the xen/hugepage-balloon a long time ago, but I guess that branch wasn''t merged across to xen/next because it was based on xen/master and so awkward to merge into the .32-based and later branches. I need to check how the current hugepage support has made it into the .32 based tree, assuming it has. It will take a bit of git archeology. J _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com lists.xensource.com/xen-devel
On Tue, 8 Jun 2010, Jeremy Fitzhardinge wrote:> Actually it looks like this was fixed on the xen/hugepage-balloon a long > time ago, but I guess that branch wasn''t merged across to xen/next > because it was based on xen/master and so awkward to merge into the > .32-based and later branches. > > I need to check how the current hugepage support has made it into the > .32 based tree, assuming it has. It will take a bit of git archeology.There is an "Add hugepage support to balloon driver" in xen/balloon git.kernel.org/?p=linux/kernel/git/jeremy/xen.git;a=shortlog;h=refs/heads/xen/balloon and xen/balloon was later merged into xen/next git.kernel.org/?p=linux/kernel/git/jeremy/xen.git;a=commit;h=53fbeec80db2d7c5677da8d47096d156332731c4 xen/next has been merged into xen/stable-2.6.32.x a few times since then. Michael Young _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com lists.xensource.com/xen-devel
On 06/08/2010 04:08 PM, M A Young wrote:> On Tue, 8 Jun 2010, Jeremy Fitzhardinge wrote: > >> Actually it looks like this was fixed on the xen/hugepage-balloon a long >> time ago, but I guess that branch wasn''t merged across to xen/next >> because it was based on xen/master and so awkward to merge into the >> .32-based and later branches. >> >> I need to check how the current hugepage support has made it into the >> .32 based tree, assuming it has. It will take a bit of git archeology. > > There is an "Add hugepage support to balloon driver" in xen/balloon > git.kernel.org/?p=linux/kernel/git/jeremy/xen.git;a=shortlog;h=refs/heads/xen/balloon > and xen/balloon was later merged into xen/next > git.kernel.org/?p=linux/kernel/git/jeremy/xen.git;a=commit;h=53fbeec80db2d7c5677da8d47096d156332731c4 > > xen/next has been merged into xen/stable-2.6.32.x a few times since then.Thanks. I just cherry-picked the subsequent fixes over and merged them in. Please check it out. J _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com lists.xensource.com/xen-devel