On Thu, Nov 15, 2001 at 01:57:16PM -0800, Josh Coalson wrote: > --- Miroslav Lichvar <lichvarm@phoenix.inf.upol.cz> wrote: > > On Wed, Nov 14, 2001 at 09:37:47AM -0800, Josh Coalson wrote: > > > cpu support for 3dnow and sse can be easily detected at > > > runtime. I turned off 3dnow by default because it is > > > implicated in some crashes. > > > > Hmm, i never have any crash. Can i get more informations about these > > crases? Or you are talking about that ones posted in this list about > > month ago? > > yes, the ones a month ago. it's not clear that this is > even related to 3dnow but since it was happening on > Matt's new AMD box and I don't have enough info I turned > it off by default. I will gladly set you up with an account on this very system if it would help you to diagnose the problem. I'll take another shot at it myself as well. -- - mdz
So here is a quick patch that solves the problem; the code should now be position-independent (PIC).
-- 
Miroslav Lichvar
lichvarm@phoenix.inf.upol.cz
-------------- next part --------------
--- lpc_asm.nasm.orig	Wed Jul 18 02:23:40 2001
+++ lpc_asm.nasm	Sat Nov 17 21:09:46 2001
@@ -59,10 +59,10 @@
 ;
 	ALIGN 16
 cident FLAC__lpc_compute_autocorrelation_asm_ia32
-	;[esp + 24] == autoc[]
-	;[esp + 20] == lag
-	;[esp + 16] == data_len
-	;[esp + 12] == data[]
+	;[esp + 28] == autoc[]
+	;[esp + 24] == lag
+	;[esp + 20] == data_len
+	;[esp + 16] == data[]
 
 	;ASSERT(lag > 0)
 	;ASSERT(lag <= 33)
@@ -71,21 +71,22 @@
 .begin:
 	push	esi
 	push	edi
+	push	ebx
 
 	;	for(coeff = 0; coeff < lag; coeff++)
 	;		autoc[coeff] = 0.0;
-	mov	edi, [esp + 24]			; edi == autoc
-	mov	ecx, [esp + 20]			; ecx = # of dwords (=lag) of 0 to write
+	mov	edi, [esp + 28]			; edi == autoc
+	mov	ecx, [esp + 24]			; ecx = # of dwords (=lag) of 0 to write
 	xor	eax, eax
 	rep	stosd
 
 	;	const unsigned limit = data_len - lag;
-	mov	eax, [esp + 20]			; eax == lag
-	mov	ecx, [esp + 16]
+	mov	eax, [esp + 24]			; eax == lag
+	mov	ecx, [esp + 20]
 	sub	ecx, eax			; ecx == limit
 
-	mov	edi, [esp + 24]			; edi == autoc
-	mov	esi, [esp + 12]			; esi == data
+	mov	edi, [esp + 28]			; edi == autoc
+	mov	esi, [esp + 16]			; esi == data
 	inc	ecx				; we are looping <= limit so we add one to the counter
 
 	;	for(sample = 0; sample <= limit; sample++) {
@@ -97,7 +98,11 @@
 	; each iteration is 11 bytes so we need (-eax)*11, so we do (-12*eax + eax)
 	lea	edx, [eax + eax*2]
 	neg	edx
-	lea	edx, [eax + edx*4 + .jumper1_0]
+	lea	edx, [eax + edx*4 + .jumper1_0 - .get_eip1]
+	call	.get_eip1
+.get_eip1:
+	pop	ebx
+	add	edx, ebx
 	inc	edx				; compensate for the shorter opcode on the last iteration
 	inc	edx				; compensate for the shorter opcode on the last iteration
 	inc	edx				; compensate for the shorter opcode on the last iteration
@@ -254,7 +259,7 @@
 	;		for(coeff = 0; coeff < data_len - sample; coeff++)
 	;			autoc[coeff] += d * data[sample+coeff];
 	;	}
-	mov	ecx, [esp + 20]			; ecx <- lag
+	mov	ecx, [esp + 24]			; ecx <- lag
 	dec	ecx				; ecx <- lag - 1
 	jz	near .end			; skip loop if 0 (i.e. lag == 1)
 
@@ -263,7 +268,11 @@
 	; each iteration is 11 bytes so we need (-eax)*11, so we do (-12*eax + eax)
 	lea	edx, [eax + eax*2]
 	neg	edx
-	lea	edx, [eax + edx*4 + .jumper2_0]
+	lea	edx, [eax + edx*4 + .jumper2_0 - .get_eip2]
+	call	.get_eip2
+.get_eip2:
+	pop	ebx
+	add	edx, ebx
 	inc	edx				; compensate for the shorter opcode on the last iteration
 	inc	edx				; compensate for the shorter opcode on the last iteration
 	inc	edx				; compensate for the shorter opcode on the last iteration
@@ -409,6 +418,7 @@
 .loop2_end:
 
 .end:
+	pop	ebx
 	pop	edi
 	pop	esi
 	ret
@@ -804,7 +814,11 @@
 .i_32:
 	sub	edi, esi
 	neg	eax
-	lea	edx, [eax + eax * 8 + .jumper_0]
+	lea	edx, [eax + eax * 8 + .jumper_0 - .get_eip0]
+	call	.get_eip0
+.get_eip0:
+	pop	eax
+	add	edx, eax
 	inc	edx
 	mov	eax, [esp + 28]			; eax = qlp_coeff[]
 	xor	ebp, ebp
@@ -1203,7 +1217,11 @@
 .x87_32:
 	sub	esi, edi
 	neg	eax
-	lea	edx, [eax + eax * 8 + .jumper_0]
+	lea	edx, [eax + eax * 8 + .jumper_0 - .get_eip0]
+	call	.get_eip0
+.get_eip0:
+	pop	eax
+	add	edx, eax
 	inc	edx				; compensate for the shorter opcode on the last iteration
 	mov	eax, [esp + 28]			; eax = qlp_coeff[]
 	xor	ebp, ebp
--- Miroslav Lichvar <lichvarm@phoenix.inf.upol.cz> wrote: > On Wed, Nov 14, 2001 at 09:37:47AM -0800, Josh Coalson wrote: > > cpu support for 3dnow and sse can be easily detected at > > runtime. I turned off 3dnow by default because it is > > implicated in some crashes. > > Hmm, i never have any crash. Can i get more informations about these > crases? Or you are talking about that ones posted in this list about > month ago? Yes, the ones a month ago. It's not clear that this is even related to 3dnow, but since it was happening on Matt's new AMD box and I don't have enough info, I turned it off by default. > BTW, line 394 in stream_encoder.c setting 3dnow function is still > disabled. So configure --enable-use-3dnow in fact doesn't enable it. Oops, OK, I fixed that and checked it in. I'm a little late with the release but it will be out today. Josh __________________________________________________ Do You Yahoo!? Find the one for you at Yahoo! Personals http://personals.yahoo.com
On Thu, Nov 15, 2001 at 01:57:16PM -0800, Josh Coalson wrote: > yes, the ones a month ago. it's not clear that this is > even related to 3dnow but since it was happening on > Matt's new AMD box and I don't have enough info I turned > it off by default. OK, I did a test on a Debian/testing box (as Matt is probably using something like this), and the segfaults really were there. They are caused by the newer ld (from binutils). As I'm not a shared-library expert, I don't know whether it is a bug or a feature of ld, some misunderstanding with nasm-produced objects, or something totally different. In lpc_asm.nasm, the functions FLAC__lpc_compute_autocorrelation_asm_ia32, FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32, and FLAC__lpc_restore_signal_asm_ia32 contain instructions like "lea edx, [.. + blah]", where we want blah to be replaced by the absolute address of the label blah, and this is the problem. The solution is simple: just delete these functions. I will look more at the problem later. -- Miroslav Lichvar lichvarm@phoenix.inf.upol.cz
On Thu, Nov 15, 2001 at 01:57:16PM -0800, Josh Coalson wrote: > --- Miroslav Lichvar <lichvarm@phoenix.inf.upol.cz> wrote: > > On Wed, Nov 14, 2001 at 09:37:47AM -0800, Josh Coalson wrote: > > > cpu support for 3dnow and sse can be easily detected at runtime. I > > > turned off 3dnow by default because it is implicated in some crashes. > > > > Hmm, i never have any crash. Can i get more informations about these > > crases? Or you are talking about that ones posted in this list about > > month ago? > > yes, the ones a month ago. it's not clear that this is even related to > 3dnow but since it was happening on Matt's new AMD box and I don't have > enough info I turned it off by default. I don't remember if I said this already, but I can confirm that I don't get the crash when building from current CVS, even with asm optimizations enabled. -- - mdz
On Sat, Nov 17, 2001 at 04:26:48PM -0500, Matt Zimmerman wrote: > On Thu, Nov 15, 2001 at 01:57:16PM -0800, Josh Coalson wrote: > > > > yes, the ones a month ago. it's not clear that this is even related to > > 3dnow but since it was happening on Matt's new AMD box and I don't have > > enough info I turned it off by default. > > I don't remember if I said this already, but I can confirm that I don't > get the crash when building from current CVS, even with asm optimizations > enabled. However, when I got around to building Debian packages, I found that I DID still get the crash, and had to keep asm optimizations disabled for the package. I don't know what happened; perhaps I was accidentally linking with the installed libFLAC at runtime. So it would seem that this is not related to SSE or 3DNOW, since those are now disabled by default, yes? -- - mdz