Displaying 20 results from an estimated 46 matches for "_mem".
2005 Apr 20
2
Speex-1.1.7 seems to crash with --enable-sse (on P3/GCC-3.2)..
...rash:

[root@tomb root]# speexenc -n loup.wav loup.spx
Encoding 8000 Hz audio using narrowband mode (stereo)
Segmentation fault (core dumped)

Stack trace:
Loaded symbols for /lib/ld-linux.so.2
#0 fir_mem2_10 (x=0x80552a8, _num=0x80566b4, y=0x80558ac, N=160,
ord=10, _mem=0x80568cc) at xmmintrin.h:790
790 {
(gdb) bt
#0 fir_mem2_10 (x=0x80552a8, _num=0x80566b4, y=0x80558ac, N=160,
ord=10, _mem=0x80568cc) at xmmintrin.h:790...
2004 Aug 06
3
[PATCH] Make SSE Run Time option.
...bout 5% slower than the pure asm
approach, so it's not too bad (SSE asm is 2x faster than x87). Note that
unlike the previous version which had a kludge to work with order 8
(required for wideband), this version only works with order 10, so it
will only work for narrowband.
void filter_mem2(float *x, float *_num, float *_den, float *y, int N,
int ord, float *_mem)
{
__m128 num[3], den[3], mem[3];
int i;
/* Copy numerator, denominator and memory to aligned xmm */
for (i=0;i<2;i++)
{
mem[i] = _mm_loadu_ps(_mem+4*i);
num[i] = _mm_loadu_ps(_num+4*i+1);...
2005 Apr 20
0
Speex-1.1.7 seems to crash with --enable-sse (on P3/GCC-3.2)..
...ed on a RAS 3 (P4).
> Both version 1.1.6 and 1.1.7 crash:
>
> [root@tomb root]# speexenc -n loup.wav loup.spx
> Encoding 8000 Hz audio using narrowband mode (stereo)
> Segmentation fault (core dumped)
>
> Stack trace:
> Loaded symbols for /lib/ld-linux.so.2
> #0 fir_mem2_10 (x=0x80552a8, _num=0x80566b4, y=0x80558ac, N=160,
> ord=10, _mem=0x80568cc) at xmmintrin.h:790
> 790 {
> (gdb) bt
> #0 fir_mem2_10 (x=0x80552a8, _num=0x80566b4, y=0x80558ac, N=160,
> ord=10, _mem=0x80568cc) at xmmintrin.h:790
> #1 0xb75dc509 in fir_mem2 (x=0x80552a8, _nu...
2015 Feb 11
0
[PATCH v2 6/6] instmem/gk20a: add IOMMU support
...bdev/fb.h>
#include <core/mm.h>
#include <core/device.h>
#ifdef __KERNEL__
#include <linux/dma-attrs.h>
+#include <linux/iommu.h>
+#include <nouveau_platform.h>
#endif
#include "priv.h"
@@ -36,18 +56,51 @@ struct gk20a_instobj_priv {
struct nvkm_mem *mem;
/* Pointed by mem */
struct nvkm_mem _mem;
+};
+
+/*
+ * Used for objects allocated using the DMA API
+ */
+struct gk20a_instobj_dma {
+ struct gk20a_instobj_priv base;
+
void *cpuaddr;
dma_addr_t handle;
struct dma_attrs attrs;
struct nvkm_mm_node r;
};
+/*
+ * Used for object...
2005 Mar 08
1
Speex-1.1.7 seems to crash with --enable-sse (on P3/GCC-3.2)..
...as "i386-redhat-linux"...
(gdb) r --rate 8000 --16bit -n /dev/zero /tmp/foo
Starting program: /usr/src/speex-1.1.7/src/.libs/lt-speexenc --rate 8000
--16bit -n /dev/zero /tmp/foo
Encoding 8000 Hz audio using narrowband mode (mono)
Program received signal SIGSEGV, Segmentation fault.
fir_mem2_10 (x=0x8054a88, _num=0x8055e94, y=0x805508c, N=160, ord=10,
_mem=0x80560ac)
at /usr/lib/gcc-lib/i386-redhat-linux/3.2/include/xmmintrin.h:774
774 {
(gdb)
(gdb) bt
#0 fir_mem2_10 (x=0x8054a88, _num=0x8055e94, y=0x805508c, N=160,
ord=10, _mem=0x80560ac)
at /usr/lib/gcc-lib/i386-redha...
2004 Aug 06
5
[PATCH] Make SSE Run Time option.
> Personally, I don't think much of PNI. The complex arithmetic stuff they
> added sets you up for a lot of permute overhead that is inefficient --
> especially on a processor that is already weak on permute. In my opinion,
Actually, the new instructions make it possible to do complex multiplies
without the need to permute and separate the add and subtract. The
really useful
2013 Feb 21
2
[PATCH] xen: consolidate implementations of LOG() macro
...-offsets.c
+++ b/xen/arch/arm/arm32/asm-offsets.c
@@ -8,6 +8,7 @@
#include <xen/config.h>
#include <xen/types.h>
#include <xen/sched.h>
+#include <xen/bitops.h>
#include <public/xen.h>
#include <asm/current.h>
@@ -18,13 +19,6 @@
#define OFFSET(_sym, _str, _mem) \
DEFINE(_sym, offsetof(_str, _mem));
-/* base-2 logarithm */
-#define __L2(_x) (((_x) & 0x00000002) ? 1 : 0)
-#define __L4(_x) (((_x) & 0x0000000c) ? ( 2 + __L2( (_x)>> 2)) : __L2( _x))
-#define __L8(_x) (((_x) & 0x000000f0) ? ( 4 + __L4( (_x)>> 4)) : __L4( _x)...
2004 Aug 06
2
[PATCH] Make SSE Run Time option. Add Win32 SSE code
...clude "stack_alloc.h"
+#include "speex.h"
#include <math.h>
+extern int global_use_mmx_sse;
void bw_lpc(float gamma, float *lpc_in, float *lpc_out, int order)
{
@@ -46,41 +48,548 @@
}
}
-#ifdef _USE_SSE
-#include "filters_sse.h"
-#else
-void filter_mem2(float *x, float *num, float *den, float *y, int N, int
ord, float *mem)
+
+void filter_mem2(float *x, float *_num, float *_den, float *y, int N, int
ord, float *_mem)
{
- int i,j;
- float xi,yi;
- for (i=0;i<N;i++)
- {
- xi=x[i];
- y[i] = num[0]*xi + mem[0];
- yi=y[i]...
2004 Aug 06
2
Coredumps when --enable-sse is selected
...cc-3.2.3 (weird palindrome there), on a
Willamette core Pentium 4 (1.6 GHz) system.
I've tried both speex 1.1.5 release, and the current CVS (which self-IDs as
1.1.4), and the result is the same.
I suspect some funk in the use of the SSE intrinsics macros.
Backtrace:
#0 0x40024594 in filter_mem2_10 (x=0x805f31c, _num=0x8061fb8,
_den=0x8061fe4, y=0x806071c, N=160, ord=10,
_mem=0x8062150) at xmmintrin.h:790
#1 0x400248b4 in filter_mem2 (x=0x805f31c, _num=0x8061fb8, _den=0x8061fe4,
y=0x806071c, N=1, ord=0,
_mem=0x8061fe4) at filters_sse.h:135
#2 0x40019d1e in nb_encode (state=0x8...
2015 Feb 17
1
[PATCH v3 4/6] instmem/gk20a: use DMA attributes
...nclude <core/mm.h>
> #include <core/device.h>
>
> +#ifdef __KERNEL__
> +#include <linux/dma-attrs.h>
> +#endif
> +
> #include "priv.h"
>
> struct gk20a_instobj_priv {
> @@ -34,6 +38,7 @@ struct gk20a_instobj_priv {
> struct nvkm_mem _mem;
> void *cpuaddr;
> dma_addr_t handle;
> + struct dma_attrs attrs;
> struct nvkm_mm_node r;
> };
>
> @@ -91,8 +96,8 @@ gk20a_instobj_dtor(struct nvkm_object *object)
> if (unlikely(!node->handle))
> return;
>...
2015 Feb 17
8
[PATCH v3 0/6] nouveau/gk20a: RAM device removal & IOMMU support
Thanks Ilia for the v2 review! Here is the v3 of this IOMMU support for GK20A
series.
Changes since v2:
- Cleaner changes for ltc
- Fixed typos in gk20a instmem IOMMU comments
Changes since v1:
- Add missing else condition in ltc
- Remove extra flags that slipped into nouveau_display.c and nv84_fence.c.
Original cover letter:
Patches 1-3 make the presence of a RAM device optional, and remove
2015 Feb 11
9
[PATCH v2 0/6] nouveau/gk20a: RAM device removal & IOMMU support
Changes since v1:
- Add missing else condition in ltc
- Remove extra flags that slipped into nouveau_display.c and nv84_fence.c.
Original cover letter:
Patches 1-3 make the presence of a RAM device optional, and remove GK20A's dummy
RAM driver we were using so far. On chips using shared memory, such a device
can confuse the driver into moving objects where there is no need to, and can
trick
2015 Jan 23
8
[PATCH 0/6] nouveau/gk20a: RAM device removal & IOMMU support
A series I have waited too long to submit, and the recent refactoring made
me pay the price of my perfectionism, so here are the features that are at least
completed
Patches 1-3 make the presence of a RAM device optional, and remove GK20A's dummy
RAM driver we were using so far. On chips using shared memory, such a device
can confuse the driver into moving objects where there is no need to,
2015 Feb 20
6
[PATCH v4 0/6] nouveau/gk20a: RAM device removal & IOMMU support
Changes since v3:
- Use a single dma_attr for all DMA-API allocations in instmem instead of one
per allocation
- Use device.info.ram_size instead of pfb->ram to check whether VRAM is present
outside of nvkm
Changes since v2:
- Cleaner changes for ltc
- Fixed typos in gk20a instmem IOMMU comments
Changes since v1:
- Add missing else condition in ltc
- Remove extra flags that slipped into
2004 Aug 06
0
Coredumps when --enable-sse is selected
...amette core Pentium 4 (1.6Ghz) system.
>
> I've tried both speex 1.1.5 release, and the current CVS (which self-IDs as
> 1.1.4), and the result is the same.
>
> I suspect some funk in the use of the SSE intrinsics macros.
>
> Backtrace:
>
> #0 0x40024594 in filter_mem2_10 (x=0x805f31c, _num=0x8061fb8,
> _den=0x8061fe4, y=0x806071c, N=160, ord=10,
> _mem=0x8062150) at xmmintrin.h:790
> #1 0x400248b4 in filter_mem2 (x=0x805f31c, _num=0x8061fb8, _den=0x8061fe4,
> y=0x806071c, N=1, ord=0,
> _mem=0x8061fe4) at filters_sse.h:135
> #2 0x40...
2015 Jan 23
0
[PATCH 4/6] instmem/gk20a: use DMA attributes
...ouveau/nvkm/subdev/instmem/gk20a.c
@@ -24,6 +24,10 @@
#include <core/mm.h>
#include <core/device.h>
+#ifdef __KERNEL__
+#include <linux/dma-attrs.h>
+#endif
+
#include "priv.h"
struct gk20a_instobj_priv {
@@ -34,6 +38,7 @@ struct gk20a_instobj_priv {
struct nvkm_mem _mem;
void *cpuaddr;
dma_addr_t handle;
+ struct dma_attrs attrs;
struct nvkm_mm_node r;
};
@@ -91,8 +96,8 @@ gk20a_instobj_dtor(struct nvkm_object *object)
if (unlikely(!node->handle))
return;
- dma_free_coherent(dev, node->mem->size << PAGE_SHIFT, node->cpuaddr,...
2015 Feb 17
0
[PATCH v3 4/6] instmem/gk20a: use DMA attributes
...ouveau/nvkm/subdev/instmem/gk20a.c
@@ -24,6 +24,10 @@
#include <core/mm.h>
#include <core/device.h>
+#ifdef __KERNEL__
+#include <linux/dma-attrs.h>
+#endif
+
#include "priv.h"
struct gk20a_instobj_priv {
@@ -34,6 +38,7 @@ struct gk20a_instobj_priv {
struct nvkm_mem _mem;
void *cpuaddr;
dma_addr_t handle;
+ struct dma_attrs attrs;
struct nvkm_mm_node r;
};
@@ -91,8 +96,8 @@ gk20a_instobj_dtor(struct nvkm_object *object)
if (unlikely(!node->handle))
return;
- dma_free_coherent(dev, node->mem->size << PAGE_SHIFT, node->cpuaddr,...
2015 Apr 17
4
[PATCH 2/6] instmem/gk20a: refer to IOMMU physical translation bit
...T(34 - priv->iommu_pgshift);
> + /*
> + * The iommu_phys_addr_bit tells that an address is to be resolved
> + * through the IOMMU
> + */
> + r->offset |= BIT(priv->iommu_phys_addr_bit - priv->iommu_pgshift);
>
> node->base._mem.offset = ((u64)r->offset) << priv->iommu_pgshift;
>
> @@ -407,6 +411,7 @@ gk20a_instmem_ctor(struct nvkm_object *parent, struct nvkm_object *engine,
> priv->domain = plat->gpu->iommu.domain;
> priv->mm = plat->gpu->iommu.mm;...
2004 Aug 06
2
[PATCH] Make SSE Run Time option. Add Win32 SSE code
...f for loop based
copies:
from your new filters_sse.h around the asm code
for (i=0;i<12;i++)
num[i]=den[i]=0;
for (i=0;i<12;i++)
mem[i]=0;
for (i=0;i<ord;i++)
{
num[i]=_num[i+1];
den[i]=_den[i+1];
}
for (i=0;i<ord;i++)
mem[i]=_mem[i];
<<< asm code>>>
for (i=0;i<ord;i++)
_mem[i]=mem[i];
could easily be reduced to
memset(num,0,12);
memset(den,0,12);
memset(mem,0,12);
memcpy(num,_num+1,ord);
memcpy(den,_den+1,ord);
memcpy(mem,_mem+1,ord);
<<<asm code>>>
memcpy(_me...
2004 Aug 06
2
[PATCH] Make SSE Run Time option. Add Win32 SSE code
Jean-Marc,
There is a big difference between SSE and SSEFP. The SSEFP means
that the CPU supports the xmm registers. All Intel chips with SSE support
do; however, no current 32-bit AMD chips support the XMM registers. They
will support the SSE instructions but not those registers. You are right
about the SSE2 not being used.
The AMD Opterons are the first AMD CPUs which support