Displaying 3 results from an estimated 3 matches for "celt_fir1_process16_".
2013 May 21
0
regarding ARM NEON CELT filter optimizations
Hello Aurelien,
+ "vdup.s16 d8, %1;\n" //Duplicate num in d8 lane
+ "vdup.s16 q5, %4;\n" //Duplicate mem in q5 lane
+
+ /* We try to process 16 samples at a time */
+ "movs %5, %3, lsr #4;\n"
+ "beq .celt_fir1_process16_done_%=;\n"
+
+ ".celt_fir1_process16_%=:\n"
+ /* Load 16 x values in q0, q1 lanes */
+ "vld1.16 {q0-q1}, [%0]!;\n"
+
+ /* Init four 32 bits sum in q7, q8, q9, q10 lanes */
+ "vshll.s16 q7, d0, %[SIGSHIFT];\n"
+ "vshll.s16 q8, d1,...
2013 May 21
0
[PATCH] 02-Add CELT filter optimizations
...opus_val16 mem)
+{
+ int i;
+
+ __asm__ __volatile__(
+ "vdup.s16 d8, %1;\n" //Duplicate num in d8 lane
+ "vdup.s16 q5, %4;\n" //Duplicate mem in q5 lane
+
+ /* We try to process 16 samples at a time */
+ "movs %5, %3, lsr #4;\n"
+ "beq .celt_fir1_process16_done_%=;\n"
+
+ ".celt_fir1_process16_%=:\n"
+ /* Load 16 x values in q0, q1 lanes */
+ "vld1.16 {q0-q1}, [%0]!;\n"
+
+ /* Init four 32 bits sum in q7, q8, q9, q10 lanes */
+ "vshll.s16 q7, d0, %[SIGSHIFT];\n"
+ "vshll.s16 q8, d1,...
2013 May 21
2
[PATCH] 02-Add CELT filter optimizations
...opus_val16 mem)
+{
+ int i;
+
+ __asm__ __volatile__(
+ "vdup.s16 d8, %1;\n" //Duplicate num in d8 lane
+ "vdup.s16 q5, %4;\n" //Duplicate mem in q5 lane
+
+ /* We try to process 16 samples at a time */
+ "movs %5, %3, lsr #4;\n"
+ "beq .celt_fir1_process16_done_%=;\n"
+
+ ".celt_fir1_process16_%=:\n"
+ /* Load 16 x values in q0, q1 lanes */
+ "vld1.16 {q0-q1}, [%0]!;\n"
+
+ /* Init four 32 bits sum in q7, q8, q9, q10 lanes */
+ "vshll.s16 q7, d0, %[SIGSHIFT];\n"
+ "vshll.s16 q8, d1,...