Hello Aurelien,
+      "vdup.s16 d8, %1;\n" //Duplicate num in d8 lane
+      "vdup.s16 q5, %4;\n" //Duplicate mem in q5 lane
+
+      /* We try to process 16 samples at a time */
+      "movs %5, %3, lsr #4;\n"
+      "beq .celt_fir1_process16_done_%=;\n"
+
+      ".celt_fir1_process16_%=:\n"
+      /* Load 16 x values in q0, q1 lanes */
+      "vld1.16 {q0-q1}, [%0]!;\n"
+
+      /* Init four 32 bits sum in q7, q8, q9, q10 lanes */
+      "vshll.s16 q7, d0, %[SIGSHIFT];\n"
+      "vshll.s16 q8, d1, %[SIGSHIFT];\n"
+      "vshll.s16 q9, d2, %[SIGSHIFT];\n"
+      "vshll.s16 q10, d3, %[SIGSHIFT];\n"
I think C++-style comments (//) should be avoided, or at least not mixed with C-style comments (/* */) as they are in this patch.
I'd prefer that named operands (such as %[SIGSHIFT]) be used throughout
instead of numbered ones (such as %1, %4), to improve clarity.
regards, p.
-- 
Peter Meerwald
+43-664-2444418 (mobile)