Displaying 1 result from an estimated 1 match for "kernel_4".
Did you mean:
kernel_h
2018 Sep 20
2
Vectorization width not correct using #pragma clang loop vectorize_width
...:
for (i = 0; i < N; i++)
{
for (j = 0; j < M; j++)
{
data[(i*M) + j] -= mean[j];
}
}
for (i = 0; i < N; i++)
{
for (j = 0; j < M; j++)
{
data[(i*M) + j] /= SQRT_FUN(float_n) * stddev[j];
}
}
kernel_4:
for (i = 0; i < M*M; i++)
{
corr[i] = 0.0;
}
for (k = 0; k < N; k++)
{
for (i = 0; i < M-1; i++)
{
for (j = i+1; j < M; j++)
{
corr[(i*M)+j] += (data[(k*M)+i] *
data[(k*M)+j]);
}
}...