From 8f483496b26f711ec9ce8cfa14d1ff902fd9ab39 Mon Sep 17 00:00:00 2001
From: Ingo Weyrich
Date: Fri, 28 Aug 2020 14:23:40 +0200
Subject: [PATCH] captureSharpening(): let the compiler vectorize the last
 loop. Tested with gcc 10.2.0 and clang 10.0.1

---
 rtengine/capturesharpening.cc | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/rtengine/capturesharpening.cc b/rtengine/capturesharpening.cc
index 4dcdd0734..7f3da40ed 100644
--- a/rtengine/capturesharpening.cc
+++ b/rtengine/capturesharpening.cc
@@ -1160,17 +1160,12 @@ BENCHFUN
 #pragma omp parallel for schedule(dynamic, 16)
 #endif
     for (int i = 0; i < H; ++i) {
-        int j = 0;
-#ifdef __SSE2__
-        for (; j < W - 3; j += 4) {
-            const vfloat factor = LVFU(YNew[i][j]) / vmaxf(LVFU(YOld[i][j]), F2V(0.00001f));
-            STVFU(red[i][j], LVFU(redVals[i][j]) * factor);
-            STVFU(green[i][j], LVFU(greenVals[i][j]) * factor);
-            STVFU(blue[i][j], LVFU(blueVals[i][j]) * factor);
-        }
-
+#if defined(__clang__)
+        #pragma clang loop vectorize(assume_safety)
+#elif defined(__GNUC__)
+        #pragma GCC ivdep
 #endif
-        for (; j < W; ++j) {
+        for (int j = 0; j < W; ++j) {
             const float factor = YNew[i][j] / std::max(YOld[i][j], 0.00001f);
             red[i][j] = redVals[i][j] * factor;
             green[i][j] = greenVals[i][j] * factor;