Colortoning Lab regions: some vectorized precalculations in first loop, #4914

2018-11-01 22:30:26 +01:00
parent dd635de72c
commit 906cf63ecd
4 changed files with 1354 additions and 5 deletions
--- a/rtengine/iplabregions.cc
+++ b/rtengine/iplabregions.cc
@@ -28,6 +28,26 @@
 #include "StopWatch.h"
 #include "sleef.c"

+namespace {
+#ifdef __SSE2__
+void fastlin2log(float *x, float factor, float base, int w)
+{
+    float baseLog = 1.f / xlogf(base);
+    vfloat baseLogv = F2V(baseLog);
+    factor = factor * (base - 1.f);
+    vfloat factorv = F2V(factor);
+    vfloat onev = F2V(1.f);
+    int i = 0;
+    for (; i < w - 3; i += 4) {
+        STVFU(x[i], xlogf(LVFU(x[i]) * factorv + onev) * baseLogv);
+    }
+    for (; i < w; ++i) {
+        x[i] = xlogf(x[i] * factor + 1.f) * baseLog;
+    }
+}
+#endif
+}
+
 namespace rtengine {

 void ImProcFunctions::labColorCorrectionRegions(LabImage *lab)
@@ -69,18 +89,39 @@ BENCHFUN
    }

 #ifdef _OPENMP
-    #pragma omp parallel for if (multiThread)
+    #pragma omp parallel if (multiThread)
+#endif
+    {
+#ifdef __SSE2__
+    float cBuffer[lab->W];
+    float hBuffer[lab->W];
+    // magic constant c_factor: normally chromaticity is in [0; 42000] (see color.h), but here we use the constant to match how the chromaticity pipette works (see improcfun.cc lines 4705-4706 and color.cc line 1930
+    constexpr float c_factor = 327.68f / 48000.f;
+#endif
+#ifdef _OPENMP
+    #pragma omp for
 #endif
    for (int y = 0; y < lab->H; ++y) {
+#ifdef __SSE2__
+        // vectorized precalculation
+        Color::Lab2Lch(lab->a[y], lab->b[y], cBuffer, hBuffer, lab->W);
+        fastlin2log(cBuffer, c_factor, 10.f, lab->W);
+#endif
        for (int x = 0; x < lab->W; ++x) {
            float l = lab->L[y][x];
+#ifdef __SSE2__
+            // use precalculated values
+            float c1 = cBuffer[x];
+            float h = hBuffer[x];
+#else
+            // magic constant c_factor: normally chromaticity is in [0; 42000] (see color.h), but here we use the constant to match how the chromaticity pipette works (see improcfun.cc lines 4705-4706 and color.cc line 1930
+            constexpr float c_factor = 327.68f / 48000.f;
            float a = lab->a[y][x];
            float b = lab->b[y][x];
            float c, h;
            Color::Lab2Lch(a, b, c, h);
-            // magic constant c_factor: normally chromaticity is in [0; 42000] (see color.h), but here we use the constant to match how the chromaticity pipette works (see improcfun.cc lines 4705-4706 and color.cc line 1930
-            constexpr float c_factor = 327.68f / 48000.f;
            float c1 = xlin2log(c * c_factor, 10.f);
+#endif
            float h1 = Color::huelab_to_huehsv2(h);
            h1 = h1 + 1.f/6.f; // offset the hue because we start from purple instead of red
            if (h1 > 1.f) {
@@ -98,7 +139,7 @@ BENCHFUN
            }
        }
    }
-
+    }
    {
        array2D<float> guide(lab->W, lab->H, lab->L);
 #ifdef _OPENMP