Colortoning Lab regions: some vectorized precalculations in first loop, #4914

2018-11-01 22:30:26 +01:00
parent dd635de72c
commit 906cf63ecd
4 changed files with 1354 additions and 5 deletions
--- a/rtengine/color.cc
+++ b/rtengine/color.cc
@@ -1931,6 +1931,24 @@ void Color::Lab2Lch(float a, float b, float &c, float &h)
    h = xatan2f(b, a);
 }

+#ifdef __SSE2__
+void Color::Lab2Lch(float *a, float *b, float *c, float *h, int w) // array overload: for each i in [0, w) writes c[i] = sqrt(a[i]^2 + b[i]^2) / 327.68 and h[i] = xatan2f(b[i], a[i])
+{
+    int i = 0; // shared by both loops so the scalar tail resumes where the vector loop stopped
+    vfloat c327d68v = F2V(327.68f); // chroma divisor as a vector; presumably F2V broadcasts the scalar to all lanes - TODO confirm
+    for (; i < w - 3; i += 4) { // vector loop: runs only while four elements (i .. i+3) are still inside [0, w)
+        vfloat av = LVFU(a[i]); // presumably an unaligned 4-float load starting at a[i] - TODO confirm against the SSE helper header
+        vfloat bv = LVFU(b[i]);
+        STVFU(c[i], vsqrtf(SQRV(av) + SQRV(bv)) / c327d68v); // same chroma formula as the scalar tail below, four values at once
+        STVFU(h[i], xatan2f(bv, av)); // hue angle; argument order (b, a) matches the scalar tail below
+    }
+    for (; i < w; ++i) { // scalar tail: the remaining (at most three) elements
+        c[i] = sqrtf(SQR(a[i]) + SQR(b[i])) / 327.68f;
+        h[i] = xatan2f(b[i], a[i]);
+    }
+}
+#endif
+
 void Color::Lch2Lab(float c, float h, float &a, float &b)
 {
    float2 sincosval = xsincosf(h);
--- a/rtengine/color.h
+++ b/rtengine/color.h
@@ -651,7 +651,9 @@ public:
    * @param h 'h' channel return value, in [-PI ; +PI] (return value)
    */
    static void Lab2Lch(float a, float b, float &c, float &h);
-
+#ifdef __SSE2__
+    static void Lab2Lch(float *a, float *b, float *c, float *h, int w); // array overload: converts w consecutive a/b values into the c/h output arrays; only declared when __SSE2__ is defined
+#endif

    /**
    * @brief Convert 'c' and 'h' channels of the Lch color space to the 'a' and 'b' channels of the L*a*b color space (channel 'L' is identical [0 ; 32768])
--- a/rtengine/iplabregions.cc
+++ b/rtengine/iplabregions.cc
@@ -28,6 +28,26 @@
 #include "StopWatch.h"
 #include "sleef.c"

+namespace {
+#ifdef __SSE2__
+void fastlin2log(float *x, float factor, float base, int w) // in place, for each i in [0, w): x[i] = xlogf(x[i] * factor * (base - 1) + 1) / xlogf(base)
+{
+    float baseLog = 1.f / xlogf(base); // reciprocal computed once so the loops multiply instead of dividing
+    vfloat baseLogv = F2V(baseLog);
+    factor = factor * (base - 1.f); // fold (base - 1) into the scale once, outside the loops; 'factor' is a by-value parameter, so the caller is unaffected
+    vfloat factorv = F2V(factor);
+    vfloat onev = F2V(1.f);
+    int i = 0; // shared by both loops so the scalar tail resumes where the vector loop stopped
+    for (; i < w - 3; i += 4) { // vector loop: runs only while four elements (i .. i+3) are still inside [0, w)
+        STVFU(x[i], xlogf(LVFU(x[i]) * factorv + onev) * baseLogv); // NOTE(review): LVFU/STVFU look like unaligned vector load/store - confirm
+    }
+    for (; i < w; ++i) { // scalar tail: the remaining (at most three) elements, same formula
+        x[i] = xlogf(x[i] * factor + 1.f) * baseLog;
+    }
+}
+#endif
+}
+
 namespace rtengine {

 void ImProcFunctions::labColorCorrectionRegions(LabImage *lab)
@@ -69,18 +89,39 @@ BENCHFUN
    }

 #ifdef _OPENMP
-    #pragma omp parallel for if (multiThread)
+    #pragma omp parallel if (multiThread)
+#endif
+    {
+#ifdef __SSE2__
+    float cBuffer[lab->W];
+    float hBuffer[lab->W];
+    // magic constant c_factor: normally chromaticity is in [0; 42000] (see color.h), but here we use the constant to match how the chromaticity pipette works (see improcfun.cc lines 4705-4706 and color.cc line 1930)
+    constexpr float c_factor = 327.68f / 48000.f;
+#endif
+#ifdef _OPENMP
+    #pragma omp for
 #endif
    for (int y = 0; y < lab->H; ++y) {
+#ifdef __SSE2__
+        // vectorized precalculation
+        Color::Lab2Lch(lab->a[y], lab->b[y], cBuffer, hBuffer, lab->W);
+        fastlin2log(cBuffer, c_factor, 10.f, lab->W);
+#endif
        for (int x = 0; x < lab->W; ++x) {
            float l = lab->L[y][x];
+#ifdef __SSE2__
+            // use precalculated values
+            float c1 = cBuffer[x];
+            float h = hBuffer[x];
+#else
+            // magic constant c_factor: normally chromaticity is in [0; 42000] (see color.h), but here we use the constant to match how the chromaticity pipette works (see improcfun.cc lines 4705-4706 and color.cc line 1930)
+            constexpr float c_factor = 327.68f / 48000.f;
            float a = lab->a[y][x];
            float b = lab->b[y][x];
            float c, h;
            Color::Lab2Lch(a, b, c, h);
-            // magic constant c_factor: normally chromaticity is in [0; 42000] (see color.h), but here we use the constant to match how the chromaticity pipette works (see improcfun.cc lines 4705-4706 and color.cc line 1930
-            constexpr float c_factor = 327.68f / 48000.f;
            float c1 = xlin2log(c * c_factor, 10.f);
+#endif
            float h1 = Color::huelab_to_huehsv2(h);
            h1 = h1 + 1.f/6.f; // offset the hue because we start from purple instead of red
            if (h1 > 1.f) {
@@ -98,7 +139,7 @@ BENCHFUN
            }
        }
    }
-
+    }
    {
        array2D<float> guide(lab->W, lab->H, lab->L);
 #ifdef _OPENMP
--- a/rtengine/sleef.c.save-failed
+++ b/rtengine/sleef.c.save-failed