Colortoning Lab regions: some vectorized precalculations in first loop, #4914

2018-11-01 22:30:26 +01:00
parent dd635de72c
commit 906cf63ecd
4 changed files with 1354 additions and 5 deletions
--- a/rtengine/color.cc
+++ b/rtengine/color.cc
@@ -1931,6 +1931,24 @@ void Color::Lab2Lch(float a, float b, float &c, float &h)
    h = xatan2f(b, a);
 }
 #ifdef __SSE2__
 void Color::Lab2Lch(float *a, float *b, float *c, float *h, int w)
 {
    int i = 0;
    vfloat c327d68v = F2V(327.68f);
    for (; i < w - 3; i += 4) {
        vfloat av = LVFU(a[i]);
        vfloat bv = LVFU(b[i]);
        STVFU(c[i], vsqrtf(SQRV(av) + SQRV(bv)) / c327d68v);
        STVFU(h[i], xatan2f(bv, av));
    }
    for (; i < w; ++i) {
        c[i] = sqrtf(SQR(a[i]) + SQR(b[i])) / 327.68f;
        h[i] = xatan2f(b[i], a[i]);
    }
 }
 #endif
 void Color::Lch2Lab(float c, float h, float &a, float &b)
 {
    float2 sincosval = xsincosf(h);
--- a/rtengine/color.h
+++ b/rtengine/color.h
@@ -651,7 +651,9 @@ public:
    * @param h 'h' channel return value, in [-PI ; +PI] (return value)
    */
    static void Lab2Lch(float a, float b, float &c, float &h);
-
+#ifdef __SSE2__
    static void Lab2Lch(float *a, float *b, float *c, float *h, int w);
 #endif
    /**
    * @brief Convert 'c' and 'h' channels of the Lch color space to the 'a' and 'b' channels of the L*a*b color space (channel 'L' is identical [0 ; 32768])
--- a/rtengine/iplabregions.cc
+++ b/rtengine/iplabregions.cc
@@ -28,6 +28,26 @@
 #include "StopWatch.h"
 #include "sleef.c"
 namespace {
 #ifdef __SSE2__
 void fastlin2log(float *x, float factor, float base, int w)
 {
    float baseLog = 1.f / xlogf(base);
    vfloat baseLogv = F2V(baseLog);
    factor = factor * (base - 1.f);
    vfloat factorv = F2V(factor);
    vfloat onev = F2V(1.f);
    int i = 0;
    for (; i < w - 3; i += 4) {
        STVFU(x[i], xlogf(LVFU(x[i]) * factorv + onev) * baseLogv);
    }
    for (; i < w; ++i) {
        x[i] = xlogf(x[i] * factor + 1.f) * baseLog;
    }
 }
 #endif
 }
 namespace rtengine {
 void ImProcFunctions::labColorCorrectionRegions(LabImage *lab)
@@ -69,18 +89,39 @@ BENCHFUN
    }
 #ifdef _OPENMP
-    #pragma omp parallel for if (multiThread)
+    #pragma omp parallel if (multiThread)
 #endif
    {
 #ifdef __SSE2__
    float cBuffer[lab->W];
    float hBuffer[lab->W];
    // magic constant c_factor: normally chromaticity is in [0; 42000] (see color.h), but here we use the constant to match how the chromaticity pipette works (see improcfun.cc lines 4705-4706 and color.cc line 1930
    constexpr float c_factor = 327.68f / 48000.f;
 #endif
 #ifdef _OPENMP
    #pragma omp for
 #endif
    for (int y = 0; y < lab->H; ++y) {
 #ifdef __SSE2__
        // vectorized precalculation
        Color::Lab2Lch(lab->a[y], lab->b[y], cBuffer, hBuffer, lab->W);
        fastlin2log(cBuffer, c_factor, 10.f, lab->W);
 #endif
        for (int x = 0; x < lab->W; ++x) {
            float l = lab->L[y][x];
 #ifdef __SSE2__
            // use precalculated values
            float c1 = cBuffer[x];
            float h = hBuffer[x];
 #else
            // magic constant c_factor: normally chromaticity is in [0; 42000] (see color.h), but here we use the constant to match how the chromaticity pipette works (see improcfun.cc lines 4705-4706 and color.cc line 1930
            constexpr float c_factor = 327.68f / 48000.f;
            float a = lab->a[y][x];
            float b = lab->b[y][x];
            float c, h;
            Color::Lab2Lch(a, b, c, h);
            // magic constant c_factor: normally chromaticity is in [0; 42000] (see color.h), but here we use the constant to match how the chromaticity pipette works (see improcfun.cc lines 4705-4706 and color.cc line 1930
            constexpr float c_factor = 327.68f / 48000.f;
            float c1 = xlin2log(c * c_factor, 10.f);
 #endif
            float h1 = Color::huelab_to_huehsv2(h);
            h1 = h1 + 1.f/6.f; // offset the hue because we start from purple instead of red
            if (h1 > 1.f) {
@@ -98,7 +139,7 @@ BENCHFUN
            }
        }
    }
-
+    }
    {
        array2D<float> guide(lab->W, lab->H, lab->L);
 #ifdef _OPENMP
--- a/rtengine/sleef.c.save-failed
+++ b/rtengine/sleef.c.save-failed