Merge pull request #3423 from Beep6581/denoise

Merged Denoise speedups to get more tests
2016-09-09 21:58:26 +02:00
parent eb3c661293 3ffe983140
commit c8aa1c9f49
11 changed files with 291 additions and 233 deletions
--- a/rtengine/color.cc
+++ b/rtengine/color.cc
@@ -43,15 +43,10 @@ LUTf Color::igammatab_srgb;
 LUTf Color::igammatab_srgb1;
 LUTf Color::gammatab_srgb;
 LUTf Color::gammatab_srgb1;
-//  LUTf Color::igammatab_709;
-//  LUTf Color::gammatab_709;
-LUTf Color::igammatab_55;
-LUTf Color::gammatab_55;
-LUTf Color::igammatab_4;
-LUTf Color::gammatab_4;

-LUTf Color::igammatab_26_11;
-LUTf Color::gammatab_26_11;
+LUTf Color::denoiseGammaTab;
+LUTf Color::denoiseIGammaTab;
+
 LUTf Color::igammatab_24_17;
 LUTf Color::gammatab_24_17a;
 LUTf Color::gammatab_13_2;
@@ -150,13 +145,10 @@ void Color::init ()
    igammatab_srgb1(maxindex, 0);
    gammatab_srgb(maxindex, 0);
    gammatab_srgb1(maxindex, 0);
-    igammatab_55(maxindex, 0);
-    gammatab_55(maxindex, 0);
-    igammatab_4(maxindex, 0);
-    gammatab_4(maxindex, 0);

-    igammatab_26_11(maxindex, 0);
-    gammatab_26_11(maxindex, 0);
+    denoiseGammaTab(maxindex, 0);
+    denoiseIGammaTab(maxindex, 0);
+
    igammatab_24_17(maxindex, 0);
    gammatab_24_17a(maxindex, LUT_CLIP_ABOVE | LUT_CLIP_BELOW);
    gammatab_13_2(maxindex, 0);
@@ -195,6 +187,7 @@ void Color::init ()
            {
                gammatab_srgb[i] = gammatab_srgb1[i] = gamma2(i / 65535.0);
            }
+
            gammatab_srgb *= 65535.f;
            gamma2curve.share(gammatab_srgb, LUT_CLIP_BELOW | LUT_CLIP_ABOVE); // shares the buffer with gammatab_srgb but has different clip flags
        }
@@ -202,9 +195,11 @@ void Color::init ()
        #pragma omp section
 #endif
        {
-            for (int i = 0; i < maxindex; i++) {
+            for (int i = 0; i < maxindex; i++)
+            {
                igammatab_srgb[i] = igammatab_srgb1[i] = igamma2 (i / 65535.0);
            }
+
            igammatab_srgb *= 65535.f;
        }
 #ifdef _OPENMP
@@ -213,42 +208,74 @@ void Color::init ()
        {
            double rsRGBGamma = 1.0 / sRGBGamma;

-            for (int i = 0; i < maxindex; i++) {
+            for (int i = 0; i < maxindex; i++)
+            {
                double val = pow (i / 65535.0, rsRGBGamma);
                gammatab[i] = 65535.0 * val;
                gammatabThumb[i] = (unsigned char)(255.0 * val);
            }
        }
+
 #ifdef _OPENMP
        #pragma omp section
 #endif
+        // Arbitrary curve applied to the data for Lab denoising. Tested so far: nothing, gamma 2.6 11 - gamma 4 5 - gamma 5.5 10
+        // Other curves could be added in the same way, e.g. gamma g=2.6 slope=11, etc.
+        // Note: this has nothing to do with the real gamma! It is only applied to the Lab data, not to the RGB data.
+        // gamma55 was finally chosen as the default; settings->denoiselabgamma selects one of the other curves.

-        for (int i = 0; i < maxindex; i++) {
-            gammatab_55[i] = 65535.0 * gamma55 (i / 65535.0);
+        switch(settings->denoiselabgamma) {
+            case 0:
+                for (int i = 0; i < maxindex; i++) {
+                    denoiseGammaTab[i] = 65535.0 * gamma26_11 (i / 65535.0);
+                }
+
+                break;
+
+            case 1:
+                for (int i = 0; i < maxindex; i++) {
+                    denoiseGammaTab[i] = 65535.0 * gamma4 (i / 65535.0);
+                }
+
+                break;
+
+            default:
+                for (int i = 0; i < maxindex; i++) {
+                    denoiseGammaTab[i] = 65535.0 * gamma55 (i / 65535.0);
+                }
+
+                break;
        }

 #ifdef _OPENMP
        #pragma omp section
 #endif
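+        // Inverse of the arbitrary curve applied to the data for Lab denoising. Tested so far: nothing, gamma 2.6 11 - gamma 4 5 - gamma 5.5 10
+        // Other curves could be added in the same way, e.g. gamma g=2.6 slope=11, etc.
+        // Note: this has nothing to do with the real gamma! It is only applied to the Lab data, not to the RGB data.
+        // igamma55 was finally chosen as the default; settings->denoiselabgamma selects one of the other curves.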

-        for (int i = 0; i < maxindex; i++) {
-            igammatab_55[i] = 65535.0 * igamma55 (i / 65535.0);
-        }
+        switch(settings->denoiselabgamma) {
+            case 0:
+                for (int i = 0; i < maxindex; i++) {
+                    denoiseIGammaTab[i] = 65535.0 * igamma26_11 (i / 65535.0);
+                }

-#ifdef _OPENMP
-        #pragma omp section
-#endif
+                break;

-        for (int i = 0; i < maxindex; i++) {
-            gammatab_4[i] = 65535.0 * gamma4 (i / 65535.0);
-        }
+            case 1:
+                for (int i = 0; i < maxindex; i++) {
+                    denoiseIGammaTab[i] = 65535.0 * igamma4 (i / 65535.0);
+                }

-#ifdef _OPENMP
-        #pragma omp section
-#endif
+                break;

-        for (int i = 0; i < maxindex; i++) {
-            igammatab_4[i] = 65535.0 * igamma4 (i / 65535.0);
+            default:
+                for (int i = 0; i < maxindex; i++) {
+                    denoiseIGammaTab[i] = 65535.0 * igamma55 (i / 65535.0);
+                }
+
+                break;
        }

 #ifdef _OPENMP
@@ -299,22 +326,6 @@ void Color::init ()
            igammatab_145_3[i] = 65535.0 * igamma145_3 (i / 65535.0);
        }

-#ifdef _OPENMP
-        #pragma omp section
-#endif
-
-        for (int i = 0; i < maxindex; i++) {
-            gammatab_26_11[i] = 65535.0 * gamma26_11 (i / 65535.0);
-        }
-
-#ifdef _OPENMP
-        #pragma omp section
-#endif
-
-        for (int i = 0; i < maxindex; i++) {
-            igammatab_26_11[i] = 65535.0 * igamma26_11 (i / 65535.0);
-        }
-
 #ifdef _OPENMP
        #pragma omp section
 #endif
@@ -1534,6 +1545,80 @@ void Color::calcGamma (double pwr, double ts, int mode, int imax, double &gamma0
        return;
    }
 }
+void Color::gammaf2lut (LUTf &gammacurve, float gamma, float start, float slope, float divisor, float factor) // fills gammacurve[i] = gammaf(i / divisor, gamma, start, slope) * factor for i in 0..65535
+{
+#ifdef __SSE2__
+    // SSE2 version is more than 6 times faster than scalar version; it computes four consecutive table entries per iteration
+    vfloat iv = _mm_set_ps(3.f, 2.f, 1.f, 0.f); // the four table indices of the current group, starting with 0..3 (lowest lane = 0)
+    vfloat fourv = F2V(4.f); // index increment per group
+    vfloat gammav = F2V(1.f / gamma); // exponent of the power segment
+    vfloat slopev = F2V((slope / divisor) * factor); // linear segment per index: (i / divisor) * slope * factor == i * slopev
+    vfloat divisorv = F2V(xlogf(divisor)); // holds log(divisor), so that i / divisor becomes a subtraction in the log domain
+    vfloat factorv = F2V(factor); // output scaling of the power segment
+    vfloat comparev = F2V(start * divisor); // 'start' expressed in index units: i / divisor <= start  <=>  i <= start * divisor
+    int border = start * divisor; // start * divisor truncated to an integer index
+    int border1 = border - (border & 3); // border rounded down to a multiple of 4
+    int border2 = border1 + 4; // end of the one group that contains border; NOTE(review): never checked against the table size, assumes start * divisor is well below 65536 - confirm
+    int i = 0;
+
+    for(; i < border1; i += 4) { // groups entirely below border: linear segment only
+        vfloat resultv = iv * slopev;
+        STVFU(gammacurve[i], resultv);
+        iv += fourv;
+    }
+
+    for(; i < border2; i += 4) { // the single group containing border: evaluate both segments and choose per lane
+        vfloat result0v = iv * slopev;
+        vfloat result1v = xexpf((xlogf(iv) - divisorv) * gammav) * factorv; // checked xlogf/xexpf variants are used here; this group contains index 0 when border1 == 0
+        STVFU(gammacurve[i], vself(vmaskf_le(iv, comparev), result0v, result1v)); // presumably result0v where iv <= comparev, else result1v - confirm vself argument order
+        iv += fourv;
+    }
+
+    for(; i < 65536; i += 4) { // remaining groups: power segment only; assumes gammacurve holds at least 65536 entries - TODO confirm
+        vfloat resultv = xexpfNoCheck((xlogfNoCheck(iv) - divisorv) * gammav) * factorv; // exp(log(i / divisor) / gamma) * factor; indices here are >= 4, so the unchecked variants are presumably safe
+        STVFU(gammacurve[i], resultv);
+        iv += fourv;
+    }
+
+#else
+
+    for (int i = 0; i < 65536; ++i) { // scalar fallback, one entry per iteration
+        gammacurve[i] = gammaf(static_cast<float>(i) / divisor, gamma, start, slope) * factor;
+    }
+
+#endif
+}
+
+void Color::gammanf2lut (LUTf &gammacurve, float gamma, float divisor, float factor)           // standard gamma without slope: fills gammacurve[i] = gammanf(i / divisor, gamma) * factor for i in 0..65535
+{
+#ifdef __SSE2__
+    // SSE2 version is more than 6 times faster than scalar version; it computes four consecutive table entries per iteration
+    vfloat iv = _mm_set_ps(3.f, 2.f, 1.f, 0.f); // the four table indices of the current group, starting with 0..3 (lowest lane = 0)
+    vfloat fourv = F2V(4.f); // index increment per group
+    vfloat gammav = F2V(1.f / gamma); // exponent applied in the log domain
+    vfloat divisorv = F2V(xlogf(divisor)); // holds log(divisor), so that i / divisor becomes a subtraction in the log domain
+    vfloat factorv = F2V(factor); // output scaling
+
+    // first input value is zero => we have to use the xlogf function which checks this
+    vfloat resultv = xexpf((xlogf(iv) - divisorv) * gammav) * factorv; // entries 0..3: exp(log(i / divisor) / gamma) * factor
+    STVFU(gammacurve[0], resultv);
+    iv += fourv;
+
+    // inside the loop we can use xlogfNoCheck and xexpfNoCheck because we know about the input values
+    for(int i = 4; i < 65536; i += 4) { // entries 4..65535; assumes gammacurve holds at least 65536 entries - TODO confirm
+        resultv = xexpfNoCheck((xlogfNoCheck(iv) - divisorv) * gammav) * factorv;
+        STVFU(gammacurve[i], resultv);
+        iv += fourv;
+    }
+
+#else
+
+    for (int i = 0; i < 65536; ++i) { // scalar fallback, one entry per iteration
+        gammacurve[i] = Color::gammanf(static_cast<float>(i) / divisor, gamma) * factor;
+    }
+
+#endif
+}

 void Color::Lab2XYZ(float L, float a, float b, float &x, float &y, float &z)
 {
@@ -2210,6 +2295,7 @@ void Color::gamutLchonly (float HH, float2 sincosval, float &Lprov1, float &Chpr
    neg = false, more_rgb = false;
 #endif
    float ChprovSave = Chprov1;
+
    do {
        inGamut = true;

@@ -2232,6 +2318,7 @@ void Color::gamutLchonly (float HH, float2 sincosval, float &Lprov1, float &Chpr
 #ifdef _DEBUG
            neg = true;
 #endif
+
            if (isnan(HH)) {
                float atemp = ChprovSave * sincosval.y * 327.68;
                float btemp = ChprovSave * sincosval.x * 327.68;