From 8b44ed0fd81326f3b1a54f6063c03931f0013631 Mon Sep 17 00:00:00 2001
From: heckflosse <heckflosse67@gmx.de>
Date: Mon, 8 May 2017 18:42:13 +0200
Subject: [PATCH 1/4] Speedup for microcontrast, issue #3867

---
 rtengine/ipsharpen.cc | 412 ++++++++++++++++--------------------------
 rtengine/rt_math.h    |   7 +
 2 files changed, 166 insertions(+), 253 deletions(-)
diff --git a/rtengine/ipsharpen.cc b/rtengine/ipsharpen.cc
index 00196591e..7ef323f08 100644
--- a/rtengine/ipsharpen.cc
+++ b/rtengine/ipsharpen.cc
@@ -24,7 +24,8 @@
 #include "sleef.c"
 #include "opthelper.h"
 using namespace std;
-
+#define BENCHMARK
+#include "StopWatch.h"
 namespace rtengine
 {
 
@@ -561,27 +562,21 @@ void ImProcFunctions::MLsharpen (LabImage* lab)
 //! MicroContrast is a sharpening method developed by Manuel Llorens and documented here: http://www.rawness.es/sharpening/?lang=en
 //! <BR>The purpose is maximize clarity of the image without creating halo's.
 //! <BR>Addition from JD : pyramid  + pondered contrast with matrix 5x5
+//! <BR>2017 Ingo Weyrich : reduced processing time
 //! \param luminance : Luminance channel of image
 void ImProcFunctions::MLmicrocontrast(float** luminance, int W, int H)
 {
     if (!params->sharpenMicro.enabled) {
         return;
     }
+BENCHFUN
 
-    MyTime t1e, t2e;
-    t1e.set();
-
-    int k = params->sharpenMicro.matrix ? 1 : 2;
+    const int k = params->sharpenMicro.matrix ? 1 : 2;
 
     // k=2 matrix 5x5  k=1 matrix 3x3
-    int offset, offset2, i, j, col, row, n;
-    float temp, temp2, temp3, temp4, tempL;
-    float *LM, v, s, contrast;
-    int signs[25];
-    int width = W, height = H;
-    float uniform = params->sharpenMicro.uniformity;//between 0 to 100
-    int unif;
-    unif = (int)(uniform / 10.0f); //put unif between 0 to 10
+    const int width = W, height = H;
+    const float uniform = params->sharpenMicro.uniformity;//between 0 to 100
+    const int unif = (int)(uniform / 10.0f); //put unif between 0 to 10
     float amount = params->sharpenMicro.amount / 1500.0f; //amount 2000.0 quasi no artefacts ==> 1500 = maximum, after artefacts
 
     if (amount < 0.000001f) {
@@ -594,127 +589,108 @@ void ImProcFunctions::MLmicrocontrast(float** luminance, int W, int H)
 
     if (settings->verbose) {
         printf ("Micro-contrast amount %f\n", amount);
-    }
-
-    if (settings->verbose) {
         printf ("Micro-contrast uniformity %i\n", unif);
     }
 
     //modulation uniformity in function of luminance
-    float L98[11] = {0.001f, 0.0015f, 0.002f, 0.004f, 0.006f, 0.008f, 0.01f, 0.03f, 0.05f, 0.1f, 0.1f};
-    float L95[11] = {0.0012f, 0.002f, 0.005f, 0.01f, 0.02f, 0.05f, 0.1f, 0.12f, 0.15f, 0.2f, 0.25f};
-    float L92[11] = {0.01f, 0.015f, 0.02f, 0.06f, 0.10f, 0.13f, 0.17f, 0.25f, 0.3f, 0.32f, 0.35f};
-    float L90[11] = {0.015f, 0.02f, 0.04f, 0.08f, 0.12f, 0.15f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f};
-    float L87[11] = {0.025f, 0.03f, 0.05f, 0.1f, 0.15f, 0.25f, 0.3f, 0.4f, 0.5f, 0.63f, 0.75f};
-    float L83[11] = {0.055f, 0.08f, 0.1f, 0.15f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.75f, 0.85f};
-    float L80[11] = {0.15f, 0.2f, 0.25f, 0.3f, 0.35f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f};
-    float L75[11] = {0.22f, 0.25f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.85f, 0.9f, 0.95f};
-    float L70[11] = {0.35f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.97f, 1.0f, 1.0f, 1.0f, 1.0f};
-    float L63[11] = {0.55f, 0.6f, 0.7f, 0.8f, 0.85f, 0.9f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
-    float L58[11] = {0.75f, 0.77f, 0.8f, 0.9f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
+    const float L98[11] = {0.001f, 0.0015f, 0.002f, 0.004f, 0.006f, 0.008f, 0.01f, 0.03f, 0.05f, 0.1f, 0.1f};
+    const float L95[11] = {0.0012f, 0.002f, 0.005f, 0.01f, 0.02f, 0.05f, 0.1f, 0.12f, 0.15f, 0.2f, 0.25f};
+    const float L92[11] = {0.01f, 0.015f, 0.02f, 0.06f, 0.10f, 0.13f, 0.17f, 0.25f, 0.3f, 0.32f, 0.35f};
+    const float L90[11] = {0.015f, 0.02f, 0.04f, 0.08f, 0.12f, 0.15f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f};
+    const float L87[11] = {0.025f, 0.03f, 0.05f, 0.1f, 0.15f, 0.25f, 0.3f, 0.4f, 0.5f, 0.63f, 0.75f};
+    const float L83[11] = {0.055f, 0.08f, 0.1f, 0.15f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.75f, 0.85f};
+    const float L80[11] = {0.15f, 0.2f, 0.25f, 0.3f, 0.35f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f};
+    const float L75[11] = {0.22f, 0.25f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.85f, 0.9f, 0.95f};
+    const float L70[11] = {0.35f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.97f, 1.0f, 1.0f, 1.0f, 1.0f};
+    const float L63[11] = {0.55f, 0.6f, 0.7f, 0.8f, 0.85f, 0.9f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
+    const float L58[11] = {0.75f, 0.77f, 0.8f, 0.9f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
     //default 5
     //modulation contrast
-    float Cont0[11] = {0.05f, 0.1f, 0.2f, 0.25f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f};
-    float Cont1[11] = {0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 0.95f, 1.0f};
-    float Cont2[11] = {0.2f, 0.40f, 0.6f, 0.7f, 0.8f, 0.85f, 0.90f, 0.95f, 1.0f, 1.05f, 1.10f};
-    float Cont3[11] = {0.5f, 0.6f, 0.7f, 0.8f, 0.85f, 0.9f, 1.0f, 1.0f, 1.05f, 1.10f, 1.20f};
-    float Cont4[11] = {0.8f, 0.85f, 0.9f, 0.95f, 1.0f, 1.05f, 1.10f, 1.150f, 1.2f, 1.25f, 1.40f};
-    float Cont5[11] = {1.0f, 1.1f, 1.2f, 1.25f, 1.3f, 1.4f, 1.45f, 1.50f, 1.6f, 1.65f, 1.80f};
+    const float Cont0[11] = {0.05f, 0.1f, 0.2f, 0.25f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f};
+    const float Cont1[11] = {0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 0.95f, 1.0f};
+    const float Cont2[11] = {0.2f, 0.40f, 0.6f, 0.7f, 0.8f, 0.85f, 0.90f, 0.95f, 1.0f, 1.05f, 1.10f};
+    const float Cont3[11] = {0.5f, 0.6f, 0.7f, 0.8f, 0.85f, 0.9f, 1.0f, 1.0f, 1.05f, 1.10f, 1.20f};
+    const float Cont4[11] = {0.8f, 0.85f, 0.9f, 0.95f, 1.0f, 1.05f, 1.10f, 1.150f, 1.2f, 1.25f, 1.40f};
+    const float Cont5[11] = {1.0f, 1.1f, 1.2f, 1.25f, 1.3f, 1.4f, 1.45f, 1.50f, 1.6f, 1.65f, 1.80f};
+
+    const float s = amount;
+    const float sqrt2 = sqrt(2.0);
+    const float sqrt1d25 = sqrt(1.25);
+    float *LM = new float[width * height]; //allocation for Luminance
 
-    float chmax = 8.0f;
-    LM = new float[width * height]; //allocation for Luminance
 #ifdef _OPENMP
-    #pragma omp parallel for private(offset, i,j) shared(LM)
+    #pragma omp parallel
+#endif
+{
+    float signs[25];
+
+#ifdef _OPENMP
+    #pragma omp for schedule(dynamic,16)
 #endif
 
-    for(j = 0; j < height; j++)
-        for(i = 0, offset = j * width + i; i < width; i++, offset++) {
+    for(int j = 0; j < height; j++)
+        for(int i = 0, offset = j * width + i; i < width; i++, offset++) {
             LM[offset] = luminance[j][i] / 327.68f; // adjust to 0.100 and to RT variables
         }
 
 #ifdef _OPENMP
-    #pragma omp parallel for private(j,i,offset,s,signs,v,n,row,col,offset2,contrast,temp,temp2,tempL,temp4) shared(luminance,LM,amount,chmax,unif,k,L98,L95,L92,L90,L87,L83,L80,L75,L70,L63,L58,Cont0,Cont1,Cont2,Cont3,Cont4,Cont5)
+    #pragma omp for schedule(dynamic,16)
 #endif
 
-    for(j = k; j < height - k; j++)
-        for(i = k, offset = j * width + i; i < width - k; i++, offset++) {
-            s = amount;
-            v = LM[offset];
-            n = 0;
-
-            for(row = j - k; row <= j + k; row++)
-                for(col = i - k, offset2 = row * width + col; col <= i + k; col++, offset2++) {
-                    signs[n] = 0;
-
-                    if (v < LM[offset2]) {
-                        signs[n] = -1;
-                    }
-
-                    if (v > LM[offset2]) {
-                        signs[n] = 1;
-                    }
+    for(int j = k; j < height - k; j++)
+        for(int i = k, offset = j * width + i; i < width - k; i++, offset++) {
+            float v = LM[offset];
 
+            for(int row = j - k, n = 0; row <= j + k; row++) {
+                for(int offset2 = row * width + i - k; offset2 <= row * width + i + k; offset2++) {
+                    signs[n] = SGN(v - LM[offset2]);
                     n++;
                 }
-
-            if      (k == 1) {
-                contrast = sqrt(fabs(LM[offset + 1] - LM[offset - 1]) * fabs(LM[offset + 1] - LM[offset - 1]) + fabs(LM[offset + width] - LM[offset - width]) * fabs(LM[offset + width] - LM[offset - width])) / chmax;    //for 3x3
-            } else /* if (k==2) */ contrast = sqrt(fabs(LM[offset + 1] - LM[offset - 1]) * fabs(LM[offset + 1] - LM[offset - 1]) + fabs(LM[offset + width] - LM[offset - width]) * fabs(LM[offset + width] - LM[offset - width])
-                                                       + fabs(LM[offset + 2] - LM[offset - 2]) * fabs(LM[offset + 2] - LM[offset - 2]) + fabs(LM[offset + 2 * width] - LM[offset - 2 * width]) * fabs(LM[offset + 2 * width] - LM[offset - 2 * width])) / (2 * chmax); //for 5x5
-
-            if (contrast > 1.0f) {
-                contrast = 1.0f;
             }
 
+            float contrast;
+            if (k == 1) {
+                contrast = sqrtf(SQR(LM[offset + 1] - LM[offset - 1]) + SQR(LM[offset + width] - LM[offset - width])) * 0.125f;    //for 3x3
+            } else /* if (k==2) */ contrast = sqrtf(SQR(LM[offset + 1] - LM[offset - 1]) + SQR(LM[offset + width] - LM[offset - width])
+                                                       + SQR(LM[offset + 2] - LM[offset - 2]) + SQR(LM[offset + 2 * width] - LM[offset - 2 * width])) * 0.0625f; //for 5x5
+
+            contrast = std::min(contrast, 1.f);
+
             //matrix 5x5
-            temp = luminance[j][i] / 327.68f; //begin 3x3
-            temp += CLIREF(v - LM[offset - width - 1]) * sqrtf(2.0f) * s;
-            temp += CLIREF(v - LM[offset - width]) * s;
-            temp += CLIREF(v - LM[offset - width + 1]) * sqrtf(2.0f) * s;
-            temp += CLIREF(v - LM[offset - 1]) * s;
-            temp += CLIREF(v - LM[offset + 1]) * s;
-            temp += CLIREF(v - LM[offset + width - 1]) * sqrtf(2.0f) * s;
-            temp += CLIREF(v - LM[offset + width]) * s;
-            temp += CLIREF(v - LM[offset + width + 1]) * sqrtf(2.0f) * s; //end 3x3
+            float temp = v + 4.f *( v * (s + sqrt2 * s)); //begin 3x3
+            float temp1 = sqrt2 * s *(LM[offset - width - 1] + LM[offset - width + 1] + LM[offset + width - 1] + LM[offset + width + 1]);
+            temp1 += s * (LM[offset - width] + LM[offset - 1] + LM[offset + 1] + LM[offset + width]);
+
+            temp -= temp1;
 
             // add JD continue 5x5
             if (k == 2) {
-                temp += 2.0f * CLIREF(v - LM[offset + 2 * width]) * s;
-                temp += 2.0f * CLIREF(v - LM[offset - 2 * width]) * s;
-                temp += 2.0f * CLIREF(v - LM[offset - 2      ]) * s;
-                temp += 2.0f * CLIREF(v - LM[offset + 2      ]) * s;
+                float temp2 = -(LM[offset + 2 * width] + LM[offset - 2 * width] + LM[offset - 2] + LM[offset + 2]);
 
-                temp += 2.0f * CLIREF(v - LM[offset + 2 * width - 1]) * s * sqrtf(1.25f); // 1.25  = 1*1 + 0.5*0.5
-                temp += 2.0f * CLIREF(v - LM[offset + 2 * width - 2]) * s * sqrtf(2.00f);
-                temp += 2.0f * CLIREF(v - LM[offset + 2 * width + 1]) * s * sqrtf(1.25f);
-                temp += 2.0f * CLIREF(v - LM[offset + 2 * width + 2]) * s * sqrtf(2.00f);
-                temp += 2.0f * CLIREF(v - LM[offset +  width + 2]) * s * sqrtf(1.25f);
-                temp += 2.0f * CLIREF(v - LM[offset +  width - 2]) * s * sqrtf(1.25f);
-                temp += 2.0f * CLIREF(v - LM[offset - 2 * width - 1]) * s * sqrtf(1.25f);
-                temp += 2.0f * CLIREF(v - LM[offset - 2 * width - 2]) * s * sqrtf(2.00f);
-                temp += 2.0f * CLIREF(v - LM[offset - 2 * width + 1]) * s * sqrtf(1.25f);
-                temp += 2.0f * CLIREF(v - LM[offset - 2 * width + 2]) * s * sqrtf(2.00f);
-                temp += 2.0f * CLIREF(v - LM[offset -  width + 2]) * s * sqrtf(1.25f);
-                temp += 2.0f * CLIREF(v - LM[offset -  width - 2]) * s * sqrtf(1.25f);
+                temp2 -= sqrt1d25 * (LM[offset + 2 * width - 1] + LM[offset + 2 * width + 1] + LM[offset +  width + 2] + LM[offset +  width - 2] + 
+                                     LM[offset - 2 * width - 1] + LM[offset - 2 * width + 1] + LM[offset -  width + 2] + LM[offset -  width - 2]);
+
+                temp2 -= sqrt2 * (LM[offset + 2 * width - 2] + LM[offset + 2 * width + 2] + LM[offset - 2 * width - 2] + LM[offset - 2 * width + 2]);
+                temp2 += 18.601126159f * v ;
+                temp2 *= 2.f * s;
+                temp += temp2;
             }
 
-            if (temp < 0.0f) {
-                temp = 0.0f;
-            }
+            temp = std::max(temp, 0.f);
+
 
             v = temp;
 
-            n = 0;
-
-            for(row = j - k; row <= j + k; row++) {
-                for(col = i - k, offset2 = row * width + col; col <= i + k; col++, offset2++) {
-                    if (((v < LM[offset2]) && (signs[n] > 0)) || ((v > LM[offset2]) && (signs[n] < 0))) {
-                        temp = v * 0.75f + LM[offset2] * 0.25f; // 0.75 0.25
+            for(int row = j + k, n = SQR(2*k+1) - 1; row >= j - k; row--) {
+                for(int offset2 = row * width + i + k; offset2 >= row * width + i - k; offset2--) {
+                    if((LM[offset2] - v) * signs[n] > 0.f) {
+                        temp = intp(0.75f, v, LM[offset2]);
+                        goto breakout;
                     }
-
-                    n++;
+                    n--;
                 }
             }
+            breakout:
 
             if (LM[offset] > 95.0f || LM[offset] < 5.0f) {
                 contrast *= Cont0[unif];    //+ JD : luminance  pyramid to adjust contrast by evaluation of LM[offset]
@@ -730,184 +706,114 @@ void ImProcFunctions::MLmicrocontrast(float** luminance, int W, int H)
                 contrast *= Cont5[unif];    //(2.0f/k)*Cont5[unif];
             }
 
-            if (contrast > 1.0f) {
-                contrast = 1.0f;
-            }
+            contrast = std::min(contrast, 1.f);
 
-            tempL = 327.68f * (temp * (1.0f - contrast) + LM[offset] * contrast);
+            float tempL = intp(contrast, LM[offset], temp);
             // JD: modulation of microcontrast in function of original Luminance and modulation of luminance
-            temp2 = tempL / (327.68f * LM[offset]); //for highlights
-
-            if (temp2 > 1.0f) {
-                if (temp2 > 1.70f) {
-                    temp2 = 1.70f;    //limit action
-                }
-
-                if      (LM[offset] > 98.0f) {
-                    luminance[j][i] = LM[offset] * 327.68f;
+            if (tempL > LM[offset]) {
+                float temp2 = tempL / LM[offset]; //for highlights
+                temp2 = std::min(temp2, 1.7f); //limit action
+                temp2 -= 1.f;
+                if (LM[offset] > 98.0f) {
+                    temp = 0.f;
                 } else if (LM[offset] > 95.0f) {
-                    temp = (L95[unif] * (temp2 - 1.f)) + 1.0f;
-                    luminance[j][i] = temp * LM[offset] * 327.68f;
+                    temp = L95[unif];
                 } else if (LM[offset] > 92.0f) {
-                    temp = (L92[unif] * (temp2 - 1.f)) + 1.0f;
-                    luminance[j][i] = temp * LM[offset] * 327.68f;
+                    temp = L92[unif];
                 } else if (LM[offset] > 90.0f) {
-                    temp = (L90[unif] * (temp2 - 1.f)) + 1.0f;
-                    luminance[j][i] = temp * LM[offset] * 327.68f;
+                    temp = L90[unif];
                 } else if (LM[offset] > 87.0f) {
-                    temp = (L87[unif] * (temp2 - 1.f)) + 1.0f;
-                    luminance[j][i] = temp * LM[offset] * 327.68f;
+                    temp = L87[unif];
                 } else if (LM[offset] > 83.0f) {
-                    temp = (L83[unif] * (temp2 - 1.f)) + 1.0f;
-                    luminance[j][i] = temp * LM[offset] * 327.68f;
+                    temp = L83[unif];
                 } else if (LM[offset] > 80.0f) {
-                    temp = (L80[unif] * (temp2 - 1.f)) + 1.0f;
-                    luminance[j][i] = temp * LM[offset] * 327.68f;
+                    temp = L80[unif];
                 } else if (LM[offset] > 75.0f) {
-                    temp = (L75[unif] * (temp2 - 1.f)) + 1.0f;
-                    luminance[j][i] = temp * LM[offset] * 327.68f;
+                    temp = L75[unif];
                 } else if (LM[offset] > 70.0f) {
-                    temp = (L70[unif] * (temp2 - 1.f)) + 1.0f;
-                    luminance[j][i] = temp * LM[offset] * 327.68f;
+                    temp = L70[unif];
                 } else if (LM[offset] > 63.0f) {
-                    temp = (L63[unif] * (temp2 - 1.f)) + 1.0f;
-                    luminance[j][i] = temp * LM[offset] * 327.68f;
+                    temp = L63[unif];
                 } else if (LM[offset] > 58.0f) {
-                    temp = (L58[unif] * (temp2 - 1.f)) + 1.0f;
-                    luminance[j][i] = temp * LM[offset] * 327.68f;
+                    temp = L58[unif];
                 } else if (LM[offset] > 42.0f) {
-                    temp = (L58[unif] * (temp2 - 1.f)) + 1.0f;
-                    luminance[j][i] = temp * LM[offset] * 327.68f;
+                    temp = L58[unif];
                 } else if (LM[offset] > 37.0f) {
-                    temp = (L63[unif] * (temp2 - 1.f)) + 1.0f;
-                    luminance[j][i] = temp * LM[offset] * 327.68f;
+                    temp = L63[unif];
                 } else if (LM[offset] > 30.0f) {
-                    temp = (L70[unif] * (temp2 - 1.f)) + 1.0f;
-                    luminance[j][i] = temp * LM[offset] * 327.68f;
+                    temp = L70[unif];
                 } else if (LM[offset] > 25.0f) {
-                    temp = (L75[unif] * (temp2 - 1.f)) + 1.0f;
-                    luminance[j][i] = temp * LM[offset] * 327.68f;
+                    temp = L75[unif];
                 } else if (LM[offset] > 20.0f) {
-                    temp = (L80[unif] * (temp2 - 1.f)) + 1.0f;
-                    luminance[j][i] = temp * LM[offset] * 327.68f;
+                    temp = L80[unif];
                 } else if (LM[offset] > 17.0f) {
-                    temp = (L83[unif] * (temp2 - 1.f)) + 1.0f;
-                    luminance[j][i] = temp * LM[offset] * 327.68f;
+                    temp = L83[unif];
                 } else if (LM[offset] > 13.0f) {
-                    temp = (L87[unif] * (temp2 - 1.f)) + 1.0f;
-                    luminance[j][i] = temp * LM[offset] * 327.68f;
+                    temp = L87[unif];
                 } else if (LM[offset] > 10.0f) {
-                    temp = (L90[unif] * (temp2 - 1.f)) + 1.0f;
-                    luminance[j][i] = temp * LM[offset] * 327.68f;
+                    temp = L90[unif];
                 } else if (LM[offset] > 5.0f) {
-                    temp = (L95[unif] * (temp2 - 1.f)) + 1.0f;
-                    luminance[j][i] = temp * LM[offset] * 327.68f;
-                } else if (LM[offset] > 0.0f) {
-                    luminance[j][i] = LM[offset] * 327.68f;
+                    temp = L95[unif];
+                } else {
+                    temp = 0.f;
+                }
+                luminance[j][i] *= (temp * temp2 + 1.f);
+            } else {
+
+                float temp4 = LM[offset] / tempL; //
+
+                if (temp4 > 1.0f) {
+                    temp4 = std::min(temp4, 1.7f); //limit action
+                    temp4 -= 1.f;
+                    if (LM[offset] < 2.0f) {
+                        temp = L98[unif];
+                    } else if (LM[offset] < 5.0f) {
+                        temp = L95[unif];
+                    } else if (LM[offset] < 8.0f) {
+                        temp = L92[unif];
+                    } else if (LM[offset] < 10.0f) {
+                        temp = L90[unif];
+                    } else if (LM[offset] < 13.0f) {
+                        temp = L87[unif];
+                    } else if (LM[offset] < 17.0f) {
+                        temp = L83[unif];
+                    } else if (LM[offset] < 20.0f) {
+                        temp = L80[unif];
+                    } else if (LM[offset] < 25.0f) {
+                        temp = L75[unif];
+                    } else if (LM[offset] < 30.0f) {
+                        temp = L70[unif];
+                    } else if (LM[offset] < 37.0f) {
+                        temp = L63[unif];
+                    } else if (LM[offset] < 42.0f) {
+                        temp = L58[unif];
+                    } else if (LM[offset] < 58.0f) {
+                        temp = L58[unif];
+                    } else if (LM[offset] < 63.0f) {
+                        temp = L63[unif];
+                    } else if (LM[offset] < 70.0f) {
+                        temp = L70[unif];
+                    } else if (LM[offset] < 75.0f) {
+                        temp = L75[unif];
+                    } else if (LM[offset] < 80.0f) {
+                        temp = L80[unif];
+                    } else if (LM[offset] < 83.0f) {
+                        temp = L83[unif];
+                    } else if (LM[offset] < 87.0f) {
+                        temp = L87[unif];
+                    } else if (LM[offset] < 90.0f) {
+                        temp = L90[unif];
+                    } else if (LM[offset] < 95.0f) {
+                        temp = L95[unif];
+                    } else {
+                        temp = 0.f;
+                    }
+                    luminance[j][i] /= (temp * temp4 + 1.f);
                 }
             }
-
-            temp4 = (327.68f * LM[offset]) / tempL; //
-
-            if (temp4 > 1.0f) {
-                if (temp4 > 1.7f) {
-                    temp4 = 1.7f;    //limit action
-                }
-
-                if      (LM[offset] < 2.0f) {
-                    temp3 = temp4 - 1.0f;
-                    temp = (L98[unif] * temp3) + 1.0f;
-                    luminance[j][i] = (LM[offset] * 327.68f) / temp;
-                } else if (LM[offset] < 5.0f) {
-                    temp3 = temp4 - 1.0f;
-                    temp = (L95[unif] * temp3) + 1.0f;
-                    luminance[j][i] = (LM[offset] * 327.68f) / temp;
-                } else if (LM[offset] < 8.0f) {
-                    temp3 = temp4 - 1.0f;
-                    temp = (L92[unif] * temp3) + 1.0f;
-                    luminance[j][i] = (LM[offset] * 327.68f) / temp;
-                } else if (LM[offset] < 10.0f) {
-                    temp3 = temp4 - 1.0f;
-                    temp = (L90[unif] * temp3) + 1.0f;
-                    luminance[j][i] = (LM[offset] * 327.68f) / temp;
-                } else if (LM[offset] < 13.0f) {
-                    temp3 = temp4 - 1.0f;
-                    temp = (L87[unif] * temp3) + 1.0f;
-                    luminance[j][i] = (LM[offset] * 327.68f) / temp;
-                } else if (LM[offset] < 17.0f) {
-                    temp3 = temp4 - 1.0f;
-                    temp = (L83[unif] * temp3) + 1.0f;
-                    luminance[j][i] = (LM[offset] * 327.68f) / temp;
-                } else if (LM[offset] < 20.0f) {
-                    temp3 = temp4 - 1.0f;
-                    temp = (L80[unif] * temp3) + 1.0f;
-                    luminance[j][i] = (LM[offset] * 327.68f) / temp;
-                } else if (LM[offset] < 25.0f) {
-                    temp3 = temp4 - 1.0f;
-                    temp = (L75[unif] * temp3) + 1.0f;
-                    luminance[j][i] = (LM[offset] * 327.68f) / temp;
-                } else if (LM[offset] < 30.0f) {
-                    temp3 = temp4 - 1.0f;
-                    temp = (L70[unif] * temp3) + 1.0f;
-                    luminance[j][i] = (LM[offset] * 327.68f) / temp;
-                } else if (LM[offset] < 37.0f) {
-                    temp3 = temp4 - 1.0f;
-                    temp = (L63[unif] * temp3) + 1.0f;
-                    luminance[j][i] = (LM[offset] * 327.68f) / temp;
-                } else if (LM[offset] < 42.0f) {
-                    temp3 = temp4 - 1.0f;
-                    temp = (L58[unif] * temp3) + 1.0f;
-                    luminance[j][i] = (LM[offset] * 327.68f) / temp;
-                } else if (LM[offset] < 58.0f) {
-                    temp3 = temp4 - 1.0f;
-                    temp = (L58[unif] * temp3) + 1.0f;
-                    luminance[j][i] = (LM[offset] * 327.68f) / temp;
-                } else if (LM[offset] < 63.0f) {
-                    temp3 = temp4 - 1.0f;
-                    temp = (L63[unif] * temp3) + 1.0f;
-                    luminance[j][i] = (LM[offset] * 327.68f) / temp;
-                } else if (LM[offset] < 70.0f) {
-                    temp3 = temp4 - 1.0f;
-                    temp = (L70[unif] * temp3) + 1.0f;
-                    luminance[j][i] = (LM[offset] * 327.68f) / temp;
-                } else if (LM[offset] < 75.0f) {
-                    temp3 = temp4 - 1.0f;
-                    temp = (L75[unif] * temp3) + 1.0f;
-                    luminance[j][i] = (LM[offset] * 327.68f) / temp;
-                } else if (LM[offset] < 80.0f) {
-                    temp3 = temp4 - 1.0f;
-                    temp = (L80[unif] * temp3) + 1.0f;
-                    luminance[j][i] = (LM[offset] * 327.68f) / temp;
-                } else if (LM[offset] < 83.0f) {
-                    temp3 = temp4 - 1.0f;
-                    temp = (L83[unif] * temp3) + 1.0f;
-                    luminance[j][i] = (LM[offset] * 327.68f) / temp;
-                } else if (LM[offset] < 87.0f) {
-                    temp3 = temp4 - 1.0f;
-                    temp = (L87[unif] * temp3) + 1.0f;
-                    luminance[j][i] = (LM[offset] * 327.68f) / temp;
-                } else if (LM[offset] < 90.0f) {
-                    temp3 = temp4 - 1.0f;
-                    temp = (L90[unif] * temp3) + 1.0f;
-                    luminance[j][i] = (LM[offset] * 327.68f) / temp;
-                } else if (LM[offset] < 95.0f) {
-                    temp3 = temp4 - 1.0f;
-                    temp = (L95[unif] * temp3) + 1.0f;
-                    luminance[j][i] = (LM[offset] * 327.68f) / temp;
-                } else if (LM[offset] < 100.0f) {
-                    luminance[j][i] = LM[offset] * 327.68f;
-                }
-            }
-
         }
-
+}
     delete [] LM;
-    t2e.set();
-
-    if (settings->verbose) {
-        printf("Micro-contrast  %d usec\n", t2e.etime(t1e));
-    }
-
 }
 
 void ImProcFunctions::MLmicrocontrast(LabImage* lab)
diff --git a/rtengine/rt_math.h b/rtengine/rt_math.h
index 6e1138476..17a292618 100644
--- a/rtengine/rt_math.h
+++ b/rtengine/rt_math.h
@@ -87,6 +87,13 @@ constexpr T CLIP(const T& a)
     return LIM(a, static_cast<T>(0), static_cast<T>(MAXVAL));
 }
 
+template <typename T>
+constexpr T SGN(const T& a)
+{
+    // returns -1 for a < 0, 0 for a = 0 and +1 for a > 0
+    return (T(0) < a) - (a < T(0));
+}
+
 template<typename T>
 constexpr T intp(T a, T b, T c)
 {

From 9a1dd0109b6abbda91d4505cfdd23f5617d0af38 Mon Sep 17 00:00:00 2001
From: heckflosse <heckflosse67@gmx.de>
Date: Wed, 10 May 2017 15:43:19 +0200
Subject: [PATCH 2/4] MLmicrocontrast, eliminated one loop

---
 rtengine/ipsharpen.cc | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/rtengine/ipsharpen.cc b/rtengine/ipsharpen.cc
index 7ef323f08..7514a7eb4 100644
--- a/rtengine/ipsharpen.cc
+++ b/rtengine/ipsharpen.cc
@@ -622,7 +622,6 @@ BENCHFUN
     #pragma omp parallel
 #endif
 {
-    float signs[25];
 
 #ifdef _OPENMP
     #pragma omp for schedule(dynamic,16)
@@ -641,13 +640,6 @@ BENCHFUN
         for(int i = k, offset = j * width + i; i < width - k; i++, offset++) {
             float v = LM[offset];
 
-            for(int row = j - k, n = 0; row <= j + k; row++) {
-                for(int offset2 = row * width + i - k; offset2 <= row * width + i + k; offset2++) {
-                    signs[n] = SGN(v - LM[offset2]);
-                    n++;
-                }
-            }
-
             float contrast;
             if (k == 1) {
                 contrast = sqrtf(SQR(LM[offset + 1] - LM[offset - 1]) + SQR(LM[offset + width] - LM[offset - width])) * 0.125f;    //for 3x3
@@ -678,13 +670,10 @@ BENCHFUN
 
             temp = std::max(temp, 0.f);
 
-
-            v = temp;
-
             for(int row = j + k, n = SQR(2*k+1) - 1; row >= j - k; row--) {
                 for(int offset2 = row * width + i + k; offset2 >= row * width + i - k; offset2--) {
-                    if((LM[offset2] - v) * signs[n] > 0.f) {
-                        temp = intp(0.75f, v, LM[offset2]);
+                    if((LM[offset2] - temp) * SGN(v - LM[offset2]) > 0.f) {
+                        temp = intp(0.75f, temp, LM[offset2]);
                         goto breakout;
                     }
                     n--;

From c856894456e1261ad77eee89fc8a1f26a5765954 Mon Sep 17 00:00:00 2001
From: heckflosse <heckflosse67@gmx.de>
Date: Wed, 10 May 2017 16:04:02 +0200
Subject: [PATCH 3/4] MLmicrocontrast: eliminated SGN()

---
 rtengine/ipsharpen.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rtengine/ipsharpen.cc b/rtengine/ipsharpen.cc
index 7514a7eb4..cb98fe907 100644
--- a/rtengine/ipsharpen.cc
+++ b/rtengine/ipsharpen.cc
@@ -672,7 +672,7 @@ BENCHFUN
 
             for(int row = j + k, n = SQR(2*k+1) - 1; row >= j - k; row--) {
                 for(int offset2 = row * width + i + k; offset2 >= row * width + i - k; offset2--) {
-                    if((LM[offset2] - temp) * SGN(v - LM[offset2]) > 0.f) {
+                    if((LM[offset2] - temp) * (v - LM[offset2]) > 0.f) {
                         temp = intp(0.75f, temp, LM[offset2]);
                         goto breakout;
                     }

From c03b702131ad8e467e6031050fe0927b90f03faa Mon Sep 17 00:00:00 2001
From: heckflosse <heckflosse67@gmx.de>
Date: Wed, 17 May 2017 22:15:16 +0200
Subject: [PATCH 4/4] Speedup for microcontrast, fixes #3867

---
 rtengine/ipsharpen.cc | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/rtengine/ipsharpen.cc b/rtengine/ipsharpen.cc
index cb98fe907..a730b9496 100644
--- a/rtengine/ipsharpen.cc
+++ b/rtengine/ipsharpen.cc
@@ -23,17 +23,15 @@
 #include "rt_math.h"
 #include "sleef.c"
 #include "opthelper.h"
+
 using namespace std;
-#define BENCHMARK
-#include "StopWatch.h"
+
 namespace rtengine
 {
 
 #undef ABS
 
 #define ABS(a) ((a)<0?-(a):(a))
-#define CLIREF(x) LIM(x,-200000.0f,200000.0f) // avoid overflow : do not act directly on image[] or pix[]
-
 
 extern const Settings* settings;
 SSEFUNCTION void ImProcFunctions::dcdamping (float** aI, float** aO, float damping, int W, int H)
@@ -569,13 +567,12 @@ void ImProcFunctions::MLmicrocontrast(float** luminance, int W, int H)
     if (!params->sharpenMicro.enabled) {
         return;
     }
-BENCHFUN
 
     const int k = params->sharpenMicro.matrix ? 1 : 2;
 
     // k=2 matrix 5x5  k=1 matrix 3x3
     const int width = W, height = H;
-    const float uniform = params->sharpenMicro.uniformity;//between 0 to 100
+    const float uniform = params->sharpenMicro.uniformity; //between 0 to 100
     const int unif = (int)(uniform / 10.0f); //put unif between 0 to 10
     float amount = params->sharpenMicro.amount / 1500.0f; //amount 2000.0 quasi no artefacts ==> 1500 = maximum, after artefacts
 
@@ -629,7 +626,7 @@ BENCHFUN
 
     for(int j = 0; j < height; j++)
         for(int i = 0, offset = j * width + i; i < width; i++, offset++) {
-            LM[offset] = luminance[j][i] / 327.68f; // adjust to 0.100 and to RT variables
+            LM[offset] = luminance[j][i] / 327.68f; // adjust to [0;100] and to RT variables
         }
 
 #ifdef _OPENMP
@@ -663,7 +660,7 @@ BENCHFUN
                                      LM[offset - 2 * width - 1] + LM[offset - 2 * width + 1] + LM[offset -  width + 2] + LM[offset -  width - 2]);
 
                 temp2 -= sqrt2 * (LM[offset + 2 * width - 2] + LM[offset + 2 * width + 2] + LM[offset - 2 * width - 2] + LM[offset - 2 * width + 2]);
-                temp2 += 18.601126159f * v ;
+                temp2 += 18.601126159f * v ; // 18.601126159 = 4 + 4 * sqrt(2) + 8 * sqrt(1.25)
                 temp2 *= 2.f * s;
                 temp += temp2;
             }