Speedup for microcontrast, fixes #3867

This commit is contained in:
heckflosse
2017-05-17 22:15:16 +02:00
parent c856894456
commit c03b702131

View File

@@ -23,17 +23,15 @@
#include "rt_math.h" #include "rt_math.h"
#include "sleef.c" #include "sleef.c"
#include "opthelper.h" #include "opthelper.h"
using namespace std; using namespace std;
#define BENCHMARK
#include "StopWatch.h"
namespace rtengine namespace rtengine
{ {
#undef ABS #undef ABS
#define ABS(a) ((a)<0?-(a):(a)) #define ABS(a) ((a)<0?-(a):(a))
#define CLIREF(x) LIM(x,-200000.0f,200000.0f) // avoid overflow : do not act directly on image[] or pix[]
extern const Settings* settings; extern const Settings* settings;
SSEFUNCTION void ImProcFunctions::dcdamping (float** aI, float** aO, float damping, int W, int H) SSEFUNCTION void ImProcFunctions::dcdamping (float** aI, float** aO, float damping, int W, int H)
@@ -569,7 +567,6 @@ void ImProcFunctions::MLmicrocontrast(float** luminance, int W, int H)
if (!params->sharpenMicro.enabled) { if (!params->sharpenMicro.enabled) {
return; return;
} }
BENCHFUN
const int k = params->sharpenMicro.matrix ? 1 : 2; const int k = params->sharpenMicro.matrix ? 1 : 2;
@@ -629,7 +626,7 @@ BENCHFUN
for(int j = 0; j < height; j++) for(int j = 0; j < height; j++)
for(int i = 0, offset = j * width + i; i < width; i++, offset++) { for(int i = 0, offset = j * width + i; i < width; i++, offset++) {
LM[offset] = luminance[j][i] / 327.68f; // adjust to 0.100 and to RT variables LM[offset] = luminance[j][i] / 327.68f; // adjust to [0;100] and to RT variables
} }
#ifdef _OPENMP #ifdef _OPENMP
@@ -663,7 +660,7 @@ BENCHFUN
LM[offset - 2 * width - 1] + LM[offset - 2 * width + 1] + LM[offset - width + 2] + LM[offset - width - 2]); LM[offset - 2 * width - 1] + LM[offset - 2 * width + 1] + LM[offset - width + 2] + LM[offset - width - 2]);
temp2 -= sqrt2 * (LM[offset + 2 * width - 2] + LM[offset + 2 * width + 2] + LM[offset - 2 * width - 2] + LM[offset - 2 * width + 2]); temp2 -= sqrt2 * (LM[offset + 2 * width - 2] + LM[offset + 2 * width + 2] + LM[offset - 2 * width - 2] + LM[offset - 2 * width + 2]);
temp2 += 18.601126159f * v ; temp2 += 18.601126159f * v ; // 18.601126159 = 4 + 4 * sqrt(2) + 8 * sqrt(1.25)
temp2 *= 2.f * s; temp2 *= 2.f * s;
temp += temp2; temp += temp2;
} }