diff --git a/rtengine/ipsharpen.cc b/rtengine/ipsharpen.cc index b4cfc658a..32c4924a4 100644 --- a/rtengine/ipsharpen.cc +++ b/rtengine/ipsharpen.cc @@ -23,7 +23,7 @@ #include "rt_math.h" #include "sleef.c" #include "opthelper.h" -#define BENCHMARK +//#define BENCHMARK #include "StopWatch.h" using namespace std; @@ -32,9 +32,19 @@ float calcBlendFactor(float val, float threshold) { // sigmoid function // result is in ]0;1] range // inflexion point is at (x, y) (threshold, 0.5) - return threshold == 0.f ? 1.f : 1.f / (1.f + xexpf(-8.f * ((val / threshold) * (2.f - threshold) - 1.f ))); + return threshold == 0.f ? 1.f : 1.f / (1.f + xexpf(16.f - 16.f * val / threshold)); } - +#ifdef __SSE2__ +vfloat calcBlendFactor(vfloat valv, vfloat thresholdv) { + // sigmoid function + // result is in ]0;1] range + // inflexion point is at (x, y) (threshold, 0.5) + const vfloat onev = F2V(1.f); + const vfloat c16v = F2V(16.f); + vfloat resultv = onev / (onev + xexpf(c16v - c16v * valv / thresholdv)); + return vself(vmaskf_eq(thresholdv, ZEROV), onev, resultv); +} +#endif } namespace rtengine { @@ -105,7 +115,7 @@ void ImProcFunctions::deconvsharpening (float** luminance, float** tmp, int W, i if (sharpenParam.deconvamount < 1) { return; } - +BENCHFUN float *tmpI[H] ALIGNED16; tmpI[0] = new float[W * H]; @@ -128,39 +138,63 @@ void ImProcFunctions::deconvsharpening (float** luminance, float** tmp, int W, i tmpII[i][j] = luminance[i][j]; } } + + // calculate contrast based blend factors to reduce sharpening in regions with low contrast const float contrastThreshold = sharpenParam.deconvdamping / 100.f; - const float contrastFactor = contrastThreshold == 0.f ? 0.f : 1.f / contrastThreshold; - float *LM = new float[W * H]; //allocation for Luminance + float *blend = new float[W * H]; //allocation for blend factor map + + // upper border + for(int j = 0; j < 2; j++) + for(int i = 0, offset = j * W + i; i < W; i++, offset++) { + blend[offset] = 0.f; + } #ifdef _OPENMP #pragma omp parallel #endif { - +#ifdef __SSE2__ + vfloat contrastThresholdv = F2V(contrastThreshold); + vfloat scalev = F2V(0.0625f / 327.68f); +#endif #ifdef _OPENMP - #pragma omp for schedule(dynamic,16) + #pragma omp for schedule(dynamic,16) nowait #endif - for(int j = 0; j < H; j++) + for(int j = 2; j < H - 2; j++) { + blend[j * W] = blend[j * W + 1] = 0.f; + int i = 2; + int offset = j * W + i; +#ifdef __SSE2__ + for(; i < W - 5; i += 4, offset += 4) { + + vfloat contrastv = vsqrtf(SQRV(LVFU(luminance[j][i+1]) - LVFU(luminance[j][i-1])) + SQRV(LVFU(luminance[j+1][i]) - LVFU(luminance[j-1][i])) + + SQRV(LVFU(luminance[j][i+2]) - LVFU(luminance[j][i-2])) + SQRV(LVFU(luminance[j+2][i]) - LVFU(luminance[j-2][i]))) * scalev; + + STVFU(blend[offset], calcBlendFactor(contrastv, contrastThresholdv)); + } +#endif + for(; i < W - 2; i++, offset++) { + + float contrast = sqrtf(SQR(luminance[j][i+1] - luminance[j][i-1]) + SQR(luminance[j+1][i] - luminance[j-1][i]) + + SQR(luminance[j][i+2] - luminance[j][i-2]) + SQR(luminance[j+2][i] - luminance[j-2][i])) * 0.0625f / 327.68f; + + blend[offset] = calcBlendFactor(contrast, contrastThreshold); + } + blend[j * W + W - 2] = blend[j * W + W - 1] = 0.f; + } +#ifdef _OPENMP + #pragma omp single +#endif + { + // lower border + for(int j = H - 2; j < H; j++) for(int i = 0, offset = j * W + i; i < W; i++, offset++) { - LM[offset] = 0.f; // adjust to [0;100] and to RT variables - } - -#ifdef _OPENMP - #pragma omp for schedule(dynamic,16) -#endif - - for(int j = 2; j < H - 2; j++) - for(int i = 2, offset = j * W + i; i < W - 2; i++, offset++) { - - float contrast; - contrast = sqrtf(SQR(luminance[j][i+1] / 327.68f - luminance[j][i-1] / 327.68f) + SQR(luminance[j+1][i] / 327.68f - luminance[j-1][i] / 327.68f) - + SQR(luminance[j][i+1] / 327.68f - luminance[j][i-1] / 327.68f) + SQR(luminance[j+1][i] / 327.68f - luminance[j-1][i] / 327.68f)) * 0.0625f; //for 5x5 - - contrast = std::min(contrast, 1.f); - LM[offset] = 1.f - calcBlendFactor(contrast, contrastThreshold); + blend[offset] = 0.f; } + } } + float damping = sharpenParam.deconvdamping / 5.0; bool needdamp = false; //sharpenParam.deconvdamping > 0; double sigma = sharpenParam.deconvradius / scale; @@ -200,10 +234,10 @@ void ImProcFunctions::deconvsharpening (float** luminance, float** tmp, int W, i for (int i = 0; i < H; i++) for (int j = 0, offset = i * W + j; j < W; j++, offset++) { - luminance[i][j] = intp(LM[offset], tmpII[i][j], luminance[i][j] * p1 + max(tmpI[i][j], 0.0f) * p2); + luminance[i][j] = intp(blend[offset], luminance[i][j] * p1 + max(tmpI[i][j], 0.0f) * p2, tmpII[i][j]); } } // end parallel - delete [] LM; + delete [] blend; delete [] tmpI[0]; delete [] tmpII[0]; @@ -630,7 +664,6 @@ void ImProcFunctions::MLmicrocontrast(float** luminance, int W, int H) BENCHFUN const int k = params->sharpenMicro.matrix ? 1 : 2; const float contrastThreshold = params->sharpenMicro.contrast / 100; - const float contrastFactor = contrastThreshold == 0.f ? 0.f : 1.f / contrastThreshold; // k=2 matrix 5x5 k=1 matrix 3x3 const int width = W, height = H; const float uniform = params->sharpenMicro.uniformity; //between 0 to 100 @@ -705,7 +738,7 @@ BENCHFUN + SQR(LM[offset + 2] - LM[offset - 2]) + SQR(LM[offset + 2 * width] - LM[offset - 2 * width])) * 0.0625f; //for 5x5 contrast = std::min(contrast, 1.f); - float blend = 1.f - calcBlendFactor(contrast, contrastThreshold); + float blend = calcBlendFactor(contrast, contrastThreshold); //matrix 5x5 float temp = v + 4.f *( v * (s + sqrt2 * s)); //begin 3x3 @@ -804,7 +837,7 @@ BENCHFUN } else { temp = 0.f; } - luminance[j][i] = intp(blend, luminance[j][i], luminance[j][i] * (temp * temp2 + 1.f)); + luminance[j][i] = intp(blend, luminance[j][i] * (temp * temp2 + 1.f), luminance[j][i]); } else { float temp4 = LM[offset] / tempL; // @@ -855,7 +888,7 @@ BENCHFUN } else { temp = 0.f; } - luminance[j][i] = intp(blend, luminance[j][i], luminance[j][i] / (temp * temp4 + 1.f)); + luminance[j][i] = intp(blend, luminance[j][i] / (temp * temp4 + 1.f), luminance[j][i]); } } }