From 9a624ca01e36208a8727f200554ed088f9f618e2 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Tue, 27 Aug 2019 13:25:34 +0200 Subject: [PATCH 01/15] Speedup for transform --- rtengine/improcfun.h | 110 ------------------------------------- rtengine/iptransform.cc | 119 +++++++++++++++++++++++++++++----------- 2 files changed, 86 insertions(+), 143 deletions(-) diff --git a/rtengine/improcfun.h b/rtengine/improcfun.h index 987a460d7..338df2416 100644 --- a/rtengine/improcfun.h +++ b/rtengine/improcfun.h @@ -77,116 +77,6 @@ class ImProcFunctions bool needsLensfun(); // static cmsUInt8Number* Mempro = NULL; - inline void interpolateTransformCubic(Imagefloat* src, int xs, int ys, double Dx, double Dy, float *r, float *g, float *b, double mul) - { - const double A = -0.85; - - double w[4]; - - { - double t1, t2; - t1 = -A * (Dx - 1.0) * Dx; - t2 = (3.0 - 2.0 * Dx) * Dx * Dx; - w[3] = t1 * Dx; - w[2] = t1 * (Dx - 1.0) + t2; - w[1] = -t1 * Dx + 1.0 - t2; - w[0] = -t1 * (Dx - 1.0); - } - - double rd, gd, bd; - double yr[4] = {0.0}, yg[4] = {0.0}, yb[4] = {0.0}; - - for (int k = ys, kx = 0; k < ys + 4; k++, kx++) { - rd = gd = bd = 0.0; - - for (int i = xs, ix = 0; i < xs + 4; i++, ix++) { - rd += src->r(k, i) * w[ix]; - gd += src->g(k, i) * w[ix]; - bd += src->b(k, i) * w[ix]; - } - - yr[kx] = rd; - yg[kx] = gd; - yb[kx] = bd; - } - - - { - double t1, t2; - - t1 = -A * (Dy - 1.0) * Dy; - t2 = (3.0 - 2.0 * Dy) * Dy * Dy; - w[3] = t1 * Dy; - w[2] = t1 * (Dy - 1.0) + t2; - w[1] = -t1 * Dy + 1.0 - t2; - w[0] = -t1 * (Dy - 1.0); - } - - rd = gd = bd = 0.0; - - for (int i = 0; i < 4; i++) { - rd += yr[i] * w[i]; - gd += yg[i] * w[i]; - bd += yb[i] * w[i]; - } - - *r = rd * mul; - *g = gd * mul; - *b = bd * mul; - - // if (xs==100 && ys==100) - // printf ("r=%g, g=%g\n", *r, *g); - } - - inline void interpolateTransformChannelsCubic(float** src, int xs, int ys, double Dx, double Dy, float *r, double mul) - { - const double A = -0.85; - - double w[4]; - - { - double t1, t2; - t1 = -A * (Dx - 1.0) * Dx; - t2 = (3.0 - 2.0 * Dx) * Dx * Dx; - w[3] = t1 * Dx; - w[2] = t1 * (Dx - 1.0) + t2; - w[1] = -t1 * Dx + 1.0 - t2; - w[0] = -t1 * (Dx - 1.0); - } - - double rd; - double yr[4] = {0.0}; - - for (int k = ys, kx = 0; k < ys + 4; k++, kx++) { - rd = 0.0; - - for (int i = xs, ix = 0; i < xs + 4; i++, ix++) { - rd += src[k][i] * w[ix]; - } - - yr[kx] = rd; - } - - - { - double t1, t2; - t1 = -A * (Dy - 1.0) * Dy; - t2 = (3.0 - 2.0 * Dy) * Dy * Dy; - w[3] = t1 * Dy; - w[2] = t1 * (Dy - 1.0) + t2; - w[1] = -t1 * Dy + 1.0 - t2; - w[0] = -t1 * (Dy - 1.0); - } - - rd = 0.0; - - for (int i = 0; i < 4; i++) { - rd += yr[i] * w[i]; - } - - *r = rd * mul; - } - public: enum class Median { diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc index 5b1e7c458..1d7ac9d56 100644 --- a/rtengine/iptransform.cc +++ b/rtengine/iptransform.cc @@ -26,7 +26,8 @@ #include "rt_math.h" #include "sleef.c" #include "rtlensfun.h" - +#define BENCHMARK +#include "StopWatch.h" using namespace std; @@ -87,6 +88,66 @@ float normn (float a, float b, int n) } } +inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys, float Dx, float Dy, float &r, float &g, float &b, float mul) +{ + constexpr float A = -0.85f; + + // Vertical + const float t1Vert = A * (Dy - Dy * Dy); + const float t2Vert = (3.f - 2.f * Dy) * Dy * Dy; + const float w3Vert = t1Vert * Dy; + const float w2Vert = t1Vert * Dy - t1Vert + t2Vert; + const float w1Vert = 1.f - (t1Vert * Dy) - t2Vert; + const float w0Vert = t1Vert - (t1Vert * Dy); + + float rv[4], gv[4], bv[4]; + for (int i = 0; i < 4; ++i) { + rv[i] = w0Vert * src->r(ys, xs + i) + w1Vert * src->r(ys + 1, xs + i) + w2Vert * src->r(ys + 2, xs + i) + w3Vert * src->r(ys + 3, xs + i); + gv[i] = w0Vert * src->g(ys, xs + i) + w1Vert * src->g(ys + 1, xs + i) + w2Vert * src->g(ys + 2, xs + i) + w3Vert * src->g(ys + 3, xs + i); + bv[i] = w0Vert * src->b(ys, xs + i) + w1Vert * src->b(ys + 1, xs + i) + w2Vert * src->b(ys + 2, xs + i) + w3Vert * src->b(ys + 3, xs + i); + } + + // Horizontal + const float t1Hor = A * (Dx - Dx * Dx); + const float t2Hor = (3.f - 2.f * Dx) * Dx * Dx; + const float w3Hor = t1Hor * Dx; + const float w2Hor = t1Hor * Dx - t1Hor + t2Hor; + const float w1Hor = 1.f - (t1Hor * Dx) - t2Hor; + const float w0Hor = t1Hor - (t1Hor * Dx); + + r = mul * (rv[0] * w0Hor + rv[1] * w1Hor + rv[2] * w2Hor + rv[3] * w3Hor); + g = mul * (gv[0] * w0Hor + gv[1] * w1Hor + gv[2] * w2Hor + gv[3] * w3Hor); + b = mul * (bv[0] * w0Hor + bv[1] * w1Hor + bv[2] * w2Hor + bv[3] * w3Hor); +} + +inline void interpolateTransformChannelsCubic(const float* const * src, int xs, int ys, float Dx, float Dy, float &dest, float mul) +{ + constexpr float A = -0.85f; + + // Vertical + const float t1Vert = A * (Dy - Dy * Dy); + const float t2Vert = (3.f - 2.f * Dy) * Dy * Dy; + const float w3Vert = t1Vert * Dy; + const float w2Vert = t1Vert * Dy - t1Vert + t2Vert; + const float w1Vert = 1.f - (t1Vert * Dy) - t2Vert; + const float w0Vert = t1Vert - (t1Vert * Dy); + + float cv[4]; + for (int i = 0; i < 4; ++i) { + cv[i] = w0Vert * src[ys][xs + i] + w1Vert * src[ys + 1][xs + i] + w2Vert * src[ys + 2][xs + i] + w3Vert * src[ys + 3][xs + i]; + } + + // Horizontal + const float t1Hor = A * (Dx - Dx * Dx); + const float t2Hor = (3.f - 2.f * Dx) * Dx * Dx; + const float w3Hor = t1Hor * Dx; + const float w2Hor = t1Hor * Dx - t1Hor + t2Hor; + const float w1Hor = 1.f - (t1Hor * Dx) - t2Hor; + const float w0Hor = t1Hor - (t1Hor * Dx); + + dest = mul * (cv[0] * w0Hor + cv[1] * w1Hor + cv[2] * w2Hor + cv[3] * w3Hor); +} + } @@ -740,17 +801,18 @@ void ImProcFunctions::transformLuminanceOnly (Imagefloat* original, Imagefloat* void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, Imagefloat *transformed, int cx, int cy, int sx, int sy, int oW, int oH, int fW, int fH, const LensCorrection *pLCPMap) { + BENCHFUN // set up stuff, depending on the mode we are - bool enableLCPDist = pLCPMap && params->lensProf.useDist; - bool enableCA = highQuality && needsCA(); - bool enableGradient = needsGradient(); - bool enablePCVignetting = needsPCVignetting(); - bool enableVignetting = needsVignetting(); - bool enablePerspective = needsPerspective(); - bool enableDistortion = needsDistortion(); + const bool enableLCPDist = pLCPMap && params->lensProf.useDist; + const bool enableCA = highQuality && needsCA(); + const bool enableGradient = needsGradient(); + const bool enablePCVignetting = needsPCVignetting(); + const bool enableVignetting = needsVignetting(); + const bool enablePerspective = needsPerspective(); + const bool enableDistortion = needsDistortion(); - double w2 = (double) oW / 2.0 - 0.5; - double h2 = (double) oH / 2.0 - 0.5; + const double w2 = (double) oW / 2.0 - 0.5; + const double h2 = (double) oH / 2.0 - 0.5; double vig_w2, vig_h2, maxRadius, v, b, mul; calcVignettingParams (oW, oH, params->vignetting, vig_w2, vig_h2, maxRadius, v, b, mul); @@ -784,11 +846,11 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I chDist[2] = enableCA ? params->cacorrection.blue : 0.0; // auxiliary variables for distortion correction - double distAmount = params->distortion.amount; + const double distAmount = params->distortion.amount; // auxiliary variables for rotation - double cost = cos (params->rotate.degree * rtengine::RT_PI / 180.0); - double sint = sin (params->rotate.degree * rtengine::RT_PI / 180.0); + const double cost = cos (params->rotate.degree * rtengine::RT_PI / 180.0); + const double sint = sin (params->rotate.degree * rtengine::RT_PI / 180.0); // auxiliary variables for vertical perspective correction double vpdeg = params->perspective.vertical / 100.0 * 45.0; @@ -814,9 +876,10 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I #pragma GCC diagnostic pop #endif // main cycle - bool darkening = (params->vignetting.amount <= 0.0); + const bool darkening = (params->vignetting.amount <= 0.0); + #ifdef _OPENMP - #pragma omp parallel for if (multiThread) + #pragma omp parallel for schedule(dynamic, 16) if(multiThread) #endif for (int y = 0; y < transformed->getHeight(); y++) { @@ -833,13 +896,6 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I x_d += ascale * (cx - w2); // centering x coord & scale y_d += ascale * (cy - h2); // centering y coord & scale - double vig_x_d = 0., vig_y_d = 0.; - - if (enableVignetting) { - vig_x_d = ascale * (x + cx - vig_w2); // centering x coord & scale - vig_y_d = ascale * (y + cy - vig_h2); // centering y coord & scale - } - if (enablePerspective) { // horizontal perspective transformation y_d *= maxRadius / (maxRadius + x_d * hptanpt); @@ -862,14 +918,6 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I s = 1.0 - distAmount + distAmount * r ; } - double r2 = 0.; - - if (enableVignetting) { - double vig_Dx = vig_x_d * cost - vig_y_d * sint; - double vig_Dy = vig_x_d * sint + vig_y_d * cost; - r2 = sqrt (vig_Dx * vig_Dx + vig_Dy * vig_Dy); - } - for (int c = 0; c < (enableCA ? 3 : 1); c++) { double Dx = Dxc * (s + chDist[c]); double Dy = Dyc * (s + chDist[c]); @@ -893,6 +941,11 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I double vignmul = 1.0; if (enableVignetting) { + const double vig_x_d = ascale * (x + cx - vig_w2); // centering x coord & scale + const double vig_y_d = ascale * (y + cy - vig_h2); // centering y coord & scale + const double vig_Dx = vig_x_d * cost - vig_y_d * sint; + const double vig_Dy = vig_x_d * sint + vig_y_d * cost; + const double r2 = sqrt (vig_Dx * vig_Dx + vig_Dy * vig_Dy); if (darkening) { vignmul /= std::max (v + mul * tanh (b * (maxRadius - s * r2) / maxRadius), 0.001); } else { @@ -911,13 +964,13 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I if (yc > 0 && yc < original->getHeight() - 2 && xc > 0 && xc < original->getWidth() - 2) { // all interpolation pixels inside image if (enableCA) { - interpolateTransformChannelsCubic (chOrig[c], xc - 1, yc - 1, Dx, Dy, & (chTrans[c][y][x]), vignmul); + interpolateTransformChannelsCubic (chOrig[c], xc - 1, yc - 1, Dx, Dy, chTrans[c][y][x], vignmul); } else if (!highQuality) { transformed->r (y, x) = vignmul * (original->r (yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->r (yc, xc + 1) * Dx * (1.0 - Dy) + original->r (yc + 1, xc) * (1.0 - Dx) * Dy + original->r (yc + 1, xc + 1) * Dx * Dy); transformed->g (y, x) = vignmul * (original->g (yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->g (yc, xc + 1) * Dx * (1.0 - Dy) + original->g (yc + 1, xc) * (1.0 - Dx) * Dy + original->g (yc + 1, xc + 1) * Dx * Dy); transformed->b (y, x) = vignmul * (original->b (yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->b (yc, xc + 1) * Dx * (1.0 - Dy) + original->b (yc + 1, xc) * (1.0 - Dx) * Dy + original->b (yc + 1, xc + 1) * Dx * Dy); } else { - interpolateTransformCubic (original, xc - 1, yc - 1, Dx, Dy, & (transformed->r (y, x)), & (transformed->g (y, x)), & (transformed->b (y, x)), vignmul); + interpolateTransformCubic (original, xc - 1, yc - 1, Dx, Dy, transformed->r (y, x), transformed->g (y, x), transformed->b (y, x), vignmul); } } else { // edge pixels @@ -988,7 +1041,7 @@ void ImProcFunctions::transformLCPCAOnly(Imagefloat *original, Imagefloat *trans // multiplier for vignetting correction if (yc > 0 && yc < original->getHeight() - 2 && xc > 0 && xc < original->getWidth() - 2) { // all interpolation pixels inside image - interpolateTransformChannelsCubic (chOrig[c], xc - 1, yc - 1, Dx, Dy, & (chTrans[c][y][x]), 1.0); + interpolateTransformChannelsCubic (chOrig[c], xc - 1, yc - 1, Dx, Dy, chTrans[c][y][x], 1.0); } else { // edge pixels int y1 = LIM (yc, 0, original->getHeight() - 1); From 5a5952dddbbeb5ba901950e43592aea338d22e60 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Tue, 27 Aug 2019 13:36:52 +0200 Subject: [PATCH 02/15] added two comments --- rtengine/iptransform.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc index 1d7ac9d56..9484599d6 100644 --- a/rtengine/iptransform.cc +++ b/rtengine/iptransform.cc @@ -90,6 +90,7 @@ float normn (float a, float b, int n) inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys, float Dx, float Dy, float &r, float &g, float &b, float mul) { + // I tried hand written SSE code but gcc vectorizes better constexpr float A = -0.85f; // Vertical @@ -122,6 +123,7 @@ inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys, inline void interpolateTransformChannelsCubic(const float* const * src, int xs, int ys, float Dx, float Dy, float &dest, float mul) { + // I tried hand written SSE code but gcc vectorizes better constexpr float A = -0.85f; // Vertical From 4312e682657590ef8c30f4056baa49613bdd16ed Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Tue, 27 Aug 2019 19:59:10 +0200 Subject: [PATCH 03/15] SSE code for interpolateTransformCubic and interpolateTransformChannelsCubic, also some cleanups --- rtengine/iptransform.cc | 165 ++++++++++++++++++++++++---------------- 1 file changed, 98 insertions(+), 67 deletions(-) diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc index 9484599d6..608fa0337 100644 --- a/rtengine/iptransform.cc +++ b/rtengine/iptransform.cc @@ -88,9 +88,34 @@ float normn (float a, float b, int n) } } +#ifdef __SSE2__ +inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys, float Dx, float Dy, float &r, float &g, float &b, float mul) +{ + constexpr float A = -0.85f; + + // Vertical + const float t1Vert = A * (Dy - Dy * Dy); + const float t2Vert = (3.f - 2.f * Dy) * Dy * Dy; + const vfloat w3Vert = F2V(t1Vert * Dy); + const vfloat w2Vert = F2V(t1Vert * Dy - t1Vert + t2Vert); + const vfloat w1Vert = F2V(1.f - (t1Vert * Dy) - t2Vert); + const vfloat w0Vert = F2V(t1Vert - (t1Vert * Dy)); + + const vfloat rv = (w0Vert * LVFU(src->r(ys, xs)) + w1Vert * LVFU(src->r(ys + 1, xs))) + (w2Vert * LVFU(src->r(ys + 2, xs)) + w3Vert * LVFU(src->r(ys + 3, xs))); + const vfloat gv = (w0Vert * LVFU(src->g(ys, xs)) + w1Vert * LVFU(src->g(ys + 1, xs))) + (w2Vert * LVFU(src->g(ys + 2, xs)) + w3Vert * LVFU(src->g(ys + 3, xs))); + const vfloat bv = (w0Vert * LVFU(src->b(ys, xs)) + w1Vert * LVFU(src->b(ys + 1, xs))) + (w2Vert * LVFU(src->b(ys + 2, xs)) + w3Vert * LVFU(src->b(ys + 3, xs))); + + // Horizontal + const float t1Hor = A * (Dx - Dx * Dx); + const float t2Hor = (3.f - 2.f * Dx) * Dx * Dx; + const vfloat weight = _mm_set_ps(t1Hor * Dx, t1Hor * Dx - t1Hor + t2Hor, 1.f - (t1Hor * Dx) - t2Hor, t1Hor - (t1Hor * Dx)) * F2V(mul); + r = vhadd(weight * rv); + g = vhadd(weight * gv); + b = vhadd(weight * bv); +} +#else inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys, float Dx, float Dy, float &r, float &g, float &b, float mul) { - // I tried hand written SSE code but gcc vectorizes better constexpr float A = -0.85f; // Vertical @@ -120,10 +145,31 @@ inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys, g = mul * (gv[0] * w0Hor + gv[1] * w1Hor + gv[2] * w2Hor + gv[3] * w3Hor); b = mul * (bv[0] * w0Hor + bv[1] * w1Hor + bv[2] * w2Hor + bv[3] * w3Hor); } - +#endif +#ifdef __SSE2__ +inline void interpolateTransformChannelsCubic(const float* const * src, int xs, int ys, float Dx, float Dy, float &dest, float mul) +{ + constexpr float A = -0.85f; + + // Vertical + const float t1Vert = A * (Dy - Dy * Dy); + const float t2Vert = (3.f - 2.f * Dy) * Dy * Dy; + const vfloat w3Vert = F2V(t1Vert * Dy); + const vfloat w2Vert = F2V(t1Vert * Dy - t1Vert + t2Vert); + const vfloat w1Vert = F2V(1.f - (t1Vert * Dy) - t2Vert); + const vfloat w0Vert = F2V(t1Vert - (t1Vert * Dy)); + + const vfloat cv = (w0Vert * LVFU(src[ys][xs]) + w1Vert * LVFU(src[ys + 1][xs])) + (w2Vert * LVFU(src[ys + 2][xs]) + w3Vert * LVFU(src[ys + 3][xs])); + + // Horizontal + const float t1Hor = A * (Dx - Dx * Dx); + const float t2Hor = (3.f - 2.f * Dx) * Dx * Dx; + const vfloat weight = _mm_set_ps(t1Hor * Dx, t1Hor * Dx - t1Hor + t2Hor, 1.f - (t1Hor * Dx) - t2Hor, t1Hor - (t1Hor * Dx)); + dest = mul * vhadd(weight * cv); +} +#else inline void interpolateTransformChannelsCubic(const float* const * src, int xs, int ys, float Dx, float Dy, float &dest, float mul) { - // I tried hand written SSE code but gcc vectorizes better constexpr float A = -0.85f; // Vertical @@ -149,7 +195,7 @@ inline void interpolateTransformChannelsCubic(const float* const * src, int xs, dest = mul * (cv[0] * w0Hor + cv[1] * w1Hor + cv[2] * w2Hor + cv[3] * w3Hor); } - +#endif } @@ -817,69 +863,54 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I const double h2 = (double) oH / 2.0 - 0.5; double vig_w2, vig_h2, maxRadius, v, b, mul; - calcVignettingParams (oW, oH, params->vignetting, vig_w2, vig_h2, maxRadius, v, b, mul); + calcVignettingParams(oW, oH, params->vignetting, vig_w2, vig_h2, maxRadius, v, b, mul); struct grad_params gp; if (enableGradient) { - calcGradientParams (oW, oH, params->gradient, gp); + calcGradientParams(oW, oH, params->gradient, gp); } struct pcv_params pcv; if (enablePCVignetting) { - calcPCVignetteParams (fW, fH, oW, oH, params->pcvignette, params->crop, pcv); + calcPCVignetteParams(fW, fH, oW, oH, params->pcvignette, params->crop, pcv); } - float** chOrig[3]; - chOrig[0] = original->r.ptrs; - chOrig[1] = original->g.ptrs; - chOrig[2] = original->b.ptrs; - - float** chTrans[3]; - chTrans[0] = transformed->r.ptrs; - chTrans[1] = transformed->g.ptrs; - chTrans[2] = transformed->b.ptrs; + float** chOrig[3] = {original->r.ptrs, original->g.ptrs, original->b.ptrs}; + float** chTrans[3] = {transformed->r.ptrs, transformed->g.ptrs, transformed->b.ptrs}; // auxiliary variables for c/a correction - double chDist[3]; - chDist[0] = enableCA ? params->cacorrection.red : 0.0; - chDist[1] = 0.0; - chDist[2] = enableCA ? params->cacorrection.blue : 0.0; + const double chDist[3] = {enableCA ? params->cacorrection.red : 0.0, 0.0, enableCA ? params->cacorrection.blue : 0.0}; // auxiliary variables for distortion correction const double distAmount = params->distortion.amount; // auxiliary variables for rotation - const double cost = cos (params->rotate.degree * rtengine::RT_PI / 180.0); - const double sint = sin (params->rotate.degree * rtengine::RT_PI / 180.0); + const double cost = cos(params->rotate.degree * rtengine::RT_PI / 180.0); + const double sint = sin(params->rotate.degree * rtengine::RT_PI / 180.0); // auxiliary variables for vertical perspective correction - double vpdeg = params->perspective.vertical / 100.0 * 45.0; - double vpalpha = (90.0 - vpdeg) / 180.0 * rtengine::RT_PI; - double vpteta = fabs (vpalpha - rtengine::RT_PI / 2) < 3e-4 ? 0.0 : acos ((vpdeg > 0 ? 1.0 : -1.0) * sqrt ((-SQR (oW * tan (vpalpha)) + (vpdeg > 0 ? 1.0 : -1.0) * - oW * tan (vpalpha) * sqrt (SQR (4 * maxRadius) + SQR (oW * tan (vpalpha)))) / (SQR (maxRadius) * 8))); - double vpcospt = (vpdeg >= 0 ? 1.0 : -1.0) * cos (vpteta), vptanpt = tan (vpteta); + const double vpdeg = params->perspective.vertical / 100.0 * 45.0; + const double vpalpha = (90.0 - vpdeg) / 180.0 * rtengine::RT_PI; + const double vpteta = fabs(vpalpha - rtengine::RT_PI / 2) < 3e-4 ? 0.0 : acos((vpdeg > 0 ? 1.0 : -1.0) * sqrt((-SQR(oW * tan(vpalpha)) + (vpdeg > 0 ? 1.0 : -1.0) * + oW * tan(vpalpha) * sqrt(SQR(4 * maxRadius) + SQR(oW * tan(vpalpha)))) / (SQR(maxRadius) * 8))); + const double vpcospt = (vpdeg >= 0 ? 1.0 : -1.0) * cos(vpteta), vptanpt = tan(vpteta); // auxiliary variables for horizontal perspective correction - double hpdeg = params->perspective.horizontal / 100.0 * 45.0; - double hpalpha = (90.0 - hpdeg) / 180.0 * rtengine::RT_PI; - double hpteta = fabs (hpalpha - rtengine::RT_PI / 2) < 3e-4 ? 0.0 : acos ((hpdeg > 0 ? 1.0 : -1.0) * sqrt ((-SQR (oH * tan (hpalpha)) + (hpdeg > 0 ? 1.0 : -1.0) * - oH * tan (hpalpha) * sqrt (SQR (4 * maxRadius) + SQR (oH * tan (hpalpha)))) / (SQR (maxRadius) * 8))); - double hpcospt = (hpdeg >= 0 ? 1.0 : -1.0) * cos (hpteta), hptanpt = tan (hpteta); + const double hpdeg = params->perspective.horizontal / 100.0 * 45.0; + const double hpalpha = (90.0 - hpdeg) / 180.0 * rtengine::RT_PI; + const double hpteta = fabs(hpalpha - rtengine::RT_PI / 2) < 3e-4 ? 0.0 : acos((hpdeg > 0 ? 1.0 : -1.0) * sqrt((-SQR(oH * tan(hpalpha)) + (hpdeg > 0 ? 1.0 : -1.0) * + oH * tan(hpalpha) * sqrt(SQR(4 * maxRadius) + SQR(oH * tan(hpalpha)))) / (SQR(maxRadius) * 8))); + const double hpcospt = (hpdeg >= 0 ? 1.0 : -1.0) * cos(hpteta), hptanpt = tan(hpteta); - double ascale = params->commonTrans.autofill ? getTransformAutoFill (oW, oH, pLCPMap) : 1.0; + const double ascale = params->commonTrans.autofill ? getTransformAutoFill(oW, oH, pLCPMap) : 1.0; -#if defined( __GNUC__ ) && __GNUC__ >= 7// silence warning -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wimplicit-fallthrough" -#endif -#if defined( __GNUC__ ) && __GNUC__ >= 7 -#pragma GCC diagnostic pop -#endif - // main cycle const bool darkening = (params->vignetting.amount <= 0.0); + const double centerFactorx = cx - w2; + const double centerFactory = cy - h2; + // main cycle #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 16) if(multiThread) #endif @@ -895,8 +926,8 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I y_d *= ascale; } - x_d += ascale * (cx - w2); // centering x coord & scale - y_d += ascale * (cy - h2); // centering y coord & scale + x_d += ascale * centerFactorx; // centering x coord & scale + y_d += ascale * centerFactory; // centering y coord & scale if (enablePerspective) { // horizontal perspective transformation @@ -909,15 +940,15 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I } // rotate - double Dxc = x_d * cost - y_d * sint; - double Dyc = x_d * sint + y_d * cost; + const double Dxc = x_d * cost - y_d * sint; + const double Dyc = x_d * sint + y_d * cost; // distortion correction double s = 1; if (enableDistortion) { - double r = sqrt (Dxc * Dxc + Dyc * Dyc) / maxRadius; // sqrt is slow - s = 1.0 - distAmount + distAmount * r ; + double r = sqrt(Dxc * Dxc + Dyc * Dyc) / maxRadius; + s = 1.0 - distAmount + distAmount * r; } for (int c = 0; c < (enableCA ? 3 : 1); c++) { @@ -947,46 +978,46 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I const double vig_y_d = ascale * (y + cy - vig_h2); // centering y coord & scale const double vig_Dx = vig_x_d * cost - vig_y_d * sint; const double vig_Dy = vig_x_d * sint + vig_y_d * cost; - const double r2 = sqrt (vig_Dx * vig_Dx + vig_Dy * vig_Dy); + const double r2 = sqrt(vig_Dx * vig_Dx + vig_Dy * vig_Dy); if (darkening) { - vignmul /= std::max (v + mul * tanh (b * (maxRadius - s * r2) / maxRadius), 0.001); + vignmul /= std::max(v + mul * tanh(b * (maxRadius - s * r2) / maxRadius), 0.001); } else { - vignmul *= (v + mul * tanh (b * (maxRadius - s * r2) / maxRadius)); + vignmul *= (v + mul * tanh(b * (maxRadius - s * r2) / maxRadius)); } } if (enableGradient) { - vignmul *= calcGradientFactor (gp, cx + x, cy + y); + vignmul *= calcGradientFactor(gp, cx + x, cy + y); } if (enablePCVignetting) { - vignmul *= calcPCVignetteFactor (pcv, cx + x, cy + y); + vignmul *= calcPCVignetteFactor(pcv, cx + x, cy + y); } if (yc > 0 && yc < original->getHeight() - 2 && xc > 0 && xc < original->getWidth() - 2) { // all interpolation pixels inside image if (enableCA) { - interpolateTransformChannelsCubic (chOrig[c], xc - 1, yc - 1, Dx, Dy, chTrans[c][y][x], vignmul); + interpolateTransformChannelsCubic(chOrig[c], xc - 1, yc - 1, Dx, Dy, chTrans[c][y][x], vignmul); } else if (!highQuality) { - transformed->r (y, x) = vignmul * (original->r (yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->r (yc, xc + 1) * Dx * (1.0 - Dy) + original->r (yc + 1, xc) * (1.0 - Dx) * Dy + original->r (yc + 1, xc + 1) * Dx * Dy); - transformed->g (y, x) = vignmul * (original->g (yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->g (yc, xc + 1) * Dx * (1.0 - Dy) + original->g (yc + 1, xc) * (1.0 - Dx) * Dy + original->g (yc + 1, xc + 1) * Dx * Dy); - transformed->b (y, x) = vignmul * (original->b (yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->b (yc, xc + 1) * Dx * (1.0 - Dy) + original->b (yc + 1, xc) * (1.0 - Dx) * Dy + original->b (yc + 1, xc + 1) * Dx * Dy); + transformed->r(y, x) = vignmul * (original->r(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->r(yc, xc + 1) * Dx * (1.0 - Dy) + original->r(yc + 1, xc) * (1.0 - Dx) * Dy + original->r(yc + 1, xc + 1) * Dx * Dy); + transformed->g(y, x) = vignmul * (original->g(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->g(yc, xc + 1) * Dx * (1.0 - Dy) + original->g(yc + 1, xc) * (1.0 - Dx) * Dy + original->g(yc + 1, xc + 1) * Dx * Dy); + transformed->b(y, x) = vignmul * (original->b(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->b(yc, xc + 1) * Dx * (1.0 - Dy) + original->b(yc + 1, xc) * (1.0 - Dx) * Dy + original->b(yc + 1, xc + 1) * Dx * Dy); } else { - interpolateTransformCubic (original, xc - 1, yc - 1, Dx, Dy, transformed->r (y, x), transformed->g (y, x), transformed->b (y, x), vignmul); + interpolateTransformCubic(original, xc - 1, yc - 1, Dx, Dy, transformed->r(y, x), transformed->g(y, x), transformed->b(y, x), vignmul); } } else { // edge pixels - int y1 = LIM (yc, 0, original->getHeight() - 1); - int y2 = LIM (yc + 1, 0, original->getHeight() - 1); - int x1 = LIM (xc, 0, original->getWidth() - 1); - int x2 = LIM (xc + 1, 0, original->getWidth() - 1); + int y1 = LIM(yc, 0, original->getHeight() - 1); + int y2 = LIM(yc + 1, 0, original->getHeight() - 1); + int x1 = LIM(xc, 0, original->getWidth() - 1); + int x2 = LIM(xc + 1, 0, original->getWidth() - 1); if (enableCA) { chTrans[c][y][x] = vignmul * (chOrig[c][y1][x1] * (1.0 - Dx) * (1.0 - Dy) + chOrig[c][y1][x2] * Dx * (1.0 - Dy) + chOrig[c][y2][x1] * (1.0 - Dx) * Dy + chOrig[c][y2][x2] * Dx * Dy); } else { - transformed->r (y, x) = vignmul * (original->r (y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->r (y1, x2) * Dx * (1.0 - Dy) + original->r (y2, x1) * (1.0 - Dx) * Dy + original->r (y2, x2) * Dx * Dy); - transformed->g (y, x) = vignmul * (original->g (y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->g (y1, x2) * Dx * (1.0 - Dy) + original->g (y2, x1) * (1.0 - Dx) * Dy + original->g (y2, x2) * Dx * Dy); - transformed->b (y, x) = vignmul * (original->b (y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->b (y1, x2) * Dx * (1.0 - Dy) + original->b (y2, x1) * (1.0 - Dx) * Dy + original->b (y2, x2) * Dx * Dy); + transformed->r(y, x) = vignmul * (original->r(y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->r(y1, x2) * Dx * (1.0 - Dy) + original->r(y2, x1) * (1.0 - Dx) * Dy + original->r(y2, x2) * Dx * Dy); + transformed->g(y, x) = vignmul * (original->g(y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->g(y1, x2) * Dx * (1.0 - Dy) + original->g(y2, x1) * (1.0 - Dx) * Dy + original->g(y2, x2) * Dx * Dy); + transformed->b(y, x) = vignmul * (original->b(y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->b(y1, x2) * Dx * (1.0 - Dy) + original->b(y2, x1) * (1.0 - Dx) * Dy + original->b(y2, x2) * Dx * Dy); } } } else { @@ -994,9 +1025,9 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I // not valid (source pixel x,y not inside source image, etc.) chTrans[c][y][x] = 0; } else { - transformed->r (y, x) = 0; - transformed->g (y, x) = 0; - transformed->b (y, x) = 0; + transformed->r(y, x) = 0; + transformed->g(y, x) = 0; + transformed->b(y, x) = 0; } } } From 2e34aa8a1de8c0f8c0786b050fad6e1bdea63e22 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Tue, 10 Sep 2019 23:23:37 +0200 Subject: [PATCH 04/15] Soft Light: speedup and reduce memory usage --- rtengine/ipsoftlight.cc | 115 +++++++++++++++++++++++++++++++--------- 1 file changed, 90 insertions(+), 25 deletions(-) diff --git a/rtengine/ipsoftlight.cc b/rtengine/ipsoftlight.cc index c7a4d1af7..1d94a29c2 100644 --- a/rtengine/ipsoftlight.cc +++ b/rtengine/ipsoftlight.cc @@ -3,6 +3,7 @@ * This file is part of RawTherapee. * * Copyright 2018 Alberto Griggio + * Optimized 2019 Ingo Weyrich * * RawTherapee is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -18,14 +19,10 @@ * along with RawTherapee. If not, see . */ -#ifdef _OPENMP -#include -#endif - #include "improcfun.h" - #include "procparams.h" - +#define BENCHMARK +#include "StopWatch.h" namespace rtengine { namespace { @@ -33,18 +30,29 @@ namespace { inline float sl(float blend, float x) { if (!OOG(x)) { - const float orig = 1.f - blend; float v = Color::gamma_srgb(x) / MAXVALF; - // Pegtop's formula from + // using Pegtop's formula from // https://en.wikipedia.org/wiki/Blend_modes#Soft_Light - float v2 = v * v; - float v22 = v2 * 2.f; - v = v2 + v22 - v22 * v; - x = blend * Color::igamma_srgb(v * MAXVALF) + orig * x; + // const float orig = 1.f - blend; + // float v2 = v * v; + // float v22 = v2 * 2.f; + // v = v2 + v22 - v22 * v; + // return blend * Color::igamma_srgb(v * MAXVALF) + orig * x; + + // using optimized formula (heckflosse67@gmx.de) + return intp(blend, Color::igamma_srgb(v * v * (3.f - 2.f * v) * MAXVALF), x); } return x; } +#ifdef __SSE2__ +inline vfloat sl(vfloat blend, vfloat x) +{ + const vfloat v = Color::gammatab_srgb[x] / F2V(MAXVALF); + return vself(vmaskf_gt(x, F2V(MAXVALF)), x, vself(vmaskf_lt(x, ZEROV), x, vintpf(blend, Color::igammatab_srgb[v * v * (F2V(3.f) - (v + v)) * MAXVALF], x))); +} +#endif + } // namespace @@ -53,24 +61,81 @@ void ImProcFunctions::softLight(LabImage *lab) if (!params->softlight.enabled || !params->softlight.strength) { return; } + BENCHFUN - Imagefloat working(lab->W, lab->H); - lab2rgb(*lab, working, params->icm.workingProfile); + TMatrix wprof = ICCStore::getInstance()->workingSpaceMatrix(params->icm.workingProfile); + const float wp[3][3] = { + {static_cast (wprof[0][0]), static_cast (wprof[0][1]), static_cast (wprof[0][2])}, + {static_cast (wprof[1][0]), static_cast (wprof[1][1]), static_cast (wprof[1][2])}, + {static_cast (wprof[2][0]), static_cast (wprof[2][1]), static_cast (wprof[2][2])} + }; - const float blend = params->softlight.strength / 100.f; + TMatrix wiprof = ICCStore::getInstance()->workingSpaceInverseMatrix(params->icm.workingProfile); + const float wip[3][3] = { + {static_cast (wiprof[0][0]), static_cast (wiprof[0][1]), static_cast (wiprof[0][2])}, + {static_cast (wiprof[1][0]), static_cast (wiprof[1][1]), static_cast (wiprof[1][2])}, + {static_cast (wiprof[2][0]), static_cast (wiprof[2][1]), static_cast (wiprof[2][2])} + }; -#ifdef _OPENMP - #pragma omp parallel for -#endif - for (int y = 0; y < working.getHeight(); ++y) { - for (int x = 0; x < working.getWidth(); ++x) { - working.r(y, x) = sl(blend, working.r(y, x)); - working.g(y, x) = sl(blend, working.g(y, x)); - working.b(y, x) = sl(blend, working.b(y, x)); +#ifdef __SSE2__ + vfloat wipv[3][3]; + + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 3; j++) { + wipv[i][j] = F2V(wiprof[i][j]); + } + } + + vfloat wpv[3][3]; + + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 3; j++) { + wpv[i][j] = F2V(wprof[i][j]); + } + } +#endif + +#ifdef _OPENMP + #pragma omp parallel +#endif + { + const float blend = params->softlight.strength / 100.f; +#ifdef __SSE2__ + const vfloat blendv = F2V(blend); +#endif +#ifdef _OPENMP + #pragma omp for schedule(dynamic,16) +#endif + for (int i = 0; i < lab->H; ++i) { + int j = 0; +#ifdef __SSE2__ + for (; j < lab->W - 3; j += 4) { + vfloat Xv, Yv, Zv; + vfloat Rv, Gv, Bv; + Color::Lab2XYZ(LVFU(lab->L[i][j]),LVFU (lab->a[i][j]),LVFU (lab->b[i][j]), Xv, Yv, Zv); + Color::xyz2rgb(Xv, Yv, Zv, Rv, Gv, Bv, wipv); + Rv = sl(blendv, Rv); + Gv = sl(blendv, Gv); + Bv = sl(blendv, Bv); + Color::rgbxyz(Rv, Gv, Bv, Xv, Yv, Zv, wpv); + for (int k = 0; k < 4; ++k) { + Color::XYZ2Lab(Xv[k], Yv[k], Zv[k], lab->L[i][j + k], lab->a[i][j + k], lab->b[i][j+ k]); + } + } +#endif + for (; j < lab->W; j++) { + float X, Y, Z; + float R, G, B; + Color::Lab2XYZ(lab->L[i][j], lab->a[i][j], lab->b[i][j], X, Y, Z); + Color::xyz2rgb(X, Y, Z, R, G, B, wip); + R = sl(blend, R); + G = sl(blend, G); + B = sl(blend, B); + Color::rgbxyz(R, G, B, X, Y, Z, wp); + Color::XYZ2Lab(X, Y, Z, lab->L[i][j], lab->a[i][j], lab->b[i][j]); + } } } - - rgb2lab(working, *lab, params->icm.workingProfile); } } // namespace rtengine From 9a020899a32447be884d0e4b9b3c744cefbe00f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fl=C3=B6ssie?= Date: Wed, 11 Sep 2019 09:16:06 +0200 Subject: [PATCH 05/15] Minor cleanups --- rtengine/iptransform.cc | 95 +++++++++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 36 deletions(-) diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc index 608fa0337..1e739dac6 100644 --- a/rtengine/iptransform.cc +++ b/rtengine/iptransform.cc @@ -16,16 +16,21 @@ * You should have received a copy of the GNU General Public License * along with RawTherapee. If not, see . */ -#include "rtengine.h" -#include "improcfun.h" -#include "procparams.h" +#include + #ifdef _OPENMP #include #endif + +#include "improcfun.h" + #include "mytime.h" +#include "procparams.h" #include "rt_math.h" -#include "sleef.c" +#include "rtengine.h" #include "rtlensfun.h" +#include "sleef.c" + #define BENCHMARK #include "StopWatch.h" @@ -147,7 +152,7 @@ inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys, } #endif #ifdef __SSE2__ -inline void interpolateTransformChannelsCubic(const float* const * src, int xs, int ys, float Dx, float Dy, float &dest, float mul) +inline void interpolateTransformChannelsCubic(const float* const* src, int xs, int ys, float Dx, float Dy, float& dest, float mul) { constexpr float A = -0.85f; @@ -168,7 +173,7 @@ inline void interpolateTransformChannelsCubic(const float* const * src, int xs, dest = mul * vhadd(weight * cv); } #else -inline void interpolateTransformChannelsCubic(const float* const * src, int xs, int ys, float Dx, float Dy, float &dest, float mul) +inline void interpolateTransformChannelsCubic(const float* const* src, int xs, int ys, float Dx, float Dy, float& dest, float mul) { constexpr float A = -0.85f; @@ -859,29 +864,45 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I const bool enablePerspective = needsPerspective(); const bool enableDistortion = needsDistortion(); - const double w2 = (double) oW / 2.0 - 0.5; - const double h2 = (double) oH / 2.0 - 0.5; + const double w2 = static_cast(oW) / 2.0 - 0.5; + const double h2 = static_cast(oH) / 2.0 - 0.5; double vig_w2, vig_h2, maxRadius, v, b, mul; calcVignettingParams(oW, oH, params->vignetting, vig_w2, vig_h2, maxRadius, v, b, mul); - struct grad_params gp; + grad_params gp; if (enableGradient) { calcGradientParams(oW, oH, params->gradient, gp); } - struct pcv_params pcv; + pcv_params pcv; if (enablePCVignetting) { calcPCVignetteParams(fW, fH, oW, oH, params->pcvignette, params->crop, pcv); } - float** chOrig[3] = {original->r.ptrs, original->g.ptrs, original->b.ptrs}; - float** chTrans[3] = {transformed->r.ptrs, transformed->g.ptrs, transformed->b.ptrs}; + const std::array chOrig = { + original->r.ptrs, + original->g.ptrs, + original->b.ptrs + }; + const std::array chTrans = { + transformed->r.ptrs, + transformed->g.ptrs, + transformed->b.ptrs + }; // auxiliary variables for c/a correction - const double chDist[3] = {enableCA ? params->cacorrection.red : 0.0, 0.0, enableCA ? params->cacorrection.blue : 0.0}; + const std::array chDist = { + enableCA + ? params->cacorrection.red + : 0.0, + 0.0, + enableCA + ? params->cacorrection.blue + : 0.0 + }; // auxiliary variables for distortion correction const double distAmount = params->distortion.amount; @@ -893,16 +914,18 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I // auxiliary variables for vertical perspective correction const double vpdeg = params->perspective.vertical / 100.0 * 45.0; const double vpalpha = (90.0 - vpdeg) / 180.0 * rtengine::RT_PI; - const double vpteta = fabs(vpalpha - rtengine::RT_PI / 2) < 3e-4 ? 0.0 : acos((vpdeg > 0 ? 1.0 : -1.0) * sqrt((-SQR(oW * tan(vpalpha)) + (vpdeg > 0 ? 1.0 : -1.0) * - oW * tan(vpalpha) * sqrt(SQR(4 * maxRadius) + SQR(oW * tan(vpalpha)))) / (SQR(maxRadius) * 8))); - const double vpcospt = (vpdeg >= 0 ? 1.0 : -1.0) * cos(vpteta), vptanpt = tan(vpteta); + const double vpteta = fabs(vpalpha - rtengine::RT_PI / 2) < 3e-4 ? 0.0 : acos((vpdeg > 0 ? 1.0 : -1.0) * sqrt((-SQR(oW * tan(vpalpha)) + (vpdeg > 0 ? 1.0 : -1.0) * + oW * tan(vpalpha) * sqrt(SQR(4 * maxRadius) + SQR(oW * tan(vpalpha)))) / (SQR(maxRadius) * 8))); + const double vpcospt = (vpdeg >= 0 ? 1.0 : -1.0) * cos(vpteta); + const double vptanpt = tan(vpteta); // auxiliary variables for horizontal perspective correction const double hpdeg = params->perspective.horizontal / 100.0 * 45.0; const double hpalpha = (90.0 - hpdeg) / 180.0 * rtengine::RT_PI; - const double hpteta = fabs(hpalpha - rtengine::RT_PI / 2) < 3e-4 ? 0.0 : acos((hpdeg > 0 ? 1.0 : -1.0) * sqrt((-SQR(oH * tan(hpalpha)) + (hpdeg > 0 ? 1.0 : -1.0) * - oH * tan(hpalpha) * sqrt(SQR(4 * maxRadius) + SQR(oH * tan(hpalpha)))) / (SQR(maxRadius) * 8))); - const double hpcospt = (hpdeg >= 0 ? 1.0 : -1.0) * cos(hpteta), hptanpt = tan(hpteta); + const double hpteta = fabs(hpalpha - rtengine::RT_PI / 2) < 3e-4 ? 0.0 : acos((hpdeg > 0 ? 1.0 : -1.0) * sqrt((-SQR(oH * tan(hpalpha)) + (hpdeg > 0 ? 1.0 : -1.0) * + oH * tan(hpalpha) * sqrt(SQR(4 * maxRadius) + SQR(oH * tan(hpalpha)))) / (SQR(maxRadius) * 8))); + const double hpcospt = (hpdeg >= 0 ? 1.0 : -1.0) * cos(hpteta); + const double hptanpt = tan(hpteta); const double ascale = params->commonTrans.autofill ? getTransformAutoFill(oW, oH, pLCPMap) : 1.0; @@ -915,9 +938,10 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I #pragma omp parallel for schedule(dynamic, 16) if(multiThread) #endif - for (int y = 0; y < transformed->getHeight(); y++) { - for (int x = 0; x < transformed->getWidth(); x++) { - double x_d = x, y_d = y; + for (int y = 0; y < transformed->getHeight(); ++y) { + for (int x = 0; x < transformed->getWidth(); ++x) { + double x_d = x; + double y_d = y; if (enableLCPDist) { pLCPMap->correctDistortion(x_d, y_d, cx, cy, ascale); // must be first transform @@ -944,14 +968,14 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I const double Dyc = x_d * sint + y_d * cost; // distortion correction - double s = 1; + double s = 1.0; if (enableDistortion) { - double r = sqrt(Dxc * Dxc + Dyc * Dyc) / maxRadius; + const double r = sqrt(Dxc * Dxc + Dyc * Dyc) / maxRadius; s = 1.0 - distAmount + distAmount * r; } - for (int c = 0; c < (enableCA ? 3 : 1); c++) { + for (int c = 0; c < (enableCA ? 3 : 1); ++c) { double Dx = Dxc * (s + chDist[c]); double Dy = Dyc * (s + chDist[c]); @@ -960,22 +984,21 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I Dy += h2; // Extract integer and fractions of source screen coordinates - int xc = (int)Dx; - Dx -= (double)xc; + int xc = Dx; + Dx -= xc; xc -= sx; - int yc = (int)Dy; - Dy -= (double)yc; + int yc = Dy; + Dy -= yc; yc -= sy; // Convert only valid pixels if (yc >= 0 && yc < original->getHeight() && xc >= 0 && xc < original->getWidth()) { - // multiplier for vignetting correction double vignmul = 1.0; if (enableVignetting) { - const double vig_x_d = ascale * (x + cx - vig_w2); // centering x coord & scale - const double vig_y_d = ascale * (y + cy - vig_h2); // centering y coord & scale + const double vig_x_d = ascale * (x + cx - vig_w2); // centering x coord & scale + const double vig_y_d = ascale * (y + cy - vig_h2); // centering y coord & scale const double vig_Dx = vig_x_d * cost - vig_y_d * sint; const double vig_Dy = vig_x_d * sint + vig_y_d * cost; const double r2 = sqrt(vig_Dx * vig_Dx + vig_Dy * vig_Dy); @@ -1007,10 +1030,10 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I } } else { // edge pixels - int y1 = LIM(yc, 0, original->getHeight() - 1); - int y2 = LIM(yc + 1, 0, original->getHeight() - 1); - int x1 = LIM(xc, 0, original->getWidth() - 1); - int x2 = LIM(xc + 1, 0, original->getWidth() - 1); + const int y1 = LIM(yc, 0, original->getHeight() - 1); + const int y2 = LIM(yc + 1, 0, original->getHeight() - 1); + const int x1 = LIM(xc, 0, original->getWidth() - 1); + const int x2 = LIM(xc + 1, 0, original->getWidth() - 1); if (enableCA) { chTrans[c][y][x] = vignmul * (chOrig[c][y1][x1] * (1.0 - Dx) * (1.0 - Dy) + chOrig[c][y1][x2] * Dx * (1.0 - Dy) + chOrig[c][y2][x1] * (1.0 - Dx) * Dy + chOrig[c][y2][x2] * Dx * Dy); From 4079bb9920be8cf6c568c1b0f91bfbe3a2544b30 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Wed, 11 Sep 2019 18:56:07 +0200 Subject: [PATCH 06/15] Capture Sharpening: automatic radius calculation --- rtdata/languages/default | 2 + rtengine/capturesharpening.cc | 307 ++++++++++++++++++++++++++++++++-- rtengine/imagesource.h | 2 +- rtengine/improccoordinator.cc | 7 +- rtengine/improccoordinator.h | 6 + rtengine/procparams.cc | 4 + rtengine/procparams.h | 1 + rtengine/rawimagesource.h | 2 +- rtengine/refreshmap.cc | 2 +- rtengine/rt_algo.cc | 23 ++- rtengine/rt_algo.h | 2 +- rtengine/rtengine.h | 8 + rtengine/simpleprocess.cc | 2 +- rtengine/stdimagesource.h | 2 +- rtgui/paramsedited.cc | 8 +- rtgui/paramsedited.h | 1 + rtgui/pdsharpening.cc | 74 ++++++-- rtgui/pdsharpening.h | 5 +- rtgui/toolpanelcoord.cc | 1 + rtgui/toolpanelcoord.h | 1 - 20 files changed, 416 insertions(+), 44 deletions(-) diff --git a/rtdata/languages/default b/rtdata/languages/default index 5bf1863df..3749a706a 100644 --- a/rtdata/languages/default +++ b/rtdata/languages/default @@ -766,6 +766,7 @@ HISTORY_MSG_METADATA_MODE;Metadata copy mode HISTORY_MSG_MICROCONTRAST_CONTRAST;Microcontrast - Contrast threshold HISTORY_MSG_PDSHARPEN_CONTRAST;CAS - Contrast threshold HISTORY_MSG_PDSHARPEN_AUTO_CONTRAST;CAS - Auto threshold +HISTORY_MSG_PDSHARPEN_AUTO_RADIUS;CAS - Auto radius HISTORY_MSG_PDSHARPEN_GAMMA;CAS - Gamma HISTORY_MSG_PDSHARPEN_ITERATIONS;CAS - Iterations HISTORY_MSG_PDSHARPEN_RADIUS;CAS - Radius @@ -1800,6 +1801,7 @@ TP_PCVIGNETTE_ROUNDNESS_TOOLTIP;Roundness:\n0 = rectangle,\n50 = fitted ellipse, TP_PCVIGNETTE_STRENGTH;Strength TP_PCVIGNETTE_STRENGTH_TOOLTIP;Filter strength in stops (reached in corners). TP_PDSHARPENING_LABEL;Capture Sharpening +TP_PDSHARPENING_AUTORADIUS_TOOLTIP;If the checkbox is checked, RawTherapee calculates a value based on the raw data of the image. TP_PERSPECTIVE_HORIZONTAL;Horizontal TP_PERSPECTIVE_LABEL;Perspective TP_PERSPECTIVE_VERTICAL;Vertical diff --git a/rtengine/capturesharpening.cc b/rtengine/capturesharpening.cc index eb95d2633..e741016db 100644 --- a/rtengine/capturesharpening.cc +++ b/rtengine/capturesharpening.cc @@ -37,6 +37,243 @@ #include "../rtgui/multilangmgr.h" namespace { + +void buildClipMaskBayer(const float * const *rawData, int W, int H, float** clipMask, const float whites[2][2]) +{ + +#ifdef _OPENMP + #pragma omp parallel for schedule(dynamic, 16) +#endif + for (int row = 0; row < H; ++row) { + for (int col = 0; col < W; ++col) { + clipMask[row][col] = 1.f; + } + } + +#ifdef _OPENMP + #pragma omp parallel for schedule(dynamic, 16) +#endif + for (int row = 2; row < H - 2; ++row) { + float clip0 = whites[row & 1][0]; + float clip1 = whites[row & 1][1]; + for (int col = 2; col < W - 2; ++col) { + if (rawData[row][col] >= clip0) { + clipMask[row - 2][col - 1] = clipMask[row - 2][col] = clipMask[row - 2][col + 1] = 0.f; + clipMask[row - 1][col - 2] = clipMask[row - 1][col - 1] = clipMask[row - 1][col] = clipMask[row - 1][col + 1] = clipMask[row - 1][col + 2] = 0.f; + clipMask[row][col - 2] = clipMask[row][col - 1] = clipMask[row][col] = clipMask[row][col + 1] = clipMask[row][col + 2] = 0.f; + clipMask[row + 1][col - 2] = clipMask[row + 1][col - 1] = clipMask[row + 1][col] = clipMask[row + 1][col + 1] = clipMask[row + 1][col + 2] = 0.f; + clipMask[row + 2][col - 1] = clipMask[row + 2][col] = clipMask[row + 2][col + 1] = 0.f; + } + std::swap(clip0, clip1); + } + } +} + +void buildClipMaskXtrans(const float * const *rawData, int W, int H, float** clipMask, const float whites[6][6]) +{ + +#ifdef _OPENMP + #pragma omp parallel for schedule(dynamic, 16) +#endif + for (int row = 0; row < H; ++row) { + for (int col = 0; col < W; ++col) { + clipMask[row][col] = 1.f; + } + } + +#ifdef _OPENMP + #pragma omp parallel for schedule(dynamic, 16) +#endif + for (int row = 2; row < H - 2; ++row) { + for (int col = 2; col < W - 2; ++col) { + const float clip = whites[row % 6][col % 6]; + if (rawData[row][col] >= clip) { + clipMask[row - 2][col - 1] = clipMask[row - 2][col] = clipMask[row - 2][col + 1] = 0.f; + clipMask[row - 1][col - 2] = clipMask[row - 1][col - 1] = clipMask[row - 1][col] = clipMask[row - 1][col + 1] = clipMask[row - 1][col + 2] = 0.f; + clipMask[row][col - 2] = clipMask[row][col - 1] = clipMask[row][col] = clipMask[row][col + 1] = clipMask[row][col + 2] = 0.f; + clipMask[row + 1][col - 2] = clipMask[row + 1][col - 1] = clipMask[row + 1][col] = clipMask[row + 1][col + 1] = clipMask[row + 1][col + 2] = 0.f; + clipMask[row + 2][col - 1] = clipMask[row + 2][col] = clipMask[row + 2][col + 1] = 0.f; + } + } + } +} + +void buildClipMaskMono(const float * const *rawData, int W, int H, float** clipMask, float white) +{ + +#ifdef _OPENMP + #pragma omp parallel for schedule(dynamic, 16) +#endif + for (int row = 0; row < H; ++row) { + for (int col = 0; col < W; ++col) { + clipMask[row][col] = 1.f; + } + } + +#ifdef _OPENMP + #pragma omp parallel for schedule(dynamic, 16) +#endif + for (int row = 2; row < H - 2; ++row) { + for (int col = 2; col < W - 2; ++col) { + if (rawData[row][col] >= white) { + clipMask[row - 2][col - 1] = clipMask[row - 2][col] = clipMask[row - 2][col + 1] = 0.f; + clipMask[row - 1][col - 2] = clipMask[row - 1][col - 1] = clipMask[row - 1][col] = clipMask[row - 1][col + 1] = clipMask[row - 1][col + 2] = 0.f; + clipMask[row][col - 2] = clipMask[row][col - 1] = clipMask[row][col] = clipMask[row][col + 1] = clipMask[row][col + 2] = 0.f; + clipMask[row + 1][col - 2] = clipMask[row + 1][col - 1] = clipMask[row + 1][col] = clipMask[row + 1][col + 1] = clipMask[row + 1][col + 2] = 0.f; + clipMask[row + 2][col - 1] = clipMask[row + 2][col] = clipMask[row + 2][col + 1] = 0.f; + } + } + } +} + +float calcRadiusBayer(const float * const *rawData, int W, int H, float lowerLimit, float upperLimit, const unsigned int fc[2]) +{ + + float maxRatio = 1.f; +#ifdef _OPENMP + #pragma omp parallel for reduction(max:maxRatio) schedule(dynamic, 16) +#endif + for (int row = 4; row < H - 4; ++row) { + for (int col = 5 + (fc[row & 1] & 1); col < W - 4; col += 2) { + const float val00 = rawData[row][col]; + if (val00 > 0.f) { + const float val1m1 = rawData[row + 1][col - 1]; + const float val1p1 = rawData[row + 1][col + 1]; + const float maxVal0 = std::max(val00, val1m1); + if (val1m1 > 0.f && maxVal0 > lowerLimit) { + const float minVal = std::min(val00, val1m1); + if (UNLIKELY(maxVal0 > maxRatio * minVal)) { + bool clipped = false; + if (maxVal0 == val00) { // check for influence by clipped green in neighborhood + if (rtengine::max(rawData[row - 1][col - 1], rawData[row - 1][col + 1], val1p1) >= upperLimit) { + clipped = true; + } + } else { // check for influence by clipped green in neighborhood + if (rtengine::max(rawData[row][col - 2], val00, rawData[row + 2][col - 2], rawData[row + 2][col]) >= upperLimit) { + clipped = true; + } + } + if (!clipped) { + maxRatio = maxVal0 / minVal; + } + } + } + const float maxVal1 = std::max(val00, val1p1); + if (val1p1 > 0.f && maxVal1 > lowerLimit) { + const float minVal = std::min(val00, val1p1); + if (UNLIKELY(maxVal1 > maxRatio * minVal)) { + if (maxVal1 == val00) { // check for influence by clipped green in neighborhood + if (rtengine::max(rawData[row - 1][col - 1], rawData[row - 1][col + 1], val1p1) >= upperLimit) { + continue; + } + } else { // check for influence by clipped green in neighborhood + if (rtengine::max(val00, rawData[row][col + 2], rawData[row + 2][col], rawData[row + 2][col + 2]) >= upperLimit) { + continue; + } + } + maxRatio = maxVal1 / minVal; + } + } + } + } + } + return std::sqrt((1.f / (std::log(1.f / maxRatio) / 2.f)) / -2.f); +} + +float calcRadiusXtrans(const float * const *rawData, int W, int H, float lowerLimit, float upperLimit, unsigned int starty, unsigned int startx) +{ + + float maxRatio = 1.f; +#ifdef _OPENMP + #pragma omp parallel for reduction(max:maxRatio) schedule(dynamic, 16) +#endif + for (int row = starty + 3; row < H - 4; row += 3) { + for (int col = startx + 3; col < W - 4; col += 3) { + const float valtl = rawData[row][col]; + const float valtr = rawData[row][col + 1]; + const float valbl = rawData[row + 1][col]; + const float valbr = rawData[row + 1][col + 1]; + if (valtl > 1.f) { + const float maxValtltr = std::max(valtl, valtr); + if (valtr > 1.f && maxValtltr > lowerLimit) { + const float minVal = std::min(valtl, valtr); + if (UNLIKELY(maxValtltr > maxRatio * minVal)) { + bool clipped = false; + if (maxValtltr == valtl) { // check for influence by clipped green in neighborhood + if (rtengine::max(rawData[row - 1][col - 1], valtr, valbl, valbr) >= upperLimit) { + clipped = true; + } + } else { // check for influence by clipped green in neighborhood + if (rtengine::max(rawData[row - 1][col + 2], valtl, valbl, valbr) >= upperLimit) { + clipped = true; + } + } + if (!clipped) { + maxRatio = maxValtltr / minVal; + } + } + } + const float maxValtlbl = std::max(valtl, valbl); + if (valbl > 1.f && maxValtlbl > lowerLimit) { + const float minVal = std::min(valtl, valbl); + if (UNLIKELY(maxValtlbl > maxRatio * minVal)) { + bool clipped = false; + if (maxValtlbl == valtl) { // check for influence by clipped green in neighborhood + if (rtengine::max(rawData[row - 1][col - 1], valtr, valbl, valbr) >= upperLimit) { + clipped = true; + } + } else { // check for influence by clipped green in neighborhood + if (rtengine::max(valtl, valtr, rawData[row + 2][col - 1], valbr) >= upperLimit) { + clipped = true; + } + } + if (!clipped) { + maxRatio = maxValtlbl / minVal; + } + } + } + } + if (valbr > 1.f) { + const float maxValblbr = std::max(valbl, valbr); + if (valbl > 1.f && maxValblbr > lowerLimit) { + const float minVal = std::min(valbl, valbr); + if (UNLIKELY(maxValblbr > maxRatio * minVal)) { + bool clipped = false; + if (maxValblbr == valbr) { // check for influence by clipped green in neighborhood + if (rtengine::max(valtl, valtr, valbl, rawData[row + 2][col + 2]) >= upperLimit) { + clipped = true; + } + } else { // check for influence by clipped green in neighborhood + if (rtengine::max(valtl, valtr, rawData[row + 2][col - 1], valbr) >= upperLimit) { + clipped = true; + } + } + if (!clipped) { + maxRatio = maxValblbr / minVal; + } + } + } + const float maxValtrbr = std::max(valtr, valbr); + if (valtr > 1.f && maxValtrbr > lowerLimit) { + const float minVal = std::min(valtr, valbr); + if (UNLIKELY(maxValtrbr > maxRatio * minVal)) { + if (maxValtrbr == valbr) { // check for influence by clipped green in neighborhood + if (rtengine::max(valtl, valtr, valbl, rawData[row + 2][col + 2]) >= upperLimit) { + continue; + } + } else { // check for influence by clipped green in neighborhood + if (rtengine::max(rawData[row - 1][col + 2], valtl, valbl, valbr) >= upperLimit) { + continue; + } + } + maxRatio = maxValtrbr / minVal; + } + } + } + } + } + return std::sqrt((1.f / (std::log(1.f / maxRatio))) / -2.f); +} void CaptureDeconvSharpening (float** luminance, float** tmp, const float * const * blend, int W, int H, double sigma, int iterations, rtengine::ProgressListener* plistener, double start, double step) { @@ -54,7 +291,7 @@ void CaptureDeconvSharpening (float** luminance, float** tmp, const float * cons tmpI[i][j] = max(luminance[i][j], 0.f); } } - + for (int k = 0; k < iterations; k++) { // apply gaussian blur and divide luminance by result of gaussian blur gaussianBlur(tmpI, tmp, W, H, sigma, nullptr, GAUSS_DIV, luminance); @@ -85,9 +322,7 @@ void CaptureDeconvSharpening (float** luminance, float** tmp, const float * cons namespace rtengine { -void RawImageSource::captureSharpening(const procparams::CaptureSharpeningParams &sharpeningParams, bool showMask, double &conrastThreshold) { -BENCHFUN - +void RawImageSource::captureSharpening(const procparams::CaptureSharpeningParams &sharpeningParams, bool showMask, double &conrastThreshold, double &radius) { if (plistener) { plistener->setProgressStr(M("TP_PDSHARPENING_LABEL")); @@ -102,12 +337,62 @@ BENCHFUN float contrast = conrastThreshold / 100.f; + const float clipVal = (ri->get_white(1) - ri->get_cblack(1)) * scale_mul[1]; + array2D& redVals = redCache ? *redCache : red; array2D& greenVals = greenCache ? *greenCache : green; array2D& blueVals = blueCache ? *blueCache : blue; + array2D clipMask(W, H); + constexpr float clipLimit = 0.95f; + if (ri->getSensorType() == ST_BAYER) { + const float whites[2][2] = { + {(ri->get_white(FC(0,0)) - c_black[FC(0,0)]) * scale_mul[FC(0,0)] * clipLimit, (ri->get_white(FC(0,1)) - c_black[FC(0,1)]) * scale_mul[FC(0,1)] * clipLimit}, + {(ri->get_white(FC(1,0)) - c_black[FC(1,0)]) * scale_mul[FC(1,0)] * clipLimit, (ri->get_white(FC(1,1)) - c_black[FC(1,1)]) * scale_mul[FC(1,1)] * clipLimit} + }; + buildClipMaskBayer(rawData, W, H, clipMask, whites); + const unsigned int fc[2] = {FC(0,0), FC(1,0)}; + if (sharpeningParams.autoRadius) { + radius = calcRadiusBayer(rawData, W, H, 1000.f, clipVal, fc); + } + } else if (ri->getSensorType() == ST_FUJI_XTRANS) { + float whites[6][6]; + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 6; ++j) { + const auto color = ri->XTRANSFC(i, j); + whites[i][j] = (ri->get_white(color) - c_black[color]) * scale_mul[color] * clipLimit; + } + } + buildClipMaskXtrans(rawData, W, H, clipMask, whites); + bool found = false; + int i, j; + for (i = 6; i < 12 && !found; ++i) { + for (j = 6; j < 12 && !found; ++j) { + if (ri->XTRANSFC(i, j) == 1) { + if (ri->XTRANSFC(i, j - 1) != ri->XTRANSFC(i, j + 1)) { + if (ri->XTRANSFC(i - 1, j) != 1) { + if (ri->XTRANSFC(i, j - 1) != 1) { + found = true; + break; + } + } + } + } + } + } + if (sharpeningParams.autoRadius) { + radius = calcRadiusXtrans(rawData, W, H, 1000.f, clipVal, i, j); + } + + } else if (ri->get_colors() == 1) { + buildClipMaskMono(rawData, W, H, clipMask, (ri->get_white(0) - c_black[0]) * scale_mul[0] * clipLimit); + if (sharpeningParams.autoRadius) { + const unsigned int fc[2] = {0, 0}; + radius = calcRadiusBayer(rawData, W, H, 1000.f, clipVal, fc); + } + } + if (showMask) { - StopWatch Stop1("Show mask"); array2D& L = blue; // blue will be overridden anyway => we can use its buffer to store L #ifdef _OPENMP #pragma omp parallel for @@ -119,8 +404,9 @@ BENCHFUN if (plistener) { plistener->setProgress(0.1); } + array2D& blend = red; // red will be overridden anyway => we can use its buffer to store the blend mask - buildBlendMask(L, blend, W, H, contrast, 1.f, sharpeningParams.autoContrast); + buildBlendMask(L, blend, W, H, contrast, 1.f, sharpeningParams.autoContrast, clipMask); if (plistener) { plistener->setProgress(0.2); } @@ -157,7 +443,6 @@ BENCHFUN array2D& YOld = YOldbuffer ? * YOldbuffer : green; array2D& YNew = YNewbuffer ? * YNewbuffer : blue; - StopWatch Stop1("rgb2YL"); #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 16) #endif @@ -170,19 +455,17 @@ BENCHFUN } // calculate contrast based blend factors to reduce sharpening in regions with low contrast JaggedArray blend(W, H); - buildBlendMask(L, blend, W, H, contrast, 1.f, sharpeningParams.autoContrast); + buildBlendMask(L, blend, W, H, contrast, 1.f, sharpeningParams.autoContrast, clipMask); if (plistener) { plistener->setProgress(0.2); } conrastThreshold = contrast * 100.f; - Stop1.stop(); array2D& tmp = L; // L is not used anymore now => we can use its buffer as the needed temporary buffer - CaptureDeconvSharpening(YNew, tmp, blend, W, H, sharpeningParams.deconvradius, sharpeningParams.deconviter, plistener, 0.2, (0.9 - 0.2) / sharpeningParams.deconviter); + CaptureDeconvSharpening(YNew, tmp, blend, W, H, radius, sharpeningParams.deconviter, plistener, 0.2, (0.9 - 0.2) / sharpeningParams.deconviter); if (plistener) { plistener->setProgress(0.9); } - StopWatch Stop2("Y2RGB"); const float gamma = sharpeningParams.gamma; #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 16) @@ -206,7 +489,7 @@ BENCHFUN blue[i][j] = blueVals[i][j] * factor; } } - Stop2.stop(); + delete Lbuffer; delete YOldbuffer; delete YNewbuffer; diff --git a/rtengine/imagesource.h b/rtengine/imagesource.h index c62dfe7d8..edc1102c4 100644 --- a/rtengine/imagesource.h +++ b/rtengine/imagesource.h @@ -182,7 +182,7 @@ public: return this; } virtual void getRawValues(int x, int y, int rotate, int &R, int &G, int &B) = 0; - virtual void captureSharpening(const procparams::CaptureSharpeningParams &sharpeningParams, bool showMask, double &conrastThreshold) = 0; + virtual void captureSharpening(const procparams::CaptureSharpeningParams &sharpeningParams, bool showMask, double &conrastThreshold, double &radius) = 0; }; } diff --git a/rtengine/improccoordinator.cc b/rtengine/improccoordinator.cc index c6268576a..8b9e49124 100644 --- a/rtengine/improccoordinator.cc +++ b/rtengine/improccoordinator.cc @@ -122,6 +122,7 @@ ImProcCoordinator::ImProcCoordinator() : bayerAutoContrastListener(nullptr), xtransAutoContrastListener(nullptr), pdSharpenAutoContrastListener(nullptr), + pdSharpenAutoRadiusListener(nullptr), frameCountListener(nullptr), imageTypeListener(nullptr), actListener(nullptr), @@ -346,10 +347,14 @@ void ImProcCoordinator::updatePreviewImage(int todo, bool panningRelatedChange) if ((todo & (M_RAW | M_CSHARP)) && params->pdsharpening.enabled) { double pdSharpencontrastThreshold = params->pdsharpening.contrast; - imgsrc->captureSharpening(params->pdsharpening, sharpMask, pdSharpencontrastThreshold); + double pdSharpenRadius = params->pdsharpening.deconvradius; + imgsrc->captureSharpening(params->pdsharpening, sharpMask, pdSharpencontrastThreshold, pdSharpenRadius); if (pdSharpenAutoContrastListener && params->pdsharpening.autoContrast) { pdSharpenAutoContrastListener->autoContrastChanged(pdSharpencontrastThreshold); } + if (pdSharpenAutoRadiusListener && params->pdsharpening.autoRadius) { + pdSharpenAutoRadiusListener->autoRadiusChanged(pdSharpenRadius); + } } diff --git a/rtengine/improccoordinator.h b/rtengine/improccoordinator.h index df4ec8134..379a3fb20 100644 --- a/rtengine/improccoordinator.h +++ b/rtengine/improccoordinator.h @@ -162,6 +162,7 @@ protected: AutoContrastListener *bayerAutoContrastListener; AutoContrastListener *xtransAutoContrastListener; AutoContrastListener *pdSharpenAutoContrastListener; + AutoRadiusListener *pdSharpenAutoRadiusListener; FrameCountListener *frameCountListener; ImageTypeListener *imageTypeListener; @@ -364,6 +365,11 @@ public: xtransAutoContrastListener = acl; } + void setpdSharpenAutoRadiusListener (AutoRadiusListener* acl) override + { + pdSharpenAutoRadiusListener = acl; + } + void setpdSharpenAutoContrastListener (AutoContrastListener* acl) override { pdSharpenAutoContrastListener = acl; diff --git a/rtengine/procparams.cc b/rtengine/procparams.cc index 2fcc5d358..f88220c4e 100644 --- a/rtengine/procparams.cc +++ b/rtengine/procparams.cc @@ -1152,6 +1152,7 @@ bool SharpeningParams::operator !=(const SharpeningParams& other) const CaptureSharpeningParams::CaptureSharpeningParams() : enabled(false), autoContrast(true), + autoRadius(true), contrast(10.0), gamma(1.00), deconvradius(0.75), @@ -1166,6 +1167,7 @@ bool CaptureSharpeningParams::operator ==(const CaptureSharpeningParams& other) && contrast == other.contrast && gamma == other.gamma && autoContrast == other.autoContrast + && autoRadius == other.autoRadius && deconvradius == other.deconvradius && deconviter == other.deconviter; } @@ -3370,6 +3372,7 @@ int ProcParams::save(const Glib::ustring& fname, const Glib::ustring& fname2, bo saveToKeyfile(!pedited || pedited->pdsharpening.enabled, "PostDemosaicSharpening", "Enabled", pdsharpening.enabled, keyFile); saveToKeyfile(!pedited || pedited->pdsharpening.contrast, "PostDemosaicSharpening", "Contrast", pdsharpening.contrast, keyFile); saveToKeyfile(!pedited || pedited->pdsharpening.autoContrast, "PostDemosaicSharpening", "AutoContrast", pdsharpening.autoContrast, keyFile); + saveToKeyfile(!pedited || pedited->pdsharpening.autoRadius, "PostDemosaicSharpening", "AutoRadius", pdsharpening.autoRadius, keyFile); saveToKeyfile(!pedited || pedited->pdsharpening.gamma, "PostDemosaicSharpening", "DeconvGamma", pdsharpening.gamma, keyFile); saveToKeyfile(!pedited || pedited->pdsharpening.deconvradius, "PostDemosaicSharpening", "DeconvRadius", pdsharpening.deconvradius, keyFile); saveToKeyfile(!pedited || pedited->pdsharpening.deconviter, "PostDemosaicSharpening", "DeconvIterations", pdsharpening.deconviter, keyFile); @@ -4458,6 +4461,7 @@ int ProcParams::load(const Glib::ustring& fname, ParamsEdited* pedited) assignFromKeyfile(keyFile, "PostDemosaicSharpening", "Enabled", pedited, pdsharpening.enabled, pedited->pdsharpening.enabled); assignFromKeyfile(keyFile, "PostDemosaicSharpening", "Contrast", pedited, pdsharpening.contrast, pedited->pdsharpening.contrast); assignFromKeyfile(keyFile, "PostDemosaicSharpening", "AutoContrast", pedited, pdsharpening.autoContrast, pedited->pdsharpening.autoContrast); + assignFromKeyfile(keyFile, "PostDemosaicSharpening", "AutoRadius", pedited, pdsharpening.autoRadius, pedited->pdsharpening.autoRadius); assignFromKeyfile(keyFile, "PostDemosaicSharpening", "DeconvGamma", pedited, pdsharpening.gamma, pedited->pdsharpening.gamma); assignFromKeyfile(keyFile, "PostDemosaicSharpening", "DeconvRadius", pedited, pdsharpening.deconvradius, pedited->pdsharpening.deconvradius); diff --git a/rtengine/procparams.h b/rtengine/procparams.h index e8f846a1d..ce03efc7d 100644 --- a/rtengine/procparams.h +++ b/rtengine/procparams.h @@ -545,6 +545,7 @@ struct SharpenMicroParams { struct CaptureSharpeningParams { bool enabled; bool autoContrast; + bool autoRadius; double contrast; double gamma; double deconvradius; diff --git a/rtengine/rawimagesource.h b/rtengine/rawimagesource.h index e20024f3c..e52adea18 100644 --- a/rtengine/rawimagesource.h +++ b/rtengine/rawimagesource.h @@ -311,7 +311,7 @@ protected: void hflip (Imagefloat* im); void vflip (Imagefloat* im); void getRawValues(int x, int y, int rotate, int &R, int &G, int &B) override; - void captureSharpening(const procparams::CaptureSharpeningParams &sharpeningParams, bool showMask, double &conrastThreshold) override; + void captureSharpening(const procparams::CaptureSharpeningParams &sharpeningParams, bool showMask, double &conrastThreshold, double &radius) override; }; } diff --git a/rtengine/refreshmap.cc b/rtengine/refreshmap.cc index db56500b8..6917d856e 100644 --- a/rtengine/refreshmap.cc +++ b/rtengine/refreshmap.cc @@ -521,7 +521,7 @@ int refreshmap[rtengine::NUMOFEVENTS] = { RGBCURVE, // EvRGBEnabled LUMINANCECURVE, // EvLEnabled DEMOSAIC, // EvPdShrEnabled - ALLNORAW // EvPdShrMaskToggled + CAPTURESHARPEN // EvPdShrMaskToggled }; diff --git a/rtengine/rt_algo.cc b/rtengine/rt_algo.cc index 9fdb59652..4afd8ac73 100644 --- a/rtengine/rt_algo.cc +++ b/rtengine/rt_algo.cc @@ -156,7 +156,7 @@ float calcContrastThreshold(const float* const * luminance, int tileY, int tileX } } - return c / 100.f; + return (c + 1) / 100.f; } } @@ -299,7 +299,7 @@ void findMinMaxPercentile(const float* data, size_t size, float minPrct, float& maxOut = rtengine::LIM(maxOut, minVal, maxVal); } -void buildBlendMask(const float* const * luminance, float **blend, int W, int H, float &contrastThreshold, float amount, bool autoContrast) { +void buildBlendMask(const float* const * luminance, float **blend, int W, int H, float &contrastThreshold, float amount, bool autoContrast, float ** clipMask) { if (autoContrast) { constexpr float minLuminance = 2000.f; @@ -424,11 +424,20 @@ void buildBlendMask(const float* const * luminance, float **blend, int W, int H, for(int j = 2; j < H - 2; ++j) { int i = 2; #ifdef __SSE2__ - for(; i < W - 5; i += 4) { - vfloat contrastv = vsqrtf(SQRV(LVFU(luminance[j][i+1]) - LVFU(luminance[j][i-1])) + SQRV(LVFU(luminance[j+1][i]) - LVFU(luminance[j-1][i])) + - SQRV(LVFU(luminance[j][i+2]) - LVFU(luminance[j][i-2])) + SQRV(LVFU(luminance[j+2][i]) - LVFU(luminance[j-2][i]))) * scalev; + if (clipMask) { + for(; i < W - 5; i += 4) { + vfloat contrastv = vsqrtf(SQRV(LVFU(luminance[j][i+1]) - LVFU(luminance[j][i-1])) + SQRV(LVFU(luminance[j+1][i]) - LVFU(luminance[j-1][i])) + + SQRV(LVFU(luminance[j][i+2]) - LVFU(luminance[j][i-2])) + SQRV(LVFU(luminance[j+2][i]) - LVFU(luminance[j-2][i]))) * scalev; - STVFU(blend[j][i], amountv * calcBlendFactor(contrastv, contrastThresholdv)); + STVFU(blend[j][i], LVFU(clipMask[j][i]) * amountv * calcBlendFactor(contrastv, contrastThresholdv)); + } + } else { + for(; i < W - 5; i += 4) { + vfloat contrastv = vsqrtf(SQRV(LVFU(luminance[j][i+1]) - LVFU(luminance[j][i-1])) + SQRV(LVFU(luminance[j+1][i]) - LVFU(luminance[j-1][i])) + + SQRV(LVFU(luminance[j][i+2]) - LVFU(luminance[j][i-2])) + SQRV(LVFU(luminance[j+2][i]) - LVFU(luminance[j-2][i]))) * scalev; + + STVFU(blend[j][i], amountv * calcBlendFactor(contrastv, contrastThresholdv)); + } } #endif for(; i < W - 2; ++i) { @@ -436,7 +445,7 @@ void buildBlendMask(const float* const * luminance, float **blend, int W, int H, float contrast = sqrtf(rtengine::SQR(luminance[j][i+1] - luminance[j][i-1]) + rtengine::SQR(luminance[j+1][i] - luminance[j-1][i]) + rtengine::SQR(luminance[j][i+2] - luminance[j][i-2]) + rtengine::SQR(luminance[j+2][i] - luminance[j-2][i])) * scale; - blend[j][i] = amount * calcBlendFactor(contrast, contrastThreshold); + blend[j][i] = (clipMask ? clipMask[j][i] : 1.f) * amount * calcBlendFactor(contrast, contrastThreshold); } } diff --git a/rtengine/rt_algo.h b/rtengine/rt_algo.h index 0ea000a26..5485346c7 100644 --- a/rtengine/rt_algo.h +++ b/rtengine/rt_algo.h @@ -24,5 +24,5 @@ namespace rtengine { void findMinMaxPercentile(const float* data, size_t size, float minPrct, float& minOut, float maxPrct, float& maxOut, bool multiThread = true); -void buildBlendMask(const float* const * luminance, float **blend, int W, int H, float &contrastThreshold, float amount = 1.f, bool autoContrast = false); +void buildBlendMask(const float* const * luminance, float **blend, int W, int H, float &contrastThreshold, float amount = 1.f, bool autoContrast = false, float ** clipmask = nullptr); } diff --git a/rtengine/rtengine.h b/rtengine/rtengine.h index 33cca1aba..c188622af 100644 --- a/rtengine/rtengine.h +++ b/rtengine/rtengine.h @@ -415,6 +415,13 @@ public : virtual void autoContrastChanged (double autoContrast) = 0; }; +class AutoRadiusListener +{ +public : + virtual ~AutoRadiusListener() = default; + virtual void autoRadiusChanged (double autoRadius) = 0; +}; + class WaveletListener { public: @@ -524,6 +531,7 @@ public: virtual void setBayerAutoContrastListener (AutoContrastListener* l) = 0; virtual void setXtransAutoContrastListener (AutoContrastListener* l) = 0; virtual void setpdSharpenAutoContrastListener (AutoContrastListener* l) = 0; + virtual void setpdSharpenAutoRadiusListener (AutoRadiusListener* l) = 0; virtual void setAutoBWListener (AutoBWListener* l) = 0; virtual void setAutoWBListener (AutoWBListener* l) = 0; virtual void setAutoColorTonListener (AutoColorTonListener* l) = 0; diff --git a/rtengine/simpleprocess.cc b/rtengine/simpleprocess.cc index 2022d2d15..6dbb0e649 100644 --- a/rtengine/simpleprocess.cc +++ b/rtengine/simpleprocess.cc @@ -222,7 +222,7 @@ private: imgsrc->demosaic (params.raw, autoContrast, contrastThreshold, params.pdsharpening.enabled && pl); if (params.pdsharpening.enabled) { - imgsrc->captureSharpening(params.pdsharpening, false, params.pdsharpening.contrast); + imgsrc->captureSharpening(params.pdsharpening, false, params.pdsharpening.contrast, params.pdsharpening.deconvradius); } diff --git a/rtengine/stdimagesource.h b/rtengine/stdimagesource.h index ef1294dea..0dffe2fd0 100644 --- a/rtengine/stdimagesource.h +++ b/rtengine/stdimagesource.h @@ -102,7 +102,7 @@ public: void getRawValues(int x, int y, int rotate, int &R, int &G, int &B) override { R = G = B = 0;} void flushRGB () override; - void captureSharpening(const procparams::CaptureSharpeningParams &sharpeningParams, bool showMask, double &conrastThreshold) override {}; + void captureSharpening(const procparams::CaptureSharpeningParams &sharpeningParams, bool showMask, double &conrastThreshold, double &radius) override {}; }; } #endif diff --git a/rtgui/paramsedited.cc b/rtgui/paramsedited.cc index 84860329f..2ab5702ea 100644 --- a/rtgui/paramsedited.cc +++ b/rtgui/paramsedited.cc @@ -168,6 +168,7 @@ void ParamsEdited::set(bool v) pdsharpening.enabled = v; pdsharpening.contrast = v; pdsharpening.autoContrast = v; + pdsharpening.autoRadius = v; pdsharpening.gamma = v; pdsharpening.deconvradius = v; pdsharpening.deconviter = v; @@ -752,6 +753,7 @@ void ParamsEdited::initFrom(const std::vector& pdsharpening.enabled = pdsharpening.enabled && p.pdsharpening.enabled == other.pdsharpening.enabled; pdsharpening.contrast = pdsharpening.contrast && p.pdsharpening.contrast == other.pdsharpening.contrast; pdsharpening.autoContrast = pdsharpening.autoContrast && p.pdsharpening.autoContrast == other.pdsharpening.autoContrast; + pdsharpening.autoRadius = pdsharpening.autoRadius && p.pdsharpening.autoRadius == other.pdsharpening.autoRadius; pdsharpening.gamma = pdsharpening.gamma && p.pdsharpening.gamma == other.pdsharpening.gamma; pdsharpening.deconvradius = pdsharpening.deconvradius && p.pdsharpening.deconvradius == other.pdsharpening.deconvradius; pdsharpening.deconviter = pdsharpening.deconviter && p.pdsharpening.deconviter == other.pdsharpening.deconviter; @@ -1732,6 +1734,10 @@ void ParamsEdited::combine(rtengine::procparams::ProcParams& toEdit, const rteng toEdit.pdsharpening.autoContrast = mods.pdsharpening.autoContrast; } + if (pdsharpening.autoRadius) { + toEdit.pdsharpening.autoRadius = mods.pdsharpening.autoRadius; + } + if (pdsharpening.gamma) { toEdit.pdsharpening.gamma = dontforceSet && options.baBehav[ADDSET_SHARP_GAMMA] ? toEdit.pdsharpening.gamma + mods.pdsharpening.gamma : mods.pdsharpening.gamma; } @@ -3289,5 +3295,5 @@ bool FilmNegativeParamsEdited::isUnchanged() const bool CaptureSharpeningParamsEdited::isUnchanged() const { - return enabled && contrast && autoContrast && gamma && deconvradius && deconviter; + return enabled && contrast && autoContrast && autoRadius && gamma && deconvradius && deconviter; } \ No newline at end of file diff --git a/rtgui/paramsedited.h b/rtgui/paramsedited.h index 860ffc551..1bd7170d4 100644 --- a/rtgui/paramsedited.h +++ b/rtgui/paramsedited.h @@ -202,6 +202,7 @@ struct CaptureSharpeningParamsEdited { bool enabled; bool contrast; bool autoContrast; + bool autoRadius; bool gamma; bool deconvradius; bool deconviter; diff --git a/rtgui/pdsharpening.cc b/rtgui/pdsharpening.cc index c85b42e95..ef0ad90c2 100644 --- a/rtgui/pdsharpening.cc +++ b/rtgui/pdsharpening.cc @@ -35,6 +35,7 @@ PdSharpening::PdSharpening() : FoldableToolPanel(this, "pdsharpening", M("TP_PDS EvPdShrDRadius = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_RADIUS"); EvPdShrDIterations = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_ITERATIONS"); EvPdShrAutoContrast = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_AUTO_CONTRAST"); + EvPdShrAutoRadius = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_AUTO_RADIUS"); Gtk::HBox* hb = Gtk::manage(new Gtk::HBox()); hb->show(); @@ -51,6 +52,8 @@ PdSharpening::PdSharpening() : FoldableToolPanel(this, "pdsharpening", M("TP_PDS Gtk::VBox* rld = Gtk::manage(new Gtk::VBox()); gamma = Gtk::manage(new Adjuster(M("TP_SHARPENING_GAMMA"), 0.5, 6.0, 0.05, 1.00)); dradius = Gtk::manage(new Adjuster(M("TP_SHARPENING_EDRADIUS"), 0.4, 1.15, 0.01, 0.75)); + dradius->addAutoButton(M("TP_PDSHARPENING_AUTORADIUS_TOOLTIP")); + dradius->setAutoValue(true); diter = Gtk::manage(new Adjuster(M("TP_SHARPENING_RLD_ITERATIONS"), 1, 100, 1, 20)); rld->pack_start(*gamma); rld->pack_start(*dradius); @@ -85,6 +88,7 @@ void PdSharpening::read(const ProcParams* pp, const ParamsEdited* pedited) if (pedited) { contrast->setEditedState(pedited->pdsharpening.contrast ? Edited : UnEdited); contrast->setAutoInconsistent(multiImage && !pedited->pdsharpening.autoContrast); + dradius->setAutoInconsistent(multiImage && !pedited->pdsharpening.autoRadius); gamma->setEditedState(pedited->pdsharpening.gamma ? Edited : UnEdited); dradius->setEditedState(pedited->pdsharpening.deconvradius ? Edited : UnEdited); diter->setEditedState(pedited->pdsharpening.deconviter ? Edited : UnEdited); @@ -98,8 +102,10 @@ void PdSharpening::read(const ProcParams* pp, const ParamsEdited* pedited) contrast->setAutoValue(pp->pdsharpening.autoContrast); gamma->setValue(pp->pdsharpening.gamma); dradius->setValue(pp->pdsharpening.deconvradius); + dradius->setAutoValue(pp->pdsharpening.autoRadius); diter->setValue(pp->pdsharpening.deconviter); lastAutoContrast = pp->pdsharpening.autoContrast; + lastAutoRadius = pp->pdsharpening.autoRadius; enableListener(); } @@ -112,6 +118,7 @@ void PdSharpening::write(ProcParams* pp, ParamsEdited* pedited) pp->pdsharpening.enabled = getEnabled(); pp->pdsharpening.gamma = gamma->getValue(); pp->pdsharpening.deconvradius = dradius->getValue(); + pp->pdsharpening.autoRadius = dradius->getAutoValue(); pp->pdsharpening.deconviter =(int)diter->getValue(); if (pedited) { @@ -119,6 +126,7 @@ void PdSharpening::write(ProcParams* pp, ParamsEdited* pedited) pedited->pdsharpening.autoContrast = !contrast->getAutoInconsistent(); pedited->pdsharpening.gamma = gamma->getEditedState(); pedited->pdsharpening.deconvradius = dradius->getEditedState(); + pedited->pdsharpening.autoRadius = !dradius->getAutoInconsistent(); pedited->pdsharpening.deconviter = diter->getEditedState(); pedited->pdsharpening.enabled = !get_inconsistent(); } @@ -224,26 +232,62 @@ void PdSharpening::autoContrastChanged(double autoContrast) ); } +void PdSharpening::autoRadiusChanged(double autoRadius) +{ + idle_register.add( + [this, autoRadius]() -> bool + { + disableListener(); + dradius->setValue(autoRadius); + enableListener(); + return false; + } + ); +} + void PdSharpening::adjusterAutoToggled(Adjuster* a, bool newval) { - if (multiImage) { - if (contrast->getAutoInconsistent()) { - contrast->setAutoInconsistent(false); - contrast->setAutoValue(false); - } else if (lastAutoContrast) { - contrast->setAutoInconsistent(true); + if (a == contrast) { + if (multiImage) { + if (contrast->getAutoInconsistent()) { + contrast->setAutoInconsistent(false); + contrast->setAutoValue(false); + } else if (lastAutoContrast) { + contrast->setAutoInconsistent(true); + } + + lastAutoContrast = contrast->getAutoValue(); } - lastAutoContrast = contrast->getAutoValue(); - } + if (listener) { + if (contrast->getAutoInconsistent()) { + listener->panelChanged(EvPdShrAutoContrast, M("GENERAL_UNCHANGED")); + } else if (contrast->getAutoValue()) { + listener->panelChanged(EvPdShrAutoContrast, M("GENERAL_ENABLED")); + } else { + listener->panelChanged(EvPdShrAutoContrast, M("GENERAL_DISABLED")); + } + } + } else { // must be dradius + if (multiImage) { + if (dradius->getAutoInconsistent()) { + dradius->setAutoInconsistent(false); + dradius->setAutoValue(false); + } else if (lastAutoRadius) { + dradius->setAutoInconsistent(true); + } - if (listener) { - if (contrast->getAutoInconsistent()) { - listener->panelChanged(EvPdShrAutoContrast, M("GENERAL_UNCHANGED")); - } else if (contrast->getAutoValue()) { - listener->panelChanged(EvPdShrAutoContrast, M("GENERAL_ENABLED")); - } else { - listener->panelChanged(EvPdShrAutoContrast, M("GENERAL_DISABLED")); + lastAutoRadius = dradius->getAutoValue(); + } + + if (listener) { + if (dradius->getAutoInconsistent()) { + listener->panelChanged(EvPdShrAutoRadius, M("GENERAL_UNCHANGED")); + } else if (dradius->getAutoValue()) { + listener->panelChanged(EvPdShrAutoRadius, M("GENERAL_ENABLED")); + } else { + listener->panelChanged(EvPdShrAutoRadius, M("GENERAL_DISABLED")); + } } } } diff --git a/rtgui/pdsharpening.h b/rtgui/pdsharpening.h index b2f7b6e57..e56b4b085 100644 --- a/rtgui/pdsharpening.h +++ b/rtgui/pdsharpening.h @@ -21,7 +21,7 @@ #include "adjuster.h" #include "toolpanel.h" -class PdSharpening final : public ToolParamBlock, public AdjusterListener, public FoldableToolPanel, public rtengine::AutoContrastListener +class PdSharpening final : public ToolParamBlock, public AdjusterListener, public FoldableToolPanel, public rtengine::AutoContrastListener, public rtengine::AutoRadiusListener { protected: @@ -31,11 +31,13 @@ protected: Adjuster* diter; bool lastAutoContrast; + bool lastAutoRadius; rtengine::ProcEvent EvPdShrContrast; rtengine::ProcEvent EvPdShrDRadius; rtengine::ProcEvent EvPdSharpenGamma; rtengine::ProcEvent EvPdShrDIterations; rtengine::ProcEvent EvPdShrAutoContrast; + rtengine::ProcEvent EvPdShrAutoRadius; IdleRegister idle_register; public: @@ -53,6 +55,7 @@ public: void enabledChanged () override; void autoContrastChanged (double autoContrast) override; + void autoRadiusChanged (double autoRadius) override; void setAdjusterBehavior (bool contrastadd, bool gammaadd, bool radiusadd, bool iteradds); void trimValues (rtengine::procparams::ProcParams* pp) override; diff --git a/rtgui/toolpanelcoord.cc b/rtgui/toolpanelcoord.cc index a664942a1..4fec4a18a 100644 --- a/rtgui/toolpanelcoord.cc +++ b/rtgui/toolpanelcoord.cc @@ -573,6 +573,7 @@ void ToolPanelCoordinator::initImage (rtengine::StagedImageProcessor* ipc_, bool ipc->setBayerAutoContrastListener (bayerprocess); ipc->setXtransAutoContrastListener (xtransprocess); ipc->setpdSharpenAutoContrastListener (pdSharpening); + ipc->setpdSharpenAutoRadiusListener (pdSharpening); ipc->setAutoWBListener (whitebalance); ipc->setAutoColorTonListener (colortoning); ipc->setAutoChromaListener (dirpyrdenoise); diff --git a/rtgui/toolpanelcoord.h b/rtgui/toolpanelcoord.h index 390967395..3a4ddc05f 100644 --- a/rtgui/toolpanelcoord.h +++ b/rtgui/toolpanelcoord.h @@ -242,7 +242,6 @@ public: void imageTypeChanged (bool isRaw, bool isBayer, bool isXtrans, bool isMono = false) override; -// void autoContrastChanged (double autoContrast); // profilechangelistener interface void profileChange( const rtengine::procparams::PartialProfile* nparams, From af10bf8c7c8f06c5fc2effecda4533937016ee9a Mon Sep 17 00:00:00 2001 From: "luz.paz" Date: Wed, 11 Sep 2019 17:38:55 -0400 Subject: [PATCH 07/15] Fix misc. source comment typo Found via `codespell -q 3 -I ../rawtherapy-whitelist.txt -S ./rtdata/languages -L bord,hist,fo,reall,bloc,alph,dof,thre,makro,chang,currentry,portugues,vektor,ue` --- rtengine/demosaic_algos.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rtengine/demosaic_algos.cc b/rtengine/demosaic_algos.cc index 49b386b0a..51db8bb3f 100644 --- a/rtengine/demosaic_algos.cc +++ b/rtengine/demosaic_algos.cc @@ -2024,7 +2024,8 @@ void RawImageSource::refinement(int PassCount) // Refinement based on EECI demozaicing algorithm by L. Chang and Y.P. Tan // from "Lassus" : Luis Sanz Rodriguez, adapted by Jacques Desmis - JDC - and Oliver Duis for RawTherapee -// increases the signal to noise ratio (PSNR) # +1 to +2 dB : tested with Dcraw : eg: Lighthouse + AMaZE : whitout refinement:39.96dB, with refinement:41.86 dB +// increases the signal to noise ratio (PSNR) # +1 to +2 dB : tested with Dcraw : +// eg: Lighthouse + AMaZE : without refinement:39.96 dB, with refinement:41.86 dB // reduce color artifacts, improves the interpolation // but it's relatively slow // From 1e75f38dba6b95d380f1d97fb9ec61748a139ff1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fl=C3=B6ssie?= Date: Thu, 12 Sep 2019 14:49:51 +0200 Subject: [PATCH 08/15] Softlight cleanups - More `const` - Removed `using namespace` - Whitespace cleanups --- rtengine/ipsoftlight.cc | 57 ++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 32 deletions(-) diff --git a/rtengine/ipsoftlight.cc b/rtengine/ipsoftlight.cc index 1d94a29c2..556790eb4 100644 --- a/rtengine/ipsoftlight.cc +++ b/rtengine/ipsoftlight.cc @@ -20,17 +20,17 @@ */ #include "improcfun.h" + #include "procparams.h" #define BENCHMARK #include "StopWatch.h" -namespace rtengine { namespace { inline float sl(float blend, float x) { - if (!OOG(x)) { - float v = Color::gamma_srgb(x) / MAXVALF; + if (!rtengine::OOG(x)) { + float v = rtengine::Color::gamma_srgb(x) / rtengine::MAXVALF; // using Pegtop's formula from // https://en.wikipedia.org/wiki/Blend_modes#Soft_Light // const float orig = 1.f - blend; @@ -40,7 +40,7 @@ inline float sl(float blend, float x) // return blend * Color::igamma_srgb(v * MAXVALF) + orig * x; // using optimized formula (heckflosse67@gmx.de) - return intp(blend, Color::igamma_srgb(v * v * (3.f - 2.f * v) * MAXVALF), x); + return rtengine::intp(blend, rtengine::Color::igamma_srgb(v * v * (3.f - 2.f * v) * rtengine::MAXVALF), x); } return x; } @@ -48,51 +48,46 @@ inline float sl(float blend, float x) #ifdef __SSE2__ inline vfloat sl(vfloat blend, vfloat x) { - const vfloat v = Color::gammatab_srgb[x] / F2V(MAXVALF); - return vself(vmaskf_gt(x, F2V(MAXVALF)), x, vself(vmaskf_lt(x, ZEROV), x, vintpf(blend, Color::igammatab_srgb[v * v * (F2V(3.f) - (v + v)) * MAXVALF], x))); + const vfloat v = rtengine::Color::gammatab_srgb[x] / F2V(rtengine::MAXVALF); + return vself(vmaskf_gt(x, F2V(rtengine::MAXVALF)), x, vself(vmaskf_lt(x, ZEROV), x, vintpf(blend, rtengine::Color::igammatab_srgb[v * v * (F2V(3.f) - (v + v)) * rtengine::MAXVALF], x))); } #endif } // namespace - -void ImProcFunctions::softLight(LabImage *lab) +void rtengine::ImProcFunctions::softLight(LabImage *lab) { if (!params->softlight.enabled || !params->softlight.strength) { return; } BENCHFUN - TMatrix wprof = ICCStore::getInstance()->workingSpaceMatrix(params->icm.workingProfile); + const TMatrix wprof = ICCStore::getInstance()->workingSpaceMatrix(params->icm.workingProfile); const float wp[3][3] = { - {static_cast (wprof[0][0]), static_cast (wprof[0][1]), static_cast (wprof[0][2])}, - {static_cast (wprof[1][0]), static_cast (wprof[1][1]), static_cast (wprof[1][2])}, - {static_cast (wprof[2][0]), static_cast (wprof[2][1]), static_cast (wprof[2][2])} + {static_cast(wprof[0][0]), static_cast(wprof[0][1]), static_cast(wprof[0][2])}, + {static_cast(wprof[1][0]), static_cast(wprof[1][1]), static_cast(wprof[1][2])}, + {static_cast(wprof[2][0]), static_cast(wprof[2][1]), static_cast(wprof[2][2])} }; - TMatrix wiprof = ICCStore::getInstance()->workingSpaceInverseMatrix(params->icm.workingProfile); + const TMatrix wiprof = ICCStore::getInstance()->workingSpaceInverseMatrix(params->icm.workingProfile); const float wip[3][3] = { - {static_cast (wiprof[0][0]), static_cast (wiprof[0][1]), static_cast (wiprof[0][2])}, - {static_cast (wiprof[1][0]), static_cast (wiprof[1][1]), static_cast (wiprof[1][2])}, - {static_cast (wiprof[2][0]), static_cast (wiprof[2][1]), static_cast (wiprof[2][2])} + {static_cast(wiprof[0][0]), static_cast(wiprof[0][1]), static_cast(wiprof[0][2])}, + {static_cast(wiprof[1][0]), static_cast(wiprof[1][1]), static_cast(wiprof[1][2])}, + {static_cast(wiprof[2][0]), static_cast(wiprof[2][1]), static_cast(wiprof[2][2])} }; #ifdef __SSE2__ - vfloat wipv[3][3]; + const vfloat wpv[3][3] = { + {F2V(wprof[0][0]), F2V(wprof[0][1]), F2V(wprof[0][2])}, + {F2V(wprof[1][0]), F2V(wprof[1][1]), F2V(wprof[1][2])}, + {F2V(wprof[2][0]), F2V(wprof[2][1]), F2V(wprof[2][2])} + }; - for (int i = 0; i < 3; i++) { - for (int j = 0; j < 3; j++) { - wipv[i][j] = F2V(wiprof[i][j]); - } - } - - vfloat wpv[3][3]; - - for (int i = 0; i < 3; i++) { - for (int j = 0; j < 3; j++) { - wpv[i][j] = F2V(wprof[i][j]); - } - } + const vfloat wipv[3][3] = { + {F2V(wiprof[0][0]), F2V(wiprof[0][1]), F2V(wiprof[0][2])}, + {F2V(wiprof[1][0]), F2V(wiprof[1][1]), F2V(wiprof[1][2])}, + {F2V(wiprof[2][0]), F2V(wiprof[2][1]), F2V(wiprof[2][2])} + }; #endif #ifdef _OPENMP @@ -137,5 +132,3 @@ void ImProcFunctions::softLight(LabImage *lab) } } } - -} // namespace rtengine From 62ec0f0fd295d6e86437953bc3254a3d32b5339e Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Sat, 14 Sep 2019 14:43:32 +0200 Subject: [PATCH 09/15] Capture sharpening: tiled deconvolution --- rtengine/capturesharpening.cc | 381 ++++++++++++++++++++++++++++++---- rtengine/rt_algo.cc | 2 +- 2 files changed, 346 insertions(+), 37 deletions(-) diff --git a/rtengine/capturesharpening.cc b/rtengine/capturesharpening.cc index e741016db..ef35695f3 100644 --- a/rtengine/capturesharpening.cc +++ b/rtengine/capturesharpening.cc @@ -28,7 +28,7 @@ #include "color.h" #include "gauss.h" #include "rt_algo.h" -#define BENCHMARK +//#define BENCHMARK #include "StopWatch.h" #ifdef _OPENMP #include @@ -38,6 +38,257 @@ namespace { +void compute7x7kernel(float sigma, float kernel[7][7]) { + + const double temp = -2.f * rtengine::SQR(sigma); + float sum = 0.f; + for (int i = -3; i <= 3; ++i) { + for (int j = -3; j <= 3; ++j) { + if((rtengine::SQR(i) + rtengine::SQR(j)) <= rtengine::SQR(3.0 * 1.15)) { + kernel[i + 3][j + 3] = std::exp((rtengine::SQR(i) + rtengine::SQR(j)) / temp); + sum += kernel[i + 3][j + 3]; + } else { + kernel[i + 3][j + 3] = 0.f; + } + } + } + + for (int i = 0; i < 7; ++i) { + for (int j = 0; j < 7; ++j) { + kernel[i][j] /= sum; + } + } +} + +void compute5x5kernel(float sigma, float kernel[5][5]) { + + const double temp = -2.f * rtengine::SQR(sigma); + float sum = 0.f; + for (int i = -2; i <= 2; ++i) { + for (int j = -2; j <= 2; ++j) { + if((rtengine::SQR(i) + rtengine::SQR(j)) <= rtengine::SQR(3.0 * 0.84)) { + kernel[i + 2][j + 2] = std::exp((rtengine::SQR(i) + rtengine::SQR(j)) / temp); + sum += kernel[i + 2][j + 2]; + } else { + kernel[i + 2][j + 2] = 0.f; + } + } + } + + for (int i = 0; i < 5; ++i) { + for (int j = 0; j < 5; ++j) { + kernel[i][j] /= sum; + } + } +} + +void compute3x3kernel(float sigma, float kernel[3][3]) { + + const double temp = -2.f * rtengine::SQR(sigma); + float sum = 0.f; + for (int i = -1; i <= 1; ++i) { + for (int j = -1; j <= 1; ++j) { + if((rtengine::SQR(i) + rtengine::SQR(j)) <= rtengine::SQR(3.0 * 0.84)) { + kernel[i + 1][j + 1] = std::exp((rtengine::SQR(i) + rtengine::SQR(j)) / temp); + sum += kernel[i + 1][j + 1]; + } else { + kernel[i + 1][j + 1] = 0.f; + } + } + } + + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + kernel[i][j] /= sum; + } + } +} + +inline void gauss3x3div (float** RESTRICT src, float** RESTRICT dst, float** RESTRICT divBuffer, const int W, const int H, const float kernel[3][3]) +{ + + const float c11 = kernel[0][0]; + const float c10 = kernel[0][1]; + const float c00 = kernel[1][1]; + + for (int i = 1; i < H - 1; i++) { + dst[i][0] = 1.f; + for (int j = 1; j < W - 1; j++) { + const float val = c11 * (src[i - 1][j - 1] + src[i - 1][j + 1] + src[i + 1][j - 1] + src[i + 1][j + 1]) + + c10 * (src[i - 1][j] + src[i][j - 1] + src[i][j + 1] + src[i + 1][j]) + + c00 * src[i][j]; + dst[i][j] = divBuffer[i][j] / std::max(val, 0.00001f); + } + dst[i][W - 1] = 1.f; + } + // first and last rows + { + for (int i = 0; i < 1; ++i) { + for (int j = 0; j < W; ++j) { + dst[i][j] = 1.f; + } + } + for (int i = H - 1 ; i < H; ++i) { + for (int j = 0; j < W; ++j) { + dst[i][j] = 1.f; + } + } + } +} + +inline void gauss5x5div (float** RESTRICT src, float** RESTRICT dst, float** RESTRICT divBuffer, const int W, const int H, const float kernel[5][5]) +{ + + const float c21 = kernel[0][1]; + const float c20 = kernel[0][2]; + const float c11 = kernel[1][1]; + const float c10 = kernel[1][2]; + const float c00 = kernel[2][2]; + + for (int i = 2; i < H - 2; ++i) { + dst[i][0] = dst[i][1] = 1.f; + // I tried hand written SSE code but gcc vectorizes better + for (int j = 2; j < W - 2; ++j) { + const float val = c21 * (src[i - 2][j - 1] + src[i - 2][j + 1] + src[i - 1][j - 2] + src[i - 1][j + 2] + src[i + 1][j - 2] + src[i + 1][j + 2] + src[i + 2][j - 1] + src[i + 2][j + 1]) + + c20 * (src[i - 2][j] + src[i][j - 2] + src[i][j + 2] + src[i + 2][j]) + + c11 * (src[i - 1][j - 1] + src[i - 1][j + 1] + src[i + 1][j - 1] + src[i + 1][j + 1]) + + c10 * (src[i - 1][j] + src[i][j - 1] + src[i][j + 1] + src[i + 1][j]) + + c00 * src[i][j]; + + dst[i][j] = divBuffer[i][j] / std::max(val, 0.00001f); + } + dst[i][W - 2] = dst[i][W - 1] = 1.f; + } + + // first and last rows + { + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < W; ++j) { + dst[i][j] = 1.f; + } + } + for (int i = H - 2 ; i < H; ++i) { + for (int j = 0; j < W; ++j) { + dst[i][j] = 1.f; + } + } + } +} + +inline void gauss7x7div(float** RESTRICT src, float** RESTRICT dst, float** RESTRICT divBuffer, const int W, const int H, const float kernel[7][7]) +{ + + const float c31 = kernel[0][2]; + const float c30 = kernel[0][3]; + const float c22 = kernel[1][1]; + const float c21 = kernel[1][2]; + const float c20 = kernel[1][3]; + const float c11 = kernel[2][2]; + const float c10 = kernel[2][3]; + const float c00 = kernel[3][3]; + + for (int i = 3; i < H - 3; ++i) { + dst[i][0] = dst[i][1] = dst[i][2] = 1.f; + // I tried hand written SSE code but gcc vectorizes better + for (int j = 3; j < W - 3; ++j) { + const float val = c31 * (src[i - 3][j - 1] + src[i - 3][j + 1] + src[i - 1][j - 3] + src[i - 1][j + 3] + src[i + 1][j - 3] + src[i + 1][j + 3] + src[i + 3][j - 1] + src[i + 3][j + 1]) + + c30 * (src[i - 3][j] + src[i][j - 3] + src[i][j + 3] + src[i + 3][j]) + + c22 * (src[i - 2][j - 2] + src[i - 2][j + 2] + src[i + 2][j - 2] + src[i + 2][j + 2]) + + c21 * (src[i - 2][j - 1] + src[i - 2][j + 1] * c21 + src[i - 1][j - 2] + src[i - 1][j + 2] + src[i + 1][j - 2] + src[i + 1][j + 2] + src[i + 2][j - 1] + src[i + 2][j + 1]) + + c20 * (src[i - 2][j] + src[i][j - 2] + src[i][j + 2] + src[i + 2][j]) + + c11 * (src[i - 1][j - 1] + src[i - 1][j + 1] + src[i + 1][j - 1] + src[i + 1][j + 1]) + + c10 * (src[i - 1][j] + src[i][j - 1] + src[i][j + 1] + src[i + 1][j]) + + c00 * src[i][j]; + + dst[i][j] = divBuffer[i][j] / std::max(val, 0.00001f); + } + dst[i][W - 3] = dst[i][W - 2] = dst[i][W - 1] = 1.f; + } + + // first and last rows + { + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < W; ++j) { + dst[i][j] = 1.f; + } + } + for (int i = H - 3 ; i < H; ++i) { + for (int j = 0; j < W; ++j) { + dst[i][j] = 1.f; + } + } + } +} + +inline void gauss3x3mult(float** RESTRICT src, float** RESTRICT dst, const int W, const int H, const float kernel[3][3]) +{ + const float c11 = kernel[0][0]; + const float c10 = kernel[0][1]; + const float c00 = kernel[1][1]; + + for (int i = 1; i < H - 1; i++) { + for (int j = 1; j < W - 1; j++) { + const float val = c11 * (src[i - 1][j - 1] + src[i - 1][j + 1] + src[i + 1][j - 1] + src[i + 1][j + 1]) + + c10 * (src[i - 1][j] + src[i][j - 1] + src[i][j + 1] + src[i + 1][j]) + + c00 * src[i][j]; + dst[i][j] *= val; + } + } + +} + +inline void gauss5x5mult (float** RESTRICT src, float** RESTRICT dst, const int W, const int H, const float kernel[5][5]) +{ + + const float c21 = kernel[0][1]; + const float c20 = kernel[0][2]; + const float c11 = kernel[1][1]; + const float c10 = kernel[1][2]; + const float c00 = kernel[2][2]; + + for (int i = 2; i < H - 2; ++i) { + // I tried hand written SSE code but gcc vectorizes better + for (int j = 2; j < W - 2; ++j) { + const float val = c21 * (src[i - 2][j - 1] + src[i - 2][j + 1] + src[i - 1][j - 2] + src[i - 1][j + 2] + src[i + 1][j - 2] + src[i + 1][j + 2] + src[i + 2][j - 1] + src[i + 2][j + 1]) + + c20 * (src[i - 2][j] + src[i][j - 2] + src[i][j + 2] + src[i + 2][j]) + + c11 * (src[i - 1][j - 1] + src[i - 1][j + 1] + src[i + 1][j - 1] + src[i + 1][j + 1]) + + c10 * (src[i - 1][j] + src[i][j - 1] + src[i][j + 1] + src[i + 1][j]) + + c00 * src[i][j]; + + dst[i][j] *= val; + } + } +} + +inline void gauss7x7mult(float** RESTRICT src, float** RESTRICT dst, const int W, const int H, const float kernel[7][7]) +{ + + const float c31 = kernel[0][2]; + const float c30 = kernel[0][3]; + const float c22 = kernel[1][1]; + const float c21 = kernel[1][2]; + const float c20 = kernel[1][3]; + const float c11 = kernel[2][2]; + const float c10 = kernel[2][3]; + const float c00 = kernel[3][3]; + + for (int i = 3; i < H - 3; ++i) { + // I tried hand written SSE code but gcc vectorizes better + for (int j = 3; j < W - 3; ++j) { + const float val = c31 * (src[i - 3][j - 1] + src[i - 3][j + 1] + src[i - 1][j - 3] + src[i - 1][j + 3] + src[i + 1][j - 3] + src[i + 1][j + 3] + src[i + 3][j - 1] + src[i + 3][j + 1]) + + c30 * (src[i - 3][j] + src[i][j - 3] + src[i][j + 3] + src[i + 3][j]) + + c22 * (src[i - 2][j - 2] + src[i - 2][j + 2] + src[i + 2][j - 2] + src[i + 2][j + 2]) + + c21 * (src[i - 2][j - 1] + src[i - 2][j + 1] * c21 + src[i - 1][j - 2] + src[i - 1][j + 2] + src[i + 1][j - 2] + src[i + 1][j + 2] + src[i + 2][j - 1] + src[i + 2][j + 1]) + + c20 * (src[i - 2][j] + src[i][j - 2] + src[i][j + 2] + src[i + 2][j]) + + c11 * (src[i - 1][j - 1] + src[i - 1][j + 1] + src[i + 1][j - 1] + src[i + 1][j + 1]) + + c10 * (src[i - 1][j] + src[i][j - 1] + src[i][j + 1] + src[i + 1][j]) + + c00 * src[i][j]; + + dst[i][j] *= val; + } + } +} + void buildClipMaskBayer(const float * const *rawData, int W, int H, float** clipMask, const float whites[2][2]) { @@ -274,47 +525,106 @@ float calcRadiusXtrans(const float * const *rawData, int W, int H, float lowerLi } return std::sqrt((1.f / (std::log(1.f / maxRatio))) / -2.f); } -void CaptureDeconvSharpening (float** luminance, float** tmp, const float * const * blend, int W, int H, double sigma, int iterations, rtengine::ProgressListener* plistener, double start, double step) +void CaptureDeconvSharpening (float** luminance, float** oldLuminance, const float * const * blend, int W, int H, double sigma, int iterations, rtengine::ProgressListener* plistener, double startVal, double endVal) { +BENCHFUN + const bool is5x5 = (sigma <= 0.84); + const bool is3x3 = (sigma < 0.6); + float kernel7[7][7]; + float kernel5[5][5]; + float kernel3[3][3]; + if (is3x3) { + compute3x3kernel(sigma, kernel3); + } else if (is5x5) { + compute5x5kernel(sigma, kernel5); + } else { + compute7x7kernel(sigma, kernel7); + } - rtengine::JaggedArray tmpI(W, H); + constexpr int tileSize = 194; + constexpr int border = 3; + constexpr int fullTileSize = tileSize + 2 * border; + double progress = startVal; + const double progressStep = (endVal - startVal) * rtengine::SQR(tileSize) / (W * H); #ifdef _OPENMP #pragma omp parallel #endif { + int progresscounter = 0; + rtengine::JaggedArray tmpIThr(fullTileSize, fullTileSize); + rtengine::JaggedArray tmpThr(fullTileSize, fullTileSize); + rtengine::JaggedArray lumThr(fullTileSize, fullTileSize); +#pragma omp for schedule(dynamic,2) collapse(2) + for (int i = border; i < H - border; i+= tileSize) { + for(int j = border; j < W - border; j+= tileSize) { + const bool endOfCol = (i + tileSize + border) >= H; + const bool endOfRow = (j + tileSize + border) >= W; + // fill tiles + if (endOfRow || endOfCol) { + // special handling for small tiles at end of row or column + for (int k = 0, ii = endOfCol ? H - fullTileSize : i; k < fullTileSize; ++k, ++ii) { + for (int l = 0, jj = endOfRow ? W - fullTileSize : j; l < fullTileSize; ++l, ++jj) { + tmpIThr[k][l] = oldLuminance[ii - border][jj - border]; + lumThr[k][l] = oldLuminance[ii - border][jj - border]; + } + } + } else { + for (int ii = i; ii < i + fullTileSize; ++ii) { + for (int jj = j; jj < j + fullTileSize; ++jj) { + tmpIThr[ii - i][jj - j] = oldLuminance[ii - border][jj - border]; + lumThr[ii - i][jj - j] = oldLuminance[ii - border][jj - border]; + } + } + } + if (is3x3) { + for (int k = 0; k < iterations; ++k) { + // apply 3x3 gaussian blur and divide luminance by result of gaussian blur + gauss3x3div(tmpIThr, tmpThr, lumThr, fullTileSize, fullTileSize, kernel3); + gauss3x3mult(tmpThr, tmpIThr, fullTileSize, fullTileSize, kernel3); + } + } else if (is5x5) { + for (int k = 0; k < iterations; ++k) { + // apply 5x5 gaussian blur and divide luminance by result of gaussian blur + gauss5x5div(tmpIThr, tmpThr, lumThr, fullTileSize, fullTileSize, kernel5); + gauss5x5mult(tmpThr, tmpIThr, fullTileSize, fullTileSize, kernel5); + } + } else { + for (int k = 0; k < iterations; ++k) { + // apply 7x7 gaussian blur and divide luminance by result of gaussian blur + gauss7x7div(tmpIThr, tmpThr, lumThr, fullTileSize, fullTileSize, kernel7); + gauss7x7mult(tmpThr, tmpIThr, fullTileSize, fullTileSize, kernel7); + } + } + if (endOfRow || endOfCol) { + // special handling for small tiles at end of row or column + for (int k = border, ii = endOfCol ? H - fullTileSize - border : i - border; k < fullTileSize - border; ++k) { + for (int l = border, jj = endOfRow ? W - fullTileSize - border : j - border; l < fullTileSize - border; ++l) { + luminance[ii + k][jj + l] = rtengine::intp(blend[ii + k][jj + l], max(tmpIThr[k][l], 0.0f), luminance[ii + k][jj + l]); + } + } + } else { + for (int ii = border; ii < fullTileSize - border; ++ii) { + for (int jj = border; jj < fullTileSize - border; ++jj) { + luminance[i + ii - border][j + jj - border] = rtengine::intp(blend[i + ii - border][j + jj - border], max(tmpIThr[ii][jj], 0.0f), luminance[i + ii - border][j + jj - border]); + } + } + } + if (plistener) { + if (++progresscounter % 16 == 0) { #ifdef _OPENMP - #pragma omp for + #pragma omp critical(csprogress) #endif - for (int i = 0; i < H; i++) { - for(int j = 0; j < W; j++) { - tmpI[i][j] = max(luminance[i][j], 0.f); + { + progress += 16.0 * progressStep; + progress = rtengine::min(progress, endVal); + plistener->setProgress(progress); + } + } + } } } - - for (int k = 0; k < iterations; k++) { - // apply gaussian blur and divide luminance by result of gaussian blur - gaussianBlur(tmpI, tmp, W, H, sigma, nullptr, GAUSS_DIV, luminance); - gaussianBlur(tmp, tmpI, W, H, sigma, nullptr, GAUSS_MULT); - if (plistener) { -#ifdef _OPENMP - #pragma omp single -#endif - start += step; - plistener->setProgress(start); - } - } // end for - -#ifdef _OPENMP - #pragma omp for -#endif - - for (int i = 0; i < H; ++i) { - for (int j = 0; j < W; ++j) { - luminance[i][j] = rtengine::intp(blend[i][j], max(tmpI[i][j], 0.0f), luminance[i][j]); - } - } - } // end parallel + } } } @@ -328,7 +638,7 @@ void RawImageSource::captureSharpening(const procparams::CaptureSharpeningParams plistener->setProgressStr(M("TP_PDSHARPENING_LABEL")); plistener->setProgress(0.0); } - +BENCHFUN const float xyz_rgb[3][3] = { // XYZ from RGB { 0.412453, 0.357580, 0.180423 }, { 0.212671, 0.715160, 0.072169 }, @@ -442,13 +752,14 @@ void RawImageSource::captureSharpening(const procparams::CaptureSharpeningParams array2D& L = Lbuffer ? *Lbuffer : red; array2D& YOld = YOldbuffer ? * YOldbuffer : green; array2D& YNew = YNewbuffer ? * YNewbuffer : blue; + const float gamma = sharpeningParams.gamma; #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 16) #endif for (int i = 0; i < H; ++i) { Color::RGB2L(redVals[i], greenVals[i], blueVals[i], L[i], xyz_rgb, W); - Color::RGB2Y(redVals[i], greenVals[i], blueVals[i], YOld[i], YNew[i], sharpeningParams.gamma, W); + Color::RGB2Y(redVals[i], greenVals[i], blueVals[i], YOld[i], YNew[i], gamma, W); } if (plistener) { plistener->setProgress(0.1); @@ -461,12 +772,10 @@ void RawImageSource::captureSharpening(const procparams::CaptureSharpeningParams } conrastThreshold = contrast * 100.f; - array2D& tmp = L; // L is not used anymore now => we can use its buffer as the needed temporary buffer - CaptureDeconvSharpening(YNew, tmp, blend, W, H, radius, sharpeningParams.deconviter, plistener, 0.2, (0.9 - 0.2) / sharpeningParams.deconviter); + CaptureDeconvSharpening(YNew, YOld, blend, W, H, radius, sharpeningParams.deconviter, plistener, 0.2, 0.9); if (plistener) { plistener->setProgress(0.9); } - const float gamma = sharpeningParams.gamma; #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 16) #endif diff --git a/rtengine/rt_algo.cc b/rtengine/rt_algo.cc index 4afd8ac73..dd7783c1d 100644 --- a/rtengine/rt_algo.cc +++ b/rtengine/rt_algo.cc @@ -394,7 +394,7 @@ void buildBlendMask(const float* const * luminance, float **blend, int W, int H, } } - contrastThreshold = minvar <= 4.f ? calcContrastThreshold(luminance, topLeftYStart + minI, topLeftXStart + minJ, tilesize) : 0.f; + contrastThreshold = minvar <= 8.f ? calcContrastThreshold(luminance, topLeftYStart + minI, topLeftXStart + minJ, tilesize) : 0.f; } } } From aadeb539a92b1729fa8a8a2d7b4a0b2d71682dfd Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Sat, 14 Sep 2019 17:51:28 +0200 Subject: [PATCH 10/15] Capture sharpening: small speedup, also reduced memory usage by width * height * 4 byte --- rtengine/capturesharpening.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtengine/capturesharpening.cc b/rtengine/capturesharpening.cc index ef35695f3..4bfe91aba 100644 --- a/rtengine/capturesharpening.cc +++ b/rtengine/capturesharpening.cc @@ -765,7 +765,7 @@ BENCHFUN plistener->setProgress(0.1); } // calculate contrast based blend factors to reduce sharpening in regions with low contrast - JaggedArray blend(W, H); + array2D& blend = clipMask; // we can share blend and clipMask buffer here buildBlendMask(L, blend, W, H, contrast, 1.f, sharpeningParams.autoContrast, clipMask); if (plistener) { plistener->setProgress(0.2); From 51daaf5fa1e4e75a3e225dd4b3f98ab8cfa64c89 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Sat, 14 Sep 2019 19:35:19 +0200 Subject: [PATCH 11/15] Capture Sharpening: some code cleanups --- rtengine/capturesharpening.cc | 58 ++++++++++++++--------------------- 1 file changed, 23 insertions(+), 35 deletions(-) diff --git a/rtengine/capturesharpening.cc b/rtengine/capturesharpening.cc index 4bfe91aba..8d9ee5b64 100644 --- a/rtengine/capturesharpening.cc +++ b/rtengine/capturesharpening.cc @@ -19,7 +19,6 @@ #include #include -#include "jaggedarray.h" #include "rtengine.h" #include "rawimagesource.h" #include "rt_math.h" @@ -121,18 +120,12 @@ inline void gauss3x3div (float** RESTRICT src, float** RESTRICT dst, float** RES } dst[i][W - 1] = 1.f; } - // first and last rows - { - for (int i = 0; i < 1; ++i) { - for (int j = 0; j < W; ++j) { - dst[i][j] = 1.f; - } - } - for (int i = H - 1 ; i < H; ++i) { - for (int j = 0; j < W; ++j) { - dst[i][j] = 1.f; - } - } + // first and last row + for (int j = 0; j < W; ++j) { + dst[0][j] = 1.f; + } + for (int j = 0; j < W; ++j) { + dst[H - 1][j] = 1.f; } } @@ -161,16 +154,14 @@ inline void gauss5x5div (float** RESTRICT src, float** RESTRICT dst, float** RES } // first and last rows - { - for (int i = 0; i < 2; ++i) { - for (int j = 0; j < W; ++j) { - dst[i][j] = 1.f; - } + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < W; ++j) { + dst[i][j] = 1.f; } - for (int i = H - 2 ; i < H; ++i) { - for (int j = 0; j < W; ++j) { - dst[i][j] = 1.f; - } + } + for (int i = H - 2 ; i < H; ++i) { + for (int j = 0; j < W; ++j) { + dst[i][j] = 1.f; } } } @@ -206,16 +197,14 @@ inline void gauss7x7div(float** RESTRICT src, float** RESTRICT dst, float** REST } // first and last rows - { - for (int i = 0; i < 3; ++i) { - for (int j = 0; j < W; ++j) { - dst[i][j] = 1.f; - } + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < W; ++j) { + dst[i][j] = 1.f; } - for (int i = H - 3 ; i < H; ++i) { - for (int j = 0; j < W; ++j) { - dst[i][j] = 1.f; - } + } + for (int i = H - 3 ; i < H; ++i) { + for (int j = 0; j < W; ++j) { + dst[i][j] = 1.f; } } } @@ -300,7 +289,6 @@ void buildClipMaskBayer(const float * const *rawData, int W, int H, float** clip clipMask[row][col] = 1.f; } } - #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 16) #endif @@ -552,9 +540,9 @@ BENCHFUN #endif { int progresscounter = 0; - rtengine::JaggedArray tmpIThr(fullTileSize, fullTileSize); - rtengine::JaggedArray tmpThr(fullTileSize, fullTileSize); - rtengine::JaggedArray lumThr(fullTileSize, fullTileSize); + array2D tmpIThr(fullTileSize, fullTileSize); + array2D tmpThr(fullTileSize, fullTileSize); + array2D lumThr(fullTileSize, fullTileSize); #pragma omp for schedule(dynamic,2) collapse(2) for (int i = border; i < H - border; i+= tileSize) { for(int j = border; j < W - border; j+= tileSize) { From 19e01124952069794dbfc980e4e0a315566fdd83 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Sun, 15 Sep 2019 12:55:20 +0200 Subject: [PATCH 12/15] xtrans: border artifacts when using 1-pass Markesteijn, fixes #5452 --- rtengine/xtrans_demosaic.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtengine/xtrans_demosaic.cc b/rtengine/xtrans_demosaic.cc index e84b5217e..9a3b341cc 100644 --- a/rtengine/xtrans_demosaic.cc +++ b/rtengine/xtrans_demosaic.cc @@ -959,7 +959,7 @@ void RawImageSource::xtrans_interpolate (const int passes, const bool useCieLab, free(buffer); } - xtransborder_interpolate(8, red, green, blue); + xtransborder_interpolate(passes > 1 ? 8 : 11, red, green, blue); } #undef CLIP void RawImageSource::fast_xtrans_interpolate (const array2D &rawData, array2D &red, array2D &green, array2D &blue) From a06c2714e27a7415554fc036c7749bc209f33f2a Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Sun, 15 Sep 2019 16:29:29 +0200 Subject: [PATCH 13/15] Update capturesharpening.cc Fix artifacts at radius > 0.85 --- rtengine/capturesharpening.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtengine/capturesharpening.cc b/rtengine/capturesharpening.cc index 8d9ee5b64..df2b1d908 100644 --- a/rtengine/capturesharpening.cc +++ b/rtengine/capturesharpening.cc @@ -530,7 +530,7 @@ BENCHFUN } constexpr int tileSize = 194; - constexpr int border = 3; + constexpr int border = 5; constexpr int fullTileSize = tileSize + 2 * border; double progress = startVal; From f55afb91c306c3449f579bff0f1f6cfdcde8c831 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Sun, 15 Sep 2019 22:30:03 +0200 Subject: [PATCH 14/15] Soft Light: remove benchmark code, closes #5447 --- rtengine/ipsoftlight.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/rtengine/ipsoftlight.cc b/rtengine/ipsoftlight.cc index 556790eb4..cd49e858f 100644 --- a/rtengine/ipsoftlight.cc +++ b/rtengine/ipsoftlight.cc @@ -22,8 +22,6 @@ #include "improcfun.h" #include "procparams.h" -#define BENCHMARK -#include "StopWatch.h" namespace { @@ -60,7 +58,6 @@ void rtengine::ImProcFunctions::softLight(LabImage *lab) if (!params->softlight.enabled || !params->softlight.strength) { return; } - BENCHFUN const TMatrix wprof = ICCStore::getInstance()->workingSpaceMatrix(params->icm.workingProfile); const float wp[3][3] = { From 796e8f02896c0506dd9c403572adabf80187e03b Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Mon, 16 Sep 2019 17:17:49 +0200 Subject: [PATCH 15/15] Removed timing code --- rtengine/iptransform.cc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc index 1e739dac6..1a38d8e2f 100644 --- a/rtengine/iptransform.cc +++ b/rtengine/iptransform.cc @@ -31,9 +31,6 @@ #include "rtlensfun.h" #include "sleef.c" -#define BENCHMARK -#include "StopWatch.h" - using namespace std; namespace @@ -854,7 +851,6 @@ void ImProcFunctions::transformLuminanceOnly (Imagefloat* original, Imagefloat* void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, Imagefloat *transformed, int cx, int cy, int sx, int sy, int oW, int oH, int fW, int fH, const LensCorrection *pLCPMap) { - BENCHFUN // set up stuff, depending on the mode we are const bool enableLCPDist = pLCPMap && params->lensProf.useDist; const bool enableCA = highQuality && needsCA();