From d17f71eb72216ab0e7c196408064006ad45bf519 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Mon, 30 Dec 2019 15:27:17 +0100 Subject: [PATCH 01/22] Applying geometric transformations leads to dark artifacts in combination with capture sharpening, fixes #5588 --- rtengine/iptransform.cc | 140 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 132 insertions(+), 8 deletions(-) diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc index af513536e..736fa0620 100644 --- a/rtengine/iptransform.cc +++ b/rtengine/iptransform.cc @@ -111,6 +111,31 @@ inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys, g = vhadd(weight * gv); b = vhadd(weight * bv); } + +inline void interpolateTransformCubicLog(rtengine::Imagefloat* src, int xs, int ys, float Dx, float Dy, float &r, float &g, float &b, float mul) +{ + constexpr float A = -0.85f; + + // Vertical + const float t1Vert = A * (Dy - Dy * Dy); + const float t2Vert = (3.f - 2.f * Dy) * Dy * Dy; + const vfloat w3Vert = F2V(t1Vert * Dy); + const vfloat w2Vert = F2V(t1Vert * Dy - t1Vert + t2Vert); + const vfloat w1Vert = F2V(1.f - (t1Vert * Dy) - t2Vert); + const vfloat w0Vert = F2V(t1Vert - (t1Vert * Dy)); + + const vfloat rv = (w0Vert * xlogf(LVFU(src->r(ys, xs))) + w1Vert * xlogf(LVFU(src->r(ys + 1, xs)))) + (w2Vert * xlogf(LVFU(src->r(ys + 2, xs))) + w3Vert * xlogf(LVFU(src->r(ys + 3, xs)))); + const vfloat gv = (w0Vert * xlogf(LVFU(src->g(ys, xs))) + w1Vert * xlogf(LVFU(src->g(ys + 1, xs)))) + (w2Vert * xlogf(LVFU(src->g(ys + 2, xs))) + w3Vert * xlogf(LVFU(src->g(ys + 3, xs)))); + const vfloat bv = (w0Vert * xlogf(LVFU(src->b(ys, xs))) + w1Vert * xlogf(LVFU(src->b(ys + 1, xs)))) + (w2Vert * xlogf(LVFU(src->b(ys + 2, xs))) + w3Vert * xlogf(LVFU(src->b(ys + 3, xs)))); + + // Horizontal + const float t1Hor = A * (Dx - Dx * Dx); + const float t2Hor = (3.f - 2.f * Dx) * Dx * Dx; + const vfloat weight = _mm_set_ps(t1Hor * Dx, t1Hor * Dx - t1Hor + t2Hor, 1.f - (t1Hor * Dx) - t2Hor, t1Hor - 
(t1Hor * Dx)); + r = mul * xexpf(vhadd(weight * rv)); + g = mul * xexpf(vhadd(weight * gv)); + b = mul * xexpf(vhadd(weight * bv)); +} #else inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys, float Dx, float Dy, float &r, float &g, float &b, float mul) { @@ -143,6 +168,38 @@ inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys, g = mul * (gv[0] * w0Hor + gv[1] * w1Hor + gv[2] * w2Hor + gv[3] * w3Hor); b = mul * (bv[0] * w0Hor + bv[1] * w1Hor + bv[2] * w2Hor + bv[3] * w3Hor); } + +inline void interpolateTransformCubicLog(rtengine::Imagefloat* src, int xs, int ys, float Dx, float Dy, float &r, float &g, float &b, float mul) +{ + constexpr float A = -0.85f; + + // Vertical + const float t1Vert = A * (Dy - Dy * Dy); + const float t2Vert = (3.f - 2.f * Dy) * Dy * Dy; + const float w3Vert = t1Vert * Dy; + const float w2Vert = t1Vert * Dy - t1Vert + t2Vert; + const float w1Vert = 1.f - (t1Vert * Dy) - t2Vert; + const float w0Vert = t1Vert - (t1Vert * Dy); + + float rv[4], gv[4], bv[4]; + for (int i = 0; i < 4; ++i) { + rv[i] = w0Vert * xlogf(src->r(ys, xs + i)) + w1Vert * xlogf(src->r(ys + 1, xs + i)) + w2Vert * xlogf(src->r(ys + 2, xs + i)) + w3Vert * xlogf(src->r(ys + 3, xs + i)); + gv[i] = w0Vert * xlogf(src->g(ys, xs + i)) + w1Vert * xlogf(src->g(ys + 1, xs + i)) + w2Vert * xlogf(src->g(ys + 2, xs + i)) + w3Vert * xlogf(src->g(ys + 3, xs + i)); + bv[i] = w0Vert * xlogf(src->b(ys, xs + i)) + w1Vert * xlogf(src->b(ys + 1, xs + i)) + w2Vert * xlogf(src->b(ys + 2, xs + i)) + w3Vert * xlogf(src->b(ys + 3, xs + i)); + } + + // Horizontal + const float t1Hor = A * (Dx - Dx * Dx); + const float t2Hor = (3.f - 2.f * Dx) * Dx * Dx; + const float w3Hor = t1Hor * Dx; + const float w2Hor = t1Hor * Dx - t1Hor + t2Hor; + const float w1Hor = 1.f - (t1Hor * Dx) - t2Hor; + const float w0Hor = t1Hor - (t1Hor * Dx); + + r = mul * xexpf(rv[0] * w0Hor + rv[1] * w1Hor + rv[2] * w2Hor + rv[3] * w3Hor); + g = mul * xexpf(gv[0] * w0Hor + 
gv[1] * w1Hor + gv[2] * w2Hor + gv[3] * w3Hor); + b = mul * xexpf(bv[0] * w0Hor + bv[1] * w1Hor + bv[2] * w2Hor + bv[3] * w3Hor); +} #endif #ifdef __SSE2__ inline void interpolateTransformChannelsCubic(const float* const* src, int xs, int ys, float Dx, float Dy, float& dest, float mul) @@ -165,6 +222,27 @@ inline void interpolateTransformChannelsCubic(const float* const* src, int xs, i const vfloat weight = _mm_set_ps(t1Hor * Dx, t1Hor * Dx - t1Hor + t2Hor, 1.f - (t1Hor * Dx) - t2Hor, t1Hor - (t1Hor * Dx)); dest = mul * vhadd(weight * cv); } + +inline void interpolateTransformChannelsCubicLog(const float* const* src, int xs, int ys, float Dx, float Dy, float& dest, float mul) +{ + constexpr float A = -0.85f; + + // Vertical + const float t1Vert = A * (Dy - Dy * Dy); + const float t2Vert = (3.f - 2.f * Dy) * Dy * Dy; + const vfloat w3Vert = F2V(t1Vert * Dy); + const vfloat w2Vert = F2V(t1Vert * Dy - t1Vert + t2Vert); + const vfloat w1Vert = F2V(1.f - (t1Vert * Dy) - t2Vert); + const vfloat w0Vert = F2V(t1Vert - (t1Vert * Dy)); + + const vfloat cv = (w0Vert * xlogf(LVFU(src[ys][xs])) + w1Vert * xlogf(LVFU(src[ys + 1][xs]))) + (w2Vert * xlogf(LVFU(src[ys + 2][xs])) + w3Vert * xlogf(LVFU(src[ys + 3][xs]))); + + // Horizontal + const float t1Hor = A * (Dx - Dx * Dx); + const float t2Hor = (3.f - 2.f * Dx) * Dx * Dx; + const vfloat weight = _mm_set_ps(t1Hor * Dx, t1Hor * Dx - t1Hor + t2Hor, 1.f - (t1Hor * Dx) - t2Hor, t1Hor - (t1Hor * Dx)); + dest = mul * xexpf(vhadd(weight * cv)); +} #else inline void interpolateTransformChannelsCubic(const float* const* src, int xs, int ys, float Dx, float Dy, float& dest, float mul) { @@ -193,6 +271,34 @@ inline void interpolateTransformChannelsCubic(const float* const* src, int xs, i dest = mul * (cv[0] * w0Hor + cv[1] * w1Hor + cv[2] * w2Hor + cv[3] * w3Hor); } + +inline void interpolateTransformChannelsCubicLog(const float* const* src, int xs, int ys, float Dx, float Dy, float& dest, float mul) +{ + constexpr float A = -0.85f; + + 
// Vertical + const float t1Vert = A * (Dy - Dy * Dy); + const float t2Vert = (3.f - 2.f * Dy) * Dy * Dy; + const float w3Vert = t1Vert * Dy; + const float w2Vert = t1Vert * Dy - t1Vert + t2Vert; + const float w1Vert = 1.f - (t1Vert * Dy) - t2Vert; + const float w0Vert = t1Vert - (t1Vert * Dy); + + float cv[4]; + for (int i = 0; i < 4; ++i) { + cv[i] = w0Vert * xlogf(src[ys][xs + i]) + w1Vert * xlogf(src[ys + 1][xs + i]) + w2Vert * xlogf(src[ys + 2][xs + i]) + w3Vert * xlogf(src[ys + 3][xs + i]); + } + + // Horizontal + const float t1Hor = A * (Dx - Dx * Dx); + const float t2Hor = (3.f - 2.f * Dx) * Dx * Dx; + const float w3Hor = t1Hor * Dx; + const float w2Hor = t1Hor * Dx - t1Hor + t2Hor; + const float w1Hor = 1.f - (t1Hor * Dx) - t2Hor; + const float w0Hor = t1Hor - (t1Hor * Dx); + + dest = mul * xexpf(cv[0] * w0Hor + cv[1] * w1Hor + cv[2] * w2Hor + cv[3] * w3Hor); +} #endif } @@ -922,6 +1028,7 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I const double ascale = params->commonTrans.autofill ? 
getTransformAutoFill(oW, oH, pLCPMap) : 1.0; const bool darkening = (params->vignetting.amount <= 0.0); + const bool useLog = params->pdsharpening.enabled; const double centerFactorx = cx - w2; const double centerFactory = cy - h2; @@ -1011,14 +1118,26 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I if (yc > 0 && yc < original->getHeight() - 2 && xc > 0 && xc < original->getWidth() - 2) { // all interpolation pixels inside image - if (enableCA) { - interpolateTransformChannelsCubic(chOrig[c], xc - 1, yc - 1, Dx, Dy, chTrans[c][y][x], vignmul); - } else if (!highQuality) { - transformed->r(y, x) = vignmul * (original->r(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->r(yc, xc + 1) * Dx * (1.0 - Dy) + original->r(yc + 1, xc) * (1.0 - Dx) * Dy + original->r(yc + 1, xc + 1) * Dx * Dy); - transformed->g(y, x) = vignmul * (original->g(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->g(yc, xc + 1) * Dx * (1.0 - Dy) + original->g(yc + 1, xc) * (1.0 - Dx) * Dy + original->g(yc + 1, xc + 1) * Dx * Dy); - transformed->b(y, x) = vignmul * (original->b(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->b(yc, xc + 1) * Dx * (1.0 - Dy) + original->b(yc + 1, xc) * (1.0 - Dx) * Dy + original->b(yc + 1, xc + 1) * Dx * Dy); + if (!useLog) { + if (enableCA) { + interpolateTransformChannelsCubic(chOrig[c], xc - 1, yc - 1, Dx, Dy, chTrans[c][y][x], vignmul); + } else if (!highQuality) { + transformed->r(y, x) = vignmul * (original->r(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->r(yc, xc + 1) * Dx * (1.0 - Dy) + original->r(yc + 1, xc) * (1.0 - Dx) * Dy + original->r(yc + 1, xc + 1) * Dx * Dy); + transformed->g(y, x) = vignmul * (original->g(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->g(yc, xc + 1) * Dx * (1.0 - Dy) + original->g(yc + 1, xc) * (1.0 - Dx) * Dy + original->g(yc + 1, xc + 1) * Dx * Dy); + transformed->b(y, x) = vignmul * (original->b(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->b(yc, xc + 1) * Dx * (1.0 - Dy) + original->b(yc + 1, xc) * (1.0 - 
Dx) * Dy + original->b(yc + 1, xc + 1) * Dx * Dy); + } else { + interpolateTransformCubic(original, xc - 1, yc - 1, Dx, Dy, transformed->r(y, x), transformed->g(y, x), transformed->b(y, x), vignmul); + } } else { - interpolateTransformCubic(original, xc - 1, yc - 1, Dx, Dy, transformed->r(y, x), transformed->g(y, x), transformed->b(y, x), vignmul); + if (enableCA) { + interpolateTransformChannelsCubicLog(chOrig[c], xc - 1, yc - 1, Dx, Dy, chTrans[c][y][x], vignmul); + } else if (!highQuality) { + transformed->r(y, x) = vignmul * (original->r(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->r(yc, xc + 1) * Dx * (1.0 - Dy) + original->r(yc + 1, xc) * (1.0 - Dx) * Dy + original->r(yc + 1, xc + 1) * Dx * Dy); + transformed->g(y, x) = vignmul * (original->g(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->g(yc, xc + 1) * Dx * (1.0 - Dy) + original->g(yc + 1, xc) * (1.0 - Dx) * Dy + original->g(yc + 1, xc + 1) * Dx * Dy); + transformed->b(y, x) = vignmul * (original->b(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->b(yc, xc + 1) * Dx * (1.0 - Dy) + original->b(yc + 1, xc) * (1.0 - Dx) * Dy + original->b(yc + 1, xc + 1) * Dx * Dy); + } else { + interpolateTransformCubicLog(original, xc - 1, yc - 1, Dx, Dy, transformed->r(y, x), transformed->g(y, x), transformed->b(y, x), vignmul); + } } } else { // edge pixels @@ -1054,6 +1173,7 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I void ImProcFunctions::transformLCPCAOnly(Imagefloat *original, Imagefloat *transformed, int cx, int cy, const LensCorrection *pLCPMap) { assert(pLCPMap && params->lensProf.useCA && pLCPMap->isCACorrectionAvailable()); + const bool useLog = params->pdsharpening.enabled; float** chOrig[3]; chOrig[0] = original->r.ptrs; @@ -1089,7 +1209,11 @@ void ImProcFunctions::transformLCPCAOnly(Imagefloat *original, Imagefloat *trans // multiplier for vignetting correction if (yc > 0 && yc < original->getHeight() - 2 && xc > 0 && xc < original->getWidth() - 2) { // all interpolation 
pixels inside image - interpolateTransformChannelsCubic (chOrig[c], xc - 1, yc - 1, Dx, Dy, chTrans[c][y][x], 1.0); + if (!useLog) { + interpolateTransformChannelsCubic(chOrig[c], xc - 1, yc - 1, Dx, Dy, chTrans[c][y][x], 1.0); + } else { + interpolateTransformChannelsCubicLog(chOrig[c], xc - 1, yc - 1, Dx, Dy, chTrans[c][y][x], 1.0); + } } else { // edge pixels int y1 = LIM (yc, 0, original->getHeight() - 1); From bcb7df44dfe5c8507ea4b7ac7cde319ec51ac501 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Tue, 31 Dec 2019 15:23:24 +0100 Subject: [PATCH 02/22] Log transform: fix segfault --- rtengine/iptransform.cc | 30 +++++++++++++----------------- rtengine/sleef.h | 24 ++++++++++++++++++++++++ rtengine/sleefsseavx.c | 24 ++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 17 deletions(-) diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc index 736fa0620..77fa57985 100644 --- a/rtengine/iptransform.cc +++ b/rtengine/iptransform.cc @@ -124,9 +124,9 @@ inline void interpolateTransformCubicLog(rtengine::Imagefloat* src, int xs, int const vfloat w1Vert = F2V(1.f - (t1Vert * Dy) - t2Vert); const vfloat w0Vert = F2V(t1Vert - (t1Vert * Dy)); - const vfloat rv = (w0Vert * xlogf(LVFU(src->r(ys, xs))) + w1Vert * xlogf(LVFU(src->r(ys + 1, xs)))) + (w2Vert * xlogf(LVFU(src->r(ys + 2, xs))) + w3Vert * xlogf(LVFU(src->r(ys + 3, xs)))); - const vfloat gv = (w0Vert * xlogf(LVFU(src->g(ys, xs))) + w1Vert * xlogf(LVFU(src->g(ys + 1, xs)))) + (w2Vert * xlogf(LVFU(src->g(ys + 2, xs))) + w3Vert * xlogf(LVFU(src->g(ys + 3, xs)))); - const vfloat bv = (w0Vert * xlogf(LVFU(src->b(ys, xs))) + w1Vert * xlogf(LVFU(src->b(ys + 1, xs)))) + (w2Vert * xlogf(LVFU(src->b(ys + 2, xs))) + w3Vert * xlogf(LVFU(src->b(ys + 3, xs)))); + const vfloat rv = (w0Vert * xlogf1(LVFU(src->r(ys, xs))) + w1Vert * xlogf1(LVFU(src->r(ys + 1, xs)))) + (w2Vert * xlogf1(LVFU(src->r(ys + 2, xs))) + w3Vert * xlogf1(LVFU(src->r(ys + 3, xs)))); + const vfloat gv = (w0Vert * xlogf1(LVFU(src->g(ys, xs))) 
+ w1Vert * xlogf1(LVFU(src->g(ys + 1, xs)))) + (w2Vert * xlogf1(LVFU(src->g(ys + 2, xs))) + w3Vert * xlogf1(LVFU(src->g(ys + 3, xs)))); + const vfloat bv = (w0Vert * xlogf1(LVFU(src->b(ys, xs))) + w1Vert * xlogf1(LVFU(src->b(ys + 1, xs)))) + (w2Vert * xlogf1(LVFU(src->b(ys + 2, xs))) + w3Vert * xlogf1(LVFU(src->b(ys + 3, xs)))); // Horizontal const float t1Hor = A * (Dx - Dx * Dx); @@ -183,9 +183,9 @@ inline void interpolateTransformCubicLog(rtengine::Imagefloat* src, int xs, int float rv[4], gv[4], bv[4]; for (int i = 0; i < 4; ++i) { - rv[i] = w0Vert * xlogf(src->r(ys, xs + i)) + w1Vert * xlogf(src->r(ys + 1, xs + i)) + w2Vert * xlogf(src->r(ys + 2, xs + i)) + w3Vert * xlogf(src->r(ys + 3, xs + i)); - gv[i] = w0Vert * xlogf(src->g(ys, xs + i)) + w1Vert * xlogf(src->g(ys + 1, xs + i)) + w2Vert * xlogf(src->g(ys + 2, xs + i)) + w3Vert * xlogf(src->g(ys + 3, xs + i)); - bv[i] = w0Vert * xlogf(src->b(ys, xs + i)) + w1Vert * xlogf(src->b(ys + 1, xs + i)) + w2Vert * xlogf(src->b(ys + 2, xs + i)) + w3Vert * xlogf(src->b(ys + 3, xs + i)); + rv[i] = w0Vert * xlogf1(src->r(ys, xs + i)) + w1Vert * xlogf1(src->r(ys + 1, xs + i)) + w2Vert * xlogf1(src->r(ys + 2, xs + i)) + w3Vert * xlogf1(src->r(ys + 3, xs + i)); + gv[i] = w0Vert * xlogf1(src->g(ys, xs + i)) + w1Vert * xlogf1(src->g(ys + 1, xs + i)) + w2Vert * xlogf1(src->g(ys + 2, xs + i)) + w3Vert * xlogf1(src->g(ys + 3, xs + i)); + bv[i] = w0Vert * xlogf1(src->b(ys, xs + i)) + w1Vert * xlogf1(src->b(ys + 1, xs + i)) + w2Vert * xlogf1(src->b(ys + 2, xs + i)) + w3Vert * xlogf1(src->b(ys + 3, xs + i)); } // Horizontal @@ -235,7 +235,7 @@ inline void interpolateTransformChannelsCubicLog(const float* const* src, int xs const vfloat w1Vert = F2V(1.f - (t1Vert * Dy) - t2Vert); const vfloat w0Vert = F2V(t1Vert - (t1Vert * Dy)); - const vfloat cv = (w0Vert * xlogf(LVFU(src[ys][xs])) + w1Vert * xlogf(LVFU(src[ys + 1][xs]))) + (w2Vert * xlogf(LVFU(src[ys + 2][xs])) + w3Vert * xlogf(LVFU(src[ys + 3][xs]))); + const vfloat cv = (w0Vert 
* xlogf1(LVFU(src[ys][xs])) + w1Vert * xlogf1(LVFU(src[ys + 1][xs]))) + (w2Vert * xlogf1(LVFU(src[ys + 2][xs])) + w3Vert * xlogf1(LVFU(src[ys + 3][xs]))); // Horizontal const float t1Hor = A * (Dx - Dx * Dx); @@ -286,7 +286,7 @@ inline void interpolateTransformChannelsCubicLog(const float* const* src, int xs float cv[4]; for (int i = 0; i < 4; ++i) { - cv[i] = w0Vert * xlogf(src[ys][xs + i]) + w1Vert * xlogf(src[ys + 1][xs + i]) + w2Vert * xlogf(src[ys + 2][xs + i]) + w3Vert * xlogf(src[ys + 3][xs + i]); + cv[i] = w0Vert * xlogf1(src[ys][xs + i]) + w1Vert * xlogf1(src[ys + 1][xs + i]) + w2Vert * xlogf1(src[ys + 2][xs + i]) + w3Vert * xlogf1(src[ys + 3][xs + i]); } // Horizontal @@ -1118,23 +1118,19 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I if (yc > 0 && yc < original->getHeight() - 2 && xc > 0 && xc < original->getWidth() - 2) { // all interpolation pixels inside image - if (!useLog) { + if (!highQuality) { + transformed->r(y, x) = vignmul * (original->r(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->r(yc, xc + 1) * Dx * (1.0 - Dy) + original->r(yc + 1, xc) * (1.0 - Dx) * Dy + original->r(yc + 1, xc + 1) * Dx * Dy); + transformed->g(y, x) = vignmul * (original->g(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->g(yc, xc + 1) * Dx * (1.0 - Dy) + original->g(yc + 1, xc) * (1.0 - Dx) * Dy + original->g(yc + 1, xc + 1) * Dx * Dy); + transformed->b(y, x) = vignmul * (original->b(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->b(yc, xc + 1) * Dx * (1.0 - Dy) + original->b(yc + 1, xc) * (1.0 - Dx) * Dy + original->b(yc + 1, xc + 1) * Dx * Dy); + } else if (!useLog) { if (enableCA) { interpolateTransformChannelsCubic(chOrig[c], xc - 1, yc - 1, Dx, Dy, chTrans[c][y][x], vignmul); - } else if (!highQuality) { - transformed->r(y, x) = vignmul * (original->r(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->r(yc, xc + 1) * Dx * (1.0 - Dy) + original->r(yc + 1, xc) * (1.0 - Dx) * Dy + original->r(yc + 1, xc + 1) * Dx * Dy); - transformed->g(y, x) 
= vignmul * (original->g(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->g(yc, xc + 1) * Dx * (1.0 - Dy) + original->g(yc + 1, xc) * (1.0 - Dx) * Dy + original->g(yc + 1, xc + 1) * Dx * Dy); - transformed->b(y, x) = vignmul * (original->b(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->b(yc, xc + 1) * Dx * (1.0 - Dy) + original->b(yc + 1, xc) * (1.0 - Dx) * Dy + original->b(yc + 1, xc + 1) * Dx * Dy); } else { interpolateTransformCubic(original, xc - 1, yc - 1, Dx, Dy, transformed->r(y, x), transformed->g(y, x), transformed->b(y, x), vignmul); } } else { if (enableCA) { interpolateTransformChannelsCubicLog(chOrig[c], xc - 1, yc - 1, Dx, Dy, chTrans[c][y][x], vignmul); - } else if (!highQuality) { - transformed->r(y, x) = vignmul * (original->r(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->r(yc, xc + 1) * Dx * (1.0 - Dy) + original->r(yc + 1, xc) * (1.0 - Dx) * Dy + original->r(yc + 1, xc + 1) * Dx * Dy); - transformed->g(y, x) = vignmul * (original->g(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->g(yc, xc + 1) * Dx * (1.0 - Dy) + original->g(yc + 1, xc) * (1.0 - Dx) * Dy + original->g(yc + 1, xc + 1) * Dx * Dy); - transformed->b(y, x) = vignmul * (original->b(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->b(yc, xc + 1) * Dx * (1.0 - Dy) + original->b(yc + 1, xc) * (1.0 - Dx) * Dy + original->b(yc + 1, xc + 1) * Dx * Dy); } else { interpolateTransformCubicLog(original, xc - 1, yc - 1, Dx, Dy, transformed->r(y, x), transformed->g(y, x), transformed->b(y, x), vignmul); } diff --git a/rtengine/sleef.h b/rtengine/sleef.h index 30c059010..7b7d5995f 100644 --- a/rtengine/sleef.h +++ b/rtengine/sleef.h @@ -1206,6 +1206,30 @@ __inline float xlogf(float d) { return x; } +__inline float xlogf1(float d) { // does xlogf(vmaxf(d, 1.f)) but faster + float x, x2, t, m; + int e; + + e = ilogbp1f(d * 0.7071f); + m = ldexpkf(d, -e); + + x = (m-1.0f) / (m+1.0f); + x2 = x * x; + + t = 0.2371599674224853515625f; + t = mlaf(t, x2, 0.285279005765914916992188f); + t = mlaf(t, x2, 
0.400005519390106201171875f); + t = mlaf(t, x2, 0.666666567325592041015625f); + t = mlaf(t, x2, 2.0f); + + x = x * t + 0.693147180559945286226764f * e; + + if (xisinff(d)) x = rtengine::RT_INFINITY_F; + if (d <= 1.f) x = 0; + + return x; +} + __inline float xexpf(float d) { if(d<=-104.0f) return 0.0f; diff --git a/rtengine/sleefsseavx.c b/rtengine/sleefsseavx.c index 1982c7c4c..0af516f9b 100644 --- a/rtengine/sleefsseavx.c +++ b/rtengine/sleefsseavx.c @@ -1253,6 +1253,30 @@ static INLINE vfloat xlogf(vfloat d) { return x; } +static INLINE vfloat xlogf1(vfloat d) { // does xlogf(vmaxf(d, 1.f)) but faster + vfloat x, x2, t, m; + vint2 e; + + e = vilogbp1f(vmulf(d, vcast_vf_f(0.7071f))); + m = vldexpf(d, vsubi2(vcast_vi2_i(0), e)); + + x = vdivf(vaddf(vcast_vf_f(-1.0f), m), vaddf(vcast_vf_f(1.0f), m)); + x2 = vmulf(x, x); + + t = vcast_vf_f(0.2371599674224853515625f); + t = vmlaf(t, x2, vcast_vf_f(0.285279005765914916992188f)); + t = vmlaf(t, x2, vcast_vf_f(0.400005519390106201171875f)); + t = vmlaf(t, x2, vcast_vf_f(0.666666567325592041015625f)); + t = vmlaf(t, x2, vcast_vf_f(2.0f)); + + x = vaddf(vmulf(x, t), vmulf(vcast_vf_f(0.693147180559945286226764f), vcast_vf_vi2(e))); + + x = vself(vmaskf_ispinf(d), vcast_vf_f(INFINITYf), x); + x = vselfnotzero(vmaskf_le(d, vcast_vf_f(1.f)), x); + + return x; +} + static INLINE vfloat xlogf0(vfloat d) { vfloat x, x2, t, m; vint2 e; From 58d8e66b7245e63b39c0d92db7775d0ede14cc04 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Tue, 31 Dec 2019 19:09:06 +0100 Subject: [PATCH 03/22] Log transform: speedup, #5588 --- rtengine/iptransform.cc | 76 ++++++++++++++++++++++++++++++++--------- 1 file changed, 60 insertions(+), 16 deletions(-) diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc index 77fa57985..67647b7ef 100644 --- a/rtengine/iptransform.cc +++ b/rtengine/iptransform.cc @@ -86,6 +86,29 @@ float normn (float a, float b, int n) } } +void logEncode(rtengine::Imagefloat *original, bool multiThread) { + +#ifdef 
_OPENMP + #pragma omp parallel for schedule(dynamic, 16) if(multiThread) +#endif + + for (int y = 0; y < original->getHeight(); ++y) { + int x = 0; +#ifdef __SSE2__ + for (; x < original->getWidth() - 3; x += 4) { + STVFU(original->r(y, x), xlogf1(LVFU(original->r(y, x)))); + STVFU(original->g(y, x), xlogf1(LVFU(original->g(y, x)))); + STVFU(original->b(y, x), xlogf1(LVFU(original->b(y, x)))); + } +#endif + for (; x < original->getWidth(); ++x) { + original->r(y, x) = xlogf1(original->r(y, x)); + original->g(y, x) = xlogf1(original->g(y, x)); + original->b(y, x) = xlogf1(original->b(y, x)); + } + } +} + #ifdef __SSE2__ inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys, float Dx, float Dy, float &r, float &g, float &b, float mul) { @@ -124,9 +147,9 @@ inline void interpolateTransformCubicLog(rtengine::Imagefloat* src, int xs, int const vfloat w1Vert = F2V(1.f - (t1Vert * Dy) - t2Vert); const vfloat w0Vert = F2V(t1Vert - (t1Vert * Dy)); - const vfloat rv = (w0Vert * xlogf1(LVFU(src->r(ys, xs))) + w1Vert * xlogf1(LVFU(src->r(ys + 1, xs)))) + (w2Vert * xlogf1(LVFU(src->r(ys + 2, xs))) + w3Vert * xlogf1(LVFU(src->r(ys + 3, xs)))); - const vfloat gv = (w0Vert * xlogf1(LVFU(src->g(ys, xs))) + w1Vert * xlogf1(LVFU(src->g(ys + 1, xs)))) + (w2Vert * xlogf1(LVFU(src->g(ys + 2, xs))) + w3Vert * xlogf1(LVFU(src->g(ys + 3, xs)))); - const vfloat bv = (w0Vert * xlogf1(LVFU(src->b(ys, xs))) + w1Vert * xlogf1(LVFU(src->b(ys + 1, xs)))) + (w2Vert * xlogf1(LVFU(src->b(ys + 2, xs))) + w3Vert * xlogf1(LVFU(src->b(ys + 3, xs)))); + const vfloat rv = (w0Vert * LVFU(src->r(ys, xs)) + w1Vert * LVFU(src->r(ys + 1, xs))) + (w2Vert * LVFU(src->r(ys + 2, xs)) + w3Vert * LVFU(src->r(ys + 3, xs))); + const vfloat gv = (w0Vert * LVFU(src->g(ys, xs)) + w1Vert * LVFU(src->g(ys + 1, xs))) + (w2Vert * LVFU(src->g(ys + 2, xs)) + w3Vert * LVFU(src->g(ys + 3, xs))); + const vfloat bv = (w0Vert * LVFU(src->b(ys, xs)) + w1Vert * LVFU(src->b(ys + 1, xs))) + (w2Vert * 
LVFU(src->b(ys + 2, xs)) + w3Vert * LVFU(src->b(ys + 3, xs))); // Horizontal const float t1Hor = A * (Dx - Dx * Dx); @@ -183,9 +206,9 @@ inline void interpolateTransformCubicLog(rtengine::Imagefloat* src, int xs, int float rv[4], gv[4], bv[4]; for (int i = 0; i < 4; ++i) { - rv[i] = w0Vert * xlogf1(src->r(ys, xs + i)) + w1Vert * xlogf1(src->r(ys + 1, xs + i)) + w2Vert * xlogf1(src->r(ys + 2, xs + i)) + w3Vert * xlogf1(src->r(ys + 3, xs + i)); - gv[i] = w0Vert * xlogf1(src->g(ys, xs + i)) + w1Vert * xlogf1(src->g(ys + 1, xs + i)) + w2Vert * xlogf1(src->g(ys + 2, xs + i)) + w3Vert * xlogf1(src->g(ys + 3, xs + i)); - bv[i] = w0Vert * xlogf1(src->b(ys, xs + i)) + w1Vert * xlogf1(src->b(ys + 1, xs + i)) + w2Vert * xlogf1(src->b(ys + 2, xs + i)) + w3Vert * xlogf1(src->b(ys + 3, xs + i)); + rv[i] = w0Vert * src->r(ys, xs + i) + w1Vert * src->r(ys + 1, xs + i) + w2Vert * src->r(ys + 2, xs + i) + w3Vert * src->r(ys + 3, xs + i); + gv[i] = w0Vert * src->g(ys, xs + i) + w1Vert * src->g(ys + 1, xs + i) + w2Vert * src->g(ys + 2, xs + i) + w3Vert * src->g(ys + 3, xs + i); + bv[i] = w0Vert * src->b(ys, xs + i) + w1Vert * src->b(ys + 1, xs + i) + w2Vert * src->b(ys + 2, xs + i) + w3Vert * src->b(ys + 3, xs + i); } // Horizontal @@ -235,7 +258,7 @@ inline void interpolateTransformChannelsCubicLog(const float* const* src, int xs const vfloat w1Vert = F2V(1.f - (t1Vert * Dy) - t2Vert); const vfloat w0Vert = F2V(t1Vert - (t1Vert * Dy)); - const vfloat cv = (w0Vert * xlogf1(LVFU(src[ys][xs])) + w1Vert * xlogf1(LVFU(src[ys + 1][xs]))) + (w2Vert * xlogf1(LVFU(src[ys + 2][xs])) + w3Vert * xlogf1(LVFU(src[ys + 3][xs]))); + const vfloat cv = (w0Vert * LVFU(src[ys][xs]) + w1Vert * LVFU(src[ys + 1][xs])) + (w2Vert * LVFU(src[ys + 2][xs]) + w3Vert * LVFU(src[ys + 3][xs])); // Horizontal const float t1Hor = A * (Dx - Dx * Dx); @@ -286,7 +309,7 @@ inline void interpolateTransformChannelsCubicLog(const float* const* src, int xs float cv[4]; for (int i = 0; i < 4; ++i) { - cv[i] = w0Vert * 
xlogf1(src[ys][xs + i]) + w1Vert * xlogf1(src[ys + 1][xs + i]) + w2Vert * xlogf1(src[ys + 2][xs + i]) + w3Vert * xlogf1(src[ys + 3][xs + i]); + cv[i] = w0Vert * src[ys][xs + i] + w1Vert * src[ys + 1][xs + i] + w2Vert * src[ys + 2][xs + i] + w3Vert * src[ys + 3][xs + i]; } // Horizontal @@ -953,6 +976,7 @@ void ImProcFunctions::transformLuminanceOnly (Imagefloat* original, Imagefloat* void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, Imagefloat *transformed, int cx, int cy, int sx, int sy, int oW, int oH, int fW, int fH, const LensCorrection *pLCPMap) { + BENCHFUN // set up stuff, depending on the mode we are const bool enableLCPDist = pLCPMap && params->lensProf.useDist; const bool enableCA = highQuality && needsCA(); @@ -1028,10 +1052,13 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I const double ascale = params->commonTrans.autofill ? getTransformAutoFill(oW, oH, pLCPMap) : 1.0; const bool darkening = (params->vignetting.amount <= 0.0); - const bool useLog = params->pdsharpening.enabled; + const bool useLog = params->pdsharpening.enabled && highQuality; const double centerFactorx = cx - w2; const double centerFactory = cy - h2; + if (useLog) { + logEncode(original, multiThread); + } // main cycle #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 16) if(multiThread) @@ -1142,12 +1169,22 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I const int x1 = LIM(xc, 0, original->getWidth() - 1); const int x2 = LIM(xc + 1, 0, original->getWidth() - 1); - if (enableCA) { - chTrans[c][y][x] = vignmul * (chOrig[c][y1][x1] * (1.0 - Dx) * (1.0 - Dy) + chOrig[c][y1][x2] * Dx * (1.0 - Dy) + chOrig[c][y2][x1] * (1.0 - Dx) * Dy + chOrig[c][y2][x2] * Dx * Dy); + if (useLog) { + if (enableCA) { + chTrans[c][y][x] = vignmul * xexpf(chOrig[c][y1][x1] * (1.0 - Dx) * (1.0 - Dy) + chOrig[c][y1][x2] * Dx * (1.0 - Dy) + chOrig[c][y2][x1] * (1.0 - Dx) * Dy + chOrig[c][y2][x2] * Dx * 
Dy); + } else { + transformed->r(y, x) = vignmul * xexpf(original->r(y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->r(y1, x2) * Dx * (1.0 - Dy) + original->r(y2, x1) * (1.0 - Dx) * Dy + original->r(y2, x2) * Dx * Dy); + transformed->g(y, x) = vignmul * xexpf(original->g(y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->g(y1, x2) * Dx * (1.0 - Dy) + original->g(y2, x1) * (1.0 - Dx) * Dy + original->g(y2, x2) * Dx * Dy); + transformed->b(y, x) = vignmul * xexpf(original->b(y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->b(y1, x2) * Dx * (1.0 - Dy) + original->b(y2, x1) * (1.0 - Dx) * Dy + original->b(y2, x2) * Dx * Dy); + } } else { - transformed->r(y, x) = vignmul * (original->r(y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->r(y1, x2) * Dx * (1.0 - Dy) + original->r(y2, x1) * (1.0 - Dx) * Dy + original->r(y2, x2) * Dx * Dy); - transformed->g(y, x) = vignmul * (original->g(y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->g(y1, x2) * Dx * (1.0 - Dy) + original->g(y2, x1) * (1.0 - Dx) * Dy + original->g(y2, x2) * Dx * Dy); - transformed->b(y, x) = vignmul * (original->b(y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->b(y1, x2) * Dx * (1.0 - Dy) + original->b(y2, x1) * (1.0 - Dx) * Dy + original->b(y2, x2) * Dx * Dy); + if (enableCA) { + chTrans[c][y][x] = vignmul * (chOrig[c][y1][x1] * (1.0 - Dx) * (1.0 - Dy) + chOrig[c][y1][x2] * Dx * (1.0 - Dy) + chOrig[c][y2][x1] * (1.0 - Dx) * Dy + chOrig[c][y2][x2] * Dx * Dy); + } else { + transformed->r(y, x) = vignmul * (original->r(y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->r(y1, x2) * Dx * (1.0 - Dy) + original->r(y2, x1) * (1.0 - Dx) * Dy + original->r(y2, x2) * Dx * Dy); + transformed->g(y, x) = vignmul * (original->g(y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->g(y1, x2) * Dx * (1.0 - Dy) + original->g(y2, x1) * (1.0 - Dx) * Dy + original->g(y2, x2) * Dx * Dy); + transformed->b(y, x) = vignmul * (original->b(y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->b(y1, x2) * Dx * (1.0 - Dy) + original->b(y2, x1) * (1.0 - Dx) * Dy + 
original->b(y2, x2) * Dx * Dy); + } } } } else { @@ -1181,6 +1218,10 @@ void ImProcFunctions::transformLCPCAOnly(Imagefloat *original, Imagefloat *trans chTrans[1] = transformed->g.ptrs; chTrans[2] = transformed->b.ptrs; + if (useLog) { + logEncode(original, multiThread); + } + #ifdef _OPENMP #pragma omp parallel for if (multiThread) #endif @@ -1216,8 +1257,11 @@ void ImProcFunctions::transformLCPCAOnly(Imagefloat *original, Imagefloat *trans int y2 = LIM (yc + 1, 0, original->getHeight() - 1); int x1 = LIM (xc, 0, original->getWidth() - 1); int x2 = LIM (xc + 1, 0, original->getWidth() - 1); - - chTrans[c][y][x] = (chOrig[c][y1][x1] * (1.0 - Dx) * (1.0 - Dy) + chOrig[c][y1][x2] * Dx * (1.0 - Dy) + chOrig[c][y2][x1] * (1.0 - Dx) * Dy + chOrig[c][y2][x2] * Dx * Dy); + if (!useLog) { + chTrans[c][y][x] = (chOrig[c][y1][x1] * (1.0 - Dx) * (1.0 - Dy) + chOrig[c][y1][x2] * Dx * (1.0 - Dy) + chOrig[c][y2][x1] * (1.0 - Dx) * Dy + chOrig[c][y2][x2] * Dx * Dy); + } else { + chTrans[c][y][x] = (chOrig[c][y1][x1] * (1.0 - Dx) * (1.0 - Dy) + chOrig[c][y1][x2] * Dx * (1.0 - Dy) + chOrig[c][y2][x1] * (1.0 - Dx) * Dy + chOrig[c][y2][x2] * Dx * Dy); + } } } else { // not valid (source pixel x,y not inside source image, etc.) 
From 399a0055c61b9c03479e36f07a08b2c1d7d70a09 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Tue, 31 Dec 2019 20:07:39 +0100 Subject: [PATCH 04/22] Log transform: Fix broken build --- rtengine/iptransform.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc index 67647b7ef..8d89b96cc 100644 --- a/rtengine/iptransform.cc +++ b/rtengine/iptransform.cc @@ -976,7 +976,7 @@ void ImProcFunctions::transformLuminanceOnly (Imagefloat* original, Imagefloat* void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, Imagefloat *transformed, int cx, int cy, int sx, int sy, int oW, int oH, int fW, int fH, const LensCorrection *pLCPMap) { - BENCHFUN + // set up stuff, depending on the mode we are const bool enableLCPDist = pLCPMap && params->lensProf.useDist; const bool enableCA = highQuality && needsCA(); From 6302084804b24de7b4bcb44b2e7774464282c15b Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Wed, 1 Jan 2020 14:52:15 +0100 Subject: [PATCH 05/22] sleef: use our own replacement of rint. Based on code from Alberto Griggio, but further optimized for SSE --- rtengine/sleef.h | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/rtengine/sleef.h b/rtengine/sleef.h index 7b7d5995f..1a953953c 100644 --- a/rtengine/sleef.h +++ b/rtengine/sleef.h @@ -894,6 +894,15 @@ __inline double xlog1p(double a) { #define R_LN2f 1.442695040888963407359924681001892137426645954152985934135449406931f +#ifdef __SSE2__ +__inline int xrintf(float x) { + return _mm_cvt_ss2si(_mm_set_ss(x)); +} +#else +__inline int xrintf(float x) { + return x + (x < 0 ? 
-0.5f : 0.5f); +} +#endif __inline int32_t floatToRawIntBits(float d) { union { float f; @@ -980,7 +989,7 @@ __inline float xsinf(float d) { int q; float u, s; - q = rint(d * rtengine::RT_1_PI_F); + q = xrintf(d * rtengine::RT_1_PI_F); d = mlaf(q, -PI4_Af*4, d); d = mlaf(q, -PI4_Bf*4, d); @@ -1009,7 +1018,7 @@ __inline float xcosf(float d) { int q; float u, s; - q = 1 + 2*rint(d * rtengine::RT_1_PI_F - 0.5f); + q = 1 + 2*xrintf(d * rtengine::RT_1_PI_F - 0.5f); d = mlaf(q, -PI4_Af*2, d); d = mlaf(q, -PI4_Bf*2, d); @@ -1041,7 +1050,7 @@ __inline float2 xsincosf(float d) { float u, s, t; float2 r; - q = rint(d * rtengine::RT_2_PI_F); + q = xrintf(d * rtengine::RT_2_PI_F); s = d; @@ -1083,7 +1092,7 @@ __inline float xtanf(float d) { int q; float u, s, x; - q = rint(d * (float)(2 * rtengine::RT_1_PI)); + q = xrintf(d * (float)(2 * rtengine::RT_1_PI)); x = d; @@ -1233,7 +1242,7 @@ __inline float xlogf1(float d) { // does xlogf(vmaxf(d, 1.f)) but faster __inline float xexpf(float d) { if(d<=-104.0f) return 0.0f; - int q = rint(d * R_LN2f); + int q = xrintf(d * R_LN2f); float s, u; s = mlaf(q, -L2Uf, d); From 52f7c2c5311b5a43cc6fd499f36b4c325ee59aa7 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Wed, 1 Jan 2020 15:18:47 +0100 Subject: [PATCH 06/22] Log transform: further speedup --- rtengine/iptransform.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc index 8d89b96cc..caa5b7e4e 100644 --- a/rtengine/iptransform.cc +++ b/rtengine/iptransform.cc @@ -155,9 +155,11 @@ inline void interpolateTransformCubicLog(rtengine::Imagefloat* src, int xs, int const float t1Hor = A * (Dx - Dx * Dx); const float t2Hor = (3.f - 2.f * Dx) * Dx * Dx; const vfloat weight = _mm_set_ps(t1Hor * Dx, t1Hor * Dx - t1Hor + t2Hor, 1.f - (t1Hor * Dx) - t2Hor, t1Hor - (t1Hor * Dx)); - r = mul * xexpf(vhadd(weight * rv)); - g = mul * xexpf(vhadd(weight * gv)); - b = mul * xexpf(vhadd(weight * bv)); + const vfloat tempv = 
_mm_setr_ps(vhadd(weight * rv), vhadd(weight * gv), vhadd(weight * bv), 0.f); + const vfloat resultv = xexpf(tempv); + r = mul * resultv[0]; + g = mul * resultv[1]; + b = mul * resultv[2]; } #else inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys, float Dx, float Dy, float &r, float &g, float &b, float mul) From 085c68fc2985b2d556398f78ed411cf985ee67d6 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Wed, 1 Jan 2020 19:06:02 +0100 Subject: [PATCH 07/22] Log tranform: fix bug in preview mode --- rtengine/improcfun.h | 6 ++--- rtengine/iptransform.cc | 48 +++++++++++++++++++++++++-------------- rtengine/simpleprocess.cc | 2 +- 3 files changed, 35 insertions(+), 21 deletions(-) diff --git a/rtengine/improcfun.h b/rtengine/improcfun.h index 661f399ef..c1a4a5979 100644 --- a/rtengine/improcfun.h +++ b/rtengine/improcfun.h @@ -84,8 +84,8 @@ class ImProcFunctions void calcVignettingParams(int oW, int oH, const procparams::VignettingParams& vignetting, double &w2, double &h2, double& maxRadius, double &v, double &b, double &mul); void transformLuminanceOnly(Imagefloat* original, Imagefloat* transformed, int cx, int cy, int oW, int oH, int fW, int fH); - void transformGeneral(bool highQuality, Imagefloat *original, Imagefloat *transformed, int cx, int cy, int sx, int sy, int oW, int oH, int fW, int fH, const LensCorrection *pLCPMap); - void transformLCPCAOnly(Imagefloat *original, Imagefloat *transformed, int cx, int cy, const LensCorrection *pLCPMap); + void transformGeneral(bool highQuality, Imagefloat *original, Imagefloat *transformed, int cx, int cy, int sx, int sy, int oW, int oH, int fW, int fH, const LensCorrection *pLCPMap, bool useOriginalBuffer); + void transformLCPCAOnly(Imagefloat *original, Imagefloat *transformed, int cx, int cy, const LensCorrection *pLCPMap, bool useOriginalBuffer); bool needsCA() const; bool needsDistortion() const; @@ -156,7 +156,7 @@ public: // void colorCurve (LabImage* lold, LabImage* lnew); void 
sharpening(LabImage* lab, const procparams::SharpeningParams &sharpenParam, bool showMask = false); void sharpeningcam(CieImage* ncie, float** buffer, bool showMask = false); - void transform(Imagefloat* original, Imagefloat* transformed, int cx, int cy, int sx, int sy, int oW, int oH, int fW, int fH, const FramesMetaData *metadata, int rawRotationDeg, bool fullImage); + void transform(Imagefloat* original, Imagefloat* transformed, int cx, int cy, int sx, int sy, int oW, int oH, int fW, int fH, const FramesMetaData *metadata, int rawRotationDeg, bool fullImage, bool useOriginalBuffer = false); float resizeScale(const procparams::ProcParams* params, int fw, int fh, int &imw, int &imh); void lab2monitorRgb(LabImage* lab, Image8* image); void resize(Imagefloat* src, Imagefloat* dst, float dScale); diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc index caa5b7e4e..fc79dcae4 100644 --- a/rtengine/iptransform.cc +++ b/rtengine/iptransform.cc @@ -86,25 +86,25 @@ float normn (float a, float b, int n) } } -void logEncode(rtengine::Imagefloat *original, bool multiThread) { +void logEncode(rtengine::Imagefloat *src, rtengine::Imagefloat *dest, bool multiThread) { #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 16) if(multiThread) #endif - for (int y = 0; y < original->getHeight(); ++y) { + for (int y = 0; y < src->getHeight(); ++y) { int x = 0; #ifdef __SSE2__ - for (; x < original->getWidth() - 3; x += 4) { - STVFU(original->r(y, x), xlogf1(LVFU(original->r(y, x)))); - STVFU(original->g(y, x), xlogf1(LVFU(original->g(y, x)))); - STVFU(original->b(y, x), xlogf1(LVFU(original->b(y, x)))); + for (; x < src->getWidth() - 3; x += 4) { + STVFU(dest->r(y, x), xlogf1(LVFU(src->r(y, x)))); + STVFU(dest->g(y, x), xlogf1(LVFU(src->g(y, x)))); + STVFU(dest->b(y, x), xlogf1(LVFU(src->b(y, x)))); } #endif - for (; x < original->getWidth(); ++x) { - original->r(y, x) = xlogf1(original->r(y, x)); - original->g(y, x) = xlogf1(original->g(y, x)); - original->b(y, x) = 
xlogf1(original->b(y, x)); + for (; x < src->getWidth(); ++x) { + dest->r(y, x) = xlogf1(src->r(y, x)); + dest->g(y, x) = xlogf1(src->g(y, x)); + dest->b(y, x) = xlogf1(src->b(y, x)); } } } @@ -536,7 +536,7 @@ bool ImProcFunctions::transCoord (int W, int H, int x, int y, int w, int h, int& void ImProcFunctions::transform (Imagefloat* original, Imagefloat* transformed, int cx, int cy, int sx, int sy, int oW, int oH, int fW, int fH, const FramesMetaData *metadata, - int rawRotationDeg, bool fullImage) + int rawRotationDeg, bool fullImage, bool useOriginalBuffer) { double focalLen = metadata->getFocalLen(); double focalLen35mm = metadata->getFocalLen35mm(); @@ -584,10 +584,10 @@ void ImProcFunctions::transform (Imagefloat* original, Imagefloat* transformed, dest = tmpimg.get(); } } - transformGeneral(highQuality, original, dest, cx, cy, sx, sy, oW, oH, fW, fH, pLCPMap.get()); + transformGeneral(highQuality, original, dest, cx, cy, sx, sy, oW, oH, fW, fH, pLCPMap.get(), useOriginalBuffer); if (highQuality && dest != transformed) { - transformLCPCAOnly(dest, transformed, cx, cy, pLCPMap.get()); + transformLCPCAOnly(dest, transformed, cx, cy, pLCPMap.get(), useOriginalBuffer); } } } @@ -976,7 +976,7 @@ void ImProcFunctions::transformLuminanceOnly (Imagefloat* original, Imagefloat* } -void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, Imagefloat *transformed, int cx, int cy, int sx, int sy, int oW, int oH, int fW, int fH, const LensCorrection *pLCPMap) +void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, Imagefloat *transformed, int cx, int cy, int sx, int sy, int oW, int oH, int fW, int fH, const LensCorrection *pLCPMap, bool useOriginalBuffer) { // set up stuff, depending on the mode we are @@ -1058,8 +1058,15 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I const double centerFactorx = cx - w2; const double centerFactory = cy - h2; + std::unique_ptr<Imagefloat> tempLog; if (useLog) { -
logEncode(original, multiThread); + if (!useOriginalBuffer) { + tempLog.reset(new Imagefloat(original->getWidth(), original->getHeight())); + logEncode(original, tempLog.get(), multiThread); + original = tempLog.get(); + } else { + logEncode(original, original, multiThread); + } } // main cycle #ifdef _OPENMP @@ -1205,7 +1212,7 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I } -void ImProcFunctions::transformLCPCAOnly(Imagefloat *original, Imagefloat *transformed, int cx, int cy, const LensCorrection *pLCPMap) +void ImProcFunctions::transformLCPCAOnly(Imagefloat *original, Imagefloat *transformed, int cx, int cy, const LensCorrection *pLCPMap, bool useOriginalBuffer) { assert(pLCPMap && params->lensProf.useCA && pLCPMap->isCACorrectionAvailable()); const bool useLog = params->pdsharpening.enabled; @@ -1220,8 +1227,15 @@ void ImProcFunctions::transformLCPCAOnly(Imagefloat *original, Imagefloat *trans chTrans[1] = transformed->g.ptrs; chTrans[2] = transformed->b.ptrs; + std::unique_ptr<Imagefloat> tempLog; if (useLog) { - logEncode(original, multiThread); + if (!useOriginalBuffer) { + tempLog.reset(new Imagefloat(original->getWidth(), original->getHeight())); + logEncode(original, tempLog.get(), multiThread); + original = tempLog.get(); + } else { + logEncode(original, original, multiThread); + } } #ifdef _OPENMP diff --git a/rtengine/simpleprocess.cc b/rtengine/simpleprocess.cc index c35fc7431..3811c0c80 100644 --- a/rtengine/simpleprocess.cc +++ b/rtengine/simpleprocess.cc @@ -880,7 +880,7 @@ private: trImg = new Imagefloat (fw, fh); } ipf.transform (baseImg, trImg, 0, 0, 0, 0, fw, fh, fw, fh, - imgsrc->getMetaData(), imgsrc->getRotateDegree(), true); + imgsrc->getMetaData(), imgsrc->getRotateDegree(), true, true); if(trImg != baseImg) { delete baseImg; baseImg = trImg; From 2ce6e6d1d389bed4dfd82833ddf082920bc9fcd0 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Thu, 2 Jan 2020 14:35:27 +0100 Subject: [PATCH 08/22] skip unnecessary
transform --- rtengine/dcrop.cc | 75 +++++++++++++++++------------------ rtengine/improccoordinator.cc | 4 +- rtengine/improcfun.h | 2 +- rtengine/iptransform.cc | 9 ++++- rtengine/rtthumbnail.cc | 10 ++--- rtengine/simpleprocess.cc | 2 +- 6 files changed, 52 insertions(+), 50 deletions(-) diff --git a/rtengine/dcrop.cc b/rtengine/dcrop.cc index a6889b954..890003ee0 100644 --- a/rtengine/dcrop.cc +++ b/rtengine/dcrop.cc @@ -173,8 +173,6 @@ void Crop::update(int todo) int widIm = parent->fw;//full image int heiIm = parent->fh; - bool needstransform = parent->ipf.needsTransform(); - if (todo & (M_INIT | M_LINDENOISE | M_HDR)) { MyMutex::MyLock lock(parent->minit); // Also used in improccoord @@ -766,8 +764,9 @@ void Crop::update(int todo) } } + const bool needstransform = parent->ipf.needsTransform(skips(parent->fw, skip), skips(parent->fh, skip), parent->imgsrc->getRotateDegree(), parent->imgsrc->getMetaData()); // transform - if (needstransform || ((todo & (M_TRANSFORM | M_RGBCURVE)) && params.dirpyrequalizer.cbdlMethod == "bef" && params.dirpyrequalizer.enabled && !params.colorappearance.enabled)) { + if (needstransform || ((todo & (M_TRANSFORM | M_RGBCURVE)) && params.dirpyrequalizer.cbdlMethod == "bef" && params.dirpyrequalizer.enabled && !params.colorappearance.enabled)) { if (!transCrop) { transCrop = new Imagefloat(cropw, croph); } @@ -784,10 +783,7 @@ void Crop::update(int todo) baseCrop = transCrop; } } else { - if (transCrop) { - delete transCrop; - } - + delete transCrop; transCrop = nullptr; } @@ -1167,41 +1163,42 @@ bool Crop::setCropSizes(int rcx, int rcy, int rcw, int rch, int skip, bool inter parent->ipf.transCoord(parent->fw, parent->fh, bx1, by1, bw, bh, orx, ory, orw, orh); - if (check_need_larger_crop_for_lcp_distortion(parent->fw, parent->fh, orx, ory, orw, orh, *parent->params)) { - // TODO - this is an estimate of the max distortion relative to the image size. ATM it is hardcoded to be 15%, which seems enough. 
If not, need to revise - int dW = int (double (parent->fw) * 0.15 / (2 * skip)); - int dH = int (double (parent->fh) * 0.15 / (2 * skip)); - int x1 = orx - dW; - int x2 = orx + orw + dW; - int y1 = ory - dH; - int y2 = ory + orh + dH; + if (parent->ipf.needsTransform(skips(parent->fw, skip), skips(parent->fh, skip), parent->imgsrc->getRotateDegree(), parent->imgsrc->getMetaData())) { + if (check_need_larger_crop_for_lcp_distortion(parent->fw, parent->fh, orx, ory, orw, orh, *parent->params)) { + // TODO - this is an estimate of the max distortion relative to the image size. ATM it is hardcoded to be 15%, which seems enough. If not, need to revise + int dW = int (double (parent->fw) * 0.15 / (2 * skip)); + int dH = int (double (parent->fh) * 0.15 / (2 * skip)); + int x1 = orx - dW; + int x2 = orx + orw + dW; + int y1 = ory - dH; + int y2 = ory + orh + dH; - if (x1 < 0) { - x2 += -x1; - x1 = 0; + if (x1 < 0) { + x2 += -x1; + x1 = 0; + } + + if (x2 > parent->fw) { + x1 -= x2 - parent->fw; + x2 = parent->fw; + } + + if (y1 < 0) { + y2 += -y1; + y1 = 0; + } + + if (y2 > parent->fh) { + y1 -= y2 - parent->fh; + y2 = parent->fh; + } + + orx = max(x1, 0); + ory = max(y1, 0); + orw = min(x2 - x1, parent->fw - orx); + orh = min(y2 - y1, parent->fh - ory); } - - if (x2 > parent->fw) { - x1 -= x2 - parent->fw; - x2 = parent->fw; - } - - if (y1 < 0) { - y2 += -y1; - y1 = 0; - } - - if (y2 > parent->fh) { - y1 -= y2 - parent->fh; - y2 = parent->fh; - } - - orx = max(x1, 0); - ory = max(y1, 0); - orw = min(x2 - x1, parent->fw - orx); - orh = min(y2 - y1, parent->fh - ory); } - leftBorder = skips(rqx1 - bx1, skip); upperBorder = skips(rqy1 - by1, skip); diff --git a/rtengine/improccoordinator.cc b/rtengine/improccoordinator.cc index a4ca0ea0b..e915a11b0 100644 --- a/rtengine/improccoordinator.cc +++ b/rtengine/improccoordinator.cc @@ -545,7 +545,7 @@ void ImProcCoordinator::updatePreviewImage(int todo, bool panningRelatedChange) oprevi = orig_prev; // Remove transformation if 
unneeded - bool needstransform = ipf.needsTransform(); + bool needstransform = ipf.needsTransform(fw, fh, imgsrc->getRotateDegree(), imgsrc->getMetaData()); if ((needstransform || ((todo & (M_TRANSFORM | M_RGBCURVE)) && params->dirpyrequalizer.cbdlMethod == "bef" && params->dirpyrequalizer.enabled && !params->colorappearance.enabled))) { assert(oprevi); @@ -1349,7 +1349,7 @@ void ImProcCoordinator::saveInputICCReference(const Glib::ustring& fname, bool a imgsrc->getImage(currWB, tr, im, pp, ppar.toneCurve, ppar.raw); ImProcFunctions ipf(&ppar, true); - if (ipf.needsTransform()) { + if (ipf.needsTransform(fW, fH, imgsrc->getRotateDegree(), imgsrc->getMetaData())) { Imagefloat* trImg = new Imagefloat(fW, fH); ipf.transform(im, trImg, 0, 0, 0, 0, fW, fH, fW, fH, imgsrc->getMetaData(), imgsrc->getRotateDegree(), true); diff --git a/rtengine/improcfun.h b/rtengine/improcfun.h index c1a4a5979..8f4ae7771 100644 --- a/rtengine/improcfun.h +++ b/rtengine/improcfun.h @@ -119,7 +119,7 @@ public: } void setScale(double iscale); - bool needsTransform() const; + bool needsTransform(int oW, int oH, int rawRotationDeg, const FramesMetaData *metadata) const; bool needsPCVignetting() const; void firstAnalysis(const Imagefloat* const working, const procparams::ProcParams &params, LUTu & vhist16); diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc index fc79dcae4..c9f88caf0 100644 --- a/rtengine/iptransform.cc +++ b/rtengine/iptransform.cc @@ -1358,9 +1358,14 @@ bool ImProcFunctions::needsLensfun() const return params->lensProf.useLensfun(); } -bool ImProcFunctions::needsTransform () const +bool ImProcFunctions::needsTransform (int oW, int oH, int rawRotationDeg, const FramesMetaData *metadata) const { - return needsCA () || needsDistortion () || needsRotation () || needsPerspective () || needsGradient () || needsPCVignetting () || needsVignetting () || needsLCP() || needsLensfun(); + bool needsLf = needsLensfun(); + if (needsLf) { + std::unique_ptr<const LensCorrection> pLCPMap =
LFDatabase::getInstance()->findModifier(params->lensProf, metadata, oW, oH, params->coarse, rawRotationDeg); + needsLf = pLCPMap.get(); + } + return needsCA () || needsDistortion () || needsRotation () || needsPerspective () || needsGradient () || needsPCVignetting () || needsVignetting () || needsLCP() || needsLf; } diff --git a/rtengine/rtthumbnail.cc b/rtengine/rtthumbnail.cc index 0cdcbf6ed..9da601e2a 100644 --- a/rtengine/rtthumbnail.cc +++ b/rtengine/rtthumbnail.cc @@ -1247,12 +1247,12 @@ IImage8* Thumbnail::processImage (const procparams::ProcParams& params, eSensorT ipf.ToneMapFattal02(baseImg); // perform transform - if (ipf.needsTransform()) { + int origFW; + int origFH; + double tscale = 0.0; + getDimensions (origFW, origFH, tscale); + if (ipf.needsTransform(origFW * tscale + 0.5, origFH * tscale + 0.5, 0, metadata)) { Imagefloat* trImg = new Imagefloat (fw, fh); - int origFW; - int origFH; - double tscale = 0.0; - getDimensions (origFW, origFH, tscale); ipf.transform (baseImg, trImg, 0, 0, 0, 0, fw, fh, origFW * tscale + 0.5, origFH * tscale + 0.5, metadata, 0, true); // Raw rotate degree not detectable here delete baseImg; baseImg = trImg; diff --git a/rtengine/simpleprocess.cc b/rtengine/simpleprocess.cc index 3811c0c80..1c1e46a65 100644 --- a/rtengine/simpleprocess.cc +++ b/rtengine/simpleprocess.cc @@ -872,7 +872,7 @@ private: ipf.ToneMapFattal02(baseImg); // perform transform (excepted resizing) - if (ipf.needsTransform()) { + if (ipf.needsTransform(fw, fh, imgsrc->getRotateDegree(), imgsrc->getMetaData())) { Imagefloat* trImg = nullptr; if (ipf.needsLuminanceOnly()) { trImg = baseImg; From 49d594f67a6f9ad2f2b0d6f0c591136d53fe2dff Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Thu, 2 Jan 2020 19:28:57 +0100 Subject: [PATCH 09/22] Log transform: add method combobox --- rtdata/languages/default | 3 +++ rtengine/iptransform.cc | 4 ++-- rtengine/procparams.cc | 9 ++++++++- rtengine/procparams.h | 1 + rtgui/lensgeom.cc | 40 
+++++++++++++++++++++++++++++++++++++--- rtgui/lensgeom.h | 3 +++ rtgui/paramsedited.cc | 6 ++++++ rtgui/paramsedited.h | 1 + 8 files changed, 61 insertions(+), 6 deletions(-) diff --git a/rtdata/languages/default b/rtdata/languages/default index 5004bf25c..922c50c51 100644 --- a/rtdata/languages/default +++ b/rtdata/languages/default @@ -787,6 +787,7 @@ HISTORY_MSG_SH_COLORSPACE;S/H - Colorspace HISTORY_MSG_SOFTLIGHT_ENABLED;Soft light HISTORY_MSG_SOFTLIGHT_STRENGTH;Soft light - Strength HISTORY_MSG_TM_FATTAL_ANCHOR;DRC - Anchor +HISTORY_MSG_TRANS_METHOD;Geometry - Method HISTORY_NEWSNAPSHOT;Add HISTORY_NEWSNAPSHOT_TOOLTIP;Shortcut: <b>Alt-s</b> HISTORY_SNAPSHOT;Snapshot @@ -1775,6 +1776,8 @@ TP_LABCURVE_RSTPRO_TOOLTIP;Works on the Chromaticity slider and the CC curve. TP_LENSGEOM_AUTOCROP;Auto-Crop TP_LENSGEOM_FILL;Auto-fill TP_LENSGEOM_LABEL;Lens / Geometry +TP_LENSGEOM_LIN;Linear +TP_LENSGEOM_LOG;Logarithmic TP_LENSPROFILE_CORRECTION_AUTOMATCH;Automatically selected TP_LENSPROFILE_CORRECTION_LCPFILE;LCP file TP_LENSPROFILE_CORRECTION_MANUAL;Manually selected diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc index c9f88caf0..39f270d51 100644 --- a/rtengine/iptransform.cc +++ b/rtengine/iptransform.cc @@ -1054,7 +1054,7 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I const double ascale = params->commonTrans.autofill ?
getTransformAutoFill(oW, oH, pLCPMap) : 1.0; const bool darkening = (params->vignetting.amount <= 0.0); - const bool useLog = params->pdsharpening.enabled && highQuality; + const bool useLog = params->commonTrans.method == "log" && highQuality; const double centerFactorx = cx - w2; const double centerFactory = cy - h2; @@ -1215,7 +1215,7 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I void ImProcFunctions::transformLCPCAOnly(Imagefloat *original, Imagefloat *transformed, int cx, int cy, const LensCorrection *pLCPMap, bool useOriginalBuffer) { assert(pLCPMap && params->lensProf.useCA && pLCPMap->isCACorrectionAvailable()); - const bool useLog = params->pdsharpening.enabled; + const bool useLog = params->commonTrans.method == "log"; float** chOrig[3]; chOrig[0] = original->r.ptrs; diff --git a/rtengine/procparams.cc b/rtengine/procparams.cc index ba6fc237b..99f4f4404 100644 --- a/rtengine/procparams.cc +++ b/rtengine/procparams.cc @@ -1732,13 +1732,14 @@ bool CoarseTransformParams::operator !=(const CoarseTransformParams& other) cons } CommonTransformParams::CommonTransformParams() : + method("log"), autofill(true) { } bool CommonTransformParams::operator ==(const CommonTransformParams& other) const { - return autofill == other.autofill; + return method == other.method && autofill == other.autofill; } bool CommonTransformParams::operator !=(const CommonTransformParams& other) const @@ -3322,6 +3323,7 @@ int ProcParams::save(const Glib::ustring& fname, const Glib::ustring& fname2, bo saveToKeyfile(!pedited || pedited->coarse.vflip, "Coarse Transformation", "VerticalFlip", coarse.vflip, keyFile); // Common properties for transformations + saveToKeyfile(!pedited || pedited->commonTrans.method, "Common Properties for Transformations", "Method", commonTrans.method, keyFile); saveToKeyfile(!pedited || pedited->commonTrans.autofill, "Common Properties for Transformations", "AutoFill", commonTrans.autofill, keyFile); // Rotation @@ -4360,6 
+4362,11 @@ int ProcParams::load(const Glib::ustring& fname, ParamsEdited* pedited) } if (keyFile.has_group("Common Properties for Transformations")) { + if (keyFile.has_key("Common Properties for Transformations", "Method")) { + assignFromKeyfile(keyFile, "Common Properties for Transformations", "Method", pedited, commonTrans.method, pedited->commonTrans.method); + } else { + commonTrans.method = "lin"; + } assignFromKeyfile(keyFile, "Common Properties for Transformations", "AutoFill", pedited, commonTrans.autofill, pedited->commonTrans.autofill); } diff --git a/rtengine/procparams.h b/rtengine/procparams.h index c41e55872..0b6b2dc46 100644 --- a/rtengine/procparams.h +++ b/rtengine/procparams.h @@ -837,6 +837,7 @@ struct CoarseTransformParams { * Common transformation parameters */ struct CommonTransformParams { + Glib::ustring method; bool autofill; CommonTransformParams(); diff --git a/rtgui/lensgeom.cc b/rtgui/lensgeom.cc index 76e0635eb..762726107 100644 --- a/rtgui/lensgeom.cc +++ b/rtgui/lensgeom.cc @@ -17,6 +17,8 @@ * along with RawTherapee. If not, see . 
*/ #include "lensgeom.h" + +#include "eventmapper.h" #include "guiutils.h" #include "rtimage.h" @@ -28,6 +30,18 @@ using namespace rtengine::procparams; LensGeometry::LensGeometry () : FoldableToolPanel(this, "lensgeom", M("TP_LENSGEOM_LABEL")), rlistener(nullptr), lastFill(false) { + auto m = ProcEventMapper::getInstance(); + EvTransMethod = m->newEvent(TRANSFORM, "HISTORY_MSG_TRANS_METHOD"); + + Gtk::HBox* hb1 = Gtk::manage (new Gtk::HBox ()); + hb1->pack_start (*Gtk::manage (new Gtk::Label ( M("TP_RAW_DMETHOD") + ": ")), Gtk::PACK_SHRINK, 4); + method = Gtk::manage (new MyComboBoxText ()); + method->append(M("TP_LENSGEOM_LOG")); + method->append(M("TP_LENSGEOM_LIN")); + method->set_active(0); + hb1->pack_end (*method, Gtk::PACK_EXPAND_WIDGET, 4); + pack_start( *hb1, Gtk::PACK_SHRINK, 4); + fill = Gtk::manage (new Gtk::CheckButton (M("TP_LENSGEOM_FILL"))); pack_start (*fill); @@ -39,8 +53,9 @@ LensGeometry::LensGeometry () : FoldableToolPanel(this, "lensgeom", M("TP_LENSGE packBox = Gtk::manage (new ToolParamBlock ()); pack_start (*packBox); - autoCrop->signal_pressed().connect( sigc::mem_fun(*this, &LensGeometry::autoCropPressed) ); - fillConn = fill->signal_toggled().connect( sigc::mem_fun(*this, &LensGeometry::fillPressed) ); + method->connect(method->signal_changed().connect(sigc::mem_fun(*this, &LensGeometry::methodChanged))); + autoCrop->signal_pressed().connect(sigc::mem_fun(*this, &LensGeometry::autoCropPressed)); + fillConn = fill->signal_toggled().connect(sigc::mem_fun(*this, &LensGeometry::fillPressed)); fill->set_active (true); show_all (); @@ -55,8 +70,14 @@ void LensGeometry::read (const ProcParams* pp, const ParamsEdited* pedited) { disableListener (); + method->block (true); + method->set_active(pp->commonTrans.method == "log" ? 
0 : 1); if (pedited) { + if(!pedited->commonTrans.method) { + method->set_active_text(M("GENERAL_UNCHANGED")); + } + fill->set_inconsistent (!pedited->commonTrans.autofill); } @@ -67,15 +88,20 @@ void LensGeometry::read (const ProcParams* pp, const ParamsEdited* pedited) lastFill = pp->commonTrans.autofill; + method->block (false); enableListener (); } void LensGeometry::write (ProcParams* pp, ParamsEdited* pedited) { - + int currentRow = method->get_active_row_number(); + if( currentRow >= 0 && method->get_active_text() != M("GENERAL_UNCHANGED")) { + pp->commonTrans.method = currentRow == 0 ? "log" : "lin"; + } pp->commonTrans.autofill = fill->get_active (); if (pedited) { + pedited->commonTrans.method = method->get_active_text() != M("GENERAL_UNCHANGED"); pedited->commonTrans.autofill = !fill->get_inconsistent(); } } @@ -115,6 +141,14 @@ void LensGeometry::fillPressed () } } +void LensGeometry::methodChanged () +{ + + if (listener && method->get_active_row_number() >= 0) { + listener->panelChanged(EvTransMethod, method->get_active_text()); + } +} + void LensGeometry::setBatchMode (bool batchMode) { diff --git a/rtgui/lensgeom.h b/rtgui/lensgeom.h index 18b31a619..73c28b006 100644 --- a/rtgui/lensgeom.h +++ b/rtgui/lensgeom.h @@ -29,6 +29,7 @@ class LensGeometry final : { protected: + MyComboBoxText* method; Gtk::Button* autoCrop; LensGeomListener* rlistener; Gtk::CheckButton* fill; @@ -36,6 +37,7 @@ protected: sigc::connection fillConn; ToolParamBlock* packBox; + rtengine::ProcEvent EvTransMethod; public: LensGeometry (); @@ -50,6 +52,7 @@ public: void write (rtengine::procparams::ProcParams* pp, ParamsEdited* pedited = nullptr) override; void setBatchMode (bool batchMode) override; + void methodChanged(); void fillPressed (); void autoCropPressed (); void setLensGeomListener (LensGeomListener* l) diff --git a/rtgui/paramsedited.cc b/rtgui/paramsedited.cc index 82132008a..d41bd472c 100644 --- a/rtgui/paramsedited.cc +++ b/rtgui/paramsedited.cc @@ -321,6 +321,7 @@ 
void ParamsEdited::set(bool v) coarse.rotate = v; coarse.hflip = v; coarse.vflip = v; + commonTrans.method = v; commonTrans.autofill = v; rotate.degree = v; distortion.amount = v; @@ -904,6 +905,7 @@ void ParamsEdited::initFrom(const std::vector& coarse.rotate = coarse.rotate && p.coarse.rotate == other.coarse.rotate; coarse.hflip = coarse.hflip && p.coarse.hflip == other.coarse.hflip; coarse.vflip = coarse.vflip && p.coarse.vflip == other.coarse.vflip; + commonTrans.method = commonTrans.method && p.commonTrans.method == other.commonTrans.method; commonTrans.autofill = commonTrans.autofill && p.commonTrans.autofill == other.commonTrans.autofill; rotate.degree = rotate.degree && p.rotate.degree == other.rotate.degree; distortion.amount = distortion.amount && p.distortion.amount == other.distortion.amount; @@ -2265,6 +2267,10 @@ void ParamsEdited::combine(rtengine::procparams::ProcParams& toEdit, const rteng toEdit.coarse.vflip = mods.coarse.vflip; } + if (commonTrans.method) { + toEdit.commonTrans.method = mods.commonTrans.method; + } + if (commonTrans.autofill) { toEdit.commonTrans.autofill = mods.commonTrans.autofill; } diff --git a/rtgui/paramsedited.h b/rtgui/paramsedited.h index 01a3e4efe..153dd7bb6 100644 --- a/rtgui/paramsedited.h +++ b/rtgui/paramsedited.h @@ -365,6 +365,7 @@ struct CoarseTransformParamsEdited { }; struct CommonTransformParamsEdited { + bool method; bool autofill; }; From 7c2aa2405b497027fbcd8865f6449ab919aa6813 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Thu, 2 Jan 2020 23:01:37 +0100 Subject: [PATCH 10/22] Startup crashes in LensProfilePanel::LFDbHelper, fixes #5577, thanks to @dlichtenberger for the fix --- rtgui/lensprofile.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rtgui/lensprofile.cc b/rtgui/lensprofile.cc index be21512d5..1a8391f66 100644 --- a/rtgui/lensprofile.cc +++ b/rtgui/lensprofile.cc @@ -586,6 +586,9 @@ void LensProfilePanel::onCorrModeChanged(const Gtk::RadioButton* rbChanged) 
LensProfilePanel::LFDbHelper::LFDbHelper() { + lensfunCameraModel = Gtk::TreeStore::create(lensfunModelCam); + lensfunLensModel = Gtk::TreeStore::create(lensfunModelLens); + #ifdef _OPENMP #pragma omp parallel sections if (!settings->verbose) #endif @@ -594,14 +597,12 @@ LensProfilePanel::LFDbHelper::LFDbHelper() #pragma omp section #endif { - lensfunCameraModel = Gtk::TreeStore::create(lensfunModelCam); fillLensfunCameras(); } #ifdef _OPENMP #pragma omp section #endif { - lensfunLensModel = Gtk::TreeStore::create(lensfunModelLens); fillLensfunLenses(); } } From e07ff4032b23198d6cd33717888ab8484d895744 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Fri, 3 Jan 2020 11:35:20 +0100 Subject: [PATCH 11/22] log transform: Fix segfault --- rtengine/iptransform.cc | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc index 39f270d51..29e10035e 100644 --- a/rtengine/iptransform.cc +++ b/rtengine/iptransform.cc @@ -1217,15 +1217,7 @@ void ImProcFunctions::transformLCPCAOnly(Imagefloat *original, Imagefloat *trans assert(pLCPMap && params->lensProf.useCA && pLCPMap->isCACorrectionAvailable()); const bool useLog = params->commonTrans.method == "log"; - float** chOrig[3]; - chOrig[0] = original->r.ptrs; - chOrig[1] = original->g.ptrs; - chOrig[2] = original->b.ptrs; - - float** chTrans[3]; - chTrans[0] = transformed->r.ptrs; - chTrans[1] = transformed->g.ptrs; - chTrans[2] = transformed->b.ptrs; + float** chTrans[3] = {transformed->r.ptrs, transformed->g.ptrs, transformed->b.ptrs}; std::unique_ptr tempLog; if (useLog) { @@ -1237,6 +1229,7 @@ void ImProcFunctions::transformLCPCAOnly(Imagefloat *original, Imagefloat *trans logEncode(original, original, multiThread); } } + float** chOrig[3] = {original->r.ptrs, original->g.ptrs, original->b.ptrs}; #ifdef _OPENMP #pragma omp parallel for if (multiThread) @@ -1276,7 +1269,7 @@ void ImProcFunctions::transformLCPCAOnly(Imagefloat *original, Imagefloat 
*trans if (!useLog) { chTrans[c][y][x] = (chOrig[c][y1][x1] * (1.0 - Dx) * (1.0 - Dy) + chOrig[c][y1][x2] * Dx * (1.0 - Dy) + chOrig[c][y2][x1] * (1.0 - Dx) * Dy + chOrig[c][y2][x2] * Dx * Dy); } else { - chTrans[c][y][x] = (chOrig[c][y1][x1] * (1.0 - Dx) * (1.0 - Dy) + chOrig[c][y1][x2] * Dx * (1.0 - Dy) + chOrig[c][y2][x1] * (1.0 - Dx) * Dy + chOrig[c][y2][x2] * Dx * Dy); + chTrans[c][y][x] = xexpf(chOrig[c][y1][x1] * (1.0 - Dx) * (1.0 - Dy) + chOrig[c][y1][x2] * Dx * (1.0 - Dy) + chOrig[c][y2][x1] * (1.0 - Dx) * Dy + chOrig[c][y2][x2] * Dx * Dy); } } } else { From cabbf8c229464eb52c90a0ebf19606026793bba9 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Fri, 3 Jan 2020 20:23:34 +0100 Subject: [PATCH 12/22] guidedfilter: direct copy when no rescaling is needed, copied from ART --- rtengine/guidedfilter.cc | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/rtengine/guidedfilter.cc b/rtengine/guidedfilter.cc index 6b2adb773..ad3beec51 100644 --- a/rtengine/guidedfilter.cc +++ b/rtengine/guidedfilter.cc @@ -106,7 +106,18 @@ void guidedFilter(const array2D<float> &guide, const array2D<float> &src, array2 const auto f_subsample = [multithread](array2D<float> &d, const array2D<float> &s) -> void { - rescaleBilinear(s, d, multithread); + if (d.width() == s.width() && d.height() == s.height()) { +#ifdef _OPENMP + #pragma omp parallel for if (multithread) +#endif + for (int y = 0; y < s.height(); ++y) { + for (int x = 0; x < s.width(); ++x) { + d[y][x] = s[y][x]; + } + } + } else { + rescaleBilinear(s, d, multithread); + } }; const auto f_mean = From 525283650306d1ced6f6df5ab2625630ea12c7df Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Sat, 4 Jan 2020 11:28:05 +0100 Subject: [PATCH 13/22] filterpanel: make full use of the screen vertical space, copied from ART --- rtgui/filterpanel.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/rtgui/filterpanel.cc b/rtgui/filterpanel.cc index 301a7b188..2c4fc52d2 100644 --- a/rtgui/filterpanel.cc
+++ b/rtgui/filterpanel.cc @@ -14,7 +14,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with RawTherapee. If not, see <http://www.gnu.org/licenses/>. + * along with RawTherapee. If not, see <https://www.gnu.org/licenses/>. */ #include "filterpanel.h" #include "multilangmgr.h" @@ -107,8 +107,8 @@ FilterPanel::FilterPanel () : listener (nullptr) scamera->set_policy(Gtk::POLICY_AUTOMATIC, Gtk::POLICY_ALWAYS); scamera->set_size_request(-1, 80); scamera->add(*camera); - cvb->pack_start (*scamera, Gtk::PACK_SHRINK, 0); - pack_start (*cvb, Gtk::PACK_SHRINK, 4); + cvb->pack_start (*scamera, Gtk::PACK_EXPAND_WIDGET, 0); + pack_start (*cvb, Gtk::PACK_EXPAND_WIDGET, 4); enaLens = Gtk::manage(new Gtk::CheckButton(M("EXIFFILTER_LENS") + ":")); Gtk::VBox* lvb = Gtk::manage(new Gtk::VBox ()); @@ -119,8 +119,8 @@ FilterPanel::FilterPanel () : listener (nullptr) slens->set_policy(Gtk::POLICY_AUTOMATIC, Gtk::POLICY_ALWAYS); slens->set_size_request(-1, 80); slens->add(*lens); - lvb->pack_start (*slens, Gtk::PACK_SHRINK, 0); - pack_start (*lvb, Gtk::PACK_SHRINK, 4); + lvb->pack_start (*slens, Gtk::PACK_EXPAND_WIDGET, 0); + pack_start (*lvb, Gtk::PACK_EXPAND_WIDGET, 4); enaFiletype = Gtk::manage(new Gtk::CheckButton(M("EXIFFILTER_FILETYPE") + ":")); Gtk::VBox* ftvb = Gtk::manage(new Gtk::VBox ()); @@ -131,8 +131,8 @@ FilterPanel::FilterPanel () : listener (nullptr) sfiletype->set_policy(Gtk::POLICY_AUTOMATIC, Gtk::POLICY_ALWAYS); sfiletype->set_size_request(-1, 80); sfiletype->add(*filetype); - ftvb->pack_start (*sfiletype, Gtk::PACK_SHRINK, 0); - pack_start (*ftvb, Gtk::PACK_SHRINK, 4); + ftvb->pack_start (*sfiletype, Gtk::PACK_EXPAND_WIDGET, 0); + pack_start (*ftvb, Gtk::PACK_EXPAND_WIDGET, 4); // add panel ending Gtk::VBox* vboxpe = Gtk::manage (new Gtk::VBox ()); From 0edac2dac752ad75656817140b8492812ab2dd4d Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Mon, 6 Jan 2020 19:01:30 +0100 Subject: [PATCH 14/22] Speedup for scalar xlog functions ---
rtengine/sleef.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rtengine/sleef.h b/rtengine/sleef.h index 1a953953c..b7655258b 100644 --- a/rtengine/sleef.h +++ b/rtengine/sleef.h @@ -532,7 +532,7 @@ __inline double xlog(double d) { x = x * t + 0.693147180559945286226764 * e; - if (xisinf(d)) x = rtengine::RT_INFINITY; + if (xispinf(d)) x = rtengine::RT_INFINITY; if (d < 0) x = rtengine::RT_NAN; if (d == 0) x = -rtengine::RT_INFINITY; @@ -864,7 +864,7 @@ __inline double xlog10(double a) { double2 d = mul_dd(logk(a), dd(0.43429448190325176116, 6.6494347733425473126e-17)); double x = d.x + d.y; - if (xisinf(a)) x = rtengine::RT_INFINITY; + if (xispinf(a)) x = rtengine::RT_INFINITY; if (a < 0) x = rtengine::RT_NAN; if (a == 0) x = -rtengine::RT_INFINITY; @@ -875,7 +875,7 @@ __inline double xlog1p(double a) { double2 d = logk2(add2_ss(a, 1)); double x = d.x + d.y; - if (xisinf(a)) x = rtengine::RT_INFINITY; + if (xispinf(a)) x = rtengine::RT_INFINITY; if (a < -1) x = rtengine::RT_NAN; if (a == -1) x = -rtengine::RT_INFINITY; @@ -1208,7 +1208,7 @@ __inline float xlogf(float d) { x = x * t + 0.693147180559945286226764f * e; - if (xisinff(d)) x = rtengine::RT_INFINITY_F; + if (xispinff(d)) x = rtengine::RT_INFINITY_F; if (d < 0) x = rtengine::RT_NAN_F; if (d == 0) x = -rtengine::RT_INFINITY_F; @@ -1233,7 +1233,7 @@ __inline float xlogf1(float d) { // does xlogf(vmaxf(d, 1.f)) but faster x = x * t + 0.693147180559945286226764f * e; - if (xisinff(d)) x = rtengine::RT_INFINITY_F; + if (xispinff(d)) x = rtengine::RT_INFINITY_F; if (d <= 1.f) x = 0; return x; From 355fd5a44a2b740292406db61f038224cc8df88d Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Fri, 10 Jan 2020 14:25:51 +0100 Subject: [PATCH 15/22] Capture sharpening: add vectorization hints for clang --- rtengine/capturesharpening.cc | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/rtengine/capturesharpening.cc b/rtengine/capturesharpening.cc index 
e5bfde555..4beb4091a 100644 --- a/rtengine/capturesharpening.cc +++ b/rtengine/capturesharpening.cc @@ -131,6 +131,9 @@ inline void gauss3x3div (float** RESTRICT src, float** RESTRICT dst, float** RES const float c00 = kernel[1][1]; for (int i = 1; i < tileSize - 1; i++) { +#ifdef __clang__ + #pragma clang loop vectorize(assume_safety) +#endif for (int j = 1; j < tileSize - 1; j++) { const float val = c11 * (src[i - 1][j - 1] + src[i - 1][j + 1] + src[i + 1][j - 1] + src[i + 1][j + 1]) + c10 * (src[i - 1][j] + src[i][j - 1] + src[i][j + 1] + src[i + 1][j]) + @@ -151,6 +154,9 @@ inline void gauss5x5div (float** RESTRICT src, float** RESTRICT dst, float** RES for (int i = 2; i < tileSize - 2; ++i) { // I tried hand written SSE code but gcc vectorizes better +#ifdef __clang__ + #pragma clang loop vectorize(assume_safety) +#endif for (int j = 2; j < tileSize - 2; ++j) { const float val = c21 * ((src[i - 2][j - 1] + src[i - 2][j + 1]) + (src[i - 1][j - 2] + src[i - 1][j + 2]) + (src[i + 1][j - 2] + src[i + 1][j + 2]) + (src[i + 2][j - 1] + src[i + 2][j + 1])) + c20 * (src[i - 2][j] + src[i][j - 2] + src[i][j + 2] + src[i + 2][j]) + @@ -177,6 +183,9 @@ inline void gauss7x7div(float** RESTRICT src, float** RESTRICT dst, float** REST for (int i = 3; i < tileSize - 3; ++i) { // I tried hand written SSE code but gcc vectorizes better +#ifdef __clang__ + #pragma clang loop vectorize(assume_safety) +#endif for (int j = 3; j < tileSize - 3; ++j) { const float val = c31 * ((src[i - 3][j - 1] + src[i - 3][j + 1]) + (src[i - 1][j - 3] + src[i - 1][j + 3]) + (src[i + 1][j - 3] + src[i + 1][j + 3]) + (src[i + 3][j - 1] + src[i + 3][j + 1])) + c30 * (src[i - 3][j] + src[i][j - 3] + src[i][j + 3] + src[i + 3][j]) + @@ -199,6 +208,9 @@ inline void gauss3x3mult(float** RESTRICT src, float** RESTRICT dst, const int t const float c00 = kernel[1][1]; for (int i = 1; i < tileSize - 1; i++) { +#ifdef __clang__ + #pragma clang loop vectorize(assume_safety) +#endif for (int j = 1; j < tileSize 
- 1; j++) { const float val = c11 * (src[i - 1][j - 1] + src[i - 1][j + 1] + src[i + 1][j - 1] + src[i + 1][j + 1]) + c10 * (src[i - 1][j] + src[i][j - 1] + src[i][j + 1] + src[i + 1][j]) + @@ -220,6 +232,9 @@ inline void gauss5x5mult (float** RESTRICT src, float** RESTRICT dst, const int for (int i = 2; i < tileSize - 2; ++i) { // I tried hand written SSE code but gcc vectorizes better +#ifdef __clang__ + #pragma clang loop vectorize(assume_safety) +#endif for (int j = 2; j < tileSize - 2; ++j) { const float val = c21 * ((src[i - 2][j - 1] + src[i - 2][j + 1]) + (src[i - 1][j - 2] + src[i - 1][j + 2]) + (src[i + 1][j - 2] + src[i + 1][j + 2]) + (src[i + 2][j - 1] + src[i + 2][j + 1])) + c20 * (src[i - 2][j] + src[i][j - 2] + src[i][j + 2] + src[i + 2][j]) + @@ -246,6 +261,9 @@ inline void gauss7x7mult(float** RESTRICT src, float** RESTRICT dst, const int t for (int i = 3; i < tileSize - 3; ++i) { // I tried hand written SSE code but gcc vectorizes better +#ifdef __clang__ + #pragma clang loop vectorize(assume_safety) +#endif for (int j = 3; j < tileSize - 3; ++j) { const float val = c31 * ((src[i - 3][j - 1] + src[i - 3][j + 1]) + (src[i - 1][j - 3] + src[i - 1][j + 3]) + (src[i + 1][j - 3] + src[i + 1][j + 3]) + (src[i + 3][j - 1] + src[i + 3][j + 1])) + c30 * (src[i - 3][j] + src[i][j - 3] + src[i][j + 3] + src[i + 3][j]) + From 09b1a5fada6b9d12f271e0ab03bfa4df4bea31d9 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Fri, 10 Jan 2020 19:45:59 +0100 Subject: [PATCH 16/22] Capture sharpening: add vectorization hints for gcc --- rtengine/capturesharpening.cc | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/rtengine/capturesharpening.cc b/rtengine/capturesharpening.cc index 4beb4091a..f8067ac0b 100644 --- a/rtengine/capturesharpening.cc +++ b/rtengine/capturesharpening.cc @@ -131,8 +131,10 @@ inline void gauss3x3div (float** RESTRICT src, float** RESTRICT dst, float** RES const float c00 = kernel[1][1]; for (int i = 1; i < 
tileSize - 1; i++) { -#ifdef __clang__ +#if defined(__clang__) #pragma clang loop vectorize(assume_safety) +#elif defined(__GNUC__) + #pragma GCC ivdep #endif for (int j = 1; j < tileSize - 1; j++) { const float val = c11 * (src[i - 1][j - 1] + src[i - 1][j + 1] + src[i + 1][j - 1] + src[i + 1][j + 1]) + @@ -154,8 +156,10 @@ inline void gauss5x5div (float** RESTRICT src, float** RESTRICT dst, float** RES for (int i = 2; i < tileSize - 2; ++i) { // I tried hand written SSE code but gcc vectorizes better -#ifdef __clang__ +#if defined(__clang__) #pragma clang loop vectorize(assume_safety) +#elif defined(__GNUC__) + #pragma GCC ivdep #endif for (int j = 2; j < tileSize - 2; ++j) { const float val = c21 * ((src[i - 2][j - 1] + src[i - 2][j + 1]) + (src[i - 1][j - 2] + src[i - 1][j + 2]) + (src[i + 1][j - 2] + src[i + 1][j + 2]) + (src[i + 2][j - 1] + src[i + 2][j + 1])) + @@ -183,8 +187,10 @@ inline void gauss7x7div(float** RESTRICT src, float** RESTRICT dst, float** REST for (int i = 3; i < tileSize - 3; ++i) { // I tried hand written SSE code but gcc vectorizes better -#ifdef __clang__ +#if defined(__clang__) #pragma clang loop vectorize(assume_safety) +#elif defined(__GNUC__) + #pragma GCC ivdep #endif for (int j = 3; j < tileSize - 3; ++j) { const float val = c31 * ((src[i - 3][j - 1] + src[i - 3][j + 1]) + (src[i - 1][j - 3] + src[i - 1][j + 3]) + (src[i + 1][j - 3] + src[i + 1][j + 3]) + (src[i + 3][j - 1] + src[i + 3][j + 1])) + @@ -208,8 +214,10 @@ inline void gauss3x3mult(float** RESTRICT src, float** RESTRICT dst, const int t const float c00 = kernel[1][1]; for (int i = 1; i < tileSize - 1; i++) { -#ifdef __clang__ +#if defined(__clang__) #pragma clang loop vectorize(assume_safety) +#elif defined(__GNUC__) + #pragma GCC ivdep #endif for (int j = 1; j < tileSize - 1; j++) { const float val = c11 * (src[i - 1][j - 1] + src[i - 1][j + 1] + src[i + 1][j - 1] + src[i + 1][j + 1]) + @@ -232,8 +240,10 @@ inline void gauss5x5mult (float** RESTRICT src, float** 
RESTRICT dst, const int for (int i = 2; i < tileSize - 2; ++i) { // I tried hand written SSE code but gcc vectorizes better -#ifdef __clang__ +#if defined(__clang__) #pragma clang loop vectorize(assume_safety) +#elif defined(__GNUC__) + #pragma GCC ivdep #endif for (int j = 2; j < tileSize - 2; ++j) { const float val = c21 * ((src[i - 2][j - 1] + src[i - 2][j + 1]) + (src[i - 1][j - 2] + src[i - 1][j + 2]) + (src[i + 1][j - 2] + src[i + 1][j + 2]) + (src[i + 2][j - 1] + src[i + 2][j + 1])) + @@ -261,8 +271,10 @@ inline void gauss7x7mult(float** RESTRICT src, float** RESTRICT dst, const int t for (int i = 3; i < tileSize - 3; ++i) { // I tried hand written SSE code but gcc vectorizes better -#ifdef __clang__ +#if defined(__clang__) #pragma clang loop vectorize(assume_safety) +#elif defined(__GNUC__) + #pragma GCC ivdep #endif for (int j = 3; j < tileSize - 3; ++j) { const float val = c31 * ((src[i - 3][j - 1] + src[i - 3][j + 1]) + (src[i - 1][j - 3] + src[i - 1][j + 3]) + (src[i + 1][j - 3] + src[i + 1][j + 3]) + (src[i + 3][j - 1] + src[i + 3][j + 1])) + From 2ff619e5e2832df25658186905bc89353351abd0 Mon Sep 17 00:00:00 2001 From: Desmis Date: Sat, 11 Jan 2020 08:23:15 +0100 Subject: [PATCH 17/22] Fixed bad behavior gamma slider tone-mapping --- rtgui/epd.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rtgui/epd.cc b/rtgui/epd.cc index 307790ff7..6544b7131 100644 --- a/rtgui/epd.cc +++ b/rtgui/epd.cc @@ -69,6 +69,7 @@ void EdgePreservingDecompositionUI::read(const ProcParams *pp, const ParamsEdite setEnabled(pp->epd.enabled); strength->set_sensitive (true); + gamma->set_sensitive (true); if(pp->wavelet.enabled) { if(pp->wavelet.tmrs == 0) { @@ -98,6 +99,7 @@ void EdgePreservingDecompositionUI::write(ProcParams *pp, ParamsEdited *pedited) pp->epd.reweightingIterates = reweightingIterates->getValue(); pp->epd.enabled = getEnabled(); strength->set_sensitive (true); + gamma->set_sensitive (true); if(pp->wavelet.enabled) { if(pp->wavelet.tmrs == 0) { From 
1a02f16c35ff7e366010c1170637b7b7a9dff5a0 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Sat, 11 Jan 2020 11:20:41 +0100 Subject: [PATCH 18/22] crash in Tab: transform, fixes #5604 --- rtengine/iptransform.cc | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc index 29e10035e..9e7c61dcf 100644 --- a/rtengine/iptransform.cc +++ b/rtengine/iptransform.cc @@ -1006,11 +1006,6 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I calcPCVignetteParams(fW, fH, oW, oH, params->pcvignette, params->crop, pcv); } - const std::array chOrig = { - original->r.ptrs, - original->g.ptrs, - original->b.ptrs - }; const std::array chTrans = { transformed->r.ptrs, transformed->g.ptrs, @@ -1068,6 +1063,13 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I logEncode(original, original, multiThread); } } + + const std::array chOrig = { + original->r.ptrs, + original->g.ptrs, + original->b.ptrs + }; + // main cycle #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 16) if(multiThread) From d8564cb65241c34d1f083a2ddde339b19de74291 Mon Sep 17 00:00:00 2001 From: Desmis Date: Sat, 11 Jan 2020 16:25:50 +0100 Subject: [PATCH 19/22] another fix for wavelet <=> tone mapping --- rtgui/epd.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rtgui/epd.cc b/rtgui/epd.cc index 6544b7131..a620f4468 100644 --- a/rtgui/epd.cc +++ b/rtgui/epd.cc @@ -71,11 +71,11 @@ void EdgePreservingDecompositionUI::read(const ProcParams *pp, const ParamsEdite strength->set_sensitive (true); gamma->set_sensitive (true); - if(pp->wavelet.enabled) { - if(pp->wavelet.tmrs == 0) { + if(pp->wavelet.enabled) { + if(pp->wavelet.tmrs == 0 || pp->wavelet.TMmethod == "cont") { strength->set_sensitive (true); gamma->set_sensitive (true); - } else { + } else if(pp->wavelet.tmrs != 0 && pp->wavelet.TMmethod == "tm") { strength->set_sensitive (false); 
gamma->set_sensitive (false); } @@ -101,11 +101,11 @@ void EdgePreservingDecompositionUI::write(ProcParams *pp, ParamsEdited *pedited) strength->set_sensitive (true); gamma->set_sensitive (true); - if(pp->wavelet.enabled) { - if(pp->wavelet.tmrs == 0) { + if(pp->wavelet.enabled) { + if(pp->wavelet.tmrs == 0 || pp->wavelet.TMmethod == "cont") { strength->set_sensitive (true); gamma->set_sensitive (true); - } else { + } else if(pp->wavelet.tmrs != 0 && pp->wavelet.TMmethod == "tm") { strength->set_sensitive (false); gamma->set_sensitive (false); } From 01fbc2eddff2dfff5f84da88eec27cc160be70e2 Mon Sep 17 00:00:00 2001 From: Desmis Date: Sun, 12 Jan 2020 08:03:52 +0100 Subject: [PATCH 20/22] Suppress interaction between wavelet and tone-mapping --- rtengine/improcfun.cc | 8 ++++---- rtgui/epd.cc | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/rtengine/improcfun.cc b/rtengine/improcfun.cc index 59fb0f016..d5c69773e 100644 --- a/rtengine/improcfun.cc +++ b/rtengine/improcfun.cc @@ -5131,11 +5131,11 @@ void ImProcFunctions::EPDToneMapCIE (CieImage *ncie, float a_w, float c_, int Wi if (!params->epd.enabled) { return; } - +/* if (params->wavelet.enabled && params->wavelet.tmrs != 0) { return; } - +*/ float stren = params->epd.strength; float edgest = params->epd.edgeStopping; float sca = params->epd.scale; @@ -5245,11 +5245,11 @@ void ImProcFunctions::EPDToneMap (LabImage *lab, unsigned int Iterates, int skip if (!params->epd.enabled) { return; } - +/* if (params->wavelet.enabled && params->wavelet.tmrs != 0) { return; } - +*/ float stren = params->epd.strength; float edgest = params->epd.edgeStopping; float sca = params->epd.scale; diff --git a/rtgui/epd.cc b/rtgui/epd.cc index a620f4468..b13effba3 100644 --- a/rtgui/epd.cc +++ b/rtgui/epd.cc @@ -70,7 +70,7 @@ void EdgePreservingDecompositionUI::read(const ProcParams *pp, const ParamsEdite setEnabled(pp->epd.enabled); strength->set_sensitive (true); gamma->set_sensitive (true); - +/* 
if(pp->wavelet.enabled) { if(pp->wavelet.tmrs == 0 || pp->wavelet.TMmethod == "cont") { strength->set_sensitive (true); @@ -80,7 +80,7 @@ void EdgePreservingDecompositionUI::read(const ProcParams *pp, const ParamsEdite gamma->set_sensitive (false); } } - +*/ strength->setValue(pp->epd.strength); gamma->setValue(pp->epd.gamma); edgeStopping->setValue(pp->epd.edgeStopping); @@ -100,7 +100,7 @@ void EdgePreservingDecompositionUI::write(ProcParams *pp, ParamsEdited *pedited) pp->epd.enabled = getEnabled(); strength->set_sensitive (true); gamma->set_sensitive (true); - +/* if(pp->wavelet.enabled) { if(pp->wavelet.tmrs == 0 || pp->wavelet.TMmethod == "cont") { strength->set_sensitive (true); @@ -110,7 +110,7 @@ void EdgePreservingDecompositionUI::write(ProcParams *pp, ParamsEdited *pedited) gamma->set_sensitive (false); } } - +*/ if(pedited) { pedited->epd.strength = strength->getEditedState(); pedited->epd.gamma = gamma->getEditedState(); From 54bce4af64a597732d2964d8ea40169322a0214c Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Sun, 12 Jan 2020 19:36:37 +0100 Subject: [PATCH 21/22] Capture sharpening: minor speedups --- rtengine/capturesharpening.cc | 91 +++++++++++++++++------------------ 1 file changed, 44 insertions(+), 47 deletions(-) diff --git a/rtengine/capturesharpening.cc b/rtengine/capturesharpening.cc index f8067ac0b..21217918e 100644 --- a/rtengine/capturesharpening.cc +++ b/rtengine/capturesharpening.cc @@ -26,7 +26,7 @@ #include "procparams.h" #include "color.h" #include "rt_algo.h" -//#define BENCHMARK +#define BENCHMARK #include "StopWatch.h" #include "opthelper.h" #include "../rtgui/multilangmgr.h" @@ -525,28 +525,25 @@ float calcRadiusXtrans(const float * const *rawData, int W, int H, float lowerLi bool checkForStop(float** tmpIThr, float** iterCheck, int fullTileSize, int border) { - bool stopped = false; - for (int ii = border; !stopped && ii < fullTileSize - border; ++ii) { + for (int ii = border; ii < fullTileSize - border; ++ii) { #ifdef 
__SSE2__ for (int jj = border; jj < fullTileSize - border; jj += 4) { - if (_mm_movemask_ps((vfloat)vmaskf_lt(LVFU(tmpIThr[ii][jj]), LVFU(iterCheck[ii - border][jj - border])))) { - stopped = true; - break; + if (UNLIKELY(_mm_movemask_ps((vfloat)vmaskf_lt(LVFU(tmpIThr[ii][jj]), LVFU(iterCheck[ii - border][jj - border]))))) { + return true; } } #else for (int jj = border; jj < fullTileSize - border; ++jj) { if (tmpIThr[ii][jj] < iterCheck[ii - border][jj - border]) { - stopped = true; - break; + return true; } } #endif } - return stopped; + return false; } -void CaptureDeconvSharpening (float ** clipmask, float** luminance, float** oldLuminance, const float * const * blend, int W, int H, double sigma, double sigmaCornerOffset, int iterations, bool checkIterStop, rtengine::ProgressListener* plistener, double startVal, double endVal) +void CaptureDeconvSharpening (float** luminance, const float* const * oldLuminance, const float * const * blend, int W, int H, double sigma, double sigmaCornerOffset, int iterations, bool checkIterStop, rtengine::ProgressListener* plistener, double startVal, double endVal) { BENCHFUN const bool is5x5 = (sigma <= 0.84 && sigmaCornerOffset == 0.0); @@ -571,6 +568,7 @@ BENCHFUN double progress = startVal; const double progressStep = (endVal - startVal) * rtengine::SQR(tileSize) / (W * H); + constexpr float minBlend = 0.01f; #ifdef _OPENMP @@ -597,14 +595,14 @@ BENCHFUN if (checkIterStop) { for (int k = 0, ii = endOfCol ? H - fullTileSize + border : i; k < tileSize; ++k, ++ii) { for (int l = 0, jj = endOfRow ? W - fullTileSize + border : j; l < tileSize; ++l, ++jj) { - iterCheck[k][l] = oldLuminance[ii][jj] * clipmask[ii][jj] * 0.5f; - maxVal = std::max(maxVal, clipmask[ii][jj]); + iterCheck[k][l] = oldLuminance[ii][jj] * blend[ii][jj] * 0.5f; + maxVal = std::max(maxVal, blend[ii][jj]); } } } else { for (int k = 0, ii = endOfCol ? H - fullTileSize + border : i; k < tileSize; ++k, ++ii) { for (int l = 0, jj = endOfRow ? 
W - fullTileSize + border : j; l < tileSize; ++l, ++jj) { - maxVal = std::max(maxVal, clipmask[ii][jj]); + maxVal = std::max(maxVal, blend[ii][jj]); } } } @@ -623,14 +621,14 @@ BENCHFUN if (checkIterStop) { for (int ii = 0; ii < tileSize; ++ii) { for (int jj = 0; jj < tileSize; ++jj) { - iterCheck[ii][jj] = oldLuminance[i + ii][j + jj] * clipmask[i + ii][j + jj] * 0.5f; - maxVal = std::max(maxVal, clipmask[i + ii][j + jj]); + iterCheck[ii][jj] = oldLuminance[i + ii][j + jj] * blend[i + ii][j + jj] * 0.5f; + maxVal = std::max(maxVal, blend[i + ii][j + jj]); } } } else { for (int ii = 0; ii < tileSize; ++ii) { for (int jj = 0; jj < tileSize; ++jj) { - maxVal = std::max(maxVal, clipmask[i + ii][j + jj]); + maxVal = std::max(maxVal, blend[i + ii][j + jj]); } } } @@ -645,23 +643,22 @@ BENCHFUN } } } - bool stopped = false; if (is3x3) { - for (int k = 0; k < iterations && !stopped; ++k) { + for (int k = 0; k < iterations; ++k) { // apply 3x3 gaussian blur and divide luminance by result of gaussian blur gauss3x3div(tmpIThr, tmpThr, lumThr, fullTileSize, kernel3); gauss3x3mult(tmpThr, tmpIThr, fullTileSize, kernel3); - if (checkIterStop) { - stopped = checkForStop(tmpIThr, iterCheck, fullTileSize, border); + if (checkIterStop && k < iterations - 1 && checkForStop(tmpIThr, iterCheck, fullTileSize, border)) { + break; } } } else if (is5x5) { - for (int k = 0; k < iterations && !stopped; ++k) { + for (int k = 0; k < iterations; ++k) { // apply 5x5 gaussian blur and divide luminance by result of gaussian blur gauss5x5div(tmpIThr, tmpThr, lumThr, fullTileSize, kernel5); gauss5x5mult(tmpThr, tmpIThr, fullTileSize, kernel5); - if (checkIterStop) { - stopped = checkForStop(tmpIThr, iterCheck, fullTileSize, border); + if (checkIterStop && k < iterations - 1 && checkForStop(tmpIThr, iterCheck, fullTileSize, border)) { + break; } } } else { @@ -672,34 +669,34 @@ BENCHFUN if (sigmaTile > 0.84) { // have to use 7x7 kernel float lkernel7[7][7]; compute7x7kernel(static_cast<float>(sigma) +
distanceFactor * distance, lkernel7); - for (int k = 0; k < iterations && !stopped; ++k) { + for (int k = 0; k < iterations; ++k) { // apply 7x7 gaussian blur and divide luminance by result of gaussian blur gauss7x7div(tmpIThr, tmpThr, lumThr, fullTileSize, lkernel7); gauss7x7mult(tmpThr, tmpIThr, fullTileSize, lkernel7); - if (checkIterStop) { - stopped = checkForStop(tmpIThr, iterCheck, fullTileSize, border); + if (checkIterStop && k < iterations - 1 && checkForStop(tmpIThr, iterCheck, fullTileSize, border)) { + break; } } } else { // can use 5x5 kernel - float lkernel7[5][5]; - compute5x5kernel(static_cast<float>(sigma) + distanceFactor * distance, lkernel7); - for (int k = 0; k < iterations && !stopped; ++k) { + float lkernel5[5][5]; + compute5x5kernel(static_cast<float>(sigma) + distanceFactor * distance, lkernel5); + for (int k = 0; k < iterations; ++k) { // apply 7x7 gaussian blur and divide luminance by result of gaussian blur - gauss5x5div(tmpIThr, tmpThr, lumThr, fullTileSize, lkernel7); - gauss5x5mult(tmpThr, tmpIThr, fullTileSize, lkernel7); - if (checkIterStop) { - stopped = checkForStop(tmpIThr, iterCheck, fullTileSize, border); + gauss5x5div(tmpIThr, tmpThr, lumThr, fullTileSize, lkernel5); + gauss5x5mult(tmpThr, tmpIThr, fullTileSize, lkernel5); + if (checkIterStop && k < iterations - 1 && checkForStop(tmpIThr, iterCheck, fullTileSize, border)) { + break; } } } } } else { - for (int k = 0; k < iterations && !stopped; ++k) { + for (int k = 0; k < iterations; ++k) { // apply 7x7 gaussian blur and divide luminance by result of gaussian blur gauss7x7div(tmpIThr, tmpThr, lumThr, fullTileSize, kernel7); gauss7x7mult(tmpThr, tmpIThr, fullTileSize, kernel7); - if (checkIterStop) { - stopped = checkForStop(tmpIThr, iterCheck, fullTileSize, border); + if (checkIterStop && k < iterations - 1 && checkForStop(tmpIThr, iterCheck, fullTileSize, border)) { + break; } } } @@ -719,12 +716,12 @@ BENCHFUN } } if (plistener) { - if (++progresscounter % 16 == 0) { + if
(++progresscounter % 32 == 0) { #ifdef _OPENMP #pragma omp critical(csprogress) #endif { - progress += 16.0 * progressStep; + progress += 32.0 * progressStep; progress = rtengine::min(progress, endVal); plistener->setProgress(progress); } @@ -751,6 +748,7 @@ void RawImageSource::captureSharpening(const procparams::CaptureSharpeningParams plistener->setProgress(0.0); } BENCHFUN + constexpr float xyz_rgb[3][3] = { // XYZ from RGB { 0.412453, 0.357580, 0.180423 }, { 0.212671, 0.715160, 0.072169 }, @@ -829,8 +827,7 @@ BENCHFUN plistener->setProgress(0.1); } - array2D<float>& blend = red; // red will be overridden anyway => we can use its buffer to store the blend mask - buildBlendMask(L, blend, W, H, contrast, sharpeningParams.autoContrast, clipMask); + buildBlendMask(L, clipMask, W, H, contrast, sharpeningParams.autoContrast, clipMask); if (plistener) { plistener->setProgress(0.2); } @@ -840,7 +837,7 @@ BENCHFUN #endif for (int i = 0; i < H; ++i) { for (int j = 0; j < W; ++j) { - red[i][j] = green[i][j] = blue[i][j] = blend[i][j] * 16384.f; + red[i][j] = green[i][j] = blue[i][j] = clipMask[i][j] * 16384.f; } } if (plistener) { @@ -877,18 +874,18 @@ BENCHFUN if (plistener) { plistener->setProgress(0.1); } + // calculate contrast based blend factors to reduce sharpening in regions with low contrast - array2D<float>& blend = clipMask; // we can share blend and clipMask buffer here - buildBlendMask(L, blend, W, H, contrast, sharpeningParams.autoContrast, clipMask); + buildBlendMask(L, clipMask, W, H, contrast, sharpeningParams.autoContrast, clipMask); if (plistener) { plistener->setProgress(0.2); } conrastThreshold = contrast * 100.f; - - CaptureDeconvSharpening(clipMask, YNew, YOld, blend, W, H, radius, sharpeningParams.deconvradiusOffset, sharpeningParams.deconviter, sharpeningParams.deconvitercheck, plistener, 0.2, 0.9); + CaptureDeconvSharpening(YNew, YOld, clipMask, W, H, radius, sharpeningParams.deconvradiusOffset, sharpeningParams.deconviter, sharpeningParams.deconvitercheck,
plistener, 0.2, 0.9); if (plistener) { plistener->setProgress(0.9); } + #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 16) #endif @@ -896,7 +893,7 @@ BENCHFUN int j = 0; #ifdef __SSE2__ for (; j < W - 3; j += 4) { - const vfloat factor = vmaxf(LVFU(YNew[i][j]), ZEROV) / vmaxf(LVFU(YOld[i][j]), F2V(0.00001f)); + const vfloat factor = LVFU(YNew[i][j]) / vmaxf(LVFU(YOld[i][j]), F2V(0.00001f)); STVFU(red[i][j], LVFU(redVals[i][j]) * factor); STVFU(green[i][j], LVFU(greenVals[i][j]) * factor); STVFU(blue[i][j], LVFU(blueVals[i][j]) * factor); @@ -904,7 +901,7 @@ BENCHFUN #endif for (; j < W; ++j) { - const float factor = std::max(YNew[i][j], 0.f) / std::max(YOld[i][j], 0.00001f); + const float factor = YNew[i][j] / std::max(YOld[i][j], 0.00001f); red[i][j] = redVals[i][j] * factor; green[i][j] = greenVals[i][j] * factor; blue[i][j] = blueVals[i][j] * factor; From 4ee4888d5ba30536dcbbd3c67b68f36817c3b742 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Sun, 12 Jan 2020 19:38:37 +0100 Subject: [PATCH 22/22] Capture sharpening: Disable benchmark --- rtengine/capturesharpening.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtengine/capturesharpening.cc b/rtengine/capturesharpening.cc index 21217918e..d357eb59c 100644 --- a/rtengine/capturesharpening.cc +++ b/rtengine/capturesharpening.cc @@ -26,7 +26,7 @@ #include "procparams.h" #include "color.h" #include "rt_algo.h" -#define BENCHMARK +//#define BENCHMARK #include "StopWatch.h" #include "opthelper.h" #include "../rtgui/multilangmgr.h"