From d17f71eb72216ab0e7c196408064006ad45bf519 Mon Sep 17 00:00:00 2001
From: Ingo Weyrich <heckflosse67@gmx.de>
Date: Mon, 30 Dec 2019 15:27:17 +0100
Subject: [PATCH 01/22] Applying geometric transformations leads to dark
 artifacts in combination with capture sharpening, fixes #5588

---
 rtengine/iptransform.cc | 140 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 132 insertions(+), 8 deletions(-)

diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc
index af513536e..736fa0620 100644
--- a/rtengine/iptransform.cc
+++ b/rtengine/iptransform.cc
@@ -111,6 +111,31 @@ inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys,
     g = vhadd(weight * gv);
     b = vhadd(weight * bv);
 }
+
+inline void interpolateTransformCubicLog(rtengine::Imagefloat* src, int xs, int ys, float Dx, float Dy, float &r, float &g, float &b, float mul)
+{
+    constexpr float A = -0.85f;
+
+    // Vertical
+    const float t1Vert = A * (Dy - Dy * Dy);
+    const float t2Vert = (3.f - 2.f * Dy) * Dy * Dy;
+    const vfloat w3Vert = F2V(t1Vert * Dy);
+    const vfloat w2Vert = F2V(t1Vert * Dy - t1Vert + t2Vert);
+    const vfloat w1Vert = F2V(1.f - (t1Vert * Dy) - t2Vert);
+    const vfloat w0Vert = F2V(t1Vert - (t1Vert * Dy));
+
+    const vfloat rv = (w0Vert * xlogf(LVFU(src->r(ys, xs))) + w1Vert * xlogf(LVFU(src->r(ys + 1, xs)))) + (w2Vert * xlogf(LVFU(src->r(ys + 2, xs))) + w3Vert * xlogf(LVFU(src->r(ys + 3, xs))));
+    const vfloat gv = (w0Vert * xlogf(LVFU(src->g(ys, xs))) + w1Vert * xlogf(LVFU(src->g(ys + 1, xs)))) + (w2Vert * xlogf(LVFU(src->g(ys + 2, xs))) + w3Vert * xlogf(LVFU(src->g(ys + 3, xs))));
+    const vfloat bv = (w0Vert * xlogf(LVFU(src->b(ys, xs))) + w1Vert * xlogf(LVFU(src->b(ys + 1, xs)))) + (w2Vert * xlogf(LVFU(src->b(ys + 2, xs))) + w3Vert * xlogf(LVFU(src->b(ys + 3, xs))));
+
+    // Horizontal
+    const float t1Hor = A * (Dx - Dx * Dx);
+    const float t2Hor = (3.f - 2.f * Dx) * Dx * Dx;
+    const vfloat weight = _mm_set_ps(t1Hor * Dx, t1Hor * Dx - t1Hor + t2Hor, 1.f - (t1Hor * Dx) - t2Hor, t1Hor - (t1Hor * Dx));
+    r = mul * xexpf(vhadd(weight * rv));
+    g = mul * xexpf(vhadd(weight * gv));
+    b = mul * xexpf(vhadd(weight * bv));
+}
 #else
 inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys, float Dx, float Dy, float &r, float &g, float &b, float mul)
 {
@@ -143,6 +168,38 @@ inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys,
     g = mul * (gv[0] * w0Hor + gv[1] * w1Hor + gv[2] * w2Hor + gv[3] * w3Hor);
     b = mul * (bv[0] * w0Hor + bv[1] * w1Hor + bv[2] * w2Hor + bv[3] * w3Hor);
 }
+
+inline void interpolateTransformCubicLog(rtengine::Imagefloat* src, int xs, int ys, float Dx, float Dy, float &r, float &g, float &b, float mul)
+{
+    constexpr float A = -0.85f;
+
+    // Vertical
+    const float t1Vert = A * (Dy - Dy * Dy);
+    const float t2Vert = (3.f - 2.f * Dy) * Dy * Dy;
+    const float w3Vert = t1Vert * Dy;
+    const float w2Vert = t1Vert * Dy - t1Vert + t2Vert;
+    const float w1Vert = 1.f - (t1Vert * Dy) - t2Vert;
+    const float w0Vert = t1Vert - (t1Vert * Dy);
+
+    float rv[4], gv[4], bv[4];
+    for (int i = 0; i < 4; ++i) {
+        rv[i] = w0Vert * xlogf(src->r(ys, xs + i)) + w1Vert * xlogf(src->r(ys + 1, xs + i)) + w2Vert * xlogf(src->r(ys + 2, xs + i)) + w3Vert * xlogf(src->r(ys + 3, xs + i));
+        gv[i] = w0Vert * xlogf(src->g(ys, xs + i)) + w1Vert * xlogf(src->g(ys + 1, xs + i)) + w2Vert * xlogf(src->g(ys + 2, xs + i)) + w3Vert * xlogf(src->g(ys + 3, xs + i));
+        bv[i] = w0Vert * xlogf(src->b(ys, xs + i)) + w1Vert * xlogf(src->b(ys + 1, xs + i)) + w2Vert * xlogf(src->b(ys + 2, xs + i)) + w3Vert * xlogf(src->b(ys + 3, xs + i));
+    }
+
+    // Horizontal
+    const float t1Hor = A * (Dx - Dx * Dx);
+    const float t2Hor = (3.f - 2.f * Dx) * Dx * Dx;
+    const float w3Hor = t1Hor * Dx;
+    const float w2Hor = t1Hor * Dx - t1Hor + t2Hor;
+    const float w1Hor = 1.f - (t1Hor * Dx) - t2Hor;
+    const float w0Hor = t1Hor - (t1Hor * Dx);
+
+    r = mul * xexpf(rv[0] * w0Hor + rv[1] * w1Hor + rv[2] * w2Hor + rv[3] * w3Hor);
+    g = mul * xexpf(gv[0] * w0Hor + gv[1] * w1Hor + gv[2] * w2Hor + gv[3] * w3Hor);
+    b = mul * xexpf(bv[0] * w0Hor + bv[1] * w1Hor + bv[2] * w2Hor + bv[3] * w3Hor);
+}
 #endif
 #ifdef __SSE2__
 inline void interpolateTransformChannelsCubic(const float* const* src, int xs, int ys, float Dx, float Dy, float& dest, float mul)
@@ -165,6 +222,27 @@ inline void interpolateTransformChannelsCubic(const float* const* src, int xs, i
     const vfloat weight = _mm_set_ps(t1Hor * Dx, t1Hor * Dx - t1Hor + t2Hor, 1.f - (t1Hor * Dx) - t2Hor, t1Hor - (t1Hor * Dx));
     dest = mul * vhadd(weight * cv);
 }
+
+inline void interpolateTransformChannelsCubicLog(const float* const* src, int xs, int ys, float Dx, float Dy, float& dest, float mul)
+{
+    constexpr float A = -0.85f;
+
+    // Vertical
+    const float t1Vert = A * (Dy - Dy * Dy);
+    const float t2Vert = (3.f - 2.f * Dy) * Dy * Dy;
+    const vfloat w3Vert = F2V(t1Vert * Dy);
+    const vfloat w2Vert = F2V(t1Vert * Dy - t1Vert + t2Vert);
+    const vfloat w1Vert = F2V(1.f - (t1Vert * Dy) - t2Vert);
+    const vfloat w0Vert = F2V(t1Vert - (t1Vert * Dy));
+
+    const vfloat cv = (w0Vert * xlogf(LVFU(src[ys][xs])) + w1Vert * xlogf(LVFU(src[ys + 1][xs]))) + (w2Vert * xlogf(LVFU(src[ys + 2][xs])) + w3Vert * xlogf(LVFU(src[ys + 3][xs])));
+
+    // Horizontal
+    const float t1Hor = A * (Dx - Dx * Dx);
+    const float t2Hor = (3.f - 2.f * Dx) * Dx * Dx;
+    const vfloat weight = _mm_set_ps(t1Hor * Dx, t1Hor * Dx - t1Hor + t2Hor, 1.f - (t1Hor * Dx) - t2Hor, t1Hor - (t1Hor * Dx));
+    dest = mul * xexpf(vhadd(weight * cv));
+}
 #else
 inline void interpolateTransformChannelsCubic(const float* const* src, int xs, int ys, float Dx, float Dy, float& dest, float mul)
 {
@@ -193,6 +271,34 @@ inline void interpolateTransformChannelsCubic(const float* const* src, int xs, i
 
     dest = mul * (cv[0] * w0Hor + cv[1] * w1Hor + cv[2] * w2Hor + cv[3] * w3Hor);
 }
+
+inline void interpolateTransformChannelsCubicLog(const float* const* src, int xs, int ys, float Dx, float Dy, float& dest, float mul)
+{
+    constexpr float A = -0.85f;
+
+    // Vertical
+    const float t1Vert = A * (Dy - Dy * Dy);
+    const float t2Vert = (3.f - 2.f * Dy) * Dy * Dy;
+    const float w3Vert = t1Vert * Dy;
+    const float w2Vert = t1Vert * Dy - t1Vert + t2Vert;
+    const float w1Vert = 1.f - (t1Vert * Dy) - t2Vert;
+    const float w0Vert = t1Vert - (t1Vert * Dy);
+
+    float cv[4];
+    for (int i = 0; i < 4; ++i) {
+        cv[i] = w0Vert * xlogf(src[ys][xs + i]) + w1Vert * xlogf(src[ys + 1][xs + i]) + w2Vert * xlogf(src[ys + 2][xs + i]) + w3Vert * xlogf(src[ys + 3][xs + i]);
+    }
+
+    // Horizontal
+    const float t1Hor = A * (Dx - Dx * Dx);
+    const float t2Hor = (3.f - 2.f * Dx) * Dx * Dx;
+    const float w3Hor = t1Hor * Dx;
+    const float w2Hor = t1Hor * Dx - t1Hor + t2Hor;
+    const float w1Hor = 1.f - (t1Hor * Dx) - t2Hor;
+    const float w0Hor = t1Hor - (t1Hor * Dx);
+
+    dest = mul * xexpf(cv[0] * w0Hor + cv[1] * w1Hor + cv[2] * w2Hor + cv[3] * w3Hor);
+}
 #endif
 
 }
@@ -922,6 +1028,7 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I
     const double ascale = params->commonTrans.autofill ? getTransformAutoFill(oW, oH, pLCPMap) : 1.0;
 
     const bool darkening = (params->vignetting.amount <= 0.0);
+    const bool useLog = params->pdsharpening.enabled;
     const double centerFactorx = cx - w2;
     const double centerFactory = cy - h2;
 
@@ -1011,14 +1118,26 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I
 
                     if (yc > 0 && yc < original->getHeight() - 2 && xc > 0 && xc < original->getWidth() - 2) {
                         // all interpolation pixels inside image
-                        if (enableCA) {
-                            interpolateTransformChannelsCubic(chOrig[c], xc - 1, yc - 1, Dx, Dy, chTrans[c][y][x], vignmul);
-                        } else if (!highQuality) {
-                            transformed->r(y, x) = vignmul * (original->r(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->r(yc, xc + 1) * Dx * (1.0 - Dy) + original->r(yc + 1, xc) * (1.0 - Dx) * Dy + original->r(yc + 1, xc + 1) * Dx * Dy);
-                            transformed->g(y, x) = vignmul * (original->g(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->g(yc, xc + 1) * Dx * (1.0 - Dy) + original->g(yc + 1, xc) * (1.0 - Dx) * Dy + original->g(yc + 1, xc + 1) * Dx * Dy);
-                            transformed->b(y, x) = vignmul * (original->b(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->b(yc, xc + 1) * Dx * (1.0 - Dy) + original->b(yc + 1, xc) * (1.0 - Dx) * Dy + original->b(yc + 1, xc + 1) * Dx * Dy);
+                        if (!useLog) {
+                            if (enableCA) {
+                                interpolateTransformChannelsCubic(chOrig[c], xc - 1, yc - 1, Dx, Dy, chTrans[c][y][x], vignmul);
+                            } else if (!highQuality) {
+                                transformed->r(y, x) = vignmul * (original->r(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->r(yc, xc + 1) * Dx * (1.0 - Dy) + original->r(yc + 1, xc) * (1.0 - Dx) * Dy + original->r(yc + 1, xc + 1) * Dx * Dy);
+                                transformed->g(y, x) = vignmul * (original->g(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->g(yc, xc + 1) * Dx * (1.0 - Dy) + original->g(yc + 1, xc) * (1.0 - Dx) * Dy + original->g(yc + 1, xc + 1) * Dx * Dy);
+                                transformed->b(y, x) = vignmul * (original->b(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->b(yc, xc + 1) * Dx * (1.0 - Dy) + original->b(yc + 1, xc) * (1.0 - Dx) * Dy + original->b(yc + 1, xc + 1) * Dx * Dy);
+                            } else {
+                                interpolateTransformCubic(original, xc - 1, yc - 1, Dx, Dy, transformed->r(y, x), transformed->g(y, x), transformed->b(y, x), vignmul);
+                            }
                         } else {
-                            interpolateTransformCubic(original, xc - 1, yc - 1, Dx, Dy, transformed->r(y, x), transformed->g(y, x), transformed->b(y, x), vignmul);
+                            if (enableCA) {
+                                interpolateTransformChannelsCubicLog(chOrig[c], xc - 1, yc - 1, Dx, Dy, chTrans[c][y][x], vignmul);
+                            } else if (!highQuality) {
+                                transformed->r(y, x) = vignmul * (original->r(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->r(yc, xc + 1) * Dx * (1.0 - Dy) + original->r(yc + 1, xc) * (1.0 - Dx) * Dy + original->r(yc + 1, xc + 1) * Dx * Dy);
+                                transformed->g(y, x) = vignmul * (original->g(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->g(yc, xc + 1) * Dx * (1.0 - Dy) + original->g(yc + 1, xc) * (1.0 - Dx) * Dy + original->g(yc + 1, xc + 1) * Dx * Dy);
+                                transformed->b(y, x) = vignmul * (original->b(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->b(yc, xc + 1) * Dx * (1.0 - Dy) + original->b(yc + 1, xc) * (1.0 - Dx) * Dy + original->b(yc + 1, xc + 1) * Dx * Dy);
+                            } else {
+                                interpolateTransformCubicLog(original, xc - 1, yc - 1, Dx, Dy, transformed->r(y, x), transformed->g(y, x), transformed->b(y, x), vignmul);
+                            }
                         }
                     } else {
                         // edge pixels
@@ -1054,6 +1173,7 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I
 void ImProcFunctions::transformLCPCAOnly(Imagefloat *original, Imagefloat *transformed, int cx, int cy, const LensCorrection *pLCPMap)
 {
     assert(pLCPMap && params->lensProf.useCA && pLCPMap->isCACorrectionAvailable());
+    const bool useLog = params->pdsharpening.enabled;
 
     float** chOrig[3];
     chOrig[0] = original->r.ptrs;
@@ -1089,7 +1209,11 @@ void ImProcFunctions::transformLCPCAOnly(Imagefloat *original, Imagefloat *trans
                     // multiplier for vignetting correction
                     if (yc > 0 && yc < original->getHeight() - 2 && xc > 0 && xc < original->getWidth() - 2) {
                         // all interpolation pixels inside image
-                        interpolateTransformChannelsCubic (chOrig[c], xc - 1, yc - 1, Dx, Dy, chTrans[c][y][x], 1.0);
+                        if (!useLog) {
+                            interpolateTransformChannelsCubic(chOrig[c], xc - 1, yc - 1, Dx, Dy, chTrans[c][y][x], 1.0);
+                        } else {
+                            interpolateTransformChannelsCubicLog(chOrig[c], xc - 1, yc - 1, Dx, Dy, chTrans[c][y][x], 1.0);
+                        }
                     } else {
                         // edge pixels
                         int y1 = LIM (yc,   0, original->getHeight() - 1);

From bcb7df44dfe5c8507ea4b7ac7cde319ec51ac501 Mon Sep 17 00:00:00 2001
From: Ingo Weyrich <heckflosse67@gmx.de>
Date: Tue, 31 Dec 2019 15:23:24 +0100
Subject: [PATCH 02/22] Log transform: fix segfault

---
 rtengine/iptransform.cc | 30 +++++++++++++-----------------
 rtengine/sleef.h        | 24 ++++++++++++++++++++++++
 rtengine/sleefsseavx.c  | 24 ++++++++++++++++++++++++
 3 files changed, 61 insertions(+), 17 deletions(-)

diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc
index 736fa0620..77fa57985 100644
--- a/rtengine/iptransform.cc
+++ b/rtengine/iptransform.cc
@@ -124,9 +124,9 @@ inline void interpolateTransformCubicLog(rtengine::Imagefloat* src, int xs, int
     const vfloat w1Vert = F2V(1.f - (t1Vert * Dy) - t2Vert);
     const vfloat w0Vert = F2V(t1Vert - (t1Vert * Dy));
 
-    const vfloat rv = (w0Vert * xlogf(LVFU(src->r(ys, xs))) + w1Vert * xlogf(LVFU(src->r(ys + 1, xs)))) + (w2Vert * xlogf(LVFU(src->r(ys + 2, xs))) + w3Vert * xlogf(LVFU(src->r(ys + 3, xs))));
-    const vfloat gv = (w0Vert * xlogf(LVFU(src->g(ys, xs))) + w1Vert * xlogf(LVFU(src->g(ys + 1, xs)))) + (w2Vert * xlogf(LVFU(src->g(ys + 2, xs))) + w3Vert * xlogf(LVFU(src->g(ys + 3, xs))));
-    const vfloat bv = (w0Vert * xlogf(LVFU(src->b(ys, xs))) + w1Vert * xlogf(LVFU(src->b(ys + 1, xs)))) + (w2Vert * xlogf(LVFU(src->b(ys + 2, xs))) + w3Vert * xlogf(LVFU(src->b(ys + 3, xs))));
+    const vfloat rv = (w0Vert * xlogf1(LVFU(src->r(ys, xs))) + w1Vert * xlogf1(LVFU(src->r(ys + 1, xs)))) + (w2Vert * xlogf1(LVFU(src->r(ys + 2, xs))) + w3Vert * xlogf1(LVFU(src->r(ys + 3, xs))));
+    const vfloat gv = (w0Vert * xlogf1(LVFU(src->g(ys, xs))) + w1Vert * xlogf1(LVFU(src->g(ys + 1, xs)))) + (w2Vert * xlogf1(LVFU(src->g(ys + 2, xs))) + w3Vert * xlogf1(LVFU(src->g(ys + 3, xs))));
+    const vfloat bv = (w0Vert * xlogf1(LVFU(src->b(ys, xs))) + w1Vert * xlogf1(LVFU(src->b(ys + 1, xs)))) + (w2Vert * xlogf1(LVFU(src->b(ys + 2, xs))) + w3Vert * xlogf1(LVFU(src->b(ys + 3, xs))));
 
     // Horizontal
     const float t1Hor = A * (Dx - Dx * Dx);
@@ -183,9 +183,9 @@ inline void interpolateTransformCubicLog(rtengine::Imagefloat* src, int xs, int
 
     float rv[4], gv[4], bv[4];
     for (int i = 0; i < 4; ++i) {
-        rv[i] = w0Vert * xlogf(src->r(ys, xs + i)) + w1Vert * xlogf(src->r(ys + 1, xs + i)) + w2Vert * xlogf(src->r(ys + 2, xs + i)) + w3Vert * xlogf(src->r(ys + 3, xs + i));
-        gv[i] = w0Vert * xlogf(src->g(ys, xs + i)) + w1Vert * xlogf(src->g(ys + 1, xs + i)) + w2Vert * xlogf(src->g(ys + 2, xs + i)) + w3Vert * xlogf(src->g(ys + 3, xs + i));
-        bv[i] = w0Vert * xlogf(src->b(ys, xs + i)) + w1Vert * xlogf(src->b(ys + 1, xs + i)) + w2Vert * xlogf(src->b(ys + 2, xs + i)) + w3Vert * xlogf(src->b(ys + 3, xs + i));
+        rv[i] = w0Vert * xlogf1(src->r(ys, xs + i)) + w1Vert * xlogf1(src->r(ys + 1, xs + i)) + w2Vert * xlogf1(src->r(ys + 2, xs + i)) + w3Vert * xlogf1(src->r(ys + 3, xs + i));
+        gv[i] = w0Vert * xlogf1(src->g(ys, xs + i)) + w1Vert * xlogf1(src->g(ys + 1, xs + i)) + w2Vert * xlogf1(src->g(ys + 2, xs + i)) + w3Vert * xlogf1(src->g(ys + 3, xs + i));
+        bv[i] = w0Vert * xlogf1(src->b(ys, xs + i)) + w1Vert * xlogf1(src->b(ys + 1, xs + i)) + w2Vert * xlogf1(src->b(ys + 2, xs + i)) + w3Vert * xlogf1(src->b(ys + 3, xs + i));
     }
 
     // Horizontal
@@ -235,7 +235,7 @@ inline void interpolateTransformChannelsCubicLog(const float* const* src, int xs
     const vfloat w1Vert = F2V(1.f - (t1Vert * Dy) - t2Vert);
     const vfloat w0Vert = F2V(t1Vert - (t1Vert * Dy));
 
-    const vfloat cv = (w0Vert * xlogf(LVFU(src[ys][xs])) + w1Vert * xlogf(LVFU(src[ys + 1][xs]))) + (w2Vert * xlogf(LVFU(src[ys + 2][xs])) + w3Vert * xlogf(LVFU(src[ys + 3][xs])));
+    const vfloat cv = (w0Vert * xlogf1(LVFU(src[ys][xs])) + w1Vert * xlogf1(LVFU(src[ys + 1][xs]))) + (w2Vert * xlogf1(LVFU(src[ys + 2][xs])) + w3Vert * xlogf1(LVFU(src[ys + 3][xs])));
 
     // Horizontal
     const float t1Hor = A * (Dx - Dx * Dx);
@@ -286,7 +286,7 @@ inline void interpolateTransformChannelsCubicLog(const float* const* src, int xs
 
     float cv[4];
     for (int i = 0; i < 4; ++i) {
-        cv[i] = w0Vert * xlogf(src[ys][xs + i]) + w1Vert * xlogf(src[ys + 1][xs + i]) + w2Vert * xlogf(src[ys + 2][xs + i]) + w3Vert * xlogf(src[ys + 3][xs + i]);
+        cv[i] = w0Vert * xlogf1(src[ys][xs + i]) + w1Vert * xlogf1(src[ys + 1][xs + i]) + w2Vert * xlogf1(src[ys + 2][xs + i]) + w3Vert * xlogf1(src[ys + 3][xs + i]);
     }
 
     // Horizontal
@@ -1118,23 +1118,19 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I
 
                     if (yc > 0 && yc < original->getHeight() - 2 && xc > 0 && xc < original->getWidth() - 2) {
                         // all interpolation pixels inside image
-                        if (!useLog) {
+                        if (!highQuality) {
+                            transformed->r(y, x) = vignmul * (original->r(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->r(yc, xc + 1) * Dx * (1.0 - Dy) + original->r(yc + 1, xc) * (1.0 - Dx) * Dy + original->r(yc + 1, xc + 1) * Dx * Dy);
+                            transformed->g(y, x) = vignmul * (original->g(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->g(yc, xc + 1) * Dx * (1.0 - Dy) + original->g(yc + 1, xc) * (1.0 - Dx) * Dy + original->g(yc + 1, xc + 1) * Dx * Dy);
+                            transformed->b(y, x) = vignmul * (original->b(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->b(yc, xc + 1) * Dx * (1.0 - Dy) + original->b(yc + 1, xc) * (1.0 - Dx) * Dy + original->b(yc + 1, xc + 1) * Dx * Dy);
+                        } else if (!useLog) {
                             if (enableCA) {
                                 interpolateTransformChannelsCubic(chOrig[c], xc - 1, yc - 1, Dx, Dy, chTrans[c][y][x], vignmul);
-                            } else if (!highQuality) {
-                                transformed->r(y, x) = vignmul * (original->r(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->r(yc, xc + 1) * Dx * (1.0 - Dy) + original->r(yc + 1, xc) * (1.0 - Dx) * Dy + original->r(yc + 1, xc + 1) * Dx * Dy);
-                                transformed->g(y, x) = vignmul * (original->g(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->g(yc, xc + 1) * Dx * (1.0 - Dy) + original->g(yc + 1, xc) * (1.0 - Dx) * Dy + original->g(yc + 1, xc + 1) * Dx * Dy);
-                                transformed->b(y, x) = vignmul * (original->b(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->b(yc, xc + 1) * Dx * (1.0 - Dy) + original->b(yc + 1, xc) * (1.0 - Dx) * Dy + original->b(yc + 1, xc + 1) * Dx * Dy);
                             } else {
                                 interpolateTransformCubic(original, xc - 1, yc - 1, Dx, Dy, transformed->r(y, x), transformed->g(y, x), transformed->b(y, x), vignmul);
                             }
                         } else {
                             if (enableCA) {
                                 interpolateTransformChannelsCubicLog(chOrig[c], xc - 1, yc - 1, Dx, Dy, chTrans[c][y][x], vignmul);
-                            } else if (!highQuality) {
-                                transformed->r(y, x) = vignmul * (original->r(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->r(yc, xc + 1) * Dx * (1.0 - Dy) + original->r(yc + 1, xc) * (1.0 - Dx) * Dy + original->r(yc + 1, xc + 1) * Dx * Dy);
-                                transformed->g(y, x) = vignmul * (original->g(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->g(yc, xc + 1) * Dx * (1.0 - Dy) + original->g(yc + 1, xc) * (1.0 - Dx) * Dy + original->g(yc + 1, xc + 1) * Dx * Dy);
-                                transformed->b(y, x) = vignmul * (original->b(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->b(yc, xc + 1) * Dx * (1.0 - Dy) + original->b(yc + 1, xc) * (1.0 - Dx) * Dy + original->b(yc + 1, xc + 1) * Dx * Dy);
                             } else {
                                 interpolateTransformCubicLog(original, xc - 1, yc - 1, Dx, Dy, transformed->r(y, x), transformed->g(y, x), transformed->b(y, x), vignmul);
                             }
diff --git a/rtengine/sleef.h b/rtengine/sleef.h
index 30c059010..7b7d5995f 100644
--- a/rtengine/sleef.h
+++ b/rtengine/sleef.h
@@ -1206,6 +1206,30 @@ __inline float xlogf(float d) {
     return x;
 }
 
+__inline float xlogf1(float d) { // does xlogf(vmaxf(d, 1.f)) but faster
+    float x, x2, t, m;
+    int e;
+
+    e = ilogbp1f(d * 0.7071f);
+    m = ldexpkf(d, -e);
+
+    x = (m-1.0f) / (m+1.0f);
+    x2 = x * x;
+
+    t = 0.2371599674224853515625f;
+    t = mlaf(t, x2, 0.285279005765914916992188f);
+    t = mlaf(t, x2, 0.400005519390106201171875f);
+    t = mlaf(t, x2, 0.666666567325592041015625f);
+    t = mlaf(t, x2, 2.0f);
+
+    x = x * t + 0.693147180559945286226764f * e;
+
+    if (xisinff(d)) x = rtengine::RT_INFINITY_F;
+    if (d <= 1.f) x = 0;
+
+    return x;
+}
+
 __inline float xexpf(float d) {
     if(d<=-104.0f) return 0.0f;
 
diff --git a/rtengine/sleefsseavx.c b/rtengine/sleefsseavx.c
index 1982c7c4c..0af516f9b 100644
--- a/rtengine/sleefsseavx.c
+++ b/rtengine/sleefsseavx.c
@@ -1253,6 +1253,30 @@ static INLINE vfloat xlogf(vfloat d) {
     return x;
 }
 
+static INLINE vfloat xlogf1(vfloat d) { // does xlogf(vmaxf(d, 1.f)) but faster
+    vfloat x, x2, t, m;
+    vint2 e;
+
+    e = vilogbp1f(vmulf(d, vcast_vf_f(0.7071f)));
+    m = vldexpf(d, vsubi2(vcast_vi2_i(0), e));
+
+    x = vdivf(vaddf(vcast_vf_f(-1.0f), m), vaddf(vcast_vf_f(1.0f), m));
+    x2 = vmulf(x, x);
+
+    t = vcast_vf_f(0.2371599674224853515625f);
+    t = vmlaf(t, x2, vcast_vf_f(0.285279005765914916992188f));
+    t = vmlaf(t, x2, vcast_vf_f(0.400005519390106201171875f));
+    t = vmlaf(t, x2, vcast_vf_f(0.666666567325592041015625f));
+    t = vmlaf(t, x2, vcast_vf_f(2.0f));
+
+    x = vaddf(vmulf(x, t), vmulf(vcast_vf_f(0.693147180559945286226764f), vcast_vf_vi2(e)));
+
+    x = vself(vmaskf_ispinf(d), vcast_vf_f(INFINITYf), x);
+    x = vselfnotzero(vmaskf_le(d, vcast_vf_f(1.f)), x);
+
+    return x;
+}
+
 static INLINE vfloat xlogf0(vfloat d) {
     vfloat x, x2, t, m;
     vint2 e;

From 58d8e66b7245e63b39c0d92db7775d0ede14cc04 Mon Sep 17 00:00:00 2001
From: Ingo Weyrich <heckflosse67@gmx.de>
Date: Tue, 31 Dec 2019 19:09:06 +0100
Subject: [PATCH 03/22] Log transform: speedup, #5588

---
 rtengine/iptransform.cc | 76 ++++++++++++++++++++++++++++++++---------
 1 file changed, 60 insertions(+), 16 deletions(-)

diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc
index 77fa57985..67647b7ef 100644
--- a/rtengine/iptransform.cc
+++ b/rtengine/iptransform.cc
@@ -86,6 +86,29 @@ float normn (float a, float b, int n)
     }
 }
 
+void logEncode(rtengine::Imagefloat *original, bool multiThread) {
+
+#ifdef _OPENMP
+    #pragma omp parallel for schedule(dynamic, 16) if(multiThread)
+#endif
+
+    for (int y = 0; y < original->getHeight(); ++y) {
+        int x = 0;
+#ifdef __SSE2__
+        for (; x < original->getWidth() - 3; x += 4) {
+            STVFU(original->r(y, x), xlogf1(LVFU(original->r(y, x))));
+            STVFU(original->g(y, x), xlogf1(LVFU(original->g(y, x))));
+            STVFU(original->b(y, x), xlogf1(LVFU(original->b(y, x))));
+        }
+#endif
+        for (; x < original->getWidth(); ++x) {
+            original->r(y, x) = xlogf1(original->r(y, x));
+            original->g(y, x) = xlogf1(original->g(y, x));
+            original->b(y, x) = xlogf1(original->b(y, x));
+        }
+    }
+}
+
 #ifdef __SSE2__
 inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys, float Dx, float Dy, float &r, float &g, float &b, float mul)
 {
@@ -124,9 +147,9 @@ inline void interpolateTransformCubicLog(rtengine::Imagefloat* src, int xs, int
     const vfloat w1Vert = F2V(1.f - (t1Vert * Dy) - t2Vert);
     const vfloat w0Vert = F2V(t1Vert - (t1Vert * Dy));
 
-    const vfloat rv = (w0Vert * xlogf1(LVFU(src->r(ys, xs))) + w1Vert * xlogf1(LVFU(src->r(ys + 1, xs)))) + (w2Vert * xlogf1(LVFU(src->r(ys + 2, xs))) + w3Vert * xlogf1(LVFU(src->r(ys + 3, xs))));
-    const vfloat gv = (w0Vert * xlogf1(LVFU(src->g(ys, xs))) + w1Vert * xlogf1(LVFU(src->g(ys + 1, xs)))) + (w2Vert * xlogf1(LVFU(src->g(ys + 2, xs))) + w3Vert * xlogf1(LVFU(src->g(ys + 3, xs))));
-    const vfloat bv = (w0Vert * xlogf1(LVFU(src->b(ys, xs))) + w1Vert * xlogf1(LVFU(src->b(ys + 1, xs)))) + (w2Vert * xlogf1(LVFU(src->b(ys + 2, xs))) + w3Vert * xlogf1(LVFU(src->b(ys + 3, xs))));
+    const vfloat rv = (w0Vert * LVFU(src->r(ys, xs)) + w1Vert * LVFU(src->r(ys + 1, xs))) + (w2Vert * LVFU(src->r(ys + 2, xs)) + w3Vert * LVFU(src->r(ys + 3, xs)));
+    const vfloat gv = (w0Vert * LVFU(src->g(ys, xs)) + w1Vert * LVFU(src->g(ys + 1, xs))) + (w2Vert * LVFU(src->g(ys + 2, xs)) + w3Vert * LVFU(src->g(ys + 3, xs)));
+    const vfloat bv = (w0Vert * LVFU(src->b(ys, xs)) + w1Vert * LVFU(src->b(ys + 1, xs))) + (w2Vert * LVFU(src->b(ys + 2, xs)) + w3Vert * LVFU(src->b(ys + 3, xs)));
 
     // Horizontal
     const float t1Hor = A * (Dx - Dx * Dx);
@@ -183,9 +206,9 @@ inline void interpolateTransformCubicLog(rtengine::Imagefloat* src, int xs, int
 
     float rv[4], gv[4], bv[4];
     for (int i = 0; i < 4; ++i) {
-        rv[i] = w0Vert * xlogf1(src->r(ys, xs + i)) + w1Vert * xlogf1(src->r(ys + 1, xs + i)) + w2Vert * xlogf1(src->r(ys + 2, xs + i)) + w3Vert * xlogf1(src->r(ys + 3, xs + i));
-        gv[i] = w0Vert * xlogf1(src->g(ys, xs + i)) + w1Vert * xlogf1(src->g(ys + 1, xs + i)) + w2Vert * xlogf1(src->g(ys + 2, xs + i)) + w3Vert * xlogf1(src->g(ys + 3, xs + i));
-        bv[i] = w0Vert * xlogf1(src->b(ys, xs + i)) + w1Vert * xlogf1(src->b(ys + 1, xs + i)) + w2Vert * xlogf1(src->b(ys + 2, xs + i)) + w3Vert * xlogf1(src->b(ys + 3, xs + i));
+        rv[i] = w0Vert * src->r(ys, xs + i) + w1Vert * src->r(ys + 1, xs + i) + w2Vert * src->r(ys + 2, xs + i) + w3Vert * src->r(ys + 3, xs + i);
+        gv[i] = w0Vert * src->g(ys, xs + i) + w1Vert * src->g(ys + 1, xs + i) + w2Vert * src->g(ys + 2, xs + i) + w3Vert * src->g(ys + 3, xs + i);
+        bv[i] = w0Vert * src->b(ys, xs + i) + w1Vert * src->b(ys + 1, xs + i) + w2Vert * src->b(ys + 2, xs + i) + w3Vert * src->b(ys + 3, xs + i);
     }
 
     // Horizontal
@@ -235,7 +258,7 @@ inline void interpolateTransformChannelsCubicLog(const float* const* src, int xs
     const vfloat w1Vert = F2V(1.f - (t1Vert * Dy) - t2Vert);
     const vfloat w0Vert = F2V(t1Vert - (t1Vert * Dy));
 
-    const vfloat cv = (w0Vert * xlogf1(LVFU(src[ys][xs])) + w1Vert * xlogf1(LVFU(src[ys + 1][xs]))) + (w2Vert * xlogf1(LVFU(src[ys + 2][xs])) + w3Vert * xlogf1(LVFU(src[ys + 3][xs])));
+    const vfloat cv = (w0Vert * LVFU(src[ys][xs]) + w1Vert * LVFU(src[ys + 1][xs])) + (w2Vert * LVFU(src[ys + 2][xs]) + w3Vert * LVFU(src[ys + 3][xs]));
 
     // Horizontal
     const float t1Hor = A * (Dx - Dx * Dx);
@@ -286,7 +309,7 @@ inline void interpolateTransformChannelsCubicLog(const float* const* src, int xs
 
     float cv[4];
     for (int i = 0; i < 4; ++i) {
-        cv[i] = w0Vert * xlogf1(src[ys][xs + i]) + w1Vert * xlogf1(src[ys + 1][xs + i]) + w2Vert * xlogf1(src[ys + 2][xs + i]) + w3Vert * xlogf1(src[ys + 3][xs + i]);
+        cv[i] = w0Vert * src[ys][xs + i] + w1Vert * src[ys + 1][xs + i] + w2Vert * src[ys + 2][xs + i] + w3Vert * src[ys + 3][xs + i];
     }
 
     // Horizontal
@@ -953,6 +976,7 @@ void ImProcFunctions::transformLuminanceOnly (Imagefloat* original, Imagefloat*
 
 void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, Imagefloat *transformed, int cx, int cy, int sx, int sy, int oW, int oH, int fW, int fH, const LensCorrection *pLCPMap)
 {
+    BENCHFUN
     // set up stuff, depending on the mode we are
     const bool enableLCPDist = pLCPMap && params->lensProf.useDist;
     const bool enableCA = highQuality && needsCA();
@@ -1028,10 +1052,13 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I
     const double ascale = params->commonTrans.autofill ? getTransformAutoFill(oW, oH, pLCPMap) : 1.0;
 
     const bool darkening = (params->vignetting.amount <= 0.0);
-    const bool useLog = params->pdsharpening.enabled;
+    const bool useLog = params->pdsharpening.enabled && highQuality;
     const double centerFactorx = cx - w2;
     const double centerFactory = cy - h2;
 
+    if (useLog) {
+        logEncode(original, multiThread);
+    }
     // main cycle
 #ifdef _OPENMP
     #pragma omp parallel for schedule(dynamic, 16) if(multiThread)
@@ -1142,12 +1169,22 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I
                         const int x1 = LIM(xc, 0, original->getWidth() - 1);
                         const int x2 = LIM(xc + 1, 0, original->getWidth() - 1);
 
-                        if (enableCA) {
-                            chTrans[c][y][x] = vignmul * (chOrig[c][y1][x1] * (1.0 - Dx) * (1.0 - Dy) + chOrig[c][y1][x2] * Dx * (1.0 - Dy) + chOrig[c][y2][x1] * (1.0 - Dx) * Dy + chOrig[c][y2][x2] * Dx * Dy);
+                        if (useLog) {
+                            if (enableCA) {
+                                chTrans[c][y][x] = vignmul * xexpf(chOrig[c][y1][x1] * (1.0 - Dx) * (1.0 - Dy) + chOrig[c][y1][x2] * Dx * (1.0 - Dy) + chOrig[c][y2][x1] * (1.0 - Dx) * Dy + chOrig[c][y2][x2] * Dx * Dy);
+                            } else {
+                                transformed->r(y, x) = vignmul * xexpf(original->r(y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->r(y1, x2) * Dx * (1.0 - Dy) + original->r(y2, x1) * (1.0 - Dx) * Dy + original->r(y2, x2) * Dx * Dy);
+                                transformed->g(y, x) = vignmul * xexpf(original->g(y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->g(y1, x2) * Dx * (1.0 - Dy) + original->g(y2, x1) * (1.0 - Dx) * Dy + original->g(y2, x2) * Dx * Dy);
+                                transformed->b(y, x) = vignmul * xexpf(original->b(y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->b(y1, x2) * Dx * (1.0 - Dy) + original->b(y2, x1) * (1.0 - Dx) * Dy + original->b(y2, x2) * Dx * Dy);
+                            }
                         } else {
-                            transformed->r(y, x) = vignmul * (original->r(y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->r(y1, x2) * Dx * (1.0 - Dy) + original->r(y2, x1) * (1.0 - Dx) * Dy + original->r(y2, x2) * Dx * Dy);
-                            transformed->g(y, x) = vignmul * (original->g(y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->g(y1, x2) * Dx * (1.0 - Dy) + original->g(y2, x1) * (1.0 - Dx) * Dy + original->g(y2, x2) * Dx * Dy);
-                            transformed->b(y, x) = vignmul * (original->b(y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->b(y1, x2) * Dx * (1.0 - Dy) + original->b(y2, x1) * (1.0 - Dx) * Dy + original->b(y2, x2) * Dx * Dy);
+                            if (enableCA) {
+                                chTrans[c][y][x] = vignmul * (chOrig[c][y1][x1] * (1.0 - Dx) * (1.0 - Dy) + chOrig[c][y1][x2] * Dx * (1.0 - Dy) + chOrig[c][y2][x1] * (1.0 - Dx) * Dy + chOrig[c][y2][x2] * Dx * Dy);
+                            } else {
+                                transformed->r(y, x) = vignmul * (original->r(y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->r(y1, x2) * Dx * (1.0 - Dy) + original->r(y2, x1) * (1.0 - Dx) * Dy + original->r(y2, x2) * Dx * Dy);
+                                transformed->g(y, x) = vignmul * (original->g(y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->g(y1, x2) * Dx * (1.0 - Dy) + original->g(y2, x1) * (1.0 - Dx) * Dy + original->g(y2, x2) * Dx * Dy);
+                                transformed->b(y, x) = vignmul * (original->b(y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->b(y1, x2) * Dx * (1.0 - Dy) + original->b(y2, x1) * (1.0 - Dx) * Dy + original->b(y2, x2) * Dx * Dy);
+                            }
                         }
                     }
                 } else {
@@ -1181,6 +1218,10 @@ void ImProcFunctions::transformLCPCAOnly(Imagefloat *original, Imagefloat *trans
     chTrans[1] = transformed->g.ptrs;
     chTrans[2] = transformed->b.ptrs;
 
+    if (useLog) {
+        logEncode(original, multiThread);
+    }
+
 #ifdef _OPENMP
     #pragma omp parallel for if (multiThread)
 #endif
@@ -1216,8 +1257,11 @@ void ImProcFunctions::transformLCPCAOnly(Imagefloat *original, Imagefloat *trans
                         int y2 = LIM (yc + 1, 0, original->getHeight() - 1);
                         int x1 = LIM (xc,   0, original->getWidth() - 1);
                         int x2 = LIM (xc + 1, 0, original->getWidth() - 1);
-
-                        chTrans[c][y][x] = (chOrig[c][y1][x1] * (1.0 - Dx) * (1.0 - Dy) + chOrig[c][y1][x2] * Dx * (1.0 - Dy) + chOrig[c][y2][x1] * (1.0 - Dx) * Dy + chOrig[c][y2][x2] * Dx * Dy);
+                        if (!useLog) { // linear data: plain bilinear interpolation
+                            chTrans[c][y][x] = (chOrig[c][y1][x1] * (1.0 - Dx) * (1.0 - Dy) + chOrig[c][y1][x2] * Dx * (1.0 - Dy) + chOrig[c][y2][x1] * (1.0 - Dx) * Dy + chOrig[c][y2][x2] * Dx * Dy);
+                        } else { // source was log-encoded by logEncode(): interpolate in log space, then decode back to linear with xexpf
+                            chTrans[c][y][x] = xexpf(chOrig[c][y1][x1] * (1.0 - Dx) * (1.0 - Dy) + chOrig[c][y1][x2] * Dx * (1.0 - Dy) + chOrig[c][y2][x1] * (1.0 - Dx) * Dy + chOrig[c][y2][x2] * Dx * Dy);
+                        }
                     }
                 } else {
                     // not valid (source pixel x,y not inside source image, etc.)

From 399a0055c61b9c03479e36f07a08b2c1d7d70a09 Mon Sep 17 00:00:00 2001
From: Ingo Weyrich <heckflosse67@gmx.de>
Date: Tue, 31 Dec 2019 20:07:39 +0100
Subject: [PATCH 04/22] Log transform: Fix broken build

---
 rtengine/iptransform.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc
index 67647b7ef..8d89b96cc 100644
--- a/rtengine/iptransform.cc
+++ b/rtengine/iptransform.cc
@@ -976,7 +976,7 @@ void ImProcFunctions::transformLuminanceOnly (Imagefloat* original, Imagefloat*
 
 void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, Imagefloat *transformed, int cx, int cy, int sx, int sy, int oW, int oH, int fW, int fH, const LensCorrection *pLCPMap)
 {
-    BENCHFUN
+
     // set up stuff, depending on the mode we are
     const bool enableLCPDist = pLCPMap && params->lensProf.useDist;
     const bool enableCA = highQuality && needsCA();

From 6302084804b24de7b4bcb44b2e7774464282c15b Mon Sep 17 00:00:00 2001
From: Ingo Weyrich <heckflosse67@gmx.de>
Date: Wed, 1 Jan 2020 14:52:15 +0100
Subject: [PATCH 05/22] sleef: use our own replacement of rint. Based on code
 from Alberto Griggio, but further optimized for SSE

---
 rtengine/sleef.h | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/rtengine/sleef.h b/rtengine/sleef.h
index 7b7d5995f..1a953953c 100644
--- a/rtengine/sleef.h
+++ b/rtengine/sleef.h
@@ -894,6 +894,15 @@ __inline double xlog1p(double a) {
 
 #define R_LN2f 1.442695040888963407359924681001892137426645954152985934135449406931f
 
+#ifdef __SSE2__
+__inline int xrintf(float x) {
+    return _mm_cvt_ss2si(_mm_set_ss(x));
+}
+#else
+__inline int xrintf(float x) {
+    return x + (x < 0 ? -0.5f : 0.5f);
+}
+#endif
 __inline int32_t floatToRawIntBits(float d) {
     union {
         float f;
@@ -980,7 +989,7 @@ __inline float xsinf(float d) {
     int q;
     float u, s;
 
-    q = rint(d * rtengine::RT_1_PI_F);
+    q = xrintf(d * rtengine::RT_1_PI_F);
 
     d = mlaf(q, -PI4_Af*4, d);
     d = mlaf(q, -PI4_Bf*4, d);
@@ -1009,7 +1018,7 @@ __inline float xcosf(float d) {
     int q;
     float u, s;
 
-    q = 1 + 2*rint(d * rtengine::RT_1_PI_F - 0.5f);
+    q = 1 + 2*xrintf(d * rtengine::RT_1_PI_F - 0.5f);
 
     d = mlaf(q, -PI4_Af*2, d);
     d = mlaf(q, -PI4_Bf*2, d);
@@ -1041,7 +1050,7 @@ __inline float2 xsincosf(float d) {
     float u, s, t;
     float2 r;
 
-    q = rint(d * rtengine::RT_2_PI_F);
+    q = xrintf(d * rtengine::RT_2_PI_F);
 
     s = d;
 
@@ -1083,7 +1092,7 @@ __inline float xtanf(float d) {
     int q;
     float u, s, x;
 
-    q = rint(d * (float)(2 * rtengine::RT_1_PI));
+    q = xrintf(d * (float)(2 * rtengine::RT_1_PI));
 
     x = d;
 
@@ -1233,7 +1242,7 @@ __inline float xlogf1(float d) { // does xlogf(vmaxf(d, 1.f)) but faster
 __inline float xexpf(float d) {
     if(d<=-104.0f) return 0.0f;
 
-    int q = rint(d * R_LN2f);
+    int q = xrintf(d * R_LN2f);
     float s, u;
 
     s = mlaf(q, -L2Uf, d);

From 52f7c2c5311b5a43cc6fd499f36b4c325ee59aa7 Mon Sep 17 00:00:00 2001
From: Ingo Weyrich <heckflosse67@gmx.de>
Date: Wed, 1 Jan 2020 15:18:47 +0100
Subject: [PATCH 06/22] Log transform: further speedup

---
 rtengine/iptransform.cc | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc
index 8d89b96cc..caa5b7e4e 100644
--- a/rtengine/iptransform.cc
+++ b/rtengine/iptransform.cc
@@ -155,9 +155,11 @@ inline void interpolateTransformCubicLog(rtengine::Imagefloat* src, int xs, int
     const float t1Hor = A * (Dx - Dx * Dx);
     const float t2Hor = (3.f - 2.f * Dx) * Dx * Dx;
     const vfloat weight = _mm_set_ps(t1Hor * Dx, t1Hor * Dx - t1Hor + t2Hor, 1.f - (t1Hor * Dx) - t2Hor, t1Hor - (t1Hor * Dx));
-    r = mul * xexpf(vhadd(weight * rv));
-    g = mul * xexpf(vhadd(weight * gv));
-    b = mul * xexpf(vhadd(weight * bv));
+    const vfloat tempv = _mm_setr_ps(vhadd(weight * rv), vhadd(weight * gv), vhadd(weight * bv), 0.f);
+    const vfloat resultv = xexpf(tempv);
+    r = mul * resultv[0];
+    g = mul * resultv[1];
+    b = mul * resultv[2];
 }
 #else
 inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys, float Dx, float Dy, float &r, float &g, float &b, float mul)

From 085c68fc2985b2d556398f78ed411cf985ee67d6 Mon Sep 17 00:00:00 2001
From: Ingo Weyrich <heckflosse67@gmx.de>
Date: Wed, 1 Jan 2020 19:06:02 +0100
Subject: [PATCH 07/22] Log transform: fix bug in preview mode

---
 rtengine/improcfun.h      |  6 ++---
 rtengine/iptransform.cc   | 48 +++++++++++++++++++++++++--------------
 rtengine/simpleprocess.cc |  2 +-
 3 files changed, 35 insertions(+), 21 deletions(-)

diff --git a/rtengine/improcfun.h b/rtengine/improcfun.h
index 661f399ef..c1a4a5979 100644
--- a/rtengine/improcfun.h
+++ b/rtengine/improcfun.h
@@ -84,8 +84,8 @@ class ImProcFunctions
     void calcVignettingParams(int oW, int oH, const procparams::VignettingParams& vignetting, double &w2, double &h2, double& maxRadius, double &v, double &b, double &mul);
 
     void transformLuminanceOnly(Imagefloat* original, Imagefloat* transformed, int cx, int cy, int oW, int oH, int fW, int fH);
-    void transformGeneral(bool highQuality, Imagefloat *original, Imagefloat *transformed, int cx, int cy, int sx, int sy, int oW, int oH, int fW, int fH, const LensCorrection *pLCPMap);
-    void transformLCPCAOnly(Imagefloat *original, Imagefloat *transformed, int cx, int cy, const LensCorrection *pLCPMap);
+    void transformGeneral(bool highQuality, Imagefloat *original, Imagefloat *transformed, int cx, int cy, int sx, int sy, int oW, int oH, int fW, int fH, const LensCorrection *pLCPMap, bool useOriginalBuffer);
+    void transformLCPCAOnly(Imagefloat *original, Imagefloat *transformed, int cx, int cy, const LensCorrection *pLCPMap, bool useOriginalBuffer);
 
     bool needsCA() const;
     bool needsDistortion() const;
@@ -156,7 +156,7 @@ public:
 //    void colorCurve       (LabImage* lold, LabImage* lnew);
     void sharpening(LabImage* lab, const procparams::SharpeningParams &sharpenParam, bool showMask = false);
     void sharpeningcam(CieImage* ncie, float** buffer, bool showMask = false);
-    void transform(Imagefloat* original, Imagefloat* transformed, int cx, int cy, int sx, int sy, int oW, int oH, int fW, int fH, const FramesMetaData *metadata, int rawRotationDeg, bool fullImage);
+    void transform(Imagefloat* original, Imagefloat* transformed, int cx, int cy, int sx, int sy, int oW, int oH, int fW, int fH, const FramesMetaData *metadata, int rawRotationDeg, bool fullImage, bool useOriginalBuffer = false);
     float resizeScale(const procparams::ProcParams* params, int fw, int fh, int &imw, int &imh);
     void lab2monitorRgb(LabImage* lab, Image8* image);
     void resize(Imagefloat* src, Imagefloat* dst, float dScale);
diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc
index caa5b7e4e..fc79dcae4 100644
--- a/rtengine/iptransform.cc
+++ b/rtengine/iptransform.cc
@@ -86,25 +86,25 @@ float normn (float a, float b, int n)
     }
 }
 
-void logEncode(rtengine::Imagefloat *original, bool multiThread) {
+void logEncode(rtengine::Imagefloat *src, rtengine::Imagefloat *dest, bool multiThread) {
 
 #ifdef _OPENMP
     #pragma omp parallel for schedule(dynamic, 16) if(multiThread)
 #endif
 
-    for (int y = 0; y < original->getHeight(); ++y) {
+    for (int y = 0; y < src->getHeight(); ++y) {
         int x = 0;
 #ifdef __SSE2__
-        for (; x < original->getWidth() - 3; x += 4) {
-            STVFU(original->r(y, x), xlogf1(LVFU(original->r(y, x))));
-            STVFU(original->g(y, x), xlogf1(LVFU(original->g(y, x))));
-            STVFU(original->b(y, x), xlogf1(LVFU(original->b(y, x))));
+        for (; x < src->getWidth() - 3; x += 4) {
+            STVFU(dest->r(y, x), xlogf1(LVFU(src->r(y, x))));
+            STVFU(dest->g(y, x), xlogf1(LVFU(src->g(y, x))));
+            STVFU(dest->b(y, x), xlogf1(LVFU(src->b(y, x))));
         }
 #endif
-        for (; x < original->getWidth(); ++x) {
-            original->r(y, x) = xlogf1(original->r(y, x));
-            original->g(y, x) = xlogf1(original->g(y, x));
-            original->b(y, x) = xlogf1(original->b(y, x));
+        for (; x < src->getWidth(); ++x) {
+            dest->r(y, x) = xlogf1(src->r(y, x));
+            dest->g(y, x) = xlogf1(src->g(y, x));
+            dest->b(y, x) = xlogf1(src->b(y, x));
         }
     }
 }
@@ -536,7 +536,7 @@ bool ImProcFunctions::transCoord (int W, int H, int x, int y, int w, int h, int&
 
 void ImProcFunctions::transform (Imagefloat* original, Imagefloat* transformed, int cx, int cy, int sx, int sy, int oW, int oH, int fW, int fH,
                                  const FramesMetaData *metadata,
-                                 int rawRotationDeg, bool fullImage)
+                                 int rawRotationDeg, bool fullImage, bool useOriginalBuffer)
 {
     double focalLen = metadata->getFocalLen();
     double focalLen35mm = metadata->getFocalLen35mm();
@@ -584,10 +584,10 @@ void ImProcFunctions::transform (Imagefloat* original, Imagefloat* transformed,
                 dest = tmpimg.get();
             }
         }
-        transformGeneral(highQuality, original, dest, cx, cy, sx, sy, oW, oH, fW, fH, pLCPMap.get());
+        transformGeneral(highQuality, original, dest, cx, cy, sx, sy, oW, oH, fW, fH, pLCPMap.get(), useOriginalBuffer);
         
         if (highQuality && dest != transformed) {
-            transformLCPCAOnly(dest, transformed, cx, cy, pLCPMap.get());
+            transformLCPCAOnly(dest, transformed, cx, cy, pLCPMap.get(), useOriginalBuffer);
         }
     }
 }
@@ -976,7 +976,7 @@ void ImProcFunctions::transformLuminanceOnly (Imagefloat* original, Imagefloat*
 }
 
 
-void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, Imagefloat *transformed, int cx, int cy, int sx, int sy, int oW, int oH, int fW, int fH, const LensCorrection *pLCPMap)
+void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, Imagefloat *transformed, int cx, int cy, int sx, int sy, int oW, int oH, int fW, int fH, const LensCorrection *pLCPMap, bool useOriginalBuffer)
 {
 
     // set up stuff, depending on the mode we are
@@ -1058,8 +1058,15 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I
     const double centerFactorx = cx - w2;
     const double centerFactory = cy - h2;
 
+    std::unique_ptr<Imagefloat> tempLog;
     if (useLog) {
-        logEncode(original, multiThread);
+        if (!useOriginalBuffer) {
+            tempLog.reset(new Imagefloat(original->getWidth(), original->getHeight()));
+            logEncode(original, tempLog.get(), multiThread);
+            original = tempLog.get();
+        } else {
+            logEncode(original, original, multiThread);
+        }
     }
     // main cycle
 #ifdef _OPENMP
@@ -1205,7 +1212,7 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I
 }
 
 
-void ImProcFunctions::transformLCPCAOnly(Imagefloat *original, Imagefloat *transformed, int cx, int cy, const LensCorrection *pLCPMap)
+void ImProcFunctions::transformLCPCAOnly(Imagefloat *original, Imagefloat *transformed, int cx, int cy, const LensCorrection *pLCPMap, bool useOriginalBuffer)
 {
     assert(pLCPMap && params->lensProf.useCA && pLCPMap->isCACorrectionAvailable());
     const bool useLog = params->pdsharpening.enabled;
@@ -1220,8 +1227,15 @@ void ImProcFunctions::transformLCPCAOnly(Imagefloat *original, Imagefloat *trans
     chTrans[1] = transformed->g.ptrs;
     chTrans[2] = transformed->b.ptrs;
 
+    std::unique_ptr<Imagefloat> tempLog;
     if (useLog) {
-        logEncode(original, multiThread);
+        if (!useOriginalBuffer) {
+            tempLog.reset(new Imagefloat(original->getWidth(), original->getHeight()));
+            logEncode(original, tempLog.get(), multiThread);
+            original = tempLog.get();
+        } else {
+            logEncode(original, original, multiThread);
+        }
     }
 
 #ifdef _OPENMP
diff --git a/rtengine/simpleprocess.cc b/rtengine/simpleprocess.cc
index c35fc7431..3811c0c80 100644
--- a/rtengine/simpleprocess.cc
+++ b/rtengine/simpleprocess.cc
@@ -880,7 +880,7 @@ private:
                 trImg = new Imagefloat (fw, fh);
             }
             ipf.transform (baseImg, trImg, 0, 0, 0, 0, fw, fh, fw, fh,
-                           imgsrc->getMetaData(), imgsrc->getRotateDegree(), true);
+                           imgsrc->getMetaData(), imgsrc->getRotateDegree(), true, true);
             if(trImg != baseImg) {
                 delete baseImg;
                 baseImg = trImg;

From 2ce6e6d1d389bed4dfd82833ddf082920bc9fcd0 Mon Sep 17 00:00:00 2001
From: Ingo Weyrich <heckflosse67@gmx.de>
Date: Thu, 2 Jan 2020 14:35:27 +0100
Subject: [PATCH 08/22] skip unnecessary transform

---
 rtengine/dcrop.cc             | 75 +++++++++++++++++------------------
 rtengine/improccoordinator.cc |  4 +-
 rtengine/improcfun.h          |  2 +-
 rtengine/iptransform.cc       |  9 ++++-
 rtengine/rtthumbnail.cc       | 10 ++---
 rtengine/simpleprocess.cc     |  2 +-
 6 files changed, 52 insertions(+), 50 deletions(-)

diff --git a/rtengine/dcrop.cc b/rtengine/dcrop.cc
index a6889b954..890003ee0 100644
--- a/rtengine/dcrop.cc
+++ b/rtengine/dcrop.cc
@@ -173,8 +173,6 @@ void Crop::update(int todo)
     int widIm = parent->fw;//full image
     int heiIm = parent->fh;
 
-    bool needstransform  = parent->ipf.needsTransform();
-
     if (todo & (M_INIT | M_LINDENOISE | M_HDR)) {
         MyMutex::MyLock lock(parent->minit);  // Also used in improccoord
 
@@ -766,8 +764,9 @@ void Crop::update(int todo)
         }
     }
 
+    const bool needstransform  = parent->ipf.needsTransform(skips(parent->fw, skip), skips(parent->fh, skip), parent->imgsrc->getRotateDegree(), parent->imgsrc->getMetaData());
     // transform
-    if (needstransform || ((todo & (M_TRANSFORM | M_RGBCURVE))  && params.dirpyrequalizer.cbdlMethod == "bef" && params.dirpyrequalizer.enabled && !params.colorappearance.enabled)) {
+    if (needstransform || ((todo & (M_TRANSFORM | M_RGBCURVE)) && params.dirpyrequalizer.cbdlMethod == "bef" && params.dirpyrequalizer.enabled && !params.colorappearance.enabled)) {
         if (!transCrop) {
             transCrop = new Imagefloat(cropw, croph);
         }
@@ -784,10 +783,7 @@ void Crop::update(int todo)
             baseCrop = transCrop;
         }
     } else {
-        if (transCrop) {
-            delete transCrop;
-        }
-
+        delete transCrop;
         transCrop = nullptr;
     }
 
@@ -1167,41 +1163,42 @@ bool Crop::setCropSizes(int rcx, int rcy, int rcw, int rch, int skip, bool inter
 
     parent->ipf.transCoord(parent->fw, parent->fh, bx1, by1, bw, bh, orx, ory, orw, orh);
 
-    if (check_need_larger_crop_for_lcp_distortion(parent->fw, parent->fh, orx, ory, orw, orh, *parent->params)) {
-        // TODO - this is an estimate of the max distortion relative to the image size. ATM it is hardcoded to be 15%, which seems enough. If not, need to revise
-        int dW = int (double (parent->fw) * 0.15 / (2 * skip));
-        int dH = int (double (parent->fh) * 0.15 / (2 * skip));
-        int x1 = orx - dW;
-        int x2 = orx + orw + dW;
-        int y1 = ory - dH;
-        int y2 = ory + orh + dH;
+    if (parent->ipf.needsTransform(skips(parent->fw, skip), skips(parent->fh, skip), parent->imgsrc->getRotateDegree(), parent->imgsrc->getMetaData())) {
+        if (check_need_larger_crop_for_lcp_distortion(parent->fw, parent->fh, orx, ory, orw, orh, *parent->params)) {
+            // TODO - this is an estimate of the max distortion relative to the image size. ATM it is hardcoded to be 15%, which seems enough. If not, need to revise
+            int dW = int (double (parent->fw) * 0.15 / (2 * skip));
+            int dH = int (double (parent->fh) * 0.15 / (2 * skip));
+            int x1 = orx - dW;
+            int x2 = orx + orw + dW;
+            int y1 = ory - dH;
+            int y2 = ory + orh + dH;
 
-        if (x1 < 0) {
-            x2 += -x1;
-            x1 = 0;
+            if (x1 < 0) {
+                x2 += -x1;
+                x1 = 0;
+            }
+
+            if (x2 > parent->fw) {
+                x1 -= x2 - parent->fw;
+                x2 = parent->fw;
+            }
+
+            if (y1 < 0) {
+                y2 += -y1;
+                y1 = 0;
+            }
+
+            if (y2 > parent->fh) {
+                y1 -= y2 - parent->fh;
+                y2 = parent->fh;
+            }
+
+            orx = max(x1, 0);
+            ory = max(y1, 0);
+            orw = min(x2 - x1, parent->fw - orx);
+            orh = min(y2 - y1, parent->fh - ory);
         }
-
-        if (x2 > parent->fw) {
-            x1 -= x2 - parent->fw;
-            x2 = parent->fw;
-        }
-
-        if (y1 < 0) {
-            y2 += -y1;
-            y1 = 0;
-        }
-
-        if (y2 > parent->fh) {
-            y1 -= y2 - parent->fh;
-            y2 = parent->fh;
-        }
-
-        orx = max(x1, 0);
-        ory = max(y1, 0);
-        orw = min(x2 - x1, parent->fw - orx);
-        orh = min(y2 - y1, parent->fh - ory);
     }
-
     leftBorder  = skips(rqx1 - bx1, skip);
     upperBorder = skips(rqy1 - by1, skip);
 
diff --git a/rtengine/improccoordinator.cc b/rtengine/improccoordinator.cc
index a4ca0ea0b..e915a11b0 100644
--- a/rtengine/improccoordinator.cc
+++ b/rtengine/improccoordinator.cc
@@ -545,7 +545,7 @@ void ImProcCoordinator::updatePreviewImage(int todo, bool panningRelatedChange)
         oprevi = orig_prev;
 
         // Remove transformation if unneeded
-        bool needstransform = ipf.needsTransform();
+        bool needstransform = ipf.needsTransform(fw, fh, imgsrc->getRotateDegree(), imgsrc->getMetaData());
 
         if ((needstransform || ((todo & (M_TRANSFORM | M_RGBCURVE))  && params->dirpyrequalizer.cbdlMethod == "bef" && params->dirpyrequalizer.enabled && !params->colorappearance.enabled))) {
             assert(oprevi);
@@ -1349,7 +1349,7 @@ void ImProcCoordinator::saveInputICCReference(const Glib::ustring& fname, bool a
     imgsrc->getImage(currWB, tr, im, pp, ppar.toneCurve, ppar.raw);
     ImProcFunctions ipf(&ppar, true);
 
-    if (ipf.needsTransform()) {
+    if (ipf.needsTransform(fW, fH, imgsrc->getRotateDegree(), imgsrc->getMetaData())) {
         Imagefloat* trImg = new Imagefloat(fW, fH);
         ipf.transform(im, trImg, 0, 0, 0, 0, fW, fH, fW, fH,
                       imgsrc->getMetaData(), imgsrc->getRotateDegree(), true);
diff --git a/rtengine/improcfun.h b/rtengine/improcfun.h
index c1a4a5979..8f4ae7771 100644
--- a/rtengine/improcfun.h
+++ b/rtengine/improcfun.h
@@ -119,7 +119,7 @@ public:
     }
     void setScale(double iscale);
 
-    bool needsTransform() const;
+    bool needsTransform(int oW, int oH, int rawRotationDeg, const FramesMetaData *metadata) const;
     bool needsPCVignetting() const;
 
     void firstAnalysis(const Imagefloat* const working, const procparams::ProcParams &params, LUTu & vhist16);
diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc
index fc79dcae4..c9f88caf0 100644
--- a/rtengine/iptransform.cc
+++ b/rtengine/iptransform.cc
@@ -1358,9 +1358,14 @@ bool ImProcFunctions::needsLensfun() const
     return params->lensProf.useLensfun();
 }
 
-bool ImProcFunctions::needsTransform () const
+bool ImProcFunctions::needsTransform (int oW, int oH, int rawRotationDeg, const FramesMetaData *metadata) const
 {
-    return needsCA () || needsDistortion () || needsRotation () || needsPerspective () || needsGradient () || needsPCVignetting () || needsVignetting () || needsLCP() || needsLensfun();
+    bool needsLf = needsLensfun();
+    if (needsLf) {
+        std::unique_ptr<const LensCorrection> pLCPMap = LFDatabase::getInstance()->findModifier(params->lensProf, metadata, oW, oH, params->coarse, rawRotationDeg);
+        needsLf = pLCPMap.get();
+    }
+    return needsCA () || needsDistortion () || needsRotation () || needsPerspective () || needsGradient () || needsPCVignetting () || needsVignetting () || needsLCP() || needsLf;
 }
 
 
diff --git a/rtengine/rtthumbnail.cc b/rtengine/rtthumbnail.cc
index 0cdcbf6ed..9da601e2a 100644
--- a/rtengine/rtthumbnail.cc
+++ b/rtengine/rtthumbnail.cc
@@ -1247,12 +1247,12 @@ IImage8* Thumbnail::processImage (const procparams::ProcParams& params, eSensorT
     ipf.ToneMapFattal02(baseImg);
     
     // perform transform
-    if (ipf.needsTransform()) {
+    int origFW;
+    int origFH;
+    double tscale = 0.0;
+    getDimensions (origFW, origFH, tscale);
+    if (ipf.needsTransform(origFW * tscale + 0.5, origFH * tscale + 0.5, 0, metadata)) {
         Imagefloat* trImg = new Imagefloat (fw, fh);
-        int origFW;
-        int origFH;
-        double tscale = 0.0;
-        getDimensions (origFW, origFH, tscale);
         ipf.transform (baseImg, trImg, 0, 0, 0, 0, fw, fh, origFW * tscale + 0.5, origFH * tscale + 0.5, metadata, 0, true); // Raw rotate degree not detectable here
         delete baseImg;
         baseImg = trImg;
diff --git a/rtengine/simpleprocess.cc b/rtengine/simpleprocess.cc
index 3811c0c80..1c1e46a65 100644
--- a/rtengine/simpleprocess.cc
+++ b/rtengine/simpleprocess.cc
@@ -872,7 +872,7 @@ private:
         ipf.ToneMapFattal02(baseImg);
 
         // perform transform (excepted resizing)
-        if (ipf.needsTransform()) {
+        if (ipf.needsTransform(fw, fh, imgsrc->getRotateDegree(), imgsrc->getMetaData())) {
             Imagefloat* trImg = nullptr;
             if (ipf.needsLuminanceOnly()) {
                 trImg = baseImg;

From 49d594f67a6f9ad2f2b0d6f0c591136d53fe2dff Mon Sep 17 00:00:00 2001
From: Ingo Weyrich <heckflosse67@gmx.de>
Date: Thu, 2 Jan 2020 19:28:57 +0100
Subject: [PATCH 09/22] Log transform: add method combobox

---
 rtdata/languages/default |  3 +++
 rtengine/iptransform.cc  |  4 ++--
 rtengine/procparams.cc   |  9 ++++++++-
 rtengine/procparams.h    |  1 +
 rtgui/lensgeom.cc        | 40 +++++++++++++++++++++++++++++++++++++---
 rtgui/lensgeom.h         |  3 +++
 rtgui/paramsedited.cc    |  6 ++++++
 rtgui/paramsedited.h     |  1 +
 8 files changed, 61 insertions(+), 6 deletions(-)

diff --git a/rtdata/languages/default b/rtdata/languages/default
index 5004bf25c..922c50c51 100644
--- a/rtdata/languages/default
+++ b/rtdata/languages/default
@@ -787,6 +787,7 @@ HISTORY_MSG_SH_COLORSPACE;S/H - Colorspace
 HISTORY_MSG_SOFTLIGHT_ENABLED;Soft light
 HISTORY_MSG_SOFTLIGHT_STRENGTH;Soft light - Strength
 HISTORY_MSG_TM_FATTAL_ANCHOR;DRC - Anchor
+HISTORY_MSG_TRANS_METHOD;Geometry - Method
 HISTORY_NEWSNAPSHOT;Add
 HISTORY_NEWSNAPSHOT_TOOLTIP;Shortcut: <b>Alt-s</b>
 HISTORY_SNAPSHOT;Snapshot
@@ -1775,6 +1776,8 @@ TP_LABCURVE_RSTPRO_TOOLTIP;Works on the Chromaticity slider and the CC curve.
 TP_LENSGEOM_AUTOCROP;Auto-Crop
 TP_LENSGEOM_FILL;Auto-fill
 TP_LENSGEOM_LABEL;Lens / Geometry
+TP_LENSGEOM_LIN;Linear
+TP_LENSGEOM_LOG;Logarithmic
 TP_LENSPROFILE_CORRECTION_AUTOMATCH;Automatically selected
 TP_LENSPROFILE_CORRECTION_LCPFILE;LCP file
 TP_LENSPROFILE_CORRECTION_MANUAL;Manually selected
diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc
index c9f88caf0..39f270d51 100644
--- a/rtengine/iptransform.cc
+++ b/rtengine/iptransform.cc
@@ -1054,7 +1054,7 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I
     const double ascale = params->commonTrans.autofill ? getTransformAutoFill(oW, oH, pLCPMap) : 1.0;
 
     const bool darkening = (params->vignetting.amount <= 0.0);
-    const bool useLog = params->pdsharpening.enabled && highQuality;
+    const bool useLog = params->commonTrans.method == "log" && highQuality;
     const double centerFactorx = cx - w2;
     const double centerFactory = cy - h2;
 
@@ -1215,7 +1215,7 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I
 void ImProcFunctions::transformLCPCAOnly(Imagefloat *original, Imagefloat *transformed, int cx, int cy, const LensCorrection *pLCPMap, bool useOriginalBuffer)
 {
     assert(pLCPMap && params->lensProf.useCA && pLCPMap->isCACorrectionAvailable());
-    const bool useLog = params->pdsharpening.enabled;
+    const bool useLog = params->commonTrans.method == "log";
 
     float** chOrig[3];
     chOrig[0] = original->r.ptrs;
diff --git a/rtengine/procparams.cc b/rtengine/procparams.cc
index ba6fc237b..99f4f4404 100644
--- a/rtengine/procparams.cc
+++ b/rtengine/procparams.cc
@@ -1732,13 +1732,14 @@ bool CoarseTransformParams::operator !=(const CoarseTransformParams& other) cons
 }
 
 CommonTransformParams::CommonTransformParams() :
+    method("log"),
     autofill(true)
 {
 }
 
 bool CommonTransformParams::operator ==(const CommonTransformParams& other) const
 {
-    return autofill == other.autofill;
+    return method == other.method && autofill == other.autofill;
 }
 
 bool CommonTransformParams::operator !=(const CommonTransformParams& other) const
@@ -3322,6 +3323,7 @@ int ProcParams::save(const Glib::ustring& fname, const Glib::ustring& fname2, bo
         saveToKeyfile(!pedited || pedited->coarse.vflip, "Coarse Transformation", "VerticalFlip", coarse.vflip, keyFile);
 
 // Common properties for transformations
+        saveToKeyfile(!pedited || pedited->commonTrans.method, "Common Properties for Transformations", "Method", commonTrans.method, keyFile);
         saveToKeyfile(!pedited || pedited->commonTrans.autofill, "Common Properties for Transformations", "AutoFill", commonTrans.autofill, keyFile);
 
 // Rotation
@@ -4360,6 +4362,11 @@ int ProcParams::load(const Glib::ustring& fname, ParamsEdited* pedited)
         }
 
         if (keyFile.has_group("Common Properties for Transformations")) {
+            if (keyFile.has_key("Common Properties for Transformations", "Method")) {
+                assignFromKeyfile(keyFile, "Common Properties for Transformations", "Method", pedited, commonTrans.method, pedited->commonTrans.method);
+            } else {
+                commonTrans.method = "lin";
+            }
             assignFromKeyfile(keyFile, "Common Properties for Transformations", "AutoFill", pedited, commonTrans.autofill, pedited->commonTrans.autofill);
         }
 
diff --git a/rtengine/procparams.h b/rtengine/procparams.h
index c41e55872..0b6b2dc46 100644
--- a/rtengine/procparams.h
+++ b/rtengine/procparams.h
@@ -837,6 +837,7 @@ struct CoarseTransformParams {
   * Common transformation parameters
   */
 struct CommonTransformParams {
+    Glib::ustring method;
     bool autofill;
 
     CommonTransformParams();
diff --git a/rtgui/lensgeom.cc b/rtgui/lensgeom.cc
index 76e0635eb..762726107 100644
--- a/rtgui/lensgeom.cc
+++ b/rtgui/lensgeom.cc
@@ -17,6 +17,8 @@
  *  along with RawTherapee.  If not, see <https://www.gnu.org/licenses/>.
  */
 #include "lensgeom.h"
+
+#include "eventmapper.h"
 #include "guiutils.h"
 #include "rtimage.h"
 
@@ -28,6 +30,18 @@ using namespace rtengine::procparams;
 LensGeometry::LensGeometry () : FoldableToolPanel(this, "lensgeom", M("TP_LENSGEOM_LABEL")), rlistener(nullptr), lastFill(false)
 {
 
+    auto m = ProcEventMapper::getInstance();
+    EvTransMethod = m->newEvent(TRANSFORM, "HISTORY_MSG_TRANS_METHOD");
+
+    Gtk::HBox* hb1 = Gtk::manage (new Gtk::HBox ());
+    hb1->pack_start (*Gtk::manage (new Gtk::Label ( M("TP_RAW_DMETHOD") + ": ")), Gtk::PACK_SHRINK, 4);
+    method = Gtk::manage (new MyComboBoxText ());
+    method->append(M("TP_LENSGEOM_LOG"));
+    method->append(M("TP_LENSGEOM_LIN"));
+    method->set_active(0);
+    hb1->pack_end (*method, Gtk::PACK_EXPAND_WIDGET, 4);
+    pack_start( *hb1, Gtk::PACK_SHRINK, 4);
+
     fill = Gtk::manage (new Gtk::CheckButton (M("TP_LENSGEOM_FILL")));
     pack_start (*fill);
 
@@ -39,8 +53,9 @@ LensGeometry::LensGeometry () : FoldableToolPanel(this, "lensgeom", M("TP_LENSGE
     packBox = Gtk::manage (new ToolParamBlock ());
     pack_start (*packBox);
 
-    autoCrop->signal_pressed().connect( sigc::mem_fun(*this, &LensGeometry::autoCropPressed) );
-    fillConn = fill->signal_toggled().connect( sigc::mem_fun(*this, &LensGeometry::fillPressed) );
+    method->connect(method->signal_changed().connect(sigc::mem_fun(*this, &LensGeometry::methodChanged)));
+    autoCrop->signal_pressed().connect(sigc::mem_fun(*this, &LensGeometry::autoCropPressed));
+    fillConn = fill->signal_toggled().connect(sigc::mem_fun(*this, &LensGeometry::fillPressed));
 
     fill->set_active (true);
     show_all ();
@@ -55,8 +70,14 @@ void LensGeometry::read (const ProcParams* pp, const ParamsEdited* pedited)
 {
 
     disableListener ();
+    method->block (true);
+    method->set_active(pp->commonTrans.method == "log" ? 0 : 1);
 
     if (pedited) {
+        if(!pedited->commonTrans.method) {
+            method->set_active_text(M("GENERAL_UNCHANGED"));
+        }
+
         fill->set_inconsistent (!pedited->commonTrans.autofill);
     }
 
@@ -67,15 +88,20 @@ void LensGeometry::read (const ProcParams* pp, const ParamsEdited* pedited)
 
     lastFill = pp->commonTrans.autofill;
 
+    method->block (false);
     enableListener ();
 }
 
 void LensGeometry::write (ProcParams* pp, ParamsEdited* pedited)
 {
-
+    int currentRow = method->get_active_row_number();
+    if( currentRow >= 0 && method->get_active_text() != M("GENERAL_UNCHANGED")) {
+        pp->commonTrans.method = currentRow == 0 ? "log" : "lin";
+    }
     pp->commonTrans.autofill   = fill->get_active ();
 
     if (pedited) {
+        pedited->commonTrans.method = method->get_active_text() != M("GENERAL_UNCHANGED");
         pedited->commonTrans.autofill   = !fill->get_inconsistent();
     }
 }
@@ -115,6 +141,14 @@ void LensGeometry::fillPressed ()
     }
 }
 
+void LensGeometry::methodChanged ()
+{
+
+    if (listener && method->get_active_row_number() >= 0) {
+        listener->panelChanged(EvTransMethod, method->get_active_text());
+    }
+}
+
 void LensGeometry::setBatchMode (bool batchMode)
 {
 
diff --git a/rtgui/lensgeom.h b/rtgui/lensgeom.h
index 18b31a619..73c28b006 100644
--- a/rtgui/lensgeom.h
+++ b/rtgui/lensgeom.h
@@ -29,6 +29,7 @@ class LensGeometry final :
 {
 
 protected:
+    MyComboBoxText*     method;
     Gtk::Button*        autoCrop;
     LensGeomListener*   rlistener;
     Gtk::CheckButton*   fill;
@@ -36,6 +37,7 @@ protected:
     sigc::connection    fillConn;
     ToolParamBlock*     packBox;
 
+    rtengine::ProcEvent EvTransMethod;
 public:
 
     LensGeometry ();
@@ -50,6 +52,7 @@ public:
     void write          (rtengine::procparams::ProcParams* pp, ParamsEdited* pedited = nullptr) override;
     void setBatchMode   (bool batchMode) override;
 
+    void methodChanged();
     void fillPressed            ();
     void autoCropPressed        ();
     void setLensGeomListener    (LensGeomListener* l)
diff --git a/rtgui/paramsedited.cc b/rtgui/paramsedited.cc
index 82132008a..d41bd472c 100644
--- a/rtgui/paramsedited.cc
+++ b/rtgui/paramsedited.cc
@@ -321,6 +321,7 @@ void ParamsEdited::set(bool v)
     coarse.rotate = v;
     coarse.hflip = v;
     coarse.vflip = v;
+    commonTrans.method = v;
     commonTrans.autofill = v;
     rotate.degree = v;
     distortion.amount = v;
@@ -904,6 +905,7 @@ void ParamsEdited::initFrom(const std::vector<rtengine::procparams::ProcParams>&
         coarse.rotate = coarse.rotate && p.coarse.rotate == other.coarse.rotate;
         coarse.hflip = coarse.hflip && p.coarse.hflip == other.coarse.hflip;
         coarse.vflip = coarse.vflip && p.coarse.vflip == other.coarse.vflip;
+        commonTrans.method = commonTrans.method && p.commonTrans.method == other.commonTrans.method;
         commonTrans.autofill = commonTrans.autofill && p.commonTrans.autofill == other.commonTrans.autofill;
         rotate.degree = rotate.degree && p.rotate.degree == other.rotate.degree;
         distortion.amount = distortion.amount && p.distortion.amount == other.distortion.amount;
@@ -2265,6 +2267,10 @@ void ParamsEdited::combine(rtengine::procparams::ProcParams& toEdit, const rteng
         toEdit.coarse.vflip = mods.coarse.vflip;
     }
 
+    if (commonTrans.method) {
+        toEdit.commonTrans.method = mods.commonTrans.method;
+    }
+
     if (commonTrans.autofill) {
         toEdit.commonTrans.autofill = mods.commonTrans.autofill;
     }
diff --git a/rtgui/paramsedited.h b/rtgui/paramsedited.h
index 01a3e4efe..153dd7bb6 100644
--- a/rtgui/paramsedited.h
+++ b/rtgui/paramsedited.h
@@ -365,6 +365,7 @@ struct CoarseTransformParamsEdited {
 };
 
 struct CommonTransformParamsEdited {
+    bool method;
     bool autofill;
 };
 

From 7c2aa2405b497027fbcd8865f6449ab919aa6813 Mon Sep 17 00:00:00 2001
From: Ingo Weyrich <heckflosse67@gmx.de>
Date: Thu, 2 Jan 2020 23:01:37 +0100
Subject: [PATCH 10/22] Startup crashes in LensProfilePanel::LFDbHelper, fixes
 #5577, thanks to @dlichtenberger for the fix

---
 rtgui/lensprofile.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/rtgui/lensprofile.cc b/rtgui/lensprofile.cc
index be21512d5..1a8391f66 100644
--- a/rtgui/lensprofile.cc
+++ b/rtgui/lensprofile.cc
@@ -586,6 +586,9 @@ void LensProfilePanel::onCorrModeChanged(const Gtk::RadioButton* rbChanged)
 
 LensProfilePanel::LFDbHelper::LFDbHelper()
 {
+    lensfunCameraModel = Gtk::TreeStore::create(lensfunModelCam);
+    lensfunLensModel = Gtk::TreeStore::create(lensfunModelLens);
+
 #ifdef _OPENMP
 #pragma omp parallel sections if (!settings->verbose)
 #endif
@@ -594,14 +597,12 @@ LensProfilePanel::LFDbHelper::LFDbHelper()
 #pragma omp section
 #endif
         {
-            lensfunCameraModel = Gtk::TreeStore::create(lensfunModelCam);
             fillLensfunCameras();
         }
 #ifdef _OPENMP
 #pragma omp section
 #endif
         {
-            lensfunLensModel = Gtk::TreeStore::create(lensfunModelLens);
             fillLensfunLenses();
         }
     }

From e07ff4032b23198d6cd33717888ab8484d895744 Mon Sep 17 00:00:00 2001
From: Ingo Weyrich <heckflosse67@gmx.de>
Date: Fri, 3 Jan 2020 11:35:20 +0100
Subject: [PATCH 11/22] log transform: Fix segfault

---
 rtengine/iptransform.cc | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc
index 39f270d51..29e10035e 100644
--- a/rtengine/iptransform.cc
+++ b/rtengine/iptransform.cc
@@ -1217,15 +1217,7 @@ void ImProcFunctions::transformLCPCAOnly(Imagefloat *original, Imagefloat *trans
     assert(pLCPMap && params->lensProf.useCA && pLCPMap->isCACorrectionAvailable());
     const bool useLog = params->commonTrans.method == "log";
 
-    float** chOrig[3];
-    chOrig[0] = original->r.ptrs;
-    chOrig[1] = original->g.ptrs;
-    chOrig[2] = original->b.ptrs;
-
-    float** chTrans[3];
-    chTrans[0] = transformed->r.ptrs;
-    chTrans[1] = transformed->g.ptrs;
-    chTrans[2] = transformed->b.ptrs;
+    float** chTrans[3] = {transformed->r.ptrs, transformed->g.ptrs, transformed->b.ptrs};
 
     std::unique_ptr<Imagefloat> tempLog;
     if (useLog) {
@@ -1237,6 +1229,7 @@ void ImProcFunctions::transformLCPCAOnly(Imagefloat *original, Imagefloat *trans
             logEncode(original, original, multiThread);
         }
     }
+    float** chOrig[3] = {original->r.ptrs, original->g.ptrs, original->b.ptrs};
 
 #ifdef _OPENMP
     #pragma omp parallel for if (multiThread)
@@ -1276,7 +1269,7 @@ void ImProcFunctions::transformLCPCAOnly(Imagefloat *original, Imagefloat *trans
                         if (!useLog) {
                             chTrans[c][y][x] = (chOrig[c][y1][x1] * (1.0 - Dx) * (1.0 - Dy) + chOrig[c][y1][x2] * Dx * (1.0 - Dy) + chOrig[c][y2][x1] * (1.0 - Dx) * Dy + chOrig[c][y2][x2] * Dx * Dy);
                         } else {
-                            chTrans[c][y][x] = (chOrig[c][y1][x1] * (1.0 - Dx) * (1.0 - Dy) + chOrig[c][y1][x2] * Dx * (1.0 - Dy) + chOrig[c][y2][x1] * (1.0 - Dx) * Dy + chOrig[c][y2][x2] * Dx * Dy);
+                            chTrans[c][y][x] = xexpf(chOrig[c][y1][x1] * (1.0 - Dx) * (1.0 - Dy) + chOrig[c][y1][x2] * Dx * (1.0 - Dy) + chOrig[c][y2][x1] * (1.0 - Dx) * Dy + chOrig[c][y2][x2] * Dx * Dy);
                         }
                     }
                 } else {

From cabbf8c229464eb52c90a0ebf19606026793bba9 Mon Sep 17 00:00:00 2001
From: Ingo Weyrich <heckflosse67@gmx.de>
Date: Fri, 3 Jan 2020 20:23:34 +0100
Subject: [PATCH 12/22] guidedfilter: direct copy when no rescaling is needed,
 copied from ART

---
 rtengine/guidedfilter.cc | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/rtengine/guidedfilter.cc b/rtengine/guidedfilter.cc
index 6b2adb773..ad3beec51 100644
--- a/rtengine/guidedfilter.cc
+++ b/rtengine/guidedfilter.cc
@@ -106,7 +106,18 @@ void guidedFilter(const array2D<float> &guide, const array2D<float> &src, array2
     const auto f_subsample =
         [multithread](array2D<float> &d, const array2D<float> &s) -> void
         {
-            rescaleBilinear(s, d, multithread);
+            if (d.width() == s.width() && d.height() == s.height()) {
+#ifdef _OPENMP
+                #pragma omp parallel for if (multithread)
+#endif
+                for (int y = 0; y < s.height(); ++y) {
+                    for (int x = 0; x < s.width(); ++x) {
+                        d[y][x] = s[y][x];
+                    }
+                }
+            } else {
+                rescaleBilinear(s, d, multithread);
+            }
         };
 
     const auto f_mean =

From 525283650306d1ced6f6df5ab2625630ea12c7df Mon Sep 17 00:00:00 2001
From: Ingo Weyrich <heckflosse67@gmx.de>
Date: Sat, 4 Jan 2020 11:28:05 +0100
Subject: [PATCH 13/22] filterpanel: make full use of the screen vertical
 space, copied from ART

---
 rtgui/filterpanel.cc | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/rtgui/filterpanel.cc b/rtgui/filterpanel.cc
index 301a7b188..2c4fc52d2 100644
--- a/rtgui/filterpanel.cc
+++ b/rtgui/filterpanel.cc
@@ -14,7 +14,7 @@
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
- *  along with RawTherapee.  If not, see <https://www.gnu.org/licenses/>.
+ *  along with RawTherapee.  If not, see <http://www.gnu.org/licenses/>.
  */
 #include "filterpanel.h"
 #include "multilangmgr.h"
@@ -107,8 +107,8 @@ FilterPanel::FilterPanel () : listener (nullptr)
     scamera->set_policy(Gtk::POLICY_AUTOMATIC, Gtk::POLICY_ALWAYS);
     scamera->set_size_request(-1, 80);
     scamera->add(*camera);
-    cvb->pack_start (*scamera, Gtk::PACK_SHRINK, 0);
-    pack_start (*cvb, Gtk::PACK_SHRINK, 4);
+    cvb->pack_start (*scamera, Gtk::PACK_EXPAND_WIDGET, 0);
+    pack_start (*cvb, Gtk::PACK_EXPAND_WIDGET, 4);
 
     enaLens = Gtk::manage(new Gtk::CheckButton(M("EXIFFILTER_LENS") + ":"));
     Gtk::VBox* lvb = Gtk::manage(new Gtk::VBox ());
@@ -119,8 +119,8 @@ FilterPanel::FilterPanel () : listener (nullptr)
     slens->set_policy(Gtk::POLICY_AUTOMATIC, Gtk::POLICY_ALWAYS);
     slens->set_size_request(-1, 80);
     slens->add(*lens);
-    lvb->pack_start (*slens, Gtk::PACK_SHRINK, 0);
-    pack_start (*lvb, Gtk::PACK_SHRINK, 4);
+    lvb->pack_start (*slens, Gtk::PACK_EXPAND_WIDGET, 0);
+    pack_start (*lvb, Gtk::PACK_EXPAND_WIDGET, 4);
 
     enaFiletype = Gtk::manage(new Gtk::CheckButton(M("EXIFFILTER_FILETYPE") + ":"));
     Gtk::VBox* ftvb = Gtk::manage(new Gtk::VBox ());
@@ -131,8 +131,8 @@ FilterPanel::FilterPanel () : listener (nullptr)
     sfiletype->set_policy(Gtk::POLICY_AUTOMATIC, Gtk::POLICY_ALWAYS);
     sfiletype->set_size_request(-1, 80);
     sfiletype->add(*filetype);
-    ftvb->pack_start (*sfiletype, Gtk::PACK_SHRINK, 0);
-    pack_start (*ftvb, Gtk::PACK_SHRINK, 4);
+    ftvb->pack_start (*sfiletype, Gtk::PACK_EXPAND_WIDGET, 0);
+    pack_start (*ftvb, Gtk::PACK_EXPAND_WIDGET, 4);
 
     // add panel ending
     Gtk::VBox* vboxpe = Gtk::manage (new Gtk::VBox ());

From 0edac2dac752ad75656817140b8492812ab2dd4d Mon Sep 17 00:00:00 2001
From: Ingo Weyrich <heckflosse67@gmx.de>
Date: Mon, 6 Jan 2020 19:01:30 +0100
Subject: [PATCH 14/22] Speedup for scalar xlog functions

---
 rtengine/sleef.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/rtengine/sleef.h b/rtengine/sleef.h
index 1a953953c..b7655258b 100644
--- a/rtengine/sleef.h
+++ b/rtengine/sleef.h
@@ -532,7 +532,7 @@ __inline double xlog(double d) {
 
     x = x * t + 0.693147180559945286226764 * e;
 
-    if (xisinf(d)) x = rtengine::RT_INFINITY;
+    if (xispinf(d)) x = rtengine::RT_INFINITY;
     if (d < 0) x = rtengine::RT_NAN;
     if (d == 0) x = -rtengine::RT_INFINITY;
 
@@ -864,7 +864,7 @@ __inline double xlog10(double a) {
     double2 d = mul_dd(logk(a), dd(0.43429448190325176116, 6.6494347733425473126e-17));
     double x = d.x + d.y;
 
-    if (xisinf(a)) x = rtengine::RT_INFINITY;
+    if (xispinf(a)) x = rtengine::RT_INFINITY;
     if (a < 0) x = rtengine::RT_NAN;
     if (a == 0) x = -rtengine::RT_INFINITY;
 
@@ -875,7 +875,7 @@ __inline double xlog1p(double a) {
     double2 d = logk2(add2_ss(a, 1));
     double x = d.x + d.y;
 
-    if (xisinf(a)) x = rtengine::RT_INFINITY;
+    if (xispinf(a)) x = rtengine::RT_INFINITY;
     if (a < -1) x = rtengine::RT_NAN;
     if (a == -1) x = -rtengine::RT_INFINITY;
 
@@ -1208,7 +1208,7 @@ __inline float xlogf(float d) {
 
     x = x * t + 0.693147180559945286226764f * e;
 
-    if (xisinff(d)) x = rtengine::RT_INFINITY_F;
+    if (xispinff(d)) x = rtengine::RT_INFINITY_F;
     if (d < 0) x = rtengine::RT_NAN_F;
     if (d == 0) x = -rtengine::RT_INFINITY_F;
 
@@ -1233,7 +1233,7 @@ __inline float xlogf1(float d) { // does xlogf(vmaxf(d, 1.f)) but faster
 
     x = x * t + 0.693147180559945286226764f * e;
 
-    if (xisinff(d)) x = rtengine::RT_INFINITY_F;
+    if (xispinff(d)) x = rtengine::RT_INFINITY_F;
     if (d <= 1.f) x = 0;
 
     return x;

From 355fd5a44a2b740292406db61f038224cc8df88d Mon Sep 17 00:00:00 2001
From: Ingo Weyrich <heckflosse67@gmx.de>
Date: Fri, 10 Jan 2020 14:25:51 +0100
Subject: [PATCH 15/22] Capture sharpening: add vectorization hints for clang

---
 rtengine/capturesharpening.cc | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/rtengine/capturesharpening.cc b/rtengine/capturesharpening.cc
index e5bfde555..4beb4091a 100644
--- a/rtengine/capturesharpening.cc
+++ b/rtengine/capturesharpening.cc
@@ -131,6 +131,9 @@ inline void gauss3x3div (float** RESTRICT src, float** RESTRICT dst, float** RES
     const float c00 = kernel[1][1];
 
     for (int i = 1; i < tileSize - 1; i++) {
+#ifdef __clang__
+        #pragma clang loop vectorize(assume_safety)
+#endif
         for (int j = 1; j < tileSize - 1; j++) {
             const float val = c11 * (src[i - 1][j - 1] + src[i - 1][j + 1] + src[i + 1][j - 1] + src[i + 1][j + 1]) + 
                               c10 * (src[i - 1][j] + src[i][j - 1] + src[i][j + 1] + src[i + 1][j]) + 
@@ -151,6 +154,9 @@ inline void gauss5x5div (float** RESTRICT src, float** RESTRICT dst, float** RES
 
     for (int i = 2; i < tileSize - 2; ++i) {
         // I tried hand written SSE code but gcc vectorizes better
+#ifdef __clang__
+        #pragma clang loop vectorize(assume_safety)
+#endif
         for (int j = 2; j < tileSize - 2; ++j) {
             const float val = c21 * ((src[i - 2][j - 1] + src[i - 2][j + 1]) + (src[i - 1][j - 2] + src[i - 1][j + 2]) + (src[i + 1][j - 2] + src[i + 1][j + 2]) + (src[i + 2][j - 1] + src[i + 2][j + 1])) +
                               c20 * (src[i - 2][j] + src[i][j - 2] + src[i][j + 2] + src[i + 2][j]) +
@@ -177,6 +183,9 @@ inline void gauss7x7div(float** RESTRICT src, float** RESTRICT dst, float** REST
 
     for (int i = 3; i < tileSize - 3; ++i) {
         // I tried hand written SSE code but gcc vectorizes better
+#ifdef __clang__
+        #pragma clang loop vectorize(assume_safety)
+#endif
         for (int j = 3; j < tileSize - 3; ++j) {
             const float val = c31 * ((src[i - 3][j - 1] + src[i - 3][j + 1]) + (src[i - 1][j - 3] + src[i - 1][j + 3]) + (src[i + 1][j - 3] + src[i + 1][j + 3]) + (src[i + 3][j - 1] + src[i + 3][j + 1])) +
                               c30 * (src[i - 3][j] + src[i][j - 3] + src[i][j + 3] + src[i + 3][j]) +
@@ -199,6 +208,9 @@ inline void gauss3x3mult(float** RESTRICT src, float** RESTRICT dst, const int t
     const float c00 = kernel[1][1];
 
     for (int i = 1; i < tileSize - 1; i++) {
+#ifdef __clang__
+        #pragma clang loop vectorize(assume_safety)
+#endif
         for (int j = 1; j < tileSize - 1; j++) {
             const float val = c11 * (src[i - 1][j - 1] + src[i - 1][j + 1] + src[i + 1][j - 1] + src[i + 1][j + 1]) + 
                               c10 * (src[i - 1][j] + src[i][j - 1] + src[i][j + 1] + src[i + 1][j]) + 
@@ -220,6 +232,9 @@ inline void gauss5x5mult (float** RESTRICT src, float** RESTRICT dst, const int
 
     for (int i = 2; i < tileSize - 2; ++i) {
         // I tried hand written SSE code but gcc vectorizes better
+#ifdef __clang__
+        #pragma clang loop vectorize(assume_safety)
+#endif
         for (int j = 2; j < tileSize - 2; ++j) {
             const float val = c21 * ((src[i - 2][j - 1] + src[i - 2][j + 1]) + (src[i - 1][j - 2] + src[i - 1][j + 2]) + (src[i + 1][j - 2] + src[i + 1][j + 2]) + (src[i + 2][j - 1] + src[i + 2][j + 1])) +
                               c20 * (src[i - 2][j] + src[i][j - 2] + src[i][j + 2] + src[i + 2][j]) +
@@ -246,6 +261,9 @@ inline void gauss7x7mult(float** RESTRICT src, float** RESTRICT dst, const int t
 
     for (int i = 3; i < tileSize - 3; ++i) {
         // I tried hand written SSE code but gcc vectorizes better
+#ifdef __clang__
+        #pragma clang loop vectorize(assume_safety)
+#endif
         for (int j = 3; j < tileSize - 3; ++j) {
             const float val = c31 * ((src[i - 3][j - 1] + src[i - 3][j + 1]) + (src[i - 1][j - 3] + src[i - 1][j + 3]) + (src[i + 1][j - 3] + src[i + 1][j + 3]) + (src[i + 3][j - 1] + src[i + 3][j + 1])) +
                               c30 * (src[i - 3][j] + src[i][j - 3] + src[i][j + 3] + src[i + 3][j]) +

From 09b1a5fada6b9d12f271e0ab03bfa4df4bea31d9 Mon Sep 17 00:00:00 2001
From: Ingo Weyrich <heckflosse67@gmx.de>
Date: Fri, 10 Jan 2020 19:45:59 +0100
Subject: [PATCH 16/22] Capture sharpening: add vectorization hints for gcc

---
 rtengine/capturesharpening.cc | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/rtengine/capturesharpening.cc b/rtengine/capturesharpening.cc
index 4beb4091a..f8067ac0b 100644
--- a/rtengine/capturesharpening.cc
+++ b/rtengine/capturesharpening.cc
@@ -131,8 +131,10 @@ inline void gauss3x3div (float** RESTRICT src, float** RESTRICT dst, float** RES
     const float c00 = kernel[1][1];
 
     for (int i = 1; i < tileSize - 1; i++) {
-#ifdef __clang__
+#if defined(__clang__)
         #pragma clang loop vectorize(assume_safety)
+#elif defined(__GNUC__)
+        #pragma GCC ivdep
 #endif
         for (int j = 1; j < tileSize - 1; j++) {
             const float val = c11 * (src[i - 1][j - 1] + src[i - 1][j + 1] + src[i + 1][j - 1] + src[i + 1][j + 1]) + 
@@ -154,8 +156,10 @@ inline void gauss5x5div (float** RESTRICT src, float** RESTRICT dst, float** RES
 
     for (int i = 2; i < tileSize - 2; ++i) {
         // I tried hand written SSE code but gcc vectorizes better
-#ifdef __clang__
+#if defined(__clang__)
         #pragma clang loop vectorize(assume_safety)
+#elif defined(__GNUC__)
+        #pragma GCC ivdep
 #endif
         for (int j = 2; j < tileSize - 2; ++j) {
             const float val = c21 * ((src[i - 2][j - 1] + src[i - 2][j + 1]) + (src[i - 1][j - 2] + src[i - 1][j + 2]) + (src[i + 1][j - 2] + src[i + 1][j + 2]) + (src[i + 2][j - 1] + src[i + 2][j + 1])) +
@@ -183,8 +187,10 @@ inline void gauss7x7div(float** RESTRICT src, float** RESTRICT dst, float** REST
 
     for (int i = 3; i < tileSize - 3; ++i) {
         // I tried hand written SSE code but gcc vectorizes better
-#ifdef __clang__
+#if defined(__clang__)
         #pragma clang loop vectorize(assume_safety)
+#elif defined(__GNUC__)
+        #pragma GCC ivdep
 #endif
         for (int j = 3; j < tileSize - 3; ++j) {
             const float val = c31 * ((src[i - 3][j - 1] + src[i - 3][j + 1]) + (src[i - 1][j - 3] + src[i - 1][j + 3]) + (src[i + 1][j - 3] + src[i + 1][j + 3]) + (src[i + 3][j - 1] + src[i + 3][j + 1])) +
@@ -208,8 +214,10 @@ inline void gauss3x3mult(float** RESTRICT src, float** RESTRICT dst, const int t
     const float c00 = kernel[1][1];
 
     for (int i = 1; i < tileSize - 1; i++) {
-#ifdef __clang__
+#if defined(__clang__)
         #pragma clang loop vectorize(assume_safety)
+#elif defined(__GNUC__)
+        #pragma GCC ivdep
 #endif
         for (int j = 1; j < tileSize - 1; j++) {
             const float val = c11 * (src[i - 1][j - 1] + src[i - 1][j + 1] + src[i + 1][j - 1] + src[i + 1][j + 1]) + 
@@ -232,8 +240,10 @@ inline void gauss5x5mult (float** RESTRICT src, float** RESTRICT dst, const int
 
     for (int i = 2; i < tileSize - 2; ++i) {
         // I tried hand written SSE code but gcc vectorizes better
-#ifdef __clang__
+#if defined(__clang__)
         #pragma clang loop vectorize(assume_safety)
+#elif defined(__GNUC__)
+        #pragma GCC ivdep
 #endif
         for (int j = 2; j < tileSize - 2; ++j) {
             const float val = c21 * ((src[i - 2][j - 1] + src[i - 2][j + 1]) + (src[i - 1][j - 2] + src[i - 1][j + 2]) + (src[i + 1][j - 2] + src[i + 1][j + 2]) + (src[i + 2][j - 1] + src[i + 2][j + 1])) +
@@ -261,8 +271,10 @@ inline void gauss7x7mult(float** RESTRICT src, float** RESTRICT dst, const int t
 
     for (int i = 3; i < tileSize - 3; ++i) {
         // I tried hand written SSE code but gcc vectorizes better
-#ifdef __clang__
+#if defined(__clang__)
         #pragma clang loop vectorize(assume_safety)
+#elif defined(__GNUC__)
+        #pragma GCC ivdep
 #endif
         for (int j = 3; j < tileSize - 3; ++j) {
             const float val = c31 * ((src[i - 3][j - 1] + src[i - 3][j + 1]) + (src[i - 1][j - 3] + src[i - 1][j + 3]) + (src[i + 1][j - 3] + src[i + 1][j + 3]) + (src[i + 3][j - 1] + src[i + 3][j + 1])) +

From 2ff619e5e2832df25658186905bc89353351abd0 Mon Sep 17 00:00:00 2001
From: Desmis <jdesmis@gmail.com>
Date: Sat, 11 Jan 2020 08:23:15 +0100
Subject: [PATCH 17/22] Fixed bad behavior of the gamma slider in tone-mapping

---
 rtgui/epd.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/rtgui/epd.cc b/rtgui/epd.cc
index 307790ff7..6544b7131 100644
--- a/rtgui/epd.cc
+++ b/rtgui/epd.cc
@@ -69,6 +69,7 @@ void EdgePreservingDecompositionUI::read(const ProcParams *pp, const ParamsEdite
 
     setEnabled(pp->epd.enabled);
     strength->set_sensitive (true);
+    gamma->set_sensitive (true);
 
     if(pp->wavelet.enabled) {
         if(pp->wavelet.tmrs == 0) {
@@ -98,6 +99,7 @@ void EdgePreservingDecompositionUI::write(ProcParams *pp, ParamsEdited *pedited)
     pp->epd.reweightingIterates = reweightingIterates->getValue();
     pp->epd.enabled = getEnabled();
     strength->set_sensitive (true);
+    gamma->set_sensitive (true);
 
     if(pp->wavelet.enabled) {
         if(pp->wavelet.tmrs == 0) {

From 1a02f16c35ff7e366010c1170637b7b7a9dff5a0 Mon Sep 17 00:00:00 2001
From: Ingo Weyrich <heckflosse67@gmx.de>
Date: Sat, 11 Jan 2020 11:20:41 +0100
Subject: [PATCH 18/22] crash in Tab: transform, fixes #5604

---
 rtengine/iptransform.cc | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc
index 29e10035e..9e7c61dcf 100644
--- a/rtengine/iptransform.cc
+++ b/rtengine/iptransform.cc
@@ -1006,11 +1006,6 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I
         calcPCVignetteParams(fW, fH, oW, oH, params->pcvignette, params->crop, pcv);
     }
 
-    const std::array<const float* const*, 3> chOrig = {
-        original->r.ptrs,
-        original->g.ptrs,
-        original->b.ptrs
-    };
     const std::array<float* const*, 3> chTrans = {
         transformed->r.ptrs,
         transformed->g.ptrs,
@@ -1068,6 +1063,13 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I
             logEncode(original, original, multiThread);
         }
     }
+
+    const std::array<const float* const*, 3> chOrig = {
+        original->r.ptrs,
+        original->g.ptrs,
+        original->b.ptrs
+    };
+
     // main cycle
 #ifdef _OPENMP
     #pragma omp parallel for schedule(dynamic, 16) if(multiThread)

From d8564cb65241c34d1f083a2ddde339b19de74291 Mon Sep 17 00:00:00 2001
From: Desmis <jdesmis@gmail.com>
Date: Sat, 11 Jan 2020 16:25:50 +0100
Subject: [PATCH 19/22] Another fix for the wavelet <=> tone mapping interaction

---
 rtgui/epd.cc | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/rtgui/epd.cc b/rtgui/epd.cc
index 6544b7131..a620f4468 100644
--- a/rtgui/epd.cc
+++ b/rtgui/epd.cc
@@ -71,11 +71,11 @@ void EdgePreservingDecompositionUI::read(const ProcParams *pp, const ParamsEdite
     strength->set_sensitive (true);
     gamma->set_sensitive (true);
 
-    if(pp->wavelet.enabled) {
-        if(pp->wavelet.tmrs == 0) {
+    if(pp->wavelet.enabled) { 
+        if(pp->wavelet.tmrs == 0 || pp->wavelet.TMmethod == "cont") {
             strength->set_sensitive (true);
             gamma->set_sensitive (true);
-        } else {
+        } else if(pp->wavelet.tmrs != 0 && pp->wavelet.TMmethod == "tm") {
             strength->set_sensitive (false);
             gamma->set_sensitive (false);
         }
@@ -101,11 +101,11 @@ void EdgePreservingDecompositionUI::write(ProcParams *pp, ParamsEdited *pedited)
     strength->set_sensitive (true);
     gamma->set_sensitive (true);
 
-    if(pp->wavelet.enabled) {
-        if(pp->wavelet.tmrs == 0) {
+    if(pp->wavelet.enabled) { 
+        if(pp->wavelet.tmrs == 0 || pp->wavelet.TMmethod == "cont") {
             strength->set_sensitive (true);
             gamma->set_sensitive (true);
-        } else {
+        } else if(pp->wavelet.tmrs != 0 && pp->wavelet.TMmethod == "tm") {
             strength->set_sensitive (false);
             gamma->set_sensitive (false);
         }

From 01fbc2eddff2dfff5f84da88eec27cc160be70e2 Mon Sep 17 00:00:00 2001
From: Desmis <jdesmis@gmail.com>
Date: Sun, 12 Jan 2020 08:03:52 +0100
Subject: [PATCH 20/22] Suppress interaction between wavelet and tone-mapping

---
 rtengine/improcfun.cc | 8 ++++----
 rtgui/epd.cc          | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/rtengine/improcfun.cc b/rtengine/improcfun.cc
index 59fb0f016..d5c69773e 100644
--- a/rtengine/improcfun.cc
+++ b/rtengine/improcfun.cc
@@ -5131,11 +5131,11 @@ void ImProcFunctions::EPDToneMapCIE (CieImage *ncie, float a_w, float c_, int Wi
     if (!params->epd.enabled) {
         return;
     }
-
+/*
     if (params->wavelet.enabled  && params->wavelet.tmrs != 0) {
         return;
     }
-
+*/
     float stren = params->epd.strength;
     float edgest = params->epd.edgeStopping;
     float sca = params->epd.scale;
@@ -5245,11 +5245,11 @@ void ImProcFunctions::EPDToneMap (LabImage *lab, unsigned int Iterates, int skip
     if (!params->epd.enabled) {
         return;
     }
-
+/*
     if (params->wavelet.enabled  && params->wavelet.tmrs != 0) {
         return;
     }
-
+*/
     float stren = params->epd.strength;
     float edgest = params->epd.edgeStopping;
     float sca = params->epd.scale;
diff --git a/rtgui/epd.cc b/rtgui/epd.cc
index a620f4468..b13effba3 100644
--- a/rtgui/epd.cc
+++ b/rtgui/epd.cc
@@ -70,7 +70,7 @@ void EdgePreservingDecompositionUI::read(const ProcParams *pp, const ParamsEdite
     setEnabled(pp->epd.enabled);
     strength->set_sensitive (true);
     gamma->set_sensitive (true);
-
+/*
     if(pp->wavelet.enabled) { 
         if(pp->wavelet.tmrs == 0 || pp->wavelet.TMmethod == "cont") {
             strength->set_sensitive (true);
@@ -80,7 +80,7 @@ void EdgePreservingDecompositionUI::read(const ProcParams *pp, const ParamsEdite
             gamma->set_sensitive (false);
         }
     }
-
+*/
     strength->setValue(pp->epd.strength);
     gamma->setValue(pp->epd.gamma);
     edgeStopping->setValue(pp->epd.edgeStopping);
@@ -100,7 +100,7 @@ void EdgePreservingDecompositionUI::write(ProcParams *pp, ParamsEdited *pedited)
     pp->epd.enabled = getEnabled();
     strength->set_sensitive (true);
     gamma->set_sensitive (true);
-
+/*
     if(pp->wavelet.enabled) { 
         if(pp->wavelet.tmrs == 0 || pp->wavelet.TMmethod == "cont") {
             strength->set_sensitive (true);
@@ -110,7 +110,7 @@ void EdgePreservingDecompositionUI::write(ProcParams *pp, ParamsEdited *pedited)
             gamma->set_sensitive (false);
         }
     }
-
+*/
     if(pedited) {
         pedited->epd.strength = strength->getEditedState();
         pedited->epd.gamma = gamma->getEditedState();

From 54bce4af64a597732d2964d8ea40169322a0214c Mon Sep 17 00:00:00 2001
From: Ingo Weyrich <heckflosse67@gmx.de>
Date: Sun, 12 Jan 2020 19:36:37 +0100
Subject: [PATCH 21/22] Capture sharpening: minor speedups

---
 rtengine/capturesharpening.cc | 91 +++++++++++++++++------------------
 1 file changed, 44 insertions(+), 47 deletions(-)

diff --git a/rtengine/capturesharpening.cc b/rtengine/capturesharpening.cc
index f8067ac0b..21217918e 100644
--- a/rtengine/capturesharpening.cc
+++ b/rtengine/capturesharpening.cc
@@ -26,7 +26,7 @@
 #include "procparams.h"
 #include "color.h"
 #include "rt_algo.h"
-//#define BENCHMARK
+#define BENCHMARK
 #include "StopWatch.h"
 #include "opthelper.h"
 #include "../rtgui/multilangmgr.h"
@@ -525,28 +525,25 @@ float calcRadiusXtrans(const float * const *rawData, int W, int H, float lowerLi
 
 bool checkForStop(float** tmpIThr, float** iterCheck, int fullTileSize, int border)
 {
-    bool stopped = false;
-    for (int ii = border; !stopped && ii < fullTileSize - border; ++ii) {
+    for (int ii = border; ii < fullTileSize - border; ++ii) {
 #ifdef __SSE2__
         for (int jj = border; jj < fullTileSize - border; jj += 4) {
-            if (_mm_movemask_ps((vfloat)vmaskf_lt(LVFU(tmpIThr[ii][jj]), LVFU(iterCheck[ii - border][jj - border])))) {
-                stopped = true;
-                break;
+            if (UNLIKELY(_mm_movemask_ps((vfloat)vmaskf_lt(LVFU(tmpIThr[ii][jj]), LVFU(iterCheck[ii - border][jj - border]))))) {
+                return true;
             }
         }
 #else
         for (int jj = border; jj < fullTileSize - border; ++jj) {
             if (tmpIThr[ii][jj] < iterCheck[ii - border][jj - border]) {
-                stopped = true;
-                break;
+                return true;
             }
         }
 #endif
     }
-    return stopped;
+    return false;
 }
 
-void CaptureDeconvSharpening (float ** clipmask, float** luminance, float** oldLuminance, const float * const * blend, int W, int H, double sigma, double sigmaCornerOffset, int iterations, bool checkIterStop, rtengine::ProgressListener* plistener, double startVal, double endVal)
+void CaptureDeconvSharpening (float** luminance, const float* const * oldLuminance, const float * const * blend, int W, int H, double sigma, double sigmaCornerOffset, int iterations, bool checkIterStop, rtengine::ProgressListener* plistener, double startVal, double endVal)
 {
 BENCHFUN
     const bool is5x5 = (sigma <= 0.84 && sigmaCornerOffset == 0.0);
@@ -571,6 +568,7 @@ BENCHFUN
 
     double progress = startVal;
     const double progressStep = (endVal - startVal) * rtengine::SQR(tileSize) / (W * H);
+
     constexpr float minBlend = 0.01f;
 
 #ifdef _OPENMP
@@ -597,14 +595,14 @@ BENCHFUN
                     if (checkIterStop) {
                         for (int k = 0, ii = endOfCol ? H - fullTileSize + border : i; k < tileSize; ++k, ++ii) {
                             for (int l = 0, jj = endOfRow ? W - fullTileSize + border : j; l < tileSize; ++l, ++jj) {
-                                iterCheck[k][l] = oldLuminance[ii][jj] * clipmask[ii][jj] * 0.5f;
-                                maxVal = std::max(maxVal, clipmask[ii][jj]);
+                                iterCheck[k][l] = oldLuminance[ii][jj] * blend[ii][jj] * 0.5f;
+                                maxVal = std::max(maxVal, blend[ii][jj]);
                             }
                         }
                     } else {
                         for (int k = 0, ii = endOfCol ? H - fullTileSize + border : i; k < tileSize; ++k, ++ii) {
                             for (int l = 0, jj = endOfRow ? W - fullTileSize + border : j; l < tileSize; ++l, ++jj) {
-                                maxVal = std::max(maxVal, clipmask[ii][jj]);
+                                maxVal = std::max(maxVal, blend[ii][jj]);
                             }
                         }
                     }
@@ -623,14 +621,14 @@ BENCHFUN
                     if (checkIterStop) {
                         for (int ii = 0; ii < tileSize; ++ii) {
                             for (int jj = 0; jj < tileSize; ++jj) {
-                                iterCheck[ii][jj] = oldLuminance[i + ii][j + jj] * clipmask[i + ii][j + jj] * 0.5f;
-                                maxVal = std::max(maxVal, clipmask[i + ii][j + jj]);
+                                iterCheck[ii][jj] = oldLuminance[i + ii][j + jj] * blend[i + ii][j + jj] * 0.5f;
+                                maxVal = std::max(maxVal, blend[i + ii][j + jj]);
                             }
                         }
                     } else {
                         for (int ii = 0; ii < tileSize; ++ii) {
                             for (int jj = 0; jj < tileSize; ++jj) {
-                                maxVal = std::max(maxVal, clipmask[i + ii][j + jj]);
+                                maxVal = std::max(maxVal, blend[i + ii][j + jj]);
                             }
                         }
                     }
@@ -645,23 +643,22 @@ BENCHFUN
                         }
                     }
                 }
-                bool stopped = false;
                 if (is3x3) {
-                    for (int k = 0; k < iterations && !stopped; ++k) {
+                    for (int k = 0; k < iterations; ++k) {
                         // apply 3x3 gaussian blur and divide luminance by result of gaussian blur
                         gauss3x3div(tmpIThr, tmpThr, lumThr, fullTileSize, kernel3);
                         gauss3x3mult(tmpThr, tmpIThr, fullTileSize, kernel3);
-                        if (checkIterStop) {
-                            stopped = checkForStop(tmpIThr, iterCheck, fullTileSize, border);
+                        if (checkIterStop && k < iterations - 1 && checkForStop(tmpIThr, iterCheck, fullTileSize, border)) {
+                            break;
                         }
                     }
                 } else if (is5x5) {
-                    for (int k = 0; k < iterations && !stopped; ++k) {
+                    for (int k = 0; k < iterations; ++k) {
                         // apply 5x5 gaussian blur and divide luminance by result of gaussian blur
                         gauss5x5div(tmpIThr, tmpThr, lumThr, fullTileSize, kernel5);
                         gauss5x5mult(tmpThr, tmpIThr, fullTileSize, kernel5);
-                        if (checkIterStop) {
-                            stopped = checkForStop(tmpIThr, iterCheck, fullTileSize, border);
+                        if (checkIterStop && k < iterations - 1 && checkForStop(tmpIThr, iterCheck, fullTileSize, border)) {
+                            break;
                         }
                     }
                 } else {
@@ -672,34 +669,34 @@ BENCHFUN
                             if (sigmaTile > 0.84) { // have to use 7x7 kernel
                                 float lkernel7[7][7];
                                 compute7x7kernel(static_cast<float>(sigma) + distanceFactor * distance, lkernel7);
-                                for (int k = 0; k < iterations && !stopped; ++k) {
+                                for (int k = 0; k < iterations; ++k) {
                                     // apply 7x7 gaussian blur and divide luminance by result of gaussian blur
                                     gauss7x7div(tmpIThr, tmpThr, lumThr, fullTileSize, lkernel7);
                                     gauss7x7mult(tmpThr, tmpIThr, fullTileSize, lkernel7);
-                                    if (checkIterStop) {
-                                        stopped = checkForStop(tmpIThr, iterCheck, fullTileSize, border);
+                                    if (checkIterStop && k < iterations - 1 && checkForStop(tmpIThr, iterCheck, fullTileSize, border)) {
+                                        break;
                                     }
                                 }
                             } else { // can use 5x5 kernel
-                                float lkernel7[5][5];
-                                compute5x5kernel(static_cast<float>(sigma) + distanceFactor * distance, lkernel7);
-                                for (int k = 0; k < iterations && !stopped; ++k) {
+                                float lkernel5[5][5];
+                                compute5x5kernel(static_cast<float>(sigma) + distanceFactor * distance, lkernel5);
+                                for (int k = 0; k < iterations; ++k) {
                                     // apply 7x7 gaussian blur and divide luminance by result of gaussian blur
-                                    gauss5x5div(tmpIThr, tmpThr, lumThr, fullTileSize, lkernel7);
-                                    gauss5x5mult(tmpThr, tmpIThr, fullTileSize, lkernel7);
-                                    if (checkIterStop) {
-                                        stopped = checkForStop(tmpIThr, iterCheck, fullTileSize, border);
+                                    gauss5x5div(tmpIThr, tmpThr, lumThr, fullTileSize, lkernel5);
+                                    gauss5x5mult(tmpThr, tmpIThr, fullTileSize, lkernel5);
+                                    if (checkIterStop && k < iterations - 1 && checkForStop(tmpIThr, iterCheck, fullTileSize, border)) {
+                                        break;
                                     }
                                 }
                             }
                         }
                     } else {
-                        for (int k = 0; k < iterations && !stopped; ++k) {
+                        for (int k = 0; k < iterations; ++k) {
                             // apply 7x7 gaussian blur and divide luminance by result of gaussian blur
                             gauss7x7div(tmpIThr, tmpThr, lumThr, fullTileSize, kernel7);
                             gauss7x7mult(tmpThr, tmpIThr, fullTileSize, kernel7);
-                            if (checkIterStop) {
-                                stopped = checkForStop(tmpIThr, iterCheck, fullTileSize, border);
+                            if (checkIterStop && k < iterations - 1 && checkForStop(tmpIThr, iterCheck, fullTileSize, border)) {
+                                break;
                             }
                         }
                     }
@@ -719,12 +716,12 @@ BENCHFUN
                     }
                 }
                 if (plistener) {
-                    if (++progresscounter % 16 == 0) {
+                    if (++progresscounter % 32 == 0) {
 #ifdef _OPENMP
                         #pragma omp critical(csprogress)
 #endif
                         {
-                            progress += 16.0 * progressStep;
+                            progress += 32.0 * progressStep;
                             progress = rtengine::min(progress, endVal);
                             plistener->setProgress(progress);
                         }
@@ -751,6 +748,7 @@ void RawImageSource::captureSharpening(const procparams::CaptureSharpeningParams
         plistener->setProgress(0.0);
     }
 BENCHFUN
+
     constexpr float xyz_rgb[3][3] = {          // XYZ from RGB
                                     { 0.412453, 0.357580, 0.180423 },
                                     { 0.212671, 0.715160, 0.072169 },
@@ -829,8 +827,7 @@ BENCHFUN
             plistener->setProgress(0.1);
         }
 
-        array2D<float>& blend = red; // red will be overridden anyway => we can use its buffer to store the blend mask
-        buildBlendMask(L, blend, W, H, contrast, sharpeningParams.autoContrast, clipMask);
+        buildBlendMask(L, clipMask, W, H, contrast, sharpeningParams.autoContrast, clipMask);
         if (plistener) {
             plistener->setProgress(0.2);
         }
@@ -840,7 +837,7 @@ BENCHFUN
 #endif
         for (int i = 0; i < H; ++i) {
             for (int j = 0; j < W; ++j) {
-                red[i][j] = green[i][j] = blue[i][j] = blend[i][j] * 16384.f;
+                red[i][j] = green[i][j] = blue[i][j] = clipMask[i][j] * 16384.f;
             }
         }
         if (plistener) {
@@ -877,18 +874,18 @@ BENCHFUN
     if (plistener) {
         plistener->setProgress(0.1);
     }
+
     // calculate contrast based blend factors to reduce sharpening in regions with low contrast
-    array2D<float>& blend = clipMask; // we can share blend and clipMask buffer here
-    buildBlendMask(L, blend, W, H, contrast, sharpeningParams.autoContrast, clipMask);
+    buildBlendMask(L, clipMask, W, H, contrast, sharpeningParams.autoContrast, clipMask);
     if (plistener) {
         plistener->setProgress(0.2);
     }
     conrastThreshold = contrast * 100.f;
-
-    CaptureDeconvSharpening(clipMask, YNew, YOld, blend, W, H, radius, sharpeningParams.deconvradiusOffset, sharpeningParams.deconviter, sharpeningParams.deconvitercheck, plistener, 0.2, 0.9);
+    CaptureDeconvSharpening(YNew, YOld, clipMask, W, H, radius, sharpeningParams.deconvradiusOffset, sharpeningParams.deconviter, sharpeningParams.deconvitercheck, plistener, 0.2, 0.9);
     if (plistener) {
         plistener->setProgress(0.9);
     }
+
 #ifdef _OPENMP
     #pragma omp parallel for schedule(dynamic, 16)
 #endif
@@ -896,7 +893,7 @@ BENCHFUN
         int j = 0;
 #ifdef __SSE2__
         for (; j < W - 3; j += 4) {
-            const vfloat factor = vmaxf(LVFU(YNew[i][j]), ZEROV) / vmaxf(LVFU(YOld[i][j]), F2V(0.00001f));
+            const vfloat factor = LVFU(YNew[i][j]) / vmaxf(LVFU(YOld[i][j]), F2V(0.00001f));
             STVFU(red[i][j], LVFU(redVals[i][j]) * factor);
             STVFU(green[i][j], LVFU(greenVals[i][j]) * factor);
             STVFU(blue[i][j], LVFU(blueVals[i][j]) * factor);
@@ -904,7 +901,7 @@ BENCHFUN
 
 #endif
         for (; j < W; ++j) {
-            const float factor = std::max(YNew[i][j], 0.f) / std::max(YOld[i][j], 0.00001f);
+            const float factor = YNew[i][j] / std::max(YOld[i][j], 0.00001f);
             red[i][j] = redVals[i][j] * factor;
             green[i][j] = greenVals[i][j] * factor;
             blue[i][j] = blueVals[i][j] * factor;

From 4ee4888d5ba30536dcbbd3c67b68f36817c3b742 Mon Sep 17 00:00:00 2001
From: Ingo Weyrich <heckflosse67@gmx.de>
Date: Sun, 12 Jan 2020 19:38:37 +0100
Subject: [PATCH 22/22] Capture sharpening: Disable benchmark

---
 rtengine/capturesharpening.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rtengine/capturesharpening.cc b/rtengine/capturesharpening.cc
index 21217918e..d357eb59c 100644
--- a/rtengine/capturesharpening.cc
+++ b/rtengine/capturesharpening.cc
@@ -26,7 +26,7 @@
 #include "procparams.h"
 #include "color.h"
 #include "rt_algo.h"
-#define BENCHMARK
+//#define BENCHMARK
 #include "StopWatch.h"
 #include "opthelper.h"
 #include "../rtgui/multilangmgr.h"