diff --git a/.travis.yml.fixme b/.travis.yml.fixme
deleted file mode 100644
index 0aa85f3b4..000000000
--- a/.travis.yml.fixme
+++ /dev/null
@@ -1,44 +0,0 @@
-sudo: required
-dist: trusty
-
-language: cpp
-
-compiler:
-  - gcc
-
-os:
-  - linux
-
-#branches:
-#  only:
-#  - master
-
-notifications:
-  irc:
-    channels:
-    - "chat.freenode.net#rawtherapee"
-    skip_join: true
-    template:
-    - "%{repository}/%{branch} (%{commit} - %{author}): %{build_url}: %{message}"
-  email:
-    on_success: change
-    on_failure: always
-
-env:
-  global:
-  - OMP_NUM_THREADS=4
-
-before_install:
-  - sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y
-  - sudo add-apt-repository "deb http://archive.ubuntu.com/ubuntu/ xenial main"
-  - sudo apt-get -qq update
-  - sudo apt-get install gcc-6 g++-6
-  - sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-6 60 --slave /usr/bin/g++ g++ /usr/bin/g++-6
-  - sudo apt-get install build-essential cmake curl git libbz2-dev libcanberra-gtk3-dev libexiv2-dev libexpat-dev libfftw3-dev libglibmm-2.4-dev libgtk-3-dev libgtkmm-3.0-dev libiptcdata0-dev libjpeg8-dev liblcms2-dev libpng12-dev libsigc++-2.0-dev libtiff5-dev zlib1g-dev
-
-before_script:
-  - mkdir build
-  - cd build
-  - cmake -DCMAKE_CXX_FLAGS="-Wno-deprecated-declarations" -DWITH_LTO="OFF" -DPROC_TARGET_NUMBER="2" ..
-
-script: make
diff --git a/rtdata/languages/default b/rtdata/languages/default
index f85fdd312..585e4aeca 100644
--- a/rtdata/languages/default
+++ b/rtdata/languages/default
@@ -744,6 +744,7 @@ HISTORY_MSG_COLORTONING_LABREGION_SHOWMASK;CT - region show mask
 HISTORY_MSG_COLORTONING_LABREGION_SLOPE;CT - region slope
 HISTORY_MSG_DEHAZE_DEPTH;Dehaze - Depth
 HISTORY_MSG_DEHAZE_ENABLED;Haze Removal
+HISTORY_MSG_DEHAZE_LUMINANCE;Dehaze - Luminance only
 HISTORY_MSG_DEHAZE_SHOW_DEPTH_MAP;Dehaze - Show depth map
 HISTORY_MSG_DEHAZE_STRENGTH;Dehaze - Strength
 HISTORY_MSG_DUALDEMOSAIC_AUTO_CONTRAST;Dual demosaic - Auto threshold
@@ -770,6 +771,7 @@ HISTORY_MSG_PDSHARPEN_AUTO_RADIUS;CAS - Auto radius
 HISTORY_MSG_PDSHARPEN_GAMMA;CAS - Gamma
 HISTORY_MSG_PDSHARPEN_ITERATIONS;CAS - Iterations
 HISTORY_MSG_PDSHARPEN_RADIUS;CAS - Radius
+HISTORY_MSG_PDSHARPEN_RADIUS_BOOST;CAS - Corner radius boost
 HISTORY_MSG_PIXELSHIFT_DEMOSAIC;PS - Demosaic method for motion
 HISTORY_MSG_PREPROCESS_LINEDENOISE_DIRECTION;Line noise filter direction
 HISTORY_MSG_PREPROCESS_PDAFLINESFILTER;PDAF lines filter
@@ -1548,6 +1550,7 @@ TP_DEFRINGE_RADIUS;Radius
 TP_DEFRINGE_THRESHOLD;Threshold
 TP_DEHAZE_DEPTH;Depth
 TP_DEHAZE_LABEL;Haze Removal
+TP_DEHAZE_LUMINANCE;Luminance only
 TP_DEHAZE_SHOW_DEPTH_MAP;Show depth map
 TP_DEHAZE_STRENGTH;Strength
 TP_DIRPYRDENOISE_CHROMINANCE_AMZ;Auto multi-zones
@@ -1811,7 +1814,6 @@ TP_PCVIGNETTE_ROUNDNESS_TOOLTIP;Roundness:\n0 = rectangle,\n50 = fitted ellipse,
 TP_PCVIGNETTE_STRENGTH;Strength
 TP_PCVIGNETTE_STRENGTH_TOOLTIP;Filter strength in stops (reached in corners).
 TP_PDSHARPENING_LABEL;Capture Sharpening
-TP_PDSHARPENING_AUTORADIUS_TOOLTIP;If the checkbox is checked, RawTherapee calculates a value based on the raw data of the image.
 TP_PERSPECTIVE_HORIZONTAL;Horizontal
 TP_PERSPECTIVE_LABEL;Perspective
 TP_PERSPECTIVE_VERTICAL;Vertical
@@ -2047,7 +2049,7 @@ TP_SHARPENING_LABEL;Sharpening
 TP_SHARPENING_METHOD;Method
 TP_SHARPENING_ONLYEDGES;Sharpen only edges
 TP_SHARPENING_RADIUS;Radius
-TP_SHARPENING_RADIUS_OFFSET;Radius corner offset
+TP_SHARPENING_RADIUS_BOOST;Corner radius boost
 TP_SHARPENING_RLD;RL Deconvolution
 TP_SHARPENING_RLD_AMOUNT;Amount
 TP_SHARPENING_RLD_DAMPING;Damping
diff --git a/rtdata/rawtherapee.desktop.in b/rtdata/rawtherapee.desktop.in
index c6c675c4d..b059e7d6a 100644
--- a/rtdata/rawtherapee.desktop.in
+++ b/rtdata/rawtherapee.desktop.in
@@ -1,6 +1,6 @@
 [Desktop Entry]
 Type=Application
-Version=1.1
+Version=1.0
 Name=RawTherapee
 GenericName=Raw Photo Editor
 GenericName[cs]=Editor raw obrázků
diff --git a/rtengine/boxblur.h b/rtengine/boxblur.h
index da302964b..27aa9d2fc 100644
--- a/rtengine/boxblur.h
+++ b/rtengine/boxblur.h
@@ -20,12 +20,14 @@
 #define _BOXBLUR_H_
 
 #include <assert.h>
+#include <memory>
 #include <stdlib.h>
 #include <string.h>
 #include <math.h>
 #include "alignedbuffer.h"
 #include "rt_math.h"
 #include "opthelper.h"
+#include "StopWatch.h"
 
 
 namespace rtengine
@@ -204,15 +206,15 @@ template<class T, class A> void boxblur (T** src, A** dst, T* buffer, int radx,
 
             tempv = tempv / lenv;
             temp1v = temp1v / lenv;
-            STVFU( dst[0][col], tempv);
-            STVFU( dst[0][col + 4], temp1v);
+            STVFU(dst[0][col], tempv);
+            STVFU(dst[0][col + 4], temp1v);
 
             for (int row = 1; row <= rady; row++) {
                 lenp1v = lenv + onev;
                 tempv = (tempv * lenv + LVFU(temp[(row + rady) * W + col])) / lenp1v;
                 temp1v = (temp1v * lenv + LVFU(temp[(row + rady) * W + col + 4])) / lenp1v;
-                STVFU( dst[row][col], tempv);
-                STVFU( dst[row][col + 4], temp1v);
+                STVFU(dst[row][col], tempv);
+                STVFU(dst[row][col + 4], temp1v);
                 lenv = lenp1v;
             }
 
@@ -221,16 +223,16 @@ template<class T, class A> void boxblur (T** src, A** dst, T* buffer, int radx,
             for (int row = rady + 1; row < H - rady; row++) {
                 tempv = tempv + (LVFU(temp[(row + rady) * W + col]) - LVFU(temp[(row - rady - 1) * W + col])) * rlenv ;
                 temp1v = temp1v + (LVFU(temp[(row + rady) * W + col + 4]) - LVFU(temp[(row - rady - 1) * W + col + 4])) * rlenv ;
-                STVFU( dst[row][col], tempv);
-                STVFU( dst[row][col + 4], temp1v);
+                STVFU(dst[row][col], tempv);
+                STVFU(dst[row][col + 4], temp1v);
             }
 
             for (int row = H - rady; row < H; row++) {
                 lenm1v = lenv - onev;
                 tempv = (tempv * lenv - LVFU(temp[(row - rady - 1) * W + col])) / lenm1v;
                 temp1v = (temp1v * lenv - LVFU(temp[(row - rady - 1) * W + col + 4])) / lenm1v;
-                STVFU( dst[row][col], tempv);
-                STVFU( dst[row][col + 4], temp1v);
+                STVFU(dst[row][col], tempv);
+                STVFU(dst[row][col + 4], temp1v);
                 lenv = lenm1v;
             }
         }
@@ -312,6 +314,223 @@ template<class T, class A> void boxblur (T** src, A** dst, T* buffer, int radx,
 
 }
 
+inline void boxblur (float** src, float** dst, int radius, int W, int H, bool multiThread)
+{
+    // Box blur with box size (2 * radius + 1) x (2 * radius + 1), using per-thread row/line buffers instead of a full size buffer.
+    // No clamping is done on col/row + radius, so callers presumably guarantee 2 * radius < min(W, H) -- TODO confirm.
+    if (radius == 0) {
+        if (src != dst) {
+#ifdef _OPENMP
+            #pragma omp parallel for if (multiThread)
+#endif
+
+            for (int row = 0; row < H; row++) {
+                for (int col = 0; col < W; col++) {
+                    dst[row][col] = src[row][col];
+                }
+            }
+        }
+        return;
+    }
+
+    constexpr int numCols = 8; // process numCols columns at once for better usage of L1 cpu cache
+#ifdef _OPENMP
+    #pragma omp parallel if (multiThread)
+#endif
+    {
+        std::unique_ptr<float[]> buffer(new float[std::max(W, numCols * H)]); // array form so that delete[] is used; holds one line (W floats) or numCols columns (numCols * H floats)
+
+        // horizontal blur: lineBuffer keeps a copy of the original row values needed for the trailing edge of the box (dst[row] may alias src[row])
+        float* const lineBuffer = buffer.get();
+#ifdef _OPENMP
+        #pragma omp for
+#endif
+        for (int row = 0; row < H; row++) {
+            float len = radius + 1;
+            float tempval = src[row][0];
+            lineBuffer[0] = tempval;
+            for (int j = 1; j <= radius; j++) {
+                tempval += src[row][j];
+            }
+
+            tempval /= len;
+            dst[row][0] = tempval;
+
+            for (int col = 1; col <= radius; col++) {
+                lineBuffer[col] = src[row][col];
+                tempval = (tempval * len + src[row][col + radius]) / (len + 1);
+                dst[row][col] = tempval;
+                ++len;
+            }
+
+            for (int col = radius + 1; col < W - radius; col++) {
+                lineBuffer[col] = src[row][col];
+                dst[row][col] = tempval = tempval + (src[row][col + radius] - lineBuffer[col - radius - 1]) / len;
+            }
+
+            for (int col = W - radius; col < W; col++) {
+                dst[row][col] = tempval = (tempval * len - lineBuffer[col - radius - 1]) / (len - 1);
+                --len;
+            }
+        }
+
+        // vertical blur, in place on dst, 8 columns at a time; rowBuffer keeps the horizontally blurred values which get overwritten in dst
+#ifdef __SSE2__
+        vfloat (* const rowBuffer)[2] = (vfloat(*)[2]) buffer.get();
+        const vfloat leninitv = F2V(radius + 1);
+        const vfloat onev = F2V(1.f);
+        vfloat tempv, temp1v, lenv, lenp1v, lenm1v, rlenv;
+        // NOTE(review): the vfloat accesses to rowBuffer presumably rely on new[] returning 16-byte aligned memory -- confirm for all targets
+#ifdef _OPENMP
+        #pragma omp for nowait
+#endif
+
+        for (int col = 0; col < W - 7; col += 8) {
+            lenv = leninitv;
+            tempv = LVFU(dst[0][col]);
+            temp1v = LVFU(dst[0][col + 4]);
+            rowBuffer[0][0] = tempv;
+            rowBuffer[0][1] = temp1v;
+
+            for (int i = 1; i <= radius; i++) {
+                tempv = tempv + LVFU(dst[i][col]);
+                temp1v = temp1v + LVFU(dst[i][col + 4]);
+            }
+
+            tempv = tempv / lenv;
+            temp1v = temp1v / lenv;
+            STVFU(dst[0][col], tempv);
+            STVFU(dst[0][col + 4], temp1v);
+
+            for (int row = 1; row <= radius; row++) {
+                rowBuffer[row][0] = LVFU(dst[row][col]);
+                rowBuffer[row][1] = LVFU(dst[row][col + 4]);
+                lenp1v = lenv + onev;
+                tempv = (tempv * lenv + LVFU(dst[row + radius][col])) / lenp1v;
+                temp1v = (temp1v * lenv + LVFU(dst[row + radius][col + 4])) / lenp1v;
+                STVFU(dst[row][col], tempv);
+                STVFU(dst[row][col + 4], temp1v);
+                lenv = lenp1v;
+            }
+
+            rlenv = onev / lenv;
+
+            for (int row = radius + 1; row < H - radius; row++) {
+                rowBuffer[row][0] = LVFU(dst[row][col]);
+                rowBuffer[row][1] = LVFU(dst[row][col + 4]);
+                tempv = tempv + (LVFU(dst[row + radius][col]) - rowBuffer[row - radius - 1][0]) * rlenv ;
+                temp1v = temp1v + (LVFU(dst[row + radius][col + 4]) - rowBuffer[row - radius - 1][1]) * rlenv ;
+                STVFU(dst[row][col], tempv);
+                STVFU(dst[row][col + 4], temp1v);
+            }
+
+            for (int row = H - radius; row < H; row++) {
+                lenm1v = lenv - onev;
+                tempv = (tempv * lenv - rowBuffer[row - radius - 1][0]) / lenm1v;
+                temp1v = (temp1v * lenv - rowBuffer[row - radius - 1][1]) / lenm1v;
+                STVFU(dst[row][col], tempv);
+                STVFU(dst[row][col + 4], temp1v);
+                lenv = lenm1v;
+            }
+        }
+
+#else
+        float (* const rowBuffer)[numCols] = (float(*)[numCols]) buffer.get();
+#ifdef _OPENMP
+        #pragma omp for nowait
+#endif
+
+        for (int col = 0; col < W - numCols + 1; col += numCols) {
+            float len = radius + 1;
+
+            for (int k = 0; k < numCols; k++) {
+                rowBuffer[0][k] = dst[0][col + k];
+            }
+
+            for (int i = 1; i <= radius; i++) {
+                for (int k = 0; k < numCols; k++) {
+                    dst[0][col + k] += dst[i][col + k];
+                }
+            }
+
+            for(int k = 0; k < numCols; k++) {
+                dst[0][col + k] /= len;
+            }
+
+            for (int row = 1; row <= radius; row++) {
+                for(int k = 0; k < numCols; k++) {
+                    rowBuffer[row][k] = dst[row][col + k];
+                    dst[row][col + k] = (dst[row - 1][col + k] * len + dst[row + radius][col + k]) / (len + 1);
+                }
+
+                len ++;
+            }
+
+            for (int row = radius + 1; row < H - radius; row++) {
+                for(int k = 0; k < numCols; k++) {
+                    rowBuffer[row][k] = dst[row][col + k];
+                    dst[row][col + k] = dst[row - 1][col + k] + (dst[row + radius][col + k] - rowBuffer[row - radius - 1][k]) / len;
+                }
+            }
+
+            for (int row = H - radius; row < H; row++) {
+                for(int k = 0; k < numCols; k++) {
+                    dst[row][col + k] = (dst[row - 1][col + k] * len - rowBuffer[row - radius - 1][k]) / (len - 1);
+                }
+
+                len --;
+            }
+        }
+
+#endif
+        // vertical blur of the remaining W % numCols columns, executed by a single thread
+#ifdef _OPENMP
+        #pragma omp single
+#endif
+        {
+            const int remaining = W % numCols;
+
+            if (remaining > 0) {
+                float (* const rowBuffer)[numCols] = (float(*)[numCols]) buffer.get();
+                const int col = W - remaining;
+
+                float len = radius + 1;
+                for(int k = 0; k < remaining; ++k) {
+                    rowBuffer[0][k] = dst[0][col + k];
+                }
+                for (int row = 1; row <= radius; ++row) {
+                    for(int k = 0; k < remaining; ++k) {
+                        dst[0][col + k] += dst[row][col + k];
+                    }
+                }
+                for(int k = 0; k < remaining; ++k) {
+                    dst[0][col + k] /= len;
+                }
+                for (int row = 1; row <= radius; ++row) {
+                    for(int k = 0; k < remaining; ++k) {
+                        rowBuffer[row][k] = dst[row][col + k];
+                        dst[row][col + k] = (dst[row - 1][col + k] * len + dst[row + radius][col + k]) / (len + 1);
+                    }
+                    len ++;
+                }
+                const float rlen = 1.f / len;
+                for (int row = radius + 1; row < H - radius; ++row) {
+                    for(int k = 0; k < remaining; ++k) {
+                        rowBuffer[row][k] = dst[row][col + k];
+                        dst[row][col + k] = dst[row - 1][col + k] + (dst[row + radius][col + k] - rowBuffer[row - radius - 1][k]) * rlen;
+                    }
+                }
+                for (int row = H - radius; row < H; ++row) {
+                    for(int k = 0; k < remaining; ++k) {
+                        dst[row][col + k] = (dst[row - 1][col + k] * len - rowBuffer[row - radius - 1][k]) / (len - 1);
+                    }
+                    len --;
+                }
+            }
+        }
+    }
+}
+
 template<class T, class A> void boxblur (T* src, A* dst, A* buffer, int radx, int rady, int W, int H)
 {
     //box blur image; box range = (radx,rady) i.e. box size is (2*radx+1)x(2*rady+1)
@@ -382,15 +601,15 @@ template<class T, class A> void boxblur (T* src, A* dst, A* buffer, int radx, in
 
             tempv = tempv / lenv;
             temp1v = temp1v / lenv;
-            STVFU( dst[0 * W + col], tempv);
-            STVFU( dst[0 * W + col + 4], temp1v);
+            STVFU(dst[0 * W + col], tempv);
+            STVFU(dst[0 * W + col + 4], temp1v);
 
             for (int row = 1; row <= rady; row++) {
                 lenp1v = lenv + onev;
                 tempv = (tempv * lenv + LVFU(temp[(row + rady) * W + col])) / lenp1v;
                 temp1v = (temp1v * lenv + LVFU(temp[(row + rady) * W + col + 4])) / lenp1v;
-                STVFU( dst[row * W + col], tempv);
-                STVFU( dst[row * W + col + 4], temp1v);
+                STVFU(dst[row * W + col], tempv);
+                STVFU(dst[row * W + col + 4], temp1v);
                 lenv = lenp1v;
             }
 
@@ -399,16 +618,16 @@ template<class T, class A> void boxblur (T* src, A* dst, A* buffer, int radx, in
             for (int row = rady + 1; row < H - rady; row++) {
                 tempv = tempv + (LVFU(temp[(row + rady) * W + col]) - LVFU(temp[(row - rady - 1) * W + col])) * rlenv ;
                 temp1v = temp1v + (LVFU(temp[(row + rady) * W + col + 4]) - LVFU(temp[(row - rady - 1) * W + col + 4])) * rlenv ;
-                STVFU( dst[row * W + col], tempv);
-                STVFU( dst[row * W + col + 4], temp1v);
+                STVFU(dst[row * W + col], tempv);
+                STVFU(dst[row * W + col + 4], temp1v);
             }
 
             for (int row = H - rady; row < H; row++) {
                 lenm1v = lenv - onev;
                 tempv = (tempv * lenv - LVFU(temp[(row - rady - 1) * W + col])) / lenm1v;
                 temp1v = (temp1v * lenv - LVFU(temp[(row - rady - 1) * W + col + 4])) / lenm1v;
-                STVFU( dst[row * W + col], tempv);
-                STVFU( dst[row * W + col + 4], temp1v);
+                STVFU(dst[row * W + col], tempv);
+                STVFU(dst[row * W + col + 4], temp1v);
                 lenv = lenm1v;
             }
         }
@@ -422,12 +641,12 @@ template<class T, class A> void boxblur (T* src, A* dst, A* buffer, int radx, in
             }
 
             tempv = tempv / lenv;
-            STVFU( dst[0 * W + col], tempv);
+            STVFU(dst[0 * W + col], tempv);
 
             for (int row = 1; row <= rady; row++) {
                 lenp1v = lenv + onev;
                 tempv = (tempv * lenv + LVFU(temp[(row + rady) * W + col])) / lenp1v;
-                STVFU( dst[row * W + col], tempv);
+                STVFU(dst[row * W + col], tempv);
                 lenv = lenp1v;
             }
 
@@ -435,13 +654,13 @@ template<class T, class A> void boxblur (T* src, A* dst, A* buffer, int radx, in
 
             for (int row = rady + 1; row < H - rady; row++) {
                 tempv = tempv + (LVFU(temp[(row + rady) * W + col]) - LVFU(temp[(row - rady - 1) * W + col])) * rlenv ;
-                STVFU( dst[row * W + col], tempv);
+                STVFU(dst[row * W + col], tempv);
             }
 
             for (int row = H - rady; row < H; row++) {
                 lenm1v = lenv - onev;
                 tempv = (tempv * lenv - LVFU(temp[(row - rady - 1) * W + col])) / lenm1v;
-                STVFU( dst[row * W + col], tempv);
+                STVFU(dst[row * W + col], tempv);
                 lenv = lenm1v;
             }
         }
diff --git a/rtengine/capturesharpening.cc b/rtengine/capturesharpening.cc
index e08243713..6720d9abc 100644
--- a/rtengine/capturesharpening.cc
+++ b/rtengine/capturesharpening.cc
@@ -532,9 +532,9 @@ BENCHFUN
     constexpr int tileSize = 194;
     constexpr int border = 5;
     constexpr int fullTileSize = tileSize + 2 * border;
-    const float maxRadius = std::min<float>(1.15f, sigma + sigmaCornerOffset);
-    const float maxDistance = sqrt(rtengine::SQR(W * 0.5f) + rtengine::SQR(H * 0.5f));
-    const float distanceFactor = (maxRadius - sigma) / maxDistance;
+    const float cornerRadius = std::min<float>(1.15f, sigma + sigmaCornerOffset);
+    const float cornerDistance = sqrt(rtengine::SQR(W * 0.5f) + rtengine::SQR(H * 0.5f));
+    const float distanceFactor = (cornerRadius - sigma) / cornerDistance;
 
     double progress = startVal;
     const double progressStep = (endVal - startVal) * rtengine::SQR(tileSize) / (W * H);
@@ -581,14 +581,17 @@ BENCHFUN
                         gauss5x5mult(tmpThr, tmpIThr, fullTileSize, fullTileSize, kernel5);
                     }
                 } else {
-                    if (sigmaCornerOffset > 0.0) {
-                        float lkernel7[7][7];
+                    if (sigmaCornerOffset != 0.0) {
                         const float distance = sqrt(rtengine::SQR(i + tileSize / 2 - H / 2) + rtengine::SQR(j + tileSize / 2 - W / 2));
-                        compute7x7kernel(sigma + distanceFactor * distance, lkernel7);
-                        for (int k = 0; k < iterations - 1; ++k) {
-                            // apply 7x7 gaussian blur and divide luminance by result of gaussian blur
-                            gauss7x7div(tmpIThr, tmpThr, lumThr, fullTileSize, fullTileSize, lkernel7);
-                            gauss7x7mult(tmpThr, tmpIThr, fullTileSize, fullTileSize, lkernel7);
+                        const float sigmaTile = sigma + distanceFactor * distance;
+                        if (sigmaTile >= 0.4f) {
+                            float lkernel7[7][7];
+                            compute7x7kernel(sigma + distanceFactor * distance, lkernel7);
+                            for (int k = 0; k < iterations - 1; ++k) {
+                                // apply 7x7 gaussian blur and divide luminance by result of gaussian blur
+                                gauss7x7div(tmpIThr, tmpThr, lumThr, fullTileSize, fullTileSize, lkernel7);
+                                gauss7x7mult(tmpThr, tmpIThr, fullTileSize, fullTileSize, lkernel7);
+                            }
                         }
                     } else {
                         for (int k = 0; k < iterations; ++k) {
diff --git a/rtengine/color.h b/rtengine/color.h
index b859fb0cf..97835ba10 100644
--- a/rtengine/color.h
+++ b/rtengine/color.h
@@ -210,6 +210,13 @@ public:
         return r * workingspace[1][0] + g * workingspace[1][1] + b * workingspace[1][2];
     }
 
+#ifdef __SSE2__
+    static vfloat rgbLuminance(vfloat r, vfloat g, vfloat b, const vfloat workingspace[3])
+    { // SSE2 overload of rgbLuminance; NOTE(review): workingspace presumably holds the luminance-row coefficients of the working-space matrix, each broadcast to a vfloat -- confirm at call sites
+        return r * workingspace[0] + g * workingspace[1] + b * workingspace[2]; // per-lane weighted sum of the r, g and b vectors
+    }
+#endif
+
     /**
     * @brief Convert red/green/blue to L*a*b
     * @brief Convert red/green/blue to hue/saturation/luminance
diff --git a/rtengine/dcraw.cc b/rtengine/dcraw.cc
index d2c68e2e8..ef209118b 100644
--- a/rtengine/dcraw.cc
+++ b/rtengine/dcraw.cc
@@ -2417,59 +2417,78 @@ void CLASS hasselblad_correct()
 
 void CLASS hasselblad_load_raw()
 {
-  struct jhead jh;
-  int shot, row, col, *back[5], len[2], diff[12], pred, sh, f, s, c;
-  unsigned upix, urow, ucol;
-  ushort *ip;
+    struct jhead jh;
+    int *back[5], diff[12];
 
-  if (!ljpeg_start (&jh, 0)) return;
-  order = 0x4949;
-  ph1_bithuff_t ph1_bithuff(this, ifp, order);
-  hb_bits(-1);
-  back[4] = (int *) calloc (raw_width, 3*sizeof **back);
-  merror (back[4], "hasselblad_load_raw()");
-  FORC3 back[c] = back[4] + c*raw_width;
-  cblack[6] >>= sh = tiff_samples > 1;
-  shot = LIM(shot_select, 1, tiff_samples) - 1;
-  for (row=0; row < raw_height; row++) {
-    FORC4 back[(c+3) & 3] = back[c];
-    for (col=0; col < raw_width; col+=2) {
-      for (s=0; s < tiff_samples*2; s+=2) {
-	FORC(2) len[c] = ph1_huff(jh.huff[0]);
-	FORC(2) {
-	  diff[s+c] = hb_bits(len[c]);
-	  if ((diff[s+c] & (1 << (len[c]-1))) == 0)
-	    diff[s+c] -= (1 << len[c]) - 1;
-	  if (diff[s+c] == 65535) diff[s+c] = -32768;
-	}
-      }
-      for (s=col; s < col+2; s++) {
-	pred = 0x8000 + load_flags;
-	if (col) pred = back[2][s-2];
-	if (col && row > 1) switch (jh.psv) {
-	  case 11: pred += back[0][s]/2 - back[0][s-2]/2;  break;
-	}
-	f = (row & 1)*3 ^ ((col+s) & 1);
-	FORC (tiff_samples) {
-	  pred += diff[(s & 1)*tiff_samples+c];
-	  upix = pred >> sh & 0xffff;
-	  if (raw_image && c == shot)
-	    RAW(row,s) = upix;
-	  if (image) {
-	    urow = row-top_margin  + (c & 1);
-	    ucol = col-left_margin - ((c >> 1) & 1);
-	    ip = &image[urow*width+ucol][f];
-	    if (urow < height && ucol < width)
-	      *ip = c < 4 ? upix : (*ip + upix) >> 1;
-	  }
-	}
-	back[2][s] = pred;
-      }
+    if (!ljpeg_start (&jh, 0)) {
+        return;
+    }
+    order = 0x4949;
+    ph1_bithuff_t ph1_bithuff(this, ifp, order);
+    hb_bits(-1);
+    back[4] = (int *) calloc(raw_width, 3 * sizeof **back);
+    merror(back[4], "hasselblad_load_raw()");
+    for (int c = 0; c < 3; ++c) {
+        back[c] = back[4] + c * raw_width;
+    }
+    const int sh = tiff_samples > 1;
+    cblack[6] >>= sh;
+    const int shot = LIM(shot_select, 1, tiff_samples) - 1;
+    for (int row = 0; row < raw_height; ++row) {
+        for (int c = 0; c < 4; ++c) {
+            back[(c + 3) & 3] = back[c];
+        }
+        for (int col = 0; col < raw_width; col += 2) {
+            for (int s = 0; s < tiff_samples * 2; s += 2) {
+                const int len[2]= {
+                    static_cast<int>(ph1_huff(jh.huff[0])),
+                    static_cast<int>(ph1_huff(jh.huff[0]))
+                };
+                for (int c = 0; c < 2; ++c) {
+                    diff[s + c] = hb_bits(len[c]);
+                    if ((diff[s + c] & (1 << (len[c] - 1))) == 0) {
+                        diff[s + c] -= (1 << len[c]) - 1;
+                    }
+                    if (diff[s + c] == 65535) {
+                        diff[s + c] = -32768;
+                    }
+                }
+            }
+            for (int s = col; s < col + 2; ++s) {
+                int pred;
+                if (col) {
+                    pred = back[2][s - 2];
+                    if (row > 1 && jh.psv == 11) {
+                        pred += back[0][s] / 2 - back[0][s - 2] / 2;
+                    }
+                } else {
+                     pred = 0x8000 + load_flags;
+                }
+                for (int c = 0; c < tiff_samples; ++c) {
+                    pred += diff[(s & 1) * tiff_samples + c];
+                    const unsigned upix = pred >> sh & 0xffff;
+                    if (raw_image && c == shot) {
+                        RAW(row, s) = upix;
+                    }
+                    if (image) {
+                        const int f = (row & 1) * 3 ^ ((col + s) & 1);
+                        const unsigned urow = row - top_margin  + (c & 1);
+                        const unsigned ucol = col - left_margin - ((c >> 1) & 1);
+                        ushort* const ip = &image[urow * width + ucol][f];
+                        if (urow < height && ucol < width) {
+                            *ip = c < 4 ? upix : (*ip + upix) >> 1;
+                        }
+                    }
+                }
+                back[2][s] = pred;
+            }
+        }
+    }
+    free(back[4]);
+    ljpeg_end(&jh);
+    if (image) {
+        mix_green = 1;
     }
-  }
-  free (back[4]);
-  ljpeg_end (&jh);
-  if (image) mix_green = 1;
 }
 
 void CLASS leaf_hdr_load_raw()
diff --git a/rtengine/guidedfilter.cc b/rtengine/guidedfilter.cc
index 0ebe6c172..159e89504 100644
--- a/rtengine/guidedfilter.cc
+++ b/rtengine/guidedfilter.cc
@@ -3,6 +3,7 @@
  *  This file is part of RawTherapee.
  *
  *  Copyright (c) 2018 Alberto Griggio <alberto.griggio@gmail.com>
+ *  Optimized 2019 Ingo Weyrich <heckflosse67@gmx.de>
  *
  *  RawTherapee is free software: you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -16,9 +17,9 @@
  *
  *  You should have received a copy of the GNU General Public License
  *  along with RawTherapee.  If not, see <https://www.gnu.org/licenses/>.
- */
+*/
 
-/**
+/*
  * This is a Fast Guided Filter implementation, derived directly from the
  * pseudo-code of the paper:
  *
@@ -26,32 +27,16 @@
  * by Kaiming He, Jian Sun
  *
  * available at https://arxiv.org/abs/1505.00996
- */
+*/
 
 #include "guidedfilter.h"
 #include "boxblur.h"
 #include "rescale.h"
 #include "imagefloat.h"
-
+#define BENCHMARK
+#include "StopWatch.h"
 namespace rtengine {
 
-#if 0
-#  define DEBUG_DUMP(arr)                                                 \
-    do {                                                                \
-        Imagefloat im(arr.width(), arr.height());                      \
-        const char *out = "/tmp/" #arr ".tif";                     \
-        for (int y = 0; y < im.getHeight(); ++y) {                      \
-            for (int x = 0; x < im.getWidth(); ++x) {                   \
-                im.r(y, x) = im.g(y, x) = im.b(y, x) = arr[y][x] * 65535.f; \
-            }                                                           \
-        }                                                               \
-        im.saveTIFF(out, 16);                                           \
-    } while (false)
-#else
-#  define DEBUG_DUMP(arr)
-#endif
-
-
 namespace {
 
 int calculate_subsampling(int w, int h, int r)
@@ -78,18 +63,10 @@ int calculate_subsampling(int w, int h, int r)
 
 void guidedFilter(const array2D<float> &guide, const array2D<float> &src, array2D<float> &dst, int r, float epsilon, bool multithread, int subsampling)
 {
-
-    const int W = src.width();
-    const int H = src.height();
-
-    if (subsampling <= 0) {
-        subsampling = calculate_subsampling(W, H, r);
-    }
-
-    enum Op { MUL, DIVEPSILON, ADD, SUB, ADDMUL, SUBMUL };
+    enum Op {MUL, DIVEPSILON, SUBMUL};
 
     const auto apply =
-        [=](Op op, array2D<float> &res, const array2D<float> &a, const array2D<float> &b, const array2D<float> &c=array2D<float>()) -> void
+        [multithread, epsilon](Op op, array2D<float> &res, const array2D<float> &a, const array2D<float> &b, const array2D<float> &c=array2D<float>()) -> void
         {
             const int w = res.width();
             const int h = res.height();
@@ -99,137 +76,109 @@ void guidedFilter(const array2D<float> &guide, const array2D<float> &src, array2
 #endif
             for (int y = 0; y < h; ++y) {
                 for (int x = 0; x < w; ++x) {
-                    float r;
-                    float aa = a[y][x];
-                    float bb = b[y][x];
                     switch (op) {
-                    case MUL:
-                        r = aa * bb;
-                        break;
-                    case DIVEPSILON:
-                        r = aa / (bb + epsilon);
-                        break;
-                    case ADD:
-                        r = aa + bb;
-                        break;
-                    case SUB:
-                        r = aa - bb;
-                        break;
-                    case ADDMUL:
-                        r = aa * bb + c[y][x];
-                        break;
-                    case SUBMUL:
-                        r = c[y][x] - (aa * bb);
-                        break;
-                    default:
-                        assert(false);
-                        r = 0;
-                        break;
+                        case MUL:
+                            res[y][x] = a[y][x] * b[y][x];
+                            break;
+                        case DIVEPSILON:
+                            res[y][x] = a[y][x] / (b[y][x] + epsilon); // note: the value of epsilon intentionally has an impact on the result. It is not only to avoid divisions by zero
+                            break;
+                        case SUBMUL:
+                            res[y][x] = c[y][x] - (a[y][x] * b[y][x]);
+                            break;
+                        default:
+                            assert(false);
+                            res[y][x] = 0;
+                            break;
                     }
-                    res[y][x] = r;
                 }
             }
         };
 
-    // use the terminology of the paper (Algorithm 2)
-    const array2D<float> &I = guide;
-    const array2D<float> &p = src;
-    array2D<float> &q = dst;
-
     const auto f_subsample =
-        [=](array2D<float> &d, const array2D<float> &s) -> void
+        [multithread](array2D<float> &d, const array2D<float> &s) -> void
         {
             rescaleBilinear(s, d, multithread);
         };
 
-    const auto f_upsample = f_subsample;
-    
-    const size_t w = W / subsampling;
-    const size_t h = H / subsampling;
-
-    AlignedBuffer<float> blur_buf(w * h);
     const auto f_mean =
-        [&](array2D<float> &d, array2D<float> &s, int rad) -> void
+        [multithread](array2D<float> &d, array2D<float> &s, int rad) -> void
         {
             rad = LIM(rad, 0, (min(s.width(), s.height()) - 1) / 2 - 1);
-            float **src = s;
-            float **dst = d;
-#ifdef _OPENMP
-            #pragma omp parallel if (multithread)
-#endif
-            boxblur<float, float>(src, dst, blur_buf.data, rad, rad, s.width(), s.height());
+            boxblur(s, d, rad, s.width(), s.height(), multithread);
         };
 
+    const int W = src.width();
+    const int H = src.height();
+
+    if (subsampling <= 0) {
+        subsampling = calculate_subsampling(W, H, r);
+    }
+
+    const size_t w = W / subsampling;
+    const size_t h = H / subsampling;
+    const float r1 = float(r) / subsampling;
+
     array2D<float> I1(w, h);
     array2D<float> p1(w, h);
 
-    f_subsample(I1, I);
-    f_subsample(p1, p);
+    f_subsample(I1, guide);
 
-    DEBUG_DUMP(I);
-    DEBUG_DUMP(p);
-    DEBUG_DUMP(I1);
-    DEBUG_DUMP(p1);
+    if (&guide == &src) {
+        f_mean(p1, I1, r1);
 
-    float r1 = float(r) / subsampling;
+        apply(MUL, I1, I1, I1);        // I1 = I1 * I1
 
-    array2D<float> meanI(w, h);
-    f_mean(meanI, I1, r1);
-    DEBUG_DUMP(meanI);
+        f_mean(I1, I1, r1);
 
-    array2D<float> meanp(w, h);
-    f_mean(meanp, p1, r1);
-    DEBUG_DUMP(meanp);
+        apply(SUBMUL, I1, p1, p1, I1); // I1 = I1 - p1 * p1
+        apply(DIVEPSILON, I1, I1, I1); // I1 = I1 / (I1 + epsilon)
+        apply(SUBMUL, p1, I1, p1, p1); // p1 = p1 - I1 * p1
 
-    array2D<float> &corrIp = p1;
-    apply(MUL, corrIp, I1, p1);
-    f_mean(corrIp, corrIp, r1);
-    DEBUG_DUMP(corrIp);
+    } else {
+        f_subsample(p1, src);
 
-    array2D<float> &corrI = I1;
-    apply(MUL, corrI, I1, I1);
-    f_mean(corrI, corrI, r1);
-    DEBUG_DUMP(corrI);
+        array2D<float> meanI(w, h);
+        f_mean(meanI, I1, r1);
 
-    array2D<float> &varI = corrI;
-    apply(SUBMUL, varI, meanI, meanI, corrI);
-    DEBUG_DUMP(varI);
+        array2D<float> meanp(w, h);
+        f_mean(meanp, p1, r1);
 
-    array2D<float> &covIp = corrIp;
-    apply(SUBMUL, covIp, meanI, meanp, corrIp);
-    DEBUG_DUMP(covIp);
+        apply(MUL, p1, I1, p1);
 
-    array2D<float> &a = varI;
-    apply(DIVEPSILON, a, covIp, varI);
-    DEBUG_DUMP(a);
+        f_mean(p1, p1, r1);
 
-    array2D<float> &b = covIp;
-    apply(SUBMUL, b, a, meanI, meanp);
-    DEBUG_DUMP(b);
+        apply(MUL, I1, I1, I1);
 
-    meanI.free(); // frees w * h * 4 byte
-    meanp.free(); // frees w * h * 4 byte
+        f_mean(I1, I1, r1);
 
-    array2D<float> &meana = a;
-    f_mean(meana, a, r1);
-    DEBUG_DUMP(meana);
+        apply(SUBMUL, I1, meanI, meanI, I1);
+        apply(SUBMUL, p1, meanI, meanp, p1);
+        apply(DIVEPSILON, I1, p1, I1);
+        apply(SUBMUL, p1, I1, meanI, meanp);
+    }
 
-    array2D<float> &meanb = b;
-    f_mean(meanb, b, r1);
-    DEBUG_DUMP(meanb);
+    f_mean(I1, I1, r1);
+    f_mean(p1, p1, r1);
 
-    blur_buf.resize(0); // frees w * h * 4 byte
+    const int Ws = I1.width();
+    const int Hs = I1.height();
+    const int Wd = dst.width();
+    const int Hd = dst.height();
 
-    array2D<float> meanA(W, H);
-    f_upsample(meanA, meana);
-    DEBUG_DUMP(meanA);
+    const float col_scale = static_cast<float>(Ws) / static_cast<float>(Wd);
+    const float row_scale = static_cast<float>(Hs) / static_cast<float>(Hd);
 
-    array2D<float> &meanB = q;
-    f_upsample(meanB, meanb);
-    DEBUG_DUMP(meanB);
+#ifdef _OPENMP
+    #pragma omp parallel for if (multithread)
+#endif
 
-    apply(ADDMUL, q, meanA, I, meanB);
-    DEBUG_DUMP(q);
+    for (int y = 0; y < Hd; ++y) {
+        const float ymrs = y * row_scale;
+        for (int x = 0; x < Wd; ++x) {
+            dst[y][x] = getBilinearValue(I1, x * col_scale, ymrs) * guide[y][x] + getBilinearValue(p1, x * col_scale, ymrs);
+        }
+    }
 }
 
 } // namespace rtengine
diff --git a/rtengine/ipdehaze.cc b/rtengine/ipdehaze.cc
index 60d4cb9ff..e7bf71ba6 100644
--- a/rtengine/ipdehaze.cc
+++ b/rtengine/ipdehaze.cc
@@ -16,7 +16,7 @@
  *
  *  You should have received a copy of the GNU General Public License
  *  along with RawTherapee.  If not, see <https://www.gnu.org/licenses/>.
- */
+*/
 
 /*
  * Haze removal using the algorithm described in the paper:
@@ -26,15 +26,16 @@
  *
  * using a guided filter for the "soft matting" of the transmission map
  *
- */  
+*/
 
+#include <algorithm>
 #include <iostream>
-#include <queue>
+#include <vector>
 
 #include "guidedfilter.h"
 #include "improcfun.h"
 #include "procparams.h"
-#include "rt_algo.h"
+#include "rescale.h"
 #include "rt_math.h"
 
 extern Options options;
@@ -43,24 +44,103 @@ namespace rtengine {
 
 namespace {
 
-#if 0
-#  define DEBUG_DUMP(arr)                                                 \
-    do {                                                                \
-        Imagefloat im(arr.width(), arr.height());                      \
-        const char *out = "/tmp/" #arr ".tif";                     \
-        for (int y = 0; y < im.getHeight(); ++y) {                      \
-            for (int x = 0; x < im.getWidth(); ++x) {                   \
-                im.r(y, x) = im.g(y, x) = im.b(y, x) = arr[y][x] * 65535.f; \
-            }                                                           \
-        }                                                               \
-        im.saveTIFF(out, 16);                                           \
-    } while (false)
-#else
-#  define DEBUG_DUMP(arr)
+float normalize(Imagefloat *rgb, bool multithread)
+{
+    // Divide every channel sample in place by a common divisor and hand that divisor back to the caller.
+    const int width = rgb->getWidth(), height = rgb->getHeight();
+    float peak = 0.f;
+#ifdef _OPENMP
+    #pragma omp parallel for reduction(max:peak) schedule(dynamic, 16) if (multithread)
 #endif
+    for (int row = 0; row < height; ++row) {
+        for (int col = 0; col < width; ++col) {
+            peak = max(peak, rgb->r(row, col), rgb->g(row, col), rgb->b(row, col));
+        }
+    }
+    const float divisor = max(peak * 2.f, 65535.f); // twice the peak, but never below 65535
+#ifdef _OPENMP
+    #pragma omp parallel for schedule(dynamic, 16) if (multithread)
+#endif
+    for (int row = 0; row < height; ++row) {
+        for (int col = 0; col < width; ++col) {
+            rgb->r(row, col) /= divisor;
+            rgb->g(row, col) /= divisor;
+            rgb->b(row, col) /= divisor;
+        }
+    }
+    return divisor;
+}
 
+void restore(Imagefloat *rgb, float maxval, bool multithread)
+{
+    // Multiply every channel sample by maxval, in place; used to undo the scaling done by normalize().
+    const int width = rgb->getWidth();
+    const int height = rgb->getHeight();
+    if (!(maxval > 0.f && maxval != 1.f)) { return; } // non-positive, NaN or unit factor: nothing to do
+#ifdef _OPENMP
+#   pragma omp parallel for if (multithread)
+#endif
+    for (int row = 0; row < height; ++row) {
+        for (int col = 0; col < width; ++col) {
+            rgb->r(row, col) *= maxval;
+            rgb->g(row, col) *= maxval;
+            rgb->b(row, col) *= maxval;
+        }
+    }
+}
 
-int get_dark_channel(const array2D<float> &R, const array2D<float> &G, const array2D<float> &B, array2D<float> &dst, int patchsize, const float ambient[3], bool clip, bool multithread)
+int get_dark_channel(const array2D<float> &R, const array2D<float> &G, const array2D<float> &B, const array2D<float> &dst, int patchsize, const float ambient[3], bool clip, bool multithread, float strength)
+{ // Writes one value per patchsize x patchsize tile (derived from the tile's channel minima) into dst and returns the number of tiles. The 'clip' parameter is not read in this body.
+    const int W = R.width();
+    const int H = R.height();
+
+#ifdef _OPENMP
+    #pragma omp parallel for if (multithread)
+#endif
+    for (int y = 0; y < H; y += patchsize) { // walk the image tile by tile; tiles on the bottom/right border may be smaller
+        const int pH = min(y + patchsize, H);
+        for (int x = 0; x < W; x += patchsize) {
+            float minR = RT_INFINITY_F;
+            float minG = RT_INFINITY_F;
+            float minB = RT_INFINITY_F;
+#ifdef __SSE2__
+            vfloat minRv = F2V(minR); // vector accumulators for the 4-wide part of each tile row
+            vfloat minGv = F2V(minG);
+            vfloat minBv = F2V(minB);
+#endif
+            const int pW = min(x + patchsize, W);
+            for (int yy = y; yy < pH; ++yy) {
+                int xx = x;
+#ifdef __SSE2__
+                for (; xx < pW - 3; xx += 4) { // four pixels per iteration
+                    minRv = vminf(minRv, LVFU(R[yy][xx]));
+                    minGv = vminf(minGv, LVFU(G[yy][xx]));
+                    minBv = vminf(minBv, LVFU(B[yy][xx]));
+                }
+#endif
+                for (; xx < pW; ++xx) { // scalar loop: remaining pixels of the row (the whole row when SSE2 is unavailable)
+                    minR = min(minR, R[yy][xx]);
+                    minG = min(minG, G[yy][xx]);
+                    minB = min(minB, B[yy][xx]);
+                }
+            }
+#ifdef __SSE2__
+            minR = min(minR, vhmin(minRv)); // fold the vector minima into the scalar ones
+            minG = min(minG, vhmin(minGv));
+            minB = min(minB, vhmin(minBv));
+#endif
+            float val = min(minR / ambient[0], minG / ambient[1], minB / ambient[2]); // smallest ambient-relative channel minimum of the tile
+            val = 1.f - strength * LIM01(val); // LIM01 presumably clamps to [0, 1] - confirm in rt_math.h
+            for (int yy = y; yy < pH; ++yy) {
+                std::fill(dst[yy] + x, dst[yy] + pW, val); // the same value for every pixel of the tile
+            }
+        }
+    }
+
+    return (W / patchsize + ((W % patchsize) > 0)) *  (H / patchsize + ((H % patchsize) > 0)); // ceil(W / patchsize) * ceil(H / patchsize)
+}
+
+int get_dark_channel_downsized(const array2D<float> &R, const array2D<float> &G, const array2D<float> &B, const array2D<float> &dst, int patchsize, bool multithread)
 {
     const int W = R.width();
     const int H = R.height();
@@ -73,22 +153,11 @@ int get_dark_channel(const array2D<float> &R, const array2D<float> &G, const arr
         for (int x = 0; x < W; x += patchsize) {
             float val = RT_INFINITY_F;
             const int pW = min(x + patchsize, W);
-            for (int yy = y; yy < pH; ++yy) {
-                for (int xx = x; xx < pW; ++xx) {
-                    float r = R[yy][xx];
-                    float g = G[yy][xx];
-                    float b = B[yy][xx];
-                    if (ambient) {
-                        r /= ambient[0];
-                        g /= ambient[1];
-                        b /= ambient[2];
-                    }
-                    val = min(val, r, g, b);
+            for (int xx = x; xx < pW; ++xx) {
+                for (int yy = y; yy < pH; ++yy) {
+                    val = min(val, R[yy][xx], G[yy][xx], B[yy][xx]);
                 }
             }
-            if (clip) {
-                val = LIM01(val);
-            }
             for (int yy = y; yy < pH; ++yy) {
                 std::fill(dst[yy] + x, dst[yy] + pW, val);
             }
@@ -98,33 +167,24 @@ int get_dark_channel(const array2D<float> &R, const array2D<float> &G, const arr
     return (W / patchsize + ((W % patchsize) > 0)) *  (H / patchsize + ((H % patchsize) > 0));
 }
 
-
 float estimate_ambient_light(const array2D<float> &R, const array2D<float> &G, const array2D<float> &B, const array2D<float> &dark, int patchsize, int npatches, float ambient[3])
 {
     const int W = R.width();
     const int H = R.height();
 
-    const auto get_percentile =
-        [](std::priority_queue<float> &q, float prcnt) -> float
-        {
-            size_t n = LIM<size_t>(q.size() * prcnt, 1, q.size());
-            while (q.size() > n) {
-                q.pop();
-            }
-            return q.top();
-        };
-    
     float darklim = RT_INFINITY_F;
     {
-        std::priority_queue<float> p;
+        std::vector<float> p;
         for (int y = 0; y < H; y += patchsize) {
             for (int x = 0; x < W; x += patchsize) {
                 if (!OOG(dark[y][x], 1.f - 1e-5f)) {
-                    p.push(dark[y][x]);
+                    p.push_back(dark[y][x]);
                 }
             }
         }
-        darklim = get_percentile(p, 0.95);
+        const int pos = p.size() * 0.95;
+        std::nth_element(p.begin(), p.begin() + pos, p.end());
+        darklim = p[pos];
     }
 
     std::vector<std::pair<int, int>> patches;
@@ -145,7 +205,8 @@ float estimate_ambient_light(const array2D<float> &R, const array2D<float> &G, c
 
     float bright_lim = RT_INFINITY_F;
     {
-        std::priority_queue<float> l;
+        std::vector<float> l;
+        l.reserve(patches.size() * patchsize * patchsize);
         
         for (auto &p : patches) {
             const int pW = min(p.first+patchsize, W);
@@ -153,12 +214,13 @@ float estimate_ambient_light(const array2D<float> &R, const array2D<float> &G, c
             
             for (int y = p.second; y < pH; ++y) {
                 for (int x = p.first; x < pW; ++x) {
-                    l.push(R[y][x] + G[y][x] + B[y][x]);
+                    l.push_back(R[y][x] + G[y][x] + B[y][x]);
                 }
             }
         }
-
-        bright_lim = get_percentile(l, 0.95);
+        const int pos = l.size() * 0.95;
+        std::nth_element(l.begin(), l.begin() + pos, l.end());
+        bright_lim = l[pos];
     }
 
     double rr = 0, gg = 0, bb = 0;
@@ -190,7 +252,6 @@ float estimate_ambient_light(const array2D<float> &R, const array2D<float> &G, c
     return darklim > 0 ? -1.125f * std::log(darklim) : std::log(std::numeric_limits<float>::max()) / 2;
 }
 
-
 void extract_channels(Imagefloat *img, array2D<float> &r, array2D<float> &g, array2D<float> &b, int radius, float epsilon, bool multithread)
 {
     const int W = img->getWidth();
@@ -211,12 +272,12 @@ void extract_channels(Imagefloat *img, array2D<float> &r, array2D<float> &g, arr
 
 void ImProcFunctions::dehaze(Imagefloat *img)
 {
-    if (!params->dehaze.enabled) {
+    if (!params->dehaze.enabled || params->dehaze.strength == 0.0) {
         return;
     }
 
-    img->normalizeFloatTo1();
-    
+    const float maxChannel = normalize(img, multiThread);
+
     const int W = img->getWidth();
     const int H = img->getHeight();
     const float strength = LIM01(float(params->dehaze.strength) / 100.f * 0.9f);
@@ -229,21 +290,47 @@ void ImProcFunctions::dehaze(Imagefloat *img)
 
     int patchsize = max(int(5 / scale), 2);
     float ambient[3];
-    array2D<float> &t_tilde = dark;
-    float max_t = 0.f;
+    float maxDistance = 0.f;
 
     {
-        int npatches = 0;
-        array2D<float> R(W, H);
+        array2D<float>& R = dark; // R and dark can safely use the same buffer, which is faster and reduces memory allocations/deallocations
         array2D<float> G(W, H);
         array2D<float> B(W, H);
         extract_channels(img, R, G, B, patchsize, 1e-1, multiThread);
-    
-        patchsize = max(max(W, H) / 600, 2);
-        npatches = get_dark_channel(R, G, B, dark, patchsize, nullptr, false, multiThread);
-        DEBUG_DUMP(dark);
 
-        max_t = estimate_ambient_light(R, G, B, dark, patchsize, npatches, ambient);
+        {
+            constexpr int sizecap = 200;
+            const float r = static_cast<float>(W) / static_cast<float>(H);
+            const int hh = r >= 1.f ? sizecap : sizecap / r;
+            const int ww = r >= 1.f ? sizecap * r : sizecap;
+
+            if (W <= ww && H <= hh) {
+                // don't rescale small thumbs
+                array2D<float> D(W, H);
+                const int npatches = get_dark_channel_downsized(R, G, B, D, 2, multiThread);
+                maxDistance = estimate_ambient_light(R, G, B, D, patchsize, npatches, ambient);
+            } else {
+                array2D<float> RR(ww, hh);
+                array2D<float> GG(ww, hh);
+                array2D<float> BB(ww, hh);
+                rescaleNearest(R, RR, multiThread);
+                rescaleNearest(G, GG, multiThread);
+                rescaleNearest(B, BB, multiThread);
+                array2D<float> D(ww, hh);
+
+                const int npatches = get_dark_channel_downsized(RR, GG, BB, D, 2, multiThread);
+                maxDistance = estimate_ambient_light(RR, GG, BB, D, patchsize, npatches, ambient);
+            }
+        }
+
+        if (min(ambient[0], ambient[1], ambient[2]) < 0.01f) {
+            if (options.rtSettings.verbose) {
+                std::cout << "dehaze: no haze detected" << std::endl;
+            }
+            restore(img, maxChannel, multiThread);
+            return; // probably no haze at all
+        }
+        patchsize = max(max(W, H) / 600, 2);
 
         if (options.rtSettings.verbose) {
             std::cout << "dehaze: ambient light is "
@@ -251,78 +338,95 @@ void ImProcFunctions::dehaze(Imagefloat *img)
                       << std::endl;
         }
 
-        get_dark_channel(R, G, B, dark, patchsize, ambient, true, multiThread);
-    }
-
-    if (min(ambient[0], ambient[1], ambient[2]) < 0.01f) {
-        if (options.rtSettings.verbose) {
-            std::cout << "dehaze: no haze detected" << std::endl;
-        }
-        img->normalizeFloatTo65535();
-        return; // probably no haze at all
-    }
-
-    DEBUG_DUMP(t_tilde);
-
-#ifdef _OPENMP
-    #pragma omp parallel for if (multiThread)
-#endif
-    for (int y = 0; y < H; ++y) {
-        for (int x = 0; x < W; ++x) {
-            dark[y][x] = 1.f - strength * dark[y][x];
-        }
+        get_dark_channel(R, G, B, dark, patchsize, ambient, true, multiThread, strength);
     }
 
     const int radius = patchsize * 4;
-    const float epsilon = 1e-5;
-    array2D<float> &t = t_tilde;
+    constexpr float epsilon = 1e-5f;
 
-    {
-        array2D<float> guideB(W, H, img->b.ptrs, ARRAY2D_BYREFERENCE);
-        guidedFilter(guideB, t_tilde, t, radius, epsilon, multiThread);
-    }
+    array2D<float> guideB(W, H, img->b.ptrs, ARRAY2D_BYREFERENCE);
+    guidedFilter(guideB, dark, dark, radius, epsilon, multiThread);
         
-    DEBUG_DUMP(t);
-
     if (options.rtSettings.verbose) {
-        std::cout << "dehaze: max distance is " << max_t << std::endl;
+        std::cout << "dehaze: max distance is " << maxDistance << std::endl;
     }
 
-    float depth = -float(params->dehaze.depth) / 100.f;
-    const float t0 = max(1e-3f, std::exp(depth * max_t));
+    const float depth = -float(params->dehaze.depth) / 100.f;
+    const float t0 = max(1e-3f, std::exp(depth * maxDistance));
     const float teps = 1e-3f;
+
+    const bool luminance = params->dehaze.luminance;
+    const TMatrix ws = ICCStore::getInstance()->workingSpaceMatrix(params->icm.workingProfile);
+#ifdef __SSE2__
+    const vfloat wsv[3] = {F2V(ws[1][0]), F2V(ws[1][1]),F2V(ws[1][2])};
+#endif
+    const float ambientY = Color::rgbLuminance(ambient[0], ambient[1], ambient[2], ws);
 #ifdef _OPENMP
     #pragma omp parallel for if (multiThread)
 #endif
     for (int y = 0; y < H; ++y) {
-        for (int x = 0; x < W; ++x) {
+        int x = 0;
+#ifdef __SSE2__
+        const vfloat onev = F2V(1.f);
+        const vfloat ambient0v = F2V(ambient[0]);
+        const vfloat ambient1v = F2V(ambient[1]);
+        const vfloat ambient2v = F2V(ambient[2]);
+        const vfloat ambientYv = F2V(ambientY);
+        const vfloat epsYv = F2V(1e-5f);
+        const vfloat t0v = F2V(t0);
+        const vfloat tepsv = F2V(teps);
+        const vfloat cmaxChannelv = F2V(maxChannel);
+        for (; x < W - 3; x += 4) {
             // ensure that the transmission is such that to avoid clipping...
-            float rgb[3] = { img->r(y, x), img->g(y, x), img->b(y, x) };
+            const vfloat r = LVFU(img->r(y, x));
+            const vfloat g = LVFU(img->g(y, x));
+            const vfloat b = LVFU(img->b(y, x));
             // ... t >= tl to avoid negative values
-            float tl = 1.f - min(rgb[0]/ambient[0], rgb[1]/ambient[1], rgb[2]/ambient[2]);
-            // ... t >= tu to avoid values > 1
-            float tu = t0 - teps;
-            for (int c = 0; c < 3; ++c) {
-                if (ambient[c] < 1) {
-                    tu = max(tu, (rgb[c] - ambient[c])/(1.f - ambient[c]));
-                }
-            }
-            float mt = max(t[y][x], t0, tl + teps, tu + teps);
+            const vfloat tlv = onev - vminf(r / ambient0v, vminf(g / ambient1v, b / ambient2v));
+            const vfloat mtv = vmaxf(LVFU(dark[y][x]), vmaxf(tlv + tepsv, t0v));
             if (params->dehaze.showDepthMap) {
-                img->r(y, x) = img->g(y, x) = img->b(y, x) = LIM01(1.f - mt);
+                const vfloat valv = vclampf(onev - mtv, ZEROV, onev) * cmaxChannelv;
+                STVFU(img->r(y, x), valv);
+                STVFU(img->g(y, x), valv);
+                STVFU(img->b(y, x), valv);
+            } else if (luminance) {
+                const vfloat Yv = Color::rgbLuminance(r, g, b, wsv);
+                const vfloat YYv = (Yv - ambientYv) / mtv + ambientYv;
+                const vfloat fv = vself(vmaskf_gt(Yv, epsYv), cmaxChannelv * YYv / Yv, cmaxChannelv);
+                STVFU(img->r(y, x), r * fv);
+                STVFU(img->g(y, x), g * fv);
+                STVFU(img->b(y, x), b * fv);
             } else {
-                float r = (rgb[0] - ambient[0]) / mt + ambient[0];
-                float g = (rgb[1] - ambient[1]) / mt + ambient[1];
-                float b = (rgb[2] - ambient[2]) / mt + ambient[2];
-
-                img->r(y, x) = r;
-                img->g(y, x) = g;
-                img->b(y, x) = b;
+                STVFU(img->r(y, x), ((r - ambient0v) / mtv + ambient0v) * cmaxChannelv);
+                STVFU(img->g(y, x), ((g - ambient1v) / mtv + ambient1v) * cmaxChannelv);
+                STVFU(img->b(y, x), ((b - ambient2v) / mtv + ambient2v) * cmaxChannelv);
+            }
+        }
+#endif
+        for (; x < W; ++x) {
+            // ensure that the transmission is such that to avoid clipping...
+            const float r = img->r(y, x);
+            const float g = img->g(y, x);
+            const float b = img->b(y, x);
+            // ... t >= tl to avoid negative values
+            const float tl = 1.f - min(r / ambient[0], g / ambient[1], b / ambient[2]);
+            const float mt = max(dark[y][x], t0, tl + teps);
+            if (params->dehaze.showDepthMap) {
+                img->r(y, x) = img->g(y, x) = img->b(y, x) = LIM01(1.f - mt) * maxChannel;
+            } else if (luminance) {
+                const float Y = Color::rgbLuminance(img->r(y, x), img->g(y, x), img->b(y, x), ws);
+                const float YY = (Y - ambientY) / mt + ambientY;
+                const float f = Y > 1e-5f ? maxChannel * YY / Y : maxChannel;
+                img->r(y, x) *= f;
+                img->g(y, x) *= f;
+                img->b(y, x) *= f;
+            } else {
+                img->r(y, x) = ((r - ambient[0]) / mt + ambient[0]) * maxChannel;
+                img->g(y, x) = ((g - ambient[1]) / mt + ambient[1]) * maxChannel;
+                img->b(y, x) = ((b - ambient[2]) / mt + ambient[2]) * maxChannel;
             }
         }
     }
-
-    img->normalizeFloatTo65535();
 }
 
 
diff --git a/rtengine/procparams.cc b/rtengine/procparams.cc
index bd91ae8e7..68960794e 100644
--- a/rtengine/procparams.cc
+++ b/rtengine/procparams.cc
@@ -2539,7 +2539,8 @@ DehazeParams::DehazeParams() :
     enabled(false),
     strength(50),
     showDepthMap(false),
-    depth(25)
+    depth(25),
+    luminance(false)
 {
 }
 
@@ -2549,7 +2550,8 @@ bool DehazeParams::operator ==(const DehazeParams& other) const
         enabled == other.enabled
         && strength == other.strength
         && showDepthMap == other.showDepthMap
-        && depth == other.depth;
+        && depth == other.depth
+        && luminance == other.luminance;
 }
 
 bool DehazeParams::operator !=(const DehazeParams& other) const
@@ -3260,6 +3262,7 @@ int ProcParams::save(const Glib::ustring& fname, const Glib::ustring& fname2, bo
         saveToKeyfile(!pedited || pedited->dehaze.strength, "Dehaze", "Strength", dehaze.strength, keyFile);        
         saveToKeyfile(!pedited || pedited->dehaze.showDepthMap, "Dehaze", "ShowDepthMap", dehaze.showDepthMap, keyFile);        
         saveToKeyfile(!pedited || pedited->dehaze.depth, "Dehaze", "Depth", dehaze.depth, keyFile);        
+        saveToKeyfile(!pedited || pedited->dehaze.luminance, "Dehaze", "Luminance", dehaze.luminance, keyFile);
 
 // Directional pyramid denoising
         saveToKeyfile(!pedited || pedited->dirpyrDenoise.enabled, "Directional Pyramid Denoising", "Enabled", dirpyrDenoise.enabled, keyFile);
@@ -4922,6 +4925,7 @@ int ProcParams::load(const Glib::ustring& fname, ParamsEdited* pedited)
             assignFromKeyfile(keyFile, "Dehaze", "Strength", pedited, dehaze.strength, pedited->dehaze.strength);
             assignFromKeyfile(keyFile, "Dehaze", "ShowDepthMap", pedited, dehaze.showDepthMap, pedited->dehaze.showDepthMap);
             assignFromKeyfile(keyFile, "Dehaze", "Depth", pedited, dehaze.depth, pedited->dehaze.depth);
+            assignFromKeyfile(keyFile, "Dehaze", "Luminance", pedited, dehaze.luminance, pedited->dehaze.luminance);
         }
         
         if (keyFile.has_group("Film Simulation")) {
diff --git a/rtengine/procparams.h b/rtengine/procparams.h
index 82dfe9697..4d8f66ebd 100644
--- a/rtengine/procparams.h
+++ b/rtengine/procparams.h
@@ -1353,6 +1353,7 @@ struct DehazeParams {
     int strength;
     bool showDepthMap;
     int depth;
+    bool luminance;
 
     DehazeParams();
 
diff --git a/rtengine/sleefsseavx.c b/rtengine/sleefsseavx.c
index 3000c1c10..cce88df5d 100644
--- a/rtengine/sleefsseavx.c
+++ b/rtengine/sleefsseavx.c
@@ -1390,6 +1390,18 @@ static inline float vhadd( vfloat a ) {
     return _mm_cvtss_f32(_mm_add_ss(a, _mm_shuffle_ps(a, a, 1)));
 }
 
+static inline float vhmin(vfloat a) {
+    // horizontal minimum: the smallest of the four lanes of a
+    const vfloat halves = vminf(a, _mm_movehl_ps(a, a)); // lanes 0/1 now hold min(a0, a2) / min(a1, a3)
+    return _mm_cvtss_f32(vminf(halves, _mm_shuffle_ps(halves, halves, 1)));
+}
+
+static inline float vhmax(vfloat a) {
+    // horizontal maximum: the largest of the four lanes of a
+    const vfloat halves = vmaxf(a, _mm_movehl_ps(a, a)); // lanes 0/1 now hold max(a0, a2) / max(a1, a3)
+    return _mm_cvtss_f32(vmaxf(halves, _mm_shuffle_ps(halves, halves, 1)));
+}
+
 static INLINE vfloat vmul2f(vfloat a){
     // fastest way to multiply by 2
 	return a + a;
diff --git a/rtgui/dehaze.cc b/rtgui/dehaze.cc
index 6f60d08d6..6b7fcd64f 100644
--- a/rtgui/dehaze.cc
+++ b/rtgui/dehaze.cc
@@ -36,6 +36,7 @@ Dehaze::Dehaze(): FoldableToolPanel(this, "dehaze", M("TP_DEHAZE_LABEL"), false,
     EvDehazeStrength = m->newEvent(HDR, "HISTORY_MSG_DEHAZE_STRENGTH");
     EvDehazeShowDepthMap = m->newEvent(HDR, "HISTORY_MSG_DEHAZE_SHOW_DEPTH_MAP");
     EvDehazeDepth = m->newEvent(HDR, "HISTORY_MSG_DEHAZE_DEPTH");
+    EvDehazeLuminance = m->newEvent(HDR, "HISTORY_MSG_DEHAZE_LUMINANCE");
     
     strength = Gtk::manage(new Adjuster(M("TP_DEHAZE_STRENGTH"), 0., 100., 1., 50.));
     strength->setAdjusterListener(this);
@@ -45,12 +46,17 @@ Dehaze::Dehaze(): FoldableToolPanel(this, "dehaze", M("TP_DEHAZE_LABEL"), false,
     depth->setAdjusterListener(this);
     depth->show();
 
+    luminance = Gtk::manage(new Gtk::CheckButton(M("TP_DEHAZE_LUMINANCE")));
+    luminance->signal_toggled().connect(sigc::mem_fun(*this, &Dehaze::luminanceChanged));
+    luminance->show();
+
     showDepthMap = Gtk::manage(new Gtk::CheckButton(M("TP_DEHAZE_SHOW_DEPTH_MAP")));
     showDepthMap->signal_toggled().connect(sigc::mem_fun(*this, &Dehaze::showDepthMapChanged));
     showDepthMap->show();
     
     pack_start(*strength);
     pack_start(*depth);
+    pack_start(*luminance);
     pack_start(*showDepthMap);
 }
 
@@ -64,12 +70,14 @@ void Dehaze::read(const ProcParams *pp, const ParamsEdited *pedited)
         depth->setEditedState(pedited->dehaze.depth ? Edited : UnEdited);
         set_inconsistent(multiImage && !pedited->dehaze.enabled);
         showDepthMap->set_inconsistent(!pedited->dehaze.showDepthMap);
+        luminance->set_inconsistent(!pedited->dehaze.luminance);
     }
 
     setEnabled(pp->dehaze.enabled);
     strength->setValue(pp->dehaze.strength);
     depth->setValue(pp->dehaze.depth);
     showDepthMap->set_active(pp->dehaze.showDepthMap);
+    luminance->set_active(pp->dehaze.luminance);
 
     enableListener();
 }
@@ -81,12 +89,14 @@ void Dehaze::write(ProcParams *pp, ParamsEdited *pedited)
     pp->dehaze.depth = depth->getValue();
     pp->dehaze.enabled = getEnabled();
     pp->dehaze.showDepthMap = showDepthMap->get_active();
+    pp->dehaze.luminance = luminance->get_active();
 
     if (pedited) {
         pedited->dehaze.strength = strength->getEditedState();
         pedited->dehaze.depth = depth->getEditedState();
         pedited->dehaze.enabled = !get_inconsistent();
         pedited->dehaze.showDepthMap = !showDepthMap->get_inconsistent();
+        pedited->dehaze.luminance = !luminance->get_inconsistent();
     }
 }
 
@@ -138,6 +148,12 @@ void Dehaze::showDepthMapChanged()
     }
 }
 
+void Dehaze::luminanceChanged()
+{
+    if (!listener) { return; } // no one to notify
+    const bool checked = luminance->get_active();
+    listener->panelChanged(EvDehazeLuminance, checked ? M("GENERAL_ENABLED") : M("GENERAL_DISABLED"));
+}
 
 void Dehaze::setBatchMode(bool batchMode)
 {
diff --git a/rtgui/dehaze.h b/rtgui/dehaze.h
index 3120dfc91..6a9d31cd1 100644
--- a/rtgui/dehaze.h
+++ b/rtgui/dehaze.h
@@ -28,12 +28,14 @@ class Dehaze: public ToolParamBlock, public AdjusterListener, public FoldableToo
 private:
     Adjuster *strength;
     Adjuster *depth;
-    Gtk::CheckButton *showDepthMap;    
+    Gtk::CheckButton *showDepthMap;
+    Gtk::CheckButton *luminance;
 
     rtengine::ProcEvent EvDehazeEnabled;
     rtengine::ProcEvent EvDehazeStrength;
     rtengine::ProcEvent EvDehazeDepth;
     rtengine::ProcEvent EvDehazeShowDepthMap;
+    rtengine::ProcEvent EvDehazeLuminance;
     
 public:
 
@@ -47,6 +49,7 @@ public:
     void adjusterChanged(Adjuster *a, double newval) override;
     void enabledChanged() override;
     void showDepthMapChanged();
+    void luminanceChanged();
     void setAdjusterBehavior(bool strengthAdd);
 };
 
diff --git a/rtgui/extprog.cc b/rtgui/extprog.cc
index a6a9050c0..95c1c937d 100644
--- a/rtgui/extprog.cc
+++ b/rtgui/extprog.cc
@@ -58,7 +58,7 @@ bool ExtProgAction::execute (const std::vector<Glib::ustring>& fileNames) const
     }
 
     for (const auto& fileName : fileNames) {
-        cmdLine += " \"" + fileName + "\"";
+        cmdLine += " " + Glib::shell_quote(fileName);
     }
 
     return ExtProgStore::spawnCommandAsync (cmdLine);
@@ -256,7 +256,7 @@ bool ExtProgStore::openInGimp (const Glib::ustring& fileName)
 
 #else
 
-    auto cmdLine = Glib::ustring("gimp \"") + fileName + Glib::ustring("\"");
+    auto cmdLine = Glib::ustring("gimp ") + Glib::shell_quote(fileName);
     auto success = spawnCommandAsync (cmdLine);
 
 #endif
@@ -291,7 +291,7 @@ bool ExtProgStore::openInGimp (const Glib::ustring& fileName)
 
 #else
 
-    cmdLine = Glib::ustring("gimp-remote \"") + fileName + Glib::ustring("\"");
+    cmdLine = Glib::ustring("gimp-remote ") + Glib::shell_quote(fileName);
     success = ExtProgStore::spawnCommandAsync (cmdLine);
 
 #endif
@@ -312,7 +312,7 @@ bool ExtProgStore::openInPhotoshop (const Glib::ustring& fileName)
 
 #else
 
-    const auto cmdLine = Glib::ustring("\"") + Glib::build_filename(options.psDir, "Photoshop.exe") + Glib::ustring("\" \"") + fileName + Glib::ustring("\"");
+    const auto cmdLine = Glib::ustring("\"") + Glib::build_filename(options.psDir, "Photoshop.exe") + "\" " + Glib::shell_quote(fileName);
 
 #endif
 
@@ -334,7 +334,7 @@ bool ExtProgStore::openInCustomEditor (const Glib::ustring& fileName)
 
 #else
 
-    const auto cmdLine = Glib::ustring("\"") + options.customEditorProg + Glib::ustring("\" \"") + fileName + Glib::ustring("\"");
+    const auto cmdLine = Glib::ustring("\"") + options.customEditorProg + "\" " + Glib::shell_quote(fileName);
     return spawnCommandAsync (cmdLine);
 
 #endif
diff --git a/rtgui/paramsedited.cc b/rtgui/paramsedited.cc
index 1cdcacf13..f6561077c 100644
--- a/rtgui/paramsedited.cc
+++ b/rtgui/paramsedited.cc
@@ -598,6 +598,7 @@ void ParamsEdited::set(bool v)
     dehaze.strength = v;
     dehaze.showDepthMap = v;
     dehaze.depth = v;
+    dehaze.luminance = v;
     metadata.mode = v;
     filmNegative.enabled = v;
     filmNegative.redRatio = v;
@@ -1180,6 +1181,7 @@ void ParamsEdited::initFrom(const std::vector<rtengine::procparams::ProcParams>&
         dehaze.strength = dehaze.strength && p.dehaze.strength == other.dehaze.strength;
         dehaze.showDepthMap = dehaze.showDepthMap && p.dehaze.showDepthMap == other.dehaze.showDepthMap;
         dehaze.depth = dehaze.depth && p.dehaze.depth == other.dehaze.depth;
+        dehaze.luminance = dehaze.luminance && p.dehaze.luminance == other.dehaze.luminance;
         metadata.mode = metadata.mode && p.metadata.mode == other.metadata.mode;
         filmNegative.enabled = filmNegative.enabled && p.filmNegative.enabled == other.filmNegative.enabled;
         filmNegative.redRatio = filmNegative.redRatio && p.filmNegative.redRatio == other.filmNegative.redRatio;
@@ -3290,6 +3292,10 @@ void ParamsEdited::combine(rtengine::procparams::ProcParams& toEdit, const rteng
         toEdit.dehaze.showDepthMap = mods.dehaze.showDepthMap;
     }
 
+    if (dehaze.luminance) {
+        toEdit.dehaze.luminance = mods.dehaze.luminance;
+    }
+
     if (metadata.mode) {
         toEdit.metadata.mode = mods.metadata.mode;
     }
diff --git a/rtgui/paramsedited.h b/rtgui/paramsedited.h
index 0f4ad85ea..fc3cd4b7a 100644
--- a/rtgui/paramsedited.h
+++ b/rtgui/paramsedited.h
@@ -607,6 +607,7 @@ struct DehazeParamsEdited {
     bool strength;
     bool showDepthMap;
     bool depth;
+    bool luminance;
 };
 
 struct RAWParamsEdited {
diff --git a/rtgui/pdsharpening.cc b/rtgui/pdsharpening.cc
index f25e44e69..d0ccc43a8 100644
--- a/rtgui/pdsharpening.cc
+++ b/rtgui/pdsharpening.cc
@@ -18,22 +18,28 @@
 */
 
 #include <cmath>
-#include "eventmapper.h"
+#include <iomanip>
+
 #include "pdsharpening.h"
+
+#include "eventmapper.h"
 #include "options.h"
+
 #include "../rtengine/procparams.h"
 
 using namespace rtengine;
 using namespace rtengine::procparams;
 
-PdSharpening::PdSharpening() : FoldableToolPanel(this, "pdsharpening", M("TP_PDSHARPENING_LABEL"), false, true)
+PdSharpening::PdSharpening() :
+    FoldableToolPanel(this, "capturesharpening", M("TP_PDSHARPENING_LABEL"), false, true),
+    lastAutoContrast(true),
+    lastAutoRadius(true)
 {
-
     auto m = ProcEventMapper::getInstance();
     EvPdShrContrast = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_CONTRAST");
     EvPdSharpenGamma = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_GAMMA");
     EvPdShrDRadius = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_RADIUS");
-    EvPdShrDRadiusOffset = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_RADIUS_OFFSET");
+    EvPdShrDRadiusOffset = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_RADIUS_BOOST");
     EvPdShrDIterations = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_ITERATIONS");
     EvPdShrAutoContrast = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_AUTO_CONTRAST");
     EvPdShrAutoRadius = m->newEvent(CAPTURESHARPEN, "HISTORY_MSG_PDSHARPEN_AUTO_RADIUS");
@@ -42,7 +48,7 @@ PdSharpening::PdSharpening() : FoldableToolPanel(this, "pdsharpening", M("TP_PDS
     hb->show();
     contrast = Gtk::manage(new Adjuster(M("TP_SHARPENING_CONTRAST"), 0, 200, 1, 10));
     contrast->setAdjusterListener(this);
-    contrast->addAutoButton(M("TP_RAW_DUALDEMOSAICAUTOCONTRAST_TOOLTIP"));
+    contrast->addAutoButton();
     contrast->setAutoValue(true);
 
     pack_start(*contrast);
@@ -53,9 +59,9 @@ PdSharpening::PdSharpening() : FoldableToolPanel(this, "pdsharpening", M("TP_PDS
     Gtk::VBox* rld = Gtk::manage(new Gtk::VBox());
     gamma = Gtk::manage(new Adjuster(M("TP_SHARPENING_GAMMA"), 0.5, 6.0, 0.05, 1.00));
     dradius = Gtk::manage(new Adjuster(M("TP_SHARPENING_RADIUS"), 0.4, 1.15, 0.01, 0.75));
-    dradius->addAutoButton(M("TP_PDSHARPENING_AUTORADIUS_TOOLTIP"));
+    dradius->addAutoButton();
     dradius->setAutoValue(true);
-    dradiusOffset = Gtk::manage(new Adjuster(M("TP_SHARPENING_RADIUS_OFFSET"), 0.0, 0.5, 0.01, 0.0));
+    dradiusOffset = Gtk::manage(new Adjuster(M("TP_SHARPENING_RADIUS_BOOST"), -0.5, 0.5, 0.01, 0.0));
     diter = Gtk::manage(new Adjuster(M("TP_SHARPENING_RLD_ITERATIONS"), 1, 100, 1, 20));
     rld->pack_start(*gamma);
     rld->pack_start(*dradius);