diff --git a/rtengine/PF_correct_RT.cc b/rtengine/PF_correct_RT.cc
index 1a937b409..b4f9ae6b3 100644
--- a/rtengine/PF_correct_RT.cc
+++ b/rtengine/PF_correct_RT.cc
@@ -27,12 +27,13 @@
 #include "gauss.h"
 #include "improcfun.h"
 #include "sleef.c"
-#include "mytime.h"
 #include "../rtgui/myflatcurve.h"
 #include "rt_math.h"
 #include "opthelper.h"
 #include "median.h"
-
+#include "jaggedarray.h"
+#define BENCHMARK
+#include "StopWatch.h"
 #ifdef _OPENMP
 #include <omp.h>
 #endif
@@ -43,8 +44,9 @@ namespace rtengine
 {
 extern const Settings* settings;
 
-void ImProcFunctions::PF_correct_RT(LabImage * src, LabImage * dst, double radius, int thresh)
+void ImProcFunctions::PF_correct_RT(LabImage * src, double radius, int thresh)
 {
+    BENCHFUN
     const int halfwin = ceil(2 * radius) + 1;
 
     FlatCurve* chCurve = nullptr;
@@ -56,27 +58,27 @@ void ImProcFunctions::PF_correct_RT(LabImage * src, LabImage * dst, double radiu
     // local variables
     const int width = src->W, height = src->H;
     //temporary array to store chromaticity
-    float (*fringe);
-    fringe = (float (*)) malloc (height * width * sizeof(*fringe));
+    float *fringe = new float[width * height];
+
+    const JaggedArray<float> tmpa(width, height);
+    const JaggedArray<float> tmpb(width, height);
 
-    LabImage * tmp1;
-    tmp1 = new LabImage(width, height);
 
 #ifdef _OPENMP
     #pragma omp parallel
 #endif
     {
-        gaussianBlur (src->a, tmp1->a, src->W, src->H, radius);
-        gaussianBlur (src->b, tmp1->b, src->W, src->H, radius);
+        gaussianBlur(src->a, tmpa, src->W, src->H, radius);
+        gaussianBlur(src->b, tmpb, src->W, src->H, radius);
     }
 
-    float chromave = 0.0f;
+    double chromave = 0.f; // use double precision for large summations
 
 #ifdef _OPENMP
     #pragma omp parallel
 #endif
     {
-        float chromaChfactor = 1.0f;
+        float chromaChfactor = 1.f;
 #ifdef _OPENMP
         #pragma omp for reduction(+:chromave)
 #endif
@@ -108,16 +110,16 @@ void ImProcFunctions::PF_correct_RT(LabImage * src, LabImage * dst, double radiu
                     // no precalculated values without SSE => calculate
                     float HH = xatan2f(src->b[i][j], src->a[i][j]);
 #endif
-                    float chparam = float((chCurve->getVal((Color::huelab_to_huehsv2(HH))) - 0.5f) * 2.0f); //get C=f(H)
+                    float chparam = chCurve->getVal((Color::huelab_to_huehsv2(HH))) - 0.5f; //get C=f(H)
 
-                    if(chparam > 0.f) {
-                        chparam /= 2.f;    // reduced action if chparam > 0
+                    if(chparam < 0.f) {
+                        chparam *= 2.f;    // increased action if chparam < 0
                     }
 
-                    chromaChfactor = 1.0f + chparam;
+                    chromaChfactor = 1.f + chparam;
                 }
 
-                float chroma = SQR(chromaChfactor * (src->a[i][j] - tmp1->a[i][j])) + SQR(chromaChfactor * (src->b[i][j] - tmp1->b[i][j])); //modulate chroma function hue
+                float chroma = SQR(chromaChfactor * (src->a[i][j] - tmpa[i][j])) + SQR(chromaChfactor * (src->b[i][j] - tmpb[i][j])); //modulate chroma function hue
                 chromave += chroma;
                 fringe[i * width + j] = chroma;
             }
@@ -125,45 +127,17 @@ void ImProcFunctions::PF_correct_RT(LabImage * src, LabImage * dst, double radiu
     }
 
     chromave /= (height * width);
-    float threshfactor = SQR(thresh / 33.f) * chromave * 5.0f;
 
-
-// now chromave is calculated, so we postprocess fringe to reduce the number of divisions in future
-#ifdef __SSE2__
+// now that chromave is known, replace each fringe value by 1 / (value + chromave) to avoid divisions later on
 #ifdef _OPENMP
-    #pragma omp parallel
-#endif
-    {
-        __m128 sumv = F2V( chromave );
-        __m128 onev = F2V( 1.0f );
-#ifdef _OPENMP
-        #pragma omp for nowait
-#endif
-
-        for(int j = 0; j < width * height - 3; j += 4) {
-            STVFU(fringe[j], onev / (LVFU(fringe[j]) + sumv));
-        }
-
-        #pragma omp single
-
-        for(int j = width * height - (width * height) % 4; j < width * height; j++) {
-            fringe[j] = 1.f / (fringe[j] + chromave);
-        }
-    }
-
-#else
-#ifdef _OPENMP
-    #pragma omp parallel for
+    #pragma omp parallel for simd
 #endif
 
     for(int j = 0; j < width * height; j++) {
         fringe[j] = 1.f / (fringe[j] + chromave);
     }
 
-#endif
-
-    // because we changed the values of fringe we also have to recalculate threshfactor
-    threshfactor = 1.0f / (threshfactor + chromave);
+    const float threshfactor = 1.f / (SQR(thresh / 33.f) * chromave * 5.0f + chromave);
 
 // Issue 1674:
 // often, CA isn't evenly distributed, e.g. a lot in contrasty regions and none in the sky.
@@ -176,13 +150,10 @@ void ImProcFunctions::PF_correct_RT(LabImage * src, LabImage * dst, double radiu
 #endif
 
     for(int i = 0; i < height; i++ ) {
-        int j;
+        int j = 0;
+        for(; j < halfwin - 1; j++) {
 
-        for(j = 0; j < halfwin - 1; j++) {
-            tmp1->a[i][j] = src->a[i][j];
-            tmp1->b[i][j] = src->b[i][j];
-
-            //test for pixel darker than some fraction of neighborhood ave, near an edge, more saturated than average
+            //test for pixel darker than some fraction of neighbourhood ave, near an edge, more saturated than average
             if (fringe[i * width + j] < threshfactor) {
                 float atot = 0.f;
                 float btot = 0.f;
@@ -191,23 +162,21 @@ void ImProcFunctions::PF_correct_RT(LabImage * src, LabImage * dst, double radiu
 
                 for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
                     for (int j1 = 0; j1 < j + halfwin; j1++) {
-                        //neighborhood average of pixels weighted by chrominance
+                        //neighbourhood average of pixels weighted by chrominance
                         wt = fringe[i1 * width + j1];
                         atot += wt * src->a[i1][j1];
                         btot += wt * src->b[i1][j1];
                         norm += wt;
                     }
 
-                tmp1->a[i][j] = atot / norm;
-                tmp1->b[i][j] = btot / norm;
+                src->a[i][j] = atot / norm;
+                src->b[i][j] = btot / norm;
             }
         }
 
         for(; j < width - halfwin + 1; j++) {
-            tmp1->a[i][j] = src->a[i][j];
-            tmp1->b[i][j] = src->b[i][j];
 
-            //test for pixel darker than some fraction of neighborhood ave, near an edge, more saturated than average
+            //test for pixel darker than some fraction of neighbourhood ave, near an edge, more saturated than average
             if (fringe[i * width + j] < threshfactor) {
                 float atot = 0.f;
                 float btot = 0.f;
@@ -216,23 +185,21 @@ void ImProcFunctions::PF_correct_RT(LabImage * src, LabImage * dst, double radiu
 
                 for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
                     for (int j1 = j - halfwin + 1; j1 < j + halfwin; j1++) {
-                        //neighborhood average of pixels weighted by chrominance
+                        //neighbourhood average of pixels weighted by chrominance
                         wt = fringe[i1 * width + j1];
                         atot += wt * src->a[i1][j1];
                         btot += wt * src->b[i1][j1];
                         norm += wt;
                     }
 
-                tmp1->a[i][j] = atot / norm;
-                tmp1->b[i][j] = btot / norm;
+                src->a[i][j] = atot / norm;
+                src->b[i][j] = btot / norm;
             }
         }
 
         for(; j < width; j++) {
-            tmp1->a[i][j] = src->a[i][j];
-            tmp1->b[i][j] = src->b[i][j];
 
-            //test for pixel darker than some fraction of neighborhood ave, near an edge, more saturated than average
+            //test for pixel darker than some fraction of neighbourhood ave, near an edge, more saturated than average
             if (fringe[i * width + j] < threshfactor) {
                 float atot = 0.f;
                 float btot = 0.f;
@@ -241,52 +208,29 @@ void ImProcFunctions::PF_correct_RT(LabImage * src, LabImage * dst, double radiu
 
                 for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
                     for (int j1 = j - halfwin + 1; j1 < width; j1++) {
-                        //neighborhood average of pixels weighted by chrominance
+                        //neighbourhood average of pixels weighted by chrominance
                         wt = fringe[i1 * width + j1];
                         atot += wt * src->a[i1][j1];
                         btot += wt * src->b[i1][j1];
                         norm += wt;
                     }
 
-                tmp1->a[i][j] = atot / norm;
-                tmp1->b[i][j] = btot / norm;
+                src->a[i][j] = atot / norm;
+                src->b[i][j] = btot / norm;
             }
         }
     }//end of ab channel averaging
 
-    if(src != dst)
-#ifdef _OPENMP
-        #pragma omp parallel for
-#endif
-        for(int i = 0; i < height; i++ ) {
-            for(int j = 0; j < width; j++) {
-                dst->L[i][j] = src->L[i][j];
-            }
-        }
-
-#ifdef _OPENMP
-    #pragma omp parallel for
-#endif
-
-    for(int i = 0; i < height; i++ ) {
-        for(int j = 0; j < width; j++) {
-            dst->a[i][j] = tmp1->a[i][j];
-            dst->b[i][j] = tmp1->b[i][j];
-        }
-    }
-
-
-    delete tmp1;
-
     if(chCurve) {
         delete chCurve;
     }
 
-    free(fringe);
+    delete [] fringe;
 }
 
-void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * dst, double radius, int thresh)
+void ImProcFunctions::PF_correct_RTcam(CieImage * src, double radius, int thresh)
 {
+    BENCHFUN
     const int halfwin = ceil(2 * radius) + 1;
 
     FlatCurve* chCurve = nullptr;
@@ -297,80 +241,42 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * dst, double ra
 
     // local variables
     const int width = src->W, height = src->H;
-    const float piid = 3.14159265f / 180.f;
-    const float eps2 = 0.01f;
+    constexpr float eps2 = 0.01f;
 
     //temporary array to store chromaticity
-    float (*fringe);
-    fringe = (float (*)) malloc (height * width * sizeof(*fringe));
-
-    float** sraa;
-    sraa = new float*[height];
-
-    for (int i = 0; i < height; i++) {
-        sraa[i] = new float[width];
-    }
-
-    float** srbb;
-    srbb = new float*[height];
-
-    for (int i = 0; i < height; i++) {
-        srbb[i] = new float[width];
-    }
-
-    float** tmaa;
-    tmaa = new float*[height];
-
-    for (int i = 0; i < height; i++) {
-        tmaa[i] = new float[width];
-    }
-
-    float** tmbb;
-    tmbb = new float*[height];
-
-    for (int i = 0; i < height; i++) {
-        tmbb[i] = new float[width];
-    }
+    float *fringe = new float[width * height];
 
+    float **sraa = src->h_p; // we use the src->h_p buffer to avoid memory allocation/deallocation and reduce memory pressure
+    float **srbb = src->C_p; // we use the src->C_p buffer to avoid memory allocation/deallocation and reduce memory pressure
+    const JaggedArray<float> tmaa(width, height);
+    const JaggedArray<float> tmbb(width, height);
 
 #ifdef _OPENMP
     #pragma omp parallel
 #endif
     {
-        float2 sincosval;
 #ifdef __SSE2__
-        int j;
-        vfloat2 sincosvalv;
-        __m128 piidv = F2V(piid);
-#endif // __SSE2__
+        vfloat piDiv180v = F2V(RT_PI_F_180);
+#endif
 #ifdef _OPENMP
         #pragma omp for
 #endif
 
         for (int i = 0; i < height; i++) {
+            int j = 0;
 #ifdef __SSE2__
 
-            for (j = 0; j < width - 3; j += 4) {
-                sincosvalv = xsincosf(piidv * LVFU(src->h_p[i][j]));
-                STVFU(sraa[i][j], LVFU(src->C_p[i][j])*sincosvalv.y);
-                STVFU(srbb[i][j], LVFU(src->C_p[i][j])*sincosvalv.x);
+            for (; j < width - 3; j += 4) {
+                vfloat2 sincosvalv = xsincosf(piDiv180v * LVFU(src->h_p[i][j]));
+                STVFU(sraa[i][j], LVFU(src->C_p[i][j]) * sincosvalv.y);
+                STVFU(srbb[i][j], LVFU(src->C_p[i][j]) * sincosvalv.x);
             }
-
-            for (; j < width; j++) {
-                sincosval = xsincosf(piid * src->h_p[i][j]);
-                sraa[i][j] = src->C_p[i][j] * sincosval.y;
-                srbb[i][j] = src->C_p[i][j] * sincosval.x;
-            }
-
-#else
-
-            for (int j = 0; j < width; j++) {
-                sincosval = xsincosf(piid * src->h_p[i][j]);
-                sraa[i][j] = src->C_p[i][j] * sincosval.y;
-                srbb[i][j] = src->C_p[i][j] * sincosval.x;
-            }
-
 #endif
+            for (; j < width; j++) {
+                float2 sincosval = xsincosf(RT_PI_F_180 * src->h_p[i][j]);
+                sraa[i][j] = src->C_p[i][j] * sincosval.y;
+                srbb[i][j] = src->C_p[i][j] * sincosval.x;
+            }
         }
     }
 
@@ -378,28 +284,28 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * dst, double ra
     #pragma omp parallel
 #endif
     {
-        gaussianBlur (sraa, tmaa, src->W, src->H, radius);
-        gaussianBlur (srbb, tmbb, src->W, src->H, radius);
+        gaussianBlur(sraa, tmaa, src->W, src->H, radius);
+        gaussianBlur(srbb, tmbb, src->W, src->H, radius);
     }
 
-    float chromave = 0.0f;
+    double chromave = 0.0f; // use double precision for large summations
 
 #ifdef __SSE2__
 
     if( chCurve ) {
-// vectorized precalculation of the atan2 values
+        // vectorized precalculation of the atan2 values
 #ifdef _OPENMP
         #pragma omp parallel
 #endif
         {
-            int j;
 #ifdef _OPENMP
             #pragma omp for
 #endif
 
             for(int i = 0; i < height; i++ )
             {
-                for(j = 0; j < width - 3; j += 4) {
+                int j = 0;
+                for(; j < width - 3; j += 4) {
                     STVFU(fringe[i * width + j], xatan2f(LVFU(srbb[i][j]), LVFU(sraa[i][j])));
                 }
 
@@ -416,7 +322,7 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * dst, double ra
     #pragma omp parallel
 #endif
     {
-        float chromaChfactor = 1.0f;
+        float chromaChfactor = 1.f;
 #ifdef _OPENMP
         #pragma omp for reduction(+:chromave)
 #endif
@@ -425,19 +331,19 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * dst, double ra
             for(int j = 0; j < width; j++) {
                 if (chCurve) {
 #ifdef __SSE2__
-                    // use the precalculated atan values
+                    // use the precalculated atan2 values
                     float HH = fringe[i * width + j];
 #else
                     // no precalculated values without SSE => calculate
                     float HH = xatan2f(srbb[i][j], sraa[i][j]);
 #endif
-                    float chparam = float((chCurve->getVal((Color::huelab_to_huehsv2(HH))) - 0.5f) * 2.0f); //get C=f(H)
+                    float chparam = chCurve->getVal(Color::huelab_to_huehsv2(HH)) - 0.5f; //get C=f(H)
 
-                    if(chparam > 0.f) {
-                        chparam /= 2.f;    // reduced action if chparam > 0
+                    if(chparam < 0.f) {
+                        chparam *= 2.f;    // increase action if chparam < 0
                     }
 
-                    chromaChfactor = 1.0f + chparam;
+                    chromaChfactor = 1.f + chparam;
                 }
 
                 float chroma = SQR(chromaChfactor * (sraa[i][j] - tmaa[i][j])) + SQR(chromaChfactor * (srbb[i][j] - tmbb[i][j])); //modulate chroma function hue
@@ -448,42 +354,17 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * dst, double ra
     }
 
     chromave /= (height * width);
-    float threshfactor = SQR(thresh / 33.f) * chromave * 5.0f; // Calculated once to eliminate mult inside the next loop
 
-// now chromave is calculated, so we postprocess fringe to reduce the number of divisions in future
-#ifdef __SSE2__
+// now that chromave is known, replace each fringe value by 1 / (value + chromave + eps2) to avoid divisions later on
 #ifdef _OPENMP
-    #pragma omp parallel
-#endif
-    {
-        __m128 sumv = F2V( chromave + eps2 );
-        __m128 onev = F2V( 1.0f );
-#ifdef _OPENMP
-        #pragma omp for
-#endif
-
-        for(int j = 0; j < width * height - 3; j += 4) {
-            STVFU(fringe[j], onev / (LVFU(fringe[j]) + sumv));
-        }
-    }
-
-    for(int j = width * height - (width * height) % 4; j < width * height; j++) {
-        fringe[j] = 1.f / (fringe[j] + chromave + eps2);
-    }
-
-#else
-#ifdef _OPENMP
-    #pragma omp parallel for
+    #pragma omp parallel for simd
 #endif
 
     for(int j = 0; j < width * height; j++) {
         fringe[j] = 1.f / (fringe[j] + chromave + eps2);
     }
 
-#endif
-
-    // because we changed the values of fringe we also have to recalculate threshfactor
-    threshfactor = 1.0f / (threshfactor + chromave + eps2);
+    const float threshfactor = 1.f / (SQR(thresh / 33.f) * chromave * 5.0f + chromave + eps2);
 
 // Issue 1674:
 // often, CA isn't evenly distributed, e.g. a lot in contrasty regions and none in the sky.
@@ -496,9 +377,8 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * dst, double ra
 #endif
 
     for(int i = 0; i < height; i++ ) {
-        int j;
-
-        for(j = 0; j < halfwin - 1; j++) {
+        int j = 0;
+        for(; j < halfwin - 1; j++) {
             tmaa[i][j] = sraa[i][j];
             tmbb[i][j] = srbb[i][j];
 
@@ -510,7 +390,7 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * dst, double ra
 
                 for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
                     for (int j1 = 0; j1 < j + halfwin; j1++) {
-                        //neighborhood average of pixels weighted by chrominance
+                        //neighbourhood average of pixels weighted by chrominance
                         wt = fringe[i1 * width + j1];
                         atot += wt * sraa[i1][j1];
                         btot += wt * srbb[i1][j1];
@@ -518,8 +398,8 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * dst, double ra
                     }
 
                 if(norm > 0.f) {
-                    tmaa[i][j] = (atot / norm);
-                    tmbb[i][j] = (btot / norm);
+                    tmaa[i][j] = atot / norm;
+                    tmbb[i][j] = btot / norm;
                 }
             }
         }
@@ -536,7 +416,7 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * dst, double ra
 
                 for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
                     for (int j1 = j - halfwin + 1; j1 < j + halfwin; j1++) {
-                        //neighborhood average of pixels weighted by chrominance
+                        //neighbourhood average of pixels weighted by chrominance
                         wt = fringe[i1 * width + j1];
                         atot += wt * sraa[i1][j1];
                         btot += wt * srbb[i1][j1];
@@ -544,8 +424,8 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * dst, double ra
                     }
 
                 if(norm > 0.f) {
-                    tmaa[i][j] = (atot / norm);
-                    tmbb[i][j] = (btot / norm);
+                    tmaa[i][j] = atot / norm;
+                    tmbb[i][j] = btot / norm;
                 }
             }
         }
@@ -562,7 +442,7 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * dst, double ra
 
                 for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
                     for (int j1 = j - halfwin + 1; j1 < width; j1++) {
-                        //neighborhood average of pixels weighted by chrominance
+                        //neighbourhood average of pixels weighted by chrominance
                         wt = fringe[i1 * width + j1];
                         atot += wt * sraa[i1][j1];
                         btot += wt * srbb[i1][j1];
@@ -570,202 +450,303 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * dst, double ra
                     }
 
                 if(norm > 0.f) {
-                    tmaa[i][j] = (atot / norm);
-                    tmbb[i][j] = (btot / norm);
+                    tmaa[i][j] = atot / norm;
+                    tmbb[i][j] = btot / norm;
                 }
             }
         }
     } //end of ab channel averaging
 
-
 #ifdef _OPENMP
-    #pragma omp parallel
-#endif
-    {
-#ifdef __SSE2__
-        int j;
-        __m128 interav, interbv;
-        __m128 piidv = F2V(piid);
-#endif
-#ifdef _OPENMP
-        #pragma omp for
+    #pragma omp parallel for
 #endif
 
-        for(int i = 0; i < height; i++ ) {
+    for(int i = 0; i < height; i++ ) {
+        int j = 0;
 #ifdef __SSE2__
 
-            for(j = 0; j < width - 3; j += 4) {
-                STVFU(dst->sh_p[i][j], LVFU(src->sh_p[i][j]));
-                interav = LVFU(tmaa[i][j]);
-                interbv = LVFU(tmbb[i][j]);
-                STVFU(dst->h_p[i][j], (xatan2f(interbv, interav)) / piidv);
-                STVFU(dst->C_p[i][j], vsqrtf(SQRV(interbv) + SQRV(interav)));
-            }
-
-            for(; j < width; j++) {
-                dst->sh_p[i][j] = src->sh_p[i][j];
-                float intera = tmaa[i][j];
-                float interb = tmbb[i][j];
-                dst->h_p[i][j] = (xatan2f(interb, intera)) / piid;
-                dst->C_p[i][j] = sqrt(SQR(interb) + SQR(intera));
-            }
-
-#else
-
-            for(int j = 0; j < width; j++) {
-                dst->sh_p[i][j] = src->sh_p[i][j];
-                float intera = tmaa[i][j];
-                float interb = tmbb[i][j];
-                dst->h_p[i][j] = (xatan2f(interb, intera)) / piid;
-                dst->C_p[i][j] = sqrt(SQR(interb) + SQR(intera));
-            }
-
+        for(; j < width - 3; j += 4) {
+            vfloat interav = LVFU(tmaa[i][j]);
+            vfloat interbv = LVFU(tmbb[i][j]);
+            STVFU(src->h_p[i][j], xatan2f(interbv, interav) / F2V(RT_PI_F_180));
+            STVFU(src->C_p[i][j], vsqrtf(SQRV(interbv) + SQRV(interav)));
+        }
 #endif
+        for(; j < width; j++) {
+            float intera = tmaa[i][j];
+            float interb = tmbb[i][j];
+            src->h_p[i][j] = xatan2f(interb, intera) / RT_PI_F_180;
+            src->C_p[i][j] = sqrt(SQR(interb) + SQR(intera));
         }
     }
 
-    for (int i = 0; i < height; i++) {
-        delete [] sraa[i];
-    }
-
-    delete [] sraa;
-
-    for (int i = 0; i < height; i++) {
-        delete [] srbb[i];
-    }
-
-    delete [] srbb;
-
-    for (int i = 0; i < height; i++) {
-        delete [] tmaa[i];
-    }
-
-    delete [] tmaa;
-
-    for (int i = 0; i < height; i++) {
-        delete [] tmbb[i];
-    }
-
-    delete [] tmbb;
-
     if(chCurve) {
         delete chCurve;
     }
 
-    free(fringe);
+    delete [] fringe;
 }
 
-void ImProcFunctions::Badpixelscam(CieImage * src, CieImage * dst, double radius, int thresh, int mode, float skinprot, float chrom, int hotbad)
+void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, int mode, float skinprot, float chrom, int hotbad)
 {
+    BENCHFUN
     const int halfwin = ceil(2 * radius) + 1;
-    MyTime t1, t2;
-    t1.set();
 
     const int width = src->W, height = src->H;
-    const float piid = 3.14159265f / 180.f;
 
-    int i1, j1;
-    const float eps = 1.0f;
-    const float eps2 = 0.01f;
+    constexpr float eps = 1.f;
+    constexpr float eps2 = 0.01f;
 
-    float** sraa;
-    sraa = new float*[height];
-
-    for (int i = 0; i < height; i++) {
-        sraa[i] = new float[width];
-    }
-
-    float** srbb;
-    srbb = new float*[height];
-
-    for (int i = 0; i < height; i++) {
-        srbb[i] = new float[width];
-    }
-
-    float** tmaa;
-    tmaa = new float*[height];
-
-    for (int i = 0; i < height; i++) {
-        tmaa[i] = new float[width];
-    }
-
-    float** tmbb;
-    tmbb = new float*[height];
-
-    for (int i = 0; i < height; i++) {
-        tmbb[i] = new float[width];
-    }
-
-    float* badpix = (float*)malloc(width * height * sizeof(float));
-
-    float** tmL;
-    tmL = new float*[height];
-
-    for (int i = 0; i < height; i++) {
-        tmL[i] = new float[width];
-    }
+    const JaggedArray<float> tmL(width, height);
 
+    float* badpix = new float[width * height];
+
+#ifdef _OPENMP
+    #pragma omp parallel
+#endif
+    {
+        //luma sh_p
+        gaussianBlur(src->sh_p, tmL, src->W, src->H, 2.0);//low value to avoid artifacts
+    }
+
+//luma badpixels
+    constexpr float sh_thr = 4.5f;//low value for luma sh_p to avoid artifacts
+    constexpr float shthr = sh_thr / 24.0f;
 
 #ifdef _OPENMP
     #pragma omp parallel
 #endif
     {
-        float2 sincosval;
 #ifdef __SSE2__
-        int j;
-        vfloat2 sincosvalv;
-        __m128 piidv = F2V(piid);
+        vfloat shthrv = F2V(shthr);
+        vfloat onev = F2V(1.f);
 #endif // __SSE2__
 #ifdef _OPENMP
         #pragma omp for
 #endif
 
         for (int i = 0; i < height; i++) {
+            int j = 0;
+            for (; j < 2; j++) {
+                float shfabs = fabs(src->sh_p[i][j] - tmL[i][j]);
+                float shmed = 0.0f;
+
+                for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
+                    for (int j1 = 0; j1 <= j + 2; j1++ ) {
+                        shmed += fabs(src->sh_p[i1][j1] - tmL[i1][j1]);
+                    }
+
+                badpix[i * width + j] = (shfabs > ((shmed - shfabs) * shthr));
+            }
+
 #ifdef __SSE2__
 
-            for (j = 0; j < width - 3; j += 4) {
-                sincosvalv = xsincosf(piidv * LVFU(src->h_p[i][j]));
-                STVFU(sraa[i][j], LVFU(src->C_p[i][j])*sincosvalv.y);
-                STVFU(srbb[i][j], LVFU(src->C_p[i][j])*sincosvalv.x);
+            for (; j < width - 5; j += 4) {
+                vfloat shfabsv = vabsf(LVFU(src->sh_p[i][j]) - LVFU(tmL[i][j]));
+                vfloat shmedv = ZEROV;
+
+                for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
+                    for (int j1 = j - 2; j1 <= j + 2; j1++ ) {
+                        shmedv += vabsf(LVFU(src->sh_p[i1][j1]) - LVFU(tmL[i1][j1]));
+                    }
+
+                STVFU(badpix[i * width + j], vself(vmaskf_gt(shfabsv, (shmedv - shfabsv) * shthrv), onev, ZEROV));
+            }
+#endif
+            for (; j < width - 2; j++) {
+                float shfabs = fabs(src->sh_p[i][j] - tmL[i][j]);
+                float shmed = 0.0f;
+
+                for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
+                    for (int j1 = j - 2; j1 <= j + 2; j1++ ) {
+                        shmed += fabs(src->sh_p[i1][j1] - tmL[i1][j1]);
+                    }
+
+                badpix[i * width + j] = (shfabs > ((shmed - shfabs) * shthr));
             }
 
             for (; j < width; j++) {
-                sincosval = xsincosf(piid * src->h_p[i][j]);
-                sraa[i][j] = src->C_p[i][j] * sincosval.y;
-                srbb[i][j] = src->C_p[i][j] * sincosval.x;
+                float shfabs = fabs(src->sh_p[i][j] - tmL[i][j]);
+                float shmed = 0.0f;
+
+                for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
+                    for (int j1 = j - 2; j1 < width; j1++ ) {
+                        shmed += fabs(src->sh_p[i1][j1] - tmL[i1][j1]);
+                    }
+
+                badpix[i * width + j] = (shfabs > ((shmed - shfabs) * shthr));
             }
-
-#else
-
-            for (int j = 0; j < width; j++) {
-                sincosval = xsincosf(piid * src->h_p[i][j]);
-                sraa[i][j] = src->C_p[i][j] * sincosval.y;
-                srbb[i][j] = src->C_p[i][j] * sincosval.x;
-            }
-
-#endif
         }
     }
 
+
+#ifdef _OPENMP
+    #pragma omp parallel for schedule(dynamic,16)
+#endif
+
+    for (int i = 0; i < height; i++) {
+        int j = 0;
+        for (; j < 2; j++) {
+            if (!badpix[i * width + j]) {
+                continue;
+            }
+
+            float norm = 0.0f;
+            float shsum = 0.0f;
+            float sum = 0.0f;
+            int tot = 0;
+
+            for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
+                for (int j1 = 0; j1 <= j + 2; j1++ ) {
+                    if (i1 == i && j1 == j) {
+                        continue;
+                    }
+
+                    if (badpix[i1 * width + j1]) {
+                        continue;
+                    }
+
+                    sum += src->sh_p[i1][j1];
+                    tot++;
+                    float dirsh = 1.f / (SQR(src->sh_p[i1][j1] - src->sh_p[i][j]) + eps);
+                    shsum += dirsh * src->sh_p[i1][j1];
+                    norm += dirsh;
+                }
+
+            if (norm > 0.f) {
+                src->sh_p[i][j] = shsum / norm;
+            } else if (tot > 0) {
+                src->sh_p[i][j] = sum / tot;
+            }
+        }
+
+        for (; j < width - 2; j++) {
+            if (!badpix[i * width + j]) {
+                continue;
+            }
+
+            float norm = 0.0f;
+            float shsum = 0.0f;
+            float sum = 0.0f;
+            int tot = 0;
+
+            for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
+                for (int j1 = j - 2; j1 <= j + 2; j1++ ) {
+                    if (i1 == i && j1 == j) {
+                        continue;
+                    }
+
+                    if (badpix[i1 * width + j1]) {
+                        continue;
+                    }
+
+                    sum += src->sh_p[i1][j1];
+                    tot++;
+                    float dirsh = 1.f / (SQR(src->sh_p[i1][j1] - src->sh_p[i][j]) + eps);
+                    shsum += dirsh * src->sh_p[i1][j1];
+                    norm += dirsh;
+                }
+
+            if (norm > 0.f) {
+                src->sh_p[i][j] = shsum / norm;
+            } else if(tot > 0) {
+                src->sh_p[i][j] = sum / tot;
+            }
+        }
+
+        for (; j < width; j++) {
+            if (!badpix[i * width + j]) {
+                continue;
+            }
+
+            float norm = 0.0f;
+            float shsum = 0.0f;
+            float sum = 0.0f;
+            int tot = 0;
+
+            for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
+                for (int j1 = j - 2; j1 < width; j1++ ) {
+                    if (i1 == i && j1 == j) {
+                        continue;
+                    }
+
+                    if (badpix[i1 * width + j1]) {
+                        continue;
+                    }
+
+                    sum += src->sh_p[i1][j1];
+                    tot++;
+                    float dirsh = 1.f / (SQR(src->sh_p[i1][j1] - src->sh_p[i][j]) + eps);
+                    shsum += dirsh * src->sh_p[i1][j1];
+                    norm += dirsh;
+                }
+
+            if (norm > 0.f) {
+                src->sh_p[i][j] = shsum / norm;
+            } else if(tot > 0) {
+                src->sh_p[i][j] = sum / tot;
+            }
+        }
+    }
+
+// end luma badpixels
+
+    const JaggedArray<float> sraa(width, height);
+    const JaggedArray<float> srbb(width, height);
+
 #ifdef _OPENMP
     #pragma omp parallel
 #endif
     {
-        //chroma a and b
-        if(mode == 2) { //choice of gaussian blur
-            gaussianBlur (sraa, tmaa, src->W, src->H, radius);
-            gaussianBlur (srbb, tmbb, src->W, src->H, radius);
-        }
 
-        //luma sh_p
-        gaussianBlur (src->sh_p, tmL, src->W, src->H, 2.0);//low value to avoid artifacts
+#ifdef __SSE2__
+        vfloat piDiv180v = F2V(RT_PI_F_180);
+#endif // __SSE2__
+#ifdef _OPENMP
+        #pragma omp for
+#endif
+
+        for (int i = 0; i < height; i++) {
+            int j = 0;
+#ifdef __SSE2__
+
+            for (; j < width - 3; j += 4) {
+                vfloat2 sincosvalv = xsincosf(piDiv180v * LVFU(src->h_p[i][j]));
+                STVFU(sraa[i][j], LVFU(src->C_p[i][j])*sincosvalv.y);
+                STVFU(srbb[i][j], LVFU(src->C_p[i][j])*sincosvalv.x);
+            }
+#endif
+            for (; j < width; j++) {
+                float2 sincosval = xsincosf(RT_PI_F_180 * src->h_p[i][j]);
+                sraa[i][j] = src->C_p[i][j] * sincosval.y;
+                srbb[i][j] = src->C_p[i][j] * sincosval.x;
+            }
+        }
     }
 
-    if(mode == 1) { //choice of median
+    float ** tmaa = tmL; // reuse tmL buffer
+    const JaggedArray<float> tmbb(width, height);
+
+    if(mode == 2) { //choice of gaussian blur
+
+#ifdef _OPENMP
         #pragma omp parallel
+#endif
+        {
+        //chroma a and b
+            gaussianBlur(sraa, tmaa, src->W, src->H, radius);
+            gaussianBlur(srbb, tmbb, src->W, src->H, radius);
+        }
+
+    } else if(mode == 1) { //choice of median
+#ifdef _OPENMP
+        #pragma omp parallel
+#endif
         {
             int ip, in, jp, jn;
+#ifdef _OPENMP
             #pragma omp for nowait  //nowait because next loop inside this parallel region is independent on this one
+#endif
 
             for (int i = 0; i < height; i++) {
                 if (i < 2) {
@@ -797,8 +778,9 @@ void ImProcFunctions::Badpixelscam(CieImage * src, CieImage * dst, double radius
                 }
             }
 
+#ifdef _OPENMP
             #pragma omp for
-
+#endif
             for (int i = 0; i < height; i++) {
                 if (i < 2) {
                     ip = i + 2;
@@ -831,218 +813,8 @@ void ImProcFunctions::Badpixelscam(CieImage * src, CieImage * dst, double radius
         }
     }
 
-//luma badpixels
-    const float sh_thr = 4.5f;//low value for luma sh_p to avoid artifacts
-    const float shthr = sh_thr / 24.0f;
-
-#ifdef _OPENMP
-    #pragma omp parallel
-#endif
-    {
-        int j;
-#ifdef __SSE2__
-        __m128 shfabsv, shmedv;
-        __m128 shthrv = F2V(shthr);
-        __m128 onev = F2V(1.0f);
-#endif // __SSE2__
-#ifdef _OPENMP
-        #pragma omp for private(i1,j1)
-#endif
-
-        for (int i = 0; i < height; i++) {
-            for (j = 0; j < 2; j++) {
-                float shfabs = fabs(src->sh_p[i][j] - tmL[i][j]);
-                float shmed = 0.0f;
-
-                for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
-                    for (j1 = 0; j1 <= j + 2; j1++ ) {
-                        shmed += fabs(src->sh_p[i1][j1] - tmL[i1][j1]);
-                    }
-
-                badpix[i * width + j] = (shfabs > ((shmed - shfabs) * shthr));
-            }
-
-#ifdef __SSE2__
-
-            for (; j < width - 5; j += 4) {
-                shfabsv = vabsf(LVFU(src->sh_p[i][j]) - LVFU(tmL[i][j]));
-                shmedv = ZEROV;
-
-                for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
-                    for (j1 = j - 2; j1 <= j + 2; j1++ ) {
-                        shmedv += vabsf(LVFU(src->sh_p[i1][j1]) - LVFU(tmL[i1][j1]));
-                    }
-
-                STVFU(badpix[i * width + j], vself(vmaskf_gt(shfabsv, (shmedv - shfabsv)*shthrv), onev, ZEROV));
-            }
-
-            for (; j < width - 2; j++) {
-                float shfabs = fabs(src->sh_p[i][j] - tmL[i][j]);
-                float shmed = 0.0f;
-
-                for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
-                    for (j1 = j - 2; j1 <= j + 2; j1++ ) {
-                        shmed += fabs(src->sh_p[i1][j1] - tmL[i1][j1]);
-                    }
-
-                badpix[i * width + j] = (shfabs > ((shmed - shfabs) * shthr));
-            }
-
-#else
-
-            for (; j < width - 2; j++) {
-                float shfabs = fabs(src->sh_p[i][j] - tmL[i][j]);
-                float shmed = 0.0f;
-
-                for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
-                    for (j1 = j - 2; j1 <= j + 2; j1++ ) {
-                        shmed += fabs(src->sh_p[i1][j1] - tmL[i1][j1]);
-                    }
-
-                badpix[i * width + j] = (shfabs > ((shmed - shfabs) * shthr));
-            }
-
-#endif
-
-            for (; j < width; j++) {
-                float shfabs = fabs(src->sh_p[i][j] - tmL[i][j]);
-                float shmed = 0.0f;
-
-                for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
-                    for (j1 = j - 2; j1 < width; j1++ ) {
-                        shmed += fabs(src->sh_p[i1][j1] - tmL[i1][j1]);
-                    }
-
-                badpix[i * width + j] = (shfabs > ((shmed - shfabs) * shthr));
-            }
-        }
-    }
-
-
-#ifdef _OPENMP
-    #pragma omp parallel
-#endif
-    {
-        int j;
-#ifdef _OPENMP
-        #pragma omp for private(i1,j1) schedule(dynamic,16)
-#endif
-
-        for (int i = 0; i < height; i++) {
-            for (j = 0; j < 2; j++) {
-                if (!badpix[i * width + j]) {
-                    continue;
-                }
-
-                float norm = 0.0f;
-                float shsum = 0.0f;
-                float sum = 0.0f;
-                int tot = 0;
-
-                for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
-                    for (j1 = 0; j1 <= j + 2; j1++ ) {
-                        if (i1 == i && j1 == j) {
-                            continue;
-                        }
-
-                        if (badpix[i1 * width + j1]) {
-                            continue;
-                        }
-
-                        sum += src->sh_p[i1][j1];
-                        tot++;
-                        float dirsh = 1.f / (SQR(src->sh_p[i1][j1] - src->sh_p[i][j]) + eps);
-                        shsum += dirsh * src->sh_p[i1][j1];
-                        norm += dirsh;
-                    }
-
-                if (norm > 0.f) {
-                    src->sh_p[i][j] = shsum / norm;
-                } else {
-                    if(tot > 0) {
-                        src->sh_p[i][j] = sum / tot;
-                    }
-                }
-            }
-
-            for (; j < width - 2; j++) {
-                if (!badpix[i * width + j]) {
-                    continue;
-                }
-
-                float norm = 0.0f;
-                float shsum = 0.0f;
-                float sum = 0.0f;
-                int tot = 0;
-
-                for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
-                    for (j1 = j - 2; j1 <= j + 2; j1++ ) {
-                        if (i1 == i && j1 == j) {
-                            continue;
-                        }
-
-                        if (badpix[i1 * width + j1]) {
-                            continue;
-                        }
-
-                        sum += src->sh_p[i1][j1];
-                        tot++;
-                        float dirsh = 1.f / (SQR(src->sh_p[i1][j1] - src->sh_p[i][j]) + eps);
-                        shsum += dirsh * src->sh_p[i1][j1];
-                        norm += dirsh;
-                    }
-
-                if (norm > 0.f) {
-                    src->sh_p[i][j] = shsum / norm;
-                } else {
-                    if(tot > 0) {
-                        src->sh_p[i][j] = sum / tot;
-                    }
-                }
-            }
-
-            for (; j < width; j++) {
-                if (!badpix[i * width + j]) {
-                    continue;
-                }
-
-                float norm = 0.0f;
-                float shsum = 0.0f;
-                float sum = 0.0f;
-                int tot = 0;
-
-                for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
-                    for (j1 = j - 2; j1 < width; j1++ ) {
-                        if (i1 == i && j1 == j) {
-                            continue;
-                        }
-
-                        if (badpix[i1 * width + j1]) {
-                            continue;
-                        }
-
-                        sum += src->sh_p[i1][j1];
-                        tot++;
-                        float dirsh = 1.f / (SQR(src->sh_p[i1][j1] - src->sh_p[i][j]) + eps);
-                        shsum += dirsh * src->sh_p[i1][j1];
-                        norm += dirsh;
-                    }
-
-                if (norm > 0.f) {
-                    src->sh_p[i][j] = shsum / norm;
-                } else {
-                    if(tot > 0) {
-                        src->sh_p[i][j] = sum / tot;
-                    }
-                }
-            }
-        }
-    }
-// end luma badpixels
-
-
 // begin chroma badpixels
-    float chrommed = 0.f;
+    double chrommed = 0.f; // use double precision for large summations
 #ifdef _OPENMP
     #pragma omp parallel for reduction(+:chrommed)
 #endif
@@ -1056,59 +828,416 @@ void ImProcFunctions::Badpixelscam(CieImage * src, CieImage * dst, double radius
     }
 
     chrommed /= (height * width);
-    float threshfactor = (thresh * chrommed) / 33.f;
 
 // now chrommed is calculated, so we postprocess badpix to reduce the number of divisions in future
-#ifdef __SSE2__
 #ifdef _OPENMP
     #pragma omp parallel
 #endif
     {
-        int j;
-        __m128 sumv = F2V( chrommed + eps2 );
-        __m128 onev = F2V( 1.0f );
+#ifdef __SSE2__
+        vfloat sumv = F2V(chrommed + eps2);
+        vfloat onev = F2V(1.f);
+#endif
 #ifdef _OPENMP
         #pragma omp for
 #endif
 
         for(int i = 0; i < height; i++) {
-            for(j = 0; j < width - 3; j += 4) {
+            int j = 0;
+#ifdef __SSE2__
+            for(; j < width - 3; j += 4) {
                 STVFU(badpix[i * width + j], onev / (LVFU(badpix[i * width + j]) + sumv));
             }
-
+#endif
             for(; j < width; j++) {
                 badpix[i * width + j] = 1.f / (badpix[i * width + j] + chrommed + eps2);
             }
         }
     }
-#else
+
+    const float threshfactor = 1.f / ((thresh * chrommed) / 33.f + chrommed + eps2);
+
 #ifdef _OPENMP
-    #pragma omp parallel for
+    #pragma omp parallel for schedule(dynamic,16)
 #endif
 
-    for(int i = 0; i < height; i++)
-        for(int j = 0; j < width; j++) {
-            badpix[i * width + j] = 1.f / (badpix[i * width + j] + chrommed + eps2);
+    for(int i = 0; i < height; i++ ) {
+        int j = 0;
+        for(; j < halfwin; j++) {
+
+            if (badpix[i * width + j] < threshfactor) {
+                float atot = 0.f;
+                float btot = 0.f;
+                float norm = 0.f;
+                float wt;
+
+                for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
+                    for (int j1 = 0; j1 < j + halfwin; j1++) {
+                        wt = badpix[i1 * width + j1];
+                        atot += wt * sraa[i1][j1];
+                        btot += wt * srbb[i1][j1];
+                        norm += wt;
+                    }
+
+                if(norm > 0.f) { // only use the window if it contributed some weight
+                    const float intera = atot / norm; // weighted mean of sraa (C_p * cos) over the window
+                    const float interb = btot / norm; // weighted mean of srbb (C_p * sin); must come from btot, not atot
+                    const float CC = sqrt(SQR(interb) + SQR(intera)); // chroma of the interpolated pair
+
+                    if(hotbad != 0 || (CC < chrom && skinprot != 0.f)) {
+                        src->h_p[i][j] = xatan2f(interb, intera) / RT_PI_F_180; // undo the RT_PI_F_180 scaling applied before xsincosf
+                        src->C_p[i][j] = CC;
+                    }
+                }
+            }
         }
 
-#endif
+        for(; j < width - halfwin; j++) {
 
-    // because we changed the values of badpix we also have to recalculate threshfactor
-    threshfactor = 1.0f / (threshfactor + chrommed + eps2);
+            if (badpix[i * width + j] < threshfactor) {
+                float atot = 0.f;
+                float btot = 0.f;
+                float norm = 0.f;
+                float wt;
+
+                for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
+                    for (int j1 = j - halfwin + 1; j1 < j + halfwin; j1++) {
+                        wt = badpix[i1 * width + j1];
+                        atot += wt * sraa[i1][j1];
+                        btot += wt * srbb[i1][j1];
+                        norm += wt;
+                    }
+
+                if(norm > 0.f) { // only use the window if it contributed some weight
+                    const float intera = atot / norm; // weighted mean of sraa (C_p * cos) over the window
+                    const float interb = btot / norm; // weighted mean of srbb (C_p * sin); must come from btot, not atot
+                    const float CC = sqrt(SQR(interb) + SQR(intera)); // chroma of the interpolated pair
+
+                    if(hotbad != 0 || (CC < chrom && skinprot != 0.f)) {
+                        src->h_p[i][j] = xatan2f(interb, intera) / RT_PI_F_180; // undo the RT_PI_F_180 scaling applied before xsincosf
+                        src->C_p[i][j] = CC;
+                    }
+                }
+            }
+        }
+
+        for(; j < width; j++) {
+
+            if (badpix[i * width + j] < threshfactor) {
+                float atot = 0.f;
+                float btot = 0.f;
+                float norm = 0.f;
+                float wt;
+
+                for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
+                    for (int j1 = j - halfwin + 1; j1 < width; j1++) {
+                        wt = badpix[i1 * width + j1];
+                        atot += wt * sraa[i1][j1];
+                        btot += wt * srbb[i1][j1];
+                        norm += wt;
+                    }
+
+                if(norm > 0.f) { // only use the window if it contributed some weight
+                    const float intera = atot / norm; // weighted mean of sraa (C_p * cos) over the window
+                    const float interb = btot / norm; // weighted mean of srbb (C_p * sin); must come from btot, not atot
+                    const float CC = sqrt(SQR(interb) + SQR(intera)); // chroma of the interpolated pair
+
+                    if(hotbad != 0 || (CC < chrom && skinprot != 0.f)) {
+                        src->h_p[i][j] = xatan2f(interb, intera) / RT_PI_F_180; // undo the RT_PI_F_180 scaling applied before xsincosf
+                        src->C_p[i][j] = CC;
+                    }
+                }
+            }
+        }
+    }
+
+    delete [] badpix;
+
+}
+
+void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, int mode, float chrom)
+{
+    BENCHFUN
+    const int halfwin = ceil(2 * radius) + 1;
+
+    const int width = src->W, height = src->H;
+
+    constexpr float eps = 1.f;
+    constexpr float eps2 = 0.01f;
+
+    const JaggedArray<float> tmL(width, height);
+
+    float* badpix = new float[width * height];
+
+#ifdef _OPENMP
+    #pragma omp parallel
+#endif
+    {
+        // blur L channel
+        gaussianBlur(src->L, tmL, src->W, src->H, 2.0);//low value to avoid artifacts
+    }
+
+//luma badpixels
+    constexpr float sh_thr = 4.5f;//low value for luma sh_p to avoid artifacts
+    constexpr float shthr = sh_thr / 24.0f;
+
+#ifdef _OPENMP
+    #pragma omp parallel
+#endif
+    {
+#ifdef __SSE2__
+        vfloat shthrv = F2V(shthr);
+        vfloat onev = F2V(1.f);
+#endif // __SSE2__
+#ifdef _OPENMP
+        #pragma omp for
+#endif
+
+        for (int i = 0; i < height; i++) {
+            int j = 0;
+            for (; j < 2; j++) {
+                float shfabs = fabs(src->L[i][j] - tmL[i][j]);
+                float shmed = 0.0f;
+
+                for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
+                    for (int j1 = 0; j1 <= j + 2; j1++ ) {
+                        shmed += fabs(src->L[i1][j1] - tmL[i1][j1]);
+                    }
+
+                badpix[i * width + j] = (shfabs > ((shmed - shfabs) * shthr));
+            }
+
+#ifdef __SSE2__
+
+            for (; j < width - 5; j += 4) {
+                vfloat shfabsv = vabsf(LVFU(src->L[i][j]) - LVFU(tmL[i][j]));
+                vfloat shmedv = ZEROV;
+
+                for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
+                    for (int j1 = j - 2; j1 <= j + 2; j1++ ) {
+                        shmedv += vabsf(LVFU(src->L[i1][j1]) - LVFU(tmL[i1][j1]));
+                    }
+
+                STVFU(badpix[i * width + j], vself(vmaskf_gt(shfabsv, (shmedv - shfabsv) * shthrv), onev, ZEROV));
+            }
+#endif
+            for (; j < width - 2; j++) {
+                float shfabs = fabs(src->L[i][j] - tmL[i][j]);
+                float shmed = 0.0f;
+
+                for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
+                    for (int j1 = j - 2; j1 <= j + 2; j1++ ) {
+                        shmed += fabs(src->L[i1][j1] - tmL[i1][j1]);
+                    }
+
+                badpix[i * width + j] = (shfabs > ((shmed - shfabs) * shthr));
+            }
+
+            for (; j < width; j++) {
+                float shfabs = fabs(src->L[i][j] - tmL[i][j]);
+                float shmed = 0.0f;
+
+                for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
+                    for (int j1 = j - 2; j1 < width; j1++ ) {
+                        shmed += fabs(src->L[i1][j1] - tmL[i1][j1]);
+                    }
+
+                badpix[i * width + j] = (shfabs > ((shmed - shfabs) * shthr));
+            }
+        }
+    }
+
+#ifdef _OPENMP // the loop below writes only flagged pixels and reads only unflagged neighbours
+    #pragma omp parallel for schedule(dynamic,16)
+#endif
+
+    for (int i = 0; i < height; i++) {
+        int j = 0;
+        for (; j < 2; j++) {
+            if (!badpix[i * width + j]) {
+                continue;
+            }
+
+            float norm = 0.0f;
+            float shsum = 0.0f;
+            float sum = 0.0f;
+            int tot = 0;
+
+            for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
+                for (int j1 = 0; j1 <= j + 2; j1++ ) {
+                    if (i1 == i && j1 == j) {
+                        continue;
+                    }
+
+                    if (badpix[i1 * width + j1]) {
+                        continue;
+                    }
+
+                    sum += src->L[i1][j1];
+                    tot++;
+                    float dirsh = 1.f / (SQR(src->L[i1][j1] - src->L[i][j]) + eps);
+                    shsum += dirsh * src->L[i1][j1];
+                    norm += dirsh;
+                }
+
+            if (norm > 0.f) {
+                src->L[i][j] = shsum / norm;
+            } else {
+                if(tot > 0) {
+                    src->L[i][j] = sum / tot;
+                }
+            }
+        }
+
+        for (; j < width - 2; j++) {
+            if (!badpix[i * width + j]) {
+                continue;
+            }
+
+            float norm = 0.0f;
+            float shsum = 0.0f;
+            float sum = 0.0f;
+            int tot = 0;
+
+            for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
+                for (int j1 = j - 2; j1 <= j + 2; j1++ ) {
+                    if (i1 == i && j1 == j) {
+                        continue;
+                    }
+
+                    if (badpix[i1 * width + j1]) {
+                        continue;
+                    }
+
+                    sum += src->L[i1][j1];
+                    tot++;
+                    float dirsh = 1.f / (SQR(src->L[i1][j1] - src->L[i][j]) + eps);
+                    shsum += dirsh * src->L[i1][j1];
+                    norm += dirsh;
+                }
+
+            if (norm > 0.f) {
+                src->L[i][j] = shsum / norm;
+            } else {
+                if(tot > 0) {
+                    src->L[i][j] = sum / tot;
+                }
+            }
+        }
+
+        for (; j < width; j++) {
+            if (!badpix[i * width + j]) {
+                continue;
+            }
+
+            float norm = 0.0f;
+            float shsum = 0.0f;
+            float sum = 0.0f;
+            int tot = 0;
+
+            for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
+                for (int j1 = j - 2; j1 < width; j1++ ) {
+                    if (i1 == i && j1 == j) {
+                        continue;
+                    }
+
+                    if (badpix[i1 * width + j1]) {
+                        continue;
+                    }
+
+                    sum += src->L[i1][j1];
+                    tot++;
+                    float dirsh = 1.f / (SQR(src->L[i1][j1] - src->L[i][j]) + eps);
+                    shsum += dirsh * src->L[i1][j1];
+                    norm += dirsh;
+                }
+
+            if (norm > 0.f) {
+                src->L[i][j] = shsum / norm;
+            } else {
+                if(tot > 0) {
+                    src->L[i][j] = sum / tot;
+                }
+            }
+        }
+    }
+
+// end luma badpixels
+
+    float ** tmaa = tmL; // reuse tmL buffer
+    const JaggedArray<float> tmbb(width, height);
+
+#ifdef _OPENMP
+    #pragma omp parallel
+#endif
+    {
+        // blur chroma a and b
+        gaussianBlur(src->a, tmaa, src->W, src->H, radius);
+        gaussianBlur(src->b, tmbb, src->W, src->H, radius);
+    }
+
+// begin chroma badpixels
+    double chrommed = 0.f; // use double precision for large summations
+
+#ifdef _OPENMP
+    #pragma omp parallel for reduction(+:chrommed)
+#endif
+
+    for(int i = 0; i < height; i++ ) {
+        for(int j = 0; j < width; j++) {
+            float chroma = SQR(src->a[i][j] - tmaa[i][j]) + SQR(src->b[i][j] - tmbb[i][j]);
+            chrommed += chroma;
+            badpix[i * width + j] = chroma;
+        }
+    }
+
+    chrommed /= (height * width);
+    float threshfactor = (thresh * chrommed) / 33.f;
+
+// now chrommed is calculated, so we postprocess badpix to reduce the number of divisions in future
+
+#ifdef _OPENMP
+    #pragma omp parallel
+#endif
+    {
+#ifdef __SSE2__
+        vfloat sumv = F2V(chrommed + eps2);
+        vfloat onev = F2V(1.f);
+#endif
+#ifdef _OPENMP
+        #pragma omp for
+#endif
+
+        for(int i = 0; i < height; i++) {
+            int j = 0;
+#ifdef __SSE2__
+            for(; j < width - 3; j += 4) {
+                STVFU(badpix[i * width + j], onev / (LVFU(badpix[i * width + j]) + sumv));
+            }
+#endif
+            for(; j < width; j++) {
+                badpix[i * width + j] = 1.f / (badpix[i * width + j] + chrommed + eps2);
+            }
+        }
+    }
+
+    // because we changed the values of badpix we also have to recalculate threshfactor
+    threshfactor = 1.f / (threshfactor + chrommed + eps2);
+
+    chrom *= 327.68f;
+    chrom *= chrom;
 
 #ifdef _OPENMP
     #pragma omp parallel
 #endif
     {
-        int j;
 #ifdef _OPENMP
         #pragma omp for schedule(dynamic,16)
 #endif
 
         for(int i = 0; i < height; i++ ) {
-            for(j = 0; j < halfwin; j++) {
-                tmaa[i][j] = sraa[i][j];
-                tmbb[i][j] = srbb[i][j];
+            int j = 0;
+            for(; j < halfwin; j++) {
 
                 if (badpix[i * width + j] < threshfactor) {
                     float atot = 0.f;
@@ -1119,21 +1248,23 @@ void ImProcFunctions::Badpixelscam(CieImage * src, CieImage * dst, double radius
                     for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
                         for (int j1 = 0; j1 < j + halfwin; j1++) {
                             wt = badpix[i1 * width + j1];
-                            atot += wt * sraa[i1][j1];
-                            btot += wt * srbb[i1][j1];
+                            atot += wt * src->a[i1][j1];
+                            btot += wt * src->b[i1][j1];
                             norm += wt;
                         }
 
                     if(norm > 0.f) {
-                        tmaa[i][j] = (atot / norm);
-                        tmbb[i][j] = (btot / norm);
+                        const float a = atot / norm;
+                        const float b = btot / norm;
+                        if(SQR(a) + SQR(b) < chrom) {
+                            src->a[i][j] = a;
+                            src->b[i][j] = b;
+                        }
                     }
                 }
             }
 
-            for(; j < width - halfwin; j++) {
-                tmaa[i][j] = sraa[i][j];
-                tmbb[i][j] = srbb[i][j];
+            for(; j < width - halfwin; j++) { // this loop is the hot spot. Maybe worth to vectorize
 
                 if (badpix[i * width + j] < threshfactor) {
                     float atot = 0.f;
@@ -1144,21 +1275,23 @@ void ImProcFunctions::Badpixelscam(CieImage * src, CieImage * dst, double radius
                     for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
                         for (int j1 = j - halfwin + 1; j1 < j + halfwin; j1++) {
                             wt = badpix[i1 * width + j1];
-                            atot += wt * sraa[i1][j1];
-                            btot += wt * srbb[i1][j1];
+                            atot += wt * src->a[i1][j1];
+                            btot += wt * src->b[i1][j1];
                             norm += wt;
                         }
 
                     if(norm > 0.f) {
-                        tmaa[i][j] = (atot / norm);
-                        tmbb[i][j] = (btot / norm);
+                        const float a = atot / norm;
+                        const float b = btot / norm;
+                        if(SQR(a) + SQR(b) < chrom) {
+                            src->a[i][j] = a;
+                            src->b[i][j] = b;
+                        }
                     }
                 }
             }
 
             for(; j < width; j++) {
-                tmaa[i][j] = sraa[i][j];
-                tmbb[i][j] = srbb[i][j];
 
                 if (badpix[i * width + j] < threshfactor) {
                     float atot = 0.f;
@@ -1169,697 +1302,25 @@ void ImProcFunctions::Badpixelscam(CieImage * src, CieImage * dst, double radius
                     for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
                         for (int j1 = j - halfwin + 1; j1 < width; j1++) {
                             wt = badpix[i1 * width + j1];
-                            atot += wt * sraa[i1][j1];
-                            btot += wt * srbb[i1][j1];
+                            atot += wt * src->a[i1][j1];
+                            btot += wt * src->b[i1][j1];
                             norm += wt;
                         }
 
                     if(norm > 0.f) {
-                        tmaa[i][j] = (atot / norm);
-                        tmbb[i][j] = (btot / norm);
-                    }
-                }
-            }
-        }
-    }
-
-#ifdef _OPENMP
-    #pragma omp parallel
-#endif
-    {
-#ifdef _OPENMP
-        #pragma omp for
-#endif
-
-        for(int i = 0; i < height; i++ ) {
-            for(int j = 0; j < width; j++) {
-                float intera = tmaa[i][j];
-                float interb = tmbb[i][j];
-                float CC = sqrt(SQR(interb) + SQR(intera));
-
-                if(hotbad == 0) {
-                    if(CC < chrom && skinprot != 0.f) {
-                        dst->h_p[i][j] = (xatan2f(interb, intera)) / piid;
-                        dst->C_p[i][j] = sqrt(SQR(interb) + SQR(intera));
-                    }
-                } else {
-                    dst->h_p[i][j] = (xatan2f(interb, intera)) / piid;
-                    dst->C_p[i][j] = sqrt(SQR(interb) + SQR(intera));
-                }
-            }
-        }
-    }
-
-    if(src != dst) {
-#ifdef _OPENMP
-        #pragma omp parallel for
-#endif
-
-        for(int i = 0; i < height; i++ )
-            for(int j = 0; j < width; j++) {
-                dst->sh_p[i][j] = src->sh_p[i][j];
-            }
-    }
-
-
-    for (int i = 0; i < height; i++) {
-        delete [] sraa[i];
-    }
-
-    delete [] sraa;
-
-    for (int i = 0; i < height; i++) {
-        delete [] srbb[i];
-    }
-
-    delete [] srbb;
-
-    for (int i = 0; i < height; i++) {
-        delete [] tmaa[i];
-    }
-
-    delete [] tmaa;
-
-    for (int i = 0; i < height; i++) {
-        delete [] tmbb[i];
-    }
-
-    delete [] tmbb;
-
-    for (int i = 0; i < height; i++) {
-        delete [] tmL[i];
-    }
-
-    delete [] tmL;
-
-    free(badpix);
-
-    t2.set();
-
-    if( settings->verbose ) {
-        printf("Ciecam badpixels:- %d usec\n", t2.etime(t1));
-    }
-
-
-}
-
-void ImProcFunctions::BadpixelsLab(LabImage * src, LabImage * dst, double radius, int thresh, int mode, float skinprot, float chrom)
-{
-    const int halfwin = ceil(2 * radius) + 1;
-    MyTime t1, t2;
-    t1.set();
-
-    const int width = src->W, height = src->H;
-
-    int i1, j1;
-    const float eps = 1.0f;
-    const float eps2 = 0.01f;
-
-    float** sraa;
-    sraa = new float*[height];
-
-    for (int i = 0; i < height; i++) {
-        sraa[i] = new float[width];
-    }
-
-    float** srbb;
-    srbb = new float*[height];
-
-    for (int i = 0; i < height; i++) {
-        srbb[i] = new float[width];
-    }
-
-    float** tmaa;
-    tmaa = new float*[height];
-
-    for (int i = 0; i < height; i++) {
-        tmaa[i] = new float[width];
-    }
-
-    float** tmbb;
-    tmbb = new float*[height];
-
-    for (int i = 0; i < height; i++) {
-        tmbb[i] = new float[width];
-    }
-
-    float* badpix = (float*)malloc(width * height * sizeof(float));
-
-    float** tmL;
-    tmL = new float*[height];
-
-    for (int i = 0; i < height; i++) {
-        tmL[i] = new float[width];
-    }
-
-
-#ifdef _OPENMP
-    #pragma omp parallel
-#endif
-    {
-//  float2 sincosval;
-#ifdef __SSE2__
-        int j;
-//  vfloat2 sincosvalv;
-//  __m128 piidv = F2V(piid);
-#endif // __SSE2__
-#ifdef _OPENMP
-        #pragma omp for
-#endif
-
-        for (int i = 0; i < height; i++) {
-#ifdef __SSE2__
-
-            for (j = 0; j < width - 3; j += 4) {
-                STVFU(sraa[i][j], LVFU(src->a[i][j]));
-                STVFU(srbb[i][j], LVFU(src->b[i][j]));
-            }
-
-            for (; j < width; j++) {
-                sraa[i][j] = src->a[i][j];
-                srbb[i][j] = src->b[i][j];
-            }
-
-#else
-
-            for (int j = 0; j < width; j++) {
-                sraa[i][j] = src->a[i][j];
-                srbb[i][j] = src->b[i][j];
-            }
-
-#endif
-        }
-    }
-
-#ifdef _OPENMP
-    #pragma omp parallel
-#endif
-    {
-        //chroma a and b
-        if(mode >= 2) { //choice of gaussian blur
-            gaussianBlur (sraa, tmaa, src->W, src->H, radius);
-            gaussianBlur (srbb, tmbb, src->W, src->H, radius);
-        }
-
-        //luma sh_p
-        gaussianBlur (src->L, tmL, src->W, src->H, 2.0);//low value to avoid artifacts
-    }
-
-    if(mode == 1) { //choice of median
-        #pragma omp parallel
-        {
-            int ip, in, jp, jn;
-            #pragma omp for nowait  //nowait because next loop inside this parallel region is independent on this one
-
-            for (int i = 0; i < height; i++) {
-                if (i < 2) {
-                    ip = i + 2;
-                } else {
-                    ip = i - 2;
-                }
-
-                if (i > height - 3) {
-                    in = i - 2;
-                } else {
-                    in = i + 2;
-                }
-
-                for (int j = 0; j < width; j++) {
-                    if (j < 2) {
-                        jp = j + 2;
-                    } else {
-                        jp = j - 2;
-                    }
-
-                    if (j > width - 3) {
-                        jn = j - 2;
-                    } else {
-                        jn = j + 2;
-                    }
-
-                    tmaa[i][j] = median(sraa[ip][jp], sraa[ip][j], sraa[ip][jn], sraa[i][jp], sraa[i][j], sraa[i][jn], sraa[in][jp], sraa[in][j], sraa[in][jn]);
-                }
-            }
-
-            #pragma omp for
-
-            for (int i = 0; i < height; i++) {
-                if (i < 2) {
-                    ip = i + 2;
-                } else {
-                    ip = i - 2;
-                }
-
-                if (i > height - 3) {
-                    in = i - 2;
-                } else {
-                    in = i + 2;
-                }
-
-                for (int j = 0; j < width; j++) {
-                    if (j < 2) {
-                        jp = j + 2;
-                    } else {
-                        jp = j - 2;
-                    }
-
-                    if (j > width - 3) {
-                        jn = j - 2;
-                    } else {
-                        jn = j + 2;
-                    }
-
-                    tmbb[i][j] = median(srbb[ip][jp], srbb[ip][j], srbb[ip][jn], srbb[i][jp], srbb[i][j], srbb[i][jn], srbb[in][jp], srbb[in][j], srbb[in][jn]);
-                }
-            }
-        }
-    }
-
-//luma badpixels
-    const float sh_thr = 4.5f;//low value for luma sh_p to avoid artifacts
-    const float shthr = sh_thr / 24.0f;
-
-#ifdef _OPENMP
-    #pragma omp parallel
-#endif
-    {
-        int j;
-#ifdef __SSE2__
-        __m128 shfabsv, shmedv;
-        __m128 shthrv = F2V(shthr);
-        __m128 onev = F2V(1.0f);
-#endif // __SSE2__
-#ifdef _OPENMP
-        #pragma omp for private(i1,j1)
-#endif
-
-        for (int i = 0; i < height; i++) {
-            for (j = 0; j < 2; j++) {
-                float shfabs = fabs(src->L[i][j] - tmL[i][j]);
-                float shmed = 0.0f;
-
-                for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
-                    for (j1 = 0; j1 <= j + 2; j1++ ) {
-                        shmed += fabs(src->L[i1][j1] - tmL[i1][j1]);
-                    }
-
-                badpix[i * width + j] = (shfabs > ((shmed - shfabs) * shthr));
-            }
-
-#ifdef __SSE2__
-
-            for (; j < width - 5; j += 4) {
-                shfabsv = vabsf(LVFU(src->L[i][j]) - LVFU(tmL[i][j]));
-                shmedv = ZEROV;
-
-                for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
-                    for (j1 = j - 2; j1 <= j + 2; j1++ ) {
-                        shmedv += vabsf(LVFU(src->L[i1][j1]) - LVFU(tmL[i1][j1]));
-                    }
-
-                STVFU(badpix[i * width + j], vself(vmaskf_gt(shfabsv, (shmedv - shfabsv)*shthrv), onev, ZEROV));
-            }
-
-            for (; j < width - 2; j++) {
-                float shfabs = fabs(src->L[i][j] - tmL[i][j]);
-                float shmed = 0.0f;
-
-                for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
-                    for (j1 = j - 2; j1 <= j + 2; j1++ ) {
-                        shmed += fabs(src->L[i1][j1] - tmL[i1][j1]);
-                    }
-
-                badpix[i * width + j] = (shfabs > ((shmed - shfabs) * shthr));
-            }
-
-#else
-
-            for (; j < width - 2; j++) {
-                float shfabs = fabs(src->L[i][j] - tmL[i][j]);
-                float shmed = 0.0f;
-
-                for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
-                    for (j1 = j - 2; j1 <= j + 2; j1++ ) {
-                        shmed += fabs(src->L[i1][j1] - tmL[i1][j1]);
-                    }
-
-                badpix[i * width + j] = (shfabs > ((shmed - shfabs) * shthr));
-            }
-
-#endif
-
-            for (; j < width; j++) {
-                float shfabs = fabs(src->L[i][j] - tmL[i][j]);
-                float shmed = 0.0f;
-
-                for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
-                    for (j1 = j - 2; j1 < width; j1++ ) {
-                        shmed += fabs(src->L[i1][j1] - tmL[i1][j1]);
-                    }
-
-                badpix[i * width + j] = (shfabs > ((shmed - shfabs) * shthr));
-            }
-        }
-    }
-
-
-#ifdef _OPENMP
-    #pragma omp parallel
-#endif
-    {
-        int j;
-#ifdef _OPENMP
-        #pragma omp for private(i1,j1) schedule(dynamic,16)
-#endif
-
-        for (int i = 0; i < height; i++) {
-            for (j = 0; j < 2; j++) {
-                if (!badpix[i * width + j]) {
-                    continue;
-                }
-
-                float norm = 0.0f;
-                float shsum = 0.0f;
-                float sum = 0.0f;
-                int tot = 0;
-
-                for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
-                    for (j1 = 0; j1 <= j + 2; j1++ ) {
-                        if (i1 == i && j1 == j) {
-                            continue;
-                        }
-
-                        if (badpix[i1 * width + j1]) {
-                            continue;
-                        }
-
-                        sum += src->L[i1][j1];
-                        tot++;
-                        float dirsh = 1.f / (SQR(src->L[i1][j1] - src->L[i][j]) + eps);
-                        shsum += dirsh * src->L[i1][j1];
-                        norm += dirsh;
-                    }
-
-                if (norm > 0.f) {
-                    src->L[i][j] = shsum / norm;
-                } else {
-                    if(tot > 0) {
-                        src->L[i][j] = sum / tot;
-                    }
-                }
-            }
-
-            for (; j < width - 2; j++) {
-                if (!badpix[i * width + j]) {
-                    continue;
-                }
-
-                float norm = 0.0f;
-                float shsum = 0.0f;
-                float sum = 0.0f;
-                int tot = 0;
-
-                for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
-                    for (j1 = j - 2; j1 <= j + 2; j1++ ) {
-                        if (i1 == i && j1 == j) {
-                            continue;
-                        }
-
-                        if (badpix[i1 * width + j1]) {
-                            continue;
-                        }
-
-                        sum += src->L[i1][j1];
-                        tot++;
-                        float dirsh = 1.f / (SQR(src->L[i1][j1] - src->L[i][j]) + eps);
-                        shsum += dirsh * src->L[i1][j1];
-                        norm += dirsh;
-                    }
-
-                if (norm > 0.f) {
-                    src->L[i][j] = shsum / norm;
-                } else {
-                    if(tot > 0) {
-                        src->L[i][j] = sum / tot;
-                    }
-                }
-            }
-
-            for (; j < width; j++) {
-                if (!badpix[i * width + j]) {
-                    continue;
-                }
-
-                float norm = 0.0f;
-                float shsum = 0.0f;
-                float sum = 0.0f;
-                int tot = 0;
-
-                for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
-                    for (j1 = j - 2; j1 < width; j1++ ) {
-                        if (i1 == i && j1 == j) {
-                            continue;
-                        }
-
-                        if (badpix[i1 * width + j1]) {
-                            continue;
-                        }
-
-                        sum += src->L[i1][j1];
-                        tot++;
-                        float dirsh = 1.f / (SQR(src->L[i1][j1] - src->L[i][j]) + eps);
-                        shsum += dirsh * src->L[i1][j1];
-                        norm += dirsh;
-                    }
-
-                if (norm > 0.f) {
-                    src->L[i][j] = shsum / norm;
-                } else {
-                    if(tot > 0) {
-                        src->L[i][j] = sum / tot;
-                    }
-                }
-            }
-        }
-    }
-// end luma badpixels
-
-    if(mode == 3) {
-// begin chroma badpixels
-        float chrommed = 0.f;
-#ifdef _OPENMP
-        #pragma omp parallel for reduction(+:chrommed)
-#endif
-
-        for(int i = 0; i < height; i++ ) {
-            for(int j = 0; j < width; j++) {
-                float chroma = SQR(sraa[i][j] - tmaa[i][j]) + SQR(srbb[i][j] - tmbb[i][j]);
-                chrommed += chroma;
-                badpix[i * width + j] = chroma;
-            }
-        }
-
-        chrommed /= (height * width);
-        float threshfactor = (thresh * chrommed) / 33.f;
-
-// now chrommed is calculated, so we postprocess badpix to reduce the number of divisions in future
-#ifdef __SSE2__
-#ifdef _OPENMP
-        #pragma omp parallel
-#endif
-        {
-            int j;
-            __m128 sumv = F2V( chrommed + eps2 );
-            __m128 onev = F2V( 1.0f );
-#ifdef _OPENMP
-            #pragma omp for
-#endif
-
-            for(int i = 0; i < height; i++) {
-                for(j = 0; j < width - 3; j += 4) {
-                    STVFU(badpix[i * width + j], onev / (LVFU(badpix[i * width + j]) + sumv));
-                }
-
-                for(; j < width; j++) {
-                    badpix[i * width + j] = 1.f / (badpix[i * width + j] + chrommed + eps2);
-                }
-            }
-        }
-#else
-#ifdef _OPENMP
-        #pragma omp parallel for
-#endif
-
-        for(int i = 0; i < height; i++)
-            for(int j = 0; j < width; j++) {
-                badpix[i * width + j] = 1.f / (badpix[i * width + j] + chrommed + eps2);
-            }
-
-#endif
-
-        // because we changed the values of badpix we also have to recalculate threshfactor
-        threshfactor = 1.0f / (threshfactor + chrommed + eps2);
-
-#ifdef _OPENMP
-        #pragma omp parallel
-#endif
-        {
-            int j;
-#ifdef _OPENMP
-            #pragma omp for schedule(dynamic,16)
-#endif
-
-            for(int i = 0; i < height; i++ ) {
-                for(j = 0; j < halfwin; j++) {
-                    tmaa[i][j] = sraa[i][j];
-                    tmbb[i][j] = srbb[i][j];
-
-                    if (badpix[i * width + j] < threshfactor) {
-                        float atot = 0.f;
-                        float btot = 0.f;
-                        float norm = 0.f;
-                        float wt;
-
-                        for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
-                            for (int j1 = 0; j1 < j + halfwin; j1++) {
-                                wt = badpix[i1 * width + j1];
-                                atot += wt * sraa[i1][j1];
-                                btot += wt * srbb[i1][j1];
-                                norm += wt;
-                            }
-
-                        if(norm > 0.f) {
-                            tmaa[i][j] = (atot / norm);
-                            tmbb[i][j] = (btot / norm);
-                        }
-                    }
-                }
-
-                for(; j < width - halfwin; j++) {
-                    tmaa[i][j] = sraa[i][j];
-                    tmbb[i][j] = srbb[i][j];
-
-                    if (badpix[i * width + j] < threshfactor) {
-                        float atot = 0.f;
-                        float btot = 0.f;
-                        float norm = 0.f;
-                        float wt;
-
-                        for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
-                            for (int j1 = j - halfwin + 1; j1 < j + halfwin; j1++) {
-                                wt = badpix[i1 * width + j1];
-                                atot += wt * sraa[i1][j1];
-                                btot += wt * srbb[i1][j1];
-                                norm += wt;
-                            }
-
-                        if(norm > 0.f) {
-                            tmaa[i][j] = (atot / norm);
-                            tmbb[i][j] = (btot / norm);
-                        }
-                    }
-                }
-
-                for(; j < width; j++) {
-                    tmaa[i][j] = sraa[i][j];
-                    tmbb[i][j] = srbb[i][j];
-
-                    if (badpix[i * width + j] < threshfactor) {
-                        float atot = 0.f;
-                        float btot = 0.f;
-                        float norm = 0.f;
-                        float wt;
-
-                        for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
-                            for (int j1 = j - halfwin + 1; j1 < width; j1++) {
-                                wt = badpix[i1 * width + j1];
-                                atot += wt * sraa[i1][j1];
-                                btot += wt * srbb[i1][j1];
-                                norm += wt;
-                            }
-
-                        if(norm > 0.f) {
-                            tmaa[i][j] = (atot / norm);
-                            tmbb[i][j] = (btot / norm);
+                        const float a = atot / norm;
+                        const float b = btot / norm;
+                        if(SQR(a) + SQR(b) < chrom) {
+                            src->a[i][j] = a;
+                            src->b[i][j] = b;
                         }
                     }
                 }
             }
         }
-
-#ifdef _OPENMP
-        #pragma omp parallel
-#endif
-        {
-#ifdef _OPENMP
-            #pragma omp for
-#endif
-
-            for(int i = 0; i < height; i++ ) {
-                for(int j = 0; j < width; j++) {
-                    float intera = tmaa[i][j];
-                    float interb = tmbb[i][j];
-                    float CC = sqrt(SQR(interb / 327.68) + SQR(intera / 327.68f));
-
-                    if(CC < chrom && skinprot != 0.f) {
-                        dst->a[i][j] = intera;
-                        dst->b[i][j] = interb;
-                    }
-                }
-            }
-        }
-    }
-
-    if(src != dst) {
-#ifdef _OPENMP
-        #pragma omp parallel for
-#endif
-
-        for(int i = 0; i < height; i++ )
-            for(int j = 0; j < width; j++) {
-                dst->L[i][j] = src->L[i][j];
-            }
-    }
-
-
-    for (int i = 0; i < height; i++) {
-        delete [] sraa[i];
-    }
-
-    delete [] sraa;
-
-    for (int i = 0; i < height; i++) {
-        delete [] srbb[i];
-    }
-
-    delete [] srbb;
-
-    for (int i = 0; i < height; i++) {
-        delete [] tmaa[i];
-    }
-
-    delete [] tmaa;
-
-    for (int i = 0; i < height; i++) {
-        delete [] tmbb[i];
-    }
-
-    delete [] tmbb;
-
-    for (int i = 0; i < height; i++) {
-        delete [] tmL[i];
-    }
-
-    delete [] tmL;
-
-    free(badpix);
-
-    t2.set();
-
-    if( settings->verbose ) {
-        printf("Lab artifacts:- %d usec\n", t2.etime(t1));
     }
 
+    delete [] badpix;
 
 }
 
diff --git a/rtengine/improcfun.cc b/rtengine/improcfun.cc
index e7ff01f46..806be454f 100644
--- a/rtengine/improcfun.cc
+++ b/rtengine/improcfun.cc
@@ -1486,21 +1486,10 @@ void ImProcFunctions::ciecam_02 (CieImage* ncie, double adap, int pW, int pwb, L
 //if(params->dirpyrequalizer.enabled) if(execsharp) {
             if (params->dirpyrequalizer.enabled) {
                 if (params->dirpyrequalizer.gamutlab  /*&& execsharp*/) {
-                    float artifact = (float) settings->artifact_cbdl;
-
-                    if (artifact > 6.f) {
-                        artifact = 6.f;
-                    }
-
-                    if (artifact < 0.f) {
-                        artifact = 1.f;
-                    }
-
-                    float chrom = 50.f;
-                    {
-                        int hotbad = 0;
-                        ImProcFunctions::badpixcam (ncie, artifact, 5, 2, params->dirpyrequalizer.skinprotect, chrom, hotbad);      //enabled remove artifacts for cbDL
-                    }
+                    constexpr float artifact = 4.f;
+                    constexpr float chrom = 50.f;
+                    constexpr int hotbad = 0;
+                    ImProcFunctions::badpixcam (ncie, artifact, 5, 2, params->dirpyrequalizer.skinprotect, chrom, hotbad);      //enabled remove artifacts for cbDL
                 }
             }
 
@@ -2917,18 +2906,10 @@ void ImProcFunctions::ciecam_02float (CieImage* ncie, float adap, int pW, int pw
 //if(params->dirpyrequalizer.enabled) if(execsharp) {
                 if (params->dirpyrequalizer.enabled)  {
                     if (params->dirpyrequalizer.gamutlab  /*&& execsharp*/) { //remove artifacts by gaussian blur - skin control
-                        float artifact = (float) settings->artifact_cbdl;
+                        constexpr float artifact = 4.f;
+                        constexpr int hotbad = 0;
+                        constexpr float chrom = 50.f;
 
-                        if (artifact > 6.f) {
-                            artifact = 6.f;
-                        }
-
-                        if (artifact < 0.f) {
-                            artifact = 1.f;
-                        }
-
-                        int hotbad = 0;
-                        float chrom = 50.f;
                         lab->deleteLab();
                         ImProcFunctions::badpixcam (ncie, artifact, 5, 2, params->dirpyrequalizer.skinprotect, chrom, hotbad);  //enabled remove artifacts for cbDL
                         lab->reallocLab();
@@ -6290,52 +6271,43 @@ void ImProcFunctions::defringe (LabImage* lab)
     if (params->defringe.enabled && lab->W >= 8 && lab->H >= 8)
 
     {
-        PF_correct_RT (lab, lab, params->defringe.radius, params->defringe.threshold);
+        PF_correct_RT (lab, params->defringe.radius, params->defringe.threshold);
     }
 }
 
 void ImProcFunctions::defringecam (CieImage* ncie)
 {
     if (params->defringe.enabled && ncie->W >= 8 && ncie->H >= 8) {
-        PF_correct_RTcam (ncie, ncie, params->defringe.radius, params->defringe.threshold);
+        PF_correct_RTcam (ncie, params->defringe.radius, params->defringe.threshold);
     }
 }
 
 void ImProcFunctions::badpixcam (CieImage* ncie, double rad, int thr, int mode, float skinprot, float chrom, int hotbad)
 {
     if (ncie->W >= 8 && ncie->H >= 8) {
-        Badpixelscam (ncie, ncie, rad, thr, mode, skinprot, chrom, hotbad);
+        Badpixelscam (ncie, rad, thr, mode, skinprot, chrom, hotbad);
     }
 }
 
 void ImProcFunctions::badpixlab (LabImage* lab, double rad, int thr, int mode, float skinprot, float chrom)
 {
     if (lab->W >= 8 && lab->H >= 8) {
-        BadpixelsLab (lab, lab, rad, thr, mode, skinprot, chrom);
+        BadpixelsLab (lab, rad, thr, mode, chrom);
     }
 }
 
 void ImProcFunctions::dirpyrequalizer (LabImage* lab, int scale)
 {
     if (params->dirpyrequalizer.enabled && lab->W >= 8 && lab->H >= 8) {
-        float b_l = static_cast<float> (params->dirpyrequalizer.hueskin.getBottomLeft()) / 100.0f;
-        float t_l = static_cast<float> (params->dirpyrequalizer.hueskin.getTopLeft()) / 100.0f;
-        float t_r = static_cast<float> (params->dirpyrequalizer.hueskin.getTopRight()) / 100.0f;
+        float b_l = static_cast<float> (params->dirpyrequalizer.hueskin.getBottomLeft()) / 100.f;
+        float t_l = static_cast<float> (params->dirpyrequalizer.hueskin.getTopLeft()) / 100.f;
+        float t_r = static_cast<float> (params->dirpyrequalizer.hueskin.getTopRight()) / 100.f;
         //      if     (params->dirpyrequalizer.algo=="FI") choice=0;
         //      else if(params->dirpyrequalizer.algo=="LA") choice=1;
-        float artifact = (float) settings->artifact_cbdl;
+        constexpr float artifact = 4.f;
 
-        if (artifact > 6.f) {
-            artifact = 6.f;
-        }
-
-        if (artifact < 0.f) {
-            artifact = 1.f;
-        }
-
-        float chrom = 50.f;
-
-        if (params->dirpyrequalizer.gamutlab) {
+        if (params->dirpyrequalizer.gamutlab && params->dirpyrequalizer.skinprotect != 0) {
+            constexpr float chrom = 50.f;
             ImProcFunctions::badpixlab (lab, artifact, 5, 3, params->dirpyrequalizer.skinprotect, chrom);    //for artifacts
         }
 
diff --git a/rtengine/improcfun.h b/rtengine/improcfun.h
index 8204516fd..ffad14213 100644
--- a/rtengine/improcfun.h
+++ b/rtengine/improcfun.h
@@ -339,10 +339,10 @@ public:
     void badpixcam      (CieImage* ncie, double rad, int thr, int mode, float skinprot, float chrom, int hotbad);
     void badpixlab      (LabImage* lab, double rad, int thr, int mode, float skinprot, float chrom);
 
-    void PF_correct_RT    (LabImage * src, LabImage * dst, double radius, int thresh);
-    void PF_correct_RTcam (CieImage * src, CieImage * dst, double radius, int thresh);
-    void Badpixelscam (CieImage * src, CieImage * dst, double radius, int thresh, int mode, float skinprot, float chrom, int hotbad);
-    void BadpixelsLab (LabImage * src, LabImage * dst, double radius, int thresh, int mode, float skinprot, float chrom);
+    void PF_correct_RT    (LabImage * src, double radius, int thresh);
+    void PF_correct_RTcam (CieImage * src, double radius, int thresh);
+    void Badpixelscam (CieImage * src, double radius, int thresh, int mode, float skinprot, float chrom, int hotbad);
+    void BadpixelsLab (LabImage * src, double radius, int thresh, int mode, float chrom);
 
     void ToneMapFattal02(Imagefloat *rgb);
     void localContrast(LabImage *lab);
diff --git a/rtengine/settings.h b/rtengine/settings.h
index 8f24f18f1..eebc2fb4b 100644
--- a/rtengine/settings.h
+++ b/rtengine/settings.h
@@ -80,7 +80,6 @@ public:
     //  double          colortoningab; //
     //  double          decaction;
     //  bool            bw_complementary;
-    double          artifact_cbdl;
     double          level0_cbdl;
     double          level123_cbdl;
     Glib::ustring   lensfunDbDirectory; ///< The directory containing the lensfun database. If empty, the system defaults will be used (as described in http://lensfun.sourceforge.net/manual/dbsearch.html)
diff --git a/rtgui/options.cc b/rtgui/options.cc
index 0d227b6eb..22adb2836 100644
--- a/rtgui/options.cc
+++ b/rtgui/options.cc
@@ -557,7 +557,6 @@ void Options::setDefaults ()
     rtSettings.gamutICC = true;
     rtSettings.gamutLch = true;
     rtSettings.amchroma = 40;//between 20 and 140   low values increase effect..and also artefacts, high values reduces
-    rtSettings.artifact_cbdl = 4.;
     rtSettings.level0_cbdl = 0;
     rtSettings.level123_cbdl = 30;
 
@@ -1411,9 +1410,6 @@ void Options::readFromFile (Glib::ustring fname)
                                     rtSettings.viewinggreySc = keyFile.get_integer ("Color Management", "greySc");
                                 }
                 */
-                if (keyFile.has_key ("Color Management", "CBDLArtif")) {
-                    rtSettings.artifact_cbdl = keyFile.get_double ("Color Management", "CBDLArtif");
-                }
 
                 if (keyFile.has_key ("Color Management", "CBDLlevel0")) {
                     rtSettings.level0_cbdl = keyFile.get_double ("Color Management", "CBDLlevel0");
@@ -2006,7 +2002,6 @@ void Options::saveToFile (Glib::ustring fname)
         keyFile.set_integer ("Color Management", "CRI", rtSettings.CRI_color);
         keyFile.set_integer ("Color Management", "DenoiseLabgamma", rtSettings.denoiselabgamma);
         //keyFile.set_boolean ("Color Management", "Ciebadpixgauss", rtSettings.ciebadpixgauss);
-        keyFile.set_double  ("Color Management", "CBDLArtif", rtSettings.artifact_cbdl);
         keyFile.set_double  ("Color Management", "CBDLlevel0", rtSettings.level0_cbdl);
         keyFile.set_double  ("Color Management", "CBDLlevel123", rtSettings.level123_cbdl);
         //keyFile.set_double  ("Color Management", "Colortoningab", rtSettings.colortoningab);