From 524b0056dcead2b69d2861cdf8e9e0ef8c5f362f Mon Sep 17 00:00:00 2001
From: heckflosse <heckflosse67@gmx.de>
Date: Wed, 27 Dec 2017 00:01:09 +0100
Subject: [PATCH 1/2] Speedup for rgbPrco()

---
 rtengine/LUT.h        | 31 ++++++++++++++++++++++
 rtengine/improcfun.cc | 60 ++++++++++++++++++++++++++++++++++---------
 2 files changed, 79 insertions(+), 12 deletions(-)
diff --git a/rtengine/LUT.h b/rtengine/LUT.h
index 6a617f40b..48c85726d 100644
--- a/rtengine/LUT.h
+++ b/rtengine/LUT.h
@@ -340,6 +340,37 @@ public:
         vfloat diff = clampedIndexes - _mm_cvtepi32_ps(indexes);
         return vintpf(diff, upper, lower);
     }
+
+    // NOTE: This version requires LUTs which do not clip at upper and lower bounds
+    vfloat operator()(vfloat indexv) const
+    {
+        static_assert(std::is_same<T, float>::value, "This method only works for float LUTs");
+
+        // Clamp and convert to integer values. Extract out of SSE register because all
+        // lookup operations use regular addresses.
+        vfloat clampedIndexes = vmaxf(ZEROV, vminf(F2V(maxsf), indexv));
+        vint indexes = _mm_cvttps_epi32(clampedIndexes);
+        int indexArray[4];
+        _mm_storeu_si128(reinterpret_cast<__m128i*>(&indexArray[0]), indexes);
+
+        // Load data from the table. This reads more than necessary, but there don't seem
+        // to exist more granular operations (though we could try non-SSE).
+        // Cast to int for convenience in the next operation (partial transpose).
+        vint values[4];
+        for (int i = 0; i < 4; ++i) {
+            values[i] = _mm_castps_si128(LVFU(data[indexArray[i]]));
+        }
+
+        // Partial 4x4 transpose operation. We want two new vectors, the first consisting
+        // of [values[0][0] ... values[3][0]] and the second [values[0][1] ... values[3][1]].
+        __m128i temp0 = _mm_unpacklo_epi32(values[0], values[1]);
+        __m128i temp1 = _mm_unpacklo_epi32(values[2], values[3]);
+        vfloat lower = _mm_castsi128_ps(_mm_unpacklo_epi64(temp0, temp1));
+        vfloat upper = _mm_castsi128_ps(_mm_unpackhi_epi64(temp0, temp1));
+
+        vfloat diff = indexv - _mm_cvtepi32_ps(indexes);
+        return vintpf(diff, upper, lower);
+    }
 #ifdef __SSE4_1__
     template<typename U = T, typename = typename std::enable_if<std::is_same<U, float>::value>::type>
     vfloat operator[](vint idxv ) const
diff --git a/rtengine/improcfun.cc b/rtengine/improcfun.cc
index 082799e62..8300a24e7 100644
--- a/rtengine/improcfun.cc
+++ b/rtengine/improcfun.cc
@@ -3540,7 +3540,27 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
                 }
 
                 for (int i = istart, ti = 0; i < tH; i++, ti++) {
-                    for (int j = jstart, tj = 0; j < tW; j++, tj++) {
+                    vfloat cr = F2V(0.299f);
+                    vfloat cg = F2V(0.587f);
+                    vfloat cb = F2V(0.114f);
+                    int j = jstart;
+                    int tj = 0;
+#ifdef __SSE2__
+                    for (; j < tW - 3; j+=4, tj+=4) {
+
+                        vfloat rv = LVF(rtemp[ti * TS + tj]);
+                        vfloat gv = LVF(gtemp[ti * TS + tj]);
+                        vfloat bv = LVF(btemp[ti * TS + tj]);
+
+                        //shadow tone curve
+                        vfloat Yv = cr * rv + cg * gv + cb * bv;
+                        vfloat tonefactorv = shtonecurve(Yv);
+                        STVF(rtemp[ti * TS + tj], rv * tonefactorv);
+                        STVF(gtemp[ti * TS + tj], gv * tonefactorv);
+                        STVF(btemp[ti * TS + tj], bv * tonefactorv);
+                    }
+#endif
+                    for (; j < tW; j++, tj++) {
 
                         float r = rtemp[ti * TS + tj];
                         float g = gtemp[ti * TS + tj];
@@ -3588,19 +3608,35 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
                     }
                 }
 
-                for (int i = istart, ti = 0; i < tH; i++, ti++) {
-                    for (int j = jstart, tj = 0; j < tW; j++, tj++) {
+                if (histToneCurveThr) {
+                    for (int i = istart, ti = 0; i < tH; i++, ti++) {
+                        for (int j = jstart, tj = 0; j < tW; j++, tj++) {
 
-                        //brightness/contrast
-                        rtemp[ti * TS + tj] = tonecurve[ rtemp[ti * TS + tj] ];
-                        gtemp[ti * TS + tj] = tonecurve[ gtemp[ti * TS + tj] ];
-                        btemp[ti * TS + tj] = tonecurve[ btemp[ti * TS + tj] ];
+                            //brightness/contrast
+                            rtemp[ti * TS + tj] = tonecurve[ rtemp[ti * TS + tj] ];
+                            gtemp[ti * TS + tj] = tonecurve[ gtemp[ti * TS + tj] ];
+                            btemp[ti * TS + tj] = tonecurve[ btemp[ti * TS + tj] ];
 
-                        if (histToneCurveThr) {
                             int y = CLIP<int> (lumimulf[0] * Color::gamma2curve[rtemp[ti * TS + tj]] + lumimulf[1] * Color::gamma2curve[gtemp[ti * TS + tj]] + lumimulf[2] * Color::gamma2curve[btemp[ti * TS + tj]]);
                             histToneCurveThr[y >> histToneCurveCompression]++;
                         }
                     }
+                } else {
+                    for (int i = istart, ti = 0; i < tH; i++, ti++) {
+                        int j = jstart, tj = 0;
+                        for (; j < tW - 3; j+=4, tj+=4) {
+                            //brightness/contrast
+                            STVF(rtemp[ti * TS + tj], tonecurve(LVF(rtemp[ti * TS + tj])));
+                            STVF(gtemp[ti * TS + tj], tonecurve(LVF(gtemp[ti * TS + tj])));
+                            STVF(btemp[ti * TS + tj], tonecurve(LVF(btemp[ti * TS + tj])));
+                        }
+                        for (; j < tW; j++, tj++) {
+                            //brightness/contrast
+                            rtemp[ti * TS + tj] = tonecurve[rtemp[ti * TS + tj]];
+                            gtemp[ti * TS + tj] = tonecurve[gtemp[ti * TS + tj]];
+                            btemp[ti * TS + tj] = tonecurve[btemp[ti * TS + tj]];
+                        }
+                    }
                 }
 
                 if (editID == EUID_ToneCurve1) {  // filling the pipette buffer
@@ -3687,10 +3723,10 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
                 if (hasToneCurve2) {
                     if (curveMode2 == ToneCurveParams::TcMode::STD) { // Standard
                         for (int i = istart, ti = 0; i < tH; i++, ti++) {
-                            for (int j = jstart, tj = 0; j < tW; j++, tj++) {
-                                const StandardToneCurve& userToneCurve = static_cast<const StandardToneCurve&> (customToneCurve2);
-                                userToneCurve.Apply (rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj]);
-                            }
+                            const StandardToneCurve& userToneCurve = static_cast<const StandardToneCurve&> (customToneCurve2);
+                            userToneCurve.BatchApply (
+                                    0, tW - jstart,
+                                    &rtemp[ti * TS], &gtemp[ti * TS], &btemp[ti * TS]);
                         }
                     } else if (curveMode2 == ToneCurveParams::TcMode::FILMLIKE) { // Adobe like
                         for (int i = istart, ti = 0; i < tH; i++, ti++) {

From 3dcce23c24d093f7c9719addd99c7b9d5589a827 Mon Sep 17 00:00:00 2001
From: heckflosse <heckflosse67@gmx.de>
Date: Sun, 31 Dec 2017 14:36:59 +0100
Subject: [PATCH 2/2] rgbproc() speedups

---
 rtengine/LUT.h        |  32 +++
 rtengine/color.cc     |  65 +++++
 rtengine/color.h      |   2 +-
 rtengine/improcfun.cc | 544 ++++++++++++++++++++----------------------
 4 files changed, 358 insertions(+), 285 deletions(-)

diff --git a/rtengine/LUT.h b/rtengine/LUT.h
index 48c85726d..0fd906dc0 100644
--- a/rtengine/LUT.h
+++ b/rtengine/LUT.h
@@ -309,6 +309,38 @@ public:
 
 #if defined( __SSE2__ ) && defined( __x86_64__ )
 
+
+    // NOTE: This function requires LUTs which clips only at lower bound
+    vfloat cb(vfloat indexv) const
+    {
+        static_assert(std::is_same<T, float>::value, "This method only works for float LUTs");
+
+        // Clamp and convert to integer values. Extract out of SSE register because all
+        // lookup operations use regular addresses.
+        vfloat clampedIndexes = vmaxf(ZEROV, vminf(F2V(maxIndexFloat), indexv));
+        vint indexes = _mm_cvttps_epi32(clampedIndexes);
+        int indexArray[4];
+        _mm_storeu_si128(reinterpret_cast<__m128i*>(&indexArray[0]), indexes);
+
+        // Load data from the table. This reads more than necessary, but there don't seem
+        // to exist more granular operations (though we could try non-SSE).
+        // Cast to int for convenience in the next operation (partial transpose).
+        vint values[4];
+        for (int i = 0; i < 4; ++i) {
+            values[i] = _mm_castps_si128(LVFU(data[indexArray[i]]));
+        }
+
+        // Partial 4x4 transpose operation. We want two new vectors, the first consisting
+        // of [values[0][0] ... values[3][0]] and the second [values[0][1] ... values[3][1]].
+        __m128i temp0 = _mm_unpacklo_epi32(values[0], values[1]);
+        __m128i temp1 = _mm_unpacklo_epi32(values[2], values[3]);
+        vfloat lower = _mm_castsi128_ps(_mm_unpacklo_epi64(temp0, temp1));
+        vfloat upper = _mm_castsi128_ps(_mm_unpackhi_epi64(temp0, temp1));
+
+        vfloat diff = vmaxf(ZEROV, indexv) - _mm_cvtepi32_ps(indexes);
+        return vintpf(diff, upper, lower);
+    }
+
     // NOTE: This version requires LUTs which clip at upper and lower bounds
     // (which is the default).
     vfloat operator[](vfloat indexv) const
diff --git a/rtengine/color.cc b/rtengine/color.cc
index 7964cc472..4b51314cc 100644
--- a/rtengine/color.cc
+++ b/rtengine/color.cc
@@ -1783,6 +1783,71 @@ void Color::Lab2XYZ(vfloat L, vfloat a, vfloat b, vfloat &x, vfloat &y, vfloat &
 }
 #endif // __SSE2__
 
+void Color::RGB2Lab(float *R, float *G, float *B, float *L, float *a, float *b, const float wp[3][3], int width)
+{
+
+#ifdef __SSE2__
+    // prepare matrix to save some divisions (reduces the number of divisions by width/2 - 6)
+    float wpn[3][3];
+    for(int i = 0; i < 3; ++i) {
+        wpn[0][i] = wp[0][i] / Color::D50x;
+        wpn[1][i] = wp[1][i];
+        wpn[2][i] = wp[2][i] / Color::D50z;
+    }
+
+    vfloat maxvalfv = F2V(MAXVALF);
+    vfloat c116v = F2V(116.f);
+    vfloat c5242d88v = F2V(5242.88f);
+    vfloat c500v = F2V(500.f);
+    vfloat c200v = F2V(200.f);
+#endif
+    int i = 0;
+#ifdef __SSE2__
+    for(;i < width - 3; i+=4) {
+        const vfloat rv = LVFU(R[i]);
+        const vfloat gv = LVFU(G[i]);
+        const vfloat bv = LVFU(B[i]);
+        const vfloat xv = F2V(wpn[0][0]) * rv + F2V(wpn[0][1]) * gv + F2V(wpn[0][2]) * bv;
+        const vfloat yv = F2V(wpn[1][0]) * rv + F2V(wpn[1][1]) * gv + F2V(wpn[1][2]) * bv;
+        const vfloat zv = F2V(wpn[2][0]) * rv + F2V(wpn[2][1]) * gv + F2V(wpn[2][2]) * bv;
+
+        vmask maxMask = vmaskf_gt(vmaxf(xv, vmaxf(yv, zv)), maxvalfv);
+        if (_mm_movemask_ps((vfloat)maxMask)) {
+            // take slower code path for all 4 pixels if one of the values is > MAXVALF. Still faster than non SSE2 version
+            for(int k = 0; k < 4; ++k) {
+                float x = xv[k];
+                float y = yv[k];
+                float z = zv[k];
+                float fx = (x <= 65535.f ? cachef[x] : (327.68f * xcbrtf(x / MAXVALF)));
+                float fy = (y <= 65535.f ? cachef[y] : (327.68f * xcbrtf(y / MAXVALF)));
+                float fz = (z <= 65535.f ? cachef[z] : (327.68f * xcbrtf(z / MAXVALF)));
+
+                L[i + k] = (116.f *  fy - 5242.88f); //5242.88=16.0*327.68;
+                a[i + k] = (500.f * (fx - fy) );
+                b[i + k] = (200.f * (fy - fz) );
+            }
+        } else {
+            const vfloat fx = cachef[xv];
+            const vfloat fy = cachef[yv];
+            const vfloat fz = cachef[zv];
+
+            STVFU(L[i], c116v *  fy - c5242d88v); //5242.88=16.0*327.68;
+            STVFU(a[i], c500v * (fx - fy));
+            STVFU(b[i], c200v * (fy - fz));
+        }
+    }
+#endif
+    for(;i < width; ++i) {
+        const float rv = R[i];
+        const float gv = G[i];
+        const float bv = B[i];
+        float x = wp[0][0] * rv + wp[0][1] * gv + wp[0][2] * bv;
+        float y = wp[1][0] * rv + wp[1][1] * gv + wp[1][2] * bv;
+        float z = wp[2][0] * rv + wp[2][1] * gv + wp[2][2] * bv;
+        XYZ2Lab(x, y, z, L[i], a[i], b[i]);
+    }
+}
+
 void Color::XYZ2Lab(float X, float Y, float Z, float &L, float &a, float &b)
 {
 
diff --git a/rtengine/color.h b/rtengine/color.h
index 59e189810..049defb70 100644
--- a/rtengine/color.h
+++ b/rtengine/color.h
@@ -475,7 +475,7 @@ public:
     * @param b channel [-42000 ; +42000] ; can be more than 42000 (return value)
     */
     static void XYZ2Lab(float x, float y, float z, float &L, float &a, float &b);
-
+    static void RGB2Lab(float *X, float *Y, float *Z, float *L, float *a, float *b, const float wp[3][3], int width);
 
     /**
     * @brief Convert Lab in Yuv
diff --git a/rtengine/improcfun.cc b/rtengine/improcfun.cc
index 8300a24e7..bfe4ee021 100644
--- a/rtengine/improcfun.cc
+++ b/rtengine/improcfun.cc
@@ -48,6 +48,227 @@
 #undef CLIPD
 #define CLIPD(a) ((a)>0.0f?((a)<1.0f?(a):1.0f):0.0f)
 
+namespace {
+
+using namespace rtengine;
+// begin of helper function for rgbProc()
+void shadowToneCurve(const LUTf &shtonecurve, float *rtemp, float *gtemp, float *btemp, int istart, int tH, int jstart, int tW, int tileSize) {
+
+#ifdef __SSE2__
+    vfloat cr = F2V(0.299f);
+    vfloat cg = F2V(0.587f);
+    vfloat cb = F2V(0.114f);
+#endif
+
+    for (int i = istart, ti = 0; i < tH; i++, ti++) {
+        int j = jstart, tj = 0;
+#ifdef __SSE2__
+        for (; j < tW - 3; j+=4, tj+=4) {
+
+            vfloat rv = LVF(rtemp[ti * tileSize + tj]);
+            vfloat gv = LVF(gtemp[ti * tileSize + tj]);
+            vfloat bv = LVF(btemp[ti * tileSize + tj]);
+
+            //shadow tone curve
+            vfloat Yv = cr * rv + cg * gv + cb * bv;
+            vfloat tonefactorv = shtonecurve(Yv);
+            STVF(rtemp[ti * tileSize + tj], rv * tonefactorv);
+            STVF(gtemp[ti * tileSize + tj], gv * tonefactorv);
+            STVF(btemp[ti * tileSize + tj], bv * tonefactorv);
+        }
+#endif
+        for (; j < tW; j++, tj++) {
+
+            float r = rtemp[ti * tileSize + tj];
+            float g = gtemp[ti * tileSize + tj];
+            float b = btemp[ti * tileSize + tj];
+
+            //shadow tone curve
+            float Y = (0.299f * r + 0.587f * g + 0.114f * b);
+            float tonefactor = shtonecurve[Y];
+            rtemp[ti * tileSize + tj] = rtemp[ti * tileSize + tj] * tonefactor;
+            gtemp[ti * tileSize + tj] = gtemp[ti * tileSize + tj] * tonefactor;
+            btemp[ti * tileSize + tj] = btemp[ti * tileSize + tj] * tonefactor;
+        }
+    }
+}
+
+void highlightToneCurve(const LUTf &hltonecurve, float *rtemp, float *gtemp, float *btemp, int istart, int tH, int jstart, int tW, int tileSize, float exp_scale, float comp, float hlrange) {
+
+#ifdef __SSE2__
+    vfloat threev = F2V(3.f);
+    vfloat maxvalfv = F2V(MAXVALF);
+#endif
+
+    for (int i = istart, ti = 0; i < tH; i++, ti++) {
+        int j = jstart, tj = 0;
+#ifdef __SSE2__
+        for (; j < tW - 3; j+=4, tj+=4) {
+
+            vfloat rv = LVF(rtemp[ti * tileSize + tj]);
+            vfloat gv = LVF(gtemp[ti * tileSize + tj]);
+            vfloat bv = LVF(btemp[ti * tileSize + tj]);
+
+            //TODO: proper treatment of out-of-gamut colors
+            //float tonefactor = hltonecurve[(0.299f*r+0.587f*g+0.114f*b)];
+            vmask maxMask = vmaskf_ge(vmaxf(rv, vmaxf(gv, bv)), maxvalfv);
+            if(_mm_movemask_ps((vfloat)maxMask)) {
+                for (int k = 0; k < 4; ++k) {
+                    float r = rtemp[ti * tileSize + tj + k];
+                    float g = gtemp[ti * tileSize + tj + k];
+                    float b = btemp[ti * tileSize + tj + k];
+                    float tonefactor = ((r < MAXVALF ? hltonecurve[r] : CurveFactory::hlcurve (exp_scale, comp, hlrange, r) ) +
+                                        (g < MAXVALF ? hltonecurve[g] : CurveFactory::hlcurve (exp_scale, comp, hlrange, g) ) +
+                                        (b < MAXVALF ? hltonecurve[b] : CurveFactory::hlcurve (exp_scale, comp, hlrange, b) ) ) / 3.0;
+
+                    // note: tonefactor includes exposure scaling, that is here exposure slider and highlight compression takes place
+                    rtemp[ti * tileSize + tj + k] = r * tonefactor;
+                    gtemp[ti * tileSize + tj + k] = g * tonefactor;
+                    btemp[ti * tileSize + tj + k] = b * tonefactor;
+                }
+            } else {
+                vfloat tonefactorv = (hltonecurve.cb(rv) + hltonecurve.cb(gv) + hltonecurve.cb(bv)) / threev;
+                // note: tonefactor includes exposure scaling, that is here exposure slider and highlight compression takes place
+                STVF(rtemp[ti * tileSize + tj], rv * tonefactorv);
+                STVF(gtemp[ti * tileSize + tj], gv * tonefactorv);
+                STVF(btemp[ti * tileSize + tj], bv * tonefactorv);
+            }
+        }
+#endif
+        for (; j < tW; j++, tj++) {
+
+            float r = rtemp[ti * tileSize + tj];
+            float g = gtemp[ti * tileSize + tj];
+            float b = btemp[ti * tileSize + tj];
+
+            //TODO: proper treatment of out-of-gamut colors
+            //float tonefactor = hltonecurve[(0.299f*r+0.587f*g+0.114f*b)];
+            float tonefactor = ((r < MAXVALF ? hltonecurve[r] : CurveFactory::hlcurve (exp_scale, comp, hlrange, r) ) +
+                                (g < MAXVALF ? hltonecurve[g] : CurveFactory::hlcurve (exp_scale, comp, hlrange, g) ) +
+                                (b < MAXVALF ? hltonecurve[b] : CurveFactory::hlcurve (exp_scale, comp, hlrange, b) ) ) / 3.0;
+
+            // note: tonefactor includes exposure scaling, that is here exposure slider and highlight compression takes place
+            rtemp[ti * tileSize + tj] = r * tonefactor;
+            gtemp[ti * tileSize + tj] = g * tonefactor;
+            btemp[ti * tileSize + tj] = b * tonefactor;
+        }
+    }
+}
+
+void proPhotoBlue(float *rtemp, float *gtemp, float *btemp, int istart, int tH, int jstart, int tW, int tileSize) {
+    // this is a hack to avoid the blue=>black bug (Issue 2141)
+    for (int i = istart, ti = 0; i < tH; i++, ti++) {
+        int j = jstart, tj = 0;
+#ifdef __SSE2__
+        for (; j < tW - 3; j+=4, tj+=4) {
+            vfloat rv = LVF(rtemp[ti * tileSize + tj]);
+            vfloat gv = LVF(gtemp[ti * tileSize + tj]);
+            vmask zeromask = vorm(vmaskf_eq(rv, ZEROV), vmaskf_eq(gv, ZEROV));
+            if(_mm_movemask_ps((vfloat)zeromask)) {
+                for (int k = 0; k < 4; ++k) {
+                    float r = rtemp[ti * tileSize + tj + k];
+                    float g = gtemp[ti * tileSize + tj + k];
+                    if (r == 0.0f || g == 0.0f) {
+                        float b = btemp[ti * tileSize + tj + k];
+                        float h, s, v;
+                        Color::rgb2hsv (r, g, b, h, s, v);
+                        s *= 0.99f;
+                        Color::hsv2rgb (h, s, v, rtemp[ti * tileSize + tj + k], gtemp[ti * tileSize + tj + k], btemp[ti * tileSize + tj + k]);
+                    }
+                }
+            }
+        }
+#endif
+        for (; j < tW; j++, tj++) {
+            float r = rtemp[ti * tileSize + tj];
+            float g = gtemp[ti * tileSize + tj];
+
+            if (r == 0.0f || g == 0.0f) {
+                float b = btemp[ti * tileSize + tj];
+                float h, s, v;
+                Color::rgb2hsv (r, g, b, h, s, v);
+                s *= 0.99f;
+                Color::hsv2rgb (h, s, v, rtemp[ti * tileSize + tj], gtemp[ti * tileSize + tj], btemp[ti * tileSize + tj]);
+            }
+        }
+    }
+}
+
+void customToneCurve(const ToneCurve &customToneCurve, ToneCurveParams::TcMode curveMode, float *rtemp, float *gtemp, float *btemp, int istart, int tH, int jstart, int tW, int tileSize, PerceptualToneCurveState ptcApplyState) {
+
+    if (curveMode == ToneCurveParams::TcMode::STD) { // Standard
+        for (int i = istart, ti = 0; i < tH; i++, ti++) {
+            const StandardToneCurve& userToneCurve = static_cast<const StandardToneCurve&> (customToneCurve);
+            userToneCurve.BatchApply (
+                    0, tW - jstart,
+                    &rtemp[ti * tileSize], &gtemp[ti * tileSize], &btemp[ti * tileSize]);
+        }
+    } else if (curveMode == ToneCurveParams::TcMode::FILMLIKE) { // Adobe like
+        for (int i = istart, ti = 0; i < tH; i++, ti++) {
+            for (int j = jstart, tj = 0; j < tW; j++, tj++) {
+                const AdobeToneCurve& userToneCurve = static_cast<const AdobeToneCurve&> (customToneCurve);
+                userToneCurve.Apply (rtemp[ti * tileSize + tj], gtemp[ti * tileSize + tj], btemp[ti * tileSize + tj]);
+            }
+        }
+    } else if (curveMode == ToneCurveParams::TcMode::SATANDVALBLENDING) { // apply the curve on the saturation and value channels
+        for (int i = istart, ti = 0; i < tH; i++, ti++) {
+            for (int j = jstart, tj = 0; j < tW; j++, tj++) {
+                const SatAndValueBlendingToneCurve& userToneCurve = static_cast<const SatAndValueBlendingToneCurve&> (customToneCurve);
+                rtemp[ti * tileSize + tj] = CLIP<float> (rtemp[ti * tileSize + tj]);
+                gtemp[ti * tileSize + tj] = CLIP<float> (gtemp[ti * tileSize + tj]);
+                btemp[ti * tileSize + tj] = CLIP<float> (btemp[ti * tileSize + tj]);
+                userToneCurve.Apply (rtemp[ti * tileSize + tj], gtemp[ti * tileSize + tj], btemp[ti * tileSize + tj]);
+            }
+        }
+    } else if (curveMode == ToneCurveParams::TcMode::WEIGHTEDSTD) { // apply the curve to the rgb channels, weighted
+        const WeightedStdToneCurve& userToneCurve = static_cast<const WeightedStdToneCurve&> (customToneCurve);
+
+        for (int i = istart, ti = 0; i < tH; i++, ti++) {
+            for (int j = jstart, tj = 0; j < tW; j++, tj++) {
+                rtemp[ti * tileSize + tj] = CLIP<float> (rtemp[ti * tileSize + tj]);
+                gtemp[ti * tileSize + tj] = CLIP<float> (gtemp[ti * tileSize + tj]);
+                btemp[ti * tileSize + tj] = CLIP<float> (btemp[ti * tileSize + tj]);
+                userToneCurve.Apply (rtemp[ti * tileSize + tj], gtemp[ti * tileSize + tj], btemp[ti * tileSize + tj]);
+            }
+        }
+    } else if (curveMode == ToneCurveParams::TcMode::LUMINANCE) { // apply the curve to the luminance channel
+        const LuminanceToneCurve& userToneCurve = static_cast<const LuminanceToneCurve&> (customToneCurve);
+
+        for (int i = istart, ti = 0; i < tH; i++, ti++) {
+            for (int j = jstart, tj = 0; j < tW; j++, tj++) {
+                rtemp[ti * tileSize + tj] = CLIP<float> (rtemp[ti * tileSize + tj]);
+                gtemp[ti * tileSize + tj] = CLIP<float> (gtemp[ti * tileSize + tj]);
+                btemp[ti * tileSize + tj] = CLIP<float> (btemp[ti * tileSize + tj]);
+                userToneCurve.Apply (rtemp[ti * tileSize + tj], gtemp[ti * tileSize + tj], btemp[ti * tileSize + tj]);
+            }
+        }
+    } else if (curveMode == ToneCurveParams::TcMode::PERCEPTUAL) { // apply curve while keeping color appearance constant
+        const PerceptualToneCurve& userToneCurve = static_cast<const PerceptualToneCurve&> (customToneCurve);
+
+        for (int i = istart, ti = 0; i < tH; i++, ti++) {
+            for (int j = jstart, tj = 0; j < tW; j++, tj++) {
+                rtemp[ti * tileSize + tj] = CLIP<float> (rtemp[ti * tileSize + tj]);
+                gtemp[ti * tileSize + tj] = CLIP<float> (gtemp[ti * tileSize + tj]);
+                btemp[ti * tileSize + tj] = CLIP<float> (btemp[ti * tileSize + tj]);
+                userToneCurve.Apply (rtemp[ti * tileSize + tj], gtemp[ti * tileSize + tj], btemp[ti * tileSize + tj], ptcApplyState);
+            }
+        }
+    }
+}
+
+void fillEditFloat(float *editIFloatTmpR, float *editIFloatTmpG, float *editIFloatTmpB, float *rtemp, float *gtemp, float *btemp, int istart, int tH, int jstart, int tW, int tileSize) {
+    for (int i = istart, ti = 0; i < tH; i++, ti++) {
+        for (int j = jstart, tj = 0; j < tW; j++, tj++) {
+            editIFloatTmpR[ti * tileSize + tj] = Color::gamma2curve[rtemp[ti * tileSize + tj]] / 65535.f;
+            editIFloatTmpG[ti * tileSize + tj] = Color::gamma2curve[gtemp[ti * tileSize + tj]] / 65535.f;
+            editIFloatTmpB[ti * tileSize + tj] = Color::gamma2curve[btemp[ti * tileSize + tj]] / 65535.f;
+        }
+    }
+}
+// end of helper function for rgbProc()
+
+}
+
 namespace rtengine
 {
 
@@ -3332,8 +3553,8 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
     float chMixBG = float (params->chmixer.blue[1]);
     float chMixBB = float (params->chmixer.blue[2]);
 
-    int shHighlights = params->sh.highlights;
-    int shShadows = params->sh.shadows;
+    int shHighlights = params->sh.highlights / 100.f;
+    int shShadows = params->sh.shadows / 100.f;
     bool blackwhite = params->blackwhite.enabled;
     bool complem = params->blackwhite.enabledcc;
     float bwr = float (params->blackwhite.mixerRed);
@@ -3501,15 +3722,13 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
                             float g = gtemp[ti * TS + tj];
                             float b = btemp[ti * TS + tj];
 
-                            double mapval = 1.0 + shmap->map[i][j];
-                            double factor = 1.0;
+                            float mapval = 1.f + shmap->map[i][j];
+                            float factor = 1.f;
 
-                            if (processSH) {
-                                if (mapval > h_th) {
-                                    factor = (h_th + (100.0 - shHighlights) * (mapval - h_th) / 100.0) / mapval;
-                                } else if (mapval < s_th) {
-                                    factor = (s_th - (100.0 - shShadows) * (s_th - mapval) / 100.0) / mapval;
-                                }
+                            if (mapval > h_th) {
+                                factor = (1.f - shHighlights) + shHighlights * h_th / mapval;
+                            } else if (mapval < s_th) {
+                                factor = (s_th - (1.f - shShadows) * (s_th - mapval)) / mapval;
                             }
 
                             rtemp[ti * TS + tj] = factor * r;
@@ -3519,61 +3738,8 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
                     }
                 }
 
-                for (int i = istart, ti = 0; i < tH; i++, ti++) {
-                    for (int j = jstart, tj = 0; j < tW; j++, tj++) {
-
-                        float r = rtemp[ti * TS + tj];
-                        float g = gtemp[ti * TS + tj];
-                        float b = btemp[ti * TS + tj];
-
-                        //TODO: proper treatment of out-of-gamut colors
-                        //float tonefactor = hltonecurve[(0.299f*r+0.587f*g+0.114f*b)];
-                        float tonefactor = ((r < MAXVALF ? hltonecurve[r] : CurveFactory::hlcurve (exp_scale, comp, hlrange, r) ) +
-                                            (g < MAXVALF ? hltonecurve[g] : CurveFactory::hlcurve (exp_scale, comp, hlrange, g) ) +
-                                            (b < MAXVALF ? hltonecurve[b] : CurveFactory::hlcurve (exp_scale, comp, hlrange, b) ) ) / 3.0;
-
-                        // note: tonefactor includes exposure scaling, that is here exposure slider and highlight compression takes place
-                        rtemp[ti * TS + tj] = r * tonefactor;
-                        gtemp[ti * TS + tj] = g * tonefactor;
-                        btemp[ti * TS + tj] = b * tonefactor;
-                    }
-                }
-
-                for (int i = istart, ti = 0; i < tH; i++, ti++) {
-                    vfloat cr = F2V(0.299f);
-                    vfloat cg = F2V(0.587f);
-                    vfloat cb = F2V(0.114f);
-                    int j = jstart;
-                    int tj = 0;
-#ifdef __SSE2__
-                    for (; j < tW - 3; j+=4, tj+=4) {
-
-                        vfloat rv = LVF(rtemp[ti * TS + tj]);
-                        vfloat gv = LVF(gtemp[ti * TS + tj]);
-                        vfloat bv = LVF(btemp[ti * TS + tj]);
-
-                        //shadow tone curve
-                        vfloat Yv = cr * rv + cg * gv + cb * bv;
-                        vfloat tonefactorv = shtonecurve(Yv);
-                        STVF(rtemp[ti * TS + tj], rv * tonefactorv);
-                        STVF(gtemp[ti * TS + tj], gv * tonefactorv);
-                        STVF(btemp[ti * TS + tj], bv * tonefactorv);
-                    }
-#endif
-                    for (; j < tW; j++, tj++) {
-
-                        float r = rtemp[ti * TS + tj];
-                        float g = gtemp[ti * TS + tj];
-                        float b = btemp[ti * TS + tj];
-
-                        //shadow tone curve
-                        float Y = (0.299f * r + 0.587f * g + 0.114f * b);
-                        float tonefactor = shtonecurve[Y];
-                        rtemp[ti * TS + tj] = rtemp[ti * TS + tj] * tonefactor;
-                        gtemp[ti * TS + tj] = gtemp[ti * TS + tj] * tonefactor;
-                        btemp[ti * TS + tj] = btemp[ti * TS + tj] * tonefactor;
-                    }
-                }
+                highlightToneCurve(hltonecurve, rtemp, gtemp, btemp, istart, tH, jstart, tW, TS, exp_scale, comp, hlrange);
+                shadowToneCurve(shtonecurve, rtemp, gtemp, btemp, istart, tH, jstart, tW, TS);
 
                 if (dcpProf) {
                     dcpProf->step2ApplyTile (rtemp, gtemp, btemp, tW - jstart, tH - istart, TS, asIn);
@@ -3581,22 +3747,10 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
 
                 for (int i = istart, ti = 0; i < tH; i++, ti++) {
                     for (int j = jstart, tj = 0; j < tW; j++, tj++) {
-                        float r = rtemp[ti * TS + tj];
-                        float g = gtemp[ti * TS + tj];
-                        float b = btemp[ti * TS + tj];
-
-                        // clip out of gamut colors, without distorting color too bad
-                        if (r < 0) {
-                            r = 0;
-                        }
-
-                        if (g < 0) {
-                            g = 0;
-                        }
-
-                        if (b < 0) {
-                            b = 0;
-                        }
+                        // clip out of gamut colors, without distorting colour too bad
+                        float r = std::max(rtemp[ti * TS + tj], 0.f);
+                        float g = std::max(gtemp[ti * TS + tj], 0.f);
+                        float b = std::max(btemp[ti * TS + tj], 0.f);
 
                         if (r > 65535 || g > 65535 || b > 65535) {
                             filmlike_clip (&r, &g, &b);
@@ -3624,12 +3778,14 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
                 } else {
                     for (int i = istart, ti = 0; i < tH; i++, ti++) {
                         int j = jstart, tj = 0;
+#ifdef __SSE2__
                         for (; j < tW - 3; j+=4, tj+=4) {
                             //brightness/contrast
                             STVF(rtemp[ti * TS + tj], tonecurve(LVF(rtemp[ti * TS + tj])));
                             STVF(gtemp[ti * TS + tj], tonecurve(LVF(gtemp[ti * TS + tj])));
                             STVF(btemp[ti * TS + tj], tonecurve(LVF(btemp[ti * TS + tj])));
                         }
+#endif
                         for (; j < tW; j++, tj++) {
                             //brightness/contrast
                             rtemp[ti * TS + tj] = tonecurve[rtemp[ti * TS + tj]];
@@ -3640,133 +3796,19 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
                 }
 
                 if (editID == EUID_ToneCurve1) {  // filling the pipette buffer
-                    for (int i = istart, ti = 0; i < tH; i++, ti++) {
-                        for (int j = jstart, tj = 0; j < tW; j++, tj++) {
-                            editIFloatTmpR[ti * TS + tj] = Color::gamma2curve[rtemp[ti * TS + tj]] / 65535.f;
-                            editIFloatTmpG[ti * TS + tj] = Color::gamma2curve[gtemp[ti * TS + tj]] / 65535.f;
-                            editIFloatTmpB[ti * TS + tj] = Color::gamma2curve[btemp[ti * TS + tj]] / 65535.f;
-                        }
-                    }
+                    fillEditFloat(editIFloatTmpR, editIFloatTmpG, editIFloatTmpB, rtemp, gtemp, btemp, istart, tH, jstart, tW, TS);
                 }
 
                 if (hasToneCurve1) {
-                    if (curveMode == ToneCurveParams::TcMode::STD) { // Standard
-                        for (int i = istart, ti = 0; i < tH; i++, ti++) {
-                            const StandardToneCurve& userToneCurve = static_cast<const StandardToneCurve&> (customToneCurve1);
-                            userToneCurve.BatchApply (
-                                    0, tW - jstart,
-                                    &rtemp[ti * TS], &gtemp[ti * TS], &btemp[ti * TS]);
-                        }
-                    } else if (curveMode == ToneCurveParams::TcMode::FILMLIKE) { // Adobe like
-                        for (int i = istart, ti = 0; i < tH; i++, ti++) {
-                            for (int j = jstart, tj = 0; j < tW; j++, tj++) {
-                                const AdobeToneCurve& userToneCurve = static_cast<const AdobeToneCurve&> (customToneCurve1);
-                                userToneCurve.Apply (rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj]);
-                            }
-                        }
-                    } else if (curveMode == ToneCurveParams::TcMode::SATANDVALBLENDING) { // apply the curve on the saturation and value channels
-                        for (int i = istart, ti = 0; i < tH; i++, ti++) {
-                            for (int j = jstart, tj = 0; j < tW; j++, tj++) {
-                                const SatAndValueBlendingToneCurve& userToneCurve = static_cast<const SatAndValueBlendingToneCurve&> (customToneCurve1);
-                                rtemp[ti * TS + tj] = CLIP<float> (rtemp[ti * TS + tj]);
-                                gtemp[ti * TS + tj] = CLIP<float> (gtemp[ti * TS + tj]);
-                                btemp[ti * TS + tj] = CLIP<float> (btemp[ti * TS + tj]);
-                                userToneCurve.Apply (rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj]);
-                            }
-                        }
-                    } else if (curveMode == ToneCurveParams::TcMode::WEIGHTEDSTD) { // apply the curve to the rgb channels, weighted
-                        const WeightedStdToneCurve& userToneCurve = static_cast<const WeightedStdToneCurve&> (customToneCurve1);
-
-                        for (int i = istart, ti = 0; i < tH; i++, ti++) {
-                            for (int j = jstart, tj = 0; j < tW; j++, tj++) {
-                                rtemp[ti * TS + tj] = CLIP<float> (rtemp[ti * TS + tj]);
-                                gtemp[ti * TS + tj] = CLIP<float> (gtemp[ti * TS + tj]);
-                                btemp[ti * TS + tj] = CLIP<float> (btemp[ti * TS + tj]);
-                                userToneCurve.Apply (rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj]);
-                            }
-                        }
-                    } else if (curveMode == ToneCurveParams::TcMode::LUMINANCE) { // apply the curve to the luminance channel
-                        const LuminanceToneCurve& userToneCurve = static_cast<const LuminanceToneCurve&> (customToneCurve1);
-
-                        for (int i = istart, ti = 0; i < tH; i++, ti++) {
-                            for (int j = jstart, tj = 0; j < tW; j++, tj++) {
-                                rtemp[ti * TS + tj] = CLIP<float> (rtemp[ti * TS + tj]);
-                                gtemp[ti * TS + tj] = CLIP<float> (gtemp[ti * TS + tj]);
-                                btemp[ti * TS + tj] = CLIP<float> (btemp[ti * TS + tj]);
-                                userToneCurve.Apply (rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj]);
-                            }
-                        }
-                    } else if (curveMode == ToneCurveParams::TcMode::PERCEPTUAL) { // apply curve while keeping color appearance constant
-                        const PerceptualToneCurve& userToneCurve = static_cast<const PerceptualToneCurve&> (customToneCurve1);
-
-                        for (int i = istart, ti = 0; i < tH; i++, ti++) {
-                            for (int j = jstart, tj = 0; j < tW; j++, tj++) {
-                                rtemp[ti * TS + tj] = CLIP<float> (rtemp[ti * TS + tj]);
-                                gtemp[ti * TS + tj] = CLIP<float> (gtemp[ti * TS + tj]);
-                                btemp[ti * TS + tj] = CLIP<float> (btemp[ti * TS + tj]);
-                                userToneCurve.Apply (rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj], ptc1ApplyState);
-                            }
-                        }
-                    }
+                    customToneCurve(customToneCurve1, curveMode, rtemp, gtemp, btemp, istart, tH, jstart, tW, TS, ptc1ApplyState);
                 }
 
                 if (editID == EUID_ToneCurve2) {  // filling the pipette buffer
-                    for (int i = istart, ti = 0; i < tH; i++, ti++) {
-                        for (int j = jstart, tj = 0; j < tW; j++, tj++) {
-                            editIFloatTmpR[ti * TS + tj] = Color::gamma2curve[rtemp[ti * TS + tj]] / 65535.f;
-                            editIFloatTmpG[ti * TS + tj] = Color::gamma2curve[gtemp[ti * TS + tj]] / 65535.f;
-                            editIFloatTmpB[ti * TS + tj] = Color::gamma2curve[btemp[ti * TS + tj]] / 65535.f;
-                        }
-                    }
+                    fillEditFloat(editIFloatTmpR, editIFloatTmpG, editIFloatTmpB, rtemp, gtemp, btemp, istart, tH, jstart, tW, TS);
                 }
 
                 if (hasToneCurve2) {
-                    if (curveMode2 == ToneCurveParams::TcMode::STD) { // Standard
-                        for (int i = istart, ti = 0; i < tH; i++, ti++) {
-                            const StandardToneCurve& userToneCurve = static_cast<const StandardToneCurve&> (customToneCurve2);
-                            userToneCurve.BatchApply (
-                                    0, tW - jstart,
-                                    &rtemp[ti * TS], &gtemp[ti * TS], &btemp[ti * TS]);
-                        }
-                    } else if (curveMode2 == ToneCurveParams::TcMode::FILMLIKE) { // Adobe like
-                        for (int i = istart, ti = 0; i < tH; i++, ti++) {
-                            for (int j = jstart, tj = 0; j < tW; j++, tj++) {
-                                const AdobeToneCurve& userToneCurve = static_cast<const AdobeToneCurve&> (customToneCurve2);
-                                userToneCurve.Apply (rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj]);
-                            }
-                        }
-                    } else if (curveMode2 == ToneCurveParams::TcMode::SATANDVALBLENDING) { // apply the curve on the saturation and value channels
-                        for (int i = istart, ti = 0; i < tH; i++, ti++) {
-                            for (int j = jstart, tj = 0; j < tW; j++, tj++) {
-                                const SatAndValueBlendingToneCurve& userToneCurve = static_cast<const SatAndValueBlendingToneCurve&> (customToneCurve2);
-                                userToneCurve.Apply (rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj]);
-                            }
-                        }
-                    } else if (curveMode2 == ToneCurveParams::TcMode::WEIGHTEDSTD) { // apply the curve to the rgb channels, weighted
-                        const WeightedStdToneCurve& userToneCurve = static_cast<const WeightedStdToneCurve&> (customToneCurve2);
-
-                        for (int i = istart, ti = 0; i < tH; i++, ti++) {
-                            for (int j = jstart, tj = 0; j < tW; j++, tj++) {
-                                userToneCurve.Apply (rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj]);
-                            }
-                        }
-                    } else if (curveMode2 == ToneCurveParams::TcMode::LUMINANCE) { // apply the curve to the luminance channel
-                        const LuminanceToneCurve& userToneCurve = static_cast<const LuminanceToneCurve&> (customToneCurve2);
-
-                        for (int i = istart, ti = 0; i < tH; i++, ti++) {
-                            for (int j = jstart, tj = 0; j < tW; j++, tj++) {
-                                userToneCurve.Apply (rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj]);
-                            }
-                        }
-                    } else if (curveMode2 == ToneCurveParams::TcMode::PERCEPTUAL) { // apply curve while keeping color appearance constant
-                        const PerceptualToneCurve& userToneCurve = static_cast<const PerceptualToneCurve&> (customToneCurve2);
-
-                        for (int i = istart, ti = 0; i < tH; i++, ti++) {
-                            for (int j = jstart, tj = 0; j < tW; j++, tj++) {
-                                userToneCurve.Apply (rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj], ptc2ApplyState);
-                            }
-                        }
-                    }
+                    customToneCurve(customToneCurve2, curveMode2, rtemp, gtemp, btemp, istart, tH, jstart, tW, TS, ptc2ApplyState);
                 }
 
                 if (editID == EUID_RGB_R) {
@@ -3978,20 +4020,7 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
                 }
 
                 if (isProPhoto) { // this is a hack to avoid the blue=>black bug (Issue 2141)
-                    for (int i = istart, ti = 0; i < tH; i++, ti++) {
-                        for (int j = jstart, tj = 0; j < tW; j++, tj++) {
-                            float r = rtemp[ti * TS + tj];
-                            float g = gtemp[ti * TS + tj];
-
-                            if (r == 0.0f || g == 0.0f) {
-                                float b = btemp[ti * TS + tj];
-                                float h, s, v;
-                                Color::rgb2hsv (r, g, b, h, s, v);
-                                s *= 0.99f;
-                                Color::hsv2rgb (h, s, v, rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj]);
-                            }
-                        }
-                    }
+                    proPhotoBlue(rtemp, gtemp, btemp, istart, tH, jstart, tW, TS);
                 }
 
                 if (hasColorToning && !blackwhite) {
@@ -4195,13 +4224,7 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
 
                 // filling the pipette buffer
                 if (editID == EUID_BlackWhiteBeforeCurve) {
-                    for (int i = istart, ti = 0; i < tH; i++, ti++) {
-                        for (int j = jstart, tj = 0; j < tW; j++, tj++) {
-                            editIFloatTmpR[ti * TS + tj] = Color::gamma2curve[rtemp[ti * TS + tj]] / 65535.f;
-                            editIFloatTmpG[ti * TS + tj] = Color::gamma2curve[gtemp[ti * TS + tj]] / 65535.f;
-                            editIFloatTmpB[ti * TS + tj] = Color::gamma2curve[btemp[ti * TS + tj]] / 65535.f;
-                        }
-                    }
+                    fillEditFloat(editIFloatTmpR, editIFloatTmpG, editIFloatTmpB, rtemp, gtemp, btemp, istart, tH, jstart, tW, TS);
                 } else if (editID == EUID_BlackWhiteLuminance) {
                     for (int i = istart, ti = 0; i < tH; i++, ti++) {
                         for (int j = jstart, tj = 0; j < tW; j++, tj++) {
@@ -4494,53 +4517,24 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
 
 
                 if (!blackwhite) {
+                    if (editImgFloat || editWhatever) {
+                        for (int i = istart, ti = 0; i < tH; i++, ti++) {
+                            for (int j = jstart, tj = 0; j < tW; j++, tj++) {
+
+                                // filling the pipette buffer by the content of the temp pipette buffers
+                                if (editImgFloat) {
+                                    editImgFloat->r (i, j) = editIFloatTmpR[ti * TS + tj];
+                                    editImgFloat->g (i, j) = editIFloatTmpG[ti * TS + tj];
+                                    editImgFloat->b (i, j) = editIFloatTmpB[ti * TS + tj];
+                                } else if (editWhatever) {
+                                    editWhatever->v (i, j) = editWhateverTmp[ti * TS + tj];
+                                }
+                            }
+                        }
+                    }
                     // ready, fill lab
                     for (int i = istart, ti = 0; i < tH; i++, ti++) {
-                        for (int j = jstart, tj = 0; j < tW; j++, tj++) {
-
-                            // filling the pipette buffer by the content of the temp pipette buffers
-                            if (editImgFloat) {
-                                editImgFloat->r (i, j) = editIFloatTmpR[ti * TS + tj];
-                                editImgFloat->g (i, j) = editIFloatTmpG[ti * TS + tj];
-                                editImgFloat->b (i, j) = editIFloatTmpB[ti * TS + tj];
-                            } else if (editWhatever) {
-                                editWhatever->v (i, j) = editWhateverTmp[ti * TS + tj];
-                            }
-
-                            float r = rtemp[ti * TS + tj];
-                            float g = gtemp[ti * TS + tj];
-                            float b = btemp[ti * TS + tj];
-
-                            float x = toxyz[0][0] * r + toxyz[0][1] * g + toxyz[0][2] * b;
-                            float y = toxyz[1][0] * r + toxyz[1][1] * g + toxyz[1][2] * b;
-                            float z = toxyz[2][0] * r + toxyz[2][1] * g + toxyz[2][2] * b;
-
-                            float fx, fy, fz;
-
-                            fx = (x < 65535.0f ? Color::cachef[x] : 327.68f * std::cbrt (x / MAXVALF));
-                            fy = (y < 65535.0f ? Color::cachef[y] : 327.68f * std::cbrt (y / MAXVALF));
-                            fz = (z < 65535.0f ? Color::cachef[z] : 327.68f * std::cbrt (z / MAXVALF));
-
-                            lab->L[i][j] = (116.0f *  fy - 5242.88f); //5242.88=16.0*327.68;
-                            lab->a[i][j] = (500.0f * (fx - fy) );
-                            lab->b[i][j] = (200.0f * (fy - fz) );
-
-                            //test for color accuracy
-                            /*
-                            float fy = (0.00862069 * lab->L[i][j])/327.68 + 0.137932; // (L+16)/116
-                            float fx = (0.002 * lab->a[i][j])/327.68 + fy;
-                            float fz = fy - (0.005 * lab->b[i][j])/327.68;
-
-                            float x_ = 65535*Lab2xyz(fx)*Color::D50x;
-                            float y_ = 65535*Lab2xyz(fy);
-                            float z_ = 65535*Lab2xyz(fz)*Color::D50z;
-
-                            int R,G,B;
-                            xyz2srgb(x_,y_,z_,R,G,B);
-                            r=(float)R; g=(float)G; b=(float)B;
-                            float xxx=1;
-                            */
-                        }
+                        Color::RGB2Lab(&rtemp[ti * TS], &gtemp[ti * TS], &btemp[ti * TS], &(lab->L[i][jstart]), &(lab->a[i][jstart]), &(lab->b[i][jstart]), toxyz, tW - jstart);
                     }
                 } else { // black & white
                     // Auto channel mixer needs whole image, so we now copy to tmpImage and close the tiled processing
@@ -4952,25 +4946,7 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
 #endif
 
         for (int i = 0; i < tH; i++) {
-            for (int j = 0; j < tW; j++) {
-                float r = tmpImage->r (i, j);
-                float g = tmpImage->g (i, j);
-                float b = tmpImage->b (i, j);
-
-                float x = toxyz[0][0] * r + toxyz[0][1] * g + toxyz[0][2] * b;
-                float y = toxyz[1][0] * r + toxyz[1][1] * g + toxyz[1][2] * b;
-                float z = toxyz[2][0] * r + toxyz[2][1] * g + toxyz[2][2] * b;
-
-                float fx, fy, fz;
-
-                fx = (x < MAXVALF ? Color::cachef[x] : 327.68f * std::cbrt (x / MAXVALF));
-                fy = (y < MAXVALF ? Color::cachef[y] : 327.68f * std::cbrt (y / MAXVALF));
-                fz = (z < MAXVALF ? Color::cachef[z] : 327.68f * std::cbrt (z / MAXVALF));
-
-                lab->L[i][j] = 116.0f *  fy - 5242.88f; //5242.88=16.0*327.68;
-                lab->a[i][j] = 500.0f * (fx - fy);
-                lab->b[i][j] = 200.0f * (fy - fz);
-            }
+            Color::RGB2Lab(tmpImage->r(i), tmpImage->g(i), tmpImage->b(i), lab->L[i], lab->a[i], lab->b[i], toxyz, tW);
         }