diff --git a/rtengine/ciecam02.cc b/rtengine/ciecam02.cc
index 47b24eba0..ec9d6722d 100644
--- a/rtengine/ciecam02.cc
+++ b/rtengine/ciecam02.cc
@@ -462,7 +462,6 @@ void Ciecam02::cat02_to_xyz( double &x, double &y, double &z, double r, double g
     }
 }
 
-#ifndef __SSE2__
 void Ciecam02::cat02_to_xyzfloat( float &x, float &y, float &z, float r, float g, float b, int gamu )
 {
     gamu = 1;
@@ -480,7 +479,7 @@ void Ciecam02::cat02_to_xyzfloat( float &x, float &y, float &z, float r, float g
         z = ( 0.000000f * r) - (0.000000f * g) + (1.000000f * b);
     }
 }
-#else
+#ifdef __SSE2__
 void Ciecam02::cat02_to_xyzfloat( vfloat &x, vfloat &y, vfloat &z, vfloat r, vfloat g, vfloat b )
 {
     //gamut correction M.H.Brill S.Susstrunk
@@ -497,14 +496,14 @@ void Ciecam02::hpe_to_xyz( double &x, double &y, double &z, double r, double g,
     z = b;
 }
 
-#ifndef __SSE2__
+
 void Ciecam02::hpe_to_xyzfloat( float &x, float &y, float &z, float r, float g, float b )
 {
     x = (1.910197f * r) - (1.112124f * g) + (0.201908f * b);
     y = (0.370950f * r) + (0.629054f * g) - (0.000008f * b);
     z = b;
 }
-#else
+#ifdef __SSE2__
 void Ciecam02::hpe_to_xyzfloat( vfloat &x, vfloat &y, vfloat &z, vfloat r, vfloat g, vfloat b )
 {
     x = (F2V(1.910197f) * r) - (F2V(1.112124f) * g) + (F2V(0.201908f) * b);
@@ -565,7 +564,6 @@ void Ciecam02::Aab_to_rgb( double &r, double &g, double &b, double A, double aa,
     b = (0.32787 * x) - (0.15681 * aa) - (4.49038 * bb);
 }
 
-#ifndef __SSE2__
 void Ciecam02::Aab_to_rgbfloat( float &r, float &g, float &b, float A, float aa, float bb, float nbb )
 {
     float x = (A / nbb) + 0.305f;
@@ -577,7 +575,7 @@ void Ciecam02::Aab_to_rgbfloat( float &r, float &g, float &b, float A, float aa,
     /*       c1              c6               c7       */
     b = (0.32787f * x) - (0.15681f * aa) - (4.49038f * bb);
 }
-#else
+#ifdef __SSE2__
 void Ciecam02::Aab_to_rgbfloat( vfloat &r, vfloat &g, vfloat &b, vfloat A, vfloat aa, vfloat bb, vfloat nbb )
 {
     vfloat c1 = F2V(0.32787f) * ((A / nbb) + F2V(0.305f));
@@ -619,7 +617,6 @@ void Ciecam02::calculate_ab( double &aa, double &bb, double h, double e, double
         bb = (aa * sinh) / cosh;
     }
 }
-#ifndef __SSE2__
 void Ciecam02::calculate_abfloat( float &aa, float &bb, float h, float e, float t, float nbb, float a )
 {
     float2 sincosval = xsincosf((h * M_PI) / 180.0f);
@@ -657,7 +654,7 @@ void Ciecam02::calculate_abfloat( float &aa, float &bb, float h, float e, float
         std::swap(aa, bb);
     }
 }
-#else
+#ifdef __SSE2__
 void Ciecam02::calculate_abfloat( vfloat &aa, vfloat &bb, vfloat h, vfloat e, vfloat t, vfloat nbb, vfloat a )
 {
     vfloat2 sincosval = xsincosf((h * F2V(M_PI)) / F2V(180.0f));
@@ -862,7 +859,7 @@ void Ciecam02::xyz2jchqms_ciecam02( double &J, double &C, double &h, double &Q,
 
 void Ciecam02::xyz2jchqms_ciecam02float( float &J, float &C, float &h, float &Q, float &M, float &s, float &aw, float &fl, float &wh,
         float x, float y, float z, float xw, float yw, float zw,
-        float yb, float la, float f, float c, float nc, float pilotd, int gamu, float pow1, float nbb, float ncb, float pfl, float cz, float d)
+        float c, float nc, int gamu, float pow1, float nbb, float ncb, float pfl, float cz, float d)
 
 {
     float r, g, b;
@@ -876,9 +873,9 @@ void Ciecam02::xyz2jchqms_ciecam02float( float &J, float &C, float &h, float &Q,
     gamu = 1;
     xyz_to_cat02float( r, g, b, x, y, z, gamu );
     xyz_to_cat02float( rw, gw, bw, xw, yw, zw, gamu );
-    rc = r * (((yw * d) / rw) + (1.0 - d));
-    gc = g * (((yw * d) / gw) + (1.0 - d));
-    bc = b * (((yw * d) / bw) + (1.0 - d));
+    rc = r * (((yw * d) / rw) + (1.f - d));
+    gc = g * (((yw * d) / gw) + (1.f - d));
+    bc = b * (((yw * d) / bw) + (1.f - d));
 
     cat02_to_hpefloat( rp, gp, bp, rc, gc, bc, gamu );
 
@@ -924,7 +921,7 @@ void Ciecam02::xyz2jchqms_ciecam02float( float &J, float &C, float &h, float &Q,
 #ifdef __SSE2__
 void Ciecam02::xyz2jchqms_ciecam02float( vfloat &J, vfloat &C, vfloat &h, vfloat &Q, vfloat &M, vfloat &s, vfloat aw, vfloat fl, vfloat wh,
         vfloat x, vfloat y, vfloat z, vfloat xw, vfloat yw, vfloat zw,
-        vfloat yb, vfloat la, vfloat f, vfloat c, vfloat nc, vfloat pow1, vfloat nbb, vfloat ncb, vfloat pfl, vfloat cz, vfloat d)
+        vfloat c, vfloat nc, vfloat pow1, vfloat nbb, vfloat ncb, vfloat pfl, vfloat cz, vfloat d)
 
 {
     vfloat r, g, b;
@@ -979,6 +976,65 @@ void Ciecam02::xyz2jchqms_ciecam02float( vfloat &J, vfloat &C, vfloat &h, vfloat
 }
 #endif
 
+// Forward CIECAM02 transform reduced to the J, C and h outputs (Q, M and s are not computed).
+// aw, fl, c, nc, pow1, nbb, ncb, cz and d are viewing-condition terms precomputed by the caller;
+// h is returned in degrees. Non-finite results are not filtered here.
+void Ciecam02::xyz2jch_ciecam02float( float &J, float &C, float &h, float aw, float fl,
+                                      float x, float y, float z, float xw, float yw, float zw,
+                                      float c, float nc, float pow1, float nbb, float ncb, float cz, float d)
+{
+    int gamu = 1; // gamut correction flag, fixed to 1 in this variant
+    float r, g, b;
+    float rw, gw, bw;
+    xyz_to_cat02float( r, g, b, x, y, z, gamu );
+    xyz_to_cat02float( rw, gw, bw, xw, yw, zw, gamu );
+
+    // scale each CAT02 channel by (yw * d / white channel) + (1 - d)
+    float rc = r * (((yw * d) / rw) + (1.f - d));
+    float gc = g * (((yw * d) / gw) + (1.f - d));
+    float bc = b * (((yw * d) / bw) + (1.f - d));
+
+    float rp, gp, bp;
+    cat02_to_hpefloat( rp, gp, bp, rc, gc, bc, gamu );
+
+    if (gamu == 1) { //gamut correction M.H.Brill S.Susstrunk
+        rp = MAXR(rp, 0.0f);
+        gp = MAXR(gp, 0.0f);
+        bp = MAXR(bp, 0.0f);
+    }
+
+    const float rpa = nonlinear_adaptationfloat( rp, fl );
+    const float gpa = nonlinear_adaptationfloat( gp, fl );
+    const float bpa = nonlinear_adaptationfloat( bp, fl );
+
+    float ca = rpa - ((12.0f * gpa) - bpa) / 11.0f;
+    float cb = (0.11111111f) * (rpa + gpa - (2.0f * bpa));
+
+    // hue angle; negative values are shifted up by 2*pi using float arithmetic only
+    float myh = xatan2f( cb, ca );
+
+    if ( myh < 0.0f ) {
+        myh += 2.f * (float)M_PI;
+    }
+
+    float a = ((2.0f * rpa) + gpa + (0.05f * bpa) - 0.305f) * nbb;
+
+    if (gamu == 1) {
+        a = MAXR(a, 0.0f); //gamut correction M.H.Brill S.Susstrunk
+    }
+
+    // J temporarily holds (a / aw)^(c * cz / 2), the factor the chroma formula multiplies by
+    J = pow_F( a / aw, c * cz * 0.5f);
+
+    const float e = ((961.53846f) * nc * ncb) * (xcosf( myh + 2.0f ) + 3.8f);
+    float t = (e * sqrtf( (ca * ca) + (cb * cb) )) / (rpa + gpa + (1.05f * bpa));
+    C = pow_F( t, 0.9f ) * J * pow1;
+
+    J *= J * 100.0f; // J = 100 * (a / aw)^(c * cz)
+    h = (myh * 180.f) / (float)M_PI; // radians to degrees
+}
+
+
 void Ciecam02::jch2xyz_ciecam02( double &x, double &y, double &z, double J, double C, double h,
                                  double xw, double yw, double zw, double yb, double la,
                                  double f, double c, double nc , int gamu, double n, double nbb, double ncb, double fl, double cz, double d, double aw )
@@ -1012,9 +1068,9 @@ void Ciecam02::jch2xyz_ciecam02( double &x, double &y, double &z, double J, doub
 
     cat02_to_xyz( x, y, z, r, g, b, gamu );
 }
-#ifndef __SSE2__
+
 void Ciecam02::jch2xyz_ciecam02float( float &x, float &y, float &z, float J, float C, float h,
-                                      float xw, float yw, float zw, float yb, float la,
+                                      float xw, float yw, float zw,
                                       float f, float c, float nc , int gamu, float pow1, float nbb, float ncb, float fl, float cz, float d, float aw)
 {
     float r, g, b;
@@ -1047,9 +1103,9 @@ void Ciecam02::jch2xyz_ciecam02float( float &x, float &y, float &z, float J, flo
     cat02_to_xyzfloat( x, y, z, r, g, b, gamu );
 }
 
-#else
+#ifdef __SSE2__
 void Ciecam02::jch2xyz_ciecam02float( vfloat &x, vfloat &y, vfloat &z, vfloat J, vfloat C, vfloat h,
-                                      vfloat xw, vfloat yw, vfloat zw, vfloat yb, vfloat la,
+                                      vfloat xw, vfloat yw, vfloat zw,
                                       vfloat f, vfloat nc, vfloat pow1, vfloat nbb, vfloat ncb, vfloat fl, vfloat d, vfloat aw, vfloat reccmcz)
 {
     vfloat r, g, b;
@@ -1135,7 +1191,6 @@ double Ciecam02::inverse_nonlinear_adaptation( double c, double fl )
     return c1 * (100.0 / fl) * pow( (27.13 * fabs( c - 0.1 )) / (400.0 - fabs( c - 0.1 )), 1.0 / 0.42 );
 }
 
-#ifndef __SSE2__
 float Ciecam02::inverse_nonlinear_adaptationfloat( float c, float fl )
 {
     c -= 0.1f;
@@ -1153,7 +1208,7 @@ float Ciecam02::inverse_nonlinear_adaptationfloat( float c, float fl )
     return (100.0f / fl) * pow_F( (27.13f * fabsf( c )) / (400.0f - fabsf( c )), 2.38095238f );
 }
 
-#else
+#ifdef __SSE2__
 vfloat Ciecam02::inverse_nonlinear_adaptationfloat( vfloat c, vfloat fl )
 {
     c -= F2V(0.1f);
diff --git a/rtengine/ciecam02.h b/rtengine/ciecam02.h
index b598f4678..e5b61d466 100644
--- a/rtengine/ciecam02.h
+++ b/rtengine/ciecam02.h
@@ -55,13 +55,13 @@ private:
     static float nonlinear_adaptationfloat( float c, float fl );
     static double inverse_nonlinear_adaptation( double c, double fl );
 
-#ifndef __SSE2__
+
     static float inverse_nonlinear_adaptationfloat( float c, float fl );
     static void calculate_abfloat( float &aa, float &bb, float h, float e, float t, float nbb, float a );
     static void Aab_to_rgbfloat( float &r, float &g, float &b, float A, float aa, float bb, float nbb );
     static void hpe_to_xyzfloat   ( float &x,  float &y,  float &z,  float r, float g, float b );
     static void cat02_to_xyzfloat ( float &x,  float &y,  float &z,  float r, float g, float b, int gamu );
-#else
+#ifdef __SSE2__
     static vfloat inverse_nonlinear_adaptationfloat( vfloat c, vfloat fl );
     static void calculate_abfloat( vfloat &aa, vfloat &bb, vfloat h, vfloat e, vfloat t, vfloat nbb, vfloat a );
     static void Aab_to_rgbfloat( vfloat &r, vfloat &g, vfloat &b, vfloat A, vfloat aa, vfloat bb, vfloat nbb );
@@ -85,17 +85,15 @@ public:
                                   double yb, double la,
                                   double f, double c, double nc, int gamu, double n, double nbb, double ncb, double fl, double cz, double d, double aw);
 
-#ifndef __SSE2__
+
     static void jch2xyz_ciecam02float( float &x, float &y, float &z,
                                        float J, float C, float h,
                                        float xw, float yw, float zw,
-                                       float yb, float la,
                                        float f, float c, float nc, int gamu, float n, float nbb, float ncb, float fl, float cz, float d, float aw );
-#else
+#ifdef __SSE2__
     static void jch2xyz_ciecam02float( vfloat &x, vfloat &y, vfloat &z,
                                        vfloat J, vfloat C, vfloat h,
                                        vfloat xw, vfloat yw, vfloat zw,
-                                       vfloat yb, vfloat la,
                                        vfloat f, vfloat nc, vfloat n, vfloat nbb, vfloat ncb, vfloat fl, vfloat d, vfloat aw, vfloat reccmcz );
 #endif
     /**
@@ -120,20 +118,24 @@ public:
                                      double yb, double la,
                                      double f, double c, double nc,  double pilotd, int gamu , double n, double nbb, double ncb, double pfl, double cz, double d );
 
+    // Forward transform to J, C and h only. The parameter after nc is the precomputed chroma factor
+    // pow1 (named to match the definition, which multiplies it directly into C).
+    static void xyz2jch_ciecam02float( float &J, float &C, float &h, float aw, float fl,
+                                       float x, float y, float z, float xw, float yw, float zw,
+                                       float c, float nc, float pow1, float nbb, float ncb, float cz, float d  );
+
     static void xyz2jchqms_ciecam02float( float &J, float &C, float &h,
                                           float &Q, float &M, float &s, float &aw, float &fl, float &wh,
                                           float x, float y, float z,
                                           float xw, float yw, float zw,
-                                          float yb, float la,
-                                          float f, float c, float nc,  float pilotd, int gamu, float n, float nbb, float ncb, float pfl, float cz, float d  );
+                                          float c, float nc, int gamu, float n, float nbb, float ncb, float pfl, float cz, float d  );
 
 #ifdef __SSE2__
     static void xyz2jchqms_ciecam02float( vfloat &J, vfloat &C, vfloat &h,
                                           vfloat &Q, vfloat &M, vfloat &s, vfloat aw, vfloat fl, vfloat wh,
                                           vfloat x, vfloat y, vfloat z,
                                           vfloat xw, vfloat yw, vfloat zw,
-                                          vfloat yb, vfloat la,
-                                          vfloat f, vfloat c, vfloat nc, vfloat n, vfloat nbb, vfloat ncb, vfloat pfl, vfloat cz, vfloat d  );
+                                          vfloat c, vfloat nc, vfloat n, vfloat nbb, vfloat ncb, vfloat pfl, vfloat cz, vfloat d  );
 
 
 #endif
diff --git a/rtengine/curves.cc b/rtengine/curves.cc
index 4e94be1b8..f4316f128 100644
--- a/rtengine/curves.cc
+++ b/rtengine/curves.cc
@@ -33,6 +33,7 @@
 #include "LUT.h"
 #include "curves.h"
 #include "opthelper.h"
+#include "ciecam02.h"
 #undef CLIPD
 #define CLIPD(a) ((a)>0.0f?((a)<1.0f?(a):1.0f):0.0f)
 
@@ -2083,14 +2084,14 @@ float PerceptualToneCurve::calculateToneCurveContrastValue(void) const
 void PerceptualToneCurve::Apply(float &r, float &g, float &b, PerceptualToneCurveState & state) const
 {
     float x, y, z;
-    cmsCIEXYZ XYZ;
-    cmsJCh JCh;
-
-    int thread_idx = 0;
-#ifdef _OPENMP
-    thread_idx = omp_get_thread_num();
-#endif
+//  cmsCIEXYZ XYZ;
+//  cmsJCh JCh;
 
+    /*  int thread_idx = 0;
+    #ifdef _OPENMP
+        thread_idx = omp_get_thread_num();
+    #endif
+    */
     if (!state.isProphoto) {
         // convert to prophoto space to make sure the same result is had regardless of working color space
         float newr = state.Working2Prophoto[0][0] * r + state.Working2Prophoto[0][1] * g + state.Working2Prophoto[0][2] * b;
@@ -2134,12 +2135,21 @@ void PerceptualToneCurve::Apply(float &r, float &g, float &b, PerceptualToneCurv
 
     // move to JCh so we can modulate chroma based on the global contrast-related chroma scaling factor
     Color::Prophotoxyz(r, g, b, x, y, z);
-    XYZ.X = x * 100.0f / 65535;
-    XYZ.Y = y * 100.0f / 65535;
-    XYZ.Z = z * 100.0f / 65535;
-    cmsCIECAM02Forward(h02[thread_idx], &XYZ, &JCh);
 
-    if (!isfinite(JCh.J) || !isfinite(JCh.C) || !isfinite(JCh.h)) {
+//  XYZ = (cmsCIEXYZ){ .X = x * 0.0015259022f, .Y = y * 0.0015259022f, .Z = z * 0.0015259022f };
+    float J, C, h;
+    // Forward CIECAM02 (J, C, h) using the viewing-condition constants set up in init().
+    // XYZ is rescaled from the 0..65535 range to 0..100 (100 / 65535 = 0.0015259022).
+    // The chroma term must be pow1 = (1.64 - 0.29^n)^0.73, not n itself; otherwise C is
+    // mis-scaled and the lolim/hilim chroma thresholds below are compared against wrong values.
+    Ciecam02::xyz2jch_ciecam02float( J, C, h,
+                                     aw, fl,
+                                     x * 0.0015259022f,  y * 0.0015259022f,  z * 0.0015259022f,
+                                     xw, yw,  zw,
+                                     c,  nc, pow1, nbb, ncb, cz, d);
+
+
+    if (!isfinite(J) || !isfinite(C) || !isfinite(h)) {
         // this can happen for dark noise colors or colors outside human gamut. Then we just return the curve's result.
         if (!state.isProphoto) {
             float newr = state.Prophoto2Working[0][0] * r + state.Prophoto2Working[0][1] * g + state.Prophoto2Working[0][2] * b;
@@ -2159,24 +2169,26 @@ void PerceptualToneCurve::Apply(float &r, float &g, float &b, PerceptualToneCurv
 
     {
         // decrease chroma scaling sligthly of extremely saturated colors
-        float saturated_scale_factor = 0.95;
-        const float lolim = 35; // lower limit, below this chroma all colors will keep original chroma scaling factor
-        const float hilim = 60; // high limit, above this chroma the chroma scaling factor is multiplied with the saturated scale factor value above
+        float saturated_scale_factor = 0.95f;
+        const float lolim = 35.f; // lower limit, below this chroma all colors will keep original chroma scaling factor
+        const float hilim = 60.f; // high limit, above this chroma the chroma scaling factor is multiplied with the saturated scale factor value above
 
-        if (JCh.C < lolim) {
+        if (C < lolim) {
             // chroma is low enough, don't scale
-            saturated_scale_factor = 1.0;
-        } else if (JCh.C < hilim) {
+            saturated_scale_factor = 1.f;
+        } else if (C < hilim) {
             // S-curve transition between low and high limit
-            float x = (JCh.C - lolim) / (hilim - lolim); // x = [0..1], 0 at lolim, 1 at hilim
+            float x = (C - lolim) / (hilim - lolim); // x = [0..1], 0 at lolim, 1 at hilim
 
-            if (x < 0.5) {
-                x = 0.5 * powf(2 * x, 2);
+            if (x < 0.5f) {
+                x = 2.f * SQR(x);
+//              x = 0.5f * powf(2*x, 2);
             } else {
-                x = 0.5 + 0.5 * (1 - powf(1 - 2 * (x - 0.5), 2));
+                x = 1.f - 2.f * SQR(1 - x);
+//              x = 1.f - 0.5f * powf(2-2*x, 2);
             }
 
-            saturated_scale_factor = 1.0 * (1.0 - x) + saturated_scale_factor * x;
+            saturated_scale_factor = (1.f - x) + saturated_scale_factor * x;
         } else {
             // do nothing, high saturation color, keep scale factor
         }
@@ -2186,11 +2198,11 @@ void PerceptualToneCurve::Apply(float &r, float &g, float &b, PerceptualToneCurv
 
     {
         // increase chroma scaling slightly of shadows
-        float nL = CurveFactory::gamma2(newLuminance / 65535); // apply gamma so we make comparison and transition with a more perceptual lightness scale
-        float dark_scale_factor = 1.20;
+        float nL = gamma2curve[newLuminance]; // apply gamma so we make comparison and transition with a more perceptual lightness scale
+        float dark_scale_factor = 1.20f;
         //float dark_scale_factor = 1.0 + state.debug.p2 / 100.0f;
-        const float lolim = 0.15;
-        const float hilim = 0.50;
+        const float lolim = 0.15f;
+        const float hilim = 0.50f;
 
         if (nL < lolim) {
             // do nothing, keep scale factor
@@ -2198,15 +2210,17 @@ void PerceptualToneCurve::Apply(float &r, float &g, float &b, PerceptualToneCurv
             // S-curve transition
             float x = (nL - lolim) / (hilim - lolim); // x = [0..1], 0 at lolim, 1 at hilim
 
-            if (x < 0.5) {
-                x = 0.5 * powf(2 * x, 2);
+            if (x < 0.5f) {
+                x = 2.f * SQR(x);
+//              x = 0.5f * powf(2*x, 2);
             } else {
-                x = 0.5 + 0.5 * (1 - powf(1 - 2 * (x - 0.5), 2));
+                x = 1.f - 2.f * SQR(1 - x);
+//              x = 1.f - 0.5f * (powf(2-2*x, 2));
             }
 
-            dark_scale_factor = dark_scale_factor * (1.0 - x) + 1.0 * x;
+            dark_scale_factor = dark_scale_factor * (1.0f - x) + x;
         } else {
-            dark_scale_factor = 1.0;
+            dark_scale_factor = 1.f;
         }
 
         cmul *= dark_scale_factor;
@@ -2214,34 +2228,41 @@ void PerceptualToneCurve::Apply(float &r, float &g, float &b, PerceptualToneCurv
 
     {
         // to avoid strange CIECAM02 chroma errors on close-to-shadow-clipping colors we reduce chroma scaling towards 1.0 for black colors
-        float dark_scale_factor = 1.0 / cmul;
-        const float lolim = 4;
-        const float hilim = 7;
+        float dark_scale_factor = 1.f / cmul;
+        const float lolim = 4.f;
+        const float hilim = 7.f;
 
-        if (JCh.J < lolim) {
+        if (J < lolim) {
             // do nothing, keep scale factor
-        } else if (JCh.J < hilim) {
+        } else if (J < hilim) {
             // S-curve transition
-            float x = (JCh.J - lolim) / (hilim - lolim);
+            float x = (J - lolim) / (hilim - lolim);
 
-            if (x < 0.5) {
-                x = 0.5 * powf(2 * x, 2);
+            if (x < 0.5f) {
+                x = 2.f * SQR(x);
+//              x = 0.5f * powf(2*x, 2);
             } else {
-                x = 0.5 + 0.5 * (1 - powf(1 - 2 * (x - 0.5), 2));
+                x = 1.f - 2.f * SQR(1 - x);
+//              x = 1.f - 0.5f * (powf(2-2*x, 2));
             }
 
-            dark_scale_factor = dark_scale_factor * (1.0 - x) + 1.0 * x;
+            dark_scale_factor = dark_scale_factor * (1.f - x) + x;
         } else {
-            dark_scale_factor = 1.0;
+            dark_scale_factor = 1.f;
         }
 
         cmul *= dark_scale_factor;
     }
 
-    JCh.C *= cmul;
-    cmsCIECAM02Reverse(h02[thread_idx], &JCh, &XYZ);
+    C *= cmul;
+    // Inverse CIECAM02 with the scaled chroma; pass pow1 (not n) to mirror the forward call.
 
-    if (!isfinite(XYZ.X) || !isfinite(XYZ.Y) || !isfinite(XYZ.Z)) {
+    Ciecam02::jch2xyz_ciecam02float( x, y, z,
+                                     J, C, h,
+                                     xw, yw,  zw,
+                                     f,  c, nc, 1, pow1, nbb, ncb, fl, cz, d, aw );
+
+    if (!isfinite(x) || !isfinite(y) || !isfinite(z)) {
         // can happen for colors on the rim of being outside gamut, that worked without chroma scaling but not with. Then we return only the curve's result.
         if (!state.isProphoto) {
             float newr = state.Prophoto2Working[0][0] * r + state.Prophoto2Working[0][1] * g + state.Prophoto2Working[0][2] * b;
@@ -2255,10 +2276,10 @@ void PerceptualToneCurve::Apply(float &r, float &g, float &b, PerceptualToneCurv
         return;
     }
 
-    Color::xyz2Prophoto(XYZ.X, XYZ.Y, XYZ.Z, r, g, b);
-    r *= 655.35;
-    g *= 655.35;
-    b *= 655.35;
+    Color::xyz2Prophoto(x, y, z, r, g, b);
+    r *= 655.35f;
+    g *= 655.35f;
+    b *= 655.35f;
     r = LIM<float>(r, 0.f, 65535.f);
     g = LIM<float>(g, 0.f, 65535.f);
     b = LIM<float>(b, 0.f, 65535.f);
@@ -2273,34 +2294,36 @@ void PerceptualToneCurve::Apply(float &r, float &g, float &b, PerceptualToneCurv
         Color::rgb2hsv(ar, ag, ab, ah, as, av);
         Color::rgb2hsv(r, g, b, h, s, v);
 
-        float sat_scale = as <= 0.0 ? 1.0 : s / as; // saturation scale compared to Adobe curve
-        float keep = 0.2;
-        const float lolim = 1.00; // only mix in the Adobe curve if we have increased saturation compared to it
-        const float hilim = 1.20;
+        float sat_scale = as <= 0.f ? 1.f : s / as; // saturation scale compared to Adobe curve
+        float keep = 0.2f;
+        const float lolim = 1.00f; // only mix in the Adobe curve if we have increased saturation compared to it
+        const float hilim = 1.20f;
 
         if (sat_scale < lolim) {
             // saturation is low enough, don't desaturate
-            keep = 1.0;
+            keep = 1.f;
         } else if (sat_scale < hilim) {
             // S-curve transition
             float x = (sat_scale - lolim) / (hilim - lolim); // x = [0..1], 0 at lolim, 1 at hilim
 
-            if (x < 0.5) {
-                x = 0.5 * powf(2 * x, 2);
+            if (x < 0.5f) {
+                x = 2.f * SQR(x);
+//              x = 0.5f * powf(2*x, 2);
             } else {
-                x = 0.5 + 0.5 * (1 - powf(1 - 2 * (x - 0.5), 2));
+                x = 1.f - 2.f * SQR(1 - x);
+//              x = 1.f - 0.5f * (powf(2-2*x, 2));
             }
 
-            keep = 1.0 * (1.0 - x) + keep * x;
+            keep = (1.f - x) + keep * x;
         } else {
             // do nothing, very high increase, keep minimum amount
         }
 
-        if (keep < 1.0) {
+        if (keep < 1.f) {
             // mix in some of the Adobe curve result
-            r = r * keep + (1.0 - keep) * ar;
-            g = g * keep + (1.0 - keep) * ag;
-            b = b * keep + (1.0 - keep) * ab;
+            r = r * keep + (1.f - keep) * ar;
+            g = g * keep + (1.f - keep) * ag;
+            b = b * keep + (1.f - keep) * ab;
         }
     }
 
@@ -2314,42 +2337,58 @@ void PerceptualToneCurve::Apply(float &r, float &g, float &b, PerceptualToneCurv
     }
 }
 
-cmsContext * PerceptualToneCurve::c02;
-cmsHANDLE * PerceptualToneCurve::h02;
+//cmsContext * PerceptualToneCurve::c02;
+//cmsHANDLE * PerceptualToneCurve::h02;
 float PerceptualToneCurve::cf_range[2];
 float PerceptualToneCurve::cf[1000];
+LUTf PerceptualToneCurve::gamma2curve;
+float PerceptualToneCurve::f, PerceptualToneCurve::c, PerceptualToneCurve::nc, PerceptualToneCurve::yb, PerceptualToneCurve::la, PerceptualToneCurve::xw, PerceptualToneCurve::yw, PerceptualToneCurve::zw, PerceptualToneCurve::gamut;
+float PerceptualToneCurve::n, PerceptualToneCurve::d, PerceptualToneCurve::nbb, PerceptualToneCurve::ncb, PerceptualToneCurve::cz, PerceptualToneCurve::aw, PerceptualToneCurve::wh, PerceptualToneCurve::pfl, PerceptualToneCurve::fl, PerceptualToneCurve::pow1;
 
 void PerceptualToneCurve::init()
 {
 
-    {
-        // init ciecam02 state, used for chroma scalings
-        cmsViewingConditions vc;
-        vc.whitePoint = *cmsD50_XYZ();
-        vc.whitePoint.X *= 100;
-        vc.whitePoint.Y *= 100;
-        vc.whitePoint.Z *= 100;
-        vc.Yb = 20;
-        vc.La = 20;
-        vc.surround = AVG_SURROUND;
-        vc.D_value = 1.0;
+    /*  { // init ciecam02 state, used for chroma scalings
 
-        int thread_count = 1;
-#ifdef _OPENMP
-        thread_count = omp_get_max_threads();
-#endif
-        h02 = (cmsHANDLE *)malloc(sizeof(h02[0]) * (thread_count + 1));
-        c02 = (cmsContext *)malloc(sizeof(c02[0]) * (thread_count + 1));
-        h02[thread_count] = NULL;
-        c02[thread_count] = NULL;
-
-        // little cms requires one state per thread, for thread safety
-        for (int i = 0; i < thread_count; i++) {
-            c02[i] = cmsCreateContext(NULL, NULL);
-            h02[i] = cmsCIECAM02Init(c02[i], &vc);
+            cmsViewingConditions vc;
+            vc.whitePoint = *cmsD50_XYZ();
+            vc.whitePoint.X *= 100;
+            vc.whitePoint.Y *= 100;
+            vc.whitePoint.Z *= 100;
+    */
+    xw = 96.42f;
+    yw = 100.0f;
+    zw = 82.49f;
+    /*
+            vc.Yb = 20;
+            vc.La = 20;
+            vc.surround = AVG_SURROUND;
+    */
+    yb = 20;
+    la = 20;
+    f  = 1.00f;
+    c  = 0.69f;
+    nc = 1.00f;
+//      vc.D_value = 1.0;
+    Ciecam02::initcam1float(gamut, yb, 1.f, f, la, xw, yw, zw, n, d, nbb, ncb,
+                            cz, aw, wh, pfl, fl, c);
+    pow1 = pow_F( 1.64f - pow_F( 0.29f, n ), 0.73f );
+    /*
+            int thread_count = 1;
+    #ifdef _OPENMP
+            thread_count = omp_get_max_threads();
+    #endif
+            h02 = (cmsHANDLE *)malloc(sizeof(h02[0]) * (thread_count + 1));
+            c02 = (cmsContext *)malloc(sizeof(c02[0]) * (thread_count + 1));
+            h02[thread_count] = NULL;
+            c02[thread_count] = NULL;
+            // little cms requires one state per thread, for thread safety
+            for (int i = 0; i < thread_count; i++) {
+                c02[i] = cmsCreateContext(NULL, NULL);
+                h02[i] = cmsCIECAM02Init(c02[i], &vc);
+            }
         }
-    }
-
+    */
     {
         // init contrast-value-to-chroma-scaling conversion curve
 
@@ -2392,17 +2431,12 @@ void PerceptualToneCurve::init()
         cf_range[0] = in_x[0];
         cf_range[1] = in_x[in_len - 1];
     }
-}
+    gamma2curve(65536, 0);
 
-void PerceptualToneCurve::cleanup()
-{
-    for (int i = 0; h02[i] != NULL; i++) {
-        cmsCIECAM02Done(h02[i]);
-        cmsDeleteContext(c02[i]);
+    for (int i = 0; i < 65536; i++) {
+        gamma2curve[i] = CurveFactory::gamma2(i / 65535.0);
     }
 
-    free(h02);
-    free(c02);
 }
 
 void PerceptualToneCurve::initApplyState(PerceptualToneCurveState & state, Glib::ustring workingSpace) const
diff --git a/rtengine/curves.h b/rtengine/curves.h
index f43542dba..b130b7a7d 100644
--- a/rtengine/curves.h
+++ b/rtengine/curves.h
@@ -800,10 +800,10 @@ public:
 class PerceptualToneCurveState
 {
 public:
-    bool isProphoto;
     float Working2Prophoto[3][3];
     float Prophoto2Working[3][3];
     float cmul_contrast;
+    bool isProphoto;
 };
 
 // Tone curve whose purpose is to keep the color appearance constant, that is the curve changes contrast
@@ -813,10 +813,17 @@ public:
 class PerceptualToneCurve : public ToneCurve
 {
 private:
-    static cmsHANDLE *h02;
-    static cmsContext *c02;
+//    static cmsHANDLE *h02;
+//    static cmsContext *c02;
     static float cf_range[2];
     static float cf[1000];
+    static LUTf gamma2curve;
+    // for ciecam02
+//    float aw, fl, xw, yw, zw, c, nc, pow1, n, nbb, ncb, cz, d;
+//    float yb2, la2, f2,  c2, nc2, pow1n, nbbj, ncbj, flj, czj, dj, awj;
+
+    static float f, c, nc, yb, la, xw, yw, zw, gamut;
+    static float n, d, nbb, ncb, cz, aw, wh, pfl, fl, pow1;
     static void cubic_spline(const float x[], const float y[], const int len, const float out_x[], float out_y[], const int out_len);
     static float find_minimum_interval_halving(float (*func)(float x, void *arg), void *arg, float a, float b, float tol, int nmax);
     static float find_tc_slope_fun(float k, void *arg);
@@ -824,7 +831,6 @@ private:
     float calculateToneCurveContrastValue() const;
 public:
     static void init();
-    static void cleanup();
     void initApplyState(PerceptualToneCurveState & state, Glib::ustring workingSpace) const;
     void Apply(float& r, float& g, float& b, PerceptualToneCurveState & state) const;
 };
diff --git a/rtengine/improcfun.cc b/rtengine/improcfun.cc
index f1f0bf31f..37304f48b 100644
--- a/rtengine/improcfun.cc
+++ b/rtengine/improcfun.cc
@@ -40,7 +40,7 @@
 #include "improccoordinator.h"
 #include "clutstore.h"
 #include "ciecam02.h"
-
+#include "StopWatch.h"
 #ifdef _OPENMP
 #include <omp.h>
 #endif
@@ -2049,8 +2049,7 @@ void ImProcFunctions::ciecam_02float (CieImage* ncie, float adap, int begh, int
                                                         Q,  M,  s, F2V(aw), F2V(fl), F2V(wh),
                                                         x,  y,  z,
                                                         F2V(xw1), F2V(yw1),  F2V(zw1),
-                                                        F2V(yb),  F2V(la),
-                                                        F2V(f), F2V(c),  F2V(nc), F2V(pow1), F2V(nbb), F2V(ncb), F2V(pfl), F2V(cz), F2V(d));
+                                                        F2V(c),  F2V(nc), F2V(pow1), F2V(nbb), F2V(ncb), F2V(pfl), F2V(cz), F2V(d));
                     STVF(Jbuffer[k], J);
                     STVF(Cbuffer[k], C);
                     STVF(hbuffer[k], h);
@@ -2074,8 +2073,7 @@ void ImProcFunctions::ciecam_02float (CieImage* ncie, float adap, int begh, int
                                                         Q,  M,  s, aw, fl, wh,
                                                         x,  y,  z,
                                                         xw1, yw1,  zw1,
-                                                        yb,  la,
-                                                        f, c,  nc,  pilot, gamu, pow1, nbb, ncb, pfl, cz, d);
+                                                        c,  nc, gamu, pow1, nbb, ncb, pfl, cz, d);
                     Jbuffer[k] = J;
                     Cbuffer[k] = C;
                     hbuffer[k] = h;
@@ -2113,8 +2111,7 @@ void ImProcFunctions::ciecam_02float (CieImage* ncie, float adap, int begh, int
                                                         Q,  M,  s, aw, fl, wh,
                                                         x,  y,  z,
                                                         xw1, yw1,  zw1,
-                                                        yb,  la,
-                                                        f, c,  nc,  pilot, gamu, pow1, nbb, ncb, pfl, cz, d);
+                                                        c,  nc, gamu, pow1, nbb, ncb, pfl, cz, d);
 #endif
                     float Jpro, Cpro, hpro, Qpro, Mpro, spro;
                     Jpro = J;
@@ -2545,7 +2542,6 @@ void ImProcFunctions::ciecam_02float (CieImage* ncie, float adap, int begh, int
                             Ciecam02::jch2xyz_ciecam02float( xx, yy, zz,
                                                              J,  C, h,
                                                              xw2, yw2,  zw2,
-                                                             yb2, la2,
                                                              f2,  c2, nc2, gamu, pow1n, nbbj, ncbj, flj, czj, dj, awj);
                             float x, y, z;
                             x = (float)xx * 655.35f;
@@ -2607,7 +2603,6 @@ void ImProcFunctions::ciecam_02float (CieImage* ncie, float adap, int begh, int
                     Ciecam02::jch2xyz_ciecam02float( x, y, z,
                                                      LVF(Jbuffer[k]), LVF(Cbuffer[k]), LVF(hbuffer[k]),
                                                      F2V(xw2), F2V(yw2), F2V(zw2),
-                                                     F2V(yb2), F2V(la2),
                                                      F2V(f2),  F2V(nc2), F2V(pow1n), F2V(nbbj), F2V(ncbj), F2V(flj), F2V(dj), F2V(awj), F2V(reccmcz));
                     STVF(xbuffer[k], x * c655d35);
                     STVF(ybuffer[k], y * c655d35);
@@ -2936,7 +2931,6 @@ void ImProcFunctions::ciecam_02float (CieImage* ncie, float adap, int begh, int
                         Ciecam02::jch2xyz_ciecam02float( xx, yy, zz,
                                                          ncie->J_p[i][j],  ncie_C_p, ncie->h_p[i][j],
                                                          xw2, yw2,  zw2,
-                                                         yb2, la2,
                                                          f2,  c2, nc2, gamu, pow1n, nbbj, ncbj, flj, czj, dj, awj);
                         x = (float)xx * 655.35f;
                         y = (float)yy * 655.35f;
@@ -2992,7 +2986,6 @@ void ImProcFunctions::ciecam_02float (CieImage* ncie, float adap, int begh, int
                         Ciecam02::jch2xyz_ciecam02float( x, y, z,
                                                          LVF(Jbuffer[k]), LVF(Cbuffer[k]), LVF(hbuffer[k]),
                                                          F2V(xw2), F2V(yw2), F2V(zw2),
-                                                         F2V(yb2), F2V(la2),
                                                          F2V(f2), F2V(nc2), F2V(pow1n), F2V(nbbj), F2V(ncbj), F2V(flj), F2V(dj), F2V(awj), F2V(reccmcz));
                         x *= c655d35;
                         y *= c655d35;
@@ -3178,7 +3171,7 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, EditBuffer *e
                                SHMap* shmap, int sat, LUTf & rCurve, LUTf & gCurve, LUTf & bCurve, float satLimit , float satLimitOpacity, const ColorGradientCurve & ctColorCurve, const OpacityCurve & ctOpacityCurve, bool opautili, LUTf & clToningcurve, LUTf & cl2Toningcurve,
                                const ToneCurve & customToneCurve1, const ToneCurve & customToneCurve2,  const ToneCurve & customToneCurvebw1, const ToneCurve & customToneCurvebw2, double &rrm, double &ggm, double &bbm, float &autor, float &autog, float &autob, double expcomp, int hlcompr, int hlcomprthresh, DCPProfile *dcpProf)
 {
-
+    StopWatch Stop1("rgbProc");
     LUTf fGammaLUTf;
     Imagefloat *tmpImage = NULL;
 
diff --git a/rtengine/init.cc b/rtengine/init.cc
index 1fdcb6438..acedd844f 100644
--- a/rtengine/init.cc
+++ b/rtengine/init.cc
@@ -66,7 +66,6 @@ void cleanup ()
 
     ProcParams::cleanup ();
     Color::cleanup ();
-    PerceptualToneCurve::cleanup ();
     ImProcFunctions::cleanupCache ();
     Thumbnail::cleanupGamma ();
     RawImageSource::cleanup ();