From c67b986744d21b803de17dba71f1d82d6bc38a7e Mon Sep 17 00:00:00 2001 From: heckflosse Date: Wed, 21 Sep 2016 00:22:42 +0200 Subject: [PATCH 1/5] add faster implementation to clip float to [0;65535] and round --- rtengine/rt_math.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/rtengine/rt_math.h b/rtengine/rt_math.h index 0836c8be7..b5c93d127 100644 --- a/rtengine/rt_math.h +++ b/rtengine/rt_math.h @@ -80,7 +80,7 @@ inline _Tp intp(_Tp a, _Tp b, _Tp c) // following is valid: // intp(a, b+x, c+x) = intp(a, b, c) + x // intp(a, b*x, c*x) = intp(a, b, c) * x - return a * (b-c) + c; + return a * (b - c) + c; } template<typename T> @@ -101,5 +101,17 @@ inline T norminf(const T& x, const T& y) return std::max(std::abs(x), std::abs(y)); } -} +inline int float2uint16range(float d) // clips input to [0;65535] and rounds +{ + d = CLIP(d); // clip to [0;65535] +#ifdef __SSE2__ // this only works in IEEE 754 maths. For simplicity I restricted it to SSE2. We can enhance it later, but we have to take care of endianness then. 
+ d += 12582912.f; + return reinterpret_cast<int&>(d); +#else // fall back to slow std::round() + return std::round(d); +#endif +} + +} + #endif From f17011cec400b0730a8f9ead26069ddb1390bd82 Mon Sep 17 00:00:00 2001 From: heckflosse Date: Wed, 21 Sep 2016 00:24:44 +0200 Subject: [PATCH 2/5] use faster implementation to clip float to [0;65535] and round in rtengine::lab2rgb16b and rtengine::lab2rgb16 --- rtengine/iplab2rgb.cc | 45 +++++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/rtengine/iplab2rgb.cc b/rtengine/iplab2rgb.cc index 489ca60bc..d37ba362d 100644 --- a/rtengine/iplab2rgb.cc +++ b/rtengine/iplab2rgb.cc @@ -26,6 +26,19 @@ #include "curves.h" #include "alignedbuffer.h" #include "color.h" +#define BENCHMARK +#include "StopWatch.h" + +namespace +{ + +int float2intx(float d) +{ + d += 12582912.f; + return reinterpret_cast<int&>(d); +} +} + namespace rtengine { @@ -241,8 +254,7 @@ Image8* ImProcFunctions::lab2rgb (LabImage* lab, int cx, int cy, int cw, int ch, // for default (not gamma) Image16* ImProcFunctions::lab2rgb16 (LabImage* lab, int cx, int cy, int cw, int ch, Glib::ustring profile, RenderingIntent intent, bool bw) { - - //gamutmap(lab); + BENCHFUN if (cx < 0) { cx = 0; @@ -279,7 +291,7 @@ Image16* ImProcFunctions::lab2rgb16 (LabImage* lab, int cx, int cy, int cw, int for (int j = cx; j < cx + cw; j++) { float fy = (0.0086206897f * rL[j]) / 327.68f + 0.1379310345f; // (L+16)/116 - float fx = (0.002 * ra[j]) / 327.68f + fy; + float fx = (0.002f * ra[j]) / 327.68f + fy; float fz = fy - (0.005f * rb[j]) / 327.68f; float LL = rL[j] / 327.68f; @@ -288,15 +300,14 @@ Image16* ImProcFunctions::lab2rgb16 (LabImage* lab, int cx, int cy, int cw, int float z_ = 65535.0f * (float) Color::f2xyz(fz) * Color::D50z; float y_ = (LL > Color::epskap) ? 
65535.0f * fy * fy * fy : 65535.0f * LL / Color::kappa; - xa[j - cx] = CLIP((int) round(x_)); - ya[j - cx] = CLIP((int) round(y_)); - za[j - cx] = CLIP((int) round(z_)); + xa[j - cx] = float2uint16range(x_); + ya[j - cx] = float2uint16range(y_); + za[j - cx] = float2uint16range(z_); if(bw && y_ < 65535.f ) { //force Bw value and take highlight into account - xa[j - cx] = (int) round(y_ * Color::D50x ); - za[j - cx] = (int) round(y_ * Color::D50z); + xa[j - cx] = float2uint16range(y_ * Color::D50x); + za[j - cx] = float2uint16range(y_ * Color::D50z); } - } } @@ -345,7 +356,7 @@ Image16* ImProcFunctions::lab2rgb16 (LabImage* lab, int cx, int cy, int cw, int // for gamma options (BT709...sRGB linear...) Image16* ImProcFunctions::lab2rgb16b (LabImage* lab, int cx, int cy, int cw, int ch, Glib::ustring profile, RenderingIntent intent, Glib::ustring profi, Glib::ustring gam, bool freegamma, double gampos, double slpos, double &ga0, double &ga1, double &ga2, double &ga3, double &ga4, double &ga5, double &ga6, bool bw) { - +BENCHFUN //gamutmap(lab); if (cx < 0) { @@ -539,7 +550,7 @@ Image16* ImProcFunctions::lab2rgb16b (LabImage* lab, int cx, int cy, int cw, int // 7 parameters for smoother curves cmsWhitePointFromTemp(&xyD, t50); GammaTRC[0] = GammaTRC[1] = GammaTRC[2] = cmsBuildParametricToneCurve(NULL, 5, Parameters);//5 = more smoother than 4 - cmsHPROFILE oprofdef = cmsCreateRGBProfileTHR(NULL, &xyD, &Primaries, GammaTRC); //oprofdef become Outputprofile + cmsHPROFILE oprofdef = cmsCreateRGBProfileTHR(NULL, &xyD, &Primaries, GammaTRC); //oprofdef becomes Outputprofile cmsFreeToneCurve(GammaTRC[0]); @@ -567,13 +578,13 @@ Image16* ImProcFunctions::lab2rgb16b (LabImage* lab, int cx, int cy, int cw, int float z_ = 65535.0f * (float)Color::f2xyz(fz) * Color::D50z; float y_ = (LL > Color::epskap) ? 
(float) 65535.0 * fy * fy * fy : 65535.0f * LL / Color::kappa; - xa[j - cx] = CLIP((int) round(x_)) ; - ya[j - cx] = CLIP((int) round(y_)); - za[j - cx] = CLIP((int) round(z_)); + xa[j - cx] = float2uint16range(x_); + ya[j - cx] = float2uint16range(y_); + za[j - cx] = float2uint16range(z_); if(bw && y_ < 65535.f) { //force Bw value and take highlight into account - xa[j - cx] = (int) round(y_ * Color::D50x); - za[j - cx] = (int) round(y_ * Color::D50z); + xa[j - cx] = float2uint16range(y_ * Color::D50x); + za[j - cx] = float2uint16range(y_ * Color::D50z); } } @@ -581,7 +592,7 @@ Image16* ImProcFunctions::lab2rgb16b (LabImage* lab, int cx, int cy, int cw, int cmsHPROFILE iprof = iccStore->getXYZProfile (); lcmsMutex->lock (); - cmsHTRANSFORM hTransform = cmsCreateTransform (iprof, TYPE_RGB_16, oprofdef, TYPE_RGB_16, intent, cmsFLAGS_NOOPTIMIZE | cmsFLAGS_NOCACHE); + cmsHTRANSFORM hTransform = cmsCreateTransform (iprof, TYPE_RGB_16, oprofdef, TYPE_RGB_16, intent, cmsFLAGS_NOOPTIMIZE | cmsFLAGS_NOCACHE); lcmsMutex->unlock (); image->ExecCMSTransform(hTransform); From 29c4d936aa3c7f4801fadc1d15e45c15e7a7e5ba Mon Sep 17 00:00:00 2001 From: heckflosse Date: Wed, 21 Sep 2016 00:35:02 +0200 Subject: [PATCH 3/5] cleaned code --- rtengine/iplab2rgb.cc | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/rtengine/iplab2rgb.cc b/rtengine/iplab2rgb.cc index d37ba362d..c5eeb1a2a 100644 --- a/rtengine/iplab2rgb.cc +++ b/rtengine/iplab2rgb.cc @@ -29,17 +29,6 @@ #define BENCHMARK #include "StopWatch.h" -namespace -{ - -int float2intx(float d) -{ - d += 12582912.f; - return reinterpret_cast<int&>(d); -} -} - - namespace rtengine { From ab2be87333407451ae090f6bc3f86b6910ea0056 Mon Sep 17 00:00:00 2001 From: heckflosse Date: Wed, 21 Sep 2016 14:05:30 +0200 Subject: [PATCH 4/5] Simplified float2uint16range(..), removed StopWatches --- rtengine/iplab2rgb.cc | 26 +++++++++----------------- rtengine/rt_math.h | 7 +------ 2 files changed, 10 insertions(+), 23 deletions(-) diff --git 
a/rtengine/iplab2rgb.cc b/rtengine/iplab2rgb.cc index c5eeb1a2a..576af00c3 100644 --- a/rtengine/iplab2rgb.cc +++ b/rtengine/iplab2rgb.cc @@ -26,8 +26,6 @@ #include "curves.h" #include "alignedbuffer.h" #include "color.h" -#define BENCHMARK -#include "StopWatch.h" namespace rtengine { @@ -79,9 +77,7 @@ void ImProcFunctions::lab2monitorRgb (LabImage* lab, Image8* image) cmsDoTransform (monitorTransform, buffer, data + ix, W); } } - } // End of parallelization - } else { int W = lab->W; @@ -129,7 +125,6 @@ void ImProcFunctions::lab2monitorRgb (LabImage* lab, Image8* image) Image8* ImProcFunctions::lab2rgb (LabImage* lab, int cx, int cy, int cw, int ch, Glib::ustring profile, RenderingIntent intent, bool standard_gamma) { - //gamutmap(lab); if (cx < 0) { cx = 0; @@ -243,7 +238,6 @@ Image8* ImProcFunctions::lab2rgb (LabImage* lab, int cx, int cy, int cw, int ch, // for default (not gamma) Image16* ImProcFunctions::lab2rgb16 (LabImage* lab, int cx, int cy, int cw, int ch, Glib::ustring profile, RenderingIntent intent, bool bw) { - BENCHFUN if (cx < 0) { cx = 0; @@ -264,10 +258,10 @@ Image16* ImProcFunctions::lab2rgb16 (LabImage* lab, int cx, int cy, int cw, int Image16* image = new Image16 (cw, ch); cmsHPROFILE oprof = iccStore->getProfile (profile); - - if (oprof) { +#ifdef _OPENMP #pragma omp parallel for if (multiThread) +#endif for (int i = cy; i < cy + ch; i++) { float* rL = lab->L[i]; @@ -309,7 +303,9 @@ Image16* ImProcFunctions::lab2rgb16 (LabImage* lab, int cx, int cy, int cw, int cmsDeleteTransform(hTransform); } else { +#ifdef _OPENMP #pragma omp parallel for if (multiThread) +#endif for (int i = cy; i < cy + ch; i++) { float R, G, B; @@ -345,8 +341,6 @@ Image16* ImProcFunctions::lab2rgb16 (LabImage* lab, int cx, int cy, int cw, int // for gamma options (BT709...sRGB linear...) 
Image16* ImProcFunctions::lab2rgb16b (LabImage* lab, int cx, int cy, int cw, int ch, Glib::ustring profile, RenderingIntent intent, Glib::ustring profi, Glib::ustring gam, bool freegamma, double gampos, double slpos, double &ga0, double &ga1, double &ga2, double &ga3, double &ga4, double &ga5, double &ga6, bool bw) { -BENCHFUN - //gamutmap(lab); if (cx < 0) { cx = 0; @@ -506,13 +500,11 @@ BENCHFUN Color::calcGamma(pwr, ts, mode, imax, g_a0, g_a1, g_a2, g_a3, g_a4, g_a5); // call to calcGamma with selected gamma and slope : return parameters for LCMS2 ga4 = g_a3 * ts; - //printf("g_a0=%f g_a1=%f g_a2=%f g_a3=%f g_a4=%f\n", g_a0,g_a1,g_a2,g_a3,g_a4); ga0 = gampos; ga1 = 1. / (1.0 + g_a4); ga2 = g_a4 / (1.0 + g_a4); ga3 = 1. / slpos; ga5 = 0.0; - //printf("ga0=%f ga1=%f ga2=%f ga3=%f ga4=%f\n", ga0,ga1,ga2,ga3,ga4); } @@ -543,9 +535,10 @@ BENCHFUN cmsFreeToneCurve(GammaTRC[0]); - if (oprofdef) { +#ifdef _OPENMP #pragma omp parallel for if (multiThread) +#endif for (int i = cy; i < cy + ch; i++) { float* rL = lab->L[i]; @@ -575,7 +568,6 @@ BENCHFUN xa[j - cx] = float2uint16range(y_ * Color::D50x); za[j - cx] = float2uint16range(y_ * Color::D50z); } - } } @@ -587,8 +579,10 @@ BENCHFUN image->ExecCMSTransform(hTransform); cmsDeleteTransform(hTransform); } else { - // +#ifdef _OPENMP #pragma omp parallel for if (multiThread) +#endif + for (int i = cy; i < cy + ch; i++) { float R, G, B; float* rL = lab->L[i]; @@ -619,6 +613,4 @@ BENCHFUN return image; } -//#include "sRGBgamutbdy.cc" - } diff --git a/rtengine/rt_math.h b/rtengine/rt_math.h index b5c93d127..f55f7c1b2 100644 --- a/rtengine/rt_math.h +++ b/rtengine/rt_math.h @@ -104,12 +104,7 @@ inline T norminf(const T& x, const T& y) inline int float2uint16range(float d) // clips input to [0;65535] and rounds { d = CLIP(d); // clip to [0;65535] -#ifdef __SSE2__ // this only works in IEEE 754 maths. For simplicity I restricted it to SSE2. We can enhance it later, but we have to take care of endianness then. 
- d += 12582912.f; - return reinterpret_cast<int&>(d); -#else // fall back to slow std::round() - return std::round(d); -#endif + return d + 0.5f; } } From 1e268105dbcb3d8653166907cb87589b87839a89 Mon Sep 17 00:00:00 2001 From: heckflosse Date: Wed, 21 Sep 2016 21:01:51 +0200 Subject: [PATCH 5/5] replaced code to convert from Lab to XYZ by calling Color::Lab2XYZ(..) --- rtengine/iplab2rgb.cc | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/rtengine/iplab2rgb.cc b/rtengine/iplab2rgb.cc index 576af00c3..c12e3463e 100644 --- a/rtengine/iplab2rgb.cc +++ b/rtengine/iplab2rgb.cc @@ -272,16 +272,8 @@ Image16* ImProcFunctions::lab2rgb16 (LabImage* lab, int cx, int cy, int cw, int short* za = (short*)image->b(i - cy); for (int j = cx; j < cx + cw; j++) { - - float fy = (0.0086206897f * rL[j]) / 327.68f + 0.1379310345f; // (L+16)/116 - float fx = (0.002f * ra[j]) / 327.68f + fy; - float fz = fy - (0.005f * rb[j]) / 327.68f; - float LL = rL[j] / 327.68f; - - float x_ = 65535.0f * (float) Color::f2xyz(fx) * Color::D50x; - //float y_ = 65535.0 * Color::f2xyz(fy); - float z_ = 65535.0f * (float) Color::f2xyz(fz) * Color::D50z; - float y_ = (LL > Color::epskap) ? 65535.0f * fy * fy * fy : 65535.0f * LL / Color::kappa; + float x_, y_, z_; + Color::Lab2XYZ(rL[j], ra[j], rb[j], x_, y_, z_); xa[j - cx] = float2uint16range(x_); ya[j - cx] = float2uint16range(y_); @@ -549,16 +541,8 @@ Image16* ImProcFunctions::lab2rgb16b (LabImage* lab, int cx, int cy, int cw, int short* za = (short*)image->b(i - cy); for (int j = cx; j < cx + cw; j++) { - - float fy = (0.0086206897f * rL[j]) / 327.68f + 0.1379310345f; // (L+16)/116 - float fx = (0.002f * ra[j]) / 327.68f + fy; - float fz = fy - (0.005f * rb[j]) / 327.68f; - float LL = rL[j] / 327.68f; - - float x_ = 65535.0f * (float)Color::f2xyz(fx) * Color::D50x; - // float y_ = 65535.0 * Color::f2xyz(fy); - float z_ = 65535.0f * (float)Color::f2xyz(fz) * Color::D50z; - float y_ = (LL > Color::epskap) ? 
(float) 65535.0 * fy * fy * fy : 65535.0f * LL / Color::kappa; + float x_, y_, z_; + Color::Lab2XYZ(rL[j], ra[j], rb[j], x_, y_, z_); xa[j - cx] = float2uint16range(x_); ya[j - cx] = float2uint16range(y_);