From 20577ccab67a595a8e98c301a517ba07842b9836 Mon Sep 17 00:00:00 2001 From: heckflosse Date: Sun, 25 Nov 2018 13:54:59 +0100 Subject: [PATCH 1/4] custom trc, speedup and reduced memory usage, #5025 --- rtengine/dcrop.cc | 32 +++----------------- rtengine/imagefloat.cc | 21 ++++++++----- rtengine/imagefloat.h | 2 +- rtengine/improccoordinator.cc | 34 ++++------------------ rtengine/improcfun.h | 2 +- rtengine/iplab2rgb.cc | 55 +++++++++++++++-------------------- rtengine/simpleprocess.cc | 34 ++++------------------ 7 files changed, 53 insertions(+), 127 deletions(-) diff --git a/rtengine/dcrop.cc b/rtengine/dcrop.cc index 7854dfb81..d9b4f1ca1 100644 --- a/rtengine/dcrop.cc +++ b/rtengine/dcrop.cc @@ -815,37 +815,13 @@ void Crop::update(int todo) const Glib::ustring profile = params.icm.workingProfile; if (profile == "sRGB" || profile == "Adobe RGB" || profile == "ProPhoto" || profile == "WideGamut" || profile == "BruceRGB" || profile == "Beta RGB" || profile == "BestRGB" || profile == "Rec2020" || profile == "ACESp0" || profile == "ACESp1") { - int cw = baseCrop->getWidth(); - int ch = baseCrop->getHeight(); + const int cw = baseCrop->getWidth(); + const int ch = baseCrop->getHeight(); workingCrop = new Imagefloat(cw, ch); - baseCrop->copyData(workingCrop); //first put gamma TRC to 1 - Imagefloat* readyImg0 = parent->ipf.workingtrc(workingCrop, cw, ch, -5, params.icm.workingProfile, 2.4, 12.92310); - #pragma omp parallel for - - for (int row = 0; row < ch; row++) { - for (int col = 0; col < cw; col++) { - workingCrop->r(row, col) = (float)readyImg0->r(row, col); - workingCrop->g(row, col) = (float)readyImg0->g(row, col); - workingCrop->b(row, col) = (float)readyImg0->b(row, col); - } - } - - delete readyImg0; - + parent->ipf.workingtrc(baseCrop, workingCrop, cw, ch, -5, params.icm.workingProfile, 2.4, 12.92310, true, false); //adjust gamma TRC - Imagefloat* readyImg = parent->ipf.workingtrc(workingCrop, cw, ch, 5, params.icm.workingProfile, params.icm.workingTRCGamma, params.icm.workingTRCSlope); - #pragma omp parallel for - - for (int row = 0; row < ch; row++) { - for (int col = 0; col < cw; col++) { - workingCrop->r(row, col) = (float)readyImg->r(row, col); - workingCrop->g(row, col) = (float)readyImg->g(row, col); - workingCrop->b(row, col) = (float)readyImg->b(row, col); - } - } - - delete readyImg; + parent->ipf.workingtrc(workingCrop, workingCrop, cw, ch, 5, params.icm.workingProfile, params.icm.workingTRCGamma, params.icm.workingTRCSlope, false, true); } } double rrm, ggm, bbm; diff --git a/rtengine/imagefloat.cc b/rtengine/imagefloat.cc index 63c521040..2dae72793 100644 --- a/rtengine/imagefloat.cc +++ b/rtengine/imagefloat.cc @@ -475,7 +475,7 @@ void Imagefloat::calcCroppedHistogram(const ProcParams ¶ms, float scale, LUT } // Parallelized transformation; create transform with cmsFLAGS_NOCACHE! -void Imagefloat::ExecCMSTransform2(cmsHTRANSFORM hTransform) +void Imagefloat::ExecCMSTransform2(cmsHTRANSFORM hTransform, bool normalizeIn) { // LittleCMS cannot parallelize planar setups -- Hombre: LCMS2.4 can! But it we use this new feature, memory allocation @@ -487,18 +487,25 @@ void Imagefloat::ExecCMSTransform2(cmsHTRANSFORM hTransform) AlignedBuffer pBuf(width * 3); #ifdef _OPENMP - #pragma omp for schedule(static) + #pragma omp for schedule(dynamic, 16) #endif for (int y = 0; y < height; y++) { float *p = pBuf.data, *pR = r(y), *pG = g(y), *pB = b(y); - for (int x = 0; x < width; x++) { - *(p++) = *(pR++)/ 65535.f; - *(p++) = *(pG++)/ 65535.f; - *(p++) = *(pB++)/ 65535.f; - + if (normalizeIn) { + for (int x = 0; x < width; x++) { + *(p++) = *(pR++)/ 65535.f; + *(p++) = *(pG++)/ 65535.f; + *(p++) = *(pB++)/ 65535.f; + } + } else { + for (int x = 0; x < width; x++) { + *(p++) = *(pR++); + *(p++) = *(pG++); + *(p++) = *(pB++); + } } cmsDoTransform (hTransform, pBuf.data, pBuf.data, width); diff --git a/rtengine/imagefloat.h b/rtengine/imagefloat.h index 9d7c69aef..921f86b59 100644 --- a/rtengine/imagefloat.h +++ b/rtengine/imagefloat.h @@ -222,7 +222,7 @@ public: void normalizeFloatTo1(); void normalizeFloatTo65535(); void calcCroppedHistogram(const ProcParams ¶ms, float scale, LUTu & hist); - void ExecCMSTransform2(cmsHTRANSFORM hTransform); + void ExecCMSTransform2(cmsHTRANSFORM hTransform, bool normalizeIn = true); void ExecCMSTransform(cmsHTRANSFORM hTransform); void ExecCMSTransform(cmsHTRANSFORM hTransform, const LabImage &labImage, int cx, int cy); diff --git a/rtengine/improccoordinator.cc b/rtengine/improccoordinator.cc index d76ccda27..bbadbf0a7 100644 --- a/rtengine/improccoordinator.cc +++ b/rtengine/improccoordinator.cc @@ -28,7 +28,6 @@ #include #include #include "color.h" - #ifdef _OPENMP #include #endif @@ -511,38 +510,15 @@ void ImProcCoordinator::updatePreviewImage(int todo, bool panningRelatedChange) orig_prev->copyData(oprevi); } - Glib::ustring profile = params.icm.workingProfile; + const Glib::ustring profile = params.icm.workingProfile; if (profile == "sRGB" || profile == "Adobe RGB" || profile == "ProPhoto" || profile == "WideGamut" || profile == "BruceRGB" || profile == "Beta RGB" || profile == "BestRGB" || profile == "Rec2020" || profile == "ACESp0" || profile == "ACESp1") { - int cw = oprevi->getWidth(); - int ch = oprevi->getHeight(); + const int cw = oprevi->getWidth(); + const int ch = oprevi->getHeight(); // put gamma TRC to 1 - Imagefloat* readyImg0 = ipf.workingtrc(oprevi, cw, ch, -5, params.icm.workingProfile, 2.4, 12.92310); - #pragma omp parallel for - - for (int row = 0; row < ch; row++) { - for (int col = 0; col < cw; col++) { - oprevi->r(row, col) = (float)readyImg0->r(row, col); - oprevi->g(row, col) = (float)readyImg0->g(row, col); - oprevi->b(row, col) = (float)readyImg0->b(row, col); - } - } - - delete readyImg0; + ipf.workingtrc(oprevi, oprevi, cw, ch, -5, params.icm.workingProfile, 2.4, 12.92310, true, false); //adjust TRC - Imagefloat* readyImg = ipf.workingtrc(oprevi, cw, ch, 5, params.icm.workingProfile, params.icm.workingTRCGamma, params.icm.workingTRCSlope); - #pragma omp parallel for - - for (int row = 0; row < ch; row++) { - for (int col = 0; col < cw; col++) { - oprevi->r(row, col) = (float)readyImg->r(row, col); - oprevi->g(row, col) = (float)readyImg->g(row, col); - oprevi->b(row, col) = (float)readyImg->b(row, col); - } - } - - delete readyImg; - + ipf.workingtrc(oprevi, oprevi, cw, ch, 5, params.icm.workingProfile, params.icm.workingTRCGamma, params.icm.workingTRCSlope, false, true); } } } diff --git a/rtengine/improcfun.h b/rtengine/improcfun.h index c3e17a050..470922508 100644 --- a/rtengine/improcfun.h +++ b/rtengine/improcfun.h @@ -350,7 +350,7 @@ public: Image8* lab2rgb(LabImage* lab, int cx, int cy, int cw, int ch, const procparams::ColorManagementParams &icm, bool consider_histogram_settings = true); Imagefloat* lab2rgbOut(LabImage* lab, int cx, int cy, int cw, int ch, const procparams::ColorManagementParams &icm); // CieImage *ciec; - Imagefloat* workingtrc(Imagefloat* working, int cw, int ch, int mul, Glib::ustring profile, double gampos, double slpos); + void workingtrc(Imagefloat* src, Imagefloat* dst, int cw, int ch, int mul, Glib::ustring profile, double gampos, double slpos, bool normalizeIn = true, bool normalizeOut = true); bool transCoord(int W, int H, int x, int y, int w, int h, int& xv, int& yv, int& wv, int& hv, double ascaleDef = -1, const LensCorrection *pLCPMap = nullptr); bool transCoord(int W, int H, const std::vector &src, std::vector &red, std::vector &green, std::vector &blue, double ascaleDef = -1, const LensCorrection *pLCPMap = nullptr); diff --git a/rtengine/iplab2rgb.cc b/rtengine/iplab2rgb.cc index da8e687ed..437144600 100644 --- a/rtengine/iplab2rgb.cc +++ b/rtengine/iplab2rgb.cc @@ -432,7 +432,7 @@ Imagefloat* ImProcFunctions::lab2rgbOut(LabImage* lab, int cx, int cy, int cw, i } -Imagefloat* ImProcFunctions::workingtrc(Imagefloat* working, int cw, int ch, int mul, Glib::ustring profile, double gampos, double slpos) +void ImProcFunctions::workingtrc(Imagefloat* src, Imagefloat* dst, int cw, int ch, int mul, Glib::ustring profile, double gampos, double slpos, bool normalizeIn, bool normalizeOut) { TMatrix wprof; @@ -445,22 +445,20 @@ Imagefloat* ImProcFunctions::workingtrc(Imagefloat* working, int cw, int ch, int } double toxyz[3][3] = { { - (wprof[0][0] / dx), //I have suppressed / Color::D50x - (wprof[0][1] / dx), - (wprof[0][2] / dx) + (wprof[0][0] / (dx * (normalizeIn ? 65535.0 : 1.0))), //I have suppressed / Color::D50x + (wprof[0][1] / (dx * (normalizeIn ? 65535.0 : 1.0))), + (wprof[0][2] / (dx * (normalizeIn ? 65535.0 : 1.0))) }, { - (wprof[1][0]), - (wprof[1][1]), - (wprof[1][2]) + (wprof[1][0] / (normalizeIn ? 65535.0 : 1.0)), + (wprof[1][1] / (normalizeIn ? 65535.0 : 1.0)), + (wprof[1][2] / (normalizeIn ? 65535.0 : 1.0)) }, { - (wprof[2][0] / dz), //I have suppressed / Color::D50z - (wprof[2][1] / dz), - (wprof[2][2] / dz) + (wprof[2][0] / (dz * (normalizeIn ? 65535.0 : 1.0))), //I have suppressed / Color::D50z + (wprof[2][1] / (dz * (normalizeIn ? 65535.0 : 1.0))), + (wprof[2][2] / (dz * (normalizeIn ? 65535.0 : 1.0))) } }; - Imagefloat* image = new Imagefloat(cw, ch); - double pwr; double ts; ts = slpos; @@ -618,27 +616,22 @@ Imagefloat* ImProcFunctions::workingtrc(Imagefloat* working, int cw, int ch, int #pragma omp parallel for if (multiThread) for (int i = 0; i < ch; i++) { - float* rr = working->r(i); - float* rg = working->g(i); - float* rb = working->b(i); + float* rr = src->r(i); + float* rg = src->g(i); + float* rb = src->b(i); - float* xa = (float*)image->r(i); - float* ya = (float*)image->g(i); - float* za = (float*)image->b(i); + float* xa = (float*)dst->r(i); + float* ya = (float*)dst->g(i); + float* za = (float*)dst->b(i); for (int j = 0; j < cw; j++) { float r1 = rr[j]; float g1 = rg[j]; float b1 = rb[j]; - float x_ = toxyz[0][0] * r1 + toxyz[0][1] * g1 + toxyz[0][2] * b1; - float y_ = toxyz[1][0] * r1 + toxyz[1][1] * g1 + toxyz[1][2] * b1; - float z_ = toxyz[2][0] * r1 + toxyz[2][1] * g1 + toxyz[2][2] * b1; - - xa[j] = ( x_) ; - ya[j] = ( y_); - za[j] = ( z_); - + xa[j] = toxyz[0][0] * r1 + toxyz[0][1] * g1 + toxyz[0][2] * b1; + ya[j] = toxyz[1][0] * r1 + toxyz[1][1] * g1 + toxyz[1][2] * b1; + za[j] = toxyz[2][0] * r1 + toxyz[2][1] * g1 + toxyz[2][2] * b1; } } @@ -651,16 +644,14 @@ Imagefloat* ImProcFunctions::workingtrc(Imagefloat* working, int cw, int ch, int cmsHTRANSFORM hTransform = cmsCreateTransform(iprof, TYPE_RGB_FLT, oprofdef, TYPE_RGB_FLT, params->icm.outputIntent, flags); lcmsMutex->unlock(); - image->ExecCMSTransform2(hTransform); + dst->ExecCMSTransform2(hTransform, false); cmsDeleteTransform(hTransform); - image->normalizeFloatTo65535(); + if (normalizeOut) { + dst->normalizeFloatTo65535(); + } } - - - return image; - } diff --git a/rtengine/simpleprocess.cc b/rtengine/simpleprocess.cc index bdb3ced19..e09d2fe3a 100644 --- a/rtengine/simpleprocess.cc +++ b/rtengine/simpleprocess.cc @@ -893,39 +893,15 @@ private: //gamma TRC working if (params.icm.workingTRC == "Custom") { //exec TRC IN free - Glib::ustring profile; - profile = params.icm.workingProfile; + const Glib::ustring profile = params.icm.workingProfile; if (profile == "sRGB" || profile == "Adobe RGB" || profile == "ProPhoto" || profile == "WideGamut" || profile == "BruceRGB" || profile == "Beta RGB" || profile == "BestRGB" || profile == "Rec2020" || profile == "ACESp0" || profile == "ACESp1") { - int cw = baseImg->getWidth(); - int ch = baseImg->getHeight(); + const int cw = baseImg->getWidth(); + const int ch = baseImg->getHeight(); // put gamma TRC to 1 - Imagefloat* readyImg0 = ipf.workingtrc(baseImg, cw, ch, -5, params.icm.workingProfile, 2.4, 12.92310); - #pragma omp parallel for - - for (int row = 0; row < ch; row++) { - for (int col = 0; col < cw; col++) { - baseImg->r(row, col) = (float)readyImg0->r(row, col); - baseImg->g(row, col) = (float)readyImg0->g(row, col); - baseImg->b(row, col) = (float)readyImg0->b(row, col); - } - } - - delete readyImg0; - + ipf.workingtrc(baseImg, baseImg, cw, ch, -5, params.icm.workingProfile, 2.4, 12.92310, true, false); //adjust TRC - Imagefloat* readyImg = ipf.workingtrc(baseImg, cw, ch, 5, params.icm.workingProfile, params.icm.workingTRCGamma, params.icm.workingTRCSlope); - #pragma omp parallel for - - for (int row = 0; row < ch; row++) { - for (int col = 0; col < cw; col++) { - baseImg->r(row, col) = (float)readyImg->r(row, col); - baseImg->g(row, col) = (float)readyImg->g(row, col); - baseImg->b(row, col) = (float)readyImg->b(row, col); - } - } - - delete readyImg; + ipf.workingtrc(baseImg, baseImg, cw, ch, 5, params.icm.workingProfile, params.icm.workingTRCGamma, params.icm.workingTRCSlope, false, true); } } From c9817e369cd0b0d18ee95b2b65897c2fc7ca6d2d Mon Sep 17 00:00:00 2001 From: heckflosse Date: Sun, 25 Nov 2018 14:45:28 +0100 Subject: [PATCH 2/4] Remove Imagefloat::ExecCMSTransform2, #5025 --- rtengine/imagefloat.cc | 55 +----------------------------------------- rtengine/imagefloat.h | 2 -- rtengine/iplab2rgb.cc | 2 +- 3 files changed, 2 insertions(+), 57 deletions(-) diff --git a/rtengine/imagefloat.cc b/rtengine/imagefloat.cc index 2dae72793..940806419 100644 --- a/rtengine/imagefloat.cc +++ b/rtengine/imagefloat.cc @@ -474,59 +474,6 @@ void Imagefloat::calcCroppedHistogram(const ProcParams ¶ms, float scale, LUT } -// Parallelized transformation; create transform with cmsFLAGS_NOCACHE! -void Imagefloat::ExecCMSTransform2(cmsHTRANSFORM hTransform, bool normalizeIn) -{ - - // LittleCMS cannot parallelize planar setups -- Hombre: LCMS2.4 can! But it we use this new feature, memory allocation - // have to be modified too to build temporary buffers that allow multi processor execution -#ifdef _OPENMP - #pragma omp parallel -#endif - { - AlignedBuffer pBuf(width * 3); - -#ifdef _OPENMP - #pragma omp for schedule(dynamic, 16) -#endif - - for (int y = 0; y < height; y++) - { - float *p = pBuf.data, *pR = r(y), *pG = g(y), *pB = b(y); - - if (normalizeIn) { - for (int x = 0; x < width; x++) { - *(p++) = *(pR++)/ 65535.f; - *(p++) = *(pG++)/ 65535.f; - *(p++) = *(pB++)/ 65535.f; - } - } else { - for (int x = 0; x < width; x++) { - *(p++) = *(pR++); - *(p++) = *(pG++); - *(p++) = *(pB++); - } - } - - cmsDoTransform (hTransform, pBuf.data, pBuf.data, width); - - p = pBuf.data; - pR = r(y); - pG = g(y); - pB = b(y); - - for (int x = 0; x < width; x++) { - *(pR++) = *(p++); - *(pG++) = *(p++); - *(pB++) = *(p++); - } - } // End of parallelization - } -} - - - - // Parallelized transformation; create transform with cmsFLAGS_NOCACHE! void Imagefloat::ExecCMSTransform(cmsHTRANSFORM hTransform) { @@ -540,7 +487,7 @@ void Imagefloat::ExecCMSTransform(cmsHTRANSFORM hTransform) AlignedBuffer pBuf(width * 3); #ifdef _OPENMP - #pragma omp for schedule(static) + #pragma omp for schedule(dynamic, 16) #endif for (int y = 0; y < height; y++) diff --git a/rtengine/imagefloat.h b/rtengine/imagefloat.h index 921f86b59..e1e5086b8 100644 --- a/rtengine/imagefloat.h +++ b/rtengine/imagefloat.h @@ -222,8 +222,6 @@ public: void normalizeFloatTo1(); void normalizeFloatTo65535(); void calcCroppedHistogram(const ProcParams ¶ms, float scale, LUTu & hist); - void ExecCMSTransform2(cmsHTRANSFORM hTransform, bool normalizeIn = true); - void ExecCMSTransform(cmsHTRANSFORM hTransform); void ExecCMSTransform(cmsHTRANSFORM hTransform, const LabImage &labImage, int cx, int cy); }; diff --git a/rtengine/iplab2rgb.cc b/rtengine/iplab2rgb.cc index 437144600..3240f06b1 100644 --- a/rtengine/iplab2rgb.cc +++ b/rtengine/iplab2rgb.cc @@ -644,7 +644,7 @@ void ImProcFunctions::workingtrc(Imagefloat* src, Imagefloat* dst, int cw, int c cmsHTRANSFORM hTransform = cmsCreateTransform(iprof, TYPE_RGB_FLT, oprofdef, TYPE_RGB_FLT, params->icm.outputIntent, flags); lcmsMutex->unlock(); - dst->ExecCMSTransform2(hTransform, false); + dst->ExecCMSTransform(hTransform); cmsDeleteTransform(hTransform); if (normalizeOut) { From a3549a6247926ac05b96d3d66114a97e12ee61fc Mon Sep 17 00:00:00 2001 From: heckflosse Date: Sun, 25 Nov 2018 16:21:47 +0100 Subject: [PATCH 3/4] custom trc: another small speedup, #5025 --- rtengine/iplab2rgb.cc | 339 +++++++++++++++++++++--------------------- 1 file changed, 172 insertions(+), 167 deletions(-) diff --git a/rtengine/iplab2rgb.cc b/rtengine/iplab2rgb.cc index 3240f06b1..38cebd17a 100644 --- a/rtengine/iplab2rgb.cc +++ b/rtengine/iplab2rgb.cc @@ -443,29 +443,26 @@ void ImProcFunctions::workingtrc(Imagefloat* src, Imagefloat* dst, int cw, int c { dx = dz = 1.0; } - double toxyz[3][3] = { + const float toxyz[3][3] = { { - (wprof[0][0] / (dx * (normalizeIn ? 65535.0 : 1.0))), //I have suppressed / Color::D50x - (wprof[0][1] / (dx * (normalizeIn ? 65535.0 : 1.0))), - (wprof[0][2] / (dx * (normalizeIn ? 65535.0 : 1.0))) + static_cast(wprof[0][0] / (dx * (normalizeIn ? 65535.0 : 1.0))), //I have suppressed / Color::D50x + static_cast(wprof[0][1] / (dx * (normalizeIn ? 65535.0 : 1.0))), + static_cast(wprof[0][2] / (dx * (normalizeIn ? 65535.0 : 1.0))) }, { - (wprof[1][0] / (normalizeIn ? 65535.0 : 1.0)), - (wprof[1][1] / (normalizeIn ? 65535.0 : 1.0)), - (wprof[1][2] / (normalizeIn ? 65535.0 : 1.0)) + static_cast(wprof[1][0] / (normalizeIn ? 65535.0 : 1.0)), + static_cast(wprof[1][1] / (normalizeIn ? 65535.0 : 1.0)), + static_cast(wprof[1][2] / (normalizeIn ? 65535.0 : 1.0)) }, { - (wprof[2][0] / (dz * (normalizeIn ? 65535.0 : 1.0))), //I have suppressed / Color::D50z - (wprof[2][1] / (dz * (normalizeIn ? 65535.0 : 1.0))), - (wprof[2][2] / (dz * (normalizeIn ? 65535.0 : 1.0))) + static_cast(wprof[2][0] / (dz * (normalizeIn ? 65535.0 : 1.0))), //I have suppressed / Color::D50z + static_cast(wprof[2][1] / (dz * (normalizeIn ? 65535.0 : 1.0))), + static_cast(wprof[2][2] / (dz * (normalizeIn ? 65535.0 : 1.0))) } }; - double pwr; - double ts; - ts = slpos; - + double pwr = 1.0 / gampos; + double ts = slpos; int five = mul; - pwr = 1.0 / gampos; if (gampos < 1.0) { pwr = gampos; @@ -474,7 +471,7 @@ void ImProcFunctions::workingtrc(Imagefloat* src, Imagefloat* dst, int cw, int c } // int select_temp = 1; //5003K - const double eps = 0.000000001; // not divide by zero + constexpr double eps = 0.000000001; // not divide by zero enum class ColorTemp { D50 = 5003, // for Widegamut, ProPhoto Best, Beta -> D50 @@ -484,173 +481,181 @@ void ImProcFunctions::workingtrc(Imagefloat* src, Imagefloat* dst, int cw, int c }; ColorTemp temp = ColorTemp::D50; - cmsHPROFILE oprofdef; float p[6]; //primaries - if (true) { - //primaries for 10 working profiles ==> output profiles - if (profile == "WideGamut") { - p[0] = 0.7350; //Widegamut primaries - p[1] = 0.2650; - p[2] = 0.1150; - p[3] = 0.8260; - p[4] = 0.1570; - p[5] = 0.0180; - } else if (profile == "Adobe RGB") { - p[0] = 0.6400; //Adobe primaries - p[1] = 0.3300; - p[2] = 0.2100; - p[3] = 0.7100; - p[4] = 0.1500; - p[5] = 0.0600; - temp = ColorTemp::D65; - } else if (profile == "sRGB") { - p[0] = 0.6400; // sRGB primaries - p[1] = 0.3300; - p[2] = 0.3000; - p[3] = 0.6000; - p[4] = 0.1500; - p[5] = 0.0600; - temp = ColorTemp::D65; - } else if (profile == "BruceRGB") { - p[0] = 0.6400; // Bruce primaries - p[1] = 0.3300; - p[2] = 0.2800; - p[3] = 0.6500; - p[4] = 0.1500; - p[5] = 0.0600; - temp = ColorTemp::D65; - } else if (profile == "Beta RGB") { - p[0] = 0.6888; // Beta primaries - p[1] = 0.3112; - p[2] = 0.1986; - p[3] = 0.7551; - p[4] = 0.1265; - p[5] = 0.0352; - } else if (profile == "BestRGB") { - p[0] = 0.7347; // Best primaries - p[1] = 0.2653; - p[2] = 0.2150; - p[3] = 0.7750; - p[4] = 0.1300; - p[5] = 0.0350; - } else if (profile == "Rec2020") { - p[0] = 0.7080; // Rec2020 primaries - p[1] = 0.2920; - p[2] = 0.1700; - p[3] = 0.7970; - p[4] = 0.1310; - p[5] = 0.0460; - temp = ColorTemp::D65; - } else if (profile == "ACESp0") { - p[0] = 0.7347; // ACES P0 primaries - p[1] = 0.2653; - p[2] = 0.0000; - p[3] = 1.0; - p[4] = 0.0001; - p[5] = -0.0770; - temp = ColorTemp::D60; - } else if (profile == "ACESp1") { - p[0] = 0.713; // ACES P1 primaries - p[1] = 0.293; - p[2] = 0.165; - p[3] = 0.830; - p[4] = 0.128; - p[5] = 0.044; - temp = ColorTemp::D60; - } else if (profile == "ProPhoto") { - p[0] = 0.7347; //ProPhoto and default primaries - p[1] = 0.2653; - p[2] = 0.1596; - p[3] = 0.8404; - p[4] = 0.0366; - p[5] = 0.0001; - } else { - p[0] = 0.7347; //default primaries always unused - p[1] = 0.2653; - p[2] = 0.1596; - p[3] = 0.8404; - p[4] = 0.0366; - p[5] = 0.0001; - } - - if (slpos == 0) { - slpos = eps; - } - - GammaValues g_a; //gamma parameters - int mode = 0; - Color::calcGamma(pwr, ts, mode, g_a); // call to calcGamma with selected gamma and slope : return parameters for LCMS2 - - cmsCIExyY xyD; - - cmsCIExyYTRIPLE Primaries = { - {p[0], p[1], 1.0}, // red - {p[2], p[3], 1.0}, // green - {p[4], p[5], 1.0} // blue - }; - - cmsToneCurve* GammaTRC[3]; - cmsFloat64Number gammaParams[7]; - gammaParams[4] = g_a[3] * ts; - gammaParams[0] = gampos; - gammaParams[1] = 1. / (1.0 + g_a[4]); - gammaParams[2] = g_a[4] / (1.0 + g_a[4]); - gammaParams[3] = 1. / slpos; - gammaParams[5] = 0.0; - gammaParams[6] = 0.0; - // printf("ga0=%f ga1=%f ga2=%f ga3=%f ga4=%f\n", ga0, ga1, ga2, ga3, ga4); - - // 7 parameters for smoother curves - cmsWhitePointFromTemp(&xyD, (double)temp); - if (profile == "ACESp0") { - xyD = {0.32168, 0.33767, 1.0};//refine white point to avoid differences - } - - GammaTRC[0] = GammaTRC[1] = GammaTRC[2] = cmsBuildParametricToneCurve(NULL, five, gammaParams);//5 = more smoother than 4 - oprofdef = cmsCreateRGBProfile(&xyD, &Primaries, GammaTRC); - cmsFreeToneCurve(GammaTRC[0]); + //primaries for 10 working profiles ==> output profiles + if (profile == "WideGamut") { + p[0] = 0.7350; //Widegamut primaries + p[1] = 0.2650; + p[2] = 0.1150; + p[3] = 0.8260; + p[4] = 0.1570; + p[5] = 0.0180; + } else if (profile == "Adobe RGB") { + p[0] = 0.6400; //Adobe primaries + p[1] = 0.3300; + p[2] = 0.2100; + p[3] = 0.7100; + p[4] = 0.1500; + p[5] = 0.0600; + temp = ColorTemp::D65; + } else if (profile == "sRGB") { + p[0] = 0.6400; // sRGB primaries + p[1] = 0.3300; + p[2] = 0.3000; + p[3] = 0.6000; + p[4] = 0.1500; + p[5] = 0.0600; + temp = ColorTemp::D65; + } else if (profile == "BruceRGB") { + p[0] = 0.6400; // Bruce primaries + p[1] = 0.3300; + p[2] = 0.2800; + p[3] = 0.6500; + p[4] = 0.1500; + p[5] = 0.0600; + temp = ColorTemp::D65; + } else if (profile == "Beta RGB") { + p[0] = 0.6888; // Beta primaries + p[1] = 0.3112; + p[2] = 0.1986; + p[3] = 0.7551; + p[4] = 0.1265; + p[5] = 0.0352; + } else if (profile == "BestRGB") { + p[0] = 0.7347; // Best primaries + p[1] = 0.2653; + p[2] = 0.2150; + p[3] = 0.7750; + p[4] = 0.1300; + p[5] = 0.0350; + } else if (profile == "Rec2020") { + p[0] = 0.7080; // Rec2020 primaries + p[1] = 0.2920; + p[2] = 0.1700; + p[3] = 0.7970; + p[4] = 0.1310; + p[5] = 0.0460; + temp = ColorTemp::D65; + } else if (profile == "ACESp0") { + p[0] = 0.7347; // ACES P0 primaries + p[1] = 0.2653; + p[2] = 0.0000; + p[3] = 1.0; + p[4] = 0.0001; + p[5] = -0.0770; + temp = ColorTemp::D60; + } else if (profile == "ACESp1") { + p[0] = 0.713; // ACES P1 primaries + p[1] = 0.293; + p[2] = 0.165; + p[3] = 0.830; + p[4] = 0.128; + p[5] = 0.044; + temp = ColorTemp::D60; + } else if (profile == "ProPhoto") { + p[0] = 0.7347; //ProPhoto and default primaries + p[1] = 0.2653; + p[2] = 0.1596; + p[3] = 0.8404; + p[4] = 0.0366; + p[5] = 0.0001; + } else { + p[0] = 0.7347; //default primaries always unused + p[1] = 0.2653; + p[2] = 0.1596; + p[3] = 0.8404; + p[4] = 0.0366; + p[5] = 0.0001; } + if (slpos == 0) { + slpos = eps; + } + + GammaValues g_a; //gamma parameters + int mode = 0; + Color::calcGamma(pwr, ts, mode, g_a); // call to calcGamma with selected gamma and slope : return parameters for LCMS2 + + cmsCIExyY xyD; + + cmsCIExyYTRIPLE Primaries = { + {p[0], p[1], 1.0}, // red + {p[2], p[3], 1.0}, // green + {p[4], p[5], 1.0} // blue + }; + + cmsToneCurve* GammaTRC[3]; + cmsFloat64Number gammaParams[7]; + gammaParams[4] = g_a[3] * ts; + gammaParams[0] = gampos; + gammaParams[1] = 1. / (1.0 + g_a[4]); + gammaParams[2] = g_a[4] / (1.0 + g_a[4]); + gammaParams[3] = 1. / slpos; + gammaParams[5] = 0.0; + gammaParams[6] = 0.0; + // printf("ga0=%f ga1=%f ga2=%f ga3=%f ga4=%f\n", ga0, ga1, ga2, ga3, ga4); + + // 7 parameters for smoother curves + cmsWhitePointFromTemp(&xyD, (double)temp); + if (profile == "ACESp0") { + xyD = {0.32168, 0.33767, 1.0};//refine white point to avoid differences + } + + GammaTRC[0] = GammaTRC[1] = GammaTRC[2] = cmsBuildParametricToneCurve(NULL, five, gammaParams);//5 = more smoother than 4 + const cmsHPROFILE oprofdef = cmsCreateRGBProfile(&xyD, &Primaries, GammaTRC); + cmsFreeToneCurve(GammaTRC[0]); + if (oprofdef) { - #pragma omp parallel for if (multiThread) - - for (int i = 0; i < ch; i++) { - float* rr = src->r(i); - float* rg = src->g(i); - float* rb = src->b(i); - - float* xa = (float*)dst->r(i); - float* ya = (float*)dst->g(i); - float* za = (float*)dst->b(i); - - for (int j = 0; j < cw; j++) { - float r1 = rr[j]; - float g1 = rg[j]; - float b1 = rb[j]; - - xa[j] = toxyz[0][0] * r1 + toxyz[0][1] * g1 + toxyz[0][2] * b1; - ya[j] = toxyz[1][0] * r1 + toxyz[1][1] * g1 + toxyz[1][2] * b1; - za[j] = toxyz[2][0] * r1 + toxyz[2][1] * g1 + toxyz[2][2] * b1; - } - } - cmsUInt32Number flags = cmsFLAGS_NOOPTIMIZE | cmsFLAGS_NOCACHE; - - - lcmsMutex->lock(); cmsHPROFILE iprof = ICCStore::getInstance()->getXYZProfile(); + lcmsMutex->lock(); // cmsHTRANSFORM hTransform = cmsCreateTransform(iprof, TYPE_RGB_16, oprofdef, TYPE_RGB_16, params->icm.outputIntent, cmsFLAGS_NOOPTIMIZE | cmsFLAGS_NOCACHE); cmsHTRANSFORM hTransform = cmsCreateTransform(iprof, TYPE_RGB_FLT, oprofdef, TYPE_RGB_FLT, params->icm.outputIntent, flags); lcmsMutex->unlock(); +#ifdef _OPENMP + #pragma omp parallel if (multiThread) +#endif + { + AlignedBuffer pBuf(cw * 3); - dst->ExecCMSTransform(hTransform); +#ifdef _OPENMP + #pragma omp for schedule(dynamic, 16) +#endif + + for (int i = 0; i < ch; i++) { + float *p = pBuf.data; + float* rr = src->r(i); + float* rg = src->g(i); + float* rb = src->b(i); + + float* xa = (float*)dst->r(i); + float* ya = (float*)dst->g(i); + float* za = (float*)dst->b(i); + + for (int j = 0; j < cw; j++) { + float r1 = rr[j]; + float g1 = rg[j]; + float b1 = rb[j]; + + *(p++) = toxyz[0][0] * r1 + toxyz[0][1] * g1 + toxyz[0][2] * b1; + *(p++) = toxyz[1][0] * r1 + toxyz[1][1] * g1 + toxyz[1][2] * b1; + *(p++) = toxyz[2][0] * r1 + toxyz[2][1] * g1 + toxyz[2][2] * b1; + } + cmsDoTransform (hTransform, pBuf.data, pBuf.data, cw); + p = pBuf.data; + for (int x = 0; x < cw; x++) { + *(xa++) = *(p++); + *(ya++) = *(p++); + *(za++) = *(p++); + } + } + } cmsDeleteTransform(hTransform); if (normalizeOut) { dst->normalizeFloatTo65535(); } - } } From 4f89286b986f5d5b22783eac63f54b114830eb4f Mon Sep 17 00:00:00 2001 From: heckflosse Date: Sun, 25 Nov 2018 17:53:49 +0100 Subject: [PATCH 4/4] ImProcFunctions::workingtrc(): cleanup and another small speedup, #5025 --- rtengine/improcfun.h | 2 +- rtengine/iplab2rgb.cc | 72 ++++++++++++++++++------------------------- 2 files changed, 31 insertions(+), 43 deletions(-) diff --git a/rtengine/improcfun.h b/rtengine/improcfun.h index 470922508..6684c8ee2 100644 --- a/rtengine/improcfun.h +++ b/rtengine/improcfun.h @@ -350,7 +350,7 @@ public: Image8* lab2rgb(LabImage* lab, int cx, int cy, int cw, int ch, const procparams::ColorManagementParams &icm, bool consider_histogram_settings = true); Imagefloat* lab2rgbOut(LabImage* lab, int cx, int cy, int cw, int ch, const procparams::ColorManagementParams &icm); // CieImage *ciec; - void workingtrc(Imagefloat* src, Imagefloat* dst, int cw, int ch, int mul, Glib::ustring profile, double gampos, double slpos, bool normalizeIn = true, bool normalizeOut = true); + void workingtrc(const Imagefloat* src, Imagefloat* dst, int cw, int ch, int mul, const Glib::ustring &profile, double gampos, double slpos, bool normalizeIn = true, bool normalizeOut = true); bool transCoord(int W, int H, int x, int y, int w, int h, int& xv, int& yv, int& wv, int& hv, double ascaleDef = -1, const LensCorrection *pLCPMap = nullptr); bool transCoord(int W, int H, const std::vector &src, std::vector &red, std::vector &green, std::vector &blue, double ascaleDef = -1, const LensCorrection *pLCPMap = nullptr); diff --git a/rtengine/iplab2rgb.cc b/rtengine/iplab2rgb.cc index 38cebd17a..9e47558ca 100644 --- a/rtengine/iplab2rgb.cc +++ b/rtengine/iplab2rgb.cc @@ -432,11 +432,9 @@ Imagefloat* ImProcFunctions::lab2rgbOut(LabImage* lab, int cx, int cy, int cw, i } -void ImProcFunctions::workingtrc(Imagefloat* src, Imagefloat* dst, int cw, int ch, int mul, Glib::ustring profile, double gampos, double slpos, bool normalizeIn, bool normalizeOut) +void ImProcFunctions::workingtrc(const Imagefloat* src, Imagefloat* dst, int cw, int ch, int mul, const Glib::ustring &profile, double gampos, double slpos, bool normalizeIn, bool normalizeOut) { - TMatrix wprof; - - wprof = ICCStore::getInstance()->workingSpaceMatrix(params->icm.workingProfile); + const TMatrix wprof = ICCStore::getInstance()->workingSpaceMatrix(params->icm.workingProfile); double dx = Color::D50x; double dz = Color::D50z; @@ -574,18 +572,10 @@ void ImProcFunctions::workingtrc(Imagefloat* src, Imagefloat* dst, int cw, int c } GammaValues g_a; //gamma parameters - int mode = 0; + constexpr int mode = 0; Color::calcGamma(pwr, ts, mode, g_a); // call to calcGamma with selected gamma and slope : return parameters for LCMS2 - cmsCIExyY xyD; - cmsCIExyYTRIPLE Primaries = { - {p[0], p[1], 1.0}, // red - {p[2], p[3], 1.0}, // green - {p[4], p[5], 1.0} // blue - }; - - cmsToneCurve* GammaTRC[3]; cmsFloat64Number gammaParams[7]; gammaParams[4] = g_a[3] * ts; gammaParams[0] = gampos; @@ -597,65 +587,63 @@ void ImProcFunctions::workingtrc(Imagefloat* src, Imagefloat* dst, int cw, int c // printf("ga0=%f ga1=%f ga2=%f ga3=%f ga4=%f\n", ga0, ga1, ga2, ga3, ga4); // 7 parameters for smoother curves + cmsCIExyY xyD; cmsWhitePointFromTemp(&xyD, (double)temp); if (profile == "ACESp0") { xyD = {0.32168, 0.33767, 1.0};//refine white point to avoid differences } + cmsToneCurve* GammaTRC[3]; GammaTRC[0] = GammaTRC[1] = GammaTRC[2] = cmsBuildParametricToneCurve(NULL, five, gammaParams);//5 = more smoother than 4 + + const cmsCIExyYTRIPLE Primaries = { + {p[0], p[1], 1.0}, // red + {p[2], p[3], 1.0}, // green + {p[4], p[5], 1.0} // blue + }; const cmsHPROFILE oprofdef = cmsCreateRGBProfile(&xyD, &Primaries, GammaTRC); + cmsFreeToneCurve(GammaTRC[0]); if (oprofdef) { - cmsUInt32Number flags = cmsFLAGS_NOOPTIMIZE | cmsFLAGS_NOCACHE; - cmsHPROFILE iprof = ICCStore::getInstance()->getXYZProfile(); + constexpr cmsUInt32Number flags = cmsFLAGS_NOOPTIMIZE | cmsFLAGS_NOCACHE; + const cmsHPROFILE iprof = ICCStore::getInstance()->getXYZProfile(); lcmsMutex->lock(); - // cmsHTRANSFORM hTransform = cmsCreateTransform(iprof, TYPE_RGB_16, oprofdef, TYPE_RGB_16, params->icm.outputIntent, cmsFLAGS_NOOPTIMIZE | cmsFLAGS_NOCACHE); - cmsHTRANSFORM hTransform = cmsCreateTransform(iprof, TYPE_RGB_FLT, oprofdef, TYPE_RGB_FLT, params->icm.outputIntent, flags); + const cmsHTRANSFORM hTransform = cmsCreateTransform(iprof, TYPE_RGB_FLT, oprofdef, TYPE_RGB_FLT, params->icm.outputIntent, flags); lcmsMutex->unlock(); #ifdef _OPENMP #pragma omp parallel if (multiThread) #endif { AlignedBuffer pBuf(cw * 3); + const float normalize = normalizeOut ? 65535.f : 1.f; #ifdef _OPENMP - #pragma omp for schedule(dynamic, 16) + #pragma omp for schedule(dynamic, 16) nowait #endif - for (int i = 0; i < ch; i++) { + for (int i = 0; i < ch; ++i) { float *p = pBuf.data; - float* rr = src->r(i); - float* rg = src->g(i); - float* rb = src->b(i); + for (int j = 0; j < cw; ++j) { + const float r = src->r(i, j); + const float g = src->g(i, j); + const float b = src->b(i, j); - float* xa = (float*)dst->r(i); - float* ya = (float*)dst->g(i); - float* za = (float*)dst->b(i); - - for (int j = 0; j < cw; j++) { - float r1 = rr[j]; - float g1 = rg[j]; - float b1 = rb[j]; - - *(p++) = toxyz[0][0] * r1 + toxyz[0][1] * g1 + toxyz[0][2] * b1; - *(p++) = toxyz[1][0] * r1 + toxyz[1][1] * g1 + toxyz[1][2] * b1; - *(p++) = toxyz[2][0] * r1 + toxyz[2][1] * g1 + toxyz[2][2] * b1; + *(p++) = toxyz[0][0] * r + toxyz[0][1] * g + toxyz[0][2] * b; + *(p++) = toxyz[1][0] * r + toxyz[1][1] * g + toxyz[1][2] * b; + *(p++) = toxyz[2][0] * r + toxyz[2][1] * g + toxyz[2][2] * b; } - cmsDoTransform (hTransform, pBuf.data, pBuf.data, cw); p = pBuf.data; - for (int x = 0; x < cw; x++) { - *(xa++) = *(p++); - *(ya++) = *(p++); - *(za++) = *(p++); + cmsDoTransform(hTransform, p, p, cw); + for (int j = 0; j < cw; ++j) { + dst->r(i, j) = *(p++) * normalize; + dst->g(i, j) = *(p++) * normalize; + dst->b(i, j) = *(p++) * normalize; } } } cmsDeleteTransform(hTransform); - if (normalizeOut) { - dst->normalizeFloatTo65535(); - } } }