From 524b0056dcead2b69d2861cdf8e9e0ef8c5f362f Mon Sep 17 00:00:00 2001 From: heckflosse Date: Wed, 27 Dec 2017 00:01:09 +0100 Subject: [PATCH 1/2] Speedup for rgbPrco() --- rtengine/LUT.h | 31 ++++++++++++++++++++++ rtengine/improcfun.cc | 60 ++++++++++++++++++++++++++++++++++--------- 2 files changed, 79 insertions(+), 12 deletions(-) diff --git a/rtengine/LUT.h b/rtengine/LUT.h index 6a617f40b..48c85726d 100644 --- a/rtengine/LUT.h +++ b/rtengine/LUT.h @@ -340,6 +340,37 @@ public: vfloat diff = clampedIndexes - _mm_cvtepi32_ps(indexes); return vintpf(diff, upper, lower); } + + // NOTE: This version requires LUTs which do not clip at upper and lower bounds + vfloat operator()(vfloat indexv) const + { + static_assert(std::is_same<T, float>::value, "This method only works for float LUTs"); + + // Clamp and convert to integer values. Extract out of SSE register because all + // lookup operations use regular addresses. + vfloat clampedIndexes = vmaxf(ZEROV, vminf(F2V(maxsf), indexv)); + vint indexes = _mm_cvttps_epi32(clampedIndexes); + int indexArray[4]; + _mm_storeu_si128(reinterpret_cast<__m128i*>(&indexArray[0]), indexes); + + // Load data from the table. This reads more than necessary, but there don't seem + // to exist more granular operations (though we could try non-SSE). + // Cast to int for convenience in the next operation (partial transpose). + vint values[4]; + for (int i = 0; i < 4; ++i) { + values[i] = _mm_castps_si128(LVFU(data[indexArray[i]])); + } + + // Partial 4x4 transpose operation. We want two new vectors, the first consisting + // of [values[0][0] ... values[3][0]] and the second [values[0][1] ... values[3][1]]. 
+ __m128i temp0 = _mm_unpacklo_epi32(values[0], values[1]); + __m128i temp1 = _mm_unpacklo_epi32(values[2], values[3]); + vfloat lower = _mm_castsi128_ps(_mm_unpacklo_epi64(temp0, temp1)); + vfloat upper = _mm_castsi128_ps(_mm_unpackhi_epi64(temp0, temp1)); + + vfloat diff = indexv - _mm_cvtepi32_ps(indexes); + return vintpf(diff, upper, lower); + } #ifdef __SSE4_1__ template::value>::type> vfloat operator[](vint idxv ) const diff --git a/rtengine/improcfun.cc b/rtengine/improcfun.cc index 082799e62..8300a24e7 100644 --- a/rtengine/improcfun.cc +++ b/rtengine/improcfun.cc @@ -3540,7 +3540,27 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer } for (int i = istart, ti = 0; i < tH; i++, ti++) { - for (int j = jstart, tj = 0; j < tW; j++, tj++) { + vfloat cr = F2V(0.299f); + vfloat cg = F2V(0.587f); + vfloat cb = F2V(0.114f); + int j = jstart; + int tj = 0; +#ifdef __SSE2__ + for (; j < tW - 3; j+=4, tj+=4) { + + vfloat rv = LVF(rtemp[ti * TS + tj]); + vfloat gv = LVF(gtemp[ti * TS + tj]); + vfloat bv = LVF(btemp[ti * TS + tj]); + + //shadow tone curve + vfloat Yv = cr * rv + cg * gv + cb * bv; + vfloat tonefactorv = shtonecurve(Yv); + STVF(rtemp[ti * TS + tj], rv * tonefactorv); + STVF(gtemp[ti * TS + tj], gv * tonefactorv); + STVF(btemp[ti * TS + tj], bv * tonefactorv); + } +#endif + for (; j < tW; j++, tj++) { float r = rtemp[ti * TS + tj]; float g = gtemp[ti * TS + tj]; @@ -3588,19 +3608,35 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer } } - for (int i = istart, ti = 0; i < tH; i++, ti++) { - for (int j = jstart, tj = 0; j < tW; j++, tj++) { + if (histToneCurveThr) { + for (int i = istart, ti = 0; i < tH; i++, ti++) { + for (int j = jstart, tj = 0; j < tW; j++, tj++) { - //brightness/contrast - rtemp[ti * TS + tj] = tonecurve[ rtemp[ti * TS + tj] ]; - gtemp[ti * TS + tj] = tonecurve[ gtemp[ti * TS + tj] ]; - btemp[ti * TS + tj] = tonecurve[ btemp[ti * TS + tj] ]; + //brightness/contrast + 
rtemp[ti * TS + tj] = tonecurve[ rtemp[ti * TS + tj] ]; + gtemp[ti * TS + tj] = tonecurve[ gtemp[ti * TS + tj] ]; + btemp[ti * TS + tj] = tonecurve[ btemp[ti * TS + tj] ]; - if (histToneCurveThr) { int y = CLIP (lumimulf[0] * Color::gamma2curve[rtemp[ti * TS + tj]] + lumimulf[1] * Color::gamma2curve[gtemp[ti * TS + tj]] + lumimulf[2] * Color::gamma2curve[btemp[ti * TS + tj]]); histToneCurveThr[y >> histToneCurveCompression]++; } } + } else { + for (int i = istart, ti = 0; i < tH; i++, ti++) { + int j = jstart, tj = 0; + for (; j < tW - 3; j+=4, tj+=4) { + //brightness/contrast + STVF(rtemp[ti * TS + tj], tonecurve(LVF(rtemp[ti * TS + tj]))); + STVF(gtemp[ti * TS + tj], tonecurve(LVF(gtemp[ti * TS + tj]))); + STVF(btemp[ti * TS + tj], tonecurve(LVF(btemp[ti * TS + tj]))); + } + for (; j < tW; j++, tj++) { + //brightness/contrast + rtemp[ti * TS + tj] = tonecurve[rtemp[ti * TS + tj]]; + gtemp[ti * TS + tj] = tonecurve[gtemp[ti * TS + tj]]; + btemp[ti * TS + tj] = tonecurve[btemp[ti * TS + tj]]; + } + } } if (editID == EUID_ToneCurve1) { // filling the pipette buffer @@ -3687,10 +3723,10 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer if (hasToneCurve2) { if (curveMode2 == ToneCurveParams::TcMode::STD) { // Standard for (int i = istart, ti = 0; i < tH; i++, ti++) { - for (int j = jstart, tj = 0; j < tW; j++, tj++) { - const StandardToneCurve& userToneCurve = static_cast<const StandardToneCurve&> (customToneCurve2); - userToneCurve.Apply (rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj]); - } + const StandardToneCurve& userToneCurve = static_cast<const StandardToneCurve&> (customToneCurve2); + userToneCurve.BatchApply ( + 0, tW - jstart, + &rtemp[ti * TS], &gtemp[ti * TS], &btemp[ti * TS]); } } else if (curveMode2 == ToneCurveParams::TcMode::FILMLIKE) { // Adobe like for (int i = istart, ti = 0; i < tH; i++, ti++) { From 3dcce23c24d093f7c9719addd99c7b9d5589a827 Mon Sep 17 00:00:00 2001 From: heckflosse Date: Sun, 31 Dec 2017 14:36:59 +0100 Subject: [PATCH 2/2] rgbproc() 
speedups --- rtengine/LUT.h | 32 +++ rtengine/color.cc | 65 +++++ rtengine/color.h | 2 +- rtengine/improcfun.cc | 544 ++++++++++++++++++++---------------------- 4 files changed, 358 insertions(+), 285 deletions(-) diff --git a/rtengine/LUT.h b/rtengine/LUT.h index 48c85726d..0fd906dc0 100644 --- a/rtengine/LUT.h +++ b/rtengine/LUT.h @@ -309,6 +309,38 @@ public: #if defined( __SSE2__ ) && defined( __x86_64__ ) + + // NOTE: This function requires LUTs which clip only at the lower bound + vfloat cb(vfloat indexv) const + { + static_assert(std::is_same<T, float>::value, "This method only works for float LUTs"); + + // Clamp and convert to integer values. Extract out of SSE register because all + // lookup operations use regular addresses. + vfloat clampedIndexes = vmaxf(ZEROV, vminf(F2V(maxIndexFloat), indexv)); + vint indexes = _mm_cvttps_epi32(clampedIndexes); + int indexArray[4]; + _mm_storeu_si128(reinterpret_cast<__m128i*>(&indexArray[0]), indexes); + + // Load data from the table. This reads more than necessary, but there don't seem + // to exist more granular operations (though we could try non-SSE). + // Cast to int for convenience in the next operation (partial transpose). + vint values[4]; + for (int i = 0; i < 4; ++i) { + values[i] = _mm_castps_si128(LVFU(data[indexArray[i]])); + } + + // Partial 4x4 transpose operation. We want two new vectors, the first consisting + // of [values[0][0] ... values[3][0]] and the second [values[0][1] ... values[3][1]]. + __m128i temp0 = _mm_unpacklo_epi32(values[0], values[1]); + __m128i temp1 = _mm_unpacklo_epi32(values[2], values[3]); + vfloat lower = _mm_castsi128_ps(_mm_unpacklo_epi64(temp0, temp1)); + vfloat upper = _mm_castsi128_ps(_mm_unpackhi_epi64(temp0, temp1)); + + vfloat diff = vmaxf(ZEROV, indexv) - _mm_cvtepi32_ps(indexes); + return vintpf(diff, upper, lower); + } + // NOTE: This version requires LUTs which clip at upper and lower bounds // (which is the default). 
vfloat operator[](vfloat indexv) const diff --git a/rtengine/color.cc b/rtengine/color.cc index 7964cc472..4b51314cc 100644 --- a/rtengine/color.cc +++ b/rtengine/color.cc @@ -1783,6 +1783,71 @@ void Color::Lab2XYZ(vfloat L, vfloat a, vfloat b, vfloat &x, vfloat &y, vfloat & } #endif // __SSE2__ +void Color::RGB2Lab(float *R, float *G, float *B, float *L, float *a, float *b, const float wp[3][3], int width) +{ + +#ifdef __SSE2__ + // prepare matrix to save some divisions (reduces the number of divisions by width/2 - 6) + float wpn[3][3]; + for(int i = 0; i < 3; ++i) { + wpn[0][i] = wp[0][i] / Color::D50x; + wpn[1][i] = wp[1][i]; + wpn[2][i] = wp[2][i] / Color::D50z; + } + + vfloat maxvalfv = F2V(MAXVALF); + vfloat c116v = F2V(116.f); + vfloat c5242d88v = F2V(5242.88f); + vfloat c500v = F2V(500.f); + vfloat c200v = F2V(200.f); +#endif + int i = 0; +#ifdef __SSE2__ + for(;i < width - 3; i+=4) { + const vfloat rv = LVFU(R[i]); + const vfloat gv = LVFU(G[i]); + const vfloat bv = LVFU(B[i]); + const vfloat xv = F2V(wpn[0][0]) * rv + F2V(wpn[0][1]) * gv + F2V(wpn[0][2]) * bv; + const vfloat yv = F2V(wpn[1][0]) * rv + F2V(wpn[1][1]) * gv + F2V(wpn[1][2]) * bv; + const vfloat zv = F2V(wpn[2][0]) * rv + F2V(wpn[2][1]) * gv + F2V(wpn[2][2]) * bv; + + vmask maxMask = vmaskf_gt(vmaxf(xv, vmaxf(yv, zv)), maxvalfv); + if (_mm_movemask_ps((vfloat)maxMask)) { + // take slower code path for all 4 pixels if one of the values is > MAXVALF. Still faster than non SSE2 version + for(int k = 0; k < 4; ++k) { + float x = xv[k]; + float y = yv[k]; + float z = zv[k]; + float fx = (x <= 65535.f ? cachef[x] : (327.68f * xcbrtf(x / MAXVALF))); + float fy = (y <= 65535.f ? cachef[y] : (327.68f * xcbrtf(y / MAXVALF))); + float fz = (z <= 65535.f ? 
cachef[z] : (327.68f * xcbrtf(z / MAXVALF))); + + L[i + k] = (116.f * fy - 5242.88f); //5242.88=16.0*327.68; + a[i + k] = (500.f * (fx - fy) ); + b[i + k] = (200.f * (fy - fz) ); + } + } else { + const vfloat fx = cachef[xv]; + const vfloat fy = cachef[yv]; + const vfloat fz = cachef[zv]; + + STVFU(L[i], c116v * fy - c5242d88v); //5242.88=16.0*327.68; + STVFU(a[i], c500v * (fx - fy)); + STVFU(b[i], c200v * (fy - fz)); + } + } +#endif + for(;i < width; ++i) { + const float rv = R[i]; + const float gv = G[i]; + const float bv = B[i]; + float x = wp[0][0] * rv + wp[0][1] * gv + wp[0][2] * bv; + float y = wp[1][0] * rv + wp[1][1] * gv + wp[1][2] * bv; + float z = wp[2][0] * rv + wp[2][1] * gv + wp[2][2] * bv; + XYZ2Lab(x, y, z, L[i], a[i], b[i]); + } +} + void Color::XYZ2Lab(float X, float Y, float Z, float &L, float &a, float &b) { diff --git a/rtengine/color.h b/rtengine/color.h index 59e189810..049defb70 100644 --- a/rtengine/color.h +++ b/rtengine/color.h @@ -475,7 +475,7 @@ public: * @param b channel [-42000 ; +42000] ; can be more than 42000 (return value) */ static void XYZ2Lab(float x, float y, float z, float &L, float &a, float &b); - + static void RGB2Lab(float *X, float *Y, float *Z, float *L, float *a, float *b, const float wp[3][3], int width); /** * @brief Convert Lab in Yuv diff --git a/rtengine/improcfun.cc b/rtengine/improcfun.cc index 8300a24e7..bfe4ee021 100644 --- a/rtengine/improcfun.cc +++ b/rtengine/improcfun.cc @@ -48,6 +48,227 @@ #undef CLIPD #define CLIPD(a) ((a)>0.0f?((a)<1.0f?(a):1.0f):0.0f) +namespace { + +using namespace rtengine; +// begin of helper function for rgbProc() +void shadowToneCurve(const LUTf &shtonecurve, float *rtemp, float *gtemp, float *btemp, int istart, int tH, int jstart, int tW, int tileSize) { + +#ifdef __SSE2__ + vfloat cr = F2V(0.299f); + vfloat cg = F2V(0.587f); + vfloat cb = F2V(0.114f); +#endif + + for (int i = istart, ti = 0; i < tH; i++, ti++) { + int j = jstart, tj = 0; +#ifdef __SSE2__ + for (; j < tW - 3; 
j+=4, tj+=4) { + + vfloat rv = LVF(rtemp[ti * tileSize + tj]); + vfloat gv = LVF(gtemp[ti * tileSize + tj]); + vfloat bv = LVF(btemp[ti * tileSize + tj]); + + //shadow tone curve + vfloat Yv = cr * rv + cg * gv + cb * bv; + vfloat tonefactorv = shtonecurve(Yv); + STVF(rtemp[ti * tileSize + tj], rv * tonefactorv); + STVF(gtemp[ti * tileSize + tj], gv * tonefactorv); + STVF(btemp[ti * tileSize + tj], bv * tonefactorv); + } +#endif + for (; j < tW; j++, tj++) { + + float r = rtemp[ti * tileSize + tj]; + float g = gtemp[ti * tileSize + tj]; + float b = btemp[ti * tileSize + tj]; + + //shadow tone curve + float Y = (0.299f * r + 0.587f * g + 0.114f * b); + float tonefactor = shtonecurve[Y]; + rtemp[ti * tileSize + tj] = rtemp[ti * tileSize + tj] * tonefactor; + gtemp[ti * tileSize + tj] = gtemp[ti * tileSize + tj] * tonefactor; + btemp[ti * tileSize + tj] = btemp[ti * tileSize + tj] * tonefactor; + } + } +} + +void highlightToneCurve(const LUTf &hltonecurve, float *rtemp, float *gtemp, float *btemp, int istart, int tH, int jstart, int tW, int tileSize, float exp_scale, float comp, float hlrange) { + +#ifdef __SSE2__ + vfloat threev = F2V(3.f); + vfloat maxvalfv = F2V(MAXVALF); +#endif + + for (int i = istart, ti = 0; i < tH; i++, ti++) { + int j = jstart, tj = 0; +#ifdef __SSE2__ + for (; j < tW - 3; j+=4, tj+=4) { + + vfloat rv = LVF(rtemp[ti * tileSize + tj]); + vfloat gv = LVF(gtemp[ti * tileSize + tj]); + vfloat bv = LVF(btemp[ti * tileSize + tj]); + + //TODO: proper treatment of out-of-gamut colors + //float tonefactor = hltonecurve[(0.299f*r+0.587f*g+0.114f*b)]; + vmask maxMask = vmaskf_ge(vmaxf(rv, vmaxf(gv, bv)), maxvalfv); + if(_mm_movemask_ps((vfloat)maxMask)) { + for (int k = 0; k < 4; ++k) { + float r = rtemp[ti * tileSize + tj + k]; + float g = gtemp[ti * tileSize + tj + k]; + float b = btemp[ti * tileSize + tj + k]; + float tonefactor = ((r < MAXVALF ? hltonecurve[r] : CurveFactory::hlcurve (exp_scale, comp, hlrange, r) ) + + (g < MAXVALF ? 
hltonecurve[g] : CurveFactory::hlcurve (exp_scale, comp, hlrange, g) ) + + (b < MAXVALF ? hltonecurve[b] : CurveFactory::hlcurve (exp_scale, comp, hlrange, b) ) ) / 3.0; + + // note: tonefactor includes exposure scaling, that is here exposure slider and highlight compression takes place + rtemp[ti * tileSize + tj + k] = r * tonefactor; + gtemp[ti * tileSize + tj + k] = g * tonefactor; + btemp[ti * tileSize + tj + k] = b * tonefactor; + } + } else { + vfloat tonefactorv = (hltonecurve.cb(rv) + hltonecurve.cb(gv) + hltonecurve.cb(bv)) / threev; + // note: tonefactor includes exposure scaling, that is here exposure slider and highlight compression takes place + STVF(rtemp[ti * tileSize + tj], rv * tonefactorv); + STVF(gtemp[ti * tileSize + tj], gv * tonefactorv); + STVF(btemp[ti * tileSize + tj], bv * tonefactorv); + } + } +#endif + for (; j < tW; j++, tj++) { + + float r = rtemp[ti * tileSize + tj]; + float g = gtemp[ti * tileSize + tj]; + float b = btemp[ti * tileSize + tj]; + + //TODO: proper treatment of out-of-gamut colors + //float tonefactor = hltonecurve[(0.299f*r+0.587f*g+0.114f*b)]; + float tonefactor = ((r < MAXVALF ? hltonecurve[r] : CurveFactory::hlcurve (exp_scale, comp, hlrange, r) ) + + (g < MAXVALF ? hltonecurve[g] : CurveFactory::hlcurve (exp_scale, comp, hlrange, g) ) + + (b < MAXVALF ? 
hltonecurve[b] : CurveFactory::hlcurve (exp_scale, comp, hlrange, b) ) ) / 3.0; + + // note: tonefactor includes exposure scaling, that is here exposure slider and highlight compression takes place + rtemp[ti * tileSize + tj] = r * tonefactor; + gtemp[ti * tileSize + tj] = g * tonefactor; + btemp[ti * tileSize + tj] = b * tonefactor; + } + } +} + +void proPhotoBlue(float *rtemp, float *gtemp, float *btemp, int istart, int tH, int jstart, int tW, int tileSize) { + // this is a hack to avoid the blue=>black bug (Issue 2141) + for (int i = istart, ti = 0; i < tH; i++, ti++) { + int j = jstart, tj = 0; +#ifdef __SSE2__ + for (; j < tW - 3; j+=4, tj+=4) { + vfloat rv = LVF(rtemp[ti * tileSize + tj]); + vfloat gv = LVF(gtemp[ti * tileSize + tj]); + vmask zeromask = vorm(vmaskf_eq(rv, ZEROV), vmaskf_eq(gv, ZEROV)); + if(_mm_movemask_ps((vfloat)zeromask)) { + for (int k = 0; k < 4; ++k) { + float r = rtemp[ti * tileSize + tj + k]; + float g = gtemp[ti * tileSize + tj + k]; + if (r == 0.0f || g == 0.0f) { + float b = btemp[ti * tileSize + tj + k]; + float h, s, v; + Color::rgb2hsv (r, g, b, h, s, v); + s *= 0.99f; + Color::hsv2rgb (h, s, v, rtemp[ti * tileSize + tj + k], gtemp[ti * tileSize + tj + k], btemp[ti * tileSize + tj + k]); + } + } + } + } +#endif + for (; j < tW; j++, tj++) { + float r = rtemp[ti * tileSize + tj]; + float g = gtemp[ti * tileSize + tj]; + + if (r == 0.0f || g == 0.0f) { + float b = btemp[ti * tileSize + tj]; + float h, s, v; + Color::rgb2hsv (r, g, b, h, s, v); + s *= 0.99f; + Color::hsv2rgb (h, s, v, rtemp[ti * tileSize + tj], gtemp[ti * tileSize + tj], btemp[ti * tileSize + tj]); + } + } + } +} + +void customToneCurve(const ToneCurve &customToneCurve, ToneCurveParams::TcMode curveMode, float *rtemp, float *gtemp, float *btemp, int istart, int tH, int jstart, int tW, int tileSize, PerceptualToneCurveState ptcApplyState) { + + if (curveMode == ToneCurveParams::TcMode::STD) { // Standard + for (int i = istart, ti = 0; i < tH; i++, ti++) { + const 
StandardToneCurve& userToneCurve = static_cast<const StandardToneCurve&> (customToneCurve); + userToneCurve.BatchApply ( + 0, tW - jstart, + &rtemp[ti * tileSize], &gtemp[ti * tileSize], &btemp[ti * tileSize]); + } + } else if (curveMode == ToneCurveParams::TcMode::FILMLIKE) { // Adobe like + for (int i = istart, ti = 0; i < tH; i++, ti++) { + for (int j = jstart, tj = 0; j < tW; j++, tj++) { + const AdobeToneCurve& userToneCurve = static_cast<const AdobeToneCurve&> (customToneCurve); + userToneCurve.Apply (rtemp[ti * tileSize + tj], gtemp[ti * tileSize + tj], btemp[ti * tileSize + tj]); + } + } + } else if (curveMode == ToneCurveParams::TcMode::SATANDVALBLENDING) { // apply the curve on the saturation and value channels + for (int i = istart, ti = 0; i < tH; i++, ti++) { + for (int j = jstart, tj = 0; j < tW; j++, tj++) { + const SatAndValueBlendingToneCurve& userToneCurve = static_cast<const SatAndValueBlendingToneCurve&> (customToneCurve); + rtemp[ti * tileSize + tj] = CLIP (rtemp[ti * tileSize + tj]); + gtemp[ti * tileSize + tj] = CLIP (gtemp[ti * tileSize + tj]); + btemp[ti * tileSize + tj] = CLIP (btemp[ti * tileSize + tj]); + userToneCurve.Apply (rtemp[ti * tileSize + tj], gtemp[ti * tileSize + tj], btemp[ti * tileSize + tj]); + } + } + } else if (curveMode == ToneCurveParams::TcMode::WEIGHTEDSTD) { // apply the curve to the rgb channels, weighted + const WeightedStdToneCurve& userToneCurve = static_cast<const WeightedStdToneCurve&> (customToneCurve); + + for (int i = istart, ti = 0; i < tH; i++, ti++) { + for (int j = jstart, tj = 0; j < tW; j++, tj++) { + rtemp[ti * tileSize + tj] = CLIP (rtemp[ti * tileSize + tj]); + gtemp[ti * tileSize + tj] = CLIP (gtemp[ti * tileSize + tj]); + btemp[ti * tileSize + tj] = CLIP (btemp[ti * tileSize + tj]); + userToneCurve.Apply (rtemp[ti * tileSize + tj], gtemp[ti * tileSize + tj], btemp[ti * tileSize + tj]); + } + } + } else if (curveMode == ToneCurveParams::TcMode::LUMINANCE) { // apply the curve to the luminance channel + const LuminanceToneCurve& userToneCurve = static_cast<const LuminanceToneCurve&> (customToneCurve); + + for (int i = istart, ti = 0; i < 
tH; i++, ti++) { + for (int j = jstart, tj = 0; j < tW; j++, tj++) { + rtemp[ti * tileSize + tj] = CLIP (rtemp[ti * tileSize + tj]); + gtemp[ti * tileSize + tj] = CLIP (gtemp[ti * tileSize + tj]); + btemp[ti * tileSize + tj] = CLIP (btemp[ti * tileSize + tj]); + userToneCurve.Apply (rtemp[ti * tileSize + tj], gtemp[ti * tileSize + tj], btemp[ti * tileSize + tj]); + } + } + } else if (curveMode == ToneCurveParams::TcMode::PERCEPTUAL) { // apply curve while keeping color appearance constant + const PerceptualToneCurve& userToneCurve = static_cast<const PerceptualToneCurve&> (customToneCurve); + + for (int i = istart, ti = 0; i < tH; i++, ti++) { + for (int j = jstart, tj = 0; j < tW; j++, tj++) { + rtemp[ti * tileSize + tj] = CLIP (rtemp[ti * tileSize + tj]); + gtemp[ti * tileSize + tj] = CLIP (gtemp[ti * tileSize + tj]); + btemp[ti * tileSize + tj] = CLIP (btemp[ti * tileSize + tj]); + userToneCurve.Apply (rtemp[ti * tileSize + tj], gtemp[ti * tileSize + tj], btemp[ti * tileSize + tj], ptcApplyState); + } + } + } +} + +void fillEditFloat(float *editIFloatTmpR, float *editIFloatTmpG, float *editIFloatTmpB, float *rtemp, float *gtemp, float *btemp, int istart, int tH, int jstart, int tW, int tileSize) { + for (int i = istart, ti = 0; i < tH; i++, ti++) { + for (int j = jstart, tj = 0; j < tW; j++, tj++) { + editIFloatTmpR[ti * tileSize + tj] = Color::gamma2curve[rtemp[ti * tileSize + tj]] / 65535.f; + editIFloatTmpG[ti * tileSize + tj] = Color::gamma2curve[gtemp[ti * tileSize + tj]] / 65535.f; + editIFloatTmpB[ti * tileSize + tj] = Color::gamma2curve[btemp[ti * tileSize + tj]] / 65535.f; + } + } +} +// end of helper function for rgbProc() + +} + namespace rtengine { @@ -3332,8 +3553,8 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer float chMixBG = float (params->chmixer.blue[1]); float chMixBB = float (params->chmixer.blue[2]); - int shHighlights = params->sh.highlights; - int shShadows = params->sh.shadows; + float shHighlights = params->sh.highlights / 100.f; // percent -> fraction for both sliders; must be float, an int would truncate the quotient 
+ float shShadows = params->sh.shadows / 100.f; bool blackwhite = params->blackwhite.enabled; bool complem = params->blackwhite.enabledcc; float bwr = float (params->blackwhite.mixerRed); @@ -3501,15 +3722,13 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer float g = gtemp[ti * TS + tj]; float b = btemp[ti * TS + tj]; - double mapval = 1.0 + shmap->map[i][j]; - double factor = 1.0; + float mapval = 1.f + shmap->map[i][j]; + float factor = 1.f; - if (processSH) { - if (mapval > h_th) { - factor = (h_th + (100.0 - shHighlights) * (mapval - h_th) / 100.0) / mapval; - } else if (mapval < s_th) { - factor = (s_th - (100.0 - shShadows) * (s_th - mapval) / 100.0) / mapval; - } + if (mapval > h_th) { + factor = (1.f - shHighlights) + shHighlights * h_th / mapval; + } else if (mapval < s_th) { + factor = (s_th - (1.f - shShadows) * (s_th - mapval)) / mapval; } rtemp[ti * TS + tj] = factor * r; @@ -3519,61 +3738,8 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer } } - for (int i = istart, ti = 0; i < tH; i++, ti++) { - for (int j = jstart, tj = 0; j < tW; j++, tj++) { - - float r = rtemp[ti * TS + tj]; - float g = gtemp[ti * TS + tj]; - float b = btemp[ti * TS + tj]; - - //TODO: proper treatment of out-of-gamut colors - //float tonefactor = hltonecurve[(0.299f*r+0.587f*g+0.114f*b)]; - float tonefactor = ((r < MAXVALF ? hltonecurve[r] : CurveFactory::hlcurve (exp_scale, comp, hlrange, r) ) + - (g < MAXVALF ? hltonecurve[g] : CurveFactory::hlcurve (exp_scale, comp, hlrange, g) ) + - (b < MAXVALF ? 
hltonecurve[b] : CurveFactory::hlcurve (exp_scale, comp, hlrange, b) ) ) / 3.0; - - // note: tonefactor includes exposure scaling, that is here exposure slider and highlight compression takes place - rtemp[ti * TS + tj] = r * tonefactor; - gtemp[ti * TS + tj] = g * tonefactor; - btemp[ti * TS + tj] = b * tonefactor; - } - } - - for (int i = istart, ti = 0; i < tH; i++, ti++) { - vfloat cr = F2V(0.299f); - vfloat cg = F2V(0.587f); - vfloat cb = F2V(0.114f); - int j = jstart; - int tj = 0; -#ifdef __SSE2__ - for (; j < tW - 3; j+=4, tj+=4) { - - vfloat rv = LVF(rtemp[ti * TS + tj]); - vfloat gv = LVF(gtemp[ti * TS + tj]); - vfloat bv = LVF(btemp[ti * TS + tj]); - - //shadow tone curve - vfloat Yv = cr * rv + cg * gv + cb * bv; - vfloat tonefactorv = shtonecurve(Yv); - STVF(rtemp[ti * TS + tj], rv * tonefactorv); - STVF(gtemp[ti * TS + tj], gv * tonefactorv); - STVF(btemp[ti * TS + tj], bv * tonefactorv); - } -#endif - for (; j < tW; j++, tj++) { - - float r = rtemp[ti * TS + tj]; - float g = gtemp[ti * TS + tj]; - float b = btemp[ti * TS + tj]; - - //shadow tone curve - float Y = (0.299f * r + 0.587f * g + 0.114f * b); - float tonefactor = shtonecurve[Y]; - rtemp[ti * TS + tj] = rtemp[ti * TS + tj] * tonefactor; - gtemp[ti * TS + tj] = gtemp[ti * TS + tj] * tonefactor; - btemp[ti * TS + tj] = btemp[ti * TS + tj] * tonefactor; - } - } + highlightToneCurve(hltonecurve, rtemp, gtemp, btemp, istart, tH, jstart, tW, TS, exp_scale, comp, hlrange); + shadowToneCurve(shtonecurve, rtemp, gtemp, btemp, istart, tH, jstart, tW, TS); if (dcpProf) { dcpProf->step2ApplyTile (rtemp, gtemp, btemp, tW - jstart, tH - istart, TS, asIn); @@ -3581,22 +3747,10 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer for (int i = istart, ti = 0; i < tH; i++, ti++) { for (int j = jstart, tj = 0; j < tW; j++, tj++) { - float r = rtemp[ti * TS + tj]; - float g = gtemp[ti * TS + tj]; - float b = btemp[ti * TS + tj]; - - // clip out of gamut colors, without distorting 
color too bad - if (r < 0) { - r = 0; - } - - if (g < 0) { - g = 0; - } - - if (b < 0) { - b = 0; - } + // clip out of gamut colors, without distorting colour too bad + float r = std::max(rtemp[ti * TS + tj], 0.f); + float g = std::max(gtemp[ti * TS + tj], 0.f); + float b = std::max(btemp[ti * TS + tj], 0.f); if (r > 65535 || g > 65535 || b > 65535) { filmlike_clip (&r, &g, &b); @@ -3624,12 +3778,14 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer } else { for (int i = istart, ti = 0; i < tH; i++, ti++) { int j = jstart, tj = 0; +#ifdef __SSE2__ for (; j < tW - 3; j+=4, tj+=4) { //brightness/contrast STVF(rtemp[ti * TS + tj], tonecurve(LVF(rtemp[ti * TS + tj]))); STVF(gtemp[ti * TS + tj], tonecurve(LVF(gtemp[ti * TS + tj]))); STVF(btemp[ti * TS + tj], tonecurve(LVF(btemp[ti * TS + tj]))); } +#endif for (; j < tW; j++, tj++) { //brightness/contrast rtemp[ti * TS + tj] = tonecurve[rtemp[ti * TS + tj]]; @@ -3640,133 +3796,19 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer } if (editID == EUID_ToneCurve1) { // filling the pipette buffer - for (int i = istart, ti = 0; i < tH; i++, ti++) { - for (int j = jstart, tj = 0; j < tW; j++, tj++) { - editIFloatTmpR[ti * TS + tj] = Color::gamma2curve[rtemp[ti * TS + tj]] / 65535.f; - editIFloatTmpG[ti * TS + tj] = Color::gamma2curve[gtemp[ti * TS + tj]] / 65535.f; - editIFloatTmpB[ti * TS + tj] = Color::gamma2curve[btemp[ti * TS + tj]] / 65535.f; - } - } + fillEditFloat(editIFloatTmpR, editIFloatTmpG, editIFloatTmpB, rtemp, gtemp, btemp, istart, tH, jstart, tW, TS); } if (hasToneCurve1) { - if (curveMode == ToneCurveParams::TcMode::STD) { // Standard - for (int i = istart, ti = 0; i < tH; i++, ti++) { - const StandardToneCurve& userToneCurve = static_cast (customToneCurve1); - userToneCurve.BatchApply ( - 0, tW - jstart, - &rtemp[ti * TS], >emp[ti * TS], &btemp[ti * TS]); - } - } else if (curveMode == ToneCurveParams::TcMode::FILMLIKE) { // Adobe like - for 
(int i = istart, ti = 0; i < tH; i++, ti++) { - for (int j = jstart, tj = 0; j < tW; j++, tj++) { - const AdobeToneCurve& userToneCurve = static_cast (customToneCurve1); - userToneCurve.Apply (rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj]); - } - } - } else if (curveMode == ToneCurveParams::TcMode::SATANDVALBLENDING) { // apply the curve on the saturation and value channels - for (int i = istart, ti = 0; i < tH; i++, ti++) { - for (int j = jstart, tj = 0; j < tW; j++, tj++) { - const SatAndValueBlendingToneCurve& userToneCurve = static_cast (customToneCurve1); - rtemp[ti * TS + tj] = CLIP (rtemp[ti * TS + tj]); - gtemp[ti * TS + tj] = CLIP (gtemp[ti * TS + tj]); - btemp[ti * TS + tj] = CLIP (btemp[ti * TS + tj]); - userToneCurve.Apply (rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj]); - } - } - } else if (curveMode == ToneCurveParams::TcMode::WEIGHTEDSTD) { // apply the curve to the rgb channels, weighted - const WeightedStdToneCurve& userToneCurve = static_cast (customToneCurve1); - - for (int i = istart, ti = 0; i < tH; i++, ti++) { - for (int j = jstart, tj = 0; j < tW; j++, tj++) { - rtemp[ti * TS + tj] = CLIP (rtemp[ti * TS + tj]); - gtemp[ti * TS + tj] = CLIP (gtemp[ti * TS + tj]); - btemp[ti * TS + tj] = CLIP (btemp[ti * TS + tj]); - userToneCurve.Apply (rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj]); - } - } - } else if (curveMode == ToneCurveParams::TcMode::LUMINANCE) { // apply the curve to the luminance channel - const LuminanceToneCurve& userToneCurve = static_cast (customToneCurve1); - - for (int i = istart, ti = 0; i < tH; i++, ti++) { - for (int j = jstart, tj = 0; j < tW; j++, tj++) { - rtemp[ti * TS + tj] = CLIP (rtemp[ti * TS + tj]); - gtemp[ti * TS + tj] = CLIP (gtemp[ti * TS + tj]); - btemp[ti * TS + tj] = CLIP (btemp[ti * TS + tj]); - userToneCurve.Apply (rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj]); - } - } - } else if (curveMode == ToneCurveParams::TcMode::PERCEPTUAL) { // apply 
curve while keeping color appearance constant - const PerceptualToneCurve& userToneCurve = static_cast (customToneCurve1); - - for (int i = istart, ti = 0; i < tH; i++, ti++) { - for (int j = jstart, tj = 0; j < tW; j++, tj++) { - rtemp[ti * TS + tj] = CLIP (rtemp[ti * TS + tj]); - gtemp[ti * TS + tj] = CLIP (gtemp[ti * TS + tj]); - btemp[ti * TS + tj] = CLIP (btemp[ti * TS + tj]); - userToneCurve.Apply (rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj], ptc1ApplyState); - } - } - } + customToneCurve(customToneCurve1, curveMode, rtemp, gtemp, btemp, istart, tH, jstart, tW, TS, ptc1ApplyState); } if (editID == EUID_ToneCurve2) { // filling the pipette buffer - for (int i = istart, ti = 0; i < tH; i++, ti++) { - for (int j = jstart, tj = 0; j < tW; j++, tj++) { - editIFloatTmpR[ti * TS + tj] = Color::gamma2curve[rtemp[ti * TS + tj]] / 65535.f; - editIFloatTmpG[ti * TS + tj] = Color::gamma2curve[gtemp[ti * TS + tj]] / 65535.f; - editIFloatTmpB[ti * TS + tj] = Color::gamma2curve[btemp[ti * TS + tj]] / 65535.f; - } - } + fillEditFloat(editIFloatTmpR, editIFloatTmpG, editIFloatTmpB, rtemp, gtemp, btemp, istart, tH, jstart, tW, TS); } if (hasToneCurve2) { - if (curveMode2 == ToneCurveParams::TcMode::STD) { // Standard - for (int i = istart, ti = 0; i < tH; i++, ti++) { - const StandardToneCurve& userToneCurve = static_cast (customToneCurve2); - userToneCurve.BatchApply ( - 0, tW - jstart, - &rtemp[ti * TS], >emp[ti * TS], &btemp[ti * TS]); - } - } else if (curveMode2 == ToneCurveParams::TcMode::FILMLIKE) { // Adobe like - for (int i = istart, ti = 0; i < tH; i++, ti++) { - for (int j = jstart, tj = 0; j < tW; j++, tj++) { - const AdobeToneCurve& userToneCurve = static_cast (customToneCurve2); - userToneCurve.Apply (rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj]); - } - } - } else if (curveMode2 == ToneCurveParams::TcMode::SATANDVALBLENDING) { // apply the curve on the saturation and value channels - for (int i = istart, ti = 0; i < tH; i++, 
ti++) { - for (int j = jstart, tj = 0; j < tW; j++, tj++) { - const SatAndValueBlendingToneCurve& userToneCurve = static_cast (customToneCurve2); - userToneCurve.Apply (rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj]); - } - } - } else if (curveMode2 == ToneCurveParams::TcMode::WEIGHTEDSTD) { // apply the curve to the rgb channels, weighted - const WeightedStdToneCurve& userToneCurve = static_cast (customToneCurve2); - - for (int i = istart, ti = 0; i < tH; i++, ti++) { - for (int j = jstart, tj = 0; j < tW; j++, tj++) { - userToneCurve.Apply (rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj]); - } - } - } else if (curveMode2 == ToneCurveParams::TcMode::LUMINANCE) { // apply the curve to the luminance channel - const LuminanceToneCurve& userToneCurve = static_cast (customToneCurve2); - - for (int i = istart, ti = 0; i < tH; i++, ti++) { - for (int j = jstart, tj = 0; j < tW; j++, tj++) { - userToneCurve.Apply (rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj]); - } - } - } else if (curveMode2 == ToneCurveParams::TcMode::PERCEPTUAL) { // apply curve while keeping color appearance constant - const PerceptualToneCurve& userToneCurve = static_cast (customToneCurve2); - - for (int i = istart, ti = 0; i < tH; i++, ti++) { - for (int j = jstart, tj = 0; j < tW; j++, tj++) { - userToneCurve.Apply (rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj], ptc2ApplyState); - } - } - } + customToneCurve(customToneCurve2, curveMode2, rtemp, gtemp, btemp, istart, tH, jstart, tW, TS, ptc2ApplyState); } if (editID == EUID_RGB_R) { @@ -3978,20 +4020,7 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer } if (isProPhoto) { // this is a hack to avoid the blue=>black bug (Issue 2141) - for (int i = istart, ti = 0; i < tH; i++, ti++) { - for (int j = jstart, tj = 0; j < tW; j++, tj++) { - float r = rtemp[ti * TS + tj]; - float g = gtemp[ti * TS + tj]; - - if (r == 0.0f || g == 0.0f) { - float b = btemp[ti * TS 
+ tj]; - float h, s, v; - Color::rgb2hsv (r, g, b, h, s, v); - s *= 0.99f; - Color::hsv2rgb (h, s, v, rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj]); - } - } - } + proPhotoBlue(rtemp, gtemp, btemp, istart, tH, jstart, tW, TS); } if (hasColorToning && !blackwhite) { @@ -4195,13 +4224,7 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer // filling the pipette buffer if (editID == EUID_BlackWhiteBeforeCurve) { - for (int i = istart, ti = 0; i < tH; i++, ti++) { - for (int j = jstart, tj = 0; j < tW; j++, tj++) { - editIFloatTmpR[ti * TS + tj] = Color::gamma2curve[rtemp[ti * TS + tj]] / 65535.f; - editIFloatTmpG[ti * TS + tj] = Color::gamma2curve[gtemp[ti * TS + tj]] / 65535.f; - editIFloatTmpB[ti * TS + tj] = Color::gamma2curve[btemp[ti * TS + tj]] / 65535.f; - } - } + fillEditFloat(editIFloatTmpR, editIFloatTmpG, editIFloatTmpB, rtemp, gtemp, btemp, istart, tH, jstart, tW, TS); } else if (editID == EUID_BlackWhiteLuminance) { for (int i = istart, ti = 0; i < tH; i++, ti++) { for (int j = jstart, tj = 0; j < tW; j++, tj++) { @@ -4494,53 +4517,24 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer if (!blackwhite) { + if (editImgFloat || editWhatever) { + for (int i = istart, ti = 0; i < tH; i++, ti++) { + for (int j = jstart, tj = 0; j < tW; j++, tj++) { + + // filling the pipette buffer by the content of the temp pipette buffers + if (editImgFloat) { + editImgFloat->r (i, j) = editIFloatTmpR[ti * TS + tj]; + editImgFloat->g (i, j) = editIFloatTmpG[ti * TS + tj]; + editImgFloat->b (i, j) = editIFloatTmpB[ti * TS + tj]; + } else if (editWhatever) { + editWhatever->v (i, j) = editWhateverTmp[ti * TS + tj]; + } + } + } + } // ready, fill lab for (int i = istart, ti = 0; i < tH; i++, ti++) { - for (int j = jstart, tj = 0; j < tW; j++, tj++) { - - // filling the pipette buffer by the content of the temp pipette buffers - if (editImgFloat) { - editImgFloat->r (i, j) = editIFloatTmpR[ti * TS + 
tj]; - editImgFloat->g (i, j) = editIFloatTmpG[ti * TS + tj]; - editImgFloat->b (i, j) = editIFloatTmpB[ti * TS + tj]; - } else if (editWhatever) { - editWhatever->v (i, j) = editWhateverTmp[ti * TS + tj]; - } - - float r = rtemp[ti * TS + tj]; - float g = gtemp[ti * TS + tj]; - float b = btemp[ti * TS + tj]; - - float x = toxyz[0][0] * r + toxyz[0][1] * g + toxyz[0][2] * b; - float y = toxyz[1][0] * r + toxyz[1][1] * g + toxyz[1][2] * b; - float z = toxyz[2][0] * r + toxyz[2][1] * g + toxyz[2][2] * b; - - float fx, fy, fz; - - fx = (x < 65535.0f ? Color::cachef[x] : 327.68f * std::cbrt (x / MAXVALF)); - fy = (y < 65535.0f ? Color::cachef[y] : 327.68f * std::cbrt (y / MAXVALF)); - fz = (z < 65535.0f ? Color::cachef[z] : 327.68f * std::cbrt (z / MAXVALF)); - - lab->L[i][j] = (116.0f * fy - 5242.88f); //5242.88=16.0*327.68; - lab->a[i][j] = (500.0f * (fx - fy) ); - lab->b[i][j] = (200.0f * (fy - fz) ); - - //test for color accuracy - /* - float fy = (0.00862069 * lab->L[i][j])/327.68 + 0.137932; // (L+16)/116 - float fx = (0.002 * lab->a[i][j])/327.68 + fy; - float fz = fy - (0.005 * lab->b[i][j])/327.68; - - float x_ = 65535*Lab2xyz(fx)*Color::D50x; - float y_ = 65535*Lab2xyz(fy); - float z_ = 65535*Lab2xyz(fz)*Color::D50z; - - int R,G,B; - xyz2srgb(x_,y_,z_,R,G,B); - r=(float)R; g=(float)G; b=(float)B; - float xxx=1; - */ - } + Color::RGB2Lab(&rtemp[ti * TS], &gtemp[ti * TS], &btemp[ti * TS], &(lab->L[i][jstart]), &(lab->a[i][jstart]), &(lab->b[i][jstart]), toxyz, tW - jstart); } } else { // black & white // Auto channel mixer needs whole image, so we now copy to tmpImage and close the tiled processing @@ -4952,25 +4946,7 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer #endif for (int i = 0; i < tH; i++) { - for (int j = 0; j < tW; j++) { - float r = tmpImage->r (i, j); - float g = tmpImage->g (i, j); - float b = tmpImage->b (i, j); - - float x = toxyz[0][0] * r + toxyz[0][1] * g + toxyz[0][2] * b; - float y = toxyz[1][0] * r + 
toxyz[1][1] * g + toxyz[1][2] * b; - float z = toxyz[2][0] * r + toxyz[2][1] * g + toxyz[2][2] * b; - - float fx, fy, fz; - - fx = (x < MAXVALF ? Color::cachef[x] : 327.68f * std::cbrt (x / MAXVALF)); - fy = (y < MAXVALF ? Color::cachef[y] : 327.68f * std::cbrt (y / MAXVALF)); - fz = (z < MAXVALF ? Color::cachef[z] : 327.68f * std::cbrt (z / MAXVALF)); - - lab->L[i][j] = 116.0f * fy - 5242.88f; //5242.88=16.0*327.68; - lab->a[i][j] = 500.0f * (fx - fy); - lab->b[i][j] = 200.0f * (fy - fz); - } + Color::RGB2Lab(tmpImage->r(i), tmpImage->g(i), tmpImage->b(i), lab->L[i], lab->a[i], lab->b[i], toxyz, tW); }