From ea1e001d15bf3eeb14c60098287a1c364ba47c7b Mon Sep 17 00:00:00 2001 From: heckflosse Date: Thu, 4 Jan 2018 19:03:02 +0100 Subject: [PATCH] Speedup for 'Saturation and Value Blending' tonecurve --- rtengine/color.h | 42 ++++++++++++++--- rtengine/curves.h | 102 ++++++++++++++++++++++++++++++++++++------ rtengine/improcfun.cc | 29 ++++-------- 3 files changed, 134 insertions(+), 39 deletions(-) diff --git a/rtengine/color.h b/rtengine/color.h index 049defb70..f2092f530 100644 --- a/rtengine/color.h +++ b/rtengine/color.h @@ -298,6 +298,36 @@ public: } } + static inline void rgb2hsvtc(float r, float g, float b, float &h, float &s, float &v) + { + const float var_Min = min(r, g, b); + const float var_Max = max(r, g, b); + const float del_Max = var_Max - var_Min; + + v = var_Max / 65535.f; + + if (del_Max < 0.00001f) { + h = 0.f; + s = 0.f; + } else { + s = del_Max / var_Max; + + if (r == var_Max) { + h = (g - b) / del_Max; + } else if (g == var_Max) { + h = 2.f + (b - r) / del_Max; + } else { /*if ( b == var_Max ) */ + h = 4.f + (r - g) / del_Max; + } + + if (h < 0.f) { + h += 6.f; + } else if (h > 6.f) { + h -= 6.f; + } + } + } + /** * @brief Convert hue saturation value in red green blue * @param h hue channel [0 ; 1] @@ -312,14 +342,14 @@ public: static inline void hsv2rgbdcp (float h, float s, float v, float &r, float &g, float &b) { // special version for dcp which saves 1 division (in caller) and six multiplications (inside this function) - int sector = h; // sector 0 to 5, floor() is very slow, and h is always >0 - float f = h - sector; // fractional part of h + const int sector = h; // sector 0 to 5, floor() is very slow, and h is always > 0 + const float f = h - sector; // fractional part of h v *= 65535.f; - float vs = v * s; - float p = v - vs; - float q = v - f * vs; - float t = p + v - q; + const float vs = v * s; + const float p = v - vs; + const float q = v - f * vs; + const float t = p + v - q; switch (sector) { case 1: diff --git a/rtengine/curves.h 
b/rtengine/curves.h index e8b65c33d..f537bf6c5 100644 --- a/rtengine/curves.h +++ b/rtengine/curves.h @@ -828,8 +828,12 @@ class WeightedStdToneCurve : public ToneCurve { private: float Triangle(float refX, float refY, float X2) const; +#if defined( __SSE2__ ) && defined( __x86_64__ ) + vfloat Triangle(vfloat refX, vfloat refY, vfloat X2) const; +#endif public: void Apply(float& r, float& g, float& b) const; + void BatchApply(const size_t start, const size_t end, float *r, float *g, float *b) const; }; class LuminanceToneCurve : public ToneCurve @@ -1003,6 +1007,17 @@ inline float WeightedStdToneCurve::Triangle(float a, float a1, float b) const return a1; } +#if defined( __SSE2__ ) && defined( __x86_64__ ) +inline vfloat WeightedStdToneCurve::Triangle(vfloat a, vfloat a1, vfloat b) const +{ + vfloat a2 = a1 - a; + vmask cmask = vmaskf_lt(b, a); + vfloat b3 = vself(cmask, b, F2V(65535.f) - b); + vfloat a3 = vself(cmask, a, F2V(65535.f) - a); + return b + a2 * b3 / a3; +} +#endif + // Tone curve modifying the value channel only, preserving hue and saturation // values in 0xffff space inline void WeightedStdToneCurve::Apply (float& r, float& g, float& b) const @@ -1010,6 +1025,9 @@ inline void WeightedStdToneCurve::Apply (float& r, float& g, float& b) const assert (lutToneCurve); + r = CLIP(r); + g = CLIP(g); + b = CLIP(b); float r1 = lutToneCurve[r]; float g1 = Triangle(r, r1, g); float b1 = Triangle(r, r1, b); @@ -1022,11 +1040,70 @@ inline void WeightedStdToneCurve::Apply (float& r, float& g, float& b) const float r3 = Triangle(b, b3, r); float g3 = Triangle(b, b3, g); - r = CLIP( r1 * 0.50f + r2 * 0.25f + r3 * 0.25f); + r = CLIP(r1 * 0.50f + r2 * 0.25f + r3 * 0.25f); g = CLIP(g1 * 0.25f + g2 * 0.50f + g3 * 0.25f); b = CLIP(b1 * 0.25f + b2 * 0.25f + b3 * 0.50f); } +inline void WeightedStdToneCurve::BatchApply(const size_t start, const size_t end, float *r, float *g, float *b) const { + assert (lutToneCurve); + assert (lutToneCurve.getClip() & LUT_CLIP_BELOW); + 
assert (lutToneCurve.getClip() & LUT_CLIP_ABOVE); + + // All pointers must have the same alignment for SSE usage. In the loop body below, + // we will only check `r`, assuming that the same result would hold for `g` and `b`. + assert (reinterpret_cast<uintptr_t>(r) % 16 == reinterpret_cast<uintptr_t>(g) % 16); + assert (reinterpret_cast<uintptr_t>(g) % 16 == reinterpret_cast<uintptr_t>(b) % 16); + + size_t i = start; + while (true) { + if (i >= end) { + // If we get to the end before getting to an aligned address, just return. + // (Or, for non-SSE mode, if we get to the end.) + return; +#if defined( __SSE2__ ) && defined( __x86_64__ ) + } else if (reinterpret_cast<uintptr_t>(&r[i]) % 16 == 0) { + // Otherwise, we get to the first aligned address; go to the SSE part. + break; +#endif + } + Apply(r[i], g[i], b[i]); + i++; + } + +#if defined( __SSE2__ ) && defined( __x86_64__ ) + const vfloat c65535v = F2V(65535.f); + const vfloat zd5v = F2V(0.5f); + const vfloat zd25v = F2V(0.25f); + + for (; i + 3 < end; i += 4) { + vfloat r_val = LIMV(LVF(r[i]), ZEROV, c65535v); + vfloat g_val = LIMV(LVF(g[i]), ZEROV, c65535v); + vfloat b_val = LIMV(LVF(b[i]), ZEROV, c65535v); + vfloat r1 = lutToneCurve[r_val]; + vfloat g1 = Triangle(r_val, r1, g_val); + vfloat b1 = Triangle(r_val, r1, b_val); + + vfloat g2 = lutToneCurve[g_val]; + vfloat r2 = Triangle(g_val, g2, r_val); + vfloat b2 = Triangle(g_val, g2, b_val); + + vfloat b3 = lutToneCurve[b_val]; + vfloat r3 = Triangle(b_val, b3, r_val); + vfloat g3 = Triangle(b_val, b3, g_val); + + STVF(r[i], LIMV(r1 * zd5v + r2 * zd25v + r3 * zd25v, ZEROV, c65535v)); + STVF(g[i], LIMV(g1 * zd25v + g2 * zd5v + g3 * zd25v, ZEROV, c65535v)); + STVF(b[i], LIMV(b1 * zd25v + b2 * zd25v + b3 * zd5v, ZEROV, c65535v)); + } + + // Remainder in non-SSE. 
+ for (; i < end; ++i) { + Apply(r[i], g[i], b[i]); + } +#endif +} + // Tone curve modifying the value channel only, preserving hue and saturation // values in 0xffff space inline void SatAndValueBlendingToneCurve::Apply (float& r, float& g, float& b) const @@ -1034,29 +1111,28 @@ inline void SatAndValueBlendingToneCurve::Apply (float& r, float& g, float& b) c assert (lutToneCurve); + r = CLIP(r); + g = CLIP(g); + b = CLIP(b); + + const float lum = (r + g + b) / 3.f; + const float newLum = lutToneCurve[lum]; + float h, s, v; - float lum = (r + g + b) / 3.f; - //float lum = Color::rgbLuminance(r, g, b); - float newLum = lutToneCurve[lum]; - - if (newLum == lum) { - return; - } - - Color::rgb2hsv(r, g, b, h, s, v); + Color::rgb2hsvtc(r, g, b, h, s, v); float dV; if (newLum > lum) { // Linearly targeting Value = 1 and Saturation = 0 - float coef = (newLum - lum) / (65535.f - lum); + const float coef = (newLum - lum) / (65535.f - lum); dV = (1.f - v) * coef; s *= 1.f - coef; } else { // Linearly targeting Value = 0 - float coef = (newLum - lum) / lum ; + const float coef = (newLum - lum) / lum ; dV = v * coef; } - Color::hsv2rgb(h, s, v + dV, r, g, b); + Color::hsv2rgbdcp(h, s, v + dV, r, g, b); } } diff --git a/rtengine/improcfun.cc b/rtengine/improcfun.cc index 6a39befef..34c43be0e 100644 --- a/rtengine/improcfun.cc +++ b/rtengine/improcfun.cc @@ -197,39 +197,28 @@ void proPhotoBlue(float *rtemp, float *gtemp, float *btemp, int istart, int tH, void customToneCurve(const ToneCurve &customToneCurve, ToneCurveParams::TcMode curveMode, float *rtemp, float *gtemp, float *btemp, int istart, int tH, int jstart, int tW, int tileSize, PerceptualToneCurveState ptcApplyState) { if (curveMode == ToneCurveParams::TcMode::STD) { // Standard + const StandardToneCurve& userToneCurve = static_cast<const StandardToneCurve&> (customToneCurve); for (int i = istart, ti = 0; i < tH; i++, ti++) { - const StandardToneCurve& userToneCurve = static_cast<const StandardToneCurve&> (customToneCurve); - userToneCurve.BatchApply ( - 0, tW - jstart, 
- &rtemp[ti * tileSize], &gtemp[ti * tileSize], &btemp[ti * tileSize]); + userToneCurve.BatchApply(0, tW - jstart, &rtemp[ti * tileSize], &gtemp[ti * tileSize], &btemp[ti * tileSize]); } } else if (curveMode == ToneCurveParams::TcMode::FILMLIKE) { // Adobe like + const AdobeToneCurve& userToneCurve = static_cast<const AdobeToneCurve&> (customToneCurve); for (int i = istart, ti = 0; i < tH; i++, ti++) { for (int j = jstart, tj = 0; j < tW; j++, tj++) { - const AdobeToneCurve& userToneCurve = static_cast<const AdobeToneCurve&> (customToneCurve); - userToneCurve.Apply (rtemp[ti * tileSize + tj], gtemp[ti * tileSize + tj], btemp[ti * tileSize + tj]); + userToneCurve.Apply(rtemp[ti * tileSize + tj], gtemp[ti * tileSize + tj], btemp[ti * tileSize + tj]); } } } else if (curveMode == ToneCurveParams::TcMode::SATANDVALBLENDING) { // apply the curve on the saturation and value channels + const SatAndValueBlendingToneCurve& userToneCurve = static_cast<const SatAndValueBlendingToneCurve&> (customToneCurve); for (int i = istart, ti = 0; i < tH; i++, ti++) { for (int j = jstart, tj = 0; j < tW; j++, tj++) { - const SatAndValueBlendingToneCurve& userToneCurve = static_cast<const SatAndValueBlendingToneCurve&> (customToneCurve); - rtemp[ti * tileSize + tj] = CLIP (rtemp[ti * tileSize + tj]); - gtemp[ti * tileSize + tj] = CLIP (gtemp[ti * tileSize + tj]); - btemp[ti * tileSize + tj] = CLIP (btemp[ti * tileSize + tj]); - userToneCurve.Apply (rtemp[ti * tileSize + tj], gtemp[ti * tileSize + tj], btemp[ti * tileSize + tj]); + userToneCurve.Apply(rtemp[ti * tileSize + tj], gtemp[ti * tileSize + tj], btemp[ti * tileSize + tj]); } } } else if (curveMode == ToneCurveParams::TcMode::WEIGHTEDSTD) { // apply the curve to the rgb channels, weighted const WeightedStdToneCurve& userToneCurve = static_cast<const WeightedStdToneCurve&> (customToneCurve); - for (int i = istart, ti = 0; i < tH; i++, ti++) { - for (int j = jstart, tj = 0; j < tW; j++, tj++) { - rtemp[ti * tileSize + tj] = CLIP (rtemp[ti * tileSize + tj]); - gtemp[ti * tileSize + tj] = CLIP (gtemp[ti * tileSize + tj]); - btemp[ti * tileSize + tj] = CLIP (btemp[ti * tileSize + tj]); - 
userToneCurve.Apply (rtemp[ti * tileSize + tj], gtemp[ti * tileSize + tj], btemp[ti * tileSize + tj]); - } + userToneCurve.BatchApply(0, tW - jstart, &rtemp[ti * tileSize], &gtemp[ti * tileSize], &btemp[ti * tileSize]); } } else if (curveMode == ToneCurveParams::TcMode::LUMINANCE) { // apply the curve to the luminance channel const LuminanceToneCurve& userToneCurve = static_cast<const LuminanceToneCurve&> (customToneCurve); @@ -239,7 +228,7 @@ void customToneCurve(const ToneCurve &customToneCurve, ToneCurveParams::TcMode c rtemp[ti * tileSize + tj] = CLIP (rtemp[ti * tileSize + tj]); gtemp[ti * tileSize + tj] = CLIP (gtemp[ti * tileSize + tj]); btemp[ti * tileSize + tj] = CLIP (btemp[ti * tileSize + tj]); - userToneCurve.Apply (rtemp[ti * tileSize + tj], gtemp[ti * tileSize + tj], btemp[ti * tileSize + tj]); + userToneCurve.Apply(rtemp[ti * tileSize + tj], gtemp[ti * tileSize + tj], btemp[ti * tileSize + tj]); } } } else if (curveMode == ToneCurveParams::TcMode::PERCEPTUAL) { // apply curve while keeping color appearance constant @@ -250,7 +239,7 @@ void customToneCurve(const ToneCurve &customToneCurve, ToneCurveParams::TcMode c rtemp[ti * tileSize + tj] = CLIP (rtemp[ti * tileSize + tj]); gtemp[ti * tileSize + tj] = CLIP (gtemp[ti * tileSize + tj]); btemp[ti * tileSize + tj] = CLIP (btemp[ti * tileSize + tj]); - userToneCurve.Apply (rtemp[ti * tileSize + tj], gtemp[ti * tileSize + tj], btemp[ti * tileSize + tj]); + userToneCurve.Apply(rtemp[ti * tileSize + tj], gtemp[ti * tileSize + tj], btemp[ti * tileSize + tj]); } } }