From e4b955523e7a46882bd1982c962cfe2bc9e72467 Mon Sep 17 00:00:00 2001
From: Ingo Weyrich
Date: Thu, 15 Aug 2019 23:23:28 +0200
Subject: [PATCH] Capture sharpening: small speedup

---
// Review notes. Everything between the three-dash line above and the first
// "diff --git" line is free-form commentary and is not part of the change.
//
// What the patch does
//   rtengine/color.h
//     * Scalar RGB2YCbCr: the Cb and Cr expressions are regrouped so that the
//       two subtracted terms are summed first. Same terms, different
//       association, so floating-point rounding may differ slightly.
//     * New row-wise overload RGB2YCbCr(R, G, B, Y, Cb, Cr, gamma, W):
//       for each i in [0, W) it clamps R[i], G[i], B[i] to >= 0, stores
//       pow_F(0.2627*r + 0.6780*g + 0.0593*b, 1/gamma) in Y[i] and computes
//       Cb[i] / Cr[i] from the clamped values with the same formulas as the
//       scalar overload.
//     * New row-wise overload YCbCr2RGB(Y, Cb, Cr, R, G, B, gamma, W):
//       for each i in [0, W) it takes y = pow_F(Y[i], gamma) and stores
//       max(y + Cr, 0), max(y - (0.0593/0.6780)*Cb - (0.2627/0.6780)*Cr, 0)
//       and max(y + Cb, 0) in R[i], G[i], B[i].
//     * In both overloads, when __SSE2__ is defined a first loop advances
//       four elements per iteration while i < W - 3; a scalar loop then
//       handles whatever is left (or everything when __SSE2__ is not defined).
//   rtengine/rawimagesource.cc
//     * The two per-pixel inner loops are replaced by one row-wise call each.
//       The std::max(..., 0.f) clamping and the pow_F gamma step that used to
//       sit at the call sites now live inside the new overloads.
//
// NOTE(review): the scalar tails clamp with std::max(x, 0.f) while the SSE2
//   loops clamp with vmaxf(x, ZEROV). If vmaxf maps to _mm_max_ps, the two
//   differ for NaN inputs (e.g. pow_F of a negative Y) -- TODO confirm
//   whether the last W % 4 pixels of a row can diverge from the rest.
// NOTE(review): the line breaks and indentation of this copy were rebuilt from
//   a whitespace-collapsed source; applying it may need
//   `git apply --ignore-whitespace` -- verify against the target tree.
// NOTE(review): the context line "JaggedArray blend(W, H);" and the "From:"
//   header look as if angle-bracketed text was lost in extraction -- TODO
//   confirm against the original commit.
//
 rtengine/color.h           | 58 ++++++++++++++++++++++++++++++++++++--
 rtengine/rawimagesource.cc | 10 ++-----
 2 files changed, 58 insertions(+), 10 deletions(-)

diff --git a/rtengine/color.h b/rtengine/color.h
index 22a648634..7b198f284 100644
--- a/rtengine/color.h
+++ b/rtengine/color.h
@@ -1806,9 +1806,37 @@ public:
 
     static inline void RGB2YCbCr(float R, float G, float B, float &Y, float &Cb, float &Cr) {
         Y = 0.2627f * R + 0.6780f * G + 0.0593f * B;
-        Cb = -0.2627f * R - 0.6780f * G + (1.f - 0.0593f) * B;
-        Cr = (1.f - 0.2627f) * R - 0.6780f * G - 0.0593f * B;
+        Cb = (1.f - 0.0593f) * B - (0.2627f * R + 0.6780f * G);
+        Cr = (1.f - 0.2627f) * R - (0.6780f * G + 0.0593f * B);
+    }
 
+    static inline void RGB2YCbCr(float* R, float* G, float* B, float* Y, float* Cb, float *Cr, float gamma, int W) {
+        gamma = 1.f / gamma;
+        int i = 0;
+#ifdef __SSE2__
+        const vfloat gammav = F2V(gamma);
+        const vfloat c1v = F2V(0.2627f);
+        const vfloat c2v = F2V(0.6780f);
+        const vfloat c3v = F2V(0.0593f);
+        const vfloat c4v = F2V(1.f - 0.0593f);
+        const vfloat c5v = F2V(1.f - 0.2627f);
+        for (; i < W - 3; i += 4) {
+            const vfloat Rv = vmaxf(LVFU(R[i]), ZEROV);
+            const vfloat Gv = vmaxf(LVFU(G[i]), ZEROV);
+            const vfloat Bv = vmaxf(LVFU(B[i]), ZEROV);
+            STVFU(Y[i], pow_F(c1v * Rv + c2v * Gv + c3v * Bv, gammav));
+            STVFU(Cb[i], c4v * Bv - (c1v * Rv + c2v * Gv));
+            STVFU(Cr[i], c5v * Rv - (c2v * Gv + c3v * Bv));
+        }
+#endif
+        for (; i < W; ++i) {
+            const float r = std::max(R[i], 0.f);
+            const float g = std::max(G[i], 0.f);
+            const float b = std::max(B[i], 0.f);
+            Y[i] = pow_F(0.2627f * r + 0.6780f * g + 0.0593f * b, gamma);
+            Cb[i] = (1.f - 0.0593f) * b - (0.2627f * r + 0.6780f * g);
+            Cr[i] = (1.f - 0.2627f) * r - (0.6780f * g + 0.0593f * b);
+        }
     }
 
     static inline void YCbCr2RGB(float Y, float Cb, float Cr, float &R, float &G, float &B) {
@@ -1816,6 +1844,32 @@ public:
         G = std::max(Y - (0.0593f / 0.6780f) * Cb - (0.2627f / 0.6780f) * Cr, 0.f);
         B = std::max(Y + Cb, 0.f);
     }
+
+    static inline void YCbCr2RGB(float* Y, float* Cb, float* Cr, float* R, float* G, float* B, float gamma, int W) {
+        int i = 0;
+#ifdef __SSE2__
+        const vfloat gammav = F2V(gamma);
+        const vfloat c1v = F2V(0.0593f / 0.6780f);
+        const vfloat c2v = F2V(0.2627f / 0.6780f);
+
+        for (; i < W - 3; i += 4) {
+            const vfloat Yv = pow_F(LVFU(Y[i]), gammav);
+            const vfloat Crv = LVFU(Cr[i]);
+            const vfloat Cbv = LVFU(Cb[i]);
+            STVFU(R[i], vmaxf(Yv + Crv, ZEROV));
+            STVFU(G[i], vmaxf(Yv - c1v * Cbv - c2v * Crv, ZEROV));
+            STVFU(B[i], vmaxf(Yv + Cbv, ZEROV));
+        }
+#endif
+        for (; i < W; ++i) {
+            const float y = pow_F(Y[i], gamma);
+            const float cr = Cr[i];
+            const float cb = Cb[i];
+            R[i] = std::max(y + cr, 0.f);
+            G[i] = std::max(y - (0.0593f / 0.6780f) * cb - (0.2627f / 0.6780f) * cr, 0.f);
+            B[i] = std::max(y + cb, 0.f);
+        }
+    }
 };
 
 }
diff --git a/rtengine/rawimagesource.cc b/rtengine/rawimagesource.cc
index 96f161d9f..25beb6208 100644
--- a/rtengine/rawimagesource.cc
+++ b/rtengine/rawimagesource.cc
@@ -4998,10 +4998,7 @@ BENCHFUN
     #pragma omp parallel for
     for (int i = 0; i < H; ++i) {
         Color::RGB2L(red[i], green[i], blue[i], L[i], xyz_rgb, W);
-        for (int j = 0; j < W; ++j) {
-            Color::RGB2YCbCr(std::max(red[i][j], 0.f), std::max(green[i][j], 0.f), std::max(blue[i][j], 0.f), Y[i][j], Cb[i][j], Cr[i][j]);
-            Y[i][j] = pow_F(Y[i][j], 1.f / gamma);
-        }
+        Color::RGB2YCbCr(red[i], green[i], blue[i], Y[i], Cb[i], Cr[i], gamma, W);
     }
     // calculate contrast based blend factors to reduce sharpening in regions with low contrast
     JaggedArray blend(W, H);
@@ -5015,10 +5012,7 @@ BENCHFUN
     StopWatch Stop2("Y2RGB");
     #pragma omp parallel for
     for (int i = 0; i < H; ++i) {
-        for (int j = 0; j < W ; ++j) {
-            Y[i][j] = pow_F(Y[i][j], gamma);
-            Color::YCbCr2RGB(Y[i][j], Cb[i][j], Cr[i][j], red[i][j], green[i][j], blue[i][j]);
-        }
+        Color::YCbCr2RGB(Y[i], Cb[i], Cr[i], red[i], green[i], blue[i], gamma, W);
     }
     Stop2.stop();
 }