From 84614a11e82bc2bc2ccbb8366503929e97101976 Mon Sep 17 00:00:00 2001 From: heckflosse Date: Sun, 28 Aug 2016 14:47:01 +0200 Subject: [PATCH] B&W: Speedup for Color::trcGammaBW --- rtengine/color.cc | 40 ++++++++++++++++---- rtengine/color.h | 3 ++ rtengine/improcfun.cc | 87 +++++++++++++++++-------------------------- 3 files changed, 70 insertions(+), 60 deletions(-) diff --git a/rtengine/color.cc b/rtengine/color.cc index e6afbd86e..66887f550 100644 --- a/rtengine/color.cc +++ b/rtengine/color.cc @@ -888,7 +888,7 @@ void Color::xyz2rgb (vfloat x, vfloat y, vfloat z, vfloat &r, vfloat &g, vfloat void Color::trcGammaBW (float &r, float &g, float &b, float gammabwr, float gammabwg, float gammabwb) { // correct gamma for black and white image : pseudo TRC curve of ICC profil - vfloat rgbv = _mm_set_ps(0.f, b, g, r); + vfloat rgbv = _mm_set_ps(0.f, r, r, r); // input channel is always r vfloat gammabwv = _mm_set_ps(0.f, gammabwb, gammabwg, gammabwr); vfloat c65535v = F2V(65535.f); rgbv /= c65535v; @@ -901,19 +901,45 @@ void Color::trcGammaBW (float &r, float &g, float &b, float gammabwr, float gamm g = temp[1]; b = temp[2]; } +void Color::trcGammaBWRow (float *r, float *g, float *b, int width, float gammabwr, float gammabwg, float gammabwb) +{ + // correct gamma for black and white image : pseudo TRC curve of ICC profil + vfloat c65535v = F2V(65535.f); + vfloat gammabwrv = F2V(gammabwr); + vfloat gammabwgv = F2V(gammabwg); + vfloat gammabwbv = F2V(gammabwb); + int i = 0; + for(; i < width - 3; i += 4 ) { + vfloat inv = _mm_loadu_ps(&r[i]); // input channel is always r + inv /= c65535v; + inv = vmaxf(inv, ZEROV); + vfloat rv = pow_F(inv, gammabwrv); + vfloat gv = pow_F(inv, gammabwgv); + vfloat bv = pow_F(inv, gammabwbv); + rv *= c65535v; + gv *= c65535v; + bv *= c65535v; + _mm_storeu_ps(&r[i], rv); + _mm_storeu_ps(&g[i], gv); + _mm_storeu_ps(&b[i], bv); + } + for(; i < width; i++) { + trcGammaBW(r[i], g[i], b[i], gammabwr, gammabwg, gammabwb); + } +} #else void 
Color::trcGammaBW (float &r, float &g, float &b, float gammabwr, float gammabwg, float gammabwb) { // correct gamma for black and white image : pseudo TRC curve of ICC profil - b /= 65535.0f; - b = pow_F (max(b, 0.0f), gammabwb); + float in = r; // input channel is always r + in /= 65535.0f; + in = max(in, 0.f); + b = pow_F (in, gammabwb); b *= 65535.0f; - r /= 65535.0f; - r = pow_F (max(r, 0.0f), gammabwr); + r = pow_F (in, gammabwr); r *= 65535.0f; - g /= 65535.0f; - g = pow_F (max(g, 0.0f), gammabwg); + g = pow_F (in, gammabwg); g *= 65535.0f; } #endif diff --git a/rtengine/color.h b/rtengine/color.h index c61861421..e9b38c509 100644 --- a/rtengine/color.h +++ b/rtengine/color.h @@ -894,6 +894,9 @@ public: * @param gammabwb gamma value for red channel [>0] */ static void trcGammaBW (float &r, float &g, float &b, float gammabwr, float gammabwg, float gammabwb); +#ifdef __SSE2__ + static void trcGammaBWRow (float *r, float *g, float *b, int width, float gammabwr, float gammabwg, float gammabwb); +#endif /** @brief Compute the B&W constants for the Black and White processing and its GUI diff --git a/rtengine/improcfun.cc b/rtengine/improcfun.cc index 2fadfabe4..7fcee7512 100644 --- a/rtengine/improcfun.cc +++ b/rtengine/improcfun.cc @@ -4114,15 +4114,23 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer // -------------------------------------------------- +#ifndef __SSE2__ //gamma correction: pseudo TRC curve if (hasgammabw) { Color::trcGammaBW (r, g, b, gammabwr, gammabwg, gammabwb); } - +#endif rtemp[ti * TS + tj] = r; gtemp[ti * TS + tj] = g; btemp[ti * TS + tj] = b; } +#ifdef __SSE2__ + if (hasgammabw) { + //gamma correction: pseudo TRC curve + Color::trcGammaBWRow (&rtemp[ti * TS], &gtemp[ti * TS], &btemp[ti * TS], tW - jstart, gammabwr, gammabwg, gammabwb); + } +#endif + } } else if (algm == 1) { //Luminance mixer in Lab mode to avoid artifacts for (int i = istart, ti = 0; i < tH; i++, ti++) { @@ -4178,12 +4186,19 @@ void 
ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer float newRed; // We use the red channel for bw Color::xyz2r(X, Y, Z, newRed, wip); rtemp[ti * TS + tj] = gtemp[ti * TS + tj] = btemp[ti * TS + tj] = newRed; - +#ifndef __SSE2__ if (hasgammabw) { //gamma correction: pseudo TRC curve Color::trcGammaBW (rtemp[ti * TS + tj], gtemp[ti * TS + tj], btemp[ti * TS + tj], gammabwr, gammabwg, gammabwb); } +#endif } +#ifdef __SSE2__ + if (hasgammabw) { + //gamma correction: pseudo TRC curve + Color::trcGammaBWRow (&rtemp[ti * TS], &gtemp[ti * TS], &btemp[ti * TS], tW - jstart, gammabwr, gammabwg, gammabwb); + } +#endif } } } @@ -4389,13 +4404,10 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer if (algm == 2) { //channel-mixer //end auto chmix - float mix[3][3]; - if (computeMixerAuto) { // auto channel-mixer - #ifdef _OPENMP - #pragma omp parallel for schedule(dynamic, 5) reduction(+:nr,ng,nb) + #pragma omp parallel for schedule(dynamic, 16) reduction(+:nr,ng,nb) #endif for (int i = 0; i < tH; i++) { @@ -4434,44 +4446,29 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer bwr, bwg, bwb, mixerOrange, mixerYellow, mixerCyan, mixerPurple, mixerMagenta, params->blackwhite.autoc, complem, kcorec, rrm, ggm, bbm); - mix[0][0] = bwr; - mix[1][0] = bwr; - mix[2][0] = bwr; - mix[0][1] = bwg; - mix[1][1] = bwg; - mix[2][1] = bwg; - mix[0][2] = bwb; - mix[1][2] = bwb; - mix[2][2] = bwb; - #ifdef _OPENMP - #pragma omp parallel for schedule(dynamic, 5) + #pragma omp parallel for schedule(dynamic, 16) #endif for (int i = 0; i < tH; i++) { - float in[3], val[3]; - for (int j = 0; j < tW; j++) { - in[0] = tmpImage->r(i, j); - in[1] = tmpImage->g(i, j); - in[2] = tmpImage->b(i, j); //mix channel - for (int end = 0; end < 3 ; end++) { - val[end] = 0.f; - - for (int beg = 0; beg < 3 ; beg++) { - val[end] += mix[end][beg] * in[beg]; - } - } - - tmpImage->r(i, j) = tmpImage->g(i, j) = tmpImage->b(i, j) = 
CLIP(val[0] * kcorec); + tmpImage->r(i, j) = tmpImage->g(i, j) = tmpImage->b(i, j) = CLIP((bwr * tmpImage->r(i, j) + bwg * tmpImage->g(i, j) + bwb * tmpImage->b(i, j)) * kcorec); +#ifndef __SSE2__ //gamma correction: pseudo TRC curve if (hasgammabw) { Color::trcGammaBW (tmpImage->r(i, j), tmpImage->g(i, j), tmpImage->b(i, j), gammabwr, gammabwg, gammabwb); } +#endif } +#ifdef __SSE2__ + if (hasgammabw) { + //gamma correction: pseudo TRC curve + Color::trcGammaBWRow (tmpImage->r(i), tmpImage->g(i), tmpImage->b(i), tW, gammabwr, gammabwg, gammabwb); + } +#endif } } @@ -4774,29 +4771,13 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer float fx, fy, fz; - fx = (x < 65535.0f ? Color::cachef[std::max(x, 0.f)] : 327.68f * std::cbrt(x / MAXVALF)); - fy = (y < 65535.0f ? Color::cachef[std::max(y, 0.f)] : 327.68f * std::cbrt(y / MAXVALF)); - fz = (z < 65535.0f ? Color::cachef[std::max(z, 0.f)] : 327.68f * std::cbrt(z / MAXVALF)); - - lab->L[i][j] = (116.0f * fy - 5242.88f); //5242.88=16.0*327.68; - lab->a[i][j] = (500.0f * (fx - fy) ); - lab->b[i][j] = (200.0f * (fy - fz) ); - - - //test for color accuracy - /*float fy = (0.00862069 * lab->L[i][j])/327.68 + 0.137932; // (L+16)/116 - float fx = (0.002 * lab->a[i][j])/327.68 + fy; - float fz = fy - (0.005 * lab->b[i][j])/327.68; - - float x_ = 65535*Lab2xyz(fx)*Color::D50x; - float y_ = 65535*Lab2xyz(fy); - float z_ = 65535*Lab2xyz(fz)*Color::D50z; - - int R,G,B; - xyz2srgb(x_,y_,z_,R,G,B); - r=(float)R; g=(float)G; b=(float)B; - float xxx=1;*/ + fx = (x < MAXVALF ? Color::cachef[x] : 327.68f * std::cbrt(x / MAXVALF)); + fy = (y < MAXVALF ? Color::cachef[y] : 327.68f * std::cbrt(y / MAXVALF)); + fz = (z < MAXVALF ? Color::cachef[z] : 327.68f * std::cbrt(z / MAXVALF)); + lab->L[i][j] = 116.0f * fy - 5242.88f; //5242.88=16.0*327.68; + lab->a[i][j] = 500.0f * (fx - fy); + lab->b[i][j] = 200.0f * (fy - fz); } }