From b4f68adb64d2bc91b0ade144c9745b9f6a5bbba7 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Sun, 25 Oct 2020 13:46:34 +0100 Subject: [PATCH] iplab2rgb.cc: speedup for copyAndClamp(), #5964 --- rtengine/color.cc | 51 --------------------------------- rtengine/color.h | 47 ++++++++++++++++++++++++++++--- rtengine/iplab2rgb.cc | 65 +++++++++++++++++++++++++++++++++++-------- 3 files changed, 97 insertions(+), 66 deletions(-) diff --git a/rtengine/color.cc b/rtengine/color.cc index 5761733f0..23f1ad9f6 100644 --- a/rtengine/color.cc +++ b/rtengine/color.cc @@ -1015,23 +1015,6 @@ void Color::xyz2r (float x, float y, float z, float &r, const double rgb_xyz[3][ r = ((rgb_xyz[0][0] * x + rgb_xyz[0][1] * y + rgb_xyz[0][2] * z)) ; } -// same for float -void Color::xyz2rgb (float x, float y, float z, float &r, float &g, float &b, const float rgb_xyz[3][3]) -{ - r = ((rgb_xyz[0][0] * x + rgb_xyz[0][1] * y + rgb_xyz[0][2] * z)) ; - g = ((rgb_xyz[1][0] * x + rgb_xyz[1][1] * y + rgb_xyz[1][2] * z)) ; - b = ((rgb_xyz[2][0] * x + rgb_xyz[2][1] * y + rgb_xyz[2][2] * z)) ; -} - -#ifdef __SSE2__ -void Color::xyz2rgb (vfloat x, vfloat y, vfloat z, vfloat &r, vfloat &g, vfloat &b, const vfloat rgb_xyz[3][3]) -{ - r = ((rgb_xyz[0][0] * x + rgb_xyz[0][1] * y + rgb_xyz[0][2] * z)) ; - g = ((rgb_xyz[1][0] * x + rgb_xyz[1][1] * y + rgb_xyz[1][2] * z)) ; - b = ((rgb_xyz[2][0] * x + rgb_xyz[2][1] * y + rgb_xyz[2][2] * z)) ; -} -#endif // __SSE2__ - #ifdef __SSE2__ void Color::trcGammaBW (float &r, float &g, float &b, float gammabwr, float gammabwg, float gammabwb) { @@ -1646,19 +1629,6 @@ void Color::gammanf2lut (LUTf &gammacurve, float gamma, float divisor, float fac #endif } -void Color::Lab2XYZ(float L, float a, float b, float &x, float &y, float &z) -{ - float LL = L / 327.68f; - float aa = a / 327.68f; - float bb = b / 327.68f; - float fy = (c1By116 * LL) + c16By116; // (L+16)/116 - float fx = (0.002f * aa) + fy; - float fz = fy - (0.005f * bb); - x = 65535.0f * f2xyz(fx) * D50x; - z = 65535.0f * f2xyz(fz) * D50z; - y = (LL > epskap) ? 65535.0f * fy * fy * fy : 65535.0f * LL / kappa; -} - float Color::L2Y(float L) { const float LL = L / 327.68f; @@ -1676,27 +1646,6 @@ void Color::L2XYZ(float L, float &x, float &y, float &z) // for black & white y = (LL > epskap) ? 65535.0f * fy * fy * fy : 65535.0f * LL / kappa; } - -#ifdef __SSE2__ -void Color::Lab2XYZ(vfloat L, vfloat a, vfloat b, vfloat &x, vfloat &y, vfloat &z) -{ - vfloat c327d68 = F2V(327.68f); - L /= c327d68; - a /= c327d68; - b /= c327d68; - vfloat fy = F2V(c1By116) * L + F2V(c16By116); - vfloat fx = F2V(0.002f) * a + fy; - vfloat fz = fy - (F2V(0.005f) * b); - vfloat c65535 = F2V(65535.f); - x = c65535 * f2xyz(fx) * F2V(D50x); - z = c65535 * f2xyz(fz) * F2V(D50z); - vfloat res1 = fy * fy * fy; - vfloat res2 = L / F2V(kappa); - y = vself(vmaskf_gt(L, F2V(epskap)), res1, res2); - y *= c65535; -} -#endif // __SSE2__ - inline float Color::computeXYZ2Lab(float f) { if (f < 0.f) { diff --git a/rtengine/color.h b/rtengine/color.h index 704871d39..b6bf60818 100644 --- a/rtengine/color.h +++ b/rtengine/color.h @@ -570,9 +570,20 @@ public: */ static void xyz2rgb (float x, float y, float z, float &r, float &g, float &b, const double rgb_xyz[3][3]); static void xyz2r (float x, float y, float z, float &r, const double rgb_xyz[3][3]); - static void xyz2rgb (float x, float y, float z, float &r, float &g, float &b, const float rgb_xyz[3][3]); + static inline void xyz2rgb (float x, float y, float z, float &r, float &g, float &b, const float rgb_xyz[3][3]) + { + r = ((rgb_xyz[0][0] * x + rgb_xyz[0][1] * y + rgb_xyz[0][2] * z)) ; + g = ((rgb_xyz[1][0] * x + rgb_xyz[1][1] * y + rgb_xyz[1][2] * z)) ; + b = ((rgb_xyz[2][0] * x + rgb_xyz[2][1] * y + rgb_xyz[2][2] * z)) ; + } + #ifdef __SSE2__ - static void xyz2rgb (vfloat x, vfloat y, vfloat z, vfloat &r, vfloat &g, vfloat &b, const vfloat rgb_xyz[3][3]); + static inline void xyz2rgb (vfloat x, vfloat y, vfloat z, vfloat &r, vfloat &g, vfloat &b, const vfloat rgb_xyz[3][3]) + { + r = ((rgb_xyz[0][0] * x + rgb_xyz[0][1] * y + rgb_xyz[0][2] * z)) ; + g = ((rgb_xyz[1][0] * x + rgb_xyz[1][1] * y + rgb_xyz[1][2] * z)) ; + b = ((rgb_xyz[2][0] * x + rgb_xyz[2][1] * y + rgb_xyz[2][2] * z)) ; + } #endif @@ -603,12 +614,40 @@ public: * @param y Y coordinate [0 ; 65535] ; can be negative! (return value) * @param z Z coordinate [0 ; 65535] ; can be negative! (return value) */ - static void Lab2XYZ(float L, float a, float b, float &x, float &y, float &z); + static inline void Lab2XYZ(float L, float a, float b, float &x, float &y, float &z) + { + float LL = L / 327.68f; + float aa = a / 327.68f; + float bb = b / 327.68f; + float fy = (c1By116 * LL) + c16By116; // (L+16)/116 + float fx = (0.002f * aa) + fy; + float fz = fy - (0.005f * bb); + x = 65535.f * f2xyz(fx) * D50x; + z = 65535.f * f2xyz(fz) * D50z; + y = (LL > epskapf) ? 65535.f * fy * fy * fy : 65535.f * LL / kappaf; + } + static void L2XYZ(float L, float &x, float &y, float &z); static float L2Y(float L); #ifdef __SSE2__ - static void Lab2XYZ(vfloat L, vfloat a, vfloat b, vfloat &x, vfloat &y, vfloat &z); +static inline void Lab2XYZ(vfloat L, vfloat a, vfloat b, vfloat &x, vfloat &y, vfloat &z) +{ + vfloat c327d68 = F2V(327.68f); + L /= c327d68; + a /= c327d68; + b /= c327d68; + vfloat fy = F2V(c1By116) * L + F2V(c16By116); + vfloat fx = F2V(0.002f) * a + fy; + vfloat fz = fy - (F2V(0.005f) * b); + vfloat c65535 = F2V(65535.f); + x = c65535 * f2xyz(fx) * F2V(D50x); + z = c65535 * f2xyz(fz) * F2V(D50z); + vfloat res1 = fy * fy * fy; + vfloat res2 = L / F2V(kappa); + y = vself(vmaskf_gt(L, F2V(epskap)), res1, res2); + y *= c65535; +} #endif // __SSE2__ /** diff --git a/rtengine/iplab2rgb.cc b/rtengine/iplab2rgb.cc index af7c454a3..dd89301ae 100644 --- a/rtengine/iplab2rgb.cc +++ b/rtengine/iplab2rgb.cc @@ -32,8 +32,6 @@ namespace rtengine { -extern void filmlike_clip(float *r, float *g, float *b); - namespace { inline void copyAndClampLine(const float *src, unsigned char *dst, const int W) @@ -46,9 +44,26 @@ inline void copyAndClampLine(const float *src, unsigned char *dst, const int W) inline void copyAndClamp(const LabImage *src, unsigned char *dst, const double rgb_xyz[3][3], bool multiThread) { - int W = src->W; - int H = src->H; + const int W = src->W; + const int H = src->H; + float rgb_xyzf[3][3]; + + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 3; j++) { + rgb_xyzf[i][j] = rgb_xyz[i][j]; + } + } + +#ifdef __SSE2__ + vfloat rgb_xyzv[3][3]; + + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 3; j++) { + rgb_xyzv[i][j] = F2V(rgb_xyzf[i][j]); + } + } +#endif #ifdef _OPENMP #pragma omp parallel for schedule(dynamic,16) if (multiThread) #endif @@ -58,17 +73,47 @@ inline void copyAndClamp(const LabImage *src, unsigned char *dst, const double r float* rb = src->b[i]; int ix = i * 3 * W; - float R, G, B; - float x_, y_, z_; - - for (int j = 0; j < W; ++j) { +#ifdef __SSE2__ + float rbuffer[W] ALIGNED16; + float gbuffer[W] ALIGNED16; + float bbuffer[W] ALIGNED16; + int j = 0; + for (; j < W - 3; j += 4) { + vfloat R, G, B; + vfloat x_, y_, z_; + Color::Lab2XYZ(LVFU(rL[j]), LVFU(ra[j]), LVFU(rb[j]), x_, y_, z_ ); + Color::xyz2rgb(x_, y_, z_, R, G, B, rgb_xyzv); + STVF(rbuffer[j], Color::gamma2curve[R]); + STVF(gbuffer[j], Color::gamma2curve[G]); + STVF(bbuffer[j], Color::gamma2curve[B]); + } + for (; j < W; ++j) { + float R, G, B; + float x_, y_, z_; Color::Lab2XYZ(rL[j], ra[j], rb[j], x_, y_, z_ ); - Color::xyz2rgb(x_, y_, z_, R, G, B, rgb_xyz); + Color::xyz2rgb(x_, y_, z_, R, G, B, rgb_xyzf); + rbuffer[j] = Color::gamma2curve[R]; + gbuffer[j] = Color::gamma2curve[G]; + bbuffer[j] = Color::gamma2curve[B]; + } + for (int j = 0; j < W; ++j) { + dst[ix++] = uint16ToUint8Rounded(rbuffer[j]); + dst[ix++] = uint16ToUint8Rounded(gbuffer[j]); + dst[ix++] = uint16ToUint8Rounded(bbuffer[j]); + } + +#else + for (int j = 0; j < W; ++j) { + float R, G, B; + float x_, y_, z_; + Color::Lab2XYZ(rL[j], ra[j], rb[j], x_, y_, z_ ); + Color::xyz2rgb(x_, y_, z_, R, G, B, rgb_xyzf); dst[ix++] = uint16ToUint8Rounded(Color::gamma2curve[R]); dst[ix++] = uint16ToUint8Rounded(Color::gamma2curve[G]); dst[ix++] = uint16ToUint8Rounded(Color::gamma2curve[B]); } +#endif } } @@ -151,8 +196,6 @@ void ImProcFunctions::lab2monitorRgb(LabImage* lab, Image8* image) // otherwise divide by 327.68, convert to xyz and apply the RGB transform, before converting with gamma2curve Image8* ImProcFunctions::lab2rgb(LabImage* lab, int cx, int cy, int cw, int ch, const procparams::ColorManagementParams &icm, bool consider_histogram_settings) { - //gamutmap(lab); - if (cx < 0) { cx = 0; }