iplab2rgb.cc: speedup for copyAndClamp(), #5964

This commit is contained in:
Ingo Weyrich
2020-10-25 13:46:34 +01:00
parent fd9f3246f0
commit b4f68adb64
3 changed files with 97 additions and 66 deletions

View File

@@ -32,8 +32,6 @@
namespace rtengine
{
extern void filmlike_clip(float *r, float *g, float *b);
namespace {
inline void copyAndClampLine(const float *src, unsigned char *dst, const int W)
@@ -46,9 +44,26 @@ inline void copyAndClampLine(const float *src, unsigned char *dst, const int W)
inline void copyAndClamp(const LabImage *src, unsigned char *dst, const double rgb_xyz[3][3], bool multiThread)
{
int W = src->W;
int H = src->H;
const int W = src->W;
const int H = src->H;
float rgb_xyzf[3][3];
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
rgb_xyzf[i][j] = rgb_xyz[i][j];
}
}
#ifdef __SSE2__
vfloat rgb_xyzv[3][3];
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
rgb_xyzv[i][j] = F2V(rgb_xyzf[i][j]);
}
}
#endif
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,16) if (multiThread)
#endif
@@ -58,17 +73,47 @@ inline void copyAndClamp(const LabImage *src, unsigned char *dst, const double r
float* rb = src->b[i];
int ix = i * 3 * W;
float R, G, B;
float x_, y_, z_;
for (int j = 0; j < W; ++j) {
#ifdef __SSE2__
float rbuffer[W] ALIGNED16;
float gbuffer[W] ALIGNED16;
float bbuffer[W] ALIGNED16;
int j = 0;
for (; j < W - 3; j += 4) {
vfloat R, G, B;
vfloat x_, y_, z_;
Color::Lab2XYZ(LVFU(rL[j]), LVFU(ra[j]), LVFU(rb[j]), x_, y_, z_ );
Color::xyz2rgb(x_, y_, z_, R, G, B, rgb_xyzv);
STVF(rbuffer[j], Color::gamma2curve[R]);
STVF(gbuffer[j], Color::gamma2curve[G]);
STVF(bbuffer[j], Color::gamma2curve[B]);
}
for (; j < W; ++j) {
float R, G, B;
float x_, y_, z_;
Color::Lab2XYZ(rL[j], ra[j], rb[j], x_, y_, z_ );
Color::xyz2rgb(x_, y_, z_, R, G, B, rgb_xyz);
Color::xyz2rgb(x_, y_, z_, R, G, B, rgb_xyzf);
rbuffer[j] = Color::gamma2curve[R];
gbuffer[j] = Color::gamma2curve[G];
bbuffer[j] = Color::gamma2curve[B];
}
for (int j = 0; j < W; ++j) {
dst[ix++] = uint16ToUint8Rounded(rbuffer[j]);
dst[ix++] = uint16ToUint8Rounded(gbuffer[j]);
dst[ix++] = uint16ToUint8Rounded(bbuffer[j]);
}
#else
for (int j = 0; j < W; ++j) {
float R, G, B;
float x_, y_, z_;
Color::Lab2XYZ(rL[j], ra[j], rb[j], x_, y_, z_ );
Color::xyz2rgb(x_, y_, z_, R, G, B, rgb_xyzf);
dst[ix++] = uint16ToUint8Rounded(Color::gamma2curve[R]);
dst[ix++] = uint16ToUint8Rounded(Color::gamma2curve[G]);
dst[ix++] = uint16ToUint8Rounded(Color::gamma2curve[B]);
}
#endif
}
}
@@ -151,8 +196,6 @@ void ImProcFunctions::lab2monitorRgb(LabImage* lab, Image8* image)
// otherwise divide by 327.68, convert to xyz and apply the RGB transform, before converting with gamma2curve
Image8* ImProcFunctions::lab2rgb(LabImage* lab, int cx, int cy, int cw, int ch, const procparams::ColorManagementParams &icm, bool consider_histogram_settings)
{
//gamutmap(lab);
if (cx < 0) {
cx = 0;
}