iplab2rgb.cc: speedup for copyAndClamp(), #5964
This commit is contained in:
parent
fd9f3246f0
commit
b4f68adb64
@ -1015,23 +1015,6 @@ void Color::xyz2r (float x, float y, float z, float &r, const double rgb_xyz[3][
|
|||||||
r = ((rgb_xyz[0][0] * x + rgb_xyz[0][1] * y + rgb_xyz[0][2] * z)) ;
|
r = ((rgb_xyz[0][0] * x + rgb_xyz[0][1] * y + rgb_xyz[0][2] * z)) ;
|
||||||
}
|
}
|
||||||
|
|
||||||
// same for float
|
|
||||||
void Color::xyz2rgb (float x, float y, float z, float &r, float &g, float &b, const float rgb_xyz[3][3])
|
|
||||||
{
|
|
||||||
r = ((rgb_xyz[0][0] * x + rgb_xyz[0][1] * y + rgb_xyz[0][2] * z)) ;
|
|
||||||
g = ((rgb_xyz[1][0] * x + rgb_xyz[1][1] * y + rgb_xyz[1][2] * z)) ;
|
|
||||||
b = ((rgb_xyz[2][0] * x + rgb_xyz[2][1] * y + rgb_xyz[2][2] * z)) ;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef __SSE2__
|
|
||||||
void Color::xyz2rgb (vfloat x, vfloat y, vfloat z, vfloat &r, vfloat &g, vfloat &b, const vfloat rgb_xyz[3][3])
|
|
||||||
{
|
|
||||||
r = ((rgb_xyz[0][0] * x + rgb_xyz[0][1] * y + rgb_xyz[0][2] * z)) ;
|
|
||||||
g = ((rgb_xyz[1][0] * x + rgb_xyz[1][1] * y + rgb_xyz[1][2] * z)) ;
|
|
||||||
b = ((rgb_xyz[2][0] * x + rgb_xyz[2][1] * y + rgb_xyz[2][2] * z)) ;
|
|
||||||
}
|
|
||||||
#endif // __SSE2__
|
|
||||||
|
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
void Color::trcGammaBW (float &r, float &g, float &b, float gammabwr, float gammabwg, float gammabwb)
|
void Color::trcGammaBW (float &r, float &g, float &b, float gammabwr, float gammabwg, float gammabwb)
|
||||||
{
|
{
|
||||||
@ -1646,19 +1629,6 @@ void Color::gammanf2lut (LUTf &gammacurve, float gamma, float divisor, float fac
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void Color::Lab2XYZ(float L, float a, float b, float &x, float &y, float &z)
|
|
||||||
{
|
|
||||||
float LL = L / 327.68f;
|
|
||||||
float aa = a / 327.68f;
|
|
||||||
float bb = b / 327.68f;
|
|
||||||
float fy = (c1By116 * LL) + c16By116; // (L+16)/116
|
|
||||||
float fx = (0.002f * aa) + fy;
|
|
||||||
float fz = fy - (0.005f * bb);
|
|
||||||
x = 65535.0f * f2xyz(fx) * D50x;
|
|
||||||
z = 65535.0f * f2xyz(fz) * D50z;
|
|
||||||
y = (LL > epskap) ? 65535.0f * fy * fy * fy : 65535.0f * LL / kappa;
|
|
||||||
}
|
|
||||||
|
|
||||||
float Color::L2Y(float L)
|
float Color::L2Y(float L)
|
||||||
{
|
{
|
||||||
const float LL = L / 327.68f;
|
const float LL = L / 327.68f;
|
||||||
@ -1676,27 +1646,6 @@ void Color::L2XYZ(float L, float &x, float &y, float &z) // for black & white
|
|||||||
y = (LL > epskap) ? 65535.0f * fy * fy * fy : 65535.0f * LL / kappa;
|
y = (LL > epskap) ? 65535.0f * fy * fy * fy : 65535.0f * LL / kappa;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#ifdef __SSE2__
|
|
||||||
void Color::Lab2XYZ(vfloat L, vfloat a, vfloat b, vfloat &x, vfloat &y, vfloat &z)
|
|
||||||
{
|
|
||||||
vfloat c327d68 = F2V(327.68f);
|
|
||||||
L /= c327d68;
|
|
||||||
a /= c327d68;
|
|
||||||
b /= c327d68;
|
|
||||||
vfloat fy = F2V(c1By116) * L + F2V(c16By116);
|
|
||||||
vfloat fx = F2V(0.002f) * a + fy;
|
|
||||||
vfloat fz = fy - (F2V(0.005f) * b);
|
|
||||||
vfloat c65535 = F2V(65535.f);
|
|
||||||
x = c65535 * f2xyz(fx) * F2V(D50x);
|
|
||||||
z = c65535 * f2xyz(fz) * F2V(D50z);
|
|
||||||
vfloat res1 = fy * fy * fy;
|
|
||||||
vfloat res2 = L / F2V(kappa);
|
|
||||||
y = vself(vmaskf_gt(L, F2V(epskap)), res1, res2);
|
|
||||||
y *= c65535;
|
|
||||||
}
|
|
||||||
#endif // __SSE2__
|
|
||||||
|
|
||||||
inline float Color::computeXYZ2Lab(float f)
|
inline float Color::computeXYZ2Lab(float f)
|
||||||
{
|
{
|
||||||
if (f < 0.f) {
|
if (f < 0.f) {
|
||||||
|
@ -570,9 +570,20 @@ public:
|
|||||||
*/
|
*/
|
||||||
static void xyz2rgb (float x, float y, float z, float &r, float &g, float &b, const double rgb_xyz[3][3]);
|
static void xyz2rgb (float x, float y, float z, float &r, float &g, float &b, const double rgb_xyz[3][3]);
|
||||||
static void xyz2r (float x, float y, float z, float &r, const double rgb_xyz[3][3]);
|
static void xyz2r (float x, float y, float z, float &r, const double rgb_xyz[3][3]);
|
||||||
static void xyz2rgb (float x, float y, float z, float &r, float &g, float &b, const float rgb_xyz[3][3]);
|
static inline void xyz2rgb (float x, float y, float z, float &r, float &g, float &b, const float rgb_xyz[3][3])
|
||||||
|
{
|
||||||
|
r = ((rgb_xyz[0][0] * x + rgb_xyz[0][1] * y + rgb_xyz[0][2] * z)) ;
|
||||||
|
g = ((rgb_xyz[1][0] * x + rgb_xyz[1][1] * y + rgb_xyz[1][2] * z)) ;
|
||||||
|
b = ((rgb_xyz[2][0] * x + rgb_xyz[2][1] * y + rgb_xyz[2][2] * z)) ;
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
static void xyz2rgb (vfloat x, vfloat y, vfloat z, vfloat &r, vfloat &g, vfloat &b, const vfloat rgb_xyz[3][3]);
|
static inline void xyz2rgb (vfloat x, vfloat y, vfloat z, vfloat &r, vfloat &g, vfloat &b, const vfloat rgb_xyz[3][3])
|
||||||
|
{
|
||||||
|
r = ((rgb_xyz[0][0] * x + rgb_xyz[0][1] * y + rgb_xyz[0][2] * z)) ;
|
||||||
|
g = ((rgb_xyz[1][0] * x + rgb_xyz[1][1] * y + rgb_xyz[1][2] * z)) ;
|
||||||
|
b = ((rgb_xyz[2][0] * x + rgb_xyz[2][1] * y + rgb_xyz[2][2] * z)) ;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
@ -603,12 +614,40 @@ public:
|
|||||||
* @param y Y coordinate [0 ; 65535] ; can be negative! (return value)
|
* @param y Y coordinate [0 ; 65535] ; can be negative! (return value)
|
||||||
* @param z Z coordinate [0 ; 65535] ; can be negative! (return value)
|
* @param z Z coordinate [0 ; 65535] ; can be negative! (return value)
|
||||||
*/
|
*/
|
||||||
static void Lab2XYZ(float L, float a, float b, float &x, float &y, float &z);
|
static inline void Lab2XYZ(float L, float a, float b, float &x, float &y, float &z)
|
||||||
|
{
|
||||||
|
float LL = L / 327.68f;
|
||||||
|
float aa = a / 327.68f;
|
||||||
|
float bb = b / 327.68f;
|
||||||
|
float fy = (c1By116 * LL) + c16By116; // (L+16)/116
|
||||||
|
float fx = (0.002f * aa) + fy;
|
||||||
|
float fz = fy - (0.005f * bb);
|
||||||
|
x = 65535.f * f2xyz(fx) * D50x;
|
||||||
|
z = 65535.f * f2xyz(fz) * D50z;
|
||||||
|
y = (LL > epskapf) ? 65535.f * fy * fy * fy : 65535.f * LL / kappaf;
|
||||||
|
}
|
||||||
|
|
||||||
static void L2XYZ(float L, float &x, float &y, float &z);
|
static void L2XYZ(float L, float &x, float &y, float &z);
|
||||||
static float L2Y(float L);
|
static float L2Y(float L);
|
||||||
|
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
static void Lab2XYZ(vfloat L, vfloat a, vfloat b, vfloat &x, vfloat &y, vfloat &z);
|
static inline void Lab2XYZ(vfloat L, vfloat a, vfloat b, vfloat &x, vfloat &y, vfloat &z)
|
||||||
|
{
|
||||||
|
vfloat c327d68 = F2V(327.68f);
|
||||||
|
L /= c327d68;
|
||||||
|
a /= c327d68;
|
||||||
|
b /= c327d68;
|
||||||
|
vfloat fy = F2V(c1By116) * L + F2V(c16By116);
|
||||||
|
vfloat fx = F2V(0.002f) * a + fy;
|
||||||
|
vfloat fz = fy - (F2V(0.005f) * b);
|
||||||
|
vfloat c65535 = F2V(65535.f);
|
||||||
|
x = c65535 * f2xyz(fx) * F2V(D50x);
|
||||||
|
z = c65535 * f2xyz(fz) * F2V(D50z);
|
||||||
|
vfloat res1 = fy * fy * fy;
|
||||||
|
vfloat res2 = L / F2V(kappa);
|
||||||
|
y = vself(vmaskf_gt(L, F2V(epskap)), res1, res2);
|
||||||
|
y *= c65535;
|
||||||
|
}
|
||||||
#endif // __SSE2__
|
#endif // __SSE2__
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -32,8 +32,6 @@
|
|||||||
namespace rtengine
|
namespace rtengine
|
||||||
{
|
{
|
||||||
|
|
||||||
extern void filmlike_clip(float *r, float *g, float *b);
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
inline void copyAndClampLine(const float *src, unsigned char *dst, const int W)
|
inline void copyAndClampLine(const float *src, unsigned char *dst, const int W)
|
||||||
@ -46,9 +44,26 @@ inline void copyAndClampLine(const float *src, unsigned char *dst, const int W)
|
|||||||
|
|
||||||
inline void copyAndClamp(const LabImage *src, unsigned char *dst, const double rgb_xyz[3][3], bool multiThread)
|
inline void copyAndClamp(const LabImage *src, unsigned char *dst, const double rgb_xyz[3][3], bool multiThread)
|
||||||
{
|
{
|
||||||
int W = src->W;
|
const int W = src->W;
|
||||||
int H = src->H;
|
const int H = src->H;
|
||||||
|
|
||||||
|
float rgb_xyzf[3][3];
|
||||||
|
|
||||||
|
for (int i = 0; i < 3; i++) {
|
||||||
|
for (int j = 0; j < 3; j++) {
|
||||||
|
rgb_xyzf[i][j] = rgb_xyz[i][j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef __SSE2__
|
||||||
|
vfloat rgb_xyzv[3][3];
|
||||||
|
|
||||||
|
for (int i = 0; i < 3; i++) {
|
||||||
|
for (int j = 0; j < 3; j++) {
|
||||||
|
rgb_xyzv[i][j] = F2V(rgb_xyzf[i][j]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#ifdef _OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for schedule(dynamic,16) if (multiThread)
|
#pragma omp parallel for schedule(dynamic,16) if (multiThread)
|
||||||
#endif
|
#endif
|
||||||
@ -58,17 +73,47 @@ inline void copyAndClamp(const LabImage *src, unsigned char *dst, const double r
|
|||||||
float* rb = src->b[i];
|
float* rb = src->b[i];
|
||||||
int ix = i * 3 * W;
|
int ix = i * 3 * W;
|
||||||
|
|
||||||
float R, G, B;
|
#ifdef __SSE2__
|
||||||
float x_, y_, z_;
|
float rbuffer[W] ALIGNED16;
|
||||||
|
float gbuffer[W] ALIGNED16;
|
||||||
for (int j = 0; j < W; ++j) {
|
float bbuffer[W] ALIGNED16;
|
||||||
|
int j = 0;
|
||||||
|
for (; j < W - 3; j += 4) {
|
||||||
|
vfloat R, G, B;
|
||||||
|
vfloat x_, y_, z_;
|
||||||
|
Color::Lab2XYZ(LVFU(rL[j]), LVFU(ra[j]), LVFU(rb[j]), x_, y_, z_ );
|
||||||
|
Color::xyz2rgb(x_, y_, z_, R, G, B, rgb_xyzv);
|
||||||
|
STVF(rbuffer[j], Color::gamma2curve[R]);
|
||||||
|
STVF(gbuffer[j], Color::gamma2curve[G]);
|
||||||
|
STVF(bbuffer[j], Color::gamma2curve[B]);
|
||||||
|
}
|
||||||
|
for (; j < W; ++j) {
|
||||||
|
float R, G, B;
|
||||||
|
float x_, y_, z_;
|
||||||
Color::Lab2XYZ(rL[j], ra[j], rb[j], x_, y_, z_ );
|
Color::Lab2XYZ(rL[j], ra[j], rb[j], x_, y_, z_ );
|
||||||
Color::xyz2rgb(x_, y_, z_, R, G, B, rgb_xyz);
|
Color::xyz2rgb(x_, y_, z_, R, G, B, rgb_xyzf);
|
||||||
|
rbuffer[j] = Color::gamma2curve[R];
|
||||||
|
gbuffer[j] = Color::gamma2curve[G];
|
||||||
|
bbuffer[j] = Color::gamma2curve[B];
|
||||||
|
}
|
||||||
|
for (int j = 0; j < W; ++j) {
|
||||||
|
dst[ix++] = uint16ToUint8Rounded(rbuffer[j]);
|
||||||
|
dst[ix++] = uint16ToUint8Rounded(gbuffer[j]);
|
||||||
|
dst[ix++] = uint16ToUint8Rounded(bbuffer[j]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
for (int j = 0; j < W; ++j) {
|
||||||
|
float R, G, B;
|
||||||
|
float x_, y_, z_;
|
||||||
|
Color::Lab2XYZ(rL[j], ra[j], rb[j], x_, y_, z_ );
|
||||||
|
Color::xyz2rgb(x_, y_, z_, R, G, B, rgb_xyzf);
|
||||||
|
|
||||||
dst[ix++] = uint16ToUint8Rounded(Color::gamma2curve[R]);
|
dst[ix++] = uint16ToUint8Rounded(Color::gamma2curve[R]);
|
||||||
dst[ix++] = uint16ToUint8Rounded(Color::gamma2curve[G]);
|
dst[ix++] = uint16ToUint8Rounded(Color::gamma2curve[G]);
|
||||||
dst[ix++] = uint16ToUint8Rounded(Color::gamma2curve[B]);
|
dst[ix++] = uint16ToUint8Rounded(Color::gamma2curve[B]);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -151,8 +196,6 @@ void ImProcFunctions::lab2monitorRgb(LabImage* lab, Image8* image)
|
|||||||
// otherwise divide by 327.68, convert to xyz and apply the RGB transform, before converting with gamma2curve
|
// otherwise divide by 327.68, convert to xyz and apply the RGB transform, before converting with gamma2curve
|
||||||
Image8* ImProcFunctions::lab2rgb(LabImage* lab, int cx, int cy, int cw, int ch, const procparams::ColorManagementParams &icm, bool consider_histogram_settings)
|
Image8* ImProcFunctions::lab2rgb(LabImage* lab, int cx, int cy, int cw, int ch, const procparams::ColorManagementParams &icm, bool consider_histogram_settings)
|
||||||
{
|
{
|
||||||
//gamutmap(lab);
|
|
||||||
|
|
||||||
if (cx < 0) {
|
if (cx < 0) {
|
||||||
cx = 0;
|
cx = 0;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user