CIECAM02 speedup
This commit is contained in:
parent
b6f8bc675b
commit
1008e0e98d
@ -20,10 +20,12 @@
|
||||
#include "rtengine.h"
|
||||
#include "curves.h"
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "sleef.c"
|
||||
|
||||
#ifdef _DEBUG
|
||||
#include "settings.h"
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
#undef CLIPD
|
||||
#define CLIPD(a) ((a)>0.0?((a)<1.0?(a):1.0):0.0)
|
||||
@ -33,7 +35,9 @@
|
||||
namespace rtengine
|
||||
{
|
||||
|
||||
#ifdef _DEBUG
|
||||
extern const Settings* settings;
|
||||
#endif
|
||||
|
||||
void Ciecam02::curvecolor(double satind, double satval, double &sres, double parsat)
|
||||
{
|
||||
@ -407,6 +411,15 @@ void Ciecam02::xyz_to_cat02float( float &r, float &g, float &b, float x, float y
|
||||
b = ( 0.0000f * x) + (0.0000f * y) + (1.0000f * z);
|
||||
}
|
||||
}
|
||||
#ifdef __SSE2__
|
||||
void Ciecam02::xyz_to_cat02float( vfloat &r, vfloat &g, vfloat &b, vfloat x, vfloat y, vfloat z )
|
||||
{
|
||||
//gamut correction M.H.Brill S.Susstrunk
|
||||
r = ( F2V(1.007245f) * x) + (F2V(0.011136f) * y) - (F2V(0.018381f) * z);//Changjun Li
|
||||
g = (F2V(-0.318061f) * x) + (F2V(1.314589f) * y) + (F2V(0.003471f) * z);
|
||||
b = z;
|
||||
}
|
||||
#endif
|
||||
|
||||
void Ciecam02::cat02_to_xyz( double &x, double &y, double &z, double r, double g, double b, int gamu )
|
||||
{
|
||||
@ -425,6 +438,7 @@ void Ciecam02::cat02_to_xyz( double &x, double &y, double &z, double r, double g
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef __SSE2__
|
||||
void Ciecam02::cat02_to_xyzfloat( float &x, float &y, float &z, float r, float g, float b, int gamu )
|
||||
{
|
||||
gamu=1;
|
||||
@ -441,7 +455,15 @@ void Ciecam02::cat02_to_xyzfloat( float &x, float &y, float &z, float r, float g
|
||||
z = ( 0.000000f * r) - (0.000000f * g) + (1.000000f * b);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
void Ciecam02::cat02_to_xyzfloat( vfloat &x, vfloat &y, vfloat &z, vfloat r, vfloat g, vfloat b )
|
||||
{
|
||||
//gamut correction M.H.Brill S.Susstrunk
|
||||
x = ( F2V(0.99015849f) * r) - (F2V(0.00838772f)* g) + (F2V(0.018229217f) * b);//Changjun Li
|
||||
y = ( F2V(0.239565979f) * r) + (F2V(0.758664642f) * g) + (F2V(0.001770137f)* b);
|
||||
z = b;
|
||||
}
|
||||
#endif
|
||||
|
||||
void Ciecam02::hpe_to_xyz( double &x, double &y, double &z, double r, double g, double b )
|
||||
{
|
||||
@ -450,12 +472,21 @@ void Ciecam02::hpe_to_xyz( double &x, double &y, double &z, double r, double g,
|
||||
z = b;
|
||||
}
|
||||
|
||||
#ifndef __SSE2__
|
||||
void Ciecam02::hpe_to_xyzfloat( float &x, float &y, float &z, float r, float g, float b )
|
||||
{
|
||||
x = (1.910197f * r) - (1.112124f * g) + (0.201908f * b);
|
||||
y = (0.370950f * r) + (0.629054f * g) - (0.000008f * b);
|
||||
z = b;
|
||||
}
|
||||
#else
|
||||
void Ciecam02::hpe_to_xyzfloat( vfloat &x, vfloat &y, vfloat &z, vfloat r, vfloat g, vfloat b )
|
||||
{
|
||||
x = (F2V(1.910197f) * r) - (F2V(1.112124f) * g) + (F2V(0.201908f) * b);
|
||||
y = (F2V(0.370950f) * r) + (F2V(0.629054f) * g) - (F2V(0.000008f) * b);
|
||||
z = b;
|
||||
}
|
||||
#endif
|
||||
|
||||
void Ciecam02::cat02_to_hpe( double &rh, double &gh, double &bh, double r, double g, double b, int gamu )
|
||||
{
|
||||
@ -485,6 +516,16 @@ void Ciecam02::cat02_to_hpefloat( float &rh, float &gh, float &bh, float r, floa
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __SSE2__
|
||||
void Ciecam02::cat02_to_hpefloat( vfloat &rh, vfloat &gh, vfloat &bh, vfloat r, vfloat g, vfloat b)
|
||||
{
|
||||
//Changjun Li
|
||||
rh = ( F2V(0.550930835f) * r) + (F2V(0.519435987f)* g) - ( F2V(0.070356303f)* b);
|
||||
gh = ( F2V(0.055954056f) * r) + (F2V(0.89973132f) * g) + (F2V(0.044315524f) * b);
|
||||
bh = b;
|
||||
}
|
||||
#endif
|
||||
|
||||
void Ciecam02::Aab_to_rgb( double &r, double &g, double &b, double A, double aa, double bb, double nbb )
|
||||
{
|
||||
double x = (A / nbb) + 0.305;
|
||||
@ -496,6 +537,8 @@ void Ciecam02::Aab_to_rgb( double &r, double &g, double &b, double A, double aa,
|
||||
/* c1 c6 c7 */
|
||||
b = (0.32787 * x) - (0.15681 * aa) - (4.49038 * bb);
|
||||
}
|
||||
|
||||
#ifndef __SSE2__
|
||||
void Ciecam02::Aab_to_rgbfloat( float &r, float &g, float &b, float A, float aa, float bb, float nbb )
|
||||
{
|
||||
float x = (A / nbb) + 0.305f;
|
||||
@ -507,6 +550,19 @@ void Ciecam02::Aab_to_rgbfloat( float &r, float &g, float &b, float A, float aa,
|
||||
/* c1 c6 c7 */
|
||||
b = (0.32787f * x) - (0.15681f * aa) - (4.49038f * bb);
|
||||
}
|
||||
#else
|
||||
void Ciecam02::Aab_to_rgbfloat( vfloat &r, vfloat &g, vfloat &b, vfloat A, vfloat aa, vfloat bb, vfloat nbb )
|
||||
{
|
||||
vfloat c1 = F2V(0.32787f) * ((A / nbb) + F2V(0.305f));
|
||||
|
||||
/* c1 c2 c3 */
|
||||
r = c1 + (F2V(0.32145f) * aa) + (F2V(0.20527f) * bb);
|
||||
/* c1 c4 c5 */
|
||||
g = c1 - (F2V(0.63507f) * aa) - (F2V(0.18603f) * bb);
|
||||
/* c1 c6 c7 */
|
||||
b = c1 - (F2V(0.15681f) * aa) - (F2V(4.49038f) * bb);
|
||||
}
|
||||
#endif
|
||||
|
||||
void Ciecam02::calculate_ab( double &aa, double &bb, double h, double e, double t, double nbb, double a )
|
||||
{
|
||||
@ -535,7 +591,7 @@ void Ciecam02::calculate_ab( double &aa, double &bb, double h, double e, double
|
||||
bb = (aa * sinh) / cosh;
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef __SSE2__
|
||||
void Ciecam02::calculate_abfloat( float &aa, float &bb, float h, float e, float t, float nbb, float a )
|
||||
{
|
||||
float2 sincosval = xsincosf((h * M_PI) / 180.0f);
|
||||
@ -565,6 +621,34 @@ void Ciecam02::calculate_abfloat( float &aa, float &bb, float h, float e, float
|
||||
std::swap(aa,bb);
|
||||
}
|
||||
}
|
||||
#else
|
||||
void Ciecam02::calculate_abfloat( vfloat &aa, vfloat &bb, vfloat h, vfloat e, vfloat t, vfloat nbb, vfloat a )
|
||||
{
|
||||
vfloat2 sincosval = xsincosf((h * F2V(M_PI)) / F2V(180.0f));
|
||||
vfloat sinh = sincosval.x;
|
||||
vfloat cosh = sincosval.y;
|
||||
vfloat x = (a / nbb) + F2V(0.305f);
|
||||
vfloat p3 = F2V(1.05f);
|
||||
vmask swapMask = vmaskf_gt(vabsf(sinh), vabsf(cosh));
|
||||
vswap(swapMask, sinh, cosh);
|
||||
|
||||
vfloat div = ((e / (t * cosh)) - (F2V(-0.31362f) - (p3 * F2V(0.15681f))) - ((F2V(0.01924f) - (p3 * F2V(4.49038f))) * (sinh / cosh)));
|
||||
// for large values of t the above calculation can change its sign which results in a hue shift of 180 degree
|
||||
// so we have to check the sign to avoid this shift.
|
||||
// Additionally it seems useful to limit the minimum value of div
|
||||
// I limited it, but I'm sure the actual limit is not the best one
|
||||
|
||||
vmask limitMask = vmaskf_neq(vsignf(div), vsignf(cosh));
|
||||
limitMask = vorm(limitMask, vmaskf_le(vabsf(div), vabsf(cosh) * F2V(2.f)));
|
||||
div = vself(limitMask, cosh * F2V(2.f), div);
|
||||
|
||||
aa = ((F2V(0.32787f) * x) * (F2V(2.0f) + p3)) / div;
|
||||
bb = (aa * sinh) / cosh;
|
||||
|
||||
vswap(swapMask, aa, bb);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
void Ciecam02::initcam1(double gamu, double yb, double pilotd, double f, double la, double xw, double yw, double zw, double &n, double &d, double &nbb, double &ncb,
|
||||
double &cz, double &aw, double &wh, double &pfl, double &fl, double &c)
|
||||
@ -758,6 +842,63 @@ void Ciecam02::xyz2jchqms_ciecam02float( float &J, float &C, float &h, float &Q,
|
||||
s = 100.0f * sqrtf( M / Q );
|
||||
h = (myh * 180.f) / (float)M_PI;
|
||||
}
|
||||
#ifdef __SSE2__
|
||||
void Ciecam02::xyz2jchqms_ciecam02float( vfloat &J, vfloat &C, vfloat &h, vfloat &Q, vfloat &M, vfloat &s, vfloat aw, vfloat fl, vfloat wh,
|
||||
vfloat x, vfloat y, vfloat z, vfloat xw, vfloat yw, vfloat zw,
|
||||
vfloat yb, vfloat la, vfloat f, vfloat c, vfloat nc, vfloat pow1, vfloat nbb, vfloat ncb, vfloat pfl, vfloat cz, vfloat d)
|
||||
|
||||
{
|
||||
vfloat r, g, b;
|
||||
vfloat rw, gw, bw;
|
||||
vfloat rc, gc, bc;
|
||||
vfloat rp, gp, bp;
|
||||
vfloat rpa, gpa, bpa;
|
||||
vfloat a, ca, cb;
|
||||
vfloat e, t;
|
||||
|
||||
xyz_to_cat02float( r, g, b, x, y, z);
|
||||
xyz_to_cat02float( rw, gw, bw, xw, yw, zw);
|
||||
vfloat onev = F2V(1.f);
|
||||
rc = r * (((yw * d) / rw) + (onev - d));
|
||||
gc = g * (((yw * d) / gw) + (onev - d));
|
||||
bc = b * (((yw * d) / bw) + (onev - d));
|
||||
|
||||
cat02_to_hpefloat( rp, gp, bp, rc, gc, bc);
|
||||
//gamut correction M.H.Brill S.Susstrunk
|
||||
rp = _mm_max_ps(rp,ZEROV);
|
||||
gp = _mm_max_ps(gp,ZEROV);
|
||||
bp = _mm_max_ps(bp,ZEROV);
|
||||
rpa = nonlinear_adaptationfloat( rp, fl );
|
||||
gpa = nonlinear_adaptationfloat( gp, fl );
|
||||
bpa = nonlinear_adaptationfloat( bp, fl );
|
||||
|
||||
ca = rpa - ((F2V(12.0f) * gpa) - bpa) / F2V(11.0f);
|
||||
cb = F2V(0.11111111f) * (rpa + gpa - (bpa + bpa));
|
||||
|
||||
vfloat myh = xatan2f( cb, ca );
|
||||
vfloat temp = F2V(M_PI);
|
||||
temp += temp;
|
||||
temp += myh;
|
||||
myh = vself(vmaskf_lt(myh, ZEROV), temp, myh);
|
||||
|
||||
a = ((rpa + rpa) + gpa + (F2V(0.05f) * bpa) - F2V(0.305f)) * nbb;
|
||||
a = _mm_max_ps(a,ZEROV); //gamut correction M.H.Brill S.Susstrunk
|
||||
|
||||
J = pow_F( a / aw, c * cz * F2V(0.5f));
|
||||
|
||||
e = ((F2V(961.53846f)) * nc * ncb) * (xcosf( myh + F2V(2.0f) ) + F2V(3.8f));
|
||||
t = (e * _mm_sqrt_ps( (ca * ca) + (cb * cb) )) / (rpa + gpa + (F2V(1.05f) * bpa));
|
||||
|
||||
C = pow_F( t, F2V(0.9f) ) * J * pow1;
|
||||
|
||||
Q = wh * J;
|
||||
J *= J * F2V(100.0f);
|
||||
M = C * pfl;
|
||||
Q = _mm_max_ps(Q,F2V(0.0001f)); // avoid division by zero
|
||||
s = F2V(100.0f) * _mm_sqrt_ps( M / Q );
|
||||
h = (myh * F2V(180.f)) / F2V(M_PI);
|
||||
}
|
||||
#endif
|
||||
|
||||
void Ciecam02::jch2xyz_ciecam02( double &x, double &y, double &z, double J, double C, double h,
|
||||
double xw, double yw, double zw, double yb, double la,
|
||||
@ -792,7 +933,7 @@ void Ciecam02::jch2xyz_ciecam02( double &x, double &y, double &z, double J, doub
|
||||
|
||||
cat02_to_xyz( x, y, z, r, g, b, gamu );
|
||||
}
|
||||
|
||||
#ifndef __SSE2__
|
||||
void Ciecam02::jch2xyz_ciecam02float( float &x, float &y, float &z, float J, float C, float h,
|
||||
float xw, float yw, float zw, float yb, float la,
|
||||
float f, float c, float nc , int gamu, float pow1, float nbb, float ncb, float fl, float cz, float d, float aw)
|
||||
@ -827,6 +968,41 @@ void Ciecam02::jch2xyz_ciecam02float( float &x, float &y, float &z, float J, flo
|
||||
cat02_to_xyzfloat( x, y, z, r, g, b, gamu );
|
||||
}
|
||||
|
||||
#else
|
||||
void Ciecam02::jch2xyz_ciecam02float( vfloat &x, vfloat &y, vfloat &z, vfloat J, vfloat C, vfloat h,
|
||||
vfloat xw, vfloat yw, vfloat zw, vfloat yb, vfloat la,
|
||||
vfloat f, vfloat nc, vfloat pow1, vfloat nbb, vfloat ncb, vfloat fl, vfloat d, vfloat aw, vfloat reccmcz)
|
||||
{
|
||||
vfloat r, g, b;
|
||||
vfloat rc, gc, bc;
|
||||
vfloat rp, gp, bp;
|
||||
vfloat rpa, gpa, bpa;
|
||||
vfloat rw, gw, bw;
|
||||
vfloat a, ca, cb;
|
||||
vfloat e, t;
|
||||
xyz_to_cat02float( rw, gw, bw, xw, yw, zw);
|
||||
e = ((F2V(961.53846f)) * nc * ncb) * (xcosf( ((h * F2V(M_PI)) / F2V(180.0f)) + F2V(2.0f) ) + F2V(3.8f));
|
||||
a = pow_F( J / F2V(100.0f), reccmcz ) * aw;
|
||||
t = pow_F( F2V(10.f) * C / (_mm_sqrt_ps( J ) * pow1), F2V(1.1111111f) );
|
||||
|
||||
calculate_abfloat( ca, cb, h, e, t, nbb, a );
|
||||
Aab_to_rgbfloat( rpa, gpa, bpa, a, ca, cb, nbb );
|
||||
|
||||
rp = inverse_nonlinear_adaptationfloat( rpa, fl );
|
||||
gp = inverse_nonlinear_adaptationfloat( gpa, fl );
|
||||
bp = inverse_nonlinear_adaptationfloat( bpa, fl );
|
||||
|
||||
hpe_to_xyzfloat( x, y, z, rp, gp, bp );
|
||||
xyz_to_cat02float( rc, gc, bc, x, y, z );
|
||||
|
||||
r = rc / (((yw * d) / rw) + (F2V(1.0f) - d));
|
||||
g = gc / (((yw * d) / gw) + (F2V(1.0f) - d));
|
||||
b = bc / (((yw * d) / bw) + (F2V(1.0f) - d));
|
||||
|
||||
cat02_to_xyzfloat( x, y, z, r, g, b );
|
||||
}
|
||||
#endif
|
||||
|
||||
double Ciecam02::nonlinear_adaptation( double c, double fl )
|
||||
{
|
||||
double p;
|
||||
@ -841,6 +1017,20 @@ float Ciecam02::nonlinear_adaptationfloat( float c, float fl )
|
||||
else {p = pow_F( (fl * c) / 100.0f, 0.42f ); return ((400.0f * p) / (27.13f + p)) + 0.1f;}
|
||||
}
|
||||
|
||||
#ifdef __SSE2__
|
||||
vfloat Ciecam02::nonlinear_adaptationfloat( vfloat c, vfloat fl )
|
||||
{
|
||||
vfloat c100 = F2V(100.f);
|
||||
vfloat czd42 = F2V(0.42f);
|
||||
vfloat c400 = vmulsignf(F2V(400.f),c);
|
||||
fl = vmulsignf(fl,c);
|
||||
vfloat p = pow_F( (fl * c) / c100, czd42 );
|
||||
vfloat c27d13 = F2V(27.13);
|
||||
vfloat czd1 = F2V(0.1f);
|
||||
return ((c400 * p) / (c27d13 + p)) + czd1;
|
||||
}
|
||||
#endif
|
||||
|
||||
double Ciecam02::inverse_nonlinear_adaptation( double c, double fl )
|
||||
{
|
||||
int c1;
|
||||
@ -849,6 +1039,7 @@ double Ciecam02::inverse_nonlinear_adaptation( double c, double fl )
|
||||
return c1*(100.0 / fl) * pow( (27.13 * fabs( c - 0.1 )) / (400.0 - fabs( c - 0.1 )), 1.0 / 0.42 );
|
||||
}
|
||||
|
||||
#ifndef __SSE2__
|
||||
float Ciecam02::inverse_nonlinear_adaptationfloat( float c, float fl )
|
||||
{
|
||||
c -= 0.1f;
|
||||
@ -863,6 +1054,16 @@ float Ciecam02::inverse_nonlinear_adaptationfloat( float c, float fl )
|
||||
return (100.0f / fl) * pow_F( (27.13f * fabsf( c )) / (400.0f - fabsf( c )), 2.38095238f );
|
||||
}
|
||||
|
||||
#else
|
||||
vfloat Ciecam02::inverse_nonlinear_adaptationfloat( vfloat c, vfloat fl )
|
||||
{
|
||||
c -= F2V(0.1f);
|
||||
fl = vmulsignf(fl,c);
|
||||
c = vabsf(c);
|
||||
c = _mm_min_ps( c, F2V(399.99f));
|
||||
return (F2V(100.0f) / fl) * pow_F( (F2V(27.13f) * c) / (F2V(400.0f) - c), F2V(2.38095238f) );
|
||||
}
|
||||
#endif
|
||||
//end CIECAM Billy Bigg
|
||||
|
||||
}
|
||||
|
@ -20,6 +20,7 @@
|
||||
#define _CIECAM02_
|
||||
#include <cmath>
|
||||
#include "LUT.h"
|
||||
#include "opthelper.h"
|
||||
|
||||
namespace rtengine
|
||||
{
|
||||
@ -40,18 +41,33 @@ private:
|
||||
|
||||
static void xyz_to_cat02float ( float &r, float &g, float &b, float x, float y, float z, int gamu );
|
||||
static void cat02_to_hpefloat ( float &rh, float &gh, float &bh, float r, float g, float b, int gamu );
|
||||
static void cat02_to_xyzfloat ( float &x, float &y, float &z, float r, float g, float b, int gamu );
|
||||
static void hpe_to_xyzfloat ( float &x, float &y, float &z, float r, float g, float b );
|
||||
|
||||
#ifdef __SSE2__
|
||||
static void xyz_to_cat02float ( vfloat &r, vfloat &g, vfloat &b, vfloat x, vfloat y, vfloat z );
|
||||
static void cat02_to_hpefloat ( vfloat &rh, vfloat &gh, vfloat &bh, vfloat r, vfloat g, vfloat b );
|
||||
static vfloat nonlinear_adaptationfloat( vfloat c, vfloat fl );
|
||||
#endif
|
||||
|
||||
static void Aab_to_rgb( double &r, double &g, double &b, double A, double aa, double bb, double nbb );
|
||||
static void Aab_to_rgbfloat( float &r, float &g, float &b, float A, float aa, float bb, float nbb );
|
||||
static void calculate_ab( double &aa, double &bb, double h, double e, double t, double nbb, double a );
|
||||
static void calculate_abfloat( float &aa, float &bb, float h, float e, float t, float nbb, float a );
|
||||
|
||||
static double nonlinear_adaptation( double c, double fl );
|
||||
static float nonlinear_adaptationfloat( float c, float fl );
|
||||
static double inverse_nonlinear_adaptation( double c, double fl );
|
||||
|
||||
#ifndef __SSE2__
|
||||
static float inverse_nonlinear_adaptationfloat( float c, float fl );
|
||||
static void calculate_abfloat( float &aa, float &bb, float h, float e, float t, float nbb, float a );
|
||||
static void Aab_to_rgbfloat( float &r, float &g, float &b, float A, float aa, float bb, float nbb );
|
||||
static void hpe_to_xyzfloat ( float &x, float &y, float &z, float r, float g, float b );
|
||||
static void cat02_to_xyzfloat ( float &x, float &y, float &z, float r, float g, float b, int gamu );
|
||||
#else
|
||||
static vfloat inverse_nonlinear_adaptationfloat( vfloat c, vfloat fl );
|
||||
static void calculate_abfloat( vfloat &aa, vfloat &bb, vfloat h, vfloat e, vfloat t, vfloat nbb, vfloat a );
|
||||
static void Aab_to_rgbfloat( vfloat &r, vfloat &g, vfloat &b, vfloat A, vfloat aa, vfloat bb, vfloat nbb );
|
||||
static void hpe_to_xyzfloat ( vfloat &x, vfloat &y, vfloat &z, vfloat r, vfloat g, vfloat b );
|
||||
static void cat02_to_xyzfloat ( vfloat &x, vfloat &y, vfloat &z, vfloat r, vfloat g, vfloat b );
|
||||
#endif
|
||||
|
||||
public:
|
||||
Ciecam02 () {}
|
||||
@ -69,12 +85,19 @@ public:
|
||||
double yb, double la,
|
||||
double f, double c, double nc, int gamu, double n, double nbb, double ncb, double fl, double cz, double d, double aw);
|
||||
|
||||
#ifndef __SSE2__
|
||||
static void jch2xyz_ciecam02float( float &x, float &y, float &z,
|
||||
float J, float C, float h,
|
||||
float xw, float yw, float zw,
|
||||
float yb, float la,
|
||||
float f, float c, float nc,int gamu,float n, float nbb, float ncb, float fl, float cz, float d, float aw );
|
||||
|
||||
#else
|
||||
static void jch2xyz_ciecam02float( vfloat &x, vfloat &y, vfloat &z,
|
||||
vfloat J, vfloat C, vfloat h,
|
||||
vfloat xw, vfloat yw, vfloat zw,
|
||||
vfloat yb, vfloat la,
|
||||
vfloat f, vfloat nc, vfloat n, vfloat nbb, vfloat ncb, vfloat fl, vfloat d, vfloat aw, vfloat reccmcz );
|
||||
#endif
|
||||
/**
|
||||
* Forward transform from XYZ to CIECAM02 JCh.
|
||||
*/
|
||||
@ -104,6 +127,16 @@ public:
|
||||
float yb, float la,
|
||||
float f, float c, float nc, float pilotd, int gamu, float n, float nbb, float ncb, float pfl, float cz, float d );
|
||||
|
||||
#ifdef __SSE2__
|
||||
static void xyz2jchqms_ciecam02float( vfloat &J, vfloat &C, vfloat &h,
|
||||
vfloat &Q, vfloat &M, vfloat &s,vfloat aw, vfloat fl, vfloat wh,
|
||||
vfloat x, vfloat y, vfloat z,
|
||||
vfloat xw, vfloat yw, vfloat zw,
|
||||
vfloat yb, vfloat la,
|
||||
vfloat f, vfloat c, vfloat nc, vfloat n, vfloat nbb, vfloat ncb, vfloat pfl, vfloat cz, vfloat d );
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
};
|
||||
}
|
||||
|
@ -21,7 +21,7 @@
|
||||
#include "color.h"
|
||||
#include "iccmatrices.h"
|
||||
#include "mytime.h"
|
||||
#include "sleef.c"
|
||||
#include "sleef.c"
|
||||
#include "opthelper.h"
|
||||
|
||||
using namespace std;
|
||||
@ -433,7 +433,7 @@ namespace rtengine {
|
||||
y = ((xyz_rgb[1][0]*r + xyz_rgb[1][1]*g + xyz_rgb[1][2]*b)) ;
|
||||
z = ((xyz_rgb[2][0]*r + xyz_rgb[2][1]*g + xyz_rgb[2][2]*b)) ;
|
||||
}
|
||||
|
||||
|
||||
void Color::rgbxyz (float r, float g, float b, float &x, float &y, float &z, const float xyz_rgb[3][3]) {
|
||||
x = ((xyz_rgb[0][0]*r + xyz_rgb[0][1]*g + xyz_rgb[0][2]*b)) ;
|
||||
y = ((xyz_rgb[1][0]*r + xyz_rgb[1][1]*g + xyz_rgb[1][2]*b)) ;
|
||||
@ -859,17 +859,36 @@ namespace rtengine {
|
||||
y=(LL>epskap) ? 65535.0f*fy*fy*fy : 65535.0f*LL/kappa;
|
||||
}
|
||||
|
||||
#ifdef __SSE2__
|
||||
void Color::Lab2XYZ(vfloat L, vfloat a, vfloat b, vfloat &x, vfloat &y, vfloat &z) {
|
||||
vfloat c327d68 = F2V(327.68f);
|
||||
L /= c327d68;
|
||||
a /= c327d68;
|
||||
b /= c327d68;
|
||||
vfloat fy = F2V(0.00862069f) * L + F2V(0.137932f);
|
||||
vfloat fx = F2V(0.002f) * a + fy;
|
||||
vfloat fz = fy - (F2V(0.005f) * b);
|
||||
vfloat c65535 = F2V(65535.f);
|
||||
x = c65535*f2xyz(fx)*F2V(D50x);
|
||||
z = c65535*f2xyz(fz)*F2V(D50z);
|
||||
vfloat res1 = fy*fy*fy;
|
||||
vfloat res2 = L / F2V(kappa);
|
||||
y = vself(vmaskf_gt(L, F2V(epskap)), res1, res2);
|
||||
y *= c65535;
|
||||
}
|
||||
#endif // __SSE2__
|
||||
|
||||
void Color::XYZ2Lab(float X, float Y, float Z, float &L, float &a, float &b) {
|
||||
|
||||
float x = X/D50x;
|
||||
float z = Z/D50z;
|
||||
float y= Y;
|
||||
float fx,fy,fz;
|
||||
|
||||
|
||||
fx = (x<=65535.0f ? cachef[x] : (327.68f*xcbrtf(x/MAXVALF)));
|
||||
fy = (y<=65535.0f ? cachef[y] : (327.68f*xcbrtf(y/MAXVALF)));
|
||||
fz = (z<=65535.0f ? cachef[z] : (327.68f*xcbrtf(z/MAXVALF)));
|
||||
|
||||
|
||||
L = (116.0f * fy - 5242.88f); //5242.88=16.0*327.68;
|
||||
a = (500.0f * (fx - fy) );
|
||||
b = (200.0f * (fy - fz) );
|
||||
@ -1518,49 +1537,49 @@ SSEFUNCTION void Color::LabGamutMunsell(float *labL, float *laba, float *labb,
|
||||
#endif
|
||||
float correctlum = 0.f;
|
||||
float correctionHuechroma = 0.f;
|
||||
#ifdef __SSE2__
|
||||
// precalculate H and C using SSE
|
||||
float HHBuffer[N];
|
||||
float CCBuffer[N];
|
||||
__m128 c327d68v = _mm_set1_ps(327.68f);
|
||||
__m128 av,bv;
|
||||
int k;
|
||||
for (k=0; k<N-3; k+=4) {
|
||||
av = LVFU(laba[k]);
|
||||
bv = LVFU(labb[k]);
|
||||
_mm_storeu_ps(&HHBuffer[k],xatan2f(bv,av));
|
||||
_mm_storeu_ps(&CCBuffer[k],_mm_sqrt_ps(SQRV(av)+SQRV(bv))/c327d68v);
|
||||
}
|
||||
for(;k<N;k++) {
|
||||
HHBuffer[k] = xatan2f(labb[k],laba[k]);
|
||||
CCBuffer[k] = sqrt(SQR(laba[k]) + SQR(labb[k]))/327.68f;
|
||||
}
|
||||
#ifdef __SSE2__
|
||||
// precalculate H and C using SSE
|
||||
float HHBuffer[N];
|
||||
float CCBuffer[N];
|
||||
__m128 c327d68v = _mm_set1_ps(327.68f);
|
||||
__m128 av,bv;
|
||||
int k;
|
||||
for (k=0; k<N-3; k+=4) {
|
||||
av = LVFU(laba[k]);
|
||||
bv = LVFU(labb[k]);
|
||||
_mm_storeu_ps(&HHBuffer[k],xatan2f(bv,av));
|
||||
_mm_storeu_ps(&CCBuffer[k],_mm_sqrt_ps(SQRV(av)+SQRV(bv))/c327d68v);
|
||||
}
|
||||
for(;k<N;k++) {
|
||||
HHBuffer[k] = xatan2f(labb[k],laba[k]);
|
||||
CCBuffer[k] = sqrt(SQR(laba[k]) + SQR(labb[k]))/327.68f;
|
||||
}
|
||||
#endif // __SSE2__
|
||||
for (int j=0; j<N; j++) {
|
||||
#ifdef __SSE2__
|
||||
float HH = HHBuffer[j];
|
||||
float Chprov1 = CCBuffer[j];
|
||||
for (int j=0; j<N; j++) {
|
||||
#ifdef __SSE2__
|
||||
float HH = HHBuffer[j];
|
||||
float Chprov1 = CCBuffer[j];
|
||||
#else
|
||||
float HH=xatan2f(labb[j],laba[j]);
|
||||
float Chprov1=sqrtf(SQR(laba[j]) + SQR(labb[j]))/327.68f;
|
||||
float Chprov1=sqrtf(SQR(laba[j]) + SQR(labb[j]))/327.68f;
|
||||
#endif
|
||||
float Lprov1=labL[j]/327.68f;
|
||||
float Loldd = Lprov1;
|
||||
float Loldd = Lprov1;
|
||||
float Coldd = Chprov1;
|
||||
float2 sincosval;
|
||||
float2 sincosval;
|
||||
if(gamut) {
|
||||
#ifdef _DEBUG
|
||||
bool neg, more_rgb;
|
||||
#endif
|
||||
// According to mathematical laws we can get the sin and cos of HH by simple operations
|
||||
float R,G,B;
|
||||
if(Chprov1 == 0.f) {
|
||||
sincosval.y = 1.f;
|
||||
sincosval.x = 0.f;
|
||||
} else {
|
||||
sincosval.y = laba[j]/(Chprov1*327.68f);
|
||||
sincosval.x = labb[j]/(Chprov1*327.68f);
|
||||
}
|
||||
// According to mathematical laws we can get the sin and cos of HH by simple operations
|
||||
float R,G,B;
|
||||
if(Chprov1 == 0.f) {
|
||||
sincosval.y = 1.f;
|
||||
sincosval.x = 0.f;
|
||||
} else {
|
||||
sincosval.y = laba[j]/(Chprov1*327.68f);
|
||||
sincosval.x = labb[j]/(Chprov1*327.68f);
|
||||
}
|
||||
|
||||
//gamut control : Lab values are in gamut
|
||||
#ifdef _DEBUG
|
||||
@ -1585,24 +1604,24 @@ SSEFUNCTION void Color::LabGamutMunsell(float *labL, float *laba, float *labb,
|
||||
#else
|
||||
AllMunsellLch(lumaMuns, Lprov1, Loldd, HH, Chprov1, Coldd, correctionHuechroma, correctlum);
|
||||
#endif
|
||||
if(correctlum == 0.f && correctionHuechroma == 0.f) {
|
||||
if(!gamut) {
|
||||
if(Coldd == 0.f) {
|
||||
sincosval.y = 1.f;
|
||||
sincosval.x = 0.f;
|
||||
} else {
|
||||
sincosval.y = laba[j]/(Coldd*327.68f);
|
||||
sincosval.x = labb[j]/(Coldd*327.68f);
|
||||
}
|
||||
}
|
||||
|
||||
if(correctlum == 0.f && correctionHuechroma == 0.f) {
|
||||
if(!gamut) {
|
||||
if(Coldd == 0.f) {
|
||||
sincosval.y = 1.f;
|
||||
sincosval.x = 0.f;
|
||||
} else {
|
||||
sincosval.y = laba[j]/(Coldd*327.68f);
|
||||
sincosval.x = labb[j]/(Coldd*327.68f);
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
HH+=correctlum; //hue Munsell luminance correction
|
||||
sincosval = xsincosf(HH+correctionHuechroma);
|
||||
sincosval = xsincosf(HH+correctionHuechroma);
|
||||
}
|
||||
laba[j] = Chprov1*sincosval.y*327.68f;
|
||||
labb[j] = Chprov1*sincosval.x*327.68f;
|
||||
}
|
||||
labb[j] = Chprov1*sincosval.x*327.68f;
|
||||
}
|
||||
|
||||
#ifdef _DEBUG
|
||||
t2e.set();
|
||||
|
@ -316,6 +316,9 @@ public:
|
||||
*/
|
||||
static void Lab2XYZ(float L, float a, float b, float &x, float &y, float &z);
|
||||
|
||||
#ifdef __SSE2__
|
||||
static void Lab2XYZ(vfloat L, vfloat a, vfloat b, vfloat &x, vfloat &y, vfloat &z);
|
||||
#endif // __SSE2__
|
||||
|
||||
/**
|
||||
* @brief Convert xyz in Lab
|
||||
@ -439,7 +442,15 @@ public:
|
||||
|
||||
return (f > epsilonExpInv3) ? f*f*f : (116.f * f - 16.f) * kappaInv;
|
||||
}
|
||||
|
||||
#ifdef __SSE2__
|
||||
static inline vfloat f2xyz(vfloat f) {
|
||||
const vfloat epsilonExpInv3 = F2V(0.20689655f); // 6.0f/29.0f;
|
||||
const vfloat kappaInv = F2V(0.0011070565f); // 27.0f/24389.0f; // inverse of kappa
|
||||
vfloat res1 = f*f*f;
|
||||
vfloat res2 = (116.f * f - 16.f) * kappaInv;
|
||||
return vself(vmaskf_gt(f, epsilonExpInv3), res1, res2);
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Calculate the effective direction (up or down) to linearly interpolating 2 colors so that it follows the shortest or longest path
|
||||
@ -1013,7 +1024,7 @@ public:
|
||||
* L channel's usual range is [0 ; 100], but values can be negative or >100
|
||||
* @param laba a channel input and output image
|
||||
* @param labb b channel input and output image
|
||||
* a and b channel's range is usually [-128 ; +128], but values can be >128
|
||||
* a and b channel's range is usually [-128 ; +128], but values can be >128
|
||||
* @param N Number of pixels to process
|
||||
* @param corMunsell performs Munsell correction
|
||||
* @param lumaMuns whether to apply luma correction or not (used only if corMuns=true)
|
||||
|
@ -2,14 +2,14 @@
|
||||
#error Please specify -msse2.
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
#ifdef __GNUC__
|
||||
#define INLINE __inline
|
||||
//#define INLINE __attribute__((always_inline))
|
||||
#else
|
||||
#define INLINE inline
|
||||
#endif
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <x86intrin.h>
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
@ -64,6 +64,7 @@ typedef __m128i vint2;
|
||||
#endif
|
||||
|
||||
#define ZEROV _mm_setzero_ps()
|
||||
#define F2V(a) _mm_set1_ps((a))
|
||||
|
||||
static INLINE vint vrint_vi_vd(vdouble vd) { return _mm_cvtpd_epi32(vd); }
|
||||
static INLINE vint vtruncate_vi_vd(vdouble vd) { return _mm_cvttpd_epi32(vd); }
|
||||
@ -126,6 +127,7 @@ static INLINE vmask vandm(vmask x, vmask y) { return _mm_and_si128(x, y); }
|
||||
static INLINE vmask vandnotm(vmask x, vmask y) { return _mm_andnot_si128(x, y); }
|
||||
static INLINE vmask vorm(vmask x, vmask y) { return _mm_or_si128(x, y); }
|
||||
static INLINE vmask vxorm(vmask x, vmask y) { return _mm_xor_si128(x, y); }
|
||||
static INLINE vmask vnotm(vmask x) { return _mm_xor_si128(x, _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); }
|
||||
|
||||
static INLINE vmask vmask_eq(vdouble x, vdouble y) { return (__m128i)_mm_cmpeq_pd(x, y); }
|
||||
static INLINE vmask vmask_neq(vdouble x, vdouble y) { return (__m128i)_mm_cmpneq_pd(x, y); }
|
||||
|
@ -1418,6 +1418,7 @@ if(settings->viewinggreySc==1) yb=18.0f;//fixed
|
||||
const float pow1 = pow_F( 1.64f - pow_F( 0.29f, n ), 0.73f );
|
||||
float nj,dj,nbbj,ncbj,czj,awj,flj;
|
||||
Ciecam02::initcam2float(gamu,yb2, f2, la2, xw2, yw2, zw2, nj, dj, nbbj, ncbj,czj, awj, flj);
|
||||
const float reccmcz = 1.f / (c2*czj);
|
||||
const float pow1n = pow_F( 1.64f - pow_F( 0.29f, nj ), 0.73f );
|
||||
|
||||
const float epsil=0.0001f;
|
||||
@ -1440,29 +1441,97 @@ if(settings->viewinggreySc==1) yb=18.0f;//fixed
|
||||
{wiprof[2][0],wiprof[2][1],wiprof[2][2]}
|
||||
};
|
||||
|
||||
|
||||
#ifdef __SSE2__
|
||||
int bufferLength = ((width+3)/4) * 4; // bufferLength has to be a multiple of 4
|
||||
#endif
|
||||
#ifndef _DEBUG
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
float minQThr = 10000.f;
|
||||
float maxQThr = -1000.f;
|
||||
#ifndef _DEBUG
|
||||
#pragma omp for schedule(dynamic, 10)
|
||||
#ifdef __SSE2__
|
||||
// one line buffer per channel and thread
|
||||
float Jbuffer[bufferLength] ALIGNED16;
|
||||
float Cbuffer[bufferLength] ALIGNED16;
|
||||
float hbuffer[bufferLength] ALIGNED16;
|
||||
float Qbuffer[bufferLength] ALIGNED16;
|
||||
float Mbuffer[bufferLength] ALIGNED16;
|
||||
float sbuffer[bufferLength] ALIGNED16;
|
||||
#endif
|
||||
for (int i=0; i<height; i++)
|
||||
for (int j=0; j<width; j++) {
|
||||
#ifndef _DEBUG
|
||||
#pragma omp for schedule(dynamic, 16)
|
||||
#endif
|
||||
for (int i=0; i<height; i++) {
|
||||
#ifdef __SSE2__
|
||||
// vectorized conversion from Lab to jchqms
|
||||
int k;
|
||||
vfloat x,y,z;
|
||||
vfloat J, C, h, Q, M, s;
|
||||
|
||||
vfloat c655d35 = F2V(655.35f);
|
||||
for(k=0;k<width-3;k+=4) {
|
||||
Color::Lab2XYZ(LVFU(lab->L[i][k]),LVFU(lab->a[i][k]),LVFU(lab->b[i][k]),x,y,z);
|
||||
x = x/c655d35;
|
||||
y = y/c655d35;
|
||||
z = z/c655d35;
|
||||
Ciecam02::xyz2jchqms_ciecam02float( J, C, h,
|
||||
Q, M, s, F2V(aw), F2V(fl), F2V(wh),
|
||||
x, y, z,
|
||||
F2V(xw1), F2V(yw1), F2V(zw1),
|
||||
F2V(yb), F2V(la),
|
||||
F2V(f), F2V(c), F2V(nc), F2V(pow1), F2V(nbb), F2V(ncb), F2V(pfl), F2V(cz), F2V(d));
|
||||
STVF(Jbuffer[k],J);
|
||||
STVF(Cbuffer[k],C);
|
||||
STVF(hbuffer[k],h);
|
||||
STVF(Qbuffer[k],Q);
|
||||
STVF(Mbuffer[k],M);
|
||||
STVF(sbuffer[k],s);
|
||||
}
|
||||
for(;k<width;k++) {
|
||||
float L=lab->L[i][k];
|
||||
float a=lab->a[i][k];
|
||||
float b=lab->b[i][k];
|
||||
float x,y,z;
|
||||
//convert Lab => XYZ
|
||||
Color::Lab2XYZ(L, a, b, x, y, z);
|
||||
x = x/655.35f;
|
||||
y = y/655.35f;
|
||||
z = z/655.35f;
|
||||
float J, C, h, Q, M, s;
|
||||
Ciecam02::xyz2jchqms_ciecam02float( J, C, h,
|
||||
Q, M, s, aw, fl, wh,
|
||||
x, y, z,
|
||||
xw1, yw1, zw1,
|
||||
yb, la,
|
||||
f, c, nc, pilot, gamu, pow1, nbb, ncb, pfl, cz, d);
|
||||
Jbuffer[k] = J;
|
||||
Cbuffer[k] = C;
|
||||
hbuffer[k] = h;
|
||||
Qbuffer[k] = Q;
|
||||
Mbuffer[k] = M;
|
||||
sbuffer[k] = s;
|
||||
}
|
||||
#endif // __SSE2__
|
||||
for (int j=0; j<width; j++) {
|
||||
float J, C, h, Q, M, s;
|
||||
|
||||
#ifdef __SSE2__
|
||||
// use precomputed values from above
|
||||
J = Jbuffer[j];
|
||||
C = Cbuffer[j];
|
||||
h = hbuffer[j];
|
||||
Q = Qbuffer[j];
|
||||
M = Mbuffer[j];
|
||||
s = sbuffer[j];
|
||||
#else
|
||||
float x,y,z;
|
||||
float L=lab->L[i][j];
|
||||
float a=lab->a[i][j];
|
||||
float b=lab->b[i][j];
|
||||
float x1,y1,z1;
|
||||
float x,y,z;
|
||||
//convert Lab => XYZ
|
||||
Color::Lab2XYZ(L, a, b, x1, y1, z1);
|
||||
float J, C, h, Q, M, s;
|
||||
float Jpro,Cpro, hpro, Qpro, Mpro, spro;
|
||||
|
||||
x=(float)x1/655.35f;
|
||||
y=(float)y1/655.35f;
|
||||
z=(float)z1/655.35f;
|
||||
@ -1473,6 +1542,8 @@ if(settings->viewinggreySc==1) yb=18.0f;//fixed
|
||||
xw1, yw1, zw1,
|
||||
yb, la,
|
||||
f, c, nc, pilot, gamu, pow1, nbb, ncb, pfl, cz, d);
|
||||
#endif
|
||||
float Jpro,Cpro, hpro, Qpro, Mpro, spro;
|
||||
Jpro=J;
|
||||
Cpro=C;
|
||||
hpro=h;
|
||||
@ -1797,6 +1868,12 @@ if(settings->viewinggreySc==1) yb=18.0f;//fixed
|
||||
}
|
||||
}
|
||||
if(LabPassOne){
|
||||
#ifdef __SSE2__
|
||||
// write to line buffers
|
||||
Jbuffer[j] = J;
|
||||
Cbuffer[j] = C;
|
||||
hbuffer[j] = h;
|
||||
#else
|
||||
float xx,yy,zz;
|
||||
//process normal==> viewing
|
||||
|
||||
@ -1805,22 +1882,13 @@ if(LabPassOne){
|
||||
xw2, yw2, zw2,
|
||||
yb2, la2,
|
||||
f2, c2, nc2, gamu, pow1n, nbbj, ncbj, flj, czj, dj, awj);
|
||||
float x,y,z;
|
||||
x=(float)xx*655.35f;
|
||||
y=(float)yy*655.35f;
|
||||
z=(float)zz*655.35f;
|
||||
float Ll,aa,bb;
|
||||
//convert xyz=>lab
|
||||
Color::XYZ2Lab(x, y, z, Ll, aa, bb);
|
||||
#ifdef _DEBUG
|
||||
if(Ll > 70000.f && J < 1.f) {
|
||||
#pragma omp critical
|
||||
{
|
||||
printf("Why is Ll so big when J is so small?\n");
|
||||
printf("J : %f, Ll : %f, xx : %f, yy : %f, zz : %f\n",J,Ll,xx,yy,zz);
|
||||
printf("J : %f, C : %f, h : %f\n",J,C,h);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// gamut control in Lab mode; I must study how to do with cIECAM only
|
||||
if(gamu==1) {
|
||||
@ -1857,9 +1925,66 @@ if(LabPassOne){
|
||||
lab->a[i][j]=aa;
|
||||
lab->b[i][j]=bb;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef __SSE2__
|
||||
// process line buffers
|
||||
float *xbuffer = Qbuffer;
|
||||
float *ybuffer = Mbuffer;
|
||||
float *zbuffer = sbuffer;
|
||||
for(k=0;k<bufferLength;k+=4) {
|
||||
Ciecam02::jch2xyz_ciecam02float( x, y, z,
|
||||
LVF(Jbuffer[k]), LVF(Cbuffer[k]), LVF(hbuffer[k]),
|
||||
F2V(xw2), F2V(yw2), F2V(zw2),
|
||||
F2V(yb2), F2V(la2),
|
||||
F2V(f2), F2V(nc2), F2V(pow1n), F2V(nbbj), F2V(ncbj), F2V(flj), F2V(dj), F2V(awj), F2V(reccmcz));
|
||||
STVF(xbuffer[k],x*c655d35);
|
||||
STVF(ybuffer[k],y*c655d35);
|
||||
STVF(zbuffer[k],z*c655d35);
|
||||
}
|
||||
// XYZ2Lab uses a lookup table. The function behind that lut is a cube root.
|
||||
// SSE can't beat the speed of that lut, so it doesn't make sense to use SSE
|
||||
for(int j=0;j<width;j++) {
|
||||
float Ll,aa,bb;
|
||||
//convert xyz=>lab
|
||||
Color::XYZ2Lab(xbuffer[j], ybuffer[j], zbuffer[j], Ll, aa, bb);
|
||||
|
||||
// gamut control in Lab mode; I must study how to do with cIECAM only
|
||||
if(gamu==1) {
|
||||
float HH, Lprov1, Chprov1;
|
||||
Lprov1=Ll/327.68f;
|
||||
Chprov1=sqrtf(SQR(aa) + SQR(bb))/327.68f;
|
||||
HH=xatan2f(bb,aa);
|
||||
float2 sincosval;
|
||||
if(Chprov1==0.0f) {
|
||||
sincosval.y = 1.f;
|
||||
sincosval.x = 0.0f;
|
||||
} else {
|
||||
sincosval.y = aa/(Chprov1*327.68f);
|
||||
sincosval.x = bb/(Chprov1*327.68f);
|
||||
}
|
||||
#ifdef _DEBUG
|
||||
bool neg=false;
|
||||
bool more_rgb=false;
|
||||
//gamut control : Lab values are in gamut
|
||||
Color::gamutLchonly(sincosval,Lprov1,Chprov1, wip, highlight, 0.15f, 0.96f, neg, more_rgb);
|
||||
#else
|
||||
//gamut control : Lab values are in gamut
|
||||
Color::gamutLchonly(sincosval,Lprov1,Chprov1, wip, highlight, 0.15f, 0.96f);
|
||||
#endif
|
||||
lab->L[i][j]=Lprov1*327.68f;
|
||||
lab->a[i][j]=327.68f*Chprov1*sincosval.y;
|
||||
lab->b[i][j]=327.68f*Chprov1*sincosval.x;
|
||||
} else {
|
||||
lab->L[i][j]=Ll;
|
||||
lab->a[i][j]=aa;
|
||||
lab->b[i][j]=bb;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#pragma omp critical
|
||||
{
|
||||
if(minQThr < minQ)
|
||||
@ -2023,12 +2148,20 @@ if((params->colorappearance.tonecie && (epdEnabled)) || (params->sharpening.enab
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
|
||||
#ifdef __SSE2__
|
||||
// one line buffer per channel
|
||||
float Jbuffer[bufferLength] ALIGNED16;
|
||||
float Cbuffer[bufferLength] ALIGNED16;
|
||||
float hbuffer[bufferLength] ALIGNED16;
|
||||
float *xbuffer = Jbuffer; // we can use one of the above buffers
|
||||
float *ybuffer = Cbuffer; // "
|
||||
float *zbuffer = hbuffer; // "
|
||||
#endif
|
||||
|
||||
#ifndef _DEBUG
|
||||
#pragma omp for schedule(dynamic, 10)
|
||||
#endif
|
||||
for (int i=0; i<height; i++) // update CIECAM with new values after tone-mapping
|
||||
for (int i=0; i<height; i++) { // update CIECAM with new values after tone-mapping
|
||||
for (int j=0; j<width; j++) {
|
||||
float xx,yy,zz;
|
||||
float x,y,z;
|
||||
@ -2066,6 +2199,11 @@ if((params->colorappearance.tonecie && (epdEnabled)) || (params->sharpening.enab
|
||||
}
|
||||
//end histograms
|
||||
|
||||
#ifdef __SSE2__
|
||||
Jbuffer[j] = ncie->J_p[i][j];
|
||||
Cbuffer[j] = ncie_C_p;
|
||||
hbuffer[j] = ncie->h_p[i][j];
|
||||
#else
|
||||
Ciecam02::jch2xyz_ciecam02float( xx, yy, zz,
|
||||
ncie->J_p[i][j], ncie_C_p, ncie->h_p[i][j],
|
||||
xw2, yw2, zw2,
|
||||
@ -2109,7 +2247,65 @@ if((params->colorappearance.tonecie && (epdEnabled)) || (params->sharpening.enab
|
||||
lab->a[i][j]=aa;
|
||||
lab->b[i][j]=bb;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#ifdef __SSE2__
|
||||
// process line buffers
|
||||
int k;
|
||||
vfloat x,y,z;
|
||||
vfloat c655d35 = F2V(655.35f);
|
||||
for(k=0;k<bufferLength;k+=4) {
|
||||
Ciecam02::jch2xyz_ciecam02float( x, y, z,
|
||||
LVF(Jbuffer[k]), LVF(Cbuffer[k]), LVF(hbuffer[k]),
|
||||
F2V(xw2), F2V(yw2), F2V(zw2),
|
||||
F2V(yb2), F2V(la2),
|
||||
F2V(f2), F2V(nc2), F2V(pow1n), F2V(nbbj), F2V(ncbj), F2V(flj), F2V(dj), F2V(awj), F2V(reccmcz));
|
||||
x *= c655d35;
|
||||
y *= c655d35;
|
||||
z *= c655d35;
|
||||
STVF(xbuffer[k],x);
|
||||
STVF(ybuffer[k],y);
|
||||
STVF(zbuffer[k],z);
|
||||
}
|
||||
// XYZ2Lab uses a lookup table. The function behind that lut is a cube root.
|
||||
// SSE can't beat the speed of that lut, so it doesn't make sense to use SSE
|
||||
for(int j=0;j<width;j++) {
|
||||
float Ll,aa,bb;
|
||||
//convert xyz=>lab
|
||||
Color::XYZ2Lab(xbuffer[j], ybuffer[j], zbuffer[j], Ll, aa, bb);
|
||||
if(gamu==1) {
|
||||
float Lprov1, Chprov1;
|
||||
Lprov1=Ll/327.68f;
|
||||
Chprov1=sqrtf(SQR(aa) + SQR(bb))/327.68f;
|
||||
float2 sincosval;
|
||||
if(Chprov1==0.0f) {
|
||||
sincosval.y = 1.f;
|
||||
sincosval.x = 0.0f;
|
||||
} else {
|
||||
sincosval.y = aa/(Chprov1*327.68f);
|
||||
sincosval.x = bb/(Chprov1*327.68f);
|
||||
}
|
||||
#ifdef _DEBUG
|
||||
bool neg=false;
|
||||
bool more_rgb=false;
|
||||
//gamut control : Lab values are in gamut
|
||||
Color::gamutLchonly(sincosval,Lprov1,Chprov1, wipa, highlight, 0.15f, 0.96f, neg, more_rgb);
|
||||
#else
|
||||
//gamut control : Lab values are in gamut
|
||||
Color::gamutLchonly(sincosval,Lprov1,Chprov1, wipa, highlight, 0.15f, 0.96f);
|
||||
#endif
|
||||
lab->L[i][j]=Lprov1*327.68f;
|
||||
lab->a[i][j]=327.68f*Chprov1*sincosval.y;
|
||||
lab->b[i][j]=327.68f*Chprov1*sincosval.x;
|
||||
} else {
|
||||
lab->L[i][j]=Ll;
|
||||
lab->a[i][j]=aa;
|
||||
lab->b[i][j]=bb;
|
||||
}
|
||||
|
||||
}
|
||||
#endif // __SSE2__
|
||||
}
|
||||
|
||||
} //end parallelization
|
||||
|
||||
|
@ -1315,5 +1315,12 @@ static INLINE vfloat SQRV(vfloat a){
|
||||
return _mm_mul_ps( a,a );
|
||||
}
|
||||
|
||||
static inline void vswap( vmask condition, vfloat &a, vfloat &b) {
|
||||
vfloat temp = vself(condition, a, b); // the larger of the two
|
||||
condition = vnotm(condition); // invert the mask
|
||||
a = vself(condition, a, b); // the smaller of the two
|
||||
b = temp;
|
||||
}
|
||||
|
||||
#endif // __SSE2__
|
||||
#endif // SLEEFSSEAVX
|
||||
|
Loading…
x
Reference in New Issue
Block a user