CIECAM02 speedup
This commit is contained in:
@@ -20,10 +20,12 @@
|
||||
#include "rtengine.h"
|
||||
#include "curves.h"
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "sleef.c"
|
||||
|
||||
#ifdef _DEBUG
|
||||
#include "settings.h"
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
#undef CLIPD
|
||||
#define CLIPD(a) ((a)>0.0?((a)<1.0?(a):1.0):0.0)
|
||||
@@ -33,7 +35,9 @@
|
||||
namespace rtengine
|
||||
{
|
||||
|
||||
#ifdef _DEBUG
|
||||
extern const Settings* settings;
|
||||
#endif
|
||||
|
||||
void Ciecam02::curvecolor(double satind, double satval, double &sres, double parsat)
|
||||
{
|
||||
@@ -407,6 +411,15 @@ void Ciecam02::xyz_to_cat02float( float &r, float &g, float &b, float x, float y
|
||||
b = ( 0.0000f * x) + (0.0000f * y) + (1.0000f * z);
|
||||
}
|
||||
}
|
||||
#ifdef __SSE2__
|
||||
// SSE2 overload: applies the XYZ -> CAT02 matrix to vfloat operands.
// The third matrix row is the identity, so b is simply z.
// gamut correction M.H.Brill S.Susstrunk, coefficients by Changjun Li
void Ciecam02::xyz_to_cat02float( vfloat &r, vfloat &g, vfloat &b, vfloat x, vfloat y, vfloat z )
{
    // contributions to the first output channel
    const vfloat rFromX = F2V(1.007245f) * x;
    const vfloat rFromY = F2V(0.011136f) * y;
    const vfloat rFromZ = F2V(0.018381f) * z;

    // contributions to the second output channel
    const vfloat gFromX = F2V(-0.318061f) * x;
    const vfloat gFromY = F2V(1.314589f) * y;
    const vfloat gFromZ = F2V(0.003471f) * z;

    r = (rFromX + rFromY) - rFromZ;
    g = (gFromX + gFromY) + gFromZ;
    b = z;
}
|
||||
#endif
|
||||
|
||||
void Ciecam02::cat02_to_xyz( double &x, double &y, double &z, double r, double g, double b, int gamu )
|
||||
{
|
||||
@@ -425,6 +438,7 @@ void Ciecam02::cat02_to_xyz( double &x, double &y, double &z, double r, double g
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef __SSE2__
|
||||
void Ciecam02::cat02_to_xyzfloat( float &x, float &y, float &z, float r, float g, float b, int gamu )
|
||||
{
|
||||
gamu=1;
|
||||
@@ -441,7 +455,15 @@ void Ciecam02::cat02_to_xyzfloat( float &x, float &y, float &z, float r, float g
|
||||
z = ( 0.000000f * r) - (0.000000f * g) + (1.000000f * b);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
// SSE2 overload: applies the CAT02 -> XYZ matrix to vfloat operands.
// The third matrix row is the identity, so z is simply b.
// gamut correction M.H.Brill S.Susstrunk, coefficients by Changjun Li
void Ciecam02::cat02_to_xyzfloat( vfloat &x, vfloat &y, vfloat &z, vfloat r, vfloat g, vfloat b )
{
    // contributions to x
    const vfloat xFromR = F2V(0.99015849f) * r;
    const vfloat xFromG = F2V(0.00838772f) * g;
    const vfloat xFromB = F2V(0.018229217f) * b;

    // contributions to y
    const vfloat yFromR = F2V(0.239565979f) * r;
    const vfloat yFromG = F2V(0.758664642f) * g;
    const vfloat yFromB = F2V(0.001770137f) * b;

    x = (xFromR - xFromG) + xFromB;
    y = (yFromR + yFromG) + yFromB;
    z = b;
}
|
||||
#endif
|
||||
|
||||
void Ciecam02::hpe_to_xyz( double &x, double &y, double &z, double r, double g, double b )
|
||||
{
|
||||
@@ -450,12 +472,21 @@ void Ciecam02::hpe_to_xyz( double &x, double &y, double &z, double r, double g,
|
||||
z = b;
|
||||
}
|
||||
|
||||
#ifndef __SSE2__
|
||||
// Scalar (non-SSE2) HPE -> XYZ conversion.
// x and y are linear combinations of the three inputs; z is passed through from b.
void Ciecam02::hpe_to_xyzfloat( float &x, float &y, float &z, float r, float g, float b )
{
    // contributions to x
    const float xFromR = 1.910197f * r;
    const float xFromG = 1.112124f * g;
    const float xFromB = 0.201908f * b;

    // contributions to y
    const float yFromR = 0.370950f * r;
    const float yFromG = 0.629054f * g;
    const float yFromB = 0.000008f * b;

    x = (xFromR - xFromG) + xFromB;
    y = (yFromR + yFromG) - yFromB;
    z = b;
}
|
||||
#else
|
||||
// SSE2 HPE -> XYZ conversion on vfloat operands.
// Uses the same coefficients as the scalar variant; z is passed through from b.
void Ciecam02::hpe_to_xyzfloat( vfloat &x, vfloat &y, vfloat &z, vfloat r, vfloat g, vfloat b )
{
    // contributions to x
    const vfloat xFromR = F2V(1.910197f) * r;
    const vfloat xFromG = F2V(1.112124f) * g;
    const vfloat xFromB = F2V(0.201908f) * b;

    // contributions to y
    const vfloat yFromR = F2V(0.370950f) * r;
    const vfloat yFromG = F2V(0.629054f) * g;
    const vfloat yFromB = F2V(0.000008f) * b;

    x = (xFromR - xFromG) + xFromB;
    y = (yFromR + yFromG) - yFromB;
    z = b;
}
|
||||
#endif
|
||||
|
||||
void Ciecam02::cat02_to_hpe( double &rh, double &gh, double &bh, double r, double g, double b, int gamu )
|
||||
{
|
||||
@@ -485,6 +516,16 @@ void Ciecam02::cat02_to_hpefloat( float &rh, float &gh, float &bh, float r, floa
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __SSE2__
|
||||
// SSE2 overload: CAT02 -> HPE conversion on vfloat operands.
// The third output channel is the unchanged b input.
// Coefficients by Changjun Li
void Ciecam02::cat02_to_hpefloat( vfloat &rh, vfloat &gh, vfloat &bh, vfloat r, vfloat g, vfloat b)
{
    // contributions to rh
    const vfloat rhFromR = F2V(0.550930835f) * r;
    const vfloat rhFromG = F2V(0.519435987f) * g;
    const vfloat rhFromB = F2V(0.070356303f) * b;

    // contributions to gh
    const vfloat ghFromR = F2V(0.055954056f) * r;
    const vfloat ghFromG = F2V(0.89973132f) * g;
    const vfloat ghFromB = F2V(0.044315524f) * b;

    rh = (rhFromR + rhFromG) - rhFromB;
    gh = (ghFromR + ghFromG) + ghFromB;
    bh = b;
}
|
||||
#endif
|
||||
|
||||
void Ciecam02::Aab_to_rgb( double &r, double &g, double &b, double A, double aa, double bb, double nbb )
|
||||
{
|
||||
double x = (A / nbb) + 0.305;
|
||||
@@ -496,6 +537,8 @@ void Ciecam02::Aab_to_rgb( double &r, double &g, double &b, double A, double aa,
|
||||
/* c1 c6 c7 */
|
||||
b = (0.32787 * x) - (0.15681 * aa) - (4.49038 * bb);
|
||||
}
|
||||
|
||||
#ifndef __SSE2__
|
||||
void Ciecam02::Aab_to_rgbfloat( float &r, float &g, float &b, float A, float aa, float bb, float nbb )
|
||||
{
|
||||
float x = (A / nbb) + 0.305f;
|
||||
@@ -507,6 +550,19 @@ void Ciecam02::Aab_to_rgbfloat( float &r, float &g, float &b, float A, float aa,
|
||||
/* c1 c6 c7 */
|
||||
b = (0.32787f * x) - (0.15681f * aa) - (4.49038f * bb);
|
||||
}
|
||||
#else
|
||||
// SSE2 variant: turns the achromatic response A and the opponent pair (aa, bb)
// into the three r/g/b responses. All three channels share the same
// achromatic term; they differ only in the aa/bb weights.
void Ciecam02::Aab_to_rgbfloat( vfloat &r, vfloat &g, vfloat &b, vfloat A, vfloat aa, vfloat bb, vfloat nbb )
{
    // shared achromatic term: 0.32787 * (A / nbb + 0.305)
    const vfloat achromatic = F2V(0.32787f) * ((A / nbb) + F2V(0.305f));

    // r: + 0.32145 * aa + 0.20527 * bb
    const vfloat rA = F2V(0.32145f) * aa;
    const vfloat rB = F2V(0.20527f) * bb;
    r = (achromatic + rA) + rB;

    // g: - 0.63507 * aa - 0.18603 * bb
    const vfloat gA = F2V(0.63507f) * aa;
    const vfloat gB = F2V(0.18603f) * bb;
    g = (achromatic - gA) - gB;

    // b: - 0.15681 * aa - 4.49038 * bb
    const vfloat bA = F2V(0.15681f) * aa;
    const vfloat bB = F2V(4.49038f) * bb;
    b = (achromatic - bA) - bB;
}
|
||||
#endif
|
||||
|
||||
void Ciecam02::calculate_ab( double &aa, double &bb, double h, double e, double t, double nbb, double a )
|
||||
{
|
||||
@@ -535,7 +591,7 @@ void Ciecam02::calculate_ab( double &aa, double &bb, double h, double e, double
|
||||
bb = (aa * sinh) / cosh;
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef __SSE2__
|
||||
void Ciecam02::calculate_abfloat( float &aa, float &bb, float h, float e, float t, float nbb, float a )
|
||||
{
|
||||
float2 sincosval = xsincosf((h * M_PI) / 180.0f);
|
||||
@@ -565,6 +621,34 @@ void Ciecam02::calculate_abfloat( float &aa, float &bb, float h, float e, float
|
||||
std::swap(aa,bb);
|
||||
}
|
||||
}
|
||||
#else
|
||||
// SSE2 variant: computes the opponent components aa / bb from the hue angle h
// (in degrees, converted to radians below), the eccentricity term e, the
// temporary magnitude t, nbb and the achromatic response a.
//
// The CIECAM02 inverse model has two branches, chosen so that the division is
// always by the larger of |sin h| and |cos h|:
//   |sin h| <  |cos h|:  aa = num / (e/(t*cos) - k1               - k2 * (sin/cos)),  bb = aa * sin/cos
//   |sin h| >= |cos h|:  bb = num / (e/(t*sin) - k1 * (cos/sin)   - k2),              aa = bb * cos/sin
// with k1 = -0.31362 - p3 * 0.15681 and k2 = 0.01924 - p3 * 4.49038.
// Both branches are evaluated with one formula by swapping sin/cos per lane.
// The factors applied to k1 and k2 (1 and sin/cos) have to be swapped with the
// same mask, otherwise lanes in the second branch use the wrong denominator.
void Ciecam02::calculate_abfloat( vfloat &aa, vfloat &bb, vfloat h, vfloat e, vfloat t, vfloat nbb, vfloat a )
{
    vfloat2 sincosval = xsincosf((h * F2V(M_PI)) / F2V(180.0f));
    vfloat sinH = sincosval.x;
    vfloat cosH = sincosval.y;
    vfloat x = (a / nbb) + F2V(0.305f);
    vfloat p3 = F2V(1.05f);

    // lanes where |sin| > |cos| use the second branch: after the swap,
    // cosH holds the larger of the two values and is the one divided by
    vmask swapMask = vmaskf_gt(vabsf(sinH), vabsf(cosH));
    vswap(swapMask, sinH, cosH);

    // factors for k1 and k2; in unswapped lanes these are (1, sin/cos) and the
    // result is identical to the plain first-branch formula, in swapped lanes
    // they become (cos/sin, 1) as the second branch requires
    vfloat c1 = F2V(1.f);
    vfloat c2 = sinH / cosH;
    vswap(swapMask, c1, c2);

    vfloat div = ((e / (t * cosH)) - (F2V(-0.31362f) - (p3 * F2V(0.15681f))) * c1 - ((F2V(0.01924f) - (p3 * F2V(4.49038f))) * c2));
    // for large values of t the above calculation can change its sign which results in a hue shift of 180 degree
    // so we have to check the sign to avoid this shift.
    // Additionally it seems useful to limit the minimum value of div
    // I limited it, but I'm sure the actual limit is not the best one

    vmask limitMask = vmaskf_neq(vsignf(div), vsignf(cosH));
    limitMask = vorm(limitMask, vmaskf_le(vabsf(div), vabsf(cosH) * F2V(2.f)));
    div = vself(limitMask, cosH * F2V(2.f), div);

    aa = ((F2V(0.32787f) * x) * (F2V(2.0f) + p3)) / div;
    bb = (aa * sinH) / cosH;

    // in swapped lanes the value computed as aa is really bb and vice versa
    vswap(swapMask, aa, bb);
}
|
||||
|
||||
#endif
|
||||
|
||||
void Ciecam02::initcam1(double gamu, double yb, double pilotd, double f, double la, double xw, double yw, double zw, double &n, double &d, double &nbb, double &ncb,
|
||||
double &cz, double &aw, double &wh, double &pfl, double &fl, double &c)
|
||||
@@ -758,6 +842,63 @@ void Ciecam02::xyz2jchqms_ciecam02float( float &J, float &C, float &h, float &Q,
|
||||
s = 100.0f * sqrtf( M / Q );
|
||||
h = (myh * 180.f) / (float)M_PI;
|
||||
}
|
||||
#ifdef __SSE2__
|
||||
// SSE2 variant of the forward transform: XYZ (with white point xw/yw/zw and the
// precomputed viewing-condition terms) -> J, C, h, Q, M, s.
// yb, la and f are accepted but not read by this overload.
void Ciecam02::xyz2jchqms_ciecam02float( vfloat &J, vfloat &C, vfloat &h, vfloat &Q, vfloat &M, vfloat &s, vfloat aw, vfloat fl, vfloat wh,
        vfloat x, vfloat y, vfloat z, vfloat xw, vfloat yw, vfloat zw,
        vfloat yb, vfloat la, vfloat f, vfloat c, vfloat nc, vfloat pow1, vfloat nbb, vfloat ncb, vfloat pfl, vfloat cz, vfloat d)
{
    // sample and white point in CAT02 space
    vfloat sampleR, sampleG, sampleB;
    xyz_to_cat02float( sampleR, sampleG, sampleB, x, y, z);
    vfloat whiteR, whiteG, whiteB;
    xyz_to_cat02float( whiteR, whiteG, whiteB, xw, yw, zw);

    // per-channel adaptation gain: (yw * d) / white + (1 - d)
    const vfloat ywTimesD = yw * d;
    const vfloat oneMinusD = F2V(1.f) - d;
    const vfloat adaptedR = sampleR * ((ywTimesD / whiteR) + oneMinusD);
    const vfloat adaptedG = sampleG * ((ywTimesD / whiteG) + oneMinusD);
    const vfloat adaptedB = sampleB * ((ywTimesD / whiteB) + oneMinusD);

    vfloat hpeR, hpeG, hpeB;
    cat02_to_hpefloat( hpeR, hpeG, hpeB, adaptedR, adaptedG, adaptedB);
    //gamut correction M.H.Brill S.Susstrunk
    hpeR = _mm_max_ps(hpeR, ZEROV);
    hpeG = _mm_max_ps(hpeG, ZEROV);
    hpeB = _mm_max_ps(hpeB, ZEROV);

    const vfloat rpa = nonlinear_adaptationfloat( hpeR, fl );
    const vfloat gpa = nonlinear_adaptationfloat( hpeG, fl );
    const vfloat bpa = nonlinear_adaptationfloat( hpeB, fl );

    // opponent components
    const vfloat ca = rpa - ((F2V(12.0f) * gpa) - bpa) / F2V(11.0f);
    const vfloat cb = F2V(0.11111111f) * ((rpa + gpa) - (bpa + bpa));

    // hue angle in radians; lanes with a negative angle take angle + 2*pi
    // (presumably vself picks its second argument where the mask is set)
    const vfloat rawHue = xatan2f( cb, ca );
    const vfloat pi = F2V(M_PI);
    const vfloat shiftedHue = (pi + pi) + rawHue;
    const vfloat myh = vself(vmaskf_lt(rawHue, ZEROV), shiftedHue, rawHue);

    // achromatic response
    vfloat a = ((((rpa + rpa) + gpa) + (F2V(0.05f) * bpa)) - F2V(0.305f)) * nbb;
    a = _mm_max_ps(a, ZEROV); //gamut correction M.H.Brill S.Susstrunk

    // (a / aw) raised to half the final exponent; squared and scaled by 100 below to give J
    const vfloat rootJ = pow_F( a / aw, c * cz * F2V(0.5f));

    const vfloat e = ((F2V(961.53846f) * nc) * ncb) * (xcosf( myh + F2V(2.0f) ) + F2V(3.8f));
    const vfloat opponentLength = _mm_sqrt_ps( (ca * ca) + (cb * cb) );
    const vfloat t = (e * opponentLength) / ((rpa + gpa) + (F2V(1.05f) * bpa));

    C = (pow_F( t, F2V(0.9f) ) * rootJ) * pow1;
    Q = _mm_max_ps(wh * rootJ, F2V(0.0001f)); // avoid division by zero
    J = rootJ * (rootJ * F2V(100.0f));
    M = C * pfl;
    s = F2V(100.0f) * _mm_sqrt_ps( M / Q );
    h = (myh * F2V(180.f)) / F2V(M_PI); // radians -> degrees
}
|
||||
#endif
|
||||
|
||||
void Ciecam02::jch2xyz_ciecam02( double &x, double &y, double &z, double J, double C, double h,
|
||||
double xw, double yw, double zw, double yb, double la,
|
||||
@@ -792,7 +933,7 @@ void Ciecam02::jch2xyz_ciecam02( double &x, double &y, double &z, double J, doub
|
||||
|
||||
cat02_to_xyz( x, y, z, r, g, b, gamu );
|
||||
}
|
||||
|
||||
#ifndef __SSE2__
|
||||
void Ciecam02::jch2xyz_ciecam02float( float &x, float &y, float &z, float J, float C, float h,
|
||||
float xw, float yw, float zw, float yb, float la,
|
||||
float f, float c, float nc , int gamu, float pow1, float nbb, float ncb, float fl, float cz, float d, float aw)
|
||||
@@ -827,6 +968,41 @@ void Ciecam02::jch2xyz_ciecam02float( float &x, float &y, float &z, float J, flo
|
||||
cat02_to_xyzfloat( x, y, z, r, g, b, gamu );
|
||||
}
|
||||
|
||||
#else
|
||||
// SSE2 variant of the inverse transform: J, C, h (degrees) -> XYZ, using the
// white point xw/yw/zw and the precomputed viewing-condition terms.
// yb, la and f are accepted but not read by this overload.
void Ciecam02::jch2xyz_ciecam02float( vfloat &x, vfloat &y, vfloat &z, vfloat J, vfloat C, vfloat h,
        vfloat xw, vfloat yw, vfloat zw, vfloat yb, vfloat la,
        vfloat f, vfloat nc, vfloat pow1, vfloat nbb, vfloat ncb, vfloat fl, vfloat d, vfloat aw, vfloat reccmcz)
{
    // white point in CAT02 space
    vfloat whiteR, whiteG, whiteB;
    xyz_to_cat02float( whiteR, whiteG, whiteB, xw, yw, zw);

    const vfloat hueRad = (h * F2V(M_PI)) / F2V(180.0f);
    const vfloat e = ((F2V(961.53846f) * nc) * ncb) * (xcosf( hueRad + F2V(2.0f) ) + F2V(3.8f));
    const vfloat a = pow_F( J / F2V(100.0f), reccmcz ) * aw;
    const vfloat t = pow_F( (F2V(10.f) * C) / (_mm_sqrt_ps( J ) * pow1), F2V(1.1111111f) );

    // opponent components, then the three adapted responses
    vfloat ca, cb;
    calculate_abfloat( ca, cb, h, e, t, nbb, a );
    vfloat rpa, gpa, bpa;
    Aab_to_rgbfloat( rpa, gpa, bpa, a, ca, cb, nbb );

    const vfloat hpeR = inverse_nonlinear_adaptationfloat( rpa, fl );
    const vfloat hpeG = inverse_nonlinear_adaptationfloat( gpa, fl );
    const vfloat hpeB = inverse_nonlinear_adaptationfloat( bpa, fl );

    // the outputs x, y, z hold the intermediate XYZ value here
    hpe_to_xyzfloat( x, y, z, hpeR, hpeG, hpeB );
    vfloat adaptedR, adaptedG, adaptedB;
    xyz_to_cat02float( adaptedR, adaptedG, adaptedB, x, y, z );

    // divide out the per-channel adaptation gain: (yw * d) / white + (1 - d)
    const vfloat ywTimesD = yw * d;
    const vfloat oneMinusD = F2V(1.0f) - d;
    const vfloat r = adaptedR / ((ywTimesD / whiteR) + oneMinusD);
    const vfloat g = adaptedG / ((ywTimesD / whiteG) + oneMinusD);
    const vfloat b = adaptedB / ((ywTimesD / whiteB) + oneMinusD);

    cat02_to_xyzfloat( x, y, z, r, g, b );
}
|
||||
#endif
|
||||
|
||||
double Ciecam02::nonlinear_adaptation( double c, double fl )
|
||||
{
|
||||
double p;
|
||||
@@ -841,6 +1017,20 @@ float Ciecam02::nonlinear_adaptationfloat( float c, float fl )
|
||||
else {p = pow_F( (fl * c) / 100.0f, 0.42f ); return ((400.0f * p) / (27.13f + p)) + 0.1f;}
|
||||
}
|
||||
|
||||
#ifdef __SSE2__
|
||||
// SSE2 variant of the nonlinear response compression:
//   p = (fl * c / 100) ^ 0.42,  result = 400 * p / (27.13 + p) + 0.1
// Both 400 and fl are passed through vmulsignf with c before use
// (presumably this gives them the sign of c, so that fl * c is non-negative
// for pow_F and the compressed value keeps the sign of c - TODO confirm
// against the vmulsignf definition).
// All constants are float literals, matching the scalar variant.
vfloat Ciecam02::nonlinear_adaptationfloat( vfloat c, vfloat fl )
{
    const vfloat c100 = F2V(100.f);
    const vfloat czd42 = F2V(0.42f);
    const vfloat c27d13 = F2V(27.13f);
    const vfloat czd1 = F2V(0.1f);

    const vfloat c400 = vmulsignf(F2V(400.f), c);
    fl = vmulsignf(fl, c);

    const vfloat p = pow_F( (fl * c) / c100, czd42 );
    return ((c400 * p) / (c27d13 + p)) + czd1;
}
|
||||
#endif
|
||||
|
||||
double Ciecam02::inverse_nonlinear_adaptation( double c, double fl )
|
||||
{
|
||||
int c1;
|
||||
@@ -849,6 +1039,7 @@ double Ciecam02::inverse_nonlinear_adaptation( double c, double fl )
|
||||
return c1*(100.0 / fl) * pow( (27.13 * fabs( c - 0.1 )) / (400.0 - fabs( c - 0.1 )), 1.0 / 0.42 );
|
||||
}
|
||||
|
||||
#ifndef __SSE2__
|
||||
float Ciecam02::inverse_nonlinear_adaptationfloat( float c, float fl )
|
||||
{
|
||||
c -= 0.1f;
|
||||
@@ -863,6 +1054,16 @@ float Ciecam02::inverse_nonlinear_adaptationfloat( float c, float fl )
|
||||
return (100.0f / fl) * pow_F( (27.13f * fabsf( c )) / (400.0f - fabsf( c )), 2.38095238f );
|
||||
}
|
||||
|
||||
#else
|
||||
// SSE2 variant of the inverse response compression:
//   result = (100 / fl') * ((27.13 * m) / (400 - m)) ^ 2.38095238
// where m = min(|c - 0.1|, 399.99) and fl' is fl passed through vmulsignf with
// (c - 0.1) (presumably transferring the sign of the shifted value to fl).
// The clamp keeps the denominator 400 - m away from zero.
vfloat Ciecam02::inverse_nonlinear_adaptationfloat( vfloat c, vfloat fl )
{
    const vfloat shifted = c - F2V(0.1f);
    const vfloat signedFl = vmulsignf(fl, shifted);
    const vfloat magnitude = _mm_min_ps( vabsf(shifted), F2V(399.99f));

    const vfloat ratio = (F2V(27.13f) * magnitude) / (F2V(400.0f) - magnitude);
    return (F2V(100.0f) / signedFl) * pow_F( ratio, F2V(2.38095238f) );
}
|
||||
#endif
|
||||
//end CIECAM Billy Bigg
|
||||
|
||||
}
|
||||
|
Reference in New Issue
Block a user