CIECAM02 speedup

This commit is contained in:
Ingo
2015-07-07 16:34:05 +02:00
parent b6f8bc675b
commit 1008e0e98d
7 changed files with 555 additions and 86 deletions

View File

@@ -21,7 +21,7 @@
#include "color.h"
#include "iccmatrices.h"
#include "mytime.h"
#include "sleef.c"
#include "sleef.c"
#include "opthelper.h"
using namespace std;
@@ -433,7 +433,7 @@ namespace rtengine {
y = ((xyz_rgb[1][0]*r + xyz_rgb[1][1]*g + xyz_rgb[1][2]*b)) ;
z = ((xyz_rgb[2][0]*r + xyz_rgb[2][1]*g + xyz_rgb[2][2]*b)) ;
}
void Color::rgbxyz (float r, float g, float b, float &x, float &y, float &z, const float xyz_rgb[3][3]) {
x = ((xyz_rgb[0][0]*r + xyz_rgb[0][1]*g + xyz_rgb[0][2]*b)) ;
y = ((xyz_rgb[1][0]*r + xyz_rgb[1][1]*g + xyz_rgb[1][2]*b)) ;
@@ -859,17 +859,36 @@ namespace rtengine {
y=(LL>epskap) ? 65535.0f*fy*fy*fy : 65535.0f*LL/kappa;
}
#ifdef __SSE2__
/**
 * Vector overload of Lab2XYZ: converts every lane of (L, a, b) to (x, y, z).
 *
 * The inputs are first divided by 327.68 and the outputs are multiplied by 65535.
 * x and z are obtained through f2xyz() and weighted by D50x / D50z respectively;
 * y takes fy^3 in the lanes where the rescaled L is greater than epskap and
 * L / kappa in all other lanes.
 */
void Color::Lab2XYZ(vfloat L, vfloat a, vfloat b, vfloat &x, vfloat &y, vfloat &z) {
    const vfloat labScalev = F2V(327.68f);
    const vfloat outScalev = F2V(65535.f);

    // Bring the inputs back from the 327.68-scaled representation.
    const vfloat LL = L / labScalev;
    const vfloat aa = a / labScalev;
    const vfloat bb = b / labScalev;

    // 0.00862069 ~ 1/116 and 0.137932 ~ 16/116; 0.002 = 1/500; 0.005 = 1/200.
    const vfloat fy = F2V(0.00862069f) * LL + F2V(0.137932f);
    const vfloat fx = F2V(0.002f) * aa + fy;
    const vfloat fz = fy - (F2V(0.005f) * bb);

    x = outScalev * f2xyz(fx) * F2V(D50x);
    z = outScalev * f2xyz(fz) * F2V(D50z);

    // Both candidates for y are computed for all lanes, then blended per lane.
    const vfloat cubev = fy * fy * fy;
    const vfloat linearv = LL / F2V(kappa);
    y = vself(vmaskf_gt(LL, F2V(epskap)), cubev, linearv) * outScalev;
}
#endif // __SSE2__
void Color::XYZ2Lab(float X, float Y, float Z, float &L, float &a, float &b) {
float x = X/D50x;
float z = Z/D50z;
float y= Y;
float fx,fy,fz;
fx = (x<=65535.0f ? cachef[x] : (327.68f*xcbrtf(x/MAXVALF)));
fy = (y<=65535.0f ? cachef[y] : (327.68f*xcbrtf(y/MAXVALF)));
fz = (z<=65535.0f ? cachef[z] : (327.68f*xcbrtf(z/MAXVALF)));
L = (116.0f * fy - 5242.88f); //5242.88=16.0*327.68;
a = (500.0f * (fx - fy) );
b = (200.0f * (fy - fz) );
@@ -1518,49 +1537,49 @@ SSEFUNCTION void Color::LabGamutMunsell(float *labL, float *laba, float *labb,
#endif
float correctlum = 0.f;
float correctionHuechroma = 0.f;
#ifdef __SSE2__
// precalculate H and C using SSE
float HHBuffer[N];
float CCBuffer[N];
__m128 c327d68v = _mm_set1_ps(327.68f);
__m128 av,bv;
int k;
for (k=0; k<N-3; k+=4) {
av = LVFU(laba[k]);
bv = LVFU(labb[k]);
_mm_storeu_ps(&HHBuffer[k],xatan2f(bv,av));
_mm_storeu_ps(&CCBuffer[k],_mm_sqrt_ps(SQRV(av)+SQRV(bv))/c327d68v);
}
for(;k<N;k++) {
HHBuffer[k] = xatan2f(labb[k],laba[k]);
CCBuffer[k] = sqrt(SQR(laba[k]) + SQR(labb[k]))/327.68f;
}
#ifdef __SSE2__
// precalculate H and C using SSE
float HHBuffer[N];
float CCBuffer[N];
__m128 c327d68v = _mm_set1_ps(327.68f);
__m128 av,bv;
int k;
for (k=0; k<N-3; k+=4) {
av = LVFU(laba[k]);
bv = LVFU(labb[k]);
_mm_storeu_ps(&HHBuffer[k],xatan2f(bv,av));
_mm_storeu_ps(&CCBuffer[k],_mm_sqrt_ps(SQRV(av)+SQRV(bv))/c327d68v);
}
for(;k<N;k++) {
HHBuffer[k] = xatan2f(labb[k],laba[k]);
CCBuffer[k] = sqrt(SQR(laba[k]) + SQR(labb[k]))/327.68f;
}
#endif // __SSE2__
for (int j=0; j<N; j++) {
#ifdef __SSE2__
float HH = HHBuffer[j];
float Chprov1 = CCBuffer[j];
for (int j=0; j<N; j++) {
#ifdef __SSE2__
float HH = HHBuffer[j];
float Chprov1 = CCBuffer[j];
#else
float HH=xatan2f(labb[j],laba[j]);
float Chprov1=sqrtf(SQR(laba[j]) + SQR(labb[j]))/327.68f;
float Chprov1=sqrtf(SQR(laba[j]) + SQR(labb[j]))/327.68f;
#endif
float Lprov1=labL[j]/327.68f;
float Loldd = Lprov1;
float Loldd = Lprov1;
float Coldd = Chprov1;
float2 sincosval;
float2 sincosval;
if(gamut) {
#ifdef _DEBUG
bool neg, more_rgb;
#endif
// According to mathematical laws we can get the sin and cos of HH by simple operations
float R,G,B;
if(Chprov1 == 0.f) {
sincosval.y = 1.f;
sincosval.x = 0.f;
} else {
sincosval.y = laba[j]/(Chprov1*327.68f);
sincosval.x = labb[j]/(Chprov1*327.68f);
}
// According to mathematical laws we can get the sin and cos of HH by simple operations
float R,G,B;
if(Chprov1 == 0.f) {
sincosval.y = 1.f;
sincosval.x = 0.f;
} else {
sincosval.y = laba[j]/(Chprov1*327.68f);
sincosval.x = labb[j]/(Chprov1*327.68f);
}
//gamut control : Lab values are in gamut
#ifdef _DEBUG
@@ -1585,24 +1604,24 @@ SSEFUNCTION void Color::LabGamutMunsell(float *labL, float *laba, float *labb,
#else
AllMunsellLch(lumaMuns, Lprov1, Loldd, HH, Chprov1, Coldd, correctionHuechroma, correctlum);
#endif
if(correctlum == 0.f && correctionHuechroma == 0.f) {
if(!gamut) {
if(Coldd == 0.f) {
sincosval.y = 1.f;
sincosval.x = 0.f;
} else {
sincosval.y = laba[j]/(Coldd*327.68f);
sincosval.x = labb[j]/(Coldd*327.68f);
}
}
if(correctlum == 0.f && correctionHuechroma == 0.f) {
if(!gamut) {
if(Coldd == 0.f) {
sincosval.y = 1.f;
sincosval.x = 0.f;
} else {
sincosval.y = laba[j]/(Coldd*327.68f);
sincosval.x = labb[j]/(Coldd*327.68f);
}
}
} else {
HH+=correctlum; //hue Munsell luminance correction
sincosval = xsincosf(HH+correctionHuechroma);
sincosval = xsincosf(HH+correctionHuechroma);
}
laba[j] = Chprov1*sincosval.y*327.68f;
labb[j] = Chprov1*sincosval.x*327.68f;
}
labb[j] = Chprov1*sincosval.x*327.68f;
}
#ifdef _DEBUG
t2e.set();