Retinex: precalculate expensive per-pixel values (sqrt, sin, cos, atan2) row by row using SSE and line buffers

This commit is contained in:
heckflosse 2016-02-12 18:42:08 +01:00
parent ba5161d446
commit 73b0bb14c0

View File

@ -2447,27 +2447,94 @@ void RawImageSource::retinex(ColorManagementParams cmp, RetinexParams deh, ToneC
// gamut control only in Lab mode // gamut control only in Lab mode
const bool highlight = Tc.hrenabled; const bool highlight = Tc.hrenabled;
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel for #pragma omp parallel
#endif
{
#ifdef __SSE2__
// we need some line buffers to precalculate some expensive stuff using SSE
float atan2Buffer[W] ALIGNED16;
float sqrtBuffer[W] ALIGNED16;
float sincosxBuffer[W] ALIGNED16;
float sincosyBuffer[W] ALIGNED16;
const vfloat c327d68v = F2V(327.68);
const vfloat onev = F2V(1.f);
#endif // __SSE2__
#ifdef _OPENMP
#pragma omp for
#endif #endif
for (int i = border; i < H - border; i++ ) { for (int i = border; i < H - border; i++ ) {
for (int j = border; j < W - border; j++) { #ifdef __SSE2__
// vectorized precalculation
{
int j = border;
for (; j < W - border - 3; j += 4)
{
vfloat av = LVFU(conversionBuffer[0][i - border][j - border]);
vfloat bv = LVFU(conversionBuffer[1][i - border][j - border]);
vfloat chprovv = vsqrtf(SQRV(av) + SQRV(bv));
STVF(sqrtBuffer[j - border], chprovv / c327d68v);
vfloat HHv = xatan2f(bv, av);
STVF(atan2Buffer[j - border], HHv);
av /= chprovv;
bv /= chprovv;
vmask selMask = vmaskf_eq(chprovv, ZEROV);
STVF(sincosyBuffer[j - border], vself(selMask, onev, av));
STVF(sincosxBuffer[j - border], vselfnotzero(selMask, bv));
}
for (; j < W - border; j++)
{
float aa = conversionBuffer[0][i - border][j - border];
float bb = conversionBuffer[1][i - border][j - border];
float Chprov1 = sqrt(SQR(aa) + SQR(bb)) / 327.68f;
sqrtBuffer[j - border] = Chprov1;
float HH = xatan2f(bb, aa);
atan2Buffer[j - border] = HH;
if(Chprov1 == 0.0f) {
sincosyBuffer[j - border] = 1.f;
sincosxBuffer[j - border] = 0.0f;
} else {
sincosyBuffer[j - border] = aa / (Chprov1 * 327.68f);
sincosxBuffer[j - border] = bb / (Chprov1 * 327.68f);
}
}
}
#endif // __SSE2__
for (int j = border; j < W - border; j++) {
float Lprov1 = (LBuffer[i - border][j - border]) / 327.68f; float Lprov1 = (LBuffer[i - border][j - border]) / 327.68f;
float Chprov1 = sqrt(SQR(conversionBuffer[0][i - border][j - border]) + SQR(conversionBuffer[1][i - border][j - border])) / 327.68f; #ifdef __SSE2__
float HH = xatan2f(conversionBuffer[1][i - border][j - border], conversionBuffer[0][i - border][j - border]); float Chprov1 = sqrtBuffer[j - border];
float HH = atan2Buffer[j - border];
float2 sincosval; float2 sincosval;
float valp; sincosval.x = sincosxBuffer[j - border];
float chr; sincosval.y = sincosyBuffer[j - border];
#else
float aa = conversionBuffer[0][i - border][j - border];
float bb = conversionBuffer[1][i - border][j - border];
float Chprov1 = sqrt(SQR(aa) + SQR(bb)) / 327.68f;
float HH = xatan2f(bb, aa);
float2 sincosval;// = xsincosf(HH);
if(Chprov1 == 0.0f) {
sincosval.y = 1.f;
sincosval.x = 0.0f;
} else {
sincosval.y = aa / (Chprov1 * 327.68f);
sincosval.x = bb / (Chprov1 * 327.68f);
}
#endif
if(chutili) { // c=f(H) if(chutili) { // c=f(H)
{ float valp = float((chcurve->getVal(Color::huelab_to_huehsv2(HH)) - 0.5f));
valp = float((chcurve->getVal(Color::huelab_to_huehsv2(HH)) - 0.5f));
Chprov1 *= (1.f + 2.f * valp); Chprov1 *= (1.f + 2.f * valp);
} }
}
sincosval = xsincosf(HH);
float R, G, B; float R, G, B;
#ifdef _DEBUG #ifdef _DEBUG
bool neg = false; bool neg = false;
@ -2486,7 +2553,7 @@ void RawImageSource::retinex(ColorManagementParams cmp, RetinexParams deh, ToneC
LBuffer[i - border][j - border] = Lprov1 * 327.68f; LBuffer[i - border][j - border] = Lprov1 * 327.68f;
} }
} }
}
//end gamut control //end gamut control
#ifdef __SSE2__ #ifdef __SSE2__
vfloat wipv[3][3]; vfloat wipv[3][3];