diff --git a/rtengine/FTblockDN.cc b/rtengine/FTblockDN.cc index c121ce7a1..ba7ae693e 100644 --- a/rtengine/FTblockDN.cc +++ b/rtengine/FTblockDN.cc @@ -37,7 +37,6 @@ #include "opthelper.h" #include "cplx_wavelet_dec.h" #include "median.h" - #ifdef _OPENMP #include #endif @@ -588,18 +587,14 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise(int kall, Imagefloat * src, Imagef } } - float gamslope = exp(log(static_cast(gamthresh)) / gam) / gamthresh; LUTf gamcurve(65536, LUT_CLIP_BELOW); + float gamslope = exp(log(static_cast(gamthresh)) / gam) / gamthresh; if (denoiseMethodRgb) { - for (int i = 0; i < 65536; ++i) { - gamcurve[i] = (Color::gamma(static_cast(i) / 65535.0, gam, gamthresh, gamslope, 1.0, 0.0)) * 32768.0f; - } + Color::gammaf2lut(gamcurve, gam, gamthresh, gamslope, 65535.f, 32768.f); } else { - for (int i = 0; i < 65536; ++i) { - gamcurve[i] = (Color::gamman(static_cast(i) / 65535.0, gam)) * 32768.0f; - } + Color::gammanf2lut(gamcurve, gam, 65535.f, 32768.f); } // inverse gamma transform for output data @@ -610,48 +605,14 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise(int kall, Imagefloat * src, Imagef LUTf igamcurve(65536, LUT_CLIP_BELOW); if (denoiseMethodRgb) { - for (int i = 0; i < 65536; ++i) { - igamcurve[i] = (Color::gamma(static_cast(i) / 32768.0f, igam, igamthresh, igamslope, 1.0, 0.0) * 65535.0f); - } + Color::gammaf2lut(igamcurve, igam, igamthresh, igamslope, 32768.f, 65535.f); } else { - for (int i = 0; i < 65536; ++i) { - igamcurve[i] = (Color::gamman(static_cast(i) / 32768.0f, igam) * 65535.0f); - } + Color::gammanf2lut(igamcurve, igam, 32768.f, 65535.f); } const float gain = pow (2.0f, float(expcomp)); float noisevar_Ldetail = SQR(static_cast(SQR(100. - dnparams.Ldetail) + 50.*(100. - dnparams.Ldetail)) * TS * 0.5f); - if (settings->verbose) { - printf("Denoise Lab=%i\n", settings->denoiselabgamma); - } - - // To avoid branches in loops we access the gammatabs by pointers - // modify arbitrary data for Lab..I have test : nothing, gamma 2.6 11 - gamma 4 5 - gamma 5.5 10 - // we can put other as gamma g=2.6 slope=11, etc. - // but noting to do with real gamma !!!: it's only for data Lab # data RGB - // finally I opted fot gamma55 and with options we can change - - LUTf *denoisegamtab; - LUTf *denoiseigamtab; - - switch(settings->denoiselabgamma) { - case 0: - denoisegamtab = &(Color::gammatab_26_11); - denoiseigamtab = &(Color::igammatab_26_11); - break; - - case 1: - denoisegamtab = &(Color::gammatab_4); - denoiseigamtab = &(Color::igammatab_4); - break; - - default: - denoisegamtab = &(Color::gammatab_55); - denoiseigamtab = &(Color::igammatab_55); - break; - } - array2D tilemask_in(TS, TS); array2D tilemask_out(TS, TS); @@ -818,7 +779,6 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise(int kall, Imagefloat * src, Imagef {static_cast(wprof[2][0]), static_cast(wprof[2][1]), static_cast(wprof[2][2])} }; - // begin tile processing of image #ifdef _OPENMP #pragma omp parallel num_threads(numthreads) if (numthreads>1) @@ -917,14 +877,14 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise(int kall, Imagefloat * src, Imagef float G_ = gain * src->g(i, j); float B_ = gain * src->b(i, j); - R_ = (*denoiseigamtab)[R_]; - G_ = (*denoiseigamtab)[G_]; - B_ = (*denoiseigamtab)[B_]; + R_ = Color::denoiseIGammaTab[R_]; + G_ = Color::denoiseIGammaTab[G_]; + B_ = Color::denoiseIGammaTab[B_]; //apply gamma noise standard (slider) - R_ = R_ < 65535.0f ? gamcurve[R_] : (Color::gammanf(R_ / 65535.f, gam) * 32768.0f); - G_ = G_ < 65535.0f ? gamcurve[G_] : (Color::gammanf(G_ / 65535.f, gam) * 32768.0f); - B_ = B_ < 65535.0f ? gamcurve[B_] : (Color::gammanf(B_ / 65535.f, gam) * 32768.0f); + R_ = R_ < 65535.f ? gamcurve[R_] : (Color::gammanf(R_ / 65535.f, gam) * 32768.f); + G_ = G_ < 65535.f ? gamcurve[G_] : (Color::gammanf(G_ / 65535.f, gam) * 32768.f); + B_ = B_ < 65535.f ? gamcurve[B_] : (Color::gammanf(B_ / 65535.f, gam) * 32768.f); //true conversion xyz=>Lab float X, Y, Z; @@ -966,9 +926,9 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise(int kall, Imagefloat * src, Imagef float Y = gain * src->g(i, j); float Z = gain * src->b(i, j); //conversion colorspace to determine luminance with no gamma - X = X < 65535.0f ? gamcurve[X] : (Color::gamma(static_cast(X) / 65535.0, gam, gamthresh, gamslope, 1.0, 0.0) * 32768.0f); - Y = Y < 65535.0f ? gamcurve[Y] : (Color::gamma(static_cast(Y) / 65535.0, gam, gamthresh, gamslope, 1.0, 0.0) * 32768.0f); - Z = Z < 65535.0f ? gamcurve[Z] : (Color::gamma(static_cast(Z) / 65535.0, gam, gamthresh, gamslope, 1.0, 0.0) * 32768.0f); + X = X < 65535.f ? gamcurve[X] : (Color::gammaf(X / 65535.f, gam, gamthresh, gamslope) * 32768.f); + Y = Y < 65535.f ? gamcurve[Y] : (Color::gammaf(Y / 65535.f, gam, gamthresh, gamslope) * 32768.f); + Z = Z < 65535.f ? gamcurve[Z] : (Color::gammaf(Z / 65535.f, gam, gamthresh, gamslope) * 32768.f); //end chroma labdn->L[i1][j1] = Y; labdn->a[i1][j1] = (X - Y); @@ -1009,9 +969,9 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise(int kall, Imagefloat * src, Imagef float btmp = Color::igammatab_srgb[ src->b(i, j) ]; //modification Jacques feb 2013 // gamma slider different from raw - rtmp = rtmp < 65535.0f ? gamcurve[rtmp] : (Color::gamman(static_cast(rtmp) / 65535.0, gam) * 32768.0f); - gtmp = gtmp < 65535.0f ? gamcurve[gtmp] : (Color::gamman(static_cast(gtmp) / 65535.0, gam) * 32768.0f); - btmp = btmp < 65535.0f ? gamcurve[btmp] : (Color::gamman(static_cast(btmp) / 65535.0, gam) * 32768.0f); + rtmp = rtmp < 65535.f ? gamcurve[rtmp] : (Color::gammanf(rtmp / 65535.f, gam) * 32768.f); + gtmp = gtmp < 65535.f ? gamcurve[gtmp] : (Color::gammanf(gtmp / 65535.f, gam) * 32768.f); + btmp = btmp < 65535.f ? gamcurve[btmp] : (Color::gammanf(btmp / 65535.f, gam) * 32768.f); float X, Y, Z; Color::rgbxyz(rtmp, gtmp, btmp, X, Y, Z, wp); @@ -1612,9 +1572,9 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise(int kall, Imagefloat * src, Imagef b_ = b_ < 32768.f ? igamcurve[b_] : (Color::gammanf(b_ / 32768.f, igam) * 65535.f); //readapt arbitrary gamma (inverse from beginning) - r_ = (*denoisegamtab)[r_]; - g_ = (*denoisegamtab)[g_]; - b_ = (*denoisegamtab)[b_]; + r_ = Color::denoiseGammaTab[r_]; + g_ = Color::denoiseGammaTab[g_]; + b_ = Color::denoiseGammaTab[b_]; if (numtiles == 1) { dsttmp->r(i, j) = newGain * r_; @@ -1650,9 +1610,9 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise(int kall, Imagefloat * src, Imagef float Z = Y - (labdn->b[i1][j1]); - X = X < 32768.0f ? igamcurve[X] : (Color::gamma(X / 32768.0f, igam, igamthresh, igamslope, 1.0, 0.0) * 65535.0f); - Y = Y < 32768.0f ? igamcurve[Y] : (Color::gamma(Y / 32768.0f, igam, igamthresh, igamslope, 1.0, 0.0) * 65535.0f); - Z = Z < 32768.0f ? igamcurve[Z] : (Color::gamma(Z / 32768.0f, igam, igamthresh, igamslope, 1.0, 0.0) * 65535.0f); + X = X < 32768.f ? igamcurve[X] : (Color::gammaf(X / 32768.f, igam, igamthresh, igamslope) * 65535.f); + Y = Y < 32768.f ? igamcurve[Y] : (Color::gammaf(Y / 32768.f, igam, igamthresh, igamslope) * 65535.f); + Z = Z < 32768.f ? igamcurve[Z] : (Color::gammaf(Z / 32768.f, igam, igamthresh, igamslope) * 65535.f); if (numtiles == 1) { dsttmp->r(i, j) = newGain * X; @@ -1695,9 +1655,9 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise(int kall, Imagefloat * src, Imagef float r_, g_, b_; Color::xyz2rgb(X, Y, Z, r_, g_, b_, wip); //gamma slider is different from Raw - r_ = r_ < 32768.0f ? igamcurve[r_] : (Color::gamman(r_ / 32768.0f, igam) * 65535.0f); - g_ = g_ < 32768.0f ? igamcurve[g_] : (Color::gamman(g_ / 32768.0f, igam) * 65535.0f); - b_ = b_ < 32768.0f ? igamcurve[b_] : (Color::gamman(b_ / 32768.0f, igam) * 65535.0f); + r_ = r_ < 32768.f ? igamcurve[r_] : (Color::gammanf(r_ / 32768.f, igam) * 65535.f); + g_ = g_ < 32768.f ? igamcurve[g_] : (Color::gammanf(g_ / 32768.f, igam) * 65535.f); + b_ = b_ < 32768.f ? igamcurve[b_] : (Color::gammanf(b_ / 32768.f, igam) * 65535.f); if (numtiles == 1) { dsttmp->r(i, j) = newGain * r_; @@ -2146,7 +2106,7 @@ float ImProcFunctions::MadMax(float * DataList, int & max, int datalen) float ImProcFunctions::Mad(float * DataList, const int datalen) { - if(datalen <= 0) { // Avoid possible buffer underrun + if(datalen <= 1) { // Avoid possible buffer underrun return 0; } @@ -2175,7 +2135,7 @@ float ImProcFunctions::Mad(float * DataList, const int datalen) float ImProcFunctions::MadRgb(float * DataList, const int datalen) { - if(datalen <= 0) { // Avoid possible buffer underrun + if(datalen <= 1) { // Avoid possible buffer underrun return 0; } @@ -2982,7 +2942,7 @@ void ImProcFunctions::WaveletDenoiseAll_info(int levwav, wavelet_decomposition & } } -void ImProcFunctions::RGB_denoise_infoGamCurve(const procparams::DirPyrDenoiseParams & dnparams, bool isRAW, LUTf &gamcurve, float &gam, float &gamthresh, float &gamslope) +SSEFUNCTION void ImProcFunctions::RGB_denoise_infoGamCurve(const procparams::DirPyrDenoiseParams & dnparams, bool isRAW, LUTf &gamcurve, float &gam, float &gamthresh, float &gamslope) { gam = dnparams.gamma; gamthresh = 0.001f; @@ -2995,17 +2955,13 @@ void ImProcFunctions::RGB_denoise_infoGamCurve(const procparams::DirPyrDenoisePa } } - gamslope = exp(log(static_cast(gamthresh)) / gam) / gamthresh; bool denoiseMethodRgb = (dnparams.dmethod == "RGB"); if (denoiseMethodRgb) { - for (int i = 0; i < 65536; ++i) { - gamcurve[i] = (Color::gamma(static_cast(i) / 65535.0, gam, gamthresh, gamslope, 1.0, 0.0)) * 32768.0f; - } + gamslope = exp(log(static_cast(gamthresh)) / gam) / gamthresh; + Color::gammaf2lut(gamcurve, gam, gamthresh, gamslope, 65535.f, 32768.f); } else { - for (int i = 0; i < 65536; ++i) { - gamcurve[i] = (Color::gamman(static_cast(i) / 65535.0, gam)) * 32768.0f; - } + Color::gammanf2lut(gamcurve, gam, 65535.f, 32768.f); } } @@ -3256,24 +3212,6 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat int nb = 0; int comptlevel = 0; - // To avoid branches in loops we access the gammatabs by pointers - LUTf *denoiseigamtab; - - switch(settings->denoiselabgamma) { - case 0: - denoiseigamtab = &(Color::igammatab_26_11); - break; - - case 1: - denoiseigamtab = &(Color::igammatab_4); - break; - - default: - denoiseigamtab = &(Color::igammatab_55); - break; - } - - for (int tiletop = 0; tiletop < imheight; tiletop += tileHskip) { for (int tileleft = 0; tileleft < imwidth; tileleft += tileWskip) { @@ -3419,14 +3357,14 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat float G_ = gain * src->g(i, j); float B_ = gain * src->b(i, j); - R_ = (*denoiseigamtab)[R_]; - G_ = (*denoiseigamtab)[G_]; - B_ = (*denoiseigamtab)[B_]; + R_ = Color::denoiseIGammaTab[R_]; + G_ = Color::denoiseIGammaTab[G_]; + B_ = Color::denoiseIGammaTab[B_]; //apply gamma noise standard (slider) - R_ = R_ < 65535.0f ? gamcurve[R_] : (Color::gamman(static_cast(R_) / 65535.0, gam) * 32768.0f); - G_ = G_ < 65535.0f ? gamcurve[G_] : (Color::gamman(static_cast(G_) / 65535.0, gam) * 32768.0f); - B_ = B_ < 65535.0f ? gamcurve[B_] : (Color::gamman(static_cast(B_) / 65535.0, gam) * 32768.0f); + R_ = R_ < 65535.f ? gamcurve[R_] : (Color::gammanf(R_ / 65535.f, gam) * 32768.f); + G_ = G_ < 65535.f ? gamcurve[G_] : (Color::gammanf(G_ / 65535.f, gam) * 32768.f); + B_ = B_ < 65535.f ? gamcurve[B_] : (Color::gammanf(B_ / 65535.f, gam) * 32768.f); //true conversion xyz=>Lab float X, Y, Z; Color::rgbxyz(R_, G_, B_, X, Y, Z, wp); @@ -3451,9 +3389,9 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat float Y = gain * src->g(i, j); float Z = gain * src->b(i, j); - X = X < 65535.0f ? gamcurve[X] : (Color::gamma(static_cast(X) / 65535.0, gam, gamthresh, gamslope, 1.0, 0.0) * 32768.0f); - Y = Y < 65535.0f ? gamcurve[Y] : (Color::gamma(static_cast(Y) / 65535.0, gam, gamthresh, gamslope, 1.0, 0.0) * 32768.0f); - Z = Z < 65535.0f ? gamcurve[Z] : (Color::gamma(static_cast(Z) / 65535.0, gam, gamthresh, gamslope, 1.0, 0.0) * 32768.0f); + X = X < 65535.f ? gamcurve[X] : (Color::gammaf(X / 65535.f, gam, gamthresh, gamslope) * 32768.f); + Y = Y < 65535.f ? gamcurve[Y] : (Color::gammaf(Y / 65535.f, gam, gamthresh, gamslope) * 32768.f); + Z = Z < 65535.f ? gamcurve[Z] : (Color::gammaf(Z / 65535.f, gam, gamthresh, gamslope) * 32768.f); labdn->a[i1][j1] = (X - Y); labdn->b[i1][j1] = (Y - Z); @@ -3480,9 +3418,9 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat float btmp = Color::igammatab_srgb[ src->b(i, j) ]; //modification Jacques feb 2013 // gamma slider different from raw - rtmp = rtmp < 65535.0f ? gamcurve[rtmp] : (Color::gamman(static_cast(rtmp) / 65535.0, gam) * 32768.0f); - gtmp = gtmp < 65535.0f ? gamcurve[gtmp] : (Color::gamman(static_cast(gtmp) / 65535.0, gam) * 32768.0f); - btmp = btmp < 65535.0f ? gamcurve[btmp] : (Color::gamman(static_cast(btmp) / 65535.0, gam) * 32768.0f); + rtmp = rtmp < 65535.f ? gamcurve[rtmp] : (Color::gammanf(rtmp / 65535.f, gam) * 32768.f); + gtmp = gtmp < 65535.f ? gamcurve[gtmp] : (Color::gammanf(gtmp / 65535.f, gam) * 32768.f); + btmp = btmp < 65535.f ? gamcurve[btmp] : (Color::gammanf(btmp / 65535.f, gam) * 32768.f); float X, Y, Z; Color::rgbxyz(rtmp, gtmp, btmp, X, Y, Z, wp); diff --git a/rtengine/color.cc b/rtengine/color.cc index 5227719dd..58dc60320 100644 --- a/rtengine/color.cc +++ b/rtengine/color.cc @@ -43,15 +43,10 @@ LUTf Color::igammatab_srgb; LUTf Color::igammatab_srgb1; LUTf Color::gammatab_srgb; LUTf Color::gammatab_srgb1; -// LUTf Color::igammatab_709; -// LUTf Color::gammatab_709; -LUTf Color::igammatab_55; -LUTf Color::gammatab_55; -LUTf Color::igammatab_4; -LUTf Color::gammatab_4; -LUTf Color::igammatab_26_11; -LUTf Color::gammatab_26_11; +LUTf Color::denoiseGammaTab; +LUTf Color::denoiseIGammaTab; + LUTf Color::igammatab_24_17; LUTf Color::gammatab_24_17a; LUTf Color::gammatab_13_2; @@ -150,13 +145,10 @@ void Color::init () igammatab_srgb1(maxindex, 0); gammatab_srgb(maxindex, 0); gammatab_srgb1(maxindex, 0); - igammatab_55(maxindex, 0); - gammatab_55(maxindex, 0); - igammatab_4(maxindex, 0); - gammatab_4(maxindex, 0); - igammatab_26_11(maxindex, 0); - gammatab_26_11(maxindex, 0); + denoiseGammaTab(maxindex, 0); + denoiseIGammaTab(maxindex, 0); + igammatab_24_17(maxindex, 0); gammatab_24_17a(maxindex, LUT_CLIP_ABOVE | LUT_CLIP_BELOW); gammatab_13_2(maxindex, 0); @@ -195,6 +187,7 @@ void Color::init () { gammatab_srgb[i] = gammatab_srgb1[i] = gamma2(i / 65535.0); } + gammatab_srgb *= 65535.f; gamma2curve.share(gammatab_srgb, LUT_CLIP_BELOW | LUT_CLIP_ABOVE); // shares the buffer with gammatab_srgb but has different clip flags } @@ -202,9 +195,11 @@ void Color::init () #pragma omp section #endif { - for (int i = 0; i < maxindex; i++) { + for (int i = 0; i < maxindex; i++) + { igammatab_srgb[i] = igammatab_srgb1[i] = igamma2 (i / 65535.0); } + igammatab_srgb *= 65535.f; } #ifdef _OPENMP @@ -213,42 +208,74 @@ void Color::init () { double rsRGBGamma = 1.0 / sRGBGamma; - for (int i = 0; i < maxindex; i++) { + for (int i = 0; i < maxindex; i++) + { double val = pow (i / 65535.0, rsRGBGamma); gammatab[i] = 65535.0 * val; gammatabThumb[i] = (unsigned char)(255.0 * val); } } + #ifdef _OPENMP #pragma omp section #endif + // modify arbitrary data for Lab..I have test : nothing, gamma 2.6 11 - gamma 4 5 - gamma 5.5 10 + // we can put other as gamma g=2.6 slope=11, etc. + // but noting to do with real gamma !!!: it's only for data Lab # data RGB + // finally I opted for gamma55 and with options we can change - for (int i = 0; i < maxindex; i++) { - gammatab_55[i] = 65535.0 * gamma55 (i / 65535.0); + switch(settings->denoiselabgamma) { + case 0: + for (int i = 0; i < maxindex; i++) { + denoiseGammaTab[i] = 65535.0 * gamma26_11 (i / 65535.0); + } + + break; + + case 1: + for (int i = 0; i < maxindex; i++) { + denoiseGammaTab[i] = 65535.0 * gamma4 (i / 65535.0); + } + + break; + + default: + for (int i = 0; i < maxindex; i++) { + denoiseGammaTab[i] = 65535.0 * gamma55 (i / 65535.0); + } + + break; } #ifdef _OPENMP #pragma omp section #endif + // modify arbitrary data for Lab..I have test : nothing, gamma 2.6 11 - gamma 4 5 - gamma 5.5 10 + // we can put other as gamma g=2.6 slope=11, etc. + // but noting to do with real gamma !!!: it's only for data Lab # data RGB + // finally I opted for gamma55 and with options we can change - for (int i = 0; i < maxindex; i++) { - igammatab_55[i] = 65535.0 * igamma55 (i / 65535.0); - } + switch(settings->denoiselabgamma) { + case 0: + for (int i = 0; i < maxindex; i++) { + denoiseIGammaTab[i] = 65535.0 * igamma26_11 (i / 65535.0); + } -#ifdef _OPENMP - #pragma omp section -#endif + break; - for (int i = 0; i < maxindex; i++) { - gammatab_4[i] = 65535.0 * gamma4 (i / 65535.0); - } + case 1: + for (int i = 0; i < maxindex; i++) { + denoiseIGammaTab[i] = 65535.0 * igamma4 (i / 65535.0); + } -#ifdef _OPENMP - #pragma omp section -#endif + break; - for (int i = 0; i < maxindex; i++) { - igammatab_4[i] = 65535.0 * igamma4 (i / 65535.0); + default: + for (int i = 0; i < maxindex; i++) { + denoiseIGammaTab[i] = 65535.0 * igamma55 (i / 65535.0); + } + + break; } #ifdef _OPENMP @@ -299,22 +326,6 @@ void Color::init () igammatab_145_3[i] = 65535.0 * igamma145_3 (i / 65535.0); } -#ifdef _OPENMP - #pragma omp section -#endif - - for (int i = 0; i < maxindex; i++) { - gammatab_26_11[i] = 65535.0 * gamma26_11 (i / 65535.0); - } - -#ifdef _OPENMP - #pragma omp section -#endif - - for (int i = 0; i < maxindex; i++) { - igammatab_26_11[i] = 65535.0 * igamma26_11 (i / 65535.0); - } - #ifdef _OPENMP #pragma omp section #endif @@ -1534,6 +1545,80 @@ void Color::calcGamma (double pwr, double ts, int mode, int imax, double &gamma0 return; } } +void Color::gammaf2lut (LUTf &gammacurve, float gamma, float start, float slope, float divisor, float factor) +{ +#ifdef __SSE2__ + // SSE2 version is more than 6 times faster than scalar version + vfloat iv = _mm_set_ps(3.f, 2.f, 1.f, 0.f); + vfloat fourv = F2V(4.f); + vfloat gammav = F2V(1.f / gamma); + vfloat slopev = F2V((slope / divisor) * factor); + vfloat divisorv = F2V(xlogf(divisor)); + vfloat factorv = F2V(factor); + vfloat comparev = F2V(start * divisor); + int border = start * divisor; + int border1 = border - (border & 3); + int border2 = border1 + 4; + int i = 0; + + for(; i < border1; i += 4) { + vfloat resultv = iv * slopev; + STVFU(gammacurve[i], resultv); + iv += fourv; + } + + for(; i < border2; i += 4) { + vfloat result0v = iv * slopev; + vfloat result1v = xexpf((xlogf(iv) - divisorv) * gammav) * factorv; + STVFU(gammacurve[i], vself(vmaskf_le(iv, comparev), result0v, result1v)); + iv += fourv; + } + + for(; i < 65536; i += 4) { + vfloat resultv = xexpfNoCheck((xlogfNoCheck(iv) - divisorv) * gammav) * factorv; + STVFU(gammacurve[i], resultv); + iv += fourv; + } + +#else + + for (int i = 0; i < 65536; ++i) { + gammacurve[i] = gammaf(static_cast(i) / divisor, gamma, start, slope) * factor; + } + +#endif +} + +void Color::gammanf2lut (LUTf &gammacurve, float gamma, float divisor, float factor) //standard gamma without slope... +{ +#ifdef __SSE2__ + // SSE2 version is more than 6 times faster than scalar version + vfloat iv = _mm_set_ps(3.f, 2.f, 1.f, 0.f); + vfloat fourv = F2V(4.f); + vfloat gammav = F2V(1.f / gamma); + vfloat divisorv = F2V(xlogf(divisor)); + vfloat factorv = F2V(factor); + + // first input value is zero => we have to use the xlogf function which checks this + vfloat resultv = xexpf((xlogf(iv) - divisorv) * gammav) * factorv; + STVFU(gammacurve[0], resultv); + iv += fourv; + + // inside the loop we can use xlogfNoCheck and xexpfNoCheck because we know about the input values + for(int i = 4; i < 65536; i += 4) { + resultv = xexpfNoCheck((xlogfNoCheck(iv) - divisorv) * gammav) * factorv; + STVFU(gammacurve[i], resultv); + iv += fourv; + } + +#else + + for (int i = 0; i < 65536; ++i) { + gammacurve[i] = Color::gammanf(static_cast(i) / divisor, gamma) * factor; + } + +#endif +} void Color::Lab2XYZ(float L, float a, float b, float &x, float &y, float &z) { @@ -2210,6 +2295,7 @@ void Color::gamutLchonly (float HH, float2 sincosval, float &Lprov1, float &Chpr neg = false, more_rgb = false; #endif float ChprovSave = Chprov1; + do { inGamut = true; @@ -2232,6 +2318,7 @@ void Color::gamutLchonly (float HH, float2 sincosval, float &Lprov1, float &Chpr #ifdef _DEBUG neg = true; #endif + if (isnan(HH)) { float atemp = ChprovSave * sincosval.y * 327.68; float btemp = ChprovSave * sincosval.x * 327.68; diff --git a/rtengine/color.h b/rtengine/color.h index e9b38c509..350cd3b2a 100644 --- a/rtengine/color.h +++ b/rtengine/color.h @@ -132,13 +132,10 @@ public: static LUTf igammatab_srgb1; static LUTf gammatab_srgb; static LUTf gammatab_srgb1; - static LUTf igammatab_55; - static LUTf gammatab_55; - static LUTf igammatab_4; - static LUTf gammatab_4; - static LUTf igammatab_26_11; - static LUTf gammatab_26_11; + static LUTf denoiseGammaTab; + static LUTf denoiseIGammaTab; + static LUTf igammatab_24_17; static LUTf gammatab_24_17a; static LUTf gammatab_13_2; @@ -1109,6 +1106,15 @@ public: { return (x <= start ? x*slope : exp(log(x) / gamma) * mul - add); } + + static inline float gammaf (float x, float gamma, float start, float slope) + { + return x <= start ? x * slope : xexpf(xlogf(x) / gamma); + } + + //fills a LUT of size 65536 using gamma with slope... + static void gammaf2lut (LUTf &gammacurve, float gamma, float start, float slope, float divisor, float factor); + static inline double igamma (double x, double gamma, double start, double slope, double mul, double add) { return (x <= start * slope ? x / slope : exp(log((x + add) / mul) * gamma) ); @@ -1123,7 +1129,7 @@ public: */ static inline double gamman (double x, double gamma) //standard gamma without slope... { - return (x = exp(log(x) / gamma)); + return exp(log(x) / gamma); } /** @@ -1134,9 +1140,10 @@ public: */ static inline float gammanf (float x, float gamma) //standard gamma without slope... { - return (x = xexpf(xlogf(x) / gamma)); + return xexpf(xlogf(x) / gamma); } - + //fills a LUT of size 65536 using gamma without slope... + static void gammanf2lut (LUTf &gammacurve, float gamma, float divisor, float factor); /** * @brief Very simply inverse gamma @@ -1146,7 +1153,7 @@ public: */ static inline double igamman (double x, double gamma) //standard inverse gamma without slope... { - return (x = exp(log(x) * gamma) ); + return exp(log(x) * gamma); } diff --git a/rtengine/dcrop.cc b/rtengine/dcrop.cc index 93aca0f6d..1e5aba77c 100644 --- a/rtengine/dcrop.cc +++ b/rtengine/dcrop.cc @@ -191,9 +191,6 @@ void Crop::update (int todo) parent->ipf.Tile_calc (tilesize, overlap, kall, widIm, heiIm, numtiles_W, numtiles_H, tilewidth, tileheight, tileWskip, tileHskip); kall = 0; - float *ch_M = new float [9];//allocate memory - float *max_r = new float [9]; - float *max_b = new float [9]; float *min_b = new float [9]; float *min_r = new float [9]; float *lumL = new float [9]; @@ -380,7 +377,7 @@ void Crop::update (int todo) } } - if(skip == 1 && params.dirpyrDenoise.enabled && ((settings->leveldnautsimpl == 1 && params.dirpyrDenoise.Cmethod == "AUT") || (settings->leveldnautsimpl == 0 && params.dirpyrDenoise.C2method == "AUTO"))) { + if(skip == 1 && params.dirpyrDenoise.enabled && !parent->denoiseInfoStore.valid && ((settings->leveldnautsimpl == 1 && params.dirpyrDenoise.Cmethod == "AUT") || (settings->leveldnautsimpl == 0 && params.dirpyrDenoise.C2method == "AUTO"))) { MyTime t1aue, t2aue; t1aue.set(); @@ -462,9 +459,9 @@ void Crop::update (int todo) //printf("DCROP skip=%d cha=%f red=%f bl=%f redM=%f bluM=%f chrom=%f sigm=%f lum=%f\n",skip, chaut,redaut,blueaut, maxredaut, maxblueaut, chromina, sigma, lumema); Nb[hcr * 3 + wcr] = nb; - ch_M[hcr * 3 + wcr] = pondcorrec * chaut; - max_r[hcr * 3 + wcr] = pondcorrec * maxredaut; - max_b[hcr * 3 + wcr] = pondcorrec * maxblueaut; + parent->denoiseInfoStore.ch_M[hcr * 3 + wcr] = pondcorrec * chaut; + parent->denoiseInfoStore.max_r[hcr * 3 + wcr] = pondcorrec * maxredaut; + parent->denoiseInfoStore.max_b[hcr * 3 + wcr] = pondcorrec * maxblueaut; min_r[hcr * 3 + wcr] = pondcorrec * minredaut; min_b[hcr * 3 + wcr] = pondcorrec * minblueaut; lumL[hcr * 3 + wcr] = lumema; @@ -524,20 +521,20 @@ void Crop::update (int todo) int lissage = settings->leveldnliss; for (int k = 0; k < 9; k++) { - float maxmax = max(max_r[k], max_b[k]); - parent->ipf.calcautodn_info (ch_M[k], delta[k], Nb[k], levaut, maxmax, lumL[k], chromC[k], mode, lissage, ry[k], sk[k], pcsk[k]); + float maxmax = max(parent->denoiseInfoStore.max_r[k], parent->denoiseInfoStore.max_b[k]); + parent->ipf.calcautodn_info (parent->denoiseInfoStore.ch_M[k], delta[k], Nb[k], levaut, maxmax, lumL[k], chromC[k], mode, lissage, ry[k], sk[k], pcsk[k]); // printf("ch_M=%f delta=%f\n",ch_M[k], delta[k]); } for (int k = 0; k < 9; k++) { - if(max_r[k] > max_b[k]) { + if(parent->denoiseInfoStore.max_r[k] > parent->denoiseInfoStore.max_b[k]) { Max_R[k] = (delta[k]) / ((autoNRmax * multip * adjustr * lowdenoise) / 2.f); - Min_B[k] = -(ch_M[k] - min_b[k]) / (autoNRmax * multip * adjustr * lowdenoise); + Min_B[k] = -(parent->denoiseInfoStore.ch_M[k] - min_b[k]) / (autoNRmax * multip * adjustr * lowdenoise); Max_B[k] = 0.f; Min_R[k] = 0.f; } else { Max_B[k] = (delta[k]) / ((autoNRmax * multip * adjustr * lowdenoise) / 2.f); - Min_R[k] = - (ch_M[k] - min_r[k]) / (autoNRmax * multip * adjustr * lowdenoise); + Min_R[k] = - (parent->denoiseInfoStore.ch_M[k] - min_r[k]) / (autoNRmax * multip * adjustr * lowdenoise); Min_B[k] = 0.f; Max_R[k] = 0.f; } @@ -545,7 +542,7 @@ void Crop::update (int todo) for (int k = 0; k < 9; k++) { // printf("ch_M= %f Max_R=%f Max_B=%f min_r=%f min_b=%f\n",ch_M[k],Max_R[k], Max_B[k],Min_R[k], Min_B[k]); - chM += ch_M[k]; + chM += parent->denoiseInfoStore.ch_M[k]; MaxBMoy += Max_B[k]; MaxRMoy += Max_R[k]; MinRMoy += Min_R[k]; @@ -587,7 +584,7 @@ void Crop::update (int todo) params.dirpyrDenoise.chroma = chM / (autoNR * multip * adjustr); params.dirpyrDenoise.redchro = maxr; params.dirpyrDenoise.bluechro = maxb; - + parent->denoiseInfoStore.valid = true; if(parent->adnListener) { parent->adnListener->chromaChanged(params.dirpyrDenoise.chroma, params.dirpyrDenoise.redchro, params.dirpyrDenoise.bluechro); } @@ -644,7 +641,7 @@ void Crop::update (int todo) int kall = 0; float chaut, redaut, blueaut, maxredaut, maxblueaut, nresi, highresi; - parent->ipf.RGB_denoise(kall, origCrop, origCrop, calclum, ch_M, max_r, max_b, parent->imgsrc->isRAW(), /*Roffset,*/ denoiseParams, parent->imgsrc->getDirPyrDenoiseExpComp(), noiseLCurve, noiseCCurve, chaut, redaut, blueaut, maxredaut, maxblueaut, nresi, highresi); + parent->ipf.RGB_denoise(kall, origCrop, origCrop, calclum, parent->denoiseInfoStore.ch_M, parent->denoiseInfoStore.max_r, parent->denoiseInfoStore.max_b, parent->imgsrc->isRAW(), /*Roffset,*/ denoiseParams, parent->imgsrc->getDirPyrDenoiseExpComp(), noiseLCurve, noiseCCurve, chaut, redaut, blueaut, maxredaut, maxblueaut, nresi, highresi); if (parent->adnListener) { parent->adnListener->noiseChanged(nresi, highresi); @@ -665,9 +662,6 @@ void Crop::update (int todo) parent->imgsrc->convertColorSpace(origCrop, params.icm, parent->currWB); - delete [] ch_M; - delete [] max_r; - delete [] max_b; delete [] min_r; delete [] min_b; delete [] lumL; diff --git a/rtengine/helperavx.h b/rtengine/helperavx.h index eb32277c3..528760a92 100644 --- a/rtengine/helperavx.h +++ b/rtengine/helperavx.h @@ -1,3 +1,9 @@ +//////////////////////////////////////////////////////////////// +// +// this code was taken from http://shibatch.sourceforge.net/ +// Many thanks to the author: Naoki Shibata +// +//////////////////////////////////////////////////////////////// #ifndef __AVX__ #error Please specify -mavx. #endif diff --git a/rtengine/helpersse2.h b/rtengine/helpersse2.h index 0f1fc5759..23dd016fa 100644 --- a/rtengine/helpersse2.h +++ b/rtengine/helpersse2.h @@ -1,3 +1,12 @@ +//////////////////////////////////////////////////////////////// +// +// this code was taken from http://shibatch.sourceforge.net/ +// Many thanks to the author of original version: Naoki Shibata +// +// This version contains modifications made by Ingo Weyrich +// +//////////////////////////////////////////////////////////////// + #ifndef __SSE2__ #error Please specify -msse2. #endif diff --git a/rtengine/improccoordinator.cc b/rtengine/improccoordinator.cc index a6354bd56..8925b29e0 100644 --- a/rtengine/improccoordinator.cc +++ b/rtengine/improccoordinator.cc @@ -320,6 +320,7 @@ void ImProcCoordinator::updatePreviewImage (int todo, Crop* cropCall) ipf.setScale (scale); imgsrc->getImage (currWB, tr, orig_prev, pp, params.toneCurve, params.icm, params.raw); + denoiseInfoStore.valid = false; //ColorTemp::CAT02 (orig_prev, ¶ms) ; // printf("orig_prevW=%d\n scale=%d",orig_prev->width, scale); /* Issue 2785, disabled some 1:1 tools diff --git a/rtengine/improccoordinator.h b/rtengine/improccoordinator.h index 5cc03cb72..0fb0041f3 100644 --- a/rtengine/improccoordinator.h +++ b/rtengine/improccoordinator.h @@ -325,6 +325,17 @@ public: { return imgsrc; } + + struct DenoiseInfoStore { + DenoiseInfoStore () : valid(false) {} + float chM; + float max_r[9]; + float max_b[9]; + float ch_M[9]; + bool valid; + + } denoiseInfoStore; + }; } #endif diff --git a/rtengine/sleef.c b/rtengine/sleef.c index 2377aea79..bc38a3cfb 100644 --- a/rtengine/sleef.c +++ b/rtengine/sleef.c @@ -1,3 +1,12 @@ +//////////////////////////////////////////////////////////////// +// +// this code was taken from http://shibatch.sourceforge.net/ +// Many thanks to the author of original version: Naoki Shibata +// +// This version contains modifications made by Ingo Weyrich +// +//////////////////////////////////////////////////////////////// + #ifndef _SLEEFC_ #define _SLEEFC_ diff --git a/rtengine/sleef.h b/rtengine/sleef.h deleted file mode 100644 index 101a4faff..000000000 --- a/rtengine/sleef.h +++ /dev/null @@ -1,51 +0,0 @@ -typedef struct { - double x, y; -} double2; - -typedef struct { - float x, y; -} float2; - -double xsin(double d); -double xcos(double d); -double2 xsincos(double d); -double xtan(double d); -double xasin(double s); -double xacos(double s); -double xatan(double s); -double xatan2(double y, double x); -double xlog(double d); -double xexp(double d); -double xpow(double x, double y); - -double xsinh(double x); -double xcosh(double x); -double xtanh(double x); -double xasinh(double x); -double xacosh(double x); -double xatanh(double x); -double xldexp(double x, int q); -int xilogb(double d); - -double xfma(double x, double y, double z); -double xsqrt(double d); -double xcbrt(double d); - -double xexp2(double a); -double xexp10(double a); -double xexpm1(double a); -double xlog10(double a); -double xlog1p(double a); - -float xsinf(float d); -float xcosf(float d); -float2 xsincosf(float d); -float xtanf(float d); -float xasinf(float s); -float xacosf(float s); -float xatanf(float s); -float xatan2f(float y, float x); -float xlogf(float d); -float xexpf(float d); -float xpowf(float x, float y); -float xcbrtf(float d); diff --git a/rtengine/sleefsseavx.c b/rtengine/sleefsseavx.c index a9f49f143..a55fcf897 100644 --- a/rtengine/sleefsseavx.c +++ b/rtengine/sleefsseavx.c @@ -1,3 +1,13 @@ +//////////////////////////////////////////////////////////////// +// +// this code was taken from http://shibatch.sourceforge.net/ +// Many thanks to the author of original version: Naoki Shibata +// +// This version contains modifications made by Ingo Weyrich +// +//////////////////////////////////////////////////////////////// + + #ifndef SLEEFSSEAVX #define SLEEFSSEAVX @@ -1275,6 +1285,25 @@ static INLINE vfloat xlogf0(vfloat d) { return x; } +static INLINE vfloat xlogfNoCheck(vfloat d) { // this version does not check input values. Use it only when you know the input values are > 0 e.g. when filling a lookup table + vfloat x, x2, t, m; + vint2 e; + + e = vilogbp1f(vmulf(d, vcast_vf_f(0.7071f))); + m = vldexpf(d, vsubi2(vcast_vi2_i(0), e)); + + x = vdivf(vaddf(vcast_vf_f(-1.0f), m), vaddf(vcast_vf_f(1.0f), m)); + x2 = vmulf(x, x); + + t = vcast_vf_f(0.2371599674224853515625f); + t = vmlaf(t, x2, vcast_vf_f(0.285279005765914916992188f)); + t = vmlaf(t, x2, vcast_vf_f(0.400005519390106201171875f)); + t = vmlaf(t, x2, vcast_vf_f(0.666666567325592041015625f)); + t = vmlaf(t, x2, vcast_vf_f(2.0f)); + + return vaddf(vmulf(x, t), vmulf(vcast_vf_f(0.693147180559945286226764f), vcast_vf_vi2(e))); + +} static INLINE vfloat xexpf(vfloat d) { vint2 q = vrint_vi2_vf(vmulf(d, vcast_vf_f(R_LN2f))); @@ -1299,6 +1328,24 @@ static INLINE vfloat xexpf(vfloat d) { return u; } +static INLINE vfloat xexpfNoCheck(vfloat d) { // this version does not check input values. Use it only when you know the input values are > -104.f e.g. when filling a lookup table + vint2 q = vrint_vi2_vf(vmulf(d, vcast_vf_f(R_LN2f))); + vfloat s, u; + + s = vmlaf(vcast_vf_vi2(q), vcast_vf_f(-L2Uf),d); + s = vmlaf(vcast_vf_vi2(q), vcast_vf_f(-L2Lf),s); + + u = vcast_vf_f(0.00136324646882712841033936f); + u = vmlaf(u, s, vcast_vf_f(0.00836596917361021041870117f)); + u = vmlaf(u, s, vcast_vf_f(0.0416710823774337768554688f)); + u = vmlaf(u, s, vcast_vf_f(0.166665524244308471679688f)); + u = vmlaf(u, s, vcast_vf_f(0.499999850988388061523438f)); + + u = vaddf(vcast_vf_f(1.0f), vmlaf(vmulf(s, s), u, s)); + + return vldexpf(u, q); +} + static INLINE vfloat xcbrtf(vfloat d) { vfloat x, y, q = vcast_vf_f(1.0), t; vint2 e, qu, re;