merge with dev and fixed some bugs

This commit is contained in:
Desmis
2019-10-27 17:09:53 +01:00
19 changed files with 1088 additions and 1607 deletions

View File

@@ -687,8 +687,8 @@ BENCHFUN
}
}
int tilesize;
int overlap;
int tilesize = 0;
int overlap = 0;
if (settings->leveldnti == 0) {
tilesize = 1024;
@@ -1342,8 +1342,6 @@ BENCHFUN
#ifdef _OPENMP
int masterThread = omp_get_thread_num();
#endif
#ifdef _OPENMP
#pragma omp parallel num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
#endif
{
@@ -1352,11 +1350,11 @@ BENCHFUN
#else
int subThread = 0;
#endif
float blurbuffer[TS * TS] ALIGNED64;
// float blurbuffer[TS * TS] ALIGNED64;
float *Lblox = LbloxArray[subThread];
float *fLblox = fLbloxArray[subThread];
float pBuf[width + TS + 2 * blkrad * offset] ALIGNED16;
float nbrwt[TS * TS] ALIGNED64;
// float nbrwt[TS * TS] ALIGNED64;
#ifdef _OPENMP
#pragma omp for
#endif
@@ -1431,7 +1429,8 @@ BENCHFUN
for (int hblk = 0; hblk < numblox_W; ++hblk) {
RGBtile_denoise(fLblox, hblk, noisevar_Ldetail, nbrwt, blurbuffer);
RGBtile_denoise(fLblox, hblk, noisevar_Ldetail);
// RGBtile_denoise(fLblox, hblk, noisevar_Ldetail, nbrwt, blurbuffer);
}//end of horizontal block loop
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -2048,19 +2047,21 @@ BENCHFUN
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
void ImProcFunctions::RGBtile_denoise(float * fLblox, int hblproc, float noisevar_Ldetail, float * nbrwt, float * blurbuffer) //for DCT
//void ImProcFunctions::RGBtile_denoise(float * fLblox, int hblproc, float noisevar_Ldetail, float * nbrwt, float * blurbuffer) //for DCT
void ImProcFunctions::RGBtile_denoise(float* fLblox, int hblproc, float noisevar_Ldetail) //for DCT
{
int blkstart = hblproc * TS * TS;
boxabsblur(fLblox + blkstart, nbrwt, 3, 3, TS, TS, blurbuffer); //blur neighbor weights for more robust estimation //for DCT
float nbrwt[TS * TS] ALIGNED64;
const int blkstart = hblproc * TS * TS;
boxabsblur(fLblox + blkstart, nbrwt, 3, TS, TS, false); //blur neighbor weights for more robust estimation //for DCT
#ifdef __SSE2__
__m128 tempv;
__m128 noisevar_Ldetailv = _mm_set1_ps(noisevar_Ldetail);
__m128 onev = _mm_set1_ps(1.0f);
const vfloat noisevar_Ldetailv = F2V(-1.f / noisevar_Ldetail);
const vfloat onev = F2V(1.f);
for (int n = 0; n < TS * TS; n += 4) { //for DCT
tempv = onev - xexpf(-SQRV(LVF(nbrwt[n])) / noisevar_Ldetailv);
_mm_storeu_ps(&fLblox[blkstart + n], LVFU(fLblox[blkstart + n]) * tempv);
const vfloat tempv = onev - xexpf(SQRV(LVF(nbrwt[n])) * noisevar_Ldetailv);
STVF(fLblox[blkstart + n], LVF(fLblox[blkstart + n]) * tempv);
}//output neighbor averaged result
#else
@@ -2071,10 +2072,7 @@ void ImProcFunctions::RGBtile_denoise(float * fLblox, int hblproc, float noiseva
#endif
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
//printf("vblk=%d hlk=%d wsqave=%f || ",vblproc,hblproc,wsqave);
}//end of function tile_denoise
}
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -2207,7 +2205,7 @@ void ImProcFunctions::Noise_residualAB(const wavelet_decomposition &WaveletCoeff
chmaxresid = maxresid;
}
bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposition &WaveletCoeffs_L, float *noisevarlum, float madL[8][3], float * vari, int edge, int denoiseNestedLevels)
bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(const wavelet_decomposition &WaveletCoeffs_L, float *noisevarlum, float madL[8][3], float * vari, int edge, int denoiseNestedLevels)
{
int maxlvl = min(WaveletCoeffs_L.maxlevel(), 5);
const float eps = 0.01f;
@@ -2238,13 +2236,12 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposition &Wavelet
#pragma omp parallel num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
#endif
{
float *buffer[4];
float *buffer[3];
buffer[0] = new (std::nothrow) float[maxWL * maxHL + 32];
buffer[1] = new (std::nothrow) float[maxWL * maxHL + 64];
buffer[2] = new (std::nothrow) float[maxWL * maxHL + 96];
buffer[3] = new (std::nothrow) float[maxWL * maxHL + 128];
if (buffer[0] == nullptr || buffer[1] == nullptr || buffer[2] == nullptr || buffer[3] == nullptr) {
if (buffer[0] == nullptr || buffer[1] == nullptr || buffer[2] == nullptr) {
memoryAllocationFailed = true;
}
@@ -2267,8 +2264,8 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposition &Wavelet
} else {
//simple wavelet shrinkage
float * sfave = buffer[0] + 32;
float * sfaved = buffer[1] + 64;
float * blurBuffer = buffer[2] + 96;
float * sfaved = buffer[2] + 96;
// float * blurBuffer = buffer[2] + 96;
float mad_Lr = madL[lvl][dir - 1];
/*
@@ -2313,17 +2310,17 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposition &Wavelet
float levelFactor = mad_Lr * 5.f / (lvl + 1);
#ifdef __SSE2__
__m128 mad_Lv;
__m128 ninev = _mm_set1_ps(9.0f);
__m128 epsv = _mm_set1_ps(eps);
__m128 mag_Lv;
__m128 levelFactorv = _mm_set1_ps(levelFactor);
vfloat mad_Lv;
vfloat ninev = F2V(9.0f);
vfloat epsv = F2V(eps);
vfloat mag_Lv;
vfloat levelFactorv = F2V(levelFactor);
int coeffloc_L;
for (coeffloc_L = 0; coeffloc_L < Hlvl_L * Wlvl_L - 3; coeffloc_L += 4) {
mad_Lv = LVFU(nvl[coeffloc_L]) * levelFactorv;
mag_Lv = SQRV(LVFU(WavCoeffs_L[dir][coeffloc_L]));
_mm_storeu_ps(&sfave[coeffloc_L], mag_Lv / (mag_Lv + mad_Lv * xexpf(-mag_Lv / (mad_Lv * ninev)) + epsv));
STVFU(sfave[coeffloc_L], mag_Lv / (mag_Lv + mad_Lv * xexpf(-mag_Lv / (mad_Lv * ninev)) + epsv));
}
for (; coeffloc_L < Hlvl_L * Wlvl_L; ++coeffloc_L) {
@@ -2343,15 +2340,16 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposition &Wavelet
}
#endif
boxblur(sfave, sfaved, blurBuffer, lvl + 2, lvl + 2, Wlvl_L, Hlvl_L); //increase smoothness by locally averaging shrinkage
boxblur(sfave, sfaved, lvl + 2, Wlvl_L, Hlvl_L, false); //increase smoothness by locally averaging shrinkage
// boxblur(sfave, sfaved, blurBuffer, lvl + 2, lvl + 2, Wlvl_L, Hlvl_L); //increase smoothness by locally averaging shrinkage
#ifdef __SSE2__
__m128 sfavev;
__m128 sf_Lv;
vfloat sfavev;
vfloat sf_Lv;
for (coeffloc_L = 0; coeffloc_L < Hlvl_L * Wlvl_L - 3; coeffloc_L += 4) {
sfavev = LVFU(sfaved[coeffloc_L]);
sf_Lv = LVFU(sfave[coeffloc_L]);
_mm_storeu_ps(&WavCoeffs_L[dir][coeffloc_L], LVFU(WavCoeffs_L[dir][coeffloc_L]) * (SQRV(sfavev) + SQRV(sf_Lv)) / (sfavev + sf_Lv + epsv));
STVFU(WavCoeffs_L[dir][coeffloc_L], LVFU(WavCoeffs_L[dir][coeffloc_L]) * (SQRV(sfavev) + SQRV(sf_Lv)) / (sfavev + sf_Lv + epsv));
//use smoothed shrinkage unless local shrinkage is much less
}
@@ -2392,7 +2390,7 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposition &Wavelet
}
bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab, float *noisevarchrom, float madL[8][3], float *variC, int local, float noisevar_ab, const bool useNoiseCCurve, bool autoch, bool denoiseMethodRgb, int denoiseNestedLevels)
bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(const wavelet_decomposition &WaveletCoeffs_L, const wavelet_decomposition &WaveletCoeffs_ab, float *noisevarchrom, float madL[8][3], float *variC, int local, float noisevar_ab, const bool useNoiseCCurve, bool autoch, bool denoiseMethodRgb, int denoiseNestedLevels)
{
int maxlvl = WaveletCoeffs_L.maxlevel();
@@ -2504,12 +2502,12 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposition &Wavele
if (noisevarfc > 0.001f) {
#ifdef __SSE2__
__m128 onev = _mm_set1_ps(1.f);
__m128 mad_abrv = _mm_set1_ps(mad_abr);
__m128 rmad_Lm9v = onev / _mm_set1_ps(mad_Lr * 9.f);
__m128 mad_abv;
__m128 mag_Lv, mag_abv;
__m128 tempabv;
vfloat onev = F2V(1.f);
vfloat mad_abrv = F2V(mad_abr);
vfloat rmad_Lm9v = onev / F2V(mad_Lr * 9.f);
vfloat mad_abv;
vfloat mag_Lv, mag_abv;
vfloat tempabv;
int coeffloc_ab;
for (coeffloc_ab = 0; coeffloc_ab < Hlvl_ab * Wlvl_ab - 3; coeffloc_ab += 4) {
@@ -2519,7 +2517,7 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposition &Wavele
mag_Lv = LVFU(WavCoeffs_L[dir][coeffloc_ab]);
mag_abv = SQRV(tempabv);
mag_Lv = SQRV(mag_Lv) * rmad_Lm9v;
_mm_storeu_ps(&WavCoeffs_ab[dir][coeffloc_ab], tempabv * SQRV((onev - xexpf(-(mag_abv / mad_abv) - (mag_Lv)))));
STVFU(WavCoeffs_ab[dir][coeffloc_ab], tempabv * SQRV((onev - xexpf(-(mag_abv / mad_abv) - (mag_Lv)))));
}
// few remaining pixels
@@ -2555,9 +2553,7 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposition &Wavele
}
for (int i = 2; i >= 0; i--) {
if (buffer[i] != nullptr) {
delete[] buffer[i];
}
delete[] buffer[i];
}
}
@@ -2565,7 +2561,7 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposition &Wavele
}
bool ImProcFunctions::WaveletDenoiseAllL(wavelet_decomposition &WaveletCoeffs_L, float *noisevarlum, float madL[8][3], float * vari, int edge, int denoiseNestedLevels)//mod JD
bool ImProcFunctions::WaveletDenoiseAllL(const wavelet_decomposition &WaveletCoeffs_L, float *noisevarlum, float madL[8][3], float * vari, int edge, int denoiseNestedLevels)//mod JD
{
@@ -2619,16 +2615,14 @@ bool ImProcFunctions::WaveletDenoiseAllL(wavelet_decomposition &WaveletCoeffs_L,
}
for (int i = 3; i >= 0; i--) {
if (buffer[i] != nullptr) {
delete[] buffer[i];
}
delete[] buffer[i];
}
}
return (!memoryAllocationFailed);
}
bool ImProcFunctions::WaveletDenoiseAllAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab,
bool ImProcFunctions::WaveletDenoiseAllAB(const wavelet_decomposition &WaveletCoeffs_L, const wavelet_decomposition &WaveletCoeffs_ab,
float *noisevarchrom, float madL[8][3], float *variC, int local, float noisevar_ab, const bool useNoiseCCurve, bool autoch, bool denoiseMethodRgb, int denoiseNestedLevels)//mod JD
{
@@ -2690,11 +2684,9 @@ bool ImProcFunctions::WaveletDenoiseAllAB(wavelet_decomposition &WaveletCoeffs_L
return (!memoryAllocationFailed);
}
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
void ImProcFunctions::ShrinkAllL(wavelet_decomposition &WaveletCoeffs_L, float **buffer, int level, int dir,
void ImProcFunctions::ShrinkAllL(const wavelet_decomposition &WaveletCoeffs_L, float **buffer, int level, int dir,
float *noisevarlum, float * madL, float * vari, int edge)
{
@@ -2705,11 +2697,12 @@ void ImProcFunctions::ShrinkAllL(wavelet_decomposition &WaveletCoeffs_L, float *
float * sfaved = buffer[1] + 64;
float * blurBuffer = buffer[2] + 96;
int W_L = WaveletCoeffs_L.level_W(level);
int H_L = WaveletCoeffs_L.level_H(level);
const int W_L = WaveletCoeffs_L.level_W(level);
const int H_L = WaveletCoeffs_L.level_H(level);
float ** WavCoeffs_L = WaveletCoeffs_L.level_coeffs(level);
float mad_L = madL[dir - 1] ;
const float mad_L = madL[dir - 1] ;
const float levelFactor = mad_L * 5.f / static_cast<float>(level + 1);
float *nvl = nullptr;
nvl = new float[ H_L * W_L];
@@ -2769,29 +2762,47 @@ void ImProcFunctions::ShrinkAllL(wavelet_decomposition &WaveletCoeffs_L, float *
}
*/
float levelFactor = mad_L * 5.f / static_cast<float>(level + 1);
int i = 0;
#ifdef __SSE2__
__m128 magv;
__m128 levelFactorv = _mm_set1_ps(levelFactor);
__m128 mad_Lv;
__m128 ninev = _mm_set1_ps(9.0f);
__m128 epsv = _mm_set1_ps(eps);
int i;
const vfloat levelFactorv = F2V(levelFactor);
const vfloat ninev = F2V(9.f);
const vfloat epsv = F2V(eps);
for (i = 0; i < W_L * H_L - 3; i += 4) {
mad_Lv = LVFU(nvl[i]) * levelFactorv;
magv = SQRV(LVFU(WavCoeffs_L[dir][i]));
_mm_storeu_ps(&sfave[i], magv / (magv + mad_Lv * xexpf(-magv / (ninev * mad_Lv)) + epsv));
const vfloat mad_Lv = LVFU(noisevarlum[i]) * levelFactorv;
const vfloat magv = SQRV(LVFU(WavCoeffs_L[dir][i]));
STVFU(sfave[i], magv / (magv + mad_Lv * xexpf(-magv / (ninev * mad_Lv)) + epsv));
}
#endif
// few remaining pixels
for (; i < W_L * H_L; ++i) {
float mag = SQR(WavCoeffs_L[dir][i]);
sfave[i] = mag / (mag + levelFactor * nvl[i] * xexpf(-mag / (9 * levelFactor * nvl[i])) + eps);
}
#else
boxblur(sfave, sfaved, level + 2, W_L, H_L, false); //increase smoothness by locally averaging shrinkage
i = 0;
#ifdef __SSE2__
for (; i < W_L * H_L - 3; i += 4) {
const vfloat sfv = LVFU(sfave[i]);
//use smoothed shrinkage unless local shrinkage is much less
STVFU(WavCoeffs_L[dir][i], LVFU(WavCoeffs_L[dir][i]) * (SQRV(LVFU(sfaved[i])) + SQRV(sfv)) / (LVFU(sfaved[i]) + sfv + epsv));
}
#endif
// few remaining pixels
for (; i < W_L * H_L; ++i) {
const float sf = sfave[i];
//use smoothed shrinkage unless local shrinkage is much less
WavCoeffs_L[dir][i] *= (SQR(sfaved[i]) + SQR(sf)) / (sfaved[i] + sf + eps);
}//now luminance coefficients are denoised
delete [] nvl;
}
/*
for (int i = 0; i < W_L * H_L; ++i) {
float mag = SQR(WavCoeffs_L[dir][i]);
@@ -2832,9 +2843,9 @@ void ImProcFunctions::ShrinkAllL(wavelet_decomposition &WaveletCoeffs_L, float *
#endif
}
*/
void ImProcFunctions::ShrinkAllAB(wavelet_decomposition & WaveletCoeffs_L, wavelet_decomposition & WaveletCoeffs_ab, float **buffer, int level, int dir,
void ImProcFunctions::ShrinkAllAB(const wavelet_decomposition & WaveletCoeffs_L, const wavelet_decomposition & WaveletCoeffs_ab, float **buffer, int level, int dir,
float * noisevarchrom, float noisevar_ab, const bool useNoiseCCurve, bool autoch,
bool denoiseMethodRgb, float * madL, float * variC, int local, float * madaab, bool madCalculated)
@@ -2848,7 +2859,7 @@ void ImProcFunctions::ShrinkAllAB(wavelet_decomposition & WaveletCoeffs_L, wavel
float * sfaveab = buffer[0] + 32;
float * sfaveabd = buffer[1] + 64;
float * blurBuffer = buffer[2] + 96;
// float * blurBuffer = buffer[2] + 96;
int W_ab = WaveletCoeffs_ab.level_W(level);
int H_ab = WaveletCoeffs_ab.level_H(level);
@@ -2893,12 +2904,13 @@ void ImProcFunctions::ShrinkAllAB(wavelet_decomposition & WaveletCoeffs_L, wavel
//madab = useNoiseCCurve ? madab : madab * noisevar_ab;
madab = useNoiseCCurve ? madab : madab * noisevarfc;
#ifdef __SSE2__
__m128 onev = _mm_set1_ps(1.f);
__m128 mad_abrv = _mm_set1_ps(madab);
vfloat onev = F2V(1.f);
vfloat mad_abrv = F2V(madab);
vfloat rmadLm9v = onev / F2V(mad_L * 9.f);
vfloat mad_abv ;
vfloat mag_Lv, mag_abv;
__m128 rmadLm9v = onev / _mm_set1_ps(mad_L * 9.f);
__m128 mad_abv ;
__m128 mag_Lv, mag_abv;
int coeffloc_ab;
for (coeffloc_ab = 0; coeffloc_ab < H_ab * W_ab - 3; coeffloc_ab += 4) {
@@ -2907,7 +2919,7 @@ void ImProcFunctions::ShrinkAllAB(wavelet_decomposition & WaveletCoeffs_L, wavel
mag_Lv = LVFU(WavCoeffs_L[dir][coeffloc_ab]);
mag_abv = SQRV(LVFU(WavCoeffs_ab[dir][coeffloc_ab]));
mag_Lv = (SQRV(mag_Lv)) * rmadLm9v;
_mm_storeu_ps(&sfaveab[coeffloc_ab], (onev - xexpf(-(mag_abv / mad_abv) - (mag_Lv))));
STVFU(sfaveab[coeffloc_ab], (onev - xexpf(-(mag_abv / mad_abv) - (mag_Lv))));
}
// few remaining pixels
@@ -2929,19 +2941,20 @@ void ImProcFunctions::ShrinkAllAB(wavelet_decomposition & WaveletCoeffs_L, wavel
}//now chrominance coefficients are denoised
#endif
boxblur(sfaveab, sfaveabd, level + 2, W_ab, H_ab, false); //increase smoothness by locally averaging shrinkage
boxblur(sfaveab, sfaveabd, blurBuffer, level + 2, level + 2, W_ab, H_ab); //increase smoothness by locally averaging shrinkage
// boxblur(sfaveab, sfaveabd, blurBuffer, level + 2, level + 2, W_ab, H_ab); //increase smoothness by locally averaging shrinkage
#ifdef __SSE2__
__m128 epsv = _mm_set1_ps(eps);
__m128 sfabv;
__m128 sfaveabv;
vfloat epsv = F2V(eps);
vfloat sfabv;
vfloat sfaveabv;
for (coeffloc_ab = 0; coeffloc_ab < H_ab * W_ab - 3; coeffloc_ab += 4) {
sfabv = LVFU(sfaveab[coeffloc_ab]);
sfaveabv = LVFU(sfaveabd[coeffloc_ab]);
//use smoothed shrinkage unless local shrinkage is much less
_mm_storeu_ps(&WavCoeffs_ab[dir][coeffloc_ab], LVFU(WavCoeffs_ab[dir][coeffloc_ab]) * (SQRV(sfaveabv) + SQRV(sfabv)) / (sfaveabv + sfabv + epsv));
STVFU(WavCoeffs_ab[dir][coeffloc_ab], LVFU(WavCoeffs_ab[dir][coeffloc_ab]) * (SQRV(sfaveabv) + SQRV(sfabv)) / (sfaveabv + sfabv + epsv));
}
// few remaining pixels
@@ -3089,8 +3102,8 @@ void ImProcFunctions::ShrinkAll_info(float ** WavCoeffs_a, float ** WavCoeffs_b,
}
void ImProcFunctions::WaveletDenoiseAll_info(int levwav, wavelet_decomposition & WaveletCoeffs_a,
wavelet_decomposition & WaveletCoeffs_b, float **noisevarlum, float **noisevarchrom, float **noisevarhue, float & chaut, int &Nb, float & redaut, float & blueaut, float & maxredaut, float & maxblueaut, float & minredaut, float & minblueaut, int schoice,
void ImProcFunctions::WaveletDenoiseAll_info(int levwav, const wavelet_decomposition & WaveletCoeffs_a,
const wavelet_decomposition & WaveletCoeffs_b, float **noisevarlum, float **noisevarchrom, float **noisevarhue, float & chaut, int &Nb, float & redaut, float & blueaut, float & maxredaut, float & maxblueaut, float & minredaut, float & minblueaut, int schoice,
float & chromina, float & sigma, float & lumema, float & sigma_L, float & redyel, float & skinc, float & nsknc, float & maxchred, float & maxchblue, float & minchred, float & minchblue, int &nb, float & chau, float & chred, float & chblue, bool denoiseMethodRgb)
{
@@ -3276,7 +3289,7 @@ void ImProcFunctions::calcautodn_info(float & chaut, float & delta, int Nb, int
}
void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat * provicalc, const bool isRAW, LUTf &gamcurve, float gam, float gamthresh, float gamslope, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc, bool multiThread)
void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat * provicalc, const bool isRAW, const LUTf &gamcurve, float gam, float gamthresh, float gamslope, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc, bool multiThread)
{
if ((settings->leveldnautsimpl == 1 && dnparams.Cmethod == "MAN") || (settings->leveldnautsimpl == 0 && dnparams.C2method == "MANU")) {
//nothing to do
@@ -3343,8 +3356,8 @@ void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat * provicalc,
const float gain = pow(2.0f, float(expcomp));
int tilesize;
int overlap;
int tilesize = 0;
int overlap = 0;
if (settings->leveldnti == 0) {
tilesize = 1024;
@@ -3445,16 +3458,16 @@ void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat * provicalc,
for (int i = tiletop; i < tilebottom; i += 2) {
int i1 = i - tiletop;
#ifdef __SSE2__
__m128 aNv, bNv;
__m128 c100v = _mm_set1_ps(100.f);
vfloat aNv, bNv;
vfloat c100v = F2V(100.f);
int j;
for (j = tileleft; j < tileright - 7; j += 8) {
int j1 = j - tileleft;
aNv = LVFU(acalc[i >> 1][j >> 1]);
bNv = LVFU(bcalc[i >> 1][j >> 1]);
_mm_storeu_ps(&noisevarhue[i1 >> 1][j1 >> 1], xatan2f(bNv, aNv));
_mm_storeu_ps(&noisevarchrom[i1 >> 1][j1 >> 1], vmaxf(vsqrtf(SQRV(aNv) + SQRV(bNv)),c100v));
STVFU(noisevarhue[i1 >> 1][j1 >> 1], xatan2f(bNv, aNv));
STVFU(noisevarchrom[i1 >> 1][j1 >> 1], vmaxf(vsqrtf(SQRV(aNv) + SQRV(bNv)),c100v));
}
for (; j < tileright; j += 2) {