reviewed boxblur code and usage

This commit is contained in:
Ingo Weyrich
2019-09-26 15:03:09 +02:00
parent 6026c110fa
commit 6bebc19f02
15 changed files with 1015 additions and 1595 deletions

View File

@@ -479,6 +479,7 @@ enum nrquality {QUALITY_STANDARD, QUALITY_HIGH};
void ImProcFunctions::RGB_denoise(int kall, Imagefloat * src, Imagefloat * dst, Imagefloat * calclum, float * ch_M, float *max_r, float *max_b, bool isRAW, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, const NoiseCurve & noiseLCurve, const NoiseCurve & noiseCCurve, float &nresi, float &highresi)
{
BENCHFUN
//#ifdef _DEBUG
MyTime t1e, t2e;
t1e.set();
@@ -687,8 +688,8 @@ BENCHFUN
}
}
int tilesize;
int overlap;
int tilesize = 0;
int overlap = 0;
if (settings->leveldnti == 0) {
tilesize = 1024;
@@ -1341,8 +1342,6 @@ BENCHFUN
#ifdef _OPENMP
int masterThread = omp_get_thread_num();
#endif
#ifdef _OPENMP
#pragma omp parallel num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
#endif
{
@@ -1351,11 +1350,9 @@ BENCHFUN
#else
int subThread = 0;
#endif
float blurbuffer[TS * TS] ALIGNED64;
float *Lblox = LbloxArray[subThread];
float *fLblox = fLbloxArray[subThread];
float pBuf[width + TS + 2 * blkrad * offset] ALIGNED16;
float nbrwt[TS * TS] ALIGNED64;
#ifdef _OPENMP
#pragma omp for
#endif
@@ -1430,7 +1427,7 @@ BENCHFUN
for (int hblk = 0; hblk < numblox_W; ++hblk) {
RGBtile_denoise(fLblox, hblk, noisevar_Ldetail, nbrwt, blurbuffer);
RGBtile_denoise(fLblox, hblk, noisevar_Ldetail);
}//end of horizontal block loop
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -1447,14 +1444,8 @@ BENCHFUN
//add row of blocks to output image tile
RGBoutput_tile_row(Lblox, Ldetail, tilemask_out, height, width, topproc);
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
}//end of vertical block loop
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
}
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#ifdef _OPENMP
#pragma omp parallel for num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
@@ -2041,26 +2032,20 @@ BENCHFUN
}//end of main RGB_denoise
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
void ImProcFunctions::RGBtile_denoise(float * fLblox, int hblproc, float noisevar_Ldetail, float * nbrwt, float * blurbuffer) //for DCT
void ImProcFunctions::RGBtile_denoise(float* fLblox, int hblproc, float noisevar_Ldetail) //for DCT
{
int blkstart = hblproc * TS * TS;
float nbrwt[TS * TS] ALIGNED64;
const int blkstart = hblproc * TS * TS;
boxabsblur(fLblox + blkstart, nbrwt, 3, 3, TS, TS, blurbuffer); //blur neighbor weights for more robust estimation //for DCT
boxabsblur(fLblox + blkstart, nbrwt, 3, TS, TS, false); //blur neighbor weights for more robust estimation //for DCT
#ifdef __SSE2__
__m128 tempv;
__m128 noisevar_Ldetailv = _mm_set1_ps(noisevar_Ldetail);
__m128 onev = _mm_set1_ps(1.0f);
const vfloat noisevar_Ldetailv = F2V(-1.f / noisevar_Ldetail);
const vfloat onev = F2V(1.f);
for (int n = 0; n < TS * TS; n += 4) { //for DCT
tempv = onev - xexpf(-SQRV(LVF(nbrwt[n])) / noisevar_Ldetailv);
_mm_storeu_ps(&fLblox[blkstart + n], LVFU(fLblox[blkstart + n]) * tempv);
const vfloat tempv = onev - xexpf(SQRV(LVF(nbrwt[n])) * noisevar_Ldetailv);
STVF(fLblox[blkstart + n], LVF(fLblox[blkstart + n]) * tempv);
}//output neighbor averaged result
#else
@@ -2071,14 +2056,7 @@ void ImProcFunctions::RGBtile_denoise(float * fLblox, int hblproc, float noiseva
#endif
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
//printf("vblk=%d hlk=%d wsqave=%f || ",vblproc,hblproc,wsqave);
}//end of function tile_denoise
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
}
void ImProcFunctions::RGBoutput_tile_row(float *bloxrow_L, float ** Ldetail, float ** tilemask_out, int height, int width, int top)
{
@@ -2207,7 +2185,7 @@ void ImProcFunctions::Noise_residualAB(const wavelet_decomposition &WaveletCoeff
chmaxresid = maxresid;
}
bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposition &WaveletCoeffs_L, float *noisevarlum, float madL[8][3])
bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(const wavelet_decomposition &WaveletCoeffs_L, float *noisevarlum, float madL[8][3])
{
int maxlvl = min(WaveletCoeffs_L.maxlevel(), 5);
const float eps = 0.01f;
@@ -2258,23 +2236,22 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposition &Wavelet
//simple wavelet shrinkage
float * sfave = buffer[0] + 32;
float * sfaved = buffer[2] + 96;
float * blurBuffer = buffer[1] + 64;
float mad_Lr = madL[lvl][dir - 1];
float levelFactor = mad_Lr * 5.f / (lvl + 1);
#ifdef __SSE2__
__m128 mad_Lv;
__m128 ninev = _mm_set1_ps(9.0f);
__m128 epsv = _mm_set1_ps(eps);
__m128 mag_Lv;
__m128 levelFactorv = _mm_set1_ps(levelFactor);
vfloat mad_Lv;
vfloat ninev = F2V(9.0f);
vfloat epsv = F2V(eps);
vfloat mag_Lv;
vfloat levelFactorv = F2V(levelFactor);
int coeffloc_L;
for (coeffloc_L = 0; coeffloc_L < Hlvl_L * Wlvl_L - 3; coeffloc_L += 4) {
mad_Lv = LVFU(noisevarlum[coeffloc_L]) * levelFactorv;
mag_Lv = SQRV(LVFU(WavCoeffs_L[dir][coeffloc_L]));
_mm_storeu_ps(&sfave[coeffloc_L], mag_Lv / (mag_Lv + mad_Lv * xexpf(-mag_Lv / (mad_Lv * ninev)) + epsv));
STVFU(sfave[coeffloc_L], mag_Lv / (mag_Lv + mad_Lv * xexpf(-mag_Lv / (mad_Lv * ninev)) + epsv));
}
for (; coeffloc_L < Hlvl_L * Wlvl_L; ++coeffloc_L) {
@@ -2294,15 +2271,15 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposition &Wavelet
}
#endif
boxblur(sfave, sfaved, blurBuffer, lvl + 2, lvl + 2, Wlvl_L, Hlvl_L); //increase smoothness by locally averaging shrinkage
boxblur(sfave, sfaved, lvl + 2, Wlvl_L, Hlvl_L, false); //increase smoothness by locally averaging shrinkage
#ifdef __SSE2__
__m128 sfavev;
__m128 sf_Lv;
vfloat sfavev;
vfloat sf_Lv;
for (coeffloc_L = 0; coeffloc_L < Hlvl_L * Wlvl_L - 3; coeffloc_L += 4) {
sfavev = LVFU(sfaved[coeffloc_L]);
sf_Lv = LVFU(sfave[coeffloc_L]);
_mm_storeu_ps(&WavCoeffs_L[dir][coeffloc_L], LVFU(WavCoeffs_L[dir][coeffloc_L]) * (SQRV(sfavev) + SQRV(sf_Lv)) / (sfavev + sf_Lv + epsv));
STVFU(WavCoeffs_L[dir][coeffloc_L], LVFU(WavCoeffs_L[dir][coeffloc_L]) * (SQRV(sfavev) + SQRV(sf_Lv)) / (sfavev + sf_Lv + epsv));
//use smoothed shrinkage unless local shrinkage is much less
}
@@ -2340,7 +2317,7 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposition &Wavelet
return (!memoryAllocationFailed);
}
bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab,
bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(const wavelet_decomposition &WaveletCoeffs_L, const wavelet_decomposition &WaveletCoeffs_ab,
float *noisevarchrom, float madL[8][3], float noisevar_ab, const bool useNoiseCCurve, bool autoch, bool denoiseMethodRgb)
{
int maxlvl = WaveletCoeffs_L.maxlevel();
@@ -2422,12 +2399,12 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposition &Wavele
if (noisevar_ab > 0.001f) {
#ifdef __SSE2__
__m128 onev = _mm_set1_ps(1.f);
__m128 mad_abrv = _mm_set1_ps(mad_abr);
__m128 rmad_Lm9v = onev / _mm_set1_ps(mad_Lr * 9.f);
__m128 mad_abv;
__m128 mag_Lv, mag_abv;
__m128 tempabv;
vfloat onev = F2V(1.f);
vfloat mad_abrv = F2V(mad_abr);
vfloat rmad_Lm9v = onev / F2V(mad_Lr * 9.f);
vfloat mad_abv;
vfloat mag_Lv, mag_abv;
vfloat tempabv;
int coeffloc_ab;
for (coeffloc_ab = 0; coeffloc_ab < Hlvl_ab * Wlvl_ab - 3; coeffloc_ab += 4) {
@@ -2437,7 +2414,7 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposition &Wavele
mag_Lv = LVFU(WavCoeffs_L[dir][coeffloc_ab]);
mag_abv = SQRV(tempabv);
mag_Lv = SQRV(mag_Lv) * rmad_Lm9v;
_mm_storeu_ps(&WavCoeffs_ab[dir][coeffloc_ab], tempabv * SQRV((onev - xexpf(-(mag_abv / mad_abv) - (mag_Lv)))));
STVFU(WavCoeffs_ab[dir][coeffloc_ab], tempabv * SQRV((onev - xexpf(-(mag_abv / mad_abv) - (mag_Lv)))));
}
// few remaining pixels
@@ -2470,9 +2447,7 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposition &Wavele
}
for (int i = 2; i >= 0; i--) {
if (buffer[i] != nullptr) {
delete[] buffer[i];
}
delete[] buffer[i];
}
}
@@ -2480,7 +2455,7 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposition &Wavele
}
bool ImProcFunctions::WaveletDenoiseAllL(wavelet_decomposition &WaveletCoeffs_L, float *noisevarlum, float madL[8][3], float * vari, int edge)//mod JD
bool ImProcFunctions::WaveletDenoiseAllL(const wavelet_decomposition &WaveletCoeffs_L, float *noisevarlum, float madL[8][3], float * vari, int edge)//mod JD
{
@@ -2530,16 +2505,14 @@ bool ImProcFunctions::WaveletDenoiseAllL(wavelet_decomposition &WaveletCoeffs_L,
}
for (int i = 3; i >= 0; i--) {
if (buffer[i] != nullptr) {
delete[] buffer[i];
}
delete[] buffer[i];
}
}
return (!memoryAllocationFailed);
}
bool ImProcFunctions::WaveletDenoiseAllAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab,
bool ImProcFunctions::WaveletDenoiseAllAB(const wavelet_decomposition &WaveletCoeffs_L, const wavelet_decomposition &WaveletCoeffs_ab,
float *noisevarchrom, float madL[8][3], float noisevar_ab, const bool useNoiseCCurve, bool autoch, bool denoiseMethodRgb)//mod JD
{
@@ -2596,7 +2569,7 @@ bool ImProcFunctions::WaveletDenoiseAllAB(wavelet_decomposition &WaveletCoeffs_L
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
void ImProcFunctions::ShrinkAllL(wavelet_decomposition &WaveletCoeffs_L, float **buffer, int level, int dir,
void ImProcFunctions::ShrinkAllL(const wavelet_decomposition &WaveletCoeffs_L, float **buffer, int level, int dir,
float *noisevarlum, float * madL, float * vari, int edge)
{
@@ -2607,12 +2580,12 @@ void ImProcFunctions::ShrinkAllL(wavelet_decomposition &WaveletCoeffs_L, float *
float * sfaved = buffer[1] + 64;
float * blurBuffer = buffer[2] + 96;
int W_L = WaveletCoeffs_L.level_W(level);
int H_L = WaveletCoeffs_L.level_H(level);
const int W_L = WaveletCoeffs_L.level_W(level);
const int H_L = WaveletCoeffs_L.level_H(level);
float ** WavCoeffs_L = WaveletCoeffs_L.level_coeffs(level);
// printf("OK lev=%d\n",level);
float mad_L = madL[dir - 1] ;
const float mad_L = madL[dir - 1] ;
const float levelFactor = mad_L * 5.f / static_cast<float>(level + 1);
if (edge == 1 && vari) {
noisevarlum = blurBuffer; // we need one buffer, but fortunately we don't have to allocate a new one because we can use blurBuffer
@@ -2622,71 +2595,45 @@ void ImProcFunctions::ShrinkAllL(wavelet_decomposition &WaveletCoeffs_L, float *
}
}
float levelFactor = mad_L * 5.f / static_cast<float>(level + 1);
int i = 0;
#ifdef __SSE2__
__m128 magv;
__m128 levelFactorv = _mm_set1_ps(levelFactor);
__m128 mad_Lv;
__m128 ninev = _mm_set1_ps(9.0f);
__m128 epsv = _mm_set1_ps(eps);
int i;
const vfloat levelFactorv = F2V(levelFactor);
const vfloat ninev = F2V(9.f);
const vfloat epsv = F2V(eps);
for (i = 0; i < W_L * H_L - 3; i += 4) {
mad_Lv = LVFU(noisevarlum[i]) * levelFactorv;
magv = SQRV(LVFU(WavCoeffs_L[dir][i]));
_mm_storeu_ps(&sfave[i], magv / (magv + mad_Lv * xexpf(-magv / (ninev * mad_Lv)) + epsv));
for (; i < W_L * H_L - 3; i += 4) {
const vfloat mad_Lv = LVFU(noisevarlum[i]) * levelFactorv;
const vfloat magv = SQRV(LVFU(WavCoeffs_L[dir][i]));
STVFU(sfave[i], magv / (magv + mad_Lv * xexpf(-magv / (ninev * mad_Lv)) + epsv));
}
#endif
// few remaining pixels
for (; i < W_L * H_L; ++i) {
float mag = SQR(WavCoeffs_L[dir][i]);
const float mag = SQR(WavCoeffs_L[dir][i]);
sfave[i] = mag / (mag + levelFactor * noisevarlum[i] * xexpf(-mag / (9 * levelFactor * noisevarlum[i])) + eps);
}
#else
for (int i = 0; i < W_L * H_L; ++i) {
float mag = SQR(WavCoeffs_L[dir][i]);
float shrinkfactor = mag / (mag + levelFactor * noisevarlum[i] * xexpf(-mag / (9 * levelFactor * noisevarlum[i])) + eps);
sfave[i] = shrinkfactor;
}
#endif
boxblur(sfave, sfaved, blurBuffer, level + 2, level + 2, W_L, H_L); //increase smoothness by locally averaging shrinkage
boxblur(sfave, sfaved, level + 2, W_L, H_L, false); //increase smoothness by locally averaging shrinkage
i = 0;
#ifdef __SSE2__
__m128 sfv;
for (i = 0; i < W_L * H_L - 3; i += 4) {
sfv = LVFU(sfave[i]);
for (; i < W_L * H_L - 3; i += 4) {
const vfloat sfv = LVFU(sfave[i]);
//use smoothed shrinkage unless local shrinkage is much less
_mm_storeu_ps(&WavCoeffs_L[dir][i], _mm_loadu_ps(&WavCoeffs_L[dir][i]) * (SQRV(LVFU(sfaved[i])) + SQRV(sfv)) / (LVFU(sfaved[i]) + sfv + epsv));
STVFU(WavCoeffs_L[dir][i], LVFU(WavCoeffs_L[dir][i]) * (SQRV(LVFU(sfaved[i])) + SQRV(sfv)) / (LVFU(sfaved[i]) + sfv + epsv));
}
#endif
// few remaining pixels
for (; i < W_L * H_L; ++i) {
float sf = sfave[i];
const float sf = sfave[i];
//use smoothed shrinkage unless local shrinkage is much less
WavCoeffs_L[dir][i] *= (SQR(sfaved[i]) + SQR(sf)) / (sfaved[i] + sf + eps);
}//now luminance coefficients are denoised
#else
for (int i = 0; i < W_L * H_L; ++i) {
float sf = sfave[i];
//use smoothed shrinkage unless local shrinkage is much less
WavCoeffs_L[dir][i] *= (SQR(sfaved[i]) + SQR(sf)) / (sfaved[i] + sf + eps);
}//now luminance coefficients are denoised
#endif
}
void ImProcFunctions::ShrinkAllAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab, float **buffer, int level, int dir,
void ImProcFunctions::ShrinkAllAB(const wavelet_decomposition &WaveletCoeffs_L, const wavelet_decomposition &WaveletCoeffs_ab, float **buffer, int level, int dir,
float *noisevarchrom, float noisevar_ab, const bool useNoiseCCurve, bool autoch,
bool denoiseMethodRgb, float * madL, float * madaab, bool madCalculated)
@@ -2700,7 +2647,6 @@ void ImProcFunctions::ShrinkAllAB(wavelet_decomposition &WaveletCoeffs_L, wavele
float * sfaveab = buffer[0] + 32;
float * sfaveabd = buffer[1] + 64;
float * blurBuffer = buffer[2] + 96;
int W_ab = WaveletCoeffs_ab.level_W(level);
int H_ab = WaveletCoeffs_ab.level_H(level);
@@ -2724,12 +2670,12 @@ void ImProcFunctions::ShrinkAllAB(wavelet_decomposition &WaveletCoeffs_L, wavele
if (noisevar_ab > 0.001f) {
madab = useNoiseCCurve ? madab : madab * noisevar_ab;
#ifdef __SSE2__
__m128 onev = _mm_set1_ps(1.f);
__m128 mad_abrv = _mm_set1_ps(madab);
vfloat onev = F2V(1.f);
vfloat mad_abrv = F2V(madab);
__m128 rmadLm9v = onev / _mm_set1_ps(mad_L * 9.f);
__m128 mad_abv ;
__m128 mag_Lv, mag_abv;
vfloat rmadLm9v = onev / F2V(mad_L * 9.f);
vfloat mad_abv ;
vfloat mag_Lv, mag_abv;
int coeffloc_ab;
for (coeffloc_ab = 0; coeffloc_ab < H_ab * W_ab - 3; coeffloc_ab += 4) {
@@ -2738,7 +2684,7 @@ void ImProcFunctions::ShrinkAllAB(wavelet_decomposition &WaveletCoeffs_L, wavele
mag_Lv = LVFU(WavCoeffs_L[dir][coeffloc_ab]);
mag_abv = SQRV(LVFU(WavCoeffs_ab[dir][coeffloc_ab]));
mag_Lv = (SQRV(mag_Lv)) * rmadLm9v;
_mm_storeu_ps(&sfaveab[coeffloc_ab], (onev - xexpf(-(mag_abv / mad_abv) - (mag_Lv))));
STVFU(sfaveab[coeffloc_ab], (onev - xexpf(-(mag_abv / mad_abv) - (mag_Lv))));
}
// few remaining pixels
@@ -2761,18 +2707,18 @@ void ImProcFunctions::ShrinkAllAB(wavelet_decomposition &WaveletCoeffs_L, wavele
#endif
boxblur(sfaveab, sfaveabd, blurBuffer, level + 2, level + 2, W_ab, H_ab); //increase smoothness by locally averaging shrinkage
boxblur(sfaveab, sfaveabd, level + 2, W_ab, H_ab, false); //increase smoothness by locally averaging shrinkage
#ifdef __SSE2__
__m128 epsv = _mm_set1_ps(eps);
__m128 sfabv;
__m128 sfaveabv;
vfloat epsv = F2V(eps);
vfloat sfabv;
vfloat sfaveabv;
for (coeffloc_ab = 0; coeffloc_ab < H_ab * W_ab - 3; coeffloc_ab += 4) {
sfabv = LVFU(sfaveab[coeffloc_ab]);
sfaveabv = LVFU(sfaveabd[coeffloc_ab]);
//use smoothed shrinkage unless local shrinkage is much less
_mm_storeu_ps(&WavCoeffs_ab[dir][coeffloc_ab], LVFU(WavCoeffs_ab[dir][coeffloc_ab]) * (SQRV(sfaveabv) + SQRV(sfabv)) / (sfaveabv + sfabv + epsv));
STVFU(WavCoeffs_ab[dir][coeffloc_ab], LVFU(WavCoeffs_ab[dir][coeffloc_ab]) * (SQRV(sfaveabv) + SQRV(sfabv)) / (sfaveabv + sfabv + epsv));
}
// few remaining pixels
@@ -2919,8 +2865,8 @@ void ImProcFunctions::ShrinkAll_info(float ** WavCoeffs_a, float ** WavCoeffs_b,
}
void ImProcFunctions::WaveletDenoiseAll_info(int levwav, wavelet_decomposition &WaveletCoeffs_a,
wavelet_decomposition &WaveletCoeffs_b, float **noisevarlum, float **noisevarchrom, float **noisevarhue, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut, int schoice,
void ImProcFunctions::WaveletDenoiseAll_info(int levwav, const wavelet_decomposition &WaveletCoeffs_a,
const wavelet_decomposition &WaveletCoeffs_b, float **noisevarlum, float **noisevarchrom, float **noisevarhue, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut, int schoice,
float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc, float &maxchred, float &maxchblue, float &minchred, float &minchblue, int &nb, float &chau, float &chred, float &chblue, bool denoiseMethodRgb)
{
@@ -3106,7 +3052,7 @@ void ImProcFunctions::calcautodn_info(float &chaut, float &delta, int Nb, int le
}
void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat * provicalc, const bool isRAW, LUTf &gamcurve, float gam, float gamthresh, float gamslope, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc, bool multiThread)
void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat * provicalc, const bool isRAW, const LUTf &gamcurve, float gam, float gamthresh, float gamslope, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc, bool multiThread)
{
if ((settings->leveldnautsimpl == 1 && dnparams.Cmethod == "MAN") || (settings->leveldnautsimpl == 0 && dnparams.C2method == "MANU")) {
//nothing to do
@@ -3173,8 +3119,8 @@ void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat * provicalc,
const float gain = pow(2.0f, float(expcomp));
int tilesize;
int overlap;
int tilesize = 0;
int overlap = 0;
if (settings->leveldnti == 0) {
tilesize = 1024;
@@ -3275,16 +3221,16 @@ void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat * provicalc,
for (int i = tiletop; i < tilebottom; i += 2) {
int i1 = i - tiletop;
#ifdef __SSE2__
__m128 aNv, bNv;
__m128 c100v = _mm_set1_ps(100.f);
vfloat aNv, bNv;
vfloat c100v = F2V(100.f);
int j;
for (j = tileleft; j < tileright - 7; j += 8) {
int j1 = j - tileleft;
aNv = LVFU(acalc[i >> 1][j >> 1]);
bNv = LVFU(bcalc[i >> 1][j >> 1]);
_mm_storeu_ps(&noisevarhue[i1 >> 1][j1 >> 1], xatan2f(bNv, aNv));
_mm_storeu_ps(&noisevarchrom[i1 >> 1][j1 >> 1], vmaxf(vsqrtf(SQRV(aNv) + SQRV(bNv)),c100v));
STVFU(noisevarhue[i1 >> 1][j1 >> 1], xatan2f(bNv, aNv));
STVFU(noisevarchrom[i1 >> 1][j1 >> 1], vmaxf(vsqrtf(SQRV(aNv) + SQRV(bNv)),c100v));
}
for (; j < tileright; j += 2) {